xref: /openbmc/linux/arch/powerpc/mm/book3s64/radix_tlb.c (revision 19b438592238b3b40c3f945bb5f9c4ca971c0c45)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * TLB flush routines for radix kernels.
4  *
5  * Copyright 2015-2016, Aneesh Kumar K.V, IBM Corporation.
6  */
7 
8 #include <linux/mm.h>
9 #include <linux/hugetlb.h>
10 #include <linux/memblock.h>
11 #include <linux/mmu_context.h>
12 #include <linux/sched/mm.h>
13 
14 #include <asm/ppc-opcode.h>
15 #include <asm/tlb.h>
16 #include <asm/tlbflush.h>
17 #include <asm/trace.h>
18 #include <asm/cputhreads.h>
19 #include <asm/plpar_wrappers.h>
20 
21 #include "internal.h"
22 
23 /*
24  * tlbiel instruction for radix, set invalidation
25  * i.e., r=1 and is=01 or is=10 or is=11
26  */
27 static __always_inline void tlbiel_radix_set_isa300(unsigned int set, unsigned int is,
28 					unsigned int pid,
29 					unsigned int ric, unsigned int prs)
30 {
31 	unsigned long rb;
32 	unsigned long rs;
33 
34 	rb = (set << PPC_BITLSHIFT(51)) | (is << PPC_BITLSHIFT(53));
35 	rs = ((unsigned long)pid << PPC_BITLSHIFT(31));
36 
37 	asm volatile(PPC_TLBIEL(%0, %1, %2, %3, 1)
38 		     : : "r"(rb), "r"(rs), "i"(ric), "i"(prs)
39 		     : "memory");
40 }
41 
42 static void tlbiel_all_isa300(unsigned int num_sets, unsigned int is)
43 {
44 	unsigned int set;
45 
46 	asm volatile("ptesync": : :"memory");
47 
48 	/*
49 	 * Flush the first set of the TLB, and the entire Page Walk Cache
50 	 * and partition table entries. Then flush the remaining sets of the
51 	 * TLB.
52 	 */
53 
54 	if (early_cpu_has_feature(CPU_FTR_HVMODE)) {
55 		/* In HV mode (MSR[HV]=1), flush partition scoped translations first. */
56 		tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 0);
57 
58 		if (!early_cpu_has_feature(CPU_FTR_ARCH_31)) {
59 			for (set = 1; set < num_sets; set++)
60 				tlbiel_radix_set_isa300(set, is, 0,
61 							RIC_FLUSH_TLB, 0);
62 		}
63 	}
64 
65 	/* Flush process scoped entries. */
66 	tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 1);
67 
68 	if (!early_cpu_has_feature(CPU_FTR_ARCH_31)) {
69 		for (set = 1; set < num_sets; set++)
70 			tlbiel_radix_set_isa300(set, is, 0, RIC_FLUSH_TLB, 1);
71 	}
72 
73 	ppc_after_tlbiel_barrier();
74 }
75 
76 void radix__tlbiel_all(unsigned int action)
77 {
78 	unsigned int is;
79 
80 	switch (action) {
81 	case TLB_INVAL_SCOPE_GLOBAL:
82 		is = 3;
83 		break;
84 	case TLB_INVAL_SCOPE_LPID:
85 		is = 2;
86 		break;
87 	default:
88 		BUG();
89 	}
90 
91 	if (early_cpu_has_feature(CPU_FTR_ARCH_300))
92 		tlbiel_all_isa300(POWER9_TLB_SETS_RADIX, is);
93 	else
94 		WARN(1, "%s called on pre-POWER9 CPU\n", __func__);
95 
96 	asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT "; isync" : : :"memory");
97 }
98 
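/*
 * Low-level single-instruction helpers. The __tlbiel_* variants only
 * invalidate the issuing core's translations, while the __tlbie_* variants
 * are broadcast to all processors. Callers provide the surrounding ptesync
 * and eieio/tlbsync/ptesync ordering.
 */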
99 static __always_inline void __tlbiel_pid(unsigned long pid, int set,
100 				unsigned long ric)
101 {
102 	unsigned long rb, rs, prs, r;
103 
104 	rb = PPC_BIT(53); /* IS = 1 */
105 	rb |= set << PPC_BITLSHIFT(51);
106 	rs = ((unsigned long)pid) << PPC_BITLSHIFT(31);
107 	prs = 1; /* process scoped */
108 	r = 1;   /* radix format */
109 
110 	asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
111 		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
112 	trace_tlbie(0, 1, rb, rs, ric, prs, r);
113 }
114 
115 static __always_inline void __tlbie_pid(unsigned long pid, unsigned long ric)
116 {
117 	unsigned long rb, rs, prs, r;
118 
119 	rb = PPC_BIT(53); /* IS = 1 */
120 	rs = pid << PPC_BITLSHIFT(31);
121 	prs = 1; /* process scoped */
122 	r = 1;   /* radix format */
123 
124 	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
125 		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
126 	trace_tlbie(0, 0, rb, rs, ric, prs, r);
127 }
128 
129 static __always_inline void __tlbie_pid_lpid(unsigned long pid,
130 					     unsigned long lpid,
131 					     unsigned long ric)
132 {
133 	unsigned long rb, rs, prs, r;
134 
135 	rb = PPC_BIT(53); /* IS = 1 */
136 	rs = (pid << PPC_BITLSHIFT(31)) | (lpid & ~(PPC_BITMASK(0, 31)));
137 	prs = 1; /* process scoped */
138 	r = 1;   /* radix format */
139 
140 	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
141 		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
142 	trace_tlbie(0, 0, rb, rs, ric, prs, r);
143 }
144 static __always_inline void __tlbie_lpid(unsigned long lpid, unsigned long ric)
145 {
146 	unsigned long rb, rs, prs, r;
147 
148 	rb = PPC_BIT(52); /* IS = 2 */
149 	rs = lpid;
150 	prs = 0; /* partition scoped */
151 	r = 1;   /* radix format */
152 
153 	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
154 		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
155 	trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
156 }
157 
158 static __always_inline void __tlbie_lpid_guest(unsigned long lpid, unsigned long ric)
159 {
160 	unsigned long rb, rs, prs, r;
161 
162 	rb = PPC_BIT(52); /* IS = 2 */
163 	rs = lpid;
164 	prs = 1; /* process scoped */
165 	r = 1;   /* radix format */
166 
167 	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
168 		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
169 	trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
170 }
171 
172 static __always_inline void __tlbiel_va(unsigned long va, unsigned long pid,
173 					unsigned long ap, unsigned long ric)
174 {
175 	unsigned long rb, rs, prs, r;
176 
177 	rb = va & ~(PPC_BITMASK(52, 63));
178 	rb |= ap << PPC_BITLSHIFT(58);
179 	rs = pid << PPC_BITLSHIFT(31);
180 	prs = 1; /* process scoped */
181 	r = 1;   /* radix format */
182 
183 	asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
184 		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
185 	trace_tlbie(0, 1, rb, rs, ric, prs, r);
186 }
187 
188 static __always_inline void __tlbie_va(unsigned long va, unsigned long pid,
189 				       unsigned long ap, unsigned long ric)
190 {
191 	unsigned long rb, rs, prs, r;
192 
193 	rb = va & ~(PPC_BITMASK(52, 63));
194 	rb |= ap << PPC_BITLSHIFT(58);
195 	rs = pid << PPC_BITLSHIFT(31);
196 	prs = 1; /* process scoped */
197 	r = 1;   /* radix format */
198 
199 	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
200 		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
201 	trace_tlbie(0, 0, rb, rs, ric, prs, r);
202 }
203 
204 static __always_inline void __tlbie_va_lpid(unsigned long va, unsigned long pid,
205 					    unsigned long lpid,
206 					    unsigned long ap, unsigned long ric)
207 {
208 	unsigned long rb, rs, prs, r;
209 
210 	rb = va & ~(PPC_BITMASK(52, 63));
211 	rb |= ap << PPC_BITLSHIFT(58);
212 	rs = (pid << PPC_BITLSHIFT(31)) | (lpid & ~(PPC_BITMASK(0, 31)));
213 	prs = 1; /* process scoped */
214 	r = 1;   /* radix format */
215 
216 	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
217 		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
218 	trace_tlbie(0, 0, rb, rs, ric, prs, r);
219 }
220 
221 static __always_inline void __tlbie_lpid_va(unsigned long va, unsigned long lpid,
222 					    unsigned long ap, unsigned long ric)
223 {
224 	unsigned long rb, rs, prs, r;
225 
226 	rb = va & ~(PPC_BITMASK(52, 63));
227 	rb |= ap << PPC_BITLSHIFT(58);
228 	rs = lpid;
229 	prs = 0; /* partition scoped */
230 	r = 1;   /* radix format */
231 
232 	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
233 		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
234 	trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
235 }
236 
237 
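/*
 * POWER9 tlbie errata workarounds. With CPU_FTR_P9_TLBIE_ERAT_BUG an extra
 * invalidation targeting PID/LPID 0 is issued, and with
 * CPU_FTR_P9_TLBIE_STQ_BUG the original tlbie is issued a second time. The
 * fixup_tlbie_*() helpers below are called after the primary tlbie(s).
 */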
238 static inline void fixup_tlbie_va(unsigned long va, unsigned long pid,
239 				  unsigned long ap)
240 {
241 	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
242 		asm volatile("ptesync": : :"memory");
243 		__tlbie_va(va, 0, ap, RIC_FLUSH_TLB);
244 	}
245 
246 	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
247 		asm volatile("ptesync": : :"memory");
248 		__tlbie_va(va, pid, ap, RIC_FLUSH_TLB);
249 	}
250 }
251 
252 static inline void fixup_tlbie_va_range(unsigned long va, unsigned long pid,
253 					unsigned long ap)
254 {
255 	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
256 		asm volatile("ptesync": : :"memory");
257 		__tlbie_pid(0, RIC_FLUSH_TLB);
258 	}
259 
260 	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
261 		asm volatile("ptesync": : :"memory");
262 		__tlbie_va(va, pid, ap, RIC_FLUSH_TLB);
263 	}
264 }
265 
266 static inline void fixup_tlbie_va_range_lpid(unsigned long va,
267 					     unsigned long pid,
268 					     unsigned long lpid,
269 					     unsigned long ap)
270 {
271 	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
272 		asm volatile("ptesync" : : : "memory");
273 		__tlbie_pid_lpid(0, lpid, RIC_FLUSH_TLB);
274 	}
275 
276 	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
277 		asm volatile("ptesync" : : : "memory");
278 		__tlbie_va_lpid(va, pid, lpid, ap, RIC_FLUSH_TLB);
279 	}
280 }
281 
282 static inline void fixup_tlbie_pid(unsigned long pid)
283 {
284 	/*
285 	 * We can use any address for the invalidation; pick one which is
286 	 * probably unused as an optimisation.
287 	 */
288 	unsigned long va = ((1UL << 52) - 1);
289 
290 	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
291 		asm volatile("ptesync": : :"memory");
292 		__tlbie_pid(0, RIC_FLUSH_TLB);
293 	}
294 
295 	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
296 		asm volatile("ptesync": : :"memory");
297 		__tlbie_va(va, pid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB);
298 	}
299 }
300 
301 static inline void fixup_tlbie_pid_lpid(unsigned long pid, unsigned long lpid)
302 {
303 	/*
304 	 * We can use any address for the invalidation; pick one which is
305 	 * probably unused as an optimisation.
306 	 */
307 	unsigned long va = ((1UL << 52) - 1);
308 
309 	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
310 		asm volatile("ptesync" : : : "memory");
311 		__tlbie_pid_lpid(0, lpid, RIC_FLUSH_TLB);
312 	}
313 
314 	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
315 		asm volatile("ptesync" : : : "memory");
316 		__tlbie_va_lpid(va, pid, lpid, mmu_get_ap(MMU_PAGE_64K),
317 				RIC_FLUSH_TLB);
318 	}
319 }
320 
321 static inline void fixup_tlbie_lpid_va(unsigned long va, unsigned long lpid,
322 				       unsigned long ap)
323 {
324 	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
325 		asm volatile("ptesync": : :"memory");
326 		__tlbie_lpid_va(va, 0, ap, RIC_FLUSH_TLB);
327 	}
328 
329 	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
330 		asm volatile("ptesync": : :"memory");
331 		__tlbie_lpid_va(va, lpid, ap, RIC_FLUSH_TLB);
332 	}
333 }
334 
335 static inline void fixup_tlbie_lpid(unsigned long lpid)
336 {
337 	/*
338 	 * We can use any address for the invalidation; pick one which is
339 	 * probably unused as an optimisation.
340 	 */
341 	unsigned long va = ((1UL << 52) - 1);
342 
343 	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
344 		asm volatile("ptesync": : :"memory");
345 		__tlbie_lpid(0, RIC_FLUSH_TLB);
346 	}
347 
348 	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
349 		asm volatile("ptesync": : :"memory");
350 		__tlbie_lpid_va(va, lpid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB);
351 	}
352 }
353 
354 /*
355  * We use 128 sets in radix mode and 256 sets in hpt mode.
356  */
357 static __always_inline void _tlbiel_pid(unsigned long pid, unsigned long ric)
358 {
359 	int set;
360 
361 	asm volatile("ptesync": : :"memory");
362 
363 	/*
364 	 * Flush the first set of the TLB, and if we're doing a RIC_FLUSH_ALL,
365 	 * also flush the entire Page Walk Cache.
366 	 */
367 	__tlbiel_pid(pid, 0, ric);
368 
369 	/* For PWC, only one flush is needed */
370 	if (ric == RIC_FLUSH_PWC) {
371 		ppc_after_tlbiel_barrier();
372 		return;
373 	}
374 
375 	if (!cpu_has_feature(CPU_FTR_ARCH_31)) {
376 		/* For the remaining sets, just flush the TLB */
377 		for (set = 1; set < POWER9_TLB_SETS_RADIX ; set++)
378 			__tlbiel_pid(pid, set, RIC_FLUSH_TLB);
379 	}
380 
381 	ppc_after_tlbiel_barrier();
382 	asm volatile(PPC_RADIX_INVALIDATE_ERAT_USER "; isync" : : :"memory");
383 }
384 
385 static inline void _tlbie_pid(unsigned long pid, unsigned long ric)
386 {
387 	asm volatile("ptesync": : :"memory");
388 
389 	/*
390 	 * Work around the fact that the "ric" argument to __tlbie_pid
391 	 * must be a compile-time constant to match the "i" constraint
392 	 * in the asm statement.
393 	 */
394 	switch (ric) {
395 	case RIC_FLUSH_TLB:
396 		__tlbie_pid(pid, RIC_FLUSH_TLB);
397 		fixup_tlbie_pid(pid);
398 		break;
399 	case RIC_FLUSH_PWC:
400 		__tlbie_pid(pid, RIC_FLUSH_PWC);
401 		break;
402 	case RIC_FLUSH_ALL:
403 	default:
404 		__tlbie_pid(pid, RIC_FLUSH_ALL);
405 		fixup_tlbie_pid(pid);
406 	}
407 	asm volatile("eieio; tlbsync; ptesync": : :"memory");
408 }
409 
410 static inline void _tlbie_pid_lpid(unsigned long pid, unsigned long lpid,
411 				   unsigned long ric)
412 {
413 	asm volatile("ptesync" : : : "memory");
414 
415 	/*
416 	 * Work around the fact that the "ric" argument to __tlbie_pid_lpid
417 	 * must be a compile-time constant to match the "i" constraint
418 	 * in the asm statement.
419 	 */
420 	switch (ric) {
421 	case RIC_FLUSH_TLB:
422 		__tlbie_pid_lpid(pid, lpid, RIC_FLUSH_TLB);
423 		fixup_tlbie_pid_lpid(pid, lpid);
424 		break;
425 	case RIC_FLUSH_PWC:
426 		__tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC);
427 		break;
428 	case RIC_FLUSH_ALL:
429 	default:
430 		__tlbie_pid_lpid(pid, lpid, RIC_FLUSH_ALL);
431 		fixup_tlbie_pid_lpid(pid, lpid);
432 	}
433 	asm volatile("eieio; tlbsync; ptesync" : : : "memory");
434 }
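
/*
 * The *_multicast() helpers run tlbiel on every CPU in mm_cpumask() via
 * on_each_cpu_mask() as an alternative to broadcast tlbie, used when
 * cputlb_use_tlbie() is false. A tlbie is still issued when the mm has
 * coprocessor (nMMU) users, because tlbiel cannot invalidate the nest MMU.
 */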
435 struct tlbiel_pid {
436 	unsigned long pid;
437 	unsigned long ric;
438 };
439 
440 static void do_tlbiel_pid(void *info)
441 {
442 	struct tlbiel_pid *t = info;
443 
444 	if (t->ric == RIC_FLUSH_TLB)
445 		_tlbiel_pid(t->pid, RIC_FLUSH_TLB);
446 	else if (t->ric == RIC_FLUSH_PWC)
447 		_tlbiel_pid(t->pid, RIC_FLUSH_PWC);
448 	else
449 		_tlbiel_pid(t->pid, RIC_FLUSH_ALL);
450 }
451 
452 static inline void _tlbiel_pid_multicast(struct mm_struct *mm,
453 				unsigned long pid, unsigned long ric)
454 {
455 	struct cpumask *cpus = mm_cpumask(mm);
456 	struct tlbiel_pid t = { .pid = pid, .ric = ric };
457 
458 	on_each_cpu_mask(cpus, do_tlbiel_pid, &t, 1);
459 	/*
460 	 * Always want the CPU translations to be invalidated with tlbiel in
461 	 * these paths, so while coprocessors must use tlbie, we can not
462 	 * optimise away the tlbiel component.
463 	 */
464 	if (atomic_read(&mm->context.copros) > 0)
465 		_tlbie_pid(pid, RIC_FLUSH_ALL);
466 }
467 
468 static inline void _tlbie_lpid(unsigned long lpid, unsigned long ric)
469 {
470 	asm volatile("ptesync": : :"memory");
471 
472 	/*
473 	 * Work around the fact that the "ric" argument to __tlbie_lpid
474 	 * must be a compile-time constant to match the "i" constraint
475 	 * in the asm statement.
476 	 */
477 	switch (ric) {
478 	case RIC_FLUSH_TLB:
479 		__tlbie_lpid(lpid, RIC_FLUSH_TLB);
480 		fixup_tlbie_lpid(lpid);
481 		break;
482 	case RIC_FLUSH_PWC:
483 		__tlbie_lpid(lpid, RIC_FLUSH_PWC);
484 		break;
485 	case RIC_FLUSH_ALL:
486 	default:
487 		__tlbie_lpid(lpid, RIC_FLUSH_ALL);
488 		fixup_tlbie_lpid(lpid);
489 	}
490 	asm volatile("eieio; tlbsync; ptesync": : :"memory");
491 }
492 
493 static __always_inline void _tlbie_lpid_guest(unsigned long lpid, unsigned long ric)
494 {
495 	/*
496 	 * Work around the fact that the "ric" argument to __tlbie_lpid_guest
497 	 * must be a compile-time constant to match the "i" constraint
498 	 * in the asm statement.
499 	 */
500 	switch (ric) {
501 	case RIC_FLUSH_TLB:
502 		__tlbie_lpid_guest(lpid, RIC_FLUSH_TLB);
503 		break;
504 	case RIC_FLUSH_PWC:
505 		__tlbie_lpid_guest(lpid, RIC_FLUSH_PWC);
506 		break;
507 	case RIC_FLUSH_ALL:
508 	default:
509 		__tlbie_lpid_guest(lpid, RIC_FLUSH_ALL);
510 	}
511 	fixup_tlbie_lpid(lpid);
512 	asm volatile("eieio; tlbsync; ptesync": : :"memory");
513 }
514 
515 static inline void __tlbiel_va_range(unsigned long start, unsigned long end,
516 				    unsigned long pid, unsigned long page_size,
517 				    unsigned long psize)
518 {
519 	unsigned long addr;
520 	unsigned long ap = mmu_get_ap(psize);
521 
522 	for (addr = start; addr < end; addr += page_size)
523 		__tlbiel_va(addr, pid, ap, RIC_FLUSH_TLB);
524 }
525 
526 static __always_inline void _tlbiel_va(unsigned long va, unsigned long pid,
527 				       unsigned long psize, unsigned long ric)
528 {
529 	unsigned long ap = mmu_get_ap(psize);
530 
531 	asm volatile("ptesync": : :"memory");
532 	__tlbiel_va(va, pid, ap, ric);
533 	ppc_after_tlbiel_barrier();
534 }
535 
536 static inline void _tlbiel_va_range(unsigned long start, unsigned long end,
537 				    unsigned long pid, unsigned long page_size,
538 				    unsigned long psize, bool also_pwc)
539 {
540 	asm volatile("ptesync": : :"memory");
541 	if (also_pwc)
542 		__tlbiel_pid(pid, 0, RIC_FLUSH_PWC);
543 	__tlbiel_va_range(start, end, pid, page_size, psize);
544 	ppc_after_tlbiel_barrier();
545 }
546 
547 static inline void __tlbie_va_range(unsigned long start, unsigned long end,
548 				    unsigned long pid, unsigned long page_size,
549 				    unsigned long psize)
550 {
551 	unsigned long addr;
552 	unsigned long ap = mmu_get_ap(psize);
553 
554 	for (addr = start; addr < end; addr += page_size)
555 		__tlbie_va(addr, pid, ap, RIC_FLUSH_TLB);
556 
557 	fixup_tlbie_va_range(addr - page_size, pid, ap);
558 }
559 
560 static inline void __tlbie_va_range_lpid(unsigned long start, unsigned long end,
561 					 unsigned long pid, unsigned long lpid,
562 					 unsigned long page_size,
563 					 unsigned long psize)
564 {
565 	unsigned long addr;
566 	unsigned long ap = mmu_get_ap(psize);
567 
568 	for (addr = start; addr < end; addr += page_size)
569 		__tlbie_va_lpid(addr, pid, lpid, ap, RIC_FLUSH_TLB);
570 
571 	fixup_tlbie_va_range_lpid(addr - page_size, pid, lpid, ap);
572 }
573 
574 static __always_inline void _tlbie_va(unsigned long va, unsigned long pid,
575 				      unsigned long psize, unsigned long ric)
576 {
577 	unsigned long ap = mmu_get_ap(psize);
578 
579 	asm volatile("ptesync": : :"memory");
580 	__tlbie_va(va, pid, ap, ric);
581 	fixup_tlbie_va(va, pid, ap);
582 	asm volatile("eieio; tlbsync; ptesync": : :"memory");
583 }
584 
585 struct tlbiel_va {
586 	unsigned long pid;
587 	unsigned long va;
588 	unsigned long psize;
589 	unsigned long ric;
590 };
591 
592 static void do_tlbiel_va(void *info)
593 {
594 	struct tlbiel_va *t = info;
595 
596 	if (t->ric == RIC_FLUSH_TLB)
597 		_tlbiel_va(t->va, t->pid, t->psize, RIC_FLUSH_TLB);
598 	else if (t->ric == RIC_FLUSH_PWC)
599 		_tlbiel_va(t->va, t->pid, t->psize, RIC_FLUSH_PWC);
600 	else
601 		_tlbiel_va(t->va, t->pid, t->psize, RIC_FLUSH_ALL);
602 }
603 
604 static inline void _tlbiel_va_multicast(struct mm_struct *mm,
605 				unsigned long va, unsigned long pid,
606 				unsigned long psize, unsigned long ric)
607 {
608 	struct cpumask *cpus = mm_cpumask(mm);
609 	struct tlbiel_va t = { .va = va, .pid = pid, .psize = psize, .ric = ric };
610 	on_each_cpu_mask(cpus, do_tlbiel_va, &t, 1);
611 	if (atomic_read(&mm->context.copros) > 0)
612 		_tlbie_va(va, pid, psize, RIC_FLUSH_TLB);
613 }
614 
615 struct tlbiel_va_range {
616 	unsigned long pid;
617 	unsigned long start;
618 	unsigned long end;
619 	unsigned long page_size;
620 	unsigned long psize;
621 	bool also_pwc;
622 };
623 
624 static void do_tlbiel_va_range(void *info)
625 {
626 	struct tlbiel_va_range *t = info;
627 
628 	_tlbiel_va_range(t->start, t->end, t->pid, t->page_size,
629 				    t->psize, t->also_pwc);
630 }
631 
632 static __always_inline void _tlbie_lpid_va(unsigned long va, unsigned long lpid,
633 			      unsigned long psize, unsigned long ric)
634 {
635 	unsigned long ap = mmu_get_ap(psize);
636 
637 	asm volatile("ptesync": : :"memory");
638 	__tlbie_lpid_va(va, lpid, ap, ric);
639 	fixup_tlbie_lpid_va(va, lpid, ap);
640 	asm volatile("eieio; tlbsync; ptesync": : :"memory");
641 }
642 
643 static inline void _tlbie_va_range(unsigned long start, unsigned long end,
644 				    unsigned long pid, unsigned long page_size,
645 				    unsigned long psize, bool also_pwc)
646 {
647 	asm volatile("ptesync": : :"memory");
648 	if (also_pwc)
649 		__tlbie_pid(pid, RIC_FLUSH_PWC);
650 	__tlbie_va_range(start, end, pid, page_size, psize);
651 	asm volatile("eieio; tlbsync; ptesync": : :"memory");
652 }
653 
654 static inline void _tlbie_va_range_lpid(unsigned long start, unsigned long end,
655 					unsigned long pid, unsigned long lpid,
656 					unsigned long page_size,
657 					unsigned long psize, bool also_pwc)
658 {
659 	asm volatile("ptesync" : : : "memory");
660 	if (also_pwc)
661 		__tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC);
662 	__tlbie_va_range_lpid(start, end, pid, lpid, page_size, psize);
663 	asm volatile("eieio; tlbsync; ptesync" : : : "memory");
664 }
665 
666 static inline void _tlbiel_va_range_multicast(struct mm_struct *mm,
667 				unsigned long start, unsigned long end,
668 				unsigned long pid, unsigned long page_size,
669 				unsigned long psize, bool also_pwc)
670 {
671 	struct cpumask *cpus = mm_cpumask(mm);
672 	struct tlbiel_va_range t = { .start = start, .end = end,
673 				.pid = pid, .page_size = page_size,
674 				.psize = psize, .also_pwc = also_pwc };
675 
676 	on_each_cpu_mask(cpus, do_tlbiel_va_range, &t, 1);
677 	if (atomic_read(&mm->context.copros) > 0)
678 		_tlbie_va_range(start, end, pid, page_size, psize, also_pwc);
679 }
680 
681 /*
682  * Base TLB flushing operations:
683  *
684  *  - flush_tlb_mm(mm) flushes the specified mm context TLB's
685  *  - flush_tlb_page(vma, vmaddr) flushes one page
686  *  - flush_tlb_range(vma, start, end) flushes a range of pages
687  *  - flush_tlb_kernel_range(start, end) flushes kernel pages
688  *
689  *  - local_* variants of page and mm only apply to the current
690  *    processor
691  */
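/*
 * In radix mode the generic flush_tlb_*() entry points in the book3s64
 * tlbflush headers dispatch to these radix__ variants.
 */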
692 void radix__local_flush_tlb_mm(struct mm_struct *mm)
693 {
694 	unsigned long pid;
695 
696 	preempt_disable();
697 	pid = mm->context.id;
698 	if (pid != MMU_NO_CONTEXT)
699 		_tlbiel_pid(pid, RIC_FLUSH_TLB);
700 	preempt_enable();
701 }
702 EXPORT_SYMBOL(radix__local_flush_tlb_mm);
703 
704 #ifndef CONFIG_SMP
705 void radix__local_flush_all_mm(struct mm_struct *mm)
706 {
707 	unsigned long pid;
708 
709 	preempt_disable();
710 	pid = mm->context.id;
711 	if (pid != MMU_NO_CONTEXT)
712 		_tlbiel_pid(pid, RIC_FLUSH_ALL);
713 	preempt_enable();
714 }
715 EXPORT_SYMBOL(radix__local_flush_all_mm);
716 
717 static void __flush_all_mm(struct mm_struct *mm, bool fullmm)
718 {
719 	radix__local_flush_all_mm(mm);
720 }
721 #endif /* CONFIG_SMP */
722 
723 void radix__local_flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
724 				       int psize)
725 {
726 	unsigned long pid;
727 
728 	preempt_disable();
729 	pid = mm->context.id;
730 	if (pid != MMU_NO_CONTEXT)
731 		_tlbiel_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
732 	preempt_enable();
733 }
734 
735 void radix__local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
736 {
737 #ifdef CONFIG_HUGETLB_PAGE
738 	/* need the return fix for nohash.c */
739 	if (is_vm_hugetlb_page(vma))
740 		return radix__local_flush_hugetlb_page(vma, vmaddr);
741 #endif
742 	radix__local_flush_tlb_page_psize(vma->vm_mm, vmaddr, mmu_virtual_psize);
743 }
744 EXPORT_SYMBOL(radix__local_flush_tlb_page);
745 
746 static bool mm_needs_flush_escalation(struct mm_struct *mm)
747 {
748 	/*
749 	 * P9 nest MMU has issues with the page walk cache
750 	 * caching PTEs and not flushing them properly when
751 	 * RIC = 0 for a PID/LPID invalidate
752 	 */
753 	if (atomic_read(&mm->context.copros) > 0)
754 		return true;
755 	return false;
756 }
757 
758 /*
759  * If always_flush is true, then flush even if this CPU can't be removed
760  * from mm_cpumask.
761  */
762 void exit_lazy_flush_tlb(struct mm_struct *mm, bool always_flush)
763 {
764 	unsigned long pid = mm->context.id;
765 	int cpu = smp_processor_id();
766 
767 	/*
768 	 * A kthread could have done a mmget_not_zero() after the flushing CPU
769 	 * checked mm_cpumask, and be in the process of kthread_use_mm when
770 	 * interrupted here. In that case, current->mm will be set to mm,
771 	 * because kthread_use_mm() setting ->mm and switching to the mm is
772 	 * done with interrupts off.
773 	 */
774 	if (current->mm == mm)
775 		goto out;
776 
777 	if (current->active_mm == mm) {
778 		WARN_ON_ONCE(current->mm != NULL);
779 		/* Is a kernel thread and is using mm as the lazy tlb */
780 		mmgrab(&init_mm);
781 		current->active_mm = &init_mm;
782 		switch_mm_irqs_off(mm, &init_mm, current);
783 		mmdrop(mm);
784 	}
785 
786 	/*
787 	 * This IPI may be initiated from any source including those not
788 	 * running the mm, so there may be a racing IPI that comes after
789 	 * this one which finds the cpumask already clear. Check and avoid
790 	 * underflowing the active_cpus count in that case. The race should
791 	 * not otherwise be a problem, but the TLB must be flushed because
792 	 * that's what the caller expects.
793 	 */
794 	if (cpumask_test_cpu(cpu, mm_cpumask(mm))) {
795 		atomic_dec(&mm->context.active_cpus);
796 		cpumask_clear_cpu(cpu, mm_cpumask(mm));
797 		always_flush = true;
798 	}
799 
800 out:
801 	if (always_flush)
802 		_tlbiel_pid(pid, RIC_FLUSH_ALL);
803 }
804 
805 #ifdef CONFIG_SMP
806 static void do_exit_flush_lazy_tlb(void *arg)
807 {
808 	struct mm_struct *mm = arg;
809 	exit_lazy_flush_tlb(mm, true);
810 }
811 
812 static void exit_flush_lazy_tlbs(struct mm_struct *mm)
813 {
814 	/*
815 	 * Would be nice if this was async so it could be run in
816 	 * parallel with our local flush, but generic code does not
817 	 * give a good API for it. Could extend the generic code or
818 	 * make a special powerpc IPI for flushing TLBs.
819 	 * For now it's not too performance critical.
820 	 */
821 	smp_call_function_many(mm_cpumask(mm), do_exit_flush_lazy_tlb,
822 				(void *)mm, 1);
823 }
824 
825 #else /* CONFIG_SMP */
826 static inline void exit_flush_lazy_tlbs(struct mm_struct *mm) { }
827 #endif /* CONFIG_SMP */
828 
829 static DEFINE_PER_CPU(unsigned int, mm_cpumask_trim_clock);
830 
831 /*
832  * Interval between flushes at which we send out IPIs to check whether the
833  * mm_cpumask can be trimmed for the case where it's not a single-threaded
834  * process flushing its own mm. The intent is to reduce the cost of later
835  * flushes. Don't want this to be so low that it adds noticeable cost to TLB
836  * flushing, or so high that it doesn't help reduce global TLBIEs.
837  */
838 static unsigned long tlb_mm_cpumask_trim_timer = 1073;
839 
840 static bool tick_and_test_trim_clock(void)
841 {
842 	if (__this_cpu_inc_return(mm_cpumask_trim_clock) ==
843 			tlb_mm_cpumask_trim_timer) {
844 		__this_cpu_write(mm_cpumask_trim_clock, 0);
845 		return true;
846 	}
847 	return false;
848 }
849 
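/*
 * Decide how a flush must be performed: skipped entirely, done with local
 * tlbiel only, or made globally visible. The decision is based on
 * mm->context.active_cpus, whether this CPU is the only user of the mm,
 * the presence of coprocessor (nMMU) users, and the fullmm hint.
 */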
850 enum tlb_flush_type {
851 	FLUSH_TYPE_NONE,
852 	FLUSH_TYPE_LOCAL,
853 	FLUSH_TYPE_GLOBAL,
854 };
855 
856 static enum tlb_flush_type flush_type_needed(struct mm_struct *mm, bool fullmm)
857 {
858 	int active_cpus = atomic_read(&mm->context.active_cpus);
859 	int cpu = smp_processor_id();
860 
861 	if (active_cpus == 0)
862 		return FLUSH_TYPE_NONE;
863 	if (active_cpus == 1 && cpumask_test_cpu(cpu, mm_cpumask(mm))) {
864 		if (current->mm != mm) {
865 			/*
866 			 * Asynchronous flush sources may trim down to nothing
867 			 * if the process is not running, so occasionally try
868 			 * to trim.
869 			 */
870 			if (tick_and_test_trim_clock()) {
871 				exit_lazy_flush_tlb(mm, true);
872 				return FLUSH_TYPE_NONE;
873 			}
874 		}
875 		return FLUSH_TYPE_LOCAL;
876 	}
877 
878 	/* Coprocessors require TLBIE to invalidate nMMU. */
879 	if (atomic_read(&mm->context.copros) > 0)
880 		return FLUSH_TYPE_GLOBAL;
881 
882 	/*
883 	 * In the fullmm case there's no point doing the exit_flush_lazy_tlbs
884 	 * because the mm is being taken down anyway, and a TLBIE tends to
885 	 * be faster than an IPI+TLBIEL.
886 	 */
887 	if (fullmm)
888 		return FLUSH_TYPE_GLOBAL;
889 
890 	/*
891 	 * If we are running the only thread of a single-threaded process,
892 	 * then we should almost always be able to trim off the rest of the
893 	 * CPU mask (except in the case of use_mm() races), so always try
894 	 * trimming the mask.
895 	 */
896 	if (atomic_read(&mm->mm_users) <= 1 && current->mm == mm) {
897 		exit_flush_lazy_tlbs(mm);
898 		/*
899 		 * use_mm() race could prevent IPIs from being able to clear
900 		 * the cpumask here, however those users are established
901 		 * after our first check (and so after the PTEs are removed),
902 		 * and the TLB still gets flushed by the IPI, so this CPU
903 		 * will only require a local flush.
904 		 */
905 		return FLUSH_TYPE_LOCAL;
906 	}
907 
908 	/*
909 	 * Occasionally try to trim down the cpumask. It's possible this can
910 	 * bring the mask to zero, which results in no flush.
911 	 */
912 	if (tick_and_test_trim_clock()) {
913 		exit_flush_lazy_tlbs(mm);
914 		if (current->mm == mm)
915 			return FLUSH_TYPE_LOCAL;
916 		if (cpumask_test_cpu(cpu, mm_cpumask(mm)))
917 			exit_lazy_flush_tlb(mm, true);
918 		return FLUSH_TYPE_NONE;
919 	}
920 
921 	return FLUSH_TYPE_GLOBAL;
922 }
923 
924 #ifdef CONFIG_SMP
925 void radix__flush_tlb_mm(struct mm_struct *mm)
926 {
927 	unsigned long pid;
928 	enum tlb_flush_type type;
929 
930 	pid = mm->context.id;
931 	if (unlikely(pid == MMU_NO_CONTEXT))
932 		return;
933 
934 	preempt_disable();
935 	/*
936 	 * Order loads of mm_cpumask (in flush_type_needed) vs previous
937 	 * stores to clear ptes before the invalidate. See barrier in
938 	 * switch_mm_irqs_off
939 	 */
940 	smp_mb();
941 	type = flush_type_needed(mm, false);
942 	if (type == FLUSH_TYPE_LOCAL) {
943 		_tlbiel_pid(pid, RIC_FLUSH_TLB);
944 	} else if (type == FLUSH_TYPE_GLOBAL) {
945 		if (!mmu_has_feature(MMU_FTR_GTSE)) {
946 			unsigned long tgt = H_RPTI_TARGET_CMMU;
947 
948 			if (atomic_read(&mm->context.copros) > 0)
949 				tgt |= H_RPTI_TARGET_NMMU;
950 			pseries_rpt_invalidate(pid, tgt, H_RPTI_TYPE_TLB,
951 					       H_RPTI_PAGE_ALL, 0, -1UL);
952 		} else if (cputlb_use_tlbie()) {
953 			if (mm_needs_flush_escalation(mm))
954 				_tlbie_pid(pid, RIC_FLUSH_ALL);
955 			else
956 				_tlbie_pid(pid, RIC_FLUSH_TLB);
957 		} else {
958 			_tlbiel_pid_multicast(mm, pid, RIC_FLUSH_TLB);
959 		}
960 	}
961 	preempt_enable();
962 }
963 EXPORT_SYMBOL(radix__flush_tlb_mm);
964 
965 static void __flush_all_mm(struct mm_struct *mm, bool fullmm)
966 {
967 	unsigned long pid;
968 	enum tlb_flush_type type;
969 
970 	pid = mm->context.id;
971 	if (unlikely(pid == MMU_NO_CONTEXT))
972 		return;
973 
974 	preempt_disable();
975 	smp_mb(); /* see radix__flush_tlb_mm */
976 	type = flush_type_needed(mm, fullmm);
977 	if (type == FLUSH_TYPE_LOCAL) {
978 		_tlbiel_pid(pid, RIC_FLUSH_ALL);
979 	} else if (type == FLUSH_TYPE_GLOBAL) {
980 		if (!mmu_has_feature(MMU_FTR_GTSE)) {
981 			unsigned long tgt = H_RPTI_TARGET_CMMU;
982 			unsigned long type = H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC |
983 					     H_RPTI_TYPE_PRT;
984 
985 			if (atomic_read(&mm->context.copros) > 0)
986 				tgt |= H_RPTI_TARGET_NMMU;
987 			pseries_rpt_invalidate(pid, tgt, type,
988 					       H_RPTI_PAGE_ALL, 0, -1UL);
989 		} else if (cputlb_use_tlbie())
990 			_tlbie_pid(pid, RIC_FLUSH_ALL);
991 		else
992 			_tlbiel_pid_multicast(mm, pid, RIC_FLUSH_ALL);
993 	}
994 	preempt_enable();
995 }
996 
997 void radix__flush_all_mm(struct mm_struct *mm)
998 {
999 	__flush_all_mm(mm, false);
1000 }
1001 EXPORT_SYMBOL(radix__flush_all_mm);
1002 
1003 void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
1004 				 int psize)
1005 {
1006 	unsigned long pid;
1007 	enum tlb_flush_type type;
1008 
1009 	pid = mm->context.id;
1010 	if (unlikely(pid == MMU_NO_CONTEXT))
1011 		return;
1012 
1013 	preempt_disable();
1014 	smp_mb(); /* see radix__flush_tlb_mm */
1015 	type = flush_type_needed(mm, false);
1016 	if (type == FLUSH_TYPE_LOCAL) {
1017 		_tlbiel_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
1018 	} else if (type == FLUSH_TYPE_GLOBAL) {
1019 		if (!mmu_has_feature(MMU_FTR_GTSE)) {
1020 			unsigned long tgt, pg_sizes, size;
1021 
1022 			tgt = H_RPTI_TARGET_CMMU;
1023 			pg_sizes = psize_to_rpti_pgsize(psize);
1024 			size = 1UL << mmu_psize_to_shift(psize);
1025 
1026 			if (atomic_read(&mm->context.copros) > 0)
1027 				tgt |= H_RPTI_TARGET_NMMU;
1028 			pseries_rpt_invalidate(pid, tgt, H_RPTI_TYPE_TLB,
1029 					       pg_sizes, vmaddr,
1030 					       vmaddr + size);
1031 		} else if (cputlb_use_tlbie())
1032 			_tlbie_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
1033 		else
1034 			_tlbiel_va_multicast(mm, vmaddr, pid, psize, RIC_FLUSH_TLB);
1035 	}
1036 	preempt_enable();
1037 }
1038 
1039 void radix__flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
1040 {
1041 #ifdef CONFIG_HUGETLB_PAGE
1042 	if (is_vm_hugetlb_page(vma))
1043 		return radix__flush_hugetlb_page(vma, vmaddr);
1044 #endif
1045 	radix__flush_tlb_page_psize(vma->vm_mm, vmaddr, mmu_virtual_psize);
1046 }
1047 EXPORT_SYMBOL(radix__flush_tlb_page);
1048 
1049 #endif /* CONFIG_SMP */
1050 
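/*
 * Kernel translations are cached under PID 0, so kernel-range flushes are
 * implemented as full PID 0 flushes rather than per-page invalidations.
 */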
1051 static void do_tlbiel_kernel(void *info)
1052 {
1053 	_tlbiel_pid(0, RIC_FLUSH_ALL);
1054 }
1055 
1056 static inline void _tlbiel_kernel_broadcast(void)
1057 {
1058 	on_each_cpu(do_tlbiel_kernel, NULL, 1);
1059 	if (tlbie_capable) {
1060 		/*
1061 		 * Coherent accelerators don't refcount kernel memory mappings,
1062 		 * so have to always issue a tlbie for them. This is quite a
1063 		 * slow path anyway.
1064 		 */
1065 		_tlbie_pid(0, RIC_FLUSH_ALL);
1066 	}
1067 }
1068 
1069 /*
1070  * If kernel TLBIs ever become local rather than global, then
1071  * drivers/misc/ocxl/link.c:ocxl_link_add_pe will need some work, as it
1072  * assumes kernel TLBIs are global.
1073  */
1074 void radix__flush_tlb_kernel_range(unsigned long start, unsigned long end)
1075 {
1076 	if (!mmu_has_feature(MMU_FTR_GTSE)) {
1077 		unsigned long tgt = H_RPTI_TARGET_CMMU | H_RPTI_TARGET_NMMU;
1078 		unsigned long type = H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC |
1079 				     H_RPTI_TYPE_PRT;
1080 
1081 		pseries_rpt_invalidate(0, tgt, type, H_RPTI_PAGE_ALL,
1082 				       start, end);
1083 	} else if (cputlb_use_tlbie())
1084 		_tlbie_pid(0, RIC_FLUSH_ALL);
1085 	else
1086 		_tlbiel_kernel_broadcast();
1087 }
1088 EXPORT_SYMBOL(radix__flush_tlb_kernel_range);
1089 
1090 #define TLB_FLUSH_ALL -1UL
1091 
1092 /*
1093  * Number of pages above which we invalidate the entire PID rather than
1094  * flush individual pages, for local and global flushes respectively.
1095  *
1096  * tlbie goes out to the interconnect and individual ops are more costly.
1097  * It also does not iterate over sets like the local tlbiel variant when
1098  * invalidating a full PID, so it has a far lower threshold to change from
1099  * individual page flushes to full-pid flushes.
1100  */
1101 static unsigned long tlb_single_page_flush_ceiling __read_mostly = 33;
1102 static unsigned long tlb_local_single_page_flush_ceiling __read_mostly = POWER9_TLB_SETS_RADIX * 2;
1103 
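/*
 * Flush a user virtual address range: below the relevant ceiling the range
 * is flushed page by page (with an additional 2M pass when THP is enabled),
 * above it the whole PID is flushed instead.
 */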
1104 static inline void __radix__flush_tlb_range(struct mm_struct *mm,
1105 					    unsigned long start, unsigned long end)
1106 
1107 {
1108 	unsigned long pid;
1109 	unsigned int page_shift = mmu_psize_defs[mmu_virtual_psize].shift;
1110 	unsigned long page_size = 1UL << page_shift;
1111 	unsigned long nr_pages = (end - start) >> page_shift;
1112 	bool fullmm = (end == TLB_FLUSH_ALL);
1113 	bool flush_pid;
1114 	enum tlb_flush_type type;
1115 
1116 	pid = mm->context.id;
1117 	if (unlikely(pid == MMU_NO_CONTEXT))
1118 		return;
1119 
1120 	preempt_disable();
1121 	smp_mb(); /* see radix__flush_tlb_mm */
1122 	type = flush_type_needed(mm, fullmm);
1123 	if (type == FLUSH_TYPE_NONE)
1124 		goto out;
1125 
1126 	if (fullmm)
1127 		flush_pid = true;
1128 	else if (type == FLUSH_TYPE_GLOBAL)
1129 		flush_pid = nr_pages > tlb_single_page_flush_ceiling;
1130 	else
1131 		flush_pid = nr_pages > tlb_local_single_page_flush_ceiling;
1132 
1133 	if (!mmu_has_feature(MMU_FTR_GTSE) && type == FLUSH_TYPE_GLOBAL) {
1134 		unsigned long tgt = H_RPTI_TARGET_CMMU;
1135 		unsigned long pg_sizes = psize_to_rpti_pgsize(mmu_virtual_psize);
1136 
1137 		if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
1138 			pg_sizes |= psize_to_rpti_pgsize(MMU_PAGE_2M);
1139 		if (atomic_read(&mm->context.copros) > 0)
1140 			tgt |= H_RPTI_TARGET_NMMU;
1141 		pseries_rpt_invalidate(pid, tgt, H_RPTI_TYPE_TLB, pg_sizes,
1142 				       start, end);
1143 	} else if (flush_pid) {
1144 		if (type == FLUSH_TYPE_LOCAL) {
1145 			_tlbiel_pid(pid, RIC_FLUSH_TLB);
1146 		} else {
1147 			if (cputlb_use_tlbie()) {
1148 				if (mm_needs_flush_escalation(mm))
1149 					_tlbie_pid(pid, RIC_FLUSH_ALL);
1150 				else
1151 					_tlbie_pid(pid, RIC_FLUSH_TLB);
1152 			} else {
1153 				_tlbiel_pid_multicast(mm, pid, RIC_FLUSH_TLB);
1154 			}
1155 		}
1156 	} else {
1157 		bool hflush = false;
1158 		unsigned long hstart, hend;
1159 
1160 		if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) {
1161 			hstart = (start + PMD_SIZE - 1) & PMD_MASK;
1162 			hend = end & PMD_MASK;
1163 			if (hstart < hend)
1164 				hflush = true;
1165 		}
1166 
1167 		if (type == FLUSH_TYPE_LOCAL) {
1168 			asm volatile("ptesync": : :"memory");
1169 			__tlbiel_va_range(start, end, pid, page_size, mmu_virtual_psize);
1170 			if (hflush)
1171 				__tlbiel_va_range(hstart, hend, pid,
1172 						PMD_SIZE, MMU_PAGE_2M);
1173 			ppc_after_tlbiel_barrier();
1174 		} else if (cputlb_use_tlbie()) {
1175 			asm volatile("ptesync": : :"memory");
1176 			__tlbie_va_range(start, end, pid, page_size, mmu_virtual_psize);
1177 			if (hflush)
1178 				__tlbie_va_range(hstart, hend, pid,
1179 						PMD_SIZE, MMU_PAGE_2M);
1180 			asm volatile("eieio; tlbsync; ptesync": : :"memory");
1181 		} else {
1182 			_tlbiel_va_range_multicast(mm,
1183 					start, end, pid, page_size, mmu_virtual_psize, false);
1184 			if (hflush)
1185 				_tlbiel_va_range_multicast(mm,
1186 					hstart, hend, pid, PMD_SIZE, MMU_PAGE_2M, false);
1187 		}
1188 	}
1189 out:
1190 	preempt_enable();
1191 }
1192 
1193 void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
1194 		     unsigned long end)
1195 
1196 {
1197 #ifdef CONFIG_HUGETLB_PAGE
1198 	if (is_vm_hugetlb_page(vma))
1199 		return radix__flush_hugetlb_tlb_range(vma, start, end);
1200 #endif
1201 
1202 	__radix__flush_tlb_range(vma->vm_mm, start, end);
1203 }
1204 EXPORT_SYMBOL(radix__flush_tlb_range);
1205 
1206 static int radix_get_mmu_psize(int page_size)
1207 {
1208 	int psize;
1209 
1210 	if (page_size == (1UL << mmu_psize_defs[mmu_virtual_psize].shift))
1211 		psize = mmu_virtual_psize;
1212 	else if (page_size == (1UL << mmu_psize_defs[MMU_PAGE_2M].shift))
1213 		psize = MMU_PAGE_2M;
1214 	else if (page_size == (1UL << mmu_psize_defs[MMU_PAGE_1G].shift))
1215 		psize = MMU_PAGE_1G;
1216 	else
1217 		return -1;
1218 	return psize;
1219 }
1220 
1221 /*
1222  * Flush partition scoped LPID address translation for all CPUs.
1223  */
1224 void radix__flush_tlb_lpid_page(unsigned int lpid,
1225 					unsigned long addr,
1226 					unsigned long page_size)
1227 {
1228 	int psize = radix_get_mmu_psize(page_size);
1229 
1230 	_tlbie_lpid_va(addr, lpid, psize, RIC_FLUSH_TLB);
1231 }
1232 EXPORT_SYMBOL_GPL(radix__flush_tlb_lpid_page);
1233 
1234 /*
1235  * Flush partition scoped PWC from LPID for all CPUs.
1236  */
1237 void radix__flush_pwc_lpid(unsigned int lpid)
1238 {
1239 	_tlbie_lpid(lpid, RIC_FLUSH_PWC);
1240 }
1241 EXPORT_SYMBOL_GPL(radix__flush_pwc_lpid);
1242 
1243 /*
1244  * Flush partition scoped translations from LPID (=LPIDR)
1245  */
1246 void radix__flush_all_lpid(unsigned int lpid)
1247 {
1248 	_tlbie_lpid(lpid, RIC_FLUSH_ALL);
1249 }
1250 EXPORT_SYMBOL_GPL(radix__flush_all_lpid);
1251 
1252 /*
1253  * Flush process scoped translations from LPID (=LPIDR)
1254  */
1255 void radix__flush_all_lpid_guest(unsigned int lpid)
1256 {
1257 	_tlbie_lpid_guest(lpid, RIC_FLUSH_ALL);
1258 }
1259 
1260 static void radix__flush_tlb_pwc_range_psize(struct mm_struct *mm, unsigned long start,
1261 				  unsigned long end, int psize);
1262 
1263 void radix__tlb_flush(struct mmu_gather *tlb)
1264 {
1265 	int psize = 0;
1266 	struct mm_struct *mm = tlb->mm;
1267 	int page_size = tlb->page_size;
1268 	unsigned long start = tlb->start;
1269 	unsigned long end = tlb->end;
1270 
1271 	/*
1272 	 * if page size is not something we understand, do a full mm flush
1273 	 *
1274 	 * A "fullmm" flush must always do a flush_all_mm (RIC=2) flush
1275 	 * that flushes the process table entry cache upon process teardown.
1276 	 * See the comment for radix in arch_exit_mmap().
1277 	 */
1278 	if (tlb->fullmm || tlb->need_flush_all) {
1279 		__flush_all_mm(mm, true);
1280 	} else if ((psize = radix_get_mmu_psize(page_size)) == -1) {
1281 		if (!tlb->freed_tables)
1282 			radix__flush_tlb_mm(mm);
1283 		else
1284 			radix__flush_all_mm(mm);
1285 	} else {
1286 		if (!tlb->freed_tables)
1287 			radix__flush_tlb_range_psize(mm, start, end, psize);
1288 		else
1289 			radix__flush_tlb_pwc_range_psize(mm, start, end, psize);
1290 	}
1291 }
1292 
1293 static __always_inline void __radix__flush_tlb_range_psize(struct mm_struct *mm,
1294 				unsigned long start, unsigned long end,
1295 				int psize, bool also_pwc)
1296 {
1297 	unsigned long pid;
1298 	unsigned int page_shift = mmu_psize_defs[psize].shift;
1299 	unsigned long page_size = 1UL << page_shift;
1300 	unsigned long nr_pages = (end - start) >> page_shift;
1301 	bool fullmm = (end == TLB_FLUSH_ALL);
1302 	bool flush_pid;
1303 	enum tlb_flush_type type;
1304 
1305 	pid = mm->context.id;
1306 	if (unlikely(pid == MMU_NO_CONTEXT))
1307 		return;
1308 
1311 	preempt_disable();
1312 	smp_mb(); /* see radix__flush_tlb_mm */
1313 	type = flush_type_needed(mm, fullmm);
1314 	if (type == FLUSH_TYPE_NONE)
1315 		goto out;
1316 
1317 	if (fullmm)
1318 		flush_pid = true;
1319 	else if (type == FLUSH_TYPE_GLOBAL)
1320 		flush_pid = nr_pages > tlb_single_page_flush_ceiling;
1321 	else
1322 		flush_pid = nr_pages > tlb_local_single_page_flush_ceiling;
1323 
1324 	if (!mmu_has_feature(MMU_FTR_GTSE) && type == FLUSH_TYPE_GLOBAL) {
1325 		unsigned long tgt = H_RPTI_TARGET_CMMU;
1326 		unsigned long type = H_RPTI_TYPE_TLB;
1327 		unsigned long pg_sizes = psize_to_rpti_pgsize(psize);
1328 
1329 		if (also_pwc)
1330 			type |= H_RPTI_TYPE_PWC;
1331 		if (atomic_read(&mm->context.copros) > 0)
1332 			tgt |= H_RPTI_TARGET_NMMU;
1333 		pseries_rpt_invalidate(pid, tgt, type, pg_sizes, start, end);
1334 	} else if (flush_pid) {
1335 		if (type == FLUSH_TYPE_LOCAL) {
1336 			_tlbiel_pid(pid, also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB);
1337 		} else {
1338 			if (cputlb_use_tlbie()) {
1339 				if (mm_needs_flush_escalation(mm))
1340 					also_pwc = true;
1341 
1342 				_tlbie_pid(pid,
1343 					also_pwc ?  RIC_FLUSH_ALL : RIC_FLUSH_TLB);
1344 			} else {
1345 				_tlbiel_pid_multicast(mm, pid,
1346 					also_pwc ?  RIC_FLUSH_ALL : RIC_FLUSH_TLB);
1347 			}
1348 
1349 		}
1350 	} else {
1351 		if (type == FLUSH_TYPE_LOCAL)
1352 			_tlbiel_va_range(start, end, pid, page_size, psize, also_pwc);
1353 		else if (cputlb_use_tlbie())
1354 			_tlbie_va_range(start, end, pid, page_size, psize, also_pwc);
1355 		else
1356 			_tlbiel_va_range_multicast(mm,
1357 					start, end, pid, page_size, psize, also_pwc);
1358 	}
1359 out:
1360 	preempt_enable();
1361 }
1362 
1363 void radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start,
1364 				  unsigned long end, int psize)
1365 {
1366 	return __radix__flush_tlb_range_psize(mm, start, end, psize, false);
1367 }
1368 
1369 static void radix__flush_tlb_pwc_range_psize(struct mm_struct *mm, unsigned long start,
1370 				  unsigned long end, int psize)
1371 {
1372 	__radix__flush_tlb_range_psize(mm, start, end, psize, true);
1373 }
1374 
1375 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
1376 void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr)
1377 {
1378 	unsigned long pid, end;
1379 	enum tlb_flush_type type;
1380 
1381 	pid = mm->context.id;
1382 	if (unlikely(pid == MMU_NO_CONTEXT))
1383 		return;
1384 
1385 	/* 4k page size, just blow the world */
1386 	if (PAGE_SIZE == 0x1000) {
1387 		radix__flush_all_mm(mm);
1388 		return;
1389 	}
1390 
1391 	end = addr + HPAGE_PMD_SIZE;
1392 
1393 	/* Otherwise first do the PWC, then iterate the pages. */
1394 	preempt_disable();
1395 	smp_mb(); /* see radix__flush_tlb_mm */
1396 	type = flush_type_needed(mm, false);
1397 	if (type == FLUSH_TYPE_LOCAL) {
1398 		_tlbiel_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
1399 	} else if (type == FLUSH_TYPE_GLOBAL) {
1400 		if (!mmu_has_feature(MMU_FTR_GTSE)) {
1401 			unsigned long tgt, type, pg_sizes;
1402 
1403 			tgt = H_RPTI_TARGET_CMMU;
1404 			type = H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC |
1405 			       H_RPTI_TYPE_PRT;
1406 			pg_sizes = psize_to_rpti_pgsize(mmu_virtual_psize);
1407 
1408 			if (atomic_read(&mm->context.copros) > 0)
1409 				tgt |= H_RPTI_TARGET_NMMU;
1410 			pseries_rpt_invalidate(pid, tgt, type, pg_sizes,
1411 					       addr, end);
1412 		} else if (cputlb_use_tlbie())
1413 			_tlbie_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
1414 		else
1415 			_tlbiel_va_range_multicast(mm,
1416 					addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
1417 	}
1418 
1419 	preempt_enable();
1420 }
1421 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
1422 
1423 void radix__flush_pmd_tlb_range(struct vm_area_struct *vma,
1424 				unsigned long start, unsigned long end)
1425 {
1426 	radix__flush_tlb_range_psize(vma->vm_mm, start, end, MMU_PAGE_2M);
1427 }
1428 EXPORT_SYMBOL(radix__flush_pmd_tlb_range);
1429 
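/*
 * Flush everything on all CPUs: IS=3 tlbie covering both process scoped
 * (PRS=1, guest) and partition scoped (PRS=0, host) translations.
 */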
1430 void radix__flush_tlb_all(void)
1431 {
1432 	unsigned long rb, prs, r, rs;
1433 	unsigned long ric = RIC_FLUSH_ALL;
1434 
1435 	rb = 0x3 << PPC_BITLSHIFT(53); /* IS = 3 */
1436 	prs = 0; /* partition scoped */
1437 	r = 1;   /* radix format */
1438 	rs = 1 & ((1UL << 32) - 1); /* any LPID value to flush guest mappings */
1439 
1440 	asm volatile("ptesync": : :"memory");
1441 	/*
1442 	 * now flush guest entries by passing PRS = 1 and LPID != 0
1443 	 */
1444 	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
1445 		     : : "r"(rb), "i"(r), "i"(1), "i"(ric), "r"(rs) : "memory");
1446 	/*
1447 	 * now flush host entries by passing PRS = 0 and LPID == 0
1448 	 */
1449 	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
1450 		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(0) : "memory");
1451 	asm volatile("eieio; tlbsync; ptesync": : :"memory");
1452 }
1453 
1454 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
1455 /*
1456  * Performs process-scoped invalidations for a given LPID
1457  * as part of H_RPT_INVALIDATE hcall.
1458  */
1459 void do_h_rpt_invalidate_prt(unsigned long pid, unsigned long lpid,
1460 			     unsigned long type, unsigned long pg_sizes,
1461 			     unsigned long start, unsigned long end)
1462 {
1463 	unsigned long psize, nr_pages;
1464 	struct mmu_psize_def *def;
1465 	bool flush_pid;
1466 
1467 	/*
1468 	 * A H_RPTI_TYPE_ALL request implies RIC=3, hence
1469 	 * do a single IS=1 based flush.
1470 	 */
1471 	if ((type & H_RPTI_TYPE_ALL) == H_RPTI_TYPE_ALL) {
1472 		_tlbie_pid_lpid(pid, lpid, RIC_FLUSH_ALL);
1473 		return;
1474 	}
1475 
1476 	if (type & H_RPTI_TYPE_PWC)
1477 		_tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC);
1478 
1479 	/* Full PID flush */
1480 	if (start == 0 && end == -1)
1481 		return _tlbie_pid_lpid(pid, lpid, RIC_FLUSH_TLB);
1482 
1483 	/* Do range invalidation for all the valid page sizes */
1484 	for (psize = 0; psize < MMU_PAGE_COUNT; psize++) {
1485 		def = &mmu_psize_defs[psize];
1486 		if (!(pg_sizes & def->h_rpt_pgsize))
1487 			continue;
1488 
1489 		nr_pages = (end - start) >> def->shift;
1490 		flush_pid = nr_pages > tlb_single_page_flush_ceiling;
1491 
1492 		/*
1493 		 * If the number of pages spanning the range is above
1494 		 * the ceiling, convert the request into a full PID flush.
1495 		 * And since PID flush takes out all the page sizes, there
1496 		 * is no need to consider remaining page sizes.
1497 		 */
1498 		if (flush_pid) {
1499 			_tlbie_pid_lpid(pid, lpid, RIC_FLUSH_TLB);
1500 			return;
1501 		}
1502 		_tlbie_va_range_lpid(start, end, pid, lpid,
1503 				     (1UL << def->shift), psize, false);
1504 	}
1505 }
1506 EXPORT_SYMBOL_GPL(do_h_rpt_invalidate_prt);
1507 
1508 #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
1509