1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * TLB flush routines for radix kernels.
4  *
5  * Copyright 2015-2016, Aneesh Kumar K.V, IBM Corporation.
6  */
7 
8 #include <linux/mm.h>
9 #include <linux/hugetlb.h>
10 #include <linux/memblock.h>
11 #include <linux/mmu_context.h>
12 #include <linux/sched/mm.h>
13 
14 #include <asm/ppc-opcode.h>
15 #include <asm/tlb.h>
16 #include <asm/tlbflush.h>
17 #include <asm/trace.h>
18 #include <asm/cputhreads.h>
19 #include <asm/plpar_wrappers.h>
20 
21 #include "internal.h"
22 
23 /*
24  * tlbiel instruction for radix, set invalidation
25  * i.e., r=1 and is=01 or is=10 or is=11
26  */
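/*
 * For reference: PPC_BITLSHIFT(be) is (63 - be) on 64-bit, so the field
 * assembly below works out to (illustrative only, assuming the usual IBM
 * bit numbering):
 *
 *	rb = (set << 12) | (is << 10);	/* set ends at RB bit 51, IS at 52:53 */
 *	rs = (unsigned long)pid << 32;	/* PID in the upper word of RS */
 *
 * e.g. set = 1, is = 3 gives rb = 0x1c00.
 */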
27 static __always_inline void tlbiel_radix_set_isa300(unsigned int set, unsigned int is,
28 					unsigned int pid,
29 					unsigned int ric, unsigned int prs)
30 {
31 	unsigned long rb;
32 	unsigned long rs;
33 
34 	rb = (set << PPC_BITLSHIFT(51)) | (is << PPC_BITLSHIFT(53));
35 	rs = ((unsigned long)pid << PPC_BITLSHIFT(31));
36 
37 	asm volatile(PPC_TLBIEL(%0, %1, %2, %3, 1)
38 		     : : "r"(rb), "r"(rs), "i"(ric), "i"(prs)
39 		     : "memory");
40 }
41 
42 static void tlbiel_all_isa300(unsigned int num_sets, unsigned int is)
43 {
44 	unsigned int set;
45 
46 	asm volatile("ptesync": : :"memory");
47 
48 	/*
49 	 * Flush the first set of the TLB, and the entire Page Walk Cache
50 	 * and partition table entries. Then flush the remaining sets of the
51 	 * TLB.
52 	 */
53 
54 	if (early_cpu_has_feature(CPU_FTR_HVMODE)) {
55 		/* MSR[HV] should flush partition scope translations first. */
56 		tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 0);
57 
58 		if (!early_cpu_has_feature(CPU_FTR_ARCH_31)) {
59 			for (set = 1; set < num_sets; set++)
60 				tlbiel_radix_set_isa300(set, is, 0,
61 							RIC_FLUSH_TLB, 0);
62 		}
63 	}
64 
65 	/* Flush process scoped entries. */
66 	tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 1);
67 
68 	if (!early_cpu_has_feature(CPU_FTR_ARCH_31)) {
69 		for (set = 1; set < num_sets; set++)
70 			tlbiel_radix_set_isa300(set, is, 0, RIC_FLUSH_TLB, 1);
71 	}
72 
73 	ppc_after_tlbiel_barrier();
74 }
75 
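/*
 * radix__tlbiel_all() only scrubs the local CPU's translations (tlbiel is
 * not broadcast). It is intended for paths where a CPU cleans up after
 * itself, e.g. early boot or CPU online; the callers live outside this
 * file. TLB_INVAL_SCOPE_GLOBAL (is = 3) drops every cached entry on this
 * CPU, while TLB_INVAL_SCOPE_LPID (is = 2) only drops entries tagged with
 * the current LPID.
 */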
76 void radix__tlbiel_all(unsigned int action)
77 {
78 	unsigned int is;
79 
80 	switch (action) {
81 	case TLB_INVAL_SCOPE_GLOBAL:
82 		is = 3;
83 		break;
84 	case TLB_INVAL_SCOPE_LPID:
85 		is = 2;
86 		break;
87 	default:
88 		BUG();
89 	}
90 
91 	if (early_cpu_has_feature(CPU_FTR_ARCH_300))
92 		tlbiel_all_isa300(POWER9_TLB_SETS_RADIX, is);
93 	else
94 		WARN(1, "%s called on pre-POWER9 CPU\n", __func__);
95 
96 	asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT "; isync" : : :"memory");
97 }
98 
99 static __always_inline void __tlbiel_pid(unsigned long pid, int set,
100 				unsigned long ric)
101 {
102 	unsigned long rb, rs, prs, r;
103 
104 	rb = PPC_BIT(53); /* IS = 1 */
105 	rb |= set << PPC_BITLSHIFT(51);
106 	rs = ((unsigned long)pid) << PPC_BITLSHIFT(31);
107 	prs = 1; /* process scoped */
108 	r = 1;   /* radix format */
109 
110 	asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
111 		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
112 	trace_tlbie(0, 1, rb, rs, ric, prs, r);
113 }
114 
115 static __always_inline void __tlbie_pid(unsigned long pid, unsigned long ric)
116 {
117 	unsigned long rb, rs, prs, r;
118 
119 	rb = PPC_BIT(53); /* IS = 1 */
120 	rs = pid << PPC_BITLSHIFT(31);
121 	prs = 1; /* process scoped */
122 	r = 1;   /* radix format */
123 
124 	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
125 		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
126 	trace_tlbie(0, 0, rb, rs, ric, prs, r);
127 }
128 
129 static __always_inline void __tlbie_pid_lpid(unsigned long pid,
130 					     unsigned long lpid,
131 					     unsigned long ric)
132 {
133 	unsigned long rb, rs, prs, r;
134 
135 	rb = PPC_BIT(53); /* IS = 1 */
136 	rs = (pid << PPC_BITLSHIFT(31)) | (lpid & ~(PPC_BITMASK(0, 31)));
137 	prs = 1; /* process scoped */
138 	r = 1;   /* radix format */
139 
140 	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
141 		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
142 	trace_tlbie(0, 0, rb, rs, ric, prs, r);
143 }
144 static __always_inline void __tlbie_lpid(unsigned long lpid, unsigned long ric)
145 {
146 	unsigned long rb, rs, prs, r;
147 
148 	rb = PPC_BIT(52); /* IS = 2 */
149 	rs = lpid;
150 	prs = 0; /* partition scoped */
151 	r = 1;   /* radix format */
152 
153 	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
154 		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
155 	trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
156 }
157 
158 static __always_inline void __tlbie_lpid_guest(unsigned long lpid, unsigned long ric)
159 {
160 	unsigned long rb, rs, prs, r;
161 
162 	rb = PPC_BIT(52); /* IS = 2 */
163 	rs = lpid;
164 	prs = 1; /* process scoped */
165 	r = 1;   /* radix format */
166 
167 	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
168 		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
169 	trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
170 }
171 
172 static __always_inline void __tlbiel_va(unsigned long va, unsigned long pid,
173 					unsigned long ap, unsigned long ric)
174 {
175 	unsigned long rb, rs, prs, r;
176 
177 	rb = va & ~(PPC_BITMASK(52, 63));
178 	rb |= ap << PPC_BITLSHIFT(58);
179 	rs = pid << PPC_BITLSHIFT(31);
180 	prs = 1; /* process scoped */
181 	r = 1;   /* radix format */
182 
183 	asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
184 		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
185 	trace_tlbie(0, 1, rb, rs, ric, prs, r);
186 }
187 
188 static __always_inline void __tlbie_va(unsigned long va, unsigned long pid,
189 				       unsigned long ap, unsigned long ric)
190 {
191 	unsigned long rb, rs, prs, r;
192 
193 	rb = va & ~(PPC_BITMASK(52, 63));
194 	rb |= ap << PPC_BITLSHIFT(58);
195 	rs = pid << PPC_BITLSHIFT(31);
196 	prs = 1; /* process scoped */
197 	r = 1;   /* radix format */
198 
199 	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
200 		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
201 	trace_tlbie(0, 0, rb, rs, ric, prs, r);
202 }
203 
204 static __always_inline void __tlbie_va_lpid(unsigned long va, unsigned long pid,
205 					    unsigned long lpid,
206 					    unsigned long ap, unsigned long ric)
207 {
208 	unsigned long rb, rs, prs, r;
209 
210 	rb = va & ~(PPC_BITMASK(52, 63));
211 	rb |= ap << PPC_BITLSHIFT(58);
212 	rs = (pid << PPC_BITLSHIFT(31)) | (lpid & ~(PPC_BITMASK(0, 31)));
213 	prs = 1; /* process scoped */
214 	r = 1;   /* radix format */
215 
216 	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
217 		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
218 	trace_tlbie(0, 0, rb, rs, ric, prs, r);
219 }
220 
221 static __always_inline void __tlbie_lpid_va(unsigned long va, unsigned long lpid,
222 					    unsigned long ap, unsigned long ric)
223 {
224 	unsigned long rb, rs, prs, r;
225 
226 	rb = va & ~(PPC_BITMASK(52, 63));
227 	rb |= ap << PPC_BITLSHIFT(58);
228 	rs = lpid;
229 	prs = 0; /* partition scoped */
230 	r = 1;   /* radix format */
231 
232 	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
233 		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
234 	trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
235 }
236 
237 
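/*
 * The fixup_tlbie_*() helpers below work around two POWER9 tlbie errata:
 * with CPU_FTR_P9_TLBIE_ERAT_BUG an extra flush against PID 0 (or LPID 0)
 * is issued first, and with CPU_FTR_P9_TLBIE_STQ_BUG the final tlbie of
 * the sequence is simply repeated. Each workaround tlbie is preceded by
 * its own ptesync.
 */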
238 static inline void fixup_tlbie_va(unsigned long va, unsigned long pid,
239 				  unsigned long ap)
240 {
241 	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
242 		asm volatile("ptesync": : :"memory");
243 		__tlbie_va(va, 0, ap, RIC_FLUSH_TLB);
244 	}
245 
246 	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
247 		asm volatile("ptesync": : :"memory");
248 		__tlbie_va(va, pid, ap, RIC_FLUSH_TLB);
249 	}
250 }
251 
252 static inline void fixup_tlbie_va_range(unsigned long va, unsigned long pid,
253 					unsigned long ap)
254 {
255 	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
256 		asm volatile("ptesync": : :"memory");
257 		__tlbie_pid(0, RIC_FLUSH_TLB);
258 	}
259 
260 	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
261 		asm volatile("ptesync": : :"memory");
262 		__tlbie_va(va, pid, ap, RIC_FLUSH_TLB);
263 	}
264 }
265 
266 static inline void fixup_tlbie_va_range_lpid(unsigned long va,
267 					     unsigned long pid,
268 					     unsigned long lpid,
269 					     unsigned long ap)
270 {
271 	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
272 		asm volatile("ptesync" : : : "memory");
273 		__tlbie_pid_lpid(0, lpid, RIC_FLUSH_TLB);
274 	}
275 
276 	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
277 		asm volatile("ptesync" : : : "memory");
278 		__tlbie_va_lpid(va, pid, lpid, ap, RIC_FLUSH_TLB);
279 	}
280 }
281 
282 static inline void fixup_tlbie_pid(unsigned long pid)
283 {
284 	/*
285 	 * We can use any address for the invalidation; pick one which is
286 	 * probably unused as an optimisation.
287 	 */
288 	unsigned long va = ((1UL << 52) - 1);
289 
290 	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
291 		asm volatile("ptesync": : :"memory");
292 		__tlbie_pid(0, RIC_FLUSH_TLB);
293 	}
294 
295 	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
296 		asm volatile("ptesync": : :"memory");
297 		__tlbie_va(va, pid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB);
298 	}
299 }
300 
301 static inline void fixup_tlbie_pid_lpid(unsigned long pid, unsigned long lpid)
302 {
303 	/*
304 	 * We can use any address for the invalidation; pick one which is
305 	 * probably unused as an optimisation.
306 	 */
307 	unsigned long va = ((1UL << 52) - 1);
308 
309 	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
310 		asm volatile("ptesync" : : : "memory");
311 		__tlbie_pid_lpid(0, lpid, RIC_FLUSH_TLB);
312 	}
313 
314 	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
315 		asm volatile("ptesync" : : : "memory");
316 		__tlbie_va_lpid(va, pid, lpid, mmu_get_ap(MMU_PAGE_64K),
317 				RIC_FLUSH_TLB);
318 	}
319 }
320 
321 static inline void fixup_tlbie_lpid_va(unsigned long va, unsigned long lpid,
322 				       unsigned long ap)
323 {
324 	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
325 		asm volatile("ptesync": : :"memory");
326 		__tlbie_lpid_va(va, 0, ap, RIC_FLUSH_TLB);
327 	}
328 
329 	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
330 		asm volatile("ptesync": : :"memory");
331 		__tlbie_lpid_va(va, lpid, ap, RIC_FLUSH_TLB);
332 	}
333 }
334 
335 static inline void fixup_tlbie_lpid(unsigned long lpid)
336 {
337 	/*
338 	 * We can use any address for the invalidation; pick one which is
339 	 * probably unused as an optimisation.
340 	 */
341 	unsigned long va = ((1UL << 52) - 1);
342 
343 	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
344 		asm volatile("ptesync": : :"memory");
345 		__tlbie_lpid(0, RIC_FLUSH_TLB);
346 	}
347 
348 	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
349 		asm volatile("ptesync": : :"memory");
350 		__tlbie_lpid_va(va, lpid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB);
351 	}
352 }
353 
354 /*
355  * We use 128 sets in radix mode and 256 sets in hpt mode.
356  */
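/*
 * On ISA v3.1 CPUs (CPU_FTR_ARCH_31) iterating over sets is no longer
 * required, so the loops below issue a single tlbiel; on POWER9 all
 * POWER9_TLB_SETS_RADIX (128) sets are still walked by hand.
 */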
357 static inline void _tlbiel_pid(unsigned long pid, unsigned long ric)
358 {
359 	int set;
360 
361 	asm volatile("ptesync": : :"memory");
362 
363 	switch (ric) {
364 	case RIC_FLUSH_PWC:
365 
366 		/* For PWC, only one flush is needed */
367 		__tlbiel_pid(pid, 0, RIC_FLUSH_PWC);
368 		ppc_after_tlbiel_barrier();
369 		return;
370 	case RIC_FLUSH_TLB:
371 		__tlbiel_pid(pid, 0, RIC_FLUSH_TLB);
372 		break;
373 	case RIC_FLUSH_ALL:
374 	default:
375 		/*
376 		 * Flush the first set of the TLB, and if
377 		 * we're doing a RIC_FLUSH_ALL, also flush
378 		 * the entire Page Walk Cache.
379 		 */
380 		__tlbiel_pid(pid, 0, RIC_FLUSH_ALL);
381 	}
382 
383 	if (!cpu_has_feature(CPU_FTR_ARCH_31)) {
384 		/* For the remaining sets, just flush the TLB */
385 		for (set = 1; set < POWER9_TLB_SETS_RADIX ; set++)
386 			__tlbiel_pid(pid, set, RIC_FLUSH_TLB);
387 	}
388 
389 	ppc_after_tlbiel_barrier();
390 	asm volatile(PPC_RADIX_INVALIDATE_ERAT_USER "; isync" : : :"memory");
391 }
392 
393 static inline void _tlbie_pid(unsigned long pid, unsigned long ric)
394 {
395 	asm volatile("ptesync": : :"memory");
396 
397 	/*
398 	 * Work around the fact that the "ric" argument to __tlbie_pid
399 	 * must be a compile-time constant to match the "i" constraint
400 	 * in the asm statement.
401 	 */
402 	switch (ric) {
403 	case RIC_FLUSH_TLB:
404 		__tlbie_pid(pid, RIC_FLUSH_TLB);
405 		fixup_tlbie_pid(pid);
406 		break;
407 	case RIC_FLUSH_PWC:
408 		__tlbie_pid(pid, RIC_FLUSH_PWC);
409 		break;
410 	case RIC_FLUSH_ALL:
411 	default:
412 		__tlbie_pid(pid, RIC_FLUSH_ALL);
413 		fixup_tlbie_pid(pid);
414 	}
415 	asm volatile("eieio; tlbsync; ptesync": : :"memory");
416 }
417 
418 static inline void _tlbie_pid_lpid(unsigned long pid, unsigned long lpid,
419 				   unsigned long ric)
420 {
421 	asm volatile("ptesync" : : : "memory");
422 
423 	/*
424 	 * Work around the fact that the "ric" argument to __tlbie_pid_lpid
425 	 * must be a compile-time constant to match the "i" constraint
426 	 * in the asm statement.
427 	 */
428 	switch (ric) {
429 	case RIC_FLUSH_TLB:
430 		__tlbie_pid_lpid(pid, lpid, RIC_FLUSH_TLB);
431 		fixup_tlbie_pid_lpid(pid, lpid);
432 		break;
433 	case RIC_FLUSH_PWC:
434 		__tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC);
435 		break;
436 	case RIC_FLUSH_ALL:
437 	default:
438 		__tlbie_pid_lpid(pid, lpid, RIC_FLUSH_ALL);
439 		fixup_tlbie_pid_lpid(pid, lpid);
440 	}
441 	asm volatile("eieio; tlbsync; ptesync" : : : "memory");
442 }
443 struct tlbiel_pid {
444 	unsigned long pid;
445 	unsigned long ric;
446 };
447 
448 static void do_tlbiel_pid(void *info)
449 {
450 	struct tlbiel_pid *t = info;
451 
452 	if (t->ric == RIC_FLUSH_TLB)
453 		_tlbiel_pid(t->pid, RIC_FLUSH_TLB);
454 	else if (t->ric == RIC_FLUSH_PWC)
455 		_tlbiel_pid(t->pid, RIC_FLUSH_PWC);
456 	else
457 		_tlbiel_pid(t->pid, RIC_FLUSH_ALL);
458 }
459 
460 static inline void _tlbiel_pid_multicast(struct mm_struct *mm,
461 				unsigned long pid, unsigned long ric)
462 {
463 	struct cpumask *cpus = mm_cpumask(mm);
464 	struct tlbiel_pid t = { .pid = pid, .ric = ric };
465 
466 	on_each_cpu_mask(cpus, do_tlbiel_pid, &t, 1);
467 	/*
468 	 * We always want the CPU translations to be invalidated with tlbiel
469 	 * in these paths, so while coprocessors must use tlbie, we cannot
470 	 * optimise away the tlbiel component.
471 	 */
472 	if (atomic_read(&mm->context.copros) > 0)
473 		_tlbie_pid(pid, RIC_FLUSH_ALL);
474 }
475 
476 static inline void _tlbie_lpid(unsigned long lpid, unsigned long ric)
477 {
478 	asm volatile("ptesync": : :"memory");
479 
480 	/*
481 	 * Work around the fact that the "ric" argument to __tlbie_lpid
482 	 * must be a compile-time constant to match the "i" constraint
483 	 * in the asm statement.
484 	 */
485 	switch (ric) {
486 	case RIC_FLUSH_TLB:
487 		__tlbie_lpid(lpid, RIC_FLUSH_TLB);
488 		fixup_tlbie_lpid(lpid);
489 		break;
490 	case RIC_FLUSH_PWC:
491 		__tlbie_lpid(lpid, RIC_FLUSH_PWC);
492 		break;
493 	case RIC_FLUSH_ALL:
494 	default:
495 		__tlbie_lpid(lpid, RIC_FLUSH_ALL);
496 		fixup_tlbie_lpid(lpid);
497 	}
498 	asm volatile("eieio; tlbsync; ptesync": : :"memory");
499 }
500 
501 static __always_inline void _tlbie_lpid_guest(unsigned long lpid, unsigned long ric)
502 {
503 	/*
504 	 * Work around the fact that the "ric" argument to __tlbie_lpid_guest
505 	 * must be a compile-time constant to match the "i" constraint
506 	 * in the asm statement.
507 	 */
508 	switch (ric) {
509 	case RIC_FLUSH_TLB:
510 		__tlbie_lpid_guest(lpid, RIC_FLUSH_TLB);
511 		break;
512 	case RIC_FLUSH_PWC:
513 		__tlbie_lpid_guest(lpid, RIC_FLUSH_PWC);
514 		break;
515 	case RIC_FLUSH_ALL:
516 	default:
517 		__tlbie_lpid_guest(lpid, RIC_FLUSH_ALL);
518 	}
519 	fixup_tlbie_lpid(lpid);
520 	asm volatile("eieio; tlbsync; ptesync": : :"memory");
521 }
522 
523 static inline void __tlbiel_va_range(unsigned long start, unsigned long end,
524 				    unsigned long pid, unsigned long page_size,
525 				    unsigned long psize)
526 {
527 	unsigned long addr;
528 	unsigned long ap = mmu_get_ap(psize);
529 
530 	for (addr = start; addr < end; addr += page_size)
531 		__tlbiel_va(addr, pid, ap, RIC_FLUSH_TLB);
532 }
533 
534 static __always_inline void _tlbiel_va(unsigned long va, unsigned long pid,
535 				       unsigned long psize, unsigned long ric)
536 {
537 	unsigned long ap = mmu_get_ap(psize);
538 
539 	asm volatile("ptesync": : :"memory");
540 	__tlbiel_va(va, pid, ap, ric);
541 	ppc_after_tlbiel_barrier();
542 }
543 
544 static inline void _tlbiel_va_range(unsigned long start, unsigned long end,
545 				    unsigned long pid, unsigned long page_size,
546 				    unsigned long psize, bool also_pwc)
547 {
548 	asm volatile("ptesync": : :"memory");
549 	if (also_pwc)
550 		__tlbiel_pid(pid, 0, RIC_FLUSH_PWC);
551 	__tlbiel_va_range(start, end, pid, page_size, psize);
552 	ppc_after_tlbiel_barrier();
553 }
554 
555 static inline void __tlbie_va_range(unsigned long start, unsigned long end,
556 				    unsigned long pid, unsigned long page_size,
557 				    unsigned long psize)
558 {
559 	unsigned long addr;
560 	unsigned long ap = mmu_get_ap(psize);
561 
562 	for (addr = start; addr < end; addr += page_size)
563 		__tlbie_va(addr, pid, ap, RIC_FLUSH_TLB);
564 
565 	fixup_tlbie_va_range(addr - page_size, pid, ap);
566 }
567 
568 static inline void __tlbie_va_range_lpid(unsigned long start, unsigned long end,
569 					 unsigned long pid, unsigned long lpid,
570 					 unsigned long page_size,
571 					 unsigned long psize)
572 {
573 	unsigned long addr;
574 	unsigned long ap = mmu_get_ap(psize);
575 
576 	for (addr = start; addr < end; addr += page_size)
577 		__tlbie_va_lpid(addr, pid, lpid, ap, RIC_FLUSH_TLB);
578 
579 	fixup_tlbie_va_range_lpid(addr - page_size, pid, lpid, ap);
580 }
581 
582 static __always_inline void _tlbie_va(unsigned long va, unsigned long pid,
583 				      unsigned long psize, unsigned long ric)
584 {
585 	unsigned long ap = mmu_get_ap(psize);
586 
587 	asm volatile("ptesync": : :"memory");
588 	__tlbie_va(va, pid, ap, ric);
589 	fixup_tlbie_va(va, pid, ap);
590 	asm volatile("eieio; tlbsync; ptesync": : :"memory");
591 }
592 
593 struct tlbiel_va {
594 	unsigned long pid;
595 	unsigned long va;
596 	unsigned long psize;
597 	unsigned long ric;
598 };
599 
600 static void do_tlbiel_va(void *info)
601 {
602 	struct tlbiel_va *t = info;
603 
604 	if (t->ric == RIC_FLUSH_TLB)
605 		_tlbiel_va(t->va, t->pid, t->psize, RIC_FLUSH_TLB);
606 	else if (t->ric == RIC_FLUSH_PWC)
607 		_tlbiel_va(t->va, t->pid, t->psize, RIC_FLUSH_PWC);
608 	else
609 		_tlbiel_va(t->va, t->pid, t->psize, RIC_FLUSH_ALL);
610 }
611 
612 static inline void _tlbiel_va_multicast(struct mm_struct *mm,
613 				unsigned long va, unsigned long pid,
614 				unsigned long psize, unsigned long ric)
615 {
616 	struct cpumask *cpus = mm_cpumask(mm);
617 	struct tlbiel_va t = { .va = va, .pid = pid, .psize = psize, .ric = ric };
618 	on_each_cpu_mask(cpus, do_tlbiel_va, &t, 1);
619 	if (atomic_read(&mm->context.copros) > 0)
620 		_tlbie_va(va, pid, psize, RIC_FLUSH_TLB);
621 }
622 
623 struct tlbiel_va_range {
624 	unsigned long pid;
625 	unsigned long start;
626 	unsigned long end;
627 	unsigned long page_size;
628 	unsigned long psize;
629 	bool also_pwc;
630 };
631 
632 static void do_tlbiel_va_range(void *info)
633 {
634 	struct tlbiel_va_range *t = info;
635 
636 	_tlbiel_va_range(t->start, t->end, t->pid, t->page_size,
637 				    t->psize, t->also_pwc);
638 }
639 
640 static __always_inline void _tlbie_lpid_va(unsigned long va, unsigned long lpid,
641 			      unsigned long psize, unsigned long ric)
642 {
643 	unsigned long ap = mmu_get_ap(psize);
644 
645 	asm volatile("ptesync": : :"memory");
646 	__tlbie_lpid_va(va, lpid, ap, ric);
647 	fixup_tlbie_lpid_va(va, lpid, ap);
648 	asm volatile("eieio; tlbsync; ptesync": : :"memory");
649 }
650 
651 static inline void _tlbie_va_range(unsigned long start, unsigned long end,
652 				    unsigned long pid, unsigned long page_size,
653 				    unsigned long psize, bool also_pwc)
654 {
655 	asm volatile("ptesync": : :"memory");
656 	if (also_pwc)
657 		__tlbie_pid(pid, RIC_FLUSH_PWC);
658 	__tlbie_va_range(start, end, pid, page_size, psize);
659 	asm volatile("eieio; tlbsync; ptesync": : :"memory");
660 }
661 
662 static inline void _tlbie_va_range_lpid(unsigned long start, unsigned long end,
663 					unsigned long pid, unsigned long lpid,
664 					unsigned long page_size,
665 					unsigned long psize, bool also_pwc)
666 {
667 	asm volatile("ptesync" : : : "memory");
668 	if (also_pwc)
669 		__tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC);
670 	__tlbie_va_range_lpid(start, end, pid, lpid, page_size, psize);
671 	asm volatile("eieio; tlbsync; ptesync" : : : "memory");
672 }
673 
674 static inline void _tlbiel_va_range_multicast(struct mm_struct *mm,
675 				unsigned long start, unsigned long end,
676 				unsigned long pid, unsigned long page_size,
677 				unsigned long psize, bool also_pwc)
678 {
679 	struct cpumask *cpus = mm_cpumask(mm);
680 	struct tlbiel_va_range t = { .start = start, .end = end,
681 				.pid = pid, .page_size = page_size,
682 				.psize = psize, .also_pwc = also_pwc };
683 
684 	on_each_cpu_mask(cpus, do_tlbiel_va_range, &t, 1);
685 	if (atomic_read(&mm->context.copros) > 0)
686 		_tlbie_va_range(start, end, pid, page_size, psize, also_pwc);
687 }
688 
689 /*
690  * Base TLB flushing operations:
691  *
692  *  - flush_tlb_mm(mm) flushes the specified mm context TLB's
693  *  - flush_tlb_page(vma, vmaddr) flushes one page
694  *  - flush_tlb_range(vma, start, end) flushes a range of pages
695  *  - flush_tlb_kernel_range(start, end) flushes kernel pages
696  *
697  *  - local_* variants of page and mm only apply to the current
698  *    processor
699  */
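/*
 * These entry points sit behind the generic flush_tlb_*() API, so callers
 * normally never invoke them directly. A rough sketch of how control gets
 * here (exact call chains vary):
 *
 *	/* after clearing PTEs for [start, end) in a VMA: */
 *	flush_tlb_range(vma, start, end);	/* -> radix__flush_tlb_range() */
 *
 *	/* tearing down an entire address space: */
 *	tlb_finish_mmu(&tlb);			/* -> radix__tlb_flush() */
 */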
700 void radix__local_flush_tlb_mm(struct mm_struct *mm)
701 {
702 	unsigned long pid;
703 
704 	preempt_disable();
705 	pid = mm->context.id;
706 	if (pid != MMU_NO_CONTEXT)
707 		_tlbiel_pid(pid, RIC_FLUSH_TLB);
708 	preempt_enable();
709 }
710 EXPORT_SYMBOL(radix__local_flush_tlb_mm);
711 
712 #ifndef CONFIG_SMP
713 void radix__local_flush_all_mm(struct mm_struct *mm)
714 {
715 	unsigned long pid;
716 
717 	preempt_disable();
718 	pid = mm->context.id;
719 	if (pid != MMU_NO_CONTEXT)
720 		_tlbiel_pid(pid, RIC_FLUSH_ALL);
721 	preempt_enable();
722 }
723 EXPORT_SYMBOL(radix__local_flush_all_mm);
724 
725 static void __flush_all_mm(struct mm_struct *mm, bool fullmm)
726 {
727 	radix__local_flush_all_mm(mm);
728 }
729 #endif /* CONFIG_SMP */
730 
731 void radix__local_flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
732 				       int psize)
733 {
734 	unsigned long pid;
735 
736 	preempt_disable();
737 	pid = mm->context.id;
738 	if (pid != MMU_NO_CONTEXT)
739 		_tlbiel_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
740 	preempt_enable();
741 }
742 
743 void radix__local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
744 {
745 #ifdef CONFIG_HUGETLB_PAGE
746 	/* need the return fix for nohash.c */
747 	if (is_vm_hugetlb_page(vma))
748 		return radix__local_flush_hugetlb_page(vma, vmaddr);
749 #endif
750 	radix__local_flush_tlb_page_psize(vma->vm_mm, vmaddr, mmu_virtual_psize);
751 }
752 EXPORT_SYMBOL(radix__local_flush_tlb_page);
753 
754 static bool mm_needs_flush_escalation(struct mm_struct *mm)
755 {
756 	/*
757 	 * The P9 nest MMU has issues with the page walk cache caching PTEs
758 	 * and not flushing them properly when RIC = 0 for a PID/LPID
759 	 * invalidate, so escalate those flushes to RIC_FLUSH_ALL.
760 	 */
761 	if (atomic_read(&mm->context.copros) > 0)
762 		return true;
763 	return false;
764 }
765 
766 /*
767  * If always_flush is true, then flush even if this CPU can't be removed
768  * from mm_cpumask.
769  */
770 void exit_lazy_flush_tlb(struct mm_struct *mm, bool always_flush)
771 {
772 	unsigned long pid = mm->context.id;
773 	int cpu = smp_processor_id();
774 
775 	/*
776 	 * A kthread could have done a mmget_not_zero() after the flushing CPU
777 	 * checked mm_cpumask, and be in the process of kthread_use_mm when
778 	 * interrupted here. In that case, current->mm will be set to mm,
779 	 * because kthread_use_mm() setting ->mm and switching to the mm is
780 	 * done with interrupts off.
781 	 */
782 	if (current->mm == mm)
783 		goto out;
784 
785 	if (current->active_mm == mm) {
786 		WARN_ON_ONCE(current->mm != NULL);
787 		/* Is a kernel thread and is using mm as the lazy tlb */
788 		mmgrab(&init_mm);
789 		current->active_mm = &init_mm;
790 		switch_mm_irqs_off(mm, &init_mm, current);
791 		mmdrop(mm);
792 	}
793 
794 	/*
795 	 * This IPI may be initiated from any source including those not
796 	 * running the mm, so there may be a racing IPI that comes after
797 	 * this one which finds the cpumask already clear. Check and avoid
798 	 * underflowing the active_cpus count in that case. The race should
799 	 * not otherwise be a problem, but the TLB must be flushed because
800 	 * that's what the caller expects.
801 	 */
802 	if (cpumask_test_cpu(cpu, mm_cpumask(mm))) {
803 		atomic_dec(&mm->context.active_cpus);
804 		cpumask_clear_cpu(cpu, mm_cpumask(mm));
805 		always_flush = true;
806 	}
807 
808 out:
809 	if (always_flush)
810 		_tlbiel_pid(pid, RIC_FLUSH_ALL);
811 }
812 
813 #ifdef CONFIG_SMP
814 static void do_exit_flush_lazy_tlb(void *arg)
815 {
816 	struct mm_struct *mm = arg;
817 	exit_lazy_flush_tlb(mm, true);
818 }
819 
820 static void exit_flush_lazy_tlbs(struct mm_struct *mm)
821 {
822 	/*
823 	 * It would be nice if this were async so it could be run in
824 	 * parallel with our local flush, but generic code does not
825 	 * give a good API for it. Could extend the generic code or
826 	 * make a special powerpc IPI for flushing TLBs.
827 	 * For now it's not too performance critical.
828 	 */
829 	smp_call_function_many(mm_cpumask(mm), do_exit_flush_lazy_tlb,
830 				(void *)mm, 1);
831 }
832 
833 #else /* CONFIG_SMP */
834 static inline void exit_flush_lazy_tlbs(struct mm_struct *mm) { }
835 #endif /* CONFIG_SMP */
836 
837 static DEFINE_PER_CPU(unsigned int, mm_cpumask_trim_clock);
838 
839 /*
840  * Interval between flushes at which we send out IPIs to check whether the
841  * mm_cpumask can be trimmed for the case where it's not a single-threaded
842  * process flushing its own mm. The intent is to reduce the cost of later
843  * flushes. Don't want this to be so low that it adds noticeable cost to TLB
844  * flushing, or so high that it doesn't help reduce global TLBIEs.
845  */
846 static unsigned long tlb_mm_cpumask_trim_timer = 1073;
847 
848 static bool tick_and_test_trim_clock(void)
849 {
850 	if (__this_cpu_inc_return(mm_cpumask_trim_clock) ==
851 			tlb_mm_cpumask_trim_timer) {
852 		__this_cpu_write(mm_cpumask_trim_clock, 0);
853 		return true;
854 	}
855 	return false;
856 }
857 
858 enum tlb_flush_type {
859 	FLUSH_TYPE_NONE,
860 	FLUSH_TYPE_LOCAL,
861 	FLUSH_TYPE_GLOBAL,
862 };
863 
864 static enum tlb_flush_type flush_type_needed(struct mm_struct *mm, bool fullmm)
865 {
866 	int active_cpus = atomic_read(&mm->context.active_cpus);
867 	int cpu = smp_processor_id();
868 
869 	if (active_cpus == 0)
870 		return FLUSH_TYPE_NONE;
871 	if (active_cpus == 1 && cpumask_test_cpu(cpu, mm_cpumask(mm))) {
872 		if (current->mm != mm) {
873 			/*
874 			 * Asynchronous flush sources may trim down to nothing
875 			 * if the process is not running, so occasionally try
876 			 * to trim.
877 			 */
878 			if (tick_and_test_trim_clock()) {
879 				exit_lazy_flush_tlb(mm, true);
880 				return FLUSH_TYPE_NONE;
881 			}
882 		}
883 		return FLUSH_TYPE_LOCAL;
884 	}
885 
886 	/* Coprocessors require TLBIE to invalidate nMMU. */
887 	if (atomic_read(&mm->context.copros) > 0)
888 		return FLUSH_TYPE_GLOBAL;
889 
890 	/*
891 	 * In the fullmm case there's no point doing the exit_flush_lazy_tlbs
892 	 * because the mm is being taken down anyway, and a TLBIE tends to
893 	 * be faster than an IPI+TLBIEL.
894 	 */
895 	if (fullmm)
896 		return FLUSH_TYPE_GLOBAL;
897 
898 	/*
899 	 * If we are running the only thread of a single-threaded process,
900 	 * then we should almost always be able to trim off the rest of the
901 	 * CPU mask (except in the case of use_mm() races), so always try
902 	 * trimming the mask.
903 	 */
904 	if (atomic_read(&mm->mm_users) <= 1 && current->mm == mm) {
905 		exit_flush_lazy_tlbs(mm);
906 		/*
907 		 * use_mm() race could prevent IPIs from being able to clear
908 		 * the cpumask here, however those users are established
909 		 * after our first check (and so after the PTEs are removed),
910 		 * and the TLB still gets flushed by the IPI, so this CPU
911 		 * will only require a local flush.
912 		 */
913 		return FLUSH_TYPE_LOCAL;
914 	}
915 
916 	/*
917 	 * Occasionally try to trim down the cpumask. It's possible this can
918 	 * bring the mask to zero, which results in no flush.
919 	 */
920 	if (tick_and_test_trim_clock()) {
921 		exit_flush_lazy_tlbs(mm);
922 		if (current->mm == mm)
923 			return FLUSH_TYPE_LOCAL;
924 		if (cpumask_test_cpu(cpu, mm_cpumask(mm)))
925 			exit_lazy_flush_tlb(mm, true);
926 		return FLUSH_TYPE_NONE;
927 	}
928 
929 	return FLUSH_TYPE_GLOBAL;
930 }
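/*
 * Putting the above together (illustrative, not exhaustive): a
 * single-threaded process flushing its own mm normally trims the cpumask
 * down to this CPU and gets FLUSH_TYPE_LOCAL; an mm with coprocessor
 * (nMMU) users always gets FLUSH_TYPE_GLOBAL; a racing flush that finds
 * no active CPUs left gets FLUSH_TYPE_NONE and does nothing.
 */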
931 
932 #ifdef CONFIG_SMP
933 void radix__flush_tlb_mm(struct mm_struct *mm)
934 {
935 	unsigned long pid;
936 	enum tlb_flush_type type;
937 
938 	pid = mm->context.id;
939 	if (unlikely(pid == MMU_NO_CONTEXT))
940 		return;
941 
942 	preempt_disable();
943 	/*
944 	 * Order loads of mm_cpumask (in flush_type_needed) vs previous
945 	 * stores to clear ptes before the invalidate. See barrier in
946 	 * switch_mm_irqs_off
947 	 */
948 	smp_mb();
949 	type = flush_type_needed(mm, false);
950 	if (type == FLUSH_TYPE_LOCAL) {
951 		_tlbiel_pid(pid, RIC_FLUSH_TLB);
952 	} else if (type == FLUSH_TYPE_GLOBAL) {
953 		if (!mmu_has_feature(MMU_FTR_GTSE)) {
954 			unsigned long tgt = H_RPTI_TARGET_CMMU;
955 
956 			if (atomic_read(&mm->context.copros) > 0)
957 				tgt |= H_RPTI_TARGET_NMMU;
958 			pseries_rpt_invalidate(pid, tgt, H_RPTI_TYPE_TLB,
959 					       H_RPTI_PAGE_ALL, 0, -1UL);
960 		} else if (cputlb_use_tlbie()) {
961 			if (mm_needs_flush_escalation(mm))
962 				_tlbie_pid(pid, RIC_FLUSH_ALL);
963 			else
964 				_tlbie_pid(pid, RIC_FLUSH_TLB);
965 		} else {
966 			_tlbiel_pid_multicast(mm, pid, RIC_FLUSH_TLB);
967 		}
968 	}
969 	preempt_enable();
970 }
971 EXPORT_SYMBOL(radix__flush_tlb_mm);
972 
973 static void __flush_all_mm(struct mm_struct *mm, bool fullmm)
974 {
975 	unsigned long pid;
976 	enum tlb_flush_type type;
977 
978 	pid = mm->context.id;
979 	if (unlikely(pid == MMU_NO_CONTEXT))
980 		return;
981 
982 	preempt_disable();
983 	smp_mb(); /* see radix__flush_tlb_mm */
984 	type = flush_type_needed(mm, fullmm);
985 	if (type == FLUSH_TYPE_LOCAL) {
986 		_tlbiel_pid(pid, RIC_FLUSH_ALL);
987 	} else if (type == FLUSH_TYPE_GLOBAL) {
988 		if (!mmu_has_feature(MMU_FTR_GTSE)) {
989 			unsigned long tgt = H_RPTI_TARGET_CMMU;
990 			unsigned long type = H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC |
991 					     H_RPTI_TYPE_PRT;
992 
993 			if (atomic_read(&mm->context.copros) > 0)
994 				tgt |= H_RPTI_TARGET_NMMU;
995 			pseries_rpt_invalidate(pid, tgt, type,
996 					       H_RPTI_PAGE_ALL, 0, -1UL);
997 		} else if (cputlb_use_tlbie())
998 			_tlbie_pid(pid, RIC_FLUSH_ALL);
999 		else
1000 			_tlbiel_pid_multicast(mm, pid, RIC_FLUSH_ALL);
1001 	}
1002 	preempt_enable();
1003 }
1004 
1005 void radix__flush_all_mm(struct mm_struct *mm)
1006 {
1007 	__flush_all_mm(mm, false);
1008 }
1009 EXPORT_SYMBOL(radix__flush_all_mm);
1010 
1011 void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
1012 				 int psize)
1013 {
1014 	unsigned long pid;
1015 	enum tlb_flush_type type;
1016 
1017 	pid = mm->context.id;
1018 	if (unlikely(pid == MMU_NO_CONTEXT))
1019 		return;
1020 
1021 	preempt_disable();
1022 	smp_mb(); /* see radix__flush_tlb_mm */
1023 	type = flush_type_needed(mm, false);
1024 	if (type == FLUSH_TYPE_LOCAL) {
1025 		_tlbiel_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
1026 	} else if (type == FLUSH_TYPE_GLOBAL) {
1027 		if (!mmu_has_feature(MMU_FTR_GTSE)) {
1028 			unsigned long tgt, pg_sizes, size;
1029 
1030 			tgt = H_RPTI_TARGET_CMMU;
1031 			pg_sizes = psize_to_rpti_pgsize(psize);
1032 			size = 1UL << mmu_psize_to_shift(psize);
1033 
1034 			if (atomic_read(&mm->context.copros) > 0)
1035 				tgt |= H_RPTI_TARGET_NMMU;
1036 			pseries_rpt_invalidate(pid, tgt, H_RPTI_TYPE_TLB,
1037 					       pg_sizes, vmaddr,
1038 					       vmaddr + size);
1039 		} else if (cputlb_use_tlbie())
1040 			_tlbie_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
1041 		else
1042 			_tlbiel_va_multicast(mm, vmaddr, pid, psize, RIC_FLUSH_TLB);
1043 	}
1044 	preempt_enable();
1045 }
1046 
1047 void radix__flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
1048 {
1049 #ifdef CONFIG_HUGETLB_PAGE
1050 	if (is_vm_hugetlb_page(vma))
1051 		return radix__flush_hugetlb_page(vma, vmaddr);
1052 #endif
1053 	radix__flush_tlb_page_psize(vma->vm_mm, vmaddr, mmu_virtual_psize);
1054 }
1055 EXPORT_SYMBOL(radix__flush_tlb_page);
1056 
1057 #endif /* CONFIG_SMP */
1058 
1059 static void do_tlbiel_kernel(void *info)
1060 {
1061 	_tlbiel_pid(0, RIC_FLUSH_ALL);
1062 }
1063 
1064 static inline void _tlbiel_kernel_broadcast(void)
1065 {
1066 	on_each_cpu(do_tlbiel_kernel, NULL, 1);
1067 	if (tlbie_capable) {
1068 		/*
1069 		 * Coherent accelerators don't refcount kernel memory mappings,
1070 		 * so we always have to issue a tlbie for them. This is quite a
1071 		 * slow path anyway.
1072 		 */
1073 		_tlbie_pid(0, RIC_FLUSH_ALL);
1074 	}
1075 }
1076 
1077 /*
1078  * If kernel TLBIs ever become local rather than global, then
1079  * drivers/misc/ocxl/link.c:ocxl_link_add_pe will need some work, as it
1080  * assumes kernel TLBIs are global.
1081  */
1082 void radix__flush_tlb_kernel_range(unsigned long start, unsigned long end)
1083 {
1084 	if (!mmu_has_feature(MMU_FTR_GTSE)) {
1085 		unsigned long tgt = H_RPTI_TARGET_CMMU | H_RPTI_TARGET_NMMU;
1086 		unsigned long type = H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC |
1087 				     H_RPTI_TYPE_PRT;
1088 
1089 		pseries_rpt_invalidate(0, tgt, type, H_RPTI_PAGE_ALL,
1090 				       start, end);
1091 	} else if (cputlb_use_tlbie())
1092 		_tlbie_pid(0, RIC_FLUSH_ALL);
1093 	else
1094 		_tlbiel_kernel_broadcast();
1095 }
1096 EXPORT_SYMBOL(radix__flush_tlb_kernel_range);
1097 
1098 #define TLB_FLUSH_ALL -1UL
1099 
1100 /*
1101  * Number of pages above which we invalidate the entire PID rather than
1102  * flush individual pages, for local and global flushes respectively.
1103  *
1104  * tlbie goes out to the interconnect and individual ops are more costly.
1105  * It also does not iterate over sets like the local tlbiel variant when
1106  * invalidating a full PID, so it has a far lower threshold to change from
1107  * individual page flushes to full-pid flushes.
1108  */
1109 static unsigned long tlb_single_page_flush_ceiling __read_mostly = 33;
1110 static unsigned long tlb_local_single_page_flush_ceiling __read_mostly = POWER9_TLB_SETS_RADIX * 2;
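/*
 * Worked example of the ceilings above, assuming a 64K base page size: an
 * 8MB munmap covers 128 pages, which exceeds tlb_single_page_flush_ceiling
 * (33), so a global flush collapses into a single full-PID tlbie; the
 * local ceiling is POWER9_TLB_SETS_RADIX * 2 = 256 pages (16MB), so the
 * same range flushed locally still uses per-page tlbiel.
 */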
1111 
1112 static inline void __radix__flush_tlb_range(struct mm_struct *mm,
1113 					    unsigned long start, unsigned long end)
1114 {
1115 	unsigned long pid;
1116 	unsigned int page_shift = mmu_psize_defs[mmu_virtual_psize].shift;
1117 	unsigned long page_size = 1UL << page_shift;
1118 	unsigned long nr_pages = (end - start) >> page_shift;
1119 	bool fullmm = (end == TLB_FLUSH_ALL);
1120 	bool flush_pid, flush_pwc = false;
1121 	enum tlb_flush_type type;
1122 
1123 	pid = mm->context.id;
1124 	if (unlikely(pid == MMU_NO_CONTEXT))
1125 		return;
1126 
1127 	preempt_disable();
1128 	smp_mb(); /* see radix__flush_tlb_mm */
1129 	type = flush_type_needed(mm, fullmm);
1130 	if (type == FLUSH_TYPE_NONE)
1131 		goto out;
1132 
1133 	if (fullmm)
1134 		flush_pid = true;
1135 	else if (type == FLUSH_TYPE_GLOBAL)
1136 		flush_pid = nr_pages > tlb_single_page_flush_ceiling;
1137 	else
1138 		flush_pid = nr_pages > tlb_local_single_page_flush_ceiling;
1139 	/*
1140 	 * A full PID flush already does the PWC flush. If it is not a full
1141 	 * PID flush, check whether the range spans more than a PMD and force
1142 	 * a PWC flush; mremap() depends on this behaviour.
1143 	 */
1144 	if (!flush_pid && (end - start) >= PMD_SIZE)
1145 		flush_pwc = true;
1146 
1147 	if (!mmu_has_feature(MMU_FTR_GTSE) && type == FLUSH_TYPE_GLOBAL) {
1148 		unsigned long type = H_RPTI_TYPE_TLB;
1149 		unsigned long tgt = H_RPTI_TARGET_CMMU;
1150 		unsigned long pg_sizes = psize_to_rpti_pgsize(mmu_virtual_psize);
1151 
1152 		if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
1153 			pg_sizes |= psize_to_rpti_pgsize(MMU_PAGE_2M);
1154 		if (atomic_read(&mm->context.copros) > 0)
1155 			tgt |= H_RPTI_TARGET_NMMU;
1156 		if (flush_pwc)
1157 			type |= H_RPTI_TYPE_PWC;
1158 		pseries_rpt_invalidate(pid, tgt, type, pg_sizes, start, end);
1159 	} else if (flush_pid) {
1160 		/*
1161 		 * We are now flushing a range larger than PMD size, so force a RIC_FLUSH_ALL
1162 		 */
1163 		if (type == FLUSH_TYPE_LOCAL) {
1164 			_tlbiel_pid(pid, RIC_FLUSH_ALL);
1165 		} else {
1166 			if (cputlb_use_tlbie()) {
1167 				_tlbie_pid(pid, RIC_FLUSH_ALL);
1168 			} else {
1169 				_tlbiel_pid_multicast(mm, pid, RIC_FLUSH_ALL);
1170 			}
1171 		}
1172 	} else {
1173 		bool hflush = false;
1174 		unsigned long hstart, hend;
1175 
1176 		if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) {
1177 			hstart = (start + PMD_SIZE - 1) & PMD_MASK;
1178 			hend = end & PMD_MASK;
1179 			if (hstart < hend)
1180 				hflush = true;
1181 		}
1182 
1183 		if (type == FLUSH_TYPE_LOCAL) {
1184 			asm volatile("ptesync": : :"memory");
1185 			if (flush_pwc)
1186 				/* For PWC, only one flush is needed */
1187 				__tlbiel_pid(pid, 0, RIC_FLUSH_PWC);
1188 			__tlbiel_va_range(start, end, pid, page_size, mmu_virtual_psize);
1189 			if (hflush)
1190 				__tlbiel_va_range(hstart, hend, pid,
1191 						PMD_SIZE, MMU_PAGE_2M);
1192 			ppc_after_tlbiel_barrier();
1193 		} else if (cputlb_use_tlbie()) {
1194 			asm volatile("ptesync": : :"memory");
1195 			if (flush_pwc)
1196 				__tlbie_pid(pid, RIC_FLUSH_PWC);
1197 			__tlbie_va_range(start, end, pid, page_size, mmu_virtual_psize);
1198 			if (hflush)
1199 				__tlbie_va_range(hstart, hend, pid,
1200 						PMD_SIZE, MMU_PAGE_2M);
1201 			asm volatile("eieio; tlbsync; ptesync": : :"memory");
1202 		} else {
1203 			_tlbiel_va_range_multicast(mm,
1204 					start, end, pid, page_size, mmu_virtual_psize, flush_pwc);
1205 			if (hflush)
1206 				_tlbiel_va_range_multicast(mm,
1207 					hstart, hend, pid, PMD_SIZE, MMU_PAGE_2M, flush_pwc);
1208 		}
1209 	}
1210 out:
1211 	preempt_enable();
1212 }
1213 
1214 void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
1215 		     unsigned long end)
1216 
1217 {
1218 #ifdef CONFIG_HUGETLB_PAGE
1219 	if (is_vm_hugetlb_page(vma))
1220 		return radix__flush_hugetlb_tlb_range(vma, start, end);
1221 #endif
1222 
1223 	__radix__flush_tlb_range(vma->vm_mm, start, end);
1224 }
1225 EXPORT_SYMBOL(radix__flush_tlb_range);
1226 
1227 static int radix_get_mmu_psize(int page_size)
1228 {
1229 	int psize;
1230 
1231 	if (page_size == (1UL << mmu_psize_defs[mmu_virtual_psize].shift))
1232 		psize = mmu_virtual_psize;
1233 	else if (page_size == (1UL << mmu_psize_defs[MMU_PAGE_2M].shift))
1234 		psize = MMU_PAGE_2M;
1235 	else if (page_size == (1UL << mmu_psize_defs[MMU_PAGE_1G].shift))
1236 		psize = MMU_PAGE_1G;
1237 	else
1238 		return -1;
1239 	return psize;
1240 }
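/*
 * e.g. radix_get_mmu_psize(SZ_2M) returns MMU_PAGE_2M and
 * radix_get_mmu_psize(SZ_1G) returns MMU_PAGE_1G; any size other than the
 * base, 2M or 1G page size comes back as -1 and callers fall back to a
 * full-mm flush.
 */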
1241 
1242 /*
1243  * Flush partition scoped LPID address translation for all CPUs.
1244  */
1245 void radix__flush_tlb_lpid_page(unsigned int lpid,
1246 					unsigned long addr,
1247 					unsigned long page_size)
1248 {
1249 	int psize = radix_get_mmu_psize(page_size);
1250 
1251 	_tlbie_lpid_va(addr, lpid, psize, RIC_FLUSH_TLB);
1252 }
1253 EXPORT_SYMBOL_GPL(radix__flush_tlb_lpid_page);
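/*
 * The LPID-scoped flushes here are exported for the hypervisor side: KVM
 * HV maintains partition-scoped radix trees for its guests and needs to
 * invalidate translations by LPID. Regular process MM paths do not use
 * them.
 */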
1254 
1255 /*
1256  * Flush partition scoped PWC from LPID for all CPUs.
1257  */
1258 void radix__flush_pwc_lpid(unsigned int lpid)
1259 {
1260 	_tlbie_lpid(lpid, RIC_FLUSH_PWC);
1261 }
1262 EXPORT_SYMBOL_GPL(radix__flush_pwc_lpid);
1263 
1264 /*
1265  * Flush partition scoped translations from LPID (=LPIDR)
1266  */
1267 void radix__flush_all_lpid(unsigned int lpid)
1268 {
1269 	_tlbie_lpid(lpid, RIC_FLUSH_ALL);
1270 }
1271 EXPORT_SYMBOL_GPL(radix__flush_all_lpid);
1272 
1273 /*
1274  * Flush process scoped translations from LPID (=LPIDR)
1275  */
1276 void radix__flush_all_lpid_guest(unsigned int lpid)
1277 {
1278 	_tlbie_lpid_guest(lpid, RIC_FLUSH_ALL);
1279 }
1280 
1281 void radix__tlb_flush(struct mmu_gather *tlb)
1282 {
1283 	int psize = 0;
1284 	struct mm_struct *mm = tlb->mm;
1285 	int page_size = tlb->page_size;
1286 	unsigned long start = tlb->start;
1287 	unsigned long end = tlb->end;
1288 
1289 	/*
1290 	 * If the page size is not something we understand, do a full mm flush.
1291 	 *
1292 	 * A "fullmm" flush must always do a flush_all_mm (RIC=2) flush
1293 	 * that flushes the process table entry cache upon process teardown.
1294 	 * See the comment for radix in arch_exit_mmap().
1295 	 */
1296 	if (tlb->fullmm || tlb->need_flush_all) {
1297 		__flush_all_mm(mm, true);
1298 	} else if ((psize = radix_get_mmu_psize(page_size)) == -1) {
1299 		if (!tlb->freed_tables)
1300 			radix__flush_tlb_mm(mm);
1301 		else
1302 			radix__flush_all_mm(mm);
1303 	} else {
1304 		if (!tlb->freed_tables)
1305 			radix__flush_tlb_range_psize(mm, start, end, psize);
1306 		else
1307 			radix__flush_tlb_pwc_range_psize(mm, start, end, psize);
1308 	}
1309 }
1310 
1311 static void __radix__flush_tlb_range_psize(struct mm_struct *mm,
1312 				unsigned long start, unsigned long end,
1313 				int psize, bool also_pwc)
1314 {
1315 	unsigned long pid;
1316 	unsigned int page_shift = mmu_psize_defs[psize].shift;
1317 	unsigned long page_size = 1UL << page_shift;
1318 	unsigned long nr_pages = (end - start) >> page_shift;
1319 	bool fullmm = (end == TLB_FLUSH_ALL);
1320 	bool flush_pid;
1321 	enum tlb_flush_type type;
1322 
1323 	pid = mm->context.id;
1324 	if (unlikely(pid == MMU_NO_CONTEXT))
1325 		return;
1326 
1327 	fullmm = (end == TLB_FLUSH_ALL);
1328 
1329 	preempt_disable();
1330 	smp_mb(); /* see radix__flush_tlb_mm */
1331 	type = flush_type_needed(mm, fullmm);
1332 	if (type == FLUSH_TYPE_NONE)
1333 		goto out;
1334 
1335 	if (fullmm)
1336 		flush_pid = true;
1337 	else if (type == FLUSH_TYPE_GLOBAL)
1338 		flush_pid = nr_pages > tlb_single_page_flush_ceiling;
1339 	else
1340 		flush_pid = nr_pages > tlb_local_single_page_flush_ceiling;
1341 
1342 	if (!mmu_has_feature(MMU_FTR_GTSE) && type == FLUSH_TYPE_GLOBAL) {
1343 		unsigned long tgt = H_RPTI_TARGET_CMMU;
1344 		unsigned long type = H_RPTI_TYPE_TLB;
1345 		unsigned long pg_sizes = psize_to_rpti_pgsize(psize);
1346 
1347 		if (also_pwc)
1348 			type |= H_RPTI_TYPE_PWC;
1349 		if (atomic_read(&mm->context.copros) > 0)
1350 			tgt |= H_RPTI_TARGET_NMMU;
1351 		pseries_rpt_invalidate(pid, tgt, type, pg_sizes, start, end);
1352 	} else if (flush_pid) {
1353 		if (type == FLUSH_TYPE_LOCAL) {
1354 			_tlbiel_pid(pid, also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB);
1355 		} else {
1356 			if (cputlb_use_tlbie()) {
1357 				if (mm_needs_flush_escalation(mm))
1358 					also_pwc = true;
1359 
1360 				_tlbie_pid(pid,
1361 					also_pwc ?  RIC_FLUSH_ALL : RIC_FLUSH_TLB);
1362 			} else {
1363 				_tlbiel_pid_multicast(mm, pid,
1364 					also_pwc ?  RIC_FLUSH_ALL : RIC_FLUSH_TLB);
1365 			}
1366 
1367 		}
1368 	} else {
1369 		if (type == FLUSH_TYPE_LOCAL)
1370 			_tlbiel_va_range(start, end, pid, page_size, psize, also_pwc);
1371 		else if (cputlb_use_tlbie())
1372 			_tlbie_va_range(start, end, pid, page_size, psize, also_pwc);
1373 		else
1374 			_tlbiel_va_range_multicast(mm,
1375 					start, end, pid, page_size, psize, also_pwc);
1376 	}
1377 out:
1378 	preempt_enable();
1379 }
1380 
1381 void radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start,
1382 				  unsigned long end, int psize)
1383 {
1384 	return __radix__flush_tlb_range_psize(mm, start, end, psize, false);
1385 }
1386 
1387 void radix__flush_tlb_pwc_range_psize(struct mm_struct *mm, unsigned long start,
1388 				      unsigned long end, int psize)
1389 {
1390 	__radix__flush_tlb_range_psize(mm, start, end, psize, true);
1391 }
1392 
1393 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
1394 void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr)
1395 {
1396 	unsigned long pid, end;
1397 	enum tlb_flush_type type;
1398 
1399 	pid = mm->context.id;
1400 	if (unlikely(pid == MMU_NO_CONTEXT))
1401 		return;
1402 
1403 	/* 4k page size, just blow the world */
1404 	if (PAGE_SIZE == 0x1000) {
1405 		radix__flush_all_mm(mm);
1406 		return;
1407 	}
1408 
1409 	end = addr + HPAGE_PMD_SIZE;
1410 
1411 	/* Otherwise first do the PWC, then iterate the pages. */
1412 	preempt_disable();
1413 	smp_mb(); /* see radix__flush_tlb_mm */
1414 	type = flush_type_needed(mm, false);
1415 	if (type == FLUSH_TYPE_LOCAL) {
1416 		_tlbiel_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
1417 	} else if (type == FLUSH_TYPE_GLOBAL) {
1418 		if (!mmu_has_feature(MMU_FTR_GTSE)) {
1419 			unsigned long tgt, type, pg_sizes;
1420 
1421 			tgt = H_RPTI_TARGET_CMMU;
1422 			type = H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC |
1423 			       H_RPTI_TYPE_PRT;
1424 			pg_sizes = psize_to_rpti_pgsize(mmu_virtual_psize);
1425 
1426 			if (atomic_read(&mm->context.copros) > 0)
1427 				tgt |= H_RPTI_TARGET_NMMU;
1428 			pseries_rpt_invalidate(pid, tgt, type, pg_sizes,
1429 					       addr, end);
1430 		} else if (cputlb_use_tlbie())
1431 			_tlbie_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
1432 		else
1433 			_tlbiel_va_range_multicast(mm,
1434 					addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
1435 	}
1436 
1437 	preempt_enable();
1438 }
1439 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
1440 
1441 void radix__flush_pmd_tlb_range(struct vm_area_struct *vma,
1442 				unsigned long start, unsigned long end)
1443 {
1444 	radix__flush_tlb_range_psize(vma->vm_mm, start, end, MMU_PAGE_2M);
1445 }
1446 EXPORT_SYMBOL(radix__flush_pmd_tlb_range);
1447 
1448 void radix__flush_tlb_all(void)
1449 {
1450 	unsigned long rb, prs, r, rs;
1451 	unsigned long ric = RIC_FLUSH_ALL;
1452 
1453 	rb = 0x3 << PPC_BITLSHIFT(53); /* IS = 3 */
1454 	prs = 0; /* partition scoped */
1455 	r = 1;   /* radix format */
1456 	rs = 1 & ((1UL << 32) - 1); /* any LPID value to flush guest mappings */
1457 
1458 	asm volatile("ptesync": : :"memory");
1459 	/*
1460 	 * now flush guest entries by passing PRS = 1 and LPID != 0
1461 	 */
1462 	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
1463 		     : : "r"(rb), "i"(r), "i"(1), "i"(ric), "r"(rs) : "memory");
1464 	/*
1465 	 * now flush host entries by passing PRS = 0 and LPID == 0
1466 	 */
1467 	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
1468 		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(0) : "memory");
1469 	asm volatile("eieio; tlbsync; ptesync": : :"memory");
1470 }
1471 
1472 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
1473 /*
1474  * Performs process-scoped invalidations for a given LPID
1475  * as part of H_RPT_INVALIDATE hcall.
1476  */
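/*
 * For example (values are illustrative): type = H_RPTI_TYPE_TLB with
 * pg_sizes = H_RPTI_PAGE_64K, start = 0 and end = -1UL is treated as a
 * full PID flush and becomes a single _tlbie_pid_lpid(pid, lpid,
 * RIC_FLUSH_TLB), while a bounded range walks each requested page size
 * and issues _tlbie_va_range_lpid() for it.
 */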
1477 void do_h_rpt_invalidate_prt(unsigned long pid, unsigned long lpid,
1478 			     unsigned long type, unsigned long pg_sizes,
1479 			     unsigned long start, unsigned long end)
1480 {
1481 	unsigned long psize, nr_pages;
1482 	struct mmu_psize_def *def;
1483 	bool flush_pid;
1484 
1485 	/*
1486 	 * An H_RPTI_TYPE_ALL request implies RIC=3, hence
1487 	 * do a single IS=1 based flush.
1488 	 */
1489 	if ((type & H_RPTI_TYPE_ALL) == H_RPTI_TYPE_ALL) {
1490 		_tlbie_pid_lpid(pid, lpid, RIC_FLUSH_ALL);
1491 		return;
1492 	}
1493 
1494 	if (type & H_RPTI_TYPE_PWC)
1495 		_tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC);
1496 
1497 	/* Full PID flush */
1498 	if (start == 0 && end == -1)
1499 		return _tlbie_pid_lpid(pid, lpid, RIC_FLUSH_TLB);
1500 
1501 	/* Do range invalidation for all the valid page sizes */
1502 	for (psize = 0; psize < MMU_PAGE_COUNT; psize++) {
1503 		def = &mmu_psize_defs[psize];
1504 		if (!(pg_sizes & def->h_rpt_pgsize))
1505 			continue;
1506 
1507 		nr_pages = (end - start) >> def->shift;
1508 		flush_pid = nr_pages > tlb_single_page_flush_ceiling;
1509 
1510 		/*
1511 		 * If the number of pages spanning the range is above
1512 		 * the ceiling, convert the request into a full PID flush.
1513 		 * And since PID flush takes out all the page sizes, there
1514 		 * is no need to consider remaining page sizes.
1515 		 */
1516 		if (flush_pid) {
1517 			_tlbie_pid_lpid(pid, lpid, RIC_FLUSH_TLB);
1518 			return;
1519 		}
1520 		_tlbie_va_range_lpid(start, end, pid, lpid,
1521 				     (1UL << def->shift), psize, false);
1522 	}
1523 }
1524 EXPORT_SYMBOL_GPL(do_h_rpt_invalidate_prt);
1525 
1526 #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
1527