1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * TLB flush routines for radix kernels.
4  *
5  * Copyright 2015-2016, Aneesh Kumar K.V, IBM Corporation.
6  */
7 
8 #include <linux/mm.h>
9 #include <linux/hugetlb.h>
10 #include <linux/memblock.h>
11 #include <linux/mmu_context.h>
12 #include <linux/sched/mm.h>
13 
14 #include <asm/ppc-opcode.h>
15 #include <asm/tlb.h>
16 #include <asm/tlbflush.h>
17 #include <asm/trace.h>
18 #include <asm/cputhreads.h>
19 #include <asm/plpar_wrappers.h>
20 
21 #include "internal.h"
22 
23 #define RIC_FLUSH_TLB 0
24 #define RIC_FLUSH_PWC 1
25 #define RIC_FLUSH_ALL 2
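
/*
 * RIC selects what tlbie/tlbiel invalidates: TLB entries only, the
 * Page Walk Cache (PWC) only, or everything cached for the targeted
 * translation regime (RIC_FLUSH_ALL also covers cached process/partition
 * table entries). The helpers below take RIC as an immediate because the
 * instruction encoding requires a compile-time constant.
 */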
26 
27 /*
28  * tlbiel instruction for radix, set invalidation
29  * i.e., r=1 and is=01 or is=10 or is=11
30  */
31 static __always_inline void tlbiel_radix_set_isa300(unsigned int set, unsigned int is,
32 					unsigned int pid,
33 					unsigned int ric, unsigned int prs)
34 {
35 	unsigned long rb;
36 	unsigned long rs;
37 
38 	rb = (set << PPC_BITLSHIFT(51)) | (is << PPC_BITLSHIFT(53));
39 	rs = ((unsigned long)pid << PPC_BITLSHIFT(31));
40 
41 	asm volatile(PPC_TLBIEL(%0, %1, %2, %3, 1)
42 		     : : "r"(rb), "r"(rs), "i"(ric), "i"(prs)
43 		     : "memory");
44 }
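
/*
 * A sketch of the register layout tlbiel_radix_set_isa300() builds, in
 * IBM (big-endian) bit numbering where PPC_BITLSHIFT(n) == 63 - n:
 *
 *	RB: the set index is placed so its low bit lands at bit 51,
 *	    and IS occupies bits 52:53
 *	RS: the PID occupies bits 0:31 (i.e. pid << 32)
 *
 * So, for example, tlbiel_radix_set_isa300(0, 3, 0, RIC_FLUSH_ALL, 1)
 * asks for a process-scoped flush of everything in set 0 with IS=3,
 * which is the first process-scoped operation tlbiel_all_isa300() below
 * issues for the TLB_INVAL_SCOPE_GLOBAL case.
 */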
45 
46 static void tlbiel_all_isa300(unsigned int num_sets, unsigned int is)
47 {
48 	unsigned int set;
49 
50 	asm volatile("ptesync": : :"memory");
51 
52 	/*
53 	 * Flush the first set of the TLB, and the entire Page Walk Cache
54 	 * and partition table entries. Then flush the remaining sets of the
55 	 * TLB.
56 	 */
57 
58 	if (early_cpu_has_feature(CPU_FTR_HVMODE)) {
59 		/* With MSR[HV] set, flush partition scope translations first. */
60 		tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 0);
61 
62 		if (!early_cpu_has_feature(CPU_FTR_ARCH_31)) {
63 			for (set = 1; set < num_sets; set++)
64 				tlbiel_radix_set_isa300(set, is, 0,
65 							RIC_FLUSH_TLB, 0);
66 		}
67 	}
68 
69 	/* Flush process scoped entries. */
70 	tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 1);
71 
72 	if (!early_cpu_has_feature(CPU_FTR_ARCH_31)) {
73 		for (set = 1; set < num_sets; set++)
74 			tlbiel_radix_set_isa300(set, is, 0, RIC_FLUSH_TLB, 1);
75 	}
76 
77 	ppc_after_tlbiel_barrier();
78 }
79 
80 void radix__tlbiel_all(unsigned int action)
81 {
82 	unsigned int is;
83 
84 	switch (action) {
85 	case TLB_INVAL_SCOPE_GLOBAL:
86 		is = 3;
87 		break;
88 	case TLB_INVAL_SCOPE_LPID:
89 		is = 2;
90 		break;
91 	default:
92 		BUG();
93 	}
94 
95 	if (early_cpu_has_feature(CPU_FTR_ARCH_300))
96 		tlbiel_all_isa300(POWER9_TLB_SETS_RADIX, is);
97 	else
98 		WARN(1, "%s called on pre-POWER9 CPU\n", __func__);
99 
100 	asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT "; isync" : : :"memory");
101 }
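
/*
 * Usage sketch (the real call sites live outside this file, e.g. the
 * generic tlbiel_all() wrapper used when bringing a CPU up): a caller
 * that wants this CPU's TLB, PWC and ERAT fully invalidated does
 *
 *	radix__tlbiel_all(TLB_INVAL_SCOPE_GLOBAL);
 *
 * while TLB_INVAL_SCOPE_LPID (IS=2) limits the invalidation to entries
 * tagged with the current LPID.
 */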
102 
103 static __always_inline void __tlbiel_pid(unsigned long pid, int set,
104 				unsigned long ric)
105 {
106 	unsigned long rb,rs,prs,r;
107 
108 	rb = PPC_BIT(53); /* IS = 1 */
109 	rb |= set << PPC_BITLSHIFT(51);
110 	rs = ((unsigned long)pid) << PPC_BITLSHIFT(31);
111 	prs = 1; /* process scoped */
112 	r = 1;   /* radix format */
113 
114 	asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
115 		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
116 	trace_tlbie(0, 1, rb, rs, ric, prs, r);
117 }
118 
119 static __always_inline void __tlbie_pid(unsigned long pid, unsigned long ric)
120 {
121 	unsigned long rb,rs,prs,r;
122 
123 	rb = PPC_BIT(53); /* IS = 1 */
124 	rs = pid << PPC_BITLSHIFT(31);
125 	prs = 1; /* process scoped */
126 	r = 1;   /* radix format */
127 
128 	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
129 		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
130 	trace_tlbie(0, 0, rb, rs, ric, prs, r);
131 }
132 
133 static __always_inline void __tlbie_lpid(unsigned long lpid, unsigned long ric)
134 {
135 	unsigned long rb,rs,prs,r;
136 
137 	rb = PPC_BIT(52); /* IS = 2 */
138 	rs = lpid;
139 	prs = 0; /* partition scoped */
140 	r = 1;   /* radix format */
141 
142 	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
143 		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
144 	trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
145 }
146 
147 static __always_inline void __tlbie_lpid_guest(unsigned long lpid, unsigned long ric)
148 {
149 	unsigned long rb,rs,prs,r;
150 
151 	rb = PPC_BIT(52); /* IS = 2 */
152 	rs = lpid;
153 	prs = 1; /* process scoped */
154 	r = 1;   /* radix format */
155 
156 	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
157 		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
158 	trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
159 }
160 
161 static __always_inline void __tlbiel_va(unsigned long va, unsigned long pid,
162 					unsigned long ap, unsigned long ric)
163 {
164 	unsigned long rb,rs,prs,r;
165 
166 	rb = va & ~(PPC_BITMASK(52, 63));
167 	rb |= ap << PPC_BITLSHIFT(58);
168 	rs = pid << PPC_BITLSHIFT(31);
169 	prs = 1; /* process scoped */
170 	r = 1;   /* radix format */
171 
172 	asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
173 		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
174 	trace_tlbie(0, 1, rb, rs, ric, prs, r);
175 }
176 
177 static __always_inline void __tlbie_va(unsigned long va, unsigned long pid,
178 				       unsigned long ap, unsigned long ric)
179 {
180 	unsigned long rb,rs,prs,r;
181 
182 	rb = va & ~(PPC_BITMASK(52, 63));
183 	rb |= ap << PPC_BITLSHIFT(58);
184 	rs = pid << PPC_BITLSHIFT(31);
185 	prs = 1; /* process scoped */
186 	r = 1;   /* radix format */
187 
188 	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
189 		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
190 	trace_tlbie(0, 0, rb, rs, ric, prs, r);
191 }
192 
193 static __always_inline void __tlbie_lpid_va(unsigned long va, unsigned long lpid,
194 					    unsigned long ap, unsigned long ric)
195 {
196 	unsigned long rb,rs,prs,r;
197 
198 	rb = va & ~(PPC_BITMASK(52, 63));
199 	rb |= ap << PPC_BITLSHIFT(58);
200 	rs = lpid;
201 	prs = 0; /* partition scoped */
202 	r = 1;   /* radix format */
203 
204 	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
205 		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
206 	trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
207 }
208 
209 
210 static inline void fixup_tlbie_va(unsigned long va, unsigned long pid,
211 				  unsigned long ap)
212 {
213 	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
214 		asm volatile("ptesync": : :"memory");
215 		__tlbie_va(va, 0, ap, RIC_FLUSH_TLB);
216 	}
217 
218 	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
219 		asm volatile("ptesync": : :"memory");
220 		__tlbie_va(va, pid, ap, RIC_FLUSH_TLB);
221 	}
222 }
223 
224 static inline void fixup_tlbie_va_range(unsigned long va, unsigned long pid,
225 					unsigned long ap)
226 {
227 	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
228 		asm volatile("ptesync": : :"memory");
229 		__tlbie_pid(0, RIC_FLUSH_TLB);
230 	}
231 
232 	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
233 		asm volatile("ptesync": : :"memory");
234 		__tlbie_va(va, pid, ap, RIC_FLUSH_TLB);
235 	}
236 }
237 
238 static inline void fixup_tlbie_pid(unsigned long pid)
239 {
240 	/*
241 	 * We can use any address for the invalidation; pick one which is
242 	 * probably unused, as an optimisation.
243 	 */
244 	unsigned long va = ((1UL << 52) - 1);
245 
246 	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
247 		asm volatile("ptesync": : :"memory");
248 		__tlbie_pid(0, RIC_FLUSH_TLB);
249 	}
250 
251 	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
252 		asm volatile("ptesync": : :"memory");
253 		__tlbie_va(va, pid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB);
254 	}
255 }
256 
257 
258 static inline void fixup_tlbie_lpid_va(unsigned long va, unsigned long lpid,
259 				       unsigned long ap)
260 {
261 	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
262 		asm volatile("ptesync": : :"memory");
263 		__tlbie_lpid_va(va, 0, ap, RIC_FLUSH_TLB);
264 	}
265 
266 	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
267 		asm volatile("ptesync": : :"memory");
268 		__tlbie_lpid_va(va, lpid, ap, RIC_FLUSH_TLB);
269 	}
270 }
271 
272 static inline void fixup_tlbie_lpid(unsigned long lpid)
273 {
274 	/*
275 	 * We can use any address for the invalidation; pick one which is
276 	 * probably unused, as an optimisation.
277 	 */
278 	unsigned long va = ((1UL << 52) - 1);
279 
280 	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
281 		asm volatile("ptesync": : :"memory");
282 		__tlbie_lpid(0, RIC_FLUSH_TLB);
283 	}
284 
285 	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
286 		asm volatile("ptesync": : :"memory");
287 		__tlbie_lpid_va(va, lpid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB);
288 	}
289 }
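
/*
 * All of the fixup_tlbie_*() helpers above follow the same shape: for
 * CPU_FTR_P9_TLBIE_ERAT_BUG an extra invalidation targeting PID/LPID 0
 * is issued, and for CPU_FTR_P9_TLBIE_STQ_BUG the original invalidation
 * is repeated, each preceded by a ptesync so it orders after the tlbie
 * being fixed up. Callers issue them after the "real" tlbie(s) and
 * before the closing eieio; tlbsync; ptesync sequence, as the _tlbie_*()
 * wrappers below do.
 */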
290 
291 /*
292  * We use 128 sets in radix mode and 256 sets in hash (HPT) mode.
293  */
294 static inline void _tlbiel_pid(unsigned long pid, unsigned long ric)
295 {
296 	int set;
297 
298 	asm volatile("ptesync": : :"memory");
299 
300 	switch (ric) {
301 	case RIC_FLUSH_PWC:
302 		/* For PWC, only one flush is needed */
304 		__tlbiel_pid(pid, 0, RIC_FLUSH_PWC);
305 		ppc_after_tlbiel_barrier();
306 		return;
307 	case RIC_FLUSH_TLB:
308 		__tlbiel_pid(pid, 0, RIC_FLUSH_TLB);
309 		break;
310 	case RIC_FLUSH_ALL:
311 	default:
312 		/*
313 		 * Flush the first set of the TLB, and if
314 		 * we're doing a RIC_FLUSH_ALL, also flush
315 		 * the entire Page Walk Cache.
316 		 */
317 		__tlbiel_pid(pid, 0, RIC_FLUSH_ALL);
318 	}
319 
320 	if (!cpu_has_feature(CPU_FTR_ARCH_31)) {
321 		/* For the remaining sets, just flush the TLB */
322 		for (set = 1; set < POWER9_TLB_SETS_RADIX ; set++)
323 			__tlbiel_pid(pid, set, RIC_FLUSH_TLB);
324 	}
325 
326 	ppc_after_tlbiel_barrier();
327 	asm volatile(PPC_RADIX_INVALIDATE_ERAT_USER "; isync" : : :"memory");
328 }
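
/*
 * Example (sketch): radix__local_flush_tlb_mm() below reduces to
 *
 *	preempt_disable();
 *	_tlbiel_pid(mm->context.id, RIC_FLUSH_TLB);
 *	preempt_enable();
 *
 * i.e. a tlbiel for set 0 and, on pre-ISA v3.1 CPUs, one per remaining
 * TLB set, followed by the post-tlbiel barrier and a user ERAT flush.
 */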
329 
330 static inline void _tlbie_pid(unsigned long pid, unsigned long ric)
331 {
332 	asm volatile("ptesync": : :"memory");
333 
334 	/*
335 	 * Work around the fact that the "ric" argument to __tlbie_pid
336 	 * must be a compile-time constant to match the "i" constraint
337 	 * in the asm statement.
338 	 */
339 	switch (ric) {
340 	case RIC_FLUSH_TLB:
341 		__tlbie_pid(pid, RIC_FLUSH_TLB);
342 		fixup_tlbie_pid(pid);
343 		break;
344 	case RIC_FLUSH_PWC:
345 		__tlbie_pid(pid, RIC_FLUSH_PWC);
346 		break;
347 	case RIC_FLUSH_ALL:
348 	default:
349 		__tlbie_pid(pid, RIC_FLUSH_ALL);
350 		fixup_tlbie_pid(pid);
351 	}
352 	asm volatile("eieio; tlbsync; ptesync": : :"memory");
353 }
354 
355 struct tlbiel_pid {
356 	unsigned long pid;
357 	unsigned long ric;
358 };
359 
360 static void do_tlbiel_pid(void *info)
361 {
362 	struct tlbiel_pid *t = info;
363 
364 	if (t->ric == RIC_FLUSH_TLB)
365 		_tlbiel_pid(t->pid, RIC_FLUSH_TLB);
366 	else if (t->ric == RIC_FLUSH_PWC)
367 		_tlbiel_pid(t->pid, RIC_FLUSH_PWC);
368 	else
369 		_tlbiel_pid(t->pid, RIC_FLUSH_ALL);
370 }
371 
372 static inline void _tlbiel_pid_multicast(struct mm_struct *mm,
373 				unsigned long pid, unsigned long ric)
374 {
375 	struct cpumask *cpus = mm_cpumask(mm);
376 	struct tlbiel_pid t = { .pid = pid, .ric = ric };
377 
378 	on_each_cpu_mask(cpus, do_tlbiel_pid, &t, 1);
379 	/*
380 	 * We always want the CPU translations to be invalidated with tlbiel in
381 	 * these paths, so while coprocessors must use tlbie, we cannot
382 	 * optimise away the tlbiel component.
383 	 */
384 	if (atomic_read(&mm->context.copros) > 0)
385 		_tlbie_pid(pid, RIC_FLUSH_ALL);
386 }
387 
388 static inline void _tlbie_lpid(unsigned long lpid, unsigned long ric)
389 {
390 	asm volatile("ptesync": : :"memory");
391 
392 	/*
393 	 * Work around the fact that the "ric" argument to __tlbie_lpid
394 	 * must be a compile-time constant to match the "i" constraint
395 	 * in the asm statement.
396 	 */
397 	switch (ric) {
398 	case RIC_FLUSH_TLB:
399 		__tlbie_lpid(lpid, RIC_FLUSH_TLB);
400 		fixup_tlbie_lpid(lpid);
401 		break;
402 	case RIC_FLUSH_PWC:
403 		__tlbie_lpid(lpid, RIC_FLUSH_PWC);
404 		break;
405 	case RIC_FLUSH_ALL:
406 	default:
407 		__tlbie_lpid(lpid, RIC_FLUSH_ALL);
408 		fixup_tlbie_lpid(lpid);
409 	}
410 	asm volatile("eieio; tlbsync; ptesync": : :"memory");
411 }
412 
413 static __always_inline void _tlbie_lpid_guest(unsigned long lpid, unsigned long ric)
414 {
415 	/*
416 	 * Work around the fact that the "ric" argument to __tlbie_lpid_guest
417 	 * must be a compile-time constant to match the "i" constraint
418 	 * in the asm statement.
419 	 */
420 	switch (ric) {
421 	case RIC_FLUSH_TLB:
422 		__tlbie_lpid_guest(lpid, RIC_FLUSH_TLB);
423 		break;
424 	case RIC_FLUSH_PWC:
425 		__tlbie_lpid_guest(lpid, RIC_FLUSH_PWC);
426 		break;
427 	case RIC_FLUSH_ALL:
428 	default:
429 		__tlbie_lpid_guest(lpid, RIC_FLUSH_ALL);
430 	}
431 	fixup_tlbie_lpid(lpid);
432 	asm volatile("eieio; tlbsync; ptesync": : :"memory");
433 }
434 
435 static inline void __tlbiel_va_range(unsigned long start, unsigned long end,
436 				    unsigned long pid, unsigned long page_size,
437 				    unsigned long psize)
438 {
439 	unsigned long addr;
440 	unsigned long ap = mmu_get_ap(psize);
441 
442 	for (addr = start; addr < end; addr += page_size)
443 		__tlbiel_va(addr, pid, ap, RIC_FLUSH_TLB);
444 }
445 
446 static __always_inline void _tlbiel_va(unsigned long va, unsigned long pid,
447 				       unsigned long psize, unsigned long ric)
448 {
449 	unsigned long ap = mmu_get_ap(psize);
450 
451 	asm volatile("ptesync": : :"memory");
452 	__tlbiel_va(va, pid, ap, ric);
453 	ppc_after_tlbiel_barrier();
454 }
455 
456 static inline void _tlbiel_va_range(unsigned long start, unsigned long end,
457 				    unsigned long pid, unsigned long page_size,
458 				    unsigned long psize, bool also_pwc)
459 {
460 	asm volatile("ptesync": : :"memory");
461 	if (also_pwc)
462 		__tlbiel_pid(pid, 0, RIC_FLUSH_PWC);
463 	__tlbiel_va_range(start, end, pid, page_size, psize);
464 	ppc_after_tlbiel_barrier();
465 }
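
/*
 * Example (sketch): a local flush of three 64K pages with no page table
 * pages freed expands to roughly
 *
 *	ptesync
 *	tlbiel(va,        pid, ap_64K, RIC_FLUSH_TLB)
 *	tlbiel(va + 64K,  pid, ap_64K, RIC_FLUSH_TLB)
 *	tlbiel(va + 128K, pid, ap_64K, RIC_FLUSH_TLB)
 *	ppc_after_tlbiel_barrier()
 *
 * with an additional RIC_FLUSH_PWC tlbiel up front when also_pwc is set
 * (i.e. the caller has freed page table pages for this range).
 */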
466 
467 static inline void __tlbie_va_range(unsigned long start, unsigned long end,
468 				    unsigned long pid, unsigned long page_size,
469 				    unsigned long psize)
470 {
471 	unsigned long addr;
472 	unsigned long ap = mmu_get_ap(psize);
473 
474 	for (addr = start; addr < end; addr += page_size)
475 		__tlbie_va(addr, pid, ap, RIC_FLUSH_TLB);
476 
477 	fixup_tlbie_va_range(addr - page_size, pid, ap);
478 }
479 
480 static __always_inline void _tlbie_va(unsigned long va, unsigned long pid,
481 				      unsigned long psize, unsigned long ric)
482 {
483 	unsigned long ap = mmu_get_ap(psize);
484 
485 	asm volatile("ptesync": : :"memory");
486 	__tlbie_va(va, pid, ap, ric);
487 	fixup_tlbie_va(va, pid, ap);
488 	asm volatile("eieio; tlbsync; ptesync": : :"memory");
489 }
490 
491 struct tlbiel_va {
492 	unsigned long pid;
493 	unsigned long va;
494 	unsigned long psize;
495 	unsigned long ric;
496 };
497 
498 static void do_tlbiel_va(void *info)
499 {
500 	struct tlbiel_va *t = info;
501 
502 	if (t->ric == RIC_FLUSH_TLB)
503 		_tlbiel_va(t->va, t->pid, t->psize, RIC_FLUSH_TLB);
504 	else if (t->ric == RIC_FLUSH_PWC)
505 		_tlbiel_va(t->va, t->pid, t->psize, RIC_FLUSH_PWC);
506 	else
507 		_tlbiel_va(t->va, t->pid, t->psize, RIC_FLUSH_ALL);
508 }
509 
510 static inline void _tlbiel_va_multicast(struct mm_struct *mm,
511 				unsigned long va, unsigned long pid,
512 				unsigned long psize, unsigned long ric)
513 {
514 	struct cpumask *cpus = mm_cpumask(mm);
515 	struct tlbiel_va t = { .va = va, .pid = pid, .psize = psize, .ric = ric };
516 	on_each_cpu_mask(cpus, do_tlbiel_va, &t, 1);
517 	if (atomic_read(&mm->context.copros) > 0)
518 		_tlbie_va(va, pid, psize, RIC_FLUSH_TLB);
519 }
520 
521 struct tlbiel_va_range {
522 	unsigned long pid;
523 	unsigned long start;
524 	unsigned long end;
525 	unsigned long page_size;
526 	unsigned long psize;
527 	bool also_pwc;
528 };
529 
530 static void do_tlbiel_va_range(void *info)
531 {
532 	struct tlbiel_va_range *t = info;
533 
534 	_tlbiel_va_range(t->start, t->end, t->pid, t->page_size,
535 				    t->psize, t->also_pwc);
536 }
537 
538 static __always_inline void _tlbie_lpid_va(unsigned long va, unsigned long lpid,
539 			      unsigned long psize, unsigned long ric)
540 {
541 	unsigned long ap = mmu_get_ap(psize);
542 
543 	asm volatile("ptesync": : :"memory");
544 	__tlbie_lpid_va(va, lpid, ap, ric);
545 	fixup_tlbie_lpid_va(va, lpid, ap);
546 	asm volatile("eieio; tlbsync; ptesync": : :"memory");
547 }
548 
549 static inline void _tlbie_va_range(unsigned long start, unsigned long end,
550 				    unsigned long pid, unsigned long page_size,
551 				    unsigned long psize, bool also_pwc)
552 {
553 	asm volatile("ptesync": : :"memory");
554 	if (also_pwc)
555 		__tlbie_pid(pid, RIC_FLUSH_PWC);
556 	__tlbie_va_range(start, end, pid, page_size, psize);
557 	asm volatile("eieio; tlbsync; ptesync": : :"memory");
558 }
559 
560 static inline void _tlbiel_va_range_multicast(struct mm_struct *mm,
561 				unsigned long start, unsigned long end,
562 				unsigned long pid, unsigned long page_size,
563 				unsigned long psize, bool also_pwc)
564 {
565 	struct cpumask *cpus = mm_cpumask(mm);
566 	struct tlbiel_va_range t = { .start = start, .end = end,
567 				.pid = pid, .page_size = page_size,
568 				.psize = psize, .also_pwc = also_pwc };
569 
570 	on_each_cpu_mask(cpus, do_tlbiel_va_range, &t, 1);
571 	if (atomic_read(&mm->context.copros) > 0)
572 		_tlbie_va_range(start, end, pid, page_size, psize, also_pwc);
573 }
574 
575 /*
576  * Base TLB flushing operations:
577  *
578  *  - flush_tlb_mm(mm) flushes the specified mm context TLB's
579  *  - flush_tlb_page(vma, vmaddr) flushes one page
580  *  - flush_tlb_range(vma, start, end) flushes a range of pages
581  *  - flush_tlb_kernel_range(start, end) flushes kernel pages
582  *
583  *  - local_* variants of page and mm only apply to the current
584  *    processor
585  */
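
/*
 * Example (a hedged sketch of a typical 64K-page path): after generic mm
 * code clears a PTE, the flush arrives here roughly as
 *
 *	flush_tlb_page(vma, addr)
 *	  -> radix__flush_tlb_page(vma, addr)
 *	  -> radix__flush_tlb_page_psize(vma->vm_mm, addr, mmu_virtual_psize)
 *
 * which then picks a local tlbiel, a broadcast tlbie, an IPI-driven
 * tlbiel, or an H_RPT_INVALIDATE hcall depending on flush_type_needed()
 * and the MMU features available.
 */
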
586 void radix__local_flush_tlb_mm(struct mm_struct *mm)
587 {
588 	unsigned long pid;
589 
590 	preempt_disable();
591 	pid = mm->context.id;
592 	if (pid != MMU_NO_CONTEXT)
593 		_tlbiel_pid(pid, RIC_FLUSH_TLB);
594 	preempt_enable();
595 }
596 EXPORT_SYMBOL(radix__local_flush_tlb_mm);
597 
598 #ifndef CONFIG_SMP
599 void radix__local_flush_all_mm(struct mm_struct *mm)
600 {
601 	unsigned long pid;
602 
603 	preempt_disable();
604 	pid = mm->context.id;
605 	if (pid != MMU_NO_CONTEXT)
606 		_tlbiel_pid(pid, RIC_FLUSH_ALL);
607 	preempt_enable();
608 }
609 EXPORT_SYMBOL(radix__local_flush_all_mm);
610 
611 static void __flush_all_mm(struct mm_struct *mm, bool fullmm)
612 {
613 	radix__local_flush_all_mm(mm);
614 }
615 #endif /* CONFIG_SMP */
616 
617 void radix__local_flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
618 				       int psize)
619 {
620 	unsigned long pid;
621 
622 	preempt_disable();
623 	pid = mm->context.id;
624 	if (pid != MMU_NO_CONTEXT)
625 		_tlbiel_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
626 	preempt_enable();
627 }
628 
629 void radix__local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
630 {
631 #ifdef CONFIG_HUGETLB_PAGE
632 	/* need the return fix for nohash.c */
633 	if (is_vm_hugetlb_page(vma))
634 		return radix__local_flush_hugetlb_page(vma, vmaddr);
635 #endif
636 	radix__local_flush_tlb_page_psize(vma->vm_mm, vmaddr, mmu_virtual_psize);
637 }
638 EXPORT_SYMBOL(radix__local_flush_tlb_page);
639 
640 static bool mm_needs_flush_escalation(struct mm_struct *mm)
641 {
642 	/*
643 	 * P9 nest MMU has issues with the page walk cache
644 	 * caching PTEs and not flushing them properly when
645 	 * RIC = 0 for a PID/LPID invalidate
646 	 */
647 	if (atomic_read(&mm->context.copros) > 0)
648 		return true;
649 	return false;
650 }
651 
652 /*
653  * If always_flush is true, then flush even if this CPU can't be removed
654  * from mm_cpumask.
655  */
656 void exit_lazy_flush_tlb(struct mm_struct *mm, bool always_flush)
657 {
658 	unsigned long pid = mm->context.id;
659 	int cpu = smp_processor_id();
660 
661 	/*
662 	 * A kthread could have done an mmget_not_zero() after the flushing CPU
663 	 * checked mm_cpumask, and be in the process of kthread_use_mm when
664 	 * interrupted here. In that case, current->mm will be set to mm,
665 	 * because kthread_use_mm() setting ->mm and switching to the mm is
666 	 * done with interrupts off.
667 	 */
668 	if (current->mm == mm)
669 		goto out;
670 
671 	if (current->active_mm == mm) {
672 		WARN_ON_ONCE(current->mm != NULL);
673 		/* Is a kernel thread and is using mm as the lazy tlb */
674 		mmgrab(&init_mm);
675 		current->active_mm = &init_mm;
676 		switch_mm_irqs_off(mm, &init_mm, current);
677 		mmdrop(mm);
678 	}
679 
680 	/*
681 	 * This IPI may be initiated from any source including those not
682 	 * running the mm, so there may be a racing IPI that comes after
683 	 * this one which finds the cpumask already clear. Check and avoid
684 	 * underflowing the active_cpus count in that case. The race should
685 	 * not otherwise be a problem, but the TLB must be flushed because
686 	 * that's what the caller expects.
687 	 */
688 	if (cpumask_test_cpu(cpu, mm_cpumask(mm))) {
689 		atomic_dec(&mm->context.active_cpus);
690 		cpumask_clear_cpu(cpu, mm_cpumask(mm));
691 		always_flush = true;
692 	}
693 
694 out:
695 	if (always_flush)
696 		_tlbiel_pid(pid, RIC_FLUSH_ALL);
697 }
698 
699 #ifdef CONFIG_SMP
700 static void do_exit_flush_lazy_tlb(void *arg)
701 {
702 	struct mm_struct *mm = arg;
703 	exit_lazy_flush_tlb(mm, true);
704 }
705 
706 static void exit_flush_lazy_tlbs(struct mm_struct *mm)
707 {
708 	/*
709 	 * Would be nice if this was async so it could be run in
710 	 * parallel with our local flush, but generic code does not
711 	 * give a good API for it. Could extend the generic code or
712 	 * make a special powerpc IPI for flushing TLBs.
713 	 * For now it's not too performance critical.
714 	 */
715 	smp_call_function_many(mm_cpumask(mm), do_exit_flush_lazy_tlb,
716 				(void *)mm, 1);
717 }
718 
719 #else /* CONFIG_SMP */
720 static inline void exit_flush_lazy_tlbs(struct mm_struct *mm) { }
721 #endif /* CONFIG_SMP */
722 
723 static DEFINE_PER_CPU(unsigned int, mm_cpumask_trim_clock);
724 
725 /*
726  * Interval between flushes at which we send out IPIs to check whether the
727  * mm_cpumask can be trimmed for the case where it's not a single-threaded
728  * process flushing its own mm. The intent is to reduce the cost of later
729  * flushes. We don't want this to be so low that it adds noticeable cost to TLB
730  * flushing, or so high that it doesn't help reduce global TLBIEs.
731  */
732 static unsigned long tlb_mm_cpumask_trim_timer = 1073;
733 
734 static bool tick_and_test_trim_clock(void)
735 {
736 	if (__this_cpu_inc_return(mm_cpumask_trim_clock) ==
737 			tlb_mm_cpumask_trim_timer) {
738 		__this_cpu_write(mm_cpumask_trim_clock, 0);
739 		return true;
740 	}
741 	return false;
742 }
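
/*
 * Example: with the default tlb_mm_cpumask_trim_timer of 1073, roughly
 * one in every 1073 calls on a given CPU returns true, so only that
 * call pays for the cpumask-trimming IPIs in flush_type_needed().
 */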
743 
744 enum tlb_flush_type {
745 	FLUSH_TYPE_NONE,
746 	FLUSH_TYPE_LOCAL,
747 	FLUSH_TYPE_GLOBAL,
748 };
749 
750 static enum tlb_flush_type flush_type_needed(struct mm_struct *mm, bool fullmm)
751 {
752 	int active_cpus = atomic_read(&mm->context.active_cpus);
753 	int cpu = smp_processor_id();
754 
755 	if (active_cpus == 0)
756 		return FLUSH_TYPE_NONE;
757 	if (active_cpus == 1 && cpumask_test_cpu(cpu, mm_cpumask(mm))) {
758 		if (current->mm != mm) {
759 			/*
760 			 * Asynchronous flush sources may trim down to nothing
761 			 * if the process is not running, so occasionally try
762 			 * to trim.
763 			 */
764 			if (tick_and_test_trim_clock()) {
765 				exit_lazy_flush_tlb(mm, true);
766 				return FLUSH_TYPE_NONE;
767 			}
768 		}
769 		return FLUSH_TYPE_LOCAL;
770 	}
771 
772 	/* Coprocessors require TLBIE to invalidate nMMU. */
773 	if (atomic_read(&mm->context.copros) > 0)
774 		return FLUSH_TYPE_GLOBAL;
775 
776 	/*
777 	 * In the fullmm case there's no point doing the exit_flush_lazy_tlbs
778 	 * because the mm is being taken down anyway, and a TLBIE tends to
779 	 * be faster than an IPI+TLBIEL.
780 	 */
781 	if (fullmm)
782 		return FLUSH_TYPE_GLOBAL;
783 
784 	/*
785 	 * If we are running the only thread of a single-threaded process,
786 	 * then we should almost always be able to trim off the rest of the
787 	 * CPU mask (except in the case of use_mm() races), so always try
788 	 * trimming the mask.
789 	 */
790 	if (atomic_read(&mm->mm_users) <= 1 && current->mm == mm) {
791 		exit_flush_lazy_tlbs(mm);
792 		/*
793 		 * A use_mm() race could prevent IPIs from clearing the cpumask
794 		 * here; however, those users are established after our first
795 		 * check (and so after the PTEs are removed), and the TLB still
796 		 * gets flushed by the IPI, so this CPU will only require a
797 		 * local flush.
798 		 */
799 		return FLUSH_TYPE_LOCAL;
800 	}
801 
802 	/*
803 	 * Occasionally try to trim down the cpumask. It's possible this can
804 	 * bring the mask to zero, which results in no flush.
805 	 */
806 	if (tick_and_test_trim_clock()) {
807 		exit_flush_lazy_tlbs(mm);
808 		if (current->mm == mm)
809 			return FLUSH_TYPE_LOCAL;
810 		if (cpumask_test_cpu(cpu, mm_cpumask(mm)))
811 			exit_lazy_flush_tlb(mm, true);
812 		return FLUSH_TYPE_NONE;
813 	}
814 
815 	return FLUSH_TYPE_GLOBAL;
816 }
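
/*
 * A condensed sketch of the decision above:
 *
 *	no active CPUs                     -> FLUSH_TYPE_NONE
 *	only this CPU in mm_cpumask        -> FLUSH_TYPE_LOCAL
 *	coprocessors attached, or fullmm   -> FLUSH_TYPE_GLOBAL
 *	current is the sole user of mm     -> trim others, FLUSH_TYPE_LOCAL
 *	otherwise                          -> FLUSH_TYPE_GLOBAL, with an
 *	                                      occasional attempt to trim
 *	                                      the cpumask
 */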
817 
818 #ifdef CONFIG_SMP
819 void radix__flush_tlb_mm(struct mm_struct *mm)
820 {
821 	unsigned long pid;
822 	enum tlb_flush_type type;
823 
824 	pid = mm->context.id;
825 	if (unlikely(pid == MMU_NO_CONTEXT))
826 		return;
827 
828 	preempt_disable();
829 	/*
830 	 * Order loads of mm_cpumask (in flush_type_needed) vs previous
831 	 * stores to clear ptes before the invalidate. See barrier in
832 	 * switch_mm_irqs_off
833 	 */
834 	smp_mb();
835 	type = flush_type_needed(mm, false);
836 	if (type == FLUSH_TYPE_LOCAL) {
837 		_tlbiel_pid(pid, RIC_FLUSH_TLB);
838 	} else if (type == FLUSH_TYPE_GLOBAL) {
839 		if (!mmu_has_feature(MMU_FTR_GTSE)) {
840 			unsigned long tgt = H_RPTI_TARGET_CMMU;
841 
842 			if (atomic_read(&mm->context.copros) > 0)
843 				tgt |= H_RPTI_TARGET_NMMU;
844 			pseries_rpt_invalidate(pid, tgt, H_RPTI_TYPE_TLB,
845 					       H_RPTI_PAGE_ALL, 0, -1UL);
846 		} else if (cputlb_use_tlbie()) {
847 			if (mm_needs_flush_escalation(mm))
848 				_tlbie_pid(pid, RIC_FLUSH_ALL);
849 			else
850 				_tlbie_pid(pid, RIC_FLUSH_TLB);
851 		} else {
852 			_tlbiel_pid_multicast(mm, pid, RIC_FLUSH_TLB);
853 		}
854 	}
855 	preempt_enable();
856 }
857 EXPORT_SYMBOL(radix__flush_tlb_mm);
858 
859 static void __flush_all_mm(struct mm_struct *mm, bool fullmm)
860 {
861 	unsigned long pid;
862 	enum tlb_flush_type type;
863 
864 	pid = mm->context.id;
865 	if (unlikely(pid == MMU_NO_CONTEXT))
866 		return;
867 
868 	preempt_disable();
869 	smp_mb(); /* see radix__flush_tlb_mm */
870 	type = flush_type_needed(mm, fullmm);
871 	if (type == FLUSH_TYPE_LOCAL) {
872 		_tlbiel_pid(pid, RIC_FLUSH_ALL);
873 	} else if (type == FLUSH_TYPE_GLOBAL) {
874 		if (!mmu_has_feature(MMU_FTR_GTSE)) {
875 			unsigned long tgt = H_RPTI_TARGET_CMMU;
876 			unsigned long type = H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC |
877 					     H_RPTI_TYPE_PRT;
878 
879 			if (atomic_read(&mm->context.copros) > 0)
880 				tgt |= H_RPTI_TARGET_NMMU;
881 			pseries_rpt_invalidate(pid, tgt, type,
882 					       H_RPTI_PAGE_ALL, 0, -1UL);
883 		} else if (cputlb_use_tlbie())
884 			_tlbie_pid(pid, RIC_FLUSH_ALL);
885 		else
886 			_tlbiel_pid_multicast(mm, pid, RIC_FLUSH_ALL);
887 	}
888 	preempt_enable();
889 }
890 
891 void radix__flush_all_mm(struct mm_struct *mm)
892 {
893 	__flush_all_mm(mm, false);
894 }
895 EXPORT_SYMBOL(radix__flush_all_mm);
896 
897 void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
898 				 int psize)
899 {
900 	unsigned long pid;
901 	enum tlb_flush_type type;
902 
903 	pid = mm->context.id;
904 	if (unlikely(pid == MMU_NO_CONTEXT))
905 		return;
906 
907 	preempt_disable();
908 	smp_mb(); /* see radix__flush_tlb_mm */
909 	type = flush_type_needed(mm, false);
910 	if (type == FLUSH_TYPE_LOCAL) {
911 		_tlbiel_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
912 	} else if (type == FLUSH_TYPE_GLOBAL) {
913 		if (!mmu_has_feature(MMU_FTR_GTSE)) {
914 			unsigned long tgt, pg_sizes, size;
915 
916 			tgt = H_RPTI_TARGET_CMMU;
917 			pg_sizes = psize_to_rpti_pgsize(psize);
918 			size = 1UL << mmu_psize_to_shift(psize);
919 
920 			if (atomic_read(&mm->context.copros) > 0)
921 				tgt |= H_RPTI_TARGET_NMMU;
922 			pseries_rpt_invalidate(pid, tgt, H_RPTI_TYPE_TLB,
923 					       pg_sizes, vmaddr,
924 					       vmaddr + size);
925 		} else if (cputlb_use_tlbie())
926 			_tlbie_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
927 		else
928 			_tlbiel_va_multicast(mm, vmaddr, pid, psize, RIC_FLUSH_TLB);
929 	}
930 	preempt_enable();
931 }
932 
933 void radix__flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
934 {
935 #ifdef CONFIG_HUGETLB_PAGE
936 	if (is_vm_hugetlb_page(vma))
937 		return radix__flush_hugetlb_page(vma, vmaddr);
938 #endif
939 	radix__flush_tlb_page_psize(vma->vm_mm, vmaddr, mmu_virtual_psize);
940 }
941 EXPORT_SYMBOL(radix__flush_tlb_page);
942 
943 #endif /* CONFIG_SMP */
944 
945 static void do_tlbiel_kernel(void *info)
946 {
947 	_tlbiel_pid(0, RIC_FLUSH_ALL);
948 }
949 
950 static inline void _tlbiel_kernel_broadcast(void)
951 {
952 	on_each_cpu(do_tlbiel_kernel, NULL, 1);
953 	if (tlbie_capable) {
954 		/*
955 		 * Coherent accelerators don't refcount kernel memory mappings,
956 		 * so we always have to issue a tlbie for them. This is quite a
957 		 * slow path anyway.
958 		 */
959 		_tlbie_pid(0, RIC_FLUSH_ALL);
960 	}
961 }
962 
963 /*
964  * If kernel TLBIs ever become local rather than global, then
965  * drivers/misc/ocxl/link.c:ocxl_link_add_pe will need some work, as it
966  * assumes kernel TLBIs are global.
967  */
968 void radix__flush_tlb_kernel_range(unsigned long start, unsigned long end)
969 {
970 	if (!mmu_has_feature(MMU_FTR_GTSE)) {
971 		unsigned long tgt = H_RPTI_TARGET_CMMU | H_RPTI_TARGET_NMMU;
972 		unsigned long type = H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC |
973 				     H_RPTI_TYPE_PRT;
974 
975 		pseries_rpt_invalidate(0, tgt, type, H_RPTI_PAGE_ALL,
976 				       start, end);
977 	} else if (cputlb_use_tlbie())
978 		_tlbie_pid(0, RIC_FLUSH_ALL);
979 	else
980 		_tlbiel_kernel_broadcast();
981 }
982 EXPORT_SYMBOL(radix__flush_tlb_kernel_range);
983 
984 #define TLB_FLUSH_ALL -1UL
985 
986 /*
987  * Number of pages above which we invalidate the entire PID rather than
988  * flush individual pages, for local and global flushes respectively.
989  *
990  * tlbie goes out to the interconnect and individual ops are more costly.
991  * It also does not iterate over sets like the local tlbiel variant when
992  * invalidating a full PID, so it has a far lower threshold to change from
993  * individual page flushes to full-pid flushes.
994  */
995 static unsigned long tlb_single_page_flush_ceiling __read_mostly = 33;
996 static unsigned long tlb_local_single_page_flush_ceiling __read_mostly = POWER9_TLB_SETS_RADIX * 2;
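
/*
 * Worked example (sketch, assuming a 64K base page size): with the
 * default ceiling of 33, a global flush covering more than 33 pages
 * (just over 2MB) is turned into a full-PID tlbie. The local ceiling is
 * POWER9_TLB_SETS_RADIX * 2 = 256 pages (16MB), since a full-PID tlbiel
 * has to iterate over every TLB set anyway.
 */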
997 
998 static inline void __radix__flush_tlb_range(struct mm_struct *mm,
999 					    unsigned long start, unsigned long end)
1000 {
1002 	unsigned long pid;
1003 	unsigned int page_shift = mmu_psize_defs[mmu_virtual_psize].shift;
1004 	unsigned long page_size = 1UL << page_shift;
1005 	unsigned long nr_pages = (end - start) >> page_shift;
1006 	bool fullmm = (end == TLB_FLUSH_ALL);
1007 	bool flush_pid;
1008 	enum tlb_flush_type type;
1009 
1010 	pid = mm->context.id;
1011 	if (unlikely(pid == MMU_NO_CONTEXT))
1012 		return;
1013 
1014 	preempt_disable();
1015 	smp_mb(); /* see radix__flush_tlb_mm */
1016 	type = flush_type_needed(mm, fullmm);
1017 	if (type == FLUSH_TYPE_NONE)
1018 		goto out;
1019 
1020 	if (fullmm)
1021 		flush_pid = true;
1022 	else if (type == FLUSH_TYPE_GLOBAL)
1023 		flush_pid = nr_pages > tlb_single_page_flush_ceiling;
1024 	else
1025 		flush_pid = nr_pages > tlb_local_single_page_flush_ceiling;
1026 
1027 	if (!mmu_has_feature(MMU_FTR_GTSE) && type == FLUSH_TYPE_GLOBAL) {
1028 		unsigned long tgt = H_RPTI_TARGET_CMMU;
1029 		unsigned long pg_sizes = psize_to_rpti_pgsize(mmu_virtual_psize);
1030 
1031 		if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
1032 			pg_sizes |= psize_to_rpti_pgsize(MMU_PAGE_2M);
1033 		if (atomic_read(&mm->context.copros) > 0)
1034 			tgt |= H_RPTI_TARGET_NMMU;
1035 		pseries_rpt_invalidate(pid, tgt, H_RPTI_TYPE_TLB, pg_sizes,
1036 				       start, end);
1037 	} else if (flush_pid) {
1038 		if (type == FLUSH_TYPE_LOCAL) {
1039 			_tlbiel_pid(pid, RIC_FLUSH_TLB);
1040 		} else {
1041 			if (cputlb_use_tlbie()) {
1042 				if (mm_needs_flush_escalation(mm))
1043 					_tlbie_pid(pid, RIC_FLUSH_ALL);
1044 				else
1045 					_tlbie_pid(pid, RIC_FLUSH_TLB);
1046 			} else {
1047 				_tlbiel_pid_multicast(mm, pid, RIC_FLUSH_TLB);
1048 			}
1049 		}
1050 	} else {
1051 		bool hflush = false;
1052 		unsigned long hstart, hend;
1053 
1054 		if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) {
1055 			hstart = (start + PMD_SIZE - 1) & PMD_MASK;
1056 			hend = end & PMD_MASK;
1057 			if (hstart < hend)
1058 				hflush = true;
1059 		}
1060 
1061 		if (type == FLUSH_TYPE_LOCAL) {
1062 			asm volatile("ptesync": : :"memory");
1063 			__tlbiel_va_range(start, end, pid, page_size, mmu_virtual_psize);
1064 			if (hflush)
1065 				__tlbiel_va_range(hstart, hend, pid,
1066 						PMD_SIZE, MMU_PAGE_2M);
1067 			ppc_after_tlbiel_barrier();
1068 		} else if (cputlb_use_tlbie()) {
1069 			asm volatile("ptesync": : :"memory");
1070 			__tlbie_va_range(start, end, pid, page_size, mmu_virtual_psize);
1071 			if (hflush)
1072 				__tlbie_va_range(hstart, hend, pid,
1073 						PMD_SIZE, MMU_PAGE_2M);
1074 			asm volatile("eieio; tlbsync; ptesync": : :"memory");
1075 		} else {
1076 			_tlbiel_va_range_multicast(mm,
1077 					start, end, pid, page_size, mmu_virtual_psize, false);
1078 			if (hflush)
1079 				_tlbiel_va_range_multicast(mm,
1080 					hstart, hend, pid, PMD_SIZE, MMU_PAGE_2M, false);
1081 		}
1082 	}
1083 out:
1084 	preempt_enable();
1085 }
1086 
1087 void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
1088 		     unsigned long end)
1089 {
1091 #ifdef CONFIG_HUGETLB_PAGE
1092 	if (is_vm_hugetlb_page(vma))
1093 		return radix__flush_hugetlb_tlb_range(vma, start, end);
1094 #endif
1095 
1096 	__radix__flush_tlb_range(vma->vm_mm, start, end);
1097 }
1098 EXPORT_SYMBOL(radix__flush_tlb_range);
1099 
1100 static int radix_get_mmu_psize(int page_size)
1101 {
1102 	int psize;
1103 
1104 	if (page_size == (1UL << mmu_psize_defs[mmu_virtual_psize].shift))
1105 		psize = mmu_virtual_psize;
1106 	else if (page_size == (1UL << mmu_psize_defs[MMU_PAGE_2M].shift))
1107 		psize = MMU_PAGE_2M;
1108 	else if (page_size == (1UL << mmu_psize_defs[MMU_PAGE_1G].shift))
1109 		psize = MMU_PAGE_1G;
1110 	else
1111 		return -1;
1112 	return psize;
1113 }
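
/*
 * Example: assuming the usual radix geometry, radix_get_mmu_psize(1UL << 21)
 * returns MMU_PAGE_2M and radix_get_mmu_psize(1UL << 30) returns
 * MMU_PAGE_1G; any other size yields -1 and callers such as
 * radix__tlb_flush() fall back to a full-mm flush.
 */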
1114 
1115 /*
1116  * Flush partition scoped LPID address translation for all CPUs.
1117  */
1118 void radix__flush_tlb_lpid_page(unsigned int lpid,
1119 					unsigned long addr,
1120 					unsigned long page_size)
1121 {
1122 	int psize = radix_get_mmu_psize(page_size);
1123 
1124 	_tlbie_lpid_va(addr, lpid, psize, RIC_FLUSH_TLB);
1125 }
1126 EXPORT_SYMBOL_GPL(radix__flush_tlb_lpid_page);
1127 
1128 /*
1129  * Flush partition scoped PWC from LPID for all CPUs.
1130  */
1131 void radix__flush_pwc_lpid(unsigned int lpid)
1132 {
1133 	_tlbie_lpid(lpid, RIC_FLUSH_PWC);
1134 }
1135 EXPORT_SYMBOL_GPL(radix__flush_pwc_lpid);
1136 
1137 /*
1138  * Flush partition scoped translations from LPID (=LPIDR)
1139  */
1140 void radix__flush_all_lpid(unsigned int lpid)
1141 {
1142 	_tlbie_lpid(lpid, RIC_FLUSH_ALL);
1143 }
1144 EXPORT_SYMBOL_GPL(radix__flush_all_lpid);
1145 
1146 /*
1147  * Flush process scoped translations from LPID (=LPIDR)
1148  */
1149 void radix__flush_all_lpid_guest(unsigned int lpid)
1150 {
1151 	_tlbie_lpid_guest(lpid, RIC_FLUSH_ALL);
1152 }
1153 
1154 static void radix__flush_tlb_pwc_range_psize(struct mm_struct *mm, unsigned long start,
1155 				  unsigned long end, int psize);
1156 
1157 void radix__tlb_flush(struct mmu_gather *tlb)
1158 {
1159 	int psize = 0;
1160 	struct mm_struct *mm = tlb->mm;
1161 	int page_size = tlb->page_size;
1162 	unsigned long start = tlb->start;
1163 	unsigned long end = tlb->end;
1164 
1165 	/*
1166 	 * if page size is not something we understand, do a full mm flush
1167 	 *
1168 	 * A "fullmm" flush must always do a flush_all_mm (RIC=2) flush
1169 	 * that flushes the process table entry cache upon process teardown.
1170 	 * See the comment for radix in arch_exit_mmap().
1171 	 */
1172 	if (tlb->fullmm || tlb->need_flush_all) {
1173 		__flush_all_mm(mm, true);
1174 	} else if ((psize = radix_get_mmu_psize(page_size)) == -1) {
1175 		if (!tlb->freed_tables)
1176 			radix__flush_tlb_mm(mm);
1177 		else
1178 			radix__flush_all_mm(mm);
1179 	} else {
1180 		if (!tlb->freed_tables)
1181 			radix__flush_tlb_range_psize(mm, start, end, psize);
1182 		else
1183 			radix__flush_tlb_pwc_range_psize(mm, start, end, psize);
1184 	}
1185 }
1186 
1187 static void __radix__flush_tlb_range_psize(struct mm_struct *mm,
1188 				unsigned long start, unsigned long end,
1189 				int psize, bool also_pwc)
1190 {
1191 	unsigned long pid;
1192 	unsigned int page_shift = mmu_psize_defs[psize].shift;
1193 	unsigned long page_size = 1UL << page_shift;
1194 	unsigned long nr_pages = (end - start) >> page_shift;
1195 	bool fullmm = (end == TLB_FLUSH_ALL);
1196 	bool flush_pid;
1197 	enum tlb_flush_type type;
1198 
1199 	pid = mm->context.id;
1200 	if (unlikely(pid == MMU_NO_CONTEXT))
1201 		return;
1202 
1205 	preempt_disable();
1206 	smp_mb(); /* see radix__flush_tlb_mm */
1207 	type = flush_type_needed(mm, fullmm);
1208 	if (type == FLUSH_TYPE_NONE)
1209 		goto out;
1210 
1211 	if (fullmm)
1212 		flush_pid = true;
1213 	else if (type == FLUSH_TYPE_GLOBAL)
1214 		flush_pid = nr_pages > tlb_single_page_flush_ceiling;
1215 	else
1216 		flush_pid = nr_pages > tlb_local_single_page_flush_ceiling;
1217 
1218 	if (!mmu_has_feature(MMU_FTR_GTSE) && type == FLUSH_TYPE_GLOBAL) {
1219 		unsigned long tgt = H_RPTI_TARGET_CMMU;
1220 		unsigned long type = H_RPTI_TYPE_TLB;
1221 		unsigned long pg_sizes = psize_to_rpti_pgsize(psize);
1222 
1223 		if (also_pwc)
1224 			type |= H_RPTI_TYPE_PWC;
1225 		if (atomic_read(&mm->context.copros) > 0)
1226 			tgt |= H_RPTI_TARGET_NMMU;
1227 		pseries_rpt_invalidate(pid, tgt, type, pg_sizes, start, end);
1228 	} else if (flush_pid) {
1229 		if (type == FLUSH_TYPE_LOCAL) {
1230 			_tlbiel_pid(pid, also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB);
1231 		} else {
1232 			if (cputlb_use_tlbie()) {
1233 				if (mm_needs_flush_escalation(mm))
1234 					also_pwc = true;
1235 
1236 				_tlbie_pid(pid,
1237 					also_pwc ?  RIC_FLUSH_ALL : RIC_FLUSH_TLB);
1238 			} else {
1239 				_tlbiel_pid_multicast(mm, pid,
1240 					also_pwc ?  RIC_FLUSH_ALL : RIC_FLUSH_TLB);
1241 			}
1242 
1243 		}
1244 	} else {
1245 		if (type == FLUSH_TYPE_LOCAL)
1246 			_tlbiel_va_range(start, end, pid, page_size, psize, also_pwc);
1247 		else if (cputlb_use_tlbie())
1248 			_tlbie_va_range(start, end, pid, page_size, psize, also_pwc);
1249 		else
1250 			_tlbiel_va_range_multicast(mm,
1251 					start, end, pid, page_size, psize, also_pwc);
1252 	}
1253 out:
1254 	preempt_enable();
1255 }
1256 
1257 void radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start,
1258 				  unsigned long end, int psize)
1259 {
1260 	return __radix__flush_tlb_range_psize(mm, start, end, psize, false);
1261 }
1262 
1263 static void radix__flush_tlb_pwc_range_psize(struct mm_struct *mm, unsigned long start,
1264 				  unsigned long end, int psize)
1265 {
1266 	__radix__flush_tlb_range_psize(mm, start, end, psize, true);
1267 }
1268 
1269 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
1270 void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr)
1271 {
1272 	unsigned long pid, end;
1273 	enum tlb_flush_type type;
1274 
1275 	pid = mm->context.id;
1276 	if (unlikely(pid == MMU_NO_CONTEXT))
1277 		return;
1278 
1279 	/* 4k page size, just blow the world */
1280 	if (PAGE_SIZE == 0x1000) {
1281 		radix__flush_all_mm(mm);
1282 		return;
1283 	}
1284 
1285 	end = addr + HPAGE_PMD_SIZE;
1286 
1287 	/* Otherwise first do the PWC, then iterate the pages. */
1288 	preempt_disable();
1289 	smp_mb(); /* see radix__flush_tlb_mm */
1290 	type = flush_type_needed(mm, false);
1291 	if (type == FLUSH_TYPE_LOCAL) {
1292 		_tlbiel_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
1293 	} else if (type == FLUSH_TYPE_GLOBAL) {
1294 		if (!mmu_has_feature(MMU_FTR_GTSE)) {
1295 			unsigned long tgt, type, pg_sizes;
1296 
1297 			tgt = H_RPTI_TARGET_CMMU;
1298 			type = H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC |
1299 			       H_RPTI_TYPE_PRT;
1300 			pg_sizes = psize_to_rpti_pgsize(mmu_virtual_psize);
1301 
1302 			if (atomic_read(&mm->context.copros) > 0)
1303 				tgt |= H_RPTI_TARGET_NMMU;
1304 			pseries_rpt_invalidate(pid, tgt, type, pg_sizes,
1305 					       addr, end);
1306 		} else if (cputlb_use_tlbie())
1307 			_tlbie_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
1308 		else
1309 			_tlbiel_va_range_multicast(mm,
1310 					addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
1311 	}
1312 
1313 	preempt_enable();
1314 }
1315 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
1316 
1317 void radix__flush_pmd_tlb_range(struct vm_area_struct *vma,
1318 				unsigned long start, unsigned long end)
1319 {
1320 	radix__flush_tlb_range_psize(vma->vm_mm, start, end, MMU_PAGE_2M);
1321 }
1322 EXPORT_SYMBOL(radix__flush_pmd_tlb_range);
1323 
1324 void radix__flush_tlb_all(void)
1325 {
1326 	unsigned long rb,prs,r,rs;
1327 	unsigned long ric = RIC_FLUSH_ALL;
1328 
1329 	rb = 0x3 << PPC_BITLSHIFT(53); /* IS = 3 */
1330 	prs = 0; /* partition scoped */
1331 	r = 1;   /* radix format */
1332 	rs = 1 & ((1UL << 32) - 1); /* any LPID value to flush guest mappings */
1333 
1334 	asm volatile("ptesync": : :"memory");
1335 	/*
1336 	 * now flush guest entries by passing PRS = 1 and LPID != 0
1337 	 */
1338 	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
1339 		     : : "r"(rb), "i"(r), "i"(1), "i"(ric), "r"(rs) : "memory");
1340 	/*
1341 	 * now flush host entries by passing PRS = 0 and LPID == 0
1342 	 */
1343 	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
1344 		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(0) : "memory");
1345 	asm volatile("eieio; tlbsync; ptesync": : :"memory");
1346 }
1347