// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * TLB flush routines for radix kernels.
 *
 * Copyright 2015-2016, Aneesh Kumar K.V, IBM Corporation.
 */

#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/memblock.h>
#include <linux/mmu_context.h>
#include <linux/sched/mm.h>

#include <asm/ppc-opcode.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/trace.h>
#include <asm/cputhreads.h>
#include <asm/plpar_wrappers.h>

#include "internal.h"

#define RIC_FLUSH_TLB 0
#define RIC_FLUSH_PWC 1
#define RIC_FLUSH_ALL 2

/*
 * tlbiel instruction for radix, set invalidation
 * i.e., r=1 and is=01 or is=10 or is=11
 */
static __always_inline void tlbiel_radix_set_isa300(unsigned int set, unsigned int is,
					unsigned int pid,
					unsigned int ric, unsigned int prs)
{
	unsigned long rb;
	unsigned long rs;

	rb = (set << PPC_BITLSHIFT(51)) | (is << PPC_BITLSHIFT(53));
	rs = ((unsigned long)pid << PPC_BITLSHIFT(31));

	asm volatile(PPC_TLBIEL(%0, %1, %2, %3, 1)
		     : : "r"(rb), "r"(rs), "i"(ric), "i"(prs)
		     : "memory");
}

static void tlbiel_all_isa300(unsigned int num_sets, unsigned int is)
{
	unsigned int set;

	asm volatile("ptesync": : :"memory");

	/*
	 * Flush the first set of the TLB, and the entire Page Walk Cache
	 * and partition table entries. Then flush the remaining sets of the
	 * TLB.
	 */

	if (early_cpu_has_feature(CPU_FTR_HVMODE)) {
		/* MSR[HV] should flush partition scope translations first. */
		tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 0);

		if (!early_cpu_has_feature(CPU_FTR_ARCH_31)) {
			for (set = 1; set < num_sets; set++)
				tlbiel_radix_set_isa300(set, is, 0,
							RIC_FLUSH_TLB, 0);
		}
	}

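	/*
	 * On ISA v3.1 (CPU_FTR_ARCH_31) the per-set loops are skipped and
	 * the initial RIC=ALL tlbiel per scope is relied on to cover the
	 * whole flush; earlier CPUs iterate over the remaining sets.
	 */
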
	/* Flush process scoped entries. */
	tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 1);

	if (!early_cpu_has_feature(CPU_FTR_ARCH_31)) {
		for (set = 1; set < num_sets; set++)
			tlbiel_radix_set_isa300(set, is, 0, RIC_FLUSH_TLB, 1);
	}

	ppc_after_tlbiel_barrier();
}

void radix__tlbiel_all(unsigned int action)
{
	unsigned int is;

	switch (action) {
	case TLB_INVAL_SCOPE_GLOBAL:
		is = 3;
		break;
	case TLB_INVAL_SCOPE_LPID:
		is = 2;
		break;
	default:
		BUG();
	}

	if (early_cpu_has_feature(CPU_FTR_ARCH_300))
		tlbiel_all_isa300(POWER9_TLB_SETS_RADIX, is);
	else
		WARN(1, "%s called on pre-POWER9 CPU\n", __func__);

	asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT "; isync" : : :"memory");
}

static __always_inline void __tlbiel_pid(unsigned long pid, int set,
					 unsigned long ric)
{
	unsigned long rb,rs,prs,r;

	rb = PPC_BIT(53); /* IS = 1 */
	rb |= set << PPC_BITLSHIFT(51);
	rs = ((unsigned long)pid) << PPC_BITLSHIFT(31);
	prs = 1; /* process scoped */
	r = 1;	/* radix format */

	asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(0, 1, rb, rs, ric, prs, r);
}

static __always_inline void __tlbie_pid(unsigned long pid, unsigned long ric)
{
	unsigned long rb,rs,prs,r;

	rb = PPC_BIT(53); /* IS = 1 */
	rs = pid << PPC_BITLSHIFT(31);
	prs = 1; /* process scoped */
	r = 1;	/* radix format */

	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(0, 0, rb, rs, ric, prs, r);
}

static __always_inline void __tlbie_lpid(unsigned long lpid, unsigned long ric)
{
	unsigned long rb,rs,prs,r;

	rb = PPC_BIT(52); /* IS = 2 */
	rs = lpid;
	prs = 0; /* partition scoped */
	r = 1;	/* radix format */

	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
}

static __always_inline void __tlbie_lpid_guest(unsigned long lpid, unsigned long ric)
{
	unsigned long rb,rs,prs,r;

	rb = PPC_BIT(52); /* IS = 2 */
	rs = lpid;
	prs = 1; /* process scoped */
	r = 1;	/* radix format */

	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
}

static __always_inline void __tlbiel_va(unsigned long va, unsigned long pid,
					unsigned long ap, unsigned long ric)
{
	unsigned long rb,rs,prs,r;

	rb = va & ~(PPC_BITMASK(52, 63));
	rb |= ap << PPC_BITLSHIFT(58);
	rs = pid << PPC_BITLSHIFT(31);
	prs = 1; /* process scoped */
	r = 1;	/* radix format */

	asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(0, 1, rb, rs, ric, prs, r);
}

static __always_inline void __tlbie_va(unsigned long va, unsigned long pid,
				       unsigned long ap, unsigned long ric)
{
	unsigned long rb,rs,prs,r;

	rb = va & ~(PPC_BITMASK(52, 63));
	rb |= ap << PPC_BITLSHIFT(58);
	rs = pid << PPC_BITLSHIFT(31);
	prs = 1; /* process scoped */
	r = 1;	/* radix format */

	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(0, 0, rb, rs, ric, prs, r);
}

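/*
 * tlbie by effective address under an LPID: RS carries the LPID rather
 * than a shifted PID, and prs=0 selects partition-scoped translations.
 */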
static __always_inline void __tlbie_lpid_va(unsigned long va, unsigned long lpid,
					    unsigned long ap, unsigned long ric)
{
	unsigned long rb,rs,prs,r;

	rb = va & ~(PPC_BITMASK(52, 63));
	rb |= ap << PPC_BITLSHIFT(58);
	rs = lpid;
	prs = 0; /* partition scoped */
	r = 1;	/* radix format */

	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
}

static inline void fixup_tlbie_va(unsigned long va, unsigned long pid,
				  unsigned long ap)
{
	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_va(va, 0, ap, RIC_FLUSH_TLB);
	}

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_va(va, pid, ap, RIC_FLUSH_TLB);
	}
}

static inline void fixup_tlbie_va_range(unsigned long va, unsigned long pid,
					unsigned long ap)
{
	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_pid(0, RIC_FLUSH_TLB);
	}

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_va(va, pid, ap, RIC_FLUSH_TLB);
	}
}

static inline void fixup_tlbie_pid(unsigned long pid)
{
	/*
	 * We can use any address for the invalidation; pick one which is
	 * probably unused as an optimisation.
	 */
	unsigned long va = ((1UL << 52) - 1);

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_pid(0, RIC_FLUSH_TLB);
	}

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_va(va, pid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB);
	}
}

static inline void fixup_tlbie_lpid_va(unsigned long va, unsigned long lpid,
				       unsigned long ap)
{
	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_lpid_va(va, 0, ap, RIC_FLUSH_TLB);
	}

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_lpid_va(va, lpid, ap, RIC_FLUSH_TLB);
	}
}

static inline void fixup_tlbie_lpid(unsigned long lpid)
{
	/*
	 * We can use any address for the invalidation; pick one which is
	 * probably unused as an optimisation.
	 */
	unsigned long va = ((1UL << 52) - 1);

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_lpid(0, RIC_FLUSH_TLB);
	}

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_lpid_va(va, lpid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB);
	}
}

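/*
 * Naming convention for the helpers below: __tlbie*()/__tlbiel*() emit a
 * single instruction, while _tlbie*()/_tlbiel*() wrap them with the
 * required ptesync/tlbsync barriers and, for tlbie, the fixup_tlbie_*()
 * workarounds above (an extra invalidation for the POWER9 tlbie errata).
 */
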
/*
 * We use 128 sets in radix mode and 256 sets in hpt mode.
 */
static __always_inline void _tlbiel_pid(unsigned long pid, unsigned long ric)
{
	int set;

	asm volatile("ptesync": : :"memory");

	/*
	 * Flush the first set of the TLB, and if we're doing a RIC_FLUSH_ALL,
	 * also flush the entire Page Walk Cache.
	 */
	__tlbiel_pid(pid, 0, ric);

	/* For PWC, only one flush is needed */
	if (ric == RIC_FLUSH_PWC) {
		ppc_after_tlbiel_barrier();
		return;
	}

	if (!cpu_has_feature(CPU_FTR_ARCH_31)) {
		/* For the remaining sets, just flush the TLB */
		for (set = 1; set < POWER9_TLB_SETS_RADIX; set++)
			__tlbiel_pid(pid, set, RIC_FLUSH_TLB);
	}

	ppc_after_tlbiel_barrier();
	asm volatile(PPC_RADIX_INVALIDATE_ERAT_USER "; isync" : : :"memory");
}

static inline void _tlbie_pid(unsigned long pid, unsigned long ric)
{
	asm volatile("ptesync": : :"memory");

	/*
	 * Workaround the fact that the "ric" argument to __tlbie_pid
	 * must be a compile-time constant to match the "i" constraint
	 * in the asm statement.
	 */
	switch (ric) {
	case RIC_FLUSH_TLB:
		__tlbie_pid(pid, RIC_FLUSH_TLB);
		fixup_tlbie_pid(pid);
		break;
	case RIC_FLUSH_PWC:
		__tlbie_pid(pid, RIC_FLUSH_PWC);
		break;
	case RIC_FLUSH_ALL:
	default:
		__tlbie_pid(pid, RIC_FLUSH_ALL);
		fixup_tlbie_pid(pid);
	}
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

struct tlbiel_pid {
	unsigned long pid;
	unsigned long ric;
};

static void do_tlbiel_pid(void *info)
{
	struct tlbiel_pid *t = info;

	if (t->ric == RIC_FLUSH_TLB)
		_tlbiel_pid(t->pid, RIC_FLUSH_TLB);
	else if (t->ric == RIC_FLUSH_PWC)
		_tlbiel_pid(t->pid, RIC_FLUSH_PWC);
	else
		_tlbiel_pid(t->pid, RIC_FLUSH_ALL);
}

static inline void _tlbiel_pid_multicast(struct mm_struct *mm,
				unsigned long pid, unsigned long ric)
{
	struct cpumask *cpus = mm_cpumask(mm);
	struct tlbiel_pid t = { .pid = pid, .ric = ric };

	on_each_cpu_mask(cpus, do_tlbiel_pid, &t, 1);
	/*
	 * We always want the CPU translations to be invalidated with tlbiel
	 * in these paths, so while coprocessors must use tlbie, we cannot
	 * optimise away the tlbiel component.
	 */
	if (atomic_read(&mm->context.copros) > 0)
		_tlbie_pid(pid, RIC_FLUSH_ALL);
}

static inline void _tlbie_lpid(unsigned long lpid, unsigned long ric)
{
	asm volatile("ptesync": : :"memory");

	/*
	 * Workaround the fact that the "ric" argument to __tlbie_lpid
	 * must be a compile-time constant to match the "i" constraint
	 * in the asm statement.
	 */
	switch (ric) {
	case RIC_FLUSH_TLB:
		__tlbie_lpid(lpid, RIC_FLUSH_TLB);
		fixup_tlbie_lpid(lpid);
		break;
	case RIC_FLUSH_PWC:
		__tlbie_lpid(lpid, RIC_FLUSH_PWC);
		break;
	case RIC_FLUSH_ALL:
	default:
		__tlbie_lpid(lpid, RIC_FLUSH_ALL);
		fixup_tlbie_lpid(lpid);
	}
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

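/*
 * Like _tlbie_lpid(), but invalidates the guest's process-scoped (prs=1)
 * translations for that LPID.
 */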
static __always_inline void _tlbie_lpid_guest(unsigned long lpid, unsigned long ric)
{
	/*
	 * Workaround the fact that the "ric" argument to __tlbie_lpid_guest
	 * must be a compile-time constant to match the "i" constraint
	 * in the asm statement.
	 */
	switch (ric) {
	case RIC_FLUSH_TLB:
		__tlbie_lpid_guest(lpid, RIC_FLUSH_TLB);
		break;
	case RIC_FLUSH_PWC:
		__tlbie_lpid_guest(lpid, RIC_FLUSH_PWC);
		break;
	case RIC_FLUSH_ALL:
	default:
		__tlbie_lpid_guest(lpid, RIC_FLUSH_ALL);
	}
	fixup_tlbie_lpid(lpid);
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

static inline void __tlbiel_va_range(unsigned long start, unsigned long end,
				     unsigned long pid, unsigned long page_size,
				     unsigned long psize)
{
	unsigned long addr;
	unsigned long ap = mmu_get_ap(psize);

	for (addr = start; addr < end; addr += page_size)
		__tlbiel_va(addr, pid, ap, RIC_FLUSH_TLB);
}

static __always_inline void _tlbiel_va(unsigned long va, unsigned long pid,
				       unsigned long psize, unsigned long ric)
{
	unsigned long ap = mmu_get_ap(psize);

	asm volatile("ptesync": : :"memory");
	__tlbiel_va(va, pid, ap, ric);
	ppc_after_tlbiel_barrier();
}

static inline void _tlbiel_va_range(unsigned long start, unsigned long end,
				    unsigned long pid, unsigned long page_size,
				    unsigned long psize, bool also_pwc)
{
	asm volatile("ptesync": : :"memory");
	if (also_pwc)
		__tlbiel_pid(pid, 0, RIC_FLUSH_PWC);
	__tlbiel_va_range(start, end, pid, page_size, psize);
	ppc_after_tlbiel_barrier();
}

static inline void __tlbie_va_range(unsigned long start, unsigned long end,
				    unsigned long pid, unsigned long page_size,
				    unsigned long psize)
{
	unsigned long addr;
	unsigned long ap = mmu_get_ap(psize);

	for (addr = start; addr < end; addr += page_size)
		__tlbie_va(addr, pid, ap, RIC_FLUSH_TLB);

	fixup_tlbie_va_range(addr - page_size, pid, ap);
}

static __always_inline void _tlbie_va(unsigned long va, unsigned long pid,
				      unsigned long psize, unsigned long ric)
{
	unsigned long ap = mmu_get_ap(psize);

	asm volatile("ptesync": : :"memory");
	__tlbie_va(va, pid, ap, ric);
	fixup_tlbie_va(va, pid, ap);
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

struct tlbiel_va {
	unsigned long pid;
	unsigned long va;
	unsigned long psize;
	unsigned long ric;
};

static void do_tlbiel_va(void *info)
{
	struct tlbiel_va *t = info;

	if (t->ric == RIC_FLUSH_TLB)
		_tlbiel_va(t->va, t->pid, t->psize, RIC_FLUSH_TLB);
	else if (t->ric == RIC_FLUSH_PWC)
		_tlbiel_va(t->va, t->pid, t->psize, RIC_FLUSH_PWC);
	else
		_tlbiel_va(t->va, t->pid, t->psize, RIC_FLUSH_ALL);
}

static inline void _tlbiel_va_multicast(struct mm_struct *mm,
					unsigned long va, unsigned long pid,
					unsigned long psize, unsigned long ric)
{
	struct cpumask *cpus = mm_cpumask(mm);
	struct tlbiel_va t = { .va = va, .pid = pid, .psize = psize, .ric = ric };

	on_each_cpu_mask(cpus, do_tlbiel_va, &t, 1);
	if (atomic_read(&mm->context.copros) > 0)
		_tlbie_va(va, pid, psize, RIC_FLUSH_TLB);
}

struct tlbiel_va_range {
	unsigned long pid;
	unsigned long start;
	unsigned long end;
	unsigned long page_size;
	unsigned long psize;
	bool also_pwc;
};

static void do_tlbiel_va_range(void *info)
{
	struct tlbiel_va_range *t = info;

	_tlbiel_va_range(t->start, t->end, t->pid, t->page_size,
			 t->psize, t->also_pwc);
}

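/*
 * Partition-scoped invalidation of a single EA under an LPID; exposed to
 * callers via radix__flush_tlb_lpid_page() further down.
 */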
static __always_inline void _tlbie_lpid_va(unsigned long va, unsigned long lpid,
					   unsigned long psize, unsigned long ric)
{
	unsigned long ap = mmu_get_ap(psize);

	asm volatile("ptesync": : :"memory");
	__tlbie_lpid_va(va, lpid, ap, ric);
	fixup_tlbie_lpid_va(va, lpid, ap);
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

static inline void _tlbie_va_range(unsigned long start, unsigned long end,
				   unsigned long pid, unsigned long page_size,
				   unsigned long psize, bool also_pwc)
{
	asm volatile("ptesync": : :"memory");
	if (also_pwc)
		__tlbie_pid(pid, RIC_FLUSH_PWC);
	__tlbie_va_range(start, end, pid, page_size, psize);
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

static inline void _tlbiel_va_range_multicast(struct mm_struct *mm,
				unsigned long start, unsigned long end,
				unsigned long pid, unsigned long page_size,
				unsigned long psize, bool also_pwc)
{
	struct cpumask *cpus = mm_cpumask(mm);
	struct tlbiel_va_range t = { .start = start, .end = end,
				.pid = pid, .page_size = page_size,
				.psize = psize, .also_pwc = also_pwc };

	on_each_cpu_mask(cpus, do_tlbiel_va_range, &t, 1);
	if (atomic_read(&mm->context.copros) > 0)
		_tlbie_va_range(start, end, pid, page_size, psize, also_pwc);
}

/*
 * Base TLB flushing operations:
 *
 *  - flush_tlb_mm(mm) flushes the specified mm context TLBs
 *  - flush_tlb_page(vma, vmaddr) flushes one page
 *  - flush_tlb_range(vma, start, end) flushes a range of pages
 *  - flush_tlb_kernel_range(start, end) flushes kernel pages
 *
 *  - local_* variants of page and mm only apply to the current
 *    processor
 */
void radix__local_flush_tlb_mm(struct mm_struct *mm)
{
	unsigned long pid;

	preempt_disable();
	pid = mm->context.id;
	if (pid != MMU_NO_CONTEXT)
		_tlbiel_pid(pid, RIC_FLUSH_TLB);
	preempt_enable();
}
EXPORT_SYMBOL(radix__local_flush_tlb_mm);

#ifndef CONFIG_SMP
void radix__local_flush_all_mm(struct mm_struct *mm)
{
	unsigned long pid;

	preempt_disable();
	pid = mm->context.id;
	if (pid != MMU_NO_CONTEXT)
		_tlbiel_pid(pid, RIC_FLUSH_ALL);
	preempt_enable();
}
EXPORT_SYMBOL(radix__local_flush_all_mm);

static void __flush_all_mm(struct mm_struct *mm, bool fullmm)
{
	radix__local_flush_all_mm(mm);
}
#endif /* CONFIG_SMP */

void radix__local_flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
				       int psize)
{
	unsigned long pid;

	preempt_disable();
	pid = mm->context.id;
	if (pid != MMU_NO_CONTEXT)
		_tlbiel_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
	preempt_enable();
}

void radix__local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
{
#ifdef CONFIG_HUGETLB_PAGE
	/* need the return fix for nohash.c */
	if (is_vm_hugetlb_page(vma))
		return radix__local_flush_hugetlb_page(vma, vmaddr);
#endif
	radix__local_flush_tlb_page_psize(vma->vm_mm, vmaddr, mmu_virtual_psize);
}
EXPORT_SYMBOL(radix__local_flush_tlb_page);

static bool mm_needs_flush_escalation(struct mm_struct *mm)
{
	/*
	 * The P9 nest MMU has issues with the page walk cache caching PTEs
	 * and not flushing them properly when RIC = 0 for a PID/LPID
	 * invalidate.
	 */
	if (atomic_read(&mm->context.copros) > 0)
		return true;
	return false;
}

/*
 * If always_flush is true, then flush even if this CPU can't be removed
 * from mm_cpumask.
 */
void exit_lazy_flush_tlb(struct mm_struct *mm, bool always_flush)
{
	unsigned long pid = mm->context.id;
	int cpu = smp_processor_id();

	/*
	 * A kthread could have done a mmget_not_zero() after the flushing CPU
	 * checked mm_cpumask, and be in the process of kthread_use_mm when
	 * interrupted here. In that case, current->mm will be set to mm,
	 * because kthread_use_mm() setting ->mm and switching to the mm is
	 * done with interrupts off.
	 */
	if (current->mm == mm)
		goto out;

	if (current->active_mm == mm) {
		WARN_ON_ONCE(current->mm != NULL);
		/* Is a kernel thread and is using mm as the lazy tlb */
		mmgrab(&init_mm);
		current->active_mm = &init_mm;
		switch_mm_irqs_off(mm, &init_mm, current);
		mmdrop(mm);
	}

	/*
	 * This IPI may be initiated from any source including those not
	 * running the mm, so there may be a racing IPI that comes after
	 * this one which finds the cpumask already clear. Check and avoid
	 * underflowing the active_cpus count in that case. The race should
	 * not otherwise be a problem, but the TLB must be flushed because
	 * that's what the caller expects.
	 */
	if (cpumask_test_cpu(cpu, mm_cpumask(mm))) {
		atomic_dec(&mm->context.active_cpus);
		cpumask_clear_cpu(cpu, mm_cpumask(mm));
		always_flush = true;
	}

out:
	if (always_flush)
		_tlbiel_pid(pid, RIC_FLUSH_ALL);
}

#ifdef CONFIG_SMP
static void do_exit_flush_lazy_tlb(void *arg)
{
	struct mm_struct *mm = arg;
	exit_lazy_flush_tlb(mm, true);
}

static void exit_flush_lazy_tlbs(struct mm_struct *mm)
{
	/*
	 * Would be nice if this was async so it could be run in
	 * parallel with our local flush, but generic code does not
	 * give a good API for it. Could extend the generic code or
	 * make a special powerpc IPI for flushing TLBs.
	 * For now it's not too performance critical.
	 */
	smp_call_function_many(mm_cpumask(mm), do_exit_flush_lazy_tlb,
			       (void *)mm, 1);
}

#else /* CONFIG_SMP */
static inline void exit_flush_lazy_tlbs(struct mm_struct *mm) { }
#endif /* CONFIG_SMP */

static DEFINE_PER_CPU(unsigned int, mm_cpumask_trim_clock);

/*
 * Interval between flushes at which we send out IPIs to check whether the
 * mm_cpumask can be trimmed for the case where it's not a single-threaded
 * process flushing its own mm. The intent is to reduce the cost of later
 * flushes. Don't want this to be so low that it adds noticeable cost to TLB
 * flushing, or so high that it doesn't help reduce global TLBIEs.
 */
static unsigned long tlb_mm_cpumask_trim_timer = 1073;

static bool tick_and_test_trim_clock(void)
{
	if (__this_cpu_inc_return(mm_cpumask_trim_clock) ==
			tlb_mm_cpumask_trim_timer) {
		__this_cpu_write(mm_cpumask_trim_clock, 0);
		return true;
	}
	return false;
}

enum tlb_flush_type {
	FLUSH_TYPE_NONE,
	FLUSH_TYPE_LOCAL,
	FLUSH_TYPE_GLOBAL,
};

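/*
 * Decide how a flush of the mm should be performed: skipped entirely (no
 * other CPU or coprocessor can hold stale translations), done locally with
 * tlbiel, or done globally (tlbie, tlbiel IPI multicast, or RPT invalidate).
 * This may also opportunistically trim mm_cpumask via IPIs.
 */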
static enum tlb_flush_type flush_type_needed(struct mm_struct *mm, bool fullmm)
{
	int active_cpus = atomic_read(&mm->context.active_cpus);
	int cpu = smp_processor_id();

	if (active_cpus == 0)
		return FLUSH_TYPE_NONE;
	if (active_cpus == 1 && cpumask_test_cpu(cpu, mm_cpumask(mm))) {
		if (current->mm != mm) {
			/*
			 * Asynchronous flush sources may trim down to nothing
			 * if the process is not running, so occasionally try
			 * to trim.
			 */
			if (tick_and_test_trim_clock()) {
				exit_lazy_flush_tlb(mm, true);
				return FLUSH_TYPE_NONE;
			}
		}
		return FLUSH_TYPE_LOCAL;
	}

	/* Coprocessors require TLBIE to invalidate nMMU. */
	if (atomic_read(&mm->context.copros) > 0)
		return FLUSH_TYPE_GLOBAL;

	/*
	 * In the fullmm case there's no point doing the exit_flush_lazy_tlbs
	 * because the mm is being taken down anyway, and a TLBIE tends to
	 * be faster than an IPI+TLBIEL.
	 */
	if (fullmm)
		return FLUSH_TYPE_GLOBAL;

	/*
	 * If we are running the only thread of a single-threaded process,
	 * then we should almost always be able to trim off the rest of the
	 * CPU mask (except in the case of use_mm() races), so always try
	 * trimming the mask.
	 */
	if (atomic_read(&mm->mm_users) <= 1 && current->mm == mm) {
		exit_flush_lazy_tlbs(mm);
		/*
		 * use_mm() race could prevent IPIs from being able to clear
		 * the cpumask here, however those users are established
		 * after our first check (and so after the PTEs are removed),
		 * and the TLB still gets flushed by the IPI, so this CPU
		 * will only require a local flush.
		 */
		return FLUSH_TYPE_LOCAL;
	}

	/*
	 * Occasionally try to trim down the cpumask. It's possible this can
	 * bring the mask to zero, which results in no flush.
	 */
	if (tick_and_test_trim_clock()) {
		exit_flush_lazy_tlbs(mm);
		if (current->mm == mm)
			return FLUSH_TYPE_LOCAL;
		if (cpumask_test_cpu(cpu, mm_cpumask(mm)))
			exit_lazy_flush_tlb(mm, true);
		return FLUSH_TYPE_NONE;
	}

	return FLUSH_TYPE_GLOBAL;
}

#ifdef CONFIG_SMP
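/*
 * Global flushes below take one of three forms: an RPT invalidation via
 * pseries_rpt_invalidate() when guest translation shootdown is not enabled
 * (!MMU_FTR_GTSE), a broadcast tlbie when cputlb_use_tlbie(), or tlbiel IPIs
 * to the mm's CPUs plus a tlbie if coprocessors are attached.
 */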
void radix__flush_tlb_mm(struct mm_struct *mm)
{
	unsigned long pid;
	enum tlb_flush_type type;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	preempt_disable();
	/*
	 * Order loads of mm_cpumask (in flush_type_needed) vs previous
	 * stores to clear ptes before the invalidate. See barrier in
	 * switch_mm_irqs_off.
	 */
	smp_mb();
	type = flush_type_needed(mm, false);
	if (type == FLUSH_TYPE_LOCAL) {
		_tlbiel_pid(pid, RIC_FLUSH_TLB);
	} else if (type == FLUSH_TYPE_GLOBAL) {
		if (!mmu_has_feature(MMU_FTR_GTSE)) {
			unsigned long tgt = H_RPTI_TARGET_CMMU;

			if (atomic_read(&mm->context.copros) > 0)
				tgt |= H_RPTI_TARGET_NMMU;
			pseries_rpt_invalidate(pid, tgt, H_RPTI_TYPE_TLB,
					       H_RPTI_PAGE_ALL, 0, -1UL);
		} else if (cputlb_use_tlbie()) {
			if (mm_needs_flush_escalation(mm))
				_tlbie_pid(pid, RIC_FLUSH_ALL);
			else
				_tlbie_pid(pid, RIC_FLUSH_TLB);
		} else {
			_tlbiel_pid_multicast(mm, pid, RIC_FLUSH_TLB);
		}
	}
	preempt_enable();
}
EXPORT_SYMBOL(radix__flush_tlb_mm);

static void __flush_all_mm(struct mm_struct *mm, bool fullmm)
{
	unsigned long pid;
	enum tlb_flush_type type;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	preempt_disable();
	smp_mb(); /* see radix__flush_tlb_mm */
	type = flush_type_needed(mm, fullmm);
	if (type == FLUSH_TYPE_LOCAL) {
		_tlbiel_pid(pid, RIC_FLUSH_ALL);
	} else if (type == FLUSH_TYPE_GLOBAL) {
		if (!mmu_has_feature(MMU_FTR_GTSE)) {
			unsigned long tgt = H_RPTI_TARGET_CMMU;
			unsigned long type = H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC |
					     H_RPTI_TYPE_PRT;

			if (atomic_read(&mm->context.copros) > 0)
				tgt |= H_RPTI_TARGET_NMMU;
			pseries_rpt_invalidate(pid, tgt, type,
					       H_RPTI_PAGE_ALL, 0, -1UL);
		} else if (cputlb_use_tlbie())
			_tlbie_pid(pid, RIC_FLUSH_ALL);
		else
			_tlbiel_pid_multicast(mm, pid, RIC_FLUSH_ALL);
	}
	preempt_enable();
}

void radix__flush_all_mm(struct mm_struct *mm)
{
	__flush_all_mm(mm, false);
}
EXPORT_SYMBOL(radix__flush_all_mm);

void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
				 int psize)
{
	unsigned long pid;
	enum tlb_flush_type type;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	preempt_disable();
	smp_mb(); /* see radix__flush_tlb_mm */
	type = flush_type_needed(mm, false);
	if (type == FLUSH_TYPE_LOCAL) {
		_tlbiel_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
	} else if (type == FLUSH_TYPE_GLOBAL) {
		if (!mmu_has_feature(MMU_FTR_GTSE)) {
			unsigned long tgt, pg_sizes, size;

			tgt = H_RPTI_TARGET_CMMU;
			pg_sizes = psize_to_rpti_pgsize(psize);
			size = 1UL << mmu_psize_to_shift(psize);

			if (atomic_read(&mm->context.copros) > 0)
				tgt |= H_RPTI_TARGET_NMMU;
			pseries_rpt_invalidate(pid, tgt, H_RPTI_TYPE_TLB,
					       pg_sizes, vmaddr,
					       vmaddr + size);
		} else if (cputlb_use_tlbie())
			_tlbie_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
		else
			_tlbiel_va_multicast(mm, vmaddr, pid, psize, RIC_FLUSH_TLB);
	}
	preempt_enable();
}

void radix__flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
{
#ifdef CONFIG_HUGETLB_PAGE
	if (is_vm_hugetlb_page(vma))
		return radix__flush_hugetlb_page(vma, vmaddr);
#endif
	radix__flush_tlb_page_psize(vma->vm_mm, vmaddr, mmu_virtual_psize);
}
EXPORT_SYMBOL(radix__flush_tlb_page);

#endif /* CONFIG_SMP */

static void do_tlbiel_kernel(void *info)
{
	_tlbiel_pid(0, RIC_FLUSH_ALL);
}

static inline void _tlbiel_kernel_broadcast(void)
{
	on_each_cpu(do_tlbiel_kernel, NULL, 1);
	if (tlbie_capable) {
		/*
		 * Coherent accelerators don't refcount kernel memory mappings,
		 * so we always have to issue a tlbie for them. This is quite a
		 * slow path anyway.
		 */
		_tlbie_pid(0, RIC_FLUSH_ALL);
	}
}

/*
 * If kernel TLBIs ever become local rather than global, then
 * drivers/misc/ocxl/link.c:ocxl_link_add_pe will need some work, as it
 * assumes kernel TLBIs are global.
 */
void radix__flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
	if (!mmu_has_feature(MMU_FTR_GTSE)) {
		unsigned long tgt = H_RPTI_TARGET_CMMU | H_RPTI_TARGET_NMMU;
		unsigned long type = H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC |
				     H_RPTI_TYPE_PRT;

		pseries_rpt_invalidate(0, tgt, type, H_RPTI_PAGE_ALL,
				       start, end);
	} else if (cputlb_use_tlbie())
		_tlbie_pid(0, RIC_FLUSH_ALL);
	else
		_tlbiel_kernel_broadcast();
}
EXPORT_SYMBOL(radix__flush_tlb_kernel_range);

#define TLB_FLUSH_ALL -1UL

/*
 * Number of pages above which we invalidate the entire PID rather than
 * flush individual pages, for local and global flushes respectively.
 *
 * tlbie goes out to the interconnect and individual ops are more costly.
 * It also does not iterate over sets like the local tlbiel variant when
 * invalidating a full PID, so it has a far lower threshold to change from
 * individual page flushes to full-pid flushes.
 */
static unsigned long tlb_single_page_flush_ceiling __read_mostly = 33;
static unsigned long tlb_local_single_page_flush_ceiling __read_mostly = POWER9_TLB_SETS_RADIX * 2;

static inline void __radix__flush_tlb_range(struct mm_struct *mm,
					    unsigned long start, unsigned long end)
{
	unsigned long pid;
	unsigned int page_shift = mmu_psize_defs[mmu_virtual_psize].shift;
	unsigned long page_size = 1UL << page_shift;
	unsigned long nr_pages = (end - start) >> page_shift;
	bool fullmm = (end == TLB_FLUSH_ALL);
	bool flush_pid;
	enum tlb_flush_type type;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	preempt_disable();
	smp_mb(); /* see radix__flush_tlb_mm */
	type = flush_type_needed(mm, fullmm);
	if (type == FLUSH_TYPE_NONE)
		goto out;

	if (fullmm)
		flush_pid = true;
	else if (type == FLUSH_TYPE_GLOBAL)
		flush_pid = nr_pages > tlb_single_page_flush_ceiling;
	else
		flush_pid = nr_pages > tlb_local_single_page_flush_ceiling;

	if (!mmu_has_feature(MMU_FTR_GTSE) && type == FLUSH_TYPE_GLOBAL) {
		unsigned long tgt = H_RPTI_TARGET_CMMU;
		unsigned long pg_sizes = psize_to_rpti_pgsize(mmu_virtual_psize);

		if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
			pg_sizes |= psize_to_rpti_pgsize(MMU_PAGE_2M);
		if (atomic_read(&mm->context.copros) > 0)
			tgt |= H_RPTI_TARGET_NMMU;
		pseries_rpt_invalidate(pid, tgt, H_RPTI_TYPE_TLB, pg_sizes,
				       start, end);
	} else if (flush_pid) {
		if (type == FLUSH_TYPE_LOCAL) {
			_tlbiel_pid(pid, RIC_FLUSH_TLB);
		} else {
			if (cputlb_use_tlbie()) {
				if (mm_needs_flush_escalation(mm))
					_tlbie_pid(pid, RIC_FLUSH_ALL);
				else
					_tlbie_pid(pid, RIC_FLUSH_TLB);
			} else {
				_tlbiel_pid_multicast(mm, pid, RIC_FLUSH_TLB);
			}
		}
	} else {
		bool hflush = false;
		unsigned long hstart, hend;

		if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) {
			hstart = (start + PMD_SIZE - 1) & PMD_MASK;
			hend = end & PMD_MASK;
			if (hstart < hend)
				hflush = true;
		}

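		/*
		 * When hflush is set, the PMD-aligned middle of the range is
		 * flushed again below with the 2M page size so that THP
		 * mappings are invalidated as well.
		 */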
		if (type == FLUSH_TYPE_LOCAL) {
			asm volatile("ptesync": : :"memory");
			__tlbiel_va_range(start, end, pid, page_size, mmu_virtual_psize);
			if (hflush)
				__tlbiel_va_range(hstart, hend, pid,
						  PMD_SIZE, MMU_PAGE_2M);
			ppc_after_tlbiel_barrier();
		} else if (cputlb_use_tlbie()) {
			asm volatile("ptesync": : :"memory");
			__tlbie_va_range(start, end, pid, page_size, mmu_virtual_psize);
			if (hflush)
				__tlbie_va_range(hstart, hend, pid,
						 PMD_SIZE, MMU_PAGE_2M);
			asm volatile("eieio; tlbsync; ptesync": : :"memory");
		} else {
			_tlbiel_va_range_multicast(mm,
					start, end, pid, page_size, mmu_virtual_psize, false);
			if (hflush)
				_tlbiel_va_range_multicast(mm,
					hstart, hend, pid, PMD_SIZE, MMU_PAGE_2M, false);
		}
	}
out:
	preempt_enable();
}

void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
			    unsigned long end)
{
#ifdef CONFIG_HUGETLB_PAGE
	if (is_vm_hugetlb_page(vma))
		return radix__flush_hugetlb_tlb_range(vma, start, end);
#endif

	__radix__flush_tlb_range(vma->vm_mm, start, end);
}
EXPORT_SYMBOL(radix__flush_tlb_range);

static int radix_get_mmu_psize(int page_size)
{
	int psize;

	if (page_size == (1UL << mmu_psize_defs[mmu_virtual_psize].shift))
		psize = mmu_virtual_psize;
	else if (page_size == (1UL << mmu_psize_defs[MMU_PAGE_2M].shift))
		psize = MMU_PAGE_2M;
	else if (page_size == (1UL << mmu_psize_defs[MMU_PAGE_1G].shift))
		psize = MMU_PAGE_1G;
	else
		return -1;
	return psize;
}

/*
 * Flush partition scoped LPID address translation for all CPUs.
 */
void radix__flush_tlb_lpid_page(unsigned int lpid,
				unsigned long addr,
				unsigned long page_size)
{
	int psize = radix_get_mmu_psize(page_size);

	_tlbie_lpid_va(addr, lpid, psize, RIC_FLUSH_TLB);
}
EXPORT_SYMBOL_GPL(radix__flush_tlb_lpid_page);

/*
 * Flush partition scoped PWC from LPID for all CPUs.
 */
void radix__flush_pwc_lpid(unsigned int lpid)
{
	_tlbie_lpid(lpid, RIC_FLUSH_PWC);
}
EXPORT_SYMBOL_GPL(radix__flush_pwc_lpid);

/*
 * Flush partition scoped translations from LPID (=LPIDR).
 */
void radix__flush_all_lpid(unsigned int lpid)
{
	_tlbie_lpid(lpid, RIC_FLUSH_ALL);
}
EXPORT_SYMBOL_GPL(radix__flush_all_lpid);

/*
 * Flush process scoped translations from LPID (=LPIDR).
 */
void radix__flush_all_lpid_guest(unsigned int lpid)
{
	_tlbie_lpid_guest(lpid, RIC_FLUSH_ALL);
}

static void radix__flush_tlb_pwc_range_psize(struct mm_struct *mm, unsigned long start,
					     unsigned long end, int psize);

void radix__tlb_flush(struct mmu_gather *tlb)
{
	int psize = 0;
	struct mm_struct *mm = tlb->mm;
	int page_size = tlb->page_size;
	unsigned long start = tlb->start;
	unsigned long end = tlb->end;

	/*
	 * if page size is not something we understand, do a full mm flush
	 *
	 * A "fullmm" flush must always do a flush_all_mm (RIC=2) flush
	 * that flushes the process table entry cache upon process teardown.
	 * See the comment for radix in arch_exit_mmap().
	 */
	if (tlb->fullmm || tlb->need_flush_all) {
		__flush_all_mm(mm, true);
	} else if ((psize = radix_get_mmu_psize(page_size)) == -1) {
		if (!tlb->freed_tables)
			radix__flush_tlb_mm(mm);
		else
			radix__flush_all_mm(mm);
	} else {
		if (!tlb->freed_tables)
			radix__flush_tlb_range_psize(mm, start, end, psize);
		else
			radix__flush_tlb_pwc_range_psize(mm, start, end, psize);
	}
}

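/*
 * Like __radix__flush_tlb_range(), but for a single known page size and
 * with the option to also flush the page walk cache (also_pwc).
 */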
static __always_inline void __radix__flush_tlb_range_psize(struct mm_struct *mm,
				unsigned long start, unsigned long end,
				int psize, bool also_pwc)
{
	unsigned long pid;
	unsigned int page_shift = mmu_psize_defs[psize].shift;
	unsigned long page_size = 1UL << page_shift;
	unsigned long nr_pages = (end - start) >> page_shift;
	bool fullmm = (end == TLB_FLUSH_ALL);
	bool flush_pid;
	enum tlb_flush_type type;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	fullmm = (end == TLB_FLUSH_ALL);

	preempt_disable();
	smp_mb(); /* see radix__flush_tlb_mm */
	type = flush_type_needed(mm, fullmm);
	if (type == FLUSH_TYPE_NONE)
		goto out;

	if (fullmm)
		flush_pid = true;
	else if (type == FLUSH_TYPE_GLOBAL)
		flush_pid = nr_pages > tlb_single_page_flush_ceiling;
	else
		flush_pid = nr_pages > tlb_local_single_page_flush_ceiling;

	if (!mmu_has_feature(MMU_FTR_GTSE) && type == FLUSH_TYPE_GLOBAL) {
		unsigned long tgt = H_RPTI_TARGET_CMMU;
		unsigned long type = H_RPTI_TYPE_TLB;
		unsigned long pg_sizes = psize_to_rpti_pgsize(psize);

		if (also_pwc)
			type |= H_RPTI_TYPE_PWC;
		if (atomic_read(&mm->context.copros) > 0)
			tgt |= H_RPTI_TARGET_NMMU;
		pseries_rpt_invalidate(pid, tgt, type, pg_sizes, start, end);
	} else if (flush_pid) {
		if (type == FLUSH_TYPE_LOCAL) {
			_tlbiel_pid(pid, also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB);
		} else {
			if (cputlb_use_tlbie()) {
				if (mm_needs_flush_escalation(mm))
					also_pwc = true;

				_tlbie_pid(pid,
					also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB);
			} else {
				_tlbiel_pid_multicast(mm, pid,
					also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB);
			}
		}
	} else {
		if (type == FLUSH_TYPE_LOCAL)
			_tlbiel_va_range(start, end, pid, page_size, psize, also_pwc);
		else if (cputlb_use_tlbie())
			_tlbie_va_range(start, end, pid, page_size, psize, also_pwc);
		else
			_tlbiel_va_range_multicast(mm,
					start, end, pid, page_size, psize, also_pwc);
	}
out:
	preempt_enable();
}

void radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start,
				  unsigned long end, int psize)
{
	return __radix__flush_tlb_range_psize(mm, start, end, psize, false);
}

static void radix__flush_tlb_pwc_range_psize(struct mm_struct *mm, unsigned long start,
					     unsigned long end, int psize)
{
	__radix__flush_tlb_range_psize(mm, start, end, psize, true);
}

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr)
{
	unsigned long pid, end;
	enum tlb_flush_type type;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	/* 4k page size, just blow the world */
	if (PAGE_SIZE == 0x1000) {
		radix__flush_all_mm(mm);
		return;
	}

	end = addr + HPAGE_PMD_SIZE;

	/* Otherwise first do the PWC, then iterate the pages. */
	preempt_disable();
	smp_mb(); /* see radix__flush_tlb_mm */
	type = flush_type_needed(mm, false);
	if (type == FLUSH_TYPE_LOCAL) {
		_tlbiel_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
	} else if (type == FLUSH_TYPE_GLOBAL) {
		if (!mmu_has_feature(MMU_FTR_GTSE)) {
			unsigned long tgt, type, pg_sizes;

			tgt = H_RPTI_TARGET_CMMU;
			type = H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC |
			       H_RPTI_TYPE_PRT;
			pg_sizes = psize_to_rpti_pgsize(mmu_virtual_psize);

			if (atomic_read(&mm->context.copros) > 0)
				tgt |= H_RPTI_TARGET_NMMU;
			pseries_rpt_invalidate(pid, tgt, type, pg_sizes,
					       addr, end);
		} else if (cputlb_use_tlbie())
			_tlbie_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
		else
			_tlbiel_va_range_multicast(mm,
					addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
	}

	preempt_enable();
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

void radix__flush_pmd_tlb_range(struct vm_area_struct *vma,
				unsigned long start, unsigned long end)
{
	radix__flush_tlb_range_psize(vma->vm_mm, start, end, MMU_PAGE_2M);
}
EXPORT_SYMBOL(radix__flush_pmd_tlb_range);

void radix__flush_tlb_all(void)
{
	unsigned long rb,prs,r,rs;
	unsigned long ric = RIC_FLUSH_ALL;

	rb = 0x3 << PPC_BITLSHIFT(53); /* IS = 3 */
	prs = 0; /* partition scoped */
	r = 1;	/* radix format */
	rs = 1 & ((1UL << 32) - 1); /* any LPID value to flush guest mappings */

	asm volatile("ptesync": : :"memory");
	/*
	 * Flush guest entries by passing PRS = 1 and LPID != 0.
	 */
	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(1), "i"(ric), "r"(rs) : "memory");
	/*
	 * Flush host entries by passing PRS = 0 and LPID == 0.
	 */
	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(0) : "memory");
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
extern void radix_kvm_prefetch_workaround(struct mm_struct *mm)
{
	unsigned long pid = mm->context.id;

	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	if (!cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG))
		return;

	/*
	 * If this context hasn't run on that CPU before and KVM is
	 * around, there's a slim chance that the guest on another
	 * CPU just brought an obsolete translation into the TLB of
	 * this CPU due to a bad prefetch using the guest PID on
	 * the way into the hypervisor.
	 *
	 * We work around this here. If KVM is possible, we check if
	 * any sibling thread is in KVM. If it is, the window may exist
	 * and thus we flush that PID from the core.
	 *
	 * A potential future improvement would be to mark which PIDs
	 * have never been used on the system and avoid it if the PID
	 * is new and the process has no other cpumask bit set.
	 */
	if (cpu_has_feature(CPU_FTR_HVMODE) && radix_enabled()) {
		int cpu = smp_processor_id();
		int sib = cpu_first_thread_sibling(cpu);
		bool flush = false;

		for (; sib <= cpu_last_thread_sibling(cpu) && !flush; sib++) {
			if (sib == cpu)
				continue;
			if (!cpu_possible(sib))
				continue;
			if (paca_ptrs[sib]->kvm_hstate.kvm_vcpu)
				flush = true;
		}
		if (flush)
			_tlbiel_pid(pid, RIC_FLUSH_ALL);
	}
}
EXPORT_SYMBOL_GPL(radix_kvm_prefetch_workaround);
#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */