// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * TLB flush routines for radix kernels.
 *
 * Copyright 2015-2016, Aneesh Kumar K.V, IBM Corporation.
 */

#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/memblock.h>
#include <linux/mmu_context.h>
#include <linux/sched/mm.h>

#include <asm/ppc-opcode.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/trace.h>
#include <asm/cputhreads.h>

#define RIC_FLUSH_TLB 0
#define RIC_FLUSH_PWC 1
#define RIC_FLUSH_ALL 2

/*
 * tlbiel instruction for radix, set invalidation
 * i.e., r=1 and is=01 or is=10 or is=11
 */
static __always_inline void tlbiel_radix_set_isa300(unsigned int set, unsigned int is,
					unsigned int pid,
					unsigned int ric, unsigned int prs)
{
	unsigned long rb;
	unsigned long rs;

	rb = (set << PPC_BITLSHIFT(51)) | (is << PPC_BITLSHIFT(53));
	rs = ((unsigned long)pid << PPC_BITLSHIFT(31));

	asm volatile(PPC_TLBIEL(%0, %1, %2, %3, 1)
		     : : "r"(rb), "r"(rs), "i"(ric), "i"(prs)
		     : "memory");
}

static void tlbiel_all_isa300(unsigned int num_sets, unsigned int is)
{
	unsigned int set;

	asm volatile("ptesync": : :"memory");

	/*
	 * Flush the first set of the TLB, and the entire Page Walk Cache
	 * and partition table entries. Then flush the remaining sets of the
	 * TLB.
	 */

	if (early_cpu_has_feature(CPU_FTR_HVMODE)) {
		/* MSR[HV] should flush partition scope translations first. */
		tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 0);
		for (set = 1; set < num_sets; set++)
			tlbiel_radix_set_isa300(set, is, 0, RIC_FLUSH_TLB, 0);
	}

	/* Flush process scoped entries. */
	tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 1);
	for (set = 1; set < num_sets; set++)
		tlbiel_radix_set_isa300(set, is, 0, RIC_FLUSH_TLB, 1);

	asm volatile("ptesync": : :"memory");
}

void radix__tlbiel_all(unsigned int action)
{
	unsigned int is;

	switch (action) {
	case TLB_INVAL_SCOPE_GLOBAL:
		is = 3;
		break;
	case TLB_INVAL_SCOPE_LPID:
		is = 2;
		break;
	default:
		BUG();
	}

	if (early_cpu_has_feature(CPU_FTR_ARCH_300))
		tlbiel_all_isa300(POWER9_TLB_SETS_RADIX, is);
	else
		WARN(1, "%s called on pre-POWER9 CPU\n", __func__);

	asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT "; isync" : : :"memory");
}

static __always_inline void __tlbiel_pid(unsigned long pid, int set,
				unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = PPC_BIT(53); /* IS = 1 */
	rb |= set << PPC_BITLSHIFT(51);
	rs = ((unsigned long)pid) << PPC_BITLSHIFT(31);
	prs = 1; /* process scoped */
	r = 1;	/* radix format */

	asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(0, 1, rb, rs, ric, prs, r);
}

static __always_inline void __tlbie_pid(unsigned long pid, unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = PPC_BIT(53); /* IS = 1 */
	rs = pid << PPC_BITLSHIFT(31);
	prs = 1; /* process scoped */
	r = 1;	/* radix format */

	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(0, 0, rb, rs, ric, prs, r);
}
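/*
 * A reader's note on how the low-level helpers in this file encode their
 * operands (as read off the code itself; the ISA is the authoritative
 * reference for the field layout):
 *
 *  - RB carries the invalidation selector: PPC_BIT(53) for IS=1 (all
 *    entries for a PID), PPC_BIT(52) for IS=2 (all entries for an LPID),
 *    or 0x3 in the same field for IS=3 (everything). The per-VA forms
 *    leave IS at 0 and instead carry the effective page number plus the
 *    actual page size (AP); tlbiel additionally encodes the TLB
 *    congruence-class "set" to operate on.
 *  - RS carries the PID shifted into the high word for process scoped
 *    flushes, or the LPID for partition scoped flushes.
 *  - RIC selects what to invalidate (0 = TLB, 1 = PWC, 2 = all), PRS
 *    selects process vs partition scope, and R = 1 selects radix format.
 */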
static __always_inline void __tlbie_lpid(unsigned long lpid, unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = PPC_BIT(52); /* IS = 2 */
	rs = lpid;
	prs = 0; /* partition scoped */
	r = 1;	/* radix format */

	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
}

static __always_inline void __tlbie_lpid_guest(unsigned long lpid, unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = PPC_BIT(52); /* IS = 2 */
	rs = lpid;
	prs = 1; /* process scoped */
	r = 1;	/* radix format */

	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
}

static __always_inline void __tlbiel_va(unsigned long va, unsigned long pid,
					unsigned long ap, unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = va & ~(PPC_BITMASK(52, 63));
	rb |= ap << PPC_BITLSHIFT(58);
	rs = pid << PPC_BITLSHIFT(31);
	prs = 1; /* process scoped */
	r = 1;	/* radix format */

	asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(0, 1, rb, rs, ric, prs, r);
}

static __always_inline void __tlbie_va(unsigned long va, unsigned long pid,
				       unsigned long ap, unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = va & ~(PPC_BITMASK(52, 63));
	rb |= ap << PPC_BITLSHIFT(58);
	rs = pid << PPC_BITLSHIFT(31);
	prs = 1; /* process scoped */
	r = 1;	/* radix format */

	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(0, 0, rb, rs, ric, prs, r);
}

static __always_inline void __tlbie_lpid_va(unsigned long va, unsigned long lpid,
					    unsigned long ap, unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = va & ~(PPC_BITMASK(52, 63));
	rb |= ap << PPC_BITLSHIFT(58);
	rs = lpid;
	prs = 0; /* partition scoped */
	r = 1;	/* radix format */

	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
}

static inline void fixup_tlbie(void)
{
	unsigned long pid = 0;
	unsigned long va = ((1UL << 52) - 1);

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_va(va, pid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB);
	}
}

static inline void fixup_tlbie_lpid(unsigned long lpid)
{
	unsigned long va = ((1UL << 52) - 1);

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_lpid_va(va, lpid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB);
	}
}
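/*
 * Note on the fixup helpers above: on CPUs with CPU_FTR_P9_TLBIE_BUG set,
 * the global tlbie sequences in this file issue an extra ptesync plus one
 * dummy 64K per-VA tlbie (to a fixed address in the same PID or LPID)
 * before the closing eieio; tlbsync; ptesync. The details of the underlying
 * erratum are not described here; the helpers simply provide the required
 * instruction ordering for the callers below.
 */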
/*
 * We use 128 sets in radix mode and 256 sets in HPT mode.
 */
static __always_inline void _tlbiel_pid(unsigned long pid, unsigned long ric)
{
	int set;

	asm volatile("ptesync": : :"memory");

	/*
	 * Flush the first set of the TLB, and if we're doing a RIC_FLUSH_ALL,
	 * also flush the entire Page Walk Cache.
	 */
	__tlbiel_pid(pid, 0, ric);

	/* For PWC, only one flush is needed */
	if (ric == RIC_FLUSH_PWC) {
		asm volatile("ptesync": : :"memory");
		return;
	}

	/* For the remaining sets, just flush the TLB */
	for (set = 1; set < POWER9_TLB_SETS_RADIX; set++)
		__tlbiel_pid(pid, set, RIC_FLUSH_TLB);

	asm volatile("ptesync": : :"memory");
	asm volatile(PPC_RADIX_INVALIDATE_ERAT_USER "; isync" : : :"memory");
}

static inline void _tlbie_pid(unsigned long pid, unsigned long ric)
{
	asm volatile("ptesync": : :"memory");

	/*
	 * Work around the fact that the "ric" argument to __tlbie_pid
	 * must be a compile-time constant to match the "i" constraint
	 * in the asm statement.
	 */
	switch (ric) {
	case RIC_FLUSH_TLB:
		__tlbie_pid(pid, RIC_FLUSH_TLB);
		break;
	case RIC_FLUSH_PWC:
		__tlbie_pid(pid, RIC_FLUSH_PWC);
		break;
	case RIC_FLUSH_ALL:
	default:
		__tlbie_pid(pid, RIC_FLUSH_ALL);
	}
	fixup_tlbie();
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

struct tlbiel_pid {
	unsigned long pid;
	unsigned long ric;
};

static void do_tlbiel_pid(void *info)
{
	struct tlbiel_pid *t = info;

	if (t->ric == RIC_FLUSH_TLB)
		_tlbiel_pid(t->pid, RIC_FLUSH_TLB);
	else if (t->ric == RIC_FLUSH_PWC)
		_tlbiel_pid(t->pid, RIC_FLUSH_PWC);
	else
		_tlbiel_pid(t->pid, RIC_FLUSH_ALL);
}

static inline void _tlbiel_pid_multicast(struct mm_struct *mm,
				unsigned long pid, unsigned long ric)
{
	struct cpumask *cpus = mm_cpumask(mm);
	struct tlbiel_pid t = { .pid = pid, .ric = ric };

	on_each_cpu_mask(cpus, do_tlbiel_pid, &t, 1);
	/*
	 * Always want the CPU translations to be invalidated with tlbiel in
	 * these paths, so while coprocessors must use tlbie, we cannot
	 * optimise away the tlbiel component.
	 */
	if (atomic_read(&mm->context.copros) > 0)
		_tlbie_pid(pid, RIC_FLUSH_ALL);
}
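/*
 * struct tlbiel_pid and do_tlbiel_pid() above (and the tlbiel_va variants
 * further down) exist so that, on platforms where broadcast tlbie is
 * avoided (see the cputlb_use_tlbie() checks in the callers), the same
 * local tlbiel sequence can be run on every CPU in the mm's cpumask via
 * on_each_cpu_mask() IPIs, with a single global tlbie retained only when a
 * coprocessor holds a reference to the context.
 */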
static inline void _tlbie_lpid(unsigned long lpid, unsigned long ric)
{
	asm volatile("ptesync": : :"memory");

	/*
	 * Work around the fact that the "ric" argument to __tlbie_lpid
	 * must be a compile-time constant to match the "i" constraint
	 * in the asm statement.
	 */
	switch (ric) {
	case RIC_FLUSH_TLB:
		__tlbie_lpid(lpid, RIC_FLUSH_TLB);
		break;
	case RIC_FLUSH_PWC:
		__tlbie_lpid(lpid, RIC_FLUSH_PWC);
		break;
	case RIC_FLUSH_ALL:
	default:
		__tlbie_lpid(lpid, RIC_FLUSH_ALL);
	}
	fixup_tlbie_lpid(lpid);
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

static __always_inline void _tlbie_lpid_guest(unsigned long lpid, unsigned long ric)
{
	/*
	 * Work around the fact that the "ric" argument to __tlbie_lpid_guest
	 * must be a compile-time constant to match the "i" constraint
	 * in the asm statement.
	 */
	switch (ric) {
	case RIC_FLUSH_TLB:
		__tlbie_lpid_guest(lpid, RIC_FLUSH_TLB);
		break;
	case RIC_FLUSH_PWC:
		__tlbie_lpid_guest(lpid, RIC_FLUSH_PWC);
		break;
	case RIC_FLUSH_ALL:
	default:
		__tlbie_lpid_guest(lpid, RIC_FLUSH_ALL);
	}
	fixup_tlbie_lpid(lpid);
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

static inline void __tlbiel_va_range(unsigned long start, unsigned long end,
				     unsigned long pid, unsigned long page_size,
				     unsigned long psize)
{
	unsigned long addr;
	unsigned long ap = mmu_get_ap(psize);

	for (addr = start; addr < end; addr += page_size)
		__tlbiel_va(addr, pid, ap, RIC_FLUSH_TLB);
}

static __always_inline void _tlbiel_va(unsigned long va, unsigned long pid,
				       unsigned long psize, unsigned long ric)
{
	unsigned long ap = mmu_get_ap(psize);

	asm volatile("ptesync": : :"memory");
	__tlbiel_va(va, pid, ap, ric);
	asm volatile("ptesync": : :"memory");
}

static inline void _tlbiel_va_range(unsigned long start, unsigned long end,
				    unsigned long pid, unsigned long page_size,
				    unsigned long psize, bool also_pwc)
{
	asm volatile("ptesync": : :"memory");
	if (also_pwc)
		__tlbiel_pid(pid, 0, RIC_FLUSH_PWC);
	__tlbiel_va_range(start, end, pid, page_size, psize);
	asm volatile("ptesync": : :"memory");
}

static inline void __tlbie_va_range(unsigned long start, unsigned long end,
				    unsigned long pid, unsigned long page_size,
				    unsigned long psize)
{
	unsigned long addr;
	unsigned long ap = mmu_get_ap(psize);

	for (addr = start; addr < end; addr += page_size)
		__tlbie_va(addr, pid, ap, RIC_FLUSH_TLB);
}

static __always_inline void _tlbie_va(unsigned long va, unsigned long pid,
				      unsigned long psize, unsigned long ric)
{
	unsigned long ap = mmu_get_ap(psize);

	asm volatile("ptesync": : :"memory");
	__tlbie_va(va, pid, ap, ric);
	fixup_tlbie();
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

struct tlbiel_va {
	unsigned long pid;
	unsigned long va;
	unsigned long psize;
	unsigned long ric;
};

static void do_tlbiel_va(void *info)
{
	struct tlbiel_va *t = info;

	if (t->ric == RIC_FLUSH_TLB)
		_tlbiel_va(t->va, t->pid, t->psize, RIC_FLUSH_TLB);
	else if (t->ric == RIC_FLUSH_PWC)
		_tlbiel_va(t->va, t->pid, t->psize, RIC_FLUSH_PWC);
	else
		_tlbiel_va(t->va, t->pid, t->psize, RIC_FLUSH_ALL);
}

static inline void _tlbiel_va_multicast(struct mm_struct *mm,
					unsigned long va, unsigned long pid,
					unsigned long psize, unsigned long ric)
{
	struct cpumask *cpus = mm_cpumask(mm);
	struct tlbiel_va t = { .va = va, .pid = pid, .psize = psize, .ric = ric };

	on_each_cpu_mask(cpus, do_tlbiel_va, &t, 1);
	if (atomic_read(&mm->context.copros) > 0)
		_tlbie_va(va, pid, psize, RIC_FLUSH_TLB);
}

struct tlbiel_va_range {
	unsigned long pid;
	unsigned long start;
	unsigned long end;
	unsigned long page_size;
	unsigned long psize;
	bool also_pwc;
};

static void do_tlbiel_va_range(void *info)
{
	struct tlbiel_va_range *t = info;

	_tlbiel_va_range(t->start, t->end, t->pid, t->page_size,
			 t->psize, t->also_pwc);
}
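/*
 * Note the barrier pattern shared by the flush primitives in this file:
 * local tlbiel sequences are bracketed by ptesync on both sides, while
 * global tlbie sequences open with ptesync and close with
 * eieio; tlbsync; ptesync (plus the P9 fixup where applicable), so the
 * invalidation has completed on all processors before the caller returns.
 */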
static __always_inline void _tlbie_lpid_va(unsigned long va, unsigned long lpid,
					   unsigned long psize, unsigned long ric)
{
	unsigned long ap = mmu_get_ap(psize);

	asm volatile("ptesync": : :"memory");
	__tlbie_lpid_va(va, lpid, ap, ric);
	fixup_tlbie_lpid(lpid);
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

static inline void _tlbie_va_range(unsigned long start, unsigned long end,
				   unsigned long pid, unsigned long page_size,
				   unsigned long psize, bool also_pwc)
{
	asm volatile("ptesync": : :"memory");
	if (also_pwc)
		__tlbie_pid(pid, RIC_FLUSH_PWC);
	__tlbie_va_range(start, end, pid, page_size, psize);
	fixup_tlbie();
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

static inline void _tlbiel_va_range_multicast(struct mm_struct *mm,
				unsigned long start, unsigned long end,
				unsigned long pid, unsigned long page_size,
				unsigned long psize, bool also_pwc)
{
	struct cpumask *cpus = mm_cpumask(mm);
	struct tlbiel_va_range t = { .start = start, .end = end,
				.pid = pid, .page_size = page_size,
				.psize = psize, .also_pwc = also_pwc };

	on_each_cpu_mask(cpus, do_tlbiel_va_range, &t, 1);
	if (atomic_read(&mm->context.copros) > 0)
		_tlbie_va_range(start, end, pid, page_size, psize, also_pwc);
}

/*
 * Base TLB flushing operations:
 *
 *  - flush_tlb_mm(mm) flushes the specified mm context TLB's
 *  - flush_tlb_page(vma, vmaddr) flushes one page
 *  - flush_tlb_range(vma, start, end) flushes a range of pages
 *  - flush_tlb_kernel_range(start, end) flushes kernel pages
 *
 *  - local_* variants of page and mm only apply to the current
 *    processor
 */
void radix__local_flush_tlb_mm(struct mm_struct *mm)
{
	unsigned long pid;

	preempt_disable();
	pid = mm->context.id;
	if (pid != MMU_NO_CONTEXT)
		_tlbiel_pid(pid, RIC_FLUSH_TLB);
	preempt_enable();
}
EXPORT_SYMBOL(radix__local_flush_tlb_mm);

#ifndef CONFIG_SMP
void radix__local_flush_all_mm(struct mm_struct *mm)
{
	unsigned long pid;

	preempt_disable();
	pid = mm->context.id;
	if (pid != MMU_NO_CONTEXT)
		_tlbiel_pid(pid, RIC_FLUSH_ALL);
	preempt_enable();
}
EXPORT_SYMBOL(radix__local_flush_all_mm);
#endif /* CONFIG_SMP */

void radix__local_flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
				       int psize)
{
	unsigned long pid;

	preempt_disable();
	pid = mm->context.id;
	if (pid != MMU_NO_CONTEXT)
		_tlbiel_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
	preempt_enable();
}

void radix__local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
{
#ifdef CONFIG_HUGETLB_PAGE
	/* need the return fix for nohash.c */
	if (is_vm_hugetlb_page(vma))
		return radix__local_flush_hugetlb_page(vma, vmaddr);
#endif
	radix__local_flush_tlb_page_psize(vma->vm_mm, vmaddr, mmu_virtual_psize);
}
EXPORT_SYMBOL(radix__local_flush_tlb_page);

static bool mm_is_singlethreaded(struct mm_struct *mm)
{
	if (atomic_read(&mm->context.copros) > 0)
		return false;
	if (atomic_read(&mm->mm_users) <= 1 && current->mm == mm)
		return true;
	return false;
}

static bool mm_needs_flush_escalation(struct mm_struct *mm)
{
	/*
	 * P9 nest MMU has issues with the page walk cache
	 * caching PTEs and not flushing them properly when
	 * RIC = 0 for a PID/LPID invalidate.
	 */
	if (atomic_read(&mm->context.copros) > 0)
		return true;
	return false;
}
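/*
 * The SMP paths below use the two helpers above as follows: when the mm is
 * down to a single user and no coprocessor holds a context reference, any
 * other CPU in the cpumask can only have it as a lazy (kernel thread)
 * active_mm, so exit_flush_lazy_tlbs() switches those CPUs to init_mm and
 * the flush can then be done locally with tlbiel. When a coprocessor is
 * attached, global flushes are escalated to RIC_FLUSH_ALL because of the
 * nest MMU page walk cache issue described above.
 */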
#ifdef CONFIG_SMP
static void do_exit_flush_lazy_tlb(void *arg)
{
	struct mm_struct *mm = arg;
	unsigned long pid = mm->context.id;

	if (current->mm == mm)
		return; /* Local CPU */

	if (current->active_mm == mm) {
		/*
		 * Must be a kernel thread because sender is single-threaded.
		 */
		BUG_ON(current->mm);
		mmgrab(&init_mm);
		switch_mm(mm, &init_mm, current);
		current->active_mm = &init_mm;
		mmdrop(mm);
	}
	_tlbiel_pid(pid, RIC_FLUSH_ALL);
}

static void exit_flush_lazy_tlbs(struct mm_struct *mm)
{
	/*
	 * Would be nice if this was async so it could be run in
	 * parallel with our local flush, but generic code does not
	 * give a good API for it. Could extend the generic code or
	 * make a special powerpc IPI for flushing TLBs.
	 * For now it's not too performance critical.
	 */
	smp_call_function_many(mm_cpumask(mm), do_exit_flush_lazy_tlb,
			       (void *)mm, 1);
	mm_reset_thread_local(mm);
}

void radix__flush_tlb_mm(struct mm_struct *mm)
{
	unsigned long pid;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	preempt_disable();
	/*
	 * Order loads of mm_cpumask vs previous stores to clear ptes before
	 * the invalidate. See barrier in switch_mm_irqs_off.
	 */
	smp_mb();
	if (!mm_is_thread_local(mm)) {
		if (unlikely(mm_is_singlethreaded(mm))) {
			exit_flush_lazy_tlbs(mm);
			goto local;
		}

		if (cputlb_use_tlbie()) {
			if (mm_needs_flush_escalation(mm))
				_tlbie_pid(pid, RIC_FLUSH_ALL);
			else
				_tlbie_pid(pid, RIC_FLUSH_TLB);
		} else {
			_tlbiel_pid_multicast(mm, pid, RIC_FLUSH_TLB);
		}
	} else {
local:
		_tlbiel_pid(pid, RIC_FLUSH_TLB);
	}
	preempt_enable();
}
EXPORT_SYMBOL(radix__flush_tlb_mm);

static void __flush_all_mm(struct mm_struct *mm, bool fullmm)
{
	unsigned long pid;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	preempt_disable();
	smp_mb(); /* see radix__flush_tlb_mm */
	if (!mm_is_thread_local(mm)) {
		if (unlikely(mm_is_singlethreaded(mm))) {
			if (!fullmm) {
				exit_flush_lazy_tlbs(mm);
				goto local;
			}
		}
		if (cputlb_use_tlbie())
			_tlbie_pid(pid, RIC_FLUSH_ALL);
		else
			_tlbiel_pid_multicast(mm, pid, RIC_FLUSH_ALL);
	} else {
local:
		_tlbiel_pid(pid, RIC_FLUSH_ALL);
	}
	preempt_enable();
}

void radix__flush_all_mm(struct mm_struct *mm)
{
	__flush_all_mm(mm, false);
}
EXPORT_SYMBOL(radix__flush_all_mm);

void radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr)
{
	tlb->need_flush_all = 1;
}
EXPORT_SYMBOL(radix__flush_tlb_pwc);
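/*
 * radix__flush_tlb_pwc() only records that a page walk cache flush is
 * needed; the actual flush is deferred to radix__tlb_flush() at the end of
 * the mmu_gather operation, where need_flush_all turns the final flush
 * into a RIC_FLUSH_ALL (or PWC-inclusive range) flush.
 */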
void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
				 int psize)
{
	unsigned long pid;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	preempt_disable();
	smp_mb(); /* see radix__flush_tlb_mm */
	if (!mm_is_thread_local(mm)) {
		if (unlikely(mm_is_singlethreaded(mm))) {
			exit_flush_lazy_tlbs(mm);
			goto local;
		}
		if (cputlb_use_tlbie())
			_tlbie_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
		else
			_tlbiel_va_multicast(mm, vmaddr, pid, psize, RIC_FLUSH_TLB);
	} else {
local:
		_tlbiel_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
	}
	preempt_enable();
}

void radix__flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
{
#ifdef CONFIG_HUGETLB_PAGE
	if (is_vm_hugetlb_page(vma))
		return radix__flush_hugetlb_page(vma, vmaddr);
#endif
	radix__flush_tlb_page_psize(vma->vm_mm, vmaddr, mmu_virtual_psize);
}
EXPORT_SYMBOL(radix__flush_tlb_page);

#else /* CONFIG_SMP */
#define radix__flush_all_mm radix__local_flush_all_mm
#endif /* CONFIG_SMP */

static void do_tlbiel_kernel(void *info)
{
	_tlbiel_pid(0, RIC_FLUSH_ALL);
}

static inline void _tlbiel_kernel_broadcast(void)
{
	on_each_cpu(do_tlbiel_kernel, NULL, 1);
	if (tlbie_capable) {
		/*
		 * Coherent accelerators don't refcount kernel memory mappings,
		 * so we always have to issue a tlbie for them. This is quite a
		 * slow path anyway.
		 */
		_tlbie_pid(0, RIC_FLUSH_ALL);
	}
}

/*
 * If kernel TLBIs ever become local rather than global, then
 * drivers/misc/ocxl/link.c:ocxl_link_add_pe will need some work, as it
 * assumes kernel TLBIs are global.
 */
void radix__flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
	if (cputlb_use_tlbie())
		_tlbie_pid(0, RIC_FLUSH_ALL);
	else
		_tlbiel_kernel_broadcast();
}
EXPORT_SYMBOL(radix__flush_tlb_kernel_range);

#define TLB_FLUSH_ALL -1UL

/*
 * Number of pages above which we invalidate the entire PID rather than
 * flush individual pages, for local and global flushes respectively.
 *
 * tlbie goes out to the interconnect and individual ops are more costly.
 * It also does not iterate over sets like the local tlbiel variant when
 * invalidating a full PID, so it has a far lower threshold to change from
 * individual page flushes to full-pid flushes.
 */
static unsigned long tlb_single_page_flush_ceiling __read_mostly = 33;
static unsigned long tlb_local_single_page_flush_ceiling __read_mostly = POWER9_TLB_SETS_RADIX * 2;
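/*
 * A worked example of the thresholds above, assuming a 64K base page size:
 * a global flush switches to a full-PID tlbie once a range covers more than
 * 33 pages (just over 2MB), while a local flush keeps using per-page tlbiel
 * up to POWER9_TLB_SETS_RADIX * 2 = 256 pages (16MB), since iterating pages
 * locally is cheap compared with walking every TLB set for a full-PID flush.
 */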
static inline void __radix__flush_tlb_range(struct mm_struct *mm,
					unsigned long start, unsigned long end,
					bool flush_all_sizes)
{
	unsigned long pid;
	unsigned int page_shift = mmu_psize_defs[mmu_virtual_psize].shift;
	unsigned long page_size = 1UL << page_shift;
	unsigned long nr_pages = (end - start) >> page_shift;
	bool local, full;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	preempt_disable();
	smp_mb(); /* see radix__flush_tlb_mm */
	if (!mm_is_thread_local(mm)) {
		if (unlikely(mm_is_singlethreaded(mm))) {
			if (end != TLB_FLUSH_ALL) {
				exit_flush_lazy_tlbs(mm);
				goto is_local;
			}
		}
		local = false;
		full = (end == TLB_FLUSH_ALL ||
				nr_pages > tlb_single_page_flush_ceiling);
	} else {
is_local:
		local = true;
		full = (end == TLB_FLUSH_ALL ||
				nr_pages > tlb_local_single_page_flush_ceiling);
	}

	if (full) {
		if (local) {
			_tlbiel_pid(pid, RIC_FLUSH_TLB);
		} else {
			if (cputlb_use_tlbie()) {
				if (mm_needs_flush_escalation(mm))
					_tlbie_pid(pid, RIC_FLUSH_ALL);
				else
					_tlbie_pid(pid, RIC_FLUSH_TLB);
			} else {
				_tlbiel_pid_multicast(mm, pid, RIC_FLUSH_TLB);
			}
		}
	} else {
		bool hflush = flush_all_sizes;
		bool gflush = flush_all_sizes;
		unsigned long hstart, hend;
		unsigned long gstart, gend;

		if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
			hflush = true;

		if (hflush) {
			hstart = (start + PMD_SIZE - 1) & PMD_MASK;
			hend = end & PMD_MASK;
			if (hstart == hend)
				hflush = false;
		}

		if (gflush) {
			gstart = (start + PUD_SIZE - 1) & PUD_MASK;
			gend = end & PUD_MASK;
			if (gstart == gend)
				gflush = false;
		}

		if (local) {
			asm volatile("ptesync": : :"memory");
			__tlbiel_va_range(start, end, pid, page_size, mmu_virtual_psize);
			if (hflush)
				__tlbiel_va_range(hstart, hend, pid,
						PMD_SIZE, MMU_PAGE_2M);
			if (gflush)
				__tlbiel_va_range(gstart, gend, pid,
						PUD_SIZE, MMU_PAGE_1G);
			asm volatile("ptesync": : :"memory");
		} else if (cputlb_use_tlbie()) {
			asm volatile("ptesync": : :"memory");
			__tlbie_va_range(start, end, pid, page_size, mmu_virtual_psize);
			if (hflush)
				__tlbie_va_range(hstart, hend, pid,
						PMD_SIZE, MMU_PAGE_2M);
			if (gflush)
				__tlbie_va_range(gstart, gend, pid,
						PUD_SIZE, MMU_PAGE_1G);
			fixup_tlbie();
			asm volatile("eieio; tlbsync; ptesync": : :"memory");
		} else {
			_tlbiel_va_range_multicast(mm,
					start, end, pid, page_size, mmu_virtual_psize, false);
			if (hflush)
				_tlbiel_va_range_multicast(mm,
					hstart, hend, pid, PMD_SIZE, MMU_PAGE_2M, false);
			if (gflush)
				_tlbiel_va_range_multicast(mm,
					gstart, gend, pid, PUD_SIZE, MMU_PAGE_1G, false);
		}
	}
	preempt_enable();
}

void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
			    unsigned long end)
{
#ifdef CONFIG_HUGETLB_PAGE
	if (is_vm_hugetlb_page(vma))
		return radix__flush_hugetlb_tlb_range(vma, start, end);
#endif

	__radix__flush_tlb_range(vma->vm_mm, start, end, false);
}
EXPORT_SYMBOL(radix__flush_tlb_range);

static int radix_get_mmu_psize(int page_size)
{
	int psize;

	if (page_size == (1UL << mmu_psize_defs[mmu_virtual_psize].shift))
		psize = mmu_virtual_psize;
	else if (page_size == (1UL << mmu_psize_defs[MMU_PAGE_2M].shift))
		psize = MMU_PAGE_2M;
	else if (page_size == (1UL << mmu_psize_defs[MMU_PAGE_1G].shift))
		psize = MMU_PAGE_1G;
	else
		return -1;
	return psize;
}

/*
 * Flush partition scoped LPID address translation for all CPUs.
 */
void radix__flush_tlb_lpid_page(unsigned int lpid,
					unsigned long addr,
					unsigned long page_size)
{
	int psize = radix_get_mmu_psize(page_size);

	_tlbie_lpid_va(addr, lpid, psize, RIC_FLUSH_TLB);
}
EXPORT_SYMBOL_GPL(radix__flush_tlb_lpid_page);

/*
 * Flush partition scoped PWC from LPID for all CPUs.
 */
void radix__flush_pwc_lpid(unsigned int lpid)
{
	_tlbie_lpid(lpid, RIC_FLUSH_PWC);
}
EXPORT_SYMBOL_GPL(radix__flush_pwc_lpid);

/*
 * Flush partition scoped translations from LPID (=LPIDR).
 */
void radix__flush_all_lpid(unsigned int lpid)
{
	_tlbie_lpid(lpid, RIC_FLUSH_ALL);
}
EXPORT_SYMBOL_GPL(radix__flush_all_lpid);

/*
 * Flush process scoped translations from LPID (=LPIDR).
 */
void radix__flush_all_lpid_guest(unsigned int lpid)
{
	_tlbie_lpid_guest(lpid, RIC_FLUSH_ALL);
}
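/*
 * The radix__flush_*lpid* helpers above act on partition scoped (or guest
 * process scoped) translations, so they always go straight to broadcast
 * tlbie rather than honouring cputlb_use_tlbie(); the exported ones are
 * intended for callers outside this file, such as the KVM HV MMU code.
 */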
static void radix__flush_tlb_pwc_range_psize(struct mm_struct *mm, unsigned long start,
				  unsigned long end, int psize);

void radix__tlb_flush(struct mmu_gather *tlb)
{
	int psize = 0;
	struct mm_struct *mm = tlb->mm;
	int page_size = tlb->page_size;
	unsigned long start = tlb->start;
	unsigned long end = tlb->end;

	/*
	 * If the page size is not something we understand, do a full mm flush.
	 *
	 * A "fullmm" flush must always do a flush_all_mm (RIC=2) flush
	 * that flushes the process table entry cache upon process teardown.
	 * See the comment for radix in arch_exit_mmap().
	 */
	if (tlb->fullmm) {
		__flush_all_mm(mm, true);
#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLB_PAGE)
	} else if (mm_tlb_flush_nested(mm)) {
		/*
		 * If there is a concurrent invalidation that is clearing ptes,
		 * then it's possible this invalidation will miss one of those
		 * cleared ptes and miss flushing the TLB. If this invalidate
		 * returns before the other one flushes TLBs, that can result
		 * in it returning while there are still valid TLBs inside the
		 * range to be invalidated.
		 *
		 * See mm/memory.c:tlb_finish_mmu() for more details.
		 *
		 * The solution to this is to ensure the entire range is always
		 * flushed here. The problem for powerpc is that the flushes
		 * are page size specific, so this "forced flush" would not
		 * do the right thing if there are a mix of page sizes in
		 * the range to be invalidated. So use __flush_tlb_range
		 * which invalidates all possible page sizes in the range.
		 *
		 * A PWC flush is probably not required because the core code
		 * shouldn't free page tables in this path, but accounting
		 * for the possibility makes us a bit more robust.
		 *
		 * need_flush_all is an uncommon case because page table
		 * teardown should be done with exclusive locks held (but
		 * after the locks are dropped another invalidate could come
		 * in), so it could be optimized further if necessary.
		 */
		if (!tlb->need_flush_all)
			__radix__flush_tlb_range(mm, start, end, true);
		else
			radix__flush_all_mm(mm);
#endif
	} else if ((psize = radix_get_mmu_psize(page_size)) == -1) {
		if (!tlb->need_flush_all)
			radix__flush_tlb_mm(mm);
		else
			radix__flush_all_mm(mm);
	} else {
		if (!tlb->need_flush_all)
			radix__flush_tlb_range_psize(mm, start, end, psize);
		else
			radix__flush_tlb_pwc_range_psize(mm, start, end, psize);
	}
	tlb->need_flush_all = 0;
}

static __always_inline void __radix__flush_tlb_range_psize(struct mm_struct *mm,
				unsigned long start, unsigned long end,
				int psize, bool also_pwc)
{
	unsigned long pid;
	unsigned int page_shift = mmu_psize_defs[psize].shift;
	unsigned long page_size = 1UL << page_shift;
	unsigned long nr_pages = (end - start) >> page_shift;
	bool local, full;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	preempt_disable();
	smp_mb(); /* see radix__flush_tlb_mm */
	if (!mm_is_thread_local(mm)) {
		if (unlikely(mm_is_singlethreaded(mm))) {
			if (end != TLB_FLUSH_ALL) {
				exit_flush_lazy_tlbs(mm);
				goto is_local;
			}
		}
		local = false;
		full = (end == TLB_FLUSH_ALL ||
				nr_pages > tlb_single_page_flush_ceiling);
	} else {
is_local:
		local = true;
		full = (end == TLB_FLUSH_ALL ||
				nr_pages > tlb_local_single_page_flush_ceiling);
	}

	if (full) {
		if (local) {
			_tlbiel_pid(pid, also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB);
		} else {
			if (cputlb_use_tlbie()) {
				if (mm_needs_flush_escalation(mm))
					also_pwc = true;

				_tlbie_pid(pid,
					also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB);
			} else {
				_tlbiel_pid_multicast(mm, pid,
					also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB);
			}
		}
	} else {
		if (local)
			_tlbiel_va_range(start, end, pid, page_size, psize, also_pwc);
		else if (cputlb_use_tlbie())
			_tlbie_va_range(start, end, pid, page_size, psize, also_pwc);
		else
			_tlbiel_va_range_multicast(mm,
					start, end, pid, page_size, psize, also_pwc);
	}
	preempt_enable();
}

void radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start,
				  unsigned long end, int psize)
{
	return __radix__flush_tlb_range_psize(mm, start, end, psize, false);
}

static void radix__flush_tlb_pwc_range_psize(struct mm_struct *mm, unsigned long start,
				  unsigned long end, int psize)
{
	__radix__flush_tlb_range_psize(mm, start, end, psize, true);
}
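/*
 * When THP collapses small pages into a huge PMD, both the stale
 * small-page translations and the page walk cache entry for the removed
 * page table need to go. The helper below therefore does a PWC-inclusive
 * flush of the 2M region at the base page size; with a 4K base page size
 * it simply falls back to flushing the whole mm.
 */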
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr)
{
	unsigned long pid, end;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	/* 4k page size, just blow the world */
	if (PAGE_SIZE == 0x1000) {
		radix__flush_all_mm(mm);
		return;
	}

	end = addr + HPAGE_PMD_SIZE;

	/* Otherwise first do the PWC, then iterate the pages. */
	preempt_disable();
	smp_mb(); /* see radix__flush_tlb_mm */
	if (!mm_is_thread_local(mm)) {
		if (unlikely(mm_is_singlethreaded(mm))) {
			exit_flush_lazy_tlbs(mm);
			goto local;
		}
		if (cputlb_use_tlbie())
			_tlbie_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
		else
			_tlbiel_va_range_multicast(mm,
					addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
	} else {
local:
		_tlbiel_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
	}

	preempt_enable();
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

void radix__flush_pmd_tlb_range(struct vm_area_struct *vma,
				unsigned long start, unsigned long end)
{
	radix__flush_tlb_range_psize(vma->vm_mm, start, end, MMU_PAGE_2M);
}
EXPORT_SYMBOL(radix__flush_pmd_tlb_range);

void radix__flush_tlb_all(void)
{
	unsigned long rb, prs, r, rs;
	unsigned long ric = RIC_FLUSH_ALL;

	rb = 0x3 << PPC_BITLSHIFT(53); /* IS = 3 */
	prs = 0; /* partition scoped */
	r = 1;	/* radix format */
	rs = 1 & ((1UL << 32) - 1); /* any LPID value to flush guest mappings */

	asm volatile("ptesync": : :"memory");
	/*
	 * now flush guest entries by passing PRS = 1 and LPID != 0
	 */
	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(1), "i"(ric), "r"(rs) : "memory");
	/*
	 * now flush host entries by passing PRS = 0 and LPID == 0
	 */
	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(0) : "memory");
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}
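/*
 * Unlike the PID/LPID targeted flushes above, radix__flush_tlb_all() uses
 * IS=3 broadcast tlbie to invalidate every radix translation: one tlbie
 * with PRS=1 and a non-zero LPID covers guest entries, and a second with
 * PRS=0 and LPID=0 covers host entries.
 */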
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
extern void radix_kvm_prefetch_workaround(struct mm_struct *mm)
{
	unsigned long pid = mm->context.id;

	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	/*
	 * If this context hasn't run on that CPU before and KVM is
	 * around, there's a slim chance that the guest on another
	 * CPU just brought in obsolete translation into the TLB of
	 * this CPU due to a bad prefetch using the guest PID on
	 * the way into the hypervisor.
	 *
	 * We work around this here. If KVM is possible, we check if
	 * any sibling thread is in KVM. If it is, the window may exist
	 * and thus we flush that PID from the core.
	 *
	 * A potential future improvement would be to mark which PIDs
	 * have never been used on the system and avoid it if the PID
	 * is new and the process has no other cpumask bit set.
	 */
	if (cpu_has_feature(CPU_FTR_HVMODE) && radix_enabled()) {
		int cpu = smp_processor_id();
		int sib = cpu_first_thread_sibling(cpu);
		bool flush = false;

		for (; sib <= cpu_last_thread_sibling(cpu) && !flush; sib++) {
			if (sib == cpu)
				continue;
			if (!cpu_possible(sib))
				continue;
			if (paca_ptrs[sib]->kvm_hstate.kvm_vcpu)
				flush = true;
		}
		if (flush)
			_tlbiel_pid(pid, RIC_FLUSH_ALL);
	}
}
EXPORT_SYMBOL_GPL(radix_kvm_prefetch_workaround);
#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */