// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * TLB flush routines for radix kernels.
 *
 * Copyright 2015-2016, Aneesh Kumar K.V, IBM Corporation.
 */

#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/memblock.h>
#include <linux/mmu_context.h>
#include <linux/sched/mm.h>

#include <asm/ppc-opcode.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/trace.h>
#include <asm/cputhreads.h>

#define RIC_FLUSH_TLB 0
#define RIC_FLUSH_PWC 1
#define RIC_FLUSH_ALL 2

/*
 * tlbiel instruction for radix, set invalidation
 * i.e., r=1 and is=01 or is=10 or is=11
 */
static __always_inline void tlbiel_radix_set_isa300(unsigned int set, unsigned int is,
					unsigned int pid,
					unsigned int ric, unsigned int prs)
{
	unsigned long rb;
	unsigned long rs;

	rb = (set << PPC_BITLSHIFT(51)) | (is << PPC_BITLSHIFT(53));
	rs = ((unsigned long)pid << PPC_BITLSHIFT(31));

	asm volatile(PPC_TLBIEL(%0, %1, %2, %3, 1)
		     : : "r"(rb), "r"(rs), "i"(ric), "i"(prs)
		     : "memory");
}

static void tlbiel_all_isa300(unsigned int num_sets, unsigned int is)
{
	unsigned int set;

	asm volatile("ptesync": : :"memory");

	/*
	 * Flush the first set of the TLB, and the entire Page Walk Cache
	 * and partition table entries. Then flush the remaining sets of the
	 * TLB.
	 */
	tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 0);
	for (set = 1; set < num_sets; set++)
		tlbiel_radix_set_isa300(set, is, 0, RIC_FLUSH_TLB, 0);

	/* Do the same for process scoped entries. */
	tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 1);
	for (set = 1; set < num_sets; set++)
		tlbiel_radix_set_isa300(set, is, 0, RIC_FLUSH_TLB, 1);

	asm volatile("ptesync": : :"memory");
}

void radix__tlbiel_all(unsigned int action)
{
	unsigned int is;

	switch (action) {
	case TLB_INVAL_SCOPE_GLOBAL:
		is = 3;
		break;
	case TLB_INVAL_SCOPE_LPID:
		is = 2;
		break;
	default:
		BUG();
	}

	if (early_cpu_has_feature(CPU_FTR_ARCH_300))
		tlbiel_all_isa300(POWER9_TLB_SETS_RADIX, is);
	else
		WARN(1, "%s called on pre-POWER9 CPU\n", __func__);

	asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT "; isync" : : :"memory");
}

static __always_inline void __tlbiel_pid(unsigned long pid, int set,
				unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = PPC_BIT(53); /* IS = 1 */
	rb |= set << PPC_BITLSHIFT(51);
	rs = ((unsigned long)pid) << PPC_BITLSHIFT(31);
	prs = 1; /* process scoped */
	r = 1; /* radix format */

	asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(0, 1, rb, rs, ric, prs, r);
}

static __always_inline void __tlbie_pid(unsigned long pid, unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = PPC_BIT(53); /* IS = 1 */
	rs = pid << PPC_BITLSHIFT(31);
	prs = 1; /* process scoped */
	r = 1; /* radix format */

	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(0, 0, rb, rs, ric, prs, r);
}
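
/*
 * Low-level invalidation helpers. The tlbiel ("local") forms only affect
 * translations cached by the executing CPU, while the tlbie forms are
 * broadcast on the fabric and must be followed by eieio; tlbsync; ptesync.
 * The suffix gives the scope of the invalidation: _pid (process scoped,
 * selected by PID), _lpid (partition scoped, selected by LPID), _va and
 * _lpid_va (a single effective address), and _lpid_guest (a guest's
 * process scoped entries under the current LPIDR).
 */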
static __always_inline void __tlbiel_lpid(unsigned long lpid, int set,
				unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = PPC_BIT(52); /* IS = 2 */
	rb |= set << PPC_BITLSHIFT(51);
	rs = 0;  /* LPID comes from LPIDR */
	prs = 0; /* partition scoped */
	r = 1; /* radix format */

	asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(lpid, 1, rb, rs, ric, prs, r);
}

static __always_inline void __tlbie_lpid(unsigned long lpid, unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = PPC_BIT(52); /* IS = 2 */
	rs = lpid;
	prs = 0; /* partition scoped */
	r = 1; /* radix format */

	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
}

static __always_inline void __tlbiel_lpid_guest(unsigned long lpid, int set,
				unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = PPC_BIT(52); /* IS = 2 */
	rb |= set << PPC_BITLSHIFT(51);
	rs = 0;  /* LPID comes from LPIDR */
	prs = 1; /* process scoped */
	r = 1; /* radix format */

	asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(lpid, 1, rb, rs, ric, prs, r);
}

static __always_inline void __tlbiel_va(unsigned long va, unsigned long pid,
					unsigned long ap, unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = va & ~(PPC_BITMASK(52, 63));
	rb |= ap << PPC_BITLSHIFT(58);
	rs = pid << PPC_BITLSHIFT(31);
	prs = 1; /* process scoped */
	r = 1; /* radix format */

	asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(0, 1, rb, rs, ric, prs, r);
}

static __always_inline void __tlbie_va(unsigned long va, unsigned long pid,
				       unsigned long ap, unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = va & ~(PPC_BITMASK(52, 63));
	rb |= ap << PPC_BITLSHIFT(58);
	rs = pid << PPC_BITLSHIFT(31);
	prs = 1; /* process scoped */
	r = 1; /* radix format */

	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(0, 0, rb, rs, ric, prs, r);
}

static __always_inline void __tlbie_lpid_va(unsigned long va, unsigned long lpid,
					    unsigned long ap, unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = va & ~(PPC_BITMASK(52, 63));
	rb |= ap << PPC_BITLSHIFT(58);
	rs = lpid;
	prs = 0; /* partition scoped */
	r = 1; /* radix format */

	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
}

/*
 * Work around a POWER9 tlbie erratum (CPU_FTR_P9_TLBIE_BUG): issue one
 * extra, otherwise harmless tlbie of a single 64K page at the top of the
 * effective address space before the closing eieio; tlbsync; ptesync
 * sequence.
 */
static inline void fixup_tlbie(void)
{
	unsigned long pid = 0;
	unsigned long va = ((1UL << 52) - 1);

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_va(va, pid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB);
	}
}

static inline void fixup_tlbie_lpid(unsigned long lpid)
{
	unsigned long va = ((1UL << 52) - 1);

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_lpid_va(va, lpid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB);
	}
}

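/*
 * The _tlbiel_*() and _tlbie_*() wrappers below add the required ordering
 * around the raw instructions: a local flush is bracketed as
 *
 *	ptesync; tlbiel ...; ptesync
 *
 * while a global flush is bracketed as
 *
 *	ptesync; tlbie ...; eieio; tlbsync; ptesync
 *
 * with the fixup_tlbie*() workaround slotted in before the tlbsync.
 */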
/*
 * We use 128 sets in radix mode and 256 sets in hpt mode.
 */
static __always_inline void _tlbiel_pid(unsigned long pid, unsigned long ric)
{
	int set;

	asm volatile("ptesync": : :"memory");

	/*
	 * Flush the first set of the TLB, and if we're doing a RIC_FLUSH_ALL,
	 * also flush the entire Page Walk Cache.
	 */
	__tlbiel_pid(pid, 0, ric);

	/* For PWC, only one flush is needed */
	if (ric == RIC_FLUSH_PWC) {
		asm volatile("ptesync": : :"memory");
		return;
	}

	/* For the remaining sets, just flush the TLB */
	for (set = 1; set < POWER9_TLB_SETS_RADIX; set++)
		__tlbiel_pid(pid, set, RIC_FLUSH_TLB);

	asm volatile("ptesync": : :"memory");
	asm volatile(PPC_RADIX_INVALIDATE_ERAT_USER "; isync" : : :"memory");
}

static inline void _tlbie_pid(unsigned long pid, unsigned long ric)
{
	asm volatile("ptesync": : :"memory");

	/*
	 * Workaround the fact that the "ric" argument to __tlbie_pid
	 * must be a compile-time constant to match the "i" constraint
	 * in the asm statement.
	 */
	switch (ric) {
	case RIC_FLUSH_TLB:
		__tlbie_pid(pid, RIC_FLUSH_TLB);
		break;
	case RIC_FLUSH_PWC:
		__tlbie_pid(pid, RIC_FLUSH_PWC);
		break;
	case RIC_FLUSH_ALL:
	default:
		__tlbie_pid(pid, RIC_FLUSH_ALL);
	}
	fixup_tlbie();
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

static inline void _tlbiel_lpid(unsigned long lpid, unsigned long ric)
{
	int set;

	VM_BUG_ON(mfspr(SPRN_LPID) != lpid);

	asm volatile("ptesync": : :"memory");

	/*
	 * Flush the first set of the TLB, and if we're doing a RIC_FLUSH_ALL,
	 * also flush the entire Page Walk Cache.
	 */
	__tlbiel_lpid(lpid, 0, ric);

	/* For PWC, only one flush is needed */
	if (ric == RIC_FLUSH_PWC) {
		asm volatile("ptesync": : :"memory");
		return;
	}

	/* For the remaining sets, just flush the TLB */
	for (set = 1; set < POWER9_TLB_SETS_RADIX; set++)
		__tlbiel_lpid(lpid, set, RIC_FLUSH_TLB);

	asm volatile("ptesync": : :"memory");
	asm volatile(PPC_RADIX_INVALIDATE_ERAT_GUEST "; isync" : : :"memory");
}

static inline void _tlbie_lpid(unsigned long lpid, unsigned long ric)
{
	asm volatile("ptesync": : :"memory");

	/*
	 * Workaround the fact that the "ric" argument to __tlbie_lpid
	 * must be a compile-time constant to match the "i" constraint
	 * in the asm statement.
	 */
	switch (ric) {
	case RIC_FLUSH_TLB:
		__tlbie_lpid(lpid, RIC_FLUSH_TLB);
		break;
	case RIC_FLUSH_PWC:
		__tlbie_lpid(lpid, RIC_FLUSH_PWC);
		break;
	case RIC_FLUSH_ALL:
	default:
		__tlbie_lpid(lpid, RIC_FLUSH_ALL);
	}
	fixup_tlbie_lpid(lpid);
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

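/*
 * Flush a guest's process scoped entries on this CPU. PRS = 1 selects
 * process scoped translations, but because LPIDR still holds the guest's
 * LPID (checked by the VM_BUG_ON below), the entries invalidated are the
 * guest's rather than the host's.
 */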
static __always_inline void _tlbiel_lpid_guest(unsigned long lpid, unsigned long ric)
{
	int set;

	VM_BUG_ON(mfspr(SPRN_LPID) != lpid);

	asm volatile("ptesync": : :"memory");

	/*
	 * Flush the first set of the TLB, and if we're doing a RIC_FLUSH_ALL,
	 * also flush the entire Page Walk Cache.
	 */
	__tlbiel_lpid_guest(lpid, 0, ric);

	/* For PWC, only one flush is needed */
	if (ric == RIC_FLUSH_PWC) {
		asm volatile("ptesync": : :"memory");
		return;
	}

	/* For the remaining sets, just flush the TLB */
	for (set = 1; set < POWER9_TLB_SETS_RADIX; set++)
		__tlbiel_lpid_guest(lpid, set, RIC_FLUSH_TLB);

	asm volatile("ptesync": : :"memory");
	asm volatile(PPC_RADIX_INVALIDATE_ERAT_GUEST : : :"memory");
}

static inline void __tlbiel_va_range(unsigned long start, unsigned long end,
				     unsigned long pid, unsigned long page_size,
				     unsigned long psize)
{
	unsigned long addr;
	unsigned long ap = mmu_get_ap(psize);

	for (addr = start; addr < end; addr += page_size)
		__tlbiel_va(addr, pid, ap, RIC_FLUSH_TLB);
}

static __always_inline void _tlbiel_va(unsigned long va, unsigned long pid,
				       unsigned long psize, unsigned long ric)
{
	unsigned long ap = mmu_get_ap(psize);

	asm volatile("ptesync": : :"memory");
	__tlbiel_va(va, pid, ap, ric);
	asm volatile("ptesync": : :"memory");
}

static inline void _tlbiel_va_range(unsigned long start, unsigned long end,
				    unsigned long pid, unsigned long page_size,
				    unsigned long psize, bool also_pwc)
{
	asm volatile("ptesync": : :"memory");
	if (also_pwc)
		__tlbiel_pid(pid, 0, RIC_FLUSH_PWC);
	__tlbiel_va_range(start, end, pid, page_size, psize);
	asm volatile("ptesync": : :"memory");
}

static inline void __tlbie_va_range(unsigned long start, unsigned long end,
				    unsigned long pid, unsigned long page_size,
				    unsigned long psize)
{
	unsigned long addr;
	unsigned long ap = mmu_get_ap(psize);

	for (addr = start; addr < end; addr += page_size)
		__tlbie_va(addr, pid, ap, RIC_FLUSH_TLB);
}

static __always_inline void _tlbie_va(unsigned long va, unsigned long pid,
				      unsigned long psize, unsigned long ric)
{
	unsigned long ap = mmu_get_ap(psize);

	asm volatile("ptesync": : :"memory");
	__tlbie_va(va, pid, ap, ric);
	fixup_tlbie();
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

static __always_inline void _tlbie_lpid_va(unsigned long va, unsigned long lpid,
					   unsigned long psize, unsigned long ric)
{
	unsigned long ap = mmu_get_ap(psize);

	asm volatile("ptesync": : :"memory");
	__tlbie_lpid_va(va, lpid, ap, ric);
	fixup_tlbie_lpid(lpid);
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

static inline void _tlbie_va_range(unsigned long start, unsigned long end,
				   unsigned long pid, unsigned long page_size,
				   unsigned long psize, bool also_pwc)
{
	asm volatile("ptesync": : :"memory");
	if (also_pwc)
		__tlbie_pid(pid, RIC_FLUSH_PWC);
	__tlbie_va_range(start, end, pid, page_size, psize);
	fixup_tlbie();
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

/*
 * Base TLB flushing operations:
 *
 *  - flush_tlb_mm(mm) flushes the specified mm context TLB's
 *  - flush_tlb_page(vma, vmaddr) flushes one page
 *  - flush_tlb_range(vma, start, end) flushes a range of pages
 *  - flush_tlb_kernel_range(start, end) flushes kernel pages
 *
 *  - local_* variants of page and mm only apply to the current
 *    processor
 */
void radix__local_flush_tlb_mm(struct mm_struct *mm)
{
	unsigned long pid;

	preempt_disable();
	pid = mm->context.id;
	if (pid != MMU_NO_CONTEXT)
		_tlbiel_pid(pid, RIC_FLUSH_TLB);
	preempt_enable();
}
EXPORT_SYMBOL(radix__local_flush_tlb_mm);

#ifndef CONFIG_SMP
void radix__local_flush_all_mm(struct mm_struct *mm)
{
	unsigned long pid;

	preempt_disable();
	pid = mm->context.id;
	if (pid != MMU_NO_CONTEXT)
		_tlbiel_pid(pid, RIC_FLUSH_ALL);
	preempt_enable();
}
EXPORT_SYMBOL(radix__local_flush_all_mm);
#endif /* CONFIG_SMP */

void radix__local_flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
				       int psize)
{
	unsigned long pid;

	preempt_disable();
	pid = mm->context.id;
	if (pid != MMU_NO_CONTEXT)
		_tlbiel_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
	preempt_enable();
}

void radix__local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
{
#ifdef CONFIG_HUGETLB_PAGE
	/* need the return fix for nohash.c */
	if (is_vm_hugetlb_page(vma))
		return radix__local_flush_hugetlb_page(vma, vmaddr);
#endif
	radix__local_flush_tlb_page_psize(vma->vm_mm, vmaddr, mmu_virtual_psize);
}
EXPORT_SYMBOL(radix__local_flush_tlb_page);

static bool mm_is_singlethreaded(struct mm_struct *mm)
{
	if (atomic_read(&mm->context.copros) > 0)
		return false;
	if (atomic_read(&mm->mm_users) <= 1 && current->mm == mm)
		return true;
	return false;
}

static bool mm_needs_flush_escalation(struct mm_struct *mm)
{
	/*
	 * The P9 nest MMU has issues with the page walk cache caching PTEs
	 * and not flushing them properly when RIC = 0 for a PID/LPID
	 * invalidate.
	 */
	if (atomic_read(&mm->context.copros) > 0)
		return true;
	return false;
}

#ifdef CONFIG_SMP
static void do_exit_flush_lazy_tlb(void *arg)
{
	struct mm_struct *mm = arg;
	unsigned long pid = mm->context.id;

	if (current->mm == mm)
		return; /* Local CPU */

	if (current->active_mm == mm) {
		/*
		 * Must be a kernel thread, because the sender is
		 * single-threaded.
		 */
		BUG_ON(current->mm);
		mmgrab(&init_mm);
		switch_mm(mm, &init_mm, current);
		current->active_mm = &init_mm;
		mmdrop(mm);
	}
	_tlbiel_pid(pid, RIC_FLUSH_ALL);
}

static void exit_flush_lazy_tlbs(struct mm_struct *mm)
{
	/*
	 * It would be nice if this were async so it could run in parallel
	 * with our local flush, but generic code does not give a good API
	 * for it. We could extend the generic code or make a special
	 * powerpc IPI for flushing TLBs. For now it's not too performance
	 * critical.
	 */
	smp_call_function_many(mm_cpumask(mm), do_exit_flush_lazy_tlb,
				(void *)mm, 1);
	mm_reset_thread_local(mm);
}

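/*
 * When an mm has gone back to being single threaded, other CPUs may still
 * have it active as a lazy (kernel thread) mm and set in mm_cpumask. The
 * flush paths below IPI those CPUs via exit_flush_lazy_tlbs() so they
 * switch to init_mm and flush locally; after that the mm is thread local
 * again and subsequent flushes can use tlbiel instead of broadcast tlbie.
 */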
void radix__flush_tlb_mm(struct mm_struct *mm)
{
	unsigned long pid;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	preempt_disable();
	/*
	 * Order loads of mm_cpumask vs previous stores to clear ptes before
	 * the invalidate. See barrier in switch_mm_irqs_off.
	 */
	smp_mb();
	if (!mm_is_thread_local(mm)) {
		if (unlikely(mm_is_singlethreaded(mm))) {
			exit_flush_lazy_tlbs(mm);
			goto local;
		}

		if (mm_needs_flush_escalation(mm))
			_tlbie_pid(pid, RIC_FLUSH_ALL);
		else
			_tlbie_pid(pid, RIC_FLUSH_TLB);
	} else {
local:
		_tlbiel_pid(pid, RIC_FLUSH_TLB);
	}
	preempt_enable();
}
EXPORT_SYMBOL(radix__flush_tlb_mm);

static void __flush_all_mm(struct mm_struct *mm, bool fullmm)
{
	unsigned long pid;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	preempt_disable();
	smp_mb(); /* see radix__flush_tlb_mm */
	if (!mm_is_thread_local(mm)) {
		if (unlikely(mm_is_singlethreaded(mm))) {
			if (!fullmm) {
				exit_flush_lazy_tlbs(mm);
				goto local;
			}
		}
		_tlbie_pid(pid, RIC_FLUSH_ALL);
	} else {
local:
		_tlbiel_pid(pid, RIC_FLUSH_ALL);
	}
	preempt_enable();
}

void radix__flush_all_mm(struct mm_struct *mm)
{
	__flush_all_mm(mm, false);
}
EXPORT_SYMBOL(radix__flush_all_mm);

void radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr)
{
	tlb->need_flush_all = 1;
}
EXPORT_SYMBOL(radix__flush_tlb_pwc);

void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
				 int psize)
{
	unsigned long pid;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	preempt_disable();
	smp_mb(); /* see radix__flush_tlb_mm */
	if (!mm_is_thread_local(mm)) {
		if (unlikely(mm_is_singlethreaded(mm))) {
			exit_flush_lazy_tlbs(mm);
			goto local;
		}
		_tlbie_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
	} else {
local:
		_tlbiel_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
	}
	preempt_enable();
}

void radix__flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
{
#ifdef CONFIG_HUGETLB_PAGE
	if (is_vm_hugetlb_page(vma))
		return radix__flush_hugetlb_page(vma, vmaddr);
#endif
	radix__flush_tlb_page_psize(vma->vm_mm, vmaddr, mmu_virtual_psize);
}
EXPORT_SYMBOL(radix__flush_tlb_page);

#else /* CONFIG_SMP */
#define radix__flush_all_mm radix__local_flush_all_mm
#endif /* CONFIG_SMP */

/*
 * If kernel TLBIs ever become local rather than global, then
 * drivers/misc/ocxl/link.c:ocxl_link_add_pe will need some work, as it
 * assumes kernel TLBIs are global.
 */
void radix__flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
	_tlbie_pid(0, RIC_FLUSH_ALL);
}
EXPORT_SYMBOL(radix__flush_tlb_kernel_range);

#define TLB_FLUSH_ALL -1UL

/*
 * Number of pages above which we invalidate the entire PID rather than
 * flush individual pages, for local and global flushes respectively.
 *
 * tlbie goes out to the interconnect and individual ops are more costly.
 * It also does not iterate over sets like the local tlbiel variant when
 * invalidating a full PID, so it has a far lower threshold to change from
 * individual page flushes to full-pid flushes.
 */
static unsigned long tlb_single_page_flush_ceiling __read_mostly = 33;
static unsigned long tlb_local_single_page_flush_ceiling __read_mostly = POWER9_TLB_SETS_RADIX * 2;

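/*
 * For example, with a 64K base page size, unmapping 4MB covers 64 pages:
 * a global flush exceeds tlb_single_page_flush_ceiling (33) and is done
 * as one full-PID tlbie, while a local flush stays under the local
 * ceiling (2 * 128 sets = 256 pages) and is still done page by page
 * with tlbiel.
 */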
static inline void __radix__flush_tlb_range(struct mm_struct *mm,
					    unsigned long start, unsigned long end,
					    bool flush_all_sizes)
{
	unsigned long pid;
	unsigned int page_shift = mmu_psize_defs[mmu_virtual_psize].shift;
	unsigned long page_size = 1UL << page_shift;
	unsigned long nr_pages = (end - start) >> page_shift;
	bool local, full;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	preempt_disable();
	smp_mb(); /* see radix__flush_tlb_mm */
	if (!mm_is_thread_local(mm)) {
		if (unlikely(mm_is_singlethreaded(mm))) {
			if (end != TLB_FLUSH_ALL) {
				exit_flush_lazy_tlbs(mm);
				goto is_local;
			}
		}
		local = false;
		full = (end == TLB_FLUSH_ALL ||
				nr_pages > tlb_single_page_flush_ceiling);
	} else {
is_local:
		local = true;
		full = (end == TLB_FLUSH_ALL ||
				nr_pages > tlb_local_single_page_flush_ceiling);
	}

	if (full) {
		if (local) {
			_tlbiel_pid(pid, RIC_FLUSH_TLB);
		} else {
			if (mm_needs_flush_escalation(mm))
				_tlbie_pid(pid, RIC_FLUSH_ALL);
			else
				_tlbie_pid(pid, RIC_FLUSH_TLB);
		}
	} else {
		bool hflush = flush_all_sizes;
		bool gflush = flush_all_sizes;
		unsigned long hstart, hend;
		unsigned long gstart, gend;

		if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
			hflush = true;

		if (hflush) {
			hstart = (start + PMD_SIZE - 1) & PMD_MASK;
			hend = end & PMD_MASK;
			if (hstart == hend)
				hflush = false;
		}

		if (gflush) {
			gstart = (start + PUD_SIZE - 1) & PUD_MASK;
			gend = end & PUD_MASK;
			if (gstart == gend)
				gflush = false;
		}

		asm volatile("ptesync": : :"memory");
		if (local) {
			__tlbiel_va_range(start, end, pid, page_size, mmu_virtual_psize);
			if (hflush)
				__tlbiel_va_range(hstart, hend, pid,
						PMD_SIZE, MMU_PAGE_2M);
			if (gflush)
				__tlbiel_va_range(gstart, gend, pid,
						PUD_SIZE, MMU_PAGE_1G);
			asm volatile("ptesync": : :"memory");
		} else {
			__tlbie_va_range(start, end, pid, page_size, mmu_virtual_psize);
			if (hflush)
				__tlbie_va_range(hstart, hend, pid,
						PMD_SIZE, MMU_PAGE_2M);
			if (gflush)
				__tlbie_va_range(gstart, gend, pid,
						PUD_SIZE, MMU_PAGE_1G);
			fixup_tlbie();
			asm volatile("eieio; tlbsync; ptesync": : :"memory");
		}
	}
	preempt_enable();
}

void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
			    unsigned long end)
{
#ifdef CONFIG_HUGETLB_PAGE
	if (is_vm_hugetlb_page(vma))
		return radix__flush_hugetlb_tlb_range(vma, start, end);
#endif

	__radix__flush_tlb_range(vma->vm_mm, start, end, false);
}
EXPORT_SYMBOL(radix__flush_tlb_range);

static int radix_get_mmu_psize(int page_size)
{
	int psize;

	if (page_size == (1UL << mmu_psize_defs[mmu_virtual_psize].shift))
		psize = mmu_virtual_psize;
	else if (page_size == (1UL << mmu_psize_defs[MMU_PAGE_2M].shift))
		psize = MMU_PAGE_2M;
	else if (page_size == (1UL << mmu_psize_defs[MMU_PAGE_1G].shift))
		psize = MMU_PAGE_1G;
	else
		return -1;
	return psize;
}

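/*
 * The radix__*_lpid* functions below operate on partition scoped
 * translations, selected by LPID rather than PID. They are exported GPL
 * and are intended for the hypervisor (KVM), which maintains the
 * partition scoped page tables on behalf of its guests.
 */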
/*
 * Flush partition scoped LPID address translation for all CPUs.
 */
void radix__flush_tlb_lpid_page(unsigned int lpid,
					unsigned long addr,
					unsigned long page_size)
{
	int psize = radix_get_mmu_psize(page_size);

	_tlbie_lpid_va(addr, lpid, psize, RIC_FLUSH_TLB);
}
EXPORT_SYMBOL_GPL(radix__flush_tlb_lpid_page);

/*
 * Flush partition scoped PWC from LPID for all CPUs.
 */
void radix__flush_pwc_lpid(unsigned int lpid)
{
	_tlbie_lpid(lpid, RIC_FLUSH_PWC);
}
EXPORT_SYMBOL_GPL(radix__flush_pwc_lpid);

/*
 * Flush partition scoped translations from LPID (=LPIDR) for all CPUs.
 */
void radix__flush_tlb_lpid(unsigned int lpid)
{
	_tlbie_lpid(lpid, RIC_FLUSH_ALL);
}
EXPORT_SYMBOL_GPL(radix__flush_tlb_lpid);

/*
 * Flush partition scoped translations from LPID (=LPIDR) on this CPU only.
 */
void radix__local_flush_tlb_lpid(unsigned int lpid)
{
	_tlbiel_lpid(lpid, RIC_FLUSH_ALL);
}
EXPORT_SYMBOL_GPL(radix__local_flush_tlb_lpid);

/*
 * Flush process scoped translations from LPID (=LPIDR) on this CPU only.
 * The important difference is that the guest normally manages its own
 * translations, but some cases (e.g. vCPU migration) require KVM to do
 * the flush.
 */
void radix__local_flush_tlb_lpid_guest(unsigned int lpid)
{
	_tlbiel_lpid_guest(lpid, RIC_FLUSH_ALL);
}
EXPORT_SYMBOL_GPL(radix__local_flush_tlb_lpid_guest);

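/*
 * radix__tlb_flush() below is the mmu_gather flush hook: generic code
 * batches unmaps in a struct mmu_gather and calls it from
 * tlb_finish_mmu(). Depending on what was recorded (fullmm teardown, a
 * range with a known page size, or a page size we do not track), it picks
 * the cheapest of the flush primitives defined above.
 */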
static void radix__flush_tlb_pwc_range_psize(struct mm_struct *mm, unsigned long start,
				unsigned long end, int psize);

void radix__tlb_flush(struct mmu_gather *tlb)
{
	int psize = 0;
	struct mm_struct *mm = tlb->mm;
	int page_size = tlb->page_size;
	unsigned long start = tlb->start;
	unsigned long end = tlb->end;

	/*
	 * If the page size is not something we understand, do a full mm flush.
	 *
	 * A "fullmm" flush must always do a flush_all_mm (RIC=2) flush
	 * that flushes the process table entry cache upon process teardown.
	 * See the comment for radix in arch_exit_mmap().
	 */
	if (tlb->fullmm) {
		__flush_all_mm(mm, true);
#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLB_PAGE)
	} else if (mm_tlb_flush_nested(mm)) {
		/*
		 * If there is a concurrent invalidation that is clearing ptes,
		 * then it's possible this invalidation will miss one of those
		 * cleared ptes and miss flushing the TLB. If this invalidate
		 * returns before the other one flushes TLBs, that can result
		 * in it returning while there are still valid TLBs inside the
		 * range to be invalidated.
		 *
		 * See mm/memory.c:tlb_finish_mmu() for more details.
		 *
		 * The solution to this is to ensure the entire range is always
		 * flushed here. The problem for powerpc is that the flushes
		 * are page size specific, so this "forced flush" would not
		 * do the right thing if there is a mix of page sizes in
		 * the range to be invalidated. So use __flush_tlb_range
		 * which invalidates all possible page sizes in the range.
		 *
		 * A PWC flush is probably not required because the core code
		 * shouldn't free page tables in this path, but accounting
		 * for the possibility makes us a bit more robust.
		 *
		 * need_flush_all is an uncommon case because page table
		 * teardown should be done with exclusive locks held (though
		 * another invalidate could come in after the locks are
		 * dropped), so it could be optimized further if necessary.
		 */
		if (!tlb->need_flush_all)
			__radix__flush_tlb_range(mm, start, end, true);
		else
			radix__flush_all_mm(mm);
#endif
	} else if ((psize = radix_get_mmu_psize(page_size)) == -1) {
		if (!tlb->need_flush_all)
			radix__flush_tlb_mm(mm);
		else
			radix__flush_all_mm(mm);
	} else {
		if (!tlb->need_flush_all)
			radix__flush_tlb_range_psize(mm, start, end, psize);
		else
			radix__flush_tlb_pwc_range_psize(mm, start, end, psize);
	}
	tlb->need_flush_all = 0;
}

static __always_inline void __radix__flush_tlb_range_psize(struct mm_struct *mm,
				unsigned long start, unsigned long end,
				int psize, bool also_pwc)
{
	unsigned long pid;
	unsigned int page_shift = mmu_psize_defs[psize].shift;
	unsigned long page_size = 1UL << page_shift;
	unsigned long nr_pages = (end - start) >> page_shift;
	bool local, full;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	preempt_disable();
	smp_mb(); /* see radix__flush_tlb_mm */
	if (!mm_is_thread_local(mm)) {
		if (unlikely(mm_is_singlethreaded(mm))) {
			if (end != TLB_FLUSH_ALL) {
				exit_flush_lazy_tlbs(mm);
				goto is_local;
			}
		}
		local = false;
		full = (end == TLB_FLUSH_ALL ||
				nr_pages > tlb_single_page_flush_ceiling);
	} else {
is_local:
		local = true;
		full = (end == TLB_FLUSH_ALL ||
				nr_pages > tlb_local_single_page_flush_ceiling);
	}

	if (full) {
		if (local) {
			_tlbiel_pid(pid, also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB);
		} else {
			if (mm_needs_flush_escalation(mm))
				also_pwc = true;

			_tlbie_pid(pid, also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB);
		}
	} else {
		if (local)
			_tlbiel_va_range(start, end, pid, page_size, psize, also_pwc);
		else
			_tlbie_va_range(start, end, pid, page_size, psize, also_pwc);
	}
	preempt_enable();
}

void radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start,
				  unsigned long end, int psize)
{
	return __radix__flush_tlb_range_psize(mm, start, end, psize, false);
}

static void radix__flush_tlb_pwc_range_psize(struct mm_struct *mm, unsigned long start,
					     unsigned long end, int psize)
{
	__radix__flush_tlb_range_psize(mm, start, end, psize, true);
}

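/*
 * When a 2M range of base pages is collapsed into a huge PMD (THP
 * collapse), the old page table page for that range goes away, so the
 * flush below covers the 2M region at base page size and also flushes the
 * page walk cache (also_pwc == true) rather than just the last level
 * entries.
 */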
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr)
{
	unsigned long pid, end;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	/* 4k page size, just blow the world */
	if (PAGE_SIZE == 0x1000) {
		radix__flush_all_mm(mm);
		return;
	}

	end = addr + HPAGE_PMD_SIZE;

	/* Otherwise first do the PWC, then iterate the pages. */
	preempt_disable();
	smp_mb(); /* see radix__flush_tlb_mm */
	if (!mm_is_thread_local(mm)) {
		if (unlikely(mm_is_singlethreaded(mm))) {
			exit_flush_lazy_tlbs(mm);
			goto local;
		}
		_tlbie_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
	} else {
local:
		_tlbiel_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
	}

	preempt_enable();
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

void radix__flush_pmd_tlb_range(struct vm_area_struct *vma,
				unsigned long start, unsigned long end)
{
	radix__flush_tlb_range_psize(vma->vm_mm, start, end, MMU_PAGE_2M);
}
EXPORT_SYMBOL(radix__flush_pmd_tlb_range);

void radix__flush_tlb_all(void)
{
	unsigned long rb, prs, r, rs;
	unsigned long ric = RIC_FLUSH_ALL;

	rb = 0x3 << PPC_BITLSHIFT(53); /* IS = 3 */
	prs = 0; /* partition scoped */
	r = 1; /* radix format */
	rs = 1 & ((1UL << 32) - 1); /* any LPID value to flush guest mappings */

	asm volatile("ptesync": : :"memory");
	/*
	 * Now flush guest entries by passing PRS = 1 and LPID != 0.
	 */
	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(1), "i"(ric), "r"(rs) : "memory");
	/*
	 * Now flush host entries by passing PRS = 0 and LPID == 0.
	 */
	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(0) : "memory");
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
extern void radix_kvm_prefetch_workaround(struct mm_struct *mm)
{
	unsigned long pid = mm->context.id;

	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	/*
	 * If this context hasn't run on that CPU before and KVM is
	 * around, there's a slim chance that the guest on another
	 * CPU just brought obsolete translations into the TLB of
	 * this CPU due to a bad prefetch using the guest PID on
	 * the way into the hypervisor.
	 *
	 * We work around this here. If KVM is possible, we check if
	 * any sibling thread is in KVM. If it is, the window may exist
	 * and thus we flush that PID from the core.
	 *
	 * A potential future improvement would be to mark which PIDs
	 * have never been used on the system and avoid it if the PID
	 * is new and the process has no other cpumask bit set.
	 */
	if (cpu_has_feature(CPU_FTR_HVMODE) && radix_enabled()) {
		int cpu = smp_processor_id();
		int sib = cpu_first_thread_sibling(cpu);
		bool flush = false;

		for (; sib <= cpu_last_thread_sibling(cpu) && !flush; sib++) {
			if (sib == cpu)
				continue;
			if (!cpu_possible(sib))
				continue;
			if (paca_ptrs[sib]->kvm_hstate.kvm_vcpu)
				flush = true;
		}
		if (flush)
			_tlbiel_pid(pid, RIC_FLUSH_ALL);
	}
}
EXPORT_SYMBOL_GPL(radix_kvm_prefetch_workaround);
#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */