// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * TLB flush routines for radix kernels.
 *
 * Copyright 2015-2016, Aneesh Kumar K.V, IBM Corporation.
 */

#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/memblock.h>
#include <linux/mmu_context.h>
#include <linux/sched/mm.h>

#include <asm/ppc-opcode.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/trace.h>
#include <asm/cputhreads.h>

#define RIC_FLUSH_TLB 0
#define RIC_FLUSH_PWC 1
#define RIC_FLUSH_ALL 2

/*
 * tlbiel instruction for radix, set invalidation
 * i.e., r=1 and is=01 or is=10 or is=11
 */
static inline void tlbiel_radix_set_isa300(unsigned int set, unsigned int is,
					unsigned int pid,
					unsigned int ric, unsigned int prs)
{
	unsigned long rb;
	unsigned long rs;

	rb = (set << PPC_BITLSHIFT(51)) | (is << PPC_BITLSHIFT(53));
	rs = ((unsigned long)pid << PPC_BITLSHIFT(31));

	asm volatile(PPC_TLBIEL(%0, %1, %2, %3, 1)
		     : : "r"(rb), "r"(rs), "i"(ric), "i"(prs)
		     : "memory");
}

static void tlbiel_all_isa300(unsigned int num_sets, unsigned int is)
{
	unsigned int set;

	asm volatile("ptesync": : :"memory");

	/*
	 * Flush the first set of the TLB, and the entire Page Walk Cache
	 * and partition table entries. Then flush the remaining sets of the
	 * TLB.
	 */
	tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 0);
	for (set = 1; set < num_sets; set++)
		tlbiel_radix_set_isa300(set, is, 0, RIC_FLUSH_TLB, 0);

	/* Do the same for process scoped entries. */
	tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 1);
	for (set = 1; set < num_sets; set++)
		tlbiel_radix_set_isa300(set, is, 0, RIC_FLUSH_TLB, 1);

	asm volatile("ptesync": : :"memory");
}

void radix__tlbiel_all(unsigned int action)
{
	unsigned int is;

	switch (action) {
	case TLB_INVAL_SCOPE_GLOBAL:
		is = 3;
		break;
	case TLB_INVAL_SCOPE_LPID:
		is = 2;
		break;
	default:
		BUG();
	}

	if (early_cpu_has_feature(CPU_FTR_ARCH_300))
		tlbiel_all_isa300(POWER9_TLB_SETS_RADIX, is);
	else
		WARN(1, "%s called on pre-POWER9 CPU\n", __func__);

	asm volatile(PPC_INVALIDATE_ERAT "; isync" : : :"memory");
}

static __always_inline void __tlbiel_pid(unsigned long pid, int set,
				unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = PPC_BIT(53); /* IS = 1 */
	rb |= set << PPC_BITLSHIFT(51);
	rs = ((unsigned long)pid) << PPC_BITLSHIFT(31);
	prs = 1; /* process scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(0, 1, rb, rs, ric, prs, r);
}

static __always_inline void __tlbie_pid(unsigned long pid, unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = PPC_BIT(53); /* IS = 1 */
	rs = pid << PPC_BITLSHIFT(31);
	prs = 1; /* process scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(0, 0, rb, rs, ric, prs, r);
}

static __always_inline void __tlbiel_lpid(unsigned long lpid, int set,
				unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = PPC_BIT(52); /* IS = 2 */
	rb |= set << PPC_BITLSHIFT(51);
	rs = 0;  /* LPID comes from LPIDR */
	prs = 0; /* partition scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(lpid, 1, rb, rs, ric, prs, r);
}

static __always_inline void __tlbie_lpid(unsigned long lpid, unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = PPC_BIT(52); /* IS = 2 */
	rs = lpid;
	prs = 0; /* partition scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
}

static inline void __tlbiel_lpid_guest(unsigned long lpid, int set,
				unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = PPC_BIT(52); /* IS = 2 */
	rb |= set << PPC_BITLSHIFT(51);
	rs = 0;  /* LPID comes from LPIDR */
	prs = 1; /* process scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(lpid, 1, rb, rs, ric, prs, r);
}

static inline void __tlbiel_va(unsigned long va, unsigned long pid,
			       unsigned long ap, unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = va & ~(PPC_BITMASK(52, 63));
	rb |= ap << PPC_BITLSHIFT(58);
	rs = pid << PPC_BITLSHIFT(31);
	prs = 1; /* process scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(0, 1, rb, rs, ric, prs, r);
}

static inline void __tlbie_va(unsigned long va, unsigned long pid,
			      unsigned long ap, unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = va & ~(PPC_BITMASK(52, 63));
	rb |= ap << PPC_BITLSHIFT(58);
	rs = pid << PPC_BITLSHIFT(31);
	prs = 1; /* process scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(0, 0, rb, rs, ric, prs, r);
}

static inline void __tlbie_lpid_va(unsigned long va, unsigned long lpid,
				   unsigned long ap, unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = va & ~(PPC_BITMASK(52, 63));
	rb |= ap << PPC_BITLSHIFT(58);
	rs = lpid;
	prs = 0; /* partition scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
}

static inline void fixup_tlbie(void)
{
	unsigned long pid = 0;
	unsigned long va = ((1UL << 52) - 1);

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_va(va, pid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB);
	}
}

static inline void fixup_tlbie_lpid(unsigned long lpid)
{
	unsigned long va = ((1UL << 52) - 1);

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_lpid_va(va, lpid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB);
	}
}

/*
 * We use 128 sets in radix mode and 256 sets in hpt mode.
 */
static inline void _tlbiel_pid(unsigned long pid, unsigned long ric)
{
	int set;

	asm volatile("ptesync": : :"memory");

	/*
	 * Flush the first set of the TLB, and if we're doing a RIC_FLUSH_ALL,
	 * also flush the entire Page Walk Cache.
	 */
	__tlbiel_pid(pid, 0, ric);

	/* For PWC, only one flush is needed */
	if (ric == RIC_FLUSH_PWC) {
		asm volatile("ptesync": : :"memory");
		return;
	}

	/* For the remaining sets, just flush the TLB */
	for (set = 1; set < POWER9_TLB_SETS_RADIX; set++)
		__tlbiel_pid(pid, set, RIC_FLUSH_TLB);

	asm volatile("ptesync": : :"memory");
	asm volatile(PPC_INVALIDATE_ERAT "; isync" : : :"memory");
}

static inline void _tlbie_pid(unsigned long pid, unsigned long ric)
{
	asm volatile("ptesync": : :"memory");

	/*
	 * Work around the fact that the "ric" argument to __tlbie_pid
	 * must be a compile-time constant to match the "i" constraint
	 * in the asm statement.
	 */
	switch (ric) {
	case RIC_FLUSH_TLB:
		__tlbie_pid(pid, RIC_FLUSH_TLB);
		break;
	case RIC_FLUSH_PWC:
		__tlbie_pid(pid, RIC_FLUSH_PWC);
		break;
	case RIC_FLUSH_ALL:
	default:
		__tlbie_pid(pid, RIC_FLUSH_ALL);
	}
	fixup_tlbie();
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

static inline void _tlbiel_lpid(unsigned long lpid, unsigned long ric)
{
	int set;

	VM_BUG_ON(mfspr(SPRN_LPID) != lpid);

	asm volatile("ptesync": : :"memory");

	/*
	 * Flush the first set of the TLB, and if we're doing a RIC_FLUSH_ALL,
	 * also flush the entire Page Walk Cache.
	 */
	__tlbiel_lpid(lpid, 0, ric);

	/* For PWC, only one flush is needed */
	if (ric == RIC_FLUSH_PWC) {
		asm volatile("ptesync": : :"memory");
		return;
	}

	/* For the remaining sets, just flush the TLB */
	for (set = 1; set < POWER9_TLB_SETS_RADIX; set++)
		__tlbiel_lpid(lpid, set, RIC_FLUSH_TLB);

	asm volatile("ptesync": : :"memory");
	asm volatile(PPC_INVALIDATE_ERAT "; isync" : : :"memory");
}

static inline void _tlbie_lpid(unsigned long lpid, unsigned long ric)
{
	asm volatile("ptesync": : :"memory");

	/*
	 * Work around the fact that the "ric" argument to __tlbie_lpid
	 * must be a compile-time constant to match the "i" constraint
	 * in the asm statement.
	 */
	switch (ric) {
	case RIC_FLUSH_TLB:
		__tlbie_lpid(lpid, RIC_FLUSH_TLB);
		break;
	case RIC_FLUSH_PWC:
		__tlbie_lpid(lpid, RIC_FLUSH_PWC);
		break;
	case RIC_FLUSH_ALL:
	default:
		__tlbie_lpid(lpid, RIC_FLUSH_ALL);
	}
	fixup_tlbie_lpid(lpid);
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

static inline void _tlbiel_lpid_guest(unsigned long lpid, unsigned long ric)
{
	int set;

	VM_BUG_ON(mfspr(SPRN_LPID) != lpid);

	asm volatile("ptesync": : :"memory");

	/*
	 * Flush the first set of the TLB, and if we're doing a RIC_FLUSH_ALL,
	 * also flush the entire Page Walk Cache.
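	 *
	 * This is the guest (process scoped, LPID taken from LPIDR) variant,
	 * used via radix__local_flush_tlb_lpid_guest(), e.g. by KVM around
	 * vCPU migration.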
	 */
	__tlbiel_lpid_guest(lpid, 0, ric);

	/* For PWC, only one flush is needed */
	if (ric == RIC_FLUSH_PWC) {
		asm volatile("ptesync": : :"memory");
		return;
	}

	/* For the remaining sets, just flush the TLB */
	for (set = 1; set < POWER9_TLB_SETS_RADIX; set++)
		__tlbiel_lpid_guest(lpid, set, RIC_FLUSH_TLB);

	asm volatile("ptesync": : :"memory");
	asm volatile(PPC_INVALIDATE_ERAT : : :"memory");
}

static inline void __tlbiel_va_range(unsigned long start, unsigned long end,
				     unsigned long pid, unsigned long page_size,
				     unsigned long psize)
{
	unsigned long addr;
	unsigned long ap = mmu_get_ap(psize);

	for (addr = start; addr < end; addr += page_size)
		__tlbiel_va(addr, pid, ap, RIC_FLUSH_TLB);
}

static inline void _tlbiel_va(unsigned long va, unsigned long pid,
			      unsigned long psize, unsigned long ric)
{
	unsigned long ap = mmu_get_ap(psize);

	asm volatile("ptesync": : :"memory");
	__tlbiel_va(va, pid, ap, ric);
	asm volatile("ptesync": : :"memory");
}

static inline void _tlbiel_va_range(unsigned long start, unsigned long end,
				    unsigned long pid, unsigned long page_size,
				    unsigned long psize, bool also_pwc)
{
	asm volatile("ptesync": : :"memory");
	if (also_pwc)
		__tlbiel_pid(pid, 0, RIC_FLUSH_PWC);
	__tlbiel_va_range(start, end, pid, page_size, psize);
	asm volatile("ptesync": : :"memory");
}

static inline void __tlbie_va_range(unsigned long start, unsigned long end,
				    unsigned long pid, unsigned long page_size,
				    unsigned long psize)
{
	unsigned long addr;
	unsigned long ap = mmu_get_ap(psize);

	for (addr = start; addr < end; addr += page_size)
		__tlbie_va(addr, pid, ap, RIC_FLUSH_TLB);
}

static inline void _tlbie_va(unsigned long va, unsigned long pid,
			     unsigned long psize, unsigned long ric)
{
	unsigned long ap = mmu_get_ap(psize);

	asm volatile("ptesync": : :"memory");
	__tlbie_va(va, pid, ap, ric);
	fixup_tlbie();
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

static inline void _tlbie_lpid_va(unsigned long va, unsigned long lpid,
				  unsigned long psize, unsigned long ric)
{
	unsigned long ap = mmu_get_ap(psize);

	asm volatile("ptesync": : :"memory");
	__tlbie_lpid_va(va, lpid, ap, ric);
	fixup_tlbie_lpid(lpid);
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

static inline void _tlbie_va_range(unsigned long start, unsigned long end,
				   unsigned long pid, unsigned long page_size,
				   unsigned long psize, bool also_pwc)
{
	asm volatile("ptesync": : :"memory");
	if (also_pwc)
		__tlbie_pid(pid, RIC_FLUSH_PWC);
	__tlbie_va_range(start, end, pid, page_size, psize);
	fixup_tlbie();
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

/*
 * Base TLB flushing operations:
 *
 *  - flush_tlb_mm(mm) flushes the specified mm context TLB's
 *  - flush_tlb_page(vma, vmaddr) flushes one page
 *  - flush_tlb_range(vma, start, end) flushes a range of pages
 *  - flush_tlb_kernel_range(start, end) flushes kernel pages
 *
 *  - local_* variants of page and mm only apply to the current
 *    processor
 */
void radix__local_flush_tlb_mm(struct mm_struct *mm)
{
	unsigned long pid;

	preempt_disable();
	pid = mm->context.id;
	if (pid != MMU_NO_CONTEXT)
		_tlbiel_pid(pid, RIC_FLUSH_TLB);
	preempt_enable();
}
EXPORT_SYMBOL(radix__local_flush_tlb_mm);

#ifndef CONFIG_SMP
void radix__local_flush_all_mm(struct mm_struct *mm)
{
	unsigned long pid;

	preempt_disable();
	pid = mm->context.id;
	if (pid != MMU_NO_CONTEXT)
		_tlbiel_pid(pid, RIC_FLUSH_ALL);
	preempt_enable();
}
EXPORT_SYMBOL(radix__local_flush_all_mm);
#endif /* CONFIG_SMP */

void radix__local_flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
				       int psize)
{
	unsigned long pid;

	preempt_disable();
	pid = mm->context.id;
	if (pid != MMU_NO_CONTEXT)
		_tlbiel_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
	preempt_enable();
}

void radix__local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
{
#ifdef CONFIG_HUGETLB_PAGE
	/* need the return fix for nohash.c */
	if (is_vm_hugetlb_page(vma))
		return radix__local_flush_hugetlb_page(vma, vmaddr);
#endif
	radix__local_flush_tlb_page_psize(vma->vm_mm, vmaddr, mmu_virtual_psize);
}
EXPORT_SYMBOL(radix__local_flush_tlb_page);

static bool mm_is_singlethreaded(struct mm_struct *mm)
{
	if (atomic_read(&mm->context.copros) > 0)
		return false;
	if (atomic_read(&mm->mm_users) <= 1 && current->mm == mm)
		return true;
	return false;
}

static bool mm_needs_flush_escalation(struct mm_struct *mm)
{
	/*
	 * P9 nest MMU has issues with the page walk cache
	 * caching PTEs and not flushing them properly when
	 * RIC = 0 for a PID/LPID invalidate
	 */
	if (atomic_read(&mm->context.copros) > 0)
		return true;
	return false;
}

#ifdef CONFIG_SMP
static void do_exit_flush_lazy_tlb(void *arg)
{
	struct mm_struct *mm = arg;
	unsigned long pid = mm->context.id;

	if (current->mm == mm)
		return; /* Local CPU */

	if (current->active_mm == mm) {
		/*
		 * Must be a kernel thread because sender is single-threaded.
		 */
		BUG_ON(current->mm);
		mmgrab(&init_mm);
		switch_mm(mm, &init_mm, current);
		current->active_mm = &init_mm;
		mmdrop(mm);
	}
	_tlbiel_pid(pid, RIC_FLUSH_ALL);
}

static void exit_flush_lazy_tlbs(struct mm_struct *mm)
{
	/*
	 * Would be nice if this was async so it could be run in
	 * parallel with our local flush, but generic code does not
	 * give a good API for it. Could extend the generic code or
	 * make a special powerpc IPI for flushing TLBs.
	 * For now it's not too performance critical.
	 */
	smp_call_function_many(mm_cpumask(mm), do_exit_flush_lazy_tlb,
				(void *)mm, 1);
	mm_reset_thread_local(mm);
}

void radix__flush_tlb_mm(struct mm_struct *mm)
{
	unsigned long pid;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	preempt_disable();
	/*
	 * Order loads of mm_cpumask vs previous stores to clear ptes before
	 * the invalidate.
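	 * A CPU that is not yet visible in the mask should then observe the
	 * cleared ptes by the time it does switch to this mm.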
	 * See barrier in switch_mm_irqs_off.
	 */
	smp_mb();
	if (!mm_is_thread_local(mm)) {
		if (unlikely(mm_is_singlethreaded(mm))) {
			exit_flush_lazy_tlbs(mm);
			goto local;
		}

		if (mm_needs_flush_escalation(mm))
			_tlbie_pid(pid, RIC_FLUSH_ALL);
		else
			_tlbie_pid(pid, RIC_FLUSH_TLB);
	} else {
local:
		_tlbiel_pid(pid, RIC_FLUSH_TLB);
	}
	preempt_enable();
}
EXPORT_SYMBOL(radix__flush_tlb_mm);

static void __flush_all_mm(struct mm_struct *mm, bool fullmm)
{
	unsigned long pid;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	preempt_disable();
	smp_mb(); /* see radix__flush_tlb_mm */
	if (!mm_is_thread_local(mm)) {
		if (unlikely(mm_is_singlethreaded(mm))) {
			if (!fullmm) {
				exit_flush_lazy_tlbs(mm);
				goto local;
			}
		}
		_tlbie_pid(pid, RIC_FLUSH_ALL);
	} else {
local:
		_tlbiel_pid(pid, RIC_FLUSH_ALL);
	}
	preempt_enable();
}

void radix__flush_all_mm(struct mm_struct *mm)
{
	__flush_all_mm(mm, false);
}
EXPORT_SYMBOL(radix__flush_all_mm);

void radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr)
{
	tlb->need_flush_all = 1;
}
EXPORT_SYMBOL(radix__flush_tlb_pwc);

void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
				 int psize)
{
	unsigned long pid;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	preempt_disable();
	smp_mb(); /* see radix__flush_tlb_mm */
	if (!mm_is_thread_local(mm)) {
		if (unlikely(mm_is_singlethreaded(mm))) {
			exit_flush_lazy_tlbs(mm);
			goto local;
		}
		_tlbie_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
	} else {
local:
		_tlbiel_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
	}
	preempt_enable();
}

void radix__flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
{
#ifdef CONFIG_HUGETLB_PAGE
	if (is_vm_hugetlb_page(vma))
		return radix__flush_hugetlb_page(vma, vmaddr);
#endif
	radix__flush_tlb_page_psize(vma->vm_mm, vmaddr, mmu_virtual_psize);
}
EXPORT_SYMBOL(radix__flush_tlb_page);

#else /* CONFIG_SMP */
#define radix__flush_all_mm radix__local_flush_all_mm
#endif /* CONFIG_SMP */

void radix__flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
	_tlbie_pid(0, RIC_FLUSH_ALL);
}
EXPORT_SYMBOL(radix__flush_tlb_kernel_range);

#define TLB_FLUSH_ALL -1UL

/*
 * Number of pages above which we invalidate the entire PID rather than
 * flush individual pages, for local and global flushes respectively.
 *
 * tlbie goes out to the interconnect and individual ops are more costly.
 * It also does not iterate over sets like the local tlbiel variant when
 * invalidating a full PID, so it has a far lower threshold to change from
 * individual page flushes to full-pid flushes.
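 *
 * The local ceiling is about two pages per TLB set: a full-PID tlbiel
 * flush costs one instruction per set (POWER9_TLB_SETS_RADIX of them),
 * so it wins over per-page flushes fairly quickly.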
 */
static unsigned long tlb_single_page_flush_ceiling __read_mostly = 33;
static unsigned long tlb_local_single_page_flush_ceiling __read_mostly = POWER9_TLB_SETS_RADIX * 2;

static inline void __radix__flush_tlb_range(struct mm_struct *mm,
					unsigned long start, unsigned long end,
					bool flush_all_sizes)
{
	unsigned long pid;
	unsigned int page_shift = mmu_psize_defs[mmu_virtual_psize].shift;
	unsigned long page_size = 1UL << page_shift;
	unsigned long nr_pages = (end - start) >> page_shift;
	bool local, full;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	preempt_disable();
	smp_mb(); /* see radix__flush_tlb_mm */
	if (!mm_is_thread_local(mm)) {
		if (unlikely(mm_is_singlethreaded(mm))) {
			if (end != TLB_FLUSH_ALL) {
				exit_flush_lazy_tlbs(mm);
				goto is_local;
			}
		}
		local = false;
		full = (end == TLB_FLUSH_ALL ||
				nr_pages > tlb_single_page_flush_ceiling);
	} else {
is_local:
		local = true;
		full = (end == TLB_FLUSH_ALL ||
				nr_pages > tlb_local_single_page_flush_ceiling);
	}

	if (full) {
		if (local) {
			_tlbiel_pid(pid, RIC_FLUSH_TLB);
		} else {
			if (mm_needs_flush_escalation(mm))
				_tlbie_pid(pid, RIC_FLUSH_ALL);
			else
				_tlbie_pid(pid, RIC_FLUSH_TLB);
		}
	} else {
		bool hflush = flush_all_sizes;
		bool gflush = flush_all_sizes;
		unsigned long hstart, hend;
		unsigned long gstart, gend;

		if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
			hflush = true;

		if (hflush) {
			hstart = (start + PMD_SIZE - 1) & PMD_MASK;
			hend = end & PMD_MASK;
			if (hstart == hend)
				hflush = false;
		}

		if (gflush) {
			gstart = (start + PUD_SIZE - 1) & PUD_MASK;
			gend = end & PUD_MASK;
			if (gstart == gend)
				gflush = false;
		}

		asm volatile("ptesync": : :"memory");
		if (local) {
			__tlbiel_va_range(start, end, pid, page_size, mmu_virtual_psize);
			if (hflush)
				__tlbiel_va_range(hstart, hend, pid,
						PMD_SIZE, MMU_PAGE_2M);
			if (gflush)
				__tlbiel_va_range(gstart, gend, pid,
						PUD_SIZE, MMU_PAGE_1G);
			asm volatile("ptesync": : :"memory");
		} else {
			__tlbie_va_range(start, end, pid, page_size, mmu_virtual_psize);
			if (hflush)
				__tlbie_va_range(hstart, hend, pid,
						PMD_SIZE, MMU_PAGE_2M);
			if (gflush)
				__tlbie_va_range(gstart, gend, pid,
						PUD_SIZE, MMU_PAGE_1G);
			fixup_tlbie();
			asm volatile("eieio; tlbsync; ptesync": : :"memory");
		}
	}
	preempt_enable();
}

void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
			    unsigned long end)
{
#ifdef CONFIG_HUGETLB_PAGE
	if (is_vm_hugetlb_page(vma))
		return radix__flush_hugetlb_tlb_range(vma, start, end);
#endif

	__radix__flush_tlb_range(vma->vm_mm, start, end, false);
}
EXPORT_SYMBOL(radix__flush_tlb_range);

static int radix_get_mmu_psize(int page_size)
{
	int psize;

	if (page_size == (1UL << mmu_psize_defs[mmu_virtual_psize].shift))
		psize = mmu_virtual_psize;
	else if (page_size == (1UL << mmu_psize_defs[MMU_PAGE_2M].shift))
		psize = MMU_PAGE_2M;
	else if (page_size == (1UL << mmu_psize_defs[MMU_PAGE_1G].shift))
		psize = MMU_PAGE_1G;
	else
		return -1;
	return psize;
}

/*
 * Flush partition scoped LPID address translation for all CPUs.
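 * page_size is expected to be one radix_get_mmu_psize() recognises
 * (the base page size, 2M or 1G); there is no fallback for other sizes.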
 */
void radix__flush_tlb_lpid_page(unsigned int lpid,
				unsigned long addr,
				unsigned long page_size)
{
	int psize = radix_get_mmu_psize(page_size);

	_tlbie_lpid_va(addr, lpid, psize, RIC_FLUSH_TLB);
}
EXPORT_SYMBOL_GPL(radix__flush_tlb_lpid_page);

/*
 * Flush partition scoped PWC from LPID for all CPUs.
 */
void radix__flush_pwc_lpid(unsigned int lpid)
{
	_tlbie_lpid(lpid, RIC_FLUSH_PWC);
}
EXPORT_SYMBOL_GPL(radix__flush_pwc_lpid);

/*
 * Flush partition scoped translations from LPID (=LPIDR)
 */
void radix__flush_tlb_lpid(unsigned int lpid)
{
	_tlbie_lpid(lpid, RIC_FLUSH_ALL);
}
EXPORT_SYMBOL_GPL(radix__flush_tlb_lpid);

/*
 * Flush partition scoped translations from LPID (=LPIDR)
 */
void radix__local_flush_tlb_lpid(unsigned int lpid)
{
	_tlbiel_lpid(lpid, RIC_FLUSH_ALL);
}
EXPORT_SYMBOL_GPL(radix__local_flush_tlb_lpid);

/*
 * Flush process scoped translations from LPID (=LPIDR).
 * The important difference is that the guest normally manages its own
 * translations, but some cases, e.g. vCPU migration, require KVM to flush.
 */
void radix__local_flush_tlb_lpid_guest(unsigned int lpid)
{
	_tlbiel_lpid_guest(lpid, RIC_FLUSH_ALL);
}
EXPORT_SYMBOL_GPL(radix__local_flush_tlb_lpid_guest);

static void radix__flush_tlb_pwc_range_psize(struct mm_struct *mm, unsigned long start,
					     unsigned long end, int psize);

void radix__tlb_flush(struct mmu_gather *tlb)
{
	int psize = 0;
	struct mm_struct *mm = tlb->mm;
	int page_size = tlb->page_size;
	unsigned long start = tlb->start;
	unsigned long end = tlb->end;

	/*
	 * If the page size is not something we understand, do a full mm flush.
	 *
	 * A "fullmm" flush must always do a flush_all_mm (RIC=2) flush
	 * that flushes the process table entry cache upon process teardown.
	 * See the comment for radix in arch_exit_mmap().
	 */
	if (tlb->fullmm) {
		__flush_all_mm(mm, true);
#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLB_PAGE)
	} else if (mm_tlb_flush_nested(mm)) {
		/*
		 * If there is a concurrent invalidation that is clearing ptes,
		 * then it's possible this invalidation will miss one of those
		 * cleared ptes and miss flushing the TLB. If this invalidate
		 * returns before the other one flushes TLBs, that can result
		 * in it returning while there are still valid TLBs inside the
		 * range to be invalidated.
		 *
		 * See mm/memory.c:tlb_finish_mmu() for more details.
		 *
		 * The solution to this is to ensure the entire range is always
		 * flushed here. The problem for powerpc is that the flushes
		 * are page size specific, so this "forced flush" would not
		 * do the right thing if there is a mix of page sizes in
		 * the range to be invalidated. So use __flush_tlb_range
		 * which invalidates all possible page sizes in the range.
		 *
		 * PWC flush is probably not required because the core code
		 * shouldn't free page tables in this path, but accounting
		 * for the possibility makes us a bit more robust.
		 *
		 * need_flush_all is an uncommon case because page table
		 * teardown should be done with exclusive locks held (but
		 * after locks are dropped another invalidate could come
		 * in), it could be optimized further if necessary.
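		 *
		 * (need_flush_all is set by radix__flush_tlb_pwc() above when
		 * a page walk cache flush has been requested for this gather.)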
		 */
		if (!tlb->need_flush_all)
			__radix__flush_tlb_range(mm, start, end, true);
		else
			radix__flush_all_mm(mm);
#endif
	} else if ((psize = radix_get_mmu_psize(page_size)) == -1) {
		if (!tlb->need_flush_all)
			radix__flush_tlb_mm(mm);
		else
			radix__flush_all_mm(mm);
	} else {
		if (!tlb->need_flush_all)
			radix__flush_tlb_range_psize(mm, start, end, psize);
		else
			radix__flush_tlb_pwc_range_psize(mm, start, end, psize);
	}
	tlb->need_flush_all = 0;
}

static __always_inline void __radix__flush_tlb_range_psize(struct mm_struct *mm,
				unsigned long start, unsigned long end,
				int psize, bool also_pwc)
{
	unsigned long pid;
	unsigned int page_shift = mmu_psize_defs[psize].shift;
	unsigned long page_size = 1UL << page_shift;
	unsigned long nr_pages = (end - start) >> page_shift;
	bool local, full;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	preempt_disable();
	smp_mb(); /* see radix__flush_tlb_mm */
	if (!mm_is_thread_local(mm)) {
		if (unlikely(mm_is_singlethreaded(mm))) {
			if (end != TLB_FLUSH_ALL) {
				exit_flush_lazy_tlbs(mm);
				goto is_local;
			}
		}
		local = false;
		full = (end == TLB_FLUSH_ALL ||
				nr_pages > tlb_single_page_flush_ceiling);
	} else {
is_local:
		local = true;
		full = (end == TLB_FLUSH_ALL ||
				nr_pages > tlb_local_single_page_flush_ceiling);
	}

	if (full) {
		if (local) {
			_tlbiel_pid(pid, also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB);
		} else {
			if (mm_needs_flush_escalation(mm))
				also_pwc = true;

			_tlbie_pid(pid, also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB);
		}
	} else {
		if (local)
			_tlbiel_va_range(start, end, pid, page_size, psize, also_pwc);
		else
			_tlbie_va_range(start, end, pid, page_size, psize, also_pwc);
	}
	preempt_enable();
}

void radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start,
				  unsigned long end, int psize)
{
	return __radix__flush_tlb_range_psize(mm, start, end, psize, false);
}

static void radix__flush_tlb_pwc_range_psize(struct mm_struct *mm, unsigned long start,
					     unsigned long end, int psize)
{
	__radix__flush_tlb_range_psize(mm, start, end, psize, true);
}

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr)
{
	unsigned long pid, end;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	/* 4k page size, just blow the world */
	if (PAGE_SIZE == 0x1000) {
		radix__flush_all_mm(mm);
		return;
	}

	end = addr + HPAGE_PMD_SIZE;

	/*
	 * Otherwise first do the PWC, then iterate the pages.
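	 * (also_pwc == true below, so page walk cache entries for the
	 * collapsed page table are invalidated as well.)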
	 */
	preempt_disable();
	smp_mb(); /* see radix__flush_tlb_mm */
	if (!mm_is_thread_local(mm)) {
		if (unlikely(mm_is_singlethreaded(mm))) {
			exit_flush_lazy_tlbs(mm);
			goto local;
		}
		_tlbie_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
	} else {
local:
		_tlbiel_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
	}

	preempt_enable();
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

void radix__flush_pmd_tlb_range(struct vm_area_struct *vma,
				unsigned long start, unsigned long end)
{
	radix__flush_tlb_range_psize(vma->vm_mm, start, end, MMU_PAGE_2M);
}
EXPORT_SYMBOL(radix__flush_pmd_tlb_range);

void radix__flush_tlb_all(void)
{
	unsigned long rb, prs, r, rs;
	unsigned long ric = RIC_FLUSH_ALL;

	rb = 0x3 << PPC_BITLSHIFT(53); /* IS = 3 */
	prs = 0; /* partition scoped */
	r = 1;   /* radix format */
	rs = 1 & ((1UL << 32) - 1); /* any LPID value to flush guest mappings */

	asm volatile("ptesync": : :"memory");
	/*
	 * Now flush guest entries by passing PRS = 1 and LPID != 0.
	 */
	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(1), "i"(ric), "r"(rs) : "memory");
	/*
	 * Now flush host entries by passing PRS = 0 and LPID == 0.
	 */
	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(0) : "memory");
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
extern void radix_kvm_prefetch_workaround(struct mm_struct *mm)
{
	unsigned long pid = mm->context.id;

	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	/*
	 * If this context hasn't run on that CPU before and KVM is
	 * around, there's a slim chance that the guest on another
	 * CPU just brought an obsolete translation into the TLB of
	 * this CPU due to a bad prefetch using the guest PID on
	 * the way into the hypervisor.
	 *
	 * We work around this here. If KVM is possible, we check if
	 * any sibling thread is in KVM. If it is, the window may exist
	 * and thus we flush that PID from the core.
	 *
	 * A potential future improvement would be to mark which PIDs
	 * have never been used on the system and avoid it if the PID
	 * is new and the process has no other cpumask bit set.
	 */
	if (cpu_has_feature(CPU_FTR_HVMODE) && radix_enabled()) {
		int cpu = smp_processor_id();
		int sib = cpu_first_thread_sibling(cpu);
		bool flush = false;

		for (; sib <= cpu_last_thread_sibling(cpu) && !flush; sib++) {
			if (sib == cpu)
				continue;
			if (!cpu_possible(sib))
				continue;
			if (paca_ptrs[sib]->kvm_hstate.kvm_vcpu)
				flush = true;
		}
		if (flush)
			_tlbiel_pid(pid, RIC_FLUSH_ALL);
	}
}
EXPORT_SYMBOL_GPL(radix_kvm_prefetch_workaround);
#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */