// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * TLB flush routines for radix kernels.
 *
 * Copyright 2015-2016, Aneesh Kumar K.V, IBM Corporation.
 */

#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/memblock.h>
#include <linux/mmu_context.h>
#include <linux/sched/mm.h>

#include <asm/ppc-opcode.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/trace.h>
#include <asm/cputhreads.h>

#define RIC_FLUSH_TLB 0
#define RIC_FLUSH_PWC 1
#define RIC_FLUSH_ALL 2

/*
 * tlbiel instruction for radix, set invalidation
 * i.e., r=1 and is=01 or is=10 or is=11
 */
static __always_inline void tlbiel_radix_set_isa300(unsigned int set, unsigned int is,
					unsigned int pid,
					unsigned int ric, unsigned int prs)
{
	unsigned long rb;
	unsigned long rs;

	rb = (set << PPC_BITLSHIFT(51)) | (is << PPC_BITLSHIFT(53));
	rs = ((unsigned long)pid << PPC_BITLSHIFT(31));

	asm volatile(PPC_TLBIEL(%0, %1, %2, %3, 1)
		     : : "r"(rb), "r"(rs), "i"(ric), "i"(prs)
		     : "memory");
}

static void tlbiel_all_isa300(unsigned int num_sets, unsigned int is)
{
	unsigned int set;

	asm volatile("ptesync": : :"memory");

	/*
	 * Flush the first set of the TLB, and the entire Page Walk Cache
	 * and partition table entries. Then flush the remaining sets of the
	 * TLB.
	 */

	if (early_cpu_has_feature(CPU_FTR_HVMODE)) {
		/* MSR[HV] should flush partition scope translations first. */
		tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 0);
		for (set = 1; set < num_sets; set++)
			tlbiel_radix_set_isa300(set, is, 0, RIC_FLUSH_TLB, 0);
	}

	/* Flush process scoped entries. */
	tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 1);
	for (set = 1; set < num_sets; set++)
		tlbiel_radix_set_isa300(set, is, 0, RIC_FLUSH_TLB, 1);

	asm volatile("ptesync": : :"memory");
}

void radix__tlbiel_all(unsigned int action)
{
	unsigned int is;

	switch (action) {
	case TLB_INVAL_SCOPE_GLOBAL:
		is = 3;
		break;
	case TLB_INVAL_SCOPE_LPID:
		is = 2;
		break;
	default:
		BUG();
	}

	if (early_cpu_has_feature(CPU_FTR_ARCH_300))
		tlbiel_all_isa300(POWER9_TLB_SETS_RADIX, is);
	else
		WARN(1, "%s called on pre-POWER9 CPU\n", __func__);

	asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT "; isync" : : :"memory");
}

static __always_inline void __tlbiel_pid(unsigned long pid, int set,
					 unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = PPC_BIT(53); /* IS = 1 */
	rb |= set << PPC_BITLSHIFT(51);
	rs = ((unsigned long)pid) << PPC_BITLSHIFT(31);
	prs = 1; /* process scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(0, 1, rb, rs, ric, prs, r);
}

static __always_inline void __tlbie_pid(unsigned long pid, unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = PPC_BIT(53); /* IS = 1 */
	rs = pid << PPC_BITLSHIFT(31);
	prs = 1; /* process scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(0, 0, rb, rs, ric, prs, r);
}
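/*
 * Worked example of the operand encoding the helpers in this file build
 * (a description of the code above, hedged rather than a restatement of
 * the ISA): PPC_BITLSHIFT(n) is the left shift that lands a value on
 * big-endian bit n, so PPC_BITLSHIFT(31) == 32 and for pid = 5 the PID
 * helpers build:
 *
 *	rs = 5UL << PPC_BITLSHIFT(31);	the PID in the upper word of RS
 *	rb = PPC_BIT(53);		IS = 1, per the inline comments
 *
 * The LPID variants below pass the LPID unshifted in the lower word of
 * RS with IS = 2 (PPC_BIT(52)), and radix__flush_tlb_all() at the bottom
 * of the file uses IS = 3 (0x3 << PPC_BITLSHIFT(53)) to cover everything.
 */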
static __always_inline void __tlbie_lpid(unsigned long lpid, unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = PPC_BIT(52); /* IS = 2 */
	rs = lpid;
	prs = 0; /* partition scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
}

static __always_inline void __tlbie_lpid_guest(unsigned long lpid, unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = PPC_BIT(52); /* IS = 2 */
	rs = lpid;
	prs = 1; /* process scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
}

static __always_inline void __tlbiel_va(unsigned long va, unsigned long pid,
					unsigned long ap, unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = va & ~(PPC_BITMASK(52, 63));
	rb |= ap << PPC_BITLSHIFT(58);
	rs = pid << PPC_BITLSHIFT(31);
	prs = 1; /* process scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(0, 1, rb, rs, ric, prs, r);
}

static __always_inline void __tlbie_va(unsigned long va, unsigned long pid,
				       unsigned long ap, unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = va & ~(PPC_BITMASK(52, 63));
	rb |= ap << PPC_BITLSHIFT(58);
	rs = pid << PPC_BITLSHIFT(31);
	prs = 1; /* process scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(0, 0, rb, rs, ric, prs, r);
}

static __always_inline void __tlbie_lpid_va(unsigned long va, unsigned long lpid,
					    unsigned long ap, unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = va & ~(PPC_BITMASK(52, 63));
	rb |= ap << PPC_BITLSHIFT(58);
	rs = lpid;
	prs = 0; /* partition scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
}

static inline void fixup_tlbie(void)
{
	unsigned long pid = 0;
	unsigned long va = ((1UL << 52) - 1);

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_va(va, pid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB);
	}
}

static inline void fixup_tlbie_lpid(unsigned long lpid)
{
	unsigned long va = ((1UL << 52) - 1);

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_lpid_va(va, lpid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB);
	}
}
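/*
 * Hedged note on the two fixup helpers above: on CPUs with
 * CPU_FTR_P9_TLBIE_BUG set, every broadcast invalidation sequence in
 * this file is followed by a ptesync plus one extra tlbie to a benign
 * address (the highest 52-bit VA, scoped to the pid/lpid of the flush
 * that just ran) before the closing eieio; tlbsync; ptesync. The global
 * flush helpers below all share that shape:
 *
 *	ptesync
 *	tlbie ...			(the real invalidations)
 *	fixup_tlbie() / fixup_tlbie_lpid()
 *	eieio; tlbsync; ptesync
 */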
/*
 * We use 128 sets in radix mode and 256 sets in hpt mode.
 */
static __always_inline void _tlbiel_pid(unsigned long pid, unsigned long ric)
{
	int set;

	asm volatile("ptesync": : :"memory");

	/*
	 * Flush the first set of the TLB, and if we're doing a RIC_FLUSH_ALL,
	 * also flush the entire Page Walk Cache.
	 */
	__tlbiel_pid(pid, 0, ric);

	/* For PWC, only one flush is needed */
	if (ric == RIC_FLUSH_PWC) {
		asm volatile("ptesync": : :"memory");
		return;
	}

	/* For the remaining sets, just flush the TLB */
	for (set = 1; set < POWER9_TLB_SETS_RADIX; set++)
		__tlbiel_pid(pid, set, RIC_FLUSH_TLB);

	asm volatile("ptesync": : :"memory");
	asm volatile(PPC_RADIX_INVALIDATE_ERAT_USER "; isync" : : :"memory");
}

static inline void _tlbie_pid(unsigned long pid, unsigned long ric)
{
	asm volatile("ptesync": : :"memory");

	/*
	 * Work around the fact that the "ric" argument to __tlbie_pid
	 * must be a compile-time constant to match the "i" constraint
	 * in the asm statement.
	 */
	switch (ric) {
	case RIC_FLUSH_TLB:
		__tlbie_pid(pid, RIC_FLUSH_TLB);
		break;
	case RIC_FLUSH_PWC:
		__tlbie_pid(pid, RIC_FLUSH_PWC);
		break;
	case RIC_FLUSH_ALL:
	default:
		__tlbie_pid(pid, RIC_FLUSH_ALL);
	}
	fixup_tlbie();
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

static inline void _tlbie_lpid(unsigned long lpid, unsigned long ric)
{
	asm volatile("ptesync": : :"memory");

	/*
	 * Work around the fact that the "ric" argument to __tlbie_lpid
	 * must be a compile-time constant to match the "i" constraint
	 * in the asm statement.
	 */
	switch (ric) {
	case RIC_FLUSH_TLB:
		__tlbie_lpid(lpid, RIC_FLUSH_TLB);
		break;
	case RIC_FLUSH_PWC:
		__tlbie_lpid(lpid, RIC_FLUSH_PWC);
		break;
	case RIC_FLUSH_ALL:
	default:
		__tlbie_lpid(lpid, RIC_FLUSH_ALL);
	}
	fixup_tlbie_lpid(lpid);
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}
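/*
 * The RIC argument selects what a single invalidation hits (values from
 * the defines at the top of this file): RIC_FLUSH_TLB (0) flushes TLB
 * entries only, RIC_FLUSH_PWC (1) flushes the Page Walk Cache only, and
 * RIC_FLUSH_ALL (2) flushes both. The switch statements above and below
 * exist only to turn a runtime "ric" into the compile-time constant the
 * "i" asm constraint requires; e.g. a caller that wants everything gone
 * for a PID simply does:
 *
 *	_tlbie_pid(pid, RIC_FLUSH_ALL);
 */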
static __always_inline void _tlbie_lpid_guest(unsigned long lpid, unsigned long ric)
{
	/*
	 * Work around the fact that the "ric" argument to __tlbie_lpid_guest
	 * must be a compile-time constant to match the "i" constraint
	 * in the asm statement.
	 */
	switch (ric) {
	case RIC_FLUSH_TLB:
		__tlbie_lpid_guest(lpid, RIC_FLUSH_TLB);
		break;
	case RIC_FLUSH_PWC:
		__tlbie_lpid_guest(lpid, RIC_FLUSH_PWC);
		break;
	case RIC_FLUSH_ALL:
	default:
		__tlbie_lpid_guest(lpid, RIC_FLUSH_ALL);
	}
	fixup_tlbie_lpid(lpid);
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

static inline void __tlbiel_va_range(unsigned long start, unsigned long end,
				     unsigned long pid, unsigned long page_size,
				     unsigned long psize)
{
	unsigned long addr;
	unsigned long ap = mmu_get_ap(psize);

	for (addr = start; addr < end; addr += page_size)
		__tlbiel_va(addr, pid, ap, RIC_FLUSH_TLB);
}

static __always_inline void _tlbiel_va(unsigned long va, unsigned long pid,
				       unsigned long psize, unsigned long ric)
{
	unsigned long ap = mmu_get_ap(psize);

	asm volatile("ptesync": : :"memory");
	__tlbiel_va(va, pid, ap, ric);
	asm volatile("ptesync": : :"memory");
}

static inline void _tlbiel_va_range(unsigned long start, unsigned long end,
				    unsigned long pid, unsigned long page_size,
				    unsigned long psize, bool also_pwc)
{
	asm volatile("ptesync": : :"memory");
	if (also_pwc)
		__tlbiel_pid(pid, 0, RIC_FLUSH_PWC);
	__tlbiel_va_range(start, end, pid, page_size, psize);
	asm volatile("ptesync": : :"memory");
}

static inline void __tlbie_va_range(unsigned long start, unsigned long end,
				    unsigned long pid, unsigned long page_size,
				    unsigned long psize)
{
	unsigned long addr;
	unsigned long ap = mmu_get_ap(psize);

	for (addr = start; addr < end; addr += page_size)
		__tlbie_va(addr, pid, ap, RIC_FLUSH_TLB);
}

static __always_inline void _tlbie_va(unsigned long va, unsigned long pid,
				      unsigned long psize, unsigned long ric)
{
	unsigned long ap = mmu_get_ap(psize);

	asm volatile("ptesync": : :"memory");
	__tlbie_va(va, pid, ap, ric);
	fixup_tlbie();
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

static __always_inline void _tlbie_lpid_va(unsigned long va, unsigned long lpid,
					   unsigned long psize, unsigned long ric)
{
	unsigned long ap = mmu_get_ap(psize);

	asm volatile("ptesync": : :"memory");
	__tlbie_lpid_va(va, lpid, ap, ric);
	fixup_tlbie_lpid(lpid);
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

static inline void _tlbie_va_range(unsigned long start, unsigned long end,
				   unsigned long pid, unsigned long page_size,
				   unsigned long psize, bool also_pwc)
{
	asm volatile("ptesync": : :"memory");
	if (also_pwc)
		__tlbie_pid(pid, RIC_FLUSH_PWC);
	__tlbie_va_range(start, end, pid, page_size, psize);
	fixup_tlbie();
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

/*
 * Base TLB flushing operations:
 *
 *  - flush_tlb_mm(mm) flushes the specified mm context TLBs
 *  - flush_tlb_page(vma, vmaddr) flushes one page
 *  - flush_tlb_range(vma, start, end) flushes a range of pages
 *  - flush_tlb_kernel_range(start, end) flushes kernel pages
 *
 *  - local_* variants of page and mm only apply to the current
 *    processor
 */
void radix__local_flush_tlb_mm(struct mm_struct *mm)
{
	unsigned long pid;

	preempt_disable();
	pid = mm->context.id;
	if (pid != MMU_NO_CONTEXT)
		_tlbiel_pid(pid, RIC_FLUSH_TLB);
	preempt_enable();
}
EXPORT_SYMBOL(radix__local_flush_tlb_mm);
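/*
 * Synchronization pattern used throughout this file (a summary of the
 * sequences above, not an additional requirement): local tlbiel runs are
 * bracketed by ptesync on both sides, while broadcast tlbie runs open
 * with ptesync and close with eieio; tlbsync; ptesync so the invalidation
 * is complete on all CPUs before the flush returns. Full local PID
 * flushes also invalidate the user ERAT (PPC_RADIX_INVALIDATE_ERAT_USER).
 */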
#ifndef CONFIG_SMP
void radix__local_flush_all_mm(struct mm_struct *mm)
{
	unsigned long pid;

	preempt_disable();
	pid = mm->context.id;
	if (pid != MMU_NO_CONTEXT)
		_tlbiel_pid(pid, RIC_FLUSH_ALL);
	preempt_enable();
}
EXPORT_SYMBOL(radix__local_flush_all_mm);
#endif /* CONFIG_SMP */

void radix__local_flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
				       int psize)
{
	unsigned long pid;

	preempt_disable();
	pid = mm->context.id;
	if (pid != MMU_NO_CONTEXT)
		_tlbiel_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
	preempt_enable();
}

void radix__local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
{
#ifdef CONFIG_HUGETLB_PAGE
	/* need the return fix for nohash.c */
	if (is_vm_hugetlb_page(vma))
		return radix__local_flush_hugetlb_page(vma, vmaddr);
#endif
	radix__local_flush_tlb_page_psize(vma->vm_mm, vmaddr, mmu_virtual_psize);
}
EXPORT_SYMBOL(radix__local_flush_tlb_page);

static bool mm_is_singlethreaded(struct mm_struct *mm)
{
	if (atomic_read(&mm->context.copros) > 0)
		return false;
	if (atomic_read(&mm->mm_users) <= 1 && current->mm == mm)
		return true;
	return false;
}

static bool mm_needs_flush_escalation(struct mm_struct *mm)
{
	/*
	 * The P9 nest MMU has issues with the page walk cache
	 * caching PTEs and not flushing them properly when
	 * RIC = 0 for a PID/LPID invalidate.
	 */
	if (atomic_read(&mm->context.copros) > 0)
		return true;
	return false;
}

#ifdef CONFIG_SMP
static void do_exit_flush_lazy_tlb(void *arg)
{
	struct mm_struct *mm = arg;
	unsigned long pid = mm->context.id;

	if (current->mm == mm)
		return; /* Local CPU */

	if (current->active_mm == mm) {
		/*
		 * Must be a kernel thread because sender is single-threaded.
		 */
		BUG_ON(current->mm);
		mmgrab(&init_mm);
		switch_mm(mm, &init_mm, current);
		current->active_mm = &init_mm;
		mmdrop(mm);
	}
	_tlbiel_pid(pid, RIC_FLUSH_ALL);
}

static void exit_flush_lazy_tlbs(struct mm_struct *mm)
{
	/*
	 * Would be nice if this were async so it could be run in
	 * parallel with our local flush, but generic code does not
	 * give a good API for it. Could extend the generic code or
	 * make a special powerpc IPI for flushing TLBs.
	 * For now it's not too performance critical.
	 */
	smp_call_function_many(mm_cpumask(mm), do_exit_flush_lazy_tlb,
			       (void *)mm, 1);
	mm_reset_thread_local(mm);
}
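/*
 * A sketch of the optimization implemented below: when an mm has gone
 * single-threaded (mm_is_singlethreaded()), the flush paths IPI the CPUs
 * still holding it as a lazy active_mm via exit_flush_lazy_tlbs() above,
 * switch them to init_mm with a local RIC_FLUSH_ALL, and mark the mm
 * thread-local again. Every subsequent flush can then take the cheap
 * "local:" branch and use tlbiel instead of broadcast tlbie.
 */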
void radix__flush_tlb_mm(struct mm_struct *mm)
{
	unsigned long pid;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	preempt_disable();
	/*
	 * Order loads of mm_cpumask vs previous stores to clear ptes before
	 * the invalidate. See barrier in switch_mm_irqs_off.
	 */
	smp_mb();
	if (!mm_is_thread_local(mm)) {
		if (unlikely(mm_is_singlethreaded(mm))) {
			exit_flush_lazy_tlbs(mm);
			goto local;
		}

		if (mm_needs_flush_escalation(mm))
			_tlbie_pid(pid, RIC_FLUSH_ALL);
		else
			_tlbie_pid(pid, RIC_FLUSH_TLB);
	} else {
local:
		_tlbiel_pid(pid, RIC_FLUSH_TLB);
	}
	preempt_enable();
}
EXPORT_SYMBOL(radix__flush_tlb_mm);

static void __flush_all_mm(struct mm_struct *mm, bool fullmm)
{
	unsigned long pid;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	preempt_disable();
	smp_mb(); /* see radix__flush_tlb_mm */
	if (!mm_is_thread_local(mm)) {
		if (unlikely(mm_is_singlethreaded(mm))) {
			if (!fullmm) {
				exit_flush_lazy_tlbs(mm);
				goto local;
			}
		}
		_tlbie_pid(pid, RIC_FLUSH_ALL);
	} else {
local:
		_tlbiel_pid(pid, RIC_FLUSH_ALL);
	}
	preempt_enable();
}

void radix__flush_all_mm(struct mm_struct *mm)
{
	__flush_all_mm(mm, false);
}
EXPORT_SYMBOL(radix__flush_all_mm);

void radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr)
{
	tlb->need_flush_all = 1;
}
EXPORT_SYMBOL(radix__flush_tlb_pwc);

void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
				 int psize)
{
	unsigned long pid;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	preempt_disable();
	smp_mb(); /* see radix__flush_tlb_mm */
	if (!mm_is_thread_local(mm)) {
		if (unlikely(mm_is_singlethreaded(mm))) {
			exit_flush_lazy_tlbs(mm);
			goto local;
		}
		_tlbie_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
	} else {
local:
		_tlbiel_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
	}
	preempt_enable();
}

void radix__flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
{
#ifdef CONFIG_HUGETLB_PAGE
	if (is_vm_hugetlb_page(vma))
		return radix__flush_hugetlb_page(vma, vmaddr);
#endif
	radix__flush_tlb_page_psize(vma->vm_mm, vmaddr, mmu_virtual_psize);
}
EXPORT_SYMBOL(radix__flush_tlb_page);

#else /* CONFIG_SMP */
#define radix__flush_all_mm radix__local_flush_all_mm
#endif /* CONFIG_SMP */

/*
 * If kernel TLBIs ever become local rather than global, then
 * drivers/misc/ocxl/link.c:ocxl_link_add_pe will need some work, as it
 * assumes kernel TLBIs are global.
 */
void radix__flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
	_tlbie_pid(0, RIC_FLUSH_ALL);
}
EXPORT_SYMBOL(radix__flush_tlb_kernel_range);

#define TLB_FLUSH_ALL -1UL

/*
 * Number of pages above which we invalidate the entire PID rather than
 * flush individual pages, for global and local flushes respectively.
 *
 * tlbie goes out to the interconnect and individual ops are more costly.
 * It also does not iterate over sets like the local tlbiel variant when
 * invalidating a full PID, so it has a far lower threshold to change from
 * individual page flushes to full-pid flushes.
 */
static unsigned long tlb_single_page_flush_ceiling __read_mostly = 33;
static unsigned long tlb_local_single_page_flush_ceiling __read_mostly = POWER9_TLB_SETS_RADIX * 2;
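/*
 * Worked numbers for the two ceilings (derived from the values above;
 * the byte figures depend on the configured base page size): with
 * POWER9's 128 radix TLB sets, the local ceiling is 256 pages, i.e.
 * 16MB with 64K pages or 1MB with 4K pages, past which one full-PID
 * tlbiel loop is assumed cheaper than per-page flushes. The global
 * tlbie ceiling of 33 pages is far lower because a single broadcast
 * full-PID tlbie replaces the whole per-page sequence.
 */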
static inline void __radix__flush_tlb_range(struct mm_struct *mm,
					    unsigned long start, unsigned long end,
					    bool flush_all_sizes)
{
	unsigned long pid;
	unsigned int page_shift = mmu_psize_defs[mmu_virtual_psize].shift;
	unsigned long page_size = 1UL << page_shift;
	unsigned long nr_pages = (end - start) >> page_shift;
	bool local, full;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	preempt_disable();
	smp_mb(); /* see radix__flush_tlb_mm */
	if (!mm_is_thread_local(mm)) {
		if (unlikely(mm_is_singlethreaded(mm))) {
			if (end != TLB_FLUSH_ALL) {
				exit_flush_lazy_tlbs(mm);
				goto is_local;
			}
		}
		local = false;
		full = (end == TLB_FLUSH_ALL ||
			nr_pages > tlb_single_page_flush_ceiling);
	} else {
is_local:
		local = true;
		full = (end == TLB_FLUSH_ALL ||
			nr_pages > tlb_local_single_page_flush_ceiling);
	}

	if (full) {
		if (local) {
			_tlbiel_pid(pid, RIC_FLUSH_TLB);
		} else {
			if (mm_needs_flush_escalation(mm))
				_tlbie_pid(pid, RIC_FLUSH_ALL);
			else
				_tlbie_pid(pid, RIC_FLUSH_TLB);
		}
	} else {
		bool hflush = flush_all_sizes;
		bool gflush = flush_all_sizes;
		unsigned long hstart, hend;
		unsigned long gstart, gend;

		if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
			hflush = true;

		if (hflush) {
			hstart = (start + PMD_SIZE - 1) & PMD_MASK;
			hend = end & PMD_MASK;
			if (hstart == hend)
				hflush = false;
		}

		if (gflush) {
			gstart = (start + PUD_SIZE - 1) & PUD_MASK;
			gend = end & PUD_MASK;
			if (gstart == gend)
				gflush = false;
		}

		asm volatile("ptesync": : :"memory");
		if (local) {
			__tlbiel_va_range(start, end, pid, page_size, mmu_virtual_psize);
			if (hflush)
				__tlbiel_va_range(hstart, hend, pid,
						  PMD_SIZE, MMU_PAGE_2M);
			if (gflush)
				__tlbiel_va_range(gstart, gend, pid,
						  PUD_SIZE, MMU_PAGE_1G);
			asm volatile("ptesync": : :"memory");
		} else {
			__tlbie_va_range(start, end, pid, page_size, mmu_virtual_psize);
			if (hflush)
				__tlbie_va_range(hstart, hend, pid,
						 PMD_SIZE, MMU_PAGE_2M);
			if (gflush)
				__tlbie_va_range(gstart, gend, pid,
						 PUD_SIZE, MMU_PAGE_1G);
			fixup_tlbie();
			asm volatile("eieio; tlbsync; ptesync": : :"memory");
		}
	}
	preempt_enable();
}

void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
			    unsigned long end)
{
#ifdef CONFIG_HUGETLB_PAGE
	if (is_vm_hugetlb_page(vma))
		return radix__flush_hugetlb_tlb_range(vma, start, end);
#endif

	__radix__flush_tlb_range(vma->vm_mm, start, end, false);
}
EXPORT_SYMBOL(radix__flush_tlb_range);

static int radix_get_mmu_psize(int page_size)
{
	int psize;

	if (page_size == (1UL << mmu_psize_defs[mmu_virtual_psize].shift))
		psize = mmu_virtual_psize;
	else if (page_size == (1UL << mmu_psize_defs[MMU_PAGE_2M].shift))
		psize = MMU_PAGE_2M;
	else if (page_size == (1UL << mmu_psize_defs[MMU_PAGE_1G].shift))
		psize = MMU_PAGE_1G;
	else
		return -1;
	return psize;
}
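/*
 * Worked example of the multi-size sub-range handling in
 * __radix__flush_tlb_range() above (illustrative numbers): for a range
 * of 0x1f0000..0x410000 with 2M PMDs, hstart rounds up to 0x200000 and
 * hend rounds down to 0x400000, so one extra 2M-psize invalidation
 * covers any huge PTE in the window while the base-psize loop covers
 * the whole range. If the rounded window collapses (hstart == hend),
 * the extra flush is skipped. The PUD/1G case works the same way.
 */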
/*
 * Flush partition scoped LPID address translation for all CPUs.
 */
void radix__flush_tlb_lpid_page(unsigned int lpid,
				unsigned long addr,
				unsigned long page_size)
{
	int psize = radix_get_mmu_psize(page_size);

	_tlbie_lpid_va(addr, lpid, psize, RIC_FLUSH_TLB);
}
EXPORT_SYMBOL_GPL(radix__flush_tlb_lpid_page);

/*
 * Flush partition scoped PWC from LPID for all CPUs.
 */
void radix__flush_pwc_lpid(unsigned int lpid)
{
	_tlbie_lpid(lpid, RIC_FLUSH_PWC);
}
EXPORT_SYMBOL_GPL(radix__flush_pwc_lpid);

/*
 * Flush partition scoped translations from LPID (=LPIDR)
 */
void radix__flush_all_lpid(unsigned int lpid)
{
	_tlbie_lpid(lpid, RIC_FLUSH_ALL);
}
EXPORT_SYMBOL_GPL(radix__flush_all_lpid);

/*
 * Flush process scoped translations from LPID (=LPIDR)
 */
void radix__flush_all_lpid_guest(unsigned int lpid)
{
	_tlbie_lpid_guest(lpid, RIC_FLUSH_ALL);
}

static void radix__flush_tlb_pwc_range_psize(struct mm_struct *mm, unsigned long start,
					     unsigned long end, int psize);
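/*
 * The GPL-exported LPID flushes above operate on partition scoped (or,
 * for the _guest variant, guest process scoped) translations. Their
 * intended consumer is hypervisor code such as KVM tearing down or
 * updating a guest's partition scoped page tables; that is an inference
 * from the exports and naming, not an exhaustive list of callers.
 */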
void radix__tlb_flush(struct mmu_gather *tlb)
{
	int psize = 0;
	struct mm_struct *mm = tlb->mm;
	int page_size = tlb->page_size;
	unsigned long start = tlb->start;
	unsigned long end = tlb->end;

	/*
	 * if page size is not something we understand, do a full mm flush
	 *
	 * A "fullmm" flush must always do a flush_all_mm (RIC=2) flush
	 * that flushes the process table entry cache upon process teardown.
	 * See the comment for radix in arch_exit_mmap().
	 */
	if (tlb->fullmm) {
		__flush_all_mm(mm, true);
#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLB_PAGE)
	} else if (mm_tlb_flush_nested(mm)) {
		/*
		 * If there is a concurrent invalidation that is clearing ptes,
		 * then it's possible this invalidation will miss one of those
		 * cleared ptes and miss flushing the TLB. If this invalidate
		 * returns before the other one flushes TLBs, that can result
		 * in it returning while there are still valid TLBs inside the
		 * range to be invalidated.
		 *
		 * See mm/memory.c:tlb_finish_mmu() for more details.
		 *
		 * The solution to this is to ensure the entire range is always
		 * flushed here. The problem for powerpc is that the flushes
		 * are page size specific, so this "forced flush" would not
		 * do the right thing if there is a mix of page sizes in
		 * the range to be invalidated. So use __flush_tlb_range
		 * which invalidates all possible page sizes in the range.
		 *
		 * A PWC flush is probably not required because the core code
		 * shouldn't free page tables in this path, but accounting
		 * for the possibility makes us a bit more robust.
		 *
		 * need_flush_all is an uncommon case because page table
		 * teardown should be done with exclusive locks held (but
		 * after locks are dropped another invalidate could come
		 * in), it could be optimized further if necessary.
		 */
		if (!tlb->need_flush_all)
			__radix__flush_tlb_range(mm, start, end, true);
		else
			radix__flush_all_mm(mm);
#endif
	} else if ((psize = radix_get_mmu_psize(page_size)) == -1) {
		if (!tlb->need_flush_all)
			radix__flush_tlb_mm(mm);
		else
			radix__flush_all_mm(mm);
	} else {
		if (!tlb->need_flush_all)
			radix__flush_tlb_range_psize(mm, start, end, psize);
		else
			radix__flush_tlb_pwc_range_psize(mm, start, end, psize);
	}
	tlb->need_flush_all = 0;
}

static __always_inline void __radix__flush_tlb_range_psize(struct mm_struct *mm,
					unsigned long start, unsigned long end,
					int psize, bool also_pwc)
{
	unsigned long pid;
	unsigned int page_shift = mmu_psize_defs[psize].shift;
	unsigned long page_size = 1UL << page_shift;
	unsigned long nr_pages = (end - start) >> page_shift;
	bool local, full;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	preempt_disable();
	smp_mb(); /* see radix__flush_tlb_mm */
	if (!mm_is_thread_local(mm)) {
		if (unlikely(mm_is_singlethreaded(mm))) {
			if (end != TLB_FLUSH_ALL) {
				exit_flush_lazy_tlbs(mm);
				goto is_local;
			}
		}
		local = false;
		full = (end == TLB_FLUSH_ALL ||
			nr_pages > tlb_single_page_flush_ceiling);
	} else {
is_local:
		local = true;
		full = (end == TLB_FLUSH_ALL ||
			nr_pages > tlb_local_single_page_flush_ceiling);
	}

	if (full) {
		if (local) {
			_tlbiel_pid(pid, also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB);
		} else {
			if (mm_needs_flush_escalation(mm))
				also_pwc = true;

			_tlbie_pid(pid, also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB);
		}
	} else {
		if (local)
			_tlbiel_va_range(start, end, pid, page_size, psize, also_pwc);
		else
			_tlbie_va_range(start, end, pid, page_size, psize, also_pwc);
	}
	preempt_enable();
}

void radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start,
				  unsigned long end, int psize)
{
	return __radix__flush_tlb_range_psize(mm, start, end, psize, false);
}

static void radix__flush_tlb_pwc_range_psize(struct mm_struct *mm, unsigned long start,
					     unsigned long end, int psize)
{
	__radix__flush_tlb_range_psize(mm, start, end, psize, true);
}

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr)
{
	unsigned long pid, end;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	/* 4k page size, just blow the world */
	if (PAGE_SIZE == 0x1000) {
		radix__flush_all_mm(mm);
		return;
	}

	end = addr + HPAGE_PMD_SIZE;

	/* Otherwise first do the PWC, then iterate the pages. */
	preempt_disable();
	smp_mb(); /* see radix__flush_tlb_mm */
	if (!mm_is_thread_local(mm)) {
		if (unlikely(mm_is_singlethreaded(mm))) {
			exit_flush_lazy_tlbs(mm);
			goto local;
		}
		_tlbie_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
	} else {
local:
		_tlbiel_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
	}

	preempt_enable();
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

void radix__flush_pmd_tlb_range(struct vm_area_struct *vma,
				unsigned long start, unsigned long end)
{
	radix__flush_tlb_range_psize(vma->vm_mm, start, end, MMU_PAGE_2M);
}
EXPORT_SYMBOL(radix__flush_pmd_tlb_range);
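/*
 * radix__flush_tlb_all() below takes the biggest hammer in this file:
 * an IS = 3 broadcast tlbie, issued twice. The first pass uses PRS = 1
 * with a non-zero RS to cover guest (process scoped) entries, the second
 * uses PRS = 0 with RS = 0 for host entries, so callers can assume
 * nothing survives regardless of PID or LPID. A hedged note on usage:
 * this is only appropriate when no finer scoped PID/LPID flush can be
 * trusted, since it invalidates translations for every context.
 */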
void radix__flush_tlb_all(void)
{
	unsigned long rb, prs, r, rs;
	unsigned long ric = RIC_FLUSH_ALL;

	rb = 0x3 << PPC_BITLSHIFT(53); /* IS = 3 */
	prs = 0; /* partition scoped */
	r = 1;   /* radix format */
	rs = 1 & ((1UL << 32) - 1); /* any LPID value to flush guest mappings */

	asm volatile("ptesync": : :"memory");
	/*
	 * now flush guest entries by passing PRS = 1 and LPID != 0
	 */
	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(1), "i"(ric), "r"(rs) : "memory");
	/*
	 * now flush host entries by passing PRS = 0 and LPID == 0
	 */
	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(0) : "memory");
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
extern void radix_kvm_prefetch_workaround(struct mm_struct *mm)
{
	unsigned long pid = mm->context.id;

	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	/*
	 * If this context hasn't run on that CPU before and KVM is
	 * around, there's a slim chance that the guest on another
	 * CPU just brought in an obsolete translation into the TLB of
	 * this CPU due to a bad prefetch using the guest PID on
	 * the way into the hypervisor.
	 *
	 * We work around this here. If KVM is possible, we check if
	 * any sibling thread is in KVM. If it is, the window may exist
	 * and thus we flush that PID from the core.
	 *
	 * A potential future improvement would be to mark which PIDs
	 * have never been used on the system and avoid it if the PID
	 * is new and the process has no other cpumask bit set.
	 */
	if (cpu_has_feature(CPU_FTR_HVMODE) && radix_enabled()) {
		int cpu = smp_processor_id();
		int sib = cpu_first_thread_sibling(cpu);
		bool flush = false;

		for (; sib <= cpu_last_thread_sibling(cpu) && !flush; sib++) {
			if (sib == cpu)
				continue;
			if (!cpu_possible(sib))
				continue;
			if (paca_ptrs[sib]->kvm_hstate.kvm_vcpu)
				flush = true;
		}
		if (flush)
			_tlbiel_pid(pid, RIC_FLUSH_ALL);
	}
}
EXPORT_SYMBOL_GPL(radix_kvm_prefetch_workaround);
#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */