1 /* 2 * TLB flush routines for radix kernels. 3 * 4 * Copyright 2015-2016, Aneesh Kumar K.V, IBM Corporation. 5 * 6 * This program is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU General Public License 8 * as published by the Free Software Foundation; either version 9 * 2 of the License, or (at your option) any later version. 10 */ 11 12 #include <linux/mm.h> 13 #include <linux/hugetlb.h> 14 #include <linux/memblock.h> 15 #include <linux/mmu_context.h> 16 #include <linux/sched/mm.h> 17 18 #include <asm/ppc-opcode.h> 19 #include <asm/tlb.h> 20 #include <asm/tlbflush.h> 21 #include <asm/trace.h> 22 #include <asm/cputhreads.h> 23 24 #define RIC_FLUSH_TLB 0 25 #define RIC_FLUSH_PWC 1 26 #define RIC_FLUSH_ALL 2 27 28 /* 29 * tlbiel instruction for radix, set invalidation 30 * i.e., r=1 and is=01 or is=10 or is=11 31 */ 32 static inline void tlbiel_radix_set_isa300(unsigned int set, unsigned int is, 33 unsigned int pid, 34 unsigned int ric, unsigned int prs) 35 { 36 unsigned long rb; 37 unsigned long rs; 38 39 rb = (set << PPC_BITLSHIFT(51)) | (is << PPC_BITLSHIFT(53)); 40 rs = ((unsigned long)pid << PPC_BITLSHIFT(31)); 41 42 asm volatile(PPC_TLBIEL(%0, %1, %2, %3, 1) 43 : : "r"(rb), "r"(rs), "i"(ric), "i"(prs) 44 : "memory"); 45 } 46 47 static void tlbiel_all_isa300(unsigned int num_sets, unsigned int is) 48 { 49 unsigned int set; 50 51 asm volatile("ptesync": : :"memory"); 52 53 /* 54 * Flush the first set of the TLB, and the entire Page Walk Cache 55 * and partition table entries. Then flush the remaining sets of the 56 * TLB. 57 */ 58 tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 0); 59 for (set = 1; set < num_sets; set++) 60 tlbiel_radix_set_isa300(set, is, 0, RIC_FLUSH_TLB, 0); 61 62 /* Do the same for process scoped entries. */ 63 tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 1); 64 for (set = 1; set < num_sets; set++) 65 tlbiel_radix_set_isa300(set, is, 0, RIC_FLUSH_TLB, 1); 66 67 asm volatile("ptesync": : :"memory"); 68 } 69 70 void radix__tlbiel_all(unsigned int action) 71 { 72 unsigned int is; 73 74 switch (action) { 75 case TLB_INVAL_SCOPE_GLOBAL: 76 is = 3; 77 break; 78 case TLB_INVAL_SCOPE_LPID: 79 is = 2; 80 break; 81 default: 82 BUG(); 83 } 84 85 if (early_cpu_has_feature(CPU_FTR_ARCH_300)) 86 tlbiel_all_isa300(POWER9_TLB_SETS_RADIX, is); 87 else 88 WARN(1, "%s called on pre-POWER9 CPU\n", __func__); 89 90 asm volatile(PPC_INVALIDATE_ERAT "; isync" : : :"memory"); 91 } 92 93 static __always_inline void __tlbiel_pid(unsigned long pid, int set, 94 unsigned long ric) 95 { 96 unsigned long rb,rs,prs,r; 97 98 rb = PPC_BIT(53); /* IS = 1 */ 99 rb |= set << PPC_BITLSHIFT(51); 100 rs = ((unsigned long)pid) << PPC_BITLSHIFT(31); 101 prs = 1; /* process scoped */ 102 r = 1; /* radix format */ 103 104 asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1) 105 : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); 106 trace_tlbie(0, 1, rb, rs, ric, prs, r); 107 } 108 109 static __always_inline void __tlbie_pid(unsigned long pid, unsigned long ric) 110 { 111 unsigned long rb,rs,prs,r; 112 113 rb = PPC_BIT(53); /* IS = 1 */ 114 rs = pid << PPC_BITLSHIFT(31); 115 prs = 1; /* process scoped */ 116 r = 1; /* radix format */ 117 118 asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) 119 : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); 120 trace_tlbie(0, 0, rb, rs, ric, prs, r); 121 } 122 123 static __always_inline void __tlbiel_lpid(unsigned long lpid, int set, 124 unsigned long ric) 125 { 126 unsigned long rb,rs,prs,r; 127 128 rb = PPC_BIT(52); /* IS = 2 */ 129 rb |= set << PPC_BITLSHIFT(51); 130 rs = 0; /* LPID comes from LPIDR */ 131 prs = 0; /* partition scoped */ 132 r = 1; /* radix format */ 133 134 asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1) 135 : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); 136 trace_tlbie(lpid, 1, rb, rs, ric, prs, r); 137 } 138 139 static __always_inline void __tlbie_lpid(unsigned long lpid, unsigned long ric) 140 { 141 unsigned long rb,rs,prs,r; 142 143 rb = PPC_BIT(52); /* IS = 2 */ 144 rs = lpid; 145 prs = 0; /* partition scoped */ 146 r = 1; /* radix format */ 147 148 asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) 149 : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); 150 trace_tlbie(lpid, 0, rb, rs, ric, prs, r); 151 } 152 153 static inline void __tlbiel_lpid_guest(unsigned long lpid, int set, 154 unsigned long ric) 155 { 156 unsigned long rb,rs,prs,r; 157 158 rb = PPC_BIT(52); /* IS = 2 */ 159 rb |= set << PPC_BITLSHIFT(51); 160 rs = 0; /* LPID comes from LPIDR */ 161 prs = 1; /* process scoped */ 162 r = 1; /* radix format */ 163 164 asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1) 165 : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); 166 trace_tlbie(lpid, 1, rb, rs, ric, prs, r); 167 } 168 169 170 static inline void __tlbiel_va(unsigned long va, unsigned long pid, 171 unsigned long ap, unsigned long ric) 172 { 173 unsigned long rb,rs,prs,r; 174 175 rb = va & ~(PPC_BITMASK(52, 63)); 176 rb |= ap << PPC_BITLSHIFT(58); 177 rs = pid << PPC_BITLSHIFT(31); 178 prs = 1; /* process scoped */ 179 r = 1; /* radix format */ 180 181 asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1) 182 : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); 183 trace_tlbie(0, 1, rb, rs, ric, prs, r); 184 } 185 186 static inline void __tlbie_va(unsigned long va, unsigned long pid, 187 unsigned long ap, unsigned long ric) 188 { 189 unsigned long rb,rs,prs,r; 190 191 rb = va & ~(PPC_BITMASK(52, 63)); 192 rb |= ap << PPC_BITLSHIFT(58); 193 rs = pid << PPC_BITLSHIFT(31); 194 prs = 1; /* process scoped */ 195 r = 1; /* radix format */ 196 197 asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) 198 : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); 199 trace_tlbie(0, 0, rb, rs, ric, prs, r); 200 } 201 202 static inline void __tlbie_lpid_va(unsigned long va, unsigned long lpid, 203 unsigned long ap, unsigned long ric) 204 { 205 unsigned long rb,rs,prs,r; 206 207 rb = va & ~(PPC_BITMASK(52, 63)); 208 rb |= ap << PPC_BITLSHIFT(58); 209 rs = lpid; 210 prs = 0; /* partition scoped */ 211 r = 1; /* radix format */ 212 213 asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) 214 : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); 215 trace_tlbie(lpid, 0, rb, rs, ric, prs, r); 216 } 217 218 static inline void fixup_tlbie(void) 219 { 220 unsigned long pid = 0; 221 unsigned long va = ((1UL << 52) - 1); 222 223 if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG)) { 224 asm volatile("ptesync": : :"memory"); 225 __tlbie_va(va, pid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB); 226 } 227 } 228 229 static inline void fixup_tlbie_lpid(unsigned long lpid) 230 { 231 unsigned long va = ((1UL << 52) - 1); 232 233 if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG)) { 234 asm volatile("ptesync": : :"memory"); 235 __tlbie_lpid_va(va, lpid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB); 236 } 237 } 238 239 /* 240 * We use 128 set in radix mode and 256 set in hpt mode. 241 */ 242 static inline void _tlbiel_pid(unsigned long pid, unsigned long ric) 243 { 244 int set; 245 246 asm volatile("ptesync": : :"memory"); 247 248 /* 249 * Flush the first set of the TLB, and if we're doing a RIC_FLUSH_ALL, 250 * also flush the entire Page Walk Cache. 251 */ 252 __tlbiel_pid(pid, 0, ric); 253 254 /* For PWC, only one flush is needed */ 255 if (ric == RIC_FLUSH_PWC) { 256 asm volatile("ptesync": : :"memory"); 257 return; 258 } 259 260 /* For the remaining sets, just flush the TLB */ 261 for (set = 1; set < POWER9_TLB_SETS_RADIX ; set++) 262 __tlbiel_pid(pid, set, RIC_FLUSH_TLB); 263 264 asm volatile("ptesync": : :"memory"); 265 asm volatile(PPC_INVALIDATE_ERAT "; isync" : : :"memory"); 266 } 267 268 static inline void _tlbie_pid(unsigned long pid, unsigned long ric) 269 { 270 asm volatile("ptesync": : :"memory"); 271 272 /* 273 * Workaround the fact that the "ric" argument to __tlbie_pid 274 * must be a compile-time contraint to match the "i" constraint 275 * in the asm statement. 276 */ 277 switch (ric) { 278 case RIC_FLUSH_TLB: 279 __tlbie_pid(pid, RIC_FLUSH_TLB); 280 break; 281 case RIC_FLUSH_PWC: 282 __tlbie_pid(pid, RIC_FLUSH_PWC); 283 break; 284 case RIC_FLUSH_ALL: 285 default: 286 __tlbie_pid(pid, RIC_FLUSH_ALL); 287 } 288 fixup_tlbie(); 289 asm volatile("eieio; tlbsync; ptesync": : :"memory"); 290 } 291 292 static inline void _tlbiel_lpid(unsigned long lpid, unsigned long ric) 293 { 294 int set; 295 296 VM_BUG_ON(mfspr(SPRN_LPID) != lpid); 297 298 asm volatile("ptesync": : :"memory"); 299 300 /* 301 * Flush the first set of the TLB, and if we're doing a RIC_FLUSH_ALL, 302 * also flush the entire Page Walk Cache. 303 */ 304 __tlbiel_lpid(lpid, 0, ric); 305 306 /* For PWC, only one flush is needed */ 307 if (ric == RIC_FLUSH_PWC) { 308 asm volatile("ptesync": : :"memory"); 309 return; 310 } 311 312 /* For the remaining sets, just flush the TLB */ 313 for (set = 1; set < POWER9_TLB_SETS_RADIX ; set++) 314 __tlbiel_lpid(lpid, set, RIC_FLUSH_TLB); 315 316 asm volatile("ptesync": : :"memory"); 317 asm volatile(PPC_INVALIDATE_ERAT "; isync" : : :"memory"); 318 } 319 320 static inline void _tlbie_lpid(unsigned long lpid, unsigned long ric) 321 { 322 asm volatile("ptesync": : :"memory"); 323 324 /* 325 * Workaround the fact that the "ric" argument to __tlbie_pid 326 * must be a compile-time contraint to match the "i" constraint 327 * in the asm statement. 328 */ 329 switch (ric) { 330 case RIC_FLUSH_TLB: 331 __tlbie_lpid(lpid, RIC_FLUSH_TLB); 332 break; 333 case RIC_FLUSH_PWC: 334 __tlbie_lpid(lpid, RIC_FLUSH_PWC); 335 break; 336 case RIC_FLUSH_ALL: 337 default: 338 __tlbie_lpid(lpid, RIC_FLUSH_ALL); 339 } 340 fixup_tlbie_lpid(lpid); 341 asm volatile("eieio; tlbsync; ptesync": : :"memory"); 342 } 343 344 static inline void _tlbiel_lpid_guest(unsigned long lpid, unsigned long ric) 345 { 346 int set; 347 348 VM_BUG_ON(mfspr(SPRN_LPID) != lpid); 349 350 asm volatile("ptesync": : :"memory"); 351 352 /* 353 * Flush the first set of the TLB, and if we're doing a RIC_FLUSH_ALL, 354 * also flush the entire Page Walk Cache. 355 */ 356 __tlbiel_lpid_guest(lpid, 0, ric); 357 358 /* For PWC, only one flush is needed */ 359 if (ric == RIC_FLUSH_PWC) { 360 asm volatile("ptesync": : :"memory"); 361 return; 362 } 363 364 /* For the remaining sets, just flush the TLB */ 365 for (set = 1; set < POWER9_TLB_SETS_RADIX ; set++) 366 __tlbiel_lpid_guest(lpid, set, RIC_FLUSH_TLB); 367 368 asm volatile("ptesync": : :"memory"); 369 asm volatile(PPC_INVALIDATE_ERAT : : :"memory"); 370 } 371 372 373 static inline void __tlbiel_va_range(unsigned long start, unsigned long end, 374 unsigned long pid, unsigned long page_size, 375 unsigned long psize) 376 { 377 unsigned long addr; 378 unsigned long ap = mmu_get_ap(psize); 379 380 for (addr = start; addr < end; addr += page_size) 381 __tlbiel_va(addr, pid, ap, RIC_FLUSH_TLB); 382 } 383 384 static inline void _tlbiel_va(unsigned long va, unsigned long pid, 385 unsigned long psize, unsigned long ric) 386 { 387 unsigned long ap = mmu_get_ap(psize); 388 389 asm volatile("ptesync": : :"memory"); 390 __tlbiel_va(va, pid, ap, ric); 391 asm volatile("ptesync": : :"memory"); 392 } 393 394 static inline void _tlbiel_va_range(unsigned long start, unsigned long end, 395 unsigned long pid, unsigned long page_size, 396 unsigned long psize, bool also_pwc) 397 { 398 asm volatile("ptesync": : :"memory"); 399 if (also_pwc) 400 __tlbiel_pid(pid, 0, RIC_FLUSH_PWC); 401 __tlbiel_va_range(start, end, pid, page_size, psize); 402 asm volatile("ptesync": : :"memory"); 403 } 404 405 static inline void __tlbie_va_range(unsigned long start, unsigned long end, 406 unsigned long pid, unsigned long page_size, 407 unsigned long psize) 408 { 409 unsigned long addr; 410 unsigned long ap = mmu_get_ap(psize); 411 412 for (addr = start; addr < end; addr += page_size) 413 __tlbie_va(addr, pid, ap, RIC_FLUSH_TLB); 414 } 415 416 static inline void _tlbie_va(unsigned long va, unsigned long pid, 417 unsigned long psize, unsigned long ric) 418 { 419 unsigned long ap = mmu_get_ap(psize); 420 421 asm volatile("ptesync": : :"memory"); 422 __tlbie_va(va, pid, ap, ric); 423 fixup_tlbie(); 424 asm volatile("eieio; tlbsync; ptesync": : :"memory"); 425 } 426 427 static inline void _tlbie_lpid_va(unsigned long va, unsigned long lpid, 428 unsigned long psize, unsigned long ric) 429 { 430 unsigned long ap = mmu_get_ap(psize); 431 432 asm volatile("ptesync": : :"memory"); 433 __tlbie_lpid_va(va, lpid, ap, ric); 434 fixup_tlbie_lpid(lpid); 435 asm volatile("eieio; tlbsync; ptesync": : :"memory"); 436 } 437 438 static inline void _tlbie_va_range(unsigned long start, unsigned long end, 439 unsigned long pid, unsigned long page_size, 440 unsigned long psize, bool also_pwc) 441 { 442 asm volatile("ptesync": : :"memory"); 443 if (also_pwc) 444 __tlbie_pid(pid, RIC_FLUSH_PWC); 445 __tlbie_va_range(start, end, pid, page_size, psize); 446 fixup_tlbie(); 447 asm volatile("eieio; tlbsync; ptesync": : :"memory"); 448 } 449 450 /* 451 * Base TLB flushing operations: 452 * 453 * - flush_tlb_mm(mm) flushes the specified mm context TLB's 454 * - flush_tlb_page(vma, vmaddr) flushes one page 455 * - flush_tlb_range(vma, start, end) flushes a range of pages 456 * - flush_tlb_kernel_range(start, end) flushes kernel pages 457 * 458 * - local_* variants of page and mm only apply to the current 459 * processor 460 */ 461 void radix__local_flush_tlb_mm(struct mm_struct *mm) 462 { 463 unsigned long pid; 464 465 preempt_disable(); 466 pid = mm->context.id; 467 if (pid != MMU_NO_CONTEXT) 468 _tlbiel_pid(pid, RIC_FLUSH_TLB); 469 preempt_enable(); 470 } 471 EXPORT_SYMBOL(radix__local_flush_tlb_mm); 472 473 #ifndef CONFIG_SMP 474 void radix__local_flush_all_mm(struct mm_struct *mm) 475 { 476 unsigned long pid; 477 478 preempt_disable(); 479 pid = mm->context.id; 480 if (pid != MMU_NO_CONTEXT) 481 _tlbiel_pid(pid, RIC_FLUSH_ALL); 482 preempt_enable(); 483 } 484 EXPORT_SYMBOL(radix__local_flush_all_mm); 485 #endif /* CONFIG_SMP */ 486 487 void radix__local_flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr, 488 int psize) 489 { 490 unsigned long pid; 491 492 preempt_disable(); 493 pid = mm->context.id; 494 if (pid != MMU_NO_CONTEXT) 495 _tlbiel_va(vmaddr, pid, psize, RIC_FLUSH_TLB); 496 preempt_enable(); 497 } 498 499 void radix__local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) 500 { 501 #ifdef CONFIG_HUGETLB_PAGE 502 /* need the return fix for nohash.c */ 503 if (is_vm_hugetlb_page(vma)) 504 return radix__local_flush_hugetlb_page(vma, vmaddr); 505 #endif 506 radix__local_flush_tlb_page_psize(vma->vm_mm, vmaddr, mmu_virtual_psize); 507 } 508 EXPORT_SYMBOL(radix__local_flush_tlb_page); 509 510 static bool mm_is_singlethreaded(struct mm_struct *mm) 511 { 512 if (atomic_read(&mm->context.copros) > 0) 513 return false; 514 if (atomic_read(&mm->mm_users) <= 1 && current->mm == mm) 515 return true; 516 return false; 517 } 518 519 static bool mm_needs_flush_escalation(struct mm_struct *mm) 520 { 521 /* 522 * P9 nest MMU has issues with the page walk cache 523 * caching PTEs and not flushing them properly when 524 * RIC = 0 for a PID/LPID invalidate 525 */ 526 if (atomic_read(&mm->context.copros) > 0) 527 return true; 528 return false; 529 } 530 531 #ifdef CONFIG_SMP 532 static void do_exit_flush_lazy_tlb(void *arg) 533 { 534 struct mm_struct *mm = arg; 535 unsigned long pid = mm->context.id; 536 537 if (current->mm == mm) 538 return; /* Local CPU */ 539 540 if (current->active_mm == mm) { 541 /* 542 * Must be a kernel thread because sender is single-threaded. 543 */ 544 BUG_ON(current->mm); 545 mmgrab(&init_mm); 546 switch_mm(mm, &init_mm, current); 547 current->active_mm = &init_mm; 548 mmdrop(mm); 549 } 550 _tlbiel_pid(pid, RIC_FLUSH_ALL); 551 } 552 553 static void exit_flush_lazy_tlbs(struct mm_struct *mm) 554 { 555 /* 556 * Would be nice if this was async so it could be run in 557 * parallel with our local flush, but generic code does not 558 * give a good API for it. Could extend the generic code or 559 * make a special powerpc IPI for flushing TLBs. 560 * For now it's not too performance critical. 561 */ 562 smp_call_function_many(mm_cpumask(mm), do_exit_flush_lazy_tlb, 563 (void *)mm, 1); 564 mm_reset_thread_local(mm); 565 } 566 567 void radix__flush_tlb_mm(struct mm_struct *mm) 568 { 569 unsigned long pid; 570 571 pid = mm->context.id; 572 if (unlikely(pid == MMU_NO_CONTEXT)) 573 return; 574 575 preempt_disable(); 576 /* 577 * Order loads of mm_cpumask vs previous stores to clear ptes before 578 * the invalidate. See barrier in switch_mm_irqs_off 579 */ 580 smp_mb(); 581 if (!mm_is_thread_local(mm)) { 582 if (unlikely(mm_is_singlethreaded(mm))) { 583 exit_flush_lazy_tlbs(mm); 584 goto local; 585 } 586 587 if (mm_needs_flush_escalation(mm)) 588 _tlbie_pid(pid, RIC_FLUSH_ALL); 589 else 590 _tlbie_pid(pid, RIC_FLUSH_TLB); 591 } else { 592 local: 593 _tlbiel_pid(pid, RIC_FLUSH_TLB); 594 } 595 preempt_enable(); 596 } 597 EXPORT_SYMBOL(radix__flush_tlb_mm); 598 599 static void __flush_all_mm(struct mm_struct *mm, bool fullmm) 600 { 601 unsigned long pid; 602 603 pid = mm->context.id; 604 if (unlikely(pid == MMU_NO_CONTEXT)) 605 return; 606 607 preempt_disable(); 608 smp_mb(); /* see radix__flush_tlb_mm */ 609 if (!mm_is_thread_local(mm)) { 610 if (unlikely(mm_is_singlethreaded(mm))) { 611 if (!fullmm) { 612 exit_flush_lazy_tlbs(mm); 613 goto local; 614 } 615 } 616 _tlbie_pid(pid, RIC_FLUSH_ALL); 617 } else { 618 local: 619 _tlbiel_pid(pid, RIC_FLUSH_ALL); 620 } 621 preempt_enable(); 622 } 623 void radix__flush_all_mm(struct mm_struct *mm) 624 { 625 __flush_all_mm(mm, false); 626 } 627 EXPORT_SYMBOL(radix__flush_all_mm); 628 629 void radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr) 630 { 631 tlb->need_flush_all = 1; 632 } 633 EXPORT_SYMBOL(radix__flush_tlb_pwc); 634 635 void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr, 636 int psize) 637 { 638 unsigned long pid; 639 640 pid = mm->context.id; 641 if (unlikely(pid == MMU_NO_CONTEXT)) 642 return; 643 644 preempt_disable(); 645 smp_mb(); /* see radix__flush_tlb_mm */ 646 if (!mm_is_thread_local(mm)) { 647 if (unlikely(mm_is_singlethreaded(mm))) { 648 exit_flush_lazy_tlbs(mm); 649 goto local; 650 } 651 _tlbie_va(vmaddr, pid, psize, RIC_FLUSH_TLB); 652 } else { 653 local: 654 _tlbiel_va(vmaddr, pid, psize, RIC_FLUSH_TLB); 655 } 656 preempt_enable(); 657 } 658 659 void radix__flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) 660 { 661 #ifdef CONFIG_HUGETLB_PAGE 662 if (is_vm_hugetlb_page(vma)) 663 return radix__flush_hugetlb_page(vma, vmaddr); 664 #endif 665 radix__flush_tlb_page_psize(vma->vm_mm, vmaddr, mmu_virtual_psize); 666 } 667 EXPORT_SYMBOL(radix__flush_tlb_page); 668 669 #else /* CONFIG_SMP */ 670 #define radix__flush_all_mm radix__local_flush_all_mm 671 #endif /* CONFIG_SMP */ 672 673 void radix__flush_tlb_kernel_range(unsigned long start, unsigned long end) 674 { 675 _tlbie_pid(0, RIC_FLUSH_ALL); 676 } 677 EXPORT_SYMBOL(radix__flush_tlb_kernel_range); 678 679 #define TLB_FLUSH_ALL -1UL 680 681 /* 682 * Number of pages above which we invalidate the entire PID rather than 683 * flush individual pages, for local and global flushes respectively. 684 * 685 * tlbie goes out to the interconnect and individual ops are more costly. 686 * It also does not iterate over sets like the local tlbiel variant when 687 * invalidating a full PID, so it has a far lower threshold to change from 688 * individual page flushes to full-pid flushes. 689 */ 690 static unsigned long tlb_single_page_flush_ceiling __read_mostly = 33; 691 static unsigned long tlb_local_single_page_flush_ceiling __read_mostly = POWER9_TLB_SETS_RADIX * 2; 692 693 static inline void __radix__flush_tlb_range(struct mm_struct *mm, 694 unsigned long start, unsigned long end, 695 bool flush_all_sizes) 696 697 { 698 unsigned long pid; 699 unsigned int page_shift = mmu_psize_defs[mmu_virtual_psize].shift; 700 unsigned long page_size = 1UL << page_shift; 701 unsigned long nr_pages = (end - start) >> page_shift; 702 bool local, full; 703 704 pid = mm->context.id; 705 if (unlikely(pid == MMU_NO_CONTEXT)) 706 return; 707 708 preempt_disable(); 709 smp_mb(); /* see radix__flush_tlb_mm */ 710 if (!mm_is_thread_local(mm)) { 711 if (unlikely(mm_is_singlethreaded(mm))) { 712 if (end != TLB_FLUSH_ALL) { 713 exit_flush_lazy_tlbs(mm); 714 goto is_local; 715 } 716 } 717 local = false; 718 full = (end == TLB_FLUSH_ALL || 719 nr_pages > tlb_single_page_flush_ceiling); 720 } else { 721 is_local: 722 local = true; 723 full = (end == TLB_FLUSH_ALL || 724 nr_pages > tlb_local_single_page_flush_ceiling); 725 } 726 727 if (full) { 728 if (local) { 729 _tlbiel_pid(pid, RIC_FLUSH_TLB); 730 } else { 731 if (mm_needs_flush_escalation(mm)) 732 _tlbie_pid(pid, RIC_FLUSH_ALL); 733 else 734 _tlbie_pid(pid, RIC_FLUSH_TLB); 735 } 736 } else { 737 bool hflush = flush_all_sizes; 738 bool gflush = flush_all_sizes; 739 unsigned long hstart, hend; 740 unsigned long gstart, gend; 741 742 if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) 743 hflush = true; 744 745 if (hflush) { 746 hstart = (start + PMD_SIZE - 1) & PMD_MASK; 747 hend = end & PMD_MASK; 748 if (hstart == hend) 749 hflush = false; 750 } 751 752 if (gflush) { 753 gstart = (start + PUD_SIZE - 1) & PUD_MASK; 754 gend = end & PUD_MASK; 755 if (gstart == gend) 756 gflush = false; 757 } 758 759 asm volatile("ptesync": : :"memory"); 760 if (local) { 761 __tlbiel_va_range(start, end, pid, page_size, mmu_virtual_psize); 762 if (hflush) 763 __tlbiel_va_range(hstart, hend, pid, 764 PMD_SIZE, MMU_PAGE_2M); 765 if (gflush) 766 __tlbiel_va_range(gstart, gend, pid, 767 PUD_SIZE, MMU_PAGE_1G); 768 asm volatile("ptesync": : :"memory"); 769 } else { 770 __tlbie_va_range(start, end, pid, page_size, mmu_virtual_psize); 771 if (hflush) 772 __tlbie_va_range(hstart, hend, pid, 773 PMD_SIZE, MMU_PAGE_2M); 774 if (gflush) 775 __tlbie_va_range(gstart, gend, pid, 776 PUD_SIZE, MMU_PAGE_1G); 777 fixup_tlbie(); 778 asm volatile("eieio; tlbsync; ptesync": : :"memory"); 779 } 780 } 781 preempt_enable(); 782 } 783 784 void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start, 785 unsigned long end) 786 787 { 788 #ifdef CONFIG_HUGETLB_PAGE 789 if (is_vm_hugetlb_page(vma)) 790 return radix__flush_hugetlb_tlb_range(vma, start, end); 791 #endif 792 793 __radix__flush_tlb_range(vma->vm_mm, start, end, false); 794 } 795 EXPORT_SYMBOL(radix__flush_tlb_range); 796 797 static int radix_get_mmu_psize(int page_size) 798 { 799 int psize; 800 801 if (page_size == (1UL << mmu_psize_defs[mmu_virtual_psize].shift)) 802 psize = mmu_virtual_psize; 803 else if (page_size == (1UL << mmu_psize_defs[MMU_PAGE_2M].shift)) 804 psize = MMU_PAGE_2M; 805 else if (page_size == (1UL << mmu_psize_defs[MMU_PAGE_1G].shift)) 806 psize = MMU_PAGE_1G; 807 else 808 return -1; 809 return psize; 810 } 811 812 /* 813 * Flush partition scoped LPID address translation for all CPUs. 814 */ 815 void radix__flush_tlb_lpid_page(unsigned int lpid, 816 unsigned long addr, 817 unsigned long page_size) 818 { 819 int psize = radix_get_mmu_psize(page_size); 820 821 _tlbie_lpid_va(addr, lpid, psize, RIC_FLUSH_TLB); 822 } 823 EXPORT_SYMBOL_GPL(radix__flush_tlb_lpid_page); 824 825 /* 826 * Flush partition scoped PWC from LPID for all CPUs. 827 */ 828 void radix__flush_pwc_lpid(unsigned int lpid) 829 { 830 _tlbie_lpid(lpid, RIC_FLUSH_PWC); 831 } 832 EXPORT_SYMBOL_GPL(radix__flush_pwc_lpid); 833 834 /* 835 * Flush partition scoped translations from LPID (=LPIDR) 836 */ 837 void radix__flush_tlb_lpid(unsigned int lpid) 838 { 839 _tlbie_lpid(lpid, RIC_FLUSH_ALL); 840 } 841 EXPORT_SYMBOL_GPL(radix__flush_tlb_lpid); 842 843 /* 844 * Flush partition scoped translations from LPID (=LPIDR) 845 */ 846 void radix__local_flush_tlb_lpid(unsigned int lpid) 847 { 848 _tlbiel_lpid(lpid, RIC_FLUSH_ALL); 849 } 850 EXPORT_SYMBOL_GPL(radix__local_flush_tlb_lpid); 851 852 /* 853 * Flush process scoped translations from LPID (=LPIDR). 854 * Important difference, the guest normally manages its own translations, 855 * but some cases e.g., vCPU CPU migration require KVM to flush. 856 */ 857 void radix__local_flush_tlb_lpid_guest(unsigned int lpid) 858 { 859 _tlbiel_lpid_guest(lpid, RIC_FLUSH_ALL); 860 } 861 EXPORT_SYMBOL_GPL(radix__local_flush_tlb_lpid_guest); 862 863 864 static void radix__flush_tlb_pwc_range_psize(struct mm_struct *mm, unsigned long start, 865 unsigned long end, int psize); 866 867 void radix__tlb_flush(struct mmu_gather *tlb) 868 { 869 int psize = 0; 870 struct mm_struct *mm = tlb->mm; 871 int page_size = tlb->page_size; 872 unsigned long start = tlb->start; 873 unsigned long end = tlb->end; 874 875 /* 876 * if page size is not something we understand, do a full mm flush 877 * 878 * A "fullmm" flush must always do a flush_all_mm (RIC=2) flush 879 * that flushes the process table entry cache upon process teardown. 880 * See the comment for radix in arch_exit_mmap(). 881 */ 882 if (tlb->fullmm) { 883 __flush_all_mm(mm, true); 884 #if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLB_PAGE) 885 } else if (mm_tlb_flush_nested(mm)) { 886 /* 887 * If there is a concurrent invalidation that is clearing ptes, 888 * then it's possible this invalidation will miss one of those 889 * cleared ptes and miss flushing the TLB. If this invalidate 890 * returns before the other one flushes TLBs, that can result 891 * in it returning while there are still valid TLBs inside the 892 * range to be invalidated. 893 * 894 * See mm/memory.c:tlb_finish_mmu() for more details. 895 * 896 * The solution to this is ensure the entire range is always 897 * flushed here. The problem for powerpc is that the flushes 898 * are page size specific, so this "forced flush" would not 899 * do the right thing if there are a mix of page sizes in 900 * the range to be invalidated. So use __flush_tlb_range 901 * which invalidates all possible page sizes in the range. 902 * 903 * PWC flush probably is not be required because the core code 904 * shouldn't free page tables in this path, but accounting 905 * for the possibility makes us a bit more robust. 906 * 907 * need_flush_all is an uncommon case because page table 908 * teardown should be done with exclusive locks held (but 909 * after locks are dropped another invalidate could come 910 * in), it could be optimized further if necessary. 911 */ 912 if (!tlb->need_flush_all) 913 __radix__flush_tlb_range(mm, start, end, true); 914 else 915 radix__flush_all_mm(mm); 916 #endif 917 } else if ( (psize = radix_get_mmu_psize(page_size)) == -1) { 918 if (!tlb->need_flush_all) 919 radix__flush_tlb_mm(mm); 920 else 921 radix__flush_all_mm(mm); 922 } else { 923 if (!tlb->need_flush_all) 924 radix__flush_tlb_range_psize(mm, start, end, psize); 925 else 926 radix__flush_tlb_pwc_range_psize(mm, start, end, psize); 927 } 928 tlb->need_flush_all = 0; 929 } 930 931 static __always_inline void __radix__flush_tlb_range_psize(struct mm_struct *mm, 932 unsigned long start, unsigned long end, 933 int psize, bool also_pwc) 934 { 935 unsigned long pid; 936 unsigned int page_shift = mmu_psize_defs[psize].shift; 937 unsigned long page_size = 1UL << page_shift; 938 unsigned long nr_pages = (end - start) >> page_shift; 939 bool local, full; 940 941 pid = mm->context.id; 942 if (unlikely(pid == MMU_NO_CONTEXT)) 943 return; 944 945 preempt_disable(); 946 smp_mb(); /* see radix__flush_tlb_mm */ 947 if (!mm_is_thread_local(mm)) { 948 if (unlikely(mm_is_singlethreaded(mm))) { 949 if (end != TLB_FLUSH_ALL) { 950 exit_flush_lazy_tlbs(mm); 951 goto is_local; 952 } 953 } 954 local = false; 955 full = (end == TLB_FLUSH_ALL || 956 nr_pages > tlb_single_page_flush_ceiling); 957 } else { 958 is_local: 959 local = true; 960 full = (end == TLB_FLUSH_ALL || 961 nr_pages > tlb_local_single_page_flush_ceiling); 962 } 963 964 if (full) { 965 if (local) { 966 _tlbiel_pid(pid, also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB); 967 } else { 968 if (mm_needs_flush_escalation(mm)) 969 also_pwc = true; 970 971 _tlbie_pid(pid, also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB); 972 } 973 } else { 974 if (local) 975 _tlbiel_va_range(start, end, pid, page_size, psize, also_pwc); 976 else 977 _tlbie_va_range(start, end, pid, page_size, psize, also_pwc); 978 } 979 preempt_enable(); 980 } 981 982 void radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start, 983 unsigned long end, int psize) 984 { 985 return __radix__flush_tlb_range_psize(mm, start, end, psize, false); 986 } 987 988 static void radix__flush_tlb_pwc_range_psize(struct mm_struct *mm, unsigned long start, 989 unsigned long end, int psize) 990 { 991 __radix__flush_tlb_range_psize(mm, start, end, psize, true); 992 } 993 994 #ifdef CONFIG_TRANSPARENT_HUGEPAGE 995 void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr) 996 { 997 unsigned long pid, end; 998 999 pid = mm->context.id; 1000 if (unlikely(pid == MMU_NO_CONTEXT)) 1001 return; 1002 1003 /* 4k page size, just blow the world */ 1004 if (PAGE_SIZE == 0x1000) { 1005 radix__flush_all_mm(mm); 1006 return; 1007 } 1008 1009 end = addr + HPAGE_PMD_SIZE; 1010 1011 /* Otherwise first do the PWC, then iterate the pages. */ 1012 preempt_disable(); 1013 smp_mb(); /* see radix__flush_tlb_mm */ 1014 if (!mm_is_thread_local(mm)) { 1015 if (unlikely(mm_is_singlethreaded(mm))) { 1016 exit_flush_lazy_tlbs(mm); 1017 goto local; 1018 } 1019 _tlbie_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true); 1020 } else { 1021 local: 1022 _tlbiel_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true); 1023 } 1024 1025 preempt_enable(); 1026 } 1027 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ 1028 1029 void radix__flush_pmd_tlb_range(struct vm_area_struct *vma, 1030 unsigned long start, unsigned long end) 1031 { 1032 radix__flush_tlb_range_psize(vma->vm_mm, start, end, MMU_PAGE_2M); 1033 } 1034 EXPORT_SYMBOL(radix__flush_pmd_tlb_range); 1035 1036 void radix__flush_tlb_all(void) 1037 { 1038 unsigned long rb,prs,r,rs; 1039 unsigned long ric = RIC_FLUSH_ALL; 1040 1041 rb = 0x3 << PPC_BITLSHIFT(53); /* IS = 3 */ 1042 prs = 0; /* partition scoped */ 1043 r = 1; /* radix format */ 1044 rs = 1 & ((1UL << 32) - 1); /* any LPID value to flush guest mappings */ 1045 1046 asm volatile("ptesync": : :"memory"); 1047 /* 1048 * now flush guest entries by passing PRS = 1 and LPID != 0 1049 */ 1050 asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) 1051 : : "r"(rb), "i"(r), "i"(1), "i"(ric), "r"(rs) : "memory"); 1052 /* 1053 * now flush host entires by passing PRS = 0 and LPID == 0 1054 */ 1055 asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) 1056 : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(0) : "memory"); 1057 asm volatile("eieio; tlbsync; ptesync": : :"memory"); 1058 } 1059 1060 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 1061 extern void radix_kvm_prefetch_workaround(struct mm_struct *mm) 1062 { 1063 unsigned long pid = mm->context.id; 1064 1065 if (unlikely(pid == MMU_NO_CONTEXT)) 1066 return; 1067 1068 /* 1069 * If this context hasn't run on that CPU before and KVM is 1070 * around, there's a slim chance that the guest on another 1071 * CPU just brought in obsolete translation into the TLB of 1072 * this CPU due to a bad prefetch using the guest PID on 1073 * the way into the hypervisor. 1074 * 1075 * We work around this here. If KVM is possible, we check if 1076 * any sibling thread is in KVM. If it is, the window may exist 1077 * and thus we flush that PID from the core. 1078 * 1079 * A potential future improvement would be to mark which PIDs 1080 * have never been used on the system and avoid it if the PID 1081 * is new and the process has no other cpumask bit set. 1082 */ 1083 if (cpu_has_feature(CPU_FTR_HVMODE) && radix_enabled()) { 1084 int cpu = smp_processor_id(); 1085 int sib = cpu_first_thread_sibling(cpu); 1086 bool flush = false; 1087 1088 for (; sib <= cpu_last_thread_sibling(cpu) && !flush; sib++) { 1089 if (sib == cpu) 1090 continue; 1091 if (!cpu_possible(sib)) 1092 continue; 1093 if (paca_ptrs[sib]->kvm_hstate.kvm_vcpu) 1094 flush = true; 1095 } 1096 if (flush) 1097 _tlbiel_pid(pid, RIC_FLUSH_ALL); 1098 } 1099 } 1100 EXPORT_SYMBOL_GPL(radix_kvm_prefetch_workaround); 1101 #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ 1102