// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * TLB flush routines for radix kernels.
 *
 * Copyright 2015-2016, Aneesh Kumar K.V, IBM Corporation.
 */

#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/memblock.h>
#include <linux/mmu_context.h>
#include <linux/sched/mm.h>

#include <asm/ppc-opcode.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/trace.h>
#include <asm/cputhreads.h>

#define RIC_FLUSH_TLB 0
#define RIC_FLUSH_PWC 1
#define RIC_FLUSH_ALL 2

/*
 * tlbiel instruction for radix, set invalidation
 * i.e., r=1 and is=01 or is=10 or is=11
 */
static __always_inline void tlbiel_radix_set_isa300(unsigned int set, unsigned int is,
					unsigned int pid,
					unsigned int ric, unsigned int prs)
{
	unsigned long rb;
	unsigned long rs;

	rb = (set << PPC_BITLSHIFT(51)) | (is << PPC_BITLSHIFT(53));
	rs = ((unsigned long)pid << PPC_BITLSHIFT(31));

	asm volatile(PPC_TLBIEL(%0, %1, %2, %3, 1)
		     : : "r"(rb), "r"(rs), "i"(ric), "i"(prs)
		     : "memory");
}

static void tlbiel_all_isa300(unsigned int num_sets, unsigned int is)
{
	unsigned int set;

	asm volatile("ptesync": : :"memory");

	/*
	 * Flush the first set of the TLB, and the entire Page Walk Cache
	 * and partition table entries. Then flush the remaining sets of the
	 * TLB.
	 */

	if (early_cpu_has_feature(CPU_FTR_HVMODE)) {
		/* MSR[HV] should flush partition scope translations first. */
		tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 0);
		for (set = 1; set < num_sets; set++)
			tlbiel_radix_set_isa300(set, is, 0, RIC_FLUSH_TLB, 0);
	}

	/* Flush process scoped entries. */
	tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 1);
	for (set = 1; set < num_sets; set++)
		tlbiel_radix_set_isa300(set, is, 0, RIC_FLUSH_TLB, 1);

	asm volatile("ptesync": : :"memory");
}

void radix__tlbiel_all(unsigned int action)
{
	unsigned int is;

	switch (action) {
	case TLB_INVAL_SCOPE_GLOBAL:
		is = 3;
		break;
	case TLB_INVAL_SCOPE_LPID:
		is = 2;
		break;
	default:
		BUG();
	}

	if (early_cpu_has_feature(CPU_FTR_ARCH_300))
		tlbiel_all_isa300(POWER9_TLB_SETS_RADIX, is);
	else
		WARN(1, "%s called on pre-POWER9 CPU\n", __func__);

	asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT "; isync" : : :"memory");
}

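/*
 * Low-level tlbiel/tlbie helpers (summary added for readability):
 * RB carries the IS field plus a set number or effective address,
 * RS carries the PID or LPID, and RIC, PRS and R are immediate fields
 * of the instruction. Process scoped invalidations use PRS = 1 with
 * the PID shifted into the upper half of RS; partition scoped
 * invalidations use PRS = 0 with the LPID in RS. All are radix (R = 1)
 * forms.
 */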
static __always_inline void __tlbiel_pid(unsigned long pid, int set,
				unsigned long ric)
{
	unsigned long rb,rs,prs,r;

	rb = PPC_BIT(53); /* IS = 1 */
	rb |= set << PPC_BITLSHIFT(51);
	rs = ((unsigned long)pid) << PPC_BITLSHIFT(31);
	prs = 1; /* process scoped */
	r = 1;	/* radix format */

	asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(0, 1, rb, rs, ric, prs, r);
}

static __always_inline void __tlbie_pid(unsigned long pid, unsigned long ric)
{
	unsigned long rb,rs,prs,r;

	rb = PPC_BIT(53); /* IS = 1 */
	rs = pid << PPC_BITLSHIFT(31);
	prs = 1; /* process scoped */
	r = 1;	/* radix format */

	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(0, 0, rb, rs, ric, prs, r);
}

static __always_inline void __tlbie_lpid(unsigned long lpid, unsigned long ric)
{
	unsigned long rb,rs,prs,r;

	rb = PPC_BIT(52); /* IS = 2 */
	rs = lpid;
	prs = 0; /* partition scoped */
	r = 1;	/* radix format */

	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
}

static __always_inline void __tlbie_lpid_guest(unsigned long lpid, unsigned long ric)
{
	unsigned long rb,rs,prs,r;

	rb = PPC_BIT(52); /* IS = 2 */
	rs = lpid;
	prs = 1; /* process scoped */
	r = 1;	/* radix format */

	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
}

static __always_inline void __tlbiel_va(unsigned long va, unsigned long pid,
					unsigned long ap, unsigned long ric)
{
	unsigned long rb,rs,prs,r;

	rb = va & ~(PPC_BITMASK(52, 63));
	rb |= ap << PPC_BITLSHIFT(58);
	rs = pid << PPC_BITLSHIFT(31);
	prs = 1; /* process scoped */
	r = 1;	/* radix format */

	asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(0, 1, rb, rs, ric, prs, r);
}

static __always_inline void __tlbie_va(unsigned long va, unsigned long pid,
				       unsigned long ap, unsigned long ric)
{
	unsigned long rb,rs,prs,r;

	rb = va & ~(PPC_BITMASK(52, 63));
	rb |= ap << PPC_BITLSHIFT(58);
	rs = pid << PPC_BITLSHIFT(31);
	prs = 1; /* process scoped */
	r = 1;	/* radix format */

	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(0, 0, rb, rs, ric, prs, r);
}

static __always_inline void __tlbie_lpid_va(unsigned long va, unsigned long lpid,
					    unsigned long ap, unsigned long ric)
{
	unsigned long rb,rs,prs,r;

	rb = va & ~(PPC_BITMASK(52, 63));
	rb |= ap << PPC_BITLSHIFT(58);
	rs = lpid;
	prs = 0; /* partition scoped */
	r = 1;	/* radix format */

	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
}

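/*
 * The fixup_tlbie_*() helpers below work around tlbie errata on some
 * POWER9 processors (this summary only describes what the code does;
 * see the CPU_FTR_P9_TLBIE_* feature definitions for the full story):
 *
 * - CPU_FTR_P9_TLBIE_ERAT_BUG: follow the real invalidation with an
 *   extra tlbie targeting PID 0 / LPID 0.
 * - CPU_FTR_P9_TLBIE_STQ_BUG: re-issue the last tlbie of the sequence.
 *
 * Callers perform their invalidation(s), call the matching fixup
 * helper, then complete with the eieio; tlbsync; ptesync sequence.
 */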
static inline void fixup_tlbie_va(unsigned long va, unsigned long pid,
				  unsigned long ap)
{
	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_va(va, 0, ap, RIC_FLUSH_TLB);
	}

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_va(va, pid, ap, RIC_FLUSH_TLB);
	}
}

static inline void fixup_tlbie_va_range(unsigned long va, unsigned long pid,
					unsigned long ap)
{
	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_pid(0, RIC_FLUSH_TLB);
	}

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_va(va, pid, ap, RIC_FLUSH_TLB);
	}
}

static inline void fixup_tlbie_pid(unsigned long pid)
{
	/*
	 * We can use any address for the invalidation, pick one which is
	 * probably unused as an optimisation.
	 */
	unsigned long va = ((1UL << 52) - 1);

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_pid(0, RIC_FLUSH_TLB);
	}

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_va(va, pid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB);
	}
}

static inline void fixup_tlbie_lpid_va(unsigned long va, unsigned long lpid,
				       unsigned long ap)
{
	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_lpid_va(va, 0, ap, RIC_FLUSH_TLB);
	}

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_lpid_va(va, lpid, ap, RIC_FLUSH_TLB);
	}
}

static inline void fixup_tlbie_lpid(unsigned long lpid)
{
	/*
	 * We can use any address for the invalidation, pick one which is
	 * probably unused as an optimisation.
	 */
	unsigned long va = ((1UL << 52) - 1);

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_lpid(0, RIC_FLUSH_TLB);
	}

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_lpid_va(va, lpid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB);
	}
}

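/*
 * Note on synchronisation: the _tlbiel_*() variants below invalidate
 * only the local CPU's TLB and are bracketed by ptesync, while the
 * _tlbie_*() variants broadcast to the whole system and are completed
 * with the eieio; tlbsync; ptesync sequence.
 */
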
/*
 * We use 128 sets in radix mode and 256 sets in hpt mode.
 */
static __always_inline void _tlbiel_pid(unsigned long pid, unsigned long ric)
{
	int set;

	asm volatile("ptesync": : :"memory");

	/*
	 * Flush the first set of the TLB, and if we're doing a RIC_FLUSH_ALL,
	 * also flush the entire Page Walk Cache.
	 */
	__tlbiel_pid(pid, 0, ric);

	/* For PWC, only one flush is needed */
	if (ric == RIC_FLUSH_PWC) {
		asm volatile("ptesync": : :"memory");
		return;
	}

	/* For the remaining sets, just flush the TLB */
	for (set = 1; set < POWER9_TLB_SETS_RADIX ; set++)
		__tlbiel_pid(pid, set, RIC_FLUSH_TLB);

	asm volatile("ptesync": : :"memory");
	asm volatile(PPC_RADIX_INVALIDATE_ERAT_USER "; isync" : : :"memory");
}

static inline void _tlbie_pid(unsigned long pid, unsigned long ric)
{
	asm volatile("ptesync": : :"memory");

	/*
	 * Workaround the fact that the "ric" argument to __tlbie_pid
	 * must be a compile-time constant to match the "i" constraint
	 * in the asm statement.
	 */
	switch (ric) {
	case RIC_FLUSH_TLB:
		__tlbie_pid(pid, RIC_FLUSH_TLB);
		fixup_tlbie_pid(pid);
		break;
	case RIC_FLUSH_PWC:
		__tlbie_pid(pid, RIC_FLUSH_PWC);
		break;
	case RIC_FLUSH_ALL:
	default:
		__tlbie_pid(pid, RIC_FLUSH_ALL);
		fixup_tlbie_pid(pid);
	}
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

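/*
 * When tlbie is not usable (cputlb_use_tlbie() returns false), global
 * invalidations are done by sending an IPI to every CPU in the mm's
 * cpumask and running tlbiel locally on each of them. Coprocessor
 * (nest MMU) translations are not invalidated by tlbiel, so a
 * broadcast tlbie is still issued when the context has copros
 * attached.
 */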
struct tlbiel_pid {
	unsigned long pid;
	unsigned long ric;
};

static void do_tlbiel_pid(void *info)
{
	struct tlbiel_pid *t = info;

	if (t->ric == RIC_FLUSH_TLB)
		_tlbiel_pid(t->pid, RIC_FLUSH_TLB);
	else if (t->ric == RIC_FLUSH_PWC)
		_tlbiel_pid(t->pid, RIC_FLUSH_PWC);
	else
		_tlbiel_pid(t->pid, RIC_FLUSH_ALL);
}

static inline void _tlbiel_pid_multicast(struct mm_struct *mm,
				unsigned long pid, unsigned long ric)
{
	struct cpumask *cpus = mm_cpumask(mm);
	struct tlbiel_pid t = { .pid = pid, .ric = ric };

	on_each_cpu_mask(cpus, do_tlbiel_pid, &t, 1);
	/*
	 * Always want the CPU translations to be invalidated with tlbiel in
	 * these paths, so while coprocessors must use tlbie, we cannot
	 * optimise away the tlbiel component.
	 */
	if (atomic_read(&mm->context.copros) > 0)
		_tlbie_pid(pid, RIC_FLUSH_ALL);
}

static inline void _tlbie_lpid(unsigned long lpid, unsigned long ric)
{
	asm volatile("ptesync": : :"memory");

	/*
	 * Workaround the fact that the "ric" argument to __tlbie_lpid
	 * must be a compile-time constant to match the "i" constraint
	 * in the asm statement.
	 */
	switch (ric) {
	case RIC_FLUSH_TLB:
		__tlbie_lpid(lpid, RIC_FLUSH_TLB);
		fixup_tlbie_lpid(lpid);
		break;
	case RIC_FLUSH_PWC:
		__tlbie_lpid(lpid, RIC_FLUSH_PWC);
		break;
	case RIC_FLUSH_ALL:
	default:
		__tlbie_lpid(lpid, RIC_FLUSH_ALL);
		fixup_tlbie_lpid(lpid);
	}
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

static __always_inline void _tlbie_lpid_guest(unsigned long lpid, unsigned long ric)
{
	/*
	 * Workaround the fact that the "ric" argument to __tlbie_lpid_guest
	 * must be a compile-time constant to match the "i" constraint
	 * in the asm statement.
	 */
	switch (ric) {
	case RIC_FLUSH_TLB:
		__tlbie_lpid_guest(lpid, RIC_FLUSH_TLB);
		break;
	case RIC_FLUSH_PWC:
		__tlbie_lpid_guest(lpid, RIC_FLUSH_PWC);
		break;
	case RIC_FLUSH_ALL:
	default:
		__tlbie_lpid_guest(lpid, RIC_FLUSH_ALL);
	}
	fixup_tlbie_lpid(lpid);
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

static inline void __tlbiel_va_range(unsigned long start, unsigned long end,
				     unsigned long pid, unsigned long page_size,
				     unsigned long psize)
{
	unsigned long addr;
	unsigned long ap = mmu_get_ap(psize);

	for (addr = start; addr < end; addr += page_size)
		__tlbiel_va(addr, pid, ap, RIC_FLUSH_TLB);
}

static __always_inline void _tlbiel_va(unsigned long va, unsigned long pid,
				       unsigned long psize, unsigned long ric)
{
	unsigned long ap = mmu_get_ap(psize);

	asm volatile("ptesync": : :"memory");
	__tlbiel_va(va, pid, ap, ric);
	asm volatile("ptesync": : :"memory");
}

static inline void _tlbiel_va_range(unsigned long start, unsigned long end,
				    unsigned long pid, unsigned long page_size,
				    unsigned long psize, bool also_pwc)
{
	asm volatile("ptesync": : :"memory");
	if (also_pwc)
		__tlbiel_pid(pid, 0, RIC_FLUSH_PWC);
	__tlbiel_va_range(start, end, pid, page_size, psize);
	asm volatile("ptesync": : :"memory");
}

static inline void __tlbie_va_range(unsigned long start, unsigned long end,
				    unsigned long pid, unsigned long page_size,
				    unsigned long psize)
{
	unsigned long addr;
	unsigned long ap = mmu_get_ap(psize);

	for (addr = start; addr < end; addr += page_size)
		__tlbie_va(addr, pid, ap, RIC_FLUSH_TLB);

	fixup_tlbie_va_range(addr - page_size, pid, ap);
}

static __always_inline void _tlbie_va(unsigned long va, unsigned long pid,
				      unsigned long psize, unsigned long ric)
{
	unsigned long ap = mmu_get_ap(psize);

	asm volatile("ptesync": : :"memory");
	__tlbie_va(va, pid, ap, ric);
	fixup_tlbie_va(va, pid, ap);
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

struct tlbiel_va {
	unsigned long pid;
	unsigned long va;
	unsigned long psize;
	unsigned long ric;
};

static void do_tlbiel_va(void *info)
{
	struct tlbiel_va *t = info;

	if (t->ric == RIC_FLUSH_TLB)
		_tlbiel_va(t->va, t->pid, t->psize, RIC_FLUSH_TLB);
	else if (t->ric == RIC_FLUSH_PWC)
		_tlbiel_va(t->va, t->pid, t->psize, RIC_FLUSH_PWC);
	else
		_tlbiel_va(t->va, t->pid, t->psize, RIC_FLUSH_ALL);
}

static inline void _tlbiel_va_multicast(struct mm_struct *mm,
				unsigned long va, unsigned long pid,
				unsigned long psize, unsigned long ric)
{
	struct cpumask *cpus = mm_cpumask(mm);
	struct tlbiel_va t = { .va = va, .pid = pid, .psize = psize, .ric = ric };

	on_each_cpu_mask(cpus, do_tlbiel_va, &t, 1);
	if (atomic_read(&mm->context.copros) > 0)
		_tlbie_va(va, pid, psize, RIC_FLUSH_TLB);
}

struct tlbiel_va_range {
	unsigned long pid;
	unsigned long start;
	unsigned long end;
	unsigned long page_size;
	unsigned long psize;
	bool also_pwc;
};

static void do_tlbiel_va_range(void *info)
{
	struct tlbiel_va_range *t = info;

	_tlbiel_va_range(t->start, t->end, t->pid, t->page_size,
			 t->psize, t->also_pwc);
}

static __always_inline void _tlbie_lpid_va(unsigned long va, unsigned long lpid,
					   unsigned long psize, unsigned long ric)
{
	unsigned long ap = mmu_get_ap(psize);

	asm volatile("ptesync": : :"memory");
	__tlbie_lpid_va(va, lpid, ap, ric);
	fixup_tlbie_lpid_va(va, lpid, ap);
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

static inline void _tlbie_va_range(unsigned long start, unsigned long end,
				   unsigned long pid, unsigned long page_size,
				   unsigned long psize, bool also_pwc)
{
	asm volatile("ptesync": : :"memory");
	if (also_pwc)
		__tlbie_pid(pid, RIC_FLUSH_PWC);
	__tlbie_va_range(start, end, pid, page_size, psize);
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

static inline void _tlbiel_va_range_multicast(struct mm_struct *mm,
				unsigned long start, unsigned long end,
				unsigned long pid, unsigned long page_size,
				unsigned long psize, bool also_pwc)
{
	struct cpumask *cpus = mm_cpumask(mm);
	struct tlbiel_va_range t = { .start = start, .end = end,
			.pid = pid, .page_size = page_size,
			.psize = psize, .also_pwc = also_pwc };

	on_each_cpu_mask(cpus, do_tlbiel_va_range, &t, 1);
	if (atomic_read(&mm->context.copros) > 0)
		_tlbie_va_range(start, end, pid, page_size, psize, also_pwc);
}

/*
 * Base TLB flushing operations:
 *
 *  - flush_tlb_mm(mm) flushes the specified mm context TLB's
 *  - flush_tlb_page(vma, vmaddr) flushes one page
 *  - flush_tlb_range(vma, start, end) flushes a range of pages
 *  - flush_tlb_kernel_range(start, end) flushes kernel pages
 *
 *  - local_* variants of page and mm only apply to the current
 *    processor
 */
void radix__local_flush_tlb_mm(struct mm_struct *mm)
{
	unsigned long pid;

	preempt_disable();
	pid = mm->context.id;
	if (pid != MMU_NO_CONTEXT)
		_tlbiel_pid(pid, RIC_FLUSH_TLB);
	preempt_enable();
}
EXPORT_SYMBOL(radix__local_flush_tlb_mm);

#ifndef CONFIG_SMP
void radix__local_flush_all_mm(struct mm_struct *mm)
{
	unsigned long pid;

	preempt_disable();
	pid = mm->context.id;
	if (pid != MMU_NO_CONTEXT)
		_tlbiel_pid(pid, RIC_FLUSH_ALL);
	preempt_enable();
}
EXPORT_SYMBOL(radix__local_flush_all_mm);

static void __flush_all_mm(struct mm_struct *mm, bool fullmm)
{
	radix__local_flush_all_mm(mm);
}
#endif /* CONFIG_SMP */

void radix__local_flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
				       int psize)
{
	unsigned long pid;

	preempt_disable();
	pid = mm->context.id;
	if (pid != MMU_NO_CONTEXT)
		_tlbiel_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
	preempt_enable();
}

void radix__local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
{
#ifdef CONFIG_HUGETLB_PAGE
	/* need the return fix for nohash.c */
	if (is_vm_hugetlb_page(vma))
		return radix__local_flush_hugetlb_page(vma, vmaddr);
#endif
	radix__local_flush_tlb_page_psize(vma->vm_mm, vmaddr, mmu_virtual_psize);
}
EXPORT_SYMBOL(radix__local_flush_tlb_page);

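/*
 * mm_is_singlethreaded() decides whether a flush can be downgraded to
 * a local tlbiel: true when no coprocessor is attached and the only
 * user of the mm is the current task. Other CPUs may still have the mm
 * active as a lazy (kernel thread) mm, which is handled by
 * exit_flush_lazy_tlbs() below.
 */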
static bool mm_is_singlethreaded(struct mm_struct *mm)
{
	if (atomic_read(&mm->context.copros) > 0)
		return false;
	if (atomic_read(&mm->mm_users) <= 1 && current->mm == mm)
		return true;
	return false;
}

static bool mm_needs_flush_escalation(struct mm_struct *mm)
{
	/*
	 * P9 nest MMU has issues with the page walk cache
	 * caching PTEs and not flushing them properly when
	 * RIC = 0 for a PID/LPID invalidate
	 */
	if (atomic_read(&mm->context.copros) > 0)
		return true;
	return false;
}

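/*
 * Lazy mm handling: when a flush finds that the mm has become single
 * threaded, do_exit_flush_lazy_tlb() is run on the other CPUs in the
 * mm's cpumask to switch any lazy (kernel thread) users over to
 * init_mm and flush the PID there, so subsequent flushes for this mm
 * can stay local.
 */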
#ifdef CONFIG_SMP
static void do_exit_flush_lazy_tlb(void *arg)
{
	struct mm_struct *mm = arg;
	unsigned long pid = mm->context.id;

	if (current->mm == mm)
		return; /* Local CPU */

	if (current->active_mm == mm) {
		/*
		 * Must be a kernel thread because sender is single-threaded.
		 */
		BUG_ON(current->mm);
		mmgrab(&init_mm);
		switch_mm(mm, &init_mm, current);
		current->active_mm = &init_mm;
		mmdrop(mm);
	}
	_tlbiel_pid(pid, RIC_FLUSH_ALL);
}

static void exit_flush_lazy_tlbs(struct mm_struct *mm)
{
	/*
	 * Would be nice if this was async so it could be run in
	 * parallel with our local flush, but generic code does not
	 * give a good API for it. Could extend the generic code or
	 * make a special powerpc IPI for flushing TLBs.
	 * For now it's not too performance critical.
	 */
	smp_call_function_many(mm_cpumask(mm), do_exit_flush_lazy_tlb,
				(void *)mm, 1);
	mm_reset_thread_local(mm);
}

void radix__flush_tlb_mm(struct mm_struct *mm)
{
	unsigned long pid;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	preempt_disable();
	/*
	 * Order loads of mm_cpumask vs previous stores to clear ptes before
	 * the invalidate. See barrier in switch_mm_irqs_off
	 */
	smp_mb();
	if (!mm_is_thread_local(mm)) {
		if (unlikely(mm_is_singlethreaded(mm))) {
			exit_flush_lazy_tlbs(mm);
			goto local;
		}

		if (cputlb_use_tlbie()) {
			if (mm_needs_flush_escalation(mm))
				_tlbie_pid(pid, RIC_FLUSH_ALL);
			else
				_tlbie_pid(pid, RIC_FLUSH_TLB);
		} else {
			_tlbiel_pid_multicast(mm, pid, RIC_FLUSH_TLB);
		}
	} else {
local:
		_tlbiel_pid(pid, RIC_FLUSH_TLB);
	}
	preempt_enable();
}
EXPORT_SYMBOL(radix__flush_tlb_mm);

static void __flush_all_mm(struct mm_struct *mm, bool fullmm)
{
	unsigned long pid;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	preempt_disable();
	smp_mb(); /* see radix__flush_tlb_mm */
	if (!mm_is_thread_local(mm)) {
		if (unlikely(mm_is_singlethreaded(mm))) {
			if (!fullmm) {
				exit_flush_lazy_tlbs(mm);
				goto local;
			}
		}
		if (cputlb_use_tlbie())
			_tlbie_pid(pid, RIC_FLUSH_ALL);
		else
			_tlbiel_pid_multicast(mm, pid, RIC_FLUSH_ALL);
	} else {
local:
		_tlbiel_pid(pid, RIC_FLUSH_ALL);
	}
	preempt_enable();
}

void radix__flush_all_mm(struct mm_struct *mm)
{
	__flush_all_mm(mm, false);
}
EXPORT_SYMBOL(radix__flush_all_mm);

void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
				 int psize)
{
	unsigned long pid;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	preempt_disable();
	smp_mb(); /* see radix__flush_tlb_mm */
	if (!mm_is_thread_local(mm)) {
		if (unlikely(mm_is_singlethreaded(mm))) {
			exit_flush_lazy_tlbs(mm);
			goto local;
		}
		if (cputlb_use_tlbie())
			_tlbie_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
		else
			_tlbiel_va_multicast(mm, vmaddr, pid, psize, RIC_FLUSH_TLB);
	} else {
local:
		_tlbiel_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
	}
	preempt_enable();
}

void radix__flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
{
#ifdef CONFIG_HUGETLB_PAGE
	if (is_vm_hugetlb_page(vma))
		return radix__flush_hugetlb_page(vma, vmaddr);
#endif
	radix__flush_tlb_page_psize(vma->vm_mm, vmaddr, mmu_virtual_psize);
}
EXPORT_SYMBOL(radix__flush_tlb_page);

#else /* CONFIG_SMP */
static inline void exit_flush_lazy_tlbs(struct mm_struct *mm) { }
#endif /* CONFIG_SMP */

static void do_tlbiel_kernel(void *info)
{
	_tlbiel_pid(0, RIC_FLUSH_ALL);
}

static inline void _tlbiel_kernel_broadcast(void)
{
	on_each_cpu(do_tlbiel_kernel, NULL, 1);
	if (tlbie_capable) {
		/*
		 * Coherent accelerators don't refcount kernel memory mappings,
		 * so have to always issue a tlbie for them. This is quite a
		 * slow path anyway.
		 */
		_tlbie_pid(0, RIC_FLUSH_ALL);
	}
}

/*
 * If kernel TLBIs ever become local rather than global, then
 * drivers/misc/ocxl/link.c:ocxl_link_add_pe will need some work, as it
 * assumes kernel TLBIs are global.
 */
void radix__flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
	if (cputlb_use_tlbie())
		_tlbie_pid(0, RIC_FLUSH_ALL);
	else
		_tlbiel_kernel_broadcast();
}
EXPORT_SYMBOL(radix__flush_tlb_kernel_range);

#define TLB_FLUSH_ALL -1UL

/*
 * Number of pages above which we invalidate the entire PID rather than
 * flush individual pages, for local and global flushes respectively.
 *
 * tlbie goes out to the interconnect and individual ops are more costly.
 * It also does not iterate over sets like the local tlbiel variant when
 * invalidating a full PID, so it has a far lower threshold to change from
 * individual page flushes to full-pid flushes.
 */
static unsigned long tlb_single_page_flush_ceiling __read_mostly = 33;
static unsigned long tlb_local_single_page_flush_ceiling __read_mostly = POWER9_TLB_SETS_RADIX * 2;

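/*
 * __radix__flush_tlb_range() picks between a full PID flush and
 * per-page invalidations based on the ceilings above. With THP
 * enabled, the PMD-aligned part of the range is additionally flushed
 * with the 2M page size, since it may be mapped by huge PTEs whose
 * TLB entries are not matched by the base page size invalidations.
 */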
static inline void __radix__flush_tlb_range(struct mm_struct *mm,
					    unsigned long start, unsigned long end)
{
	unsigned long pid;
	unsigned int page_shift = mmu_psize_defs[mmu_virtual_psize].shift;
	unsigned long page_size = 1UL << page_shift;
	unsigned long nr_pages = (end - start) >> page_shift;
	bool local, full;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	preempt_disable();
	smp_mb(); /* see radix__flush_tlb_mm */
	if (!mm_is_thread_local(mm)) {
		if (unlikely(mm_is_singlethreaded(mm))) {
			if (end != TLB_FLUSH_ALL) {
				exit_flush_lazy_tlbs(mm);
				goto is_local;
			}
		}
		local = false;
		full = (end == TLB_FLUSH_ALL ||
				nr_pages > tlb_single_page_flush_ceiling);
	} else {
is_local:
		local = true;
		full = (end == TLB_FLUSH_ALL ||
				nr_pages > tlb_local_single_page_flush_ceiling);
	}

	if (full) {
		if (local) {
			_tlbiel_pid(pid, RIC_FLUSH_TLB);
		} else {
			if (cputlb_use_tlbie()) {
				if (mm_needs_flush_escalation(mm))
					_tlbie_pid(pid, RIC_FLUSH_ALL);
				else
					_tlbie_pid(pid, RIC_FLUSH_TLB);
			} else {
				_tlbiel_pid_multicast(mm, pid, RIC_FLUSH_TLB);
			}
		}
	} else {
		bool hflush = false;
		unsigned long hstart, hend;

		if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) {
			hstart = (start + PMD_SIZE - 1) & PMD_MASK;
			hend = end & PMD_MASK;
			if (hstart < hend)
				hflush = true;
		}

		if (local) {
			asm volatile("ptesync": : :"memory");
			__tlbiel_va_range(start, end, pid, page_size, mmu_virtual_psize);
			if (hflush)
				__tlbiel_va_range(hstart, hend, pid,
						PMD_SIZE, MMU_PAGE_2M);
			asm volatile("ptesync": : :"memory");
		} else if (cputlb_use_tlbie()) {
			asm volatile("ptesync": : :"memory");
			__tlbie_va_range(start, end, pid, page_size, mmu_virtual_psize);
			if (hflush)
				__tlbie_va_range(hstart, hend, pid,
						PMD_SIZE, MMU_PAGE_2M);
			asm volatile("eieio; tlbsync; ptesync": : :"memory");
		} else {
			_tlbiel_va_range_multicast(mm,
					start, end, pid, page_size, mmu_virtual_psize, false);
			if (hflush)
				_tlbiel_va_range_multicast(mm,
					hstart, hend, pid, PMD_SIZE, MMU_PAGE_2M, false);
		}
	}
	preempt_enable();
}

void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
			    unsigned long end)
{
#ifdef CONFIG_HUGETLB_PAGE
	if (is_vm_hugetlb_page(vma))
		return radix__flush_hugetlb_tlb_range(vma, start, end);
#endif

	__radix__flush_tlb_range(vma->vm_mm, start, end);
}
EXPORT_SYMBOL(radix__flush_tlb_range);

static int radix_get_mmu_psize(int page_size)
{
	int psize;

	if (page_size == (1UL << mmu_psize_defs[mmu_virtual_psize].shift))
		psize = mmu_virtual_psize;
	else if (page_size == (1UL << mmu_psize_defs[MMU_PAGE_2M].shift))
		psize = MMU_PAGE_2M;
	else if (page_size == (1UL << mmu_psize_defs[MMU_PAGE_1G].shift))
		psize = MMU_PAGE_1G;
	else
		return -1;
	return psize;
}

/*
 * Flush partition scoped LPID address translation for all CPUs.
 */
void radix__flush_tlb_lpid_page(unsigned int lpid,
					unsigned long addr,
					unsigned long page_size)
{
	int psize = radix_get_mmu_psize(page_size);

	_tlbie_lpid_va(addr, lpid, psize, RIC_FLUSH_TLB);
}
EXPORT_SYMBOL_GPL(radix__flush_tlb_lpid_page);

/*
 * Flush partition scoped PWC from LPID for all CPUs.
 */
void radix__flush_pwc_lpid(unsigned int lpid)
{
	_tlbie_lpid(lpid, RIC_FLUSH_PWC);
}
EXPORT_SYMBOL_GPL(radix__flush_pwc_lpid);

/*
 * Flush partition scoped translations from LPID (=LPIDR)
 */
void radix__flush_all_lpid(unsigned int lpid)
{
	_tlbie_lpid(lpid, RIC_FLUSH_ALL);
}
EXPORT_SYMBOL_GPL(radix__flush_all_lpid);

/*
 * Flush process scoped translations from LPID (=LPIDR)
 */
void radix__flush_all_lpid_guest(unsigned int lpid)
{
	_tlbie_lpid_guest(lpid, RIC_FLUSH_ALL);
}

static void radix__flush_tlb_pwc_range_psize(struct mm_struct *mm, unsigned long start,
				  unsigned long end, int psize);

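/*
 * radix__tlb_flush() is the mmu_gather flush callback. tlb->freed_tables
 * means page tables were unlinked by this gather, so the page walk
 * cache has to be flushed as well (the *_pwc / _all variants); plain
 * PTE changes only need the TLB flushed.
 */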
void radix__tlb_flush(struct mmu_gather *tlb)
{
	int psize = 0;
	struct mm_struct *mm = tlb->mm;
	int page_size = tlb->page_size;
	unsigned long start = tlb->start;
	unsigned long end = tlb->end;

	/*
	 * if page size is not something we understand, do a full mm flush
	 *
	 * A "fullmm" flush must always do a flush_all_mm (RIC=2) flush
	 * that flushes the process table entry cache upon process teardown.
	 * See the comment for radix in arch_exit_mmap().
	 */
	if (tlb->fullmm || tlb->need_flush_all) {
		__flush_all_mm(mm, true);
	} else if ((psize = radix_get_mmu_psize(page_size)) == -1) {
		if (!tlb->freed_tables)
			radix__flush_tlb_mm(mm);
		else
			radix__flush_all_mm(mm);
	} else {
		if (!tlb->freed_tables)
			radix__flush_tlb_range_psize(mm, start, end, psize);
		else
			radix__flush_tlb_pwc_range_psize(mm, start, end, psize);
	}
}

static __always_inline void __radix__flush_tlb_range_psize(struct mm_struct *mm,
				unsigned long start, unsigned long end,
				int psize, bool also_pwc)
{
	unsigned long pid;
	unsigned int page_shift = mmu_psize_defs[psize].shift;
	unsigned long page_size = 1UL << page_shift;
	unsigned long nr_pages = (end - start) >> page_shift;
	bool local, full;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	preempt_disable();
	smp_mb(); /* see radix__flush_tlb_mm */
	if (!mm_is_thread_local(mm)) {
		if (unlikely(mm_is_singlethreaded(mm))) {
			if (end != TLB_FLUSH_ALL) {
				exit_flush_lazy_tlbs(mm);
				goto is_local;
			}
		}
		local = false;
		full = (end == TLB_FLUSH_ALL ||
				nr_pages > tlb_single_page_flush_ceiling);
	} else {
is_local:
		local = true;
		full = (end == TLB_FLUSH_ALL ||
				nr_pages > tlb_local_single_page_flush_ceiling);
	}

	if (full) {
		if (local) {
			_tlbiel_pid(pid, also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB);
		} else {
			if (cputlb_use_tlbie()) {
				if (mm_needs_flush_escalation(mm))
					also_pwc = true;

				_tlbie_pid(pid,
					also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB);
			} else {
				_tlbiel_pid_multicast(mm, pid,
					also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB);
			}
		}
	} else {
		if (local)
			_tlbiel_va_range(start, end, pid, page_size, psize, also_pwc);
		else if (cputlb_use_tlbie())
			_tlbie_va_range(start, end, pid, page_size, psize, also_pwc);
		else
			_tlbiel_va_range_multicast(mm,
					start, end, pid, page_size, psize, also_pwc);
	}
	preempt_enable();
}

void radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start,
				  unsigned long end, int psize)
{
	return __radix__flush_tlb_range_psize(mm, start, end, psize, false);
}

static void radix__flush_tlb_pwc_range_psize(struct mm_struct *mm, unsigned long start,
				  unsigned long end, int psize)
{
	__radix__flush_tlb_range_psize(mm, start, end, psize, true);
}

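/*
 * Called after a PMD worth of small pages has been collapsed into a
 * huge page: the old base page size entries over the PMD range are
 * flushed together with the page walk cache (also_pwc == true), since
 * the page table page underneath is no longer in use.
 */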
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr)
{
	unsigned long pid, end;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	/* 4k page size, just blow the world */
	if (PAGE_SIZE == 0x1000) {
		radix__flush_all_mm(mm);
		return;
	}

	end = addr + HPAGE_PMD_SIZE;

	/* Otherwise first do the PWC, then iterate the pages. */
	preempt_disable();
	smp_mb(); /* see radix__flush_tlb_mm */
	if (!mm_is_thread_local(mm)) {
		if (unlikely(mm_is_singlethreaded(mm))) {
			exit_flush_lazy_tlbs(mm);
			goto local;
		}
		if (cputlb_use_tlbie())
			_tlbie_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
		else
			_tlbiel_va_range_multicast(mm,
					addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
	} else {
local:
		_tlbiel_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
	}

	preempt_enable();
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

void radix__flush_pmd_tlb_range(struct vm_area_struct *vma,
				unsigned long start, unsigned long end)
{
	radix__flush_tlb_range_psize(vma->vm_mm, start, end, MMU_PAGE_2M);
}
EXPORT_SYMBOL(radix__flush_pmd_tlb_range);

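/*
 * radix__flush_tlb_all() uses IS = 3 tlbie operations to invalidate
 * guest translations (PRS = 1, LPID != 0) and then host translations
 * (PRS = 0, LPID == 0) across the whole system.
 */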
void radix__flush_tlb_all(void)
{
	unsigned long rb,prs,r,rs;
	unsigned long ric = RIC_FLUSH_ALL;

	rb = 0x3 << PPC_BITLSHIFT(53); /* IS = 3 */
	prs = 0; /* partition scoped */
	r = 1; /* radix format */
	rs = 1 & ((1UL << 32) - 1); /* any LPID value to flush guest mappings */

	asm volatile("ptesync": : :"memory");
	/*
	 * now flush guest entries by passing PRS = 1 and LPID != 0
	 */
	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(1), "i"(ric), "r"(rs) : "memory");
	/*
	 * now flush host entries by passing PRS = 0 and LPID == 0
	 */
	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(0) : "memory");
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
extern void radix_kvm_prefetch_workaround(struct mm_struct *mm)
{
	unsigned long pid = mm->context.id;

	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	if (!cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG))
		return;

	/*
	 * If this context hasn't run on that CPU before and KVM is
	 * around, there's a slim chance that the guest on another
	 * CPU just brought in obsolete translation into the TLB of
	 * this CPU due to a bad prefetch using the guest PID on
	 * the way into the hypervisor.
	 *
	 * We work around this here. If KVM is possible, we check if
	 * any sibling thread is in KVM. If it is, the window may exist
	 * and thus we flush that PID from the core.
	 *
	 * A potential future improvement would be to mark which PIDs
	 * have never been used on the system and avoid it if the PID
	 * is new and the process has no other cpumask bit set.
	 */
	if (cpu_has_feature(CPU_FTR_HVMODE) && radix_enabled()) {
		int cpu = smp_processor_id();
		int sib = cpu_first_thread_sibling(cpu);
		bool flush = false;

		for (; sib <= cpu_last_thread_sibling(cpu) && !flush; sib++) {
			if (sib == cpu)
				continue;
			if (!cpu_possible(sib))
				continue;
			if (paca_ptrs[sib]->kvm_hstate.kvm_vcpu)
				flush = true;
		}
		if (flush)
			_tlbiel_pid(pid, RIC_FLUSH_ALL);
	}
}
EXPORT_SYMBOL_GPL(radix_kvm_prefetch_workaround);
#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */