// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * TLB flush routines for radix kernels.
 *
 * Copyright 2015-2016, Aneesh Kumar K.V, IBM Corporation.
 */

#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/memblock.h>
#include <linux/mmu_context.h>
#include <linux/sched/mm.h>

#include <asm/ppc-opcode.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/trace.h>
#include <asm/cputhreads.h>

#define RIC_FLUSH_TLB 0
#define RIC_FLUSH_PWC 1
#define RIC_FLUSH_ALL 2

/*
 * tlbiel instruction for radix, set invalidation
 * i.e., r=1 and is=01 or is=10 or is=11
 */
static __always_inline void tlbiel_radix_set_isa300(unsigned int set, unsigned int is,
					unsigned int pid,
					unsigned int ric, unsigned int prs)
{
	unsigned long rb;
	unsigned long rs;

	rb = (set << PPC_BITLSHIFT(51)) | (is << PPC_BITLSHIFT(53));
	rs = ((unsigned long)pid << PPC_BITLSHIFT(31));

	asm volatile(PPC_TLBIEL(%0, %1, %2, %3, 1)
		     : : "r"(rb), "r"(rs), "i"(ric), "i"(prs)
		     : "memory");
}

static void tlbiel_all_isa300(unsigned int num_sets, unsigned int is)
{
	unsigned int set;

	asm volatile("ptesync": : :"memory");

	/*
	 * Flush the first set of the TLB, and the entire Page Walk Cache
	 * and partition table entries. Then flush the remaining sets of the
	 * TLB.
	 */

	if (early_cpu_has_feature(CPU_FTR_HVMODE)) {
		/* MSR[HV] should flush partition scope translations first. */
		tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 0);
		for (set = 1; set < num_sets; set++)
			tlbiel_radix_set_isa300(set, is, 0, RIC_FLUSH_TLB, 0);
	}

	/* Flush process scoped entries. */
	tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 1);
	for (set = 1; set < num_sets; set++)
		tlbiel_radix_set_isa300(set, is, 0, RIC_FLUSH_TLB, 1);

	asm volatile("ptesync": : :"memory");
}

void radix__tlbiel_all(unsigned int action)
{
	unsigned int is;

	switch (action) {
	case TLB_INVAL_SCOPE_GLOBAL:
		is = 3;
		break;
	case TLB_INVAL_SCOPE_LPID:
		is = 2;
		break;
	default:
		BUG();
	}

	if (early_cpu_has_feature(CPU_FTR_ARCH_300))
		tlbiel_all_isa300(POWER9_TLB_SETS_RADIX, is);
	else
		WARN(1, "%s called on pre-POWER9 CPU\n", __func__);

	asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT "; isync" : : :"memory");
}

static __always_inline void __tlbiel_pid(unsigned long pid, int set,
				unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = PPC_BIT(53); /* IS = 1 */
	rb |= set << PPC_BITLSHIFT(51);
	rs = ((unsigned long)pid) << PPC_BITLSHIFT(31);
	prs = 1; /* process scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(0, 1, rb, rs, ric, prs, r);
}

static __always_inline void __tlbie_pid(unsigned long pid, unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = PPC_BIT(53); /* IS = 1 */
	rs = pid << PPC_BITLSHIFT(31);
	prs = 1; /* process scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(0, 0, rb, rs, ric, prs, r);
}
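
/*
 * __tlbie_lpid() below invalidates partition scoped (prs = 0) translations
 * for an LPID, while __tlbie_lpid_guest() invalidates the guest's process
 * scoped (prs = 1) translations under that LPID.
 */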

static __always_inline void __tlbie_lpid(unsigned long lpid, unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = PPC_BIT(52); /* IS = 2 */
	rs = lpid;
	prs = 0; /* partition scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
}

static __always_inline void __tlbie_lpid_guest(unsigned long lpid, unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = PPC_BIT(52); /* IS = 2 */
	rs = lpid;
	prs = 1; /* process scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
}

static __always_inline void __tlbiel_va(unsigned long va, unsigned long pid,
					unsigned long ap, unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = va & ~(PPC_BITMASK(52, 63));
	rb |= ap << PPC_BITLSHIFT(58);
	rs = pid << PPC_BITLSHIFT(31);
	prs = 1; /* process scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(0, 1, rb, rs, ric, prs, r);
}

static __always_inline void __tlbie_va(unsigned long va, unsigned long pid,
				       unsigned long ap, unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = va & ~(PPC_BITMASK(52, 63));
	rb |= ap << PPC_BITLSHIFT(58);
	rs = pid << PPC_BITLSHIFT(31);
	prs = 1; /* process scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(0, 0, rb, rs, ric, prs, r);
}

static __always_inline void __tlbie_lpid_va(unsigned long va, unsigned long lpid,
					    unsigned long ap, unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = va & ~(PPC_BITMASK(52, 63));
	rb |= ap << PPC_BITLSHIFT(58);
	rs = lpid;
	prs = 0; /* partition scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
}
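
/*
 * The fixup_tlbie_*() helpers below implement the extra invalidations
 * required by the POWER9 tlbie errata: with CPU_FTR_P9_TLBIE_ERAT_BUG an
 * additional flush with PID/LPID 0 is issued, and with
 * CPU_FTR_P9_TLBIE_STQ_BUG another tlbie targeting the same PID/LPID is
 * issued. Each extra tlbie is preceded by a ptesync.
 */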

static inline void fixup_tlbie_va(unsigned long va, unsigned long pid,
				  unsigned long ap)
{
	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_va(va, 0, ap, RIC_FLUSH_TLB);
	}

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_va(va, pid, ap, RIC_FLUSH_TLB);
	}
}

static inline void fixup_tlbie_va_range(unsigned long va, unsigned long pid,
					unsigned long ap)
{
	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_pid(0, RIC_FLUSH_TLB);
	}

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_va(va, pid, ap, RIC_FLUSH_TLB);
	}
}

static inline void fixup_tlbie_pid(unsigned long pid)
{
	/*
	 * We can use any address for the invalidation; pick one which is
	 * probably unused, as an optimisation.
	 */
	unsigned long va = ((1UL << 52) - 1);

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_pid(0, RIC_FLUSH_TLB);
	}

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_va(va, pid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB);
	}
}

static inline void fixup_tlbie_lpid_va(unsigned long va, unsigned long lpid,
				       unsigned long ap)
{
	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_lpid_va(va, 0, ap, RIC_FLUSH_TLB);
	}

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_lpid_va(va, lpid, ap, RIC_FLUSH_TLB);
	}
}

static inline void fixup_tlbie_lpid(unsigned long lpid)
{
	/*
	 * We can use any address for the invalidation; pick one which is
	 * probably unused, as an optimisation.
	 */
	unsigned long va = ((1UL << 52) - 1);

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_lpid(0, RIC_FLUSH_TLB);
	}

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_lpid_va(va, lpid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB);
	}
}

/*
 * We use 128 sets in radix mode and 256 sets in hpt mode.
 */
static __always_inline void _tlbiel_pid(unsigned long pid, unsigned long ric)
{
	int set;

	asm volatile("ptesync": : :"memory");

	/*
	 * Flush the first set of the TLB, and if we're doing a RIC_FLUSH_ALL,
	 * also flush the entire Page Walk Cache.
	 */
	__tlbiel_pid(pid, 0, ric);

	/* For PWC, only one flush is needed */
	if (ric == RIC_FLUSH_PWC) {
		asm volatile("ptesync": : :"memory");
		return;
	}

	/* For the remaining sets, just flush the TLB */
	for (set = 1; set < POWER9_TLB_SETS_RADIX; set++)
		__tlbiel_pid(pid, set, RIC_FLUSH_TLB);

	asm volatile("ptesync": : :"memory");
	asm volatile(PPC_RADIX_INVALIDATE_ERAT_USER "; isync" : : :"memory");
}
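
/*
 * _tlbie_pid() below is the broadcast counterpart of _tlbiel_pid(): a single
 * tlbie covers all sets so there is no per-set loop, the POWER9 fixups are
 * applied, and the sequence is closed with eieio; tlbsync; ptesync.
 */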

static inline void _tlbie_pid(unsigned long pid, unsigned long ric)
{
	asm volatile("ptesync": : :"memory");

	/*
	 * Workaround the fact that the "ric" argument to __tlbie_pid
	 * must be a compile-time constraint to match the "i" constraint
	 * in the asm statement.
	 */
	switch (ric) {
	case RIC_FLUSH_TLB:
		__tlbie_pid(pid, RIC_FLUSH_TLB);
		fixup_tlbie_pid(pid);
		break;
	case RIC_FLUSH_PWC:
		__tlbie_pid(pid, RIC_FLUSH_PWC);
		break;
	case RIC_FLUSH_ALL:
	default:
		__tlbie_pid(pid, RIC_FLUSH_ALL);
		fixup_tlbie_pid(pid);
	}
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

struct tlbiel_pid {
	unsigned long pid;
	unsigned long ric;
};

static void do_tlbiel_pid(void *info)
{
	struct tlbiel_pid *t = info;

	if (t->ric == RIC_FLUSH_TLB)
		_tlbiel_pid(t->pid, RIC_FLUSH_TLB);
	else if (t->ric == RIC_FLUSH_PWC)
		_tlbiel_pid(t->pid, RIC_FLUSH_PWC);
	else
		_tlbiel_pid(t->pid, RIC_FLUSH_ALL);
}

static inline void _tlbiel_pid_multicast(struct mm_struct *mm,
				unsigned long pid, unsigned long ric)
{
	struct cpumask *cpus = mm_cpumask(mm);
	struct tlbiel_pid t = { .pid = pid, .ric = ric };

	on_each_cpu_mask(cpus, do_tlbiel_pid, &t, 1);
	/*
	 * We always want the CPU translations to be invalidated with tlbiel
	 * in these paths, so while coprocessors must use tlbie, we cannot
	 * optimise away the tlbiel component.
	 */
	if (atomic_read(&mm->context.copros) > 0)
		_tlbie_pid(pid, RIC_FLUSH_ALL);
}

static inline void _tlbie_lpid(unsigned long lpid, unsigned long ric)
{
	asm volatile("ptesync": : :"memory");

	/*
	 * Workaround the fact that the "ric" argument to __tlbie_lpid
	 * must be a compile-time constraint to match the "i" constraint
	 * in the asm statement.
	 */
	switch (ric) {
	case RIC_FLUSH_TLB:
		__tlbie_lpid(lpid, RIC_FLUSH_TLB);
		fixup_tlbie_lpid(lpid);
		break;
	case RIC_FLUSH_PWC:
		__tlbie_lpid(lpid, RIC_FLUSH_PWC);
		break;
	case RIC_FLUSH_ALL:
	default:
		__tlbie_lpid(lpid, RIC_FLUSH_ALL);
		fixup_tlbie_lpid(lpid);
	}
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}
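
/*
 * _tlbie_lpid_guest() below invalidates a guest's process scoped (prs = 1)
 * translations under the given LPID; it applies fixup_tlbie_lpid() and
 * closes with eieio; tlbsync; ptesync like the other _tlbie_*() helpers.
 */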

static __always_inline void _tlbie_lpid_guest(unsigned long lpid, unsigned long ric)
{
	/*
	 * Workaround the fact that the "ric" argument to __tlbie_lpid_guest
	 * must be a compile-time constraint to match the "i" constraint
	 * in the asm statement.
	 */
	switch (ric) {
	case RIC_FLUSH_TLB:
		__tlbie_lpid_guest(lpid, RIC_FLUSH_TLB);
		break;
	case RIC_FLUSH_PWC:
		__tlbie_lpid_guest(lpid, RIC_FLUSH_PWC);
		break;
	case RIC_FLUSH_ALL:
	default:
		__tlbie_lpid_guest(lpid, RIC_FLUSH_ALL);
	}
	fixup_tlbie_lpid(lpid);
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

static inline void __tlbiel_va_range(unsigned long start, unsigned long end,
				     unsigned long pid, unsigned long page_size,
				     unsigned long psize)
{
	unsigned long addr;
	unsigned long ap = mmu_get_ap(psize);

	for (addr = start; addr < end; addr += page_size)
		__tlbiel_va(addr, pid, ap, RIC_FLUSH_TLB);
}

static __always_inline void _tlbiel_va(unsigned long va, unsigned long pid,
				       unsigned long psize, unsigned long ric)
{
	unsigned long ap = mmu_get_ap(psize);

	asm volatile("ptesync": : :"memory");
	__tlbiel_va(va, pid, ap, ric);
	asm volatile("ptesync": : :"memory");
}

static inline void _tlbiel_va_range(unsigned long start, unsigned long end,
				    unsigned long pid, unsigned long page_size,
				    unsigned long psize, bool also_pwc)
{
	asm volatile("ptesync": : :"memory");
	if (also_pwc)
		__tlbiel_pid(pid, 0, RIC_FLUSH_PWC);
	__tlbiel_va_range(start, end, pid, page_size, psize);
	asm volatile("ptesync": : :"memory");
}

static inline void __tlbie_va_range(unsigned long start, unsigned long end,
				    unsigned long pid, unsigned long page_size,
				    unsigned long psize)
{
	unsigned long addr;
	unsigned long ap = mmu_get_ap(psize);

	for (addr = start; addr < end; addr += page_size)
		__tlbie_va(addr, pid, ap, RIC_FLUSH_TLB);

	fixup_tlbie_va_range(addr - page_size, pid, ap);
}

static __always_inline void _tlbie_va(unsigned long va, unsigned long pid,
				      unsigned long psize, unsigned long ric)
{
	unsigned long ap = mmu_get_ap(psize);

	asm volatile("ptesync": : :"memory");
	__tlbie_va(va, pid, ap, ric);
	fixup_tlbie_va(va, pid, ap);
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

struct tlbiel_va {
	unsigned long pid;
	unsigned long va;
	unsigned long psize;
	unsigned long ric;
};

static void do_tlbiel_va(void *info)
{
	struct tlbiel_va *t = info;

	if (t->ric == RIC_FLUSH_TLB)
		_tlbiel_va(t->va, t->pid, t->psize, RIC_FLUSH_TLB);
	else if (t->ric == RIC_FLUSH_PWC)
		_tlbiel_va(t->va, t->pid, t->psize, RIC_FLUSH_PWC);
	else
		_tlbiel_va(t->va, t->pid, t->psize, RIC_FLUSH_ALL);
}

static inline void _tlbiel_va_multicast(struct mm_struct *mm,
					unsigned long va, unsigned long pid,
					unsigned long psize, unsigned long ric)
{
	struct cpumask *cpus = mm_cpumask(mm);
	struct tlbiel_va t = { .va = va, .pid = pid, .psize = psize, .ric = ric };

	on_each_cpu_mask(cpus, do_tlbiel_va, &t, 1);
	if (atomic_read(&mm->context.copros) > 0)
		_tlbie_va(va, pid, psize, RIC_FLUSH_TLB);
}

struct tlbiel_va_range {
	unsigned long pid;
	unsigned long start;
	unsigned long end;
	unsigned long page_size;
	unsigned long psize;
	bool also_pwc;
};

static void do_tlbiel_va_range(void *info)
{
	struct tlbiel_va_range *t = info;

	_tlbiel_va_range(t->start, t->end, t->pid, t->page_size,
			 t->psize, t->also_pwc);
}
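
/*
 * _tlbie_lpid_va() below does a partition scoped (prs = 0) invalidation of
 * a single page under the given LPID; it backs radix__flush_tlb_lpid_page().
 */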

static __always_inline void _tlbie_lpid_va(unsigned long va, unsigned long lpid,
					   unsigned long psize, unsigned long ric)
{
	unsigned long ap = mmu_get_ap(psize);

	asm volatile("ptesync": : :"memory");
	__tlbie_lpid_va(va, lpid, ap, ric);
	fixup_tlbie_lpid_va(va, lpid, ap);
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

static inline void _tlbie_va_range(unsigned long start, unsigned long end,
				   unsigned long pid, unsigned long page_size,
				   unsigned long psize, bool also_pwc)
{
	asm volatile("ptesync": : :"memory");
	if (also_pwc)
		__tlbie_pid(pid, RIC_FLUSH_PWC);
	__tlbie_va_range(start, end, pid, page_size, psize);
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

static inline void _tlbiel_va_range_multicast(struct mm_struct *mm,
				unsigned long start, unsigned long end,
				unsigned long pid, unsigned long page_size,
				unsigned long psize, bool also_pwc)
{
	struct cpumask *cpus = mm_cpumask(mm);
	struct tlbiel_va_range t = { .start = start, .end = end,
				.pid = pid, .page_size = page_size,
				.psize = psize, .also_pwc = also_pwc };

	on_each_cpu_mask(cpus, do_tlbiel_va_range, &t, 1);
	if (atomic_read(&mm->context.copros) > 0)
		_tlbie_va_range(start, end, pid, page_size, psize, also_pwc);
}

/*
 * Base TLB flushing operations:
 *
 *  - flush_tlb_mm(mm) flushes the specified mm context TLBs
 *  - flush_tlb_page(vma, vmaddr) flushes one page
 *  - flush_tlb_range(vma, start, end) flushes a range of pages
 *  - flush_tlb_kernel_range(start, end) flushes kernel pages
 *
 *  - local_* variants of page and mm only apply to the current
 *    processor
 */
void radix__local_flush_tlb_mm(struct mm_struct *mm)
{
	unsigned long pid;

	preempt_disable();
	pid = mm->context.id;
	if (pid != MMU_NO_CONTEXT)
		_tlbiel_pid(pid, RIC_FLUSH_TLB);
	preempt_enable();
}
EXPORT_SYMBOL(radix__local_flush_tlb_mm);

#ifndef CONFIG_SMP
void radix__local_flush_all_mm(struct mm_struct *mm)
{
	unsigned long pid;

	preempt_disable();
	pid = mm->context.id;
	if (pid != MMU_NO_CONTEXT)
		_tlbiel_pid(pid, RIC_FLUSH_ALL);
	preempt_enable();
}
EXPORT_SYMBOL(radix__local_flush_all_mm);
#endif /* CONFIG_SMP */

void radix__local_flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
				       int psize)
{
	unsigned long pid;

	preempt_disable();
	pid = mm->context.id;
	if (pid != MMU_NO_CONTEXT)
		_tlbiel_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
	preempt_enable();
}

void radix__local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
{
#ifdef CONFIG_HUGETLB_PAGE
	/* need the return fix for nohash.c */
	if (is_vm_hugetlb_page(vma))
		return radix__local_flush_hugetlb_page(vma, vmaddr);
#endif
	radix__local_flush_tlb_page_psize(vma->vm_mm, vmaddr, mmu_virtual_psize);
}
EXPORT_SYMBOL(radix__local_flush_tlb_page);

static bool mm_is_singlethreaded(struct mm_struct *mm)
{
	if (atomic_read(&mm->context.copros) > 0)
		return false;
	if (atomic_read(&mm->mm_users) <= 1 && current->mm == mm)
		return true;
	return false;
}

static bool mm_needs_flush_escalation(struct mm_struct *mm)
{
	/*
	 * The P9 nest MMU has issues with the page walk cache caching PTEs
	 * and not flushing them properly when RIC = 0 is used for a PID/LPID
	 * invalidate, so callers escalate to a full RIC_FLUSH_ALL invalidate
	 * when coprocessors are attached.
	 */
	if (atomic_read(&mm->context.copros) > 0)
		return true;
	return false;
}

#ifdef CONFIG_SMP
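
/*
 * do_exit_flush_lazy_tlb() runs via IPI on CPUs that may still have a
 * now single-threaded mm active as a lazy (kernel thread) active_mm: it
 * switches such a user over to init_mm and flushes the PID from the local
 * CPU, so the caller can then treat the mm as thread local and flush with
 * tlbiel only.
 */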

static void do_exit_flush_lazy_tlb(void *arg)
{
	struct mm_struct *mm = arg;
	unsigned long pid = mm->context.id;

	if (current->mm == mm)
		return; /* Local CPU */

	if (current->active_mm == mm) {
		/*
		 * Must be a kernel thread because sender is single-threaded.
		 */
		BUG_ON(current->mm);
		mmgrab(&init_mm);
		switch_mm(mm, &init_mm, current);
		current->active_mm = &init_mm;
		mmdrop(mm);
	}
	_tlbiel_pid(pid, RIC_FLUSH_ALL);
}

static void exit_flush_lazy_tlbs(struct mm_struct *mm)
{
	/*
	 * Would be nice if this was async so it could be run in
	 * parallel with our local flush, but generic code does not
	 * give a good API for it. Could extend the generic code or
	 * make a special powerpc IPI for flushing TLBs.
	 * For now it's not too performance critical.
	 */
	smp_call_function_many(mm_cpumask(mm), do_exit_flush_lazy_tlb,
				(void *)mm, 1);
	mm_reset_thread_local(mm);
}

void radix__flush_tlb_mm(struct mm_struct *mm)
{
	unsigned long pid;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	preempt_disable();
	/*
	 * Order loads of mm_cpumask vs previous stores to clear ptes before
	 * the invalidate. See barrier in switch_mm_irqs_off.
	 */
	smp_mb();
	if (!mm_is_thread_local(mm)) {
		if (unlikely(mm_is_singlethreaded(mm))) {
			exit_flush_lazy_tlbs(mm);
			goto local;
		}

		if (cputlb_use_tlbie()) {
			if (mm_needs_flush_escalation(mm))
				_tlbie_pid(pid, RIC_FLUSH_ALL);
			else
				_tlbie_pid(pid, RIC_FLUSH_TLB);
		} else {
			_tlbiel_pid_multicast(mm, pid, RIC_FLUSH_TLB);
		}
	} else {
local:
		_tlbiel_pid(pid, RIC_FLUSH_TLB);
	}
	preempt_enable();
}
EXPORT_SYMBOL(radix__flush_tlb_mm);

static void __flush_all_mm(struct mm_struct *mm, bool fullmm)
{
	unsigned long pid;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	preempt_disable();
	smp_mb(); /* see radix__flush_tlb_mm */
	if (!mm_is_thread_local(mm)) {
		if (unlikely(mm_is_singlethreaded(mm))) {
			if (!fullmm) {
				exit_flush_lazy_tlbs(mm);
				goto local;
			}
		}
		if (cputlb_use_tlbie())
			_tlbie_pid(pid, RIC_FLUSH_ALL);
		else
			_tlbiel_pid_multicast(mm, pid, RIC_FLUSH_ALL);
	} else {
local:
		_tlbiel_pid(pid, RIC_FLUSH_ALL);
	}
	preempt_enable();
}

void radix__flush_all_mm(struct mm_struct *mm)
{
	__flush_all_mm(mm, false);
}
EXPORT_SYMBOL(radix__flush_all_mm);

void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
				 int psize)
{
	unsigned long pid;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	preempt_disable();
	smp_mb(); /* see radix__flush_tlb_mm */
	if (!mm_is_thread_local(mm)) {
		if (unlikely(mm_is_singlethreaded(mm))) {
			exit_flush_lazy_tlbs(mm);
			goto local;
		}
		if (cputlb_use_tlbie())
			_tlbie_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
		else
			_tlbiel_va_multicast(mm, vmaddr, pid, psize, RIC_FLUSH_TLB);
	} else {
local:
		_tlbiel_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
	}
	preempt_enable();
}

void radix__flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
{
#ifdef CONFIG_HUGETLB_PAGE
	if (is_vm_hugetlb_page(vma))
		return radix__flush_hugetlb_page(vma, vmaddr);
#endif
	radix__flush_tlb_page_psize(vma->vm_mm, vmaddr, mmu_virtual_psize);
}
EXPORT_SYMBOL(radix__flush_tlb_page);

#else /* CONFIG_SMP */
#define radix__flush_all_mm radix__local_flush_all_mm
#endif /* CONFIG_SMP */

static void do_tlbiel_kernel(void *info)
{
	_tlbiel_pid(0, RIC_FLUSH_ALL);
}

static inline void _tlbiel_kernel_broadcast(void)
{
	on_each_cpu(do_tlbiel_kernel, NULL, 1);
	if (tlbie_capable) {
		/*
		 * Coherent accelerators don't refcount kernel memory mappings,
		 * so we always have to issue a tlbie for them. This is quite a
		 * slow path anyway.
		 */
		_tlbie_pid(0, RIC_FLUSH_ALL);
	}
}

/*
 * If kernel TLBIs ever become local rather than global, then
 * drivers/misc/ocxl/link.c:ocxl_link_add_pe will need some work, as it
 * assumes kernel TLBIs are global.
 */
void radix__flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
	if (cputlb_use_tlbie())
		_tlbie_pid(0, RIC_FLUSH_ALL);
	else
		_tlbiel_kernel_broadcast();
}
EXPORT_SYMBOL(radix__flush_tlb_kernel_range);

#define TLB_FLUSH_ALL -1UL

/*
 * Number of pages above which we invalidate the entire PID rather than
 * flush individual pages, for local and global flushes respectively.
 *
 * tlbie goes out to the interconnect and individual ops are more costly.
 * It also does not iterate over sets like the local tlbiel variant when
 * invalidating a full PID, so it has a far lower threshold to change from
 * individual page flushes to full-PID flushes.
 */
static unsigned long tlb_single_page_flush_ceiling __read_mostly = 33;
static unsigned long tlb_local_single_page_flush_ceiling __read_mostly = POWER9_TLB_SETS_RADIX * 2;
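
/*
 * __radix__flush_tlb_range() below invalidates page by page while the number
 * of pages is at or below the relevant ceiling (with an extra 2M pass when
 * transparent hugepages may have left huge entries in the range), and falls
 * back to a full PID flush above it.
 */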

static inline void __radix__flush_tlb_range(struct mm_struct *mm,
					    unsigned long start, unsigned long end)
{
	unsigned long pid;
	unsigned int page_shift = mmu_psize_defs[mmu_virtual_psize].shift;
	unsigned long page_size = 1UL << page_shift;
	unsigned long nr_pages = (end - start) >> page_shift;
	bool local, full;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	preempt_disable();
	smp_mb(); /* see radix__flush_tlb_mm */
	if (!mm_is_thread_local(mm)) {
		if (unlikely(mm_is_singlethreaded(mm))) {
			if (end != TLB_FLUSH_ALL) {
				exit_flush_lazy_tlbs(mm);
				goto is_local;
			}
		}
		local = false;
		full = (end == TLB_FLUSH_ALL ||
				nr_pages > tlb_single_page_flush_ceiling);
	} else {
is_local:
		local = true;
		full = (end == TLB_FLUSH_ALL ||
				nr_pages > tlb_local_single_page_flush_ceiling);
	}

	if (full) {
		if (local) {
			_tlbiel_pid(pid, RIC_FLUSH_TLB);
		} else {
			if (cputlb_use_tlbie()) {
				if (mm_needs_flush_escalation(mm))
					_tlbie_pid(pid, RIC_FLUSH_ALL);
				else
					_tlbie_pid(pid, RIC_FLUSH_TLB);
			} else {
				_tlbiel_pid_multicast(mm, pid, RIC_FLUSH_TLB);
			}
		}
	} else {
		bool hflush = false;
		unsigned long hstart, hend;

		if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) {
			hstart = (start + PMD_SIZE - 1) & PMD_MASK;
			hend = end & PMD_MASK;
			if (hstart == hend)
				hflush = false;
			else
				hflush = true;
		}

		if (local) {
			asm volatile("ptesync": : :"memory");
			__tlbiel_va_range(start, end, pid, page_size, mmu_virtual_psize);
			if (hflush)
				__tlbiel_va_range(hstart, hend, pid,
						PMD_SIZE, MMU_PAGE_2M);
			asm volatile("ptesync": : :"memory");
		} else if (cputlb_use_tlbie()) {
			asm volatile("ptesync": : :"memory");
			__tlbie_va_range(start, end, pid, page_size, mmu_virtual_psize);
			if (hflush)
				__tlbie_va_range(hstart, hend, pid,
						PMD_SIZE, MMU_PAGE_2M);
			asm volatile("eieio; tlbsync; ptesync": : :"memory");
		} else {
			_tlbiel_va_range_multicast(mm,
					start, end, pid, page_size, mmu_virtual_psize, false);
			if (hflush)
				_tlbiel_va_range_multicast(mm,
					hstart, hend, pid, PMD_SIZE, MMU_PAGE_2M, false);
		}
	}
	preempt_enable();
}

void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
			    unsigned long end)
{
#ifdef CONFIG_HUGETLB_PAGE
	if (is_vm_hugetlb_page(vma))
		return radix__flush_hugetlb_tlb_range(vma, start, end);
#endif

	__radix__flush_tlb_range(vma->vm_mm, start, end);
}
EXPORT_SYMBOL(radix__flush_tlb_range);

static int radix_get_mmu_psize(int page_size)
{
	int psize;

	if (page_size == (1UL << mmu_psize_defs[mmu_virtual_psize].shift))
		psize = mmu_virtual_psize;
	else if (page_size == (1UL << mmu_psize_defs[MMU_PAGE_2M].shift))
		psize = MMU_PAGE_2M;
	else if (page_size == (1UL << mmu_psize_defs[MMU_PAGE_1G].shift))
		psize = MMU_PAGE_1G;
	else
		return -1;
	return psize;
}

/*
 * Flush partition scoped LPID address translation for all CPUs.
 */
void radix__flush_tlb_lpid_page(unsigned int lpid,
				unsigned long addr,
				unsigned long page_size)
{
	int psize = radix_get_mmu_psize(page_size);

	_tlbie_lpid_va(addr, lpid, psize, RIC_FLUSH_TLB);
}
EXPORT_SYMBOL_GPL(radix__flush_tlb_lpid_page);

/*
 * Flush partition scoped PWC from LPID for all CPUs.
 */
void radix__flush_pwc_lpid(unsigned int lpid)
{
	_tlbie_lpid(lpid, RIC_FLUSH_PWC);
}
EXPORT_SYMBOL_GPL(radix__flush_pwc_lpid);

/*
 * Flush partition scoped translations from LPID (= LPIDR).
 */
void radix__flush_all_lpid(unsigned int lpid)
{
	_tlbie_lpid(lpid, RIC_FLUSH_ALL);
}
EXPORT_SYMBOL_GPL(radix__flush_all_lpid);

/*
 * Flush process scoped translations from LPID (= LPIDR).
 */
void radix__flush_all_lpid_guest(unsigned int lpid)
{
	_tlbie_lpid_guest(lpid, RIC_FLUSH_ALL);
}

static void radix__flush_tlb_pwc_range_psize(struct mm_struct *mm, unsigned long start,
					     unsigned long end, int psize);
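
/*
 * radix__tlb_flush() below is the mmu_gather flush hook: it chooses between
 * a full mm flush, a PID flush and a range flush based on the gathered page
 * size and on whether page tables were freed (which also requires the page
 * walk cache to be flushed).
 */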

void radix__tlb_flush(struct mmu_gather *tlb)
{
	int psize = 0;
	struct mm_struct *mm = tlb->mm;
	int page_size = tlb->page_size;
	unsigned long start = tlb->start;
	unsigned long end = tlb->end;

	/*
	 * if page size is not something we understand, do a full mm flush
	 *
	 * A "fullmm" flush must always do a flush_all_mm (RIC=2) flush
	 * that flushes the process table entry cache upon process teardown.
	 * See the comment for radix in arch_exit_mmap().
	 */
	if (tlb->fullmm || tlb->need_flush_all) {
		__flush_all_mm(mm, true);
	} else if ((psize = radix_get_mmu_psize(page_size)) == -1) {
		if (!tlb->freed_tables)
			radix__flush_tlb_mm(mm);
		else
			radix__flush_all_mm(mm);
	} else {
		if (!tlb->freed_tables)
			radix__flush_tlb_range_psize(mm, start, end, psize);
		else
			radix__flush_tlb_pwc_range_psize(mm, start, end, psize);
	}
}

static __always_inline void __radix__flush_tlb_range_psize(struct mm_struct *mm,
				unsigned long start, unsigned long end,
				int psize, bool also_pwc)
{
	unsigned long pid;
	unsigned int page_shift = mmu_psize_defs[psize].shift;
	unsigned long page_size = 1UL << page_shift;
	unsigned long nr_pages = (end - start) >> page_shift;
	bool local, full;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	preempt_disable();
	smp_mb(); /* see radix__flush_tlb_mm */
	if (!mm_is_thread_local(mm)) {
		if (unlikely(mm_is_singlethreaded(mm))) {
			if (end != TLB_FLUSH_ALL) {
				exit_flush_lazy_tlbs(mm);
				goto is_local;
			}
		}
		local = false;
		full = (end == TLB_FLUSH_ALL ||
				nr_pages > tlb_single_page_flush_ceiling);
	} else {
is_local:
		local = true;
		full = (end == TLB_FLUSH_ALL ||
				nr_pages > tlb_local_single_page_flush_ceiling);
	}

	if (full) {
		if (local) {
			_tlbiel_pid(pid, also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB);
		} else {
			if (cputlb_use_tlbie()) {
				if (mm_needs_flush_escalation(mm))
					also_pwc = true;

				_tlbie_pid(pid,
					also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB);
			} else {
				_tlbiel_pid_multicast(mm, pid,
					also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB);
			}
		}
	} else {
		if (local)
			_tlbiel_va_range(start, end, pid, page_size, psize, also_pwc);
		else if (cputlb_use_tlbie())
			_tlbie_va_range(start, end, pid, page_size, psize, also_pwc);
		else
			_tlbiel_va_range_multicast(mm,
					start, end, pid, page_size, psize, also_pwc);
	}
	preempt_enable();
}

void radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start,
				  unsigned long end, int psize)
{
	return __radix__flush_tlb_range_psize(mm, start, end, psize, false);
}

static void radix__flush_tlb_pwc_range_psize(struct mm_struct *mm, unsigned long start,
					     unsigned long end, int psize)
{
	__radix__flush_tlb_range_psize(mm, start, end, psize, true);
}

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
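
/*
 * radix__flush_tlb_collapsed_pmd() below flushes the base page size
 * translations covering a PMD range that is being collapsed into a huge
 * page, together with the page walk cache for that range.
 */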

void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr)
{
	unsigned long pid, end;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	/* 4k page size, just blow the world */
	if (PAGE_SIZE == 0x1000) {
		radix__flush_all_mm(mm);
		return;
	}

	end = addr + HPAGE_PMD_SIZE;

	/* Otherwise first do the PWC, then iterate the pages. */
	preempt_disable();
	smp_mb(); /* see radix__flush_tlb_mm */
	if (!mm_is_thread_local(mm)) {
		if (unlikely(mm_is_singlethreaded(mm))) {
			exit_flush_lazy_tlbs(mm);
			goto local;
		}
		if (cputlb_use_tlbie())
			_tlbie_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
		else
			_tlbiel_va_range_multicast(mm,
					addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
	} else {
local:
		_tlbiel_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
	}

	preempt_enable();
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

void radix__flush_pmd_tlb_range(struct vm_area_struct *vma,
				unsigned long start, unsigned long end)
{
	radix__flush_tlb_range_psize(vma->vm_mm, start, end, MMU_PAGE_2M);
}
EXPORT_SYMBOL(radix__flush_pmd_tlb_range);

void radix__flush_tlb_all(void)
{
	unsigned long rb, prs, r, rs;
	unsigned long ric = RIC_FLUSH_ALL;

	rb = 0x3 << PPC_BITLSHIFT(53); /* IS = 3 */
	prs = 0; /* partition scoped */
	r = 1;   /* radix format */
	rs = 1 & ((1UL << 32) - 1); /* any LPID value to flush guest mappings */

	asm volatile("ptesync": : :"memory");
	/*
	 * now flush guest entries by passing PRS = 1 and LPID != 0
	 */
	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(1), "i"(ric), "r"(rs) : "memory");
	/*
	 * now flush host entries by passing PRS = 0 and LPID == 0
	 */
	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(0) : "memory");
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
extern void radix_kvm_prefetch_workaround(struct mm_struct *mm)
{
	unsigned long pid = mm->context.id;

	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	if (!cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG))
		return;

	/*
	 * If this context hasn't run on that CPU before and KVM is
	 * around, there's a slim chance that the guest on another
	 * CPU just brought an obsolete translation into the TLB of
	 * this CPU due to a bad prefetch using the guest PID on
	 * the way into the hypervisor.
	 *
	 * We work around this here. If KVM is possible, we check if
	 * any sibling thread is in KVM. If it is, the window may exist
	 * and thus we flush that PID from the core.
	 *
	 * A potential future improvement would be to mark which PIDs
	 * have never been used on the system and avoid it if the PID
	 * is new and the process has no other cpumask bit set.
	 */
	if (cpu_has_feature(CPU_FTR_HVMODE) && radix_enabled()) {
		int cpu = smp_processor_id();
		int sib = cpu_first_thread_sibling(cpu);
		bool flush = false;

		for (; sib <= cpu_last_thread_sibling(cpu) && !flush; sib++) {
			if (sib == cpu)
				continue;
			if (!cpu_possible(sib))
				continue;
			if (paca_ptrs[sib]->kvm_hstate.kvm_vcpu)
				flush = true;
		}
		if (flush)
			_tlbiel_pid(pid, RIC_FLUSH_ALL);
	}
}
EXPORT_SYMBOL_GPL(radix_kvm_prefetch_workaround);
#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */