// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * TLB flush routines for radix kernels.
 *
 * Copyright 2015-2016, Aneesh Kumar K.V, IBM Corporation.
 */

#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/memblock.h>
#include <linux/mmu_context.h>
#include <linux/sched/mm.h>

#include <asm/ppc-opcode.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/trace.h>
#include <asm/cputhreads.h>

#define RIC_FLUSH_TLB 0
#define RIC_FLUSH_PWC 1
#define RIC_FLUSH_ALL 2

/*
 * tlbiel instruction for radix, set invalidation
 * i.e., r=1 and is=01 or is=10 or is=11
 */
static __always_inline void tlbiel_radix_set_isa300(unsigned int set, unsigned int is,
					unsigned int pid,
					unsigned int ric, unsigned int prs)
{
	unsigned long rb;
	unsigned long rs;

	rb = (set << PPC_BITLSHIFT(51)) | (is << PPC_BITLSHIFT(53));
	rs = ((unsigned long)pid << PPC_BITLSHIFT(31));

	asm volatile(PPC_TLBIEL(%0, %1, %2, %3, 1)
		     : : "r"(rb), "r"(rs), "i"(ric), "i"(prs)
		     : "memory");
}

static void tlbiel_all_isa300(unsigned int num_sets, unsigned int is)
{
	unsigned int set;

	asm volatile("ptesync": : :"memory");

	/*
	 * Flush the first set of the TLB, and the entire Page Walk Cache
	 * and partition table entries. Then flush the remaining sets of the
	 * TLB.
	 */

	if (early_cpu_has_feature(CPU_FTR_HVMODE)) {
		/* MSR[HV] is set, so flush partition scope translations first. */
		tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 0);
		for (set = 1; set < num_sets; set++)
			tlbiel_radix_set_isa300(set, is, 0, RIC_FLUSH_TLB, 0);
	}

	/* Flush process scoped entries. */
	tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 1);
	for (set = 1; set < num_sets; set++)
		tlbiel_radix_set_isa300(set, is, 0, RIC_FLUSH_TLB, 1);

	asm volatile("ptesync": : :"memory");
}

void radix__tlbiel_all(unsigned int action)
{
	unsigned int is;

	switch (action) {
	case TLB_INVAL_SCOPE_GLOBAL:
		is = 3;
		break;
	case TLB_INVAL_SCOPE_LPID:
		is = 2;
		break;
	default:
		BUG();
	}

	if (early_cpu_has_feature(CPU_FTR_ARCH_300))
		tlbiel_all_isa300(POWER9_TLB_SETS_RADIX, is);
	else
		WARN(1, "%s called on pre-POWER9 CPU\n", __func__);

	asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT "; isync" : : :"memory");
}

static __always_inline void __tlbiel_pid(unsigned long pid, int set,
				unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = PPC_BIT(53); /* IS = 1 */
	rb |= set << PPC_BITLSHIFT(51);
	rs = ((unsigned long)pid) << PPC_BITLSHIFT(31);
	prs = 1; /* process scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(0, 1, rb, rs, ric, prs, r);
}

static __always_inline void __tlbie_pid(unsigned long pid, unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = PPC_BIT(53); /* IS = 1 */
	rs = pid << PPC_BITLSHIFT(31);
	prs = 1; /* process scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(0, 0, rb, rs, ric, prs, r);
}

static __always_inline void __tlbie_lpid(unsigned long lpid, unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = PPC_BIT(52); /* IS = 2 */
	rs = lpid;
	prs = 0; /* partition scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
}

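/*
 * Note: with R=1, PRS=1 and IS=2 (match all entries for the LPID in RS),
 * this targets the process-scoped (guest) translations cached under the
 * given LPID, as opposed to __tlbie_lpid() above, which targets the
 * partition-scoped ones. radix__flush_all_lpid_guest() below is the
 * in-file user.
 */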
static __always_inline void __tlbie_lpid_guest(unsigned long lpid, unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = PPC_BIT(52); /* IS = 2 */
	rs = lpid;
	prs = 1; /* process scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
}

static __always_inline void __tlbiel_va(unsigned long va, unsigned long pid,
					unsigned long ap, unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = va & ~(PPC_BITMASK(52, 63));
	rb |= ap << PPC_BITLSHIFT(58);
	rs = pid << PPC_BITLSHIFT(31);
	prs = 1; /* process scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(0, 1, rb, rs, ric, prs, r);
}

static __always_inline void __tlbie_va(unsigned long va, unsigned long pid,
				       unsigned long ap, unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = va & ~(PPC_BITMASK(52, 63));
	rb |= ap << PPC_BITLSHIFT(58);
	rs = pid << PPC_BITLSHIFT(31);
	prs = 1; /* process scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(0, 0, rb, rs, ric, prs, r);
}

static __always_inline void __tlbie_lpid_va(unsigned long va, unsigned long lpid,
					    unsigned long ap, unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = va & ~(PPC_BITMASK(52, 63));
	rb |= ap << PPC_BITLSHIFT(58);
	rs = lpid;
	prs = 0; /* partition scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
}

static inline void fixup_tlbie_va(unsigned long va, unsigned long pid,
				  unsigned long ap)
{
	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_va(va, 0, ap, RIC_FLUSH_TLB);
	}

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_va(va, pid, ap, RIC_FLUSH_TLB);
	}
}

static inline void fixup_tlbie_va_range(unsigned long va, unsigned long pid,
					unsigned long ap)
{
	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_pid(0, RIC_FLUSH_TLB);
	}

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_va(va, pid, ap, RIC_FLUSH_TLB);
	}
}

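/*
 * As with the VA-based fixups above, the PID/LPID fixups below handle the
 * POWER9 tlbie errata: the ERAT flavour issues an extra flush against
 * PID/LPID 0, while the store-queue flavour issues one more tlbie for the
 * original PID/LPID. Callers run these after the primary tlbie(s) and
 * before the closing eieio; tlbsync; ptesync sequence.
 */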
static inline void fixup_tlbie_pid(unsigned long pid)
{
	/*
	 * We can use any address for the invalidation, pick one which is
	 * probably unused as an optimisation.
	 */
	unsigned long va = ((1UL << 52) - 1);

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_pid(0, RIC_FLUSH_TLB);
	}

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_va(va, pid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB);
	}
}

static inline void fixup_tlbie_lpid_va(unsigned long va, unsigned long lpid,
				       unsigned long ap)
{
	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_lpid_va(va, 0, ap, RIC_FLUSH_TLB);
	}

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_lpid_va(va, lpid, ap, RIC_FLUSH_TLB);
	}
}

static inline void fixup_tlbie_lpid(unsigned long lpid)
{
	/*
	 * We can use any address for the invalidation, pick one which is
	 * probably unused as an optimisation.
	 */
	unsigned long va = ((1UL << 52) - 1);

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_lpid(0, RIC_FLUSH_TLB);
	}

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_lpid_va(va, lpid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB);
	}
}

/*
 * We use 128 sets in radix mode and 256 sets in hpt mode.
 */
static __always_inline void _tlbiel_pid(unsigned long pid, unsigned long ric)
{
	int set;

	asm volatile("ptesync": : :"memory");

	/*
	 * Flush the first set of the TLB, and if we're doing a RIC_FLUSH_ALL,
	 * also flush the entire Page Walk Cache.
	 */
	__tlbiel_pid(pid, 0, ric);

	/* For PWC, only one flush is needed */
	if (ric == RIC_FLUSH_PWC) {
		asm volatile("ptesync": : :"memory");
		return;
	}

	/* For the remaining sets, just flush the TLB */
	for (set = 1; set < POWER9_TLB_SETS_RADIX; set++)
		__tlbiel_pid(pid, set, RIC_FLUSH_TLB);

	asm volatile("ptesync": : :"memory");
	asm volatile(PPC_RADIX_INVALIDATE_ERAT_USER "; isync" : : :"memory");
}

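/*
 * Completion rules, roughly: tlbiel only invalidates the local core's
 * TLB, so a trailing ptesync is enough to order it (see _tlbiel_pid()
 * above). tlbie is broadcast to all agents on the fabric, which is why
 * the global variants below finish with eieio; tlbsync; ptesync to wait
 * for every receiver to complete the invalidation.
 */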
static inline void _tlbie_pid(unsigned long pid, unsigned long ric)
{
	asm volatile("ptesync": : :"memory");

	/*
	 * Work around the fact that the "ric" argument to __tlbie_pid
	 * must be a compile-time constant to match the "i" constraint
	 * in the asm statement.
	 */
	switch (ric) {
	case RIC_FLUSH_TLB:
		__tlbie_pid(pid, RIC_FLUSH_TLB);
		fixup_tlbie_pid(pid);
		break;
	case RIC_FLUSH_PWC:
		__tlbie_pid(pid, RIC_FLUSH_PWC);
		break;
	case RIC_FLUSH_ALL:
	default:
		__tlbie_pid(pid, RIC_FLUSH_ALL);
		fixup_tlbie_pid(pid);
	}
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

struct tlbiel_pid {
	unsigned long pid;
	unsigned long ric;
};

static void do_tlbiel_pid(void *info)
{
	struct tlbiel_pid *t = info;

	if (t->ric == RIC_FLUSH_TLB)
		_tlbiel_pid(t->pid, RIC_FLUSH_TLB);
	else if (t->ric == RIC_FLUSH_PWC)
		_tlbiel_pid(t->pid, RIC_FLUSH_PWC);
	else
		_tlbiel_pid(t->pid, RIC_FLUSH_ALL);
}

static inline void _tlbiel_pid_multicast(struct mm_struct *mm,
				unsigned long pid, unsigned long ric)
{
	struct cpumask *cpus = mm_cpumask(mm);
	struct tlbiel_pid t = { .pid = pid, .ric = ric };

	on_each_cpu_mask(cpus, do_tlbiel_pid, &t, 1);
	/*
	 * Always want the CPU translations to be invalidated with tlbiel in
	 * these paths, so while coprocessors must use tlbie, we can not
	 * optimise away the tlbiel component.
	 */
	if (atomic_read(&mm->context.copros) > 0)
		_tlbie_pid(pid, RIC_FLUSH_ALL);
}

static inline void _tlbie_lpid(unsigned long lpid, unsigned long ric)
{
	asm volatile("ptesync": : :"memory");

	/*
	 * Work around the fact that the "ric" argument to __tlbie_lpid
	 * must be a compile-time constant to match the "i" constraint
	 * in the asm statement.
	 */
	switch (ric) {
	case RIC_FLUSH_TLB:
		__tlbie_lpid(lpid, RIC_FLUSH_TLB);
		fixup_tlbie_lpid(lpid);
		break;
	case RIC_FLUSH_PWC:
		__tlbie_lpid(lpid, RIC_FLUSH_PWC);
		break;
	case RIC_FLUSH_ALL:
	default:
		__tlbie_lpid(lpid, RIC_FLUSH_ALL);
		fixup_tlbie_lpid(lpid);
	}
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

static __always_inline void _tlbie_lpid_guest(unsigned long lpid, unsigned long ric)
{
	/*
	 * Work around the fact that the "ric" argument to __tlbie_lpid_guest
	 * must be a compile-time constant to match the "i" constraint
	 * in the asm statement.
	 */
	switch (ric) {
	case RIC_FLUSH_TLB:
		__tlbie_lpid_guest(lpid, RIC_FLUSH_TLB);
		break;
	case RIC_FLUSH_PWC:
		__tlbie_lpid_guest(lpid, RIC_FLUSH_PWC);
		break;
	case RIC_FLUSH_ALL:
	default:
		__tlbie_lpid_guest(lpid, RIC_FLUSH_ALL);
	}
	fixup_tlbie_lpid(lpid);
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

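/*
 * The *_va and *_va_range helpers below flush individual pages: the range
 * variants walk [start, end) in page_size steps and issue one invalidation
 * per page, with mmu_get_ap() converting the Linux page size index into
 * the actual-page-size (AP) encoding the instruction expects in RB.
 */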
static inline void __tlbiel_va_range(unsigned long start, unsigned long end,
				     unsigned long pid, unsigned long page_size,
				     unsigned long psize)
{
	unsigned long addr;
	unsigned long ap = mmu_get_ap(psize);

	for (addr = start; addr < end; addr += page_size)
		__tlbiel_va(addr, pid, ap, RIC_FLUSH_TLB);
}

static __always_inline void _tlbiel_va(unsigned long va, unsigned long pid,
				       unsigned long psize, unsigned long ric)
{
	unsigned long ap = mmu_get_ap(psize);

	asm volatile("ptesync": : :"memory");
	__tlbiel_va(va, pid, ap, ric);
	asm volatile("ptesync": : :"memory");
}

static inline void _tlbiel_va_range(unsigned long start, unsigned long end,
				    unsigned long pid, unsigned long page_size,
				    unsigned long psize, bool also_pwc)
{
	asm volatile("ptesync": : :"memory");
	if (also_pwc)
		__tlbiel_pid(pid, 0, RIC_FLUSH_PWC);
	__tlbiel_va_range(start, end, pid, page_size, psize);
	asm volatile("ptesync": : :"memory");
}

static inline void __tlbie_va_range(unsigned long start, unsigned long end,
				    unsigned long pid, unsigned long page_size,
				    unsigned long psize)
{
	unsigned long addr;
	unsigned long ap = mmu_get_ap(psize);

	for (addr = start; addr < end; addr += page_size)
		__tlbie_va(addr, pid, ap, RIC_FLUSH_TLB);

	fixup_tlbie_va_range(addr - page_size, pid, ap);
}

static __always_inline void _tlbie_va(unsigned long va, unsigned long pid,
				      unsigned long psize, unsigned long ric)
{
	unsigned long ap = mmu_get_ap(psize);

	asm volatile("ptesync": : :"memory");
	__tlbie_va(va, pid, ap, ric);
	fixup_tlbie_va(va, pid, ap);
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

struct tlbiel_va {
	unsigned long pid;
	unsigned long va;
	unsigned long psize;
	unsigned long ric;
};

static void do_tlbiel_va(void *info)
{
	struct tlbiel_va *t = info;

	if (t->ric == RIC_FLUSH_TLB)
		_tlbiel_va(t->va, t->pid, t->psize, RIC_FLUSH_TLB);
	else if (t->ric == RIC_FLUSH_PWC)
		_tlbiel_va(t->va, t->pid, t->psize, RIC_FLUSH_PWC);
	else
		_tlbiel_va(t->va, t->pid, t->psize, RIC_FLUSH_ALL);
}

static inline void _tlbiel_va_multicast(struct mm_struct *mm,
				unsigned long va, unsigned long pid,
				unsigned long psize, unsigned long ric)
{
	struct cpumask *cpus = mm_cpumask(mm);
	struct tlbiel_va t = { .va = va, .pid = pid, .psize = psize, .ric = ric };

	on_each_cpu_mask(cpus, do_tlbiel_va, &t, 1);
	if (atomic_read(&mm->context.copros) > 0)
		_tlbie_va(va, pid, psize, RIC_FLUSH_TLB);
}

struct tlbiel_va_range {
	unsigned long pid;
	unsigned long start;
	unsigned long end;
	unsigned long page_size;
	unsigned long psize;
	bool also_pwc;
};

static void do_tlbiel_va_range(void *info)
{
	struct tlbiel_va_range *t = info;

	_tlbiel_va_range(t->start, t->end, t->pid, t->page_size,
			 t->psize, t->also_pwc);
}

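/*
 * Partition-scoped single-page flush for a given LPID; this is the
 * building block for radix__flush_tlb_lpid_page() further down, which
 * hypervisor-side code is expected to use for partition-scoped mappings.
 */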
static __always_inline void _tlbie_lpid_va(unsigned long va, unsigned long lpid,
					   unsigned long psize, unsigned long ric)
{
	unsigned long ap = mmu_get_ap(psize);

	asm volatile("ptesync": : :"memory");
	__tlbie_lpid_va(va, lpid, ap, ric);
	fixup_tlbie_lpid_va(va, lpid, ap);
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

static inline void _tlbie_va_range(unsigned long start, unsigned long end,
				   unsigned long pid, unsigned long page_size,
				   unsigned long psize, bool also_pwc)
{
	asm volatile("ptesync": : :"memory");
	if (also_pwc)
		__tlbie_pid(pid, RIC_FLUSH_PWC);
	__tlbie_va_range(start, end, pid, page_size, psize);
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

static inline void _tlbiel_va_range_multicast(struct mm_struct *mm,
				unsigned long start, unsigned long end,
				unsigned long pid, unsigned long page_size,
				unsigned long psize, bool also_pwc)
{
	struct cpumask *cpus = mm_cpumask(mm);
	struct tlbiel_va_range t = { .start = start, .end = end,
				.pid = pid, .page_size = page_size,
				.psize = psize, .also_pwc = also_pwc };

	on_each_cpu_mask(cpus, do_tlbiel_va_range, &t, 1);
	if (atomic_read(&mm->context.copros) > 0)
		_tlbie_va_range(start, end, pid, page_size, psize, also_pwc);
}

/*
 * Base TLB flushing operations:
 *
 *  - flush_tlb_mm(mm) flushes the specified mm context TLBs
 *  - flush_tlb_page(vma, vmaddr) flushes one page
 *  - flush_tlb_range(vma, start, end) flushes a range of pages
 *  - flush_tlb_kernel_range(start, end) flushes kernel pages
 *
 *  - local_* variants of page and mm only apply to the current
 *    processor
 */
void radix__local_flush_tlb_mm(struct mm_struct *mm)
{
	unsigned long pid;

	preempt_disable();
	pid = mm->context.id;
	if (pid != MMU_NO_CONTEXT)
		_tlbiel_pid(pid, RIC_FLUSH_TLB);
	preempt_enable();
}
EXPORT_SYMBOL(radix__local_flush_tlb_mm);

#ifndef CONFIG_SMP
void radix__local_flush_all_mm(struct mm_struct *mm)
{
	unsigned long pid;

	preempt_disable();
	pid = mm->context.id;
	if (pid != MMU_NO_CONTEXT)
		_tlbiel_pid(pid, RIC_FLUSH_ALL);
	preempt_enable();
}
EXPORT_SYMBOL(radix__local_flush_all_mm);

static void __flush_all_mm(struct mm_struct *mm, bool fullmm)
{
	radix__local_flush_all_mm(mm);
}
#endif /* CONFIG_SMP */

void radix__local_flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
				       int psize)
{
	unsigned long pid;

	preempt_disable();
	pid = mm->context.id;
	if (pid != MMU_NO_CONTEXT)
		_tlbiel_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
	preempt_enable();
}

void radix__local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
{
#ifdef CONFIG_HUGETLB_PAGE
	/* need the return fix for nohash.c */
	if (is_vm_hugetlb_page(vma))
		return radix__local_flush_hugetlb_page(vma, vmaddr);
#endif
	radix__local_flush_tlb_page_psize(vma->vm_mm, vmaddr, mmu_virtual_psize);
}
EXPORT_SYMBOL(radix__local_flush_tlb_page);

static bool mm_is_singlethreaded(struct mm_struct *mm)
{
	if (atomic_read(&mm->context.copros) > 0)
		return false;
	if (atomic_read(&mm->mm_users) <= 1 && current->mm == mm)
		return true;
	return false;
}

static bool mm_needs_flush_escalation(struct mm_struct *mm)
{
	/*
	 * P9 nest MMU has issues with the page walk cache
	 * caching PTEs and not flushing them properly when
	 * RIC = 0 for a PID/LPID invalidate
	 */
	if (atomic_read(&mm->context.copros) > 0)
		return true;
	return false;
}

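/*
 * A note on the single-threaded optimisation used below: when an mm has a
 * single user and no coprocessors attached, the global flush paths first
 * IPI any other CPUs that still have it around as a lazy active_mm
 * (kernel threads), switch those over to init_mm, and then fall back to
 * cheap local tlbiel flushes, since no other CPU should be caching
 * translations for this mm any more.
 */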
#ifdef CONFIG_SMP
static void do_exit_flush_lazy_tlb(void *arg)
{
	struct mm_struct *mm = arg;
	unsigned long pid = mm->context.id;

	if (current->mm == mm)
		return; /* Local CPU */

	if (current->active_mm == mm) {
		/*
		 * Must be a kernel thread because sender is single-threaded.
		 */
		BUG_ON(current->mm);
		mmgrab(&init_mm);
		switch_mm(mm, &init_mm, current);
		current->active_mm = &init_mm;
		mmdrop(mm);
	}
	_tlbiel_pid(pid, RIC_FLUSH_ALL);
}

static void exit_flush_lazy_tlbs(struct mm_struct *mm)
{
	/*
	 * Would be nice if this were async so it could be run in
	 * parallel with our local flush, but generic code does not
	 * give a good API for it. Could extend the generic code or
	 * make a special powerpc IPI for flushing TLBs.
	 * For now it's not too performance critical.
	 */
	smp_call_function_many(mm_cpumask(mm), do_exit_flush_lazy_tlb,
			       (void *)mm, 1);
	mm_reset_thread_local(mm);
}

void radix__flush_tlb_mm(struct mm_struct *mm)
{
	unsigned long pid;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	preempt_disable();
	/*
	 * Order loads of mm_cpumask vs previous stores to clear ptes before
	 * the invalidate. See barrier in switch_mm_irqs_off().
	 */
	smp_mb();
	if (!mm_is_thread_local(mm)) {
		if (unlikely(mm_is_singlethreaded(mm))) {
			exit_flush_lazy_tlbs(mm);
			goto local;
		}

		if (cputlb_use_tlbie()) {
			if (mm_needs_flush_escalation(mm))
				_tlbie_pid(pid, RIC_FLUSH_ALL);
			else
				_tlbie_pid(pid, RIC_FLUSH_TLB);
		} else {
			_tlbiel_pid_multicast(mm, pid, RIC_FLUSH_TLB);
		}
	} else {
local:
		_tlbiel_pid(pid, RIC_FLUSH_TLB);
	}
	preempt_enable();
}
EXPORT_SYMBOL(radix__flush_tlb_mm);

static void __flush_all_mm(struct mm_struct *mm, bool fullmm)
{
	unsigned long pid;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	preempt_disable();
	smp_mb(); /* see radix__flush_tlb_mm */
	if (!mm_is_thread_local(mm)) {
		if (unlikely(mm_is_singlethreaded(mm))) {
			if (!fullmm) {
				exit_flush_lazy_tlbs(mm);
				goto local;
			}
		}
		if (cputlb_use_tlbie())
			_tlbie_pid(pid, RIC_FLUSH_ALL);
		else
			_tlbiel_pid_multicast(mm, pid, RIC_FLUSH_ALL);
	} else {
local:
		_tlbiel_pid(pid, RIC_FLUSH_ALL);
	}
	preempt_enable();
}

void radix__flush_all_mm(struct mm_struct *mm)
{
	__flush_all_mm(mm, false);
}
EXPORT_SYMBOL(radix__flush_all_mm);

void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
				 int psize)
{
	unsigned long pid;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	preempt_disable();
	smp_mb(); /* see radix__flush_tlb_mm */
	if (!mm_is_thread_local(mm)) {
		if (unlikely(mm_is_singlethreaded(mm))) {
			exit_flush_lazy_tlbs(mm);
			goto local;
		}
		if (cputlb_use_tlbie())
			_tlbie_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
		else
			_tlbiel_va_multicast(mm, vmaddr, pid, psize, RIC_FLUSH_TLB);
	} else {
local:
		_tlbiel_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
	}
	preempt_enable();
}

void radix__flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
{
#ifdef CONFIG_HUGETLB_PAGE
	if (is_vm_hugetlb_page(vma))
		return radix__flush_hugetlb_page(vma, vmaddr);
#endif
	radix__flush_tlb_page_psize(vma->vm_mm, vmaddr, mmu_virtual_psize);
}
EXPORT_SYMBOL(radix__flush_tlb_page);

#else /* CONFIG_SMP */
static inline void exit_flush_lazy_tlbs(struct mm_struct *mm) { }
#endif /* CONFIG_SMP */

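/*
 * Kernel-range flushes: when tlbie is not usable (cputlb_use_tlbie()
 * returns false), broadcast invalidation is emulated by running a local
 * PID-0 tlbiel flush on every online CPU; a real tlbie is still issued
 * when a coherent accelerator may hold translations.
 */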
static void do_tlbiel_kernel(void *info)
{
	_tlbiel_pid(0, RIC_FLUSH_ALL);
}

static inline void _tlbiel_kernel_broadcast(void)
{
	on_each_cpu(do_tlbiel_kernel, NULL, 1);
	if (tlbie_capable) {
		/*
		 * Coherent accelerators don't refcount kernel memory mappings,
		 * so we always have to issue a tlbie for them. This is quite a
		 * slow path anyway.
		 */
		_tlbie_pid(0, RIC_FLUSH_ALL);
	}
}

/*
 * If kernel TLBIs ever become local rather than global, then
 * drivers/misc/ocxl/link.c:ocxl_link_add_pe will need some work, as it
 * assumes kernel TLBIs are global.
 */
void radix__flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
	if (cputlb_use_tlbie())
		_tlbie_pid(0, RIC_FLUSH_ALL);
	else
		_tlbiel_kernel_broadcast();
}
EXPORT_SYMBOL(radix__flush_tlb_kernel_range);

#define TLB_FLUSH_ALL -1UL

/*
 * Number of pages above which we invalidate the entire PID rather than
 * flush individual pages, for local and global flushes respectively.
 *
 * tlbie goes out to the interconnect and individual ops are more costly.
 * It also does not iterate over sets like the local tlbiel variant when
 * invalidating a full PID, so it has a far lower threshold to change from
 * individual page flushes to full-pid flushes.
 */
static unsigned long tlb_single_page_flush_ceiling __read_mostly = 33;
static unsigned long tlb_local_single_page_flush_ceiling __read_mostly = POWER9_TLB_SETS_RADIX * 2;

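/*
 * Note on the hugepage handling below: TLB entries are tagged with their
 * actual page size, so a range partly backed by THP also gets its
 * PMD-aligned portion flushed again with a 2M stride and the 2M AP
 * encoding; invalidations issued with the base page size alone would
 * presumably not hit those entries.
 */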
static inline void __radix__flush_tlb_range(struct mm_struct *mm,
					    unsigned long start, unsigned long end)
{
	unsigned long pid;
	unsigned int page_shift = mmu_psize_defs[mmu_virtual_psize].shift;
	unsigned long page_size = 1UL << page_shift;
	unsigned long nr_pages = (end - start) >> page_shift;
	bool local, full;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	preempt_disable();
	smp_mb(); /* see radix__flush_tlb_mm */
	if (!mm_is_thread_local(mm)) {
		if (unlikely(mm_is_singlethreaded(mm))) {
			if (end != TLB_FLUSH_ALL) {
				exit_flush_lazy_tlbs(mm);
				goto is_local;
			}
		}
		local = false;
		full = (end == TLB_FLUSH_ALL ||
				nr_pages > tlb_single_page_flush_ceiling);
	} else {
is_local:
		local = true;
		full = (end == TLB_FLUSH_ALL ||
				nr_pages > tlb_local_single_page_flush_ceiling);
	}

	if (full) {
		if (local) {
			_tlbiel_pid(pid, RIC_FLUSH_TLB);
		} else {
			if (cputlb_use_tlbie()) {
				if (mm_needs_flush_escalation(mm))
					_tlbie_pid(pid, RIC_FLUSH_ALL);
				else
					_tlbie_pid(pid, RIC_FLUSH_TLB);
			} else {
				_tlbiel_pid_multicast(mm, pid, RIC_FLUSH_TLB);
			}
		}
	} else {
		bool hflush = false;
		unsigned long hstart, hend;

		if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) {
			hstart = (start + PMD_SIZE - 1) & PMD_MASK;
			hend = end & PMD_MASK;
			if (hstart == hend)
				hflush = false;
			else
				hflush = true;
		}

		if (local) {
			asm volatile("ptesync": : :"memory");
			__tlbiel_va_range(start, end, pid, page_size, mmu_virtual_psize);
			if (hflush)
				__tlbiel_va_range(hstart, hend, pid,
						PMD_SIZE, MMU_PAGE_2M);
			asm volatile("ptesync": : :"memory");
		} else if (cputlb_use_tlbie()) {
			asm volatile("ptesync": : :"memory");
			__tlbie_va_range(start, end, pid, page_size, mmu_virtual_psize);
			if (hflush)
				__tlbie_va_range(hstart, hend, pid,
						PMD_SIZE, MMU_PAGE_2M);
			asm volatile("eieio; tlbsync; ptesync": : :"memory");
		} else {
			_tlbiel_va_range_multicast(mm,
					start, end, pid, page_size, mmu_virtual_psize, false);
			if (hflush)
				_tlbiel_va_range_multicast(mm,
					hstart, hend, pid, PMD_SIZE, MMU_PAGE_2M, false);
		}
	}
	preempt_enable();
}

void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
			    unsigned long end)
{
#ifdef CONFIG_HUGETLB_PAGE
	if (is_vm_hugetlb_page(vma))
		return radix__flush_hugetlb_tlb_range(vma, start, end);
#endif

	__radix__flush_tlb_range(vma->vm_mm, start, end);
}
EXPORT_SYMBOL(radix__flush_tlb_range);

static int radix_get_mmu_psize(int page_size)
{
	int psize;

	if (page_size == (1UL << mmu_psize_defs[mmu_virtual_psize].shift))
		psize = mmu_virtual_psize;
	else if (page_size == (1UL << mmu_psize_defs[MMU_PAGE_2M].shift))
		psize = MMU_PAGE_2M;
	else if (page_size == (1UL << mmu_psize_defs[MMU_PAGE_1G].shift))
		psize = MMU_PAGE_1G;
	else
		return -1;
	return psize;
}

/*
 * Flush partition scoped LPID address translation for all CPUs.
 */
void radix__flush_tlb_lpid_page(unsigned int lpid,
					unsigned long addr,
					unsigned long page_size)
{
	int psize = radix_get_mmu_psize(page_size);

	_tlbie_lpid_va(addr, lpid, psize, RIC_FLUSH_TLB);
}
EXPORT_SYMBOL_GPL(radix__flush_tlb_lpid_page);

/*
 * Flush partition scoped PWC from LPID for all CPUs.
 */
void radix__flush_pwc_lpid(unsigned int lpid)
{
	_tlbie_lpid(lpid, RIC_FLUSH_PWC);
}
EXPORT_SYMBOL_GPL(radix__flush_pwc_lpid);

/*
 * Flush partition scoped translations from LPID (=LPIDR)
 */
void radix__flush_all_lpid(unsigned int lpid)
{
	_tlbie_lpid(lpid, RIC_FLUSH_ALL);
}
EXPORT_SYMBOL_GPL(radix__flush_all_lpid);

/*
 * Flush process scoped translations from LPID (=LPIDR)
 */
void radix__flush_all_lpid_guest(unsigned int lpid)
{
	_tlbie_lpid_guest(lpid, RIC_FLUSH_ALL);
}

static void radix__flush_tlb_pwc_range_psize(struct mm_struct *mm, unsigned long start,
					     unsigned long end, int psize);

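/*
 * radix__tlb_flush() is wired up as the mmu_gather flush hook for radix:
 * it picks a full-mm, full-PID or ranged flush based on what the gather
 * recorded, and when page tables were freed (tlb->freed_tables) it
 * upgrades to the PWC-flushing variants so stale page walk cache entries
 * are dropped along with the TLB entries.
 */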
void radix__tlb_flush(struct mmu_gather *tlb)
{
	int psize = 0;
	struct mm_struct *mm = tlb->mm;
	int page_size = tlb->page_size;
	unsigned long start = tlb->start;
	unsigned long end = tlb->end;

	/*
	 * If the page size is not something we understand, do a full mm flush.
	 *
	 * A "fullmm" flush must always do a flush_all_mm (RIC=2) flush
	 * that flushes the process table entry cache upon process teardown.
	 * See the comment for radix in arch_exit_mmap().
	 */
	if (tlb->fullmm || tlb->need_flush_all) {
		__flush_all_mm(mm, true);
	} else if ((psize = radix_get_mmu_psize(page_size)) == -1) {
		if (!tlb->freed_tables)
			radix__flush_tlb_mm(mm);
		else
			radix__flush_all_mm(mm);
	} else {
		if (!tlb->freed_tables)
			radix__flush_tlb_range_psize(mm, start, end, psize);
		else
			radix__flush_tlb_pwc_range_psize(mm, start, end, psize);
	}
}

static __always_inline void __radix__flush_tlb_range_psize(struct mm_struct *mm,
				unsigned long start, unsigned long end,
				int psize, bool also_pwc)
{
	unsigned long pid;
	unsigned int page_shift = mmu_psize_defs[psize].shift;
	unsigned long page_size = 1UL << page_shift;
	unsigned long nr_pages = (end - start) >> page_shift;
	bool local, full;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	preempt_disable();
	smp_mb(); /* see radix__flush_tlb_mm */
	if (!mm_is_thread_local(mm)) {
		if (unlikely(mm_is_singlethreaded(mm))) {
			if (end != TLB_FLUSH_ALL) {
				exit_flush_lazy_tlbs(mm);
				goto is_local;
			}
		}
		local = false;
		full = (end == TLB_FLUSH_ALL ||
				nr_pages > tlb_single_page_flush_ceiling);
	} else {
is_local:
		local = true;
		full = (end == TLB_FLUSH_ALL ||
				nr_pages > tlb_local_single_page_flush_ceiling);
	}

	if (full) {
		if (local) {
			_tlbiel_pid(pid, also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB);
		} else {
			if (cputlb_use_tlbie()) {
				if (mm_needs_flush_escalation(mm))
					also_pwc = true;

				_tlbie_pid(pid,
					also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB);
			} else {
				_tlbiel_pid_multicast(mm, pid,
					also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB);
			}

		}
	} else {
		if (local)
			_tlbiel_va_range(start, end, pid, page_size, psize, also_pwc);
		else if (cputlb_use_tlbie())
			_tlbie_va_range(start, end, pid, page_size, psize, also_pwc);
		else
			_tlbiel_va_range_multicast(mm,
					start, end, pid, page_size, psize, also_pwc);
	}
	preempt_enable();
}

void radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start,
				  unsigned long end, int psize)
{
	return __radix__flush_tlb_range_psize(mm, start, end, psize, false);
}

static void radix__flush_tlb_pwc_range_psize(struct mm_struct *mm, unsigned long start,
					     unsigned long end, int psize)
{
	__radix__flush_tlb_range_psize(mm, start, end, psize, true);
}

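/*
 * When khugepaged collapses a PMD, both the old small-page TLB entries
 * covering that 2M region and the page walk cache entry for the removed
 * page-table page become stale, which is why the flushes below always
 * pass also_pwc = true (or fall back to a full RIC_FLUSH_ALL of the mm).
 */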
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr)
{
	unsigned long pid, end;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	/* 4k page size, just blow the world */
	if (PAGE_SIZE == 0x1000) {
		radix__flush_all_mm(mm);
		return;
	}

	end = addr + HPAGE_PMD_SIZE;

	/* Otherwise first do the PWC, then iterate the pages. */
	preempt_disable();
	smp_mb(); /* see radix__flush_tlb_mm */
	if (!mm_is_thread_local(mm)) {
		if (unlikely(mm_is_singlethreaded(mm))) {
			exit_flush_lazy_tlbs(mm);
			goto local;
		}
		if (cputlb_use_tlbie())
			_tlbie_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
		else
			_tlbiel_va_range_multicast(mm,
					addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
	} else {
local:
		_tlbiel_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
	}

	preempt_enable();
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

void radix__flush_pmd_tlb_range(struct vm_area_struct *vma,
				unsigned long start, unsigned long end)
{
	radix__flush_tlb_range_psize(vma->vm_mm, start, end, MMU_PAGE_2M);
}
EXPORT_SYMBOL(radix__flush_pmd_tlb_range);

void radix__flush_tlb_all(void)
{
	unsigned long rb, prs, r, rs;
	unsigned long ric = RIC_FLUSH_ALL;

	rb = 0x3 << PPC_BITLSHIFT(53); /* IS = 3 */
	prs = 0; /* partition scoped */
	r = 1;   /* radix format */
	rs = 1 & ((1UL << 32) - 1); /* any LPID value to flush guest mappings */

	asm volatile("ptesync": : :"memory");
	/*
	 * Flush guest entries by passing PRS = 1 and LPID != 0.
	 */
	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(1), "i"(ric), "r"(rs) : "memory");
	/*
	 * Flush host entries by passing PRS = 0 and LPID == 0.
	 */
	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(0) : "memory");
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
extern void radix_kvm_prefetch_workaround(struct mm_struct *mm)
{
	unsigned long pid = mm->context.id;

	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	if (!cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG))
		return;

	/*
	 * If this context hasn't run on that CPU before and KVM is
	 * around, there's a slim chance that the guest on another
	 * CPU just brought obsolete translations into the TLB of
	 * this CPU due to a bad prefetch using the guest PID on
	 * the way into the hypervisor.
	 *
	 * We work around this here. If KVM is possible, we check if
	 * any sibling thread is in KVM. If it is, the window may exist
	 * and thus we flush that PID from the core.
	 *
	 * A potential future improvement would be to mark which PIDs
	 * have never been used on the system and avoid it if the PID
	 * is new and the process has no other cpumask bit set.
	 */
	if (cpu_has_feature(CPU_FTR_HVMODE) && radix_enabled()) {
		int cpu = smp_processor_id();
		int sib = cpu_first_thread_sibling(cpu);
		bool flush = false;

		for (; sib <= cpu_last_thread_sibling(cpu) && !flush; sib++) {
			if (sib == cpu)
				continue;
			if (!cpu_possible(sib))
				continue;
			if (paca_ptrs[sib]->kvm_hstate.kvm_vcpu)
				flush = true;
		}
		if (flush)
			_tlbiel_pid(pid, RIC_FLUSH_ALL);
	}
}
EXPORT_SYMBOL_GPL(radix_kvm_prefetch_workaround);
#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */