// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * TLB flush routines for radix kernels.
 *
 * Copyright 2015-2016, Aneesh Kumar K.V, IBM Corporation.
 */

#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/memblock.h>
#include <linux/mmu_context.h>
#include <linux/sched/mm.h>

#include <asm/ppc-opcode.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/trace.h>
#include <asm/cputhreads.h>

#define RIC_FLUSH_TLB 0
#define RIC_FLUSH_PWC 1
#define RIC_FLUSH_ALL 2

/*
 * tlbiel instruction for radix, set invalidation
 * i.e., r=1 and is=01 or is=10 or is=11
 */
static __always_inline void tlbiel_radix_set_isa300(unsigned int set, unsigned int is,
					unsigned int pid,
					unsigned int ric, unsigned int prs)
{
	unsigned long rb;
	unsigned long rs;

	rb = (set << PPC_BITLSHIFT(51)) | (is << PPC_BITLSHIFT(53));
	rs = ((unsigned long)pid << PPC_BITLSHIFT(31));

	asm volatile(PPC_TLBIEL(%0, %1, %2, %3, 1)
		     : : "r"(rb), "r"(rs), "i"(ric), "i"(prs)
		     : "memory");
}

static void tlbiel_all_isa300(unsigned int num_sets, unsigned int is)
{
	unsigned int set;

	asm volatile("ptesync": : :"memory");

	/*
	 * Flush the first set of the TLB, and the entire Page Walk Cache
	 * and partition table entries. Then flush the remaining sets of the
	 * TLB.
	 */

	if (early_cpu_has_feature(CPU_FTR_HVMODE)) {
		/* MSR[HV] should flush partition scope translations first. */
		tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 0);
		for (set = 1; set < num_sets; set++)
			tlbiel_radix_set_isa300(set, is, 0, RIC_FLUSH_TLB, 0);
	}

	/* Flush process scoped entries. */
	tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 1);
	for (set = 1; set < num_sets; set++)
		tlbiel_radix_set_isa300(set, is, 0, RIC_FLUSH_TLB, 1);

	asm volatile("ptesync": : :"memory");
}

void radix__tlbiel_all(unsigned int action)
{
	unsigned int is;

	switch (action) {
	case TLB_INVAL_SCOPE_GLOBAL:
		is = 3;
		break;
	case TLB_INVAL_SCOPE_LPID:
		is = 2;
		break;
	default:
		BUG();
	}

	if (early_cpu_has_feature(CPU_FTR_ARCH_300))
		tlbiel_all_isa300(POWER9_TLB_SETS_RADIX, is);
	else
		WARN(1, "%s called on pre-POWER9 CPU\n", __func__);

	asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT "; isync" : : :"memory");
}

static __always_inline void __tlbiel_pid(unsigned long pid, int set,
				unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = PPC_BIT(53); /* IS = 1 */
	rb |= set << PPC_BITLSHIFT(51);
	rs = ((unsigned long)pid) << PPC_BITLSHIFT(31);
	prs = 1; /* process scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(0, 1, rb, rs, ric, prs, r);
}

static __always_inline void __tlbie_pid(unsigned long pid, unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = PPC_BIT(53); /* IS = 1 */
	rs = pid << PPC_BITLSHIFT(31);
	prs = 1; /* process scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(0, 0, rb, rs, ric, prs, r);
}

static __always_inline void __tlbie_lpid(unsigned long lpid, unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = PPC_BIT(52); /* IS = 2 */
	rs = lpid;
	prs = 0; /* partition scoped */
	r = 1;   /* radix format */

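	/*
	 * The asm operands map as: rb is %0, r is %1, prs is %2, ric is %3
	 * and rs is %4; the other __tlbie*()/__tlbiel_*() helpers in this
	 * file use the same ordering.
	 */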
	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
}

static __always_inline void __tlbie_lpid_guest(unsigned long lpid, unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = PPC_BIT(52); /* IS = 2 */
	rs = lpid;
	prs = 1; /* process scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
}

static __always_inline void __tlbiel_va(unsigned long va, unsigned long pid,
					unsigned long ap, unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = va & ~(PPC_BITMASK(52, 63));
	rb |= ap << PPC_BITLSHIFT(58);
	rs = pid << PPC_BITLSHIFT(31);
	prs = 1; /* process scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(0, 1, rb, rs, ric, prs, r);
}

static __always_inline void __tlbie_va(unsigned long va, unsigned long pid,
				       unsigned long ap, unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = va & ~(PPC_BITMASK(52, 63));
	rb |= ap << PPC_BITLSHIFT(58);
	rs = pid << PPC_BITLSHIFT(31);
	prs = 1; /* process scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(0, 0, rb, rs, ric, prs, r);
}

static __always_inline void __tlbie_lpid_va(unsigned long va, unsigned long lpid,
					    unsigned long ap, unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = va & ~(PPC_BITMASK(52, 63));
	rb |= ap << PPC_BITLSHIFT(58);
	rs = lpid;
	prs = 0; /* partition scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
}

static inline void fixup_tlbie_va(unsigned long va, unsigned long pid,
				  unsigned long ap)
{
	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_va(va, 0, ap, RIC_FLUSH_TLB);
	}

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_va(va, pid, ap, RIC_FLUSH_TLB);
	}
}

static inline void fixup_tlbie_va_range(unsigned long va, unsigned long pid,
					unsigned long ap)
{
	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_pid(0, RIC_FLUSH_TLB);
	}

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_va(va, pid, ap, RIC_FLUSH_TLB);
	}
}

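/*
 * As with the VA-based fixups above, issue a follow-up dummy invalidation
 * after a PID-scoped tlbie when the POWER9 tlbie workarounds
 * (CPU_FTR_P9_TLBIE_ERAT_BUG / CPU_FTR_P9_TLBIE_STQ_BUG) are in effect.
 */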
static inline void fixup_tlbie_pid(unsigned long pid)
{
	/*
	 * We can use any address for the invalidation, pick one which is
	 * probably unused as an optimisation.
	 */
	unsigned long va = ((1UL << 52) - 1);

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_pid(0, RIC_FLUSH_TLB);
	}

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_va(va, pid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB);
	}
}

static inline void fixup_tlbie_lpid_va(unsigned long va, unsigned long lpid,
				       unsigned long ap)
{
	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_lpid_va(va, 0, ap, RIC_FLUSH_TLB);
	}

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_lpid_va(va, lpid, ap, RIC_FLUSH_TLB);
	}
}

static inline void fixup_tlbie_lpid(unsigned long lpid)
{
	/*
	 * We can use any address for the invalidation, pick one which is
	 * probably unused as an optimisation.
	 */
	unsigned long va = ((1UL << 52) - 1);

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_lpid(0, RIC_FLUSH_TLB);
	}

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_lpid_va(va, lpid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB);
	}
}

/*
 * We use 128 sets in radix mode and 256 sets in hpt mode.
 */
static __always_inline void _tlbiel_pid(unsigned long pid, unsigned long ric)
{
	int set;

	asm volatile("ptesync": : :"memory");

	/*
	 * Flush the first set of the TLB, and if we're doing a RIC_FLUSH_ALL,
	 * also flush the entire Page Walk Cache.
	 */
	__tlbiel_pid(pid, 0, ric);

	/* For PWC, only one flush is needed */
	if (ric == RIC_FLUSH_PWC) {
		asm volatile("ptesync": : :"memory");
		return;
	}

	/* For the remaining sets, just flush the TLB */
	for (set = 1; set < POWER9_TLB_SETS_RADIX; set++)
		__tlbiel_pid(pid, set, RIC_FLUSH_TLB);

	asm volatile("ptesync": : :"memory");
	asm volatile(PPC_RADIX_INVALIDATE_ERAT_USER "; isync" : : :"memory");
}

static inline void _tlbie_pid(unsigned long pid, unsigned long ric)
{
	asm volatile("ptesync": : :"memory");

	/*
	 * Workaround the fact that the "ric" argument to __tlbie_pid
	 * must be a compile-time constraint to match the "i" constraint
	 * in the asm statement.
318 */ 319 switch (ric) { 320 case RIC_FLUSH_TLB: 321 __tlbie_pid(pid, RIC_FLUSH_TLB); 322 fixup_tlbie_pid(pid); 323 break; 324 case RIC_FLUSH_PWC: 325 __tlbie_pid(pid, RIC_FLUSH_PWC); 326 break; 327 case RIC_FLUSH_ALL: 328 default: 329 __tlbie_pid(pid, RIC_FLUSH_ALL); 330 fixup_tlbie_pid(pid); 331 } 332 asm volatile("eieio; tlbsync; ptesync": : :"memory"); 333 } 334 335 struct tlbiel_pid { 336 unsigned long pid; 337 unsigned long ric; 338 }; 339 340 static void do_tlbiel_pid(void *info) 341 { 342 struct tlbiel_pid *t = info; 343 344 if (t->ric == RIC_FLUSH_TLB) 345 _tlbiel_pid(t->pid, RIC_FLUSH_TLB); 346 else if (t->ric == RIC_FLUSH_PWC) 347 _tlbiel_pid(t->pid, RIC_FLUSH_PWC); 348 else 349 _tlbiel_pid(t->pid, RIC_FLUSH_ALL); 350 } 351 352 static inline void _tlbiel_pid_multicast(struct mm_struct *mm, 353 unsigned long pid, unsigned long ric) 354 { 355 struct cpumask *cpus = mm_cpumask(mm); 356 struct tlbiel_pid t = { .pid = pid, .ric = ric }; 357 358 on_each_cpu_mask(cpus, do_tlbiel_pid, &t, 1); 359 /* 360 * Always want the CPU translations to be invalidated with tlbiel in 361 * these paths, so while coprocessors must use tlbie, we can not 362 * optimise away the tlbiel component. 363 */ 364 if (atomic_read(&mm->context.copros) > 0) 365 _tlbie_pid(pid, RIC_FLUSH_ALL); 366 } 367 368 static inline void _tlbie_lpid(unsigned long lpid, unsigned long ric) 369 { 370 asm volatile("ptesync": : :"memory"); 371 372 /* 373 * Workaround the fact that the "ric" argument to __tlbie_pid 374 * must be a compile-time contraint to match the "i" constraint 375 * in the asm statement. 376 */ 377 switch (ric) { 378 case RIC_FLUSH_TLB: 379 __tlbie_lpid(lpid, RIC_FLUSH_TLB); 380 fixup_tlbie_lpid(lpid); 381 break; 382 case RIC_FLUSH_PWC: 383 __tlbie_lpid(lpid, RIC_FLUSH_PWC); 384 break; 385 case RIC_FLUSH_ALL: 386 default: 387 __tlbie_lpid(lpid, RIC_FLUSH_ALL); 388 fixup_tlbie_lpid(lpid); 389 } 390 asm volatile("eieio; tlbsync; ptesync": : :"memory"); 391 } 392 393 static __always_inline void _tlbie_lpid_guest(unsigned long lpid, unsigned long ric) 394 { 395 /* 396 * Workaround the fact that the "ric" argument to __tlbie_pid 397 * must be a compile-time contraint to match the "i" constraint 398 * in the asm statement. 
399 */ 400 switch (ric) { 401 case RIC_FLUSH_TLB: 402 __tlbie_lpid_guest(lpid, RIC_FLUSH_TLB); 403 break; 404 case RIC_FLUSH_PWC: 405 __tlbie_lpid_guest(lpid, RIC_FLUSH_PWC); 406 break; 407 case RIC_FLUSH_ALL: 408 default: 409 __tlbie_lpid_guest(lpid, RIC_FLUSH_ALL); 410 } 411 fixup_tlbie_lpid(lpid); 412 asm volatile("eieio; tlbsync; ptesync": : :"memory"); 413 } 414 415 static inline void __tlbiel_va_range(unsigned long start, unsigned long end, 416 unsigned long pid, unsigned long page_size, 417 unsigned long psize) 418 { 419 unsigned long addr; 420 unsigned long ap = mmu_get_ap(psize); 421 422 for (addr = start; addr < end; addr += page_size) 423 __tlbiel_va(addr, pid, ap, RIC_FLUSH_TLB); 424 } 425 426 static __always_inline void _tlbiel_va(unsigned long va, unsigned long pid, 427 unsigned long psize, unsigned long ric) 428 { 429 unsigned long ap = mmu_get_ap(psize); 430 431 asm volatile("ptesync": : :"memory"); 432 __tlbiel_va(va, pid, ap, ric); 433 asm volatile("ptesync": : :"memory"); 434 } 435 436 static inline void _tlbiel_va_range(unsigned long start, unsigned long end, 437 unsigned long pid, unsigned long page_size, 438 unsigned long psize, bool also_pwc) 439 { 440 asm volatile("ptesync": : :"memory"); 441 if (also_pwc) 442 __tlbiel_pid(pid, 0, RIC_FLUSH_PWC); 443 __tlbiel_va_range(start, end, pid, page_size, psize); 444 asm volatile("ptesync": : :"memory"); 445 } 446 447 static inline void __tlbie_va_range(unsigned long start, unsigned long end, 448 unsigned long pid, unsigned long page_size, 449 unsigned long psize) 450 { 451 unsigned long addr; 452 unsigned long ap = mmu_get_ap(psize); 453 454 for (addr = start; addr < end; addr += page_size) 455 __tlbie_va(addr, pid, ap, RIC_FLUSH_TLB); 456 457 fixup_tlbie_va_range(addr - page_size, pid, ap); 458 } 459 460 static __always_inline void _tlbie_va(unsigned long va, unsigned long pid, 461 unsigned long psize, unsigned long ric) 462 { 463 unsigned long ap = mmu_get_ap(psize); 464 465 asm volatile("ptesync": : :"memory"); 466 __tlbie_va(va, pid, ap, ric); 467 fixup_tlbie_va(va, pid, ap); 468 asm volatile("eieio; tlbsync; ptesync": : :"memory"); 469 } 470 471 struct tlbiel_va { 472 unsigned long pid; 473 unsigned long va; 474 unsigned long psize; 475 unsigned long ric; 476 }; 477 478 static void do_tlbiel_va(void *info) 479 { 480 struct tlbiel_va *t = info; 481 482 if (t->ric == RIC_FLUSH_TLB) 483 _tlbiel_va(t->va, t->pid, t->psize, RIC_FLUSH_TLB); 484 else if (t->ric == RIC_FLUSH_PWC) 485 _tlbiel_va(t->va, t->pid, t->psize, RIC_FLUSH_PWC); 486 else 487 _tlbiel_va(t->va, t->pid, t->psize, RIC_FLUSH_ALL); 488 } 489 490 static inline void _tlbiel_va_multicast(struct mm_struct *mm, 491 unsigned long va, unsigned long pid, 492 unsigned long psize, unsigned long ric) 493 { 494 struct cpumask *cpus = mm_cpumask(mm); 495 struct tlbiel_va t = { .va = va, .pid = pid, .psize = psize, .ric = ric }; 496 on_each_cpu_mask(cpus, do_tlbiel_va, &t, 1); 497 if (atomic_read(&mm->context.copros) > 0) 498 _tlbie_va(va, pid, psize, RIC_FLUSH_TLB); 499 } 500 501 struct tlbiel_va_range { 502 unsigned long pid; 503 unsigned long start; 504 unsigned long end; 505 unsigned long page_size; 506 unsigned long psize; 507 bool also_pwc; 508 }; 509 510 static void do_tlbiel_va_range(void *info) 511 { 512 struct tlbiel_va_range *t = info; 513 514 _tlbiel_va_range(t->start, t->end, t->pid, t->page_size, 515 t->psize, t->also_pwc); 516 } 517 518 static __always_inline void _tlbie_lpid_va(unsigned long va, unsigned long lpid, 519 unsigned long psize, unsigned long ric) 
520 { 521 unsigned long ap = mmu_get_ap(psize); 522 523 asm volatile("ptesync": : :"memory"); 524 __tlbie_lpid_va(va, lpid, ap, ric); 525 fixup_tlbie_lpid_va(va, lpid, ap); 526 asm volatile("eieio; tlbsync; ptesync": : :"memory"); 527 } 528 529 static inline void _tlbie_va_range(unsigned long start, unsigned long end, 530 unsigned long pid, unsigned long page_size, 531 unsigned long psize, bool also_pwc) 532 { 533 asm volatile("ptesync": : :"memory"); 534 if (also_pwc) 535 __tlbie_pid(pid, RIC_FLUSH_PWC); 536 __tlbie_va_range(start, end, pid, page_size, psize); 537 asm volatile("eieio; tlbsync; ptesync": : :"memory"); 538 } 539 540 static inline void _tlbiel_va_range_multicast(struct mm_struct *mm, 541 unsigned long start, unsigned long end, 542 unsigned long pid, unsigned long page_size, 543 unsigned long psize, bool also_pwc) 544 { 545 struct cpumask *cpus = mm_cpumask(mm); 546 struct tlbiel_va_range t = { .start = start, .end = end, 547 .pid = pid, .page_size = page_size, 548 .psize = psize, .also_pwc = also_pwc }; 549 550 on_each_cpu_mask(cpus, do_tlbiel_va_range, &t, 1); 551 if (atomic_read(&mm->context.copros) > 0) 552 _tlbie_va_range(start, end, pid, page_size, psize, also_pwc); 553 } 554 555 /* 556 * Base TLB flushing operations: 557 * 558 * - flush_tlb_mm(mm) flushes the specified mm context TLB's 559 * - flush_tlb_page(vma, vmaddr) flushes one page 560 * - flush_tlb_range(vma, start, end) flushes a range of pages 561 * - flush_tlb_kernel_range(start, end) flushes kernel pages 562 * 563 * - local_* variants of page and mm only apply to the current 564 * processor 565 */ 566 void radix__local_flush_tlb_mm(struct mm_struct *mm) 567 { 568 unsigned long pid; 569 570 preempt_disable(); 571 pid = mm->context.id; 572 if (pid != MMU_NO_CONTEXT) 573 _tlbiel_pid(pid, RIC_FLUSH_TLB); 574 preempt_enable(); 575 } 576 EXPORT_SYMBOL(radix__local_flush_tlb_mm); 577 578 #ifndef CONFIG_SMP 579 void radix__local_flush_all_mm(struct mm_struct *mm) 580 { 581 unsigned long pid; 582 583 preempt_disable(); 584 pid = mm->context.id; 585 if (pid != MMU_NO_CONTEXT) 586 _tlbiel_pid(pid, RIC_FLUSH_ALL); 587 preempt_enable(); 588 } 589 EXPORT_SYMBOL(radix__local_flush_all_mm); 590 #endif /* CONFIG_SMP */ 591 592 void radix__local_flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr, 593 int psize) 594 { 595 unsigned long pid; 596 597 preempt_disable(); 598 pid = mm->context.id; 599 if (pid != MMU_NO_CONTEXT) 600 _tlbiel_va(vmaddr, pid, psize, RIC_FLUSH_TLB); 601 preempt_enable(); 602 } 603 604 void radix__local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) 605 { 606 #ifdef CONFIG_HUGETLB_PAGE 607 /* need the return fix for nohash.c */ 608 if (is_vm_hugetlb_page(vma)) 609 return radix__local_flush_hugetlb_page(vma, vmaddr); 610 #endif 611 radix__local_flush_tlb_page_psize(vma->vm_mm, vmaddr, mmu_virtual_psize); 612 } 613 EXPORT_SYMBOL(radix__local_flush_tlb_page); 614 615 static bool mm_is_singlethreaded(struct mm_struct *mm) 616 { 617 if (atomic_read(&mm->context.copros) > 0) 618 return false; 619 if (atomic_read(&mm->mm_users) <= 1 && current->mm == mm) 620 return true; 621 return false; 622 } 623 624 static bool mm_needs_flush_escalation(struct mm_struct *mm) 625 { 626 /* 627 * P9 nest MMU has issues with the page walk cache 628 * caching PTEs and not flushing them properly when 629 * RIC = 0 for a PID/LPID invalidate 630 */ 631 if (atomic_read(&mm->context.copros) > 0) 632 return true; 633 return false; 634 } 635 636 #ifdef CONFIG_SMP 637 static void do_exit_flush_lazy_tlb(void 
static void do_exit_flush_lazy_tlb(void *arg)
{
	struct mm_struct *mm = arg;
	unsigned long pid = mm->context.id;

	if (current->mm == mm)
		return; /* Local CPU */

	if (current->active_mm == mm) {
		/*
		 * Must be a kernel thread because sender is single-threaded.
		 */
		BUG_ON(current->mm);
		mmgrab(&init_mm);
		switch_mm(mm, &init_mm, current);
		current->active_mm = &init_mm;
		mmdrop(mm);
	}
	_tlbiel_pid(pid, RIC_FLUSH_ALL);
}

static void exit_flush_lazy_tlbs(struct mm_struct *mm)
{
	/*
	 * Would be nice if this was async so it could be run in
	 * parallel with our local flush, but generic code does not
	 * give a good API for it. Could extend the generic code or
	 * make a special powerpc IPI for flushing TLBs.
	 * For now it's not too performance critical.
	 */
	smp_call_function_many(mm_cpumask(mm), do_exit_flush_lazy_tlb,
			       (void *)mm, 1);
	mm_reset_thread_local(mm);
}

void radix__flush_tlb_mm(struct mm_struct *mm)
{
	unsigned long pid;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	preempt_disable();
	/*
	 * Order loads of mm_cpumask vs previous stores to clear ptes before
	 * the invalidate. See barrier in switch_mm_irqs_off
	 */
	smp_mb();
	if (!mm_is_thread_local(mm)) {
		if (unlikely(mm_is_singlethreaded(mm))) {
			exit_flush_lazy_tlbs(mm);
			goto local;
		}

		if (cputlb_use_tlbie()) {
			if (mm_needs_flush_escalation(mm))
				_tlbie_pid(pid, RIC_FLUSH_ALL);
			else
				_tlbie_pid(pid, RIC_FLUSH_TLB);
		} else {
			_tlbiel_pid_multicast(mm, pid, RIC_FLUSH_TLB);
		}
	} else {
local:
		_tlbiel_pid(pid, RIC_FLUSH_TLB);
	}
	preempt_enable();
}
EXPORT_SYMBOL(radix__flush_tlb_mm);

static void __flush_all_mm(struct mm_struct *mm, bool fullmm)
{
	unsigned long pid;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	preempt_disable();
	smp_mb(); /* see radix__flush_tlb_mm */
	if (!mm_is_thread_local(mm)) {
		if (unlikely(mm_is_singlethreaded(mm))) {
			if (!fullmm) {
				exit_flush_lazy_tlbs(mm);
				goto local;
			}
		}
		if (cputlb_use_tlbie())
			_tlbie_pid(pid, RIC_FLUSH_ALL);
		else
			_tlbiel_pid_multicast(mm, pid, RIC_FLUSH_ALL);
	} else {
local:
		_tlbiel_pid(pid, RIC_FLUSH_ALL);
	}
	preempt_enable();
}

void radix__flush_all_mm(struct mm_struct *mm)
{
	__flush_all_mm(mm, false);
}
EXPORT_SYMBOL(radix__flush_all_mm);

void radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr)
{
	tlb->need_flush_all = 1;
}
EXPORT_SYMBOL(radix__flush_tlb_pwc);

void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
				 int psize)
{
	unsigned long pid;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	preempt_disable();
	smp_mb(); /* see radix__flush_tlb_mm */
	if (!mm_is_thread_local(mm)) {
		if (unlikely(mm_is_singlethreaded(mm))) {
			exit_flush_lazy_tlbs(mm);
			goto local;
		}
		if (cputlb_use_tlbie())
			_tlbie_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
		else
			_tlbiel_va_multicast(mm, vmaddr, pid, psize, RIC_FLUSH_TLB);
	} else {
local:
		_tlbiel_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
	}
	preempt_enable();
}

void radix__flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
{
#ifdef CONFIG_HUGETLB_PAGE
	if (is_vm_hugetlb_page(vma))
		return radix__flush_hugetlb_page(vma, vmaddr);
#endif
	radix__flush_tlb_page_psize(vma->vm_mm, vmaddr, mmu_virtual_psize);
}
EXPORT_SYMBOL(radix__flush_tlb_page);

#else /* CONFIG_SMP */
#define radix__flush_all_mm radix__local_flush_all_mm
#endif /* CONFIG_SMP */

static void do_tlbiel_kernel(void *info)
{
	_tlbiel_pid(0, RIC_FLUSH_ALL);
}

static inline void _tlbiel_kernel_broadcast(void)
{
	on_each_cpu(do_tlbiel_kernel, NULL, 1);
	if (tlbie_capable) {
		/*
		 * Coherent accelerators don't refcount kernel memory mappings,
		 * so have to always issue a tlbie for them. This is quite a
		 * slow path anyway.
		 */
		_tlbie_pid(0, RIC_FLUSH_ALL);
	}
}

/*
 * If kernel TLBIs ever become local rather than global, then
 * drivers/misc/ocxl/link.c:ocxl_link_add_pe will need some work, as it
 * assumes kernel TLBIs are global.
 */
void radix__flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
	if (cputlb_use_tlbie())
		_tlbie_pid(0, RIC_FLUSH_ALL);
	else
		_tlbiel_kernel_broadcast();
}
EXPORT_SYMBOL(radix__flush_tlb_kernel_range);

#define TLB_FLUSH_ALL -1UL

/*
 * Number of pages above which we invalidate the entire PID rather than
 * flush individual pages, for local and global flushes respectively.
 *
 * tlbie goes out to the interconnect and individual ops are more costly.
 * It also does not iterate over sets like the local tlbiel variant when
 * invalidating a full PID, so it has a far lower threshold to change from
 * individual page flushes to full-pid flushes.
 */
static unsigned long tlb_single_page_flush_ceiling __read_mostly = 33;
static unsigned long tlb_local_single_page_flush_ceiling __read_mostly = POWER9_TLB_SETS_RADIX * 2;

static inline void __radix__flush_tlb_range(struct mm_struct *mm,
					    unsigned long start, unsigned long end,
					    bool flush_all_sizes)
{
	unsigned long pid;
	unsigned int page_shift = mmu_psize_defs[mmu_virtual_psize].shift;
	unsigned long page_size = 1UL << page_shift;
	unsigned long nr_pages = (end - start) >> page_shift;
	bool local, full;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	preempt_disable();
	smp_mb(); /* see radix__flush_tlb_mm */
	if (!mm_is_thread_local(mm)) {
		if (unlikely(mm_is_singlethreaded(mm))) {
			if (end != TLB_FLUSH_ALL) {
				exit_flush_lazy_tlbs(mm);
				goto is_local;
			}
		}
		local = false;
		full = (end == TLB_FLUSH_ALL ||
				nr_pages > tlb_single_page_flush_ceiling);
	} else {
is_local:
		local = true;
		full = (end == TLB_FLUSH_ALL ||
				nr_pages > tlb_local_single_page_flush_ceiling);
	}

	if (full) {
		if (local) {
			_tlbiel_pid(pid, RIC_FLUSH_TLB);
		} else {
			if (cputlb_use_tlbie()) {
				if (mm_needs_flush_escalation(mm))
					_tlbie_pid(pid, RIC_FLUSH_ALL);
				else
					_tlbie_pid(pid, RIC_FLUSH_TLB);
			} else {
				_tlbiel_pid_multicast(mm, pid, RIC_FLUSH_TLB);
			}
		}
	} else {
		bool hflush = flush_all_sizes;
		bool gflush = flush_all_sizes;
		unsigned long hstart, hend;
		unsigned long gstart, gend;

		if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
			hflush = true;

		if (hflush) {
			hstart = (start + PMD_SIZE - 1) & PMD_MASK;
			hend = end & PMD_MASK;
			if (hstart == hend)
				hflush = false;
		}

		if (gflush) {
			gstart = (start + PUD_SIZE - 1) & PUD_MASK;
			gend = end & PUD_MASK;
			if (gstart == gend)
				gflush = false;
		}

		if (local) {
			asm volatile("ptesync": : :"memory");
			__tlbiel_va_range(start, end, pid, page_size, mmu_virtual_psize);
			if (hflush)
				__tlbiel_va_range(hstart, hend, pid,
						PMD_SIZE, MMU_PAGE_2M);
			if (gflush)
				__tlbiel_va_range(gstart, gend, pid,
						PUD_SIZE, MMU_PAGE_1G);
			asm volatile("ptesync": : :"memory");
		} else if (cputlb_use_tlbie()) {
			asm volatile("ptesync": : :"memory");
			__tlbie_va_range(start, end, pid, page_size, mmu_virtual_psize);
			if (hflush)
				__tlbie_va_range(hstart, hend, pid,
						PMD_SIZE, MMU_PAGE_2M);
			if (gflush)
				__tlbie_va_range(gstart, gend, pid,
						PUD_SIZE, MMU_PAGE_1G);

			asm volatile("eieio; tlbsync; ptesync": : :"memory");
		} else {
			_tlbiel_va_range_multicast(mm,
					start, end, pid, page_size, mmu_virtual_psize, false);
			if (hflush)
				_tlbiel_va_range_multicast(mm,
					hstart, hend, pid, PMD_SIZE, MMU_PAGE_2M, false);
			if (gflush)
				_tlbiel_va_range_multicast(mm,
					gstart, gend, pid, PUD_SIZE, MMU_PAGE_1G, false);
		}
	}
	preempt_enable();
}

void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
			    unsigned long end)
{
#ifdef CONFIG_HUGETLB_PAGE
	if (is_vm_hugetlb_page(vma))
		return radix__flush_hugetlb_tlb_range(vma, start, end);
#endif

	__radix__flush_tlb_range(vma->vm_mm, start, end, false);
}
EXPORT_SYMBOL(radix__flush_tlb_range);

static int radix_get_mmu_psize(int page_size)
{
	int psize;

	if (page_size == (1UL << mmu_psize_defs[mmu_virtual_psize].shift))
		psize = mmu_virtual_psize;
	else if (page_size == (1UL << mmu_psize_defs[MMU_PAGE_2M].shift))
		psize = MMU_PAGE_2M;
	else if (page_size == (1UL << mmu_psize_defs[MMU_PAGE_1G].shift))
		psize = MMU_PAGE_1G;
	else
		return -1;
	return psize;
}

/*
 * Flush partition scoped LPID address translation for all CPUs.
 */
void radix__flush_tlb_lpid_page(unsigned int lpid,
				unsigned long addr,
				unsigned long page_size)
{
	int psize = radix_get_mmu_psize(page_size);

	_tlbie_lpid_va(addr, lpid, psize, RIC_FLUSH_TLB);
}
EXPORT_SYMBOL_GPL(radix__flush_tlb_lpid_page);

/*
 * Flush partition scoped PWC from LPID for all CPUs.
 */
void radix__flush_pwc_lpid(unsigned int lpid)
{
	_tlbie_lpid(lpid, RIC_FLUSH_PWC);
}
EXPORT_SYMBOL_GPL(radix__flush_pwc_lpid);

/*
 * Flush partition scoped translations from LPID (=LPIDR)
 */
void radix__flush_all_lpid(unsigned int lpid)
{
	_tlbie_lpid(lpid, RIC_FLUSH_ALL);
}
EXPORT_SYMBOL_GPL(radix__flush_all_lpid);

/*
 * Flush process scoped translations from LPID (=LPIDR)
 */
void radix__flush_all_lpid_guest(unsigned int lpid)
{
	_tlbie_lpid_guest(lpid, RIC_FLUSH_ALL);
}

static void radix__flush_tlb_pwc_range_psize(struct mm_struct *mm, unsigned long start,
					     unsigned long end, int psize);

void radix__tlb_flush(struct mmu_gather *tlb)
{
	int psize = 0;
	struct mm_struct *mm = tlb->mm;
	int page_size = tlb->page_size;
	unsigned long start = tlb->start;
	unsigned long end = tlb->end;

	/*
	 * if page size is not something we understand, do a full mm flush
	 *
	 * A "fullmm" flush must always do a flush_all_mm (RIC=2) flush
	 * that flushes the process table entry cache upon process teardown.
	 * See the comment for radix in arch_exit_mmap().
	 */
	if (tlb->fullmm) {
		__flush_all_mm(mm, true);
#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLB_PAGE)
	} else if (mm_tlb_flush_nested(mm)) {
		/*
		 * If there is a concurrent invalidation that is clearing ptes,
		 * then it's possible this invalidation will miss one of those
		 * cleared ptes and miss flushing the TLB. If this invalidate
		 * returns before the other one flushes TLBs, that can result
		 * in it returning while there are still valid TLBs inside the
		 * range to be invalidated.
		 *
		 * See mm/memory.c:tlb_finish_mmu() for more details.
		 *
		 * The solution to this is to ensure the entire range is always
		 * flushed here. The problem for powerpc is that the flushes
		 * are page size specific, so this "forced flush" would not
		 * do the right thing if there is a mix of page sizes in
		 * the range to be invalidated. So use __flush_tlb_range
		 * which invalidates all possible page sizes in the range.
		 *
		 * A PWC flush is probably not required because the core code
		 * shouldn't free page tables in this path, but accounting
		 * for the possibility makes us a bit more robust.
		 *
		 * need_flush_all is an uncommon case because page table
		 * teardown should be done with exclusive locks held (but
		 * after locks are dropped another invalidate could come
		 * in), so it could be optimized further if necessary.
		 */
		if (!tlb->need_flush_all)
			__radix__flush_tlb_range(mm, start, end, true);
		else
			radix__flush_all_mm(mm);
#endif
	} else if ((psize = radix_get_mmu_psize(page_size)) == -1) {
		if (!tlb->need_flush_all)
			radix__flush_tlb_mm(mm);
		else
			radix__flush_all_mm(mm);
	} else {
		if (!tlb->need_flush_all)
			radix__flush_tlb_range_psize(mm, start, end, psize);
		else
			radix__flush_tlb_pwc_range_psize(mm, start, end, psize);
	}
	tlb->need_flush_all = 0;
}

static __always_inline void __radix__flush_tlb_range_psize(struct mm_struct *mm,
				unsigned long start, unsigned long end,
				int psize, bool also_pwc)
{
	unsigned long pid;
	unsigned int page_shift = mmu_psize_defs[psize].shift;
	unsigned long page_size = 1UL << page_shift;
	unsigned long nr_pages = (end - start) >> page_shift;
	bool local, full;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	preempt_disable();
	smp_mb(); /* see radix__flush_tlb_mm */
	if (!mm_is_thread_local(mm)) {
		if (unlikely(mm_is_singlethreaded(mm))) {
			if (end != TLB_FLUSH_ALL) {
				exit_flush_lazy_tlbs(mm);
				goto is_local;
			}
		}
		local = false;
		full = (end == TLB_FLUSH_ALL ||
				nr_pages > tlb_single_page_flush_ceiling);
	} else {
is_local:
		local = true;
		full = (end == TLB_FLUSH_ALL ||
				nr_pages > tlb_local_single_page_flush_ceiling);
	}

	if (full) {
		if (local) {
			_tlbiel_pid(pid, also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB);
		} else {
			if (cputlb_use_tlbie()) {
				if (mm_needs_flush_escalation(mm))
					also_pwc = true;

				_tlbie_pid(pid,
					also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB);
			} else {
				_tlbiel_pid_multicast(mm, pid,
					also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB);
			}

		}
	} else {
		if (local)
			_tlbiel_va_range(start, end, pid, page_size, psize, also_pwc);
		else if (cputlb_use_tlbie())
			_tlbie_va_range(start, end, pid, page_size, psize, also_pwc);
		else
			_tlbiel_va_range_multicast(mm,
					start, end, pid, page_size, psize, also_pwc);
	}
	preempt_enable();
}

void radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start,
				  unsigned long end, int psize)
{
	return __radix__flush_tlb_range_psize(mm, start, end, psize, false);
}

static void radix__flush_tlb_pwc_range_psize(struct mm_struct *mm, unsigned long start,
					     unsigned long end, int psize)
{
	__radix__flush_tlb_range_psize(mm, start, end, psize, true);
}

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr)
{
	unsigned long pid, end;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	/* 4k page size, just blow the world */
	if (PAGE_SIZE == 0x1000) {
		radix__flush_all_mm(mm);
		return;
	}

	end = addr + HPAGE_PMD_SIZE;

	/* Otherwise first do the PWC, then iterate the pages. */
	preempt_disable();
	smp_mb(); /* see radix__flush_tlb_mm */
	if (!mm_is_thread_local(mm)) {
		if (unlikely(mm_is_singlethreaded(mm))) {
			exit_flush_lazy_tlbs(mm);
			goto local;
		}
		if (cputlb_use_tlbie())
			_tlbie_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
		else
			_tlbiel_va_range_multicast(mm,
					addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
	} else {
local:
		_tlbiel_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
	}

	preempt_enable();
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

void radix__flush_pmd_tlb_range(struct vm_area_struct *vma,
				unsigned long start, unsigned long end)
{
	radix__flush_tlb_range_psize(vma->vm_mm, start, end, MMU_PAGE_2M);
}
EXPORT_SYMBOL(radix__flush_pmd_tlb_range);

void radix__flush_tlb_all(void)
{
	unsigned long rb, prs, r, rs;
	unsigned long ric = RIC_FLUSH_ALL;

	rb = 0x3 << PPC_BITLSHIFT(53); /* IS = 3 */
	prs = 0; /* partition scoped */
	r = 1;   /* radix format */
	rs = 1 & ((1UL << 32) - 1); /* any LPID value to flush guest mappings */

	asm volatile("ptesync": : :"memory");
	/*
	 * now flush guest entries by passing PRS = 1 and LPID != 0
	 */
	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(1), "i"(ric), "r"(rs) : "memory");
	/*
	 * now flush host entries by passing PRS = 0 and LPID == 0
	 */
	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(0) : "memory");
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
extern void radix_kvm_prefetch_workaround(struct mm_struct *mm)
{
	unsigned long pid = mm->context.id;

	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	/*
	 * If this context hasn't run on that CPU before and KVM is
	 * around, there's a slim chance that the guest on another
	 * CPU just brought in obsolete translation into the TLB of
	 * this CPU due to a bad prefetch using the guest PID on
	 * the way into the hypervisor.
	 *
	 * We work around this here. If KVM is possible, we check if
	 * any sibling thread is in KVM. If it is, the window may exist
	 * and thus we flush that PID from the core.
	 *
	 * A potential future improvement would be to mark which PIDs
	 * have never been used on the system and avoid it if the PID
	 * is new and the process has no other cpumask bit set.
	 */
	if (cpu_has_feature(CPU_FTR_HVMODE) && radix_enabled()) {
		int cpu = smp_processor_id();
		int sib = cpu_first_thread_sibling(cpu);
		bool flush = false;

		for (; sib <= cpu_last_thread_sibling(cpu) && !flush; sib++) {
			if (sib == cpu)
				continue;
			if (!cpu_possible(sib))
				continue;
			if (paca_ptrs[sib]->kvm_hstate.kvm_vcpu)
				flush = true;
		}
		if (flush)
			_tlbiel_pid(pid, RIC_FLUSH_ALL);
	}
}
EXPORT_SYMBOL_GPL(radix_kvm_prefetch_workaround);
#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */