1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * TLB flush routines for radix kernels. 4 * 5 * Copyright 2015-2016, Aneesh Kumar K.V, IBM Corporation. 6 */ 7 8 #include <linux/mm.h> 9 #include <linux/hugetlb.h> 10 #include <linux/memblock.h> 11 #include <linux/mmu_context.h> 12 #include <linux/sched/mm.h> 13 #include <linux/debugfs.h> 14 15 #include <asm/ppc-opcode.h> 16 #include <asm/tlb.h> 17 #include <asm/tlbflush.h> 18 #include <asm/trace.h> 19 #include <asm/cputhreads.h> 20 #include <asm/plpar_wrappers.h> 21 22 #include "internal.h" 23 24 /* 25 * tlbiel instruction for radix, set invalidation 26 * i.e., r=1 and is=01 or is=10 or is=11 27 */ 28 static __always_inline void tlbiel_radix_set_isa300(unsigned int set, unsigned int is, 29 unsigned int pid, 30 unsigned int ric, unsigned int prs) 31 { 32 unsigned long rb; 33 unsigned long rs; 34 35 rb = (set << PPC_BITLSHIFT(51)) | (is << PPC_BITLSHIFT(53)); 36 rs = ((unsigned long)pid << PPC_BITLSHIFT(31)); 37 38 asm volatile(PPC_TLBIEL(%0, %1, %2, %3, 1) 39 : : "r"(rb), "r"(rs), "i"(ric), "i"(prs) 40 : "memory"); 41 } 42 43 static void tlbiel_all_isa300(unsigned int num_sets, unsigned int is) 44 { 45 unsigned int set; 46 47 asm volatile("ptesync": : :"memory"); 48 49 /* 50 * Flush the first set of the TLB, and the entire Page Walk Cache 51 * and partition table entries. Then flush the remaining sets of the 52 * TLB. 53 */ 54 55 if (early_cpu_has_feature(CPU_FTR_HVMODE)) { 56 /* MSR[HV] should flush partition scope translations first. */ 57 tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 0); 58 59 if (!early_cpu_has_feature(CPU_FTR_ARCH_31)) { 60 for (set = 1; set < num_sets; set++) 61 tlbiel_radix_set_isa300(set, is, 0, 62 RIC_FLUSH_TLB, 0); 63 } 64 } 65 66 /* Flush process scoped entries. */ 67 tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 1); 68 69 if (!early_cpu_has_feature(CPU_FTR_ARCH_31)) { 70 for (set = 1; set < num_sets; set++) 71 tlbiel_radix_set_isa300(set, is, 0, RIC_FLUSH_TLB, 1); 72 } 73 74 ppc_after_tlbiel_barrier(); 75 } 76 77 void radix__tlbiel_all(unsigned int action) 78 { 79 unsigned int is; 80 81 switch (action) { 82 case TLB_INVAL_SCOPE_GLOBAL: 83 is = 3; 84 break; 85 case TLB_INVAL_SCOPE_LPID: 86 is = 2; 87 break; 88 default: 89 BUG(); 90 } 91 92 if (early_cpu_has_feature(CPU_FTR_ARCH_300)) 93 tlbiel_all_isa300(POWER9_TLB_SETS_RADIX, is); 94 else 95 WARN(1, "%s called on pre-POWER9 CPU\n", __func__); 96 97 asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT "; isync" : : :"memory"); 98 } 99 100 static __always_inline void __tlbiel_pid(unsigned long pid, int set, 101 unsigned long ric) 102 { 103 unsigned long rb,rs,prs,r; 104 105 rb = PPC_BIT(53); /* IS = 1 */ 106 rb |= set << PPC_BITLSHIFT(51); 107 rs = ((unsigned long)pid) << PPC_BITLSHIFT(31); 108 prs = 1; /* process scoped */ 109 r = 1; /* radix format */ 110 111 asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1) 112 : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); 113 trace_tlbie(0, 1, rb, rs, ric, prs, r); 114 } 115 116 static __always_inline void __tlbie_pid(unsigned long pid, unsigned long ric) 117 { 118 unsigned long rb,rs,prs,r; 119 120 rb = PPC_BIT(53); /* IS = 1 */ 121 rs = pid << PPC_BITLSHIFT(31); 122 prs = 1; /* process scoped */ 123 r = 1; /* radix format */ 124 125 asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) 126 : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); 127 trace_tlbie(0, 0, rb, rs, ric, prs, r); 128 } 129 130 static __always_inline void __tlbie_lpid(unsigned long lpid, unsigned long ric) 131 { 132 unsigned long rb,rs,prs,r; 133 134 rb = PPC_BIT(52); /* IS = 2 */ 135 rs = lpid; 136 prs = 0; /* partition scoped */ 137 r = 1; /* radix format */ 138 139 asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) 140 : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); 141 trace_tlbie(lpid, 0, rb, rs, ric, prs, r); 142 } 143 144 static __always_inline void __tlbie_lpid_guest(unsigned long lpid, unsigned long ric) 145 { 146 unsigned long rb,rs,prs,r; 147 148 rb = PPC_BIT(52); /* IS = 2 */ 149 rs = lpid; 150 prs = 1; /* process scoped */ 151 r = 1; /* radix format */ 152 153 asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) 154 : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); 155 trace_tlbie(lpid, 0, rb, rs, ric, prs, r); 156 } 157 158 static __always_inline void __tlbiel_va(unsigned long va, unsigned long pid, 159 unsigned long ap, unsigned long ric) 160 { 161 unsigned long rb,rs,prs,r; 162 163 rb = va & ~(PPC_BITMASK(52, 63)); 164 rb |= ap << PPC_BITLSHIFT(58); 165 rs = pid << PPC_BITLSHIFT(31); 166 prs = 1; /* process scoped */ 167 r = 1; /* radix format */ 168 169 asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1) 170 : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); 171 trace_tlbie(0, 1, rb, rs, ric, prs, r); 172 } 173 174 static __always_inline void __tlbie_va(unsigned long va, unsigned long pid, 175 unsigned long ap, unsigned long ric) 176 { 177 unsigned long rb,rs,prs,r; 178 179 rb = va & ~(PPC_BITMASK(52, 63)); 180 rb |= ap << PPC_BITLSHIFT(58); 181 rs = pid << PPC_BITLSHIFT(31); 182 prs = 1; /* process scoped */ 183 r = 1; /* radix format */ 184 185 asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) 186 : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); 187 trace_tlbie(0, 0, rb, rs, ric, prs, r); 188 } 189 190 static __always_inline void __tlbie_lpid_va(unsigned long va, unsigned long lpid, 191 unsigned long ap, unsigned long ric) 192 { 193 unsigned long rb,rs,prs,r; 194 195 rb = va & ~(PPC_BITMASK(52, 63)); 196 rb |= ap << PPC_BITLSHIFT(58); 197 rs = lpid; 198 prs = 0; /* partition scoped */ 199 r = 1; /* radix format */ 200 201 asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) 202 : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); 203 trace_tlbie(lpid, 0, rb, rs, ric, prs, r); 204 } 205 206 207 static inline void fixup_tlbie_va(unsigned long va, unsigned long pid, 208 unsigned long ap) 209 { 210 if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) { 211 asm volatile("ptesync": : :"memory"); 212 __tlbie_va(va, 0, ap, RIC_FLUSH_TLB); 213 } 214 215 if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) { 216 asm volatile("ptesync": : :"memory"); 217 __tlbie_va(va, pid, ap, RIC_FLUSH_TLB); 218 } 219 } 220 221 static inline void fixup_tlbie_va_range(unsigned long va, unsigned long pid, 222 unsigned long ap) 223 { 224 if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) { 225 asm volatile("ptesync": : :"memory"); 226 __tlbie_pid(0, RIC_FLUSH_TLB); 227 } 228 229 if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) { 230 asm volatile("ptesync": : :"memory"); 231 __tlbie_va(va, pid, ap, RIC_FLUSH_TLB); 232 } 233 } 234 235 static inline void fixup_tlbie_pid(unsigned long pid) 236 { 237 /* 238 * We can use any address for the invalidation, pick one which is 239 * probably unused as an optimisation. 240 */ 241 unsigned long va = ((1UL << 52) - 1); 242 243 if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) { 244 asm volatile("ptesync": : :"memory"); 245 __tlbie_pid(0, RIC_FLUSH_TLB); 246 } 247 248 if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) { 249 asm volatile("ptesync": : :"memory"); 250 __tlbie_va(va, pid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB); 251 } 252 } 253 254 static inline void fixup_tlbie_lpid_va(unsigned long va, unsigned long lpid, 255 unsigned long ap) 256 { 257 if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) { 258 asm volatile("ptesync": : :"memory"); 259 __tlbie_lpid_va(va, 0, ap, RIC_FLUSH_TLB); 260 } 261 262 if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) { 263 asm volatile("ptesync": : :"memory"); 264 __tlbie_lpid_va(va, lpid, ap, RIC_FLUSH_TLB); 265 } 266 } 267 268 static inline void fixup_tlbie_lpid(unsigned long lpid) 269 { 270 /* 271 * We can use any address for the invalidation, pick one which is 272 * probably unused as an optimisation. 273 */ 274 unsigned long va = ((1UL << 52) - 1); 275 276 if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) { 277 asm volatile("ptesync": : :"memory"); 278 __tlbie_lpid(0, RIC_FLUSH_TLB); 279 } 280 281 if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) { 282 asm volatile("ptesync": : :"memory"); 283 __tlbie_lpid_va(va, lpid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB); 284 } 285 } 286 287 /* 288 * We use 128 set in radix mode and 256 set in hpt mode. 289 */ 290 static inline void _tlbiel_pid(unsigned long pid, unsigned long ric) 291 { 292 int set; 293 294 asm volatile("ptesync": : :"memory"); 295 296 switch (ric) { 297 case RIC_FLUSH_PWC: 298 299 /* For PWC, only one flush is needed */ 300 __tlbiel_pid(pid, 0, RIC_FLUSH_PWC); 301 ppc_after_tlbiel_barrier(); 302 return; 303 case RIC_FLUSH_TLB: 304 __tlbiel_pid(pid, 0, RIC_FLUSH_TLB); 305 break; 306 case RIC_FLUSH_ALL: 307 default: 308 /* 309 * Flush the first set of the TLB, and if 310 * we're doing a RIC_FLUSH_ALL, also flush 311 * the entire Page Walk Cache. 312 */ 313 __tlbiel_pid(pid, 0, RIC_FLUSH_ALL); 314 } 315 316 if (!cpu_has_feature(CPU_FTR_ARCH_31)) { 317 /* For the remaining sets, just flush the TLB */ 318 for (set = 1; set < POWER9_TLB_SETS_RADIX ; set++) 319 __tlbiel_pid(pid, set, RIC_FLUSH_TLB); 320 } 321 322 ppc_after_tlbiel_barrier(); 323 asm volatile(PPC_RADIX_INVALIDATE_ERAT_USER "; isync" : : :"memory"); 324 } 325 326 static inline void _tlbie_pid(unsigned long pid, unsigned long ric) 327 { 328 asm volatile("ptesync": : :"memory"); 329 330 /* 331 * Workaround the fact that the "ric" argument to __tlbie_pid 332 * must be a compile-time constraint to match the "i" constraint 333 * in the asm statement. 334 */ 335 switch (ric) { 336 case RIC_FLUSH_TLB: 337 __tlbie_pid(pid, RIC_FLUSH_TLB); 338 fixup_tlbie_pid(pid); 339 break; 340 case RIC_FLUSH_PWC: 341 __tlbie_pid(pid, RIC_FLUSH_PWC); 342 break; 343 case RIC_FLUSH_ALL: 344 default: 345 __tlbie_pid(pid, RIC_FLUSH_ALL); 346 fixup_tlbie_pid(pid); 347 } 348 asm volatile("eieio; tlbsync; ptesync": : :"memory"); 349 } 350 351 struct tlbiel_pid { 352 unsigned long pid; 353 unsigned long ric; 354 }; 355 356 static void do_tlbiel_pid(void *info) 357 { 358 struct tlbiel_pid *t = info; 359 360 if (t->ric == RIC_FLUSH_TLB) 361 _tlbiel_pid(t->pid, RIC_FLUSH_TLB); 362 else if (t->ric == RIC_FLUSH_PWC) 363 _tlbiel_pid(t->pid, RIC_FLUSH_PWC); 364 else 365 _tlbiel_pid(t->pid, RIC_FLUSH_ALL); 366 } 367 368 static inline void _tlbiel_pid_multicast(struct mm_struct *mm, 369 unsigned long pid, unsigned long ric) 370 { 371 struct cpumask *cpus = mm_cpumask(mm); 372 struct tlbiel_pid t = { .pid = pid, .ric = ric }; 373 374 on_each_cpu_mask(cpus, do_tlbiel_pid, &t, 1); 375 /* 376 * Always want the CPU translations to be invalidated with tlbiel in 377 * these paths, so while coprocessors must use tlbie, we can not 378 * optimise away the tlbiel component. 379 */ 380 if (atomic_read(&mm->context.copros) > 0) 381 _tlbie_pid(pid, RIC_FLUSH_ALL); 382 } 383 384 static inline void _tlbie_lpid(unsigned long lpid, unsigned long ric) 385 { 386 asm volatile("ptesync": : :"memory"); 387 388 /* 389 * Workaround the fact that the "ric" argument to __tlbie_pid 390 * must be a compile-time contraint to match the "i" constraint 391 * in the asm statement. 392 */ 393 switch (ric) { 394 case RIC_FLUSH_TLB: 395 __tlbie_lpid(lpid, RIC_FLUSH_TLB); 396 fixup_tlbie_lpid(lpid); 397 break; 398 case RIC_FLUSH_PWC: 399 __tlbie_lpid(lpid, RIC_FLUSH_PWC); 400 break; 401 case RIC_FLUSH_ALL: 402 default: 403 __tlbie_lpid(lpid, RIC_FLUSH_ALL); 404 fixup_tlbie_lpid(lpid); 405 } 406 asm volatile("eieio; tlbsync; ptesync": : :"memory"); 407 } 408 409 static __always_inline void _tlbie_lpid_guest(unsigned long lpid, unsigned long ric) 410 { 411 /* 412 * Workaround the fact that the "ric" argument to __tlbie_pid 413 * must be a compile-time contraint to match the "i" constraint 414 * in the asm statement. 415 */ 416 switch (ric) { 417 case RIC_FLUSH_TLB: 418 __tlbie_lpid_guest(lpid, RIC_FLUSH_TLB); 419 break; 420 case RIC_FLUSH_PWC: 421 __tlbie_lpid_guest(lpid, RIC_FLUSH_PWC); 422 break; 423 case RIC_FLUSH_ALL: 424 default: 425 __tlbie_lpid_guest(lpid, RIC_FLUSH_ALL); 426 } 427 fixup_tlbie_lpid(lpid); 428 asm volatile("eieio; tlbsync; ptesync": : :"memory"); 429 } 430 431 static inline void __tlbiel_va_range(unsigned long start, unsigned long end, 432 unsigned long pid, unsigned long page_size, 433 unsigned long psize) 434 { 435 unsigned long addr; 436 unsigned long ap = mmu_get_ap(psize); 437 438 for (addr = start; addr < end; addr += page_size) 439 __tlbiel_va(addr, pid, ap, RIC_FLUSH_TLB); 440 } 441 442 static __always_inline void _tlbiel_va(unsigned long va, unsigned long pid, 443 unsigned long psize, unsigned long ric) 444 { 445 unsigned long ap = mmu_get_ap(psize); 446 447 asm volatile("ptesync": : :"memory"); 448 __tlbiel_va(va, pid, ap, ric); 449 ppc_after_tlbiel_barrier(); 450 } 451 452 static inline void _tlbiel_va_range(unsigned long start, unsigned long end, 453 unsigned long pid, unsigned long page_size, 454 unsigned long psize, bool also_pwc) 455 { 456 asm volatile("ptesync": : :"memory"); 457 if (also_pwc) 458 __tlbiel_pid(pid, 0, RIC_FLUSH_PWC); 459 __tlbiel_va_range(start, end, pid, page_size, psize); 460 ppc_after_tlbiel_barrier(); 461 } 462 463 static inline void __tlbie_va_range(unsigned long start, unsigned long end, 464 unsigned long pid, unsigned long page_size, 465 unsigned long psize) 466 { 467 unsigned long addr; 468 unsigned long ap = mmu_get_ap(psize); 469 470 for (addr = start; addr < end; addr += page_size) 471 __tlbie_va(addr, pid, ap, RIC_FLUSH_TLB); 472 473 fixup_tlbie_va_range(addr - page_size, pid, ap); 474 } 475 476 static __always_inline void _tlbie_va(unsigned long va, unsigned long pid, 477 unsigned long psize, unsigned long ric) 478 { 479 unsigned long ap = mmu_get_ap(psize); 480 481 asm volatile("ptesync": : :"memory"); 482 __tlbie_va(va, pid, ap, ric); 483 fixup_tlbie_va(va, pid, ap); 484 asm volatile("eieio; tlbsync; ptesync": : :"memory"); 485 } 486 487 struct tlbiel_va { 488 unsigned long pid; 489 unsigned long va; 490 unsigned long psize; 491 unsigned long ric; 492 }; 493 494 static void do_tlbiel_va(void *info) 495 { 496 struct tlbiel_va *t = info; 497 498 if (t->ric == RIC_FLUSH_TLB) 499 _tlbiel_va(t->va, t->pid, t->psize, RIC_FLUSH_TLB); 500 else if (t->ric == RIC_FLUSH_PWC) 501 _tlbiel_va(t->va, t->pid, t->psize, RIC_FLUSH_PWC); 502 else 503 _tlbiel_va(t->va, t->pid, t->psize, RIC_FLUSH_ALL); 504 } 505 506 static inline void _tlbiel_va_multicast(struct mm_struct *mm, 507 unsigned long va, unsigned long pid, 508 unsigned long psize, unsigned long ric) 509 { 510 struct cpumask *cpus = mm_cpumask(mm); 511 struct tlbiel_va t = { .va = va, .pid = pid, .psize = psize, .ric = ric }; 512 on_each_cpu_mask(cpus, do_tlbiel_va, &t, 1); 513 if (atomic_read(&mm->context.copros) > 0) 514 _tlbie_va(va, pid, psize, RIC_FLUSH_TLB); 515 } 516 517 struct tlbiel_va_range { 518 unsigned long pid; 519 unsigned long start; 520 unsigned long end; 521 unsigned long page_size; 522 unsigned long psize; 523 bool also_pwc; 524 }; 525 526 static void do_tlbiel_va_range(void *info) 527 { 528 struct tlbiel_va_range *t = info; 529 530 _tlbiel_va_range(t->start, t->end, t->pid, t->page_size, 531 t->psize, t->also_pwc); 532 } 533 534 static __always_inline void _tlbie_lpid_va(unsigned long va, unsigned long lpid, 535 unsigned long psize, unsigned long ric) 536 { 537 unsigned long ap = mmu_get_ap(psize); 538 539 asm volatile("ptesync": : :"memory"); 540 __tlbie_lpid_va(va, lpid, ap, ric); 541 fixup_tlbie_lpid_va(va, lpid, ap); 542 asm volatile("eieio; tlbsync; ptesync": : :"memory"); 543 } 544 545 static inline void _tlbie_va_range(unsigned long start, unsigned long end, 546 unsigned long pid, unsigned long page_size, 547 unsigned long psize, bool also_pwc) 548 { 549 asm volatile("ptesync": : :"memory"); 550 if (also_pwc) 551 __tlbie_pid(pid, RIC_FLUSH_PWC); 552 __tlbie_va_range(start, end, pid, page_size, psize); 553 asm volatile("eieio; tlbsync; ptesync": : :"memory"); 554 } 555 556 static inline void _tlbiel_va_range_multicast(struct mm_struct *mm, 557 unsigned long start, unsigned long end, 558 unsigned long pid, unsigned long page_size, 559 unsigned long psize, bool also_pwc) 560 { 561 struct cpumask *cpus = mm_cpumask(mm); 562 struct tlbiel_va_range t = { .start = start, .end = end, 563 .pid = pid, .page_size = page_size, 564 .psize = psize, .also_pwc = also_pwc }; 565 566 on_each_cpu_mask(cpus, do_tlbiel_va_range, &t, 1); 567 if (atomic_read(&mm->context.copros) > 0) 568 _tlbie_va_range(start, end, pid, page_size, psize, also_pwc); 569 } 570 571 /* 572 * Base TLB flushing operations: 573 * 574 * - flush_tlb_mm(mm) flushes the specified mm context TLB's 575 * - flush_tlb_page(vma, vmaddr) flushes one page 576 * - flush_tlb_range(vma, start, end) flushes a range of pages 577 * - flush_tlb_kernel_range(start, end) flushes kernel pages 578 * 579 * - local_* variants of page and mm only apply to the current 580 * processor 581 */ 582 void radix__local_flush_tlb_mm(struct mm_struct *mm) 583 { 584 unsigned long pid = mm->context.id; 585 586 if (WARN_ON_ONCE(pid == MMU_NO_CONTEXT)) 587 return; 588 589 preempt_disable(); 590 _tlbiel_pid(pid, RIC_FLUSH_TLB); 591 preempt_enable(); 592 } 593 EXPORT_SYMBOL(radix__local_flush_tlb_mm); 594 595 #ifndef CONFIG_SMP 596 void radix__local_flush_all_mm(struct mm_struct *mm) 597 { 598 unsigned long pid = mm->context.id; 599 600 if (WARN_ON_ONCE(pid == MMU_NO_CONTEXT)) 601 return; 602 603 preempt_disable(); 604 _tlbiel_pid(pid, RIC_FLUSH_ALL); 605 preempt_enable(); 606 } 607 EXPORT_SYMBOL(radix__local_flush_all_mm); 608 609 static void __flush_all_mm(struct mm_struct *mm, bool fullmm) 610 { 611 radix__local_flush_all_mm(mm); 612 } 613 #endif /* CONFIG_SMP */ 614 615 void radix__local_flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr, 616 int psize) 617 { 618 unsigned long pid = mm->context.id; 619 620 if (WARN_ON_ONCE(pid == MMU_NO_CONTEXT)) 621 return; 622 623 preempt_disable(); 624 _tlbiel_va(vmaddr, pid, psize, RIC_FLUSH_TLB); 625 preempt_enable(); 626 } 627 628 void radix__local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) 629 { 630 #ifdef CONFIG_HUGETLB_PAGE 631 /* need the return fix for nohash.c */ 632 if (is_vm_hugetlb_page(vma)) 633 return radix__local_flush_hugetlb_page(vma, vmaddr); 634 #endif 635 radix__local_flush_tlb_page_psize(vma->vm_mm, vmaddr, mmu_virtual_psize); 636 } 637 EXPORT_SYMBOL(radix__local_flush_tlb_page); 638 639 static bool mm_needs_flush_escalation(struct mm_struct *mm) 640 { 641 /* 642 * The P9 nest MMU has issues with the page walk cache caching PTEs 643 * and not flushing them when RIC = 0 for a PID/LPID invalidate. 644 * 645 * This may have been fixed in shipping firmware (by disabling PWC 646 * or preventing it from caching PTEs), but until that is confirmed, 647 * this workaround is required - escalate all RIC=0 IS=1/2/3 flushes 648 * to RIC=2. 649 * 650 * POWER10 (and P9P) does not have this problem. 651 */ 652 if (cpu_has_feature(CPU_FTR_ARCH_31)) 653 return false; 654 if (atomic_read(&mm->context.copros) > 0) 655 return true; 656 return false; 657 } 658 659 /* 660 * If always_flush is true, then flush even if this CPU can't be removed 661 * from mm_cpumask. 662 */ 663 void exit_lazy_flush_tlb(struct mm_struct *mm, bool always_flush) 664 { 665 unsigned long pid = mm->context.id; 666 int cpu = smp_processor_id(); 667 668 /* 669 * A kthread could have done a mmget_not_zero() after the flushing CPU 670 * checked mm_cpumask, and be in the process of kthread_use_mm when 671 * interrupted here. In that case, current->mm will be set to mm, 672 * because kthread_use_mm() setting ->mm and switching to the mm is 673 * done with interrupts off. 674 */ 675 if (current->mm == mm) 676 goto out; 677 678 if (current->active_mm == mm) { 679 unsigned long flags; 680 681 WARN_ON_ONCE(current->mm != NULL); 682 /* 683 * It is a kernel thread and is using mm as the lazy tlb, so 684 * switch it to init_mm. This is not always called from IPI 685 * (e.g., flush_type_needed), so must disable irqs. 686 */ 687 local_irq_save(flags); 688 mmgrab_lazy_tlb(&init_mm); 689 current->active_mm = &init_mm; 690 switch_mm_irqs_off(mm, &init_mm, current); 691 mmdrop_lazy_tlb(mm); 692 local_irq_restore(flags); 693 } 694 695 /* 696 * This IPI may be initiated from any source including those not 697 * running the mm, so there may be a racing IPI that comes after 698 * this one which finds the cpumask already clear. Check and avoid 699 * underflowing the active_cpus count in that case. The race should 700 * not otherwise be a problem, but the TLB must be flushed because 701 * that's what the caller expects. 702 */ 703 if (cpumask_test_cpu(cpu, mm_cpumask(mm))) { 704 dec_mm_active_cpus(mm); 705 cpumask_clear_cpu(cpu, mm_cpumask(mm)); 706 always_flush = true; 707 } 708 709 out: 710 if (always_flush) 711 _tlbiel_pid(pid, RIC_FLUSH_ALL); 712 } 713 714 #ifdef CONFIG_SMP 715 static void do_exit_flush_lazy_tlb(void *arg) 716 { 717 struct mm_struct *mm = arg; 718 exit_lazy_flush_tlb(mm, true); 719 } 720 721 static void exit_flush_lazy_tlbs(struct mm_struct *mm) 722 { 723 /* 724 * Would be nice if this was async so it could be run in 725 * parallel with our local flush, but generic code does not 726 * give a good API for it. Could extend the generic code or 727 * make a special powerpc IPI for flushing TLBs. 728 * For now it's not too performance critical. 729 */ 730 smp_call_function_many(mm_cpumask(mm), do_exit_flush_lazy_tlb, 731 (void *)mm, 1); 732 } 733 734 #else /* CONFIG_SMP */ 735 static inline void exit_flush_lazy_tlbs(struct mm_struct *mm) { } 736 #endif /* CONFIG_SMP */ 737 738 static DEFINE_PER_CPU(unsigned int, mm_cpumask_trim_clock); 739 740 /* 741 * Interval between flushes at which we send out IPIs to check whether the 742 * mm_cpumask can be trimmed for the case where it's not a single-threaded 743 * process flushing its own mm. The intent is to reduce the cost of later 744 * flushes. Don't want this to be so low that it adds noticable cost to TLB 745 * flushing, or so high that it doesn't help reduce global TLBIEs. 746 */ 747 static unsigned long tlb_mm_cpumask_trim_timer = 1073; 748 749 static bool tick_and_test_trim_clock(void) 750 { 751 if (__this_cpu_inc_return(mm_cpumask_trim_clock) == 752 tlb_mm_cpumask_trim_timer) { 753 __this_cpu_write(mm_cpumask_trim_clock, 0); 754 return true; 755 } 756 return false; 757 } 758 759 enum tlb_flush_type { 760 FLUSH_TYPE_NONE, 761 FLUSH_TYPE_LOCAL, 762 FLUSH_TYPE_GLOBAL, 763 }; 764 765 static enum tlb_flush_type flush_type_needed(struct mm_struct *mm, bool fullmm) 766 { 767 int active_cpus = atomic_read(&mm->context.active_cpus); 768 int cpu = smp_processor_id(); 769 770 if (active_cpus == 0) 771 return FLUSH_TYPE_NONE; 772 if (active_cpus == 1 && cpumask_test_cpu(cpu, mm_cpumask(mm))) { 773 if (current->mm != mm) { 774 /* 775 * Asynchronous flush sources may trim down to nothing 776 * if the process is not running, so occasionally try 777 * to trim. 778 */ 779 if (tick_and_test_trim_clock()) { 780 exit_lazy_flush_tlb(mm, true); 781 return FLUSH_TYPE_NONE; 782 } 783 } 784 return FLUSH_TYPE_LOCAL; 785 } 786 787 /* Coprocessors require TLBIE to invalidate nMMU. */ 788 if (atomic_read(&mm->context.copros) > 0) 789 return FLUSH_TYPE_GLOBAL; 790 791 /* 792 * In the fullmm case there's no point doing the exit_flush_lazy_tlbs 793 * because the mm is being taken down anyway, and a TLBIE tends to 794 * be faster than an IPI+TLBIEL. 795 */ 796 if (fullmm) 797 return FLUSH_TYPE_GLOBAL; 798 799 /* 800 * If we are running the only thread of a single-threaded process, 801 * then we should almost always be able to trim off the rest of the 802 * CPU mask (except in the case of use_mm() races), so always try 803 * trimming the mask. 804 */ 805 if (atomic_read(&mm->mm_users) <= 1 && current->mm == mm) { 806 exit_flush_lazy_tlbs(mm); 807 /* 808 * use_mm() race could prevent IPIs from being able to clear 809 * the cpumask here, however those users are established 810 * after our first check (and so after the PTEs are removed), 811 * and the TLB still gets flushed by the IPI, so this CPU 812 * will only require a local flush. 813 */ 814 return FLUSH_TYPE_LOCAL; 815 } 816 817 /* 818 * Occasionally try to trim down the cpumask. It's possible this can 819 * bring the mask to zero, which results in no flush. 820 */ 821 if (tick_and_test_trim_clock()) { 822 exit_flush_lazy_tlbs(mm); 823 if (current->mm == mm) 824 return FLUSH_TYPE_LOCAL; 825 if (cpumask_test_cpu(cpu, mm_cpumask(mm))) 826 exit_lazy_flush_tlb(mm, true); 827 return FLUSH_TYPE_NONE; 828 } 829 830 return FLUSH_TYPE_GLOBAL; 831 } 832 833 #ifdef CONFIG_SMP 834 void radix__flush_tlb_mm(struct mm_struct *mm) 835 { 836 unsigned long pid; 837 enum tlb_flush_type type; 838 839 pid = mm->context.id; 840 if (WARN_ON_ONCE(pid == MMU_NO_CONTEXT)) 841 return; 842 843 preempt_disable(); 844 /* 845 * Order loads of mm_cpumask (in flush_type_needed) vs previous 846 * stores to clear ptes before the invalidate. See barrier in 847 * switch_mm_irqs_off 848 */ 849 smp_mb(); 850 type = flush_type_needed(mm, false); 851 if (type == FLUSH_TYPE_LOCAL) { 852 _tlbiel_pid(pid, RIC_FLUSH_TLB); 853 } else if (type == FLUSH_TYPE_GLOBAL) { 854 if (!mmu_has_feature(MMU_FTR_GTSE)) { 855 unsigned long tgt = H_RPTI_TARGET_CMMU; 856 857 if (atomic_read(&mm->context.copros) > 0) 858 tgt |= H_RPTI_TARGET_NMMU; 859 pseries_rpt_invalidate(pid, tgt, H_RPTI_TYPE_TLB, 860 H_RPTI_PAGE_ALL, 0, -1UL); 861 } else if (cputlb_use_tlbie()) { 862 if (mm_needs_flush_escalation(mm)) 863 _tlbie_pid(pid, RIC_FLUSH_ALL); 864 else 865 _tlbie_pid(pid, RIC_FLUSH_TLB); 866 } else { 867 _tlbiel_pid_multicast(mm, pid, RIC_FLUSH_TLB); 868 } 869 } 870 preempt_enable(); 871 mmu_notifier_arch_invalidate_secondary_tlbs(mm, 0, -1UL); 872 } 873 EXPORT_SYMBOL(radix__flush_tlb_mm); 874 875 static void __flush_all_mm(struct mm_struct *mm, bool fullmm) 876 { 877 unsigned long pid; 878 enum tlb_flush_type type; 879 880 pid = mm->context.id; 881 if (WARN_ON_ONCE(pid == MMU_NO_CONTEXT)) 882 return; 883 884 preempt_disable(); 885 smp_mb(); /* see radix__flush_tlb_mm */ 886 type = flush_type_needed(mm, fullmm); 887 if (type == FLUSH_TYPE_LOCAL) { 888 _tlbiel_pid(pid, RIC_FLUSH_ALL); 889 } else if (type == FLUSH_TYPE_GLOBAL) { 890 if (!mmu_has_feature(MMU_FTR_GTSE)) { 891 unsigned long tgt = H_RPTI_TARGET_CMMU; 892 unsigned long type = H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC | 893 H_RPTI_TYPE_PRT; 894 895 if (atomic_read(&mm->context.copros) > 0) 896 tgt |= H_RPTI_TARGET_NMMU; 897 pseries_rpt_invalidate(pid, tgt, type, 898 H_RPTI_PAGE_ALL, 0, -1UL); 899 } else if (cputlb_use_tlbie()) 900 _tlbie_pid(pid, RIC_FLUSH_ALL); 901 else 902 _tlbiel_pid_multicast(mm, pid, RIC_FLUSH_ALL); 903 } 904 preempt_enable(); 905 mmu_notifier_arch_invalidate_secondary_tlbs(mm, 0, -1UL); 906 } 907 908 void radix__flush_all_mm(struct mm_struct *mm) 909 { 910 __flush_all_mm(mm, false); 911 } 912 EXPORT_SYMBOL(radix__flush_all_mm); 913 914 void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr, 915 int psize) 916 { 917 unsigned long pid; 918 enum tlb_flush_type type; 919 920 pid = mm->context.id; 921 if (WARN_ON_ONCE(pid == MMU_NO_CONTEXT)) 922 return; 923 924 preempt_disable(); 925 smp_mb(); /* see radix__flush_tlb_mm */ 926 type = flush_type_needed(mm, false); 927 if (type == FLUSH_TYPE_LOCAL) { 928 _tlbiel_va(vmaddr, pid, psize, RIC_FLUSH_TLB); 929 } else if (type == FLUSH_TYPE_GLOBAL) { 930 if (!mmu_has_feature(MMU_FTR_GTSE)) { 931 unsigned long tgt, pg_sizes, size; 932 933 tgt = H_RPTI_TARGET_CMMU; 934 pg_sizes = psize_to_rpti_pgsize(psize); 935 size = 1UL << mmu_psize_to_shift(psize); 936 937 if (atomic_read(&mm->context.copros) > 0) 938 tgt |= H_RPTI_TARGET_NMMU; 939 pseries_rpt_invalidate(pid, tgt, H_RPTI_TYPE_TLB, 940 pg_sizes, vmaddr, 941 vmaddr + size); 942 } else if (cputlb_use_tlbie()) 943 _tlbie_va(vmaddr, pid, psize, RIC_FLUSH_TLB); 944 else 945 _tlbiel_va_multicast(mm, vmaddr, pid, psize, RIC_FLUSH_TLB); 946 } 947 preempt_enable(); 948 } 949 950 void radix__flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) 951 { 952 #ifdef CONFIG_HUGETLB_PAGE 953 if (is_vm_hugetlb_page(vma)) 954 return radix__flush_hugetlb_page(vma, vmaddr); 955 #endif 956 radix__flush_tlb_page_psize(vma->vm_mm, vmaddr, mmu_virtual_psize); 957 } 958 EXPORT_SYMBOL(radix__flush_tlb_page); 959 960 #endif /* CONFIG_SMP */ 961 962 static void do_tlbiel_kernel(void *info) 963 { 964 _tlbiel_pid(0, RIC_FLUSH_ALL); 965 } 966 967 static inline void _tlbiel_kernel_broadcast(void) 968 { 969 on_each_cpu(do_tlbiel_kernel, NULL, 1); 970 if (tlbie_capable) { 971 /* 972 * Coherent accelerators don't refcount kernel memory mappings, 973 * so have to always issue a tlbie for them. This is quite a 974 * slow path anyway. 975 */ 976 _tlbie_pid(0, RIC_FLUSH_ALL); 977 } 978 } 979 980 /* 981 * If kernel TLBIs ever become local rather than global, then 982 * drivers/misc/ocxl/link.c:ocxl_link_add_pe will need some work, as it 983 * assumes kernel TLBIs are global. 984 */ 985 void radix__flush_tlb_kernel_range(unsigned long start, unsigned long end) 986 { 987 if (!mmu_has_feature(MMU_FTR_GTSE)) { 988 unsigned long tgt = H_RPTI_TARGET_CMMU | H_RPTI_TARGET_NMMU; 989 unsigned long type = H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC | 990 H_RPTI_TYPE_PRT; 991 992 pseries_rpt_invalidate(0, tgt, type, H_RPTI_PAGE_ALL, 993 start, end); 994 } else if (cputlb_use_tlbie()) 995 _tlbie_pid(0, RIC_FLUSH_ALL); 996 else 997 _tlbiel_kernel_broadcast(); 998 } 999 EXPORT_SYMBOL(radix__flush_tlb_kernel_range); 1000 1001 /* 1002 * Doesn't appear to be used anywhere. Remove. 1003 */ 1004 #define TLB_FLUSH_ALL -1UL 1005 1006 /* 1007 * Number of pages above which we invalidate the entire PID rather than 1008 * flush individual pages, for local and global flushes respectively. 1009 * 1010 * tlbie goes out to the interconnect and individual ops are more costly. 1011 * It also does not iterate over sets like the local tlbiel variant when 1012 * invalidating a full PID, so it has a far lower threshold to change from 1013 * individual page flushes to full-pid flushes. 1014 */ 1015 static u32 tlb_single_page_flush_ceiling __read_mostly = 33; 1016 static u32 tlb_local_single_page_flush_ceiling __read_mostly = POWER9_TLB_SETS_RADIX * 2; 1017 1018 static inline void __radix__flush_tlb_range(struct mm_struct *mm, 1019 unsigned long start, unsigned long end) 1020 { 1021 unsigned long pid; 1022 unsigned int page_shift = mmu_psize_defs[mmu_virtual_psize].shift; 1023 unsigned long page_size = 1UL << page_shift; 1024 unsigned long nr_pages = (end - start) >> page_shift; 1025 bool flush_pid, flush_pwc = false; 1026 enum tlb_flush_type type; 1027 1028 pid = mm->context.id; 1029 if (WARN_ON_ONCE(pid == MMU_NO_CONTEXT)) 1030 return; 1031 1032 WARN_ON_ONCE(end == TLB_FLUSH_ALL); 1033 1034 preempt_disable(); 1035 smp_mb(); /* see radix__flush_tlb_mm */ 1036 type = flush_type_needed(mm, false); 1037 if (type == FLUSH_TYPE_NONE) 1038 goto out; 1039 1040 if (type == FLUSH_TYPE_GLOBAL) 1041 flush_pid = nr_pages > tlb_single_page_flush_ceiling; 1042 else 1043 flush_pid = nr_pages > tlb_local_single_page_flush_ceiling; 1044 /* 1045 * full pid flush already does the PWC flush. if it is not full pid 1046 * flush check the range is more than PMD and force a pwc flush 1047 * mremap() depends on this behaviour. 1048 */ 1049 if (!flush_pid && (end - start) >= PMD_SIZE) 1050 flush_pwc = true; 1051 1052 if (!mmu_has_feature(MMU_FTR_GTSE) && type == FLUSH_TYPE_GLOBAL) { 1053 unsigned long type = H_RPTI_TYPE_TLB; 1054 unsigned long tgt = H_RPTI_TARGET_CMMU; 1055 unsigned long pg_sizes = psize_to_rpti_pgsize(mmu_virtual_psize); 1056 1057 if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) 1058 pg_sizes |= psize_to_rpti_pgsize(MMU_PAGE_2M); 1059 if (atomic_read(&mm->context.copros) > 0) 1060 tgt |= H_RPTI_TARGET_NMMU; 1061 if (flush_pwc) 1062 type |= H_RPTI_TYPE_PWC; 1063 pseries_rpt_invalidate(pid, tgt, type, pg_sizes, start, end); 1064 } else if (flush_pid) { 1065 /* 1066 * We are now flushing a range larger than PMD size force a RIC_FLUSH_ALL 1067 */ 1068 if (type == FLUSH_TYPE_LOCAL) { 1069 _tlbiel_pid(pid, RIC_FLUSH_ALL); 1070 } else { 1071 if (cputlb_use_tlbie()) { 1072 _tlbie_pid(pid, RIC_FLUSH_ALL); 1073 } else { 1074 _tlbiel_pid_multicast(mm, pid, RIC_FLUSH_ALL); 1075 } 1076 } 1077 } else { 1078 bool hflush; 1079 unsigned long hstart, hend; 1080 1081 hstart = (start + PMD_SIZE - 1) & PMD_MASK; 1082 hend = end & PMD_MASK; 1083 hflush = IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && hstart < hend; 1084 1085 if (type == FLUSH_TYPE_LOCAL) { 1086 asm volatile("ptesync": : :"memory"); 1087 if (flush_pwc) 1088 /* For PWC, only one flush is needed */ 1089 __tlbiel_pid(pid, 0, RIC_FLUSH_PWC); 1090 __tlbiel_va_range(start, end, pid, page_size, mmu_virtual_psize); 1091 if (hflush) 1092 __tlbiel_va_range(hstart, hend, pid, 1093 PMD_SIZE, MMU_PAGE_2M); 1094 ppc_after_tlbiel_barrier(); 1095 } else if (cputlb_use_tlbie()) { 1096 asm volatile("ptesync": : :"memory"); 1097 if (flush_pwc) 1098 __tlbie_pid(pid, RIC_FLUSH_PWC); 1099 __tlbie_va_range(start, end, pid, page_size, mmu_virtual_psize); 1100 if (hflush) 1101 __tlbie_va_range(hstart, hend, pid, 1102 PMD_SIZE, MMU_PAGE_2M); 1103 asm volatile("eieio; tlbsync; ptesync": : :"memory"); 1104 } else { 1105 _tlbiel_va_range_multicast(mm, 1106 start, end, pid, page_size, mmu_virtual_psize, flush_pwc); 1107 if (hflush) 1108 _tlbiel_va_range_multicast(mm, 1109 hstart, hend, pid, PMD_SIZE, MMU_PAGE_2M, flush_pwc); 1110 } 1111 } 1112 out: 1113 preempt_enable(); 1114 mmu_notifier_arch_invalidate_secondary_tlbs(mm, start, end); 1115 } 1116 1117 void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start, 1118 unsigned long end) 1119 1120 { 1121 #ifdef CONFIG_HUGETLB_PAGE 1122 if (is_vm_hugetlb_page(vma)) 1123 return radix__flush_hugetlb_tlb_range(vma, start, end); 1124 #endif 1125 1126 __radix__flush_tlb_range(vma->vm_mm, start, end); 1127 } 1128 EXPORT_SYMBOL(radix__flush_tlb_range); 1129 1130 static int radix_get_mmu_psize(int page_size) 1131 { 1132 int psize; 1133 1134 if (page_size == (1UL << mmu_psize_defs[mmu_virtual_psize].shift)) 1135 psize = mmu_virtual_psize; 1136 else if (page_size == (1UL << mmu_psize_defs[MMU_PAGE_2M].shift)) 1137 psize = MMU_PAGE_2M; 1138 else if (page_size == (1UL << mmu_psize_defs[MMU_PAGE_1G].shift)) 1139 psize = MMU_PAGE_1G; 1140 else 1141 return -1; 1142 return psize; 1143 } 1144 1145 /* 1146 * Flush partition scoped LPID address translation for all CPUs. 1147 */ 1148 void radix__flush_tlb_lpid_page(unsigned int lpid, 1149 unsigned long addr, 1150 unsigned long page_size) 1151 { 1152 int psize = radix_get_mmu_psize(page_size); 1153 1154 _tlbie_lpid_va(addr, lpid, psize, RIC_FLUSH_TLB); 1155 } 1156 EXPORT_SYMBOL_GPL(radix__flush_tlb_lpid_page); 1157 1158 /* 1159 * Flush partition scoped PWC from LPID for all CPUs. 1160 */ 1161 void radix__flush_pwc_lpid(unsigned int lpid) 1162 { 1163 _tlbie_lpid(lpid, RIC_FLUSH_PWC); 1164 } 1165 EXPORT_SYMBOL_GPL(radix__flush_pwc_lpid); 1166 1167 /* 1168 * Flush partition scoped translations from LPID (=LPIDR) 1169 */ 1170 void radix__flush_all_lpid(unsigned int lpid) 1171 { 1172 _tlbie_lpid(lpid, RIC_FLUSH_ALL); 1173 } 1174 EXPORT_SYMBOL_GPL(radix__flush_all_lpid); 1175 1176 /* 1177 * Flush process scoped translations from LPID (=LPIDR) 1178 */ 1179 void radix__flush_all_lpid_guest(unsigned int lpid) 1180 { 1181 _tlbie_lpid_guest(lpid, RIC_FLUSH_ALL); 1182 } 1183 1184 void radix__tlb_flush(struct mmu_gather *tlb) 1185 { 1186 int psize = 0; 1187 struct mm_struct *mm = tlb->mm; 1188 int page_size = tlb->page_size; 1189 unsigned long start = tlb->start; 1190 unsigned long end = tlb->end; 1191 1192 /* 1193 * if page size is not something we understand, do a full mm flush 1194 * 1195 * A "fullmm" flush must always do a flush_all_mm (RIC=2) flush 1196 * that flushes the process table entry cache upon process teardown. 1197 * See the comment for radix in arch_exit_mmap(). 1198 */ 1199 if (tlb->fullmm) { 1200 if (IS_ENABLED(CONFIG_MMU_LAZY_TLB_SHOOTDOWN)) { 1201 /* 1202 * Shootdown based lazy tlb mm refcounting means we 1203 * have to IPI everyone in the mm_cpumask anyway soon 1204 * when the mm goes away, so might as well do it as 1205 * part of the final flush now. 1206 * 1207 * If lazy shootdown was improved to reduce IPIs (e.g., 1208 * by batching), then it may end up being better to use 1209 * tlbies here instead. 1210 */ 1211 preempt_disable(); 1212 1213 smp_mb(); /* see radix__flush_tlb_mm */ 1214 exit_flush_lazy_tlbs(mm); 1215 __flush_all_mm(mm, true); 1216 1217 preempt_enable(); 1218 } else { 1219 __flush_all_mm(mm, true); 1220 } 1221 1222 } else if ( (psize = radix_get_mmu_psize(page_size)) == -1) { 1223 if (!tlb->freed_tables) 1224 radix__flush_tlb_mm(mm); 1225 else 1226 radix__flush_all_mm(mm); 1227 } else { 1228 if (!tlb->freed_tables) 1229 radix__flush_tlb_range_psize(mm, start, end, psize); 1230 else 1231 radix__flush_tlb_pwc_range_psize(mm, start, end, psize); 1232 } 1233 } 1234 1235 static void __radix__flush_tlb_range_psize(struct mm_struct *mm, 1236 unsigned long start, unsigned long end, 1237 int psize, bool also_pwc) 1238 { 1239 unsigned long pid; 1240 unsigned int page_shift = mmu_psize_defs[psize].shift; 1241 unsigned long page_size = 1UL << page_shift; 1242 unsigned long nr_pages = (end - start) >> page_shift; 1243 bool flush_pid; 1244 enum tlb_flush_type type; 1245 1246 pid = mm->context.id; 1247 if (WARN_ON_ONCE(pid == MMU_NO_CONTEXT)) 1248 return; 1249 1250 WARN_ON_ONCE(end == TLB_FLUSH_ALL); 1251 1252 preempt_disable(); 1253 smp_mb(); /* see radix__flush_tlb_mm */ 1254 type = flush_type_needed(mm, false); 1255 if (type == FLUSH_TYPE_NONE) 1256 goto out; 1257 1258 if (type == FLUSH_TYPE_GLOBAL) 1259 flush_pid = nr_pages > tlb_single_page_flush_ceiling; 1260 else 1261 flush_pid = nr_pages > tlb_local_single_page_flush_ceiling; 1262 1263 if (!mmu_has_feature(MMU_FTR_GTSE) && type == FLUSH_TYPE_GLOBAL) { 1264 unsigned long tgt = H_RPTI_TARGET_CMMU; 1265 unsigned long type = H_RPTI_TYPE_TLB; 1266 unsigned long pg_sizes = psize_to_rpti_pgsize(psize); 1267 1268 if (also_pwc) 1269 type |= H_RPTI_TYPE_PWC; 1270 if (atomic_read(&mm->context.copros) > 0) 1271 tgt |= H_RPTI_TARGET_NMMU; 1272 pseries_rpt_invalidate(pid, tgt, type, pg_sizes, start, end); 1273 } else if (flush_pid) { 1274 if (type == FLUSH_TYPE_LOCAL) { 1275 _tlbiel_pid(pid, also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB); 1276 } else { 1277 if (cputlb_use_tlbie()) { 1278 if (mm_needs_flush_escalation(mm)) 1279 also_pwc = true; 1280 1281 _tlbie_pid(pid, 1282 also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB); 1283 } else { 1284 _tlbiel_pid_multicast(mm, pid, 1285 also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB); 1286 } 1287 1288 } 1289 } else { 1290 if (type == FLUSH_TYPE_LOCAL) 1291 _tlbiel_va_range(start, end, pid, page_size, psize, also_pwc); 1292 else if (cputlb_use_tlbie()) 1293 _tlbie_va_range(start, end, pid, page_size, psize, also_pwc); 1294 else 1295 _tlbiel_va_range_multicast(mm, 1296 start, end, pid, page_size, psize, also_pwc); 1297 } 1298 out: 1299 preempt_enable(); 1300 mmu_notifier_arch_invalidate_secondary_tlbs(mm, start, end); 1301 } 1302 1303 void radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start, 1304 unsigned long end, int psize) 1305 { 1306 return __radix__flush_tlb_range_psize(mm, start, end, psize, false); 1307 } 1308 1309 void radix__flush_tlb_pwc_range_psize(struct mm_struct *mm, unsigned long start, 1310 unsigned long end, int psize) 1311 { 1312 __radix__flush_tlb_range_psize(mm, start, end, psize, true); 1313 } 1314 1315 #ifdef CONFIG_TRANSPARENT_HUGEPAGE 1316 void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr) 1317 { 1318 unsigned long pid, end; 1319 enum tlb_flush_type type; 1320 1321 pid = mm->context.id; 1322 if (WARN_ON_ONCE(pid == MMU_NO_CONTEXT)) 1323 return; 1324 1325 /* 4k page size, just blow the world */ 1326 if (PAGE_SIZE == 0x1000) { 1327 radix__flush_all_mm(mm); 1328 return; 1329 } 1330 1331 end = addr + HPAGE_PMD_SIZE; 1332 1333 /* Otherwise first do the PWC, then iterate the pages. */ 1334 preempt_disable(); 1335 smp_mb(); /* see radix__flush_tlb_mm */ 1336 type = flush_type_needed(mm, false); 1337 if (type == FLUSH_TYPE_LOCAL) { 1338 _tlbiel_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true); 1339 } else if (type == FLUSH_TYPE_GLOBAL) { 1340 if (!mmu_has_feature(MMU_FTR_GTSE)) { 1341 unsigned long tgt, type, pg_sizes; 1342 1343 tgt = H_RPTI_TARGET_CMMU; 1344 type = H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC | 1345 H_RPTI_TYPE_PRT; 1346 pg_sizes = psize_to_rpti_pgsize(mmu_virtual_psize); 1347 1348 if (atomic_read(&mm->context.copros) > 0) 1349 tgt |= H_RPTI_TARGET_NMMU; 1350 pseries_rpt_invalidate(pid, tgt, type, pg_sizes, 1351 addr, end); 1352 } else if (cputlb_use_tlbie()) 1353 _tlbie_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true); 1354 else 1355 _tlbiel_va_range_multicast(mm, 1356 addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true); 1357 } 1358 1359 preempt_enable(); 1360 } 1361 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ 1362 1363 void radix__flush_pmd_tlb_range(struct vm_area_struct *vma, 1364 unsigned long start, unsigned long end) 1365 { 1366 radix__flush_tlb_range_psize(vma->vm_mm, start, end, MMU_PAGE_2M); 1367 } 1368 EXPORT_SYMBOL(radix__flush_pmd_tlb_range); 1369 1370 void radix__flush_pud_tlb_range(struct vm_area_struct *vma, 1371 unsigned long start, unsigned long end) 1372 { 1373 radix__flush_tlb_range_psize(vma->vm_mm, start, end, MMU_PAGE_1G); 1374 } 1375 EXPORT_SYMBOL(radix__flush_pud_tlb_range); 1376 1377 void radix__flush_tlb_all(void) 1378 { 1379 unsigned long rb,prs,r,rs; 1380 unsigned long ric = RIC_FLUSH_ALL; 1381 1382 rb = 0x3 << PPC_BITLSHIFT(53); /* IS = 3 */ 1383 prs = 0; /* partition scoped */ 1384 r = 1; /* radix format */ 1385 rs = 1 & ((1UL << 32) - 1); /* any LPID value to flush guest mappings */ 1386 1387 asm volatile("ptesync": : :"memory"); 1388 /* 1389 * now flush guest entries by passing PRS = 1 and LPID != 0 1390 */ 1391 asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) 1392 : : "r"(rb), "i"(r), "i"(1), "i"(ric), "r"(rs) : "memory"); 1393 /* 1394 * now flush host entires by passing PRS = 0 and LPID == 0 1395 */ 1396 asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) 1397 : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(0) : "memory"); 1398 asm volatile("eieio; tlbsync; ptesync": : :"memory"); 1399 } 1400 1401 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 1402 static __always_inline void __tlbie_pid_lpid(unsigned long pid, 1403 unsigned long lpid, 1404 unsigned long ric) 1405 { 1406 unsigned long rb, rs, prs, r; 1407 1408 rb = PPC_BIT(53); /* IS = 1 */ 1409 rs = (pid << PPC_BITLSHIFT(31)) | (lpid & ~(PPC_BITMASK(0, 31))); 1410 prs = 1; /* process scoped */ 1411 r = 1; /* radix format */ 1412 1413 asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) 1414 : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); 1415 trace_tlbie(0, 0, rb, rs, ric, prs, r); 1416 } 1417 1418 static __always_inline void __tlbie_va_lpid(unsigned long va, unsigned long pid, 1419 unsigned long lpid, 1420 unsigned long ap, unsigned long ric) 1421 { 1422 unsigned long rb, rs, prs, r; 1423 1424 rb = va & ~(PPC_BITMASK(52, 63)); 1425 rb |= ap << PPC_BITLSHIFT(58); 1426 rs = (pid << PPC_BITLSHIFT(31)) | (lpid & ~(PPC_BITMASK(0, 31))); 1427 prs = 1; /* process scoped */ 1428 r = 1; /* radix format */ 1429 1430 asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) 1431 : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); 1432 trace_tlbie(0, 0, rb, rs, ric, prs, r); 1433 } 1434 1435 static inline void fixup_tlbie_pid_lpid(unsigned long pid, unsigned long lpid) 1436 { 1437 /* 1438 * We can use any address for the invalidation, pick one which is 1439 * probably unused as an optimisation. 1440 */ 1441 unsigned long va = ((1UL << 52) - 1); 1442 1443 if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) { 1444 asm volatile("ptesync" : : : "memory"); 1445 __tlbie_pid_lpid(0, lpid, RIC_FLUSH_TLB); 1446 } 1447 1448 if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) { 1449 asm volatile("ptesync" : : : "memory"); 1450 __tlbie_va_lpid(va, pid, lpid, mmu_get_ap(MMU_PAGE_64K), 1451 RIC_FLUSH_TLB); 1452 } 1453 } 1454 1455 static inline void _tlbie_pid_lpid(unsigned long pid, unsigned long lpid, 1456 unsigned long ric) 1457 { 1458 asm volatile("ptesync" : : : "memory"); 1459 1460 /* 1461 * Workaround the fact that the "ric" argument to __tlbie_pid 1462 * must be a compile-time contraint to match the "i" constraint 1463 * in the asm statement. 1464 */ 1465 switch (ric) { 1466 case RIC_FLUSH_TLB: 1467 __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_TLB); 1468 fixup_tlbie_pid_lpid(pid, lpid); 1469 break; 1470 case RIC_FLUSH_PWC: 1471 __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC); 1472 break; 1473 case RIC_FLUSH_ALL: 1474 default: 1475 __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_ALL); 1476 fixup_tlbie_pid_lpid(pid, lpid); 1477 } 1478 asm volatile("eieio; tlbsync; ptesync" : : : "memory"); 1479 } 1480 1481 static inline void fixup_tlbie_va_range_lpid(unsigned long va, 1482 unsigned long pid, 1483 unsigned long lpid, 1484 unsigned long ap) 1485 { 1486 if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) { 1487 asm volatile("ptesync" : : : "memory"); 1488 __tlbie_pid_lpid(0, lpid, RIC_FLUSH_TLB); 1489 } 1490 1491 if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) { 1492 asm volatile("ptesync" : : : "memory"); 1493 __tlbie_va_lpid(va, pid, lpid, ap, RIC_FLUSH_TLB); 1494 } 1495 } 1496 1497 static inline void __tlbie_va_range_lpid(unsigned long start, unsigned long end, 1498 unsigned long pid, unsigned long lpid, 1499 unsigned long page_size, 1500 unsigned long psize) 1501 { 1502 unsigned long addr; 1503 unsigned long ap = mmu_get_ap(psize); 1504 1505 for (addr = start; addr < end; addr += page_size) 1506 __tlbie_va_lpid(addr, pid, lpid, ap, RIC_FLUSH_TLB); 1507 1508 fixup_tlbie_va_range_lpid(addr - page_size, pid, lpid, ap); 1509 } 1510 1511 static inline void _tlbie_va_range_lpid(unsigned long start, unsigned long end, 1512 unsigned long pid, unsigned long lpid, 1513 unsigned long page_size, 1514 unsigned long psize, bool also_pwc) 1515 { 1516 asm volatile("ptesync" : : : "memory"); 1517 if (also_pwc) 1518 __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC); 1519 __tlbie_va_range_lpid(start, end, pid, lpid, page_size, psize); 1520 asm volatile("eieio; tlbsync; ptesync" : : : "memory"); 1521 } 1522 1523 /* 1524 * Performs process-scoped invalidations for a given LPID 1525 * as part of H_RPT_INVALIDATE hcall. 1526 */ 1527 void do_h_rpt_invalidate_prt(unsigned long pid, unsigned long lpid, 1528 unsigned long type, unsigned long pg_sizes, 1529 unsigned long start, unsigned long end) 1530 { 1531 unsigned long psize, nr_pages; 1532 struct mmu_psize_def *def; 1533 bool flush_pid; 1534 1535 /* 1536 * A H_RPTI_TYPE_ALL request implies RIC=3, hence 1537 * do a single IS=1 based flush. 1538 */ 1539 if ((type & H_RPTI_TYPE_ALL) == H_RPTI_TYPE_ALL) { 1540 _tlbie_pid_lpid(pid, lpid, RIC_FLUSH_ALL); 1541 return; 1542 } 1543 1544 if (type & H_RPTI_TYPE_PWC) 1545 _tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC); 1546 1547 /* Full PID flush */ 1548 if (start == 0 && end == -1) 1549 return _tlbie_pid_lpid(pid, lpid, RIC_FLUSH_TLB); 1550 1551 /* Do range invalidation for all the valid page sizes */ 1552 for (psize = 0; psize < MMU_PAGE_COUNT; psize++) { 1553 def = &mmu_psize_defs[psize]; 1554 if (!(pg_sizes & def->h_rpt_pgsize)) 1555 continue; 1556 1557 nr_pages = (end - start) >> def->shift; 1558 flush_pid = nr_pages > tlb_single_page_flush_ceiling; 1559 1560 /* 1561 * If the number of pages spanning the range is above 1562 * the ceiling, convert the request into a full PID flush. 1563 * And since PID flush takes out all the page sizes, there 1564 * is no need to consider remaining page sizes. 1565 */ 1566 if (flush_pid) { 1567 _tlbie_pid_lpid(pid, lpid, RIC_FLUSH_TLB); 1568 return; 1569 } 1570 _tlbie_va_range_lpid(start, end, pid, lpid, 1571 (1UL << def->shift), psize, false); 1572 } 1573 } 1574 EXPORT_SYMBOL_GPL(do_h_rpt_invalidate_prt); 1575 1576 #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ 1577 1578 static int __init create_tlb_single_page_flush_ceiling(void) 1579 { 1580 debugfs_create_u32("tlb_single_page_flush_ceiling", 0600, 1581 arch_debugfs_dir, &tlb_single_page_flush_ceiling); 1582 debugfs_create_u32("tlb_local_single_page_flush_ceiling", 0600, 1583 arch_debugfs_dir, &tlb_local_single_page_flush_ceiling); 1584 return 0; 1585 } 1586 late_initcall(create_tlb_single_page_flush_ceiling); 1587 1588