1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * TLB flush routines for radix kernels. 4 * 5 * Copyright 2015-2016, Aneesh Kumar K.V, IBM Corporation. 6 */ 7 8 #include <linux/mm.h> 9 #include <linux/hugetlb.h> 10 #include <linux/memblock.h> 11 #include <linux/mmu_context.h> 12 #include <linux/sched/mm.h> 13 #include <linux/debugfs.h> 14 15 #include <asm/ppc-opcode.h> 16 #include <asm/tlb.h> 17 #include <asm/tlbflush.h> 18 #include <asm/trace.h> 19 #include <asm/cputhreads.h> 20 #include <asm/plpar_wrappers.h> 21 22 #include "internal.h" 23 24 /* 25 * tlbiel instruction for radix, set invalidation 26 * i.e., r=1 and is=01 or is=10 or is=11 27 */ 28 static __always_inline void tlbiel_radix_set_isa300(unsigned int set, unsigned int is, 29 unsigned int pid, 30 unsigned int ric, unsigned int prs) 31 { 32 unsigned long rb; 33 unsigned long rs; 34 35 rb = (set << PPC_BITLSHIFT(51)) | (is << PPC_BITLSHIFT(53)); 36 rs = ((unsigned long)pid << PPC_BITLSHIFT(31)); 37 38 asm volatile(PPC_TLBIEL(%0, %1, %2, %3, 1) 39 : : "r"(rb), "r"(rs), "i"(ric), "i"(prs) 40 : "memory"); 41 } 42 43 static void tlbiel_all_isa300(unsigned int num_sets, unsigned int is) 44 { 45 unsigned int set; 46 47 asm volatile("ptesync": : :"memory"); 48 49 /* 50 * Flush the first set of the TLB, and the entire Page Walk Cache 51 * and partition table entries. Then flush the remaining sets of the 52 * TLB. 53 */ 54 55 if (early_cpu_has_feature(CPU_FTR_HVMODE)) { 56 /* MSR[HV] should flush partition scope translations first. */ 57 tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 0); 58 59 if (!early_cpu_has_feature(CPU_FTR_ARCH_31)) { 60 for (set = 1; set < num_sets; set++) 61 tlbiel_radix_set_isa300(set, is, 0, 62 RIC_FLUSH_TLB, 0); 63 } 64 } 65 66 /* Flush process scoped entries. */ 67 tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 1); 68 69 if (!early_cpu_has_feature(CPU_FTR_ARCH_31)) { 70 for (set = 1; set < num_sets; set++) 71 tlbiel_radix_set_isa300(set, is, 0, RIC_FLUSH_TLB, 1); 72 } 73 74 ppc_after_tlbiel_barrier(); 75 } 76 77 void radix__tlbiel_all(unsigned int action) 78 { 79 unsigned int is; 80 81 switch (action) { 82 case TLB_INVAL_SCOPE_GLOBAL: 83 is = 3; 84 break; 85 case TLB_INVAL_SCOPE_LPID: 86 is = 2; 87 break; 88 default: 89 BUG(); 90 } 91 92 if (early_cpu_has_feature(CPU_FTR_ARCH_300)) 93 tlbiel_all_isa300(POWER9_TLB_SETS_RADIX, is); 94 else 95 WARN(1, "%s called on pre-POWER9 CPU\n", __func__); 96 97 asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT "; isync" : : :"memory"); 98 } 99 100 static __always_inline void __tlbiel_pid(unsigned long pid, int set, 101 unsigned long ric) 102 { 103 unsigned long rb,rs,prs,r; 104 105 rb = PPC_BIT(53); /* IS = 1 */ 106 rb |= set << PPC_BITLSHIFT(51); 107 rs = ((unsigned long)pid) << PPC_BITLSHIFT(31); 108 prs = 1; /* process scoped */ 109 r = 1; /* radix format */ 110 111 asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1) 112 : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); 113 trace_tlbie(0, 1, rb, rs, ric, prs, r); 114 } 115 116 static __always_inline void __tlbie_pid(unsigned long pid, unsigned long ric) 117 { 118 unsigned long rb,rs,prs,r; 119 120 rb = PPC_BIT(53); /* IS = 1 */ 121 rs = pid << PPC_BITLSHIFT(31); 122 prs = 1; /* process scoped */ 123 r = 1; /* radix format */ 124 125 asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) 126 : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); 127 trace_tlbie(0, 0, rb, rs, ric, prs, r); 128 } 129 130 static __always_inline void __tlbie_pid_lpid(unsigned long pid, 131 unsigned long lpid, 132 unsigned long ric) 133 { 134 unsigned long rb, rs, prs, r; 135 136 rb = PPC_BIT(53); /* IS = 1 */ 137 rs = (pid << PPC_BITLSHIFT(31)) | (lpid & ~(PPC_BITMASK(0, 31))); 138 prs = 1; /* process scoped */ 139 r = 1; /* radix format */ 140 141 asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) 142 : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); 143 trace_tlbie(0, 0, rb, rs, ric, prs, r); 144 } 145 static __always_inline void __tlbie_lpid(unsigned long lpid, unsigned long ric) 146 { 147 unsigned long rb,rs,prs,r; 148 149 rb = PPC_BIT(52); /* IS = 2 */ 150 rs = lpid; 151 prs = 0; /* partition scoped */ 152 r = 1; /* radix format */ 153 154 asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) 155 : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); 156 trace_tlbie(lpid, 0, rb, rs, ric, prs, r); 157 } 158 159 static __always_inline void __tlbie_lpid_guest(unsigned long lpid, unsigned long ric) 160 { 161 unsigned long rb,rs,prs,r; 162 163 rb = PPC_BIT(52); /* IS = 2 */ 164 rs = lpid; 165 prs = 1; /* process scoped */ 166 r = 1; /* radix format */ 167 168 asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) 169 : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); 170 trace_tlbie(lpid, 0, rb, rs, ric, prs, r); 171 } 172 173 static __always_inline void __tlbiel_va(unsigned long va, unsigned long pid, 174 unsigned long ap, unsigned long ric) 175 { 176 unsigned long rb,rs,prs,r; 177 178 rb = va & ~(PPC_BITMASK(52, 63)); 179 rb |= ap << PPC_BITLSHIFT(58); 180 rs = pid << PPC_BITLSHIFT(31); 181 prs = 1; /* process scoped */ 182 r = 1; /* radix format */ 183 184 asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1) 185 : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); 186 trace_tlbie(0, 1, rb, rs, ric, prs, r); 187 } 188 189 static __always_inline void __tlbie_va(unsigned long va, unsigned long pid, 190 unsigned long ap, unsigned long ric) 191 { 192 unsigned long rb,rs,prs,r; 193 194 rb = va & ~(PPC_BITMASK(52, 63)); 195 rb |= ap << PPC_BITLSHIFT(58); 196 rs = pid << PPC_BITLSHIFT(31); 197 prs = 1; /* process scoped */ 198 r = 1; /* radix format */ 199 200 asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) 201 : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); 202 trace_tlbie(0, 0, rb, rs, ric, prs, r); 203 } 204 205 static __always_inline void __tlbie_va_lpid(unsigned long va, unsigned long pid, 206 unsigned long lpid, 207 unsigned long ap, unsigned long ric) 208 { 209 unsigned long rb, rs, prs, r; 210 211 rb = va & ~(PPC_BITMASK(52, 63)); 212 rb |= ap << PPC_BITLSHIFT(58); 213 rs = (pid << PPC_BITLSHIFT(31)) | (lpid & ~(PPC_BITMASK(0, 31))); 214 prs = 1; /* process scoped */ 215 r = 1; /* radix format */ 216 217 asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) 218 : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); 219 trace_tlbie(0, 0, rb, rs, ric, prs, r); 220 } 221 222 static __always_inline void __tlbie_lpid_va(unsigned long va, unsigned long lpid, 223 unsigned long ap, unsigned long ric) 224 { 225 unsigned long rb,rs,prs,r; 226 227 rb = va & ~(PPC_BITMASK(52, 63)); 228 rb |= ap << PPC_BITLSHIFT(58); 229 rs = lpid; 230 prs = 0; /* partition scoped */ 231 r = 1; /* radix format */ 232 233 asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) 234 : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); 235 trace_tlbie(lpid, 0, rb, rs, ric, prs, r); 236 } 237 238 239 static inline void fixup_tlbie_va(unsigned long va, unsigned long pid, 240 unsigned long ap) 241 { 242 if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) { 243 asm volatile("ptesync": : :"memory"); 244 __tlbie_va(va, 0, ap, RIC_FLUSH_TLB); 245 } 246 247 if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) { 248 asm volatile("ptesync": : :"memory"); 249 __tlbie_va(va, pid, ap, RIC_FLUSH_TLB); 250 } 251 } 252 253 static inline void fixup_tlbie_va_range(unsigned long va, unsigned long pid, 254 unsigned long ap) 255 { 256 if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) { 257 asm volatile("ptesync": : :"memory"); 258 __tlbie_pid(0, RIC_FLUSH_TLB); 259 } 260 261 if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) { 262 asm volatile("ptesync": : :"memory"); 263 __tlbie_va(va, pid, ap, RIC_FLUSH_TLB); 264 } 265 } 266 267 static inline void fixup_tlbie_va_range_lpid(unsigned long va, 268 unsigned long pid, 269 unsigned long lpid, 270 unsigned long ap) 271 { 272 if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) { 273 asm volatile("ptesync" : : : "memory"); 274 __tlbie_pid_lpid(0, lpid, RIC_FLUSH_TLB); 275 } 276 277 if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) { 278 asm volatile("ptesync" : : : "memory"); 279 __tlbie_va_lpid(va, pid, lpid, ap, RIC_FLUSH_TLB); 280 } 281 } 282 283 static inline void fixup_tlbie_pid(unsigned long pid) 284 { 285 /* 286 * We can use any address for the invalidation, pick one which is 287 * probably unused as an optimisation. 288 */ 289 unsigned long va = ((1UL << 52) - 1); 290 291 if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) { 292 asm volatile("ptesync": : :"memory"); 293 __tlbie_pid(0, RIC_FLUSH_TLB); 294 } 295 296 if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) { 297 asm volatile("ptesync": : :"memory"); 298 __tlbie_va(va, pid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB); 299 } 300 } 301 302 static inline void fixup_tlbie_pid_lpid(unsigned long pid, unsigned long lpid) 303 { 304 /* 305 * We can use any address for the invalidation, pick one which is 306 * probably unused as an optimisation. 307 */ 308 unsigned long va = ((1UL << 52) - 1); 309 310 if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) { 311 asm volatile("ptesync" : : : "memory"); 312 __tlbie_pid_lpid(0, lpid, RIC_FLUSH_TLB); 313 } 314 315 if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) { 316 asm volatile("ptesync" : : : "memory"); 317 __tlbie_va_lpid(va, pid, lpid, mmu_get_ap(MMU_PAGE_64K), 318 RIC_FLUSH_TLB); 319 } 320 } 321 322 static inline void fixup_tlbie_lpid_va(unsigned long va, unsigned long lpid, 323 unsigned long ap) 324 { 325 if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) { 326 asm volatile("ptesync": : :"memory"); 327 __tlbie_lpid_va(va, 0, ap, RIC_FLUSH_TLB); 328 } 329 330 if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) { 331 asm volatile("ptesync": : :"memory"); 332 __tlbie_lpid_va(va, lpid, ap, RIC_FLUSH_TLB); 333 } 334 } 335 336 static inline void fixup_tlbie_lpid(unsigned long lpid) 337 { 338 /* 339 * We can use any address for the invalidation, pick one which is 340 * probably unused as an optimisation. 341 */ 342 unsigned long va = ((1UL << 52) - 1); 343 344 if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) { 345 asm volatile("ptesync": : :"memory"); 346 __tlbie_lpid(0, RIC_FLUSH_TLB); 347 } 348 349 if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) { 350 asm volatile("ptesync": : :"memory"); 351 __tlbie_lpid_va(va, lpid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB); 352 } 353 } 354 355 /* 356 * We use 128 set in radix mode and 256 set in hpt mode. 357 */ 358 static inline void _tlbiel_pid(unsigned long pid, unsigned long ric) 359 { 360 int set; 361 362 asm volatile("ptesync": : :"memory"); 363 364 switch (ric) { 365 case RIC_FLUSH_PWC: 366 367 /* For PWC, only one flush is needed */ 368 __tlbiel_pid(pid, 0, RIC_FLUSH_PWC); 369 ppc_after_tlbiel_barrier(); 370 return; 371 case RIC_FLUSH_TLB: 372 __tlbiel_pid(pid, 0, RIC_FLUSH_TLB); 373 break; 374 case RIC_FLUSH_ALL: 375 default: 376 /* 377 * Flush the first set of the TLB, and if 378 * we're doing a RIC_FLUSH_ALL, also flush 379 * the entire Page Walk Cache. 380 */ 381 __tlbiel_pid(pid, 0, RIC_FLUSH_ALL); 382 } 383 384 if (!cpu_has_feature(CPU_FTR_ARCH_31)) { 385 /* For the remaining sets, just flush the TLB */ 386 for (set = 1; set < POWER9_TLB_SETS_RADIX ; set++) 387 __tlbiel_pid(pid, set, RIC_FLUSH_TLB); 388 } 389 390 ppc_after_tlbiel_barrier(); 391 asm volatile(PPC_RADIX_INVALIDATE_ERAT_USER "; isync" : : :"memory"); 392 } 393 394 static inline void _tlbie_pid(unsigned long pid, unsigned long ric) 395 { 396 asm volatile("ptesync": : :"memory"); 397 398 /* 399 * Workaround the fact that the "ric" argument to __tlbie_pid 400 * must be a compile-time constraint to match the "i" constraint 401 * in the asm statement. 402 */ 403 switch (ric) { 404 case RIC_FLUSH_TLB: 405 __tlbie_pid(pid, RIC_FLUSH_TLB); 406 fixup_tlbie_pid(pid); 407 break; 408 case RIC_FLUSH_PWC: 409 __tlbie_pid(pid, RIC_FLUSH_PWC); 410 break; 411 case RIC_FLUSH_ALL: 412 default: 413 __tlbie_pid(pid, RIC_FLUSH_ALL); 414 fixup_tlbie_pid(pid); 415 } 416 asm volatile("eieio; tlbsync; ptesync": : :"memory"); 417 } 418 419 static inline void _tlbie_pid_lpid(unsigned long pid, unsigned long lpid, 420 unsigned long ric) 421 { 422 asm volatile("ptesync" : : : "memory"); 423 424 /* 425 * Workaround the fact that the "ric" argument to __tlbie_pid 426 * must be a compile-time contraint to match the "i" constraint 427 * in the asm statement. 428 */ 429 switch (ric) { 430 case RIC_FLUSH_TLB: 431 __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_TLB); 432 fixup_tlbie_pid_lpid(pid, lpid); 433 break; 434 case RIC_FLUSH_PWC: 435 __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC); 436 break; 437 case RIC_FLUSH_ALL: 438 default: 439 __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_ALL); 440 fixup_tlbie_pid_lpid(pid, lpid); 441 } 442 asm volatile("eieio; tlbsync; ptesync" : : : "memory"); 443 } 444 struct tlbiel_pid { 445 unsigned long pid; 446 unsigned long ric; 447 }; 448 449 static void do_tlbiel_pid(void *info) 450 { 451 struct tlbiel_pid *t = info; 452 453 if (t->ric == RIC_FLUSH_TLB) 454 _tlbiel_pid(t->pid, RIC_FLUSH_TLB); 455 else if (t->ric == RIC_FLUSH_PWC) 456 _tlbiel_pid(t->pid, RIC_FLUSH_PWC); 457 else 458 _tlbiel_pid(t->pid, RIC_FLUSH_ALL); 459 } 460 461 static inline void _tlbiel_pid_multicast(struct mm_struct *mm, 462 unsigned long pid, unsigned long ric) 463 { 464 struct cpumask *cpus = mm_cpumask(mm); 465 struct tlbiel_pid t = { .pid = pid, .ric = ric }; 466 467 on_each_cpu_mask(cpus, do_tlbiel_pid, &t, 1); 468 /* 469 * Always want the CPU translations to be invalidated with tlbiel in 470 * these paths, so while coprocessors must use tlbie, we can not 471 * optimise away the tlbiel component. 472 */ 473 if (atomic_read(&mm->context.copros) > 0) 474 _tlbie_pid(pid, RIC_FLUSH_ALL); 475 } 476 477 static inline void _tlbie_lpid(unsigned long lpid, unsigned long ric) 478 { 479 asm volatile("ptesync": : :"memory"); 480 481 /* 482 * Workaround the fact that the "ric" argument to __tlbie_pid 483 * must be a compile-time contraint to match the "i" constraint 484 * in the asm statement. 485 */ 486 switch (ric) { 487 case RIC_FLUSH_TLB: 488 __tlbie_lpid(lpid, RIC_FLUSH_TLB); 489 fixup_tlbie_lpid(lpid); 490 break; 491 case RIC_FLUSH_PWC: 492 __tlbie_lpid(lpid, RIC_FLUSH_PWC); 493 break; 494 case RIC_FLUSH_ALL: 495 default: 496 __tlbie_lpid(lpid, RIC_FLUSH_ALL); 497 fixup_tlbie_lpid(lpid); 498 } 499 asm volatile("eieio; tlbsync; ptesync": : :"memory"); 500 } 501 502 static __always_inline void _tlbie_lpid_guest(unsigned long lpid, unsigned long ric) 503 { 504 /* 505 * Workaround the fact that the "ric" argument to __tlbie_pid 506 * must be a compile-time contraint to match the "i" constraint 507 * in the asm statement. 508 */ 509 switch (ric) { 510 case RIC_FLUSH_TLB: 511 __tlbie_lpid_guest(lpid, RIC_FLUSH_TLB); 512 break; 513 case RIC_FLUSH_PWC: 514 __tlbie_lpid_guest(lpid, RIC_FLUSH_PWC); 515 break; 516 case RIC_FLUSH_ALL: 517 default: 518 __tlbie_lpid_guest(lpid, RIC_FLUSH_ALL); 519 } 520 fixup_tlbie_lpid(lpid); 521 asm volatile("eieio; tlbsync; ptesync": : :"memory"); 522 } 523 524 static inline void __tlbiel_va_range(unsigned long start, unsigned long end, 525 unsigned long pid, unsigned long page_size, 526 unsigned long psize) 527 { 528 unsigned long addr; 529 unsigned long ap = mmu_get_ap(psize); 530 531 for (addr = start; addr < end; addr += page_size) 532 __tlbiel_va(addr, pid, ap, RIC_FLUSH_TLB); 533 } 534 535 static __always_inline void _tlbiel_va(unsigned long va, unsigned long pid, 536 unsigned long psize, unsigned long ric) 537 { 538 unsigned long ap = mmu_get_ap(psize); 539 540 asm volatile("ptesync": : :"memory"); 541 __tlbiel_va(va, pid, ap, ric); 542 ppc_after_tlbiel_barrier(); 543 } 544 545 static inline void _tlbiel_va_range(unsigned long start, unsigned long end, 546 unsigned long pid, unsigned long page_size, 547 unsigned long psize, bool also_pwc) 548 { 549 asm volatile("ptesync": : :"memory"); 550 if (also_pwc) 551 __tlbiel_pid(pid, 0, RIC_FLUSH_PWC); 552 __tlbiel_va_range(start, end, pid, page_size, psize); 553 ppc_after_tlbiel_barrier(); 554 } 555 556 static inline void __tlbie_va_range(unsigned long start, unsigned long end, 557 unsigned long pid, unsigned long page_size, 558 unsigned long psize) 559 { 560 unsigned long addr; 561 unsigned long ap = mmu_get_ap(psize); 562 563 for (addr = start; addr < end; addr += page_size) 564 __tlbie_va(addr, pid, ap, RIC_FLUSH_TLB); 565 566 fixup_tlbie_va_range(addr - page_size, pid, ap); 567 } 568 569 static inline void __tlbie_va_range_lpid(unsigned long start, unsigned long end, 570 unsigned long pid, unsigned long lpid, 571 unsigned long page_size, 572 unsigned long psize) 573 { 574 unsigned long addr; 575 unsigned long ap = mmu_get_ap(psize); 576 577 for (addr = start; addr < end; addr += page_size) 578 __tlbie_va_lpid(addr, pid, lpid, ap, RIC_FLUSH_TLB); 579 580 fixup_tlbie_va_range_lpid(addr - page_size, pid, lpid, ap); 581 } 582 583 static __always_inline void _tlbie_va(unsigned long va, unsigned long pid, 584 unsigned long psize, unsigned long ric) 585 { 586 unsigned long ap = mmu_get_ap(psize); 587 588 asm volatile("ptesync": : :"memory"); 589 __tlbie_va(va, pid, ap, ric); 590 fixup_tlbie_va(va, pid, ap); 591 asm volatile("eieio; tlbsync; ptesync": : :"memory"); 592 } 593 594 struct tlbiel_va { 595 unsigned long pid; 596 unsigned long va; 597 unsigned long psize; 598 unsigned long ric; 599 }; 600 601 static void do_tlbiel_va(void *info) 602 { 603 struct tlbiel_va *t = info; 604 605 if (t->ric == RIC_FLUSH_TLB) 606 _tlbiel_va(t->va, t->pid, t->psize, RIC_FLUSH_TLB); 607 else if (t->ric == RIC_FLUSH_PWC) 608 _tlbiel_va(t->va, t->pid, t->psize, RIC_FLUSH_PWC); 609 else 610 _tlbiel_va(t->va, t->pid, t->psize, RIC_FLUSH_ALL); 611 } 612 613 static inline void _tlbiel_va_multicast(struct mm_struct *mm, 614 unsigned long va, unsigned long pid, 615 unsigned long psize, unsigned long ric) 616 { 617 struct cpumask *cpus = mm_cpumask(mm); 618 struct tlbiel_va t = { .va = va, .pid = pid, .psize = psize, .ric = ric }; 619 on_each_cpu_mask(cpus, do_tlbiel_va, &t, 1); 620 if (atomic_read(&mm->context.copros) > 0) 621 _tlbie_va(va, pid, psize, RIC_FLUSH_TLB); 622 } 623 624 struct tlbiel_va_range { 625 unsigned long pid; 626 unsigned long start; 627 unsigned long end; 628 unsigned long page_size; 629 unsigned long psize; 630 bool also_pwc; 631 }; 632 633 static void do_tlbiel_va_range(void *info) 634 { 635 struct tlbiel_va_range *t = info; 636 637 _tlbiel_va_range(t->start, t->end, t->pid, t->page_size, 638 t->psize, t->also_pwc); 639 } 640 641 static __always_inline void _tlbie_lpid_va(unsigned long va, unsigned long lpid, 642 unsigned long psize, unsigned long ric) 643 { 644 unsigned long ap = mmu_get_ap(psize); 645 646 asm volatile("ptesync": : :"memory"); 647 __tlbie_lpid_va(va, lpid, ap, ric); 648 fixup_tlbie_lpid_va(va, lpid, ap); 649 asm volatile("eieio; tlbsync; ptesync": : :"memory"); 650 } 651 652 static inline void _tlbie_va_range(unsigned long start, unsigned long end, 653 unsigned long pid, unsigned long page_size, 654 unsigned long psize, bool also_pwc) 655 { 656 asm volatile("ptesync": : :"memory"); 657 if (also_pwc) 658 __tlbie_pid(pid, RIC_FLUSH_PWC); 659 __tlbie_va_range(start, end, pid, page_size, psize); 660 asm volatile("eieio; tlbsync; ptesync": : :"memory"); 661 } 662 663 static inline void _tlbie_va_range_lpid(unsigned long start, unsigned long end, 664 unsigned long pid, unsigned long lpid, 665 unsigned long page_size, 666 unsigned long psize, bool also_pwc) 667 { 668 asm volatile("ptesync" : : : "memory"); 669 if (also_pwc) 670 __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC); 671 __tlbie_va_range_lpid(start, end, pid, lpid, page_size, psize); 672 asm volatile("eieio; tlbsync; ptesync" : : : "memory"); 673 } 674 675 static inline void _tlbiel_va_range_multicast(struct mm_struct *mm, 676 unsigned long start, unsigned long end, 677 unsigned long pid, unsigned long page_size, 678 unsigned long psize, bool also_pwc) 679 { 680 struct cpumask *cpus = mm_cpumask(mm); 681 struct tlbiel_va_range t = { .start = start, .end = end, 682 .pid = pid, .page_size = page_size, 683 .psize = psize, .also_pwc = also_pwc }; 684 685 on_each_cpu_mask(cpus, do_tlbiel_va_range, &t, 1); 686 if (atomic_read(&mm->context.copros) > 0) 687 _tlbie_va_range(start, end, pid, page_size, psize, also_pwc); 688 } 689 690 /* 691 * Base TLB flushing operations: 692 * 693 * - flush_tlb_mm(mm) flushes the specified mm context TLB's 694 * - flush_tlb_page(vma, vmaddr) flushes one page 695 * - flush_tlb_range(vma, start, end) flushes a range of pages 696 * - flush_tlb_kernel_range(start, end) flushes kernel pages 697 * 698 * - local_* variants of page and mm only apply to the current 699 * processor 700 */ 701 void radix__local_flush_tlb_mm(struct mm_struct *mm) 702 { 703 unsigned long pid = mm->context.id; 704 705 if (WARN_ON_ONCE(pid == MMU_NO_CONTEXT)) 706 return; 707 708 preempt_disable(); 709 _tlbiel_pid(pid, RIC_FLUSH_TLB); 710 preempt_enable(); 711 } 712 EXPORT_SYMBOL(radix__local_flush_tlb_mm); 713 714 #ifndef CONFIG_SMP 715 void radix__local_flush_all_mm(struct mm_struct *mm) 716 { 717 unsigned long pid = mm->context.id; 718 719 if (WARN_ON_ONCE(pid == MMU_NO_CONTEXT)) 720 return; 721 722 preempt_disable(); 723 _tlbiel_pid(pid, RIC_FLUSH_ALL); 724 preempt_enable(); 725 } 726 EXPORT_SYMBOL(radix__local_flush_all_mm); 727 728 static void __flush_all_mm(struct mm_struct *mm, bool fullmm) 729 { 730 radix__local_flush_all_mm(mm); 731 } 732 #endif /* CONFIG_SMP */ 733 734 void radix__local_flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr, 735 int psize) 736 { 737 unsigned long pid = mm->context.id; 738 739 if (WARN_ON_ONCE(pid == MMU_NO_CONTEXT)) 740 return; 741 742 preempt_disable(); 743 _tlbiel_va(vmaddr, pid, psize, RIC_FLUSH_TLB); 744 preempt_enable(); 745 } 746 747 void radix__local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) 748 { 749 #ifdef CONFIG_HUGETLB_PAGE 750 /* need the return fix for nohash.c */ 751 if (is_vm_hugetlb_page(vma)) 752 return radix__local_flush_hugetlb_page(vma, vmaddr); 753 #endif 754 radix__local_flush_tlb_page_psize(vma->vm_mm, vmaddr, mmu_virtual_psize); 755 } 756 EXPORT_SYMBOL(radix__local_flush_tlb_page); 757 758 static bool mm_needs_flush_escalation(struct mm_struct *mm) 759 { 760 /* 761 * The P9 nest MMU has issues with the page walk cache caching PTEs 762 * and not flushing them when RIC = 0 for a PID/LPID invalidate. 763 * 764 * This may have been fixed in shipping firmware (by disabling PWC 765 * or preventing it from caching PTEs), but until that is confirmed, 766 * this workaround is required - escalate all RIC=0 IS=1/2/3 flushes 767 * to RIC=2. 768 * 769 * POWER10 (and P9P) does not have this problem. 770 */ 771 if (cpu_has_feature(CPU_FTR_ARCH_31)) 772 return false; 773 if (atomic_read(&mm->context.copros) > 0) 774 return true; 775 return false; 776 } 777 778 /* 779 * If always_flush is true, then flush even if this CPU can't be removed 780 * from mm_cpumask. 781 */ 782 void exit_lazy_flush_tlb(struct mm_struct *mm, bool always_flush) 783 { 784 unsigned long pid = mm->context.id; 785 int cpu = smp_processor_id(); 786 787 /* 788 * A kthread could have done a mmget_not_zero() after the flushing CPU 789 * checked mm_cpumask, and be in the process of kthread_use_mm when 790 * interrupted here. In that case, current->mm will be set to mm, 791 * because kthread_use_mm() setting ->mm and switching to the mm is 792 * done with interrupts off. 793 */ 794 if (current->mm == mm) 795 goto out; 796 797 if (current->active_mm == mm) { 798 unsigned long flags; 799 800 WARN_ON_ONCE(current->mm != NULL); 801 /* 802 * It is a kernel thread and is using mm as the lazy tlb, so 803 * switch it to init_mm. This is not always called from IPI 804 * (e.g., flush_type_needed), so must disable irqs. 805 */ 806 local_irq_save(flags); 807 mmgrab_lazy_tlb(&init_mm); 808 current->active_mm = &init_mm; 809 switch_mm_irqs_off(mm, &init_mm, current); 810 mmdrop_lazy_tlb(mm); 811 local_irq_restore(flags); 812 } 813 814 /* 815 * This IPI may be initiated from any source including those not 816 * running the mm, so there may be a racing IPI that comes after 817 * this one which finds the cpumask already clear. Check and avoid 818 * underflowing the active_cpus count in that case. The race should 819 * not otherwise be a problem, but the TLB must be flushed because 820 * that's what the caller expects. 821 */ 822 if (cpumask_test_cpu(cpu, mm_cpumask(mm))) { 823 atomic_dec(&mm->context.active_cpus); 824 cpumask_clear_cpu(cpu, mm_cpumask(mm)); 825 always_flush = true; 826 } 827 828 out: 829 if (always_flush) 830 _tlbiel_pid(pid, RIC_FLUSH_ALL); 831 } 832 833 #ifdef CONFIG_SMP 834 static void do_exit_flush_lazy_tlb(void *arg) 835 { 836 struct mm_struct *mm = arg; 837 exit_lazy_flush_tlb(mm, true); 838 } 839 840 static void exit_flush_lazy_tlbs(struct mm_struct *mm) 841 { 842 /* 843 * Would be nice if this was async so it could be run in 844 * parallel with our local flush, but generic code does not 845 * give a good API for it. Could extend the generic code or 846 * make a special powerpc IPI for flushing TLBs. 847 * For now it's not too performance critical. 848 */ 849 smp_call_function_many(mm_cpumask(mm), do_exit_flush_lazy_tlb, 850 (void *)mm, 1); 851 } 852 853 #else /* CONFIG_SMP */ 854 static inline void exit_flush_lazy_tlbs(struct mm_struct *mm) { } 855 #endif /* CONFIG_SMP */ 856 857 static DEFINE_PER_CPU(unsigned int, mm_cpumask_trim_clock); 858 859 /* 860 * Interval between flushes at which we send out IPIs to check whether the 861 * mm_cpumask can be trimmed for the case where it's not a single-threaded 862 * process flushing its own mm. The intent is to reduce the cost of later 863 * flushes. Don't want this to be so low that it adds noticable cost to TLB 864 * flushing, or so high that it doesn't help reduce global TLBIEs. 865 */ 866 static unsigned long tlb_mm_cpumask_trim_timer = 1073; 867 868 static bool tick_and_test_trim_clock(void) 869 { 870 if (__this_cpu_inc_return(mm_cpumask_trim_clock) == 871 tlb_mm_cpumask_trim_timer) { 872 __this_cpu_write(mm_cpumask_trim_clock, 0); 873 return true; 874 } 875 return false; 876 } 877 878 enum tlb_flush_type { 879 FLUSH_TYPE_NONE, 880 FLUSH_TYPE_LOCAL, 881 FLUSH_TYPE_GLOBAL, 882 }; 883 884 static enum tlb_flush_type flush_type_needed(struct mm_struct *mm, bool fullmm) 885 { 886 int active_cpus = atomic_read(&mm->context.active_cpus); 887 int cpu = smp_processor_id(); 888 889 if (active_cpus == 0) 890 return FLUSH_TYPE_NONE; 891 if (active_cpus == 1 && cpumask_test_cpu(cpu, mm_cpumask(mm))) { 892 if (current->mm != mm) { 893 /* 894 * Asynchronous flush sources may trim down to nothing 895 * if the process is not running, so occasionally try 896 * to trim. 897 */ 898 if (tick_and_test_trim_clock()) { 899 exit_lazy_flush_tlb(mm, true); 900 return FLUSH_TYPE_NONE; 901 } 902 } 903 return FLUSH_TYPE_LOCAL; 904 } 905 906 /* Coprocessors require TLBIE to invalidate nMMU. */ 907 if (atomic_read(&mm->context.copros) > 0) 908 return FLUSH_TYPE_GLOBAL; 909 910 /* 911 * In the fullmm case there's no point doing the exit_flush_lazy_tlbs 912 * because the mm is being taken down anyway, and a TLBIE tends to 913 * be faster than an IPI+TLBIEL. 914 */ 915 if (fullmm) 916 return FLUSH_TYPE_GLOBAL; 917 918 /* 919 * If we are running the only thread of a single-threaded process, 920 * then we should almost always be able to trim off the rest of the 921 * CPU mask (except in the case of use_mm() races), so always try 922 * trimming the mask. 923 */ 924 if (atomic_read(&mm->mm_users) <= 1 && current->mm == mm) { 925 exit_flush_lazy_tlbs(mm); 926 /* 927 * use_mm() race could prevent IPIs from being able to clear 928 * the cpumask here, however those users are established 929 * after our first check (and so after the PTEs are removed), 930 * and the TLB still gets flushed by the IPI, so this CPU 931 * will only require a local flush. 932 */ 933 return FLUSH_TYPE_LOCAL; 934 } 935 936 /* 937 * Occasionally try to trim down the cpumask. It's possible this can 938 * bring the mask to zero, which results in no flush. 939 */ 940 if (tick_and_test_trim_clock()) { 941 exit_flush_lazy_tlbs(mm); 942 if (current->mm == mm) 943 return FLUSH_TYPE_LOCAL; 944 if (cpumask_test_cpu(cpu, mm_cpumask(mm))) 945 exit_lazy_flush_tlb(mm, true); 946 return FLUSH_TYPE_NONE; 947 } 948 949 return FLUSH_TYPE_GLOBAL; 950 } 951 952 #ifdef CONFIG_SMP 953 void radix__flush_tlb_mm(struct mm_struct *mm) 954 { 955 unsigned long pid; 956 enum tlb_flush_type type; 957 958 pid = mm->context.id; 959 if (WARN_ON_ONCE(pid == MMU_NO_CONTEXT)) 960 return; 961 962 preempt_disable(); 963 /* 964 * Order loads of mm_cpumask (in flush_type_needed) vs previous 965 * stores to clear ptes before the invalidate. See barrier in 966 * switch_mm_irqs_off 967 */ 968 smp_mb(); 969 type = flush_type_needed(mm, false); 970 if (type == FLUSH_TYPE_LOCAL) { 971 _tlbiel_pid(pid, RIC_FLUSH_TLB); 972 } else if (type == FLUSH_TYPE_GLOBAL) { 973 if (!mmu_has_feature(MMU_FTR_GTSE)) { 974 unsigned long tgt = H_RPTI_TARGET_CMMU; 975 976 if (atomic_read(&mm->context.copros) > 0) 977 tgt |= H_RPTI_TARGET_NMMU; 978 pseries_rpt_invalidate(pid, tgt, H_RPTI_TYPE_TLB, 979 H_RPTI_PAGE_ALL, 0, -1UL); 980 } else if (cputlb_use_tlbie()) { 981 if (mm_needs_flush_escalation(mm)) 982 _tlbie_pid(pid, RIC_FLUSH_ALL); 983 else 984 _tlbie_pid(pid, RIC_FLUSH_TLB); 985 } else { 986 _tlbiel_pid_multicast(mm, pid, RIC_FLUSH_TLB); 987 } 988 } 989 preempt_enable(); 990 } 991 EXPORT_SYMBOL(radix__flush_tlb_mm); 992 993 static void __flush_all_mm(struct mm_struct *mm, bool fullmm) 994 { 995 unsigned long pid; 996 enum tlb_flush_type type; 997 998 pid = mm->context.id; 999 if (WARN_ON_ONCE(pid == MMU_NO_CONTEXT)) 1000 return; 1001 1002 preempt_disable(); 1003 smp_mb(); /* see radix__flush_tlb_mm */ 1004 type = flush_type_needed(mm, fullmm); 1005 if (type == FLUSH_TYPE_LOCAL) { 1006 _tlbiel_pid(pid, RIC_FLUSH_ALL); 1007 } else if (type == FLUSH_TYPE_GLOBAL) { 1008 if (!mmu_has_feature(MMU_FTR_GTSE)) { 1009 unsigned long tgt = H_RPTI_TARGET_CMMU; 1010 unsigned long type = H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC | 1011 H_RPTI_TYPE_PRT; 1012 1013 if (atomic_read(&mm->context.copros) > 0) 1014 tgt |= H_RPTI_TARGET_NMMU; 1015 pseries_rpt_invalidate(pid, tgt, type, 1016 H_RPTI_PAGE_ALL, 0, -1UL); 1017 } else if (cputlb_use_tlbie()) 1018 _tlbie_pid(pid, RIC_FLUSH_ALL); 1019 else 1020 _tlbiel_pid_multicast(mm, pid, RIC_FLUSH_ALL); 1021 } 1022 preempt_enable(); 1023 } 1024 1025 void radix__flush_all_mm(struct mm_struct *mm) 1026 { 1027 __flush_all_mm(mm, false); 1028 } 1029 EXPORT_SYMBOL(radix__flush_all_mm); 1030 1031 void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr, 1032 int psize) 1033 { 1034 unsigned long pid; 1035 enum tlb_flush_type type; 1036 1037 pid = mm->context.id; 1038 if (WARN_ON_ONCE(pid == MMU_NO_CONTEXT)) 1039 return; 1040 1041 preempt_disable(); 1042 smp_mb(); /* see radix__flush_tlb_mm */ 1043 type = flush_type_needed(mm, false); 1044 if (type == FLUSH_TYPE_LOCAL) { 1045 _tlbiel_va(vmaddr, pid, psize, RIC_FLUSH_TLB); 1046 } else if (type == FLUSH_TYPE_GLOBAL) { 1047 if (!mmu_has_feature(MMU_FTR_GTSE)) { 1048 unsigned long tgt, pg_sizes, size; 1049 1050 tgt = H_RPTI_TARGET_CMMU; 1051 pg_sizes = psize_to_rpti_pgsize(psize); 1052 size = 1UL << mmu_psize_to_shift(psize); 1053 1054 if (atomic_read(&mm->context.copros) > 0) 1055 tgt |= H_RPTI_TARGET_NMMU; 1056 pseries_rpt_invalidate(pid, tgt, H_RPTI_TYPE_TLB, 1057 pg_sizes, vmaddr, 1058 vmaddr + size); 1059 } else if (cputlb_use_tlbie()) 1060 _tlbie_va(vmaddr, pid, psize, RIC_FLUSH_TLB); 1061 else 1062 _tlbiel_va_multicast(mm, vmaddr, pid, psize, RIC_FLUSH_TLB); 1063 } 1064 preempt_enable(); 1065 } 1066 1067 void radix__flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) 1068 { 1069 #ifdef CONFIG_HUGETLB_PAGE 1070 if (is_vm_hugetlb_page(vma)) 1071 return radix__flush_hugetlb_page(vma, vmaddr); 1072 #endif 1073 radix__flush_tlb_page_psize(vma->vm_mm, vmaddr, mmu_virtual_psize); 1074 } 1075 EXPORT_SYMBOL(radix__flush_tlb_page); 1076 1077 #endif /* CONFIG_SMP */ 1078 1079 static void do_tlbiel_kernel(void *info) 1080 { 1081 _tlbiel_pid(0, RIC_FLUSH_ALL); 1082 } 1083 1084 static inline void _tlbiel_kernel_broadcast(void) 1085 { 1086 on_each_cpu(do_tlbiel_kernel, NULL, 1); 1087 if (tlbie_capable) { 1088 /* 1089 * Coherent accelerators don't refcount kernel memory mappings, 1090 * so have to always issue a tlbie for them. This is quite a 1091 * slow path anyway. 1092 */ 1093 _tlbie_pid(0, RIC_FLUSH_ALL); 1094 } 1095 } 1096 1097 /* 1098 * If kernel TLBIs ever become local rather than global, then 1099 * drivers/misc/ocxl/link.c:ocxl_link_add_pe will need some work, as it 1100 * assumes kernel TLBIs are global. 1101 */ 1102 void radix__flush_tlb_kernel_range(unsigned long start, unsigned long end) 1103 { 1104 if (!mmu_has_feature(MMU_FTR_GTSE)) { 1105 unsigned long tgt = H_RPTI_TARGET_CMMU | H_RPTI_TARGET_NMMU; 1106 unsigned long type = H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC | 1107 H_RPTI_TYPE_PRT; 1108 1109 pseries_rpt_invalidate(0, tgt, type, H_RPTI_PAGE_ALL, 1110 start, end); 1111 } else if (cputlb_use_tlbie()) 1112 _tlbie_pid(0, RIC_FLUSH_ALL); 1113 else 1114 _tlbiel_kernel_broadcast(); 1115 } 1116 EXPORT_SYMBOL(radix__flush_tlb_kernel_range); 1117 1118 /* 1119 * Doesn't appear to be used anywhere. Remove. 1120 */ 1121 #define TLB_FLUSH_ALL -1UL 1122 1123 /* 1124 * Number of pages above which we invalidate the entire PID rather than 1125 * flush individual pages, for local and global flushes respectively. 1126 * 1127 * tlbie goes out to the interconnect and individual ops are more costly. 1128 * It also does not iterate over sets like the local tlbiel variant when 1129 * invalidating a full PID, so it has a far lower threshold to change from 1130 * individual page flushes to full-pid flushes. 1131 */ 1132 static u32 tlb_single_page_flush_ceiling __read_mostly = 33; 1133 static u32 tlb_local_single_page_flush_ceiling __read_mostly = POWER9_TLB_SETS_RADIX * 2; 1134 1135 static inline void __radix__flush_tlb_range(struct mm_struct *mm, 1136 unsigned long start, unsigned long end) 1137 { 1138 unsigned long pid; 1139 unsigned int page_shift = mmu_psize_defs[mmu_virtual_psize].shift; 1140 unsigned long page_size = 1UL << page_shift; 1141 unsigned long nr_pages = (end - start) >> page_shift; 1142 bool flush_pid, flush_pwc = false; 1143 enum tlb_flush_type type; 1144 1145 pid = mm->context.id; 1146 if (WARN_ON_ONCE(pid == MMU_NO_CONTEXT)) 1147 return; 1148 1149 WARN_ON_ONCE(end == TLB_FLUSH_ALL); 1150 1151 preempt_disable(); 1152 smp_mb(); /* see radix__flush_tlb_mm */ 1153 type = flush_type_needed(mm, false); 1154 if (type == FLUSH_TYPE_NONE) 1155 goto out; 1156 1157 if (type == FLUSH_TYPE_GLOBAL) 1158 flush_pid = nr_pages > tlb_single_page_flush_ceiling; 1159 else 1160 flush_pid = nr_pages > tlb_local_single_page_flush_ceiling; 1161 /* 1162 * full pid flush already does the PWC flush. if it is not full pid 1163 * flush check the range is more than PMD and force a pwc flush 1164 * mremap() depends on this behaviour. 1165 */ 1166 if (!flush_pid && (end - start) >= PMD_SIZE) 1167 flush_pwc = true; 1168 1169 if (!mmu_has_feature(MMU_FTR_GTSE) && type == FLUSH_TYPE_GLOBAL) { 1170 unsigned long type = H_RPTI_TYPE_TLB; 1171 unsigned long tgt = H_RPTI_TARGET_CMMU; 1172 unsigned long pg_sizes = psize_to_rpti_pgsize(mmu_virtual_psize); 1173 1174 if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) 1175 pg_sizes |= psize_to_rpti_pgsize(MMU_PAGE_2M); 1176 if (atomic_read(&mm->context.copros) > 0) 1177 tgt |= H_RPTI_TARGET_NMMU; 1178 if (flush_pwc) 1179 type |= H_RPTI_TYPE_PWC; 1180 pseries_rpt_invalidate(pid, tgt, type, pg_sizes, start, end); 1181 } else if (flush_pid) { 1182 /* 1183 * We are now flushing a range larger than PMD size force a RIC_FLUSH_ALL 1184 */ 1185 if (type == FLUSH_TYPE_LOCAL) { 1186 _tlbiel_pid(pid, RIC_FLUSH_ALL); 1187 } else { 1188 if (cputlb_use_tlbie()) { 1189 _tlbie_pid(pid, RIC_FLUSH_ALL); 1190 } else { 1191 _tlbiel_pid_multicast(mm, pid, RIC_FLUSH_ALL); 1192 } 1193 } 1194 } else { 1195 bool hflush; 1196 unsigned long hstart, hend; 1197 1198 hstart = (start + PMD_SIZE - 1) & PMD_MASK; 1199 hend = end & PMD_MASK; 1200 hflush = IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && hstart < hend; 1201 1202 if (type == FLUSH_TYPE_LOCAL) { 1203 asm volatile("ptesync": : :"memory"); 1204 if (flush_pwc) 1205 /* For PWC, only one flush is needed */ 1206 __tlbiel_pid(pid, 0, RIC_FLUSH_PWC); 1207 __tlbiel_va_range(start, end, pid, page_size, mmu_virtual_psize); 1208 if (hflush) 1209 __tlbiel_va_range(hstart, hend, pid, 1210 PMD_SIZE, MMU_PAGE_2M); 1211 ppc_after_tlbiel_barrier(); 1212 } else if (cputlb_use_tlbie()) { 1213 asm volatile("ptesync": : :"memory"); 1214 if (flush_pwc) 1215 __tlbie_pid(pid, RIC_FLUSH_PWC); 1216 __tlbie_va_range(start, end, pid, page_size, mmu_virtual_psize); 1217 if (hflush) 1218 __tlbie_va_range(hstart, hend, pid, 1219 PMD_SIZE, MMU_PAGE_2M); 1220 asm volatile("eieio; tlbsync; ptesync": : :"memory"); 1221 } else { 1222 _tlbiel_va_range_multicast(mm, 1223 start, end, pid, page_size, mmu_virtual_psize, flush_pwc); 1224 if (hflush) 1225 _tlbiel_va_range_multicast(mm, 1226 hstart, hend, pid, PMD_SIZE, MMU_PAGE_2M, flush_pwc); 1227 } 1228 } 1229 out: 1230 preempt_enable(); 1231 } 1232 1233 void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start, 1234 unsigned long end) 1235 1236 { 1237 #ifdef CONFIG_HUGETLB_PAGE 1238 if (is_vm_hugetlb_page(vma)) 1239 return radix__flush_hugetlb_tlb_range(vma, start, end); 1240 #endif 1241 1242 __radix__flush_tlb_range(vma->vm_mm, start, end); 1243 } 1244 EXPORT_SYMBOL(radix__flush_tlb_range); 1245 1246 static int radix_get_mmu_psize(int page_size) 1247 { 1248 int psize; 1249 1250 if (page_size == (1UL << mmu_psize_defs[mmu_virtual_psize].shift)) 1251 psize = mmu_virtual_psize; 1252 else if (page_size == (1UL << mmu_psize_defs[MMU_PAGE_2M].shift)) 1253 psize = MMU_PAGE_2M; 1254 else if (page_size == (1UL << mmu_psize_defs[MMU_PAGE_1G].shift)) 1255 psize = MMU_PAGE_1G; 1256 else 1257 return -1; 1258 return psize; 1259 } 1260 1261 /* 1262 * Flush partition scoped LPID address translation for all CPUs. 1263 */ 1264 void radix__flush_tlb_lpid_page(unsigned int lpid, 1265 unsigned long addr, 1266 unsigned long page_size) 1267 { 1268 int psize = radix_get_mmu_psize(page_size); 1269 1270 _tlbie_lpid_va(addr, lpid, psize, RIC_FLUSH_TLB); 1271 } 1272 EXPORT_SYMBOL_GPL(radix__flush_tlb_lpid_page); 1273 1274 /* 1275 * Flush partition scoped PWC from LPID for all CPUs. 1276 */ 1277 void radix__flush_pwc_lpid(unsigned int lpid) 1278 { 1279 _tlbie_lpid(lpid, RIC_FLUSH_PWC); 1280 } 1281 EXPORT_SYMBOL_GPL(radix__flush_pwc_lpid); 1282 1283 /* 1284 * Flush partition scoped translations from LPID (=LPIDR) 1285 */ 1286 void radix__flush_all_lpid(unsigned int lpid) 1287 { 1288 _tlbie_lpid(lpid, RIC_FLUSH_ALL); 1289 } 1290 EXPORT_SYMBOL_GPL(radix__flush_all_lpid); 1291 1292 /* 1293 * Flush process scoped translations from LPID (=LPIDR) 1294 */ 1295 void radix__flush_all_lpid_guest(unsigned int lpid) 1296 { 1297 _tlbie_lpid_guest(lpid, RIC_FLUSH_ALL); 1298 } 1299 1300 void radix__tlb_flush(struct mmu_gather *tlb) 1301 { 1302 int psize = 0; 1303 struct mm_struct *mm = tlb->mm; 1304 int page_size = tlb->page_size; 1305 unsigned long start = tlb->start; 1306 unsigned long end = tlb->end; 1307 1308 /* 1309 * if page size is not something we understand, do a full mm flush 1310 * 1311 * A "fullmm" flush must always do a flush_all_mm (RIC=2) flush 1312 * that flushes the process table entry cache upon process teardown. 1313 * See the comment for radix in arch_exit_mmap(). 1314 */ 1315 if (tlb->fullmm) { 1316 __flush_all_mm(mm, true); 1317 } else if ( (psize = radix_get_mmu_psize(page_size)) == -1) { 1318 if (!tlb->freed_tables) 1319 radix__flush_tlb_mm(mm); 1320 else 1321 radix__flush_all_mm(mm); 1322 } else { 1323 if (!tlb->freed_tables) 1324 radix__flush_tlb_range_psize(mm, start, end, psize); 1325 else 1326 radix__flush_tlb_pwc_range_psize(mm, start, end, psize); 1327 } 1328 } 1329 1330 static void __radix__flush_tlb_range_psize(struct mm_struct *mm, 1331 unsigned long start, unsigned long end, 1332 int psize, bool also_pwc) 1333 { 1334 unsigned long pid; 1335 unsigned int page_shift = mmu_psize_defs[psize].shift; 1336 unsigned long page_size = 1UL << page_shift; 1337 unsigned long nr_pages = (end - start) >> page_shift; 1338 bool flush_pid; 1339 enum tlb_flush_type type; 1340 1341 pid = mm->context.id; 1342 if (WARN_ON_ONCE(pid == MMU_NO_CONTEXT)) 1343 return; 1344 1345 WARN_ON_ONCE(end == TLB_FLUSH_ALL); 1346 1347 preempt_disable(); 1348 smp_mb(); /* see radix__flush_tlb_mm */ 1349 type = flush_type_needed(mm, false); 1350 if (type == FLUSH_TYPE_NONE) 1351 goto out; 1352 1353 if (type == FLUSH_TYPE_GLOBAL) 1354 flush_pid = nr_pages > tlb_single_page_flush_ceiling; 1355 else 1356 flush_pid = nr_pages > tlb_local_single_page_flush_ceiling; 1357 1358 if (!mmu_has_feature(MMU_FTR_GTSE) && type == FLUSH_TYPE_GLOBAL) { 1359 unsigned long tgt = H_RPTI_TARGET_CMMU; 1360 unsigned long type = H_RPTI_TYPE_TLB; 1361 unsigned long pg_sizes = psize_to_rpti_pgsize(psize); 1362 1363 if (also_pwc) 1364 type |= H_RPTI_TYPE_PWC; 1365 if (atomic_read(&mm->context.copros) > 0) 1366 tgt |= H_RPTI_TARGET_NMMU; 1367 pseries_rpt_invalidate(pid, tgt, type, pg_sizes, start, end); 1368 } else if (flush_pid) { 1369 if (type == FLUSH_TYPE_LOCAL) { 1370 _tlbiel_pid(pid, also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB); 1371 } else { 1372 if (cputlb_use_tlbie()) { 1373 if (mm_needs_flush_escalation(mm)) 1374 also_pwc = true; 1375 1376 _tlbie_pid(pid, 1377 also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB); 1378 } else { 1379 _tlbiel_pid_multicast(mm, pid, 1380 also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB); 1381 } 1382 1383 } 1384 } else { 1385 if (type == FLUSH_TYPE_LOCAL) 1386 _tlbiel_va_range(start, end, pid, page_size, psize, also_pwc); 1387 else if (cputlb_use_tlbie()) 1388 _tlbie_va_range(start, end, pid, page_size, psize, also_pwc); 1389 else 1390 _tlbiel_va_range_multicast(mm, 1391 start, end, pid, page_size, psize, also_pwc); 1392 } 1393 out: 1394 preempt_enable(); 1395 } 1396 1397 void radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start, 1398 unsigned long end, int psize) 1399 { 1400 return __radix__flush_tlb_range_psize(mm, start, end, psize, false); 1401 } 1402 1403 void radix__flush_tlb_pwc_range_psize(struct mm_struct *mm, unsigned long start, 1404 unsigned long end, int psize) 1405 { 1406 __radix__flush_tlb_range_psize(mm, start, end, psize, true); 1407 } 1408 1409 #ifdef CONFIG_TRANSPARENT_HUGEPAGE 1410 void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr) 1411 { 1412 unsigned long pid, end; 1413 enum tlb_flush_type type; 1414 1415 pid = mm->context.id; 1416 if (WARN_ON_ONCE(pid == MMU_NO_CONTEXT)) 1417 return; 1418 1419 /* 4k page size, just blow the world */ 1420 if (PAGE_SIZE == 0x1000) { 1421 radix__flush_all_mm(mm); 1422 return; 1423 } 1424 1425 end = addr + HPAGE_PMD_SIZE; 1426 1427 /* Otherwise first do the PWC, then iterate the pages. */ 1428 preempt_disable(); 1429 smp_mb(); /* see radix__flush_tlb_mm */ 1430 type = flush_type_needed(mm, false); 1431 if (type == FLUSH_TYPE_LOCAL) { 1432 _tlbiel_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true); 1433 } else if (type == FLUSH_TYPE_GLOBAL) { 1434 if (!mmu_has_feature(MMU_FTR_GTSE)) { 1435 unsigned long tgt, type, pg_sizes; 1436 1437 tgt = H_RPTI_TARGET_CMMU; 1438 type = H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC | 1439 H_RPTI_TYPE_PRT; 1440 pg_sizes = psize_to_rpti_pgsize(mmu_virtual_psize); 1441 1442 if (atomic_read(&mm->context.copros) > 0) 1443 tgt |= H_RPTI_TARGET_NMMU; 1444 pseries_rpt_invalidate(pid, tgt, type, pg_sizes, 1445 addr, end); 1446 } else if (cputlb_use_tlbie()) 1447 _tlbie_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true); 1448 else 1449 _tlbiel_va_range_multicast(mm, 1450 addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true); 1451 } 1452 1453 preempt_enable(); 1454 } 1455 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ 1456 1457 void radix__flush_pmd_tlb_range(struct vm_area_struct *vma, 1458 unsigned long start, unsigned long end) 1459 { 1460 radix__flush_tlb_range_psize(vma->vm_mm, start, end, MMU_PAGE_2M); 1461 } 1462 EXPORT_SYMBOL(radix__flush_pmd_tlb_range); 1463 1464 void radix__flush_tlb_all(void) 1465 { 1466 unsigned long rb,prs,r,rs; 1467 unsigned long ric = RIC_FLUSH_ALL; 1468 1469 rb = 0x3 << PPC_BITLSHIFT(53); /* IS = 3 */ 1470 prs = 0; /* partition scoped */ 1471 r = 1; /* radix format */ 1472 rs = 1 & ((1UL << 32) - 1); /* any LPID value to flush guest mappings */ 1473 1474 asm volatile("ptesync": : :"memory"); 1475 /* 1476 * now flush guest entries by passing PRS = 1 and LPID != 0 1477 */ 1478 asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) 1479 : : "r"(rb), "i"(r), "i"(1), "i"(ric), "r"(rs) : "memory"); 1480 /* 1481 * now flush host entires by passing PRS = 0 and LPID == 0 1482 */ 1483 asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) 1484 : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(0) : "memory"); 1485 asm volatile("eieio; tlbsync; ptesync": : :"memory"); 1486 } 1487 1488 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 1489 /* 1490 * Performs process-scoped invalidations for a given LPID 1491 * as part of H_RPT_INVALIDATE hcall. 1492 */ 1493 void do_h_rpt_invalidate_prt(unsigned long pid, unsigned long lpid, 1494 unsigned long type, unsigned long pg_sizes, 1495 unsigned long start, unsigned long end) 1496 { 1497 unsigned long psize, nr_pages; 1498 struct mmu_psize_def *def; 1499 bool flush_pid; 1500 1501 /* 1502 * A H_RPTI_TYPE_ALL request implies RIC=3, hence 1503 * do a single IS=1 based flush. 1504 */ 1505 if ((type & H_RPTI_TYPE_ALL) == H_RPTI_TYPE_ALL) { 1506 _tlbie_pid_lpid(pid, lpid, RIC_FLUSH_ALL); 1507 return; 1508 } 1509 1510 if (type & H_RPTI_TYPE_PWC) 1511 _tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC); 1512 1513 /* Full PID flush */ 1514 if (start == 0 && end == -1) 1515 return _tlbie_pid_lpid(pid, lpid, RIC_FLUSH_TLB); 1516 1517 /* Do range invalidation for all the valid page sizes */ 1518 for (psize = 0; psize < MMU_PAGE_COUNT; psize++) { 1519 def = &mmu_psize_defs[psize]; 1520 if (!(pg_sizes & def->h_rpt_pgsize)) 1521 continue; 1522 1523 nr_pages = (end - start) >> def->shift; 1524 flush_pid = nr_pages > tlb_single_page_flush_ceiling; 1525 1526 /* 1527 * If the number of pages spanning the range is above 1528 * the ceiling, convert the request into a full PID flush. 1529 * And since PID flush takes out all the page sizes, there 1530 * is no need to consider remaining page sizes. 1531 */ 1532 if (flush_pid) { 1533 _tlbie_pid_lpid(pid, lpid, RIC_FLUSH_TLB); 1534 return; 1535 } 1536 _tlbie_va_range_lpid(start, end, pid, lpid, 1537 (1UL << def->shift), psize, false); 1538 } 1539 } 1540 EXPORT_SYMBOL_GPL(do_h_rpt_invalidate_prt); 1541 1542 #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ 1543 1544 static int __init create_tlb_single_page_flush_ceiling(void) 1545 { 1546 debugfs_create_u32("tlb_single_page_flush_ceiling", 0600, 1547 arch_debugfs_dir, &tlb_single_page_flush_ceiling); 1548 debugfs_create_u32("tlb_local_single_page_flush_ceiling", 0600, 1549 arch_debugfs_dir, &tlb_local_single_page_flush_ceiling); 1550 return 0; 1551 } 1552 late_initcall(create_tlb_single_page_flush_ceiling); 1553 1554