// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * TLB flush routines for radix kernels.
 *
 * Copyright 2015-2016, Aneesh Kumar K.V, IBM Corporation.
 */

#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/memblock.h>
#include <linux/mmu_context.h>
#include <linux/sched/mm.h>
#include <linux/debugfs.h>

#include <asm/ppc-opcode.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/trace.h>
#include <asm/cputhreads.h>
#include <asm/plpar_wrappers.h>

#include "internal.h"

/*
 * tlbiel instruction for radix, set invalidation
 * i.e., r=1 and is=01 or is=10 or is=11
 */
static __always_inline void tlbiel_radix_set_isa300(unsigned int set, unsigned int is,
					unsigned int pid,
					unsigned int ric, unsigned int prs)
{
	unsigned long rb;
	unsigned long rs;

	rb = (set << PPC_BITLSHIFT(51)) | (is << PPC_BITLSHIFT(53));
	rs = ((unsigned long)pid << PPC_BITLSHIFT(31));

	asm volatile(PPC_TLBIEL(%0, %1, %2, %3, 1)
		     : : "r"(rb), "r"(rs), "i"(ric), "i"(prs)
		     : "memory");
}

static void tlbiel_all_isa300(unsigned int num_sets, unsigned int is)
{
	unsigned int set;

	asm volatile("ptesync": : :"memory");

	/*
	 * Flush the first set of the TLB, and the entire Page Walk Cache
	 * and partition table entries. Then flush the remaining sets of the
	 * TLB.
	 */

	if (early_cpu_has_feature(CPU_FTR_HVMODE)) {
		/* MSR[HV] should flush partition scope translations first. */
		tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 0);

		if (!early_cpu_has_feature(CPU_FTR_ARCH_31)) {
			for (set = 1; set < num_sets; set++)
				tlbiel_radix_set_isa300(set, is, 0,
							RIC_FLUSH_TLB, 0);
		}
	}

	/* Flush process scoped entries. */
	tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 1);

	if (!early_cpu_has_feature(CPU_FTR_ARCH_31)) {
		for (set = 1; set < num_sets; set++)
			tlbiel_radix_set_isa300(set, is, 0, RIC_FLUSH_TLB, 1);
	}

	ppc_after_tlbiel_barrier();
}

void radix__tlbiel_all(unsigned int action)
{
	unsigned int is;

	switch (action) {
	case TLB_INVAL_SCOPE_GLOBAL:
		is = 3;
		break;
	case TLB_INVAL_SCOPE_LPID:
		is = 2;
		break;
	default:
		BUG();
	}

	if (early_cpu_has_feature(CPU_FTR_ARCH_300))
		tlbiel_all_isa300(POWER9_TLB_SETS_RADIX, is);
	else
		WARN(1, "%s called on pre-POWER9 CPU\n", __func__);

	asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT "; isync" : : :"memory");
}

static __always_inline void __tlbiel_pid(unsigned long pid, int set,
				unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = PPC_BIT(53); /* IS = 1 */
	rb |= set << PPC_BITLSHIFT(51);
	rs = ((unsigned long)pid) << PPC_BITLSHIFT(31);
	prs = 1; /* process scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(0, 1, rb, rs, ric, prs, r);
}

static __always_inline void __tlbie_pid(unsigned long pid, unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = PPC_BIT(53); /* IS = 1 */
	rs = pid << PPC_BITLSHIFT(31);
	prs = 1; /* process scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(0, 0, rb, rs, ric, prs, r);
}

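/*
 * As __tlbie_pid(), but RS also carries the LPID in its low 32 bits so the
 * invalidation is scoped to a specific partition. Used by the
 * H_RPT_INVALIDATE handling further below.
 */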
static __always_inline void __tlbie_pid_lpid(unsigned long pid,
					     unsigned long lpid,
					     unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = PPC_BIT(53); /* IS = 1 */
	rs = (pid << PPC_BITLSHIFT(31)) | (lpid & ~(PPC_BITMASK(0, 31)));
	prs = 1; /* process scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(0, 0, rb, rs, ric, prs, r);
}

static __always_inline void __tlbie_lpid(unsigned long lpid, unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = PPC_BIT(52); /* IS = 2 */
	rs = lpid;
	prs = 0; /* partition scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
}

static __always_inline void __tlbie_lpid_guest(unsigned long lpid, unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = PPC_BIT(52); /* IS = 2 */
	rs = lpid;
	prs = 1; /* process scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
}

static __always_inline void __tlbiel_va(unsigned long va, unsigned long pid,
					unsigned long ap, unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = va & ~(PPC_BITMASK(52, 63));
	rb |= ap << PPC_BITLSHIFT(58);
	rs = pid << PPC_BITLSHIFT(31);
	prs = 1; /* process scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(0, 1, rb, rs, ric, prs, r);
}

static __always_inline void __tlbie_va(unsigned long va, unsigned long pid,
				       unsigned long ap, unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = va & ~(PPC_BITMASK(52, 63));
	rb |= ap << PPC_BITLSHIFT(58);
	rs = pid << PPC_BITLSHIFT(31);
	prs = 1; /* process scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(0, 0, rb, rs, ric, prs, r);
}

static __always_inline void __tlbie_va_lpid(unsigned long va, unsigned long pid,
					    unsigned long lpid,
					    unsigned long ap, unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = va & ~(PPC_BITMASK(52, 63));
	rb |= ap << PPC_BITLSHIFT(58);
	rs = (pid << PPC_BITLSHIFT(31)) | (lpid & ~(PPC_BITMASK(0, 31)));
	prs = 1; /* process scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(0, 0, rb, rs, ric, prs, r);
}

static __always_inline void __tlbie_lpid_va(unsigned long va, unsigned long lpid,
					    unsigned long ap, unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = va & ~(PPC_BITMASK(52, 63));
	rb |= ap << PPC_BITLSHIFT(58);
	rs = lpid;
	prs = 0; /* partition scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
}

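/*
 * The fixup_tlbie_*() helpers issue extra tlbie operations after the main
 * invalidation sequence to work around POWER9 tlbie errata
 * (CPU_FTR_P9_TLBIE_ERAT_BUG and CPU_FTR_P9_TLBIE_STQ_BUG). Each variant
 * matches the scope (va/pid/lpid) of the flush it follows.
 */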
static inline void fixup_tlbie_va(unsigned long va, unsigned long pid,
				  unsigned long ap)
{
	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_va(va, 0, ap, RIC_FLUSH_TLB);
	}

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_va(va, pid, ap, RIC_FLUSH_TLB);
	}
}

static inline void fixup_tlbie_va_range(unsigned long va, unsigned long pid,
					unsigned long ap)
{
	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_pid(0, RIC_FLUSH_TLB);
	}

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_va(va, pid, ap, RIC_FLUSH_TLB);
	}
}

static inline void fixup_tlbie_va_range_lpid(unsigned long va,
					     unsigned long pid,
					     unsigned long lpid,
					     unsigned long ap)
{
	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
		asm volatile("ptesync" : : : "memory");
		__tlbie_pid_lpid(0, lpid, RIC_FLUSH_TLB);
	}

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
		asm volatile("ptesync" : : : "memory");
		__tlbie_va_lpid(va, pid, lpid, ap, RIC_FLUSH_TLB);
	}
}

static inline void fixup_tlbie_pid(unsigned long pid)
{
	/*
	 * We can use any address for the invalidation, pick one which is
	 * probably unused as an optimisation.
	 */
	unsigned long va = ((1UL << 52) - 1);

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_pid(0, RIC_FLUSH_TLB);
	}

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_va(va, pid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB);
	}
}

static inline void fixup_tlbie_pid_lpid(unsigned long pid, unsigned long lpid)
{
	/*
	 * We can use any address for the invalidation, pick one which is
	 * probably unused as an optimisation.
	 */
	unsigned long va = ((1UL << 52) - 1);

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
		asm volatile("ptesync" : : : "memory");
		__tlbie_pid_lpid(0, lpid, RIC_FLUSH_TLB);
	}

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
		asm volatile("ptesync" : : : "memory");
		__tlbie_va_lpid(va, pid, lpid, mmu_get_ap(MMU_PAGE_64K),
				RIC_FLUSH_TLB);
	}
}

static inline void fixup_tlbie_lpid_va(unsigned long va, unsigned long lpid,
				       unsigned long ap)
{
	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_lpid_va(va, 0, ap, RIC_FLUSH_TLB);
	}

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_lpid_va(va, lpid, ap, RIC_FLUSH_TLB);
	}
}

static inline void fixup_tlbie_lpid(unsigned long lpid)
{
	/*
	 * We can use any address for the invalidation, pick one which is
	 * probably unused as an optimisation.
	 */
	unsigned long va = ((1UL << 52) - 1);

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_lpid(0, RIC_FLUSH_TLB);
	}

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_lpid_va(va, lpid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB);
	}
}

/*
 * We use 128 sets in radix mode and 256 sets in hpt mode.
 */
static inline void _tlbiel_pid(unsigned long pid, unsigned long ric)
{
	int set;

	asm volatile("ptesync": : :"memory");

	switch (ric) {
	case RIC_FLUSH_PWC:

		/* For PWC, only one flush is needed */
		__tlbiel_pid(pid, 0, RIC_FLUSH_PWC);
		ppc_after_tlbiel_barrier();
		return;
	case RIC_FLUSH_TLB:
		__tlbiel_pid(pid, 0, RIC_FLUSH_TLB);
		break;
	case RIC_FLUSH_ALL:
	default:
		/*
		 * Flush the first set of the TLB, and if
		 * we're doing a RIC_FLUSH_ALL, also flush
		 * the entire Page Walk Cache.
		 */
		__tlbiel_pid(pid, 0, RIC_FLUSH_ALL);
	}

	if (!cpu_has_feature(CPU_FTR_ARCH_31)) {
		/* For the remaining sets, just flush the TLB */
		for (set = 1; set < POWER9_TLB_SETS_RADIX; set++)
			__tlbiel_pid(pid, set, RIC_FLUSH_TLB);
	}

	ppc_after_tlbiel_barrier();
	asm volatile(PPC_RADIX_INVALIDATE_ERAT_USER "; isync" : : :"memory");
}

static inline void _tlbie_pid(unsigned long pid, unsigned long ric)
{
	asm volatile("ptesync": : :"memory");

	/*
	 * Work around the fact that the "ric" argument to __tlbie_pid
	 * must be a compile-time constant to match the "i" constraint
	 * in the asm statement.
	 */
	switch (ric) {
	case RIC_FLUSH_TLB:
		__tlbie_pid(pid, RIC_FLUSH_TLB);
		fixup_tlbie_pid(pid);
		break;
	case RIC_FLUSH_PWC:
		__tlbie_pid(pid, RIC_FLUSH_PWC);
		break;
	case RIC_FLUSH_ALL:
	default:
		__tlbie_pid(pid, RIC_FLUSH_ALL);
		fixup_tlbie_pid(pid);
	}
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

static inline void _tlbie_pid_lpid(unsigned long pid, unsigned long lpid,
				   unsigned long ric)
{
	asm volatile("ptesync" : : : "memory");

	/*
	 * Work around the fact that the "ric" argument to __tlbie_pid_lpid
	 * must be a compile-time constant to match the "i" constraint
	 * in the asm statement.
	 */
	switch (ric) {
	case RIC_FLUSH_TLB:
		__tlbie_pid_lpid(pid, lpid, RIC_FLUSH_TLB);
		fixup_tlbie_pid_lpid(pid, lpid);
		break;
	case RIC_FLUSH_PWC:
		__tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC);
		break;
	case RIC_FLUSH_ALL:
	default:
		__tlbie_pid_lpid(pid, lpid, RIC_FLUSH_ALL);
		fixup_tlbie_pid_lpid(pid, lpid);
	}
	asm volatile("eieio; tlbsync; ptesync" : : : "memory");
}

struct tlbiel_pid {
	unsigned long pid;
	unsigned long ric;
};

static void do_tlbiel_pid(void *info)
{
	struct tlbiel_pid *t = info;

	if (t->ric == RIC_FLUSH_TLB)
		_tlbiel_pid(t->pid, RIC_FLUSH_TLB);
	else if (t->ric == RIC_FLUSH_PWC)
		_tlbiel_pid(t->pid, RIC_FLUSH_PWC);
	else
		_tlbiel_pid(t->pid, RIC_FLUSH_ALL);
}

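/*
 * Broadcast a process-scoped tlbiel to all CPUs in the mm's cpumask via IPI.
 * Coprocessors (nest MMU) are not covered by tlbiel, so follow up with a
 * global tlbie when the mm has active copros.
 */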
static inline void _tlbiel_pid_multicast(struct mm_struct *mm,
				unsigned long pid, unsigned long ric)
{
	struct cpumask *cpus = mm_cpumask(mm);
	struct tlbiel_pid t = { .pid = pid, .ric = ric };

	on_each_cpu_mask(cpus, do_tlbiel_pid, &t, 1);
	/*
	 * Always want the CPU translations to be invalidated with tlbiel in
	 * these paths, so while coprocessors must use tlbie, we can not
	 * optimise away the tlbiel component.
	 */
	if (atomic_read(&mm->context.copros) > 0)
		_tlbie_pid(pid, RIC_FLUSH_ALL);
}

static inline void _tlbie_lpid(unsigned long lpid, unsigned long ric)
{
	asm volatile("ptesync": : :"memory");

	/*
	 * Work around the fact that the "ric" argument to __tlbie_lpid
	 * must be a compile-time constant to match the "i" constraint
	 * in the asm statement.
	 */
	switch (ric) {
	case RIC_FLUSH_TLB:
		__tlbie_lpid(lpid, RIC_FLUSH_TLB);
		fixup_tlbie_lpid(lpid);
		break;
	case RIC_FLUSH_PWC:
		__tlbie_lpid(lpid, RIC_FLUSH_PWC);
		break;
	case RIC_FLUSH_ALL:
	default:
		__tlbie_lpid(lpid, RIC_FLUSH_ALL);
		fixup_tlbie_lpid(lpid);
	}
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

static __always_inline void _tlbie_lpid_guest(unsigned long lpid, unsigned long ric)
{
	/*
	 * Work around the fact that the "ric" argument to __tlbie_lpid_guest
	 * must be a compile-time constant to match the "i" constraint
	 * in the asm statement.
	 */
	switch (ric) {
	case RIC_FLUSH_TLB:
		__tlbie_lpid_guest(lpid, RIC_FLUSH_TLB);
		break;
	case RIC_FLUSH_PWC:
		__tlbie_lpid_guest(lpid, RIC_FLUSH_PWC);
		break;
	case RIC_FLUSH_ALL:
	default:
		__tlbie_lpid_guest(lpid, RIC_FLUSH_ALL);
	}
	fixup_tlbie_lpid(lpid);
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

static inline void __tlbiel_va_range(unsigned long start, unsigned long end,
				     unsigned long pid, unsigned long page_size,
				     unsigned long psize)
{
	unsigned long addr;
	unsigned long ap = mmu_get_ap(psize);

	for (addr = start; addr < end; addr += page_size)
		__tlbiel_va(addr, pid, ap, RIC_FLUSH_TLB);
}

static __always_inline void _tlbiel_va(unsigned long va, unsigned long pid,
				       unsigned long psize, unsigned long ric)
{
	unsigned long ap = mmu_get_ap(psize);

	asm volatile("ptesync": : :"memory");
	__tlbiel_va(va, pid, ap, ric);
	ppc_after_tlbiel_barrier();
}

static inline void _tlbiel_va_range(unsigned long start, unsigned long end,
				    unsigned long pid, unsigned long page_size,
				    unsigned long psize, bool also_pwc)
{
	asm volatile("ptesync": : :"memory");
	if (also_pwc)
		__tlbiel_pid(pid, 0, RIC_FLUSH_PWC);
	__tlbiel_va_range(start, end, pid, page_size, psize);
	ppc_after_tlbiel_barrier();
}

static inline void __tlbie_va_range(unsigned long start, unsigned long end,
				    unsigned long pid, unsigned long page_size,
				    unsigned long psize)
{
	unsigned long addr;
	unsigned long ap = mmu_get_ap(psize);

	for (addr = start; addr < end; addr += page_size)
		__tlbie_va(addr, pid, ap, RIC_FLUSH_TLB);

	fixup_tlbie_va_range(addr - page_size, pid, ap);
}

static inline void __tlbie_va_range_lpid(unsigned long start, unsigned long end,
					 unsigned long pid, unsigned long lpid,
					 unsigned long page_size,
					 unsigned long psize)
{
	unsigned long addr;
	unsigned long ap = mmu_get_ap(psize);

	for (addr = start; addr < end; addr += page_size)
		__tlbie_va_lpid(addr, pid, lpid, ap, RIC_FLUSH_TLB);

	fixup_tlbie_va_range_lpid(addr - page_size, pid, lpid, ap);
}

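/*
 * Global single-page invalidation: ptesync, tlbie, errata fixup, then
 * eieio; tlbsync; ptesync to complete the sequence.
 */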
static __always_inline void _tlbie_va(unsigned long va, unsigned long pid,
				      unsigned long psize, unsigned long ric)
{
	unsigned long ap = mmu_get_ap(psize);

	asm volatile("ptesync": : :"memory");
	__tlbie_va(va, pid, ap, ric);
	fixup_tlbie_va(va, pid, ap);
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

struct tlbiel_va {
	unsigned long pid;
	unsigned long va;
	unsigned long psize;
	unsigned long ric;
};

static void do_tlbiel_va(void *info)
{
	struct tlbiel_va *t = info;

	if (t->ric == RIC_FLUSH_TLB)
		_tlbiel_va(t->va, t->pid, t->psize, RIC_FLUSH_TLB);
	else if (t->ric == RIC_FLUSH_PWC)
		_tlbiel_va(t->va, t->pid, t->psize, RIC_FLUSH_PWC);
	else
		_tlbiel_va(t->va, t->pid, t->psize, RIC_FLUSH_ALL);
}

static inline void _tlbiel_va_multicast(struct mm_struct *mm,
					unsigned long va, unsigned long pid,
					unsigned long psize, unsigned long ric)
{
	struct cpumask *cpus = mm_cpumask(mm);
	struct tlbiel_va t = { .va = va, .pid = pid, .psize = psize, .ric = ric };

	on_each_cpu_mask(cpus, do_tlbiel_va, &t, 1);
	if (atomic_read(&mm->context.copros) > 0)
		_tlbie_va(va, pid, psize, RIC_FLUSH_TLB);
}

struct tlbiel_va_range {
	unsigned long pid;
	unsigned long start;
	unsigned long end;
	unsigned long page_size;
	unsigned long psize;
	bool also_pwc;
};

static void do_tlbiel_va_range(void *info)
{
	struct tlbiel_va_range *t = info;

	_tlbiel_va_range(t->start, t->end, t->pid, t->page_size,
			 t->psize, t->also_pwc);
}

static __always_inline void _tlbie_lpid_va(unsigned long va, unsigned long lpid,
					   unsigned long psize, unsigned long ric)
{
	unsigned long ap = mmu_get_ap(psize);

	asm volatile("ptesync": : :"memory");
	__tlbie_lpid_va(va, lpid, ap, ric);
	fixup_tlbie_lpid_va(va, lpid, ap);
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

static inline void _tlbie_va_range(unsigned long start, unsigned long end,
				   unsigned long pid, unsigned long page_size,
				   unsigned long psize, bool also_pwc)
{
	asm volatile("ptesync": : :"memory");
	if (also_pwc)
		__tlbie_pid(pid, RIC_FLUSH_PWC);
	__tlbie_va_range(start, end, pid, page_size, psize);
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

static inline void _tlbie_va_range_lpid(unsigned long start, unsigned long end,
					unsigned long pid, unsigned long lpid,
					unsigned long page_size,
					unsigned long psize, bool also_pwc)
{
	asm volatile("ptesync" : : : "memory");
	if (also_pwc)
		__tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC);
	__tlbie_va_range_lpid(start, end, pid, lpid, page_size, psize);
	asm volatile("eieio; tlbsync; ptesync" : : : "memory");
}

static inline void _tlbiel_va_range_multicast(struct mm_struct *mm,
				unsigned long start, unsigned long end,
				unsigned long pid, unsigned long page_size,
				unsigned long psize, bool also_pwc)
{
	struct cpumask *cpus = mm_cpumask(mm);
	struct tlbiel_va_range t = { .start = start, .end = end,
			.pid = pid, .page_size = page_size,
			.psize = psize, .also_pwc = also_pwc };

	on_each_cpu_mask(cpus, do_tlbiel_va_range, &t, 1);
	if (atomic_read(&mm->context.copros) > 0)
		_tlbie_va_range(start, end, pid, page_size, psize, also_pwc);
}

/*
 * Base TLB flushing operations:
 *
 *  - flush_tlb_mm(mm) flushes the specified mm context TLBs
 *  - flush_tlb_page(vma, vmaddr) flushes one page
 *  - flush_tlb_range(vma, start, end) flushes a range of pages
 *  - flush_tlb_kernel_range(start, end) flushes kernel pages
 *
 *  - local_* variants of page and mm only apply to the current
 *    processor
 */
void radix__local_flush_tlb_mm(struct mm_struct *mm)
{
	unsigned long pid = mm->context.id;

	if (WARN_ON_ONCE(pid == MMU_NO_CONTEXT))
		return;

	preempt_disable();
	_tlbiel_pid(pid, RIC_FLUSH_TLB);
	preempt_enable();
}
EXPORT_SYMBOL(radix__local_flush_tlb_mm);

#ifndef CONFIG_SMP
void radix__local_flush_all_mm(struct mm_struct *mm)
{
	unsigned long pid = mm->context.id;

	if (WARN_ON_ONCE(pid == MMU_NO_CONTEXT))
		return;

	preempt_disable();
	_tlbiel_pid(pid, RIC_FLUSH_ALL);
	preempt_enable();
}
EXPORT_SYMBOL(radix__local_flush_all_mm);

static void __flush_all_mm(struct mm_struct *mm, bool fullmm)
{
	radix__local_flush_all_mm(mm);
}
#endif /* CONFIG_SMP */

void radix__local_flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
				       int psize)
{
	unsigned long pid = mm->context.id;

	if (WARN_ON_ONCE(pid == MMU_NO_CONTEXT))
		return;

	preempt_disable();
	_tlbiel_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
	preempt_enable();
}

void radix__local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
{
#ifdef CONFIG_HUGETLB_PAGE
	/* need the return fix for nohash.c */
	if (is_vm_hugetlb_page(vma))
		return radix__local_flush_hugetlb_page(vma, vmaddr);
#endif
	radix__local_flush_tlb_page_psize(vma->vm_mm, vmaddr, mmu_virtual_psize);
}
EXPORT_SYMBOL(radix__local_flush_tlb_page);

static bool mm_needs_flush_escalation(struct mm_struct *mm)
{
	/*
	 * The P9 nest MMU has issues with the page walk cache caching PTEs
	 * and not flushing them when RIC = 0 for a PID/LPID invalidate.
	 *
	 * This may have been fixed in shipping firmware (by disabling PWC
	 * or preventing it from caching PTEs), but until that is confirmed,
	 * this workaround is required - escalate all RIC=0 IS=1/2/3 flushes
	 * to RIC=2.
	 *
	 * POWER10 (and P9P) does not have this problem.
	 */
	if (cpu_has_feature(CPU_FTR_ARCH_31))
		return false;
	if (atomic_read(&mm->context.copros) > 0)
		return true;
	return false;
}

/*
 * If always_flush is true, then flush even if this CPU can't be removed
 * from mm_cpumask.
 */
void exit_lazy_flush_tlb(struct mm_struct *mm, bool always_flush)
{
	unsigned long pid = mm->context.id;
	int cpu = smp_processor_id();

	/*
	 * A kthread could have done a mmget_not_zero() after the flushing CPU
	 * checked mm_cpumask, and be in the process of kthread_use_mm when
	 * interrupted here. In that case, current->mm will be set to mm,
	 * because kthread_use_mm() setting ->mm and switching to the mm is
	 * done with interrupts off.
	 */
	if (current->mm == mm)
		goto out;

	if (current->active_mm == mm) {
		unsigned long flags;

		WARN_ON_ONCE(current->mm != NULL);
		/*
		 * It is a kernel thread and is using mm as the lazy tlb, so
		 * switch it to init_mm. This is not always called from IPI
		 * (e.g., flush_type_needed), so must disable irqs.
		 */
		local_irq_save(flags);
		mmgrab_lazy_tlb(&init_mm);
		current->active_mm = &init_mm;
		switch_mm_irqs_off(mm, &init_mm, current);
		mmdrop_lazy_tlb(mm);
		local_irq_restore(flags);
	}

	/*
	 * This IPI may be initiated from any source including those not
	 * running the mm, so there may be a racing IPI that comes after
	 * this one which finds the cpumask already clear. Check and avoid
	 * underflowing the active_cpus count in that case. The race should
	 * not otherwise be a problem, but the TLB must be flushed because
	 * that's what the caller expects.
	 */
	if (cpumask_test_cpu(cpu, mm_cpumask(mm))) {
		atomic_dec(&mm->context.active_cpus);
		cpumask_clear_cpu(cpu, mm_cpumask(mm));
		always_flush = true;
	}

out:
	if (always_flush)
		_tlbiel_pid(pid, RIC_FLUSH_ALL);
}

#ifdef CONFIG_SMP
static void do_exit_flush_lazy_tlb(void *arg)
{
	struct mm_struct *mm = arg;
	exit_lazy_flush_tlb(mm, true);
}

static void exit_flush_lazy_tlbs(struct mm_struct *mm)
{
	/*
	 * Would be nice if this was async so it could be run in
	 * parallel with our local flush, but generic code does not
	 * give a good API for it. Could extend the generic code or
	 * make a special powerpc IPI for flushing TLBs.
	 * For now it's not too performance critical.
	 */
	smp_call_function_many(mm_cpumask(mm), do_exit_flush_lazy_tlb,
				(void *)mm, 1);
}

#else /* CONFIG_SMP */
static inline void exit_flush_lazy_tlbs(struct mm_struct *mm) { }
#endif /* CONFIG_SMP */

static DEFINE_PER_CPU(unsigned int, mm_cpumask_trim_clock);

/*
 * Interval between flushes at which we send out IPIs to check whether the
 * mm_cpumask can be trimmed for the case where it's not a single-threaded
 * process flushing its own mm. The intent is to reduce the cost of later
 * flushes. Don't want this to be so low that it adds noticeable cost to TLB
 * flushing, or so high that it doesn't help reduce global TLBIEs.
 */
static unsigned long tlb_mm_cpumask_trim_timer = 1073;

static bool tick_and_test_trim_clock(void)
{
	if (__this_cpu_inc_return(mm_cpumask_trim_clock) ==
			tlb_mm_cpumask_trim_timer) {
		__this_cpu_write(mm_cpumask_trim_clock, 0);
		return true;
	}
	return false;
}

enum tlb_flush_type {
	FLUSH_TYPE_NONE,
	FLUSH_TYPE_LOCAL,
	FLUSH_TYPE_GLOBAL,
};

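/*
 * Decide how a flush for this mm must be performed: skipped entirely
 * (FLUSH_TYPE_NONE), done locally with tlbiel (FLUSH_TYPE_LOCAL), or
 * broadcast to other CPUs and the nest MMU (FLUSH_TYPE_GLOBAL). Also
 * opportunistically trims mm_cpumask when it looks stale.
 */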
static enum tlb_flush_type flush_type_needed(struct mm_struct *mm, bool fullmm)
{
	int active_cpus = atomic_read(&mm->context.active_cpus);
	int cpu = smp_processor_id();

	if (active_cpus == 0)
		return FLUSH_TYPE_NONE;
	if (active_cpus == 1 && cpumask_test_cpu(cpu, mm_cpumask(mm))) {
		if (current->mm != mm) {
			/*
			 * Asynchronous flush sources may trim down to nothing
			 * if the process is not running, so occasionally try
			 * to trim.
			 */
			if (tick_and_test_trim_clock()) {
				exit_lazy_flush_tlb(mm, true);
				return FLUSH_TYPE_NONE;
			}
		}
		return FLUSH_TYPE_LOCAL;
	}

	/* Coprocessors require TLBIE to invalidate nMMU. */
	if (atomic_read(&mm->context.copros) > 0)
		return FLUSH_TYPE_GLOBAL;

	/*
	 * In the fullmm case there's no point doing the exit_flush_lazy_tlbs
	 * because the mm is being taken down anyway, and a TLBIE tends to
	 * be faster than an IPI+TLBIEL.
	 */
	if (fullmm)
		return FLUSH_TYPE_GLOBAL;

	/*
	 * If we are running the only thread of a single-threaded process,
	 * then we should almost always be able to trim off the rest of the
	 * CPU mask (except in the case of use_mm() races), so always try
	 * trimming the mask.
	 */
	if (atomic_read(&mm->mm_users) <= 1 && current->mm == mm) {
		exit_flush_lazy_tlbs(mm);
		/*
		 * use_mm() race could prevent IPIs from being able to clear
		 * the cpumask here, however those users are established
		 * after our first check (and so after the PTEs are removed),
		 * and the TLB still gets flushed by the IPI, so this CPU
		 * will only require a local flush.
		 */
		return FLUSH_TYPE_LOCAL;
	}

	/*
	 * Occasionally try to trim down the cpumask. It's possible this can
	 * bring the mask to zero, which results in no flush.
	 */
	if (tick_and_test_trim_clock()) {
		exit_flush_lazy_tlbs(mm);
		if (current->mm == mm)
			return FLUSH_TYPE_LOCAL;
		if (cpumask_test_cpu(cpu, mm_cpumask(mm)))
			exit_lazy_flush_tlb(mm, true);
		return FLUSH_TYPE_NONE;
	}

	return FLUSH_TYPE_GLOBAL;
}

#ifdef CONFIG_SMP
void radix__flush_tlb_mm(struct mm_struct *mm)
{
	unsigned long pid;
	enum tlb_flush_type type;

	pid = mm->context.id;
	if (WARN_ON_ONCE(pid == MMU_NO_CONTEXT))
		return;

	preempt_disable();
	/*
	 * Order loads of mm_cpumask (in flush_type_needed) vs previous
	 * stores to clear ptes before the invalidate. See barrier in
	 * switch_mm_irqs_off
	 */
	smp_mb();
	type = flush_type_needed(mm, false);
	if (type == FLUSH_TYPE_LOCAL) {
		_tlbiel_pid(pid, RIC_FLUSH_TLB);
	} else if (type == FLUSH_TYPE_GLOBAL) {
		if (!mmu_has_feature(MMU_FTR_GTSE)) {
			unsigned long tgt = H_RPTI_TARGET_CMMU;

			if (atomic_read(&mm->context.copros) > 0)
				tgt |= H_RPTI_TARGET_NMMU;
			pseries_rpt_invalidate(pid, tgt, H_RPTI_TYPE_TLB,
					       H_RPTI_PAGE_ALL, 0, -1UL);
		} else if (cputlb_use_tlbie()) {
			if (mm_needs_flush_escalation(mm))
				_tlbie_pid(pid, RIC_FLUSH_ALL);
			else
				_tlbie_pid(pid, RIC_FLUSH_TLB);
		} else {
			_tlbiel_pid_multicast(mm, pid, RIC_FLUSH_TLB);
		}
	}
	preempt_enable();
	mmu_notifier_arch_invalidate_secondary_tlbs(mm, 0, -1UL);
}
EXPORT_SYMBOL(radix__flush_tlb_mm);

static void __flush_all_mm(struct mm_struct *mm, bool fullmm)
{
	unsigned long pid;
	enum tlb_flush_type type;

	pid = mm->context.id;
	if (WARN_ON_ONCE(pid == MMU_NO_CONTEXT))
		return;

	preempt_disable();
	smp_mb(); /* see radix__flush_tlb_mm */
	type = flush_type_needed(mm, fullmm);
	if (type == FLUSH_TYPE_LOCAL) {
		_tlbiel_pid(pid, RIC_FLUSH_ALL);
	} else if (type == FLUSH_TYPE_GLOBAL) {
		if (!mmu_has_feature(MMU_FTR_GTSE)) {
			unsigned long tgt = H_RPTI_TARGET_CMMU;
			unsigned long type = H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC |
					     H_RPTI_TYPE_PRT;

			if (atomic_read(&mm->context.copros) > 0)
				tgt |= H_RPTI_TARGET_NMMU;
			pseries_rpt_invalidate(pid, tgt, type,
					       H_RPTI_PAGE_ALL, 0, -1UL);
		} else if (cputlb_use_tlbie())
			_tlbie_pid(pid, RIC_FLUSH_ALL);
		else
			_tlbiel_pid_multicast(mm, pid, RIC_FLUSH_ALL);
	}
	preempt_enable();
	mmu_notifier_arch_invalidate_secondary_tlbs(mm, 0, -1UL);
}

void radix__flush_all_mm(struct mm_struct *mm)
{
	__flush_all_mm(mm, false);
}
EXPORT_SYMBOL(radix__flush_all_mm);

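/*
 * Flush one page of the given page size from the TLBs of every CPU (and,
 * where needed, the nest MMU) that may be using this mm.
 */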
void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
				 int psize)
{
	unsigned long pid;
	enum tlb_flush_type type;

	pid = mm->context.id;
	if (WARN_ON_ONCE(pid == MMU_NO_CONTEXT))
		return;

	preempt_disable();
	smp_mb(); /* see radix__flush_tlb_mm */
	type = flush_type_needed(mm, false);
	if (type == FLUSH_TYPE_LOCAL) {
		_tlbiel_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
	} else if (type == FLUSH_TYPE_GLOBAL) {
		if (!mmu_has_feature(MMU_FTR_GTSE)) {
			unsigned long tgt, pg_sizes, size;

			tgt = H_RPTI_TARGET_CMMU;
			pg_sizes = psize_to_rpti_pgsize(psize);
			size = 1UL << mmu_psize_to_shift(psize);

			if (atomic_read(&mm->context.copros) > 0)
				tgt |= H_RPTI_TARGET_NMMU;
			pseries_rpt_invalidate(pid, tgt, H_RPTI_TYPE_TLB,
					       pg_sizes, vmaddr,
					       vmaddr + size);
		} else if (cputlb_use_tlbie())
			_tlbie_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
		else
			_tlbiel_va_multicast(mm, vmaddr, pid, psize, RIC_FLUSH_TLB);
	}
	preempt_enable();
}

void radix__flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
{
#ifdef CONFIG_HUGETLB_PAGE
	if (is_vm_hugetlb_page(vma))
		return radix__flush_hugetlb_page(vma, vmaddr);
#endif
	radix__flush_tlb_page_psize(vma->vm_mm, vmaddr, mmu_virtual_psize);
}
EXPORT_SYMBOL(radix__flush_tlb_page);

#endif /* CONFIG_SMP */

static void do_tlbiel_kernel(void *info)
{
	_tlbiel_pid(0, RIC_FLUSH_ALL);
}

static inline void _tlbiel_kernel_broadcast(void)
{
	on_each_cpu(do_tlbiel_kernel, NULL, 1);
	if (tlbie_capable) {
		/*
		 * Coherent accelerators don't refcount kernel memory mappings,
		 * so have to always issue a tlbie for them. This is quite a
		 * slow path anyway.
		 */
		_tlbie_pid(0, RIC_FLUSH_ALL);
	}
}

/*
 * If kernel TLBIs ever become local rather than global, then
 * drivers/misc/ocxl/link.c:ocxl_link_add_pe will need some work, as it
 * assumes kernel TLBIs are global.
 */
void radix__flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
	if (!mmu_has_feature(MMU_FTR_GTSE)) {
		unsigned long tgt = H_RPTI_TARGET_CMMU | H_RPTI_TARGET_NMMU;
		unsigned long type = H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC |
				     H_RPTI_TYPE_PRT;

		pseries_rpt_invalidate(0, tgt, type, H_RPTI_PAGE_ALL,
				       start, end);
	} else if (cputlb_use_tlbie())
		_tlbie_pid(0, RIC_FLUSH_ALL);
	else
		_tlbiel_kernel_broadcast();
}
EXPORT_SYMBOL(radix__flush_tlb_kernel_range);

/*
 * Sentinel "end" value meaning the whole address space; the ranged flush
 * helpers below warn if they are handed it.
 */
#define TLB_FLUSH_ALL -1UL

/*
 * Number of pages above which we invalidate the entire PID rather than
 * flush individual pages, for local and global flushes respectively.
 *
 * tlbie goes out to the interconnect and individual ops are more costly.
 * It also does not iterate over sets like the local tlbiel variant when
 * invalidating a full PID, so it has a far lower threshold to change from
 * individual page flushes to full-pid flushes.
 */
static u32 tlb_single_page_flush_ceiling __read_mostly = 33;
static u32 tlb_local_single_page_flush_ceiling __read_mostly = POWER9_TLB_SETS_RADIX * 2;

static inline void __radix__flush_tlb_range(struct mm_struct *mm,
					    unsigned long start, unsigned long end)
{
	unsigned long pid;
	unsigned int page_shift = mmu_psize_defs[mmu_virtual_psize].shift;
	unsigned long page_size = 1UL << page_shift;
	unsigned long nr_pages = (end - start) >> page_shift;
	bool flush_pid, flush_pwc = false;
	enum tlb_flush_type type;

	pid = mm->context.id;
	if (WARN_ON_ONCE(pid == MMU_NO_CONTEXT))
		return;

	WARN_ON_ONCE(end == TLB_FLUSH_ALL);

	preempt_disable();
	smp_mb(); /* see radix__flush_tlb_mm */
	type = flush_type_needed(mm, false);
	if (type == FLUSH_TYPE_NONE)
		goto out;

	if (type == FLUSH_TYPE_GLOBAL)
		flush_pid = nr_pages > tlb_single_page_flush_ceiling;
	else
		flush_pid = nr_pages > tlb_local_single_page_flush_ceiling;
	/*
	 * A full PID flush already does the PWC flush. If this is not a full
	 * PID flush, check whether the range covers at least a PMD and force
	 * a PWC flush if so; mremap() depends on this behaviour.
	 */
	if (!flush_pid && (end - start) >= PMD_SIZE)
		flush_pwc = true;

	if (!mmu_has_feature(MMU_FTR_GTSE) && type == FLUSH_TYPE_GLOBAL) {
		unsigned long type = H_RPTI_TYPE_TLB;
		unsigned long tgt = H_RPTI_TARGET_CMMU;
		unsigned long pg_sizes = psize_to_rpti_pgsize(mmu_virtual_psize);

		if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
			pg_sizes |= psize_to_rpti_pgsize(MMU_PAGE_2M);
		if (atomic_read(&mm->context.copros) > 0)
			tgt |= H_RPTI_TARGET_NMMU;
		if (flush_pwc)
			type |= H_RPTI_TYPE_PWC;
		pseries_rpt_invalidate(pid, tgt, type, pg_sizes, start, end);
	} else if (flush_pid) {
		/*
		 * Full PID flush: use RIC_FLUSH_ALL, which also takes out the
		 * page walk cache.
		 */
		if (type == FLUSH_TYPE_LOCAL) {
			_tlbiel_pid(pid, RIC_FLUSH_ALL);
		} else {
			if (cputlb_use_tlbie()) {
				_tlbie_pid(pid, RIC_FLUSH_ALL);
			} else {
				_tlbiel_pid_multicast(mm, pid, RIC_FLUSH_ALL);
			}
		}
	} else {
		bool hflush;
		unsigned long hstart, hend;

		hstart = (start + PMD_SIZE - 1) & PMD_MASK;
		hend = end & PMD_MASK;
		hflush = IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && hstart < hend;

		if (type == FLUSH_TYPE_LOCAL) {
			asm volatile("ptesync": : :"memory");
			if (flush_pwc)
				/* For PWC, only one flush is needed */
				__tlbiel_pid(pid, 0, RIC_FLUSH_PWC);
			__tlbiel_va_range(start, end, pid, page_size, mmu_virtual_psize);
			if (hflush)
				__tlbiel_va_range(hstart, hend, pid,
						PMD_SIZE, MMU_PAGE_2M);
			ppc_after_tlbiel_barrier();
		} else if (cputlb_use_tlbie()) {
			asm volatile("ptesync": : :"memory");
			if (flush_pwc)
				__tlbie_pid(pid, RIC_FLUSH_PWC);
			__tlbie_va_range(start, end, pid, page_size, mmu_virtual_psize);
			if (hflush)
				__tlbie_va_range(hstart, hend, pid,
						PMD_SIZE, MMU_PAGE_2M);
			asm volatile("eieio; tlbsync; ptesync": : :"memory");
		} else {
			_tlbiel_va_range_multicast(mm,
					start, end, pid, page_size, mmu_virtual_psize, flush_pwc);
			if (hflush)
				_tlbiel_va_range_multicast(mm,
					hstart, hend, pid, PMD_SIZE, MMU_PAGE_2M, flush_pwc);
		}
	}
out:
	preempt_enable();
	mmu_notifier_arch_invalidate_secondary_tlbs(mm, start, end);
}

void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
			    unsigned long end)
{
#ifdef CONFIG_HUGETLB_PAGE
	if (is_vm_hugetlb_page(vma))
		return radix__flush_hugetlb_tlb_range(vma, start, end);
#endif

	__radix__flush_tlb_range(vma->vm_mm, start, end);
}
EXPORT_SYMBOL(radix__flush_tlb_range);

static int radix_get_mmu_psize(int page_size)
{
	int psize;

	if (page_size == (1UL << mmu_psize_defs[mmu_virtual_psize].shift))
		psize = mmu_virtual_psize;
	else if (page_size == (1UL << mmu_psize_defs[MMU_PAGE_2M].shift))
		psize = MMU_PAGE_2M;
	else if (page_size == (1UL << mmu_psize_defs[MMU_PAGE_1G].shift))
		psize = MMU_PAGE_1G;
	else
		return -1;
	return psize;
}

/*
 * Flush partition scoped LPID address translation for all CPUs.
 */
void radix__flush_tlb_lpid_page(unsigned int lpid,
				unsigned long addr,
				unsigned long page_size)
{
	int psize = radix_get_mmu_psize(page_size);

	_tlbie_lpid_va(addr, lpid, psize, RIC_FLUSH_TLB);
}
EXPORT_SYMBOL_GPL(radix__flush_tlb_lpid_page);

/*
 * Flush partition scoped PWC from LPID for all CPUs.
 */
void radix__flush_pwc_lpid(unsigned int lpid)
{
	_tlbie_lpid(lpid, RIC_FLUSH_PWC);
}
EXPORT_SYMBOL_GPL(radix__flush_pwc_lpid);

/*
 * Flush partition scoped translations from LPID (=LPIDR)
 */
void radix__flush_all_lpid(unsigned int lpid)
{
	_tlbie_lpid(lpid, RIC_FLUSH_ALL);
}
EXPORT_SYMBOL_GPL(radix__flush_all_lpid);

/*
 * Flush process scoped translations from LPID (=LPIDR)
 */
void radix__flush_all_lpid_guest(unsigned int lpid)
{
	_tlbie_lpid_guest(lpid, RIC_FLUSH_ALL);
}

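/*
 * mmu_gather flush hook, called when a TLB batch is flushed. Picks a full-mm
 * or ranged flush based on the gather state, and also flushes the page walk
 * cache when page tables were freed.
 */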
void radix__tlb_flush(struct mmu_gather *tlb)
{
	int psize = 0;
	struct mm_struct *mm = tlb->mm;
	int page_size = tlb->page_size;
	unsigned long start = tlb->start;
	unsigned long end = tlb->end;

	/*
	 * if page size is not something we understand, do a full mm flush
	 *
	 * A "fullmm" flush must always do a flush_all_mm (RIC=2) flush
	 * that flushes the process table entry cache upon process teardown.
	 * See the comment for radix in arch_exit_mmap().
	 */
	if (tlb->fullmm) {
		__flush_all_mm(mm, true);
	} else if ((psize = radix_get_mmu_psize(page_size)) == -1) {
		if (!tlb->freed_tables)
			radix__flush_tlb_mm(mm);
		else
			radix__flush_all_mm(mm);
	} else {
		if (!tlb->freed_tables)
			radix__flush_tlb_range_psize(mm, start, end, psize);
		else
			radix__flush_tlb_pwc_range_psize(mm, start, end, psize);
	}
}

static void __radix__flush_tlb_range_psize(struct mm_struct *mm,
				unsigned long start, unsigned long end,
				int psize, bool also_pwc)
{
	unsigned long pid;
	unsigned int page_shift = mmu_psize_defs[psize].shift;
	unsigned long page_size = 1UL << page_shift;
	unsigned long nr_pages = (end - start) >> page_shift;
	bool flush_pid;
	enum tlb_flush_type type;

	pid = mm->context.id;
	if (WARN_ON_ONCE(pid == MMU_NO_CONTEXT))
		return;

	WARN_ON_ONCE(end == TLB_FLUSH_ALL);

	preempt_disable();
	smp_mb(); /* see radix__flush_tlb_mm */
	type = flush_type_needed(mm, false);
	if (type == FLUSH_TYPE_NONE)
		goto out;

	if (type == FLUSH_TYPE_GLOBAL)
		flush_pid = nr_pages > tlb_single_page_flush_ceiling;
	else
		flush_pid = nr_pages > tlb_local_single_page_flush_ceiling;

	if (!mmu_has_feature(MMU_FTR_GTSE) && type == FLUSH_TYPE_GLOBAL) {
		unsigned long tgt = H_RPTI_TARGET_CMMU;
		unsigned long type = H_RPTI_TYPE_TLB;
		unsigned long pg_sizes = psize_to_rpti_pgsize(psize);

		if (also_pwc)
			type |= H_RPTI_TYPE_PWC;
		if (atomic_read(&mm->context.copros) > 0)
			tgt |= H_RPTI_TARGET_NMMU;
		pseries_rpt_invalidate(pid, tgt, type, pg_sizes, start, end);
	} else if (flush_pid) {
		if (type == FLUSH_TYPE_LOCAL) {
			_tlbiel_pid(pid, also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB);
		} else {
			if (cputlb_use_tlbie()) {
				if (mm_needs_flush_escalation(mm))
					also_pwc = true;

				_tlbie_pid(pid,
					also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB);
			} else {
				_tlbiel_pid_multicast(mm, pid,
					also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB);
			}
		}
	} else {
		if (type == FLUSH_TYPE_LOCAL)
			_tlbiel_va_range(start, end, pid, page_size, psize, also_pwc);
		else if (cputlb_use_tlbie())
			_tlbie_va_range(start, end, pid, page_size, psize, also_pwc);
		else
			_tlbiel_va_range_multicast(mm,
					start, end, pid, page_size, psize, also_pwc);
	}
out:
	preempt_enable();
	mmu_notifier_arch_invalidate_secondary_tlbs(mm, start, end);
}

void radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start,
				  unsigned long end, int psize)
{
	return __radix__flush_tlb_range_psize(mm, start, end, psize, false);
}

void radix__flush_tlb_pwc_range_psize(struct mm_struct *mm, unsigned long start,
				      unsigned long end, int psize)
{
	__radix__flush_tlb_range_psize(mm, start, end, psize, true);
}

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
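/*
 * Flush the small-page translations (and the page walk cache) covering a PMD
 * region that has just been collapsed into a huge page; with a 4K base page
 * size just flush the whole mm.
 */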
void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr)
{
	unsigned long pid, end;
	enum tlb_flush_type type;

	pid = mm->context.id;
	if (WARN_ON_ONCE(pid == MMU_NO_CONTEXT))
		return;

	/* 4k page size, just blow the world */
	if (PAGE_SIZE == 0x1000) {
		radix__flush_all_mm(mm);
		return;
	}

	end = addr + HPAGE_PMD_SIZE;

	/* Otherwise first do the PWC, then iterate the pages. */
	preempt_disable();
	smp_mb(); /* see radix__flush_tlb_mm */
	type = flush_type_needed(mm, false);
	if (type == FLUSH_TYPE_LOCAL) {
		_tlbiel_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
	} else if (type == FLUSH_TYPE_GLOBAL) {
		if (!mmu_has_feature(MMU_FTR_GTSE)) {
			unsigned long tgt, type, pg_sizes;

			tgt = H_RPTI_TARGET_CMMU;
			type = H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC |
			       H_RPTI_TYPE_PRT;
			pg_sizes = psize_to_rpti_pgsize(mmu_virtual_psize);

			if (atomic_read(&mm->context.copros) > 0)
				tgt |= H_RPTI_TARGET_NMMU;
			pseries_rpt_invalidate(pid, tgt, type, pg_sizes,
					       addr, end);
		} else if (cputlb_use_tlbie())
			_tlbie_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
		else
			_tlbiel_va_range_multicast(mm,
					addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
	}

	preempt_enable();
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

void radix__flush_pmd_tlb_range(struct vm_area_struct *vma,
				unsigned long start, unsigned long end)
{
	radix__flush_tlb_range_psize(vma->vm_mm, start, end, MMU_PAGE_2M);
}
EXPORT_SYMBOL(radix__flush_pmd_tlb_range);

void radix__flush_pud_tlb_range(struct vm_area_struct *vma,
				unsigned long start, unsigned long end)
{
	radix__flush_tlb_range_psize(vma->vm_mm, start, end, MMU_PAGE_1G);
}
EXPORT_SYMBOL(radix__flush_pud_tlb_range);

void radix__flush_tlb_all(void)
{
	unsigned long rb, prs, r, rs;
	unsigned long ric = RIC_FLUSH_ALL;

	rb = 0x3 << PPC_BITLSHIFT(53); /* IS = 3 */
	prs = 0; /* partition scoped */
	r = 1;   /* radix format */
	rs = 1 & ((1UL << 32) - 1); /* any LPID value to flush guest mappings */

	asm volatile("ptesync": : :"memory");
	/*
	 * now flush guest entries by passing PRS = 1 and LPID != 0
	 */
	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(1), "i"(ric), "r"(rs) : "memory");
	/*
	 * now flush host entries by passing PRS = 0 and LPID == 0
	 */
	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(0) : "memory");
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
/*
 * Performs process-scoped invalidations for a given LPID
 * as part of H_RPT_INVALIDATE hcall.
 */
void do_h_rpt_invalidate_prt(unsigned long pid, unsigned long lpid,
			     unsigned long type, unsigned long pg_sizes,
			     unsigned long start, unsigned long end)
{
	unsigned long psize, nr_pages;
	struct mmu_psize_def *def;
	bool flush_pid;

	/*
	 * A H_RPTI_TYPE_ALL request implies RIC=3, hence
	 * do a single IS=1 based flush.
	 */
	if ((type & H_RPTI_TYPE_ALL) == H_RPTI_TYPE_ALL) {
		_tlbie_pid_lpid(pid, lpid, RIC_FLUSH_ALL);
		return;
	}

	if (type & H_RPTI_TYPE_PWC)
		_tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC);

	/* Full PID flush */
	if (start == 0 && end == -1)
		return _tlbie_pid_lpid(pid, lpid, RIC_FLUSH_TLB);

	/* Do range invalidation for all the valid page sizes */
	for (psize = 0; psize < MMU_PAGE_COUNT; psize++) {
		def = &mmu_psize_defs[psize];
		if (!(pg_sizes & def->h_rpt_pgsize))
			continue;

		nr_pages = (end - start) >> def->shift;
		flush_pid = nr_pages > tlb_single_page_flush_ceiling;

		/*
		 * If the number of pages spanning the range is above
		 * the ceiling, convert the request into a full PID flush.
		 * And since PID flush takes out all the page sizes, there
		 * is no need to consider remaining page sizes.
		 */
		if (flush_pid) {
			_tlbie_pid_lpid(pid, lpid, RIC_FLUSH_TLB);
			return;
		}
		_tlbie_va_range_lpid(start, end, pid, lpid,
				     (1UL << def->shift), psize, false);
	}
}
EXPORT_SYMBOL_GPL(do_h_rpt_invalidate_prt);

#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */

static int __init create_tlb_single_page_flush_ceiling(void)
{
	debugfs_create_u32("tlb_single_page_flush_ceiling", 0600,
			   arch_debugfs_dir, &tlb_single_page_flush_ceiling);
	debugfs_create_u32("tlb_local_single_page_flush_ceiling", 0600,
			   arch_debugfs_dir, &tlb_local_single_page_flush_ceiling);
	return 0;
}
late_initcall(create_tlb_single_page_flush_ceiling);