1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * TLB flush routines for radix kernels. 4 * 5 * Copyright 2015-2016, Aneesh Kumar K.V, IBM Corporation. 6 */ 7 8 #include <linux/mm.h> 9 #include <linux/hugetlb.h> 10 #include <linux/memblock.h> 11 #include <linux/mmu_context.h> 12 #include <linux/sched/mm.h> 13 #include <linux/debugfs.h> 14 15 #include <asm/ppc-opcode.h> 16 #include <asm/tlb.h> 17 #include <asm/tlbflush.h> 18 #include <asm/trace.h> 19 #include <asm/cputhreads.h> 20 #include <asm/plpar_wrappers.h> 21 22 #include "internal.h" 23 24 /* 25 * tlbiel instruction for radix, set invalidation 26 * i.e., r=1 and is=01 or is=10 or is=11 27 */ 28 static __always_inline void tlbiel_radix_set_isa300(unsigned int set, unsigned int is, 29 unsigned int pid, 30 unsigned int ric, unsigned int prs) 31 { 32 unsigned long rb; 33 unsigned long rs; 34 35 rb = (set << PPC_BITLSHIFT(51)) | (is << PPC_BITLSHIFT(53)); 36 rs = ((unsigned long)pid << PPC_BITLSHIFT(31)); 37 38 asm volatile(PPC_TLBIEL(%0, %1, %2, %3, 1) 39 : : "r"(rb), "r"(rs), "i"(ric), "i"(prs) 40 : "memory"); 41 } 42 43 static void tlbiel_all_isa300(unsigned int num_sets, unsigned int is) 44 { 45 unsigned int set; 46 47 asm volatile("ptesync": : :"memory"); 48 49 /* 50 * Flush the first set of the TLB, and the entire Page Walk Cache 51 * and partition table entries. Then flush the remaining sets of the 52 * TLB. 53 */ 54 55 if (early_cpu_has_feature(CPU_FTR_HVMODE)) { 56 /* MSR[HV] should flush partition scope translations first. */ 57 tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 0); 58 59 if (!early_cpu_has_feature(CPU_FTR_ARCH_31)) { 60 for (set = 1; set < num_sets; set++) 61 tlbiel_radix_set_isa300(set, is, 0, 62 RIC_FLUSH_TLB, 0); 63 } 64 } 65 66 /* Flush process scoped entries. */ 67 tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 1); 68 69 if (!early_cpu_has_feature(CPU_FTR_ARCH_31)) { 70 for (set = 1; set < num_sets; set++) 71 tlbiel_radix_set_isa300(set, is, 0, RIC_FLUSH_TLB, 1); 72 } 73 74 ppc_after_tlbiel_barrier(); 75 } 76 77 void radix__tlbiel_all(unsigned int action) 78 { 79 unsigned int is; 80 81 switch (action) { 82 case TLB_INVAL_SCOPE_GLOBAL: 83 is = 3; 84 break; 85 case TLB_INVAL_SCOPE_LPID: 86 is = 2; 87 break; 88 default: 89 BUG(); 90 } 91 92 if (early_cpu_has_feature(CPU_FTR_ARCH_300)) 93 tlbiel_all_isa300(POWER9_TLB_SETS_RADIX, is); 94 else 95 WARN(1, "%s called on pre-POWER9 CPU\n", __func__); 96 97 asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT "; isync" : : :"memory"); 98 } 99 100 static __always_inline void __tlbiel_pid(unsigned long pid, int set, 101 unsigned long ric) 102 { 103 unsigned long rb,rs,prs,r; 104 105 rb = PPC_BIT(53); /* IS = 1 */ 106 rb |= set << PPC_BITLSHIFT(51); 107 rs = ((unsigned long)pid) << PPC_BITLSHIFT(31); 108 prs = 1; /* process scoped */ 109 r = 1; /* radix format */ 110 111 asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1) 112 : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); 113 trace_tlbie(0, 1, rb, rs, ric, prs, r); 114 } 115 116 static __always_inline void __tlbie_pid(unsigned long pid, unsigned long ric) 117 { 118 unsigned long rb,rs,prs,r; 119 120 rb = PPC_BIT(53); /* IS = 1 */ 121 rs = pid << PPC_BITLSHIFT(31); 122 prs = 1; /* process scoped */ 123 r = 1; /* radix format */ 124 125 asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) 126 : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); 127 trace_tlbie(0, 0, rb, rs, ric, prs, r); 128 } 129 130 static __always_inline void __tlbie_pid_lpid(unsigned long pid, 131 unsigned long lpid, 132 unsigned long ric) 133 { 134 unsigned long rb, rs, prs, r; 135 136 rb = PPC_BIT(53); /* IS = 1 */ 137 rs = (pid << PPC_BITLSHIFT(31)) | (lpid & ~(PPC_BITMASK(0, 31))); 138 prs = 1; /* process scoped */ 139 r = 1; /* radix format */ 140 141 asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) 142 : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); 143 trace_tlbie(0, 0, rb, rs, ric, prs, r); 144 } 145 static __always_inline void __tlbie_lpid(unsigned long lpid, unsigned long ric) 146 { 147 unsigned long rb,rs,prs,r; 148 149 rb = PPC_BIT(52); /* IS = 2 */ 150 rs = lpid; 151 prs = 0; /* partition scoped */ 152 r = 1; /* radix format */ 153 154 asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) 155 : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); 156 trace_tlbie(lpid, 0, rb, rs, ric, prs, r); 157 } 158 159 static __always_inline void __tlbie_lpid_guest(unsigned long lpid, unsigned long ric) 160 { 161 unsigned long rb,rs,prs,r; 162 163 rb = PPC_BIT(52); /* IS = 2 */ 164 rs = lpid; 165 prs = 1; /* process scoped */ 166 r = 1; /* radix format */ 167 168 asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) 169 : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); 170 trace_tlbie(lpid, 0, rb, rs, ric, prs, r); 171 } 172 173 static __always_inline void __tlbiel_va(unsigned long va, unsigned long pid, 174 unsigned long ap, unsigned long ric) 175 { 176 unsigned long rb,rs,prs,r; 177 178 rb = va & ~(PPC_BITMASK(52, 63)); 179 rb |= ap << PPC_BITLSHIFT(58); 180 rs = pid << PPC_BITLSHIFT(31); 181 prs = 1; /* process scoped */ 182 r = 1; /* radix format */ 183 184 asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1) 185 : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); 186 trace_tlbie(0, 1, rb, rs, ric, prs, r); 187 } 188 189 static __always_inline void __tlbie_va(unsigned long va, unsigned long pid, 190 unsigned long ap, unsigned long ric) 191 { 192 unsigned long rb,rs,prs,r; 193 194 rb = va & ~(PPC_BITMASK(52, 63)); 195 rb |= ap << PPC_BITLSHIFT(58); 196 rs = pid << PPC_BITLSHIFT(31); 197 prs = 1; /* process scoped */ 198 r = 1; /* radix format */ 199 200 asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) 201 : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); 202 trace_tlbie(0, 0, rb, rs, ric, prs, r); 203 } 204 205 static __always_inline void __tlbie_va_lpid(unsigned long va, unsigned long pid, 206 unsigned long lpid, 207 unsigned long ap, unsigned long ric) 208 { 209 unsigned long rb, rs, prs, r; 210 211 rb = va & ~(PPC_BITMASK(52, 63)); 212 rb |= ap << PPC_BITLSHIFT(58); 213 rs = (pid << PPC_BITLSHIFT(31)) | (lpid & ~(PPC_BITMASK(0, 31))); 214 prs = 1; /* process scoped */ 215 r = 1; /* radix format */ 216 217 asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) 218 : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); 219 trace_tlbie(0, 0, rb, rs, ric, prs, r); 220 } 221 222 static __always_inline void __tlbie_lpid_va(unsigned long va, unsigned long lpid, 223 unsigned long ap, unsigned long ric) 224 { 225 unsigned long rb,rs,prs,r; 226 227 rb = va & ~(PPC_BITMASK(52, 63)); 228 rb |= ap << PPC_BITLSHIFT(58); 229 rs = lpid; 230 prs = 0; /* partition scoped */ 231 r = 1; /* radix format */ 232 233 asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) 234 : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); 235 trace_tlbie(lpid, 0, rb, rs, ric, prs, r); 236 } 237 238 239 static inline void fixup_tlbie_va(unsigned long va, unsigned long pid, 240 unsigned long ap) 241 { 242 if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) { 243 asm volatile("ptesync": : :"memory"); 244 __tlbie_va(va, 0, ap, RIC_FLUSH_TLB); 245 } 246 247 if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) { 248 asm volatile("ptesync": : :"memory"); 249 __tlbie_va(va, pid, ap, RIC_FLUSH_TLB); 250 } 251 } 252 253 static inline void fixup_tlbie_va_range(unsigned long va, unsigned long pid, 254 unsigned long ap) 255 { 256 if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) { 257 asm volatile("ptesync": : :"memory"); 258 __tlbie_pid(0, RIC_FLUSH_TLB); 259 } 260 261 if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) { 262 asm volatile("ptesync": : :"memory"); 263 __tlbie_va(va, pid, ap, RIC_FLUSH_TLB); 264 } 265 } 266 267 static inline void fixup_tlbie_va_range_lpid(unsigned long va, 268 unsigned long pid, 269 unsigned long lpid, 270 unsigned long ap) 271 { 272 if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) { 273 asm volatile("ptesync" : : : "memory"); 274 __tlbie_pid_lpid(0, lpid, RIC_FLUSH_TLB); 275 } 276 277 if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) { 278 asm volatile("ptesync" : : : "memory"); 279 __tlbie_va_lpid(va, pid, lpid, ap, RIC_FLUSH_TLB); 280 } 281 } 282 283 static inline void fixup_tlbie_pid(unsigned long pid) 284 { 285 /* 286 * We can use any address for the invalidation, pick one which is 287 * probably unused as an optimisation. 288 */ 289 unsigned long va = ((1UL << 52) - 1); 290 291 if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) { 292 asm volatile("ptesync": : :"memory"); 293 __tlbie_pid(0, RIC_FLUSH_TLB); 294 } 295 296 if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) { 297 asm volatile("ptesync": : :"memory"); 298 __tlbie_va(va, pid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB); 299 } 300 } 301 302 static inline void fixup_tlbie_pid_lpid(unsigned long pid, unsigned long lpid) 303 { 304 /* 305 * We can use any address for the invalidation, pick one which is 306 * probably unused as an optimisation. 307 */ 308 unsigned long va = ((1UL << 52) - 1); 309 310 if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) { 311 asm volatile("ptesync" : : : "memory"); 312 __tlbie_pid_lpid(0, lpid, RIC_FLUSH_TLB); 313 } 314 315 if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) { 316 asm volatile("ptesync" : : : "memory"); 317 __tlbie_va_lpid(va, pid, lpid, mmu_get_ap(MMU_PAGE_64K), 318 RIC_FLUSH_TLB); 319 } 320 } 321 322 static inline void fixup_tlbie_lpid_va(unsigned long va, unsigned long lpid, 323 unsigned long ap) 324 { 325 if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) { 326 asm volatile("ptesync": : :"memory"); 327 __tlbie_lpid_va(va, 0, ap, RIC_FLUSH_TLB); 328 } 329 330 if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) { 331 asm volatile("ptesync": : :"memory"); 332 __tlbie_lpid_va(va, lpid, ap, RIC_FLUSH_TLB); 333 } 334 } 335 336 static inline void fixup_tlbie_lpid(unsigned long lpid) 337 { 338 /* 339 * We can use any address for the invalidation, pick one which is 340 * probably unused as an optimisation. 341 */ 342 unsigned long va = ((1UL << 52) - 1); 343 344 if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) { 345 asm volatile("ptesync": : :"memory"); 346 __tlbie_lpid(0, RIC_FLUSH_TLB); 347 } 348 349 if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) { 350 asm volatile("ptesync": : :"memory"); 351 __tlbie_lpid_va(va, lpid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB); 352 } 353 } 354 355 /* 356 * We use 128 set in radix mode and 256 set in hpt mode. 357 */ 358 static inline void _tlbiel_pid(unsigned long pid, unsigned long ric) 359 { 360 int set; 361 362 asm volatile("ptesync": : :"memory"); 363 364 switch (ric) { 365 case RIC_FLUSH_PWC: 366 367 /* For PWC, only one flush is needed */ 368 __tlbiel_pid(pid, 0, RIC_FLUSH_PWC); 369 ppc_after_tlbiel_barrier(); 370 return; 371 case RIC_FLUSH_TLB: 372 __tlbiel_pid(pid, 0, RIC_FLUSH_TLB); 373 break; 374 case RIC_FLUSH_ALL: 375 default: 376 /* 377 * Flush the first set of the TLB, and if 378 * we're doing a RIC_FLUSH_ALL, also flush 379 * the entire Page Walk Cache. 380 */ 381 __tlbiel_pid(pid, 0, RIC_FLUSH_ALL); 382 } 383 384 if (!cpu_has_feature(CPU_FTR_ARCH_31)) { 385 /* For the remaining sets, just flush the TLB */ 386 for (set = 1; set < POWER9_TLB_SETS_RADIX ; set++) 387 __tlbiel_pid(pid, set, RIC_FLUSH_TLB); 388 } 389 390 ppc_after_tlbiel_barrier(); 391 asm volatile(PPC_RADIX_INVALIDATE_ERAT_USER "; isync" : : :"memory"); 392 } 393 394 static inline void _tlbie_pid(unsigned long pid, unsigned long ric) 395 { 396 asm volatile("ptesync": : :"memory"); 397 398 /* 399 * Workaround the fact that the "ric" argument to __tlbie_pid 400 * must be a compile-time constraint to match the "i" constraint 401 * in the asm statement. 402 */ 403 switch (ric) { 404 case RIC_FLUSH_TLB: 405 __tlbie_pid(pid, RIC_FLUSH_TLB); 406 fixup_tlbie_pid(pid); 407 break; 408 case RIC_FLUSH_PWC: 409 __tlbie_pid(pid, RIC_FLUSH_PWC); 410 break; 411 case RIC_FLUSH_ALL: 412 default: 413 __tlbie_pid(pid, RIC_FLUSH_ALL); 414 fixup_tlbie_pid(pid); 415 } 416 asm volatile("eieio; tlbsync; ptesync": : :"memory"); 417 } 418 419 static inline void _tlbie_pid_lpid(unsigned long pid, unsigned long lpid, 420 unsigned long ric) 421 { 422 asm volatile("ptesync" : : : "memory"); 423 424 /* 425 * Workaround the fact that the "ric" argument to __tlbie_pid 426 * must be a compile-time contraint to match the "i" constraint 427 * in the asm statement. 428 */ 429 switch (ric) { 430 case RIC_FLUSH_TLB: 431 __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_TLB); 432 fixup_tlbie_pid_lpid(pid, lpid); 433 break; 434 case RIC_FLUSH_PWC: 435 __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC); 436 break; 437 case RIC_FLUSH_ALL: 438 default: 439 __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_ALL); 440 fixup_tlbie_pid_lpid(pid, lpid); 441 } 442 asm volatile("eieio; tlbsync; ptesync" : : : "memory"); 443 } 444 struct tlbiel_pid { 445 unsigned long pid; 446 unsigned long ric; 447 }; 448 449 static void do_tlbiel_pid(void *info) 450 { 451 struct tlbiel_pid *t = info; 452 453 if (t->ric == RIC_FLUSH_TLB) 454 _tlbiel_pid(t->pid, RIC_FLUSH_TLB); 455 else if (t->ric == RIC_FLUSH_PWC) 456 _tlbiel_pid(t->pid, RIC_FLUSH_PWC); 457 else 458 _tlbiel_pid(t->pid, RIC_FLUSH_ALL); 459 } 460 461 static inline void _tlbiel_pid_multicast(struct mm_struct *mm, 462 unsigned long pid, unsigned long ric) 463 { 464 struct cpumask *cpus = mm_cpumask(mm); 465 struct tlbiel_pid t = { .pid = pid, .ric = ric }; 466 467 on_each_cpu_mask(cpus, do_tlbiel_pid, &t, 1); 468 /* 469 * Always want the CPU translations to be invalidated with tlbiel in 470 * these paths, so while coprocessors must use tlbie, we can not 471 * optimise away the tlbiel component. 472 */ 473 if (atomic_read(&mm->context.copros) > 0) 474 _tlbie_pid(pid, RIC_FLUSH_ALL); 475 } 476 477 static inline void _tlbie_lpid(unsigned long lpid, unsigned long ric) 478 { 479 asm volatile("ptesync": : :"memory"); 480 481 /* 482 * Workaround the fact that the "ric" argument to __tlbie_pid 483 * must be a compile-time contraint to match the "i" constraint 484 * in the asm statement. 485 */ 486 switch (ric) { 487 case RIC_FLUSH_TLB: 488 __tlbie_lpid(lpid, RIC_FLUSH_TLB); 489 fixup_tlbie_lpid(lpid); 490 break; 491 case RIC_FLUSH_PWC: 492 __tlbie_lpid(lpid, RIC_FLUSH_PWC); 493 break; 494 case RIC_FLUSH_ALL: 495 default: 496 __tlbie_lpid(lpid, RIC_FLUSH_ALL); 497 fixup_tlbie_lpid(lpid); 498 } 499 asm volatile("eieio; tlbsync; ptesync": : :"memory"); 500 } 501 502 static __always_inline void _tlbie_lpid_guest(unsigned long lpid, unsigned long ric) 503 { 504 /* 505 * Workaround the fact that the "ric" argument to __tlbie_pid 506 * must be a compile-time contraint to match the "i" constraint 507 * in the asm statement. 508 */ 509 switch (ric) { 510 case RIC_FLUSH_TLB: 511 __tlbie_lpid_guest(lpid, RIC_FLUSH_TLB); 512 break; 513 case RIC_FLUSH_PWC: 514 __tlbie_lpid_guest(lpid, RIC_FLUSH_PWC); 515 break; 516 case RIC_FLUSH_ALL: 517 default: 518 __tlbie_lpid_guest(lpid, RIC_FLUSH_ALL); 519 } 520 fixup_tlbie_lpid(lpid); 521 asm volatile("eieio; tlbsync; ptesync": : :"memory"); 522 } 523 524 static inline void __tlbiel_va_range(unsigned long start, unsigned long end, 525 unsigned long pid, unsigned long page_size, 526 unsigned long psize) 527 { 528 unsigned long addr; 529 unsigned long ap = mmu_get_ap(psize); 530 531 for (addr = start; addr < end; addr += page_size) 532 __tlbiel_va(addr, pid, ap, RIC_FLUSH_TLB); 533 } 534 535 static __always_inline void _tlbiel_va(unsigned long va, unsigned long pid, 536 unsigned long psize, unsigned long ric) 537 { 538 unsigned long ap = mmu_get_ap(psize); 539 540 asm volatile("ptesync": : :"memory"); 541 __tlbiel_va(va, pid, ap, ric); 542 ppc_after_tlbiel_barrier(); 543 } 544 545 static inline void _tlbiel_va_range(unsigned long start, unsigned long end, 546 unsigned long pid, unsigned long page_size, 547 unsigned long psize, bool also_pwc) 548 { 549 asm volatile("ptesync": : :"memory"); 550 if (also_pwc) 551 __tlbiel_pid(pid, 0, RIC_FLUSH_PWC); 552 __tlbiel_va_range(start, end, pid, page_size, psize); 553 ppc_after_tlbiel_barrier(); 554 } 555 556 static inline void __tlbie_va_range(unsigned long start, unsigned long end, 557 unsigned long pid, unsigned long page_size, 558 unsigned long psize) 559 { 560 unsigned long addr; 561 unsigned long ap = mmu_get_ap(psize); 562 563 for (addr = start; addr < end; addr += page_size) 564 __tlbie_va(addr, pid, ap, RIC_FLUSH_TLB); 565 566 fixup_tlbie_va_range(addr - page_size, pid, ap); 567 } 568 569 static inline void __tlbie_va_range_lpid(unsigned long start, unsigned long end, 570 unsigned long pid, unsigned long lpid, 571 unsigned long page_size, 572 unsigned long psize) 573 { 574 unsigned long addr; 575 unsigned long ap = mmu_get_ap(psize); 576 577 for (addr = start; addr < end; addr += page_size) 578 __tlbie_va_lpid(addr, pid, lpid, ap, RIC_FLUSH_TLB); 579 580 fixup_tlbie_va_range_lpid(addr - page_size, pid, lpid, ap); 581 } 582 583 static __always_inline void _tlbie_va(unsigned long va, unsigned long pid, 584 unsigned long psize, unsigned long ric) 585 { 586 unsigned long ap = mmu_get_ap(psize); 587 588 asm volatile("ptesync": : :"memory"); 589 __tlbie_va(va, pid, ap, ric); 590 fixup_tlbie_va(va, pid, ap); 591 asm volatile("eieio; tlbsync; ptesync": : :"memory"); 592 } 593 594 struct tlbiel_va { 595 unsigned long pid; 596 unsigned long va; 597 unsigned long psize; 598 unsigned long ric; 599 }; 600 601 static void do_tlbiel_va(void *info) 602 { 603 struct tlbiel_va *t = info; 604 605 if (t->ric == RIC_FLUSH_TLB) 606 _tlbiel_va(t->va, t->pid, t->psize, RIC_FLUSH_TLB); 607 else if (t->ric == RIC_FLUSH_PWC) 608 _tlbiel_va(t->va, t->pid, t->psize, RIC_FLUSH_PWC); 609 else 610 _tlbiel_va(t->va, t->pid, t->psize, RIC_FLUSH_ALL); 611 } 612 613 static inline void _tlbiel_va_multicast(struct mm_struct *mm, 614 unsigned long va, unsigned long pid, 615 unsigned long psize, unsigned long ric) 616 { 617 struct cpumask *cpus = mm_cpumask(mm); 618 struct tlbiel_va t = { .va = va, .pid = pid, .psize = psize, .ric = ric }; 619 on_each_cpu_mask(cpus, do_tlbiel_va, &t, 1); 620 if (atomic_read(&mm->context.copros) > 0) 621 _tlbie_va(va, pid, psize, RIC_FLUSH_TLB); 622 } 623 624 struct tlbiel_va_range { 625 unsigned long pid; 626 unsigned long start; 627 unsigned long end; 628 unsigned long page_size; 629 unsigned long psize; 630 bool also_pwc; 631 }; 632 633 static void do_tlbiel_va_range(void *info) 634 { 635 struct tlbiel_va_range *t = info; 636 637 _tlbiel_va_range(t->start, t->end, t->pid, t->page_size, 638 t->psize, t->also_pwc); 639 } 640 641 static __always_inline void _tlbie_lpid_va(unsigned long va, unsigned long lpid, 642 unsigned long psize, unsigned long ric) 643 { 644 unsigned long ap = mmu_get_ap(psize); 645 646 asm volatile("ptesync": : :"memory"); 647 __tlbie_lpid_va(va, lpid, ap, ric); 648 fixup_tlbie_lpid_va(va, lpid, ap); 649 asm volatile("eieio; tlbsync; ptesync": : :"memory"); 650 } 651 652 static inline void _tlbie_va_range(unsigned long start, unsigned long end, 653 unsigned long pid, unsigned long page_size, 654 unsigned long psize, bool also_pwc) 655 { 656 asm volatile("ptesync": : :"memory"); 657 if (also_pwc) 658 __tlbie_pid(pid, RIC_FLUSH_PWC); 659 __tlbie_va_range(start, end, pid, page_size, psize); 660 asm volatile("eieio; tlbsync; ptesync": : :"memory"); 661 } 662 663 static inline void _tlbie_va_range_lpid(unsigned long start, unsigned long end, 664 unsigned long pid, unsigned long lpid, 665 unsigned long page_size, 666 unsigned long psize, bool also_pwc) 667 { 668 asm volatile("ptesync" : : : "memory"); 669 if (also_pwc) 670 __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC); 671 __tlbie_va_range_lpid(start, end, pid, lpid, page_size, psize); 672 asm volatile("eieio; tlbsync; ptesync" : : : "memory"); 673 } 674 675 static inline void _tlbiel_va_range_multicast(struct mm_struct *mm, 676 unsigned long start, unsigned long end, 677 unsigned long pid, unsigned long page_size, 678 unsigned long psize, bool also_pwc) 679 { 680 struct cpumask *cpus = mm_cpumask(mm); 681 struct tlbiel_va_range t = { .start = start, .end = end, 682 .pid = pid, .page_size = page_size, 683 .psize = psize, .also_pwc = also_pwc }; 684 685 on_each_cpu_mask(cpus, do_tlbiel_va_range, &t, 1); 686 if (atomic_read(&mm->context.copros) > 0) 687 _tlbie_va_range(start, end, pid, page_size, psize, also_pwc); 688 } 689 690 /* 691 * Base TLB flushing operations: 692 * 693 * - flush_tlb_mm(mm) flushes the specified mm context TLB's 694 * - flush_tlb_page(vma, vmaddr) flushes one page 695 * - flush_tlb_range(vma, start, end) flushes a range of pages 696 * - flush_tlb_kernel_range(start, end) flushes kernel pages 697 * 698 * - local_* variants of page and mm only apply to the current 699 * processor 700 */ 701 void radix__local_flush_tlb_mm(struct mm_struct *mm) 702 { 703 unsigned long pid; 704 705 preempt_disable(); 706 pid = mm->context.id; 707 if (pid != MMU_NO_CONTEXT) 708 _tlbiel_pid(pid, RIC_FLUSH_TLB); 709 preempt_enable(); 710 } 711 EXPORT_SYMBOL(radix__local_flush_tlb_mm); 712 713 #ifndef CONFIG_SMP 714 void radix__local_flush_all_mm(struct mm_struct *mm) 715 { 716 unsigned long pid; 717 718 preempt_disable(); 719 pid = mm->context.id; 720 if (pid != MMU_NO_CONTEXT) 721 _tlbiel_pid(pid, RIC_FLUSH_ALL); 722 preempt_enable(); 723 } 724 EXPORT_SYMBOL(radix__local_flush_all_mm); 725 726 static void __flush_all_mm(struct mm_struct *mm, bool fullmm) 727 { 728 radix__local_flush_all_mm(mm); 729 } 730 #endif /* CONFIG_SMP */ 731 732 void radix__local_flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr, 733 int psize) 734 { 735 unsigned long pid; 736 737 preempt_disable(); 738 pid = mm->context.id; 739 if (pid != MMU_NO_CONTEXT) 740 _tlbiel_va(vmaddr, pid, psize, RIC_FLUSH_TLB); 741 preempt_enable(); 742 } 743 744 void radix__local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) 745 { 746 #ifdef CONFIG_HUGETLB_PAGE 747 /* need the return fix for nohash.c */ 748 if (is_vm_hugetlb_page(vma)) 749 return radix__local_flush_hugetlb_page(vma, vmaddr); 750 #endif 751 radix__local_flush_tlb_page_psize(vma->vm_mm, vmaddr, mmu_virtual_psize); 752 } 753 EXPORT_SYMBOL(radix__local_flush_tlb_page); 754 755 static bool mm_needs_flush_escalation(struct mm_struct *mm) 756 { 757 /* 758 * The P9 nest MMU has issues with the page walk cache caching PTEs 759 * and not flushing them when RIC = 0 for a PID/LPID invalidate. 760 * 761 * This may have been fixed in shipping firmware (by disabling PWC 762 * or preventing it from caching PTEs), but until that is confirmed, 763 * this workaround is required - escalate all RIC=0 IS=1/2/3 flushes 764 * to RIC=2. 765 * 766 * POWER10 (and P9P) does not have this problem. 767 */ 768 if (cpu_has_feature(CPU_FTR_ARCH_31)) 769 return false; 770 if (atomic_read(&mm->context.copros) > 0) 771 return true; 772 return false; 773 } 774 775 /* 776 * If always_flush is true, then flush even if this CPU can't be removed 777 * from mm_cpumask. 778 */ 779 void exit_lazy_flush_tlb(struct mm_struct *mm, bool always_flush) 780 { 781 unsigned long pid = mm->context.id; 782 int cpu = smp_processor_id(); 783 784 /* 785 * A kthread could have done a mmget_not_zero() after the flushing CPU 786 * checked mm_cpumask, and be in the process of kthread_use_mm when 787 * interrupted here. In that case, current->mm will be set to mm, 788 * because kthread_use_mm() setting ->mm and switching to the mm is 789 * done with interrupts off. 790 */ 791 if (current->mm == mm) 792 goto out; 793 794 if (current->active_mm == mm) { 795 WARN_ON_ONCE(current->mm != NULL); 796 /* Is a kernel thread and is using mm as the lazy tlb */ 797 mmgrab(&init_mm); 798 current->active_mm = &init_mm; 799 switch_mm_irqs_off(mm, &init_mm, current); 800 mmdrop(mm); 801 } 802 803 /* 804 * This IPI may be initiated from any source including those not 805 * running the mm, so there may be a racing IPI that comes after 806 * this one which finds the cpumask already clear. Check and avoid 807 * underflowing the active_cpus count in that case. The race should 808 * not otherwise be a problem, but the TLB must be flushed because 809 * that's what the caller expects. 810 */ 811 if (cpumask_test_cpu(cpu, mm_cpumask(mm))) { 812 atomic_dec(&mm->context.active_cpus); 813 cpumask_clear_cpu(cpu, mm_cpumask(mm)); 814 always_flush = true; 815 } 816 817 out: 818 if (always_flush) 819 _tlbiel_pid(pid, RIC_FLUSH_ALL); 820 } 821 822 #ifdef CONFIG_SMP 823 static void do_exit_flush_lazy_tlb(void *arg) 824 { 825 struct mm_struct *mm = arg; 826 exit_lazy_flush_tlb(mm, true); 827 } 828 829 static void exit_flush_lazy_tlbs(struct mm_struct *mm) 830 { 831 /* 832 * Would be nice if this was async so it could be run in 833 * parallel with our local flush, but generic code does not 834 * give a good API for it. Could extend the generic code or 835 * make a special powerpc IPI for flushing TLBs. 836 * For now it's not too performance critical. 837 */ 838 smp_call_function_many(mm_cpumask(mm), do_exit_flush_lazy_tlb, 839 (void *)mm, 1); 840 } 841 842 #else /* CONFIG_SMP */ 843 static inline void exit_flush_lazy_tlbs(struct mm_struct *mm) { } 844 #endif /* CONFIG_SMP */ 845 846 static DEFINE_PER_CPU(unsigned int, mm_cpumask_trim_clock); 847 848 /* 849 * Interval between flushes at which we send out IPIs to check whether the 850 * mm_cpumask can be trimmed for the case where it's not a single-threaded 851 * process flushing its own mm. The intent is to reduce the cost of later 852 * flushes. Don't want this to be so low that it adds noticable cost to TLB 853 * flushing, or so high that it doesn't help reduce global TLBIEs. 854 */ 855 static unsigned long tlb_mm_cpumask_trim_timer = 1073; 856 857 static bool tick_and_test_trim_clock(void) 858 { 859 if (__this_cpu_inc_return(mm_cpumask_trim_clock) == 860 tlb_mm_cpumask_trim_timer) { 861 __this_cpu_write(mm_cpumask_trim_clock, 0); 862 return true; 863 } 864 return false; 865 } 866 867 enum tlb_flush_type { 868 FLUSH_TYPE_NONE, 869 FLUSH_TYPE_LOCAL, 870 FLUSH_TYPE_GLOBAL, 871 }; 872 873 static enum tlb_flush_type flush_type_needed(struct mm_struct *mm, bool fullmm) 874 { 875 int active_cpus = atomic_read(&mm->context.active_cpus); 876 int cpu = smp_processor_id(); 877 878 if (active_cpus == 0) 879 return FLUSH_TYPE_NONE; 880 if (active_cpus == 1 && cpumask_test_cpu(cpu, mm_cpumask(mm))) { 881 if (current->mm != mm) { 882 /* 883 * Asynchronous flush sources may trim down to nothing 884 * if the process is not running, so occasionally try 885 * to trim. 886 */ 887 if (tick_and_test_trim_clock()) { 888 exit_lazy_flush_tlb(mm, true); 889 return FLUSH_TYPE_NONE; 890 } 891 } 892 return FLUSH_TYPE_LOCAL; 893 } 894 895 /* Coprocessors require TLBIE to invalidate nMMU. */ 896 if (atomic_read(&mm->context.copros) > 0) 897 return FLUSH_TYPE_GLOBAL; 898 899 /* 900 * In the fullmm case there's no point doing the exit_flush_lazy_tlbs 901 * because the mm is being taken down anyway, and a TLBIE tends to 902 * be faster than an IPI+TLBIEL. 903 */ 904 if (fullmm) 905 return FLUSH_TYPE_GLOBAL; 906 907 /* 908 * If we are running the only thread of a single-threaded process, 909 * then we should almost always be able to trim off the rest of the 910 * CPU mask (except in the case of use_mm() races), so always try 911 * trimming the mask. 912 */ 913 if (atomic_read(&mm->mm_users) <= 1 && current->mm == mm) { 914 exit_flush_lazy_tlbs(mm); 915 /* 916 * use_mm() race could prevent IPIs from being able to clear 917 * the cpumask here, however those users are established 918 * after our first check (and so after the PTEs are removed), 919 * and the TLB still gets flushed by the IPI, so this CPU 920 * will only require a local flush. 921 */ 922 return FLUSH_TYPE_LOCAL; 923 } 924 925 /* 926 * Occasionally try to trim down the cpumask. It's possible this can 927 * bring the mask to zero, which results in no flush. 928 */ 929 if (tick_and_test_trim_clock()) { 930 exit_flush_lazy_tlbs(mm); 931 if (current->mm == mm) 932 return FLUSH_TYPE_LOCAL; 933 if (cpumask_test_cpu(cpu, mm_cpumask(mm))) 934 exit_lazy_flush_tlb(mm, true); 935 return FLUSH_TYPE_NONE; 936 } 937 938 return FLUSH_TYPE_GLOBAL; 939 } 940 941 #ifdef CONFIG_SMP 942 void radix__flush_tlb_mm(struct mm_struct *mm) 943 { 944 unsigned long pid; 945 enum tlb_flush_type type; 946 947 pid = mm->context.id; 948 if (unlikely(pid == MMU_NO_CONTEXT)) 949 return; 950 951 preempt_disable(); 952 /* 953 * Order loads of mm_cpumask (in flush_type_needed) vs previous 954 * stores to clear ptes before the invalidate. See barrier in 955 * switch_mm_irqs_off 956 */ 957 smp_mb(); 958 type = flush_type_needed(mm, false); 959 if (type == FLUSH_TYPE_LOCAL) { 960 _tlbiel_pid(pid, RIC_FLUSH_TLB); 961 } else if (type == FLUSH_TYPE_GLOBAL) { 962 if (!mmu_has_feature(MMU_FTR_GTSE)) { 963 unsigned long tgt = H_RPTI_TARGET_CMMU; 964 965 if (atomic_read(&mm->context.copros) > 0) 966 tgt |= H_RPTI_TARGET_NMMU; 967 pseries_rpt_invalidate(pid, tgt, H_RPTI_TYPE_TLB, 968 H_RPTI_PAGE_ALL, 0, -1UL); 969 } else if (cputlb_use_tlbie()) { 970 if (mm_needs_flush_escalation(mm)) 971 _tlbie_pid(pid, RIC_FLUSH_ALL); 972 else 973 _tlbie_pid(pid, RIC_FLUSH_TLB); 974 } else { 975 _tlbiel_pid_multicast(mm, pid, RIC_FLUSH_TLB); 976 } 977 } 978 preempt_enable(); 979 } 980 EXPORT_SYMBOL(radix__flush_tlb_mm); 981 982 static void __flush_all_mm(struct mm_struct *mm, bool fullmm) 983 { 984 unsigned long pid; 985 enum tlb_flush_type type; 986 987 pid = mm->context.id; 988 if (unlikely(pid == MMU_NO_CONTEXT)) 989 return; 990 991 preempt_disable(); 992 smp_mb(); /* see radix__flush_tlb_mm */ 993 type = flush_type_needed(mm, fullmm); 994 if (type == FLUSH_TYPE_LOCAL) { 995 _tlbiel_pid(pid, RIC_FLUSH_ALL); 996 } else if (type == FLUSH_TYPE_GLOBAL) { 997 if (!mmu_has_feature(MMU_FTR_GTSE)) { 998 unsigned long tgt = H_RPTI_TARGET_CMMU; 999 unsigned long type = H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC | 1000 H_RPTI_TYPE_PRT; 1001 1002 if (atomic_read(&mm->context.copros) > 0) 1003 tgt |= H_RPTI_TARGET_NMMU; 1004 pseries_rpt_invalidate(pid, tgt, type, 1005 H_RPTI_PAGE_ALL, 0, -1UL); 1006 } else if (cputlb_use_tlbie()) 1007 _tlbie_pid(pid, RIC_FLUSH_ALL); 1008 else 1009 _tlbiel_pid_multicast(mm, pid, RIC_FLUSH_ALL); 1010 } 1011 preempt_enable(); 1012 } 1013 1014 void radix__flush_all_mm(struct mm_struct *mm) 1015 { 1016 __flush_all_mm(mm, false); 1017 } 1018 EXPORT_SYMBOL(radix__flush_all_mm); 1019 1020 void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr, 1021 int psize) 1022 { 1023 unsigned long pid; 1024 enum tlb_flush_type type; 1025 1026 pid = mm->context.id; 1027 if (unlikely(pid == MMU_NO_CONTEXT)) 1028 return; 1029 1030 preempt_disable(); 1031 smp_mb(); /* see radix__flush_tlb_mm */ 1032 type = flush_type_needed(mm, false); 1033 if (type == FLUSH_TYPE_LOCAL) { 1034 _tlbiel_va(vmaddr, pid, psize, RIC_FLUSH_TLB); 1035 } else if (type == FLUSH_TYPE_GLOBAL) { 1036 if (!mmu_has_feature(MMU_FTR_GTSE)) { 1037 unsigned long tgt, pg_sizes, size; 1038 1039 tgt = H_RPTI_TARGET_CMMU; 1040 pg_sizes = psize_to_rpti_pgsize(psize); 1041 size = 1UL << mmu_psize_to_shift(psize); 1042 1043 if (atomic_read(&mm->context.copros) > 0) 1044 tgt |= H_RPTI_TARGET_NMMU; 1045 pseries_rpt_invalidate(pid, tgt, H_RPTI_TYPE_TLB, 1046 pg_sizes, vmaddr, 1047 vmaddr + size); 1048 } else if (cputlb_use_tlbie()) 1049 _tlbie_va(vmaddr, pid, psize, RIC_FLUSH_TLB); 1050 else 1051 _tlbiel_va_multicast(mm, vmaddr, pid, psize, RIC_FLUSH_TLB); 1052 } 1053 preempt_enable(); 1054 } 1055 1056 void radix__flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) 1057 { 1058 #ifdef CONFIG_HUGETLB_PAGE 1059 if (is_vm_hugetlb_page(vma)) 1060 return radix__flush_hugetlb_page(vma, vmaddr); 1061 #endif 1062 radix__flush_tlb_page_psize(vma->vm_mm, vmaddr, mmu_virtual_psize); 1063 } 1064 EXPORT_SYMBOL(radix__flush_tlb_page); 1065 1066 #endif /* CONFIG_SMP */ 1067 1068 static void do_tlbiel_kernel(void *info) 1069 { 1070 _tlbiel_pid(0, RIC_FLUSH_ALL); 1071 } 1072 1073 static inline void _tlbiel_kernel_broadcast(void) 1074 { 1075 on_each_cpu(do_tlbiel_kernel, NULL, 1); 1076 if (tlbie_capable) { 1077 /* 1078 * Coherent accelerators don't refcount kernel memory mappings, 1079 * so have to always issue a tlbie for them. This is quite a 1080 * slow path anyway. 1081 */ 1082 _tlbie_pid(0, RIC_FLUSH_ALL); 1083 } 1084 } 1085 1086 /* 1087 * If kernel TLBIs ever become local rather than global, then 1088 * drivers/misc/ocxl/link.c:ocxl_link_add_pe will need some work, as it 1089 * assumes kernel TLBIs are global. 1090 */ 1091 void radix__flush_tlb_kernel_range(unsigned long start, unsigned long end) 1092 { 1093 if (!mmu_has_feature(MMU_FTR_GTSE)) { 1094 unsigned long tgt = H_RPTI_TARGET_CMMU | H_RPTI_TARGET_NMMU; 1095 unsigned long type = H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC | 1096 H_RPTI_TYPE_PRT; 1097 1098 pseries_rpt_invalidate(0, tgt, type, H_RPTI_PAGE_ALL, 1099 start, end); 1100 } else if (cputlb_use_tlbie()) 1101 _tlbie_pid(0, RIC_FLUSH_ALL); 1102 else 1103 _tlbiel_kernel_broadcast(); 1104 } 1105 EXPORT_SYMBOL(radix__flush_tlb_kernel_range); 1106 1107 #define TLB_FLUSH_ALL -1UL 1108 1109 /* 1110 * Number of pages above which we invalidate the entire PID rather than 1111 * flush individual pages, for local and global flushes respectively. 1112 * 1113 * tlbie goes out to the interconnect and individual ops are more costly. 1114 * It also does not iterate over sets like the local tlbiel variant when 1115 * invalidating a full PID, so it has a far lower threshold to change from 1116 * individual page flushes to full-pid flushes. 1117 */ 1118 static u32 tlb_single_page_flush_ceiling __read_mostly = 33; 1119 static u32 tlb_local_single_page_flush_ceiling __read_mostly = POWER9_TLB_SETS_RADIX * 2; 1120 1121 static inline void __radix__flush_tlb_range(struct mm_struct *mm, 1122 unsigned long start, unsigned long end) 1123 { 1124 unsigned long pid; 1125 unsigned int page_shift = mmu_psize_defs[mmu_virtual_psize].shift; 1126 unsigned long page_size = 1UL << page_shift; 1127 unsigned long nr_pages = (end - start) >> page_shift; 1128 bool fullmm = (end == TLB_FLUSH_ALL); 1129 bool flush_pid, flush_pwc = false; 1130 enum tlb_flush_type type; 1131 1132 pid = mm->context.id; 1133 if (unlikely(pid == MMU_NO_CONTEXT)) 1134 return; 1135 1136 preempt_disable(); 1137 smp_mb(); /* see radix__flush_tlb_mm */ 1138 type = flush_type_needed(mm, fullmm); 1139 if (type == FLUSH_TYPE_NONE) 1140 goto out; 1141 1142 if (fullmm) 1143 flush_pid = true; 1144 else if (type == FLUSH_TYPE_GLOBAL) 1145 flush_pid = nr_pages > tlb_single_page_flush_ceiling; 1146 else 1147 flush_pid = nr_pages > tlb_local_single_page_flush_ceiling; 1148 /* 1149 * full pid flush already does the PWC flush. if it is not full pid 1150 * flush check the range is more than PMD and force a pwc flush 1151 * mremap() depends on this behaviour. 1152 */ 1153 if (!flush_pid && (end - start) >= PMD_SIZE) 1154 flush_pwc = true; 1155 1156 if (!mmu_has_feature(MMU_FTR_GTSE) && type == FLUSH_TYPE_GLOBAL) { 1157 unsigned long type = H_RPTI_TYPE_TLB; 1158 unsigned long tgt = H_RPTI_TARGET_CMMU; 1159 unsigned long pg_sizes = psize_to_rpti_pgsize(mmu_virtual_psize); 1160 1161 if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) 1162 pg_sizes |= psize_to_rpti_pgsize(MMU_PAGE_2M); 1163 if (atomic_read(&mm->context.copros) > 0) 1164 tgt |= H_RPTI_TARGET_NMMU; 1165 if (flush_pwc) 1166 type |= H_RPTI_TYPE_PWC; 1167 pseries_rpt_invalidate(pid, tgt, type, pg_sizes, start, end); 1168 } else if (flush_pid) { 1169 /* 1170 * We are now flushing a range larger than PMD size force a RIC_FLUSH_ALL 1171 */ 1172 if (type == FLUSH_TYPE_LOCAL) { 1173 _tlbiel_pid(pid, RIC_FLUSH_ALL); 1174 } else { 1175 if (cputlb_use_tlbie()) { 1176 _tlbie_pid(pid, RIC_FLUSH_ALL); 1177 } else { 1178 _tlbiel_pid_multicast(mm, pid, RIC_FLUSH_ALL); 1179 } 1180 } 1181 } else { 1182 bool hflush = false; 1183 unsigned long hstart, hend; 1184 1185 if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) { 1186 hstart = (start + PMD_SIZE - 1) & PMD_MASK; 1187 hend = end & PMD_MASK; 1188 if (hstart < hend) 1189 hflush = true; 1190 } 1191 1192 if (type == FLUSH_TYPE_LOCAL) { 1193 asm volatile("ptesync": : :"memory"); 1194 if (flush_pwc) 1195 /* For PWC, only one flush is needed */ 1196 __tlbiel_pid(pid, 0, RIC_FLUSH_PWC); 1197 __tlbiel_va_range(start, end, pid, page_size, mmu_virtual_psize); 1198 if (hflush) 1199 __tlbiel_va_range(hstart, hend, pid, 1200 PMD_SIZE, MMU_PAGE_2M); 1201 ppc_after_tlbiel_barrier(); 1202 } else if (cputlb_use_tlbie()) { 1203 asm volatile("ptesync": : :"memory"); 1204 if (flush_pwc) 1205 __tlbie_pid(pid, RIC_FLUSH_PWC); 1206 __tlbie_va_range(start, end, pid, page_size, mmu_virtual_psize); 1207 if (hflush) 1208 __tlbie_va_range(hstart, hend, pid, 1209 PMD_SIZE, MMU_PAGE_2M); 1210 asm volatile("eieio; tlbsync; ptesync": : :"memory"); 1211 } else { 1212 _tlbiel_va_range_multicast(mm, 1213 start, end, pid, page_size, mmu_virtual_psize, flush_pwc); 1214 if (hflush) 1215 _tlbiel_va_range_multicast(mm, 1216 hstart, hend, pid, PMD_SIZE, MMU_PAGE_2M, flush_pwc); 1217 } 1218 } 1219 out: 1220 preempt_enable(); 1221 } 1222 1223 void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start, 1224 unsigned long end) 1225 1226 { 1227 #ifdef CONFIG_HUGETLB_PAGE 1228 if (is_vm_hugetlb_page(vma)) 1229 return radix__flush_hugetlb_tlb_range(vma, start, end); 1230 #endif 1231 1232 __radix__flush_tlb_range(vma->vm_mm, start, end); 1233 } 1234 EXPORT_SYMBOL(radix__flush_tlb_range); 1235 1236 static int radix_get_mmu_psize(int page_size) 1237 { 1238 int psize; 1239 1240 if (page_size == (1UL << mmu_psize_defs[mmu_virtual_psize].shift)) 1241 psize = mmu_virtual_psize; 1242 else if (page_size == (1UL << mmu_psize_defs[MMU_PAGE_2M].shift)) 1243 psize = MMU_PAGE_2M; 1244 else if (page_size == (1UL << mmu_psize_defs[MMU_PAGE_1G].shift)) 1245 psize = MMU_PAGE_1G; 1246 else 1247 return -1; 1248 return psize; 1249 } 1250 1251 /* 1252 * Flush partition scoped LPID address translation for all CPUs. 1253 */ 1254 void radix__flush_tlb_lpid_page(unsigned int lpid, 1255 unsigned long addr, 1256 unsigned long page_size) 1257 { 1258 int psize = radix_get_mmu_psize(page_size); 1259 1260 _tlbie_lpid_va(addr, lpid, psize, RIC_FLUSH_TLB); 1261 } 1262 EXPORT_SYMBOL_GPL(radix__flush_tlb_lpid_page); 1263 1264 /* 1265 * Flush partition scoped PWC from LPID for all CPUs. 1266 */ 1267 void radix__flush_pwc_lpid(unsigned int lpid) 1268 { 1269 _tlbie_lpid(lpid, RIC_FLUSH_PWC); 1270 } 1271 EXPORT_SYMBOL_GPL(radix__flush_pwc_lpid); 1272 1273 /* 1274 * Flush partition scoped translations from LPID (=LPIDR) 1275 */ 1276 void radix__flush_all_lpid(unsigned int lpid) 1277 { 1278 _tlbie_lpid(lpid, RIC_FLUSH_ALL); 1279 } 1280 EXPORT_SYMBOL_GPL(radix__flush_all_lpid); 1281 1282 /* 1283 * Flush process scoped translations from LPID (=LPIDR) 1284 */ 1285 void radix__flush_all_lpid_guest(unsigned int lpid) 1286 { 1287 _tlbie_lpid_guest(lpid, RIC_FLUSH_ALL); 1288 } 1289 1290 void radix__tlb_flush(struct mmu_gather *tlb) 1291 { 1292 int psize = 0; 1293 struct mm_struct *mm = tlb->mm; 1294 int page_size = tlb->page_size; 1295 unsigned long start = tlb->start; 1296 unsigned long end = tlb->end; 1297 1298 /* 1299 * if page size is not something we understand, do a full mm flush 1300 * 1301 * A "fullmm" flush must always do a flush_all_mm (RIC=2) flush 1302 * that flushes the process table entry cache upon process teardown. 1303 * See the comment for radix in arch_exit_mmap(). 1304 */ 1305 if (tlb->fullmm || tlb->need_flush_all) { 1306 __flush_all_mm(mm, true); 1307 } else if ( (psize = radix_get_mmu_psize(page_size)) == -1) { 1308 if (!tlb->freed_tables) 1309 radix__flush_tlb_mm(mm); 1310 else 1311 radix__flush_all_mm(mm); 1312 } else { 1313 if (!tlb->freed_tables) 1314 radix__flush_tlb_range_psize(mm, start, end, psize); 1315 else 1316 radix__flush_tlb_pwc_range_psize(mm, start, end, psize); 1317 } 1318 } 1319 1320 static void __radix__flush_tlb_range_psize(struct mm_struct *mm, 1321 unsigned long start, unsigned long end, 1322 int psize, bool also_pwc) 1323 { 1324 unsigned long pid; 1325 unsigned int page_shift = mmu_psize_defs[psize].shift; 1326 unsigned long page_size = 1UL << page_shift; 1327 unsigned long nr_pages = (end - start) >> page_shift; 1328 bool fullmm = (end == TLB_FLUSH_ALL); 1329 bool flush_pid; 1330 enum tlb_flush_type type; 1331 1332 pid = mm->context.id; 1333 if (unlikely(pid == MMU_NO_CONTEXT)) 1334 return; 1335 1336 fullmm = (end == TLB_FLUSH_ALL); 1337 1338 preempt_disable(); 1339 smp_mb(); /* see radix__flush_tlb_mm */ 1340 type = flush_type_needed(mm, fullmm); 1341 if (type == FLUSH_TYPE_NONE) 1342 goto out; 1343 1344 if (fullmm) 1345 flush_pid = true; 1346 else if (type == FLUSH_TYPE_GLOBAL) 1347 flush_pid = nr_pages > tlb_single_page_flush_ceiling; 1348 else 1349 flush_pid = nr_pages > tlb_local_single_page_flush_ceiling; 1350 1351 if (!mmu_has_feature(MMU_FTR_GTSE) && type == FLUSH_TYPE_GLOBAL) { 1352 unsigned long tgt = H_RPTI_TARGET_CMMU; 1353 unsigned long type = H_RPTI_TYPE_TLB; 1354 unsigned long pg_sizes = psize_to_rpti_pgsize(psize); 1355 1356 if (also_pwc) 1357 type |= H_RPTI_TYPE_PWC; 1358 if (atomic_read(&mm->context.copros) > 0) 1359 tgt |= H_RPTI_TARGET_NMMU; 1360 pseries_rpt_invalidate(pid, tgt, type, pg_sizes, start, end); 1361 } else if (flush_pid) { 1362 if (type == FLUSH_TYPE_LOCAL) { 1363 _tlbiel_pid(pid, also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB); 1364 } else { 1365 if (cputlb_use_tlbie()) { 1366 if (mm_needs_flush_escalation(mm)) 1367 also_pwc = true; 1368 1369 _tlbie_pid(pid, 1370 also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB); 1371 } else { 1372 _tlbiel_pid_multicast(mm, pid, 1373 also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB); 1374 } 1375 1376 } 1377 } else { 1378 if (type == FLUSH_TYPE_LOCAL) 1379 _tlbiel_va_range(start, end, pid, page_size, psize, also_pwc); 1380 else if (cputlb_use_tlbie()) 1381 _tlbie_va_range(start, end, pid, page_size, psize, also_pwc); 1382 else 1383 _tlbiel_va_range_multicast(mm, 1384 start, end, pid, page_size, psize, also_pwc); 1385 } 1386 out: 1387 preempt_enable(); 1388 } 1389 1390 void radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start, 1391 unsigned long end, int psize) 1392 { 1393 return __radix__flush_tlb_range_psize(mm, start, end, psize, false); 1394 } 1395 1396 void radix__flush_tlb_pwc_range_psize(struct mm_struct *mm, unsigned long start, 1397 unsigned long end, int psize) 1398 { 1399 __radix__flush_tlb_range_psize(mm, start, end, psize, true); 1400 } 1401 1402 #ifdef CONFIG_TRANSPARENT_HUGEPAGE 1403 void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr) 1404 { 1405 unsigned long pid, end; 1406 enum tlb_flush_type type; 1407 1408 pid = mm->context.id; 1409 if (unlikely(pid == MMU_NO_CONTEXT)) 1410 return; 1411 1412 /* 4k page size, just blow the world */ 1413 if (PAGE_SIZE == 0x1000) { 1414 radix__flush_all_mm(mm); 1415 return; 1416 } 1417 1418 end = addr + HPAGE_PMD_SIZE; 1419 1420 /* Otherwise first do the PWC, then iterate the pages. */ 1421 preempt_disable(); 1422 smp_mb(); /* see radix__flush_tlb_mm */ 1423 type = flush_type_needed(mm, false); 1424 if (type == FLUSH_TYPE_LOCAL) { 1425 _tlbiel_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true); 1426 } else if (type == FLUSH_TYPE_GLOBAL) { 1427 if (!mmu_has_feature(MMU_FTR_GTSE)) { 1428 unsigned long tgt, type, pg_sizes; 1429 1430 tgt = H_RPTI_TARGET_CMMU; 1431 type = H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC | 1432 H_RPTI_TYPE_PRT; 1433 pg_sizes = psize_to_rpti_pgsize(mmu_virtual_psize); 1434 1435 if (atomic_read(&mm->context.copros) > 0) 1436 tgt |= H_RPTI_TARGET_NMMU; 1437 pseries_rpt_invalidate(pid, tgt, type, pg_sizes, 1438 addr, end); 1439 } else if (cputlb_use_tlbie()) 1440 _tlbie_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true); 1441 else 1442 _tlbiel_va_range_multicast(mm, 1443 addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true); 1444 } 1445 1446 preempt_enable(); 1447 } 1448 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ 1449 1450 void radix__flush_pmd_tlb_range(struct vm_area_struct *vma, 1451 unsigned long start, unsigned long end) 1452 { 1453 radix__flush_tlb_range_psize(vma->vm_mm, start, end, MMU_PAGE_2M); 1454 } 1455 EXPORT_SYMBOL(radix__flush_pmd_tlb_range); 1456 1457 void radix__flush_tlb_all(void) 1458 { 1459 unsigned long rb,prs,r,rs; 1460 unsigned long ric = RIC_FLUSH_ALL; 1461 1462 rb = 0x3 << PPC_BITLSHIFT(53); /* IS = 3 */ 1463 prs = 0; /* partition scoped */ 1464 r = 1; /* radix format */ 1465 rs = 1 & ((1UL << 32) - 1); /* any LPID value to flush guest mappings */ 1466 1467 asm volatile("ptesync": : :"memory"); 1468 /* 1469 * now flush guest entries by passing PRS = 1 and LPID != 0 1470 */ 1471 asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) 1472 : : "r"(rb), "i"(r), "i"(1), "i"(ric), "r"(rs) : "memory"); 1473 /* 1474 * now flush host entires by passing PRS = 0 and LPID == 0 1475 */ 1476 asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) 1477 : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(0) : "memory"); 1478 asm volatile("eieio; tlbsync; ptesync": : :"memory"); 1479 } 1480 1481 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 1482 /* 1483 * Performs process-scoped invalidations for a given LPID 1484 * as part of H_RPT_INVALIDATE hcall. 1485 */ 1486 void do_h_rpt_invalidate_prt(unsigned long pid, unsigned long lpid, 1487 unsigned long type, unsigned long pg_sizes, 1488 unsigned long start, unsigned long end) 1489 { 1490 unsigned long psize, nr_pages; 1491 struct mmu_psize_def *def; 1492 bool flush_pid; 1493 1494 /* 1495 * A H_RPTI_TYPE_ALL request implies RIC=3, hence 1496 * do a single IS=1 based flush. 1497 */ 1498 if ((type & H_RPTI_TYPE_ALL) == H_RPTI_TYPE_ALL) { 1499 _tlbie_pid_lpid(pid, lpid, RIC_FLUSH_ALL); 1500 return; 1501 } 1502 1503 if (type & H_RPTI_TYPE_PWC) 1504 _tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC); 1505 1506 /* Full PID flush */ 1507 if (start == 0 && end == -1) 1508 return _tlbie_pid_lpid(pid, lpid, RIC_FLUSH_TLB); 1509 1510 /* Do range invalidation for all the valid page sizes */ 1511 for (psize = 0; psize < MMU_PAGE_COUNT; psize++) { 1512 def = &mmu_psize_defs[psize]; 1513 if (!(pg_sizes & def->h_rpt_pgsize)) 1514 continue; 1515 1516 nr_pages = (end - start) >> def->shift; 1517 flush_pid = nr_pages > tlb_single_page_flush_ceiling; 1518 1519 /* 1520 * If the number of pages spanning the range is above 1521 * the ceiling, convert the request into a full PID flush. 1522 * And since PID flush takes out all the page sizes, there 1523 * is no need to consider remaining page sizes. 1524 */ 1525 if (flush_pid) { 1526 _tlbie_pid_lpid(pid, lpid, RIC_FLUSH_TLB); 1527 return; 1528 } 1529 _tlbie_va_range_lpid(start, end, pid, lpid, 1530 (1UL << def->shift), psize, false); 1531 } 1532 } 1533 EXPORT_SYMBOL_GPL(do_h_rpt_invalidate_prt); 1534 1535 #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ 1536 1537 static int __init create_tlb_single_page_flush_ceiling(void) 1538 { 1539 debugfs_create_u32("tlb_single_page_flush_ceiling", 0600, 1540 arch_debugfs_dir, &tlb_single_page_flush_ceiling); 1541 debugfs_create_u32("tlb_local_single_page_flush_ceiling", 0600, 1542 arch_debugfs_dir, &tlb_local_single_page_flush_ceiling); 1543 return 0; 1544 } 1545 late_initcall(create_tlb_single_page_flush_ceiling); 1546 1547