// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright IBM Corp. 2007, 2011
 * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
 */

#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/spinlock.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/swapops.h>
#include <linux/sysctl.h>
#include <linux/ksm.h>
#include <linux/mman.h>

#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#include <asm/page-states.h>

pgprot_t pgprot_writecombine(pgprot_t prot)
{
	/*
	 * mio_wb_bit_mask may be set on a different CPU, but it is only set
	 * once at init and only read afterwards.
	 */
	return __pgprot(pgprot_val(prot) | mio_wb_bit_mask);
}
EXPORT_SYMBOL_GPL(pgprot_writecombine);

pgprot_t pgprot_writethrough(pgprot_t prot)
{
	/*
	 * mio_wb_bit_mask may be set on a different CPU, but it is only set
	 * once at init and only read afterwards.
	 */
	return __pgprot(pgprot_val(prot) & ~mio_wb_bit_mask);
}
EXPORT_SYMBOL_GPL(pgprot_writethrough);
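
/*
 * Illustrative sketch (hypothetical caller, not part of this file): a PCI
 * driver that wants write-combining user mappings would typically apply
 * pgprot_writecombine() to the VMA protection before remapping, e.g. in
 * its mmap handler:
 *
 *	vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
 *	return remap_pfn_range(vma, vma->vm_start, pfn,
 *			       vma->vm_end - vma->vm_start,
 *			       vma->vm_page_prot);
 */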

static inline void ptep_ipte_local(struct mm_struct *mm, unsigned long addr,
				   pte_t *ptep, int nodat)
{
	unsigned long opt, asce;

	if (MACHINE_HAS_TLB_GUEST) {
		opt = 0;
		asce = READ_ONCE(mm->context.gmap_asce);
		if (asce == 0UL || nodat)
			opt |= IPTE_NODAT;
		if (asce != -1UL) {
			asce = asce ? : mm->context.asce;
			opt |= IPTE_GUEST_ASCE;
		}
		__ptep_ipte(addr, ptep, opt, asce, IPTE_LOCAL);
	} else {
		__ptep_ipte(addr, ptep, 0, 0, IPTE_LOCAL);
	}
}

static inline void ptep_ipte_global(struct mm_struct *mm, unsigned long addr,
				    pte_t *ptep, int nodat)
{
	unsigned long opt, asce;

	if (MACHINE_HAS_TLB_GUEST) {
		opt = 0;
		asce = READ_ONCE(mm->context.gmap_asce);
		if (asce == 0UL || nodat)
			opt |= IPTE_NODAT;
		if (asce != -1UL) {
			asce = asce ? : mm->context.asce;
			opt |= IPTE_GUEST_ASCE;
		}
		__ptep_ipte(addr, ptep, opt, asce, IPTE_GLOBAL);
	} else {
		__ptep_ipte(addr, ptep, 0, 0, IPTE_GLOBAL);
	}
}

static inline pte_t ptep_flush_direct(struct mm_struct *mm,
				      unsigned long addr, pte_t *ptep,
				      int nodat)
{
	pte_t old;

	old = *ptep;
	if (unlikely(pte_val(old) & _PAGE_INVALID))
		return old;
	atomic_inc(&mm->context.flush_count);
	if (MACHINE_HAS_TLB_LC &&
	    cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
		ptep_ipte_local(mm, addr, ptep, nodat);
	else
		ptep_ipte_global(mm, addr, ptep, nodat);
	atomic_dec(&mm->context.flush_count);
	return old;
}

static inline pte_t ptep_flush_lazy(struct mm_struct *mm,
				    unsigned long addr, pte_t *ptep,
				    int nodat)
{
	pte_t old;

	old = *ptep;
	if (unlikely(pte_val(old) & _PAGE_INVALID))
		return old;
	atomic_inc(&mm->context.flush_count);
	if (cpumask_equal(&mm->context.cpu_attach_mask,
			  cpumask_of(smp_processor_id()))) {
		set_pte(ptep, set_pte_bit(*ptep, __pgprot(_PAGE_INVALID)));
		mm->context.flush_mm = 1;
	} else
		ptep_ipte_global(mm, addr, ptep, nodat);
	atomic_dec(&mm->context.flush_count);
	return old;
}

static inline pgste_t pgste_get_lock(pte_t *ptep)
{
	unsigned long new = 0;
#ifdef CONFIG_PGSTE
	unsigned long old;

	asm(
		"	lg	%0,%2\n"
		"0:	lgr	%1,%0\n"
		"	nihh	%0,0xff7f\n"	/* clear PCL bit in old */
		"	oihh	%1,0x0080\n"	/* set PCL bit in new */
		"	csg	%0,%1,%2\n"
		"	jl	0b\n"
		: "=&d" (old), "=&d" (new), "=Q" (ptep[PTRS_PER_PTE])
		: "Q" (ptep[PTRS_PER_PTE]) : "cc", "memory");
#endif
	return __pgste(new);
}

static inline void pgste_set_unlock(pte_t *ptep, pgste_t pgste)
{
#ifdef CONFIG_PGSTE
	asm(
		"	nihh	%1,0xff7f\n"	/* clear PCL bit */
		"	stg	%1,%0\n"
		: "=Q" (ptep[PTRS_PER_PTE])
		: "d" (pgste_val(pgste)), "Q" (ptep[PTRS_PER_PTE])
		: "cc", "memory");
#endif
}
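
/*
 * Illustrative sketch (not part of the original source): the inline assembly
 * in pgste_get_lock()/pgste_set_unlock() above implements a small spinlock on
 * the PCL bit of the PGSTE that lives right behind the page table entry (at
 * ptep[PTRS_PER_PTE]).  In rough C-like pseudocode, assuming a hypothetical
 * 64-bit compare-and-swap helper:
 *
 *	do {
 *		old = *pgste_slot;		// lg
 *		new = old | PGSTE_PCL_BIT;	// oihh 0x0080
 *		old &= ~PGSTE_PCL_BIT;		// nihh 0xff7f
 *	} while (cmpxchg64(pgste_slot, old, new) != old);	// csg + jl
 *
 * Unlock simply stores the value back with the PCL bit cleared.
 */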

static inline pgste_t pgste_get(pte_t *ptep)
{
	unsigned long pgste = 0;
#ifdef CONFIG_PGSTE
	pgste = *(unsigned long *)(ptep + PTRS_PER_PTE);
#endif
	return __pgste(pgste);
}

static inline void pgste_set(pte_t *ptep, pgste_t pgste)
{
#ifdef CONFIG_PGSTE
	*(pgste_t *)(ptep + PTRS_PER_PTE) = pgste;
#endif
}

static inline pgste_t pgste_update_all(pte_t pte, pgste_t pgste,
				       struct mm_struct *mm)
{
#ifdef CONFIG_PGSTE
	unsigned long address, bits, skey;

	if (!mm_uses_skeys(mm) || pte_val(pte) & _PAGE_INVALID)
		return pgste;
	address = pte_val(pte) & PAGE_MASK;
	skey = (unsigned long) page_get_storage_key(address);
	bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
	/* Transfer page changed & referenced bit to guest bits in pgste */
	pgste_val(pgste) |= bits << 48;		/* GR bit & GC bit */
	/* Copy page access key and fetch protection bit to pgste */
	pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT);
	pgste_val(pgste) |= (skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
#endif
	return pgste;
}

static inline void pgste_set_key(pte_t *ptep, pgste_t pgste, pte_t entry,
				 struct mm_struct *mm)
{
#ifdef CONFIG_PGSTE
	unsigned long address;
	unsigned long nkey;

	if (!mm_uses_skeys(mm) || pte_val(entry) & _PAGE_INVALID)
		return;
	VM_BUG_ON(!(pte_val(*ptep) & _PAGE_INVALID));
	address = pte_val(entry) & PAGE_MASK;
	/*
	 * Set page access key and fetch protection bit from pgste.
	 * The guest C/R information is still in the PGSTE, set real
	 * key C/R to 0.
	 */
	nkey = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56;
	nkey |= (pgste_val(pgste) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 48;
	page_set_storage_key(address, nkey, 0);
#endif
}

static inline pgste_t pgste_set_pte(pte_t *ptep, pgste_t pgste, pte_t entry)
{
#ifdef CONFIG_PGSTE
	if ((pte_val(entry) & _PAGE_PRESENT) &&
	    (pte_val(entry) & _PAGE_WRITE) &&
	    !(pte_val(entry) & _PAGE_INVALID)) {
		if (!MACHINE_HAS_ESOP) {
			/*
			 * Without enhanced suppression-on-protection force
			 * the dirty bit on for all writable ptes.
			 */
			entry = set_pte_bit(entry, __pgprot(_PAGE_DIRTY));
			entry = clear_pte_bit(entry, __pgprot(_PAGE_PROTECT));
		}
		if (!(pte_val(entry) & _PAGE_PROTECT))
			/* This pte allows write access, set user-dirty */
			pgste_val(pgste) |= PGSTE_UC_BIT;
	}
#endif
	set_pte(ptep, entry);
	return pgste;
}

static inline pgste_t pgste_pte_notify(struct mm_struct *mm,
				       unsigned long addr,
				       pte_t *ptep, pgste_t pgste)
{
#ifdef CONFIG_PGSTE
	unsigned long bits;

	bits = pgste_val(pgste) & (PGSTE_IN_BIT | PGSTE_VSIE_BIT);
	if (bits) {
		pgste_val(pgste) ^= bits;
		ptep_notify(mm, addr, ptep, bits);
	}
#endif
	return pgste;
}

static inline pgste_t ptep_xchg_start(struct mm_struct *mm,
				      unsigned long addr, pte_t *ptep)
{
	pgste_t pgste = __pgste(0);

	if (mm_has_pgste(mm)) {
		pgste = pgste_get_lock(ptep);
		pgste = pgste_pte_notify(mm, addr, ptep, pgste);
	}
	return pgste;
}

static inline pte_t ptep_xchg_commit(struct mm_struct *mm,
				     unsigned long addr, pte_t *ptep,
				     pgste_t pgste, pte_t old, pte_t new)
{
	if (mm_has_pgste(mm)) {
		if (pte_val(old) & _PAGE_INVALID)
			pgste_set_key(ptep, pgste, new, mm);
		if (pte_val(new) & _PAGE_INVALID) {
			pgste = pgste_update_all(old, pgste, mm);
			if ((pgste_val(pgste) & _PGSTE_GPS_USAGE_MASK) ==
			    _PGSTE_GPS_USAGE_UNUSED)
				old = set_pte_bit(old, __pgprot(_PAGE_UNUSED));
		}
		pgste = pgste_set_pte(ptep, pgste, new);
		pgste_set_unlock(ptep, pgste);
	} else {
		set_pte(ptep, new);
	}
	return old;
}

pte_t ptep_xchg_direct(struct mm_struct *mm, unsigned long addr,
		       pte_t *ptep, pte_t new)
{
	pgste_t pgste;
	pte_t old;
	int nodat;

	preempt_disable();
	pgste = ptep_xchg_start(mm, addr, ptep);
	nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);
	old = ptep_flush_direct(mm, addr, ptep, nodat);
	old = ptep_xchg_commit(mm, addr, ptep, pgste, old, new);
	preempt_enable();
	return old;
}
EXPORT_SYMBOL(ptep_xchg_direct);
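
/*
 * Illustrative sketch (hypothetical caller, not taken from this file): to
 * clear a present PTE and retrieve its previous value, with the TLB entry
 * removed on all CPUs before returning, one would do something like
 *
 *	pte_t old = ptep_xchg_direct(mm, addr, ptep, __pte(_PAGE_INVALID));
 *
 * ptep_xchg_lazy() below has the same interface but may defer the global
 * flush when only the current CPU has the mm attached.
 */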

/*
 * Caller must check that new PTE only differs in _PAGE_PROTECT HW bit, so that
 * RDP can be used instead of IPTE. See also comments at pte_allow_rdp().
 */
void ptep_reset_dat_prot(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
			 pte_t new)
{
	preempt_disable();
	atomic_inc(&mm->context.flush_count);
	if (cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
		__ptep_rdp(addr, ptep, 0, 0, 1);
	else
		__ptep_rdp(addr, ptep, 0, 0, 0);
	/*
	 * PTE is not invalidated by RDP, only _PAGE_PROTECT is cleared. That
	 * means it is still valid and active, and must not be changed according
	 * to the architecture. But writing a new value that only differs in SW
	 * bits is allowed.
	 */
	set_pte(ptep, new);
	atomic_dec(&mm->context.flush_count);
	preempt_enable();
}
EXPORT_SYMBOL(ptep_reset_dat_prot);
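
/*
 * Illustrative sketch (hypothetical caller, not taken from this file): a
 * caller that wants to grant write access with a new PTE value differing
 * from the installed one only in the _PAGE_PROTECT hardware bit could use
 * RDP instead of a full invalidate-and-replace:
 *
 *	pte_t new = clear_pte_bit(*ptep, __pgprot(_PAGE_PROTECT));
 *
 *	if (pte_allow_rdp(*ptep, new))
 *		ptep_reset_dat_prot(mm, addr, ptep, new);
 *	else
 *		ptep_xchg_direct(mm, addr, ptep, new);
 */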

pte_t ptep_xchg_lazy(struct mm_struct *mm, unsigned long addr,
		     pte_t *ptep, pte_t new)
{
	pgste_t pgste;
	pte_t old;
	int nodat;

	preempt_disable();
	pgste = ptep_xchg_start(mm, addr, ptep);
	nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);
	old = ptep_flush_lazy(mm, addr, ptep, nodat);
	old = ptep_xchg_commit(mm, addr, ptep, pgste, old, new);
	preempt_enable();
	return old;
}
EXPORT_SYMBOL(ptep_xchg_lazy);

pte_t ptep_modify_prot_start(struct vm_area_struct *vma, unsigned long addr,
			     pte_t *ptep)
{
	pgste_t pgste;
	pte_t old;
	int nodat;
	struct mm_struct *mm = vma->vm_mm;

	preempt_disable();
	pgste = ptep_xchg_start(mm, addr, ptep);
	nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);
	old = ptep_flush_lazy(mm, addr, ptep, nodat);
	if (mm_has_pgste(mm)) {
		pgste = pgste_update_all(old, pgste, mm);
		pgste_set(ptep, pgste);
	}
	return old;
}

void ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr,
			     pte_t *ptep, pte_t old_pte, pte_t pte)
{
	pgste_t pgste;
	struct mm_struct *mm = vma->vm_mm;

	if (!MACHINE_HAS_NX)
		pte = clear_pte_bit(pte, __pgprot(_PAGE_NOEXEC));
	if (mm_has_pgste(mm)) {
		pgste = pgste_get(ptep);
		pgste_set_key(ptep, pgste, pte, mm);
		pgste = pgste_set_pte(ptep, pgste, pte);
		pgste_set_unlock(ptep, pgste);
	} else {
		set_pte(ptep, pte);
	}
	preempt_enable();
}

static inline void pmdp_idte_local(struct mm_struct *mm,
				   unsigned long addr, pmd_t *pmdp)
{
	if (MACHINE_HAS_TLB_GUEST)
		__pmdp_idte(addr, pmdp, IDTE_NODAT | IDTE_GUEST_ASCE,
			    mm->context.asce, IDTE_LOCAL);
	else
		__pmdp_idte(addr, pmdp, 0, 0, IDTE_LOCAL);
	if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m)
		gmap_pmdp_idte_local(mm, addr);
}

static inline void pmdp_idte_global(struct mm_struct *mm,
				    unsigned long addr, pmd_t *pmdp)
{
	if (MACHINE_HAS_TLB_GUEST) {
		__pmdp_idte(addr, pmdp, IDTE_NODAT | IDTE_GUEST_ASCE,
			    mm->context.asce, IDTE_GLOBAL);
		if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m)
			gmap_pmdp_idte_global(mm, addr);
	} else if (MACHINE_HAS_IDTE) {
		__pmdp_idte(addr, pmdp, 0, 0, IDTE_GLOBAL);
		if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m)
			gmap_pmdp_idte_global(mm, addr);
	} else {
		__pmdp_csp(pmdp);
		if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m)
			gmap_pmdp_csp(mm, addr);
	}
}

static inline pmd_t pmdp_flush_direct(struct mm_struct *mm,
				      unsigned long addr, pmd_t *pmdp)
{
	pmd_t old;

	old = *pmdp;
	if (pmd_val(old) & _SEGMENT_ENTRY_INVALID)
		return old;
	atomic_inc(&mm->context.flush_count);
	if (MACHINE_HAS_TLB_LC &&
	    cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
		pmdp_idte_local(mm, addr, pmdp);
	else
		pmdp_idte_global(mm, addr, pmdp);
	atomic_dec(&mm->context.flush_count);
	return old;
}

static inline pmd_t pmdp_flush_lazy(struct mm_struct *mm,
				    unsigned long addr, pmd_t *pmdp)
{
	pmd_t old;

	old = *pmdp;
	if (pmd_val(old) & _SEGMENT_ENTRY_INVALID)
		return old;
	atomic_inc(&mm->context.flush_count);
	if (cpumask_equal(&mm->context.cpu_attach_mask,
			  cpumask_of(smp_processor_id()))) {
		set_pmd(pmdp, set_pmd_bit(*pmdp, __pgprot(_SEGMENT_ENTRY_INVALID)));
		mm->context.flush_mm = 1;
		if (mm_has_pgste(mm))
			gmap_pmdp_invalidate(mm, addr);
	} else {
		pmdp_idte_global(mm, addr, pmdp);
	}
	atomic_dec(&mm->context.flush_count);
	return old;
}

#ifdef CONFIG_PGSTE
static int pmd_lookup(struct mm_struct *mm, unsigned long addr, pmd_t **pmdp)
{
	struct vm_area_struct *vma;
	pgd_t *pgd;
	p4d_t *p4d;
	pud_t *pud;

	/* We need a valid VMA, otherwise this is clearly a fault. */
	vma = vma_lookup(mm, addr);
	if (!vma)
		return -EFAULT;

	pgd = pgd_offset(mm, addr);
	if (!pgd_present(*pgd))
		return -ENOENT;

	p4d = p4d_offset(pgd, addr);
	if (!p4d_present(*p4d))
		return -ENOENT;

	pud = pud_offset(p4d, addr);
	if (!pud_present(*pud))
		return -ENOENT;

	/* Large PUDs are not supported yet. */
	if (pud_leaf(*pud))
		return -EFAULT;

	*pmdp = pmd_offset(pud, addr);
	return 0;
}
#endif

pmd_t pmdp_xchg_direct(struct mm_struct *mm, unsigned long addr,
		       pmd_t *pmdp, pmd_t new)
{
	pmd_t old;

	preempt_disable();
	old = pmdp_flush_direct(mm, addr, pmdp);
	set_pmd(pmdp, new);
	preempt_enable();
	return old;
}
EXPORT_SYMBOL(pmdp_xchg_direct);
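
/*
 * Illustrative sketch (hypothetical caller, not taken from this file):
 * replacing a segment (PMD) entry and obtaining the old value, with the
 * required TLB flush done before the new entry becomes visible:
 *
 *	pmd_t old = pmdp_xchg_direct(mm, addr, pmdp,
 *				     __pmd(_SEGMENT_ENTRY_INVALID));
 *
 * pmdp_xchg_lazy() below defers the flush when only the current CPU has
 * the mm attached and marks the mm for a deferred flush instead.
 */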

pmd_t pmdp_xchg_lazy(struct mm_struct *mm, unsigned long addr,
		     pmd_t *pmdp, pmd_t new)
{
	pmd_t old;

	preempt_disable();
	old = pmdp_flush_lazy(mm, addr, pmdp);
	set_pmd(pmdp, new);
	preempt_enable();
	return old;
}
EXPORT_SYMBOL(pmdp_xchg_lazy);

static inline void pudp_idte_local(struct mm_struct *mm,
				   unsigned long addr, pud_t *pudp)
{
	if (MACHINE_HAS_TLB_GUEST)
		__pudp_idte(addr, pudp, IDTE_NODAT | IDTE_GUEST_ASCE,
			    mm->context.asce, IDTE_LOCAL);
	else
		__pudp_idte(addr, pudp, 0, 0, IDTE_LOCAL);
}

static inline void pudp_idte_global(struct mm_struct *mm,
				    unsigned long addr, pud_t *pudp)
{
	if (MACHINE_HAS_TLB_GUEST)
		__pudp_idte(addr, pudp, IDTE_NODAT | IDTE_GUEST_ASCE,
			    mm->context.asce, IDTE_GLOBAL);
	else if (MACHINE_HAS_IDTE)
		__pudp_idte(addr, pudp, 0, 0, IDTE_GLOBAL);
	else
		/*
		 * Invalid bit position is the same for pmd and pud, so we can
		 * re-use __pmdp_csp() here
		 */
		__pmdp_csp((pmd_t *) pudp);
}

static inline pud_t pudp_flush_direct(struct mm_struct *mm,
				      unsigned long addr, pud_t *pudp)
{
	pud_t old;

	old = *pudp;
	if (pud_val(old) & _REGION_ENTRY_INVALID)
		return old;
	atomic_inc(&mm->context.flush_count);
	if (MACHINE_HAS_TLB_LC &&
	    cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
		pudp_idte_local(mm, addr, pudp);
	else
		pudp_idte_global(mm, addr, pudp);
	atomic_dec(&mm->context.flush_count);
	return old;
}

pud_t pudp_xchg_direct(struct mm_struct *mm, unsigned long addr,
		       pud_t *pudp, pud_t new)
{
	pud_t old;

	preempt_disable();
	old = pudp_flush_direct(mm, addr, pudp);
	set_pud(pudp, new);
	preempt_enable();
	return old;
}
EXPORT_SYMBOL(pudp_xchg_direct);

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
				pgtable_t pgtable)
{
	struct list_head *lh = (struct list_head *) pgtable;

	assert_spin_locked(pmd_lockptr(mm, pmdp));

	/* FIFO */
	if (!pmd_huge_pte(mm, pmdp))
		INIT_LIST_HEAD(lh);
	else
		list_add(lh, (struct list_head *) pmd_huge_pte(mm, pmdp));
	pmd_huge_pte(mm, pmdp) = pgtable;
}

pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
{
	struct list_head *lh;
	pgtable_t pgtable;
	pte_t *ptep;

	assert_spin_locked(pmd_lockptr(mm, pmdp));

	/* FIFO */
	pgtable = pmd_huge_pte(mm, pmdp);
	lh = (struct list_head *) pgtable;
	if (list_empty(lh))
		pmd_huge_pte(mm, pmdp) = NULL;
	else {
		pmd_huge_pte(mm, pmdp) = (pgtable_t) lh->next;
		list_del(lh);
	}
	ptep = (pte_t *) pgtable;
	set_pte(ptep, __pte(_PAGE_INVALID));
	ptep++;
	set_pte(ptep, __pte(_PAGE_INVALID));
	return pgtable;
}
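
/*
 * Background note (not part of the original source): the deposit/withdraw
 * pair above stashes a preallocated page table in the list rooted at
 * pmd_huge_pte() while a huge PMD mapping is in place, so that a later
 * split can rewire the range to normal PTEs without having to allocate
 * memory.  A hypothetical caller, holding the pmd lock, pairs them like
 *
 *	pgtable_trans_huge_deposit(mm, pmdp, pgtable);	// when mapping huge
 *	...
 *	pgtable = pgtable_trans_huge_withdraw(mm, pmdp);	// when splitting
 */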

#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

#ifdef CONFIG_PGSTE
void ptep_set_pte_at(struct mm_struct *mm, unsigned long addr,
		     pte_t *ptep, pte_t entry)
{
	pgste_t pgste;

	/* the mm_has_pgste() check is done in set_pte_at() */
	preempt_disable();
	pgste = pgste_get_lock(ptep);
	pgste_val(pgste) &= ~_PGSTE_GPS_ZERO;
	pgste_set_key(ptep, pgste, entry, mm);
	pgste = pgste_set_pte(ptep, pgste, entry);
	pgste_set_unlock(ptep, pgste);
	preempt_enable();
}

void ptep_set_notify(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
	pgste_t pgste;

	preempt_disable();
	pgste = pgste_get_lock(ptep);
	pgste_val(pgste) |= PGSTE_IN_BIT;
	pgste_set_unlock(ptep, pgste);
	preempt_enable();
}

/**
 * ptep_force_prot - change access rights of a locked pte
 * @mm: pointer to the process mm_struct
 * @addr: virtual address in the guest address space
 * @ptep: pointer to the page table entry
 * @prot: indicates guest access rights: PROT_NONE, PROT_READ or PROT_WRITE
 * @bit: pgste bit to set (e.g. for notification)
 *
 * Returns 0 if the access rights were changed and -EAGAIN if the current
 * and requested access rights are incompatible.
 */
int ptep_force_prot(struct mm_struct *mm, unsigned long addr,
		    pte_t *ptep, int prot, unsigned long bit)
{
	pte_t entry;
	pgste_t pgste;
	int pte_i, pte_p, nodat;

	pgste = pgste_get_lock(ptep);
	entry = *ptep;
	/* Check pte entry after all locks have been acquired */
	pte_i = pte_val(entry) & _PAGE_INVALID;
	pte_p = pte_val(entry) & _PAGE_PROTECT;
	if ((pte_i && (prot != PROT_NONE)) ||
	    (pte_p && (prot & PROT_WRITE))) {
		pgste_set_unlock(ptep, pgste);
		return -EAGAIN;
	}
	/* Change access rights and set pgste bit */
	nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);
	if (prot == PROT_NONE && !pte_i) {
		ptep_flush_direct(mm, addr, ptep, nodat);
		pgste = pgste_update_all(entry, pgste, mm);
		entry = set_pte_bit(entry, __pgprot(_PAGE_INVALID));
	}
	if (prot == PROT_READ && !pte_p) {
		ptep_flush_direct(mm, addr, ptep, nodat);
		entry = clear_pte_bit(entry, __pgprot(_PAGE_INVALID));
		entry = set_pte_bit(entry, __pgprot(_PAGE_PROTECT));
	}
	pgste_val(pgste) |= bit;
	pgste = pgste_set_pte(ptep, pgste, entry);
	pgste_set_unlock(ptep, pgste);
	return 0;
}
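
/*
 * Illustrative sketch (hypothetical caller, not taken from this file):
 * callers such as the gmap code use this to downgrade a guest mapping and
 * to request a notification when the host mapping changes, for example:
 *
 *	rc = ptep_force_prot(mm, gaddr, ptep, PROT_READ, PGSTE_IN_BIT);
 *	if (rc == -EAGAIN)
 *		;	// current rights are incompatible, retry or fault in
 */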

int ptep_shadow_pte(struct mm_struct *mm, unsigned long saddr,
		    pte_t *sptep, pte_t *tptep, pte_t pte)
{
	pgste_t spgste, tpgste;
	pte_t spte, tpte;
	int rc = -EAGAIN;

	if (!(pte_val(*tptep) & _PAGE_INVALID))
		return 0;	/* already shadowed */
	spgste = pgste_get_lock(sptep);
	spte = *sptep;
	if (!(pte_val(spte) & _PAGE_INVALID) &&
	    !((pte_val(spte) & _PAGE_PROTECT) &&
	      !(pte_val(pte) & _PAGE_PROTECT))) {
		pgste_val(spgste) |= PGSTE_VSIE_BIT;
		tpgste = pgste_get_lock(tptep);
		tpte = __pte((pte_val(spte) & PAGE_MASK) |
			     (pte_val(pte) & _PAGE_PROTECT));
		/* don't touch the storage key - it belongs to parent pgste */
		tpgste = pgste_set_pte(tptep, tpgste, tpte);
		pgste_set_unlock(tptep, tpgste);
		rc = 1;
	}
	pgste_set_unlock(sptep, spgste);
	return rc;
}

void ptep_unshadow_pte(struct mm_struct *mm, unsigned long saddr, pte_t *ptep)
{
	pgste_t pgste;
	int nodat;

	pgste = pgste_get_lock(ptep);
	/* notifier is called by the caller */
	nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);
	ptep_flush_direct(mm, saddr, ptep, nodat);
	/* don't touch the storage key - it belongs to parent pgste */
	pgste = pgste_set_pte(ptep, pgste, __pte(_PAGE_INVALID));
	pgste_set_unlock(ptep, pgste);
}

static void ptep_zap_swap_entry(struct mm_struct *mm, swp_entry_t entry)
{
	if (!non_swap_entry(entry))
		dec_mm_counter(mm, MM_SWAPENTS);
	else if (is_migration_entry(entry)) {
		struct page *page = pfn_swap_entry_to_page(entry);

		dec_mm_counter(mm, mm_counter(page));
	}
	free_swap_and_cache(entry);
}

void ptep_zap_unused(struct mm_struct *mm, unsigned long addr,
		     pte_t *ptep, int reset)
{
	unsigned long pgstev;
	pgste_t pgste;
	pte_t pte;

	/* Zap unused and logically-zero pages */
	preempt_disable();
	pgste = pgste_get_lock(ptep);
	pgstev = pgste_val(pgste);
	pte = *ptep;
	if (!reset && pte_swap(pte) &&
	    ((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED ||
	     (pgstev & _PGSTE_GPS_ZERO))) {
		ptep_zap_swap_entry(mm, pte_to_swp_entry(pte));
		pte_clear(mm, addr, ptep);
	}
	if (reset)
		pgste_val(pgste) &= ~(_PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT);
	pgste_set_unlock(ptep, pgste);
	preempt_enable();
}

void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
	unsigned long ptev;
	pgste_t pgste;

	/* Clear storage key ACC and F, but set R/C */
	preempt_disable();
	pgste = pgste_get_lock(ptep);
	pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT);
	pgste_val(pgste) |= PGSTE_GR_BIT | PGSTE_GC_BIT;
	ptev = pte_val(*ptep);
	if (!(ptev & _PAGE_INVALID) && (ptev & _PAGE_WRITE))
		page_set_storage_key(ptev & PAGE_MASK, PAGE_DEFAULT_KEY, 0);
	pgste_set_unlock(ptep, pgste);
	preempt_enable();
}

/*
 * Test and reset if a guest page is dirty
 */
bool ptep_test_and_clear_uc(struct mm_struct *mm, unsigned long addr,
			    pte_t *ptep)
{
	pgste_t pgste;
	pte_t pte;
	bool dirty;
	int nodat;

	pgste = pgste_get_lock(ptep);
	dirty = !!(pgste_val(pgste) & PGSTE_UC_BIT);
	pgste_val(pgste) &= ~PGSTE_UC_BIT;
	pte = *ptep;
	if (dirty && (pte_val(pte) & _PAGE_PRESENT)) {
		pgste = pgste_pte_notify(mm, addr, ptep, pgste);
		nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);
		ptep_ipte_global(mm, addr, ptep, nodat);
		if (MACHINE_HAS_ESOP || !(pte_val(pte) & _PAGE_WRITE))
			pte = set_pte_bit(pte, __pgprot(_PAGE_PROTECT));
		else
			pte = set_pte_bit(pte, __pgprot(_PAGE_INVALID));
		set_pte(ptep, pte);
	}
	pgste_set_unlock(ptep, pgste);
	return dirty;
}
EXPORT_SYMBOL_GPL(ptep_test_and_clear_uc);
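
/*
 * Illustrative sketch (hypothetical caller, not taken from this file):
 * dirty logging for a guest walks the mapped pages and harvests the
 * per-page user-dirty state, re-protecting the page in the same step:
 *
 *	if (ptep_test_and_clear_uc(mm, addr, ptep))
 *		mark_page_dirty_in_log(gfn);	// hypothetical helper
 */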

int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
			  unsigned char key, bool nq)
{
	unsigned long keyul, paddr;
	spinlock_t *ptl;
	pgste_t old, new;
	pmd_t *pmdp;
	pte_t *ptep;

	/*
	 * If we don't have a PTE table and if there is no huge page mapped,
	 * we can ignore attempts to set the key to 0, because it already is 0.
	 */
	switch (pmd_lookup(mm, addr, &pmdp)) {
	case -ENOENT:
		return key ? -EFAULT : 0;
	case 0:
		break;
	default:
		return -EFAULT;
	}
again:
	ptl = pmd_lock(mm, pmdp);
	if (!pmd_present(*pmdp)) {
		spin_unlock(ptl);
		return key ? -EFAULT : 0;
	}

	if (pmd_large(*pmdp)) {
		paddr = pmd_val(*pmdp) & HPAGE_MASK;
		paddr |= addr & ~HPAGE_MASK;
		/*
		 * Huge pmds need quiescing operations, they are
		 * always mapped.
		 */
		page_set_storage_key(paddr, key, 1);
		spin_unlock(ptl);
		return 0;
	}
	spin_unlock(ptl);

	ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl);
	if (!ptep)
		goto again;
	new = old = pgste_get_lock(ptep);
	pgste_val(new) &= ~(PGSTE_GR_BIT | PGSTE_GC_BIT |
			    PGSTE_ACC_BITS | PGSTE_FP_BIT);
	keyul = (unsigned long) key;
	pgste_val(new) |= (keyul & (_PAGE_CHANGED | _PAGE_REFERENCED)) << 48;
	pgste_val(new) |= (keyul & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
	if (!(pte_val(*ptep) & _PAGE_INVALID)) {
		unsigned long bits, skey;

		paddr = pte_val(*ptep) & PAGE_MASK;
		skey = (unsigned long) page_get_storage_key(paddr);
		bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
		skey = key & (_PAGE_ACC_BITS | _PAGE_FP_BIT);
		/* Set storage key ACC and FP */
		page_set_storage_key(paddr, skey, !nq);
		/* Merge host changed & referenced into pgste */
		pgste_val(new) |= bits << 52;
	}
	/* changing the guest storage key is considered a change of the page */
	if ((pgste_val(new) ^ pgste_val(old)) &
	    (PGSTE_ACC_BITS | PGSTE_FP_BIT | PGSTE_GR_BIT | PGSTE_GC_BIT))
		pgste_val(new) |= PGSTE_UC_BIT;

	pgste_set_unlock(ptep, new);
	pte_unmap_unlock(ptep, ptl);
	return 0;
}
EXPORT_SYMBOL(set_guest_storage_key);

/*
 * Conditionally set a guest storage key (handling csske).
 * oldkey will be updated when either mr or mc is set and a pointer is given.
 *
 * Returns 0 if an update of the guest's storage key wasn't necessary, 1 if the
 * guest storage key was updated and -EFAULT on access errors.
 */
int cond_set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
			       unsigned char key, unsigned char *oldkey,
			       bool nq, bool mr, bool mc)
{
	unsigned char tmp, mask = _PAGE_ACC_BITS | _PAGE_FP_BIT;
	int rc;

	/* we can drop the pgste lock between getting and setting the key */
	if (mr | mc) {
		rc = get_guest_storage_key(current->mm, addr, &tmp);
		if (rc)
			return rc;
		if (oldkey)
			*oldkey = tmp;
		if (!mr)
			mask |= _PAGE_REFERENCED;
		if (!mc)
			mask |= _PAGE_CHANGED;
		if (!((tmp ^ key) & mask))
			return 0;
	}
	rc = set_guest_storage_key(current->mm, addr, key, nq);
	return rc < 0 ? rc : 1;
}
EXPORT_SYMBOL(cond_set_guest_storage_key);
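
/*
 * Illustrative sketch (hypothetical caller, not taken from this file): a
 * handler emulating the guest's SSKE instruction with the MR/MC
 * conditional bits would call this with the guest-specified key and flags:
 *
 *	rc = cond_set_guest_storage_key(current->mm, gaddr, key, &oldkey,
 *					nq, mr, mc);
 *	// rc == 1: key written, rc == 0: no update needed, rc < 0: fault
 */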

/*
 * Reset a guest reference bit (rrbe), returning the reference and changed bit.
 *
 * Returns < 0 in case of error, otherwise the cc to be reported to the guest.
 */
int reset_guest_reference_bit(struct mm_struct *mm, unsigned long addr)
{
	spinlock_t *ptl;
	unsigned long paddr;
	pgste_t old, new;
	pmd_t *pmdp;
	pte_t *ptep;
	int cc = 0;

	/*
	 * If we don't have a PTE table and if there is no huge page mapped,
	 * the storage key is 0 and there is nothing for us to do.
	 */
	switch (pmd_lookup(mm, addr, &pmdp)) {
	case -ENOENT:
		return 0;
	case 0:
		break;
	default:
		return -EFAULT;
	}
again:
	ptl = pmd_lock(mm, pmdp);
	if (!pmd_present(*pmdp)) {
		spin_unlock(ptl);
		return 0;
	}

	if (pmd_large(*pmdp)) {
		paddr = pmd_val(*pmdp) & HPAGE_MASK;
		paddr |= addr & ~HPAGE_MASK;
		cc = page_reset_referenced(paddr);
		spin_unlock(ptl);
		return cc;
	}
	spin_unlock(ptl);

	ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl);
	if (!ptep)
		goto again;
	new = old = pgste_get_lock(ptep);
	/* Reset guest reference bit only */
	pgste_val(new) &= ~PGSTE_GR_BIT;

	if (!(pte_val(*ptep) & _PAGE_INVALID)) {
		paddr = pte_val(*ptep) & PAGE_MASK;
		cc = page_reset_referenced(paddr);
		/* Merge real referenced bit into host-set */
		pgste_val(new) |= ((unsigned long) cc << 53) & PGSTE_HR_BIT;
	}
	/* Reflect guest's logical view, not physical */
	cc |= (pgste_val(old) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 49;
	/* Changing the guest storage key is considered a change of the page */
	if ((pgste_val(new) ^ pgste_val(old)) & PGSTE_GR_BIT)
		pgste_val(new) |= PGSTE_UC_BIT;

	pgste_set_unlock(ptep, new);
	pte_unmap_unlock(ptep, ptl);
	return cc;
}
EXPORT_SYMBOL(reset_guest_reference_bit);

int get_guest_storage_key(struct mm_struct *mm, unsigned long addr,
			  unsigned char *key)
{
	unsigned long paddr;
	spinlock_t *ptl;
	pgste_t pgste;
	pmd_t *pmdp;
	pte_t *ptep;

	/*
	 * If we don't have a PTE table and if there is no huge page mapped,
	 * the storage key is 0.
	 */
	*key = 0;

	switch (pmd_lookup(mm, addr, &pmdp)) {
	case -ENOENT:
		return 0;
	case 0:
		break;
	default:
		return -EFAULT;
	}
again:
	ptl = pmd_lock(mm, pmdp);
	if (!pmd_present(*pmdp)) {
		spin_unlock(ptl);
		return 0;
	}

	if (pmd_large(*pmdp)) {
		paddr = pmd_val(*pmdp) & HPAGE_MASK;
		paddr |= addr & ~HPAGE_MASK;
		*key = page_get_storage_key(paddr);
		spin_unlock(ptl);
		return 0;
	}
	spin_unlock(ptl);

	ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl);
	if (!ptep)
		goto again;
	pgste = pgste_get_lock(ptep);
	*key = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56;
	paddr = pte_val(*ptep) & PAGE_MASK;
	if (!(pte_val(*ptep) & _PAGE_INVALID))
		*key = page_get_storage_key(paddr);
	/* Reflect guest's logical view, not physical */
	*key |= (pgste_val(pgste) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 48;
	pgste_set_unlock(ptep, pgste);
	pte_unmap_unlock(ptep, ptl);
	return 0;
}
EXPORT_SYMBOL(get_guest_storage_key);
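
/*
 * Illustrative sketch (hypothetical caller, not taken from this file): an
 * ISKE emulation path would resolve the guest absolute address to a host
 * virtual address and then read the key the guest is supposed to see:
 *
 *	unsigned char key;
 *
 *	if (get_guest_storage_key(current->mm, hva, &key))
 *		return -EFAULT;	// no mapping, report addressing exception
 */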

/**
 * pgste_perform_essa - perform ESSA actions on the PGSTE.
 * @mm: the memory context. It must have PGSTEs, no check is performed here!
 * @hva: the host virtual address of the page whose PGSTE is to be processed
 * @orc: the specific action to perform, see the ESSA_SET_* macros.
 * @oldpte: the PTE will be saved there if the pointer is not NULL.
 * @oldpgste: the old PGSTE will be saved there if the pointer is not NULL.
 *
 * Return: 1 if the page is to be added to the CBRL, otherwise 0,
 *	   or < 0 in case of error. -EINVAL is returned for invalid values
 *	   of orc, -EFAULT for invalid addresses.
 */
int pgste_perform_essa(struct mm_struct *mm, unsigned long hva, int orc,
		       unsigned long *oldpte, unsigned long *oldpgste)
{
	struct vm_area_struct *vma;
	unsigned long pgstev;
	spinlock_t *ptl;
	pgste_t pgste;
	pte_t *ptep;
	int res = 0;

	WARN_ON_ONCE(orc > ESSA_MAX);
	if (unlikely(orc > ESSA_MAX))
		return -EINVAL;

	vma = vma_lookup(mm, hva);
	if (!vma || is_vm_hugetlb_page(vma))
		return -EFAULT;
	ptep = get_locked_pte(mm, hva, &ptl);
	if (unlikely(!ptep))
		return -EFAULT;
	pgste = pgste_get_lock(ptep);
	pgstev = pgste_val(pgste);
	if (oldpte)
		*oldpte = pte_val(*ptep);
	if (oldpgste)
		*oldpgste = pgstev;

	switch (orc) {
	case ESSA_GET_STATE:
		break;
	case ESSA_SET_STABLE:
		pgstev &= ~(_PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT);
		pgstev |= _PGSTE_GPS_USAGE_STABLE;
		break;
	case ESSA_SET_UNUSED:
		pgstev &= ~_PGSTE_GPS_USAGE_MASK;
		pgstev |= _PGSTE_GPS_USAGE_UNUSED;
		if (pte_val(*ptep) & _PAGE_INVALID)
			res = 1;
		break;
	case ESSA_SET_VOLATILE:
		pgstev &= ~_PGSTE_GPS_USAGE_MASK;
		pgstev |= _PGSTE_GPS_USAGE_VOLATILE;
		if (pte_val(*ptep) & _PAGE_INVALID)
			res = 1;
		break;
	case ESSA_SET_POT_VOLATILE:
		pgstev &= ~_PGSTE_GPS_USAGE_MASK;
		if (!(pte_val(*ptep) & _PAGE_INVALID)) {
			pgstev |= _PGSTE_GPS_USAGE_POT_VOLATILE;
			break;
		}
		if (pgstev & _PGSTE_GPS_ZERO) {
			pgstev |= _PGSTE_GPS_USAGE_VOLATILE;
			break;
		}
		if (!(pgstev & PGSTE_GC_BIT)) {
			pgstev |= _PGSTE_GPS_USAGE_VOLATILE;
			res = 1;
			break;
		}
		break;
	case ESSA_SET_STABLE_RESIDENT:
		pgstev &= ~_PGSTE_GPS_USAGE_MASK;
		pgstev |= _PGSTE_GPS_USAGE_STABLE;
		/*
		 * Since the resident state can go away any time after this
		 * call, we will not make this page resident. We can revisit
		 * this decision if a guest will ever start using this.
		 */
		break;
	case ESSA_SET_STABLE_IF_RESIDENT:
		if (!(pte_val(*ptep) & _PAGE_INVALID)) {
			pgstev &= ~_PGSTE_GPS_USAGE_MASK;
			pgstev |= _PGSTE_GPS_USAGE_STABLE;
		}
		break;
	case ESSA_SET_STABLE_NODAT:
		pgstev &= ~_PGSTE_GPS_USAGE_MASK;
		pgstev |= _PGSTE_GPS_USAGE_STABLE | _PGSTE_GPS_NODAT;
		break;
	default:
		/* we should never get here! */
		break;
	}
	/* If we are discarding a page, set it to logical zero */
	if (res)
		pgstev |= _PGSTE_GPS_ZERO;

	pgste_val(pgste) = pgstev;
	pgste_set_unlock(ptep, pgste);
	pte_unmap_unlock(ptep, ptl);
	return res;
}
EXPORT_SYMBOL(pgste_perform_essa);
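
/*
 * Illustrative sketch (hypothetical caller, not taken from this file): an
 * ESSA intercept handler converts the guest absolute address to a host
 * virtual address, performs the requested action and, on a positive return
 * value, records the page in the collaborative-memory (CBRL) buffer:
 *
 *	res = pgste_perform_essa(vcpu->kvm->mm, hva, orc, NULL, NULL);
 *	if (res < 0)
 *		return res;
 *	if (res == 1)
 *		cbrl_add(gfn);	// hypothetical helper
 */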

/**
 * set_pgste_bits - set specific PGSTE bits.
 * @mm: the memory context. It must have PGSTEs, no check is performed here!
 * @hva: the host virtual address of the page whose PGSTE is to be processed
 * @bits: a bitmask representing the bits that will be touched
 * @value: the values of the bits to be written. Only the bits in the mask
 *	   will be written.
 *
 * Return: 0 on success, < 0 in case of error.
 */
int set_pgste_bits(struct mm_struct *mm, unsigned long hva,
		   unsigned long bits, unsigned long value)
{
	struct vm_area_struct *vma;
	spinlock_t *ptl;
	pgste_t new;
	pte_t *ptep;

	vma = vma_lookup(mm, hva);
	if (!vma || is_vm_hugetlb_page(vma))
		return -EFAULT;
	ptep = get_locked_pte(mm, hva, &ptl);
	if (unlikely(!ptep))
		return -EFAULT;
	new = pgste_get_lock(ptep);

	pgste_val(new) &= ~bits;
	pgste_val(new) |= value & bits;

	pgste_set_unlock(ptep, new);
	pte_unmap_unlock(ptep, ptl);
	return 0;
}
EXPORT_SYMBOL(set_pgste_bits);

/**
 * get_pgste - get the current PGSTE for the given address.
 * @mm: the memory context. It must have PGSTEs, no check is performed here!
 * @hva: the host virtual address of the page whose PGSTE is to be processed
 * @pgstep: will be written with the current PGSTE for the given address.
 *
 * Return: 0 on success, < 0 in case of error.
 */
int get_pgste(struct mm_struct *mm, unsigned long hva, unsigned long *pgstep)
{
	struct vm_area_struct *vma;
	spinlock_t *ptl;
	pte_t *ptep;

	vma = vma_lookup(mm, hva);
	if (!vma || is_vm_hugetlb_page(vma))
		return -EFAULT;
	ptep = get_locked_pte(mm, hva, &ptl);
	if (unlikely(!ptep))
		return -EFAULT;
	*pgstep = pgste_val(pgste_get(ptep));
	pte_unmap_unlock(ptep, ptl);
	return 0;
}
EXPORT_SYMBOL(get_pgste);
#endif