// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright IBM Corp. 2007, 2011
 * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
 */

#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/spinlock.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/swapops.h>
#include <linux/sysctl.h>
#include <linux/ksm.h>
#include <linux/mman.h>

#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#include <asm/page-states.h>

/* Return @prot with the mio_wb_bit_mask bits set. */
pgprot_t pgprot_writecombine(pgprot_t prot)
{
	/*
	 * mio_wb_bit_mask may be set on a different CPU, but it is only set
	 * once at init and only read afterwards.
	 */
	return __pgprot(pgprot_val(prot) | mio_wb_bit_mask);
}
EXPORT_SYMBOL_GPL(pgprot_writecombine);

/* Return @prot with the mio_wb_bit_mask bits cleared. */
pgprot_t pgprot_writethrough(pgprot_t prot)
{
	/*
	 * mio_wb_bit_mask may be set on a different CPU, but it is only set
	 * once at init and only read afterwards.
	 */
	return __pgprot(pgprot_val(prot) & ~mio_wb_bit_mask);
}
EXPORT_SYMBOL_GPL(pgprot_writethrough);

/*
 * Invalidate a pte through __ptep_ipte() with IPTE_LOCAL.
 *
 * With MACHINE_HAS_TLB_GUEST the options are derived from
 * mm->context.gmap_asce:
 *  - IPTE_NODAT is requested when gmap_asce is 0 or @nodat is set,
 *  - unless gmap_asce is -1UL, IPTE_GUEST_ASCE is requested and the asce
 *    passed is gmap_asce or, if that is 0, mm->context.asce.
 * Without that facility no options and no asce are passed.
 */
static inline void ptep_ipte_local(struct mm_struct *mm, unsigned long addr,
				   pte_t *ptep, int nodat)
{
	unsigned long opt, asce;

	if (MACHINE_HAS_TLB_GUEST) {
		opt = 0;
		/* read gmap_asce once so all checks below see the same value */
		asce = READ_ONCE(mm->context.gmap_asce);
		if (asce == 0UL || nodat)
			opt |= IPTE_NODAT;
		if (asce != -1UL) {
			asce = asce ? : mm->context.asce;
			opt |= IPTE_GUEST_ASCE;
		}
		__ptep_ipte(addr, ptep, opt, asce, IPTE_LOCAL);
	} else {
		__ptep_ipte(addr, ptep, 0, 0, IPTE_LOCAL);
	}
}

/*
 * Same as ptep_ipte_local() but the invalidation is issued with
 * IPTE_GLOBAL instead of IPTE_LOCAL.
 */
static inline void ptep_ipte_global(struct mm_struct *mm, unsigned long addr,
				    pte_t *ptep, int nodat)
{
	unsigned long opt, asce;

	if (MACHINE_HAS_TLB_GUEST) {
		opt = 0;
		asce = READ_ONCE(mm->context.gmap_asce);
		if (asce == 0UL || nodat)
			opt |= IPTE_NODAT;
		if (asce != -1UL) {
			asce = asce ? : mm->context.asce;
			opt |= IPTE_GUEST_ASCE;
		}
		__ptep_ipte(addr, ptep, opt, asce, IPTE_GLOBAL);
	} else {
		__ptep_ipte(addr, ptep, 0, 0, IPTE_GLOBAL);
	}
}

/*
 * Invalidate a pte immediately and return the value it had before.
 *
 * An already invalid pte is returned untouched. Otherwise flush_count is
 * raised around the invalidation; the local variant is used only if
 * MACHINE_HAS_TLB_LC is set and mm_cpumask(mm) consists of exactly the
 * current CPU, the global variant in all other cases.
 */
static inline pte_t ptep_flush_direct(struct mm_struct *mm,
				      unsigned long addr, pte_t *ptep,
				      int nodat)
{
	pte_t old;

	old = *ptep;
	if (unlikely(pte_val(old) & _PAGE_INVALID))
		return old;
	atomic_inc(&mm->context.flush_count);
	if (MACHINE_HAS_TLB_LC &&
	    cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
		ptep_ipte_local(mm, addr, ptep, nodat);
	else
		ptep_ipte_global(mm, addr, ptep, nodat);
	atomic_dec(&mm->context.flush_count);
	return old;
}

/*
 * Invalidate a pte, deferring the TLB flush where possible, and return the
 * value it had before.
 *
 * If cpu_attach_mask consists of exactly the current CPU the pte is only
 * marked invalid in memory and mm->context.flush_mm is set; no invalidation
 * instruction is issued here (presumably the flush happens later when
 * flush_mm is evaluated -- confirm against the TLB flush code). Otherwise a
 * global invalidation is done right away.
 */
static inline pte_t ptep_flush_lazy(struct mm_struct *mm,
				    unsigned long addr, pte_t *ptep,
				    int nodat)
{
	pte_t old;

	old = *ptep;
	if (unlikely(pte_val(old) & _PAGE_INVALID))
		return old;
	atomic_inc(&mm->context.flush_count);
	if (cpumask_equal(&mm->context.cpu_attach_mask,
			  cpumask_of(smp_processor_id()))) {
		pte_val(*ptep) |= _PAGE_INVALID;
		mm->context.flush_mm = 1;
	} else
		ptep_ipte_global(mm, addr, ptep, nodat);
	atomic_dec(&mm->context.flush_count);
	return old;
}

/*
 * Take the PCL lock bit of the pgste that belongs to @ptep.
 *
 * The pgste is accessed at ptep[PTRS_PER_PTE]. The loop retries a
 * compare-and-swap that expects the PCL bit to be clear and stores the value
 * with the PCL bit set. The returned pgste is the stored value, i.e. it has
 * the PCL bit set. Without CONFIG_PGSTE this returns 0.
 */
static inline pgste_t pgste_get_lock(pte_t *ptep)
{
	unsigned long new = 0;
#ifdef CONFIG_PGSTE
	unsigned long old;

	asm(
		" lg %0,%2\n"
		"0: lgr %1,%0\n"
		" nihh %0,0xff7f\n"	/* clear PCL bit in old */
		" oihh %1,0x0080\n"	/* set PCL bit in new */
		" csg %0,%1,%2\n"
		" jl 0b\n"
		: "=&d" (old), "=&d" (new), "=Q" (ptep[PTRS_PER_PTE])
		: "Q" (ptep[PTRS_PER_PTE]) : "cc", "memory");
#endif
	return __pgste(new);
}

/*
 * Store @pgste for @ptep with the PCL bit cleared, which releases the lock
 * taken by pgste_get_lock(). No-op without CONFIG_PGSTE.
 */
static inline void pgste_set_unlock(pte_t *ptep, pgste_t pgste)
{
#ifdef CONFIG_PGSTE
	asm(
		" nihh %1,0xff7f\n"	/* clear PCL bit */
		" stg %1,%0\n"
		: "=Q" (ptep[PTRS_PER_PTE])
		: "d" (pgste_val(pgste)), "Q" (ptep[PTRS_PER_PTE])
		: "cc", "memory");
#endif
}

/* Plain read of the pgste of @ptep (no locking); 0 without CONFIG_PGSTE. */
static inline pgste_t pgste_get(pte_t *ptep)
{
	unsigned long pgste = 0;
#ifdef CONFIG_PGSTE
	pgste = *(unsigned long *)(ptep + PTRS_PER_PTE);
#endif
	return __pgste(pgste);
}

/* Plain write of the pgste of @ptep (no locking); no-op without CONFIG_PGSTE. */
static inline void pgste_set(pte_t *ptep, pgste_t pgste)
{
#ifdef CONFIG_PGSTE
	*(pgste_t *)(ptep + PTRS_PER_PTE) = pgste;
#endif
}

/*
 * Fold the real storage key of the page mapped by @pte into @pgste and
 * return the result.
 *
 * Nothing is done if the mm does not use storage keys or @pte is invalid.
 * Otherwise the changed/referenced bits of the real key are ORed into the
 * pgste and the access key / fetch protection bits in the pgste are
 * replaced by the ones of the real key.
 */
static inline pgste_t pgste_update_all(pte_t pte, pgste_t pgste,
				       struct mm_struct *mm)
{
#ifdef CONFIG_PGSTE
	unsigned long address, bits, skey;

	if (!mm_uses_skeys(mm) || pte_val(pte) & _PAGE_INVALID)
		return pgste;
	address = pte_val(pte) & PAGE_MASK;
	skey = (unsigned long) page_get_storage_key(address);
	bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
	/* Transfer page changed & referenced bit to guest bits in pgste */
	pgste_val(pgste) |= bits << 48;		/* GR bit & GC bit */
	/* Copy page access key and fetch protection bit to pgste */
	pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT);
	pgste_val(pgste) |= (skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
#endif
	return pgste;

}

/*
 * Set the real storage key of the page referenced by @entry from the key
 * information kept in @pgste.
 *
 * Nothing is done if the mm does not use storage keys or @entry is invalid.
 * The pte at @ptep is expected to still be invalid at this point (checked
 * with VM_BUG_ON).
 */
static inline void pgste_set_key(pte_t *ptep, pgste_t pgste, pte_t entry,
				 struct mm_struct *mm)
{
#ifdef CONFIG_PGSTE
	unsigned long address;
	unsigned long nkey;

	if (!mm_uses_skeys(mm) || pte_val(entry) & _PAGE_INVALID)
		return;
	VM_BUG_ON(!(pte_val(*ptep) & _PAGE_INVALID));
	address = pte_val(entry) & PAGE_MASK;
	/*
	 * Set page access key and fetch protection bit from pgste.
	 * The guest C/R information is still in the PGSTE, set real
	 * key C/R to 0.
	 */
	/*
	 * NOTE(review): the code below also copies the GR/GC bits of the
	 * pgste into the new key, which does not match "set real key C/R
	 * to 0" above -- confirm which one is intended.
	 */
	nkey = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56;
	nkey |= (pgste_val(pgste) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 48;
	page_set_storage_key(address, nkey, 0);
#endif
}

/*
 * Write @entry to @ptep and return the possibly updated @pgste.
 *
 * With CONFIG_PGSTE, for a present, writable and valid entry:
 *  - without MACHINE_HAS_ESOP the entry is forced dirty and unprotected,
 *  - if the entry ends up without _PAGE_PROTECT, PGSTE_UC_BIT is set.
 */
static inline pgste_t pgste_set_pte(pte_t *ptep, pgste_t pgste, pte_t entry)
{
#ifdef CONFIG_PGSTE
	if ((pte_val(entry) & _PAGE_PRESENT) &&
	    (pte_val(entry) & _PAGE_WRITE) &&
	    !(pte_val(entry) & _PAGE_INVALID)) {
		if (!MACHINE_HAS_ESOP) {
			/*
			 * Without enhanced suppression-on-protection force
			 * the dirty bit on for all writable ptes.
			 */
			pte_val(entry) |= _PAGE_DIRTY;
			pte_val(entry) &= ~_PAGE_PROTECT;
		}
		if (!(pte_val(entry) & _PAGE_PROTECT))
			/* This pte allows write access, set user-dirty */
			pgste_val(pgste) |= PGSTE_UC_BIT;
	}
#endif
	*ptep = entry;
	return pgste;
}

/*
 * If PGSTE_IN_BIT and/or PGSTE_VSIE_BIT are set in @pgste, clear them and
 * call ptep_notify() with the bits that were set. Returns the updated pgste.
 */
static inline pgste_t pgste_pte_notify(struct mm_struct *mm,
				       unsigned long addr,
				       pte_t *ptep, pgste_t pgste)
{
#ifdef CONFIG_PGSTE
	unsigned long bits;

	bits = pgste_val(pgste) & (PGSTE_IN_BIT | PGSTE_VSIE_BIT);
	if (bits) {
		/* bits is a subset of pgste, so xor clears exactly these bits */
		pgste_val(pgste) ^= bits;
		ptep_notify(mm, addr, ptep, bits);
	}
#endif
	return pgste;
}

/*
 * First step of a pte exchange: for an mm with pgstes, lock the pgste and
 * deliver pending notifications. Returns the locked pgste, or a zero pgste
 * for an mm without pgstes.
 */
static inline pgste_t ptep_xchg_start(struct mm_struct *mm,
				      unsigned long addr, pte_t *ptep)
{
	pgste_t pgste = __pgste(0);

	if (mm_has_pgste(mm)) {
		pgste = pgste_get_lock(ptep);
		pgste = pgste_pte_notify(mm, addr, ptep, pgste);
	}
	return pgste;
}

/*
 * Final step of a pte exchange: store @new and return @old.
 *
 * For an mm with pgstes the storage key is set up when going from an
 * invalid @old to @new, and collected into the pgste when @new is invalid.
 * In the latter case @old is additionally tagged with _PAGE_UNUSED if the
 * pgste usage state is _PGSTE_GPS_USAGE_UNUSED. The pgste lock taken in
 * ptep_xchg_start() is released here.
 */
static inline pte_t ptep_xchg_commit(struct mm_struct *mm,
				     unsigned long addr, pte_t *ptep,
				     pgste_t pgste, pte_t old, pte_t new)
{
	if (mm_has_pgste(mm)) {
		if (pte_val(old) & _PAGE_INVALID)
			pgste_set_key(ptep, pgste, new, mm);
		if (pte_val(new) & _PAGE_INVALID) {
			pgste = pgste_update_all(old, pgste, mm);
			if ((pgste_val(pgste) & _PGSTE_GPS_USAGE_MASK) ==
			    _PGSTE_GPS_USAGE_UNUSED)
				pte_val(old) |= _PAGE_UNUSED;
		}
		pgste = pgste_set_pte(ptep, pgste, new);
		pgste_set_unlock(ptep, pgste);
	} else {
		*ptep = new;
	}
	return old;
}

/*
 * Replace the pte at @ptep with @new using an immediate TLB flush and
 * return the previous pte. Runs with preemption disabled.
 */
pte_t ptep_xchg_direct(struct mm_struct *mm, unsigned long addr,
		       pte_t *ptep, pte_t new)
{
	pgste_t pgste;
	pte_t old;
	int nodat;

	preempt_disable();
	pgste = ptep_xchg_start(mm, addr, ptep);
	nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);
	old = ptep_flush_direct(mm, addr, ptep, nodat);
	old = ptep_xchg_commit(mm, addr, ptep, pgste, old, new);
	preempt_enable();
	return old;
}
EXPORT_SYMBOL(ptep_xchg_direct);

/*
 * Replace the pte at @ptep with @new using ptep_flush_lazy() and return the
 * previous pte. Runs with preemption disabled.
 */
pte_t ptep_xchg_lazy(struct mm_struct *mm, unsigned long addr,
		     pte_t *ptep, pte_t new)
{
	pgste_t pgste;
	pte_t old;
	int nodat;

	preempt_disable();
	pgste = ptep_xchg_start(mm, addr, ptep);
	nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);
	old = ptep_flush_lazy(mm, addr, ptep, nodat);
	old = ptep_xchg_commit(mm, addr, ptep, pgste, old, new);
	preempt_enable();
	return old;
}
EXPORT_SYMBOL(ptep_xchg_lazy);

/*
 * Start a protection change of the pte at @ptep and return the old pte.
 *
 * Preemption is disabled here and, for an mm with pgstes, the pgste lock is
 * taken via ptep_xchg_start(); neither is released in this function. Both
 * are released in ptep_modify_prot_commit(), so the two calls must be
 * paired.
 */
pte_t ptep_modify_prot_start(struct vm_area_struct *vma, unsigned long addr,
			     pte_t *ptep)
{
	pgste_t pgste;
	pte_t old;
	int nodat;
	struct mm_struct *mm = vma->vm_mm;

	preempt_disable();
	pgste = ptep_xchg_start(mm, addr, ptep);
	nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);
	old = ptep_flush_lazy(mm, addr, ptep, nodat);
	if (mm_has_pgste(mm)) {
		pgste = pgste_update_all(old, pgste, mm);
		/* plain store: the PCL bit stays set, the pgste stays locked */
		pgste_set(ptep, pgste);
	}
	return old;
}

/*
 * Finish a protection change started with ptep_modify_prot_start(): store
 * @pte, release the pgste lock (if the mm has pgstes) and re-enable
 * preemption. _PAGE_NOEXEC is stripped from @pte without MACHINE_HAS_NX.
 */
void ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr,
			     pte_t *ptep, pte_t old_pte, pte_t pte)
{
	pgste_t pgste;
	struct mm_struct *mm = vma->vm_mm;

	if (!MACHINE_HAS_NX)
		pte_val(pte) &= ~_PAGE_NOEXEC;
	if (mm_has_pgste(mm)) {
		pgste = pgste_get(ptep);
		pgste_set_key(ptep, pgste, pte, mm);
		pgste = pgste_set_pte(ptep, pgste, pte);
		pgste_set_unlock(ptep, pgste);
	} else {
		*ptep = pte;
	}
	preempt_enable();
}

/*
 * Invalidate a pmd with IDTE_LOCAL. If the mm has pgstes and allows 1M gmap
 * huge pages, the gmap is invalidated as well.
 */
static inline void pmdp_idte_local(struct mm_struct *mm,
				   unsigned long addr, pmd_t *pmdp)
{
	if (MACHINE_HAS_TLB_GUEST)
		__pmdp_idte(addr, pmdp, IDTE_NODAT | IDTE_GUEST_ASCE,
			    mm->context.asce, IDTE_LOCAL);
	else
		__pmdp_idte(addr, pmdp, 0, 0, IDTE_LOCAL);
	if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m)
		gmap_pmdp_idte_local(mm, addr);
}

/*
 * Invalidate a pmd globally. Depending on the machine facilities this uses
 * __pmdp_idte() with or without guest-ASCE options, or falls back to
 * __pmdp_csp(). The matching gmap invalidation is done if the mm has pgstes
 * and allows 1M gmap huge pages.
 */
static inline void pmdp_idte_global(struct mm_struct *mm,
				    unsigned long addr, pmd_t *pmdp)
{
	if (MACHINE_HAS_TLB_GUEST) {
		__pmdp_idte(addr, pmdp, IDTE_NODAT | IDTE_GUEST_ASCE,
			    mm->context.asce, IDTE_GLOBAL);
		if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m)
			gmap_pmdp_idte_global(mm, addr);
	} else if (MACHINE_HAS_IDTE) {
		__pmdp_idte(addr, pmdp, 0, 0, IDTE_GLOBAL);
		if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m)
			gmap_pmdp_idte_global(mm, addr);
	} else {
		__pmdp_csp(pmdp);
		if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m)
			gmap_pmdp_csp(mm, addr);
	}
}

/*
 * Invalidate a pmd immediately and return its previous value. Mirrors
 * ptep_flush_direct(): an invalid entry is returned untouched, the local
 * variant is used only if the mm is attached to just the current CPU.
 */
static inline pmd_t pmdp_flush_direct(struct mm_struct *mm,
				      unsigned long addr, pmd_t *pmdp)
{
	pmd_t old;

	old = *pmdp;
	if (pmd_val(old) & _SEGMENT_ENTRY_INVALID)
		return old;
	atomic_inc(&mm->context.flush_count);
	if (MACHINE_HAS_TLB_LC &&
	    cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
		pmdp_idte_local(mm, addr, pmdp);
	else
		pmdp_idte_global(mm, addr, pmdp);
	atomic_dec(&mm->context.flush_count);
	return old;
}

/*
 * Invalidate a pmd, deferring the flush where possible, and return its
 * previous value. Mirrors ptep_flush_lazy(); in the deferred case the gmap
 * is additionally invalidated if the mm has pgstes.
 */
static inline pmd_t pmdp_flush_lazy(struct mm_struct *mm,
				    unsigned long addr, pmd_t *pmdp)
{
	pmd_t old;

	old = *pmdp;
	if (pmd_val(old) & _SEGMENT_ENTRY_INVALID)
		return old;
	atomic_inc(&mm->context.flush_count);
	if (cpumask_equal(&mm->context.cpu_attach_mask,
			  cpumask_of(smp_processor_id()))) {
		pmd_val(*pmdp) |= _SEGMENT_ENTRY_INVALID;
		mm->context.flush_mm = 1;
		if (mm_has_pgste(mm))
			gmap_pmdp_invalidate(mm, addr);
	} else {
		pmdp_idte_global(mm, addr, pmdp);
	}
	atomic_dec(&mm->context.flush_count);
	return old;
}

#ifdef CONFIG_PGSTE
/*
 * Walk the page tables of @mm down to the pmd for @addr, allocating the
 * p4d, pud and pmd levels as needed. Returns the pmd pointer, or NULL if an
 * allocation failed.
 */
static pmd_t *pmd_alloc_map(struct mm_struct *mm, unsigned long addr)
{
	pgd_t *pgd;
	p4d_t *p4d;
	pud_t *pud;
	pmd_t *pmd;

	pgd = pgd_offset(mm, addr);
	p4d = p4d_alloc(mm, pgd, addr);
	if (!p4d)
		return NULL;
	pud = pud_alloc(mm, p4d, addr);
	if (!pud)
		return NULL;
	pmd = pmd_alloc(mm, pud, addr);
	return pmd;
}
#endif

/*
 * Replace the pmd at @pmdp with @new using an immediate flush and return
 * the previous pmd. Runs with preemption disabled.
 */
pmd_t pmdp_xchg_direct(struct mm_struct *mm, unsigned long addr,
		       pmd_t *pmdp, pmd_t new)
{
	pmd_t old;

	preempt_disable();
	old = pmdp_flush_direct(mm, addr, pmdp);
	*pmdp = new;
	preempt_enable();
	return old;
}
EXPORT_SYMBOL(pmdp_xchg_direct);

/*
 * Replace the pmd at @pmdp with @new using pmdp_flush_lazy() and return the
 * previous pmd. Runs with preemption disabled.
 */
pmd_t pmdp_xchg_lazy(struct mm_struct *mm, unsigned long addr,
		     pmd_t *pmdp, pmd_t new)
{
	pmd_t old;

	preempt_disable();
	old = pmdp_flush_lazy(mm, addr, pmdp);
	*pmdp = new;
	preempt_enable();
	return old;
}
EXPORT_SYMBOL(pmdp_xchg_lazy);

/* Invalidate a pud with IDTE_LOCAL. */
static inline void pudp_idte_local(struct mm_struct *mm,
				   unsigned long addr, pud_t *pudp)
{
	if (MACHINE_HAS_TLB_GUEST)
		__pudp_idte(addr, pudp, IDTE_NODAT | IDTE_GUEST_ASCE,
			    mm->context.asce, IDTE_LOCAL);
	else
		__pudp_idte(addr, pudp, 0, 0, IDTE_LOCAL);
}

/*
 * Invalidate a pud globally, using __pudp_idte() where available and the
 * csp based fallback otherwise.
 */
static inline void pudp_idte_global(struct mm_struct *mm,
				    unsigned long addr, pud_t *pudp)
{
	if (MACHINE_HAS_TLB_GUEST)
		__pudp_idte(addr, pudp, IDTE_NODAT | IDTE_GUEST_ASCE,
			    mm->context.asce, IDTE_GLOBAL);
	else if (MACHINE_HAS_IDTE)
		__pudp_idte(addr, pudp, 0, 0, IDTE_GLOBAL);
	else
		/*
		 * Invalid bit position is the same for pmd and pud, so we can
		 * re-use _pmd_csp() here
		 */
		__pmdp_csp((pmd_t *) pudp);
}

/*
 * Invalidate a pud immediately and return its previous value. Mirrors
 * pmdp_flush_direct().
 */
static inline pud_t pudp_flush_direct(struct mm_struct *mm,
				      unsigned long addr, pud_t *pudp)
{
	pud_t old;

	old = *pudp;
	if (pud_val(old) & _REGION_ENTRY_INVALID)
		return old;
	atomic_inc(&mm->context.flush_count);
	if (MACHINE_HAS_TLB_LC &&
	    cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
		pudp_idte_local(mm, addr, pudp);
	else
		pudp_idte_global(mm, addr, pudp);
	atomic_dec(&mm->context.flush_count);
	return old;
}

/*
 * Replace the pud at @pudp with @new using an immediate flush and return
 * the previous pud. Runs with preemption disabled.
 */
pud_t pudp_xchg_direct(struct mm_struct *mm, unsigned long addr,
		       pud_t *pudp, pud_t new)
{
	pud_t old;

	preempt_disable();
	old = pudp_flush_direct(mm, addr, pudp);
	*pudp = new;
	preempt_enable();
	return old;
}
EXPORT_SYMBOL(pudp_xchg_direct);

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
/*
 * Deposit a preallocated page table for the huge pmd at @pmdp.
 *
 * The memory of @pgtable itself is used as a struct list_head to chain the
 * deposited tables; pmd_huge_pte() is updated to point to the table that
 * was deposited last. The pmd lock must be held.
 */
void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
				pgtable_t pgtable)
{
	struct list_head *lh = (struct list_head *) pgtable;

	assert_spin_locked(pmd_lockptr(mm, pmdp));

	/* FIFO */
	if (!pmd_huge_pte(mm, pmdp))
		INIT_LIST_HEAD(lh);
	else
		list_add(lh, (struct list_head *) pmd_huge_pte(mm, pmdp));
	pmd_huge_pte(mm, pmdp) = pgtable;
}

/*
 * Withdraw a page table previously deposited for the huge pmd at @pmdp.
 *
 * The table currently referenced by pmd_huge_pte() is unlinked and
 * returned. Its first two entries are reset to _PAGE_INVALID (presumably
 * because they were overlaid by the list_head while deposited). The pmd
 * lock must be held.
 */
pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
{
	struct list_head *lh;
	pgtable_t pgtable;
	pte_t *ptep;

	assert_spin_locked(pmd_lockptr(mm, pmdp));

	/* FIFO */
	pgtable = pmd_huge_pte(mm, pmdp);
	lh = (struct list_head *) pgtable;
	if (list_empty(lh))
		pmd_huge_pte(mm, pmdp) = NULL;
	else {
		pmd_huge_pte(mm, pmdp) = (pgtable_t) lh->next;
		list_del(lh);
	}
	ptep = (pte_t *) pgtable;
	pte_val(*ptep) = _PAGE_INVALID;
	ptep++;
	pte_val(*ptep) = _PAGE_INVALID;
	return pgtable;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE
*/

#ifdef CONFIG_PGSTE
/*
 * Store @entry at @ptep for an mm with pgstes.
 *
 * Under the pgste lock and with preemption disabled: clear _PGSTE_GPS_ZERO,
 * set the real storage key for the new entry and write the pte.
 */
void ptep_set_pte_at(struct mm_struct *mm, unsigned long addr,
		     pte_t *ptep, pte_t entry)
{
	pgste_t pgste;

	/* the mm_has_pgste() check is done in set_pte_at() */
	preempt_disable();
	pgste = pgste_get_lock(ptep);
	pgste_val(pgste) &= ~_PGSTE_GPS_ZERO;
	pgste_set_key(ptep, pgste, entry, mm);
	pgste = pgste_set_pte(ptep, pgste, entry);
	pgste_set_unlock(ptep, pgste);
	preempt_enable();
}

/*
 * Set PGSTE_IN_BIT in the pgste of @ptep, under the pgste lock and with
 * preemption disabled. @mm and @addr are not used here.
 */
void ptep_set_notify(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
	pgste_t pgste;

	preempt_disable();
	pgste = pgste_get_lock(ptep);
	pgste_val(pgste) |= PGSTE_IN_BIT;
	pgste_set_unlock(ptep, pgste);
	preempt_enable();
}

/**
 * ptep_force_prot - change access rights of a locked pte
 * @mm: pointer to the process mm_struct
 * @addr: virtual address in the guest address space
 * @ptep: pointer to the page table entry
 * @prot: indicates guest access rights: PROT_NONE, PROT_READ or PROT_WRITE
 * @bit: pgste bit to set (e.g. for notification)
 *
 * Returns 0 if the access rights were changed and -EAGAIN if the current
 * and requested access rights are incompatible.
*/
int ptep_force_prot(struct mm_struct *mm, unsigned long addr,
		    pte_t *ptep, int prot, unsigned long bit)
{
	pte_t entry;
	pgste_t pgste;
	int pte_i, pte_p, nodat;

	pgste = pgste_get_lock(ptep);
	entry = *ptep;
	/* Check pte entry after all locks have been acquired */
	pte_i = pte_val(entry) & _PAGE_INVALID;
	pte_p = pte_val(entry) & _PAGE_PROTECT;
	/*
	 * An invalid pte cannot grant any access and a protected pte cannot
	 * grant write access: let the caller retry.
	 */
	if ((pte_i && (prot != PROT_NONE)) ||
	    (pte_p && (prot & PROT_WRITE))) {
		pgste_set_unlock(ptep, pgste);
		return -EAGAIN;
	}
	/* Change access rights and set pgste bit */
	nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);
	if (prot == PROT_NONE && !pte_i) {
		ptep_flush_direct(mm, addr, ptep, nodat);
		pgste = pgste_update_all(entry, pgste, mm);
		pte_val(entry) |= _PAGE_INVALID;
	}
	if (prot == PROT_READ && !pte_p) {
		ptep_flush_direct(mm, addr, ptep, nodat);
		pte_val(entry) &= ~_PAGE_INVALID;
		pte_val(entry) |= _PAGE_PROTECT;
	}
	pgste_val(pgste) |= bit;
	pgste = pgste_set_pte(ptep, pgste, entry);
	pgste_set_unlock(ptep, pgste);
	return 0;
}

/*
 * Create a shadow pte at @tptep for the source pte at @sptep.
 *
 * Returns 0 if @tptep is already valid, 1 if the shadow pte was created,
 * and -EAGAIN if the source pte is invalid or is write protected while
 * @pte requests a non-protected mapping. On success PGSTE_VSIE_BIT is set
 * in the source pgste. @mm and @saddr are not used here.
 */
int ptep_shadow_pte(struct mm_struct *mm, unsigned long saddr,
		    pte_t *sptep, pte_t *tptep, pte_t pte)
{
	pgste_t spgste, tpgste;
	pte_t spte, tpte;
	int rc = -EAGAIN;

	if (!(pte_val(*tptep) & _PAGE_INVALID))
		return 0;	/* already shadowed */
	spgste = pgste_get_lock(sptep);
	spte = *sptep;
	if (!(pte_val(spte) & _PAGE_INVALID) &&
	    !((pte_val(spte) & _PAGE_PROTECT) &&
	      !(pte_val(pte) & _PAGE_PROTECT))) {
		pgste_val(spgste) |= PGSTE_VSIE_BIT;
		tpgste = pgste_get_lock(tptep);
		/* page address from the source pte, protection from @pte */
		pte_val(tpte) = (pte_val(spte) & PAGE_MASK) |
				(pte_val(pte) & _PAGE_PROTECT);
		/* don't touch the storage key - it belongs to parent pgste */
		tpgste = pgste_set_pte(tptep, tpgste, tpte);
		pgste_set_unlock(tptep, tpgste);
		rc = 1;
	}
	pgste_set_unlock(sptep, spgste);
	return rc;
}

/*
 * Remove a shadow pte: flush it and replace it with an invalid pte, under
 * the pgste lock.
 */
void ptep_unshadow_pte(struct mm_struct *mm, unsigned long saddr, pte_t *ptep)
{
	pgste_t pgste;
	int nodat;

	pgste = pgste_get_lock(ptep);
	/* notifier is called by the caller */
	nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);
	ptep_flush_direct(mm, saddr, ptep, nodat);
	/* don't touch the storage key - it belongs to parent pgste */
	pgste = pgste_set_pte(ptep, pgste, __pte(_PAGE_INVALID));
	pgste_set_unlock(ptep, pgste);
}

/*
 * Drop the accounting for a swap or migration entry that is being zapped
 * and release the swap entry via free_swap_and_cache().
 */
static void ptep_zap_swap_entry(struct mm_struct *mm, swp_entry_t entry)
{
	if (!non_swap_entry(entry))
		dec_mm_counter(mm, MM_SWAPENTS);
	else if (is_migration_entry(entry)) {
		struct page *page = pfn_swap_entry_to_page(entry);

		dec_mm_counter(mm, mm_counter(page));
	}
	free_swap_and_cache(entry);
}

/*
 * Zap a swapped-out pte whose pgste marks the page as unused or logically
 * zero (only if @reset is 0), or clear the usage state in the pgste
 * (if @reset is non-zero). Done under the pgste lock with preemption
 * disabled.
 */
void ptep_zap_unused(struct mm_struct *mm, unsigned long addr,
		     pte_t *ptep, int reset)
{
	unsigned long pgstev;
	pgste_t pgste;
	pte_t pte;

	/* Zap unused and logically-zero pages */
	preempt_disable();
	pgste = pgste_get_lock(ptep);
	pgstev = pgste_val(pgste);
	pte = *ptep;
	if (!reset && pte_swap(pte) &&
	    ((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED ||
	     (pgstev & _PGSTE_GPS_ZERO))) {
		ptep_zap_swap_entry(mm, pte_to_swp_entry(pte));
		pte_clear(mm, addr, ptep);
	}
	if (reset)
		pgste_val(pgste) &= ~_PGSTE_GPS_USAGE_MASK;
	pgste_set_unlock(ptep, pgste);
	preempt_enable();
}

/*
 * Reset the storage key state of a page: clear ACC/FP and set GR/GC in the
 * pgste and, for a valid writable pte, set the real storage key to
 * PAGE_DEFAULT_KEY. @mm and @addr are not used here.
 */
void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
	unsigned long ptev;
	pgste_t pgste;

	/* Clear storage key ACC and F, but set R/C */
	preempt_disable();
	pgste = pgste_get_lock(ptep);
	pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT);
	pgste_val(pgste) |= PGSTE_GR_BIT | PGSTE_GC_BIT;
	ptev = pte_val(*ptep);
	if (!(ptev & _PAGE_INVALID) && (ptev & _PAGE_WRITE))
		page_set_storage_key(ptev & PAGE_MASK, PAGE_DEFAULT_KEY, 1);
	pgste_set_unlock(ptep, pgste);
	preempt_enable();
}

/*
 * Test and reset if a guest page is dirty
 *
 * Returns whether PGSTE_UC_BIT was set; the bit is cleared in any case. If
 * the page was dirty and the pte is present, pending notifications are
 * delivered, the pte is flushed globally and then write protected (or
 * invalidated when write protection cannot be used) so that the next write
 * access is noticed again.
 */
bool ptep_test_and_clear_uc(struct mm_struct *mm, unsigned long addr,
			    pte_t *ptep)
{
	pgste_t pgste;
	pte_t pte;
	bool dirty;
	int nodat;

	pgste = pgste_get_lock(ptep);
	dirty = !!(pgste_val(pgste) & PGSTE_UC_BIT);
	pgste_val(pgste) &= ~PGSTE_UC_BIT;
	pte = *ptep;
	if (dirty && (pte_val(pte) & _PAGE_PRESENT)) {
		pgste = pgste_pte_notify(mm, addr, ptep, pgste);
		nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);
		ptep_ipte_global(mm, addr, ptep, nodat);
		if (MACHINE_HAS_ESOP || !(pte_val(pte) & _PAGE_WRITE))
			pte_val(pte) |= _PAGE_PROTECT;
		else
			pte_val(pte) |= _PAGE_INVALID;
		*ptep = pte;
	}
	pgste_set_unlock(ptep, pgste);
	return dirty;
}
EXPORT_SYMBOL_GPL(ptep_test_and_clear_uc);

/*
 * Set the guest storage key for the page at host address @addr.
 *
 * For a large pmd the real storage key is set directly. Otherwise the guest
 * view (GR/GC/ACC/FP) is stored in the pgste; if the pte is valid, the
 * ACC/FP part is also written to the real storage key (quiescing unless
 * @nq) and the real changed/referenced bits are merged into the pgste.
 * PGSTE_UC_BIT is set when the key information in the pgste changed.
 *
 * Returns 0 on success and -EFAULT if the page tables could not be set up
 * or the pmd is not present.
 */
int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
			  unsigned char key, bool nq)
{
	unsigned long keyul, paddr;
	spinlock_t *ptl;
	pgste_t old, new;
	pmd_t *pmdp;
	pte_t *ptep;

	/*
	 * NOTE(review): pmd_alloc_map() allocates missing page table levels;
	 * confirm that callers guarantee @addr lies inside a valid VMA.
	 */
	pmdp = pmd_alloc_map(mm, addr);
	if (unlikely(!pmdp))
		return -EFAULT;

	ptl = pmd_lock(mm, pmdp);
	if (!pmd_present(*pmdp)) {
		spin_unlock(ptl);
		return -EFAULT;
	}

	if (pmd_large(*pmdp)) {
		paddr = pmd_val(*pmdp) & HPAGE_MASK;
		paddr |= addr & ~HPAGE_MASK;
		/*
		 * Huge pmds need quiescing operations, they are
		 * always mapped.
		 */
		page_set_storage_key(paddr, key, 1);
		spin_unlock(ptl);
		return 0;
	}
	/*
	 * NOTE(review): the pmd lock is dropped before the pte table is
	 * mapped and locked below; confirm the pmd cannot change to a large
	 * or non-present entry in between.
	 */
	spin_unlock(ptl);

	ptep = pte_alloc_map_lock(mm, pmdp, addr, &ptl);
	if (unlikely(!ptep))
		return -EFAULT;

	new = old = pgste_get_lock(ptep);
	pgste_val(new) &= ~(PGSTE_GR_BIT | PGSTE_GC_BIT |
			    PGSTE_ACC_BITS | PGSTE_FP_BIT);
	keyul = (unsigned long) key;
	pgste_val(new) |= (keyul & (_PAGE_CHANGED | _PAGE_REFERENCED)) << 48;
	pgste_val(new) |= (keyul & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
	if (!(pte_val(*ptep) & _PAGE_INVALID)) {
		unsigned long bits, skey;

		paddr = pte_val(*ptep) & PAGE_MASK;
		skey = (unsigned long) page_get_storage_key(paddr);
		bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
		skey = key & (_PAGE_ACC_BITS | _PAGE_FP_BIT);
		/* Set storage key ACC and FP */
		page_set_storage_key(paddr, skey, !nq);
		/* Merge host changed & referenced into pgste */
		pgste_val(new) |= bits << 52;
	}
	/* changing the guest storage key is considered a change of the page */
	if ((pgste_val(new) ^ pgste_val(old)) &
	    (PGSTE_ACC_BITS | PGSTE_FP_BIT | PGSTE_GR_BIT | PGSTE_GC_BIT))
		pgste_val(new) |= PGSTE_UC_BIT;

	pgste_set_unlock(ptep, new);
	pte_unmap_unlock(ptep, ptl);
	return 0;
}
EXPORT_SYMBOL(set_guest_storage_key);

/**
 * Conditionally set a guest storage key (handling csske).
 * oldkey will be updated when either mr or mc is set and a pointer is given.
 *
 * Returns 0 if a guest's storage key update wasn't necessary, 1 if the guest
 * storage key was updated and -EFAULT on access errors.
843 */ 844 int cond_set_guest_storage_key(struct mm_struct *mm, unsigned long addr, 845 unsigned char key, unsigned char *oldkey, 846 bool nq, bool mr, bool mc) 847 { 848 unsigned char tmp, mask = _PAGE_ACC_BITS | _PAGE_FP_BIT; 849 int rc; 850 851 /* we can drop the pgste lock between getting and setting the key */ 852 if (mr | mc) { 853 rc = get_guest_storage_key(current->mm, addr, &tmp); 854 if (rc) 855 return rc; 856 if (oldkey) 857 *oldkey = tmp; 858 if (!mr) 859 mask |= _PAGE_REFERENCED; 860 if (!mc) 861 mask |= _PAGE_CHANGED; 862 if (!((tmp ^ key) & mask)) 863 return 0; 864 } 865 rc = set_guest_storage_key(current->mm, addr, key, nq); 866 return rc < 0 ? rc : 1; 867 } 868 EXPORT_SYMBOL(cond_set_guest_storage_key); 869 870 /** 871 * Reset a guest reference bit (rrbe), returning the reference and changed bit. 872 * 873 * Returns < 0 in case of error, otherwise the cc to be reported to the guest. 874 */ 875 int reset_guest_reference_bit(struct mm_struct *mm, unsigned long addr) 876 { 877 spinlock_t *ptl; 878 unsigned long paddr; 879 pgste_t old, new; 880 pmd_t *pmdp; 881 pte_t *ptep; 882 int cc = 0; 883 884 pmdp = pmd_alloc_map(mm, addr); 885 if (unlikely(!pmdp)) 886 return -EFAULT; 887 888 ptl = pmd_lock(mm, pmdp); 889 if (!pmd_present(*pmdp)) { 890 spin_unlock(ptl); 891 return -EFAULT; 892 } 893 894 if (pmd_large(*pmdp)) { 895 paddr = pmd_val(*pmdp) & HPAGE_MASK; 896 paddr |= addr & ~HPAGE_MASK; 897 cc = page_reset_referenced(paddr); 898 spin_unlock(ptl); 899 return cc; 900 } 901 spin_unlock(ptl); 902 903 ptep = pte_alloc_map_lock(mm, pmdp, addr, &ptl); 904 if (unlikely(!ptep)) 905 return -EFAULT; 906 907 new = old = pgste_get_lock(ptep); 908 /* Reset guest reference bit only */ 909 pgste_val(new) &= ~PGSTE_GR_BIT; 910 911 if (!(pte_val(*ptep) & _PAGE_INVALID)) { 912 paddr = pte_val(*ptep) & PAGE_MASK; 913 cc = page_reset_referenced(paddr); 914 /* Merge real referenced bit into host-set */ 915 pgste_val(new) |= ((unsigned long) cc << 53) & PGSTE_HR_BIT; 916 } 
917 /* Reflect guest's logical view, not physical */ 918 cc |= (pgste_val(old) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 49; 919 /* Changing the guest storage key is considered a change of the page */ 920 if ((pgste_val(new) ^ pgste_val(old)) & PGSTE_GR_BIT) 921 pgste_val(new) |= PGSTE_UC_BIT; 922 923 pgste_set_unlock(ptep, new); 924 pte_unmap_unlock(ptep, ptl); 925 return cc; 926 } 927 EXPORT_SYMBOL(reset_guest_reference_bit); 928 929 int get_guest_storage_key(struct mm_struct *mm, unsigned long addr, 930 unsigned char *key) 931 { 932 unsigned long paddr; 933 spinlock_t *ptl; 934 pgste_t pgste; 935 pmd_t *pmdp; 936 pte_t *ptep; 937 938 pmdp = pmd_alloc_map(mm, addr); 939 if (unlikely(!pmdp)) 940 return -EFAULT; 941 942 ptl = pmd_lock(mm, pmdp); 943 if (!pmd_present(*pmdp)) { 944 /* Not yet mapped memory has a zero key */ 945 spin_unlock(ptl); 946 *key = 0; 947 return 0; 948 } 949 950 if (pmd_large(*pmdp)) { 951 paddr = pmd_val(*pmdp) & HPAGE_MASK; 952 paddr |= addr & ~HPAGE_MASK; 953 *key = page_get_storage_key(paddr); 954 spin_unlock(ptl); 955 return 0; 956 } 957 spin_unlock(ptl); 958 959 ptep = pte_alloc_map_lock(mm, pmdp, addr, &ptl); 960 if (unlikely(!ptep)) 961 return -EFAULT; 962 963 pgste = pgste_get_lock(ptep); 964 *key = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56; 965 paddr = pte_val(*ptep) & PAGE_MASK; 966 if (!(pte_val(*ptep) & _PAGE_INVALID)) 967 *key = page_get_storage_key(paddr); 968 /* Reflect guest's logical view, not physical */ 969 *key |= (pgste_val(pgste) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 48; 970 pgste_set_unlock(ptep, pgste); 971 pte_unmap_unlock(ptep, ptl); 972 return 0; 973 } 974 EXPORT_SYMBOL(get_guest_storage_key); 975 976 /** 977 * pgste_perform_essa - perform ESSA actions on the PGSTE. 978 * @mm: the memory context. It must have PGSTEs, no check is performed here! 979 * @hva: the host virtual address of the page whose PGSTE is to be processed 980 * @orc: the specific action to perform, see the ESSA_SET_* macros. 
981 * @oldpte: the PTE will be saved there if the pointer is not NULL. 982 * @oldpgste: the old PGSTE will be saved there if the pointer is not NULL. 983 * 984 * Return: 1 if the page is to be added to the CBRL, otherwise 0, 985 * or < 0 in case of error. -EINVAL is returned for invalid values 986 * of orc, -EFAULT for invalid addresses. 987 */ 988 int pgste_perform_essa(struct mm_struct *mm, unsigned long hva, int orc, 989 unsigned long *oldpte, unsigned long *oldpgste) 990 { 991 unsigned long pgstev; 992 spinlock_t *ptl; 993 pgste_t pgste; 994 pte_t *ptep; 995 int res = 0; 996 997 WARN_ON_ONCE(orc > ESSA_MAX); 998 if (unlikely(orc > ESSA_MAX)) 999 return -EINVAL; 1000 ptep = get_locked_pte(mm, hva, &ptl); 1001 if (unlikely(!ptep)) 1002 return -EFAULT; 1003 pgste = pgste_get_lock(ptep); 1004 pgstev = pgste_val(pgste); 1005 if (oldpte) 1006 *oldpte = pte_val(*ptep); 1007 if (oldpgste) 1008 *oldpgste = pgstev; 1009 1010 switch (orc) { 1011 case ESSA_GET_STATE: 1012 break; 1013 case ESSA_SET_STABLE: 1014 pgstev &= ~(_PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT); 1015 pgstev |= _PGSTE_GPS_USAGE_STABLE; 1016 break; 1017 case ESSA_SET_UNUSED: 1018 pgstev &= ~_PGSTE_GPS_USAGE_MASK; 1019 pgstev |= _PGSTE_GPS_USAGE_UNUSED; 1020 if (pte_val(*ptep) & _PAGE_INVALID) 1021 res = 1; 1022 break; 1023 case ESSA_SET_VOLATILE: 1024 pgstev &= ~_PGSTE_GPS_USAGE_MASK; 1025 pgstev |= _PGSTE_GPS_USAGE_VOLATILE; 1026 if (pte_val(*ptep) & _PAGE_INVALID) 1027 res = 1; 1028 break; 1029 case ESSA_SET_POT_VOLATILE: 1030 pgstev &= ~_PGSTE_GPS_USAGE_MASK; 1031 if (!(pte_val(*ptep) & _PAGE_INVALID)) { 1032 pgstev |= _PGSTE_GPS_USAGE_POT_VOLATILE; 1033 break; 1034 } 1035 if (pgstev & _PGSTE_GPS_ZERO) { 1036 pgstev |= _PGSTE_GPS_USAGE_VOLATILE; 1037 break; 1038 } 1039 if (!(pgstev & PGSTE_GC_BIT)) { 1040 pgstev |= _PGSTE_GPS_USAGE_VOLATILE; 1041 res = 1; 1042 break; 1043 } 1044 break; 1045 case ESSA_SET_STABLE_RESIDENT: 1046 pgstev &= ~_PGSTE_GPS_USAGE_MASK; 1047 pgstev |= _PGSTE_GPS_USAGE_STABLE; 1048 
/* 1049 * Since the resident state can go away any time after this 1050 * call, we will not make this page resident. We can revisit 1051 * this decision if a guest will ever start using this. 1052 */ 1053 break; 1054 case ESSA_SET_STABLE_IF_RESIDENT: 1055 if (!(pte_val(*ptep) & _PAGE_INVALID)) { 1056 pgstev &= ~_PGSTE_GPS_USAGE_MASK; 1057 pgstev |= _PGSTE_GPS_USAGE_STABLE; 1058 } 1059 break; 1060 case ESSA_SET_STABLE_NODAT: 1061 pgstev &= ~_PGSTE_GPS_USAGE_MASK; 1062 pgstev |= _PGSTE_GPS_USAGE_STABLE | _PGSTE_GPS_NODAT; 1063 break; 1064 default: 1065 /* we should never get here! */ 1066 break; 1067 } 1068 /* If we are discarding a page, set it to logical zero */ 1069 if (res) 1070 pgstev |= _PGSTE_GPS_ZERO; 1071 1072 pgste_val(pgste) = pgstev; 1073 pgste_set_unlock(ptep, pgste); 1074 pte_unmap_unlock(ptep, ptl); 1075 return res; 1076 } 1077 EXPORT_SYMBOL(pgste_perform_essa); 1078 1079 /** 1080 * set_pgste_bits - set specific PGSTE bits. 1081 * @mm: the memory context. It must have PGSTEs, no check is performed here! 1082 * @hva: the host virtual address of the page whose PGSTE is to be processed 1083 * @bits: a bitmask representing the bits that will be touched 1084 * @value: the values of the bits to be written. Only the bits in the mask 1085 * will be written. 1086 * 1087 * Return: 0 on success, < 0 in case of error. 1088 */ 1089 int set_pgste_bits(struct mm_struct *mm, unsigned long hva, 1090 unsigned long bits, unsigned long value) 1091 { 1092 spinlock_t *ptl; 1093 pgste_t new; 1094 pte_t *ptep; 1095 1096 ptep = get_locked_pte(mm, hva, &ptl); 1097 if (unlikely(!ptep)) 1098 return -EFAULT; 1099 new = pgste_get_lock(ptep); 1100 1101 pgste_val(new) &= ~bits; 1102 pgste_val(new) |= value & bits; 1103 1104 pgste_set_unlock(ptep, new); 1105 pte_unmap_unlock(ptep, ptl); 1106 return 0; 1107 } 1108 EXPORT_SYMBOL(set_pgste_bits); 1109 1110 /** 1111 * get_pgste - get the current PGSTE for the given address. 1112 * @mm: the memory context. 
It must have PGSTEs, no check is performed here! 1113 * @hva: the host virtual address of the page whose PGSTE is to be processed 1114 * @pgstep: will be written with the current PGSTE for the given address. 1115 * 1116 * Return: 0 on success, < 0 in case of error. 1117 */ 1118 int get_pgste(struct mm_struct *mm, unsigned long hva, unsigned long *pgstep) 1119 { 1120 spinlock_t *ptl; 1121 pte_t *ptep; 1122 1123 ptep = get_locked_pte(mm, hva, &ptl); 1124 if (unlikely(!ptep)) 1125 return -EFAULT; 1126 *pgstep = pgste_val(pgste_get(ptep)); 1127 pte_unmap_unlock(ptep, ptl); 1128 return 0; 1129 } 1130 EXPORT_SYMBOL(get_pgste); 1131 #endif 1132