1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright IBM Corp. 2007, 2011 4 * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> 5 */ 6 7 #include <linux/sched.h> 8 #include <linux/kernel.h> 9 #include <linux/errno.h> 10 #include <linux/gfp.h> 11 #include <linux/mm.h> 12 #include <linux/swap.h> 13 #include <linux/smp.h> 14 #include <linux/spinlock.h> 15 #include <linux/rcupdate.h> 16 #include <linux/slab.h> 17 #include <linux/swapops.h> 18 #include <linux/sysctl.h> 19 #include <linux/ksm.h> 20 #include <linux/mman.h> 21 22 #include <asm/pgtable.h> 23 #include <asm/pgalloc.h> 24 #include <asm/tlb.h> 25 #include <asm/tlbflush.h> 26 #include <asm/mmu_context.h> 27 #include <asm/page-states.h> 28 29 static inline void ptep_ipte_local(struct mm_struct *mm, unsigned long addr, 30 pte_t *ptep, int nodat) 31 { 32 unsigned long opt, asce; 33 34 if (MACHINE_HAS_TLB_GUEST) { 35 opt = 0; 36 asce = READ_ONCE(mm->context.gmap_asce); 37 if (asce == 0UL || nodat) 38 opt |= IPTE_NODAT; 39 if (asce != -1UL) { 40 asce = asce ? : mm->context.asce; 41 opt |= IPTE_GUEST_ASCE; 42 } 43 __ptep_ipte(addr, ptep, opt, asce, IPTE_LOCAL); 44 } else { 45 __ptep_ipte(addr, ptep, 0, 0, IPTE_LOCAL); 46 } 47 } 48 49 static inline void ptep_ipte_global(struct mm_struct *mm, unsigned long addr, 50 pte_t *ptep, int nodat) 51 { 52 unsigned long opt, asce; 53 54 if (MACHINE_HAS_TLB_GUEST) { 55 opt = 0; 56 asce = READ_ONCE(mm->context.gmap_asce); 57 if (asce == 0UL || nodat) 58 opt |= IPTE_NODAT; 59 if (asce != -1UL) { 60 asce = asce ? : mm->context.asce; 61 opt |= IPTE_GUEST_ASCE; 62 } 63 __ptep_ipte(addr, ptep, opt, asce, IPTE_GLOBAL); 64 } else { 65 __ptep_ipte(addr, ptep, 0, 0, IPTE_GLOBAL); 66 } 67 } 68 69 static inline pte_t ptep_flush_direct(struct mm_struct *mm, 70 unsigned long addr, pte_t *ptep, 71 int nodat) 72 { 73 pte_t old; 74 75 old = *ptep; 76 if (unlikely(pte_val(old) & _PAGE_INVALID)) 77 return old; 78 atomic_inc(&mm->context.flush_count); 79 if (MACHINE_HAS_TLB_LC && 80 cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) 81 ptep_ipte_local(mm, addr, ptep, nodat); 82 else 83 ptep_ipte_global(mm, addr, ptep, nodat); 84 atomic_dec(&mm->context.flush_count); 85 return old; 86 } 87 88 static inline pte_t ptep_flush_lazy(struct mm_struct *mm, 89 unsigned long addr, pte_t *ptep, 90 int nodat) 91 { 92 pte_t old; 93 94 old = *ptep; 95 if (unlikely(pte_val(old) & _PAGE_INVALID)) 96 return old; 97 atomic_inc(&mm->context.flush_count); 98 if (cpumask_equal(&mm->context.cpu_attach_mask, 99 cpumask_of(smp_processor_id()))) { 100 pte_val(*ptep) |= _PAGE_INVALID; 101 mm->context.flush_mm = 1; 102 } else 103 ptep_ipte_global(mm, addr, ptep, nodat); 104 atomic_dec(&mm->context.flush_count); 105 return old; 106 } 107 108 static inline pgste_t pgste_get_lock(pte_t *ptep) 109 { 110 unsigned long new = 0; 111 #ifdef CONFIG_PGSTE 112 unsigned long old; 113 114 asm( 115 " lg %0,%2\n" 116 "0: lgr %1,%0\n" 117 " nihh %0,0xff7f\n" /* clear PCL bit in old */ 118 " oihh %1,0x0080\n" /* set PCL bit in new */ 119 " csg %0,%1,%2\n" 120 " jl 0b\n" 121 : "=&d" (old), "=&d" (new), "=Q" (ptep[PTRS_PER_PTE]) 122 : "Q" (ptep[PTRS_PER_PTE]) : "cc", "memory"); 123 #endif 124 return __pgste(new); 125 } 126 127 static inline void pgste_set_unlock(pte_t *ptep, pgste_t pgste) 128 { 129 #ifdef CONFIG_PGSTE 130 asm( 131 " nihh %1,0xff7f\n" /* clear PCL bit */ 132 " stg %1,%0\n" 133 : "=Q" (ptep[PTRS_PER_PTE]) 134 : "d" (pgste_val(pgste)), "Q" (ptep[PTRS_PER_PTE]) 135 : "cc", "memory"); 136 #endif 137 } 138 139 static inline pgste_t pgste_get(pte_t *ptep) 140 { 141 unsigned long pgste = 0; 142 #ifdef CONFIG_PGSTE 143 pgste = *(unsigned long *)(ptep + PTRS_PER_PTE); 144 #endif 145 return __pgste(pgste); 146 } 147 148 static inline void pgste_set(pte_t *ptep, pgste_t pgste) 149 { 150 #ifdef CONFIG_PGSTE 151 *(pgste_t *)(ptep + PTRS_PER_PTE) = pgste; 152 #endif 153 } 154 155 static inline pgste_t pgste_update_all(pte_t pte, pgste_t pgste, 156 struct mm_struct *mm) 157 { 158 #ifdef CONFIG_PGSTE 159 unsigned long address, bits, skey; 160 161 if (!mm_uses_skeys(mm) || pte_val(pte) & _PAGE_INVALID) 162 return pgste; 163 address = pte_val(pte) & PAGE_MASK; 164 skey = (unsigned long) page_get_storage_key(address); 165 bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED); 166 /* Transfer page changed & referenced bit to guest bits in pgste */ 167 pgste_val(pgste) |= bits << 48; /* GR bit & GC bit */ 168 /* Copy page access key and fetch protection bit to pgste */ 169 pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT); 170 pgste_val(pgste) |= (skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56; 171 #endif 172 return pgste; 173 174 } 175 176 static inline void pgste_set_key(pte_t *ptep, pgste_t pgste, pte_t entry, 177 struct mm_struct *mm) 178 { 179 #ifdef CONFIG_PGSTE 180 unsigned long address; 181 unsigned long nkey; 182 183 if (!mm_uses_skeys(mm) || pte_val(entry) & _PAGE_INVALID) 184 return; 185 VM_BUG_ON(!(pte_val(*ptep) & _PAGE_INVALID)); 186 address = pte_val(entry) & PAGE_MASK; 187 /* 188 * Set page access key and fetch protection bit from pgste. 189 * The guest C/R information is still in the PGSTE, set real 190 * key C/R to 0. 191 */ 192 nkey = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56; 193 nkey |= (pgste_val(pgste) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 48; 194 page_set_storage_key(address, nkey, 0); 195 #endif 196 } 197 198 static inline pgste_t pgste_set_pte(pte_t *ptep, pgste_t pgste, pte_t entry) 199 { 200 #ifdef CONFIG_PGSTE 201 if ((pte_val(entry) & _PAGE_PRESENT) && 202 (pte_val(entry) & _PAGE_WRITE) && 203 !(pte_val(entry) & _PAGE_INVALID)) { 204 if (!MACHINE_HAS_ESOP) { 205 /* 206 * Without enhanced suppression-on-protection force 207 * the dirty bit on for all writable ptes. 208 */ 209 pte_val(entry) |= _PAGE_DIRTY; 210 pte_val(entry) &= ~_PAGE_PROTECT; 211 } 212 if (!(pte_val(entry) & _PAGE_PROTECT)) 213 /* This pte allows write access, set user-dirty */ 214 pgste_val(pgste) |= PGSTE_UC_BIT; 215 } 216 #endif 217 *ptep = entry; 218 return pgste; 219 } 220 221 static inline pgste_t pgste_pte_notify(struct mm_struct *mm, 222 unsigned long addr, 223 pte_t *ptep, pgste_t pgste) 224 { 225 #ifdef CONFIG_PGSTE 226 unsigned long bits; 227 228 bits = pgste_val(pgste) & (PGSTE_IN_BIT | PGSTE_VSIE_BIT); 229 if (bits) { 230 pgste_val(pgste) ^= bits; 231 ptep_notify(mm, addr, ptep, bits); 232 } 233 #endif 234 return pgste; 235 } 236 237 static inline pgste_t ptep_xchg_start(struct mm_struct *mm, 238 unsigned long addr, pte_t *ptep) 239 { 240 pgste_t pgste = __pgste(0); 241 242 if (mm_has_pgste(mm)) { 243 pgste = pgste_get_lock(ptep); 244 pgste = pgste_pte_notify(mm, addr, ptep, pgste); 245 } 246 return pgste; 247 } 248 249 static inline pte_t ptep_xchg_commit(struct mm_struct *mm, 250 unsigned long addr, pte_t *ptep, 251 pgste_t pgste, pte_t old, pte_t new) 252 { 253 if (mm_has_pgste(mm)) { 254 if (pte_val(old) & _PAGE_INVALID) 255 pgste_set_key(ptep, pgste, new, mm); 256 if (pte_val(new) & _PAGE_INVALID) { 257 pgste = pgste_update_all(old, pgste, mm); 258 if ((pgste_val(pgste) & _PGSTE_GPS_USAGE_MASK) == 259 _PGSTE_GPS_USAGE_UNUSED) 260 pte_val(old) |= _PAGE_UNUSED; 261 } 262 pgste = pgste_set_pte(ptep, pgste, new); 263 pgste_set_unlock(ptep, pgste); 264 } else { 265 *ptep = new; 266 } 267 return old; 268 } 269 270 pte_t ptep_xchg_direct(struct mm_struct *mm, unsigned long addr, 271 pte_t *ptep, pte_t new) 272 { 273 pgste_t pgste; 274 pte_t old; 275 int nodat; 276 277 preempt_disable(); 278 pgste = ptep_xchg_start(mm, addr, ptep); 279 nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT); 280 old = ptep_flush_direct(mm, addr, ptep, nodat); 281 old = ptep_xchg_commit(mm, addr, ptep, pgste, old, new); 282 preempt_enable(); 283 return old; 284 } 285 EXPORT_SYMBOL(ptep_xchg_direct); 286 287 pte_t ptep_xchg_lazy(struct mm_struct *mm, unsigned long addr, 288 pte_t *ptep, pte_t new) 289 { 290 pgste_t pgste; 291 pte_t old; 292 int nodat; 293 294 preempt_disable(); 295 pgste = ptep_xchg_start(mm, addr, ptep); 296 nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT); 297 old = ptep_flush_lazy(mm, addr, ptep, nodat); 298 old = ptep_xchg_commit(mm, addr, ptep, pgste, old, new); 299 preempt_enable(); 300 return old; 301 } 302 EXPORT_SYMBOL(ptep_xchg_lazy); 303 304 pte_t ptep_modify_prot_start(struct vm_area_struct *vma, unsigned long addr, 305 pte_t *ptep) 306 { 307 pgste_t pgste; 308 pte_t old; 309 int nodat; 310 struct mm_struct *mm = vma->vm_mm; 311 312 preempt_disable(); 313 pgste = ptep_xchg_start(mm, addr, ptep); 314 nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT); 315 old = ptep_flush_lazy(mm, addr, ptep, nodat); 316 if (mm_has_pgste(mm)) { 317 pgste = pgste_update_all(old, pgste, mm); 318 pgste_set(ptep, pgste); 319 } 320 return old; 321 } 322 323 void ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr, 324 pte_t *ptep, pte_t old_pte, pte_t pte) 325 { 326 pgste_t pgste; 327 struct mm_struct *mm = vma->vm_mm; 328 329 if (!MACHINE_HAS_NX) 330 pte_val(pte) &= ~_PAGE_NOEXEC; 331 if (mm_has_pgste(mm)) { 332 pgste = pgste_get(ptep); 333 pgste_set_key(ptep, pgste, pte, mm); 334 pgste = pgste_set_pte(ptep, pgste, pte); 335 pgste_set_unlock(ptep, pgste); 336 } else { 337 *ptep = pte; 338 } 339 preempt_enable(); 340 } 341 342 static inline void pmdp_idte_local(struct mm_struct *mm, 343 unsigned long addr, pmd_t *pmdp) 344 { 345 if (MACHINE_HAS_TLB_GUEST) 346 __pmdp_idte(addr, pmdp, IDTE_NODAT | IDTE_GUEST_ASCE, 347 mm->context.asce, IDTE_LOCAL); 348 else 349 __pmdp_idte(addr, pmdp, 0, 0, IDTE_LOCAL); 350 if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m) 351 gmap_pmdp_idte_local(mm, addr); 352 } 353 354 static inline void pmdp_idte_global(struct mm_struct *mm, 355 unsigned long addr, pmd_t *pmdp) 356 { 357 if (MACHINE_HAS_TLB_GUEST) { 358 __pmdp_idte(addr, pmdp, IDTE_NODAT | IDTE_GUEST_ASCE, 359 mm->context.asce, IDTE_GLOBAL); 360 if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m) 361 gmap_pmdp_idte_global(mm, addr); 362 } else if (MACHINE_HAS_IDTE) { 363 __pmdp_idte(addr, pmdp, 0, 0, IDTE_GLOBAL); 364 if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m) 365 gmap_pmdp_idte_global(mm, addr); 366 } else { 367 __pmdp_csp(pmdp); 368 if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m) 369 gmap_pmdp_csp(mm, addr); 370 } 371 } 372 373 static inline pmd_t pmdp_flush_direct(struct mm_struct *mm, 374 unsigned long addr, pmd_t *pmdp) 375 { 376 pmd_t old; 377 378 old = *pmdp; 379 if (pmd_val(old) & _SEGMENT_ENTRY_INVALID) 380 return old; 381 atomic_inc(&mm->context.flush_count); 382 if (MACHINE_HAS_TLB_LC && 383 cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) 384 pmdp_idte_local(mm, addr, pmdp); 385 else 386 pmdp_idte_global(mm, addr, pmdp); 387 atomic_dec(&mm->context.flush_count); 388 return old; 389 } 390 391 static inline pmd_t pmdp_flush_lazy(struct mm_struct *mm, 392 unsigned long addr, pmd_t *pmdp) 393 { 394 pmd_t old; 395 396 old = *pmdp; 397 if (pmd_val(old) & _SEGMENT_ENTRY_INVALID) 398 return old; 399 atomic_inc(&mm->context.flush_count); 400 if (cpumask_equal(&mm->context.cpu_attach_mask, 401 cpumask_of(smp_processor_id()))) { 402 pmd_val(*pmdp) |= _SEGMENT_ENTRY_INVALID; 403 mm->context.flush_mm = 1; 404 if (mm_has_pgste(mm)) 405 gmap_pmdp_invalidate(mm, addr); 406 } else { 407 pmdp_idte_global(mm, addr, pmdp); 408 } 409 atomic_dec(&mm->context.flush_count); 410 return old; 411 } 412 413 #ifdef CONFIG_PGSTE 414 static pmd_t *pmd_alloc_map(struct mm_struct *mm, unsigned long addr) 415 { 416 pgd_t *pgd; 417 p4d_t *p4d; 418 pud_t *pud; 419 pmd_t *pmd; 420 421 pgd = pgd_offset(mm, addr); 422 p4d = p4d_alloc(mm, pgd, addr); 423 if (!p4d) 424 return NULL; 425 pud = pud_alloc(mm, p4d, addr); 426 if (!pud) 427 return NULL; 428 pmd = pmd_alloc(mm, pud, addr); 429 return pmd; 430 } 431 #endif 432 433 pmd_t pmdp_xchg_direct(struct mm_struct *mm, unsigned long addr, 434 pmd_t *pmdp, pmd_t new) 435 { 436 pmd_t old; 437 438 preempt_disable(); 439 old = pmdp_flush_direct(mm, addr, pmdp); 440 *pmdp = new; 441 preempt_enable(); 442 return old; 443 } 444 EXPORT_SYMBOL(pmdp_xchg_direct); 445 446 pmd_t pmdp_xchg_lazy(struct mm_struct *mm, unsigned long addr, 447 pmd_t *pmdp, pmd_t new) 448 { 449 pmd_t old; 450 451 preempt_disable(); 452 old = pmdp_flush_lazy(mm, addr, pmdp); 453 *pmdp = new; 454 preempt_enable(); 455 return old; 456 } 457 EXPORT_SYMBOL(pmdp_xchg_lazy); 458 459 static inline void pudp_idte_local(struct mm_struct *mm, 460 unsigned long addr, pud_t *pudp) 461 { 462 if (MACHINE_HAS_TLB_GUEST) 463 __pudp_idte(addr, pudp, IDTE_NODAT | IDTE_GUEST_ASCE, 464 mm->context.asce, IDTE_LOCAL); 465 else 466 __pudp_idte(addr, pudp, 0, 0, IDTE_LOCAL); 467 } 468 469 static inline void pudp_idte_global(struct mm_struct *mm, 470 unsigned long addr, pud_t *pudp) 471 { 472 if (MACHINE_HAS_TLB_GUEST) 473 __pudp_idte(addr, pudp, IDTE_NODAT | IDTE_GUEST_ASCE, 474 mm->context.asce, IDTE_GLOBAL); 475 else if (MACHINE_HAS_IDTE) 476 __pudp_idte(addr, pudp, 0, 0, IDTE_GLOBAL); 477 else 478 /* 479 * Invalid bit position is the same for pmd and pud, so we can 480 * re-use _pmd_csp() here 481 */ 482 __pmdp_csp((pmd_t *) pudp); 483 } 484 485 static inline pud_t pudp_flush_direct(struct mm_struct *mm, 486 unsigned long addr, pud_t *pudp) 487 { 488 pud_t old; 489 490 old = *pudp; 491 if (pud_val(old) & _REGION_ENTRY_INVALID) 492 return old; 493 atomic_inc(&mm->context.flush_count); 494 if (MACHINE_HAS_TLB_LC && 495 cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) 496 pudp_idte_local(mm, addr, pudp); 497 else 498 pudp_idte_global(mm, addr, pudp); 499 atomic_dec(&mm->context.flush_count); 500 return old; 501 } 502 503 pud_t pudp_xchg_direct(struct mm_struct *mm, unsigned long addr, 504 pud_t *pudp, pud_t new) 505 { 506 pud_t old; 507 508 preempt_disable(); 509 old = pudp_flush_direct(mm, addr, pudp); 510 *pudp = new; 511 preempt_enable(); 512 return old; 513 } 514 EXPORT_SYMBOL(pudp_xchg_direct); 515 516 #ifdef CONFIG_TRANSPARENT_HUGEPAGE 517 void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, 518 pgtable_t pgtable) 519 { 520 struct list_head *lh = (struct list_head *) pgtable; 521 522 assert_spin_locked(pmd_lockptr(mm, pmdp)); 523 524 /* FIFO */ 525 if (!pmd_huge_pte(mm, pmdp)) 526 INIT_LIST_HEAD(lh); 527 else 528 list_add(lh, (struct list_head *) pmd_huge_pte(mm, pmdp)); 529 pmd_huge_pte(mm, pmdp) = pgtable; 530 } 531 532 pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp) 533 { 534 struct list_head *lh; 535 pgtable_t pgtable; 536 pte_t *ptep; 537 538 assert_spin_locked(pmd_lockptr(mm, pmdp)); 539 540 /* FIFO */ 541 pgtable = pmd_huge_pte(mm, pmdp); 542 lh = (struct list_head *) pgtable; 543 if (list_empty(lh)) 544 pmd_huge_pte(mm, pmdp) = NULL; 545 else { 546 pmd_huge_pte(mm, pmdp) = (pgtable_t) lh->next; 547 list_del(lh); 548 } 549 ptep = (pte_t *) pgtable; 550 pte_val(*ptep) = _PAGE_INVALID; 551 ptep++; 552 pte_val(*ptep) = _PAGE_INVALID; 553 return pgtable; 554 } 555 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ 556 557 #ifdef CONFIG_PGSTE 558 void ptep_set_pte_at(struct mm_struct *mm, unsigned long addr, 559 pte_t *ptep, pte_t entry) 560 { 561 pgste_t pgste; 562 563 /* the mm_has_pgste() check is done in set_pte_at() */ 564 preempt_disable(); 565 pgste = pgste_get_lock(ptep); 566 pgste_val(pgste) &= ~_PGSTE_GPS_ZERO; 567 pgste_set_key(ptep, pgste, entry, mm); 568 pgste = pgste_set_pte(ptep, pgste, entry); 569 pgste_set_unlock(ptep, pgste); 570 preempt_enable(); 571 } 572 573 void ptep_set_notify(struct mm_struct *mm, unsigned long addr, pte_t *ptep) 574 { 575 pgste_t pgste; 576 577 preempt_disable(); 578 pgste = pgste_get_lock(ptep); 579 pgste_val(pgste) |= PGSTE_IN_BIT; 580 pgste_set_unlock(ptep, pgste); 581 preempt_enable(); 582 } 583 584 /** 585 * ptep_force_prot - change access rights of a locked pte 586 * @mm: pointer to the process mm_struct 587 * @addr: virtual address in the guest address space 588 * @ptep: pointer to the page table entry 589 * @prot: indicates guest access rights: PROT_NONE, PROT_READ or PROT_WRITE 590 * @bit: pgste bit to set (e.g. for notification) 591 * 592 * Returns 0 if the access rights were changed and -EAGAIN if the current 593 * and requested access rights are incompatible. 594 */ 595 int ptep_force_prot(struct mm_struct *mm, unsigned long addr, 596 pte_t *ptep, int prot, unsigned long bit) 597 { 598 pte_t entry; 599 pgste_t pgste; 600 int pte_i, pte_p, nodat; 601 602 pgste = pgste_get_lock(ptep); 603 entry = *ptep; 604 /* Check pte entry after all locks have been acquired */ 605 pte_i = pte_val(entry) & _PAGE_INVALID; 606 pte_p = pte_val(entry) & _PAGE_PROTECT; 607 if ((pte_i && (prot != PROT_NONE)) || 608 (pte_p && (prot & PROT_WRITE))) { 609 pgste_set_unlock(ptep, pgste); 610 return -EAGAIN; 611 } 612 /* Change access rights and set pgste bit */ 613 nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT); 614 if (prot == PROT_NONE && !pte_i) { 615 ptep_flush_direct(mm, addr, ptep, nodat); 616 pgste = pgste_update_all(entry, pgste, mm); 617 pte_val(entry) |= _PAGE_INVALID; 618 } 619 if (prot == PROT_READ && !pte_p) { 620 ptep_flush_direct(mm, addr, ptep, nodat); 621 pte_val(entry) &= ~_PAGE_INVALID; 622 pte_val(entry) |= _PAGE_PROTECT; 623 } 624 pgste_val(pgste) |= bit; 625 pgste = pgste_set_pte(ptep, pgste, entry); 626 pgste_set_unlock(ptep, pgste); 627 return 0; 628 } 629 630 int ptep_shadow_pte(struct mm_struct *mm, unsigned long saddr, 631 pte_t *sptep, pte_t *tptep, pte_t pte) 632 { 633 pgste_t spgste, tpgste; 634 pte_t spte, tpte; 635 int rc = -EAGAIN; 636 637 if (!(pte_val(*tptep) & _PAGE_INVALID)) 638 return 0; /* already shadowed */ 639 spgste = pgste_get_lock(sptep); 640 spte = *sptep; 641 if (!(pte_val(spte) & _PAGE_INVALID) && 642 !((pte_val(spte) & _PAGE_PROTECT) && 643 !(pte_val(pte) & _PAGE_PROTECT))) { 644 pgste_val(spgste) |= PGSTE_VSIE_BIT; 645 tpgste = pgste_get_lock(tptep); 646 pte_val(tpte) = (pte_val(spte) & PAGE_MASK) | 647 (pte_val(pte) & _PAGE_PROTECT); 648 /* don't touch the storage key - it belongs to parent pgste */ 649 tpgste = pgste_set_pte(tptep, tpgste, tpte); 650 pgste_set_unlock(tptep, tpgste); 651 rc = 1; 652 } 653 pgste_set_unlock(sptep, spgste); 654 return rc; 655 } 656 657 void ptep_unshadow_pte(struct mm_struct *mm, unsigned long saddr, pte_t *ptep) 658 { 659 pgste_t pgste; 660 int nodat; 661 662 pgste = pgste_get_lock(ptep); 663 /* notifier is called by the caller */ 664 nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT); 665 ptep_flush_direct(mm, saddr, ptep, nodat); 666 /* don't touch the storage key - it belongs to parent pgste */ 667 pgste = pgste_set_pte(ptep, pgste, __pte(_PAGE_INVALID)); 668 pgste_set_unlock(ptep, pgste); 669 } 670 671 static void ptep_zap_swap_entry(struct mm_struct *mm, swp_entry_t entry) 672 { 673 if (!non_swap_entry(entry)) 674 dec_mm_counter(mm, MM_SWAPENTS); 675 else if (is_migration_entry(entry)) { 676 struct page *page = migration_entry_to_page(entry); 677 678 dec_mm_counter(mm, mm_counter(page)); 679 } 680 free_swap_and_cache(entry); 681 } 682 683 void ptep_zap_unused(struct mm_struct *mm, unsigned long addr, 684 pte_t *ptep, int reset) 685 { 686 unsigned long pgstev; 687 pgste_t pgste; 688 pte_t pte; 689 690 /* Zap unused and logically-zero pages */ 691 preempt_disable(); 692 pgste = pgste_get_lock(ptep); 693 pgstev = pgste_val(pgste); 694 pte = *ptep; 695 if (!reset && pte_swap(pte) && 696 ((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED || 697 (pgstev & _PGSTE_GPS_ZERO))) { 698 ptep_zap_swap_entry(mm, pte_to_swp_entry(pte)); 699 pte_clear(mm, addr, ptep); 700 } 701 if (reset) 702 pgste_val(pgste) &= ~_PGSTE_GPS_USAGE_MASK; 703 pgste_set_unlock(ptep, pgste); 704 preempt_enable(); 705 } 706 707 void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep) 708 { 709 unsigned long ptev; 710 pgste_t pgste; 711 712 /* Clear storage key ACC and F, but set R/C */ 713 preempt_disable(); 714 pgste = pgste_get_lock(ptep); 715 pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT); 716 pgste_val(pgste) |= PGSTE_GR_BIT | PGSTE_GC_BIT; 717 ptev = pte_val(*ptep); 718 if (!(ptev & _PAGE_INVALID) && (ptev & _PAGE_WRITE)) 719 page_set_storage_key(ptev & PAGE_MASK, PAGE_DEFAULT_KEY, 1); 720 pgste_set_unlock(ptep, pgste); 721 preempt_enable(); 722 } 723 724 /* 725 * Test and reset if a guest page is dirty 726 */ 727 bool ptep_test_and_clear_uc(struct mm_struct *mm, unsigned long addr, 728 pte_t *ptep) 729 { 730 pgste_t pgste; 731 pte_t pte; 732 bool dirty; 733 int nodat; 734 735 pgste = pgste_get_lock(ptep); 736 dirty = !!(pgste_val(pgste) & PGSTE_UC_BIT); 737 pgste_val(pgste) &= ~PGSTE_UC_BIT; 738 pte = *ptep; 739 if (dirty && (pte_val(pte) & _PAGE_PRESENT)) { 740 pgste = pgste_pte_notify(mm, addr, ptep, pgste); 741 nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT); 742 ptep_ipte_global(mm, addr, ptep, nodat); 743 if (MACHINE_HAS_ESOP || !(pte_val(pte) & _PAGE_WRITE)) 744 pte_val(pte) |= _PAGE_PROTECT; 745 else 746 pte_val(pte) |= _PAGE_INVALID; 747 *ptep = pte; 748 } 749 pgste_set_unlock(ptep, pgste); 750 return dirty; 751 } 752 EXPORT_SYMBOL_GPL(ptep_test_and_clear_uc); 753 754 int set_guest_storage_key(struct mm_struct *mm, unsigned long addr, 755 unsigned char key, bool nq) 756 { 757 unsigned long keyul, paddr; 758 spinlock_t *ptl; 759 pgste_t old, new; 760 pmd_t *pmdp; 761 pte_t *ptep; 762 763 pmdp = pmd_alloc_map(mm, addr); 764 if (unlikely(!pmdp)) 765 return -EFAULT; 766 767 ptl = pmd_lock(mm, pmdp); 768 if (!pmd_present(*pmdp)) { 769 spin_unlock(ptl); 770 return -EFAULT; 771 } 772 773 if (pmd_large(*pmdp)) { 774 paddr = pmd_val(*pmdp) & HPAGE_MASK; 775 paddr |= addr & ~HPAGE_MASK; 776 /* 777 * Huge pmds need quiescing operations, they are 778 * always mapped. 779 */ 780 page_set_storage_key(paddr, key, 1); 781 spin_unlock(ptl); 782 return 0; 783 } 784 spin_unlock(ptl); 785 786 ptep = pte_alloc_map_lock(mm, pmdp, addr, &ptl); 787 if (unlikely(!ptep)) 788 return -EFAULT; 789 790 new = old = pgste_get_lock(ptep); 791 pgste_val(new) &= ~(PGSTE_GR_BIT | PGSTE_GC_BIT | 792 PGSTE_ACC_BITS | PGSTE_FP_BIT); 793 keyul = (unsigned long) key; 794 pgste_val(new) |= (keyul & (_PAGE_CHANGED | _PAGE_REFERENCED)) << 48; 795 pgste_val(new) |= (keyul & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56; 796 if (!(pte_val(*ptep) & _PAGE_INVALID)) { 797 unsigned long bits, skey; 798 799 paddr = pte_val(*ptep) & PAGE_MASK; 800 skey = (unsigned long) page_get_storage_key(paddr); 801 bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED); 802 skey = key & (_PAGE_ACC_BITS | _PAGE_FP_BIT); 803 /* Set storage key ACC and FP */ 804 page_set_storage_key(paddr, skey, !nq); 805 /* Merge host changed & referenced into pgste */ 806 pgste_val(new) |= bits << 52; 807 } 808 /* changing the guest storage key is considered a change of the page */ 809 if ((pgste_val(new) ^ pgste_val(old)) & 810 (PGSTE_ACC_BITS | PGSTE_FP_BIT | PGSTE_GR_BIT | PGSTE_GC_BIT)) 811 pgste_val(new) |= PGSTE_UC_BIT; 812 813 pgste_set_unlock(ptep, new); 814 pte_unmap_unlock(ptep, ptl); 815 return 0; 816 } 817 EXPORT_SYMBOL(set_guest_storage_key); 818 819 /** 820 * Conditionally set a guest storage key (handling csske). 821 * oldkey will be updated when either mr or mc is set and a pointer is given. 822 * 823 * Returns 0 if a guests storage key update wasn't necessary, 1 if the guest 824 * storage key was updated and -EFAULT on access errors. 825 */ 826 int cond_set_guest_storage_key(struct mm_struct *mm, unsigned long addr, 827 unsigned char key, unsigned char *oldkey, 828 bool nq, bool mr, bool mc) 829 { 830 unsigned char tmp, mask = _PAGE_ACC_BITS | _PAGE_FP_BIT; 831 int rc; 832 833 /* we can drop the pgste lock between getting and setting the key */ 834 if (mr | mc) { 835 rc = get_guest_storage_key(current->mm, addr, &tmp); 836 if (rc) 837 return rc; 838 if (oldkey) 839 *oldkey = tmp; 840 if (!mr) 841 mask |= _PAGE_REFERENCED; 842 if (!mc) 843 mask |= _PAGE_CHANGED; 844 if (!((tmp ^ key) & mask)) 845 return 0; 846 } 847 rc = set_guest_storage_key(current->mm, addr, key, nq); 848 return rc < 0 ? rc : 1; 849 } 850 EXPORT_SYMBOL(cond_set_guest_storage_key); 851 852 /** 853 * Reset a guest reference bit (rrbe), returning the reference and changed bit. 854 * 855 * Returns < 0 in case of error, otherwise the cc to be reported to the guest. 856 */ 857 int reset_guest_reference_bit(struct mm_struct *mm, unsigned long addr) 858 { 859 spinlock_t *ptl; 860 unsigned long paddr; 861 pgste_t old, new; 862 pmd_t *pmdp; 863 pte_t *ptep; 864 int cc = 0; 865 866 pmdp = pmd_alloc_map(mm, addr); 867 if (unlikely(!pmdp)) 868 return -EFAULT; 869 870 ptl = pmd_lock(mm, pmdp); 871 if (!pmd_present(*pmdp)) { 872 spin_unlock(ptl); 873 return -EFAULT; 874 } 875 876 if (pmd_large(*pmdp)) { 877 paddr = pmd_val(*pmdp) & HPAGE_MASK; 878 paddr |= addr & ~HPAGE_MASK; 879 cc = page_reset_referenced(paddr); 880 spin_unlock(ptl); 881 return cc; 882 } 883 spin_unlock(ptl); 884 885 ptep = pte_alloc_map_lock(mm, pmdp, addr, &ptl); 886 if (unlikely(!ptep)) 887 return -EFAULT; 888 889 new = old = pgste_get_lock(ptep); 890 /* Reset guest reference bit only */ 891 pgste_val(new) &= ~PGSTE_GR_BIT; 892 893 if (!(pte_val(*ptep) & _PAGE_INVALID)) { 894 paddr = pte_val(*ptep) & PAGE_MASK; 895 cc = page_reset_referenced(paddr); 896 /* Merge real referenced bit into host-set */ 897 pgste_val(new) |= ((unsigned long) cc << 53) & PGSTE_HR_BIT; 898 } 899 /* Reflect guest's logical view, not physical */ 900 cc |= (pgste_val(old) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 49; 901 /* Changing the guest storage key is considered a change of the page */ 902 if ((pgste_val(new) ^ pgste_val(old)) & PGSTE_GR_BIT) 903 pgste_val(new) |= PGSTE_UC_BIT; 904 905 pgste_set_unlock(ptep, new); 906 pte_unmap_unlock(ptep, ptl); 907 return cc; 908 } 909 EXPORT_SYMBOL(reset_guest_reference_bit); 910 911 int get_guest_storage_key(struct mm_struct *mm, unsigned long addr, 912 unsigned char *key) 913 { 914 unsigned long paddr; 915 spinlock_t *ptl; 916 pgste_t pgste; 917 pmd_t *pmdp; 918 pte_t *ptep; 919 920 pmdp = pmd_alloc_map(mm, addr); 921 if (unlikely(!pmdp)) 922 return -EFAULT; 923 924 ptl = pmd_lock(mm, pmdp); 925 if (!pmd_present(*pmdp)) { 926 /* Not yet mapped memory has a zero key */ 927 spin_unlock(ptl); 928 *key = 0; 929 return 0; 930 } 931 932 if (pmd_large(*pmdp)) { 933 paddr = pmd_val(*pmdp) & HPAGE_MASK; 934 paddr |= addr & ~HPAGE_MASK; 935 *key = page_get_storage_key(paddr); 936 spin_unlock(ptl); 937 return 0; 938 } 939 spin_unlock(ptl); 940 941 ptep = pte_alloc_map_lock(mm, pmdp, addr, &ptl); 942 if (unlikely(!ptep)) 943 return -EFAULT; 944 945 pgste = pgste_get_lock(ptep); 946 *key = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56; 947 paddr = pte_val(*ptep) & PAGE_MASK; 948 if (!(pte_val(*ptep) & _PAGE_INVALID)) 949 *key = page_get_storage_key(paddr); 950 /* Reflect guest's logical view, not physical */ 951 *key |= (pgste_val(pgste) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 48; 952 pgste_set_unlock(ptep, pgste); 953 pte_unmap_unlock(ptep, ptl); 954 return 0; 955 } 956 EXPORT_SYMBOL(get_guest_storage_key); 957 958 /** 959 * pgste_perform_essa - perform ESSA actions on the PGSTE. 960 * @mm: the memory context. It must have PGSTEs, no check is performed here! 961 * @hva: the host virtual address of the page whose PGSTE is to be processed 962 * @orc: the specific action to perform, see the ESSA_SET_* macros. 963 * @oldpte: the PTE will be saved there if the pointer is not NULL. 964 * @oldpgste: the old PGSTE will be saved there if the pointer is not NULL. 965 * 966 * Return: 1 if the page is to be added to the CBRL, otherwise 0, 967 * or < 0 in case of error. -EINVAL is returned for invalid values 968 * of orc, -EFAULT for invalid addresses. 969 */ 970 int pgste_perform_essa(struct mm_struct *mm, unsigned long hva, int orc, 971 unsigned long *oldpte, unsigned long *oldpgste) 972 { 973 unsigned long pgstev; 974 spinlock_t *ptl; 975 pgste_t pgste; 976 pte_t *ptep; 977 int res = 0; 978 979 WARN_ON_ONCE(orc > ESSA_MAX); 980 if (unlikely(orc > ESSA_MAX)) 981 return -EINVAL; 982 ptep = get_locked_pte(mm, hva, &ptl); 983 if (unlikely(!ptep)) 984 return -EFAULT; 985 pgste = pgste_get_lock(ptep); 986 pgstev = pgste_val(pgste); 987 if (oldpte) 988 *oldpte = pte_val(*ptep); 989 if (oldpgste) 990 *oldpgste = pgstev; 991 992 switch (orc) { 993 case ESSA_GET_STATE: 994 break; 995 case ESSA_SET_STABLE: 996 pgstev &= ~(_PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT); 997 pgstev |= _PGSTE_GPS_USAGE_STABLE; 998 break; 999 case ESSA_SET_UNUSED: 1000 pgstev &= ~_PGSTE_GPS_USAGE_MASK; 1001 pgstev |= _PGSTE_GPS_USAGE_UNUSED; 1002 if (pte_val(*ptep) & _PAGE_INVALID) 1003 res = 1; 1004 break; 1005 case ESSA_SET_VOLATILE: 1006 pgstev &= ~_PGSTE_GPS_USAGE_MASK; 1007 pgstev |= _PGSTE_GPS_USAGE_VOLATILE; 1008 if (pte_val(*ptep) & _PAGE_INVALID) 1009 res = 1; 1010 break; 1011 case ESSA_SET_POT_VOLATILE: 1012 pgstev &= ~_PGSTE_GPS_USAGE_MASK; 1013 if (!(pte_val(*ptep) & _PAGE_INVALID)) { 1014 pgstev |= _PGSTE_GPS_USAGE_POT_VOLATILE; 1015 break; 1016 } 1017 if (pgstev & _PGSTE_GPS_ZERO) { 1018 pgstev |= _PGSTE_GPS_USAGE_VOLATILE; 1019 break; 1020 } 1021 if (!(pgstev & PGSTE_GC_BIT)) { 1022 pgstev |= _PGSTE_GPS_USAGE_VOLATILE; 1023 res = 1; 1024 break; 1025 } 1026 break; 1027 case ESSA_SET_STABLE_RESIDENT: 1028 pgstev &= ~_PGSTE_GPS_USAGE_MASK; 1029 pgstev |= _PGSTE_GPS_USAGE_STABLE; 1030 /* 1031 * Since the resident state can go away any time after this 1032 * call, we will not make this page resident. We can revisit 1033 * this decision if a guest will ever start using this. 1034 */ 1035 break; 1036 case ESSA_SET_STABLE_IF_RESIDENT: 1037 if (!(pte_val(*ptep) & _PAGE_INVALID)) { 1038 pgstev &= ~_PGSTE_GPS_USAGE_MASK; 1039 pgstev |= _PGSTE_GPS_USAGE_STABLE; 1040 } 1041 break; 1042 case ESSA_SET_STABLE_NODAT: 1043 pgstev &= ~_PGSTE_GPS_USAGE_MASK; 1044 pgstev |= _PGSTE_GPS_USAGE_STABLE | _PGSTE_GPS_NODAT; 1045 break; 1046 default: 1047 /* we should never get here! */ 1048 break; 1049 } 1050 /* If we are discarding a page, set it to logical zero */ 1051 if (res) 1052 pgstev |= _PGSTE_GPS_ZERO; 1053 1054 pgste_val(pgste) = pgstev; 1055 pgste_set_unlock(ptep, pgste); 1056 pte_unmap_unlock(ptep, ptl); 1057 return res; 1058 } 1059 EXPORT_SYMBOL(pgste_perform_essa); 1060 1061 /** 1062 * set_pgste_bits - set specific PGSTE bits. 1063 * @mm: the memory context. It must have PGSTEs, no check is performed here! 1064 * @hva: the host virtual address of the page whose PGSTE is to be processed 1065 * @bits: a bitmask representing the bits that will be touched 1066 * @value: the values of the bits to be written. Only the bits in the mask 1067 * will be written. 1068 * 1069 * Return: 0 on success, < 0 in case of error. 1070 */ 1071 int set_pgste_bits(struct mm_struct *mm, unsigned long hva, 1072 unsigned long bits, unsigned long value) 1073 { 1074 spinlock_t *ptl; 1075 pgste_t new; 1076 pte_t *ptep; 1077 1078 ptep = get_locked_pte(mm, hva, &ptl); 1079 if (unlikely(!ptep)) 1080 return -EFAULT; 1081 new = pgste_get_lock(ptep); 1082 1083 pgste_val(new) &= ~bits; 1084 pgste_val(new) |= value & bits; 1085 1086 pgste_set_unlock(ptep, new); 1087 pte_unmap_unlock(ptep, ptl); 1088 return 0; 1089 } 1090 EXPORT_SYMBOL(set_pgste_bits); 1091 1092 /** 1093 * get_pgste - get the current PGSTE for the given address. 1094 * @mm: the memory context. It must have PGSTEs, no check is performed here! 1095 * @hva: the host virtual address of the page whose PGSTE is to be processed 1096 * @pgstep: will be written with the current PGSTE for the given address. 1097 * 1098 * Return: 0 on success, < 0 in case of error. 1099 */ 1100 int get_pgste(struct mm_struct *mm, unsigned long hva, unsigned long *pgstep) 1101 { 1102 spinlock_t *ptl; 1103 pte_t *ptep; 1104 1105 ptep = get_locked_pte(mm, hva, &ptl); 1106 if (unlikely(!ptep)) 1107 return -EFAULT; 1108 *pgstep = pgste_val(pgste_get(ptep)); 1109 pte_unmap_unlock(ptep, ptl); 1110 return 0; 1111 } 1112 EXPORT_SYMBOL(get_pgste); 1113 #endif 1114