// SPDX-License-Identifier: GPL-2.0
/*
 *    Copyright IBM Corp. 2007, 2011
 *    Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
 */

#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/spinlock.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/swapops.h>
#include <linux/sysctl.h>
#include <linux/ksm.h>
#include <linux/mman.h>

#include <asm/pgalloc.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#include <asm/page-states.h>

static inline void ptep_ipte_local(struct mm_struct *mm, unsigned long addr,
				   pte_t *ptep, int nodat)
{
	unsigned long opt, asce;

	if (MACHINE_HAS_TLB_GUEST) {
		opt = 0;
		asce = READ_ONCE(mm->context.gmap_asce);
		if (asce == 0UL || nodat)
			opt |= IPTE_NODAT;
		if (asce != -1UL) {
			asce = asce ? : mm->context.asce;
			opt |= IPTE_GUEST_ASCE;
		}
		__ptep_ipte(addr, ptep, opt, asce, IPTE_LOCAL);
	} else {
		__ptep_ipte(addr, ptep, 0, 0, IPTE_LOCAL);
	}
}

static inline void ptep_ipte_global(struct mm_struct *mm, unsigned long addr,
				    pte_t *ptep, int nodat)
{
	unsigned long opt, asce;

	if (MACHINE_HAS_TLB_GUEST) {
		opt = 0;
		asce = READ_ONCE(mm->context.gmap_asce);
		if (asce == 0UL || nodat)
			opt |= IPTE_NODAT;
		if (asce != -1UL) {
			asce = asce ? : mm->context.asce;
			opt |= IPTE_GUEST_ASCE;
		}
		__ptep_ipte(addr, ptep, opt, asce, IPTE_GLOBAL);
	} else {
		__ptep_ipte(addr, ptep, 0, 0, IPTE_GLOBAL);
	}
}

static inline pte_t ptep_flush_direct(struct mm_struct *mm,
				      unsigned long addr, pte_t *ptep,
				      int nodat)
{
	pte_t old;

	old = *ptep;
	if (unlikely(pte_val(old) & _PAGE_INVALID))
		return old;
	atomic_inc(&mm->context.flush_count);
	if (MACHINE_HAS_TLB_LC &&
	    cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
		ptep_ipte_local(mm, addr, ptep, nodat);
	else
		ptep_ipte_global(mm, addr, ptep, nodat);
	atomic_dec(&mm->context.flush_count);
	return old;
}

static inline pte_t ptep_flush_lazy(struct mm_struct *mm,
				    unsigned long addr, pte_t *ptep,
				    int nodat)
{
	pte_t old;

	old = *ptep;
	if (unlikely(pte_val(old) & _PAGE_INVALID))
		return old;
	atomic_inc(&mm->context.flush_count);
	if (cpumask_equal(&mm->context.cpu_attach_mask,
			  cpumask_of(smp_processor_id()))) {
		pte_val(*ptep) |= _PAGE_INVALID;
		mm->context.flush_mm = 1;
	} else
		ptep_ipte_global(mm, addr, ptep, nodat);
	atomic_dec(&mm->context.flush_count);
	return old;
}

static inline pgste_t pgste_get_lock(pte_t *ptep)
{
	unsigned long new = 0;
#ifdef CONFIG_PGSTE
	unsigned long old;

	asm(
		"	lg	%0,%2\n"
		"0:	lgr	%1,%0\n"
		"	nihh	%0,0xff7f\n"	/* clear PCL bit in old */
		"	oihh	%1,0x0080\n"	/* set PCL bit in new */
		"	csg	%0,%1,%2\n"
		"	jl	0b\n"
		: "=&d" (old), "=&d" (new), "=Q" (ptep[PTRS_PER_PTE])
		: "Q" (ptep[PTRS_PER_PTE]) : "cc", "memory");
#endif
	return __pgste(new);
}

static inline void pgste_set_unlock(pte_t *ptep, pgste_t pgste)
{
#ifdef CONFIG_PGSTE
	asm(
		"	nihh	%1,0xff7f\n"	/* clear PCL bit */
		"	stg	%1,%0\n"
		: "=Q" (ptep[PTRS_PER_PTE])
		: "d" (pgste_val(pgste)), "Q" (ptep[PTRS_PER_PTE])
		: "cc", "memory");
#endif
}

static inline pgste_t pgste_get(pte_t *ptep)
{
	unsigned long pgste = 0;
#ifdef CONFIG_PGSTE
	pgste = *(unsigned long *)(ptep + PTRS_PER_PTE);
#endif
	return __pgste(pgste);
}

static inline void pgste_set(pte_t *ptep, pgste_t pgste)
{
#ifdef CONFIG_PGSTE
	*(pgste_t *)(ptep + PTRS_PER_PTE) = pgste;
#endif
}

static inline pgste_t pgste_update_all(pte_t pte, pgste_t pgste,
				       struct mm_struct *mm)
{
#ifdef CONFIG_PGSTE
	unsigned long address, bits, skey;

	if (!mm_uses_skeys(mm) || pte_val(pte) & _PAGE_INVALID)
		return pgste;
	address = pte_val(pte) & PAGE_MASK;
	skey = (unsigned long) page_get_storage_key(address);
	bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
	/* Transfer page changed & referenced bit to guest bits in pgste */
	pgste_val(pgste) |= bits << 48;		/* GR bit & GC bit */
	/* Copy page access key and fetch protection bit to pgste */
	pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT);
	pgste_val(pgste) |= (skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
#endif
	return pgste;
}

static inline void pgste_set_key(pte_t *ptep, pgste_t pgste, pte_t entry,
				 struct mm_struct *mm)
{
#ifdef CONFIG_PGSTE
	unsigned long address;
	unsigned long nkey;

	if (!mm_uses_skeys(mm) || pte_val(entry) & _PAGE_INVALID)
		return;
	VM_BUG_ON(!(pte_val(*ptep) & _PAGE_INVALID));
	address = pte_val(entry) & PAGE_MASK;
	/*
	 * Set page access key and fetch protection bit from pgste.
	 * The guest C/R information is still in the PGSTE, set real
	 * key C/R to 0.
	 */
	nkey = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56;
	nkey |= (pgste_val(pgste) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 48;
	page_set_storage_key(address, nkey, 0);
#endif
}

static inline pgste_t pgste_set_pte(pte_t *ptep, pgste_t pgste, pte_t entry)
{
#ifdef CONFIG_PGSTE
	if ((pte_val(entry) & _PAGE_PRESENT) &&
	    (pte_val(entry) & _PAGE_WRITE) &&
	    !(pte_val(entry) & _PAGE_INVALID)) {
		if (!MACHINE_HAS_ESOP) {
			/*
			 * Without enhanced suppression-on-protection force
			 * the dirty bit on for all writable ptes.
			 */
			pte_val(entry) |= _PAGE_DIRTY;
			pte_val(entry) &= ~_PAGE_PROTECT;
		}
		if (!(pte_val(entry) & _PAGE_PROTECT))
			/* This pte allows write access, set user-dirty */
			pgste_val(pgste) |= PGSTE_UC_BIT;
	}
#endif
	*ptep = entry;
	return pgste;
}

static inline pgste_t pgste_pte_notify(struct mm_struct *mm,
				       unsigned long addr,
				       pte_t *ptep, pgste_t pgste)
{
#ifdef CONFIG_PGSTE
	unsigned long bits;

	bits = pgste_val(pgste) & (PGSTE_IN_BIT | PGSTE_VSIE_BIT);
	if (bits) {
		pgste_val(pgste) ^= bits;
		ptep_notify(mm, addr, ptep, bits);
	}
#endif
	return pgste;
}

static inline pgste_t ptep_xchg_start(struct mm_struct *mm,
				      unsigned long addr, pte_t *ptep)
{
	pgste_t pgste = __pgste(0);

	if (mm_has_pgste(mm)) {
		pgste = pgste_get_lock(ptep);
		pgste = pgste_pte_notify(mm, addr, ptep, pgste);
	}
	return pgste;
}

static inline pte_t ptep_xchg_commit(struct mm_struct *mm,
				     unsigned long addr, pte_t *ptep,
				     pgste_t pgste, pte_t old, pte_t new)
{
	if (mm_has_pgste(mm)) {
		if (pte_val(old) & _PAGE_INVALID)
			pgste_set_key(ptep, pgste, new, mm);
		if (pte_val(new) & _PAGE_INVALID) {
			pgste = pgste_update_all(old, pgste, mm);
			if ((pgste_val(pgste) & _PGSTE_GPS_USAGE_MASK) ==
			    _PGSTE_GPS_USAGE_UNUSED)
				pte_val(old) |= _PAGE_UNUSED;
		}
		pgste = pgste_set_pte(ptep, pgste, new);
		pgste_set_unlock(ptep, pgste);
	} else {
		*ptep = new;
	}
	return old;
}

pte_t ptep_xchg_direct(struct mm_struct *mm, unsigned long addr,
		       pte_t *ptep, pte_t new)
{
	pgste_t pgste;
	pte_t old;
	int nodat;

	preempt_disable();
	pgste = ptep_xchg_start(mm, addr, ptep);
	nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);
	old = ptep_flush_direct(mm, addr, ptep, nodat);
	old = ptep_xchg_commit(mm, addr, ptep, pgste, old, new);
	preempt_enable();
	return old;
}
EXPORT_SYMBOL(ptep_xchg_direct);

pte_t ptep_xchg_lazy(struct mm_struct *mm, unsigned long addr,
		     pte_t *ptep, pte_t new)
{
	pgste_t pgste;
	pte_t old;
	int nodat;

	preempt_disable();
	pgste = ptep_xchg_start(mm, addr, ptep);
	nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);
	old = ptep_flush_lazy(mm, addr, ptep, nodat);
	old = ptep_xchg_commit(mm, addr, ptep, pgste, old, new);
	preempt_enable();
	return old;
}
EXPORT_SYMBOL(ptep_xchg_lazy);

pte_t ptep_modify_prot_start(struct vm_area_struct *vma, unsigned long addr,
			     pte_t *ptep)
{
	pgste_t pgste;
	pte_t old;
	int nodat;
	struct mm_struct *mm = vma->vm_mm;

	preempt_disable();
	pgste = ptep_xchg_start(mm, addr, ptep);
	nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);
	old = ptep_flush_lazy(mm, addr, ptep, nodat);
	if (mm_has_pgste(mm)) {
		pgste = pgste_update_all(old, pgste, mm);
		pgste_set(ptep, pgste);
	}
	return old;
}

void ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr,
			     pte_t *ptep, pte_t old_pte, pte_t pte)
{
	pgste_t pgste;
	struct mm_struct *mm = vma->vm_mm;

	if (!MACHINE_HAS_NX)
		pte_val(pte) &= ~_PAGE_NOEXEC;
	if (mm_has_pgste(mm)) {
		pgste = pgste_get(ptep);
		pgste_set_key(ptep, pgste, pte, mm);
		pgste = pgste_set_pte(ptep, pgste, pte);
		pgste_set_unlock(ptep, pgste);
	} else {
		*ptep = pte;
	}
	preempt_enable();
}

static inline void pmdp_idte_local(struct mm_struct *mm,
				   unsigned long addr, pmd_t *pmdp)
{
	if (MACHINE_HAS_TLB_GUEST)
		__pmdp_idte(addr, pmdp, IDTE_NODAT | IDTE_GUEST_ASCE,
			    mm->context.asce, IDTE_LOCAL);
	else
		__pmdp_idte(addr, pmdp, 0, 0, IDTE_LOCAL);
	if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m)
		gmap_pmdp_idte_local(mm, addr);
}

static inline void pmdp_idte_global(struct mm_struct *mm,
				    unsigned long addr, pmd_t *pmdp)
{
	if (MACHINE_HAS_TLB_GUEST) {
		__pmdp_idte(addr, pmdp, IDTE_NODAT | IDTE_GUEST_ASCE,
			    mm->context.asce, IDTE_GLOBAL);
		if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m)
			gmap_pmdp_idte_global(mm, addr);
	} else if (MACHINE_HAS_IDTE) {
		__pmdp_idte(addr, pmdp, 0, 0, IDTE_GLOBAL);
		if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m)
			gmap_pmdp_idte_global(mm, addr);
	} else {
		__pmdp_csp(pmdp);
		if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m)
			gmap_pmdp_csp(mm, addr);
	}
}

static inline pmd_t pmdp_flush_direct(struct mm_struct *mm,
				      unsigned long addr, pmd_t *pmdp)
{
	pmd_t old;

	old = *pmdp;
	if (pmd_val(old) & _SEGMENT_ENTRY_INVALID)
		return old;
	atomic_inc(&mm->context.flush_count);
	if (MACHINE_HAS_TLB_LC &&
	    cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
		pmdp_idte_local(mm, addr, pmdp);
	else
		pmdp_idte_global(mm, addr, pmdp);
	atomic_dec(&mm->context.flush_count);
	return old;
}

static inline pmd_t pmdp_flush_lazy(struct mm_struct *mm,
				    unsigned long addr, pmd_t *pmdp)
{
	pmd_t old;

	old = *pmdp;
	if (pmd_val(old) & _SEGMENT_ENTRY_INVALID)
		return old;
	atomic_inc(&mm->context.flush_count);
	if (cpumask_equal(&mm->context.cpu_attach_mask,
			  cpumask_of(smp_processor_id()))) {
		pmd_val(*pmdp) |= _SEGMENT_ENTRY_INVALID;
		mm->context.flush_mm = 1;
		if (mm_has_pgste(mm))
			gmap_pmdp_invalidate(mm, addr);
	} else {
		pmdp_idte_global(mm, addr, pmdp);
	}
	atomic_dec(&mm->context.flush_count);
	return old;
}

#ifdef CONFIG_PGSTE
static pmd_t *pmd_alloc_map(struct mm_struct *mm, unsigned long addr)
{
	pgd_t *pgd;
	p4d_t *p4d;
	pud_t *pud;
	pmd_t *pmd;

	pgd = pgd_offset(mm, addr);
	p4d = p4d_alloc(mm, pgd, addr);
	if (!p4d)
		return NULL;
	pud = pud_alloc(mm, p4d, addr);
	if (!pud)
		return NULL;
	pmd = pmd_alloc(mm, pud, addr);
	return pmd;
}
#endif

pmd_t pmdp_xchg_direct(struct mm_struct *mm, unsigned long addr,
		       pmd_t *pmdp, pmd_t new)
{
	pmd_t old;

	preempt_disable();
	old = pmdp_flush_direct(mm, addr, pmdp);
	*pmdp = new;
	preempt_enable();
	return old;
}
EXPORT_SYMBOL(pmdp_xchg_direct);

pmd_t pmdp_xchg_lazy(struct mm_struct *mm, unsigned long addr,
		     pmd_t *pmdp, pmd_t new)
{
	pmd_t old;

	preempt_disable();
	old = pmdp_flush_lazy(mm, addr, pmdp);
	*pmdp = new;
	preempt_enable();
	return old;
}
EXPORT_SYMBOL(pmdp_xchg_lazy);

static inline void pudp_idte_local(struct mm_struct *mm,
				   unsigned long addr, pud_t *pudp)
{
	if (MACHINE_HAS_TLB_GUEST)
		__pudp_idte(addr, pudp, IDTE_NODAT | IDTE_GUEST_ASCE,
			    mm->context.asce, IDTE_LOCAL);
	else
		__pudp_idte(addr, pudp, 0, 0, IDTE_LOCAL);
}

static inline void pudp_idte_global(struct mm_struct *mm,
				    unsigned long addr, pud_t *pudp)
{
	if (MACHINE_HAS_TLB_GUEST)
		__pudp_idte(addr, pudp, IDTE_NODAT | IDTE_GUEST_ASCE,
			    mm->context.asce, IDTE_GLOBAL);
	else if (MACHINE_HAS_IDTE)
		__pudp_idte(addr, pudp, 0, 0, IDTE_GLOBAL);
	else
		/*
		 * Invalid bit position is the same for pmd and pud, so we can
		 * re-use _pmd_csp() here
		 */
		__pmdp_csp((pmd_t *) pudp);
}

static inline pud_t pudp_flush_direct(struct mm_struct *mm,
				      unsigned long addr, pud_t *pudp)
{
	pud_t old;

	old = *pudp;
	if (pud_val(old) & _REGION_ENTRY_INVALID)
		return old;
	atomic_inc(&mm->context.flush_count);
	if (MACHINE_HAS_TLB_LC &&
	    cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
		pudp_idte_local(mm, addr, pudp);
	else
		pudp_idte_global(mm, addr, pudp);
	atomic_dec(&mm->context.flush_count);
	return old;
}

pud_t pudp_xchg_direct(struct mm_struct *mm, unsigned long addr,
		       pud_t *pudp, pud_t new)
{
	pud_t old;

	preempt_disable();
	old = pudp_flush_direct(mm, addr, pudp);
	*pudp = new;
	preempt_enable();
	return old;
}
EXPORT_SYMBOL(pudp_xchg_direct);

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
				pgtable_t pgtable)
{
	struct list_head *lh = (struct list_head *) pgtable;

	assert_spin_locked(pmd_lockptr(mm, pmdp));

	/* FIFO */
	if (!pmd_huge_pte(mm, pmdp))
		INIT_LIST_HEAD(lh);
	else
		list_add(lh, (struct list_head *) pmd_huge_pte(mm, pmdp));
	pmd_huge_pte(mm, pmdp) = pgtable;
}

pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
{
	struct list_head *lh;
	pgtable_t pgtable;
	pte_t *ptep;

	assert_spin_locked(pmd_lockptr(mm, pmdp));

	/* FIFO */
	pgtable = pmd_huge_pte(mm, pmdp);
	lh = (struct list_head *) pgtable;
	if (list_empty(lh))
		pmd_huge_pte(mm, pmdp) = NULL;
	else {
		pmd_huge_pte(mm, pmdp) = (pgtable_t) lh->next;
		list_del(lh);
	}
	ptep = (pte_t *) pgtable;
	pte_val(*ptep) = _PAGE_INVALID;
	ptep++;
	pte_val(*ptep) = _PAGE_INVALID;
	return pgtable;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

#ifdef CONFIG_PGSTE
void ptep_set_pte_at(struct mm_struct *mm, unsigned long addr,
		     pte_t *ptep, pte_t entry)
{
	pgste_t pgste;

	/* the mm_has_pgste() check is done in set_pte_at() */
	preempt_disable();
	pgste = pgste_get_lock(ptep);
	pgste_val(pgste) &= ~_PGSTE_GPS_ZERO;
	pgste_set_key(ptep, pgste, entry, mm);
	pgste = pgste_set_pte(ptep, pgste, entry);
	pgste_set_unlock(ptep, pgste);
	preempt_enable();
}

void ptep_set_notify(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
	pgste_t pgste;

	preempt_disable();
	pgste = pgste_get_lock(ptep);
	pgste_val(pgste) |= PGSTE_IN_BIT;
	pgste_set_unlock(ptep, pgste);
	preempt_enable();
}

/**
 * ptep_force_prot - change access rights of a locked pte
 * @mm: pointer to the process mm_struct
 * @addr: virtual address in the guest address space
 * @ptep: pointer to the page table entry
 * @prot: indicates guest access rights: PROT_NONE, PROT_READ or PROT_WRITE
 * @bit: pgste bit to set (e.g. for notification)
 *
 * Returns 0 if the access rights were changed and -EAGAIN if the current
 * and requested access rights are incompatible.
 */
int ptep_force_prot(struct mm_struct *mm, unsigned long addr,
		    pte_t *ptep, int prot, unsigned long bit)
{
	pte_t entry;
	pgste_t pgste;
	int pte_i, pte_p, nodat;

	pgste = pgste_get_lock(ptep);
	entry = *ptep;
	/* Check pte entry after all locks have been acquired */
	pte_i = pte_val(entry) & _PAGE_INVALID;
	pte_p = pte_val(entry) & _PAGE_PROTECT;
	if ((pte_i && (prot != PROT_NONE)) ||
	    (pte_p && (prot & PROT_WRITE))) {
		pgste_set_unlock(ptep, pgste);
		return -EAGAIN;
	}
	/* Change access rights and set pgste bit */
	nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);
	if (prot == PROT_NONE && !pte_i) {
		ptep_flush_direct(mm, addr, ptep, nodat);
		pgste = pgste_update_all(entry, pgste, mm);
		pte_val(entry) |= _PAGE_INVALID;
	}
	if (prot == PROT_READ && !pte_p) {
		ptep_flush_direct(mm, addr, ptep, nodat);
		pte_val(entry) &= ~_PAGE_INVALID;
		pte_val(entry) |= _PAGE_PROTECT;
	}
	pgste_val(pgste) |= bit;
	pgste = pgste_set_pte(ptep, pgste, entry);
	pgste_set_unlock(ptep, pgste);
	return 0;
}

int ptep_shadow_pte(struct mm_struct *mm, unsigned long saddr,
		    pte_t *sptep, pte_t *tptep, pte_t pte)
{
	pgste_t spgste, tpgste;
	pte_t spte, tpte;
	int rc = -EAGAIN;

	if (!(pte_val(*tptep) & _PAGE_INVALID))
		return 0;	/* already shadowed */
	spgste = pgste_get_lock(sptep);
	spte = *sptep;
	if (!(pte_val(spte) & _PAGE_INVALID) &&
	    !((pte_val(spte) & _PAGE_PROTECT) &&
	      !(pte_val(pte) & _PAGE_PROTECT))) {
		pgste_val(spgste) |= PGSTE_VSIE_BIT;
		tpgste = pgste_get_lock(tptep);
		pte_val(tpte) = (pte_val(spte) & PAGE_MASK) |
				(pte_val(pte) & _PAGE_PROTECT);
		/* don't touch the storage key - it belongs to parent pgste */
		tpgste = pgste_set_pte(tptep, tpgste, tpte);
		pgste_set_unlock(tptep, tpgste);
		rc = 1;
	}
	pgste_set_unlock(sptep, spgste);
	return rc;
}

void ptep_unshadow_pte(struct mm_struct *mm, unsigned long saddr, pte_t *ptep)
{
	pgste_t pgste;
	int nodat;

	pgste = pgste_get_lock(ptep);
	/* notifier is called by the caller */
	nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);
	ptep_flush_direct(mm, saddr, ptep, nodat);
	/* don't touch the storage key - it belongs to parent pgste */
	pgste = pgste_set_pte(ptep, pgste, __pte(_PAGE_INVALID));
	pgste_set_unlock(ptep, pgste);
}

static void ptep_zap_swap_entry(struct mm_struct *mm, swp_entry_t entry)
{
	if (!non_swap_entry(entry))
		dec_mm_counter(mm, MM_SWAPENTS);
	else if (is_migration_entry(entry)) {
		struct page *page = migration_entry_to_page(entry);

		dec_mm_counter(mm, mm_counter(page));
	}
	free_swap_and_cache(entry);
}

void ptep_zap_unused(struct mm_struct *mm, unsigned long addr,
		     pte_t *ptep, int reset)
{
	unsigned long pgstev;
	pgste_t pgste;
	pte_t pte;

	/* Zap unused and logically-zero pages */
	preempt_disable();
	pgste = pgste_get_lock(ptep);
	pgstev = pgste_val(pgste);
	pte = *ptep;
	if (!reset && pte_swap(pte) &&
	    ((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED ||
	     (pgstev & _PGSTE_GPS_ZERO))) {
		ptep_zap_swap_entry(mm, pte_to_swp_entry(pte));
		pte_clear(mm, addr, ptep);
	}
	if (reset)
		pgste_val(pgste) &= ~_PGSTE_GPS_USAGE_MASK;
	pgste_set_unlock(ptep, pgste);
	preempt_enable();
}

void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
	unsigned long ptev;
	pgste_t pgste;

	/* Clear storage key ACC and F, but set R/C */
	preempt_disable();
	pgste = pgste_get_lock(ptep);
	pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT);
	pgste_val(pgste) |= PGSTE_GR_BIT | PGSTE_GC_BIT;
	ptev = pte_val(*ptep);
	if (!(ptev & _PAGE_INVALID) && (ptev & _PAGE_WRITE))
		page_set_storage_key(ptev & PAGE_MASK, PAGE_DEFAULT_KEY, 1);
	pgste_set_unlock(ptep, pgste);
	preempt_enable();
}

/*
 * Test and reset if a guest page is dirty
 */
bool ptep_test_and_clear_uc(struct mm_struct *mm, unsigned long addr,
			    pte_t *ptep)
{
	pgste_t pgste;
	pte_t pte;
	bool dirty;
	int nodat;

	pgste = pgste_get_lock(ptep);
	dirty = !!(pgste_val(pgste) & PGSTE_UC_BIT);
	pgste_val(pgste) &= ~PGSTE_UC_BIT;
	pte = *ptep;
	if (dirty && (pte_val(pte) & _PAGE_PRESENT)) {
		pgste = pgste_pte_notify(mm, addr, ptep, pgste);
		nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);
		ptep_ipte_global(mm, addr, ptep, nodat);
		if (MACHINE_HAS_ESOP || !(pte_val(pte) & _PAGE_WRITE))
			pte_val(pte) |= _PAGE_PROTECT;
		else
			pte_val(pte) |= _PAGE_INVALID;
		*ptep = pte;
	}
	pgste_set_unlock(ptep, pgste);
	return dirty;
}
EXPORT_SYMBOL_GPL(ptep_test_and_clear_uc);

int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
			  unsigned char key, bool nq)
{
	unsigned long keyul, paddr;
	spinlock_t *ptl;
	pgste_t old, new;
	pmd_t *pmdp;
	pte_t *ptep;

	pmdp = pmd_alloc_map(mm, addr);
	if (unlikely(!pmdp))
		return -EFAULT;

	ptl = pmd_lock(mm, pmdp);
	if (!pmd_present(*pmdp)) {
		spin_unlock(ptl);
		return -EFAULT;
	}

	if (pmd_large(*pmdp)) {
		paddr = pmd_val(*pmdp) & HPAGE_MASK;
		paddr |= addr & ~HPAGE_MASK;
		/*
		 * Huge pmds need quiescing operations, they are
		 * always mapped.
		 */
		page_set_storage_key(paddr, key, 1);
		spin_unlock(ptl);
		return 0;
	}
	spin_unlock(ptl);

	ptep = pte_alloc_map_lock(mm, pmdp, addr, &ptl);
	if (unlikely(!ptep))
		return -EFAULT;

	new = old = pgste_get_lock(ptep);
	pgste_val(new) &= ~(PGSTE_GR_BIT | PGSTE_GC_BIT |
			    PGSTE_ACC_BITS | PGSTE_FP_BIT);
	keyul = (unsigned long) key;
	pgste_val(new) |= (keyul & (_PAGE_CHANGED | _PAGE_REFERENCED)) << 48;
	pgste_val(new) |= (keyul & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
	if (!(pte_val(*ptep) & _PAGE_INVALID)) {
		unsigned long bits, skey;

		paddr = pte_val(*ptep) & PAGE_MASK;
		skey = (unsigned long) page_get_storage_key(paddr);
		bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
		skey = key & (_PAGE_ACC_BITS | _PAGE_FP_BIT);
		/* Set storage key ACC and FP */
		page_set_storage_key(paddr, skey, !nq);
		/* Merge host changed & referenced into pgste */
		pgste_val(new) |= bits << 52;
	}
	/* changing the guest storage key is considered a change of the page */
	if ((pgste_val(new) ^ pgste_val(old)) &
	    (PGSTE_ACC_BITS | PGSTE_FP_BIT | PGSTE_GR_BIT | PGSTE_GC_BIT))
		pgste_val(new) |= PGSTE_UC_BIT;

	pgste_set_unlock(ptep, new);
	pte_unmap_unlock(ptep, ptl);
	return 0;
}
EXPORT_SYMBOL(set_guest_storage_key);

/**
 * Conditionally set a guest storage key (handling csske).
 * oldkey will be updated when either mr or mc is set and a pointer is given.
 *
 * Returns 0 if a guest's storage key update wasn't necessary, 1 if the guest
 * storage key was updated and -EFAULT on access errors.
 */
int cond_set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
			       unsigned char key, unsigned char *oldkey,
			       bool nq, bool mr, bool mc)
{
	unsigned char tmp, mask = _PAGE_ACC_BITS | _PAGE_FP_BIT;
	int rc;

	/* we can drop the pgste lock between getting and setting the key */
	if (mr | mc) {
		rc = get_guest_storage_key(current->mm, addr, &tmp);
		if (rc)
			return rc;
		if (oldkey)
			*oldkey = tmp;
		if (!mr)
			mask |= _PAGE_REFERENCED;
		if (!mc)
			mask |= _PAGE_CHANGED;
		if (!((tmp ^ key) & mask))
			return 0;
	}
	rc = set_guest_storage_key(current->mm, addr, key, nq);
	return rc < 0 ? rc : 1;
}
EXPORT_SYMBOL(cond_set_guest_storage_key);

/**
 * Reset a guest reference bit (rrbe), returning the reference and changed bit.
 *
 * Returns < 0 in case of error, otherwise the cc to be reported to the guest.
 */
int reset_guest_reference_bit(struct mm_struct *mm, unsigned long addr)
{
	spinlock_t *ptl;
	unsigned long paddr;
	pgste_t old, new;
	pmd_t *pmdp;
	pte_t *ptep;
	int cc = 0;

	pmdp = pmd_alloc_map(mm, addr);
	if (unlikely(!pmdp))
		return -EFAULT;

	ptl = pmd_lock(mm, pmdp);
	if (!pmd_present(*pmdp)) {
		spin_unlock(ptl);
		return -EFAULT;
	}

	if (pmd_large(*pmdp)) {
		paddr = pmd_val(*pmdp) & HPAGE_MASK;
		paddr |= addr & ~HPAGE_MASK;
		cc = page_reset_referenced(paddr);
		spin_unlock(ptl);
		return cc;
	}
	spin_unlock(ptl);

	ptep = pte_alloc_map_lock(mm, pmdp, addr, &ptl);
	if (unlikely(!ptep))
		return -EFAULT;

	new = old = pgste_get_lock(ptep);
	/* Reset guest reference bit only */
	pgste_val(new) &= ~PGSTE_GR_BIT;

	if (!(pte_val(*ptep) & _PAGE_INVALID)) {
		paddr = pte_val(*ptep) & PAGE_MASK;
		cc = page_reset_referenced(paddr);
		/* Merge real referenced bit into host-set */
		pgste_val(new) |= ((unsigned long) cc << 53) & PGSTE_HR_BIT;
	}
	/* Reflect guest's logical view, not physical */
	cc |= (pgste_val(old) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 49;
	/* Changing the guest storage key is considered a change of the page */
	if ((pgste_val(new) ^ pgste_val(old)) & PGSTE_GR_BIT)
		pgste_val(new) |= PGSTE_UC_BIT;

	pgste_set_unlock(ptep, new);
	pte_unmap_unlock(ptep, ptl);
	return cc;
}
EXPORT_SYMBOL(reset_guest_reference_bit);

int get_guest_storage_key(struct mm_struct *mm, unsigned long addr,
			  unsigned char *key)
{
	unsigned long paddr;
	spinlock_t *ptl;
	pgste_t pgste;
	pmd_t *pmdp;
	pte_t *ptep;

	pmdp = pmd_alloc_map(mm, addr);
	if (unlikely(!pmdp))
		return -EFAULT;

	ptl = pmd_lock(mm, pmdp);
	if (!pmd_present(*pmdp)) {
		/* Not yet mapped memory has a zero key */
		spin_unlock(ptl);
		*key = 0;
		return 0;
	}

	if (pmd_large(*pmdp)) {
		paddr = pmd_val(*pmdp) & HPAGE_MASK;
		paddr |= addr & ~HPAGE_MASK;
		*key = page_get_storage_key(paddr);
		spin_unlock(ptl);
		return 0;
	}
	spin_unlock(ptl);

	ptep = pte_alloc_map_lock(mm, pmdp, addr, &ptl);
	if (unlikely(!ptep))
		return -EFAULT;

	pgste = pgste_get_lock(ptep);
	*key = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56;
	paddr = pte_val(*ptep) & PAGE_MASK;
	if (!(pte_val(*ptep) & _PAGE_INVALID))
		*key = page_get_storage_key(paddr);
	/* Reflect guest's logical view, not physical */
	*key |= (pgste_val(pgste) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 48;
	pgste_set_unlock(ptep, pgste);
	pte_unmap_unlock(ptep, ptl);
	return 0;
}
EXPORT_SYMBOL(get_guest_storage_key);

/**
 * pgste_perform_essa - perform ESSA actions on the PGSTE.
 * @mm: the memory context. It must have PGSTEs, no check is performed here!
 * @hva: the host virtual address of the page whose PGSTE is to be processed
 * @orc: the specific action to perform, see the ESSA_SET_* macros.
 * @oldpte: the PTE will be saved there if the pointer is not NULL.
 * @oldpgste: the old PGSTE will be saved there if the pointer is not NULL.
 *
 * Return: 1 if the page is to be added to the CBRL, otherwise 0,
 *	   or < 0 in case of error. -EINVAL is returned for invalid values
 *	   of orc, -EFAULT for invalid addresses.
 */
int pgste_perform_essa(struct mm_struct *mm, unsigned long hva, int orc,
			unsigned long *oldpte, unsigned long *oldpgste)
{
	unsigned long pgstev;
	spinlock_t *ptl;
	pgste_t pgste;
	pte_t *ptep;
	int res = 0;

	WARN_ON_ONCE(orc > ESSA_MAX);
	if (unlikely(orc > ESSA_MAX))
		return -EINVAL;
	ptep = get_locked_pte(mm, hva, &ptl);
	if (unlikely(!ptep))
		return -EFAULT;
	pgste = pgste_get_lock(ptep);
	pgstev = pgste_val(pgste);
	if (oldpte)
		*oldpte = pte_val(*ptep);
	if (oldpgste)
		*oldpgste = pgstev;

	switch (orc) {
	case ESSA_GET_STATE:
		break;
	case ESSA_SET_STABLE:
		pgstev &= ~(_PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT);
		pgstev |= _PGSTE_GPS_USAGE_STABLE;
		break;
	case ESSA_SET_UNUSED:
		pgstev &= ~_PGSTE_GPS_USAGE_MASK;
		pgstev |= _PGSTE_GPS_USAGE_UNUSED;
		if (pte_val(*ptep) & _PAGE_INVALID)
			res = 1;
		break;
	case ESSA_SET_VOLATILE:
		pgstev &= ~_PGSTE_GPS_USAGE_MASK;
		pgstev |= _PGSTE_GPS_USAGE_VOLATILE;
		if (pte_val(*ptep) & _PAGE_INVALID)
			res = 1;
		break;
	case ESSA_SET_POT_VOLATILE:
		pgstev &= ~_PGSTE_GPS_USAGE_MASK;
		if (!(pte_val(*ptep) & _PAGE_INVALID)) {
			pgstev |= _PGSTE_GPS_USAGE_POT_VOLATILE;
			break;
		}
		if (pgstev & _PGSTE_GPS_ZERO) {
			pgstev |= _PGSTE_GPS_USAGE_VOLATILE;
			break;
		}
		if (!(pgstev & PGSTE_GC_BIT)) {
			pgstev |= _PGSTE_GPS_USAGE_VOLATILE;
			res = 1;
			break;
		}
		break;
	case ESSA_SET_STABLE_RESIDENT:
		pgstev &= ~_PGSTE_GPS_USAGE_MASK;
		pgstev |= _PGSTE_GPS_USAGE_STABLE;
		/*
		 * Since the resident state can go away any time after this
		 * call, we will not make this page resident. We can revisit
		 * this decision if a guest will ever start using this.
		 */
		break;
	case ESSA_SET_STABLE_IF_RESIDENT:
		if (!(pte_val(*ptep) & _PAGE_INVALID)) {
			pgstev &= ~_PGSTE_GPS_USAGE_MASK;
			pgstev |= _PGSTE_GPS_USAGE_STABLE;
		}
		break;
	case ESSA_SET_STABLE_NODAT:
		pgstev &= ~_PGSTE_GPS_USAGE_MASK;
		pgstev |= _PGSTE_GPS_USAGE_STABLE | _PGSTE_GPS_NODAT;
		break;
	default:
		/* we should never get here! */
		break;
	}
	/* If we are discarding a page, set it to logical zero */
	if (res)
		pgstev |= _PGSTE_GPS_ZERO;

	pgste_val(pgste) = pgstev;
	pgste_set_unlock(ptep, pgste);
	pte_unmap_unlock(ptep, ptl);
	return res;
}
EXPORT_SYMBOL(pgste_perform_essa);

/**
 * set_pgste_bits - set specific PGSTE bits.
 * @mm: the memory context. It must have PGSTEs, no check is performed here!
 * @hva: the host virtual address of the page whose PGSTE is to be processed
 * @bits: a bitmask representing the bits that will be touched
 * @value: the values of the bits to be written. Only the bits in the mask
 *	   will be written.
 *
 * Return: 0 on success, < 0 in case of error.
 */
int set_pgste_bits(struct mm_struct *mm, unsigned long hva,
		   unsigned long bits, unsigned long value)
{
	spinlock_t *ptl;
	pgste_t new;
	pte_t *ptep;

	ptep = get_locked_pte(mm, hva, &ptl);
	if (unlikely(!ptep))
		return -EFAULT;
	new = pgste_get_lock(ptep);

	pgste_val(new) &= ~bits;
	pgste_val(new) |= value & bits;

	pgste_set_unlock(ptep, new);
	pte_unmap_unlock(ptep, ptl);
	return 0;
}
EXPORT_SYMBOL(set_pgste_bits);

/**
 * get_pgste - get the current PGSTE for the given address.
 * @mm: the memory context. It must have PGSTEs, no check is performed here!
 * @hva: the host virtual address of the page whose PGSTE is to be processed
 * @pgstep: will be written with the current PGSTE for the given address.
 *
 * Return: 0 on success, < 0 in case of error.
 */
int get_pgste(struct mm_struct *mm, unsigned long hva, unsigned long *pgstep)
{
	spinlock_t *ptl;
	pte_t *ptep;

	ptep = get_locked_pte(mm, hva, &ptl);
	if (unlikely(!ptep))
		return -EFAULT;
	*pgstep = pgste_val(pgste_get(ptep));
	pte_unmap_unlock(ptep, ptl);
	return 0;
}
EXPORT_SYMBOL(get_pgste);
#endif