1 /* 2 * Copyright IBM Corp. 2007, 2011 3 * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> 4 */ 5 6 #include <linux/sched.h> 7 #include <linux/kernel.h> 8 #include <linux/errno.h> 9 #include <linux/gfp.h> 10 #include <linux/mm.h> 11 #include <linux/swap.h> 12 #include <linux/smp.h> 13 #include <linux/spinlock.h> 14 #include <linux/rcupdate.h> 15 #include <linux/slab.h> 16 #include <linux/swapops.h> 17 #include <linux/sysctl.h> 18 #include <linux/ksm.h> 19 #include <linux/mman.h> 20 21 #include <asm/pgtable.h> 22 #include <asm/pgalloc.h> 23 #include <asm/tlb.h> 24 #include <asm/tlbflush.h> 25 #include <asm/mmu_context.h> 26 #include <asm/page-states.h> 27 28 static inline pte_t ptep_flush_direct(struct mm_struct *mm, 29 unsigned long addr, pte_t *ptep) 30 { 31 pte_t old; 32 33 old = *ptep; 34 if (unlikely(pte_val(old) & _PAGE_INVALID)) 35 return old; 36 atomic_inc(&mm->context.flush_count); 37 if (MACHINE_HAS_TLB_LC && 38 cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) 39 __ptep_ipte(addr, ptep, IPTE_LOCAL); 40 else 41 __ptep_ipte(addr, ptep, IPTE_GLOBAL); 42 atomic_dec(&mm->context.flush_count); 43 return old; 44 } 45 46 static inline pte_t ptep_flush_lazy(struct mm_struct *mm, 47 unsigned long addr, pte_t *ptep) 48 { 49 pte_t old; 50 51 old = *ptep; 52 if (unlikely(pte_val(old) & _PAGE_INVALID)) 53 return old; 54 atomic_inc(&mm->context.flush_count); 55 if (cpumask_equal(&mm->context.cpu_attach_mask, 56 cpumask_of(smp_processor_id()))) { 57 pte_val(*ptep) |= _PAGE_INVALID; 58 mm->context.flush_mm = 1; 59 } else 60 __ptep_ipte(addr, ptep, IPTE_GLOBAL); 61 atomic_dec(&mm->context.flush_count); 62 return old; 63 } 64 65 static inline pgste_t pgste_get_lock(pte_t *ptep) 66 { 67 unsigned long new = 0; 68 #ifdef CONFIG_PGSTE 69 unsigned long old; 70 71 asm( 72 " lg %0,%2\n" 73 "0: lgr %1,%0\n" 74 " nihh %0,0xff7f\n" /* clear PCL bit in old */ 75 " oihh %1,0x0080\n" /* set PCL bit in new */ 76 " csg %0,%1,%2\n" 77 " jl 0b\n" 78 : "=&d" (old), "=&d" (new), "=Q" (ptep[PTRS_PER_PTE]) 79 : "Q" (ptep[PTRS_PER_PTE]) : "cc", "memory"); 80 #endif 81 return __pgste(new); 82 } 83 84 static inline void pgste_set_unlock(pte_t *ptep, pgste_t pgste) 85 { 86 #ifdef CONFIG_PGSTE 87 asm( 88 " nihh %1,0xff7f\n" /* clear PCL bit */ 89 " stg %1,%0\n" 90 : "=Q" (ptep[PTRS_PER_PTE]) 91 : "d" (pgste_val(pgste)), "Q" (ptep[PTRS_PER_PTE]) 92 : "cc", "memory"); 93 #endif 94 } 95 96 static inline pgste_t pgste_get(pte_t *ptep) 97 { 98 unsigned long pgste = 0; 99 #ifdef CONFIG_PGSTE 100 pgste = *(unsigned long *)(ptep + PTRS_PER_PTE); 101 #endif 102 return __pgste(pgste); 103 } 104 105 static inline void pgste_set(pte_t *ptep, pgste_t pgste) 106 { 107 #ifdef CONFIG_PGSTE 108 *(pgste_t *)(ptep + PTRS_PER_PTE) = pgste; 109 #endif 110 } 111 112 static inline pgste_t pgste_update_all(pte_t pte, pgste_t pgste, 113 struct mm_struct *mm) 114 { 115 #ifdef CONFIG_PGSTE 116 unsigned long address, bits, skey; 117 118 if (!mm_use_skey(mm) || pte_val(pte) & _PAGE_INVALID) 119 return pgste; 120 address = pte_val(pte) & PAGE_MASK; 121 skey = (unsigned long) page_get_storage_key(address); 122 bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED); 123 /* Transfer page changed & referenced bit to guest bits in pgste */ 124 pgste_val(pgste) |= bits << 48; /* GR bit & GC bit */ 125 /* Copy page access key and fetch protection bit to pgste */ 126 pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT); 127 pgste_val(pgste) |= (skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56; 128 #endif 129 return pgste; 130 131 } 132 133 static inline void pgste_set_key(pte_t *ptep, pgste_t pgste, pte_t entry, 134 struct mm_struct *mm) 135 { 136 #ifdef CONFIG_PGSTE 137 unsigned long address; 138 unsigned long nkey; 139 140 if (!mm_use_skey(mm) || pte_val(entry) & _PAGE_INVALID) 141 return; 142 VM_BUG_ON(!(pte_val(*ptep) & _PAGE_INVALID)); 143 address = pte_val(entry) & PAGE_MASK; 144 /* 145 * Set page access key and fetch protection bit from pgste. 146 * The guest C/R information is still in the PGSTE, set real 147 * key C/R to 0. 148 */ 149 nkey = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56; 150 nkey |= (pgste_val(pgste) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 48; 151 page_set_storage_key(address, nkey, 0); 152 #endif 153 } 154 155 static inline pgste_t pgste_set_pte(pte_t *ptep, pgste_t pgste, pte_t entry) 156 { 157 #ifdef CONFIG_PGSTE 158 if ((pte_val(entry) & _PAGE_PRESENT) && 159 (pte_val(entry) & _PAGE_WRITE) && 160 !(pte_val(entry) & _PAGE_INVALID)) { 161 if (!MACHINE_HAS_ESOP) { 162 /* 163 * Without enhanced suppression-on-protection force 164 * the dirty bit on for all writable ptes. 165 */ 166 pte_val(entry) |= _PAGE_DIRTY; 167 pte_val(entry) &= ~_PAGE_PROTECT; 168 } 169 if (!(pte_val(entry) & _PAGE_PROTECT)) 170 /* This pte allows write access, set user-dirty */ 171 pgste_val(pgste) |= PGSTE_UC_BIT; 172 } 173 #endif 174 *ptep = entry; 175 return pgste; 176 } 177 178 static inline pgste_t pgste_pte_notify(struct mm_struct *mm, 179 unsigned long addr, 180 pte_t *ptep, pgste_t pgste) 181 { 182 #ifdef CONFIG_PGSTE 183 unsigned long bits; 184 185 bits = pgste_val(pgste) & (PGSTE_IN_BIT | PGSTE_VSIE_BIT); 186 if (bits) { 187 pgste_val(pgste) ^= bits; 188 ptep_notify(mm, addr, ptep, bits); 189 } 190 #endif 191 return pgste; 192 } 193 194 static inline pgste_t ptep_xchg_start(struct mm_struct *mm, 195 unsigned long addr, pte_t *ptep) 196 { 197 pgste_t pgste = __pgste(0); 198 199 if (mm_has_pgste(mm)) { 200 pgste = pgste_get_lock(ptep); 201 pgste = pgste_pte_notify(mm, addr, ptep, pgste); 202 } 203 return pgste; 204 } 205 206 static inline pte_t ptep_xchg_commit(struct mm_struct *mm, 207 unsigned long addr, pte_t *ptep, 208 pgste_t pgste, pte_t old, pte_t new) 209 { 210 if (mm_has_pgste(mm)) { 211 if (pte_val(old) & _PAGE_INVALID) 212 pgste_set_key(ptep, pgste, new, mm); 213 if (pte_val(new) & _PAGE_INVALID) { 214 pgste = pgste_update_all(old, pgste, mm); 215 if ((pgste_val(pgste) & _PGSTE_GPS_USAGE_MASK) == 216 _PGSTE_GPS_USAGE_UNUSED) 217 pte_val(old) |= _PAGE_UNUSED; 218 } 219 pgste = pgste_set_pte(ptep, pgste, new); 220 pgste_set_unlock(ptep, pgste); 221 } else { 222 *ptep = new; 223 } 224 return old; 225 } 226 227 pte_t ptep_xchg_direct(struct mm_struct *mm, unsigned long addr, 228 pte_t *ptep, pte_t new) 229 { 230 pgste_t pgste; 231 pte_t old; 232 233 preempt_disable(); 234 pgste = ptep_xchg_start(mm, addr, ptep); 235 old = ptep_flush_direct(mm, addr, ptep); 236 old = ptep_xchg_commit(mm, addr, ptep, pgste, old, new); 237 preempt_enable(); 238 return old; 239 } 240 EXPORT_SYMBOL(ptep_xchg_direct); 241 242 pte_t ptep_xchg_lazy(struct mm_struct *mm, unsigned long addr, 243 pte_t *ptep, pte_t new) 244 { 245 pgste_t pgste; 246 pte_t old; 247 248 preempt_disable(); 249 pgste = ptep_xchg_start(mm, addr, ptep); 250 old = ptep_flush_lazy(mm, addr, ptep); 251 old = ptep_xchg_commit(mm, addr, ptep, pgste, old, new); 252 preempt_enable(); 253 return old; 254 } 255 EXPORT_SYMBOL(ptep_xchg_lazy); 256 257 pte_t ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr, 258 pte_t *ptep) 259 { 260 pgste_t pgste; 261 pte_t old; 262 263 preempt_disable(); 264 pgste = ptep_xchg_start(mm, addr, ptep); 265 old = ptep_flush_lazy(mm, addr, ptep); 266 if (mm_has_pgste(mm)) { 267 pgste = pgste_update_all(old, pgste, mm); 268 pgste_set(ptep, pgste); 269 } 270 return old; 271 } 272 EXPORT_SYMBOL(ptep_modify_prot_start); 273 274 void ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr, 275 pte_t *ptep, pte_t pte) 276 { 277 pgste_t pgste; 278 279 if (!MACHINE_HAS_NX) 280 pte_val(pte) &= ~_PAGE_NOEXEC; 281 if (mm_has_pgste(mm)) { 282 pgste = pgste_get(ptep); 283 pgste_set_key(ptep, pgste, pte, mm); 284 pgste = pgste_set_pte(ptep, pgste, pte); 285 pgste_set_unlock(ptep, pgste); 286 } else { 287 *ptep = pte; 288 } 289 preempt_enable(); 290 } 291 EXPORT_SYMBOL(ptep_modify_prot_commit); 292 293 static inline pmd_t pmdp_flush_direct(struct mm_struct *mm, 294 unsigned long addr, pmd_t *pmdp) 295 { 296 pmd_t old; 297 298 old = *pmdp; 299 if (pmd_val(old) & _SEGMENT_ENTRY_INVALID) 300 return old; 301 if (!MACHINE_HAS_IDTE) { 302 __pmdp_csp(pmdp); 303 return old; 304 } 305 atomic_inc(&mm->context.flush_count); 306 if (MACHINE_HAS_TLB_LC && 307 cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) 308 __pmdp_idte(addr, pmdp, IDTE_LOCAL); 309 else 310 __pmdp_idte(addr, pmdp, IDTE_GLOBAL); 311 atomic_dec(&mm->context.flush_count); 312 return old; 313 } 314 315 static inline pmd_t pmdp_flush_lazy(struct mm_struct *mm, 316 unsigned long addr, pmd_t *pmdp) 317 { 318 pmd_t old; 319 320 old = *pmdp; 321 if (pmd_val(old) & _SEGMENT_ENTRY_INVALID) 322 return old; 323 atomic_inc(&mm->context.flush_count); 324 if (cpumask_equal(&mm->context.cpu_attach_mask, 325 cpumask_of(smp_processor_id()))) { 326 pmd_val(*pmdp) |= _SEGMENT_ENTRY_INVALID; 327 mm->context.flush_mm = 1; 328 } else if (MACHINE_HAS_IDTE) 329 __pmdp_idte(addr, pmdp, IDTE_GLOBAL); 330 else 331 __pmdp_csp(pmdp); 332 atomic_dec(&mm->context.flush_count); 333 return old; 334 } 335 336 pmd_t pmdp_xchg_direct(struct mm_struct *mm, unsigned long addr, 337 pmd_t *pmdp, pmd_t new) 338 { 339 pmd_t old; 340 341 preempt_disable(); 342 old = pmdp_flush_direct(mm, addr, pmdp); 343 *pmdp = new; 344 preempt_enable(); 345 return old; 346 } 347 EXPORT_SYMBOL(pmdp_xchg_direct); 348 349 pmd_t pmdp_xchg_lazy(struct mm_struct *mm, unsigned long addr, 350 pmd_t *pmdp, pmd_t new) 351 { 352 pmd_t old; 353 354 preempt_disable(); 355 old = pmdp_flush_lazy(mm, addr, pmdp); 356 *pmdp = new; 357 preempt_enable(); 358 return old; 359 } 360 EXPORT_SYMBOL(pmdp_xchg_lazy); 361 362 static inline pud_t pudp_flush_direct(struct mm_struct *mm, 363 unsigned long addr, pud_t *pudp) 364 { 365 pud_t old; 366 367 old = *pudp; 368 if (pud_val(old) & _REGION_ENTRY_INVALID) 369 return old; 370 if (!MACHINE_HAS_IDTE) { 371 /* 372 * Invalid bit position is the same for pmd and pud, so we can 373 * re-use _pmd_csp() here 374 */ 375 __pmdp_csp((pmd_t *) pudp); 376 return old; 377 } 378 atomic_inc(&mm->context.flush_count); 379 if (MACHINE_HAS_TLB_LC && 380 cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) 381 __pudp_idte(addr, pudp, IDTE_LOCAL); 382 else 383 __pudp_idte(addr, pudp, IDTE_GLOBAL); 384 atomic_dec(&mm->context.flush_count); 385 return old; 386 } 387 388 pud_t pudp_xchg_direct(struct mm_struct *mm, unsigned long addr, 389 pud_t *pudp, pud_t new) 390 { 391 pud_t old; 392 393 preempt_disable(); 394 old = pudp_flush_direct(mm, addr, pudp); 395 *pudp = new; 396 preempt_enable(); 397 return old; 398 } 399 EXPORT_SYMBOL(pudp_xchg_direct); 400 401 #ifdef CONFIG_TRANSPARENT_HUGEPAGE 402 void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, 403 pgtable_t pgtable) 404 { 405 struct list_head *lh = (struct list_head *) pgtable; 406 407 assert_spin_locked(pmd_lockptr(mm, pmdp)); 408 409 /* FIFO */ 410 if (!pmd_huge_pte(mm, pmdp)) 411 INIT_LIST_HEAD(lh); 412 else 413 list_add(lh, (struct list_head *) pmd_huge_pte(mm, pmdp)); 414 pmd_huge_pte(mm, pmdp) = pgtable; 415 } 416 417 pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp) 418 { 419 struct list_head *lh; 420 pgtable_t pgtable; 421 pte_t *ptep; 422 423 assert_spin_locked(pmd_lockptr(mm, pmdp)); 424 425 /* FIFO */ 426 pgtable = pmd_huge_pte(mm, pmdp); 427 lh = (struct list_head *) pgtable; 428 if (list_empty(lh)) 429 pmd_huge_pte(mm, pmdp) = NULL; 430 else { 431 pmd_huge_pte(mm, pmdp) = (pgtable_t) lh->next; 432 list_del(lh); 433 } 434 ptep = (pte_t *) pgtable; 435 pte_val(*ptep) = _PAGE_INVALID; 436 ptep++; 437 pte_val(*ptep) = _PAGE_INVALID; 438 return pgtable; 439 } 440 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ 441 442 #ifdef CONFIG_PGSTE 443 void ptep_set_pte_at(struct mm_struct *mm, unsigned long addr, 444 pte_t *ptep, pte_t entry) 445 { 446 pgste_t pgste; 447 448 /* the mm_has_pgste() check is done in set_pte_at() */ 449 preempt_disable(); 450 pgste = pgste_get_lock(ptep); 451 pgste_val(pgste) &= ~_PGSTE_GPS_ZERO; 452 pgste_set_key(ptep, pgste, entry, mm); 453 pgste = pgste_set_pte(ptep, pgste, entry); 454 pgste_set_unlock(ptep, pgste); 455 preempt_enable(); 456 } 457 458 void ptep_set_notify(struct mm_struct *mm, unsigned long addr, pte_t *ptep) 459 { 460 pgste_t pgste; 461 462 preempt_disable(); 463 pgste = pgste_get_lock(ptep); 464 pgste_val(pgste) |= PGSTE_IN_BIT; 465 pgste_set_unlock(ptep, pgste); 466 preempt_enable(); 467 } 468 469 /** 470 * ptep_force_prot - change access rights of a locked pte 471 * @mm: pointer to the process mm_struct 472 * @addr: virtual address in the guest address space 473 * @ptep: pointer to the page table entry 474 * @prot: indicates guest access rights: PROT_NONE, PROT_READ or PROT_WRITE 475 * @bit: pgste bit to set (e.g. for notification) 476 * 477 * Returns 0 if the access rights were changed and -EAGAIN if the current 478 * and requested access rights are incompatible. 479 */ 480 int ptep_force_prot(struct mm_struct *mm, unsigned long addr, 481 pte_t *ptep, int prot, unsigned long bit) 482 { 483 pte_t entry; 484 pgste_t pgste; 485 int pte_i, pte_p; 486 487 pgste = pgste_get_lock(ptep); 488 entry = *ptep; 489 /* Check pte entry after all locks have been acquired */ 490 pte_i = pte_val(entry) & _PAGE_INVALID; 491 pte_p = pte_val(entry) & _PAGE_PROTECT; 492 if ((pte_i && (prot != PROT_NONE)) || 493 (pte_p && (prot & PROT_WRITE))) { 494 pgste_set_unlock(ptep, pgste); 495 return -EAGAIN; 496 } 497 /* Change access rights and set pgste bit */ 498 if (prot == PROT_NONE && !pte_i) { 499 ptep_flush_direct(mm, addr, ptep); 500 pgste = pgste_update_all(entry, pgste, mm); 501 pte_val(entry) |= _PAGE_INVALID; 502 } 503 if (prot == PROT_READ && !pte_p) { 504 ptep_flush_direct(mm, addr, ptep); 505 pte_val(entry) &= ~_PAGE_INVALID; 506 pte_val(entry) |= _PAGE_PROTECT; 507 } 508 pgste_val(pgste) |= bit; 509 pgste = pgste_set_pte(ptep, pgste, entry); 510 pgste_set_unlock(ptep, pgste); 511 return 0; 512 } 513 514 int ptep_shadow_pte(struct mm_struct *mm, unsigned long saddr, 515 pte_t *sptep, pte_t *tptep, pte_t pte) 516 { 517 pgste_t spgste, tpgste; 518 pte_t spte, tpte; 519 int rc = -EAGAIN; 520 521 if (!(pte_val(*tptep) & _PAGE_INVALID)) 522 return 0; /* already shadowed */ 523 spgste = pgste_get_lock(sptep); 524 spte = *sptep; 525 if (!(pte_val(spte) & _PAGE_INVALID) && 526 !((pte_val(spte) & _PAGE_PROTECT) && 527 !(pte_val(pte) & _PAGE_PROTECT))) { 528 pgste_val(spgste) |= PGSTE_VSIE_BIT; 529 tpgste = pgste_get_lock(tptep); 530 pte_val(tpte) = (pte_val(spte) & PAGE_MASK) | 531 (pte_val(pte) & _PAGE_PROTECT); 532 /* don't touch the storage key - it belongs to parent pgste */ 533 tpgste = pgste_set_pte(tptep, tpgste, tpte); 534 pgste_set_unlock(tptep, tpgste); 535 rc = 1; 536 } 537 pgste_set_unlock(sptep, spgste); 538 return rc; 539 } 540 541 void ptep_unshadow_pte(struct mm_struct *mm, unsigned long saddr, pte_t *ptep) 542 { 543 pgste_t pgste; 544 545 pgste = pgste_get_lock(ptep); 546 /* notifier is called by the caller */ 547 ptep_flush_direct(mm, saddr, ptep); 548 /* don't touch the storage key - it belongs to parent pgste */ 549 pgste = pgste_set_pte(ptep, pgste, __pte(_PAGE_INVALID)); 550 pgste_set_unlock(ptep, pgste); 551 } 552 553 static void ptep_zap_swap_entry(struct mm_struct *mm, swp_entry_t entry) 554 { 555 if (!non_swap_entry(entry)) 556 dec_mm_counter(mm, MM_SWAPENTS); 557 else if (is_migration_entry(entry)) { 558 struct page *page = migration_entry_to_page(entry); 559 560 dec_mm_counter(mm, mm_counter(page)); 561 } 562 free_swap_and_cache(entry); 563 } 564 565 void ptep_zap_unused(struct mm_struct *mm, unsigned long addr, 566 pte_t *ptep, int reset) 567 { 568 unsigned long pgstev; 569 pgste_t pgste; 570 pte_t pte; 571 572 /* Zap unused and logically-zero pages */ 573 preempt_disable(); 574 pgste = pgste_get_lock(ptep); 575 pgstev = pgste_val(pgste); 576 pte = *ptep; 577 if (!reset && pte_swap(pte) && 578 ((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED || 579 (pgstev & _PGSTE_GPS_ZERO))) { 580 ptep_zap_swap_entry(mm, pte_to_swp_entry(pte)); 581 pte_clear(mm, addr, ptep); 582 } 583 if (reset) 584 pgste_val(pgste) &= ~_PGSTE_GPS_USAGE_MASK; 585 pgste_set_unlock(ptep, pgste); 586 preempt_enable(); 587 } 588 589 void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep) 590 { 591 unsigned long ptev; 592 pgste_t pgste; 593 594 /* Clear storage key */ 595 preempt_disable(); 596 pgste = pgste_get_lock(ptep); 597 pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT | 598 PGSTE_GR_BIT | PGSTE_GC_BIT); 599 ptev = pte_val(*ptep); 600 if (!(ptev & _PAGE_INVALID) && (ptev & _PAGE_WRITE)) 601 page_set_storage_key(ptev & PAGE_MASK, PAGE_DEFAULT_KEY, 1); 602 pgste_set_unlock(ptep, pgste); 603 preempt_enable(); 604 } 605 606 /* 607 * Test and reset if a guest page is dirty 608 */ 609 bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long addr) 610 { 611 spinlock_t *ptl; 612 pgd_t *pgd; 613 pud_t *pud; 614 pmd_t *pmd; 615 pgste_t pgste; 616 pte_t *ptep; 617 pte_t pte; 618 bool dirty; 619 620 pgd = pgd_offset(mm, addr); 621 pud = pud_alloc(mm, pgd, addr); 622 if (!pud) 623 return false; 624 pmd = pmd_alloc(mm, pud, addr); 625 if (!pmd) 626 return false; 627 /* We can't run guests backed by huge pages, but userspace can 628 * still set them up and then try to migrate them without any 629 * migration support. 630 */ 631 if (pmd_large(*pmd)) 632 return true; 633 634 ptep = pte_alloc_map_lock(mm, pmd, addr, &ptl); 635 if (unlikely(!ptep)) 636 return false; 637 638 pgste = pgste_get_lock(ptep); 639 dirty = !!(pgste_val(pgste) & PGSTE_UC_BIT); 640 pgste_val(pgste) &= ~PGSTE_UC_BIT; 641 pte = *ptep; 642 if (dirty && (pte_val(pte) & _PAGE_PRESENT)) { 643 pgste = pgste_pte_notify(mm, addr, ptep, pgste); 644 __ptep_ipte(addr, ptep, IPTE_GLOBAL); 645 if (MACHINE_HAS_ESOP || !(pte_val(pte) & _PAGE_WRITE)) 646 pte_val(pte) |= _PAGE_PROTECT; 647 else 648 pte_val(pte) |= _PAGE_INVALID; 649 *ptep = pte; 650 } 651 pgste_set_unlock(ptep, pgste); 652 653 spin_unlock(ptl); 654 return dirty; 655 } 656 EXPORT_SYMBOL_GPL(test_and_clear_guest_dirty); 657 658 int set_guest_storage_key(struct mm_struct *mm, unsigned long addr, 659 unsigned char key, bool nq) 660 { 661 unsigned long keyul; 662 spinlock_t *ptl; 663 pgste_t old, new; 664 pte_t *ptep; 665 666 ptep = get_locked_pte(mm, addr, &ptl); 667 if (unlikely(!ptep)) 668 return -EFAULT; 669 670 new = old = pgste_get_lock(ptep); 671 pgste_val(new) &= ~(PGSTE_GR_BIT | PGSTE_GC_BIT | 672 PGSTE_ACC_BITS | PGSTE_FP_BIT); 673 keyul = (unsigned long) key; 674 pgste_val(new) |= (keyul & (_PAGE_CHANGED | _PAGE_REFERENCED)) << 48; 675 pgste_val(new) |= (keyul & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56; 676 if (!(pte_val(*ptep) & _PAGE_INVALID)) { 677 unsigned long address, bits, skey; 678 679 address = pte_val(*ptep) & PAGE_MASK; 680 skey = (unsigned long) page_get_storage_key(address); 681 bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED); 682 skey = key & (_PAGE_ACC_BITS | _PAGE_FP_BIT); 683 /* Set storage key ACC and FP */ 684 page_set_storage_key(address, skey, !nq); 685 /* Merge host changed & referenced into pgste */ 686 pgste_val(new) |= bits << 52; 687 } 688 /* changing the guest storage key is considered a change of the page */ 689 if ((pgste_val(new) ^ pgste_val(old)) & 690 (PGSTE_ACC_BITS | PGSTE_FP_BIT | PGSTE_GR_BIT | PGSTE_GC_BIT)) 691 pgste_val(new) |= PGSTE_UC_BIT; 692 693 pgste_set_unlock(ptep, new); 694 pte_unmap_unlock(ptep, ptl); 695 return 0; 696 } 697 EXPORT_SYMBOL(set_guest_storage_key); 698 699 /** 700 * Conditionally set a guest storage key (handling csske). 701 * oldkey will be updated when either mr or mc is set and a pointer is given. 702 * 703 * Returns 0 if a guests storage key update wasn't necessary, 1 if the guest 704 * storage key was updated and -EFAULT on access errors. 705 */ 706 int cond_set_guest_storage_key(struct mm_struct *mm, unsigned long addr, 707 unsigned char key, unsigned char *oldkey, 708 bool nq, bool mr, bool mc) 709 { 710 unsigned char tmp, mask = _PAGE_ACC_BITS | _PAGE_FP_BIT; 711 int rc; 712 713 /* we can drop the pgste lock between getting and setting the key */ 714 if (mr | mc) { 715 rc = get_guest_storage_key(current->mm, addr, &tmp); 716 if (rc) 717 return rc; 718 if (oldkey) 719 *oldkey = tmp; 720 if (!mr) 721 mask |= _PAGE_REFERENCED; 722 if (!mc) 723 mask |= _PAGE_CHANGED; 724 if (!((tmp ^ key) & mask)) 725 return 0; 726 } 727 rc = set_guest_storage_key(current->mm, addr, key, nq); 728 return rc < 0 ? rc : 1; 729 } 730 EXPORT_SYMBOL(cond_set_guest_storage_key); 731 732 /** 733 * Reset a guest reference bit (rrbe), returning the reference and changed bit. 734 * 735 * Returns < 0 in case of error, otherwise the cc to be reported to the guest. 736 */ 737 int reset_guest_reference_bit(struct mm_struct *mm, unsigned long addr) 738 { 739 spinlock_t *ptl; 740 pgste_t old, new; 741 pte_t *ptep; 742 int cc = 0; 743 744 ptep = get_locked_pte(mm, addr, &ptl); 745 if (unlikely(!ptep)) 746 return -EFAULT; 747 748 new = old = pgste_get_lock(ptep); 749 /* Reset guest reference bit only */ 750 pgste_val(new) &= ~PGSTE_GR_BIT; 751 752 if (!(pte_val(*ptep) & _PAGE_INVALID)) { 753 cc = page_reset_referenced(pte_val(*ptep) & PAGE_MASK); 754 /* Merge real referenced bit into host-set */ 755 pgste_val(new) |= ((unsigned long) cc << 53) & PGSTE_HR_BIT; 756 } 757 /* Reflect guest's logical view, not physical */ 758 cc |= (pgste_val(old) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 49; 759 /* Changing the guest storage key is considered a change of the page */ 760 if ((pgste_val(new) ^ pgste_val(old)) & PGSTE_GR_BIT) 761 pgste_val(new) |= PGSTE_UC_BIT; 762 763 pgste_set_unlock(ptep, new); 764 pte_unmap_unlock(ptep, ptl); 765 return cc; 766 } 767 EXPORT_SYMBOL(reset_guest_reference_bit); 768 769 int get_guest_storage_key(struct mm_struct *mm, unsigned long addr, 770 unsigned char *key) 771 { 772 spinlock_t *ptl; 773 pgste_t pgste; 774 pte_t *ptep; 775 776 ptep = get_locked_pte(mm, addr, &ptl); 777 if (unlikely(!ptep)) 778 return -EFAULT; 779 780 pgste = pgste_get_lock(ptep); 781 *key = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56; 782 if (!(pte_val(*ptep) & _PAGE_INVALID)) 783 *key = page_get_storage_key(pte_val(*ptep) & PAGE_MASK); 784 /* Reflect guest's logical view, not physical */ 785 *key |= (pgste_val(pgste) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 48; 786 pgste_set_unlock(ptep, pgste); 787 pte_unmap_unlock(ptep, ptl); 788 return 0; 789 } 790 EXPORT_SYMBOL(get_guest_storage_key); 791 792 /** 793 * pgste_perform_essa - perform ESSA actions on the PGSTE. 794 * @mm: the memory context. It must have PGSTEs, no check is performed here! 795 * @hva: the host virtual address of the page whose PGSTE is to be processed 796 * @orc: the specific action to perform, see the ESSA_SET_* macros. 797 * @oldpte: the PTE will be saved there if the pointer is not NULL. 798 * @oldpgste: the old PGSTE will be saved there if the pointer is not NULL. 799 * 800 * Return: 1 if the page is to be added to the CBRL, otherwise 0, 801 * or < 0 in case of error. -EINVAL is returned for invalid values 802 * of orc, -EFAULT for invalid addresses. 803 */ 804 int pgste_perform_essa(struct mm_struct *mm, unsigned long hva, int orc, 805 unsigned long *oldpte, unsigned long *oldpgste) 806 { 807 unsigned long pgstev; 808 spinlock_t *ptl; 809 pgste_t pgste; 810 pte_t *ptep; 811 int res = 0; 812 813 WARN_ON_ONCE(orc > ESSA_MAX); 814 if (unlikely(orc > ESSA_MAX)) 815 return -EINVAL; 816 ptep = get_locked_pte(mm, hva, &ptl); 817 if (unlikely(!ptep)) 818 return -EFAULT; 819 pgste = pgste_get_lock(ptep); 820 pgstev = pgste_val(pgste); 821 if (oldpte) 822 *oldpte = pte_val(*ptep); 823 if (oldpgste) 824 *oldpgste = pgstev; 825 826 switch (orc) { 827 case ESSA_GET_STATE: 828 break; 829 case ESSA_SET_STABLE: 830 pgstev &= ~_PGSTE_GPS_USAGE_MASK; 831 pgstev |= _PGSTE_GPS_USAGE_STABLE; 832 break; 833 case ESSA_SET_UNUSED: 834 pgstev &= ~_PGSTE_GPS_USAGE_MASK; 835 pgstev |= _PGSTE_GPS_USAGE_UNUSED; 836 if (pte_val(*ptep) & _PAGE_INVALID) 837 res = 1; 838 break; 839 case ESSA_SET_VOLATILE: 840 pgstev &= ~_PGSTE_GPS_USAGE_MASK; 841 pgstev |= _PGSTE_GPS_USAGE_VOLATILE; 842 if (pte_val(*ptep) & _PAGE_INVALID) 843 res = 1; 844 break; 845 case ESSA_SET_POT_VOLATILE: 846 pgstev &= ~_PGSTE_GPS_USAGE_MASK; 847 if (!(pte_val(*ptep) & _PAGE_INVALID)) { 848 pgstev |= _PGSTE_GPS_USAGE_POT_VOLATILE; 849 break; 850 } 851 if (pgstev & _PGSTE_GPS_ZERO) { 852 pgstev |= _PGSTE_GPS_USAGE_VOLATILE; 853 break; 854 } 855 if (!(pgstev & PGSTE_GC_BIT)) { 856 pgstev |= _PGSTE_GPS_USAGE_VOLATILE; 857 res = 1; 858 break; 859 } 860 break; 861 case ESSA_SET_STABLE_RESIDENT: 862 pgstev &= ~_PGSTE_GPS_USAGE_MASK; 863 pgstev |= _PGSTE_GPS_USAGE_STABLE; 864 /* 865 * Since the resident state can go away any time after this 866 * call, we will not make this page resident. We can revisit 867 * this decision if a guest will ever start using this. 868 */ 869 break; 870 case ESSA_SET_STABLE_IF_RESIDENT: 871 if (!(pte_val(*ptep) & _PAGE_INVALID)) { 872 pgstev &= ~_PGSTE_GPS_USAGE_MASK; 873 pgstev |= _PGSTE_GPS_USAGE_STABLE; 874 } 875 break; 876 default: 877 /* we should never get here! */ 878 break; 879 } 880 /* If we are discarding a page, set it to logical zero */ 881 if (res) 882 pgstev |= _PGSTE_GPS_ZERO; 883 884 pgste_val(pgste) = pgstev; 885 pgste_set_unlock(ptep, pgste); 886 pte_unmap_unlock(ptep, ptl); 887 return res; 888 } 889 EXPORT_SYMBOL(pgste_perform_essa); 890 891 /** 892 * set_pgste_bits - set specific PGSTE bits. 893 * @mm: the memory context. It must have PGSTEs, no check is performed here! 894 * @hva: the host virtual address of the page whose PGSTE is to be processed 895 * @bits: a bitmask representing the bits that will be touched 896 * @value: the values of the bits to be written. Only the bits in the mask 897 * will be written. 898 * 899 * Return: 0 on success, < 0 in case of error. 900 */ 901 int set_pgste_bits(struct mm_struct *mm, unsigned long hva, 902 unsigned long bits, unsigned long value) 903 { 904 spinlock_t *ptl; 905 pgste_t new; 906 pte_t *ptep; 907 908 ptep = get_locked_pte(mm, hva, &ptl); 909 if (unlikely(!ptep)) 910 return -EFAULT; 911 new = pgste_get_lock(ptep); 912 913 pgste_val(new) &= ~bits; 914 pgste_val(new) |= value & bits; 915 916 pgste_set_unlock(ptep, new); 917 pte_unmap_unlock(ptep, ptl); 918 return 0; 919 } 920 EXPORT_SYMBOL(set_pgste_bits); 921 922 /** 923 * get_pgste - get the current PGSTE for the given address. 924 * @mm: the memory context. It must have PGSTEs, no check is performed here! 925 * @hva: the host virtual address of the page whose PGSTE is to be processed 926 * @pgstep: will be written with the current PGSTE for the given address. 927 * 928 * Return: 0 on success, < 0 in case of error. 929 */ 930 int get_pgste(struct mm_struct *mm, unsigned long hva, unsigned long *pgstep) 931 { 932 spinlock_t *ptl; 933 pte_t *ptep; 934 935 ptep = get_locked_pte(mm, hva, &ptl); 936 if (unlikely(!ptep)) 937 return -EFAULT; 938 *pgstep = pgste_val(pgste_get(ptep)); 939 pte_unmap_unlock(ptep, ptl); 940 return 0; 941 } 942 EXPORT_SYMBOL(get_pgste); 943 #endif 944