1 /* 2 * Copyright IBM Corp. 2007, 2011 3 * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> 4 */ 5 6 #include <linux/sched.h> 7 #include <linux/kernel.h> 8 #include <linux/errno.h> 9 #include <linux/gfp.h> 10 #include <linux/mm.h> 11 #include <linux/swap.h> 12 #include <linux/smp.h> 13 #include <linux/spinlock.h> 14 #include <linux/rcupdate.h> 15 #include <linux/slab.h> 16 #include <linux/swapops.h> 17 #include <linux/sysctl.h> 18 #include <linux/ksm.h> 19 #include <linux/mman.h> 20 21 #include <asm/pgtable.h> 22 #include <asm/pgalloc.h> 23 #include <asm/tlb.h> 24 #include <asm/tlbflush.h> 25 #include <asm/mmu_context.h> 26 #include <asm/page-states.h> 27 28 static inline pte_t ptep_flush_direct(struct mm_struct *mm, 29 unsigned long addr, pte_t *ptep) 30 { 31 pte_t old; 32 33 old = *ptep; 34 if (unlikely(pte_val(old) & _PAGE_INVALID)) 35 return old; 36 atomic_inc(&mm->context.flush_count); 37 if (MACHINE_HAS_TLB_LC && 38 cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) 39 __ptep_ipte(addr, ptep, IPTE_LOCAL); 40 else 41 __ptep_ipte(addr, ptep, IPTE_GLOBAL); 42 atomic_dec(&mm->context.flush_count); 43 return old; 44 } 45 46 static inline pte_t ptep_flush_lazy(struct mm_struct *mm, 47 unsigned long addr, pte_t *ptep) 48 { 49 pte_t old; 50 51 old = *ptep; 52 if (unlikely(pte_val(old) & _PAGE_INVALID)) 53 return old; 54 atomic_inc(&mm->context.flush_count); 55 if (cpumask_equal(&mm->context.cpu_attach_mask, 56 cpumask_of(smp_processor_id()))) { 57 pte_val(*ptep) |= _PAGE_INVALID; 58 mm->context.flush_mm = 1; 59 } else 60 __ptep_ipte(addr, ptep, IPTE_GLOBAL); 61 atomic_dec(&mm->context.flush_count); 62 return old; 63 } 64 65 static inline pgste_t pgste_get_lock(pte_t *ptep) 66 { 67 unsigned long new = 0; 68 #ifdef CONFIG_PGSTE 69 unsigned long old; 70 71 asm( 72 " lg %0,%2\n" 73 "0: lgr %1,%0\n" 74 " nihh %0,0xff7f\n" /* clear PCL bit in old */ 75 " oihh %1,0x0080\n" /* set PCL bit in new */ 76 " csg %0,%1,%2\n" 77 " jl 0b\n" 78 : "=&d" (old), "=&d" (new), "=Q" (ptep[PTRS_PER_PTE]) 79 : "Q" (ptep[PTRS_PER_PTE]) : "cc", "memory"); 80 #endif 81 return __pgste(new); 82 } 83 84 static inline void pgste_set_unlock(pte_t *ptep, pgste_t pgste) 85 { 86 #ifdef CONFIG_PGSTE 87 asm( 88 " nihh %1,0xff7f\n" /* clear PCL bit */ 89 " stg %1,%0\n" 90 : "=Q" (ptep[PTRS_PER_PTE]) 91 : "d" (pgste_val(pgste)), "Q" (ptep[PTRS_PER_PTE]) 92 : "cc", "memory"); 93 #endif 94 } 95 96 static inline pgste_t pgste_get(pte_t *ptep) 97 { 98 unsigned long pgste = 0; 99 #ifdef CONFIG_PGSTE 100 pgste = *(unsigned long *)(ptep + PTRS_PER_PTE); 101 #endif 102 return __pgste(pgste); 103 } 104 105 static inline void pgste_set(pte_t *ptep, pgste_t pgste) 106 { 107 #ifdef CONFIG_PGSTE 108 *(pgste_t *)(ptep + PTRS_PER_PTE) = pgste; 109 #endif 110 } 111 112 static inline pgste_t pgste_update_all(pte_t pte, pgste_t pgste, 113 struct mm_struct *mm) 114 { 115 #ifdef CONFIG_PGSTE 116 unsigned long address, bits, skey; 117 118 if (!mm_use_skey(mm) || pte_val(pte) & _PAGE_INVALID) 119 return pgste; 120 address = pte_val(pte) & PAGE_MASK; 121 skey = (unsigned long) page_get_storage_key(address); 122 bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED); 123 /* Transfer page changed & referenced bit to guest bits in pgste */ 124 pgste_val(pgste) |= bits << 48; /* GR bit & GC bit */ 125 /* Copy page access key and fetch protection bit to pgste */ 126 pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT); 127 pgste_val(pgste) |= (skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56; 128 #endif 129 return pgste; 130 131 } 132 133 static inline void pgste_set_key(pte_t *ptep, pgste_t pgste, pte_t entry, 134 struct mm_struct *mm) 135 { 136 #ifdef CONFIG_PGSTE 137 unsigned long address; 138 unsigned long nkey; 139 140 if (!mm_use_skey(mm) || pte_val(entry) & _PAGE_INVALID) 141 return; 142 VM_BUG_ON(!(pte_val(*ptep) & _PAGE_INVALID)); 143 address = pte_val(entry) & PAGE_MASK; 144 /* 145 * Set page access key and fetch protection bit from pgste. 146 * The guest C/R information is still in the PGSTE, set real 147 * key C/R to 0. 148 */ 149 nkey = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56; 150 nkey |= (pgste_val(pgste) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 48; 151 page_set_storage_key(address, nkey, 0); 152 #endif 153 } 154 155 static inline pgste_t pgste_set_pte(pte_t *ptep, pgste_t pgste, pte_t entry) 156 { 157 #ifdef CONFIG_PGSTE 158 if ((pte_val(entry) & _PAGE_PRESENT) && 159 (pte_val(entry) & _PAGE_WRITE) && 160 !(pte_val(entry) & _PAGE_INVALID)) { 161 if (!MACHINE_HAS_ESOP) { 162 /* 163 * Without enhanced suppression-on-protection force 164 * the dirty bit on for all writable ptes. 165 */ 166 pte_val(entry) |= _PAGE_DIRTY; 167 pte_val(entry) &= ~_PAGE_PROTECT; 168 } 169 if (!(pte_val(entry) & _PAGE_PROTECT)) 170 /* This pte allows write access, set user-dirty */ 171 pgste_val(pgste) |= PGSTE_UC_BIT; 172 } 173 #endif 174 *ptep = entry; 175 return pgste; 176 } 177 178 static inline pgste_t pgste_pte_notify(struct mm_struct *mm, 179 unsigned long addr, 180 pte_t *ptep, pgste_t pgste) 181 { 182 #ifdef CONFIG_PGSTE 183 unsigned long bits; 184 185 bits = pgste_val(pgste) & (PGSTE_IN_BIT | PGSTE_VSIE_BIT); 186 if (bits) { 187 pgste_val(pgste) ^= bits; 188 ptep_notify(mm, addr, ptep, bits); 189 } 190 #endif 191 return pgste; 192 } 193 194 static inline pgste_t ptep_xchg_start(struct mm_struct *mm, 195 unsigned long addr, pte_t *ptep) 196 { 197 pgste_t pgste = __pgste(0); 198 199 if (mm_has_pgste(mm)) { 200 pgste = pgste_get_lock(ptep); 201 pgste = pgste_pte_notify(mm, addr, ptep, pgste); 202 } 203 return pgste; 204 } 205 206 static inline pte_t ptep_xchg_commit(struct mm_struct *mm, 207 unsigned long addr, pte_t *ptep, 208 pgste_t pgste, pte_t old, pte_t new) 209 { 210 if (mm_has_pgste(mm)) { 211 if (pte_val(old) & _PAGE_INVALID) 212 pgste_set_key(ptep, pgste, new, mm); 213 if (pte_val(new) & _PAGE_INVALID) { 214 pgste = pgste_update_all(old, pgste, mm); 215 if ((pgste_val(pgste) & _PGSTE_GPS_USAGE_MASK) == 216 _PGSTE_GPS_USAGE_UNUSED) 217 pte_val(old) |= _PAGE_UNUSED; 218 } 219 pgste = pgste_set_pte(ptep, pgste, new); 220 pgste_set_unlock(ptep, pgste); 221 } else { 222 *ptep = new; 223 } 224 return old; 225 } 226 227 pte_t ptep_xchg_direct(struct mm_struct *mm, unsigned long addr, 228 pte_t *ptep, pte_t new) 229 { 230 pgste_t pgste; 231 pte_t old; 232 233 preempt_disable(); 234 pgste = ptep_xchg_start(mm, addr, ptep); 235 old = ptep_flush_direct(mm, addr, ptep); 236 old = ptep_xchg_commit(mm, addr, ptep, pgste, old, new); 237 preempt_enable(); 238 return old; 239 } 240 EXPORT_SYMBOL(ptep_xchg_direct); 241 242 pte_t ptep_xchg_lazy(struct mm_struct *mm, unsigned long addr, 243 pte_t *ptep, pte_t new) 244 { 245 pgste_t pgste; 246 pte_t old; 247 248 preempt_disable(); 249 pgste = ptep_xchg_start(mm, addr, ptep); 250 old = ptep_flush_lazy(mm, addr, ptep); 251 old = ptep_xchg_commit(mm, addr, ptep, pgste, old, new); 252 preempt_enable(); 253 return old; 254 } 255 EXPORT_SYMBOL(ptep_xchg_lazy); 256 257 pte_t ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr, 258 pte_t *ptep) 259 { 260 pgste_t pgste; 261 pte_t old; 262 263 preempt_disable(); 264 pgste = ptep_xchg_start(mm, addr, ptep); 265 old = ptep_flush_lazy(mm, addr, ptep); 266 if (mm_has_pgste(mm)) { 267 pgste = pgste_update_all(old, pgste, mm); 268 pgste_set(ptep, pgste); 269 } 270 return old; 271 } 272 EXPORT_SYMBOL(ptep_modify_prot_start); 273 274 void ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr, 275 pte_t *ptep, pte_t pte) 276 { 277 pgste_t pgste; 278 279 if (!MACHINE_HAS_NX) 280 pte_val(pte) &= ~_PAGE_NOEXEC; 281 if (mm_has_pgste(mm)) { 282 pgste = pgste_get(ptep); 283 pgste_set_key(ptep, pgste, pte, mm); 284 pgste = pgste_set_pte(ptep, pgste, pte); 285 pgste_set_unlock(ptep, pgste); 286 } else { 287 *ptep = pte; 288 } 289 preempt_enable(); 290 } 291 EXPORT_SYMBOL(ptep_modify_prot_commit); 292 293 static inline pmd_t pmdp_flush_direct(struct mm_struct *mm, 294 unsigned long addr, pmd_t *pmdp) 295 { 296 pmd_t old; 297 298 old = *pmdp; 299 if (pmd_val(old) & _SEGMENT_ENTRY_INVALID) 300 return old; 301 if (!MACHINE_HAS_IDTE) { 302 __pmdp_csp(pmdp); 303 return old; 304 } 305 atomic_inc(&mm->context.flush_count); 306 if (MACHINE_HAS_TLB_LC && 307 cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) 308 __pmdp_idte(addr, pmdp, IDTE_LOCAL); 309 else 310 __pmdp_idte(addr, pmdp, IDTE_GLOBAL); 311 atomic_dec(&mm->context.flush_count); 312 return old; 313 } 314 315 static inline pmd_t pmdp_flush_lazy(struct mm_struct *mm, 316 unsigned long addr, pmd_t *pmdp) 317 { 318 pmd_t old; 319 320 old = *pmdp; 321 if (pmd_val(old) & _SEGMENT_ENTRY_INVALID) 322 return old; 323 atomic_inc(&mm->context.flush_count); 324 if (cpumask_equal(&mm->context.cpu_attach_mask, 325 cpumask_of(smp_processor_id()))) { 326 pmd_val(*pmdp) |= _SEGMENT_ENTRY_INVALID; 327 mm->context.flush_mm = 1; 328 } else if (MACHINE_HAS_IDTE) 329 __pmdp_idte(addr, pmdp, IDTE_GLOBAL); 330 else 331 __pmdp_csp(pmdp); 332 atomic_dec(&mm->context.flush_count); 333 return old; 334 } 335 336 pmd_t pmdp_xchg_direct(struct mm_struct *mm, unsigned long addr, 337 pmd_t *pmdp, pmd_t new) 338 { 339 pmd_t old; 340 341 preempt_disable(); 342 old = pmdp_flush_direct(mm, addr, pmdp); 343 *pmdp = new; 344 preempt_enable(); 345 return old; 346 } 347 EXPORT_SYMBOL(pmdp_xchg_direct); 348 349 pmd_t pmdp_xchg_lazy(struct mm_struct *mm, unsigned long addr, 350 pmd_t *pmdp, pmd_t new) 351 { 352 pmd_t old; 353 354 preempt_disable(); 355 old = pmdp_flush_lazy(mm, addr, pmdp); 356 *pmdp = new; 357 preempt_enable(); 358 return old; 359 } 360 EXPORT_SYMBOL(pmdp_xchg_lazy); 361 362 static inline pud_t pudp_flush_direct(struct mm_struct *mm, 363 unsigned long addr, pud_t *pudp) 364 { 365 pud_t old; 366 367 old = *pudp; 368 if (pud_val(old) & _REGION_ENTRY_INVALID) 369 return old; 370 if (!MACHINE_HAS_IDTE) { 371 /* 372 * Invalid bit position is the same for pmd and pud, so we can 373 * re-use _pmd_csp() here 374 */ 375 __pmdp_csp((pmd_t *) pudp); 376 return old; 377 } 378 atomic_inc(&mm->context.flush_count); 379 if (MACHINE_HAS_TLB_LC && 380 cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) 381 __pudp_idte(addr, pudp, IDTE_LOCAL); 382 else 383 __pudp_idte(addr, pudp, IDTE_GLOBAL); 384 atomic_dec(&mm->context.flush_count); 385 return old; 386 } 387 388 pud_t pudp_xchg_direct(struct mm_struct *mm, unsigned long addr, 389 pud_t *pudp, pud_t new) 390 { 391 pud_t old; 392 393 preempt_disable(); 394 old = pudp_flush_direct(mm, addr, pudp); 395 *pudp = new; 396 preempt_enable(); 397 return old; 398 } 399 EXPORT_SYMBOL(pudp_xchg_direct); 400 401 #ifdef CONFIG_TRANSPARENT_HUGEPAGE 402 void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, 403 pgtable_t pgtable) 404 { 405 struct list_head *lh = (struct list_head *) pgtable; 406 407 assert_spin_locked(pmd_lockptr(mm, pmdp)); 408 409 /* FIFO */ 410 if (!pmd_huge_pte(mm, pmdp)) 411 INIT_LIST_HEAD(lh); 412 else 413 list_add(lh, (struct list_head *) pmd_huge_pte(mm, pmdp)); 414 pmd_huge_pte(mm, pmdp) = pgtable; 415 } 416 417 pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp) 418 { 419 struct list_head *lh; 420 pgtable_t pgtable; 421 pte_t *ptep; 422 423 assert_spin_locked(pmd_lockptr(mm, pmdp)); 424 425 /* FIFO */ 426 pgtable = pmd_huge_pte(mm, pmdp); 427 lh = (struct list_head *) pgtable; 428 if (list_empty(lh)) 429 pmd_huge_pte(mm, pmdp) = NULL; 430 else { 431 pmd_huge_pte(mm, pmdp) = (pgtable_t) lh->next; 432 list_del(lh); 433 } 434 ptep = (pte_t *) pgtable; 435 pte_val(*ptep) = _PAGE_INVALID; 436 ptep++; 437 pte_val(*ptep) = _PAGE_INVALID; 438 return pgtable; 439 } 440 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ 441 442 #ifdef CONFIG_PGSTE 443 void ptep_set_pte_at(struct mm_struct *mm, unsigned long addr, 444 pte_t *ptep, pte_t entry) 445 { 446 pgste_t pgste; 447 448 /* the mm_has_pgste() check is done in set_pte_at() */ 449 preempt_disable(); 450 pgste = pgste_get_lock(ptep); 451 pgste_val(pgste) &= ~_PGSTE_GPS_ZERO; 452 pgste_set_key(ptep, pgste, entry, mm); 453 pgste = pgste_set_pte(ptep, pgste, entry); 454 pgste_set_unlock(ptep, pgste); 455 preempt_enable(); 456 } 457 458 void ptep_set_notify(struct mm_struct *mm, unsigned long addr, pte_t *ptep) 459 { 460 pgste_t pgste; 461 462 preempt_disable(); 463 pgste = pgste_get_lock(ptep); 464 pgste_val(pgste) |= PGSTE_IN_BIT; 465 pgste_set_unlock(ptep, pgste); 466 preempt_enable(); 467 } 468 469 /** 470 * ptep_force_prot - change access rights of a locked pte 471 * @mm: pointer to the process mm_struct 472 * @addr: virtual address in the guest address space 473 * @ptep: pointer to the page table entry 474 * @prot: indicates guest access rights: PROT_NONE, PROT_READ or PROT_WRITE 475 * @bit: pgste bit to set (e.g. for notification) 476 * 477 * Returns 0 if the access rights were changed and -EAGAIN if the current 478 * and requested access rights are incompatible. 479 */ 480 int ptep_force_prot(struct mm_struct *mm, unsigned long addr, 481 pte_t *ptep, int prot, unsigned long bit) 482 { 483 pte_t entry; 484 pgste_t pgste; 485 int pte_i, pte_p; 486 487 pgste = pgste_get_lock(ptep); 488 entry = *ptep; 489 /* Check pte entry after all locks have been acquired */ 490 pte_i = pte_val(entry) & _PAGE_INVALID; 491 pte_p = pte_val(entry) & _PAGE_PROTECT; 492 if ((pte_i && (prot != PROT_NONE)) || 493 (pte_p && (prot & PROT_WRITE))) { 494 pgste_set_unlock(ptep, pgste); 495 return -EAGAIN; 496 } 497 /* Change access rights and set pgste bit */ 498 if (prot == PROT_NONE && !pte_i) { 499 ptep_flush_direct(mm, addr, ptep); 500 pgste = pgste_update_all(entry, pgste, mm); 501 pte_val(entry) |= _PAGE_INVALID; 502 } 503 if (prot == PROT_READ && !pte_p) { 504 ptep_flush_direct(mm, addr, ptep); 505 pte_val(entry) &= ~_PAGE_INVALID; 506 pte_val(entry) |= _PAGE_PROTECT; 507 } 508 pgste_val(pgste) |= bit; 509 pgste = pgste_set_pte(ptep, pgste, entry); 510 pgste_set_unlock(ptep, pgste); 511 return 0; 512 } 513 514 int ptep_shadow_pte(struct mm_struct *mm, unsigned long saddr, 515 pte_t *sptep, pte_t *tptep, pte_t pte) 516 { 517 pgste_t spgste, tpgste; 518 pte_t spte, tpte; 519 int rc = -EAGAIN; 520 521 if (!(pte_val(*tptep) & _PAGE_INVALID)) 522 return 0; /* already shadowed */ 523 spgste = pgste_get_lock(sptep); 524 spte = *sptep; 525 if (!(pte_val(spte) & _PAGE_INVALID) && 526 !((pte_val(spte) & _PAGE_PROTECT) && 527 !(pte_val(pte) & _PAGE_PROTECT))) { 528 pgste_val(spgste) |= PGSTE_VSIE_BIT; 529 tpgste = pgste_get_lock(tptep); 530 pte_val(tpte) = (pte_val(spte) & PAGE_MASK) | 531 (pte_val(pte) & _PAGE_PROTECT); 532 /* don't touch the storage key - it belongs to parent pgste */ 533 tpgste = pgste_set_pte(tptep, tpgste, tpte); 534 pgste_set_unlock(tptep, tpgste); 535 rc = 1; 536 } 537 pgste_set_unlock(sptep, spgste); 538 return rc; 539 } 540 541 void ptep_unshadow_pte(struct mm_struct *mm, unsigned long saddr, pte_t *ptep) 542 { 543 pgste_t pgste; 544 545 pgste = pgste_get_lock(ptep); 546 /* notifier is called by the caller */ 547 ptep_flush_direct(mm, saddr, ptep); 548 /* don't touch the storage key - it belongs to parent pgste */ 549 pgste = pgste_set_pte(ptep, pgste, __pte(_PAGE_INVALID)); 550 pgste_set_unlock(ptep, pgste); 551 } 552 553 static void ptep_zap_swap_entry(struct mm_struct *mm, swp_entry_t entry) 554 { 555 if (!non_swap_entry(entry)) 556 dec_mm_counter(mm, MM_SWAPENTS); 557 else if (is_migration_entry(entry)) { 558 struct page *page = migration_entry_to_page(entry); 559 560 dec_mm_counter(mm, mm_counter(page)); 561 } 562 free_swap_and_cache(entry); 563 } 564 565 void ptep_zap_unused(struct mm_struct *mm, unsigned long addr, 566 pte_t *ptep, int reset) 567 { 568 unsigned long pgstev; 569 pgste_t pgste; 570 pte_t pte; 571 572 /* Zap unused and logically-zero pages */ 573 preempt_disable(); 574 pgste = pgste_get_lock(ptep); 575 pgstev = pgste_val(pgste); 576 pte = *ptep; 577 if (!reset && pte_swap(pte) && 578 ((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED || 579 (pgstev & _PGSTE_GPS_ZERO))) { 580 ptep_zap_swap_entry(mm, pte_to_swp_entry(pte)); 581 pte_clear(mm, addr, ptep); 582 } 583 if (reset) 584 pgste_val(pgste) &= ~_PGSTE_GPS_USAGE_MASK; 585 pgste_set_unlock(ptep, pgste); 586 preempt_enable(); 587 } 588 589 void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep) 590 { 591 unsigned long ptev; 592 pgste_t pgste; 593 594 /* Clear storage key */ 595 preempt_disable(); 596 pgste = pgste_get_lock(ptep); 597 pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT | 598 PGSTE_GR_BIT | PGSTE_GC_BIT); 599 ptev = pte_val(*ptep); 600 if (!(ptev & _PAGE_INVALID) && (ptev & _PAGE_WRITE)) 601 page_set_storage_key(ptev & PAGE_MASK, PAGE_DEFAULT_KEY, 1); 602 pgste_set_unlock(ptep, pgste); 603 preempt_enable(); 604 } 605 606 /* 607 * Test and reset if a guest page is dirty 608 */ 609 bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long addr) 610 { 611 spinlock_t *ptl; 612 pgd_t *pgd; 613 p4d_t *p4d; 614 pud_t *pud; 615 pmd_t *pmd; 616 pgste_t pgste; 617 pte_t *ptep; 618 pte_t pte; 619 bool dirty; 620 621 pgd = pgd_offset(mm, addr); 622 p4d = p4d_alloc(mm, pgd, addr); 623 if (!p4d) 624 return false; 625 pud = pud_alloc(mm, p4d, addr); 626 if (!pud) 627 return false; 628 pmd = pmd_alloc(mm, pud, addr); 629 if (!pmd) 630 return false; 631 /* We can't run guests backed by huge pages, but userspace can 632 * still set them up and then try to migrate them without any 633 * migration support. 634 */ 635 if (pmd_large(*pmd)) 636 return true; 637 638 ptep = pte_alloc_map_lock(mm, pmd, addr, &ptl); 639 if (unlikely(!ptep)) 640 return false; 641 642 pgste = pgste_get_lock(ptep); 643 dirty = !!(pgste_val(pgste) & PGSTE_UC_BIT); 644 pgste_val(pgste) &= ~PGSTE_UC_BIT; 645 pte = *ptep; 646 if (dirty && (pte_val(pte) & _PAGE_PRESENT)) { 647 pgste = pgste_pte_notify(mm, addr, ptep, pgste); 648 __ptep_ipte(addr, ptep, IPTE_GLOBAL); 649 if (MACHINE_HAS_ESOP || !(pte_val(pte) & _PAGE_WRITE)) 650 pte_val(pte) |= _PAGE_PROTECT; 651 else 652 pte_val(pte) |= _PAGE_INVALID; 653 *ptep = pte; 654 } 655 pgste_set_unlock(ptep, pgste); 656 657 spin_unlock(ptl); 658 return dirty; 659 } 660 EXPORT_SYMBOL_GPL(test_and_clear_guest_dirty); 661 662 int set_guest_storage_key(struct mm_struct *mm, unsigned long addr, 663 unsigned char key, bool nq) 664 { 665 unsigned long keyul; 666 spinlock_t *ptl; 667 pgste_t old, new; 668 pte_t *ptep; 669 670 ptep = get_locked_pte(mm, addr, &ptl); 671 if (unlikely(!ptep)) 672 return -EFAULT; 673 674 new = old = pgste_get_lock(ptep); 675 pgste_val(new) &= ~(PGSTE_GR_BIT | PGSTE_GC_BIT | 676 PGSTE_ACC_BITS | PGSTE_FP_BIT); 677 keyul = (unsigned long) key; 678 pgste_val(new) |= (keyul & (_PAGE_CHANGED | _PAGE_REFERENCED)) << 48; 679 pgste_val(new) |= (keyul & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56; 680 if (!(pte_val(*ptep) & _PAGE_INVALID)) { 681 unsigned long address, bits, skey; 682 683 address = pte_val(*ptep) & PAGE_MASK; 684 skey = (unsigned long) page_get_storage_key(address); 685 bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED); 686 skey = key & (_PAGE_ACC_BITS | _PAGE_FP_BIT); 687 /* Set storage key ACC and FP */ 688 page_set_storage_key(address, skey, !nq); 689 /* Merge host changed & referenced into pgste */ 690 pgste_val(new) |= bits << 52; 691 } 692 /* changing the guest storage key is considered a change of the page */ 693 if ((pgste_val(new) ^ pgste_val(old)) & 694 (PGSTE_ACC_BITS | PGSTE_FP_BIT | PGSTE_GR_BIT | PGSTE_GC_BIT)) 695 pgste_val(new) |= PGSTE_UC_BIT; 696 697 pgste_set_unlock(ptep, new); 698 pte_unmap_unlock(ptep, ptl); 699 return 0; 700 } 701 EXPORT_SYMBOL(set_guest_storage_key); 702 703 /** 704 * Conditionally set a guest storage key (handling csske). 705 * oldkey will be updated when either mr or mc is set and a pointer is given. 706 * 707 * Returns 0 if a guests storage key update wasn't necessary, 1 if the guest 708 * storage key was updated and -EFAULT on access errors. 709 */ 710 int cond_set_guest_storage_key(struct mm_struct *mm, unsigned long addr, 711 unsigned char key, unsigned char *oldkey, 712 bool nq, bool mr, bool mc) 713 { 714 unsigned char tmp, mask = _PAGE_ACC_BITS | _PAGE_FP_BIT; 715 int rc; 716 717 /* we can drop the pgste lock between getting and setting the key */ 718 if (mr | mc) { 719 rc = get_guest_storage_key(current->mm, addr, &tmp); 720 if (rc) 721 return rc; 722 if (oldkey) 723 *oldkey = tmp; 724 if (!mr) 725 mask |= _PAGE_REFERENCED; 726 if (!mc) 727 mask |= _PAGE_CHANGED; 728 if (!((tmp ^ key) & mask)) 729 return 0; 730 } 731 rc = set_guest_storage_key(current->mm, addr, key, nq); 732 return rc < 0 ? rc : 1; 733 } 734 EXPORT_SYMBOL(cond_set_guest_storage_key); 735 736 /** 737 * Reset a guest reference bit (rrbe), returning the reference and changed bit. 738 * 739 * Returns < 0 in case of error, otherwise the cc to be reported to the guest. 740 */ 741 int reset_guest_reference_bit(struct mm_struct *mm, unsigned long addr) 742 { 743 spinlock_t *ptl; 744 pgste_t old, new; 745 pte_t *ptep; 746 int cc = 0; 747 748 ptep = get_locked_pte(mm, addr, &ptl); 749 if (unlikely(!ptep)) 750 return -EFAULT; 751 752 new = old = pgste_get_lock(ptep); 753 /* Reset guest reference bit only */ 754 pgste_val(new) &= ~PGSTE_GR_BIT; 755 756 if (!(pte_val(*ptep) & _PAGE_INVALID)) { 757 cc = page_reset_referenced(pte_val(*ptep) & PAGE_MASK); 758 /* Merge real referenced bit into host-set */ 759 pgste_val(new) |= ((unsigned long) cc << 53) & PGSTE_HR_BIT; 760 } 761 /* Reflect guest's logical view, not physical */ 762 cc |= (pgste_val(old) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 49; 763 /* Changing the guest storage key is considered a change of the page */ 764 if ((pgste_val(new) ^ pgste_val(old)) & PGSTE_GR_BIT) 765 pgste_val(new) |= PGSTE_UC_BIT; 766 767 pgste_set_unlock(ptep, new); 768 pte_unmap_unlock(ptep, ptl); 769 return cc; 770 } 771 EXPORT_SYMBOL(reset_guest_reference_bit); 772 773 int get_guest_storage_key(struct mm_struct *mm, unsigned long addr, 774 unsigned char *key) 775 { 776 spinlock_t *ptl; 777 pgste_t pgste; 778 pte_t *ptep; 779 780 ptep = get_locked_pte(mm, addr, &ptl); 781 if (unlikely(!ptep)) 782 return -EFAULT; 783 784 pgste = pgste_get_lock(ptep); 785 *key = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56; 786 if (!(pte_val(*ptep) & _PAGE_INVALID)) 787 *key = page_get_storage_key(pte_val(*ptep) & PAGE_MASK); 788 /* Reflect guest's logical view, not physical */ 789 *key |= (pgste_val(pgste) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 48; 790 pgste_set_unlock(ptep, pgste); 791 pte_unmap_unlock(ptep, ptl); 792 return 0; 793 } 794 EXPORT_SYMBOL(get_guest_storage_key); 795 796 /** 797 * pgste_perform_essa - perform ESSA actions on the PGSTE. 798 * @mm: the memory context. It must have PGSTEs, no check is performed here! 799 * @hva: the host virtual address of the page whose PGSTE is to be processed 800 * @orc: the specific action to perform, see the ESSA_SET_* macros. 801 * @oldpte: the PTE will be saved there if the pointer is not NULL. 802 * @oldpgste: the old PGSTE will be saved there if the pointer is not NULL. 803 * 804 * Return: 1 if the page is to be added to the CBRL, otherwise 0, 805 * or < 0 in case of error. -EINVAL is returned for invalid values 806 * of orc, -EFAULT for invalid addresses. 807 */ 808 int pgste_perform_essa(struct mm_struct *mm, unsigned long hva, int orc, 809 unsigned long *oldpte, unsigned long *oldpgste) 810 { 811 unsigned long pgstev; 812 spinlock_t *ptl; 813 pgste_t pgste; 814 pte_t *ptep; 815 int res = 0; 816 817 WARN_ON_ONCE(orc > ESSA_MAX); 818 if (unlikely(orc > ESSA_MAX)) 819 return -EINVAL; 820 ptep = get_locked_pte(mm, hva, &ptl); 821 if (unlikely(!ptep)) 822 return -EFAULT; 823 pgste = pgste_get_lock(ptep); 824 pgstev = pgste_val(pgste); 825 if (oldpte) 826 *oldpte = pte_val(*ptep); 827 if (oldpgste) 828 *oldpgste = pgstev; 829 830 switch (orc) { 831 case ESSA_GET_STATE: 832 break; 833 case ESSA_SET_STABLE: 834 pgstev &= ~_PGSTE_GPS_USAGE_MASK; 835 pgstev |= _PGSTE_GPS_USAGE_STABLE; 836 break; 837 case ESSA_SET_UNUSED: 838 pgstev &= ~_PGSTE_GPS_USAGE_MASK; 839 pgstev |= _PGSTE_GPS_USAGE_UNUSED; 840 if (pte_val(*ptep) & _PAGE_INVALID) 841 res = 1; 842 break; 843 case ESSA_SET_VOLATILE: 844 pgstev &= ~_PGSTE_GPS_USAGE_MASK; 845 pgstev |= _PGSTE_GPS_USAGE_VOLATILE; 846 if (pte_val(*ptep) & _PAGE_INVALID) 847 res = 1; 848 break; 849 case ESSA_SET_POT_VOLATILE: 850 pgstev &= ~_PGSTE_GPS_USAGE_MASK; 851 if (!(pte_val(*ptep) & _PAGE_INVALID)) { 852 pgstev |= _PGSTE_GPS_USAGE_POT_VOLATILE; 853 break; 854 } 855 if (pgstev & _PGSTE_GPS_ZERO) { 856 pgstev |= _PGSTE_GPS_USAGE_VOLATILE; 857 break; 858 } 859 if (!(pgstev & PGSTE_GC_BIT)) { 860 pgstev |= _PGSTE_GPS_USAGE_VOLATILE; 861 res = 1; 862 break; 863 } 864 break; 865 case ESSA_SET_STABLE_RESIDENT: 866 pgstev &= ~_PGSTE_GPS_USAGE_MASK; 867 pgstev |= _PGSTE_GPS_USAGE_STABLE; 868 /* 869 * Since the resident state can go away any time after this 870 * call, we will not make this page resident. We can revisit 871 * this decision if a guest will ever start using this. 872 */ 873 break; 874 case ESSA_SET_STABLE_IF_RESIDENT: 875 if (!(pte_val(*ptep) & _PAGE_INVALID)) { 876 pgstev &= ~_PGSTE_GPS_USAGE_MASK; 877 pgstev |= _PGSTE_GPS_USAGE_STABLE; 878 } 879 break; 880 default: 881 /* we should never get here! */ 882 break; 883 } 884 /* If we are discarding a page, set it to logical zero */ 885 if (res) 886 pgstev |= _PGSTE_GPS_ZERO; 887 888 pgste_val(pgste) = pgstev; 889 pgste_set_unlock(ptep, pgste); 890 pte_unmap_unlock(ptep, ptl); 891 return res; 892 } 893 EXPORT_SYMBOL(pgste_perform_essa); 894 895 /** 896 * set_pgste_bits - set specific PGSTE bits. 897 * @mm: the memory context. It must have PGSTEs, no check is performed here! 898 * @hva: the host virtual address of the page whose PGSTE is to be processed 899 * @bits: a bitmask representing the bits that will be touched 900 * @value: the values of the bits to be written. Only the bits in the mask 901 * will be written. 902 * 903 * Return: 0 on success, < 0 in case of error. 904 */ 905 int set_pgste_bits(struct mm_struct *mm, unsigned long hva, 906 unsigned long bits, unsigned long value) 907 { 908 spinlock_t *ptl; 909 pgste_t new; 910 pte_t *ptep; 911 912 ptep = get_locked_pte(mm, hva, &ptl); 913 if (unlikely(!ptep)) 914 return -EFAULT; 915 new = pgste_get_lock(ptep); 916 917 pgste_val(new) &= ~bits; 918 pgste_val(new) |= value & bits; 919 920 pgste_set_unlock(ptep, new); 921 pte_unmap_unlock(ptep, ptl); 922 return 0; 923 } 924 EXPORT_SYMBOL(set_pgste_bits); 925 926 /** 927 * get_pgste - get the current PGSTE for the given address. 928 * @mm: the memory context. It must have PGSTEs, no check is performed here! 929 * @hva: the host virtual address of the page whose PGSTE is to be processed 930 * @pgstep: will be written with the current PGSTE for the given address. 931 * 932 * Return: 0 on success, < 0 in case of error. 933 */ 934 int get_pgste(struct mm_struct *mm, unsigned long hva, unsigned long *pgstep) 935 { 936 spinlock_t *ptl; 937 pte_t *ptep; 938 939 ptep = get_locked_pte(mm, hva, &ptl); 940 if (unlikely(!ptep)) 941 return -EFAULT; 942 *pgstep = pgste_val(pgste_get(ptep)); 943 pte_unmap_unlock(ptep, ptl); 944 return 0; 945 } 946 EXPORT_SYMBOL(get_pgste); 947 #endif 948