/*
 *  Copyright IBM Corp. 2007, 2011
 *  Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
 */

#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/spinlock.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/swapops.h>
#include <linux/sysctl.h>
#include <linux/ksm.h>
#include <linux/mman.h>

#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>

static inline pte_t ptep_flush_direct(struct mm_struct *mm,
				      unsigned long addr, pte_t *ptep)
{
	pte_t old;

	old = *ptep;
	if (unlikely(pte_val(old) & _PAGE_INVALID))
		return old;
	atomic_inc(&mm->context.flush_count);
	if (MACHINE_HAS_TLB_LC &&
	    cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
		__ptep_ipte_local(addr, ptep);
	else
		__ptep_ipte(addr, ptep);
	atomic_dec(&mm->context.flush_count);
	return old;
}

static inline pte_t ptep_flush_lazy(struct mm_struct *mm,
				    unsigned long addr, pte_t *ptep)
{
	pte_t old;

	old = *ptep;
	if (unlikely(pte_val(old) & _PAGE_INVALID))
		return old;
	atomic_inc(&mm->context.flush_count);
	if (cpumask_equal(&mm->context.cpu_attach_mask,
			  cpumask_of(smp_processor_id()))) {
		pte_val(*ptep) |= _PAGE_INVALID;
		mm->context.flush_mm = 1;
	} else
		__ptep_ipte(addr, ptep);
	atomic_dec(&mm->context.flush_count);
	return old;
}

static inline pgste_t pgste_get_lock(pte_t *ptep)
{
	unsigned long new = 0;
#ifdef CONFIG_PGSTE
	unsigned long old;

	asm(
		"	lg	%0,%2\n"
		"0:	lgr	%1,%0\n"
		"	nihh	%0,0xff7f\n"	/* clear PCL bit in old */
		"	oihh	%1,0x0080\n"	/* set PCL bit in new */
		"	csg	%0,%1,%2\n"
		"	jl	0b\n"
		: "=&d" (old), "=&d" (new), "=Q" (ptep[PTRS_PER_PTE])
		: "Q" (ptep[PTRS_PER_PTE]) : "cc", "memory");
#endif
	return __pgste(new);
}

static inline void pgste_set_unlock(pte_t *ptep, pgste_t pgste)
{
#ifdef CONFIG_PGSTE
	asm(
		"	nihh	%1,0xff7f\n"	/* clear PCL bit */
		"	stg	%1,%0\n"
		: "=Q" (ptep[PTRS_PER_PTE])
		: "d" (pgste_val(pgste)), "Q" (ptep[PTRS_PER_PTE])
		: "cc", "memory");
#endif
}

static inline pgste_t pgste_get(pte_t *ptep)
{
	unsigned long pgste = 0;
#ifdef CONFIG_PGSTE
	pgste = *(unsigned long *)(ptep + PTRS_PER_PTE);
#endif
	return __pgste(pgste);
}

static inline void pgste_set(pte_t *ptep, pgste_t pgste)
{
#ifdef CONFIG_PGSTE
	*(pgste_t *)(ptep + PTRS_PER_PTE) = pgste;
#endif
}

static inline pgste_t pgste_update_all(pte_t pte, pgste_t pgste,
				       struct mm_struct *mm)
{
#ifdef CONFIG_PGSTE
	unsigned long address, bits, skey;

	if (!mm_use_skey(mm) || pte_val(pte) & _PAGE_INVALID)
		return pgste;
	address = pte_val(pte) & PAGE_MASK;
	skey = (unsigned long) page_get_storage_key(address);
	bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
	/* Transfer page changed & referenced bit to guest bits in pgste */
	pgste_val(pgste) |= bits << 48;		/* GR bit & GC bit */
	/* Copy page access key and fetch protection bit to pgste */
	pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT);
	pgste_val(pgste) |= (skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
#endif
	return pgste;
}
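
/*
 * pgste_set_key - re-create the real storage key of the page that @entry
 * will map from the guest view kept in the PGSTE: access key,
 * fetch-protection bit and the guest referenced/changed state. Only done
 * if the mm uses storage keys and @entry is valid; the pte currently in
 * *@ptep must still be invalid.
 */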
static inline void pgste_set_key(pte_t *ptep, pgste_t pgste, pte_t entry,
				 struct mm_struct *mm)
{
#ifdef CONFIG_PGSTE
	unsigned long address;
	unsigned long nkey;

	if (!mm_use_skey(mm) || pte_val(entry) & _PAGE_INVALID)
		return;
	VM_BUG_ON(!(pte_val(*ptep) & _PAGE_INVALID));
	address = pte_val(entry) & PAGE_MASK;
	/*
	 * Set page access key and fetch protection bit from pgste.
	 * The guest C/R information is still in the PGSTE, set real
	 * key C/R to 0.
	 */
	nkey = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56;
	nkey |= (pgste_val(pgste) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 48;
	page_set_storage_key(address, nkey, 0);
#endif
}

static inline pgste_t pgste_set_pte(pte_t *ptep, pgste_t pgste, pte_t entry)
{
#ifdef CONFIG_PGSTE
	if ((pte_val(entry) & _PAGE_PRESENT) &&
	    (pte_val(entry) & _PAGE_WRITE) &&
	    !(pte_val(entry) & _PAGE_INVALID)) {
		if (!MACHINE_HAS_ESOP) {
			/*
			 * Without enhanced suppression-on-protection force
			 * the dirty bit on for all writable ptes.
			 */
			pte_val(entry) |= _PAGE_DIRTY;
			pte_val(entry) &= ~_PAGE_PROTECT;
		}
		if (!(pte_val(entry) & _PAGE_PROTECT))
			/* This pte allows write access, set user-dirty */
			pgste_val(pgste) |= PGSTE_UC_BIT;
	}
#endif
	*ptep = entry;
	return pgste;
}

static inline pgste_t pgste_ipte_notify(struct mm_struct *mm,
					unsigned long addr,
					pte_t *ptep, pgste_t pgste)
{
#ifdef CONFIG_PGSTE
	if (pgste_val(pgste) & PGSTE_IN_BIT) {
		pgste_val(pgste) &= ~PGSTE_IN_BIT;
		ptep_notify(mm, addr, ptep);
	}
#endif
	return pgste;
}

static inline pgste_t ptep_xchg_start(struct mm_struct *mm,
				      unsigned long addr, pte_t *ptep)
{
	pgste_t pgste = __pgste(0);

	if (mm_has_pgste(mm)) {
		pgste = pgste_get_lock(ptep);
		pgste = pgste_ipte_notify(mm, addr, ptep, pgste);
	}
	return pgste;
}

static inline void ptep_xchg_commit(struct mm_struct *mm,
				    unsigned long addr, pte_t *ptep,
				    pgste_t pgste, pte_t old, pte_t new)
{
	if (mm_has_pgste(mm)) {
		if (pte_val(old) & _PAGE_INVALID)
			pgste_set_key(ptep, pgste, new, mm);
		if (pte_val(new) & _PAGE_INVALID) {
			pgste = pgste_update_all(old, pgste, mm);
			if ((pgste_val(pgste) & _PGSTE_GPS_USAGE_MASK) ==
			    _PGSTE_GPS_USAGE_UNUSED)
				pte_val(old) |= _PAGE_UNUSED;
		}
		pgste = pgste_set_pte(ptep, pgste, new);
		pgste_set_unlock(ptep, pgste);
	} else {
		*ptep = new;
	}
}

pte_t ptep_xchg_direct(struct mm_struct *mm, unsigned long addr,
		       pte_t *ptep, pte_t new)
{
	pgste_t pgste;
	pte_t old;

	preempt_disable();
	pgste = ptep_xchg_start(mm, addr, ptep);
	old = ptep_flush_direct(mm, addr, ptep);
	ptep_xchg_commit(mm, addr, ptep, pgste, old, new);
	preempt_enable();
	return old;
}
EXPORT_SYMBOL(ptep_xchg_direct);

pte_t ptep_xchg_lazy(struct mm_struct *mm, unsigned long addr,
		     pte_t *ptep, pte_t new)
{
	pgste_t pgste;
	pte_t old;

	preempt_disable();
	pgste = ptep_xchg_start(mm, addr, ptep);
	old = ptep_flush_lazy(mm, addr, ptep);
	ptep_xchg_commit(mm, addr, ptep, pgste, old, new);
	preempt_enable();
	return old;
}
EXPORT_SYMBOL(ptep_xchg_lazy);
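
/*
 * ptep_modify_prot_start/ptep_modify_prot_commit are used as a pair, e.g.
 * by the generic mprotect code: start flushes the pte lazily and, for mms
 * with pgstes, transfers the storage key state into the PGSTE and leaves
 * the PGSTE lock held via pgste_set(); commit installs the new pte and
 * releases the lock. Preemption stays disabled between the two calls.
 */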
pte_t ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr,
			     pte_t *ptep)
{
	pgste_t pgste;
	pte_t old;

	preempt_disable();
	pgste = ptep_xchg_start(mm, addr, ptep);
	old = ptep_flush_lazy(mm, addr, ptep);
	if (mm_has_pgste(mm)) {
		pgste = pgste_update_all(old, pgste, mm);
		pgste_set(ptep, pgste);
	}
	return old;
}
EXPORT_SYMBOL(ptep_modify_prot_start);

void ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,
			     pte_t *ptep, pte_t pte)
{
	pgste_t pgste;

	if (mm_has_pgste(mm)) {
		pgste = pgste_get(ptep);
		pgste_set_key(ptep, pgste, pte, mm);
		pgste = pgste_set_pte(ptep, pgste, pte);
		pgste_set_unlock(ptep, pgste);
	} else {
		*ptep = pte;
	}
	preempt_enable();
}
EXPORT_SYMBOL(ptep_modify_prot_commit);

static inline pmd_t pmdp_flush_direct(struct mm_struct *mm,
				      unsigned long addr, pmd_t *pmdp)
{
	pmd_t old;

	old = *pmdp;
	if (pmd_val(old) & _SEGMENT_ENTRY_INVALID)
		return old;
	if (!MACHINE_HAS_IDTE) {
		__pmdp_csp(pmdp);
		return old;
	}
	atomic_inc(&mm->context.flush_count);
	if (MACHINE_HAS_TLB_LC &&
	    cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
		__pmdp_idte_local(addr, pmdp);
	else
		__pmdp_idte(addr, pmdp);
	atomic_dec(&mm->context.flush_count);
	return old;
}

static inline pmd_t pmdp_flush_lazy(struct mm_struct *mm,
				    unsigned long addr, pmd_t *pmdp)
{
	pmd_t old;

	old = *pmdp;
	if (pmd_val(old) & _SEGMENT_ENTRY_INVALID)
		return old;
	atomic_inc(&mm->context.flush_count);
	if (cpumask_equal(&mm->context.cpu_attach_mask,
			  cpumask_of(smp_processor_id()))) {
		pmd_val(*pmdp) |= _SEGMENT_ENTRY_INVALID;
		mm->context.flush_mm = 1;
	} else if (MACHINE_HAS_IDTE)
		__pmdp_idte(addr, pmdp);
	else
		__pmdp_csp(pmdp);
	atomic_dec(&mm->context.flush_count);
	return old;
}

pmd_t pmdp_xchg_direct(struct mm_struct *mm, unsigned long addr,
		       pmd_t *pmdp, pmd_t new)
{
	pmd_t old;

	preempt_disable();
	old = pmdp_flush_direct(mm, addr, pmdp);
	*pmdp = new;
	preempt_enable();
	return old;
}
EXPORT_SYMBOL(pmdp_xchg_direct);

pmd_t pmdp_xchg_lazy(struct mm_struct *mm, unsigned long addr,
		     pmd_t *pmdp, pmd_t new)
{
	pmd_t old;

	preempt_disable();
	old = pmdp_flush_lazy(mm, addr, pmdp);
	*pmdp = new;
	preempt_enable();
	return old;
}
EXPORT_SYMBOL(pmdp_xchg_lazy);

static inline pud_t pudp_flush_direct(struct mm_struct *mm,
				      unsigned long addr, pud_t *pudp)
{
	pud_t old;

	old = *pudp;
	if (pud_val(old) & _REGION_ENTRY_INVALID)
		return old;
	if (!MACHINE_HAS_IDTE) {
		/*
		 * Invalid bit position is the same for pmd and pud, so we can
		 * re-use _pmd_csp() here
		 */
		__pmdp_csp((pmd_t *) pudp);
		return old;
	}
	atomic_inc(&mm->context.flush_count);
	if (MACHINE_HAS_TLB_LC &&
	    cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
		__pudp_idte_local(addr, pudp);
	else
		__pudp_idte(addr, pudp);
	atomic_dec(&mm->context.flush_count);
	return old;
}

pud_t pudp_xchg_direct(struct mm_struct *mm, unsigned long addr,
		       pud_t *pudp, pud_t new)
{
	pud_t old;

	preempt_disable();
	old = pudp_flush_direct(mm, addr, pudp);
	*pudp = new;
	preempt_enable();
	return old;
}
EXPORT_SYMBOL(pudp_xchg_direct);
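
/*
 * Page table deposit/withdraw for transparent huge pages: the page table
 * preallocated for a later split of the huge pmd is kept on a list that
 * is threaded through the pgtable pages themselves, anchored at
 * pmd_huge_pte() and protected by the pmd page table lock. On withdraw
 * the first two pte slots are set to _PAGE_INVALID again because they
 * were used to store the list_head.
 */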
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
				pgtable_t pgtable)
{
	struct list_head *lh = (struct list_head *) pgtable;

	assert_spin_locked(pmd_lockptr(mm, pmdp));

	/* FIFO */
	if (!pmd_huge_pte(mm, pmdp))
		INIT_LIST_HEAD(lh);
	else
		list_add(lh, (struct list_head *) pmd_huge_pte(mm, pmdp));
	pmd_huge_pte(mm, pmdp) = pgtable;
}

pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
{
	struct list_head *lh;
	pgtable_t pgtable;
	pte_t *ptep;

	assert_spin_locked(pmd_lockptr(mm, pmdp));

	/* FIFO */
	pgtable = pmd_huge_pte(mm, pmdp);
	lh = (struct list_head *) pgtable;
	if (list_empty(lh))
		pmd_huge_pte(mm, pmdp) = NULL;
	else {
		pmd_huge_pte(mm, pmdp) = (pgtable_t) lh->next;
		list_del(lh);
	}
	ptep = (pte_t *) pgtable;
	pte_val(*ptep) = _PAGE_INVALID;
	ptep++;
	pte_val(*ptep) = _PAGE_INVALID;
	return pgtable;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

#ifdef CONFIG_PGSTE
void ptep_set_pte_at(struct mm_struct *mm, unsigned long addr,
		     pte_t *ptep, pte_t entry)
{
	pgste_t pgste;

	/* the mm_has_pgste() check is done in set_pte_at() */
	preempt_disable();
	pgste = pgste_get_lock(ptep);
	pgste_val(pgste) &= ~_PGSTE_GPS_ZERO;
	pgste_set_key(ptep, pgste, entry, mm);
	pgste = pgste_set_pte(ptep, pgste, entry);
	pgste_set_unlock(ptep, pgste);
	preempt_enable();
}

void ptep_set_notify(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
	pgste_t pgste;

	preempt_disable();
	pgste = pgste_get_lock(ptep);
	pgste_val(pgste) |= PGSTE_IN_BIT;
	pgste_set_unlock(ptep, pgste);
	preempt_enable();
}

static void ptep_zap_swap_entry(struct mm_struct *mm, swp_entry_t entry)
{
	if (!non_swap_entry(entry))
		dec_mm_counter(mm, MM_SWAPENTS);
	else if (is_migration_entry(entry)) {
		struct page *page = migration_entry_to_page(entry);

		dec_mm_counter(mm, mm_counter(page));
	}
	free_swap_and_cache(entry);
}

void ptep_zap_unused(struct mm_struct *mm, unsigned long addr,
		     pte_t *ptep, int reset)
{
	unsigned long pgstev;
	pgste_t pgste;
	pte_t pte;

	/* Zap unused and logically-zero pages */
	preempt_disable();
	pgste = pgste_get_lock(ptep);
	pgstev = pgste_val(pgste);
	pte = *ptep;
	if (!reset && pte_swap(pte) &&
	    ((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED ||
	     (pgstev & _PGSTE_GPS_ZERO))) {
		ptep_zap_swap_entry(mm, pte_to_swp_entry(pte));
		pte_clear(mm, addr, ptep);
	}
	if (reset)
		pgste_val(pgste) &= ~_PGSTE_GPS_USAGE_MASK;
	pgste_set_unlock(ptep, pgste);
	preempt_enable();
}

void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
	unsigned long ptev;
	pgste_t pgste;

	/* Clear storage key */
	preempt_disable();
	pgste = pgste_get_lock(ptep);
	pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT |
			      PGSTE_GR_BIT | PGSTE_GC_BIT);
	ptev = pte_val(*ptep);
	if (!(ptev & _PAGE_INVALID) && (ptev & _PAGE_WRITE))
		page_set_storage_key(ptev & PAGE_MASK, PAGE_DEFAULT_KEY, 1);
	pgste_set_unlock(ptep, pgste);
	preempt_enable();
}
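
/*
 * Guest dirty tracking: pgste_set_pte() sets PGSTE_UC_BIT whenever a pte
 * that allows writes is installed. test_and_clear_guest_dirty() reports
 * and clears that bit and revokes write access again (_PAGE_PROTECT with
 * ESOP, _PAGE_INVALID without it) so that the next guest write faults and
 * marks the page as dirty once more.
 */
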
/*
 * Test and reset if a guest page is dirty
 */
bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long addr)
{
	spinlock_t *ptl;
	pgste_t pgste;
	pte_t *ptep;
	pte_t pte;
	bool dirty;

	ptep = get_locked_pte(mm, addr, &ptl);
	if (unlikely(!ptep))
		return false;

	pgste = pgste_get_lock(ptep);
	dirty = !!(pgste_val(pgste) & PGSTE_UC_BIT);
	pgste_val(pgste) &= ~PGSTE_UC_BIT;
	pte = *ptep;
	if (dirty && (pte_val(pte) & _PAGE_PRESENT)) {
		pgste = pgste_ipte_notify(mm, addr, ptep, pgste);
		__ptep_ipte(addr, ptep);
		if (MACHINE_HAS_ESOP || !(pte_val(pte) & _PAGE_WRITE))
			pte_val(pte) |= _PAGE_PROTECT;
		else
			pte_val(pte) |= _PAGE_INVALID;
		*ptep = pte;
	}
	pgste_set_unlock(ptep, pgste);

	spin_unlock(ptl);
	return dirty;
}
EXPORT_SYMBOL_GPL(test_and_clear_guest_dirty);

int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
			  unsigned char key, bool nq)
{
	unsigned long keyul;
	spinlock_t *ptl;
	pgste_t old, new;
	pte_t *ptep;

	down_read(&mm->mmap_sem);
	ptep = get_locked_pte(mm, addr, &ptl);
	if (unlikely(!ptep)) {
		up_read(&mm->mmap_sem);
		return -EFAULT;
	}

	new = old = pgste_get_lock(ptep);
	pgste_val(new) &= ~(PGSTE_GR_BIT | PGSTE_GC_BIT |
			    PGSTE_ACC_BITS | PGSTE_FP_BIT);
	keyul = (unsigned long) key;
	pgste_val(new) |= (keyul & (_PAGE_CHANGED | _PAGE_REFERENCED)) << 48;
	pgste_val(new) |= (keyul & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
	if (!(pte_val(*ptep) & _PAGE_INVALID)) {
		unsigned long address, bits, skey;

		address = pte_val(*ptep) & PAGE_MASK;
		skey = (unsigned long) page_get_storage_key(address);
		bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
		skey = key & (_PAGE_ACC_BITS | _PAGE_FP_BIT);
		/* Set storage key ACC and FP */
		page_set_storage_key(address, skey, !nq);
		/* Merge host changed & referenced into pgste */
		pgste_val(new) |= bits << 52;
	}
	/* changing the guest storage key is considered a change of the page */
	if ((pgste_val(new) ^ pgste_val(old)) &
	    (PGSTE_ACC_BITS | PGSTE_FP_BIT | PGSTE_GR_BIT | PGSTE_GC_BIT))
		pgste_val(new) |= PGSTE_UC_BIT;

	pgste_set_unlock(ptep, new);
	pte_unmap_unlock(ptep, ptl);
	up_read(&mm->mmap_sem);
	return 0;
}
EXPORT_SYMBOL(set_guest_storage_key);

unsigned char get_guest_storage_key(struct mm_struct *mm, unsigned long addr)
{
	unsigned char key;
	spinlock_t *ptl;
	pgste_t pgste;
	pte_t *ptep;

	down_read(&mm->mmap_sem);
	ptep = get_locked_pte(mm, addr, &ptl);
	if (unlikely(!ptep)) {
		up_read(&mm->mmap_sem);
		return -EFAULT;
	}
	pgste = pgste_get_lock(ptep);

	if (pte_val(*ptep) & _PAGE_INVALID) {
		key = (pgste_val(pgste) & PGSTE_ACC_BITS) >> 56;
		key |= (pgste_val(pgste) & PGSTE_FP_BIT) >> 56;
		key |= (pgste_val(pgste) & PGSTE_GR_BIT) >> 48;
		key |= (pgste_val(pgste) & PGSTE_GC_BIT) >> 48;
	} else {
		key = page_get_storage_key(pte_val(*ptep) & PAGE_MASK);

		/* Reflect guest's logical view, not physical */
		if (pgste_val(pgste) & PGSTE_GR_BIT)
			key |= _PAGE_REFERENCED;
		if (pgste_val(pgste) & PGSTE_GC_BIT)
			key |= _PAGE_CHANGED;
	}

	pgste_set_unlock(ptep, pgste);
	pte_unmap_unlock(ptep, ptl);
	up_read(&mm->mmap_sem);
	return key;
}
EXPORT_SYMBOL(get_guest_storage_key);
#endif