/*
 *    Copyright IBM Corp. 2007, 2011
 *    Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
 */

#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/spinlock.h>
#include <linux/module.h>
#include <linux/quicklist.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/swapops.h>
#include <linux/sysctl.h>
#include <linux/ksm.h>
#include <linux/mman.h>

#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>

#define ALLOC_ORDER	2
#define FRAG_MASK	0x03

unsigned long *crst_table_alloc(struct mm_struct *mm)
{
	struct page *page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);

	if (!page)
		return NULL;
	return (unsigned long *) page_to_phys(page);
}

void crst_table_free(struct mm_struct *mm, unsigned long *table)
{
	free_pages((unsigned long) table, ALLOC_ORDER);
}

static void __crst_table_upgrade(void *arg)
{
	struct mm_struct *mm = arg;

	if (current->active_mm == mm) {
		clear_user_asce();
		set_user_asce(mm);
	}
	__tlb_flush_local();
}

int crst_table_upgrade(struct mm_struct *mm, unsigned long limit)
{
	unsigned long *table, *pgd;
	unsigned long entry;
	int flush;

	BUG_ON(limit > (1UL << 53));
	flush = 0;
repeat:
	table = crst_table_alloc(mm);
	if (!table)
		return -ENOMEM;
	spin_lock_bh(&mm->page_table_lock);
	if (mm->context.asce_limit < limit) {
		pgd = (unsigned long *) mm->pgd;
		if (mm->context.asce_limit <= (1UL << 31)) {
			entry = _REGION3_ENTRY_EMPTY;
			mm->context.asce_limit = 1UL << 42;
			mm->context.asce_bits = _ASCE_TABLE_LENGTH |
						_ASCE_USER_BITS |
						_ASCE_TYPE_REGION3;
		} else {
			entry = _REGION2_ENTRY_EMPTY;
			mm->context.asce_limit = 1UL << 53;
			mm->context.asce_bits = _ASCE_TABLE_LENGTH |
						_ASCE_USER_BITS |
						_ASCE_TYPE_REGION2;
		}
		crst_table_init(table, entry);
		pgd_populate(mm, (pgd_t *) table, (pud_t *) pgd);
		mm->pgd = (pgd_t *) table;
		mm->task_size = mm->context.asce_limit;
		table = NULL;
		flush = 1;
	}
	spin_unlock_bh(&mm->page_table_lock);
	if (table)
		crst_table_free(mm, table);
	if (mm->context.asce_limit < limit)
		goto repeat;
	if (flush)
		on_each_cpu(__crst_table_upgrade, mm, 0);
	return 0;
}

void crst_table_downgrade(struct mm_struct *mm, unsigned long limit)
{
	pgd_t *pgd;

	if (current->active_mm == mm) {
		clear_user_asce();
		__tlb_flush_mm(mm);
	}
	while (mm->context.asce_limit > limit) {
		pgd = mm->pgd;
		switch (pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) {
		case _REGION_ENTRY_TYPE_R2:
			mm->context.asce_limit = 1UL << 42;
			mm->context.asce_bits = _ASCE_TABLE_LENGTH |
						_ASCE_USER_BITS |
						_ASCE_TYPE_REGION3;
			break;
		case _REGION_ENTRY_TYPE_R3:
			mm->context.asce_limit = 1UL << 31;
			mm->context.asce_bits = _ASCE_TABLE_LENGTH |
						_ASCE_USER_BITS |
						_ASCE_TYPE_SEGMENT;
			break;
		default:
			BUG();
		}
		mm->pgd = (pgd_t *) (pgd_val(*pgd) & _REGION_ENTRY_ORIGIN);
		mm->task_size = mm->context.asce_limit;
		crst_table_free(mm, (unsigned long *) pgd);
	}
	if (current->active_mm == mm)
		set_user_asce(mm);
}

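/*
 * The two helpers above implement the dynamic region-table layout of the
 * user address space: crst_table_upgrade() grows the tree one level at a
 * time, from a segment-table based space limited to 2 GB (1UL << 31) over
 * a region-third-table space of 4 TB (1UL << 42) up to a region-second-table
 * space of 8 PB (1UL << 53), while crst_table_downgrade() walks the same
 * steps in reverse and frees the top-level tables again.  Both keep
 * mm->context.asce_bits in sync so that set_user_asce() installs an ASCE
 * that matches the new table depth.
 */
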
#ifdef CONFIG_PGSTE

/**
 * gmap_alloc - allocate a guest address space
 * @mm: pointer to the parent mm_struct
 * @limit: maximum size of the gmap address space
 *
 * Returns a guest address space structure.
 */
struct gmap *gmap_alloc(struct mm_struct *mm, unsigned long limit)
{
	struct gmap *gmap;
	struct page *page;
	unsigned long *table;
	unsigned long etype, atype;

	if (limit < (1UL << 31)) {
		limit = (1UL << 31) - 1;
		atype = _ASCE_TYPE_SEGMENT;
		etype = _SEGMENT_ENTRY_EMPTY;
	} else if (limit < (1UL << 42)) {
		limit = (1UL << 42) - 1;
		atype = _ASCE_TYPE_REGION3;
		etype = _REGION3_ENTRY_EMPTY;
	} else if (limit < (1UL << 53)) {
		limit = (1UL << 53) - 1;
		atype = _ASCE_TYPE_REGION2;
		etype = _REGION2_ENTRY_EMPTY;
	} else {
		limit = -1UL;
		atype = _ASCE_TYPE_REGION1;
		etype = _REGION1_ENTRY_EMPTY;
	}
	gmap = kzalloc(sizeof(struct gmap), GFP_KERNEL);
	if (!gmap)
		goto out;
	INIT_LIST_HEAD(&gmap->crst_list);
	INIT_RADIX_TREE(&gmap->guest_to_host, GFP_KERNEL);
	INIT_RADIX_TREE(&gmap->host_to_guest, GFP_ATOMIC);
	spin_lock_init(&gmap->guest_table_lock);
	gmap->mm = mm;
	page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);
	if (!page)
		goto out_free;
	page->index = 0;
	list_add(&page->lru, &gmap->crst_list);
	table = (unsigned long *) page_to_phys(page);
	crst_table_init(table, etype);
	gmap->table = table;
	gmap->asce = atype | _ASCE_TABLE_LENGTH |
		_ASCE_USER_BITS | __pa(table);
	gmap->asce_end = limit;
	down_write(&mm->mmap_sem);
	list_add(&gmap->list, &mm->context.gmap_list);
	up_write(&mm->mmap_sem);
	return gmap;

out_free:
	kfree(gmap);
out:
	return NULL;
}
EXPORT_SYMBOL_GPL(gmap_alloc);

static void gmap_flush_tlb(struct gmap *gmap)
{
	if (MACHINE_HAS_IDTE)
		__tlb_flush_asce(gmap->mm, gmap->asce);
	else
		__tlb_flush_global();
}

static void gmap_radix_tree_free(struct radix_tree_root *root)
{
	struct radix_tree_iter iter;
	unsigned long indices[16];
	unsigned long index;
	void **slot;
	int i, nr;

	/* A radix tree is freed by deleting all of its entries */
	index = 0;
	do {
		nr = 0;
		radix_tree_for_each_slot(slot, root, &iter, index) {
			indices[nr] = iter.index;
			if (++nr == 16)
				break;
		}
		for (i = 0; i < nr; i++) {
			index = indices[i];
			radix_tree_delete(root, index);
		}
	} while (nr > 0);
}

/**
 * gmap_free - free a guest address space
 * @gmap: pointer to the guest address space structure
 */
void gmap_free(struct gmap *gmap)
{
	struct page *page, *next;

	/* Flush tlb. */
	if (MACHINE_HAS_IDTE)
		__tlb_flush_asce(gmap->mm, gmap->asce);
	else
		__tlb_flush_global();

	/* Free all segment & region tables. */
	list_for_each_entry_safe(page, next, &gmap->crst_list, lru)
		__free_pages(page, ALLOC_ORDER);
	gmap_radix_tree_free(&gmap->guest_to_host);
	gmap_radix_tree_free(&gmap->host_to_guest);
	down_write(&gmap->mm->mmap_sem);
	list_del(&gmap->list);
	up_write(&gmap->mm->mmap_sem);
	kfree(gmap);
}
EXPORT_SYMBOL_GPL(gmap_free);

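/*
 * Illustrative sketch of how a hypervisor (e.g. KVM) is expected to use the
 * gmap interface above; the variable names and sizes are made up for the
 * example and error handling is omitted:
 *
 *	struct gmap *g;
 *
 *	g = gmap_alloc(current->mm, (1UL << 42) - 1);	// up to 4 TB of guest addresses
 *	gmap_map_segment(g, uaddr, gaddr, size);	// PMD_SIZE aligned
 *	gmap_enable(g);					// before entering the guest
 *	...						// gmap_fault() resolves guest faults
 *	gmap_disable(g);
 *	gmap_unmap_segment(g, gaddr, size);
 *	gmap_free(g);
 */
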
/**
 * gmap_enable - switch primary space to the guest address space
 * @gmap: pointer to the guest address space structure
 */
void gmap_enable(struct gmap *gmap)
{
	S390_lowcore.gmap = (unsigned long) gmap;
}
EXPORT_SYMBOL_GPL(gmap_enable);

/**
 * gmap_disable - switch back to the standard primary address space
 * @gmap: pointer to the guest address space structure
 */
void gmap_disable(struct gmap *gmap)
{
	S390_lowcore.gmap = 0UL;
}
EXPORT_SYMBOL_GPL(gmap_disable);

/*
 * gmap_alloc_table is assumed to be called with mmap_sem held
 */
static int gmap_alloc_table(struct gmap *gmap, unsigned long *table,
			    unsigned long init, unsigned long gaddr)
{
	struct page *page;
	unsigned long *new;

	/* since we don't free the gmap table until gmap_free we can unlock */
	page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);
	if (!page)
		return -ENOMEM;
	new = (unsigned long *) page_to_phys(page);
	crst_table_init(new, init);
	spin_lock(&gmap->mm->page_table_lock);
	if (*table & _REGION_ENTRY_INVALID) {
		list_add(&page->lru, &gmap->crst_list);
		*table = (unsigned long) new | _REGION_ENTRY_LENGTH |
			(*table & _REGION_ENTRY_TYPE_MASK);
		page->index = gaddr;
		page = NULL;
	}
	spin_unlock(&gmap->mm->page_table_lock);
	if (page)
		__free_pages(page, ALLOC_ORDER);
	return 0;
}

/**
 * __gmap_segment_gaddr - find virtual address from segment pointer
 * @entry: pointer to a segment table entry in the guest address space
 *
 * Returns the virtual address in the guest address space for the segment
 */
static unsigned long __gmap_segment_gaddr(unsigned long *entry)
{
	struct page *page;
	unsigned long offset, mask;

	offset = (unsigned long) entry / sizeof(unsigned long);
	offset = (offset & (PTRS_PER_PMD - 1)) * PMD_SIZE;
	mask = ~(PTRS_PER_PMD * sizeof(pmd_t) - 1);
	page = virt_to_page((void *)((unsigned long) entry & mask));
	return page->index + offset;
}

/**
 * __gmap_unlink_by_vmaddr - unlink a single segment via a host address
 * @gmap: pointer to the guest address space structure
 * @vmaddr: address in the host process address space
 *
 * Returns 1 if a TLB flush is required
 */
static int __gmap_unlink_by_vmaddr(struct gmap *gmap, unsigned long vmaddr)
{
	unsigned long *entry;
	int flush = 0;

	spin_lock(&gmap->guest_table_lock);
	entry = radix_tree_delete(&gmap->host_to_guest, vmaddr >> PMD_SHIFT);
	if (entry) {
		flush = (*entry != _SEGMENT_ENTRY_INVALID);
		*entry = _SEGMENT_ENTRY_INVALID;
	}
	spin_unlock(&gmap->guest_table_lock);
	return flush;
}

/**
 * __gmap_unmap_by_gaddr - unmap a single segment via a guest address
 * @gmap: pointer to the guest address space structure
 * @gaddr: address in the guest address space
 *
 * Returns 1 if a TLB flush is required
 */
static int __gmap_unmap_by_gaddr(struct gmap *gmap, unsigned long gaddr)
{
	unsigned long vmaddr;

	vmaddr = (unsigned long) radix_tree_delete(&gmap->guest_to_host,
						   gaddr >> PMD_SHIFT);
	return vmaddr ? __gmap_unlink_by_vmaddr(gmap, vmaddr) : 0;
}

/**
 * gmap_unmap_segment - unmap segment from the guest address space
 * @gmap: pointer to the guest address space structure
 * @to: address in the guest address space
 * @len: length of the memory area to unmap
 *
 * Returns 0 if the unmap succeeded, -EINVAL if not.
 */
int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len)
{
	unsigned long off;
	int flush;

	if ((to | len) & (PMD_SIZE - 1))
		return -EINVAL;
	if (len == 0 || to + len < to)
		return -EINVAL;

	flush = 0;
	down_write(&gmap->mm->mmap_sem);
	for (off = 0; off < len; off += PMD_SIZE)
		flush |= __gmap_unmap_by_gaddr(gmap, to + off);
	up_write(&gmap->mm->mmap_sem);
	if (flush)
		gmap_flush_tlb(gmap);
	return 0;
}
EXPORT_SYMBOL_GPL(gmap_unmap_segment);

/**
 * gmap_map_segment - map a segment to the guest address space
 * @gmap: pointer to the guest address space structure
 * @from: source address in the parent address space
 * @to: target address in the guest address space
 * @len: length of the memory area to map
 *
 * Returns 0 if the mmap succeeded, -EINVAL or -ENOMEM if not.
 */
int gmap_map_segment(struct gmap *gmap, unsigned long from,
		     unsigned long to, unsigned long len)
{
	unsigned long off;
	int flush;

	if ((from | to | len) & (PMD_SIZE - 1))
		return -EINVAL;
	if (len == 0 || from + len < from || to + len < to ||
	    from + len > TASK_MAX_SIZE || to + len > gmap->asce_end)
		return -EINVAL;

	flush = 0;
	down_write(&gmap->mm->mmap_sem);
	for (off = 0; off < len; off += PMD_SIZE) {
		/* Remove old translation */
		flush |= __gmap_unmap_by_gaddr(gmap, to + off);
		/* Store new translation */
		if (radix_tree_insert(&gmap->guest_to_host,
				      (to + off) >> PMD_SHIFT,
				      (void *) from + off))
			break;
	}
	up_write(&gmap->mm->mmap_sem);
	if (flush)
		gmap_flush_tlb(gmap);
	if (off >= len)
		return 0;
	gmap_unmap_segment(gmap, to, len);
	return -ENOMEM;
}
EXPORT_SYMBOL_GPL(gmap_map_segment);

/**
 * __gmap_translate - translate a guest address to a user space address
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: guest address
 *
 * Returns user space address which corresponds to the guest address or
 * -EFAULT if no such mapping exists.
 * This function does not establish potentially missing page table entries.
 * The mmap_sem of the mm that belongs to the address space must be held
 * when this function gets called.
 */
unsigned long __gmap_translate(struct gmap *gmap, unsigned long gaddr)
{
	unsigned long vmaddr;

	vmaddr = (unsigned long)
		radix_tree_lookup(&gmap->guest_to_host, gaddr >> PMD_SHIFT);
	return vmaddr ? (vmaddr | (gaddr & ~PMD_MASK)) : -EFAULT;
}
EXPORT_SYMBOL_GPL(__gmap_translate);

/**
 * gmap_translate - translate a guest address to a user space address
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: guest address
 *
 * Returns user space address which corresponds to the guest address or
 * -EFAULT if no such mapping exists.
 * This function does not establish potentially missing page table entries.
 */
unsigned long gmap_translate(struct gmap *gmap, unsigned long gaddr)
{
	unsigned long rc;

	down_read(&gmap->mm->mmap_sem);
	rc = __gmap_translate(gmap, gaddr);
	up_read(&gmap->mm->mmap_sem);
	return rc;
}
EXPORT_SYMBOL_GPL(gmap_translate);

/**
 * gmap_unlink - disconnect a page table from the gmap shadow tables
 * @mm: pointer to the parent mm_struct
 * @table: pointer to the host page table
 * @vmaddr: vm address associated with the host page table
 */
static void gmap_unlink(struct mm_struct *mm, unsigned long *table,
			unsigned long vmaddr)
{
	struct gmap *gmap;
	int flush;

	list_for_each_entry(gmap, &mm->context.gmap_list, list) {
		flush = __gmap_unlink_by_vmaddr(gmap, vmaddr);
		if (flush)
			gmap_flush_tlb(gmap);
	}
}

/**
 * __gmap_link - set up shadow page tables to connect a host to a guest address
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: guest address
 * @vmaddr: vm address
 *
 * Returns 0 on success, -ENOMEM for out of memory conditions, and -EFAULT
 * if the vm address is already mapped to a different guest segment.
 * The mmap_sem of the mm that belongs to the address space must be held
 * when this function gets called.
 */
int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr)
{
	struct mm_struct *mm;
	unsigned long *table;
	spinlock_t *ptl;
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	int rc;

	/* Create higher level tables in the gmap page table */
	table = gmap->table;
	if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION1) {
		table += (gaddr >> 53) & 0x7ff;
		if ((*table & _REGION_ENTRY_INVALID) &&
		    gmap_alloc_table(gmap, table, _REGION2_ENTRY_EMPTY,
				     gaddr & 0xffe0000000000000UL))
			return -ENOMEM;
		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
	}
	if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION2) {
		table += (gaddr >> 42) & 0x7ff;
		if ((*table & _REGION_ENTRY_INVALID) &&
		    gmap_alloc_table(gmap, table, _REGION3_ENTRY_EMPTY,
				     gaddr & 0xfffffc0000000000UL))
			return -ENOMEM;
		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
	}
	if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION3) {
		table += (gaddr >> 31) & 0x7ff;
		if ((*table & _REGION_ENTRY_INVALID) &&
		    gmap_alloc_table(gmap, table, _SEGMENT_ENTRY_EMPTY,
				     gaddr & 0xffffffff80000000UL))
			return -ENOMEM;
		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
	}
	table += (gaddr >> 20) & 0x7ff;
	/* Walk the parent mm page table */
	mm = gmap->mm;
	pgd = pgd_offset(mm, vmaddr);
	VM_BUG_ON(pgd_none(*pgd));
	pud = pud_offset(pgd, vmaddr);
	VM_BUG_ON(pud_none(*pud));
	pmd = pmd_offset(pud, vmaddr);
	VM_BUG_ON(pmd_none(*pmd));
	/* large pmds cannot yet be handled */
	if (pmd_large(*pmd))
		return -EFAULT;
	/* Link gmap segment table entry location to page table. */
	rc = radix_tree_preload(GFP_KERNEL);
	if (rc)
		return rc;
	ptl = pmd_lock(mm, pmd);
	spin_lock(&gmap->guest_table_lock);
	if (*table == _SEGMENT_ENTRY_INVALID) {
		rc = radix_tree_insert(&gmap->host_to_guest,
				       vmaddr >> PMD_SHIFT, table);
		if (!rc)
			*table = pmd_val(*pmd);
	} else
		rc = 0;
	spin_unlock(&gmap->guest_table_lock);
	spin_unlock(ptl);
	radix_tree_preload_end();
	return rc;
}

/**
 * gmap_fault - resolve a fault on a guest address
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: guest address
 * @fault_flags: flags to pass down to handle_mm_fault()
 *
 * Returns 0 on success, -ENOMEM for out of memory conditions, and -EFAULT
 * if the vm address is already mapped to a different guest segment.
 */
int gmap_fault(struct gmap *gmap, unsigned long gaddr,
	       unsigned int fault_flags)
{
	unsigned long vmaddr;
	int rc;

	down_read(&gmap->mm->mmap_sem);
	vmaddr = __gmap_translate(gmap, gaddr);
	if (IS_ERR_VALUE(vmaddr)) {
		rc = vmaddr;
		goto out_up;
	}
	if (fixup_user_fault(current, gmap->mm, vmaddr, fault_flags)) {
		rc = -EFAULT;
		goto out_up;
	}
	rc = __gmap_link(gmap, gaddr, vmaddr);
out_up:
	up_read(&gmap->mm->mmap_sem);
	return rc;
}
EXPORT_SYMBOL_GPL(gmap_fault);

static void gmap_zap_swap_entry(swp_entry_t entry, struct mm_struct *mm)
{
	if (!non_swap_entry(entry))
		dec_mm_counter(mm, MM_SWAPENTS);
	else if (is_migration_entry(entry)) {
		struct page *page = migration_entry_to_page(entry);

		if (PageAnon(page))
			dec_mm_counter(mm, MM_ANONPAGES);
		else
			dec_mm_counter(mm, MM_FILEPAGES);
	}
	free_swap_and_cache(entry);
}

/*
 * this function is assumed to be called with mmap_sem held
 */
void __gmap_zap(struct gmap *gmap, unsigned long gaddr)
{
	unsigned long vmaddr, ptev, pgstev;
	pte_t *ptep, pte;
	spinlock_t *ptl;
	pgste_t pgste;

	/* Find the vm address for the guest address */
	vmaddr = (unsigned long) radix_tree_lookup(&gmap->guest_to_host,
						   gaddr >> PMD_SHIFT);
	if (!vmaddr)
		return;
	vmaddr |= gaddr & ~PMD_MASK;
	/* Get pointer to the page table entry */
	ptep = get_locked_pte(gmap->mm, vmaddr, &ptl);
	if (unlikely(!ptep))
		return;
	pte = *ptep;
	if (!pte_swap(pte))
		goto out_pte;
	/* Zap unused and logically-zero pages */
	pgste = pgste_get_lock(ptep);
	pgstev = pgste_val(pgste);
	ptev = pte_val(pte);
	if (((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED) ||
	    ((pgstev & _PGSTE_GPS_ZERO) && (ptev & _PAGE_INVALID))) {
		gmap_zap_swap_entry(pte_to_swp_entry(pte), gmap->mm);
		pte_clear(gmap->mm, vmaddr, ptep);
	}
	pgste_set_unlock(ptep, pgste);
out_pte:
	pte_unmap_unlock(ptep, ptl);
}
EXPORT_SYMBOL_GPL(__gmap_zap);

void gmap_discard(struct gmap *gmap, unsigned long from, unsigned long to)
{
	unsigned long gaddr, vmaddr, size;
	struct vm_area_struct *vma;

	down_read(&gmap->mm->mmap_sem);
	for (gaddr = from; gaddr < to;
	     gaddr = (gaddr + PMD_SIZE) & PMD_MASK) {
		/* Find the vm address for the guest address */
		vmaddr = (unsigned long)
			radix_tree_lookup(&gmap->guest_to_host,
					  gaddr >> PMD_SHIFT);
		if (!vmaddr)
			continue;
		vmaddr |= gaddr & ~PMD_MASK;
		/* Find vma in the parent mm */
		vma = find_vma(gmap->mm, vmaddr);
		size = min(to - gaddr, PMD_SIZE - (gaddr & ~PMD_MASK));
		zap_page_range(vma, vmaddr, size, NULL);
	}
	up_read(&gmap->mm->mmap_sem);
}
EXPORT_SYMBOL_GPL(gmap_discard);

static LIST_HEAD(gmap_notifier_list);
static DEFINE_SPINLOCK(gmap_notifier_lock);

/**
 * gmap_register_ipte_notifier - register a pte invalidation callback
 * @nb: pointer to the gmap notifier block
 */
void gmap_register_ipte_notifier(struct gmap_notifier *nb)
{
	spin_lock(&gmap_notifier_lock);
	list_add(&nb->list, &gmap_notifier_list);
	spin_unlock(&gmap_notifier_lock);
}
EXPORT_SYMBOL_GPL(gmap_register_ipte_notifier);

/**
 * gmap_unregister_ipte_notifier - remove a pte invalidation callback
 * @nb: pointer to the gmap notifier block
 */
void gmap_unregister_ipte_notifier(struct gmap_notifier *nb)
{
	spin_lock(&gmap_notifier_lock);
	list_del_init(&nb->list);
	spin_unlock(&gmap_notifier_lock);
}
EXPORT_SYMBOL_GPL(gmap_unregister_ipte_notifier);

/**
 * gmap_ipte_notify - mark a range of ptes for invalidation notification
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: virtual address in the guest address space
 * @len: size of area
 *
 * Returns 0 if for each page in the given range a gmap mapping exists and
 * the invalidation notification could be set. If the gmap mapping is missing
 * for one or more pages -EFAULT is returned. If no memory could be allocated
 * -ENOMEM is returned. This function establishes missing page table entries.
 */
int gmap_ipte_notify(struct gmap *gmap, unsigned long gaddr, unsigned long len)
{
	unsigned long addr;
	spinlock_t *ptl;
	pte_t *ptep, entry;
	pgste_t pgste;
	int rc = 0;

	if ((gaddr & ~PAGE_MASK) || (len & ~PAGE_MASK))
		return -EINVAL;
	down_read(&gmap->mm->mmap_sem);
	while (len) {
		/* Convert gmap address and connect the page tables */
		addr = __gmap_translate(gmap, gaddr);
		if (IS_ERR_VALUE(addr)) {
			rc = addr;
			break;
		}
		/* Get the page mapped */
		if (fixup_user_fault(current, gmap->mm, addr, FAULT_FLAG_WRITE)) {
			rc = -EFAULT;
			break;
		}
		rc = __gmap_link(gmap, gaddr, addr);
		if (rc)
			break;
		/* Walk the process page table, lock and get pte pointer */
		ptep = get_locked_pte(gmap->mm, addr, &ptl);
		VM_BUG_ON(!ptep);
		/* Set notification bit in the pgste of the pte */
		entry = *ptep;
		if ((pte_val(entry) & (_PAGE_INVALID | _PAGE_PROTECT)) == 0) {
			pgste = pgste_get_lock(ptep);
			pgste_val(pgste) |= PGSTE_IN_BIT;
			pgste_set_unlock(ptep, pgste);
			gaddr += PAGE_SIZE;
			len -= PAGE_SIZE;
		}
		pte_unmap_unlock(ptep, ptl);
	}
	up_read(&gmap->mm->mmap_sem);
	return rc;
}
EXPORT_SYMBOL_GPL(gmap_ipte_notify);

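/*
 * Illustrative sketch of the notifier interface above; the callback name and
 * the armed range are made up for the example:
 *
 *	static void my_ipte_cb(struct gmap *gmap, unsigned long gaddr)
 *	{
 *		// a pte backing guest address gaddr is about to be invalidated
 *	}
 *
 *	static struct gmap_notifier my_nb = { .notifier_call = my_ipte_cb };
 *
 *	gmap_register_ipte_notifier(&my_nb);
 *	gmap_ipte_notify(gmap, gaddr, PAGE_SIZE);	// arm one guest page
 *	...
 *	gmap_unregister_ipte_notifier(&my_nb);
 */
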
/**
 * gmap_do_ipte_notify - call all invalidation callbacks for a specific pte.
 * @mm: pointer to the process mm_struct
 * @vmaddr: virtual address in the process address space
 * @pte: pointer to the page table entry
 *
 * This function is assumed to be called with the page table lock held
 * for the pte to notify.
 */
void gmap_do_ipte_notify(struct mm_struct *mm, unsigned long vmaddr, pte_t *pte)
{
	unsigned long offset, gaddr;
	unsigned long *table;
	struct gmap_notifier *nb;
	struct gmap *gmap;

	offset = ((unsigned long) pte) & (255 * sizeof(pte_t));
	offset = offset * (4096 / sizeof(pte_t));
	spin_lock(&gmap_notifier_lock);
	list_for_each_entry(gmap, &mm->context.gmap_list, list) {
		table = radix_tree_lookup(&gmap->host_to_guest,
					  vmaddr >> PMD_SHIFT);
		if (!table)
			continue;
		gaddr = __gmap_segment_gaddr(table) + offset;
		list_for_each_entry(nb, &gmap_notifier_list, list)
			nb->notifier_call(gmap, gaddr);
	}
	spin_unlock(&gmap_notifier_lock);
}
EXPORT_SYMBOL_GPL(gmap_do_ipte_notify);

static inline int page_table_with_pgste(struct page *page)
{
	return atomic_read(&page->_mapcount) == 0;
}

static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm)
{
	struct page *page;
	unsigned long *table;

	page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
	if (!page)
		return NULL;
	if (!pgtable_page_ctor(page)) {
		__free_page(page);
		return NULL;
	}
	atomic_set(&page->_mapcount, 0);
	table = (unsigned long *) page_to_phys(page);
	clear_table(table, _PAGE_INVALID, PAGE_SIZE/2);
	clear_table(table + PTRS_PER_PTE, 0, PAGE_SIZE/2);
	return table;
}

static inline void page_table_free_pgste(unsigned long *table)
{
	struct page *page;

	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
	pgtable_page_dtor(page);
	atomic_set(&page->_mapcount, -1);
	__free_page(page);
}

int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
			  unsigned long key, bool nq)
{
	spinlock_t *ptl;
	pgste_t old, new;
	pte_t *ptep;

	down_read(&mm->mmap_sem);
retry:
	ptep = get_locked_pte(mm, addr, &ptl);
	if (unlikely(!ptep)) {
		up_read(&mm->mmap_sem);
		return -EFAULT;
	}
	if (!(pte_val(*ptep) & _PAGE_INVALID) &&
	    (pte_val(*ptep) & _PAGE_PROTECT)) {
		pte_unmap_unlock(ptep, ptl);
		if (fixup_user_fault(current, mm, addr, FAULT_FLAG_WRITE)) {
			up_read(&mm->mmap_sem);
			return -EFAULT;
		}
		goto retry;
	}

	new = old = pgste_get_lock(ptep);
	pgste_val(new) &= ~(PGSTE_GR_BIT | PGSTE_GC_BIT |
			    PGSTE_ACC_BITS | PGSTE_FP_BIT);
	pgste_val(new) |= (key & (_PAGE_CHANGED | _PAGE_REFERENCED)) << 48;
	pgste_val(new) |= (key & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
	if (!(pte_val(*ptep) & _PAGE_INVALID)) {
		unsigned long address, bits, skey;

		address = pte_val(*ptep) & PAGE_MASK;
		skey = (unsigned long) page_get_storage_key(address);
		bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
		skey = key & (_PAGE_ACC_BITS | _PAGE_FP_BIT);
		/* Set storage key ACC and FP */
		page_set_storage_key(address, skey, !nq);
		/* Merge host changed & referenced into pgste */
		pgste_val(new) |= bits << 52;
	}
	/* changing the guest storage key is considered a change of the page */
	if ((pgste_val(new) ^ pgste_val(old)) &
	    (PGSTE_ACC_BITS | PGSTE_FP_BIT | PGSTE_GR_BIT | PGSTE_GC_BIT))
		pgste_val(new) |= PGSTE_UC_BIT;

	pgste_set_unlock(ptep, new);
	pte_unmap_unlock(ptep, ptl);
	up_read(&mm->mmap_sem);
	return 0;
}
EXPORT_SYMBOL(set_guest_storage_key);

unsigned long get_guest_storage_key(struct mm_struct *mm, unsigned long addr)
{
	spinlock_t *ptl;
	pgste_t pgste;
	pte_t *ptep;
	uint64_t physaddr;
	unsigned long key = 0;

	down_read(&mm->mmap_sem);
	ptep = get_locked_pte(mm, addr, &ptl);
	if (unlikely(!ptep)) {
		up_read(&mm->mmap_sem);
		return -EFAULT;
	}
	pgste = pgste_get_lock(ptep);

	if (pte_val(*ptep) & _PAGE_INVALID) {
		key |= (pgste_val(pgste) & PGSTE_ACC_BITS) >> 56;
		key |= (pgste_val(pgste) & PGSTE_FP_BIT) >> 56;
		key |= (pgste_val(pgste) & PGSTE_GR_BIT) >> 48;
		key |= (pgste_val(pgste) & PGSTE_GC_BIT) >> 48;
	} else {
		physaddr = pte_val(*ptep) & PAGE_MASK;
		key = page_get_storage_key(physaddr);

		/* Reflect guest's logical view, not physical */
		if (pgste_val(pgste) & PGSTE_GR_BIT)
			key |= _PAGE_REFERENCED;
		if (pgste_val(pgste) & PGSTE_GC_BIT)
			key |= _PAGE_CHANGED;
	}

	pgste_set_unlock(ptep, pgste);
	pte_unmap_unlock(ptep, ptl);
	up_read(&mm->mmap_sem);
	return key;
}
EXPORT_SYMBOL(get_guest_storage_key);

static int page_table_allocate_pgste_min = 0;
static int page_table_allocate_pgste_max = 1;
int page_table_allocate_pgste = 0;
EXPORT_SYMBOL(page_table_allocate_pgste);

static struct ctl_table page_table_sysctl[] = {
	{
		.procname	= "allocate_pgste",
		.data		= &page_table_allocate_pgste,
		.maxlen		= sizeof(int),
		.mode		= S_IRUGO | S_IWUSR,
		.proc_handler	= proc_dointvec,
		.extra1		= &page_table_allocate_pgste_min,
		.extra2		= &page_table_allocate_pgste_max,
	},
	{ }
};

static struct ctl_table page_table_sysctl_dir[] = {
	{
		.procname	= "vm",
		.maxlen		= 0,
		.mode		= 0555,
		.child		= page_table_sysctl,
	},
	{ }
};

static int __init page_table_register_sysctl(void)
{
	return register_sysctl_table(page_table_sysctl_dir) ? 0 : -ENOMEM;
}
__initcall(page_table_register_sysctl);

#else /* CONFIG_PGSTE */

static inline int page_table_with_pgste(struct page *page)
{
	return 0;
}

static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm)
{
	return NULL;
}

static inline void page_table_free_pgste(unsigned long *table)
{
}

static inline void gmap_unlink(struct mm_struct *mm, unsigned long *table,
			       unsigned long vmaddr)
{
}

#endif /* CONFIG_PGSTE */

static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits)
{
	unsigned int old, new;

	do {
		old = atomic_read(v);
		new = old ^ bits;
	} while (atomic_cmpxchg(v, old, new) != old);
	return new;
}

/*
 * page table entry allocation/free routines.
 */
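/*
 * Without extended PGSTEs a page table needs only 2K, so two page tables are
 * packed into one 4K page.  page->_mapcount of that page is used as a small
 * bitmap: bits 0-1 (FRAG_MASK) mark which 2K half is currently allocated,
 * bits 4-5 mark halves that have been handed to the RCU/TLB batch in
 * page_table_free_rcu() but are not freed yet, and a value of -1 means the
 * page is not used for page tables at all.  Page tables with PGSTEs
 * (page_table_with_pgste() returns true, _mapcount == 0) always occupy a
 * full 4K page of their own.
 */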
unsigned long *page_table_alloc(struct mm_struct *mm)
{
	unsigned long *uninitialized_var(table);
	struct page *uninitialized_var(page);
	unsigned int mask, bit;

	if (mm_alloc_pgste(mm))
		return page_table_alloc_pgste(mm);
	/* Allocate fragments of a 4K page as 1K/2K page table */
	spin_lock_bh(&mm->context.list_lock);
	mask = FRAG_MASK;
	if (!list_empty(&mm->context.pgtable_list)) {
		page = list_first_entry(&mm->context.pgtable_list,
					struct page, lru);
		table = (unsigned long *) page_to_phys(page);
		mask = atomic_read(&page->_mapcount);
		mask = mask | (mask >> 4);
	}
	if ((mask & FRAG_MASK) == FRAG_MASK) {
		spin_unlock_bh(&mm->context.list_lock);
		page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
		if (!page)
			return NULL;
		if (!pgtable_page_ctor(page)) {
			__free_page(page);
			return NULL;
		}
		atomic_set(&page->_mapcount, 1);
		table = (unsigned long *) page_to_phys(page);
		clear_table(table, _PAGE_INVALID, PAGE_SIZE);
		spin_lock_bh(&mm->context.list_lock);
		list_add(&page->lru, &mm->context.pgtable_list);
	} else {
		for (bit = 1; mask & bit; bit <<= 1)
			table += PTRS_PER_PTE;
		mask = atomic_xor_bits(&page->_mapcount, bit);
		if ((mask & FRAG_MASK) == FRAG_MASK)
			list_del(&page->lru);
	}
	spin_unlock_bh(&mm->context.list_lock);
	return table;
}

void page_table_free(struct mm_struct *mm, unsigned long *table)
{
	struct page *page;
	unsigned int bit, mask;

	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
	if (page_table_with_pgste(page))
		return page_table_free_pgste(table);
	/* Free 1K/2K page table fragment of a 4K page */
	bit = 1 << ((__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t)));
	spin_lock_bh(&mm->context.list_lock);
	if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK)
		list_del(&page->lru);
	mask = atomic_xor_bits(&page->_mapcount, bit);
	if (mask & FRAG_MASK)
		list_add(&page->lru, &mm->context.pgtable_list);
	spin_unlock_bh(&mm->context.list_lock);
	if (mask == 0) {
		pgtable_page_dtor(page);
		atomic_set(&page->_mapcount, -1);
		__free_page(page);
	}
}

static void __page_table_free_rcu(void *table, unsigned bit)
{
	struct page *page;

	if (bit == FRAG_MASK)
		return page_table_free_pgste(table);
	/* Free 1K/2K page table fragment of a 4K page */
	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
	if (atomic_xor_bits(&page->_mapcount, bit) == 0) {
		pgtable_page_dtor(page);
		atomic_set(&page->_mapcount, -1);
		__free_page(page);
	}
}

void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table,
			 unsigned long vmaddr)
{
	struct mm_struct *mm;
	struct page *page;
	unsigned int bit, mask;

	mm = tlb->mm;
	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
	if (page_table_with_pgste(page)) {
		gmap_unlink(mm, table, vmaddr);
		table = (unsigned long *) (__pa(table) | FRAG_MASK);
		tlb_remove_table(tlb, table);
		return;
	}
	bit = 1 << ((__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t)));
	spin_lock_bh(&mm->context.list_lock);
	if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK)
		list_del(&page->lru);
	mask = atomic_xor_bits(&page->_mapcount, bit | (bit << 4));
	if (mask & FRAG_MASK)
		list_add_tail(&page->lru, &mm->context.pgtable_list);
	spin_unlock_bh(&mm->context.list_lock);
	table = (unsigned long *) (__pa(table) | (bit << 4));
	tlb_remove_table(tlb, table);
}

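/*
 * The table pointer handed to tlb_remove_table() carries a small tag in its
 * low bits that tells __tlb_remove_table() how to dispose of the table once
 * the TLB batch has been flushed: 0 for a full crst (region/segment) table
 * freed with free_pages(), FRAG_MASK for a 4K page table with PGSTEs, and
 * bit << 4 for a 2K page table fragment whose pending bit still has to be
 * cleared in page->_mapcount.
 */
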
static void __tlb_remove_table(void *_table)
{
	const unsigned long mask = (FRAG_MASK << 4) | FRAG_MASK;
	void *table = (void *)((unsigned long) _table & ~mask);
	unsigned type = (unsigned long) _table & mask;

	if (type)
		__page_table_free_rcu(table, type);
	else
		free_pages((unsigned long) table, ALLOC_ORDER);
}

static void tlb_remove_table_smp_sync(void *arg)
{
	/* Simply deliver the interrupt */
}

static void tlb_remove_table_one(void *table)
{
	/*
	 * This isn't an RCU grace period and hence the page-tables cannot be
	 * assumed to be actually RCU-freed.
	 *
	 * It is however sufficient for software page-table walkers that rely
	 * on IRQ disabling. See the comment near struct mmu_table_batch.
	 */
	smp_call_function(tlb_remove_table_smp_sync, NULL, 1);
	__tlb_remove_table(table);
}

static void tlb_remove_table_rcu(struct rcu_head *head)
{
	struct mmu_table_batch *batch;
	int i;

	batch = container_of(head, struct mmu_table_batch, rcu);

	for (i = 0; i < batch->nr; i++)
		__tlb_remove_table(batch->tables[i]);

	free_page((unsigned long)batch);
}

void tlb_table_flush(struct mmu_gather *tlb)
{
	struct mmu_table_batch **batch = &tlb->batch;

	if (*batch) {
		call_rcu_sched(&(*batch)->rcu, tlb_remove_table_rcu);
		*batch = NULL;
	}
}

void tlb_remove_table(struct mmu_gather *tlb, void *table)
{
	struct mmu_table_batch **batch = &tlb->batch;

	tlb->mm->context.flush_mm = 1;
	if (*batch == NULL) {
		*batch = (struct mmu_table_batch *)
			__get_free_page(GFP_NOWAIT | __GFP_NOWARN);
		if (*batch == NULL) {
			__tlb_flush_mm_lazy(tlb->mm);
			tlb_remove_table_one(table);
			return;
		}
		(*batch)->nr = 0;
	}
	(*batch)->tables[(*batch)->nr++] = table;
	if ((*batch)->nr == MAX_TABLE_BATCH)
		tlb_flush_mmu(tlb);
}

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static inline void thp_split_vma(struct vm_area_struct *vma)
{
	unsigned long addr;

	for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE)
		follow_page(vma, addr, FOLL_SPLIT);
}

static inline void thp_split_mm(struct mm_struct *mm)
{
	struct vm_area_struct *vma;

	for (vma = mm->mmap; vma != NULL; vma = vma->vm_next) {
		thp_split_vma(vma);
		vma->vm_flags &= ~VM_HUGEPAGE;
		vma->vm_flags |= VM_NOHUGEPAGE;
	}
	mm->def_flags |= VM_NOHUGEPAGE;
}
#else
static inline void thp_split_mm(struct mm_struct *mm)
{
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

/*
 * switch on pgstes for the current userspace process (for kvm)
 */
int s390_enable_sie(void)
{
	struct mm_struct *mm = current->mm;

	/* Do we have pgstes? If yes, we are done */
	if (mm_has_pgste(mm))
		return 0;
	/* Fail if the page tables are 2K */
	if (!mm_alloc_pgste(mm))
		return -EINVAL;
	down_write(&mm->mmap_sem);
	mm->context.has_pgste = 1;
	/* split thp mappings and disable thp for future mappings */
	thp_split_mm(mm);
	up_write(&mm->mmap_sem);
	return 0;
}
EXPORT_SYMBOL_GPL(s390_enable_sie);

/*
 * Enable storage key handling from now on and initialize the storage
 * keys with the default key.
 */
static int __s390_enable_skey(pte_t *pte, unsigned long addr,
			      unsigned long next, struct mm_walk *walk)
{
	unsigned long ptev;
	pgste_t pgste;

	pgste = pgste_get_lock(pte);
	/*
	 * Remove all zero page mappings; after establishing a policy that
	 * forbids zero page mappings, subsequent faults for such a page will
	 * get fresh anonymous pages.
	 */
	if (is_zero_pfn(pte_pfn(*pte))) {
		ptep_flush_direct(walk->mm, addr, pte);
		pte_val(*pte) = _PAGE_INVALID;
	}
	/* Clear storage key */
	pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT |
			      PGSTE_GR_BIT | PGSTE_GC_BIT);
	ptev = pte_val(*pte);
	if (!(ptev & _PAGE_INVALID) && (ptev & _PAGE_WRITE))
		page_set_storage_key(ptev & PAGE_MASK, PAGE_DEFAULT_KEY, 1);
	pgste_set_unlock(pte, pgste);
	return 0;
}

int s390_enable_skey(void)
{
	struct mm_walk walk = { .pte_entry = __s390_enable_skey };
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	int rc = 0;

	down_write(&mm->mmap_sem);
	if (mm_use_skey(mm))
		goto out_up;

	mm->context.use_skey = 1;
	for (vma = mm->mmap; vma; vma = vma->vm_next) {
		if (ksm_madvise(vma, vma->vm_start, vma->vm_end,
				MADV_UNMERGEABLE, &vma->vm_flags)) {
			mm->context.use_skey = 0;
			rc = -ENOMEM;
			goto out_up;
		}
	}
	mm->def_flags &= ~VM_MERGEABLE;

	walk.mm = mm;
	walk_page_range(0, TASK_SIZE, &walk);

out_up:
	up_write(&mm->mmap_sem);
	return rc;
}
EXPORT_SYMBOL_GPL(s390_enable_skey);

/*
 * Reset CMMA state, make all pages stable again.
 */
static int __s390_reset_cmma(pte_t *pte, unsigned long addr,
			     unsigned long next, struct mm_walk *walk)
{
	pgste_t pgste;

	pgste = pgste_get_lock(pte);
	pgste_val(pgste) &= ~_PGSTE_GPS_USAGE_MASK;
	pgste_set_unlock(pte, pgste);
	return 0;
}

void s390_reset_cmma(struct mm_struct *mm)
{
	struct mm_walk walk = { .pte_entry = __s390_reset_cmma };

	down_write(&mm->mmap_sem);
	walk.mm = mm;
	walk_page_range(0, TASK_SIZE, &walk);
	up_write(&mm->mmap_sem);
}
EXPORT_SYMBOL_GPL(s390_reset_cmma);

/*
 * Test and reset if a guest page is dirty
 */
bool gmap_test_and_clear_dirty(unsigned long address, struct gmap *gmap)
{
	pte_t *pte;
	spinlock_t *ptl;
	bool dirty = false;

	pte = get_locked_pte(gmap->mm, address, &ptl);
	if (unlikely(!pte))
		return false;

	if (ptep_test_and_clear_user_dirty(gmap->mm, address, pte))
		dirty = true;

	spin_unlock(ptl);
	return dirty;
}
EXPORT_SYMBOL_GPL(gmap_test_and_clear_dirty);

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
int pmdp_clear_flush_young(struct vm_area_struct *vma, unsigned long address,
			   pmd_t *pmdp)
{
	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
	/* No need to flush TLB
	 * On s390 reference bits are in storage key and never in TLB */
	return pmdp_test_and_clear_young(vma, address, pmdp);
}

int pmdp_set_access_flags(struct vm_area_struct *vma,
			  unsigned long address, pmd_t *pmdp,
			  pmd_t entry, int dirty)
{
	VM_BUG_ON(address & ~HPAGE_PMD_MASK);

	entry = pmd_mkyoung(entry);
	if (dirty)
		entry = pmd_mkdirty(entry);
	if (pmd_same(*pmdp, entry))
		return 0;
	pmdp_invalidate(vma, address, pmdp);
	set_pmd_at(vma->vm_mm, address, pmdp, entry);
	return 1;
}

static void pmdp_splitting_flush_sync(void *arg)
{
	/* Simply deliver the interrupt */
}

void pmdp_splitting_flush(struct vm_area_struct *vma, unsigned long address,
			  pmd_t *pmdp)
{
	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
	if (!test_and_set_bit(_SEGMENT_ENTRY_SPLIT_BIT,
			      (unsigned long *) pmdp)) {
		/* need to serialize against gup-fast (IRQ disabled) */
		smp_call_function(pmdp_splitting_flush_sync, NULL, 1);
	}
}

void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
				pgtable_t pgtable)
{
	struct list_head *lh = (struct list_head *) pgtable;

	assert_spin_locked(pmd_lockptr(mm, pmdp));

	/* FIFO */
	if (!pmd_huge_pte(mm, pmdp))
		INIT_LIST_HEAD(lh);
	else
		list_add(lh, (struct list_head *) pmd_huge_pte(mm, pmdp));
	pmd_huge_pte(mm, pmdp) = pgtable;
}

pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
{
	struct list_head *lh;
	pgtable_t pgtable;
	pte_t *ptep;

	assert_spin_locked(pmd_lockptr(mm, pmdp));

	/* FIFO */
	pgtable = pmd_huge_pte(mm, pmdp);
	lh = (struct list_head *) pgtable;
	if (list_empty(lh))
		pmd_huge_pte(mm, pmdp) = NULL;
	else {
		pmd_huge_pte(mm, pmdp) = (pgtable_t) lh->next;
		list_del(lh);
	}
	ptep = (pte_t *) pgtable;
	pte_val(*ptep) = _PAGE_INVALID;
	ptep++;
	pte_val(*ptep) = _PAGE_INVALID;
	return pgtable;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */