/*
 * KVM guest address space mapping code
 *
 * Copyright IBM Corp. 2007, 2016
 * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
 */

#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/swapops.h>
#include <linux/ksm.h>
#include <linux/mman.h>

#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/gmap.h>
#include <asm/tlb.h>

/**
 * gmap_alloc - allocate a guest address space
 * @mm: pointer to the parent mm_struct
 * @limit: maximum address of the gmap address space
 *
 * Returns a guest address space structure.
 */
struct gmap *gmap_alloc(struct mm_struct *mm, unsigned long limit)
{
	struct gmap *gmap;
	struct page *page;
	unsigned long *table;
	unsigned long etype, atype;

	if (limit < (1UL << 31)) {
		limit = (1UL << 31) - 1;
		atype = _ASCE_TYPE_SEGMENT;
		etype = _SEGMENT_ENTRY_EMPTY;
	} else if (limit < (1UL << 42)) {
		limit = (1UL << 42) - 1;
		atype = _ASCE_TYPE_REGION3;
		etype = _REGION3_ENTRY_EMPTY;
	} else if (limit < (1UL << 53)) {
		limit = (1UL << 53) - 1;
		atype = _ASCE_TYPE_REGION2;
		etype = _REGION2_ENTRY_EMPTY;
	} else {
		limit = -1UL;
		atype = _ASCE_TYPE_REGION1;
		etype = _REGION1_ENTRY_EMPTY;
	}
	gmap = kzalloc(sizeof(struct gmap), GFP_KERNEL);
	if (!gmap)
		goto out;
	INIT_LIST_HEAD(&gmap->crst_list);
	INIT_RADIX_TREE(&gmap->guest_to_host, GFP_KERNEL);
	INIT_RADIX_TREE(&gmap->host_to_guest, GFP_ATOMIC);
	spin_lock_init(&gmap->guest_table_lock);
	gmap->mm = mm;
	page = alloc_pages(GFP_KERNEL, 2);
	if (!page)
		goto out_free;
	page->index = 0;
	list_add(&page->lru, &gmap->crst_list);
	table = (unsigned long *) page_to_phys(page);
	crst_table_init(table, etype);
	gmap->table = table;
	gmap->asce = atype | _ASCE_TABLE_LENGTH |
		_ASCE_USER_BITS | __pa(table);
	gmap->asce_end = limit;
	down_write(&mm->mmap_sem);
	list_add(&gmap->list, &mm->context.gmap_list);
	up_write(&mm->mmap_sem);
	return gmap;

out_free:
	kfree(gmap);
out:
	return NULL;
}
EXPORT_SYMBOL_GPL(gmap_alloc);

static void gmap_flush_tlb(struct gmap *gmap)
{
	if (MACHINE_HAS_IDTE)
		__tlb_flush_asce(gmap->mm, gmap->asce);
	else
		__tlb_flush_global();
}

static void gmap_radix_tree_free(struct radix_tree_root *root)
{
	struct radix_tree_iter iter;
	unsigned long indices[16];
	unsigned long index;
	void **slot;
	int i, nr;

	/* A radix tree is freed by deleting all of its entries */
	index = 0;
	do {
		nr = 0;
		radix_tree_for_each_slot(slot, root, &iter, index) {
			indices[nr] = iter.index;
			if (++nr == 16)
				break;
		}
		for (i = 0; i < nr; i++) {
			index = indices[i];
			radix_tree_delete(root, index);
		}
	} while (nr > 0);
}

/**
 * gmap_free - free a guest address space
 * @gmap: pointer to the guest address space structure
 */
void gmap_free(struct gmap *gmap)
{
	struct page *page, *next;

	/* Flush tlb. */
	if (MACHINE_HAS_IDTE)
		__tlb_flush_asce(gmap->mm, gmap->asce);
	else
		__tlb_flush_global();

	/* Free all segment & region tables. */
	list_for_each_entry_safe(page, next, &gmap->crst_list, lru)
		__free_pages(page, 2);
	gmap_radix_tree_free(&gmap->guest_to_host);
	gmap_radix_tree_free(&gmap->host_to_guest);
	down_write(&gmap->mm->mmap_sem);
	list_del(&gmap->list);
	up_write(&gmap->mm->mmap_sem);
	kfree(gmap);
}
EXPORT_SYMBOL_GPL(gmap_free);
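/*
 * Illustrative sketch (hypothetical caller, not part of this file): a user
 * of this API would pair gmap_alloc() and gmap_free() roughly as below.
 * The 1 TB limit is an arbitrary example value; because it is smaller than
 * 1UL << 42, gmap_alloc() picks a region-3 table for it.
 *
 *	struct gmap *g;
 *
 *	g = gmap_alloc(current->mm, (1UL << 40) - 1);
 *	if (!g)
 *		return -ENOMEM;
 *	...use the guest address space...
 *	gmap_free(g);
 */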
/**
 * gmap_enable - switch primary space to the guest address space
 * @gmap: pointer to the guest address space structure
 */
void gmap_enable(struct gmap *gmap)
{
	S390_lowcore.gmap = (unsigned long) gmap;
}
EXPORT_SYMBOL_GPL(gmap_enable);

/**
 * gmap_disable - switch back to the standard primary address space
 * @gmap: pointer to the guest address space structure
 */
void gmap_disable(struct gmap *gmap)
{
	S390_lowcore.gmap = 0UL;
}
EXPORT_SYMBOL_GPL(gmap_disable);

/*
 * gmap_alloc_table is assumed to be called with mmap_sem held
 */
static int gmap_alloc_table(struct gmap *gmap, unsigned long *table,
			    unsigned long init, unsigned long gaddr)
{
	struct page *page;
	unsigned long *new;

	/* since we don't free the gmap table until gmap_free we can unlock */
	page = alloc_pages(GFP_KERNEL, 2);
	if (!page)
		return -ENOMEM;
	new = (unsigned long *) page_to_phys(page);
	crst_table_init(new, init);
	spin_lock(&gmap->mm->page_table_lock);
	if (*table & _REGION_ENTRY_INVALID) {
		list_add(&page->lru, &gmap->crst_list);
		*table = (unsigned long) new | _REGION_ENTRY_LENGTH |
			(*table & _REGION_ENTRY_TYPE_MASK);
		page->index = gaddr;
		page = NULL;
	}
	spin_unlock(&gmap->mm->page_table_lock);
	if (page)
		__free_pages(page, 2);
	return 0;
}

/**
 * __gmap_segment_gaddr - find virtual address from segment pointer
 * @entry: pointer to a segment table entry in the guest address space
 *
 * Returns the virtual address in the guest address space for the segment
 */
static unsigned long __gmap_segment_gaddr(unsigned long *entry)
{
	struct page *page;
	unsigned long offset, mask;

	offset = (unsigned long) entry / sizeof(unsigned long);
	offset = (offset & (PTRS_PER_PMD - 1)) * PMD_SIZE;
	mask = ~(PTRS_PER_PMD * sizeof(pmd_t) - 1);
	page = virt_to_page((void *)((unsigned long) entry & mask));
	return page->index + offset;
}

/**
 * __gmap_unlink_by_vmaddr - unlink a single segment via a host address
 * @gmap: pointer to the guest address space structure
 * @vmaddr: address in the host process address space
 *
 * Returns 1 if a TLB flush is required
 */
static int __gmap_unlink_by_vmaddr(struct gmap *gmap, unsigned long vmaddr)
{
	unsigned long *entry;
	int flush = 0;

	spin_lock(&gmap->guest_table_lock);
	entry = radix_tree_delete(&gmap->host_to_guest, vmaddr >> PMD_SHIFT);
	if (entry) {
		flush = (*entry != _SEGMENT_ENTRY_INVALID);
		*entry = _SEGMENT_ENTRY_INVALID;
	}
	spin_unlock(&gmap->guest_table_lock);
	return flush;
}

/**
 * __gmap_unmap_by_gaddr - unmap a single segment via a guest address
 * @gmap: pointer to the guest address space structure
 * @gaddr: address in the guest address space
 *
 * Returns 1 if a TLB flush is required
 */
static int __gmap_unmap_by_gaddr(struct gmap *gmap, unsigned long gaddr)
{
	unsigned long vmaddr;

	vmaddr = (unsigned long) radix_tree_delete(&gmap->guest_to_host,
						   gaddr >> PMD_SHIFT);
	return vmaddr ? __gmap_unlink_by_vmaddr(gmap, vmaddr) : 0;
}
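/*
 * Worked example for __gmap_segment_gaddr() (illustrative only, assumed
 * numbers): gmap_alloc_table() stores the guest address covered by a table
 * in page->index of the table's page. With s390's 1 MB segments (the code
 * above indexes segment tables with gaddr >> 20), an entry pointer sitting
 * in slot 5 of a segment table page whose page->index is 0x80000000 maps
 * back to the guest address 0x80000000 + 5 * PMD_SIZE == 0x80500000.
 */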
/**
 * gmap_unmap_segment - unmap segment from the guest address space
 * @gmap: pointer to the guest address space structure
 * @to: address in the guest address space
 * @len: length of the memory area to unmap
 *
 * Returns 0 if the unmap succeeded, -EINVAL if not.
 */
int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len)
{
	unsigned long off;
	int flush;

	if ((to | len) & (PMD_SIZE - 1))
		return -EINVAL;
	if (len == 0 || to + len < to)
		return -EINVAL;

	flush = 0;
	down_write(&gmap->mm->mmap_sem);
	for (off = 0; off < len; off += PMD_SIZE)
		flush |= __gmap_unmap_by_gaddr(gmap, to + off);
	up_write(&gmap->mm->mmap_sem);
	if (flush)
		gmap_flush_tlb(gmap);
	return 0;
}
EXPORT_SYMBOL_GPL(gmap_unmap_segment);

/**
 * gmap_map_segment - map a segment to the guest address space
 * @gmap: pointer to the guest address space structure
 * @from: source address in the parent address space
 * @to: target address in the guest address space
 * @len: length of the memory area to map
 *
 * Returns 0 if the mmap succeeded, -EINVAL or -ENOMEM if not.
 */
int gmap_map_segment(struct gmap *gmap, unsigned long from,
		     unsigned long to, unsigned long len)
{
	unsigned long off;
	int flush;

	if ((from | to | len) & (PMD_SIZE - 1))
		return -EINVAL;
	if (len == 0 || from + len < from || to + len < to ||
	    from + len - 1 > TASK_MAX_SIZE || to + len - 1 > gmap->asce_end)
		return -EINVAL;

	flush = 0;
	down_write(&gmap->mm->mmap_sem);
	for (off = 0; off < len; off += PMD_SIZE) {
		/* Remove old translation */
		flush |= __gmap_unmap_by_gaddr(gmap, to + off);
		/* Store new translation */
		if (radix_tree_insert(&gmap->guest_to_host,
				      (to + off) >> PMD_SHIFT,
				      (void *) from + off))
			break;
	}
	up_write(&gmap->mm->mmap_sem);
	if (flush)
		gmap_flush_tlb(gmap);
	if (off >= len)
		return 0;
	gmap_unmap_segment(gmap, to, len);
	return -ENOMEM;
}
EXPORT_SYMBOL_GPL(gmap_map_segment);

/**
 * __gmap_translate - translate a guest address to a user space address
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: guest address
 *
 * Returns user space address which corresponds to the guest address or
 * -EFAULT if no such mapping exists.
 * This function does not establish potentially missing page table entries.
 * The mmap_sem of the mm that belongs to the address space must be held
 * when this function gets called.
 */
unsigned long __gmap_translate(struct gmap *gmap, unsigned long gaddr)
{
	unsigned long vmaddr;

	vmaddr = (unsigned long)
		radix_tree_lookup(&gmap->guest_to_host, gaddr >> PMD_SHIFT);
	return vmaddr ? (vmaddr | (gaddr & ~PMD_MASK)) : -EFAULT;
}
EXPORT_SYMBOL_GPL(__gmap_translate);

/**
 * gmap_translate - translate a guest address to a user space address
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: guest address
 *
 * Returns user space address which corresponds to the guest address or
 * -EFAULT if no such mapping exists.
 * This function does not establish potentially missing page table entries.
 */
unsigned long gmap_translate(struct gmap *gmap, unsigned long gaddr)
{
	unsigned long rc;

	down_read(&gmap->mm->mmap_sem);
	rc = __gmap_translate(gmap, gaddr);
	up_read(&gmap->mm->mmap_sem);
	return rc;
}
EXPORT_SYMBOL_GPL(gmap_translate);
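/*
 * Illustrative sketch (hypothetical caller, not part of this file): map a
 * segment-aligned chunk of the parent address space into the guest and
 * translate a guest address back to a user space address. "uaddr" and
 * "guest_base" are made-up names; both must be PMD_SIZE (1 MB) aligned,
 * since gmap_map_segment() rejects unaligned arguments with -EINVAL.
 *
 *	int rc;
 *	unsigned long vmaddr;
 *
 *	rc = gmap_map_segment(gmap, uaddr, guest_base, 16 * PMD_SIZE);
 *	if (rc)
 *		return rc;
 *	vmaddr = gmap_translate(gmap, guest_base + 0x1234);
 *	if (IS_ERR_VALUE(vmaddr))
 *		return (int) vmaddr;
 */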
/**
 * gmap_unlink - disconnect a page table from the gmap shadow tables
 * @mm: pointer to the parent mm_struct
 * @table: pointer to the host page table
 * @vmaddr: vm address associated with the host page table
 */
void gmap_unlink(struct mm_struct *mm, unsigned long *table,
		 unsigned long vmaddr)
{
	struct gmap *gmap;
	int flush;

	list_for_each_entry(gmap, &mm->context.gmap_list, list) {
		flush = __gmap_unlink_by_vmaddr(gmap, vmaddr);
		if (flush)
			gmap_flush_tlb(gmap);
	}
}

/**
 * __gmap_link - set up shadow page tables to connect a host to a guest address
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: guest address
 * @vmaddr: vm address
 *
 * Returns 0 on success, -ENOMEM for out of memory conditions, and -EFAULT
 * if the vm address is already mapped to a different guest segment.
 * The mmap_sem of the mm that belongs to the address space must be held
 * when this function gets called.
 */
int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr)
{
	struct mm_struct *mm;
	unsigned long *table;
	spinlock_t *ptl;
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	int rc;

	/* Create higher level tables in the gmap page table */
	table = gmap->table;
	if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION1) {
		table += (gaddr >> 53) & 0x7ff;
		if ((*table & _REGION_ENTRY_INVALID) &&
		    gmap_alloc_table(gmap, table, _REGION2_ENTRY_EMPTY,
				     gaddr & 0xffe0000000000000UL))
			return -ENOMEM;
		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
	}
	if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION2) {
		table += (gaddr >> 42) & 0x7ff;
		if ((*table & _REGION_ENTRY_INVALID) &&
		    gmap_alloc_table(gmap, table, _REGION3_ENTRY_EMPTY,
				     gaddr & 0xfffffc0000000000UL))
			return -ENOMEM;
		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
	}
	if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION3) {
		table += (gaddr >> 31) & 0x7ff;
		if ((*table & _REGION_ENTRY_INVALID) &&
		    gmap_alloc_table(gmap, table, _SEGMENT_ENTRY_EMPTY,
				     gaddr & 0xffffffff80000000UL))
			return -ENOMEM;
		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
	}
	table += (gaddr >> 20) & 0x7ff;
	/* Walk the parent mm page table */
	mm = gmap->mm;
	pgd = pgd_offset(mm, vmaddr);
	VM_BUG_ON(pgd_none(*pgd));
	pud = pud_offset(pgd, vmaddr);
	VM_BUG_ON(pud_none(*pud));
	pmd = pmd_offset(pud, vmaddr);
	VM_BUG_ON(pmd_none(*pmd));
	/* large pmds cannot yet be handled */
	if (pmd_large(*pmd))
		return -EFAULT;
	/* Link gmap segment table entry location to page table. */
	rc = radix_tree_preload(GFP_KERNEL);
	if (rc)
		return rc;
	ptl = pmd_lock(mm, pmd);
	spin_lock(&gmap->guest_table_lock);
	if (*table == _SEGMENT_ENTRY_INVALID) {
		rc = radix_tree_insert(&gmap->host_to_guest,
				       vmaddr >> PMD_SHIFT, table);
		if (!rc)
			*table = pmd_val(*pmd);
	} else
		rc = 0;
	spin_unlock(&gmap->guest_table_lock);
	spin_unlock(ptl);
	radix_tree_preload_end();
	return rc;
}
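/*
 * Index arithmetic in __gmap_link(), spelled out with assumed example
 * values (illustrative only): each translation level consumes 11 bits of
 * the guest address, so a 4-level (region-1) gmap resolves
 * gaddr = (3UL << 53) | (5UL << 42) | (7UL << 31) | (9UL << 20) via
 * region-1 slot 3, region-2 slot 5, region-3 slot 7 and finally segment
 * slot 9, allocating any missing intermediate table with
 * gmap_alloc_table() on the way down.
 */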
/**
 * gmap_fault - resolve a fault on a guest address
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: guest address
 * @fault_flags: flags to pass down to handle_mm_fault()
 *
 * Returns 0 on success, -ENOMEM for out of memory conditions, and -EFAULT
 * if the vm address is already mapped to a different guest segment.
 */
int gmap_fault(struct gmap *gmap, unsigned long gaddr,
	       unsigned int fault_flags)
{
	unsigned long vmaddr;
	int rc;
	bool unlocked;

	down_read(&gmap->mm->mmap_sem);

retry:
	unlocked = false;
	vmaddr = __gmap_translate(gmap, gaddr);
	if (IS_ERR_VALUE(vmaddr)) {
		rc = vmaddr;
		goto out_up;
	}
	if (fixup_user_fault(current, gmap->mm, vmaddr, fault_flags,
			     &unlocked)) {
		rc = -EFAULT;
		goto out_up;
	}
	/*
	 * In case fixup_user_fault unlocked the mmap_sem during the
	 * fault-in, redo __gmap_translate to not race with a
	 * map/unmap_segment.
	 */
	if (unlocked)
		goto retry;

	rc = __gmap_link(gmap, gaddr, vmaddr);
out_up:
	up_read(&gmap->mm->mmap_sem);
	return rc;
}
EXPORT_SYMBOL_GPL(gmap_fault);

/*
 * this function is assumed to be called with mmap_sem held
 */
void __gmap_zap(struct gmap *gmap, unsigned long gaddr)
{
	unsigned long vmaddr;
	spinlock_t *ptl;
	pte_t *ptep;

	/* Find the vm address for the guest address */
	vmaddr = (unsigned long) radix_tree_lookup(&gmap->guest_to_host,
						   gaddr >> PMD_SHIFT);
	if (vmaddr) {
		vmaddr |= gaddr & ~PMD_MASK;
		/* Get pointer to the page table entry */
		ptep = get_locked_pte(gmap->mm, vmaddr, &ptl);
		if (likely(ptep))
			ptep_zap_unused(gmap->mm, vmaddr, ptep, 0);
		pte_unmap_unlock(ptep, ptl);
	}
}
EXPORT_SYMBOL_GPL(__gmap_zap);

void gmap_discard(struct gmap *gmap, unsigned long from, unsigned long to)
{
	unsigned long gaddr, vmaddr, size;
	struct vm_area_struct *vma;

	down_read(&gmap->mm->mmap_sem);
	for (gaddr = from; gaddr < to;
	     gaddr = (gaddr + PMD_SIZE) & PMD_MASK) {
		/* Find the vm address for the guest address */
		vmaddr = (unsigned long)
			radix_tree_lookup(&gmap->guest_to_host,
					  gaddr >> PMD_SHIFT);
		if (!vmaddr)
			continue;
		vmaddr |= gaddr & ~PMD_MASK;
		/* Find vma in the parent mm */
		vma = find_vma(gmap->mm, vmaddr);
		size = min(to - gaddr, PMD_SIZE - (gaddr & ~PMD_MASK));
		zap_page_range(vma, vmaddr, size, NULL);
	}
	up_read(&gmap->mm->mmap_sem);
}
EXPORT_SYMBOL_GPL(gmap_discard);

static LIST_HEAD(gmap_notifier_list);
static DEFINE_SPINLOCK(gmap_notifier_lock);

/**
 * gmap_register_ipte_notifier - register a pte invalidation callback
 * @nb: pointer to the gmap notifier block
 */
void gmap_register_ipte_notifier(struct gmap_notifier *nb)
{
	spin_lock(&gmap_notifier_lock);
	list_add(&nb->list, &gmap_notifier_list);
	spin_unlock(&gmap_notifier_lock);
}
EXPORT_SYMBOL_GPL(gmap_register_ipte_notifier);
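/*
 * Illustrative sketch (hypothetical caller, not part of this file): resolve
 * a write access of the guest to "gaddr". gmap_fault() translates the guest
 * address, faults the backing page in via fixup_user_fault() and links the
 * host page table into the gmap with __gmap_link().
 *
 *	rc = gmap_fault(gmap, gaddr, FAULT_FLAG_WRITE);
 *	if (rc == -EFAULT)
 *		...the guest address is not mapped, report the error...
 *	else if (rc)
 *		return rc;
 */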
/**
 * gmap_unregister_ipte_notifier - remove a pte invalidation callback
 * @nb: pointer to the gmap notifier block
 */
void gmap_unregister_ipte_notifier(struct gmap_notifier *nb)
{
	spin_lock(&gmap_notifier_lock);
	list_del_init(&nb->list);
	spin_unlock(&gmap_notifier_lock);
}
EXPORT_SYMBOL_GPL(gmap_unregister_ipte_notifier);

/**
 * gmap_ipte_notify - mark a range of ptes for invalidation notification
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: virtual address in the guest address space
 * @len: size of area
 *
 * Returns 0 if for each page in the given range a gmap mapping exists and
 * the invalidation notification could be set. If the gmap mapping is missing
 * for one or more pages -EFAULT is returned. If no memory could be allocated
 * -ENOMEM is returned. This function establishes missing page table entries.
 */
int gmap_ipte_notify(struct gmap *gmap, unsigned long gaddr, unsigned long len)
{
	unsigned long addr;
	spinlock_t *ptl;
	pte_t *ptep;
	bool unlocked;
	int rc = 0;

	if ((gaddr & ~PAGE_MASK) || (len & ~PAGE_MASK))
		return -EINVAL;
	down_read(&gmap->mm->mmap_sem);
	while (len) {
		unlocked = false;
		/* Convert gmap address and connect the page tables */
		addr = __gmap_translate(gmap, gaddr);
		if (IS_ERR_VALUE(addr)) {
			rc = addr;
			break;
		}
		/* Get the page mapped */
		if (fixup_user_fault(current, gmap->mm, addr, FAULT_FLAG_WRITE,
				     &unlocked)) {
			rc = -EFAULT;
			break;
		}
		/* While trying to map mmap_sem got unlocked. Let us retry */
		if (unlocked)
			continue;
		rc = __gmap_link(gmap, gaddr, addr);
		if (rc)
			break;
		/* Walk the process page table, lock and get pte pointer */
		ptep = get_locked_pte(gmap->mm, addr, &ptl);
		VM_BUG_ON(!ptep);
		/* Set notification bit in the pgste of the pte */
		if ((pte_val(*ptep) & (_PAGE_INVALID | _PAGE_PROTECT)) == 0) {
			ptep_set_notify(gmap->mm, addr, ptep);
			gaddr += PAGE_SIZE;
			len -= PAGE_SIZE;
		}
		pte_unmap_unlock(ptep, ptl);
	}
	up_read(&gmap->mm->mmap_sem);
	return rc;
}
EXPORT_SYMBOL_GPL(gmap_ipte_notify);
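/*
 * Illustrative sketch (hypothetical caller, not part of this file): register
 * an invalidation callback and arm notification for one guest page. The
 * gmap_notifier layout is only sketched from how this file uses it
 * (nb->list, nb->notifier_call); its real definition lives in asm/gmap.h.
 *
 *	static void my_ipte_callback(struct gmap *gmap, unsigned long gaddr)
 *	{
 *		...react to the invalidated guest address...
 *	}
 *
 *	static struct gmap_notifier my_notifier = {
 *		.notifier_call = my_ipte_callback,
 *	};
 *
 *	gmap_register_ipte_notifier(&my_notifier);
 *	rc = gmap_ipte_notify(gmap, gaddr & PAGE_MASK, PAGE_SIZE);
 */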
/**
 * ptep_notify - call all invalidation callbacks for a specific pte.
 * @mm: pointer to the process mm_struct
 * @vmaddr: virtual address in the process address space
 * @pte: pointer to the page table entry
 *
 * This function is assumed to be called with the page table lock held
 * for the pte to notify.
 */
void ptep_notify(struct mm_struct *mm, unsigned long vmaddr, pte_t *pte)
{
	unsigned long offset, gaddr;
	unsigned long *table;
	struct gmap_notifier *nb;
	struct gmap *gmap;

	offset = ((unsigned long) pte) & (255 * sizeof(pte_t));
	offset = offset * (4096 / sizeof(pte_t));
	spin_lock(&gmap_notifier_lock);
	list_for_each_entry(gmap, &mm->context.gmap_list, list) {
		table = radix_tree_lookup(&gmap->host_to_guest,
					  vmaddr >> PMD_SHIFT);
		if (!table)
			continue;
		gaddr = __gmap_segment_gaddr(table) + offset;
		list_for_each_entry(nb, &gmap_notifier_list, list)
			nb->notifier_call(gmap, gaddr);
	}
	spin_unlock(&gmap_notifier_lock);
}
EXPORT_SYMBOL_GPL(ptep_notify);

static inline void thp_split_mm(struct mm_struct *mm)
{
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	struct vm_area_struct *vma;
	unsigned long addr;

	for (vma = mm->mmap; vma != NULL; vma = vma->vm_next) {
		for (addr = vma->vm_start;
		     addr < vma->vm_end;
		     addr += PAGE_SIZE)
			follow_page(vma, addr, FOLL_SPLIT);
		vma->vm_flags &= ~VM_HUGEPAGE;
		vma->vm_flags |= VM_NOHUGEPAGE;
	}
	mm->def_flags |= VM_NOHUGEPAGE;
#endif
}

/*
 * Switch on pgstes for the current userspace process (for kvm).
 */
int s390_enable_sie(void)
{
	struct mm_struct *mm = current->mm;

	/* Do we have pgstes? if yes, we are done */
	if (mm_has_pgste(mm))
		return 0;
	/* Fail if the page tables are 2K */
	if (!mm_alloc_pgste(mm))
		return -EINVAL;
	down_write(&mm->mmap_sem);
	mm->context.has_pgste = 1;
	/* split thp mappings and disable thp for future mappings */
	thp_split_mm(mm);
	up_write(&mm->mmap_sem);
	return 0;
}
EXPORT_SYMBOL_GPL(s390_enable_sie);

/*
 * Enable storage key handling from now on and initialize the storage
 * keys with the default key.
 */
static int __s390_enable_skey(pte_t *pte, unsigned long addr,
			      unsigned long next, struct mm_walk *walk)
{
	/*
	 * Remove all zero page mappings after establishing a policy to
	 * forbid zero page mappings. Subsequent faults on those pages
	 * will get fresh anonymous pages.
	 */
	if (is_zero_pfn(pte_pfn(*pte)))
		ptep_xchg_direct(walk->mm, addr, pte, __pte(_PAGE_INVALID));
	/* Clear storage key */
	ptep_zap_key(walk->mm, addr, pte);
	return 0;
}

int s390_enable_skey(void)
{
	struct mm_walk walk = { .pte_entry = __s390_enable_skey };
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	int rc = 0;

	down_write(&mm->mmap_sem);
	if (mm_use_skey(mm))
		goto out_up;

	mm->context.use_skey = 1;
	for (vma = mm->mmap; vma; vma = vma->vm_next) {
		if (ksm_madvise(vma, vma->vm_start, vma->vm_end,
				MADV_UNMERGEABLE, &vma->vm_flags)) {
			mm->context.use_skey = 0;
			rc = -ENOMEM;
			goto out_up;
		}
	}
	mm->def_flags &= ~VM_MERGEABLE;

	walk.mm = mm;
	walk_page_range(0, TASK_SIZE, &walk);

out_up:
	up_write(&mm->mmap_sem);
	return rc;
}
EXPORT_SYMBOL_GPL(s390_enable_skey);
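/*
 * Illustrative ordering sketch (hypothetical caller, not part of this
 * file): a hypervisor would typically switch the process to page tables
 * with pgstes before creating any guest address space, and enable storage
 * keys only once the guest actually uses them.
 *
 *	if (s390_enable_sie())
 *		return -EINVAL;
 *	gmap = gmap_alloc(current->mm, limit);
 *	if (!gmap)
 *		return -ENOMEM;
 *	...
 *	if (guest_uses_storage_keys)
 *		rc = s390_enable_skey();
 */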
/*
 * Reset CMMA state, make all pages stable again.
 */
static int __s390_reset_cmma(pte_t *pte, unsigned long addr,
			     unsigned long next, struct mm_walk *walk)
{
	ptep_zap_unused(walk->mm, addr, pte, 1);
	return 0;
}

void s390_reset_cmma(struct mm_struct *mm)
{
	struct mm_walk walk = { .pte_entry = __s390_reset_cmma };

	down_write(&mm->mmap_sem);
	walk.mm = mm;
	walk_page_range(0, TASK_SIZE, &walk);
	up_write(&mm->mmap_sem);
}
EXPORT_SYMBOL_GPL(s390_reset_cmma);