/*
 * KVM guest address space mapping code
 *
 * Copyright IBM Corp. 2007, 2016
 *    Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
 */

#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/swapops.h>
#include <linux/ksm.h>
#include <linux/mman.h>

#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/gmap.h>
#include <asm/tlb.h>

/**
 * gmap_alloc - allocate a guest address space
 * @mm: pointer to the parent mm_struct
 * @limit: maximum address of the gmap address space
 *
 * Returns a guest address space structure.
 */
struct gmap *gmap_alloc(struct mm_struct *mm, unsigned long limit)
{
	struct gmap *gmap;
	struct page *page;
	unsigned long *table;
	unsigned long etype, atype;

	if (limit < (1UL << 31)) {
		limit = (1UL << 31) - 1;
		atype = _ASCE_TYPE_SEGMENT;
		etype = _SEGMENT_ENTRY_EMPTY;
	} else if (limit < (1UL << 42)) {
		limit = (1UL << 42) - 1;
		atype = _ASCE_TYPE_REGION3;
		etype = _REGION3_ENTRY_EMPTY;
	} else if (limit < (1UL << 53)) {
		limit = (1UL << 53) - 1;
		atype = _ASCE_TYPE_REGION2;
		etype = _REGION2_ENTRY_EMPTY;
	} else {
		limit = -1UL;
		atype = _ASCE_TYPE_REGION1;
		etype = _REGION1_ENTRY_EMPTY;
	}
	gmap = kzalloc(sizeof(struct gmap), GFP_KERNEL);
	if (!gmap)
		goto out;
	INIT_LIST_HEAD(&gmap->crst_list);
	INIT_RADIX_TREE(&gmap->guest_to_host, GFP_KERNEL);
	INIT_RADIX_TREE(&gmap->host_to_guest, GFP_ATOMIC);
	spin_lock_init(&gmap->guest_table_lock);
	gmap->mm = mm;
	page = alloc_pages(GFP_KERNEL, 2);
	if (!page)
		goto out_free;
	page->index = 0;
	list_add(&page->lru, &gmap->crst_list);
	table = (unsigned long *) page_to_phys(page);
	crst_table_init(table, etype);
	gmap->table = table;
	gmap->asce = atype | _ASCE_TABLE_LENGTH |
		_ASCE_USER_BITS | __pa(table);
	gmap->asce_end = limit;
	down_write(&mm->mmap_sem);
	list_add(&gmap->list, &mm->context.gmap_list);
	up_write(&mm->mmap_sem);
	return gmap;

out_free:
	kfree(gmap);
out:
	return NULL;
}
EXPORT_SYMBOL_GPL(gmap_alloc);

static void gmap_flush_tlb(struct gmap *gmap)
{
	if (MACHINE_HAS_IDTE)
		__tlb_flush_idte(gmap->asce);
	else
		__tlb_flush_global();
}

static void gmap_radix_tree_free(struct radix_tree_root *root)
{
	struct radix_tree_iter iter;
	unsigned long indices[16];
	unsigned long index;
	void **slot;
	int i, nr;

	/* A radix tree is freed by deleting all of its entries */
	index = 0;
	do {
		nr = 0;
		radix_tree_for_each_slot(slot, root, &iter, index) {
			indices[nr] = iter.index;
			if (++nr == 16)
				break;
		}
		for (i = 0; i < nr; i++) {
			index = indices[i];
			radix_tree_delete(root, index);
		}
	} while (nr > 0);
}

/**
 * gmap_free - free a guest address space
 * @gmap: pointer to the guest address space structure
 */
void gmap_free(struct gmap *gmap)
{
	struct page *page, *next;

	/* Flush tlb. */
	if (MACHINE_HAS_IDTE)
		__tlb_flush_idte(gmap->asce);
	else
		__tlb_flush_global();

	/* Free all segment & region tables. */
	list_for_each_entry_safe(page, next, &gmap->crst_list, lru)
		__free_pages(page, 2);
	gmap_radix_tree_free(&gmap->guest_to_host);
	gmap_radix_tree_free(&gmap->host_to_guest);
	down_write(&gmap->mm->mmap_sem);
	list_del(&gmap->list);
	up_write(&gmap->mm->mmap_sem);
	kfree(gmap);
}
EXPORT_SYMBOL_GPL(gmap_free);
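
/*
 * Illustrative sketch (not part of the original file): the typical life
 * cycle of a gmap as a KVM-like caller might use it, built only from the
 * functions defined in this file and declared in <asm/gmap.h>. The function
 * name gmap_example_lifecycle() and the limit chosen here are made up for
 * illustration.
 */
static inline int gmap_example_lifecycle(struct mm_struct *mm)
{
	struct gmap *gmap;

	/* a limit below 2^53 selects a region-second-table (4-level) ASCE */
	gmap = gmap_alloc(mm, (1UL << 44) - 1);
	if (!gmap)
		return -ENOMEM;
	gmap_enable(gmap);	/* switch to the guest address space */
	/* ... map segments, run the guest, resolve faults ... */
	gmap_disable(gmap);
	gmap_free(gmap);	/* flushes the TLB and releases all tables */
	return 0;
}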

/**
 * gmap_enable - switch primary space to the guest address space
 * @gmap: pointer to the guest address space structure
 */
void gmap_enable(struct gmap *gmap)
{
	S390_lowcore.gmap = (unsigned long) gmap;
}
EXPORT_SYMBOL_GPL(gmap_enable);

/**
 * gmap_disable - switch back to the standard primary address space
 * @gmap: pointer to the guest address space structure
 */
void gmap_disable(struct gmap *gmap)
{
	S390_lowcore.gmap = 0UL;
}
EXPORT_SYMBOL_GPL(gmap_disable);

/*
 * gmap_alloc_table is assumed to be called with mmap_sem held
 */
static int gmap_alloc_table(struct gmap *gmap, unsigned long *table,
			    unsigned long init, unsigned long gaddr)
{
	struct page *page;
	unsigned long *new;

	/* since we don't free the gmap table until gmap_free we can unlock */
	page = alloc_pages(GFP_KERNEL, 2);
	if (!page)
		return -ENOMEM;
	new = (unsigned long *) page_to_phys(page);
	crst_table_init(new, init);
	spin_lock(&gmap->mm->page_table_lock);
	if (*table & _REGION_ENTRY_INVALID) {
		list_add(&page->lru, &gmap->crst_list);
		*table = (unsigned long) new | _REGION_ENTRY_LENGTH |
			(*table & _REGION_ENTRY_TYPE_MASK);
		page->index = gaddr;
		page = NULL;
	}
	spin_unlock(&gmap->mm->page_table_lock);
	if (page)
		__free_pages(page, 2);
	return 0;
}

/**
 * __gmap_segment_gaddr - find virtual address from segment pointer
 * @entry: pointer to a segment table entry in the guest address space
 *
 * Returns the virtual address in the guest address space for the segment
 */
static unsigned long __gmap_segment_gaddr(unsigned long *entry)
{
	struct page *page;
	unsigned long offset, mask;

	offset = (unsigned long) entry / sizeof(unsigned long);
	offset = (offset & (PTRS_PER_PMD - 1)) * PMD_SIZE;
	mask = ~(PTRS_PER_PMD * sizeof(pmd_t) - 1);
	page = virt_to_page((void *)((unsigned long) entry & mask));
	return page->index + offset;
}

/**
 * __gmap_unlink_by_vmaddr - unlink a single segment via a host address
 * @gmap: pointer to the guest address space structure
 * @vmaddr: address in the host process address space
 *
 * Returns 1 if a TLB flush is required
 */
static int __gmap_unlink_by_vmaddr(struct gmap *gmap, unsigned long vmaddr)
{
	unsigned long *entry;
	int flush = 0;

	spin_lock(&gmap->guest_table_lock);
	entry = radix_tree_delete(&gmap->host_to_guest, vmaddr >> PMD_SHIFT);
	if (entry) {
		flush = (*entry != _SEGMENT_ENTRY_INVALID);
		*entry = _SEGMENT_ENTRY_INVALID;
	}
	spin_unlock(&gmap->guest_table_lock);
	return flush;
}

/**
 * __gmap_unmap_by_gaddr - unmap a single segment via a guest address
 * @gmap: pointer to the guest address space structure
 * @gaddr: address in the guest address space
 *
 * Returns 1 if a TLB flush is required
 */
static int __gmap_unmap_by_gaddr(struct gmap *gmap, unsigned long gaddr)
{
	unsigned long vmaddr;

	vmaddr = (unsigned long) radix_tree_delete(&gmap->guest_to_host,
						   gaddr >> PMD_SHIFT);
	return vmaddr ? __gmap_unlink_by_vmaddr(gmap, vmaddr) : 0;
}

/**
 * gmap_unmap_segment - unmap segment from the guest address space
 * @gmap: pointer to the guest address space structure
 * @to: address in the guest address space
 * @len: length of the memory area to unmap
 *
 * Returns 0 if the unmap succeeded, -EINVAL if not.
 */
int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len)
{
	unsigned long off;
	int flush;

	if ((to | len) & (PMD_SIZE - 1))
		return -EINVAL;
	if (len == 0 || to + len < to)
		return -EINVAL;

	flush = 0;
	down_write(&gmap->mm->mmap_sem);
	for (off = 0; off < len; off += PMD_SIZE)
		flush |= __gmap_unmap_by_gaddr(gmap, to + off);
	up_write(&gmap->mm->mmap_sem);
	if (flush)
		gmap_flush_tlb(gmap);
	return 0;
}
EXPORT_SYMBOL_GPL(gmap_unmap_segment);

/**
 * gmap_map_segment - map a segment to the guest address space
 * @gmap: pointer to the guest address space structure
 * @from: source address in the parent address space
 * @to: target address in the guest address space
 * @len: length of the memory area to map
 *
 * Returns 0 if the mmap succeeded, -EINVAL or -ENOMEM if not.
 */
int gmap_map_segment(struct gmap *gmap, unsigned long from,
		     unsigned long to, unsigned long len)
{
	unsigned long off;
	int flush;

	if ((from | to | len) & (PMD_SIZE - 1))
		return -EINVAL;
	if (len == 0 || from + len < from || to + len < to ||
	    from + len - 1 > TASK_MAX_SIZE || to + len - 1 > gmap->asce_end)
		return -EINVAL;

	flush = 0;
	down_write(&gmap->mm->mmap_sem);
	for (off = 0; off < len; off += PMD_SIZE) {
		/* Remove old translation */
		flush |= __gmap_unmap_by_gaddr(gmap, to + off);
		/* Store new translation */
		if (radix_tree_insert(&gmap->guest_to_host,
				      (to + off) >> PMD_SHIFT,
				      (void *) from + off))
			break;
	}
	up_write(&gmap->mm->mmap_sem);
	if (flush)
		gmap_flush_tlb(gmap);
	if (off >= len)
		return 0;
	gmap_unmap_segment(gmap, to, len);
	return -ENOMEM;
}
EXPORT_SYMBOL_GPL(gmap_map_segment);

/**
 * __gmap_translate - translate a guest address to a user space address
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: guest address
 *
 * Returns user space address which corresponds to the guest address or
 * -EFAULT if no such mapping exists.
 * This function does not establish potentially missing page table entries.
 * The mmap_sem of the mm that belongs to the address space must be held
 * when this function gets called.
 */
unsigned long __gmap_translate(struct gmap *gmap, unsigned long gaddr)
{
	unsigned long vmaddr;

	vmaddr = (unsigned long)
		radix_tree_lookup(&gmap->guest_to_host, gaddr >> PMD_SHIFT);
	return vmaddr ? (vmaddr | (gaddr & ~PMD_MASK)) : -EFAULT;
}
EXPORT_SYMBOL_GPL(__gmap_translate);

/**
 * gmap_translate - translate a guest address to a user space address
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: guest address
 *
 * Returns user space address which corresponds to the guest address or
 * -EFAULT if no such mapping exists.
 * This function does not establish potentially missing page table entries.
 */
unsigned long gmap_translate(struct gmap *gmap, unsigned long gaddr)
{
	unsigned long rc;

	down_read(&gmap->mm->mmap_sem);
	rc = __gmap_translate(gmap, gaddr);
	up_read(&gmap->mm->mmap_sem);
	return rc;
}
EXPORT_SYMBOL_GPL(gmap_translate);
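
/*
 * Illustrative sketch (not part of the original file): how the segment
 * mapping and translation primitives above fit together. A caller maps a
 * PMD-aligned chunk of its own address space into the guest and can then
 * resolve guest addresses back to user space addresses. The identifiers
 * example_map_and_translate, userspace and guest_base are made up.
 */
static inline int example_map_and_translate(struct gmap *gmap,
					    unsigned long userspace,
					    unsigned long guest_base)
{
	unsigned long vmaddr;
	int rc;

	/* from, to and len must all be PMD_SIZE (1 MB segment) aligned */
	rc = gmap_map_segment(gmap, userspace, guest_base, PMD_SIZE);
	if (rc)
		return rc;
	/* translation works at byte granularity within the segment */
	vmaddr = gmap_translate(gmap, guest_base + 0x100);
	if (IS_ERR_VALUE(vmaddr))
		return (int) vmaddr;
	/* here vmaddr == userspace + 0x100 */
	return 0;
}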

/**
 * gmap_unlink - disconnect a page table from the gmap shadow tables
 * @mm: pointer to the parent mm_struct
 * @table: pointer to the host page table
 * @vmaddr: vm address associated with the host page table
 */
void gmap_unlink(struct mm_struct *mm, unsigned long *table,
		 unsigned long vmaddr)
{
	struct gmap *gmap;
	int flush;

	list_for_each_entry(gmap, &mm->context.gmap_list, list) {
		flush = __gmap_unlink_by_vmaddr(gmap, vmaddr);
		if (flush)
			gmap_flush_tlb(gmap);
	}
}

/**
 * __gmap_link - set up shadow page tables to connect a host to a guest address
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: guest address
 * @vmaddr: vm address
 *
 * Returns 0 on success, -ENOMEM for out of memory conditions, and -EFAULT
 * if the vm address is already mapped to a different guest segment.
 * The mmap_sem of the mm that belongs to the address space must be held
 * when this function gets called.
 */
int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr)
{
	struct mm_struct *mm;
	unsigned long *table;
	spinlock_t *ptl;
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	int rc;

	/* Create higher level tables in the gmap page table */
	table = gmap->table;
	if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION1) {
		table += (gaddr >> 53) & 0x7ff;
		if ((*table & _REGION_ENTRY_INVALID) &&
		    gmap_alloc_table(gmap, table, _REGION2_ENTRY_EMPTY,
				     gaddr & 0xffe0000000000000UL))
			return -ENOMEM;
		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
	}
	if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION2) {
		table += (gaddr >> 42) & 0x7ff;
		if ((*table & _REGION_ENTRY_INVALID) &&
		    gmap_alloc_table(gmap, table, _REGION3_ENTRY_EMPTY,
				     gaddr & 0xfffffc0000000000UL))
			return -ENOMEM;
		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
	}
	if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION3) {
		table += (gaddr >> 31) & 0x7ff;
		if ((*table & _REGION_ENTRY_INVALID) &&
		    gmap_alloc_table(gmap, table, _SEGMENT_ENTRY_EMPTY,
				     gaddr & 0xffffffff80000000UL))
			return -ENOMEM;
		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
	}
	table += (gaddr >> 20) & 0x7ff;
	/* Walk the parent mm page table */
	mm = gmap->mm;
	pgd = pgd_offset(mm, vmaddr);
	VM_BUG_ON(pgd_none(*pgd));
	pud = pud_offset(pgd, vmaddr);
	VM_BUG_ON(pud_none(*pud));
	/* large puds cannot yet be handled */
	if (pud_large(*pud))
		return -EFAULT;
	pmd = pmd_offset(pud, vmaddr);
	VM_BUG_ON(pmd_none(*pmd));
	/* large pmds cannot yet be handled */
	if (pmd_large(*pmd))
		return -EFAULT;
	/* Link gmap segment table entry location to page table. */
	rc = radix_tree_preload(GFP_KERNEL);
	if (rc)
		return rc;
	ptl = pmd_lock(mm, pmd);
	spin_lock(&gmap->guest_table_lock);
	if (*table == _SEGMENT_ENTRY_INVALID) {
		rc = radix_tree_insert(&gmap->host_to_guest,
				       vmaddr >> PMD_SHIFT, table);
		if (!rc)
			*table = pmd_val(*pmd);
	} else
		rc = 0;
	spin_unlock(&gmap->guest_table_lock);
	spin_unlock(ptl);
	radix_tree_preload_end();
	return rc;
}
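
/*
 * Illustrative sketch (not part of the original file): the guest address
 * decomposition used by __gmap_link() above. Region and segment tables on
 * s390 have 2048 entries each, so every level consumes 11 address bits;
 * the shifts 53, 42, 31 and 20 select the region-first, region-second,
 * region-third and segment table index respectively. The helper name
 * gmap_example_table_index() is made up for illustration.
 */
static inline unsigned long gmap_example_table_index(unsigned long gaddr,
						     unsigned int shift)
{
	/* shift is one of 53, 42, 31 or 20 */
	return (gaddr >> shift) & 0x7ffUL;
}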

/**
 * gmap_fault - resolve a fault on a guest address
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: guest address
 * @fault_flags: flags to pass down to handle_mm_fault()
 *
 * Returns 0 on success, -ENOMEM for out of memory conditions, and -EFAULT
 * if the vm address is already mapped to a different guest segment.
 */
int gmap_fault(struct gmap *gmap, unsigned long gaddr,
	       unsigned int fault_flags)
{
	unsigned long vmaddr;
	int rc;
	bool unlocked;

	down_read(&gmap->mm->mmap_sem);

retry:
	unlocked = false;
	vmaddr = __gmap_translate(gmap, gaddr);
	if (IS_ERR_VALUE(vmaddr)) {
		rc = vmaddr;
		goto out_up;
	}
	if (fixup_user_fault(current, gmap->mm, vmaddr, fault_flags,
			     &unlocked)) {
		rc = -EFAULT;
		goto out_up;
	}
	/*
	 * In the case that fixup_user_fault unlocked the mmap_sem during
	 * faulting, redo __gmap_translate to not race with a map/unmap_segment.
	 */
	if (unlocked)
		goto retry;

	rc = __gmap_link(gmap, gaddr, vmaddr);
out_up:
	up_read(&gmap->mm->mmap_sem);
	return rc;
}
EXPORT_SYMBOL_GPL(gmap_fault);

/*
 * this function is assumed to be called with mmap_sem held
 */
void __gmap_zap(struct gmap *gmap, unsigned long gaddr)
{
	unsigned long vmaddr;
	spinlock_t *ptl;
	pte_t *ptep;

	/* Find the vm address for the guest address */
	vmaddr = (unsigned long) radix_tree_lookup(&gmap->guest_to_host,
						   gaddr >> PMD_SHIFT);
	if (vmaddr) {
		vmaddr |= gaddr & ~PMD_MASK;
		/* Get pointer to the page table entry */
		ptep = get_locked_pte(gmap->mm, vmaddr, &ptl);
		if (likely(ptep)) {
			ptep_zap_unused(gmap->mm, vmaddr, ptep, 0);
			pte_unmap_unlock(ptep, ptl);
		}
	}
}
EXPORT_SYMBOL_GPL(__gmap_zap);

void gmap_discard(struct gmap *gmap, unsigned long from, unsigned long to)
{
	unsigned long gaddr, vmaddr, size;
	struct vm_area_struct *vma;

	down_read(&gmap->mm->mmap_sem);
	for (gaddr = from; gaddr < to;
	     gaddr = (gaddr + PMD_SIZE) & PMD_MASK) {
		/* Find the vm address for the guest address */
		vmaddr = (unsigned long)
			radix_tree_lookup(&gmap->guest_to_host,
					  gaddr >> PMD_SHIFT);
		if (!vmaddr)
			continue;
		vmaddr |= gaddr & ~PMD_MASK;
		/* Find vma in the parent mm */
		vma = find_vma(gmap->mm, vmaddr);
		size = min(to - gaddr, PMD_SIZE - (gaddr & ~PMD_MASK));
		zap_page_range(vma, vmaddr, size, NULL);
	}
	up_read(&gmap->mm->mmap_sem);
}
EXPORT_SYMBOL_GPL(gmap_discard);
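
/*
 * Illustrative sketch (not part of the original file): resolving a guest
 * page fault and later discarding the backing pages of that segment, as a
 * KVM-like caller might do. The function name example_fault_and_discard()
 * is made up for illustration.
 */
static inline int example_fault_and_discard(struct gmap *gmap,
					    unsigned long gaddr)
{
	int rc;

	/* fault in the host page and link it into the gmap tables */
	rc = gmap_fault(gmap, gaddr, FAULT_FLAG_WRITE);
	if (rc)
		return rc;
	/* throw away the backing pages of the whole guest segment again */
	gmap_discard(gmap, gaddr & PMD_MASK, (gaddr & PMD_MASK) + PMD_SIZE);
	return 0;
}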

static LIST_HEAD(gmap_notifier_list);
static DEFINE_SPINLOCK(gmap_notifier_lock);

/**
 * gmap_register_ipte_notifier - register a pte invalidation callback
 * @nb: pointer to the gmap notifier block
 */
void gmap_register_ipte_notifier(struct gmap_notifier *nb)
{
	spin_lock(&gmap_notifier_lock);
	list_add(&nb->list, &gmap_notifier_list);
	spin_unlock(&gmap_notifier_lock);
}
EXPORT_SYMBOL_GPL(gmap_register_ipte_notifier);

/**
 * gmap_unregister_ipte_notifier - remove a pte invalidation callback
 * @nb: pointer to the gmap notifier block
 */
void gmap_unregister_ipte_notifier(struct gmap_notifier *nb)
{
	spin_lock(&gmap_notifier_lock);
	list_del_init(&nb->list);
	spin_unlock(&gmap_notifier_lock);
}
EXPORT_SYMBOL_GPL(gmap_unregister_ipte_notifier);

/**
 * gmap_ipte_notify - mark a range of ptes for invalidation notification
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: virtual address in the guest address space
 * @len: size of area
 *
 * Returns 0 if for each page in the given range a gmap mapping exists and
 * the invalidation notification could be set. If the gmap mapping is missing
 * for one or more pages -EFAULT is returned. If no memory could be allocated
 * -ENOMEM is returned. This function establishes missing page table entries.
 */
int gmap_ipte_notify(struct gmap *gmap, unsigned long gaddr, unsigned long len)
{
	unsigned long addr;
	spinlock_t *ptl;
	pte_t *ptep;
	bool unlocked;
	int rc = 0;

	if ((gaddr & ~PAGE_MASK) || (len & ~PAGE_MASK))
		return -EINVAL;
	down_read(&gmap->mm->mmap_sem);
	while (len) {
		unlocked = false;
		/* Convert gmap address and connect the page tables */
		addr = __gmap_translate(gmap, gaddr);
		if (IS_ERR_VALUE(addr)) {
			rc = addr;
			break;
		}
		/* Get the page mapped */
		if (fixup_user_fault(current, gmap->mm, addr, FAULT_FLAG_WRITE,
				     &unlocked)) {
			rc = -EFAULT;
			break;
		}
		/* While trying to map mmap_sem got unlocked. Let us retry */
		if (unlocked)
			continue;
		rc = __gmap_link(gmap, gaddr, addr);
		if (rc)
			break;
		/* Walk the process page table, lock and get pte pointer */
		ptep = get_locked_pte(gmap->mm, addr, &ptl);
		VM_BUG_ON(!ptep);
		/* Set notification bit in the pgste of the pte */
		if ((pte_val(*ptep) & (_PAGE_INVALID | _PAGE_PROTECT)) == 0) {
			ptep_set_notify(gmap->mm, addr, ptep);
			gaddr += PAGE_SIZE;
			len -= PAGE_SIZE;
		}
		pte_unmap_unlock(ptep, ptl);
	}
	up_read(&gmap->mm->mmap_sem);
	return rc;
}
EXPORT_SYMBOL_GPL(gmap_ipte_notify);
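
/*
 * Illustrative sketch (not part of the original file): how a consumer of
 * the ipte notifier interface might wire things up. The callback signature
 * follows struct gmap_notifier in <asm/gmap.h>; the identifiers
 * example_gmap_notifier_cb, example_gmap_nb and example_enable_notification
 * are made up for illustration.
 */
static void example_gmap_notifier_cb(struct gmap *gmap, unsigned long gaddr)
{
	/* called from ptep_notify() when a marked pte is invalidated */
}

static struct gmap_notifier example_gmap_nb = {
	.notifier_call = example_gmap_notifier_cb,
};

static inline int example_enable_notification(struct gmap *gmap,
					      unsigned long gaddr)
{
	gmap_register_ipte_notifier(&example_gmap_nb);
	/* mark one guest page; faults in and links the backing pte first */
	return gmap_ipte_notify(gmap, gaddr & PAGE_MASK, PAGE_SIZE);
}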

/**
 * ptep_notify - call all invalidation callbacks for a specific pte.
 * @mm: pointer to the process mm_struct
 * @vmaddr: virtual address in the process address space
 * @pte: pointer to the page table entry
 *
 * This function is assumed to be called with the page table lock held
 * for the pte to notify.
 */
void ptep_notify(struct mm_struct *mm, unsigned long vmaddr, pte_t *pte)
{
	unsigned long offset, gaddr;
	unsigned long *table;
	struct gmap_notifier *nb;
	struct gmap *gmap;

	offset = ((unsigned long) pte) & (255 * sizeof(pte_t));
	offset = offset * (4096 / sizeof(pte_t));
	spin_lock(&gmap_notifier_lock);
	list_for_each_entry(gmap, &mm->context.gmap_list, list) {
		table = radix_tree_lookup(&gmap->host_to_guest,
					  vmaddr >> PMD_SHIFT);
		if (!table)
			continue;
		gaddr = __gmap_segment_gaddr(table) + offset;
		list_for_each_entry(nb, &gmap_notifier_list, list)
			nb->notifier_call(gmap, gaddr);
	}
	spin_unlock(&gmap_notifier_lock);
}
EXPORT_SYMBOL_GPL(ptep_notify);

static inline void thp_split_mm(struct mm_struct *mm)
{
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	struct vm_area_struct *vma;
	unsigned long addr;

	for (vma = mm->mmap; vma != NULL; vma = vma->vm_next) {
		for (addr = vma->vm_start;
		     addr < vma->vm_end;
		     addr += PAGE_SIZE)
			follow_page(vma, addr, FOLL_SPLIT);
		vma->vm_flags &= ~VM_HUGEPAGE;
		vma->vm_flags |= VM_NOHUGEPAGE;
	}
	mm->def_flags |= VM_NOHUGEPAGE;
#endif
}

/*
 * Switch on pgstes for the current userspace process (for kvm).
 */
int s390_enable_sie(void)
{
	struct mm_struct *mm = current->mm;

	/* Do we have pgstes? If yes, we are done */
	if (mm_has_pgste(mm))
		return 0;
	/* Fail if the page tables are 2K */
	if (!mm_alloc_pgste(mm))
		return -EINVAL;
	down_write(&mm->mmap_sem);
	mm->context.has_pgste = 1;
	/* split thp mappings and disable thp for future mappings */
	thp_split_mm(mm);
	up_write(&mm->mmap_sem);
	return 0;
}
EXPORT_SYMBOL_GPL(s390_enable_sie);

/*
 * Enable storage key handling from now on and initialize the storage
 * keys with the default key.
 */
static int __s390_enable_skey(pte_t *pte, unsigned long addr,
			      unsigned long next, struct mm_walk *walk)
{
	/*
	 * Remove all zero page mappings; once a policy to forbid zero page
	 * mappings is established, subsequent faults for such pages will get
	 * fresh anonymous pages.
	 */
	if (is_zero_pfn(pte_pfn(*pte)))
		ptep_xchg_direct(walk->mm, addr, pte, __pte(_PAGE_INVALID));
	/* Clear storage key */
	ptep_zap_key(walk->mm, addr, pte);
	return 0;
}

int s390_enable_skey(void)
{
	struct mm_walk walk = { .pte_entry = __s390_enable_skey };
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	int rc = 0;

	down_write(&mm->mmap_sem);
	if (mm_use_skey(mm))
		goto out_up;

	mm->context.use_skey = 1;
	for (vma = mm->mmap; vma; vma = vma->vm_next) {
		if (ksm_madvise(vma, vma->vm_start, vma->vm_end,
				MADV_UNMERGEABLE, &vma->vm_flags)) {
			mm->context.use_skey = 0;
			rc = -ENOMEM;
			goto out_up;
		}
	}
	mm->def_flags &= ~VM_MERGEABLE;

	walk.mm = mm;
	walk_page_range(0, TASK_SIZE, &walk);

out_up:
	up_write(&mm->mmap_sem);
	return rc;
}
EXPORT_SYMBOL_GPL(s390_enable_skey);

/*
 * Reset CMMA state, make all pages stable again.
 */
static int __s390_reset_cmma(pte_t *pte, unsigned long addr,
			     unsigned long next, struct mm_walk *walk)
{
	ptep_zap_unused(walk->mm, addr, pte, 1);
	return 0;
}

void s390_reset_cmma(struct mm_struct *mm)
{
	struct mm_walk walk = { .pte_entry = __s390_reset_cmma };

	down_write(&mm->mmap_sem);
	walk.mm = mm;
	walk_page_range(0, TASK_SIZE, &walk);
	up_write(&mm->mmap_sem);
}
EXPORT_SYMBOL_GPL(s390_reset_cmma);