/*
 * KVM guest address space mapping code
 *
 * Copyright IBM Corp. 2007, 2016
 * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
 */

#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/swapops.h>
#include <linux/ksm.h>
#include <linux/mman.h>

#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/gmap.h>
#include <asm/tlb.h>

#define GMAP_SHADOW_FAKE_TABLE 1ULL

/**
 * gmap_alloc - allocate and initialize a guest address space
 * @limit: maximum address of the gmap address space
 *
 * Returns a guest address space structure.
 */
static struct gmap *gmap_alloc(unsigned long limit)
{
	struct gmap *gmap;
	struct page *page;
	unsigned long *table;
	unsigned long etype, atype;

	if (limit < (1UL << 31)) {
		limit = (1UL << 31) - 1;
		atype = _ASCE_TYPE_SEGMENT;
		etype = _SEGMENT_ENTRY_EMPTY;
	} else if (limit < (1UL << 42)) {
		limit = (1UL << 42) - 1;
		atype = _ASCE_TYPE_REGION3;
		etype = _REGION3_ENTRY_EMPTY;
	} else if (limit < (1UL << 53)) {
		limit = (1UL << 53) - 1;
		atype = _ASCE_TYPE_REGION2;
		etype = _REGION2_ENTRY_EMPTY;
	} else {
		limit = -1UL;
		atype = _ASCE_TYPE_REGION1;
		etype = _REGION1_ENTRY_EMPTY;
	}
	gmap = kzalloc(sizeof(struct gmap), GFP_KERNEL);
	if (!gmap)
		goto out;
	INIT_LIST_HEAD(&gmap->crst_list);
	INIT_LIST_HEAD(&gmap->children);
	INIT_LIST_HEAD(&gmap->pt_list);
	INIT_RADIX_TREE(&gmap->guest_to_host, GFP_KERNEL);
	INIT_RADIX_TREE(&gmap->host_to_guest, GFP_ATOMIC);
	INIT_RADIX_TREE(&gmap->host_to_rmap, GFP_ATOMIC);
	spin_lock_init(&gmap->guest_table_lock);
	spin_lock_init(&gmap->shadow_lock);
	atomic_set(&gmap->ref_count, 1);
	page = alloc_pages(GFP_KERNEL, 2);
	if (!page)
		goto out_free;
	page->index = 0;
	list_add(&page->lru, &gmap->crst_list);
	table = (unsigned long *) page_to_phys(page);
	crst_table_init(table, etype);
	gmap->table = table;
	gmap->asce = atype | _ASCE_TABLE_LENGTH |
		_ASCE_USER_BITS | __pa(table);
	gmap->asce_end = limit;
	return gmap;

out_free:
	kfree(gmap);
out:
	return NULL;
}

/**
 * gmap_create - create a guest address space
 * @mm: pointer to the parent mm_struct
 * @limit: maximum size of the gmap address space
 *
 * Returns a guest address space structure.
 */
struct gmap *gmap_create(struct mm_struct *mm, unsigned long limit)
{
	struct gmap *gmap;

	gmap = gmap_alloc(limit);
	if (!gmap)
		return NULL;
	gmap->mm = mm;
	spin_lock(&mm->context.gmap_lock);
	list_add_rcu(&gmap->list, &mm->context.gmap_list);
	spin_unlock(&mm->context.gmap_lock);
	return gmap;
}
EXPORT_SYMBOL_GPL(gmap_create);

static void gmap_flush_tlb(struct gmap *gmap)
{
	if (MACHINE_HAS_IDTE)
		__tlb_flush_idte(gmap->asce);
	else
		__tlb_flush_global();
}

static void gmap_radix_tree_free(struct radix_tree_root *root)
{
	struct radix_tree_iter iter;
	unsigned long indices[16];
	unsigned long index;
	void **slot;
	int i, nr;

	/* A radix tree is freed by deleting all of its entries */
	index = 0;
	do {
		nr = 0;
		radix_tree_for_each_slot(slot, root, &iter, index) {
			indices[nr] = iter.index;
			if (++nr == 16)
				break;
		}
		for (i = 0; i < nr; i++) {
			index = indices[i];
			radix_tree_delete(root, index);
		}
	} while (nr > 0);
}

static void gmap_rmap_radix_tree_free(struct radix_tree_root *root)
{
	struct gmap_rmap *rmap, *rnext, *head;
	struct radix_tree_iter iter;
	unsigned long indices[16];
	unsigned long index;
	void **slot;
	int i, nr;

	/* A radix tree is freed by deleting all of its entries */
	index = 0;
	do {
		nr = 0;
		radix_tree_for_each_slot(slot, root, &iter, index) {
			indices[nr] = iter.index;
			if (++nr == 16)
				break;
		}
		for (i = 0; i < nr; i++) {
			index = indices[i];
			head = radix_tree_delete(root, index);
			gmap_for_each_rmap_safe(rmap, rnext, head)
				kfree(rmap);
		}
	} while (nr > 0);
}

/**
 * gmap_free - free a guest address space
 * @gmap: pointer to the guest address space structure
 *
 * No locks required. There are no references to this gmap anymore.
 */
static void gmap_free(struct gmap *gmap)
{
	struct page *page, *next;

	/* Flush tlb of all gmaps (if not already done for shadows) */
	if (!(gmap_is_shadow(gmap) && gmap->removed))
		gmap_flush_tlb(gmap);
	/* Free all segment & region tables. */
	list_for_each_entry_safe(page, next, &gmap->crst_list, lru)
		__free_pages(page, 2);
	gmap_radix_tree_free(&gmap->guest_to_host);
	gmap_radix_tree_free(&gmap->host_to_guest);

	/* Free additional data for a shadow gmap */
	if (gmap_is_shadow(gmap)) {
		/* Free all page tables. */
		list_for_each_entry_safe(page, next, &gmap->pt_list, lru)
			page_table_free_pgste(page);
		gmap_rmap_radix_tree_free(&gmap->host_to_rmap);
		/* Release reference to the parent */
		gmap_put(gmap->parent);
	}

	kfree(gmap);
}

/**
 * gmap_get - increase reference counter for guest address space
 * @gmap: pointer to the guest address space structure
 *
 * Returns the gmap pointer
 */
struct gmap *gmap_get(struct gmap *gmap)
{
	atomic_inc(&gmap->ref_count);
	return gmap;
}
EXPORT_SYMBOL_GPL(gmap_get);

/**
 * gmap_put - decrease reference counter for guest address space
 * @gmap: pointer to the guest address space structure
 *
 * If the reference counter reaches zero the guest address space is freed.
 */
void gmap_put(struct gmap *gmap)
{
	if (atomic_dec_return(&gmap->ref_count) == 0)
		gmap_free(gmap);
}
EXPORT_SYMBOL_GPL(gmap_put);

/**
 * gmap_remove - remove a guest address space but do not free it yet
 * @gmap: pointer to the guest address space structure
 */
void gmap_remove(struct gmap *gmap)
{
	struct gmap *sg, *next;

	/* Remove all shadow gmaps linked to this gmap */
	if (!list_empty(&gmap->children)) {
		spin_lock(&gmap->shadow_lock);
		list_for_each_entry_safe(sg, next, &gmap->children, list) {
			list_del(&sg->list);
			gmap_put(sg);
		}
		spin_unlock(&gmap->shadow_lock);
	}
	/* Remove gmap from the per-mm list */
	spin_lock(&gmap->mm->context.gmap_lock);
	list_del_rcu(&gmap->list);
	spin_unlock(&gmap->mm->context.gmap_lock);
	synchronize_rcu();
	/* Put reference */
	gmap_put(gmap);
}
EXPORT_SYMBOL_GPL(gmap_remove);

/**
 * gmap_enable - switch primary space to the guest address space
 * @gmap: pointer to the guest address space structure
 */
void gmap_enable(struct gmap *gmap)
{
	S390_lowcore.gmap = (unsigned long) gmap;
}
EXPORT_SYMBOL_GPL(gmap_enable);

/**
 * gmap_disable - switch back to the standard primary address space
 * @gmap: pointer to the guest address space structure
 */
void gmap_disable(struct gmap *gmap)
{
	S390_lowcore.gmap = 0UL;
}
EXPORT_SYMBOL_GPL(gmap_disable);

/**
 * gmap_get_enabled - get a pointer to the currently enabled gmap
 *
 * Returns a pointer to the currently enabled gmap. NULL if none is enabled.
 */
struct gmap *gmap_get_enabled(void)
{
	return (struct gmap *) S390_lowcore.gmap;
}
EXPORT_SYMBOL_GPL(gmap_get_enabled);

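/*
 * Illustrative sketch (not part of the original file): a minimal guest
 * address space lifecycle as a hypervisor-style caller might drive the
 * API above.  The helper name and the 4 TB limit are arbitrary examples
 * chosen for the sketch; error handling is kept to the bare minimum.
 */
static int __maybe_unused gmap_lifecycle_sketch(struct mm_struct *mm)
{
	struct gmap *gmap;

	/* Create a guest address space covering up to 4 TB */
	gmap = gmap_create(mm, (1UL << 42) - 1);
	if (!gmap)
		return -ENOMEM;
	/* Make it the current guest address space for this CPU ... */
	gmap_enable(gmap);
	/* ... run the guest ... */
	gmap_disable(gmap);
	/* Unlink it from the mm and drop the initial reference */
	gmap_remove(gmap);
	return 0;
}
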
/*
 * gmap_alloc_table is assumed to be called with mmap_sem held
 */
static int gmap_alloc_table(struct gmap *gmap, unsigned long *table,
			    unsigned long init, unsigned long gaddr)
{
	struct page *page;
	unsigned long *new;

	/* since we don't free the gmap table until gmap_free we can unlock */
	page = alloc_pages(GFP_KERNEL, 2);
	if (!page)
		return -ENOMEM;
	new = (unsigned long *) page_to_phys(page);
	crst_table_init(new, init);
	spin_lock(&gmap->guest_table_lock);
	if (*table & _REGION_ENTRY_INVALID) {
		list_add(&page->lru, &gmap->crst_list);
		*table = (unsigned long) new | _REGION_ENTRY_LENGTH |
			(*table & _REGION_ENTRY_TYPE_MASK);
		page->index = gaddr;
		page = NULL;
	}
	spin_unlock(&gmap->guest_table_lock);
	if (page)
		__free_pages(page, 2);
	return 0;
}

/**
 * __gmap_segment_gaddr - find virtual address from segment pointer
 * @entry: pointer to a segment table entry in the guest address space
 *
 * Returns the virtual address in the guest address space for the segment
 */
static unsigned long __gmap_segment_gaddr(unsigned long *entry)
{
	struct page *page;
	unsigned long offset, mask;

	offset = (unsigned long) entry / sizeof(unsigned long);
	offset = (offset & (PTRS_PER_PMD - 1)) * PMD_SIZE;
	mask = ~(PTRS_PER_PMD * sizeof(pmd_t) - 1);
	page = virt_to_page((void *)((unsigned long) entry & mask));
	return page->index + offset;
}

/**
 * __gmap_unlink_by_vmaddr - unlink a single segment via a host address
 * @gmap: pointer to the guest address space structure
 * @vmaddr: address in the host process address space
 *
 * Returns 1 if a TLB flush is required
 */
static int __gmap_unlink_by_vmaddr(struct gmap *gmap, unsigned long vmaddr)
{
	unsigned long *entry;
	int flush = 0;

	BUG_ON(gmap_is_shadow(gmap));
	spin_lock(&gmap->guest_table_lock);
	entry = radix_tree_delete(&gmap->host_to_guest, vmaddr >> PMD_SHIFT);
	if (entry) {
		flush = (*entry != _SEGMENT_ENTRY_INVALID);
		*entry = _SEGMENT_ENTRY_INVALID;
	}
	spin_unlock(&gmap->guest_table_lock);
	return flush;
}

/**
 * __gmap_unmap_by_gaddr - unmap a single segment via a guest address
 * @gmap: pointer to the guest address space structure
 * @gaddr: address in the guest address space
 *
 * Returns 1 if a TLB flush is required
 */
static int __gmap_unmap_by_gaddr(struct gmap *gmap, unsigned long gaddr)
{
	unsigned long vmaddr;

	vmaddr = (unsigned long) radix_tree_delete(&gmap->guest_to_host,
						   gaddr >> PMD_SHIFT);
	return vmaddr ? __gmap_unlink_by_vmaddr(gmap, vmaddr) : 0;
}

/**
 * gmap_unmap_segment - unmap segment from the guest address space
 * @gmap: pointer to the guest address space structure
 * @to: address in the guest address space
 * @len: length of the memory area to unmap
 *
 * Returns 0 if the unmap succeeded, -EINVAL if not.
 */
int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len)
{
	unsigned long off;
	int flush;

	BUG_ON(gmap_is_shadow(gmap));
	if ((to | len) & (PMD_SIZE - 1))
		return -EINVAL;
	if (len == 0 || to + len < to)
		return -EINVAL;

	flush = 0;
	down_write(&gmap->mm->mmap_sem);
	for (off = 0; off < len; off += PMD_SIZE)
		flush |= __gmap_unmap_by_gaddr(gmap, to + off);
	up_write(&gmap->mm->mmap_sem);
	if (flush)
		gmap_flush_tlb(gmap);
	return 0;
}
EXPORT_SYMBOL_GPL(gmap_unmap_segment);

/**
 * gmap_map_segment - map a segment to the guest address space
 * @gmap: pointer to the guest address space structure
 * @from: source address in the parent address space
 * @to: target address in the guest address space
 * @len: length of the memory area to map
 *
 * Returns 0 if the mmap succeeded, -EINVAL or -ENOMEM if not.
 */
int gmap_map_segment(struct gmap *gmap, unsigned long from,
		     unsigned long to, unsigned long len)
{
	unsigned long off;
	int flush;

	BUG_ON(gmap_is_shadow(gmap));
	if ((from | to | len) & (PMD_SIZE - 1))
		return -EINVAL;
	if (len == 0 || from + len < from || to + len < to ||
	    from + len - 1 > TASK_MAX_SIZE || to + len - 1 > gmap->asce_end)
		return -EINVAL;

	flush = 0;
	down_write(&gmap->mm->mmap_sem);
	for (off = 0; off < len; off += PMD_SIZE) {
		/* Remove old translation */
		flush |= __gmap_unmap_by_gaddr(gmap, to + off);
		/* Store new translation */
		if (radix_tree_insert(&gmap->guest_to_host,
				      (to + off) >> PMD_SHIFT,
				      (void *) from + off))
			break;
	}
	up_write(&gmap->mm->mmap_sem);
	if (flush)
		gmap_flush_tlb(gmap);
	if (off >= len)
		return 0;
	gmap_unmap_segment(gmap, to, len);
	return -ENOMEM;
}
EXPORT_SYMBOL_GPL(gmap_map_segment);

/**
 * __gmap_translate - translate a guest address to a user space address
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: guest address
 *
 * Returns user space address which corresponds to the guest address or
 * -EFAULT if no such mapping exists.
 * This function does not establish potentially missing page table entries.
 * The mmap_sem of the mm that belongs to the address space must be held
 * when this function gets called.
 *
 * Note: Can also be called for shadow gmaps.
 */
unsigned long __gmap_translate(struct gmap *gmap, unsigned long gaddr)
{
	unsigned long vmaddr;

	vmaddr = (unsigned long)
		radix_tree_lookup(&gmap->guest_to_host, gaddr >> PMD_SHIFT);
	/* Note: guest_to_host is empty for a shadow gmap */
	return vmaddr ? (vmaddr | (gaddr & ~PMD_MASK)) : -EFAULT;
}
EXPORT_SYMBOL_GPL(__gmap_translate);

/**
 * gmap_translate - translate a guest address to a user space address
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: guest address
 *
 * Returns user space address which corresponds to the guest address or
 * -EFAULT if no such mapping exists.
 * This function does not establish potentially missing page table entries.
 */
unsigned long gmap_translate(struct gmap *gmap, unsigned long gaddr)
{
	unsigned long rc;

	down_read(&gmap->mm->mmap_sem);
	rc = __gmap_translate(gmap, gaddr);
	up_read(&gmap->mm->mmap_sem);
	return rc;
}
EXPORT_SYMBOL_GPL(gmap_translate);

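/*
 * Illustrative sketch (not part of the original file): establishing a
 * guest-to-host segment mapping and translating a guest address back to
 * a user space address with the API above.  The addresses are arbitrary,
 * segment (1 MB) aligned examples and assume the gmap was created with a
 * sufficiently large limit.
 */
static int __maybe_unused gmap_map_translate_sketch(struct gmap *gmap)
{
	unsigned long from = 0x10000000UL;	/* host (user space) address */
	unsigned long to = 0x80000000UL;	/* guest address */
	unsigned long vmaddr;
	int rc;

	/* Map one segment of the parent address space into the guest */
	rc = gmap_map_segment(gmap, from, to, PMD_SIZE);
	if (rc)
		return rc;
	/* Look up the user space address for a guest address in that segment */
	vmaddr = gmap_translate(gmap, to + 0x1000);
	if (IS_ERR_VALUE(vmaddr))
		return (int) vmaddr;
	/* Tear the mapping down again */
	return gmap_unmap_segment(gmap, to, PMD_SIZE);
}
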
/**
 * gmap_unlink - disconnect a page table from the gmap shadow tables
 * @mm: pointer to the parent mm_struct
 * @table: pointer to the host page table
 * @vmaddr: vm address associated with the host page table
 */
void gmap_unlink(struct mm_struct *mm, unsigned long *table,
		 unsigned long vmaddr)
{
	struct gmap *gmap;
	int flush;

	rcu_read_lock();
	list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
		flush = __gmap_unlink_by_vmaddr(gmap, vmaddr);
		if (flush)
			gmap_flush_tlb(gmap);
	}
	rcu_read_unlock();
}

/**
 * __gmap_link - set up shadow page tables to connect a host to a guest address
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: guest address
 * @vmaddr: vm address
 *
 * Returns 0 on success, -ENOMEM for out of memory conditions, and -EFAULT
 * if the vm address is already mapped to a different guest segment.
 * The mmap_sem of the mm that belongs to the address space must be held
 * when this function gets called.
 */
int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr)
{
	struct mm_struct *mm;
	unsigned long *table;
	spinlock_t *ptl;
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	int rc;

	BUG_ON(gmap_is_shadow(gmap));
	/* Create higher level tables in the gmap page table */
	table = gmap->table;
	if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION1) {
		table += (gaddr >> 53) & 0x7ff;
		if ((*table & _REGION_ENTRY_INVALID) &&
		    gmap_alloc_table(gmap, table, _REGION2_ENTRY_EMPTY,
				     gaddr & 0xffe0000000000000UL))
			return -ENOMEM;
		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
	}
	if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION2) {
		table += (gaddr >> 42) & 0x7ff;
		if ((*table & _REGION_ENTRY_INVALID) &&
		    gmap_alloc_table(gmap, table, _REGION3_ENTRY_EMPTY,
				     gaddr & 0xfffffc0000000000UL))
			return -ENOMEM;
		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
	}
	if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION3) {
		table += (gaddr >> 31) & 0x7ff;
		if ((*table & _REGION_ENTRY_INVALID) &&
		    gmap_alloc_table(gmap, table, _SEGMENT_ENTRY_EMPTY,
				     gaddr & 0xffffffff80000000UL))
			return -ENOMEM;
		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
	}
	table += (gaddr >> 20) & 0x7ff;
	/* Walk the parent mm page table */
	mm = gmap->mm;
	pgd = pgd_offset(mm, vmaddr);
	VM_BUG_ON(pgd_none(*pgd));
	pud = pud_offset(pgd, vmaddr);
	VM_BUG_ON(pud_none(*pud));
	/* large puds cannot yet be handled */
	if (pud_large(*pud))
		return -EFAULT;
	pmd = pmd_offset(pud, vmaddr);
	VM_BUG_ON(pmd_none(*pmd));
	/* large pmds cannot yet be handled */
	if (pmd_large(*pmd))
		return -EFAULT;
	/* Link gmap segment table entry location to page table. */
	rc = radix_tree_preload(GFP_KERNEL);
	if (rc)
		return rc;
	ptl = pmd_lock(mm, pmd);
	spin_lock(&gmap->guest_table_lock);
	if (*table == _SEGMENT_ENTRY_INVALID) {
		rc = radix_tree_insert(&gmap->host_to_guest,
				       vmaddr >> PMD_SHIFT, table);
		if (!rc)
			*table = pmd_val(*pmd);
	} else
		rc = 0;
	spin_unlock(&gmap->guest_table_lock);
	spin_unlock(ptl);
	radix_tree_preload_end();
	return rc;
}

/**
 * gmap_fault - resolve a fault on a guest address
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: guest address
 * @fault_flags: flags to pass down to handle_mm_fault()
 *
 * Returns 0 on success, -ENOMEM for out of memory conditions, and -EFAULT
 * if the vm address is already mapped to a different guest segment.
 */
int gmap_fault(struct gmap *gmap, unsigned long gaddr,
	       unsigned int fault_flags)
{
	unsigned long vmaddr;
	int rc;
	bool unlocked;

	down_read(&gmap->mm->mmap_sem);

retry:
	unlocked = false;
	vmaddr = __gmap_translate(gmap, gaddr);
	if (IS_ERR_VALUE(vmaddr)) {
		rc = vmaddr;
		goto out_up;
	}
	if (fixup_user_fault(current, gmap->mm, vmaddr, fault_flags,
			     &unlocked)) {
		rc = -EFAULT;
		goto out_up;
	}
	/*
	 * In the case that fixup_user_fault unlocked the mmap_sem during
	 * faulting, redo __gmap_translate to avoid racing with a
	 * map/unmap_segment.
	 */
	if (unlocked)
		goto retry;

	rc = __gmap_link(gmap, gaddr, vmaddr);
out_up:
	up_read(&gmap->mm->mmap_sem);
	return rc;
}
EXPORT_SYMBOL_GPL(gmap_fault);

/*
 * this function is assumed to be called with mmap_sem held
 */
void __gmap_zap(struct gmap *gmap, unsigned long gaddr)
{
	unsigned long vmaddr;
	spinlock_t *ptl;
	pte_t *ptep;

	/* Find the vm address for the guest address */
	vmaddr = (unsigned long) radix_tree_lookup(&gmap->guest_to_host,
						   gaddr >> PMD_SHIFT);
	if (vmaddr) {
		vmaddr |= gaddr & ~PMD_MASK;
		/* Get pointer to the page table entry */
		ptep = get_locked_pte(gmap->mm, vmaddr, &ptl);
		if (likely(ptep)) {
			ptep_zap_unused(gmap->mm, vmaddr, ptep, 0);
			pte_unmap_unlock(ptep, ptl);
		}
	}
}
EXPORT_SYMBOL_GPL(__gmap_zap);

void gmap_discard(struct gmap *gmap, unsigned long from, unsigned long to)
{
	unsigned long gaddr, vmaddr, size;
	struct vm_area_struct *vma;

	down_read(&gmap->mm->mmap_sem);
	for (gaddr = from; gaddr < to;
	     gaddr = (gaddr + PMD_SIZE) & PMD_MASK) {
		/* Find the vm address for the guest address */
		vmaddr = (unsigned long)
			radix_tree_lookup(&gmap->guest_to_host,
					  gaddr >> PMD_SHIFT);
		if (!vmaddr)
			continue;
		vmaddr |= gaddr & ~PMD_MASK;
		/* Find vma in the parent mm */
		vma = find_vma(gmap->mm, vmaddr);
		size = min(to - gaddr, PMD_SIZE - (gaddr & ~PMD_MASK));
		zap_page_range(vma, vmaddr, size, NULL);
	}
	up_read(&gmap->mm->mmap_sem);
}
EXPORT_SYMBOL_GPL(gmap_discard);

static LIST_HEAD(gmap_notifier_list);
static DEFINE_SPINLOCK(gmap_notifier_lock);

/**
 * gmap_register_pte_notifier - register a pte invalidation callback
 * @nb: pointer to the gmap notifier block
 */
void gmap_register_pte_notifier(struct gmap_notifier *nb)
{
	spin_lock(&gmap_notifier_lock);
	list_add_rcu(&nb->list, &gmap_notifier_list);
	spin_unlock(&gmap_notifier_lock);
}
EXPORT_SYMBOL_GPL(gmap_register_pte_notifier);

/**
 * gmap_unregister_pte_notifier - remove a pte invalidation callback
 * @nb: pointer to the gmap notifier block
 */
void gmap_unregister_pte_notifier(struct gmap_notifier *nb)
{
	spin_lock(&gmap_notifier_lock);
	list_del_rcu(&nb->list);
	spin_unlock(&gmap_notifier_lock);
	synchronize_rcu();
}
EXPORT_SYMBOL_GPL(gmap_unregister_pte_notifier);

/**
 * gmap_call_notifier - call all registered invalidation callbacks
 * @gmap: pointer to guest mapping meta data structure
 * @start: start virtual address in the guest address space
 * @end: end virtual address in the guest address space
 */
static void gmap_call_notifier(struct gmap *gmap, unsigned long start,
			       unsigned long end)
{
	struct gmap_notifier *nb;

	list_for_each_entry(nb, &gmap_notifier_list, list)
		nb->notifier_call(gmap, start, end);
}

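/*
 * Illustrative sketch (not part of the original file): registering a pte
 * invalidation callback with the notifier interface above.  The callback
 * body is a placeholder; a real user (such as KVM) would kick the affected
 * vcpus here.  The identifiers are made up for the sketch.
 */
static void gmap_sketch_notifier_call(struct gmap *gmap, unsigned long start,
				      unsigned long end)
{
	/* react to the invalidation of the guest range [start, end] */
}

static struct gmap_notifier gmap_sketch_notifier = {
	.notifier_call = gmap_sketch_notifier_call,
};

static void __maybe_unused gmap_notifier_sketch(void)
{
	gmap_register_pte_notifier(&gmap_sketch_notifier);
	/* ... callbacks may fire for armed ptes ... */
	gmap_unregister_pte_notifier(&gmap_sketch_notifier);
}
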
/**
 * gmap_table_walk - walk the gmap page tables
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: virtual address in the guest address space
 * @level: page table level to stop at
 *
 * Returns a table entry pointer for the given guest address and @level
 * @level=0 : returns a pointer to a page table entry (or NULL)
 * @level=1 : returns a pointer to a segment table entry (or NULL)
 * @level=2 : returns a pointer to a region-3 table entry (or NULL)
 * @level=3 : returns a pointer to a region-2 table entry (or NULL)
 * @level=4 : returns a pointer to a region-1 table entry (or NULL)
 *
 * Returns NULL if the gmap page tables could not be walked to the
 * requested level.
 *
 * Note: Can also be called for shadow gmaps.
 */
static inline unsigned long *gmap_table_walk(struct gmap *gmap,
					     unsigned long gaddr, int level)
{
	unsigned long *table;

	if ((gmap->asce & _ASCE_TYPE_MASK) + 4 < (level * 4))
		return NULL;
	if (gmap_is_shadow(gmap) && gmap->removed)
		return NULL;
	if (gaddr & (-1UL << (31 + ((gmap->asce & _ASCE_TYPE_MASK) >> 2)*11)))
		return NULL;
	table = gmap->table;
	switch (gmap->asce & _ASCE_TYPE_MASK) {
	case _ASCE_TYPE_REGION1:
		table += (gaddr >> 53) & 0x7ff;
		if (level == 4)
			break;
		if (*table & _REGION_ENTRY_INVALID)
			return NULL;
		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
		/* Fallthrough */
	case _ASCE_TYPE_REGION2:
		table += (gaddr >> 42) & 0x7ff;
		if (level == 3)
			break;
		if (*table & _REGION_ENTRY_INVALID)
			return NULL;
		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
		/* Fallthrough */
	case _ASCE_TYPE_REGION3:
		table += (gaddr >> 31) & 0x7ff;
		if (level == 2)
			break;
		if (*table & _REGION_ENTRY_INVALID)
			return NULL;
		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
		/* Fallthrough */
	case _ASCE_TYPE_SEGMENT:
		table += (gaddr >> 20) & 0x7ff;
		if (level == 1)
			break;
		if (*table & _REGION_ENTRY_INVALID)
			return NULL;
		table = (unsigned long *)(*table & _SEGMENT_ENTRY_ORIGIN);
		table += (gaddr >> 12) & 0xff;
	}
	return table;
}

/**
 * gmap_pte_op_walk - walk the gmap page table, get the page table lock
 *		      and return the pte pointer
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: virtual address in the guest address space
 * @ptl: pointer to the spinlock pointer
 *
 * Returns a pointer to the locked pte for a guest address, or NULL
 *
 * Note: Can also be called for shadow gmaps.
 */
static pte_t *gmap_pte_op_walk(struct gmap *gmap, unsigned long gaddr,
			       spinlock_t **ptl)
{
	unsigned long *table;

	if (gmap_is_shadow(gmap))
		spin_lock(&gmap->guest_table_lock);
	/* Walk the gmap page table, lock and get pte pointer */
	table = gmap_table_walk(gmap, gaddr, 1); /* get segment pointer */
	if (!table || *table & _SEGMENT_ENTRY_INVALID) {
		if (gmap_is_shadow(gmap))
			spin_unlock(&gmap->guest_table_lock);
		return NULL;
	}
	if (gmap_is_shadow(gmap)) {
		*ptl = &gmap->guest_table_lock;
		return pte_offset_map((pmd_t *) table, gaddr);
	}
	return pte_alloc_map_lock(gmap->mm, (pmd_t *) table, gaddr, ptl);
}

/**
 * gmap_pte_op_fixup - force a page in and connect the gmap page table
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: virtual address in the guest address space
 * @vmaddr: address in the host process address space
 * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE
 *
 * Returns 0 if the caller can retry __gmap_translate (might fail again),
 * -ENOMEM if out of memory and -EFAULT if anything goes wrong while fixing
 * up or connecting the gmap page table.
 */
static int gmap_pte_op_fixup(struct gmap *gmap, unsigned long gaddr,
			     unsigned long vmaddr, int prot)
{
	struct mm_struct *mm = gmap->mm;
	unsigned int fault_flags;
	bool unlocked = false;

	BUG_ON(gmap_is_shadow(gmap));
	fault_flags = (prot == PROT_WRITE) ? FAULT_FLAG_WRITE : 0;
	if (fixup_user_fault(current, mm, vmaddr, fault_flags, &unlocked))
		return -EFAULT;
	if (unlocked)
		/* lost mmap_sem, caller has to retry __gmap_translate */
		return 0;
	/* Connect the page tables */
	return __gmap_link(gmap, gaddr, vmaddr);
}

/**
 * gmap_pte_op_end - release the page table lock
 * @ptl: pointer to the spinlock pointer
 */
static void gmap_pte_op_end(spinlock_t *ptl)
{
	spin_unlock(ptl);
}

/*
 * gmap_protect_range - remove access rights to memory and set pgste bits
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: virtual address in the guest address space
 * @len: size of area
 * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE
 * @bits: pgste notification bits to set
 *
 * Returns 0 if successfully protected, -ENOMEM if out of memory and
 * -EFAULT if gaddr is invalid (or mapping for shadows is missing).
 *
 * Called with sg->mm->mmap_sem in read.
 *
 * Note: Can also be called for shadow gmaps.
 */
static int gmap_protect_range(struct gmap *gmap, unsigned long gaddr,
			      unsigned long len, int prot, unsigned long bits)
{
	unsigned long vmaddr;
	spinlock_t *ptl;
	pte_t *ptep;
	int rc;

	while (len) {
		rc = -EAGAIN;
		ptep = gmap_pte_op_walk(gmap, gaddr, &ptl);
		if (ptep) {
			rc = ptep_force_prot(gmap->mm, gaddr, ptep, prot, bits);
			gmap_pte_op_end(ptl);
		}
		if (rc) {
			vmaddr = __gmap_translate(gmap, gaddr);
			if (IS_ERR_VALUE(vmaddr))
				return vmaddr;
			rc = gmap_pte_op_fixup(gmap, gaddr, vmaddr, prot);
			if (rc)
				return rc;
			continue;
		}
		gaddr += PAGE_SIZE;
		len -= PAGE_SIZE;
	}
	return 0;
}

/**
 * gmap_mprotect_notify - change access rights for a range of ptes and
 *			  call the notifier if any pte changes again
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: virtual address in the guest address space
 * @len: size of area
 * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE
 *
 * Returns 0 if for each page in the given range a gmap mapping exists,
 * the new access rights could be set and the notifier could be armed.
 * If the gmap mapping is missing for one or more pages -EFAULT is
 * returned. If no memory could be allocated -ENOMEM is returned.
 * This function establishes missing page table entries.
 */
int gmap_mprotect_notify(struct gmap *gmap, unsigned long gaddr,
			 unsigned long len, int prot)
{
	int rc;

	if ((gaddr & ~PAGE_MASK) || (len & ~PAGE_MASK) || gmap_is_shadow(gmap))
		return -EINVAL;
	if (!MACHINE_HAS_ESOP && prot == PROT_READ)
		return -EINVAL;
	down_read(&gmap->mm->mmap_sem);
	rc = gmap_protect_range(gmap, gaddr, len, prot, PGSTE_IN_BIT);
	up_read(&gmap->mm->mmap_sem);
	return rc;
}
EXPORT_SYMBOL_GPL(gmap_mprotect_notify);

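/*
 * Illustrative sketch (not part of the original file): arming the pte
 * invalidation notifier for one guest page via gmap_mprotect_notify(),
 * e.g. to get a callback once the guest touches a monitored page again.
 * The function name is made up; PROT_READ requires ESOP as checked above.
 */
static int __maybe_unused gmap_mprotect_sketch(struct gmap *gmap,
					       unsigned long gaddr)
{
	/*
	 * Write-protect a single guest page and arm the notifier; the
	 * registered notifier_call is invoked once the protection is
	 * broken again.
	 */
	return gmap_mprotect_notify(gmap, gaddr & PAGE_MASK, PAGE_SIZE,
				    PROT_READ);
}
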
/**
 * gmap_read_table - get an unsigned long value from a guest page table using
 *		     absolute addressing, without marking the page referenced.
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: virtual address in the guest address space
 * @val: pointer to the unsigned long value to return
 *
 * Returns 0 if the value was read, -ENOMEM if out of memory and -EFAULT
 * if reading using the virtual address failed.
 *
 * Called with gmap->mm->mmap_sem in read.
 */
int gmap_read_table(struct gmap *gmap, unsigned long gaddr, unsigned long *val)
{
	unsigned long address, vmaddr;
	spinlock_t *ptl;
	pte_t *ptep, pte;
	int rc;

	while (1) {
		rc = -EAGAIN;
		ptep = gmap_pte_op_walk(gmap, gaddr, &ptl);
		if (ptep) {
			pte = *ptep;
			if (pte_present(pte) && (pte_val(pte) & _PAGE_READ)) {
				address = pte_val(pte) & PAGE_MASK;
				address += gaddr & ~PAGE_MASK;
				*val = *(unsigned long *) address;
				pte_val(*ptep) |= _PAGE_YOUNG;
				/* Do *NOT* clear the _PAGE_INVALID bit! */
				rc = 0;
			}
			gmap_pte_op_end(ptl);
		}
		if (!rc)
			break;
		vmaddr = __gmap_translate(gmap, gaddr);
		if (IS_ERR_VALUE(vmaddr)) {
			rc = vmaddr;
			break;
		}
		rc = gmap_pte_op_fixup(gmap, gaddr, vmaddr, PROT_READ);
		if (rc)
			break;
	}
	return rc;
}
EXPORT_SYMBOL_GPL(gmap_read_table);

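/*
 * Illustrative sketch (not part of the original file): reading a single
 * guest word through gmap_read_table(), e.g. to inspect a guest DAT table
 * entry without marking the backing page referenced.  gmap_read_table()
 * expects gmap->mm->mmap_sem to be held for reading, so the sketch takes
 * it around the call; the helper name is made up.
 */
static int __maybe_unused gmap_read_table_sketch(struct gmap *gmap,
						 unsigned long gaddr,
						 unsigned long *entry)
{
	int rc;

	down_read(&gmap->mm->mmap_sem);
	/* read one doubleword-aligned value from the guest address space */
	rc = gmap_read_table(gmap, gaddr & ~7UL, entry);
	up_read(&gmap->mm->mmap_sem);
	return rc;
}
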
/**
 * gmap_insert_rmap - add a rmap to the host_to_rmap radix tree
 * @sg: pointer to the shadow guest address space structure
 * @vmaddr: vm address associated with the rmap
 * @rmap: pointer to the rmap structure
 *
 * Called with the sg->guest_table_lock
 */
static inline void gmap_insert_rmap(struct gmap *sg, unsigned long vmaddr,
				    struct gmap_rmap *rmap)
{
	void **slot;

	BUG_ON(!gmap_is_shadow(sg));
	slot = radix_tree_lookup_slot(&sg->host_to_rmap, vmaddr >> PAGE_SHIFT);
	if (slot) {
		rmap->next = radix_tree_deref_slot_protected(slot,
							&sg->guest_table_lock);
		radix_tree_replace_slot(slot, rmap);
	} else {
		rmap->next = NULL;
		radix_tree_insert(&sg->host_to_rmap, vmaddr >> PAGE_SHIFT,
				  rmap);
	}
}

/**
 * gmap_protect_rmap - modify access rights to memory and create an rmap
 * @sg: pointer to the shadow guest address space structure
 * @raddr: rmap address in the shadow gmap
 * @paddr: address in the parent guest address space
 * @len: length of the memory area to protect
 * @prot: indicates access rights: none, read-only or read-write
 *
 * Returns 0 if successfully protected and the rmap was created, -ENOMEM
 * if out of memory and -EFAULT if paddr is invalid.
 */
static int gmap_protect_rmap(struct gmap *sg, unsigned long raddr,
			     unsigned long paddr, unsigned long len, int prot)
{
	struct gmap *parent;
	struct gmap_rmap *rmap;
	unsigned long vmaddr;
	spinlock_t *ptl;
	pte_t *ptep;
	int rc;

	BUG_ON(!gmap_is_shadow(sg));
	parent = sg->parent;
	while (len) {
		vmaddr = __gmap_translate(parent, paddr);
		if (IS_ERR_VALUE(vmaddr))
			return vmaddr;
		rmap = kzalloc(sizeof(*rmap), GFP_KERNEL);
		if (!rmap)
			return -ENOMEM;
		rmap->raddr = raddr;
		rc = radix_tree_preload(GFP_KERNEL);
		if (rc) {
			kfree(rmap);
			return rc;
		}
		rc = -EAGAIN;
		ptep = gmap_pte_op_walk(parent, paddr, &ptl);
		if (ptep) {
			spin_lock(&sg->guest_table_lock);
			rc = ptep_force_prot(parent->mm, paddr, ptep, prot,
					     PGSTE_VSIE_BIT);
			if (!rc)
				gmap_insert_rmap(sg, vmaddr, rmap);
			spin_unlock(&sg->guest_table_lock);
			gmap_pte_op_end(ptl);
		}
		radix_tree_preload_end();
		if (rc) {
			kfree(rmap);
			rc = gmap_pte_op_fixup(parent, paddr, vmaddr, prot);
			if (rc)
				return rc;
			continue;
		}
		paddr += PAGE_SIZE;
		len -= PAGE_SIZE;
	}
	return 0;
}

#define _SHADOW_RMAP_MASK	0x7
#define _SHADOW_RMAP_REGION1	0x5
#define _SHADOW_RMAP_REGION2	0x4
#define _SHADOW_RMAP_REGION3	0x3
#define _SHADOW_RMAP_SEGMENT	0x2
#define _SHADOW_RMAP_PGTABLE	0x1

/**
 * gmap_idte_one - invalidate a single region or segment table entry
 * @asce: region or segment table *origin* + table-type bits
 * @vaddr: virtual address to identify the table entry to flush
 *
 * The invalid bit of a single region or segment table entry is set
 * and the associated TLB entries depending on the entry are flushed.
 * The table-type of the @asce identifies the portion of the @vaddr
 * that is used as the invalidation index.
 */
static inline void gmap_idte_one(unsigned long asce, unsigned long vaddr)
{
	asm volatile(
		"	.insn	rrf,0xb98e0000,%0,%1,0,0"
		: : "a" (asce), "a" (vaddr) : "cc", "memory");
}

/**
 * gmap_unshadow_page - remove a page from a shadow page table
 * @sg: pointer to the shadow guest address space structure
 * @raddr: rmap address in the shadow guest address space
 *
 * Called with the sg->guest_table_lock
 */
static void gmap_unshadow_page(struct gmap *sg, unsigned long raddr)
{
	unsigned long *table;

	BUG_ON(!gmap_is_shadow(sg));
	table = gmap_table_walk(sg, raddr, 0); /* get page table pointer */
	if (!table || *table & _PAGE_INVALID)
		return;
	gmap_call_notifier(sg, raddr, raddr + (1UL << 12) - 1);
	ptep_unshadow_pte(sg->mm, raddr, (pte_t *) table);
}

/**
 * __gmap_unshadow_pgt - remove all entries from a shadow page table
 * @sg: pointer to the shadow guest address space structure
 * @raddr: rmap address in the shadow guest address space
 * @pgt: pointer to the start of a shadow page table
 *
 * Called with the sg->guest_table_lock
 */
static void __gmap_unshadow_pgt(struct gmap *sg, unsigned long raddr,
				unsigned long *pgt)
{
	int i;

	BUG_ON(!gmap_is_shadow(sg));
	for (i = 0; i < 256; i++, raddr += 1UL << 12)
		pgt[i] = _PAGE_INVALID;
}

/**
 * gmap_unshadow_pgt - remove a shadow page table from a segment entry
 * @sg: pointer to the shadow guest address space structure
 * @raddr: address in the shadow guest address space
 *
 * Called with the sg->guest_table_lock
 */
static void gmap_unshadow_pgt(struct gmap *sg, unsigned long raddr)
{
	unsigned long sto, *ste, *pgt;
	struct page *page;

	BUG_ON(!gmap_is_shadow(sg));
	ste = gmap_table_walk(sg, raddr, 1); /* get segment pointer */
	if (!ste || !(*ste & _SEGMENT_ENTRY_ORIGIN))
		return;
	gmap_call_notifier(sg, raddr, raddr + (1UL << 20) - 1);
	sto = (unsigned long) (ste - ((raddr >> 20) & 0x7ff));
	gmap_idte_one(sto | _ASCE_TYPE_SEGMENT, raddr);
	pgt = (unsigned long *)(*ste & _SEGMENT_ENTRY_ORIGIN);
	*ste = _SEGMENT_ENTRY_EMPTY;
	__gmap_unshadow_pgt(sg, raddr, pgt);
	/* Free page table */
	page = pfn_to_page(__pa(pgt) >> PAGE_SHIFT);
	list_del(&page->lru);
	page_table_free_pgste(page);
}

/**
 * __gmap_unshadow_sgt - remove all entries from a shadow segment table
 * @sg: pointer to the shadow guest address space structure
 * @raddr: rmap address in the shadow guest address space
 * @sgt: pointer to the start of a shadow segment table
 *
 * Called with the sg->guest_table_lock
 */
static void __gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr,
				unsigned long *sgt)
{
	unsigned long asce, *pgt;
	struct page *page;
	int i;

	BUG_ON(!gmap_is_shadow(sg));
	asce = (unsigned long) sgt | _ASCE_TYPE_SEGMENT;
	for (i = 0; i < 2048; i++, raddr += 1UL << 20) {
		if (!(sgt[i] & _SEGMENT_ENTRY_ORIGIN))
			continue;
		pgt = (unsigned long *)(sgt[i] & _REGION_ENTRY_ORIGIN);
		sgt[i] = _SEGMENT_ENTRY_EMPTY;
		__gmap_unshadow_pgt(sg, raddr, pgt);
		/* Free page table */
		page = pfn_to_page(__pa(pgt) >> PAGE_SHIFT);
		list_del(&page->lru);
		page_table_free_pgste(page);
	}
}

/**
 * gmap_unshadow_sgt - remove a shadow segment table from a region-3 entry
 * @sg: pointer to the shadow guest address space structure
 * @raddr: rmap address in the shadow guest address space
 *
 * Called with the shadow->guest_table_lock
 */
static void gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr)
{
	unsigned long r3o, *r3e, *sgt;
	struct page *page;

	BUG_ON(!gmap_is_shadow(sg));
	r3e = gmap_table_walk(sg, raddr, 2); /* get region-3 pointer */
	if (!r3e || !(*r3e & _REGION_ENTRY_ORIGIN))
		return;
	gmap_call_notifier(sg, raddr, raddr + (1UL << 31) - 1);
	r3o = (unsigned long) (r3e - ((raddr >> 31) & 0x7ff));
	gmap_idte_one(r3o | _ASCE_TYPE_REGION3, raddr);
	sgt = (unsigned long *)(*r3e & _REGION_ENTRY_ORIGIN);
	*r3e = _REGION3_ENTRY_EMPTY;
	__gmap_unshadow_sgt(sg, raddr, sgt);
	/* Free segment table */
	page = pfn_to_page(__pa(sgt) >> PAGE_SHIFT);
	list_del(&page->lru);
	__free_pages(page, 2);
}

/**
 * __gmap_unshadow_r3t - remove all entries from a shadow region-3 table
 * @sg: pointer to the shadow guest address space structure
 * @raddr: address in the shadow guest address space
 * @r3t: pointer to the start of a shadow region-3 table
 *
 * Called with the sg->guest_table_lock
 */
static void __gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr,
				unsigned long *r3t)
{
	unsigned long asce, *sgt;
	struct page *page;
	int i;

	BUG_ON(!gmap_is_shadow(sg));
	asce = (unsigned long) r3t | _ASCE_TYPE_REGION3;
	for (i = 0; i < 2048; i++, raddr += 1UL << 31) {
		if (!(r3t[i] & _REGION_ENTRY_ORIGIN))
			continue;
		sgt = (unsigned long *)(r3t[i] & _REGION_ENTRY_ORIGIN);
		r3t[i] = _REGION3_ENTRY_EMPTY;
		__gmap_unshadow_sgt(sg, raddr, sgt);
		/* Free segment table */
		page = pfn_to_page(__pa(sgt) >> PAGE_SHIFT);
		list_del(&page->lru);
		__free_pages(page, 2);
	}
}

/**
 * gmap_unshadow_r3t - remove a shadow region-3 table from a region-2 entry
 * @sg: pointer to the shadow guest address space structure
 * @raddr: rmap address in the shadow guest address space
 *
 * Called with the sg->guest_table_lock
 */
static void gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr)
{
	unsigned long r2o, *r2e, *r3t;
	struct page *page;

	BUG_ON(!gmap_is_shadow(sg));
	r2e = gmap_table_walk(sg, raddr, 3); /* get region-2 pointer */
	if (!r2e || !(*r2e & _REGION_ENTRY_ORIGIN))
		return;
	gmap_call_notifier(sg, raddr, raddr + (1UL << 42) - 1);
	r2o = (unsigned long) (r2e - ((raddr >> 42) & 0x7ff));
	gmap_idte_one(r2o | _ASCE_TYPE_REGION2, raddr);
	r3t = (unsigned long *)(*r2e & _REGION_ENTRY_ORIGIN);
	*r2e = _REGION2_ENTRY_EMPTY;
	__gmap_unshadow_r3t(sg, raddr, r3t);
	/* Free region 3 table */
	page = pfn_to_page(__pa(r3t) >> PAGE_SHIFT);
	list_del(&page->lru);
	__free_pages(page, 2);
}

/**
 * __gmap_unshadow_r2t - remove all entries from a shadow region-2 table
 * @sg: pointer to the shadow guest address space structure
 * @raddr: rmap address in the shadow guest address space
 * @r2t: pointer to the start of a shadow region-2 table
 *
 * Called with the sg->guest_table_lock
 */
static void __gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr,
				unsigned long *r2t)
{
	unsigned long asce, *r3t;
	struct page *page;
	int i;

	BUG_ON(!gmap_is_shadow(sg));
	asce = (unsigned long) r2t | _ASCE_TYPE_REGION2;
	for (i = 0; i < 2048; i++, raddr += 1UL << 42) {
		if (!(r2t[i] & _REGION_ENTRY_ORIGIN))
			continue;
		r3t = (unsigned long *)(r2t[i] & _REGION_ENTRY_ORIGIN);
		r2t[i] = _REGION2_ENTRY_EMPTY;
		__gmap_unshadow_r3t(sg, raddr, r3t);
		/* Free region 3 table */
		page = pfn_to_page(__pa(r3t) >> PAGE_SHIFT);
		list_del(&page->lru);
		__free_pages(page, 2);
	}
}

/**
 * gmap_unshadow_r2t - remove a shadow region-2 table from a region-1 entry
 * @sg: pointer to the shadow guest address space structure
 * @raddr: rmap address in the shadow guest address space
 *
 * Called with the sg->guest_table_lock
 */
static void gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr)
{
	unsigned long r1o, *r1e, *r2t;
	struct page *page;

	BUG_ON(!gmap_is_shadow(sg));
	r1e = gmap_table_walk(sg, raddr, 4); /* get region-1 pointer */
	if (!r1e || !(*r1e & _REGION_ENTRY_ORIGIN))
		return;
	gmap_call_notifier(sg, raddr, raddr + (1UL << 53) - 1);
	r1o = (unsigned long) (r1e - ((raddr >> 53) & 0x7ff));
	gmap_idte_one(r1o | _ASCE_TYPE_REGION1, raddr);
	r2t = (unsigned long *)(*r1e & _REGION_ENTRY_ORIGIN);
	*r1e = _REGION1_ENTRY_EMPTY;
	__gmap_unshadow_r2t(sg, raddr, r2t);
	/* Free region 2 table */
	page = pfn_to_page(__pa(r2t) >> PAGE_SHIFT);
	list_del(&page->lru);
	__free_pages(page, 2);
}

/**
 * __gmap_unshadow_r1t - remove all entries from a shadow region-1 table
 * @sg: pointer to the shadow guest address space structure
 * @raddr: rmap address in the shadow guest address space
 * @r1t: pointer to the start of a shadow region-1 table
 *
 * Called with the shadow->guest_table_lock
 */
static void __gmap_unshadow_r1t(struct gmap *sg, unsigned long raddr,
				unsigned long *r1t)
{
	unsigned long asce, *r2t;
	struct page *page;
	int i;

	BUG_ON(!gmap_is_shadow(sg));
	asce = (unsigned long) r1t | _ASCE_TYPE_REGION1;
	for (i = 0; i < 2048; i++, raddr += 1UL << 53) {
		if (!(r1t[i] & _REGION_ENTRY_ORIGIN))
			continue;
		r2t = (unsigned long *)(r1t[i] & _REGION_ENTRY_ORIGIN);
		__gmap_unshadow_r2t(sg, raddr, r2t);
		/* Clear entry and flush translation r1t -> r2t */
		gmap_idte_one(asce, raddr);
		r1t[i] = _REGION1_ENTRY_EMPTY;
		/* Free region 2 table */
		page = pfn_to_page(__pa(r2t) >> PAGE_SHIFT);
		list_del(&page->lru);
		__free_pages(page, 2);
	}
}

/**
 * gmap_unshadow - remove a shadow page table completely
 * @sg: pointer to the shadow guest address space structure
 *
 * Called with sg->guest_table_lock
 */
static void gmap_unshadow(struct gmap *sg)
{
	unsigned long *table;

	BUG_ON(!gmap_is_shadow(sg));
	if (sg->removed)
		return;
	sg->removed = 1;
	gmap_call_notifier(sg, 0, -1UL);
	gmap_flush_tlb(sg);
	table = (unsigned long *)(sg->asce & _ASCE_ORIGIN);
	switch (sg->asce & _ASCE_TYPE_MASK) {
	case _ASCE_TYPE_REGION1:
		__gmap_unshadow_r1t(sg, 0, table);
		break;
	case _ASCE_TYPE_REGION2:
		__gmap_unshadow_r2t(sg, 0, table);
		break;
	case _ASCE_TYPE_REGION3:
		__gmap_unshadow_r3t(sg, 0, table);
		break;
	case _ASCE_TYPE_SEGMENT:
		__gmap_unshadow_sgt(sg, 0, table);
		break;
	}
}

/**
 * gmap_find_shadow - find a specific asce in the list of shadow tables
 * @parent: pointer to the parent gmap
 * @asce: ASCE for which the shadow table is created
 * @edat_level: edat level to be used for the shadow translation
 *
 * Returns the pointer to a gmap if a shadow table with the given asce is
 * already available, ERR_PTR(-EAGAIN) if another one is just being created,
 * otherwise NULL
 */
static struct gmap *gmap_find_shadow(struct gmap *parent, unsigned long asce,
				     int edat_level)
{
	struct gmap *sg;

	list_for_each_entry(sg, &parent->children, list) {
		if (sg->orig_asce != asce || sg->edat_level != edat_level ||
		    sg->removed)
			continue;
		if (!sg->initialized)
			return ERR_PTR(-EAGAIN);
		atomic_inc(&sg->ref_count);
		return sg;
	}
	return NULL;
}

/**
 * gmap_shadow_valid - check if a shadow guest address space matches the
 *                     given properties and is still valid
 * @sg: pointer to the shadow guest address space structure
 * @asce: ASCE for which the shadow table is requested
 * @edat_level: edat level to be used for the shadow translation
 *
 * Returns 1 if the gmap shadow is still valid and matches the given
 * properties, the caller can continue using it. Returns 0 otherwise, the
 * caller has to request a new shadow gmap in this case.
 */
int gmap_shadow_valid(struct gmap *sg, unsigned long asce, int edat_level)
{
	if (sg->removed)
		return 0;
	return sg->orig_asce == asce && sg->edat_level == edat_level;
}
EXPORT_SYMBOL_GPL(gmap_shadow_valid);

/**
 * gmap_shadow - create/find a shadow guest address space
 * @parent: pointer to the parent gmap
 * @asce: ASCE for which the shadow table is created
 * @edat_level: edat level to be used for the shadow translation
 *
 * The pages of the top level page table referred to by the asce parameter
 * will be set to read-only and marked in the PGSTEs of the kvm process.
 * The shadow table will be removed automatically on any change to the
 * PTE mapping for the source table.
 *
 * Returns a guest address space structure, ERR_PTR(-ENOMEM) if out of memory,
 * ERR_PTR(-EAGAIN) if the caller has to retry and ERR_PTR(-EFAULT) if the
 * parent gmap table could not be protected.
 */
struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce,
			 int edat_level)
{
	struct gmap *sg, *new;
	unsigned long limit;
	int rc;

	BUG_ON(gmap_is_shadow(parent));
	spin_lock(&parent->shadow_lock);
	sg = gmap_find_shadow(parent, asce, edat_level);
	spin_unlock(&parent->shadow_lock);
	if (sg)
		return sg;
	/* Create a new shadow gmap */
	limit = -1UL >> (33 - (((asce & _ASCE_TYPE_MASK) >> 2) * 11));
	if (asce & _ASCE_REAL_SPACE)
		limit = -1UL;
	new = gmap_alloc(limit);
	if (!new)
		return ERR_PTR(-ENOMEM);
	new->mm = parent->mm;
	new->parent = gmap_get(parent);
	new->orig_asce = asce;
	new->edat_level = edat_level;
	new->initialized = false;
	spin_lock(&parent->shadow_lock);
	/* Recheck if another CPU created the same shadow */
	sg = gmap_find_shadow(parent, asce, edat_level);
	if (sg) {
		spin_unlock(&parent->shadow_lock);
		gmap_free(new);
		return sg;
	}
	if (asce & _ASCE_REAL_SPACE) {
		/* only allow one real-space gmap shadow */
		list_for_each_entry(sg, &parent->children, list) {
			if (sg->orig_asce & _ASCE_REAL_SPACE) {
				spin_lock(&sg->guest_table_lock);
				gmap_unshadow(sg);
				spin_unlock(&sg->guest_table_lock);
				list_del(&sg->list);
				gmap_put(sg);
				break;
			}
		}
	}
	atomic_set(&new->ref_count, 2);
	list_add(&new->list, &parent->children);
	if (asce & _ASCE_REAL_SPACE) {
		/* nothing to protect, return right away */
		new->initialized = true;
		spin_unlock(&parent->shadow_lock);
		return new;
	}
	spin_unlock(&parent->shadow_lock);
	/* protect after insertion, so it will get properly invalidated */
	down_read(&parent->mm->mmap_sem);
	rc = gmap_protect_range(parent, asce & _ASCE_ORIGIN,
				((asce & _ASCE_TABLE_LENGTH) + 1) * 4096,
				PROT_READ, PGSTE_VSIE_BIT);
	up_read(&parent->mm->mmap_sem);
	spin_lock(&parent->shadow_lock);
	new->initialized = true;
	if (rc) {
		list_del(&new->list);
		gmap_free(new);
		new = ERR_PTR(rc);
	}
	spin_unlock(&parent->shadow_lock);
	return new;
}
EXPORT_SYMBOL_GPL(gmap_shadow);

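/*
 * Illustrative sketch (not part of the original file): obtaining and
 * releasing a shadow gmap for a guest ASCE, roughly as a nested
 * virtualization (VSIE) style caller might do.  @asce and @edat_level
 * would normally come from the guest state; ERR_PTR(-EAGAIN) simply
 * means the caller has to retry.  The helper name is made up.
 */
static int __maybe_unused gmap_shadow_sketch(struct gmap *parent,
					     unsigned long asce, int edat_level)
{
	struct gmap *sg;

	sg = gmap_shadow(parent, asce, edat_level);
	if (IS_ERR(sg))
		return PTR_ERR(sg);
	/*
	 * Use the shadow; before reusing a cached pointer later, check
	 * that it still matches the guest ASCE and was not invalidated.
	 */
	if (!gmap_shadow_valid(sg, asce, edat_level)) {
		/* request a new shadow gmap in this case */
	}
	/* Drop the reference returned by gmap_shadow() */
	gmap_put(sg);
	return 0;
}
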
/**
 * gmap_shadow_r2t - create an empty shadow region 2 table
 * @sg: pointer to the shadow guest address space structure
 * @saddr: faulting address in the shadow gmap
 * @r2t: parent gmap address of the region 2 table to get shadowed
 * @fake: r2t references contiguous guest memory block, not a r2t
 *
 * The r2t parameter specifies the address of the source table. The
 * four pages of the source table are made read-only in the parent gmap
 * address space. A write to the source table area @r2t will automatically
 * remove the shadow r2 table and all of its descendants.
 *
 * Returns 0 if successfully shadowed or already shadowed, -EAGAIN if the
 * shadow table structure is incomplete, -ENOMEM if out of memory and
 * -EFAULT if an address in the parent gmap could not be resolved.
 *
 * Called with sg->mm->mmap_sem in read.
 */
int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t,
		    int fake)
{
	unsigned long raddr, origin, offset, len;
	unsigned long *s_r2t, *table;
	struct page *page;
	int rc;

	BUG_ON(!gmap_is_shadow(sg));
	/* Allocate a shadow region second table */
	page = alloc_pages(GFP_KERNEL, 2);
	if (!page)
		return -ENOMEM;
	page->index = r2t & _REGION_ENTRY_ORIGIN;
	if (fake)
		page->index |= GMAP_SHADOW_FAKE_TABLE;
	s_r2t = (unsigned long *) page_to_phys(page);
	/* Install shadow region second table */
	spin_lock(&sg->guest_table_lock);
	table = gmap_table_walk(sg, saddr, 4); /* get region-1 pointer */
	if (!table) {
		rc = -EAGAIN;		/* Race with unshadow */
		goto out_free;
	}
	if (!(*table & _REGION_ENTRY_INVALID)) {
		rc = 0;			/* Already established */
		goto out_free;
	} else if (*table & _REGION_ENTRY_ORIGIN) {
		rc = -EAGAIN;		/* Race with shadow */
		goto out_free;
	}
	crst_table_init(s_r2t, _REGION2_ENTRY_EMPTY);
	/* mark as invalid as long as the parent table is not protected */
	*table = (unsigned long) s_r2t | _REGION_ENTRY_LENGTH |
		 _REGION_ENTRY_TYPE_R1 | _REGION_ENTRY_INVALID;
	if (sg->edat_level >= 1)
		*table |= (r2t & _REGION_ENTRY_PROTECT);
	list_add(&page->lru, &sg->crst_list);
	if (fake) {
		/* nothing to protect for fake tables */
		*table &= ~_REGION_ENTRY_INVALID;
		spin_unlock(&sg->guest_table_lock);
		return 0;
	}
	spin_unlock(&sg->guest_table_lock);
	/* Make r2t read-only in parent gmap page table */
	raddr = (saddr & 0xffe0000000000000UL) | _SHADOW_RMAP_REGION1;
	origin = r2t & _REGION_ENTRY_ORIGIN;
	offset = ((r2t & _REGION_ENTRY_OFFSET) >> 6) * 4096;
	len = ((r2t & _REGION_ENTRY_LENGTH) + 1) * 4096 - offset;
	rc = gmap_protect_rmap(sg, raddr, origin + offset, len, PROT_READ);
	spin_lock(&sg->guest_table_lock);
	if (!rc) {
		table = gmap_table_walk(sg, saddr, 4);
		if (!table || (*table & _REGION_ENTRY_ORIGIN) !=
			      (unsigned long) s_r2t)
			rc = -EAGAIN;		/* Race with unshadow */
		else
			*table &= ~_REGION_ENTRY_INVALID;
	} else {
		gmap_unshadow_r2t(sg, raddr);
	}
	spin_unlock(&sg->guest_table_lock);
	return rc;
out_free:
	spin_unlock(&sg->guest_table_lock);
	__free_pages(page, 2);
	return rc;
}
EXPORT_SYMBOL_GPL(gmap_shadow_r2t);

/**
 * gmap_shadow_r3t - create a shadow region 3 table
 * @sg: pointer to the shadow guest address space structure
 * @saddr: faulting address in the shadow gmap
 * @r3t: parent gmap address of the region 3 table to get shadowed
 * @fake: r3t references contiguous guest memory block, not a r3t
 *
 * Returns 0 if successfully shadowed or already shadowed, -EAGAIN if the
 * shadow table structure is incomplete, -ENOMEM if out of memory and
 * -EFAULT if an address in the parent gmap could not be resolved.
 *
 * Called with sg->mm->mmap_sem in read.
 */
int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t,
		    int fake)
{
	unsigned long raddr, origin, offset, len;
	unsigned long *s_r3t, *table;
	struct page *page;
	int rc;

	BUG_ON(!gmap_is_shadow(sg));
	/* Allocate a shadow region third table */
	page = alloc_pages(GFP_KERNEL, 2);
	if (!page)
		return -ENOMEM;
	page->index = r3t & _REGION_ENTRY_ORIGIN;
	if (fake)
		page->index |= GMAP_SHADOW_FAKE_TABLE;
	s_r3t = (unsigned long *) page_to_phys(page);
	/* Install shadow region third table */
	spin_lock(&sg->guest_table_lock);
	table = gmap_table_walk(sg, saddr, 3); /* get region-2 pointer */
	if (!table) {
		rc = -EAGAIN;		/* Race with unshadow */
		goto out_free;
	}
	if (!(*table & _REGION_ENTRY_INVALID)) {
		rc = 0;			/* Already established */
		goto out_free;
	} else if (*table & _REGION_ENTRY_ORIGIN) {
		rc = -EAGAIN;		/* Race with shadow */
		goto out_free;
	}
	crst_table_init(s_r3t, _REGION3_ENTRY_EMPTY);
	/* mark as invalid as long as the parent table is not protected */
	*table = (unsigned long) s_r3t | _REGION_ENTRY_LENGTH |
		 _REGION_ENTRY_TYPE_R2 | _REGION_ENTRY_INVALID;
	if (sg->edat_level >= 1)
		*table |= (r3t & _REGION_ENTRY_PROTECT);
	list_add(&page->lru, &sg->crst_list);
	if (fake) {
		/* nothing to protect for fake tables */
		*table &= ~_REGION_ENTRY_INVALID;
		spin_unlock(&sg->guest_table_lock);
		return 0;
	}
	spin_unlock(&sg->guest_table_lock);
	/* Make r3t read-only in parent gmap page table */
	raddr = (saddr & 0xfffffc0000000000UL) | _SHADOW_RMAP_REGION2;
	origin = r3t & _REGION_ENTRY_ORIGIN;
	offset = ((r3t & _REGION_ENTRY_OFFSET) >> 6) * 4096;
	len = ((r3t & _REGION_ENTRY_LENGTH) + 1) * 4096 - offset;
	rc = gmap_protect_rmap(sg, raddr, origin + offset, len, PROT_READ);
	spin_lock(&sg->guest_table_lock);
	if (!rc) {
		table = gmap_table_walk(sg, saddr, 3);
		if (!table || (*table & _REGION_ENTRY_ORIGIN) !=
			      (unsigned long) s_r3t)
			rc = -EAGAIN;		/* Race with unshadow */
		else
			*table &= ~_REGION_ENTRY_INVALID;
	} else {
		gmap_unshadow_r3t(sg, raddr);
	}
	spin_unlock(&sg->guest_table_lock);
	return rc;
out_free:
	spin_unlock(&sg->guest_table_lock);
	__free_pages(page, 2);
	return rc;
}
EXPORT_SYMBOL_GPL(gmap_shadow_r3t);

/**
 * gmap_shadow_sgt - create a shadow segment table
 * @sg: pointer to the shadow guest address space structure
 * @saddr: faulting address in the shadow gmap
 * @sgt: parent gmap address of the segment table to get shadowed
 * @fake: sgt references contiguous guest memory block, not a sgt
 *
 * Returns: 0 if successfully shadowed or already shadowed, -EAGAIN if the
 * shadow table structure is incomplete, -ENOMEM if out of memory and
 * -EFAULT if an address in the parent gmap could not be resolved.
 *
 * Called with sg->mm->mmap_sem in read.
 */
int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt,
		    int fake)
{
	unsigned long raddr, origin, offset, len;
	unsigned long *s_sgt, *table;
	struct page *page;
	int rc;

	BUG_ON(!gmap_is_shadow(sg) || (sgt & _REGION3_ENTRY_LARGE));
	/* Allocate a shadow segment table */
	page = alloc_pages(GFP_KERNEL, 2);
	if (!page)
		return -ENOMEM;
	page->index = sgt & _REGION_ENTRY_ORIGIN;
	if (fake)
		page->index |= GMAP_SHADOW_FAKE_TABLE;
	s_sgt = (unsigned long *) page_to_phys(page);
	/* Install shadow segment table */
	spin_lock(&sg->guest_table_lock);
	table = gmap_table_walk(sg, saddr, 2); /* get region-3 pointer */
	if (!table) {
		rc = -EAGAIN;		/* Race with unshadow */
		goto out_free;
	}
	if (!(*table & _REGION_ENTRY_INVALID)) {
		rc = 0;			/* Already established */
		goto out_free;
	} else if (*table & _REGION_ENTRY_ORIGIN) {
		rc = -EAGAIN;		/* Race with shadow */
		goto out_free;
	}
	crst_table_init(s_sgt, _SEGMENT_ENTRY_EMPTY);
	/* mark as invalid as long as the parent table is not protected */
	*table = (unsigned long) s_sgt | _REGION_ENTRY_LENGTH |
		 _REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_INVALID;
	if (sg->edat_level >= 1)
		*table |= sgt & _REGION_ENTRY_PROTECT;
	list_add(&page->lru, &sg->crst_list);
	if (fake) {
		/* nothing to protect for fake tables */
		*table &= ~_REGION_ENTRY_INVALID;
		spin_unlock(&sg->guest_table_lock);
		return 0;
	}
	spin_unlock(&sg->guest_table_lock);
	/* Make sgt read-only in parent gmap page table */
	raddr = (saddr & 0xffffffff80000000UL) | _SHADOW_RMAP_REGION3;
	origin = sgt & _REGION_ENTRY_ORIGIN;
	offset = ((sgt & _REGION_ENTRY_OFFSET) >> 6) * 4096;
	len = ((sgt & _REGION_ENTRY_LENGTH) + 1) * 4096 - offset;
	rc = gmap_protect_rmap(sg, raddr, origin + offset, len, PROT_READ);
	spin_lock(&sg->guest_table_lock);
	if (!rc) {
		table = gmap_table_walk(sg, saddr, 2);
		if (!table || (*table & _REGION_ENTRY_ORIGIN) !=
			      (unsigned long) s_sgt)
			rc = -EAGAIN;		/* Race with unshadow */
		else
			*table &= ~_REGION_ENTRY_INVALID;
	} else {
		gmap_unshadow_sgt(sg, raddr);
	}
	spin_unlock(&sg->guest_table_lock);
	return rc;
out_free:
	spin_unlock(&sg->guest_table_lock);
	__free_pages(page, 2);
	return rc;
}
EXPORT_SYMBOL_GPL(gmap_shadow_sgt);

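/*
 * Illustrative sketch (not part of the original file): the typical order
 * in which a VSIE-style fault handler chains the gmap_shadow_*() helpers,
 * walking from the top level table down to the final pte.  A real handler
 * only creates the levels that are missing and that exist for the guest
 * ASCE type; here the parent-table origins and the pte are plain
 * parameters, the "fake" flags are left at 0, and the helper name is made
 * up.  Prototypes for the later functions come from <asm/gmap.h>.
 */
static int __maybe_unused gmap_shadow_fault_sketch(struct gmap *sg,
						   unsigned long saddr,
						   unsigned long r2t,
						   unsigned long r3t,
						   unsigned long sgt,
						   unsigned long pgt,
						   pte_t pte)
{
	int rc;

	rc = gmap_shadow_r2t(sg, saddr, r2t, 0);
	if (rc)
		return rc;	/* -EAGAIN, -ENOMEM or -EFAULT */
	rc = gmap_shadow_r3t(sg, saddr, r3t, 0);
	if (rc)
		return rc;
	rc = gmap_shadow_sgt(sg, saddr, sgt, 0);
	if (rc)
		return rc;
	rc = gmap_shadow_pgt(sg, saddr, pgt, 0);
	if (rc)
		return rc;
	/* finally shadow the pte for the faulting page itself */
	return gmap_shadow_page(sg, saddr, pte);
}
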
/**
 * gmap_shadow_pgt_lookup - find a shadow page table
 * @sg: pointer to the shadow guest address space structure
 * @saddr: the address in the shadow guest address space
 * @pgt: parent gmap address of the page table to get shadowed
 * @dat_protection: if the pgtable is marked as protected by dat
 * @fake: pgt references contiguous guest memory block, not a pgtable
 *
 * Returns 0 if the shadow page table was found and -EAGAIN if the page
 * table was not found.
 *
 * Called with sg->mm->mmap_sem in read.
 */
int gmap_shadow_pgt_lookup(struct gmap *sg, unsigned long saddr,
			   unsigned long *pgt, int *dat_protection,
			   int *fake)
{
	unsigned long *table;
	struct page *page;
	int rc;

	BUG_ON(!gmap_is_shadow(sg));
	spin_lock(&sg->guest_table_lock);
	table = gmap_table_walk(sg, saddr, 1); /* get segment pointer */
	if (table && !(*table & _SEGMENT_ENTRY_INVALID)) {
		/* Shadow page tables are full pages (pte+pgste) */
		page = pfn_to_page(*table >> PAGE_SHIFT);
		*pgt = page->index & ~GMAP_SHADOW_FAKE_TABLE;
		*dat_protection = !!(*table & _SEGMENT_ENTRY_PROTECT);
		*fake = !!(page->index & GMAP_SHADOW_FAKE_TABLE);
		rc = 0;
	} else {
		rc = -EAGAIN;
	}
	spin_unlock(&sg->guest_table_lock);
	return rc;
}
EXPORT_SYMBOL_GPL(gmap_shadow_pgt_lookup);
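
/*
 * Illustrative call sequence (a sketch only, not lifted from the VSIE
 * fault handling code): a fault in the shadow address space is typically
 * resolved by first looking for an existing shadow page table and, if
 * none is installed yet, creating it before the final pte is shadowed:
 *
 *	rc = gmap_shadow_pgt_lookup(sg, saddr, &pgt, &dat_protection, &fake);
 *	if (rc)
 *		rc = gmap_shadow_pgt(sg, saddr, pgt, fake);
 *	if (!rc)
 *		rc = gmap_shadow_page(sg, saddr, pte);
 *
 * where pgt, fake and pte stand for values obtained by walking the
 * parent guest's DAT tables when the lookup fails.  -EAGAIN from any of
 * the steps indicates a race with an unshadow operation and means the
 * whole sequence has to be retried.
 */
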
/**
 * gmap_shadow_pgt - instantiate a shadow page table
 * @sg: pointer to the shadow guest address space structure
 * @saddr: faulting address in the shadow gmap
 * @pgt: parent gmap address of the page table to get shadowed
 * @fake: pgt references contiguous guest memory block, not a pgtable
 *
 * Returns 0 if successfully shadowed or already shadowed, -EAGAIN if the
 * shadow table structure is incomplete, -ENOMEM if out of memory and
 * -EFAULT if an address in the parent gmap could not be resolved.
 *
 * Called with sg->mm->mmap_sem in read.
 */
int gmap_shadow_pgt(struct gmap *sg, unsigned long saddr, unsigned long pgt,
		    int fake)
{
	unsigned long raddr, origin;
	unsigned long *s_pgt, *table;
	struct page *page;
	int rc;

	BUG_ON(!gmap_is_shadow(sg) || (pgt & _SEGMENT_ENTRY_LARGE));
	/* Allocate a shadow page table */
	page = page_table_alloc_pgste(sg->mm);
	if (!page)
		return -ENOMEM;
	page->index = pgt & _SEGMENT_ENTRY_ORIGIN;
	if (fake)
		page->index |= GMAP_SHADOW_FAKE_TABLE;
	s_pgt = (unsigned long *) page_to_phys(page);
	/* Install shadow page table */
	spin_lock(&sg->guest_table_lock);
	table = gmap_table_walk(sg, saddr, 1); /* get segment pointer */
	if (!table) {
		rc = -EAGAIN;		/* Race with unshadow */
		goto out_free;
	}
	if (!(*table & _SEGMENT_ENTRY_INVALID)) {
		rc = 0;			/* Already established */
		goto out_free;
	} else if (*table & _SEGMENT_ENTRY_ORIGIN) {
		rc = -EAGAIN;		/* Race with shadow */
		goto out_free;
	}
	/* mark as invalid as long as the parent table is not protected */
	*table = (unsigned long) s_pgt | _SEGMENT_ENTRY |
		 (pgt & _SEGMENT_ENTRY_PROTECT) | _SEGMENT_ENTRY_INVALID;
	list_add(&page->lru, &sg->pt_list);
	if (fake) {
		/* nothing to protect for fake tables */
		*table &= ~_SEGMENT_ENTRY_INVALID;
		spin_unlock(&sg->guest_table_lock);
		return 0;
	}
	spin_unlock(&sg->guest_table_lock);
	/* Make pgt read-only in parent gmap page table (not the pgste) */
	raddr = (saddr & 0xfffffffffff00000UL) | _SHADOW_RMAP_SEGMENT;
	origin = pgt & _SEGMENT_ENTRY_ORIGIN & PAGE_MASK;
	rc = gmap_protect_rmap(sg, raddr, origin, PAGE_SIZE, PROT_READ);
	spin_lock(&sg->guest_table_lock);
	if (!rc) {
		table = gmap_table_walk(sg, saddr, 1);
		if (!table || (*table & _SEGMENT_ENTRY_ORIGIN) !=
			      (unsigned long) s_pgt)
			rc = -EAGAIN;	/* Race with unshadow */
		else
			*table &= ~_SEGMENT_ENTRY_INVALID;
	} else {
		gmap_unshadow_pgt(sg, raddr);
	}
	spin_unlock(&sg->guest_table_lock);
	return rc;
out_free:
	spin_unlock(&sg->guest_table_lock);
	page_table_free_pgste(page);
	return rc;
}
EXPORT_SYMBOL_GPL(gmap_shadow_pgt);

/**
 * gmap_shadow_page - create a shadow page mapping
 * @sg: pointer to the shadow guest address space structure
 * @saddr: faulting address in the shadow gmap
 * @pte: pte in parent gmap address space to get shadowed
 *
 * Returns 0 if successfully shadowed or already shadowed, -EAGAIN if the
 * shadow table structure is incomplete, -ENOMEM if out of memory and
 * -EFAULT if an address in the parent gmap could not be resolved.
 *
 * Called with sg->mm->mmap_sem in read.
 */
int gmap_shadow_page(struct gmap *sg, unsigned long saddr, pte_t pte)
{
	struct gmap *parent;
	struct gmap_rmap *rmap;
	unsigned long vmaddr, paddr;
	spinlock_t *ptl;
	pte_t *sptep, *tptep;
	int prot;
	int rc;

	BUG_ON(!gmap_is_shadow(sg));
	parent = sg->parent;
	prot = (pte_val(pte) & _PAGE_PROTECT) ? PROT_READ : PROT_WRITE;

	rmap = kzalloc(sizeof(*rmap), GFP_KERNEL);
	if (!rmap)
		return -ENOMEM;
	rmap->raddr = (saddr & PAGE_MASK) | _SHADOW_RMAP_PGTABLE;

	while (1) {
		paddr = pte_val(pte) & PAGE_MASK;
		vmaddr = __gmap_translate(parent, paddr);
		if (IS_ERR_VALUE(vmaddr)) {
			rc = vmaddr;
			break;
		}
		rc = radix_tree_preload(GFP_KERNEL);
		if (rc)
			break;
		rc = -EAGAIN;
		sptep = gmap_pte_op_walk(parent, paddr, &ptl);
		if (sptep) {
			spin_lock(&sg->guest_table_lock);
			/* Get page table pointer */
			tptep = (pte_t *) gmap_table_walk(sg, saddr, 0);
			if (!tptep) {
				spin_unlock(&sg->guest_table_lock);
				gmap_pte_op_end(ptl);
				radix_tree_preload_end();
				break;
			}
			rc = ptep_shadow_pte(sg->mm, saddr, sptep, tptep, pte);
			if (rc > 0) {
				/* Success and a new mapping */
				gmap_insert_rmap(sg, vmaddr, rmap);
				rmap = NULL;
				rc = 0;
			}
			gmap_pte_op_end(ptl);
			spin_unlock(&sg->guest_table_lock);
		}
		radix_tree_preload_end();
		if (!rc)
			break;
		rc = gmap_pte_op_fixup(parent, paddr, vmaddr, prot);
		if (rc)
			break;
	}
	kfree(rmap);
	return rc;
}
EXPORT_SYMBOL_GPL(gmap_shadow_page);
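
/*
 * The retry loop in gmap_shadow_page() above mirrors the fixup pattern
 * used elsewhere in this file: the parent pte is looked up under its page
 * table lock, the shadow pte under guest_table_lock, and if either walk
 * fails (rc == -EAGAIN) the parent mapping is faulted in or its
 * protection adjusted via gmap_pte_op_fixup() before trying again.  The
 * preallocated rmap is only consumed when ptep_shadow_pte() reports a
 * newly established mapping; otherwise it is freed on exit.
 */
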
/**
 * gmap_shadow_notify - handle notifications for shadow gmap
 * @sg: pointer to the shadow guest address space structure
 * @vmaddr: affected host virtual address
 * @offset: offset of the affected page within its 1 MB segment
 * @pte: pointer to the invalidated pte in the parent gmap
 *
 * Called with sg->parent->shadow_lock.
 */
static void gmap_shadow_notify(struct gmap *sg, unsigned long vmaddr,
			       unsigned long offset, pte_t *pte)
{
	struct gmap_rmap *rmap, *rnext, *head;
	unsigned long gaddr, start, end, bits, raddr;
	unsigned long *table;

	BUG_ON(!gmap_is_shadow(sg));
	spin_lock(&sg->parent->guest_table_lock);
	table = radix_tree_lookup(&sg->parent->host_to_guest,
				  vmaddr >> PMD_SHIFT);
	gaddr = table ? __gmap_segment_gaddr(table) + offset : 0;
	spin_unlock(&sg->parent->guest_table_lock);
	if (!table)
		return;

	spin_lock(&sg->guest_table_lock);
	if (sg->removed) {
		spin_unlock(&sg->guest_table_lock);
		return;
	}
	/* Check for top level table */
	start = sg->orig_asce & _ASCE_ORIGIN;
	end = start + ((sg->orig_asce & _ASCE_TABLE_LENGTH) + 1) * 4096;
	if (!(sg->orig_asce & _ASCE_REAL_SPACE) && gaddr >= start &&
	    gaddr < end) {
		/* The complete shadow table has to go */
		gmap_unshadow(sg);
		spin_unlock(&sg->guest_table_lock);
		list_del(&sg->list);
		gmap_put(sg);
		return;
	}
	/* Remove the page table tree for one specific entry */
	head = radix_tree_delete(&sg->host_to_rmap, vmaddr >> 12);
	gmap_for_each_rmap_safe(rmap, rnext, head) {
		bits = rmap->raddr & _SHADOW_RMAP_MASK;
		raddr = rmap->raddr ^ bits;
		switch (bits) {
		case _SHADOW_RMAP_REGION1:
			gmap_unshadow_r2t(sg, raddr);
			break;
		case _SHADOW_RMAP_REGION2:
			gmap_unshadow_r3t(sg, raddr);
			break;
		case _SHADOW_RMAP_REGION3:
			gmap_unshadow_sgt(sg, raddr);
			break;
		case _SHADOW_RMAP_SEGMENT:
			gmap_unshadow_pgt(sg, raddr);
			break;
		case _SHADOW_RMAP_PGTABLE:
			gmap_unshadow_page(sg, raddr);
			break;
		}
		kfree(rmap);
	}
	spin_unlock(&sg->guest_table_lock);
}
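
/*
 * Note on the teardown policy implemented above: if the invalidated
 * parent page backs the top level table of the shadow ASCE, the entire
 * shadow gmap is removed and its reference dropped; otherwise only the
 * rmap chain recorded for this host page is processed and the affected
 * region, segment or page table subtrees are unshadowed individually.
 */
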
/**
 * ptep_notify - call all invalidation callbacks for a specific pte.
 * @mm: pointer to the process mm_struct
 * @vmaddr: virtual address in the process address space
 * @pte: pointer to the page table entry
 * @bits: bits from the pgste that caused the notify call
 *
 * This function is assumed to be called with the page table lock held
 * for the pte to notify.
 */
void ptep_notify(struct mm_struct *mm, unsigned long vmaddr,
		 pte_t *pte, unsigned long bits)
{
	unsigned long offset, gaddr;
	unsigned long *table;
	struct gmap *gmap, *sg, *next;

	offset = ((unsigned long) pte) & (255 * sizeof(pte_t));
	offset = offset * (4096 / sizeof(pte_t));
	rcu_read_lock();
	list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
		if (!list_empty(&gmap->children) && (bits & PGSTE_VSIE_BIT)) {
			spin_lock(&gmap->shadow_lock);
			list_for_each_entry_safe(sg, next,
						 &gmap->children, list)
				gmap_shadow_notify(sg, vmaddr, offset, pte);
			spin_unlock(&gmap->shadow_lock);
		}
		if (!(bits & PGSTE_IN_BIT))
			continue;
		spin_lock(&gmap->guest_table_lock);
		table = radix_tree_lookup(&gmap->host_to_guest,
					  vmaddr >> PMD_SHIFT);
		if (table)
			gaddr = __gmap_segment_gaddr(table) + offset;
		spin_unlock(&gmap->guest_table_lock);
		if (table)
			gmap_call_notifier(gmap, gaddr, gaddr + PAGE_SIZE - 1);
	}
	rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(ptep_notify);

static inline void thp_split_mm(struct mm_struct *mm)
{
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	struct vm_area_struct *vma;
	unsigned long addr;

	for (vma = mm->mmap; vma != NULL; vma = vma->vm_next) {
		for (addr = vma->vm_start;
		     addr < vma->vm_end;
		     addr += PAGE_SIZE)
			follow_page(vma, addr, FOLL_SPLIT);
		vma->vm_flags &= ~VM_HUGEPAGE;
		vma->vm_flags |= VM_NOHUGEPAGE;
	}
	mm->def_flags |= VM_NOHUGEPAGE;
#endif
}

/*
 * Switch on pgstes for the userspace process of the current task (for KVM).
 */
int s390_enable_sie(void)
{
	struct mm_struct *mm = current->mm;

	/* Do we have pgstes? if yes, we are done */
	if (mm_has_pgste(mm))
		return 0;
	/* Fail if the page tables are 2K */
	if (!mm_alloc_pgste(mm))
		return -EINVAL;
	down_write(&mm->mmap_sem);
	mm->context.has_pgste = 1;
	/* split thp mappings and disable thp for future mappings */
	thp_split_mm(mm);
	up_write(&mm->mmap_sem);
	return 0;
}
EXPORT_SYMBOL_GPL(s390_enable_sie);
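
/*
 * s390_enable_sie() is meant to be called once per process before the
 * first SIE entry (typically by KVM when a virtual machine is created).
 * The 4K page tables with attached pgstes cannot be retrofitted to an mm
 * that was set up with 2K page tables, hence the mm_alloc_pgste() check
 * above; existing THP mappings are split and further THP use disabled
 * because pgste handling operates on 4K ptes only.
 */
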
/*
 * Enable storage key handling from now on and initialize the storage
 * keys with the default key.
 */
static int __s390_enable_skey(pte_t *pte, unsigned long addr,
			      unsigned long next, struct mm_walk *walk)
{
	/*
	 * Remove all zero page mappings; once a policy to forbid zero page
	 * mappings has been established, subsequent faults for these pages
	 * will get fresh anonymous pages.
	 */
	if (is_zero_pfn(pte_pfn(*pte)))
		ptep_xchg_direct(walk->mm, addr, pte, __pte(_PAGE_INVALID));
	/* Clear storage key */
	ptep_zap_key(walk->mm, addr, pte);
	return 0;
}

int s390_enable_skey(void)
{
	struct mm_walk walk = { .pte_entry = __s390_enable_skey };
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	int rc = 0;

	down_write(&mm->mmap_sem);
	if (mm_use_skey(mm))
		goto out_up;

	mm->context.use_skey = 1;
	for (vma = mm->mmap; vma; vma = vma->vm_next) {
		if (ksm_madvise(vma, vma->vm_start, vma->vm_end,
				MADV_UNMERGEABLE, &vma->vm_flags)) {
			mm->context.use_skey = 0;
			rc = -ENOMEM;
			goto out_up;
		}
	}
	mm->def_flags &= ~VM_MERGEABLE;

	walk.mm = mm;
	walk_page_range(0, TASK_SIZE, &walk);

out_up:
	up_write(&mm->mmap_sem);
	return rc;
}
EXPORT_SYMBOL_GPL(s390_enable_skey);

/*
 * Reset CMMA state, make all pages stable again.
 */
static int __s390_reset_cmma(pte_t *pte, unsigned long addr,
			     unsigned long next, struct mm_walk *walk)
{
	ptep_zap_unused(walk->mm, addr, pte, 1);
	return 0;
}

void s390_reset_cmma(struct mm_struct *mm)
{
	struct mm_walk walk = { .pte_entry = __s390_reset_cmma };

	down_write(&mm->mmap_sem);
	walk.mm = mm;
	walk_page_range(0, TASK_SIZE, &walk);
	up_write(&mm->mmap_sem);
}
EXPORT_SYMBOL_GPL(s390_reset_cmma);
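
/*
 * Background for s390_enable_skey() above: storage keys are a property of
 * the physical page, so pages must not be shared between processes once
 * key handling is active.  This is why every vma is marked
 * MADV_UNMERGEABLE (breaking existing KSM sharing) and why zero page
 * mappings are zapped in __s390_enable_skey() before the keys of all
 * mapped pages are cleared.  s390_reset_cmma() is the CMMA counterpart
 * that makes all pages stable again, e.g. when the guest memory state is
 * reset.
 */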