// SPDX-License-Identifier: GPL-2.0
/*
 * Common Ultravisor functions and initialization
 *
 * Copyright IBM Corp. 2019, 2020
 */
#define KMSG_COMPONENT "prot_virt"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/sizes.h>
#include <linux/bitmap.h>
#include <linux/memblock.h>
#include <linux/pagemap.h>
#include <linux/swap.h>
#include <asm/facility.h>
#include <asm/sections.h>
#include <asm/uv.h>

/* the bootdata_preserved fields come from ones in arch/s390/boot/uv.c */
#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST
int __bootdata_preserved(prot_virt_guest);
#endif

struct uv_info __bootdata_preserved(uv_info);

#if IS_ENABLED(CONFIG_KVM)
int __bootdata_preserved(prot_virt_host);
EXPORT_SYMBOL(prot_virt_host);
EXPORT_SYMBOL(uv_info);

static int __init uv_init(phys_addr_t stor_base, unsigned long stor_len)
{
	struct uv_cb_init uvcb = {
		.header.cmd = UVC_CMD_INIT_UV,
		.header.len = sizeof(uvcb),
		.stor_origin = stor_base,
		.stor_len = stor_len,
	};

	if (uv_call(0, (uint64_t)&uvcb)) {
		pr_err("Ultravisor init failed with rc: 0x%x rrc: 0x%x\n",
		       uvcb.header.rc, uvcb.header.rrc);
		return -1;
	}
	return 0;
}

void __init setup_uv(void)
{
	void *uv_stor_base;

	if (!is_prot_virt_host())
		return;

	uv_stor_base = memblock_alloc_try_nid(
		uv_info.uv_base_stor_len, SZ_1M, SZ_2G,
		MEMBLOCK_ALLOC_ACCESSIBLE, NUMA_NO_NODE);
	if (!uv_stor_base) {
		pr_warn("Failed to reserve %lu bytes for ultravisor base storage\n",
			uv_info.uv_base_stor_len);
		goto fail;
	}

	if (uv_init(__pa(uv_stor_base), uv_info.uv_base_stor_len)) {
		memblock_free(uv_stor_base, uv_info.uv_base_stor_len);
		goto fail;
	}

	pr_info("Reserving %luMB as ultravisor base storage\n",
		uv_info.uv_base_stor_len >> 20);
	return;
fail:
	pr_info("Disabling support for protected virtualization");
	prot_virt_host = 0;
}
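
/*
 * For reference, a minimal sketch of the UV call wrappers used throughout
 * this file, assuming the definitions in arch/s390/include/asm/uv.h: the
 * UVC instruction sets a condition code, __uv_call() issues the instruction
 * exactly once and returns that cc, while uv_call() retries for as long as
 * the Ultravisor reports busy. Details may differ from the actual header;
 * this is illustration, not the authoritative definition.
 *
 *	static inline int __uv_call(unsigned long r1, unsigned long r2)
 *	{
 *		int cc;
 *
 *		asm volatile(
 *			"	.insn rrf,0xB9A40000,%[r1],%[r2],0,0\n"
 *			"	ipm	%[cc]\n"
 *			"	srl	%[cc],28\n"
 *			: [cc] "=d" (cc)
 *			: [r1] "a" (r1), [r2] "a" (r2)
 *			: "memory", "cc");
 *		return cc;
 *	}
 *
 *	static inline int uv_call(unsigned long r1, unsigned long r2)
 *	{
 *		int cc;
 *
 *		do {
 *			cc = __uv_call(r1, r2);
 *		} while (cc > 1);	/-* cc > 1 means busy, try again *-/
 *		return cc;
 *	}
 */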

/*
 * Requests the Ultravisor to pin the page in the shared state. This will
 * cause an intercept when the guest attempts to unshare the pinned page.
 */
static int uv_pin_shared(unsigned long paddr)
{
	struct uv_cb_cfs uvcb = {
		.header.cmd = UVC_CMD_PIN_PAGE_SHARED,
		.header.len = sizeof(uvcb),
		.paddr = paddr,
	};

	if (uv_call(0, (u64)&uvcb))
		return -EINVAL;
	return 0;
}

/*
 * Requests the Ultravisor to destroy a guest page and make it
 * accessible to the host. The destroy clears the page instead of
 * exporting.
 *
 * @paddr: Absolute host address of page to be destroyed
 */
static int uv_destroy_page(unsigned long paddr)
{
	struct uv_cb_cfs uvcb = {
		.header.cmd = UVC_CMD_DESTR_SEC_STOR,
		.header.len = sizeof(uvcb),
		.paddr = paddr
	};

	if (uv_call(0, (u64)&uvcb)) {
		/*
		 * Older firmware uses 107/d as an indication of a non-secure
		 * page. Let us emulate the newer variant (no-op).
		 */
		if (uvcb.header.rc == 0x107 && uvcb.header.rrc == 0xd)
			return 0;
		return -EINVAL;
	}
	return 0;
}

/*
 * The caller must already hold a reference to the page
 */
int uv_destroy_owned_page(unsigned long paddr)
{
	struct page *page = phys_to_page(paddr);
	int rc;

	get_page(page);
	rc = uv_destroy_page(paddr);
	if (!rc)
		clear_bit(PG_arch_1, &page->flags);
	put_page(page);
	return rc;
}

/*
 * Requests the Ultravisor to encrypt a guest page and make it
 * accessible to the host for paging (export).
 *
 * @paddr: Absolute host address of page to be exported
 */
int uv_convert_from_secure(unsigned long paddr)
{
	struct uv_cb_cfs uvcb = {
		.header.cmd = UVC_CMD_CONV_FROM_SEC_STOR,
		.header.len = sizeof(uvcb),
		.paddr = paddr
	};

	if (uv_call(0, (u64)&uvcb))
		return -EINVAL;
	return 0;
}

/*
 * The caller must already hold a reference to the page
 */
int uv_convert_owned_from_secure(unsigned long paddr)
{
	struct page *page = phys_to_page(paddr);
	int rc;

	get_page(page);
	rc = uv_convert_from_secure(paddr);
	if (!rc)
		clear_bit(PG_arch_1, &page->flags);
	put_page(page);
	return rc;
}

/*
 * Calculate the expected ref_count for a page that would otherwise have no
 * further pins. This was cribbed from similar functions in other places in
 * the kernel, but with some slight modifications. We know that a secure
 * page cannot be a huge page, for example.
 */
static int expected_page_refs(struct page *page)
{
	int res;

	res = page_mapcount(page);
	if (PageSwapCache(page)) {
		res++;
	} else if (page_mapping(page)) {
		res++;
		if (page_has_private(page))
			res++;
	}
	return res;
}
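
/*
 * Worked example (illustrative): an anonymous page mapped by exactly one
 * process and present in the swap cache contributes one reference for the
 * mapping and one for the swap cache, so expected_page_refs() returns 2.
 * make_secure_pte() below can then only freeze the refcount if no other
 * transient references exist; a reference held by an LRU pagevec makes the
 * actual count 3, page_ref_freeze(page, 2) fails, and the caller retries
 * after draining the pagevecs.
 */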

static int make_secure_pte(pte_t *ptep, unsigned long addr,
			   struct page *exp_page, struct uv_cb_header *uvcb)
{
	pte_t entry = READ_ONCE(*ptep);
	struct page *page;
	int expected, cc = 0;

	if (!pte_present(entry))
		return -ENXIO;
	if (pte_val(entry) & _PAGE_INVALID)
		return -ENXIO;

	page = pte_page(entry);
	if (page != exp_page)
		return -ENXIO;
	if (PageWriteback(page))
		return -EAGAIN;
	expected = expected_page_refs(page);
	if (!page_ref_freeze(page, expected))
		return -EBUSY;
	set_bit(PG_arch_1, &page->flags);
	/*
	 * If the UVC does not succeed or fail immediately, we don't want to
	 * loop for long, or we might get stall notifications.
	 * On the other hand, this is a complex scenario and we are holding a lot of
	 * locks, so we can't easily sleep and reschedule. We try only once,
	 * and if the UVC returned busy or partial completion, we return
	 * -EAGAIN and we let the callers deal with it.
	 */
	cc = __uv_call(0, (u64)uvcb);
	page_ref_unfreeze(page, expected);
	/*
	 * Return -ENXIO if the page was not mapped, -EINVAL for other errors.
	 * If busy or partially completed, return -EAGAIN.
	 */
	if (cc == UVC_CC_OK)
		return 0;
	else if (cc == UVC_CC_BUSY || cc == UVC_CC_PARTIAL)
		return -EAGAIN;
	return uvcb->rc == 0x10a ? -ENXIO : -EINVAL;
}

/**
 * should_export_before_import - Determine whether an export is needed
 * before an import-like operation
 * @uvcb: the Ultravisor control block of the UVC to be performed
 * @mm: the mm of the process
 *
 * Returns whether an export is needed before every import-like operation.
 * This is needed for shared pages, which don't trigger a secure storage
 * exception when accessed from a different guest.
 *
 * Although considered as one, the Unpin Page UVC is not an actual import,
 * so it is not affected.
 *
 * An export is also not needed when there is only one protected VM, because
 * the page cannot belong to the wrong VM in that case (there is no "other
 * VM" it can belong to).
 *
 * Return: true if an export is needed before every import, otherwise false.
 */
static bool should_export_before_import(struct uv_cb_header *uvcb, struct mm_struct *mm)
{
	if (uvcb->cmd == UVC_CMD_UNPIN_PAGE_SHARED)
		return false;
	return atomic_read(&mm->context.protected_count) > 1;
}

/*
 * Requests the Ultravisor to make a page accessible to a guest.
 * If it's brought in the first time, it will be cleared. If
 * it has been exported before, it will be decrypted and integrity
 * checked.
 */
int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb)
{
	struct vm_area_struct *vma;
	bool local_drain = false;
	spinlock_t *ptelock;
	unsigned long uaddr;
	struct page *page;
	pte_t *ptep;
	int rc;

again:
	rc = -EFAULT;
	mmap_read_lock(gmap->mm);

	uaddr = __gmap_translate(gmap, gaddr);
	if (IS_ERR_VALUE(uaddr))
		goto out;
	vma = vma_lookup(gmap->mm, uaddr);
	if (!vma)
		goto out;
	/*
	 * Secure pages cannot be huge and userspace should not combine both.
	 * In case userspace does it anyway this will result in an -EFAULT for
	 * the unpack. The guest thus never reaches secure mode. If
	 * userspace plays dirty tricks with mapping huge pages later
	 * on this will result in a segmentation fault.
	 */
	if (is_vm_hugetlb_page(vma))
		goto out;

	rc = -ENXIO;
	page = follow_page(vma, uaddr, FOLL_WRITE);
	if (IS_ERR_OR_NULL(page))
		goto out;

	lock_page(page);
	ptep = get_locked_pte(gmap->mm, uaddr, &ptelock);
	if (should_export_before_import(uvcb, gmap->mm))
		uv_convert_from_secure(page_to_phys(page));
	rc = make_secure_pte(ptep, uaddr, page, uvcb);
	pte_unmap_unlock(ptep, ptelock);
	unlock_page(page);
out:
	mmap_read_unlock(gmap->mm);

	if (rc == -EAGAIN) {
		/*
		 * If we are here because the UVC returned busy or partial
		 * completion, this is just a useless check, but it is safe.
		 */
		wait_on_page_writeback(page);
	} else if (rc == -EBUSY) {
		/*
		 * If we have tried a local drain and the page refcount
		 * still does not match our expected safe value, try with a
		 * system wide drain. This is needed if the pagevecs holding
		 * the page are on a different CPU.
		 */
		if (local_drain) {
			lru_add_drain_all();
			/* We give up here, and let the caller try again */
			return -EAGAIN;
		}
		/*
		 * We are here if the page refcount does not match the
		 * expected safe value. The main culprits are usually
		 * pagevecs. With lru_add_drain() we drain the pagevecs
		 * on the local CPU so that hopefully the refcount will
		 * reach the expected safe value.
		 */
		lru_add_drain();
		local_drain = true;
		/* And now we try again immediately after draining */
		goto again;
	} else if (rc == -ENXIO) {
		if (gmap_fault(gmap, gaddr, FAULT_FLAG_WRITE))
			return -EFAULT;
		return -EAGAIN;
	}
	return rc;
}
EXPORT_SYMBOL_GPL(gmap_make_secure);
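
/*
 * Usage sketch (hypothetical caller, for illustration only): transient
 * conditions are reported as -EAGAIN, so callers are expected to retry,
 * rescheduling in between so that draining and writeback can make progress:
 *
 *	do {
 *		rc = gmap_make_secure(gmap, gaddr, &uvcb);
 *		if (rc == -EAGAIN)
 *			cond_resched();
 *	} while (rc == -EAGAIN);
 */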

int gmap_convert_to_secure(struct gmap *gmap, unsigned long gaddr)
{
	struct uv_cb_cts uvcb = {
		.header.cmd = UVC_CMD_CONV_TO_SEC_STOR,
		.header.len = sizeof(uvcb),
		.guest_handle = gmap->guest_handle,
		.gaddr = gaddr,
	};

	return gmap_make_secure(gmap, gaddr, &uvcb);
}
EXPORT_SYMBOL_GPL(gmap_convert_to_secure);

/**
 * gmap_destroy_page - Destroy a guest page.
 * @gmap: the gmap of the guest
 * @gaddr: the guest address to destroy
 *
 * An attempt will be made to destroy the given guest page. If the attempt
 * fails, an attempt is made to export the page. If both attempts fail, an
 * appropriate error is returned.
 */
int gmap_destroy_page(struct gmap *gmap, unsigned long gaddr)
{
	struct vm_area_struct *vma;
	unsigned long uaddr;
	struct page *page;
	int rc;

	rc = -EFAULT;
	mmap_read_lock(gmap->mm);

	uaddr = __gmap_translate(gmap, gaddr);
	if (IS_ERR_VALUE(uaddr))
		goto out;
	vma = vma_lookup(gmap->mm, uaddr);
	if (!vma)
		goto out;
	/*
	 * Huge pages should not be able to become secure
	 */
	if (is_vm_hugetlb_page(vma))
		goto out;

	rc = 0;
	/* we take an extra reference here */
	page = follow_page(vma, uaddr, FOLL_WRITE | FOLL_GET);
	if (IS_ERR_OR_NULL(page))
		goto out;
	rc = uv_destroy_owned_page(page_to_phys(page));
	/*
	 * Fault handlers can race; it is possible that two CPUs will fault
	 * on the same secure page. One CPU can destroy the page, reboot,
	 * re-enter secure mode and import it, while the second CPU was
	 * stuck at the beginning of the handler. At some point the second
	 * CPU will be able to progress, and it will not be able to destroy
	 * the page. In that case we do not want to terminate the process,
	 * we instead try to export the page.
	 */
	if (rc)
		rc = uv_convert_owned_from_secure(page_to_phys(page));
	put_page(page);
out:
	mmap_read_unlock(gmap->mm);
	return rc;
}
EXPORT_SYMBOL_GPL(gmap_destroy_page);

/*
 * To be called with the page locked or with an extra reference! This will
 * prevent gmap_make_secure from touching the page concurrently. Having 2
 * parallel make_page_accessible is fine, as the UV calls will become a
 * no-op if the page is already exported.
 */
int arch_make_page_accessible(struct page *page)
{
	int rc = 0;

	/* Hugepage cannot be protected, so nothing to do */
	if (PageHuge(page))
		return 0;

	/*
	 * PG_arch_1 is used in 3 places:
	 * 1. for kernel page tables during early boot
	 * 2. for storage keys of huge pages and KVM
	 * 3. As an indication that this page might be secure. This can
	 *    overindicate, e.g. we set the bit before calling
	 *    convert_to_secure.
	 * As secure pages are never huge, all 3 variants can co-exist.
	 */
	if (!test_bit(PG_arch_1, &page->flags))
		return 0;

	rc = uv_pin_shared(page_to_phys(page));
	if (!rc) {
		clear_bit(PG_arch_1, &page->flags);
		return 0;
	}

	rc = uv_convert_from_secure(page_to_phys(page));
	if (!rc) {
		clear_bit(PG_arch_1, &page->flags);
		return 0;
	}

	return rc;
}
EXPORT_SYMBOL_GPL(arch_make_page_accessible);

#endif
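
/*
 * The attributes below are exposed under /sys/firmware/uv. Assuming the
 * kobject and kset names used in uv_info_init() further down, the resulting
 * layout looks like this (all files read-only):
 *
 *	/sys/firmware/uv/prot_virt_guest
 *	/sys/firmware/uv/prot_virt_host
 *	/sys/firmware/uv/query/facilities
 *	/sys/firmware/uv/query/feature_indications
 *	/sys/firmware/uv/query/max_cpus
 *	/sys/firmware/uv/query/max_guests
 *	/sys/firmware/uv/query/max_address
 *	...
 */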

#if defined(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) || IS_ENABLED(CONFIG_KVM)
static ssize_t uv_query_facilities(struct kobject *kobj,
				   struct kobj_attribute *attr, char *page)
{
	return scnprintf(page, PAGE_SIZE, "%lx\n%lx\n%lx\n%lx\n",
			 uv_info.inst_calls_list[0],
			 uv_info.inst_calls_list[1],
			 uv_info.inst_calls_list[2],
			 uv_info.inst_calls_list[3]);
}

static struct kobj_attribute uv_query_facilities_attr =
	__ATTR(facilities, 0444, uv_query_facilities, NULL);

static ssize_t uv_query_supp_se_hdr_ver(struct kobject *kobj,
					struct kobj_attribute *attr, char *buf)
{
	return sysfs_emit(buf, "%lx\n", uv_info.supp_se_hdr_ver);
}

static struct kobj_attribute uv_query_supp_se_hdr_ver_attr =
	__ATTR(supp_se_hdr_ver, 0444, uv_query_supp_se_hdr_ver, NULL);

static ssize_t uv_query_supp_se_hdr_pcf(struct kobject *kobj,
					struct kobj_attribute *attr, char *buf)
{
	return sysfs_emit(buf, "%lx\n", uv_info.supp_se_hdr_pcf);
}

static struct kobj_attribute uv_query_supp_se_hdr_pcf_attr =
	__ATTR(supp_se_hdr_pcf, 0444, uv_query_supp_se_hdr_pcf, NULL);

static ssize_t uv_query_dump_cpu_len(struct kobject *kobj,
				     struct kobj_attribute *attr, char *page)
{
	return scnprintf(page, PAGE_SIZE, "%lx\n",
			 uv_info.guest_cpu_stor_len);
}

static struct kobj_attribute uv_query_dump_cpu_len_attr =
	__ATTR(uv_query_dump_cpu_len, 0444, uv_query_dump_cpu_len, NULL);

static ssize_t uv_query_dump_storage_state_len(struct kobject *kobj,
					       struct kobj_attribute *attr, char *page)
{
	return scnprintf(page, PAGE_SIZE, "%lx\n",
			 uv_info.conf_dump_storage_state_len);
}

static struct kobj_attribute uv_query_dump_storage_state_len_attr =
	__ATTR(dump_storage_state_len, 0444, uv_query_dump_storage_state_len, NULL);

static ssize_t uv_query_dump_finalize_len(struct kobject *kobj,
					  struct kobj_attribute *attr, char *page)
{
	return scnprintf(page, PAGE_SIZE, "%lx\n",
			 uv_info.conf_dump_finalize_len);
}

static struct kobj_attribute uv_query_dump_finalize_len_attr =
	__ATTR(dump_finalize_len, 0444, uv_query_dump_finalize_len, NULL);

static ssize_t uv_query_feature_indications(struct kobject *kobj,
					    struct kobj_attribute *attr, char *buf)
{
	return sysfs_emit(buf, "%lx\n", uv_info.uv_feature_indications);
}

static struct kobj_attribute uv_query_feature_indications_attr =
	__ATTR(feature_indications, 0444, uv_query_feature_indications, NULL);

static ssize_t uv_query_max_guest_cpus(struct kobject *kobj,
				       struct kobj_attribute *attr, char *page)
{
	return scnprintf(page, PAGE_SIZE, "%d\n",
			 uv_info.max_guest_cpu_id + 1);
}

static struct kobj_attribute uv_query_max_guest_cpus_attr =
	__ATTR(max_cpus, 0444, uv_query_max_guest_cpus, NULL);

static ssize_t uv_query_max_guest_vms(struct kobject *kobj,
				      struct kobj_attribute *attr, char *page)
{
	return scnprintf(page, PAGE_SIZE, "%d\n",
			 uv_info.max_num_sec_conf);
}

static struct kobj_attribute uv_query_max_guest_vms_attr =
	__ATTR(max_guests, 0444, uv_query_max_guest_vms, NULL);
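
/*
 * Example (illustrative): the query attributes each print one value per
 * line, most of them in hex, so from userspace a read looks like:
 *
 *	$ cat /sys/firmware/uv/query/max_guests
 *	$ cat /sys/firmware/uv/query/facilities    (four hex words, one per line)
 *
 * Note that max_cpus reports uv_info.max_guest_cpu_id + 1, i.e. a count
 * rather than the highest valid CPU id.
 */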

static ssize_t uv_query_max_guest_addr(struct kobject *kobj,
				       struct kobj_attribute *attr, char *page)
{
	return scnprintf(page, PAGE_SIZE, "%lx\n",
			 uv_info.max_sec_stor_addr);
}

static struct kobj_attribute uv_query_max_guest_addr_attr =
	__ATTR(max_address, 0444, uv_query_max_guest_addr, NULL);

static ssize_t uv_query_supp_att_req_hdr_ver(struct kobject *kobj,
					     struct kobj_attribute *attr, char *page)
{
	return scnprintf(page, PAGE_SIZE, "%lx\n", uv_info.supp_att_req_hdr_ver);
}

static struct kobj_attribute uv_query_supp_att_req_hdr_ver_attr =
	__ATTR(supp_att_req_hdr_ver, 0444, uv_query_supp_att_req_hdr_ver, NULL);

static ssize_t uv_query_supp_att_pflags(struct kobject *kobj,
					struct kobj_attribute *attr, char *page)
{
	return scnprintf(page, PAGE_SIZE, "%lx\n", uv_info.supp_att_pflags);
}

static struct kobj_attribute uv_query_supp_att_pflags_attr =
	__ATTR(supp_att_pflags, 0444, uv_query_supp_att_pflags, NULL);

static struct attribute *uv_query_attrs[] = {
	&uv_query_facilities_attr.attr,
	&uv_query_feature_indications_attr.attr,
	&uv_query_max_guest_cpus_attr.attr,
	&uv_query_max_guest_vms_attr.attr,
	&uv_query_max_guest_addr_attr.attr,
	&uv_query_supp_se_hdr_ver_attr.attr,
	&uv_query_supp_se_hdr_pcf_attr.attr,
	&uv_query_dump_storage_state_len_attr.attr,
	&uv_query_dump_finalize_len_attr.attr,
	&uv_query_dump_cpu_len_attr.attr,
	&uv_query_supp_att_req_hdr_ver_attr.attr,
	&uv_query_supp_att_pflags_attr.attr,
	NULL,
};

static struct attribute_group uv_query_attr_group = {
	.attrs = uv_query_attrs,
};

static ssize_t uv_is_prot_virt_guest(struct kobject *kobj,
				     struct kobj_attribute *attr, char *page)
{
	int val = 0;

#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST
	val = prot_virt_guest;
#endif
	return scnprintf(page, PAGE_SIZE, "%d\n", val);
}

static ssize_t uv_is_prot_virt_host(struct kobject *kobj,
				    struct kobj_attribute *attr, char *page)
{
	int val = 0;

#if IS_ENABLED(CONFIG_KVM)
	val = prot_virt_host;
#endif

	return scnprintf(page, PAGE_SIZE, "%d\n", val);
}

static struct kobj_attribute uv_prot_virt_guest =
	__ATTR(prot_virt_guest, 0444, uv_is_prot_virt_guest, NULL);

static struct kobj_attribute uv_prot_virt_host =
	__ATTR(prot_virt_host, 0444, uv_is_prot_virt_host, NULL);

static const struct attribute *uv_prot_virt_attrs[] = {
	&uv_prot_virt_guest.attr,
	&uv_prot_virt_host.attr,
	NULL,
};

static struct kset *uv_query_kset;
static struct kobject *uv_kobj;

static int __init uv_info_init(void)
{
	int rc = -ENOMEM;

	/* Facility 158 indicates that the Ultravisor call is available */
	if (!test_facility(158))
		return 0;

	uv_kobj = kobject_create_and_add("uv", firmware_kobj);
	if (!uv_kobj)
		return -ENOMEM;

	rc = sysfs_create_files(uv_kobj, uv_prot_virt_attrs);
	if (rc)
		goto out_kobj;

	uv_query_kset = kset_create_and_add("query", NULL, uv_kobj);
	if (!uv_query_kset) {
		rc = -ENOMEM;
		goto out_ind_files;
	}

	rc = sysfs_create_group(&uv_query_kset->kobj, &uv_query_attr_group);
	if (!rc)
		return 0;

	kset_unregister(uv_query_kset);
out_ind_files:
	sysfs_remove_files(uv_kobj, uv_prot_virt_attrs);
out_kobj:
	kobject_del(uv_kobj);
	kobject_put(uv_kobj);
	return rc;
}
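
/*
 * A device initcall is late enough here: firmware_kobj is created during a
 * core initcall, so by the time uv_info_init() runs the "uv" kobject can
 * always be attached under /sys/firmware.
 */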
device_initcall(uv_info_init);
#endif