1 // SPDX-License-Identifier: GPL-2.0 2 /* Copyright(c) 2021 Intel Corporation. */ 3 4 #include <asm/sgx.h> 5 6 #include "cpuid.h" 7 #include "kvm_cache_regs.h" 8 #include "nested.h" 9 #include "sgx.h" 10 #include "vmx.h" 11 #include "x86.h" 12 13 bool __read_mostly enable_sgx = 1; 14 module_param_named(sgx, enable_sgx, bool, 0444); 15 16 /* Initial value of guest's virtual SGX_LEPUBKEYHASHn MSRs */ 17 static u64 sgx_pubkey_hash[4] __ro_after_init; 18 19 /* 20 * ENCLS's memory operands use a fixed segment (DS) and a fixed 21 * address size based on the mode. Related prefixes are ignored. 22 */ 23 static int sgx_get_encls_gva(struct kvm_vcpu *vcpu, unsigned long offset, 24 int size, int alignment, gva_t *gva) 25 { 26 struct kvm_segment s; 27 bool fault; 28 29 /* Skip vmcs.GUEST_DS retrieval for 64-bit mode to avoid VMREADs. */ 30 *gva = offset; 31 if (!is_long_mode(vcpu)) { 32 vmx_get_segment(vcpu, &s, VCPU_SREG_DS); 33 *gva += s.base; 34 } 35 36 if (!IS_ALIGNED(*gva, alignment)) { 37 fault = true; 38 } else if (likely(is_long_mode(vcpu))) { 39 fault = is_noncanonical_address(*gva, vcpu); 40 } else { 41 *gva &= 0xffffffff; 42 fault = (s.unusable) || 43 (s.type != 2 && s.type != 3) || 44 (*gva > s.limit) || 45 ((s.base != 0 || s.limit != 0xffffffff) && 46 (((u64)*gva + size - 1) > s.limit + 1)); 47 } 48 if (fault) 49 kvm_inject_gp(vcpu, 0); 50 return fault ? -EINVAL : 0; 51 } 52 53 static void sgx_handle_emulation_failure(struct kvm_vcpu *vcpu, u64 addr, 54 unsigned int size) 55 { 56 uint64_t data[2] = { addr, size }; 57 58 __kvm_prepare_emulation_failure_exit(vcpu, data, ARRAY_SIZE(data)); 59 } 60 61 static int sgx_read_hva(struct kvm_vcpu *vcpu, unsigned long hva, void *data, 62 unsigned int size) 63 { 64 if (__copy_from_user(data, (void __user *)hva, size)) { 65 sgx_handle_emulation_failure(vcpu, hva, size); 66 return -EFAULT; 67 } 68 69 return 0; 70 } 71 72 static int sgx_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t gva, bool write, 73 gpa_t *gpa) 74 { 75 struct x86_exception ex; 76 77 if (write) 78 *gpa = kvm_mmu_gva_to_gpa_write(vcpu, gva, &ex); 79 else 80 *gpa = kvm_mmu_gva_to_gpa_read(vcpu, gva, &ex); 81 82 if (*gpa == INVALID_GPA) { 83 kvm_inject_emulated_page_fault(vcpu, &ex); 84 return -EFAULT; 85 } 86 87 return 0; 88 } 89 90 static int sgx_gpa_to_hva(struct kvm_vcpu *vcpu, gpa_t gpa, unsigned long *hva) 91 { 92 *hva = kvm_vcpu_gfn_to_hva(vcpu, PFN_DOWN(gpa)); 93 if (kvm_is_error_hva(*hva)) { 94 sgx_handle_emulation_failure(vcpu, gpa, 1); 95 return -EFAULT; 96 } 97 98 *hva |= gpa & ~PAGE_MASK; 99 100 return 0; 101 } 102 103 static int sgx_inject_fault(struct kvm_vcpu *vcpu, gva_t gva, int trapnr) 104 { 105 struct x86_exception ex; 106 107 /* 108 * A non-EPCM #PF indicates a bad userspace HVA. This *should* check 109 * for PFEC.SGX and not assume any #PF on SGX2 originated in the EPC, 110 * but the error code isn't (yet) plumbed through the ENCLS helpers. 111 */ 112 if (trapnr == PF_VECTOR && !boot_cpu_has(X86_FEATURE_SGX2)) { 113 kvm_prepare_emulation_failure_exit(vcpu); 114 return 0; 115 } 116 117 /* 118 * If the guest thinks it's running on SGX2 hardware, inject an SGX 119 * #PF if the fault matches an EPCM fault signature (#GP on SGX1, 120 * #PF on SGX2). The assumption is that EPCM faults are much more 121 * likely than a bad userspace address. 122 */ 123 if ((trapnr == PF_VECTOR || !boot_cpu_has(X86_FEATURE_SGX2)) && 124 guest_cpuid_has(vcpu, X86_FEATURE_SGX2)) { 125 memset(&ex, 0, sizeof(ex)); 126 ex.vector = PF_VECTOR; 127 ex.error_code = PFERR_PRESENT_MASK | PFERR_WRITE_MASK | 128 PFERR_SGX_MASK; 129 ex.address = gva; 130 ex.error_code_valid = true; 131 ex.nested_page_fault = false; 132 kvm_inject_page_fault(vcpu, &ex); 133 } else { 134 kvm_inject_gp(vcpu, 0); 135 } 136 return 1; 137 } 138 139 static int __handle_encls_ecreate(struct kvm_vcpu *vcpu, 140 struct sgx_pageinfo *pageinfo, 141 unsigned long secs_hva, 142 gva_t secs_gva) 143 { 144 struct sgx_secs *contents = (struct sgx_secs *)pageinfo->contents; 145 struct kvm_cpuid_entry2 *sgx_12_0, *sgx_12_1; 146 u64 attributes, xfrm, size; 147 u32 miscselect; 148 u8 max_size_log2; 149 int trapnr, ret; 150 151 sgx_12_0 = kvm_find_cpuid_entry_index(vcpu, 0x12, 0); 152 sgx_12_1 = kvm_find_cpuid_entry_index(vcpu, 0x12, 1); 153 if (!sgx_12_0 || !sgx_12_1) { 154 kvm_prepare_emulation_failure_exit(vcpu); 155 return 0; 156 } 157 158 miscselect = contents->miscselect; 159 attributes = contents->attributes; 160 xfrm = contents->xfrm; 161 size = contents->size; 162 163 /* Enforce restriction of access to the PROVISIONKEY. */ 164 if (!vcpu->kvm->arch.sgx_provisioning_allowed && 165 (attributes & SGX_ATTR_PROVISIONKEY)) { 166 if (sgx_12_1->eax & SGX_ATTR_PROVISIONKEY) 167 pr_warn_once("KVM: SGX PROVISIONKEY advertised but not allowed\n"); 168 kvm_inject_gp(vcpu, 0); 169 return 1; 170 } 171 172 /* Enforce CPUID restrictions on MISCSELECT, ATTRIBUTES and XFRM. */ 173 if ((u32)miscselect & ~sgx_12_0->ebx || 174 (u32)attributes & ~sgx_12_1->eax || 175 (u32)(attributes >> 32) & ~sgx_12_1->ebx || 176 (u32)xfrm & ~sgx_12_1->ecx || 177 (u32)(xfrm >> 32) & ~sgx_12_1->edx) { 178 kvm_inject_gp(vcpu, 0); 179 return 1; 180 } 181 182 /* Enforce CPUID restriction on max enclave size. */ 183 max_size_log2 = (attributes & SGX_ATTR_MODE64BIT) ? sgx_12_0->edx >> 8 : 184 sgx_12_0->edx; 185 if (size >= BIT_ULL(max_size_log2)) 186 kvm_inject_gp(vcpu, 0); 187 188 /* 189 * sgx_virt_ecreate() returns: 190 * 1) 0: ECREATE was successful 191 * 2) -EFAULT: ECREATE was run but faulted, and trapnr was set to the 192 * exception number. 193 * 3) -EINVAL: access_ok() on @secs_hva failed. This should never 194 * happen as KVM checks host addresses at memslot creation. 195 * sgx_virt_ecreate() has already warned in this case. 196 */ 197 ret = sgx_virt_ecreate(pageinfo, (void __user *)secs_hva, &trapnr); 198 if (!ret) 199 return kvm_skip_emulated_instruction(vcpu); 200 if (ret == -EFAULT) 201 return sgx_inject_fault(vcpu, secs_gva, trapnr); 202 203 return ret; 204 } 205 206 static int handle_encls_ecreate(struct kvm_vcpu *vcpu) 207 { 208 gva_t pageinfo_gva, secs_gva; 209 gva_t metadata_gva, contents_gva; 210 gpa_t metadata_gpa, contents_gpa, secs_gpa; 211 unsigned long metadata_hva, contents_hva, secs_hva; 212 struct sgx_pageinfo pageinfo; 213 struct sgx_secs *contents; 214 struct x86_exception ex; 215 int r; 216 217 if (sgx_get_encls_gva(vcpu, kvm_rbx_read(vcpu), 32, 32, &pageinfo_gva) || 218 sgx_get_encls_gva(vcpu, kvm_rcx_read(vcpu), 4096, 4096, &secs_gva)) 219 return 1; 220 221 /* 222 * Copy the PAGEINFO to local memory, its pointers need to be 223 * translated, i.e. we need to do a deep copy/translate. 224 */ 225 r = kvm_read_guest_virt(vcpu, pageinfo_gva, &pageinfo, 226 sizeof(pageinfo), &ex); 227 if (r == X86EMUL_PROPAGATE_FAULT) { 228 kvm_inject_emulated_page_fault(vcpu, &ex); 229 return 1; 230 } else if (r != X86EMUL_CONTINUE) { 231 sgx_handle_emulation_failure(vcpu, pageinfo_gva, 232 sizeof(pageinfo)); 233 return 0; 234 } 235 236 if (sgx_get_encls_gva(vcpu, pageinfo.metadata, 64, 64, &metadata_gva) || 237 sgx_get_encls_gva(vcpu, pageinfo.contents, 4096, 4096, 238 &contents_gva)) 239 return 1; 240 241 /* 242 * Translate the SECINFO, SOURCE and SECS pointers from GVA to GPA. 243 * Resume the guest on failure to inject a #PF. 244 */ 245 if (sgx_gva_to_gpa(vcpu, metadata_gva, false, &metadata_gpa) || 246 sgx_gva_to_gpa(vcpu, contents_gva, false, &contents_gpa) || 247 sgx_gva_to_gpa(vcpu, secs_gva, true, &secs_gpa)) 248 return 1; 249 250 /* 251 * ...and then to HVA. The order of accesses isn't architectural, i.e. 252 * KVM doesn't have to fully process one address at a time. Exit to 253 * userspace if a GPA is invalid. 254 */ 255 if (sgx_gpa_to_hva(vcpu, metadata_gpa, &metadata_hva) || 256 sgx_gpa_to_hva(vcpu, contents_gpa, &contents_hva) || 257 sgx_gpa_to_hva(vcpu, secs_gpa, &secs_hva)) 258 return 0; 259 260 /* 261 * Copy contents into kernel memory to prevent TOCTOU attack. E.g. the 262 * guest could do ECREATE w/ SECS.SGX_ATTR_PROVISIONKEY=0, and 263 * simultaneously set SGX_ATTR_PROVISIONKEY to bypass the check to 264 * enforce restriction of access to the PROVISIONKEY. 265 */ 266 contents = (struct sgx_secs *)__get_free_page(GFP_KERNEL_ACCOUNT); 267 if (!contents) 268 return -ENOMEM; 269 270 /* Exit to userspace if copying from a host userspace address fails. */ 271 if (sgx_read_hva(vcpu, contents_hva, (void *)contents, PAGE_SIZE)) { 272 free_page((unsigned long)contents); 273 return 0; 274 } 275 276 pageinfo.metadata = metadata_hva; 277 pageinfo.contents = (u64)contents; 278 279 r = __handle_encls_ecreate(vcpu, &pageinfo, secs_hva, secs_gva); 280 281 free_page((unsigned long)contents); 282 283 return r; 284 } 285 286 static int handle_encls_einit(struct kvm_vcpu *vcpu) 287 { 288 unsigned long sig_hva, secs_hva, token_hva, rflags; 289 struct vcpu_vmx *vmx = to_vmx(vcpu); 290 gva_t sig_gva, secs_gva, token_gva; 291 gpa_t sig_gpa, secs_gpa, token_gpa; 292 int ret, trapnr; 293 294 if (sgx_get_encls_gva(vcpu, kvm_rbx_read(vcpu), 1808, 4096, &sig_gva) || 295 sgx_get_encls_gva(vcpu, kvm_rcx_read(vcpu), 4096, 4096, &secs_gva) || 296 sgx_get_encls_gva(vcpu, kvm_rdx_read(vcpu), 304, 512, &token_gva)) 297 return 1; 298 299 /* 300 * Translate the SIGSTRUCT, SECS and TOKEN pointers from GVA to GPA. 301 * Resume the guest on failure to inject a #PF. 302 */ 303 if (sgx_gva_to_gpa(vcpu, sig_gva, false, &sig_gpa) || 304 sgx_gva_to_gpa(vcpu, secs_gva, true, &secs_gpa) || 305 sgx_gva_to_gpa(vcpu, token_gva, false, &token_gpa)) 306 return 1; 307 308 /* 309 * ...and then to HVA. The order of accesses isn't architectural, i.e. 310 * KVM doesn't have to fully process one address at a time. Exit to 311 * userspace if a GPA is invalid. Note, all structures are aligned and 312 * cannot split pages. 313 */ 314 if (sgx_gpa_to_hva(vcpu, sig_gpa, &sig_hva) || 315 sgx_gpa_to_hva(vcpu, secs_gpa, &secs_hva) || 316 sgx_gpa_to_hva(vcpu, token_gpa, &token_hva)) 317 return 0; 318 319 ret = sgx_virt_einit((void __user *)sig_hva, (void __user *)token_hva, 320 (void __user *)secs_hva, 321 vmx->msr_ia32_sgxlepubkeyhash, &trapnr); 322 323 if (ret == -EFAULT) 324 return sgx_inject_fault(vcpu, secs_gva, trapnr); 325 326 /* 327 * sgx_virt_einit() returns -EINVAL when access_ok() fails on @sig_hva, 328 * @token_hva or @secs_hva. This should never happen as KVM checks host 329 * addresses at memslot creation. sgx_virt_einit() has already warned 330 * in this case, so just return. 331 */ 332 if (ret < 0) 333 return ret; 334 335 rflags = vmx_get_rflags(vcpu) & ~(X86_EFLAGS_CF | X86_EFLAGS_PF | 336 X86_EFLAGS_AF | X86_EFLAGS_SF | 337 X86_EFLAGS_OF); 338 if (ret) 339 rflags |= X86_EFLAGS_ZF; 340 else 341 rflags &= ~X86_EFLAGS_ZF; 342 vmx_set_rflags(vcpu, rflags); 343 344 kvm_rax_write(vcpu, ret); 345 return kvm_skip_emulated_instruction(vcpu); 346 } 347 348 static inline bool encls_leaf_enabled_in_guest(struct kvm_vcpu *vcpu, u32 leaf) 349 { 350 if (!enable_sgx || !guest_cpuid_has(vcpu, X86_FEATURE_SGX)) 351 return false; 352 353 if (leaf >= ECREATE && leaf <= ETRACK) 354 return guest_cpuid_has(vcpu, X86_FEATURE_SGX1); 355 356 if (leaf >= EAUG && leaf <= EMODT) 357 return guest_cpuid_has(vcpu, X86_FEATURE_SGX2); 358 359 return false; 360 } 361 362 static inline bool sgx_enabled_in_guest_bios(struct kvm_vcpu *vcpu) 363 { 364 const u64 bits = FEAT_CTL_SGX_ENABLED | FEAT_CTL_LOCKED; 365 366 return (to_vmx(vcpu)->msr_ia32_feature_control & bits) == bits; 367 } 368 369 int handle_encls(struct kvm_vcpu *vcpu) 370 { 371 u32 leaf = (u32)kvm_rax_read(vcpu); 372 373 if (!encls_leaf_enabled_in_guest(vcpu, leaf)) { 374 kvm_queue_exception(vcpu, UD_VECTOR); 375 } else if (!sgx_enabled_in_guest_bios(vcpu)) { 376 kvm_inject_gp(vcpu, 0); 377 } else { 378 if (leaf == ECREATE) 379 return handle_encls_ecreate(vcpu); 380 if (leaf == EINIT) 381 return handle_encls_einit(vcpu); 382 WARN(1, "KVM: unexpected exit on ENCLS[%u]", leaf); 383 vcpu->run->exit_reason = KVM_EXIT_UNKNOWN; 384 vcpu->run->hw.hardware_exit_reason = EXIT_REASON_ENCLS; 385 return 0; 386 } 387 return 1; 388 } 389 390 void setup_default_sgx_lepubkeyhash(void) 391 { 392 /* 393 * Use Intel's default value for Skylake hardware if Launch Control is 394 * not supported, i.e. Intel's hash is hardcoded into silicon, or if 395 * Launch Control is supported and enabled, i.e. mimic the reset value 396 * and let the guest write the MSRs at will. If Launch Control is 397 * supported but disabled, then use the current MSR values as the hash 398 * MSRs exist but are read-only (locked and not writable). 399 */ 400 if (!enable_sgx || boot_cpu_has(X86_FEATURE_SGX_LC) || 401 rdmsrl_safe(MSR_IA32_SGXLEPUBKEYHASH0, &sgx_pubkey_hash[0])) { 402 sgx_pubkey_hash[0] = 0xa6053e051270b7acULL; 403 sgx_pubkey_hash[1] = 0x6cfbe8ba8b3b413dULL; 404 sgx_pubkey_hash[2] = 0xc4916d99f2b3735dULL; 405 sgx_pubkey_hash[3] = 0xd4f8c05909f9bb3bULL; 406 } else { 407 /* MSR_IA32_SGXLEPUBKEYHASH0 is read above */ 408 rdmsrl(MSR_IA32_SGXLEPUBKEYHASH1, sgx_pubkey_hash[1]); 409 rdmsrl(MSR_IA32_SGXLEPUBKEYHASH2, sgx_pubkey_hash[2]); 410 rdmsrl(MSR_IA32_SGXLEPUBKEYHASH3, sgx_pubkey_hash[3]); 411 } 412 } 413 414 void vcpu_setup_sgx_lepubkeyhash(struct kvm_vcpu *vcpu) 415 { 416 struct vcpu_vmx *vmx = to_vmx(vcpu); 417 418 memcpy(vmx->msr_ia32_sgxlepubkeyhash, sgx_pubkey_hash, 419 sizeof(sgx_pubkey_hash)); 420 } 421 422 /* 423 * ECREATE must be intercepted to enforce MISCSELECT, ATTRIBUTES and XFRM 424 * restrictions if the guest's allowed-1 settings diverge from hardware. 425 */ 426 static bool sgx_intercept_encls_ecreate(struct kvm_vcpu *vcpu) 427 { 428 struct kvm_cpuid_entry2 *guest_cpuid; 429 u32 eax, ebx, ecx, edx; 430 431 if (!vcpu->kvm->arch.sgx_provisioning_allowed) 432 return true; 433 434 guest_cpuid = kvm_find_cpuid_entry_index(vcpu, 0x12, 0); 435 if (!guest_cpuid) 436 return true; 437 438 cpuid_count(0x12, 0, &eax, &ebx, &ecx, &edx); 439 if (guest_cpuid->ebx != ebx || guest_cpuid->edx != edx) 440 return true; 441 442 guest_cpuid = kvm_find_cpuid_entry_index(vcpu, 0x12, 1); 443 if (!guest_cpuid) 444 return true; 445 446 cpuid_count(0x12, 1, &eax, &ebx, &ecx, &edx); 447 if (guest_cpuid->eax != eax || guest_cpuid->ebx != ebx || 448 guest_cpuid->ecx != ecx || guest_cpuid->edx != edx) 449 return true; 450 451 return false; 452 } 453 454 void vmx_write_encls_bitmap(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) 455 { 456 /* 457 * There is no software enable bit for SGX that is virtualized by 458 * hardware, e.g. there's no CR4.SGXE, so when SGX is disabled in the 459 * guest (either by the host or by the guest's BIOS) but enabled in the 460 * host, trap all ENCLS leafs and inject #UD/#GP as needed to emulate 461 * the expected system behavior for ENCLS. 462 */ 463 u64 bitmap = -1ull; 464 465 /* Nothing to do if hardware doesn't support SGX */ 466 if (!cpu_has_vmx_encls_vmexit()) 467 return; 468 469 if (guest_cpuid_has(vcpu, X86_FEATURE_SGX) && 470 sgx_enabled_in_guest_bios(vcpu)) { 471 if (guest_cpuid_has(vcpu, X86_FEATURE_SGX1)) { 472 bitmap &= ~GENMASK_ULL(ETRACK, ECREATE); 473 if (sgx_intercept_encls_ecreate(vcpu)) 474 bitmap |= (1 << ECREATE); 475 } 476 477 if (guest_cpuid_has(vcpu, X86_FEATURE_SGX2)) 478 bitmap &= ~GENMASK_ULL(EMODT, EAUG); 479 480 /* 481 * Trap and execute EINIT if launch control is enabled in the 482 * host using the guest's values for launch control MSRs, even 483 * if the guest's values are fixed to hardware default values. 484 * The MSRs are not loaded/saved on VM-Enter/VM-Exit as writing 485 * the MSRs is extraordinarily expensive. 486 */ 487 if (boot_cpu_has(X86_FEATURE_SGX_LC)) 488 bitmap |= (1 << EINIT); 489 490 if (!vmcs12 && is_guest_mode(vcpu)) 491 vmcs12 = get_vmcs12(vcpu); 492 if (vmcs12 && nested_cpu_has_encls_exit(vmcs12)) 493 bitmap |= vmcs12->encls_exiting_bitmap; 494 } 495 vmcs_write64(ENCLS_EXITING_BITMAP, bitmap); 496 } 497