// SPDX-License-Identifier: GPL-2.0
/* Copyright(c) 2021 Intel Corporation. */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <asm/sgx.h>

#include "cpuid.h"
#include "kvm_cache_regs.h"
#include "nested.h"
#include "sgx.h"
#include "vmx.h"
#include "x86.h"

bool __read_mostly enable_sgx = 1;
module_param_named(sgx, enable_sgx, bool, 0444);

/* Initial value of guest's virtual SGX_LEPUBKEYHASHn MSRs */
static u64 sgx_pubkey_hash[4] __ro_after_init;

/*
 * ENCLS's memory operands use a fixed segment (DS) and a fixed
 * address size based on the mode. Related prefixes are ignored.
 */
static int sgx_get_encls_gva(struct kvm_vcpu *vcpu, unsigned long offset,
                             int size, int alignment, gva_t *gva)
{
        struct kvm_segment s;
        bool fault;

        /* Skip vmcs.GUEST_DS retrieval for 64-bit mode to avoid VMREADs. */
        *gva = offset;
        if (!is_64_bit_mode(vcpu)) {
                vmx_get_segment(vcpu, &s, VCPU_SREG_DS);
                *gva += s.base;
        }

        if (!IS_ALIGNED(*gva, alignment)) {
                fault = true;
        } else if (likely(is_64_bit_mode(vcpu))) {
                fault = is_noncanonical_address(*gva, vcpu);
        } else {
                *gva &= 0xffffffff;
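                /*
                 * Outside of 64-bit mode, DS must be usable and a read/write
                 * expand-up data segment (type 2 or 3), and the access must
                 * fit within the segment limit; a flat 4GiB segment is exempt
                 * from the size check.
                 */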
                fault = (s.unusable) ||
                        (s.type != 2 && s.type != 3) ||
                        (*gva > s.limit) ||
                        ((s.base != 0 || s.limit != 0xffffffff) &&
                        (((u64)*gva + size - 1) > s.limit + 1));
        }
        if (fault)
                kvm_inject_gp(vcpu, 0);
        return fault ? -EINVAL : 0;
}

static void sgx_handle_emulation_failure(struct kvm_vcpu *vcpu, u64 addr,
                                         unsigned int size)
{
        uint64_t data[2] = { addr, size };

        __kvm_prepare_emulation_failure_exit(vcpu, data, ARRAY_SIZE(data));
}

static int sgx_read_hva(struct kvm_vcpu *vcpu, unsigned long hva, void *data,
                        unsigned int size)
{
        if (__copy_from_user(data, (void __user *)hva, size)) {
                sgx_handle_emulation_failure(vcpu, hva, size);
                return -EFAULT;
        }

        return 0;
}

static int sgx_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t gva, bool write,
                          gpa_t *gpa)
{
        struct x86_exception ex;

        if (write)
                *gpa = kvm_mmu_gva_to_gpa_write(vcpu, gva, &ex);
        else
                *gpa = kvm_mmu_gva_to_gpa_read(vcpu, gva, &ex);

        if (*gpa == INVALID_GPA) {
                kvm_inject_emulated_page_fault(vcpu, &ex);
                return -EFAULT;
        }

        return 0;
}

static int sgx_gpa_to_hva(struct kvm_vcpu *vcpu, gpa_t gpa, unsigned long *hva)
{
        *hva = kvm_vcpu_gfn_to_hva(vcpu, PFN_DOWN(gpa));
        if (kvm_is_error_hva(*hva)) {
                sgx_handle_emulation_failure(vcpu, gpa, 1);
                return -EFAULT;
        }

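        /* gfn_to_hva() returns the base of the page; add back the offset. */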
        *hva |= gpa & ~PAGE_MASK;

        return 0;
}

static int sgx_inject_fault(struct kvm_vcpu *vcpu, gva_t gva, int trapnr)
{
        struct x86_exception ex;

        /*
         * A non-EPCM #PF indicates a bad userspace HVA. This *should* check
         * for PFEC.SGX and not assume any #PF on SGX2 originated in the EPC,
         * but the error code isn't (yet) plumbed through the ENCLS helpers.
         */
        if (trapnr == PF_VECTOR && !boot_cpu_has(X86_FEATURE_SGX2)) {
                kvm_prepare_emulation_failure_exit(vcpu);
                return 0;
        }

        /*
         * If the guest thinks it's running on SGX2 hardware, inject an SGX
         * #PF if the fault matches an EPCM fault signature (#GP on SGX1,
         * #PF on SGX2). The assumption is that EPCM faults are much more
         * likely than a bad userspace address.
         */
        if ((trapnr == PF_VECTOR || !boot_cpu_has(X86_FEATURE_SGX2)) &&
            guest_cpuid_has(vcpu, X86_FEATURE_SGX2)) {
                memset(&ex, 0, sizeof(ex));
                ex.vector = PF_VECTOR;
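                /* Mimic an EPCM fault: a present, write-induced SGX #PF. */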
                ex.error_code = PFERR_PRESENT_MASK | PFERR_WRITE_MASK |
                                PFERR_SGX_MASK;
                ex.address = gva;
                ex.error_code_valid = true;
                ex.nested_page_fault = false;
                kvm_inject_emulated_page_fault(vcpu, &ex);
        } else {
                kvm_inject_gp(vcpu, 0);
        }
        return 1;
}

static int __handle_encls_ecreate(struct kvm_vcpu *vcpu,
                                  struct sgx_pageinfo *pageinfo,
                                  unsigned long secs_hva,
                                  gva_t secs_gva)
{
        struct sgx_secs *contents = (struct sgx_secs *)pageinfo->contents;
        struct kvm_cpuid_entry2 *sgx_12_0, *sgx_12_1;
        u64 attributes, xfrm, size;
        u32 miscselect;
        u8 max_size_log2;
        int trapnr, ret;

        sgx_12_0 = kvm_find_cpuid_entry_index(vcpu, 0x12, 0);
        sgx_12_1 = kvm_find_cpuid_entry_index(vcpu, 0x12, 1);
        if (!sgx_12_0 || !sgx_12_1) {
                kvm_prepare_emulation_failure_exit(vcpu);
                return 0;
        }

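        /*
         * Snapshot the fields used for the CPUID checks; @contents was
         * copied into kernel memory by the caller, so the values are stable.
         */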
        miscselect = contents->miscselect;
        attributes = contents->attributes;
        xfrm = contents->xfrm;
        size = contents->size;

        /* Enforce restriction of access to the PROVISIONKEY. */
        if (!vcpu->kvm->arch.sgx_provisioning_allowed &&
            (attributes & SGX_ATTR_PROVISIONKEY)) {
                if (sgx_12_1->eax & SGX_ATTR_PROVISIONKEY)
                        pr_warn_once("SGX PROVISIONKEY advertised but not allowed\n");
                kvm_inject_gp(vcpu, 0);
                return 1;
        }

        /*
         * Enforce CPUID restrictions on MISCSELECT, ATTRIBUTES and XFRM. Note
         * that the allowed XFRM (XFeature Request Mask) isn't strictly bound
         * by the supported XCR0. FP+SSE *must* be set in XFRM, even if XSAVE
         * is unsupported, i.e. even if XCR0 itself is completely unsupported.
         */
        if ((u32)miscselect & ~sgx_12_0->ebx ||
            (u32)attributes & ~sgx_12_1->eax ||
            (u32)(attributes >> 32) & ~sgx_12_1->ebx ||
            (u32)xfrm & ~sgx_12_1->ecx ||
            (u32)(xfrm >> 32) & ~sgx_12_1->edx ||
            xfrm & ~(vcpu->arch.guest_supported_xcr0 | XFEATURE_MASK_FPSSE) ||
            (xfrm & XFEATURE_MASK_FPSSE) != XFEATURE_MASK_FPSSE) {
                kvm_inject_gp(vcpu, 0);
                return 1;
        }

        /* Enforce CPUID restriction on max enclave size. */
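        /*
         * CPUID.0x12.0x0:EDX[7:0] holds the max size (log2) for 32-bit
         * enclaves, EDX[15:8] the max size for 64-bit enclaves.
         */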
        max_size_log2 = (attributes & SGX_ATTR_MODE64BIT) ? sgx_12_0->edx >> 8 :
                                                            sgx_12_0->edx;
        if (size >= BIT_ULL(max_size_log2)) {
                kvm_inject_gp(vcpu, 0);
                return 1;
        }

        /*
         * sgx_virt_ecreate() returns:
         *  1) 0:       ECREATE was successful
         *  2) -EFAULT: ECREATE was run but faulted, and trapnr was set to the
         *              exception number.
         *  3) -EINVAL: access_ok() on @secs_hva failed. This should never
         *              happen as KVM checks host addresses at memslot creation.
         *              sgx_virt_ecreate() has already warned in this case.
         */
        ret = sgx_virt_ecreate(pageinfo, (void __user *)secs_hva, &trapnr);
        if (!ret)
                return kvm_skip_emulated_instruction(vcpu);
        if (ret == -EFAULT)
                return sgx_inject_fault(vcpu, secs_gva, trapnr);

        return ret;
}

static int handle_encls_ecreate(struct kvm_vcpu *vcpu)
{
        gva_t pageinfo_gva, secs_gva;
        gva_t metadata_gva, contents_gva;
        gpa_t metadata_gpa, contents_gpa, secs_gpa;
        unsigned long metadata_hva, contents_hva, secs_hva;
        struct sgx_pageinfo pageinfo;
        struct sgx_secs *contents;
        struct x86_exception ex;
        int r;

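        /* ECREATE takes the PAGEINFO (32 bytes) in RBX and the SECS in RCX. */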
        if (sgx_get_encls_gva(vcpu, kvm_rbx_read(vcpu), 32, 32, &pageinfo_gva) ||
            sgx_get_encls_gva(vcpu, kvm_rcx_read(vcpu), 4096, 4096, &secs_gva))
                return 1;

        /*
         * Copy the PAGEINFO to local memory, its pointers need to be
         * translated, i.e. we need to do a deep copy/translate.
         */
        r = kvm_read_guest_virt(vcpu, pageinfo_gva, &pageinfo,
                                sizeof(pageinfo), &ex);
        if (r == X86EMUL_PROPAGATE_FAULT) {
                kvm_inject_emulated_page_fault(vcpu, &ex);
                return 1;
        } else if (r != X86EMUL_CONTINUE) {
                sgx_handle_emulation_failure(vcpu, pageinfo_gva,
                                             sizeof(pageinfo));
                return 0;
        }

        if (sgx_get_encls_gva(vcpu, pageinfo.metadata, 64, 64, &metadata_gva) ||
            sgx_get_encls_gva(vcpu, pageinfo.contents, 4096, 4096,
                              &contents_gva))
                return 1;

        /*
         * Translate the SECINFO, SOURCE and SECS pointers from GVA to GPA.
         * Resume the guest on failure to inject a #PF.
         */
        if (sgx_gva_to_gpa(vcpu, metadata_gva, false, &metadata_gpa) ||
            sgx_gva_to_gpa(vcpu, contents_gva, false, &contents_gpa) ||
            sgx_gva_to_gpa(vcpu, secs_gva, true, &secs_gpa))
                return 1;

        /*
         * ...and then to HVA. The order of accesses isn't architectural, i.e.
         * KVM doesn't have to fully process one address at a time. Exit to
         * userspace if a GPA is invalid.
         */
        if (sgx_gpa_to_hva(vcpu, metadata_gpa, &metadata_hva) ||
            sgx_gpa_to_hva(vcpu, contents_gpa, &contents_hva) ||
            sgx_gpa_to_hva(vcpu, secs_gpa, &secs_hva))
                return 0;

        /*
         * Copy contents into kernel memory to prevent TOCTOU attack. E.g. the
         * guest could do ECREATE w/ SECS.SGX_ATTR_PROVISIONKEY=0, and
         * simultaneously set SGX_ATTR_PROVISIONKEY to bypass the check to
         * enforce restriction of access to the PROVISIONKEY.
         */
        contents = (struct sgx_secs *)__get_free_page(GFP_KERNEL_ACCOUNT);
        if (!contents)
                return -ENOMEM;

        /* Exit to userspace if copying from a host userspace address fails. */
        if (sgx_read_hva(vcpu, contents_hva, (void *)contents, PAGE_SIZE)) {
                free_page((unsigned long)contents);
                return 0;
        }

        pageinfo.metadata = metadata_hva;
        pageinfo.contents = (u64)contents;

        r = __handle_encls_ecreate(vcpu, &pageinfo, secs_hva, secs_gva);

        free_page((unsigned long)contents);

        return r;
}

static int handle_encls_einit(struct kvm_vcpu *vcpu)
{
        unsigned long sig_hva, secs_hva, token_hva, rflags;
        struct vcpu_vmx *vmx = to_vmx(vcpu);
        gva_t sig_gva, secs_gva, token_gva;
        gpa_t sig_gpa, secs_gpa, token_gpa;
        int ret, trapnr;

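        /*
         * EINIT takes the SIGSTRUCT (1808 bytes) in RBX, the SECS in RCX and
         * the EINITTOKEN (304 bytes) in RDX.
         */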
        if (sgx_get_encls_gva(vcpu, kvm_rbx_read(vcpu), 1808, 4096, &sig_gva) ||
            sgx_get_encls_gva(vcpu, kvm_rcx_read(vcpu), 4096, 4096, &secs_gva) ||
            sgx_get_encls_gva(vcpu, kvm_rdx_read(vcpu), 304, 512, &token_gva))
                return 1;

        /*
         * Translate the SIGSTRUCT, SECS and TOKEN pointers from GVA to GPA.
         * Resume the guest on failure to inject a #PF.
         */
        if (sgx_gva_to_gpa(vcpu, sig_gva, false, &sig_gpa) ||
            sgx_gva_to_gpa(vcpu, secs_gva, true, &secs_gpa) ||
            sgx_gva_to_gpa(vcpu, token_gva, false, &token_gpa))
                return 1;

        /*
         * ...and then to HVA. The order of accesses isn't architectural, i.e.
         * KVM doesn't have to fully process one address at a time. Exit to
         * userspace if a GPA is invalid. Note, all structures are aligned and
         * cannot split pages.
         */
        if (sgx_gpa_to_hva(vcpu, sig_gpa, &sig_hva) ||
            sgx_gpa_to_hva(vcpu, secs_gpa, &secs_hva) ||
            sgx_gpa_to_hva(vcpu, token_gpa, &token_hva))
                return 0;

        ret = sgx_virt_einit((void __user *)sig_hva, (void __user *)token_hva,
                             (void __user *)secs_hva,
                             vmx->msr_ia32_sgxlepubkeyhash, &trapnr);

        if (ret == -EFAULT)
                return sgx_inject_fault(vcpu, secs_gva, trapnr);

        /*
         * sgx_virt_einit() returns -EINVAL when access_ok() fails on @sig_hva,
         * @token_hva or @secs_hva. This should never happen as KVM checks host
         * addresses at memslot creation. sgx_virt_einit() has already warned
         * in this case, so just return.
         */
        if (ret < 0)
                return ret;

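        /*
         * ENCLS[EINIT] reports success/failure via ZF and an SGX error code
         * in RAX; emulate that, clearing the arithmetic flags as hardware
         * does.
         */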
        rflags = vmx_get_rflags(vcpu) & ~(X86_EFLAGS_CF | X86_EFLAGS_PF |
                                          X86_EFLAGS_AF | X86_EFLAGS_SF |
                                          X86_EFLAGS_OF);
        if (ret)
                rflags |= X86_EFLAGS_ZF;
        else
                rflags &= ~X86_EFLAGS_ZF;
        vmx_set_rflags(vcpu, rflags);

        kvm_rax_write(vcpu, ret);
        return kvm_skip_emulated_instruction(vcpu);
}

static inline bool encls_leaf_enabled_in_guest(struct kvm_vcpu *vcpu, u32 leaf)
{
        /*
         * ENCLS generates a #UD if SGX1 isn't supported, i.e. this point will
         * be reached if and only if the SGX1 leafs are enabled.
         */
        if (leaf >= ECREATE && leaf <= ETRACK)
                return true;

        if (leaf >= EAUG && leaf <= EMODT)
                return guest_cpuid_has(vcpu, X86_FEATURE_SGX2);

        return false;
}

static inline bool sgx_enabled_in_guest_bios(struct kvm_vcpu *vcpu)
{
        const u64 bits = FEAT_CTL_SGX_ENABLED | FEAT_CTL_LOCKED;

        return (to_vmx(vcpu)->msr_ia32_feature_control & bits) == bits;
}

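/* Returns 1 to resume the guest, 0 to exit to userspace, < 0 on error. */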
int handle_encls(struct kvm_vcpu *vcpu)
{
        u32 leaf = (u32)kvm_rax_read(vcpu);

        if (!enable_sgx || !guest_cpuid_has(vcpu, X86_FEATURE_SGX) ||
            !guest_cpuid_has(vcpu, X86_FEATURE_SGX1)) {
                kvm_queue_exception(vcpu, UD_VECTOR);
        } else if (!encls_leaf_enabled_in_guest(vcpu, leaf) ||
                   !sgx_enabled_in_guest_bios(vcpu) || !is_paging(vcpu)) {
                kvm_inject_gp(vcpu, 0);
        } else {
                if (leaf == ECREATE)
                        return handle_encls_ecreate(vcpu);
                if (leaf == EINIT)
                        return handle_encls_einit(vcpu);
                WARN_ONCE(1, "unexpected exit on ENCLS[%u]", leaf);
                vcpu->run->exit_reason = KVM_EXIT_UNKNOWN;
                vcpu->run->hw.hardware_exit_reason = EXIT_REASON_ENCLS;
                return 0;
        }
        return 1;
}

void setup_default_sgx_lepubkeyhash(void)
{
        /*
         * Use Intel's default value for Skylake hardware if Launch Control is
         * not supported, i.e. Intel's hash is hardcoded into silicon, or if
         * Launch Control is supported and enabled, i.e. mimic the reset value
         * and let the guest write the MSRs at will. If Launch Control is
         * supported but disabled, then use the current MSR values as the hash
         * MSRs exist but are read-only (locked and not writable).
         */
        if (!enable_sgx || boot_cpu_has(X86_FEATURE_SGX_LC) ||
            rdmsrl_safe(MSR_IA32_SGXLEPUBKEYHASH0, &sgx_pubkey_hash[0])) {
                sgx_pubkey_hash[0] = 0xa6053e051270b7acULL;
                sgx_pubkey_hash[1] = 0x6cfbe8ba8b3b413dULL;
                sgx_pubkey_hash[2] = 0xc4916d99f2b3735dULL;
                sgx_pubkey_hash[3] = 0xd4f8c05909f9bb3bULL;
        } else {
                /* MSR_IA32_SGXLEPUBKEYHASH0 is read above */
                rdmsrl(MSR_IA32_SGXLEPUBKEYHASH1, sgx_pubkey_hash[1]);
                rdmsrl(MSR_IA32_SGXLEPUBKEYHASH2, sgx_pubkey_hash[2]);
                rdmsrl(MSR_IA32_SGXLEPUBKEYHASH3, sgx_pubkey_hash[3]);
        }
}

void vcpu_setup_sgx_lepubkeyhash(struct kvm_vcpu *vcpu)
{
        struct vcpu_vmx *vmx = to_vmx(vcpu);

        memcpy(vmx->msr_ia32_sgxlepubkeyhash, sgx_pubkey_hash,
               sizeof(sgx_pubkey_hash));
}

/*
 * ECREATE must be intercepted to enforce MISCSELECT, ATTRIBUTES and XFRM
 * restrictions if the guest's allowed-1 settings diverge from hardware.
 */
static bool sgx_intercept_encls_ecreate(struct kvm_vcpu *vcpu)
{
        struct kvm_cpuid_entry2 *guest_cpuid;
        u32 eax, ebx, ecx, edx;

        if (!vcpu->kvm->arch.sgx_provisioning_allowed)
                return true;

        guest_cpuid = kvm_find_cpuid_entry_index(vcpu, 0x12, 0);
        if (!guest_cpuid)
                return true;

        cpuid_count(0x12, 0, &eax, &ebx, &ecx, &edx);
        if (guest_cpuid->ebx != ebx || guest_cpuid->edx != edx)
                return true;

        guest_cpuid = kvm_find_cpuid_entry_index(vcpu, 0x12, 1);
        if (!guest_cpuid)
                return true;

        cpuid_count(0x12, 1, &eax, &ebx, &ecx, &edx);
        if (guest_cpuid->eax != eax || guest_cpuid->ebx != ebx ||
            guest_cpuid->ecx != ecx || guest_cpuid->edx != edx)
                return true;

        return false;
}

void vmx_write_encls_bitmap(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
{
        /*
         * There is no software enable bit for SGX that is virtualized by
         * hardware, e.g. there's no CR4.SGXE, so when SGX is disabled in the
         * guest (either by the host or by the guest's BIOS) but enabled in the
         * host, trap all ENCLS leafs and inject #UD/#GP as needed to emulate
         * the expected system behavior for ENCLS.
         */
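        /* A set bit in the bitmap causes a VM-Exit for that ENCLS leaf. */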
        u64 bitmap = -1ull;

        /* Nothing to do if hardware doesn't support SGX */
        if (!cpu_has_vmx_encls_vmexit())
                return;

        if (guest_cpuid_has(vcpu, X86_FEATURE_SGX) &&
            sgx_enabled_in_guest_bios(vcpu)) {
                if (guest_cpuid_has(vcpu, X86_FEATURE_SGX1)) {
                        bitmap &= ~GENMASK_ULL(ETRACK, ECREATE);
                        if (sgx_intercept_encls_ecreate(vcpu))
                                bitmap |= (1 << ECREATE);
                }

                if (guest_cpuid_has(vcpu, X86_FEATURE_SGX2))
                        bitmap &= ~GENMASK_ULL(EMODT, EAUG);

                /*
                 * Trap and execute EINIT if launch control is enabled in the
                 * host using the guest's values for launch control MSRs, even
                 * if the guest's values are fixed to hardware default values.
                 * The MSRs are not loaded/saved on VM-Enter/VM-Exit as writing
                 * the MSRs is extraordinarily expensive.
                 */
                if (boot_cpu_has(X86_FEATURE_SGX_LC))
                        bitmap |= (1 << EINIT);

                if (!vmcs12 && is_guest_mode(vcpu))
                        vmcs12 = get_vmcs12(vcpu);
                if (vmcs12 && nested_cpu_has_encls_exit(vmcs12))
                        bitmap |= vmcs12->encls_exiting_bitmap;
        }
        vmcs_write64(ENCLS_EXITING_BITMAP, bitmap);
}