xref: /openbmc/linux/arch/x86/kernel/cpu/sgx/ioctl.c (revision f2d8e15b)
1 // SPDX-License-Identifier: GPL-2.0
2 /*  Copyright(c) 2016-20 Intel Corporation. */
3 
4 #include <asm/mman.h>
5 #include <asm/sgx.h>
6 #include <linux/mman.h>
7 #include <linux/delay.h>
8 #include <linux/file.h>
9 #include <linux/hashtable.h>
10 #include <linux/highmem.h>
11 #include <linux/ratelimit.h>
12 #include <linux/sched/signal.h>
13 #include <linux/shmem_fs.h>
14 #include <linux/slab.h>
15 #include <linux/suspend.h>
16 #include "driver.h"
17 #include "encl.h"
18 #include "encls.h"
19 
20 struct sgx_va_page *sgx_encl_grow(struct sgx_encl *encl, bool reclaim)
21 {
22 	struct sgx_va_page *va_page = NULL;
23 	void *err;
24 
25 	BUILD_BUG_ON(SGX_VA_SLOT_COUNT !=
26 		(SGX_ENCL_PAGE_VA_OFFSET_MASK >> 3) + 1);
27 
28 	if (!(encl->page_cnt % SGX_VA_SLOT_COUNT)) {
29 		va_page = kzalloc(sizeof(*va_page), GFP_KERNEL);
30 		if (!va_page)
31 			return ERR_PTR(-ENOMEM);
32 
33 		va_page->epc_page = sgx_alloc_va_page(reclaim);
34 		if (IS_ERR(va_page->epc_page)) {
35 			err = ERR_CAST(va_page->epc_page);
36 			kfree(va_page);
37 			return err;
38 		}
39 
40 		WARN_ON_ONCE(encl->page_cnt % SGX_VA_SLOT_COUNT);
41 	}
42 	encl->page_cnt++;
43 	return va_page;
44 }
45 
46 void sgx_encl_shrink(struct sgx_encl *encl, struct sgx_va_page *va_page)
47 {
48 	encl->page_cnt--;
49 
50 	if (va_page) {
51 		sgx_encl_free_epc_page(va_page->epc_page);
52 		list_del(&va_page->list);
53 		kfree(va_page);
54 	}
55 }
56 
57 static int sgx_encl_create(struct sgx_encl *encl, struct sgx_secs *secs)
58 {
59 	struct sgx_epc_page *secs_epc;
60 	struct sgx_va_page *va_page;
61 	struct sgx_pageinfo pginfo;
62 	struct sgx_secinfo secinfo;
63 	unsigned long encl_size;
64 	struct file *backing;
65 	long ret;
66 
67 	va_page = sgx_encl_grow(encl, true);
68 	if (IS_ERR(va_page))
69 		return PTR_ERR(va_page);
70 	else if (va_page)
71 		list_add(&va_page->list, &encl->va_pages);
72 	/* else the tail page of the VA page list had free slots. */
73 
74 	/* The extra page goes to SECS. */
75 	encl_size = secs->size + PAGE_SIZE;
76 
77 	backing = shmem_file_setup("SGX backing", encl_size + (encl_size >> 5),
78 				   VM_NORESERVE);
79 	if (IS_ERR(backing)) {
80 		ret = PTR_ERR(backing);
81 		goto err_out_shrink;
82 	}
83 
84 	encl->backing = backing;
85 
86 	secs_epc = sgx_alloc_epc_page(&encl->secs, true);
87 	if (IS_ERR(secs_epc)) {
88 		ret = PTR_ERR(secs_epc);
89 		goto err_out_backing;
90 	}
91 
92 	encl->secs.epc_page = secs_epc;
93 
94 	pginfo.addr = 0;
95 	pginfo.contents = (unsigned long)secs;
96 	pginfo.metadata = (unsigned long)&secinfo;
97 	pginfo.secs = 0;
98 	memset(&secinfo, 0, sizeof(secinfo));
99 
100 	ret = __ecreate((void *)&pginfo, sgx_get_epc_virt_addr(secs_epc));
101 	if (ret) {
102 		ret = -EIO;
103 		goto err_out;
104 	}
105 
106 	if (secs->attributes & SGX_ATTR_DEBUG)
107 		set_bit(SGX_ENCL_DEBUG, &encl->flags);
108 
109 	encl->secs.encl = encl;
110 	encl->secs.type = SGX_PAGE_TYPE_SECS;
111 	encl->base = secs->base;
112 	encl->size = secs->size;
113 	encl->attributes = secs->attributes;
114 	encl->attributes_mask = SGX_ATTR_DEBUG | SGX_ATTR_MODE64BIT | SGX_ATTR_KSS;
115 
116 	/* Set only after completion, as encl->lock has not been taken. */
117 	set_bit(SGX_ENCL_CREATED, &encl->flags);
118 
119 	return 0;
120 
121 err_out:
122 	sgx_encl_free_epc_page(encl->secs.epc_page);
123 	encl->secs.epc_page = NULL;
124 
125 err_out_backing:
126 	fput(encl->backing);
127 	encl->backing = NULL;
128 
129 err_out_shrink:
130 	sgx_encl_shrink(encl, va_page);
131 
132 	return ret;
133 }
134 
135 /**
136  * sgx_ioc_enclave_create() - handler for %SGX_IOC_ENCLAVE_CREATE
137  * @encl:	An enclave pointer.
138  * @arg:	The ioctl argument.
139  *
140  * Allocate kernel data structures for the enclave and invoke ECREATE.
141  *
142  * Return:
143  * - 0:		Success.
144  * - -EIO:	ECREATE failed.
145  * - -errno:	POSIX error.
146  */
147 static long sgx_ioc_enclave_create(struct sgx_encl *encl, void __user *arg)
148 {
149 	struct sgx_enclave_create create_arg;
150 	void *secs;
151 	int ret;
152 
153 	if (test_bit(SGX_ENCL_CREATED, &encl->flags))
154 		return -EINVAL;
155 
156 	if (copy_from_user(&create_arg, arg, sizeof(create_arg)))
157 		return -EFAULT;
158 
159 	secs = kmalloc(PAGE_SIZE, GFP_KERNEL);
160 	if (!secs)
161 		return -ENOMEM;
162 
163 	if (copy_from_user(secs, (void __user *)create_arg.src, PAGE_SIZE))
164 		ret = -EFAULT;
165 	else
166 		ret = sgx_encl_create(encl, secs);
167 
168 	kfree(secs);
169 	return ret;
170 }
171 
172 static int sgx_validate_secinfo(struct sgx_secinfo *secinfo)
173 {
174 	u64 perm = secinfo->flags & SGX_SECINFO_PERMISSION_MASK;
175 	u64 pt   = secinfo->flags & SGX_SECINFO_PAGE_TYPE_MASK;
176 
177 	if (pt != SGX_SECINFO_REG && pt != SGX_SECINFO_TCS)
178 		return -EINVAL;
179 
180 	if ((perm & SGX_SECINFO_W) && !(perm & SGX_SECINFO_R))
181 		return -EINVAL;
182 
183 	/*
184 	 * CPU will silently overwrite the permissions as zero, which means
185 	 * that we need to validate it ourselves.
186 	 */
187 	if (pt == SGX_SECINFO_TCS && perm)
188 		return -EINVAL;
189 
190 	if (secinfo->flags & SGX_SECINFO_RESERVED_MASK)
191 		return -EINVAL;
192 
193 	if (memchr_inv(secinfo->reserved, 0, sizeof(secinfo->reserved)))
194 		return -EINVAL;
195 
196 	return 0;
197 }
198 
199 static int __sgx_encl_add_page(struct sgx_encl *encl,
200 			       struct sgx_encl_page *encl_page,
201 			       struct sgx_epc_page *epc_page,
202 			       struct sgx_secinfo *secinfo, unsigned long src)
203 {
204 	struct sgx_pageinfo pginfo;
205 	struct vm_area_struct *vma;
206 	struct page *src_page;
207 	int ret;
208 
209 	/* Deny noexec. */
210 	vma = find_vma(current->mm, src);
211 	if (!vma)
212 		return -EFAULT;
213 
214 	if (!(vma->vm_flags & VM_MAYEXEC))
215 		return -EACCES;
216 
217 	ret = get_user_pages(src, 1, 0, &src_page, NULL);
218 	if (ret < 1)
219 		return -EFAULT;
220 
221 	pginfo.secs = (unsigned long)sgx_get_epc_virt_addr(encl->secs.epc_page);
222 	pginfo.addr = encl_page->desc & PAGE_MASK;
223 	pginfo.metadata = (unsigned long)secinfo;
224 	pginfo.contents = (unsigned long)kmap_atomic(src_page);
225 
226 	ret = __eadd(&pginfo, sgx_get_epc_virt_addr(epc_page));
227 
228 	kunmap_atomic((void *)pginfo.contents);
229 	put_page(src_page);
230 
231 	return ret ? -EIO : 0;
232 }
233 
234 /*
235  * If the caller requires measurement of the page as a proof for the content,
236  * use EEXTEND to add a measurement for 256 bytes of the page. Repeat this
237  * operation until the entire page is measured."
238  */
239 static int __sgx_encl_extend(struct sgx_encl *encl,
240 			     struct sgx_epc_page *epc_page)
241 {
242 	unsigned long offset;
243 	int ret;
244 
245 	for (offset = 0; offset < PAGE_SIZE; offset += SGX_EEXTEND_BLOCK_SIZE) {
246 		ret = __eextend(sgx_get_epc_virt_addr(encl->secs.epc_page),
247 				sgx_get_epc_virt_addr(epc_page) + offset);
248 		if (ret) {
249 			if (encls_failed(ret))
250 				ENCLS_WARN(ret, "EEXTEND");
251 
252 			return -EIO;
253 		}
254 	}
255 
256 	return 0;
257 }
258 
259 static int sgx_encl_add_page(struct sgx_encl *encl, unsigned long src,
260 			     unsigned long offset, struct sgx_secinfo *secinfo,
261 			     unsigned long flags)
262 {
263 	struct sgx_encl_page *encl_page;
264 	struct sgx_epc_page *epc_page;
265 	struct sgx_va_page *va_page;
266 	int ret;
267 
268 	encl_page = sgx_encl_page_alloc(encl, offset, secinfo->flags);
269 	if (IS_ERR(encl_page))
270 		return PTR_ERR(encl_page);
271 
272 	epc_page = sgx_alloc_epc_page(encl_page, true);
273 	if (IS_ERR(epc_page)) {
274 		kfree(encl_page);
275 		return PTR_ERR(epc_page);
276 	}
277 
278 	va_page = sgx_encl_grow(encl, true);
279 	if (IS_ERR(va_page)) {
280 		ret = PTR_ERR(va_page);
281 		goto err_out_free;
282 	}
283 
284 	mmap_read_lock(current->mm);
285 	mutex_lock(&encl->lock);
286 
287 	/*
288 	 * Adding to encl->va_pages must be done under encl->lock.  Ditto for
289 	 * deleting (via sgx_encl_shrink()) in the error path.
290 	 */
291 	if (va_page)
292 		list_add(&va_page->list, &encl->va_pages);
293 
294 	/*
295 	 * Insert prior to EADD in case of OOM.  EADD modifies MRENCLAVE, i.e.
296 	 * can't be gracefully unwound, while failure on EADD/EXTEND is limited
297 	 * to userspace errors (or kernel/hardware bugs).
298 	 */
299 	ret = xa_insert(&encl->page_array, PFN_DOWN(encl_page->desc),
300 			encl_page, GFP_KERNEL);
301 	if (ret)
302 		goto err_out_unlock;
303 
304 	ret = __sgx_encl_add_page(encl, encl_page, epc_page, secinfo,
305 				  src);
306 	if (ret)
307 		goto err_out;
308 
309 	/*
310 	 * Complete the "add" before doing the "extend" so that the "add"
311 	 * isn't in a half-baked state in the extremely unlikely scenario
312 	 * the enclave will be destroyed in response to EEXTEND failure.
313 	 */
314 	encl_page->encl = encl;
315 	encl_page->epc_page = epc_page;
316 	encl_page->type = (secinfo->flags & SGX_SECINFO_PAGE_TYPE_MASK) >> 8;
317 	encl->secs_child_cnt++;
318 
319 	if (flags & SGX_PAGE_MEASURE) {
320 		ret = __sgx_encl_extend(encl, epc_page);
321 		if (ret)
322 			goto err_out;
323 	}
324 
325 	sgx_mark_page_reclaimable(encl_page->epc_page);
326 	mutex_unlock(&encl->lock);
327 	mmap_read_unlock(current->mm);
328 	return ret;
329 
330 err_out:
331 	xa_erase(&encl->page_array, PFN_DOWN(encl_page->desc));
332 
333 err_out_unlock:
334 	sgx_encl_shrink(encl, va_page);
335 	mutex_unlock(&encl->lock);
336 	mmap_read_unlock(current->mm);
337 
338 err_out_free:
339 	sgx_encl_free_epc_page(epc_page);
340 	kfree(encl_page);
341 
342 	return ret;
343 }
344 
345 /*
346  * Ensure user provided offset and length values are valid for
347  * an enclave.
348  */
349 static int sgx_validate_offset_length(struct sgx_encl *encl,
350 				      unsigned long offset,
351 				      unsigned long length)
352 {
353 	if (!IS_ALIGNED(offset, PAGE_SIZE))
354 		return -EINVAL;
355 
356 	if (!length || !IS_ALIGNED(length, PAGE_SIZE))
357 		return -EINVAL;
358 
359 	if (offset + length - PAGE_SIZE >= encl->size)
360 		return -EINVAL;
361 
362 	return 0;
363 }
364 
365 /**
366  * sgx_ioc_enclave_add_pages() - The handler for %SGX_IOC_ENCLAVE_ADD_PAGES
367  * @encl:       an enclave pointer
368  * @arg:	a user pointer to a struct sgx_enclave_add_pages instance
369  *
370  * Add one or more pages to an uninitialized enclave, and optionally extend the
371  * measurement with the contents of the page. The SECINFO and measurement mask
372  * are applied to all pages.
373  *
374  * A SECINFO for a TCS is required to always contain zero permissions because
375  * CPU silently zeros them. Allowing anything else would cause a mismatch in
376  * the measurement.
377  *
378  * mmap()'s protection bits are capped by the page permissions. For each page
379  * address, the maximum protection bits are computed with the following
380  * heuristics:
381  *
382  * 1. A regular page: PROT_R, PROT_W and PROT_X match the SECINFO permissions.
383  * 2. A TCS page: PROT_R | PROT_W.
384  *
385  * mmap() is not allowed to surpass the minimum of the maximum protection bits
386  * within the given address range.
387  *
388  * The function deinitializes kernel data structures for enclave and returns
389  * -EIO in any of the following conditions:
390  *
391  * - Enclave Page Cache (EPC), the physical memory holding enclaves, has
392  *   been invalidated. This will cause EADD and EEXTEND to fail.
393  * - If the source address is corrupted somehow when executing EADD.
394  *
395  * Return:
396  * - 0:		Success.
397  * - -EACCES:	The source page is located in a noexec partition.
398  * - -ENOMEM:	Out of EPC pages.
399  * - -EINTR:	The call was interrupted before data was processed.
400  * - -EIO:	Either EADD or EEXTEND failed because invalid source address
401  *		or power cycle.
402  * - -errno:	POSIX error.
403  */
404 static long sgx_ioc_enclave_add_pages(struct sgx_encl *encl, void __user *arg)
405 {
406 	struct sgx_enclave_add_pages add_arg;
407 	struct sgx_secinfo secinfo;
408 	unsigned long c;
409 	int ret;
410 
411 	if (!test_bit(SGX_ENCL_CREATED, &encl->flags) ||
412 	    test_bit(SGX_ENCL_INITIALIZED, &encl->flags))
413 		return -EINVAL;
414 
415 	if (copy_from_user(&add_arg, arg, sizeof(add_arg)))
416 		return -EFAULT;
417 
418 	if (!IS_ALIGNED(add_arg.src, PAGE_SIZE))
419 		return -EINVAL;
420 
421 	if (sgx_validate_offset_length(encl, add_arg.offset, add_arg.length))
422 		return -EINVAL;
423 
424 	if (copy_from_user(&secinfo, (void __user *)add_arg.secinfo,
425 			   sizeof(secinfo)))
426 		return -EFAULT;
427 
428 	if (sgx_validate_secinfo(&secinfo))
429 		return -EINVAL;
430 
431 	for (c = 0 ; c < add_arg.length; c += PAGE_SIZE) {
432 		if (signal_pending(current)) {
433 			if (!c)
434 				ret = -ERESTARTSYS;
435 
436 			break;
437 		}
438 
439 		if (need_resched())
440 			cond_resched();
441 
442 		ret = sgx_encl_add_page(encl, add_arg.src + c, add_arg.offset + c,
443 					&secinfo, add_arg.flags);
444 		if (ret)
445 			break;
446 	}
447 
448 	add_arg.count = c;
449 
450 	if (copy_to_user(arg, &add_arg, sizeof(add_arg)))
451 		return -EFAULT;
452 
453 	return ret;
454 }
455 
456 static int __sgx_get_key_hash(struct crypto_shash *tfm, const void *modulus,
457 			      void *hash)
458 {
459 	SHASH_DESC_ON_STACK(shash, tfm);
460 
461 	shash->tfm = tfm;
462 
463 	return crypto_shash_digest(shash, modulus, SGX_MODULUS_SIZE, hash);
464 }
465 
466 static int sgx_get_key_hash(const void *modulus, void *hash)
467 {
468 	struct crypto_shash *tfm;
469 	int ret;
470 
471 	tfm = crypto_alloc_shash("sha256", 0, CRYPTO_ALG_ASYNC);
472 	if (IS_ERR(tfm))
473 		return PTR_ERR(tfm);
474 
475 	ret = __sgx_get_key_hash(tfm, modulus, hash);
476 
477 	crypto_free_shash(tfm);
478 	return ret;
479 }
480 
481 static int sgx_encl_init(struct sgx_encl *encl, struct sgx_sigstruct *sigstruct,
482 			 void *token)
483 {
484 	u64 mrsigner[4];
485 	int i, j;
486 	void *addr;
487 	int ret;
488 
489 	/*
490 	 * Deny initializing enclaves with attributes (namely provisioning)
491 	 * that have not been explicitly allowed.
492 	 */
493 	if (encl->attributes & ~encl->attributes_mask)
494 		return -EACCES;
495 
496 	/*
497 	 * Attributes should not be enforced *only* against what's available on
498 	 * platform (done in sgx_encl_create) but checked and enforced against
499 	 * the mask for enforcement in sigstruct. For example an enclave could
500 	 * opt to sign with AVX bit in xfrm, but still be loadable on a platform
501 	 * without it if the sigstruct->body.attributes_mask does not turn that
502 	 * bit on.
503 	 */
504 	if (sigstruct->body.attributes & sigstruct->body.attributes_mask &
505 	    sgx_attributes_reserved_mask)
506 		return -EINVAL;
507 
508 	if (sigstruct->body.miscselect & sigstruct->body.misc_mask &
509 	    sgx_misc_reserved_mask)
510 		return -EINVAL;
511 
512 	if (sigstruct->body.xfrm & sigstruct->body.xfrm_mask &
513 	    sgx_xfrm_reserved_mask)
514 		return -EINVAL;
515 
516 	ret = sgx_get_key_hash(sigstruct->modulus, mrsigner);
517 	if (ret)
518 		return ret;
519 
520 	mutex_lock(&encl->lock);
521 
522 	/*
523 	 * ENCLS[EINIT] is interruptible because it has such a high latency,
524 	 * e.g. 50k+ cycles on success. If an IRQ/NMI/SMI becomes pending,
525 	 * EINIT may fail with SGX_UNMASKED_EVENT so that the event can be
526 	 * serviced.
527 	 */
528 	for (i = 0; i < SGX_EINIT_SLEEP_COUNT; i++) {
529 		for (j = 0; j < SGX_EINIT_SPIN_COUNT; j++) {
530 			addr = sgx_get_epc_virt_addr(encl->secs.epc_page);
531 
532 			preempt_disable();
533 
534 			sgx_update_lepubkeyhash(mrsigner);
535 
536 			ret = __einit(sigstruct, token, addr);
537 
538 			preempt_enable();
539 
540 			if (ret == SGX_UNMASKED_EVENT)
541 				continue;
542 			else
543 				break;
544 		}
545 
546 		if (ret != SGX_UNMASKED_EVENT)
547 			break;
548 
549 		msleep_interruptible(SGX_EINIT_SLEEP_TIME);
550 
551 		if (signal_pending(current)) {
552 			ret = -ERESTARTSYS;
553 			goto err_out;
554 		}
555 	}
556 
557 	if (encls_faulted(ret)) {
558 		if (encls_failed(ret))
559 			ENCLS_WARN(ret, "EINIT");
560 
561 		ret = -EIO;
562 	} else if (ret) {
563 		pr_debug("EINIT returned %d\n", ret);
564 		ret = -EPERM;
565 	} else {
566 		set_bit(SGX_ENCL_INITIALIZED, &encl->flags);
567 	}
568 
569 err_out:
570 	mutex_unlock(&encl->lock);
571 	return ret;
572 }
573 
574 /**
575  * sgx_ioc_enclave_init() - handler for %SGX_IOC_ENCLAVE_INIT
576  * @encl:	an enclave pointer
577  * @arg:	userspace pointer to a struct sgx_enclave_init instance
578  *
579  * Flush any outstanding enqueued EADD operations and perform EINIT.  The
580  * Launch Enclave Public Key Hash MSRs are rewritten as necessary to match
581  * the enclave's MRSIGNER, which is caculated from the provided sigstruct.
582  *
583  * Return:
584  * - 0:		Success.
585  * - -EPERM:	Invalid SIGSTRUCT.
586  * - -EIO:	EINIT failed because of a power cycle.
587  * - -errno:	POSIX error.
588  */
589 static long sgx_ioc_enclave_init(struct sgx_encl *encl, void __user *arg)
590 {
591 	struct sgx_sigstruct *sigstruct;
592 	struct sgx_enclave_init init_arg;
593 	void *token;
594 	int ret;
595 
596 	if (!test_bit(SGX_ENCL_CREATED, &encl->flags) ||
597 	    test_bit(SGX_ENCL_INITIALIZED, &encl->flags))
598 		return -EINVAL;
599 
600 	if (copy_from_user(&init_arg, arg, sizeof(init_arg)))
601 		return -EFAULT;
602 
603 	/*
604 	 * 'sigstruct' must be on a page boundary and 'token' on a 512 byte
605 	 * boundary.  kmalloc() will give this alignment when allocating
606 	 * PAGE_SIZE bytes.
607 	 */
608 	sigstruct = kmalloc(PAGE_SIZE, GFP_KERNEL);
609 	if (!sigstruct)
610 		return -ENOMEM;
611 
612 	token = (void *)((unsigned long)sigstruct + PAGE_SIZE / 2);
613 	memset(token, 0, SGX_LAUNCH_TOKEN_SIZE);
614 
615 	if (copy_from_user(sigstruct, (void __user *)init_arg.sigstruct,
616 			   sizeof(*sigstruct))) {
617 		ret = -EFAULT;
618 		goto out;
619 	}
620 
621 	/*
622 	 * A legacy field used with Intel signed enclaves. These used to mean
623 	 * regular and architectural enclaves. The CPU only accepts these values
624 	 * but they do not have any other meaning.
625 	 *
626 	 * Thus, reject any other values.
627 	 */
628 	if (sigstruct->header.vendor != 0x0000 &&
629 	    sigstruct->header.vendor != 0x8086) {
630 		ret = -EINVAL;
631 		goto out;
632 	}
633 
634 	ret = sgx_encl_init(encl, sigstruct, token);
635 
636 out:
637 	kfree(sigstruct);
638 	return ret;
639 }
640 
641 /**
642  * sgx_ioc_enclave_provision() - handler for %SGX_IOC_ENCLAVE_PROVISION
643  * @encl:	an enclave pointer
644  * @arg:	userspace pointer to a struct sgx_enclave_provision instance
645  *
646  * Allow ATTRIBUTE.PROVISION_KEY for an enclave by providing a file handle to
647  * /dev/sgx_provision.
648  *
649  * Return:
650  * - 0:		Success.
651  * - -errno:	Otherwise.
652  */
653 static long sgx_ioc_enclave_provision(struct sgx_encl *encl, void __user *arg)
654 {
655 	struct sgx_enclave_provision params;
656 
657 	if (copy_from_user(&params, arg, sizeof(params)))
658 		return -EFAULT;
659 
660 	return sgx_set_attribute(&encl->attributes_mask, params.fd);
661 }
662 
663 /*
664  * Ensure enclave is ready for SGX2 functions. Readiness is checked
665  * by ensuring the hardware supports SGX2 and the enclave is initialized
666  * and thus able to handle requests to modify pages within it.
667  */
668 static int sgx_ioc_sgx2_ready(struct sgx_encl *encl)
669 {
670 	if (!(cpu_feature_enabled(X86_FEATURE_SGX2)))
671 		return -ENODEV;
672 
673 	if (!test_bit(SGX_ENCL_INITIALIZED, &encl->flags))
674 		return -EINVAL;
675 
676 	return 0;
677 }
678 
679 /*
680  * Some SGX functions require that no cached linear-to-physical address
681  * mappings are present before they can succeed. Collaborate with
682  * hardware via ENCLS[ETRACK] to ensure that all cached
683  * linear-to-physical address mappings belonging to all threads of
684  * the enclave are cleared. See sgx_encl_cpumask() for details.
685  *
686  * Must be called with enclave's mutex held from the time the
687  * SGX function requiring that no cached linear-to-physical mappings
688  * are present is executed until this ETRACK flow is complete.
689  */
690 static int sgx_enclave_etrack(struct sgx_encl *encl)
691 {
692 	void *epc_virt;
693 	int ret;
694 
695 	epc_virt = sgx_get_epc_virt_addr(encl->secs.epc_page);
696 	ret = __etrack(epc_virt);
697 	if (ret) {
698 		/*
699 		 * ETRACK only fails when there is an OS issue. For
700 		 * example, two consecutive ETRACK was sent without
701 		 * completed IPI between.
702 		 */
703 		pr_err_once("ETRACK returned %d (0x%x)", ret, ret);
704 		/*
705 		 * Send IPIs to kick CPUs out of the enclave and
706 		 * try ETRACK again.
707 		 */
708 		on_each_cpu_mask(sgx_encl_cpumask(encl), sgx_ipi_cb, NULL, 1);
709 		ret = __etrack(epc_virt);
710 		if (ret) {
711 			pr_err_once("ETRACK repeat returned %d (0x%x)",
712 				    ret, ret);
713 			return -EFAULT;
714 		}
715 	}
716 	on_each_cpu_mask(sgx_encl_cpumask(encl), sgx_ipi_cb, NULL, 1);
717 
718 	return 0;
719 }
720 
721 /**
722  * sgx_enclave_restrict_permissions() - Restrict EPCM permissions
723  * @encl:	Enclave to which the pages belong.
724  * @modp:	Checked parameters from user on which pages need modifying and
725  *              their new permissions.
726  *
727  * Return:
728  * - 0:		Success.
729  * - -errno:	Otherwise.
730  */
731 static long
732 sgx_enclave_restrict_permissions(struct sgx_encl *encl,
733 				 struct sgx_enclave_restrict_permissions *modp)
734 {
735 	struct sgx_encl_page *entry;
736 	struct sgx_secinfo secinfo;
737 	unsigned long addr;
738 	unsigned long c;
739 	void *epc_virt;
740 	int ret;
741 
742 	memset(&secinfo, 0, sizeof(secinfo));
743 	secinfo.flags = modp->permissions & SGX_SECINFO_PERMISSION_MASK;
744 
745 	for (c = 0 ; c < modp->length; c += PAGE_SIZE) {
746 		addr = encl->base + modp->offset + c;
747 
748 		sgx_reclaim_direct();
749 
750 		mutex_lock(&encl->lock);
751 
752 		entry = sgx_encl_load_page(encl, addr);
753 		if (IS_ERR(entry)) {
754 			ret = PTR_ERR(entry) == -EBUSY ? -EAGAIN : -EFAULT;
755 			goto out_unlock;
756 		}
757 
758 		/*
759 		 * Changing EPCM permissions is only supported on regular
760 		 * SGX pages. Attempting this change on other pages will
761 		 * result in #PF.
762 		 */
763 		if (entry->type != SGX_PAGE_TYPE_REG) {
764 			ret = -EINVAL;
765 			goto out_unlock;
766 		}
767 
768 		/*
769 		 * Apart from ensuring that read-access remains, do not verify
770 		 * the permission bits requested. Kernel has no control over
771 		 * how EPCM permissions can be relaxed from within the enclave.
772 		 * ENCLS[EMODPR] can only remove existing EPCM permissions,
773 		 * attempting to set new permissions will be ignored by the
774 		 * hardware.
775 		 */
776 
777 		/* Change EPCM permissions. */
778 		epc_virt = sgx_get_epc_virt_addr(entry->epc_page);
779 		ret = __emodpr(&secinfo, epc_virt);
780 		if (encls_faulted(ret)) {
781 			/*
782 			 * All possible faults should be avoidable:
783 			 * parameters have been checked, will only change
784 			 * permissions of a regular page, and no concurrent
785 			 * SGX1/SGX2 ENCLS instructions since these
786 			 * are protected with mutex.
787 			 */
788 			pr_err_once("EMODPR encountered exception %d\n",
789 				    ENCLS_TRAPNR(ret));
790 			ret = -EFAULT;
791 			goto out_unlock;
792 		}
793 		if (encls_failed(ret)) {
794 			modp->result = ret;
795 			ret = -EFAULT;
796 			goto out_unlock;
797 		}
798 
799 		ret = sgx_enclave_etrack(encl);
800 		if (ret) {
801 			ret = -EFAULT;
802 			goto out_unlock;
803 		}
804 
805 		mutex_unlock(&encl->lock);
806 	}
807 
808 	ret = 0;
809 	goto out;
810 
811 out_unlock:
812 	mutex_unlock(&encl->lock);
813 out:
814 	modp->count = c;
815 
816 	return ret;
817 }
818 
819 /**
820  * sgx_ioc_enclave_restrict_permissions() - handler for
821  *                                        %SGX_IOC_ENCLAVE_RESTRICT_PERMISSIONS
822  * @encl:	an enclave pointer
823  * @arg:	userspace pointer to a &struct sgx_enclave_restrict_permissions
824  *		instance
825  *
826  * SGX2 distinguishes between relaxing and restricting the enclave page
827  * permissions maintained by the hardware (EPCM permissions) of pages
828  * belonging to an initialized enclave (after SGX_IOC_ENCLAVE_INIT).
829  *
830  * EPCM permissions cannot be restricted from within the enclave, the enclave
831  * requires the kernel to run the privileged level 0 instructions ENCLS[EMODPR]
832  * and ENCLS[ETRACK]. An attempt to relax EPCM permissions with this call
833  * will be ignored by the hardware.
834  *
835  * Return:
836  * - 0:		Success
837  * - -errno:	Otherwise
838  */
839 static long sgx_ioc_enclave_restrict_permissions(struct sgx_encl *encl,
840 						 void __user *arg)
841 {
842 	struct sgx_enclave_restrict_permissions params;
843 	long ret;
844 
845 	ret = sgx_ioc_sgx2_ready(encl);
846 	if (ret)
847 		return ret;
848 
849 	if (copy_from_user(&params, arg, sizeof(params)))
850 		return -EFAULT;
851 
852 	if (sgx_validate_offset_length(encl, params.offset, params.length))
853 		return -EINVAL;
854 
855 	if (params.permissions & ~SGX_SECINFO_PERMISSION_MASK)
856 		return -EINVAL;
857 
858 	/*
859 	 * Fail early if invalid permissions requested to prevent ENCLS[EMODPR]
860 	 * from faulting later when the CPU does the same check.
861 	 */
862 	if ((params.permissions & SGX_SECINFO_W) &&
863 	    !(params.permissions & SGX_SECINFO_R))
864 		return -EINVAL;
865 
866 	if (params.result || params.count)
867 		return -EINVAL;
868 
869 	ret = sgx_enclave_restrict_permissions(encl, &params);
870 
871 	if (copy_to_user(arg, &params, sizeof(params)))
872 		return -EFAULT;
873 
874 	return ret;
875 }
876 
877 /**
878  * sgx_enclave_modify_types() - Modify type of SGX enclave pages
879  * @encl:	Enclave to which the pages belong.
880  * @modt:	Checked parameters from user about which pages need modifying
881  *              and their new page type.
882  *
883  * Return:
884  * - 0:		Success
885  * - -errno:	Otherwise
886  */
887 static long sgx_enclave_modify_types(struct sgx_encl *encl,
888 				     struct sgx_enclave_modify_types *modt)
889 {
890 	unsigned long max_prot_restore;
891 	enum sgx_page_type page_type;
892 	struct sgx_encl_page *entry;
893 	struct sgx_secinfo secinfo;
894 	unsigned long prot;
895 	unsigned long addr;
896 	unsigned long c;
897 	void *epc_virt;
898 	int ret;
899 
900 	page_type = modt->page_type & SGX_PAGE_TYPE_MASK;
901 
902 	/*
903 	 * The only new page types allowed by hardware are PT_TCS and PT_TRIM.
904 	 */
905 	if (page_type != SGX_PAGE_TYPE_TCS && page_type != SGX_PAGE_TYPE_TRIM)
906 		return -EINVAL;
907 
908 	memset(&secinfo, 0, sizeof(secinfo));
909 
910 	secinfo.flags = page_type << 8;
911 
912 	for (c = 0 ; c < modt->length; c += PAGE_SIZE) {
913 		addr = encl->base + modt->offset + c;
914 
915 		sgx_reclaim_direct();
916 
917 		mutex_lock(&encl->lock);
918 
919 		entry = sgx_encl_load_page(encl, addr);
920 		if (IS_ERR(entry)) {
921 			ret = PTR_ERR(entry) == -EBUSY ? -EAGAIN : -EFAULT;
922 			goto out_unlock;
923 		}
924 
925 		/*
926 		 * Borrow the logic from the Intel SDM. Regular pages
927 		 * (SGX_PAGE_TYPE_REG) can change type to SGX_PAGE_TYPE_TCS
928 		 * or SGX_PAGE_TYPE_TRIM but TCS pages can only be trimmed.
929 		 * CET pages not supported yet.
930 		 */
931 		if (!(entry->type == SGX_PAGE_TYPE_REG ||
932 		      (entry->type == SGX_PAGE_TYPE_TCS &&
933 		       page_type == SGX_PAGE_TYPE_TRIM))) {
934 			ret = -EINVAL;
935 			goto out_unlock;
936 		}
937 
938 		max_prot_restore = entry->vm_max_prot_bits;
939 
940 		/*
941 		 * Once a regular page becomes a TCS page it cannot be
942 		 * changed back. So the maximum allowed protection reflects
943 		 * the TCS page that is always RW from kernel perspective but
944 		 * will be inaccessible from within enclave. Before doing
945 		 * so, do make sure that the new page type continues to
946 		 * respect the originally vetted page permissions.
947 		 */
948 		if (entry->type == SGX_PAGE_TYPE_REG &&
949 		    page_type == SGX_PAGE_TYPE_TCS) {
950 			if (~entry->vm_max_prot_bits & (VM_READ | VM_WRITE)) {
951 				ret = -EPERM;
952 				goto out_unlock;
953 			}
954 			prot = PROT_READ | PROT_WRITE;
955 			entry->vm_max_prot_bits = calc_vm_prot_bits(prot, 0);
956 
957 			/*
958 			 * Prevent page from being reclaimed while mutex
959 			 * is released.
960 			 */
961 			if (sgx_unmark_page_reclaimable(entry->epc_page)) {
962 				ret = -EAGAIN;
963 				goto out_entry_changed;
964 			}
965 
966 			/*
967 			 * Do not keep encl->lock because of dependency on
968 			 * mmap_lock acquired in sgx_zap_enclave_ptes().
969 			 */
970 			mutex_unlock(&encl->lock);
971 
972 			sgx_zap_enclave_ptes(encl, addr);
973 
974 			mutex_lock(&encl->lock);
975 
976 			sgx_mark_page_reclaimable(entry->epc_page);
977 		}
978 
979 		/* Change EPC type */
980 		epc_virt = sgx_get_epc_virt_addr(entry->epc_page);
981 		ret = __emodt(&secinfo, epc_virt);
982 		if (encls_faulted(ret)) {
983 			/*
984 			 * All possible faults should be avoidable:
985 			 * parameters have been checked, will only change
986 			 * valid page types, and no concurrent
987 			 * SGX1/SGX2 ENCLS instructions since these are
988 			 * protected with mutex.
989 			 */
990 			pr_err_once("EMODT encountered exception %d\n",
991 				    ENCLS_TRAPNR(ret));
992 			ret = -EFAULT;
993 			goto out_entry_changed;
994 		}
995 		if (encls_failed(ret)) {
996 			modt->result = ret;
997 			ret = -EFAULT;
998 			goto out_entry_changed;
999 		}
1000 
1001 		ret = sgx_enclave_etrack(encl);
1002 		if (ret) {
1003 			ret = -EFAULT;
1004 			goto out_unlock;
1005 		}
1006 
1007 		entry->type = page_type;
1008 
1009 		mutex_unlock(&encl->lock);
1010 	}
1011 
1012 	ret = 0;
1013 	goto out;
1014 
1015 out_entry_changed:
1016 	entry->vm_max_prot_bits = max_prot_restore;
1017 out_unlock:
1018 	mutex_unlock(&encl->lock);
1019 out:
1020 	modt->count = c;
1021 
1022 	return ret;
1023 }
1024 
1025 /**
1026  * sgx_ioc_enclave_modify_types() - handler for %SGX_IOC_ENCLAVE_MODIFY_TYPES
1027  * @encl:	an enclave pointer
1028  * @arg:	userspace pointer to a &struct sgx_enclave_modify_types instance
1029  *
1030  * Ability to change the enclave page type supports the following use cases:
1031  *
1032  * * It is possible to add TCS pages to an enclave by changing the type of
1033  *   regular pages (%SGX_PAGE_TYPE_REG) to TCS (%SGX_PAGE_TYPE_TCS) pages.
1034  *   With this support the number of threads supported by an initialized
1035  *   enclave can be increased dynamically.
1036  *
1037  * * Regular or TCS pages can dynamically be removed from an initialized
1038  *   enclave by changing the page type to %SGX_PAGE_TYPE_TRIM. Changing the
1039  *   page type to %SGX_PAGE_TYPE_TRIM marks the page for removal with actual
1040  *   removal done by handler of %SGX_IOC_ENCLAVE_REMOVE_PAGES ioctl() called
1041  *   after ENCLU[EACCEPT] is run on %SGX_PAGE_TYPE_TRIM page from within the
1042  *   enclave.
1043  *
1044  * Return:
1045  * - 0:		Success
1046  * - -errno:	Otherwise
1047  */
1048 static long sgx_ioc_enclave_modify_types(struct sgx_encl *encl,
1049 					 void __user *arg)
1050 {
1051 	struct sgx_enclave_modify_types params;
1052 	long ret;
1053 
1054 	ret = sgx_ioc_sgx2_ready(encl);
1055 	if (ret)
1056 		return ret;
1057 
1058 	if (copy_from_user(&params, arg, sizeof(params)))
1059 		return -EFAULT;
1060 
1061 	if (sgx_validate_offset_length(encl, params.offset, params.length))
1062 		return -EINVAL;
1063 
1064 	if (params.page_type & ~SGX_PAGE_TYPE_MASK)
1065 		return -EINVAL;
1066 
1067 	if (params.result || params.count)
1068 		return -EINVAL;
1069 
1070 	ret = sgx_enclave_modify_types(encl, &params);
1071 
1072 	if (copy_to_user(arg, &params, sizeof(params)))
1073 		return -EFAULT;
1074 
1075 	return ret;
1076 }
1077 
1078 /**
1079  * sgx_encl_remove_pages() - Remove trimmed pages from SGX enclave
1080  * @encl:	Enclave to which the pages belong
1081  * @params:	Checked parameters from user on which pages need to be removed
1082  *
1083  * Return:
1084  * - 0:		Success.
1085  * - -errno:	Otherwise.
1086  */
1087 static long sgx_encl_remove_pages(struct sgx_encl *encl,
1088 				  struct sgx_enclave_remove_pages *params)
1089 {
1090 	struct sgx_encl_page *entry;
1091 	struct sgx_secinfo secinfo;
1092 	unsigned long addr;
1093 	unsigned long c;
1094 	void *epc_virt;
1095 	int ret;
1096 
1097 	memset(&secinfo, 0, sizeof(secinfo));
1098 	secinfo.flags = SGX_SECINFO_R | SGX_SECINFO_W | SGX_SECINFO_X;
1099 
1100 	for (c = 0 ; c < params->length; c += PAGE_SIZE) {
1101 		addr = encl->base + params->offset + c;
1102 
1103 		sgx_reclaim_direct();
1104 
1105 		mutex_lock(&encl->lock);
1106 
1107 		entry = sgx_encl_load_page(encl, addr);
1108 		if (IS_ERR(entry)) {
1109 			ret = PTR_ERR(entry) == -EBUSY ? -EAGAIN : -EFAULT;
1110 			goto out_unlock;
1111 		}
1112 
1113 		if (entry->type != SGX_PAGE_TYPE_TRIM) {
1114 			ret = -EPERM;
1115 			goto out_unlock;
1116 		}
1117 
1118 		/*
1119 		 * ENCLS[EMODPR] is a no-op instruction used to inform if
1120 		 * ENCLU[EACCEPT] was run from within the enclave. If
1121 		 * ENCLS[EMODPR] is run with RWX on a trimmed page that is
1122 		 * not yet accepted then it will return
1123 		 * %SGX_PAGE_NOT_MODIFIABLE, after the trimmed page is
1124 		 * accepted the instruction will encounter a page fault.
1125 		 */
1126 		epc_virt = sgx_get_epc_virt_addr(entry->epc_page);
1127 		ret = __emodpr(&secinfo, epc_virt);
1128 		if (!encls_faulted(ret) || ENCLS_TRAPNR(ret) != X86_TRAP_PF) {
1129 			ret = -EPERM;
1130 			goto out_unlock;
1131 		}
1132 
1133 		if (sgx_unmark_page_reclaimable(entry->epc_page)) {
1134 			ret = -EBUSY;
1135 			goto out_unlock;
1136 		}
1137 
1138 		/*
1139 		 * Do not keep encl->lock because of dependency on
1140 		 * mmap_lock acquired in sgx_zap_enclave_ptes().
1141 		 */
1142 		mutex_unlock(&encl->lock);
1143 
1144 		sgx_zap_enclave_ptes(encl, addr);
1145 
1146 		mutex_lock(&encl->lock);
1147 
1148 		sgx_encl_free_epc_page(entry->epc_page);
1149 		encl->secs_child_cnt--;
1150 		entry->epc_page = NULL;
1151 		xa_erase(&encl->page_array, PFN_DOWN(entry->desc));
1152 		sgx_encl_shrink(encl, NULL);
1153 		kfree(entry);
1154 
1155 		mutex_unlock(&encl->lock);
1156 	}
1157 
1158 	ret = 0;
1159 	goto out;
1160 
1161 out_unlock:
1162 	mutex_unlock(&encl->lock);
1163 out:
1164 	params->count = c;
1165 
1166 	return ret;
1167 }
1168 
1169 /**
1170  * sgx_ioc_enclave_remove_pages() - handler for %SGX_IOC_ENCLAVE_REMOVE_PAGES
1171  * @encl:	an enclave pointer
1172  * @arg:	userspace pointer to &struct sgx_enclave_remove_pages instance
1173  *
1174  * Final step of the flow removing pages from an initialized enclave. The
1175  * complete flow is:
1176  *
1177  * 1) User changes the type of the pages to be removed to %SGX_PAGE_TYPE_TRIM
1178  *    using the %SGX_IOC_ENCLAVE_MODIFY_TYPES ioctl().
1179  * 2) User approves the page removal by running ENCLU[EACCEPT] from within
1180  *    the enclave.
1181  * 3) User initiates actual page removal using the
1182  *    %SGX_IOC_ENCLAVE_REMOVE_PAGES ioctl() that is handled here.
1183  *
1184  * First remove any page table entries pointing to the page and then proceed
1185  * with the actual removal of the enclave page and data in support of it.
1186  *
1187  * VA pages are not affected by this removal. It is thus possible that the
1188  * enclave may end up with more VA pages than needed to support all its
1189  * pages.
1190  *
1191  * Return:
1192  * - 0:		Success
1193  * - -errno:	Otherwise
1194  */
1195 static long sgx_ioc_enclave_remove_pages(struct sgx_encl *encl,
1196 					 void __user *arg)
1197 {
1198 	struct sgx_enclave_remove_pages params;
1199 	long ret;
1200 
1201 	ret = sgx_ioc_sgx2_ready(encl);
1202 	if (ret)
1203 		return ret;
1204 
1205 	if (copy_from_user(&params, arg, sizeof(params)))
1206 		return -EFAULT;
1207 
1208 	if (sgx_validate_offset_length(encl, params.offset, params.length))
1209 		return -EINVAL;
1210 
1211 	if (params.count)
1212 		return -EINVAL;
1213 
1214 	ret = sgx_encl_remove_pages(encl, &params);
1215 
1216 	if (copy_to_user(arg, &params, sizeof(params)))
1217 		return -EFAULT;
1218 
1219 	return ret;
1220 }
1221 
1222 long sgx_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
1223 {
1224 	struct sgx_encl *encl = filep->private_data;
1225 	int ret;
1226 
1227 	if (test_and_set_bit(SGX_ENCL_IOCTL, &encl->flags))
1228 		return -EBUSY;
1229 
1230 	switch (cmd) {
1231 	case SGX_IOC_ENCLAVE_CREATE:
1232 		ret = sgx_ioc_enclave_create(encl, (void __user *)arg);
1233 		break;
1234 	case SGX_IOC_ENCLAVE_ADD_PAGES:
1235 		ret = sgx_ioc_enclave_add_pages(encl, (void __user *)arg);
1236 		break;
1237 	case SGX_IOC_ENCLAVE_INIT:
1238 		ret = sgx_ioc_enclave_init(encl, (void __user *)arg);
1239 		break;
1240 	case SGX_IOC_ENCLAVE_PROVISION:
1241 		ret = sgx_ioc_enclave_provision(encl, (void __user *)arg);
1242 		break;
1243 	case SGX_IOC_ENCLAVE_RESTRICT_PERMISSIONS:
1244 		ret = sgx_ioc_enclave_restrict_permissions(encl,
1245 							   (void __user *)arg);
1246 		break;
1247 	case SGX_IOC_ENCLAVE_MODIFY_TYPES:
1248 		ret = sgx_ioc_enclave_modify_types(encl, (void __user *)arg);
1249 		break;
1250 	case SGX_IOC_ENCLAVE_REMOVE_PAGES:
1251 		ret = sgx_ioc_enclave_remove_pages(encl, (void __user *)arg);
1252 		break;
1253 	default:
1254 		ret = -ENOIOCTLCMD;
1255 		break;
1256 	}
1257 
1258 	clear_bit(SGX_ENCL_IOCTL, &encl->flags);
1259 	return ret;
1260 }
1261