xref: /openbmc/linux/arch/x86/kernel/cpu/sgx/encl.c (revision e6486939)
// SPDX-License-Identifier: GPL-2.0
/*  Copyright(c) 2016-20 Intel Corporation. */

#include <linux/lockdep.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/shmem_fs.h>
#include <linux/suspend.h>
#include <linux/sched/mm.h>
#include <asm/sgx.h>
#include "encl.h"
#include "encls.h"
#include "sgx.h"

/*
 * Calculate the byte offset of the PCMD struct associated with an enclave
 * page. PCMDs follow right after the EPC data in the backing storage. In
 * addition to the visible enclave pages, there is one extra page slot for
 * the SECS before the PCMD structs.
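 *
 * Backing store layout in byte offsets, as implied by the code below
 * (sizeof(struct sgx_secs) is one page):
 *
 *   [0, encl->size)                        page contents, indexed by PFN
 *   [encl->size, encl->size + PAGE_SIZE)   page slot for the SECS
 *   [encl->size + PAGE_SIZE, ...)          PCMD array, one struct sgx_pcmd
 *                                          per enclave page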
 */
static inline pgoff_t sgx_encl_get_backing_page_pcmd_offset(struct sgx_encl *encl,
							    unsigned long page_index)
{
	pgoff_t epc_end_off = encl->size + sizeof(struct sgx_secs);

	return epc_end_off + page_index * sizeof(struct sgx_pcmd);
}

/*
 * Free a page from the backing storage at the given page index.
 */
static inline void sgx_encl_truncate_backing_page(struct sgx_encl *encl, unsigned long page_index)
{
	struct inode *inode = file_inode(encl->backing);

	shmem_truncate_range(inode, PFN_PHYS(page_index), PFN_PHYS(page_index) + PAGE_SIZE - 1);
}

/*
 * ELDU: Load an EPC page as unblocked. For more info, see "OS Management of EPC
 * Pages" in the SDM.
 */
static int __sgx_encl_eldu(struct sgx_encl_page *encl_page,
			   struct sgx_epc_page *epc_page,
			   struct sgx_epc_page *secs_page)
{
	unsigned long va_offset = encl_page->desc & SGX_ENCL_PAGE_VA_OFFSET_MASK;
	struct sgx_encl *encl = encl_page->encl;
	pgoff_t page_index, page_pcmd_off;
	struct sgx_pageinfo pginfo;
	struct sgx_backing b;
	bool pcmd_page_empty;
	u8 *pcmd_page;
	int ret;

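	/*
	 * A NULL secs_page means the SECS itself is being reloaded. Its
	 * backing slot is the one right after the enclave pages, i.e.
	 * page_index == PFN_DOWN(encl->size); see the layout note above.
	 */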
	if (secs_page)
		page_index = PFN_DOWN(encl_page->desc - encl_page->encl->base);
	else
		page_index = PFN_DOWN(encl->size);

	page_pcmd_off = sgx_encl_get_backing_page_pcmd_offset(encl, page_index);

	ret = sgx_encl_get_backing(encl, page_index, &b);
	if (ret)
		return ret;

	pginfo.addr = encl_page->desc & PAGE_MASK;
	pginfo.contents = (unsigned long)kmap_atomic(b.contents);
	pcmd_page = kmap_atomic(b.pcmd);
	pginfo.metadata = (unsigned long)pcmd_page + b.pcmd_offset;

	if (secs_page)
		pginfo.secs = (u64)sgx_get_epc_virt_addr(secs_page);
	else
		pginfo.secs = 0;

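	/*
	 * ELDU decrypts and verifies the page against the MAC in its PCMD
	 * and the version counter held in the VA slot, and loads it back
	 * into the EPC in unblocked state.
	 */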
	ret = __eldu(&pginfo, sgx_get_epc_virt_addr(epc_page),
		     sgx_get_epc_virt_addr(encl_page->va_page->epc_page) + va_offset);
	if (ret) {
		if (encls_failed(ret))
			ENCLS_WARN(ret, "ELDU");

		ret = -EFAULT;
	}

	memset(pcmd_page + b.pcmd_offset, 0, sizeof(struct sgx_pcmd));

	/*
	 * The area for the PCMD in the page was zeroed above. Check if the
	 * whole page is now empty, meaning that all PCMDs have been zeroed:
	 */
	pcmd_page_empty = !memchr_inv(pcmd_page, 0, PAGE_SIZE);

	kunmap_atomic(pcmd_page);
	kunmap_atomic((void *)(unsigned long)pginfo.contents);

	sgx_encl_put_backing(&b, false);

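	/*
	 * The backing page holding the page contents is not needed anymore
	 * once the reload has been attempted, so release the shmem memory
	 * backing it.
	 */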
	sgx_encl_truncate_backing_page(encl, page_index);

	if (pcmd_page_empty)
		sgx_encl_truncate_backing_page(encl, PFN_DOWN(page_pcmd_off));

	return ret;
}

static struct sgx_epc_page *sgx_encl_eldu(struct sgx_encl_page *encl_page,
					  struct sgx_epc_page *secs_page)
{
	unsigned long va_offset = encl_page->desc & SGX_ENCL_PAGE_VA_OFFSET_MASK;
	struct sgx_encl *encl = encl_page->encl;
	struct sgx_epc_page *epc_page;
	int ret;

	epc_page = sgx_alloc_epc_page(encl_page, false);
	if (IS_ERR(epc_page))
		return epc_page;

	ret = __sgx_encl_eldu(encl_page, epc_page, secs_page);
	if (ret) {
		sgx_encl_free_epc_page(epc_page);
		return ERR_PTR(ret);
	}

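	/*
	 * The version in the VA slot was consumed by ELDU. Release the slot
	 * and move the VA page to the front of va_pages so that its free
	 * slot is preferred when the next slot is allocated.
	 */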
	sgx_free_va_slot(encl_page->va_page, va_offset);
	list_move(&encl_page->va_page->list, &encl->va_pages);
	encl_page->desc &= ~SGX_ENCL_PAGE_VA_OFFSET_MASK;
	encl_page->epc_page = epc_page;

	return epc_page;
}

static struct sgx_encl_page *sgx_encl_load_page(struct sgx_encl *encl,
						unsigned long addr,
						unsigned long vm_flags)
{
	unsigned long vm_prot_bits = vm_flags & (VM_READ | VM_WRITE | VM_EXEC);
	struct sgx_epc_page *epc_page;
	struct sgx_encl_page *entry;

	entry = xa_load(&encl->page_array, PFN_DOWN(addr));
	if (!entry)
		return ERR_PTR(-EFAULT);

	/*
	 * Verify that the faulted page has equal or higher build-time
	 * permissions than the VMA permissions (i.e. the subset of {VM_READ,
	 * VM_WRITE, VM_EXEC} in vma->vm_flags).
	 */
	if ((entry->vm_max_prot_bits & vm_prot_bits) != vm_prot_bits)
		return ERR_PTR(-EFAULT);

	/* Entry successfully located. */
	if (entry->epc_page) {
		if (entry->desc & SGX_ENCL_PAGE_BEING_RECLAIMED)
			return ERR_PTR(-EBUSY);

		return entry;
	}

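	/*
	 * The SECS page is the parent of every other page in the enclave:
	 * ELDU of a regular page needs a resident SECS, so reload it first
	 * if it has been reclaimed.
	 */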
	if (!(encl->secs.epc_page)) {
		epc_page = sgx_encl_eldu(&encl->secs, NULL);
		if (IS_ERR(epc_page))
			return ERR_CAST(epc_page);
	}

	epc_page = sgx_encl_eldu(entry, encl->secs.epc_page);
	if (IS_ERR(epc_page))
		return ERR_CAST(epc_page);

	encl->secs_child_cnt++;
	sgx_mark_page_reclaimable(entry->epc_page);

	return entry;
}

static vm_fault_t sgx_vma_fault(struct vm_fault *vmf)
{
	unsigned long addr = (unsigned long)vmf->address;
	struct vm_area_struct *vma = vmf->vma;
	struct sgx_encl_page *entry;
	unsigned long phys_addr;
	struct sgx_encl *encl;
	vm_fault_t ret;

	encl = vma->vm_private_data;

	/*
	 * It's very unlikely but possible that allocating memory for the
	 * mm_list entry of a forked process failed in sgx_vma_open(). When
	 * this happens, vm_private_data is set to NULL.
	 */
	if (unlikely(!encl))
		return VM_FAULT_SIGBUS;

	mutex_lock(&encl->lock);

	entry = sgx_encl_load_page(encl, addr, vma->vm_flags);
	if (IS_ERR(entry)) {
		mutex_unlock(&encl->lock);

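		/*
		 * The page is currently being reclaimed. Returning
		 * VM_FAULT_NOPAGE causes the fault to be retried rather than
		 * killing the task with SIGBUS.
		 */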
		if (PTR_ERR(entry) == -EBUSY)
			return VM_FAULT_NOPAGE;

		return VM_FAULT_SIGBUS;
	}

	phys_addr = sgx_get_epc_phys_addr(entry->epc_page);

	ret = vmf_insert_pfn(vma, addr, PFN_DOWN(phys_addr));
	if (ret != VM_FAULT_NOPAGE) {
		mutex_unlock(&encl->lock);

		return VM_FAULT_SIGBUS;
	}

	sgx_encl_test_and_clear_young(vma->vm_mm, entry);
	mutex_unlock(&encl->lock);

	return VM_FAULT_NOPAGE;
}

static void sgx_vma_open(struct vm_area_struct *vma)
{
	struct sgx_encl *encl = vma->vm_private_data;

	/*
	 * It's possible but unlikely that vm_private_data is NULL. This can
	 * happen in a grandchild of a process, when sgx_encl_mm_add() failed
	 * to allocate memory in this callback.
	 */
	if (unlikely(!encl))
		return;

	if (sgx_encl_mm_add(encl, vma->vm_mm))
		vma->vm_private_data = NULL;
}

/**
 * sgx_encl_may_map() - Check if a requested VMA mapping is allowed
 * @encl:		an enclave pointer
 * @start:		lower bound of the address range, inclusive
 * @end:		upper bound of the address range, exclusive
 * @vm_flags:		VMA flags
 *
 * Iterate through the enclave pages contained within [@start, @end) to verify
 * that the permissions requested by @vm_flags (the subset of {VM_READ,
 * VM_WRITE, VM_EXEC}) do not exceed the build-time permissions of any enclave
 * page within the given address range.
 *
 * An enclave creator must declare the strongest permissions that will be
 * needed for each enclave page. This ensures that mappings have permissions
 * identical to or weaker than the declared ones.
 *
 * Return: 0 on success, -EACCES otherwise
 */
int sgx_encl_may_map(struct sgx_encl *encl, unsigned long start,
		     unsigned long end, unsigned long vm_flags)
{
	unsigned long vm_prot_bits = vm_flags & (VM_READ | VM_WRITE | VM_EXEC);
	struct sgx_encl_page *page;
	unsigned long count = 0;
	int ret = 0;

	XA_STATE(xas, &encl->page_array, PFN_DOWN(start));

	/*
	 * Disallow READ_IMPLIES_EXEC tasks as their VMA permissions might
	 * conflict with the enclave page permissions.
	 */
	if (current->personality & READ_IMPLIES_EXEC)
		return -EACCES;

	mutex_lock(&encl->lock);
	xas_lock(&xas);
	xas_for_each(&xas, page, PFN_DOWN(end - 1)) {
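		/*
		 * Fail if the VMA requests any permission bit that is not in
		 * the page's declared maximum (vm_max_prot_bits).
		 */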
		if (~page->vm_max_prot_bits & vm_prot_bits) {
			ret = -EACCES;
			break;
		}

		/* Reschedule on every XA_CHECK_SCHED iteration. */
		if (!(++count % XA_CHECK_SCHED)) {
			xas_pause(&xas);
			xas_unlock(&xas);
			mutex_unlock(&encl->lock);

			cond_resched();

			mutex_lock(&encl->lock);
			xas_lock(&xas);
		}
	}
	xas_unlock(&xas);
	mutex_unlock(&encl->lock);

	return ret;
}

static int sgx_vma_mprotect(struct vm_area_struct *vma, unsigned long start,
			    unsigned long end, unsigned long newflags)
{
	return sgx_encl_may_map(vma->vm_private_data, start, end, newflags);
}

static int sgx_encl_debug_read(struct sgx_encl *encl, struct sgx_encl_page *page,
			       unsigned long addr, void *data)
{
	unsigned long offset = addr & ~PAGE_MASK;
	int ret;

	ret = __edbgrd(sgx_get_epc_virt_addr(page->epc_page) + offset, data);
	if (ret)
		return -EIO;

	return 0;
}

static int sgx_encl_debug_write(struct sgx_encl *encl, struct sgx_encl_page *page,
				unsigned long addr, void *data)
{
	unsigned long offset = addr & ~PAGE_MASK;
	int ret;

	ret = __edbgwr(sgx_get_epc_virt_addr(page->epc_page) + offset, data);
	if (ret)
		return -EIO;

	return 0;
}

/*
 * Load an enclave page into the EPC if required, and take encl->lock.
 */
static struct sgx_encl_page *sgx_encl_reserve_page(struct sgx_encl *encl,
						   unsigned long addr,
						   unsigned long vm_flags)
{
	struct sgx_encl_page *entry;

	for ( ; ; ) {
		mutex_lock(&encl->lock);

		entry = sgx_encl_load_page(encl, addr, vm_flags);
		if (PTR_ERR(entry) != -EBUSY)
			break;

		mutex_unlock(&encl->lock);
	}

	if (IS_ERR(entry))
		mutex_unlock(&encl->lock);

	return entry;
}

static int sgx_vma_access(struct vm_area_struct *vma, unsigned long addr,
			  void *buf, int len, int write)
{
	struct sgx_encl *encl = vma->vm_private_data;
	struct sgx_encl_page *entry = NULL;
	char data[sizeof(unsigned long)];
	unsigned long align;
	int offset;
	int cnt;
	int ret = 0;
	int i;

	/*
	 * If the process was forked, the VMA is still there but
	 * vm_private_data is set to NULL.
	 */
	if (!encl)
		return -EFAULT;

	if (!test_bit(SGX_ENCL_DEBUG, &encl->flags))
		return -EFAULT;

	for (i = 0; i < len; i += cnt) {
		entry = sgx_encl_reserve_page(encl, (addr + i) & PAGE_MASK,
					      vma->vm_flags);
		if (IS_ERR(entry)) {
			ret = PTR_ERR(entry);
			break;
		}

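		/*
		 * EDBGRD/EDBGWR operate on naturally aligned 8-byte words.
		 * Access the word containing (addr + i) and copy only the
		 * bytes that overlap the requested range. For example, with
		 * addr + i == 0x1005 and len - i >= 3: align == 0x1000,
		 * offset == 5, cnt == 3.
		 */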
		align = ALIGN_DOWN(addr + i, sizeof(unsigned long));
		offset = (addr + i) & (sizeof(unsigned long) - 1);
		cnt = sizeof(unsigned long) - offset;
		cnt = min(cnt, len - i);

		ret = sgx_encl_debug_read(encl, entry, align, data);
		if (ret)
			goto out;

		if (write) {
			memcpy(data + offset, buf + i, cnt);
			ret = sgx_encl_debug_write(encl, entry, align, data);
			if (ret)
				goto out;
		} else {
			memcpy(buf + i, data + offset, cnt);
		}

out:
		mutex_unlock(&encl->lock);

		if (ret)
			break;
	}

	return ret < 0 ? ret : i;
}

const struct vm_operations_struct sgx_vm_ops = {
	.fault = sgx_vma_fault,
	.mprotect = sgx_vma_mprotect,
	.open = sgx_vma_open,
	.access = sgx_vma_access,
};

/**
 * sgx_encl_release - Destroy an enclave instance
 * @ref:	address of a kref inside &sgx_encl
 *
 * Used together with kref_put(). Frees all the resources associated with the
 * enclave and the instance itself.
 */
void sgx_encl_release(struct kref *ref)
{
	struct sgx_encl *encl = container_of(ref, struct sgx_encl, refcount);
	struct sgx_va_page *va_page;
	struct sgx_encl_page *entry;
	unsigned long index;

	xa_for_each(&encl->page_array, index, entry) {
		if (entry->epc_page) {
			/*
			 * The page and its xarray entry cannot be freed
			 * if the page is being held by the reclaimer.
			 */
			if (sgx_unmark_page_reclaimable(entry->epc_page))
				continue;

			sgx_encl_free_epc_page(entry->epc_page);
			encl->secs_child_cnt--;
			entry->epc_page = NULL;
		}

		kfree(entry);
		/* Invoke scheduler to prevent soft lockups. */
		cond_resched();
	}

	xa_destroy(&encl->page_array);

	if (!encl->secs_child_cnt && encl->secs.epc_page) {
		sgx_encl_free_epc_page(encl->secs.epc_page);
		encl->secs.epc_page = NULL;
	}

	while (!list_empty(&encl->va_pages)) {
		va_page = list_first_entry(&encl->va_pages, struct sgx_va_page,
					   list);
		list_del(&va_page->list);
		sgx_encl_free_epc_page(va_page->epc_page);
		kfree(va_page);
	}

	if (encl->backing)
		fput(encl->backing);

	cleanup_srcu_struct(&encl->srcu);

	WARN_ON_ONCE(!list_empty(&encl->mm_list));

	/* Detect EPC page leaks. */
	WARN_ON_ONCE(encl->secs_child_cnt);
	WARN_ON_ONCE(encl->secs.epc_page);

	kfree(encl);
}

/*
 * 'mm' is exiting and no longer needs mmu notifications.
 */
static void sgx_mmu_notifier_release(struct mmu_notifier *mn,
				     struct mm_struct *mm)
{
	struct sgx_encl_mm *encl_mm = container_of(mn, struct sgx_encl_mm, mmu_notifier);
	struct sgx_encl_mm *tmp = NULL;

	/*
	 * The enclave itself can remove encl_mm. Note that objects can't be
	 * moved off an RCU-protected list, but deletion is ok.
	 */
	spin_lock(&encl_mm->encl->mm_lock);
	list_for_each_entry(tmp, &encl_mm->encl->mm_list, list) {
		if (tmp == encl_mm) {
			list_del_rcu(&encl_mm->list);
			break;
		}
	}
	spin_unlock(&encl_mm->encl->mm_lock);

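	/*
	 * If this call removed encl_mm from the list, wait for SRCU readers
	 * walking mm_list to finish before dropping the notifier reference,
	 * which frees encl_mm via the ->free_notifier() callback.
	 */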
	if (tmp == encl_mm) {
		synchronize_srcu(&encl_mm->encl->srcu);
		mmu_notifier_put(mn);
	}
}

static void sgx_mmu_notifier_free(struct mmu_notifier *mn)
{
	struct sgx_encl_mm *encl_mm = container_of(mn, struct sgx_encl_mm, mmu_notifier);

	/* 'encl_mm' is going away, put encl_mm->encl reference: */
	kref_put(&encl_mm->encl->refcount, sgx_encl_release);

	kfree(encl_mm);
}

static const struct mmu_notifier_ops sgx_mmu_notifier_ops = {
	.release		= sgx_mmu_notifier_release,
	.free_notifier		= sgx_mmu_notifier_free,
};

static struct sgx_encl_mm *sgx_encl_find_mm(struct sgx_encl *encl,
					    struct mm_struct *mm)
{
	struct sgx_encl_mm *encl_mm = NULL;
	struct sgx_encl_mm *tmp;
	int idx;

	idx = srcu_read_lock(&encl->srcu);

	list_for_each_entry_rcu(tmp, &encl->mm_list, list) {
		if (tmp->mm == mm) {
			encl_mm = tmp;
			break;
		}
	}

	srcu_read_unlock(&encl->srcu, idx);

	return encl_mm;
}

int sgx_encl_mm_add(struct sgx_encl *encl, struct mm_struct *mm)
{
	struct sgx_encl_mm *encl_mm;
	int ret;

	/*
	 * Even though a single enclave may be mapped into an mm more than
	 * once, each 'mm' only appears once on encl->mm_list. This is
	 * guaranteed by holding the mm's mmap lock for write before an mm
	 * can be added to or removed from encl->mm_list.
	 */
	mmap_assert_write_locked(mm);

	/*
	 * It's possible that an entry already exists in the mm_list, because
	 * it is removed only on VFS release or process exit.
	 */
	if (sgx_encl_find_mm(encl, mm))
		return 0;

	encl_mm = kzalloc(sizeof(*encl_mm), GFP_KERNEL);
	if (!encl_mm)
		return -ENOMEM;

	/* Grab a refcount for the encl_mm->encl reference: */
	kref_get(&encl->refcount);
	encl_mm->encl = encl;
	encl_mm->mm = mm;
	encl_mm->mmu_notifier.ops = &sgx_mmu_notifier_ops;

	ret = __mmu_notifier_register(&encl_mm->mmu_notifier, mm);
	if (ret) {
		kfree(encl_mm);
		return ret;
	}

	spin_lock(&encl->mm_lock);
	list_add_rcu(&encl_mm->list, &encl->mm_list);
	/* Pairs with smp_rmb() in sgx_reclaimer_block(). */
	smp_wmb();
	encl->mm_list_version++;
	spin_unlock(&encl->mm_lock);

	return 0;
}

static struct page *sgx_encl_get_backing_page(struct sgx_encl *encl,
					      pgoff_t index)
{
	struct inode *inode = encl->backing->f_path.dentry->d_inode;
	struct address_space *mapping = inode->i_mapping;
	gfp_t gfpmask = mapping_gfp_mask(mapping);

	return shmem_read_mapping_page_gfp(mapping, index, gfpmask);
}

/**
 * sgx_encl_get_backing() - Pin the backing storage
 * @encl:	an enclave pointer
 * @page_index:	enclave page index
 * @backing:	data for accessing backing storage for the page
 *
 * Pin the backing storage pages for storing the encrypted contents and Paging
 * Crypto MetaData (PCMD) of an enclave page.
 *
 * Return:
 *   0 on success,
 *   -errno otherwise.
 */
int sgx_encl_get_backing(struct sgx_encl *encl, unsigned long page_index,
			 struct sgx_backing *backing)
{
	pgoff_t page_pcmd_off = sgx_encl_get_backing_page_pcmd_offset(encl, page_index);
	struct page *contents;
	struct page *pcmd;

	contents = sgx_encl_get_backing_page(encl, page_index);
	if (IS_ERR(contents))
		return PTR_ERR(contents);

	pcmd = sgx_encl_get_backing_page(encl, PFN_DOWN(page_pcmd_off));
	if (IS_ERR(pcmd)) {
		put_page(contents);
		return PTR_ERR(pcmd);
	}

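	/*
	 * Several PCMD structs share a single backing page
	 * (PAGE_SIZE / sizeof(struct sgx_pcmd) of them), so also record the
	 * offset of this page's PCMD within that backing page.
	 */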
	backing->page_index = page_index;
	backing->contents = contents;
	backing->pcmd = pcmd;
	backing->pcmd_offset = page_pcmd_off & (PAGE_SIZE - 1);

	return 0;
}

/**
 * sgx_encl_put_backing() - Unpin the backing storage
 * @backing:	data for accessing backing storage for the page
 * @do_write:	mark pages dirty
 */
void sgx_encl_put_backing(struct sgx_backing *backing, bool do_write)
{
	if (do_write) {
		set_page_dirty(backing->pcmd);
		set_page_dirty(backing->contents);
	}

	put_page(backing->pcmd);
	put_page(backing->contents);
}

static int sgx_encl_test_and_clear_young_cb(pte_t *ptep, unsigned long addr,
					    void *data)
{
	pte_t pte;
	int ret;

	ret = pte_young(*ptep);
	if (ret) {
		pte = pte_mkold(*ptep);
		set_pte_at((struct mm_struct *)data, addr, ptep, pte);
	}

	return ret;
}

/**
 * sgx_encl_test_and_clear_young() - Test and reset the accessed bit
 * @mm:		mm_struct that is checked
 * @page:	enclave page to be tested for recent access
 *
 * Checks the Access (A) bit from the PTE corresponding to the enclave page and
 * clears it.
 *
 * Return: 1 if the page has been recently accessed and 0 if not.
 */
int sgx_encl_test_and_clear_young(struct mm_struct *mm,
				  struct sgx_encl_page *page)
{
	unsigned long addr = page->desc & PAGE_MASK;
	struct sgx_encl *encl = page->encl;
	struct vm_area_struct *vma;
	int ret;

	ret = sgx_encl_find(mm, addr, &vma);
	if (ret)
		return 0;

	if (encl != vma->vm_private_data)
		return 0;

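	/*
	 * A non-zero return from the callback stops the page walk and is
	 * propagated by apply_to_page_range(), so for this single-page range
	 * ret ends up as 1 if the accessed bit was set.
	 */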
	ret = apply_to_page_range(vma->vm_mm, addr, PAGE_SIZE,
				  sgx_encl_test_and_clear_young_cb, vma->vm_mm);
	if (ret < 0)
		return 0;

	return ret;
}

/**
 * sgx_alloc_va_page() - Allocate a Version Array (VA) page
 *
 * Allocate a free EPC page and convert it to a Version Array (VA) page.
 *
 * Return:
 *   a VA page,
 *   -errno otherwise
 */
struct sgx_epc_page *sgx_alloc_va_page(void)
{
	struct sgx_epc_page *epc_page;
	int ret;

	epc_page = sgx_alloc_epc_page(NULL, true);
	if (IS_ERR(epc_page))
		return ERR_CAST(epc_page);

	ret = __epa(sgx_get_epc_virt_addr(epc_page));
	if (ret) {
		WARN_ONCE(1, "EPA returned %d (0x%x)", ret, ret);
		sgx_encl_free_epc_page(epc_page);
		return ERR_PTR(-EFAULT);
	}

	return epc_page;
}

/**
 * sgx_alloc_va_slot - allocate a VA slot
 * @va_page:	a &struct sgx_va_page instance
 *
 * Allocates a slot from a &struct sgx_va_page instance.
 *
 * Return: offset of the slot inside the VA page
 */
unsigned int sgx_alloc_va_slot(struct sgx_va_page *va_page)
{
	int slot = find_first_zero_bit(va_page->slots, SGX_VA_SLOT_COUNT);

	if (slot < SGX_VA_SLOT_COUNT)
		set_bit(slot, va_page->slots);

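	/*
	 * Each VA slot holds an 8-byte version counter, so convert the slot
	 * index into a byte offset within the VA page.
	 */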
	return slot << 3;
}

/**
 * sgx_free_va_slot - free a VA slot
 * @va_page:	a &struct sgx_va_page instance
 * @offset:	offset of the slot inside the VA page
 *
 * Frees a slot from a &struct sgx_va_page instance.
 */
void sgx_free_va_slot(struct sgx_va_page *va_page, unsigned int offset)
{
	clear_bit(offset >> 3, va_page->slots);
}

/**
 * sgx_va_page_full - is the VA page full?
 * @va_page:	a &struct sgx_va_page instance
 *
 * Return: true if all slots have been taken
 */
bool sgx_va_page_full(struct sgx_va_page *va_page)
{
	int slot = find_first_zero_bit(va_page->slots, SGX_VA_SLOT_COUNT);

	return slot == SGX_VA_SLOT_COUNT;
}

/**
 * sgx_encl_free_epc_page - free an EPC page assigned to an enclave
 * @page:	EPC page to be freed
 *
 * Free an EPC page assigned to an enclave. It runs EREMOVE on the page, and
 * only upon success puts the page back on the free page list. Otherwise, it
 * issues a WARNING to indicate that the page is leaked.
 */
void sgx_encl_free_epc_page(struct sgx_epc_page *page)
{
	int ret;

	WARN_ON_ONCE(page->flags & SGX_EPC_PAGE_RECLAIMER_TRACKED);

	ret = __eremove(sgx_get_epc_virt_addr(page));
	if (WARN_ONCE(ret, EREMOVE_ERROR_MESSAGE, ret, ret))
		return;

	sgx_free_epc_page(page);
}