// SPDX-License-Identifier: GPL-2.0
/*  Copyright(c) 2016-20 Intel Corporation. */

#include <linux/file.h>
#include <linux/freezer.h>
#include <linux/highmem.h>
#include <linux/kthread.h>
#include <linux/miscdevice.h>
#include <linux/pagemap.h>
#include <linux/ratelimit.h>
#include <linux/sched/mm.h>
#include <linux/sched/signal.h>
#include <linux/slab.h>
#include <asm/sgx.h>
#include "driver.h"
#include "encl.h"
#include "encls.h"

struct sgx_epc_section sgx_epc_sections[SGX_MAX_EPC_SECTIONS];
static int sgx_nr_epc_sections;
static struct task_struct *ksgxd_tsk;
static DECLARE_WAIT_QUEUE_HEAD(ksgxd_waitq);

/*
 * These variables are part of the state of the reclaimer, and must be accessed
 * with sgx_reclaimer_lock acquired.
 */
static LIST_HEAD(sgx_active_page_list);
static DEFINE_SPINLOCK(sgx_reclaimer_lock);

/* The number of free EPC pages across all nodes; updated under the node locks. */
static unsigned long sgx_nr_free_pages;

/* Nodes with one or more EPC sections. */
static nodemask_t sgx_numa_mask;

/*
 * Array with one list_head for each possible NUMA node.  Each
 * list contains all the sgx_epc_section's which are on that
 * node.
 */
static struct sgx_numa_node *sgx_numa_nodes;

static LIST_HEAD(sgx_dirty_page_list);

/*
 * Reset post-kexec EPC pages to the uninitialized state. The pages are removed
 * from the input list, and made available for the page allocator. SECS pages
 * preceding their children in the input list are left intact.
 */
static void __sgx_sanitize_pages(struct list_head *dirty_page_list)
{
	struct sgx_epc_page *page;
	LIST_HEAD(dirty);
	int ret;

	/* dirty_page_list is thread-local, no need for a lock: */
	while (!list_empty(dirty_page_list)) {
		if (kthread_should_stop())
			return;

		page = list_first_entry(dirty_page_list, struct sgx_epc_page, list);

		ret = __eremove(sgx_get_epc_virt_addr(page));
		if (!ret) {
			/*
			 * page is now sanitized.  Make it available via the SGX
			 * page allocator:
			 */
			list_del(&page->list);
			sgx_free_epc_page(page);
		} else {
			/* The page is not yet clean - move to the dirty list. */
			list_move_tail(&page->list, &dirty);
		}

		cond_resched();
	}

	list_splice(&dirty, dirty_page_list);
}

static bool sgx_reclaimer_age(struct sgx_epc_page *epc_page)
{
	struct sgx_encl_page *page = epc_page->owner;
	struct sgx_encl *encl = page->encl;
	struct sgx_encl_mm *encl_mm;
	bool ret = true;
	int idx;

	idx = srcu_read_lock(&encl->srcu);

	list_for_each_entry_rcu(encl_mm, &encl->mm_list, list) {
		if (!mmget_not_zero(encl_mm->mm))
			continue;

		mmap_read_lock(encl_mm->mm);
		ret = !sgx_encl_test_and_clear_young(encl_mm->mm, page);
		mmap_read_unlock(encl_mm->mm);

		mmput_async(encl_mm->mm);

		if (!ret)
			break;
	}

	srcu_read_unlock(&encl->srcu, idx);

	if (!ret)
		return false;

	return true;
}
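
/*
 * Editorial note on the aging pass above: it mirrors the "referenced page"
 * notion of regular page reclaim. sgx_encl_test_and_clear_young() tests and
 * clears the accessed bit in the PTE of every mm that maps the enclave; a page
 * that any of those mms has touched since the previous scan counts as young,
 * and sgx_reclaim_pages() rotates it back to the tail of sgx_active_page_list
 * instead of reclaiming it, which is what gives the scan its LRU-like
 * behaviour.
 */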

static void sgx_reclaimer_block(struct sgx_epc_page *epc_page)
{
	struct sgx_encl_page *page = epc_page->owner;
	unsigned long addr = page->desc & PAGE_MASK;
	struct sgx_encl *encl = page->encl;
	unsigned long mm_list_version;
	struct sgx_encl_mm *encl_mm;
	struct vm_area_struct *vma;
	int idx, ret;

	do {
		mm_list_version = encl->mm_list_version;

		/* Pairs with smp_wmb() in sgx_encl_mm_add(). */
		smp_rmb();

		idx = srcu_read_lock(&encl->srcu);

		list_for_each_entry_rcu(encl_mm, &encl->mm_list, list) {
			if (!mmget_not_zero(encl_mm->mm))
				continue;

			mmap_read_lock(encl_mm->mm);

			ret = sgx_encl_find(encl_mm->mm, addr, &vma);
			if (!ret && encl == vma->vm_private_data)
				zap_vma_ptes(vma, addr, PAGE_SIZE);

			mmap_read_unlock(encl_mm->mm);

			mmput_async(encl_mm->mm);
		}

		srcu_read_unlock(&encl->srcu, idx);
	} while (unlikely(encl->mm_list_version != mm_list_version));

	mutex_lock(&encl->lock);

	ret = __eblock(sgx_get_epc_virt_addr(epc_page));
	if (encls_failed(ret))
		ENCLS_WARN(ret, "EBLOCK");

	mutex_unlock(&encl->lock);
}

static int __sgx_encl_ewb(struct sgx_epc_page *epc_page, void *va_slot,
			  struct sgx_backing *backing)
{
	struct sgx_pageinfo pginfo;
	int ret;

	pginfo.addr = 0;
	pginfo.secs = 0;

	pginfo.contents = (unsigned long)kmap_atomic(backing->contents);
	pginfo.metadata = (unsigned long)kmap_atomic(backing->pcmd) +
			  backing->pcmd_offset;

	ret = __ewb(&pginfo, sgx_get_epc_virt_addr(epc_page), va_slot);

	kunmap_atomic((void *)(unsigned long)(pginfo.metadata -
					      backing->pcmd_offset));
	kunmap_atomic((void *)(unsigned long)pginfo.contents);

	return ret;
}

/*
 * The IPI handler itself is a no-op: merely delivering the interrupt forces
 * any logical CPU still executing inside the enclave to exit, which is all
 * that the EWB slow path needs.
 */
static void sgx_ipi_cb(void *info)
{
}

static const cpumask_t *sgx_encl_ewb_cpumask(struct sgx_encl *encl)
{
	cpumask_t *cpumask = &encl->cpumask;
	struct sgx_encl_mm *encl_mm;
	int idx;

	/*
	 * Can race with sgx_encl_mm_add(), but ETRACK has already been
	 * executed, which means that the CPUs running in the new mm will enter
	 * into the enclave with a fresh epoch.
	 */
	cpumask_clear(cpumask);

	idx = srcu_read_lock(&encl->srcu);

	list_for_each_entry_rcu(encl_mm, &encl->mm_list, list) {
		if (!mmget_not_zero(encl_mm->mm))
			continue;

		cpumask_or(cpumask, cpumask, mm_cpumask(encl_mm->mm));

		mmput_async(encl_mm->mm);
	}

	srcu_read_unlock(&encl->srcu, idx);

	return cpumask;
}

/*
 * Swap a page out to regular memory after it has been transformed to the
 * blocked state with EBLOCK, which means that it can no longer be referenced
 * (no new TLB entries).
 *
 * The first attempt just tries to write the page, assuming that some other
 * thread has already reset the tracking counter for threads inside the enclave
 * with ETRACK and the previous thread count has drained to zero. The second
 * attempt calls ETRACK before EWB. If that also fails, we kick all the HW
 * threads out of the enclave and then do EWB, which is guaranteed to succeed.
 */
static void sgx_encl_ewb(struct sgx_epc_page *epc_page,
			 struct sgx_backing *backing)
{
	struct sgx_encl_page *encl_page = epc_page->owner;
	struct sgx_encl *encl = encl_page->encl;
	struct sgx_va_page *va_page;
	unsigned int va_offset;
	void *va_slot;
	int ret;

	encl_page->desc &= ~SGX_ENCL_PAGE_BEING_RECLAIMED;

	va_page = list_first_entry(&encl->va_pages, struct sgx_va_page,
				   list);
	va_offset = sgx_alloc_va_slot(va_page);
	va_slot = sgx_get_epc_virt_addr(va_page->epc_page) + va_offset;
	if (sgx_va_page_full(va_page))
		list_move_tail(&va_page->list, &encl->va_pages);

	ret = __sgx_encl_ewb(epc_page, va_slot, backing);
	if (ret == SGX_NOT_TRACKED) {
		ret = __etrack(sgx_get_epc_virt_addr(encl->secs.epc_page));
		if (ret) {
			if (encls_failed(ret))
				ENCLS_WARN(ret, "ETRACK");
		}

		ret = __sgx_encl_ewb(epc_page, va_slot, backing);
		if (ret == SGX_NOT_TRACKED) {
			/*
			 * Slow path, send IPIs to kick cpus out of the
			 * enclave.  Note, it's imperative that the cpu
			 * mask is generated *after* ETRACK, else we'll
			 * miss cpus that entered the enclave between
			 * generating the mask and incrementing epoch.
			 */
			on_each_cpu_mask(sgx_encl_ewb_cpumask(encl),
					 sgx_ipi_cb, NULL, 1);
			ret = __sgx_encl_ewb(epc_page, va_slot, backing);
		}
	}

	if (ret) {
		if (encls_failed(ret))
			ENCLS_WARN(ret, "EWB");

		sgx_free_va_slot(va_page, va_offset);
	} else {
		encl_page->desc |= va_offset;
		encl_page->va_page = va_page;
	}
}

static void sgx_reclaimer_write(struct sgx_epc_page *epc_page,
				struct sgx_backing *backing)
{
	struct sgx_encl_page *encl_page = epc_page->owner;
	struct sgx_encl *encl = encl_page->encl;
	struct sgx_backing secs_backing;
	int ret;

	mutex_lock(&encl->lock);

	sgx_encl_ewb(epc_page, backing);
	encl_page->epc_page = NULL;
	encl->secs_child_cnt--;

	if (!encl->secs_child_cnt && test_bit(SGX_ENCL_INITIALIZED, &encl->flags)) {
		ret = sgx_encl_get_backing(encl, PFN_DOWN(encl->size),
					   &secs_backing);
		if (ret)
			goto out;

		sgx_encl_ewb(encl->secs.epc_page, &secs_backing);

		sgx_encl_free_epc_page(encl->secs.epc_page);
		encl->secs.epc_page = NULL;

		sgx_encl_put_backing(&secs_backing, true);
	}

out:
	mutex_unlock(&encl->lock);
}
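
/*
 * Note on the SECS branch above: once the last child page of an initialized
 * enclave has been swapped out, the SECS page itself is written back and
 * freed as well. This is not a one-way trip - the fault handling code in
 * encl.c is expected to reload the SECS on demand (via ELDU) before any of
 * the enclave's regular pages can be brought back in.
 */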

/*
 * Take a fixed number of pages from the head of the active page pool and
 * reclaim them to the enclave's private shmem files. Pages that have been
 * accessed since the last scan are skipped and moved to the tail of the active
 * page pool, so that the pool is scanned in an LRU-like fashion.
 *
 * Batch-process a chunk of pages (at the moment 16) in order to reduce the
 * number of IPIs and ETRACKs potentially required. sgx_encl_ewb() spreads the
 * cost a bit across the HW threads with its three-stage EWB pipeline (EWB,
 * ETRACK + EWB and IPI + EWB), but not sufficiently. Reclaiming one page at a
 * time would also be problematic as it would increase lock contention too
 * much, which would halt forward progress.
 */
static void sgx_reclaim_pages(void)
{
	struct sgx_epc_page *chunk[SGX_NR_TO_SCAN];
	struct sgx_backing backing[SGX_NR_TO_SCAN];
	struct sgx_epc_section *section;
	struct sgx_encl_page *encl_page;
	struct sgx_epc_page *epc_page;
	struct sgx_numa_node *node;
	pgoff_t page_index;
	int cnt = 0;
	int ret;
	int i;

	spin_lock(&sgx_reclaimer_lock);
	for (i = 0; i < SGX_NR_TO_SCAN; i++) {
		if (list_empty(&sgx_active_page_list))
			break;

		epc_page = list_first_entry(&sgx_active_page_list,
					    struct sgx_epc_page, list);
		list_del_init(&epc_page->list);
		encl_page = epc_page->owner;

		if (kref_get_unless_zero(&encl_page->encl->refcount) != 0)
			chunk[cnt++] = epc_page;
		else
			/* The owner is freeing the page. No need to add the
			 * page back to the list of reclaimable pages.
			 */
			epc_page->flags &= ~SGX_EPC_PAGE_RECLAIMER_TRACKED;
	}
	spin_unlock(&sgx_reclaimer_lock);

	for (i = 0; i < cnt; i++) {
		epc_page = chunk[i];
		encl_page = epc_page->owner;

		if (!sgx_reclaimer_age(epc_page))
			goto skip;

		page_index = PFN_DOWN(encl_page->desc - encl_page->encl->base);
		ret = sgx_encl_get_backing(encl_page->encl, page_index, &backing[i]);
		if (ret)
			goto skip;

		mutex_lock(&encl_page->encl->lock);
		encl_page->desc |= SGX_ENCL_PAGE_BEING_RECLAIMED;
		mutex_unlock(&encl_page->encl->lock);
		continue;

skip:
		spin_lock(&sgx_reclaimer_lock);
		list_add_tail(&epc_page->list, &sgx_active_page_list);
		spin_unlock(&sgx_reclaimer_lock);

		kref_put(&encl_page->encl->refcount, sgx_encl_release);

		chunk[i] = NULL;
	}

	for (i = 0; i < cnt; i++) {
		epc_page = chunk[i];
		if (epc_page)
			sgx_reclaimer_block(epc_page);
	}

	for (i = 0; i < cnt; i++) {
		epc_page = chunk[i];
		if (!epc_page)
			continue;

		encl_page = epc_page->owner;
		sgx_reclaimer_write(epc_page, &backing[i]);
		sgx_encl_put_backing(&backing[i], true);

		kref_put(&encl_page->encl->refcount, sgx_encl_release);
		epc_page->flags &= ~SGX_EPC_PAGE_RECLAIMER_TRACKED;

		section = &sgx_epc_sections[epc_page->section];
		node = section->node;

		spin_lock(&node->lock);
		list_add_tail(&epc_page->list, &node->free_page_list);
		sgx_nr_free_pages++;
		spin_unlock(&node->lock);
	}
}

static bool sgx_should_reclaim(unsigned long watermark)
{
	return sgx_nr_free_pages < watermark && !list_empty(&sgx_active_page_list);
}
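
/*
 * The two callers of sgx_should_reclaim() give the reclaimer simple
 * hysteresis: sgx_alloc_epc_page() wakes up ksgxd once the number of free
 * pages drops below SGX_NR_LOW_PAGES, while ksgxd itself keeps running
 * sgx_reclaim_pages() for as long as the pool stays below SGX_NR_HIGH_PAGES
 * (both watermarks are defined in sgx.h). Direct reclaim in the allocation
 * path only happens when the pool is completely exhausted.
 */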

static int ksgxd(void *p)
{
	set_freezable();

	/*
	 * Sanitize pages in order to recover from kexec(). The 2nd pass is
	 * required for SECS pages, whose child pages blocked EREMOVE.
	 */
	__sgx_sanitize_pages(&sgx_dirty_page_list);
	__sgx_sanitize_pages(&sgx_dirty_page_list);

	/* sanity check: */
	WARN_ON(!list_empty(&sgx_dirty_page_list));

	while (!kthread_should_stop()) {
		if (try_to_freeze())
			continue;

		wait_event_freezable(ksgxd_waitq,
				     kthread_should_stop() ||
				     sgx_should_reclaim(SGX_NR_HIGH_PAGES));

		if (sgx_should_reclaim(SGX_NR_HIGH_PAGES))
			sgx_reclaim_pages();

		cond_resched();
	}

	return 0;
}

static bool __init sgx_page_reclaimer_init(void)
{
	struct task_struct *tsk;

	tsk = kthread_run(ksgxd, NULL, "ksgxd");
	if (IS_ERR(tsk))
		return false;

	ksgxd_tsk = tsk;

	return true;
}

static struct sgx_epc_page *__sgx_alloc_epc_page_from_node(int nid)
{
	struct sgx_numa_node *node = &sgx_numa_nodes[nid];
	struct sgx_epc_page *page = NULL;

	spin_lock(&node->lock);

	if (list_empty(&node->free_page_list)) {
		spin_unlock(&node->lock);
		return NULL;
	}

	page = list_first_entry(&node->free_page_list, struct sgx_epc_page, list);
	list_del_init(&page->list);
	sgx_nr_free_pages--;

	spin_unlock(&node->lock);

	return page;
}

/**
 * __sgx_alloc_epc_page() - Allocate an EPC page
 *
 * Iterate through NUMA nodes and reserve a free EPC page for the caller. Start
 * from the NUMA node where the caller is executing.
 *
 * Return:
 * - an EPC page:		A borrowed EPC page was available.
 * - ERR_PTR(-ENOMEM):		Out of EPC pages.
 */
struct sgx_epc_page *__sgx_alloc_epc_page(void)
{
	struct sgx_epc_page *page;
	int nid_of_current = numa_node_id();
	int nid = nid_of_current;

	if (node_isset(nid_of_current, sgx_numa_mask)) {
		page = __sgx_alloc_epc_page_from_node(nid_of_current);
		if (page)
			return page;
	}

	/* Fall back to the non-local NUMA nodes: */
	while (true) {
		nid = next_node_in(nid, sgx_numa_mask);
		if (nid == nid_of_current)
			break;

		page = __sgx_alloc_epc_page_from_node(nid);
		if (page)
			return page;
	}

	return ERR_PTR(-ENOMEM);
}
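
/*
 * Illustrative walk of the fallback loop above, assuming EPC sections on
 * nodes 0 and 1 and a caller running on node 0: node 0's free list is tried
 * first; if it is empty, next_node_in() yields node 1, whose list is tried
 * next; the following next_node_in() wraps back around to node 0, which
 * matches nid_of_current, so the loop terminates and ERR_PTR(-ENOMEM) is
 * returned.
 */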

/**
 * sgx_mark_page_reclaimable() - Mark a page as reclaimable
 * @page:	EPC page
 *
 * Mark a page as reclaimable and add it to the active page list. Pages
 * are automatically removed from the active list when freed.
 */
void sgx_mark_page_reclaimable(struct sgx_epc_page *page)
{
	spin_lock(&sgx_reclaimer_lock);
	page->flags |= SGX_EPC_PAGE_RECLAIMER_TRACKED;
	list_add_tail(&page->list, &sgx_active_page_list);
	spin_unlock(&sgx_reclaimer_lock);
}

/**
 * sgx_unmark_page_reclaimable() - Remove a page from the reclaim list
 * @page:	EPC page
 *
 * Clear the reclaimable flag and remove the page from the active page list.
 *
 * Return:
 *   0 on success,
 *   -EBUSY if the page is in the process of being reclaimed
 */
int sgx_unmark_page_reclaimable(struct sgx_epc_page *page)
{
	spin_lock(&sgx_reclaimer_lock);
	if (page->flags & SGX_EPC_PAGE_RECLAIMER_TRACKED) {
		/* The page is being reclaimed. */
		if (list_empty(&page->list)) {
			spin_unlock(&sgx_reclaimer_lock);
			return -EBUSY;
		}

		list_del(&page->list);
		page->flags &= ~SGX_EPC_PAGE_RECLAIMER_TRACKED;
	}
	spin_unlock(&sgx_reclaimer_lock);

	return 0;
}

/**
 * sgx_alloc_epc_page() - Allocate an EPC page
 * @owner:	the owner of the EPC page
 * @reclaim:	reclaim pages if necessary
 *
 * Iterate through EPC sections and hand a free EPC page to the caller. When a
 * page is no longer needed it must be released with sgx_free_epc_page(). If
 * @reclaim is set to true, directly reclaim pages when we are out of pages. No
 * mm's can be locked when @reclaim is set to true.
 *
 * Finally, wake up ksgxd when the number of free pages goes below the low
 * watermark before returning to the caller.
 *
 * Return:
 *   an EPC page,
 *   -errno on error
 */
struct sgx_epc_page *sgx_alloc_epc_page(void *owner, bool reclaim)
{
	struct sgx_epc_page *page;

	for ( ; ; ) {
		page = __sgx_alloc_epc_page();
		if (!IS_ERR(page)) {
			page->owner = owner;
			break;
		}

		if (list_empty(&sgx_active_page_list))
			return ERR_PTR(-ENOMEM);

		if (!reclaim) {
			page = ERR_PTR(-EBUSY);
			break;
		}

		if (signal_pending(current)) {
			page = ERR_PTR(-ERESTARTSYS);
			break;
		}

		sgx_reclaim_pages();
		cond_resched();
	}

	if (sgx_should_reclaim(SGX_NR_LOW_PAGES))
		wake_up(&ksgxd_waitq);

	return page;
}

/**
 * sgx_free_epc_page() - Free an EPC page
 * @page:	an EPC page
 *
 * Put the EPC page back to the list of free pages. It's the caller's
 * responsibility to make sure that the page is in uninitialized state. In
 * other words, do EREMOVE, EWB or whatever operation is necessary before
 * calling this function.
 */
void sgx_free_epc_page(struct sgx_epc_page *page)
{
	struct sgx_epc_section *section = &sgx_epc_sections[page->section];
	struct sgx_numa_node *node = section->node;

	spin_lock(&node->lock);

	list_add_tail(&page->list, &node->free_page_list);
	sgx_nr_free_pages++;

	spin_unlock(&node->lock);
}
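
/*
 * A minimal sketch of the caller-side lifecycle of the helpers above. It is
 * not taken verbatim from the driver (the real users live in encl.c and
 * ioctl.c, and "encl_page" below is just a placeholder owner pointer):
 *
 *	struct sgx_epc_page *epc_page;
 *
 *	epc_page = sgx_alloc_epc_page(encl_page, true);
 *	if (IS_ERR(epc_page))
 *		return PTR_ERR(epc_page);
 *
 *	... initialize the page with the relevant ENCLS leaf and map it ...
 *
 *	sgx_mark_page_reclaimable(epc_page);
 *
 *	... much later, when tearing the page down ...
 *
 *	if (!sgx_unmark_page_reclaimable(epc_page)) {
 *		... EREMOVE (or otherwise uninitialize) the page first ...
 *		sgx_free_epc_page(epc_page);
 *	}
 *
 * An -EBUSY return from sgx_unmark_page_reclaimable() means the reclaimer
 * currently holds the page, and the caller has to back off and retry.
 */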

static bool __init sgx_setup_epc_section(u64 phys_addr, u64 size,
					 unsigned long index,
					 struct sgx_epc_section *section)
{
	unsigned long nr_pages = size >> PAGE_SHIFT;
	unsigned long i;

	section->virt_addr = memremap(phys_addr, size, MEMREMAP_WB);
	if (!section->virt_addr)
		return false;

	section->pages = vmalloc(nr_pages * sizeof(struct sgx_epc_page));
	if (!section->pages) {
		memunmap(section->virt_addr);
		return false;
	}

	section->phys_addr = phys_addr;

	for (i = 0; i < nr_pages; i++) {
		section->pages[i].section = index;
		section->pages[i].flags = 0;
		section->pages[i].owner = NULL;
		list_add_tail(&section->pages[i].list, &sgx_dirty_page_list);
	}

	return true;
}

/*
 * A section metric is concatenated in a way that @low bits 12-31 define the
 * bits 12-31 of the metric and @high bits 0-19 define the bits 32-51 of the
 * metric.
 */
static inline u64 __init sgx_calc_section_metric(u64 low, u64 high)
{
	return (low & GENMASK_ULL(31, 12)) +
	       ((high & GENMASK_ULL(19, 0)) << 32);
}
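
/*
 * Worked example (values made up for illustration): with low = 0x70200000 and
 * high = 0x00000001, the masked low part is 0x70200000 and the high part
 * becomes 0x1 << 32, so the returned metric is 0x170200000. In other words,
 * bits 12-31 of the result come from @low and bits 32-51 come from @high.
 */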

static bool __init sgx_page_cache_init(void)
{
	u32 eax, ebx, ecx, edx, type;
	u64 pa, size;
	int nid;
	int i;

	sgx_numa_nodes = kmalloc_array(num_possible_nodes(), sizeof(*sgx_numa_nodes), GFP_KERNEL);
	if (!sgx_numa_nodes)
		return false;

	for (i = 0; i < ARRAY_SIZE(sgx_epc_sections); i++) {
		cpuid_count(SGX_CPUID, i + SGX_CPUID_EPC, &eax, &ebx, &ecx, &edx);

		type = eax & SGX_CPUID_EPC_MASK;
		if (type == SGX_CPUID_EPC_INVALID)
			break;

		if (type != SGX_CPUID_EPC_SECTION) {
			pr_err_once("Unknown EPC section type: %u\n", type);
			break;
		}

		pa = sgx_calc_section_metric(eax, ebx);
		size = sgx_calc_section_metric(ecx, edx);

		pr_info("EPC section 0x%llx-0x%llx\n", pa, pa + size - 1);

		if (!sgx_setup_epc_section(pa, size, i, &sgx_epc_sections[i])) {
			pr_err("No free memory for an EPC section\n");
			break;
		}

		nid = numa_map_to_online_node(phys_to_target_node(pa));
		if (nid == NUMA_NO_NODE) {
			/* The physical address is already printed above. */
			pr_warn(FW_BUG "Unable to map EPC section to online node. Fallback to the NUMA node 0.\n");
			nid = 0;
		}

		if (!node_isset(nid, sgx_numa_mask)) {
			spin_lock_init(&sgx_numa_nodes[nid].lock);
			INIT_LIST_HEAD(&sgx_numa_nodes[nid].free_page_list);
			node_set(nid, sgx_numa_mask);
		}

		sgx_epc_sections[i].node = &sgx_numa_nodes[nid];

		sgx_nr_epc_sections++;
	}

	if (!sgx_nr_epc_sections) {
		pr_err("There are zero EPC sections.\n");
		return false;
	}

	return true;
}

/*
 * Update the SGX_LEPUBKEYHASH MSRs to the values specified by the caller.
 * The bare-metal driver requires them to be updated to the hash of the
 * enclave's signer before EINIT. KVM needs to update them to the guest's
 * virtual MSR values before doing EINIT for the guest.
 */
void sgx_update_lepubkeyhash(u64 *lepubkeyhash)
{
	int i;

	WARN_ON_ONCE(preemptible());

	for (i = 0; i < 4; i++)
		wrmsrl(MSR_IA32_SGXLEPUBKEYHASH0 + i, lepubkeyhash[i]);
}

const struct file_operations sgx_provision_fops = {
	.owner		= THIS_MODULE,
};

static struct miscdevice sgx_dev_provision = {
	.minor = MISC_DYNAMIC_MINOR,
	.name = "sgx_provision",
	.nodename = "sgx_provision",
	.fops = &sgx_provision_fops,
};

/**
 * sgx_set_attribute() - Update allowed attributes given a file descriptor
 * @allowed_attributes:	Pointer to allowed enclave attributes
 * @attribute_fd:	File descriptor for specific attribute
 *
 * Append the enclave attribute indicated by the file descriptor to the allowed
 * attributes. Currently only SGX_ATTR_PROVISIONKEY, indicated by
 * /dev/sgx_provision, is supported.
 *
 * Return:
 * - 0:		SGX_ATTR_PROVISIONKEY is appended to allowed_attributes
 * - -EINVAL:	Invalid or unsupported file descriptor
 */
int sgx_set_attribute(unsigned long *allowed_attributes,
		      unsigned int attribute_fd)
{
	struct file *file;

	file = fget(attribute_fd);
	if (!file)
		return -EINVAL;

	if (file->f_op != &sgx_provision_fops) {
		fput(file);
		return -EINVAL;
	}

	*allowed_attributes |= SGX_ATTR_PROVISIONKEY;

	fput(file);
	return 0;
}
EXPORT_SYMBOL_GPL(sgx_set_attribute);

static int __init sgx_init(void)
{
	int ret;
	int i;

	if (!cpu_feature_enabled(X86_FEATURE_SGX))
		return -ENODEV;

	if (!sgx_page_cache_init())
		return -ENOMEM;

	if (!sgx_page_reclaimer_init()) {
		ret = -ENOMEM;
		goto err_page_cache;
	}

	ret = misc_register(&sgx_dev_provision);
	if (ret)
		goto err_kthread;

	/*
	 * Always try to initialize the native *and* KVM drivers.
	 * The KVM driver is less picky than the native one and
	 * can function if the native one is not supported on the
	 * current system or fails to initialize.
	 *
	 * Error out only if both fail to initialize.
	 */
	ret = sgx_drv_init();

	if (sgx_vepc_init() && ret)
		goto err_provision;

	return 0;

err_provision:
	misc_deregister(&sgx_dev_provision);

err_kthread:
	kthread_stop(ksgxd_tsk);

err_page_cache:
	for (i = 0; i < sgx_nr_epc_sections; i++) {
		vfree(sgx_epc_sections[i].pages);
		memunmap(sgx_epc_sections[i].virt_addr);
	}

	return ret;
}

device_initcall(sgx_init);