1 /* 2 * AMD Memory Encryption Support 3 * 4 * Copyright (C) 2016 Advanced Micro Devices, Inc. 5 * 6 * Author: Tom Lendacky <thomas.lendacky@amd.com> 7 * 8 * This program is free software; you can redistribute it and/or modify 9 * it under the terms of the GNU General Public License version 2 as 10 * published by the Free Software Foundation. 11 */ 12 13 #define DISABLE_BRANCH_PROFILING 14 15 #include <linux/linkage.h> 16 #include <linux/init.h> 17 #include <linux/mm.h> 18 #include <linux/dma-mapping.h> 19 #include <linux/swiotlb.h> 20 #include <linux/mem_encrypt.h> 21 22 #include <asm/tlbflush.h> 23 #include <asm/fixmap.h> 24 #include <asm/setup.h> 25 #include <asm/bootparam.h> 26 #include <asm/set_memory.h> 27 #include <asm/cacheflush.h> 28 #include <asm/sections.h> 29 #include <asm/processor-flags.h> 30 #include <asm/msr.h> 31 #include <asm/cmdline.h> 32 33 #include "mm_internal.h" 34 35 static char sme_cmdline_arg[] __initdata = "mem_encrypt"; 36 static char sme_cmdline_on[] __initdata = "on"; 37 static char sme_cmdline_off[] __initdata = "off"; 38 39 /* 40 * Since SME related variables are set early in the boot process they must 41 * reside in the .data section so as not to be zeroed out when the .bss 42 * section is later cleared. 43 */ 44 u64 sme_me_mask __section(.data) = 0; 45 EXPORT_SYMBOL(sme_me_mask); 46 DEFINE_STATIC_KEY_FALSE(sev_enable_key); 47 EXPORT_SYMBOL_GPL(sev_enable_key); 48 49 static bool sev_enabled __section(.data); 50 51 /* Buffer used for early in-place encryption by BSP, no locking needed */ 52 static char sme_early_buffer[PAGE_SIZE] __aligned(PAGE_SIZE); 53 54 /* 55 * This routine does not change the underlying encryption setting of the 56 * page(s) that map this memory. It assumes that eventually the memory is 57 * meant to be accessed as either encrypted or decrypted but the contents 58 * are currently not in the desired state. 59 * 60 * This routine follows the steps outlined in the AMD64 Architecture 61 * Programmer's Manual Volume 2, Section 7.10.8 Encrypt-in-Place. 62 */ 63 static void __init __sme_early_enc_dec(resource_size_t paddr, 64 unsigned long size, bool enc) 65 { 66 void *src, *dst; 67 size_t len; 68 69 if (!sme_me_mask) 70 return; 71 72 wbinvd(); 73 74 /* 75 * There are limited number of early mapping slots, so map (at most) 76 * one page at time. 77 */ 78 while (size) { 79 len = min_t(size_t, sizeof(sme_early_buffer), size); 80 81 /* 82 * Create mappings for the current and desired format of 83 * the memory. Use a write-protected mapping for the source. 84 */ 85 src = enc ? early_memremap_decrypted_wp(paddr, len) : 86 early_memremap_encrypted_wp(paddr, len); 87 88 dst = enc ? early_memremap_encrypted(paddr, len) : 89 early_memremap_decrypted(paddr, len); 90 91 /* 92 * If a mapping can't be obtained to perform the operation, 93 * then eventual access of that area in the desired mode 94 * will cause a crash. 95 */ 96 BUG_ON(!src || !dst); 97 98 /* 99 * Use a temporary buffer, of cache-line multiple size, to 100 * avoid data corruption as documented in the APM. 101 */ 102 memcpy(sme_early_buffer, src, len); 103 memcpy(dst, sme_early_buffer, len); 104 105 early_memunmap(dst, len); 106 early_memunmap(src, len); 107 108 paddr += len; 109 size -= len; 110 } 111 } 112 113 void __init sme_early_encrypt(resource_size_t paddr, unsigned long size) 114 { 115 __sme_early_enc_dec(paddr, size, true); 116 } 117 118 void __init sme_early_decrypt(resource_size_t paddr, unsigned long size) 119 { 120 __sme_early_enc_dec(paddr, size, false); 121 } 122 123 static void __init __sme_early_map_unmap_mem(void *vaddr, unsigned long size, 124 bool map) 125 { 126 unsigned long paddr = (unsigned long)vaddr - __PAGE_OFFSET; 127 pmdval_t pmd_flags, pmd; 128 129 /* Use early_pmd_flags but remove the encryption mask */ 130 pmd_flags = __sme_clr(early_pmd_flags); 131 132 do { 133 pmd = map ? (paddr & PMD_MASK) + pmd_flags : 0; 134 __early_make_pgtable((unsigned long)vaddr, pmd); 135 136 vaddr += PMD_SIZE; 137 paddr += PMD_SIZE; 138 size = (size <= PMD_SIZE) ? 0 : size - PMD_SIZE; 139 } while (size); 140 141 __native_flush_tlb(); 142 } 143 144 void __init sme_unmap_bootdata(char *real_mode_data) 145 { 146 struct boot_params *boot_data; 147 unsigned long cmdline_paddr; 148 149 if (!sme_active()) 150 return; 151 152 /* Get the command line address before unmapping the real_mode_data */ 153 boot_data = (struct boot_params *)real_mode_data; 154 cmdline_paddr = boot_data->hdr.cmd_line_ptr | ((u64)boot_data->ext_cmd_line_ptr << 32); 155 156 __sme_early_map_unmap_mem(real_mode_data, sizeof(boot_params), false); 157 158 if (!cmdline_paddr) 159 return; 160 161 __sme_early_map_unmap_mem(__va(cmdline_paddr), COMMAND_LINE_SIZE, false); 162 } 163 164 void __init sme_map_bootdata(char *real_mode_data) 165 { 166 struct boot_params *boot_data; 167 unsigned long cmdline_paddr; 168 169 if (!sme_active()) 170 return; 171 172 __sme_early_map_unmap_mem(real_mode_data, sizeof(boot_params), true); 173 174 /* Get the command line address after mapping the real_mode_data */ 175 boot_data = (struct boot_params *)real_mode_data; 176 cmdline_paddr = boot_data->hdr.cmd_line_ptr | ((u64)boot_data->ext_cmd_line_ptr << 32); 177 178 if (!cmdline_paddr) 179 return; 180 181 __sme_early_map_unmap_mem(__va(cmdline_paddr), COMMAND_LINE_SIZE, true); 182 } 183 184 void __init sme_early_init(void) 185 { 186 unsigned int i; 187 188 if (!sme_me_mask) 189 return; 190 191 early_pmd_flags = __sme_set(early_pmd_flags); 192 193 __supported_pte_mask = __sme_set(__supported_pte_mask); 194 195 /* Update the protection map with memory encryption mask */ 196 for (i = 0; i < ARRAY_SIZE(protection_map); i++) 197 protection_map[i] = pgprot_encrypted(protection_map[i]); 198 199 if (sev_active()) 200 swiotlb_force = SWIOTLB_FORCE; 201 } 202 203 static void *sev_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, 204 gfp_t gfp, unsigned long attrs) 205 { 206 unsigned long dma_mask; 207 unsigned int order; 208 struct page *page; 209 void *vaddr = NULL; 210 211 dma_mask = dma_alloc_coherent_mask(dev, gfp); 212 order = get_order(size); 213 214 /* 215 * Memory will be memset to zero after marking decrypted, so don't 216 * bother clearing it before. 217 */ 218 gfp &= ~__GFP_ZERO; 219 220 page = alloc_pages_node(dev_to_node(dev), gfp, order); 221 if (page) { 222 dma_addr_t addr; 223 224 /* 225 * Since we will be clearing the encryption bit, check the 226 * mask with it already cleared. 227 */ 228 addr = __sme_clr(phys_to_dma(dev, page_to_phys(page))); 229 if ((addr + size) > dma_mask) { 230 __free_pages(page, get_order(size)); 231 } else { 232 vaddr = page_address(page); 233 *dma_handle = addr; 234 } 235 } 236 237 if (!vaddr) 238 vaddr = swiotlb_alloc_coherent(dev, size, dma_handle, gfp); 239 240 if (!vaddr) 241 return NULL; 242 243 /* Clear the SME encryption bit for DMA use if not swiotlb area */ 244 if (!is_swiotlb_buffer(dma_to_phys(dev, *dma_handle))) { 245 set_memory_decrypted((unsigned long)vaddr, 1 << order); 246 memset(vaddr, 0, PAGE_SIZE << order); 247 *dma_handle = __sme_clr(*dma_handle); 248 } 249 250 return vaddr; 251 } 252 253 static void sev_free(struct device *dev, size_t size, void *vaddr, 254 dma_addr_t dma_handle, unsigned long attrs) 255 { 256 /* Set the SME encryption bit for re-use if not swiotlb area */ 257 if (!is_swiotlb_buffer(dma_to_phys(dev, dma_handle))) 258 set_memory_encrypted((unsigned long)vaddr, 259 1 << get_order(size)); 260 261 swiotlb_free_coherent(dev, size, vaddr, dma_handle); 262 } 263 264 static void __init __set_clr_pte_enc(pte_t *kpte, int level, bool enc) 265 { 266 pgprot_t old_prot, new_prot; 267 unsigned long pfn, pa, size; 268 pte_t new_pte; 269 270 switch (level) { 271 case PG_LEVEL_4K: 272 pfn = pte_pfn(*kpte); 273 old_prot = pte_pgprot(*kpte); 274 break; 275 case PG_LEVEL_2M: 276 pfn = pmd_pfn(*(pmd_t *)kpte); 277 old_prot = pmd_pgprot(*(pmd_t *)kpte); 278 break; 279 case PG_LEVEL_1G: 280 pfn = pud_pfn(*(pud_t *)kpte); 281 old_prot = pud_pgprot(*(pud_t *)kpte); 282 break; 283 default: 284 return; 285 } 286 287 new_prot = old_prot; 288 if (enc) 289 pgprot_val(new_prot) |= _PAGE_ENC; 290 else 291 pgprot_val(new_prot) &= ~_PAGE_ENC; 292 293 /* If prot is same then do nothing. */ 294 if (pgprot_val(old_prot) == pgprot_val(new_prot)) 295 return; 296 297 pa = pfn << page_level_shift(level); 298 size = page_level_size(level); 299 300 /* 301 * We are going to perform in-place en-/decryption and change the 302 * physical page attribute from C=1 to C=0 or vice versa. Flush the 303 * caches to ensure that data gets accessed with the correct C-bit. 304 */ 305 clflush_cache_range(__va(pa), size); 306 307 /* Encrypt/decrypt the contents in-place */ 308 if (enc) 309 sme_early_encrypt(pa, size); 310 else 311 sme_early_decrypt(pa, size); 312 313 /* Change the page encryption mask. */ 314 new_pte = pfn_pte(pfn, new_prot); 315 set_pte_atomic(kpte, new_pte); 316 } 317 318 static int __init early_set_memory_enc_dec(unsigned long vaddr, 319 unsigned long size, bool enc) 320 { 321 unsigned long vaddr_end, vaddr_next; 322 unsigned long psize, pmask; 323 int split_page_size_mask; 324 int level, ret; 325 pte_t *kpte; 326 327 vaddr_next = vaddr; 328 vaddr_end = vaddr + size; 329 330 for (; vaddr < vaddr_end; vaddr = vaddr_next) { 331 kpte = lookup_address(vaddr, &level); 332 if (!kpte || pte_none(*kpte)) { 333 ret = 1; 334 goto out; 335 } 336 337 if (level == PG_LEVEL_4K) { 338 __set_clr_pte_enc(kpte, level, enc); 339 vaddr_next = (vaddr & PAGE_MASK) + PAGE_SIZE; 340 continue; 341 } 342 343 psize = page_level_size(level); 344 pmask = page_level_mask(level); 345 346 /* 347 * Check whether we can change the large page in one go. 348 * We request a split when the address is not aligned and 349 * the number of pages to set/clear encryption bit is smaller 350 * than the number of pages in the large page. 351 */ 352 if (vaddr == (vaddr & pmask) && 353 ((vaddr_end - vaddr) >= psize)) { 354 __set_clr_pte_enc(kpte, level, enc); 355 vaddr_next = (vaddr & pmask) + psize; 356 continue; 357 } 358 359 /* 360 * The virtual address is part of a larger page, create the next 361 * level page table mapping (4K or 2M). If it is part of a 2M 362 * page then we request a split of the large page into 4K 363 * chunks. A 1GB large page is split into 2M pages, resp. 364 */ 365 if (level == PG_LEVEL_2M) 366 split_page_size_mask = 0; 367 else 368 split_page_size_mask = 1 << PG_LEVEL_2M; 369 370 kernel_physical_mapping_init(__pa(vaddr & pmask), 371 __pa((vaddr_end & pmask) + psize), 372 split_page_size_mask); 373 } 374 375 ret = 0; 376 377 out: 378 __flush_tlb_all(); 379 return ret; 380 } 381 382 int __init early_set_memory_decrypted(unsigned long vaddr, unsigned long size) 383 { 384 return early_set_memory_enc_dec(vaddr, size, false); 385 } 386 387 int __init early_set_memory_encrypted(unsigned long vaddr, unsigned long size) 388 { 389 return early_set_memory_enc_dec(vaddr, size, true); 390 } 391 392 /* 393 * SME and SEV are very similar but they are not the same, so there are 394 * times that the kernel will need to distinguish between SME and SEV. The 395 * sme_active() and sev_active() functions are used for this. When a 396 * distinction isn't needed, the mem_encrypt_active() function can be used. 397 * 398 * The trampoline code is a good example for this requirement. Before 399 * paging is activated, SME will access all memory as decrypted, but SEV 400 * will access all memory as encrypted. So, when APs are being brought 401 * up under SME the trampoline area cannot be encrypted, whereas under SEV 402 * the trampoline area must be encrypted. 403 */ 404 bool sme_active(void) 405 { 406 return sme_me_mask && !sev_enabled; 407 } 408 EXPORT_SYMBOL_GPL(sme_active); 409 410 bool sev_active(void) 411 { 412 return sme_me_mask && sev_enabled; 413 } 414 EXPORT_SYMBOL_GPL(sev_active); 415 416 static const struct dma_map_ops sev_dma_ops = { 417 .alloc = sev_alloc, 418 .free = sev_free, 419 .map_page = swiotlb_map_page, 420 .unmap_page = swiotlb_unmap_page, 421 .map_sg = swiotlb_map_sg_attrs, 422 .unmap_sg = swiotlb_unmap_sg_attrs, 423 .sync_single_for_cpu = swiotlb_sync_single_for_cpu, 424 .sync_single_for_device = swiotlb_sync_single_for_device, 425 .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu, 426 .sync_sg_for_device = swiotlb_sync_sg_for_device, 427 .mapping_error = swiotlb_dma_mapping_error, 428 }; 429 430 /* Architecture __weak replacement functions */ 431 void __init mem_encrypt_init(void) 432 { 433 if (!sme_me_mask) 434 return; 435 436 /* Call into SWIOTLB to update the SWIOTLB DMA buffers */ 437 swiotlb_update_mem_attributes(); 438 439 /* 440 * With SEV, DMA operations cannot use encryption. New DMA ops 441 * are required in order to mark the DMA areas as decrypted or 442 * to use bounce buffers. 443 */ 444 if (sev_active()) 445 dma_ops = &sev_dma_ops; 446 447 /* 448 * With SEV, we need to unroll the rep string I/O instructions. 449 */ 450 if (sev_active()) 451 static_branch_enable(&sev_enable_key); 452 453 pr_info("AMD %s active\n", 454 sev_active() ? "Secure Encrypted Virtualization (SEV)" 455 : "Secure Memory Encryption (SME)"); 456 } 457 458 void swiotlb_set_mem_attributes(void *vaddr, unsigned long size) 459 { 460 WARN(PAGE_ALIGN(size) != size, 461 "size is not page-aligned (%#lx)\n", size); 462 463 /* Make the SWIOTLB buffer area decrypted */ 464 set_memory_decrypted((unsigned long)vaddr, size >> PAGE_SHIFT); 465 } 466 467 static void __init sme_clear_pgd(pgd_t *pgd_base, unsigned long start, 468 unsigned long end) 469 { 470 unsigned long pgd_start, pgd_end, pgd_size; 471 pgd_t *pgd_p; 472 473 pgd_start = start & PGDIR_MASK; 474 pgd_end = end & PGDIR_MASK; 475 476 pgd_size = (((pgd_end - pgd_start) / PGDIR_SIZE) + 1); 477 pgd_size *= sizeof(pgd_t); 478 479 pgd_p = pgd_base + pgd_index(start); 480 481 memset(pgd_p, 0, pgd_size); 482 } 483 484 #define PGD_FLAGS _KERNPG_TABLE_NOENC 485 #define P4D_FLAGS _KERNPG_TABLE_NOENC 486 #define PUD_FLAGS _KERNPG_TABLE_NOENC 487 #define PMD_FLAGS (__PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL) 488 489 static void __init *sme_populate_pgd(pgd_t *pgd_base, void *pgtable_area, 490 unsigned long vaddr, pmdval_t pmd_val) 491 { 492 pgd_t *pgd_p; 493 p4d_t *p4d_p; 494 pud_t *pud_p; 495 pmd_t *pmd_p; 496 497 pgd_p = pgd_base + pgd_index(vaddr); 498 if (native_pgd_val(*pgd_p)) { 499 if (IS_ENABLED(CONFIG_X86_5LEVEL)) 500 p4d_p = (p4d_t *)(native_pgd_val(*pgd_p) & ~PTE_FLAGS_MASK); 501 else 502 pud_p = (pud_t *)(native_pgd_val(*pgd_p) & ~PTE_FLAGS_MASK); 503 } else { 504 pgd_t pgd; 505 506 if (IS_ENABLED(CONFIG_X86_5LEVEL)) { 507 p4d_p = pgtable_area; 508 memset(p4d_p, 0, sizeof(*p4d_p) * PTRS_PER_P4D); 509 pgtable_area += sizeof(*p4d_p) * PTRS_PER_P4D; 510 511 pgd = native_make_pgd((pgdval_t)p4d_p + PGD_FLAGS); 512 } else { 513 pud_p = pgtable_area; 514 memset(pud_p, 0, sizeof(*pud_p) * PTRS_PER_PUD); 515 pgtable_area += sizeof(*pud_p) * PTRS_PER_PUD; 516 517 pgd = native_make_pgd((pgdval_t)pud_p + PGD_FLAGS); 518 } 519 native_set_pgd(pgd_p, pgd); 520 } 521 522 if (IS_ENABLED(CONFIG_X86_5LEVEL)) { 523 p4d_p += p4d_index(vaddr); 524 if (native_p4d_val(*p4d_p)) { 525 pud_p = (pud_t *)(native_p4d_val(*p4d_p) & ~PTE_FLAGS_MASK); 526 } else { 527 p4d_t p4d; 528 529 pud_p = pgtable_area; 530 memset(pud_p, 0, sizeof(*pud_p) * PTRS_PER_PUD); 531 pgtable_area += sizeof(*pud_p) * PTRS_PER_PUD; 532 533 p4d = native_make_p4d((pudval_t)pud_p + P4D_FLAGS); 534 native_set_p4d(p4d_p, p4d); 535 } 536 } 537 538 pud_p += pud_index(vaddr); 539 if (native_pud_val(*pud_p)) { 540 if (native_pud_val(*pud_p) & _PAGE_PSE) 541 goto out; 542 543 pmd_p = (pmd_t *)(native_pud_val(*pud_p) & ~PTE_FLAGS_MASK); 544 } else { 545 pud_t pud; 546 547 pmd_p = pgtable_area; 548 memset(pmd_p, 0, sizeof(*pmd_p) * PTRS_PER_PMD); 549 pgtable_area += sizeof(*pmd_p) * PTRS_PER_PMD; 550 551 pud = native_make_pud((pmdval_t)pmd_p + PUD_FLAGS); 552 native_set_pud(pud_p, pud); 553 } 554 555 pmd_p += pmd_index(vaddr); 556 if (!native_pmd_val(*pmd_p) || !(native_pmd_val(*pmd_p) & _PAGE_PSE)) 557 native_set_pmd(pmd_p, native_make_pmd(pmd_val)); 558 559 out: 560 return pgtable_area; 561 } 562 563 static unsigned long __init sme_pgtable_calc(unsigned long len) 564 { 565 unsigned long p4d_size, pud_size, pmd_size; 566 unsigned long total; 567 568 /* 569 * Perform a relatively simplistic calculation of the pagetable 570 * entries that are needed. That mappings will be covered by 2MB 571 * PMD entries so we can conservatively calculate the required 572 * number of P4D, PUD and PMD structures needed to perform the 573 * mappings. Incrementing the count for each covers the case where 574 * the addresses cross entries. 575 */ 576 if (IS_ENABLED(CONFIG_X86_5LEVEL)) { 577 p4d_size = (ALIGN(len, PGDIR_SIZE) / PGDIR_SIZE) + 1; 578 p4d_size *= sizeof(p4d_t) * PTRS_PER_P4D; 579 pud_size = (ALIGN(len, P4D_SIZE) / P4D_SIZE) + 1; 580 pud_size *= sizeof(pud_t) * PTRS_PER_PUD; 581 } else { 582 p4d_size = 0; 583 pud_size = (ALIGN(len, PGDIR_SIZE) / PGDIR_SIZE) + 1; 584 pud_size *= sizeof(pud_t) * PTRS_PER_PUD; 585 } 586 pmd_size = (ALIGN(len, PUD_SIZE) / PUD_SIZE) + 1; 587 pmd_size *= sizeof(pmd_t) * PTRS_PER_PMD; 588 589 total = p4d_size + pud_size + pmd_size; 590 591 /* 592 * Now calculate the added pagetable structures needed to populate 593 * the new pagetables. 594 */ 595 if (IS_ENABLED(CONFIG_X86_5LEVEL)) { 596 p4d_size = ALIGN(total, PGDIR_SIZE) / PGDIR_SIZE; 597 p4d_size *= sizeof(p4d_t) * PTRS_PER_P4D; 598 pud_size = ALIGN(total, P4D_SIZE) / P4D_SIZE; 599 pud_size *= sizeof(pud_t) * PTRS_PER_PUD; 600 } else { 601 p4d_size = 0; 602 pud_size = ALIGN(total, PGDIR_SIZE) / PGDIR_SIZE; 603 pud_size *= sizeof(pud_t) * PTRS_PER_PUD; 604 } 605 pmd_size = ALIGN(total, PUD_SIZE) / PUD_SIZE; 606 pmd_size *= sizeof(pmd_t) * PTRS_PER_PMD; 607 608 total += p4d_size + pud_size + pmd_size; 609 610 return total; 611 } 612 613 void __init sme_encrypt_kernel(void) 614 { 615 unsigned long workarea_start, workarea_end, workarea_len; 616 unsigned long execute_start, execute_end, execute_len; 617 unsigned long kernel_start, kernel_end, kernel_len; 618 unsigned long pgtable_area_len; 619 unsigned long paddr, pmd_flags; 620 unsigned long decrypted_base; 621 void *pgtable_area; 622 pgd_t *pgd; 623 624 if (!sme_active()) 625 return; 626 627 /* 628 * Prepare for encrypting the kernel by building new pagetables with 629 * the necessary attributes needed to encrypt the kernel in place. 630 * 631 * One range of virtual addresses will map the memory occupied 632 * by the kernel as encrypted. 633 * 634 * Another range of virtual addresses will map the memory occupied 635 * by the kernel as decrypted and write-protected. 636 * 637 * The use of write-protect attribute will prevent any of the 638 * memory from being cached. 639 */ 640 641 /* Physical addresses gives us the identity mapped virtual addresses */ 642 kernel_start = __pa_symbol(_text); 643 kernel_end = ALIGN(__pa_symbol(_end), PMD_PAGE_SIZE); 644 kernel_len = kernel_end - kernel_start; 645 646 /* Set the encryption workarea to be immediately after the kernel */ 647 workarea_start = kernel_end; 648 649 /* 650 * Calculate required number of workarea bytes needed: 651 * executable encryption area size: 652 * stack page (PAGE_SIZE) 653 * encryption routine page (PAGE_SIZE) 654 * intermediate copy buffer (PMD_PAGE_SIZE) 655 * pagetable structures for the encryption of the kernel 656 * pagetable structures for workarea (in case not currently mapped) 657 */ 658 execute_start = workarea_start; 659 execute_end = execute_start + (PAGE_SIZE * 2) + PMD_PAGE_SIZE; 660 execute_len = execute_end - execute_start; 661 662 /* 663 * One PGD for both encrypted and decrypted mappings and a set of 664 * PUDs and PMDs for each of the encrypted and decrypted mappings. 665 */ 666 pgtable_area_len = sizeof(pgd_t) * PTRS_PER_PGD; 667 pgtable_area_len += sme_pgtable_calc(execute_end - kernel_start) * 2; 668 669 /* PUDs and PMDs needed in the current pagetables for the workarea */ 670 pgtable_area_len += sme_pgtable_calc(execute_len + pgtable_area_len); 671 672 /* 673 * The total workarea includes the executable encryption area and 674 * the pagetable area. 675 */ 676 workarea_len = execute_len + pgtable_area_len; 677 workarea_end = workarea_start + workarea_len; 678 679 /* 680 * Set the address to the start of where newly created pagetable 681 * structures (PGDs, PUDs and PMDs) will be allocated. New pagetable 682 * structures are created when the workarea is added to the current 683 * pagetables and when the new encrypted and decrypted kernel 684 * mappings are populated. 685 */ 686 pgtable_area = (void *)execute_end; 687 688 /* 689 * Make sure the current pagetable structure has entries for 690 * addressing the workarea. 691 */ 692 pgd = (pgd_t *)native_read_cr3_pa(); 693 paddr = workarea_start; 694 while (paddr < workarea_end) { 695 pgtable_area = sme_populate_pgd(pgd, pgtable_area, 696 paddr, 697 paddr + PMD_FLAGS); 698 699 paddr += PMD_PAGE_SIZE; 700 } 701 702 /* Flush the TLB - no globals so cr3 is enough */ 703 native_write_cr3(__native_read_cr3()); 704 705 /* 706 * A new pagetable structure is being built to allow for the kernel 707 * to be encrypted. It starts with an empty PGD that will then be 708 * populated with new PUDs and PMDs as the encrypted and decrypted 709 * kernel mappings are created. 710 */ 711 pgd = pgtable_area; 712 memset(pgd, 0, sizeof(*pgd) * PTRS_PER_PGD); 713 pgtable_area += sizeof(*pgd) * PTRS_PER_PGD; 714 715 /* Add encrypted kernel (identity) mappings */ 716 pmd_flags = PMD_FLAGS | _PAGE_ENC; 717 paddr = kernel_start; 718 while (paddr < kernel_end) { 719 pgtable_area = sme_populate_pgd(pgd, pgtable_area, 720 paddr, 721 paddr + pmd_flags); 722 723 paddr += PMD_PAGE_SIZE; 724 } 725 726 /* 727 * A different PGD index/entry must be used to get different 728 * pagetable entries for the decrypted mapping. Choose the next 729 * PGD index and convert it to a virtual address to be used as 730 * the base of the mapping. 731 */ 732 decrypted_base = (pgd_index(workarea_end) + 1) & (PTRS_PER_PGD - 1); 733 decrypted_base <<= PGDIR_SHIFT; 734 735 /* Add decrypted, write-protected kernel (non-identity) mappings */ 736 pmd_flags = (PMD_FLAGS & ~_PAGE_CACHE_MASK) | (_PAGE_PAT | _PAGE_PWT); 737 paddr = kernel_start; 738 while (paddr < kernel_end) { 739 pgtable_area = sme_populate_pgd(pgd, pgtable_area, 740 paddr + decrypted_base, 741 paddr + pmd_flags); 742 743 paddr += PMD_PAGE_SIZE; 744 } 745 746 /* Add decrypted workarea mappings to both kernel mappings */ 747 paddr = workarea_start; 748 while (paddr < workarea_end) { 749 pgtable_area = sme_populate_pgd(pgd, pgtable_area, 750 paddr, 751 paddr + PMD_FLAGS); 752 753 pgtable_area = sme_populate_pgd(pgd, pgtable_area, 754 paddr + decrypted_base, 755 paddr + PMD_FLAGS); 756 757 paddr += PMD_PAGE_SIZE; 758 } 759 760 /* Perform the encryption */ 761 sme_encrypt_execute(kernel_start, kernel_start + decrypted_base, 762 kernel_len, workarea_start, (unsigned long)pgd); 763 764 /* 765 * At this point we are running encrypted. Remove the mappings for 766 * the decrypted areas - all that is needed for this is to remove 767 * the PGD entry/entries. 768 */ 769 sme_clear_pgd(pgd, kernel_start + decrypted_base, 770 kernel_end + decrypted_base); 771 772 sme_clear_pgd(pgd, workarea_start + decrypted_base, 773 workarea_end + decrypted_base); 774 775 /* Flush the TLB - no globals so cr3 is enough */ 776 native_write_cr3(__native_read_cr3()); 777 } 778 779 void __init __nostackprotector sme_enable(struct boot_params *bp) 780 { 781 const char *cmdline_ptr, *cmdline_arg, *cmdline_on, *cmdline_off; 782 unsigned int eax, ebx, ecx, edx; 783 unsigned long feature_mask; 784 bool active_by_default; 785 unsigned long me_mask; 786 char buffer[16]; 787 u64 msr; 788 789 /* Check for the SME/SEV support leaf */ 790 eax = 0x80000000; 791 ecx = 0; 792 native_cpuid(&eax, &ebx, &ecx, &edx); 793 if (eax < 0x8000001f) 794 return; 795 796 #define AMD_SME_BIT BIT(0) 797 #define AMD_SEV_BIT BIT(1) 798 /* 799 * Set the feature mask (SME or SEV) based on whether we are 800 * running under a hypervisor. 801 */ 802 eax = 1; 803 ecx = 0; 804 native_cpuid(&eax, &ebx, &ecx, &edx); 805 feature_mask = (ecx & BIT(31)) ? AMD_SEV_BIT : AMD_SME_BIT; 806 807 /* 808 * Check for the SME/SEV feature: 809 * CPUID Fn8000_001F[EAX] 810 * - Bit 0 - Secure Memory Encryption support 811 * - Bit 1 - Secure Encrypted Virtualization support 812 * CPUID Fn8000_001F[EBX] 813 * - Bits 5:0 - Pagetable bit position used to indicate encryption 814 */ 815 eax = 0x8000001f; 816 ecx = 0; 817 native_cpuid(&eax, &ebx, &ecx, &edx); 818 if (!(eax & feature_mask)) 819 return; 820 821 me_mask = 1UL << (ebx & 0x3f); 822 823 /* Check if memory encryption is enabled */ 824 if (feature_mask == AMD_SME_BIT) { 825 /* For SME, check the SYSCFG MSR */ 826 msr = __rdmsr(MSR_K8_SYSCFG); 827 if (!(msr & MSR_K8_SYSCFG_MEM_ENCRYPT)) 828 return; 829 } else { 830 /* For SEV, check the SEV MSR */ 831 msr = __rdmsr(MSR_AMD64_SEV); 832 if (!(msr & MSR_AMD64_SEV_ENABLED)) 833 return; 834 835 /* SEV state cannot be controlled by a command line option */ 836 sme_me_mask = me_mask; 837 sev_enabled = true; 838 return; 839 } 840 841 /* 842 * Fixups have not been applied to phys_base yet and we're running 843 * identity mapped, so we must obtain the address to the SME command 844 * line argument data using rip-relative addressing. 845 */ 846 asm ("lea sme_cmdline_arg(%%rip), %0" 847 : "=r" (cmdline_arg) 848 : "p" (sme_cmdline_arg)); 849 asm ("lea sme_cmdline_on(%%rip), %0" 850 : "=r" (cmdline_on) 851 : "p" (sme_cmdline_on)); 852 asm ("lea sme_cmdline_off(%%rip), %0" 853 : "=r" (cmdline_off) 854 : "p" (sme_cmdline_off)); 855 856 if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT)) 857 active_by_default = true; 858 else 859 active_by_default = false; 860 861 cmdline_ptr = (const char *)((u64)bp->hdr.cmd_line_ptr | 862 ((u64)bp->ext_cmd_line_ptr << 32)); 863 864 cmdline_find_option(cmdline_ptr, cmdline_arg, buffer, sizeof(buffer)); 865 866 if (!strncmp(buffer, cmdline_on, sizeof(buffer))) 867 sme_me_mask = me_mask; 868 else if (!strncmp(buffer, cmdline_off, sizeof(buffer))) 869 sme_me_mask = 0; 870 else 871 sme_me_mask = active_by_default ? me_mask : 0; 872 } 873