/*
 * AMD Memory Encryption Support
 *
 * Copyright (C) 2016 Advanced Micro Devices, Inc.
 *
 * Author: Tom Lendacky <thomas.lendacky@amd.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/linkage.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/dma-mapping.h>
#include <linux/swiotlb.h>
#include <linux/mem_encrypt.h>

#include <asm/tlbflush.h>
#include <asm/fixmap.h>
#include <asm/setup.h>
#include <asm/bootparam.h>
#include <asm/set_memory.h>
#include <asm/cacheflush.h>
#include <asm/sections.h>
#include <asm/processor-flags.h>
#include <asm/msr.h>
#include <asm/cmdline.h>

static char sme_cmdline_arg[] __initdata = "mem_encrypt";
static char sme_cmdline_on[] __initdata = "on";
static char sme_cmdline_off[] __initdata = "off";

/*
 * Since SME related variables are set early in the boot process they must
 * reside in the .data section so as not to be zeroed out when the .bss
 * section is later cleared.
 */
u64 sme_me_mask __section(.data) = 0;
EXPORT_SYMBOL_GPL(sme_me_mask);

/* Buffer used for early in-place encryption by BSP, no locking needed */
static char sme_early_buffer[PAGE_SIZE] __aligned(PAGE_SIZE);

/*
 * This routine does not change the underlying encryption setting of the
 * page(s) that map this memory. It assumes that eventually the memory is
 * meant to be accessed as either encrypted or decrypted but the contents
 * are currently not in the desired state.
 *
 * This routine follows the steps outlined in the AMD64 Architecture
 * Programmer's Manual Volume 2, Section 7.10.8 Encrypt-in-Place.
 */
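/*
 * As a concrete example: to encrypt three pages starting at (say)
 * physical address 0x200000, the loop below runs three times.  Each
 * pass maps the 4 KB chunk twice - once in its current, decrypted form
 * through a write-protected mapping and once in its desired, encrypted
 * form - stages the data through sme_early_buffer, and then unmaps
 * both views before moving on to the next page.
 */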
static void __init __sme_early_enc_dec(resource_size_t paddr,
                                       unsigned long size, bool enc)
{
        void *src, *dst;
        size_t len;

        if (!sme_me_mask)
                return;

        local_flush_tlb();
        wbinvd();

        /*
         * There are a limited number of early mapping slots, so map (at
         * most) one page at a time.
         */
        while (size) {
                len = min_t(size_t, sizeof(sme_early_buffer), size);

                /*
                 * Create mappings for the current and desired format of
                 * the memory. Use a write-protected mapping for the source.
                 */
                src = enc ? early_memremap_decrypted_wp(paddr, len) :
                            early_memremap_encrypted_wp(paddr, len);

                dst = enc ? early_memremap_encrypted(paddr, len) :
                            early_memremap_decrypted(paddr, len);

                /*
                 * If a mapping can't be obtained to perform the operation,
                 * then eventual access of that area in the desired mode
                 * will cause a crash.
                 */
                BUG_ON(!src || !dst);

                /*
                 * Use a temporary buffer, of cache-line multiple size, to
                 * avoid data corruption as documented in the APM.
                 */
                memcpy(sme_early_buffer, src, len);
                memcpy(dst, sme_early_buffer, len);

                early_memunmap(dst, len);
                early_memunmap(src, len);

                paddr += len;
                size -= len;
        }
}

void __init sme_early_encrypt(resource_size_t paddr, unsigned long size)
{
        __sme_early_enc_dec(paddr, size, true);
}

void __init sme_early_decrypt(resource_size_t paddr, unsigned long size)
{
        __sme_early_enc_dec(paddr, size, false);
}

static void __init __sme_early_map_unmap_mem(void *vaddr, unsigned long size,
                                             bool map)
{
        unsigned long paddr = (unsigned long)vaddr - __PAGE_OFFSET;
        pmdval_t pmd_flags, pmd;

        /* Use early_pmd_flags but remove the encryption mask */
        pmd_flags = __sme_clr(early_pmd_flags);

        do {
                pmd = map ? (paddr & PMD_MASK) + pmd_flags : 0;
                __early_make_pgtable((unsigned long)vaddr, pmd);

                vaddr += PMD_SIZE;
                paddr += PMD_SIZE;
                size = (size <= PMD_SIZE) ? 0 : size - PMD_SIZE;
        } while (size);

        __native_flush_tlb();
}

void __init sme_unmap_bootdata(char *real_mode_data)
{
        struct boot_params *boot_data;
        unsigned long cmdline_paddr;

        if (!sme_active())
                return;

        /* Get the command line address before unmapping the real_mode_data */
        boot_data = (struct boot_params *)real_mode_data;
        cmdline_paddr = boot_data->hdr.cmd_line_ptr |
                        ((u64)boot_data->ext_cmd_line_ptr << 32);

        __sme_early_map_unmap_mem(real_mode_data, sizeof(boot_params), false);

        if (!cmdline_paddr)
                return;

        __sme_early_map_unmap_mem(__va(cmdline_paddr), COMMAND_LINE_SIZE, false);
}

void __init sme_map_bootdata(char *real_mode_data)
{
        struct boot_params *boot_data;
        unsigned long cmdline_paddr;

        if (!sme_active())
                return;

        __sme_early_map_unmap_mem(real_mode_data, sizeof(boot_params), true);

        /* Get the command line address after mapping the real_mode_data */
        boot_data = (struct boot_params *)real_mode_data;
        cmdline_paddr = boot_data->hdr.cmd_line_ptr |
                        ((u64)boot_data->ext_cmd_line_ptr << 32);

        if (!cmdline_paddr)
                return;

        __sme_early_map_unmap_mem(__va(cmdline_paddr), COMMAND_LINE_SIZE, true);
}
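
/*
 * The boot protocol splits the command line address across two fields:
 * hdr.cmd_line_ptr carries the low 32 bits and ext_cmd_line_ptr the
 * high 32 bits.  For example, a command line placed at physical
 * address 0x100009000 would arrive as cmd_line_ptr = 0x00009000 and
 * ext_cmd_line_ptr = 0x1, which the two bootdata helpers above combine
 * back into cmdline_paddr = 0x100009000.
 */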

void __init sme_early_init(void)
{
        unsigned int i;

        if (!sme_me_mask)
                return;

        early_pmd_flags = __sme_set(early_pmd_flags);

        __supported_pte_mask = __sme_set(__supported_pte_mask);

        /* Update the protection map with memory encryption mask */
        for (i = 0; i < ARRAY_SIZE(protection_map); i++)
                protection_map[i] = pgprot_encrypted(protection_map[i]);
}

/* Architecture __weak replacement functions */
void __init mem_encrypt_init(void)
{
        if (!sme_me_mask)
                return;

        /* Call into SWIOTLB to update the SWIOTLB DMA buffers */
        swiotlb_update_mem_attributes();

        pr_info("AMD Secure Memory Encryption (SME) active\n");
}

void swiotlb_set_mem_attributes(void *vaddr, unsigned long size)
{
        WARN(PAGE_ALIGN(size) != size,
             "size is not page-aligned (%#lx)\n", size);

        /* Make the SWIOTLB buffer area decrypted */
        set_memory_decrypted((unsigned long)vaddr, size >> PAGE_SHIFT);
}

static void __init sme_clear_pgd(pgd_t *pgd_base, unsigned long start,
                                 unsigned long end)
{
        unsigned long pgd_start, pgd_end, pgd_size;
        pgd_t *pgd_p;

        pgd_start = start & PGDIR_MASK;
        pgd_end = end & PGDIR_MASK;

        pgd_size = (((pgd_end - pgd_start) / PGDIR_SIZE) + 1);
        pgd_size *= sizeof(pgd_t);

        pgd_p = pgd_base + pgd_index(start);

        memset(pgd_p, 0, pgd_size);
}

#define PGD_FLAGS       _KERNPG_TABLE_NOENC
#define P4D_FLAGS       _KERNPG_TABLE_NOENC
#define PUD_FLAGS       _KERNPG_TABLE_NOENC
#define PMD_FLAGS       (__PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL)

static void __init *sme_populate_pgd(pgd_t *pgd_base, void *pgtable_area,
                                     unsigned long vaddr, pmdval_t pmd_val)
{
        pgd_t *pgd_p;
        p4d_t *p4d_p;
        pud_t *pud_p;
        pmd_t *pmd_p;

        pgd_p = pgd_base + pgd_index(vaddr);
        if (native_pgd_val(*pgd_p)) {
                if (IS_ENABLED(CONFIG_X86_5LEVEL))
                        p4d_p = (p4d_t *)(native_pgd_val(*pgd_p) & ~PTE_FLAGS_MASK);
                else
                        pud_p = (pud_t *)(native_pgd_val(*pgd_p) & ~PTE_FLAGS_MASK);
        } else {
                pgd_t pgd;

                if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
                        p4d_p = pgtable_area;
                        memset(p4d_p, 0, sizeof(*p4d_p) * PTRS_PER_P4D);
                        pgtable_area += sizeof(*p4d_p) * PTRS_PER_P4D;

                        pgd = native_make_pgd((pgdval_t)p4d_p + PGD_FLAGS);
                } else {
                        pud_p = pgtable_area;
                        memset(pud_p, 0, sizeof(*pud_p) * PTRS_PER_PUD);
                        pgtable_area += sizeof(*pud_p) * PTRS_PER_PUD;

                        pgd = native_make_pgd((pgdval_t)pud_p + PGD_FLAGS);
                }
                native_set_pgd(pgd_p, pgd);
        }

        if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
                p4d_p += p4d_index(vaddr);
                if (native_p4d_val(*p4d_p)) {
                        pud_p = (pud_t *)(native_p4d_val(*p4d_p) & ~PTE_FLAGS_MASK);
                } else {
                        p4d_t p4d;

                        pud_p = pgtable_area;
                        memset(pud_p, 0, sizeof(*pud_p) * PTRS_PER_PUD);
                        pgtable_area += sizeof(*pud_p) * PTRS_PER_PUD;

                        p4d = native_make_p4d((pudval_t)pud_p + P4D_FLAGS);
                        native_set_p4d(p4d_p, p4d);
                }
        }

        pud_p += pud_index(vaddr);
        if (native_pud_val(*pud_p)) {
                if (native_pud_val(*pud_p) & _PAGE_PSE)
                        goto out;

                pmd_p = (pmd_t *)(native_pud_val(*pud_p) & ~PTE_FLAGS_MASK);
        } else {
                pud_t pud;

                pmd_p = pgtable_area;
                memset(pmd_p, 0, sizeof(*pmd_p) * PTRS_PER_PMD);
                pgtable_area += sizeof(*pmd_p) * PTRS_PER_PMD;

                pud = native_make_pud((pmdval_t)pmd_p + PUD_FLAGS);
                native_set_pud(pud_p, pud);
        }

        pmd_p += pmd_index(vaddr);
        if (!native_pmd_val(*pmd_p) || !(native_pmd_val(*pmd_p) & _PAGE_PSE))
                native_set_pmd(pmd_p, native_make_pmd(pmd_val));

out:
        return pgtable_area;
}

static unsigned long __init sme_pgtable_calc(unsigned long len)
{
        unsigned long p4d_size, pud_size, pmd_size;
        unsigned long total;

        /*
         * Perform a relatively simplistic calculation of the pagetable
         * entries that are needed. The mappings will be covered by 2MB
         * PMD entries so we can conservatively calculate the required
         * number of P4D, PUD and PMD structures needed to perform the
         * mappings. Incrementing the count for each covers the case where
         * the addresses cross entries.
         */
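        /*
         * As a rough worked example, assuming 4-level paging and a
         * length of about 16 MB: the first calculation below needs two
         * PUD pages and two PMD pages (16 KB), and the second pass adds
         * one more of each (8 KB), for roughly 24 KB of pagetable area.
         */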
        if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
                p4d_size = (ALIGN(len, PGDIR_SIZE) / PGDIR_SIZE) + 1;
                p4d_size *= sizeof(p4d_t) * PTRS_PER_P4D;
                pud_size = (ALIGN(len, P4D_SIZE) / P4D_SIZE) + 1;
                pud_size *= sizeof(pud_t) * PTRS_PER_PUD;
        } else {
                p4d_size = 0;
                pud_size = (ALIGN(len, PGDIR_SIZE) / PGDIR_SIZE) + 1;
                pud_size *= sizeof(pud_t) * PTRS_PER_PUD;
        }
        pmd_size = (ALIGN(len, PUD_SIZE) / PUD_SIZE) + 1;
        pmd_size *= sizeof(pmd_t) * PTRS_PER_PMD;

        total = p4d_size + pud_size + pmd_size;

        /*
         * Now calculate the added pagetable structures needed to populate
         * the new pagetables.
         */
        if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
                p4d_size = ALIGN(total, PGDIR_SIZE) / PGDIR_SIZE;
                p4d_size *= sizeof(p4d_t) * PTRS_PER_P4D;
                pud_size = ALIGN(total, P4D_SIZE) / P4D_SIZE;
                pud_size *= sizeof(pud_t) * PTRS_PER_PUD;
        } else {
                p4d_size = 0;
                pud_size = ALIGN(total, PGDIR_SIZE) / PGDIR_SIZE;
                pud_size *= sizeof(pud_t) * PTRS_PER_PUD;
        }
        pmd_size = ALIGN(total, PUD_SIZE) / PUD_SIZE;
        pmd_size *= sizeof(pmd_t) * PTRS_PER_PMD;

        total += p4d_size + pud_size + pmd_size;

        return total;
}

void __init sme_encrypt_kernel(void)
{
        unsigned long workarea_start, workarea_end, workarea_len;
        unsigned long execute_start, execute_end, execute_len;
        unsigned long kernel_start, kernel_end, kernel_len;
        unsigned long pgtable_area_len;
        unsigned long paddr, pmd_flags;
        unsigned long decrypted_base;
        void *pgtable_area;
        pgd_t *pgd;

        if (!sme_active())
                return;

        /*
         * Prepare for encrypting the kernel by building new pagetables with
         * the necessary attributes needed to encrypt the kernel in place.
         *
         * One range of virtual addresses will map the memory occupied
         * by the kernel as encrypted.
         *
         * Another range of virtual addresses will map the memory occupied
         * by the kernel as decrypted and write-protected.
         *
         * The use of the write-protect attribute will prevent any of the
         * memory from being cached.
         */

        /* Physical addresses give us the identity mapped virtual addresses */
        kernel_start = __pa_symbol(_text);
        kernel_end = ALIGN(__pa_symbol(_end), PMD_PAGE_SIZE);
        kernel_len = kernel_end - kernel_start;

        /* Set the encryption workarea to be immediately after the kernel */
        workarea_start = kernel_end;

        /*
         * Calculate the number of workarea bytes needed:
         *   executable encryption area size:
         *     stack page (PAGE_SIZE)
         *     encryption routine page (PAGE_SIZE)
         *     intermediate copy buffer (PMD_PAGE_SIZE)
         *   pagetable structures for the encryption of the kernel
         *   pagetable structures for workarea (in case not currently mapped)
         */
        execute_start = workarea_start;
        execute_end = execute_start + (PAGE_SIZE * 2) + PMD_PAGE_SIZE;
        execute_len = execute_end - execute_start;
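
        /*
         * For example, with a kernel that ends (2 MB aligned) at
         * physical 0x2a00000, the workarea begins at 0x2a00000: the
         * first 2 * 4 KB + 2 MB of it holds the stack, the encryption
         * routine and the intermediate copy buffer, and the pagetable
         * area calculated below follows immediately after.
         */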

        /*
         * One PGD for both encrypted and decrypted mappings and a set of
         * PUDs and PMDs for each of the encrypted and decrypted mappings.
         */
        pgtable_area_len = sizeof(pgd_t) * PTRS_PER_PGD;
        pgtable_area_len += sme_pgtable_calc(execute_end - kernel_start) * 2;

        /* PUDs and PMDs needed in the current pagetables for the workarea */
        pgtable_area_len += sme_pgtable_calc(execute_len + pgtable_area_len);

        /*
         * The total workarea includes the executable encryption area and
         * the pagetable area.
         */
        workarea_len = execute_len + pgtable_area_len;
        workarea_end = workarea_start + workarea_len;

        /*
         * Set the address to the start of where newly created pagetable
         * structures (PGDs, PUDs and PMDs) will be allocated. New pagetable
         * structures are created when the workarea is added to the current
         * pagetables and when the new encrypted and decrypted kernel
         * mappings are populated.
         */
        pgtable_area = (void *)execute_end;

        /*
         * Make sure the current pagetable structure has entries for
         * addressing the workarea.
         */
        pgd = (pgd_t *)native_read_cr3_pa();
        paddr = workarea_start;
        while (paddr < workarea_end) {
                pgtable_area = sme_populate_pgd(pgd, pgtable_area,
                                                paddr,
                                                paddr + PMD_FLAGS);

                paddr += PMD_PAGE_SIZE;
        }

        /* Flush the TLB - no globals so cr3 is enough */
        native_write_cr3(__native_read_cr3());

        /*
         * A new pagetable structure is being built to allow for the kernel
         * to be encrypted. It starts with an empty PGD that will then be
         * populated with new PUDs and PMDs as the encrypted and decrypted
         * kernel mappings are created.
         */
        pgd = pgtable_area;
        memset(pgd, 0, sizeof(*pgd) * PTRS_PER_PGD);
        pgtable_area += sizeof(*pgd) * PTRS_PER_PGD;

        /* Add encrypted kernel (identity) mappings */
        pmd_flags = PMD_FLAGS | _PAGE_ENC;
        paddr = kernel_start;
        while (paddr < kernel_end) {
                pgtable_area = sme_populate_pgd(pgd, pgtable_area,
                                                paddr,
                                                paddr + pmd_flags);

                paddr += PMD_PAGE_SIZE;
        }

        /*
         * A different PGD index/entry must be used to get different
         * pagetable entries for the decrypted mapping. Choose the next
         * PGD index and convert it to a virtual address to be used as
         * the base of the mapping.
         */
        decrypted_base = (pgd_index(workarea_end) + 1) & (PTRS_PER_PGD - 1);
        decrypted_base <<= PGDIR_SHIFT;
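
        /*
         * For example, with 4-level paging (PGDIR_SHIFT == 39) and a
         * kernel living below the first 512 GB, pgd_index(workarea_end)
         * is 0, so decrypted_base becomes 1UL << 39 and the decrypted
         * alias of a physical address P is simply P + 512 GB.
         */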

        /* Add decrypted, write-protected kernel (non-identity) mappings */
        pmd_flags = (PMD_FLAGS & ~_PAGE_CACHE_MASK) | (_PAGE_PAT | _PAGE_PWT);
        paddr = kernel_start;
        while (paddr < kernel_end) {
                pgtable_area = sme_populate_pgd(pgd, pgtable_area,
                                                paddr + decrypted_base,
                                                paddr + pmd_flags);

                paddr += PMD_PAGE_SIZE;
        }

        /* Add decrypted workarea mappings to both kernel mappings */
        paddr = workarea_start;
        while (paddr < workarea_end) {
                pgtable_area = sme_populate_pgd(pgd, pgtable_area,
                                                paddr,
                                                paddr + PMD_FLAGS);

                pgtable_area = sme_populate_pgd(pgd, pgtable_area,
                                                paddr + decrypted_base,
                                                paddr + PMD_FLAGS);

                paddr += PMD_PAGE_SIZE;
        }

        /* Perform the encryption */
        sme_encrypt_execute(kernel_start, kernel_start + decrypted_base,
                            kernel_len, workarea_start, (unsigned long)pgd);

        /*
         * At this point we are running encrypted. Remove the mappings for
         * the decrypted areas - all that is needed for this is to remove
         * the PGD entry/entries.
         */
        sme_clear_pgd(pgd, kernel_start + decrypted_base,
                      kernel_end + decrypted_base);

        sme_clear_pgd(pgd, workarea_start + decrypted_base,
                      workarea_end + decrypted_base);

        /* Flush the TLB - no globals so cr3 is enough */
        native_write_cr3(__native_read_cr3());
}

void __init __nostackprotector sme_enable(struct boot_params *bp)
{
        const char *cmdline_ptr, *cmdline_arg, *cmdline_on, *cmdline_off;
        unsigned int eax, ebx, ecx, edx;
        bool active_by_default;
        unsigned long me_mask;
        char buffer[16];
        u64 msr;

        /* Check for the SME support leaf */
        eax = 0x80000000;
        ecx = 0;
        native_cpuid(&eax, &ebx, &ecx, &edx);
        if (eax < 0x8000001f)
                return;

        /*
         * Check for the SME feature:
         *   CPUID Fn8000_001F[EAX] - Bit 0
         *     Secure Memory Encryption support
         *   CPUID Fn8000_001F[EBX] - Bits 5:0
         *     Pagetable bit position used to indicate encryption
         */
        eax = 0x8000001f;
        ecx = 0;
        native_cpuid(&eax, &ebx, &ecx, &edx);
        if (!(eax & 1))
                return;

        me_mask = 1UL << (ebx & 0x3f);

        /* Check if SME is enabled */
        msr = __rdmsr(MSR_K8_SYSCFG);
        if (!(msr & MSR_K8_SYSCFG_MEM_ENCRYPT))
                return;

        /*
         * Fixups have not been applied to phys_base yet and we're running
         * identity mapped, so we must obtain the address of the SME command
         * line argument data using rip-relative addressing.
         */
        asm ("lea sme_cmdline_arg(%%rip), %0"
             : "=r" (cmdline_arg)
             : "p" (sme_cmdline_arg));
        asm ("lea sme_cmdline_on(%%rip), %0"
             : "=r" (cmdline_on)
             : "p" (sme_cmdline_on));
        asm ("lea sme_cmdline_off(%%rip), %0"
             : "=r" (cmdline_off)
             : "p" (sme_cmdline_off));

        if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT))
                active_by_default = true;
        else
                active_by_default = false;

        cmdline_ptr = (const char *)((u64)bp->hdr.cmd_line_ptr |
                                     ((u64)bp->ext_cmd_line_ptr << 32));

        cmdline_find_option(cmdline_ptr, cmdline_arg, buffer, sizeof(buffer));

        if (!strncmp(buffer, cmdline_on, sizeof(buffer)))
                sme_me_mask = me_mask;
        else if (!strncmp(buffer, cmdline_off, sizeof(buffer)))
                sme_me_mask = 0;
        else
                sme_me_mask = active_by_default ? me_mask : 0;
}