/*
 * AMD Memory Encryption Support
 *
 * Copyright (C) 2016 Advanced Micro Devices, Inc.
 *
 * Author: Tom Lendacky <thomas.lendacky@amd.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#define DISABLE_BRANCH_PROFILING

#include <linux/linkage.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/dma-mapping.h>
#include <linux/swiotlb.h>
#include <linux/mem_encrypt.h>

#include <asm/tlbflush.h>
#include <asm/fixmap.h>
#include <asm/setup.h>
#include <asm/bootparam.h>
#include <asm/set_memory.h>
#include <asm/cacheflush.h>
#include <asm/sections.h>
#include <asm/processor-flags.h>
#include <asm/msr.h>
#include <asm/cmdline.h>

static char sme_cmdline_arg[] __initdata = "mem_encrypt";
static char sme_cmdline_on[] __initdata = "on";
static char sme_cmdline_off[] __initdata = "off";

/*
 * Since SME related variables are set early in the boot process they must
 * reside in the .data section so as not to be zeroed out when the .bss
 * section is later cleared.
 */
u64 sme_me_mask __section(.data) = 0;
EXPORT_SYMBOL_GPL(sme_me_mask);

/* Buffer used for early in-place encryption by BSP, no locking needed */
static char sme_early_buffer[PAGE_SIZE] __aligned(PAGE_SIZE);

/*
 * This routine does not change the underlying encryption setting of the
 * page(s) that map this memory. It assumes that eventually the memory is
 * meant to be accessed as either encrypted or decrypted but the contents
 * are currently not in the desired state.
 *
 * This routine follows the steps outlined in the AMD64 Architecture
 * Programmer's Manual Volume 2, Section 7.10.8 Encrypt-in-Place.
 */
static void __init __sme_early_enc_dec(resource_size_t paddr,
				       unsigned long size, bool enc)
{
	void *src, *dst;
	size_t len;

	if (!sme_me_mask)
		return;

	local_flush_tlb();
	wbinvd();

	/*
	 * There are a limited number of early mapping slots, so map (at most)
	 * one page at a time.
	 */
	while (size) {
		len = min_t(size_t, sizeof(sme_early_buffer), size);

		/*
		 * Create mappings for the current and desired format of
		 * the memory. Use a write-protected mapping for the source.
		 */
		src = enc ? early_memremap_decrypted_wp(paddr, len) :
			    early_memremap_encrypted_wp(paddr, len);

		dst = enc ? early_memremap_encrypted(paddr, len) :
			    early_memremap_decrypted(paddr, len);

		/*
		 * If a mapping can't be obtained to perform the operation,
		 * then eventual access of that area in the desired mode
		 * will cause a crash.
		 */
		BUG_ON(!src || !dst);

		/*
		 * Use a temporary buffer, of cache-line multiple size, to
		 * avoid data corruption as documented in the APM.
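		 * The page-sized, page-aligned sme_early_buffer above
		 * satisfies the cache-line multiple requirement.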
		 */
		memcpy(sme_early_buffer, src, len);
		memcpy(dst, sme_early_buffer, len);

		early_memunmap(dst, len);
		early_memunmap(src, len);

		paddr += len;
		size -= len;
	}
}

void __init sme_early_encrypt(resource_size_t paddr, unsigned long size)
{
	__sme_early_enc_dec(paddr, size, true);
}

void __init sme_early_decrypt(resource_size_t paddr, unsigned long size)
{
	__sme_early_enc_dec(paddr, size, false);
}

static void __init __sme_early_map_unmap_mem(void *vaddr, unsigned long size,
					     bool map)
{
	unsigned long paddr = (unsigned long)vaddr - __PAGE_OFFSET;
	pmdval_t pmd_flags, pmd;

	/* Use early_pmd_flags but remove the encryption mask */
	pmd_flags = __sme_clr(early_pmd_flags);

	do {
		pmd = map ? (paddr & PMD_MASK) + pmd_flags : 0;
		__early_make_pgtable((unsigned long)vaddr, pmd);

		vaddr += PMD_SIZE;
		paddr += PMD_SIZE;
		size = (size <= PMD_SIZE) ? 0 : size - PMD_SIZE;
	} while (size);

	__native_flush_tlb();
}

void __init sme_unmap_bootdata(char *real_mode_data)
{
	struct boot_params *boot_data;
	unsigned long cmdline_paddr;

	if (!sme_active())
		return;

	/* Get the command line address before unmapping the real_mode_data */
	boot_data = (struct boot_params *)real_mode_data;
	cmdline_paddr = boot_data->hdr.cmd_line_ptr |
			((u64)boot_data->ext_cmd_line_ptr << 32);

	__sme_early_map_unmap_mem(real_mode_data, sizeof(boot_params), false);

	if (!cmdline_paddr)
		return;

	__sme_early_map_unmap_mem(__va(cmdline_paddr), COMMAND_LINE_SIZE, false);
}

void __init sme_map_bootdata(char *real_mode_data)
{
	struct boot_params *boot_data;
	unsigned long cmdline_paddr;

	if (!sme_active())
		return;

	__sme_early_map_unmap_mem(real_mode_data, sizeof(boot_params), true);

	/* Get the command line address after mapping the real_mode_data */
	boot_data = (struct boot_params *)real_mode_data;
	cmdline_paddr = boot_data->hdr.cmd_line_ptr |
			((u64)boot_data->ext_cmd_line_ptr << 32);

	if (!cmdline_paddr)
		return;

	__sme_early_map_unmap_mem(__va(cmdline_paddr), COMMAND_LINE_SIZE, true);
}

void __init sme_early_init(void)
{
	unsigned int i;

	if (!sme_me_mask)
		return;

	early_pmd_flags = __sme_set(early_pmd_flags);

	__supported_pte_mask = __sme_set(__supported_pte_mask);

	/* Update the protection map with memory encryption mask */
	for (i = 0; i < ARRAY_SIZE(protection_map); i++)
		protection_map[i] = pgprot_encrypted(protection_map[i]);
}

/* Architecture __weak replacement functions */
void __init mem_encrypt_init(void)
{
	if (!sme_me_mask)
		return;

	/* Call into SWIOTLB to update the SWIOTLB DMA buffers */
	swiotlb_update_mem_attributes();

	pr_info("AMD Secure Memory Encryption (SME) active\n");
}

void swiotlb_set_mem_attributes(void *vaddr, unsigned long size)
{
	WARN(PAGE_ALIGN(size) != size,
	     "size is not page-aligned (%#lx)\n", size);

	/* Make the SWIOTLB buffer area decrypted */
	set_memory_decrypted((unsigned long)vaddr, size >> PAGE_SHIFT);
}

static void __init sme_clear_pgd(pgd_t *pgd_base, unsigned long start,
				 unsigned long end)
{
	unsigned long pgd_start, pgd_end, pgd_size;
	pgd_t *pgd_p;

	pgd_start = start & PGDIR_MASK;
	pgd_end = end & PGDIR_MASK;

	pgd_size = (((pgd_end - pgd_start) / PGDIR_SIZE) + 1);
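	/*
	 * pgd_size counts whole PGD entries at this point; convert it to
	 * a byte count for the memset() below.
	 */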
	pgd_size *= sizeof(pgd_t);

	pgd_p = pgd_base + pgd_index(start);

	memset(pgd_p, 0, pgd_size);
}

#define PGD_FLAGS	_KERNPG_TABLE_NOENC
#define P4D_FLAGS	_KERNPG_TABLE_NOENC
#define PUD_FLAGS	_KERNPG_TABLE_NOENC
#define PMD_FLAGS	(__PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL)

static void __init *sme_populate_pgd(pgd_t *pgd_base, void *pgtable_area,
				     unsigned long vaddr, pmdval_t pmd_val)
{
	pgd_t *pgd_p;
	p4d_t *p4d_p;
	pud_t *pud_p;
	pmd_t *pmd_p;

	pgd_p = pgd_base + pgd_index(vaddr);
	if (native_pgd_val(*pgd_p)) {
		if (IS_ENABLED(CONFIG_X86_5LEVEL))
			p4d_p = (p4d_t *)(native_pgd_val(*pgd_p) & ~PTE_FLAGS_MASK);
		else
			pud_p = (pud_t *)(native_pgd_val(*pgd_p) & ~PTE_FLAGS_MASK);
	} else {
		pgd_t pgd;

		if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
			p4d_p = pgtable_area;
			memset(p4d_p, 0, sizeof(*p4d_p) * PTRS_PER_P4D);
			pgtable_area += sizeof(*p4d_p) * PTRS_PER_P4D;

			pgd = native_make_pgd((pgdval_t)p4d_p + PGD_FLAGS);
		} else {
			pud_p = pgtable_area;
			memset(pud_p, 0, sizeof(*pud_p) * PTRS_PER_PUD);
			pgtable_area += sizeof(*pud_p) * PTRS_PER_PUD;

			pgd = native_make_pgd((pgdval_t)pud_p + PGD_FLAGS);
		}
		native_set_pgd(pgd_p, pgd);
	}

	if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
		p4d_p += p4d_index(vaddr);
		if (native_p4d_val(*p4d_p)) {
			pud_p = (pud_t *)(native_p4d_val(*p4d_p) & ~PTE_FLAGS_MASK);
		} else {
			p4d_t p4d;

			pud_p = pgtable_area;
			memset(pud_p, 0, sizeof(*pud_p) * PTRS_PER_PUD);
			pgtable_area += sizeof(*pud_p) * PTRS_PER_PUD;

			p4d = native_make_p4d((pudval_t)pud_p + P4D_FLAGS);
			native_set_p4d(p4d_p, p4d);
		}
	}

	pud_p += pud_index(vaddr);
	if (native_pud_val(*pud_p)) {
		if (native_pud_val(*pud_p) & _PAGE_PSE)
			goto out;

		pmd_p = (pmd_t *)(native_pud_val(*pud_p) & ~PTE_FLAGS_MASK);
	} else {
		pud_t pud;

		pmd_p = pgtable_area;
		memset(pmd_p, 0, sizeof(*pmd_p) * PTRS_PER_PMD);
		pgtable_area += sizeof(*pmd_p) * PTRS_PER_PMD;

		pud = native_make_pud((pmdval_t)pmd_p + PUD_FLAGS);
		native_set_pud(pud_p, pud);
	}

	pmd_p += pmd_index(vaddr);
	if (!native_pmd_val(*pmd_p) || !(native_pmd_val(*pmd_p) & _PAGE_PSE))
		native_set_pmd(pmd_p, native_make_pmd(pmd_val));

out:
	return pgtable_area;
}

static unsigned long __init sme_pgtable_calc(unsigned long len)
{
	unsigned long p4d_size, pud_size, pmd_size;
	unsigned long total;

	/*
	 * Perform a relatively simplistic calculation of the pagetable
	 * entries that are needed. The mappings will be covered by 2MB
	 * PMD entries so we can conservatively calculate the required
	 * number of P4D, PUD and PMD structures needed to perform the
	 * mappings. Incrementing the count for each covers the case where
	 * the addresses cross entries.
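	 *
	 * Illustrative example, assuming 4-level paging: a range of less
	 * than 1GB works out to two PUD pages plus two PMD pages (16KB),
	 * and the second pass below adds one more of each (8KB), for a
	 * total of 24KB.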
	 */
	if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
		p4d_size = (ALIGN(len, PGDIR_SIZE) / PGDIR_SIZE) + 1;
		p4d_size *= sizeof(p4d_t) * PTRS_PER_P4D;
		pud_size = (ALIGN(len, P4D_SIZE) / P4D_SIZE) + 1;
		pud_size *= sizeof(pud_t) * PTRS_PER_PUD;
	} else {
		p4d_size = 0;
		pud_size = (ALIGN(len, PGDIR_SIZE) / PGDIR_SIZE) + 1;
		pud_size *= sizeof(pud_t) * PTRS_PER_PUD;
	}
	pmd_size = (ALIGN(len, PUD_SIZE) / PUD_SIZE) + 1;
	pmd_size *= sizeof(pmd_t) * PTRS_PER_PMD;

	total = p4d_size + pud_size + pmd_size;

	/*
	 * Now calculate the added pagetable structures needed to populate
	 * the new pagetables.
	 */
	if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
		p4d_size = ALIGN(total, PGDIR_SIZE) / PGDIR_SIZE;
		p4d_size *= sizeof(p4d_t) * PTRS_PER_P4D;
		pud_size = ALIGN(total, P4D_SIZE) / P4D_SIZE;
		pud_size *= sizeof(pud_t) * PTRS_PER_PUD;
	} else {
		p4d_size = 0;
		pud_size = ALIGN(total, PGDIR_SIZE) / PGDIR_SIZE;
		pud_size *= sizeof(pud_t) * PTRS_PER_PUD;
	}
	pmd_size = ALIGN(total, PUD_SIZE) / PUD_SIZE;
	pmd_size *= sizeof(pmd_t) * PTRS_PER_PMD;

	total += p4d_size + pud_size + pmd_size;

	return total;
}

void __init sme_encrypt_kernel(void)
{
	unsigned long workarea_start, workarea_end, workarea_len;
	unsigned long execute_start, execute_end, execute_len;
	unsigned long kernel_start, kernel_end, kernel_len;
	unsigned long pgtable_area_len;
	unsigned long paddr, pmd_flags;
	unsigned long decrypted_base;
	void *pgtable_area;
	pgd_t *pgd;

	if (!sme_active())
		return;

	/*
	 * Prepare for encrypting the kernel by building new pagetables with
	 * the necessary attributes needed to encrypt the kernel in place.
	 *
	 * One range of virtual addresses will map the memory occupied
	 * by the kernel as encrypted.
	 *
	 * Another range of virtual addresses will map the memory occupied
	 * by the kernel as decrypted and write-protected.
	 *
	 * The use of the write-protect attribute will prevent any of the
	 * memory from being cached.
	 */

	/* Physical addresses give us the identity mapped virtual addresses */
	kernel_start = __pa_symbol(_text);
	kernel_end = ALIGN(__pa_symbol(_end), PMD_PAGE_SIZE);
	kernel_len = kernel_end - kernel_start;

	/* Set the encryption workarea to be immediately after the kernel */
	workarea_start = kernel_end;

	/*
	 * Calculate the number of workarea bytes needed:
	 *   executable encryption area size:
	 *     stack page (PAGE_SIZE)
	 *     encryption routine page (PAGE_SIZE)
	 *     intermediate copy buffer (PMD_PAGE_SIZE)
	 *   pagetable structures for the encryption of the kernel
	 *   pagetable structures for workarea (in case not currently mapped)
	 */
	execute_start = workarea_start;
	execute_end = execute_start + (PAGE_SIZE * 2) + PMD_PAGE_SIZE;
	execute_len = execute_end - execute_start;

	/*
	 * One PGD for both encrypted and decrypted mappings and a set of
	 * PUDs and PMDs for each of the encrypted and decrypted mappings.
	 */
	pgtable_area_len = sizeof(pgd_t) * PTRS_PER_PGD;
	pgtable_area_len += sme_pgtable_calc(execute_end - kernel_start) * 2;

	/* PUDs and PMDs needed in the current pagetables for the workarea */
	pgtable_area_len += sme_pgtable_calc(execute_len + pgtable_area_len);

	/*
	 * The total workarea includes the executable encryption area and
	 * the pagetable area.
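	 *
	 * Resulting layout, for illustration:
	 *   workarea_start (== kernel_end) .. execute_end : stack page,
	 *       encryption routine page and intermediate copy buffer
	 *   execute_end .. workarea_end : newly built pagetable structures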
	 */
	workarea_len = execute_len + pgtable_area_len;
	workarea_end = workarea_start + workarea_len;

	/*
	 * Set the address to the start of where newly created pagetable
	 * structures (PGDs, PUDs and PMDs) will be allocated. New pagetable
	 * structures are created when the workarea is added to the current
	 * pagetables and when the new encrypted and decrypted kernel
	 * mappings are populated.
	 */
	pgtable_area = (void *)execute_end;

	/*
	 * Make sure the current pagetable structure has entries for
	 * addressing the workarea.
	 */
	pgd = (pgd_t *)native_read_cr3_pa();
	paddr = workarea_start;
	while (paddr < workarea_end) {
		pgtable_area = sme_populate_pgd(pgd, pgtable_area,
						paddr,
						paddr + PMD_FLAGS);

		paddr += PMD_PAGE_SIZE;
	}

	/* Flush the TLB - no globals so cr3 is enough */
	native_write_cr3(__native_read_cr3());

	/*
	 * A new pagetable structure is being built to allow for the kernel
	 * to be encrypted. It starts with an empty PGD that will then be
	 * populated with new PUDs and PMDs as the encrypted and decrypted
	 * kernel mappings are created.
	 */
	pgd = pgtable_area;
	memset(pgd, 0, sizeof(*pgd) * PTRS_PER_PGD);
	pgtable_area += sizeof(*pgd) * PTRS_PER_PGD;

	/* Add encrypted kernel (identity) mappings */
	pmd_flags = PMD_FLAGS | _PAGE_ENC;
	paddr = kernel_start;
	while (paddr < kernel_end) {
		pgtable_area = sme_populate_pgd(pgd, pgtable_area,
						paddr,
						paddr + pmd_flags);

		paddr += PMD_PAGE_SIZE;
	}

	/*
	 * A different PGD index/entry must be used to get different
	 * pagetable entries for the decrypted mapping. Choose the next
	 * PGD index and convert it to a virtual address to be used as
	 * the base of the mapping.
	 */
	decrypted_base = (pgd_index(workarea_end) + 1) & (PTRS_PER_PGD - 1);
	decrypted_base <<= PGDIR_SHIFT;

	/* Add decrypted, write-protected kernel (non-identity) mappings */
	pmd_flags = (PMD_FLAGS & ~_PAGE_CACHE_MASK) | (_PAGE_PAT | _PAGE_PWT);
	paddr = kernel_start;
	while (paddr < kernel_end) {
		pgtable_area = sme_populate_pgd(pgd, pgtable_area,
						paddr + decrypted_base,
						paddr + pmd_flags);

		paddr += PMD_PAGE_SIZE;
	}

	/* Add decrypted workarea mappings to both kernel mappings */
	paddr = workarea_start;
	while (paddr < workarea_end) {
		pgtable_area = sme_populate_pgd(pgd, pgtable_area,
						paddr,
						paddr + PMD_FLAGS);

		pgtable_area = sme_populate_pgd(pgd, pgtable_area,
						paddr + decrypted_base,
						paddr + PMD_FLAGS);

		paddr += PMD_PAGE_SIZE;
	}

	/* Perform the encryption */
	sme_encrypt_execute(kernel_start, kernel_start + decrypted_base,
			    kernel_len, workarea_start, (unsigned long)pgd);

	/*
	 * At this point we are running encrypted. Remove the mappings for
	 * the decrypted areas - all that is needed for this is to remove
	 * the PGD entry/entries.
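	 *
	 * The intermediate PUD/PMD pages backing the decrypted alias were
	 * allocated from the workarea and are simply abandoned, so clearing
	 * the PGD entries is enough to make the alias unreachable.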
	 */
	sme_clear_pgd(pgd, kernel_start + decrypted_base,
		      kernel_end + decrypted_base);

	sme_clear_pgd(pgd, workarea_start + decrypted_base,
		      workarea_end + decrypted_base);

	/* Flush the TLB - no globals so cr3 is enough */
	native_write_cr3(__native_read_cr3());
}

void __init __nostackprotector sme_enable(struct boot_params *bp)
{
	const char *cmdline_ptr, *cmdline_arg, *cmdline_on, *cmdline_off;
	unsigned int eax, ebx, ecx, edx;
	bool active_by_default;
	unsigned long me_mask;
	char buffer[16];
	u64 msr;

	/* Check for the SME support leaf */
	eax = 0x80000000;
	ecx = 0;
	native_cpuid(&eax, &ebx, &ecx, &edx);
	if (eax < 0x8000001f)
		return;

	/*
	 * Check for the SME feature:
	 *   CPUID Fn8000_001F[EAX] - Bit 0
	 *     Secure Memory Encryption support
	 *   CPUID Fn8000_001F[EBX] - Bits 5:0
	 *     Pagetable bit position used to indicate encryption
	 */
	eax = 0x8000001f;
	ecx = 0;
	native_cpuid(&eax, &ebx, &ecx, &edx);
	if (!(eax & 1))
		return;

	me_mask = 1UL << (ebx & 0x3f);

	/* Check if SME is enabled */
	msr = __rdmsr(MSR_K8_SYSCFG);
	if (!(msr & MSR_K8_SYSCFG_MEM_ENCRYPT))
		return;

	/*
	 * Fixups have not been applied to phys_base yet and we're running
	 * identity mapped, so we must obtain the address of the SME command
	 * line argument data using rip-relative addressing.
	 */
	asm ("lea sme_cmdline_arg(%%rip), %0"
	     : "=r" (cmdline_arg)
	     : "p" (sme_cmdline_arg));
	asm ("lea sme_cmdline_on(%%rip), %0"
	     : "=r" (cmdline_on)
	     : "p" (sme_cmdline_on));
	asm ("lea sme_cmdline_off(%%rip), %0"
	     : "=r" (cmdline_off)
	     : "p" (sme_cmdline_off));

	if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT))
		active_by_default = true;
	else
		active_by_default = false;

	cmdline_ptr = (const char *)((u64)bp->hdr.cmd_line_ptr |
				     ((u64)bp->ext_cmd_line_ptr << 32));

	cmdline_find_option(cmdline_ptr, cmdline_arg, buffer, sizeof(buffer));

	if (!strncmp(buffer, cmdline_on, sizeof(buffer)))
		sme_me_mask = me_mask;
	else if (!strncmp(buffer, cmdline_off, sizeof(buffer)))
		sme_me_mask = 0;
	else
		sme_me_mask = active_by_default ? me_mask : 0;
}