1 #define pr_fmt(fmt) "efi: " fmt 2 3 #include <linux/init.h> 4 #include <linux/kernel.h> 5 #include <linux/string.h> 6 #include <linux/time.h> 7 #include <linux/types.h> 8 #include <linux/efi.h> 9 #include <linux/slab.h> 10 #include <linux/memblock.h> 11 #include <linux/bootmem.h> 12 #include <linux/acpi.h> 13 #include <linux/dmi.h> 14 15 #include <asm/e820/api.h> 16 #include <asm/efi.h> 17 #include <asm/uv/uv.h> 18 19 #define EFI_MIN_RESERVE 5120 20 21 #define EFI_DUMMY_GUID \ 22 EFI_GUID(0x4424ac57, 0xbe4b, 0x47dd, 0x9e, 0x97, 0xed, 0x50, 0xf0, 0x9f, 0x92, 0xa9) 23 24 static efi_char16_t efi_dummy_name[6] = { 'D', 'U', 'M', 'M', 'Y', 0 }; 25 26 static bool efi_no_storage_paranoia; 27 28 /* 29 * Some firmware implementations refuse to boot if there's insufficient 30 * space in the variable store. The implementation of garbage collection 31 * in some FW versions causes stale (deleted) variables to take up space 32 * longer than intended and space is only freed once the store becomes 33 * almost completely full. 34 * 35 * Enabling this option disables the space checks in 36 * efi_query_variable_store() and forces garbage collection. 37 * 38 * Only enable this option if deleting EFI variables does not free up 39 * space in your variable store, e.g. if despite deleting variables 40 * you're unable to create new ones. 41 */ 42 static int __init setup_storage_paranoia(char *arg) 43 { 44 efi_no_storage_paranoia = true; 45 return 0; 46 } 47 early_param("efi_no_storage_paranoia", setup_storage_paranoia); 48 49 /* 50 * Deleting the dummy variable which kicks off garbage collection 51 */ 52 void efi_delete_dummy_variable(void) 53 { 54 efi.set_variable(efi_dummy_name, &EFI_DUMMY_GUID, 55 EFI_VARIABLE_NON_VOLATILE | 56 EFI_VARIABLE_BOOTSERVICE_ACCESS | 57 EFI_VARIABLE_RUNTIME_ACCESS, 58 0, NULL); 59 } 60 61 /* 62 * In the nonblocking case we do not attempt to perform garbage 63 * collection if we do not have enough free space. Rather, we do the 64 * bare minimum check and give up immediately if the available space 65 * is below EFI_MIN_RESERVE. 66 * 67 * This function is intended to be small and simple because it is 68 * invoked from crash handler paths. 69 */ 70 static efi_status_t 71 query_variable_store_nonblocking(u32 attributes, unsigned long size) 72 { 73 efi_status_t status; 74 u64 storage_size, remaining_size, max_size; 75 76 status = efi.query_variable_info_nonblocking(attributes, &storage_size, 77 &remaining_size, 78 &max_size); 79 if (status != EFI_SUCCESS) 80 return status; 81 82 if (remaining_size - size < EFI_MIN_RESERVE) 83 return EFI_OUT_OF_RESOURCES; 84 85 return EFI_SUCCESS; 86 } 87 88 /* 89 * Some firmware implementations refuse to boot if there's insufficient space 90 * in the variable store. Ensure that we never use more than a safe limit. 91 * 92 * Return EFI_SUCCESS if it is safe to write 'size' bytes to the variable 93 * store. 94 */ 95 efi_status_t efi_query_variable_store(u32 attributes, unsigned long size, 96 bool nonblocking) 97 { 98 efi_status_t status; 99 u64 storage_size, remaining_size, max_size; 100 101 if (!(attributes & EFI_VARIABLE_NON_VOLATILE)) 102 return 0; 103 104 if (nonblocking) 105 return query_variable_store_nonblocking(attributes, size); 106 107 status = efi.query_variable_info(attributes, &storage_size, 108 &remaining_size, &max_size); 109 if (status != EFI_SUCCESS) 110 return status; 111 112 /* 113 * We account for that by refusing the write if permitting it would 114 * reduce the available space to under 5KB. This figure was provided by 115 * Samsung, so should be safe. 116 */ 117 if ((remaining_size - size < EFI_MIN_RESERVE) && 118 !efi_no_storage_paranoia) { 119 120 /* 121 * Triggering garbage collection may require that the firmware 122 * generate a real EFI_OUT_OF_RESOURCES error. We can force 123 * that by attempting to use more space than is available. 124 */ 125 unsigned long dummy_size = remaining_size + 1024; 126 void *dummy = kzalloc(dummy_size, GFP_ATOMIC); 127 128 if (!dummy) 129 return EFI_OUT_OF_RESOURCES; 130 131 status = efi.set_variable(efi_dummy_name, &EFI_DUMMY_GUID, 132 EFI_VARIABLE_NON_VOLATILE | 133 EFI_VARIABLE_BOOTSERVICE_ACCESS | 134 EFI_VARIABLE_RUNTIME_ACCESS, 135 dummy_size, dummy); 136 137 if (status == EFI_SUCCESS) { 138 /* 139 * This should have failed, so if it didn't make sure 140 * that we delete it... 141 */ 142 efi_delete_dummy_variable(); 143 } 144 145 kfree(dummy); 146 147 /* 148 * The runtime code may now have triggered a garbage collection 149 * run, so check the variable info again 150 */ 151 status = efi.query_variable_info(attributes, &storage_size, 152 &remaining_size, &max_size); 153 154 if (status != EFI_SUCCESS) 155 return status; 156 157 /* 158 * There still isn't enough room, so return an error 159 */ 160 if (remaining_size - size < EFI_MIN_RESERVE) 161 return EFI_OUT_OF_RESOURCES; 162 } 163 164 return EFI_SUCCESS; 165 } 166 EXPORT_SYMBOL_GPL(efi_query_variable_store); 167 168 /* 169 * The UEFI specification makes it clear that the operating system is 170 * free to do whatever it wants with boot services code after 171 * ExitBootServices() has been called. Ignoring this recommendation a 172 * significant bunch of EFI implementations continue calling into boot 173 * services code (SetVirtualAddressMap). In order to work around such 174 * buggy implementations we reserve boot services region during EFI 175 * init and make sure it stays executable. Then, after 176 * SetVirtualAddressMap(), it is discarded. 177 * 178 * However, some boot services regions contain data that is required 179 * by drivers, so we need to track which memory ranges can never be 180 * freed. This is done by tagging those regions with the 181 * EFI_MEMORY_RUNTIME attribute. 182 * 183 * Any driver that wants to mark a region as reserved must use 184 * efi_mem_reserve() which will insert a new EFI memory descriptor 185 * into efi.memmap (splitting existing regions if necessary) and tag 186 * it with EFI_MEMORY_RUNTIME. 187 */ 188 void __init efi_arch_mem_reserve(phys_addr_t addr, u64 size) 189 { 190 phys_addr_t new_phys, new_size; 191 struct efi_mem_range mr; 192 efi_memory_desc_t md; 193 int num_entries; 194 void *new; 195 196 if (efi_mem_desc_lookup(addr, &md)) { 197 pr_err("Failed to lookup EFI memory descriptor for %pa\n", &addr); 198 return; 199 } 200 201 if (addr + size > md.phys_addr + (md.num_pages << EFI_PAGE_SHIFT)) { 202 pr_err("Region spans EFI memory descriptors, %pa\n", &addr); 203 return; 204 } 205 206 /* No need to reserve regions that will never be freed. */ 207 if (md.attribute & EFI_MEMORY_RUNTIME) 208 return; 209 210 size += addr % EFI_PAGE_SIZE; 211 size = round_up(size, EFI_PAGE_SIZE); 212 addr = round_down(addr, EFI_PAGE_SIZE); 213 214 mr.range.start = addr; 215 mr.range.end = addr + size - 1; 216 mr.attribute = md.attribute | EFI_MEMORY_RUNTIME; 217 218 num_entries = efi_memmap_split_count(&md, &mr.range); 219 num_entries += efi.memmap.nr_map; 220 221 new_size = efi.memmap.desc_size * num_entries; 222 223 new_phys = efi_memmap_alloc(num_entries); 224 if (!new_phys) { 225 pr_err("Could not allocate boot services memmap\n"); 226 return; 227 } 228 229 new = early_memremap(new_phys, new_size); 230 if (!new) { 231 pr_err("Failed to map new boot services memmap\n"); 232 return; 233 } 234 235 efi_memmap_insert(&efi.memmap, new, &mr); 236 early_memunmap(new, new_size); 237 238 efi_memmap_install(new_phys, num_entries); 239 } 240 241 /* 242 * Helper function for efi_reserve_boot_services() to figure out if we 243 * can free regions in efi_free_boot_services(). 244 * 245 * Use this function to ensure we do not free regions owned by somebody 246 * else. We must only reserve (and then free) regions: 247 * 248 * - Not within any part of the kernel 249 * - Not the BIOS reserved area (E820_TYPE_RESERVED, E820_TYPE_NVS, etc) 250 */ 251 static bool can_free_region(u64 start, u64 size) 252 { 253 if (start + size > __pa_symbol(_text) && start <= __pa_symbol(_end)) 254 return false; 255 256 if (!e820__mapped_all(start, start+size, E820_TYPE_RAM)) 257 return false; 258 259 return true; 260 } 261 262 void __init efi_reserve_boot_services(void) 263 { 264 efi_memory_desc_t *md; 265 266 for_each_efi_memory_desc(md) { 267 u64 start = md->phys_addr; 268 u64 size = md->num_pages << EFI_PAGE_SHIFT; 269 bool already_reserved; 270 271 if (md->type != EFI_BOOT_SERVICES_CODE && 272 md->type != EFI_BOOT_SERVICES_DATA) 273 continue; 274 275 already_reserved = memblock_is_region_reserved(start, size); 276 277 /* 278 * Because the following memblock_reserve() is paired 279 * with free_bootmem_late() for this region in 280 * efi_free_boot_services(), we must be extremely 281 * careful not to reserve, and subsequently free, 282 * critical regions of memory (like the kernel image) or 283 * those regions that somebody else has already 284 * reserved. 285 * 286 * A good example of a critical region that must not be 287 * freed is page zero (first 4Kb of memory), which may 288 * contain boot services code/data but is marked 289 * E820_TYPE_RESERVED by trim_bios_range(). 290 */ 291 if (!already_reserved) { 292 memblock_reserve(start, size); 293 294 /* 295 * If we are the first to reserve the region, no 296 * one else cares about it. We own it and can 297 * free it later. 298 */ 299 if (can_free_region(start, size)) 300 continue; 301 } 302 303 /* 304 * We don't own the region. We must not free it. 305 * 306 * Setting this bit for a boot services region really 307 * doesn't make sense as far as the firmware is 308 * concerned, but it does provide us with a way to tag 309 * those regions that must not be paired with 310 * free_bootmem_late(). 311 */ 312 md->attribute |= EFI_MEMORY_RUNTIME; 313 } 314 } 315 316 void __init efi_free_boot_services(void) 317 { 318 phys_addr_t new_phys, new_size; 319 efi_memory_desc_t *md; 320 int num_entries = 0; 321 void *new, *new_md; 322 323 for_each_efi_memory_desc(md) { 324 unsigned long long start = md->phys_addr; 325 unsigned long long size = md->num_pages << EFI_PAGE_SHIFT; 326 size_t rm_size; 327 328 if (md->type != EFI_BOOT_SERVICES_CODE && 329 md->type != EFI_BOOT_SERVICES_DATA) { 330 num_entries++; 331 continue; 332 } 333 334 /* Do not free, someone else owns it: */ 335 if (md->attribute & EFI_MEMORY_RUNTIME) { 336 num_entries++; 337 continue; 338 } 339 340 /* 341 * Nasty quirk: if all sub-1MB memory is used for boot 342 * services, we can get here without having allocated the 343 * real mode trampoline. It's too late to hand boot services 344 * memory back to the memblock allocator, so instead 345 * try to manually allocate the trampoline if needed. 346 * 347 * I've seen this on a Dell XPS 13 9350 with firmware 348 * 1.4.4 with SGX enabled booting Linux via Fedora 24's 349 * grub2-efi on a hard disk. (And no, I don't know why 350 * this happened, but Linux should still try to boot rather 351 * panicing early.) 352 */ 353 rm_size = real_mode_size_needed(); 354 if (rm_size && (start + rm_size) < (1<<20) && size >= rm_size) { 355 set_real_mode_mem(start, rm_size); 356 start += rm_size; 357 size -= rm_size; 358 } 359 360 free_bootmem_late(start, size); 361 } 362 363 new_size = efi.memmap.desc_size * num_entries; 364 new_phys = efi_memmap_alloc(num_entries); 365 if (!new_phys) { 366 pr_err("Failed to allocate new EFI memmap\n"); 367 return; 368 } 369 370 new = memremap(new_phys, new_size, MEMREMAP_WB); 371 if (!new) { 372 pr_err("Failed to map new EFI memmap\n"); 373 return; 374 } 375 376 /* 377 * Build a new EFI memmap that excludes any boot services 378 * regions that are not tagged EFI_MEMORY_RUNTIME, since those 379 * regions have now been freed. 380 */ 381 new_md = new; 382 for_each_efi_memory_desc(md) { 383 if (!(md->attribute & EFI_MEMORY_RUNTIME) && 384 (md->type == EFI_BOOT_SERVICES_CODE || 385 md->type == EFI_BOOT_SERVICES_DATA)) 386 continue; 387 388 memcpy(new_md, md, efi.memmap.desc_size); 389 new_md += efi.memmap.desc_size; 390 } 391 392 memunmap(new); 393 394 if (efi_memmap_install(new_phys, num_entries)) { 395 pr_err("Could not install new EFI memmap\n"); 396 return; 397 } 398 } 399 400 /* 401 * A number of config table entries get remapped to virtual addresses 402 * after entering EFI virtual mode. However, the kexec kernel requires 403 * their physical addresses therefore we pass them via setup_data and 404 * correct those entries to their respective physical addresses here. 405 * 406 * Currently only handles smbios which is necessary for some firmware 407 * implementation. 408 */ 409 int __init efi_reuse_config(u64 tables, int nr_tables) 410 { 411 int i, sz, ret = 0; 412 void *p, *tablep; 413 struct efi_setup_data *data; 414 415 if (!efi_setup) 416 return 0; 417 418 if (!efi_enabled(EFI_64BIT)) 419 return 0; 420 421 data = early_memremap(efi_setup, sizeof(*data)); 422 if (!data) { 423 ret = -ENOMEM; 424 goto out; 425 } 426 427 if (!data->smbios) 428 goto out_memremap; 429 430 sz = sizeof(efi_config_table_64_t); 431 432 p = tablep = early_memremap(tables, nr_tables * sz); 433 if (!p) { 434 pr_err("Could not map Configuration table!\n"); 435 ret = -ENOMEM; 436 goto out_memremap; 437 } 438 439 for (i = 0; i < efi.systab->nr_tables; i++) { 440 efi_guid_t guid; 441 442 guid = ((efi_config_table_64_t *)p)->guid; 443 444 if (!efi_guidcmp(guid, SMBIOS_TABLE_GUID)) 445 ((efi_config_table_64_t *)p)->table = data->smbios; 446 p += sz; 447 } 448 early_memunmap(tablep, nr_tables * sz); 449 450 out_memremap: 451 early_memunmap(data, sizeof(*data)); 452 out: 453 return ret; 454 } 455 456 static const struct dmi_system_id sgi_uv1_dmi[] = { 457 { NULL, "SGI UV1", 458 { DMI_MATCH(DMI_PRODUCT_NAME, "Stoutland Platform"), 459 DMI_MATCH(DMI_PRODUCT_VERSION, "1.0"), 460 DMI_MATCH(DMI_BIOS_VENDOR, "SGI.COM"), 461 } 462 }, 463 { } /* NULL entry stops DMI scanning */ 464 }; 465 466 void __init efi_apply_memmap_quirks(void) 467 { 468 /* 469 * Once setup is done earlier, unmap the EFI memory map on mismatched 470 * firmware/kernel architectures since there is no support for runtime 471 * services. 472 */ 473 if (!efi_runtime_supported()) { 474 pr_info("Setup done, disabling due to 32/64-bit mismatch\n"); 475 efi_memmap_unmap(); 476 } 477 478 /* UV2+ BIOS has a fix for this issue. UV1 still needs the quirk. */ 479 if (dmi_check_system(sgi_uv1_dmi)) 480 set_bit(EFI_OLD_MEMMAP, &efi.flags); 481 } 482 483 /* 484 * For most modern platforms the preferred method of powering off is via 485 * ACPI. However, there are some that are known to require the use of 486 * EFI runtime services and for which ACPI does not work at all. 487 * 488 * Using EFI is a last resort, to be used only if no other option 489 * exists. 490 */ 491 bool efi_reboot_required(void) 492 { 493 if (!acpi_gbl_reduced_hardware) 494 return false; 495 496 efi_reboot_quirk_mode = EFI_RESET_WARM; 497 return true; 498 } 499 500 bool efi_poweroff_required(void) 501 { 502 return acpi_gbl_reduced_hardware || acpi_no_s5; 503 } 504