1 #define pr_fmt(fmt) "efi: " fmt 2 3 #include <linux/init.h> 4 #include <linux/kernel.h> 5 #include <linux/string.h> 6 #include <linux/time.h> 7 #include <linux/types.h> 8 #include <linux/efi.h> 9 #include <linux/slab.h> 10 #include <linux/memblock.h> 11 #include <linux/bootmem.h> 12 #include <linux/acpi.h> 13 #include <linux/dmi.h> 14 #include <asm/efi.h> 15 #include <asm/uv/uv.h> 16 17 #define EFI_MIN_RESERVE 5120 18 19 #define EFI_DUMMY_GUID \ 20 EFI_GUID(0x4424ac57, 0xbe4b, 0x47dd, 0x9e, 0x97, 0xed, 0x50, 0xf0, 0x9f, 0x92, 0xa9) 21 22 static efi_char16_t efi_dummy_name[6] = { 'D', 'U', 'M', 'M', 'Y', 0 }; 23 24 static bool efi_no_storage_paranoia; 25 26 /* 27 * Some firmware implementations refuse to boot if there's insufficient 28 * space in the variable store. The implementation of garbage collection 29 * in some FW versions causes stale (deleted) variables to take up space 30 * longer than intended and space is only freed once the store becomes 31 * almost completely full. 32 * 33 * Enabling this option disables the space checks in 34 * efi_query_variable_store() and forces garbage collection. 35 * 36 * Only enable this option if deleting EFI variables does not free up 37 * space in your variable store, e.g. if despite deleting variables 38 * you're unable to create new ones. 39 */ 40 static int __init setup_storage_paranoia(char *arg) 41 { 42 efi_no_storage_paranoia = true; 43 return 0; 44 } 45 early_param("efi_no_storage_paranoia", setup_storage_paranoia); 46 47 /* 48 * Deleting the dummy variable which kicks off garbage collection 49 */ 50 void efi_delete_dummy_variable(void) 51 { 52 efi.set_variable(efi_dummy_name, &EFI_DUMMY_GUID, 53 EFI_VARIABLE_NON_VOLATILE | 54 EFI_VARIABLE_BOOTSERVICE_ACCESS | 55 EFI_VARIABLE_RUNTIME_ACCESS, 56 0, NULL); 57 } 58 59 /* 60 * In the nonblocking case we do not attempt to perform garbage 61 * collection if we do not have enough free space. Rather, we do the 62 * bare minimum check and give up immediately if the available space 63 * is below EFI_MIN_RESERVE. 64 * 65 * This function is intended to be small and simple because it is 66 * invoked from crash handler paths. 67 */ 68 static efi_status_t 69 query_variable_store_nonblocking(u32 attributes, unsigned long size) 70 { 71 efi_status_t status; 72 u64 storage_size, remaining_size, max_size; 73 74 status = efi.query_variable_info_nonblocking(attributes, &storage_size, 75 &remaining_size, 76 &max_size); 77 if (status != EFI_SUCCESS) 78 return status; 79 80 if (remaining_size - size < EFI_MIN_RESERVE) 81 return EFI_OUT_OF_RESOURCES; 82 83 return EFI_SUCCESS; 84 } 85 86 /* 87 * Some firmware implementations refuse to boot if there's insufficient space 88 * in the variable store. Ensure that we never use more than a safe limit. 89 * 90 * Return EFI_SUCCESS if it is safe to write 'size' bytes to the variable 91 * store. 92 */ 93 efi_status_t efi_query_variable_store(u32 attributes, unsigned long size, 94 bool nonblocking) 95 { 96 efi_status_t status; 97 u64 storage_size, remaining_size, max_size; 98 99 if (!(attributes & EFI_VARIABLE_NON_VOLATILE)) 100 return 0; 101 102 if (nonblocking) 103 return query_variable_store_nonblocking(attributes, size); 104 105 status = efi.query_variable_info(attributes, &storage_size, 106 &remaining_size, &max_size); 107 if (status != EFI_SUCCESS) 108 return status; 109 110 /* 111 * We account for that by refusing the write if permitting it would 112 * reduce the available space to under 5KB. This figure was provided by 113 * Samsung, so should be safe. 114 */ 115 if ((remaining_size - size < EFI_MIN_RESERVE) && 116 !efi_no_storage_paranoia) { 117 118 /* 119 * Triggering garbage collection may require that the firmware 120 * generate a real EFI_OUT_OF_RESOURCES error. We can force 121 * that by attempting to use more space than is available. 122 */ 123 unsigned long dummy_size = remaining_size + 1024; 124 void *dummy = kzalloc(dummy_size, GFP_ATOMIC); 125 126 if (!dummy) 127 return EFI_OUT_OF_RESOURCES; 128 129 status = efi.set_variable(efi_dummy_name, &EFI_DUMMY_GUID, 130 EFI_VARIABLE_NON_VOLATILE | 131 EFI_VARIABLE_BOOTSERVICE_ACCESS | 132 EFI_VARIABLE_RUNTIME_ACCESS, 133 dummy_size, dummy); 134 135 if (status == EFI_SUCCESS) { 136 /* 137 * This should have failed, so if it didn't make sure 138 * that we delete it... 139 */ 140 efi_delete_dummy_variable(); 141 } 142 143 kfree(dummy); 144 145 /* 146 * The runtime code may now have triggered a garbage collection 147 * run, so check the variable info again 148 */ 149 status = efi.query_variable_info(attributes, &storage_size, 150 &remaining_size, &max_size); 151 152 if (status != EFI_SUCCESS) 153 return status; 154 155 /* 156 * There still isn't enough room, so return an error 157 */ 158 if (remaining_size - size < EFI_MIN_RESERVE) 159 return EFI_OUT_OF_RESOURCES; 160 } 161 162 return EFI_SUCCESS; 163 } 164 EXPORT_SYMBOL_GPL(efi_query_variable_store); 165 166 /* 167 * The UEFI specification makes it clear that the operating system is 168 * free to do whatever it wants with boot services code after 169 * ExitBootServices() has been called. Ignoring this recommendation a 170 * significant bunch of EFI implementations continue calling into boot 171 * services code (SetVirtualAddressMap). In order to work around such 172 * buggy implementations we reserve boot services region during EFI 173 * init and make sure it stays executable. Then, after 174 * SetVirtualAddressMap(), it is discarded. 175 * 176 * However, some boot services regions contain data that is required 177 * by drivers, so we need to track which memory ranges can never be 178 * freed. This is done by tagging those regions with the 179 * EFI_MEMORY_RUNTIME attribute. 180 * 181 * Any driver that wants to mark a region as reserved must use 182 * efi_mem_reserve() which will insert a new EFI memory descriptor 183 * into efi.memmap (splitting existing regions if necessary) and tag 184 * it with EFI_MEMORY_RUNTIME. 185 */ 186 void __init efi_arch_mem_reserve(phys_addr_t addr, u64 size) 187 { 188 phys_addr_t new_phys, new_size; 189 struct efi_mem_range mr; 190 efi_memory_desc_t md; 191 int num_entries; 192 void *new; 193 194 if (efi_mem_desc_lookup(addr, &md)) { 195 pr_err("Failed to lookup EFI memory descriptor for %pa\n", &addr); 196 return; 197 } 198 199 if (addr + size > md.phys_addr + (md.num_pages << EFI_PAGE_SHIFT)) { 200 pr_err("Region spans EFI memory descriptors, %pa\n", &addr); 201 return; 202 } 203 204 size += addr % EFI_PAGE_SIZE; 205 size = round_up(size, EFI_PAGE_SIZE); 206 addr = round_down(addr, EFI_PAGE_SIZE); 207 208 mr.range.start = addr; 209 mr.range.end = addr + size - 1; 210 mr.attribute = md.attribute | EFI_MEMORY_RUNTIME; 211 212 num_entries = efi_memmap_split_count(&md, &mr.range); 213 num_entries += efi.memmap.nr_map; 214 215 new_size = efi.memmap.desc_size * num_entries; 216 217 new_phys = efi_memmap_alloc(num_entries); 218 if (!new_phys) { 219 pr_err("Could not allocate boot services memmap\n"); 220 return; 221 } 222 223 new = early_memremap(new_phys, new_size); 224 if (!new) { 225 pr_err("Failed to map new boot services memmap\n"); 226 return; 227 } 228 229 efi_memmap_insert(&efi.memmap, new, &mr); 230 early_memunmap(new, new_size); 231 232 efi_memmap_install(new_phys, num_entries); 233 } 234 235 /* 236 * Helper function for efi_reserve_boot_services() to figure out if we 237 * can free regions in efi_free_boot_services(). 238 * 239 * Use this function to ensure we do not free regions owned by somebody 240 * else. We must only reserve (and then free) regions: 241 * 242 * - Not within any part of the kernel 243 * - Not the BIOS reserved area (E820_RESERVED, E820_NVS, etc) 244 */ 245 static bool can_free_region(u64 start, u64 size) 246 { 247 if (start + size > __pa_symbol(_text) && start <= __pa_symbol(_end)) 248 return false; 249 250 if (!e820_all_mapped(start, start+size, E820_RAM)) 251 return false; 252 253 return true; 254 } 255 256 void __init efi_reserve_boot_services(void) 257 { 258 efi_memory_desc_t *md; 259 260 for_each_efi_memory_desc(md) { 261 u64 start = md->phys_addr; 262 u64 size = md->num_pages << EFI_PAGE_SHIFT; 263 bool already_reserved; 264 265 if (md->type != EFI_BOOT_SERVICES_CODE && 266 md->type != EFI_BOOT_SERVICES_DATA) 267 continue; 268 269 already_reserved = memblock_is_region_reserved(start, size); 270 271 /* 272 * Because the following memblock_reserve() is paired 273 * with free_bootmem_late() for this region in 274 * efi_free_boot_services(), we must be extremely 275 * careful not to reserve, and subsequently free, 276 * critical regions of memory (like the kernel image) or 277 * those regions that somebody else has already 278 * reserved. 279 * 280 * A good example of a critical region that must not be 281 * freed is page zero (first 4Kb of memory), which may 282 * contain boot services code/data but is marked 283 * E820_RESERVED by trim_bios_range(). 284 */ 285 if (!already_reserved) { 286 memblock_reserve(start, size); 287 288 /* 289 * If we are the first to reserve the region, no 290 * one else cares about it. We own it and can 291 * free it later. 292 */ 293 if (can_free_region(start, size)) 294 continue; 295 } 296 297 /* 298 * We don't own the region. We must not free it. 299 * 300 * Setting this bit for a boot services region really 301 * doesn't make sense as far as the firmware is 302 * concerned, but it does provide us with a way to tag 303 * those regions that must not be paired with 304 * free_bootmem_late(). 305 */ 306 md->attribute |= EFI_MEMORY_RUNTIME; 307 } 308 } 309 310 void __init efi_free_boot_services(void) 311 { 312 phys_addr_t new_phys, new_size; 313 efi_memory_desc_t *md; 314 int num_entries = 0; 315 void *new, *new_md; 316 317 for_each_efi_memory_desc(md) { 318 unsigned long long start = md->phys_addr; 319 unsigned long long size = md->num_pages << EFI_PAGE_SHIFT; 320 size_t rm_size; 321 322 if (md->type != EFI_BOOT_SERVICES_CODE && 323 md->type != EFI_BOOT_SERVICES_DATA) { 324 num_entries++; 325 continue; 326 } 327 328 /* Do not free, someone else owns it: */ 329 if (md->attribute & EFI_MEMORY_RUNTIME) { 330 num_entries++; 331 continue; 332 } 333 334 /* 335 * Nasty quirk: if all sub-1MB memory is used for boot 336 * services, we can get here without having allocated the 337 * real mode trampoline. It's too late to hand boot services 338 * memory back to the memblock allocator, so instead 339 * try to manually allocate the trampoline if needed. 340 * 341 * I've seen this on a Dell XPS 13 9350 with firmware 342 * 1.4.4 with SGX enabled booting Linux via Fedora 24's 343 * grub2-efi on a hard disk. (And no, I don't know why 344 * this happened, but Linux should still try to boot rather 345 * panicing early.) 346 */ 347 rm_size = real_mode_size_needed(); 348 if (rm_size && (start + rm_size) < (1<<20) && size >= rm_size) { 349 set_real_mode_mem(start, rm_size); 350 start += rm_size; 351 size -= rm_size; 352 } 353 354 free_bootmem_late(start, size); 355 } 356 357 new_size = efi.memmap.desc_size * num_entries; 358 new_phys = efi_memmap_alloc(num_entries); 359 if (!new_phys) { 360 pr_err("Failed to allocate new EFI memmap\n"); 361 return; 362 } 363 364 new = memremap(new_phys, new_size, MEMREMAP_WB); 365 if (!new) { 366 pr_err("Failed to map new EFI memmap\n"); 367 return; 368 } 369 370 /* 371 * Build a new EFI memmap that excludes any boot services 372 * regions that are not tagged EFI_MEMORY_RUNTIME, since those 373 * regions have now been freed. 374 */ 375 new_md = new; 376 for_each_efi_memory_desc(md) { 377 if (!(md->attribute & EFI_MEMORY_RUNTIME) && 378 (md->type == EFI_BOOT_SERVICES_CODE || 379 md->type == EFI_BOOT_SERVICES_DATA)) 380 continue; 381 382 memcpy(new_md, md, efi.memmap.desc_size); 383 new_md += efi.memmap.desc_size; 384 } 385 386 memunmap(new); 387 388 if (efi_memmap_install(new_phys, num_entries)) { 389 pr_err("Could not install new EFI memmap\n"); 390 return; 391 } 392 } 393 394 /* 395 * A number of config table entries get remapped to virtual addresses 396 * after entering EFI virtual mode. However, the kexec kernel requires 397 * their physical addresses therefore we pass them via setup_data and 398 * correct those entries to their respective physical addresses here. 399 * 400 * Currently only handles smbios which is necessary for some firmware 401 * implementation. 402 */ 403 int __init efi_reuse_config(u64 tables, int nr_tables) 404 { 405 int i, sz, ret = 0; 406 void *p, *tablep; 407 struct efi_setup_data *data; 408 409 if (!efi_setup) 410 return 0; 411 412 if (!efi_enabled(EFI_64BIT)) 413 return 0; 414 415 data = early_memremap(efi_setup, sizeof(*data)); 416 if (!data) { 417 ret = -ENOMEM; 418 goto out; 419 } 420 421 if (!data->smbios) 422 goto out_memremap; 423 424 sz = sizeof(efi_config_table_64_t); 425 426 p = tablep = early_memremap(tables, nr_tables * sz); 427 if (!p) { 428 pr_err("Could not map Configuration table!\n"); 429 ret = -ENOMEM; 430 goto out_memremap; 431 } 432 433 for (i = 0; i < efi.systab->nr_tables; i++) { 434 efi_guid_t guid; 435 436 guid = ((efi_config_table_64_t *)p)->guid; 437 438 if (!efi_guidcmp(guid, SMBIOS_TABLE_GUID)) 439 ((efi_config_table_64_t *)p)->table = data->smbios; 440 p += sz; 441 } 442 early_memunmap(tablep, nr_tables * sz); 443 444 out_memremap: 445 early_memunmap(data, sizeof(*data)); 446 out: 447 return ret; 448 } 449 450 static const struct dmi_system_id sgi_uv1_dmi[] = { 451 { NULL, "SGI UV1", 452 { DMI_MATCH(DMI_PRODUCT_NAME, "Stoutland Platform"), 453 DMI_MATCH(DMI_PRODUCT_VERSION, "1.0"), 454 DMI_MATCH(DMI_BIOS_VENDOR, "SGI.COM"), 455 } 456 }, 457 { } /* NULL entry stops DMI scanning */ 458 }; 459 460 void __init efi_apply_memmap_quirks(void) 461 { 462 /* 463 * Once setup is done earlier, unmap the EFI memory map on mismatched 464 * firmware/kernel architectures since there is no support for runtime 465 * services. 466 */ 467 if (!efi_runtime_supported()) { 468 pr_info("Setup done, disabling due to 32/64-bit mismatch\n"); 469 efi_memmap_unmap(); 470 } 471 472 /* UV2+ BIOS has a fix for this issue. UV1 still needs the quirk. */ 473 if (dmi_check_system(sgi_uv1_dmi)) 474 set_bit(EFI_OLD_MEMMAP, &efi.flags); 475 } 476 477 /* 478 * For most modern platforms the preferred method of powering off is via 479 * ACPI. However, there are some that are known to require the use of 480 * EFI runtime services and for which ACPI does not work at all. 481 * 482 * Using EFI is a last resort, to be used only if no other option 483 * exists. 484 */ 485 bool efi_reboot_required(void) 486 { 487 if (!acpi_gbl_reduced_hardware) 488 return false; 489 490 efi_reboot_quirk_mode = EFI_RESET_WARM; 491 return true; 492 } 493 494 bool efi_poweroff_required(void) 495 { 496 return acpi_gbl_reduced_hardware || acpi_no_s5; 497 } 498