1457c8996SThomas Gleixner // SPDX-License-Identifier: GPL-2.0-only 262a31a03SHiroshi Shimamoto /* 362a31a03SHiroshi Shimamoto * Architecture specific (i386/x86_64) functions for kexec based crash dumps. 462a31a03SHiroshi Shimamoto * 562a31a03SHiroshi Shimamoto * Created by: Hariprasad Nellitheertha (hari@in.ibm.com) 662a31a03SHiroshi Shimamoto * 762a31a03SHiroshi Shimamoto * Copyright (C) IBM Corporation, 2004. All rights reserved. 8dd5f7260SVivek Goyal * Copyright (C) Red Hat Inc., 2014. All rights reserved. 9dd5f7260SVivek Goyal * Authors: 10dd5f7260SVivek Goyal * Vivek Goyal <vgoyal@redhat.com> 1162a31a03SHiroshi Shimamoto * 1262a31a03SHiroshi Shimamoto */ 1362a31a03SHiroshi Shimamoto 14dd5f7260SVivek Goyal #define pr_fmt(fmt) "kexec: " fmt 15dd5f7260SVivek Goyal 1662a31a03SHiroshi Shimamoto #include <linux/types.h> 1762a31a03SHiroshi Shimamoto #include <linux/kernel.h> 1862a31a03SHiroshi Shimamoto #include <linux/smp.h> 1962a31a03SHiroshi Shimamoto #include <linux/reboot.h> 2062a31a03SHiroshi Shimamoto #include <linux/kexec.h> 2162a31a03SHiroshi Shimamoto #include <linux/delay.h> 2262a31a03SHiroshi Shimamoto #include <linux/elf.h> 2362a31a03SHiroshi Shimamoto #include <linux/elfcore.h> 24186f4360SPaul Gortmaker #include <linux/export.h> 25dd5f7260SVivek Goyal #include <linux/slab.h> 26d6472302SStephen Rothwell #include <linux/vmalloc.h> 276f599d84SLianbo Jiang #include <linux/memblock.h> 2862a31a03SHiroshi Shimamoto 2962a31a03SHiroshi Shimamoto #include <asm/processor.h> 3062a31a03SHiroshi Shimamoto #include <asm/hardirq.h> 3162a31a03SHiroshi Shimamoto #include <asm/nmi.h> 3262a31a03SHiroshi Shimamoto #include <asm/hw_irq.h> 3362a31a03SHiroshi Shimamoto #include <asm/apic.h> 345520b7e7SIngo Molnar #include <asm/e820/types.h> 358643e28dSJiang Liu #include <asm/io_apic.h> 360c1b2724SOGAWA Hirofumi #include <asm/hpet.h> 3762a31a03SHiroshi Shimamoto #include <linux/kdebug.h> 3896b89dc6SJaswinder Singh Rajput #include <asm/cpu.h> 39ed23dc6fSGlauber Costa #include <asm/reboot.h> 40da06a43dSTakao Indoh #include <asm/intel_pt.h> 4189f579ceSYi Wang #include <asm/crash.h> 426f599d84SLianbo Jiang #include <asm/cmdline.h> 438e294786SEduardo Habkost 44dd5f7260SVivek Goyal /* Used while preparing memory map entries for second kernel */ 45dd5f7260SVivek Goyal struct crash_memmap_data { 46dd5f7260SVivek Goyal struct boot_params *params; 47dd5f7260SVivek Goyal /* Type of memory */ 48dd5f7260SVivek Goyal unsigned int type; 49dd5f7260SVivek Goyal }; 50dd5f7260SVivek Goyal 51f23d1f4aSZhang Yanfei /* 52f23d1f4aSZhang Yanfei * This is used to VMCLEAR all VMCSs loaded on the 53f23d1f4aSZhang Yanfei * processor. And when loading kvm_intel module, the 54f23d1f4aSZhang Yanfei * callback function pointer will be assigned. 55f23d1f4aSZhang Yanfei * 56f23d1f4aSZhang Yanfei * protected by rcu. 57f23d1f4aSZhang Yanfei */ 580ca0d818SZhang Yanfei crash_vmclear_fn __rcu *crash_vmclear_loaded_vmcss = NULL; 59f23d1f4aSZhang Yanfei EXPORT_SYMBOL_GPL(crash_vmclear_loaded_vmcss); 60f23d1f4aSZhang Yanfei 61f23d1f4aSZhang Yanfei static inline void cpu_crash_vmclear_loaded_vmcss(void) 62f23d1f4aSZhang Yanfei { 630ca0d818SZhang Yanfei crash_vmclear_fn *do_vmclear_operation = NULL; 64f23d1f4aSZhang Yanfei 65f23d1f4aSZhang Yanfei rcu_read_lock(); 66f23d1f4aSZhang Yanfei do_vmclear_operation = rcu_dereference(crash_vmclear_loaded_vmcss); 67f23d1f4aSZhang Yanfei if (do_vmclear_operation) 68f23d1f4aSZhang Yanfei do_vmclear_operation(); 69f23d1f4aSZhang Yanfei rcu_read_unlock(); 70f23d1f4aSZhang Yanfei } 71f23d1f4aSZhang Yanfei 72b2bbe71bSEduardo Habkost #if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC) 73b2bbe71bSEduardo Habkost 749c48f1c6SDon Zickus static void kdump_nmi_callback(int cpu, struct pt_regs *regs) 7562a31a03SHiroshi Shimamoto { 76a7d41820SEduardo Habkost crash_save_cpu(regs, cpu); 77a7d41820SEduardo Habkost 78f23d1f4aSZhang Yanfei /* 79f23d1f4aSZhang Yanfei * VMCLEAR VMCSs loaded on all cpus if needed. 80f23d1f4aSZhang Yanfei */ 81f23d1f4aSZhang Yanfei cpu_crash_vmclear_loaded_vmcss(); 82f23d1f4aSZhang Yanfei 83da06a43dSTakao Indoh /* 84da06a43dSTakao Indoh * Disable Intel PT to stop its logging 85da06a43dSTakao Indoh */ 86da06a43dSTakao Indoh cpu_emergency_stop_pt(); 87da06a43dSTakao Indoh 88a7d41820SEduardo Habkost disable_local_APIC(); 89a7d41820SEduardo Habkost } 90a7d41820SEduardo Habkost 910ee59413SHidehiro Kawai void kdump_nmi_shootdown_cpus(void) 92d1e7b91cSEduardo Habkost { 938e294786SEduardo Habkost nmi_shootdown_cpus(kdump_nmi_callback); 94d1e7b91cSEduardo Habkost 9562a31a03SHiroshi Shimamoto disable_local_APIC(); 9662a31a03SHiroshi Shimamoto } 97d1e7b91cSEduardo Habkost 980ee59413SHidehiro Kawai /* Override the weak function in kernel/panic.c */ 990ee59413SHidehiro Kawai void crash_smp_send_stop(void) 1000ee59413SHidehiro Kawai { 1010ee59413SHidehiro Kawai static int cpus_stopped; 1020ee59413SHidehiro Kawai 1030ee59413SHidehiro Kawai if (cpus_stopped) 1040ee59413SHidehiro Kawai return; 1050ee59413SHidehiro Kawai 1060ee59413SHidehiro Kawai if (smp_ops.crash_stop_other_cpus) 1070ee59413SHidehiro Kawai smp_ops.crash_stop_other_cpus(); 1080ee59413SHidehiro Kawai else 1090ee59413SHidehiro Kawai smp_send_stop(); 1100ee59413SHidehiro Kawai 1110ee59413SHidehiro Kawai cpus_stopped = 1; 1120ee59413SHidehiro Kawai } 1130ee59413SHidehiro Kawai 11462a31a03SHiroshi Shimamoto #else 1150ee59413SHidehiro Kawai void crash_smp_send_stop(void) 11662a31a03SHiroshi Shimamoto { 11762a31a03SHiroshi Shimamoto /* There are no cpus to shootdown */ 11862a31a03SHiroshi Shimamoto } 11962a31a03SHiroshi Shimamoto #endif 12062a31a03SHiroshi Shimamoto 121ed23dc6fSGlauber Costa void native_machine_crash_shutdown(struct pt_regs *regs) 12262a31a03SHiroshi Shimamoto { 12362a31a03SHiroshi Shimamoto /* This function is only called after the system 12462a31a03SHiroshi Shimamoto * has panicked or is otherwise in a critical state. 12562a31a03SHiroshi Shimamoto * The minimum amount of code to allow a kexec'd kernel 12662a31a03SHiroshi Shimamoto * to run successfully needs to happen here. 12762a31a03SHiroshi Shimamoto * 12862a31a03SHiroshi Shimamoto * In practice this means shooting down the other cpus in 12962a31a03SHiroshi Shimamoto * an SMP system. 13062a31a03SHiroshi Shimamoto */ 13162a31a03SHiroshi Shimamoto /* The kernel is broken so disable interrupts */ 13262a31a03SHiroshi Shimamoto local_irq_disable(); 13362a31a03SHiroshi Shimamoto 1340ee59413SHidehiro Kawai crash_smp_send_stop(); 1352340b62fSEduardo Habkost 136f23d1f4aSZhang Yanfei /* 137f23d1f4aSZhang Yanfei * VMCLEAR VMCSs loaded on this cpu if needed. 138f23d1f4aSZhang Yanfei */ 139f23d1f4aSZhang Yanfei cpu_crash_vmclear_loaded_vmcss(); 140f23d1f4aSZhang Yanfei 14126044affSSean Christopherson cpu_emergency_disable_virtualization(); 1422340b62fSEduardo Habkost 143da06a43dSTakao Indoh /* 144da06a43dSTakao Indoh * Disable Intel PT to stop its logging 145da06a43dSTakao Indoh */ 146da06a43dSTakao Indoh cpu_emergency_stop_pt(); 147da06a43dSTakao Indoh 14817405453SYoshihiro YUNOMAE #ifdef CONFIG_X86_IO_APIC 14917405453SYoshihiro YUNOMAE /* Prevent crash_kexec() from deadlocking on ioapic_lock. */ 15017405453SYoshihiro YUNOMAE ioapic_zap_locks(); 151339b2ae0SBaoquan He clear_IO_APIC(); 15262a31a03SHiroshi Shimamoto #endif 153522e6646SFenghua Yu lapic_shutdown(); 154339b2ae0SBaoquan He restore_boot_irq_mode(); 1550c1b2724SOGAWA Hirofumi #ifdef CONFIG_HPET_TIMER 1560c1b2724SOGAWA Hirofumi hpet_disable(); 1570c1b2724SOGAWA Hirofumi #endif 15862a31a03SHiroshi Shimamoto crash_save_cpu(regs, safe_smp_processor_id()); 15962a31a03SHiroshi Shimamoto } 160dd5f7260SVivek Goyal 161ea53ad9cSEric DeVolder #if defined(CONFIG_KEXEC_FILE) || defined(CONFIG_CRASH_HOTPLUG) 1621d2e733bSTom Lendacky static int get_nr_ram_ranges_callback(struct resource *res, void *arg) 163dd5f7260SVivek Goyal { 164e3c41e37SLee, Chun-Yi unsigned int *nr_ranges = arg; 165dd5f7260SVivek Goyal 166dd5f7260SVivek Goyal (*nr_ranges)++; 167dd5f7260SVivek Goyal return 0; 168dd5f7260SVivek Goyal } 169dd5f7260SVivek Goyal 170dd5f7260SVivek Goyal /* Gather all the required information to prepare elf headers for ram regions */ 1718d5f894aSAKASHI Takahiro static struct crash_mem *fill_up_crash_elf_data(void) 172dd5f7260SVivek Goyal { 173dd5f7260SVivek Goyal unsigned int nr_ranges = 0; 1748d5f894aSAKASHI Takahiro struct crash_mem *cmem; 175dd5f7260SVivek Goyal 1769eff3037SBorislav Petkov walk_system_ram_res(0, -1, &nr_ranges, get_nr_ram_ranges_callback); 1778d5f894aSAKASHI Takahiro if (!nr_ranges) 1788d5f894aSAKASHI Takahiro return NULL; 179dd5f7260SVivek Goyal 1808d5f894aSAKASHI Takahiro /* 1818d5f894aSAKASHI Takahiro * Exclusion of crash region and/or crashk_low_res may cause 1828d5f894aSAKASHI Takahiro * another range split. So add extra two slots here. 1838d5f894aSAKASHI Takahiro */ 1848d5f894aSAKASHI Takahiro nr_ranges += 2; 1854df43095SGustavo A. R. Silva cmem = vzalloc(struct_size(cmem, ranges, nr_ranges)); 1868d5f894aSAKASHI Takahiro if (!cmem) 1878d5f894aSAKASHI Takahiro return NULL; 188dd5f7260SVivek Goyal 1898d5f894aSAKASHI Takahiro cmem->max_nr_ranges = nr_ranges; 1908d5f894aSAKASHI Takahiro cmem->nr_ranges = 0; 191dd5f7260SVivek Goyal 1928d5f894aSAKASHI Takahiro return cmem; 193dd5f7260SVivek Goyal } 194dd5f7260SVivek Goyal 195dd5f7260SVivek Goyal /* 196dd5f7260SVivek Goyal * Look for any unwanted ranges between mstart, mend and remove them. This 1978d5f894aSAKASHI Takahiro * might lead to split and split ranges are put in cmem->ranges[] array 198dd5f7260SVivek Goyal */ 1998d5f894aSAKASHI Takahiro static int elf_header_exclude_ranges(struct crash_mem *cmem) 200dd5f7260SVivek Goyal { 201dd5f7260SVivek Goyal int ret = 0; 202dd5f7260SVivek Goyal 2037c321eb2SLianbo Jiang /* Exclude the low 1M because it is always reserved */ 204a3e1c3bbSLianbo Jiang ret = crash_exclude_mem_range(cmem, 0, (1<<20)-1); 2057c321eb2SLianbo Jiang if (ret) 2067c321eb2SLianbo Jiang return ret; 2077c321eb2SLianbo Jiang 208dd5f7260SVivek Goyal /* Exclude crashkernel region */ 209babac4a8SAKASHI Takahiro ret = crash_exclude_mem_range(cmem, crashk_res.start, crashk_res.end); 210dd5f7260SVivek Goyal if (ret) 211dd5f7260SVivek Goyal return ret; 212dd5f7260SVivek Goyal 2139eff3037SBorislav Petkov if (crashk_low_res.end) 214babac4a8SAKASHI Takahiro ret = crash_exclude_mem_range(cmem, crashk_low_res.start, 215babac4a8SAKASHI Takahiro crashk_low_res.end); 216dd5f7260SVivek Goyal 217dd5f7260SVivek Goyal return ret; 218dd5f7260SVivek Goyal } 219dd5f7260SVivek Goyal 2201d2e733bSTom Lendacky static int prepare_elf64_ram_headers_callback(struct resource *res, void *arg) 221dd5f7260SVivek Goyal { 2228d5f894aSAKASHI Takahiro struct crash_mem *cmem = arg; 223dd5f7260SVivek Goyal 224cbe66016SAKASHI Takahiro cmem->ranges[cmem->nr_ranges].start = res->start; 225cbe66016SAKASHI Takahiro cmem->ranges[cmem->nr_ranges].end = res->end; 226cbe66016SAKASHI Takahiro cmem->nr_ranges++; 227dd5f7260SVivek Goyal 228cbe66016SAKASHI Takahiro return 0; 229dd5f7260SVivek Goyal } 230dd5f7260SVivek Goyal 231dd5f7260SVivek Goyal /* Prepare elf headers. Return addr and size */ 232dd5f7260SVivek Goyal static int prepare_elf_headers(struct kimage *image, void **addr, 233ea53ad9cSEric DeVolder unsigned long *sz, unsigned long *nr_mem_ranges) 234dd5f7260SVivek Goyal { 2358d5f894aSAKASHI Takahiro struct crash_mem *cmem; 2367c321eb2SLianbo Jiang int ret; 237dd5f7260SVivek Goyal 2388d5f894aSAKASHI Takahiro cmem = fill_up_crash_elf_data(); 2398d5f894aSAKASHI Takahiro if (!cmem) 240dd5f7260SVivek Goyal return -ENOMEM; 241dd5f7260SVivek Goyal 2429eff3037SBorislav Petkov ret = walk_system_ram_res(0, -1, cmem, prepare_elf64_ram_headers_callback); 243cbe66016SAKASHI Takahiro if (ret) 244cbe66016SAKASHI Takahiro goto out; 245cbe66016SAKASHI Takahiro 246cbe66016SAKASHI Takahiro /* Exclude unwanted mem ranges */ 2478d5f894aSAKASHI Takahiro ret = elf_header_exclude_ranges(cmem); 248cbe66016SAKASHI Takahiro if (ret) 249cbe66016SAKASHI Takahiro goto out; 250cbe66016SAKASHI Takahiro 251ea53ad9cSEric DeVolder /* Return the computed number of memory ranges, for hotplug usage */ 252ea53ad9cSEric DeVolder *nr_mem_ranges = cmem->nr_ranges; 253ea53ad9cSEric DeVolder 254dd5f7260SVivek Goyal /* By default prepare 64bit headers */ 2559eff3037SBorislav Petkov ret = crash_prepare_elf64_headers(cmem, IS_ENABLED(CONFIG_X86_64), addr, sz); 256cbe66016SAKASHI Takahiro 257cbe66016SAKASHI Takahiro out: 2588d5f894aSAKASHI Takahiro vfree(cmem); 259dd5f7260SVivek Goyal return ret; 260dd5f7260SVivek Goyal } 261ea53ad9cSEric DeVolder #endif 262dd5f7260SVivek Goyal 263ea53ad9cSEric DeVolder #ifdef CONFIG_KEXEC_FILE 2648ec67d97SIngo Molnar static int add_e820_entry(struct boot_params *params, struct e820_entry *entry) 265dd5f7260SVivek Goyal { 266dd5f7260SVivek Goyal unsigned int nr_e820_entries; 267dd5f7260SVivek Goyal 268dd5f7260SVivek Goyal nr_e820_entries = params->e820_entries; 26908b46d5dSIngo Molnar if (nr_e820_entries >= E820_MAX_ENTRIES_ZEROPAGE) 270dd5f7260SVivek Goyal return 1; 271dd5f7260SVivek Goyal 2729eff3037SBorislav Petkov memcpy(¶ms->e820_table[nr_e820_entries], entry, sizeof(struct e820_entry)); 273dd5f7260SVivek Goyal params->e820_entries++; 274dd5f7260SVivek Goyal return 0; 275dd5f7260SVivek Goyal } 276dd5f7260SVivek Goyal 2771d2e733bSTom Lendacky static int memmap_entry_callback(struct resource *res, void *arg) 278dd5f7260SVivek Goyal { 279dd5f7260SVivek Goyal struct crash_memmap_data *cmd = arg; 280dd5f7260SVivek Goyal struct boot_params *params = cmd->params; 2818ec67d97SIngo Molnar struct e820_entry ei; 282dd5f7260SVivek Goyal 2831d2e733bSTom Lendacky ei.addr = res->start; 2849275b933Skbuild test robot ei.size = resource_size(res); 285dd5f7260SVivek Goyal ei.type = cmd->type; 286dd5f7260SVivek Goyal add_e820_entry(params, &ei); 287dd5f7260SVivek Goyal 288dd5f7260SVivek Goyal return 0; 289dd5f7260SVivek Goyal } 290dd5f7260SVivek Goyal 291dd5f7260SVivek Goyal static int memmap_exclude_ranges(struct kimage *image, struct crash_mem *cmem, 292dd5f7260SVivek Goyal unsigned long long mstart, 293dd5f7260SVivek Goyal unsigned long long mend) 294dd5f7260SVivek Goyal { 295dd5f7260SVivek Goyal unsigned long start, end; 296dd5f7260SVivek Goyal 297dd5f7260SVivek Goyal cmem->ranges[0].start = mstart; 298dd5f7260SVivek Goyal cmem->ranges[0].end = mend; 299dd5f7260SVivek Goyal cmem->nr_ranges = 1; 300dd5f7260SVivek Goyal 301dd5f7260SVivek Goyal /* Exclude elf header region */ 302179350f0SLakshmi Ramasubramanian start = image->elf_load_addr; 303179350f0SLakshmi Ramasubramanian end = start + image->elf_headers_sz - 1; 304babac4a8SAKASHI Takahiro return crash_exclude_mem_range(cmem, start, end); 305dd5f7260SVivek Goyal } 306dd5f7260SVivek Goyal 307dd5f7260SVivek Goyal /* Prepare memory map for crash dump kernel */ 308dd5f7260SVivek Goyal int crash_setup_memmap_entries(struct kimage *image, struct boot_params *params) 309dd5f7260SVivek Goyal { 310dd5f7260SVivek Goyal int i, ret = 0; 311dd5f7260SVivek Goyal unsigned long flags; 3128ec67d97SIngo Molnar struct e820_entry ei; 313dd5f7260SVivek Goyal struct crash_memmap_data cmd; 314dd5f7260SVivek Goyal struct crash_mem *cmem; 315dd5f7260SVivek Goyal 3165849cdf8SMike Galbraith cmem = vzalloc(struct_size(cmem, ranges, 1)); 317dd5f7260SVivek Goyal if (!cmem) 318dd5f7260SVivek Goyal return -ENOMEM; 319dd5f7260SVivek Goyal 320dd5f7260SVivek Goyal memset(&cmd, 0, sizeof(struct crash_memmap_data)); 321dd5f7260SVivek Goyal cmd.params = params; 322dd5f7260SVivek Goyal 3237c321eb2SLianbo Jiang /* Add the low 1M */ 3247c321eb2SLianbo Jiang cmd.type = E820_TYPE_RAM; 3257c321eb2SLianbo Jiang flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; 3267c321eb2SLianbo Jiang walk_iomem_res_desc(IORES_DESC_NONE, flags, 0, (1<<20)-1, &cmd, 3277c321eb2SLianbo Jiang memmap_entry_callback); 328dd5f7260SVivek Goyal 329dd5f7260SVivek Goyal /* Add ACPI tables */ 33009821ff1SIngo Molnar cmd.type = E820_TYPE_ACPI; 331dd5f7260SVivek Goyal flags = IORESOURCE_MEM | IORESOURCE_BUSY; 332f0f4711aSToshi Kani walk_iomem_res_desc(IORES_DESC_ACPI_TABLES, flags, 0, -1, &cmd, 333dd5f7260SVivek Goyal memmap_entry_callback); 334dd5f7260SVivek Goyal 335dd5f7260SVivek Goyal /* Add ACPI Non-volatile Storage */ 33609821ff1SIngo Molnar cmd.type = E820_TYPE_NVS; 337f0f4711aSToshi Kani walk_iomem_res_desc(IORES_DESC_ACPI_NV_STORAGE, flags, 0, -1, &cmd, 338dd5f7260SVivek Goyal memmap_entry_callback); 339dd5f7260SVivek Goyal 340980621daSLianbo Jiang /* Add e820 reserved ranges */ 341980621daSLianbo Jiang cmd.type = E820_TYPE_RESERVED; 342980621daSLianbo Jiang flags = IORESOURCE_MEM; 343980621daSLianbo Jiang walk_iomem_res_desc(IORES_DESC_RESERVED, flags, 0, -1, &cmd, 344980621daSLianbo Jiang memmap_entry_callback); 345980621daSLianbo Jiang 346dd5f7260SVivek Goyal /* Add crashk_low_res region */ 347dd5f7260SVivek Goyal if (crashk_low_res.end) { 348dd5f7260SVivek Goyal ei.addr = crashk_low_res.start; 3491429b568SJulia Lawall ei.size = resource_size(&crashk_low_res); 35009821ff1SIngo Molnar ei.type = E820_TYPE_RAM; 351dd5f7260SVivek Goyal add_e820_entry(params, &ei); 352dd5f7260SVivek Goyal } 353dd5f7260SVivek Goyal 354dd5f7260SVivek Goyal /* Exclude some ranges from crashk_res and add rest to memmap */ 3559eff3037SBorislav Petkov ret = memmap_exclude_ranges(image, cmem, crashk_res.start, crashk_res.end); 356dd5f7260SVivek Goyal if (ret) 357dd5f7260SVivek Goyal goto out; 358dd5f7260SVivek Goyal 359dd5f7260SVivek Goyal for (i = 0; i < cmem->nr_ranges; i++) { 360dd5f7260SVivek Goyal ei.size = cmem->ranges[i].end - cmem->ranges[i].start + 1; 361dd5f7260SVivek Goyal 362dd5f7260SVivek Goyal /* If entry is less than a page, skip it */ 363dd5f7260SVivek Goyal if (ei.size < PAGE_SIZE) 364dd5f7260SVivek Goyal continue; 365dd5f7260SVivek Goyal ei.addr = cmem->ranges[i].start; 36609821ff1SIngo Molnar ei.type = E820_TYPE_RAM; 367dd5f7260SVivek Goyal add_e820_entry(params, &ei); 368dd5f7260SVivek Goyal } 369dd5f7260SVivek Goyal 370dd5f7260SVivek Goyal out: 371dd5f7260SVivek Goyal vfree(cmem); 372dd5f7260SVivek Goyal return ret; 373dd5f7260SVivek Goyal } 374dd5f7260SVivek Goyal 375dd5f7260SVivek Goyal int crash_load_segments(struct kimage *image) 376dd5f7260SVivek Goyal { 377dd5f7260SVivek Goyal int ret; 378ea53ad9cSEric DeVolder unsigned long pnum = 0; 379ec2b9bfaSThiago Jung Bauermann struct kexec_buf kbuf = { .image = image, .buf_min = 0, 380ec2b9bfaSThiago Jung Bauermann .buf_max = ULONG_MAX, .top_down = false }; 381dd5f7260SVivek Goyal 382dd5f7260SVivek Goyal /* Prepare elf headers and add a segment */ 383ea53ad9cSEric DeVolder ret = prepare_elf_headers(image, &kbuf.buffer, &kbuf.bufsz, &pnum); 384dd5f7260SVivek Goyal if (ret) 385dd5f7260SVivek Goyal return ret; 386dd5f7260SVivek Goyal 387179350f0SLakshmi Ramasubramanian image->elf_headers = kbuf.buffer; 388179350f0SLakshmi Ramasubramanian image->elf_headers_sz = kbuf.bufsz; 389ec2b9bfaSThiago Jung Bauermann kbuf.memsz = kbuf.bufsz; 390ea53ad9cSEric DeVolder 391ea53ad9cSEric DeVolder #ifdef CONFIG_CRASH_HOTPLUG 392ea53ad9cSEric DeVolder /* 393ea53ad9cSEric DeVolder * The elfcorehdr segment size accounts for VMCOREINFO, kernel_map, 394ea53ad9cSEric DeVolder * maximum CPUs and maximum memory ranges. 395ea53ad9cSEric DeVolder */ 396ea53ad9cSEric DeVolder if (IS_ENABLED(CONFIG_MEMORY_HOTPLUG)) 397ea53ad9cSEric DeVolder pnum = 2 + CONFIG_NR_CPUS_DEFAULT + CONFIG_CRASH_MAX_MEMORY_RANGES; 398ea53ad9cSEric DeVolder else 399ea53ad9cSEric DeVolder pnum += 2 + CONFIG_NR_CPUS_DEFAULT; 400ea53ad9cSEric DeVolder 401ea53ad9cSEric DeVolder if (pnum < (unsigned long)PN_XNUM) { 402ea53ad9cSEric DeVolder kbuf.memsz = pnum * sizeof(Elf64_Phdr); 403ea53ad9cSEric DeVolder kbuf.memsz += sizeof(Elf64_Ehdr); 404ea53ad9cSEric DeVolder 405ea53ad9cSEric DeVolder image->elfcorehdr_index = image->nr_segments; 406ea53ad9cSEric DeVolder 407ea53ad9cSEric DeVolder /* Mark as usable to crash kernel, else crash kernel fails on boot */ 408ea53ad9cSEric DeVolder image->elf_headers_sz = kbuf.memsz; 409ea53ad9cSEric DeVolder } else { 410ea53ad9cSEric DeVolder pr_err("number of Phdrs %lu exceeds max\n", pnum); 411ea53ad9cSEric DeVolder } 412ea53ad9cSEric DeVolder #endif 413ea53ad9cSEric DeVolder 414ec2b9bfaSThiago Jung Bauermann kbuf.buf_align = ELF_CORE_HEADER_ALIGN; 415993a1103SDave Young kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; 416ec2b9bfaSThiago Jung Bauermann ret = kexec_add_buffer(&kbuf); 417d00dd2f2STakashi Iwai if (ret) 418dd5f7260SVivek Goyal return ret; 419179350f0SLakshmi Ramasubramanian image->elf_load_addr = kbuf.mem; 420dd5f7260SVivek Goyal pr_debug("Loaded ELF headers at 0x%lx bufsz=0x%lx memsz=0x%lx\n", 421b57a7c9dSEric DeVolder image->elf_load_addr, kbuf.bufsz, kbuf.memsz); 422dd5f7260SVivek Goyal 423dd5f7260SVivek Goyal return ret; 424dd5f7260SVivek Goyal } 42574ca317cSVivek Goyal #endif /* CONFIG_KEXEC_FILE */ 426ea53ad9cSEric DeVolder 427ea53ad9cSEric DeVolder #ifdef CONFIG_CRASH_HOTPLUG 428ea53ad9cSEric DeVolder 429ea53ad9cSEric DeVolder #undef pr_fmt 430ea53ad9cSEric DeVolder #define pr_fmt(fmt) "crash hp: " fmt 431ea53ad9cSEric DeVolder 432*a72bbec7SEric DeVolder /* These functions provide the value for the sysfs crash_hotplug nodes */ 433*a72bbec7SEric DeVolder #ifdef CONFIG_HOTPLUG_CPU 434*a72bbec7SEric DeVolder int arch_crash_hotplug_cpu_support(void) 435*a72bbec7SEric DeVolder { 436*a72bbec7SEric DeVolder return crash_check_update_elfcorehdr(); 437*a72bbec7SEric DeVolder } 438*a72bbec7SEric DeVolder #endif 439*a72bbec7SEric DeVolder 440*a72bbec7SEric DeVolder #ifdef CONFIG_MEMORY_HOTPLUG 441*a72bbec7SEric DeVolder int arch_crash_hotplug_memory_support(void) 442*a72bbec7SEric DeVolder { 443*a72bbec7SEric DeVolder return crash_check_update_elfcorehdr(); 444*a72bbec7SEric DeVolder } 445*a72bbec7SEric DeVolder #endif 446*a72bbec7SEric DeVolder 447*a72bbec7SEric DeVolder unsigned int arch_crash_get_elfcorehdr_size(void) 448*a72bbec7SEric DeVolder { 449*a72bbec7SEric DeVolder unsigned int sz; 450*a72bbec7SEric DeVolder 451*a72bbec7SEric DeVolder /* kernel_map, VMCOREINFO and maximum CPUs */ 452*a72bbec7SEric DeVolder sz = 2 + CONFIG_NR_CPUS_DEFAULT; 453*a72bbec7SEric DeVolder if (IS_ENABLED(CONFIG_MEMORY_HOTPLUG)) 454*a72bbec7SEric DeVolder sz += CONFIG_CRASH_MAX_MEMORY_RANGES; 455*a72bbec7SEric DeVolder sz *= sizeof(Elf64_Phdr); 456*a72bbec7SEric DeVolder return sz; 457*a72bbec7SEric DeVolder } 458*a72bbec7SEric DeVolder 459ea53ad9cSEric DeVolder /** 460ea53ad9cSEric DeVolder * arch_crash_handle_hotplug_event() - Handle hotplug elfcorehdr changes 461ea53ad9cSEric DeVolder * @image: a pointer to kexec_crash_image 462ea53ad9cSEric DeVolder * 463ea53ad9cSEric DeVolder * Prepare the new elfcorehdr and replace the existing elfcorehdr. 464ea53ad9cSEric DeVolder */ 465ea53ad9cSEric DeVolder void arch_crash_handle_hotplug_event(struct kimage *image) 466ea53ad9cSEric DeVolder { 467ea53ad9cSEric DeVolder void *elfbuf = NULL, *old_elfcorehdr; 468ea53ad9cSEric DeVolder unsigned long nr_mem_ranges; 469ea53ad9cSEric DeVolder unsigned long mem, memsz; 470ea53ad9cSEric DeVolder unsigned long elfsz = 0; 471ea53ad9cSEric DeVolder 472ea53ad9cSEric DeVolder /* 473ea53ad9cSEric DeVolder * Create the new elfcorehdr reflecting the changes to CPU and/or 474ea53ad9cSEric DeVolder * memory resources. 475ea53ad9cSEric DeVolder */ 476ea53ad9cSEric DeVolder if (prepare_elf_headers(image, &elfbuf, &elfsz, &nr_mem_ranges)) { 477ea53ad9cSEric DeVolder pr_err("unable to create new elfcorehdr"); 478ea53ad9cSEric DeVolder goto out; 479ea53ad9cSEric DeVolder } 480ea53ad9cSEric DeVolder 481ea53ad9cSEric DeVolder /* 482ea53ad9cSEric DeVolder * Obtain address and size of the elfcorehdr segment, and 483ea53ad9cSEric DeVolder * check it against the new elfcorehdr buffer. 484ea53ad9cSEric DeVolder */ 485ea53ad9cSEric DeVolder mem = image->segment[image->elfcorehdr_index].mem; 486ea53ad9cSEric DeVolder memsz = image->segment[image->elfcorehdr_index].memsz; 487ea53ad9cSEric DeVolder if (elfsz > memsz) { 488ea53ad9cSEric DeVolder pr_err("update elfcorehdr elfsz %lu > memsz %lu", 489ea53ad9cSEric DeVolder elfsz, memsz); 490ea53ad9cSEric DeVolder goto out; 491ea53ad9cSEric DeVolder } 492ea53ad9cSEric DeVolder 493ea53ad9cSEric DeVolder /* 494ea53ad9cSEric DeVolder * Copy new elfcorehdr over the old elfcorehdr at destination. 495ea53ad9cSEric DeVolder */ 496ea53ad9cSEric DeVolder old_elfcorehdr = kmap_local_page(pfn_to_page(mem >> PAGE_SHIFT)); 497ea53ad9cSEric DeVolder if (!old_elfcorehdr) { 498ea53ad9cSEric DeVolder pr_err("mapping elfcorehdr segment failed\n"); 499ea53ad9cSEric DeVolder goto out; 500ea53ad9cSEric DeVolder } 501ea53ad9cSEric DeVolder 502ea53ad9cSEric DeVolder /* 503ea53ad9cSEric DeVolder * Temporarily invalidate the crash image while the 504ea53ad9cSEric DeVolder * elfcorehdr is updated. 505ea53ad9cSEric DeVolder */ 506ea53ad9cSEric DeVolder xchg(&kexec_crash_image, NULL); 507ea53ad9cSEric DeVolder memcpy_flushcache(old_elfcorehdr, elfbuf, elfsz); 508ea53ad9cSEric DeVolder xchg(&kexec_crash_image, image); 509ea53ad9cSEric DeVolder kunmap_local(old_elfcorehdr); 510ea53ad9cSEric DeVolder pr_debug("updated elfcorehdr\n"); 511ea53ad9cSEric DeVolder 512ea53ad9cSEric DeVolder out: 513ea53ad9cSEric DeVolder vfree(elfbuf); 514ea53ad9cSEric DeVolder } 515ea53ad9cSEric DeVolder #endif 516