// SPDX-License-Identifier: GPL-2.0-only
/*
 * Architecture specific (i386/x86_64) functions for kexec based crash dumps.
 *
 * Created by: Hariprasad Nellitheertha (hari@in.ibm.com)
 *
 * Copyright (C) IBM Corporation, 2004. All rights reserved.
 * Copyright (C) Red Hat Inc., 2014. All rights reserved.
 * Authors:
 *      Vivek Goyal <vgoyal@redhat.com>
 */

#define pr_fmt(fmt)	"kexec: " fmt

#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/smp.h>
#include <linux/reboot.h>
#include <linux/kexec.h>
#include <linux/delay.h>
#include <linux/elf.h>
#include <linux/elfcore.h>
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/memblock.h>
#include <linux/kdebug.h>

#include <asm/processor.h>
#include <asm/hardirq.h>
#include <asm/nmi.h>
#include <asm/hw_irq.h>
#include <asm/apic.h>
#include <asm/e820/types.h>
#include <asm/io_apic.h>
#include <asm/hpet.h>
#include <asm/cpu.h>
#include <asm/reboot.h>
#include <asm/intel_pt.h>
#include <asm/crash.h>
#include <asm/cmdline.h>

/* Used while preparing memory map entries for the second (crash) kernel */
struct crash_memmap_data {
	struct boot_params *params;
	/* Type of memory */
	unsigned int type;
};

#if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC)

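/*
 * Invoked via NMI on every other CPU during a crash: save that CPU's
 * register state for the vmcore, stop Intel PT tracing, and disable the
 * local APIC so no stale interrupts reach the kexec'd kernel.
 */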
static void kdump_nmi_callback(int cpu, struct pt_regs *regs)
{
	crash_save_cpu(regs, cpu);

	/* Disable Intel PT to stop its logging */
	cpu_emergency_stop_pt();

	disable_local_APIC();
}

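/*
 * Stop all other CPUs via NMI, then disable the local APIC on the
 * crashing CPU as well.
 */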
void kdump_nmi_shootdown_cpus(void)
{
	nmi_shootdown_cpus(kdump_nmi_callback);

	disable_local_APIC();
}

/* Override the weak function in kernel/panic.c */
void crash_smp_send_stop(void)
{
	static int cpus_stopped;

	if (cpus_stopped)
		return;

	if (smp_ops.crash_stop_other_cpus)
		smp_ops.crash_stop_other_cpus();
	else
		smp_send_stop();

	cpus_stopped = 1;
}

#else
void crash_smp_send_stop(void)
{
	/* There are no other CPUs to shoot down */
}
#endif

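/*
 * Reached via machine_ops.crash_shutdown on the CPU that is about to
 * kexec into the crash kernel.
 */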
void native_machine_crash_shutdown(struct pt_regs *regs)
{
	/*
	 * This function is only called after the system has panicked or is
	 * otherwise in a critical state. The minimum amount of code to allow
	 * a kexec'd kernel to run successfully needs to happen here.
	 *
	 * In practice this means shooting down the other CPUs in an SMP
	 * system.
	 */
	/* The kernel is broken so disable interrupts */
	local_irq_disable();

	crash_smp_send_stop();

	cpu_emergency_disable_virtualization();

	/* Disable Intel PT to stop its logging */
	cpu_emergency_stop_pt();

#ifdef CONFIG_X86_IO_APIC
	/* Prevent crash_kexec() from deadlocking on ioapic_lock. */
	ioapic_zap_locks();
	clear_IO_APIC();
#endif
	lapic_shutdown();
	restore_boot_irq_mode();
#ifdef CONFIG_HPET_TIMER
	hpet_disable();
#endif
	crash_save_cpu(regs, safe_smp_processor_id());
}

#if defined(CONFIG_KEXEC_FILE) || defined(CONFIG_CRASH_HOTPLUG)
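/* Callback for walk_system_ram_res(); counts the System RAM resources. */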
static int get_nr_ram_ranges_callback(struct resource *res, void *arg)
{
	unsigned int *nr_ranges = arg;

	(*nr_ranges)++;
	return 0;
}

/* Gather all the information required to prepare ELF headers for the RAM regions */
static struct crash_mem *fill_up_crash_elf_data(void)
{
	unsigned int nr_ranges = 0;
	struct crash_mem *cmem;

	walk_system_ram_res(0, -1, &nr_ranges, get_nr_ram_ranges_callback);
	if (!nr_ranges)
		return NULL;

	/*
	 * Excluding the crashkernel region and/or crashk_low_res may each
	 * split a range in two, so add two extra slots here.
	 */
	nr_ranges += 2;
	cmem = vzalloc(struct_size(cmem, ranges, nr_ranges));
	if (!cmem)
		return NULL;

	cmem->max_nr_ranges = nr_ranges;
	cmem->nr_ranges = 0;

	return cmem;
}

/*
 * Look for any unwanted ranges between mstart and mend and remove them.
 * This may split existing ranges; the resulting sub-ranges are stored in
 * the cmem->ranges[] array.
 */
static int elf_header_exclude_ranges(struct crash_mem *cmem)
{
	int ret = 0;

	/* Exclude the low 1M because it is always reserved */
	ret = crash_exclude_mem_range(cmem, 0, (1<<20)-1);
	if (ret)
		return ret;

	/* Exclude the crashkernel region */
	ret = crash_exclude_mem_range(cmem, crashk_res.start, crashk_res.end);
	if (ret)
		return ret;

	if (crashk_low_res.end)
		ret = crash_exclude_mem_range(cmem, crashk_low_res.start,
					      crashk_low_res.end);

	return ret;
}

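/* Callback for walk_system_ram_res(); records each System RAM resource in cmem->ranges[]. */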
static int prepare_elf64_ram_headers_callback(struct resource *res, void *arg)
{
	struct crash_mem *cmem = arg;

	cmem->ranges[cmem->nr_ranges].start = res->start;
	cmem->ranges[cmem->nr_ranges].end = res->end;
	cmem->nr_ranges++;

	return 0;
}

/*
 * Prepare the ELF core headers. Returns the header buffer and its size via
 * @addr and @sz, and the number of described memory ranges via
 * @nr_mem_ranges (for hotplug usage).
 */
static int prepare_elf_headers(struct kimage *image, void **addr,
			       unsigned long *sz, unsigned long *nr_mem_ranges)
{
	struct crash_mem *cmem;
	int ret;

	cmem = fill_up_crash_elf_data();
	if (!cmem)
		return -ENOMEM;

	ret = walk_system_ram_res(0, -1, cmem, prepare_elf64_ram_headers_callback);
	if (ret)
		goto out;

	/* Exclude unwanted mem ranges */
	ret = elf_header_exclude_ranges(cmem);
	if (ret)
		goto out;

	/* Return the computed number of memory ranges, for hotplug usage */
	*nr_mem_ranges = cmem->nr_ranges;

	/* By default, prepare 64-bit headers */
	ret = crash_prepare_elf64_headers(cmem, IS_ENABLED(CONFIG_X86_64), addr, sz);

out:
	vfree(cmem);
	return ret;
}
#endif

#ifdef CONFIG_KEXEC_FILE
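/*
 * Append one entry to the e820 table handed to the crash kernel via its
 * boot_params. Fails (returns 1) once the zeropage table is full.
 */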
static int add_e820_entry(struct boot_params *params, struct e820_entry *entry)
{
	unsigned int nr_e820_entries;

	nr_e820_entries = params->e820_entries;
	if (nr_e820_entries >= E820_MAX_ENTRIES_ZEROPAGE)
		return 1;

	memcpy(&params->e820_table[nr_e820_entries], entry, sizeof(struct e820_entry));
	params->e820_entries++;
	return 0;
}

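/*
 * Callback for walk_iomem_res_desc(); adds each matching resource to the
 * crash kernel's e820 table with the type selected in cmd->type.
 */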
static int memmap_entry_callback(struct resource *res, void *arg)
{
	struct crash_memmap_data *cmd = arg;
	struct boot_params *params = cmd->params;
	struct e820_entry ei;

	ei.addr = res->start;
	ei.size = resource_size(res);
	ei.type = cmd->type;
	add_e820_entry(params, &ei);

	return 0;
}

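/*
 * Seed cmem with the [mstart, mend] range, then carve out the region
 * occupied by the ELF core headers so that it is not handed to the
 * crash kernel as usable RAM.
 */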
static int memmap_exclude_ranges(struct kimage *image, struct crash_mem *cmem,
				 unsigned long long mstart,
				 unsigned long long mend)
{
	unsigned long start, end;

	cmem->ranges[0].start = mstart;
	cmem->ranges[0].end = mend;
	cmem->nr_ranges = 1;

	/* Exclude the ELF header region */
	start = image->elf_load_addr;
	end = start + image->elf_headers_sz - 1;
	return crash_exclude_mem_range(cmem, start, end);
}

/* Prepare the memory map for the crash dump kernel */
int crash_setup_memmap_entries(struct kimage *image, struct boot_params *params)
{
	int i, ret = 0;
	unsigned long flags;
	struct e820_entry ei;
	struct crash_memmap_data cmd;
	struct crash_mem *cmem;

	cmem = vzalloc(struct_size(cmem, ranges, 1));
	if (!cmem)
		return -ENOMEM;

	memset(&cmd, 0, sizeof(struct crash_memmap_data));
	cmd.params = params;

	/* Add the low 1M */
	cmd.type = E820_TYPE_RAM;
	flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
	walk_iomem_res_desc(IORES_DESC_NONE, flags, 0, (1<<20)-1, &cmd,
			    memmap_entry_callback);

	/* Add ACPI tables */
	cmd.type = E820_TYPE_ACPI;
	flags = IORESOURCE_MEM | IORESOURCE_BUSY;
	walk_iomem_res_desc(IORES_DESC_ACPI_TABLES, flags, 0, -1, &cmd,
			    memmap_entry_callback);

	/* Add ACPI Non-volatile Storage */
	cmd.type = E820_TYPE_NVS;
	walk_iomem_res_desc(IORES_DESC_ACPI_NV_STORAGE, flags, 0, -1, &cmd,
			    memmap_entry_callback);

	/* Add e820 reserved ranges */
	cmd.type = E820_TYPE_RESERVED;
	flags = IORESOURCE_MEM;
	walk_iomem_res_desc(IORES_DESC_RESERVED, flags, 0, -1, &cmd,
			    memmap_entry_callback);

	/* Add the crashk_low_res region */
	if (crashk_low_res.end) {
		ei.addr = crashk_low_res.start;
		ei.size = resource_size(&crashk_low_res);
		ei.type = E820_TYPE_RAM;
		add_e820_entry(params, &ei);
	}

	/* Exclude the ELF header region from crashk_res and add the rest to the memmap */
	ret = memmap_exclude_ranges(image, cmem, crashk_res.start, crashk_res.end);
	if (ret)
		goto out;

	for (i = 0; i < cmem->nr_ranges; i++) {
		ei.size = cmem->ranges[i].end - cmem->ranges[i].start + 1;

		/* If the entry is smaller than a page, skip it */
		if (ei.size < PAGE_SIZE)
			continue;
		ei.addr = cmem->ranges[i].start;
		ei.type = E820_TYPE_RAM;
		add_e820_entry(params, &ei);
	}

out:
	vfree(cmem);
	return ret;
}

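/*
 * Load the extra segments needed for crash dumping: currently just the
 * elfcorehdr segment, which describes the crashed kernel's memory and
 * CPU notes for /proc/vmcore in the crash kernel.
 */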
int crash_load_segments(struct kimage *image)
{
	int ret;
	unsigned long pnum = 0;
	struct kexec_buf kbuf = { .image = image, .buf_min = 0,
				  .buf_max = ULONG_MAX, .top_down = false };

	/* Prepare ELF headers and add a segment */
	ret = prepare_elf_headers(image, &kbuf.buffer, &kbuf.bufsz, &pnum);
	if (ret)
		return ret;

	image->elf_headers	= kbuf.buffer;
	image->elf_headers_sz	= kbuf.bufsz;
	kbuf.memsz		= kbuf.bufsz;

#ifdef CONFIG_CRASH_HOTPLUG
	/*
	 * Size the elfcorehdr segment for the worst case: VMCOREINFO and
	 * kernel_map (the "2"), the maximum number of CPUs, and, with
	 * memory hotplug, the maximum number of memory ranges (without it,
	 * the current number of ranges already counted in pnum).
	 */
	if (IS_ENABLED(CONFIG_MEMORY_HOTPLUG))
		pnum = 2 + CONFIG_NR_CPUS_DEFAULT + CONFIG_CRASH_MAX_MEMORY_RANGES;
	else
		pnum += 2 + CONFIG_NR_CPUS_DEFAULT;

	if (pnum < (unsigned long)PN_XNUM) {
		kbuf.memsz = pnum * sizeof(Elf64_Phdr);
		kbuf.memsz += sizeof(Elf64_Ehdr);

		image->elfcorehdr_index = image->nr_segments;

		/* Mark as usable to the crash kernel, else the crash kernel fails on boot */
		image->elf_headers_sz = kbuf.memsz;
	} else {
		pr_err("number of Phdrs %lu exceeds max\n", pnum);
	}
#endif

	kbuf.buf_align = ELF_CORE_HEADER_ALIGN;
	kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
	ret = kexec_add_buffer(&kbuf);
	if (ret)
		return ret;
	image->elf_load_addr = kbuf.mem;
	pr_debug("Loaded ELF headers at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
		 image->elf_load_addr, kbuf.bufsz, kbuf.memsz);

	return ret;
}
#endif /* CONFIG_KEXEC_FILE */

#ifdef CONFIG_CRASH_HOTPLUG

#undef pr_fmt
#define pr_fmt(fmt) "crash hp: " fmt

/* These functions provide the value for the sysfs crash_hotplug nodes */
#ifdef CONFIG_HOTPLUG_CPU
int arch_crash_hotplug_cpu_support(void)
{
	return crash_check_update_elfcorehdr();
}
#endif

#ifdef CONFIG_MEMORY_HOTPLUG
int arch_crash_hotplug_memory_support(void)
{
	return crash_check_update_elfcorehdr();
}
#endif

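/*
 * Report the worst-case elfcorehdr size, letting the crash hotplug
 * support verify that the elfcorehdr segment is large enough to absorb
 * later CPU and memory hotplug changes.
 */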
unsigned int arch_crash_get_elfcorehdr_size(void)
{
	unsigned int sz;

	/* kernel_map, VMCOREINFO and maximum CPUs */
	sz = 2 + CONFIG_NR_CPUS_DEFAULT;
	if (IS_ENABLED(CONFIG_MEMORY_HOTPLUG))
		sz += CONFIG_CRASH_MAX_MEMORY_RANGES;
	sz *= sizeof(Elf64_Phdr);
	return sz;
}

/**
 * arch_crash_handle_hotplug_event() - Handle hotplug elfcorehdr changes
 * @image: a pointer to kexec_crash_image
 *
 * Prepare the new elfcorehdr and replace the existing elfcorehdr.
 */
void arch_crash_handle_hotplug_event(struct kimage *image)
{
	void *elfbuf = NULL, *old_elfcorehdr;
	unsigned long nr_mem_ranges;
	unsigned long mem, memsz;
	unsigned long elfsz = 0;

	/*
	 * As crash_prepare_elf64_headers() has already described all
	 * possible CPUs, there is no need to update the elfcorehdr
	 * for additional CPU changes.
	 */
	if ((image->file_mode || image->elfcorehdr_updated) &&
	    ((image->hp_action == KEXEC_CRASH_HP_ADD_CPU) ||
	     (image->hp_action == KEXEC_CRASH_HP_REMOVE_CPU)))
		return;

	/*
	 * Create the new elfcorehdr reflecting the changes to CPU and/or
	 * memory resources.
	 */
	if (prepare_elf_headers(image, &elfbuf, &elfsz, &nr_mem_ranges)) {
		pr_err("unable to create new elfcorehdr\n");
		goto out;
	}

	/*
	 * Obtain the address and size of the existing elfcorehdr segment,
	 * and check that the new elfcorehdr buffer fits into it.
	 */
	mem = image->segment[image->elfcorehdr_index].mem;
	memsz = image->segment[image->elfcorehdr_index].memsz;
	if (elfsz > memsz) {
		pr_err("update elfcorehdr elfsz %lu > memsz %lu\n",
			elfsz, memsz);
		goto out;
	}

	/* Copy the new elfcorehdr over the old one at the destination */
	old_elfcorehdr = kmap_local_page(pfn_to_page(mem >> PAGE_SHIFT));
	if (!old_elfcorehdr) {
		pr_err("mapping elfcorehdr segment failed\n");
		goto out;
	}

	/*
	 * Temporarily invalidate the crash image while the elfcorehdr is
	 * updated, so that a panic in this window cannot kexec with a
	 * half-written elfcorehdr.
	 */
	xchg(&kexec_crash_image, NULL);
	memcpy_flushcache(old_elfcorehdr, elfbuf, elfsz);
	xchg(&kexec_crash_image, image);
	kunmap_local(old_elfcorehdr);
	pr_debug("updated elfcorehdr\n");

out:
	vfree(elfbuf);
}
#endif