xref: /openbmc/linux/arch/powerpc/kernel/fadump.c (revision 87fcfa7b7fe6bf819033fe827a27f710e38639b5)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Firmware Assisted dump: A robust mechanism to get reliable kernel crash
4  * dump with assistance from firmware. This approach does not use kexec,
5  * instead firmware assists in booting the kdump kernel while preserving
 * memory contents. Most of the implementation has been adapted from the
 * phyp assisted dump implementation written by Linas Vepstas and
 * Manish Ahuja.
9  *
10  * Copyright 2011 IBM Corporation
11  * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
12  */
13 
14 #undef DEBUG
15 #define pr_fmt(fmt) "fadump: " fmt
16 
17 #include <linux/string.h>
18 #include <linux/memblock.h>
19 #include <linux/delay.h>
20 #include <linux/seq_file.h>
21 #include <linux/crash_dump.h>
22 #include <linux/kobject.h>
23 #include <linux/sysfs.h>
24 #include <linux/slab.h>
25 #include <linux/cma.h>
26 #include <linux/hugetlb.h>
27 
28 #include <asm/debugfs.h>
29 #include <asm/page.h>
30 #include <asm/prom.h>
31 #include <asm/fadump.h>
32 #include <asm/fadump-internal.h>
33 #include <asm/setup.h>
34 
/*
 * File-wide firmware-assisted dump state. The functions in this file read
 * and update its fields (enabled/supported/active flags, boot memory
 * layout, reserved dump area, platform ops, ...).
 */
static struct fw_dump fw_dump;

/* Defined later in this file; used by fadump_reserve_mem(). */
static void __init fadump_reserve_crash_area(u64 base);

#ifndef CONFIG_PRESERVE_FA_DUMP
/* NOTE(review): the users of this mutex are outside this part of the file. */
static DEFINE_MUTEX(fadump_mutex);
/*
 * Growable memory range lists. crash_mrange_info is filled by
 * fadump_setup_crash_memory_ranges() and consumed when the PT_LOAD
 * program headers are created.
 * NOTE(review): reserved_mrange_info is populated outside this part of
 * the file.
 */
struct fadump_mrange_info crash_mrange_info = { "crash", NULL, 0, 0, 0 };
struct fadump_mrange_info reserved_mrange_info = { "reserved", NULL, 0, 0, 0 };

#ifdef CONFIG_CMA
/* CMA area handle filled in by cma_init_reserved_mem() in fadump_cma_init(). */
static struct cma *fadump_cma;
46 
47 /*
48  * fadump_cma_init() - Initialize CMA area from a fadump reserved memory
49  *
50  * This function initializes CMA area from fadump reserved memory.
51  * The total size of fadump reserved memory covers for boot memory size
52  * + cpu data size + hpte size and metadata.
53  * Initialize only the area equivalent to boot memory size for CMA use.
54  * The reamining portion of fadump reserved memory will be not given
55  * to CMA and pages for thoes will stay reserved. boot memory size is
56  * aligned per CMA requirement to satisy cma_init_reserved_mem() call.
57  * But for some reason even if it fails we still have the memory reservation
58  * with us and we can still continue doing fadump.
59  */
60 int __init fadump_cma_init(void)
61 {
62 	unsigned long long base, size;
63 	int rc;
64 
65 	if (!fw_dump.fadump_enabled)
66 		return 0;
67 
68 	/*
69 	 * Do not use CMA if user has provided fadump=nocma kernel parameter.
70 	 * Return 1 to continue with fadump old behaviour.
71 	 */
72 	if (fw_dump.nocma)
73 		return 1;
74 
75 	base = fw_dump.reserve_dump_area_start;
76 	size = fw_dump.boot_memory_size;
77 
78 	if (!size)
79 		return 0;
80 
81 	rc = cma_init_reserved_mem(base, size, 0, "fadump_cma", &fadump_cma);
82 	if (rc) {
83 		pr_err("Failed to init cma area for firmware-assisted dump,%d\n", rc);
84 		/*
85 		 * Though the CMA init has failed we still have memory
86 		 * reservation with us. The reserved memory will be
87 		 * blocked from production system usage.  Hence return 1,
88 		 * so that we can continue with fadump.
89 		 */
90 		return 1;
91 	}
92 
93 	/*
94 	 * So we now have successfully initialized cma area for fadump.
95 	 */
96 	pr_info("Initialized 0x%lx bytes cma area at %ldMB from 0x%lx "
97 		"bytes of memory reserved for firmware-assisted dump\n",
98 		cma_get_size(fadump_cma),
99 		(unsigned long)cma_get_base(fadump_cma) >> 20,
100 		fw_dump.reserve_dump_area_size);
101 	return 1;
102 }
103 #else
104 static int __init fadump_cma_init(void) { return 1; }
105 #endif /* CONFIG_CMA */
106 
107 /* Scan the Firmware Assisted dump configuration details. */
108 int __init early_init_dt_scan_fw_dump(unsigned long node, const char *uname,
109 				      int depth, void *data)
110 {
111 	if (depth != 1)
112 		return 0;
113 
114 	if (strcmp(uname, "rtas") == 0) {
115 		rtas_fadump_dt_scan(&fw_dump, node);
116 		return 1;
117 	}
118 
119 	if (strcmp(uname, "ibm,opal") == 0) {
120 		opal_fadump_dt_scan(&fw_dump, node);
121 		return 1;
122 	}
123 
124 	return 0;
125 }
126 
127 /*
128  * If fadump is registered, check if the memory provided
129  * falls within boot memory area and reserved memory area.
130  */
131 int is_fadump_memory_area(u64 addr, unsigned long size)
132 {
133 	u64 d_start, d_end;
134 
135 	if (!fw_dump.dump_registered)
136 		return 0;
137 
138 	if (!size)
139 		return 0;
140 
141 	d_start = fw_dump.reserve_dump_area_start;
142 	d_end = d_start + fw_dump.reserve_dump_area_size;
143 	if (((addr + size) > d_start) && (addr <= d_end))
144 		return 1;
145 
146 	return (addr <= fw_dump.boot_mem_top);
147 }
148 
149 int should_fadump_crash(void)
150 {
151 	if (!fw_dump.dump_registered || !fw_dump.fadumphdr_addr)
152 		return 0;
153 	return 1;
154 }
155 
156 int is_fadump_active(void)
157 {
158 	return fw_dump.dump_active;
159 }
160 
161 /*
162  * Returns true, if there are no holes in memory area between d_start to d_end,
163  * false otherwise.
164  */
165 static bool is_fadump_mem_area_contiguous(u64 d_start, u64 d_end)
166 {
167 	struct memblock_region *reg;
168 	bool ret = false;
169 	u64 start, end;
170 
171 	for_each_memblock(memory, reg) {
172 		start = max_t(u64, d_start, reg->base);
173 		end = min_t(u64, d_end, (reg->base + reg->size));
174 		if (d_start < end) {
175 			/* Memory hole from d_start to start */
176 			if (start > d_start)
177 				break;
178 
179 			if (end == d_end) {
180 				ret = true;
181 				break;
182 			}
183 
184 			d_start = end + 1;
185 		}
186 	}
187 
188 	return ret;
189 }
190 
191 /*
192  * Returns true, if there are no holes in boot memory area,
193  * false otherwise.
194  */
195 bool is_fadump_boot_mem_contiguous(void)
196 {
197 	unsigned long d_start, d_end;
198 	bool ret = false;
199 	int i;
200 
201 	for (i = 0; i < fw_dump.boot_mem_regs_cnt; i++) {
202 		d_start = fw_dump.boot_mem_addr[i];
203 		d_end   = d_start + fw_dump.boot_mem_sz[i];
204 
205 		ret = is_fadump_mem_area_contiguous(d_start, d_end);
206 		if (!ret)
207 			break;
208 	}
209 
210 	return ret;
211 }
212 
213 /*
214  * Returns true, if there are no holes in reserved memory area,
215  * false otherwise.
216  */
217 bool is_fadump_reserved_mem_contiguous(void)
218 {
219 	u64 d_start, d_end;
220 
221 	d_start	= fw_dump.reserve_dump_area_start;
222 	d_end	= d_start + fw_dump.reserve_dump_area_size;
223 	return is_fadump_mem_area_contiguous(d_start, d_end);
224 }
225 
226 /* Print firmware assisted dump configurations for debugging purpose. */
227 static void fadump_show_config(void)
228 {
229 	int i;
230 
231 	pr_debug("Support for firmware-assisted dump (fadump): %s\n",
232 			(fw_dump.fadump_supported ? "present" : "no support"));
233 
234 	if (!fw_dump.fadump_supported)
235 		return;
236 
237 	pr_debug("Fadump enabled    : %s\n",
238 				(fw_dump.fadump_enabled ? "yes" : "no"));
239 	pr_debug("Dump Active       : %s\n",
240 				(fw_dump.dump_active ? "yes" : "no"));
241 	pr_debug("Dump section sizes:\n");
242 	pr_debug("    CPU state data size: %lx\n", fw_dump.cpu_state_data_size);
243 	pr_debug("    HPTE region size   : %lx\n", fw_dump.hpte_region_size);
244 	pr_debug("    Boot memory size   : %lx\n", fw_dump.boot_memory_size);
245 	pr_debug("    Boot memory top    : %llx\n", fw_dump.boot_mem_top);
246 	pr_debug("Boot memory regions cnt: %llx\n", fw_dump.boot_mem_regs_cnt);
247 	for (i = 0; i < fw_dump.boot_mem_regs_cnt; i++) {
248 		pr_debug("[%03d] base = %llx, size = %llx\n", i,
249 			 fw_dump.boot_mem_addr[i], fw_dump.boot_mem_sz[i]);
250 	}
251 }
252 
253 /**
254  * fadump_calculate_reserve_size(): reserve variable boot area 5% of System RAM
255  *
256  * Function to find the largest memory size we need to reserve during early
257  * boot process. This will be the size of the memory that is required for a
258  * kernel to boot successfully.
259  *
260  * This function has been taken from phyp-assisted dump feature implementation.
261  *
262  * returns larger of 256MB or 5% rounded down to multiples of 256MB.
263  *
264  * TODO: Come up with better approach to find out more accurate memory size
265  * that is required for a kernel to boot successfully.
266  *
267  */
268 static inline u64 fadump_calculate_reserve_size(void)
269 {
270 	u64 base, size, bootmem_min;
271 	int ret;
272 
273 	if (fw_dump.reserve_bootvar)
274 		pr_warn("'fadump_reserve_mem=' parameter is deprecated in favor of 'crashkernel=' parameter.\n");
275 
276 	/*
277 	 * Check if the size is specified through crashkernel= cmdline
278 	 * option. If yes, then use that but ignore base as fadump reserves
279 	 * memory at a predefined offset.
280 	 */
281 	ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
282 				&size, &base);
283 	if (ret == 0 && size > 0) {
284 		unsigned long max_size;
285 
286 		if (fw_dump.reserve_bootvar)
287 			pr_info("Using 'crashkernel=' parameter for memory reservation.\n");
288 
289 		fw_dump.reserve_bootvar = (unsigned long)size;
290 
291 		/*
292 		 * Adjust if the boot memory size specified is above
293 		 * the upper limit.
294 		 */
295 		max_size = memblock_phys_mem_size() / MAX_BOOT_MEM_RATIO;
296 		if (fw_dump.reserve_bootvar > max_size) {
297 			fw_dump.reserve_bootvar = max_size;
298 			pr_info("Adjusted boot memory size to %luMB\n",
299 				(fw_dump.reserve_bootvar >> 20));
300 		}
301 
302 		return fw_dump.reserve_bootvar;
303 	} else if (fw_dump.reserve_bootvar) {
304 		/*
305 		 * 'fadump_reserve_mem=' is being used to reserve memory
306 		 * for firmware-assisted dump.
307 		 */
308 		return fw_dump.reserve_bootvar;
309 	}
310 
311 	/* divide by 20 to get 5% of value */
312 	size = memblock_phys_mem_size() / 20;
313 
314 	/* round it down in multiples of 256 */
315 	size = size & ~0x0FFFFFFFUL;
316 
317 	/* Truncate to memory_limit. We don't want to over reserve the memory.*/
318 	if (memory_limit && size > memory_limit)
319 		size = memory_limit;
320 
321 	bootmem_min = fw_dump.ops->fadump_get_bootmem_min();
322 	return (size > bootmem_min ? size : bootmem_min);
323 }
324 
325 /*
326  * Calculate the total memory size required to be reserved for
327  * firmware-assisted dump registration.
328  */
329 static unsigned long get_fadump_area_size(void)
330 {
331 	unsigned long size = 0;
332 
333 	size += fw_dump.cpu_state_data_size;
334 	size += fw_dump.hpte_region_size;
335 	size += fw_dump.boot_memory_size;
336 	size += sizeof(struct fadump_crash_info_header);
337 	size += sizeof(struct elfhdr); /* ELF core header.*/
338 	size += sizeof(struct elf_phdr); /* place holder for cpu notes */
339 	/* Program headers for crash memory regions. */
340 	size += sizeof(struct elf_phdr) * (memblock_num_regions(memory) + 2);
341 
342 	size = PAGE_ALIGN(size);
343 
344 	/* This is to hold kernel metadata on platforms that support it */
345 	size += (fw_dump.ops->fadump_get_metadata_size ?
346 		 fw_dump.ops->fadump_get_metadata_size() : 0);
347 	return size;
348 }
349 
350 static int __init add_boot_mem_region(unsigned long rstart,
351 				      unsigned long rsize)
352 {
353 	int i = fw_dump.boot_mem_regs_cnt++;
354 
355 	if (fw_dump.boot_mem_regs_cnt > FADUMP_MAX_MEM_REGS) {
356 		fw_dump.boot_mem_regs_cnt = FADUMP_MAX_MEM_REGS;
357 		return 0;
358 	}
359 
360 	pr_debug("Added boot memory range[%d] [%#016lx-%#016lx)\n",
361 		 i, rstart, (rstart + rsize));
362 	fw_dump.boot_mem_addr[i] = rstart;
363 	fw_dump.boot_mem_sz[i] = rsize;
364 	return 1;
365 }
366 
367 /*
368  * Firmware usually has a hard limit on the data it can copy per region.
369  * Honour that by splitting a memory range into multiple regions.
370  */
371 static int __init add_boot_mem_regions(unsigned long mstart,
372 				       unsigned long msize)
373 {
374 	unsigned long rstart, rsize, max_size;
375 	int ret = 1;
376 
377 	rstart = mstart;
378 	max_size = fw_dump.max_copy_size ? fw_dump.max_copy_size : msize;
379 	while (msize) {
380 		if (msize > max_size)
381 			rsize = max_size;
382 		else
383 			rsize = msize;
384 
385 		ret = add_boot_mem_region(rstart, rsize);
386 		if (!ret)
387 			break;
388 
389 		msize -= rsize;
390 		rstart += rsize;
391 	}
392 
393 	return ret;
394 }
395 
396 static int __init fadump_get_boot_mem_regions(void)
397 {
398 	unsigned long base, size, cur_size, hole_size, last_end;
399 	unsigned long mem_size = fw_dump.boot_memory_size;
400 	struct memblock_region *reg;
401 	int ret = 1;
402 
403 	fw_dump.boot_mem_regs_cnt = 0;
404 
405 	last_end = 0;
406 	hole_size = 0;
407 	cur_size = 0;
408 	for_each_memblock(memory, reg) {
409 		base = reg->base;
410 		size = reg->size;
411 		hole_size += (base - last_end);
412 
413 		if ((cur_size + size) >= mem_size) {
414 			size = (mem_size - cur_size);
415 			ret = add_boot_mem_regions(base, size);
416 			break;
417 		}
418 
419 		mem_size -= size;
420 		cur_size += size;
421 		ret = add_boot_mem_regions(base, size);
422 		if (!ret)
423 			break;
424 
425 		last_end = base + size;
426 	}
427 	fw_dump.boot_mem_top = PAGE_ALIGN(fw_dump.boot_memory_size + hole_size);
428 
429 	return ret;
430 }
431 
/*
 * Reserve the memory needed for firmware-assisted dump.
 *
 * When no dump is active, the boot memory size and boot memory regions
 * are computed and a chunk of get_fadump_area_size() bytes is reserved
 * above the boot memory. When a dump is active, fadump_reserve_crash_area()
 * is called instead so that the memory above boot memory is left alone.
 *
 * Returns 0 when fadump is not enabled or on failure (in which case
 * fw_dump.fadump_enabled is cleared). Otherwise returns 1 when a dump is
 * active, or the result of fadump_cma_init() after a successful
 * reservation.
 */
int __init fadump_reserve_mem(void)
{
	u64 base, size, mem_boundary, bootmem_min, align = PAGE_SIZE;
	/* Remember the allocation direction so it can be restored below. */
	bool is_memblock_bottom_up = memblock_bottom_up();
	int ret = 1;

	if (!fw_dump.fadump_enabled)
		return 0;

	if (!fw_dump.fadump_supported) {
		pr_info("Firmware-Assisted Dump is not supported on this hardware\n");
		goto error_out;
	}

	/*
	 * Initialize boot memory size
	 * If dump is active then we have already calculated the size during
	 * first kernel.
	 */
	if (!fw_dump.dump_active) {
		fw_dump.boot_memory_size =
			PAGE_ALIGN(fadump_calculate_reserve_size());
#ifdef CONFIG_CMA
		/* CMA in use: round the size up to the CMA alignment. */
		if (!fw_dump.nocma) {
			align = FADUMP_CMA_ALIGNMENT;
			fw_dump.boot_memory_size =
				ALIGN(fw_dump.boot_memory_size, align);
		}
#endif

		/* Refuse sizes below the platform's minimum boot memory. */
		bootmem_min = fw_dump.ops->fadump_get_bootmem_min();
		if (fw_dump.boot_memory_size < bootmem_min) {
			pr_err("Can't enable fadump with boot memory size (0x%lx) less than 0x%llx\n",
			       fw_dump.boot_memory_size, bootmem_min);
			goto error_out;
		}

		if (!fadump_get_boot_mem_regions()) {
			pr_err("Too many holes in boot memory area to enable fadump\n");
			goto error_out;
		}
	}

	/*
	 * Calculate the memory boundary.
	 * If memory_limit is less than actual memory boundary then reserve
	 * the memory for fadump beyond the memory_limit and adjust the
	 * memory_limit accordingly, so that the running kernel can run with
	 * specified memory_limit.
	 */
	if (memory_limit && memory_limit < memblock_end_of_DRAM()) {
		size = get_fadump_area_size();
		if ((memory_limit + size) < memblock_end_of_DRAM())
			memory_limit += size;
		else
			memory_limit = memblock_end_of_DRAM();
		printk(KERN_INFO "Adjusted memory_limit for firmware-assisted"
				" dump, now %#016llx\n", memory_limit);
	}
	if (memory_limit)
		mem_boundary = memory_limit;
	else
		mem_boundary = memblock_end_of_DRAM();

	/* The search for the reserved area starts at the top of boot memory. */
	base = fw_dump.boot_mem_top;
	size = get_fadump_area_size();
	fw_dump.reserve_dump_area_size = size;
	if (fw_dump.dump_active) {
		pr_info("Firmware-assisted dump is active.\n");

#ifdef CONFIG_HUGETLB_PAGE
		/*
		 * FADump capture kernel doesn't care much about hugepages.
		 * In fact, handling hugepages in capture kernel is asking for
		 * trouble. So, disable HugeTLB support when fadump is active.
		 */
		hugetlb_disabled = true;
#endif
		/*
		 * If last boot has crashed then reserve all the memory
		 * above boot memory size so that we don't touch it until
		 * dump is written to disk by userspace tool. This memory
		 * can be released for general use by invalidating fadump.
		 */
		fadump_reserve_crash_area(base);

		pr_debug("fadumphdr_addr = %#016lx\n", fw_dump.fadumphdr_addr);
		pr_debug("Reserve dump area start address: 0x%lx\n",
			 fw_dump.reserve_dump_area_start);
	} else {
		/*
		 * Reserve memory at an offset closer to bottom of the RAM to
		 * minimize the impact of memory hot-remove operation.
		 */
		memblock_set_bottom_up(true);
		base = memblock_find_in_range(base, mem_boundary, size, align);

		/* Restore the previous allocation mode */
		memblock_set_bottom_up(is_memblock_bottom_up);

		/* A zero base means no suitable chunk was found. */
		if (!base) {
			pr_err("Failed to find memory chunk for reservation!\n");
			goto error_out;
		}
		fw_dump.reserve_dump_area_start = base;

		/*
		 * Calculate the kernel metadata address and register it with
		 * f/w if the platform supports.
		 */
		if (fw_dump.ops->fadump_setup_metadata &&
		    (fw_dump.ops->fadump_setup_metadata(&fw_dump) < 0))
			goto error_out;

		if (memblock_reserve(base, size)) {
			pr_err("Failed to reserve memory!\n");
			goto error_out;
		}

		pr_info("Reserved %lldMB of memory at %#016llx (System RAM: %lldMB)\n",
			(size >> 20), base, (memblock_phys_mem_size() >> 20));

		ret = fadump_cma_init();
	}

	return ret;
error_out:
	/* Any failure above turns fadump off for this boot. */
	fw_dump.fadump_enabled = 0;
	return 0;
}
562 
563 /* Look for fadump= cmdline option. */
564 static int __init early_fadump_param(char *p)
565 {
566 	if (!p)
567 		return 1;
568 
569 	if (strncmp(p, "on", 2) == 0)
570 		fw_dump.fadump_enabled = 1;
571 	else if (strncmp(p, "off", 3) == 0)
572 		fw_dump.fadump_enabled = 0;
573 	else if (strncmp(p, "nocma", 5) == 0) {
574 		fw_dump.fadump_enabled = 1;
575 		fw_dump.nocma = 1;
576 	}
577 
578 	return 0;
579 }
580 early_param("fadump", early_fadump_param);
581 
582 /*
583  * Look for fadump_reserve_mem= cmdline option
584  * TODO: Remove references to 'fadump_reserve_mem=' parameter,
585  *       the sooner 'crashkernel=' parameter is accustomed to.
586  */
587 static int __init early_fadump_reserve_mem(char *p)
588 {
589 	if (p)
590 		fw_dump.reserve_bootvar = memparse(p, &p);
591 	return 0;
592 }
593 early_param("fadump_reserve_mem", early_fadump_reserve_mem);
594 
595 void crash_fadump(struct pt_regs *regs, const char *str)
596 {
597 	struct fadump_crash_info_header *fdh = NULL;
598 	int old_cpu, this_cpu;
599 
600 	if (!should_fadump_crash())
601 		return;
602 
603 	/*
604 	 * old_cpu == -1 means this is the first CPU which has come here,
605 	 * go ahead and trigger fadump.
606 	 *
607 	 * old_cpu != -1 means some other CPU has already on it's way
608 	 * to trigger fadump, just keep looping here.
609 	 */
610 	this_cpu = smp_processor_id();
611 	old_cpu = cmpxchg(&crashing_cpu, -1, this_cpu);
612 
613 	if (old_cpu != -1) {
614 		/*
615 		 * We can't loop here indefinitely. Wait as long as fadump
616 		 * is in force. If we race with fadump un-registration this
617 		 * loop will break and then we go down to normal panic path
618 		 * and reboot. If fadump is in force the first crashing
619 		 * cpu will definitely trigger fadump.
620 		 */
621 		while (fw_dump.dump_registered)
622 			cpu_relax();
623 		return;
624 	}
625 
626 	fdh = __va(fw_dump.fadumphdr_addr);
627 	fdh->crashing_cpu = crashing_cpu;
628 	crash_save_vmcoreinfo();
629 
630 	if (regs)
631 		fdh->regs = *regs;
632 	else
633 		ppc_save_regs(&fdh->regs);
634 
635 	fdh->online_mask = *cpu_online_mask;
636 
637 	fw_dump.ops->fadump_trigger(fdh, str);
638 }
639 
640 u32 *fadump_regs_to_elf_notes(u32 *buf, struct pt_regs *regs)
641 {
642 	struct elf_prstatus prstatus;
643 
644 	memset(&prstatus, 0, sizeof(prstatus));
645 	/*
646 	 * FIXME: How do i get PID? Do I really need it?
647 	 * prstatus.pr_pid = ????
648 	 */
649 	elf_core_copy_kernel_regs(&prstatus.pr_reg, regs);
650 	buf = append_elf_note(buf, CRASH_CORE_NOTE_NAME, NT_PRSTATUS,
651 			      &prstatus, sizeof(prstatus));
652 	return buf;
653 }
654 
655 void fadump_update_elfcore_header(char *bufp)
656 {
657 	struct elfhdr *elf;
658 	struct elf_phdr *phdr;
659 
660 	elf = (struct elfhdr *)bufp;
661 	bufp += sizeof(struct elfhdr);
662 
663 	/* First note is a place holder for cpu notes info. */
664 	phdr = (struct elf_phdr *)bufp;
665 
666 	if (phdr->p_type == PT_NOTE) {
667 		phdr->p_paddr	= __pa(fw_dump.cpu_notes_buf_vaddr);
668 		phdr->p_offset	= phdr->p_paddr;
669 		phdr->p_filesz	= fw_dump.cpu_notes_buf_size;
670 		phdr->p_memsz = fw_dump.cpu_notes_buf_size;
671 	}
672 	return;
673 }
674 
675 static void *fadump_alloc_buffer(unsigned long size)
676 {
677 	unsigned long count, i;
678 	struct page *page;
679 	void *vaddr;
680 
681 	vaddr = alloc_pages_exact(size, GFP_KERNEL | __GFP_ZERO);
682 	if (!vaddr)
683 		return NULL;
684 
685 	count = PAGE_ALIGN(size) / PAGE_SIZE;
686 	page = virt_to_page(vaddr);
687 	for (i = 0; i < count; i++)
688 		mark_page_reserved(page + i);
689 	return vaddr;
690 }
691 
692 static void fadump_free_buffer(unsigned long vaddr, unsigned long size)
693 {
694 	free_reserved_area((void *)vaddr, (void *)(vaddr + size), -1, NULL);
695 }
696 
697 s32 fadump_setup_cpu_notes_buf(u32 num_cpus)
698 {
699 	/* Allocate buffer to hold cpu crash notes. */
700 	fw_dump.cpu_notes_buf_size = num_cpus * sizeof(note_buf_t);
701 	fw_dump.cpu_notes_buf_size = PAGE_ALIGN(fw_dump.cpu_notes_buf_size);
702 	fw_dump.cpu_notes_buf_vaddr =
703 		(unsigned long)fadump_alloc_buffer(fw_dump.cpu_notes_buf_size);
704 	if (!fw_dump.cpu_notes_buf_vaddr) {
705 		pr_err("Failed to allocate %ld bytes for CPU notes buffer\n",
706 		       fw_dump.cpu_notes_buf_size);
707 		return -ENOMEM;
708 	}
709 
710 	pr_debug("Allocated buffer for cpu notes of size %ld at 0x%lx\n",
711 		 fw_dump.cpu_notes_buf_size,
712 		 fw_dump.cpu_notes_buf_vaddr);
713 	return 0;
714 }
715 
716 void fadump_free_cpu_notes_buf(void)
717 {
718 	if (!fw_dump.cpu_notes_buf_vaddr)
719 		return;
720 
721 	fadump_free_buffer(fw_dump.cpu_notes_buf_vaddr,
722 			   fw_dump.cpu_notes_buf_size);
723 	fw_dump.cpu_notes_buf_vaddr = 0;
724 	fw_dump.cpu_notes_buf_size = 0;
725 }
726 
727 static void fadump_free_mem_ranges(struct fadump_mrange_info *mrange_info)
728 {
729 	kfree(mrange_info->mem_ranges);
730 	mrange_info->mem_ranges = NULL;
731 	mrange_info->mem_ranges_sz = 0;
732 	mrange_info->max_mem_ranges = 0;
733 }
734 
735 /*
736  * Allocate or reallocate mem_ranges array in incremental units
737  * of PAGE_SIZE.
738  */
739 static int fadump_alloc_mem_ranges(struct fadump_mrange_info *mrange_info)
740 {
741 	struct fadump_memory_range *new_array;
742 	u64 new_size;
743 
744 	new_size = mrange_info->mem_ranges_sz + PAGE_SIZE;
745 	pr_debug("Allocating %llu bytes of memory for %s memory ranges\n",
746 		 new_size, mrange_info->name);
747 
748 	new_array = krealloc(mrange_info->mem_ranges, new_size, GFP_KERNEL);
749 	if (new_array == NULL) {
750 		pr_err("Insufficient memory for setting up %s memory ranges\n",
751 		       mrange_info->name);
752 		fadump_free_mem_ranges(mrange_info);
753 		return -ENOMEM;
754 	}
755 
756 	mrange_info->mem_ranges = new_array;
757 	mrange_info->mem_ranges_sz = new_size;
758 	mrange_info->max_mem_ranges = (new_size /
759 				       sizeof(struct fadump_memory_range));
760 	return 0;
761 }
762 
763 static inline int fadump_add_mem_range(struct fadump_mrange_info *mrange_info,
764 				       u64 base, u64 end)
765 {
766 	struct fadump_memory_range *mem_ranges = mrange_info->mem_ranges;
767 	bool is_adjacent = false;
768 	u64 start, size;
769 
770 	if (base == end)
771 		return 0;
772 
773 	/*
774 	 * Fold adjacent memory ranges to bring down the memory ranges/
775 	 * PT_LOAD segments count.
776 	 */
777 	if (mrange_info->mem_range_cnt) {
778 		start = mem_ranges[mrange_info->mem_range_cnt - 1].base;
779 		size  = mem_ranges[mrange_info->mem_range_cnt - 1].size;
780 
781 		if ((start + size) == base)
782 			is_adjacent = true;
783 	}
784 	if (!is_adjacent) {
785 		/* resize the array on reaching the limit */
786 		if (mrange_info->mem_range_cnt == mrange_info->max_mem_ranges) {
787 			int ret;
788 
789 			ret = fadump_alloc_mem_ranges(mrange_info);
790 			if (ret)
791 				return ret;
792 
793 			/* Update to the new resized array */
794 			mem_ranges = mrange_info->mem_ranges;
795 		}
796 
797 		start = base;
798 		mem_ranges[mrange_info->mem_range_cnt].base = start;
799 		mrange_info->mem_range_cnt++;
800 	}
801 
802 	mem_ranges[mrange_info->mem_range_cnt - 1].size = (end - start);
803 	pr_debug("%s_memory_range[%d] [%#016llx-%#016llx], %#llx bytes\n",
804 		 mrange_info->name, (mrange_info->mem_range_cnt - 1),
805 		 start, end - 1, (end - start));
806 	return 0;
807 }
808 
809 static int fadump_exclude_reserved_area(u64 start, u64 end)
810 {
811 	u64 ra_start, ra_end;
812 	int ret = 0;
813 
814 	ra_start = fw_dump.reserve_dump_area_start;
815 	ra_end = ra_start + fw_dump.reserve_dump_area_size;
816 
817 	if ((ra_start < end) && (ra_end > start)) {
818 		if ((start < ra_start) && (end > ra_end)) {
819 			ret = fadump_add_mem_range(&crash_mrange_info,
820 						   start, ra_start);
821 			if (ret)
822 				return ret;
823 
824 			ret = fadump_add_mem_range(&crash_mrange_info,
825 						   ra_end, end);
826 		} else if (start < ra_start) {
827 			ret = fadump_add_mem_range(&crash_mrange_info,
828 						   start, ra_start);
829 		} else if (ra_end < end) {
830 			ret = fadump_add_mem_range(&crash_mrange_info,
831 						   ra_end, end);
832 		}
833 	} else
834 		ret = fadump_add_mem_range(&crash_mrange_info, start, end);
835 
836 	return ret;
837 }
838 
839 static int fadump_init_elfcore_header(char *bufp)
840 {
841 	struct elfhdr *elf;
842 
843 	elf = (struct elfhdr *) bufp;
844 	bufp += sizeof(struct elfhdr);
845 	memcpy(elf->e_ident, ELFMAG, SELFMAG);
846 	elf->e_ident[EI_CLASS] = ELF_CLASS;
847 	elf->e_ident[EI_DATA] = ELF_DATA;
848 	elf->e_ident[EI_VERSION] = EV_CURRENT;
849 	elf->e_ident[EI_OSABI] = ELF_OSABI;
850 	memset(elf->e_ident+EI_PAD, 0, EI_NIDENT-EI_PAD);
851 	elf->e_type = ET_CORE;
852 	elf->e_machine = ELF_ARCH;
853 	elf->e_version = EV_CURRENT;
854 	elf->e_entry = 0;
855 	elf->e_phoff = sizeof(struct elfhdr);
856 	elf->e_shoff = 0;
857 #if defined(_CALL_ELF)
858 	elf->e_flags = _CALL_ELF;
859 #else
860 	elf->e_flags = 0;
861 #endif
862 	elf->e_ehsize = sizeof(struct elfhdr);
863 	elf->e_phentsize = sizeof(struct elf_phdr);
864 	elf->e_phnum = 0;
865 	elf->e_shentsize = 0;
866 	elf->e_shnum = 0;
867 	elf->e_shstrndx = 0;
868 
869 	return 0;
870 }
871 
872 /*
873  * Traverse through memblock structure and setup crash memory ranges. These
874  * ranges will be used create PT_LOAD program headers in elfcore header.
875  */
876 static int fadump_setup_crash_memory_ranges(void)
877 {
878 	struct memblock_region *reg;
879 	u64 start, end;
880 	int i, ret;
881 
882 	pr_debug("Setup crash memory ranges.\n");
883 	crash_mrange_info.mem_range_cnt = 0;
884 
885 	/*
886 	 * Boot memory region(s) registered with firmware are moved to
887 	 * different location at the time of crash. Create separate program
888 	 * header(s) for this memory chunk(s) with the correct offset.
889 	 */
890 	for (i = 0; i < fw_dump.boot_mem_regs_cnt; i++) {
891 		start = fw_dump.boot_mem_addr[i];
892 		end = start + fw_dump.boot_mem_sz[i];
893 		ret = fadump_add_mem_range(&crash_mrange_info, start, end);
894 		if (ret)
895 			return ret;
896 	}
897 
898 	for_each_memblock(memory, reg) {
899 		start = (u64)reg->base;
900 		end = start + (u64)reg->size;
901 
902 		/*
903 		 * skip the memory chunk that is already added
904 		 * (0 through boot_memory_top).
905 		 */
906 		if (start < fw_dump.boot_mem_top) {
907 			if (end > fw_dump.boot_mem_top)
908 				start = fw_dump.boot_mem_top;
909 			else
910 				continue;
911 		}
912 
913 		/* add this range excluding the reserved dump area. */
914 		ret = fadump_exclude_reserved_area(start, end);
915 		if (ret)
916 			return ret;
917 	}
918 
919 	return 0;
920 }
921 
922 /*
923  * If the given physical address falls within the boot memory region then
924  * return the relocated address that points to the dump region reserved
925  * for saving initial boot memory contents.
926  */
927 static inline unsigned long fadump_relocate(unsigned long paddr)
928 {
929 	unsigned long raddr, rstart, rend, rlast, hole_size;
930 	int i;
931 
932 	hole_size = 0;
933 	rlast = 0;
934 	raddr = paddr;
935 	for (i = 0; i < fw_dump.boot_mem_regs_cnt; i++) {
936 		rstart = fw_dump.boot_mem_addr[i];
937 		rend = rstart + fw_dump.boot_mem_sz[i];
938 		hole_size += (rstart - rlast);
939 
940 		if (paddr >= rstart && paddr < rend) {
941 			raddr += fw_dump.boot_mem_dest_addr - hole_size;
942 			break;
943 		}
944 
945 		rlast = rend;
946 	}
947 
948 	pr_debug("vmcoreinfo: paddr = 0x%lx, raddr = 0x%lx\n", paddr, raddr);
949 	return raddr;
950 }
951 
/*
 * Build the ELF core headers at @bufp: the ELF header, a place holder
 * PT_NOTE for the cpu notes, a PT_NOTE for vmcoreinfo and one PT_LOAD per
 * non-empty crash memory range. e_phnum is incremented for every program
 * header written. Always returns 0.
 */
static int fadump_create_elfcore_headers(char *bufp)
{
	unsigned long long raddr, offset;
	struct elf_phdr *phdr;
	struct elfhdr *elf;
	int i, j;

	fadump_init_elfcore_header(bufp);
	elf = (struct elfhdr *)bufp;
	bufp += sizeof(struct elfhdr);

	/*
	 * setup ELF PT_NOTE, place holder for cpu notes info. The notes info
	 * will be populated during second kernel boot after crash. Hence
	 * this PT_NOTE will always be the first elf note.
	 *
	 * NOTE: Any new ELF note addition should be placed after this note.
	 */
	phdr = (struct elf_phdr *)bufp;
	bufp += sizeof(struct elf_phdr);
	phdr->p_type = PT_NOTE;
	phdr->p_flags = 0;
	phdr->p_vaddr = 0;
	phdr->p_align = 0;

	/* Location and size are filled in by fadump_update_elfcore_header(). */
	phdr->p_offset = 0;
	phdr->p_paddr = 0;
	phdr->p_filesz = 0;
	phdr->p_memsz = 0;

	(elf->e_phnum)++;

	/* setup ELF PT_NOTE for vmcoreinfo */
	phdr = (struct elf_phdr *)bufp;
	bufp += sizeof(struct elf_phdr);
	phdr->p_type	= PT_NOTE;
	phdr->p_flags	= 0;
	phdr->p_vaddr	= 0;
	phdr->p_align	= 0;

	/* Use the relocated address in case the note lies in boot memory. */
	phdr->p_paddr	= fadump_relocate(paddr_vmcoreinfo_note());
	phdr->p_offset	= phdr->p_paddr;
	phdr->p_memsz	= phdr->p_filesz = VMCOREINFO_NOTE_SIZE;

	/* Increment number of program headers. */
	(elf->e_phnum)++;

	/* setup PT_LOAD sections. */
	/*
	 * j indexes the next boot memory region expected among the crash
	 * ranges, raddr is its start address and offset is the total size of
	 * the boot memory regions already matched.
	 */
	j = 0;
	offset = 0;
	raddr = fw_dump.boot_mem_addr[0];
	for (i = 0; i < crash_mrange_info.mem_range_cnt; i++) {
		u64 mbase, msize;

		mbase = crash_mrange_info.mem_ranges[i].base;
		msize = crash_mrange_info.mem_ranges[i].size;
		if (!msize)
			continue;

		phdr = (struct elf_phdr *)bufp;
		bufp += sizeof(struct elf_phdr);
		phdr->p_type	= PT_LOAD;
		phdr->p_flags	= PF_R|PF_W|PF_X;
		/* Default: the range is read from where it lives in memory. */
		phdr->p_offset	= mbase;

		if (mbase == raddr) {
			/*
			 * The entire real memory region will be moved by
			 * firmware to the specified destination_address.
			 * Hence set the correct offset.
			 */
			phdr->p_offset = fw_dump.boot_mem_dest_addr + offset;
			/* Advance to the next boot memory region, if any. */
			if (j < (fw_dump.boot_mem_regs_cnt - 1)) {
				offset += fw_dump.boot_mem_sz[j];
				raddr = fw_dump.boot_mem_addr[++j];
			}
		}

		phdr->p_paddr = mbase;
		phdr->p_vaddr = (unsigned long)__va(mbase);
		phdr->p_filesz = msize;
		phdr->p_memsz = msize;
		phdr->p_align = 0;

		/* Increment number of program headers. */
		(elf->e_phnum)++;
	}
	return 0;
}
1041 
1042 static unsigned long init_fadump_header(unsigned long addr)
1043 {
1044 	struct fadump_crash_info_header *fdh;
1045 
1046 	if (!addr)
1047 		return 0;
1048 
1049 	fdh = __va(addr);
1050 	addr += sizeof(struct fadump_crash_info_header);
1051 
1052 	memset(fdh, 0, sizeof(struct fadump_crash_info_header));
1053 	fdh->magic_number = FADUMP_CRASH_INFO_MAGIC;
1054 	fdh->elfcorehdr_addr = addr;
1055 	/* We will set the crashing cpu id in crash_fadump() during crash. */
1056 	fdh->crashing_cpu = FADUMP_CPU_UNKNOWN;
1057 
1058 	return addr;
1059 }
1060 
1061 static int register_fadump(void)
1062 {
1063 	unsigned long addr;
1064 	void *vaddr;
1065 	int ret;
1066 
1067 	/*
1068 	 * If no memory is reserved then we can not register for firmware-
1069 	 * assisted dump.
1070 	 */
1071 	if (!fw_dump.reserve_dump_area_size)
1072 		return -ENODEV;
1073 
1074 	ret = fadump_setup_crash_memory_ranges();
1075 	if (ret)
1076 		return ret;
1077 
1078 	addr = fw_dump.fadumphdr_addr;
1079 
1080 	/* Initialize fadump crash info header. */
1081 	addr = init_fadump_header(addr);
1082 	vaddr = __va(addr);
1083 
1084 	pr_debug("Creating ELF core headers at %#016lx\n", addr);
1085 	fadump_create_elfcore_headers(vaddr);
1086 
1087 	/* register the future kernel dump with firmware. */
1088 	pr_debug("Registering for firmware-assisted kernel dump...\n");
1089 	return fw_dump.ops->fadump_register(&fw_dump);
1090 }
1091 
1092 void fadump_cleanup(void)
1093 {
1094 	if (!fw_dump.fadump_supported)
1095 		return;
1096 
1097 	/* Invalidate the registration only if dump is active. */
1098 	if (fw_dump.dump_active) {
1099 		pr_debug("Invalidating firmware-assisted dump registration\n");
1100 		fw_dump.ops->fadump_invalidate(&fw_dump);
1101 	} else if (fw_dump.dump_registered) {
1102 		/* Un-register Firmware-assisted dump if it was registered. */
1103 		fw_dump.ops->fadump_unregister(&fw_dump);
1104 		fadump_free_mem_ranges(&crash_mrange_info);
1105 	}
1106 
1107 	if (fw_dump.ops->fadump_cleanup)
1108 		fw_dump.ops->fadump_cleanup(&fw_dump);
1109 }
1110 
1111 static void fadump_free_reserved_memory(unsigned long start_pfn,
1112 					unsigned long end_pfn)
1113 {
1114 	unsigned long pfn;
1115 	unsigned long time_limit = jiffies + HZ;
1116 
1117 	pr_info("freeing reserved memory (0x%llx - 0x%llx)\n",
1118 		PFN_PHYS(start_pfn), PFN_PHYS(end_pfn));
1119 
1120 	for (pfn = start_pfn; pfn < end_pfn; pfn++) {
1121 		free_reserved_page(pfn_to_page(pfn));
1122 
1123 		if (time_after(jiffies, time_limit)) {
1124 			cond_resched();
1125 			time_limit = jiffies + HZ;
1126 		}
1127 	}
1128 }
1129 
1130 /*
1131  * Skip memory holes and free memory that was actually reserved.
1132  */
1133 static void fadump_release_reserved_area(u64 start, u64 end)
1134 {
1135 	u64 tstart, tend, spfn, epfn;
1136 	struct memblock_region *reg;
1137 
1138 	spfn = PHYS_PFN(start);
1139 	epfn = PHYS_PFN(end);
1140 	for_each_memblock(memory, reg) {
1141 		tstart = max_t(u64, spfn, memblock_region_memory_base_pfn(reg));
1142 		tend   = min_t(u64, epfn, memblock_region_memory_end_pfn(reg));
1143 		if (tstart < tend) {
1144 			fadump_free_reserved_memory(tstart, tend);
1145 
1146 			if (tend == epfn)
1147 				break;
1148 
1149 			spfn = tend;
1150 		}
1151 	}
1152 }
1153 
1154 /*
1155  * Sort the mem ranges in-place and merge adjacent ranges
1156  * to minimize the memory ranges count.
1157  */
1158 static void sort_and_merge_mem_ranges(struct fadump_mrange_info *mrange_info)
1159 {
1160 	struct fadump_memory_range *mem_ranges;
1161 	struct fadump_memory_range tmp_range;
1162 	u64 base, size;
1163 	int i, j, idx;
1164 
1165 	if (!reserved_mrange_info.mem_range_cnt)
1166 		return;
1167 
1168 	/* Sort the memory ranges */
1169 	mem_ranges = mrange_info->mem_ranges;
1170 	for (i = 0; i < mrange_info->mem_range_cnt; i++) {
1171 		idx = i;
1172 		for (j = (i + 1); j < mrange_info->mem_range_cnt; j++) {
1173 			if (mem_ranges[idx].base > mem_ranges[j].base)
1174 				idx = j;
1175 		}
1176 		if (idx != i) {
1177 			tmp_range = mem_ranges[idx];
1178 			mem_ranges[idx] = mem_ranges[i];
1179 			mem_ranges[i] = tmp_range;
1180 		}
1181 	}
1182 
1183 	/* Merge adjacent reserved ranges */
1184 	idx = 0;
1185 	for (i = 1; i < mrange_info->mem_range_cnt; i++) {
1186 		base = mem_ranges[i-1].base;
1187 		size = mem_ranges[i-1].size;
1188 		if (mem_ranges[i].base == (base + size))
1189 			mem_ranges[idx].size += mem_ranges[i].size;
1190 		else {
1191 			idx++;
1192 			if (i == idx)
1193 				continue;
1194 
1195 			mem_ranges[idx] = mem_ranges[i];
1196 		}
1197 	}
1198 	mrange_info->mem_range_cnt = idx + 1;
1199 }
1200 
1201 /*
1202  * Scan reserved-ranges to consider them while reserving/releasing
1203  * memory for FADump.
1204  */
1205 static inline int fadump_scan_reserved_mem_ranges(void)
1206 {
1207 	struct device_node *root;
1208 	const __be32 *prop;
1209 	int len, ret = -1;
1210 	unsigned long i;
1211 
1212 	root = of_find_node_by_path("/");
1213 	if (!root)
1214 		return ret;
1215 
1216 	prop = of_get_property(root, "reserved-ranges", &len);
1217 	if (!prop)
1218 		return ret;
1219 
1220 	/*
1221 	 * Each reserved range is an (address,size) pair, 2 cells each,
1222 	 * totalling 4 cells per range.
1223 	 */
1224 	for (i = 0; i < len / (sizeof(*prop) * 4); i++) {
1225 		u64 base, size;
1226 
1227 		base = of_read_number(prop + (i * 4) + 0, 2);
1228 		size = of_read_number(prop + (i * 4) + 2, 2);
1229 
1230 		if (size) {
1231 			ret = fadump_add_mem_range(&reserved_mrange_info,
1232 						   base, base + size);
1233 			if (ret < 0) {
1234 				pr_warn("some reserved ranges are ignored!\n");
1235 				break;
1236 			}
1237 		}
1238 	}
1239 
1240 	return ret;
1241 }
1242 
1243 /*
1244  * Release the memory that was reserved during early boot to preserve the
1245  * crash'ed kernel's memory contents except reserved dump area (permanent
1246  * reservation) and reserved ranges used by F/W. The released memory will
1247  * be available for general use.
1248  */
1249 static void fadump_release_memory(u64 begin, u64 end)
1250 {
1251 	u64 ra_start, ra_end, tstart;
1252 	int i, ret;
1253 
1254 	fadump_scan_reserved_mem_ranges();
1255 
1256 	ra_start = fw_dump.reserve_dump_area_start;
1257 	ra_end = ra_start + fw_dump.reserve_dump_area_size;
1258 
1259 	/*
1260 	 * Add reserved dump area to reserved ranges list
1261 	 * and exclude all these ranges while releasing memory.
1262 	 */
1263 	ret = fadump_add_mem_range(&reserved_mrange_info, ra_start, ra_end);
1264 	if (ret != 0) {
1265 		/*
1266 		 * Not enough memory to setup reserved ranges but the system is
1267 		 * running shortage of memory. So, release all the memory except
1268 		 * Reserved dump area (reused for next fadump registration).
1269 		 */
1270 		if (begin < ra_end && end > ra_start) {
1271 			if (begin < ra_start)
1272 				fadump_release_reserved_area(begin, ra_start);
1273 			if (end > ra_end)
1274 				fadump_release_reserved_area(ra_end, end);
1275 		} else
1276 			fadump_release_reserved_area(begin, end);
1277 
1278 		return;
1279 	}
1280 
1281 	/* Get the reserved ranges list in order first. */
1282 	sort_and_merge_mem_ranges(&reserved_mrange_info);
1283 
1284 	/* Exclude reserved ranges and release remaining memory */
1285 	tstart = begin;
1286 	for (i = 0; i < reserved_mrange_info.mem_range_cnt; i++) {
1287 		ra_start = reserved_mrange_info.mem_ranges[i].base;
1288 		ra_end = ra_start + reserved_mrange_info.mem_ranges[i].size;
1289 
1290 		if (tstart >= ra_end)
1291 			continue;
1292 
1293 		if (tstart < ra_start)
1294 			fadump_release_reserved_area(tstart, ra_start);
1295 		tstart = ra_end;
1296 	}
1297 
1298 	if (tstart < end)
1299 		fadump_release_reserved_area(tstart, end);
1300 }
1301 
1302 static void fadump_invalidate_release_mem(void)
1303 {
1304 	mutex_lock(&fadump_mutex);
1305 	if (!fw_dump.dump_active) {
1306 		mutex_unlock(&fadump_mutex);
1307 		return;
1308 	}
1309 
1310 	fadump_cleanup();
1311 	mutex_unlock(&fadump_mutex);
1312 
1313 	fadump_release_memory(fw_dump.boot_mem_top, memblock_end_of_DRAM());
1314 	fadump_free_cpu_notes_buf();
1315 
1316 	/*
1317 	 * Setup kernel metadata and initialize the kernel dump
1318 	 * memory structure for FADump re-registration.
1319 	 */
1320 	if (fw_dump.ops->fadump_setup_metadata &&
1321 	    (fw_dump.ops->fadump_setup_metadata(&fw_dump) < 0))
1322 		pr_warn("Failed to setup kernel metadata!\n");
1323 	fw_dump.ops->fadump_init_mem_struct(&fw_dump);
1324 }
1325 
1326 static ssize_t fadump_release_memory_store(struct kobject *kobj,
1327 					struct kobj_attribute *attr,
1328 					const char *buf, size_t count)
1329 {
1330 	int input = -1;
1331 
1332 	if (!fw_dump.dump_active)
1333 		return -EPERM;
1334 
1335 	if (kstrtoint(buf, 0, &input))
1336 		return -EINVAL;
1337 
1338 	if (input == 1) {
1339 		/*
1340 		 * Take away the '/proc/vmcore'. We are releasing the dump
1341 		 * memory, hence it will not be valid anymore.
1342 		 */
1343 #ifdef CONFIG_PROC_VMCORE
1344 		vmcore_cleanup();
1345 #endif
1346 		fadump_invalidate_release_mem();
1347 
1348 	} else
1349 		return -EINVAL;
1350 	return count;
1351 }
1352 
1353 static ssize_t fadump_enabled_show(struct kobject *kobj,
1354 					struct kobj_attribute *attr,
1355 					char *buf)
1356 {
1357 	return sprintf(buf, "%d\n", fw_dump.fadump_enabled);
1358 }
1359 
1360 static ssize_t fadump_register_show(struct kobject *kobj,
1361 					struct kobj_attribute *attr,
1362 					char *buf)
1363 {
1364 	return sprintf(buf, "%d\n", fw_dump.dump_registered);
1365 }
1366 
1367 static ssize_t fadump_register_store(struct kobject *kobj,
1368 					struct kobj_attribute *attr,
1369 					const char *buf, size_t count)
1370 {
1371 	int ret = 0;
1372 	int input = -1;
1373 
1374 	if (!fw_dump.fadump_enabled || fw_dump.dump_active)
1375 		return -EPERM;
1376 
1377 	if (kstrtoint(buf, 0, &input))
1378 		return -EINVAL;
1379 
1380 	mutex_lock(&fadump_mutex);
1381 
1382 	switch (input) {
1383 	case 0:
1384 		if (fw_dump.dump_registered == 0) {
1385 			goto unlock_out;
1386 		}
1387 
1388 		/* Un-register Firmware-assisted dump */
1389 		pr_debug("Un-register firmware-assisted dump\n");
1390 		fw_dump.ops->fadump_unregister(&fw_dump);
1391 		break;
1392 	case 1:
1393 		if (fw_dump.dump_registered == 1) {
1394 			/* Un-register Firmware-assisted dump */
1395 			fw_dump.ops->fadump_unregister(&fw_dump);
1396 		}
1397 		/* Register Firmware-assisted dump */
1398 		ret = register_fadump();
1399 		break;
1400 	default:
1401 		ret = -EINVAL;
1402 		break;
1403 	}
1404 
1405 unlock_out:
1406 	mutex_unlock(&fadump_mutex);
1407 	return ret < 0 ? ret : count;
1408 }
1409 
1410 static int fadump_region_show(struct seq_file *m, void *private)
1411 {
1412 	if (!fw_dump.fadump_enabled)
1413 		return 0;
1414 
1415 	mutex_lock(&fadump_mutex);
1416 	fw_dump.ops->fadump_region_show(&fw_dump, m);
1417 	mutex_unlock(&fadump_mutex);
1418 	return 0;
1419 }
1420 
1421 static struct kobj_attribute fadump_release_attr = __ATTR(fadump_release_mem,
1422 						0200, NULL,
1423 						fadump_release_memory_store);
1424 static struct kobj_attribute fadump_attr = __ATTR(fadump_enabled,
1425 						0444, fadump_enabled_show,
1426 						NULL);
1427 static struct kobj_attribute fadump_register_attr = __ATTR(fadump_registered,
1428 						0644, fadump_register_show,
1429 						fadump_register_store);
1430 
1431 DEFINE_SHOW_ATTRIBUTE(fadump_region);
1432 
1433 static void fadump_init_files(void)
1434 {
1435 	struct dentry *debugfs_file;
1436 	int rc = 0;
1437 
1438 	rc = sysfs_create_file(kernel_kobj, &fadump_attr.attr);
1439 	if (rc)
1440 		printk(KERN_ERR "fadump: unable to create sysfs file"
1441 			" fadump_enabled (%d)\n", rc);
1442 
1443 	rc = sysfs_create_file(kernel_kobj, &fadump_register_attr.attr);
1444 	if (rc)
1445 		printk(KERN_ERR "fadump: unable to create sysfs file"
1446 			" fadump_registered (%d)\n", rc);
1447 
1448 	debugfs_file = debugfs_create_file("fadump_region", 0444,
1449 					powerpc_debugfs_root, NULL,
1450 					&fadump_region_fops);
1451 	if (!debugfs_file)
1452 		printk(KERN_ERR "fadump: unable to create debugfs file"
1453 				" fadump_region\n");
1454 
1455 	if (fw_dump.dump_active) {
1456 		rc = sysfs_create_file(kernel_kobj, &fadump_release_attr.attr);
1457 		if (rc)
1458 			printk(KERN_ERR "fadump: unable to create sysfs file"
1459 				" fadump_release_mem (%d)\n", rc);
1460 	}
1461 	return;
1462 }
1463 
1464 /*
1465  * Prepare for firmware-assisted dump.
1466  */
1467 int __init setup_fadump(void)
1468 {
1469 	if (!fw_dump.fadump_supported)
1470 		return 0;
1471 
1472 	fadump_init_files();
1473 	fadump_show_config();
1474 
1475 	if (!fw_dump.fadump_enabled)
1476 		return 1;
1477 
1478 	/*
1479 	 * If dump data is available then see if it is valid and prepare for
1480 	 * saving it to the disk.
1481 	 */
1482 	if (fw_dump.dump_active) {
1483 		/*
1484 		 * if dump process fails then invalidate the registration
1485 		 * and release memory before proceeding for re-registration.
1486 		 */
1487 		if (fw_dump.ops->fadump_process(&fw_dump) < 0)
1488 			fadump_invalidate_release_mem();
1489 	}
1490 	/* Initialize the kernel dump memory structure for FAD registration. */
1491 	else if (fw_dump.reserve_dump_area_size)
1492 		fw_dump.ops->fadump_init_mem_struct(&fw_dump);
1493 
1494 	return 1;
1495 }
1496 subsys_initcall(setup_fadump);
1497 #else /* !CONFIG_PRESERVE_FA_DUMP */
1498 
1499 /* Scan the Firmware Assisted dump configuration details. */
1500 int __init early_init_dt_scan_fw_dump(unsigned long node, const char *uname,
1501 				      int depth, void *data)
1502 {
1503 	if ((depth != 1) || (strcmp(uname, "ibm,opal") != 0))
1504 		return 0;
1505 
1506 	opal_fadump_dt_scan(&fw_dump, node);
1507 	return 1;
1508 }
1509 
1510 /*
1511  * When dump is active but PRESERVE_FA_DUMP is enabled on the kernel,
1512  * preserve crash data. The subsequent memory preserving kernel boot
1513  * is likely to process this crash data.
1514  */
1515 int __init fadump_reserve_mem(void)
1516 {
1517 	if (fw_dump.dump_active) {
1518 		/*
1519 		 * If last boot has crashed then reserve all the memory
1520 		 * above boot memory to preserve crash data.
1521 		 */
1522 		pr_info("Preserving crash data for processing in next boot.\n");
1523 		fadump_reserve_crash_area(fw_dump.boot_mem_top);
1524 	} else
1525 		pr_debug("FADump-aware kernel..\n");
1526 
1527 	return 1;
1528 }
1529 #endif /* CONFIG_PRESERVE_FA_DUMP */
1530 
1531 /* Preserve everything above the base address */
1532 static void __init fadump_reserve_crash_area(u64 base)
1533 {
1534 	struct memblock_region *reg;
1535 	u64 mstart, msize;
1536 
1537 	for_each_memblock(memory, reg) {
1538 		mstart = reg->base;
1539 		msize  = reg->size;
1540 
1541 		if ((mstart + msize) < base)
1542 			continue;
1543 
1544 		if (mstart < base) {
1545 			msize -= (base - mstart);
1546 			mstart = base;
1547 		}
1548 
1549 		pr_info("Reserving %lluMB of memory at %#016llx for preserving crash data",
1550 			(msize >> 20), mstart);
1551 		memblock_reserve(mstart, msize);
1552 	}
1553 }
1554 
1555 unsigned long __init arch_reserved_kernel_pages(void)
1556 {
1557 	return memblock_reserved_size() / PAGE_SIZE;
1558 }
1559