xref: /openbmc/linux/arch/powerpc/kernel/fadump.c (revision 8255da95e54519bb74638c2448ac17f4b34fe6f5)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Firmware Assisted dump: A robust mechanism to get reliable kernel crash
4  * dump with assistance from firmware. This approach does not use kexec,
5  * instead firmware assists in booting the kdump kernel while preserving
 * memory contents. Most of the code has been adapted from the phyp
 * assisted dump implementation written by Linas Vepstas and
 * Manish Ahuja.
9  *
10  * Copyright 2011 IBM Corporation
11  * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
12  */
13 
14 #undef DEBUG
15 #define pr_fmt(fmt) "fadump: " fmt
16 
17 #include <linux/string.h>
18 #include <linux/memblock.h>
19 #include <linux/delay.h>
20 #include <linux/seq_file.h>
21 #include <linux/crash_dump.h>
22 #include <linux/kobject.h>
23 #include <linux/sysfs.h>
24 #include <linux/slab.h>
25 #include <linux/cma.h>
26 #include <linux/hugetlb.h>
27 
28 #include <asm/debugfs.h>
29 #include <asm/page.h>
30 #include <asm/prom.h>
31 #include <asm/rtas.h>
32 #include <asm/fadump.h>
33 #include <asm/fadump-internal.h>
34 #include <asm/setup.h>
35 
36 #include "../platforms/pseries/rtas-fadump.h"
37 
/* FADump state for this kernel: support/enable flags, sizes and reservation layout. */
static struct fw_dump fw_dump;
/*
 * Flat device-tree "ibm,kernel-dump" property of the rtas node, looked up in
 * early_init_dt_scan_fw_dump(). Non-NULL means dump data is waiting for us.
 */
static const struct rtas_fadump_mem_struct *fdm_active;

/*
 * NOTE(review): not taken anywhere in the visible part of this file;
 * presumably serializes fadump (un)registration - confirm against the rest.
 */
static DEFINE_MUTEX(fadump_mutex);
/* Array of crash memory ranges, grown by allocate_crash_memory_ranges(). */
struct fad_crash_memory_ranges *crash_memory_ranges;
/* Number of bytes currently allocated for crash_memory_ranges. */
int crash_memory_ranges_size;
/* Number of entries of crash_memory_ranges currently in use. */
int crash_mem_ranges;
/* Number of entries that fit into the current allocation. */
int max_crash_mem_ranges;
46 
47 #ifdef CONFIG_CMA
48 static struct cma *fadump_cma;
49 
50 /*
51  * fadump_cma_init() - Initialize CMA area from a fadump reserved memory
52  *
53  * This function initializes CMA area from fadump reserved memory.
54  * The total size of fadump reserved memory covers for boot memory size
55  * + cpu data size + hpte size and metadata.
56  * Initialize only the area equivalent to boot memory size for CMA use.
57  * The reamining portion of fadump reserved memory will be not given
58  * to CMA and pages for thoes will stay reserved. boot memory size is
59  * aligned per CMA requirement to satisy cma_init_reserved_mem() call.
60  * But for some reason even if it fails we still have the memory reservation
61  * with us and we can still continue doing fadump.
62  */
63 int __init fadump_cma_init(void)
64 {
65 	unsigned long long base, size;
66 	int rc;
67 
68 	if (!fw_dump.fadump_enabled)
69 		return 0;
70 
71 	/*
72 	 * Do not use CMA if user has provided fadump=nocma kernel parameter.
73 	 * Return 1 to continue with fadump old behaviour.
74 	 */
75 	if (fw_dump.nocma)
76 		return 1;
77 
78 	base = fw_dump.reserve_dump_area_start;
79 	size = fw_dump.boot_memory_size;
80 
81 	if (!size)
82 		return 0;
83 
84 	rc = cma_init_reserved_mem(base, size, 0, "fadump_cma", &fadump_cma);
85 	if (rc) {
86 		pr_err("Failed to init cma area for firmware-assisted dump,%d\n", rc);
87 		/*
88 		 * Though the CMA init has failed we still have memory
89 		 * reservation with us. The reserved memory will be
90 		 * blocked from production system usage.  Hence return 1,
91 		 * so that we can continue with fadump.
92 		 */
93 		return 1;
94 	}
95 
96 	/*
97 	 * So we now have successfully initialized cma area for fadump.
98 	 */
99 	pr_info("Initialized 0x%lx bytes cma area at %ldMB from 0x%lx "
100 		"bytes of memory reserved for firmware-assisted dump\n",
101 		cma_get_size(fadump_cma),
102 		(unsigned long)cma_get_base(fadump_cma) >> 20,
103 		fw_dump.reserve_dump_area_size);
104 	return 1;
105 }
106 #else
107 static int __init fadump_cma_init(void) { return 1; }
108 #endif /* CONFIG_CMA */
109 
110 /* Scan the Firmware Assisted dump configuration details. */
111 int __init early_init_dt_scan_fw_dump(unsigned long node,
112 			const char *uname, int depth, void *data)
113 {
114 	if (depth != 1 || strcmp(uname, "rtas") != 0)
115 		return 0;
116 
117 	rtas_fadump_dt_scan(&fw_dump, node);
118 
119 	/*
120 	 * The 'ibm,kernel-dump' rtas node is present only if there is
121 	 * dump data waiting for us.
122 	 */
123 	fdm_active = of_get_flat_dt_prop(node, "ibm,kernel-dump", NULL);
124 	if (fdm_active)
125 		fw_dump.dump_active = 1;
126 
127 	return 1;
128 }
129 
130 /*
131  * If fadump is registered, check if the memory provided
132  * falls within boot memory area and reserved memory area.
133  */
134 int is_fadump_memory_area(u64 addr, ulong size)
135 {
136 	u64 d_start = fw_dump.reserve_dump_area_start;
137 	u64 d_end = d_start + fw_dump.reserve_dump_area_size;
138 
139 	if (!fw_dump.dump_registered)
140 		return 0;
141 
142 	if (((addr + size) > d_start) && (addr <= d_end))
143 		return 1;
144 
145 	return (addr + size) > RMA_START && addr <= fw_dump.boot_memory_size;
146 }
147 
148 int should_fadump_crash(void)
149 {
150 	if (!fw_dump.dump_registered || !fw_dump.fadumphdr_addr)
151 		return 0;
152 	return 1;
153 }
154 
155 int is_fadump_active(void)
156 {
157 	return fw_dump.dump_active;
158 }
159 
160 /*
161  * Returns true, if there are no holes in memory area between d_start to d_end,
162  * false otherwise.
163  */
164 static bool is_fadump_mem_area_contiguous(u64 d_start, u64 d_end)
165 {
166 	struct memblock_region *reg;
167 	bool ret = false;
168 	u64 start, end;
169 
170 	for_each_memblock(memory, reg) {
171 		start = max_t(u64, d_start, reg->base);
172 		end = min_t(u64, d_end, (reg->base + reg->size));
173 		if (d_start < end) {
174 			/* Memory hole from d_start to start */
175 			if (start > d_start)
176 				break;
177 
178 			if (end == d_end) {
179 				ret = true;
180 				break;
181 			}
182 
183 			d_start = end + 1;
184 		}
185 	}
186 
187 	return ret;
188 }
189 
190 /*
191  * Returns true, if there are no holes in boot memory area,
192  * false otherwise.
193  */
194 bool is_fadump_boot_mem_contiguous(void)
195 {
196 	return is_fadump_mem_area_contiguous(0, fw_dump.boot_memory_size);
197 }
198 
199 /*
200  * Returns true, if there are no holes in reserved memory area,
201  * false otherwise.
202  */
203 bool is_fadump_reserved_mem_contiguous(void)
204 {
205 	u64 d_start, d_end;
206 
207 	d_start	= fw_dump.reserve_dump_area_start;
208 	d_end	= d_start + fw_dump.reserve_dump_area_size;
209 	return is_fadump_mem_area_contiguous(d_start, d_end);
210 }
211 
212 /* Print firmware assisted dump configurations for debugging purpose. */
213 static void fadump_show_config(void)
214 {
215 	pr_debug("Support for firmware-assisted dump (fadump): %s\n",
216 			(fw_dump.fadump_supported ? "present" : "no support"));
217 
218 	if (!fw_dump.fadump_supported)
219 		return;
220 
221 	pr_debug("Fadump enabled    : %s\n",
222 				(fw_dump.fadump_enabled ? "yes" : "no"));
223 	pr_debug("Dump Active       : %s\n",
224 				(fw_dump.dump_active ? "yes" : "no"));
225 	pr_debug("Dump section sizes:\n");
226 	pr_debug("    CPU state data size: %lx\n", fw_dump.cpu_state_data_size);
227 	pr_debug("    HPTE region size   : %lx\n", fw_dump.hpte_region_size);
228 	pr_debug("Boot memory size  : %lx\n", fw_dump.boot_memory_size);
229 }
230 
231 /**
232  * fadump_calculate_reserve_size(): reserve variable boot area 5% of System RAM
233  *
234  * Function to find the largest memory size we need to reserve during early
235  * boot process. This will be the size of the memory that is required for a
236  * kernel to boot successfully.
237  *
238  * This function has been taken from phyp-assisted dump feature implementation.
239  *
240  * returns larger of 256MB or 5% rounded down to multiples of 256MB.
241  *
242  * TODO: Come up with better approach to find out more accurate memory size
243  * that is required for a kernel to boot successfully.
244  *
245  */
246 static inline unsigned long fadump_calculate_reserve_size(void)
247 {
248 	int ret;
249 	unsigned long long base, size;
250 
251 	if (fw_dump.reserve_bootvar)
252 		pr_warn("'fadump_reserve_mem=' parameter is deprecated in favor of 'crashkernel=' parameter.\n");
253 
254 	/*
255 	 * Check if the size is specified through crashkernel= cmdline
256 	 * option. If yes, then use that but ignore base as fadump reserves
257 	 * memory at a predefined offset.
258 	 */
259 	ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
260 				&size, &base);
261 	if (ret == 0 && size > 0) {
262 		unsigned long max_size;
263 
264 		if (fw_dump.reserve_bootvar)
265 			pr_info("Using 'crashkernel=' parameter for memory reservation.\n");
266 
267 		fw_dump.reserve_bootvar = (unsigned long)size;
268 
269 		/*
270 		 * Adjust if the boot memory size specified is above
271 		 * the upper limit.
272 		 */
273 		max_size = memblock_phys_mem_size() / MAX_BOOT_MEM_RATIO;
274 		if (fw_dump.reserve_bootvar > max_size) {
275 			fw_dump.reserve_bootvar = max_size;
276 			pr_info("Adjusted boot memory size to %luMB\n",
277 				(fw_dump.reserve_bootvar >> 20));
278 		}
279 
280 		return fw_dump.reserve_bootvar;
281 	} else if (fw_dump.reserve_bootvar) {
282 		/*
283 		 * 'fadump_reserve_mem=' is being used to reserve memory
284 		 * for firmware-assisted dump.
285 		 */
286 		return fw_dump.reserve_bootvar;
287 	}
288 
289 	/* divide by 20 to get 5% of value */
290 	size = memblock_phys_mem_size() / 20;
291 
292 	/* round it down in multiples of 256 */
293 	size = size & ~0x0FFFFFFFUL;
294 
295 	/* Truncate to memory_limit. We don't want to over reserve the memory.*/
296 	if (memory_limit && size > memory_limit)
297 		size = memory_limit;
298 
299 	return (size > MIN_BOOT_MEM ? size : MIN_BOOT_MEM);
300 }
301 
302 /*
303  * Calculate the total memory size required to be reserved for
304  * firmware-assisted dump registration.
305  */
306 static unsigned long get_fadump_area_size(void)
307 {
308 	unsigned long size = 0;
309 
310 	size += fw_dump.cpu_state_data_size;
311 	size += fw_dump.hpte_region_size;
312 	size += fw_dump.boot_memory_size;
313 	size += sizeof(struct fadump_crash_info_header);
314 	size += sizeof(struct elfhdr); /* ELF core header.*/
315 	size += sizeof(struct elf_phdr); /* place holder for cpu notes */
316 	/* Program headers for crash memory regions. */
317 	size += sizeof(struct elf_phdr) * (memblock_num_regions(memory) + 2);
318 
319 	size = PAGE_ALIGN(size);
320 	return size;
321 }
322 
323 static void __init fadump_reserve_crash_area(unsigned long base,
324 					     unsigned long size)
325 {
326 	struct memblock_region *reg;
327 	unsigned long mstart, mend, msize;
328 
329 	for_each_memblock(memory, reg) {
330 		mstart = max_t(unsigned long, base, reg->base);
331 		mend = reg->base + reg->size;
332 		mend = min(base + size, mend);
333 
334 		if (mstart < mend) {
335 			msize = mend - mstart;
336 			memblock_reserve(mstart, msize);
337 			pr_info("Reserved %ldMB of memory at %#016lx for saving crash dump\n",
338 				(msize >> 20), mstart);
339 		}
340 	}
341 }
342 
343 int __init fadump_reserve_mem(void)
344 {
345 	unsigned long base, size, memory_boundary;
346 
347 	if (!fw_dump.fadump_enabled)
348 		return 0;
349 
350 	if (!fw_dump.fadump_supported) {
351 		printk(KERN_INFO "Firmware-assisted dump is not supported on"
352 				" this hardware\n");
353 		fw_dump.fadump_enabled = 0;
354 		return 0;
355 	}
356 	/*
357 	 * Initialize boot memory size
358 	 * If dump is active then we have already calculated the size during
359 	 * first kernel.
360 	 */
361 	if (fdm_active)
362 		fw_dump.boot_memory_size = be64_to_cpu(fdm_active->rmr_region.source_len);
363 	else {
364 		fw_dump.boot_memory_size = fadump_calculate_reserve_size();
365 #ifdef CONFIG_CMA
366 		if (!fw_dump.nocma)
367 			fw_dump.boot_memory_size =
368 				ALIGN(fw_dump.boot_memory_size,
369 							FADUMP_CMA_ALIGNMENT);
370 #endif
371 	}
372 
373 	/*
374 	 * Calculate the memory boundary.
375 	 * If memory_limit is less than actual memory boundary then reserve
376 	 * the memory for fadump beyond the memory_limit and adjust the
377 	 * memory_limit accordingly, so that the running kernel can run with
378 	 * specified memory_limit.
379 	 */
380 	if (memory_limit && memory_limit < memblock_end_of_DRAM()) {
381 		size = get_fadump_area_size();
382 		if ((memory_limit + size) < memblock_end_of_DRAM())
383 			memory_limit += size;
384 		else
385 			memory_limit = memblock_end_of_DRAM();
386 		printk(KERN_INFO "Adjusted memory_limit for firmware-assisted"
387 				" dump, now %#016llx\n", memory_limit);
388 	}
389 	if (memory_limit)
390 		memory_boundary = memory_limit;
391 	else
392 		memory_boundary = memblock_end_of_DRAM();
393 
394 	size = get_fadump_area_size();
395 	fw_dump.reserve_dump_area_size = size;
396 	if (fw_dump.dump_active) {
397 		pr_info("Firmware-assisted dump is active.\n");
398 
399 #ifdef CONFIG_HUGETLB_PAGE
400 		/*
401 		 * FADump capture kernel doesn't care much about hugepages.
402 		 * In fact, handling hugepages in capture kernel is asking for
403 		 * trouble. So, disable HugeTLB support when fadump is active.
404 		 */
405 		hugetlb_disabled = true;
406 #endif
407 		/*
408 		 * If last boot has crashed then reserve all the memory
409 		 * above boot_memory_size so that we don't touch it until
410 		 * dump is written to disk by userspace tool. This memory
411 		 * will be released for general use once the dump is saved.
412 		 */
413 		base = fw_dump.boot_memory_size;
414 		size = memory_boundary - base;
415 		fadump_reserve_crash_area(base, size);
416 
417 		fw_dump.fadumphdr_addr =
418 				be64_to_cpu(fdm_active->rmr_region.destination_address) +
419 				be64_to_cpu(fdm_active->rmr_region.source_len);
420 		pr_debug("fadumphdr_addr = %pa\n", &fw_dump.fadumphdr_addr);
421 
422 		/*
423 		 * Start address of reserve dump area (permanent reservation)
424 		 * for re-registering FADump after dump capture.
425 		 */
426 		fw_dump.reserve_dump_area_start =
427 			be64_to_cpu(fdm_active->cpu_state_data.destination_address);
428 	} else {
429 		/*
430 		 * Reserve memory at an offset closer to bottom of the RAM to
431 		 * minimize the impact of memory hot-remove operation. We can't
432 		 * use memblock_find_in_range() here since it doesn't allocate
433 		 * from bottom to top.
434 		 */
435 		for (base = fw_dump.boot_memory_size;
436 		     base <= (memory_boundary - size);
437 		     base += size) {
438 			if (memblock_is_region_memory(base, size) &&
439 			    !memblock_is_region_reserved(base, size))
440 				break;
441 		}
442 		if ((base > (memory_boundary - size)) ||
443 		    memblock_reserve(base, size)) {
444 			pr_err("Failed to reserve memory\n");
445 			return 0;
446 		}
447 
448 		pr_info("Reserved %ldMB of memory at %ldMB for firmware-"
449 			"assisted dump (System RAM: %ldMB)\n",
450 			(unsigned long)(size >> 20),
451 			(unsigned long)(base >> 20),
452 			(unsigned long)(memblock_phys_mem_size() >> 20));
453 
454 		fw_dump.reserve_dump_area_start = base;
455 		return fadump_cma_init();
456 	}
457 	return 1;
458 }
459 
460 unsigned long __init arch_reserved_kernel_pages(void)
461 {
462 	return memblock_reserved_size() / PAGE_SIZE;
463 }
464 
465 /* Look for fadump= cmdline option. */
466 static int __init early_fadump_param(char *p)
467 {
468 	if (!p)
469 		return 1;
470 
471 	if (strncmp(p, "on", 2) == 0)
472 		fw_dump.fadump_enabled = 1;
473 	else if (strncmp(p, "off", 3) == 0)
474 		fw_dump.fadump_enabled = 0;
475 	else if (strncmp(p, "nocma", 5) == 0) {
476 		fw_dump.fadump_enabled = 1;
477 		fw_dump.nocma = 1;
478 	}
479 
480 	return 0;
481 }
482 early_param("fadump", early_fadump_param);
483 
484 /*
485  * Look for fadump_reserve_mem= cmdline option
486  * TODO: Remove references to 'fadump_reserve_mem=' parameter,
487  *       the sooner 'crashkernel=' parameter is accustomed to.
488  */
489 static int __init early_fadump_reserve_mem(char *p)
490 {
491 	if (p)
492 		fw_dump.reserve_bootvar = memparse(p, &p);
493 	return 0;
494 }
495 early_param("fadump_reserve_mem", early_fadump_reserve_mem);
496 
497 void crash_fadump(struct pt_regs *regs, const char *str)
498 {
499 	struct fadump_crash_info_header *fdh = NULL;
500 	int old_cpu, this_cpu;
501 
502 	if (!should_fadump_crash())
503 		return;
504 
505 	/*
506 	 * old_cpu == -1 means this is the first CPU which has come here,
507 	 * go ahead and trigger fadump.
508 	 *
509 	 * old_cpu != -1 means some other CPU has already on it's way
510 	 * to trigger fadump, just keep looping here.
511 	 */
512 	this_cpu = smp_processor_id();
513 	old_cpu = cmpxchg(&crashing_cpu, -1, this_cpu);
514 
515 	if (old_cpu != -1) {
516 		/*
517 		 * We can't loop here indefinitely. Wait as long as fadump
518 		 * is in force. If we race with fadump un-registration this
519 		 * loop will break and then we go down to normal panic path
520 		 * and reboot. If fadump is in force the first crashing
521 		 * cpu will definitely trigger fadump.
522 		 */
523 		while (fw_dump.dump_registered)
524 			cpu_relax();
525 		return;
526 	}
527 
528 	fdh = __va(fw_dump.fadumphdr_addr);
529 	fdh->crashing_cpu = crashing_cpu;
530 	crash_save_vmcoreinfo();
531 
532 	if (regs)
533 		fdh->regs = *regs;
534 	else
535 		ppc_save_regs(&fdh->regs);
536 
537 	fdh->online_mask = *cpu_online_mask;
538 
539 	fw_dump.ops->fadump_trigger(fdh, str);
540 }
541 
542 #define GPR_MASK	0xffffff0000000000
543 static inline int fadump_gpr_index(u64 id)
544 {
545 	int i = -1;
546 	char str[3];
547 
548 	if ((id & GPR_MASK) == fadump_str_to_u64("GPR")) {
549 		/* get the digits at the end */
550 		id &= ~GPR_MASK;
551 		id >>= 24;
552 		str[2] = '\0';
553 		str[1] = id & 0xff;
554 		str[0] = (id >> 8) & 0xff;
555 		sscanf(str, "%d", &i);
556 		if (i > 31)
557 			i = -1;
558 	}
559 	return i;
560 }
561 
562 static inline void fadump_set_regval(struct pt_regs *regs, u64 reg_id,
563 								u64 reg_val)
564 {
565 	int i;
566 
567 	i = fadump_gpr_index(reg_id);
568 	if (i >= 0)
569 		regs->gpr[i] = (unsigned long)reg_val;
570 	else if (reg_id == fadump_str_to_u64("NIA"))
571 		regs->nip = (unsigned long)reg_val;
572 	else if (reg_id == fadump_str_to_u64("MSR"))
573 		regs->msr = (unsigned long)reg_val;
574 	else if (reg_id == fadump_str_to_u64("CTR"))
575 		regs->ctr = (unsigned long)reg_val;
576 	else if (reg_id == fadump_str_to_u64("LR"))
577 		regs->link = (unsigned long)reg_val;
578 	else if (reg_id == fadump_str_to_u64("XER"))
579 		regs->xer = (unsigned long)reg_val;
580 	else if (reg_id == fadump_str_to_u64("CR"))
581 		regs->ccr = (unsigned long)reg_val;
582 	else if (reg_id == fadump_str_to_u64("DAR"))
583 		regs->dar = (unsigned long)reg_val;
584 	else if (reg_id == fadump_str_to_u64("DSISR"))
585 		regs->dsisr = (unsigned long)reg_val;
586 }
587 
588 static struct rtas_fadump_reg_entry*
589 fadump_read_registers(struct rtas_fadump_reg_entry *reg_entry, struct pt_regs *regs)
590 {
591 	memset(regs, 0, sizeof(struct pt_regs));
592 
593 	while (be64_to_cpu(reg_entry->reg_id) != fadump_str_to_u64("CPUEND")) {
594 		fadump_set_regval(regs, be64_to_cpu(reg_entry->reg_id),
595 					be64_to_cpu(reg_entry->reg_value));
596 		reg_entry++;
597 	}
598 	reg_entry++;
599 	return reg_entry;
600 }
601 
602 u32 *fadump_regs_to_elf_notes(u32 *buf, struct pt_regs *regs)
603 {
604 	struct elf_prstatus prstatus;
605 
606 	memset(&prstatus, 0, sizeof(prstatus));
607 	/*
608 	 * FIXME: How do i get PID? Do I really need it?
609 	 * prstatus.pr_pid = ????
610 	 */
611 	elf_core_copy_kernel_regs(&prstatus.pr_reg, regs);
612 	buf = append_elf_note(buf, CRASH_CORE_NOTE_NAME, NT_PRSTATUS,
613 			      &prstatus, sizeof(prstatus));
614 	return buf;
615 }
616 
617 void fadump_update_elfcore_header(char *bufp)
618 {
619 	struct elfhdr *elf;
620 	struct elf_phdr *phdr;
621 
622 	elf = (struct elfhdr *)bufp;
623 	bufp += sizeof(struct elfhdr);
624 
625 	/* First note is a place holder for cpu notes info. */
626 	phdr = (struct elf_phdr *)bufp;
627 
628 	if (phdr->p_type == PT_NOTE) {
629 		phdr->p_paddr	= __pa(fw_dump.cpu_notes_buf_vaddr);
630 		phdr->p_offset	= phdr->p_paddr;
631 		phdr->p_filesz	= fw_dump.cpu_notes_buf_size;
632 		phdr->p_memsz = fw_dump.cpu_notes_buf_size;
633 	}
634 	return;
635 }
636 
637 static void *fadump_alloc_buffer(unsigned long size)
638 {
639 	unsigned long count, i;
640 	struct page *page;
641 	void *vaddr;
642 
643 	vaddr = alloc_pages_exact(size, GFP_KERNEL | __GFP_ZERO);
644 	if (!vaddr)
645 		return NULL;
646 
647 	count = PAGE_ALIGN(size) / PAGE_SIZE;
648 	page = virt_to_page(vaddr);
649 	for (i = 0; i < count; i++)
650 		mark_page_reserved(page + i);
651 	return vaddr;
652 }
653 
654 static void fadump_free_buffer(unsigned long vaddr, unsigned long size)
655 {
656 	free_reserved_area((void *)vaddr, (void *)(vaddr + size), -1, NULL);
657 }
658 
659 s32 fadump_setup_cpu_notes_buf(u32 num_cpus)
660 {
661 	/* Allocate buffer to hold cpu crash notes. */
662 	fw_dump.cpu_notes_buf_size = num_cpus * sizeof(note_buf_t);
663 	fw_dump.cpu_notes_buf_size = PAGE_ALIGN(fw_dump.cpu_notes_buf_size);
664 	fw_dump.cpu_notes_buf_vaddr =
665 		(unsigned long)fadump_alloc_buffer(fw_dump.cpu_notes_buf_size);
666 	if (!fw_dump.cpu_notes_buf_vaddr) {
667 		pr_err("Failed to allocate %ld bytes for CPU notes buffer\n",
668 		       fw_dump.cpu_notes_buf_size);
669 		return -ENOMEM;
670 	}
671 
672 	pr_debug("Allocated buffer for cpu notes of size %ld at 0x%lx\n",
673 		 fw_dump.cpu_notes_buf_size,
674 		 fw_dump.cpu_notes_buf_vaddr);
675 	return 0;
676 }
677 
678 void fadump_free_cpu_notes_buf(void)
679 {
680 	if (!fw_dump.cpu_notes_buf_vaddr)
681 		return;
682 
683 	fadump_free_buffer(fw_dump.cpu_notes_buf_vaddr,
684 			   fw_dump.cpu_notes_buf_size);
685 	fw_dump.cpu_notes_buf_vaddr = 0;
686 	fw_dump.cpu_notes_buf_size = 0;
687 }
688 
689 /*
690  * Read CPU state dump data and convert it into ELF notes.
691  * The CPU dump starts with magic number "REGSAVE". NumCpusOffset should be
692  * used to access the data to allow for additional fields to be added without
693  * affecting compatibility. Each list of registers for a CPU starts with
694  * "CPUSTRT" and ends with "CPUEND". Each register entry is of 16 bytes,
695  * 8 Byte ASCII identifier and 8 Byte register value. The register entry
696  * with identifier "CPUSTRT" and "CPUEND" contains 4 byte cpu id as part
697  * of register value. For more details refer to PAPR document.
698  *
699  * Only for the crashing cpu we ignore the CPU dump data and get exact
700  * state from fadump crash info structure populated by first kernel at the
701  * time of crash.
702  */
703 static int __init fadump_build_cpu_notes(const struct rtas_fadump_mem_struct *fdm)
704 {
705 	struct rtas_fadump_reg_save_area_header *reg_header;
706 	struct rtas_fadump_reg_entry *reg_entry;
707 	struct fadump_crash_info_header *fdh = NULL;
708 	void *vaddr;
709 	unsigned long addr;
710 	u32 num_cpus, *note_buf;
711 	struct pt_regs regs;
712 	int i, rc = 0, cpu = 0;
713 
714 	if (!fdm->cpu_state_data.bytes_dumped)
715 		return -EINVAL;
716 
717 	addr = be64_to_cpu(fdm->cpu_state_data.destination_address);
718 	vaddr = __va(addr);
719 
720 	reg_header = vaddr;
721 	if (be64_to_cpu(reg_header->magic_number) !=
722 	    fadump_str_to_u64("REGSAVE")) {
723 		printk(KERN_ERR "Unable to read register save area.\n");
724 		return -ENOENT;
725 	}
726 	pr_debug("--------CPU State Data------------\n");
727 	pr_debug("Magic Number: %llx\n", be64_to_cpu(reg_header->magic_number));
728 	pr_debug("NumCpuOffset: %x\n", be32_to_cpu(reg_header->num_cpu_offset));
729 
730 	vaddr += be32_to_cpu(reg_header->num_cpu_offset);
731 	num_cpus = be32_to_cpu(*((__be32 *)(vaddr)));
732 	pr_debug("NumCpus     : %u\n", num_cpus);
733 	vaddr += sizeof(u32);
734 	reg_entry = (struct rtas_fadump_reg_entry *)vaddr;
735 
736 	rc = fadump_setup_cpu_notes_buf(num_cpus);
737 	if (rc != 0)
738 		return rc;
739 
740 	note_buf = (u32 *)fw_dump.cpu_notes_buf_vaddr;
741 
742 	if (fw_dump.fadumphdr_addr)
743 		fdh = __va(fw_dump.fadumphdr_addr);
744 
745 	for (i = 0; i < num_cpus; i++) {
746 		if (be64_to_cpu(reg_entry->reg_id) != fadump_str_to_u64("CPUSTRT")) {
747 			printk(KERN_ERR "Unable to read CPU state data\n");
748 			rc = -ENOENT;
749 			goto error_out;
750 		}
751 		/* Lower 4 bytes of reg_value contains logical cpu id */
752 		cpu = be64_to_cpu(reg_entry->reg_value) & RTAS_FADUMP_CPU_ID_MASK;
753 		if (fdh && !cpumask_test_cpu(cpu, &fdh->online_mask)) {
754 			RTAS_FADUMP_SKIP_TO_NEXT_CPU(reg_entry);
755 			continue;
756 		}
757 		pr_debug("Reading register data for cpu %d...\n", cpu);
758 		if (fdh && fdh->crashing_cpu == cpu) {
759 			regs = fdh->regs;
760 			note_buf = fadump_regs_to_elf_notes(note_buf, &regs);
761 			RTAS_FADUMP_SKIP_TO_NEXT_CPU(reg_entry);
762 		} else {
763 			reg_entry++;
764 			reg_entry = fadump_read_registers(reg_entry, &regs);
765 			note_buf = fadump_regs_to_elf_notes(note_buf, &regs);
766 		}
767 	}
768 	final_note(note_buf);
769 
770 	if (fdh) {
771 		pr_debug("Updating elfcore header (%llx) with cpu notes\n",
772 							fdh->elfcorehdr_addr);
773 		fadump_update_elfcore_header((char *)__va(fdh->elfcorehdr_addr));
774 	}
775 	return 0;
776 
777 error_out:
778 	fadump_free_cpu_notes_buf();
779 	return rc;
780 
781 }
782 
783 /*
784  * Validate and process the dump data stored by firmware before exporting
785  * it through '/proc/vmcore'.
786  */
787 static int __init process_fadump(const struct rtas_fadump_mem_struct *fdm_active)
788 {
789 	struct fadump_crash_info_header *fdh;
790 	int rc = 0;
791 
792 	if (!fdm_active || !fw_dump.fadumphdr_addr)
793 		return -EINVAL;
794 
795 	/* Check if the dump data is valid. */
796 	if ((be16_to_cpu(fdm_active->header.dump_status_flag) == RTAS_FADUMP_ERROR_FLAG) ||
797 			(fdm_active->cpu_state_data.error_flags != 0) ||
798 			(fdm_active->rmr_region.error_flags != 0)) {
799 		printk(KERN_ERR "Dump taken by platform is not valid\n");
800 		return -EINVAL;
801 	}
802 	if ((fdm_active->rmr_region.bytes_dumped !=
803 			fdm_active->rmr_region.source_len) ||
804 			!fdm_active->cpu_state_data.bytes_dumped) {
805 		printk(KERN_ERR "Dump taken by platform is incomplete\n");
806 		return -EINVAL;
807 	}
808 
809 	/* Validate the fadump crash info header */
810 	fdh = __va(fw_dump.fadumphdr_addr);
811 	if (fdh->magic_number != FADUMP_CRASH_INFO_MAGIC) {
812 		printk(KERN_ERR "Crash info header is not valid.\n");
813 		return -EINVAL;
814 	}
815 
816 	rc = fadump_build_cpu_notes(fdm_active);
817 	if (rc)
818 		return rc;
819 
820 	/*
821 	 * We are done validating dump info and elfcore header is now ready
822 	 * to be exported. set elfcorehdr_addr so that vmcore module will
823 	 * export the elfcore header through '/proc/vmcore'.
824 	 */
825 	elfcorehdr_addr = fdh->elfcorehdr_addr;
826 
827 	return 0;
828 }
829 
830 static void free_crash_memory_ranges(void)
831 {
832 	kfree(crash_memory_ranges);
833 	crash_memory_ranges = NULL;
834 	crash_memory_ranges_size = 0;
835 	max_crash_mem_ranges = 0;
836 }
837 
838 /*
839  * Allocate or reallocate crash memory ranges array in incremental units
840  * of PAGE_SIZE.
841  */
842 static int allocate_crash_memory_ranges(void)
843 {
844 	struct fad_crash_memory_ranges *new_array;
845 	u64 new_size;
846 
847 	new_size = crash_memory_ranges_size + PAGE_SIZE;
848 	pr_debug("Allocating %llu bytes of memory for crash memory ranges\n",
849 		 new_size);
850 
851 	new_array = krealloc(crash_memory_ranges, new_size, GFP_KERNEL);
852 	if (new_array == NULL) {
853 		pr_err("Insufficient memory for setting up crash memory ranges\n");
854 		free_crash_memory_ranges();
855 		return -ENOMEM;
856 	}
857 
858 	crash_memory_ranges = new_array;
859 	crash_memory_ranges_size = new_size;
860 	max_crash_mem_ranges = (new_size /
861 				sizeof(struct fad_crash_memory_ranges));
862 	return 0;
863 }
864 
865 static inline int fadump_add_crash_memory(unsigned long long base,
866 					  unsigned long long end)
867 {
868 	u64  start, size;
869 	bool is_adjacent = false;
870 
871 	if (base == end)
872 		return 0;
873 
874 	/*
875 	 * Fold adjacent memory ranges to bring down the memory ranges/
876 	 * PT_LOAD segments count.
877 	 */
878 	if (crash_mem_ranges) {
879 		start = crash_memory_ranges[crash_mem_ranges - 1].base;
880 		size = crash_memory_ranges[crash_mem_ranges - 1].size;
881 
882 		if ((start + size) == base)
883 			is_adjacent = true;
884 	}
885 	if (!is_adjacent) {
886 		/* resize the array on reaching the limit */
887 		if (crash_mem_ranges == max_crash_mem_ranges) {
888 			int ret;
889 
890 			ret = allocate_crash_memory_ranges();
891 			if (ret)
892 				return ret;
893 		}
894 
895 		start = base;
896 		crash_memory_ranges[crash_mem_ranges].base = start;
897 		crash_mem_ranges++;
898 	}
899 
900 	crash_memory_ranges[crash_mem_ranges - 1].size = (end - start);
901 	pr_debug("crash_memory_range[%d] [%#016llx-%#016llx], %#llx bytes\n",
902 		(crash_mem_ranges - 1), start, end - 1, (end - start));
903 	return 0;
904 }
905 
906 static int fadump_exclude_reserved_area(unsigned long long start,
907 					unsigned long long end)
908 {
909 	unsigned long long ra_start, ra_end;
910 	int ret = 0;
911 
912 	ra_start = fw_dump.reserve_dump_area_start;
913 	ra_end = ra_start + fw_dump.reserve_dump_area_size;
914 
915 	if ((ra_start < end) && (ra_end > start)) {
916 		if ((start < ra_start) && (end > ra_end)) {
917 			ret = fadump_add_crash_memory(start, ra_start);
918 			if (ret)
919 				return ret;
920 
921 			ret = fadump_add_crash_memory(ra_end, end);
922 		} else if (start < ra_start) {
923 			ret = fadump_add_crash_memory(start, ra_start);
924 		} else if (ra_end < end) {
925 			ret = fadump_add_crash_memory(ra_end, end);
926 		}
927 	} else
928 		ret = fadump_add_crash_memory(start, end);
929 
930 	return ret;
931 }
932 
933 static int fadump_init_elfcore_header(char *bufp)
934 {
935 	struct elfhdr *elf;
936 
937 	elf = (struct elfhdr *) bufp;
938 	bufp += sizeof(struct elfhdr);
939 	memcpy(elf->e_ident, ELFMAG, SELFMAG);
940 	elf->e_ident[EI_CLASS] = ELF_CLASS;
941 	elf->e_ident[EI_DATA] = ELF_DATA;
942 	elf->e_ident[EI_VERSION] = EV_CURRENT;
943 	elf->e_ident[EI_OSABI] = ELF_OSABI;
944 	memset(elf->e_ident+EI_PAD, 0, EI_NIDENT-EI_PAD);
945 	elf->e_type = ET_CORE;
946 	elf->e_machine = ELF_ARCH;
947 	elf->e_version = EV_CURRENT;
948 	elf->e_entry = 0;
949 	elf->e_phoff = sizeof(struct elfhdr);
950 	elf->e_shoff = 0;
951 #if defined(_CALL_ELF)
952 	elf->e_flags = _CALL_ELF;
953 #else
954 	elf->e_flags = 0;
955 #endif
956 	elf->e_ehsize = sizeof(struct elfhdr);
957 	elf->e_phentsize = sizeof(struct elf_phdr);
958 	elf->e_phnum = 0;
959 	elf->e_shentsize = 0;
960 	elf->e_shnum = 0;
961 	elf->e_shstrndx = 0;
962 
963 	return 0;
964 }
965 
966 /*
967  * Traverse through memblock structure and setup crash memory ranges. These
968  * ranges will be used create PT_LOAD program headers in elfcore header.
969  */
970 static int fadump_setup_crash_memory_ranges(void)
971 {
972 	struct memblock_region *reg;
973 	unsigned long long start, end;
974 	int ret;
975 
976 	pr_debug("Setup crash memory ranges.\n");
977 	crash_mem_ranges = 0;
978 
979 	/*
980 	 * add the first memory chunk (RMA_START through boot_memory_size) as
981 	 * a separate memory chunk. The reason is, at the time crash firmware
982 	 * will move the content of this memory chunk to different location
983 	 * specified during fadump registration. We need to create a separate
984 	 * program header for this chunk with the correct offset.
985 	 */
986 	ret = fadump_add_crash_memory(RMA_START, fw_dump.boot_memory_size);
987 	if (ret)
988 		return ret;
989 
990 	for_each_memblock(memory, reg) {
991 		start = (unsigned long long)reg->base;
992 		end = start + (unsigned long long)reg->size;
993 
994 		/*
995 		 * skip the first memory chunk that is already added (RMA_START
996 		 * through boot_memory_size). This logic needs a relook if and
997 		 * when RMA_START changes to a non-zero value.
998 		 */
999 		BUILD_BUG_ON(RMA_START != 0);
1000 		if (start < fw_dump.boot_memory_size) {
1001 			if (end > fw_dump.boot_memory_size)
1002 				start = fw_dump.boot_memory_size;
1003 			else
1004 				continue;
1005 		}
1006 
1007 		/* add this range excluding the reserved dump area. */
1008 		ret = fadump_exclude_reserved_area(start, end);
1009 		if (ret)
1010 			return ret;
1011 	}
1012 
1013 	return 0;
1014 }
1015 
1016 /*
1017  * If the given physical address falls within the boot memory region then
1018  * return the relocated address that points to the dump region reserved
1019  * for saving initial boot memory contents.
1020  */
1021 static inline unsigned long fadump_relocate(unsigned long paddr)
1022 {
1023 	if (paddr > RMA_START && paddr < fw_dump.boot_memory_size)
1024 		return fw_dump.boot_mem_dest_addr + paddr;
1025 	else
1026 		return paddr;
1027 }
1028 
1029 static int fadump_create_elfcore_headers(char *bufp)
1030 {
1031 	struct elfhdr *elf;
1032 	struct elf_phdr *phdr;
1033 	int i;
1034 
1035 	fadump_init_elfcore_header(bufp);
1036 	elf = (struct elfhdr *)bufp;
1037 	bufp += sizeof(struct elfhdr);
1038 
1039 	/*
1040 	 * setup ELF PT_NOTE, place holder for cpu notes info. The notes info
1041 	 * will be populated during second kernel boot after crash. Hence
1042 	 * this PT_NOTE will always be the first elf note.
1043 	 *
1044 	 * NOTE: Any new ELF note addition should be placed after this note.
1045 	 */
1046 	phdr = (struct elf_phdr *)bufp;
1047 	bufp += sizeof(struct elf_phdr);
1048 	phdr->p_type = PT_NOTE;
1049 	phdr->p_flags = 0;
1050 	phdr->p_vaddr = 0;
1051 	phdr->p_align = 0;
1052 
1053 	phdr->p_offset = 0;
1054 	phdr->p_paddr = 0;
1055 	phdr->p_filesz = 0;
1056 	phdr->p_memsz = 0;
1057 
1058 	(elf->e_phnum)++;
1059 
1060 	/* setup ELF PT_NOTE for vmcoreinfo */
1061 	phdr = (struct elf_phdr *)bufp;
1062 	bufp += sizeof(struct elf_phdr);
1063 	phdr->p_type	= PT_NOTE;
1064 	phdr->p_flags	= 0;
1065 	phdr->p_vaddr	= 0;
1066 	phdr->p_align	= 0;
1067 
1068 	phdr->p_paddr	= fadump_relocate(paddr_vmcoreinfo_note());
1069 	phdr->p_offset	= phdr->p_paddr;
1070 	phdr->p_memsz	= phdr->p_filesz = VMCOREINFO_NOTE_SIZE;
1071 
1072 	/* Increment number of program headers. */
1073 	(elf->e_phnum)++;
1074 
1075 	/* setup PT_LOAD sections. */
1076 
1077 	for (i = 0; i < crash_mem_ranges; i++) {
1078 		unsigned long long mbase, msize;
1079 		mbase = crash_memory_ranges[i].base;
1080 		msize = crash_memory_ranges[i].size;
1081 
1082 		if (!msize)
1083 			continue;
1084 
1085 		phdr = (struct elf_phdr *)bufp;
1086 		bufp += sizeof(struct elf_phdr);
1087 		phdr->p_type	= PT_LOAD;
1088 		phdr->p_flags	= PF_R|PF_W|PF_X;
1089 		phdr->p_offset	= mbase;
1090 
1091 		if (mbase == RMA_START) {
1092 			/*
1093 			 * The entire RMA region will be moved by firmware
1094 			 * to the specified destination_address. Hence set
1095 			 * the correct offset.
1096 			 */
1097 			phdr->p_offset = fw_dump.boot_mem_dest_addr;
1098 		}
1099 
1100 		phdr->p_paddr = mbase;
1101 		phdr->p_vaddr = (unsigned long)__va(mbase);
1102 		phdr->p_filesz = msize;
1103 		phdr->p_memsz = msize;
1104 		phdr->p_align = 0;
1105 
1106 		/* Increment number of program headers. */
1107 		(elf->e_phnum)++;
1108 	}
1109 	return 0;
1110 }
1111 
1112 static unsigned long init_fadump_header(unsigned long addr)
1113 {
1114 	struct fadump_crash_info_header *fdh;
1115 
1116 	if (!addr)
1117 		return 0;
1118 
1119 	fw_dump.fadumphdr_addr = addr;
1120 	fdh = __va(addr);
1121 	addr += sizeof(struct fadump_crash_info_header);
1122 
1123 	memset(fdh, 0, sizeof(struct fadump_crash_info_header));
1124 	fdh->magic_number = FADUMP_CRASH_INFO_MAGIC;
1125 	fdh->elfcorehdr_addr = addr;
1126 	/* We will set the crashing cpu id in crash_fadump() during crash. */
1127 	fdh->crashing_cpu = FADUMP_CPU_UNKNOWN;
1128 
1129 	return addr;
1130 }
1131 
1132 static int register_fadump(void)
1133 {
1134 	unsigned long addr;
1135 	void *vaddr;
1136 	int ret;
1137 
1138 	/*
1139 	 * If no memory is reserved then we can not register for firmware-
1140 	 * assisted dump.
1141 	 */
1142 	if (!fw_dump.reserve_dump_area_size)
1143 		return -ENODEV;
1144 
1145 	ret = fadump_setup_crash_memory_ranges();
1146 	if (ret)
1147 		return ret;
1148 
1149 	addr = fw_dump.fadumphdr_addr;
1150 
1151 	/* Initialize fadump crash info header. */
1152 	addr = init_fadump_header(addr);
1153 	vaddr = __va(addr);
1154 
1155 	pr_debug("Creating ELF core headers at %#016lx\n", addr);
1156 	fadump_create_elfcore_headers(vaddr);
1157 
1158 	/* register the future kernel dump with firmware. */
1159 	pr_debug("Registering for firmware-assisted kernel dump...\n");
1160 	return fw_dump.ops->fadump_register(&fw_dump);
1161 }
1162 
1163 static int fadump_invalidate_dump(const struct rtas_fadump_mem_struct *fdm)
1164 {
1165 	int rc = 0;
1166 	unsigned int wait_time;
1167 
1168 	pr_debug("Invalidating firmware-assisted dump registration\n");
1169 
1170 	/* TODO: Add upper time limit for the delay */
1171 	do {
1172 		rc = rtas_call(fw_dump.ibm_configure_kernel_dump, 3, 1, NULL,
1173 			FADUMP_INVALIDATE, fdm,
1174 			sizeof(struct rtas_fadump_mem_struct));
1175 
1176 		wait_time = rtas_busy_delay_time(rc);
1177 		if (wait_time)
1178 			mdelay(wait_time);
1179 	} while (wait_time);
1180 
1181 	if (rc) {
1182 		pr_err("Failed to invalidate firmware-assisted dump registration. Unexpected error (%d).\n", rc);
1183 		return rc;
1184 	}
1185 	fw_dump.dump_active = 0;
1186 	fdm_active = NULL;
1187 	return 0;
1188 }
1189 
1190 void fadump_cleanup(void)
1191 {
1192 	/* Invalidate the registration only if dump is active. */
1193 	if (fw_dump.dump_active) {
1194 		/* pass the same memory dump structure provided by platform */
1195 		fadump_invalidate_dump(fdm_active);
1196 	} else if (fw_dump.dump_registered) {
1197 		/* Un-register Firmware-assisted dump if it was registered. */
1198 		fw_dump.ops->fadump_unregister(&fw_dump);
1199 		free_crash_memory_ranges();
1200 	}
1201 }
1202 
1203 static void fadump_free_reserved_memory(unsigned long start_pfn,
1204 					unsigned long end_pfn)
1205 {
1206 	unsigned long pfn;
1207 	unsigned long time_limit = jiffies + HZ;
1208 
1209 	pr_info("freeing reserved memory (0x%llx - 0x%llx)\n",
1210 		PFN_PHYS(start_pfn), PFN_PHYS(end_pfn));
1211 
1212 	for (pfn = start_pfn; pfn < end_pfn; pfn++) {
1213 		free_reserved_page(pfn_to_page(pfn));
1214 
1215 		if (time_after(jiffies, time_limit)) {
1216 			cond_resched();
1217 			time_limit = jiffies + HZ;
1218 		}
1219 	}
1220 }
1221 
1222 /*
1223  * Skip memory holes and free memory that was actually reserved.
1224  */
1225 static void fadump_release_reserved_area(unsigned long start, unsigned long end)
1226 {
1227 	struct memblock_region *reg;
1228 	unsigned long tstart, tend;
1229 	unsigned long start_pfn = PHYS_PFN(start);
1230 	unsigned long end_pfn = PHYS_PFN(end);
1231 
1232 	for_each_memblock(memory, reg) {
1233 		tstart = max(start_pfn, memblock_region_memory_base_pfn(reg));
1234 		tend = min(end_pfn, memblock_region_memory_end_pfn(reg));
1235 		if (tstart < tend) {
1236 			fadump_free_reserved_memory(tstart, tend);
1237 
1238 			if (tend == end_pfn)
1239 				break;
1240 
1241 			start_pfn = tend + 1;
1242 		}
1243 	}
1244 }
1245 
1246 /*
1247  * Release the memory that was reserved in early boot to preserve the memory
1248  * contents. The released memory will be available for general use.
1249  */
1250 static void fadump_release_memory(unsigned long begin, unsigned long end)
1251 {
1252 	unsigned long ra_start, ra_end;
1253 
1254 	ra_start = fw_dump.reserve_dump_area_start;
1255 	ra_end = ra_start + fw_dump.reserve_dump_area_size;
1256 
1257 	/*
1258 	 * exclude the dump reserve area. Will reuse it for next
1259 	 * fadump registration.
1260 	 */
1261 	if (begin < ra_end && end > ra_start) {
1262 		if (begin < ra_start)
1263 			fadump_release_reserved_area(begin, ra_start);
1264 		if (end > ra_end)
1265 			fadump_release_reserved_area(ra_end, end);
1266 	} else
1267 		fadump_release_reserved_area(begin, end);
1268 }
1269 
1270 static void fadump_invalidate_release_mem(void)
1271 {
1272 	mutex_lock(&fadump_mutex);
1273 	if (!fw_dump.dump_active) {
1274 		mutex_unlock(&fadump_mutex);
1275 		return;
1276 	}
1277 
1278 	fadump_cleanup();
1279 	mutex_unlock(&fadump_mutex);
1280 
1281 	fadump_release_memory(fw_dump.boot_memory_size, memblock_end_of_DRAM());
1282 	fadump_free_cpu_notes_buf();
1283 
1284 	/* Initialize the kernel dump memory structure for FAD registration. */
1285 	fw_dump.ops->fadump_init_mem_struct(&fw_dump);
1286 }
1287 
1288 static ssize_t fadump_release_memory_store(struct kobject *kobj,
1289 					struct kobj_attribute *attr,
1290 					const char *buf, size_t count)
1291 {
1292 	int input = -1;
1293 
1294 	if (!fw_dump.dump_active)
1295 		return -EPERM;
1296 
1297 	if (kstrtoint(buf, 0, &input))
1298 		return -EINVAL;
1299 
1300 	if (input == 1) {
1301 		/*
1302 		 * Take away the '/proc/vmcore'. We are releasing the dump
1303 		 * memory, hence it will not be valid anymore.
1304 		 */
1305 #ifdef CONFIG_PROC_VMCORE
1306 		vmcore_cleanup();
1307 #endif
1308 		fadump_invalidate_release_mem();
1309 
1310 	} else
1311 		return -EINVAL;
1312 	return count;
1313 }
1314 
1315 static ssize_t fadump_enabled_show(struct kobject *kobj,
1316 					struct kobj_attribute *attr,
1317 					char *buf)
1318 {
1319 	return sprintf(buf, "%d\n", fw_dump.fadump_enabled);
1320 }
1321 
1322 static ssize_t fadump_register_show(struct kobject *kobj,
1323 					struct kobj_attribute *attr,
1324 					char *buf)
1325 {
1326 	return sprintf(buf, "%d\n", fw_dump.dump_registered);
1327 }
1328 
1329 static ssize_t fadump_register_store(struct kobject *kobj,
1330 					struct kobj_attribute *attr,
1331 					const char *buf, size_t count)
1332 {
1333 	int ret = 0;
1334 	int input = -1;
1335 
1336 	if (!fw_dump.fadump_enabled || fdm_active)
1337 		return -EPERM;
1338 
1339 	if (kstrtoint(buf, 0, &input))
1340 		return -EINVAL;
1341 
1342 	mutex_lock(&fadump_mutex);
1343 
1344 	switch (input) {
1345 	case 0:
1346 		if (fw_dump.dump_registered == 0) {
1347 			goto unlock_out;
1348 		}
1349 		/* Un-register Firmware-assisted dump */
1350 		pr_debug("Un-register firmware-assisted dump\n");
1351 		fw_dump.ops->fadump_unregister(&fw_dump);
1352 		break;
1353 	case 1:
1354 		if (fw_dump.dump_registered == 1) {
1355 			/* Un-register Firmware-assisted dump */
1356 			fw_dump.ops->fadump_unregister(&fw_dump);
1357 		}
1358 		/* Register Firmware-assisted dump */
1359 		ret = register_fadump();
1360 		break;
1361 	default:
1362 		ret = -EINVAL;
1363 		break;
1364 	}
1365 
1366 unlock_out:
1367 	mutex_unlock(&fadump_mutex);
1368 	return ret < 0 ? ret : count;
1369 }
1370 
1371 static int fadump_region_show(struct seq_file *m, void *private)
1372 {
1373 	const struct rtas_fadump_mem_struct *fdm_ptr;
1374 
1375 	if (!fw_dump.fadump_enabled)
1376 		return 0;
1377 
1378 	mutex_lock(&fadump_mutex);
1379 	if (fdm_active)
1380 		fdm_ptr = fdm_active;
1381 	else {
1382 		mutex_unlock(&fadump_mutex);
1383 		fw_dump.ops->fadump_region_show(&fw_dump, m);
1384 		return 0;
1385 	}
1386 
1387 	seq_printf(m,
1388 			"CPU : [%#016llx-%#016llx] %#llx bytes, "
1389 			"Dumped: %#llx\n",
1390 			be64_to_cpu(fdm_ptr->cpu_state_data.destination_address),
1391 			be64_to_cpu(fdm_ptr->cpu_state_data.destination_address) +
1392 			be64_to_cpu(fdm_ptr->cpu_state_data.source_len) - 1,
1393 			be64_to_cpu(fdm_ptr->cpu_state_data.source_len),
1394 			be64_to_cpu(fdm_ptr->cpu_state_data.bytes_dumped));
1395 	seq_printf(m,
1396 			"HPTE: [%#016llx-%#016llx] %#llx bytes, "
1397 			"Dumped: %#llx\n",
1398 			be64_to_cpu(fdm_ptr->hpte_region.destination_address),
1399 			be64_to_cpu(fdm_ptr->hpte_region.destination_address) +
1400 			be64_to_cpu(fdm_ptr->hpte_region.source_len) - 1,
1401 			be64_to_cpu(fdm_ptr->hpte_region.source_len),
1402 			be64_to_cpu(fdm_ptr->hpte_region.bytes_dumped));
1403 	seq_printf(m,
1404 			"DUMP: [%#016llx-%#016llx] %#llx bytes, "
1405 			"Dumped: %#llx\n",
1406 			be64_to_cpu(fdm_ptr->rmr_region.destination_address),
1407 			be64_to_cpu(fdm_ptr->rmr_region.destination_address) +
1408 			be64_to_cpu(fdm_ptr->rmr_region.source_len) - 1,
1409 			be64_to_cpu(fdm_ptr->rmr_region.source_len),
1410 			be64_to_cpu(fdm_ptr->rmr_region.bytes_dumped));
1411 
1412 	if (!fdm_active ||
1413 		(fw_dump.reserve_dump_area_start ==
1414 		be64_to_cpu(fdm_ptr->cpu_state_data.destination_address)))
1415 		goto out;
1416 
1417 	/* Dump is active. Show reserved memory region. */
1418 	seq_printf(m,
1419 			"    : [%#016llx-%#016llx] %#llx bytes, "
1420 			"Dumped: %#llx\n",
1421 			(unsigned long long)fw_dump.reserve_dump_area_start,
1422 			be64_to_cpu(fdm_ptr->cpu_state_data.destination_address) - 1,
1423 			be64_to_cpu(fdm_ptr->cpu_state_data.destination_address) -
1424 			fw_dump.reserve_dump_area_start,
1425 			be64_to_cpu(fdm_ptr->cpu_state_data.destination_address) -
1426 			fw_dump.reserve_dump_area_start);
1427 out:
1428 	if (fdm_active)
1429 		mutex_unlock(&fadump_mutex);
1430 	return 0;
1431 }
1432 
1433 static struct kobj_attribute fadump_release_attr = __ATTR(fadump_release_mem,
1434 						0200, NULL,
1435 						fadump_release_memory_store);
1436 static struct kobj_attribute fadump_attr = __ATTR(fadump_enabled,
1437 						0444, fadump_enabled_show,
1438 						NULL);
1439 static struct kobj_attribute fadump_register_attr = __ATTR(fadump_registered,
1440 						0644, fadump_register_show,
1441 						fadump_register_store);
1442 
1443 DEFINE_SHOW_ATTRIBUTE(fadump_region);
1444 
1445 static void fadump_init_files(void)
1446 {
1447 	struct dentry *debugfs_file;
1448 	int rc = 0;
1449 
1450 	rc = sysfs_create_file(kernel_kobj, &fadump_attr.attr);
1451 	if (rc)
1452 		printk(KERN_ERR "fadump: unable to create sysfs file"
1453 			" fadump_enabled (%d)\n", rc);
1454 
1455 	rc = sysfs_create_file(kernel_kobj, &fadump_register_attr.attr);
1456 	if (rc)
1457 		printk(KERN_ERR "fadump: unable to create sysfs file"
1458 			" fadump_registered (%d)\n", rc);
1459 
1460 	debugfs_file = debugfs_create_file("fadump_region", 0444,
1461 					powerpc_debugfs_root, NULL,
1462 					&fadump_region_fops);
1463 	if (!debugfs_file)
1464 		printk(KERN_ERR "fadump: unable to create debugfs file"
1465 				" fadump_region\n");
1466 
1467 	if (fw_dump.dump_active) {
1468 		rc = sysfs_create_file(kernel_kobj, &fadump_release_attr.attr);
1469 		if (rc)
1470 			printk(KERN_ERR "fadump: unable to create sysfs file"
1471 				" fadump_release_mem (%d)\n", rc);
1472 	}
1473 	return;
1474 }
1475 
1476 /*
1477  * Prepare for firmware-assisted dump.
1478  */
1479 int __init setup_fadump(void)
1480 {
1481 	if (!fw_dump.fadump_enabled)
1482 		return 0;
1483 
1484 	if (!fw_dump.fadump_supported) {
1485 		printk(KERN_ERR "Firmware-assisted dump is not supported on"
1486 			" this hardware\n");
1487 		return 0;
1488 	}
1489 
1490 	fadump_show_config();
1491 	/*
1492 	 * If dump data is available then see if it is valid and prepare for
1493 	 * saving it to the disk.
1494 	 */
1495 	if (fw_dump.dump_active) {
1496 		/*
1497 		 * if dump process fails then invalidate the registration
1498 		 * and release memory before proceeding for re-registration.
1499 		 */
1500 		if (process_fadump(fdm_active) < 0)
1501 			fadump_invalidate_release_mem();
1502 	}
1503 	/* Initialize the kernel dump memory structure for FAD registration. */
1504 	else if (fw_dump.reserve_dump_area_size)
1505 		fw_dump.ops->fadump_init_mem_struct(&fw_dump);
1506 	fadump_init_files();
1507 
1508 	return 1;
1509 }
1510 subsys_initcall(setup_fadump);
1511