1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Firmware-Assisted Dump support on POWER platform (OPAL).
4  *
5  * Copyright 2019, Hari Bathini, IBM Corporation.
6  */
7 
8 #define pr_fmt(fmt) "opal fadump: " fmt
9 
10 #include <linux/string.h>
11 #include <linux/seq_file.h>
12 #include <linux/of_fdt.h>
13 #include <linux/libfdt.h>
14 #include <linux/mm.h>
15 #include <linux/crash_dump.h>
16 
17 #include <asm/page.h>
18 #include <asm/opal.h>
19 #include <asm/fadump-internal.h>
20 
21 #include "opal-fadump.h"
22 
/*
 * Kernel metadata of an active dump. Pointed at the address queried with
 * OPAL_MPIPL_TAG_KERNEL in opal_fadump_dt_scan(); reset to NULL when the
 * dump is invalidated or when no regions were registered.
 */
static const struct opal_fadump_mem_struct *opal_fdm_active;
/* Kernel metadata of the running kernel (at fadump_conf->kernel_metadata). */
static struct opal_fadump_mem_struct *opal_fdm;

/* Forward declaration: opal_fadump_register() calls it before its definition. */
static int opal_fadump_unregister(struct fw_dump *fadump_conf);
27 
28 static void opal_fadump_update_config(struct fw_dump *fadump_conf,
29 				      const struct opal_fadump_mem_struct *fdm)
30 {
31 	pr_debug("Boot memory regions count: %d\n", fdm->region_cnt);
32 
33 	/*
34 	 * The destination address of the first boot memory region is the
35 	 * destination address of boot memory regions.
36 	 */
37 	fadump_conf->boot_mem_dest_addr = fdm->rgn[0].dest;
38 	pr_debug("Destination address of boot memory regions: %#016llx\n",
39 		 fadump_conf->boot_mem_dest_addr);
40 
41 	fadump_conf->fadumphdr_addr = fdm->fadumphdr_addr;
42 }
43 
44 /*
45  * This function is called in the capture kernel to get configuration details
46  * from metadata setup by the first kernel.
47  */
48 static void opal_fadump_get_config(struct fw_dump *fadump_conf,
49 				   const struct opal_fadump_mem_struct *fdm)
50 {
51 	int i;
52 
53 	if (!fadump_conf->dump_active)
54 		return;
55 
56 	fadump_conf->boot_memory_size = 0;
57 
58 	pr_debug("Boot memory regions:\n");
59 	for (i = 0; i < fdm->region_cnt; i++) {
60 		pr_debug("\t%d. base: 0x%llx, size: 0x%llx\n",
61 			 (i + 1), fdm->rgn[i].src, fdm->rgn[i].size);
62 
63 		fadump_conf->boot_memory_size += fdm->rgn[i].size;
64 	}
65 
66 	/*
67 	 * Start address of reserve dump area (permanent reservation) for
68 	 * re-registering FADump after dump capture.
69 	 */
70 	fadump_conf->reserve_dump_area_start = fdm->rgn[0].dest;
71 
72 	/*
73 	 * Rarely, but it can so happen that system crashes before all
74 	 * boot memory regions are registered for MPIPL. In such
75 	 * cases, warn that the vmcore may not be accurate and proceed
76 	 * anyway as that is the best bet considering free pages, cache
77 	 * pages, user pages, etc are usually filtered out.
78 	 *
79 	 * Hope the memory that could not be preserved only has pages
80 	 * that are usually filtered out while saving the vmcore.
81 	 */
82 	if (fdm->region_cnt > fdm->registered_regions) {
83 		pr_warn("Not all memory regions were saved!!!\n");
84 		pr_warn("  Unsaved memory regions:\n");
85 		i = fdm->registered_regions;
86 		while (i < fdm->region_cnt) {
87 			pr_warn("\t[%03d] base: 0x%llx, size: 0x%llx\n",
88 				i, fdm->rgn[i].src, fdm->rgn[i].size);
89 			i++;
90 		}
91 
92 		pr_warn("If the unsaved regions only contain pages that are filtered out (eg. free/user pages), the vmcore should still be usable.\n");
93 		pr_warn("WARNING: If the unsaved regions contain kernel pages, the vmcore will be corrupted.\n");
94 	}
95 
96 	opal_fadump_update_config(fadump_conf, fdm);
97 }
98 
99 /* Initialize kernel metadata */
100 static void opal_fadump_init_metadata(struct opal_fadump_mem_struct *fdm)
101 {
102 	fdm->version = OPAL_FADUMP_VERSION;
103 	fdm->region_cnt = 0;
104 	fdm->registered_regions = 0;
105 	fdm->fadumphdr_addr = 0;
106 }
107 
108 static u64 opal_fadump_init_mem_struct(struct fw_dump *fadump_conf)
109 {
110 	int max_copy_size, cur_size, size;
111 	u64 src_addr, dest_addr;
112 
113 	opal_fdm = __va(fadump_conf->kernel_metadata);
114 	opal_fadump_init_metadata(opal_fdm);
115 
116 	/*
117 	 * Firmware supports 32-bit field for size. Align it to PAGE_SIZE
118 	 * and request firmware to copy multiple kernel boot memory regions.
119 	 */
120 	max_copy_size = _ALIGN_DOWN(U32_MAX, PAGE_SIZE);
121 
122 	/* Boot memory regions */
123 	src_addr = 0;
124 	dest_addr = fadump_conf->reserve_dump_area_start;
125 	size = fadump_conf->boot_memory_size;
126 	while (size) {
127 		cur_size = size > max_copy_size ? max_copy_size : size;
128 
129 		opal_fdm->rgn[opal_fdm->region_cnt].src  = src_addr;
130 		opal_fdm->rgn[opal_fdm->region_cnt].dest = dest_addr;
131 		opal_fdm->rgn[opal_fdm->region_cnt].size = cur_size;
132 
133 		opal_fdm->region_cnt++;
134 		dest_addr	+= cur_size;
135 		src_addr	+= cur_size;
136 		size		-= cur_size;
137 	}
138 
139 	/*
140 	 * Kernel metadata is passed to f/w and retrieved in capture kerenl.
141 	 * So, use it to save fadump header address instead of calculating it.
142 	 */
143 	opal_fdm->fadumphdr_addr = (opal_fdm->rgn[0].dest +
144 				    fadump_conf->boot_memory_size);
145 
146 	opal_fadump_update_config(fadump_conf, opal_fdm);
147 
148 	return dest_addr;
149 }
150 
151 static u64 opal_fadump_get_metadata_size(void)
152 {
153 	return PAGE_ALIGN(sizeof(struct opal_fadump_mem_struct));
154 }
155 
156 static int opal_fadump_setup_metadata(struct fw_dump *fadump_conf)
157 {
158 	int err = 0;
159 	s64 ret;
160 
161 	/*
162 	 * Use the last page(s) in FADump memory reservation for
163 	 * kernel metadata.
164 	 */
165 	fadump_conf->kernel_metadata = (fadump_conf->reserve_dump_area_start +
166 					fadump_conf->reserve_dump_area_size -
167 					opal_fadump_get_metadata_size());
168 	pr_info("Kernel metadata addr: %llx\n", fadump_conf->kernel_metadata);
169 
170 	/* Initialize kernel metadata before registering the address with f/w */
171 	opal_fdm = __va(fadump_conf->kernel_metadata);
172 	opal_fadump_init_metadata(opal_fdm);
173 
174 	/*
175 	 * Register metadata address with f/w. Can be retrieved in
176 	 * the capture kernel.
177 	 */
178 	ret = opal_mpipl_register_tag(OPAL_MPIPL_TAG_KERNEL,
179 				      fadump_conf->kernel_metadata);
180 	if (ret != OPAL_SUCCESS) {
181 		pr_err("Failed to set kernel metadata tag!\n");
182 		err = -EPERM;
183 	}
184 
185 	return err;
186 }
187 
188 static int opal_fadump_register(struct fw_dump *fadump_conf)
189 {
190 	s64 rc = OPAL_PARAMETER;
191 	int i, err = -EIO;
192 
193 	for (i = 0; i < opal_fdm->region_cnt; i++) {
194 		rc = opal_mpipl_update(OPAL_MPIPL_ADD_RANGE,
195 				       opal_fdm->rgn[i].src,
196 				       opal_fdm->rgn[i].dest,
197 				       opal_fdm->rgn[i].size);
198 		if (rc != OPAL_SUCCESS)
199 			break;
200 
201 		opal_fdm->registered_regions++;
202 	}
203 
204 	switch (rc) {
205 	case OPAL_SUCCESS:
206 		pr_info("Registration is successful!\n");
207 		fadump_conf->dump_registered = 1;
208 		err = 0;
209 		break;
210 	case OPAL_RESOURCE:
211 		/* If MAX regions limit in f/w is hit, warn and proceed. */
212 		pr_warn("%d regions could not be registered for MPIPL as MAX limit is reached!\n",
213 			(opal_fdm->region_cnt - opal_fdm->registered_regions));
214 		fadump_conf->dump_registered = 1;
215 		err = 0;
216 		break;
217 	case OPAL_PARAMETER:
218 		pr_err("Failed to register. Parameter Error(%lld).\n", rc);
219 		break;
220 	case OPAL_HARDWARE:
221 		pr_err("Support not available.\n");
222 		fadump_conf->fadump_supported = 0;
223 		fadump_conf->fadump_enabled = 0;
224 		break;
225 	default:
226 		pr_err("Failed to register. Unknown Error(%lld).\n", rc);
227 		break;
228 	}
229 
230 	/*
231 	 * If some regions were registered before OPAL_MPIPL_ADD_RANGE
232 	 * OPAL call failed, unregister all regions.
233 	 */
234 	if ((err < 0) && (opal_fdm->registered_regions > 0))
235 		opal_fadump_unregister(fadump_conf);
236 
237 	return err;
238 }
239 
240 static int opal_fadump_unregister(struct fw_dump *fadump_conf)
241 {
242 	s64 rc;
243 
244 	rc = opal_mpipl_update(OPAL_MPIPL_REMOVE_ALL, 0, 0, 0);
245 	if (rc) {
246 		pr_err("Failed to un-register - unexpected Error(%lld).\n", rc);
247 		return -EIO;
248 	}
249 
250 	opal_fdm->registered_regions = 0;
251 	fadump_conf->dump_registered = 0;
252 	return 0;
253 }
254 
255 static int opal_fadump_invalidate(struct fw_dump *fadump_conf)
256 {
257 	s64 rc;
258 
259 	rc = opal_mpipl_update(OPAL_MPIPL_FREE_PRESERVED_MEMORY, 0, 0, 0);
260 	if (rc) {
261 		pr_err("Failed to invalidate - unexpected Error(%lld).\n", rc);
262 		return -EIO;
263 	}
264 
265 	fadump_conf->dump_active = 0;
266 	opal_fdm_active = NULL;
267 	return 0;
268 }
269 
270 static void opal_fadump_cleanup(struct fw_dump *fadump_conf)
271 {
272 	s64 ret;
273 
274 	ret = opal_mpipl_register_tag(OPAL_MPIPL_TAG_KERNEL, 0);
275 	if (ret != OPAL_SUCCESS)
276 		pr_warn("Could not reset (%llu) kernel metadata tag!\n", ret);
277 }
278 
279 /*
280  * Convert CPU state data saved at the time of crash into ELF notes.
281  *
282  * Append crashing CPU's register data saved by the kernel in the PT_NOTE.
283  */
284 static int __init
285 opal_fadump_build_cpu_notes(struct fw_dump *fadump_conf,
286 			    struct fadump_crash_info_header *fdh)
287 {
288 	u32 num_cpus = 1, *note_buf;
289 	int rc;
290 
291 	if (fdh->crashing_cpu == FADUMP_CPU_UNKNOWN)
292 		return -ENODEV;
293 
294 	/* Allocate CPU notes buffer to hold crashing cpu notes. */
295 	rc = fadump_setup_cpu_notes_buf(num_cpus);
296 	if (rc != 0)
297 		return rc;
298 
299 	note_buf = (u32 *)fadump_conf->cpu_notes_buf_vaddr;
300 	note_buf = fadump_regs_to_elf_notes(note_buf, &(fdh->regs));
301 	final_note(note_buf);
302 
303 	pr_debug("Updating elfcore header (%llx) with cpu notes\n",
304 		 fdh->elfcorehdr_addr);
305 	fadump_update_elfcore_header(__va(fdh->elfcorehdr_addr));
306 	return 0;
307 }
308 
309 static int __init opal_fadump_process(struct fw_dump *fadump_conf)
310 {
311 	struct fadump_crash_info_header *fdh;
312 	int rc = -EINVAL;
313 
314 	if (!opal_fdm_active || !fadump_conf->fadumphdr_addr)
315 		return rc;
316 
317 	/* Validate the fadump crash info header */
318 	fdh = __va(fadump_conf->fadumphdr_addr);
319 	if (fdh->magic_number != FADUMP_CRASH_INFO_MAGIC) {
320 		pr_err("Crash info header is not valid.\n");
321 		return rc;
322 	}
323 
324 	rc = opal_fadump_build_cpu_notes(fadump_conf, fdh);
325 	if (rc)
326 		return rc;
327 
328 	/*
329 	 * We are done validating dump info and elfcore header is now ready
330 	 * to be exported. set elfcorehdr_addr so that vmcore module will
331 	 * export the elfcore header through '/proc/vmcore'.
332 	 */
333 	elfcorehdr_addr = fdh->elfcorehdr_addr;
334 
335 	return rc;
336 }
337 
338 static void opal_fadump_region_show(struct fw_dump *fadump_conf,
339 				    struct seq_file *m)
340 {
341 	const struct opal_fadump_mem_struct *fdm_ptr;
342 	u64 dumped_bytes = 0;
343 	int i;
344 
345 	if (fadump_conf->dump_active)
346 		fdm_ptr = opal_fdm_active;
347 	else
348 		fdm_ptr = opal_fdm;
349 
350 	for (i = 0; i < fdm_ptr->region_cnt; i++) {
351 		/*
352 		 * Only regions that are registered for MPIPL
353 		 * would have dump data.
354 		 */
355 		if ((fadump_conf->dump_active) &&
356 		    (i < fdm_ptr->registered_regions))
357 			dumped_bytes = fdm_ptr->rgn[i].size;
358 
359 		seq_printf(m, "DUMP: Src: %#016llx, Dest: %#016llx, ",
360 			   fdm_ptr->rgn[i].src, fdm_ptr->rgn[i].dest);
361 		seq_printf(m, "Size: %#llx, Dumped: %#llx bytes\n",
362 			   fdm_ptr->rgn[i].size, dumped_bytes);
363 	}
364 
365 	/* Dump is active. Show reserved area start address. */
366 	if (fadump_conf->dump_active) {
367 		seq_printf(m, "\nMemory above %#016lx is reserved for saving crash dump\n",
368 			   fadump_conf->reserve_dump_area_start);
369 	}
370 }
371 
372 static void opal_fadump_trigger(struct fadump_crash_info_header *fdh,
373 				const char *msg)
374 {
375 	int rc;
376 
377 	rc = opal_cec_reboot2(OPAL_REBOOT_MPIPL, msg);
378 	if (rc == OPAL_UNSUPPORTED) {
379 		pr_emerg("Reboot type %d not supported.\n",
380 			 OPAL_REBOOT_MPIPL);
381 	} else if (rc == OPAL_HARDWARE)
382 		pr_emerg("No backend support for MPIPL!\n");
383 }
384 
/*
 * OPAL implementation of the FADump callbacks. Installed into
 * fadump_conf->ops by opal_fadump_dt_scan() once the "ibm,opal-dump"
 * compatible "dump" node is found.
 */
static struct fadump_ops opal_fadump_ops = {
	.fadump_init_mem_struct		= opal_fadump_init_mem_struct,
	.fadump_get_metadata_size	= opal_fadump_get_metadata_size,
	.fadump_setup_metadata		= opal_fadump_setup_metadata,
	.fadump_register		= opal_fadump_register,
	.fadump_unregister		= opal_fadump_unregister,
	.fadump_invalidate		= opal_fadump_invalidate,
	.fadump_cleanup			= opal_fadump_cleanup,
	.fadump_process			= opal_fadump_process,
	.fadump_region_show		= opal_fadump_region_show,
	.fadump_trigger			= opal_fadump_trigger,
};
397 
398 void __init opal_fadump_dt_scan(struct fw_dump *fadump_conf, u64 node)
399 {
400 	const __be32 *prop;
401 	unsigned long dn;
402 	u64 addr = 0;
403 	s64 ret;
404 
405 
406 	/*
407 	 * Check if Firmware-Assisted Dump is supported. if yes, check
408 	 * if dump has been initiated on last reboot.
409 	 */
410 	dn = of_get_flat_dt_subnode_by_name(node, "dump");
411 	if (dn == -FDT_ERR_NOTFOUND) {
412 		pr_debug("FADump support is missing!\n");
413 		return;
414 	}
415 
416 	if (!of_flat_dt_is_compatible(dn, "ibm,opal-dump")) {
417 		pr_err("Support missing for this f/w version!\n");
418 		return;
419 	}
420 
421 	fadump_conf->ops		= &opal_fadump_ops;
422 	fadump_conf->fadump_supported	= 1;
423 
424 	/*
425 	 * Check if dump has been initiated on last reboot.
426 	 */
427 	prop = of_get_flat_dt_prop(dn, "mpipl-boot", NULL);
428 	if (!prop)
429 		return;
430 
431 	ret = opal_mpipl_query_tag(OPAL_MPIPL_TAG_KERNEL, &addr);
432 	if ((ret != OPAL_SUCCESS) || !addr) {
433 		pr_err("Failed to get Kernel metadata (%lld)\n", ret);
434 		return;
435 	}
436 
437 	addr = be64_to_cpu(addr);
438 	pr_debug("Kernel metadata addr: %llx\n", addr);
439 
440 	opal_fdm_active = __va(addr);
441 	if (opal_fdm_active->version != OPAL_FADUMP_VERSION) {
442 		pr_warn("Supported kernel metadata version: %u, found: %d!\n",
443 			OPAL_FADUMP_VERSION, opal_fdm_active->version);
444 		pr_warn("WARNING: Kernel metadata format mismatch identified! Core file maybe corrupted..\n");
445 	}
446 
447 	/* Kernel regions not registered with f/w for MPIPL */
448 	if (opal_fdm_active->registered_regions == 0) {
449 		opal_fdm_active = NULL;
450 		return;
451 	}
452 
453 	pr_info("Firmware-assisted dump is active.\n");
454 	fadump_conf->dump_active = 1;
455 	opal_fadump_get_config(fadump_conf, opal_fdm_active);
456 }
457