xref: /openbmc/linux/drivers/acpi/apei/erst.c (revision cb82a2e4)
1 /*
2  * APEI Error Record Serialization Table support
3  *
4  * ERST is a way provided by APEI to save and retrieve hardware error
5  * information to and from a persistent store.
6  *
7  * For more information about ERST, please refer to ACPI Specification
8  * version 4.0, section 17.4.
9  *
10  * Copyright 2010 Intel Corp.
11  *   Author: Huang Ying <ying.huang@intel.com>
12  *
13  * This program is free software; you can redistribute it and/or
14  * modify it under the terms of the GNU General Public License version
15  * 2 as published by the Free Software Foundation.
16  *
17  * This program is distributed in the hope that it will be useful,
18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20  * GNU General Public License for more details.
21  *
22  * You should have received a copy of the GNU General Public License
23  * along with this program; if not, write to the Free Software
24  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
25  */
26 
27 #include <linux/kernel.h>
28 #include <linux/module.h>
29 #include <linux/init.h>
30 #include <linux/delay.h>
31 #include <linux/io.h>
32 #include <linux/acpi.h>
33 #include <linux/uaccess.h>
34 #include <linux/cper.h>
35 #include <linux/nmi.h>
36 #include <linux/hardirq.h>
37 #include <linux/pstore.h>
38 #include <acpi/apei.h>
39 
40 #include "apei-internal.h"
41 
/* Prefix every message printed from this file with "ERST: ". */
#undef pr_fmt
#define pr_fmt(fmt) "ERST: " fmt

/*
 * Command status values read back from the firmware after an ERST
 * operation; erst_errno() converts them to errno values.
 */
#define ERST_STATUS_SUCCESS			0x0
#define ERST_STATUS_NOT_ENOUGH_SPACE		0x1
#define ERST_STATUS_HARDWARE_NOT_AVAILABLE	0x2
#define ERST_STATUS_FAILED			0x3
#define ERST_STATUS_RECORD_STORE_EMPTY		0x4
#define ERST_STATUS_RECORD_NOT_FOUND		0x5

/* Address immediately following the fixed-size ERST table structure. */
#define ERST_TAB_ENTRY(tab)						\
	((struct acpi_whea_header *)((char *)(tab) +			\
				     sizeof(struct acpi_table_erst)))

/* Polling step used while waiting for the firmware: 100ns */
#define SPIN_UNIT		100
/* Firmware should respond within 1 millisecond */
#define FIRMWARE_TIMEOUT	(1 * NSEC_PER_MSEC)
/* Longest stall honoured for a stall instruction: 50us */
#define FIRMWARE_MAX_STALL	50
61 
/*
 * Non-zero when ERST must not be used: set by the "erst_disable" boot
 * option and by erst_init() when initialization does not complete.
 */
int erst_disable;
EXPORT_SYMBOL_GPL(erst_disable);

/* The ERST ACPI table, looked up by erst_init(). */
static struct acpi_table_erst *erst_tab;
66 
67 /* ERST Error Log Address Range atrributes */
68 #define ERST_RANGE_RESERVED	0x0001
69 #define ERST_RANGE_NVRAM	0x0002
70 #define ERST_RANGE_SLOW		0x0004
71 
72 /*
73  * ERST Error Log Address Range, used as buffer for reading/writing
74  * error records.
75  */
76 static struct erst_erange {
77 	u64 base;
78 	u64 size;
79 	void __iomem *vaddr;
80 	u32 attr;
81 } erst_erange;
82 
/*
 * Serializes runs of the ERST interpreter, because the firmware
 * implementation behind it may not work properly when invoked
 * concurrently.
 *
 * The same lock also provides exclusive access to the ERST Error Log
 * Address Range.
 */
static DEFINE_RAW_SPINLOCK(erst_lock);
92 
93 static inline int erst_errno(int command_status)
94 {
95 	switch (command_status) {
96 	case ERST_STATUS_SUCCESS:
97 		return 0;
98 	case ERST_STATUS_HARDWARE_NOT_AVAILABLE:
99 		return -ENODEV;
100 	case ERST_STATUS_NOT_ENOUGH_SPACE:
101 		return -ENOSPC;
102 	case ERST_STATUS_RECORD_STORE_EMPTY:
103 	case ERST_STATUS_RECORD_NOT_FOUND:
104 		return -ENOENT;
105 	default:
106 		return -EINVAL;
107 	}
108 }
109 
110 static int erst_timedout(u64 *t, u64 spin_unit)
111 {
112 	if ((s64)*t < spin_unit) {
113 		pr_warn(FW_WARN "Firmware does not respond in time.\n");
114 		return 1;
115 	}
116 	*t -= spin_unit;
117 	ndelay(spin_unit);
118 	touch_nmi_watchdog();
119 	return 0;
120 }
121 
122 static int erst_exec_load_var1(struct apei_exec_context *ctx,
123 			       struct acpi_whea_header *entry)
124 {
125 	return __apei_exec_read_register(entry, &ctx->var1);
126 }
127 
128 static int erst_exec_load_var2(struct apei_exec_context *ctx,
129 			       struct acpi_whea_header *entry)
130 {
131 	return __apei_exec_read_register(entry, &ctx->var2);
132 }
133 
134 static int erst_exec_store_var1(struct apei_exec_context *ctx,
135 				struct acpi_whea_header *entry)
136 {
137 	return __apei_exec_write_register(entry, ctx->var1);
138 }
139 
140 static int erst_exec_add(struct apei_exec_context *ctx,
141 			 struct acpi_whea_header *entry)
142 {
143 	ctx->var1 += ctx->var2;
144 	return 0;
145 }
146 
147 static int erst_exec_subtract(struct apei_exec_context *ctx,
148 			      struct acpi_whea_header *entry)
149 {
150 	ctx->var1 -= ctx->var2;
151 	return 0;
152 }
153 
154 static int erst_exec_add_value(struct apei_exec_context *ctx,
155 			       struct acpi_whea_header *entry)
156 {
157 	int rc;
158 	u64 val;
159 
160 	rc = __apei_exec_read_register(entry, &val);
161 	if (rc)
162 		return rc;
163 	val += ctx->value;
164 	rc = __apei_exec_write_register(entry, val);
165 	return rc;
166 }
167 
168 static int erst_exec_subtract_value(struct apei_exec_context *ctx,
169 				    struct acpi_whea_header *entry)
170 {
171 	int rc;
172 	u64 val;
173 
174 	rc = __apei_exec_read_register(entry, &val);
175 	if (rc)
176 		return rc;
177 	val -= ctx->value;
178 	rc = __apei_exec_write_register(entry, val);
179 	return rc;
180 }
181 
182 static int erst_exec_stall(struct apei_exec_context *ctx,
183 			   struct acpi_whea_header *entry)
184 {
185 	u64 stall_time;
186 
187 	if (ctx->value > FIRMWARE_MAX_STALL) {
188 		if (!in_nmi())
189 			pr_warn(FW_WARN
190 			"Too long stall time for stall instruction: 0x%llx.\n",
191 				   ctx->value);
192 		stall_time = FIRMWARE_MAX_STALL;
193 	} else
194 		stall_time = ctx->value;
195 	udelay(stall_time);
196 	return 0;
197 }
198 
199 static int erst_exec_stall_while_true(struct apei_exec_context *ctx,
200 				      struct acpi_whea_header *entry)
201 {
202 	int rc;
203 	u64 val;
204 	u64 timeout = FIRMWARE_TIMEOUT;
205 	u64 stall_time;
206 
207 	if (ctx->var1 > FIRMWARE_MAX_STALL) {
208 		if (!in_nmi())
209 			pr_warn(FW_WARN
210 		"Too long stall time for stall while true instruction: 0x%llx.\n",
211 				   ctx->var1);
212 		stall_time = FIRMWARE_MAX_STALL;
213 	} else
214 		stall_time = ctx->var1;
215 
216 	for (;;) {
217 		rc = __apei_exec_read_register(entry, &val);
218 		if (rc)
219 			return rc;
220 		if (val != ctx->value)
221 			break;
222 		if (erst_timedout(&timeout, stall_time * NSEC_PER_USEC))
223 			return -EIO;
224 	}
225 	return 0;
226 }
227 
228 static int erst_exec_skip_next_instruction_if_true(
229 	struct apei_exec_context *ctx,
230 	struct acpi_whea_header *entry)
231 {
232 	int rc;
233 	u64 val;
234 
235 	rc = __apei_exec_read_register(entry, &val);
236 	if (rc)
237 		return rc;
238 	if (val == ctx->value) {
239 		ctx->ip += 2;
240 		return APEI_EXEC_SET_IP;
241 	}
242 
243 	return 0;
244 }
245 
246 static int erst_exec_goto(struct apei_exec_context *ctx,
247 			  struct acpi_whea_header *entry)
248 {
249 	ctx->ip = ctx->value;
250 	return APEI_EXEC_SET_IP;
251 }
252 
253 static int erst_exec_set_src_address_base(struct apei_exec_context *ctx,
254 					  struct acpi_whea_header *entry)
255 {
256 	return __apei_exec_read_register(entry, &ctx->src_base);
257 }
258 
259 static int erst_exec_set_dst_address_base(struct apei_exec_context *ctx,
260 					  struct acpi_whea_header *entry)
261 {
262 	return __apei_exec_read_register(entry, &ctx->dst_base);
263 }
264 
265 static int erst_exec_move_data(struct apei_exec_context *ctx,
266 			       struct acpi_whea_header *entry)
267 {
268 	int rc;
269 	u64 offset;
270 	void *src, *dst;
271 
272 	/* ioremap does not work in interrupt context */
273 	if (in_interrupt()) {
274 		pr_warn("MOVE_DATA can not be used in interrupt context.\n");
275 		return -EBUSY;
276 	}
277 
278 	rc = __apei_exec_read_register(entry, &offset);
279 	if (rc)
280 		return rc;
281 
282 	src = ioremap(ctx->src_base + offset, ctx->var2);
283 	if (!src)
284 		return -ENOMEM;
285 	dst = ioremap(ctx->dst_base + offset, ctx->var2);
286 	if (!dst)
287 		return -ENOMEM;
288 
289 	memmove(dst, src, ctx->var2);
290 
291 	iounmap(src);
292 	iounmap(dst);
293 
294 	return 0;
295 }
296 
297 static struct apei_exec_ins_type erst_ins_type[] = {
298 	[ACPI_ERST_READ_REGISTER] = {
299 		.flags = APEI_EXEC_INS_ACCESS_REGISTER,
300 		.run = apei_exec_read_register,
301 	},
302 	[ACPI_ERST_READ_REGISTER_VALUE] = {
303 		.flags = APEI_EXEC_INS_ACCESS_REGISTER,
304 		.run = apei_exec_read_register_value,
305 	},
306 	[ACPI_ERST_WRITE_REGISTER] = {
307 		.flags = APEI_EXEC_INS_ACCESS_REGISTER,
308 		.run = apei_exec_write_register,
309 	},
310 	[ACPI_ERST_WRITE_REGISTER_VALUE] = {
311 		.flags = APEI_EXEC_INS_ACCESS_REGISTER,
312 		.run = apei_exec_write_register_value,
313 	},
314 	[ACPI_ERST_NOOP] = {
315 		.flags = 0,
316 		.run = apei_exec_noop,
317 	},
318 	[ACPI_ERST_LOAD_VAR1] = {
319 		.flags = APEI_EXEC_INS_ACCESS_REGISTER,
320 		.run = erst_exec_load_var1,
321 	},
322 	[ACPI_ERST_LOAD_VAR2] = {
323 		.flags = APEI_EXEC_INS_ACCESS_REGISTER,
324 		.run = erst_exec_load_var2,
325 	},
326 	[ACPI_ERST_STORE_VAR1] = {
327 		.flags = APEI_EXEC_INS_ACCESS_REGISTER,
328 		.run = erst_exec_store_var1,
329 	},
330 	[ACPI_ERST_ADD] = {
331 		.flags = 0,
332 		.run = erst_exec_add,
333 	},
334 	[ACPI_ERST_SUBTRACT] = {
335 		.flags = 0,
336 		.run = erst_exec_subtract,
337 	},
338 	[ACPI_ERST_ADD_VALUE] = {
339 		.flags = APEI_EXEC_INS_ACCESS_REGISTER,
340 		.run = erst_exec_add_value,
341 	},
342 	[ACPI_ERST_SUBTRACT_VALUE] = {
343 		.flags = APEI_EXEC_INS_ACCESS_REGISTER,
344 		.run = erst_exec_subtract_value,
345 	},
346 	[ACPI_ERST_STALL] = {
347 		.flags = 0,
348 		.run = erst_exec_stall,
349 	},
350 	[ACPI_ERST_STALL_WHILE_TRUE] = {
351 		.flags = APEI_EXEC_INS_ACCESS_REGISTER,
352 		.run = erst_exec_stall_while_true,
353 	},
354 	[ACPI_ERST_SKIP_NEXT_IF_TRUE] = {
355 		.flags = APEI_EXEC_INS_ACCESS_REGISTER,
356 		.run = erst_exec_skip_next_instruction_if_true,
357 	},
358 	[ACPI_ERST_GOTO] = {
359 		.flags = 0,
360 		.run = erst_exec_goto,
361 	},
362 	[ACPI_ERST_SET_SRC_ADDRESS_BASE] = {
363 		.flags = APEI_EXEC_INS_ACCESS_REGISTER,
364 		.run = erst_exec_set_src_address_base,
365 	},
366 	[ACPI_ERST_SET_DST_ADDRESS_BASE] = {
367 		.flags = APEI_EXEC_INS_ACCESS_REGISTER,
368 		.run = erst_exec_set_dst_address_base,
369 	},
370 	[ACPI_ERST_MOVE_DATA] = {
371 		.flags = APEI_EXEC_INS_ACCESS_REGISTER,
372 		.run = erst_exec_move_data,
373 	},
374 };
375 
376 static inline void erst_exec_ctx_init(struct apei_exec_context *ctx)
377 {
378 	apei_exec_ctx_init(ctx, erst_ins_type, ARRAY_SIZE(erst_ins_type),
379 			   ERST_TAB_ENTRY(erst_tab), erst_tab->entries);
380 }
381 
382 static int erst_get_erange(struct erst_erange *range)
383 {
384 	struct apei_exec_context ctx;
385 	int rc;
386 
387 	erst_exec_ctx_init(&ctx);
388 	rc = apei_exec_run(&ctx, ACPI_ERST_GET_ERROR_RANGE);
389 	if (rc)
390 		return rc;
391 	range->base = apei_exec_ctx_get_output(&ctx);
392 	rc = apei_exec_run(&ctx, ACPI_ERST_GET_ERROR_LENGTH);
393 	if (rc)
394 		return rc;
395 	range->size = apei_exec_ctx_get_output(&ctx);
396 	rc = apei_exec_run(&ctx, ACPI_ERST_GET_ERROR_ATTRIBUTES);
397 	if (rc)
398 		return rc;
399 	range->attr = apei_exec_ctx_get_output(&ctx);
400 
401 	return 0;
402 }
403 
404 static ssize_t __erst_get_record_count(void)
405 {
406 	struct apei_exec_context ctx;
407 	int rc;
408 
409 	erst_exec_ctx_init(&ctx);
410 	rc = apei_exec_run(&ctx, ACPI_ERST_GET_RECORD_COUNT);
411 	if (rc)
412 		return rc;
413 	return apei_exec_ctx_get_output(&ctx);
414 }
415 
416 ssize_t erst_get_record_count(void)
417 {
418 	ssize_t count;
419 	unsigned long flags;
420 
421 	if (erst_disable)
422 		return -ENODEV;
423 
424 	raw_spin_lock_irqsave(&erst_lock, flags);
425 	count = __erst_get_record_count();
426 	raw_spin_unlock_irqrestore(&erst_lock, flags);
427 
428 	return count;
429 }
430 EXPORT_SYMBOL_GPL(erst_get_record_count);
431 
432 #define ERST_RECORD_ID_CACHE_SIZE_MIN	16
433 #define ERST_RECORD_ID_CACHE_SIZE_MAX	1024
434 
435 struct erst_record_id_cache {
436 	struct mutex lock;
437 	u64 *entries;
438 	int len;
439 	int size;
440 	int refcount;
441 };
442 
443 static struct erst_record_id_cache erst_record_id_cache = {
444 	.lock = __MUTEX_INITIALIZER(erst_record_id_cache.lock),
445 	.refcount = 0,
446 };
447 
448 static int __erst_get_next_record_id(u64 *record_id)
449 {
450 	struct apei_exec_context ctx;
451 	int rc;
452 
453 	erst_exec_ctx_init(&ctx);
454 	rc = apei_exec_run(&ctx, ACPI_ERST_GET_RECORD_ID);
455 	if (rc)
456 		return rc;
457 	*record_id = apei_exec_ctx_get_output(&ctx);
458 
459 	return 0;
460 }
461 
462 int erst_get_record_id_begin(int *pos)
463 {
464 	int rc;
465 
466 	if (erst_disable)
467 		return -ENODEV;
468 
469 	rc = mutex_lock_interruptible(&erst_record_id_cache.lock);
470 	if (rc)
471 		return rc;
472 	erst_record_id_cache.refcount++;
473 	mutex_unlock(&erst_record_id_cache.lock);
474 
475 	*pos = 0;
476 
477 	return 0;
478 }
479 EXPORT_SYMBOL_GPL(erst_get_record_id_begin);
480 
481 /* erst_record_id_cache.lock must be held by caller */
482 static int __erst_record_id_cache_add_one(void)
483 {
484 	u64 id, prev_id, first_id;
485 	int i, rc;
486 	u64 *entries;
487 	unsigned long flags;
488 
489 	id = prev_id = first_id = APEI_ERST_INVALID_RECORD_ID;
490 retry:
491 	raw_spin_lock_irqsave(&erst_lock, flags);
492 	rc = __erst_get_next_record_id(&id);
493 	raw_spin_unlock_irqrestore(&erst_lock, flags);
494 	if (rc == -ENOENT)
495 		return 0;
496 	if (rc)
497 		return rc;
498 	if (id == APEI_ERST_INVALID_RECORD_ID)
499 		return 0;
500 	/* can not skip current ID, or loop back to first ID */
501 	if (id == prev_id || id == first_id)
502 		return 0;
503 	if (first_id == APEI_ERST_INVALID_RECORD_ID)
504 		first_id = id;
505 	prev_id = id;
506 
507 	entries = erst_record_id_cache.entries;
508 	for (i = 0; i < erst_record_id_cache.len; i++) {
509 		if (entries[i] == id)
510 			break;
511 	}
512 	/* record id already in cache, try next */
513 	if (i < erst_record_id_cache.len)
514 		goto retry;
515 	if (erst_record_id_cache.len >= erst_record_id_cache.size) {
516 		int new_size, alloc_size;
517 		u64 *new_entries;
518 
519 		new_size = erst_record_id_cache.size * 2;
520 		new_size = clamp_val(new_size, ERST_RECORD_ID_CACHE_SIZE_MIN,
521 				     ERST_RECORD_ID_CACHE_SIZE_MAX);
522 		if (new_size <= erst_record_id_cache.size) {
523 			if (printk_ratelimit())
524 				pr_warn(FW_WARN "too many record IDs!\n");
525 			return 0;
526 		}
527 		alloc_size = new_size * sizeof(entries[0]);
528 		if (alloc_size < PAGE_SIZE)
529 			new_entries = kmalloc(alloc_size, GFP_KERNEL);
530 		else
531 			new_entries = vmalloc(alloc_size);
532 		if (!new_entries)
533 			return -ENOMEM;
534 		memcpy(new_entries, entries,
535 		       erst_record_id_cache.len * sizeof(entries[0]));
536 		if (erst_record_id_cache.size < PAGE_SIZE)
537 			kfree(entries);
538 		else
539 			vfree(entries);
540 		erst_record_id_cache.entries = entries = new_entries;
541 		erst_record_id_cache.size = new_size;
542 	}
543 	entries[i] = id;
544 	erst_record_id_cache.len++;
545 
546 	return 1;
547 }
548 
549 /*
550  * Get the record ID of an existing error record on the persistent
551  * storage. If there is no error record on the persistent storage, the
552  * returned record_id is APEI_ERST_INVALID_RECORD_ID.
553  */
554 int erst_get_record_id_next(int *pos, u64 *record_id)
555 {
556 	int rc = 0;
557 	u64 *entries;
558 
559 	if (erst_disable)
560 		return -ENODEV;
561 
562 	/* must be enclosed by erst_get_record_id_begin/end */
563 	BUG_ON(!erst_record_id_cache.refcount);
564 	BUG_ON(*pos < 0 || *pos > erst_record_id_cache.len);
565 
566 	mutex_lock(&erst_record_id_cache.lock);
567 	entries = erst_record_id_cache.entries;
568 	for (; *pos < erst_record_id_cache.len; (*pos)++)
569 		if (entries[*pos] != APEI_ERST_INVALID_RECORD_ID)
570 			break;
571 	/* found next record id in cache */
572 	if (*pos < erst_record_id_cache.len) {
573 		*record_id = entries[*pos];
574 		(*pos)++;
575 		goto out_unlock;
576 	}
577 
578 	/* Try to add one more record ID to cache */
579 	rc = __erst_record_id_cache_add_one();
580 	if (rc < 0)
581 		goto out_unlock;
582 	/* successfully add one new ID */
583 	if (rc == 1) {
584 		*record_id = erst_record_id_cache.entries[*pos];
585 		(*pos)++;
586 		rc = 0;
587 	} else {
588 		*pos = -1;
589 		*record_id = APEI_ERST_INVALID_RECORD_ID;
590 	}
591 out_unlock:
592 	mutex_unlock(&erst_record_id_cache.lock);
593 
594 	return rc;
595 }
596 EXPORT_SYMBOL_GPL(erst_get_record_id_next);
597 
598 /* erst_record_id_cache.lock must be held by caller */
599 static void __erst_record_id_cache_compact(void)
600 {
601 	int i, wpos = 0;
602 	u64 *entries;
603 
604 	if (erst_record_id_cache.refcount)
605 		return;
606 
607 	entries = erst_record_id_cache.entries;
608 	for (i = 0; i < erst_record_id_cache.len; i++) {
609 		if (entries[i] == APEI_ERST_INVALID_RECORD_ID)
610 			continue;
611 		if (wpos != i)
612 			memcpy(&entries[wpos], &entries[i], sizeof(entries[i]));
613 		wpos++;
614 	}
615 	erst_record_id_cache.len = wpos;
616 }
617 
618 void erst_get_record_id_end(void)
619 {
620 	/*
621 	 * erst_disable != 0 should be detected by invoker via the
622 	 * return value of erst_get_record_id_begin/next, so this
623 	 * function should not be called for erst_disable != 0.
624 	 */
625 	BUG_ON(erst_disable);
626 
627 	mutex_lock(&erst_record_id_cache.lock);
628 	erst_record_id_cache.refcount--;
629 	BUG_ON(erst_record_id_cache.refcount < 0);
630 	__erst_record_id_cache_compact();
631 	mutex_unlock(&erst_record_id_cache.lock);
632 }
633 EXPORT_SYMBOL_GPL(erst_get_record_id_end);
634 
635 static int __erst_write_to_storage(u64 offset)
636 {
637 	struct apei_exec_context ctx;
638 	u64 timeout = FIRMWARE_TIMEOUT;
639 	u64 val;
640 	int rc;
641 
642 	erst_exec_ctx_init(&ctx);
643 	rc = apei_exec_run_optional(&ctx, ACPI_ERST_BEGIN_WRITE);
644 	if (rc)
645 		return rc;
646 	apei_exec_ctx_set_input(&ctx, offset);
647 	rc = apei_exec_run(&ctx, ACPI_ERST_SET_RECORD_OFFSET);
648 	if (rc)
649 		return rc;
650 	rc = apei_exec_run(&ctx, ACPI_ERST_EXECUTE_OPERATION);
651 	if (rc)
652 		return rc;
653 	for (;;) {
654 		rc = apei_exec_run(&ctx, ACPI_ERST_CHECK_BUSY_STATUS);
655 		if (rc)
656 			return rc;
657 		val = apei_exec_ctx_get_output(&ctx);
658 		if (!val)
659 			break;
660 		if (erst_timedout(&timeout, SPIN_UNIT))
661 			return -EIO;
662 	}
663 	rc = apei_exec_run(&ctx, ACPI_ERST_GET_COMMAND_STATUS);
664 	if (rc)
665 		return rc;
666 	val = apei_exec_ctx_get_output(&ctx);
667 	rc = apei_exec_run_optional(&ctx, ACPI_ERST_END);
668 	if (rc)
669 		return rc;
670 
671 	return erst_errno(val);
672 }
673 
674 static int __erst_read_from_storage(u64 record_id, u64 offset)
675 {
676 	struct apei_exec_context ctx;
677 	u64 timeout = FIRMWARE_TIMEOUT;
678 	u64 val;
679 	int rc;
680 
681 	erst_exec_ctx_init(&ctx);
682 	rc = apei_exec_run_optional(&ctx, ACPI_ERST_BEGIN_READ);
683 	if (rc)
684 		return rc;
685 	apei_exec_ctx_set_input(&ctx, offset);
686 	rc = apei_exec_run(&ctx, ACPI_ERST_SET_RECORD_OFFSET);
687 	if (rc)
688 		return rc;
689 	apei_exec_ctx_set_input(&ctx, record_id);
690 	rc = apei_exec_run(&ctx, ACPI_ERST_SET_RECORD_ID);
691 	if (rc)
692 		return rc;
693 	rc = apei_exec_run(&ctx, ACPI_ERST_EXECUTE_OPERATION);
694 	if (rc)
695 		return rc;
696 	for (;;) {
697 		rc = apei_exec_run(&ctx, ACPI_ERST_CHECK_BUSY_STATUS);
698 		if (rc)
699 			return rc;
700 		val = apei_exec_ctx_get_output(&ctx);
701 		if (!val)
702 			break;
703 		if (erst_timedout(&timeout, SPIN_UNIT))
704 			return -EIO;
705 	};
706 	rc = apei_exec_run(&ctx, ACPI_ERST_GET_COMMAND_STATUS);
707 	if (rc)
708 		return rc;
709 	val = apei_exec_ctx_get_output(&ctx);
710 	rc = apei_exec_run_optional(&ctx, ACPI_ERST_END);
711 	if (rc)
712 		return rc;
713 
714 	return erst_errno(val);
715 }
716 
717 static int __erst_clear_from_storage(u64 record_id)
718 {
719 	struct apei_exec_context ctx;
720 	u64 timeout = FIRMWARE_TIMEOUT;
721 	u64 val;
722 	int rc;
723 
724 	erst_exec_ctx_init(&ctx);
725 	rc = apei_exec_run_optional(&ctx, ACPI_ERST_BEGIN_CLEAR);
726 	if (rc)
727 		return rc;
728 	apei_exec_ctx_set_input(&ctx, record_id);
729 	rc = apei_exec_run(&ctx, ACPI_ERST_SET_RECORD_ID);
730 	if (rc)
731 		return rc;
732 	rc = apei_exec_run(&ctx, ACPI_ERST_EXECUTE_OPERATION);
733 	if (rc)
734 		return rc;
735 	for (;;) {
736 		rc = apei_exec_run(&ctx, ACPI_ERST_CHECK_BUSY_STATUS);
737 		if (rc)
738 			return rc;
739 		val = apei_exec_ctx_get_output(&ctx);
740 		if (!val)
741 			break;
742 		if (erst_timedout(&timeout, SPIN_UNIT))
743 			return -EIO;
744 	}
745 	rc = apei_exec_run(&ctx, ACPI_ERST_GET_COMMAND_STATUS);
746 	if (rc)
747 		return rc;
748 	val = apei_exec_ctx_get_output(&ctx);
749 	rc = apei_exec_run_optional(&ctx, ACPI_ERST_END);
750 	if (rc)
751 		return rc;
752 
753 	return erst_errno(val);
754 }
755 
/*
 * Rate-limited warning that the NVRAM flavour of the ERST Error Log
 * Address Range is not supported yet.
 */
static void pr_unimpl_nvram(void)
{
	if (!printk_ratelimit())
		return;

	pr_warn("NVRAM ERST Log Address Range not implemented yet.\n");
}
762 
763 static int __erst_write_to_nvram(const struct cper_record_header *record)
764 {
765 	/* do not print message, because printk is not safe for NMI */
766 	return -ENOSYS;
767 }
768 
769 static int __erst_read_to_erange_from_nvram(u64 record_id, u64 *offset)
770 {
771 	pr_unimpl_nvram();
772 	return -ENOSYS;
773 }
774 
775 static int __erst_clear_from_nvram(u64 record_id)
776 {
777 	pr_unimpl_nvram();
778 	return -ENOSYS;
779 }
780 
781 int erst_write(const struct cper_record_header *record)
782 {
783 	int rc;
784 	unsigned long flags;
785 	struct cper_record_header *rcd_erange;
786 
787 	if (erst_disable)
788 		return -ENODEV;
789 
790 	if (memcmp(record->signature, CPER_SIG_RECORD, CPER_SIG_SIZE))
791 		return -EINVAL;
792 
793 	if (erst_erange.attr & ERST_RANGE_NVRAM) {
794 		if (!raw_spin_trylock_irqsave(&erst_lock, flags))
795 			return -EBUSY;
796 		rc = __erst_write_to_nvram(record);
797 		raw_spin_unlock_irqrestore(&erst_lock, flags);
798 		return rc;
799 	}
800 
801 	if (record->record_length > erst_erange.size)
802 		return -EINVAL;
803 
804 	if (!raw_spin_trylock_irqsave(&erst_lock, flags))
805 		return -EBUSY;
806 	memcpy(erst_erange.vaddr, record, record->record_length);
807 	rcd_erange = erst_erange.vaddr;
808 	/* signature for serialization system */
809 	memcpy(&rcd_erange->persistence_information, "ER", 2);
810 
811 	rc = __erst_write_to_storage(0);
812 	raw_spin_unlock_irqrestore(&erst_lock, flags);
813 
814 	return rc;
815 }
816 EXPORT_SYMBOL_GPL(erst_write);
817 
818 static int __erst_read_to_erange(u64 record_id, u64 *offset)
819 {
820 	int rc;
821 
822 	if (erst_erange.attr & ERST_RANGE_NVRAM)
823 		return __erst_read_to_erange_from_nvram(
824 			record_id, offset);
825 
826 	rc = __erst_read_from_storage(record_id, 0);
827 	if (rc)
828 		return rc;
829 	*offset = 0;
830 
831 	return 0;
832 }
833 
834 static ssize_t __erst_read(u64 record_id, struct cper_record_header *record,
835 			   size_t buflen)
836 {
837 	int rc;
838 	u64 offset, len = 0;
839 	struct cper_record_header *rcd_tmp;
840 
841 	rc = __erst_read_to_erange(record_id, &offset);
842 	if (rc)
843 		return rc;
844 	rcd_tmp = erst_erange.vaddr + offset;
845 	len = rcd_tmp->record_length;
846 	if (len <= buflen)
847 		memcpy(record, rcd_tmp, len);
848 
849 	return len;
850 }
851 
852 /*
853  * If return value > buflen, the buffer size is not big enough,
854  * else if return value < 0, something goes wrong,
855  * else everything is OK, and return value is record length
856  */
857 ssize_t erst_read(u64 record_id, struct cper_record_header *record,
858 		  size_t buflen)
859 {
860 	ssize_t len;
861 	unsigned long flags;
862 
863 	if (erst_disable)
864 		return -ENODEV;
865 
866 	raw_spin_lock_irqsave(&erst_lock, flags);
867 	len = __erst_read(record_id, record, buflen);
868 	raw_spin_unlock_irqrestore(&erst_lock, flags);
869 	return len;
870 }
871 EXPORT_SYMBOL_GPL(erst_read);
872 
873 int erst_clear(u64 record_id)
874 {
875 	int rc, i;
876 	unsigned long flags;
877 	u64 *entries;
878 
879 	if (erst_disable)
880 		return -ENODEV;
881 
882 	rc = mutex_lock_interruptible(&erst_record_id_cache.lock);
883 	if (rc)
884 		return rc;
885 	raw_spin_lock_irqsave(&erst_lock, flags);
886 	if (erst_erange.attr & ERST_RANGE_NVRAM)
887 		rc = __erst_clear_from_nvram(record_id);
888 	else
889 		rc = __erst_clear_from_storage(record_id);
890 	raw_spin_unlock_irqrestore(&erst_lock, flags);
891 	if (rc)
892 		goto out;
893 	entries = erst_record_id_cache.entries;
894 	for (i = 0; i < erst_record_id_cache.len; i++) {
895 		if (entries[i] == record_id)
896 			entries[i] = APEI_ERST_INVALID_RECORD_ID;
897 	}
898 	__erst_record_id_cache_compact();
899 out:
900 	mutex_unlock(&erst_record_id_cache.lock);
901 	return rc;
902 }
903 EXPORT_SYMBOL_GPL(erst_clear);
904 
905 static int __init setup_erst_disable(char *str)
906 {
907 	erst_disable = 1;
908 	return 0;
909 }
910 
911 __setup("erst_disable", setup_erst_disable);
912 
913 static int erst_check_table(struct acpi_table_erst *erst_tab)
914 {
915 	if ((erst_tab->header_length !=
916 	     (sizeof(struct acpi_table_erst) - sizeof(erst_tab->header)))
917 	    && (erst_tab->header_length != sizeof(struct acpi_table_erst)))
918 		return -EINVAL;
919 	if (erst_tab->header.length < sizeof(struct acpi_table_erst))
920 		return -EINVAL;
921 	if (erst_tab->entries !=
922 	    (erst_tab->header.length - sizeof(struct acpi_table_erst)) /
923 	    sizeof(struct acpi_erst_entry))
924 		return -EINVAL;
925 
926 	return 0;
927 }
928 
929 static int erst_open_pstore(struct pstore_info *psi);
930 static int erst_close_pstore(struct pstore_info *psi);
931 static ssize_t erst_reader(u64 *id, enum pstore_type_id *type, int *count,
932 			   struct timespec *time, char **buf,
933 			   struct pstore_info *psi);
934 static int erst_writer(enum pstore_type_id type, enum kmsg_dump_reason reason,
935 		       u64 *id, unsigned int part, int count, size_t hsize,
936 		       size_t size, struct pstore_info *psi);
937 static int erst_clearer(enum pstore_type_id type, u64 id, int count,
938 			struct timespec time, struct pstore_info *psi);
939 
940 static struct pstore_info erst_info = {
941 	.owner		= THIS_MODULE,
942 	.name		= "erst",
943 	.open		= erst_open_pstore,
944 	.close		= erst_close_pstore,
945 	.read		= erst_reader,
946 	.write		= erst_writer,
947 	.erase		= erst_clearer
948 };
949 
950 #define CPER_CREATOR_PSTORE						\
951 	UUID_LE(0x75a574e3, 0x5052, 0x4b29, 0x8a, 0x8e, 0xbe, 0x2c,	\
952 		0x64, 0x90, 0xb8, 0x9d)
953 #define CPER_SECTION_TYPE_DMESG						\
954 	UUID_LE(0xc197e04e, 0xd545, 0x4a70, 0x9c, 0x17, 0xa5, 0x54,	\
955 		0x94, 0x19, 0xeb, 0x12)
956 #define CPER_SECTION_TYPE_MCE						\
957 	UUID_LE(0xfe08ffbe, 0x95e4, 0x4be7, 0xbc, 0x73, 0x40, 0x96,	\
958 		0x04, 0x4a, 0x38, 0xfc)
959 
960 struct cper_pstore_record {
961 	struct cper_record_header hdr;
962 	struct cper_section_descriptor sec_hdr;
963 	char data[];
964 } __packed;
965 
966 static int reader_pos;
967 
968 static int erst_open_pstore(struct pstore_info *psi)
969 {
970 	int rc;
971 
972 	if (erst_disable)
973 		return -ENODEV;
974 
975 	rc = erst_get_record_id_begin(&reader_pos);
976 
977 	return rc;
978 }
979 
/*
 * pstore close callback: finish the record ID enumeration started by
 * erst_open_pstore(). Always returns 0.
 */
static int erst_close_pstore(struct pstore_info *psi)
{
	int rc = 0;

	erst_get_record_id_end();
	return rc;
}
986 
987 static ssize_t erst_reader(u64 *id, enum pstore_type_id *type, int *count,
988 			   struct timespec *time, char **buf,
989 			   struct pstore_info *psi)
990 {
991 	int rc;
992 	ssize_t len = 0;
993 	u64 record_id;
994 	struct cper_pstore_record *rcd;
995 	size_t rcd_len = sizeof(*rcd) + erst_info.bufsize;
996 
997 	if (erst_disable)
998 		return -ENODEV;
999 
1000 	rcd = kmalloc(rcd_len, GFP_KERNEL);
1001 	if (!rcd) {
1002 		rc = -ENOMEM;
1003 		goto out;
1004 	}
1005 skip:
1006 	rc = erst_get_record_id_next(&reader_pos, &record_id);
1007 	if (rc)
1008 		goto out;
1009 
1010 	/* no more record */
1011 	if (record_id == APEI_ERST_INVALID_RECORD_ID) {
1012 		rc = -EINVAL;
1013 		goto out;
1014 	}
1015 
1016 	len = erst_read(record_id, &rcd->hdr, rcd_len);
1017 	/* The record may be cleared by others, try read next record */
1018 	if (len == -ENOENT)
1019 		goto skip;
1020 	else if (len < sizeof(*rcd)) {
1021 		rc = -EIO;
1022 		goto out;
1023 	}
1024 	if (uuid_le_cmp(rcd->hdr.creator_id, CPER_CREATOR_PSTORE) != 0)
1025 		goto skip;
1026 
1027 	*buf = kmalloc(len, GFP_KERNEL);
1028 	if (*buf == NULL) {
1029 		rc = -ENOMEM;
1030 		goto out;
1031 	}
1032 	memcpy(*buf, rcd->data, len - sizeof(*rcd));
1033 	*id = record_id;
1034 	if (uuid_le_cmp(rcd->sec_hdr.section_type,
1035 			CPER_SECTION_TYPE_DMESG) == 0)
1036 		*type = PSTORE_TYPE_DMESG;
1037 	else if (uuid_le_cmp(rcd->sec_hdr.section_type,
1038 			     CPER_SECTION_TYPE_MCE) == 0)
1039 		*type = PSTORE_TYPE_MCE;
1040 	else
1041 		*type = PSTORE_TYPE_UNKNOWN;
1042 
1043 	if (rcd->hdr.validation_bits & CPER_VALID_TIMESTAMP)
1044 		time->tv_sec = rcd->hdr.timestamp;
1045 	else
1046 		time->tv_sec = 0;
1047 	time->tv_nsec = 0;
1048 
1049 out:
1050 	kfree(rcd);
1051 	return (rc < 0) ? rc : (len - sizeof(*rcd));
1052 }
1053 
1054 static int erst_writer(enum pstore_type_id type, enum kmsg_dump_reason reason,
1055 		       u64 *id, unsigned int part, int count, size_t hsize,
1056 		       size_t size, struct pstore_info *psi)
1057 {
1058 	struct cper_pstore_record *rcd = (struct cper_pstore_record *)
1059 					(erst_info.buf - sizeof(*rcd));
1060 	int ret;
1061 
1062 	memset(rcd, 0, sizeof(*rcd));
1063 	memcpy(rcd->hdr.signature, CPER_SIG_RECORD, CPER_SIG_SIZE);
1064 	rcd->hdr.revision = CPER_RECORD_REV;
1065 	rcd->hdr.signature_end = CPER_SIG_END;
1066 	rcd->hdr.section_count = 1;
1067 	rcd->hdr.error_severity = CPER_SEV_FATAL;
1068 	/* timestamp valid. platform_id, partition_id are invalid */
1069 	rcd->hdr.validation_bits = CPER_VALID_TIMESTAMP;
1070 	rcd->hdr.timestamp = get_seconds();
1071 	rcd->hdr.record_length = sizeof(*rcd) + size;
1072 	rcd->hdr.creator_id = CPER_CREATOR_PSTORE;
1073 	rcd->hdr.notification_type = CPER_NOTIFY_MCE;
1074 	rcd->hdr.record_id = cper_next_record_id();
1075 	rcd->hdr.flags = CPER_HW_ERROR_FLAGS_PREVERR;
1076 
1077 	rcd->sec_hdr.section_offset = sizeof(*rcd);
1078 	rcd->sec_hdr.section_length = size;
1079 	rcd->sec_hdr.revision = CPER_SEC_REV;
1080 	/* fru_id and fru_text is invalid */
1081 	rcd->sec_hdr.validation_bits = 0;
1082 	rcd->sec_hdr.flags = CPER_SEC_PRIMARY;
1083 	switch (type) {
1084 	case PSTORE_TYPE_DMESG:
1085 		rcd->sec_hdr.section_type = CPER_SECTION_TYPE_DMESG;
1086 		break;
1087 	case PSTORE_TYPE_MCE:
1088 		rcd->sec_hdr.section_type = CPER_SECTION_TYPE_MCE;
1089 		break;
1090 	default:
1091 		return -EINVAL;
1092 	}
1093 	rcd->sec_hdr.section_severity = CPER_SEV_FATAL;
1094 
1095 	ret = erst_write(&rcd->hdr);
1096 	*id = rcd->hdr.record_id;
1097 
1098 	return ret;
1099 }
1100 
1101 static int erst_clearer(enum pstore_type_id type, u64 id, int count,
1102 			struct timespec time, struct pstore_info *psi)
1103 {
1104 	return erst_clear(id);
1105 }
1106 
1107 static int __init erst_init(void)
1108 {
1109 	int rc = 0;
1110 	acpi_status status;
1111 	struct apei_exec_context ctx;
1112 	struct apei_resources erst_resources;
1113 	struct resource *r;
1114 	char *buf;
1115 
1116 	if (acpi_disabled)
1117 		goto err;
1118 
1119 	if (erst_disable) {
1120 		pr_info(
1121 	"Error Record Serialization Table (ERST) support is disabled.\n");
1122 		goto err;
1123 	}
1124 
1125 	status = acpi_get_table(ACPI_SIG_ERST, 0,
1126 				(struct acpi_table_header **)&erst_tab);
1127 	if (status == AE_NOT_FOUND)
1128 		goto err;
1129 	else if (ACPI_FAILURE(status)) {
1130 		const char *msg = acpi_format_exception(status);
1131 		pr_err("Failed to get table, %s\n", msg);
1132 		rc = -EINVAL;
1133 		goto err;
1134 	}
1135 
1136 	rc = erst_check_table(erst_tab);
1137 	if (rc) {
1138 		pr_err(FW_BUG "ERST table is invalid.\n");
1139 		goto err;
1140 	}
1141 
1142 	apei_resources_init(&erst_resources);
1143 	erst_exec_ctx_init(&ctx);
1144 	rc = apei_exec_collect_resources(&ctx, &erst_resources);
1145 	if (rc)
1146 		goto err_fini;
1147 	rc = apei_resources_request(&erst_resources, "APEI ERST");
1148 	if (rc)
1149 		goto err_fini;
1150 	rc = apei_exec_pre_map_gars(&ctx);
1151 	if (rc)
1152 		goto err_release;
1153 	rc = erst_get_erange(&erst_erange);
1154 	if (rc) {
1155 		if (rc == -ENODEV)
1156 			pr_info(
1157 	"The corresponding hardware device or firmware implementation "
1158 	"is not available.\n");
1159 		else
1160 			pr_err("Failed to get Error Log Address Range.\n");
1161 		goto err_unmap_reg;
1162 	}
1163 
1164 	r = request_mem_region(erst_erange.base, erst_erange.size, "APEI ERST");
1165 	if (!r) {
1166 		pr_err("Can not request [mem %#010llx-%#010llx] for ERST.\n",
1167 		       (unsigned long long)erst_erange.base,
1168 		       (unsigned long long)erst_erange.base + erst_erange.size - 1);
1169 		rc = -EIO;
1170 		goto err_unmap_reg;
1171 	}
1172 	rc = -ENOMEM;
1173 	erst_erange.vaddr = ioremap_cache(erst_erange.base,
1174 					  erst_erange.size);
1175 	if (!erst_erange.vaddr)
1176 		goto err_release_erange;
1177 
1178 	pr_info(
1179 	"Error Record Serialization Table (ERST) support is initialized.\n");
1180 
1181 	buf = kmalloc(erst_erange.size, GFP_KERNEL);
1182 	spin_lock_init(&erst_info.buf_lock);
1183 	if (buf) {
1184 		erst_info.buf = buf + sizeof(struct cper_pstore_record);
1185 		erst_info.bufsize = erst_erange.size -
1186 				    sizeof(struct cper_pstore_record);
1187 		rc = pstore_register(&erst_info);
1188 		if (rc) {
1189 			if (rc != -EPERM)
1190 				pr_info(
1191 				"Could not register with persistent store.\n");
1192 			erst_info.buf = NULL;
1193 			erst_info.bufsize = 0;
1194 			kfree(buf);
1195 		}
1196 	} else
1197 		pr_err(
1198 		"Failed to allocate %lld bytes for persistent store error log.\n",
1199 		erst_erange.size);
1200 
1201 	return 0;
1202 
1203 err_release_erange:
1204 	release_mem_region(erst_erange.base, erst_erange.size);
1205 err_unmap_reg:
1206 	apei_exec_post_unmap_gars(&ctx);
1207 err_release:
1208 	apei_resources_release(&erst_resources);
1209 err_fini:
1210 	apei_resources_fini(&erst_resources);
1211 err:
1212 	erst_disable = 1;
1213 	return rc;
1214 }
1215 
1216 device_initcall(erst_init);
1217