1 /*
2  *  c 2001 PPC 64 Team, IBM Corp
3  *
4  *      This program is free software; you can redistribute it and/or
5  *      modify it under the terms of the GNU General Public License
6  *      as published by the Free Software Foundation; either version
7  *      2 of the License, or (at your option) any later version.
8  *
9  * /dev/nvram driver for PPC64
10  *
11  * This perhaps should live in drivers/char
12  */
13 
14 
15 #include <linux/types.h>
16 #include <linux/errno.h>
17 #include <linux/init.h>
18 #include <linux/spinlock.h>
19 #include <linux/slab.h>
20 #include <linux/kmsg_dump.h>
21 #include <linux/ctype.h>
22 #include <linux/zlib.h>
23 #include <asm/uaccess.h>
24 #include <asm/nvram.h>
25 #include <asm/rtas.h>
26 #include <asm/prom.h>
27 #include <asm/machdep.h>
28 
/* Max bytes to read/write in one go (upper bound on each RTAS transfer) */
#define NVRW_CNT 0x20

/* NVRAM size in bytes, read from the "#bytes" device-tree property */
static unsigned int nvram_size;
/* RTAS tokens for the "nvram-fetch" and "nvram-store" services */
static int nvram_fetch, nvram_store;
/* Staging buffer whose physical address is passed to the RTAS calls */
static char nvram_buf[NVRW_CNT];	/* assume this is in the first 4GB */
/* Held, with interrupts disabled, across every transfer through nvram_buf */
static DEFINE_SPINLOCK(nvram_lock);
36 
/*
 * Header written at the very start of an OS partition's data area,
 * immediately before the log payload.
 */
struct err_log_info {
	int error_type;		/* err_type on write; overwritten with
				 * ERR_FLAG_ALREADY_LOGGED by
				 * nvram_clear_error_log() */
	unsigned int seq_num;	/* sequence number supplied by the writer */
};
41 
/*
 * Describes one "OS"-signature NVRAM partition used for logging.
 * req_size and min_size are passed to nvram_create_partition(); size and
 * index are filled in by pseries_nvram_init_os_partition().  An index of
 * -1 means the partition has not been set up.
 */
struct nvram_os_partition {
	const char *name;
	int req_size;	/* desired size, in bytes */
	int min_size;	/* minimum acceptable size (0 means req_size) */
	long size;	/* size of data portion (excluding err_log_info) */
	long index;	/* offset of data portion of partition */
};
49 
/*
 * Partition used by nvram_write_error_log(), nvram_read_error_log() and
 * nvram_clear_error_log().  index stays -1 until initialization succeeds.
 */
static struct nvram_os_partition rtas_log_partition = {
	.name = "ibm,rtas-log",
	.req_size = 2079,
	.min_size = 1055,
	.index = -1
};
56 
/*
 * Partition that receives oops/panic reports.  If it cannot be set up,
 * nvram_init_oops_partition() overwrites this descriptor with a copy of
 * rtas_log_partition so both kinds of report share one partition.
 */
static struct nvram_os_partition oops_log_partition = {
	.name = "lnx,oops-log",
	.req_size = 4000,
	.min_size = 2000,
	.index = -1
};
63 
/*
 * NULL-terminated list of the OS partition names this file uses.  It is
 * handed to nvram_remove_partition() when obsolete OS partitions are
 * deleted to make room.
 * NOTE(review): presumably the list names partitions to keep -- confirm
 * against nvram_remove_partition().
 */
static const char *pseries_nvram_os_partitions[] = {
	"ibm,rtas-log",
	"lnx,oops-log",
	NULL
};
69 
/* Forward declaration; oops_to_nvram() is defined further down. */
static void oops_to_nvram(struct kmsg_dumper *dumper,
			  enum kmsg_dump_reason reason);

/* Dumper registered with kmsg_dump_register() by nvram_init_oops_partition() */
static struct kmsg_dumper nvram_kmsg_dumper = {
	.dump = oops_to_nvram
};
76 
/* See clobbering_unread_rtas_event() */
#define NVRAM_RTAS_READ_TIMEOUT 5		/* seconds */
/*
 * Set to get_seconds() when an RTAS error log is written successfully and
 * reset to 0 when nvram_clear_error_log() succeeds.
 */
static unsigned long last_unread_rtas_event;	/* timestamp */
80 
/*
 * For capturing and compressing an oops or panic report:
 *
 * big_oops_buf[] holds the uncompressed text we're capturing.
 *
 * oops_buf[] holds the compressed text, preceded by a prefix.
 * The prefix is just a u16 holding the length of the compressed* text.
 * (*Or uncompressed, if compression fails.)  oops_buf[] gets written
 * to NVRAM.
 *
 * oops_len points to the prefix.  oops_data points to the compressed text.
 *
 * +- oops_buf
 * |		+- oops_data
 * v		v
 * +------------+-----------------------------------------------+
 * | length	| text                                          |
 * | (2 bytes)	| (oops_data_sz bytes)                          |
 * +------------+-----------------------------------------------+
 * ^
 * +- oops_len
 *
 * We preallocate these buffers during init to avoid kmalloc during oops/panic.
 */
static size_t big_oops_buf_sz;
static char *big_oops_buf, *oops_buf;
static u16 *oops_len;
static char *oops_data;
static size_t oops_data_sz;
110 
/*
 * Compression parameters, passed to zlib_deflateInit2() in
 * nvram_compress().  WINDOW_BITS and MEM_LEVEL also size the workspace
 * allocated in nvram_init_oops_partition().
 */
#define COMPR_LEVEL 6
#define WINDOW_BITS 12
#define MEM_LEVEL 4
/* Single shared deflate stream; its workspace is allocated at init time */
static struct z_stream_s stream;
116 
117 static ssize_t pSeries_nvram_read(char *buf, size_t count, loff_t *index)
118 {
119 	unsigned int i;
120 	unsigned long len;
121 	int done;
122 	unsigned long flags;
123 	char *p = buf;
124 
125 
126 	if (nvram_size == 0 || nvram_fetch == RTAS_UNKNOWN_SERVICE)
127 		return -ENODEV;
128 
129 	if (*index >= nvram_size)
130 		return 0;
131 
132 	i = *index;
133 	if (i + count > nvram_size)
134 		count = nvram_size - i;
135 
136 	spin_lock_irqsave(&nvram_lock, flags);
137 
138 	for (; count != 0; count -= len) {
139 		len = count;
140 		if (len > NVRW_CNT)
141 			len = NVRW_CNT;
142 
143 		if ((rtas_call(nvram_fetch, 3, 2, &done, i, __pa(nvram_buf),
144 			       len) != 0) || len != done) {
145 			spin_unlock_irqrestore(&nvram_lock, flags);
146 			return -EIO;
147 		}
148 
149 		memcpy(p, nvram_buf, len);
150 
151 		p += len;
152 		i += len;
153 	}
154 
155 	spin_unlock_irqrestore(&nvram_lock, flags);
156 
157 	*index = i;
158 	return p - buf;
159 }
160 
161 static ssize_t pSeries_nvram_write(char *buf, size_t count, loff_t *index)
162 {
163 	unsigned int i;
164 	unsigned long len;
165 	int done;
166 	unsigned long flags;
167 	const char *p = buf;
168 
169 	if (nvram_size == 0 || nvram_store == RTAS_UNKNOWN_SERVICE)
170 		return -ENODEV;
171 
172 	if (*index >= nvram_size)
173 		return 0;
174 
175 	i = *index;
176 	if (i + count > nvram_size)
177 		count = nvram_size - i;
178 
179 	spin_lock_irqsave(&nvram_lock, flags);
180 
181 	for (; count != 0; count -= len) {
182 		len = count;
183 		if (len > NVRW_CNT)
184 			len = NVRW_CNT;
185 
186 		memcpy(nvram_buf, p, len);
187 
188 		if ((rtas_call(nvram_store, 3, 2, &done, i, __pa(nvram_buf),
189 			       len) != 0) || len != done) {
190 			spin_unlock_irqrestore(&nvram_lock, flags);
191 			return -EIO;
192 		}
193 
194 		p += len;
195 		i += len;
196 	}
197 	spin_unlock_irqrestore(&nvram_lock, flags);
198 
199 	*index = i;
200 	return p - buf;
201 }
202 
203 static ssize_t pSeries_nvram_get_size(void)
204 {
205 	return nvram_size ? nvram_size : -ENODEV;
206 }
207 
208 
209 /* nvram_write_os_partition, nvram_write_error_log
210  *
211  * We need to buffer the error logs into nvram to ensure that we have
212  * the failure information to decode.  If we have a severe error there
213  * is no way to guarantee that the OS or the machine is in a state to
214  * get back to user land and write the error to disk.  For example if
215  * the SCSI device driver causes a Machine Check by writing to a bad
216  * IO address, there is no way of guaranteeing that the device driver
217  * is in any state that is would also be able to write the error data
218  * captured to disk, thus we buffer it in NVRAM for analysis on the
219  * next boot.
220  *
221  * In NVRAM the partition containing the error log buffer will looks like:
222  * Header (in bytes):
223  * +-----------+----------+--------+------------+------------------+
224  * | signature | checksum | length | name       | data             |
225  * |0          |1         |2      3|4         15|16        length-1|
226  * +-----------+----------+--------+------------+------------------+
227  *
228  * The 'data' section would look like (in bytes):
229  * +--------------+------------+-----------------------------------+
230  * | event_logged | sequence # | error log                         |
231  * |0            3|4          7|8                  error_log_size-1|
232  * +--------------+------------+-----------------------------------+
233  *
234  * event_logged: 0 if event has not been logged to syslog, 1 if it has
235  * sequence #: The unique sequence # for each event. (until it wraps)
236  * error log: The error log from event_scan
237  */
238 int nvram_write_os_partition(struct nvram_os_partition *part, char * buff,
239 		int length, unsigned int err_type, unsigned int error_log_cnt)
240 {
241 	int rc;
242 	loff_t tmp_index;
243 	struct err_log_info info;
244 
245 	if (part->index == -1) {
246 		return -ESPIPE;
247 	}
248 
249 	if (length > part->size) {
250 		length = part->size;
251 	}
252 
253 	info.error_type = err_type;
254 	info.seq_num = error_log_cnt;
255 
256 	tmp_index = part->index;
257 
258 	rc = ppc_md.nvram_write((char *)&info, sizeof(struct err_log_info), &tmp_index);
259 	if (rc <= 0) {
260 		pr_err("%s: Failed nvram_write (%d)\n", __FUNCTION__, rc);
261 		return rc;
262 	}
263 
264 	rc = ppc_md.nvram_write(buff, length, &tmp_index);
265 	if (rc <= 0) {
266 		pr_err("%s: Failed nvram_write (%d)\n", __FUNCTION__, rc);
267 		return rc;
268 	}
269 
270 	return 0;
271 }
272 
273 int nvram_write_error_log(char * buff, int length,
274                           unsigned int err_type, unsigned int error_log_cnt)
275 {
276 	int rc = nvram_write_os_partition(&rtas_log_partition, buff, length,
277 						err_type, error_log_cnt);
278 	if (!rc)
279 		last_unread_rtas_event = get_seconds();
280 	return rc;
281 }
282 
283 /* nvram_read_error_log
284  *
285  * Reads nvram for error log for at most 'length'
286  */
287 int nvram_read_error_log(char * buff, int length,
288                          unsigned int * err_type, unsigned int * error_log_cnt)
289 {
290 	int rc;
291 	loff_t tmp_index;
292 	struct err_log_info info;
293 
294 	if (rtas_log_partition.index == -1)
295 		return -1;
296 
297 	if (length > rtas_log_partition.size)
298 		length = rtas_log_partition.size;
299 
300 	tmp_index = rtas_log_partition.index;
301 
302 	rc = ppc_md.nvram_read((char *)&info, sizeof(struct err_log_info), &tmp_index);
303 	if (rc <= 0) {
304 		printk(KERN_ERR "nvram_read_error_log: Failed nvram_read (%d)\n", rc);
305 		return rc;
306 	}
307 
308 	rc = ppc_md.nvram_read(buff, length, &tmp_index);
309 	if (rc <= 0) {
310 		printk(KERN_ERR "nvram_read_error_log: Failed nvram_read (%d)\n", rc);
311 		return rc;
312 	}
313 
314 	*error_log_cnt = info.seq_num;
315 	*err_type = info.error_type;
316 
317 	return 0;
318 }
319 
320 /* This doesn't actually zero anything, but it sets the event_logged
321  * word to tell that this event is safely in syslog.
322  */
323 int nvram_clear_error_log(void)
324 {
325 	loff_t tmp_index;
326 	int clear_word = ERR_FLAG_ALREADY_LOGGED;
327 	int rc;
328 
329 	if (rtas_log_partition.index == -1)
330 		return -1;
331 
332 	tmp_index = rtas_log_partition.index;
333 
334 	rc = ppc_md.nvram_write((char *)&clear_word, sizeof(int), &tmp_index);
335 	if (rc <= 0) {
336 		printk(KERN_ERR "nvram_clear_error_log: Failed nvram_write (%d)\n", rc);
337 		return rc;
338 	}
339 	last_unread_rtas_event = 0;
340 
341 	return 0;
342 }
343 
344 /* pseries_nvram_init_os_partition
345  *
346  * This sets up a partition with an "OS" signature.
347  *
348  * The general strategy is the following:
349  * 1.) If a partition with the indicated name already exists...
350  *	- If it's large enough, use it.
351  *	- Otherwise, recycle it and keep going.
352  * 2.) Search for a free partition that is large enough.
353  * 3.) If there's not a free partition large enough, recycle any obsolete
354  * OS partitions and try again.
355  * 4.) Will first try getting a chunk that will satisfy the requested size.
356  * 5.) If a chunk of the requested size cannot be allocated, then try finding
357  * a chunk that will satisfy the minum needed.
358  *
359  * Returns 0 on success, else -1.
360  */
361 static int __init pseries_nvram_init_os_partition(struct nvram_os_partition
362 									*part)
363 {
364 	loff_t p;
365 	int size;
366 
367 	/* Scan nvram for partitions */
368 	nvram_scan_partitions();
369 
370 	/* Look for ours */
371 	p = nvram_find_partition(part->name, NVRAM_SIG_OS, &size);
372 
373 	/* Found one but too small, remove it */
374 	if (p && size < part->min_size) {
375 		pr_info("nvram: Found too small %s partition,"
376 					" removing it...\n", part->name);
377 		nvram_remove_partition(part->name, NVRAM_SIG_OS, NULL);
378 		p = 0;
379 	}
380 
381 	/* Create one if we didn't find */
382 	if (!p) {
383 		p = nvram_create_partition(part->name, NVRAM_SIG_OS,
384 					part->req_size, part->min_size);
385 		if (p == -ENOSPC) {
386 			pr_info("nvram: No room to create %s partition, "
387 				"deleting any obsolete OS partitions...\n",
388 				part->name);
389 			nvram_remove_partition(NULL, NVRAM_SIG_OS,
390 						pseries_nvram_os_partitions);
391 			p = nvram_create_partition(part->name, NVRAM_SIG_OS,
392 					part->req_size, part->min_size);
393 		}
394 	}
395 
396 	if (p <= 0) {
397 		pr_err("nvram: Failed to find or create %s"
398 		       " partition, err %d\n", part->name, (int)p);
399 		return -1;
400 	}
401 
402 	part->index = p;
403 	part->size = nvram_get_partition_size(p) - sizeof(struct err_log_info);
404 
405 	return 0;
406 }
407 
408 static void __init nvram_init_oops_partition(int rtas_partition_exists)
409 {
410 	int rc;
411 
412 	rc = pseries_nvram_init_os_partition(&oops_log_partition);
413 	if (rc != 0) {
414 		if (!rtas_partition_exists)
415 			return;
416 		pr_notice("nvram: Using %s partition to log both"
417 			" RTAS errors and oops/panic reports\n",
418 			rtas_log_partition.name);
419 		memcpy(&oops_log_partition, &rtas_log_partition,
420 						sizeof(rtas_log_partition));
421 	}
422 	oops_buf = kmalloc(oops_log_partition.size, GFP_KERNEL);
423 	if (!oops_buf) {
424 		pr_err("nvram: No memory for %s partition\n",
425 						oops_log_partition.name);
426 		return;
427 	}
428 	oops_len = (u16*) oops_buf;
429 	oops_data = oops_buf + sizeof(u16);
430 	oops_data_sz = oops_log_partition.size - sizeof(u16);
431 
432 	/*
433 	 * Figure compression (preceded by elimination of each line's <n>
434 	 * severity prefix) will reduce the oops/panic report to at most
435 	 * 45% of its original size.
436 	 */
437 	big_oops_buf_sz = (oops_data_sz * 100) / 45;
438 	big_oops_buf = kmalloc(big_oops_buf_sz, GFP_KERNEL);
439 	if (big_oops_buf) {
440 		stream.workspace = kmalloc(zlib_deflate_workspacesize(
441 				WINDOW_BITS, MEM_LEVEL), GFP_KERNEL);
442 		if (!stream.workspace) {
443 			pr_err("nvram: No memory for compression workspace; "
444 				"skipping compression of %s partition data\n",
445 				oops_log_partition.name);
446 			kfree(big_oops_buf);
447 			big_oops_buf = NULL;
448 		}
449 	} else {
450 		pr_err("No memory for uncompressed %s data; "
451 			"skipping compression\n", oops_log_partition.name);
452 		stream.workspace = NULL;
453 	}
454 
455 	rc = kmsg_dump_register(&nvram_kmsg_dumper);
456 	if (rc != 0) {
457 		pr_err("nvram: kmsg_dump_register() failed; returned %d\n", rc);
458 		kfree(oops_buf);
459 		kfree(big_oops_buf);
460 		kfree(stream.workspace);
461 	}
462 }
463 
464 static int __init pseries_nvram_init_log_partitions(void)
465 {
466 	int rc;
467 
468 	rc = pseries_nvram_init_os_partition(&rtas_log_partition);
469 	nvram_init_oops_partition(rc == 0);
470 	return 0;
471 }
472 machine_arch_initcall(pseries, pseries_nvram_init_log_partitions);
473 
474 int __init pSeries_nvram_init(void)
475 {
476 	struct device_node *nvram;
477 	const unsigned int *nbytes_p;
478 	unsigned int proplen;
479 
480 	nvram = of_find_node_by_type(NULL, "nvram");
481 	if (nvram == NULL)
482 		return -ENODEV;
483 
484 	nbytes_p = of_get_property(nvram, "#bytes", &proplen);
485 	if (nbytes_p == NULL || proplen != sizeof(unsigned int)) {
486 		of_node_put(nvram);
487 		return -EIO;
488 	}
489 
490 	nvram_size = *nbytes_p;
491 
492 	nvram_fetch = rtas_token("nvram-fetch");
493 	nvram_store = rtas_token("nvram-store");
494 	printk(KERN_INFO "PPC64 nvram contains %d bytes\n", nvram_size);
495 	of_node_put(nvram);
496 
497 	ppc_md.nvram_read	= pSeries_nvram_read;
498 	ppc_md.nvram_write	= pSeries_nvram_write;
499 	ppc_md.nvram_size	= pSeries_nvram_get_size;
500 
501 	return 0;
502 }
503 
504 /*
505  * Are we using the ibm,rtas-log for oops/panic reports?  And if so,
506  * would logging this oops/panic overwrite an RTAS event that rtas_errd
507  * hasn't had a chance to read and process?  Return 1 if so, else 0.
508  *
509  * We assume that if rtas_errd hasn't read the RTAS event in
510  * NVRAM_RTAS_READ_TIMEOUT seconds, it's probably not going to.
511  */
512 static int clobbering_unread_rtas_event(void)
513 {
514 	return (oops_log_partition.index == rtas_log_partition.index
515 		&& last_unread_rtas_event
516 		&& get_seconds() - last_unread_rtas_event <=
517 						NVRAM_RTAS_READ_TIMEOUT);
518 }
519 
520 /* Derived from logfs_compress() */
521 static int nvram_compress(const void *in, void *out, size_t inlen,
522 							size_t outlen)
523 {
524 	int err, ret;
525 
526 	ret = -EIO;
527 	err = zlib_deflateInit2(&stream, COMPR_LEVEL, Z_DEFLATED, WINDOW_BITS,
528 						MEM_LEVEL, Z_DEFAULT_STRATEGY);
529 	if (err != Z_OK)
530 		goto error;
531 
532 	stream.next_in = in;
533 	stream.avail_in = inlen;
534 	stream.total_in = 0;
535 	stream.next_out = out;
536 	stream.avail_out = outlen;
537 	stream.total_out = 0;
538 
539 	err = zlib_deflate(&stream, Z_FINISH);
540 	if (err != Z_STREAM_END)
541 		goto error;
542 
543 	err = zlib_deflateEnd(&stream);
544 	if (err != Z_OK)
545 		goto error;
546 
547 	if (stream.total_out >= stream.total_in)
548 		goto error;
549 
550 	ret = stream.total_out;
551 error:
552 	return ret;
553 }
554 
555 /* Compress the text from big_oops_buf into oops_buf. */
556 static int zip_oops(size_t text_len)
557 {
558 	int zipped_len = nvram_compress(big_oops_buf, oops_data, text_len,
559 								oops_data_sz);
560 	if (zipped_len < 0) {
561 		pr_err("nvram: compression failed; returned %d\n", zipped_len);
562 		pr_err("nvram: logging uncompressed oops/panic report\n");
563 		return -1;
564 	}
565 	*oops_len = (u16) zipped_len;
566 	return 0;
567 }
568 
569 /*
570  * This is our kmsg_dump callback, called after an oops or panic report
571  * has been written to the printk buffer.  We want to capture as much
572  * of the printk buffer as possible.  First, capture as much as we can
573  * that we think will compress sufficiently to fit in the lnx,oops-log
574  * partition.  If that's too much, go back and capture uncompressed text.
575  */
576 static void oops_to_nvram(struct kmsg_dumper *dumper,
577 			  enum kmsg_dump_reason reason)
578 {
579 	static unsigned int oops_count = 0;
580 	static bool panicking = false;
581 	static DEFINE_SPINLOCK(lock);
582 	unsigned long flags;
583 	size_t text_len;
584 	unsigned int err_type = ERR_TYPE_KERNEL_PANIC_GZ;
585 	int rc = -1;
586 
587 	switch (reason) {
588 	case KMSG_DUMP_RESTART:
589 	case KMSG_DUMP_HALT:
590 	case KMSG_DUMP_POWEROFF:
591 		/* These are almost always orderly shutdowns. */
592 		return;
593 	case KMSG_DUMP_OOPS:
594 		break;
595 	case KMSG_DUMP_PANIC:
596 		panicking = true;
597 		break;
598 	case KMSG_DUMP_EMERG:
599 		if (panicking)
600 			/* Panic report already captured. */
601 			return;
602 		break;
603 	default:
604 		pr_err("%s: ignoring unrecognized KMSG_DUMP_* reason %d\n",
605 						__FUNCTION__, (int) reason);
606 		return;
607 	}
608 
609 	if (clobbering_unread_rtas_event())
610 		return;
611 
612 	if (!spin_trylock_irqsave(&lock, flags))
613 		return;
614 
615 	if (big_oops_buf) {
616 		kmsg_dump_get_buffer(dumper, false,
617 				     big_oops_buf, big_oops_buf_sz, &text_len);
618 		rc = zip_oops(text_len);
619 	}
620 	if (rc != 0) {
621 		kmsg_dump_rewind(dumper);
622 		kmsg_dump_get_buffer(dumper, true,
623 				     oops_data, oops_data_sz, &text_len);
624 		err_type = ERR_TYPE_KERNEL_PANIC;
625 		*oops_len = (u16) text_len;
626 	}
627 
628 	(void) nvram_write_os_partition(&oops_log_partition, oops_buf,
629 		(int) (sizeof(*oops_len) + *oops_len), err_type, ++oops_count);
630 
631 	spin_unlock_irqrestore(&lock, flags);
632 }
633