xref: /openbmc/linux/fs/proc/vmcore.c (revision fe7498ef)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  *	fs/proc/vmcore.c Interface for accessing the crash
4  * 				 dump from the system's previous life.
5  * 	Heavily borrowed from fs/proc/kcore.c
6  *	Created by: Hariprasad Nellitheertha (hari@in.ibm.com)
7  *	Copyright (C) IBM Corporation, 2004. All rights reserved
8  *
9  */
10 
11 #include <linux/mm.h>
12 #include <linux/kcore.h>
13 #include <linux/user.h>
14 #include <linux/elf.h>
15 #include <linux/elfcore.h>
16 #include <linux/export.h>
17 #include <linux/slab.h>
18 #include <linux/highmem.h>
19 #include <linux/printk.h>
20 #include <linux/memblock.h>
21 #include <linux/init.h>
22 #include <linux/crash_dump.h>
23 #include <linux/list.h>
24 #include <linux/moduleparam.h>
25 #include <linux/mutex.h>
26 #include <linux/vmalloc.h>
27 #include <linux/pagemap.h>
28 #include <linux/uaccess.h>
29 #include <linux/cc_platform.h>
30 #include <asm/io.h>
31 #include "internal.h"
32 
33 /* List representing chunks of contiguous memory areas and their offsets in
34  * vmcore file.
35  */
36 static LIST_HEAD(vmcore_list);
37 
38 /* Stores the pointer to the buffer containing kernel elf core headers. */
39 static char *elfcorebuf;
40 static size_t elfcorebuf_sz;
41 static size_t elfcorebuf_sz_orig;
42 
43 static char *elfnotes_buf;
44 static size_t elfnotes_sz;
45 /* Size of all notes minus the device dump notes */
46 static size_t elfnotes_orig_sz;
47 
48 /* Total size of vmcore file. */
49 static u64 vmcore_size;
50 
51 static struct proc_dir_entry *proc_vmcore;
52 
53 #ifdef CONFIG_PROC_VMCORE_DEVICE_DUMP
54 /* Device Dump list and mutex to synchronize access to list */
55 static LIST_HEAD(vmcoredd_list);
56 static DEFINE_MUTEX(vmcoredd_mutex);
57 
58 static bool vmcoredd_disabled;
59 core_param(novmcoredd, vmcoredd_disabled, bool, 0);
60 #endif /* CONFIG_PROC_VMCORE_DEVICE_DUMP */
61 
62 /* Total size of the device dumps */
63 static size_t vmcoredd_orig_sz;
64 
65 static DECLARE_RWSEM(vmcore_cb_rwsem);
66 /* List of registered vmcore callbacks. */
67 static LIST_HEAD(vmcore_cb_list);
68 /* Whether we had a surprise unregistration of a callback. */
69 static bool vmcore_cb_unstable;
70 /* Whether the vmcore has been opened once. */
71 static bool vmcore_opened;
72 
73 void register_vmcore_cb(struct vmcore_cb *cb)
74 {
75 	down_write(&vmcore_cb_rwsem);
76 	INIT_LIST_HEAD(&cb->next);
77 	list_add_tail(&cb->next, &vmcore_cb_list);
78 	/*
79 	 * Registering a vmcore callback after the vmcore was opened is
80 	 * very unusual (e.g., manual driver loading).
81 	 */
82 	if (vmcore_opened)
83 		pr_warn_once("Unexpected vmcore callback registration\n");
84 	up_write(&vmcore_cb_rwsem);
85 }
86 EXPORT_SYMBOL_GPL(register_vmcore_cb);
87 
88 void unregister_vmcore_cb(struct vmcore_cb *cb)
89 {
90 	down_write(&vmcore_cb_rwsem);
91 	list_del(&cb->next);
92 	/*
93 	 * Unregistering a vmcore callback after the vmcore was opened is
94 	 * very unusual (e.g., forced driver removal), but we cannot stop
95 	 * unregistering.
96 	 */
97 	if (vmcore_opened) {
98 		pr_warn_once("Unexpected vmcore callback unregistration\n");
99 		vmcore_cb_unstable = true;
100 	}
101 	up_write(&vmcore_cb_rwsem);
102 }
103 EXPORT_SYMBOL_GPL(unregister_vmcore_cb);
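
/*
 * Example (illustrative sketch only, not taken from an in-tree driver): a
 * driver that knows some old-kernel pfns are no longer backed by RAM (e.g.
 * memory that was unplugged before the crash) can filter them out of the
 * dump by registering a callback. The helper my_pfn_is_backed() is
 * hypothetical; the callback signature and list hook match struct vmcore_cb
 * as used above.
 *
 *	static bool my_pfn_is_ram(struct vmcore_cb *cb, unsigned long pfn)
 *	{
 *		return my_pfn_is_backed(pfn);
 *	}
 *
 *	static struct vmcore_cb my_vmcore_cb = {
 *		.pfn_is_ram = my_pfn_is_ram,
 *	};
 *
 *	register_vmcore_cb(&my_vmcore_cb);	(at probe time)
 *	unregister_vmcore_cb(&my_vmcore_cb);	(at removal time)
 */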
104 
105 static bool pfn_is_ram(unsigned long pfn)
106 {
107 	struct vmcore_cb *cb;
108 	bool ret = true;
109 
110 	lockdep_assert_held_read(&vmcore_cb_rwsem);
111 	if (unlikely(vmcore_cb_unstable))
112 		return false;
113 
114 	list_for_each_entry(cb, &vmcore_cb_list, next) {
115 		if (unlikely(!cb->pfn_is_ram))
116 			continue;
117 		ret = cb->pfn_is_ram(cb, pfn);
118 		if (!ret)
119 			break;
120 	}
121 
122 	return ret;
123 }
124 
125 static int open_vmcore(struct inode *inode, struct file *file)
126 {
127 	down_read(&vmcore_cb_rwsem);
128 	vmcore_opened = true;
129 	up_read(&vmcore_cb_rwsem);
130 
131 	return 0;
132 }
133 
134 /* Reads from the oldmem device at the given offset, page by page. */
135 ssize_t read_from_oldmem(char *buf, size_t count,
136 			 u64 *ppos, int userbuf,
137 			 bool encrypted)
138 {
139 	unsigned long pfn, offset;
140 	size_t nr_bytes;
141 	ssize_t read = 0, tmp;
142 
143 	if (!count)
144 		return 0;
145 
146 	offset = (unsigned long)(*ppos % PAGE_SIZE);
147 	pfn = (unsigned long)(*ppos / PAGE_SIZE);
148 
149 	down_read(&vmcore_cb_rwsem);
150 	do {
151 		if (count > (PAGE_SIZE - offset))
152 			nr_bytes = PAGE_SIZE - offset;
153 		else
154 			nr_bytes = count;
155 
156 		/* If pfn is not ram, return zeros for sparse dump files */
157 		if (!pfn_is_ram(pfn))
158 			memset(buf, 0, nr_bytes);
159 		else {
160 			if (encrypted)
161 				tmp = copy_oldmem_page_encrypted(pfn, buf,
162 								 nr_bytes,
163 								 offset,
164 								 userbuf);
165 			else
166 				tmp = copy_oldmem_page(pfn, buf, nr_bytes,
167 						       offset, userbuf);
168 
169 			if (tmp < 0) {
170 				up_read(&vmcore_cb_rwsem);
171 				return tmp;
172 			}
173 		}
174 		*ppos += nr_bytes;
175 		count -= nr_bytes;
176 		buf += nr_bytes;
177 		read += nr_bytes;
178 		++pfn;
179 		offset = 0;
180 	} while (count);
181 
182 	up_read(&vmcore_cb_rwsem);
183 	return read;
184 }
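
/*
 * Worked example (illustrative, assuming 4 KiB pages): a read of
 * count = 0x2000 bytes at *ppos = 0x1234 starts at pfn 0x1, offset 0x234.
 * The first iteration copies 0xdcc bytes up to the page boundary, the
 * second copies a full 0x1000 bytes from pfn 0x2, and the third copies the
 * remaining 0x234 bytes from pfn 0x3.
 */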
185 
186 /*
187  * Architectures may override this function to allocate ELF header in 2nd kernel
188  */
189 int __weak elfcorehdr_alloc(unsigned long long *addr, unsigned long long *size)
190 {
191 	return 0;
192 }
193 
194 /*
195  * Architectures may override this function to free header
196  */
197 void __weak elfcorehdr_free(unsigned long long addr)
198 {}
199 
200 /*
201  * Architectures may override this function to read from ELF header
202  */
203 ssize_t __weak elfcorehdr_read(char *buf, size_t count, u64 *ppos)
204 {
205 	return read_from_oldmem(buf, count, ppos, 0, false);
206 }
207 
208 /*
209  * Architectures may override this function to read from notes sections
210  */
211 ssize_t __weak elfcorehdr_read_notes(char *buf, size_t count, u64 *ppos)
212 {
213 	return read_from_oldmem(buf, count, ppos, 0, cc_platform_has(CC_ATTR_MEM_ENCRYPT));
214 }
215 
216 /*
217  * Architectures may override this function to map oldmem
218  */
219 int __weak remap_oldmem_pfn_range(struct vm_area_struct *vma,
220 				  unsigned long from, unsigned long pfn,
221 				  unsigned long size, pgprot_t prot)
222 {
223 	prot = pgprot_encrypted(prot);
224 	return remap_pfn_range(vma, from, pfn, size, prot);
225 }
226 
227 /*
228  * Architectures which support memory encryption override this.
229  */
230 ssize_t __weak
231 copy_oldmem_page_encrypted(unsigned long pfn, char *buf, size_t csize,
232 			   unsigned long offset, int userbuf)
233 {
234 	return copy_oldmem_page(pfn, buf, csize, offset, userbuf);
235 }
236 
237 /*
238  * Copy to either kernel or user space
239  */
240 static int copy_to(void *target, void *src, size_t size, int userbuf)
241 {
242 	if (userbuf) {
243 		if (copy_to_user((char __user *) target, src, size))
244 			return -EFAULT;
245 	} else {
246 		memcpy(target, src, size);
247 	}
248 	return 0;
249 }
250 
251 #ifdef CONFIG_PROC_VMCORE_DEVICE_DUMP
252 static int vmcoredd_copy_dumps(void *dst, u64 start, size_t size, int userbuf)
253 {
254 	struct vmcoredd_node *dump;
255 	u64 offset = 0;
256 	int ret = 0;
257 	size_t tsz;
258 	char *buf;
259 
260 	mutex_lock(&vmcoredd_mutex);
261 	list_for_each_entry(dump, &vmcoredd_list, list) {
262 		if (start < offset + dump->size) {
263 			tsz = min(offset + (u64)dump->size - start, (u64)size);
264 			buf = dump->buf + start - offset;
265 			if (copy_to(dst, buf, tsz, userbuf)) {
266 				ret = -EFAULT;
267 				goto out_unlock;
268 			}
269 
270 			size -= tsz;
271 			start += tsz;
272 			dst += tsz;
273 
274 			/* Leave now if buffer filled already */
275 			if (!size)
276 				goto out_unlock;
277 		}
278 		offset += dump->size;
279 	}
280 
281 out_unlock:
282 	mutex_unlock(&vmcoredd_mutex);
283 	return ret;
284 }
285 
286 #ifdef CONFIG_MMU
287 static int vmcoredd_mmap_dumps(struct vm_area_struct *vma, unsigned long dst,
288 			       u64 start, size_t size)
289 {
290 	struct vmcoredd_node *dump;
291 	u64 offset = 0;
292 	int ret = 0;
293 	size_t tsz;
294 	char *buf;
295 
296 	mutex_lock(&vmcoredd_mutex);
297 	list_for_each_entry(dump, &vmcoredd_list, list) {
298 		if (start < offset + dump->size) {
299 			tsz = min(offset + (u64)dump->size - start, (u64)size);
300 			buf = dump->buf + start - offset;
301 			if (remap_vmalloc_range_partial(vma, dst, buf, 0,
302 							tsz)) {
303 				ret = -EFAULT;
304 				goto out_unlock;
305 			}
306 
307 			size -= tsz;
308 			start += tsz;
309 			dst += tsz;
310 
311 			/* Leave now if buffer filled already */
312 			if (!size)
313 				goto out_unlock;
314 		}
315 		offset += dump->size;
316 	}
317 
318 out_unlock:
319 	mutex_unlock(&vmcoredd_mutex);
320 	return ret;
321 }
322 #endif /* CONFIG_MMU */
323 #endif /* CONFIG_PROC_VMCORE_DEVICE_DUMP */
324 
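/*
 * Sketch of the /proc/vmcore file layout assumed by the read and mmap paths
 * below (a summary of the code in this file, added for illustration):
 *
 *	offset 0 ..................... ELF header + merged program headers
 *	                               (elfcorebuf, elfcorebuf_sz bytes)
 *	elfcorebuf_sz ................ device dump notes, vmcoredd_orig_sz
 *	                               bytes (CONFIG_PROC_VMCORE_DEVICE_DUMP)
 *	elfcorebuf_sz +
 *	vmcoredd_orig_sz ............. merged old-kernel ELF notes, up to
 *	                               elfcorebuf_sz + elfnotes_sz
 *	elfcorebuf_sz + elfnotes_sz .. memory chunks from vmcore_list, up to
 *	                               vmcore_size
 */
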
325 /* Read from the ELF header and then the crash dump. On error, a negative
326  * value is returned; otherwise the number of bytes read is returned.
327  */
328 static ssize_t __read_vmcore(char *buffer, size_t buflen, loff_t *fpos,
329 			     int userbuf)
330 {
331 	ssize_t acc = 0, tmp;
332 	size_t tsz;
333 	u64 start;
334 	struct vmcore *m = NULL;
335 
336 	if (buflen == 0 || *fpos >= vmcore_size)
337 		return 0;
338 
339 	/* trim buflen to not go beyond EOF */
340 	if (buflen > vmcore_size - *fpos)
341 		buflen = vmcore_size - *fpos;
342 
343 	/* Read ELF core header */
344 	if (*fpos < elfcorebuf_sz) {
345 		tsz = min(elfcorebuf_sz - (size_t)*fpos, buflen);
346 		if (copy_to(buffer, elfcorebuf + *fpos, tsz, userbuf))
347 			return -EFAULT;
348 		buflen -= tsz;
349 		*fpos += tsz;
350 		buffer += tsz;
351 		acc += tsz;
352 
353 		/* leave now if filled buffer already */
354 		/* leave now if the buffer is already filled */
355 			return acc;
356 	}
357 
358 	/* Read Elf note segment */
359 	if (*fpos < elfcorebuf_sz + elfnotes_sz) {
360 		void *kaddr;
361 
362 		/* We add device dumps before other elf notes because the
363 		 * other elf notes may not fill the elf notes buffer
364 		 * completely and we will end up with zero-filled data
365 		 * between the elf notes and the device dumps. Tools will
366 		 * then try to decode this zero-filled data as valid notes
367 		 * and we don't want that. Hence, adding device dumps before
368 		 * the other elf notes ensures that zero-filled data can be
369 		 * avoided.
370 		 */
371 #ifdef CONFIG_PROC_VMCORE_DEVICE_DUMP
372 		/* Read device dumps */
373 		if (*fpos < elfcorebuf_sz + vmcoredd_orig_sz) {
374 			tsz = min(elfcorebuf_sz + vmcoredd_orig_sz -
375 				  (size_t)*fpos, buflen);
376 			start = *fpos - elfcorebuf_sz;
377 			if (vmcoredd_copy_dumps(buffer, start, tsz, userbuf))
378 				return -EFAULT;
379 
380 			buflen -= tsz;
381 			*fpos += tsz;
382 			buffer += tsz;
383 			acc += tsz;
384 
385 			/* leave now if filled buffer already */
386 			/* leave now if the buffer is already filled */
387 				return acc;
388 		}
389 #endif /* CONFIG_PROC_VMCORE_DEVICE_DUMP */
390 
391 		/* Read remaining elf notes */
392 		tsz = min(elfcorebuf_sz + elfnotes_sz - (size_t)*fpos, buflen);
393 		kaddr = elfnotes_buf + *fpos - elfcorebuf_sz - vmcoredd_orig_sz;
394 		if (copy_to(buffer, kaddr, tsz, userbuf))
395 			return -EFAULT;
396 
397 		buflen -= tsz;
398 		*fpos += tsz;
399 		buffer += tsz;
400 		acc += tsz;
401 
402 		/* leave now if filled buffer already */
403 		/* leave now if the buffer is already filled */
404 			return acc;
405 	}
406 
407 	list_for_each_entry(m, &vmcore_list, list) {
408 		if (*fpos < m->offset + m->size) {
409 			tsz = (size_t)min_t(unsigned long long,
410 					    m->offset + m->size - *fpos,
411 					    buflen);
412 			start = m->paddr + *fpos - m->offset;
413 			tmp = read_from_oldmem(buffer, tsz, &start,
414 					       userbuf, cc_platform_has(CC_ATTR_MEM_ENCRYPT));
415 			if (tmp < 0)
416 				return tmp;
417 			buflen -= tsz;
418 			*fpos += tsz;
419 			buffer += tsz;
420 			acc += tsz;
421 
422 			/* leave now if filled buffer already */
423 			/* leave now if the buffer is already filled */
424 				return acc;
425 		}
426 	}
427 
428 	return acc;
429 }
430 
431 static ssize_t read_vmcore(struct file *file, char __user *buffer,
432 			   size_t buflen, loff_t *fpos)
433 {
434 	return __read_vmcore((__force char *) buffer, buflen, fpos, 1);
435 }
436 
437 /*
438  * The vmcore fault handler uses the page cache and fills data using the
439  * standard __read_vmcore() function.
440  *
441  * On s390 the fault handler is used for memory regions that can't be mapped
442  * directly with remap_pfn_range().
443  */
444 static vm_fault_t mmap_vmcore_fault(struct vm_fault *vmf)
445 {
446 #ifdef CONFIG_S390
447 	struct address_space *mapping = vmf->vma->vm_file->f_mapping;
448 	pgoff_t index = vmf->pgoff;
449 	struct page *page;
450 	loff_t offset;
451 	char *buf;
452 	int rc;
453 
454 	page = find_or_create_page(mapping, index, GFP_KERNEL);
455 	if (!page)
456 		return VM_FAULT_OOM;
457 	if (!PageUptodate(page)) {
458 		offset = (loff_t) index << PAGE_SHIFT;
459 		buf = __va((page_to_pfn(page) << PAGE_SHIFT));
460 		rc = __read_vmcore(buf, PAGE_SIZE, &offset, 0);
461 		if (rc < 0) {
462 			unlock_page(page);
463 			put_page(page);
464 			return vmf_error(rc);
465 		}
466 		SetPageUptodate(page);
467 	}
468 	unlock_page(page);
469 	vmf->page = page;
470 	return 0;
471 #else
472 	return VM_FAULT_SIGBUS;
473 #endif
474 }
475 
476 static const struct vm_operations_struct vmcore_mmap_ops = {
477 	.fault = mmap_vmcore_fault,
478 };
479 
480 /**
481  * vmcore_alloc_buf - allocate buffer in vmalloc memory
482  * @size: size of buffer
483  *
484  * If CONFIG_MMU is defined, use vmalloc_user() to allow users to mmap
485  * the buffer to user-space by means of remap_vmalloc_range().
486  *
487  * If CONFIG_MMU is not defined, use vzalloc() since mmap_vmcore() is
488  * disabled and there's no need to allow users to mmap the buffer.
489  */
490 static inline char *vmcore_alloc_buf(size_t size)
491 {
492 #ifdef CONFIG_MMU
493 	return vmalloc_user(size);
494 #else
495 	return vzalloc(size);
496 #endif
497 }
498 
499 /*
500  * Disable mmap_vmcore() if CONFIG_MMU is not defined. MMU is
501  * essential for mmap_vmcore() in order to map physically
502  * non-contiguous objects (ELF header, ELF note segment and memory
503  * regions in the 1st kernel pointed to by PT_LOAD entries) into
504  * virtually contiguous user-space in ELF layout.
505  */
506 #ifdef CONFIG_MMU
507 /*
508  * remap_oldmem_pfn_checked - do remap_oldmem_pfn_range replacing all pages
509  * reported as not being ram with the zero page.
510  *
511  * @vma: vm_area_struct describing requested mapping
512  * @from: start remapping from
513  * @pfn: page frame number to start remapping to
514  * @size: remapping size
515  * @prot: protection bits
516  *
517  * Returns zero on success, -EAGAIN on failure.
518  */
519 static int remap_oldmem_pfn_checked(struct vm_area_struct *vma,
520 				    unsigned long from, unsigned long pfn,
521 				    unsigned long size, pgprot_t prot)
522 {
523 	unsigned long map_size;
524 	unsigned long pos_start, pos_end, pos;
525 	unsigned long zeropage_pfn = my_zero_pfn(0);
526 	size_t len = 0;
527 
528 	pos_start = pfn;
529 	pos_end = pfn + (size >> PAGE_SHIFT);
530 
531 	for (pos = pos_start; pos < pos_end; ++pos) {
532 		if (!pfn_is_ram(pos)) {
533 			/*
534 			 * We hit a page which is not ram. Remap the continuous
535 			 * region between pos_start and pos-1 and replace
536 			 * the non-ram page at pos with the zero page.
537 			 */
538 			if (pos > pos_start) {
539 				/* Remap continuous region */
540 				map_size = (pos - pos_start) << PAGE_SHIFT;
541 				if (remap_oldmem_pfn_range(vma, from + len,
542 							   pos_start, map_size,
543 							   prot))
544 					goto fail;
545 				len += map_size;
546 			}
547 			/* Remap the zero page */
548 			if (remap_oldmem_pfn_range(vma, from + len,
549 						   zeropage_pfn,
550 						   PAGE_SIZE, prot))
551 				goto fail;
552 			len += PAGE_SIZE;
553 			pos_start = pos + 1;
554 		}
555 	}
556 	if (pos > pos_start) {
557 		/* Remap the rest */
558 		map_size = (pos - pos_start) << PAGE_SHIFT;
559 		if (remap_oldmem_pfn_range(vma, from + len, pos_start,
560 					   map_size, prot))
561 			goto fail;
562 	}
563 	return 0;
564 fail:
565 	do_munmap(vma->vm_mm, from, len, NULL);
566 	return -EAGAIN;
567 }
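
/*
 * Worked example (illustrative): for a five-page mapping of pfns
 * 0x100..0x104 where only pfn 0x102 is reported as not being RAM, the loop
 * above issues three remaps: pfns 0x100-0x101, the zero page in place of
 * pfn 0x102, and pfns 0x103-0x104 via the final "remap the rest" step.
 */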
568 
569 static int vmcore_remap_oldmem_pfn(struct vm_area_struct *vma,
570 			    unsigned long from, unsigned long pfn,
571 			    unsigned long size, pgprot_t prot)
572 {
573 	int ret;
574 
575 	/*
576 	 * Check if oldmem_pfn_is_ram was registered to avoid
577 	 * looping over all pages without a reason.
578 	 */
579 	down_read(&vmcore_cb_rwsem);
580 	if (!list_empty(&vmcore_cb_list) || vmcore_cb_unstable)
581 		ret = remap_oldmem_pfn_checked(vma, from, pfn, size, prot);
582 	else
583 		ret = remap_oldmem_pfn_range(vma, from, pfn, size, prot);
584 	up_read(&vmcore_cb_rwsem);
585 	return ret;
586 }
587 
588 static int mmap_vmcore(struct file *file, struct vm_area_struct *vma)
589 {
590 	size_t size = vma->vm_end - vma->vm_start;
591 	u64 start, end, len, tsz;
592 	struct vmcore *m;
593 
594 	start = (u64)vma->vm_pgoff << PAGE_SHIFT;
595 	end = start + size;
596 
597 	if (size > vmcore_size || end > vmcore_size)
598 		return -EINVAL;
599 
600 	if (vma->vm_flags & (VM_WRITE | VM_EXEC))
601 		return -EPERM;
602 
603 	vma->vm_flags &= ~(VM_MAYWRITE | VM_MAYEXEC);
604 	vma->vm_flags |= VM_MIXEDMAP;
605 	vma->vm_ops = &vmcore_mmap_ops;
606 
607 	len = 0;
608 
609 	if (start < elfcorebuf_sz) {
610 		u64 pfn;
611 
612 		tsz = min(elfcorebuf_sz - (size_t)start, size);
613 		pfn = __pa(elfcorebuf + start) >> PAGE_SHIFT;
614 		if (remap_pfn_range(vma, vma->vm_start, pfn, tsz,
615 				    vma->vm_page_prot))
616 			return -EAGAIN;
617 		size -= tsz;
618 		start += tsz;
619 		len += tsz;
620 
621 		if (size == 0)
622 			return 0;
623 	}
624 
625 	if (start < elfcorebuf_sz + elfnotes_sz) {
626 		void *kaddr;
627 
628 		/* We add device dumps before other elf notes because the
629 		 * other elf notes may not fill the elf notes buffer
630 		 * completely and we will end up with zero-filled data
631 		 * between the elf notes and the device dumps. Tools will
632 		 * then try to decode this zero-filled data as valid notes
633 		 * and we don't want that. Hence, adding device dumps before
634 		 * the other elf notes ensures that zero-filled data can be
635 		 * avoided. This also ensures that the device dumps and
636 		 * other elf notes can be properly mmaped at a page-aligned
637 		 * address.
638 		 */
639 #ifdef CONFIG_PROC_VMCORE_DEVICE_DUMP
640 		/* Read device dumps */
641 		if (start < elfcorebuf_sz + vmcoredd_orig_sz) {
642 			u64 start_off;
643 
644 			tsz = min(elfcorebuf_sz + vmcoredd_orig_sz -
645 				  (size_t)start, size);
646 			start_off = start - elfcorebuf_sz;
647 			if (vmcoredd_mmap_dumps(vma, vma->vm_start + len,
648 						start_off, tsz))
649 				goto fail;
650 
651 			size -= tsz;
652 			start += tsz;
653 			len += tsz;
654 
655 			/* leave now if the buffer is already filled */
656 			if (!size)
657 				return 0;
658 		}
659 #endif /* CONFIG_PROC_VMCORE_DEVICE_DUMP */
660 
661 		/* Read remaining elf notes */
662 		tsz = min(elfcorebuf_sz + elfnotes_sz - (size_t)start, size);
663 		kaddr = elfnotes_buf + start - elfcorebuf_sz - vmcoredd_orig_sz;
664 		if (remap_vmalloc_range_partial(vma, vma->vm_start + len,
665 						kaddr, 0, tsz))
666 			goto fail;
667 
668 		size -= tsz;
669 		start += tsz;
670 		len += tsz;
671 
672 		if (size == 0)
673 			return 0;
674 	}
675 
676 	list_for_each_entry(m, &vmcore_list, list) {
677 		if (start < m->offset + m->size) {
678 			u64 paddr = 0;
679 
680 			tsz = (size_t)min_t(unsigned long long,
681 					    m->offset + m->size - start, size);
682 			paddr = m->paddr + start - m->offset;
683 			if (vmcore_remap_oldmem_pfn(vma, vma->vm_start + len,
684 						    paddr >> PAGE_SHIFT, tsz,
685 						    vma->vm_page_prot))
686 				goto fail;
687 			size -= tsz;
688 			start += tsz;
689 			len += tsz;
690 
691 			if (size == 0)
692 				return 0;
693 		}
694 	}
695 
696 	return 0;
697 fail:
698 	do_munmap(vma->vm_mm, vma->vm_start, len, NULL);
699 	return -EAGAIN;
700 }
701 #else
702 static int mmap_vmcore(struct file *file, struct vm_area_struct *vma)
703 {
704 	return -ENOSYS;
705 }
706 #endif
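
/*
 * User-space sketch (illustrative, not part of this file): dump tools such
 * as makedumpfile may map the dump instead of read()ing it, roughly:
 *
 *	int fd = open("/proc/vmcore", O_RDONLY);
 *	void *p = mmap(NULL, len, PROT_READ, MAP_PRIVATE, fd, offset);
 *
 * Writable or executable mappings are rejected above with -EPERM, and a
 * mapping that extends past vmcore_size fails with -EINVAL.
 */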
707 
708 static const struct proc_ops vmcore_proc_ops = {
709 	.proc_open	= open_vmcore,
710 	.proc_read	= read_vmcore,
711 	.proc_lseek	= default_llseek,
712 	.proc_mmap	= mmap_vmcore,
713 };
714 
715 static struct vmcore* __init get_new_element(void)
716 {
717 	return kzalloc(sizeof(struct vmcore), GFP_KERNEL);
718 }
719 
720 static u64 get_vmcore_size(size_t elfsz, size_t elfnotesegsz,
721 			   struct list_head *vc_list)
722 {
723 	u64 size;
724 	struct vmcore *m;
725 
726 	size = elfsz + elfnotesegsz;
727 	list_for_each_entry(m, vc_list, list) {
728 		size += m->size;
729 	}
730 	return size;
731 }
732 
733 /**
734  * update_note_header_size_elf64 - update p_memsz member of each PT_NOTE entry
735  *
736  * @ehdr_ptr: ELF header
737  *
738  * This function updates the p_memsz member of each PT_NOTE entry in the
739  * program header table pointed to by @ehdr_ptr to the real size of the
740  * ELF note segment.
741  */
742 static int __init update_note_header_size_elf64(const Elf64_Ehdr *ehdr_ptr)
743 {
744 	int i, rc=0;
745 	Elf64_Phdr *phdr_ptr;
746 	Elf64_Nhdr *nhdr_ptr;
747 
748 	phdr_ptr = (Elf64_Phdr *)(ehdr_ptr + 1);
749 	for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) {
750 		void *notes_section;
751 		u64 offset, max_sz, sz, real_sz = 0;
752 		if (phdr_ptr->p_type != PT_NOTE)
753 			continue;
754 		max_sz = phdr_ptr->p_memsz;
755 		offset = phdr_ptr->p_offset;
756 		notes_section = kmalloc(max_sz, GFP_KERNEL);
757 		if (!notes_section)
758 			return -ENOMEM;
759 		rc = elfcorehdr_read_notes(notes_section, max_sz, &offset);
760 		if (rc < 0) {
761 			kfree(notes_section);
762 			return rc;
763 		}
764 		nhdr_ptr = notes_section;
765 		while (nhdr_ptr->n_namesz != 0) {
766 			sz = sizeof(Elf64_Nhdr) +
767 				(((u64)nhdr_ptr->n_namesz + 3) & ~3) +
768 				(((u64)nhdr_ptr->n_descsz + 3) & ~3);
769 			if ((real_sz + sz) > max_sz) {
770 				pr_warn("Warning: Exceeded p_memsz, dropping PT_NOTE entry n_namesz=0x%x, n_descsz=0x%x\n",
771 					nhdr_ptr->n_namesz, nhdr_ptr->n_descsz);
772 				break;
773 			}
774 			real_sz += sz;
775 			nhdr_ptr = (Elf64_Nhdr*)((char*)nhdr_ptr + sz);
776 		}
777 		kfree(notes_section);
778 		phdr_ptr->p_memsz = real_sz;
779 		if (real_sz == 0) {
780 			pr_warn("Warning: Zero PT_NOTE entries found\n");
781 		}
782 	}
783 
784 	return 0;
785 }
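
/*
 * Worked example (illustrative): for the "CORE"/NT_PRSTATUS note on x86-64,
 * n_namesz = 5 and n_descsz = 336, so the entry consumes
 * sizeof(Elf64_Nhdr) + roundup(5, 4) + roundup(336, 4) = 12 + 8 + 336 = 356
 * bytes of the note segment; real_sz is the sum of such per-note sizes.
 */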
786 
787 /**
788  * get_note_number_and_size_elf64 - get the number of PT_NOTE program
789  * headers and sum of real size of their ELF note segment headers and
790  * data.
791  *
792  * @ehdr_ptr: ELF header
793  * @nr_ptnote: buffer for the number of PT_NOTE program headers
794  * @sz_ptnote: buffer for size of unique PT_NOTE program header
795  *
796  * This function is used to merge multiple PT_NOTE program headers
797  * into a single unique one. The resulting unique entry will have
798  * @sz_ptnote in its phdr->p_memsz.
799  *
800  * It is assumed that the program headers of type PT_NOTE pointed to by
801  * @ehdr_ptr have already been updated by update_note_header_size_elf64
802  * and that each PT_NOTE program header holds the actual ELF note segment
803  * size in its p_memsz member.
804  */
805 static int __init get_note_number_and_size_elf64(const Elf64_Ehdr *ehdr_ptr,
806 						 int *nr_ptnote, u64 *sz_ptnote)
807 {
808 	int i;
809 	Elf64_Phdr *phdr_ptr;
810 
811 	*nr_ptnote = *sz_ptnote = 0;
812 
813 	phdr_ptr = (Elf64_Phdr *)(ehdr_ptr + 1);
814 	for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) {
815 		if (phdr_ptr->p_type != PT_NOTE)
816 			continue;
817 		*nr_ptnote += 1;
818 		*sz_ptnote += phdr_ptr->p_memsz;
819 	}
820 
821 	return 0;
822 }
823 
824 /**
825  * copy_notes_elf64 - copy ELF note segments in a given buffer
826  *
827  * @ehdr_ptr: ELF header
828  * @notes_buf: buffer into which ELF note segments are copied
829  *
830  * This function is used to copy the ELF note segments of the 1st kernel
831  * into the buffer @notes_buf in the 2nd kernel. It is assumed that the
832  * size of the buffer @notes_buf is equal to or larger than the sum of the
833  * real ELF note segment headers and data.
834  *
835  * It is assumed that the program headers of type PT_NOTE pointed to by
836  * @ehdr_ptr have already been updated by update_note_header_size_elf64
837  * and that each PT_NOTE program header holds the actual ELF note segment
838  * size in its p_memsz member.
839  */
840 static int __init copy_notes_elf64(const Elf64_Ehdr *ehdr_ptr, char *notes_buf)
841 {
842 	int i, rc=0;
843 	Elf64_Phdr *phdr_ptr;
844 
845 	phdr_ptr = (Elf64_Phdr*)(ehdr_ptr + 1);
846 
847 	for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) {
848 		u64 offset;
849 		if (phdr_ptr->p_type != PT_NOTE)
850 			continue;
851 		offset = phdr_ptr->p_offset;
852 		rc = elfcorehdr_read_notes(notes_buf, phdr_ptr->p_memsz,
853 					   &offset);
854 		if (rc < 0)
855 			return rc;
856 		notes_buf += phdr_ptr->p_memsz;
857 	}
858 
859 	return 0;
860 }
861 
862 /* Merges all the PT_NOTE headers into one. */
863 static int __init merge_note_headers_elf64(char *elfptr, size_t *elfsz,
864 					   char **notes_buf, size_t *notes_sz)
865 {
866 	int i, nr_ptnote=0, rc=0;
867 	char *tmp;
868 	Elf64_Ehdr *ehdr_ptr;
869 	Elf64_Phdr phdr;
870 	u64 phdr_sz = 0, note_off;
871 
872 	ehdr_ptr = (Elf64_Ehdr *)elfptr;
873 
874 	rc = update_note_header_size_elf64(ehdr_ptr);
875 	if (rc < 0)
876 		return rc;
877 
878 	rc = get_note_number_and_size_elf64(ehdr_ptr, &nr_ptnote, &phdr_sz);
879 	if (rc < 0)
880 		return rc;
881 
882 	*notes_sz = roundup(phdr_sz, PAGE_SIZE);
883 	*notes_buf = vmcore_alloc_buf(*notes_sz);
884 	if (!*notes_buf)
885 		return -ENOMEM;
886 
887 	rc = copy_notes_elf64(ehdr_ptr, *notes_buf);
888 	if (rc < 0)
889 		return rc;
890 
891 	/* Prepare merged PT_NOTE program header. */
892 	phdr.p_type    = PT_NOTE;
893 	phdr.p_flags   = 0;
894 	note_off = sizeof(Elf64_Ehdr) +
895 			(ehdr_ptr->e_phnum - nr_ptnote +1) * sizeof(Elf64_Phdr);
896 	phdr.p_offset  = roundup(note_off, PAGE_SIZE);
897 	phdr.p_vaddr   = phdr.p_paddr = 0;
898 	phdr.p_filesz  = phdr.p_memsz = phdr_sz;
899 	phdr.p_align   = 0;
900 
901 	/* Add merged PT_NOTE program header */
902 	tmp = elfptr + sizeof(Elf64_Ehdr);
903 	memcpy(tmp, &phdr, sizeof(phdr));
904 	tmp += sizeof(phdr);
905 
906 	/* Remove unwanted PT_NOTE program headers. */
907 	i = (nr_ptnote - 1) * sizeof(Elf64_Phdr);
908 	*elfsz = *elfsz - i;
909 	memmove(tmp, tmp+i, ((*elfsz)-sizeof(Elf64_Ehdr)-sizeof(Elf64_Phdr)));
910 	memset(elfptr + *elfsz, 0, i);
911 	*elfsz = roundup(*elfsz, PAGE_SIZE);
912 
913 	/* Modify e_phnum to reflect merged headers. */
914 	ehdr_ptr->e_phnum = ehdr_ptr->e_phnum - nr_ptnote + 1;
915 
916 	/* Store the size of all notes.  We need this to update the note
917 	 * header when device dumps are added.
918 	 */
919 	elfnotes_orig_sz = phdr.p_memsz;
920 
921 	return 0;
922 }
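
/*
 * Net effect of the merge above (illustrative): a program header table of
 * the form
 *
 *	Ehdr | PT_NOTE | PT_NOTE | PT_LOAD | ... | PT_LOAD
 *
 * becomes
 *
 *	Ehdr | PT_NOTE (merged, p_memsz = phdr_sz) | PT_LOAD | ... | PT_LOAD
 *
 * with e_phnum reduced by nr_ptnote - 1, while the note data itself now
 * lives in the separately allocated, page-rounded notes buffer.
 */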
923 
924 /**
925  * update_note_header_size_elf32 - update p_memsz member of each PT_NOTE entry
926  *
927  * @ehdr_ptr: ELF header
928  *
929  * This function updates the p_memsz member of each PT_NOTE entry in the
930  * program header table pointed to by @ehdr_ptr to the real size of the
931  * ELF note segment.
932  */
933 static int __init update_note_header_size_elf32(const Elf32_Ehdr *ehdr_ptr)
934 {
935 	int i, rc=0;
936 	Elf32_Phdr *phdr_ptr;
937 	Elf32_Nhdr *nhdr_ptr;
938 
939 	phdr_ptr = (Elf32_Phdr *)(ehdr_ptr + 1);
940 	for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) {
941 		void *notes_section;
942 		u64 offset, max_sz, sz, real_sz = 0;
943 		if (phdr_ptr->p_type != PT_NOTE)
944 			continue;
945 		max_sz = phdr_ptr->p_memsz;
946 		offset = phdr_ptr->p_offset;
947 		notes_section = kmalloc(max_sz, GFP_KERNEL);
948 		if (!notes_section)
949 			return -ENOMEM;
950 		rc = elfcorehdr_read_notes(notes_section, max_sz, &offset);
951 		if (rc < 0) {
952 			kfree(notes_section);
953 			return rc;
954 		}
955 		nhdr_ptr = notes_section;
956 		while (nhdr_ptr->n_namesz != 0) {
957 			sz = sizeof(Elf32_Nhdr) +
958 				(((u64)nhdr_ptr->n_namesz + 3) & ~3) +
959 				(((u64)nhdr_ptr->n_descsz + 3) & ~3);
960 			if ((real_sz + sz) > max_sz) {
961 				pr_warn("Warning: Exceeded p_memsz, dropping PT_NOTE entry n_namesz=0x%x, n_descsz=0x%x\n",
962 					nhdr_ptr->n_namesz, nhdr_ptr->n_descsz);
963 				break;
964 			}
965 			real_sz += sz;
966 			nhdr_ptr = (Elf32_Nhdr*)((char*)nhdr_ptr + sz);
967 		}
968 		kfree(notes_section);
969 		phdr_ptr->p_memsz = real_sz;
970 		if (real_sz == 0) {
971 			pr_warn("Warning: Zero PT_NOTE entries found\n");
972 		}
973 	}
974 
975 	return 0;
976 }
977 
978 /**
979  * get_note_number_and_size_elf32 - get the number of PT_NOTE program
980  * headers and sum of real size of their ELF note segment headers and
981  * data.
982  *
983  * @ehdr_ptr: ELF header
984  * @nr_ptnote: buffer for the number of PT_NOTE program headers
985  * @sz_ptnote: buffer for size of unique PT_NOTE program header
986  *
987  * This function is used to merge multiple PT_NOTE program headers
988  * into a single unique one. The resulting unique entry will have
989  * @sz_ptnote in its phdr->p_memsz.
990  *
991  * It is assumed that the program headers of type PT_NOTE pointed to by
992  * @ehdr_ptr have already been updated by update_note_header_size_elf32
993  * and that each PT_NOTE program header holds the actual ELF note segment
994  * size in its p_memsz member.
995  */
996 static int __init get_note_number_and_size_elf32(const Elf32_Ehdr *ehdr_ptr,
997 						 int *nr_ptnote, u64 *sz_ptnote)
998 {
999 	int i;
1000 	Elf32_Phdr *phdr_ptr;
1001 
1002 	*nr_ptnote = *sz_ptnote = 0;
1003 
1004 	phdr_ptr = (Elf32_Phdr *)(ehdr_ptr + 1);
1005 	for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) {
1006 		if (phdr_ptr->p_type != PT_NOTE)
1007 			continue;
1008 		*nr_ptnote += 1;
1009 		*sz_ptnote += phdr_ptr->p_memsz;
1010 	}
1011 
1012 	return 0;
1013 }
1014 
1015 /**
1016  * copy_notes_elf32 - copy ELF note segments in a given buffer
1017  *
1018  * @ehdr_ptr: ELF header
1019  * @notes_buf: buffer into which ELF note segments are copied
1020  *
1021  * This function is used to copy the ELF note segments of the 1st kernel
1022  * into the buffer @notes_buf in the 2nd kernel. It is assumed that the
1023  * size of the buffer @notes_buf is equal to or larger than the sum of the
1024  * real ELF note segment headers and data.
1025  *
1026  * It is assumed that the program headers of type PT_NOTE pointed to by
1027  * @ehdr_ptr have already been updated by update_note_header_size_elf32
1028  * and that each PT_NOTE program header holds the actual ELF note segment
1029  * size in its p_memsz member.
1030  */
1031 static int __init copy_notes_elf32(const Elf32_Ehdr *ehdr_ptr, char *notes_buf)
1032 {
1033 	int i, rc=0;
1034 	Elf32_Phdr *phdr_ptr;
1035 
1036 	phdr_ptr = (Elf32_Phdr*)(ehdr_ptr + 1);
1037 
1038 	for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) {
1039 		u64 offset;
1040 		if (phdr_ptr->p_type != PT_NOTE)
1041 			continue;
1042 		offset = phdr_ptr->p_offset;
1043 		rc = elfcorehdr_read_notes(notes_buf, phdr_ptr->p_memsz,
1044 					   &offset);
1045 		if (rc < 0)
1046 			return rc;
1047 		notes_buf += phdr_ptr->p_memsz;
1048 	}
1049 
1050 	return 0;
1051 }
1052 
1053 /* Merges all the PT_NOTE headers into one. */
1054 static int __init merge_note_headers_elf32(char *elfptr, size_t *elfsz,
1055 					   char **notes_buf, size_t *notes_sz)
1056 {
1057 	int i, nr_ptnote=0, rc=0;
1058 	char *tmp;
1059 	Elf32_Ehdr *ehdr_ptr;
1060 	Elf32_Phdr phdr;
1061 	u64 phdr_sz = 0, note_off;
1062 
1063 	ehdr_ptr = (Elf32_Ehdr *)elfptr;
1064 
1065 	rc = update_note_header_size_elf32(ehdr_ptr);
1066 	if (rc < 0)
1067 		return rc;
1068 
1069 	rc = get_note_number_and_size_elf32(ehdr_ptr, &nr_ptnote, &phdr_sz);
1070 	if (rc < 0)
1071 		return rc;
1072 
1073 	*notes_sz = roundup(phdr_sz, PAGE_SIZE);
1074 	*notes_buf = vmcore_alloc_buf(*notes_sz);
1075 	if (!*notes_buf)
1076 		return -ENOMEM;
1077 
1078 	rc = copy_notes_elf32(ehdr_ptr, *notes_buf);
1079 	if (rc < 0)
1080 		return rc;
1081 
1082 	/* Prepare merged PT_NOTE program header. */
1083 	phdr.p_type    = PT_NOTE;
1084 	phdr.p_flags   = 0;
1085 	note_off = sizeof(Elf32_Ehdr) +
1086 			(ehdr_ptr->e_phnum - nr_ptnote +1) * sizeof(Elf32_Phdr);
1087 	phdr.p_offset  = roundup(note_off, PAGE_SIZE);
1088 	phdr.p_vaddr   = phdr.p_paddr = 0;
1089 	phdr.p_filesz  = phdr.p_memsz = phdr_sz;
1090 	phdr.p_align   = 0;
1091 
1092 	/* Add merged PT_NOTE program header */
1093 	tmp = elfptr + sizeof(Elf32_Ehdr);
1094 	memcpy(tmp, &phdr, sizeof(phdr));
1095 	tmp += sizeof(phdr);
1096 
1097 	/* Remove unwanted PT_NOTE program headers. */
1098 	i = (nr_ptnote - 1) * sizeof(Elf32_Phdr);
1099 	*elfsz = *elfsz - i;
1100 	memmove(tmp, tmp+i, ((*elfsz)-sizeof(Elf32_Ehdr)-sizeof(Elf32_Phdr)));
1101 	memset(elfptr + *elfsz, 0, i);
1102 	*elfsz = roundup(*elfsz, PAGE_SIZE);
1103 
1104 	/* Modify e_phnum to reflect merged headers. */
1105 	ehdr_ptr->e_phnum = ehdr_ptr->e_phnum - nr_ptnote + 1;
1106 
1107 	/* Store the size of all notes.  We need this to update the note
1108 	 * header when device dumps are added.
1109 	 */
1110 	elfnotes_orig_sz = phdr.p_memsz;
1111 
1112 	return 0;
1113 }
1114 
1115 /* Add memory chunks represented by program headers to vmcore list. Also update
1116  * the offset fields of the exported program headers. */
1117 static int __init process_ptload_program_headers_elf64(char *elfptr,
1118 						size_t elfsz,
1119 						size_t elfnotes_sz,
1120 						struct list_head *vc_list)
1121 {
1122 	int i;
1123 	Elf64_Ehdr *ehdr_ptr;
1124 	Elf64_Phdr *phdr_ptr;
1125 	loff_t vmcore_off;
1126 	struct vmcore *new;
1127 
1128 	ehdr_ptr = (Elf64_Ehdr *)elfptr;
1129 	phdr_ptr = (Elf64_Phdr*)(elfptr + sizeof(Elf64_Ehdr)); /* PT_NOTE hdr */
1130 
1131 	/* Skip Elf header, program headers and Elf note segment. */
1132 	vmcore_off = elfsz + elfnotes_sz;
1133 
1134 	for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) {
1135 		u64 paddr, start, end, size;
1136 
1137 		if (phdr_ptr->p_type != PT_LOAD)
1138 			continue;
1139 
1140 		paddr = phdr_ptr->p_offset;
1141 		start = rounddown(paddr, PAGE_SIZE);
1142 		end = roundup(paddr + phdr_ptr->p_memsz, PAGE_SIZE);
1143 		size = end - start;
1144 
1145 		/* Add this contiguous chunk of memory to vmcore list. */
1146 		new = get_new_element();
1147 		if (!new)
1148 			return -ENOMEM;
1149 		new->paddr = start;
1150 		new->size = size;
1151 		list_add_tail(&new->list, vc_list);
1152 
1153 		/* Update the program header offset. */
1154 		phdr_ptr->p_offset = vmcore_off + (paddr - start);
1155 		vmcore_off = vmcore_off + size;
1156 	}
1157 	return 0;
1158 }
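
/*
 * Worked example (illustrative): a PT_LOAD entry whose p_offset (the old
 * physical address) is 0x1001234 with p_memsz = 0x3000 is rounded out to
 * the chunk [0x1001000, 0x1005000), so new->size = 0x4000 and the header's
 * new p_offset becomes vmcore_off + 0x234, preserving the sub-page offset
 * of the original address within the file.
 */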
1159 
1160 static int __init process_ptload_program_headers_elf32(char *elfptr,
1161 						size_t elfsz,
1162 						size_t elfnotes_sz,
1163 						struct list_head *vc_list)
1164 {
1165 	int i;
1166 	Elf32_Ehdr *ehdr_ptr;
1167 	Elf32_Phdr *phdr_ptr;
1168 	loff_t vmcore_off;
1169 	struct vmcore *new;
1170 
1171 	ehdr_ptr = (Elf32_Ehdr *)elfptr;
1172 	phdr_ptr = (Elf32_Phdr*)(elfptr + sizeof(Elf32_Ehdr)); /* PT_NOTE hdr */
1173 
1174 	/* Skip Elf header, program headers and Elf note segment. */
1175 	vmcore_off = elfsz + elfnotes_sz;
1176 
1177 	for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) {
1178 		u64 paddr, start, end, size;
1179 
1180 		if (phdr_ptr->p_type != PT_LOAD)
1181 			continue;
1182 
1183 		paddr = phdr_ptr->p_offset;
1184 		start = rounddown(paddr, PAGE_SIZE);
1185 		end = roundup(paddr + phdr_ptr->p_memsz, PAGE_SIZE);
1186 		size = end - start;
1187 
1188 		/* Add this contiguous chunk of memory to vmcore list. */
1189 		new = get_new_element();
1190 		if (!new)
1191 			return -ENOMEM;
1192 		new->paddr = start;
1193 		new->size = size;
1194 		list_add_tail(&new->list, vc_list);
1195 
1196 		/* Update the program header offset */
1197 		phdr_ptr->p_offset = vmcore_off + (paddr - start);
1198 		vmcore_off = vmcore_off + size;
1199 	}
1200 	return 0;
1201 }
1202 
1203 /* Sets offset fields of vmcore elements. */
1204 static void set_vmcore_list_offsets(size_t elfsz, size_t elfnotes_sz,
1205 				    struct list_head *vc_list)
1206 {
1207 	loff_t vmcore_off;
1208 	struct vmcore *m;
1209 
1210 	/* Skip Elf header, program headers and Elf note segment. */
1211 	vmcore_off = elfsz + elfnotes_sz;
1212 
1213 	list_for_each_entry(m, vc_list, list) {
1214 		m->offset = vmcore_off;
1215 		vmcore_off += m->size;
1216 	}
1217 }
1218 
1219 static void free_elfcorebuf(void)
1220 {
1221 	free_pages((unsigned long)elfcorebuf, get_order(elfcorebuf_sz_orig));
1222 	elfcorebuf = NULL;
1223 	vfree(elfnotes_buf);
1224 	elfnotes_buf = NULL;
1225 }
1226 
1227 static int __init parse_crash_elf64_headers(void)
1228 {
1229 	int rc=0;
1230 	Elf64_Ehdr ehdr;
1231 	u64 addr;
1232 
1233 	addr = elfcorehdr_addr;
1234 
1235 	/* Read Elf header */
1236 	rc = elfcorehdr_read((char *)&ehdr, sizeof(Elf64_Ehdr), &addr);
1237 	if (rc < 0)
1238 		return rc;
1239 
1240 	/* Do some basic verification. */
1241 	if (memcmp(ehdr.e_ident, ELFMAG, SELFMAG) != 0 ||
1242 		(ehdr.e_type != ET_CORE) ||
1243 		!vmcore_elf64_check_arch(&ehdr) ||
1244 		ehdr.e_ident[EI_CLASS] != ELFCLASS64 ||
1245 		ehdr.e_ident[EI_VERSION] != EV_CURRENT ||
1246 		ehdr.e_version != EV_CURRENT ||
1247 		ehdr.e_ehsize != sizeof(Elf64_Ehdr) ||
1248 		ehdr.e_phentsize != sizeof(Elf64_Phdr) ||
1249 		ehdr.e_phnum == 0) {
1250 		pr_warn("Warning: Core image elf header is not sane\n");
1251 		return -EINVAL;
1252 	}
1253 
1254 	/* Read in all elf headers. */
1255 	elfcorebuf_sz_orig = sizeof(Elf64_Ehdr) +
1256 				ehdr.e_phnum * sizeof(Elf64_Phdr);
1257 	elfcorebuf_sz = elfcorebuf_sz_orig;
1258 	elfcorebuf = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
1259 					      get_order(elfcorebuf_sz_orig));
1260 	if (!elfcorebuf)
1261 		return -ENOMEM;
1262 	addr = elfcorehdr_addr;
1263 	rc = elfcorehdr_read(elfcorebuf, elfcorebuf_sz_orig, &addr);
1264 	if (rc < 0)
1265 		goto fail;
1266 
1267 	/* Merge all PT_NOTE headers into one. */
1268 	rc = merge_note_headers_elf64(elfcorebuf, &elfcorebuf_sz,
1269 				      &elfnotes_buf, &elfnotes_sz);
1270 	if (rc)
1271 		goto fail;
1272 	rc = process_ptload_program_headers_elf64(elfcorebuf, elfcorebuf_sz,
1273 						  elfnotes_sz, &vmcore_list);
1274 	if (rc)
1275 		goto fail;
1276 	set_vmcore_list_offsets(elfcorebuf_sz, elfnotes_sz, &vmcore_list);
1277 	return 0;
1278 fail:
1279 	free_elfcorebuf();
1280 	return rc;
1281 }
1282 
1283 static int __init parse_crash_elf32_headers(void)
1284 {
1285 	int rc=0;
1286 	Elf32_Ehdr ehdr;
1287 	u64 addr;
1288 
1289 	addr = elfcorehdr_addr;
1290 
1291 	/* Read Elf header */
1292 	rc = elfcorehdr_read((char *)&ehdr, sizeof(Elf32_Ehdr), &addr);
1293 	if (rc < 0)
1294 		return rc;
1295 
1296 	/* Do some basic verification. */
1297 	if (memcmp(ehdr.e_ident, ELFMAG, SELFMAG) != 0 ||
1298 		(ehdr.e_type != ET_CORE) ||
1299 		!vmcore_elf32_check_arch(&ehdr) ||
1300 		ehdr.e_ident[EI_CLASS] != ELFCLASS32||
1301 		ehdr.e_ident[EI_VERSION] != EV_CURRENT ||
1302 		ehdr.e_version != EV_CURRENT ||
1303 		ehdr.e_ehsize != sizeof(Elf32_Ehdr) ||
1304 		ehdr.e_phentsize != sizeof(Elf32_Phdr) ||
1305 		ehdr.e_phnum == 0) {
1306 		pr_warn("Warning: Core image elf header is not sane\n");
1307 		return -EINVAL;
1308 	}
1309 
1310 	/* Read in all elf headers. */
1311 	elfcorebuf_sz_orig = sizeof(Elf32_Ehdr) + ehdr.e_phnum * sizeof(Elf32_Phdr);
1312 	elfcorebuf_sz = elfcorebuf_sz_orig;
1313 	elfcorebuf = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
1314 					      get_order(elfcorebuf_sz_orig));
1315 	if (!elfcorebuf)
1316 		return -ENOMEM;
1317 	addr = elfcorehdr_addr;
1318 	rc = elfcorehdr_read(elfcorebuf, elfcorebuf_sz_orig, &addr);
1319 	if (rc < 0)
1320 		goto fail;
1321 
1322 	/* Merge all PT_NOTE headers into one. */
1323 	rc = merge_note_headers_elf32(elfcorebuf, &elfcorebuf_sz,
1324 				      &elfnotes_buf, &elfnotes_sz);
1325 	if (rc)
1326 		goto fail;
1327 	rc = process_ptload_program_headers_elf32(elfcorebuf, elfcorebuf_sz,
1328 						  elfnotes_sz, &vmcore_list);
1329 	if (rc)
1330 		goto fail;
1331 	set_vmcore_list_offsets(elfcorebuf_sz, elfnotes_sz, &vmcore_list);
1332 	return 0;
1333 fail:
1334 	free_elfcorebuf();
1335 	return rc;
1336 }
1337 
1338 static int __init parse_crash_elf_headers(void)
1339 {
1340 	unsigned char e_ident[EI_NIDENT];
1341 	u64 addr;
1342 	int rc=0;
1343 
1344 	addr = elfcorehdr_addr;
1345 	rc = elfcorehdr_read(e_ident, EI_NIDENT, &addr);
1346 	if (rc < 0)
1347 		return rc;
1348 	if (memcmp(e_ident, ELFMAG, SELFMAG) != 0) {
1349 		pr_warn("Warning: Core image elf header not found\n");
1350 		return -EINVAL;
1351 	}
1352 
1353 	if (e_ident[EI_CLASS] == ELFCLASS64) {
1354 		rc = parse_crash_elf64_headers();
1355 		if (rc)
1356 			return rc;
1357 	} else if (e_ident[EI_CLASS] == ELFCLASS32) {
1358 		rc = parse_crash_elf32_headers();
1359 		if (rc)
1360 			return rc;
1361 	} else {
1362 		pr_warn("Warning: Core image elf header is not sane\n");
1363 		return -EINVAL;
1364 	}
1365 
1366 	/* Determine vmcore size. */
1367 	vmcore_size = get_vmcore_size(elfcorebuf_sz, elfnotes_sz,
1368 				      &vmcore_list);
1369 
1370 	return 0;
1371 }
1372 
1373 #ifdef CONFIG_PROC_VMCORE_DEVICE_DUMP
1374 /**
1375  * vmcoredd_write_header - Write vmcore device dump header at the
1376  * beginning of the dump's buffer.
1377  * @buf: Output buffer where the note is written
1378  * @data: Dump info
1379  * @size: Size of the dump
1380  *
1381  * Fills beginning of the dump's buffer with vmcore device dump header.
1382  */
1383 static void vmcoredd_write_header(void *buf, struct vmcoredd_data *data,
1384 				  u32 size)
1385 {
1386 	struct vmcoredd_header *vdd_hdr = (struct vmcoredd_header *)buf;
1387 
1388 	vdd_hdr->n_namesz = sizeof(vdd_hdr->name);
1389 	vdd_hdr->n_descsz = size + sizeof(vdd_hdr->dump_name);
1390 	vdd_hdr->n_type = NT_VMCOREDD;
1391 
1392 	strncpy((char *)vdd_hdr->name, VMCOREDD_NOTE_NAME,
1393 		sizeof(vdd_hdr->name));
1394 	memcpy(vdd_hdr->dump_name, data->dump_name, sizeof(vdd_hdr->dump_name));
1395 }
1396 
1397 /**
1398  * vmcoredd_update_program_headers - Update all Elf program headers
1399  * @elfptr: Pointer to elf header
1400  * @elfnotesz: Size of elf notes aligned to page size
1401  * @vmcoreddsz: Size of device dumps to be added to elf note header
1402  *
1403  * Determine type of Elf header (Elf64 or Elf32) and update the elf note size.
1404  * Also update the offsets of all the program headers after the elf note header.
1405  */
1406 static void vmcoredd_update_program_headers(char *elfptr, size_t elfnotesz,
1407 					    size_t vmcoreddsz)
1408 {
1409 	unsigned char *e_ident = (unsigned char *)elfptr;
1410 	u64 start, end, size;
1411 	loff_t vmcore_off;
1412 	u32 i;
1413 
1414 	vmcore_off = elfcorebuf_sz + elfnotesz;
1415 
1416 	if (e_ident[EI_CLASS] == ELFCLASS64) {
1417 		Elf64_Ehdr *ehdr = (Elf64_Ehdr *)elfptr;
1418 		Elf64_Phdr *phdr = (Elf64_Phdr *)(elfptr + sizeof(Elf64_Ehdr));
1419 
1420 		/* Update all program headers */
1421 		for (i = 0; i < ehdr->e_phnum; i++, phdr++) {
1422 			if (phdr->p_type == PT_NOTE) {
1423 				/* Update note size */
1424 				phdr->p_memsz = elfnotes_orig_sz + vmcoreddsz;
1425 				phdr->p_filesz = phdr->p_memsz;
1426 				continue;
1427 			}
1428 
1429 			start = rounddown(phdr->p_offset, PAGE_SIZE);
1430 			end = roundup(phdr->p_offset + phdr->p_memsz,
1431 				      PAGE_SIZE);
1432 			size = end - start;
1433 			phdr->p_offset = vmcore_off + (phdr->p_offset - start);
1434 			vmcore_off += size;
1435 		}
1436 	} else {
1437 		Elf32_Ehdr *ehdr = (Elf32_Ehdr *)elfptr;
1438 		Elf32_Phdr *phdr = (Elf32_Phdr *)(elfptr + sizeof(Elf32_Ehdr));
1439 
1440 		/* Update all program headers */
1441 		for (i = 0; i < ehdr->e_phnum; i++, phdr++) {
1442 			if (phdr->p_type == PT_NOTE) {
1443 				/* Update note size */
1444 				phdr->p_memsz = elfnotes_orig_sz + vmcoreddsz;
1445 				phdr->p_filesz = phdr->p_memsz;
1446 				continue;
1447 			}
1448 
1449 			start = rounddown(phdr->p_offset, PAGE_SIZE);
1450 			end = roundup(phdr->p_offset + phdr->p_memsz,
1451 				      PAGE_SIZE);
1452 			size = end - start;
1453 			phdr->p_offset = vmcore_off + (phdr->p_offset - start);
1454 			vmcore_off += size;
1455 		}
1456 	}
1457 }
1458 
1459 /**
1460  * vmcoredd_update_size - Update the total size of the device dumps and update
1461  * Elf header
1462  * @dump_size: Size of the current device dump to be added to total size
1463  *
1464  * Update the total size of all the device dumps and update the Elf program
1465  * headers. Calculate the new offsets for the vmcore list and update the
1466  * total vmcore size.
1467  */
1468 static void vmcoredd_update_size(size_t dump_size)
1469 {
1470 	vmcoredd_orig_sz += dump_size;
1471 	elfnotes_sz = roundup(elfnotes_orig_sz, PAGE_SIZE) + vmcoredd_orig_sz;
1472 	vmcoredd_update_program_headers(elfcorebuf, elfnotes_sz,
1473 					vmcoredd_orig_sz);
1474 
1475 	/* Update vmcore list offsets */
1476 	set_vmcore_list_offsets(elfcorebuf_sz, elfnotes_sz, &vmcore_list);
1477 
1478 	vmcore_size = get_vmcore_size(elfcorebuf_sz, elfnotes_sz,
1479 				      &vmcore_list);
1480 	proc_vmcore->size = vmcore_size;
1481 }
1482 
1483 /**
1484  * vmcore_add_device_dump - Add a buffer containing device dump to vmcore
1485  * @data: dump info.
1486  *
1487  * Allocate a buffer and invoke the calling driver's dump collection routine.
1488  * Write an Elf note at the beginning of the buffer to indicate vmcore device
1489  * dump and add the dump to the global list.
1490  */
1491 int vmcore_add_device_dump(struct vmcoredd_data *data)
1492 {
1493 	struct vmcoredd_node *dump;
1494 	void *buf = NULL;
1495 	size_t data_size;
1496 	int ret;
1497 
1498 	if (vmcoredd_disabled) {
1499 		pr_err_once("Device dump is disabled\n");
1500 		return -EINVAL;
1501 	}
1502 
1503 	if (!data || !strlen(data->dump_name) ||
1504 	    !data->vmcoredd_callback || !data->size)
1505 		return -EINVAL;
1506 
1507 	dump = vzalloc(sizeof(*dump));
1508 	if (!dump) {
1509 		ret = -ENOMEM;
1510 		goto out_err;
1511 	}
1512 
1513 	/* Keep the size of the buffer page-aligned so that it can be mmaped */
1514 	data_size = roundup(sizeof(struct vmcoredd_header) + data->size,
1515 			    PAGE_SIZE);
1516 
1517 	/* Allocate a buffer for the driver to write its dump into */
1518 	buf = vmcore_alloc_buf(data_size);
1519 	if (!buf) {
1520 		ret = -ENOMEM;
1521 		goto out_err;
1522 	}
1523 
1524 	vmcoredd_write_header(buf, data, data_size -
1525 			      sizeof(struct vmcoredd_header));
1526 
1527 	/* Invoke the driver's dump collection routine */
1528 	ret = data->vmcoredd_callback(data, buf +
1529 				      sizeof(struct vmcoredd_header));
1530 	if (ret)
1531 		goto out_err;
1532 
1533 	dump->buf = buf;
1534 	dump->size = data_size;
1535 
1536 	/* Add the dump to the vmcore device dump list */
1537 	mutex_lock(&vmcoredd_mutex);
1538 	list_add_tail(&dump->list, &vmcoredd_list);
1539 	mutex_unlock(&vmcoredd_mutex);
1540 
1541 	vmcoredd_update_size(data_size);
1542 	return 0;
1543 
1544 out_err:
1545 	vfree(buf);
1546 	vfree(dump);
1547 
1548 	return ret;
1549 }
1550 EXPORT_SYMBOL(vmcore_add_device_dump);
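
/*
 * Example (illustrative sketch): a driver running in the kdump kernel can
 * attach its own device state to the dump. Only struct vmcoredd_data and
 * vmcore_add_device_dump() are part of this interface; my_collect(),
 * my_read_device_state() and MY_DUMP_SIZE are hypothetical names.
 *
 *	static int my_collect(struct vmcoredd_data *data, void *buf)
 *	{
 *		return my_read_device_state(buf, data->size);
 *	}
 *
 *	static struct vmcoredd_data my_dump = {
 *		.dump_name	   = "my_device",
 *		.size		   = MY_DUMP_SIZE,
 *		.vmcoredd_callback = my_collect,
 *	};
 *
 *	vmcore_add_device_dump(&my_dump);
 */
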
1551 #endif /* CONFIG_PROC_VMCORE_DEVICE_DUMP */
1552 
1553 /* Free all dumps in vmcore device dump list */
1554 static void vmcore_free_device_dumps(void)
1555 {
1556 #ifdef CONFIG_PROC_VMCORE_DEVICE_DUMP
1557 	mutex_lock(&vmcoredd_mutex);
1558 	while (!list_empty(&vmcoredd_list)) {
1559 		struct vmcoredd_node *dump;
1560 
1561 		dump = list_first_entry(&vmcoredd_list, struct vmcoredd_node,
1562 					list);
1563 		list_del(&dump->list);
1564 		vfree(dump->buf);
1565 		vfree(dump);
1566 	}
1567 	mutex_unlock(&vmcoredd_mutex);
1568 #endif /* CONFIG_PROC_VMCORE_DEVICE_DUMP */
1569 }
1570 
1571 /* Init function for vmcore module. */
1572 static int __init vmcore_init(void)
1573 {
1574 	int rc = 0;
1575 
1576 	/* Allow architectures to allocate ELF header in 2nd kernel */
1577 	rc = elfcorehdr_alloc(&elfcorehdr_addr, &elfcorehdr_size);
1578 	if (rc)
1579 		return rc;
1580 	/*
1581 	 * If elfcorehdr= has been passed on the command line or created in the
1582 	 * 2nd kernel, then capture the dump.
1583 	 */
1584 	if (!(is_vmcore_usable()))
1585 		return rc;
1586 	rc = parse_crash_elf_headers();
1587 	if (rc) {
1588 		pr_warn("Kdump: vmcore not initialized\n");
1589 		return rc;
1590 	}
1591 	elfcorehdr_free(elfcorehdr_addr);
1592 	elfcorehdr_addr = ELFCORE_ADDR_ERR;
1593 
1594 	proc_vmcore = proc_create("vmcore", S_IRUSR, NULL, &vmcore_proc_ops);
1595 	if (proc_vmcore)
1596 		proc_vmcore->size = vmcore_size;
1597 	return 0;
1598 }
1599 fs_initcall(vmcore_init);
1600 
1601 /* Cleanup function for vmcore module. */
1602 void vmcore_cleanup(void)
1603 {
1604 	if (proc_vmcore) {
1605 		proc_remove(proc_vmcore);
1606 		proc_vmcore = NULL;
1607 	}
1608 
1609 	/* clear the vmcore list. */
1610 	while (!list_empty(&vmcore_list)) {
1611 		struct vmcore *m;
1612 
1613 		m = list_first_entry(&vmcore_list, struct vmcore, list);
1614 		list_del(&m->list);
1615 		kfree(m);
1616 	}
1617 	free_elfcorebuf();
1618 
1619 	/* clear vmcore device dump list */
1620 	vmcore_free_device_dumps();
1621 }
1622