xref: /openbmc/linux/kernel/kexec_core.c (revision cd4d09ec)
1 /*
2  * kexec.c - kexec system call core code.
3  * Copyright (C) 2002-2004 Eric Biederman  <ebiederm@xmission.com>
4  *
5  * This source code is licensed under the GNU General Public License,
6  * Version 2.  See the file COPYING for more details.
7  */
8 
9 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
10 
11 #include <linux/capability.h>
12 #include <linux/mm.h>
13 #include <linux/file.h>
14 #include <linux/slab.h>
15 #include <linux/fs.h>
16 #include <linux/kexec.h>
17 #include <linux/mutex.h>
18 #include <linux/list.h>
19 #include <linux/highmem.h>
20 #include <linux/syscalls.h>
21 #include <linux/reboot.h>
22 #include <linux/ioport.h>
23 #include <linux/hardirq.h>
24 #include <linux/elf.h>
25 #include <linux/elfcore.h>
26 #include <linux/utsname.h>
27 #include <linux/numa.h>
28 #include <linux/suspend.h>
29 #include <linux/device.h>
30 #include <linux/freezer.h>
31 #include <linux/pm.h>
32 #include <linux/cpu.h>
33 #include <linux/uaccess.h>
34 #include <linux/io.h>
35 #include <linux/console.h>
36 #include <linux/vmalloc.h>
37 #include <linux/swap.h>
38 #include <linux/syscore_ops.h>
39 #include <linux/compiler.h>
40 #include <linux/hugetlb.h>
41 
42 #include <asm/page.h>
43 #include <asm/sections.h>
44 
45 #include <crypto/hash.h>
46 #include <crypto/sha.h>
47 #include "kexec_internal.h"
48 
49 DEFINE_MUTEX(kexec_mutex);
50 
51 /* Per cpu memory for storing cpu states in case of system crash. */
52 note_buf_t __percpu *crash_notes;
53 
54 /* vmcoreinfo stuff */
55 static unsigned char vmcoreinfo_data[VMCOREINFO_BYTES];
56 u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4];
57 size_t vmcoreinfo_size;
58 size_t vmcoreinfo_max_size = sizeof(vmcoreinfo_data);
59 
60 /* Flag to indicate we are going to kexec a new kernel */
61 bool kexec_in_progress = false;
62 
63 
64 /* Location of the reserved area for the crash kernel */
65 struct resource crashk_res = {
66 	.name  = "Crash kernel",
67 	.start = 0,
68 	.end   = 0,
69 	.flags = IORESOURCE_BUSY | IORESOURCE_MEM
70 };
71 struct resource crashk_low_res = {
72 	.name  = "Crash kernel",
73 	.start = 0,
74 	.end   = 0,
75 	.flags = IORESOURCE_BUSY | IORESOURCE_MEM
76 };
77 
78 int kexec_should_crash(struct task_struct *p)
79 {
80 	/*
81 	 * If crash_kexec_post_notifiers is enabled, don't run
82 	 * crash_kexec() here yet, which must be run after panic
83 	 * notifiers in panic().
84 	 */
85 	if (crash_kexec_post_notifiers)
86 		return 0;
87 	/*
88 	 * There are 4 panic() calls in the do_exit() path, each of which
89 	 * corresponds to one of these 4 conditions.
90 	 */
91 	if (in_interrupt() || !p->pid || is_global_init(p) || panic_on_oops)
92 		return 1;
93 	return 0;
94 }
95 
96 /*
97  * When kexec transitions to the new kernel there is a one-to-one
98  * mapping between physical and virtual addresses.  On processors
99  * where you can disable the MMU this is trivial.  For
100  * others it is still a simple predictable page table to setup.
101  *
102  * In that environment kexec copies the new kernel to its final
103  * resting place.  This means I can only support memory whose
104  * physical address can fit in an unsigned long.  In particular
105  * addresses where (pfn << PAGE_SHIFT) > ULONG_MAX cannot be handled.
106  * If the assembly stub has more restrictive requirements
107  * KEXEC_SOURCE_MEMORY_LIMIT and KEXEC_DEST_MEMORY_LIMIT can be
108  * defined more restrictively in <asm/kexec.h>.
109  *
110  * The code for the transition from the current kernel to the
111  * new kernel is placed in the control_code_buffer, whose size
112  * is given by KEXEC_CONTROL_PAGE_SIZE.  In the best case only a single
113  * page of memory is necessary, but some architectures require more.
114  * Because this memory must be identity mapped in the transition from
115  * virtual to physical addresses it must live in the range
116  * 0 - TASK_SIZE, as only the user space mappings are arbitrarily
117  * modifiable.
118  *
119  * The assembly stub in the control code buffer is passed a linked list
120  * of descriptor pages detailing the source pages of the new kernel,
121  * and the destination addresses of those source pages.  As this data
122  * structure is not used in the context of the current OS, it must
123  * be self-contained.
124  *
125  * The code has been made to work with highmem pages and will use a
126  * destination page in its final resting place (if it happens
127  * to allocate it).  The end product of this is that most of the
128  * physical address space, and most of RAM can be used.
129  *
130  * Future directions include:
131  *  - allocating a page table with the control code buffer identity
132  *    mapped, to simplify machine_kexec and make kexec_on_panic more
133  *    reliable.
134  */
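/*
 * A sketch of the descriptor list handed to the assembly stub, pieced
 * together here for exposition (the list itself is built below by
 * kimage_add_entry() and friends).  Each kimage_entry_t is a physical
 * address with flag bits encoded in its low bits:
 *
 *	dest | IND_DESTINATION   set the current copy destination
 *	src  | IND_SOURCE        copy one page there, advance destination
 *	ind  | IND_INDIRECTION   continue reading entries at ind
 *	       IND_DONE          end of the list
 *
 * A hypothetical two-page image loaded at 0x100000 would thus be
 * encoded roughly as:
 *
 *	0x100000 | IND_DESTINATION,
 *	pageA    | IND_SOURCE,		(copied to 0x100000)
 *	pageB    | IND_SOURCE,		(copied to 0x101000)
 *	IND_DONE
 */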
135 
136 /*
137  * KIMAGE_NO_DEST is an impossible destination address, used for
138  * allocating pages whose destination address we do not care about.
139  */
140 #define KIMAGE_NO_DEST (-1UL)
141 
142 static struct page *kimage_alloc_page(struct kimage *image,
143 				       gfp_t gfp_mask,
144 				       unsigned long dest);
145 
146 int sanity_check_segment_list(struct kimage *image)
147 {
148 	int result, i;
149 	unsigned long nr_segments = image->nr_segments;
150 
151 	/*
152 	 * Verify we have good destination addresses.  The caller is
153 	 * responsible for making certain we don't attempt to load
154 	 * the new image into invalid or reserved areas of RAM.  This
155 	 * just verifies it is an address we can use.
156 	 *
157 	 * Since the kernel does everything in page size chunks ensure
158 	 * the destination addresses are page aligned.  Too many
159 	 * special cases crop up when we don't do this.  The most
160 	 * insidious is getting overlapping destination addresses
161 	 * simply because addresses are changed to page size
162 	 * granularity.
163 	 */
164 	result = -EADDRNOTAVAIL;
165 	for (i = 0; i < nr_segments; i++) {
166 		unsigned long mstart, mend;
167 
168 		mstart = image->segment[i].mem;
169 		mend   = mstart + image->segment[i].memsz;
170 		if ((mstart & ~PAGE_MASK) || (mend & ~PAGE_MASK))
171 			return result;
172 		if (mend >= KEXEC_DESTINATION_MEMORY_LIMIT)
173 			return result;
174 	}
175 
176 	/* Verify our destination addresses do not overlap.
177 	 * If we allowed overlapping destination addresses
178 	 * through, very weird things can happen with no
179 	 * easy explanation as one segment stomps on another.
180 	 */
181 	result = -EINVAL;
182 	for (i = 0; i < nr_segments; i++) {
183 		unsigned long mstart, mend;
184 		unsigned long j;
185 
186 		mstart = image->segment[i].mem;
187 		mend   = mstart + image->segment[i].memsz;
188 		for (j = 0; j < i; j++) {
189 			unsigned long pstart, pend;
190 
191 			pstart = image->segment[j].mem;
192 			pend   = pstart + image->segment[j].memsz;
193 			/* Do the segments overlap ? */
194 			if ((mend > pstart) && (mstart < pend))
195 				return result;
196 		}
197 	}
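	/*
	 * The test above is the standard half-open interval check:
	 * [mstart, mend) and [pstart, pend) intersect iff mend > pstart
	 * and mstart < pend.  For example (hypothetical addresses),
	 * segments at [0x1000, 0x3000) and [0x2000, 0x4000) overlap
	 * because 0x3000 > 0x2000 and 0x1000 < 0x4000, so such a load
	 * is rejected with -EINVAL.
	 */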
198 
199 	/* Ensure our buffer sizes are strictly less than
200 	 * our memory sizes.  This should always be the case,
201 	 * and it is easier to check up front than to be surprised
202 	 * later on.
203 	 */
204 	result = -EINVAL;
205 	for (i = 0; i < nr_segments; i++) {
206 		if (image->segment[i].bufsz > image->segment[i].memsz)
207 			return result;
208 	}
209 
210 	/*
211 	 * Verify we have good destination addresses.  Normally
212 	 * the caller is responsible for making certain we don't
213 	 * attempt to load the new image into invalid or reserved
214 	 * areas of RAM.  But crash kernels are preloaded into a
215 	 * reserved area of RAM.  We must ensure the addresses
216 	 * are in the reserved area otherwise preloading the
217 	 * kernel could corrupt things.
218 	 */
219 
220 	if (image->type == KEXEC_TYPE_CRASH) {
221 		result = -EADDRNOTAVAIL;
222 		for (i = 0; i < nr_segments; i++) {
223 			unsigned long mstart, mend;
224 
225 			mstart = image->segment[i].mem;
226 			mend = mstart + image->segment[i].memsz - 1;
227 			/* Ensure we are within the crash kernel limits */
228 			if ((mstart < crashk_res.start) ||
229 			    (mend > crashk_res.end))
230 				return result;
231 		}
232 	}
233 
234 	return 0;
235 }
236 
237 struct kimage *do_kimage_alloc_init(void)
238 {
239 	struct kimage *image;
240 
241 	/* Allocate a controlling structure */
242 	image = kzalloc(sizeof(*image), GFP_KERNEL);
243 	if (!image)
244 		return NULL;
245 
246 	image->head = 0;
247 	image->entry = &image->head;
248 	image->last_entry = &image->head;
249 	image->control_page = ~0; /* By default this does not apply */
250 	image->type = KEXEC_TYPE_DEFAULT;
251 
252 	/* Initialize the list of control pages */
253 	INIT_LIST_HEAD(&image->control_pages);
254 
255 	/* Initialize the list of destination pages */
256 	INIT_LIST_HEAD(&image->dest_pages);
257 
258 	/* Initialize the list of unusable pages */
259 	INIT_LIST_HEAD(&image->unusable_pages);
260 
261 	return image;
262 }
263 
264 int kimage_is_destination_range(struct kimage *image,
265 					unsigned long start,
266 					unsigned long end)
267 {
268 	unsigned long i;
269 
270 	for (i = 0; i < image->nr_segments; i++) {
271 		unsigned long mstart, mend;
272 
273 		mstart = image->segment[i].mem;
274 		mend = mstart + image->segment[i].memsz;
275 		if ((end > mstart) && (start < mend))
276 			return 1;
277 	}
278 
279 	return 0;
280 }
281 
282 static struct page *kimage_alloc_pages(gfp_t gfp_mask, unsigned int order)
283 {
284 	struct page *pages;
285 
286 	pages = alloc_pages(gfp_mask, order);
287 	if (pages) {
288 		unsigned int count, i;
289 
290 		pages->mapping = NULL;
291 		set_page_private(pages, order);
292 		count = 1 << order;
293 		for (i = 0; i < count; i++)
294 			SetPageReserved(pages + i);
295 	}
296 
297 	return pages;
298 }
299 
300 static void kimage_free_pages(struct page *page)
301 {
302 	unsigned int order, count, i;
303 
304 	order = page_private(page);
305 	count = 1 << order;
306 	for (i = 0; i < count; i++)
307 		ClearPageReserved(page + i);
308 	__free_pages(page, order);
309 }
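/*
 * The allocation order is stashed in page_private() of the head page
 * by kimage_alloc_pages() and recovered by kimage_free_pages() above,
 * so no separate bookkeeping is needed.  A minimal sketch of the round
 * trip, with a hypothetical caller:
 *
 *	struct page *p = kimage_alloc_pages(GFP_KERNEL, 2);
 *	if (p)				(4 pages, page_private(p) == 2)
 *		kimage_free_pages(p);	(frees all 1 << 2 of them)
 */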
310 
311 void kimage_free_page_list(struct list_head *list)
312 {
313 	struct page *page, *next;
314 
315 	list_for_each_entry_safe(page, next, list, lru) {
316 		list_del(&page->lru);
317 		kimage_free_pages(page);
318 	}
319 }
320 
321 static struct page *kimage_alloc_normal_control_pages(struct kimage *image,
322 							unsigned int order)
323 {
324 	/* Control pages are special; they are the intermediaries
325 	 * that are needed while we copy the rest of the pages
326 	 * to their final resting place.  As such they must
327 	 * not conflict with either the destination addresses
328 	 * or memory the kernel is already using.
329 	 *
330 	 * The only case where we really need more than one of
331 	 * these is for architectures where we cannot disable
332 	 * the MMU and must instead generate an identity mapped
333 	 * page table for all of the memory.
334 	 *
335 	 * At worst this runs in O(N) of the image size.
336 	 */
337 	struct list_head extra_pages;
338 	struct page *pages;
339 	unsigned int count;
340 
341 	count = 1 << order;
342 	INIT_LIST_HEAD(&extra_pages);
343 
344 	/* Loop while I can allocate a page and the page allocated
345 	 * is a destination page.
346 	 */
347 	do {
348 		unsigned long pfn, epfn, addr, eaddr;
349 
350 		pages = kimage_alloc_pages(KEXEC_CONTROL_MEMORY_GFP, order);
351 		if (!pages)
352 			break;
353 		pfn   = page_to_pfn(pages);
354 		epfn  = pfn + count;
355 		addr  = pfn << PAGE_SHIFT;
356 		eaddr = epfn << PAGE_SHIFT;
357 		if ((epfn >= (KEXEC_CONTROL_MEMORY_LIMIT >> PAGE_SHIFT)) ||
358 			      kimage_is_destination_range(image, addr, eaddr)) {
359 			list_add(&pages->lru, &extra_pages);
360 			pages = NULL;
361 		}
362 	} while (!pages);
363 
364 	if (pages) {
365 		/* Remember the allocated page... */
366 		list_add(&pages->lru, &image->control_pages);
367 
368 		/* Because the page is already in its destination
369 		 * location we will never allocate another page at
370 		 * that address.  Therefore kimage_alloc_pages
371 		 * will not return it (again) and we don't need
372 		 * to give it an entry in image->segment[].
373 		 */
374 	}
375 	/* Deal with the destination pages I have inadvertently allocated.
376 	 *
377 	 * Ideally I would convert multi-page allocations into single
378 	 * page allocations, and add everything to image->dest_pages.
379 	 *
380 	 * For now it is simpler to just free the pages.
381 	 */
382 	kimage_free_page_list(&extra_pages);
383 
384 	return pages;
385 }
386 
387 static struct page *kimage_alloc_crash_control_pages(struct kimage *image,
388 						      unsigned int order)
389 {
390 	/* Control pages are special; they are the intermediaries
391 	 * that are needed while we copy the rest of the pages
392 	 * to their final resting place.  As such they must
393 	 * not conflict with either the destination addresses
394 	 * or memory the kernel is already using.
395 	 *
396 	 * Control pages are also the only pages we must allocate
397 	 * when loading a crash kernel.  All of the other pages
398 	 * are specified by the segments and we just memcpy
399 	 * into them directly.
400 	 *
401 	 * The only case where we really need more than one of
402 	 * these is for architectures where we cannot disable
403 	 * the MMU and must instead generate an identity mapped
404 	 * page table for all of the memory.
405 	 *
406 	 * Given the low demand this implements a very simple
407 	 * allocator that finds the first hole of the appropriate
408 	 * size in the reserved memory region, and allocates all
409 	 * of the memory up to and including the hole.
410 	 */
411 	unsigned long hole_start, hole_end, size;
412 	struct page *pages;
413 
414 	pages = NULL;
415 	size = (1 << order) << PAGE_SHIFT;
416 	hole_start = (image->control_page + (size - 1)) & ~(size - 1);
417 	hole_end   = hole_start + size - 1;
418 	while (hole_end <= crashk_res.end) {
419 		unsigned long i;
420 
421 		if (hole_end > KEXEC_CRASH_CONTROL_MEMORY_LIMIT)
422 			break;
423 		/* See if I overlap any of the segments */
424 		for (i = 0; i < image->nr_segments; i++) {
425 			unsigned long mstart, mend;
426 
427 			mstart = image->segment[i].mem;
428 			mend   = mstart + image->segment[i].memsz - 1;
429 			if ((hole_end >= mstart) && (hole_start <= mend)) {
430 				/* Advance the hole to the end of the segment */
431 				hole_start = (mend + (size - 1)) & ~(size - 1);
432 				hole_end   = hole_start + size - 1;
433 				break;
434 			}
435 		}
436 		/* If I don't overlap any segments I have found my hole! */
437 		if (i == image->nr_segments) {
438 			pages = pfn_to_page(hole_start >> PAGE_SHIFT);
439 			image->control_page = hole_end;
440 			break;
441 		}
442 	}
443 
444 	return pages;
445 }
446 
447 
448 struct page *kimage_alloc_control_pages(struct kimage *image,
449 					 unsigned int order)
450 {
451 	struct page *pages = NULL;
452 
453 	switch (image->type) {
454 	case KEXEC_TYPE_DEFAULT:
455 		pages = kimage_alloc_normal_control_pages(image, order);
456 		break;
457 	case KEXEC_TYPE_CRASH:
458 		pages = kimage_alloc_crash_control_pages(image, order);
459 		break;
460 	}
461 
462 	return pages;
463 }
464 
465 static int kimage_add_entry(struct kimage *image, kimage_entry_t entry)
466 {
467 	if (*image->entry != 0)
468 		image->entry++;
469 
470 	if (image->entry == image->last_entry) {
471 		kimage_entry_t *ind_page;
472 		struct page *page;
473 
474 		page = kimage_alloc_page(image, GFP_KERNEL, KIMAGE_NO_DEST);
475 		if (!page)
476 			return -ENOMEM;
477 
478 		ind_page = page_address(page);
479 		*image->entry = virt_to_phys(ind_page) | IND_INDIRECTION;
480 		image->entry = ind_page;
481 		image->last_entry = ind_page +
482 				      ((PAGE_SIZE/sizeof(kimage_entry_t)) - 1);
483 	}
484 	*image->entry = entry;
485 	image->entry++;
486 	*image->entry = 0;
487 
488 	return 0;
489 }
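/*
 * Note how the entry list grows: when image->entry reaches
 * image->last_entry (the final slot of the current indirection page),
 * a fresh page is allocated, the current slot is rewritten as
 * page | IND_INDIRECTION, and writing continues in the new page.
 * last_entry points at slot PAGE_SIZE/sizeof(kimage_entry_t) - 1, so
 * one slot always stays in reserve for the next link or for IND_DONE.
 */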
490 
491 static int kimage_set_destination(struct kimage *image,
492 				   unsigned long destination)
493 {
494 	int result;
495 
496 	destination &= PAGE_MASK;
497 	result = kimage_add_entry(image, destination | IND_DESTINATION);
498 
499 	return result;
500 }
501 
502 
503 static int kimage_add_page(struct kimage *image, unsigned long page)
504 {
505 	int result;
506 
507 	page &= PAGE_MASK;
508 	result = kimage_add_entry(image, page | IND_SOURCE);
509 
510 	return result;
511 }
512 
513 
514 static void kimage_free_extra_pages(struct kimage *image)
515 {
516 	/* Walk through and free any extra destination pages I may have */
517 	kimage_free_page_list(&image->dest_pages);
518 
519 	/* Walk through and free any unusable pages I have cached */
520 	kimage_free_page_list(&image->unusable_pages);
521 
522 }
523 void kimage_terminate(struct kimage *image)
524 {
525 	if (*image->entry != 0)
526 		image->entry++;
527 
528 	*image->entry = IND_DONE;
529 }
530 
531 #define for_each_kimage_entry(image, ptr, entry) \
532 	for (ptr = &image->head; (entry = *ptr) && !(entry & IND_DONE); \
533 		ptr = (entry & IND_INDIRECTION) ? \
534 			phys_to_virt((entry & PAGE_MASK)) : ptr + 1)
535 
536 static void kimage_free_entry(kimage_entry_t entry)
537 {
538 	struct page *page;
539 
540 	page = pfn_to_page(entry >> PAGE_SHIFT);
541 	kimage_free_pages(page);
542 }
543 
544 void kimage_free(struct kimage *image)
545 {
546 	kimage_entry_t *ptr, entry;
547 	kimage_entry_t ind = 0;
548 
549 	if (!image)
550 		return;
551 
552 	kimage_free_extra_pages(image);
553 	for_each_kimage_entry(image, ptr, entry) {
554 		if (entry & IND_INDIRECTION) {
555 			/* Free the previous indirection page */
556 			if (ind & IND_INDIRECTION)
557 				kimage_free_entry(ind);
558 			/* Save this indirection page until we are
559 			 * done with it.
560 			 */
561 			ind = entry;
562 		} else if (entry & IND_SOURCE)
563 			kimage_free_entry(entry);
564 	}
565 	/* Free the final indirection page */
566 	if (ind & IND_INDIRECTION)
567 		kimage_free_entry(ind);
568 
569 	/* Handle any machine specific cleanup */
570 	machine_kexec_cleanup(image);
571 
572 	/* Free the kexec control pages... */
573 	kimage_free_page_list(&image->control_pages);
574 
575 	/*
576 	 * Free up any temporary buffers allocated.  This path is hit
577 	 * if an error occurred long after buffer allocation.
578 	 */
579 	if (image->file_mode)
580 		kimage_file_post_load_cleanup(image);
581 
582 	kfree(image);
583 }
584 
585 static kimage_entry_t *kimage_dst_used(struct kimage *image,
586 					unsigned long page)
587 {
588 	kimage_entry_t *ptr, entry;
589 	unsigned long destination = 0;
590 
591 	for_each_kimage_entry(image, ptr, entry) {
592 		if (entry & IND_DESTINATION)
593 			destination = entry & PAGE_MASK;
594 		else if (entry & IND_SOURCE) {
595 			if (page == destination)
596 				return ptr;
597 			destination += PAGE_SIZE;
598 		}
599 	}
600 
601 	return NULL;
602 }
603 
604 static struct page *kimage_alloc_page(struct kimage *image,
605 					gfp_t gfp_mask,
606 					unsigned long destination)
607 {
608 	/*
609 	 * Here we implement safeguards to ensure that a source page
610 	 * is not copied to its destination page before the data on
611 	 * the destination page is no longer useful.
612 	 *
613 	 * To do this we maintain the invariant that a source page is
614 	 * either its own destination page, or it is not a
615 	 * destination page at all.
616 	 *
617 	 * That is slightly stronger than required, but the proof
618 	 * that no problems will occur is trivial, and the
619 	 * implementation is simple to verify.
620 	 *
621 	 * When allocating all pages normally this algorithm will run
622 	 * in O(N) time, but in the worst case it will run in O(N^2)
623 	 * time.   If the runtime is a problem the data structures can
624 	 * be fixed.
625 	 */
626 	struct page *page;
627 	unsigned long addr;
628 
629 	/*
630 	 * Walk through the list of destination pages, and see if I
631 	 * have a match.
632 	 */
633 	list_for_each_entry(page, &image->dest_pages, lru) {
634 		addr = page_to_pfn(page) << PAGE_SHIFT;
635 		if (addr == destination) {
636 			list_del(&page->lru);
637 			return page;
638 		}
639 	}
640 	page = NULL;
641 	while (1) {
642 		kimage_entry_t *old;
643 
644 		/* Allocate a page, if we run out of memory give up */
645 		page = kimage_alloc_pages(gfp_mask, 0);
646 		if (!page)
647 			return NULL;
648 		/* If the page cannot be used, file it away */
649 		if (page_to_pfn(page) >
650 				(KEXEC_SOURCE_MEMORY_LIMIT >> PAGE_SHIFT)) {
651 			list_add(&page->lru, &image->unusable_pages);
652 			continue;
653 		}
654 		addr = page_to_pfn(page) << PAGE_SHIFT;
655 
656 		/* If it is the destination page we want, use it */
657 		if (addr == destination)
658 			break;
659 
660 		/* If the page is not a destination page use it */
661 		if (!kimage_is_destination_range(image, addr,
662 						  addr + PAGE_SIZE))
663 			break;
664 
665 		/*
666 		 * I know that the page is someone's destination page.
667 		 * See if there is already a source page for this
668 		 * destination page, and if so, swap the source pages.
669 		 */
670 		old = kimage_dst_used(image, addr);
671 		if (old) {
672 			/* If so move it */
673 			unsigned long old_addr;
674 			struct page *old_page;
675 
676 			old_addr = *old & PAGE_MASK;
677 			old_page = pfn_to_page(old_addr >> PAGE_SHIFT);
678 			copy_highpage(page, old_page);
679 			*old = addr | (*old & ~PAGE_MASK);
680 
681 			/* The old page I have found cannot be a
682 			 * destination page, so return it if its
683 			 * gfp_flags honor the ones passed in.
684 			 */
685 			if (!(gfp_mask & __GFP_HIGHMEM) &&
686 			    PageHighMem(old_page)) {
687 				kimage_free_pages(old_page);
688 				continue;
689 			}
690 			addr = old_addr;
691 			page = old_page;
692 			break;
693 		}
694 		/* Place the page on the destination list, to be used later */
695 		list_add(&page->lru, &image->dest_pages);
696 	}
697 
698 	return page;
699 }
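/*
 * A worked example of the swap case above, with hypothetical
 * addresses: the allocator hands back a page at 0x2000, but 0x2000 is
 * the destination of an already-loaded source page S.  S's contents
 * are copied into the new page, so the data bound for 0x2000 now sits
 * at 0x2000, and S's IND_SOURCE entry is repointed there, preserving
 * the invariant that a source page is its own destination or not a
 * destination at all.  S's old frame, which cannot be a destination,
 * is what gets returned to the caller.
 */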
700 
701 static int kimage_load_normal_segment(struct kimage *image,
702 					 struct kexec_segment *segment)
703 {
704 	unsigned long maddr;
705 	size_t ubytes, mbytes;
706 	int result;
707 	unsigned char __user *buf = NULL;
708 	unsigned char *kbuf = NULL;
709 
710 	result = 0;
711 	if (image->file_mode)
712 		kbuf = segment->kbuf;
713 	else
714 		buf = segment->buf;
715 	ubytes = segment->bufsz;
716 	mbytes = segment->memsz;
717 	maddr = segment->mem;
718 
719 	result = kimage_set_destination(image, maddr);
720 	if (result < 0)
721 		goto out;
722 
723 	while (mbytes) {
724 		struct page *page;
725 		char *ptr;
726 		size_t uchunk, mchunk;
727 
728 		page = kimage_alloc_page(image, GFP_HIGHUSER, maddr);
729 		if (!page) {
730 			result  = -ENOMEM;
731 			goto out;
732 		}
733 		result = kimage_add_page(image, page_to_pfn(page)
734 								<< PAGE_SHIFT);
735 		if (result < 0)
736 			goto out;
737 
738 		ptr = kmap(page);
739 		/* Start with a clear page */
740 		clear_page(ptr);
741 		ptr += maddr & ~PAGE_MASK;
742 		mchunk = min_t(size_t, mbytes,
743 				PAGE_SIZE - (maddr & ~PAGE_MASK));
744 		uchunk = min(ubytes, mchunk);
745 
746 		/* For file based kexec, source pages are in kernel memory */
747 		if (image->file_mode)
748 			memcpy(ptr, kbuf, uchunk);
749 		else
750 			result = copy_from_user(ptr, buf, uchunk);
751 		kunmap(page);
752 		if (result) {
753 			result = -EFAULT;
754 			goto out;
755 		}
756 		ubytes -= uchunk;
757 		maddr  += mchunk;
758 		if (image->file_mode)
759 			kbuf += mchunk;
760 		else
761 			buf += mchunk;
762 		mbytes -= mchunk;
763 	}
764 out:
765 	return result;
766 }
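/*
 * A chunking example for the copy loop above (hypothetical values,
 * assuming 4K pages): with maddr = 0x100800, mbytes = 0x2000 and
 * ubytes = 0x1000, the first pass copies mchunk = PAGE_SIZE - 0x800 =
 * 0x800 bytes up to the page boundary; once ubytes is exhausted,
 * uchunk drops to zero and the freshly cleared pages provide the zero
 * padding implied by memsz > bufsz.
 */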
767 
768 static int kimage_load_crash_segment(struct kimage *image,
769 					struct kexec_segment *segment)
770 {
771 	/* For crash dump kernels we simply copy the data from
772 	 * user space to its destination.
773 	 * We do things a page at a time for the sake of kmap.
774 	 */
775 	unsigned long maddr;
776 	size_t ubytes, mbytes;
777 	int result;
778 	unsigned char __user *buf = NULL;
779 	unsigned char *kbuf = NULL;
780 
781 	result = 0;
782 	if (image->file_mode)
783 		kbuf = segment->kbuf;
784 	else
785 		buf = segment->buf;
786 	ubytes = segment->bufsz;
787 	mbytes = segment->memsz;
788 	maddr = segment->mem;
789 	while (mbytes) {
790 		struct page *page;
791 		char *ptr;
792 		size_t uchunk, mchunk;
793 
794 		page = pfn_to_page(maddr >> PAGE_SHIFT);
795 		if (!page) {
796 			result  = -ENOMEM;
797 			goto out;
798 		}
799 		ptr = kmap(page);
800 		ptr += maddr & ~PAGE_MASK;
801 		mchunk = min_t(size_t, mbytes,
802 				PAGE_SIZE - (maddr & ~PAGE_MASK));
803 		uchunk = min(ubytes, mchunk);
804 		if (mchunk > uchunk) {
805 			/* Zero the trailing part of the page */
806 			memset(ptr + uchunk, 0, mchunk - uchunk);
807 		}
808 
809 		/* For file based kexec, source pages are in kernel memory */
810 		if (image->file_mode)
811 			memcpy(ptr, kbuf, uchunk);
812 		else
813 			result = copy_from_user(ptr, buf, uchunk);
814 		kexec_flush_icache_page(page);
815 		kunmap(page);
816 		if (result) {
817 			result = -EFAULT;
818 			goto out;
819 		}
820 		ubytes -= uchunk;
821 		maddr  += mchunk;
822 		if (image->file_mode)
823 			kbuf += mchunk;
824 		else
825 			buf += mchunk;
826 		mbytes -= mchunk;
827 	}
828 out:
829 	return result;
830 }
831 
832 int kimage_load_segment(struct kimage *image,
833 				struct kexec_segment *segment)
834 {
835 	int result = -ENOMEM;
836 
837 	switch (image->type) {
838 	case KEXEC_TYPE_DEFAULT:
839 		result = kimage_load_normal_segment(image, segment);
840 		break;
841 	case KEXEC_TYPE_CRASH:
842 		result = kimage_load_crash_segment(image, segment);
843 		break;
844 	}
845 
846 	return result;
847 }
848 
849 struct kimage *kexec_image;
850 struct kimage *kexec_crash_image;
851 int kexec_load_disabled;
852 
853 /*
854  * No panic_cpu check version of crash_kexec().  This function is called
855  * only when panic_cpu holds the current CPU number; this is the only CPU
856  * which processes crash_kexec routines.
857  */
858 void __crash_kexec(struct pt_regs *regs)
859 {
860 	/* Take the kexec_mutex here to prevent sys_kexec_load
861 	 * running on one cpu from replacing the crash kernel
862 	 * we are using after a panic on a different cpu.
863 	 *
864 	 * If the crash kernel was not located in a fixed area
865 	 * of memory the xchg(&kexec_crash_image) would be
866 	 * sufficient.  But since I reuse the memory...
867 	 */
868 	if (mutex_trylock(&kexec_mutex)) {
869 		if (kexec_crash_image) {
870 			struct pt_regs fixed_regs;
871 
872 			crash_setup_regs(&fixed_regs, regs);
873 			crash_save_vmcoreinfo();
874 			machine_crash_shutdown(&fixed_regs);
875 			machine_kexec(kexec_crash_image);
876 		}
877 		mutex_unlock(&kexec_mutex);
878 	}
879 }
880 
881 void crash_kexec(struct pt_regs *regs)
882 {
883 	int old_cpu, this_cpu;
884 
885 	/*
886 	 * Only one CPU is allowed to execute the crash_kexec() code as with
887 	 * panic().  Otherwise parallel calls of panic() and crash_kexec()
888 	 * may stop each other.  To exclude them, we use panic_cpu here too.
889 	 */
890 	this_cpu = raw_smp_processor_id();
891 	old_cpu = atomic_cmpxchg(&panic_cpu, PANIC_CPU_INVALID, this_cpu);
892 	if (old_cpu == PANIC_CPU_INVALID) {
893 		/* This is the 1st CPU which comes here, so go ahead. */
894 		__crash_kexec(regs);
895 
896 		/*
897 		 * Reset panic_cpu to allow another panic()/crash_kexec()
898 		 * call.
899 		 */
900 		atomic_set(&panic_cpu, PANIC_CPU_INVALID);
901 	}
902 }
903 
904 size_t crash_get_memory_size(void)
905 {
906 	size_t size = 0;
907 
908 	mutex_lock(&kexec_mutex);
909 	if (crashk_res.end != crashk_res.start)
910 		size = resource_size(&crashk_res);
911 	mutex_unlock(&kexec_mutex);
912 	return size;
913 }
914 
915 void __weak crash_free_reserved_phys_range(unsigned long begin,
916 					   unsigned long end)
917 {
918 	unsigned long addr;
919 
920 	for (addr = begin; addr < end; addr += PAGE_SIZE)
921 		free_reserved_page(pfn_to_page(addr >> PAGE_SHIFT));
922 }
923 
924 int crash_shrink_memory(unsigned long new_size)
925 {
926 	int ret = 0;
927 	unsigned long start, end;
928 	unsigned long old_size;
929 	struct resource *ram_res;
930 
931 	mutex_lock(&kexec_mutex);
932 
933 	if (kexec_crash_image) {
934 		ret = -ENOENT;
935 		goto unlock;
936 	}
937 	start = crashk_res.start;
938 	end = crashk_res.end;
939 	old_size = (end == 0) ? 0 : end - start + 1;
940 	if (new_size >= old_size) {
941 		ret = (new_size == old_size) ? 0 : -EINVAL;
942 		goto unlock;
943 	}
944 
945 	ram_res = kzalloc(sizeof(*ram_res), GFP_KERNEL);
946 	if (!ram_res) {
947 		ret = -ENOMEM;
948 		goto unlock;
949 	}
950 
951 	start = roundup(start, KEXEC_CRASH_MEM_ALIGN);
952 	end = roundup(start + new_size, KEXEC_CRASH_MEM_ALIGN);
953 
954 	crash_map_reserved_pages();
955 	crash_free_reserved_phys_range(end, crashk_res.end);
956 
957 	if ((start == end) && (crashk_res.parent != NULL))
958 		release_resource(&crashk_res);
959 
960 	ram_res->start = end;
961 	ram_res->end = crashk_res.end;
962 	ram_res->flags = IORESOURCE_BUSY | IORESOURCE_MEM;
963 	ram_res->name = "System RAM";
964 
965 	crashk_res.end = end - 1;
966 
967 	insert_resource(&iomem_resource, ram_res);
968 	crash_unmap_reserved_pages();
969 
970 unlock:
971 	mutex_unlock(&kexec_mutex);
972 	return ret;
973 }
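/*
 * A worked example of the shrink, assuming a hypothetical layout:
 * with crashk_res spanning [128M, 256M) and new_size = 64M, start
 * stays at 128M, end becomes 192M (both already KEXEC_CRASH_MEM_ALIGN
 * aligned here), the tail [192M, 256M) is freed page by page and
 * re-inserted into iomem_resource as "System RAM", and crashk_res.end
 * is pulled back to 192M - 1.
 */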
974 
975 static u32 *append_elf_note(u32 *buf, char *name, unsigned type, void *data,
976 			    size_t data_len)
977 {
978 	struct elf_note note;
979 
980 	note.n_namesz = strlen(name) + 1;
981 	note.n_descsz = data_len;
982 	note.n_type   = type;
983 	memcpy(buf, &note, sizeof(note));
984 	buf += (sizeof(note) + 3)/4;
985 	memcpy(buf, name, note.n_namesz);
986 	buf += (note.n_namesz + 3)/4;
987 	memcpy(buf, data, note.n_descsz);
988 	buf += (note.n_descsz + 3)/4;
989 
990 	return buf;
991 }
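/*
 * The resulting note layout, with each piece padded to a 4-byte
 * boundary as the ELF spec requires (example for a 5-byte name such
 * as "CORE"):
 *
 *	+0x00  struct elf_note (n_namesz = 5, n_descsz, n_type)
 *	+0x0c  "CORE\0" plus 3 bytes of padding
 *	+0x14  data_len bytes of descriptor data, padded to 4 bytes
 *
 * The returned pointer is where the next note, or final_note(),
 * is written.
 */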
992 
993 static void final_note(u32 *buf)
994 {
995 	struct elf_note note;
996 
997 	note.n_namesz = 0;
998 	note.n_descsz = 0;
999 	note.n_type   = 0;
1000 	memcpy(buf, &note, sizeof(note));
1001 }
1002 
1003 void crash_save_cpu(struct pt_regs *regs, int cpu)
1004 {
1005 	struct elf_prstatus prstatus;
1006 	u32 *buf;
1007 
1008 	if ((cpu < 0) || (cpu >= nr_cpu_ids))
1009 		return;
1010 
1011 	/* Using ELF notes here is opportunistic.
1012 	 * I need a well-defined structure format
1013 	 * for the data I pass, and I need tags
1014 	 * on the data to indicate what information I have
1015 	 * squirrelled away.  ELF notes happen to provide
1016 	 * all of that, so there is no need to invent something new.
1017 	 */
1018 	buf = (u32 *)per_cpu_ptr(crash_notes, cpu);
1019 	if (!buf)
1020 		return;
1021 	memset(&prstatus, 0, sizeof(prstatus));
1022 	prstatus.pr_pid = current->pid;
1023 	elf_core_copy_kernel_regs(&prstatus.pr_reg, regs);
1024 	buf = append_elf_note(buf, KEXEC_CORE_NOTE_NAME, NT_PRSTATUS,
1025 			      &prstatus, sizeof(prstatus));
1026 	final_note(buf);
1027 }
1028 
1029 static int __init crash_notes_memory_init(void)
1030 {
1031 	/* Allocate memory for saving cpu registers. */
1032 	size_t size, align;
1033 
1034 	/*
1035 	 * crash_notes could be allocated across 2 vmalloc pages when percpu
1036 	 * is vmalloc based. vmalloc doesn't guarantee 2 contiguous vmalloc
1037 	 * pages are also on 2 contiguous physical pages. In this case the
1038 	 * 2nd part of crash_notes in 2nd page could be lost since only the
1039 	 * starting address and size of crash_notes are exported through sysfs.
1040 	 * Here round up the size of crash_notes to the nearest power of two
1041 	 * and pass it to __alloc_percpu as align value. This can make sure
1042 	 * crash_notes is allocated inside one physical page.
1043 	 */
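	/*
	 * For instance, with a hypothetical 0x1a0-byte note_buf_t the
	 * align below becomes 0x200, and any 0x200-aligned object of
	 * that size lies within a single page, because PAGE_SIZE is a
	 * multiple of the alignment.
	 */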
1044 	size = sizeof(note_buf_t);
1045 	align = min(roundup_pow_of_two(sizeof(note_buf_t)), PAGE_SIZE);
1046 
1047 	/*
1048 	 * Break the compile if size is bigger than PAGE_SIZE, since
1049 	 * crash_notes would then definitely span 2 pages.
1050 	 */
1051 	BUILD_BUG_ON(size > PAGE_SIZE);
1052 
1053 	crash_notes = __alloc_percpu(size, align);
1054 	if (!crash_notes) {
1055 		pr_warn("Memory allocation for saving cpu register states failed\n");
1056 		return -ENOMEM;
1057 	}
1058 	return 0;
1059 }
1060 subsys_initcall(crash_notes_memory_init);
1061 
1062 
1063 /*
1064  * parsing the "crashkernel" commandline
1065  *
1066  * this code is intended to be called from architecture specific code
1067  */
1068 
1069 
1070 /*
1071  * This function parses command lines in the format
1072  *
1073  *   crashkernel=ramsize-range:size[,...][@offset]
1074  *
1075  * The function returns 0 on success and -EINVAL on failure.
1076  */
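/*
 * A hypothetical example: crashkernel=512M-2G:64M,2G-:128M@16M
 * reserves 64M if system RAM lies in [512M, 2G), 128M if it is 2G or
 * more, and nothing below 512M; the @16M suffix pins the reservation
 * at physical address 16M in either case.
 */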
1077 static int __init parse_crashkernel_mem(char *cmdline,
1078 					unsigned long long system_ram,
1079 					unsigned long long *crash_size,
1080 					unsigned long long *crash_base)
1081 {
1082 	char *cur = cmdline, *tmp;
1083 
1084 	/* for each entry of the comma-separated list */
1085 	do {
1086 		unsigned long long start, end = ULLONG_MAX, size;
1087 
1088 		/* get the start of the range */
1089 		start = memparse(cur, &tmp);
1090 		if (cur == tmp) {
1091 			pr_warn("crashkernel: Memory value expected\n");
1092 			return -EINVAL;
1093 		}
1094 		cur = tmp;
1095 		if (*cur != '-') {
1096 			pr_warn("crashkernel: '-' expected\n");
1097 			return -EINVAL;
1098 		}
1099 		cur++;
1100 
1101 		/* if no ':' is here, then we read the end */
1102 		if (*cur != ':') {
1103 			end = memparse(cur, &tmp);
1104 			if (cur == tmp) {
1105 				pr_warn("crashkernel: Memory value expected\n");
1106 				return -EINVAL;
1107 			}
1108 			cur = tmp;
1109 			if (end <= start) {
1110 				pr_warn("crashkernel: end <= start\n");
1111 				return -EINVAL;
1112 			}
1113 		}
1114 
1115 		if (*cur != ':') {
1116 			pr_warn("crashkernel: ':' expected\n");
1117 			return -EINVAL;
1118 		}
1119 		cur++;
1120 
1121 		size = memparse(cur, &tmp);
1122 		if (cur == tmp) {
1123 			pr_warn("Memory value expected\n");
1124 			return -EINVAL;
1125 		}
1126 		cur = tmp;
1127 		if (size >= system_ram) {
1128 			pr_warn("crashkernel: invalid size\n");
1129 			return -EINVAL;
1130 		}
1131 
1132 		/* match ? */
1133 		if (system_ram >= start && system_ram < end) {
1134 			*crash_size = size;
1135 			break;
1136 		}
1137 	} while (*cur++ == ',');
1138 
1139 	if (*crash_size > 0) {
1140 		while (*cur && *cur != ' ' && *cur != '@')
1141 			cur++;
1142 		if (*cur == '@') {
1143 			cur++;
1144 			*crash_base = memparse(cur, &tmp);
1145 			if (cur == tmp) {
1146 				pr_warn("Memory value expected after '@'\n");
1147 				return -EINVAL;
1148 			}
1149 		}
1150 	}
1151 
1152 	return 0;
1153 }
1154 
1155 /*
1156  * This function parses "simple" (old) crashkernel command lines like
1157  *
1158  *	crashkernel=size[@offset]
1159  *
1160  * It returns 0 on success and -EINVAL on failure.
1161  */
1162 static int __init parse_crashkernel_simple(char *cmdline,
1163 					   unsigned long long *crash_size,
1164 					   unsigned long long *crash_base)
1165 {
1166 	char *cur = cmdline;
1167 
1168 	*crash_size = memparse(cmdline, &cur);
1169 	if (cmdline == cur) {
1170 		pr_warn("crashkernel: memory value expected\n");
1171 		return -EINVAL;
1172 	}
1173 
1174 	if (*cur == '@')
1175 		*crash_base = memparse(cur+1, &cur);
1176 	else if (*cur != ' ' && *cur != '\0') {
1177 		pr_warn("crashkernel: unrecognized char: %c\n", *cur);
1178 		return -EINVAL;
1179 	}
1180 
1181 	return 0;
1182 }
1183 
1184 #define SUFFIX_HIGH 0
1185 #define SUFFIX_LOW  1
1186 #define SUFFIX_NULL 2
1187 static __initdata char *suffix_tbl[] = {
1188 	[SUFFIX_HIGH] = ",high",
1189 	[SUFFIX_LOW]  = ",low",
1190 	[SUFFIX_NULL] = NULL,
1191 };
1192 
1193 /*
1194  * This function parses "suffix" crashkernel command lines like
1195  *
1196  *	crashkernel=size,[high|low]
1197  *
1198  * It returns 0 on success and -EINVAL on failure.
1199  */
1200 static int __init parse_crashkernel_suffix(char *cmdline,
1201 					   unsigned long long	*crash_size,
1202 					   const char *suffix)
1203 {
1204 	char *cur = cmdline;
1205 
1206 	*crash_size = memparse(cmdline, &cur);
1207 	if (cmdline == cur) {
1208 		pr_warn("crashkernel: memory value expected\n");
1209 		return -EINVAL;
1210 	}
1211 
1212 	/* check with suffix */
1213 	if (strncmp(cur, suffix, strlen(suffix))) {
1214 		pr_warn("crashkernel: unrecognized char: %c\n", *cur);
1215 		return -EINVAL;
1216 	}
1217 	cur += strlen(suffix);
1218 	if (*cur != ' ' && *cur != '\0') {
1219 		pr_warn("crashkernel: unrecognized char: %c\n", *cur);
1220 		return -EINVAL;
1221 	}
1222 
1223 	return 0;
1224 }
1225 
1226 static __init char *get_last_crashkernel(char *cmdline,
1227 			     const char *name,
1228 			     const char *suffix)
1229 {
1230 	char *p = cmdline, *ck_cmdline = NULL;
1231 
1232 	/* find crashkernel and use the last one if there are more */
1233 	p = strstr(p, name);
1234 	while (p) {
1235 		char *end_p = strchr(p, ' ');
1236 		char *q;
1237 
1238 		if (!end_p)
1239 			end_p = p + strlen(p);
1240 
1241 		if (!suffix) {
1242 			int i;
1243 
1244 			/* skip the one with any known suffix */
1245 			for (i = 0; suffix_tbl[i]; i++) {
1246 				q = end_p - strlen(suffix_tbl[i]);
1247 				if (!strncmp(q, suffix_tbl[i],
1248 					     strlen(suffix_tbl[i])))
1249 					goto next;
1250 			}
1251 			ck_cmdline = p;
1252 		} else {
1253 			q = end_p - strlen(suffix);
1254 			if (!strncmp(q, suffix, strlen(suffix)))
1255 				ck_cmdline = p;
1256 		}
1257 next:
1258 		p = strstr(p+1, name);
1259 	}
1260 
1261 	if (!ck_cmdline)
1262 		return NULL;
1263 
1264 	return ck_cmdline;
1265 }
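/*
 * For a hypothetical command line "crashkernel=64M crashkernel=128M",
 * the second option wins.  With suffix == NULL, an option carrying a
 * known suffix, e.g. "crashkernel=256M,high", is skipped, so the plain
 * and the suffixed variants can coexist and be parsed independently.
 */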
1266 
1267 static int __init __parse_crashkernel(char *cmdline,
1268 			     unsigned long long system_ram,
1269 			     unsigned long long *crash_size,
1270 			     unsigned long long *crash_base,
1271 			     const char *name,
1272 			     const char *suffix)
1273 {
1274 	char	*first_colon, *first_space;
1275 	char	*ck_cmdline;
1276 
1277 	BUG_ON(!crash_size || !crash_base);
1278 	*crash_size = 0;
1279 	*crash_base = 0;
1280 
1281 	ck_cmdline = get_last_crashkernel(cmdline, name, suffix);
1282 
1283 	if (!ck_cmdline)
1284 		return -EINVAL;
1285 
1286 	ck_cmdline += strlen(name);
1287 
1288 	if (suffix)
1289 		return parse_crashkernel_suffix(ck_cmdline, crash_size,
1290 				suffix);
1291 	/*
1292 	 * if the commandline contains a ':', then that's the extended
1293 	 * syntax -- if not, it must be the classic syntax
1294 	 */
1295 	first_colon = strchr(ck_cmdline, ':');
1296 	first_space = strchr(ck_cmdline, ' ');
1297 	if (first_colon && (!first_space || first_colon < first_space))
1298 		return parse_crashkernel_mem(ck_cmdline, system_ram,
1299 				crash_size, crash_base);
1300 
1301 	return parse_crashkernel_simple(ck_cmdline, crash_size, crash_base);
1302 }
1303 
1304 /*
1305  * This function is the entry point for command line parsing and should be
1306  * called from the arch-specific code.
1307  */
1308 int __init parse_crashkernel(char *cmdline,
1309 			     unsigned long long system_ram,
1310 			     unsigned long long *crash_size,
1311 			     unsigned long long *crash_base)
1312 {
1313 	return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base,
1314 					"crashkernel=", NULL);
1315 }
1316 
1317 int __init parse_crashkernel_high(char *cmdline,
1318 			     unsigned long long system_ram,
1319 			     unsigned long long *crash_size,
1320 			     unsigned long long *crash_base)
1321 {
1322 	return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base,
1323 				"crashkernel=", suffix_tbl[SUFFIX_HIGH]);
1324 }
1325 
1326 int __init parse_crashkernel_low(char *cmdline,
1327 			     unsigned long long system_ram,
1328 			     unsigned long long *crash_size,
1329 			     unsigned long long *crash_base)
1330 {
1331 	return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base,
1332 				"crashkernel=", suffix_tbl[SUFFIX_LOW]);
1333 }
1334 
1335 static void update_vmcoreinfo_note(void)
1336 {
1337 	u32 *buf = vmcoreinfo_note;
1338 
1339 	if (!vmcoreinfo_size)
1340 		return;
1341 	buf = append_elf_note(buf, VMCOREINFO_NOTE_NAME, 0, vmcoreinfo_data,
1342 			      vmcoreinfo_size);
1343 	final_note(buf);
1344 }
1345 
1346 void crash_save_vmcoreinfo(void)
1347 {
1348 	vmcoreinfo_append_str("CRASHTIME=%ld\n", get_seconds());
1349 	update_vmcoreinfo_note();
1350 }
1351 
1352 void vmcoreinfo_append_str(const char *fmt, ...)
1353 {
1354 	va_list args;
1355 	char buf[0x50];
1356 	size_t r;
1357 
1358 	va_start(args, fmt);
1359 	r = vscnprintf(buf, sizeof(buf), fmt, args);
1360 	va_end(args);
1361 
1362 	r = min(r, vmcoreinfo_max_size - vmcoreinfo_size);
1363 
1364 	memcpy(&vmcoreinfo_data[vmcoreinfo_size], buf, r);
1365 
1366 	vmcoreinfo_size += r;
1367 }
1368 
1369 /*
1370  * provide an empty default implementation here -- architecture
1371  * code may override this
1372  */
1373 void __weak arch_crash_save_vmcoreinfo(void)
1374 {}
1375 
1376 unsigned long __weak paddr_vmcoreinfo_note(void)
1377 {
1378 	return __pa((unsigned long)(char *)&vmcoreinfo_note);
1379 }
1380 
1381 static int __init crash_save_vmcoreinfo_init(void)
1382 {
1383 	VMCOREINFO_OSRELEASE(init_uts_ns.name.release);
1384 	VMCOREINFO_PAGESIZE(PAGE_SIZE);
1385 
1386 	VMCOREINFO_SYMBOL(init_uts_ns);
1387 	VMCOREINFO_SYMBOL(node_online_map);
1388 #ifdef CONFIG_MMU
1389 	VMCOREINFO_SYMBOL(swapper_pg_dir);
1390 #endif
1391 	VMCOREINFO_SYMBOL(_stext);
1392 	VMCOREINFO_SYMBOL(vmap_area_list);
1393 
1394 #ifndef CONFIG_NEED_MULTIPLE_NODES
1395 	VMCOREINFO_SYMBOL(mem_map);
1396 	VMCOREINFO_SYMBOL(contig_page_data);
1397 #endif
1398 #ifdef CONFIG_SPARSEMEM
1399 	VMCOREINFO_SYMBOL(mem_section);
1400 	VMCOREINFO_LENGTH(mem_section, NR_SECTION_ROOTS);
1401 	VMCOREINFO_STRUCT_SIZE(mem_section);
1402 	VMCOREINFO_OFFSET(mem_section, section_mem_map);
1403 #endif
1404 	VMCOREINFO_STRUCT_SIZE(page);
1405 	VMCOREINFO_STRUCT_SIZE(pglist_data);
1406 	VMCOREINFO_STRUCT_SIZE(zone);
1407 	VMCOREINFO_STRUCT_SIZE(free_area);
1408 	VMCOREINFO_STRUCT_SIZE(list_head);
1409 	VMCOREINFO_SIZE(nodemask_t);
1410 	VMCOREINFO_OFFSET(page, flags);
1411 	VMCOREINFO_OFFSET(page, _count);
1412 	VMCOREINFO_OFFSET(page, mapping);
1413 	VMCOREINFO_OFFSET(page, lru);
1414 	VMCOREINFO_OFFSET(page, _mapcount);
1415 	VMCOREINFO_OFFSET(page, private);
1416 	VMCOREINFO_OFFSET(pglist_data, node_zones);
1417 	VMCOREINFO_OFFSET(pglist_data, nr_zones);
1418 #ifdef CONFIG_FLAT_NODE_MEM_MAP
1419 	VMCOREINFO_OFFSET(pglist_data, node_mem_map);
1420 #endif
1421 	VMCOREINFO_OFFSET(pglist_data, node_start_pfn);
1422 	VMCOREINFO_OFFSET(pglist_data, node_spanned_pages);
1423 	VMCOREINFO_OFFSET(pglist_data, node_id);
1424 	VMCOREINFO_OFFSET(zone, free_area);
1425 	VMCOREINFO_OFFSET(zone, vm_stat);
1426 	VMCOREINFO_OFFSET(zone, spanned_pages);
1427 	VMCOREINFO_OFFSET(free_area, free_list);
1428 	VMCOREINFO_OFFSET(list_head, next);
1429 	VMCOREINFO_OFFSET(list_head, prev);
1430 	VMCOREINFO_OFFSET(vmap_area, va_start);
1431 	VMCOREINFO_OFFSET(vmap_area, list);
1432 	VMCOREINFO_LENGTH(zone.free_area, MAX_ORDER);
1433 	log_buf_kexec_setup();
1434 	VMCOREINFO_LENGTH(free_area.free_list, MIGRATE_TYPES);
1435 	VMCOREINFO_NUMBER(NR_FREE_PAGES);
1436 	VMCOREINFO_NUMBER(PG_lru);
1437 	VMCOREINFO_NUMBER(PG_private);
1438 	VMCOREINFO_NUMBER(PG_swapcache);
1439 	VMCOREINFO_NUMBER(PG_slab);
1440 #ifdef CONFIG_MEMORY_FAILURE
1441 	VMCOREINFO_NUMBER(PG_hwpoison);
1442 #endif
1443 	VMCOREINFO_NUMBER(PG_head_mask);
1444 	VMCOREINFO_NUMBER(PAGE_BUDDY_MAPCOUNT_VALUE);
1445 #ifdef CONFIG_X86
1446 	VMCOREINFO_NUMBER(KERNEL_IMAGE_SIZE);
1447 #endif
1448 #ifdef CONFIG_HUGETLBFS
1449 	VMCOREINFO_SYMBOL(free_huge_page);
1450 #endif
1451 
1452 	arch_crash_save_vmcoreinfo();
1453 	update_vmcoreinfo_note();
1454 
1455 	return 0;
1456 }
1457 
1458 subsys_initcall(crash_save_vmcoreinfo_init);
1459 
1460 /*
1461  * Move into place and start executing a preloaded standalone
1462  * executable.  If nothing was preloaded return an error.
1463  */
1464 int kernel_kexec(void)
1465 {
1466 	int error = 0;
1467 
1468 	if (!mutex_trylock(&kexec_mutex))
1469 		return -EBUSY;
1470 	if (!kexec_image) {
1471 		error = -EINVAL;
1472 		goto Unlock;
1473 	}
1474 
1475 #ifdef CONFIG_KEXEC_JUMP
1476 	if (kexec_image->preserve_context) {
1477 		lock_system_sleep();
1478 		pm_prepare_console();
1479 		error = freeze_processes();
1480 		if (error) {
1481 			error = -EBUSY;
1482 			goto Restore_console;
1483 		}
1484 		suspend_console();
1485 		error = dpm_suspend_start(PMSG_FREEZE);
1486 		if (error)
1487 			goto Resume_console;
1488 		/* At this point, dpm_suspend_start() has been called,
1489 		 * but *not* dpm_suspend_end(). We *must* call
1490 		 * dpm_suspend_end() now.  Otherwise, drivers for
1491 		 * some devices (e.g. interrupt controllers) become
1492 		 * desynchronized with the actual state of the
1493 		 * hardware at resume time, and evil weirdness ensues.
1494 		 */
1495 		error = dpm_suspend_end(PMSG_FREEZE);
1496 		if (error)
1497 			goto Resume_devices;
1498 		error = disable_nonboot_cpus();
1499 		if (error)
1500 			goto Enable_cpus;
1501 		local_irq_disable();
1502 		error = syscore_suspend();
1503 		if (error)
1504 			goto Enable_irqs;
1505 	} else
1506 #endif
1507 	{
1508 		kexec_in_progress = true;
1509 		kernel_restart_prepare(NULL);
1510 		migrate_to_reboot_cpu();
1511 
1512 		/*
1513 		 * migrate_to_reboot_cpu() disables CPU hotplug assuming that
1514 		 * no further code needs to use CPU hotplug (which is true in
1515 		 * the reboot case). However, the kexec path depends on using
1516 		 * CPU hotplug again; so re-enable it here.
1517 		 */
1518 		cpu_hotplug_enable();
1519 		pr_emerg("Starting new kernel\n");
1520 		machine_shutdown();
1521 	}
1522 
1523 	machine_kexec(kexec_image);
1524 
1525 #ifdef CONFIG_KEXEC_JUMP
1526 	if (kexec_image->preserve_context) {
1527 		syscore_resume();
1528  Enable_irqs:
1529 		local_irq_enable();
1530  Enable_cpus:
1531 		enable_nonboot_cpus();
1532 		dpm_resume_start(PMSG_RESTORE);
1533  Resume_devices:
1534 		dpm_resume_end(PMSG_RESTORE);
1535  Resume_console:
1536 		resume_console();
1537 		thaw_processes();
1538  Restore_console:
1539 		pm_restore_console();
1540 		unlock_system_sleep();
1541 	}
1542 #endif
1543 
1544  Unlock:
1545 	mutex_unlock(&kexec_mutex);
1546 	return error;
1547 }
1548 
1549 /*
1550  * Add and remove page tables for crashkernel memory
1551  *
1552  * Provide an empty default implementation here -- architecture
1553  * code may override this
1554  */
1555 void __weak crash_map_reserved_pages(void)
1556 {}
1557 
1558 void __weak crash_unmap_reserved_pages(void)
1559 {}
1560