1 /*
2  * kexec.c - kexec system call core code.
3  * Copyright (C) 2002-2004 Eric Biederman  <ebiederm@xmission.com>
4  *
5  * This source code is licensed under the GNU General Public License,
6  * Version 2.  See the file COPYING for more details.
7  */
8 
9 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
10 
11 #include <linux/capability.h>
12 #include <linux/mm.h>
13 #include <linux/file.h>
14 #include <linux/slab.h>
15 #include <linux/fs.h>
16 #include <linux/kexec.h>
17 #include <linux/mutex.h>
18 #include <linux/list.h>
19 #include <linux/highmem.h>
20 #include <linux/syscalls.h>
21 #include <linux/reboot.h>
22 #include <linux/ioport.h>
23 #include <linux/hardirq.h>
24 #include <linux/elf.h>
25 #include <linux/elfcore.h>
26 #include <linux/utsname.h>
27 #include <linux/numa.h>
28 #include <linux/suspend.h>
29 #include <linux/device.h>
30 #include <linux/freezer.h>
31 #include <linux/pm.h>
32 #include <linux/cpu.h>
33 #include <linux/uaccess.h>
34 #include <linux/io.h>
35 #include <linux/console.h>
36 #include <linux/vmalloc.h>
37 #include <linux/swap.h>
38 #include <linux/syscore_ops.h>
39 #include <linux/compiler.h>
40 #include <linux/hugetlb.h>
41 
42 #include <asm/page.h>
43 #include <asm/sections.h>
44 
45 #include <crypto/hash.h>
46 #include <crypto/sha.h>
47 #include "kexec_internal.h"
48 
49 DEFINE_MUTEX(kexec_mutex);
50 
51 /* Per cpu memory for storing cpu states in case of system crash. */
52 note_buf_t __percpu *crash_notes;
53 
54 /* vmcoreinfo stuff */
55 static unsigned char vmcoreinfo_data[VMCOREINFO_BYTES];
56 u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4];
57 size_t vmcoreinfo_size;
58 size_t vmcoreinfo_max_size = sizeof(vmcoreinfo_data);
59 
60 /* Flag to indicate we are going to kexec a new kernel */
61 bool kexec_in_progress = false;
62 
63 
64 /* Location of the reserved area for the crash kernel */
65 struct resource crashk_res = {
66 	.name  = "Crash kernel",
67 	.start = 0,
68 	.end   = 0,
69 	.flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
70 	.desc  = IORES_DESC_CRASH_KERNEL
71 };
72 struct resource crashk_low_res = {
73 	.name  = "Crash kernel",
74 	.start = 0,
75 	.end   = 0,
76 	.flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
77 	.desc  = IORES_DESC_CRASH_KERNEL
78 };
79 
80 int kexec_should_crash(struct task_struct *p)
81 {
82 	/*
83 	 * If crash_kexec_post_notifiers is enabled, don't run
84 	 * crash_kexec() here yet, which must be run after panic
85 	 * notifiers in panic().
86 	 */
87 	if (crash_kexec_post_notifiers)
88 		return 0;
89 	/*
90 	 * There are 4 panic() calls in the do_exit() path, each of which
91 	 * corresponds to one of these 4 conditions.
92 	 */
93 	if (in_interrupt() || !p->pid || is_global_init(p) || panic_on_oops)
94 		return 1;
95 	return 0;
96 }
97 
98 /*
99  * When kexec transitions to the new kernel there is a one-to-one
100  * mapping between physical and virtual addresses.  On processors
101  * where you can disable the MMU this is trivial and easy.  For
102  * others it is still a simple, predictable page table to set up.
103  *
104  * In that environment kexec copies the new kernel to its final
105  * resting place.  This means I can only support memory whose
106  * physical address can fit in an unsigned long.  In particular
107  * addresses where (pfn << PAGE_SHIFT) > ULONG_MAX cannot be handled.
108  * If the assembly stub has more restrictive requirements
109  * KEXEC_SOURCE_MEMORY_LIMIT and KEXEC_DEST_MEMORY_LIMIT can be
110  * defined more restrictively in <asm/kexec.h>.
111  *
112  * The code for the transition from the current kernel to
113  * the new kernel is placed in the control_code_buffer, whose size
114  * is given by KEXEC_CONTROL_PAGE_SIZE.  In the best case only a single
115  * page of memory is necessary, but some architectures require more.
116  * Because this memory must be identity mapped in the transition from
117  * virtual to physical addresses it must live in the range
118  * 0 - TASK_SIZE, as only the user space mappings are arbitrarily
119  * modifiable.
120  *
121  * The assembly stub in the control code buffer is passed a linked list
122  * of descriptor pages detailing the source pages of the new kernel,
123  * and the destination addresses of those source pages.  As this data
124  * structure is not used in the context of the current OS, it must
125  * be self-contained.
126  *
127  * The code has been made to work with highmem pages and will use a
128  * destination page in its final resting place (if it happens
129  * to allocate it).  The end product of this is that most of the
130  * physical address space, and most of RAM can be used.
131  *
132  * Future directions include:
133  *  - allocating a page table with the control code buffer identity
134  *    mapped, to simplify machine_kexec and make kexec_on_panic more
135  *    reliable.
136  */
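
/*
 * An illustrative sketch of that linked list: each kimage_entry_t holds a
 * page-aligned physical address with an IND_* type flag in its low bits
 * (see <linux/kexec.h>), for example:
 *
 *	dest | IND_DESTINATION	where the following source pages are copied
 *	src0 | IND_SOURCE	copy this page to dest
 *	src1 | IND_SOURCE	copy this page to dest + PAGE_SIZE
 *	next | IND_INDIRECTION	continue walking the list in this page
 *	...
 *	IND_DONE		end of the list
 *
 * kimage_add_entry(), kimage_set_destination(), kimage_add_page() and
 * kimage_terminate() below build this structure.
 */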
137 
138 /*
139  * KIMAGE_NO_DEST is an impossible destination address, used for
140  * allocating pages whose destination address we do not care about.
141  */
142 #define KIMAGE_NO_DEST (-1UL)
143 
144 static struct page *kimage_alloc_page(struct kimage *image,
145 				       gfp_t gfp_mask,
146 				       unsigned long dest);
147 
148 int sanity_check_segment_list(struct kimage *image)
149 {
150 	int result, i;
151 	unsigned long nr_segments = image->nr_segments;
152 
153 	/*
154 	 * Verify we have good destination addresses.  The caller is
155 	 * responsible for making certain we don't attempt to load
156 	 * the new image into invalid or reserved areas of RAM.  This
157 	 * just verifies it is an address we can use.
158 	 *
159 	 * Since the kernel does everything in page size chunks, ensure
160 	 * the destination addresses are page aligned.  Too many
161 	 * special cases crop up when we don't do this.  The most
162 	 * insidious is getting overlapping destination addresses
163 	 * simply because addresses are changed to page size
164 	 * granularity.
165 	 */
166 	result = -EADDRNOTAVAIL;
167 	for (i = 0; i < nr_segments; i++) {
168 		unsigned long mstart, mend;
169 
170 		mstart = image->segment[i].mem;
171 		mend   = mstart + image->segment[i].memsz;
172 		if ((mstart & ~PAGE_MASK) || (mend & ~PAGE_MASK))
173 			return result;
174 		if (mend >= KEXEC_DESTINATION_MEMORY_LIMIT)
175 			return result;
176 	}
177 
178 	/* Verify our destination addresses do not overlap.
179 	 * If we allowed overlapping destination addresses
180 	 * through, very weird things can happen with no
181 	 * easy explanation as one segment stomps on another.
182 	 */
183 	result = -EINVAL;
184 	for (i = 0; i < nr_segments; i++) {
185 		unsigned long mstart, mend;
186 		unsigned long j;
187 
188 		mstart = image->segment[i].mem;
189 		mend   = mstart + image->segment[i].memsz;
190 		for (j = 0; j < i; j++) {
191 			unsigned long pstart, pend;
192 
193 			pstart = image->segment[j].mem;
194 			pend   = pstart + image->segment[j].memsz;
195 			/* Do the segments overlap ? */
196 			if ((mend > pstart) && (mstart < pend))
197 				return result;
198 		}
199 	}
200 
201 	/* Ensure our buffer sizes do not exceed
202 	 * our memory sizes.  This should always be the case,
203 	 * and it is easier to check up front than to be surprised
204 	 * later on.
205 	 */
206 	result = -EINVAL;
207 	for (i = 0; i < nr_segments; i++) {
208 		if (image->segment[i].bufsz > image->segment[i].memsz)
209 			return result;
210 	}
211 
212 	/*
213 	 * Verify we have good destination addresses.  Normally
214 	 * the caller is responsible for making certain we don't
215 	 * attempt to load the new image into invalid or reserved
216 	 * areas of RAM.  But crash kernels are preloaded into a
217 	 * reserved area of RAM.  We must ensure the addresses
218 	 * are in the reserved area, otherwise preloading the
219 	 * kernel could corrupt things.
220 	 */
221 
222 	if (image->type == KEXEC_TYPE_CRASH) {
223 		result = -EADDRNOTAVAIL;
224 		for (i = 0; i < nr_segments; i++) {
225 			unsigned long mstart, mend;
226 
227 			mstart = image->segment[i].mem;
228 			mend = mstart + image->segment[i].memsz - 1;
229 			/* Ensure we are within the crash kernel limits */
230 			if ((mstart < crashk_res.start) ||
231 			    (mend > crashk_res.end))
232 				return result;
233 		}
234 	}
235 
236 	return 0;
237 }
238 
239 struct kimage *do_kimage_alloc_init(void)
240 {
241 	struct kimage *image;
242 
243 	/* Allocate a controlling structure */
244 	image = kzalloc(sizeof(*image), GFP_KERNEL);
245 	if (!image)
246 		return NULL;
247 
248 	image->head = 0;
249 	image->entry = &image->head;
250 	image->last_entry = &image->head;
251 	image->control_page = ~0; /* By default this does not apply */
252 	image->type = KEXEC_TYPE_DEFAULT;
253 
254 	/* Initialize the list of control pages */
255 	INIT_LIST_HEAD(&image->control_pages);
256 
257 	/* Initialize the list of destination pages */
258 	INIT_LIST_HEAD(&image->dest_pages);
259 
260 	/* Initialize the list of unusable pages */
261 	INIT_LIST_HEAD(&image->unusable_pages);
262 
263 	return image;
264 }
265 
266 int kimage_is_destination_range(struct kimage *image,
267 					unsigned long start,
268 					unsigned long end)
269 {
270 	unsigned long i;
271 
272 	for (i = 0; i < image->nr_segments; i++) {
273 		unsigned long mstart, mend;
274 
275 		mstart = image->segment[i].mem;
276 		mend = mstart + image->segment[i].memsz;
277 		if ((end > mstart) && (start < mend))
278 			return 1;
279 	}
280 
281 	return 0;
282 }
283 
284 static struct page *kimage_alloc_pages(gfp_t gfp_mask, unsigned int order)
285 {
286 	struct page *pages;
287 
288 	pages = alloc_pages(gfp_mask, order);
289 	if (pages) {
290 		unsigned int count, i;
291 
292 		pages->mapping = NULL;
293 		set_page_private(pages, order);
294 		count = 1 << order;
295 		for (i = 0; i < count; i++)
296 			SetPageReserved(pages + i);
297 	}
298 
299 	return pages;
300 }
301 
302 static void kimage_free_pages(struct page *page)
303 {
304 	unsigned int order, count, i;
305 
306 	order = page_private(page);
307 	count = 1 << order;
308 	for (i = 0; i < count; i++)
309 		ClearPageReserved(page + i);
310 	__free_pages(page, order);
311 }
312 
313 void kimage_free_page_list(struct list_head *list)
314 {
315 	struct page *page, *next;
316 
317 	list_for_each_entry_safe(page, next, list, lru) {
318 		list_del(&page->lru);
319 		kimage_free_pages(page);
320 	}
321 }
322 
323 static struct page *kimage_alloc_normal_control_pages(struct kimage *image,
324 							unsigned int order)
325 {
326 	/* Control pages are special, they are the intermediaries
327 	 * that are needed while we copy the rest of the pages
328 	 * to their final resting place.  As such they must
329 	 * not conflict with either the destination addresses
330 	 * or memory the kernel is already using.
331 	 *
332 	 * The only case where we really need more than one of
333 	 * these is for architectures where we cannot disable
334 	 * the MMU and must instead generate an identity mapped
335 	 * page table for all of the memory.
336 	 *
337 	 * At worst this runs in O(N) of the image size.
338 	 */
339 	struct list_head extra_pages;
340 	struct page *pages;
341 	unsigned int count;
342 
343 	count = 1 << order;
344 	INIT_LIST_HEAD(&extra_pages);
345 
346 	/* Loop while I can allocate a page and the page allocated
347 	 * is a destination page.
348 	 */
349 	do {
350 		unsigned long pfn, epfn, addr, eaddr;
351 
352 		pages = kimage_alloc_pages(KEXEC_CONTROL_MEMORY_GFP, order);
353 		if (!pages)
354 			break;
355 		pfn   = page_to_pfn(pages);
356 		epfn  = pfn + count;
357 		addr  = pfn << PAGE_SHIFT;
358 		eaddr = epfn << PAGE_SHIFT;
359 		if ((epfn >= (KEXEC_CONTROL_MEMORY_LIMIT >> PAGE_SHIFT)) ||
360 			      kimage_is_destination_range(image, addr, eaddr)) {
361 			list_add(&pages->lru, &extra_pages);
362 			pages = NULL;
363 		}
364 	} while (!pages);
365 
366 	if (pages) {
367 		/* Remember the allocated page... */
368 		list_add(&pages->lru, &image->control_pages);
369 
370 		/* Because the page is already in its destination
371 		 * location we will never allocate another page at
372 		 * that address.  Therefore kimage_alloc_pages
373 		 * will not return it (again) and we don't need
374 		 * to give it an entry in image->segment[].
375 		 */
376 	}
377 	/* Deal with the destination pages I have inadvertently allocated.
378 	 *
379 	 * Ideally I would convert multi-page allocations into single
380 	 * page allocations, and add everything to image->dest_pages.
381 	 *
382 	 * For now it is simpler to just free the pages.
383 	 */
384 	kimage_free_page_list(&extra_pages);
385 
386 	return pages;
387 }
388 
389 static struct page *kimage_alloc_crash_control_pages(struct kimage *image,
390 						      unsigned int order)
391 {
392 	/* Control pages are special, they are the intermediaries
393 	 * that are needed while we copy the rest of the pages
394 	 * to their final resting place.  As such they must
395 	 * not conflict with either the destination addresses
396 	 * or memory the kernel is already using.
397 	 *
398 	 * Control pages are also the only pages we must allocate
399 	 * when loading a crash kernel.  All of the other pages
400 	 * are specified by the segments and we just memcpy
401 	 * into them directly.
402 	 *
403 	 * The only case where we really need more than one of
404 	 * these is for architectures where we cannot disable
405 	 * the MMU and must instead generate an identity mapped
406 	 * page table for all of the memory.
407 	 *
408 	 * Given the low demand this implements a very simple
409 	 * allocator that finds the first hole of the appropriate
410 	 * size in the reserved memory region, and allocates all
411 	 * of the memory up to and including the hole.
412 	 */
413 	unsigned long hole_start, hole_end, size;
414 	struct page *pages;
415 
416 	pages = NULL;
417 	size = (1 << order) << PAGE_SHIFT;
418 	hole_start = (image->control_page + (size - 1)) & ~(size - 1);
419 	hole_end   = hole_start + size - 1;
420 	while (hole_end <= crashk_res.end) {
421 		unsigned long i;
422 
423 		if (hole_end > KEXEC_CRASH_CONTROL_MEMORY_LIMIT)
424 			break;
425 		/* See if I overlap any of the segments */
426 		for (i = 0; i < image->nr_segments; i++) {
427 			unsigned long mstart, mend;
428 
429 			mstart = image->segment[i].mem;
430 			mend   = mstart + image->segment[i].memsz - 1;
431 			if ((hole_end >= mstart) && (hole_start <= mend)) {
432 				/* Advance the hole to the end of the segment */
433 				hole_start = (mend + (size - 1)) & ~(size - 1);
434 				hole_end   = hole_start + size - 1;
435 				break;
436 			}
437 		}
438 		/* If I don't overlap any segments I have found my hole! */
439 		if (i == image->nr_segments) {
440 			pages = pfn_to_page(hole_start >> PAGE_SHIFT);
441 			image->control_page = hole_end;
442 			break;
443 		}
444 	}
445 
446 	return pages;
447 }
448 
449 
450 struct page *kimage_alloc_control_pages(struct kimage *image,
451 					 unsigned int order)
452 {
453 	struct page *pages = NULL;
454 
455 	switch (image->type) {
456 	case KEXEC_TYPE_DEFAULT:
457 		pages = kimage_alloc_normal_control_pages(image, order);
458 		break;
459 	case KEXEC_TYPE_CRASH:
460 		pages = kimage_alloc_crash_control_pages(image, order);
461 		break;
462 	}
463 
464 	return pages;
465 }
466 
467 static int kimage_add_entry(struct kimage *image, kimage_entry_t entry)
468 {
469 	if (*image->entry != 0)
470 		image->entry++;
471 
472 	if (image->entry == image->last_entry) {
473 		kimage_entry_t *ind_page;
474 		struct page *page;
475 
476 		page = kimage_alloc_page(image, GFP_KERNEL, KIMAGE_NO_DEST);
477 		if (!page)
478 			return -ENOMEM;
479 
480 		ind_page = page_address(page);
481 		*image->entry = virt_to_phys(ind_page) | IND_INDIRECTION;
482 		image->entry = ind_page;
483 		image->last_entry = ind_page +
484 				      ((PAGE_SIZE/sizeof(kimage_entry_t)) - 1);
485 	}
486 	*image->entry = entry;
487 	image->entry++;
488 	*image->entry = 0;
489 
490 	return 0;
491 }
492 
493 static int kimage_set_destination(struct kimage *image,
494 				   unsigned long destination)
495 {
496 	int result;
497 
498 	destination &= PAGE_MASK;
499 	result = kimage_add_entry(image, destination | IND_DESTINATION);
500 
501 	return result;
502 }
503 
504 
505 static int kimage_add_page(struct kimage *image, unsigned long page)
506 {
507 	int result;
508 
509 	page &= PAGE_MASK;
510 	result = kimage_add_entry(image, page | IND_SOURCE);
511 
512 	return result;
513 }
514 
515 
516 static void kimage_free_extra_pages(struct kimage *image)
517 {
518 	/* Walk through and free any extra destination pages I may have */
519 	kimage_free_page_list(&image->dest_pages);
520 
521 	/* Walk through and free any unusable pages I have cached */
522 	kimage_free_page_list(&image->unusable_pages);
523 
524 }
525 void kimage_terminate(struct kimage *image)
526 {
527 	if (*image->entry != 0)
528 		image->entry++;
529 
530 	*image->entry = IND_DONE;
531 }
532 
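
/*
 * for_each_kimage_entry() walks the flat entry list built above: ptr points
 * at the current entry and entry holds its value.  An IND_INDIRECTION entry
 * redirects the walk into the next indirection page, and the walk stops at
 * the first IND_DONE (or zero) entry.
 */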
533 #define for_each_kimage_entry(image, ptr, entry) \
534 	for (ptr = &image->head; (entry = *ptr) && !(entry & IND_DONE); \
535 		ptr = (entry & IND_INDIRECTION) ? \
536 			phys_to_virt((entry & PAGE_MASK)) : ptr + 1)
537 
538 static void kimage_free_entry(kimage_entry_t entry)
539 {
540 	struct page *page;
541 
542 	page = pfn_to_page(entry >> PAGE_SHIFT);
543 	kimage_free_pages(page);
544 }
545 
546 void kimage_free(struct kimage *image)
547 {
548 	kimage_entry_t *ptr, entry;
549 	kimage_entry_t ind = 0;
550 
551 	if (!image)
552 		return;
553 
554 	kimage_free_extra_pages(image);
555 	for_each_kimage_entry(image, ptr, entry) {
556 		if (entry & IND_INDIRECTION) {
557 			/* Free the previous indirection page */
558 			if (ind & IND_INDIRECTION)
559 				kimage_free_entry(ind);
560 			/* Save this indirection page until we are
561 			 * done with it.
562 			 */
563 			ind = entry;
564 		} else if (entry & IND_SOURCE)
565 			kimage_free_entry(entry);
566 	}
567 	/* Free the final indirection page */
568 	if (ind & IND_INDIRECTION)
569 		kimage_free_entry(ind);
570 
571 	/* Handle any machine specific cleanup */
572 	machine_kexec_cleanup(image);
573 
574 	/* Free the kexec control pages... */
575 	kimage_free_page_list(&image->control_pages);
576 
577 	/*
578 	 * Free up any temporary buffers allocated.  This might be hit if
579 	 * an error occurred long after buffer allocation.
580 	 */
581 	if (image->file_mode)
582 		kimage_file_post_load_cleanup(image);
583 
584 	kfree(image);
585 }
586 
587 static kimage_entry_t *kimage_dst_used(struct kimage *image,
588 					unsigned long page)
589 {
590 	kimage_entry_t *ptr, entry;
591 	unsigned long destination = 0;
592 
593 	for_each_kimage_entry(image, ptr, entry) {
594 		if (entry & IND_DESTINATION)
595 			destination = entry & PAGE_MASK;
596 		else if (entry & IND_SOURCE) {
597 			if (page == destination)
598 				return ptr;
599 			destination += PAGE_SIZE;
600 		}
601 	}
602 
603 	return NULL;
604 }
605 
606 static struct page *kimage_alloc_page(struct kimage *image,
607 					gfp_t gfp_mask,
608 					unsigned long destination)
609 {
610 	/*
611 	 * Here we implement safeguards to ensure that a source page
612 	 * is not copied to its destination page before the data on
613 	 * the destination page is no longer useful.
614 	 *
615 	 * To do this we maintain the invariant that a source page is
616 	 * either its own destination page, or it is not a
617 	 * destination page at all.
618 	 *
619 	 * That is slightly stronger than required, but the proof
620 	 * that no problems can occur is trivial, and the
621 	 * implementation is simple to verify.
622 	 *
623 	 * When allocating all pages normally this algorithm will run
624 	 * in O(N) time, but in the worst case it will run in O(N^2)
625 	 * time.   If the runtime is a problem the data structures can
626 	 * be fixed.
627 	 */
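	/*
	 * Sketch of the interesting case below: if the freshly allocated
	 * page turns out to be the destination of an already-placed source
	 * page, that source page's contents are copied into the new page,
	 * the existing IND_SOURCE entry is redirected to the new page, and
	 * the old page is handed back as the allocation (unless its gfp
	 * flags don't fit), which keeps the invariant above intact.
	 */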
628 	struct page *page;
629 	unsigned long addr;
630 
631 	/*
632 	 * Walk through the list of destination pages, and see if I
633 	 * have a match.
634 	 */
635 	list_for_each_entry(page, &image->dest_pages, lru) {
636 		addr = page_to_pfn(page) << PAGE_SHIFT;
637 		if (addr == destination) {
638 			list_del(&page->lru);
639 			return page;
640 		}
641 	}
642 	page = NULL;
643 	while (1) {
644 		kimage_entry_t *old;
645 
646 		/* Allocate a page, if we run out of memory give up */
647 		page = kimage_alloc_pages(gfp_mask, 0);
648 		if (!page)
649 			return NULL;
650 		/* If the page cannot be used, file it away */
651 		if (page_to_pfn(page) >
652 				(KEXEC_SOURCE_MEMORY_LIMIT >> PAGE_SHIFT)) {
653 			list_add(&page->lru, &image->unusable_pages);
654 			continue;
655 		}
656 		addr = page_to_pfn(page) << PAGE_SHIFT;
657 
658 		/* If it is the destination page we want, use it */
659 		if (addr == destination)
660 			break;
661 
662 		/* If the page is not a destination page use it */
663 		if (!kimage_is_destination_range(image, addr,
664 						  addr + PAGE_SIZE))
665 			break;
666 
667 		/*
668 		 * I know that the page is someone's destination page.
669 		 * See if there is already a source page for this
670 		 * destination page.  And if so, swap the source pages.
671 		 */
672 		old = kimage_dst_used(image, addr);
673 		if (old) {
674 			/* If so move it */
675 			unsigned long old_addr;
676 			struct page *old_page;
677 
678 			old_addr = *old & PAGE_MASK;
679 			old_page = pfn_to_page(old_addr >> PAGE_SHIFT);
680 			copy_highpage(page, old_page);
681 			*old = addr | (*old & ~PAGE_MASK);
682 
683 			/* The old page I have found cannot be a
684 			 * destination page, so return it if its
685 			 * gfp_flags honor the ones passed in.
686 			 */
687 			if (!(gfp_mask & __GFP_HIGHMEM) &&
688 			    PageHighMem(old_page)) {
689 				kimage_free_pages(old_page);
690 				continue;
691 			}
692 			addr = old_addr;
693 			page = old_page;
694 			break;
695 		}
696 		/* Place the page on the destination list, to be used later */
697 		list_add(&page->lru, &image->dest_pages);
698 	}
699 
700 	return page;
701 }
702 
703 static int kimage_load_normal_segment(struct kimage *image,
704 					 struct kexec_segment *segment)
705 {
706 	unsigned long maddr;
707 	size_t ubytes, mbytes;
708 	int result;
709 	unsigned char __user *buf = NULL;
710 	unsigned char *kbuf = NULL;
711 
712 	result = 0;
713 	if (image->file_mode)
714 		kbuf = segment->kbuf;
715 	else
716 		buf = segment->buf;
717 	ubytes = segment->bufsz;
718 	mbytes = segment->memsz;
719 	maddr = segment->mem;
720 
721 	result = kimage_set_destination(image, maddr);
722 	if (result < 0)
723 		goto out;
724 
725 	while (mbytes) {
726 		struct page *page;
727 		char *ptr;
728 		size_t uchunk, mchunk;
729 
730 		page = kimage_alloc_page(image, GFP_HIGHUSER, maddr);
731 		if (!page) {
732 			result  = -ENOMEM;
733 			goto out;
734 		}
735 		result = kimage_add_page(image, page_to_pfn(page)
736 								<< PAGE_SHIFT);
737 		if (result < 0)
738 			goto out;
739 
740 		ptr = kmap(page);
741 		/* Start with a clear page */
742 		clear_page(ptr);
743 		ptr += maddr & ~PAGE_MASK;
744 		mchunk = min_t(size_t, mbytes,
745 				PAGE_SIZE - (maddr & ~PAGE_MASK));
746 		uchunk = min(ubytes, mchunk);
747 
748 		/* For file based kexec, source pages are in kernel memory */
749 		if (image->file_mode)
750 			memcpy(ptr, kbuf, uchunk);
751 		else
752 			result = copy_from_user(ptr, buf, uchunk);
753 		kunmap(page);
754 		if (result) {
755 			result = -EFAULT;
756 			goto out;
757 		}
758 		ubytes -= uchunk;
759 		maddr  += mchunk;
760 		if (image->file_mode)
761 			kbuf += mchunk;
762 		else
763 			buf += mchunk;
764 		mbytes -= mchunk;
765 	}
766 out:
767 	return result;
768 }
769 
770 static int kimage_load_crash_segment(struct kimage *image,
771 					struct kexec_segment *segment)
772 {
773 	/* For crash dump kernels we simply copy the data from
774 	 * user space to its destination.
775 	 * We do things a page at a time for the sake of kmap.
776 	 */
777 	unsigned long maddr;
778 	size_t ubytes, mbytes;
779 	int result;
780 	unsigned char __user *buf = NULL;
781 	unsigned char *kbuf = NULL;
782 
783 	result = 0;
784 	if (image->file_mode)
785 		kbuf = segment->kbuf;
786 	else
787 		buf = segment->buf;
788 	ubytes = segment->bufsz;
789 	mbytes = segment->memsz;
790 	maddr = segment->mem;
791 	while (mbytes) {
792 		struct page *page;
793 		char *ptr;
794 		size_t uchunk, mchunk;
795 
796 		page = pfn_to_page(maddr >> PAGE_SHIFT);
797 		if (!page) {
798 			result  = -ENOMEM;
799 			goto out;
800 		}
801 		ptr = kmap(page);
802 		ptr += maddr & ~PAGE_MASK;
803 		mchunk = min_t(size_t, mbytes,
804 				PAGE_SIZE - (maddr & ~PAGE_MASK));
805 		uchunk = min(ubytes, mchunk);
806 		if (mchunk > uchunk) {
807 			/* Zero the trailing part of the page */
808 			memset(ptr + uchunk, 0, mchunk - uchunk);
809 		}
810 
811 		/* For file based kexec, source pages are in kernel memory */
812 		if (image->file_mode)
813 			memcpy(ptr, kbuf, uchunk);
814 		else
815 			result = copy_from_user(ptr, buf, uchunk);
816 		kexec_flush_icache_page(page);
817 		kunmap(page);
818 		if (result) {
819 			result = -EFAULT;
820 			goto out;
821 		}
822 		ubytes -= uchunk;
823 		maddr  += mchunk;
824 		if (image->file_mode)
825 			kbuf += mchunk;
826 		else
827 			buf += mchunk;
828 		mbytes -= mchunk;
829 	}
830 out:
831 	return result;
832 }
833 
834 int kimage_load_segment(struct kimage *image,
835 				struct kexec_segment *segment)
836 {
837 	int result = -ENOMEM;
838 
839 	switch (image->type) {
840 	case KEXEC_TYPE_DEFAULT:
841 		result = kimage_load_normal_segment(image, segment);
842 		break;
843 	case KEXEC_TYPE_CRASH:
844 		result = kimage_load_crash_segment(image, segment);
845 		break;
846 	}
847 
848 	return result;
849 }
850 
851 struct kimage *kexec_image;
852 struct kimage *kexec_crash_image;
853 int kexec_load_disabled;
854 
855 /*
856  * No panic_cpu check version of crash_kexec().  This function is called
857  * only when panic_cpu holds the current CPU number; this is the only CPU
858  * which processes crash_kexec routines.
859  */
860 void __crash_kexec(struct pt_regs *regs)
861 {
862 	/* Take the kexec_mutex here to prevent sys_kexec_load
863 	 * running on one cpu from replacing the crash kernel
864 	 * we are using after a panic on a different cpu.
865 	 *
866 	 * If the crash kernel was not located in a fixed area
867 	 * of memory the xchg(&kexec_crash_image) would be
868 	 * sufficient.  But since I reuse the memory...
869 	 */
870 	if (mutex_trylock(&kexec_mutex)) {
871 		if (kexec_crash_image) {
872 			struct pt_regs fixed_regs;
873 
874 			crash_setup_regs(&fixed_regs, regs);
875 			crash_save_vmcoreinfo();
876 			machine_crash_shutdown(&fixed_regs);
877 			machine_kexec(kexec_crash_image);
878 		}
879 		mutex_unlock(&kexec_mutex);
880 	}
881 }
882 
883 void crash_kexec(struct pt_regs *regs)
884 {
885 	int old_cpu, this_cpu;
886 
887 	/*
888 	 * Only one CPU is allowed to execute the crash_kexec() code as with
889 	 * panic().  Otherwise parallel calls of panic() and crash_kexec()
890 	 * may stop each other.  To exclude them, we use panic_cpu here too.
891 	 */
892 	this_cpu = raw_smp_processor_id();
893 	old_cpu = atomic_cmpxchg(&panic_cpu, PANIC_CPU_INVALID, this_cpu);
894 	if (old_cpu == PANIC_CPU_INVALID) {
895 		/* This is the 1st CPU which comes here, so go ahead. */
896 		__crash_kexec(regs);
897 
898 		/*
899 		 * Reset panic_cpu to allow another panic()/crash_kexec()
900 		 * call.
901 		 */
902 		atomic_set(&panic_cpu, PANIC_CPU_INVALID);
903 	}
904 }
905 
906 size_t crash_get_memory_size(void)
907 {
908 	size_t size = 0;
909 
910 	mutex_lock(&kexec_mutex);
911 	if (crashk_res.end != crashk_res.start)
912 		size = resource_size(&crashk_res);
913 	mutex_unlock(&kexec_mutex);
914 	return size;
915 }
916 
917 void __weak crash_free_reserved_phys_range(unsigned long begin,
918 					   unsigned long end)
919 {
920 	unsigned long addr;
921 
922 	for (addr = begin; addr < end; addr += PAGE_SIZE)
923 		free_reserved_page(pfn_to_page(addr >> PAGE_SHIFT));
924 }
925 
926 int crash_shrink_memory(unsigned long new_size)
927 {
928 	int ret = 0;
929 	unsigned long start, end;
930 	unsigned long old_size;
931 	struct resource *ram_res;
932 
933 	mutex_lock(&kexec_mutex);
934 
935 	if (kexec_crash_image) {
936 		ret = -ENOENT;
937 		goto unlock;
938 	}
939 	start = crashk_res.start;
940 	end = crashk_res.end;
941 	old_size = (end == 0) ? 0 : end - start + 1;
942 	if (new_size >= old_size) {
943 		ret = (new_size == old_size) ? 0 : -EINVAL;
944 		goto unlock;
945 	}
946 
947 	ram_res = kzalloc(sizeof(*ram_res), GFP_KERNEL);
948 	if (!ram_res) {
949 		ret = -ENOMEM;
950 		goto unlock;
951 	}
952 
953 	start = roundup(start, KEXEC_CRASH_MEM_ALIGN);
954 	end = roundup(start + new_size, KEXEC_CRASH_MEM_ALIGN);
955 
956 	crash_map_reserved_pages();
957 	crash_free_reserved_phys_range(end, crashk_res.end);
958 
959 	if ((start == end) && (crashk_res.parent != NULL))
960 		release_resource(&crashk_res);
961 
962 	ram_res->start = end;
963 	ram_res->end = crashk_res.end;
964 	ram_res->flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM;
965 	ram_res->name = "System RAM";
966 
967 	crashk_res.end = end - 1;
968 
969 	insert_resource(&iomem_resource, ram_res);
970 	crash_unmap_reserved_pages();
971 
972 unlock:
973 	mutex_unlock(&kexec_mutex);
974 	return ret;
975 }
976 
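/*
 * The buffer produced by append_elf_note() and terminated by final_note()
 * is a standard ELF note sequence: each note is an elf_note header followed
 * by the NUL-terminated name and the descriptor data, each padded to a
 * 4-byte boundary; the sequence ends with an all-zero header.
 */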
977 static u32 *append_elf_note(u32 *buf, char *name, unsigned type, void *data,
978 			    size_t data_len)
979 {
980 	struct elf_note note;
981 
982 	note.n_namesz = strlen(name) + 1;
983 	note.n_descsz = data_len;
984 	note.n_type   = type;
985 	memcpy(buf, &note, sizeof(note));
986 	buf += (sizeof(note) + 3)/4;
987 	memcpy(buf, name, note.n_namesz);
988 	buf += (note.n_namesz + 3)/4;
989 	memcpy(buf, data, note.n_descsz);
990 	buf += (note.n_descsz + 3)/4;
991 
992 	return buf;
993 }
994 
995 static void final_note(u32 *buf)
996 {
997 	struct elf_note note;
998 
999 	note.n_namesz = 0;
1000 	note.n_descsz = 0;
1001 	note.n_type   = 0;
1002 	memcpy(buf, &note, sizeof(note));
1003 }
1004 
1005 void crash_save_cpu(struct pt_regs *regs, int cpu)
1006 {
1007 	struct elf_prstatus prstatus;
1008 	u32 *buf;
1009 
1010 	if ((cpu < 0) || (cpu >= nr_cpu_ids))
1011 		return;
1012 
1013 	/* Using ELF notes here is opportunistic.
1014 	 * I need a well defined structure format
1015 	 * for the data I pass, and I need tags
1016 	 * on the data to indicate what information I have
1017 	 * squirrelled away.  ELF notes happen to provide
1018 	 * all of that, so there is no need to invent something new.
1019 	 */
1020 	buf = (u32 *)per_cpu_ptr(crash_notes, cpu);
1021 	if (!buf)
1022 		return;
1023 	memset(&prstatus, 0, sizeof(prstatus));
1024 	prstatus.pr_pid = current->pid;
1025 	elf_core_copy_kernel_regs(&prstatus.pr_reg, regs);
1026 	buf = append_elf_note(buf, KEXEC_CORE_NOTE_NAME, NT_PRSTATUS,
1027 			      &prstatus, sizeof(prstatus));
1028 	final_note(buf);
1029 }
1030 
1031 static int __init crash_notes_memory_init(void)
1032 {
1033 	/* Allocate memory for saving cpu registers. */
1034 	size_t size, align;
1035 
1036 	/*
1037 	 * crash_notes could be allocated across 2 vmalloc pages when percpu
1038 	 * is vmalloc based.  vmalloc doesn't guarantee that 2 contiguous vmalloc
1039 	 * pages are also on 2 contiguous physical pages.  In this case the
1040 	 * 2nd part of crash_notes in the 2nd page could be lost since only the
1041 	 * starting address and size of crash_notes are exported through sysfs.
1042 	 * Here round up the size of crash_notes to the nearest power of two
1043 	 * and pass it to __alloc_percpu as align value. This can make sure
1044 	 * crash_notes is allocated inside one physical page.
1045 	 */
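	/*
	 * For example (sizes purely illustrative): if sizeof(note_buf_t)
	 * were 424 bytes, align would become 512; a 512-byte-aligned,
	 * 424-byte object can never straddle a page boundary, because 512
	 * divides PAGE_SIZE and the whole object fits in one 512-byte slot.
	 */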
1046 	size = sizeof(note_buf_t);
1047 	align = min(roundup_pow_of_two(sizeof(note_buf_t)), PAGE_SIZE);
1048 
1049 	/*
1050 	 * Break the build if size is bigger than PAGE_SIZE, since crash_notes
1051 	 * would then definitely span 2 pages.
1052 	 */
1053 	BUILD_BUG_ON(size > PAGE_SIZE);
1054 
1055 	crash_notes = __alloc_percpu(size, align);
1056 	if (!crash_notes) {
1057 		pr_warn("Memory allocation for saving cpu register states failed\n");
1058 		return -ENOMEM;
1059 	}
1060 	return 0;
1061 }
1062 subsys_initcall(crash_notes_memory_init);
1063 
1064 
1065 /*
1066  * Parsing of the "crashkernel" command line.
1067  *
1068  * This code is intended to be called from architecture-specific code.
1069  */
1070 
1071 
1072 /*
1073  * This function parses command lines in the format
1074  *
1075  *   crashkernel=ramsize-range:size[,...][@offset]
1076  *
1077  * The function returns 0 on success and -EINVAL on failure.
1078  */
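/*
 * For example (an illustrative command line):
 *
 *	crashkernel=512M-2G:64M,2G-:128M@16M
 *
 * reserves 64M on a machine with between 512M and 2G of system RAM, 128M on
 * a machine with 2G or more, and in both cases places the reservation at
 * physical offset 16M.
 */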
1079 static int __init parse_crashkernel_mem(char *cmdline,
1080 					unsigned long long system_ram,
1081 					unsigned long long *crash_size,
1082 					unsigned long long *crash_base)
1083 {
1084 	char *cur = cmdline, *tmp;
1085 
1086 	/* for each entry of the comma-separated list */
1087 	do {
1088 		unsigned long long start, end = ULLONG_MAX, size;
1089 
1090 		/* get the start of the range */
1091 		start = memparse(cur, &tmp);
1092 		if (cur == tmp) {
1093 			pr_warn("crashkernel: Memory value expected\n");
1094 			return -EINVAL;
1095 		}
1096 		cur = tmp;
1097 		if (*cur != '-') {
1098 			pr_warn("crashkernel: '-' expected\n");
1099 			return -EINVAL;
1100 		}
1101 		cur++;
1102 
1103 		/* if no ':' is here, then we read the end */
1104 		if (*cur != ':') {
1105 			end = memparse(cur, &tmp);
1106 			if (cur == tmp) {
1107 				pr_warn("crashkernel: Memory value expected\n");
1108 				return -EINVAL;
1109 			}
1110 			cur = tmp;
1111 			if (end <= start) {
1112 				pr_warn("crashkernel: end <= start\n");
1113 				return -EINVAL;
1114 			}
1115 		}
1116 
1117 		if (*cur != ':') {
1118 			pr_warn("crashkernel: ':' expected\n");
1119 			return -EINVAL;
1120 		}
1121 		cur++;
1122 
1123 		size = memparse(cur, &tmp);
1124 		if (cur == tmp) {
1125 			pr_warn("Memory value expected\n");
1126 			return -EINVAL;
1127 		}
1128 		cur = tmp;
1129 		if (size >= system_ram) {
1130 			pr_warn("crashkernel: invalid size\n");
1131 			return -EINVAL;
1132 		}
1133 
1134 		/* match ? */
1135 		if (system_ram >= start && system_ram < end) {
1136 			*crash_size = size;
1137 			break;
1138 		}
1139 	} while (*cur++ == ',');
1140 
1141 	if (*crash_size > 0) {
1142 		while (*cur && *cur != ' ' && *cur != '@')
1143 			cur++;
1144 		if (*cur == '@') {
1145 			cur++;
1146 			*crash_base = memparse(cur, &tmp);
1147 			if (cur == tmp) {
1148 				pr_warn("Memory value expected after '@'\n");
1149 				return -EINVAL;
1150 			}
1151 		}
1152 	}
1153 
1154 	return 0;
1155 }
1156 
1157 /*
1158  * This function parses "simple" (old) crashkernel command lines like
1159  *
1160  *	crashkernel=size[@offset]
1161  *
1162  * It returns 0 on success and -EINVAL on failure.
1163  */
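/*
 * For example, "crashkernel=128M@16M" asks for a 128M reservation placed at
 * physical address 16M; the "@offset" part may be omitted.
 */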
1164 static int __init parse_crashkernel_simple(char *cmdline,
1165 					   unsigned long long *crash_size,
1166 					   unsigned long long *crash_base)
1167 {
1168 	char *cur = cmdline;
1169 
1170 	*crash_size = memparse(cmdline, &cur);
1171 	if (cmdline == cur) {
1172 		pr_warn("crashkernel: memory value expected\n");
1173 		return -EINVAL;
1174 	}
1175 
1176 	if (*cur == '@')
1177 		*crash_base = memparse(cur+1, &cur);
1178 	else if (*cur != ' ' && *cur != '\0') {
1179 		pr_warn("crashkernel: unrecognized char: %c\n", *cur);
1180 		return -EINVAL;
1181 	}
1182 
1183 	return 0;
1184 }
1185 
1186 #define SUFFIX_HIGH 0
1187 #define SUFFIX_LOW  1
1188 #define SUFFIX_NULL 2
1189 static __initdata char *suffix_tbl[] = {
1190 	[SUFFIX_HIGH] = ",high",
1191 	[SUFFIX_LOW]  = ",low",
1192 	[SUFFIX_NULL] = NULL,
1193 };
1194 
1195 /*
1196  * This function parses "suffix" crashkernel command lines like
1197  *
1198  *	crashkernel=size,[high|low]
1199  *
1200  * It returns 0 on success and -EINVAL on failure.
1201  */
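/*
 * For example, "crashkernel=256M,high" requests a 256M reservation using the
 * ",high" placement variant handled by the architecture code.  The suffix
 * must immediately follow the size, with nothing in between.
 */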
1202 static int __init parse_crashkernel_suffix(char *cmdline,
1203 					   unsigned long long	*crash_size,
1204 					   const char *suffix)
1205 {
1206 	char *cur = cmdline;
1207 
1208 	*crash_size = memparse(cmdline, &cur);
1209 	if (cmdline == cur) {
1210 		pr_warn("crashkernel: memory value expected\n");
1211 		return -EINVAL;
1212 	}
1213 
1214 	/* check with suffix */
1215 	if (strncmp(cur, suffix, strlen(suffix))) {
1216 		pr_warn("crashkernel: unrecognized char: %c\n", *cur);
1217 		return -EINVAL;
1218 	}
1219 	cur += strlen(suffix);
1220 	if (*cur != ' ' && *cur != '\0') {
1221 		pr_warn("crashkernel: unrecognized char: %c\n", *cur);
1222 		return -EINVAL;
1223 	}
1224 
1225 	return 0;
1226 }
1227 
1228 static __init char *get_last_crashkernel(char *cmdline,
1229 			     const char *name,
1230 			     const char *suffix)
1231 {
1232 	char *p = cmdline, *ck_cmdline = NULL;
1233 
1234 	/* find crashkernel and use the last one if there are more */
1235 	p = strstr(p, name);
1236 	while (p) {
1237 		char *end_p = strchr(p, ' ');
1238 		char *q;
1239 
1240 		if (!end_p)
1241 			end_p = p + strlen(p);
1242 
1243 		if (!suffix) {
1244 			int i;
1245 
1246 			/* skip the one with any known suffix */
1247 			for (i = 0; suffix_tbl[i]; i++) {
1248 				q = end_p - strlen(suffix_tbl[i]);
1249 				if (!strncmp(q, suffix_tbl[i],
1250 					     strlen(suffix_tbl[i])))
1251 					goto next;
1252 			}
1253 			ck_cmdline = p;
1254 		} else {
1255 			q = end_p - strlen(suffix);
1256 			if (!strncmp(q, suffix, strlen(suffix)))
1257 				ck_cmdline = p;
1258 		}
1259 next:
1260 		p = strstr(p+1, name);
1261 	}
1262 
1263 	if (!ck_cmdline)
1264 		return NULL;
1265 
1266 	return ck_cmdline;
1267 }
1268 
1269 static int __init __parse_crashkernel(char *cmdline,
1270 			     unsigned long long system_ram,
1271 			     unsigned long long *crash_size,
1272 			     unsigned long long *crash_base,
1273 			     const char *name,
1274 			     const char *suffix)
1275 {
1276 	char	*first_colon, *first_space;
1277 	char	*ck_cmdline;
1278 
1279 	BUG_ON(!crash_size || !crash_base);
1280 	*crash_size = 0;
1281 	*crash_base = 0;
1282 
1283 	ck_cmdline = get_last_crashkernel(cmdline, name, suffix);
1284 
1285 	if (!ck_cmdline)
1286 		return -EINVAL;
1287 
1288 	ck_cmdline += strlen(name);
1289 
1290 	if (suffix)
1291 		return parse_crashkernel_suffix(ck_cmdline, crash_size,
1292 				suffix);
1293 	/*
1294 	 * if the commandline contains a ':', then that's the extended
1295 	 * syntax -- if not, it must be the classic syntax
1296 	 */
1297 	first_colon = strchr(ck_cmdline, ':');
1298 	first_space = strchr(ck_cmdline, ' ');
1299 	if (first_colon && (!first_space || first_colon < first_space))
1300 		return parse_crashkernel_mem(ck_cmdline, system_ram,
1301 				crash_size, crash_base);
1302 
1303 	return parse_crashkernel_simple(ck_cmdline, crash_size, crash_base);
1304 }
1305 
1306 /*
1307  * This function is the entry point for command line parsing and should be
1308  * called from the arch-specific code.
1309  */
1310 int __init parse_crashkernel(char *cmdline,
1311 			     unsigned long long system_ram,
1312 			     unsigned long long *crash_size,
1313 			     unsigned long long *crash_base)
1314 {
1315 	return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base,
1316 					"crashkernel=", NULL);
1317 }
1318 
1319 int __init parse_crashkernel_high(char *cmdline,
1320 			     unsigned long long system_ram,
1321 			     unsigned long long *crash_size,
1322 			     unsigned long long *crash_base)
1323 {
1324 	return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base,
1325 				"crashkernel=", suffix_tbl[SUFFIX_HIGH]);
1326 }
1327 
1328 int __init parse_crashkernel_low(char *cmdline,
1329 			     unsigned long long system_ram,
1330 			     unsigned long long *crash_size,
1331 			     unsigned long long *crash_base)
1332 {
1333 	return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base,
1334 				"crashkernel=", suffix_tbl[SUFFIX_LOW]);
1335 }
1336 
1337 static void update_vmcoreinfo_note(void)
1338 {
1339 	u32 *buf = vmcoreinfo_note;
1340 
1341 	if (!vmcoreinfo_size)
1342 		return;
1343 	buf = append_elf_note(buf, VMCOREINFO_NOTE_NAME, 0, vmcoreinfo_data,
1344 			      vmcoreinfo_size);
1345 	final_note(buf);
1346 }
1347 
1348 void crash_save_vmcoreinfo(void)
1349 {
1350 	vmcoreinfo_append_str("CRASHTIME=%ld\n", get_seconds());
1351 	update_vmcoreinfo_note();
1352 }
1353 
1354 void vmcoreinfo_append_str(const char *fmt, ...)
1355 {
1356 	va_list args;
1357 	char buf[0x50];
1358 	size_t r;
1359 
1360 	va_start(args, fmt);
1361 	r = vscnprintf(buf, sizeof(buf), fmt, args);
1362 	va_end(args);
1363 
1364 	r = min(r, vmcoreinfo_max_size - vmcoreinfo_size);
1365 
1366 	memcpy(&vmcoreinfo_data[vmcoreinfo_size], buf, r);
1367 
1368 	vmcoreinfo_size += r;
1369 }
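
/*
 * The VMCOREINFO_* helpers used in crash_save_vmcoreinfo_init() below are
 * thin wrappers around vmcoreinfo_append_str(): each appends one
 * "KEY=value\n" line describing a symbol, size, offset, length or number,
 * and the accumulated text is exported to dump tools via the vmcoreinfo
 * ELF note built in update_vmcoreinfo_note().
 */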
1370 
1371 /*
1372  * provide an empty default implementation here -- architecture
1373  * code may override this
1374  */
1375 void __weak arch_crash_save_vmcoreinfo(void)
1376 {}
1377 
1378 unsigned long __weak paddr_vmcoreinfo_note(void)
1379 {
1380 	return __pa((unsigned long)(char *)&vmcoreinfo_note);
1381 }
1382 
1383 static int __init crash_save_vmcoreinfo_init(void)
1384 {
1385 	VMCOREINFO_OSRELEASE(init_uts_ns.name.release);
1386 	VMCOREINFO_PAGESIZE(PAGE_SIZE);
1387 
1388 	VMCOREINFO_SYMBOL(init_uts_ns);
1389 	VMCOREINFO_SYMBOL(node_online_map);
1390 #ifdef CONFIG_MMU
1391 	VMCOREINFO_SYMBOL(swapper_pg_dir);
1392 #endif
1393 	VMCOREINFO_SYMBOL(_stext);
1394 	VMCOREINFO_SYMBOL(vmap_area_list);
1395 
1396 #ifndef CONFIG_NEED_MULTIPLE_NODES
1397 	VMCOREINFO_SYMBOL(mem_map);
1398 	VMCOREINFO_SYMBOL(contig_page_data);
1399 #endif
1400 #ifdef CONFIG_SPARSEMEM
1401 	VMCOREINFO_SYMBOL(mem_section);
1402 	VMCOREINFO_LENGTH(mem_section, NR_SECTION_ROOTS);
1403 	VMCOREINFO_STRUCT_SIZE(mem_section);
1404 	VMCOREINFO_OFFSET(mem_section, section_mem_map);
1405 #endif
1406 	VMCOREINFO_STRUCT_SIZE(page);
1407 	VMCOREINFO_STRUCT_SIZE(pglist_data);
1408 	VMCOREINFO_STRUCT_SIZE(zone);
1409 	VMCOREINFO_STRUCT_SIZE(free_area);
1410 	VMCOREINFO_STRUCT_SIZE(list_head);
1411 	VMCOREINFO_SIZE(nodemask_t);
1412 	VMCOREINFO_OFFSET(page, flags);
1413 	VMCOREINFO_OFFSET(page, _count);
1414 	VMCOREINFO_OFFSET(page, mapping);
1415 	VMCOREINFO_OFFSET(page, lru);
1416 	VMCOREINFO_OFFSET(page, _mapcount);
1417 	VMCOREINFO_OFFSET(page, private);
1418 	VMCOREINFO_OFFSET(pglist_data, node_zones);
1419 	VMCOREINFO_OFFSET(pglist_data, nr_zones);
1420 #ifdef CONFIG_FLAT_NODE_MEM_MAP
1421 	VMCOREINFO_OFFSET(pglist_data, node_mem_map);
1422 #endif
1423 	VMCOREINFO_OFFSET(pglist_data, node_start_pfn);
1424 	VMCOREINFO_OFFSET(pglist_data, node_spanned_pages);
1425 	VMCOREINFO_OFFSET(pglist_data, node_id);
1426 	VMCOREINFO_OFFSET(zone, free_area);
1427 	VMCOREINFO_OFFSET(zone, vm_stat);
1428 	VMCOREINFO_OFFSET(zone, spanned_pages);
1429 	VMCOREINFO_OFFSET(free_area, free_list);
1430 	VMCOREINFO_OFFSET(list_head, next);
1431 	VMCOREINFO_OFFSET(list_head, prev);
1432 	VMCOREINFO_OFFSET(vmap_area, va_start);
1433 	VMCOREINFO_OFFSET(vmap_area, list);
1434 	VMCOREINFO_LENGTH(zone.free_area, MAX_ORDER);
1435 	log_buf_kexec_setup();
1436 	VMCOREINFO_LENGTH(free_area.free_list, MIGRATE_TYPES);
1437 	VMCOREINFO_NUMBER(NR_FREE_PAGES);
1438 	VMCOREINFO_NUMBER(PG_lru);
1439 	VMCOREINFO_NUMBER(PG_private);
1440 	VMCOREINFO_NUMBER(PG_swapcache);
1441 	VMCOREINFO_NUMBER(PG_slab);
1442 #ifdef CONFIG_MEMORY_FAILURE
1443 	VMCOREINFO_NUMBER(PG_hwpoison);
1444 #endif
1445 	VMCOREINFO_NUMBER(PG_head_mask);
1446 	VMCOREINFO_NUMBER(PAGE_BUDDY_MAPCOUNT_VALUE);
1447 #ifdef CONFIG_X86
1448 	VMCOREINFO_NUMBER(KERNEL_IMAGE_SIZE);
1449 #endif
1450 #ifdef CONFIG_HUGETLBFS
1451 	VMCOREINFO_SYMBOL(free_huge_page);
1452 #endif
1453 
1454 	arch_crash_save_vmcoreinfo();
1455 	update_vmcoreinfo_note();
1456 
1457 	return 0;
1458 }
1459 
1460 subsys_initcall(crash_save_vmcoreinfo_init);
1461 
1462 /*
1463  * Move into place and start executing a preloaded standalone
1464  * executable.  If nothing was preloaded, return an error.
1465  */
1466 int kernel_kexec(void)
1467 {
1468 	int error = 0;
1469 
1470 	if (!mutex_trylock(&kexec_mutex))
1471 		return -EBUSY;
1472 	if (!kexec_image) {
1473 		error = -EINVAL;
1474 		goto Unlock;
1475 	}
1476 
1477 #ifdef CONFIG_KEXEC_JUMP
1478 	if (kexec_image->preserve_context) {
1479 		lock_system_sleep();
1480 		pm_prepare_console();
1481 		error = freeze_processes();
1482 		if (error) {
1483 			error = -EBUSY;
1484 			goto Restore_console;
1485 		}
1486 		suspend_console();
1487 		error = dpm_suspend_start(PMSG_FREEZE);
1488 		if (error)
1489 			goto Resume_console;
1490 		/* At this point, dpm_suspend_start() has been called,
1491 		 * but *not* dpm_suspend_end(). We *must* call
1492 		 * dpm_suspend_end() now.  Otherwise, drivers for
1493 		 * some devices (e.g. interrupt controllers) become
1494 		 * desynchronized with the actual state of the
1495 		 * hardware at resume time, and evil weirdness ensues.
1496 		 */
1497 		error = dpm_suspend_end(PMSG_FREEZE);
1498 		if (error)
1499 			goto Resume_devices;
1500 		error = disable_nonboot_cpus();
1501 		if (error)
1502 			goto Enable_cpus;
1503 		local_irq_disable();
1504 		error = syscore_suspend();
1505 		if (error)
1506 			goto Enable_irqs;
1507 	} else
1508 #endif
1509 	{
1510 		kexec_in_progress = true;
1511 		kernel_restart_prepare(NULL);
1512 		migrate_to_reboot_cpu();
1513 
1514 		/*
1515 		 * migrate_to_reboot_cpu() disables CPU hotplug assuming that
1516 		 * no further code needs to use CPU hotplug (which is true in
1517 		 * the reboot case). However, the kexec path depends on using
1518 		 * CPU hotplug again; so re-enable it here.
1519 		 */
1520 		cpu_hotplug_enable();
1521 		pr_emerg("Starting new kernel\n");
1522 		machine_shutdown();
1523 	}
1524 
1525 	machine_kexec(kexec_image);
1526 
1527 #ifdef CONFIG_KEXEC_JUMP
1528 	if (kexec_image->preserve_context) {
1529 		syscore_resume();
1530  Enable_irqs:
1531 		local_irq_enable();
1532  Enable_cpus:
1533 		enable_nonboot_cpus();
1534 		dpm_resume_start(PMSG_RESTORE);
1535  Resume_devices:
1536 		dpm_resume_end(PMSG_RESTORE);
1537  Resume_console:
1538 		resume_console();
1539 		thaw_processes();
1540  Restore_console:
1541 		pm_restore_console();
1542 		unlock_system_sleep();
1543 	}
1544 #endif
1545 
1546  Unlock:
1547 	mutex_unlock(&kexec_mutex);
1548 	return error;
1549 }
1550 
1551 /*
1552  * Add and remove page tables for crashkernel memory
1553  *
1554  * Provide an empty default implementation here -- architecture
1555  * code may override this
1556  */
1557 void __weak crash_map_reserved_pages(void)
1558 {}
1559 
1560 void __weak crash_unmap_reserved_pages(void)
1561 {}
1562