xref: /openbmc/linux/kernel/kexec.c (revision 88d5e520)
1 /*
2  * kexec.c - kexec system call
3  * Copyright (C) 2002-2004 Eric Biederman  <ebiederm@xmission.com>
4  *
5  * This source code is licensed under the GNU General Public License,
6  * Version 2.  See the file COPYING for more details.
7  */
8 
9 #define pr_fmt(fmt)	"kexec: " fmt
10 
11 #include <linux/capability.h>
12 #include <linux/mm.h>
13 #include <linux/file.h>
14 #include <linux/slab.h>
15 #include <linux/fs.h>
16 #include <linux/kexec.h>
17 #include <linux/mutex.h>
18 #include <linux/list.h>
19 #include <linux/highmem.h>
20 #include <linux/syscalls.h>
21 #include <linux/reboot.h>
22 #include <linux/ioport.h>
23 #include <linux/hardirq.h>
24 #include <linux/elf.h>
25 #include <linux/elfcore.h>
26 #include <linux/utsname.h>
27 #include <linux/numa.h>
28 #include <linux/suspend.h>
29 #include <linux/device.h>
30 #include <linux/freezer.h>
31 #include <linux/pm.h>
32 #include <linux/cpu.h>
33 #include <linux/console.h>
34 #include <linux/vmalloc.h>
35 #include <linux/swap.h>
36 #include <linux/syscore_ops.h>
37 #include <linux/compiler.h>
38 #include <linux/hugetlb.h>
39 
40 #include <asm/page.h>
41 #include <asm/uaccess.h>
42 #include <asm/io.h>
43 #include <asm/sections.h>
44 
45 #include <crypto/hash.h>
46 #include <crypto/sha.h>
47 
48 /* Per cpu memory for storing cpu states in case of system crash. */
49 note_buf_t __percpu *crash_notes;
50 
51 /* vmcoreinfo stuff */
52 static unsigned char vmcoreinfo_data[VMCOREINFO_BYTES];
53 u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4];
54 size_t vmcoreinfo_size;
55 size_t vmcoreinfo_max_size = sizeof(vmcoreinfo_data);
56 
57 /* Flag to indicate we are going to kexec a new kernel */
58 bool kexec_in_progress = false;
59 
60 /*
61  * Declare these symbols weak so that if the architecture provides a purgatory,
62  * these will be overridden.
63  */
64 char __weak kexec_purgatory[0];
65 size_t __weak kexec_purgatory_size = 0;
66 
67 static int kexec_calculate_store_digests(struct kimage *image);
68 
69 /* Location of the reserved area for the crash kernel */
70 struct resource crashk_res = {
71 	.name  = "Crash kernel",
72 	.start = 0,
73 	.end   = 0,
74 	.flags = IORESOURCE_BUSY | IORESOURCE_MEM
75 };
76 struct resource crashk_low_res = {
77 	.name  = "Crash kernel",
78 	.start = 0,
79 	.end   = 0,
80 	.flags = IORESOURCE_BUSY | IORESOURCE_MEM
81 };
82 
83 int kexec_should_crash(struct task_struct *p)
84 {
85 	if (in_interrupt() || !p->pid || is_global_init(p) || panic_on_oops)
86 		return 1;
87 	return 0;
88 }
89 
90 /*
91  * When kexec transitions to the new kernel there is a one-to-one
92  * mapping between physical and virtual addresses.  On processors
93  * where you can disable the MMU this is trivial, and easy.  For
94  * others it is still a simple predictable page table to setup.
95  *
96  * In that environment kexec copies the new kernel to its final
97  * resting place.  This means I can only support memory whose
98  * physical address can fit in an unsigned long.  In particular
99  * addresses where (pfn << PAGE_SHIFT) > ULONG_MAX cannot be handled.
100  * If the assembly stub has more restrictive requirements
101  * KEXEC_SOURCE_MEMORY_LIMIT and KEXEC_DEST_MEMORY_LIMIT can be
102  * defined more restrictively in <asm/kexec.h>.
103  *
104  * The code for the transition from the current kernel to the
105  * new kernel is placed in the control_code_buffer, whose size
106  * is given by KEXEC_CONTROL_PAGE_SIZE.  In the best case only a single
107  * page of memory is necessary, but some architectures require more.
108  * Because this memory must be identity mapped in the transition from
109  * virtual to physical addresses it must live in the range
110  * 0 - TASK_SIZE, as only the user space mappings are arbitrarily
111  * modifiable.
112  *
113  * The assembly stub in the control code buffer is passed a linked list
114  * of descriptor pages detailing the source pages of the new kernel,
115  * and the destination addresses of those source pages.  As this data
116  * structure is not used in the context of the current OS, it must
117  * be self-contained.
118  *
119  * The code has been made to work with highmem pages and will use a
120  * destination page in its final resting place (if it happens
121  * to allocate it).  The end product of this is that most of the
122  * physical address space, and most of RAM can be used.
123  *
124  * Future directions include:
125  *  - allocating a page table with the control code buffer identity
126  *    mapped, to simplify machine_kexec and make kexec_on_panic more
127  *    reliable.
128  */
129 
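/*
 * Editor's note: a minimal sketch of the entry list the assembly stub
 * consumes, assuming the IND_* flag values from <linux/kexec.h>
 * (IND_DESTINATION 0x1, IND_INDIRECTION 0x2, IND_DONE 0x4, IND_SOURCE 0x8).
 * Each kimage_entry_t is a physical address with a flag in its low bits:
 *
 *	dest_phys | IND_DESTINATION   set the current destination address
 *	src_phys  | IND_SOURCE        copy this page there, advance by PAGE_SIZE
 *	ind_phys  | IND_INDIRECTION   continue reading entries from this page
 *	IND_DONE                      end of the list
 *
 * kimage_set_destination(), kimage_add_page() and kimage_terminate() below
 * build exactly this structure.
 */
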
130 /*
131  * KIMAGE_NO_DEST is an impossible destination address, used for
132  * allocating pages whose destination address we do not care about.
133  */
134 #define KIMAGE_NO_DEST (-1UL)
135 
136 static int kimage_is_destination_range(struct kimage *image,
137 				       unsigned long start, unsigned long end);
138 static struct page *kimage_alloc_page(struct kimage *image,
139 				       gfp_t gfp_mask,
140 				       unsigned long dest);
141 
142 static int copy_user_segment_list(struct kimage *image,
143 				  unsigned long nr_segments,
144 				  struct kexec_segment __user *segments)
145 {
146 	int ret;
147 	size_t segment_bytes;
148 
149 	/* Read in the segments */
150 	image->nr_segments = nr_segments;
151 	segment_bytes = nr_segments * sizeof(*segments);
152 	ret = copy_from_user(image->segment, segments, segment_bytes);
153 	if (ret)
154 		ret = -EFAULT;
155 
156 	return ret;
157 }
158 
159 static int sanity_check_segment_list(struct kimage *image)
160 {
161 	int result, i;
162 	unsigned long nr_segments = image->nr_segments;
163 
164 	/*
165 	 * Verify we have good destination addresses.  The caller is
166 	 * responsible for making certain we don't attempt to load
167 	 * the new image into invalid or reserved areas of RAM.  This
168 	 * just verifies it is an address we can use.
169 	 *
170 	 * Since the kernel does everything in page size chunks ensure
171 	 * the destination addresses are page aligned.  Too many
172 	 * special cases crop up when we don't do this.  The most
173 	 * insidious is getting overlapping destination addresses
174 	 * simply because addresses are changed to page size
175 	 * granularity.
176 	 */
177 	result = -EADDRNOTAVAIL;
178 	for (i = 0; i < nr_segments; i++) {
179 		unsigned long mstart, mend;
180 
181 		mstart = image->segment[i].mem;
182 		mend   = mstart + image->segment[i].memsz;
183 		if ((mstart & ~PAGE_MASK) || (mend & ~PAGE_MASK))
184 			return result;
185 		if (mend >= KEXEC_DESTINATION_MEMORY_LIMIT)
186 			return result;
187 	}
188 
189 	/* Verify our destination addresses do not overlap.
190 	 * If we allowed overlapping destination addresses
191 	 * through, very weird things can happen with no
192 	 * easy explanation as one segment stomps on another.
193 	 */
194 	result = -EINVAL;
195 	for (i = 0; i < nr_segments; i++) {
196 		unsigned long mstart, mend;
197 		unsigned long j;
198 
199 		mstart = image->segment[i].mem;
200 		mend   = mstart + image->segment[i].memsz;
201 		for (j = 0; j < i; j++) {
202 			unsigned long pstart, pend;
203 			pstart = image->segment[j].mem;
204 			pend   = pstart + image->segment[j].memsz;
205 			/* Do the segments overlap ? */
206 			if ((mend > pstart) && (mstart < pend))
207 				return result;
208 		}
209 	}
210 
211 	/* Ensure our buffer sizes are no larger than
212 	 * our memory sizes.  This should always be the case,
213 	 * and it is easier to check up front than to be surprised
214 	 * later on.
215 	 */
216 	result = -EINVAL;
217 	for (i = 0; i < nr_segments; i++) {
218 		if (image->segment[i].bufsz > image->segment[i].memsz)
219 			return result;
220 	}
221 
222 	/*
223 	 * Verify we have good destination addresses.  Normally
224 	 * the caller is responsible for making certain we don't
225 	 * attempt to load the new image into invalid or reserved
226 	 * areas of RAM.  But crash kernels are preloaded into a
227 	 * reserved area of RAM.  We must ensure the addresses
228 	 * are in the reserved area, otherwise preloading the
229 	 * kernel could corrupt things.
230 	 */
231 
232 	if (image->type == KEXEC_TYPE_CRASH) {
233 		result = -EADDRNOTAVAIL;
234 		for (i = 0; i < nr_segments; i++) {
235 			unsigned long mstart, mend;
236 
237 			mstart = image->segment[i].mem;
238 			mend = mstart + image->segment[i].memsz - 1;
239 			/* Ensure we are within the crash kernel limits */
240 			if ((mstart < crashk_res.start) ||
241 			    (mend > crashk_res.end))
242 				return result;
243 		}
244 	}
245 
246 	return 0;
247 }
248 
249 static struct kimage *do_kimage_alloc_init(void)
250 {
251 	struct kimage *image;
252 
253 	/* Allocate a controlling structure */
254 	image = kzalloc(sizeof(*image), GFP_KERNEL);
255 	if (!image)
256 		return NULL;
257 
258 	image->head = 0;
259 	image->entry = &image->head;
260 	image->last_entry = &image->head;
261 	image->control_page = ~0; /* By default this does not apply */
262 	image->type = KEXEC_TYPE_DEFAULT;
263 
264 	/* Initialize the list of control pages */
265 	INIT_LIST_HEAD(&image->control_pages);
266 
267 	/* Initialize the list of destination pages */
268 	INIT_LIST_HEAD(&image->dest_pages);
269 
270 	/* Initialize the list of unusable pages */
271 	INIT_LIST_HEAD(&image->unusable_pages);
272 
273 	return image;
274 }
275 
276 static void kimage_free_page_list(struct list_head *list);
277 
278 static int kimage_alloc_init(struct kimage **rimage, unsigned long entry,
279 			     unsigned long nr_segments,
280 			     struct kexec_segment __user *segments,
281 			     unsigned long flags)
282 {
283 	int ret;
284 	struct kimage *image;
285 	bool kexec_on_panic = flags & KEXEC_ON_CRASH;
286 
287 	if (kexec_on_panic) {
288 		/* Verify we have a valid entry point */
289 		if ((entry < crashk_res.start) || (entry > crashk_res.end))
290 			return -EADDRNOTAVAIL;
291 	}
292 
293 	/* Allocate and initialize a controlling structure */
294 	image = do_kimage_alloc_init();
295 	if (!image)
296 		return -ENOMEM;
297 
298 	image->start = entry;
299 
300 	ret = copy_user_segment_list(image, nr_segments, segments);
301 	if (ret)
302 		goto out_free_image;
303 
304 	ret = sanity_check_segment_list(image);
305 	if (ret)
306 		goto out_free_image;
307 
308 	 /* Enable the special crash kernel control page allocation policy. */
309 	if (kexec_on_panic) {
310 		image->control_page = crashk_res.start;
311 		image->type = KEXEC_TYPE_CRASH;
312 	}
313 
314 	/*
315 	 * Find a location for the control code buffer, and add it to
316 	 * the vector of segments so that its pages will also be
317 	 * counted as destination pages.
318 	 */
319 	ret = -ENOMEM;
320 	image->control_code_page = kimage_alloc_control_pages(image,
321 					   get_order(KEXEC_CONTROL_PAGE_SIZE));
322 	if (!image->control_code_page) {
323 		pr_err("Could not allocate control_code_buffer\n");
324 		goto out_free_image;
325 	}
326 
327 	if (!kexec_on_panic) {
328 		image->swap_page = kimage_alloc_control_pages(image, 0);
329 		if (!image->swap_page) {
330 			pr_err("Could not allocate swap buffer\n");
331 			goto out_free_control_pages;
332 		}
333 	}
334 
335 	*rimage = image;
336 	return 0;
337 out_free_control_pages:
338 	kimage_free_page_list(&image->control_pages);
339 out_free_image:
340 	kfree(image);
341 	return ret;
342 }
343 
344 static int copy_file_from_fd(int fd, void **buf, unsigned long *buf_len)
345 {
346 	struct fd f = fdget(fd);
347 	int ret;
348 	struct kstat stat;
349 	loff_t pos;
350 	ssize_t bytes = 0;
351 
352 	if (!f.file)
353 		return -EBADF;
354 
355 	ret = vfs_getattr(&f.file->f_path, &stat);
356 	if (ret)
357 		goto out;
358 
359 	if (stat.size > INT_MAX) {
360 		ret = -EFBIG;
361 		goto out;
362 	}
363 
364 	/* Don't hand 0 to vmalloc, it whines. */
365 	if (stat.size == 0) {
366 		ret = -EINVAL;
367 		goto out;
368 	}
369 
370 	*buf = vmalloc(stat.size);
371 	if (!*buf) {
372 		ret = -ENOMEM;
373 		goto out;
374 	}
375 
376 	pos = 0;
377 	while (pos < stat.size) {
378 		bytes = kernel_read(f.file, pos, (char *)(*buf) + pos,
379 				    stat.size - pos);
380 		if (bytes < 0) {
381 			vfree(*buf);
382 			ret = bytes;
383 			goto out;
384 		}
385 
386 		if (bytes == 0)
387 			break;
388 		pos += bytes;
389 	}
390 
391 	if (pos != stat.size) {
392 		ret = -EBADF;
393 		vfree(*buf);
394 		goto out;
395 	}
396 
397 	*buf_len = pos;
398 out:
399 	fdput(f);
400 	return ret;
401 }
402 
403 /* Architectures can provide this probe function */
404 int __weak arch_kexec_kernel_image_probe(struct kimage *image, void *buf,
405 					 unsigned long buf_len)
406 {
407 	return -ENOEXEC;
408 }
409 
410 void * __weak arch_kexec_kernel_image_load(struct kimage *image)
411 {
412 	return ERR_PTR(-ENOEXEC);
413 }
414 
415 void __weak arch_kimage_file_post_load_cleanup(struct kimage *image)
416 {
417 }
418 
419 int __weak arch_kexec_kernel_verify_sig(struct kimage *image, void *buf,
420 					unsigned long buf_len)
421 {
422 	return -EKEYREJECTED;
423 }
424 
425 /* Apply relocations of type RELA */
426 int __weak
427 arch_kexec_apply_relocations_add(const Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
428 				 unsigned int relsec)
429 {
430 	pr_err("RELA relocation unsupported.\n");
431 	return -ENOEXEC;
432 }
433 
434 /* Apply relocations of type REL */
435 int __weak
436 arch_kexec_apply_relocations(const Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
437 			     unsigned int relsec)
438 {
439 	pr_err("REL relocation unsupported.\n");
440 	return -ENOEXEC;
441 }
442 
443 /*
444  * Free up memory used by the kernel, initrd, and command line. This is temporary
445  * memory allocation which is not needed any more after these buffers have
446  * been loaded into separate segments and have been copied elsewhere.
447  */
448 static void kimage_file_post_load_cleanup(struct kimage *image)
449 {
450 	struct purgatory_info *pi = &image->purgatory_info;
451 
452 	vfree(image->kernel_buf);
453 	image->kernel_buf = NULL;
454 
455 	vfree(image->initrd_buf);
456 	image->initrd_buf = NULL;
457 
458 	kfree(image->cmdline_buf);
459 	image->cmdline_buf = NULL;
460 
461 	vfree(pi->purgatory_buf);
462 	pi->purgatory_buf = NULL;
463 
464 	vfree(pi->sechdrs);
465 	pi->sechdrs = NULL;
466 
467 	/* See if architecture has anything to cleanup post load */
468 	arch_kimage_file_post_load_cleanup(image);
469 
470 	/*
471 	 * The above call should have called into the bootloader to free up
472 	 * any data stored in kimage->image_loader_data. It should
473 	 * be OK now to free it.
474 	 */
475 	kfree(image->image_loader_data);
476 	image->image_loader_data = NULL;
477 }
478 
479 /*
480  * In file mode the list of segments is prepared by the kernel. Copy relevant
481  * data from user space, do error checking, and prepare the segment list.
482  */
483 static int
484 kimage_file_prepare_segments(struct kimage *image, int kernel_fd, int initrd_fd,
485 			     const char __user *cmdline_ptr,
486 			     unsigned long cmdline_len, unsigned flags)
487 {
488 	int ret = 0;
489 	void *ldata;
490 
491 	ret = copy_file_from_fd(kernel_fd, &image->kernel_buf,
492 				&image->kernel_buf_len);
493 	if (ret)
494 		return ret;
495 
496 	/* Call arch image probe handlers */
497 	ret = arch_kexec_kernel_image_probe(image, image->kernel_buf,
498 					    image->kernel_buf_len);
499 
500 	if (ret)
501 		goto out;
502 
503 #ifdef CONFIG_KEXEC_VERIFY_SIG
504 	ret = arch_kexec_kernel_verify_sig(image, image->kernel_buf,
505 					   image->kernel_buf_len);
506 	if (ret) {
507 		pr_debug("kernel signature verification failed.\n");
508 		goto out;
509 	}
510 	pr_debug("kernel signature verification successful.\n");
511 #endif
512 	/* It is possible that no initramfs is being loaded */
513 	if (!(flags & KEXEC_FILE_NO_INITRAMFS)) {
514 		ret = copy_file_from_fd(initrd_fd, &image->initrd_buf,
515 					&image->initrd_buf_len);
516 		if (ret)
517 			goto out;
518 	}
519 
520 	if (cmdline_len) {
521 		image->cmdline_buf = kzalloc(cmdline_len, GFP_KERNEL);
522 		if (!image->cmdline_buf) {
523 			ret = -ENOMEM;
524 			goto out;
525 		}
526 
527 		ret = copy_from_user(image->cmdline_buf, cmdline_ptr,
528 				     cmdline_len);
529 		if (ret) {
530 			ret = -EFAULT;
531 			goto out;
532 		}
533 
534 		image->cmdline_buf_len = cmdline_len;
535 
536 		/* command line should be a string with last byte null */
537 		if (image->cmdline_buf[cmdline_len - 1] != '\0') {
538 			ret = -EINVAL;
539 			goto out;
540 		}
541 	}
542 
543 	/* Call arch image load handlers */
544 	ldata = arch_kexec_kernel_image_load(image);
545 
546 	if (IS_ERR(ldata)) {
547 		ret = PTR_ERR(ldata);
548 		goto out;
549 	}
550 
551 	image->image_loader_data = ldata;
552 out:
553 	/* In case of error, free up all allocated memory in this function */
554 	if (ret)
555 		kimage_file_post_load_cleanup(image);
556 	return ret;
557 }
558 
559 static int
560 kimage_file_alloc_init(struct kimage **rimage, int kernel_fd,
561 		       int initrd_fd, const char __user *cmdline_ptr,
562 		       unsigned long cmdline_len, unsigned long flags)
563 {
564 	int ret;
565 	struct kimage *image;
566 	bool kexec_on_panic = flags & KEXEC_FILE_ON_CRASH;
567 
568 	image = do_kimage_alloc_init();
569 	if (!image)
570 		return -ENOMEM;
571 
572 	image->file_mode = 1;
573 
574 	if (kexec_on_panic) {
575 		/* Enable special crash kernel control page alloc policy. */
576 		image->control_page = crashk_res.start;
577 		image->type = KEXEC_TYPE_CRASH;
578 	}
579 
580 	ret = kimage_file_prepare_segments(image, kernel_fd, initrd_fd,
581 					   cmdline_ptr, cmdline_len, flags);
582 	if (ret)
583 		goto out_free_image;
584 
585 	ret = sanity_check_segment_list(image);
586 	if (ret)
587 		goto out_free_post_load_bufs;
588 
589 	ret = -ENOMEM;
590 	image->control_code_page = kimage_alloc_control_pages(image,
591 					   get_order(KEXEC_CONTROL_PAGE_SIZE));
592 	if (!image->control_code_page) {
593 		pr_err("Could not allocate control_code_buffer\n");
594 		goto out_free_post_load_bufs;
595 	}
596 
597 	if (!kexec_on_panic) {
598 		image->swap_page = kimage_alloc_control_pages(image, 0);
599 		if (!image->swap_page) {
600 			pr_err("Could not allocate swap buffer\n");
601 			goto out_free_control_pages;
602 		}
603 	}
604 
605 	*rimage = image;
606 	return 0;
607 out_free_control_pages:
608 	kimage_free_page_list(&image->control_pages);
609 out_free_post_load_bufs:
610 	kimage_file_post_load_cleanup(image);
611 out_free_image:
612 	kfree(image);
613 	return ret;
614 }
615 
616 static int kimage_is_destination_range(struct kimage *image,
617 					unsigned long start,
618 					unsigned long end)
619 {
620 	unsigned long i;
621 
622 	for (i = 0; i < image->nr_segments; i++) {
623 		unsigned long mstart, mend;
624 
625 		mstart = image->segment[i].mem;
626 		mend = mstart + image->segment[i].memsz;
627 		if ((end > mstart) && (start < mend))
628 			return 1;
629 	}
630 
631 	return 0;
632 }
633 
634 static struct page *kimage_alloc_pages(gfp_t gfp_mask, unsigned int order)
635 {
636 	struct page *pages;
637 
638 	pages = alloc_pages(gfp_mask, order);
639 	if (pages) {
640 		unsigned int count, i;
641 		pages->mapping = NULL;
642 		set_page_private(pages, order);
643 		count = 1 << order;
644 		for (i = 0; i < count; i++)
645 			SetPageReserved(pages + i);
646 	}
647 
648 	return pages;
649 }
650 
651 static void kimage_free_pages(struct page *page)
652 {
653 	unsigned int order, count, i;
654 
655 	order = page_private(page);
656 	count = 1 << order;
657 	for (i = 0; i < count; i++)
658 		ClearPageReserved(page + i);
659 	__free_pages(page, order);
660 }
661 
662 static void kimage_free_page_list(struct list_head *list)
663 {
664 	struct list_head *pos, *next;
665 
666 	list_for_each_safe(pos, next, list) {
667 		struct page *page;
668 
669 		page = list_entry(pos, struct page, lru);
670 		list_del(&page->lru);
671 		kimage_free_pages(page);
672 	}
673 }
674 
675 static struct page *kimage_alloc_normal_control_pages(struct kimage *image,
676 							unsigned int order)
677 {
678 	/* Control pages are special, they are the intermediaries
679 	 * that are needed while we copy the rest of the pages
680 	 * to their final resting place.  As such they must
681 	 * not conflict with either the destination addresses
682 	 * or memory the kernel is already using.
683 	 *
684 	 * The only case where we really need more than one of
685 	 * these is for architectures where we cannot disable
686 	 * the MMU and must instead generate an identity mapped
687 	 * page table for all of the memory.
688 	 *
689 	 * At worst this runs in O(N) of the image size.
690 	 */
691 	struct list_head extra_pages;
692 	struct page *pages;
693 	unsigned int count;
694 
695 	count = 1 << order;
696 	INIT_LIST_HEAD(&extra_pages);
697 
698 	/* Loop while I can allocate a page and the page allocated
699 	 * is a destination page.
700 	 */
701 	do {
702 		unsigned long pfn, epfn, addr, eaddr;
703 
704 		pages = kimage_alloc_pages(GFP_KERNEL, order);
705 		if (!pages)
706 			break;
707 		pfn   = page_to_pfn(pages);
708 		epfn  = pfn + count;
709 		addr  = pfn << PAGE_SHIFT;
710 		eaddr = epfn << PAGE_SHIFT;
711 		if ((epfn >= (KEXEC_CONTROL_MEMORY_LIMIT >> PAGE_SHIFT)) ||
712 			      kimage_is_destination_range(image, addr, eaddr)) {
713 			list_add(&pages->lru, &extra_pages);
714 			pages = NULL;
715 		}
716 	} while (!pages);
717 
718 	if (pages) {
719 		/* Remember the allocated page... */
720 		list_add(&pages->lru, &image->control_pages);
721 
722 		/* Because the page is already in its destination
723 		 * location we will never allocate another page at
724 		 * that address.  Therefore kimage_alloc_pages
725 		 * will not return it (again) and we don't need
726 		 * to give it an entry in image->segment[].
727 		 */
728 	}
729 	/* Deal with the destination pages I have inadvertently allocated.
730 	 *
731 	 * Ideally I would convert multi-page allocations into single
732 	 * page allocations, and add everything to image->dest_pages.
733 	 *
734 	 * For now it is simpler to just free the pages.
735 	 */
736 	kimage_free_page_list(&extra_pages);
737 
738 	return pages;
739 }
740 
741 static struct page *kimage_alloc_crash_control_pages(struct kimage *image,
742 						      unsigned int order)
743 {
744 	/* Control pages are special, they are the intermediaries
745 	 * that are needed while we copy the rest of the pages
746 	 * to their final resting place.  As such they must
747 	 * not conflict with either the destination addresses
748 	 * or memory the kernel is already using.
749 	 *
750 	 * Control pages are also the only pages we must allocate
751 	 * when loading a crash kernel.  All of the other pages
752 	 * are specified by the segments and we just memcpy
753 	 * into them directly.
754 	 *
755 	 * The only case where we really need more than one of
756 	 * these is for architectures where we cannot disable
757 	 * the MMU and must instead generate an identity mapped
758 	 * page table for all of the memory.
759 	 *
760 	 * Given the low demand this implements a very simple
761 	 * allocator that finds the first hole of the appropriate
762 	 * size in the reserved memory region, and allocates all
763 	 * of the memory up to and including the hole.
764 	 */
765 	unsigned long hole_start, hole_end, size;
766 	struct page *pages;
767 
768 	pages = NULL;
769 	size = (1 << order) << PAGE_SHIFT;
770 	hole_start = (image->control_page + (size - 1)) & ~(size - 1);
771 	hole_end   = hole_start + size - 1;
772 	while (hole_end <= crashk_res.end) {
773 		unsigned long i;
774 
775 		if (hole_end > KEXEC_CRASH_CONTROL_MEMORY_LIMIT)
776 			break;
777 		/* See if I overlap any of the segments */
778 		for (i = 0; i < image->nr_segments; i++) {
779 			unsigned long mstart, mend;
780 
781 			mstart = image->segment[i].mem;
782 			mend   = mstart + image->segment[i].memsz - 1;
783 			if ((hole_end >= mstart) && (hole_start <= mend)) {
784 				/* Advance the hole to the end of the segment */
785 				hole_start = (mend + (size - 1)) & ~(size - 1);
786 				hole_end   = hole_start + size - 1;
787 				break;
788 			}
789 		}
790 		/* If I don't overlap any segments I have found my hole! */
791 		if (i == image->nr_segments) {
792 			pages = pfn_to_page(hole_start >> PAGE_SHIFT);
793 			break;
794 		}
795 	}
796 	if (pages)
797 		image->control_page = hole_end;
798 
799 	return pages;
800 }
801 
802 
803 struct page *kimage_alloc_control_pages(struct kimage *image,
804 					 unsigned int order)
805 {
806 	struct page *pages = NULL;
807 
808 	switch (image->type) {
809 	case KEXEC_TYPE_DEFAULT:
810 		pages = kimage_alloc_normal_control_pages(image, order);
811 		break;
812 	case KEXEC_TYPE_CRASH:
813 		pages = kimage_alloc_crash_control_pages(image, order);
814 		break;
815 	}
816 
817 	return pages;
818 }
819 
820 static int kimage_add_entry(struct kimage *image, kimage_entry_t entry)
821 {
822 	if (*image->entry != 0)
823 		image->entry++;
824 
825 	if (image->entry == image->last_entry) {
826 		kimage_entry_t *ind_page;
827 		struct page *page;
828 
829 		page = kimage_alloc_page(image, GFP_KERNEL, KIMAGE_NO_DEST);
830 		if (!page)
831 			return -ENOMEM;
832 
833 		ind_page = page_address(page);
834 		*image->entry = virt_to_phys(ind_page) | IND_INDIRECTION;
835 		image->entry = ind_page;
836 		image->last_entry = ind_page +
837 				      ((PAGE_SIZE/sizeof(kimage_entry_t)) - 1);
838 	}
839 	*image->entry = entry;
840 	image->entry++;
841 	*image->entry = 0;
842 
843 	return 0;
844 }
845 
846 static int kimage_set_destination(struct kimage *image,
847 				   unsigned long destination)
848 {
849 	int result;
850 
851 	destination &= PAGE_MASK;
852 	result = kimage_add_entry(image, destination | IND_DESTINATION);
853 	if (result == 0)
854 		image->destination = destination;
855 
856 	return result;
857 }
858 
859 
860 static int kimage_add_page(struct kimage *image, unsigned long page)
861 {
862 	int result;
863 
864 	page &= PAGE_MASK;
865 	result = kimage_add_entry(image, page | IND_SOURCE);
866 	if (result == 0)
867 		image->destination += PAGE_SIZE;
868 
869 	return result;
870 }
871 
872 
873 static void kimage_free_extra_pages(struct kimage *image)
874 {
875 	/* Walk through and free any extra destination pages I may have */
876 	kimage_free_page_list(&image->dest_pages);
877 
878 	/* Walk through and free any unusable pages I have cached */
879 	kimage_free_page_list(&image->unusable_pages);
880 
881 }
882 static void kimage_terminate(struct kimage *image)
883 {
884 	if (*image->entry != 0)
885 		image->entry++;
886 
887 	*image->entry = IND_DONE;
888 }
889 
890 #define for_each_kimage_entry(image, ptr, entry) \
891 	for (ptr = &image->head; (entry = *ptr) && !(entry & IND_DONE); \
892 		ptr = (entry & IND_INDIRECTION) ? \
893 			phys_to_virt((entry & PAGE_MASK)) : ptr + 1)
894 
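/*
 * Editor's note: an illustrative use of the iterator above, mirroring
 * kimage_free() below.  The walk follows IND_INDIRECTION entries into the
 * next indirection page transparently and stops at IND_DONE:
 *
 *	kimage_entry_t *ptr, entry;
 *
 *	for_each_kimage_entry(image, ptr, entry)
 *		if (entry & IND_SOURCE)
 *			kimage_free_entry(entry);
 */
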
895 static void kimage_free_entry(kimage_entry_t entry)
896 {
897 	struct page *page;
898 
899 	page = pfn_to_page(entry >> PAGE_SHIFT);
900 	kimage_free_pages(page);
901 }
902 
903 static void kimage_free(struct kimage *image)
904 {
905 	kimage_entry_t *ptr, entry;
906 	kimage_entry_t ind = 0;
907 
908 	if (!image)
909 		return;
910 
911 	kimage_free_extra_pages(image);
912 	for_each_kimage_entry(image, ptr, entry) {
913 		if (entry & IND_INDIRECTION) {
914 			/* Free the previous indirection page */
915 			if (ind & IND_INDIRECTION)
916 				kimage_free_entry(ind);
917 			/* Save this indirection page until we are
918 			 * done with it.
919 			 */
920 			ind = entry;
921 		} else if (entry & IND_SOURCE)
922 			kimage_free_entry(entry);
923 	}
924 	/* Free the final indirection page */
925 	if (ind & IND_INDIRECTION)
926 		kimage_free_entry(ind);
927 
928 	/* Handle any machine specific cleanup */
929 	machine_kexec_cleanup(image);
930 
931 	/* Free the kexec control pages... */
932 	kimage_free_page_list(&image->control_pages);
933 
934 	/*
935 	 * Free up any temporary buffers allocated. This might be hit if
936 	 * an error occurred much later, after buffer allocation.
937 	 */
938 	if (image->file_mode)
939 		kimage_file_post_load_cleanup(image);
940 
941 	kfree(image);
942 }
943 
944 static kimage_entry_t *kimage_dst_used(struct kimage *image,
945 					unsigned long page)
946 {
947 	kimage_entry_t *ptr, entry;
948 	unsigned long destination = 0;
949 
950 	for_each_kimage_entry(image, ptr, entry) {
951 		if (entry & IND_DESTINATION)
952 			destination = entry & PAGE_MASK;
953 		else if (entry & IND_SOURCE) {
954 			if (page == destination)
955 				return ptr;
956 			destination += PAGE_SIZE;
957 		}
958 	}
959 
960 	return NULL;
961 }
962 
963 static struct page *kimage_alloc_page(struct kimage *image,
964 					gfp_t gfp_mask,
965 					unsigned long destination)
966 {
967 	/*
968 	 * Here we implement safeguards to ensure that a source page
969 	 * is not copied to its destination page before the data on
970 	 * the destination page is no longer useful.
971 	 *
972 	 * To do this we maintain the invariant that a source page is
973 	 * either its own destination page, or it is not a
974 	 * destination page at all.
975 	 *
976 	 * That is slightly stronger than required, but the proof
977 	 * that no problems will occur is trivial, and the
978 	 * implementation is simple to verify.
979 	 *
980 	 * When allocating all pages normally this algorithm will run
981 	 * in O(N) time, but in the worst case it will run in O(N^2)
982 	 * time.  If the runtime is a problem, the data structures can
983 	 * be fixed.
984 	 */
985 	struct page *page;
986 	unsigned long addr;
987 
988 	/*
989 	 * Walk through the list of destination pages, and see if I
990 	 * have a match.
991 	 */
992 	list_for_each_entry(page, &image->dest_pages, lru) {
993 		addr = page_to_pfn(page) << PAGE_SHIFT;
994 		if (addr == destination) {
995 			list_del(&page->lru);
996 			return page;
997 		}
998 	}
999 	page = NULL;
1000 	while (1) {
1001 		kimage_entry_t *old;
1002 
1003 		/* Allocate a page, if we run out of memory give up */
1004 		page = kimage_alloc_pages(gfp_mask, 0);
1005 		if (!page)
1006 			return NULL;
1007 		/* If the page cannot be used, file it away */
1008 		if (page_to_pfn(page) >
1009 				(KEXEC_SOURCE_MEMORY_LIMIT >> PAGE_SHIFT)) {
1010 			list_add(&page->lru, &image->unusable_pages);
1011 			continue;
1012 		}
1013 		addr = page_to_pfn(page) << PAGE_SHIFT;
1014 
1015 		/* If it is the destination page we want, use it */
1016 		if (addr == destination)
1017 			break;
1018 
1019 		/* If the page is not a destination page, use it */
1020 		if (!kimage_is_destination_range(image, addr,
1021 						  addr + PAGE_SIZE))
1022 			break;
1023 
1024 		/*
1025 		 * I know that the page is someone's destination page.
1026 		 * See if there is already a source page for this
1027 		 * destination page.  If so, swap the source pages.
1028 		 */
1029 		old = kimage_dst_used(image, addr);
1030 		if (old) {
1031 			/* If so move it */
1032 			unsigned long old_addr;
1033 			struct page *old_page;
1034 
1035 			old_addr = *old & PAGE_MASK;
1036 			old_page = pfn_to_page(old_addr >> PAGE_SHIFT);
1037 			copy_highpage(page, old_page);
1038 			*old = addr | (*old & ~PAGE_MASK);
1039 
1040 			/* The old page I have found cannot be a
1041 			 * destination page, so return it if its
1042 			 * gfp_flags honor the ones passed in.
1043 			 */
1044 			if (!(gfp_mask & __GFP_HIGHMEM) &&
1045 			    PageHighMem(old_page)) {
1046 				kimage_free_pages(old_page);
1047 				continue;
1048 			}
1049 			addr = old_addr;
1050 			page = old_page;
1051 			break;
1052 		} else {
1053 			/* Place the page on the destination list; I
1054 			 * will use it later.
1055 			 */
1056 			list_add(&page->lru, &image->dest_pages);
1057 		}
1058 	}
1059 
1060 	return page;
1061 }
1062 
1063 static int kimage_load_normal_segment(struct kimage *image,
1064 					 struct kexec_segment *segment)
1065 {
1066 	unsigned long maddr;
1067 	size_t ubytes, mbytes;
1068 	int result;
1069 	unsigned char __user *buf = NULL;
1070 	unsigned char *kbuf = NULL;
1071 
1072 	result = 0;
1073 	if (image->file_mode)
1074 		kbuf = segment->kbuf;
1075 	else
1076 		buf = segment->buf;
1077 	ubytes = segment->bufsz;
1078 	mbytes = segment->memsz;
1079 	maddr = segment->mem;
1080 
1081 	result = kimage_set_destination(image, maddr);
1082 	if (result < 0)
1083 		goto out;
1084 
1085 	while (mbytes) {
1086 		struct page *page;
1087 		char *ptr;
1088 		size_t uchunk, mchunk;
1089 
1090 		page = kimage_alloc_page(image, GFP_HIGHUSER, maddr);
1091 		if (!page) {
1092 			result  = -ENOMEM;
1093 			goto out;
1094 		}
1095 		result = kimage_add_page(image, page_to_pfn(page)
1096 								<< PAGE_SHIFT);
1097 		if (result < 0)
1098 			goto out;
1099 
1100 		ptr = kmap(page);
1101 		/* Start with a clear page */
1102 		clear_page(ptr);
1103 		ptr += maddr & ~PAGE_MASK;
1104 		mchunk = min_t(size_t, mbytes,
1105 				PAGE_SIZE - (maddr & ~PAGE_MASK));
1106 		uchunk = min(ubytes, mchunk);
1107 
1108 		/* For file based kexec, source pages are in kernel memory */
1109 		if (image->file_mode)
1110 			memcpy(ptr, kbuf, uchunk);
1111 		else
1112 			result = copy_from_user(ptr, buf, uchunk);
1113 		kunmap(page);
1114 		if (result) {
1115 			result = -EFAULT;
1116 			goto out;
1117 		}
1118 		ubytes -= uchunk;
1119 		maddr  += mchunk;
1120 		if (image->file_mode)
1121 			kbuf += mchunk;
1122 		else
1123 			buf += mchunk;
1124 		mbytes -= mchunk;
1125 	}
1126 out:
1127 	return result;
1128 }
1129 
1130 static int kimage_load_crash_segment(struct kimage *image,
1131 					struct kexec_segment *segment)
1132 {
1133 	/* For crash dump kernels we simply copy the data from
1134 	 * user space to its destination.
1135 	 * We do things a page at a time for the sake of kmap.
1136 	 */
1137 	unsigned long maddr;
1138 	size_t ubytes, mbytes;
1139 	int result;
1140 	unsigned char __user *buf = NULL;
1141 	unsigned char *kbuf = NULL;
1142 
1143 	result = 0;
1144 	if (image->file_mode)
1145 		kbuf = segment->kbuf;
1146 	else
1147 		buf = segment->buf;
1148 	ubytes = segment->bufsz;
1149 	mbytes = segment->memsz;
1150 	maddr = segment->mem;
1151 	while (mbytes) {
1152 		struct page *page;
1153 		char *ptr;
1154 		size_t uchunk, mchunk;
1155 
1156 		page = pfn_to_page(maddr >> PAGE_SHIFT);
1157 		if (!page) {
1158 			result  = -ENOMEM;
1159 			goto out;
1160 		}
1161 		ptr = kmap(page);
1162 		ptr += maddr & ~PAGE_MASK;
1163 		mchunk = min_t(size_t, mbytes,
1164 				PAGE_SIZE - (maddr & ~PAGE_MASK));
1165 		uchunk = min(ubytes, mchunk);
1166 		if (mchunk > uchunk) {
1167 			/* Zero the trailing part of the page */
1168 			memset(ptr + uchunk, 0, mchunk - uchunk);
1169 		}
1170 
1171 		/* For file based kexec, source pages are in kernel memory */
1172 		if (image->file_mode)
1173 			memcpy(ptr, kbuf, uchunk);
1174 		else
1175 			result = copy_from_user(ptr, buf, uchunk);
1176 		kexec_flush_icache_page(page);
1177 		kunmap(page);
1178 		if (result) {
1179 			result = -EFAULT;
1180 			goto out;
1181 		}
1182 		ubytes -= uchunk;
1183 		maddr  += mchunk;
1184 		if (image->file_mode)
1185 			kbuf += mchunk;
1186 		else
1187 			buf += mchunk;
1188 		mbytes -= mchunk;
1189 	}
1190 out:
1191 	return result;
1192 }
1193 
1194 static int kimage_load_segment(struct kimage *image,
1195 				struct kexec_segment *segment)
1196 {
1197 	int result = -ENOMEM;
1198 
1199 	switch (image->type) {
1200 	case KEXEC_TYPE_DEFAULT:
1201 		result = kimage_load_normal_segment(image, segment);
1202 		break;
1203 	case KEXEC_TYPE_CRASH:
1204 		result = kimage_load_crash_segment(image, segment);
1205 		break;
1206 	}
1207 
1208 	return result;
1209 }
1210 
1211 /*
1212  * Exec Kernel system call: for obvious reasons only root may call it.
1213  *
1214  * This call breaks up into three pieces.
1215  * - A generic part which loads the new kernel from the current
1216  *   address space, and very carefully places the data in the
1217  *   allocated pages.
1218  *
1219  * - A generic part that interacts with the kernel and tells all of
1220  *   the devices to shut down, preventing on-going DMAs and placing
1221  *   the devices in a consistent state so a later kernel can
1222  *   reinitialize them.
1223  *
1224  * - A machine specific part that includes the syscall number
1225  *   and then copies the image to its final destination and
1226  *   jumps into the image at entry.
1227  *
1228  * kexec does not sync or unmount filesystems, so if you need
1229  * that to happen you need to do it yourself.
1230  */
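/*
 * Editor's sketch of a hypothetical userspace caller (not part of this
 * file; the buffers, sizes and entry address are placeholders).  It only
 * illustrates how the pieces described above fit together:
 *
 *	struct kexec_segment seg = {
 *		.buf   = image_buf,		// new kernel read into memory
 *		.bufsz = image_size,
 *		.mem   = (void *)0x1000000,	// page-aligned destination
 *		.memsz = image_size_aligned,	// bufsz rounded up to a page
 *	};
 *	if (syscall(__NR_kexec_load, entry, 1, &seg, KEXEC_ARCH_DEFAULT))
 *		perror("kexec_load");
 *	// reboot(LINUX_REBOOT_CMD_KEXEC) later jumps into the loaded image
 */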
1231 struct kimage *kexec_image;
1232 struct kimage *kexec_crash_image;
1233 int kexec_load_disabled;
1234 
1235 static DEFINE_MUTEX(kexec_mutex);
1236 
1237 SYSCALL_DEFINE4(kexec_load, unsigned long, entry, unsigned long, nr_segments,
1238 		struct kexec_segment __user *, segments, unsigned long, flags)
1239 {
1240 	struct kimage **dest_image, *image;
1241 	int result;
1242 
1243 	/* We only trust the superuser with rebooting the system. */
1244 	if (!capable(CAP_SYS_BOOT) || kexec_load_disabled)
1245 		return -EPERM;
1246 
1247 	/*
1248 	 * Verify we have a legal set of flags
1249 	 * This leaves us room for future extensions.
1250 	 */
1251 	if ((flags & KEXEC_FLAGS) != (flags & ~KEXEC_ARCH_MASK))
1252 		return -EINVAL;
1253 
1254 	/* Verify we are on the appropriate architecture */
1255 	if (((flags & KEXEC_ARCH_MASK) != KEXEC_ARCH) &&
1256 		((flags & KEXEC_ARCH_MASK) != KEXEC_ARCH_DEFAULT))
1257 		return -EINVAL;
1258 
1259 	/* Put an artificial cap on the number
1260 	 * of segments passed to kexec_load.
1261 	 */
1262 	if (nr_segments > KEXEC_SEGMENT_MAX)
1263 		return -EINVAL;
1264 
1265 	image = NULL;
1266 	result = 0;
1267 
1268 	/* Because we write directly to the reserved memory
1269 	 * region when loading crash kernels we need a mutex here to
1270 	 * prevent multiple crash kernels from attempting to load
1271 	 * simultaneously, and to prevent a crash kernel from loading
1272 	 * over the top of an in-use crash kernel.
1273 	 *
1274 	 * KISS: always take the mutex.
1275 	 */
1276 	if (!mutex_trylock(&kexec_mutex))
1277 		return -EBUSY;
1278 
1279 	dest_image = &kexec_image;
1280 	if (flags & KEXEC_ON_CRASH)
1281 		dest_image = &kexec_crash_image;
1282 	if (nr_segments > 0) {
1283 		unsigned long i;
1284 
1285 		/* Loading another kernel to reboot into */
1286 		if ((flags & KEXEC_ON_CRASH) == 0)
1287 			result = kimage_alloc_init(&image, entry, nr_segments,
1288 						   segments, flags);
1289 		/* Loading another kernel to switch to if this one crashes */
1290 		else if (flags & KEXEC_ON_CRASH) {
1291 			/* Free any current crash dump kernel before
1292 			 * we corrupt it.
1293 			 */
1294 			kimage_free(xchg(&kexec_crash_image, NULL));
1295 			result = kimage_alloc_init(&image, entry, nr_segments,
1296 						   segments, flags);
1297 			crash_map_reserved_pages();
1298 		}
1299 		if (result)
1300 			goto out;
1301 
1302 		if (flags & KEXEC_PRESERVE_CONTEXT)
1303 			image->preserve_context = 1;
1304 		result = machine_kexec_prepare(image);
1305 		if (result)
1306 			goto out;
1307 
1308 		for (i = 0; i < nr_segments; i++) {
1309 			result = kimage_load_segment(image, &image->segment[i]);
1310 			if (result)
1311 				goto out;
1312 		}
1313 		kimage_terminate(image);
1314 		if (flags & KEXEC_ON_CRASH)
1315 			crash_unmap_reserved_pages();
1316 	}
1317 	/* Install the new kernel and uninstall the old */
1318 	image = xchg(dest_image, image);
1319 
1320 out:
1321 	mutex_unlock(&kexec_mutex);
1322 	kimage_free(image);
1323 
1324 	return result;
1325 }
1326 
1327 /*
1328  * Add and remove page tables for crashkernel memory
1329  *
1330  * Provide an empty default implementation here -- architecture
1331  * code may override this
1332  */
1333 void __weak crash_map_reserved_pages(void)
1334 {}
1335 
1336 void __weak crash_unmap_reserved_pages(void)
1337 {}
1338 
1339 #ifdef CONFIG_COMPAT
1340 COMPAT_SYSCALL_DEFINE4(kexec_load, compat_ulong_t, entry,
1341 		       compat_ulong_t, nr_segments,
1342 		       struct compat_kexec_segment __user *, segments,
1343 		       compat_ulong_t, flags)
1344 {
1345 	struct compat_kexec_segment in;
1346 	struct kexec_segment out, __user *ksegments;
1347 	unsigned long i, result;
1348 
1349 	/* Don't allow clients that don't understand the native
1350 	 * architecture to do anything.
1351 	 */
1352 	if ((flags & KEXEC_ARCH_MASK) == KEXEC_ARCH_DEFAULT)
1353 		return -EINVAL;
1354 
1355 	if (nr_segments > KEXEC_SEGMENT_MAX)
1356 		return -EINVAL;
1357 
1358 	ksegments = compat_alloc_user_space(nr_segments * sizeof(out));
1359 	for (i = 0; i < nr_segments; i++) {
1360 		result = copy_from_user(&in, &segments[i], sizeof(in));
1361 		if (result)
1362 			return -EFAULT;
1363 
1364 		out.buf   = compat_ptr(in.buf);
1365 		out.bufsz = in.bufsz;
1366 		out.mem   = in.mem;
1367 		out.memsz = in.memsz;
1368 
1369 		result = copy_to_user(&ksegments[i], &out, sizeof(out));
1370 		if (result)
1371 			return -EFAULT;
1372 	}
1373 
1374 	return sys_kexec_load(entry, nr_segments, ksegments, flags);
1375 }
1376 #endif
1377 
1378 SYSCALL_DEFINE5(kexec_file_load, int, kernel_fd, int, initrd_fd,
1379 		unsigned long, cmdline_len, const char __user *, cmdline_ptr,
1380 		unsigned long, flags)
1381 {
1382 	int ret = 0, i;
1383 	struct kimage **dest_image, *image;
1384 
1385 	/* We only trust the superuser with rebooting the system. */
1386 	if (!capable(CAP_SYS_BOOT) || kexec_load_disabled)
1387 		return -EPERM;
1388 
1389 	/* Make sure we have a legal set of flags */
1390 	if (flags != (flags & KEXEC_FILE_FLAGS))
1391 		return -EINVAL;
1392 
1393 	image = NULL;
1394 
1395 	if (!mutex_trylock(&kexec_mutex))
1396 		return -EBUSY;
1397 
1398 	dest_image = &kexec_image;
1399 	if (flags & KEXEC_FILE_ON_CRASH)
1400 		dest_image = &kexec_crash_image;
1401 
1402 	if (flags & KEXEC_FILE_UNLOAD)
1403 		goto exchange;
1404 
1405 	/*
1406 	 * In case of crash, the new kernel gets loaded in the reserved region,
1407 	 * the same memory where an old crash kernel might be loaded. Free any
1408 	 * current crash dump kernel before we corrupt it.
1409 	 */
1410 	if (flags & KEXEC_FILE_ON_CRASH)
1411 		kimage_free(xchg(&kexec_crash_image, NULL));
1412 
1413 	ret = kimage_file_alloc_init(&image, kernel_fd, initrd_fd, cmdline_ptr,
1414 				     cmdline_len, flags);
1415 	if (ret)
1416 		goto out;
1417 
1418 	ret = machine_kexec_prepare(image);
1419 	if (ret)
1420 		goto out;
1421 
1422 	ret = kexec_calculate_store_digests(image);
1423 	if (ret)
1424 		goto out;
1425 
1426 	for (i = 0; i < image->nr_segments; i++) {
1427 		struct kexec_segment *ksegment;
1428 
1429 		ksegment = &image->segment[i];
1430 		pr_debug("Loading segment %d: buf=0x%p bufsz=0x%zx mem=0x%lx memsz=0x%zx\n",
1431 			 i, ksegment->buf, ksegment->bufsz, ksegment->mem,
1432 			 ksegment->memsz);
1433 
1434 		ret = kimage_load_segment(image, &image->segment[i]);
1435 		if (ret)
1436 			goto out;
1437 	}
1438 
1439 	kimage_terminate(image);
1440 
1441 	/*
1442 	 * Free up any temporary buffers allocated which are not needed
1443 	 * after the image has been loaded.
1444 	 */
1445 	kimage_file_post_load_cleanup(image);
1446 exchange:
1447 	image = xchg(dest_image, image);
1448 out:
1449 	mutex_unlock(&kexec_mutex);
1450 	kimage_free(image);
1451 	return ret;
1452 }
1453 
1454 void crash_kexec(struct pt_regs *regs)
1455 {
1456 	/* Take the kexec_mutex here to prevent sys_kexec_load
1457 	 * running on one cpu from replacing the crash kernel
1458 	 * we are using after a panic on a different cpu.
1459 	 *
1460 	 * If the crash kernel was not located in a fixed area
1461 	 * of memory the xchg(&kexec_crash_image) would be
1462 	 * sufficient.  But since I reuse the memory...
1463 	 */
1464 	if (mutex_trylock(&kexec_mutex)) {
1465 		if (kexec_crash_image) {
1466 			struct pt_regs fixed_regs;
1467 
1468 			crash_setup_regs(&fixed_regs, regs);
1469 			crash_save_vmcoreinfo();
1470 			machine_crash_shutdown(&fixed_regs);
1471 			machine_kexec(kexec_crash_image);
1472 		}
1473 		mutex_unlock(&kexec_mutex);
1474 	}
1475 }
1476 
1477 size_t crash_get_memory_size(void)
1478 {
1479 	size_t size = 0;
1480 	mutex_lock(&kexec_mutex);
1481 	if (crashk_res.end != crashk_res.start)
1482 		size = resource_size(&crashk_res);
1483 	mutex_unlock(&kexec_mutex);
1484 	return size;
1485 }
1486 
1487 void __weak crash_free_reserved_phys_range(unsigned long begin,
1488 					   unsigned long end)
1489 {
1490 	unsigned long addr;
1491 
1492 	for (addr = begin; addr < end; addr += PAGE_SIZE)
1493 		free_reserved_page(pfn_to_page(addr >> PAGE_SHIFT));
1494 }
1495 
1496 int crash_shrink_memory(unsigned long new_size)
1497 {
1498 	int ret = 0;
1499 	unsigned long start, end;
1500 	unsigned long old_size;
1501 	struct resource *ram_res;
1502 
1503 	mutex_lock(&kexec_mutex);
1504 
1505 	if (kexec_crash_image) {
1506 		ret = -ENOENT;
1507 		goto unlock;
1508 	}
1509 	start = crashk_res.start;
1510 	end = crashk_res.end;
1511 	old_size = (end == 0) ? 0 : end - start + 1;
1512 	if (new_size >= old_size) {
1513 		ret = (new_size == old_size) ? 0 : -EINVAL;
1514 		goto unlock;
1515 	}
1516 
1517 	ram_res = kzalloc(sizeof(*ram_res), GFP_KERNEL);
1518 	if (!ram_res) {
1519 		ret = -ENOMEM;
1520 		goto unlock;
1521 	}
1522 
1523 	start = roundup(start, KEXEC_CRASH_MEM_ALIGN);
1524 	end = roundup(start + new_size, KEXEC_CRASH_MEM_ALIGN);
1525 
1526 	crash_map_reserved_pages();
1527 	crash_free_reserved_phys_range(end, crashk_res.end);
1528 
1529 	if ((start == end) && (crashk_res.parent != NULL))
1530 		release_resource(&crashk_res);
1531 
1532 	ram_res->start = end;
1533 	ram_res->end = crashk_res.end;
1534 	ram_res->flags = IORESOURCE_BUSY | IORESOURCE_MEM;
1535 	ram_res->name = "System RAM";
1536 
1537 	crashk_res.end = end - 1;
1538 
1539 	insert_resource(&iomem_resource, ram_res);
1540 	crash_unmap_reserved_pages();
1541 
1542 unlock:
1543 	mutex_unlock(&kexec_mutex);
1544 	return ret;
1545 }
1546 
1547 static u32 *append_elf_note(u32 *buf, char *name, unsigned type, void *data,
1548 			    size_t data_len)
1549 {
1550 	struct elf_note note;
1551 
1552 	note.n_namesz = strlen(name) + 1;
1553 	note.n_descsz = data_len;
1554 	note.n_type   = type;
1555 	memcpy(buf, &note, sizeof(note));
1556 	buf += (sizeof(note) + 3)/4;
1557 	memcpy(buf, name, note.n_namesz);
1558 	buf += (note.n_namesz + 3)/4;
1559 	memcpy(buf, data, note.n_descsz);
1560 	buf += (note.n_descsz + 3)/4;
1561 
1562 	return buf;
1563 }
1564 
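/*
 * Editor's note: append_elf_note() above emits the standard ELF note
 * layout, with the name and descriptor each padded to a 4-byte boundary:
 *
 *	+----------+----------+--------+---------------+---------------+
 *	| n_namesz | n_descsz | n_type | name (padded) | desc (padded) |
 *	+----------+----------+--------+---------------+---------------+
 *
 * final_note() below terminates the sequence with an all-zero header.
 */
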
1565 static void final_note(u32 *buf)
1566 {
1567 	struct elf_note note;
1568 
1569 	note.n_namesz = 0;
1570 	note.n_descsz = 0;
1571 	note.n_type   = 0;
1572 	memcpy(buf, &note, sizeof(note));
1573 }
1574 
1575 void crash_save_cpu(struct pt_regs *regs, int cpu)
1576 {
1577 	struct elf_prstatus prstatus;
1578 	u32 *buf;
1579 
1580 	if ((cpu < 0) || (cpu >= nr_cpu_ids))
1581 		return;
1582 
1583 	/* Using ELF notes here is opportunistic.
1584 	 * I need a well defined structure format
1585 	 * for the data I pass, and I need tags
1586 	 * on the data to indicate what information I have
1587 	 * squirrelled away.  ELF notes happen to provide
1588 	 * all of that, so there is no need to invent something new.
1589 	 */
1590 	buf = (u32 *)per_cpu_ptr(crash_notes, cpu);
1591 	if (!buf)
1592 		return;
1593 	memset(&prstatus, 0, sizeof(prstatus));
1594 	prstatus.pr_pid = current->pid;
1595 	elf_core_copy_kernel_regs(&prstatus.pr_reg, regs);
1596 	buf = append_elf_note(buf, KEXEC_CORE_NOTE_NAME, NT_PRSTATUS,
1597 			      &prstatus, sizeof(prstatus));
1598 	final_note(buf);
1599 }
1600 
1601 static int __init crash_notes_memory_init(void)
1602 {
1603 	/* Allocate memory for saving cpu registers. */
1604 	crash_notes = alloc_percpu(note_buf_t);
1605 	if (!crash_notes) {
1606 		pr_warn("Kexec: Memory allocation for saving cpu register states failed\n");
1607 		return -ENOMEM;
1608 	}
1609 	return 0;
1610 }
1611 subsys_initcall(crash_notes_memory_init);
1612 
1613 
1614 /*
1615  * parsing the "crashkernel" commandline
1616  *
1617  * this code is intended to be called from architecture specific code
1618  */
1619 
1620 
1621 /*
1622  * This function parses command lines in the format
1623  *
1624  *   crashkernel=ramsize-range:size[,...][@offset]
1625  *
1626  * The function returns 0 on success and -EINVAL on failure.
1627  */
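/*
 * For example (editor's illustration, matching the parser below):
 *
 *	crashkernel=512M-2G:64M,2G-:128M
 *
 * reserves 64M if system RAM is at least 512M but below 2G, and 128M if
 * it is 2G or more; an optional trailing "@offset" fixes the base address
 * of the reservation.
 */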
1628 static int __init parse_crashkernel_mem(char *cmdline,
1629 					unsigned long long system_ram,
1630 					unsigned long long *crash_size,
1631 					unsigned long long *crash_base)
1632 {
1633 	char *cur = cmdline, *tmp;
1634 
1635 	/* for each entry of the comma-separated list */
1636 	do {
1637 		unsigned long long start, end = ULLONG_MAX, size;
1638 
1639 		/* get the start of the range */
1640 		start = memparse(cur, &tmp);
1641 		if (cur == tmp) {
1642 			pr_warn("crashkernel: Memory value expected\n");
1643 			return -EINVAL;
1644 		}
1645 		cur = tmp;
1646 		if (*cur != '-') {
1647 			pr_warn("crashkernel: '-' expected\n");
1648 			return -EINVAL;
1649 		}
1650 		cur++;
1651 
1652 		/* if no ':' is here, then we read the end */
1653 		if (*cur != ':') {
1654 			end = memparse(cur, &tmp);
1655 			if (cur == tmp) {
1656 				pr_warn("crashkernel: Memory value expected\n");
1657 				return -EINVAL;
1658 			}
1659 			cur = tmp;
1660 			if (end <= start) {
1661 				pr_warn("crashkernel: end <= start\n");
1662 				return -EINVAL;
1663 			}
1664 		}
1665 
1666 		if (*cur != ':') {
1667 			pr_warn("crashkernel: ':' expected\n");
1668 			return -EINVAL;
1669 		}
1670 		cur++;
1671 
1672 		size = memparse(cur, &tmp);
1673 		if (cur == tmp) {
1674 			pr_warn("Memory value expected\n");
1675 			return -EINVAL;
1676 		}
1677 		cur = tmp;
1678 		if (size >= system_ram) {
1679 			pr_warn("crashkernel: invalid size\n");
1680 			return -EINVAL;
1681 		}
1682 
1683 		/* match ? */
1684 		if (system_ram >= start && system_ram < end) {
1685 			*crash_size = size;
1686 			break;
1687 		}
1688 	} while (*cur++ == ',');
1689 
1690 	if (*crash_size > 0) {
1691 		while (*cur && *cur != ' ' && *cur != '@')
1692 			cur++;
1693 		if (*cur == '@') {
1694 			cur++;
1695 			*crash_base = memparse(cur, &tmp);
1696 			if (cur == tmp) {
1697 				pr_warn("Memory value expected after '@'\n");
1698 				return -EINVAL;
1699 			}
1700 		}
1701 	}
1702 
1703 	return 0;
1704 }
1705 
1706 /*
1707  * This function parses "simple" (old) crashkernel command lines like
1708  *
1709  *	crashkernel=size[@offset]
1710  *
1711  * It returns 0 on success and -EINVAL on failure.
1712  */
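/*
 * For example (editor's illustration): "crashkernel=128M@16M" requests a
 * 128M reservation based at physical address 16M.  When "@offset" is
 * omitted, *crash_base is left at 0 and the caller chooses the placement.
 */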
1713 static int __init parse_crashkernel_simple(char *cmdline,
1714 					   unsigned long long *crash_size,
1715 					   unsigned long long *crash_base)
1716 {
1717 	char *cur = cmdline;
1718 
1719 	*crash_size = memparse(cmdline, &cur);
1720 	if (cmdline == cur) {
1721 		pr_warn("crashkernel: memory value expected\n");
1722 		return -EINVAL;
1723 	}
1724 
1725 	if (*cur == '@')
1726 		*crash_base = memparse(cur+1, &cur);
1727 	else if (*cur != ' ' && *cur != '\0') {
1728 		pr_warn("crashkernel: unrecognized char\n");
1729 		return -EINVAL;
1730 	}
1731 
1732 	return 0;
1733 }
1734 
1735 #define SUFFIX_HIGH 0
1736 #define SUFFIX_LOW  1
1737 #define SUFFIX_NULL 2
1738 static __initdata char *suffix_tbl[] = {
1739 	[SUFFIX_HIGH] = ",high",
1740 	[SUFFIX_LOW]  = ",low",
1741 	[SUFFIX_NULL] = NULL,
1742 };
1743 
1744 /*
1745  * This function parses "suffix" crashkernel command lines like
1746  *
1747  *	crashkernel=size,[high|low]
1748  *
1749  * It returns 0 on success and -EINVAL on failure.
1750  */
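/*
 * For example (editor's illustration): "crashkernel=256M,high" matches the
 * ",high" entry of suffix_tbl[] and yields a 256M size.  No "@offset" is
 * accepted in this form; the check below rejects anything after the suffix
 * other than a space or the end of the string.
 */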
1751 static int __init parse_crashkernel_suffix(char *cmdline,
1752 					   unsigned long long	*crash_size,
1753 					   unsigned long long	*crash_base,
1754 					   const char *suffix)
1755 {
1756 	char *cur = cmdline;
1757 
1758 	*crash_size = memparse(cmdline, &cur);
1759 	if (cmdline == cur) {
1760 		pr_warn("crashkernel: memory value expected\n");
1761 		return -EINVAL;
1762 	}
1763 
1764 	/* check with suffix */
1765 	if (strncmp(cur, suffix, strlen(suffix))) {
1766 		pr_warn("crashkernel: unrecognized char\n");
1767 		return -EINVAL;
1768 	}
1769 	cur += strlen(suffix);
1770 	if (*cur != ' ' && *cur != '\0') {
1771 		pr_warn("crashkernel: unrecognized char\n");
1772 		return -EINVAL;
1773 	}
1774 
1775 	return 0;
1776 }
1777 
1778 static __init char *get_last_crashkernel(char *cmdline,
1779 			     const char *name,
1780 			     const char *suffix)
1781 {
1782 	char *p = cmdline, *ck_cmdline = NULL;
1783 
1784 	/* find crashkernel and use the last one if there are more */
1785 	p = strstr(p, name);
1786 	while (p) {
1787 		char *end_p = strchr(p, ' ');
1788 		char *q;
1789 
1790 		if (!end_p)
1791 			end_p = p + strlen(p);
1792 
1793 		if (!suffix) {
1794 			int i;
1795 
1796 			/* skip the one with any known suffix */
1797 			for (i = 0; suffix_tbl[i]; i++) {
1798 				q = end_p - strlen(suffix_tbl[i]);
1799 				if (!strncmp(q, suffix_tbl[i],
1800 					     strlen(suffix_tbl[i])))
1801 					goto next;
1802 			}
1803 			ck_cmdline = p;
1804 		} else {
1805 			q = end_p - strlen(suffix);
1806 			if (!strncmp(q, suffix, strlen(suffix)))
1807 				ck_cmdline = p;
1808 		}
1809 next:
1810 		p = strstr(p+1, name);
1811 	}
1812 
1813 	if (!ck_cmdline)
1814 		return NULL;
1815 
1816 	return ck_cmdline;
1817 }
1818 
1819 static int __init __parse_crashkernel(char *cmdline,
1820 			     unsigned long long system_ram,
1821 			     unsigned long long *crash_size,
1822 			     unsigned long long *crash_base,
1823 			     const char *name,
1824 			     const char *suffix)
1825 {
1826 	char	*first_colon, *first_space;
1827 	char	*ck_cmdline;
1828 
1829 	BUG_ON(!crash_size || !crash_base);
1830 	*crash_size = 0;
1831 	*crash_base = 0;
1832 
1833 	ck_cmdline = get_last_crashkernel(cmdline, name, suffix);
1834 
1835 	if (!ck_cmdline)
1836 		return -EINVAL;
1837 
1838 	ck_cmdline += strlen(name);
1839 
1840 	if (suffix)
1841 		return parse_crashkernel_suffix(ck_cmdline, crash_size,
1842 				crash_base, suffix);
1843 	/*
1844 	 * if the commandline contains a ':', then that's the extended
1845 	 * syntax -- if not, it must be the classic syntax
1846 	 */
1847 	first_colon = strchr(ck_cmdline, ':');
1848 	first_space = strchr(ck_cmdline, ' ');
1849 	if (first_colon && (!first_space || first_colon < first_space))
1850 		return parse_crashkernel_mem(ck_cmdline, system_ram,
1851 				crash_size, crash_base);
1852 
1853 	return parse_crashkernel_simple(ck_cmdline, crash_size, crash_base);
1854 }
1855 
1856 /*
1857  * This function is the entry point for command line parsing and should be
1858  * called from the arch-specific code.
1859  */
1860 int __init parse_crashkernel(char *cmdline,
1861 			     unsigned long long system_ram,
1862 			     unsigned long long *crash_size,
1863 			     unsigned long long *crash_base)
1864 {
1865 	return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base,
1866 					"crashkernel=", NULL);
1867 }
1868 
1869 int __init parse_crashkernel_high(char *cmdline,
1870 			     unsigned long long system_ram,
1871 			     unsigned long long *crash_size,
1872 			     unsigned long long *crash_base)
1873 {
1874 	return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base,
1875 				"crashkernel=", suffix_tbl[SUFFIX_HIGH]);
1876 }
1877 
1878 int __init parse_crashkernel_low(char *cmdline,
1879 			     unsigned long long system_ram,
1880 			     unsigned long long *crash_size,
1881 			     unsigned long long *crash_base)
1882 {
1883 	return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base,
1884 				"crashkernel=", suffix_tbl[SUFFIX_LOW]);
1885 }
1886 
1887 static void update_vmcoreinfo_note(void)
1888 {
1889 	u32 *buf = vmcoreinfo_note;
1890 
1891 	if (!vmcoreinfo_size)
1892 		return;
1893 	buf = append_elf_note(buf, VMCOREINFO_NOTE_NAME, 0, vmcoreinfo_data,
1894 			      vmcoreinfo_size);
1895 	final_note(buf);
1896 }
1897 
1898 void crash_save_vmcoreinfo(void)
1899 {
1900 	vmcoreinfo_append_str("CRASHTIME=%ld\n", get_seconds());
1901 	update_vmcoreinfo_note();
1902 }
1903 
1904 void vmcoreinfo_append_str(const char *fmt, ...)
1905 {
1906 	va_list args;
1907 	char buf[0x50];
1908 	size_t r;
1909 
1910 	va_start(args, fmt);
1911 	r = vscnprintf(buf, sizeof(buf), fmt, args);
1912 	va_end(args);
1913 
1914 	r = min(r, vmcoreinfo_max_size - vmcoreinfo_size);
1915 
1916 	memcpy(&vmcoreinfo_data[vmcoreinfo_size], buf, r);
1917 
1918 	vmcoreinfo_size += r;
1919 }
1920 
1921 /*
1922  * provide an empty default implementation here -- architecture
1923  * code may override this
1924  */
1925 void __weak arch_crash_save_vmcoreinfo(void)
1926 {}
1927 
1928 unsigned long __weak paddr_vmcoreinfo_note(void)
1929 {
1930 	return __pa((unsigned long)(char *)&vmcoreinfo_note);
1931 }
1932 
1933 static int __init crash_save_vmcoreinfo_init(void)
1934 {
1935 	VMCOREINFO_OSRELEASE(init_uts_ns.name.release);
1936 	VMCOREINFO_PAGESIZE(PAGE_SIZE);
1937 
1938 	VMCOREINFO_SYMBOL(init_uts_ns);
1939 	VMCOREINFO_SYMBOL(node_online_map);
1940 #ifdef CONFIG_MMU
1941 	VMCOREINFO_SYMBOL(swapper_pg_dir);
1942 #endif
1943 	VMCOREINFO_SYMBOL(_stext);
1944 	VMCOREINFO_SYMBOL(vmap_area_list);
1945 
1946 #ifndef CONFIG_NEED_MULTIPLE_NODES
1947 	VMCOREINFO_SYMBOL(mem_map);
1948 	VMCOREINFO_SYMBOL(contig_page_data);
1949 #endif
1950 #ifdef CONFIG_SPARSEMEM
1951 	VMCOREINFO_SYMBOL(mem_section);
1952 	VMCOREINFO_LENGTH(mem_section, NR_SECTION_ROOTS);
1953 	VMCOREINFO_STRUCT_SIZE(mem_section);
1954 	VMCOREINFO_OFFSET(mem_section, section_mem_map);
1955 #endif
1956 	VMCOREINFO_STRUCT_SIZE(page);
1957 	VMCOREINFO_STRUCT_SIZE(pglist_data);
1958 	VMCOREINFO_STRUCT_SIZE(zone);
1959 	VMCOREINFO_STRUCT_SIZE(free_area);
1960 	VMCOREINFO_STRUCT_SIZE(list_head);
1961 	VMCOREINFO_SIZE(nodemask_t);
1962 	VMCOREINFO_OFFSET(page, flags);
1963 	VMCOREINFO_OFFSET(page, _count);
1964 	VMCOREINFO_OFFSET(page, mapping);
1965 	VMCOREINFO_OFFSET(page, lru);
1966 	VMCOREINFO_OFFSET(page, _mapcount);
1967 	VMCOREINFO_OFFSET(page, private);
1968 	VMCOREINFO_OFFSET(pglist_data, node_zones);
1969 	VMCOREINFO_OFFSET(pglist_data, nr_zones);
1970 #ifdef CONFIG_FLAT_NODE_MEM_MAP
1971 	VMCOREINFO_OFFSET(pglist_data, node_mem_map);
1972 #endif
1973 	VMCOREINFO_OFFSET(pglist_data, node_start_pfn);
1974 	VMCOREINFO_OFFSET(pglist_data, node_spanned_pages);
1975 	VMCOREINFO_OFFSET(pglist_data, node_id);
1976 	VMCOREINFO_OFFSET(zone, free_area);
1977 	VMCOREINFO_OFFSET(zone, vm_stat);
1978 	VMCOREINFO_OFFSET(zone, spanned_pages);
1979 	VMCOREINFO_OFFSET(free_area, free_list);
1980 	VMCOREINFO_OFFSET(list_head, next);
1981 	VMCOREINFO_OFFSET(list_head, prev);
1982 	VMCOREINFO_OFFSET(vmap_area, va_start);
1983 	VMCOREINFO_OFFSET(vmap_area, list);
1984 	VMCOREINFO_LENGTH(zone.free_area, MAX_ORDER);
1985 	log_buf_kexec_setup();
1986 	VMCOREINFO_LENGTH(free_area.free_list, MIGRATE_TYPES);
1987 	VMCOREINFO_NUMBER(NR_FREE_PAGES);
1988 	VMCOREINFO_NUMBER(PG_lru);
1989 	VMCOREINFO_NUMBER(PG_private);
1990 	VMCOREINFO_NUMBER(PG_swapcache);
1991 	VMCOREINFO_NUMBER(PG_slab);
1992 #ifdef CONFIG_MEMORY_FAILURE
1993 	VMCOREINFO_NUMBER(PG_hwpoison);
1994 #endif
1995 	VMCOREINFO_NUMBER(PG_head_mask);
1996 	VMCOREINFO_NUMBER(PAGE_BUDDY_MAPCOUNT_VALUE);
1997 #ifdef CONFIG_HUGETLBFS
1998 	VMCOREINFO_SYMBOL(free_huge_page);
1999 #endif
2000 
2001 	arch_crash_save_vmcoreinfo();
2002 	update_vmcoreinfo_note();
2003 
2004 	return 0;
2005 }
2006 
2007 subsys_initcall(crash_save_vmcoreinfo_init);
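
/*
 * The address and size of the resulting note are exported to user space via
 * /sys/kernel/vmcoreinfo, and the note itself appears as an ELF note in the
 * /proc/vmcore of the capture kernel, where dump tools such as makedumpfile
 * interpret the recorded symbols, sizes and offsets.
 */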
2008 
2009 static int __kexec_add_segment(struct kimage *image, char *buf,
2010 			       unsigned long bufsz, unsigned long mem,
2011 			       unsigned long memsz)
2012 {
2013 	struct kexec_segment *ksegment;
2014 
2015 	ksegment = &image->segment[image->nr_segments];
2016 	ksegment->kbuf = buf;
2017 	ksegment->bufsz = bufsz;
2018 	ksegment->mem = mem;
2019 	ksegment->memsz = memsz;
2020 	image->nr_segments++;
2021 
2022 	return 0;
2023 }
2024 
2025 static int locate_mem_hole_top_down(unsigned long start, unsigned long end,
2026 				    struct kexec_buf *kbuf)
2027 {
2028 	struct kimage *image = kbuf->image;
2029 	unsigned long temp_start, temp_end;
2030 
2031 	temp_end = min(end, kbuf->buf_max);
2032 	temp_start = temp_end - kbuf->memsz;
2033 
2034 	do {
2035 		/* align down start */
2036 		temp_start = temp_start & (~(kbuf->buf_align - 1));
2037 
2038 		if (temp_start < start || temp_start < kbuf->buf_min)
2039 			return 0;
2040 
2041 		temp_end = temp_start + kbuf->memsz - 1;
2042 
2043 		/*
2044 		 * Make sure this does not conflict with any of the existing
2045 		 * segments.
2046 		 */
2047 		if (kimage_is_destination_range(image, temp_start, temp_end)) {
2048 			temp_start = temp_start - PAGE_SIZE;
2049 			continue;
2050 		}
2051 
2052 		/* We found a suitable memory range */
2053 		break;
2054 	} while (1);
2055 
2056 	/* If we are here, we found a suitable memory range */
2057 	__kexec_add_segment(image, kbuf->buffer, kbuf->bufsz, temp_start,
2058 			    kbuf->memsz);
2059 
2060 	/* Success, stop navigating through remaining System RAM ranges */
2061 	return 1;
2062 }
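
/*
 * Worked example (illustrative numbers): with buf_max = 0x40000000,
 * memsz = 0x3000 and buf_align = 0x1000, the first candidate window is
 * temp_start = (0x40000000 - 0x3000) & ~0xfffUL = 0x3fffd000 with
 * temp_end = 0x3fffffff; on a conflict with an existing segment the window
 * slides down by PAGE_SIZE and the alignment mask is applied again.
 */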
2063 
2064 static int locate_mem_hole_bottom_up(unsigned long start, unsigned long end,
2065 				     struct kexec_buf *kbuf)
2066 {
2067 	struct kimage *image = kbuf->image;
2068 	unsigned long temp_start, temp_end;
2069 
2070 	temp_start = max(start, kbuf->buf_min);
2071 
2072 	do {
2073 		temp_start = ALIGN(temp_start, kbuf->buf_align);
2074 		temp_end = temp_start + kbuf->memsz - 1;
2075 
2076 		if (temp_end > end || temp_end > kbuf->buf_max)
2077 			return 0;
2078 		/*
2079 		 * Make sure this does not conflict with any of the existing
2080 		 * segments.
2081 		 */
2082 		if (kimage_is_destination_range(image, temp_start, temp_end)) {
2083 			temp_start = temp_start + PAGE_SIZE;
2084 			continue;
2085 		}
2086 
2087 		/* We found a suitable memory range */
2088 		break;
2089 	} while (1);
2090 
2091 	/* If we are here, we found a suitable memory range */
2092 	__kexec_add_segment(image, kbuf->buffer, kbuf->bufsz, temp_start,
2093 			    kbuf->memsz);
2094 
2095 	/* Success, stop navigating through remaining System RAM ranges */
2096 	return 1;
2097 }
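
/*
 * Bottom-up counterpart (illustrative numbers): with buf_min = 0x100000,
 * buf_align = 0x200000 and memsz = 0x3000, the first candidate is
 * temp_start = ALIGN(0x100000, 0x200000) = 0x200000 and temp_end = 0x202fff;
 * on a conflict the window moves up by PAGE_SIZE and is re-aligned on the
 * next iteration.
 */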
2098 
2099 static int locate_mem_hole_callback(u64 start, u64 end, void *arg)
2100 {
2101 	struct kexec_buf *kbuf = (struct kexec_buf *)arg;
2102 	unsigned long sz = end - start + 1;
2103 
2104 	/* Returning 0 makes the caller try the next memory range */
2105 	if (sz < kbuf->memsz)
2106 		return 0;
2107 
2108 	if (end < kbuf->buf_min || start > kbuf->buf_max)
2109 		return 0;
2110 
2111 	/*
2112 	 * Allocate memory top down within the RAM range; otherwise allocate
2113 	 * bottom up.
2114 	 */
2115 	if (kbuf->top_down)
2116 		return locate_mem_hole_top_down(start, end, kbuf);
2117 	return locate_mem_hole_bottom_up(start, end, kbuf);
2118 }
2119 
2120 /*
2121  * Helper function for placing a buffer in a kexec segment. This assumes
2122  * that kexec_mutex is held.
2123  */
2124 int kexec_add_buffer(struct kimage *image, char *buffer, unsigned long bufsz,
2125 		     unsigned long memsz, unsigned long buf_align,
2126 		     unsigned long buf_min, unsigned long buf_max,
2127 		     bool top_down, unsigned long *load_addr)
2128 {
2129 
2130 	struct kexec_segment *ksegment;
2131 	struct kexec_buf buf, *kbuf;
2132 	int ret;
2133 
2134 	/* Currently, adding a segment this way is allowed only in file mode */
2135 	if (!image->file_mode)
2136 		return -EINVAL;
2137 
2138 	if (image->nr_segments >= KEXEC_SEGMENT_MAX)
2139 		return -EINVAL;
2140 
2141 	/*
2142 	 * Make sure we are not trying to add a buffer after the control
2143 	 * pages have been allocated. All segments need to be placed before
2144 	 * any control pages are allocated, because the control page
2145 	 * allocation logic walks the list of segments to make sure there
2146 	 * are no destination overlaps.
2147 	 */
2148 	if (!list_empty(&image->control_pages)) {
2149 		WARN_ON(1);
2150 		return -EINVAL;
2151 	}
2152 
2153 	memset(&buf, 0, sizeof(struct kexec_buf));
2154 	kbuf = &buf;
2155 	kbuf->image = image;
2156 	kbuf->buffer = buffer;
2157 	kbuf->bufsz = bufsz;
2158 
2159 	kbuf->memsz = ALIGN(memsz, PAGE_SIZE);
2160 	kbuf->buf_align = max(buf_align, PAGE_SIZE);
2161 	kbuf->buf_min = buf_min;
2162 	kbuf->buf_max = buf_max;
2163 	kbuf->top_down = top_down;
2164 
2165 	/* Walk the RAM ranges and allocate a suitable range for the buffer */
2166 	if (image->type == KEXEC_TYPE_CRASH)
2167 		ret = walk_iomem_res("Crash kernel",
2168 				     IORESOURCE_MEM | IORESOURCE_BUSY,
2169 				     crashk_res.start, crashk_res.end, kbuf,
2170 				     locate_mem_hole_callback);
2171 	else
2172 		ret = walk_system_ram_res(0, -1, kbuf,
2173 					  locate_mem_hole_callback);
2174 	if (ret != 1) {
2175 		/* A suitable memory range could not be found for the buffer */
2176 		return -EADDRNOTAVAIL;
2177 	}
2178 
2179 	/* Found a suitable memory range */
2180 	ksegment = &image->segment[image->nr_segments - 1];
2181 	*load_addr = ksegment->mem;
2182 	return 0;
2183 }
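
/*
 * Illustrative call site (a sketch, not taken from any particular arch
 * loader; MIN_KERNEL_LOAD_ADDR is a placeholder for an arch-specific lower
 * bound):
 *
 *	unsigned long kernel_load_addr;
 *
 *	ret = kexec_add_buffer(image, kernel_buf, kernel_len,
 *			       ALIGN(kernel_len, PAGE_SIZE), PAGE_SIZE,
 *			       MIN_KERNEL_LOAD_ADDR, ULONG_MAX,
 *			       false, &kernel_load_addr);
 */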
2184 
2185 /* Calculate and store the digest of segments */
2186 static int kexec_calculate_store_digests(struct kimage *image)
2187 {
2188 	struct crypto_shash *tfm;
2189 	struct shash_desc *desc;
2190 	int ret = 0, i, j, zero_buf_sz, sha_region_sz;
2191 	size_t desc_size, nullsz;
2192 	char *digest;
2193 	void *zero_buf;
2194 	struct kexec_sha_region *sha_regions;
2195 	struct purgatory_info *pi = &image->purgatory_info;
2196 
2197 	zero_buf = __va(page_to_pfn(ZERO_PAGE(0)) << PAGE_SHIFT);
2198 	zero_buf_sz = PAGE_SIZE;
2199 
2200 	tfm = crypto_alloc_shash("sha256", 0, 0);
2201 	if (IS_ERR(tfm)) {
2202 		ret = PTR_ERR(tfm);
2203 		goto out;
2204 	}
2205 
2206 	desc_size = crypto_shash_descsize(tfm) + sizeof(*desc);
2207 	desc = kzalloc(desc_size, GFP_KERNEL);
2208 	if (!desc) {
2209 		ret = -ENOMEM;
2210 		goto out_free_tfm;
2211 	}
2212 
2213 	sha_region_sz = KEXEC_SEGMENT_MAX * sizeof(struct kexec_sha_region);
2214 	sha_regions = vzalloc(sha_region_sz);
2215 	if (!sha_regions) {
2216 		ret = -ENOMEM;
		goto out_free_desc;
	}
2217 
2218 	desc->tfm   = tfm;
2219 	desc->flags = 0;
2220 
2221 	ret = crypto_shash_init(desc);
2222 	if (ret < 0)
2223 		goto out_free_sha_regions;
2224 
2225 	digest = kzalloc(SHA256_DIGEST_SIZE, GFP_KERNEL);
2226 	if (!digest) {
2227 		ret = -ENOMEM;
2228 		goto out_free_sha_regions;
2229 	}
2230 
2231 	for (j = i = 0; i < image->nr_segments; i++) {
2232 		struct kexec_segment *ksegment;
2233 
2234 		ksegment = &image->segment[i];
2235 		/*
2236 		 * Skip the purgatory segment, as it will be modified once we
2237 		 * store the digest info in purgatory.
2238 		 */
2239 		if (ksegment->kbuf == pi->purgatory_buf)
2240 			continue;
2241 
2242 		ret = crypto_shash_update(desc, ksegment->kbuf,
2243 					  ksegment->bufsz);
2244 		if (ret)
2245 			break;
2246 
2247 		/*
2248 		 * Assume the rest of the segment is filled with zeroes and
2249 		 * update the digest accordingly.
2250 		 */
2251 		nullsz = ksegment->memsz - ksegment->bufsz;
2252 		while (nullsz) {
2253 			unsigned long bytes = nullsz;
2254 
2255 			if (bytes > zero_buf_sz)
2256 				bytes = zero_buf_sz;
2257 			ret = crypto_shash_update(desc, zero_buf, bytes);
2258 			if (ret)
2259 				break;
2260 			nullsz -= bytes;
2261 		}
2262 
2263 		if (ret)
2264 			break;
2265 
2266 		sha_regions[j].start = ksegment->mem;
2267 		sha_regions[j].len = ksegment->memsz;
2268 		j++;
2269 	}
2270 
2271 	if (!ret) {
2272 		ret = crypto_shash_final(desc, digest);
2273 		if (ret)
2274 			goto out_free_digest;
2275 		ret = kexec_purgatory_get_set_symbol(image, "sha_regions",
2276 						sha_regions, sha_region_sz, 0);
2277 		if (ret)
2278 			goto out_free_digest;
2279 
2280 		ret = kexec_purgatory_get_set_symbol(image, "sha256_digest",
2281 						digest, SHA256_DIGEST_SIZE, 0);
2282 		if (ret)
2283 			goto out_free_digest;
2284 	}
2285 
2286 out_free_digest:
2287 	kfree(digest);
2288 out_free_sha_regions:
2289 	vfree(sha_regions);
2290 out_free_desc:
2291 	kfree(desc);
2292 out_free_tfm:
2293 	kfree(tfm);
2294 out:
2295 	return ret;
2296 }
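
/*
 * The sha_regions[] array and sha256_digest stored into purgatory above are
 * consumed by the purgatory code itself (e.g. arch/x86/purgatory/purgatory.c),
 * which re-hashes the listed regions and compares the result against the
 * stored digest before jumping to the new kernel.
 */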
2297 
2298 /* Actually load purgatory. A lot of this code is taken from kexec-tools. */
2299 static int __kexec_load_purgatory(struct kimage *image, unsigned long min,
2300 				  unsigned long max, int top_down)
2301 {
2302 	struct purgatory_info *pi = &image->purgatory_info;
2303 	unsigned long align, buf_align, bss_align, buf_sz, bss_sz, bss_pad;
2304 	unsigned long memsz, entry, load_addr, curr_load_addr, bss_addr, offset;
2305 	unsigned char *buf_addr, *src;
2306 	int i, ret = 0, entry_sidx = -1;
2307 	const Elf_Shdr *sechdrs_c;
2308 	Elf_Shdr *sechdrs = NULL;
2309 	void *purgatory_buf = NULL;
2310 
2311 	/*
2312 	 * sechdrs_c points to the section headers in purgatory, which are
2313 	 * read-only. No modifications allowed.
2314 	 */
2315 	sechdrs_c = (void *)pi->ehdr + pi->ehdr->e_shoff;
2316 
2317 	/*
2318 	 * We cannot modify sechdrs_c[] or its fields; it is read-only.
2319 	 * Copy it over to a local copy in which we can store some temporary
2320 	 * data and free it at the end. We need to modify the ->sh_addr and
2321 	 * ->sh_offset fields to keep track of the permanent and temporary
2322 	 * locations of the sections.
2323 	 */
2324 	sechdrs = vzalloc(pi->ehdr->e_shnum * sizeof(Elf_Shdr));
2325 	if (!sechdrs)
2326 		return -ENOMEM;
2327 
2328 	memcpy(sechdrs, sechdrs_c, pi->ehdr->e_shnum * sizeof(Elf_Shdr));
2329 
2330 	/*
2331 	 * We end up with multiple copies of the sections. The first copy is
2332 	 * the one embedded in the kernel in a read-only section. Some of
2333 	 * these sections will be copied to a temporary buffer and relocated,
2334 	 * and those sections will finally be copied to their final
2335 	 * destination at segment load time.
2336 	 *
2337 	 * Use ->sh_offset to reflect the section address in memory. It will
2338 	 * point to the original read-only copy if the section is not
2339 	 * allocatable; otherwise it will point to the temporary copy that
2340 	 * will be relocated.
2341 	 *
2342 	 * Use ->sh_addr to hold the section's final (execution-time) address.
2343 	 */
2344 	for (i = 0; i < pi->ehdr->e_shnum; i++) {
2345 		if (sechdrs[i].sh_type == SHT_NOBITS)
2346 			continue;
2347 
2348 		sechdrs[i].sh_offset = (unsigned long)pi->ehdr +
2349 						sechdrs[i].sh_offset;
2350 	}
2351 
2352 	/*
2353 	 * Identify the entry point's section and make the entry relative to
2354 	 * the section start.
2355 	 */
2356 	entry = pi->ehdr->e_entry;
2357 	for (i = 0; i < pi->ehdr->e_shnum; i++) {
2358 		if (!(sechdrs[i].sh_flags & SHF_ALLOC))
2359 			continue;
2360 
2361 		if (!(sechdrs[i].sh_flags & SHF_EXECINSTR))
2362 			continue;
2363 
2364 		/* Make the entry address section-relative */
2365 		if (sechdrs[i].sh_addr <= pi->ehdr->e_entry &&
2366 		    ((sechdrs[i].sh_addr + sechdrs[i].sh_size) >
2367 		     pi->ehdr->e_entry)) {
2368 			entry_sidx = i;
2369 			entry -= sechdrs[i].sh_addr;
2370 			break;
2371 		}
2372 	}
2373 
2374 	/* Determine how much memory is needed to load relocatable object. */
2375 	buf_align = 1;
2376 	bss_align = 1;
2377 	buf_sz = 0;
2378 	bss_sz = 0;
2379 
2380 	for (i = 0; i < pi->ehdr->e_shnum; i++) {
2381 		if (!(sechdrs[i].sh_flags & SHF_ALLOC))
2382 			continue;
2383 
2384 		align = sechdrs[i].sh_addralign;
2385 		if (sechdrs[i].sh_type != SHT_NOBITS) {
2386 			if (buf_align < align)
2387 				buf_align = align;
2388 			buf_sz = ALIGN(buf_sz, align);
2389 			buf_sz += sechdrs[i].sh_size;
2390 		} else {
2391 			/* bss section */
2392 			if (bss_align < align)
2393 				bss_align = align;
2394 			bss_sz = ALIGN(bss_sz, align);
2395 			bss_sz += sechdrs[i].sh_size;
2396 		}
2397 	}
2398 
2399 	/* Determine the bss padding required to align bss properly */
2400 	bss_pad = 0;
2401 	if (buf_sz & (bss_align - 1))
2402 		bss_pad = bss_align - (buf_sz & (bss_align - 1));
2403 
2404 	memsz = buf_sz + bss_pad + bss_sz;
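	/*
	 * Worked example (illustrative numbers): with buf_sz = 0x1100 and
	 * bss_align = 0x1000, bss_pad = 0x1000 - 0x100 = 0xf00, so the bss
	 * starts at a 0x1000-aligned offset right after the loaded sections.
	 */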
2405 
2406 	/* Allocate buffer for purgatory */
2407 	purgatory_buf = vzalloc(buf_sz);
2408 	if (!purgatory_buf) {
2409 		ret = -ENOMEM;
2410 		goto out;
2411 	}
2412 
2413 	if (buf_align < bss_align)
2414 		buf_align = bss_align;
2415 
2416 	/* Add buffer to segment list */
2417 	ret = kexec_add_buffer(image, purgatory_buf, buf_sz, memsz,
2418 				buf_align, min, max, top_down,
2419 				&pi->purgatory_load_addr);
2420 	if (ret)
2421 		goto out;
2422 
2423 	/* Load SHF_ALLOC sections */
2424 	buf_addr = purgatory_buf;
2425 	load_addr = curr_load_addr = pi->purgatory_load_addr;
2426 	bss_addr = load_addr + buf_sz + bss_pad;
2427 
2428 	for (i = 0; i < pi->ehdr->e_shnum; i++) {
2429 		if (!(sechdrs[i].sh_flags & SHF_ALLOC))
2430 			continue;
2431 
2432 		align = sechdrs[i].sh_addralign;
2433 		if (sechdrs[i].sh_type != SHT_NOBITS) {
2434 			curr_load_addr = ALIGN(curr_load_addr, align);
2435 			offset = curr_load_addr - load_addr;
2436 			/* We already modified ->sh_offset to keep the src addr */
2437 			src = (char *) sechdrs[i].sh_offset;
2438 			memcpy(buf_addr + offset, src, sechdrs[i].sh_size);
2439 
2440 			/* Store the load address of the section */
2441 			sechdrs[i].sh_addr = curr_load_addr;
2442 
2443 			/*
2444 			 * This section got copied to temporary buffer. Update
2445 			 * ->sh_offset accordingly.
2446 			 */
2447 			sechdrs[i].sh_offset = (unsigned long)(buf_addr + offset);
2448 
2449 			/* Advance to the next address */
2450 			curr_load_addr += sechdrs[i].sh_size;
2451 		} else {
2452 			bss_addr = ALIGN(bss_addr, align);
2453 			sechdrs[i].sh_addr = bss_addr;
2454 			bss_addr += sechdrs[i].sh_size;
2455 		}
2456 	}
2457 
2458 	/* Update entry point based on load address of text section */
2459 	if (entry_sidx >= 0)
2460 		entry += sechdrs[entry_sidx].sh_addr;
2461 
2462 	/* Make kernel jump to purgatory after shutdown */
2463 	image->start = entry;
2464 
2465 	/* Used later to get/set symbol values */
2466 	pi->sechdrs = sechdrs;
2467 
2468 	/*
2469 	 * Used later to identify which segment holds purgatory so that it
2470 	 * can be excluded from checksumming.
2471 	 */
2472 	pi->purgatory_buf = purgatory_buf;
2473 	return ret;
2474 out:
2475 	vfree(sechdrs);
2476 	vfree(purgatory_buf);
2477 	return ret;
2478 }
2479 
2480 static int kexec_apply_relocations(struct kimage *image)
2481 {
2482 	int i, ret;
2483 	struct purgatory_info *pi = &image->purgatory_info;
2484 	Elf_Shdr *sechdrs = pi->sechdrs;
2485 
2486 	/* Apply relocations */
2487 	for (i = 0; i < pi->ehdr->e_shnum; i++) {
2488 		Elf_Shdr *section, *symtab;
2489 
2490 		if (sechdrs[i].sh_type != SHT_RELA &&
2491 		    sechdrs[i].sh_type != SHT_REL)
2492 			continue;
2493 
2494 		/*
2495 		 * For a section of type SHT_RELA/SHT_REL, ->sh_link contains
2496 		 * the section header index of the associated symbol table,
2497 		 * and ->sh_info contains the section header index of the
2498 		 * section to which the relocations apply.
2499 		 */
2500 		if (sechdrs[i].sh_info >= pi->ehdr->e_shnum ||
2501 		    sechdrs[i].sh_link >= pi->ehdr->e_shnum)
2502 			return -ENOEXEC;
2503 
2504 		section = &sechdrs[sechdrs[i].sh_info];
2505 		symtab = &sechdrs[sechdrs[i].sh_link];
2506 
2507 		if (!(section->sh_flags & SHF_ALLOC))
2508 			continue;
2509 
2510 		/*
2511 		 * symtab->sh_link contains the section header index of the
2512 		 * associated string table.
2513 		 */
2514 		if (symtab->sh_link >= pi->ehdr->e_shnum)
2515 			/* Invalid section number? */
2516 			continue;
2517 
2518 		/*
2519 		 * The respective architecture needs to provide support for
2520 		 * applying relocations of type SHT_RELA/SHT_REL.
2521 		 */
2522 		if (sechdrs[i].sh_type == SHT_RELA)
2523 			ret = arch_kexec_apply_relocations_add(pi->ehdr,
2524 							       sechdrs, i);
2525 		else if (sechdrs[i].sh_type == SHT_REL)
2526 			ret = arch_kexec_apply_relocations(pi->ehdr,
2527 							   sechdrs, i);
2528 		if (ret)
2529 			return ret;
2530 	}
2531 
2532 	return 0;
2533 }
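
/*
 * arch_kexec_apply_relocations_add() and arch_kexec_apply_relocations() have
 * weak default implementations that simply reject the relocation section, so
 * an architecture that supports kexec_file_load() is expected to provide its
 * own (x86_64, for instance, implements the RELA variant in
 * arch/x86/kernel/machine_kexec_64.c).
 */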
2534 
2535 /* Load relocatable purgatory object and relocate it appropriately */
2536 int kexec_load_purgatory(struct kimage *image, unsigned long min,
2537 			 unsigned long max, int top_down,
2538 			 unsigned long *load_addr)
2539 {
2540 	struct purgatory_info *pi = &image->purgatory_info;
2541 	int ret;
2542 
2543 	if (kexec_purgatory_size <= 0)
2544 		return -EINVAL;
2545 
2546 	if (kexec_purgatory_size < sizeof(Elf_Ehdr))
2547 		return -ENOEXEC;
2548 
2549 	pi->ehdr = (Elf_Ehdr *)kexec_purgatory;
2550 
2551 	if (memcmp(pi->ehdr->e_ident, ELFMAG, SELFMAG) != 0
2552 	    || pi->ehdr->e_type != ET_REL
2553 	    || !elf_check_arch(pi->ehdr)
2554 	    || pi->ehdr->e_shentsize != sizeof(Elf_Shdr))
2555 		return -ENOEXEC;
2556 
2557 	if (pi->ehdr->e_shoff >= kexec_purgatory_size
2558 	    || (pi->ehdr->e_shnum * sizeof(Elf_Shdr) >
2559 	    kexec_purgatory_size - pi->ehdr->e_shoff))
2560 		return -ENOEXEC;
2561 
2562 	ret = __kexec_load_purgatory(image, min, max, top_down);
2563 	if (ret)
2564 		return ret;
2565 
2566 	ret = kexec_apply_relocations(image);
2567 	if (ret)
2568 		goto out;
2569 
2570 	*load_addr = pi->purgatory_load_addr;
2571 	return 0;
2572 out:
2573 	vfree(pi->sechdrs);
2574 	vfree(pi->purgatory_buf);
2575 	return ret;
2576 }
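
/*
 * Illustrative caller (a sketch; MIN_PURGATORY_ADDR stands in for an
 * arch-chosen lower bound):
 *
 *	ret = kexec_load_purgatory(image, MIN_PURGATORY_ADDR, ULONG_MAX,
 *				   1, &purgatory_load_addr);
 *
 * with top_down == 1 so that purgatory lands near the top of the allowed
 * window.
 */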
2577 
2578 static Elf_Sym *kexec_purgatory_find_symbol(struct purgatory_info *pi,
2579 					    const char *name)
2580 {
2581 	Elf_Sym *syms;
2582 	Elf_Shdr *sechdrs;
2583 	Elf_Ehdr *ehdr;
2584 	int i, k;
2585 	const char *strtab;
2586 
2587 	if (!pi->sechdrs || !pi->ehdr)
2588 		return NULL;
2589 
2590 	sechdrs = pi->sechdrs;
2591 	ehdr = pi->ehdr;
2592 
2593 	for (i = 0; i < ehdr->e_shnum; i++) {
2594 		if (sechdrs[i].sh_type != SHT_SYMTAB)
2595 			continue;
2596 
2597 		if (sechdrs[i].sh_link >= ehdr->e_shnum)
2598 			/* Invalid strtab section number */
2599 			continue;
2600 		strtab = (char *)sechdrs[sechdrs[i].sh_link].sh_offset;
2601 		syms = (Elf_Sym *)sechdrs[i].sh_offset;
2602 
2603 		/* Go through symbols for a match */
2604 		for (k = 0; k < sechdrs[i].sh_size/sizeof(Elf_Sym); k++) {
2605 			if (ELF_ST_BIND(syms[k].st_info) != STB_GLOBAL)
2606 				continue;
2607 
2608 			if (strcmp(strtab + syms[k].st_name, name) != 0)
2609 				continue;
2610 
2611 			if (syms[k].st_shndx == SHN_UNDEF ||
2612 			    syms[k].st_shndx >= ehdr->e_shnum) {
2613 				pr_debug("Symbol: %s has bad section index %d.\n",
2614 						name, syms[k].st_shndx);
2615 				return NULL;
2616 			}
2617 
2618 			/* Found the symbol we are looking for */
2619 			return &syms[k];
2620 		}
2621 	}
2622 
2623 	return NULL;
2624 }
2625 
2626 void *kexec_purgatory_get_symbol_addr(struct kimage *image, const char *name)
2627 {
2628 	struct purgatory_info *pi = &image->purgatory_info;
2629 	Elf_Sym *sym;
2630 	Elf_Shdr *sechdr;
2631 
2632 	sym = kexec_purgatory_find_symbol(pi, name);
2633 	if (!sym)
2634 		return ERR_PTR(-EINVAL);
2635 
2636 	sechdr = &pi->sechdrs[sym->st_shndx];
2637 
2638 	/*
2639 	 * Returns the address where the symbol will finally be loaded after
2640 	 * kexec_load_segment().
2641 	 */
2642 	return (void *)(sechdr->sh_addr + sym->st_value);
2643 }
2644 
2645 /*
2646  * Get or set the value of a symbol. If "get_value" is true, the symbol's
2647  * value is returned in buf; otherwise it is set from the value in buf.
2648  */
2649 int kexec_purgatory_get_set_symbol(struct kimage *image, const char *name,
2650 				   void *buf, unsigned int size, bool get_value)
2651 {
2652 	Elf_Sym *sym;
2653 	Elf_Shdr *sechdrs;
2654 	struct purgatory_info *pi = &image->purgatory_info;
2655 	char *sym_buf;
2656 
2657 	sym = kexec_purgatory_find_symbol(pi, name);
2658 	if (!sym)
2659 		return -EINVAL;
2660 
2661 	if (sym->st_size != size) {
2662 		pr_err("symbol %s size mismatch: expected %lu actual %u\n",
2663 		       name, (unsigned long)sym->st_size, size);
2664 		return -EINVAL;
2665 	}
2666 
2667 	sechdrs = pi->sechdrs;
2668 
2669 	if (sechdrs[sym->st_shndx].sh_type == SHT_NOBITS) {
2670 		pr_err("symbol %s is in a bss section. Cannot %s\n", name,
2671 		       get_value ? "get" : "set");
2672 		return -EINVAL;
2673 	}
2674 
2675 	sym_buf = (unsigned char *)sechdrs[sym->st_shndx].sh_offset +
2676 					sym->st_value;
2677 
2678 	if (get_value)
2679 		memcpy((void *)buf, sym_buf, size);
2680 	else
2681 		memcpy((void *)sym_buf, buf, size);
2682 
2683 	return 0;
2684 }
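
/*
 * Illustrative use (hypothetical symbol name): a loader that exports an
 * unsigned long value to a purgatory symbol called "stack_top" would do
 * something like
 *
 *	kexec_purgatory_get_set_symbol(image, "stack_top", &stack_top,
 *				       sizeof(stack_top), false);
 *
 * i.e. get_value == false copies the caller's buffer into the symbol's image,
 * exactly as done above for "sha_regions" and "sha256_digest".
 */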
2685 
2686 /*
2687  * Move into place and start executing a preloaded standalone
2688  * executable.  If nothing was preloaded, return an error.
2689  */
2690 int kernel_kexec(void)
2691 {
2692 	int error = 0;
2693 
2694 	if (!mutex_trylock(&kexec_mutex))
2695 		return -EBUSY;
2696 	if (!kexec_image) {
2697 		error = -EINVAL;
2698 		goto Unlock;
2699 	}
2700 
2701 #ifdef CONFIG_KEXEC_JUMP
2702 	if (kexec_image->preserve_context) {
2703 		lock_system_sleep();
2704 		pm_prepare_console();
2705 		error = freeze_processes();
2706 		if (error) {
2707 			error = -EBUSY;
2708 			goto Restore_console;
2709 		}
2710 		suspend_console();
2711 		error = dpm_suspend_start(PMSG_FREEZE);
2712 		if (error)
2713 			goto Resume_console;
2714 		/* At this point, dpm_suspend_start() has been called,
2715 		 * but *not* dpm_suspend_end(). We *must* call
2716 		 * dpm_suspend_end() now.  Otherwise, drivers for
2717 		 * some devices (e.g. interrupt controllers) become
2718 		 * desynchronized with the actual state of the
2719 		 * hardware at resume time, and evil weirdness ensues.
2720 		 */
2721 		error = dpm_suspend_end(PMSG_FREEZE);
2722 		if (error)
2723 			goto Resume_devices;
2724 		error = disable_nonboot_cpus();
2725 		if (error)
2726 			goto Enable_cpus;
2727 		local_irq_disable();
2728 		error = syscore_suspend();
2729 		if (error)
2730 			goto Enable_irqs;
2731 	} else
2732 #endif
2733 	{
2734 		kexec_in_progress = true;
2735 		kernel_restart_prepare(NULL);
2736 		migrate_to_reboot_cpu();
2737 
2738 		/*
2739 		 * migrate_to_reboot_cpu() disables CPU hotplug assuming that
2740 		 * no further code needs to use CPU hotplug (which is true in
2741 		 * the reboot case). However, the kexec path depends on using
2742 		 * CPU hotplug again; so re-enable it here.
2743 		 */
2744 		cpu_hotplug_enable();
2745 		pr_emerg("Starting new kernel\n");
2746 		machine_shutdown();
2747 	}
2748 
2749 	machine_kexec(kexec_image);
2750 
2751 #ifdef CONFIG_KEXEC_JUMP
2752 	if (kexec_image->preserve_context) {
2753 		syscore_resume();
2754  Enable_irqs:
2755 		local_irq_enable();
2756  Enable_cpus:
2757 		enable_nonboot_cpus();
2758 		dpm_resume_start(PMSG_RESTORE);
2759  Resume_devices:
2760 		dpm_resume_end(PMSG_RESTORE);
2761  Resume_console:
2762 		resume_console();
2763 		thaw_processes();
2764  Restore_console:
2765 		pm_restore_console();
2766 		unlock_system_sleep();
2767 	}
2768 #endif
2769 
2770  Unlock:
2771 	mutex_unlock(&kexec_mutex);
2772 	return error;
2773 }
2774