/*
 * kexec.c - kexec system call core code.
 * Copyright (C) 2002-2004 Eric Biederman <ebiederm@xmission.com>
 *
 * This source code is licensed under the GNU General Public License,
 * Version 2.  See the file COPYING for more details.
 */

#define pr_fmt(fmt) "kexec: " fmt

#include <linux/capability.h>
#include <linux/mm.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/fs.h>
#include <linux/kexec.h>
#include <linux/mutex.h>
#include <linux/list.h>
#include <linux/highmem.h>
#include <linux/syscalls.h>
#include <linux/reboot.h>
#include <linux/ioport.h>
#include <linux/hardirq.h>
#include <linux/elf.h>
#include <linux/elfcore.h>
#include <linux/utsname.h>
#include <linux/numa.h>
#include <linux/suspend.h>
#include <linux/device.h>
#include <linux/freezer.h>
#include <linux/pm.h>
#include <linux/cpu.h>
#include <linux/uaccess.h>
#include <linux/io.h>
#include <linux/console.h>
#include <linux/vmalloc.h>
#include <linux/swap.h>
#include <linux/syscore_ops.h>
#include <linux/compiler.h>
#include <linux/hugetlb.h>

#include <asm/page.h>
#include <asm/sections.h>

#include <crypto/hash.h>
#include <crypto/sha.h>
#include "kexec_internal.h"

DEFINE_MUTEX(kexec_mutex);

/* Per cpu memory for storing cpu states in case of system crash. */
note_buf_t __percpu *crash_notes;

/* vmcoreinfo stuff */
static unsigned char vmcoreinfo_data[VMCOREINFO_BYTES];
u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4];
size_t vmcoreinfo_size;
size_t vmcoreinfo_max_size = sizeof(vmcoreinfo_data);

/* Flag to indicate we are going to kexec a new kernel */
bool kexec_in_progress = false;


/* Location of the reserved area for the crash kernel */
struct resource crashk_res = {
	.name = "Crash kernel",
	.start = 0,
	.end = 0,
	.flags = IORESOURCE_BUSY | IORESOURCE_MEM
};
struct resource crashk_low_res = {
	.name = "Crash kernel",
	.start = 0,
	.end = 0,
	.flags = IORESOURCE_BUSY | IORESOURCE_MEM
};

int kexec_should_crash(struct task_struct *p)
{
	/*
	 * If crash_kexec_post_notifiers is enabled, don't run
	 * crash_kexec() here yet, which must be run after panic
	 * notifiers in panic().
	 */
	if (crash_kexec_post_notifiers)
		return 0;
	/*
	 * There are 4 panic() calls in do_exit() path, each of which
	 * corresponds to each of these 4 conditions.
	 */
	if (in_interrupt() || !p->pid || is_global_init(p) || panic_on_oops)
		return 1;
	return 0;
}

/*
 * When kexec transitions to the new kernel there is a one-to-one
 * mapping between physical and virtual addresses.  On processors
 * where you can disable the MMU this is trivial, and easy.  For
 * others it is still a simple predictable page table to setup.
 *
 * In that environment kexec copies the new kernel to its final
 * resting place.  This means I can only support memory whose
 * physical address can fit in an unsigned long.  In particular
 * addresses where (pfn << PAGE_SHIFT) > ULONG_MAX cannot be handled.
 * If the assembly stub has more restrictive requirements
 * KEXEC_SOURCE_MEMORY_LIMIT and KEXEC_DEST_MEMORY_LIMIT can be
 * defined more restrictively in <asm/kexec.h>.
 *
 * The code for the transition from the current kernel to the
 * new kernel is placed in the control_code_buffer, whose size
 * is given by KEXEC_CONTROL_PAGE_SIZE.  In the best case only a single
 * page of memory is necessary, but some architectures require more.
 * Because this memory must be identity mapped in the transition from
 * virtual to physical addresses it must live in the range
 * 0 - TASK_SIZE, as only the user space mappings are arbitrarily
 * modifiable.
 *
 * The assembly stub in the control code buffer is passed a linked list
 * of descriptor pages detailing the source pages of the new kernel,
 * and the destination addresses of those source pages.  As this data
 * structure is not used in the context of the current OS, it must
 * be self-contained.
 *
 * The code has been made to work with highmem pages and will use a
 * destination page in its final resting place (if it happens
 * to allocate it).  The end product of this is that most of the
 * physical address space, and most of RAM can be used.
 *
 * Future directions include:
 *  - allocating a page table with the control code buffer identity
 *    mapped, to simplify machine_kexec and make kexec_on_panic more
 *    reliable.
 */

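/*
 * Editorial note, not part of the original source: a minimal sketch of
 * the descriptor stream that kimage_add_entry()/kimage_terminate()
 * below build for the assembly stub.  Each kimage_entry_t is a
 * page-aligned physical address tagged in its low bits:
 *
 *	dest | IND_DESTINATION   set the current destination address
 *	src  | IND_SOURCE        copy this source page to the current
 *	                         destination, then advance the
 *	                         destination by PAGE_SIZE
 *	next | IND_INDIRECTION   continue reading entries from this page
 *	IND_DONE                 end of the list
 */
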
/*
 * KIMAGE_NO_DEST is an impossible destination address..., for
 * allocating pages whose destination address we do not care about.
 */
#define KIMAGE_NO_DEST (-1UL)

static struct page *kimage_alloc_page(struct kimage *image,
					gfp_t gfp_mask,
					unsigned long dest);

int sanity_check_segment_list(struct kimage *image)
{
	int result, i;
	unsigned long nr_segments = image->nr_segments;

	/*
	 * Verify we have good destination addresses.  The caller is
	 * responsible for making certain we don't attempt to load
	 * the new image into invalid or reserved areas of RAM.  This
	 * just verifies it is an address we can use.
	 *
	 * Since the kernel does everything in page size chunks ensure
	 * the destination addresses are page aligned.  Too many
	 * special cases crop up when we don't do this.  The most
	 * insidious is getting overlapping destination addresses
	 * simply because addresses are changed to page size
	 * granularity.
	 */
	result = -EADDRNOTAVAIL;
	for (i = 0; i < nr_segments; i++) {
		unsigned long mstart, mend;

		mstart = image->segment[i].mem;
		mend = mstart + image->segment[i].memsz;
		if ((mstart & ~PAGE_MASK) || (mend & ~PAGE_MASK))
			return result;
		if (mend >= KEXEC_DESTINATION_MEMORY_LIMIT)
			return result;
	}

	/* Verify our destination addresses do not overlap.
	 * If we allowed overlapping destination addresses
	 * through, very weird things can happen with no
	 * easy explanation as one segment stops on another.
	 */
	result = -EINVAL;
	for (i = 0; i < nr_segments; i++) {
		unsigned long mstart, mend;
		unsigned long j;

		mstart = image->segment[i].mem;
		mend = mstart + image->segment[i].memsz;
		for (j = 0; j < i; j++) {
			unsigned long pstart, pend;

			pstart = image->segment[j].mem;
			pend = pstart + image->segment[j].memsz;
			/* Do the segments overlap ? */
			if ((mend > pstart) && (mstart < pend))
				return result;
		}
	}

	/* Ensure our buffer sizes are not larger than
	 * our memory sizes.  This should always be the case,
	 * and it is easier to check up front than to be surprised
	 * later on.
	 */
	result = -EINVAL;
	for (i = 0; i < nr_segments; i++) {
		if (image->segment[i].bufsz > image->segment[i].memsz)
			return result;
	}

	/*
	 * Verify we have good destination addresses.  Normally
	 * the caller is responsible for making certain we don't
	 * attempt to load the new image into invalid or reserved
	 * areas of RAM.  But crash kernels are preloaded into a
	 * reserved area of ram.  We must ensure the addresses
	 * are in the reserved area otherwise preloading the
	 * kernel could corrupt things.
	 */

	if (image->type == KEXEC_TYPE_CRASH) {
		result = -EADDRNOTAVAIL;
		for (i = 0; i < nr_segments; i++) {
			unsigned long mstart, mend;

			mstart = image->segment[i].mem;
			mend = mstart + image->segment[i].memsz - 1;
			/* Ensure we are within the crash kernel limits */
			if ((mstart < crashk_res.start) ||
			    (mend > crashk_res.end))
				return result;
		}
	}

	return 0;
}

struct kimage *do_kimage_alloc_init(void)
{
	struct kimage *image;

	/* Allocate a controlling structure */
	image = kzalloc(sizeof(*image), GFP_KERNEL);
	if (!image)
		return NULL;

	image->head = 0;
	image->entry = &image->head;
	image->last_entry = &image->head;
	image->control_page = ~0; /* By default this does not apply */
	image->type = KEXEC_TYPE_DEFAULT;

	/* Initialize the list of control pages */
	INIT_LIST_HEAD(&image->control_pages);

	/* Initialize the list of destination pages */
	INIT_LIST_HEAD(&image->dest_pages);

	/* Initialize the list of unusable pages */
	INIT_LIST_HEAD(&image->unusable_pages);

	return image;
}

int kimage_is_destination_range(struct kimage *image,
					unsigned long start,
					unsigned long end)
{
	unsigned long i;

	for (i = 0; i < image->nr_segments; i++) {
		unsigned long mstart, mend;

		mstart = image->segment[i].mem;
		mend = mstart + image->segment[i].memsz;
		if ((end > mstart) && (start < mend))
			return 1;
	}

	return 0;
}

static struct page *kimage_alloc_pages(gfp_t gfp_mask, unsigned int order)
{
	struct page *pages;

	pages = alloc_pages(gfp_mask, order);
	if (pages) {
		unsigned int count, i;

		pages->mapping = NULL;
		set_page_private(pages, order);
		count = 1 << order;
		for (i = 0; i < count; i++)
			SetPageReserved(pages + i);
	}

	return pages;
}

static void kimage_free_pages(struct page *page)
{
	unsigned int order, count, i;

	order = page_private(page);
	count = 1 << order;
	for (i = 0; i < count; i++)
		ClearPageReserved(page + i);
	__free_pages(page, order);
}

void kimage_free_page_list(struct list_head *list)
{
	struct list_head *pos, *next;

	list_for_each_safe(pos, next, list) {
		struct page *page;

		page = list_entry(pos, struct page, lru);
		list_del(&page->lru);
		kimage_free_pages(page);
	}
}

static struct page *kimage_alloc_normal_control_pages(struct kimage *image,
							unsigned int order)
{
	/* Control pages are special, they are the intermediaries
	 * that are needed while we copy the rest of the pages
	 * to their final resting place.  As such they must
	 * not conflict with either the destination addresses
	 * or memory the kernel is already using.
	 *
	 * The only case where we really need more than one of
	 * these is for architectures where we cannot disable
	 * the MMU and must instead generate an identity mapped
	 * page table for all of the memory.
	 *
	 * At worst this runs in O(N) of the image size.
	 */
	struct list_head extra_pages;
	struct page *pages;
	unsigned int count;

	count = 1 << order;
	INIT_LIST_HEAD(&extra_pages);

	/* Loop while I can allocate a page and the page allocated
	 * is a destination page.
	 */
	do {
		unsigned long pfn, epfn, addr, eaddr;

		pages = kimage_alloc_pages(KEXEC_CONTROL_MEMORY_GFP, order);
		if (!pages)
			break;
		pfn = page_to_pfn(pages);
		epfn = pfn + count;
		addr = pfn << PAGE_SHIFT;
		eaddr = epfn << PAGE_SHIFT;
		if ((epfn >= (KEXEC_CONTROL_MEMORY_LIMIT >> PAGE_SHIFT)) ||
			      kimage_is_destination_range(image, addr, eaddr)) {
			list_add(&pages->lru, &extra_pages);
			pages = NULL;
		}
	} while (!pages);

	if (pages) {
		/* Remember the allocated page... */
		list_add(&pages->lru, &image->control_pages);

		/* Because the page is already in its destination
		 * location we will never allocate another page at
		 * that address.  Therefore kimage_alloc_pages
		 * will not return it (again) and we don't need
		 * to give it an entry in image->segment[].
		 */
	}
	/* Deal with the destination pages I have inadvertently allocated.
	 *
	 * Ideally I would convert multi-page allocations into single
	 * page allocations, and add everything to image->dest_pages.
	 *
	 * For now it is simpler to just free the pages.
	 */
	kimage_free_page_list(&extra_pages);

	return pages;
}

static struct page *kimage_alloc_crash_control_pages(struct kimage *image,
						      unsigned int order)
{
	/* Control pages are special, they are the intermediaries
	 * that are needed while we copy the rest of the pages
	 * to their final resting place.  As such they must
	 * not conflict with either the destination addresses
	 * or memory the kernel is already using.
	 *
	 * Control pages are also the only pages we must allocate
	 * when loading a crash kernel.  All of the other pages
	 * are specified by the segments and we just memcpy
	 * into them directly.
	 *
	 * The only case where we really need more than one of
	 * these is for architectures where we cannot disable
	 * the MMU and must instead generate an identity mapped
	 * page table for all of the memory.
	 *
	 * Given the low demand this implements a very simple
	 * allocator that finds the first hole of the appropriate
	 * size in the reserved memory region, and allocates all
	 * of the memory up to and including the hole.
	 */
	unsigned long hole_start, hole_end, size;
	struct page *pages;

	pages = NULL;
	size = (1 << order) << PAGE_SHIFT;
	hole_start = (image->control_page + (size - 1)) & ~(size - 1);
	hole_end = hole_start + size - 1;
	while (hole_end <= crashk_res.end) {
		unsigned long i;

		if (hole_end > KEXEC_CRASH_CONTROL_MEMORY_LIMIT)
			break;
		/* See if I overlap any of the segments */
		for (i = 0; i < image->nr_segments; i++) {
			unsigned long mstart, mend;

			mstart = image->segment[i].mem;
			mend = mstart + image->segment[i].memsz - 1;
			if ((hole_end >= mstart) && (hole_start <= mend)) {
				/* Advance the hole to the end of the segment */
				hole_start = (mend + (size - 1)) & ~(size - 1);
				hole_end = hole_start + size - 1;
				break;
			}
		}
		/* If I don't overlap any segments I have found my hole! */
		if (i == image->nr_segments) {
			pages = pfn_to_page(hole_start >> PAGE_SHIFT);
			image->control_page = hole_end;
			break;
		}
	}

	return pages;
}


struct page *kimage_alloc_control_pages(struct kimage *image,
					 unsigned int order)
{
	struct page *pages = NULL;

	switch (image->type) {
	case KEXEC_TYPE_DEFAULT:
		pages = kimage_alloc_normal_control_pages(image, order);
		break;
	case KEXEC_TYPE_CRASH:
		pages = kimage_alloc_crash_control_pages(image, order);
		break;
	}

	return pages;
}

static int kimage_add_entry(struct kimage *image, kimage_entry_t entry)
{
	if (*image->entry != 0)
		image->entry++;

	if (image->entry == image->last_entry) {
		kimage_entry_t *ind_page;
		struct page *page;

		page = kimage_alloc_page(image, GFP_KERNEL, KIMAGE_NO_DEST);
		if (!page)
			return -ENOMEM;

		ind_page = page_address(page);
		*image->entry = virt_to_phys(ind_page) | IND_INDIRECTION;
		image->entry = ind_page;
		image->last_entry = ind_page +
				((PAGE_SIZE/sizeof(kimage_entry_t)) - 1);
	}
	*image->entry = entry;
	image->entry++;
	*image->entry = 0;

	return 0;
}

static int kimage_set_destination(struct kimage *image,
				   unsigned long destination)
{
	int result;

	destination &= PAGE_MASK;
	result = kimage_add_entry(image, destination | IND_DESTINATION);

	return result;
}


static int kimage_add_page(struct kimage *image, unsigned long page)
{
	int result;

	page &= PAGE_MASK;
	result = kimage_add_entry(image, page | IND_SOURCE);

	return result;
}


static void kimage_free_extra_pages(struct kimage *image)
{
	/* Walk through and free any extra destination pages I may have */
	kimage_free_page_list(&image->dest_pages);

	/* Walk through and free any unusable pages I have cached */
	kimage_free_page_list(&image->unusable_pages);

}
void kimage_terminate(struct kimage *image)
{
	if (*image->entry != 0)
		image->entry++;

	*image->entry = IND_DONE;
}

#define for_each_kimage_entry(image, ptr, entry) \
	for (ptr = &image->head; (entry = *ptr) && !(entry & IND_DONE); \
		ptr = (entry & IND_INDIRECTION) ? \
		phys_to_virt((entry & PAGE_MASK)) : ptr + 1)

static void kimage_free_entry(kimage_entry_t entry)
{
	struct page *page;

	page = pfn_to_page(entry >> PAGE_SHIFT);
	kimage_free_pages(page);
}

void kimage_free(struct kimage *image)
{
	kimage_entry_t *ptr, entry;
	kimage_entry_t ind = 0;

	if (!image)
		return;

	kimage_free_extra_pages(image);
	for_each_kimage_entry(image, ptr, entry) {
		if (entry & IND_INDIRECTION) {
			/* Free the previous indirection page */
			if (ind & IND_INDIRECTION)
				kimage_free_entry(ind);
			/* Save this indirection page until we are
			 * done with it.
			 */
			ind = entry;
		} else if (entry & IND_SOURCE)
			kimage_free_entry(entry);
	}
	/* Free the final indirection page */
	if (ind & IND_INDIRECTION)
		kimage_free_entry(ind);

	/* Handle any machine specific cleanup */
	machine_kexec_cleanup(image);

	/* Free the kexec control pages... */
	kimage_free_page_list(&image->control_pages);

	/*
	 * Free up any temporary buffers allocated.  This might happen
	 * if an error occurred much later, after buffer allocation.
	 */
	if (image->file_mode)
		kimage_file_post_load_cleanup(image);

	kfree(image);
}

static kimage_entry_t *kimage_dst_used(struct kimage *image,
					unsigned long page)
{
	kimage_entry_t *ptr, entry;
	unsigned long destination = 0;

	for_each_kimage_entry(image, ptr, entry) {
		if (entry & IND_DESTINATION)
			destination = entry & PAGE_MASK;
		else if (entry & IND_SOURCE) {
			if (page == destination)
				return ptr;
			destination += PAGE_SIZE;
		}
	}

	return NULL;
}

static struct page *kimage_alloc_page(struct kimage *image,
					gfp_t gfp_mask,
					unsigned long destination)
{
	/*
	 * Here we implement safeguards to ensure that a source page
	 * is not copied to its destination page before the data on
	 * the destination page is no longer useful.
	 *
	 * To do this we maintain the invariant that a source page is
	 * either its own destination page, or it is not a
	 * destination page at all.
	 *
	 * That is slightly stronger than required, but the proof
	 * that no problems will occur is trivial, and the
	 * implementation is simple to verify.
	 *
	 * When allocating all pages normally this algorithm will run
	 * in O(N) time, but in the worst case it will run in O(N^2)
	 * time.  If the runtime is a problem the data structures can
	 * be fixed.
	 */
	struct page *page;
	unsigned long addr;

	/*
	 * Walk through the list of destination pages, and see if I
	 * have a match.
	 */
	list_for_each_entry(page, &image->dest_pages, lru) {
		addr = page_to_pfn(page) << PAGE_SHIFT;
		if (addr == destination) {
			list_del(&page->lru);
			return page;
		}
	}
	page = NULL;
	while (1) {
		kimage_entry_t *old;

		/* Allocate a page, if we run out of memory give up */
		page = kimage_alloc_pages(gfp_mask, 0);
		if (!page)
			return NULL;
		/* If the page cannot be used file it away */
		if (page_to_pfn(page) >
				(KEXEC_SOURCE_MEMORY_LIMIT >> PAGE_SHIFT)) {
			list_add(&page->lru, &image->unusable_pages);
			continue;
		}
		addr = page_to_pfn(page) << PAGE_SHIFT;

		/* If it is the destination page we want use it */
		if (addr == destination)
			break;

		/* If the page is not a destination page use it */
		if (!kimage_is_destination_range(image, addr,
						  addr + PAGE_SIZE))
			break;

		/*
		 * I know that the page is someone's destination page.
		 * See if there is already a source page for this
		 * destination page.  And if so swap the source pages.
		 */
		old = kimage_dst_used(image, addr);
		if (old) {
			/* If so move it */
			unsigned long old_addr;
			struct page *old_page;

			old_addr = *old & PAGE_MASK;
			old_page = pfn_to_page(old_addr >> PAGE_SHIFT);
			copy_highpage(page, old_page);
			*old = addr | (*old & ~PAGE_MASK);

			/* The old page I have found cannot be a
			 * destination page, so return it if its
			 * gfp_flags honor the ones passed in.
			 */
			if (!(gfp_mask & __GFP_HIGHMEM) &&
			    PageHighMem(old_page)) {
				kimage_free_pages(old_page);
				continue;
			}
			addr = old_addr;
			page = old_page;
			break;
		}
		/* Place the page on the destination list, to be used later */
		list_add(&page->lru, &image->dest_pages);
	}

	return page;
}

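/*
 * Editorial note, not part of the original source: a concrete example
 * of the swap case handled above.  Suppose kimage_alloc_page() is asked
 * for a page destined for address D, and the page the allocator hands
 * back happens to live at address A, where A is the destination of an
 * already-loaded source page S.  The code copies S's contents into the
 * new page (so the data bound for A now already sits at A), repoints
 * S's IND_SOURCE entry at A, and reuses S's old page to satisfy the
 * current request, preserving the invariant described above.
 */
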
static int kimage_load_normal_segment(struct kimage *image,
					 struct kexec_segment *segment)
{
	unsigned long maddr;
	size_t ubytes, mbytes;
	int result;
	unsigned char __user *buf = NULL;
	unsigned char *kbuf = NULL;

	result = 0;
	if (image->file_mode)
		kbuf = segment->kbuf;
	else
		buf = segment->buf;
	ubytes = segment->bufsz;
	mbytes = segment->memsz;
	maddr = segment->mem;

	result = kimage_set_destination(image, maddr);
	if (result < 0)
		goto out;

	while (mbytes) {
		struct page *page;
		char *ptr;
		size_t uchunk, mchunk;

		page = kimage_alloc_page(image, GFP_HIGHUSER, maddr);
		if (!page) {
			result = -ENOMEM;
			goto out;
		}
		result = kimage_add_page(image, page_to_pfn(page)
								<< PAGE_SHIFT);
		if (result < 0)
			goto out;

		ptr = kmap(page);
		/* Start with a clear page */
		clear_page(ptr);
		ptr += maddr & ~PAGE_MASK;
		mchunk = min_t(size_t, mbytes,
				PAGE_SIZE - (maddr & ~PAGE_MASK));
		uchunk = min(ubytes, mchunk);

		/* For file based kexec, source pages are in kernel memory */
		if (image->file_mode)
			memcpy(ptr, kbuf, uchunk);
		else
			result = copy_from_user(ptr, buf, uchunk);
		kunmap(page);
		if (result) {
			result = -EFAULT;
			goto out;
		}
		ubytes -= uchunk;
		maddr += mchunk;
		if (image->file_mode)
			kbuf += mchunk;
		else
			buf += mchunk;
		mbytes -= mchunk;
	}
out:
	return result;
}

static int kimage_load_crash_segment(struct kimage *image,
					struct kexec_segment *segment)
{
	/* For crash dump kernels we simply copy the data from
	 * user space to its destination.
	 * We do things a page at a time for the sake of kmap.
	 */
	unsigned long maddr;
	size_t ubytes, mbytes;
	int result;
	unsigned char __user *buf = NULL;
	unsigned char *kbuf = NULL;

	result = 0;
	if (image->file_mode)
		kbuf = segment->kbuf;
	else
		buf = segment->buf;
	ubytes = segment->bufsz;
	mbytes = segment->memsz;
	maddr = segment->mem;
	while (mbytes) {
		struct page *page;
		char *ptr;
		size_t uchunk, mchunk;

		page = pfn_to_page(maddr >> PAGE_SHIFT);
		if (!page) {
			result = -ENOMEM;
			goto out;
		}
		ptr = kmap(page);
		ptr += maddr & ~PAGE_MASK;
		mchunk = min_t(size_t, mbytes,
				PAGE_SIZE - (maddr & ~PAGE_MASK));
		uchunk = min(ubytes, mchunk);
		if (mchunk > uchunk) {
			/* Zero the trailing part of the page */
			memset(ptr + uchunk, 0, mchunk - uchunk);
		}

		/* For file based kexec, source pages are in kernel memory */
		if (image->file_mode)
			memcpy(ptr, kbuf, uchunk);
		else
			result = copy_from_user(ptr, buf, uchunk);
		kexec_flush_icache_page(page);
		kunmap(page);
		if (result) {
			result = -EFAULT;
			goto out;
		}
		ubytes -= uchunk;
		maddr += mchunk;
		if (image->file_mode)
			kbuf += mchunk;
		else
			buf += mchunk;
		mbytes -= mchunk;
	}
out:
	return result;
}

int kimage_load_segment(struct kimage *image,
				struct kexec_segment *segment)
{
	int result = -ENOMEM;

	switch (image->type) {
	case KEXEC_TYPE_DEFAULT:
		result = kimage_load_normal_segment(image, segment);
		break;
	case KEXEC_TYPE_CRASH:
		result = kimage_load_crash_segment(image, segment);
		break;
	}

	return result;
}

struct kimage *kexec_image;
struct kimage *kexec_crash_image;
int kexec_load_disabled;

void crash_kexec(struct pt_regs *regs)
{
	/* Take the kexec_mutex here to prevent sys_kexec_load
	 * running on one cpu from replacing the crash kernel
	 * we are using after a panic on a different cpu.
	 *
	 * If the crash kernel was not located in a fixed area
	 * of memory the xchg(&kexec_crash_image) would be
	 * sufficient.  But since I reuse the memory...
	 */
	if (mutex_trylock(&kexec_mutex)) {
		if (kexec_crash_image) {
			struct pt_regs fixed_regs;

			crash_setup_regs(&fixed_regs, regs);
			crash_save_vmcoreinfo();
			machine_crash_shutdown(&fixed_regs);
			machine_kexec(kexec_crash_image);
		}
		mutex_unlock(&kexec_mutex);
	}
}

size_t crash_get_memory_size(void)
{
	size_t size = 0;

	mutex_lock(&kexec_mutex);
	if (crashk_res.end != crashk_res.start)
		size = resource_size(&crashk_res);
	mutex_unlock(&kexec_mutex);
	return size;
}

void __weak crash_free_reserved_phys_range(unsigned long begin,
					   unsigned long end)
{
	unsigned long addr;

	for (addr = begin; addr < end; addr += PAGE_SIZE)
		free_reserved_page(pfn_to_page(addr >> PAGE_SHIFT));
}

int crash_shrink_memory(unsigned long new_size)
{
	int ret = 0;
	unsigned long start, end;
	unsigned long old_size;
	struct resource *ram_res;

	mutex_lock(&kexec_mutex);

	if (kexec_crash_image) {
		ret = -ENOENT;
		goto unlock;
	}
	start = crashk_res.start;
	end = crashk_res.end;
	old_size = (end == 0) ? 0 : end - start + 1;
	if (new_size >= old_size) {
		ret = (new_size == old_size) ? 0 : -EINVAL;
		goto unlock;
	}

	ram_res = kzalloc(sizeof(*ram_res), GFP_KERNEL);
	if (!ram_res) {
		ret = -ENOMEM;
		goto unlock;
	}

	start = roundup(start, KEXEC_CRASH_MEM_ALIGN);
	end = roundup(start + new_size, KEXEC_CRASH_MEM_ALIGN);

	crash_map_reserved_pages();
	crash_free_reserved_phys_range(end, crashk_res.end);

	if ((start == end) && (crashk_res.parent != NULL))
		release_resource(&crashk_res);

	ram_res->start = end;
	ram_res->end = crashk_res.end;
	ram_res->flags = IORESOURCE_BUSY | IORESOURCE_MEM;
	ram_res->name = "System RAM";

	crashk_res.end = end - 1;

	insert_resource(&iomem_resource, ram_res);
	crash_unmap_reserved_pages();

unlock:
	mutex_unlock(&kexec_mutex);
	return ret;
}

static u32 *append_elf_note(u32 *buf, char *name, unsigned type, void *data,
			    size_t data_len)
{
	struct elf_note note;

	note.n_namesz = strlen(name) + 1;
	note.n_descsz = data_len;
	note.n_type = type;
	memcpy(buf, &note, sizeof(note));
	buf += (sizeof(note) + 3)/4;
	memcpy(buf, name, note.n_namesz);
	buf += (note.n_namesz + 3)/4;
	memcpy(buf, data, note.n_descsz);
	buf += (note.n_descsz + 3)/4;

	return buf;
}

static void final_note(u32 *buf)
{
	struct elf_note note;

	note.n_namesz = 0;
	note.n_descsz = 0;
	note.n_type = 0;
	memcpy(buf, &note, sizeof(note));
}

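/*
 * Editorial note, not part of the original source: for reference, each
 * note emitted by append_elf_note() above is the packed sequence
 *
 *	struct elf_note { n_namesz, n_descsz, n_type }
 *	name (n_namesz bytes, padded up to a 4-byte boundary)
 *	desc (n_descsz bytes, padded up to a 4-byte boundary)
 *
 * and final_note() terminates the buffer with an all-zero header.
 */
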
void crash_save_cpu(struct pt_regs *regs, int cpu)
{
	struct elf_prstatus prstatus;
	u32 *buf;

	if ((cpu < 0) || (cpu >= nr_cpu_ids))
		return;

	/* Using ELF notes here is opportunistic.
	 * I need a well defined structure format
	 * for the data I pass, and I need tags
	 * on the data to indicate what information I have
	 * squirrelled away.  ELF notes happen to provide
	 * all of that, so there is no need to invent something new.
	 */
	buf = (u32 *)per_cpu_ptr(crash_notes, cpu);
	if (!buf)
		return;
	memset(&prstatus, 0, sizeof(prstatus));
	prstatus.pr_pid = current->pid;
	elf_core_copy_kernel_regs(&prstatus.pr_reg, regs);
	buf = append_elf_note(buf, KEXEC_CORE_NOTE_NAME, NT_PRSTATUS,
			      &prstatus, sizeof(prstatus));
	final_note(buf);
}

static int __init crash_notes_memory_init(void)
{
	/* Allocate memory for saving cpu registers. */
	size_t size, align;

	/*
	 * crash_notes could be allocated across 2 vmalloc pages when percpu
	 * is vmalloc based.  vmalloc doesn't guarantee that 2 contiguous
	 * vmalloc pages are also on 2 contiguous physical pages.  In that
	 * case the 2nd part of crash_notes in the 2nd page could be lost,
	 * since only the starting address and size of crash_notes are
	 * exported through sysfs.  Here round up the size of crash_notes
	 * to the nearest power of two and pass it to __alloc_percpu as
	 * the align value.  This makes sure crash_notes is allocated
	 * inside one physical page.
	 */
	size = sizeof(note_buf_t);
	align = min(roundup_pow_of_two(sizeof(note_buf_t)), PAGE_SIZE);

	/*
	 * Break the compile if size is bigger than PAGE_SIZE, since
	 * crash_notes would then definitely span 2 pages.
	 */
	BUILD_BUG_ON(size > PAGE_SIZE);

	crash_notes = __alloc_percpu(size, align);
	if (!crash_notes) {
		pr_warn("Kexec: Memory allocation for saving cpu register states failed\n");
		return -ENOMEM;
	}
	return 0;
}
subsys_initcall(crash_notes_memory_init);


/*
 * parsing the "crashkernel" commandline
 *
 * this code is intended to be called from architecture specific code
 */


/*
 * This function parses command lines in the format
 *
 * crashkernel=ramsize-range:size[,...][@offset]
 *
 * The function returns 0 on success and -EINVAL on failure.
 */
static int __init parse_crashkernel_mem(char *cmdline,
					unsigned long long system_ram,
					unsigned long long *crash_size,
					unsigned long long *crash_base)
{
	char *cur = cmdline, *tmp;

	/* for each entry of the comma-separated list */
	do {
		unsigned long long start, end = ULLONG_MAX, size;

		/* get the start of the range */
		start = memparse(cur, &tmp);
		if (cur == tmp) {
			pr_warn("crashkernel: Memory value expected\n");
			return -EINVAL;
		}
		cur = tmp;
		if (*cur != '-') {
			pr_warn("crashkernel: '-' expected\n");
			return -EINVAL;
		}
		cur++;

		/* if no ':' is here, then we read the end */
		if (*cur != ':') {
			end = memparse(cur, &tmp);
			if (cur == tmp) {
				pr_warn("crashkernel: Memory value expected\n");
				return -EINVAL;
			}
			cur = tmp;
			if (end <= start) {
				pr_warn("crashkernel: end <= start\n");
				return -EINVAL;
			}
		}

		if (*cur != ':') {
			pr_warn("crashkernel: ':' expected\n");
			return -EINVAL;
		}
		cur++;

		size = memparse(cur, &tmp);
		if (cur == tmp) {
			pr_warn("Memory value expected\n");
			return -EINVAL;
		}
		cur = tmp;
		if (size >= system_ram) {
			pr_warn("crashkernel: invalid size\n");
			return -EINVAL;
		}

		/* match ? */
		if (system_ram >= start && system_ram < end) {
			*crash_size = size;
			break;
		}
	} while (*cur++ == ',');

	if (*crash_size > 0) {
		while (*cur && *cur != ' ' && *cur != '@')
			cur++;
		if (*cur == '@') {
			cur++;
			*crash_base = memparse(cur, &tmp);
			if (cur == tmp) {
				pr_warn("Memory value expected after '@'\n");
				return -EINVAL;
			}
		}
	}

	return 0;
}

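/*
 * Editorial example, not part of the original source: with
 *
 *	crashkernel=512M-2G:64M,2G-:128M@16M
 *
 * a machine with 1G of RAM reserves 64M, a machine with 4G reserves
 * 128M, a machine with less than 512M reserves nothing, and any
 * reservation made is requested at physical address 16M.
 */
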
/*
 * This function parses "simple" (old) crashkernel command lines like
 *
 *	crashkernel=size[@offset]
 *
 * It returns 0 on success and -EINVAL on failure.
 */
static int __init parse_crashkernel_simple(char *cmdline,
					   unsigned long long *crash_size,
					   unsigned long long *crash_base)
{
	char *cur = cmdline;

	*crash_size = memparse(cmdline, &cur);
	if (cmdline == cur) {
		pr_warn("crashkernel: memory value expected\n");
		return -EINVAL;
	}

	if (*cur == '@')
		*crash_base = memparse(cur+1, &cur);
	else if (*cur != ' ' && *cur != '\0') {
		pr_warn("crashkernel: unrecognized char\n");
		return -EINVAL;
	}

	return 0;
}

#define SUFFIX_HIGH 0
#define SUFFIX_LOW 1
#define SUFFIX_NULL 2
static __initdata char *suffix_tbl[] = {
	[SUFFIX_HIGH] = ",high",
	[SUFFIX_LOW] = ",low",
	[SUFFIX_NULL] = NULL,
};

/*
 * This function parses "suffix" crashkernel command lines like
 *
 *	crashkernel=size,[high|low]
 *
 * It returns 0 on success and -EINVAL on failure.
 */
static int __init parse_crashkernel_suffix(char *cmdline,
					   unsigned long long *crash_size,
					   const char *suffix)
{
	char *cur = cmdline;

	*crash_size = memparse(cmdline, &cur);
	if (cmdline == cur) {
		pr_warn("crashkernel: memory value expected\n");
		return -EINVAL;
	}

	/* check with suffix */
	if (strncmp(cur, suffix, strlen(suffix))) {
		pr_warn("crashkernel: unrecognized char\n");
		return -EINVAL;
	}
	cur += strlen(suffix);
	if (*cur != ' ' && *cur != '\0') {
		pr_warn("crashkernel: unrecognized char\n");
		return -EINVAL;
	}

	return 0;
}

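/*
 * Editorial examples, not part of the original source:
 *
 *	crashkernel=128M@16M	-> parse_crashkernel_simple(), 128M at 16M
 *	crashkernel=256M,high	-> parse_crashkernel_suffix() with ",high"
 *	crashkernel=64M,low	-> parse_crashkernel_suffix() with ",low"
 */
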
static __init char *get_last_crashkernel(char *cmdline,
					 const char *name,
					 const char *suffix)
{
	char *p = cmdline, *ck_cmdline = NULL;

	/* find crashkernel and use the last one if there are more */
	p = strstr(p, name);
	while (p) {
		char *end_p = strchr(p, ' ');
		char *q;

		if (!end_p)
			end_p = p + strlen(p);

		if (!suffix) {
			int i;

			/* skip the one with any known suffix */
			for (i = 0; suffix_tbl[i]; i++) {
				q = end_p - strlen(suffix_tbl[i]);
				if (!strncmp(q, suffix_tbl[i],
					     strlen(suffix_tbl[i])))
					goto next;
			}
			ck_cmdline = p;
		} else {
			q = end_p - strlen(suffix);
			if (!strncmp(q, suffix, strlen(suffix)))
				ck_cmdline = p;
		}
next:
		p = strstr(p+1, name);
	}

	if (!ck_cmdline)
		return NULL;

	return ck_cmdline;
}

static int __init __parse_crashkernel(char *cmdline,
				      unsigned long long system_ram,
				      unsigned long long *crash_size,
				      unsigned long long *crash_base,
				      const char *name,
				      const char *suffix)
{
	char *first_colon, *first_space;
	char *ck_cmdline;

	BUG_ON(!crash_size || !crash_base);
	*crash_size = 0;
	*crash_base = 0;

	ck_cmdline = get_last_crashkernel(cmdline, name, suffix);

	if (!ck_cmdline)
		return -EINVAL;

	ck_cmdline += strlen(name);

	if (suffix)
		return parse_crashkernel_suffix(ck_cmdline, crash_size,
				suffix);
	/*
	 * if the commandline contains a ':', then that's the extended
	 * syntax -- if not, it must be the classic syntax
	 */
	first_colon = strchr(ck_cmdline, ':');
	first_space = strchr(ck_cmdline, ' ');
	if (first_colon && (!first_space || first_colon < first_space))
		return parse_crashkernel_mem(ck_cmdline, system_ram,
				crash_size, crash_base);

	return parse_crashkernel_simple(ck_cmdline, crash_size, crash_base);
}

/*
 * This function is the entry point for command line parsing and should be
 * called from the arch-specific code.
 */
int __init parse_crashkernel(char *cmdline,
			     unsigned long long system_ram,
			     unsigned long long *crash_size,
			     unsigned long long *crash_base)
{
	return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base,
					"crashkernel=", NULL);
}

int __init parse_crashkernel_high(char *cmdline,
				  unsigned long long system_ram,
				  unsigned long long *crash_size,
				  unsigned long long *crash_base)
{
	return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base,
				"crashkernel=", suffix_tbl[SUFFIX_HIGH]);
}

int __init parse_crashkernel_low(char *cmdline,
				 unsigned long long system_ram,
				 unsigned long long *crash_size,
				 unsigned long long *crash_base)
{
	return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base,
				"crashkernel=", suffix_tbl[SUFFIX_LOW]);
}

static void update_vmcoreinfo_note(void)
{
	u32 *buf = vmcoreinfo_note;

	if (!vmcoreinfo_size)
		return;
	buf = append_elf_note(buf, VMCOREINFO_NOTE_NAME, 0, vmcoreinfo_data,
			      vmcoreinfo_size);
	final_note(buf);
}

void crash_save_vmcoreinfo(void)
{
	vmcoreinfo_append_str("CRASHTIME=%ld\n", get_seconds());
	update_vmcoreinfo_note();
}

void vmcoreinfo_append_str(const char *fmt, ...)
{
	va_list args;
	char buf[0x50];
	size_t r;

	va_start(args, fmt);
	r = vscnprintf(buf, sizeof(buf), fmt, args);
	va_end(args);

	r = min(r, vmcoreinfo_max_size - vmcoreinfo_size);

	memcpy(&vmcoreinfo_data[vmcoreinfo_size], buf, r);

	vmcoreinfo_size += r;
}

/*
 * provide an empty default implementation here -- architecture
 * code may override this
 */
void __weak arch_crash_save_vmcoreinfo(void)
{}

unsigned long __weak paddr_vmcoreinfo_note(void)
{
	return __pa((unsigned long)(char *)&vmcoreinfo_note);
}

static int __init crash_save_vmcoreinfo_init(void)
{
	VMCOREINFO_OSRELEASE(init_uts_ns.name.release);
	VMCOREINFO_PAGESIZE(PAGE_SIZE);

	VMCOREINFO_SYMBOL(init_uts_ns);
	VMCOREINFO_SYMBOL(node_online_map);
#ifdef CONFIG_MMU
	VMCOREINFO_SYMBOL(swapper_pg_dir);
#endif
	VMCOREINFO_SYMBOL(_stext);
	VMCOREINFO_SYMBOL(vmap_area_list);

#ifndef CONFIG_NEED_MULTIPLE_NODES
	VMCOREINFO_SYMBOL(mem_map);
	VMCOREINFO_SYMBOL(contig_page_data);
#endif
#ifdef CONFIG_SPARSEMEM
	VMCOREINFO_SYMBOL(mem_section);
	VMCOREINFO_LENGTH(mem_section, NR_SECTION_ROOTS);
	VMCOREINFO_STRUCT_SIZE(mem_section);
	VMCOREINFO_OFFSET(mem_section, section_mem_map);
#endif
	VMCOREINFO_STRUCT_SIZE(page);
	VMCOREINFO_STRUCT_SIZE(pglist_data);
	VMCOREINFO_STRUCT_SIZE(zone);
	VMCOREINFO_STRUCT_SIZE(free_area);
	VMCOREINFO_STRUCT_SIZE(list_head);
	VMCOREINFO_SIZE(nodemask_t);
	VMCOREINFO_OFFSET(page, flags);
	VMCOREINFO_OFFSET(page, _count);
	VMCOREINFO_OFFSET(page, mapping);
	VMCOREINFO_OFFSET(page, lru);
	VMCOREINFO_OFFSET(page, _mapcount);
	VMCOREINFO_OFFSET(page, private);
	VMCOREINFO_OFFSET(pglist_data, node_zones);
	VMCOREINFO_OFFSET(pglist_data, nr_zones);
#ifdef CONFIG_FLAT_NODE_MEM_MAP
	VMCOREINFO_OFFSET(pglist_data, node_mem_map);
#endif
	VMCOREINFO_OFFSET(pglist_data, node_start_pfn);
	VMCOREINFO_OFFSET(pglist_data, node_spanned_pages);
	VMCOREINFO_OFFSET(pglist_data, node_id);
	VMCOREINFO_OFFSET(zone, free_area);
	VMCOREINFO_OFFSET(zone, vm_stat);
	VMCOREINFO_OFFSET(zone, spanned_pages);
	VMCOREINFO_OFFSET(free_area, free_list);
	VMCOREINFO_OFFSET(list_head, next);
	VMCOREINFO_OFFSET(list_head, prev);
	VMCOREINFO_OFFSET(vmap_area, va_start);
	VMCOREINFO_OFFSET(vmap_area, list);
	VMCOREINFO_LENGTH(zone.free_area, MAX_ORDER);
	log_buf_kexec_setup();
	VMCOREINFO_LENGTH(free_area.free_list, MIGRATE_TYPES);
	VMCOREINFO_NUMBER(NR_FREE_PAGES);
	VMCOREINFO_NUMBER(PG_lru);
	VMCOREINFO_NUMBER(PG_private);
	VMCOREINFO_NUMBER(PG_swapcache);
	VMCOREINFO_NUMBER(PG_slab);
#ifdef CONFIG_MEMORY_FAILURE
	VMCOREINFO_NUMBER(PG_hwpoison);
#endif
	VMCOREINFO_NUMBER(PG_head_mask);
	VMCOREINFO_NUMBER(PAGE_BUDDY_MAPCOUNT_VALUE);
#ifdef CONFIG_X86
	VMCOREINFO_NUMBER(KERNEL_IMAGE_SIZE);
#endif
#ifdef CONFIG_HUGETLBFS
	VMCOREINFO_SYMBOL(free_huge_page);
#endif

	arch_crash_save_vmcoreinfo();
	update_vmcoreinfo_note();

	return 0;
}

subsys_initcall(crash_save_vmcoreinfo_init);

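/*
 * Editorial note, not part of the original source: the entries above
 * end up in vmcoreinfo_data as plain "KEY=value\n" text lines, roughly
 *
 *	OSRELEASE=<uname -r>
 *	PAGESIZE=<PAGE_SIZE>
 *	SYMBOL(init_uts_ns)=<address>
 *	OFFSET(page.flags)=<offset>
 *	...
 *	CRASHTIME=<seconds>	(appended by crash_save_vmcoreinfo())
 *
 * which update_vmcoreinfo_note() wraps in a single ELF note for the
 * dump-capture kernel to read.
 */
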
/*
 * Move into place and start executing a preloaded standalone
 * executable.  If nothing was preloaded return an error.
 */
int kernel_kexec(void)
{
	int error = 0;

	if (!mutex_trylock(&kexec_mutex))
		return -EBUSY;
	if (!kexec_image) {
		error = -EINVAL;
		goto Unlock;
	}

#ifdef CONFIG_KEXEC_JUMP
	if (kexec_image->preserve_context) {
		lock_system_sleep();
		pm_prepare_console();
		error = freeze_processes();
		if (error) {
			error = -EBUSY;
			goto Restore_console;
		}
		suspend_console();
		error = dpm_suspend_start(PMSG_FREEZE);
		if (error)
			goto Resume_console;
		/* At this point, dpm_suspend_start() has been called,
		 * but *not* dpm_suspend_end().  We *must* call
		 * dpm_suspend_end() now.  Otherwise, drivers for
		 * some devices (e.g. interrupt controllers) become
		 * desynchronized with the actual state of the
		 * hardware at resume time, and evil weirdness ensues.
		 */
		error = dpm_suspend_end(PMSG_FREEZE);
		if (error)
			goto Resume_devices;
		error = disable_nonboot_cpus();
		if (error)
			goto Enable_cpus;
		local_irq_disable();
		error = syscore_suspend();
		if (error)
			goto Enable_irqs;
	} else
#endif
	{
		kexec_in_progress = true;
		kernel_restart_prepare(NULL);
		migrate_to_reboot_cpu();

		/*
		 * migrate_to_reboot_cpu() disables CPU hotplug assuming that
		 * no further code needs to use CPU hotplug (which is true in
		 * the reboot case).  However, the kexec path depends on using
		 * CPU hotplug again; so re-enable it here.
		 */
		cpu_hotplug_enable();
		pr_emerg("Starting new kernel\n");
		machine_shutdown();
	}

	machine_kexec(kexec_image);

#ifdef CONFIG_KEXEC_JUMP
	if (kexec_image->preserve_context) {
		syscore_resume();
 Enable_irqs:
		local_irq_enable();
 Enable_cpus:
		enable_nonboot_cpus();
		dpm_resume_start(PMSG_RESTORE);
 Resume_devices:
		dpm_resume_end(PMSG_RESTORE);
 Resume_console:
		resume_console();
		thaw_processes();
 Restore_console:
		pm_restore_console();
		unlock_system_sleep();
	}
#endif

 Unlock:
	mutex_unlock(&kexec_mutex);
	return error;
}

/*
 * Add and remove page tables for crashkernel memory
 *
 * Provide an empty default implementation here -- architecture
 * code may override this
 */
void __weak crash_map_reserved_pages(void)
{}

void __weak crash_unmap_reserved_pages(void)
{}