// SPDX-License-Identifier: GPL-2.0-only
/*
 * kexec for arm64
 *
 * Copyright (C) Linaro.
 * Copyright (C) Huawei Futurewei Technologies.
 */

#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/kernel.h>
#include <linux/kexec.h>
#include <linux/page-flags.h>
#include <linux/reboot.h>
#include <linux/set_memory.h>
#include <linux/smp.h>

#include <asm/cacheflush.h>
#include <asm/cpu_ops.h>
#include <asm/daifflags.h>
#include <asm/memory.h>
#include <asm/mmu.h>
#include <asm/mmu_context.h>
#include <asm/page.h>
#include <asm/sections.h>
#include <asm/trans_pgd.h>

/**
 * kexec_image_info - For debugging output.
 */
#define kexec_image_info(_i) _kexec_image_info(__func__, __LINE__, _i)
static void _kexec_image_info(const char *func, int line,
			      const struct kimage *kimage)
{
	unsigned long i;

	pr_debug("%s:%d:\n", func, line);
	pr_debug(" kexec kimage info:\n");
	pr_debug(" type: %d\n", kimage->type);
	pr_debug(" start: %lx\n", kimage->start);
	pr_debug(" head: %lx\n", kimage->head);
	pr_debug(" nr_segments: %lu\n", kimage->nr_segments);
	pr_debug(" dtb_mem: %pa\n", &kimage->arch.dtb_mem);
	pr_debug(" kern_reloc: %pa\n", &kimage->arch.kern_reloc);
	pr_debug(" el2_vectors: %pa\n", &kimage->arch.el2_vectors);

	for (i = 0; i < kimage->nr_segments; i++) {
		pr_debug(" segment[%lu]: %016lx - %016lx, 0x%lx bytes, %lu pages\n",
			i,
			kimage->segment[i].mem,
			kimage->segment[i].mem + kimage->segment[i].memsz,
			kimage->segment[i].memsz,
			kimage->segment[i].memsz / PAGE_SIZE);
	}
}

void machine_kexec_cleanup(struct kimage *kimage)
{
	/* Empty routine needed to avoid build errors. */
}

/**
 * machine_kexec_prepare - Prepare for a kexec reboot.
 *
 * Called from the core kexec code when a kernel image is loaded.
 * Forbid loading a kexec kernel if we have no way of hotplugging cpus or cpus
 * are stuck in the kernel. This avoids a panic once we hit machine_kexec().
 */
int machine_kexec_prepare(struct kimage *kimage)
{
	if (kimage->type != KEXEC_TYPE_CRASH && cpus_are_stuck_in_kernel()) {
		pr_err("Can't kexec: CPUs are stuck in the kernel.\n");
		return -EBUSY;
	}

	return 0;
}

/**
 * kexec_segment_flush - Helper to flush the kimage segments to PoC.
 */
static void kexec_segment_flush(const struct kimage *kimage)
{
	unsigned long i;

	pr_debug("%s:\n", __func__);

	for (i = 0; i < kimage->nr_segments; i++) {
		pr_debug(" segment[%lu]: %016lx - %016lx, 0x%lx bytes, %lu pages\n",
			i,
			kimage->segment[i].mem,
			kimage->segment[i].mem + kimage->segment[i].memsz,
			kimage->segment[i].memsz,
			kimage->segment[i].memsz / PAGE_SIZE);

		dcache_clean_inval_poc(
			(unsigned long)phys_to_virt(kimage->segment[i].mem),
			(unsigned long)phys_to_virt(kimage->segment[i].mem) +
				kimage->segment[i].memsz);
	}
}

/* Allocates pages for kexec page table */
static void *kexec_page_alloc(void *arg)
{
	struct kimage *kimage = arg;
	struct page *page = kimage_alloc_control_pages(kimage, 0);
	void *vaddr = NULL;

	if (!page)
		return NULL;

	vaddr = page_address(page);
	memset(vaddr, 0, PAGE_SIZE);

	return vaddr;
}

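/**
 * machine_kexec_post_load - Prepare the loaded image for relocation.
 *
 * Runs once the image has been loaded. If the image was loaded in place
 * (IND_DONE set in kimage->head), only the segments are flushed; otherwise
 * this sets up everything the relocation code needs: a copy of the EL2
 * vectors for nVHE, a copy of the linear map, and an identity-mapped copy
 * of arm64_relocate_new_kernel.
 */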
int machine_kexec_post_load(struct kimage *kimage)
{
	int rc;
	pgd_t *trans_pgd;
	void *reloc_code = page_to_virt(kimage->control_code_page);
	long reloc_size;
	struct trans_pgd_info info = {
		.trans_alloc_page = kexec_page_alloc,
		.trans_alloc_arg = kimage,
	};

	/* If loaded in place, relocation is not used; only flush the next kernel */
	if (kimage->head & IND_DONE) {
		kexec_segment_flush(kimage);
		kexec_image_info(kimage);
		return 0;
	}

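	/*
	 * For nVHE, copy the EL2 exception vectors into a page owned by
	 * this image so they remain usable while the old kernel's memory
	 * is being overwritten.
	 */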
	kimage->arch.el2_vectors = 0;
	if (is_hyp_nvhe()) {
		rc = trans_pgd_copy_el2_vectors(&info,
						&kimage->arch.el2_vectors);
		if (rc)
			return rc;
	}

	/* Create a copy of the linear map */
	trans_pgd = kexec_page_alloc(kimage);
	if (!trans_pgd)
		return -ENOMEM;
	rc = trans_pgd_create_copy(&info, &trans_pgd, PAGE_OFFSET, PAGE_END);
	if (rc)
		return rc;
	kimage->arch.ttbr1 = __pa(trans_pgd);
	kimage->arch.zero_page = __pa_symbol(empty_zero_page);

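	/*
	 * Copy arm64_relocate_new_kernel to the preallocated control page
	 * and idmap it, so it can keep running while the page tables are
	 * switched out from under it.
	 */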
	reloc_size = __relocate_new_kernel_end - __relocate_new_kernel_start;
	memcpy(reloc_code, __relocate_new_kernel_start, reloc_size);
	kimage->arch.kern_reloc = __pa(reloc_code);
	rc = trans_pgd_idmap_page(&info, &kimage->arch.ttbr0,
				  &kimage->arch.t0sz, reloc_code);
	if (rc)
		return rc;
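	/*
	 * Record the offset between the kimage's virtual and physical
	 * addresses so the relocation code can convert linear-map
	 * addresses read from the kimage into physical ones.
	 */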
	kimage->arch.phys_offset = virt_to_phys(kimage) - (long)kimage;

	/* Flush the reloc_code in preparation for its execution. */
	dcache_clean_inval_poc((unsigned long)reloc_code,
			       (unsigned long)reloc_code + reloc_size);
	icache_inval_pou((uintptr_t)reloc_code,
			 (uintptr_t)reloc_code + reloc_size);
	kexec_image_info(kimage);

	return 0;
}

/**
 * machine_kexec - Do the kexec reboot.
 *
 * Called from the core kexec code for a sys_reboot with LINUX_REBOOT_CMD_KEXEC.
 */
void machine_kexec(struct kimage *kimage)
{
	bool in_kexec_crash = (kimage == kexec_crash_image);
	bool stuck_cpus = cpus_are_stuck_in_kernel();

	/*
	 * New cpus may have become stuck_in_kernel after we loaded the image.
	 */
	BUG_ON(!in_kexec_crash && (stuck_cpus || (num_online_cpus() > 1)));
	WARN(in_kexec_crash && (stuck_cpus || smp_crash_stop_failed()),
		"Some CPUs may be stale, kdump will be unreliable.\n");

	pr_info("Bye!\n");

	local_daif_mask();

	/*
	 * Both restart and kernel_reloc will shut down the MMU and disable
	 * data caches. However, restart will start the new kernel or
	 * purgatory directly, while kernel_reloc contains the body of
	 * arm64_relocate_new_kernel.
	 * In the kexec case, kimage->start points to purgatory, assuming
	 * that the kernel entry and dtb address are embedded in purgatory
	 * by userspace (kexec-tools).
	 * In the kexec_file case, the kernel starts directly without
	 * purgatory.
	 */
	if (kimage->head & IND_DONE) {
		typeof(cpu_soft_restart) *restart;

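		/*
		 * cpu_soft_restart() turns the MMU off, so install the
		 * identity map and call it through its physical address.
		 */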
		cpu_install_idmap();
		restart = (void *)__pa_symbol(cpu_soft_restart);
		restart(is_hyp_nvhe(), kimage->start, kimage->arch.dtb_mem,
			0, 0);
	} else {
		void (*kernel_reloc)(struct kimage *kimage);

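		/* For nVHE, switch EL2 to the vectors copied at load time. */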
		if (is_hyp_nvhe())
			__hyp_set_vectors(kimage->arch.el2_vectors);
		cpu_install_ttbr0(kimage->arch.ttbr0, kimage->arch.t0sz);
		kernel_reloc = (void *)kimage->arch.kern_reloc;
		kernel_reloc(kimage);
	}

	BUG(); /* Should never get here. */
}

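/*
 * machine_kexec_mask_interrupts - Quiesce all interrupt lines.
 *
 * Deactivate, EOI, mask and disable every interrupt so that no stale
 * interrupt state is carried over into the crashdump kernel.
 */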
static void machine_kexec_mask_interrupts(void)
{
	unsigned int i;
	struct irq_desc *desc;

	for_each_irq_desc(i, desc) {
		struct irq_chip *chip;
		int ret;

		chip = irq_desc_get_chip(desc);
		if (!chip)
			continue;

		/*
		 * First try to remove the active state. If this
		 * fails, try to EOI the interrupt.
		 */
		ret = irq_set_irqchip_state(i, IRQCHIP_STATE_ACTIVE, false);

		if (ret && irqd_irq_inprogress(&desc->irq_data) &&
		    chip->irq_eoi)
			chip->irq_eoi(&desc->irq_data);

		if (chip->irq_mask)
			chip->irq_mask(&desc->irq_data);

		if (chip->irq_disable && !irqd_irq_disabled(&desc->irq_data))
			chip->irq_disable(&desc->irq_data);
	}
}

/**
 * machine_crash_shutdown - shutdown non-crashing cpus and save registers
 */
void machine_crash_shutdown(struct pt_regs *regs)
{
	local_irq_disable();

	/* shutdown non-crashing cpus */
	crash_smp_send_stop();

	/* for crashing cpu */
	crash_save_cpu(regs, smp_processor_id());
	machine_kexec_mask_interrupts();

	pr_info("Starting crashdump kernel...\n");
}

#ifdef CONFIG_HIBERNATION
/*
 * To preserve the crash dump kernel image, the relevant memory segments
 * should be mapped again around the hibernation.
 */
void crash_prepare_suspend(void)
{
	if (kexec_crash_image)
		arch_kexec_unprotect_crashkres();
}

void crash_post_resume(void)
{
	if (kexec_crash_image)
		arch_kexec_protect_crashkres();
}

/*
 * crash_is_nosave
 *
 * Return true only if a page is part of the memory reserved for the crash
 * dump kernel but does not hold any data of the loaded kernel image.
 *
 * Note that all the pages in crash dump kernel memory have been initially
 * marked as Reserved as memory was allocated via memblock_reserve().
 *
 * In hibernation, the pages which are Reserved and yet "nosave" are excluded
 * from the hibernation image. crash_is_nosave() does this check for the crash
 * dump kernel and will reduce the total size of the hibernation image.
 */

bool crash_is_nosave(unsigned long pfn)
{
	int i;
	phys_addr_t addr;

	if (!crashk_res.end)
		return false;

	/* in reserved memory? */
	addr = __pfn_to_phys(pfn);
	if ((addr < crashk_res.start) || (crashk_res.end < addr)) {
		if (!crashk_low_res.end)
			return false;

		if ((addr < crashk_low_res.start) || (crashk_low_res.end < addr))
			return false;
	}

	if (!kexec_crash_image)
		return true;

	/* not part of loaded kernel image? */
	for (i = 0; i < kexec_crash_image->nr_segments; i++)
		if (addr >= kexec_crash_image->segment[i].mem &&
				addr < (kexec_crash_image->segment[i].mem +
					kexec_crash_image->segment[i].memsz))
			return false;

	return true;
}

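/*
 * crash_free_reserved_phys_range - Free pages from the crash kernel
 * reservation, handing each page in [begin, end) back to the page
 * allocator when part of the reservation is released.
 */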
void crash_free_reserved_phys_range(unsigned long begin, unsigned long end)
{
	unsigned long addr;
	struct page *page;

	for (addr = begin; addr < end; addr += PAGE_SIZE) {
		page = phys_to_page(addr);
		free_reserved_page(page);
	}
}
#endif /* CONFIG_HIBERNATION */