// SPDX-License-Identifier: GPL-2.0-only
/*
 * machine_kexec.c for kexec
 *
 * Copyright (C) 2022 Loongson Technology Corporation Limited
 */
#include <linux/compiler.h>
#include <linux/cpu.h>
#include <linux/kexec.h>
#include <linux/crash_dump.h>
#include <linux/delay.h>
#include <linux/irq.h>
#include <linux/libfdt.h>
#include <linux/mm.h>
#include <linux/of_fdt.h>
#include <linux/reboot.h>
#include <linux/sched.h>
#include <linux/sched/task_stack.h>
#include <linux/string.h>
#include <linux/uaccess.h>

#include <asm/bootinfo.h>
#include <asm/cacheflush.h>
#include <asm/page.h>

244a03b2acSYouling Tang /* 0x100000 ~ 0x200000 is safe */
254a03b2acSYouling Tang #define KEXEC_CONTROL_CODE TO_CACHE(0x100000UL)
264a03b2acSYouling Tang #define KEXEC_CMDLINE_ADDR TO_CACHE(0x108000UL)
274a03b2acSYouling Tang
284a03b2acSYouling Tang static unsigned long reboot_code_buffer;
294e62d1d8SYouling Tang static cpumask_t cpus_in_crash = CPU_MASK_NONE;
304a03b2acSYouling Tang
314a03b2acSYouling Tang #ifdef CONFIG_SMP
324a03b2acSYouling Tang static void (*relocated_kexec_smp_wait)(void *);
334a03b2acSYouling Tang atomic_t kexec_ready_to_reboot = ATOMIC_INIT(0);
344a03b2acSYouling Tang #endif
354a03b2acSYouling Tang
364a03b2acSYouling Tang static unsigned long efi_boot;
374a03b2acSYouling Tang static unsigned long cmdline_ptr;
384a03b2acSYouling Tang static unsigned long systable_ptr;
394a03b2acSYouling Tang static unsigned long start_addr;
404a03b2acSYouling Tang static unsigned long first_ind_entry;
414a03b2acSYouling Tang
kexec_image_info(const struct kimage * kimage)424a03b2acSYouling Tang static void kexec_image_info(const struct kimage *kimage)
434a03b2acSYouling Tang {
444a03b2acSYouling Tang unsigned long i;
454a03b2acSYouling Tang
464a03b2acSYouling Tang pr_debug("kexec kimage info:\n");
474a03b2acSYouling Tang pr_debug("\ttype: %d\n", kimage->type);
484a03b2acSYouling Tang pr_debug("\tstart: %lx\n", kimage->start);
494a03b2acSYouling Tang pr_debug("\thead: %lx\n", kimage->head);
504a03b2acSYouling Tang pr_debug("\tnr_segments: %lu\n", kimage->nr_segments);
514a03b2acSYouling Tang
524a03b2acSYouling Tang for (i = 0; i < kimage->nr_segments; i++) {
534a03b2acSYouling Tang pr_debug("\t segment[%lu]: %016lx - %016lx", i,
544a03b2acSYouling Tang kimage->segment[i].mem,
554a03b2acSYouling Tang kimage->segment[i].mem + kimage->segment[i].memsz);
564a03b2acSYouling Tang pr_debug("\t\t0x%lx bytes, %lu pages\n",
574a03b2acSYouling Tang (unsigned long)kimage->segment[i].memsz,
584a03b2acSYouling Tang (unsigned long)kimage->segment[i].memsz / PAGE_SIZE);
594a03b2acSYouling Tang }
604a03b2acSYouling Tang }
614a03b2acSYouling Tang
machine_kexec_prepare(struct kimage * kimage)624a03b2acSYouling Tang int machine_kexec_prepare(struct kimage *kimage)
634a03b2acSYouling Tang {
644a03b2acSYouling Tang int i;
654a03b2acSYouling Tang char *bootloader = "kexec";
664a03b2acSYouling Tang void *cmdline_ptr = (void *)KEXEC_CMDLINE_ADDR;
674a03b2acSYouling Tang
684a03b2acSYouling Tang kexec_image_info(kimage);
694a03b2acSYouling Tang
704a03b2acSYouling Tang kimage->arch.efi_boot = fw_arg0;
714a03b2acSYouling Tang kimage->arch.systable_ptr = fw_arg2;
724a03b2acSYouling Tang
734a03b2acSYouling Tang /* Find the command line */
744a03b2acSYouling Tang for (i = 0; i < kimage->nr_segments; i++) {
754a03b2acSYouling Tang if (!strncmp(bootloader, (char __user *)kimage->segment[i].buf, strlen(bootloader))) {
764a03b2acSYouling Tang if (!copy_from_user(cmdline_ptr, kimage->segment[i].buf, COMMAND_LINE_SIZE))
774a03b2acSYouling Tang kimage->arch.cmdline_ptr = (unsigned long)cmdline_ptr;
784a03b2acSYouling Tang break;
794a03b2acSYouling Tang }
804a03b2acSYouling Tang }
814a03b2acSYouling Tang
824a03b2acSYouling Tang if (!kimage->arch.cmdline_ptr) {
834a03b2acSYouling Tang pr_err("Command line not included in the provided image\n");
844a03b2acSYouling Tang return -EINVAL;
854a03b2acSYouling Tang }
864a03b2acSYouling Tang
874e62d1d8SYouling Tang /* kexec/kdump need a safe page to save reboot_code_buffer */
884a03b2acSYouling Tang kimage->control_code_page = virt_to_page((void *)KEXEC_CONTROL_CODE);
894a03b2acSYouling Tang
904a03b2acSYouling Tang reboot_code_buffer = (unsigned long)page_address(kimage->control_code_page);
914a03b2acSYouling Tang memcpy((void *)reboot_code_buffer, relocate_new_kernel, relocate_new_kernel_size);
924a03b2acSYouling Tang
934a03b2acSYouling Tang #ifdef CONFIG_SMP
944a03b2acSYouling Tang /* All secondary cpus now may jump to kexec_smp_wait cycle */
954a03b2acSYouling Tang relocated_kexec_smp_wait = reboot_code_buffer + (void *)(kexec_smp_wait - relocate_new_kernel);
964a03b2acSYouling Tang #endif
974a03b2acSYouling Tang
984a03b2acSYouling Tang return 0;
994a03b2acSYouling Tang }
1004a03b2acSYouling Tang
/* Nothing to undo for @kimage: this function is intentionally empty. */
void machine_kexec_cleanup(struct kimage *kimage)
{
}

kexec_reboot(void)1054a03b2acSYouling Tang void kexec_reboot(void)
1064a03b2acSYouling Tang {
1074a03b2acSYouling Tang do_kexec_t do_kexec = NULL;
1084a03b2acSYouling Tang
1094a03b2acSYouling Tang /*
1104a03b2acSYouling Tang * We know we were online, and there will be no incoming IPIs at
1114e62d1d8SYouling Tang * this point. Mark online again before rebooting so that the crash
1124e62d1d8SYouling Tang * analysis tool will see us correctly.
1134a03b2acSYouling Tang */
1144a03b2acSYouling Tang set_cpu_online(smp_processor_id(), true);
1154a03b2acSYouling Tang
1164a03b2acSYouling Tang /* Ensure remote CPUs observe that we're online before rebooting. */
1174a03b2acSYouling Tang smp_mb__after_atomic();
1184a03b2acSYouling Tang
1194a03b2acSYouling Tang /*
1204a03b2acSYouling Tang * Make sure we get correct instructions written by the
1214a03b2acSYouling Tang * machine_kexec_prepare() CPU.
1224a03b2acSYouling Tang */
1234a03b2acSYouling Tang __asm__ __volatile__ ("\tibar 0\n"::);
1244a03b2acSYouling Tang
1254a03b2acSYouling Tang #ifdef CONFIG_SMP
1264a03b2acSYouling Tang /* All secondary cpus go to kexec_smp_wait */
1274a03b2acSYouling Tang if (smp_processor_id() > 0) {
1284a03b2acSYouling Tang relocated_kexec_smp_wait(NULL);
129*7929d362STiezhu Yang BUG();
1304a03b2acSYouling Tang }
1314a03b2acSYouling Tang #endif
1324a03b2acSYouling Tang
1334a03b2acSYouling Tang do_kexec = (void *)reboot_code_buffer;
1344a03b2acSYouling Tang do_kexec(efi_boot, cmdline_ptr, systable_ptr, start_addr, first_ind_entry);
1354a03b2acSYouling Tang
136*7929d362STiezhu Yang BUG();
1374a03b2acSYouling Tang }
1384a03b2acSYouling Tang
1394a03b2acSYouling Tang
#ifdef CONFIG_SMP
/*
 * Cross-call handler used by machine_shutdown(): mark this CPU offline,
 * disable interrupts and spin until machine_kexec() sets
 * kexec_ready_to_reboot, then enter kexec_reboot(). Does nothing when the
 * CPU is already marked offline. @regs is unused.
 */
static void kexec_shutdown_secondary(void *regs)
{
	int cpu = smp_processor_id();

	if (!cpu_online(cpu))
		return;

	/* We won't be sent IPIs any more. */
	set_cpu_online(cpu, false);

	local_irq_disable();
	while (!atomic_read(&kexec_ready_to_reboot))
		cpu_relax();

	kexec_reboot();
}

/*
 * Cross-call handler used by crash_smp_send_stop(): save this CPU's
 * registers for the crash dump (once), record the CPU in cpus_in_crash,
 * then spin until kexec_ready_to_reboot is set and enter kexec_reboot().
 * Does nothing when the CPU is already marked offline.
 */
static void crash_shutdown_secondary(void *passed_regs)
{
	int cpu = smp_processor_id();
	struct pt_regs *regs = passed_regs;

	/*
	 * If we are passed registers, use those. Otherwise get the
	 * regs from the last interrupt, which should be correct, as
	 * we are in an interrupt. But if the regs are not there,
	 * pull them from the top of the stack. They are probably
	 * wrong, but we need something to keep from crashing again.
	 */
	if (!regs)
		regs = get_irq_regs();
	if (!regs)
		regs = task_pt_regs(current);

	if (!cpu_online(cpu))
		return;

	/* We won't be sent IPIs any more. */
	set_cpu_online(cpu, false);

	local_irq_disable();
	/* Save the registers only the first time this CPU gets here. */
	if (!cpumask_test_cpu(cpu, &cpus_in_crash))
		crash_save_cpu(regs, cpu);
	cpumask_set_cpu(cpu, &cpus_in_crash);

	while (!atomic_read(&kexec_ready_to_reboot))
		cpu_relax();

	kexec_reboot();
}

/*
 * Ask every other CPU to run crash_shutdown_secondary() and wait for them
 * to show up in cpus_in_crash. Only the first call does any work.
 */
void crash_smp_send_stop(void)
{
	unsigned int ncpus;
	unsigned long timeout;
	static int cpus_stopped;

	/*
	 * This function can be called twice in panic path, but obviously
	 * we should execute this only once.
	 */
	if (cpus_stopped)
		return;

	cpus_stopped = 1;

	/* Excluding the panic cpu */
	ncpus = num_online_cpus() - 1;

	smp_call_function(crash_shutdown_secondary, NULL, 0);
	smp_wmb();

	/*
	 * The crash CPU sends an IPI and waits for the other CPUs to
	 * respond. The wait ends as soon as all of them have checked in,
	 * or after MSEC_PER_SEC * 10 rounds of mdelay(1) (about 10 seconds).
	 */
	timeout = MSEC_PER_SEC * 10;
	pr_emerg("Sending IPI to other cpus...\n");
	while ((cpumask_weight(&cpus_in_crash) < ncpus) && timeout--) {
		mdelay(1);
		cpu_relax();
	}
}
#endif /* defined(CONFIG_SMP) */

machine_shutdown(void)2264a03b2acSYouling Tang void machine_shutdown(void)
2274a03b2acSYouling Tang {
2284a03b2acSYouling Tang int cpu;
2294a03b2acSYouling Tang
2304a03b2acSYouling Tang /* All CPUs go to reboot_code_buffer */
2314a03b2acSYouling Tang for_each_possible_cpu(cpu)
2324a03b2acSYouling Tang if (!cpu_online(cpu))
2334a03b2acSYouling Tang cpu_device_up(get_cpu_device(cpu));
2344a03b2acSYouling Tang
2354a03b2acSYouling Tang #ifdef CONFIG_SMP
2364a03b2acSYouling Tang smp_call_function(kexec_shutdown_secondary, NULL, 0);
2374a03b2acSYouling Tang #endif
2384a03b2acSYouling Tang }
2394a03b2acSYouling Tang
machine_crash_shutdown(struct pt_regs * regs)2404a03b2acSYouling Tang void machine_crash_shutdown(struct pt_regs *regs)
2414a03b2acSYouling Tang {
2424e62d1d8SYouling Tang int crashing_cpu;
2434e62d1d8SYouling Tang
2444e62d1d8SYouling Tang local_irq_disable();
2454e62d1d8SYouling Tang
2464e62d1d8SYouling Tang crashing_cpu = smp_processor_id();
2474e62d1d8SYouling Tang crash_save_cpu(regs, crashing_cpu);
2484e62d1d8SYouling Tang
2494e62d1d8SYouling Tang #ifdef CONFIG_SMP
2504e62d1d8SYouling Tang crash_smp_send_stop();
2514e62d1d8SYouling Tang #endif
2524e62d1d8SYouling Tang cpumask_set_cpu(crashing_cpu, &cpus_in_crash);
2534e62d1d8SYouling Tang
2544e62d1d8SYouling Tang pr_info("Starting crashdump kernel...\n");
2554a03b2acSYouling Tang }
2564a03b2acSYouling Tang
machine_kexec(struct kimage * image)2574a03b2acSYouling Tang void machine_kexec(struct kimage *image)
2584a03b2acSYouling Tang {
2594a03b2acSYouling Tang unsigned long entry, *ptr;
2604a03b2acSYouling Tang struct kimage_arch *internal = &image->arch;
2614a03b2acSYouling Tang
2624a03b2acSYouling Tang efi_boot = internal->efi_boot;
2634a03b2acSYouling Tang cmdline_ptr = internal->cmdline_ptr;
2644a03b2acSYouling Tang systable_ptr = internal->systable_ptr;
2654a03b2acSYouling Tang
2664a03b2acSYouling Tang start_addr = (unsigned long)phys_to_virt(image->start);
2674a03b2acSYouling Tang
2684e62d1d8SYouling Tang first_ind_entry = (image->type == KEXEC_TYPE_DEFAULT) ?
2694e62d1d8SYouling Tang (unsigned long)phys_to_virt(image->head & PAGE_MASK) : 0;
2704a03b2acSYouling Tang
2714a03b2acSYouling Tang /*
2724a03b2acSYouling Tang * The generic kexec code builds a page list with physical
2734a03b2acSYouling Tang * addresses. they are directly accessible through XKPRANGE
2744a03b2acSYouling Tang * hence the phys_to_virt() call.
2754a03b2acSYouling Tang */
2764a03b2acSYouling Tang for (ptr = &image->head; (entry = *ptr) && !(entry & IND_DONE);
2774a03b2acSYouling Tang ptr = (entry & IND_INDIRECTION) ?
2784a03b2acSYouling Tang phys_to_virt(entry & PAGE_MASK) : ptr + 1) {
2794a03b2acSYouling Tang if (*ptr & IND_SOURCE || *ptr & IND_INDIRECTION ||
2804a03b2acSYouling Tang *ptr & IND_DESTINATION)
2814a03b2acSYouling Tang *ptr = (unsigned long) phys_to_virt(*ptr);
2824a03b2acSYouling Tang }
2834a03b2acSYouling Tang
2844a03b2acSYouling Tang /* Mark offline before disabling local irq. */
2854a03b2acSYouling Tang set_cpu_online(smp_processor_id(), false);
2864a03b2acSYouling Tang
2874a03b2acSYouling Tang /* We do not want to be bothered. */
2884a03b2acSYouling Tang local_irq_disable();
2894a03b2acSYouling Tang
2904a03b2acSYouling Tang pr_notice("EFI boot flag 0x%lx\n", efi_boot);
2914a03b2acSYouling Tang pr_notice("Command line at 0x%lx\n", cmdline_ptr);
2924a03b2acSYouling Tang pr_notice("System table at 0x%lx\n", systable_ptr);
2934a03b2acSYouling Tang pr_notice("We will call new kernel at 0x%lx\n", start_addr);
2944a03b2acSYouling Tang pr_notice("Bye ...\n");
2954a03b2acSYouling Tang
2964a03b2acSYouling Tang /* Make reboot code buffer available to the boot CPU. */
2974a03b2acSYouling Tang flush_cache_all();
2984a03b2acSYouling Tang
2994a03b2acSYouling Tang #ifdef CONFIG_SMP
3004a03b2acSYouling Tang atomic_set(&kexec_ready_to_reboot, 1);
3014a03b2acSYouling Tang #endif
3024a03b2acSYouling Tang
3034a03b2acSYouling Tang kexec_reboot();
3044a03b2acSYouling Tang }
305