// SPDX-License-Identifier: GPL-2.0-only
/*
 * kexec for arm64
 *
 * Copyright (C) Linaro.
 * Copyright (C) Huawei Futurewei Technologies.
 */

#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/kernel.h>
#include <linux/kexec.h>
#include <linux/page-flags.h>
#include <linux/smp.h>

#include <asm/cacheflush.h>
#include <asm/cpu_ops.h>
#include <asm/daifflags.h>
#include <asm/memory.h>
#include <asm/mmu.h>
#include <asm/mmu_context.h>
#include <asm/page.h>

#include "cpu-reset.h"

/* Global variables for the arm64_relocate_new_kernel routine. */
extern const unsigned char arm64_relocate_new_kernel[];
extern const unsigned long arm64_relocate_new_kernel_size;

/**
 * kexec_image_info - For debugging output.
 */
#define kexec_image_info(_i) _kexec_image_info(__func__, __LINE__, _i)
static void _kexec_image_info(const char *func, int line,
        const struct kimage *kimage)
{
        unsigned long i;

        pr_debug("%s:%d:\n", func, line);
        pr_debug("  kexec kimage info:\n");
        pr_debug("    type:        %d\n", kimage->type);
        pr_debug("    start:       %lx\n", kimage->start);
        pr_debug("    head:        %lx\n", kimage->head);
        pr_debug("    nr_segments: %lu\n", kimage->nr_segments);
        pr_debug("    kern_reloc: %pa\n", &kimage->arch.kern_reloc);

        for (i = 0; i < kimage->nr_segments; i++) {
                pr_debug("      segment[%lu]: %016lx - %016lx, 0x%lx bytes, %lu pages\n",
                        i,
                        kimage->segment[i].mem,
                        kimage->segment[i].mem + kimage->segment[i].memsz,
                        kimage->segment[i].memsz,
                        kimage->segment[i].memsz / PAGE_SIZE);
        }
}

void machine_kexec_cleanup(struct kimage *kimage)
{
        /* Empty routine needed to avoid build errors. */
}

int machine_kexec_post_load(struct kimage *kimage)
{
        void *reloc_code = page_to_virt(kimage->control_code_page);

        memcpy(reloc_code, arm64_relocate_new_kernel,
               arm64_relocate_new_kernel_size);
        kimage->arch.kern_reloc = __pa(reloc_code);
        kexec_image_info(kimage);

        /*
         * For execution with the MMU off, reloc_code needs to be cleaned to
         * the PoC and invalidated from the I-cache.
         */
        dcache_clean_inval_poc((unsigned long)reloc_code,
                               (unsigned long)reloc_code +
                               arm64_relocate_new_kernel_size);
        icache_inval_pou((uintptr_t)reloc_code,
                         (uintptr_t)reloc_code +
                         arm64_relocate_new_kernel_size);

        return 0;
}

/**
 * machine_kexec_prepare - Prepare for a kexec reboot.
 *
 * Called from the core kexec code when a kernel image is loaded.
 * Forbid loading a kexec kernel if we have no way of hotplugging cpus or cpus
 * are stuck in the kernel. This avoids a panic once we hit machine_kexec().
 */
int machine_kexec_prepare(struct kimage *kimage)
{
        if (kimage->type != KEXEC_TYPE_CRASH && cpus_are_stuck_in_kernel()) {
                pr_err("Can't kexec: CPUs are stuck in the kernel.\n");
                return -EBUSY;
        }

        return 0;
}

/**
 * kexec_list_flush - Helper to flush the kimage list and source pages to PoC.
 */
static void kexec_list_flush(struct kimage *kimage)
{
        kimage_entry_t *entry;

        for (entry = &kimage->head; ; entry++) {
                unsigned int flag;
                unsigned long addr;
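
                /*
                 * Flush the list entries: the relocation code walks this
                 * list with the MMU and caches off, so each entry must be
                 * cleaned to the PoC before it is read back from memory.
                 */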
                dcache_clean_inval_poc((unsigned long)entry,
                                       (unsigned long)entry +
                                       sizeof(kimage_entry_t));

                flag = *entry & IND_FLAGS;
                if (flag == IND_DONE)
                        break;

                addr = (unsigned long)phys_to_virt(*entry & PAGE_MASK);

                switch (flag) {
                case IND_INDIRECTION:
                        /* Set entry point just before the new list page. */
                        entry = (kimage_entry_t *)addr - 1;
                        break;
                case IND_SOURCE:
                        /* flush the source pages. */
                        dcache_clean_inval_poc(addr, addr + PAGE_SIZE);
                        break;
                case IND_DESTINATION:
                        break;
                default:
                        BUG();
                }
        }
}

/**
 * kexec_segment_flush - Helper to flush the kimage segments to PoC.
 */
static void kexec_segment_flush(const struct kimage *kimage)
{
        unsigned long i;

        pr_debug("%s:\n", __func__);

        for (i = 0; i < kimage->nr_segments; i++) {
                pr_debug("  segment[%lu]: %016lx - %016lx, 0x%lx bytes, %lu pages\n",
                        i,
                        kimage->segment[i].mem,
                        kimage->segment[i].mem + kimage->segment[i].memsz,
                        kimage->segment[i].memsz,
                        kimage->segment[i].memsz / PAGE_SIZE);

                dcache_clean_inval_poc(
                        (unsigned long)phys_to_virt(kimage->segment[i].mem),
                        (unsigned long)phys_to_virt(kimage->segment[i].mem) +
                        kimage->segment[i].memsz);
        }
}

/**
 * machine_kexec - Do the kexec reboot.
 *
 * Called from the core kexec code for a sys_reboot with LINUX_REBOOT_CMD_KEXEC.
 */
void machine_kexec(struct kimage *kimage)
{
        bool in_kexec_crash = (kimage == kexec_crash_image);
        bool stuck_cpus = cpus_are_stuck_in_kernel();

        /*
         * New cpus may have become stuck_in_kernel after we loaded the image.
         */
        BUG_ON(!in_kexec_crash && (stuck_cpus || (num_online_cpus() > 1)));
        WARN(in_kexec_crash && (stuck_cpus || smp_crash_stop_failed()),
                "Some CPUs may be stale, kdump will be unreliable.\n");

        /* Flush the kimage list and its buffers. */
        kexec_list_flush(kimage);

        /* Flush the new image if already in place. */
        if ((kimage != kexec_crash_image) && (kimage->head & IND_DONE))
                kexec_segment_flush(kimage);

        pr_info("Bye!\n");

        local_daif_mask();

        /*
         * cpu_soft_restart will shutdown the MMU, disable data caches, then
         * transfer control to the kern_reloc which contains a copy of
         * the arm64_relocate_new_kernel routine. arm64_relocate_new_kernel
         * uses physical addressing to relocate the new image to its final
         * position and transfers control to the image entry point when the
         * relocation is complete.
         * In kexec case, kimage->start points to purgatory assuming that
         * kernel entry and dtb address are embedded in purgatory by
         * userspace (kexec-tools).
         * In kexec_file case, the kernel starts directly without purgatory.
         */
        cpu_soft_restart(kimage->arch.kern_reloc, kimage->head, kimage->start,
                         kimage->arch.dtb_mem);

        BUG(); /* Should never get here. */
}
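
/*
 * machine_kexec_mask_interrupts - Quiesce the interrupt controller.
 *
 * Clear the active state, EOI in-progress interrupts where needed, and mask
 * each interrupt line so nothing is left active when the next kernel starts.
 * Called from machine_crash_shutdown() on the crashing CPU.
 */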
static void machine_kexec_mask_interrupts(void)
{
        unsigned int i;
        struct irq_desc *desc;

        for_each_irq_desc(i, desc) {
                struct irq_chip *chip;
                int ret;

                chip = irq_desc_get_chip(desc);
                if (!chip)
                        continue;

                /*
                 * First try to remove the active state. If this
                 * fails, try to EOI the interrupt.
                 */
                ret = irq_set_irqchip_state(i, IRQCHIP_STATE_ACTIVE, false);

                if (ret && irqd_irq_inprogress(&desc->irq_data) &&
                    chip->irq_eoi)
                        chip->irq_eoi(&desc->irq_data);

                if (chip->irq_mask)
                        chip->irq_mask(&desc->irq_data);

                if (chip->irq_disable && !irqd_irq_disabled(&desc->irq_data))
                        chip->irq_disable(&desc->irq_data);
        }
}

/**
 * machine_crash_shutdown - shutdown non-crashing cpus and save registers
 */
void machine_crash_shutdown(struct pt_regs *regs)
{
        local_irq_disable();

        /* shutdown non-crashing cpus */
        crash_smp_send_stop();

        /* for crashing cpu */
        crash_save_cpu(regs, smp_processor_id());
        machine_kexec_mask_interrupts();

        pr_info("Starting crashdump kernel...\n");
}

void arch_kexec_protect_crashkres(void)
{
        int i;

        kexec_segment_flush(kexec_crash_image);

        for (i = 0; i < kexec_crash_image->nr_segments; i++)
                set_memory_valid(
                        __phys_to_virt(kexec_crash_image->segment[i].mem),
                        kexec_crash_image->segment[i].memsz >> PAGE_SHIFT, 0);
}

void arch_kexec_unprotect_crashkres(void)
{
        int i;

        for (i = 0; i < kexec_crash_image->nr_segments; i++)
                set_memory_valid(
                        __phys_to_virt(kexec_crash_image->segment[i].mem),
                        kexec_crash_image->segment[i].memsz >> PAGE_SHIFT, 1);
}

#ifdef CONFIG_HIBERNATION
/*
 * To preserve the crash dump kernel image, the relevant memory segments
 * should be mapped again around the hibernation.
 */
void crash_prepare_suspend(void)
{
        if (kexec_crash_image)
                arch_kexec_unprotect_crashkres();
}

void crash_post_resume(void)
{
        if (kexec_crash_image)
                arch_kexec_protect_crashkres();
}

/*
 * crash_is_nosave
 *
 * Return true only if a page is part of reserved memory for the crash dump
 * kernel, but does not hold any data of the loaded kernel image.
 *
 * Note that all the pages in crash dump kernel memory have been initially
 * marked as Reserved as memory was allocated via memblock_reserve().
 *
 * In hibernation, the pages which are Reserved and yet "nosave" are excluded
 * from the hibernation image. crash_is_nosave() does this check for the crash
 * dump kernel and will reduce the total size of the hibernation image.
 */

bool crash_is_nosave(unsigned long pfn)
{
        int i;
        phys_addr_t addr;

        if (!crashk_res.end)
                return false;

        /* in reserved memory? */
        addr = __pfn_to_phys(pfn);
        if ((addr < crashk_res.start) || (crashk_res.end < addr))
                return false;

        if (!kexec_crash_image)
                return true;

        /* not part of loaded kernel image? */
        for (i = 0; i < kexec_crash_image->nr_segments; i++)
                if (addr >= kexec_crash_image->segment[i].mem &&
                    addr < (kexec_crash_image->segment[i].mem +
                            kexec_crash_image->segment[i].memsz))
                        return false;

        return true;
}

void crash_free_reserved_phys_range(unsigned long begin, unsigned long end)
{
        unsigned long addr;
        struct page *page;

        for (addr = begin; addr < end; addr += PAGE_SIZE) {
                page = phys_to_page(addr);
                free_reserved_page(page);
        }
}
#endif /* CONFIG_HIBERNATION */