1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * kexec for arm64 4 * 5 * Copyright (C) Linaro. 6 * Copyright (C) Huawei Futurewei Technologies. 7 */ 8 9 #include <linux/interrupt.h> 10 #include <linux/irq.h> 11 #include <linux/kernel.h> 12 #include <linux/kexec.h> 13 #include <linux/page-flags.h> 14 #include <linux/set_memory.h> 15 #include <linux/smp.h> 16 17 #include <asm/cacheflush.h> 18 #include <asm/cpu_ops.h> 19 #include <asm/daifflags.h> 20 #include <asm/memory.h> 21 #include <asm/mmu.h> 22 #include <asm/mmu_context.h> 23 #include <asm/page.h> 24 #include <asm/sections.h> 25 #include <asm/trans_pgd.h> 26 27 /** 28 * kexec_image_info - For debugging output. 29 */ 30 #define kexec_image_info(_i) _kexec_image_info(__func__, __LINE__, _i) 31 static void _kexec_image_info(const char *func, int line, 32 const struct kimage *kimage) 33 { 34 unsigned long i; 35 36 pr_debug("%s:%d:\n", func, line); 37 pr_debug(" kexec kimage info:\n"); 38 pr_debug(" type: %d\n", kimage->type); 39 pr_debug(" start: %lx\n", kimage->start); 40 pr_debug(" head: %lx\n", kimage->head); 41 pr_debug(" nr_segments: %lu\n", kimage->nr_segments); 42 pr_debug(" dtb_mem: %pa\n", &kimage->arch.dtb_mem); 43 pr_debug(" kern_reloc: %pa\n", &kimage->arch.kern_reloc); 44 pr_debug(" el2_vectors: %pa\n", &kimage->arch.el2_vectors); 45 46 for (i = 0; i < kimage->nr_segments; i++) { 47 pr_debug(" segment[%lu]: %016lx - %016lx, 0x%lx bytes, %lu pages\n", 48 i, 49 kimage->segment[i].mem, 50 kimage->segment[i].mem + kimage->segment[i].memsz, 51 kimage->segment[i].memsz, 52 kimage->segment[i].memsz / PAGE_SIZE); 53 } 54 } 55 56 void machine_kexec_cleanup(struct kimage *kimage) 57 { 58 /* Empty routine needed to avoid build errors. */ 59 } 60 61 /** 62 * machine_kexec_prepare - Prepare for a kexec reboot. 63 * 64 * Called from the core kexec code when a kernel image is loaded. 65 * Forbid loading a kexec kernel if we have no way of hotplugging cpus or cpus 66 * are stuck in the kernel. This avoids a panic once we hit machine_kexec(). 67 */ 68 int machine_kexec_prepare(struct kimage *kimage) 69 { 70 if (kimage->type != KEXEC_TYPE_CRASH && cpus_are_stuck_in_kernel()) { 71 pr_err("Can't kexec: CPUs are stuck in the kernel.\n"); 72 return -EBUSY; 73 } 74 75 return 0; 76 } 77 78 /** 79 * kexec_segment_flush - Helper to flush the kimage segments to PoC. 80 */ 81 static void kexec_segment_flush(const struct kimage *kimage) 82 { 83 unsigned long i; 84 85 pr_debug("%s:\n", __func__); 86 87 for (i = 0; i < kimage->nr_segments; i++) { 88 pr_debug(" segment[%lu]: %016lx - %016lx, 0x%lx bytes, %lu pages\n", 89 i, 90 kimage->segment[i].mem, 91 kimage->segment[i].mem + kimage->segment[i].memsz, 92 kimage->segment[i].memsz, 93 kimage->segment[i].memsz / PAGE_SIZE); 94 95 dcache_clean_inval_poc( 96 (unsigned long)phys_to_virt(kimage->segment[i].mem), 97 (unsigned long)phys_to_virt(kimage->segment[i].mem) + 98 kimage->segment[i].memsz); 99 } 100 } 101 102 /* Allocates pages for kexec page table */ 103 static void *kexec_page_alloc(void *arg) 104 { 105 struct kimage *kimage = (struct kimage *)arg; 106 struct page *page = kimage_alloc_control_pages(kimage, 0); 107 108 if (!page) 109 return NULL; 110 111 memset(page_address(page), 0, PAGE_SIZE); 112 113 return page_address(page); 114 } 115 116 int machine_kexec_post_load(struct kimage *kimage) 117 { 118 int rc; 119 pgd_t *trans_pgd; 120 void *reloc_code = page_to_virt(kimage->control_code_page); 121 long reloc_size; 122 struct trans_pgd_info info = { 123 .trans_alloc_page = kexec_page_alloc, 124 .trans_alloc_arg = kimage, 125 }; 126 127 /* If in place, relocation is not used, only flush next kernel */ 128 if (kimage->head & IND_DONE) { 129 kexec_segment_flush(kimage); 130 kexec_image_info(kimage); 131 return 0; 132 } 133 134 kimage->arch.el2_vectors = 0; 135 if (is_hyp_nvhe()) { 136 rc = trans_pgd_copy_el2_vectors(&info, 137 &kimage->arch.el2_vectors); 138 if (rc) 139 return rc; 140 } 141 142 /* Create a copy of the linear map */ 143 trans_pgd = kexec_page_alloc(kimage); 144 if (!trans_pgd) 145 return -ENOMEM; 146 rc = trans_pgd_create_copy(&info, &trans_pgd, PAGE_OFFSET, PAGE_END); 147 if (rc) 148 return rc; 149 kimage->arch.ttbr1 = __pa(trans_pgd); 150 kimage->arch.zero_page = __pa_symbol(empty_zero_page); 151 152 reloc_size = __relocate_new_kernel_end - __relocate_new_kernel_start; 153 memcpy(reloc_code, __relocate_new_kernel_start, reloc_size); 154 kimage->arch.kern_reloc = __pa(reloc_code); 155 rc = trans_pgd_idmap_page(&info, &kimage->arch.ttbr0, 156 &kimage->arch.t0sz, reloc_code); 157 if (rc) 158 return rc; 159 kimage->arch.phys_offset = virt_to_phys(kimage) - (long)kimage; 160 161 /* Flush the reloc_code in preparation for its execution. */ 162 dcache_clean_inval_poc((unsigned long)reloc_code, 163 (unsigned long)reloc_code + reloc_size); 164 icache_inval_pou((uintptr_t)reloc_code, 165 (uintptr_t)reloc_code + reloc_size); 166 kexec_image_info(kimage); 167 168 return 0; 169 } 170 171 /** 172 * machine_kexec - Do the kexec reboot. 173 * 174 * Called from the core kexec code for a sys_reboot with LINUX_REBOOT_CMD_KEXEC. 175 */ 176 void machine_kexec(struct kimage *kimage) 177 { 178 bool in_kexec_crash = (kimage == kexec_crash_image); 179 bool stuck_cpus = cpus_are_stuck_in_kernel(); 180 181 /* 182 * New cpus may have become stuck_in_kernel after we loaded the image. 183 */ 184 BUG_ON(!in_kexec_crash && (stuck_cpus || (num_online_cpus() > 1))); 185 WARN(in_kexec_crash && (stuck_cpus || smp_crash_stop_failed()), 186 "Some CPUs may be stale, kdump will be unreliable.\n"); 187 188 pr_info("Bye!\n"); 189 190 local_daif_mask(); 191 192 /* 193 * Both restart and kernel_reloc will shutdown the MMU, disable data 194 * caches. However, restart will start new kernel or purgatory directly, 195 * kernel_reloc contains the body of arm64_relocate_new_kernel 196 * In kexec case, kimage->start points to purgatory assuming that 197 * kernel entry and dtb address are embedded in purgatory by 198 * userspace (kexec-tools). 199 * In kexec_file case, the kernel starts directly without purgatory. 200 */ 201 if (kimage->head & IND_DONE) { 202 typeof(cpu_soft_restart) *restart; 203 204 cpu_install_idmap(); 205 restart = (void *)__pa_symbol(function_nocfi(cpu_soft_restart)); 206 restart(is_hyp_nvhe(), kimage->start, kimage->arch.dtb_mem, 207 0, 0); 208 } else { 209 void (*kernel_reloc)(struct kimage *kimage); 210 211 if (is_hyp_nvhe()) 212 __hyp_set_vectors(kimage->arch.el2_vectors); 213 cpu_install_ttbr0(kimage->arch.ttbr0, kimage->arch.t0sz); 214 kernel_reloc = (void *)kimage->arch.kern_reloc; 215 kernel_reloc(kimage); 216 } 217 218 BUG(); /* Should never get here. */ 219 } 220 221 static void machine_kexec_mask_interrupts(void) 222 { 223 unsigned int i; 224 struct irq_desc *desc; 225 226 for_each_irq_desc(i, desc) { 227 struct irq_chip *chip; 228 int ret; 229 230 chip = irq_desc_get_chip(desc); 231 if (!chip) 232 continue; 233 234 /* 235 * First try to remove the active state. If this 236 * fails, try to EOI the interrupt. 237 */ 238 ret = irq_set_irqchip_state(i, IRQCHIP_STATE_ACTIVE, false); 239 240 if (ret && irqd_irq_inprogress(&desc->irq_data) && 241 chip->irq_eoi) 242 chip->irq_eoi(&desc->irq_data); 243 244 if (chip->irq_mask) 245 chip->irq_mask(&desc->irq_data); 246 247 if (chip->irq_disable && !irqd_irq_disabled(&desc->irq_data)) 248 chip->irq_disable(&desc->irq_data); 249 } 250 } 251 252 /** 253 * machine_crash_shutdown - shutdown non-crashing cpus and save registers 254 */ 255 void machine_crash_shutdown(struct pt_regs *regs) 256 { 257 local_irq_disable(); 258 259 /* shutdown non-crashing cpus */ 260 crash_smp_send_stop(); 261 262 /* for crashing cpu */ 263 crash_save_cpu(regs, smp_processor_id()); 264 machine_kexec_mask_interrupts(); 265 266 pr_info("Starting crashdump kernel...\n"); 267 } 268 269 void arch_kexec_protect_crashkres(void) 270 { 271 int i; 272 273 for (i = 0; i < kexec_crash_image->nr_segments; i++) 274 set_memory_valid( 275 __phys_to_virt(kexec_crash_image->segment[i].mem), 276 kexec_crash_image->segment[i].memsz >> PAGE_SHIFT, 0); 277 } 278 279 void arch_kexec_unprotect_crashkres(void) 280 { 281 int i; 282 283 for (i = 0; i < kexec_crash_image->nr_segments; i++) 284 set_memory_valid( 285 __phys_to_virt(kexec_crash_image->segment[i].mem), 286 kexec_crash_image->segment[i].memsz >> PAGE_SHIFT, 1); 287 } 288 289 #ifdef CONFIG_HIBERNATION 290 /* 291 * To preserve the crash dump kernel image, the relevant memory segments 292 * should be mapped again around the hibernation. 293 */ 294 void crash_prepare_suspend(void) 295 { 296 if (kexec_crash_image) 297 arch_kexec_unprotect_crashkres(); 298 } 299 300 void crash_post_resume(void) 301 { 302 if (kexec_crash_image) 303 arch_kexec_protect_crashkres(); 304 } 305 306 /* 307 * crash_is_nosave 308 * 309 * Return true only if a page is part of reserved memory for crash dump kernel, 310 * but does not hold any data of loaded kernel image. 311 * 312 * Note that all the pages in crash dump kernel memory have been initially 313 * marked as Reserved as memory was allocated via memblock_reserve(). 314 * 315 * In hibernation, the pages which are Reserved and yet "nosave" are excluded 316 * from the hibernation iamge. crash_is_nosave() does thich check for crash 317 * dump kernel and will reduce the total size of hibernation image. 318 */ 319 320 bool crash_is_nosave(unsigned long pfn) 321 { 322 int i; 323 phys_addr_t addr; 324 325 if (!crashk_res.end) 326 return false; 327 328 /* in reserved memory? */ 329 addr = __pfn_to_phys(pfn); 330 if ((addr < crashk_res.start) || (crashk_res.end < addr)) 331 return false; 332 333 if (!kexec_crash_image) 334 return true; 335 336 /* not part of loaded kernel image? */ 337 for (i = 0; i < kexec_crash_image->nr_segments; i++) 338 if (addr >= kexec_crash_image->segment[i].mem && 339 addr < (kexec_crash_image->segment[i].mem + 340 kexec_crash_image->segment[i].memsz)) 341 return false; 342 343 return true; 344 } 345 346 void crash_free_reserved_phys_range(unsigned long begin, unsigned long end) 347 { 348 unsigned long addr; 349 struct page *page; 350 351 for (addr = begin; addr < end; addr += PAGE_SIZE) { 352 page = phys_to_page(addr); 353 free_reserved_page(page); 354 } 355 } 356 #endif /* CONFIG_HIBERNATION */ 357