/*
 * Copyright (C) 1991, 1992  Linus Torvalds
 * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
 */
#include <linux/kallsyms.h>
#include <linux/kprobes.h>
#include <linux/uaccess.h>
#include <linux/utsname.h>
#include <linux/hardirq.h>
#include <linux/kdebug.h>
#include <linux/module.h>
#include <linux/ptrace.h>
#include <linux/sched/debug.h>
#include <linux/sched/task_stack.h>
#include <linux/ftrace.h>
#include <linux/kexec.h>
#include <linux/bug.h>
#include <linux/nmi.h>
#include <linux/sysfs.h>

#include <asm/cpu_entry_area.h>
#include <asm/stacktrace.h>
#include <asm/unwind.h>

int panic_on_unrecovered_nmi;
int panic_on_io_nmi;
static unsigned int code_bytes = 64;
static int die_counter;

bool in_task_stack(unsigned long *stack, struct task_struct *task,
		   struct stack_info *info)
{
	unsigned long *begin = task_stack_page(task);
	unsigned long *end   = task_stack_page(task) + THREAD_SIZE;

	if (stack < begin || stack >= end)
		return false;

	info->type	= STACK_TYPE_TASK;
	info->begin	= begin;
	info->end	= end;
	info->next_sp	= NULL;

	return true;
}

bool in_entry_stack(unsigned long *stack, struct stack_info *info)
{
	struct entry_stack *ss = cpu_entry_stack(smp_processor_id());

	void *begin = ss;
	void *end = ss + 1;

	if ((void *)stack < begin || (void *)stack >= end)
		return false;

	info->type	= STACK_TYPE_ENTRY;
	info->begin	= begin;
	info->end	= end;
	info->next_sp	= NULL;

	return true;
}

static void printk_stack_address(unsigned long address, int reliable,
				 char *log_lvl)
{
	touch_nmi_watchdog();
	printk("%s %s%pB\n", log_lvl, reliable ? "" : "? ", (void *)address);
}

void show_iret_regs(struct pt_regs *regs)
{
	printk(KERN_DEFAULT "RIP: %04x:%pS\n", (int)regs->cs, (void *)regs->ip);
	printk(KERN_DEFAULT "RSP: %04x:%016lx EFLAGS: %08lx", (int)regs->ss,
		regs->sp, regs->flags);
}

static void show_regs_if_on_stack(struct stack_info *info, struct pt_regs *regs,
				  bool partial)
{
	/*
	 * These on_stack() checks aren't strictly necessary: the unwind code
	 * has already validated the 'regs' pointer. The checks are done for
	 * ordering reasons: if the registers are on the next stack, we don't
	 * want to print them out yet. Otherwise they'll be shown as part of
	 * the wrong stack. Later, when show_trace_log_lvl() switches to the
	 * next stack, this function will be called again with the same regs so
	 * they can be printed in the right context.
	 */
	if (!partial && on_stack(info, regs, sizeof(*regs))) {
		__show_regs(regs, 0);

	} else if (partial && on_stack(info, (void *)regs + IRET_FRAME_OFFSET,
				       IRET_FRAME_SIZE)) {
		/*
		 * When an interrupt or exception occurs in entry code, the
		 * full pt_regs might not have been saved yet. In that case
		 * just print the iret frame.
		 */
		show_iret_regs(regs);
	}
}

void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
			unsigned long *stack, char *log_lvl)
{
	struct unwind_state state;
	struct stack_info stack_info = {0};
	unsigned long visit_mask = 0;
	int graph_idx = 0;
	bool partial = false;

	printk("%sCall Trace:\n", log_lvl);

	unwind_start(&state, task, regs, stack);
	stack = stack ? : get_stack_pointer(task, regs);
	regs = unwind_get_entry_regs(&state, &partial);

	/*
	 * Iterate through the stacks, starting with the current stack pointer.
	 * Each stack has a pointer to the next one.
	 *
	 * x86-64 can have several stacks:
	 * - task stack
	 * - interrupt stack
	 * - HW exception stacks (double fault, nmi, debug, mce)
	 * - entry stack
	 *
	 * x86-32 can have up to four stacks:
	 * - task stack
	 * - softirq stack
	 * - hardirq stack
	 * - entry stack
	 */
	for ( ; stack; stack = PTR_ALIGN(stack_info.next_sp, sizeof(long))) {
		const char *stack_name;

		if (get_stack_info(stack, task, &stack_info, &visit_mask)) {
			/*
			 * We weren't on a valid stack. It's possible that
			 * we overflowed a valid stack into a guard page.
			 * See if the next page up is valid so that we can
			 * generate some kind of backtrace if this happens.
			 */
			stack = (unsigned long *)PAGE_ALIGN((unsigned long)stack);
			if (get_stack_info(stack, task, &stack_info, &visit_mask))
				break;
		}

		stack_name = stack_type_name(stack_info.type);
		if (stack_name)
			printk("%s <%s>\n", log_lvl, stack_name);

		if (regs)
			show_regs_if_on_stack(&stack_info, regs, partial);

		/*
		 * Scan the stack, printing any text addresses we find. At the
		 * same time, follow proper stack frames with the unwinder.
		 *
		 * Addresses found during the scan which are not reported by
		 * the unwinder are considered to be additional clues which are
		 * sometimes useful for debugging and are prefixed with '?'.
		 * This also serves as a failsafe option in case the unwinder
		 * goes off in the weeds.
		 */
		for (; stack < stack_info.end; stack++) {
			unsigned long real_addr;
			int reliable = 0;
			unsigned long addr = READ_ONCE_NOCHECK(*stack);
			unsigned long *ret_addr_p =
				unwind_get_return_address_ptr(&state);

			if (!__kernel_text_address(addr))
				continue;

			/*
			 * Don't print regs->ip again if it was already printed
			 * by show_regs_if_on_stack().
			 */
			if (regs && stack == &regs->ip)
				goto next;

			if (stack == ret_addr_p)
				reliable = 1;

			/*
			 * When function graph tracing is enabled for a
			 * function, its return address on the stack is
			 * replaced with the address of an ftrace handler
			 * (return_to_handler). In that case, before printing
			 * the "real" address, we want to print the handler
			 * address as an "unreliable" hint that function graph
			 * tracing was involved.
			 */
			real_addr = ftrace_graph_ret_addr(task, &graph_idx,
							  addr, stack);
			if (real_addr != addr)
				printk_stack_address(addr, 0, log_lvl);
			printk_stack_address(real_addr, reliable, log_lvl);

			if (!reliable)
				continue;

next:
			/*
			 * Get the next frame from the unwinder. No need to
			 * check for an error: if anything goes wrong, the rest
			 * of the addresses will just be printed as unreliable.
			 */
			unwind_next_frame(&state);

			/* if the frame has entry regs, print them */
			regs = unwind_get_entry_regs(&state, &partial);
			if (regs)
				show_regs_if_on_stack(&stack_info, regs, partial);
		}

		if (stack_name)
			printk("%s </%s>\n", log_lvl, stack_name);
	}
}

void show_stack(struct task_struct *task, unsigned long *sp)
{
	task = task ? : current;

	/*
	 * Stack frames below this one aren't interesting. Don't show them
	 * if we're printing for %current.
	 */
	if (!sp && task == current)
		sp = get_stack_pointer(current, NULL);

	show_trace_log_lvl(task, NULL, sp, KERN_DEFAULT);
}

void show_stack_regs(struct pt_regs *regs)
{
	show_trace_log_lvl(current, regs, NULL, KERN_DEFAULT);
}

static arch_spinlock_t die_lock = __ARCH_SPIN_LOCK_UNLOCKED;
static int die_owner = -1;
static unsigned int die_nest_count;

unsigned long oops_begin(void)
{
	int cpu;
	unsigned long flags;

	oops_enter();

	/* racy, but better than risking deadlock. */
	raw_local_irq_save(flags);
	cpu = smp_processor_id();
	if (!arch_spin_trylock(&die_lock)) {
		if (cpu == die_owner)
			/* nested oops. should stop eventually */;
		else
			arch_spin_lock(&die_lock);
	}
	die_nest_count++;
	die_owner = cpu;
	console_verbose();
	bust_spinlocks(1);
	return flags;
}
EXPORT_SYMBOL_GPL(oops_begin);
NOKPROBE_SYMBOL(oops_begin);

void __noreturn rewind_stack_do_exit(int signr);

void oops_end(unsigned long flags, struct pt_regs *regs, int signr)
{
	if (regs && kexec_should_crash(current))
		crash_kexec(regs);

	bust_spinlocks(0);
	die_owner = -1;
	add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE);
	die_nest_count--;
	if (!die_nest_count)
		/* Nest count reaches zero, release the lock. */
		arch_spin_unlock(&die_lock);
	raw_local_irq_restore(flags);
	oops_exit();

	if (!signr)
		return;
	if (in_interrupt())
		panic("Fatal exception in interrupt");
	if (panic_on_oops)
		panic("Fatal exception");

	/*
	 * We're not going to return, but we might be on an IST stack or
	 * have very little stack space left. Rewind the stack and kill
	 * the task.
	 */
	rewind_stack_do_exit(signr);
}
NOKPROBE_SYMBOL(oops_end);

int __die(const char *str, struct pt_regs *regs, long err)
{
#ifdef CONFIG_X86_32
	unsigned short ss;
	unsigned long sp;
#endif
	printk(KERN_DEFAULT
	       "%s: %04lx [#%d]%s%s%s%s%s\n", str, err & 0xffff, ++die_counter,
	       IS_ENABLED(CONFIG_PREEMPT) ? " PREEMPT" : "",
	       IS_ENABLED(CONFIG_SMP) ? " SMP" : "",
	       debug_pagealloc_enabled() ? " DEBUG_PAGEALLOC" : "",
	       IS_ENABLED(CONFIG_KASAN) ? " KASAN" : "",
	       IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION) ?
" PTI" : " NOPTI") : ""); 320 321 if (notify_die(DIE_OOPS, str, regs, err, 322 current->thread.trap_nr, SIGSEGV) == NOTIFY_STOP) 323 return 1; 324 325 print_modules(); 326 show_regs(regs); 327 #ifdef CONFIG_X86_32 328 if (user_mode(regs)) { 329 sp = regs->sp; 330 ss = regs->ss; 331 } else { 332 sp = kernel_stack_pointer(regs); 333 savesegment(ss, ss); 334 } 335 printk(KERN_EMERG "EIP: %pS SS:ESP: %04x:%08lx\n", 336 (void *)regs->ip, ss, sp); 337 #else 338 /* Executive summary in case the oops scrolled away */ 339 printk(KERN_ALERT "RIP: %pS RSP: %016lx\n", (void *)regs->ip, regs->sp); 340 #endif 341 return 0; 342 } 343 NOKPROBE_SYMBOL(__die); 344 345 /* 346 * This is gone through when something in the kernel has done something bad 347 * and is about to be terminated: 348 */ 349 void die(const char *str, struct pt_regs *regs, long err) 350 { 351 unsigned long flags = oops_begin(); 352 int sig = SIGSEGV; 353 354 if (__die(str, regs, err)) 355 sig = 0; 356 oops_end(flags, regs, sig); 357 } 358 359 static int __init code_bytes_setup(char *s) 360 { 361 ssize_t ret; 362 unsigned long val; 363 364 if (!s) 365 return -EINVAL; 366 367 ret = kstrtoul(s, 0, &val); 368 if (ret) 369 return ret; 370 371 code_bytes = val; 372 if (code_bytes > 8192) 373 code_bytes = 8192; 374 375 return 1; 376 } 377 __setup("code_bytes=", code_bytes_setup); 378 379 void show_regs(struct pt_regs *regs) 380 { 381 bool all = true; 382 int i; 383 384 show_regs_print_info(KERN_DEFAULT); 385 386 if (IS_ENABLED(CONFIG_X86_32)) 387 all = !user_mode(regs); 388 389 __show_regs(regs, all); 390 391 /* 392 * When in-kernel, we also print out the stack and code at the 393 * time of the fault.. 394 */ 395 if (!user_mode(regs)) { 396 unsigned int code_prologue = code_bytes * 43 / 64; 397 unsigned int code_len = code_bytes; 398 unsigned char c; 399 u8 *ip; 400 401 show_trace_log_lvl(current, regs, NULL, KERN_DEFAULT); 402 403 printk(KERN_DEFAULT "Code: "); 404 405 ip = (u8 *)regs->ip - code_prologue; 406 if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) { 407 /* try starting at IP */ 408 ip = (u8 *)regs->ip; 409 code_len = code_len - code_prologue + 1; 410 } 411 for (i = 0; i < code_len; i++, ip++) { 412 if (ip < (u8 *)PAGE_OFFSET || 413 probe_kernel_address(ip, c)) { 414 pr_cont(" Bad RIP value."); 415 break; 416 } 417 if (ip == (u8 *)regs->ip) 418 pr_cont("<%02x> ", c); 419 else 420 pr_cont("%02x ", c); 421 } 422 } 423 pr_cont("\n"); 424 } 425