/*
 * Copyright (C) 1991, 1992 Linus Torvalds
 * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
 */
#include <linux/kallsyms.h>
#include <linux/kprobes.h>
#include <linux/uaccess.h>
#include <linux/utsname.h>
#include <linux/hardirq.h>
#include <linux/kdebug.h>
#include <linux/module.h>
#include <linux/ptrace.h>
#include <linux/sched/debug.h>
#include <linux/sched/task_stack.h>
#include <linux/ftrace.h>
#include <linux/kexec.h>
#include <linux/bug.h>
#include <linux/nmi.h>
#include <linux/sysfs.h>

#include <asm/stacktrace.h>
#include <asm/unwind.h>

int panic_on_unrecovered_nmi;
int panic_on_io_nmi;
unsigned int code_bytes = 64;
static int die_counter;

bool in_task_stack(unsigned long *stack, struct task_struct *task,
		   struct stack_info *info)
{
	unsigned long *begin = task_stack_page(task);
	unsigned long *end = task_stack_page(task) + THREAD_SIZE;

	if (stack < begin || stack >= end)
		return false;

	info->type = STACK_TYPE_TASK;
	info->begin = begin;
	info->end = end;
	info->next_sp = NULL;

	return true;
}

bool in_sysenter_stack(unsigned long *stack, struct stack_info *info)
{
	struct SYSENTER_stack *ss = cpu_SYSENTER_stack(smp_processor_id());

	void *begin = ss;
	void *end = ss + 1;

	if ((void *)stack < begin || (void *)stack >= end)
		return false;

	info->type = STACK_TYPE_SYSENTER;
	info->begin = begin;
	info->end = end;
	info->next_sp = NULL;

	return true;
}

static void printk_stack_address(unsigned long address, int reliable,
				 char *log_lvl)
{
	touch_nmi_watchdog();
	printk("%s %s%pB\n", log_lvl, reliable ? "" : "? ", (void *)address);
}

void show_iret_regs(struct pt_regs *regs)
{
	printk(KERN_DEFAULT "RIP: %04x:%pS\n", (int)regs->cs, (void *)regs->ip);
	printk(KERN_DEFAULT "RSP: %04x:%016lx EFLAGS: %08lx", (int)regs->ss,
		regs->sp, regs->flags);
}

static void show_regs_safe(struct stack_info *info, struct pt_regs *regs)
{
	if (on_stack(info, regs, sizeof(*regs)))
		__show_regs(regs, 0);
	else if (on_stack(info, (void *)regs + IRET_FRAME_OFFSET,
			  IRET_FRAME_SIZE)) {
		/*
		 * When an interrupt or exception occurs in entry code, the
		 * full pt_regs might not have been saved yet. In that case
		 * just print the iret frame.
		 */
		show_iret_regs(regs);
	}
}
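
/*
 * show_trace_log_lvl() prints a call trace for @task at the given printk
 * level, starting the unwind from @stack (or from the task's current stack
 * pointer if @stack is NULL). @regs, if non-NULL, holds the registers
 * saved at the entry point of the trap or interrupt being reported.
 */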
void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
			unsigned long *stack, char *log_lvl)
{
	struct unwind_state state;
	struct stack_info stack_info = {0};
	unsigned long visit_mask = 0;
	int graph_idx = 0;

	printk("%sCall Trace:\n", log_lvl);

	unwind_start(&state, task, regs, stack);
	stack = stack ? : get_stack_pointer(task, regs);

	/*
	 * Iterate through the stacks, starting with the current stack pointer.
	 * Each stack has a pointer to the next one.
	 *
	 * x86-64 can have several stacks:
	 * - task stack
	 * - interrupt stack
	 * - HW exception stacks (double fault, nmi, debug, mce)
	 * - SYSENTER stack
	 *
	 * x86-32 can have up to four stacks:
	 * - task stack
	 * - softirq stack
	 * - hardirq stack
	 * - SYSENTER stack
	 */
	for (regs = NULL; stack; stack = PTR_ALIGN(stack_info.next_sp, sizeof(long))) {
		const char *stack_name;

		if (get_stack_info(stack, task, &stack_info, &visit_mask)) {
			/*
			 * We weren't on a valid stack. It's possible that
			 * we overflowed a valid stack into a guard page.
			 * See if the next page up is valid so that we can
			 * generate some kind of backtrace if this happens.
			 */
			stack = (unsigned long *)PAGE_ALIGN((unsigned long)stack);
			if (get_stack_info(stack, task, &stack_info, &visit_mask))
				break;
		}

		stack_name = stack_type_name(stack_info.type);
		if (stack_name)
			printk("%s <%s>\n", log_lvl, stack_name);

		if (regs)
			show_regs_safe(&stack_info, regs);

		/*
		 * Scan the stack, printing any text addresses we find. At the
		 * same time, follow proper stack frames with the unwinder.
		 *
		 * Addresses found during the scan which are not reported by
		 * the unwinder are considered to be additional clues which are
		 * sometimes useful for debugging and are prefixed with '?'.
		 * This also serves as a failsafe option in case the unwinder
		 * goes off in the weeds.
		 */
		for (; stack < stack_info.end; stack++) {
			unsigned long real_addr;
			int reliable = 0;
			unsigned long addr = READ_ONCE_NOCHECK(*stack);
			unsigned long *ret_addr_p =
				unwind_get_return_address_ptr(&state);

			if (!__kernel_text_address(addr))
				continue;

			/*
			 * Don't print regs->ip again if it was already printed
			 * by show_regs_safe() below.
			 */
			if (regs && stack == &regs->ip)
				goto next;

			if (stack == ret_addr_p)
				reliable = 1;

			/*
			 * When function graph tracing is enabled for a
			 * function, its return address on the stack is
			 * replaced with the address of an ftrace handler
			 * (return_to_handler). In that case, before printing
			 * the "real" address, we want to print the handler
			 * address as an "unreliable" hint that function graph
			 * tracing was involved.
			 */
			real_addr = ftrace_graph_ret_addr(task, &graph_idx,
							  addr, stack);
			if (real_addr != addr)
				printk_stack_address(addr, 0, log_lvl);
			printk_stack_address(real_addr, reliable, log_lvl);

			if (!reliable)
				continue;

next:
			/*
			 * Get the next frame from the unwinder. No need to
			 * check for an error: if anything goes wrong, the rest
			 * of the addresses will just be printed as unreliable.
			 */
			unwind_next_frame(&state);

			/* if the frame has entry regs, print them */
			regs = unwind_get_entry_regs(&state);
			if (regs)
				show_regs_safe(&stack_info, regs);
		}

		if (stack_name)
			printk("%s </%s>\n", log_lvl, stack_name);
	}
}

void show_stack(struct task_struct *task, unsigned long *sp)
{
	task = task ? : current;

	/*
	 * Stack frames below this one aren't interesting. Don't show them
	 * if we're printing for %current.
	 */
	if (!sp && task == current)
		sp = get_stack_pointer(current, NULL);

	show_trace_log_lvl(task, NULL, sp, KERN_DEFAULT);
}

void show_stack_regs(struct pt_regs *regs)
{
	show_trace_log_lvl(current, regs, NULL, KERN_DEFAULT);
}
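
/*
 * die_lock serializes oops output across CPUs. oops_begin() allows the
 * owning CPU to re-enter (a nested oops while already oopsing) and tracks
 * the nesting depth in die_nest_count so that oops_end() only releases
 * the lock once the outermost oops has finished.
 */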
static arch_spinlock_t die_lock = __ARCH_SPIN_LOCK_UNLOCKED;
static int die_owner = -1;
static unsigned int die_nest_count;

unsigned long oops_begin(void)
{
	int cpu;
	unsigned long flags;

	oops_enter();

	/* racy, but better than risking deadlock. */
	raw_local_irq_save(flags);
	cpu = smp_processor_id();
	if (!arch_spin_trylock(&die_lock)) {
		if (cpu == die_owner)
			/* nested oops. should stop eventually */;
		else
			arch_spin_lock(&die_lock);
	}
	die_nest_count++;
	die_owner = cpu;
	console_verbose();
	bust_spinlocks(1);
	return flags;
}
EXPORT_SYMBOL_GPL(oops_begin);
NOKPROBE_SYMBOL(oops_begin);

void __noreturn rewind_stack_do_exit(int signr);

void oops_end(unsigned long flags, struct pt_regs *regs, int signr)
{
	if (regs && kexec_should_crash(current))
		crash_kexec(regs);

	bust_spinlocks(0);
	die_owner = -1;
	add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE);
	die_nest_count--;
	if (!die_nest_count)
		/* Nest count reaches zero, release the lock. */
		arch_spin_unlock(&die_lock);
	raw_local_irq_restore(flags);
	oops_exit();

	if (!signr)
		return;
	if (in_interrupt())
		panic("Fatal exception in interrupt");
	if (panic_on_oops)
		panic("Fatal exception");

	/*
	 * We're not going to return, but we might be on an IST stack or
	 * have very little stack space left. Rewind the stack and kill
	 * the task.
	 */
	rewind_stack_do_exit(signr);
}
NOKPROBE_SYMBOL(oops_end);

int __die(const char *str, struct pt_regs *regs, long err)
{
#ifdef CONFIG_X86_32
	unsigned short ss;
	unsigned long sp;
#endif
	printk(KERN_DEFAULT
	       "%s: %04lx [#%d]%s%s%s%s\n", str, err & 0xffff, ++die_counter,
	       IS_ENABLED(CONFIG_PREEMPT) ? " PREEMPT" : "",
	       IS_ENABLED(CONFIG_SMP) ? " SMP" : "",
	       debug_pagealloc_enabled() ? " DEBUG_PAGEALLOC" : "",
	       IS_ENABLED(CONFIG_KASAN) ? " KASAN" : "");

	if (notify_die(DIE_OOPS, str, regs, err,
			current->thread.trap_nr, SIGSEGV) == NOTIFY_STOP)
		return 1;

	print_modules();
	show_regs(regs);
#ifdef CONFIG_X86_32
	if (user_mode(regs)) {
		sp = regs->sp;
		ss = regs->ss;
	} else {
		sp = kernel_stack_pointer(regs);
		savesegment(ss, ss);
	}
	printk(KERN_EMERG "EIP: %pS SS:ESP: %04x:%08lx\n",
	       (void *)regs->ip, ss, sp);
#else
	/* Executive summary in case the oops scrolled away */
	printk(KERN_ALERT "RIP: %pS RSP: %016lx\n", (void *)regs->ip, regs->sp);
#endif
	return 0;
}
NOKPROBE_SYMBOL(__die);

/*
 * This is gone through when something in the kernel has done something bad
 * and is about to be terminated:
 */
void die(const char *str, struct pt_regs *regs, long err)
{
	unsigned long flags = oops_begin();
	int sig = SIGSEGV;

	if (__die(str, regs, err))
		sig = 0;
	oops_end(flags, regs, sig);
}

static int __init code_bytes_setup(char *s)
{
	ssize_t ret;
	unsigned long val;

	if (!s)
		return -EINVAL;

	ret = kstrtoul(s, 0, &val);
	if (ret)
		return ret;

	code_bytes = val;
	if (code_bytes > 8192)
		code_bytes = 8192;

	return 1;
}
__setup("code_bytes=", code_bytes_setup);
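
/*
 * Usage: the "code_bytes=" boot parameter sets how many bytes of object
 * code are dumped in an oops report, e.g. "code_bytes=128". Values above
 * 8192 are clamped by code_bytes_setup().
 */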