1 /* 2 * This file is subject to the terms and conditions of the GNU General Public 3 * License. See the file "COPYING" in the main directory of this archive 4 * for more details. 5 * 6 * Copyright (C) 1995 - 2000 by Ralf Baechle 7 */ 8 #include <linux/context_tracking.h> 9 #include <linux/signal.h> 10 #include <linux/sched.h> 11 #include <linux/interrupt.h> 12 #include <linux/kernel.h> 13 #include <linux/errno.h> 14 #include <linux/string.h> 15 #include <linux/types.h> 16 #include <linux/ptrace.h> 17 #include <linux/ratelimit.h> 18 #include <linux/mman.h> 19 #include <linux/mm.h> 20 #include <linux/smp.h> 21 #include <linux/module.h> 22 #include <linux/kprobes.h> 23 #include <linux/perf_event.h> 24 #include <linux/uaccess.h> 25 26 #include <asm/branch.h> 27 #include <asm/mmu_context.h> 28 #include <asm/ptrace.h> 29 #include <asm/highmem.h> /* For VMALLOC_END */ 30 #include <linux/kdebug.h> 31 32 int show_unhandled_signals = 1; 33 34 /* 35 * This routine handles page faults. It determines the address, 36 * and the problem, and then passes it off to one of the appropriate 37 * routines. 38 */ 39 static void __kprobes __do_page_fault(struct pt_regs *regs, unsigned long write, 40 unsigned long address) 41 { 42 struct vm_area_struct * vma = NULL; 43 struct task_struct *tsk = current; 44 struct mm_struct *mm = tsk->mm; 45 const int field = sizeof(unsigned long) * 2; 46 siginfo_t info; 47 int fault; 48 unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; 49 50 static DEFINE_RATELIMIT_STATE(ratelimit_state, 5 * HZ, 10); 51 52 #if 0 53 printk("Cpu%d[%s:%d:%0*lx:%ld:%0*lx]\n", raw_smp_processor_id(), 54 current->comm, current->pid, field, address, write, 55 field, regs->cp0_epc); 56 #endif 57 58 #ifdef CONFIG_KPROBES 59 /* 60 * This is to notify the fault handler of the kprobes. The 61 * exception code is redundant as it is also carried in REGS, 62 * but we pass it anyhow. 63 */ 64 if (notify_die(DIE_PAGE_FAULT, "page fault", regs, -1, 65 (regs->cp0_cause >> 2) & 0x1f, SIGSEGV) == NOTIFY_STOP) 66 return; 67 #endif 68 69 info.si_code = SEGV_MAPERR; 70 71 /* 72 * We fault-in kernel-space virtual memory on-demand. The 73 * 'reference' page table is init_mm.pgd. 74 * 75 * NOTE! We MUST NOT take any locks for this case. We may 76 * be in an interrupt or a critical region, and should 77 * only copy the information from the master page table, 78 * nothing more. 79 */ 80 #ifdef CONFIG_64BIT 81 # define VMALLOC_FAULT_TARGET no_context 82 #else 83 # define VMALLOC_FAULT_TARGET vmalloc_fault 84 #endif 85 86 if (unlikely(address >= VMALLOC_START && address <= VMALLOC_END)) 87 goto VMALLOC_FAULT_TARGET; 88 #ifdef MODULE_START 89 if (unlikely(address >= MODULE_START && address < MODULE_END)) 90 goto VMALLOC_FAULT_TARGET; 91 #endif 92 93 /* 94 * If we're in an interrupt or have no user 95 * context, we must not take the fault.. 96 */ 97 if (faulthandler_disabled() || !mm) 98 goto bad_area_nosemaphore; 99 100 if (user_mode(regs)) 101 flags |= FAULT_FLAG_USER; 102 retry: 103 down_read(&mm->mmap_sem); 104 vma = find_vma(mm, address); 105 if (!vma) 106 goto bad_area; 107 if (vma->vm_start <= address) 108 goto good_area; 109 if (!(vma->vm_flags & VM_GROWSDOWN)) 110 goto bad_area; 111 if (expand_stack(vma, address)) 112 goto bad_area; 113 /* 114 * Ok, we have a good vm_area for this memory access, so 115 * we can handle it.. 116 */ 117 good_area: 118 info.si_code = SEGV_ACCERR; 119 120 if (write) { 121 if (!(vma->vm_flags & VM_WRITE)) 122 goto bad_area; 123 flags |= FAULT_FLAG_WRITE; 124 } else { 125 if (cpu_has_rixi) { 126 if (address == regs->cp0_epc && !(vma->vm_flags & VM_EXEC)) { 127 #if 0 128 pr_notice("Cpu%d[%s:%d:%0*lx:%ld:%0*lx] XI violation\n", 129 raw_smp_processor_id(), 130 current->comm, current->pid, 131 field, address, write, 132 field, regs->cp0_epc); 133 #endif 134 goto bad_area; 135 } 136 if (!(vma->vm_flags & VM_READ)) { 137 #if 0 138 pr_notice("Cpu%d[%s:%d:%0*lx:%ld:%0*lx] RI violation\n", 139 raw_smp_processor_id(), 140 current->comm, current->pid, 141 field, address, write, 142 field, regs->cp0_epc); 143 #endif 144 goto bad_area; 145 } 146 } else { 147 if (!(vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC))) 148 goto bad_area; 149 } 150 } 151 152 /* 153 * If for any reason at all we couldn't handle the fault, 154 * make sure we exit gracefully rather than endlessly redo 155 * the fault. 156 */ 157 fault = handle_mm_fault(mm, vma, address, flags); 158 159 if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) 160 return; 161 162 perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); 163 if (unlikely(fault & VM_FAULT_ERROR)) { 164 if (fault & VM_FAULT_OOM) 165 goto out_of_memory; 166 else if (fault & VM_FAULT_SIGSEGV) 167 goto bad_area; 168 else if (fault & VM_FAULT_SIGBUS) 169 goto do_sigbus; 170 BUG(); 171 } 172 if (flags & FAULT_FLAG_ALLOW_RETRY) { 173 if (fault & VM_FAULT_MAJOR) { 174 perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 175 regs, address); 176 tsk->maj_flt++; 177 } else { 178 perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 179 regs, address); 180 tsk->min_flt++; 181 } 182 if (fault & VM_FAULT_RETRY) { 183 flags &= ~FAULT_FLAG_ALLOW_RETRY; 184 flags |= FAULT_FLAG_TRIED; 185 186 /* 187 * No need to up_read(&mm->mmap_sem) as we would 188 * have already released it in __lock_page_or_retry 189 * in mm/filemap.c. 190 */ 191 192 goto retry; 193 } 194 } 195 196 up_read(&mm->mmap_sem); 197 return; 198 199 /* 200 * Something tried to access memory that isn't in our memory map.. 201 * Fix it, but check if it's kernel or user first.. 202 */ 203 bad_area: 204 up_read(&mm->mmap_sem); 205 206 bad_area_nosemaphore: 207 /* User mode accesses just cause a SIGSEGV */ 208 if (user_mode(regs)) { 209 tsk->thread.cp0_badvaddr = address; 210 tsk->thread.error_code = write; 211 if (show_unhandled_signals && 212 unhandled_signal(tsk, SIGSEGV) && 213 __ratelimit(&ratelimit_state)) { 214 pr_info("\ndo_page_fault(): sending SIGSEGV to %s for invalid %s %0*lx", 215 tsk->comm, 216 write ? "write access to" : "read access from", 217 field, address); 218 pr_info("epc = %0*lx in", field, 219 (unsigned long) regs->cp0_epc); 220 print_vma_addr(" ", regs->cp0_epc); 221 pr_info("ra = %0*lx in", field, 222 (unsigned long) regs->regs[31]); 223 print_vma_addr(" ", regs->regs[31]); 224 pr_info("\n"); 225 } 226 info.si_signo = SIGSEGV; 227 info.si_errno = 0; 228 /* info.si_code has been set above */ 229 info.si_addr = (void __user *) address; 230 force_sig_info(SIGSEGV, &info, tsk); 231 return; 232 } 233 234 no_context: 235 /* Are we prepared to handle this kernel fault? */ 236 if (fixup_exception(regs)) { 237 current->thread.cp0_baduaddr = address; 238 return; 239 } 240 241 /* 242 * Oops. The kernel tried to access some bad page. We'll have to 243 * terminate things with extreme prejudice. 244 */ 245 bust_spinlocks(1); 246 247 printk(KERN_ALERT "CPU %d Unable to handle kernel paging request at " 248 "virtual address %0*lx, epc == %0*lx, ra == %0*lx\n", 249 raw_smp_processor_id(), field, address, field, regs->cp0_epc, 250 field, regs->regs[31]); 251 die("Oops", regs); 252 253 out_of_memory: 254 /* 255 * We ran out of memory, call the OOM killer, and return the userspace 256 * (which will retry the fault, or kill us if we got oom-killed). 257 */ 258 up_read(&mm->mmap_sem); 259 if (!user_mode(regs)) 260 goto no_context; 261 pagefault_out_of_memory(); 262 return; 263 264 do_sigbus: 265 up_read(&mm->mmap_sem); 266 267 /* Kernel mode? Handle exceptions or die */ 268 if (!user_mode(regs)) 269 goto no_context; 270 else 271 /* 272 * Send a sigbus, regardless of whether we were in kernel 273 * or user mode. 274 */ 275 #if 0 276 printk("do_page_fault() #3: sending SIGBUS to %s for " 277 "invalid %s\n%0*lx (epc == %0*lx, ra == %0*lx)\n", 278 tsk->comm, 279 write ? "write access to" : "read access from", 280 field, address, 281 field, (unsigned long) regs->cp0_epc, 282 field, (unsigned long) regs->regs[31]); 283 #endif 284 tsk->thread.cp0_badvaddr = address; 285 info.si_signo = SIGBUS; 286 info.si_errno = 0; 287 info.si_code = BUS_ADRERR; 288 info.si_addr = (void __user *) address; 289 force_sig_info(SIGBUS, &info, tsk); 290 291 return; 292 #ifndef CONFIG_64BIT 293 vmalloc_fault: 294 { 295 /* 296 * Synchronize this task's top level page-table 297 * with the 'reference' page table. 298 * 299 * Do _not_ use "tsk" here. We might be inside 300 * an interrupt in the middle of a task switch.. 301 */ 302 int offset = __pgd_offset(address); 303 pgd_t *pgd, *pgd_k; 304 pud_t *pud, *pud_k; 305 pmd_t *pmd, *pmd_k; 306 pte_t *pte_k; 307 308 pgd = (pgd_t *) pgd_current[raw_smp_processor_id()] + offset; 309 pgd_k = init_mm.pgd + offset; 310 311 if (!pgd_present(*pgd_k)) 312 goto no_context; 313 set_pgd(pgd, *pgd_k); 314 315 pud = pud_offset(pgd, address); 316 pud_k = pud_offset(pgd_k, address); 317 if (!pud_present(*pud_k)) 318 goto no_context; 319 320 pmd = pmd_offset(pud, address); 321 pmd_k = pmd_offset(pud_k, address); 322 if (!pmd_present(*pmd_k)) 323 goto no_context; 324 set_pmd(pmd, *pmd_k); 325 326 pte_k = pte_offset_kernel(pmd_k, address); 327 if (!pte_present(*pte_k)) 328 goto no_context; 329 return; 330 } 331 #endif 332 } 333 334 asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, 335 unsigned long write, unsigned long address) 336 { 337 enum ctx_state prev_state; 338 339 prev_state = exception_enter(); 340 __do_page_fault(regs, write, address); 341 exception_exit(prev_state); 342 } 343