1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Copyright (C) 2009 Sunplus Core Technology Co., Ltd. 4 * Lennox Wu <lennox.wu@sunplusct.com> 5 * Chen Liqin <liqin.chen@sunplusct.com> 6 * Copyright (C) 2012 Regents of the University of California 7 */ 8 9 10 #include <linux/mm.h> 11 #include <linux/kernel.h> 12 #include <linux/interrupt.h> 13 #include <linux/perf_event.h> 14 #include <linux/signal.h> 15 #include <linux/uaccess.h> 16 #include <linux/kprobes.h> 17 #include <linux/kfence.h> 18 #include <linux/entry-common.h> 19 20 #include <asm/ptrace.h> 21 #include <asm/tlbflush.h> 22 23 #include "../kernel/head.h" 24 25 static void die_kernel_fault(const char *msg, unsigned long addr, 26 struct pt_regs *regs) 27 { 28 bust_spinlocks(1); 29 30 pr_alert("Unable to handle kernel %s at virtual address " REG_FMT "\n", msg, 31 addr); 32 33 bust_spinlocks(0); 34 die(regs, "Oops"); 35 make_task_dead(SIGKILL); 36 } 37 38 static inline void no_context(struct pt_regs *regs, unsigned long addr) 39 { 40 const char *msg; 41 42 /* Are we prepared to handle this kernel fault? */ 43 if (fixup_exception(regs)) 44 return; 45 46 /* 47 * Oops. The kernel tried to access some bad page. We'll have to 48 * terminate things with extreme prejudice. 49 */ 50 if (addr < PAGE_SIZE) 51 msg = "NULL pointer dereference"; 52 else { 53 if (kfence_handle_page_fault(addr, regs->cause == EXC_STORE_PAGE_FAULT, regs)) 54 return; 55 56 msg = "paging request"; 57 } 58 59 die_kernel_fault(msg, addr, regs); 60 } 61 62 static inline void mm_fault_error(struct pt_regs *regs, unsigned long addr, vm_fault_t fault) 63 { 64 if (!user_mode(regs)) { 65 no_context(regs, addr); 66 return; 67 } 68 69 if (fault & VM_FAULT_OOM) { 70 /* 71 * We ran out of memory, call the OOM killer, and return the userspace 72 * (which will retry the fault, or kill us if we got oom-killed). 73 */ 74 pagefault_out_of_memory(); 75 return; 76 } else if (fault & (VM_FAULT_SIGBUS | VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE)) { 77 /* Kernel mode? Handle exceptions or die */ 78 do_trap(regs, SIGBUS, BUS_ADRERR, addr); 79 return; 80 } else if (fault & VM_FAULT_SIGSEGV) { 81 do_trap(regs, SIGSEGV, SEGV_MAPERR, addr); 82 return; 83 } 84 85 BUG(); 86 } 87 88 static inline void 89 bad_area_nosemaphore(struct pt_regs *regs, int code, unsigned long addr) 90 { 91 /* 92 * Something tried to access memory that isn't in our memory map. 93 * Fix it, but check if it's kernel or user first. 94 */ 95 /* User mode accesses just cause a SIGSEGV */ 96 if (user_mode(regs)) { 97 do_trap(regs, SIGSEGV, code, addr); 98 return; 99 } 100 101 no_context(regs, addr); 102 } 103 104 static inline void 105 bad_area(struct pt_regs *regs, struct mm_struct *mm, int code, 106 unsigned long addr) 107 { 108 mmap_read_unlock(mm); 109 110 bad_area_nosemaphore(regs, code, addr); 111 } 112 113 static inline void vmalloc_fault(struct pt_regs *regs, int code, unsigned long addr) 114 { 115 pgd_t *pgd, *pgd_k; 116 pud_t *pud_k; 117 p4d_t *p4d_k; 118 pmd_t *pmd_k; 119 pte_t *pte_k; 120 int index; 121 unsigned long pfn; 122 123 /* User mode accesses just cause a SIGSEGV */ 124 if (user_mode(regs)) 125 return do_trap(regs, SIGSEGV, code, addr); 126 127 /* 128 * Synchronize this task's top level page-table 129 * with the 'reference' page table. 130 * 131 * Do _not_ use "tsk->active_mm->pgd" here. 132 * We might be inside an interrupt in the middle 133 * of a task switch. 134 */ 135 index = pgd_index(addr); 136 pfn = csr_read(CSR_SATP) & SATP_PPN; 137 pgd = (pgd_t *)pfn_to_virt(pfn) + index; 138 pgd_k = init_mm.pgd + index; 139 140 if (!pgd_present(*pgd_k)) { 141 no_context(regs, addr); 142 return; 143 } 144 set_pgd(pgd, *pgd_k); 145 146 p4d_k = p4d_offset(pgd_k, addr); 147 if (!p4d_present(*p4d_k)) { 148 no_context(regs, addr); 149 return; 150 } 151 152 pud_k = pud_offset(p4d_k, addr); 153 if (!pud_present(*pud_k)) { 154 no_context(regs, addr); 155 return; 156 } 157 if (pud_leaf(*pud_k)) 158 goto flush_tlb; 159 160 /* 161 * Since the vmalloc area is global, it is unnecessary 162 * to copy individual PTEs 163 */ 164 pmd_k = pmd_offset(pud_k, addr); 165 if (!pmd_present(*pmd_k)) { 166 no_context(regs, addr); 167 return; 168 } 169 if (pmd_leaf(*pmd_k)) 170 goto flush_tlb; 171 172 /* 173 * Make sure the actual PTE exists as well to 174 * catch kernel vmalloc-area accesses to non-mapped 175 * addresses. If we don't do this, this will just 176 * silently loop forever. 177 */ 178 pte_k = pte_offset_kernel(pmd_k, addr); 179 if (!pte_present(*pte_k)) { 180 no_context(regs, addr); 181 return; 182 } 183 184 /* 185 * The kernel assumes that TLBs don't cache invalid 186 * entries, but in RISC-V, SFENCE.VMA specifies an 187 * ordering constraint, not a cache flush; it is 188 * necessary even after writing invalid entries. 189 */ 190 flush_tlb: 191 local_flush_tlb_page(addr); 192 } 193 194 static inline bool access_error(unsigned long cause, struct vm_area_struct *vma) 195 { 196 switch (cause) { 197 case EXC_INST_PAGE_FAULT: 198 if (!(vma->vm_flags & VM_EXEC)) { 199 return true; 200 } 201 break; 202 case EXC_LOAD_PAGE_FAULT: 203 /* Write implies read */ 204 if (!(vma->vm_flags & (VM_READ | VM_WRITE))) { 205 return true; 206 } 207 break; 208 case EXC_STORE_PAGE_FAULT: 209 if (!(vma->vm_flags & VM_WRITE)) { 210 return true; 211 } 212 break; 213 default: 214 panic("%s: unhandled cause %lu", __func__, cause); 215 } 216 return false; 217 } 218 219 /* 220 * This routine handles page faults. It determines the address and the 221 * problem, and then passes it off to one of the appropriate routines. 222 */ 223 void handle_page_fault(struct pt_regs *regs) 224 { 225 struct task_struct *tsk; 226 struct vm_area_struct *vma; 227 struct mm_struct *mm; 228 unsigned long addr, cause; 229 unsigned int flags = FAULT_FLAG_DEFAULT; 230 int code = SEGV_MAPERR; 231 vm_fault_t fault; 232 233 cause = regs->cause; 234 addr = regs->badaddr; 235 236 tsk = current; 237 mm = tsk->mm; 238 239 if (kprobe_page_fault(regs, cause)) 240 return; 241 242 /* 243 * Fault-in kernel-space virtual memory on-demand. 244 * The 'reference' page table is init_mm.pgd. 245 * 246 * NOTE! We MUST NOT take any locks for this case. We may 247 * be in an interrupt or a critical region, and should 248 * only copy the information from the master page table, 249 * nothing more. 250 */ 251 if ((!IS_ENABLED(CONFIG_MMU) || !IS_ENABLED(CONFIG_64BIT)) && 252 unlikely(addr >= VMALLOC_START && addr < VMALLOC_END)) { 253 vmalloc_fault(regs, code, addr); 254 return; 255 } 256 257 /* Enable interrupts if they were enabled in the parent context. */ 258 if (!regs_irqs_disabled(regs)) 259 local_irq_enable(); 260 261 /* 262 * If we're in an interrupt, have no user context, or are running 263 * in an atomic region, then we must not take the fault. 264 */ 265 if (unlikely(faulthandler_disabled() || !mm)) { 266 tsk->thread.bad_cause = cause; 267 no_context(regs, addr); 268 return; 269 } 270 271 if (user_mode(regs)) 272 flags |= FAULT_FLAG_USER; 273 274 if (!user_mode(regs) && addr < TASK_SIZE && unlikely(!(regs->status & SR_SUM))) { 275 if (fixup_exception(regs)) 276 return; 277 278 die_kernel_fault("access to user memory without uaccess routines", addr, regs); 279 } 280 281 perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr); 282 283 if (cause == EXC_STORE_PAGE_FAULT) 284 flags |= FAULT_FLAG_WRITE; 285 else if (cause == EXC_INST_PAGE_FAULT) 286 flags |= FAULT_FLAG_INSTRUCTION; 287 if (!(flags & FAULT_FLAG_USER)) 288 goto lock_mmap; 289 290 vma = lock_vma_under_rcu(mm, addr); 291 if (!vma) 292 goto lock_mmap; 293 294 if (unlikely(access_error(cause, vma))) { 295 vma_end_read(vma); 296 goto lock_mmap; 297 } 298 299 fault = handle_mm_fault(vma, addr, flags | FAULT_FLAG_VMA_LOCK, regs); 300 if (!(fault & (VM_FAULT_RETRY | VM_FAULT_COMPLETED))) 301 vma_end_read(vma); 302 303 if (!(fault & VM_FAULT_RETRY)) { 304 count_vm_vma_lock_event(VMA_LOCK_SUCCESS); 305 goto done; 306 } 307 count_vm_vma_lock_event(VMA_LOCK_RETRY); 308 309 if (fault_signal_pending(fault, regs)) { 310 if (!user_mode(regs)) 311 no_context(regs, addr); 312 return; 313 } 314 lock_mmap: 315 316 retry: 317 vma = lock_mm_and_find_vma(mm, addr, regs); 318 if (unlikely(!vma)) { 319 tsk->thread.bad_cause = cause; 320 bad_area_nosemaphore(regs, code, addr); 321 return; 322 } 323 324 /* 325 * Ok, we have a good vm_area for this memory access, so 326 * we can handle it. 327 */ 328 code = SEGV_ACCERR; 329 330 if (unlikely(access_error(cause, vma))) { 331 tsk->thread.bad_cause = cause; 332 bad_area(regs, mm, code, addr); 333 return; 334 } 335 336 /* 337 * If for any reason at all we could not handle the fault, 338 * make sure we exit gracefully rather than endlessly redo 339 * the fault. 340 */ 341 fault = handle_mm_fault(vma, addr, flags, regs); 342 343 /* 344 * If we need to retry but a fatal signal is pending, handle the 345 * signal first. We do not need to release the mmap_lock because it 346 * would already be released in __lock_page_or_retry in mm/filemap.c. 347 */ 348 if (fault_signal_pending(fault, regs)) { 349 if (!user_mode(regs)) 350 no_context(regs, addr); 351 return; 352 } 353 354 /* The fault is fully completed (including releasing mmap lock) */ 355 if (fault & VM_FAULT_COMPLETED) 356 return; 357 358 if (unlikely(fault & VM_FAULT_RETRY)) { 359 flags |= FAULT_FLAG_TRIED; 360 361 /* 362 * No need to mmap_read_unlock(mm) as we would 363 * have already released it in __lock_page_or_retry 364 * in mm/filemap.c. 365 */ 366 goto retry; 367 } 368 369 mmap_read_unlock(mm); 370 371 done: 372 if (unlikely(fault & VM_FAULT_ERROR)) { 373 tsk->thread.bad_cause = cause; 374 mm_fault_error(regs, addr, fault); 375 return; 376 } 377 return; 378 } 379