// SPDX-License-Identifier: GPL-2.0-only
/*
 * Based on arch/arm/mm/fault.c
 *
 * Copyright (C) 1995 Linus Torvalds
 * Copyright (C) 1995-2004 Russell King
 * Copyright (C) 2012 ARM Ltd.
 */

#include <linux/acpi.h>
#include <linux/extable.h>
#include <linux/signal.h>
#include <linux/mm.h>
#include <linux/hardirq.h>
#include <linux/init.h>
#include <linux/kprobes.h>
#include <linux/uaccess.h>
#include <linux/page-flags.h>
#include <linux/sched/signal.h>
#include <linux/sched/debug.h>
#include <linux/highmem.h>
#include <linux/perf_event.h>
#include <linux/preempt.h>
#include <linux/hugetlb.h>

#include <asm/acpi.h>
#include <asm/bug.h>
#include <asm/cmpxchg.h>
#include <asm/cpufeature.h>
#include <asm/exception.h>
#include <asm/daifflags.h>
#include <asm/debug-monitors.h>
#include <asm/esr.h>
#include <asm/kasan.h>
#include <asm/sysreg.h>
#include <asm/system_misc.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <asm/traps.h>

struct fault_info {
	int	(*fn)(unsigned long addr, unsigned int esr,
		      struct pt_regs *regs);
	int	sig;
	int	code;
	const char *name;
};

static const struct fault_info fault_info[];
static struct fault_info debug_fault_info[];

static inline const struct fault_info *esr_to_fault_info(unsigned int esr)
{
	return fault_info + (esr & ESR_ELx_FSC);
}

static inline const struct fault_info *esr_to_debug_fault_info(unsigned int esr)
{
	return debug_fault_info + DBG_ESR_EVT(esr);
}

static void data_abort_decode(unsigned int esr)
{
	pr_alert("Data abort info:\n");

	if (esr & ESR_ELx_ISV) {
		pr_alert("  Access size = %u byte(s)\n",
			 1U << ((esr & ESR_ELx_SAS) >> ESR_ELx_SAS_SHIFT));
		pr_alert("  SSE = %lu, SRT = %lu\n",
			 (esr & ESR_ELx_SSE) >> ESR_ELx_SSE_SHIFT,
			 (esr & ESR_ELx_SRT_MASK) >> ESR_ELx_SRT_SHIFT);
		pr_alert("  SF = %lu, AR = %lu\n",
			 (esr & ESR_ELx_SF) >> ESR_ELx_SF_SHIFT,
			 (esr & ESR_ELx_AR) >> ESR_ELx_AR_SHIFT);
	} else {
		pr_alert("  ISV = 0, ISS = 0x%08lx\n", esr & ESR_ELx_ISS_MASK);
	}

	pr_alert("  CM = %lu, WnR = %lu\n",
		 (esr & ESR_ELx_CM) >> ESR_ELx_CM_SHIFT,
		 (esr & ESR_ELx_WNR) >> ESR_ELx_WNR_SHIFT);
}

static void mem_abort_decode(unsigned int esr)
{
	pr_alert("Mem abort info:\n");

	pr_alert("  ESR = 0x%08x\n", esr);
	pr_alert("  Exception class = %s, IL = %u bits\n",
		 esr_get_class_string(esr),
		 (esr & ESR_ELx_IL) ? 32 : 16);
	pr_alert("  SET = %lu, FnV = %lu\n",
		 (esr & ESR_ELx_SET_MASK) >> ESR_ELx_SET_SHIFT,
		 (esr & ESR_ELx_FnV) >> ESR_ELx_FnV_SHIFT);
	pr_alert("  EA = %lu, S1PTW = %lu\n",
		 (esr & ESR_ELx_EA) >> ESR_ELx_EA_SHIFT,
		 (esr & ESR_ELx_S1PTW) >> ESR_ELx_S1PTW_SHIFT);

	if (esr_is_data_abort(esr))
		data_abort_decode(esr);
}

static inline bool is_ttbr0_addr(unsigned long addr)
{
	/* entry assembly clears tags for TTBR0 addrs */
	return addr < TASK_SIZE;
}

static inline bool is_ttbr1_addr(unsigned long addr)
{
	/* TTBR1 addresses may have a tag if KASAN_SW_TAGS is in use */
	return arch_kasan_reset_tag(addr) >= VA_START;
}

/*
 * Dump out the page tables associated with 'addr' in the currently active mm.
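 *
 * Added illustration (not part of the original comment): the shape of the
 * report follows directly from the pr_alert() format strings below, with
 * "<...>" and "..." standing in for the actual values:
 *
 *	user pgtable: <n>k pages, <m>-bit VAs, pgdp=...
 *	[<addr>] pgd=..., pud=..., pmd=..., pte=...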
 */
static void show_pte(unsigned long addr)
{
	struct mm_struct *mm;
	pgd_t *pgdp;
	pgd_t pgd;

	if (is_ttbr0_addr(addr)) {
		/* TTBR0 */
		mm = current->active_mm;
		if (mm == &init_mm) {
			pr_alert("[%016lx] user address but active_mm is swapper\n",
				 addr);
			return;
		}
	} else if (is_ttbr1_addr(addr)) {
		/* TTBR1 */
		mm = &init_mm;
	} else {
		pr_alert("[%016lx] address between user and kernel address ranges\n",
			 addr);
		return;
	}

	pr_alert("%s pgtable: %luk pages, %u-bit VAs, pgdp=%016lx\n",
		 mm == &init_mm ? "swapper" : "user", PAGE_SIZE / SZ_1K,
		 mm == &init_mm ? VA_BITS : (int)vabits_user,
		 (unsigned long)virt_to_phys(mm->pgd));
	pgdp = pgd_offset(mm, addr);
	pgd = READ_ONCE(*pgdp);
	pr_alert("[%016lx] pgd=%016llx", addr, pgd_val(pgd));

	do {
		pud_t *pudp, pud;
		pmd_t *pmdp, pmd;
		pte_t *ptep, pte;

		if (pgd_none(pgd) || pgd_bad(pgd))
			break;

		pudp = pud_offset(pgdp, addr);
		pud = READ_ONCE(*pudp);
		pr_cont(", pud=%016llx", pud_val(pud));
		if (pud_none(pud) || pud_bad(pud))
			break;

		pmdp = pmd_offset(pudp, addr);
		pmd = READ_ONCE(*pmdp);
		pr_cont(", pmd=%016llx", pmd_val(pmd));
		if (pmd_none(pmd) || pmd_bad(pmd))
			break;

		ptep = pte_offset_map(pmdp, addr);
		pte = READ_ONCE(*ptep);
		pr_cont(", pte=%016llx", pte_val(pte));
		pte_unmap(ptep);
	} while(0);

	pr_cont("\n");
}

/*
 * This function sets the access flags (dirty, accessed), as well as write
 * permission, and only to a more permissive setting.
 *
 * It needs to cope with hardware update of the accessed/dirty state by other
 * agents in the system and can safely skip the __sync_icache_dcache() call as,
 * like set_pte_at(), the PTE is never changed from no-exec to exec here.
 *
 * Returns whether or not the PTE actually changed.
 */
int ptep_set_access_flags(struct vm_area_struct *vma,
			  unsigned long address, pte_t *ptep,
			  pte_t entry, int dirty)
{
	pteval_t old_pteval, pteval;
	pte_t pte = READ_ONCE(*ptep);

	if (pte_same(pte, entry))
		return 0;

	/* only preserve the access flags and write permission */
	pte_val(entry) &= PTE_RDONLY | PTE_AF | PTE_WRITE | PTE_DIRTY;

	/*
	 * Setting the flags must be done atomically to avoid racing with the
	 * hardware update of the access/dirty state. The PTE_RDONLY bit must
	 * be set to the most permissive (lowest value) of *ptep and entry
	 * (calculated as: a & b == ~(~a | ~b)).
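	 *
	 * Added illustration (not part of the original comment): only the
	 * PTE_RDONLY bit is merged this way. Inverting both inputs (the
	 * "^= PTE_RDONLY" steps below), OR-ing them, and inverting the
	 * result back computes ~(~a | ~b) == a & b for that single bit, so
	 * RDONLY can only be cleared (made more permissive), never set. The
	 * other preserved flags (AF, WRITE, DIRTY) are combined with a plain
	 * OR, i.e. they can only be added.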
	 */
	pte_val(entry) ^= PTE_RDONLY;
	pteval = pte_val(pte);
	do {
		old_pteval = pteval;
		pteval ^= PTE_RDONLY;
		pteval |= pte_val(entry);
		pteval ^= PTE_RDONLY;
		pteval = cmpxchg_relaxed(&pte_val(*ptep), old_pteval, pteval);
	} while (pteval != old_pteval);

	flush_tlb_fix_spurious_fault(vma, address);
	return 1;
}

static bool is_el1_instruction_abort(unsigned int esr)
{
	return ESR_ELx_EC(esr) == ESR_ELx_EC_IABT_CUR;
}

static inline bool is_el1_permission_fault(unsigned long addr, unsigned int esr,
					   struct pt_regs *regs)
{
	unsigned int ec = ESR_ELx_EC(esr);
	unsigned int fsc_type = esr & ESR_ELx_FSC_TYPE;

	if (ec != ESR_ELx_EC_DABT_CUR && ec != ESR_ELx_EC_IABT_CUR)
		return false;

	if (fsc_type == ESR_ELx_FSC_PERM)
		return true;

	if (is_ttbr0_addr(addr) && system_uses_ttbr0_pan())
		return fsc_type == ESR_ELx_FSC_FAULT &&
			(regs->pstate & PSR_PAN_BIT);

	return false;
}

static void die_kernel_fault(const char *msg, unsigned long addr,
			     unsigned int esr, struct pt_regs *regs)
{
	bust_spinlocks(1);

	pr_alert("Unable to handle kernel %s at virtual address %016lx\n", msg,
		 addr);

	mem_abort_decode(esr);

	show_pte(addr);
	die("Oops", regs, esr);
	bust_spinlocks(0);
	do_exit(SIGKILL);
}

static void __do_kernel_fault(unsigned long addr, unsigned int esr,
			      struct pt_regs *regs)
{
	const char *msg;

	/*
	 * Are we prepared to handle this kernel fault?
	 * We are almost certainly not prepared to handle instruction faults.
	 */
	if (!is_el1_instruction_abort(esr) && fixup_exception(regs))
		return;

	if (is_el1_permission_fault(addr, esr, regs)) {
		if (esr & ESR_ELx_WNR)
			msg = "write to read-only memory";
		else
			msg = "read from unreadable memory";
	} else if (addr < PAGE_SIZE) {
		msg = "NULL pointer dereference";
	} else {
		msg = "paging request";
	}

	die_kernel_fault(msg, addr, esr, regs);
}

static void set_thread_esr(unsigned long address, unsigned int esr)
{
	current->thread.fault_address = address;

	/*
	 * If the faulting address is in the kernel, we must sanitize the ESR.
	 * From userspace's point of view, kernel-only mappings don't exist
	 * at all, so we report them as level 0 translation faults.
	 * (This is not quite the way that "no mapping there at all" behaves:
	 * an alignment fault not caused by the memory type would take
	 * precedence over translation fault for a real access to empty
	 * space. Unfortunately we can't easily distinguish "alignment fault
	 * not caused by memory type" from "alignment fault caused by memory
	 * type", so we ignore this wrinkle and just return the translation
	 * fault.)
	 */
	if (!is_ttbr0_addr(current->thread.fault_address)) {
		switch (ESR_ELx_EC(esr)) {
		case ESR_ELx_EC_DABT_LOW:
			/*
			 * These bits provide only information about the
			 * faulting instruction, which userspace knows already.
			 * We explicitly clear bits which are architecturally
			 * RES0 in case they are given meanings in future.
			 * We always report the ESR as if the fault was taken
			 * to EL1 and so ISV and the bits in ISS[23:14] are
			 * clear. (In fact it always will be a fault to EL1.)
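			 *
			 * (Added illustration: after the masking below, only
			 * the EC, IL, CM and WnR fields of the incoming ESR
			 * survive, and the DFSC field is replaced with
			 * ESR_ELx_FSC_FAULT, i.e. a level 0 translation
			 * fault, whatever the original fault status was.)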
			 */
			esr &= ESR_ELx_EC_MASK | ESR_ELx_IL |
				ESR_ELx_CM | ESR_ELx_WNR;
			esr |= ESR_ELx_FSC_FAULT;
			break;
		case ESR_ELx_EC_IABT_LOW:
			/*
			 * Claim a level 0 translation fault.
			 * All other bits are architecturally RES0 for faults
			 * reported with that DFSC value, so we clear them.
			 */
			esr &= ESR_ELx_EC_MASK | ESR_ELx_IL;
			esr |= ESR_ELx_FSC_FAULT;
			break;
		default:
			/*
			 * This should never happen (entry.S only brings us
			 * into this code for insn and data aborts from a lower
			 * exception level). Fail safe by not providing an ESR
			 * context record at all.
			 */
			WARN(1, "ESR 0x%x is not DABT or IABT from EL0\n", esr);
			esr = 0;
			break;
		}
	}

	current->thread.fault_code = esr;
}

static void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *regs)
{
	/*
	 * If we are in kernel mode at this point, we have no context to
	 * handle this fault with.
	 */
	if (user_mode(regs)) {
		const struct fault_info *inf = esr_to_fault_info(esr);

		set_thread_esr(addr, esr);
		arm64_force_sig_fault(inf->sig, inf->code, (void __user *)addr,
				      inf->name);
	} else {
		__do_kernel_fault(addr, esr, regs);
	}
}

#define VM_FAULT_BADMAP		0x010000
#define VM_FAULT_BADACCESS	0x020000

static vm_fault_t __do_page_fault(struct mm_struct *mm, unsigned long addr,
				  unsigned int mm_flags, unsigned long vm_flags)
{
	struct vm_area_struct *vma = find_vma(mm, addr);

	if (unlikely(!vma))
		return VM_FAULT_BADMAP;

	/*
	 * Ok, we have a good vm_area for this memory access, so we can handle
	 * it.
	 */
	if (unlikely(vma->vm_start > addr)) {
		if (!(vma->vm_flags & VM_GROWSDOWN))
			return VM_FAULT_BADMAP;
		if (expand_stack(vma, addr))
			return VM_FAULT_BADMAP;
	}

	/*
	 * Check that the permissions on the VMA allow for the fault which
	 * occurred.
	 */
	if (!(vma->vm_flags & vm_flags))
		return VM_FAULT_BADACCESS;
	return handle_mm_fault(vma, addr & PAGE_MASK, mm_flags);
}

static bool is_el0_instruction_abort(unsigned int esr)
{
	return ESR_ELx_EC(esr) == ESR_ELx_EC_IABT_LOW;
}

/*
 * Note: not valid for EL1 DC IVAC, but we never use that in a way that
 * should fault. EL0 cannot issue DC IVAC (undef).
 */
static bool is_write_abort(unsigned int esr)
{
	return (esr & ESR_ELx_WNR) && !(esr & ESR_ELx_CM);
}

static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
				   struct pt_regs *regs)
{
	const struct fault_info *inf;
	struct mm_struct *mm = current->mm;
	vm_fault_t fault, major = 0;
	unsigned long vm_flags = VM_READ | VM_WRITE;
	unsigned int mm_flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;

	if (kprobe_page_fault(regs, esr))
		return 0;

	/*
	 * If we're in an interrupt or have no user context, we must not take
	 * the fault.
	 */
	if (faulthandler_disabled() || !mm)
		goto no_context;

	if (user_mode(regs))
		mm_flags |= FAULT_FLAG_USER;

	if (is_el0_instruction_abort(esr)) {
		vm_flags = VM_EXEC;
		mm_flags |= FAULT_FLAG_INSTRUCTION;
	} else if (is_write_abort(esr)) {
		vm_flags = VM_WRITE;
		mm_flags |= FAULT_FLAG_WRITE;
	}

	if (is_ttbr0_addr(addr) && is_el1_permission_fault(addr, esr, regs)) {
		/* regs->orig_addr_limit may be 0 if we entered from EL0 */
		if (regs->orig_addr_limit == KERNEL_DS)
			die_kernel_fault("access to user memory with fs=KERNEL_DS",
					 addr, esr, regs);

		if (is_el1_instruction_abort(esr))
			die_kernel_fault("execution of user memory",
					 addr, esr, regs);

		if (!search_exception_tables(regs->pc))
			die_kernel_fault("access to user memory outside uaccess routines",
					 addr, esr, regs);
	}

	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);

	/*
	 * As per x86, we may deadlock here. However, since the kernel only
	 * validly references user space from well defined areas of the code,
	 * we can bug out early if this is from code which shouldn't.
	 */
	if (!down_read_trylock(&mm->mmap_sem)) {
		if (!user_mode(regs) && !search_exception_tables(regs->pc))
			goto no_context;
retry:
		down_read(&mm->mmap_sem);
	} else {
		/*
		 * The above down_read_trylock() might have succeeded, in
		 * which case we'll have missed the might_sleep() from
		 * down_read().
		 */
		might_sleep();
#ifdef CONFIG_DEBUG_VM
		if (!user_mode(regs) && !search_exception_tables(regs->pc)) {
			up_read(&mm->mmap_sem);
			goto no_context;
		}
#endif
	}

	fault = __do_page_fault(mm, addr, mm_flags, vm_flags);
	major |= fault & VM_FAULT_MAJOR;

	if (fault & VM_FAULT_RETRY) {
		/*
		 * If we need to retry but a fatal signal is pending,
		 * handle the signal first. We do not need to release
		 * the mmap_sem because it would already be released
		 * in __lock_page_or_retry in mm/filemap.c.
		 */
		if (fatal_signal_pending(current)) {
			if (!user_mode(regs))
				goto no_context;
			return 0;
		}

		/*
		 * Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk of
		 * starvation.
		 */
		if (mm_flags & FAULT_FLAG_ALLOW_RETRY) {
			mm_flags &= ~FAULT_FLAG_ALLOW_RETRY;
			mm_flags |= FAULT_FLAG_TRIED;
			goto retry;
		}
	}
	up_read(&mm->mmap_sem);

	/*
	 * Handle the "normal" (no error) case first.
	 */
	if (likely(!(fault & (VM_FAULT_ERROR | VM_FAULT_BADMAP |
			      VM_FAULT_BADACCESS)))) {
		/*
		 * Major/minor page fault accounting is only done
		 * once. If we go through a retry, it is extremely
		 * likely that the page will be found in page cache at
		 * that point.
		 */
		if (major) {
			current->maj_flt++;
			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs,
				      addr);
		} else {
			current->min_flt++;
			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs,
				      addr);
		}

		return 0;
	}

	/*
	 * If we are in kernel mode at this point, we have no context to
	 * handle this fault with.
	 */
	if (!user_mode(regs))
		goto no_context;

	if (fault & VM_FAULT_OOM) {
		/*
		 * We ran out of memory, call the OOM killer, and return to
		 * userspace (which will retry the fault, or kill us if we got
		 * oom-killed).
		 */
		pagefault_out_of_memory();
		return 0;
	}

	inf = esr_to_fault_info(esr);
	set_thread_esr(addr, esr);
	if (fault & VM_FAULT_SIGBUS) {
		/*
		 * We had some memory, but were unable to successfully fix up
		 * this page fault.
		 */
		arm64_force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)addr,
				      inf->name);
	} else if (fault & (VM_FAULT_HWPOISON_LARGE | VM_FAULT_HWPOISON)) {
		unsigned int lsb;

		lsb = PAGE_SHIFT;
		if (fault & VM_FAULT_HWPOISON_LARGE)
			lsb = hstate_index_to_shift(VM_FAULT_GET_HINDEX(fault));

		arm64_force_sig_mceerr(BUS_MCEERR_AR, (void __user *)addr, lsb,
				       inf->name);
	} else {
		/*
		 * Something tried to access memory that isn't in our memory
		 * map.
		 */
		arm64_force_sig_fault(SIGSEGV,
				      fault == VM_FAULT_BADACCESS ? SEGV_ACCERR : SEGV_MAPERR,
				      (void __user *)addr,
				      inf->name);
	}

	return 0;

no_context:
	__do_kernel_fault(addr, esr, regs);
	return 0;
}

static int __kprobes do_translation_fault(unsigned long addr,
					  unsigned int esr,
					  struct pt_regs *regs)
{
	if (is_ttbr0_addr(addr))
		return do_page_fault(addr, esr, regs);

	do_bad_area(addr, esr, regs);
	return 0;
}

static int do_alignment_fault(unsigned long addr, unsigned int esr,
			      struct pt_regs *regs)
{
	do_bad_area(addr, esr, regs);
	return 0;
}

static int do_bad(unsigned long addr, unsigned int esr, struct pt_regs *regs)
{
	return 1; /* "fault" */
}

static int do_sea(unsigned long addr, unsigned int esr, struct pt_regs *regs)
{
	const struct fault_info *inf;
	void __user *siaddr;

	inf = esr_to_fault_info(esr);

	/*
	 * Return value ignored as we rely on signal merging.
	 * Future patches will make this more robust.
	 */
	apei_claim_sea(regs);

	if (esr & ESR_ELx_FnV)
		siaddr = NULL;
	else
		siaddr = (void __user *)addr;
	arm64_notify_die(inf->name, regs, inf->sig, inf->code, siaddr, esr);

	return 0;
}

static const struct fault_info fault_info[] = {
	{ do_bad,		SIGKILL, SI_KERNEL,	"ttbr address size fault" },
	{ do_bad,		SIGKILL, SI_KERNEL,	"level 1 address size fault" },
	{ do_bad,		SIGKILL, SI_KERNEL,	"level 2 address size fault" },
	{ do_bad,		SIGKILL, SI_KERNEL,	"level 3 address size fault" },
	{ do_translation_fault,	SIGSEGV, SEGV_MAPERR,	"level 0 translation fault" },
	{ do_translation_fault,	SIGSEGV, SEGV_MAPERR,	"level 1 translation fault" },
	{ do_translation_fault,	SIGSEGV, SEGV_MAPERR,	"level 2 translation fault" },
	{ do_translation_fault,	SIGSEGV, SEGV_MAPERR,	"level 3 translation fault" },
	{ do_bad,		SIGKILL, SI_KERNEL,	"unknown 8" },
	{ do_page_fault,	SIGSEGV, SEGV_ACCERR,	"level 1 access flag fault" },
	{ do_page_fault,	SIGSEGV, SEGV_ACCERR,	"level 2 access flag fault" },
	{ do_page_fault,	SIGSEGV, SEGV_ACCERR,	"level 3 access flag fault" },
	{ do_bad,		SIGKILL, SI_KERNEL,	"unknown 12" },
	{ do_page_fault,	SIGSEGV, SEGV_ACCERR,	"level 1 permission fault" },
	{ do_page_fault,	SIGSEGV, SEGV_ACCERR,	"level 2 permission fault" },
	{ do_page_fault,	SIGSEGV, SEGV_ACCERR,	"level 3 permission fault" },
	{ do_sea,		SIGBUS,  BUS_OBJERR,	"synchronous external abort" },
	{ do_bad,		SIGKILL, SI_KERNEL,	"unknown 17" },
	{ do_bad,		SIGKILL, SI_KERNEL,	"unknown 18" },
	{ do_bad,		SIGKILL, SI_KERNEL,	"unknown 19" },
	{ do_sea,		SIGKILL, SI_KERNEL,	"level 0 (translation table walk)" },
	{ do_sea,		SIGKILL, SI_KERNEL,	"level 1 (translation table walk)" },
	{ do_sea,		SIGKILL, SI_KERNEL,	"level 2 (translation table walk)" },
	{ do_sea,		SIGKILL, SI_KERNEL,	"level 3 (translation table walk)" },
	{ do_sea,		SIGBUS,  BUS_OBJERR,	"synchronous parity or ECC error" },	// Reserved when RAS is implemented
	{ do_bad,		SIGKILL, SI_KERNEL,	"unknown 25" },
	{ do_bad,		SIGKILL, SI_KERNEL,	"unknown 26" },
	{ do_bad,		SIGKILL, SI_KERNEL,	"unknown 27" },
	{ do_sea,		SIGKILL, SI_KERNEL,	"level 0 synchronous parity error (translation table walk)" },	// Reserved when RAS is implemented
	{ do_sea,		SIGKILL, SI_KERNEL,	"level 1 synchronous parity error (translation table walk)" },	// Reserved when RAS is implemented
	{ do_sea,		SIGKILL, SI_KERNEL,	"level 2 synchronous parity error (translation table walk)" },	// Reserved when RAS is implemented
	{ do_sea,		SIGKILL, SI_KERNEL,	"level 3 synchronous parity error (translation table walk)" },	// Reserved when RAS is implemented
	{ do_bad,		SIGKILL, SI_KERNEL,	"unknown 32" },
	{ do_alignment_fault,	SIGBUS,  BUS_ADRALN,	"alignment fault" },
	{ do_bad,		SIGKILL, SI_KERNEL,	"unknown 34" },
	{ do_bad,		SIGKILL, SI_KERNEL,	"unknown 35" },
	{ do_bad,		SIGKILL, SI_KERNEL,	"unknown 36" },
	{ do_bad,		SIGKILL, SI_KERNEL,	"unknown 37" },
	{ do_bad,		SIGKILL, SI_KERNEL,	"unknown 38" },
	{ do_bad,		SIGKILL, SI_KERNEL,	"unknown 39" },
	{ do_bad,		SIGKILL, SI_KERNEL,	"unknown 40" },
	{ do_bad,		SIGKILL, SI_KERNEL,	"unknown 41" },
	{ do_bad,		SIGKILL, SI_KERNEL,	"unknown 42" },
	{ do_bad,		SIGKILL, SI_KERNEL,	"unknown 43" },
	{ do_bad,		SIGKILL, SI_KERNEL,	"unknown 44" },
	{ do_bad,		SIGKILL, SI_KERNEL,	"unknown 45" },
	{ do_bad,		SIGKILL, SI_KERNEL,	"unknown 46" },
	{ do_bad,		SIGKILL, SI_KERNEL,	"unknown 47" },
	{ do_bad,		SIGKILL, SI_KERNEL,	"TLB conflict abort" },
	{ do_bad,		SIGKILL, SI_KERNEL,	"Unsupported atomic hardware update fault" },
	{ do_bad,		SIGKILL, SI_KERNEL,	"unknown 50" },
	{ do_bad,		SIGKILL, SI_KERNEL,	"unknown 51" },
	{ do_bad,		SIGKILL, SI_KERNEL,	"implementation fault (lockdown abort)" },
	{ do_bad,		SIGBUS,  BUS_OBJERR,	"implementation fault (unsupported exclusive)" },
	{ do_bad,		SIGKILL, SI_KERNEL,	"unknown 54" },
	{ do_bad,		SIGKILL, SI_KERNEL,	"unknown 55" },
	{ do_bad,		SIGKILL, SI_KERNEL,	"unknown 56" },
	{ do_bad,		SIGKILL, SI_KERNEL,	"unknown 57" },
	{ do_bad,		SIGKILL, SI_KERNEL,	"unknown 58" },
	{ do_bad,		SIGKILL, SI_KERNEL,	"unknown 59" },
	{ do_bad,		SIGKILL, SI_KERNEL,	"unknown 60" },
	{ do_bad,		SIGKILL, SI_KERNEL,	"section domain fault" },
	{ do_bad,		SIGKILL, SI_KERNEL,	"page domain fault" },
	{ do_bad,		SIGKILL, SI_KERNEL,	"unknown 63" },
};

asmlinkage void __exception do_mem_abort(unsigned long addr, unsigned int esr,
					 struct pt_regs *regs)
{
	const struct fault_info *inf = esr_to_fault_info(esr);

	if (!inf->fn(addr, esr, regs))
		return;

	if (!user_mode(regs)) {
		pr_alert("Unhandled fault at 0x%016lx\n", addr);
		mem_abort_decode(esr);
		show_pte(addr);
	}

	arm64_notify_die(inf->name, regs,
			 inf->sig, inf->code, (void __user *)addr, esr);
}

asmlinkage void __exception do_el0_irq_bp_hardening(void)
{
	/* PC has already been checked in entry.S */
	arm64_apply_bp_hardening();
}

asmlinkage void __exception do_el0_ia_bp_hardening(unsigned long addr,
						   unsigned int esr,
						   struct pt_regs *regs)
{
	/*
	 * We've taken an instruction abort from userspace and not yet
	 * re-enabled IRQs. If the address is a kernel address, apply
	 * BP hardening prior to enabling IRQs and pre-emption.
	 */
	if (!is_ttbr0_addr(addr))
		arm64_apply_bp_hardening();

	local_daif_restore(DAIF_PROCCTX);
	do_mem_abort(addr, esr, regs);
}

asmlinkage void __exception do_sp_pc_abort(unsigned long addr,
					   unsigned int esr,
					   struct pt_regs *regs)
{
	if (user_mode(regs)) {
		if (!is_ttbr0_addr(instruction_pointer(regs)))
			arm64_apply_bp_hardening();
		local_daif_restore(DAIF_PROCCTX);
	}

	arm64_notify_die("SP/PC alignment exception", regs,
			 SIGBUS, BUS_ADRALN, (void __user *)addr, esr);
}

int __init early_brk64(unsigned long addr, unsigned int esr,
		       struct pt_regs *regs);

/*
 * __refdata because early_brk64 is __init, but the reference to it is
 * clobbered at arch_initcall time.
 * See traps.c and debug-monitors.c:debug_traps_init().
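 *
 * Added illustration (the real call sites live outside this file, and the
 * argument names shown here are not quoted verbatim from debug-monitors.c):
 * debug_traps_init() replaces these defaults via hook_debug_fault_code(),
 * roughly as in
 *
 *	hook_debug_fault_code(DBG_ESR_EVT_BRK, brk_handler,
 *			      SIGTRAP, TRAP_BRKPT, "ptrace BRK handler");
 *
 * so the do_bad entries below only ever run before such hooks are installed.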
 */
static struct fault_info __refdata debug_fault_info[] = {
	{ do_bad,	SIGTRAP,	TRAP_HWBKPT,	"hardware breakpoint" },
	{ do_bad,	SIGTRAP,	TRAP_HWBKPT,	"hardware single-step" },
	{ do_bad,	SIGTRAP,	TRAP_HWBKPT,	"hardware watchpoint" },
	{ do_bad,	SIGKILL,	SI_KERNEL,	"unknown 3" },
	{ do_bad,	SIGTRAP,	TRAP_BRKPT,	"aarch32 BKPT" },
	{ do_bad,	SIGKILL,	SI_KERNEL,	"aarch32 vector catch" },
	{ early_brk64,	SIGTRAP,	TRAP_BRKPT,	"aarch64 BRK" },
	{ do_bad,	SIGKILL,	SI_KERNEL,	"unknown 7" },
};

void __init hook_debug_fault_code(int nr,
				  int (*fn)(unsigned long, unsigned int, struct pt_regs *),
				  int sig, int code, const char *name)
{
	BUG_ON(nr < 0 || nr >= ARRAY_SIZE(debug_fault_info));

	debug_fault_info[nr].fn		= fn;
	debug_fault_info[nr].sig	= sig;
	debug_fault_info[nr].code	= code;
	debug_fault_info[nr].name	= name;
}

/*
 * In debug exception context, we explicitly disable preemption despite
 * having interrupts disabled.
 * This serves two purposes: it makes it much less likely that we would
 * accidentally schedule in exception context, and it will force a warning
 * if we somehow manage to schedule by accident.
 */
static void debug_exception_enter(struct pt_regs *regs)
{
	/*
	 * Tell lockdep we disabled irqs in entry.S. Do nothing if they were
	 * already disabled to preserve the last enabled/disabled addresses.
	 */
	if (interrupts_enabled(regs))
		trace_hardirqs_off();

	if (user_mode(regs)) {
		RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
	} else {
		/*
		 * We might have interrupted pretty much anything. In
		 * fact, if we're a debug exception, we can even interrupt
		 * NMI processing. We don't want this code to make in_nmi()
		 * return true, but we need to notify RCU.
		 */
		rcu_nmi_enter();
	}

	preempt_disable();

	/* This code is a bit fragile. Test it. */
	RCU_LOCKDEP_WARN(!rcu_is_watching(), "exception_enter didn't work");
}
NOKPROBE_SYMBOL(debug_exception_enter);

static void debug_exception_exit(struct pt_regs *regs)
{
	preempt_enable_no_resched();

	if (!user_mode(regs))
		rcu_nmi_exit();

	if (interrupts_enabled(regs))
		trace_hardirqs_on();
}
NOKPROBE_SYMBOL(debug_exception_exit);

#ifdef CONFIG_ARM64_ERRATUM_1463225
DECLARE_PER_CPU(int, __in_cortex_a76_erratum_1463225_wa);

static int __exception
cortex_a76_erratum_1463225_debug_handler(struct pt_regs *regs)
{
	if (user_mode(regs))
		return 0;

	if (!__this_cpu_read(__in_cortex_a76_erratum_1463225_wa))
		return 0;

	/*
	 * We've taken a dummy step exception from the kernel to ensure
	 * that interrupts are re-enabled on the syscall path. Return back
	 * to cortex_a76_erratum_1463225_svc_handler() with debug exceptions
	 * masked so that we can safely restore the mdscr and get on with
	 * handling the syscall.
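	 * (Added note: the masking is done by setting PSR_D_BIT below,
	 * i.e. the PSTATE.D debug exception mask bit.)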
	 */
	regs->pstate |= PSR_D_BIT;
	return 1;
}
#else
static int __exception
cortex_a76_erratum_1463225_debug_handler(struct pt_regs *regs)
{
	return 0;
}
#endif /* CONFIG_ARM64_ERRATUM_1463225 */

asmlinkage void __exception do_debug_exception(unsigned long addr_if_watchpoint,
					       unsigned int esr,
					       struct pt_regs *regs)
{
	const struct fault_info *inf = esr_to_debug_fault_info(esr);
	unsigned long pc = instruction_pointer(regs);

	if (cortex_a76_erratum_1463225_debug_handler(regs))
		return;

	debug_exception_enter(regs);

	if (user_mode(regs) && !is_ttbr0_addr(pc))
		arm64_apply_bp_hardening();

	if (inf->fn(addr_if_watchpoint, esr, regs)) {
		arm64_notify_die(inf->name, regs,
				 inf->sig, inf->code, (void __user *)pc, esr);
	}

	debug_exception_exit(regs);
}
NOKPROBE_SYMBOL(do_debug_exception);