/*
 * Based on arch/arm/mm/fault.c
 *
 * Copyright (C) 1995 Linus Torvalds
 * Copyright (C) 1995-2004 Russell King
 * Copyright (C) 2012 ARM Ltd.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include <linux/extable.h>
#include <linux/signal.h>
#include <linux/mm.h>
#include <linux/hardirq.h>
#include <linux/init.h>
#include <linux/kprobes.h>
#include <linux/uaccess.h>
#include <linux/page-flags.h>
#include <linux/sched/signal.h>
#include <linux/sched/debug.h>
#include <linux/highmem.h>
#include <linux/perf_event.h>
#include <linux/preempt.h>
#include <linux/hugetlb.h>

#include <asm/bug.h>
#include <asm/cmpxchg.h>
#include <asm/cpufeature.h>
#include <asm/exception.h>
#include <asm/debug-monitors.h>
#include <asm/esr.h>
#include <asm/sysreg.h>
#include <asm/system_misc.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <asm/traps.h>

#include <acpi/ghes.h>

struct fault_info {
	int	(*fn)(unsigned long addr, unsigned int esr,
		      struct pt_regs *regs);
	int	sig;
	int	code;
	const char *name;
};

static const struct fault_info fault_info[];

static inline const struct fault_info *esr_to_fault_info(unsigned int esr)
{
	return fault_info + (esr & 63);
}

#ifdef CONFIG_KPROBES
static inline int notify_page_fault(struct pt_regs *regs, unsigned int esr)
{
	int ret = 0;

	/* kprobe_running() needs smp_processor_id() */
	if (!user_mode(regs)) {
		preempt_disable();
		if (kprobe_running() && kprobe_fault_handler(regs, esr))
			ret = 1;
		preempt_enable();
	}

	return ret;
}
#else
static inline int notify_page_fault(struct pt_regs *regs, unsigned int esr)
{
	return 0;
}
#endif

static void data_abort_decode(unsigned int esr)
{
	pr_alert("Data abort info:\n");

	if (esr & ESR_ELx_ISV) {
		pr_alert("  Access size = %u byte(s)\n",
			 1U << ((esr & ESR_ELx_SAS) >> ESR_ELx_SAS_SHIFT));
		pr_alert("  SSE = %lu, SRT = %lu\n",
			 (esr & ESR_ELx_SSE) >> ESR_ELx_SSE_SHIFT,
			 (esr & ESR_ELx_SRT_MASK) >> ESR_ELx_SRT_SHIFT);
		pr_alert("  SF = %lu, AR = %lu\n",
			 (esr & ESR_ELx_SF) >> ESR_ELx_SF_SHIFT,
			 (esr & ESR_ELx_AR) >> ESR_ELx_AR_SHIFT);
	} else {
		pr_alert("  ISV = 0, ISS = 0x%08lx\n", esr & ESR_ELx_ISS_MASK);
	}

	pr_alert("  CM = %lu, WnR = %lu\n",
		 (esr & ESR_ELx_CM) >> ESR_ELx_CM_SHIFT,
		 (esr & ESR_ELx_WNR) >> ESR_ELx_WNR_SHIFT);
}

static void mem_abort_decode(unsigned int esr)
{
	pr_alert("Mem abort info:\n");

	pr_alert("  ESR = 0x%08x\n", esr);
	pr_alert("  Exception class = %s, IL = %u bits\n",
		 esr_get_class_string(esr),
		 (esr & ESR_ELx_IL) ? 32 : 16);
	pr_alert("  SET = %lu, FnV = %lu\n",
		 (esr & ESR_ELx_SET_MASK) >> ESR_ELx_SET_SHIFT,
		 (esr & ESR_ELx_FnV) >> ESR_ELx_FnV_SHIFT);
	pr_alert("  EA = %lu, S1PTW = %lu\n",
		 (esr & ESR_ELx_EA) >> ESR_ELx_EA_SHIFT,
		 (esr & ESR_ELx_S1PTW) >> ESR_ELx_S1PTW_SHIFT);

	if (esr_is_data_abort(esr))
		data_abort_decode(esr);
}

/*
 * Dump out the page tables associated with 'addr' in the currently active mm.
 */
void show_pte(unsigned long addr)
{
	struct mm_struct *mm;
	pgd_t *pgdp;
	pgd_t pgd;

	if (addr < TASK_SIZE) {
		/* TTBR0 */
		mm = current->active_mm;
		if (mm == &init_mm) {
			pr_alert("[%016lx] user address but active_mm is swapper\n",
				 addr);
			return;
		}
	} else if (addr >= VA_START) {
		/* TTBR1 */
		mm = &init_mm;
	} else {
		pr_alert("[%016lx] address between user and kernel address ranges\n",
			 addr);
		return;
	}

	pr_alert("%s pgtable: %luk pages, %u-bit VAs, pgdp = %p\n",
		 mm == &init_mm ? "swapper" : "user", PAGE_SIZE / SZ_1K,
		 VA_BITS, mm->pgd);
	pgdp = pgd_offset(mm, addr);
	pgd = READ_ONCE(*pgdp);
	pr_alert("[%016lx] pgd=%016llx", addr, pgd_val(pgd));

	do {
		pud_t *pudp, pud;
		pmd_t *pmdp, pmd;
		pte_t *ptep, pte;

		if (pgd_none(pgd) || pgd_bad(pgd))
			break;

		pudp = pud_offset(pgdp, addr);
		pud = READ_ONCE(*pudp);
		pr_cont(", pud=%016llx", pud_val(pud));
		if (pud_none(pud) || pud_bad(pud))
			break;

		pmdp = pmd_offset(pudp, addr);
		pmd = READ_ONCE(*pmdp);
		pr_cont(", pmd=%016llx", pmd_val(pmd));
		if (pmd_none(pmd) || pmd_bad(pmd))
			break;

		ptep = pte_offset_map(pmdp, addr);
		pte = READ_ONCE(*ptep);
		pr_cont(", pte=%016llx", pte_val(pte));
		pte_unmap(ptep);
	} while(0);

	pr_cont("\n");
}

/*
 * This function sets the access flags (dirty, accessed), as well as write
 * permission, and only to a more permissive setting.
 *
 * It needs to cope with hardware update of the accessed/dirty state by other
 * agents in the system and can safely skip the __sync_icache_dcache() call as,
 * like set_pte_at(), the PTE is never changed from no-exec to exec here.
 *
 * Returns whether or not the PTE actually changed.
 */
int ptep_set_access_flags(struct vm_area_struct *vma,
			  unsigned long address, pte_t *ptep,
			  pte_t entry, int dirty)
{
	pteval_t old_pteval, pteval;
	pte_t pte = READ_ONCE(*ptep);

	if (pte_same(pte, entry))
		return 0;

	/* only preserve the access flags and write permission */
	pte_val(entry) &= PTE_RDONLY | PTE_AF | PTE_WRITE | PTE_DIRTY;

	/*
	 * Setting the flags must be done atomically to avoid racing with the
	 * hardware update of the access/dirty state. The PTE_RDONLY bit must
	 * be set to the most permissive (lowest value) of *ptep and entry
	 * (calculated as: a & b == ~(~a | ~b)).
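	 *
	 * Concretely, the loop below flips PTE_RDONLY in both values before
	 * the OR and flips it back afterwards: every other preserved flag
	 * ends up as the plain OR of the two values, while PTE_RDONLY ends
	 * up as the AND, i.e. it stays set only if both the old PTE and the
	 * new entry were read-only.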
	 */
	pte_val(entry) ^= PTE_RDONLY;
	pteval = pte_val(pte);
	do {
		old_pteval = pteval;
		pteval ^= PTE_RDONLY;
		pteval |= pte_val(entry);
		pteval ^= PTE_RDONLY;
		pteval = cmpxchg_relaxed(&pte_val(*ptep), old_pteval, pteval);
	} while (pteval != old_pteval);

	flush_tlb_fix_spurious_fault(vma, address);
	return 1;
}

static bool is_el1_instruction_abort(unsigned int esr)
{
	return ESR_ELx_EC(esr) == ESR_ELx_EC_IABT_CUR;
}

static inline bool is_el1_permission_fault(unsigned int esr,
					   struct pt_regs *regs,
					   unsigned long addr)
{
	unsigned int ec = ESR_ELx_EC(esr);
	unsigned int fsc_type = esr & ESR_ELx_FSC_TYPE;

	if (ec != ESR_ELx_EC_DABT_CUR && ec != ESR_ELx_EC_IABT_CUR)
		return false;

	if (fsc_type == ESR_ELx_FSC_PERM)
		return true;

	if (addr < TASK_SIZE && system_uses_ttbr0_pan())
		return fsc_type == ESR_ELx_FSC_FAULT &&
			(regs->pstate & PSR_PAN_BIT);

	return false;
}

static void die_kernel_fault(const char *msg, unsigned long addr,
			     unsigned int esr, struct pt_regs *regs)
{
	bust_spinlocks(1);

	pr_alert("Unable to handle kernel %s at virtual address %016lx\n", msg,
		 addr);

	mem_abort_decode(esr);

	show_pte(addr);
	die("Oops", regs, esr);
	bust_spinlocks(0);
	do_exit(SIGKILL);
}

static void __do_kernel_fault(unsigned long addr, unsigned int esr,
			      struct pt_regs *regs)
{
	const char *msg;

	/*
	 * Are we prepared to handle this kernel fault?
	 * We are almost certainly not prepared to handle instruction faults.
	 */
	if (!is_el1_instruction_abort(esr) && fixup_exception(regs))
		return;

	if (is_el1_permission_fault(esr, regs, addr)) {
		if (esr & ESR_ELx_WNR)
			msg = "write to read-only memory";
		else
			msg = "read from unreadable memory";
	} else if (addr < PAGE_SIZE) {
		msg = "NULL pointer dereference";
	} else {
		msg = "paging request";
	}

	die_kernel_fault(msg, addr, esr, regs);
}

static void __do_user_fault(struct siginfo *info, unsigned int esr)
{
	current->thread.fault_address = (unsigned long)info->si_addr;

	/*
	 * If the faulting address is in the kernel, we must sanitize the ESR.
	 * From userspace's point of view, kernel-only mappings don't exist
	 * at all, so we report them as level 0 translation faults.
	 * (This is not quite the way that "no mapping there at all" behaves:
	 * an alignment fault not caused by the memory type would take
	 * precedence over translation fault for a real access to empty
	 * space. Unfortunately we can't easily distinguish "alignment fault
	 * not caused by memory type" from "alignment fault caused by memory
	 * type", so we ignore this wrinkle and just return the translation
	 * fault.)
	 */
	if (current->thread.fault_address >= TASK_SIZE) {
		switch (ESR_ELx_EC(esr)) {
		case ESR_ELx_EC_DABT_LOW:
			/*
			 * These bits provide only information about the
			 * faulting instruction, which userspace knows already.
			 * We explicitly clear bits which are architecturally
			 * RES0 in case they are given meanings in future.
			 * We always report the ESR as if the fault was taken
			 * to EL1 and so ISV and the bits in ISS[23:14] are
			 * clear. (In fact it always will be a fault to EL1.)
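			 * After the masking below only EC, IL, CM and WnR
			 * survive, and the DFSC field reads back as a
			 * level 0 translation fault.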
			 */
			esr &= ESR_ELx_EC_MASK | ESR_ELx_IL |
				ESR_ELx_CM | ESR_ELx_WNR;
			esr |= ESR_ELx_FSC_FAULT;
			break;
		case ESR_ELx_EC_IABT_LOW:
			/*
			 * Claim a level 0 translation fault.
			 * All other bits are architecturally RES0 for faults
			 * reported with that DFSC value, so we clear them.
			 */
			esr &= ESR_ELx_EC_MASK | ESR_ELx_IL;
			esr |= ESR_ELx_FSC_FAULT;
			break;
		default:
			/*
			 * This should never happen (entry.S only brings us
			 * into this code for insn and data aborts from a lower
			 * exception level). Fail safe by not providing an ESR
			 * context record at all.
			 */
			WARN(1, "ESR 0x%x is not DABT or IABT from EL0\n", esr);
			esr = 0;
			break;
		}
	}

	current->thread.fault_code = esr;
	arm64_force_sig_info(info, esr_to_fault_info(esr)->name, current);
}

static void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *regs)
{
	/*
	 * If we are in kernel mode at this point, we have no context to
	 * handle this fault with.
	 */
	if (user_mode(regs)) {
		const struct fault_info *inf = esr_to_fault_info(esr);
		struct siginfo si;

		clear_siginfo(&si);
		si.si_signo = inf->sig;
		si.si_code = inf->code;
		si.si_addr = (void __user *)addr;

		__do_user_fault(&si, esr);
	} else {
		__do_kernel_fault(addr, esr, regs);
	}
}

#define VM_FAULT_BADMAP		0x010000
#define VM_FAULT_BADACCESS	0x020000

static vm_fault_t __do_page_fault(struct mm_struct *mm, unsigned long addr,
				  unsigned int mm_flags, unsigned long vm_flags,
				  struct task_struct *tsk)
{
	struct vm_area_struct *vma;
	vm_fault_t fault;

	vma = find_vma(mm, addr);
	fault = VM_FAULT_BADMAP;
	if (unlikely(!vma))
		goto out;
	if (unlikely(vma->vm_start > addr))
		goto check_stack;

	/*
	 * Ok, we have a good vm_area for this memory access, so we can handle
	 * it.
	 */
good_area:
	/*
	 * Check that the permissions on the VMA allow for the fault which
	 * occurred.
	 */
	if (!(vma->vm_flags & vm_flags)) {
		fault = VM_FAULT_BADACCESS;
		goto out;
	}

	return handle_mm_fault(vma, addr & PAGE_MASK, mm_flags);

check_stack:
	if (vma->vm_flags & VM_GROWSDOWN && !expand_stack(vma, addr))
		goto good_area;
out:
	return fault;
}

static bool is_el0_instruction_abort(unsigned int esr)
{
	return ESR_ELx_EC(esr) == ESR_ELx_EC_IABT_LOW;
}

static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
				   struct pt_regs *regs)
{
	struct task_struct *tsk;
	struct mm_struct *mm;
	struct siginfo si;
	vm_fault_t fault, major = 0;
	unsigned long vm_flags = VM_READ | VM_WRITE;
	unsigned int mm_flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;

	if (notify_page_fault(regs, esr))
		return 0;

	tsk = current;
	mm = tsk->mm;

	/*
	 * If we're in an interrupt or have no user context, we must not take
	 * the fault.
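	 * faulthandler_disabled() covers both explicit pagefault_disable()
	 * regions and atomic context, and kernel threads have no mm of their
	 * own, so in either case we cannot sleep to resolve the fault.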
	 */
	if (faulthandler_disabled() || !mm)
		goto no_context;

	if (user_mode(regs))
		mm_flags |= FAULT_FLAG_USER;

	if (is_el0_instruction_abort(esr)) {
		vm_flags = VM_EXEC;
	} else if ((esr & ESR_ELx_WNR) && !(esr & ESR_ELx_CM)) {
		vm_flags = VM_WRITE;
		mm_flags |= FAULT_FLAG_WRITE;
	}

	if (addr < TASK_SIZE && is_el1_permission_fault(esr, regs, addr)) {
		/* regs->orig_addr_limit may be 0 if we entered from EL0 */
		if (regs->orig_addr_limit == KERNEL_DS)
			die_kernel_fault("access to user memory with fs=KERNEL_DS",
					 addr, esr, regs);

		if (is_el1_instruction_abort(esr))
			die_kernel_fault("execution of user memory",
					 addr, esr, regs);

		if (!search_exception_tables(regs->pc))
			die_kernel_fault("access to user memory outside uaccess routines",
					 addr, esr, regs);
	}

	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);

	/*
	 * As per x86, we may deadlock here. However, since the kernel only
	 * validly references user space from well defined areas of the code,
	 * we can bug out early if this is from code which shouldn't.
	 */
	if (!down_read_trylock(&mm->mmap_sem)) {
		if (!user_mode(regs) && !search_exception_tables(regs->pc))
			goto no_context;
retry:
		down_read(&mm->mmap_sem);
	} else {
		/*
		 * The above down_read_trylock() might have succeeded in which
		 * case, we'll have missed the might_sleep() from down_read().
		 */
		might_sleep();
#ifdef CONFIG_DEBUG_VM
		if (!user_mode(regs) && !search_exception_tables(regs->pc))
			goto no_context;
#endif
	}

	fault = __do_page_fault(mm, addr, mm_flags, vm_flags, tsk);
	major |= fault & VM_FAULT_MAJOR;

	if (fault & VM_FAULT_RETRY) {
		/*
		 * If we need to retry but a fatal signal is pending,
		 * handle the signal first. We do not need to release
		 * the mmap_sem because it would already be released
		 * in __lock_page_or_retry in mm/filemap.c.
		 */
		if (fatal_signal_pending(current)) {
			if (!user_mode(regs))
				goto no_context;
			return 0;
		}

		/*
		 * Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk of
		 * starvation.
		 */
		if (mm_flags & FAULT_FLAG_ALLOW_RETRY) {
			mm_flags &= ~FAULT_FLAG_ALLOW_RETRY;
			mm_flags |= FAULT_FLAG_TRIED;
			goto retry;
		}
	}
	up_read(&mm->mmap_sem);

	/*
	 * Handle the "normal" (no error) case first.
	 */
	if (likely(!(fault & (VM_FAULT_ERROR | VM_FAULT_BADMAP |
			      VM_FAULT_BADACCESS)))) {
		/*
		 * Major/minor page fault accounting is only done
		 * once. If we go through a retry, it is extremely
		 * likely that the page will be found in page cache at
		 * that point.
		 */
		if (major) {
			tsk->maj_flt++;
			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs,
				      addr);
		} else {
			tsk->min_flt++;
			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs,
				      addr);
		}

		return 0;
	}

	/*
	 * If we are in kernel mode at this point, we have no context to
	 * handle this fault with.
	 */
	if (!user_mode(regs))
		goto no_context;

	if (fault & VM_FAULT_OOM) {
		/*
		 * We ran out of memory, call the OOM killer, and return to
		 * userspace (which will retry the fault, or kill us if we got
		 * oom-killed).
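		 * Note that we only reach this point for a user-mode fault;
		 * a kernel-mode OOM has already been routed to no_context
		 * above.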
		 */
		pagefault_out_of_memory();
		return 0;
	}

	clear_siginfo(&si);
	si.si_addr = (void __user *)addr;

	if (fault & VM_FAULT_SIGBUS) {
		/*
		 * We had some memory, but were unable to successfully fix up
		 * this page fault.
		 */
		si.si_signo = SIGBUS;
		si.si_code = BUS_ADRERR;
	} else if (fault & VM_FAULT_HWPOISON_LARGE) {
		unsigned int hindex = VM_FAULT_GET_HINDEX(fault);

		si.si_signo = SIGBUS;
		si.si_code = BUS_MCEERR_AR;
		si.si_addr_lsb = hstate_index_to_shift(hindex);
	} else if (fault & VM_FAULT_HWPOISON) {
		si.si_signo = SIGBUS;
		si.si_code = BUS_MCEERR_AR;
		si.si_addr_lsb = PAGE_SHIFT;
	} else {
		/*
		 * Something tried to access memory that isn't in our memory
		 * map.
		 */
		si.si_signo = SIGSEGV;
		si.si_code = fault == VM_FAULT_BADACCESS ?
			     SEGV_ACCERR : SEGV_MAPERR;
	}

	__do_user_fault(&si, esr);
	return 0;

no_context:
	__do_kernel_fault(addr, esr, regs);
	return 0;
}

static int __kprobes do_translation_fault(unsigned long addr,
					  unsigned int esr,
					  struct pt_regs *regs)
{
	if (addr < TASK_SIZE)
		return do_page_fault(addr, esr, regs);

	do_bad_area(addr, esr, regs);
	return 0;
}

static int do_alignment_fault(unsigned long addr, unsigned int esr,
			      struct pt_regs *regs)
{
	do_bad_area(addr, esr, regs);
	return 0;
}

static int do_bad(unsigned long addr, unsigned int esr, struct pt_regs *regs)
{
	return 1; /* "fault" */
}

static int do_sea(unsigned long addr, unsigned int esr, struct pt_regs *regs)
{
	struct siginfo info;
	const struct fault_info *inf;

	inf = esr_to_fault_info(esr);

	/*
	 * Synchronous aborts may interrupt code which had interrupts masked.
	 * Before calling out into the wider kernel tell the interested
	 * subsystems.
	 */
	if (IS_ENABLED(CONFIG_ACPI_APEI_SEA)) {
		if (interrupts_enabled(regs))
			nmi_enter();

		ghes_notify_sea();

		if (interrupts_enabled(regs))
			nmi_exit();
	}

	clear_siginfo(&info);
	info.si_signo = inf->sig;
	info.si_errno = 0;
	info.si_code = inf->code;
	if (esr & ESR_ELx_FnV)
		info.si_addr = NULL;
	else
		info.si_addr = (void __user *)addr;
	arm64_notify_die(inf->name, regs, &info, esr);

	return 0;
}

static const struct fault_info fault_info[] = {
	{ do_bad, SIGKILL, SI_KERNEL, "ttbr address size fault" },
	{ do_bad, SIGKILL, SI_KERNEL, "level 1 address size fault" },
	{ do_bad, SIGKILL, SI_KERNEL, "level 2 address size fault" },
	{ do_bad, SIGKILL, SI_KERNEL, "level 3 address size fault" },
	{ do_translation_fault, SIGSEGV, SEGV_MAPERR, "level 0 translation fault" },
	{ do_translation_fault, SIGSEGV, SEGV_MAPERR, "level 1 translation fault" },
	{ do_translation_fault, SIGSEGV, SEGV_MAPERR, "level 2 translation fault" },
	{ do_translation_fault, SIGSEGV, SEGV_MAPERR, "level 3 translation fault" },
	{ do_bad, SIGKILL, SI_KERNEL, "unknown 8" },
	{ do_page_fault, SIGSEGV, SEGV_ACCERR, "level 1 access flag fault" },
	{ do_page_fault, SIGSEGV, SEGV_ACCERR, "level 2 access flag fault" },
	{ do_page_fault, SIGSEGV, SEGV_ACCERR, "level 3 access flag fault" },
	{ do_bad, SIGKILL, SI_KERNEL, "unknown 12" },
	{ do_page_fault, SIGSEGV, SEGV_ACCERR, "level 1 permission fault" },
	{ do_page_fault, SIGSEGV, SEGV_ACCERR, "level 2 permission fault" },
	{ do_page_fault, SIGSEGV, SEGV_ACCERR, "level 3 permission fault" },
	{ do_sea, SIGBUS, BUS_OBJERR, "synchronous external abort" },
	{ do_bad, SIGKILL, SI_KERNEL, "unknown 17" },
	{ do_bad, SIGKILL, SI_KERNEL, "unknown 18" },
	{ do_bad, SIGKILL, SI_KERNEL, "unknown 19" },
	{ do_sea, SIGKILL, SI_KERNEL, "level 0 (translation table walk)" },
	{ do_sea, SIGKILL, SI_KERNEL, "level 1 (translation table walk)" },
	{ do_sea, SIGKILL, SI_KERNEL, "level 2 (translation table walk)" },
	{ do_sea, SIGKILL, SI_KERNEL, "level 3 (translation table walk)" },
	{ do_sea, SIGBUS, BUS_OBJERR, "synchronous parity or ECC error" },	// Reserved when RAS is implemented
	{ do_bad, SIGKILL, SI_KERNEL, "unknown 25" },
	{ do_bad, SIGKILL, SI_KERNEL, "unknown 26" },
	{ do_bad, SIGKILL, SI_KERNEL, "unknown 27" },
	{ do_sea, SIGKILL, SI_KERNEL, "level 0 synchronous parity error (translation table walk)" },	// Reserved when RAS is implemented
	{ do_sea, SIGKILL, SI_KERNEL, "level 1 synchronous parity error (translation table walk)" },	// Reserved when RAS is implemented
	{ do_sea, SIGKILL, SI_KERNEL, "level 2 synchronous parity error (translation table walk)" },	// Reserved when RAS is implemented
	{ do_sea, SIGKILL, SI_KERNEL, "level 3 synchronous parity error (translation table walk)" },	// Reserved when RAS is implemented
	{ do_bad, SIGKILL, SI_KERNEL, "unknown 32" },
	{ do_alignment_fault, SIGBUS, BUS_ADRALN, "alignment fault" },
	{ do_bad, SIGKILL, SI_KERNEL, "unknown 34" },
	{ do_bad, SIGKILL, SI_KERNEL, "unknown 35" },
	{ do_bad, SIGKILL, SI_KERNEL, "unknown 36" },
	{ do_bad, SIGKILL, SI_KERNEL, "unknown 37" },
	{ do_bad, SIGKILL, SI_KERNEL, "unknown 38" },
	{ do_bad, SIGKILL, SI_KERNEL, "unknown 39" },
	{ do_bad, SIGKILL, SI_KERNEL, "unknown 40" },
	{ do_bad, SIGKILL, SI_KERNEL, "unknown 41" },
	{ do_bad, SIGKILL, SI_KERNEL, "unknown 42" },
	{ do_bad, SIGKILL, SI_KERNEL, "unknown 43" },
"unknown 43" }, 706 { do_bad, SIGKILL, SI_KERNEL, "unknown 44" }, 707 { do_bad, SIGKILL, SI_KERNEL, "unknown 45" }, 708 { do_bad, SIGKILL, SI_KERNEL, "unknown 46" }, 709 { do_bad, SIGKILL, SI_KERNEL, "unknown 47" }, 710 { do_bad, SIGKILL, SI_KERNEL, "TLB conflict abort" }, 711 { do_bad, SIGKILL, SI_KERNEL, "Unsupported atomic hardware update fault" }, 712 { do_bad, SIGKILL, SI_KERNEL, "unknown 50" }, 713 { do_bad, SIGKILL, SI_KERNEL, "unknown 51" }, 714 { do_bad, SIGKILL, SI_KERNEL, "implementation fault (lockdown abort)" }, 715 { do_bad, SIGBUS, BUS_OBJERR, "implementation fault (unsupported exclusive)" }, 716 { do_bad, SIGKILL, SI_KERNEL, "unknown 54" }, 717 { do_bad, SIGKILL, SI_KERNEL, "unknown 55" }, 718 { do_bad, SIGKILL, SI_KERNEL, "unknown 56" }, 719 { do_bad, SIGKILL, SI_KERNEL, "unknown 57" }, 720 { do_bad, SIGKILL, SI_KERNEL, "unknown 58" }, 721 { do_bad, SIGKILL, SI_KERNEL, "unknown 59" }, 722 { do_bad, SIGKILL, SI_KERNEL, "unknown 60" }, 723 { do_bad, SIGKILL, SI_KERNEL, "section domain fault" }, 724 { do_bad, SIGKILL, SI_KERNEL, "page domain fault" }, 725 { do_bad, SIGKILL, SI_KERNEL, "unknown 63" }, 726 }; 727 728 int handle_guest_sea(phys_addr_t addr, unsigned int esr) 729 { 730 return ghes_notify_sea(); 731 } 732 733 asmlinkage void __exception do_mem_abort(unsigned long addr, unsigned int esr, 734 struct pt_regs *regs) 735 { 736 const struct fault_info *inf = esr_to_fault_info(esr); 737 struct siginfo info; 738 739 if (!inf->fn(addr, esr, regs)) 740 return; 741 742 if (!user_mode(regs)) { 743 pr_alert("Unhandled fault at 0x%016lx\n", addr); 744 mem_abort_decode(esr); 745 show_pte(addr); 746 } 747 748 clear_siginfo(&info); 749 info.si_signo = inf->sig; 750 info.si_errno = 0; 751 info.si_code = inf->code; 752 info.si_addr = (void __user *)addr; 753 arm64_notify_die(inf->name, regs, &info, esr); 754 } 755 756 asmlinkage void __exception do_el0_irq_bp_hardening(void) 757 { 758 /* PC has already been checked in entry.S */ 759 arm64_apply_bp_hardening(); 760 } 761 762 asmlinkage void __exception do_el0_ia_bp_hardening(unsigned long addr, 763 unsigned int esr, 764 struct pt_regs *regs) 765 { 766 /* 767 * We've taken an instruction abort from userspace and not yet 768 * re-enabled IRQs. If the address is a kernel address, apply 769 * BP hardening prior to enabling IRQs and pre-emption. 770 */ 771 if (addr > TASK_SIZE) 772 arm64_apply_bp_hardening(); 773 774 local_irq_enable(); 775 do_mem_abort(addr, esr, regs); 776 } 777 778 779 asmlinkage void __exception do_sp_pc_abort(unsigned long addr, 780 unsigned int esr, 781 struct pt_regs *regs) 782 { 783 struct siginfo info; 784 785 if (user_mode(regs)) { 786 if (instruction_pointer(regs) > TASK_SIZE) 787 arm64_apply_bp_hardening(); 788 local_irq_enable(); 789 } 790 791 clear_siginfo(&info); 792 info.si_signo = SIGBUS; 793 info.si_errno = 0; 794 info.si_code = BUS_ADRALN; 795 info.si_addr = (void __user *)addr; 796 arm64_notify_die("SP/PC alignment exception", regs, &info, esr); 797 } 798 799 int __init early_brk64(unsigned long addr, unsigned int esr, 800 struct pt_regs *regs); 801 802 /* 803 * __refdata because early_brk64 is __init, but the reference to it is 804 * clobbered at arch_initcall time. 805 * See traps.c and debug-monitors.c:debug_traps_init(). 
 */
static struct fault_info __refdata debug_fault_info[] = {
	{ do_bad, SIGTRAP, TRAP_HWBKPT, "hardware breakpoint" },
	{ do_bad, SIGTRAP, TRAP_HWBKPT, "hardware single-step" },
	{ do_bad, SIGTRAP, TRAP_HWBKPT, "hardware watchpoint" },
	{ do_bad, SIGKILL, SI_KERNEL, "unknown 3" },
	{ do_bad, SIGTRAP, TRAP_BRKPT, "aarch32 BKPT" },
	{ do_bad, SIGKILL, SI_KERNEL, "aarch32 vector catch" },
	{ early_brk64, SIGTRAP, TRAP_BRKPT, "aarch64 BRK" },
	{ do_bad, SIGKILL, SI_KERNEL, "unknown 7" },
};

void __init hook_debug_fault_code(int nr,
				  int (*fn)(unsigned long, unsigned int, struct pt_regs *),
				  int sig, int code, const char *name)
{
	BUG_ON(nr < 0 || nr >= ARRAY_SIZE(debug_fault_info));

	debug_fault_info[nr].fn = fn;
	debug_fault_info[nr].sig = sig;
	debug_fault_info[nr].code = code;
	debug_fault_info[nr].name = name;
}

asmlinkage int __exception do_debug_exception(unsigned long addr,
					      unsigned int esr,
					      struct pt_regs *regs)
{
	const struct fault_info *inf = debug_fault_info + DBG_ESR_EVT(esr);
	int rv;

	/*
	 * Tell lockdep we disabled irqs in entry.S. Do nothing if they were
	 * already disabled to preserve the last enabled/disabled addresses.
	 */
	if (interrupts_enabled(regs))
		trace_hardirqs_off();

	if (user_mode(regs) && instruction_pointer(regs) > TASK_SIZE)
		arm64_apply_bp_hardening();

	if (!inf->fn(addr, esr, regs)) {
		rv = 1;
	} else {
		struct siginfo info;

		clear_siginfo(&info);
		info.si_signo = inf->sig;
		info.si_errno = 0;
		info.si_code = inf->code;
		info.si_addr = (void __user *)addr;
		arm64_notify_die(inf->name, regs, &info, esr);
		rv = 0;
	}

	if (interrupts_enabled(regs))
		trace_hardirqs_on();

	return rv;
}
NOKPROBE_SYMBOL(do_debug_exception);

#ifdef CONFIG_ARM64_PAN
void cpu_enable_pan(const struct arm64_cpu_capabilities *__unused)
{
	/*
	 * We modify PSTATE. This won't work from irq context as the PSTATE
	 * is discarded once we return from the exception.
	 */
	WARN_ON_ONCE(in_interrupt());

	sysreg_clear_set(sctlr_el1, SCTLR_EL1_SPAN, 0);
	asm(SET_PSTATE_PAN(1));
}
#endif /* CONFIG_ARM64_PAN */