// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
 *
 * Derived from MIPS:
 * Copyright (C) 1995 - 2000 by Ralf Baechle
 */
#include <linux/context_tracking.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/interrupt.h>
#include <linux/kernel.h>
#include <linux/entry-common.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/ratelimit.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/smp.h>
#include <linux/kdebug.h>
#include <linux/perf_event.h>
#include <linux/uaccess.h>
#include <linux/kfence.h>

#include <asm/branch.h>
#include <asm/exception.h>
#include <asm/mmu_context.h>
#include <asm/ptrace.h>

int show_unhandled_signals = 1;

static void __kprobes no_context(struct pt_regs *regs,
			unsigned long write, unsigned long address)
{
	const int field = sizeof(unsigned long) * 2;

	/* Are we prepared to handle this kernel fault? */
	if (fixup_exception(regs))
		return;

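	/*
	 * Give KFENCE a chance to claim the fault: if the address lies in
	 * the KFENCE pool it reports the error and no oops is needed.
	 */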
	if (kfence_handle_page_fault(address, write, regs))
		return;

	/*
	 * Oops. The kernel tried to access some bad page. We'll have to
	 * terminate things with extreme prejudice.
	 */
	bust_spinlocks(1);

	pr_alert("CPU %d Unable to handle kernel paging request at "
		 "virtual address %0*lx, era == %0*lx, ra == %0*lx\n",
		 raw_smp_processor_id(), field, address, field, regs->csr_era,
		 field, regs->regs[1]);
	die("Oops", regs);
}

static void __kprobes do_out_of_memory(struct pt_regs *regs,
			unsigned long write, unsigned long address)
{
	/*
	 * We ran out of memory, call the OOM killer, and return to userspace
	 * (which will retry the fault, or kill us if we got oom-killed).
	 */
	if (!user_mode(regs)) {
		no_context(regs, write, address);
		return;
	}
	pagefault_out_of_memory();
}

static void __kprobes do_sigbus(struct pt_regs *regs,
		unsigned long write, unsigned long address, int si_code)
{
	/* Kernel mode? Handle exceptions or die */
	if (!user_mode(regs)) {
		no_context(regs, write, address);
		return;
	}

	/* Send a SIGBUS for the failing user-mode access */
	current->thread.csr_badvaddr = address;
	current->thread.trap_nr = read_csr_excode();
	force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address);
}

static void __kprobes do_sigsegv(struct pt_regs *regs,
		unsigned long write, unsigned long address, int si_code)
{
	const int field = sizeof(unsigned long) * 2;
	static DEFINE_RATELIMIT_STATE(ratelimit_state, 5 * HZ, 10);

	/* Kernel mode? Handle exceptions or die */
	if (!user_mode(regs)) {
		no_context(regs, write, address);
		return;
	}

	/* User mode accesses just cause a SIGSEGV */
	current->thread.csr_badvaddr = address;
	if (!write)
		current->thread.error_code = 1;
	else
		current->thread.error_code = 2;
	current->thread.trap_nr = read_csr_excode();

	if (show_unhandled_signals &&
	    unhandled_signal(current, SIGSEGV) && __ratelimit(&ratelimit_state)) {
		pr_info("do_page_fault(): sending SIGSEGV to %s for invalid %s %0*lx\n",
			current->comm,
			write ? "write access to" : "read access from",
			field, address);
		pr_info("era = %0*lx in", field,
			(unsigned long) regs->csr_era);
		print_vma_addr(KERN_CONT " ", regs->csr_era);
		pr_cont("\n");
		pr_info("ra = %0*lx in", field,
			(unsigned long) regs->regs[1]);
		print_vma_addr(KERN_CONT " ", regs->regs[1]);
		pr_cont("\n");
	}
	force_sig_fault(SIGSEGV, si_code, (void __user *)address);
}

/*
 * This routine handles page faults. It determines the address,
 * and the problem, and then passes it off to one of the appropriate
 * routines.
 */
static void __kprobes __do_page_fault(struct pt_regs *regs,
			unsigned long write, unsigned long address)
{
	int si_code = SEGV_MAPERR;
	unsigned int flags = FAULT_FLAG_DEFAULT;
	struct task_struct *tsk = current;
	struct mm_struct *mm = tsk->mm;
	struct vm_area_struct *vma = NULL;
	vm_fault_t fault;

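	/* Give any active kprobe a chance to handle the fault first */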
	if (kprobe_page_fault(regs, current->thread.trap_nr))
		return;

	/*
	 * We fault-in kernel-space virtual memory on-demand. The
	 * 'reference' page table is init_mm.pgd.
	 *
	 * NOTE! We MUST NOT take any locks for this case. We may
	 * be in an interrupt or a critical region, and should
	 * only copy the information from the master page table,
	 * nothing more.
	 */
	if (address & __UA_LIMIT) {
		if (!user_mode(regs))
			no_context(regs, write, address);
		else
			do_sigsegv(regs, write, address, si_code);
		return;
	}

	/*
	 * If we're in an interrupt or have no user
	 * context, we must not take the fault..
	 */
	if (faulthandler_disabled() || !mm) {
		do_sigsegv(regs, write, address, si_code);
		return;
	}

	if (user_mode(regs))
		flags |= FAULT_FLAG_USER;

	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
retry:
	vma = lock_mm_and_find_vma(mm, address, regs);
	if (unlikely(!vma))
		goto bad_area_nosemaphore;
	goto good_area;

/*
 * Something tried to access memory that isn't in our memory map..
 * Fix it, but check if it's kernel or user first..
 */
bad_area:
	mmap_read_unlock(mm);
bad_area_nosemaphore:
	do_sigsegv(regs, write, address, si_code);
	return;

/*
 * Ok, we have a good vm_area for this memory access, so
 * we can handle it..
 */
good_area:
	si_code = SEGV_ACCERR;

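	/*
	 * Check that the VMA permits the attempted access: a write needs
	 * VM_WRITE; for a read, an instruction fetch (address == ERA) from
	 * a non-executable mapping faults, and a data read from a mapping
	 * with neither VM_READ nor VM_WRITE faults.
	 */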
	if (write) {
		flags |= FAULT_FLAG_WRITE;
		if (!(vma->vm_flags & VM_WRITE))
			goto bad_area;
	} else {
		if (!(vma->vm_flags & VM_EXEC) && address == exception_era(regs))
			goto bad_area;
		if (!(vma->vm_flags & (VM_READ | VM_WRITE)) && address != exception_era(regs))
			goto bad_area;
	}

	/*
	 * If for any reason at all we couldn't handle the fault,
	 * make sure we exit gracefully rather than endlessly redo
	 * the fault.
	 */
	fault = handle_mm_fault(vma, address, flags, regs);

	if (fault_signal_pending(fault, regs)) {
		if (!user_mode(regs))
			no_context(regs, write, address);
		return;
	}

	/* The fault is fully completed (including releasing mmap lock) */
	if (fault & VM_FAULT_COMPLETED)
		return;

	if (unlikely(fault & VM_FAULT_RETRY)) {
		flags |= FAULT_FLAG_TRIED;

		/*
		 * No need to mmap_read_unlock(mm) as we would
		 * have already released it in __lock_page_or_retry
		 * in mm/filemap.c.
		 */
		goto retry;
	}
	if (unlikely(fault & VM_FAULT_ERROR)) {
		mmap_read_unlock(mm);
		if (fault & VM_FAULT_OOM) {
			do_out_of_memory(regs, write, address);
			return;
		} else if (fault & VM_FAULT_SIGSEGV) {
			do_sigsegv(regs, write, address, si_code);
			return;
		} else if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON|VM_FAULT_HWPOISON_LARGE)) {
			do_sigbus(regs, write, address, si_code);
			return;
		}
		BUG();
	}

	mmap_read_unlock(mm);
}

asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
			unsigned long write, unsigned long address)
{
	irqentry_state_t state = irqentry_enter(regs);

	/* Enable interrupt if enabled in parent context */
	if (likely(regs->csr_prmd & CSR_PRMD_PIE))
		local_irq_enable();

	__do_page_fault(regs, write, address);

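	/* irqentry_exit() expects interrupts to be disabled again */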
	local_irq_disable();

	irqentry_exit(regs, state);
}