xref: /openbmc/linux/arch/riscv/mm/fault.c (revision 750afb08)
/*
 * Copyright (C) 2009 Sunplus Core Technology Co., Ltd.
 *  Lennox Wu <lennox.wu@sunplusct.com>
 *  Chen Liqin <liqin.chen@sunplusct.com>
 * Copyright (C) 2012 Regents of the University of California
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see the file COPYING, or write
 * to the Free Software Foundation, Inc.,
 */


#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/interrupt.h>
#include <linux/perf_event.h>
#include <linux/signal.h>
#include <linux/uaccess.h>

#include <asm/pgalloc.h>
#include <asm/ptrace.h>

/*
 * This routine handles page faults.  It determines the address and the
 * problem, and then passes it off to one of the appropriate routines.
 */
asmlinkage void do_page_fault(struct pt_regs *regs)
{
	struct task_struct *tsk;
	struct vm_area_struct *vma;
	struct mm_struct *mm;
	unsigned long addr, cause;
	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
	int code = SEGV_MAPERR;
	vm_fault_t fault;

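	/* scause gives the exception cause, sbadaddr the faulting virtual address. */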
	cause = regs->scause;
	addr = regs->sbadaddr;

	tsk = current;
	mm = tsk->mm;

	/*
	 * Fault-in kernel-space virtual memory on-demand.
	 * The 'reference' page table is init_mm.pgd.
	 *
	 * NOTE! We MUST NOT take any locks for this case. We may
	 * be in an interrupt or a critical region, and should
	 * only copy the information from the master page table,
	 * nothing more.
	 */
	if (unlikely((addr >= VMALLOC_START) && (addr <= VMALLOC_END)))
		goto vmalloc_fault;

	/* Enable interrupts if they were enabled in the parent context. */
	if (likely(regs->sstatus & SR_SPIE))
		local_irq_enable();

	/*
	 * If we're in an interrupt, have no user context, or are running
	 * in an atomic region, then we must not take the fault.
	 */
	if (unlikely(faulthandler_disabled() || !mm))
		goto no_context;

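	/* Mark the fault as coming from user mode so the mm core knows its origin. */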
	if (user_mode(regs))
		flags |= FAULT_FLAG_USER;

	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);

retry:
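	/*
	 * Take mmap_sem for read and look up the VMA covering the faulting
	 * address, growing the stack downwards if the address lies just
	 * below an existing VM_GROWSDOWN mapping.
	 */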
	down_read(&mm->mmap_sem);
	vma = find_vma(mm, addr);
	if (unlikely(!vma))
		goto bad_area;
	if (likely(vma->vm_start <= addr))
		goto good_area;
	if (unlikely(!(vma->vm_flags & VM_GROWSDOWN)))
		goto bad_area;
	if (unlikely(expand_stack(vma, addr)))
		goto bad_area;

	/*
	 * Ok, we have a good vm_area for this memory access, so
	 * we can handle it.
	 */
good_area:
	code = SEGV_ACCERR;

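	/*
	 * The exception cause tells us what type of access faulted;
	 * check that the VMA actually permits that type of access.
	 */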
	switch (cause) {
	case EXC_INST_PAGE_FAULT:
		if (!(vma->vm_flags & VM_EXEC))
			goto bad_area;
		break;
	case EXC_LOAD_PAGE_FAULT:
		if (!(vma->vm_flags & VM_READ))
			goto bad_area;
		break;
	case EXC_STORE_PAGE_FAULT:
		if (!(vma->vm_flags & VM_WRITE))
			goto bad_area;
		flags |= FAULT_FLAG_WRITE;
		break;
	default:
		panic("%s: unhandled cause %lu", __func__, cause);
	}

	/*
	 * If for any reason at all we could not handle the fault,
	 * make sure we exit gracefully rather than endlessly redo
	 * the fault.
	 */
	fault = handle_mm_fault(vma, addr, flags);

	/*
	 * If we need to retry but a fatal signal is pending, handle the
	 * signal first. We do not need to release the mmap_sem because it
	 * would already be released in __lock_page_or_retry in mm/filemap.c.
	 */
	if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(tsk))
		return;

	if (unlikely(fault & VM_FAULT_ERROR)) {
		if (fault & VM_FAULT_OOM)
			goto out_of_memory;
		else if (fault & VM_FAULT_SIGBUS)
			goto do_sigbus;
		BUG();
	}

	/*
	 * Major/minor page fault accounting is only done on the
	 * initial attempt. If we go through a retry, it is extremely
	 * likely that the page will be found in page cache at that point.
	 */
	if (flags & FAULT_FLAG_ALLOW_RETRY) {
		if (fault & VM_FAULT_MAJOR) {
			tsk->maj_flt++;
			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ,
				      1, regs, addr);
		} else {
			tsk->min_flt++;
			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN,
				      1, regs, addr);
		}
		if (fault & VM_FAULT_RETRY) {
			/*
			 * Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk
			 * of starvation.
			 */
			flags &= ~(FAULT_FLAG_ALLOW_RETRY);
			flags |= FAULT_FLAG_TRIED;

			/*
			 * No need to up_read(&mm->mmap_sem) as we would
			 * have already released it in __lock_page_or_retry
			 * in mm/filemap.c.
			 */
			goto retry;
		}
	}

	up_read(&mm->mmap_sem);
	return;

	/*
	 * Something tried to access memory that isn't in our memory map.
	 * Fix it, but check if it's kernel or user first.
	 */
bad_area:
	up_read(&mm->mmap_sem);
	/* User mode accesses just cause a SIGSEGV */
	if (user_mode(regs)) {
		do_trap(regs, SIGSEGV, code, addr, tsk);
		return;
	}

no_context:
	/* Are we prepared to handle this kernel fault? */
	if (fixup_exception(regs))
		return;

	/*
	 * Oops. The kernel tried to access some bad page. We'll have to
	 * terminate things with extreme prejudice.
	 */
	bust_spinlocks(1);
	pr_alert("Unable to handle kernel %s at virtual address " REG_FMT "\n",
		(addr < PAGE_SIZE) ? "NULL pointer dereference" :
		"paging request", addr);
	die(regs, "Oops");
	do_exit(SIGKILL);

	/*
	 * We ran out of memory, call the OOM killer, and return to userspace
	 * (which will retry the fault, or kill us if we got oom-killed).
	 */
out_of_memory:
	up_read(&mm->mmap_sem);
	if (!user_mode(regs))
		goto no_context;
	pagefault_out_of_memory();
	return;

do_sigbus:
	up_read(&mm->mmap_sem);
	/* Kernel mode? Handle exceptions or die */
	if (!user_mode(regs))
		goto no_context;
	do_trap(regs, SIGBUS, BUS_ADRERR, addr, tsk);
	return;

vmalloc_fault:
	{
		pgd_t *pgd, *pgd_k;
		pud_t *pud, *pud_k;
		p4d_t *p4d, *p4d_k;
		pmd_t *pmd, *pmd_k;
		pte_t *pte_k;
		int index;

		if (user_mode(regs))
			goto bad_area;

		/*
		 * Synchronize this task's top level page-table
		 * with the 'reference' page table.
		 *
		 * Do _not_ use "tsk->active_mm->pgd" here.
		 * We might be inside an interrupt in the middle
		 * of a task switch.
		 *
		 * Note: Use the old sptbr name instead of using the current
		 * satp name to support binutils 2.29 which doesn't know about
		 * the privileged ISA 1.10 yet.
		 */
		index = pgd_index(addr);
		pgd = (pgd_t *)pfn_to_virt(csr_read(sptbr)) + index;
		pgd_k = init_mm.pgd + index;

		if (!pgd_present(*pgd_k))
			goto no_context;
		set_pgd(pgd, *pgd_k);

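		/*
		 * Walk the remaining levels of both page tables in lock
		 * step; if the reference table has no entry either, the
		 * access is genuinely bad and is treated as a kernel fault.
		 */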
		p4d = p4d_offset(pgd, addr);
		p4d_k = p4d_offset(pgd_k, addr);
		if (!p4d_present(*p4d_k))
			goto no_context;

		pud = pud_offset(p4d, addr);
		pud_k = pud_offset(p4d_k, addr);
		if (!pud_present(*pud_k))
			goto no_context;

		/*
		 * Since the vmalloc area is global, it is unnecessary
		 * to copy individual PTEs
		 */
		pmd = pmd_offset(pud, addr);
		pmd_k = pmd_offset(pud_k, addr);
		if (!pmd_present(*pmd_k))
			goto no_context;
		set_pmd(pmd, *pmd_k);

		/*
		 * Make sure the actual PTE exists as well to
		 * catch kernel vmalloc-area accesses to non-mapped
		 * addresses. If we don't do this, the fault will just
		 * silently loop forever.
		 */
		pte_k = pte_offset_kernel(pmd_k, addr);
		if (!pte_present(*pte_k))
			goto no_context;
		return;
	}
}