xref: /openbmc/linux/arch/riscv/mm/fault.c (revision 7f2e85840871f199057e65232ebde846192ed989)
1 /*
2  * Copyright (C) 2009 Sunplus Core Technology Co., Ltd.
3  *  Lennox Wu <lennox.wu@sunplusct.com>
4  *  Chen Liqin <liqin.chen@sunplusct.com>
5  * Copyright (C) 2012 Regents of the University of California
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, see the file COPYING, or write
19  * to the Free Software Foundation, Inc.,
20  */
21 
22 
23 #include <linux/mm.h>
24 #include <linux/kernel.h>
25 #include <linux/interrupt.h>
26 #include <linux/perf_event.h>
27 #include <linux/signal.h>
28 #include <linux/uaccess.h>
29 
30 #include <asm/pgalloc.h>
31 #include <asm/ptrace.h>
32 
33 /*
34  * This routine handles page faults.  It determines the address and the
35  * problem, and then passes it off to one of the appropriate routines.
36  */
37 asmlinkage void do_page_fault(struct pt_regs *regs)
38 {
39 	struct task_struct *tsk;
40 	struct vm_area_struct *vma;
41 	struct mm_struct *mm;
42 	unsigned long addr, cause;
43 	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
44 	int fault, code = SEGV_MAPERR;
45 
46 	cause = regs->scause;
47 	addr = regs->sbadaddr;
48 
49 	tsk = current;
50 	mm = tsk->mm;
51 
52 	/*
53 	 * Fault-in kernel-space virtual memory on-demand.
54 	 * The 'reference' page table is init_mm.pgd.
55 	 *
56 	 * NOTE! We MUST NOT take any locks for this case. We may
57 	 * be in an interrupt or a critical region, and should
58 	 * only copy the information from the master page table,
59 	 * nothing more.
60 	 */
61 	if (unlikely((addr >= VMALLOC_START) && (addr <= VMALLOC_END)))
62 		goto vmalloc_fault;
63 
64 	/* Enable interrupts if they were enabled in the parent context. */
65 	if (likely(regs->sstatus & SR_SPIE))
66 		local_irq_enable();
67 
68 	/*
69 	 * If we're in an interrupt, have no user context, or are running
70 	 * in an atomic region, then we must not take the fault.
71 	 */
72 	if (unlikely(faulthandler_disabled() || !mm))
73 		goto no_context;
74 
75 	if (user_mode(regs))
76 		flags |= FAULT_FLAG_USER;
77 
78 	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);
79 
80 retry:
81 	down_read(&mm->mmap_sem);
82 	vma = find_vma(mm, addr);
83 	if (unlikely(!vma))
84 		goto bad_area;
85 	if (likely(vma->vm_start <= addr))
86 		goto good_area;
87 	if (unlikely(!(vma->vm_flags & VM_GROWSDOWN)))
88 		goto bad_area;
89 	if (unlikely(expand_stack(vma, addr)))
90 		goto bad_area;
91 
92 	/*
93 	 * Ok, we have a good vm_area for this memory access, so
94 	 * we can handle it.
95 	 */
96 good_area:
97 	code = SEGV_ACCERR;
98 
99 	switch (cause) {
100 	case EXC_INST_PAGE_FAULT:
101 		if (!(vma->vm_flags & VM_EXEC))
102 			goto bad_area;
103 		break;
104 	case EXC_LOAD_PAGE_FAULT:
105 		if (!(vma->vm_flags & VM_READ))
106 			goto bad_area;
107 		break;
108 	case EXC_STORE_PAGE_FAULT:
109 		if (!(vma->vm_flags & VM_WRITE))
110 			goto bad_area;
111 		flags |= FAULT_FLAG_WRITE;
112 		break;
113 	default:
114 		panic("%s: unhandled cause %lu", __func__, cause);
115 	}
116 
117 	/*
118 	 * If for any reason at all we could not handle the fault,
119 	 * make sure we exit gracefully rather than endlessly redo
120 	 * the fault.
121 	 */
122 	fault = handle_mm_fault(vma, addr, flags);
123 
124 	/*
125 	 * If we need to retry but a fatal signal is pending, handle the
126 	 * signal first. We do not need to release the mmap_sem because it
127 	 * would already be released in __lock_page_or_retry in mm/filemap.c.
128 	 */
129 	if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(tsk))
130 		return;
131 
132 	if (unlikely(fault & VM_FAULT_ERROR)) {
133 		if (fault & VM_FAULT_OOM)
134 			goto out_of_memory;
135 		else if (fault & VM_FAULT_SIGBUS)
136 			goto do_sigbus;
137 		BUG();
138 	}
139 
140 	/*
141 	 * Major/minor page fault accounting is only done on the
142 	 * initial attempt. If we go through a retry, it is extremely
143 	 * likely that the page will be found in page cache at that point.
144 	 */
145 	if (flags & FAULT_FLAG_ALLOW_RETRY) {
146 		if (fault & VM_FAULT_MAJOR) {
147 			tsk->maj_flt++;
148 			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ,
149 				      1, regs, addr);
150 		} else {
151 			tsk->min_flt++;
152 			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN,
153 				      1, regs, addr);
154 		}
155 		if (fault & VM_FAULT_RETRY) {
156 			/*
157 			 * Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk
158 			 * of starvation.
159 			 */
160 			flags &= ~(FAULT_FLAG_ALLOW_RETRY);
161 			flags |= FAULT_FLAG_TRIED;
162 
163 			/*
164 			 * No need to up_read(&mm->mmap_sem) as we would
165 			 * have already released it in __lock_page_or_retry
166 			 * in mm/filemap.c.
167 			 */
168 			goto retry;
169 		}
170 	}
171 
172 	up_read(&mm->mmap_sem);
173 	return;
174 
175 	/*
176 	 * Something tried to access memory that isn't in our memory map.
177 	 * Fix it, but check if it's kernel or user first.
178 	 */
179 bad_area:
180 	up_read(&mm->mmap_sem);
181 	/* User mode accesses just cause a SIGSEGV */
182 	if (user_mode(regs)) {
183 		do_trap(regs, SIGSEGV, code, addr, tsk);
184 		return;
185 	}
186 
187 no_context:
188 	/* Are we prepared to handle this kernel fault? */
189 	if (fixup_exception(regs))
190 		return;
191 
192 	/*
193 	 * Oops. The kernel tried to access some bad page. We'll have to
194 	 * terminate things with extreme prejudice.
195 	 */
196 	bust_spinlocks(1);
197 	pr_alert("Unable to handle kernel %s at virtual address " REG_FMT "\n",
198 		(addr < PAGE_SIZE) ? "NULL pointer dereference" :
199 		"paging request", addr);
200 	die(regs, "Oops");
201 	do_exit(SIGKILL);
202 
203 	/*
204 	 * We ran out of memory, call the OOM killer, and return the userspace
205 	 * (which will retry the fault, or kill us if we got oom-killed).
206 	 */
207 out_of_memory:
208 	up_read(&mm->mmap_sem);
209 	if (!user_mode(regs))
210 		goto no_context;
211 	pagefault_out_of_memory();
212 	return;
213 
214 do_sigbus:
215 	up_read(&mm->mmap_sem);
216 	/* Kernel mode? Handle exceptions or die */
217 	if (!user_mode(regs))
218 		goto no_context;
219 	do_trap(regs, SIGBUS, BUS_ADRERR, addr, tsk);
220 	return;
221 
222 vmalloc_fault:
223 	{
224 		pgd_t *pgd, *pgd_k;
225 		pud_t *pud, *pud_k;
226 		p4d_t *p4d, *p4d_k;
227 		pmd_t *pmd, *pmd_k;
228 		pte_t *pte_k;
229 		int index;
230 
231 		if (user_mode(regs))
232 			goto bad_area;
233 
234 		/*
235 		 * Synchronize this task's top level page-table
236 		 * with the 'reference' page table.
237 		 *
238 		 * Do _not_ use "tsk->active_mm->pgd" here.
239 		 * We might be inside an interrupt in the middle
240 		 * of a task switch.
241 		 *
242 		 * Note: Use the old spbtr name instead of using the current
243 		 * satp name to support binutils 2.29 which doesn't know about
244 		 * the privileged ISA 1.10 yet.
245 		 */
246 		index = pgd_index(addr);
247 		pgd = (pgd_t *)pfn_to_virt(csr_read(sptbr)) + index;
248 		pgd_k = init_mm.pgd + index;
249 
250 		if (!pgd_present(*pgd_k))
251 			goto no_context;
252 		set_pgd(pgd, *pgd_k);
253 
254 		p4d = p4d_offset(pgd, addr);
255 		p4d_k = p4d_offset(pgd_k, addr);
256 		if (!p4d_present(*p4d_k))
257 			goto no_context;
258 
259 		pud = pud_offset(p4d, addr);
260 		pud_k = pud_offset(p4d_k, addr);
261 		if (!pud_present(*pud_k))
262 			goto no_context;
263 
264 		/*
265 		 * Since the vmalloc area is global, it is unnecessary
266 		 * to copy individual PTEs
267 		 */
268 		pmd = pmd_offset(pud, addr);
269 		pmd_k = pmd_offset(pud_k, addr);
270 		if (!pmd_present(*pmd_k))
271 			goto no_context;
272 		set_pmd(pmd, *pmd_k);
273 
274 		/*
275 		 * Make sure the actual PTE exists as well to
276 		 * catch kernel vmalloc-area accesses to non-mapped
277 		 * addresses. If we don't do this, this will just
278 		 * silently loop forever.
279 		 */
280 		pte_k = pte_offset_kernel(pmd_k, addr);
281 		if (!pte_present(*pte_k))
282 			goto no_context;
283 		return;
284 	}
285 }
286