xref: /openbmc/linux/arch/um/kernel/tlb.c (revision c4c3c32d)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
4  */
5 
6 #include <linux/mm.h>
7 #include <linux/module.h>
8 #include <linux/sched/signal.h>
9 
10 #include <asm/tlbflush.h>
11 #include <as-layout.h>
12 #include <mem_user.h>
13 #include <os.h>
14 #include <skas.h>
15 #include <kern_util.h>
16 
/*
 * A batch of pending host address-space operations for one mm.
 *
 * Operations are queued in ops[] by add_mmap(), add_munmap() and
 * add_mprotect() and are executed on the host by do_ops().
 */
struct host_vm_change {
	struct host_vm_op {
		/* Selects which member of the union below is valid. */
		enum { NONE, MMAP, MUNMAP, MPROTECT } type;
		union {
			struct {
				unsigned long addr;
				unsigned long len;
				unsigned int prot;
				int fd;
				__u64 offset;
			} mmap;
			struct {
				unsigned long addr;
				unsigned long len;
			} munmap;
			struct {
				unsigned long addr;
				unsigned long len;
				unsigned int prot;
			} mprotect;
		} u;
	/*
	 * Single-slot queue: ARRAY_SIZE(ops) is 1, so the add_*() helpers
	 * flush the pending op before queueing one that cannot be merged
	 * into it.
	 */
	} ops[1];
	/*
	 * Non-zero: do_ops() uses map()/unmap()/protect() on the mm's
	 * context id.  Zero: do_ops() uses map_memory(), os_unmap_memory()
	 * and os_protect_memory() instead.
	 */
	int userspace;
	/* Number of slots of ops[] currently in use. */
	int index;
	struct mm_struct *mm;
	/* Passed by address to map()/unmap()/protect(); starts out NULL. */
	void *data;
	/* Non-zero: the update_*_range() walkers act even without a newpage flag. */
	int force;
};
45 
/*
 * Build an empty struct host_vm_change for the given mm: no queued
 * operations (index 0, ops[0] of type NONE) and a NULL data cookie.
 *
 * The macro parameters deliberately do NOT share names with the struct
 * fields: the preprocessor also substitutes a parameter name when it is
 * used as a designator, so with parameters called mm/force/userspace the
 * macro only compiled when every argument was spelled exactly like the
 * field it initialises.
 */
#define INIT_HVC(mm_ptr, force_flag, userspace_flag) \
	((struct host_vm_change) \
	 { .ops		= { { .type = NONE } },	\
	   .mm		= (mm_ptr), \
	   .data	= NULL, \
	   .userspace	= (userspace_flag), \
	   .index	= 0, \
	   .force	= (force_flag) })
54 
55 static void report_enomem(void)
56 {
57 	printk(KERN_ERR "UML ran out of memory on the host side! "
58 			"This can happen due to a memory limitation or "
59 			"vm.max_map_count has been reached.\n");
60 }
61 
62 static int do_ops(struct host_vm_change *hvc, int end,
63 		  int finished)
64 {
65 	struct host_vm_op *op;
66 	int i, ret = 0;
67 
68 	for (i = 0; i < end && !ret; i++) {
69 		op = &hvc->ops[i];
70 		switch (op->type) {
71 		case MMAP:
72 			if (hvc->userspace)
73 				ret = map(&hvc->mm->context.id, op->u.mmap.addr,
74 					  op->u.mmap.len, op->u.mmap.prot,
75 					  op->u.mmap.fd,
76 					  op->u.mmap.offset, finished,
77 					  &hvc->data);
78 			else
79 				map_memory(op->u.mmap.addr, op->u.mmap.offset,
80 					   op->u.mmap.len, 1, 1, 1);
81 			break;
82 		case MUNMAP:
83 			if (hvc->userspace)
84 				ret = unmap(&hvc->mm->context.id,
85 					    op->u.munmap.addr,
86 					    op->u.munmap.len, finished,
87 					    &hvc->data);
88 			else
89 				ret = os_unmap_memory(
90 					(void *) op->u.munmap.addr,
91 						      op->u.munmap.len);
92 
93 			break;
94 		case MPROTECT:
95 			if (hvc->userspace)
96 				ret = protect(&hvc->mm->context.id,
97 					      op->u.mprotect.addr,
98 					      op->u.mprotect.len,
99 					      op->u.mprotect.prot,
100 					      finished, &hvc->data);
101 			else
102 				ret = os_protect_memory(
103 					(void *) op->u.mprotect.addr,
104 							op->u.mprotect.len,
105 							1, 1, 1);
106 			break;
107 		default:
108 			printk(KERN_ERR "Unknown op type %d in do_ops\n",
109 			       op->type);
110 			BUG();
111 			break;
112 		}
113 	}
114 
115 	if (ret == -ENOMEM)
116 		report_enomem();
117 
118 	return ret;
119 }
120 
121 static int add_mmap(unsigned long virt, unsigned long phys, unsigned long len,
122 		    unsigned int prot, struct host_vm_change *hvc)
123 {
124 	__u64 offset;
125 	struct host_vm_op *last;
126 	int fd = -1, ret = 0;
127 
128 	if (hvc->userspace)
129 		fd = phys_mapping(phys, &offset);
130 	else
131 		offset = phys;
132 	if (hvc->index != 0) {
133 		last = &hvc->ops[hvc->index - 1];
134 		if ((last->type == MMAP) &&
135 		   (last->u.mmap.addr + last->u.mmap.len == virt) &&
136 		   (last->u.mmap.prot == prot) && (last->u.mmap.fd == fd) &&
137 		   (last->u.mmap.offset + last->u.mmap.len == offset)) {
138 			last->u.mmap.len += len;
139 			return 0;
140 		}
141 	}
142 
143 	if (hvc->index == ARRAY_SIZE(hvc->ops)) {
144 		ret = do_ops(hvc, ARRAY_SIZE(hvc->ops), 0);
145 		hvc->index = 0;
146 	}
147 
148 	hvc->ops[hvc->index++] = ((struct host_vm_op)
149 				  { .type	= MMAP,
150 				    .u = { .mmap = { .addr	= virt,
151 						     .len	= len,
152 						     .prot	= prot,
153 						     .fd	= fd,
154 						     .offset	= offset }
155 			   } });
156 	return ret;
157 }
158 
159 static int add_munmap(unsigned long addr, unsigned long len,
160 		      struct host_vm_change *hvc)
161 {
162 	struct host_vm_op *last;
163 	int ret = 0;
164 
165 	if (hvc->index != 0) {
166 		last = &hvc->ops[hvc->index - 1];
167 		if ((last->type == MUNMAP) &&
168 		   (last->u.munmap.addr + last->u.mmap.len == addr)) {
169 			last->u.munmap.len += len;
170 			return 0;
171 		}
172 	}
173 
174 	if (hvc->index == ARRAY_SIZE(hvc->ops)) {
175 		ret = do_ops(hvc, ARRAY_SIZE(hvc->ops), 0);
176 		hvc->index = 0;
177 	}
178 
179 	hvc->ops[hvc->index++] = ((struct host_vm_op)
180 				  { .type	= MUNMAP,
181 			     	    .u = { .munmap = { .addr	= addr,
182 						       .len	= len } } });
183 	return ret;
184 }
185 
186 static int add_mprotect(unsigned long addr, unsigned long len,
187 			unsigned int prot, struct host_vm_change *hvc)
188 {
189 	struct host_vm_op *last;
190 	int ret = 0;
191 
192 	if (hvc->index != 0) {
193 		last = &hvc->ops[hvc->index - 1];
194 		if ((last->type == MPROTECT) &&
195 		   (last->u.mprotect.addr + last->u.mprotect.len == addr) &&
196 		   (last->u.mprotect.prot == prot)) {
197 			last->u.mprotect.len += len;
198 			return 0;
199 		}
200 	}
201 
202 	if (hvc->index == ARRAY_SIZE(hvc->ops)) {
203 		ret = do_ops(hvc, ARRAY_SIZE(hvc->ops), 0);
204 		hvc->index = 0;
205 	}
206 
207 	hvc->ops[hvc->index++] = ((struct host_vm_op)
208 				  { .type	= MPROTECT,
209 			     	    .u = { .mprotect = { .addr	= addr,
210 							 .len	= len,
211 							 .prot	= prot } } });
212 	return ret;
213 }
214 
/*
 * Advance n to the next multiple of inc that is strictly greater than n.
 * The mask arithmetic requires inc to be a power of two.  Note that an
 * already aligned n still moves forward by a full inc.
 */
#define ADD_ROUND(n, inc) (((n) + (inc)) & ~((inc) - 1))
216 
217 static inline int update_pte_range(pmd_t *pmd, unsigned long addr,
218 				   unsigned long end,
219 				   struct host_vm_change *hvc)
220 {
221 	pte_t *pte;
222 	int r, w, x, prot, ret = 0;
223 
224 	pte = pte_offset_kernel(pmd, addr);
225 	do {
226 		r = pte_read(*pte);
227 		w = pte_write(*pte);
228 		x = pte_exec(*pte);
229 		if (!pte_young(*pte)) {
230 			r = 0;
231 			w = 0;
232 		} else if (!pte_dirty(*pte))
233 			w = 0;
234 
235 		prot = ((r ? UM_PROT_READ : 0) | (w ? UM_PROT_WRITE : 0) |
236 			(x ? UM_PROT_EXEC : 0));
237 		if (hvc->force || pte_newpage(*pte)) {
238 			if (pte_present(*pte)) {
239 				if (pte_newpage(*pte))
240 					ret = add_mmap(addr, pte_val(*pte) & PAGE_MASK,
241 						       PAGE_SIZE, prot, hvc);
242 			} else
243 				ret = add_munmap(addr, PAGE_SIZE, hvc);
244 		} else if (pte_newprot(*pte))
245 			ret = add_mprotect(addr, PAGE_SIZE, prot, hvc);
246 		*pte = pte_mkuptodate(*pte);
247 	} while (pte++, addr += PAGE_SIZE, ((addr < end) && !ret));
248 	return ret;
249 }
250 
251 static inline int update_pmd_range(pud_t *pud, unsigned long addr,
252 				   unsigned long end,
253 				   struct host_vm_change *hvc)
254 {
255 	pmd_t *pmd;
256 	unsigned long next;
257 	int ret = 0;
258 
259 	pmd = pmd_offset(pud, addr);
260 	do {
261 		next = pmd_addr_end(addr, end);
262 		if (!pmd_present(*pmd)) {
263 			if (hvc->force || pmd_newpage(*pmd)) {
264 				ret = add_munmap(addr, next - addr, hvc);
265 				pmd_mkuptodate(*pmd);
266 			}
267 		}
268 		else ret = update_pte_range(pmd, addr, next, hvc);
269 	} while (pmd++, addr = next, ((addr < end) && !ret));
270 	return ret;
271 }
272 
273 static inline int update_pud_range(p4d_t *p4d, unsigned long addr,
274 				   unsigned long end,
275 				   struct host_vm_change *hvc)
276 {
277 	pud_t *pud;
278 	unsigned long next;
279 	int ret = 0;
280 
281 	pud = pud_offset(p4d, addr);
282 	do {
283 		next = pud_addr_end(addr, end);
284 		if (!pud_present(*pud)) {
285 			if (hvc->force || pud_newpage(*pud)) {
286 				ret = add_munmap(addr, next - addr, hvc);
287 				pud_mkuptodate(*pud);
288 			}
289 		}
290 		else ret = update_pmd_range(pud, addr, next, hvc);
291 	} while (pud++, addr = next, ((addr < end) && !ret));
292 	return ret;
293 }
294 
295 static inline int update_p4d_range(pgd_t *pgd, unsigned long addr,
296 				   unsigned long end,
297 				   struct host_vm_change *hvc)
298 {
299 	p4d_t *p4d;
300 	unsigned long next;
301 	int ret = 0;
302 
303 	p4d = p4d_offset(pgd, addr);
304 	do {
305 		next = p4d_addr_end(addr, end);
306 		if (!p4d_present(*p4d)) {
307 			if (hvc->force || p4d_newpage(*p4d)) {
308 				ret = add_munmap(addr, next - addr, hvc);
309 				p4d_mkuptodate(*p4d);
310 			}
311 		} else
312 			ret = update_pud_range(p4d, addr, next, hvc);
313 	} while (p4d++, addr = next, ((addr < end) && !ret));
314 	return ret;
315 }
316 
317 static void fix_range_common(struct mm_struct *mm, unsigned long start_addr,
318 			     unsigned long end_addr, int force)
319 {
320 	pgd_t *pgd;
321 	struct host_vm_change hvc;
322 	unsigned long addr = start_addr, next;
323 	int ret = 0, userspace = 1;
324 
325 	hvc = INIT_HVC(mm, force, userspace);
326 	pgd = pgd_offset(mm, addr);
327 	do {
328 		next = pgd_addr_end(addr, end_addr);
329 		if (!pgd_present(*pgd)) {
330 			if (force || pgd_newpage(*pgd)) {
331 				ret = add_munmap(addr, next - addr, &hvc);
332 				pgd_mkuptodate(*pgd);
333 			}
334 		} else
335 			ret = update_p4d_range(pgd, addr, next, &hvc);
336 	} while (pgd++, addr = next, ((addr < end_addr) && !ret));
337 
338 	if (!ret)
339 		ret = do_ops(&hvc, hvc.index, 1);
340 
341 	/* This is not an else because ret is modified above */
342 	if (ret) {
343 		struct mm_id *mm_idp = &current->mm->context.id;
344 
345 		printk(KERN_ERR "fix_range_common: failed, killing current "
346 		       "process: %d\n", task_tgid_vnr(current));
347 		mm_idp->kill = 1;
348 	}
349 }
350 
351 static int flush_tlb_kernel_range_common(unsigned long start, unsigned long end)
352 {
353 	struct mm_struct *mm;
354 	pgd_t *pgd;
355 	p4d_t *p4d;
356 	pud_t *pud;
357 	pmd_t *pmd;
358 	pte_t *pte;
359 	unsigned long addr, last;
360 	int updated = 0, err = 0, force = 0, userspace = 0;
361 	struct host_vm_change hvc;
362 
363 	mm = &init_mm;
364 	hvc = INIT_HVC(mm, force, userspace);
365 	for (addr = start; addr < end;) {
366 		pgd = pgd_offset(mm, addr);
367 		if (!pgd_present(*pgd)) {
368 			last = ADD_ROUND(addr, PGDIR_SIZE);
369 			if (last > end)
370 				last = end;
371 			if (pgd_newpage(*pgd)) {
372 				updated = 1;
373 				err = add_munmap(addr, last - addr, &hvc);
374 				if (err < 0)
375 					panic("munmap failed, errno = %d\n",
376 					      -err);
377 			}
378 			addr = last;
379 			continue;
380 		}
381 
382 		p4d = p4d_offset(pgd, addr);
383 		if (!p4d_present(*p4d)) {
384 			last = ADD_ROUND(addr, P4D_SIZE);
385 			if (last > end)
386 				last = end;
387 			if (p4d_newpage(*p4d)) {
388 				updated = 1;
389 				err = add_munmap(addr, last - addr, &hvc);
390 				if (err < 0)
391 					panic("munmap failed, errno = %d\n",
392 					      -err);
393 			}
394 			addr = last;
395 			continue;
396 		}
397 
398 		pud = pud_offset(p4d, addr);
399 		if (!pud_present(*pud)) {
400 			last = ADD_ROUND(addr, PUD_SIZE);
401 			if (last > end)
402 				last = end;
403 			if (pud_newpage(*pud)) {
404 				updated = 1;
405 				err = add_munmap(addr, last - addr, &hvc);
406 				if (err < 0)
407 					panic("munmap failed, errno = %d\n",
408 					      -err);
409 			}
410 			addr = last;
411 			continue;
412 		}
413 
414 		pmd = pmd_offset(pud, addr);
415 		if (!pmd_present(*pmd)) {
416 			last = ADD_ROUND(addr, PMD_SIZE);
417 			if (last > end)
418 				last = end;
419 			if (pmd_newpage(*pmd)) {
420 				updated = 1;
421 				err = add_munmap(addr, last - addr, &hvc);
422 				if (err < 0)
423 					panic("munmap failed, errno = %d\n",
424 					      -err);
425 			}
426 			addr = last;
427 			continue;
428 		}
429 
430 		pte = pte_offset_kernel(pmd, addr);
431 		if (!pte_present(*pte) || pte_newpage(*pte)) {
432 			updated = 1;
433 			err = add_munmap(addr, PAGE_SIZE, &hvc);
434 			if (err < 0)
435 				panic("munmap failed, errno = %d\n",
436 				      -err);
437 			if (pte_present(*pte))
438 				err = add_mmap(addr, pte_val(*pte) & PAGE_MASK,
439 					       PAGE_SIZE, 0, &hvc);
440 		}
441 		else if (pte_newprot(*pte)) {
442 			updated = 1;
443 			err = add_mprotect(addr, PAGE_SIZE, 0, &hvc);
444 		}
445 		addr += PAGE_SIZE;
446 	}
447 	if (!err)
448 		err = do_ops(&hvc, hvc.index, 1);
449 
450 	if (err < 0)
451 		panic("flush_tlb_kernel failed, errno = %d\n", err);
452 	return updated;
453 }
454 
455 void flush_tlb_page(struct vm_area_struct *vma, unsigned long address)
456 {
457 	pgd_t *pgd;
458 	p4d_t *p4d;
459 	pud_t *pud;
460 	pmd_t *pmd;
461 	pte_t *pte;
462 	struct mm_struct *mm = vma->vm_mm;
463 	void *flush = NULL;
464 	int r, w, x, prot, err = 0;
465 	struct mm_id *mm_id;
466 
467 	address &= PAGE_MASK;
468 
469 	pgd = pgd_offset(mm, address);
470 	if (!pgd_present(*pgd))
471 		goto kill;
472 
473 	p4d = p4d_offset(pgd, address);
474 	if (!p4d_present(*p4d))
475 		goto kill;
476 
477 	pud = pud_offset(p4d, address);
478 	if (!pud_present(*pud))
479 		goto kill;
480 
481 	pmd = pmd_offset(pud, address);
482 	if (!pmd_present(*pmd))
483 		goto kill;
484 
485 	pte = pte_offset_kernel(pmd, address);
486 
487 	r = pte_read(*pte);
488 	w = pte_write(*pte);
489 	x = pte_exec(*pte);
490 	if (!pte_young(*pte)) {
491 		r = 0;
492 		w = 0;
493 	} else if (!pte_dirty(*pte)) {
494 		w = 0;
495 	}
496 
497 	mm_id = &mm->context.id;
498 	prot = ((r ? UM_PROT_READ : 0) | (w ? UM_PROT_WRITE : 0) |
499 		(x ? UM_PROT_EXEC : 0));
500 	if (pte_newpage(*pte)) {
501 		if (pte_present(*pte)) {
502 			unsigned long long offset;
503 			int fd;
504 
505 			fd = phys_mapping(pte_val(*pte) & PAGE_MASK, &offset);
506 			err = map(mm_id, address, PAGE_SIZE, prot, fd, offset,
507 				  1, &flush);
508 		}
509 		else err = unmap(mm_id, address, PAGE_SIZE, 1, &flush);
510 	}
511 	else if (pte_newprot(*pte))
512 		err = protect(mm_id, address, PAGE_SIZE, prot, 1, &flush);
513 
514 	if (err) {
515 		if (err == -ENOMEM)
516 			report_enomem();
517 
518 		goto kill;
519 	}
520 
521 	*pte = pte_mkuptodate(*pte);
522 
523 	return;
524 
525 kill:
526 	printk(KERN_ERR "Failed to flush page for address 0x%lx\n", address);
527 	force_sig(SIGKILL);
528 }
529 
530 void flush_tlb_all(void)
531 {
532 	/*
533 	 * Don't bother flushing if this address space is about to be
534 	 * destroyed.
535 	 */
536 	if (atomic_read(&current->mm->mm_users) == 0)
537 		return;
538 
539 	flush_tlb_mm(current->mm);
540 }
541 
/*
 * Flush the kernel range [start, end); the "updated" result of
 * flush_tlb_kernel_range_common() is discarded.
 */
void flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
	flush_tlb_kernel_range_common(start, end);
}
546 
/* Flush the kernel range delimited by start_vm and end_vm. */
void flush_tlb_kernel_vm(void)
{
	flush_tlb_kernel_range_common(start_vm, end_vm);
}
551 
/* Flush the PAGE_SIZE-long kernel range that starts at addr. */
void __flush_tlb_one(unsigned long addr)
{
	flush_tlb_kernel_range_common(addr, addr + PAGE_SIZE);
}
556 
557 static void fix_range(struct mm_struct *mm, unsigned long start_addr,
558 		      unsigned long end_addr, int force)
559 {
560 	/*
561 	 * Don't bother flushing if this address space is about to be
562 	 * destroyed.
563 	 */
564 	if (atomic_read(&mm->mm_users) == 0)
565 		return;
566 
567 	fix_range_common(mm, start_addr, end_addr, force);
568 }
569 
570 void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
571 		     unsigned long end)
572 {
573 	if (vma->vm_mm == NULL)
574 		flush_tlb_kernel_range_common(start, end);
575 	else fix_range(vma->vm_mm, start, end, 0);
576 }
577 EXPORT_SYMBOL(flush_tlb_range);
578 
/* Flush [start, end) of mm through fix_range(), with force disabled. */
void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
			unsigned long end)
{
	fix_range(mm, start, end, 0);
}
584 
585 void flush_tlb_mm(struct mm_struct *mm)
586 {
587 	struct vm_area_struct *vma;
588 	VMA_ITERATOR(vmi, mm, 0);
589 
590 	for_each_vma(vmi, vma)
591 		fix_range(mm, vma->vm_start, vma->vm_end, 0);
592 }
593 
594 void force_flush_all(void)
595 {
596 	struct mm_struct *mm = current->mm;
597 	struct vm_area_struct *vma;
598 	VMA_ITERATOR(vmi, mm, 0);
599 
600 	mmap_read_lock(mm);
601 	for_each_vma(vmi, vma)
602 		fix_range(mm, vma->vm_start, vma->vm_end, 1);
603 	mmap_read_unlock(mm);
604 }
605