xref: /openbmc/linux/arch/um/kernel/tlb.c (revision b9dd2add)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
4  */
5 
6 #include <linux/mm.h>
7 #include <linux/module.h>
8 #include <linux/sched/signal.h>
9 
10 #include <asm/tlbflush.h>
11 #include <as-layout.h>
12 #include <mem_user.h>
13 #include <os.h>
14 #include <skas.h>
15 #include <kern_util.h>
16 
17 struct host_vm_change {
18 	struct host_vm_op {
19 		enum { NONE, MMAP, MUNMAP, MPROTECT } type;
20 		union {
21 			struct {
22 				unsigned long addr;
23 				unsigned long len;
24 				unsigned int prot;
25 				int fd;
26 				__u64 offset;
27 			} mmap;
28 			struct {
29 				unsigned long addr;
30 				unsigned long len;
31 			} munmap;
32 			struct {
33 				unsigned long addr;
34 				unsigned long len;
35 				unsigned int prot;
36 			} mprotect;
37 		} u;
38 	} ops[1];
39 	int userspace;
40 	int index;
41 	struct mm_struct *mm;
42 	void *data;
43 	int force;
44 };
45 
/*
 * Build a struct host_vm_change with an empty operation queue.
 *
 * mm_:        address space the queued operations will apply to
 * force_:     stored in .force
 * userspace_: stored in .userspace
 *
 * The parameter names intentionally differ from the member names. The
 * preprocessor substitutes parameters everywhere in the body, including
 * inside designators, so a parameter called "mm" would also rewrite ".mm"
 * and the macro would only compile when the caller's argument happened to
 * be spelled exactly like the member.
 */
#define INIT_HVC(mm_, force_, userspace_) \
	((struct host_vm_change) \
	 { .ops		= { { .type = NONE } },	\
	   .mm		= (mm_), \
	   .data	= NULL, \
	   .userspace	= (userspace_), \
	   .index	= 0, \
	   .force	= (force_) })
54 
55 static void report_enomem(void)
56 {
57 	printk(KERN_ERR "UML ran out of memory on the host side! "
58 			"This can happen due to a memory limitation or "
59 			"vm.max_map_count has been reached.\n");
60 }
61 
62 static int do_ops(struct host_vm_change *hvc, int end,
63 		  int finished)
64 {
65 	struct host_vm_op *op;
66 	int i, ret = 0;
67 
68 	for (i = 0; i < end && !ret; i++) {
69 		op = &hvc->ops[i];
70 		switch (op->type) {
71 		case MMAP:
72 			if (hvc->userspace)
73 				ret = map(&hvc->mm->context.id, op->u.mmap.addr,
74 					  op->u.mmap.len, op->u.mmap.prot,
75 					  op->u.mmap.fd,
76 					  op->u.mmap.offset, finished,
77 					  &hvc->data);
78 			else
79 				map_memory(op->u.mmap.addr, op->u.mmap.offset,
80 					   op->u.mmap.len, 1, 1, 1);
81 			break;
82 		case MUNMAP:
83 			if (hvc->userspace)
84 				ret = unmap(&hvc->mm->context.id,
85 					    op->u.munmap.addr,
86 					    op->u.munmap.len, finished,
87 					    &hvc->data);
88 			else
89 				ret = os_unmap_memory(
90 					(void *) op->u.munmap.addr,
91 						      op->u.munmap.len);
92 
93 			break;
94 		case MPROTECT:
95 			if (hvc->userspace)
96 				ret = protect(&hvc->mm->context.id,
97 					      op->u.mprotect.addr,
98 					      op->u.mprotect.len,
99 					      op->u.mprotect.prot,
100 					      finished, &hvc->data);
101 			else
102 				ret = os_protect_memory(
103 					(void *) op->u.mprotect.addr,
104 							op->u.mprotect.len,
105 							1, 1, 1);
106 			break;
107 		default:
108 			printk(KERN_ERR "Unknown op type %d in do_ops\n",
109 			       op->type);
110 			BUG();
111 			break;
112 		}
113 	}
114 
115 	if (ret == -ENOMEM)
116 		report_enomem();
117 
118 	return ret;
119 }
120 
121 static int add_mmap(unsigned long virt, unsigned long phys, unsigned long len,
122 		    unsigned int prot, struct host_vm_change *hvc)
123 {
124 	__u64 offset;
125 	struct host_vm_op *last;
126 	int fd = -1, ret = 0;
127 
128 	if (hvc->userspace)
129 		fd = phys_mapping(phys, &offset);
130 	else
131 		offset = phys;
132 	if (hvc->index != 0) {
133 		last = &hvc->ops[hvc->index - 1];
134 		if ((last->type == MMAP) &&
135 		   (last->u.mmap.addr + last->u.mmap.len == virt) &&
136 		   (last->u.mmap.prot == prot) && (last->u.mmap.fd == fd) &&
137 		   (last->u.mmap.offset + last->u.mmap.len == offset)) {
138 			last->u.mmap.len += len;
139 			return 0;
140 		}
141 	}
142 
143 	if (hvc->index == ARRAY_SIZE(hvc->ops)) {
144 		ret = do_ops(hvc, ARRAY_SIZE(hvc->ops), 0);
145 		hvc->index = 0;
146 	}
147 
148 	hvc->ops[hvc->index++] = ((struct host_vm_op)
149 				  { .type	= MMAP,
150 				    .u = { .mmap = { .addr	= virt,
151 						     .len	= len,
152 						     .prot	= prot,
153 						     .fd	= fd,
154 						     .offset	= offset }
155 			   } });
156 	return ret;
157 }
158 
159 static int add_munmap(unsigned long addr, unsigned long len,
160 		      struct host_vm_change *hvc)
161 {
162 	struct host_vm_op *last;
163 	int ret = 0;
164 
165 	if ((addr >= STUB_START) && (addr < STUB_END))
166 		return -EINVAL;
167 
168 	if (hvc->index != 0) {
169 		last = &hvc->ops[hvc->index - 1];
170 		if ((last->type == MUNMAP) &&
171 		   (last->u.munmap.addr + last->u.mmap.len == addr)) {
172 			last->u.munmap.len += len;
173 			return 0;
174 		}
175 	}
176 
177 	if (hvc->index == ARRAY_SIZE(hvc->ops)) {
178 		ret = do_ops(hvc, ARRAY_SIZE(hvc->ops), 0);
179 		hvc->index = 0;
180 	}
181 
182 	hvc->ops[hvc->index++] = ((struct host_vm_op)
183 				  { .type	= MUNMAP,
184 			     	    .u = { .munmap = { .addr	= addr,
185 						       .len	= len } } });
186 	return ret;
187 }
188 
189 static int add_mprotect(unsigned long addr, unsigned long len,
190 			unsigned int prot, struct host_vm_change *hvc)
191 {
192 	struct host_vm_op *last;
193 	int ret = 0;
194 
195 	if (hvc->index != 0) {
196 		last = &hvc->ops[hvc->index - 1];
197 		if ((last->type == MPROTECT) &&
198 		   (last->u.mprotect.addr + last->u.mprotect.len == addr) &&
199 		   (last->u.mprotect.prot == prot)) {
200 			last->u.mprotect.len += len;
201 			return 0;
202 		}
203 	}
204 
205 	if (hvc->index == ARRAY_SIZE(hvc->ops)) {
206 		ret = do_ops(hvc, ARRAY_SIZE(hvc->ops), 0);
207 		hvc->index = 0;
208 	}
209 
210 	hvc->ops[hvc->index++] = ((struct host_vm_op)
211 				  { .type	= MPROTECT,
212 			     	    .u = { .mprotect = { .addr	= addr,
213 							 .len	= len,
214 							 .prot	= prot } } });
215 	return ret;
216 }
217 
/*
 * Advance 'addr' by 'step' and round the result down to a multiple of
 * 'step' (which must be a power of two). The result is always strictly
 * greater than 'addr', even when 'addr' is already aligned.
 */
#define ADD_ROUND(addr, step) (((addr) + (step)) & ~((step) - 1))
219 
220 static inline int update_pte_range(pmd_t *pmd, unsigned long addr,
221 				   unsigned long end,
222 				   struct host_vm_change *hvc)
223 {
224 	pte_t *pte;
225 	int r, w, x, prot, ret = 0;
226 
227 	pte = pte_offset_kernel(pmd, addr);
228 	do {
229 		if ((addr >= STUB_START) && (addr < STUB_END))
230 			continue;
231 
232 		r = pte_read(*pte);
233 		w = pte_write(*pte);
234 		x = pte_exec(*pte);
235 		if (!pte_young(*pte)) {
236 			r = 0;
237 			w = 0;
238 		} else if (!pte_dirty(*pte))
239 			w = 0;
240 
241 		prot = ((r ? UM_PROT_READ : 0) | (w ? UM_PROT_WRITE : 0) |
242 			(x ? UM_PROT_EXEC : 0));
243 		if (hvc->force || pte_newpage(*pte)) {
244 			if (pte_present(*pte)) {
245 				if (pte_newpage(*pte))
246 					ret = add_mmap(addr, pte_val(*pte) & PAGE_MASK,
247 						       PAGE_SIZE, prot, hvc);
248 			} else
249 				ret = add_munmap(addr, PAGE_SIZE, hvc);
250 		} else if (pte_newprot(*pte))
251 			ret = add_mprotect(addr, PAGE_SIZE, prot, hvc);
252 		*pte = pte_mkuptodate(*pte);
253 	} while (pte++, addr += PAGE_SIZE, ((addr < end) && !ret));
254 	return ret;
255 }
256 
257 static inline int update_pmd_range(pud_t *pud, unsigned long addr,
258 				   unsigned long end,
259 				   struct host_vm_change *hvc)
260 {
261 	pmd_t *pmd;
262 	unsigned long next;
263 	int ret = 0;
264 
265 	pmd = pmd_offset(pud, addr);
266 	do {
267 		next = pmd_addr_end(addr, end);
268 		if (!pmd_present(*pmd)) {
269 			if (hvc->force || pmd_newpage(*pmd)) {
270 				ret = add_munmap(addr, next - addr, hvc);
271 				pmd_mkuptodate(*pmd);
272 			}
273 		}
274 		else ret = update_pte_range(pmd, addr, next, hvc);
275 	} while (pmd++, addr = next, ((addr < end) && !ret));
276 	return ret;
277 }
278 
279 static inline int update_pud_range(p4d_t *p4d, unsigned long addr,
280 				   unsigned long end,
281 				   struct host_vm_change *hvc)
282 {
283 	pud_t *pud;
284 	unsigned long next;
285 	int ret = 0;
286 
287 	pud = pud_offset(p4d, addr);
288 	do {
289 		next = pud_addr_end(addr, end);
290 		if (!pud_present(*pud)) {
291 			if (hvc->force || pud_newpage(*pud)) {
292 				ret = add_munmap(addr, next - addr, hvc);
293 				pud_mkuptodate(*pud);
294 			}
295 		}
296 		else ret = update_pmd_range(pud, addr, next, hvc);
297 	} while (pud++, addr = next, ((addr < end) && !ret));
298 	return ret;
299 }
300 
301 static inline int update_p4d_range(pgd_t *pgd, unsigned long addr,
302 				   unsigned long end,
303 				   struct host_vm_change *hvc)
304 {
305 	p4d_t *p4d;
306 	unsigned long next;
307 	int ret = 0;
308 
309 	p4d = p4d_offset(pgd, addr);
310 	do {
311 		next = p4d_addr_end(addr, end);
312 		if (!p4d_present(*p4d)) {
313 			if (hvc->force || p4d_newpage(*p4d)) {
314 				ret = add_munmap(addr, next - addr, hvc);
315 				p4d_mkuptodate(*p4d);
316 			}
317 		} else
318 			ret = update_pud_range(p4d, addr, next, hvc);
319 	} while (p4d++, addr = next, ((addr < end) && !ret));
320 	return ret;
321 }
322 
323 void fix_range_common(struct mm_struct *mm, unsigned long start_addr,
324 		      unsigned long end_addr, int force)
325 {
326 	pgd_t *pgd;
327 	struct host_vm_change hvc;
328 	unsigned long addr = start_addr, next;
329 	int ret = 0, userspace = 1;
330 
331 	hvc = INIT_HVC(mm, force, userspace);
332 	pgd = pgd_offset(mm, addr);
333 	do {
334 		next = pgd_addr_end(addr, end_addr);
335 		if (!pgd_present(*pgd)) {
336 			if (force || pgd_newpage(*pgd)) {
337 				ret = add_munmap(addr, next - addr, &hvc);
338 				pgd_mkuptodate(*pgd);
339 			}
340 		} else
341 			ret = update_p4d_range(pgd, addr, next, &hvc);
342 	} while (pgd++, addr = next, ((addr < end_addr) && !ret));
343 
344 	if (!ret)
345 		ret = do_ops(&hvc, hvc.index, 1);
346 
347 	/* This is not an else because ret is modified above */
348 	if (ret) {
349 		printk(KERN_ERR "fix_range_common: failed, killing current "
350 		       "process: %d\n", task_tgid_vnr(current));
351 		/* We are under mmap_lock, release it such that current can terminate */
352 		mmap_write_unlock(current->mm);
353 		force_sig(SIGKILL);
354 		do_signal(&current->thread.regs);
355 	}
356 }
357 
358 static int flush_tlb_kernel_range_common(unsigned long start, unsigned long end)
359 {
360 	struct mm_struct *mm;
361 	pgd_t *pgd;
362 	p4d_t *p4d;
363 	pud_t *pud;
364 	pmd_t *pmd;
365 	pte_t *pte;
366 	unsigned long addr, last;
367 	int updated = 0, err = 0, force = 0, userspace = 0;
368 	struct host_vm_change hvc;
369 
370 	mm = &init_mm;
371 	hvc = INIT_HVC(mm, force, userspace);
372 	for (addr = start; addr < end;) {
373 		pgd = pgd_offset(mm, addr);
374 		if (!pgd_present(*pgd)) {
375 			last = ADD_ROUND(addr, PGDIR_SIZE);
376 			if (last > end)
377 				last = end;
378 			if (pgd_newpage(*pgd)) {
379 				updated = 1;
380 				err = add_munmap(addr, last - addr, &hvc);
381 				if (err < 0)
382 					panic("munmap failed, errno = %d\n",
383 					      -err);
384 			}
385 			addr = last;
386 			continue;
387 		}
388 
389 		p4d = p4d_offset(pgd, addr);
390 		if (!p4d_present(*p4d)) {
391 			last = ADD_ROUND(addr, P4D_SIZE);
392 			if (last > end)
393 				last = end;
394 			if (p4d_newpage(*p4d)) {
395 				updated = 1;
396 				err = add_munmap(addr, last - addr, &hvc);
397 				if (err < 0)
398 					panic("munmap failed, errno = %d\n",
399 					      -err);
400 			}
401 			addr = last;
402 			continue;
403 		}
404 
405 		pud = pud_offset(p4d, addr);
406 		if (!pud_present(*pud)) {
407 			last = ADD_ROUND(addr, PUD_SIZE);
408 			if (last > end)
409 				last = end;
410 			if (pud_newpage(*pud)) {
411 				updated = 1;
412 				err = add_munmap(addr, last - addr, &hvc);
413 				if (err < 0)
414 					panic("munmap failed, errno = %d\n",
415 					      -err);
416 			}
417 			addr = last;
418 			continue;
419 		}
420 
421 		pmd = pmd_offset(pud, addr);
422 		if (!pmd_present(*pmd)) {
423 			last = ADD_ROUND(addr, PMD_SIZE);
424 			if (last > end)
425 				last = end;
426 			if (pmd_newpage(*pmd)) {
427 				updated = 1;
428 				err = add_munmap(addr, last - addr, &hvc);
429 				if (err < 0)
430 					panic("munmap failed, errno = %d\n",
431 					      -err);
432 			}
433 			addr = last;
434 			continue;
435 		}
436 
437 		pte = pte_offset_kernel(pmd, addr);
438 		if (!pte_present(*pte) || pte_newpage(*pte)) {
439 			updated = 1;
440 			err = add_munmap(addr, PAGE_SIZE, &hvc);
441 			if (err < 0)
442 				panic("munmap failed, errno = %d\n",
443 				      -err);
444 			if (pte_present(*pte))
445 				err = add_mmap(addr, pte_val(*pte) & PAGE_MASK,
446 					       PAGE_SIZE, 0, &hvc);
447 		}
448 		else if (pte_newprot(*pte)) {
449 			updated = 1;
450 			err = add_mprotect(addr, PAGE_SIZE, 0, &hvc);
451 		}
452 		addr += PAGE_SIZE;
453 	}
454 	if (!err)
455 		err = do_ops(&hvc, hvc.index, 1);
456 
457 	if (err < 0)
458 		panic("flush_tlb_kernel failed, errno = %d\n", err);
459 	return updated;
460 }
461 
462 void flush_tlb_page(struct vm_area_struct *vma, unsigned long address)
463 {
464 	pgd_t *pgd;
465 	p4d_t *p4d;
466 	pud_t *pud;
467 	pmd_t *pmd;
468 	pte_t *pte;
469 	struct mm_struct *mm = vma->vm_mm;
470 	void *flush = NULL;
471 	int r, w, x, prot, err = 0;
472 	struct mm_id *mm_id;
473 
474 	address &= PAGE_MASK;
475 	pgd = pgd_offset(mm, address);
476 	if (!pgd_present(*pgd))
477 		goto kill;
478 
479 	p4d = p4d_offset(pgd, address);
480 	if (!p4d_present(*p4d))
481 		goto kill;
482 
483 	pud = pud_offset(p4d, address);
484 	if (!pud_present(*pud))
485 		goto kill;
486 
487 	pmd = pmd_offset(pud, address);
488 	if (!pmd_present(*pmd))
489 		goto kill;
490 
491 	pte = pte_offset_kernel(pmd, address);
492 
493 	r = pte_read(*pte);
494 	w = pte_write(*pte);
495 	x = pte_exec(*pte);
496 	if (!pte_young(*pte)) {
497 		r = 0;
498 		w = 0;
499 	} else if (!pte_dirty(*pte)) {
500 		w = 0;
501 	}
502 
503 	mm_id = &mm->context.id;
504 	prot = ((r ? UM_PROT_READ : 0) | (w ? UM_PROT_WRITE : 0) |
505 		(x ? UM_PROT_EXEC : 0));
506 	if (pte_newpage(*pte)) {
507 		if (pte_present(*pte)) {
508 			unsigned long long offset;
509 			int fd;
510 
511 			fd = phys_mapping(pte_val(*pte) & PAGE_MASK, &offset);
512 			err = map(mm_id, address, PAGE_SIZE, prot, fd, offset,
513 				  1, &flush);
514 		}
515 		else err = unmap(mm_id, address, PAGE_SIZE, 1, &flush);
516 	}
517 	else if (pte_newprot(*pte))
518 		err = protect(mm_id, address, PAGE_SIZE, prot, 1, &flush);
519 
520 	if (err) {
521 		if (err == -ENOMEM)
522 			report_enomem();
523 
524 		goto kill;
525 	}
526 
527 	*pte = pte_mkuptodate(*pte);
528 
529 	return;
530 
531 kill:
532 	printk(KERN_ERR "Failed to flush page for address 0x%lx\n", address);
533 	force_sig(SIGKILL);
534 }
535 
536 void flush_tlb_all(void)
537 {
538 	/*
539 	 * Don't bother flushing if this address space is about to be
540 	 * destroyed.
541 	 */
542 	if (atomic_read(&current->mm->mm_users) == 0)
543 		return;
544 
545 	flush_tlb_mm(current->mm);
546 }
547 
/*
 * Flush the kernel range [start, end). Whether anything was actually
 * updated is not reported to the caller.
 */
void flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
	(void) flush_tlb_kernel_range_common(start, end);
}
552 
553 void flush_tlb_kernel_vm(void)
554 {
555 	flush_tlb_kernel_range_common(start_vm, end_vm);
556 }
557 
558 void __flush_tlb_one(unsigned long addr)
559 {
560 	flush_tlb_kernel_range_common(addr, addr + PAGE_SIZE);
561 }
562 
563 static void fix_range(struct mm_struct *mm, unsigned long start_addr,
564 		      unsigned long end_addr, int force)
565 {
566 	/*
567 	 * Don't bother flushing if this address space is about to be
568 	 * destroyed.
569 	 */
570 	if (atomic_read(&mm->mm_users) == 0)
571 		return;
572 
573 	fix_range_common(mm, start_addr, end_addr, force);
574 }
575 
576 void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
577 		     unsigned long end)
578 {
579 	if (vma->vm_mm == NULL)
580 		flush_tlb_kernel_range_common(start, end);
581 	else fix_range(vma->vm_mm, start, end, 0);
582 }
583 EXPORT_SYMBOL(flush_tlb_range);
584 
/*
 * Flush [start, end) of 'mm' without forcing entries that are not flagged
 * as changed.
 */
void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
			unsigned long end)
{
	const int force = 0;

	fix_range(mm, start, end, force);
}
590 
591 void flush_tlb_mm(struct mm_struct *mm)
592 {
593 	struct vm_area_struct *vma = mm->mmap;
594 
595 	while (vma != NULL) {
596 		fix_range(mm, vma->vm_start, vma->vm_end, 0);
597 		vma = vma->vm_next;
598 	}
599 }
600 
601 void force_flush_all(void)
602 {
603 	struct mm_struct *mm = current->mm;
604 	struct vm_area_struct *vma = mm->mmap;
605 
606 	while (vma != NULL) {
607 		fix_range(mm, vma->vm_start, vma->vm_end, 1);
608 		vma = vma->vm_next;
609 	}
610 }
611 
/* Bit masks applied to each PTE by change_page_range(). */
struct page_change_data {
	unsigned int set_mask;		/* bits to set */
	unsigned int clear_mask;	/* bits to clear */
};
615 
616 static int change_page_range(pte_t *ptep, unsigned long addr, void *data)
617 {
618 	struct page_change_data *cdata = data;
619 	pte_t pte = READ_ONCE(*ptep);
620 
621 	pte_clear_bits(pte, cdata->clear_mask);
622 	pte_set_bits(pte, cdata->set_mask);
623 
624 	set_pte(ptep, pte);
625 	return 0;
626 }
627 
628 static int change_memory(unsigned long start, unsigned long pages,
629 			 unsigned int set_mask, unsigned int clear_mask)
630 {
631 	unsigned long size = pages * PAGE_SIZE;
632 	struct page_change_data data;
633 	int ret;
634 
635 	data.set_mask = set_mask;
636 	data.clear_mask = clear_mask;
637 
638 	ret = apply_to_page_range(&init_mm, start, size, change_page_range,
639 				  &data);
640 
641 	flush_tlb_kernel_range(start, start + size);
642 
643 	return ret;
644 }
645 
646 int set_memory_ro(unsigned long addr, int numpages)
647 {
648 	return change_memory(addr, numpages, 0, _PAGE_RW);
649 }
650 
651 int set_memory_rw(unsigned long addr, int numpages)
652 {
653 	return change_memory(addr, numpages, _PAGE_RW, 0);
654 }
655 
/*
 * Not implemented here: always returns -EOPNOTSUPP and touches nothing.
 */
int set_memory_nx(unsigned long addr, int numpages)
{
	(void) addr;
	(void) numpages;

	return -EOPNOTSUPP;
}
660 
/*
 * Not implemented here: always returns -EOPNOTSUPP and touches nothing.
 */
int set_memory_x(unsigned long addr, int numpages)
{
	(void) addr;
	(void) numpages;

	return -EOPNOTSUPP;
}
665