1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) 4 */ 5 6 #include <linux/mm.h> 7 #include <linux/module.h> 8 #include <linux/sched/signal.h> 9 10 #include <asm/tlbflush.h> 11 #include <as-layout.h> 12 #include <mem_user.h> 13 #include <os.h> 14 #include <skas.h> 15 #include <kern_util.h> 16 17 struct host_vm_change { 18 struct host_vm_op { 19 enum { NONE, MMAP, MUNMAP, MPROTECT } type; 20 union { 21 struct { 22 unsigned long addr; 23 unsigned long len; 24 unsigned int prot; 25 int fd; 26 __u64 offset; 27 } mmap; 28 struct { 29 unsigned long addr; 30 unsigned long len; 31 } munmap; 32 struct { 33 unsigned long addr; 34 unsigned long len; 35 unsigned int prot; 36 } mprotect; 37 } u; 38 } ops[1]; 39 int userspace; 40 int index; 41 struct mm_struct *mm; 42 void *data; 43 int force; 44 }; 45 46 #define INIT_HVC(mm, force, userspace) \ 47 ((struct host_vm_change) \ 48 { .ops = { { .type = NONE } }, \ 49 .mm = mm, \ 50 .data = NULL, \ 51 .userspace = userspace, \ 52 .index = 0, \ 53 .force = force }) 54 55 static void report_enomem(void) 56 { 57 printk(KERN_ERR "UML ran out of memory on the host side! " 58 "This can happen due to a memory limitation or " 59 "vm.max_map_count has been reached.\n"); 60 } 61 62 static int do_ops(struct host_vm_change *hvc, int end, 63 int finished) 64 { 65 struct host_vm_op *op; 66 int i, ret = 0; 67 68 for (i = 0; i < end && !ret; i++) { 69 op = &hvc->ops[i]; 70 switch (op->type) { 71 case MMAP: 72 if (hvc->userspace) 73 ret = map(&hvc->mm->context.id, op->u.mmap.addr, 74 op->u.mmap.len, op->u.mmap.prot, 75 op->u.mmap.fd, 76 op->u.mmap.offset, finished, 77 &hvc->data); 78 else 79 map_memory(op->u.mmap.addr, op->u.mmap.offset, 80 op->u.mmap.len, 1, 1, 1); 81 break; 82 case MUNMAP: 83 if (hvc->userspace) 84 ret = unmap(&hvc->mm->context.id, 85 op->u.munmap.addr, 86 op->u.munmap.len, finished, 87 &hvc->data); 88 else 89 ret = os_unmap_memory( 90 (void *) op->u.munmap.addr, 91 op->u.munmap.len); 92 93 break; 94 case MPROTECT: 95 if (hvc->userspace) 96 ret = protect(&hvc->mm->context.id, 97 op->u.mprotect.addr, 98 op->u.mprotect.len, 99 op->u.mprotect.prot, 100 finished, &hvc->data); 101 else 102 ret = os_protect_memory( 103 (void *) op->u.mprotect.addr, 104 op->u.mprotect.len, 105 1, 1, 1); 106 break; 107 default: 108 printk(KERN_ERR "Unknown op type %d in do_ops\n", 109 op->type); 110 BUG(); 111 break; 112 } 113 } 114 115 if (ret == -ENOMEM) 116 report_enomem(); 117 118 return ret; 119 } 120 121 static int add_mmap(unsigned long virt, unsigned long phys, unsigned long len, 122 unsigned int prot, struct host_vm_change *hvc) 123 { 124 __u64 offset; 125 struct host_vm_op *last; 126 int fd = -1, ret = 0; 127 128 if (hvc->userspace) 129 fd = phys_mapping(phys, &offset); 130 else 131 offset = phys; 132 if (hvc->index != 0) { 133 last = &hvc->ops[hvc->index - 1]; 134 if ((last->type == MMAP) && 135 (last->u.mmap.addr + last->u.mmap.len == virt) && 136 (last->u.mmap.prot == prot) && (last->u.mmap.fd == fd) && 137 (last->u.mmap.offset + last->u.mmap.len == offset)) { 138 last->u.mmap.len += len; 139 return 0; 140 } 141 } 142 143 if (hvc->index == ARRAY_SIZE(hvc->ops)) { 144 ret = do_ops(hvc, ARRAY_SIZE(hvc->ops), 0); 145 hvc->index = 0; 146 } 147 148 hvc->ops[hvc->index++] = ((struct host_vm_op) 149 { .type = MMAP, 150 .u = { .mmap = { .addr = virt, 151 .len = len, 152 .prot = prot, 153 .fd = fd, 154 .offset = offset } 155 } }); 156 return ret; 157 } 158 159 static int add_munmap(unsigned long addr, unsigned long len, 160 struct host_vm_change *hvc) 161 { 162 struct host_vm_op *last; 163 int ret = 0; 164 165 if (hvc->index != 0) { 166 last = &hvc->ops[hvc->index - 1]; 167 if ((last->type == MUNMAP) && 168 (last->u.munmap.addr + last->u.mmap.len == addr)) { 169 last->u.munmap.len += len; 170 return 0; 171 } 172 } 173 174 if (hvc->index == ARRAY_SIZE(hvc->ops)) { 175 ret = do_ops(hvc, ARRAY_SIZE(hvc->ops), 0); 176 hvc->index = 0; 177 } 178 179 hvc->ops[hvc->index++] = ((struct host_vm_op) 180 { .type = MUNMAP, 181 .u = { .munmap = { .addr = addr, 182 .len = len } } }); 183 return ret; 184 } 185 186 static int add_mprotect(unsigned long addr, unsigned long len, 187 unsigned int prot, struct host_vm_change *hvc) 188 { 189 struct host_vm_op *last; 190 int ret = 0; 191 192 if (hvc->index != 0) { 193 last = &hvc->ops[hvc->index - 1]; 194 if ((last->type == MPROTECT) && 195 (last->u.mprotect.addr + last->u.mprotect.len == addr) && 196 (last->u.mprotect.prot == prot)) { 197 last->u.mprotect.len += len; 198 return 0; 199 } 200 } 201 202 if (hvc->index == ARRAY_SIZE(hvc->ops)) { 203 ret = do_ops(hvc, ARRAY_SIZE(hvc->ops), 0); 204 hvc->index = 0; 205 } 206 207 hvc->ops[hvc->index++] = ((struct host_vm_op) 208 { .type = MPROTECT, 209 .u = { .mprotect = { .addr = addr, 210 .len = len, 211 .prot = prot } } }); 212 return ret; 213 } 214 215 #define ADD_ROUND(n, inc) (((n) + (inc)) & ~((inc) - 1)) 216 217 static inline int update_pte_range(pmd_t *pmd, unsigned long addr, 218 unsigned long end, 219 struct host_vm_change *hvc) 220 { 221 pte_t *pte; 222 int r, w, x, prot, ret = 0; 223 224 pte = pte_offset_kernel(pmd, addr); 225 do { 226 r = pte_read(*pte); 227 w = pte_write(*pte); 228 x = pte_exec(*pte); 229 if (!pte_young(*pte)) { 230 r = 0; 231 w = 0; 232 } else if (!pte_dirty(*pte)) 233 w = 0; 234 235 prot = ((r ? UM_PROT_READ : 0) | (w ? UM_PROT_WRITE : 0) | 236 (x ? UM_PROT_EXEC : 0)); 237 if (hvc->force || pte_newpage(*pte)) { 238 if (pte_present(*pte)) { 239 if (pte_newpage(*pte)) 240 ret = add_mmap(addr, pte_val(*pte) & PAGE_MASK, 241 PAGE_SIZE, prot, hvc); 242 } else 243 ret = add_munmap(addr, PAGE_SIZE, hvc); 244 } else if (pte_newprot(*pte)) 245 ret = add_mprotect(addr, PAGE_SIZE, prot, hvc); 246 *pte = pte_mkuptodate(*pte); 247 } while (pte++, addr += PAGE_SIZE, ((addr < end) && !ret)); 248 return ret; 249 } 250 251 static inline int update_pmd_range(pud_t *pud, unsigned long addr, 252 unsigned long end, 253 struct host_vm_change *hvc) 254 { 255 pmd_t *pmd; 256 unsigned long next; 257 int ret = 0; 258 259 pmd = pmd_offset(pud, addr); 260 do { 261 next = pmd_addr_end(addr, end); 262 if (!pmd_present(*pmd)) { 263 if (hvc->force || pmd_newpage(*pmd)) { 264 ret = add_munmap(addr, next - addr, hvc); 265 pmd_mkuptodate(*pmd); 266 } 267 } 268 else ret = update_pte_range(pmd, addr, next, hvc); 269 } while (pmd++, addr = next, ((addr < end) && !ret)); 270 return ret; 271 } 272 273 static inline int update_pud_range(p4d_t *p4d, unsigned long addr, 274 unsigned long end, 275 struct host_vm_change *hvc) 276 { 277 pud_t *pud; 278 unsigned long next; 279 int ret = 0; 280 281 pud = pud_offset(p4d, addr); 282 do { 283 next = pud_addr_end(addr, end); 284 if (!pud_present(*pud)) { 285 if (hvc->force || pud_newpage(*pud)) { 286 ret = add_munmap(addr, next - addr, hvc); 287 pud_mkuptodate(*pud); 288 } 289 } 290 else ret = update_pmd_range(pud, addr, next, hvc); 291 } while (pud++, addr = next, ((addr < end) && !ret)); 292 return ret; 293 } 294 295 static inline int update_p4d_range(pgd_t *pgd, unsigned long addr, 296 unsigned long end, 297 struct host_vm_change *hvc) 298 { 299 p4d_t *p4d; 300 unsigned long next; 301 int ret = 0; 302 303 p4d = p4d_offset(pgd, addr); 304 do { 305 next = p4d_addr_end(addr, end); 306 if (!p4d_present(*p4d)) { 307 if (hvc->force || p4d_newpage(*p4d)) { 308 ret = add_munmap(addr, next - addr, hvc); 309 p4d_mkuptodate(*p4d); 310 } 311 } else 312 ret = update_pud_range(p4d, addr, next, hvc); 313 } while (p4d++, addr = next, ((addr < end) && !ret)); 314 return ret; 315 } 316 317 void fix_range_common(struct mm_struct *mm, unsigned long start_addr, 318 unsigned long end_addr, int force) 319 { 320 pgd_t *pgd; 321 struct host_vm_change hvc; 322 unsigned long addr = start_addr, next; 323 int ret = 0, userspace = 1; 324 325 hvc = INIT_HVC(mm, force, userspace); 326 pgd = pgd_offset(mm, addr); 327 do { 328 next = pgd_addr_end(addr, end_addr); 329 if (!pgd_present(*pgd)) { 330 if (force || pgd_newpage(*pgd)) { 331 ret = add_munmap(addr, next - addr, &hvc); 332 pgd_mkuptodate(*pgd); 333 } 334 } else 335 ret = update_p4d_range(pgd, addr, next, &hvc); 336 } while (pgd++, addr = next, ((addr < end_addr) && !ret)); 337 338 if (!ret) 339 ret = do_ops(&hvc, hvc.index, 1); 340 341 /* This is not an else because ret is modified above */ 342 if (ret) { 343 struct mm_id *mm_idp = ¤t->mm->context.id; 344 345 printk(KERN_ERR "fix_range_common: failed, killing current " 346 "process: %d\n", task_tgid_vnr(current)); 347 mm_idp->kill = 1; 348 } 349 } 350 351 static int flush_tlb_kernel_range_common(unsigned long start, unsigned long end) 352 { 353 struct mm_struct *mm; 354 pgd_t *pgd; 355 p4d_t *p4d; 356 pud_t *pud; 357 pmd_t *pmd; 358 pte_t *pte; 359 unsigned long addr, last; 360 int updated = 0, err = 0, force = 0, userspace = 0; 361 struct host_vm_change hvc; 362 363 mm = &init_mm; 364 hvc = INIT_HVC(mm, force, userspace); 365 for (addr = start; addr < end;) { 366 pgd = pgd_offset(mm, addr); 367 if (!pgd_present(*pgd)) { 368 last = ADD_ROUND(addr, PGDIR_SIZE); 369 if (last > end) 370 last = end; 371 if (pgd_newpage(*pgd)) { 372 updated = 1; 373 err = add_munmap(addr, last - addr, &hvc); 374 if (err < 0) 375 panic("munmap failed, errno = %d\n", 376 -err); 377 } 378 addr = last; 379 continue; 380 } 381 382 p4d = p4d_offset(pgd, addr); 383 if (!p4d_present(*p4d)) { 384 last = ADD_ROUND(addr, P4D_SIZE); 385 if (last > end) 386 last = end; 387 if (p4d_newpage(*p4d)) { 388 updated = 1; 389 err = add_munmap(addr, last - addr, &hvc); 390 if (err < 0) 391 panic("munmap failed, errno = %d\n", 392 -err); 393 } 394 addr = last; 395 continue; 396 } 397 398 pud = pud_offset(p4d, addr); 399 if (!pud_present(*pud)) { 400 last = ADD_ROUND(addr, PUD_SIZE); 401 if (last > end) 402 last = end; 403 if (pud_newpage(*pud)) { 404 updated = 1; 405 err = add_munmap(addr, last - addr, &hvc); 406 if (err < 0) 407 panic("munmap failed, errno = %d\n", 408 -err); 409 } 410 addr = last; 411 continue; 412 } 413 414 pmd = pmd_offset(pud, addr); 415 if (!pmd_present(*pmd)) { 416 last = ADD_ROUND(addr, PMD_SIZE); 417 if (last > end) 418 last = end; 419 if (pmd_newpage(*pmd)) { 420 updated = 1; 421 err = add_munmap(addr, last - addr, &hvc); 422 if (err < 0) 423 panic("munmap failed, errno = %d\n", 424 -err); 425 } 426 addr = last; 427 continue; 428 } 429 430 pte = pte_offset_kernel(pmd, addr); 431 if (!pte_present(*pte) || pte_newpage(*pte)) { 432 updated = 1; 433 err = add_munmap(addr, PAGE_SIZE, &hvc); 434 if (err < 0) 435 panic("munmap failed, errno = %d\n", 436 -err); 437 if (pte_present(*pte)) 438 err = add_mmap(addr, pte_val(*pte) & PAGE_MASK, 439 PAGE_SIZE, 0, &hvc); 440 } 441 else if (pte_newprot(*pte)) { 442 updated = 1; 443 err = add_mprotect(addr, PAGE_SIZE, 0, &hvc); 444 } 445 addr += PAGE_SIZE; 446 } 447 if (!err) 448 err = do_ops(&hvc, hvc.index, 1); 449 450 if (err < 0) 451 panic("flush_tlb_kernel failed, errno = %d\n", err); 452 return updated; 453 } 454 455 void flush_tlb_page(struct vm_area_struct *vma, unsigned long address) 456 { 457 pgd_t *pgd; 458 p4d_t *p4d; 459 pud_t *pud; 460 pmd_t *pmd; 461 pte_t *pte; 462 struct mm_struct *mm = vma->vm_mm; 463 void *flush = NULL; 464 int r, w, x, prot, err = 0; 465 struct mm_id *mm_id; 466 467 address &= PAGE_MASK; 468 469 pgd = pgd_offset(mm, address); 470 if (!pgd_present(*pgd)) 471 goto kill; 472 473 p4d = p4d_offset(pgd, address); 474 if (!p4d_present(*p4d)) 475 goto kill; 476 477 pud = pud_offset(p4d, address); 478 if (!pud_present(*pud)) 479 goto kill; 480 481 pmd = pmd_offset(pud, address); 482 if (!pmd_present(*pmd)) 483 goto kill; 484 485 pte = pte_offset_kernel(pmd, address); 486 487 r = pte_read(*pte); 488 w = pte_write(*pte); 489 x = pte_exec(*pte); 490 if (!pte_young(*pte)) { 491 r = 0; 492 w = 0; 493 } else if (!pte_dirty(*pte)) { 494 w = 0; 495 } 496 497 mm_id = &mm->context.id; 498 prot = ((r ? UM_PROT_READ : 0) | (w ? UM_PROT_WRITE : 0) | 499 (x ? UM_PROT_EXEC : 0)); 500 if (pte_newpage(*pte)) { 501 if (pte_present(*pte)) { 502 unsigned long long offset; 503 int fd; 504 505 fd = phys_mapping(pte_val(*pte) & PAGE_MASK, &offset); 506 err = map(mm_id, address, PAGE_SIZE, prot, fd, offset, 507 1, &flush); 508 } 509 else err = unmap(mm_id, address, PAGE_SIZE, 1, &flush); 510 } 511 else if (pte_newprot(*pte)) 512 err = protect(mm_id, address, PAGE_SIZE, prot, 1, &flush); 513 514 if (err) { 515 if (err == -ENOMEM) 516 report_enomem(); 517 518 goto kill; 519 } 520 521 *pte = pte_mkuptodate(*pte); 522 523 return; 524 525 kill: 526 printk(KERN_ERR "Failed to flush page for address 0x%lx\n", address); 527 force_sig(SIGKILL); 528 } 529 530 void flush_tlb_all(void) 531 { 532 /* 533 * Don't bother flushing if this address space is about to be 534 * destroyed. 535 */ 536 if (atomic_read(¤t->mm->mm_users) == 0) 537 return; 538 539 flush_tlb_mm(current->mm); 540 } 541 542 void flush_tlb_kernel_range(unsigned long start, unsigned long end) 543 { 544 flush_tlb_kernel_range_common(start, end); 545 } 546 547 void flush_tlb_kernel_vm(void) 548 { 549 flush_tlb_kernel_range_common(start_vm, end_vm); 550 } 551 552 void __flush_tlb_one(unsigned long addr) 553 { 554 flush_tlb_kernel_range_common(addr, addr + PAGE_SIZE); 555 } 556 557 static void fix_range(struct mm_struct *mm, unsigned long start_addr, 558 unsigned long end_addr, int force) 559 { 560 /* 561 * Don't bother flushing if this address space is about to be 562 * destroyed. 563 */ 564 if (atomic_read(&mm->mm_users) == 0) 565 return; 566 567 fix_range_common(mm, start_addr, end_addr, force); 568 } 569 570 void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, 571 unsigned long end) 572 { 573 if (vma->vm_mm == NULL) 574 flush_tlb_kernel_range_common(start, end); 575 else fix_range(vma->vm_mm, start, end, 0); 576 } 577 EXPORT_SYMBOL(flush_tlb_range); 578 579 void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, 580 unsigned long end) 581 { 582 fix_range(mm, start, end, 0); 583 } 584 585 void flush_tlb_mm(struct mm_struct *mm) 586 { 587 struct vm_area_struct *vma = mm->mmap; 588 589 while (vma != NULL) { 590 fix_range(mm, vma->vm_start, vma->vm_end, 0); 591 vma = vma->vm_next; 592 } 593 } 594 595 void force_flush_all(void) 596 { 597 struct mm_struct *mm = current->mm; 598 struct vm_area_struct *vma = mm->mmap; 599 600 while (vma != NULL) { 601 fix_range(mm, vma->vm_start, vma->vm_end, 1); 602 vma = vma->vm_next; 603 } 604 } 605