/*
 * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
 * Licensed under the GPL
 */

#include <linux/mm.h>
#include <linux/module.h>
#include <linux/sched/signal.h>

#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <as-layout.h>
#include <mem_user.h>
#include <os.h>
#include <skas.h>
#include <kern_util.h>

struct host_vm_change {
	struct host_vm_op {
		enum { NONE, MMAP, MUNMAP, MPROTECT } type;
		union {
			struct {
				unsigned long addr;
				unsigned long len;
				unsigned int prot;
				int fd;
				__u64 offset;
			} mmap;
			struct {
				unsigned long addr;
				unsigned long len;
			} munmap;
			struct {
				unsigned long addr;
				unsigned long len;
				unsigned int prot;
			} mprotect;
		} u;
	} ops[1];
	int userspace;
	int index;
	struct mm_struct *mm;
	void *data;
	int force;
};

#define INIT_HVC(mm, force, userspace) \
	((struct host_vm_change) \
	 { .ops		= { { .type = NONE } },	\
	   .mm		= mm, \
	   .data	= NULL, \
	   .userspace	= userspace, \
	   .index	= 0, \
	   .force	= force })

static void report_enomem(void)
{
	printk(KERN_ERR "UML ran out of memory on the host side! "
			"This can happen due to a memory limitation or "
			"because vm.max_map_count has been reached.\n");
}

static int do_ops(struct host_vm_change *hvc, int end,
		  int finished)
{
	struct host_vm_op *op;
	int i, ret = 0;

	for (i = 0; i < end && !ret; i++) {
		op = &hvc->ops[i];
		switch (op->type) {
		case MMAP:
			if (hvc->userspace)
				ret = map(&hvc->mm->context.id, op->u.mmap.addr,
					  op->u.mmap.len, op->u.mmap.prot,
					  op->u.mmap.fd,
					  op->u.mmap.offset, finished,
					  &hvc->data);
			else
				map_memory(op->u.mmap.addr, op->u.mmap.offset,
					   op->u.mmap.len, 1, 1, 1);
			break;
		case MUNMAP:
			if (hvc->userspace)
				ret = unmap(&hvc->mm->context.id,
					    op->u.munmap.addr,
					    op->u.munmap.len, finished,
					    &hvc->data);
			else
				ret = os_unmap_memory(
					(void *) op->u.munmap.addr,
					op->u.munmap.len);

			break;
		case MPROTECT:
			if (hvc->userspace)
				ret = protect(&hvc->mm->context.id,
					      op->u.mprotect.addr,
					      op->u.mprotect.len,
					      op->u.mprotect.prot,
					      finished, &hvc->data);
			else
				ret = os_protect_memory(
					(void *) op->u.mprotect.addr,
					op->u.mprotect.len,
					1, 1, 1);
			break;
		default:
			printk(KERN_ERR "Unknown op type %d in do_ops\n",
			       op->type);
			BUG();
			break;
		}
	}

	if (ret == -ENOMEM)
		report_enomem();

	return ret;
}

static int add_mmap(unsigned long virt, unsigned long phys, unsigned long len,
		    unsigned int prot, struct host_vm_change *hvc)
{
	__u64 offset;
	struct host_vm_op *last;
	int fd = -1, ret = 0;

	if (hvc->userspace)
		fd = phys_mapping(phys, &offset);
	else
		offset = phys;
	if (hvc->index != 0) {
		last = &hvc->ops[hvc->index - 1];
		if ((last->type == MMAP) &&
		    (last->u.mmap.addr + last->u.mmap.len == virt) &&
		    (last->u.mmap.prot == prot) && (last->u.mmap.fd == fd) &&
		    (last->u.mmap.offset + last->u.mmap.len == offset)) {
			last->u.mmap.len += len;
			return 0;
		}
	}

	if (hvc->index == ARRAY_SIZE(hvc->ops)) {
		ret = do_ops(hvc, ARRAY_SIZE(hvc->ops), 0);
		hvc->index = 0;
	}

	hvc->ops[hvc->index++] = ((struct host_vm_op)
				  { .type	= MMAP,
				    .u = { .mmap = { .addr	= virt,
						     .len	= len,
						     .prot	= prot,
						     .fd	= fd,
						     .offset	= offset }
				  } });
	return ret;
}
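/*
 * Queue a host munmap for [addr, addr + len), merging with the previous
 * queued op when that op is an munmap ending exactly at addr.  Addresses
 * inside the stub pages are rejected with -EINVAL.  When the ops array is
 * full, the pending ops are flushed via do_ops() before the new entry is
 * added.
 */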
static int add_munmap(unsigned long addr, unsigned long len,
		      struct host_vm_change *hvc)
{
	struct host_vm_op *last;
	int ret = 0;

	if ((addr >= STUB_START) && (addr < STUB_END))
		return -EINVAL;

	if (hvc->index != 0) {
		last = &hvc->ops[hvc->index - 1];
		if ((last->type == MUNMAP) &&
		    (last->u.munmap.addr + last->u.munmap.len == addr)) {
			last->u.munmap.len += len;
			return 0;
		}
	}

	if (hvc->index == ARRAY_SIZE(hvc->ops)) {
		ret = do_ops(hvc, ARRAY_SIZE(hvc->ops), 0);
		hvc->index = 0;
	}

	hvc->ops[hvc->index++] = ((struct host_vm_op)
				  { .type	= MUNMAP,
				    .u = { .munmap = { .addr	= addr,
						       .len	= len } } });
	return ret;
}

static int add_mprotect(unsigned long addr, unsigned long len,
			unsigned int prot, struct host_vm_change *hvc)
{
	struct host_vm_op *last;
	int ret = 0;

	if (hvc->index != 0) {
		last = &hvc->ops[hvc->index - 1];
		if ((last->type == MPROTECT) &&
		    (last->u.mprotect.addr + last->u.mprotect.len == addr) &&
		    (last->u.mprotect.prot == prot)) {
			last->u.mprotect.len += len;
			return 0;
		}
	}

	if (hvc->index == ARRAY_SIZE(hvc->ops)) {
		ret = do_ops(hvc, ARRAY_SIZE(hvc->ops), 0);
		hvc->index = 0;
	}

	hvc->ops[hvc->index++] = ((struct host_vm_op)
				  { .type	= MPROTECT,
				    .u = { .mprotect = { .addr	= addr,
							 .len	= len,
							 .prot	= prot } } });
	return ret;
}

#define ADD_ROUND(n, inc) (((n) + (inc)) & ~((inc) - 1))
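/*
 * The update_*_range() helpers walk one level of the page tables over
 * [addr, end), turning entries that are not yet reflected on the host
 * (new pages, new protections, removed mappings) into batched
 * host_vm_op entries, and marking each entry up to date as they go.
 */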
static inline int update_pte_range(pmd_t *pmd, unsigned long addr,
				   unsigned long end,
				   struct host_vm_change *hvc)
{
	pte_t *pte;
	int r, w, x, prot, ret = 0;

	pte = pte_offset_kernel(pmd, addr);
	do {
		if ((addr >= STUB_START) && (addr < STUB_END))
			continue;

		r = pte_read(*pte);
		w = pte_write(*pte);
		x = pte_exec(*pte);
		if (!pte_young(*pte)) {
			r = 0;
			w = 0;
		} else if (!pte_dirty(*pte))
			w = 0;

		prot = ((r ? UM_PROT_READ : 0) | (w ? UM_PROT_WRITE : 0) |
			(x ? UM_PROT_EXEC : 0));
		if (hvc->force || pte_newpage(*pte)) {
			if (pte_present(*pte)) {
				if (pte_newpage(*pte))
					ret = add_mmap(addr, pte_val(*pte) & PAGE_MASK,
						       PAGE_SIZE, prot, hvc);
			} else
				ret = add_munmap(addr, PAGE_SIZE, hvc);
		} else if (pte_newprot(*pte))
			ret = add_mprotect(addr, PAGE_SIZE, prot, hvc);
		*pte = pte_mkuptodate(*pte);
	} while (pte++, addr += PAGE_SIZE, ((addr < end) && !ret));
	return ret;
}

static inline int update_pmd_range(pud_t *pud, unsigned long addr,
				   unsigned long end,
				   struct host_vm_change *hvc)
{
	pmd_t *pmd;
	unsigned long next;
	int ret = 0;

	pmd = pmd_offset(pud, addr);
	do {
		next = pmd_addr_end(addr, end);
		if (!pmd_present(*pmd)) {
			if (hvc->force || pmd_newpage(*pmd)) {
				ret = add_munmap(addr, next - addr, hvc);
				pmd_mkuptodate(*pmd);
			}
		}
		else ret = update_pte_range(pmd, addr, next, hvc);
	} while (pmd++, addr = next, ((addr < end) && !ret));
	return ret;
}

static inline int update_pud_range(pgd_t *pgd, unsigned long addr,
				   unsigned long end,
				   struct host_vm_change *hvc)
{
	pud_t *pud;
	unsigned long next;
	int ret = 0;

	pud = pud_offset(pgd, addr);
	do {
		next = pud_addr_end(addr, end);
		if (!pud_present(*pud)) {
			if (hvc->force || pud_newpage(*pud)) {
				ret = add_munmap(addr, next - addr, hvc);
				pud_mkuptodate(*pud);
			}
		}
		else ret = update_pmd_range(pud, addr, next, hvc);
	} while (pud++, addr = next, ((addr < end) && !ret));
	return ret;
}

void fix_range_common(struct mm_struct *mm, unsigned long start_addr,
		      unsigned long end_addr, int force)
{
	pgd_t *pgd;
	struct host_vm_change hvc;
	unsigned long addr = start_addr, next;
	int ret = 0, userspace = 1;

	hvc = INIT_HVC(mm, force, userspace);
	pgd = pgd_offset(mm, addr);
	do {
		next = pgd_addr_end(addr, end_addr);
		if (!pgd_present(*pgd)) {
			if (force || pgd_newpage(*pgd)) {
				ret = add_munmap(addr, next - addr, &hvc);
				pgd_mkuptodate(*pgd);
			}
		}
		else ret = update_pud_range(pgd, addr, next, &hvc);
	} while (pgd++, addr = next, ((addr < end_addr) && !ret));

	if (!ret)
		ret = do_ops(&hvc, hvc.index, 1);

	/* This is not an else because ret is modified above */
	if (ret) {
		printk(KERN_ERR "fix_range_common: failed, killing current "
		       "process: %d\n", task_tgid_vnr(current));
		/* We are under mmap_sem, release it such that current can terminate */
		up_write(&current->mm->mmap_sem);
		force_sig(SIGKILL);
		do_signal(&current->thread.regs);
	}
}
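/*
 * Walk the kernel (init_mm) page tables for [start, end) and bring the
 * host mappings into sync: unmap ranges whose entries have gone away and
 * remap or reprotect pages that changed.  Returns nonzero if any host
 * mapping was updated.
 */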
static int flush_tlb_kernel_range_common(unsigned long start, unsigned long end)
{
	struct mm_struct *mm;
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;
	unsigned long addr, last;
	int updated = 0, err = 0, force = 0, userspace = 0;
	struct host_vm_change hvc;

	mm = &init_mm;
	hvc = INIT_HVC(mm, force, userspace);
	for (addr = start; addr < end;) {
		pgd = pgd_offset(mm, addr);
		if (!pgd_present(*pgd)) {
			last = ADD_ROUND(addr, PGDIR_SIZE);
			if (last > end)
				last = end;
			if (pgd_newpage(*pgd)) {
				updated = 1;
				err = add_munmap(addr, last - addr, &hvc);
				if (err < 0)
					panic("munmap failed, errno = %d\n",
					      -err);
			}
			addr = last;
			continue;
		}

		pud = pud_offset(pgd, addr);
		if (!pud_present(*pud)) {
			last = ADD_ROUND(addr, PUD_SIZE);
			if (last > end)
				last = end;
			if (pud_newpage(*pud)) {
				updated = 1;
				err = add_munmap(addr, last - addr, &hvc);
				if (err < 0)
					panic("munmap failed, errno = %d\n",
					      -err);
			}
			addr = last;
			continue;
		}

		pmd = pmd_offset(pud, addr);
		if (!pmd_present(*pmd)) {
			last = ADD_ROUND(addr, PMD_SIZE);
			if (last > end)
				last = end;
			if (pmd_newpage(*pmd)) {
				updated = 1;
				err = add_munmap(addr, last - addr, &hvc);
				if (err < 0)
					panic("munmap failed, errno = %d\n",
					      -err);
			}
			addr = last;
			continue;
		}

		pte = pte_offset_kernel(pmd, addr);
		if (!pte_present(*pte) || pte_newpage(*pte)) {
			updated = 1;
			err = add_munmap(addr, PAGE_SIZE, &hvc);
			if (err < 0)
				panic("munmap failed, errno = %d\n",
				      -err);
			if (pte_present(*pte))
				err = add_mmap(addr, pte_val(*pte) & PAGE_MASK,
					       PAGE_SIZE, 0, &hvc);
		}
		else if (pte_newprot(*pte)) {
			updated = 1;
			err = add_mprotect(addr, PAGE_SIZE, 0, &hvc);
		}
		addr += PAGE_SIZE;
	}
	if (!err)
		err = do_ops(&hvc, hvc.index, 1);

	if (err < 0)
		panic("flush_tlb_kernel failed, errno = %d\n", err);
	return updated;
}
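/*
 * Synchronize the host mapping of a single user page: walk the page
 * tables for address, derive the protection from the accessed/dirty
 * bits, and issue the corresponding map/unmap/protect to the host.
 * On failure the current process is sent SIGKILL.
 */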
void flush_tlb_page(struct vm_area_struct *vma, unsigned long address)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;
	struct mm_struct *mm = vma->vm_mm;
	void *flush = NULL;
	int r, w, x, prot, err = 0;
	struct mm_id *mm_id;

	address &= PAGE_MASK;
	pgd = pgd_offset(mm, address);
	if (!pgd_present(*pgd))
		goto kill;

	pud = pud_offset(pgd, address);
	if (!pud_present(*pud))
		goto kill;

	pmd = pmd_offset(pud, address);
	if (!pmd_present(*pmd))
		goto kill;

	pte = pte_offset_kernel(pmd, address);

	r = pte_read(*pte);
	w = pte_write(*pte);
	x = pte_exec(*pte);
	if (!pte_young(*pte)) {
		r = 0;
		w = 0;
	} else if (!pte_dirty(*pte)) {
		w = 0;
	}

	mm_id = &mm->context.id;
	prot = ((r ? UM_PROT_READ : 0) | (w ? UM_PROT_WRITE : 0) |
		(x ? UM_PROT_EXEC : 0));
	if (pte_newpage(*pte)) {
		if (pte_present(*pte)) {
			unsigned long long offset;
			int fd;

			fd = phys_mapping(pte_val(*pte) & PAGE_MASK, &offset);
			err = map(mm_id, address, PAGE_SIZE, prot, fd, offset,
				  1, &flush);
		}
		else err = unmap(mm_id, address, PAGE_SIZE, 1, &flush);
	}
	else if (pte_newprot(*pte))
		err = protect(mm_id, address, PAGE_SIZE, prot, 1, &flush);

	if (err) {
		if (err == -ENOMEM)
			report_enomem();

		goto kill;
	}

	*pte = pte_mkuptodate(*pte);

	return;

kill:
	printk(KERN_ERR "Failed to flush page for address 0x%lx\n", address);
	force_sig(SIGKILL);
}

pgd_t *pgd_offset_proc(struct mm_struct *mm, unsigned long address)
{
	return pgd_offset(mm, address);
}

pud_t *pud_offset_proc(pgd_t *pgd, unsigned long address)
{
	return pud_offset(pgd, address);
}

pmd_t *pmd_offset_proc(pud_t *pud, unsigned long address)
{
	return pmd_offset(pud, address);
}

pte_t *pte_offset_proc(pmd_t *pmd, unsigned long address)
{
	return pte_offset_kernel(pmd, address);
}

pte_t *addr_pte(struct task_struct *task, unsigned long addr)
{
	pgd_t *pgd = pgd_offset(task->mm, addr);
	pud_t *pud = pud_offset(pgd, addr);
	pmd_t *pmd = pmd_offset(pud, addr);

	return pte_offset_map(pmd, addr);
}

void flush_tlb_all(void)
{
	/*
	 * Don't bother flushing if this address space is about to be
	 * destroyed.
	 */
	if (atomic_read(&current->mm->mm_users) == 0)
		return;

	flush_tlb_mm(current->mm);
}

void flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
	flush_tlb_kernel_range_common(start, end);
}

void flush_tlb_kernel_vm(void)
{
	flush_tlb_kernel_range_common(start_vm, end_vm);
}

void __flush_tlb_one(unsigned long addr)
{
	flush_tlb_kernel_range_common(addr, addr + PAGE_SIZE);
}

static void fix_range(struct mm_struct *mm, unsigned long start_addr,
		      unsigned long end_addr, int force)
{
	/*
	 * Don't bother flushing if this address space is about to be
	 * destroyed.
	 */
	if (atomic_read(&mm->mm_users) == 0)
		return;

	fix_range_common(mm, start_addr, end_addr, force);
}

void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
		     unsigned long end)
{
	if (vma->vm_mm == NULL)
		flush_tlb_kernel_range_common(start, end);
	else fix_range(vma->vm_mm, start, end, 0);
}
EXPORT_SYMBOL(flush_tlb_range);

void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
			unsigned long end)
{
	fix_range(mm, start, end, 0);
}

void flush_tlb_mm(struct mm_struct *mm)
{
	struct vm_area_struct *vma = mm->mmap;

	while (vma != NULL) {
		fix_range(mm, vma->vm_start, vma->vm_end, 0);
		vma = vma->vm_next;
	}
}

void force_flush_all(void)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma = mm->mmap;

	while (vma != NULL) {
		fix_range(mm, vma->vm_start, vma->vm_end, 1);
		vma = vma->vm_next;
	}
}