/*
 * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
 * Licensed under the GPL
 */

#include <linux/mm.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <as-layout.h>
#include <mem_user.h>
#include <os.h>
#include <skas.h>
#include <kern_util.h>

/*
 * A batch of pending host address space operations.  Operations are queued
 * by the add_*() helpers below and flushed to the host by do_ops() whenever
 * the array fills up or the page table walk finishes.
 */
struct host_vm_change {
        struct host_vm_op {
                enum { NONE, MMAP, MUNMAP, MPROTECT } type;
                union {
                        struct {
                                unsigned long addr;
                                unsigned long len;
                                unsigned int prot;
                                int fd;
                                __u64 offset;
                        } mmap;
                        struct {
                                unsigned long addr;
                                unsigned long len;
                        } munmap;
                        struct {
                                unsigned long addr;
                                unsigned long len;
                                unsigned int prot;
                        } mprotect;
                } u;
        } ops[1];
        int index;
        struct mm_id *id;
        void *data;
        int force;
};

#define INIT_HVC(mm, force) \
        ((struct host_vm_change) \
         { .ops = { { .type = NONE } }, \
           .id = &mm->context.id, \
           .data = NULL, \
           .index = 0, \
           .force = force })

static int do_ops(struct host_vm_change *hvc, int end,
                  int finished)
{
        struct host_vm_op *op;
        int i, ret = 0;

        for (i = 0; i < end && !ret; i++) {
                op = &hvc->ops[i];
                switch (op->type) {
                case MMAP:
                        ret = map(hvc->id, op->u.mmap.addr, op->u.mmap.len,
                                  op->u.mmap.prot, op->u.mmap.fd,
                                  op->u.mmap.offset, finished, &hvc->data);
                        break;
                case MUNMAP:
                        ret = unmap(hvc->id, op->u.munmap.addr,
                                    op->u.munmap.len, finished, &hvc->data);
                        break;
                case MPROTECT:
                        ret = protect(hvc->id, op->u.mprotect.addr,
                                      op->u.mprotect.len, op->u.mprotect.prot,
                                      finished, &hvc->data);
                        break;
                default:
                        printk(KERN_ERR "Unknown op type %d in do_ops\n",
                               op->type);
                        BUG();
                        break;
                }
        }

        return ret;
}

static int add_mmap(unsigned long virt, unsigned long phys, unsigned long len,
                    unsigned int prot, struct host_vm_change *hvc)
{
        __u64 offset;
        struct host_vm_op *last;
        int fd, ret = 0;

        fd = phys_mapping(phys, &offset);
        if (hvc->index != 0) {
                /* Extend the previous mapping if this one is contiguous. */
                last = &hvc->ops[hvc->index - 1];
                if ((last->type == MMAP) &&
                    (last->u.mmap.addr + last->u.mmap.len == virt) &&
                    (last->u.mmap.prot == prot) && (last->u.mmap.fd == fd) &&
                    (last->u.mmap.offset + last->u.mmap.len == offset)) {
                        last->u.mmap.len += len;
                        return 0;
                }
        }

        if (hvc->index == ARRAY_SIZE(hvc->ops)) {
                ret = do_ops(hvc, ARRAY_SIZE(hvc->ops), 0);
                hvc->index = 0;
        }

        hvc->ops[hvc->index++] = ((struct host_vm_op)
                                  { .type = MMAP,
                                    .u = { .mmap = { .addr = virt,
                                                     .len = len,
                                                     .prot = prot,
                                                     .fd = fd,
                                                     .offset = offset }
                                         } });
        return ret;
}

static int add_munmap(unsigned long addr, unsigned long len,
                      struct host_vm_change *hvc)
{
        struct host_vm_op *last;
        int ret = 0;

        if ((addr >= STUB_START) && (addr < STUB_END))
                return -EINVAL;

        if (hvc->index != 0) {
                last = &hvc->ops[hvc->index - 1];
                if ((last->type == MUNMAP) &&
                    (last->u.munmap.addr + last->u.munmap.len == addr)) {
                        last->u.munmap.len += len;
                        return 0;
                }
        }

        if (hvc->index == ARRAY_SIZE(hvc->ops)) {
                ret = do_ops(hvc, ARRAY_SIZE(hvc->ops), 0);
                hvc->index = 0;
        }

        hvc->ops[hvc->index++] = ((struct host_vm_op)
                                  { .type = MUNMAP,
                                    .u = { .munmap = { .addr = addr,
                                                       .len = len } } });
        return ret;
}

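/*
 * Queue a host mprotect() for [addr, addr + len), merging with the previous
 * queued operation when it is an adjacent mprotect with the same protection,
 * mirroring the coalescing done in add_mmap() and add_munmap() above.
 */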
static int add_mprotect(unsigned long addr, unsigned long len,
                        unsigned int prot, struct host_vm_change *hvc)
{
        struct host_vm_op *last;
        int ret = 0;

        if (hvc->index != 0) {
                last = &hvc->ops[hvc->index - 1];
                if ((last->type == MPROTECT) &&
                    (last->u.mprotect.addr + last->u.mprotect.len == addr) &&
                    (last->u.mprotect.prot == prot)) {
                        last->u.mprotect.len += len;
                        return 0;
                }
        }

        if (hvc->index == ARRAY_SIZE(hvc->ops)) {
                ret = do_ops(hvc, ARRAY_SIZE(hvc->ops), 0);
                hvc->index = 0;
        }

        hvc->ops[hvc->index++] = ((struct host_vm_op)
                                  { .type = MPROTECT,
                                    .u = { .mprotect = { .addr = addr,
                                                         .len = len,
                                                         .prot = prot } } });
        return ret;
}

#define ADD_ROUND(n, inc) (((n) + (inc)) & ~((inc) - 1))

static inline int update_pte_range(pmd_t *pmd, unsigned long addr,
                                   unsigned long end,
                                   struct host_vm_change *hvc)
{
        pte_t *pte;
        int r, w, x, prot, ret = 0;

        pte = pte_offset_kernel(pmd, addr);
        do {
                if ((addr >= STUB_START) && (addr < STUB_END))
                        continue;

                r = pte_read(*pte);
                w = pte_write(*pte);
                x = pte_exec(*pte);
                if (!pte_young(*pte)) {
                        r = 0;
                        w = 0;
                } else if (!pte_dirty(*pte))
                        w = 0;

                prot = ((r ? UM_PROT_READ : 0) | (w ? UM_PROT_WRITE : 0) |
                        (x ? UM_PROT_EXEC : 0));
                if (hvc->force || pte_newpage(*pte)) {
                        if (pte_present(*pte))
                                ret = add_mmap(addr, pte_val(*pte) & PAGE_MASK,
                                               PAGE_SIZE, prot, hvc);
                        else
                                ret = add_munmap(addr, PAGE_SIZE, hvc);
                } else if (pte_newprot(*pte))
                        ret = add_mprotect(addr, PAGE_SIZE, prot, hvc);
                *pte = pte_mkuptodate(*pte);
        } while (pte++, addr += PAGE_SIZE, ((addr < end) && !ret));
        return ret;
}

static inline int update_pmd_range(pud_t *pud, unsigned long addr,
                                   unsigned long end,
                                   struct host_vm_change *hvc)
{
        pmd_t *pmd;
        unsigned long next;
        int ret = 0;

        pmd = pmd_offset(pud, addr);
        do {
                next = pmd_addr_end(addr, end);
                if (!pmd_present(*pmd)) {
                        if (hvc->force || pmd_newpage(*pmd)) {
                                ret = add_munmap(addr, next - addr, hvc);
                                pmd_mkuptodate(*pmd);
                        }
                }
                else ret = update_pte_range(pmd, addr, next, hvc);
        } while (pmd++, addr = next, ((addr < end) && !ret));
        return ret;
}

static inline int update_pud_range(pgd_t *pgd, unsigned long addr,
                                   unsigned long end,
                                   struct host_vm_change *hvc)
{
        pud_t *pud;
        unsigned long next;
        int ret = 0;

        pud = pud_offset(pgd, addr);
        do {
                next = pud_addr_end(addr, end);
                if (!pud_present(*pud)) {
                        if (hvc->force || pud_newpage(*pud)) {
                                ret = add_munmap(addr, next - addr, hvc);
                                pud_mkuptodate(*pud);
                        }
                }
                else ret = update_pmd_range(pud, addr, next, hvc);
        } while (pud++, addr = next, ((addr < end) && !ret));
        return ret;
}

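/*
 * Walk the page tables for [start_addr, end_addr), queueing host
 * mmap/munmap/mprotect operations for every range marked new (or for all of
 * them when force is set), and flush the queue with do_ops().  If anything
 * fails, the host address space no longer matches the page tables, so the
 * current process is killed.
 */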
void fix_range_common(struct mm_struct *mm, unsigned long start_addr,
                      unsigned long end_addr, int force)
{
        pgd_t *pgd;
        struct host_vm_change hvc;
        unsigned long addr = start_addr, next;
        int ret = 0;

        hvc = INIT_HVC(mm, force);
        pgd = pgd_offset(mm, addr);
        do {
                next = pgd_addr_end(addr, end_addr);
                if (!pgd_present(*pgd)) {
                        if (force || pgd_newpage(*pgd)) {
                                ret = add_munmap(addr, next - addr, &hvc);
                                pgd_mkuptodate(*pgd);
                        }
                }
                else ret = update_pud_range(pgd, addr, next, &hvc);
        } while (pgd++, addr = next, ((addr < end_addr) && !ret));

        if (!ret)
                ret = do_ops(&hvc, hvc.index, 1);

        /* This is not an else because ret is modified above */
        if (ret) {
                printk(KERN_ERR "fix_range_common: failed, killing current "
                       "process: %d\n", task_tgid_vnr(current));
                /* We are under mmap_sem, release it such that current can terminate */
                up_write(&current->mm->mmap_sem);
                force_sig(SIGKILL, current);
                do_signal();
        }
}

static int flush_tlb_kernel_range_common(unsigned long start, unsigned long end)
{
        struct mm_struct *mm;
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte;
        unsigned long addr, last;
        int updated = 0, err;

        mm = &init_mm;
        for (addr = start; addr < end;) {
                pgd = pgd_offset(mm, addr);
                if (!pgd_present(*pgd)) {
                        last = ADD_ROUND(addr, PGDIR_SIZE);
                        if (last > end)
                                last = end;
                        if (pgd_newpage(*pgd)) {
                                updated = 1;
                                err = os_unmap_memory((void *) addr,
                                                      last - addr);
                                if (err < 0)
                                        panic("munmap failed, errno = %d\n",
                                              -err);
                        }
                        addr = last;
                        continue;
                }

                pud = pud_offset(pgd, addr);
                if (!pud_present(*pud)) {
                        last = ADD_ROUND(addr, PUD_SIZE);
                        if (last > end)
                                last = end;
                        if (pud_newpage(*pud)) {
                                updated = 1;
                                err = os_unmap_memory((void *) addr,
                                                      last - addr);
                                if (err < 0)
                                        panic("munmap failed, errno = %d\n",
                                              -err);
                        }
                        addr = last;
                        continue;
                }

                pmd = pmd_offset(pud, addr);
                if (!pmd_present(*pmd)) {
                        last = ADD_ROUND(addr, PMD_SIZE);
                        if (last > end)
                                last = end;
                        if (pmd_newpage(*pmd)) {
                                updated = 1;
                                err = os_unmap_memory((void *) addr,
                                                      last - addr);
                                if (err < 0)
                                        panic("munmap failed, errno = %d\n",
                                              -err);
                        }
                        addr = last;
                        continue;
                }

                pte = pte_offset_kernel(pmd, addr);
                if (!pte_present(*pte) || pte_newpage(*pte)) {
                        updated = 1;
                        err = os_unmap_memory((void *) addr,
                                              PAGE_SIZE);
                        if (err < 0)
                                panic("munmap failed, errno = %d\n",
                                      -err);
                        if (pte_present(*pte))
                                map_memory(addr,
                                           pte_val(*pte) & PAGE_MASK,
                                           PAGE_SIZE, 1, 1, 1);
                }
                else if (pte_newprot(*pte)) {
                        updated = 1;
                        os_protect_memory((void *) addr, PAGE_SIZE, 1, 1, 1);
                }
                addr += PAGE_SIZE;
        }
        return updated;
}

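/*
 * Synchronize a single page with the host address space.  Unlike
 * fix_range_common(), the host call is issued immediately rather than
 * batched, since at most one operation is needed.
 */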
void flush_tlb_page(struct vm_area_struct *vma, unsigned long address)
{
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte;
        struct mm_struct *mm = vma->vm_mm;
        void *flush = NULL;
        int r, w, x, prot, err = 0;
        struct mm_id *mm_id;

        address &= PAGE_MASK;
        pgd = pgd_offset(mm, address);
        if (!pgd_present(*pgd))
                goto kill;

        pud = pud_offset(pgd, address);
        if (!pud_present(*pud))
                goto kill;

        pmd = pmd_offset(pud, address);
        if (!pmd_present(*pmd))
                goto kill;

        pte = pte_offset_kernel(pmd, address);

        r = pte_read(*pte);
        w = pte_write(*pte);
        x = pte_exec(*pte);
        if (!pte_young(*pte)) {
                r = 0;
                w = 0;
        } else if (!pte_dirty(*pte)) {
                w = 0;
        }

        mm_id = &mm->context.id;
        prot = ((r ? UM_PROT_READ : 0) | (w ? UM_PROT_WRITE : 0) |
                (x ? UM_PROT_EXEC : 0));
        if (pte_newpage(*pte)) {
                if (pte_present(*pte)) {
                        unsigned long long offset;
                        int fd;

                        fd = phys_mapping(pte_val(*pte) & PAGE_MASK, &offset);
                        err = map(mm_id, address, PAGE_SIZE, prot, fd, offset,
                                  1, &flush);
                }
                else err = unmap(mm_id, address, PAGE_SIZE, 1, &flush);
        }
        else if (pte_newprot(*pte))
                err = protect(mm_id, address, PAGE_SIZE, prot, 1, &flush);

        if (err)
                goto kill;

        *pte = pte_mkuptodate(*pte);

        return;

kill:
        printk(KERN_ERR "Failed to flush page for address 0x%lx\n", address);
        force_sig(SIGKILL, current);
}

pgd_t *pgd_offset_proc(struct mm_struct *mm, unsigned long address)
{
        return pgd_offset(mm, address);
}

pud_t *pud_offset_proc(pgd_t *pgd, unsigned long address)
{
        return pud_offset(pgd, address);
}

pmd_t *pmd_offset_proc(pud_t *pud, unsigned long address)
{
        return pmd_offset(pud, address);
}

pte_t *pte_offset_proc(pmd_t *pmd, unsigned long address)
{
        return pte_offset_kernel(pmd, address);
}

pte_t *addr_pte(struct task_struct *task, unsigned long addr)
{
        pgd_t *pgd = pgd_offset(task->mm, addr);
        pud_t *pud = pud_offset(pgd, addr);
        pmd_t *pmd = pmd_offset(pud, addr);

        return pte_offset_map(pmd, addr);
}

void flush_tlb_all(void)
{
        flush_tlb_mm(current->mm);
}

void flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
        flush_tlb_kernel_range_common(start, end);
}

void flush_tlb_kernel_vm(void)
{
        flush_tlb_kernel_range_common(start_vm, end_vm);
}

void __flush_tlb_one(unsigned long addr)
{
        flush_tlb_kernel_range_common(addr, addr + PAGE_SIZE);
}

static void fix_range(struct mm_struct *mm, unsigned long start_addr,
                      unsigned long end_addr, int force)
{
        fix_range_common(mm, start_addr, end_addr, force);
}

void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
                     unsigned long end)
{
        if (vma->vm_mm == NULL)
                flush_tlb_kernel_range_common(start, end);
        else fix_range(vma->vm_mm, start, end, 0);
}
EXPORT_SYMBOL(flush_tlb_range);

void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
                        unsigned long end)
{
        /*
         * Don't bother flushing if this address space is about to be
         * destroyed.
         */
        if (atomic_read(&mm->mm_users) == 0)
                return;

        fix_range(mm, start, end, 0);
}

void flush_tlb_mm(struct mm_struct *mm)
{
        struct vm_area_struct *vma = mm->mmap;

        while (vma != NULL) {
                fix_range(mm, vma->vm_start, vma->vm_end, 0);
                vma = vma->vm_next;
        }
}

void force_flush_all(void)
{
        struct mm_struct *mm = current->mm;
        struct vm_area_struct *vma = mm->mmap;

        while (vma != NULL) {
                fix_range(mm, vma->vm_start, vma->vm_end, 1);
                vma = vma->vm_next;
        }
}