/*
 *  linux/mm/nommu.c
 *
 *  Replacement code for mm functions to support CPUs that don't
 *  have any form of memory management unit (thus no virtual memory).
 *
 *  See Documentation/nommu-mmap.txt
 *
 *  Copyright (c) 2004-2005 David Howells <dhowells@redhat.com>
 *  Copyright (c) 2000-2003 David McCullough <davidm@snapgear.com>
 *  Copyright (c) 2000-2001 D Jeff Dionne <jeff@uClinux.org>
 *  Copyright (c) 2002      Greg Ungerer <gerg@snapgear.com>
 *  Copyright (c) 2007      Paul Mundt <lethal@linux-sh.org>
 */

#include <linux/module.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/swap.h>
#include <linux/file.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/ptrace.h>
#include <linux/blkdev.h>
#include <linux/backing-dev.h>
#include <linux/mount.h>
#include <linux/personality.h>
#include <linux/security.h>
#include <linux/syscalls.h>

#include <asm/uaccess.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>

void *high_memory;
struct page *mem_map;
unsigned long max_mapnr;
unsigned long num_physpages;
unsigned long askedalloc, realalloc;
atomic_t vm_committed_space = ATOMIC_INIT(0);
int sysctl_overcommit_memory = OVERCOMMIT_GUESS; /* heuristic overcommit */
int sysctl_overcommit_ratio = 50; /* default is 50% */
int sysctl_max_map_count = DEFAULT_MAX_MAP_COUNT;
int heap_stack_gap = 0;

EXPORT_SYMBOL(mem_map);
EXPORT_SYMBOL(num_physpages);

/* list of shareable VMAs */
struct rb_root nommu_vma_tree = RB_ROOT;
DECLARE_RWSEM(nommu_vma_sem);

struct vm_operations_struct generic_file_vm_ops = {
};

/*
 * Handle all mappings that got truncated by a "truncate()"
 * system call.
 *
 * NOTE! We have to be ready to update the memory sharing
 * between the file and the memory map for a potential last
 * incomplete page.  Ugly, but necessary.
 */
int vmtruncate(struct inode *inode, loff_t offset)
{
	struct address_space *mapping = inode->i_mapping;
	unsigned long limit;

	if (inode->i_size < offset)
		goto do_expand;
	i_size_write(inode, offset);

	truncate_inode_pages(mapping, offset);
	goto out_truncate;

do_expand:
	limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
	if (limit != RLIM_INFINITY && offset > limit)
		goto out_sig;
	if (offset > inode->i_sb->s_maxbytes)
		goto out;
	i_size_write(inode, offset);

out_truncate:
	if (inode->i_op && inode->i_op->truncate)
		inode->i_op->truncate(inode);
	return 0;
out_sig:
	send_sig(SIGXFSZ, current, 0);
out:
	return -EFBIG;
}

EXPORT_SYMBOL(vmtruncate);
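/*
 * Illustrative note (not part of the original file): filesystems normally
 * reach vmtruncate() from their ->setattr() handling when ATTR_SIZE is set,
 * roughly as in this hypothetical sketch:
 *
 *	if (attr->ia_valid & ATTR_SIZE)
 *		ret = vmtruncate(inode, attr->ia_size);
 *
 * On NOMMU there are no private COW pages to unmap, so trimming the page
 * cache and calling the inode's ->truncate() op is all that is required.
 */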
/*
 * Return the total memory allocated for this pointer, not
 * just what the caller asked for.
 *
 * Doesn't have to be accurate, i.e. may have races.
 */
unsigned int kobjsize(const void *objp)
{
	struct page *page;

	/*
	 * If the object we have should not have ksize performed on it,
	 * return size of 0
	 */
	if (!objp || (unsigned long)objp >= memory_end || !((page = virt_to_page(objp))))
		return 0;

	if (PageSlab(page))
		return ksize(objp);

	BUG_ON(page->index < 0);
	BUG_ON(page->index >= MAX_ORDER);

	return (PAGE_SIZE << page->index);
}

/*
 * get a list of pages in an address range belonging to the specified process
 * and indicate the VMA that covers each page
 * - this is potentially dodgy as we may end up incrementing the page count of
 *   a slab page or a secondary page from a compound page
 * - don't permit access to VMAs that don't support it, such as I/O mappings
 */
int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
		   unsigned long start, int len, int write, int force,
		   struct page **pages, struct vm_area_struct **vmas)
{
	struct vm_area_struct *vma;
	unsigned long vm_flags;
	int i;

	/* calculate required read or write permissions.
	 * - if 'force' is set, we only require the "MAY" flags.
	 */
	vm_flags  = write ? (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
	vm_flags &= force ? (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);

	for (i = 0; i < len; i++) {
		vma = find_vma(mm, start);
		if (!vma)
			goto finish_or_fault;

		/* protect what we can, including chardevs */
		if (vma->vm_flags & (VM_IO | VM_PFNMAP) ||
		    !(vm_flags & vma->vm_flags))
			goto finish_or_fault;

		if (pages) {
			pages[i] = virt_to_page(start);
			if (pages[i])
				page_cache_get(pages[i]);
		}
		if (vmas)
			vmas[i] = vma;
		start += PAGE_SIZE;
	}

	return i;

finish_or_fault:
	return i ? : -EFAULT;
}
EXPORT_SYMBOL(get_user_pages);

DEFINE_RWLOCK(vmlist_lock);
struct vm_struct *vmlist;

void vfree(const void *addr)
{
	kfree(addr);
}
EXPORT_SYMBOL(vfree);
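/*
 * Illustrative note (not part of the original file): on NOMMU every
 * vmalloc() variant below is backed by a single physically contiguous
 * kmalloc() allocation, so e.g.
 *
 *	p = vmalloc(4 * PAGE_SIZE);
 *	...
 *	vfree(p);
 *
 * behaves like kmalloc(4 * PAGE_SIZE, GFP_KERNEL)/kfree(), and large
 * requests may fail under memory fragmentation even when enough total
 * memory is free.
 */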
void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot)
{
	/*
	 * You can't specify __GFP_HIGHMEM with kmalloc() since kmalloc()
	 * returns only a logical address.
	 */
	return kmalloc(size, (gfp_mask | __GFP_COMP) & ~__GFP_HIGHMEM);
}
EXPORT_SYMBOL(__vmalloc);

void *vmalloc_user(unsigned long size)
{
	void *ret;

	ret = __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO,
			PAGE_KERNEL);
	if (ret) {
		struct vm_area_struct *vma;

		down_write(&current->mm->mmap_sem);
		vma = find_vma(current->mm, (unsigned long)ret);
		if (vma)
			vma->vm_flags |= VM_USERMAP;
		up_write(&current->mm->mmap_sem);
	}

	return ret;
}
EXPORT_SYMBOL(vmalloc_user);

struct page *vmalloc_to_page(const void *addr)
{
	return virt_to_page(addr);
}
EXPORT_SYMBOL(vmalloc_to_page);

unsigned long vmalloc_to_pfn(const void *addr)
{
	return page_to_pfn(virt_to_page(addr));
}
EXPORT_SYMBOL(vmalloc_to_pfn);

long vread(char *buf, char *addr, unsigned long count)
{
	memcpy(buf, addr, count);
	return count;
}

long vwrite(char *buf, char *addr, unsigned long count)
{
	/* Don't allow overflow */
	if ((unsigned long) addr + count < count)
		count = -(unsigned long) addr;

	memcpy(addr, buf, count);
	return count;
}

/**
 * vmalloc - allocate virtually contiguous memory
 *
 * @size: allocation size
 *
 * Allocate enough pages to cover @size from the page level
 * allocator and map them into contiguous kernel virtual space.
 *
 * For tight control over page level allocator and protection flags
 * use __vmalloc() instead.
 */
void *vmalloc(unsigned long size)
{
	return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL);
}
EXPORT_SYMBOL(vmalloc);

void *vmalloc_node(unsigned long size, int node)
{
	return vmalloc(size);
}
EXPORT_SYMBOL(vmalloc_node);

/**
 * vmalloc_32 - allocate virtually contiguous memory (32bit addressable)
 * @size: allocation size
 *
 * Allocate enough 32bit PA addressable pages to cover @size from the
 * page level allocator and map them into contiguous kernel virtual space.
 */
void *vmalloc_32(unsigned long size)
{
	return __vmalloc(size, GFP_KERNEL, PAGE_KERNEL);
}
EXPORT_SYMBOL(vmalloc_32);

/**
 * vmalloc_32_user - allocate zeroed virtually contiguous 32bit memory
 * @size: allocation size
 *
 * The resulting memory area is 32bit addressable and zeroed so it can be
 * mapped to userspace without leaking data.
 *
 * VM_USERMAP is set on the corresponding VMA so that subsequent calls to
 * remap_vmalloc_range() are permissible.
 */
void *vmalloc_32_user(unsigned long size)
{
	/*
	 * We'll have to sort out the ZONE_DMA bits for 64-bit,
	 * but for now this can simply use vmalloc_user() directly.
	 */
	return vmalloc_user(size);
}
EXPORT_SYMBOL(vmalloc_32_user);

void *vmap(struct page **pages, unsigned int count, unsigned long flags, pgprot_t prot)
{
	BUG();
	return NULL;
}
EXPORT_SYMBOL(vmap);

void vunmap(const void *addr)
{
	BUG();
}
EXPORT_SYMBOL(vunmap);
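/*
 * Illustrative note (not part of the original file): vmap()/vunmap() cannot
 * be implemented without an MMU, so they simply BUG().  Code that must run
 * on NOMMU targets has to avoid them, typically by allocating one contiguous
 * buffer (kmalloc()/__vmalloc()) instead of stitching pages together.
 */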
/*
 * Implement a stub for vmalloc_sync_all() if the architecture chose not to
 * have one.
 */
void __attribute__((weak)) vmalloc_sync_all(void)
{
}

int vm_insert_page(struct vm_area_struct *vma, unsigned long addr,
		   struct page *page)
{
	return -EINVAL;
}
EXPORT_SYMBOL(vm_insert_page);

/*
 *  sys_brk() for the most part doesn't need the global kernel
 *  lock, except when an application is doing something nasty
 *  like trying to un-brk an area that has already been mapped
 *  to a regular file.  in this case, the unmapping will need
 *  to invoke file system routines that need the global lock.
 */
asmlinkage unsigned long sys_brk(unsigned long brk)
{
	struct mm_struct *mm = current->mm;

	if (brk < mm->start_brk || brk > mm->context.end_brk)
		return mm->brk;

	if (mm->brk == brk)
		return mm->brk;

	/*
	 * Always allow shrinking brk
	 */
	if (brk <= mm->brk) {
		mm->brk = brk;
		return brk;
	}

	/*
	 * Ok, looks good - let it rip.
	 */
	return mm->brk = brk;
}

#ifdef DEBUG
static void show_process_blocks(void)
{
	struct vm_list_struct *vml;

	printk("Process blocks %d:", current->pid);

	for (vml = current->mm->context.vmlist; vml; vml = vml->next) {
		printk(" %p: %p", vml, vml->vma);
		if (vml->vma)
			printk(" (%d @%lx #%d)",
			       kobjsize((void *) vml->vma->vm_start),
			       vml->vma->vm_start,
			       atomic_read(&vml->vma->vm_usage));
		printk(vml->next ? " ->" : ".\n");
	}
}
#endif /* DEBUG */

/*
 * add a VMA into a process's mm_struct in the appropriate place in the list
 * - should be called with mm->mmap_sem held writelocked
 */
static void add_vma_to_mm(struct mm_struct *mm, struct vm_list_struct *vml)
{
	struct vm_list_struct **ppv;

	for (ppv = &current->mm->context.vmlist; *ppv; ppv = &(*ppv)->next)
		if ((*ppv)->vma->vm_start > vml->vma->vm_start)
			break;

	vml->next = *ppv;
	*ppv = vml;
}

/*
 * look up the first VMA in which addr resides, NULL if none
 * - should be called with mm->mmap_sem at least held readlocked
 */
struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
{
	struct vm_list_struct *loop, *vml;

	/* search the vm_start ordered list */
	vml = NULL;
	for (loop = mm->context.vmlist; loop; loop = loop->next) {
		if (loop->vma->vm_start > addr)
			break;
		vml = loop;
	}

	if (vml && vml->vma->vm_end > addr)
		return vml->vma;

	return NULL;
}
EXPORT_SYMBOL(find_vma);

/*
 * find a VMA
 * - we don't extend stack VMAs under NOMMU conditions
 */
struct vm_area_struct *find_extend_vma(struct mm_struct *mm, unsigned long addr)
{
	return find_vma(mm, addr);
}

int expand_stack(struct vm_area_struct *vma, unsigned long address)
{
	return -ENOMEM;
}

/*
 * look up the first VMA that exactly matches addr
 * - should be called with mm->mmap_sem at least held readlocked
 */
static inline struct vm_area_struct *find_vma_exact(struct mm_struct *mm,
						    unsigned long addr)
{
	struct vm_list_struct *vml;

	/* search the vm_start ordered list */
	for (vml = mm->context.vmlist; vml; vml = vml->next) {
		if (vml->vma->vm_start == addr)
			return vml->vma;
		if (vml->vma->vm_start > addr)
			break;
	}

	return NULL;
}
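/*
 * Illustrative note (not part of the original file): under NOMMU each
 * mm_struct keeps its mappings on a simple singly-linked list of
 * vm_list_struct entries ordered by vm_start (mm->context.vmlist), which
 * the lookups above walk linearly, while VMAs that may be shared between
 * processes additionally live in the global nommu_vma_tree rb-tree keyed
 * by vm_start, which find_nommu_vma() below searches in O(log n).
 */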
/*
 * find a VMA in the global tree
 */
static inline struct vm_area_struct *find_nommu_vma(unsigned long start)
{
	struct vm_area_struct *vma;
	struct rb_node *n = nommu_vma_tree.rb_node;

	while (n) {
		vma = rb_entry(n, struct vm_area_struct, vm_rb);

		if (start < vma->vm_start)
			n = n->rb_left;
		else if (start > vma->vm_start)
			n = n->rb_right;
		else
			return vma;
	}

	return NULL;
}

/*
 * add a VMA in the global tree
 */
static void add_nommu_vma(struct vm_area_struct *vma)
{
	struct vm_area_struct *pvma;
	struct address_space *mapping;
	struct rb_node **p = &nommu_vma_tree.rb_node;
	struct rb_node *parent = NULL;

	/* add the VMA to the mapping */
	if (vma->vm_file) {
		mapping = vma->vm_file->f_mapping;

		flush_dcache_mmap_lock(mapping);
		vma_prio_tree_insert(vma, &mapping->i_mmap);
		flush_dcache_mmap_unlock(mapping);
	}

	/* add the VMA to the master list */
	while (*p) {
		parent = *p;
		pvma = rb_entry(parent, struct vm_area_struct, vm_rb);

		if (vma->vm_start < pvma->vm_start) {
			p = &(*p)->rb_left;
		} else if (vma->vm_start > pvma->vm_start) {
			p = &(*p)->rb_right;
		} else {
			/* mappings are at the same address - this can only
			 * happen for shared-mem chardevs and shared file
			 * mappings backed by ramfs/tmpfs */
			BUG_ON(!(pvma->vm_flags & VM_SHARED));

			if (vma < pvma)
				p = &(*p)->rb_left;
			else if (vma > pvma)
				p = &(*p)->rb_right;
			else
				BUG();
		}
	}

	rb_link_node(&vma->vm_rb, parent, p);
	rb_insert_color(&vma->vm_rb, &nommu_vma_tree);
}

/*
 * delete a VMA from the global list
 */
static void delete_nommu_vma(struct vm_area_struct *vma)
{
	struct address_space *mapping;

	/* remove the VMA from the mapping */
	if (vma->vm_file) {
		mapping = vma->vm_file->f_mapping;

		flush_dcache_mmap_lock(mapping);
		vma_prio_tree_remove(vma, &mapping->i_mmap);
		flush_dcache_mmap_unlock(mapping);
	}

	/* remove from the master list */
	rb_erase(&vma->vm_rb, &nommu_vma_tree);
}
/*
 * determine whether a mapping should be permitted and, if so, what sort of
 * mapping we're capable of supporting
 */
static int validate_mmap_request(struct file *file,
				 unsigned long addr,
				 unsigned long len,
				 unsigned long prot,
				 unsigned long flags,
				 unsigned long pgoff,
				 unsigned long *_capabilities)
{
	unsigned long capabilities;
	unsigned long reqprot = prot;
	int ret;

	/* do the simple checks first */
	if (flags & MAP_FIXED || addr) {
		printk(KERN_DEBUG
		       "%d: Can't do fixed-address/overlay mmap of RAM\n",
		       current->pid);
		return -EINVAL;
	}

	if ((flags & MAP_TYPE) != MAP_PRIVATE &&
	    (flags & MAP_TYPE) != MAP_SHARED)
		return -EINVAL;

	if (!len)
		return -EINVAL;

	/* Careful about overflows.. */
	len = PAGE_ALIGN(len);
	if (!len || len > TASK_SIZE)
		return -ENOMEM;

	/* offset overflow? */
	if ((pgoff + (len >> PAGE_SHIFT)) < pgoff)
		return -EOVERFLOW;

	if (file) {
		/* validate file mapping requests */
		struct address_space *mapping;

		/* files must support mmap */
		if (!file->f_op || !file->f_op->mmap)
			return -ENODEV;

		/* work out if what we've got could possibly be shared
		 * - we support chardevs that provide their own "memory"
		 * - we support files/blockdevs that are memory backed
		 */
		mapping = file->f_mapping;
		if (!mapping)
			mapping = file->f_path.dentry->d_inode->i_mapping;

		capabilities = 0;
		if (mapping && mapping->backing_dev_info)
			capabilities = mapping->backing_dev_info->capabilities;

		if (!capabilities) {
			/* no explicit capabilities set, so assume some
			 * defaults */
			switch (file->f_path.dentry->d_inode->i_mode & S_IFMT) {
			case S_IFREG:
			case S_IFBLK:
				capabilities = BDI_CAP_MAP_COPY;
				break;

			case S_IFCHR:
				capabilities =
					BDI_CAP_MAP_DIRECT |
					BDI_CAP_READ_MAP |
					BDI_CAP_WRITE_MAP;
				break;

			default:
				return -EINVAL;
			}
		}

		/* eliminate any capabilities that we can't support on this
		 * device */
		if (!file->f_op->get_unmapped_area)
			capabilities &= ~BDI_CAP_MAP_DIRECT;
		if (!file->f_op->read)
			capabilities &= ~BDI_CAP_MAP_COPY;

		if (flags & MAP_SHARED) {
			/* do checks for writing, appending and locking */
			if ((prot & PROT_WRITE) &&
			    !(file->f_mode & FMODE_WRITE))
				return -EACCES;

			if (IS_APPEND(file->f_path.dentry->d_inode) &&
			    (file->f_mode & FMODE_WRITE))
				return -EACCES;

			if (locks_verify_locked(file->f_path.dentry->d_inode))
				return -EAGAIN;

			if (!(capabilities & BDI_CAP_MAP_DIRECT))
				return -ENODEV;

			if (((prot & PROT_READ)  && !(capabilities & BDI_CAP_READ_MAP))  ||
			    ((prot & PROT_WRITE) && !(capabilities & BDI_CAP_WRITE_MAP)) ||
			    ((prot & PROT_EXEC)  && !(capabilities & BDI_CAP_EXEC_MAP))
			    ) {
				printk("MAP_SHARED not completely supported on !MMU\n");
				return -EINVAL;
			}

			/* we mustn't privatise shared mappings */
			capabilities &= ~BDI_CAP_MAP_COPY;
		} else {
			/* we're going to read the file into private memory we
			 * allocate */
			if (!(capabilities & BDI_CAP_MAP_COPY))
				return -ENODEV;

			/* we don't permit a private writable mapping to be
			 * shared with the backing device */
			if (prot & PROT_WRITE)
				capabilities &= ~BDI_CAP_MAP_DIRECT;
		}

		/* handle executable mappings and implied executable
		 * mappings */
		if (file->f_path.mnt->mnt_flags & MNT_NOEXEC) {
			if (prot & PROT_EXEC)
				return -EPERM;
		} else if ((prot & PROT_READ) && !(prot & PROT_EXEC)) {
			/* handle implication of PROT_EXEC by PROT_READ */
			if (current->personality & READ_IMPLIES_EXEC) {
				if (capabilities & BDI_CAP_EXEC_MAP)
					prot |= PROT_EXEC;
			}
		} else if ((prot & PROT_READ) &&
			   (prot & PROT_EXEC) &&
			   !(capabilities & BDI_CAP_EXEC_MAP)
			   ) {
			/* backing file is not executable, try to copy */
			capabilities &= ~BDI_CAP_MAP_DIRECT;
		}
	} else {
		/* anonymous mappings are always memory backed and can be
		 * privately mapped
		 */
		capabilities = BDI_CAP_MAP_COPY;

		/* handle PROT_EXEC implication by PROT_READ */
		if ((prot & PROT_READ) &&
		    (current->personality & READ_IMPLIES_EXEC))
			prot |= PROT_EXEC;
	}

	/* allow the security API to have its say */
	ret = security_file_mmap(file, reqprot, prot, flags, addr, 0);
	if (ret < 0)
		return ret;

	/* looks okay */
	*_capabilities = capabilities;
	return 0;
}
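/*
 * Illustrative summary (not part of the original file) of the capability
 * bits chosen above when the backing device does not declare its own:
 *
 *	backing object			default capabilities
 *	----------------------------	----------------------------------
 *	regular file / blockdev		BDI_CAP_MAP_COPY (private copy)
 *	character device		BDI_CAP_MAP_DIRECT | READ/WRITE_MAP
 *	anonymous memory		BDI_CAP_MAP_COPY
 *
 * BDI_CAP_MAP_DIRECT is then stripped if the file has no
 * ->get_unmapped_area(), and BDI_CAP_MAP_COPY if it has no ->read().
 */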
/*
 * we've determined that we can make the mapping, now translate what we
 * now know into VMA flags
 */
static unsigned long determine_vm_flags(struct file *file,
					unsigned long prot,
					unsigned long flags,
					unsigned long capabilities)
{
	unsigned long vm_flags;

	vm_flags = calc_vm_prot_bits(prot) | calc_vm_flag_bits(flags);
	vm_flags |= VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
	/* vm_flags |= mm->def_flags; */

	if (!(capabilities & BDI_CAP_MAP_DIRECT)) {
		/* attempt to share read-only copies of mapped file chunks */
		if (file && !(prot & PROT_WRITE))
			vm_flags |= VM_MAYSHARE;
	} else {
		/* overlay a shareable mapping on the backing device or inode
		 * if possible - used for chardevs, ramfs/tmpfs/shmfs and
		 * romfs/cramfs */
		if (flags & MAP_SHARED)
			vm_flags |= VM_MAYSHARE | VM_SHARED;
		else if ((((vm_flags & capabilities) ^ vm_flags) & BDI_CAP_VMFLAGS) == 0)
			vm_flags |= VM_MAYSHARE;
	}

	/* refuse to let anyone share private mappings with this process if
	 * it's being traced - otherwise breakpoints set in it may interfere
	 * with another untraced process
	 */
	if ((flags & MAP_PRIVATE) && (current->ptrace & PT_PTRACED))
		vm_flags &= ~VM_MAYSHARE;

	return vm_flags;
}

/*
 * set up a shared mapping on a file
 */
static int do_mmap_shared_file(struct vm_area_struct *vma, unsigned long len)
{
	int ret;

	ret = vma->vm_file->f_op->mmap(vma->vm_file, vma);
	if (ret != -ENOSYS)
		return ret;

	/* getting an ENOSYS error indicates that direct mmap isn't
	 * possible (as opposed to tried but failed) so we'll fall
	 * through to making a private copy of the data and mapping
	 * that if we can */
	return -ENODEV;
}
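/*
 * Illustrative note (not part of the original file): a NOMMU-capable driver
 * that cannot map a particular request directly should return -ENOSYS from
 * its ->mmap() handler, meaning "direct mapping is not possible" rather than
 * "tried but failed".  do_mmap_private() below then falls back to making a
 * private copy, while do_mmap_shared_file() above turns it into -ENODEV
 * because a shared mapping cannot be emulated by copying.  A hypothetical
 * driver sketch:
 *
 *	static int foo_mmap(struct file *file, struct vm_area_struct *vma)
 *	{
 *		if (vma->vm_pgoff != 0)
 *			return -ENOSYS;
 *		return 0;
 *	}
 */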
/*
 * set up a private mapping or an anonymous shared mapping
 */
static int do_mmap_private(struct vm_area_struct *vma, unsigned long len)
{
	void *base;
	int ret;

	/* invoke the file's mapping function so that it can keep track of
	 * shared mappings on devices or memory
	 * - VM_MAYSHARE will be set if it may attempt to share
	 */
	if (vma->vm_file) {
		ret = vma->vm_file->f_op->mmap(vma->vm_file, vma);
		if (ret != -ENOSYS) {
			/* shouldn't return success if we're not sharing */
			BUG_ON(ret == 0 && !(vma->vm_flags & VM_MAYSHARE));
			return ret;	/* success or a real error */
		}

		/* getting an ENOSYS error indicates that direct mmap isn't
		 * possible (as opposed to tried but failed) so we'll try to
		 * make a private copy of the data and map that instead */
	}

	/* allocate some memory to hold the mapping
	 * - note that this may not return a page-aligned address if the object
	 *   we're allocating is smaller than a page
	 */
	base = kmalloc(len, GFP_KERNEL|__GFP_COMP);
	if (!base)
		goto enomem;

	vma->vm_start = (unsigned long) base;
	vma->vm_end = vma->vm_start + len;
	vma->vm_flags |= VM_MAPPED_COPY;

#ifdef WARN_ON_SLACK
	if (len + WARN_ON_SLACK <= kobjsize(base))
		printk("Allocation of %lu bytes from process %d has %lu bytes of slack\n",
		       len, current->pid, kobjsize(base) - len);
#endif

	if (vma->vm_file) {
		/* read the contents of a file into the copy */
		mm_segment_t old_fs;
		loff_t fpos;

		fpos = vma->vm_pgoff;
		fpos <<= PAGE_SHIFT;

		old_fs = get_fs();
		set_fs(KERNEL_DS);
		ret = vma->vm_file->f_op->read(vma->vm_file, base, len, &fpos);
		set_fs(old_fs);

		if (ret < 0)
			goto error_free;

		/* clear the last little bit */
		if (ret < len)
			memset(base + ret, 0, len - ret);

	} else {
		/* if it's an anonymous mapping, then just clear it */
		memset(base, 0, len);
	}

	return 0;

error_free:
	kfree(base);
	vma->vm_start = 0;
	return ret;

enomem:
	printk("Allocation of length %lu from process %d failed\n",
	       len, current->pid);
	show_free_areas();
	return -ENOMEM;
}
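/*
 * Illustrative note (not part of the original file): because the private
 * copy above comes from kmalloc(), the block actually handed out may be
 * larger than requested.  With a power-of-two slab allocator, a 3-page
 * (12288 byte) private mapping is typically backed by a 16384 byte object,
 * so askedalloc grows by 12288 while realalloc grows by 16384 in
 * do_mmap_pgoff() below; kobjsize() reports the real size and the optional
 * WARN_ON_SLACK check above reports the difference.
 */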
/*
 * handle mapping creation for uClinux
 */
unsigned long do_mmap_pgoff(struct file *file,
			    unsigned long addr,
			    unsigned long len,
			    unsigned long prot,
			    unsigned long flags,
			    unsigned long pgoff)
{
	struct vm_list_struct *vml = NULL;
	struct vm_area_struct *vma = NULL;
	struct rb_node *rb;
	unsigned long capabilities, vm_flags;
	void *result;
	int ret;

	if (!(flags & MAP_FIXED))
		addr = round_hint_to_min(addr);

	/* decide whether we should attempt the mapping, and if so what sort of
	 * mapping */
	ret = validate_mmap_request(file, addr, len, prot, flags, pgoff,
				    &capabilities);
	if (ret < 0)
		return ret;

	/* we've determined that we can make the mapping, now translate what we
	 * now know into VMA flags */
	vm_flags = determine_vm_flags(file, prot, flags, capabilities);

	/* we're going to need to record the mapping if it works */
	vml = kzalloc(sizeof(struct vm_list_struct), GFP_KERNEL);
	if (!vml)
		goto error_getting_vml;

	down_write(&nommu_vma_sem);

	/* if we want to share, we need to check for VMAs created by other
	 * mmap() calls that overlap with our proposed mapping
	 * - we can only share with an exact match on most regular files
	 * - shared mappings on character devices and memory backed files are
	 *   permitted to overlap inexactly as far as we are concerned, for in
	 *   these cases sharing is handled in the driver or filesystem rather
	 *   than here
	 */
	if (vm_flags & VM_MAYSHARE) {
		unsigned long pglen = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
		unsigned long vmpglen;

		/* suppress VMA sharing for shared regions */
		if (vm_flags & VM_SHARED &&
		    capabilities & BDI_CAP_MAP_DIRECT)
			goto dont_share_VMAs;

		for (rb = rb_first(&nommu_vma_tree); rb; rb = rb_next(rb)) {
			vma = rb_entry(rb, struct vm_area_struct, vm_rb);

			if (!(vma->vm_flags & VM_MAYSHARE))
				continue;

			/* search for overlapping mappings on the same file */
			if (vma->vm_file->f_path.dentry->d_inode != file->f_path.dentry->d_inode)
				continue;

			if (vma->vm_pgoff >= pgoff + pglen)
				continue;

			vmpglen = vma->vm_end - vma->vm_start + PAGE_SIZE - 1;
			vmpglen >>= PAGE_SHIFT;
			if (pgoff >= vma->vm_pgoff + vmpglen)
				continue;

			/* handle inexactly overlapping matches between mappings */
			if (vma->vm_pgoff != pgoff || vmpglen != pglen) {
				if (!(capabilities & BDI_CAP_MAP_DIRECT))
					goto sharing_violation;
				continue;
			}

			/* we've found a VMA we can share */
			atomic_inc(&vma->vm_usage);

			vml->vma = vma;
			result = (void *) vma->vm_start;
			goto shared;
		}

	dont_share_VMAs:
		vma = NULL;

		/* obtain the address at which to make a shared mapping
		 * - this is the hook for quasi-memory character devices to
		 *   tell us the location of a shared mapping
		 */
		if (file && file->f_op->get_unmapped_area) {
			addr = file->f_op->get_unmapped_area(file, addr, len,
							     pgoff, flags);
			if (IS_ERR((void *) addr)) {
				ret = addr;
				if (ret != (unsigned long) -ENOSYS)
					goto error;

				/* the driver refused to tell us where to site
				 * the mapping so we'll have to attempt to copy
				 * it */
				ret = (unsigned long) -ENODEV;
				if (!(capabilities & BDI_CAP_MAP_COPY))
					goto error;

				capabilities &= ~BDI_CAP_MAP_DIRECT;
			}
		}
	}

	/* we're going to need a VMA struct as well */
	vma = kzalloc(sizeof(struct vm_area_struct), GFP_KERNEL);
	if (!vma)
		goto error_getting_vma;

	INIT_LIST_HEAD(&vma->anon_vma_node);
	atomic_set(&vma->vm_usage, 1);
	if (file) {
		get_file(file);
		if (vm_flags & VM_EXECUTABLE) {
			added_exe_file_vma(current->mm);
			vma->vm_mm = current->mm;
		}
	}
	vma->vm_file	= file;
	vma->vm_flags	= vm_flags;
	vma->vm_start	= addr;
	vma->vm_end	= addr + len;
	vma->vm_pgoff	= pgoff;

	vml->vma = vma;

	/* set up the mapping */
	if (file && vma->vm_flags & VM_SHARED)
		ret = do_mmap_shared_file(vma, len);
	else
		ret = do_mmap_private(vma, len);
	if (ret < 0)
		goto error;

	/* okay... we have a mapping; now we have to register it */
	result = (void *) vma->vm_start;

	if (vma->vm_flags & VM_MAPPED_COPY) {
		realalloc += kobjsize(result);
		askedalloc += len;
	}

	realalloc += kobjsize(vma);
	askedalloc += sizeof(*vma);

	current->mm->total_vm += len >> PAGE_SHIFT;

	add_nommu_vma(vma);

shared:
	realalloc += kobjsize(vml);
	askedalloc += sizeof(*vml);

	add_vma_to_mm(current->mm, vml);

	up_write(&nommu_vma_sem);

	if (prot & PROT_EXEC)
		flush_icache_range((unsigned long) result,
				   (unsigned long) result + len);

#ifdef DEBUG
	printk("do_mmap:\n");
	show_process_blocks();
#endif

	return (unsigned long) result;

error:
	up_write(&nommu_vma_sem);
	kfree(vml);
	if (vma) {
		if (vma->vm_file) {
			fput(vma->vm_file);
			if (vma->vm_flags & VM_EXECUTABLE)
				removed_exe_file_vma(vma->vm_mm);
		}
		kfree(vma);
	}
	return ret;

sharing_violation:
	up_write(&nommu_vma_sem);
	printk("Attempt to share mismatched mappings\n");
	kfree(vml);
	return -EINVAL;

error_getting_vma:
	up_write(&nommu_vma_sem);
	kfree(vml);
	printk("Allocation of vma for %lu byte allocation from process %d failed\n",
	       len, current->pid);
	show_free_areas();
	return -ENOMEM;

error_getting_vml:
	printk("Allocation of vml for %lu byte allocation from process %d failed\n",
	       len, current->pid);
	show_free_areas();
	return -ENOMEM;
}
EXPORT_SYMBOL(do_mmap_pgoff);
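/*
 * Illustrative note (not part of the original file): shareable VMAs are
 * reference counted via vm_usage.  do_mmap_pgoff() either creates a VMA
 * with vm_usage of 1 or, when an identical shareable mapping already
 * exists, bumps the existing VMA's count and attaches a fresh
 * vm_list_struct to the caller's mm; put_vma() below drops the count and
 * only tears the VMA down when the last user has gone.
 */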
/*
 * handle mapping disposal for uClinux
 */
static void put_vma(struct mm_struct *mm, struct vm_area_struct *vma)
{
	if (vma) {
		down_write(&nommu_vma_sem);

		if (atomic_dec_and_test(&vma->vm_usage)) {
			delete_nommu_vma(vma);

			if (vma->vm_ops && vma->vm_ops->close)
				vma->vm_ops->close(vma);

			/* IO memory and memory shared directly out of the pagecache from
			 * ramfs/tmpfs mustn't be released here */
			if (vma->vm_flags & VM_MAPPED_COPY) {
				realalloc -= kobjsize((void *) vma->vm_start);
				askedalloc -= vma->vm_end - vma->vm_start;
				kfree((void *) vma->vm_start);
			}

			realalloc -= kobjsize(vma);
			askedalloc -= sizeof(*vma);

			if (vma->vm_file) {
				fput(vma->vm_file);
				if (vma->vm_flags & VM_EXECUTABLE)
					removed_exe_file_vma(mm);
			}
			kfree(vma);
		}

		up_write(&nommu_vma_sem);
	}
}

/*
 * release a mapping
 * - under NOMMU conditions the parameters must match exactly to the mapping to
 *   be removed
 */
int do_munmap(struct mm_struct *mm, unsigned long addr, size_t len)
{
	struct vm_list_struct *vml, **parent;
	unsigned long end = addr + len;

#ifdef DEBUG
	printk("do_munmap:\n");
#endif

	for (parent = &mm->context.vmlist; *parent; parent = &(*parent)->next) {
		if ((*parent)->vma->vm_start > addr)
			break;
		if ((*parent)->vma->vm_start == addr &&
		    ((len == 0) || ((*parent)->vma->vm_end == end)))
			goto found;
	}

	printk("munmap of non-mmaped memory by process %d (%s): %p\n",
	       current->pid, current->comm, (void *) addr);
	return -EINVAL;

found:
	vml = *parent;

	put_vma(mm, vml->vma);

	*parent = vml->next;
	realalloc -= kobjsize(vml);
	askedalloc -= sizeof(*vml);
	kfree(vml);

	update_hiwater_vm(mm);
	mm->total_vm -= len >> PAGE_SHIFT;

#ifdef DEBUG
	show_process_blocks();
#endif

	return 0;
}
EXPORT_SYMBOL(do_munmap);

asmlinkage long sys_munmap(unsigned long addr, size_t len)
{
	int ret;
	struct mm_struct *mm = current->mm;

	down_write(&mm->mmap_sem);
	ret = do_munmap(mm, addr, len);
	up_write(&mm->mmap_sem);
	return ret;
}
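/*
 * Illustrative note (not part of the original file): partial unmapping is
 * not supported here - the (addr, len) pair passed to munmap() must match
 * an existing mapping exactly (or len may be 0 to match on address alone).
 * A process that mmap()s two pages and then tries to munmap() just the
 * second one gets -EINVAL rather than a split mapping.
 */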
/*
 * Release all mappings
 */
void exit_mmap(struct mm_struct *mm)
{
	struct vm_list_struct *tmp;

	if (mm) {
#ifdef DEBUG
		printk("Exit_mmap:\n");
#endif

		mm->total_vm = 0;

		while ((tmp = mm->context.vmlist)) {
			mm->context.vmlist = tmp->next;
			put_vma(mm, tmp->vma);

			realalloc -= kobjsize(tmp);
			askedalloc -= sizeof(*tmp);
			kfree(tmp);
		}

#ifdef DEBUG
		show_process_blocks();
#endif
	}
}

unsigned long do_brk(unsigned long addr, unsigned long len)
{
	return -ENOMEM;
}

/*
 * expand (or shrink) an existing mapping, potentially moving it at the same
 * time (controlled by the MREMAP_MAYMOVE flag and available VM space)
 *
 * under NOMMU conditions, we only permit changing a mapping's size, and only
 * as long as it stays within the hole allocated by the kmalloc() call in
 * do_mmap_pgoff() and the block is not shareable
 *
 * MREMAP_FIXED is not supported under NOMMU conditions
 */
unsigned long do_mremap(unsigned long addr,
			unsigned long old_len, unsigned long new_len,
			unsigned long flags, unsigned long new_addr)
{
	struct vm_area_struct *vma;

	/* insanity checks first */
	if (new_len == 0)
		return (unsigned long) -EINVAL;

	if (flags & MREMAP_FIXED && new_addr != addr)
		return (unsigned long) -EINVAL;

	vma = find_vma_exact(current->mm, addr);
	if (!vma)
		return (unsigned long) -EINVAL;

	if (vma->vm_end != vma->vm_start + old_len)
		return (unsigned long) -EFAULT;

	if (vma->vm_flags & VM_MAYSHARE)
		return (unsigned long) -EPERM;

	if (new_len > kobjsize((void *) addr))
		return (unsigned long) -ENOMEM;

	/* all checks complete - do it */
	vma->vm_end = vma->vm_start + new_len;

	askedalloc -= old_len;
	askedalloc += new_len;

	return vma->vm_start;
}
EXPORT_SYMBOL(do_mremap);

asmlinkage unsigned long sys_mremap(unsigned long addr,
				    unsigned long old_len, unsigned long new_len,
				    unsigned long flags, unsigned long new_addr)
{
	unsigned long ret;

	down_write(&current->mm->mmap_sem);
	ret = do_mremap(addr, old_len, new_len, flags, new_addr);
	up_write(&current->mm->mmap_sem);
	return ret;
}

struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
			 unsigned int foll_flags)
{
	return NULL;
}

int remap_pfn_range(struct vm_area_struct *vma, unsigned long from,
		    unsigned long to, unsigned long size, pgprot_t prot)
{
	vma->vm_start = vma->vm_pgoff << PAGE_SHIFT;
	return 0;
}
EXPORT_SYMBOL(remap_pfn_range);

int remap_vmalloc_range(struct vm_area_struct *vma, void *addr,
			unsigned long pgoff)
{
	unsigned int size = vma->vm_end - vma->vm_start;

	if (!(vma->vm_flags & VM_USERMAP))
		return -EINVAL;

	vma->vm_start = (unsigned long)(addr + (pgoff << PAGE_SHIFT));
	vma->vm_end = vma->vm_start + size;

	return 0;
}
EXPORT_SYMBOL(remap_vmalloc_range);

void swap_unplug_io_fn(struct backing_dev_info *bdi, struct page *page)
{
}

unsigned long arch_get_unmapped_area(struct file *file, unsigned long addr,
				     unsigned long len, unsigned long pgoff,
				     unsigned long flags)
{
	return -ENOMEM;
}

void arch_unmap_area(struct mm_struct *mm, unsigned long addr)
{
}

void unmap_mapping_range(struct address_space *mapping,
			 loff_t const holebegin, loff_t const holelen,
			 int even_cows)
{
}
EXPORT_SYMBOL(unmap_mapping_range);

/*
 * ask for an unmapped area at which to create a mapping on a file
 */
unsigned long get_unmapped_area(struct file *file, unsigned long addr,
				unsigned long len, unsigned long pgoff,
				unsigned long flags)
{
	unsigned long (*get_area)(struct file *, unsigned long, unsigned long,
				  unsigned long, unsigned long);

	get_area = current->mm->get_unmapped_area;
	if (file && file->f_op && file->f_op->get_unmapped_area)
		get_area = file->f_op->get_unmapped_area;

	if (!get_area)
		return -ENOSYS;

	return get_area(file, addr, len, pgoff, flags);
}
EXPORT_SYMBOL(get_unmapped_area);
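/*
 * Illustrative example (not part of the original file) for the strict
 * overcommit policy handled below (OVERCOMMIT_NEVER): on a swapless
 * system with 16384 pages of RAM and the default overcommit_ratio of 50,
 * the commit limit is 16384 * 50 / 100 = 8192 pages; a non-root caller
 * loses a further 3% (8192 / 32 = 256 pages), leaving roughly 7936 pages
 * of allowed commitment before new mappings start failing with -ENOMEM.
 */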
/*
 * Check that a process has enough memory to allocate a new virtual
 * mapping. 0 means there is enough memory for the allocation to
 * succeed and -ENOMEM implies there is not.
 *
 * We currently support three overcommit policies, which are set via the
 * vm.overcommit_memory sysctl.  See Documentation/vm/overcommit-accounting
 *
 * Strict overcommit modes added 2002 Feb 26 by Alan Cox.
 * Additional code 2002 Jul 20 by Robert Love.
 *
 * cap_sys_admin is 1 if the process has admin privileges, 0 otherwise.
 *
 * Note this is a helper function intended to be used by LSMs which
 * wish to use this logic.
 */
int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
{
	unsigned long free, allowed;

	vm_acct_memory(pages);

	/*
	 * Sometimes we want to use more memory than we have
	 */
	if (sysctl_overcommit_memory == OVERCOMMIT_ALWAYS)
		return 0;

	if (sysctl_overcommit_memory == OVERCOMMIT_GUESS) {
		unsigned long n;

		free = global_page_state(NR_FILE_PAGES);
		free += nr_swap_pages;

		/*
		 * Any slabs which are created with the
		 * SLAB_RECLAIM_ACCOUNT flag claim to have contents
		 * which are reclaimable, under pressure.  The dentry
		 * cache and most inode caches should fall into this
		 */
		free += global_page_state(NR_SLAB_RECLAIMABLE);

		/*
		 * Leave the last 3% for root
		 */
		if (!cap_sys_admin)
			free -= free / 32;

		if (free > pages)
			return 0;

		/*
		 * nr_free_pages() is very expensive on large systems,
		 * only call if we're about to fail.
		 */
		n = nr_free_pages();

		/*
		 * Leave reserved pages. The pages are not for anonymous pages.
		 */
		if (n <= totalreserve_pages)
			goto error;
		else
			n -= totalreserve_pages;

		/*
		 * Leave the last 3% for root
		 */
		if (!cap_sys_admin)
			n -= n / 32;
		free += n;

		if (free > pages)
			return 0;

		goto error;
	}

	allowed = totalram_pages * sysctl_overcommit_ratio / 100;
	/*
	 * Leave the last 3% for root
	 */
	if (!cap_sys_admin)
		allowed -= allowed / 32;
	allowed += total_swap_pages;

	/* Don't let a single process grow too big:
	   leave 3% of the size of this process for other processes */
	allowed -= current->mm->total_vm / 32;

	/*
	 * cast `allowed' as a signed long because vm_committed_space
	 * sometimes has a negative value
	 */
	if (atomic_read(&vm_committed_space) < (long)allowed)
		return 0;
error:
	vm_unacct_memory(pages);

	return -ENOMEM;
}

int in_gate_area_no_task(unsigned long addr)
{
	return 0;
}

int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	BUG();
	return 0;
}
EXPORT_SYMBOL(filemap_fault);

/*
 * Access another process' address space.
 * - source/target buffer must be kernel space
 */
int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write)
{
	struct vm_area_struct *vma;
	struct mm_struct *mm;

	if (addr + len < addr)
		return 0;

	mm = get_task_mm(tsk);
	if (!mm)
		return 0;

	down_read(&mm->mmap_sem);

	/* the access must start within one of the target process's mappings */
	vma = find_vma(mm, addr);
	if (vma) {
		/* don't overrun this mapping */
		if (addr + len >= vma->vm_end)
			len = vma->vm_end - addr;

		/* only read or write mappings where it is permitted */
		if (write && vma->vm_flags & VM_MAYWRITE)
			len -= copy_to_user((void *) addr, buf, len);
		else if (!write && vma->vm_flags & VM_MAYREAD)
			len -= copy_from_user(buf, (void *) addr, len);
		else
			len = 0;
	} else {
		len = 0;
	}

	up_read(&mm->mmap_sem);
	mmput(mm);
	return len;
}