1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * linux/mm/nommu.c 4 * 5 * Replacement code for mm functions to support CPU's that don't 6 * have any form of memory management unit (thus no virtual memory). 7 * 8 * See Documentation/nommu-mmap.txt 9 * 10 * Copyright (c) 2004-2008 David Howells <dhowells@redhat.com> 11 * Copyright (c) 2000-2003 David McCullough <davidm@snapgear.com> 12 * Copyright (c) 2000-2001 D Jeff Dionne <jeff@uClinux.org> 13 * Copyright (c) 2002 Greg Ungerer <gerg@snapgear.com> 14 * Copyright (c) 2007-2010 Paul Mundt <lethal@linux-sh.org> 15 */ 16 17 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 18 19 #include <linux/export.h> 20 #include <linux/mm.h> 21 #include <linux/sched/mm.h> 22 #include <linux/vmacache.h> 23 #include <linux/mman.h> 24 #include <linux/swap.h> 25 #include <linux/file.h> 26 #include <linux/highmem.h> 27 #include <linux/pagemap.h> 28 #include <linux/slab.h> 29 #include <linux/vmalloc.h> 30 #include <linux/blkdev.h> 31 #include <linux/backing-dev.h> 32 #include <linux/compiler.h> 33 #include <linux/mount.h> 34 #include <linux/personality.h> 35 #include <linux/security.h> 36 #include <linux/syscalls.h> 37 #include <linux/audit.h> 38 #include <linux/printk.h> 39 40 #include <linux/uaccess.h> 41 #include <asm/tlb.h> 42 #include <asm/tlbflush.h> 43 #include <asm/mmu_context.h> 44 #include "internal.h" 45 46 void *high_memory; 47 EXPORT_SYMBOL(high_memory); 48 struct page *mem_map; 49 unsigned long max_mapnr; 50 EXPORT_SYMBOL(max_mapnr); 51 unsigned long highest_memmap_pfn; 52 int sysctl_nr_trim_pages = CONFIG_NOMMU_INITIAL_TRIM_EXCESS; 53 int heap_stack_gap = 0; 54 55 atomic_long_t mmap_pages_allocated; 56 57 EXPORT_SYMBOL(mem_map); 58 59 /* list of mapped, potentially shareable regions */ 60 static struct kmem_cache *vm_region_jar; 61 struct rb_root nommu_region_tree = RB_ROOT; 62 DECLARE_RWSEM(nommu_region_sem); 63 64 const struct vm_operations_struct generic_file_vm_ops = { 65 }; 66 67 /* 68 * Return the total memory allocated 
for this pointer, not 69 * just what the caller asked for. 70 * 71 * Doesn't have to be accurate, i.e. may have races. 72 */ 73 unsigned int kobjsize(const void *objp) 74 { 75 struct page *page; 76 77 /* 78 * If the object we have should not have ksize performed on it, 79 * return size of 0 80 */ 81 if (!objp || !virt_addr_valid(objp)) 82 return 0; 83 84 page = virt_to_head_page(objp); 85 86 /* 87 * If the allocator sets PageSlab, we know the pointer came from 88 * kmalloc(). 89 */ 90 if (PageSlab(page)) 91 return ksize(objp); 92 93 /* 94 * If it's not a compound page, see if we have a matching VMA 95 * region. This test is intentionally done in reverse order, 96 * so if there's no VMA, we still fall through and hand back 97 * PAGE_SIZE for 0-order pages. 98 */ 99 if (!PageCompound(page)) { 100 struct vm_area_struct *vma; 101 102 vma = find_vma(current->mm, (unsigned long)objp); 103 if (vma) 104 return vma->vm_end - vma->vm_start; 105 } 106 107 /* 108 * The ksize() function is only guaranteed to work for pointers 109 * returned by kmalloc(). So handle arbitrary pointers here. 110 */ 111 return page_size(page); 112 } 113 114 /** 115 * follow_pfn - look up PFN at a user virtual address 116 * @vma: memory mapping 117 * @address: user virtual address 118 * @pfn: location to store found PFN 119 * 120 * Only IO mappings and raw PFN mappings are allowed. 121 * 122 * Returns zero and the pfn at @pfn on success, -ve otherwise. 
123 */ 124 int follow_pfn(struct vm_area_struct *vma, unsigned long address, 125 unsigned long *pfn) 126 { 127 if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) 128 return -EINVAL; 129 130 *pfn = address >> PAGE_SHIFT; 131 return 0; 132 } 133 EXPORT_SYMBOL(follow_pfn); 134 135 LIST_HEAD(vmap_area_list); 136 137 void vfree(const void *addr) 138 { 139 kfree(addr); 140 } 141 EXPORT_SYMBOL(vfree); 142 143 void *__vmalloc(unsigned long size, gfp_t gfp_mask) 144 { 145 /* 146 * You can't specify __GFP_HIGHMEM with kmalloc() since kmalloc() 147 * returns only a logical address. 148 */ 149 return kmalloc(size, (gfp_mask | __GFP_COMP) & ~__GFP_HIGHMEM); 150 } 151 EXPORT_SYMBOL(__vmalloc); 152 153 void *__vmalloc_node_flags(unsigned long size, int node, gfp_t flags) 154 { 155 return __vmalloc(size, flags); 156 } 157 158 static void *__vmalloc_user_flags(unsigned long size, gfp_t flags) 159 { 160 void *ret; 161 162 ret = __vmalloc(size, flags); 163 if (ret) { 164 struct vm_area_struct *vma; 165 166 down_write(¤t->mm->mmap_sem); 167 vma = find_vma(current->mm, (unsigned long)ret); 168 if (vma) 169 vma->vm_flags |= VM_USERMAP; 170 up_write(¤t->mm->mmap_sem); 171 } 172 173 return ret; 174 } 175 176 void *vmalloc_user(unsigned long size) 177 { 178 return __vmalloc_user_flags(size, GFP_KERNEL | __GFP_ZERO); 179 } 180 EXPORT_SYMBOL(vmalloc_user); 181 182 void *vmalloc_user_node_flags(unsigned long size, int node, gfp_t flags) 183 { 184 return __vmalloc_user_flags(size, flags | __GFP_ZERO); 185 } 186 EXPORT_SYMBOL(vmalloc_user_node_flags); 187 188 struct page *vmalloc_to_page(const void *addr) 189 { 190 return virt_to_page(addr); 191 } 192 EXPORT_SYMBOL(vmalloc_to_page); 193 194 unsigned long vmalloc_to_pfn(const void *addr) 195 { 196 return page_to_pfn(virt_to_page(addr)); 197 } 198 EXPORT_SYMBOL(vmalloc_to_pfn); 199 200 long vread(char *buf, char *addr, unsigned long count) 201 { 202 /* Don't allow overflow */ 203 if ((unsigned long) buf + count < count) 204 count = -(unsigned long) buf; 
205 206 memcpy(buf, addr, count); 207 return count; 208 } 209 210 long vwrite(char *buf, char *addr, unsigned long count) 211 { 212 /* Don't allow overflow */ 213 if ((unsigned long) addr + count < count) 214 count = -(unsigned long) addr; 215 216 memcpy(addr, buf, count); 217 return count; 218 } 219 220 /* 221 * vmalloc - allocate virtually contiguous memory 222 * 223 * @size: allocation size 224 * 225 * Allocate enough pages to cover @size from the page level 226 * allocator and map them into contiguous kernel virtual space. 227 * 228 * For tight control over page level allocator and protection flags 229 * use __vmalloc() instead. 230 */ 231 void *vmalloc(unsigned long size) 232 { 233 return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM); 234 } 235 EXPORT_SYMBOL(vmalloc); 236 237 /* 238 * vzalloc - allocate virtually contiguous memory with zero fill 239 * 240 * @size: allocation size 241 * 242 * Allocate enough pages to cover @size from the page level 243 * allocator and map them into contiguous kernel virtual space. 244 * The memory allocated is set to zero. 245 * 246 * For tight control over page level allocator and protection flags 247 * use __vmalloc() instead. 248 */ 249 void *vzalloc(unsigned long size) 250 { 251 return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO); 252 } 253 EXPORT_SYMBOL(vzalloc); 254 255 /** 256 * vmalloc_node - allocate memory on a specific node 257 * @size: allocation size 258 * @node: numa node 259 * 260 * Allocate enough pages to cover @size from the page level 261 * allocator and map them into contiguous kernel virtual space. 262 * 263 * For tight control over page level allocator and protection flags 264 * use __vmalloc() instead. 
*/
void *vmalloc_node(unsigned long size, int node)
{
	/* @node is not used; this is plain vmalloc() */
	return vmalloc(size);
}
EXPORT_SYMBOL(vmalloc_node);

/**
 * vzalloc_node - allocate memory on a specific node with zero fill
 * @size:	allocation size
 * @node:	numa node
 *
 * Allocate enough pages to cover @size from the page level
 * allocator and map them into contiguous kernel virtual space.
 * The memory allocated is set to zero.
 *
 * For tight control over page level allocator and protection flags
 * use __vmalloc() instead.
 */
void *vzalloc_node(unsigned long size, int node)
{
	/* @node is not used; this is plain vzalloc() */
	return vzalloc(size);
}
EXPORT_SYMBOL(vzalloc_node);

/**
 *	vmalloc_exec  -  allocate virtually contiguous, executable memory
 *	@size:		allocation size
 *
 *	Kernel-internal function to allocate enough pages to cover @size
 *	from the page level allocator and map them into contiguous and
 *	executable kernel virtual space.
 *
 *	For tight control over page level allocator and protection flags
 *	use __vmalloc() instead.
 */

void *vmalloc_exec(unsigned long size)
{
	/* same flags as vmalloc(); nothing extra is requested here */
	return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM);
}

/**
 * vmalloc_32 - allocate virtually contiguous memory (32bit addressable)
 *	@size:		allocation size
 *
 *	Allocate enough 32bit PA addressable pages to cover @size from the
 *	page level allocator and map them into contiguous kernel virtual space.
 */
void *vmalloc_32(unsigned long size)
{
	return __vmalloc(size, GFP_KERNEL);
}
EXPORT_SYMBOL(vmalloc_32);

/**
 * vmalloc_32_user - allocate zeroed virtually contiguous 32bit memory
 *	@size:		allocation size
 *
 * The resulting memory area is 32bit addressable and zeroed so it can be
 * mapped to userspace without leaking data.
 *
 * VM_USERMAP is set on the corresponding VMA so that subsequent calls to
 * remap_vmalloc_range() are permissible.
*/
void *vmalloc_32_user(unsigned long size)
{
	/*
	 * We'll have to sort out the ZONE_DMA bits for 64-bit,
	 * but for now this can simply use vmalloc_user() directly.
	 */
	return vmalloc_user(size);
}
EXPORT_SYMBOL(vmalloc_32_user);

/*
 * vmap - not implemented here; any call hits BUG()
 */
void *vmap(struct page **pages, unsigned int count, unsigned long flags, pgprot_t prot)
{
	BUG();
	return NULL;
}
EXPORT_SYMBOL(vmap);

/*
 * vunmap - not implemented here; any call hits BUG()
 */
void vunmap(const void *addr)
{
	BUG();
}
EXPORT_SYMBOL(vunmap);

/*
 * vm_map_ram - not implemented here; any call hits BUG()
 */
void *vm_map_ram(struct page **pages, unsigned int count, int node)
{
	BUG();
	return NULL;
}
EXPORT_SYMBOL(vm_map_ram);

/*
 * vm_unmap_ram - not implemented here; any call hits BUG()
 */
void vm_unmap_ram(const void *mem, unsigned int count)
{
	BUG();
}
EXPORT_SYMBOL(vm_unmap_ram);

/*
 * vm_unmap_aliases - empty stub; does nothing
 */
void vm_unmap_aliases(void)
{
}
EXPORT_SYMBOL_GPL(vm_unmap_aliases);

/*
 * Implement a stub for vmalloc_sync_[un]mappings() if the architecture
 * chose not to have one.
 */
void __weak vmalloc_sync_mappings(void)
{
}

void __weak vmalloc_sync_unmappings(void)
{
}

/*
 * alloc_vm_area - not implemented here; any call hits BUG()
 */
struct vm_struct *alloc_vm_area(size_t size, pte_t **ptes)
{
	BUG();
	return NULL;
}
EXPORT_SYMBOL_GPL(alloc_vm_area);

/*
 * free_vm_area - not implemented here; any call hits BUG()
 */
void free_vm_area(struct vm_struct *area)
{
	BUG();
}
EXPORT_SYMBOL_GPL(free_vm_area);

/*
 * vm_insert_page - always fails with -EINVAL here
 */
int vm_insert_page(struct vm_area_struct *vma, unsigned long addr,
		   struct page *page)
{
	return -EINVAL;
}
EXPORT_SYMBOL(vm_insert_page);

/*
 * vm_map_pages - always fails with -EINVAL here
 */
int vm_map_pages(struct vm_area_struct *vma, struct page **pages,
			unsigned long num)
{
	return -EINVAL;
}
EXPORT_SYMBOL(vm_map_pages);

/*
 * vm_map_pages_zero - always fails with -EINVAL here
 */
int vm_map_pages_zero(struct vm_area_struct *vma, struct page **pages,
				unsigned long num)
{
	return -EINVAL;
}
EXPORT_SYMBOL(vm_map_pages_zero);

/*
 *  sys_brk() for the most part doesn't need the global kernel
 *  lock, except when an application is doing something
nasty 420 * like trying to un-brk an area that has already been mapped 421 * to a regular file. in this case, the unmapping will need 422 * to invoke file system routines that need the global lock. 423 */ 424 SYSCALL_DEFINE1(brk, unsigned long, brk) 425 { 426 struct mm_struct *mm = current->mm; 427 428 if (brk < mm->start_brk || brk > mm->context.end_brk) 429 return mm->brk; 430 431 if (mm->brk == brk) 432 return mm->brk; 433 434 /* 435 * Always allow shrinking brk 436 */ 437 if (brk <= mm->brk) { 438 mm->brk = brk; 439 return brk; 440 } 441 442 /* 443 * Ok, looks good - let it rip. 444 */ 445 flush_icache_range(mm->brk, brk); 446 return mm->brk = brk; 447 } 448 449 /* 450 * initialise the percpu counter for VM and region record slabs 451 */ 452 void __init mmap_init(void) 453 { 454 int ret; 455 456 ret = percpu_counter_init(&vm_committed_as, 0, GFP_KERNEL); 457 VM_BUG_ON(ret); 458 vm_region_jar = KMEM_CACHE(vm_region, SLAB_PANIC|SLAB_ACCOUNT); 459 } 460 461 /* 462 * validate the region tree 463 * - the caller must hold the region lock 464 */ 465 #ifdef CONFIG_DEBUG_NOMMU_REGIONS 466 static noinline void validate_nommu_regions(void) 467 { 468 struct vm_region *region, *last; 469 struct rb_node *p, *lastp; 470 471 lastp = rb_first(&nommu_region_tree); 472 if (!lastp) 473 return; 474 475 last = rb_entry(lastp, struct vm_region, vm_rb); 476 BUG_ON(last->vm_end <= last->vm_start); 477 BUG_ON(last->vm_top < last->vm_end); 478 479 while ((p = rb_next(lastp))) { 480 region = rb_entry(p, struct vm_region, vm_rb); 481 last = rb_entry(lastp, struct vm_region, vm_rb); 482 483 BUG_ON(region->vm_end <= region->vm_start); 484 BUG_ON(region->vm_top < region->vm_end); 485 BUG_ON(region->vm_start < last->vm_top); 486 487 lastp = p; 488 } 489 } 490 #else 491 static void validate_nommu_regions(void) 492 { 493 } 494 #endif 495 496 /* 497 * add a region into the global tree 498 */ 499 static void add_nommu_region(struct vm_region *region) 500 { 501 struct vm_region *pregion; 502 struct 
rb_node **p, *parent; 503 504 validate_nommu_regions(); 505 506 parent = NULL; 507 p = &nommu_region_tree.rb_node; 508 while (*p) { 509 parent = *p; 510 pregion = rb_entry(parent, struct vm_region, vm_rb); 511 if (region->vm_start < pregion->vm_start) 512 p = &(*p)->rb_left; 513 else if (region->vm_start > pregion->vm_start) 514 p = &(*p)->rb_right; 515 else if (pregion == region) 516 return; 517 else 518 BUG(); 519 } 520 521 rb_link_node(®ion->vm_rb, parent, p); 522 rb_insert_color(®ion->vm_rb, &nommu_region_tree); 523 524 validate_nommu_regions(); 525 } 526 527 /* 528 * delete a region from the global tree 529 */ 530 static void delete_nommu_region(struct vm_region *region) 531 { 532 BUG_ON(!nommu_region_tree.rb_node); 533 534 validate_nommu_regions(); 535 rb_erase(®ion->vm_rb, &nommu_region_tree); 536 validate_nommu_regions(); 537 } 538 539 /* 540 * free a contiguous series of pages 541 */ 542 static void free_page_series(unsigned long from, unsigned long to) 543 { 544 for (; from < to; from += PAGE_SIZE) { 545 struct page *page = virt_to_page(from); 546 547 atomic_long_dec(&mmap_pages_allocated); 548 put_page(page); 549 } 550 } 551 552 /* 553 * release a reference to a region 554 * - the caller must hold the region semaphore for writing, which this releases 555 * - the region may not have been added to the tree yet, in which case vm_top 556 * will equal vm_start 557 */ 558 static void __put_nommu_region(struct vm_region *region) 559 __releases(nommu_region_sem) 560 { 561 BUG_ON(!nommu_region_tree.rb_node); 562 563 if (--region->vm_usage == 0) { 564 if (region->vm_top > region->vm_start) 565 delete_nommu_region(region); 566 up_write(&nommu_region_sem); 567 568 if (region->vm_file) 569 fput(region->vm_file); 570 571 /* IO memory and memory shared directly out of the pagecache 572 * from ramfs/tmpfs mustn't be released here */ 573 if (region->vm_flags & VM_MAPPED_COPY) 574 free_page_series(region->vm_start, region->vm_top); 575 kmem_cache_free(vm_region_jar, 
region); 576 } else { 577 up_write(&nommu_region_sem); 578 } 579 } 580 581 /* 582 * release a reference to a region 583 */ 584 static void put_nommu_region(struct vm_region *region) 585 { 586 down_write(&nommu_region_sem); 587 __put_nommu_region(region); 588 } 589 590 /* 591 * add a VMA into a process's mm_struct in the appropriate place in the list 592 * and tree and add to the address space's page tree also if not an anonymous 593 * page 594 * - should be called with mm->mmap_sem held writelocked 595 */ 596 static void add_vma_to_mm(struct mm_struct *mm, struct vm_area_struct *vma) 597 { 598 struct vm_area_struct *pvma, *prev; 599 struct address_space *mapping; 600 struct rb_node **p, *parent, *rb_prev; 601 602 BUG_ON(!vma->vm_region); 603 604 mm->map_count++; 605 vma->vm_mm = mm; 606 607 /* add the VMA to the mapping */ 608 if (vma->vm_file) { 609 mapping = vma->vm_file->f_mapping; 610 611 i_mmap_lock_write(mapping); 612 flush_dcache_mmap_lock(mapping); 613 vma_interval_tree_insert(vma, &mapping->i_mmap); 614 flush_dcache_mmap_unlock(mapping); 615 i_mmap_unlock_write(mapping); 616 } 617 618 /* add the VMA to the tree */ 619 parent = rb_prev = NULL; 620 p = &mm->mm_rb.rb_node; 621 while (*p) { 622 parent = *p; 623 pvma = rb_entry(parent, struct vm_area_struct, vm_rb); 624 625 /* sort by: start addr, end addr, VMA struct addr in that order 626 * (the latter is necessary as we may get identical VMAs) */ 627 if (vma->vm_start < pvma->vm_start) 628 p = &(*p)->rb_left; 629 else if (vma->vm_start > pvma->vm_start) { 630 rb_prev = parent; 631 p = &(*p)->rb_right; 632 } else if (vma->vm_end < pvma->vm_end) 633 p = &(*p)->rb_left; 634 else if (vma->vm_end > pvma->vm_end) { 635 rb_prev = parent; 636 p = &(*p)->rb_right; 637 } else if (vma < pvma) 638 p = &(*p)->rb_left; 639 else if (vma > pvma) { 640 rb_prev = parent; 641 p = &(*p)->rb_right; 642 } else 643 BUG(); 644 } 645 646 rb_link_node(&vma->vm_rb, parent, p); 647 rb_insert_color(&vma->vm_rb, &mm->mm_rb); 648 649 /* 
add VMA to the VMA list also */ 650 prev = NULL; 651 if (rb_prev) 652 prev = rb_entry(rb_prev, struct vm_area_struct, vm_rb); 653 654 __vma_link_list(mm, vma, prev); 655 } 656 657 /* 658 * delete a VMA from its owning mm_struct and address space 659 */ 660 static void delete_vma_from_mm(struct vm_area_struct *vma) 661 { 662 int i; 663 struct address_space *mapping; 664 struct mm_struct *mm = vma->vm_mm; 665 struct task_struct *curr = current; 666 667 mm->map_count--; 668 for (i = 0; i < VMACACHE_SIZE; i++) { 669 /* if the vma is cached, invalidate the entire cache */ 670 if (curr->vmacache.vmas[i] == vma) { 671 vmacache_invalidate(mm); 672 break; 673 } 674 } 675 676 /* remove the VMA from the mapping */ 677 if (vma->vm_file) { 678 mapping = vma->vm_file->f_mapping; 679 680 i_mmap_lock_write(mapping); 681 flush_dcache_mmap_lock(mapping); 682 vma_interval_tree_remove(vma, &mapping->i_mmap); 683 flush_dcache_mmap_unlock(mapping); 684 i_mmap_unlock_write(mapping); 685 } 686 687 /* remove from the MM's tree and list */ 688 rb_erase(&vma->vm_rb, &mm->mm_rb); 689 690 __vma_unlink_list(mm, vma); 691 } 692 693 /* 694 * destroy a VMA record 695 */ 696 static void delete_vma(struct mm_struct *mm, struct vm_area_struct *vma) 697 { 698 if (vma->vm_ops && vma->vm_ops->close) 699 vma->vm_ops->close(vma); 700 if (vma->vm_file) 701 fput(vma->vm_file); 702 put_nommu_region(vma->vm_region); 703 vm_area_free(vma); 704 } 705 706 /* 707 * look up the first VMA in which addr resides, NULL if none 708 * - should be called with mm->mmap_sem at least held readlocked 709 */ 710 struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr) 711 { 712 struct vm_area_struct *vma; 713 714 /* check the cache first */ 715 vma = vmacache_find(mm, addr); 716 if (likely(vma)) 717 return vma; 718 719 /* trawl the list (there may be multiple mappings in which addr 720 * resides) */ 721 for (vma = mm->mmap; vma; vma = vma->vm_next) { 722 if (vma->vm_start > addr) 723 return NULL; 724 if 
(vma->vm_end > addr) { 725 vmacache_update(addr, vma); 726 return vma; 727 } 728 } 729 730 return NULL; 731 } 732 EXPORT_SYMBOL(find_vma); 733 734 /* 735 * find a VMA 736 * - we don't extend stack VMAs under NOMMU conditions 737 */ 738 struct vm_area_struct *find_extend_vma(struct mm_struct *mm, unsigned long addr) 739 { 740 return find_vma(mm, addr); 741 } 742 743 /* 744 * expand a stack to a given address 745 * - not supported under NOMMU conditions 746 */ 747 int expand_stack(struct vm_area_struct *vma, unsigned long address) 748 { 749 return -ENOMEM; 750 } 751 752 /* 753 * look up the first VMA exactly that exactly matches addr 754 * - should be called with mm->mmap_sem at least held readlocked 755 */ 756 static struct vm_area_struct *find_vma_exact(struct mm_struct *mm, 757 unsigned long addr, 758 unsigned long len) 759 { 760 struct vm_area_struct *vma; 761 unsigned long end = addr + len; 762 763 /* check the cache first */ 764 vma = vmacache_find_exact(mm, addr, end); 765 if (vma) 766 return vma; 767 768 /* trawl the list (there may be multiple mappings in which addr 769 * resides) */ 770 for (vma = mm->mmap; vma; vma = vma->vm_next) { 771 if (vma->vm_start < addr) 772 continue; 773 if (vma->vm_start > addr) 774 return NULL; 775 if (vma->vm_end == end) { 776 vmacache_update(addr, vma); 777 return vma; 778 } 779 } 780 781 return NULL; 782 } 783 784 /* 785 * determine whether a mapping should be permitted and, if so, what sort of 786 * mapping we're capable of supporting 787 */ 788 static int validate_mmap_request(struct file *file, 789 unsigned long addr, 790 unsigned long len, 791 unsigned long prot, 792 unsigned long flags, 793 unsigned long pgoff, 794 unsigned long *_capabilities) 795 { 796 unsigned long capabilities, rlen; 797 int ret; 798 799 /* do the simple checks first */ 800 if (flags & MAP_FIXED) 801 return -EINVAL; 802 803 if ((flags & MAP_TYPE) != MAP_PRIVATE && 804 (flags & MAP_TYPE) != MAP_SHARED) 805 return -EINVAL; 806 807 if (!len) 808 return 
-EINVAL; 809 810 /* Careful about overflows.. */ 811 rlen = PAGE_ALIGN(len); 812 if (!rlen || rlen > TASK_SIZE) 813 return -ENOMEM; 814 815 /* offset overflow? */ 816 if ((pgoff + (rlen >> PAGE_SHIFT)) < pgoff) 817 return -EOVERFLOW; 818 819 if (file) { 820 /* files must support mmap */ 821 if (!file->f_op->mmap) 822 return -ENODEV; 823 824 /* work out if what we've got could possibly be shared 825 * - we support chardevs that provide their own "memory" 826 * - we support files/blockdevs that are memory backed 827 */ 828 if (file->f_op->mmap_capabilities) { 829 capabilities = file->f_op->mmap_capabilities(file); 830 } else { 831 /* no explicit capabilities set, so assume some 832 * defaults */ 833 switch (file_inode(file)->i_mode & S_IFMT) { 834 case S_IFREG: 835 case S_IFBLK: 836 capabilities = NOMMU_MAP_COPY; 837 break; 838 839 case S_IFCHR: 840 capabilities = 841 NOMMU_MAP_DIRECT | 842 NOMMU_MAP_READ | 843 NOMMU_MAP_WRITE; 844 break; 845 846 default: 847 return -EINVAL; 848 } 849 } 850 851 /* eliminate any capabilities that we can't support on this 852 * device */ 853 if (!file->f_op->get_unmapped_area) 854 capabilities &= ~NOMMU_MAP_DIRECT; 855 if (!(file->f_mode & FMODE_CAN_READ)) 856 capabilities &= ~NOMMU_MAP_COPY; 857 858 /* The file shall have been opened with read permission. 
*/ 859 if (!(file->f_mode & FMODE_READ)) 860 return -EACCES; 861 862 if (flags & MAP_SHARED) { 863 /* do checks for writing, appending and locking */ 864 if ((prot & PROT_WRITE) && 865 !(file->f_mode & FMODE_WRITE)) 866 return -EACCES; 867 868 if (IS_APPEND(file_inode(file)) && 869 (file->f_mode & FMODE_WRITE)) 870 return -EACCES; 871 872 if (locks_verify_locked(file)) 873 return -EAGAIN; 874 875 if (!(capabilities & NOMMU_MAP_DIRECT)) 876 return -ENODEV; 877 878 /* we mustn't privatise shared mappings */ 879 capabilities &= ~NOMMU_MAP_COPY; 880 } else { 881 /* we're going to read the file into private memory we 882 * allocate */ 883 if (!(capabilities & NOMMU_MAP_COPY)) 884 return -ENODEV; 885 886 /* we don't permit a private writable mapping to be 887 * shared with the backing device */ 888 if (prot & PROT_WRITE) 889 capabilities &= ~NOMMU_MAP_DIRECT; 890 } 891 892 if (capabilities & NOMMU_MAP_DIRECT) { 893 if (((prot & PROT_READ) && !(capabilities & NOMMU_MAP_READ)) || 894 ((prot & PROT_WRITE) && !(capabilities & NOMMU_MAP_WRITE)) || 895 ((prot & PROT_EXEC) && !(capabilities & NOMMU_MAP_EXEC)) 896 ) { 897 capabilities &= ~NOMMU_MAP_DIRECT; 898 if (flags & MAP_SHARED) { 899 pr_warn("MAP_SHARED not completely supported on !MMU\n"); 900 return -EINVAL; 901 } 902 } 903 } 904 905 /* handle executable mappings and implied executable 906 * mappings */ 907 if (path_noexec(&file->f_path)) { 908 if (prot & PROT_EXEC) 909 return -EPERM; 910 } else if ((prot & PROT_READ) && !(prot & PROT_EXEC)) { 911 /* handle implication of PROT_EXEC by PROT_READ */ 912 if (current->personality & READ_IMPLIES_EXEC) { 913 if (capabilities & NOMMU_MAP_EXEC) 914 prot |= PROT_EXEC; 915 } 916 } else if ((prot & PROT_READ) && 917 (prot & PROT_EXEC) && 918 !(capabilities & NOMMU_MAP_EXEC) 919 ) { 920 /* backing file is not executable, try to copy */ 921 capabilities &= ~NOMMU_MAP_DIRECT; 922 } 923 } else { 924 /* anonymous mappings are always memory backed and can be 925 * privately mapped 926 */ 
927 capabilities = NOMMU_MAP_COPY; 928 929 /* handle PROT_EXEC implication by PROT_READ */ 930 if ((prot & PROT_READ) && 931 (current->personality & READ_IMPLIES_EXEC)) 932 prot |= PROT_EXEC; 933 } 934 935 /* allow the security API to have its say */ 936 ret = security_mmap_addr(addr); 937 if (ret < 0) 938 return ret; 939 940 /* looks okay */ 941 *_capabilities = capabilities; 942 return 0; 943 } 944 945 /* 946 * we've determined that we can make the mapping, now translate what we 947 * now know into VMA flags 948 */ 949 static unsigned long determine_vm_flags(struct file *file, 950 unsigned long prot, 951 unsigned long flags, 952 unsigned long capabilities) 953 { 954 unsigned long vm_flags; 955 956 vm_flags = calc_vm_prot_bits(prot, 0) | calc_vm_flag_bits(flags); 957 /* vm_flags |= mm->def_flags; */ 958 959 if (!(capabilities & NOMMU_MAP_DIRECT)) { 960 /* attempt to share read-only copies of mapped file chunks */ 961 vm_flags |= VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC; 962 if (file && !(prot & PROT_WRITE)) 963 vm_flags |= VM_MAYSHARE; 964 } else { 965 /* overlay a shareable mapping on the backing device or inode 966 * if possible - used for chardevs, ramfs/tmpfs/shmfs and 967 * romfs/cramfs */ 968 vm_flags |= VM_MAYSHARE | (capabilities & NOMMU_VMFLAGS); 969 if (flags & MAP_SHARED) 970 vm_flags |= VM_SHARED; 971 } 972 973 /* refuse to let anyone share private mappings with this process if 974 * it's being traced - otherwise breakpoints set in it may interfere 975 * with another untraced process 976 */ 977 if ((flags & MAP_PRIVATE) && current->ptrace) 978 vm_flags &= ~VM_MAYSHARE; 979 980 return vm_flags; 981 } 982 983 /* 984 * set up a shared mapping on a file (the driver or filesystem provides and 985 * pins the storage) 986 */ 987 static int do_mmap_shared_file(struct vm_area_struct *vma) 988 { 989 int ret; 990 991 ret = call_mmap(vma->vm_file, vma); 992 if (ret == 0) { 993 vma->vm_region->vm_top = vma->vm_region->vm_end; 994 return 0; 995 } 996 if (ret != 
-ENOSYS) 997 return ret; 998 999 /* getting -ENOSYS indicates that direct mmap isn't possible (as 1000 * opposed to tried but failed) so we can only give a suitable error as 1001 * it's not possible to make a private copy if MAP_SHARED was given */ 1002 return -ENODEV; 1003 } 1004 1005 /* 1006 * set up a private mapping or an anonymous shared mapping 1007 */ 1008 static int do_mmap_private(struct vm_area_struct *vma, 1009 struct vm_region *region, 1010 unsigned long len, 1011 unsigned long capabilities) 1012 { 1013 unsigned long total, point; 1014 void *base; 1015 int ret, order; 1016 1017 /* invoke the file's mapping function so that it can keep track of 1018 * shared mappings on devices or memory 1019 * - VM_MAYSHARE will be set if it may attempt to share 1020 */ 1021 if (capabilities & NOMMU_MAP_DIRECT) { 1022 ret = call_mmap(vma->vm_file, vma); 1023 if (ret == 0) { 1024 /* shouldn't return success if we're not sharing */ 1025 BUG_ON(!(vma->vm_flags & VM_MAYSHARE)); 1026 vma->vm_region->vm_top = vma->vm_region->vm_end; 1027 return 0; 1028 } 1029 if (ret != -ENOSYS) 1030 return ret; 1031 1032 /* getting an ENOSYS error indicates that direct mmap isn't 1033 * possible (as opposed to tried but failed) so we'll try to 1034 * make a private copy of the data and map that instead */ 1035 } 1036 1037 1038 /* allocate some memory to hold the mapping 1039 * - note that this may not return a page-aligned address if the object 1040 * we're allocating is smaller than a page 1041 */ 1042 order = get_order(len); 1043 total = 1 << order; 1044 point = len >> PAGE_SHIFT; 1045 1046 /* we don't want to allocate a power-of-2 sized page set */ 1047 if (sysctl_nr_trim_pages && total - point >= sysctl_nr_trim_pages) 1048 total = point; 1049 1050 base = alloc_pages_exact(total << PAGE_SHIFT, GFP_KERNEL); 1051 if (!base) 1052 goto enomem; 1053 1054 atomic_long_add(total, &mmap_pages_allocated); 1055 1056 region->vm_flags = vma->vm_flags |= VM_MAPPED_COPY; 1057 region->vm_start = 
(unsigned long) base; 1058 region->vm_end = region->vm_start + len; 1059 region->vm_top = region->vm_start + (total << PAGE_SHIFT); 1060 1061 vma->vm_start = region->vm_start; 1062 vma->vm_end = region->vm_start + len; 1063 1064 if (vma->vm_file) { 1065 /* read the contents of a file into the copy */ 1066 loff_t fpos; 1067 1068 fpos = vma->vm_pgoff; 1069 fpos <<= PAGE_SHIFT; 1070 1071 ret = kernel_read(vma->vm_file, base, len, &fpos); 1072 if (ret < 0) 1073 goto error_free; 1074 1075 /* clear the last little bit */ 1076 if (ret < len) 1077 memset(base + ret, 0, len - ret); 1078 1079 } else { 1080 vma_set_anonymous(vma); 1081 } 1082 1083 return 0; 1084 1085 error_free: 1086 free_page_series(region->vm_start, region->vm_top); 1087 region->vm_start = vma->vm_start = 0; 1088 region->vm_end = vma->vm_end = 0; 1089 region->vm_top = 0; 1090 return ret; 1091 1092 enomem: 1093 pr_err("Allocation of length %lu from process %d (%s) failed\n", 1094 len, current->pid, current->comm); 1095 show_free_areas(0, NULL); 1096 return -ENOMEM; 1097 } 1098 1099 /* 1100 * handle mapping creation for uClinux 1101 */ 1102 unsigned long do_mmap(struct file *file, 1103 unsigned long addr, 1104 unsigned long len, 1105 unsigned long prot, 1106 unsigned long flags, 1107 vm_flags_t vm_flags, 1108 unsigned long pgoff, 1109 unsigned long *populate, 1110 struct list_head *uf) 1111 { 1112 struct vm_area_struct *vma; 1113 struct vm_region *region; 1114 struct rb_node *rb; 1115 unsigned long capabilities, result; 1116 int ret; 1117 1118 *populate = 0; 1119 1120 /* decide whether we should attempt the mapping, and if so what sort of 1121 * mapping */ 1122 ret = validate_mmap_request(file, addr, len, prot, flags, pgoff, 1123 &capabilities); 1124 if (ret < 0) 1125 return ret; 1126 1127 /* we ignore the address hint */ 1128 addr = 0; 1129 len = PAGE_ALIGN(len); 1130 1131 /* we've determined that we can make the mapping, now translate what we 1132 * now know into VMA flags */ 1133 vm_flags |= 
determine_vm_flags(file, prot, flags, capabilities); 1134 1135 /* we're going to need to record the mapping */ 1136 region = kmem_cache_zalloc(vm_region_jar, GFP_KERNEL); 1137 if (!region) 1138 goto error_getting_region; 1139 1140 vma = vm_area_alloc(current->mm); 1141 if (!vma) 1142 goto error_getting_vma; 1143 1144 region->vm_usage = 1; 1145 region->vm_flags = vm_flags; 1146 region->vm_pgoff = pgoff; 1147 1148 vma->vm_flags = vm_flags; 1149 vma->vm_pgoff = pgoff; 1150 1151 if (file) { 1152 region->vm_file = get_file(file); 1153 vma->vm_file = get_file(file); 1154 } 1155 1156 down_write(&nommu_region_sem); 1157 1158 /* if we want to share, we need to check for regions created by other 1159 * mmap() calls that overlap with our proposed mapping 1160 * - we can only share with a superset match on most regular files 1161 * - shared mappings on character devices and memory backed files are 1162 * permitted to overlap inexactly as far as we are concerned for in 1163 * these cases, sharing is handled in the driver or filesystem rather 1164 * than here 1165 */ 1166 if (vm_flags & VM_MAYSHARE) { 1167 struct vm_region *pregion; 1168 unsigned long pglen, rpglen, pgend, rpgend, start; 1169 1170 pglen = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; 1171 pgend = pgoff + pglen; 1172 1173 for (rb = rb_first(&nommu_region_tree); rb; rb = rb_next(rb)) { 1174 pregion = rb_entry(rb, struct vm_region, vm_rb); 1175 1176 if (!(pregion->vm_flags & VM_MAYSHARE)) 1177 continue; 1178 1179 /* search for overlapping mappings on the same file */ 1180 if (file_inode(pregion->vm_file) != 1181 file_inode(file)) 1182 continue; 1183 1184 if (pregion->vm_pgoff >= pgend) 1185 continue; 1186 1187 rpglen = pregion->vm_end - pregion->vm_start; 1188 rpglen = (rpglen + PAGE_SIZE - 1) >> PAGE_SHIFT; 1189 rpgend = pregion->vm_pgoff + rpglen; 1190 if (pgoff >= rpgend) 1191 continue; 1192 1193 /* handle inexactly overlapping matches between 1194 * mappings */ 1195 if ((pregion->vm_pgoff != pgoff || rpglen != pglen) && 
1196 !(pgoff >= pregion->vm_pgoff && pgend <= rpgend)) { 1197 /* new mapping is not a subset of the region */ 1198 if (!(capabilities & NOMMU_MAP_DIRECT)) 1199 goto sharing_violation; 1200 continue; 1201 } 1202 1203 /* we've found a region we can share */ 1204 pregion->vm_usage++; 1205 vma->vm_region = pregion; 1206 start = pregion->vm_start; 1207 start += (pgoff - pregion->vm_pgoff) << PAGE_SHIFT; 1208 vma->vm_start = start; 1209 vma->vm_end = start + len; 1210 1211 if (pregion->vm_flags & VM_MAPPED_COPY) 1212 vma->vm_flags |= VM_MAPPED_COPY; 1213 else { 1214 ret = do_mmap_shared_file(vma); 1215 if (ret < 0) { 1216 vma->vm_region = NULL; 1217 vma->vm_start = 0; 1218 vma->vm_end = 0; 1219 pregion->vm_usage--; 1220 pregion = NULL; 1221 goto error_just_free; 1222 } 1223 } 1224 fput(region->vm_file); 1225 kmem_cache_free(vm_region_jar, region); 1226 region = pregion; 1227 result = start; 1228 goto share; 1229 } 1230 1231 /* obtain the address at which to make a shared mapping 1232 * - this is the hook for quasi-memory character devices to 1233 * tell us the location of a shared mapping 1234 */ 1235 if (capabilities & NOMMU_MAP_DIRECT) { 1236 addr = file->f_op->get_unmapped_area(file, addr, len, 1237 pgoff, flags); 1238 if (IS_ERR_VALUE(addr)) { 1239 ret = addr; 1240 if (ret != -ENOSYS) 1241 goto error_just_free; 1242 1243 /* the driver refused to tell us where to site 1244 * the mapping so we'll have to attempt to copy 1245 * it */ 1246 ret = -ENODEV; 1247 if (!(capabilities & NOMMU_MAP_COPY)) 1248 goto error_just_free; 1249 1250 capabilities &= ~NOMMU_MAP_DIRECT; 1251 } else { 1252 vma->vm_start = region->vm_start = addr; 1253 vma->vm_end = region->vm_end = addr + len; 1254 } 1255 } 1256 } 1257 1258 vma->vm_region = region; 1259 1260 /* set up the mapping 1261 * - the region is filled in if NOMMU_MAP_DIRECT is still set 1262 */ 1263 if (file && vma->vm_flags & VM_SHARED) 1264 ret = do_mmap_shared_file(vma); 1265 else 1266 ret = do_mmap_private(vma, region, len, 
capabilities); 1267 if (ret < 0) 1268 goto error_just_free; 1269 add_nommu_region(region); 1270 1271 /* clear anonymous mappings that don't ask for uninitialized data */ 1272 if (!vma->vm_file && 1273 (!IS_ENABLED(CONFIG_MMAP_ALLOW_UNINITIALIZED) || 1274 !(flags & MAP_UNINITIALIZED))) 1275 memset((void *)region->vm_start, 0, 1276 region->vm_end - region->vm_start); 1277 1278 /* okay... we have a mapping; now we have to register it */ 1279 result = vma->vm_start; 1280 1281 current->mm->total_vm += len >> PAGE_SHIFT; 1282 1283 share: 1284 add_vma_to_mm(current->mm, vma); 1285 1286 /* we flush the region from the icache only when the first executable 1287 * mapping of it is made */ 1288 if (vma->vm_flags & VM_EXEC && !region->vm_icache_flushed) { 1289 flush_icache_range(region->vm_start, region->vm_end); 1290 region->vm_icache_flushed = true; 1291 } 1292 1293 up_write(&nommu_region_sem); 1294 1295 return result; 1296 1297 error_just_free: 1298 up_write(&nommu_region_sem); 1299 error: 1300 if (region->vm_file) 1301 fput(region->vm_file); 1302 kmem_cache_free(vm_region_jar, region); 1303 if (vma->vm_file) 1304 fput(vma->vm_file); 1305 vm_area_free(vma); 1306 return ret; 1307 1308 sharing_violation: 1309 up_write(&nommu_region_sem); 1310 pr_warn("Attempt to share mismatched mappings\n"); 1311 ret = -EINVAL; 1312 goto error; 1313 1314 error_getting_vma: 1315 kmem_cache_free(vm_region_jar, region); 1316 pr_warn("Allocation of vma for %lu byte allocation from process %d failed\n", 1317 len, current->pid); 1318 show_free_areas(0, NULL); 1319 return -ENOMEM; 1320 1321 error_getting_region: 1322 pr_warn("Allocation of vm region for %lu byte allocation from process %d failed\n", 1323 len, current->pid); 1324 show_free_areas(0, NULL); 1325 return -ENOMEM; 1326 } 1327 1328 unsigned long ksys_mmap_pgoff(unsigned long addr, unsigned long len, 1329 unsigned long prot, unsigned long flags, 1330 unsigned long fd, unsigned long pgoff) 1331 { 1332 struct file *file = NULL; 1333 unsigned 
long retval = -EBADF; 1334 1335 audit_mmap_fd(fd, flags); 1336 if (!(flags & MAP_ANONYMOUS)) { 1337 file = fget(fd); 1338 if (!file) 1339 goto out; 1340 } 1341 1342 flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); 1343 1344 retval = vm_mmap_pgoff(file, addr, len, prot, flags, pgoff); 1345 1346 if (file) 1347 fput(file); 1348 out: 1349 return retval; 1350 } 1351 1352 SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len, 1353 unsigned long, prot, unsigned long, flags, 1354 unsigned long, fd, unsigned long, pgoff) 1355 { 1356 return ksys_mmap_pgoff(addr, len, prot, flags, fd, pgoff); 1357 } 1358 1359 #ifdef __ARCH_WANT_SYS_OLD_MMAP 1360 struct mmap_arg_struct { 1361 unsigned long addr; 1362 unsigned long len; 1363 unsigned long prot; 1364 unsigned long flags; 1365 unsigned long fd; 1366 unsigned long offset; 1367 }; 1368 1369 SYSCALL_DEFINE1(old_mmap, struct mmap_arg_struct __user *, arg) 1370 { 1371 struct mmap_arg_struct a; 1372 1373 if (copy_from_user(&a, arg, sizeof(a))) 1374 return -EFAULT; 1375 if (offset_in_page(a.offset)) 1376 return -EINVAL; 1377 1378 return ksys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, 1379 a.offset >> PAGE_SHIFT); 1380 } 1381 #endif /* __ARCH_WANT_SYS_OLD_MMAP */ 1382 1383 /* 1384 * split a vma into two pieces at address 'addr', a new vma is allocated either 1385 * for the first part or the tail. 
1386 */ 1387 int split_vma(struct mm_struct *mm, struct vm_area_struct *vma, 1388 unsigned long addr, int new_below) 1389 { 1390 struct vm_area_struct *new; 1391 struct vm_region *region; 1392 unsigned long npages; 1393 1394 /* we're only permitted to split anonymous regions (these should have 1395 * only a single usage on the region) */ 1396 if (vma->vm_file) 1397 return -ENOMEM; 1398 1399 if (mm->map_count >= sysctl_max_map_count) 1400 return -ENOMEM; 1401 1402 region = kmem_cache_alloc(vm_region_jar, GFP_KERNEL); 1403 if (!region) 1404 return -ENOMEM; 1405 1406 new = vm_area_dup(vma); 1407 if (!new) { 1408 kmem_cache_free(vm_region_jar, region); 1409 return -ENOMEM; 1410 } 1411 1412 /* most fields are the same, copy all, and then fixup */ 1413 *region = *vma->vm_region; 1414 new->vm_region = region; 1415 1416 npages = (addr - vma->vm_start) >> PAGE_SHIFT; 1417 1418 if (new_below) { 1419 region->vm_top = region->vm_end = new->vm_end = addr; 1420 } else { 1421 region->vm_start = new->vm_start = addr; 1422 region->vm_pgoff = new->vm_pgoff += npages; 1423 } 1424 1425 if (new->vm_ops && new->vm_ops->open) 1426 new->vm_ops->open(new); 1427 1428 delete_vma_from_mm(vma); 1429 down_write(&nommu_region_sem); 1430 delete_nommu_region(vma->vm_region); 1431 if (new_below) { 1432 vma->vm_region->vm_start = vma->vm_start = addr; 1433 vma->vm_region->vm_pgoff = vma->vm_pgoff += npages; 1434 } else { 1435 vma->vm_region->vm_end = vma->vm_end = addr; 1436 vma->vm_region->vm_top = addr; 1437 } 1438 add_nommu_region(vma->vm_region); 1439 add_nommu_region(new->vm_region); 1440 up_write(&nommu_region_sem); 1441 add_vma_to_mm(mm, vma); 1442 add_vma_to_mm(mm, new); 1443 return 0; 1444 } 1445 1446 /* 1447 * shrink a VMA by removing the specified chunk from either the beginning or 1448 * the end 1449 */ 1450 static int shrink_vma(struct mm_struct *mm, 1451 struct vm_area_struct *vma, 1452 unsigned long from, unsigned long to) 1453 { 1454 struct vm_region *region; 1455 1456 /* adjust the 
VMA's pointers, which may reposition it in the MM's tree 1457 * and list */ 1458 delete_vma_from_mm(vma); 1459 if (from > vma->vm_start) 1460 vma->vm_end = from; 1461 else 1462 vma->vm_start = to; 1463 add_vma_to_mm(mm, vma); 1464 1465 /* cut the backing region down to size */ 1466 region = vma->vm_region; 1467 BUG_ON(region->vm_usage != 1); 1468 1469 down_write(&nommu_region_sem); 1470 delete_nommu_region(region); 1471 if (from > region->vm_start) { 1472 to = region->vm_top; 1473 region->vm_top = region->vm_end = from; 1474 } else { 1475 region->vm_start = to; 1476 } 1477 add_nommu_region(region); 1478 up_write(&nommu_region_sem); 1479 1480 free_page_series(from, to); 1481 return 0; 1482 } 1483 1484 /* 1485 * release a mapping 1486 * - under NOMMU conditions the chunk to be unmapped must be backed by a single 1487 * VMA, though it need not cover the whole VMA 1488 */ 1489 int do_munmap(struct mm_struct *mm, unsigned long start, size_t len, struct list_head *uf) 1490 { 1491 struct vm_area_struct *vma; 1492 unsigned long end; 1493 int ret; 1494 1495 len = PAGE_ALIGN(len); 1496 if (len == 0) 1497 return -EINVAL; 1498 1499 end = start + len; 1500 1501 /* find the first potentially overlapping VMA */ 1502 vma = find_vma(mm, start); 1503 if (!vma) { 1504 static int limit; 1505 if (limit < 5) { 1506 pr_warn("munmap of memory not mmapped by process %d (%s): 0x%lx-0x%lx\n", 1507 current->pid, current->comm, 1508 start, start + len - 1); 1509 limit++; 1510 } 1511 return -EINVAL; 1512 } 1513 1514 /* we're allowed to split an anonymous VMA but not a file-backed one */ 1515 if (vma->vm_file) { 1516 do { 1517 if (start > vma->vm_start) 1518 return -EINVAL; 1519 if (end == vma->vm_end) 1520 goto erase_whole_vma; 1521 vma = vma->vm_next; 1522 } while (vma); 1523 return -EINVAL; 1524 } else { 1525 /* the chunk must be a subset of the VMA found */ 1526 if (start == vma->vm_start && end == vma->vm_end) 1527 goto erase_whole_vma; 1528 if (start < vma->vm_start || end > vma->vm_end) 
1529 return -EINVAL; 1530 if (offset_in_page(start)) 1531 return -EINVAL; 1532 if (end != vma->vm_end && offset_in_page(end)) 1533 return -EINVAL; 1534 if (start != vma->vm_start && end != vma->vm_end) { 1535 ret = split_vma(mm, vma, start, 1); 1536 if (ret < 0) 1537 return ret; 1538 } 1539 return shrink_vma(mm, vma, start, end); 1540 } 1541 1542 erase_whole_vma: 1543 delete_vma_from_mm(vma); 1544 delete_vma(mm, vma); 1545 return 0; 1546 } 1547 EXPORT_SYMBOL(do_munmap); 1548 1549 int vm_munmap(unsigned long addr, size_t len) 1550 { 1551 struct mm_struct *mm = current->mm; 1552 int ret; 1553 1554 down_write(&mm->mmap_sem); 1555 ret = do_munmap(mm, addr, len, NULL); 1556 up_write(&mm->mmap_sem); 1557 return ret; 1558 } 1559 EXPORT_SYMBOL(vm_munmap); 1560 1561 SYSCALL_DEFINE2(munmap, unsigned long, addr, size_t, len) 1562 { 1563 return vm_munmap(addr, len); 1564 } 1565 1566 /* 1567 * release all the mappings made in a process's VM space 1568 */ 1569 void exit_mmap(struct mm_struct *mm) 1570 { 1571 struct vm_area_struct *vma; 1572 1573 if (!mm) 1574 return; 1575 1576 mm->total_vm = 0; 1577 1578 while ((vma = mm->mmap)) { 1579 mm->mmap = vma->vm_next; 1580 delete_vma_from_mm(vma); 1581 delete_vma(mm, vma); 1582 cond_resched(); 1583 } 1584 } 1585 1586 int vm_brk(unsigned long addr, unsigned long len) 1587 { 1588 return -ENOMEM; 1589 } 1590 1591 /* 1592 * expand (or shrink) an existing mapping, potentially moving it at the same 1593 * time (controlled by the MREMAP_MAYMOVE flag and available VM space) 1594 * 1595 * under NOMMU conditions, we only permit changing a mapping's size, and only 1596 * as long as it stays within the region allocated by do_mmap_private() and the 1597 * block is not shareable 1598 * 1599 * MREMAP_FIXED is not supported under NOMMU conditions 1600 */ 1601 static unsigned long do_mremap(unsigned long addr, 1602 unsigned long old_len, unsigned long new_len, 1603 unsigned long flags, unsigned long new_addr) 1604 { 1605 struct vm_area_struct *vma; 1606 
1607 /* insanity checks first */ 1608 old_len = PAGE_ALIGN(old_len); 1609 new_len = PAGE_ALIGN(new_len); 1610 if (old_len == 0 || new_len == 0) 1611 return (unsigned long) -EINVAL; 1612 1613 if (offset_in_page(addr)) 1614 return -EINVAL; 1615 1616 if (flags & MREMAP_FIXED && new_addr != addr) 1617 return (unsigned long) -EINVAL; 1618 1619 vma = find_vma_exact(current->mm, addr, old_len); 1620 if (!vma) 1621 return (unsigned long) -EINVAL; 1622 1623 if (vma->vm_end != vma->vm_start + old_len) 1624 return (unsigned long) -EFAULT; 1625 1626 if (vma->vm_flags & VM_MAYSHARE) 1627 return (unsigned long) -EPERM; 1628 1629 if (new_len > vma->vm_region->vm_end - vma->vm_region->vm_start) 1630 return (unsigned long) -ENOMEM; 1631 1632 /* all checks complete - do it */ 1633 vma->vm_end = vma->vm_start + new_len; 1634 return vma->vm_start; 1635 } 1636 1637 SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len, 1638 unsigned long, new_len, unsigned long, flags, 1639 unsigned long, new_addr) 1640 { 1641 unsigned long ret; 1642 1643 down_write(¤t->mm->mmap_sem); 1644 ret = do_mremap(addr, old_len, new_len, flags, new_addr); 1645 up_write(¤t->mm->mmap_sem); 1646 return ret; 1647 } 1648 1649 struct page *follow_page(struct vm_area_struct *vma, unsigned long address, 1650 unsigned int foll_flags) 1651 { 1652 return NULL; 1653 } 1654 1655 int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr, 1656 unsigned long pfn, unsigned long size, pgprot_t prot) 1657 { 1658 if (addr != (pfn << PAGE_SHIFT)) 1659 return -EINVAL; 1660 1661 vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP; 1662 return 0; 1663 } 1664 EXPORT_SYMBOL(remap_pfn_range); 1665 1666 int vm_iomap_memory(struct vm_area_struct *vma, phys_addr_t start, unsigned long len) 1667 { 1668 unsigned long pfn = start >> PAGE_SHIFT; 1669 unsigned long vm_len = vma->vm_end - vma->vm_start; 1670 1671 pfn += vma->vm_pgoff; 1672 return io_remap_pfn_range(vma, vma->vm_start, pfn, vm_len, 
vma->vm_page_prot); 1673 } 1674 EXPORT_SYMBOL(vm_iomap_memory); 1675 1676 int remap_vmalloc_range(struct vm_area_struct *vma, void *addr, 1677 unsigned long pgoff) 1678 { 1679 unsigned int size = vma->vm_end - vma->vm_start; 1680 1681 if (!(vma->vm_flags & VM_USERMAP)) 1682 return -EINVAL; 1683 1684 vma->vm_start = (unsigned long)(addr + (pgoff << PAGE_SHIFT)); 1685 vma->vm_end = vma->vm_start + size; 1686 1687 return 0; 1688 } 1689 EXPORT_SYMBOL(remap_vmalloc_range); 1690 1691 unsigned long arch_get_unmapped_area(struct file *file, unsigned long addr, 1692 unsigned long len, unsigned long pgoff, unsigned long flags) 1693 { 1694 return -ENOMEM; 1695 } 1696 1697 vm_fault_t filemap_fault(struct vm_fault *vmf) 1698 { 1699 BUG(); 1700 return 0; 1701 } 1702 EXPORT_SYMBOL(filemap_fault); 1703 1704 void filemap_map_pages(struct vm_fault *vmf, 1705 pgoff_t start_pgoff, pgoff_t end_pgoff) 1706 { 1707 BUG(); 1708 } 1709 EXPORT_SYMBOL(filemap_map_pages); 1710 1711 int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm, 1712 unsigned long addr, void *buf, int len, unsigned int gup_flags) 1713 { 1714 struct vm_area_struct *vma; 1715 int write = gup_flags & FOLL_WRITE; 1716 1717 if (down_read_killable(&mm->mmap_sem)) 1718 return 0; 1719 1720 /* the access must start within one of the target process's mappings */ 1721 vma = find_vma(mm, addr); 1722 if (vma) { 1723 /* don't overrun this mapping */ 1724 if (addr + len >= vma->vm_end) 1725 len = vma->vm_end - addr; 1726 1727 /* only read or write mappings where it is permitted */ 1728 if (write && vma->vm_flags & VM_MAYWRITE) 1729 copy_to_user_page(vma, NULL, addr, 1730 (void *) addr, buf, len); 1731 else if (!write && vma->vm_flags & VM_MAYREAD) 1732 copy_from_user_page(vma, NULL, addr, 1733 buf, (void *) addr, len); 1734 else 1735 len = 0; 1736 } else { 1737 len = 0; 1738 } 1739 1740 up_read(&mm->mmap_sem); 1741 1742 return len; 1743 } 1744 1745 /** 1746 * access_remote_vm - access another process' address space 1747 
* @mm: the mm_struct of the target address space 1748 * @addr: start address to access 1749 * @buf: source or destination buffer 1750 * @len: number of bytes to transfer 1751 * @gup_flags: flags modifying lookup behaviour 1752 * 1753 * The caller must hold a reference on @mm. 1754 */ 1755 int access_remote_vm(struct mm_struct *mm, unsigned long addr, 1756 void *buf, int len, unsigned int gup_flags) 1757 { 1758 return __access_remote_vm(NULL, mm, addr, buf, len, gup_flags); 1759 } 1760 1761 /* 1762 * Access another process' address space. 1763 * - source/target buffer must be kernel space 1764 */ 1765 int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, 1766 unsigned int gup_flags) 1767 { 1768 struct mm_struct *mm; 1769 1770 if (addr + len < addr) 1771 return 0; 1772 1773 mm = get_task_mm(tsk); 1774 if (!mm) 1775 return 0; 1776 1777 len = __access_remote_vm(tsk, mm, addr, buf, len, gup_flags); 1778 1779 mmput(mm); 1780 return len; 1781 } 1782 EXPORT_SYMBOL_GPL(access_process_vm); 1783 1784 /** 1785 * nommu_shrink_inode_mappings - Shrink the shared mappings on an inode 1786 * @inode: The inode to check 1787 * @size: The current filesize of the inode 1788 * @newsize: The proposed filesize of the inode 1789 * 1790 * Check the shared mappings on an inode on behalf of a shrinking truncate to 1791 * make sure that that any outstanding VMAs aren't broken and then shrink the 1792 * vm_regions that extend that beyond so that do_mmap_pgoff() doesn't 1793 * automatically grant mappings that are too large. 
1794 */ 1795 int nommu_shrink_inode_mappings(struct inode *inode, size_t size, 1796 size_t newsize) 1797 { 1798 struct vm_area_struct *vma; 1799 struct vm_region *region; 1800 pgoff_t low, high; 1801 size_t r_size, r_top; 1802 1803 low = newsize >> PAGE_SHIFT; 1804 high = (size + PAGE_SIZE - 1) >> PAGE_SHIFT; 1805 1806 down_write(&nommu_region_sem); 1807 i_mmap_lock_read(inode->i_mapping); 1808 1809 /* search for VMAs that fall within the dead zone */ 1810 vma_interval_tree_foreach(vma, &inode->i_mapping->i_mmap, low, high) { 1811 /* found one - only interested if it's shared out of the page 1812 * cache */ 1813 if (vma->vm_flags & VM_SHARED) { 1814 i_mmap_unlock_read(inode->i_mapping); 1815 up_write(&nommu_region_sem); 1816 return -ETXTBSY; /* not quite true, but near enough */ 1817 } 1818 } 1819 1820 /* reduce any regions that overlap the dead zone - if in existence, 1821 * these will be pointed to by VMAs that don't overlap the dead zone 1822 * 1823 * we don't check for any regions that start beyond the EOF as there 1824 * shouldn't be any 1825 */ 1826 vma_interval_tree_foreach(vma, &inode->i_mapping->i_mmap, 0, ULONG_MAX) { 1827 if (!(vma->vm_flags & VM_SHARED)) 1828 continue; 1829 1830 region = vma->vm_region; 1831 r_size = region->vm_top - region->vm_start; 1832 r_top = (region->vm_pgoff << PAGE_SHIFT) + r_size; 1833 1834 if (r_top > newsize) { 1835 region->vm_top -= r_top - newsize; 1836 if (region->vm_end > region->vm_top) 1837 region->vm_end = region->vm_top; 1838 } 1839 } 1840 1841 i_mmap_unlock_read(inode->i_mapping); 1842 up_write(&nommu_region_sem); 1843 return 0; 1844 } 1845 1846 /* 1847 * Initialise sysctl_user_reserve_kbytes. 1848 * 1849 * This is intended to prevent a user from starting a single memory hogging 1850 * process, such that they cannot recover (kill the hog) in OVERCOMMIT_NEVER 1851 * mode. 1852 * 1853 * The default value is min(3% of free memory, 128MB) 1854 * 128MB is enough to recover with sshd/login, bash, and top/kill. 
1855 */ 1856 static int __meminit init_user_reserve(void) 1857 { 1858 unsigned long free_kbytes; 1859 1860 free_kbytes = global_zone_page_state(NR_FREE_PAGES) << (PAGE_SHIFT - 10); 1861 1862 sysctl_user_reserve_kbytes = min(free_kbytes / 32, 1UL << 17); 1863 return 0; 1864 } 1865 subsys_initcall(init_user_reserve); 1866 1867 /* 1868 * Initialise sysctl_admin_reserve_kbytes. 1869 * 1870 * The purpose of sysctl_admin_reserve_kbytes is to allow the sys admin 1871 * to log in and kill a memory hogging process. 1872 * 1873 * Systems with more than 256MB will reserve 8MB, enough to recover 1874 * with sshd, bash, and top in OVERCOMMIT_GUESS. Smaller systems will 1875 * only reserve 3% of free pages by default. 1876 */ 1877 static int __meminit init_admin_reserve(void) 1878 { 1879 unsigned long free_kbytes; 1880 1881 free_kbytes = global_zone_page_state(NR_FREE_PAGES) << (PAGE_SHIFT - 10); 1882 1883 sysctl_admin_reserve_kbytes = min(free_kbytes / 32, 1UL << 13); 1884 return 0; 1885 } 1886 subsys_initcall(init_admin_reserve); 1887