mmap.c — old: 4af6600fd793023c01634cca5abfe4a2b707788f, new: 64e455079e1bd7787cc47be30b7f601ce682a5f6
1/*
2 * mm/mmap.c
3 *
4 * Written by obz.
5 *
6 * Address space accounting code <alan@lxorguk.ukuu.org.uk>
7 */
8

--- 56 unchanged lines hidden ---

65 * this is due to the limited x86 page protection hardware. The expected
66 * behavior is in parens:
67 *
68 * map_type prot
69 * PROT_NONE PROT_READ PROT_WRITE PROT_EXEC
70 * MAP_SHARED r: (no) no r: (yes) yes r: (no) yes r: (no) yes
71 * w: (no) no w: (no) no w: (yes) yes w: (no) no
72 * x: (no) no x: (no) yes x: (no) yes x: (yes) yes
73 *
74 * MAP_PRIVATE r: (no) no r: (yes) yes r: (no) yes r: (no) yes
75 * w: (no) no w: (no) no w: (copy) copy w: (no) no
76 * x: (no) no x: (no) yes x: (no) yes x: (yes) yes
77 *
78 */
79pgprot_t protection_map[16] = {
80 __P000, __P001, __P010, __P011, __P100, __P101, __P110, __P111,
81 __S000, __S001, __S010, __S011, __S100, __S101, __S110, __S111
82};
83
84pgprot_t vm_get_page_prot(unsigned long vm_flags)
85{
86 return __pgprot(pgprot_val(protection_map[vm_flags &
87 (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]) |
88 pgprot_val(arch_vm_get_page_prot(vm_flags)));
89}
90EXPORT_SYMBOL(vm_get_page_prot);
91
92static pgprot_t vm_pgprot_modify(pgprot_t oldprot, unsigned long vm_flags)
93{
94 return pgprot_modify(oldprot, vm_get_page_prot(vm_flags));
95}
96
97/* Update vma->vm_page_prot to reflect vma->vm_flags. */
98void vma_set_page_prot(struct vm_area_struct *vma)
99{
100 unsigned long vm_flags = vma->vm_flags;
101
102 vma->vm_page_prot = vm_pgprot_modify(vma->vm_page_prot, vm_flags);
103 if (vma_wants_writenotify(vma)) {
104 vm_flags &= ~VM_SHARED;
105 vma->vm_page_prot = vm_pgprot_modify(vma->vm_page_prot,
106 vm_flags);
107 }
108}
109
110
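
A minimal standalone sketch (plain userspace C, with made-up XVM_* stand-ins for the real VM_* flag values) of the table lookup that vm_get_page_prot() performs above: the low four vm_flags bits (READ, WRITE, EXEC, SHARED) index a 16-entry table, just as protection_map[] is indexed.

#include <stdio.h>

/* Simplified stand-ins for VM_READ/VM_WRITE/VM_EXEC/VM_SHARED. */
#define XVM_READ   0x1
#define XVM_WRITE  0x2
#define XVM_EXEC   0x4
#define XVM_SHARED 0x8

/* 16-entry table indexed by the four bits above, mirroring protection_map[]. */
static const char *prot_table[16] = {
	"__P000", "__P001", "__P010", "__P011",
	"__P100", "__P101", "__P110", "__P111",
	"__S000", "__S001", "__S010", "__S011",
	"__S100", "__S101", "__S110", "__S111",
};

/* Same indexing scheme as vm_get_page_prot(): mask down to the low four bits. */
static const char *get_page_prot(unsigned long vm_flags)
{
	return prot_table[vm_flags & (XVM_READ | XVM_WRITE | XVM_EXEC | XVM_SHARED)];
}

int main(void)
{
	/* A private read/write mapping selects the __P011 entry... */
	printf("%s\n", get_page_prot(XVM_READ | XVM_WRITE));
	/* ...while the shared equivalent selects __S011. */
	printf("%s\n", get_page_prot(XVM_READ | XVM_WRITE | XVM_SHARED));
	return 0;
}
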
92int sysctl_overcommit_memory __read_mostly = OVERCOMMIT_GUESS; /* heuristic overcommit */
93int sysctl_overcommit_ratio __read_mostly = 50; /* default is 50% */
94unsigned long sysctl_overcommit_kbytes __read_mostly;
95int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT;
96unsigned long sysctl_user_reserve_kbytes __read_mostly = 1UL << 17; /* 128MB */
97unsigned long sysctl_admin_reserve_kbytes __read_mostly = 1UL << 13; /* 8MB */
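
These sysctl variables are normally exposed under /proc/sys/vm. A small userspace sketch that prints the current values; the file names are assumed from the usual procfs layout (overcommit_memory, overcommit_ratio, overcommit_kbytes, max_map_count, user_reserve_kbytes, admin_reserve_kbytes) and any knob a given kernel doesn't provide is simply skipped.

#include <stdio.h>

/* Print one /proc/sys/vm knob; silently skip it if the kernel doesn't expose it. */
static void show_knob(const char *name)
{
	char path[128], value[64];
	FILE *f;

	snprintf(path, sizeof(path), "/proc/sys/vm/%s", name);
	f = fopen(path, "r");
	if (!f)
		return;
	if (fgets(value, sizeof(value), f))
		printf("%-22s %s", name, value);
	fclose(f);
}

int main(void)
{
	show_knob("overcommit_memory");     /* sysctl_overcommit_memory */
	show_knob("overcommit_ratio");      /* sysctl_overcommit_ratio */
	show_knob("overcommit_kbytes");     /* sysctl_overcommit_kbytes */
	show_knob("max_map_count");         /* sysctl_max_map_count */
	show_knob("user_reserve_kbytes");   /* sysctl_user_reserve_kbytes */
	show_knob("admin_reserve_kbytes");  /* sysctl_admin_reserve_kbytes */
	return 0;
}
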
98/*
99 * Make sure vm_committed_as is in one cacheline and not cacheline shared with

--- 163 unchanged lines hidden ---

263 kmem_cache_free(vm_area_cachep, vma);
264 return next;
265}
266
267static unsigned long do_brk(unsigned long addr, unsigned long len);
268
269SYSCALL_DEFINE1(brk, unsigned long, brk)
270{
271 unsigned long rlim, retval;
290 unsigned long retval;
272 unsigned long newbrk, oldbrk;
273 struct mm_struct *mm = current->mm;
274 unsigned long min_brk;
275 bool populate;
276
277 down_write(&mm->mmap_sem);
278
279#ifdef CONFIG_COMPAT_BRK

--- 13 unchanged lines hidden ---

293 goto out;
294
295 /*
296 * Check against rlimit here. If this check is done later after the test
297 * of oldbrk with newbrk then it can escape the test and let the data
298 * segment grow beyond its set limit in the case where the limit is
299 * not page aligned -Ram Gupta
300 */
301 rlim = rlimit(RLIMIT_DATA);
302 if (rlim < RLIM_INFINITY && (brk - mm->start_brk) +
303 (mm->end_data - mm->start_data) > rlim)
320 if (check_data_rlimit(rlimit(RLIMIT_DATA), brk, mm->start_brk,
321 mm->end_data, mm->start_data))
304 goto out;
305
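
Both sides of the hunk above enforce the same rule; the new code just folds it into the check_data_rlimit() helper. A standalone sketch of the old, open-coded form of the check, using plain parameters (start_brk, end_data, start_data) that stand in for the mm_struct fields of the same names:

#include <stdbool.h>
#include <sys/resource.h>

/*
 * Would moving the break to 'newbrk' push (brk heap + data segment) past
 * RLIMIT_DATA?  Mirrors the open-coded test that check_data_rlimit() replaced.
 */
static bool data_rlimit_exceeded(unsigned long newbrk, unsigned long start_brk,
				 unsigned long end_data, unsigned long start_data)
{
	struct rlimit rl;

	if (getrlimit(RLIMIT_DATA, &rl) != 0)
		return false;			/* be permissive if we can't tell */
	if (rl.rlim_cur == RLIM_INFINITY)
		return false;
	return (newbrk - start_brk) + (end_data - start_data) > rl.rlim_cur;
}

int main(void)
{
	/* Example: 16 MiB of brk growth on top of a 1 MiB data segment. */
	unsigned long start_brk = 0x01000000, newbrk = start_brk + (16UL << 20);
	unsigned long start_data = 0x00400000, end_data = start_data + (1UL << 20);

	return data_rlimit_exceeded(newbrk, start_brk, end_data, start_data) ? 1 : 0;
}
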
306 newbrk = PAGE_ALIGN(brk);
307 oldbrk = PAGE_ALIGN(mm->brk);
308 if (oldbrk == newbrk)
309 goto set_brk;
310
311 /* Always allow shrinking brk. */

--- 52 unchanged lines hidden ---

364 int i = 0, j, bug = 0;
365 struct rb_node *nd, *pn = NULL;
366 unsigned long prev = 0, pend = 0;
367
368 for (nd = rb_first(root); nd; nd = rb_next(nd)) {
369 struct vm_area_struct *vma;
370 vma = rb_entry(nd, struct vm_area_struct, vm_rb);
371 if (vma->vm_start < prev) {
372 pr_emerg("vm_start %lx prev %lx\n", vma->vm_start, prev);
390 pr_emerg("vm_start %lx < prev %lx\n",
391 vma->vm_start, prev);
373 bug = 1;
374 }
375 if (vma->vm_start < pend) {
376 pr_emerg("vm_start %lx pend %lx\n", vma->vm_start, pend);
395 pr_emerg("vm_start %lx < pend %lx\n",
396 vma->vm_start, pend);
377 bug = 1;
378 }
379 if (vma->vm_start > vma->vm_end) {
380 pr_emerg("vm_end %lx < vm_start %lx\n",
381 vma->vm_end, vma->vm_start);
400 pr_emerg("vm_start %lx > vm_end %lx\n",
401 vma->vm_start, vma->vm_end);
382 bug = 1;
383 }
384 if (vma->rb_subtree_gap != vma_compute_subtree_gap(vma)) {
385 pr_emerg("free gap %lx, correct %lx\n",
386 vma->rb_subtree_gap,
387 vma_compute_subtree_gap(vma));
388 bug = 1;
389 }

--- 14 unchanged lines hidden ---

404
405static void validate_mm_rb(struct rb_root *root, struct vm_area_struct *ignore)
406{
407 struct rb_node *nd;
408
409 for (nd = rb_first(root); nd; nd = rb_next(nd)) {
410 struct vm_area_struct *vma;
411 vma = rb_entry(nd, struct vm_area_struct, vm_rb);
412 BUG_ON(vma != ignore &&
413 vma->rb_subtree_gap != vma_compute_subtree_gap(vma));
432 VM_BUG_ON_VMA(vma != ignore &&
433 vma->rb_subtree_gap != vma_compute_subtree_gap(vma),
434 vma);
414 }
415}
416
417static void validate_mm(struct mm_struct *mm)
418{
419 int bug = 0;
420 int i = 0;
421 unsigned long highest_address = 0;
422 struct vm_area_struct *vma = mm->mmap;
423 while (vma) {
424 struct anon_vma_chain *avc;
425 vma_lock_anon_vma(vma);
426 list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
427 anon_vma_interval_tree_verify(avc);
428 vma_unlock_anon_vma(vma);
429 highest_address = vma->vm_end;
430 vma = vma->vm_next;
431 i++;
432 }
433 if (i != mm->map_count) {
434 pr_emerg("map_count %d vm_next %d\n", mm->map_count, i);
435 bug = 1;
436 }
437 if (highest_address != mm->highest_vm_end) {
438 pr_emerg("mm->highest_vm_end %lx, found %lx\n",
439 mm->highest_vm_end, highest_address);
440 bug = 1;
441 }
442 i = browse_rb(&mm->mm_rb);
443 if (i != mm->map_count) {
444 pr_emerg("map_count %d rb %d\n", mm->map_count, i);
467 if (i != -1)
468 pr_emerg("map_count %d rb %d\n", mm->map_count, i);
445 bug = 1;
446 }
447 BUG_ON(bug);
471 VM_BUG_ON_MM(bug, mm);
448}
449#else
450#define validate_mm_rb(root, ignore) do { } while (0)
451#define validate_mm(mm) do { } while (0)
452#endif
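
browse_rb() and validate_mm() above are debug-only walkers that re-derive the invariants (sorted, non-overlapping, correctly counted VMAs) and complain when they don't hold. A simplified standalone sketch of the same idea over a plain singly linked list of ranges; the struct and names here are made up for illustration, not the kernel's rb-tree walk.

#include <stdio.h>

/* Hypothetical stand-in for a vma: [start, end) plus a next pointer. */
struct range {
	unsigned long start, end;
	struct range *next;
};

/* Walk the list and check the invariants validate_mm()/browse_rb() care about:
 * each range is well formed, the list is sorted, ranges don't overlap, and the
 * caller's count matches what we find.  Returns 0 if everything holds. */
static int validate_ranges(const struct range *head, int expected_count)
{
	unsigned long prev_end = 0;
	int found = 0, bug = 0;

	for (const struct range *r = head; r; r = r->next, found++) {
		if (r->start > r->end) {
			fprintf(stderr, "start %lx > end %lx\n", r->start, r->end);
			bug = 1;
		}
		if (r->start < prev_end) {
			fprintf(stderr, "start %lx < previous end %lx\n",
				r->start, prev_end);
			bug = 1;
		}
		prev_end = r->end;
	}
	if (found != expected_count) {
		fprintf(stderr, "expected %d ranges, found %d\n",
			expected_count, found);
		bug = 1;
	}
	return bug;
}

int main(void)
{
	struct range b = { 0x3000, 0x4000, NULL };
	struct range a = { 0x1000, 0x2000, &b };

	return validate_ranges(&a, 2);
}
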
453
454RB_DECLARE_CALLBACKS(static, vma_gap_callbacks, struct vm_area_struct, vm_rb,
455 unsigned long, rb_subtree_gap, vma_compute_subtree_gap)

--- 280 unchanged lines hidden ---

736 exporter = next;
737 importer = vma;
738 } else if (end < vma->vm_end) {
739 /*
740 * vma shrinks, and !insert tells it's not
741 * split_vma inserting another: so it must be
742 * mprotect case 4 shifting the boundary down.
743 */
744 adjust_next = - ((vma->vm_end - end) >> PAGE_SHIFT);
768 adjust_next = -((vma->vm_end - end) >> PAGE_SHIFT);
745 exporter = vma;
746 importer = next;
747 }
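
For concreteness, a tiny standalone illustration of the adjust_next arithmetic in the hunk above, assuming 4 KiB pages (the constant is invented for the example): shrinking the vma by two pages yields adjust_next = -2, i.e. next is expected to absorb two pages at its front.

#include <stdio.h>

#define EX_PAGE_SHIFT 12	/* assume 4 KiB pages for the illustration */

int main(void)
{
	unsigned long vm_end = 0x7f0000006000UL;	/* current end of vma */
	unsigned long end    = 0x7f0000004000UL;	/* new, lower boundary */
	long adjust_next = -(long)((vm_end - end) >> EX_PAGE_SHIFT);

	printf("adjust_next = %ld pages\n", adjust_next);	/* prints -2 */
	return 0;
}
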
748
749 /*
750 * Easily overlooked: when mprotect shifts the boundary,
751 * make sure the expanding vma has anon_vma set if the
752 * shrinking vma had, to cover any anon pages imported.

--- 29 unchanged lines hidden ---

782 }
783
784 vma_adjust_trans_huge(vma, start, end, adjust_next);
785
786 anon_vma = vma->anon_vma;
787 if (!anon_vma && adjust_next)
788 anon_vma = next->anon_vma;
789 if (anon_vma) {
790 VM_BUG_ON(adjust_next && next->anon_vma &&
791 anon_vma != next->anon_vma);
814 VM_BUG_ON_VMA(adjust_next && next->anon_vma &&
815 anon_vma != next->anon_vma, next);
792 anon_vma_lock_write(anon_vma);
793 anon_vma_interval_tree_pre_update_vma(vma);
794 if (adjust_next)
795 anon_vma_interval_tree_pre_update_vma(next);
796 }
797
798 if (root) {
799 flush_dcache_mmap_lock(mapping);

--- 205 unchanged lines hidden ---

1005 * might become case 1 below case 2 below case 3 below
1006 *
1007 * Odd one out? Case 8, because it extends NNNN but needs flags of XXXX:
1008 * mprotect_fixup updates vm_flags & vm_page_prot on successful return.
1009 */
1010struct vm_area_struct *vma_merge(struct mm_struct *mm,
1011 struct vm_area_struct *prev, unsigned long addr,
1012 unsigned long end, unsigned long vm_flags,
1013 struct anon_vma *anon_vma, struct file *file,
1014 pgoff_t pgoff, struct mempolicy *policy)
1015{
1016 pgoff_t pglen = (end - addr) >> PAGE_SHIFT;
1017 struct vm_area_struct *area, *next;
1018 int err;
1019
1020 /*
1021 * We later require that vma->vm_flags == vm_flags,

--- 9 unchanged lines hidden ---

1031 area = next;
1032 if (next && next->vm_end == end) /* cases 6, 7, 8 */
1033 next = next->vm_next;
1034
1035 /*
1036 * Can it merge with the predecessor?
1037 */
1038 if (prev && prev->vm_end == addr &&
1039 mpol_equal(vma_policy(prev), policy) &&
1040 can_vma_merge_after(prev, vm_flags,
1041 anon_vma, file, pgoff)) {
1042 /*
1043 * OK, it can. Can we now merge in the successor as well?
1044 */
1045 if (next && end == next->vm_start &&
1046 mpol_equal(policy, vma_policy(next)) &&
1047 can_vma_merge_before(next, vm_flags,

--- 11 unchanged lines hidden ---

1059 khugepaged_enter_vma_merge(prev);
1060 return prev;
1061 }
1062
1063 /*
1064 * Can this new request be merged in front of next?
1065 */
1066 if (next && end == next->vm_start &&
1067 mpol_equal(policy, vma_policy(next)) &&
1068 can_vma_merge_before(next, vm_flags,
1069 anon_vma, file, pgoff+pglen)) {
1070 if (prev && addr < prev->vm_end) /* case 4 */
1071 err = vma_adjust(prev, prev->vm_start,
1072 addr, prev->vm_pgoff, NULL);
1073 else /* cases 3, 8 */
1074 err = vma_adjust(area, addr, next->vm_end,
1075 next->vm_pgoff - pglen, NULL);
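
A simplified standalone sketch of the two questions vma_merge() asks in the excerpt above: can the new [addr, end) range glue onto the end of prev, and/or onto the front of next? The struct, flags and "policy" id below are invented stand-ins, and can_vma_merge_before/after are reduced to a plain flag comparison, so this shows only the shape of the decision, not the kernel's full set of checks.

#include <stdio.h>
#include <stdbool.h>

/* Hypothetical, heavily reduced vma: a range plus "flags" and a "policy" id. */
struct fake_vma {
	unsigned long start, end;
	unsigned int flags;
	int policy;
};

/* Can [addr, end) with the given flags/policy merge with its neighbours?
 * Mirrors the structure above: adjacency + equal policy + compatible flags. */
static const char *merge_decision(const struct fake_vma *prev,
				  const struct fake_vma *next,
				  unsigned long addr, unsigned long end,
				  unsigned int flags, int policy)
{
	bool with_prev = prev && prev->end == addr &&
			 prev->policy == policy && prev->flags == flags;
	bool with_next = next && next->start == end &&
			 next->policy == policy && next->flags == flags;

	if (with_prev && with_next)
		return "merge prev + new + next into one vma";
	if (with_prev)
		return "extend prev forward over the new range";
	if (with_next)
		return "extend next backward over the new range";
	return "no merge: a new vma is needed";
}

int main(void)
{
	struct fake_vma prev = { 0x1000, 0x2000, 0x3, 0 };
	struct fake_vma next = { 0x3000, 0x4000, 0x3, 0 };

	puts(merge_decision(&prev, &next, 0x2000, 0x3000, 0x3, 0));
	return 0;
}
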

--- 154 unchanged lines hidden ---

1230 * The caller must hold down_write(&current->mm->mmap_sem).
1231 */
1232
1233unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
1234 unsigned long len, unsigned long prot,
1235 unsigned long flags, unsigned long pgoff,
1236 unsigned long *populate)
1237{
1238 struct mm_struct * mm = current->mm;
1262 struct mm_struct *mm = current->mm;
1239 vm_flags_t vm_flags;
1240
1241 *populate = 0;
1242
1243 /*
1244 * Does the application expect PROT_READ to imply PROT_EXEC?
1245 *
1246 * (the exception is when the underlying filesystem is noexec

--- 11 unchanged lines hidden ---

1258
1259 /* Careful about overflows.. */
1260 len = PAGE_ALIGN(len);
1261 if (!len)
1262 return -ENOMEM;
1263
1264 /* offset overflow? */
1265 if ((pgoff + (len >> PAGE_SHIFT)) < pgoff)
1266 return -EOVERFLOW;
1267
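
The "(pgoff + (len >> PAGE_SHIFT)) < pgoff" test above is a plain unsigned wrap-around check: if adding the length in pages to the file offset in pages wraps past the top of the type, the sum comes out smaller than the starting offset. A small standalone illustration; the page shift is assumed (4 KiB) rather than taken from kernel headers.

#include <stdio.h>
#include <stdbool.h>

#define EX_PAGE_SHIFT 12	/* assume 4 KiB pages */

/* True if pgoff + number-of-pages(len) overflows the unsigned type,
 * i.e. the mapping would run past the largest representable file offset. */
static bool pgoff_overflows(unsigned long pgoff, unsigned long len)
{
	return (pgoff + (len >> EX_PAGE_SHIFT)) < pgoff;
}

int main(void)
{
	printf("%d\n", pgoff_overflows(0x1000, 1UL << 20));		   /* 0: fine  */
	printf("%d\n", pgoff_overflows(~0UL - 1, 16UL << EX_PAGE_SHIFT));  /* 1: wraps */
	return 0;
}
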
1268 /* Too many mappings? */
1269 if (mm->map_count > sysctl_max_map_count)
1270 return -ENOMEM;
1271
1272 /* Obtain the address to map to. we verify (or select) it and ensure
1273 * that it represents a valid section of the address space.
1274 */

--- 190 unchanged lines hidden ---

1465 /* If it was private or non-writable, the write bit is already clear */
1466 if ((vm_flags & (VM_WRITE|VM_SHARED)) != ((VM_WRITE|VM_SHARED)))
1467 return 0;
1468
1469 /* The backer wishes to know when pages are first written to? */
1470 if (vma->vm_ops && vma->vm_ops->page_mkwrite)
1471 return 1;
1472
1473 /* The open routine did something to the protections already? */
1497 /* The open routine did something to the protections that pgprot_modify
1498 * won't preserve? */
1474 if (pgprot_val(vma->vm_page_prot) !=
1475 pgprot_val(vm_get_page_prot(vm_flags)))
1500 pgprot_val(vm_pgprot_modify(vma->vm_page_prot, vm_flags)))
1476 return 0;
1477
1503 /* Do we need to track softdirty? */
1504 if (IS_ENABLED(CONFIG_MEM_SOFT_DIRTY) && !(vm_flags & VM_SOFTDIRTY))
1505 return 1;
1506
1478 /* Specialty mapping? */
1479 if (vm_flags & VM_PFNMAP)
1480 return 0;
1481
1482 /* Can the mapping track the dirty pages? */
1483 return vma->vm_file && vma->vm_file->f_mapping &&
1484 mapping_cap_account_dirty(vma->vm_file->f_mapping);
1485}
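
Both versions of vma_wants_writenotify() shown above answer the same question: for a shared, writable mapping, should the pte write bit be left clear so that the first write faults and can be intercepted? A condensed standalone restatement of that decision chain, with plain booleans standing in for the vm_flags/vm_ops/vm_page_prot tests; the soft-dirty clause is the one the new version adds.

#include <stdbool.h>

/*
 * Condensed restatement of the decision above.  Each parameter stands in for
 * one of the tests in the real function; this is the shape of the logic only.
 */
static bool wants_writenotify(bool shared_and_writable,
			      bool has_page_mkwrite,
			      bool open_changed_protections,
			      bool tracks_soft_dirty,
			      bool is_pfnmap,
			      bool mapping_accounts_dirty)
{
	/* Private or read-only: the write bit is already clear, nothing to do. */
	if (!shared_and_writable)
		return false;
	/* The backer asked to be told about first writes. */
	if (has_page_mkwrite)
		return true;
	/* ->mmap() installed protections that must not be silently overridden. */
	if (open_changed_protections)
		return false;
	/* New in this diff: soft-dirty tracking also needs the write fault. */
	if (tracks_soft_dirty)
		return true;
	/* Special PFN mappings opt out. */
	if (is_pfnmap)
		return false;
	/* Otherwise: only if the backing mapping does dirty accounting. */
	return mapping_accounts_dirty;
}

int main(void)
{
	/* e.g. shared+writable over a regular file with dirty accounting,
	 * no page_mkwrite hook, soft-dirty disabled: */
	return wants_writenotify(true, false, false, false, false, true) ? 0 : 1;
}
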

--- 119 unchanged lines hidden ---

1605 addr = vma->vm_start;
1606 vm_flags = vma->vm_flags;
1607 } else if (vm_flags & VM_SHARED) {
1608 error = shmem_zero_setup(vma);
1609 if (error)
1610 goto free_vma;
1611 }
1612
1613 if (vma_wants_writenotify(vma)) {
1614 pgprot_t pprot = vma->vm_page_prot;
1615
1616 /* Can vma->vm_page_prot have changed??
1617 *
1618 * Answer: Yes, drivers may have changed it in their
1619 * f_op->mmap method.
1620 *
1621 * Ensures that vmas marked as uncached stay that way.
1622 */
1623 vma->vm_page_prot = vm_get_page_prot(vm_flags & ~VM_SHARED);
1624 if (pgprot_val(pprot) == pgprot_val(pgprot_noncached(pprot)))
1625 vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
1626 }
1627
1628 vma_link(mm, vma, prev, rb_link, rb_parent);
1629 /* Once vma denies write, undo our temporary denial count */
1630 if (file) {
1631 if (vm_flags & VM_SHARED)
1632 mapping_unmap_writable(file->f_mapping);
1633 if (vm_flags & VM_DENYWRITE)
1634 allow_write_access(file);
1635 }

--- 17 unchanged lines hidden ---

1653 * A new (or expanded) vma always gets soft-dirty status.
1654 * Otherwise the user-space soft-dirty page tracker could not
1655 * distinguish the case where a vma area was unmapped and
1656 * then a new one mapped in place (which must be treated as
1657 * a completely new data area).
1658 */
1659 vma->vm_flags |= VM_SOFTDIRTY;
1660
1675 vma_set_page_prot(vma);
1676
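
The VM_SOFTDIRTY comment above refers to the user-space soft-dirty tracker. A hedged userspace sketch of that interface as I understand it from the kernel's soft-dirty documentation: writing "4" to /proc/self/clear_refs clears the soft-dirty bits, and bit 55 of a /proc/self/pagemap entry reports soft-dirty; this only works with CONFIG_MEM_SOFT_DIRTY enabled.

#include <stdio.h>
#include <stdint.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/mman.h>

/* Read the /proc/self/pagemap entry for 'addr' and report bit 55 (soft-dirty). */
static int page_soft_dirty(void *addr)
{
	uint64_t entry;
	long pagesize = sysconf(_SC_PAGESIZE);
	off_t offset = ((uintptr_t)addr / pagesize) * sizeof(entry);
	int fd = open("/proc/self/pagemap", O_RDONLY);

	if (fd < 0)
		return -1;
	if (pread(fd, &entry, sizeof(entry), offset) != sizeof(entry)) {
		close(fd);
		return -1;
	}
	close(fd);
	return (entry >> 55) & 1;
}

int main(void)
{
	long pagesize = sysconf(_SC_PAGESIZE);
	char *p = mmap(NULL, pagesize, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	int fd = open("/proc/self/clear_refs", O_WRONLY);

	if (p == MAP_FAILED || fd < 0)
		return 1;
	write(fd, "4", 1);		/* clear soft-dirty bits for this task */
	close(fd);

	p[0] = 1;			/* dirty the page again */
	printf("soft-dirty after write: %d\n", page_soft_dirty(p));
	return 0;
}
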
1661 return addr;
1662
1663unmap_and_free_vma:
1664 vma->vm_file = NULL;
1665 fput(file);
1666
1667 /* Undo any partial mapping done by a device driver. */
1668 unmap_region(mm, vma, prev, vma->vm_start, vma->vm_end);

--- 247 unchanged lines hidden ---

1916
1917 info.flags = 0;
1918 info.length = len;
1919 info.low_limit = mm->mmap_base;
1920 info.high_limit = TASK_SIZE;
1921 info.align_mask = 0;
1922 return vm_unmapped_area(&info);
1923}
1924#endif
1925
1926/*
1927 * This mmap-allocator allocates new areas top-down from below the
1928 * stack's low limit (the base):
1929 */
1930#ifndef HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
1931unsigned long
1932arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,

--- 383 unchanged lines hidden ---

2316 if (prev && prev->vm_end == address) {
2317 if (!(prev->vm_flags & VM_GROWSDOWN))
2318 return -ENOMEM;
2319 }
2320 return expand_downwards(vma, address);
2321}
2322
2323struct vm_area_struct *
2324find_extend_vma(struct mm_struct * mm, unsigned long addr)
2340find_extend_vma(struct mm_struct *mm, unsigned long addr)
2325{
2326 struct vm_area_struct * vma;
2342 struct vm_area_struct *vma;
2327 unsigned long start;
2328
2329 addr &= PAGE_MASK;
2330 vma = find_vma(mm,addr);
2346 vma = find_vma(mm, addr);
2331 if (!vma)
2332 return NULL;
2333 if (vma->vm_start <= addr)
2334 return vma;
2335 if (!(vma->vm_flags & VM_GROWSDOWN))
2336 return NULL;
2337 start = vma->vm_start;
2338 if (expand_stack(vma, addr))

--- 32 unchanged lines hidden ---

2371 * Get rid of page table information in the indicated region.
2372 *
2373 * Called with the mm semaphore held.
2374 */
2375static void unmap_region(struct mm_struct *mm,
2376 struct vm_area_struct *vma, struct vm_area_struct *prev,
2377 unsigned long start, unsigned long end)
2378{
2379 struct vm_area_struct *next = prev? prev->vm_next: mm->mmap;
2395 struct vm_area_struct *next = prev ? prev->vm_next : mm->mmap;
2380 struct mmu_gather tlb;
2381
2382 lru_add_drain();
2383 tlb_gather_mmu(&tlb, mm, start, end);
2384 update_hiwater_rss(mm);
2385 unmap_vmas(&tlb, vma, start, end);
2386 free_pgtables(&tlb, vma, prev ? prev->vm_end : FIRST_USER_ADDRESS,
2387 next ? next->vm_start : USER_PGTABLES_CEILING);

--- 30 unchanged lines hidden ---

2418 /* Kill the cache */
2419 vmacache_invalidate(mm);
2420}
2421
2422/*
2423 * __split_vma() bypasses sysctl_max_map_count checking. We use this on the
2424 * munmap path where it doesn't make sense to fail.
2425 */
2426static int __split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
2442static int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
2427 unsigned long addr, int new_below)
2428{
2429 struct vm_area_struct *new;
2430 int err = -ENOMEM;
2431
2432 if (is_vm_hugetlb_page(vma) && (addr &
2433 ~(huge_page_mask(hstate_vma(vma)))))
2434 return -EINVAL;

--- 72 unchanged lines hidden ---

2507int do_munmap(struct mm_struct *mm, unsigned long start, size_t len)
2508{
2509 unsigned long end;
2510 struct vm_area_struct *vma, *prev, *last;
2511
2512 if ((start & ~PAGE_MASK) || start > TASK_SIZE || len > TASK_SIZE-start)
2513 return -EINVAL;
2514
2515 if ((len = PAGE_ALIGN(len)) == 0)
2531 len = PAGE_ALIGN(len);
2532 if (len == 0)
2516 return -EINVAL;
2517
2518 /* Find the first overlapping VMA */
2519 vma = find_vma(mm, start);
2520 if (!vma)
2521 return 0;
2522 prev = vma->vm_prev;
2523 /* we have start < vma->vm_end */

--- 29 unchanged lines hidden ---

2553
2554 /* Does it split the last one? */
2555 last = find_vma(mm, end);
2556 if (last && end > last->vm_start) {
2557 int error = __split_vma(mm, last, end, 1);
2558 if (error)
2559 return error;
2560 }
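
The splitting done by __split_vma() above is easy to observe from userspace: unmapping the middle of a three-page anonymous mapping leaves two separate VMAs behind, visible as two lines in /proc/self/maps. A small sketch, assuming a Linux system with /proc mounted.

#include <stdio.h>
#include <unistd.h>
#include <sys/mman.h>

/* Dump /proc/self/maps so the before/after VMA layout can be compared. */
static void show_maps(const char *label)
{
	char line[256];
	FILE *f = fopen("/proc/self/maps", "r");

	printf("--- %s ---\n", label);
	if (!f)
		return;
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);
	fclose(f);
}

int main(void)
{
	long pagesize = sysconf(_SC_PAGESIZE);
	char *p = mmap(NULL, 3 * pagesize, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	if (p == MAP_FAILED)
		return 1;
	show_maps("before munmap");
	/* Punch out the middle page: the kernel must split the vma in two. */
	munmap(p + pagesize, pagesize);
	show_maps("after munmap of the middle page");
	return 0;
}
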
2561 vma = prev? prev->vm_next: mm->mmap;
2578 vma = prev ? prev->vm_next : mm->mmap;
2562
2563 /*
2564 * unlock any mlock()ed ranges before detaching vmas
2565 */
2566 if (mm->locked_vm) {
2567 struct vm_area_struct *tmp = vma;
2568 while (tmp && tmp->vm_start < end) {
2569 if (tmp->vm_flags & VM_LOCKED) {

--- 46 unchanged lines hidden ---

2616
2617/*
2618 * this is really a simplified "do_mmap". it only handles
2619 * anonymous maps. eventually we may be able to do some
2620 * brk-specific accounting here.
2621 */
2622static unsigned long do_brk(unsigned long addr, unsigned long len)
2623{
2624 struct mm_struct * mm = current->mm;
2625 struct vm_area_struct * vma, * prev;
2641 struct mm_struct *mm = current->mm;
2642 struct vm_area_struct *vma, *prev;
2626 unsigned long flags;
2627 struct rb_node ** rb_link, * rb_parent;
2644 struct rb_node **rb_link, *rb_parent;
2628 pgoff_t pgoff = addr >> PAGE_SHIFT;
2629 int error;
2630
2631 len = PAGE_ALIGN(len);
2632 if (!len)
2633 return addr;
2634
2635 flags = VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags;
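
do_brk() is what ultimately backs the brk()/sbrk() heap interface handled by SYSCALL_DEFINE1(brk) earlier in this diff. A small userspace sketch of that interface; sbrk() is assumed available (it is a thin wrapper over the brk syscall on Linux), and the shrink at the end mirrors the "Always allow shrinking brk" rule above.

#include <stdio.h>
#include <unistd.h>

int main(void)
{
	void *before = sbrk(0);		/* current program break */

	if (sbrk(4096) == (void *)-1) {	/* grow the heap by one 4 KiB step */
		perror("sbrk");
		return 1;
	}
	printf("break moved from %p to %p\n", before, sbrk(0));

	sbrk(-4096);			/* shrinking the break is always allowed */
	printf("break back at %p\n", sbrk(0));
	return 0;
}
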

--- 207 unchanged lines hidden ---

2843 * reset the dst vma->vm_pgoff to the
2844 * destination address of the mremap to allow
2845 * the merge to happen. mremap must change the
2846 * vm_pgoff linearity between src and dst vmas
2847 * (in turn preventing a vma_merge) to be
2848 * safe. It is only safe to keep the vm_pgoff
2849 * linear if there are no pages mapped yet.
2850 */
2851 VM_BUG_ON(faulted_in_anon_vma);
2868 VM_BUG_ON_VMA(faulted_in_anon_vma, new_vma);
2852 *vmap = vma = new_vma;
2853 }
2854 *need_rmap_locks = (new_vma->vm_pgoff <= vma->vm_pgoff);
2855 } else {
2856 new_vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
2857 if (new_vma) {
2858 *new_vma = *vma;
2859 new_vma->vm_start = addr;
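
copy_vma() above sits on the mremap() path; the vm_pgoff discussion concerns what happens when a mapping has to be moved to a new address. From userspace that move looks like this (a sketch assuming Linux's mremap() with MREMAP_MAYMOVE; whether the mapping actually moves depends on the surrounding address space):

#define _GNU_SOURCE
#include <stdio.h>
#include <unistd.h>
#include <sys/mman.h>

int main(void)
{
	long pagesize = sysconf(_SC_PAGESIZE);
	char *p = mmap(NULL, pagesize, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	char *q;

	if (p == MAP_FAILED)
		return 1;
	p[0] = 42;
	/* Ask for a bigger mapping and let the kernel move it if it has to;
	 * when it moves, copy_vma() and friends do the work underneath. */
	q = mremap(p, pagesize, 4 * pagesize, MREMAP_MAYMOVE);
	if (q == MAP_FAILED) {
		perror("mremap");
		return 1;
	}
	printf("mapping %s, data still there: %d\n",
	       q == p ? "grew in place" : "moved", q[0]);
	return 0;
}
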

--- 331 unchanged lines hidden ---

3191
3192/*
3193 * initialise the VMA slab
3194 */
3195void __init mmap_init(void)
3196{
3197 int ret;
3198
3199 ret = percpu_counter_init(&vm_committed_as, 0);
3216 ret = percpu_counter_init(&vm_committed_as, 0, GFP_KERNEL);
3200 VM_BUG_ON(ret);
3201}
3202
3203/*
3204 * Initialise sysctl_user_reserve_kbytes.
3205 *
3206 * This is intended to prevent a user from starting a single memory hogging
3207 * process, such that they cannot recover (kill the hog) in OVERCOMMIT_NEVER

--- 106 unchanged lines hidden ---