/*
 *	linux/mm/msync.c
 *
 * Copyright (C) 1994-1999  Linus Torvalds
 */

/*
 * The msync() system call.
 */
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/hugetlb.h>
#include <linux/writeback.h>
#include <linux/file.h>
#include <linux/syscalls.h>

#include <asm/pgtable.h>
#include <asm/tlbflush.h>

static unsigned long msync_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
				unsigned long addr, unsigned long end)
{
	pte_t *pte;
	spinlock_t *ptl;
	int progress = 0;
	unsigned long ret = 0;

again:
	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
	do {
		struct page *page;

		if (progress >= 64) {
			progress = 0;
			if (need_resched() || need_lockbreak(ptl))
				break;
		}
		progress++;
		if (!pte_present(*pte))
			continue;
		if (!pte_maybe_dirty(*pte))
			continue;
		page = vm_normal_page(vma, addr, *pte);
		if (!page)
			continue;
		if (ptep_clear_flush_dirty(vma, addr, pte) ||
		    page_test_and_clear_dirty(page))
			ret += set_page_dirty(page);
		progress += 3;
	} while (pte++, addr += PAGE_SIZE, addr != end);
	pte_unmap_unlock(pte - 1, ptl);
	cond_resched();
	if (addr != end)
		goto again;
	return ret;
}

static inline unsigned long msync_pmd_range(struct vm_area_struct *vma,
			pud_t *pud, unsigned long addr, unsigned long end)
{
	pmd_t *pmd;
	unsigned long next;
	unsigned long ret = 0;

	pmd = pmd_offset(pud, addr);
	do {
		next = pmd_addr_end(addr, end);
		if (pmd_none_or_clear_bad(pmd))
			continue;
		ret += msync_pte_range(vma, pmd, addr, next);
	} while (pmd++, addr = next, addr != end);
	return ret;
}

static inline unsigned long msync_pud_range(struct vm_area_struct *vma,
			pgd_t *pgd, unsigned long addr, unsigned long end)
{
	pud_t *pud;
	unsigned long next;
	unsigned long ret = 0;

	pud = pud_offset(pgd, addr);
	do {
		next = pud_addr_end(addr, end);
		if (pud_none_or_clear_bad(pud))
			continue;
		ret += msync_pmd_range(vma, pud, addr, next);
	} while (pud++, addr = next, addr != end);
	return ret;
}

static unsigned long msync_page_range(struct vm_area_struct *vma,
			unsigned long addr, unsigned long end)
{
	pgd_t *pgd;
	unsigned long next;
	unsigned long ret = 0;

	/* For hugepages we can't go walking the page table normally,
	 * but that's ok, hugetlbfs is memory based, so we don't need
	 * to do anything more on an msync().
	 */
	if (vma->vm_flags & VM_HUGETLB)
		return 0;

	BUG_ON(addr >= end);
	pgd = pgd_offset(vma->vm_mm, addr);
	flush_cache_range(vma, addr, end);
	do {
		next = pgd_addr_end(addr, end);
		if (pgd_none_or_clear_bad(pgd))
			continue;
		ret += msync_pud_range(vma, pgd, addr, next);
	} while (pgd++, addr = next, addr != end);
	return ret;
}

/*
 * MS_SYNC syncs the entire file - including mappings.
 *
 * MS_ASYNC does not start I/O (it used to, up to 2.5.67).  Instead, it just
 * marks the relevant pages dirty.  The application may now run fsync() to
 * write out the dirty pages and wait on the writeout and check the result.
 * Or the application may run fadvise(FADV_DONTNEED) against the fd to start
 * async writeout immediately.
 * So by _not_ starting I/O in MS_ASYNC we provide complete flexibility to
 * applications.
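 *
 * An illustrative userspace sketch of that pattern (fd, buf and len are
 * placeholder names, nothing defined in this file):
 *
 *	msync(buf, len, MS_ASYNC);	mark the mapped pages dirty
 *	fsync(fd);			write them out, wait, check result
 *
 * or, to start asynchronous writeout without waiting on it, the userspace
 * spelling of the fadvise() call mentioned above:
 *
 *	msync(buf, len, MS_ASYNC);
 *	posix_fadvise(fd, 0, 0, POSIX_FADV_DONTNEED);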
 */
static int msync_interval(struct vm_area_struct *vma, unsigned long addr,
			unsigned long end, int flags,
			unsigned long *nr_pages_dirtied)
{
	struct file *file = vma->vm_file;

	if ((flags & MS_INVALIDATE) && (vma->vm_flags & VM_LOCKED))
		return -EBUSY;

	if (file && (vma->vm_flags & VM_SHARED))
		*nr_pages_dirtied = msync_page_range(vma, addr, end);
	return 0;
}

asmlinkage long sys_msync(unsigned long start, size_t len, int flags)
{
	unsigned long end;
	struct vm_area_struct *vma;
	int unmapped_error = 0;
	int error = -EINVAL;
	int done = 0;

	if (flags & ~(MS_ASYNC | MS_INVALIDATE | MS_SYNC))
		goto out;
	if (start & ~PAGE_MASK)
		goto out;
	if ((flags & MS_ASYNC) && (flags & MS_SYNC))
		goto out;
	error = -ENOMEM;
	len = (len + ~PAGE_MASK) & PAGE_MASK;
	end = start + len;
	if (end < start)
		goto out;
	error = 0;
	if (end == start)
		goto out;
	/*
	 * If the interval [start,end) covers some unmapped address ranges,
	 * just ignore them, but return -ENOMEM at the end.
	 */
	down_read(&current->mm->mmap_sem);
	vma = find_vma(current->mm, start);
	if (!vma) {
		error = -ENOMEM;
		goto out_unlock;
	}
	do {
		unsigned long nr_pages_dirtied = 0;
		struct file *file;

		/* Here start < vma->vm_end. */
		if (start < vma->vm_start) {
			unmapped_error = -ENOMEM;
			start = vma->vm_start;
		}
		/* Here vma->vm_start <= start < vma->vm_end. */
		if (end <= vma->vm_end) {
			if (start < end) {
				error = msync_interval(vma, start, end, flags,
							&nr_pages_dirtied);
				if (error)
					goto out_unlock;
			}
			error = unmapped_error;
			done = 1;
		} else {
			/* Here vma->vm_start <= start < vma->vm_end < end. */
			error = msync_interval(vma, start, vma->vm_end, flags,
						&nr_pages_dirtied);
			if (error)
				goto out_unlock;
		}
		file = vma->vm_file;
		start = vma->vm_end;
		if ((flags & MS_ASYNC) && file && nr_pages_dirtied) {
			get_file(file);
			up_read(&current->mm->mmap_sem);
			balance_dirty_pages_ratelimited_nr(file->f_mapping,
							nr_pages_dirtied);
			fput(file);
			down_read(&current->mm->mmap_sem);
			vma = find_vma(current->mm, start);
		} else if ((flags & MS_SYNC) && file &&
				(vma->vm_flags & VM_SHARED)) {
			get_file(file);
			up_read(&current->mm->mmap_sem);
			error = do_fsync(file, 0);
			fput(file);
			down_read(&current->mm->mmap_sem);
			if (error)
				goto out_unlock;
			vma = find_vma(current->mm, start);
		} else {
			vma = vma->vm_next;
		}
	} while (vma && !done);
out_unlock:
	up_read(&current->mm->mmap_sem);
out:
	return error;
}
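
/*
 * A minimal userspace sketch of the argument and error semantics enforced
 * above (illustrative only; "data.bin" and the local names are made up):
 *
 *	#include <sys/mman.h>
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		size_t len = 4096;
 *		int fd = open("data.bin", O_RDWR);
 *		char *buf = mmap(NULL, len, PROT_READ | PROT_WRITE,
 *				 MAP_SHARED, fd, 0);
 *
 *		buf[0] = 1;
 *		msync(buf + 1, len, MS_SYNC);	     fails, EINVAL: start
 *						     is not page-aligned
 *		msync(buf, len, MS_ASYNC | MS_SYNC); fails, EINVAL: the two
 *						     flags are exclusive
 *		msync(buf, len, MS_SYNC);	     0: dirty ptes are found
 *						     by the page-table walk
 *						     and the file is fsynced
 *		munmap(buf, len);
 *		close(fd);
 *		return 0;
 *	}
 *
 * Per the code above, MS_INVALIDATE against an mlock()ed mapping fails with
 * EBUSY, and a range crossing unmapped address space is synced where it is
 * mapped but still returns ENOMEM.
 */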