/*
 *	linux/mm/msync.c
 *
 * Copyright (C) 1994-1999  Linus Torvalds
 */

/*
 * The msync() system call.
 */
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/hugetlb.h>
#include <linux/syscalls.h>

#include <asm/pgtable.h>
#include <asm/tlbflush.h>

/*
 * Called with mm->page_table_lock held to protect against other
 * threads/the swapper from ripping pte's out from under us.
 */

static void sync_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
				unsigned long addr, unsigned long end)
{
	pte_t *pte;

	pte = pte_offset_map(pmd, addr);
	do {
		unsigned long pfn;
		struct page *page;

		if (!pte_present(*pte))
			continue;
		pfn = pte_pfn(*pte);
		if (!pfn_valid(pfn))
			continue;
		page = pfn_to_page(pfn);
		if (PageReserved(page))
			continue;

		if (ptep_clear_flush_dirty(vma, addr, pte) ||
		    page_test_and_clear_dirty(page))
			set_page_dirty(page);
	} while (pte++, addr += PAGE_SIZE, addr != end);
	pte_unmap(pte - 1);
}

static inline void sync_pmd_range(struct vm_area_struct *vma, pud_t *pud,
				unsigned long addr, unsigned long end)
{
	pmd_t *pmd;
	unsigned long next;

	pmd = pmd_offset(pud, addr);
	do {
		next = pmd_addr_end(addr, end);
		if (pmd_none_or_clear_bad(pmd))
			continue;
		sync_pte_range(vma, pmd, addr, next);
	} while (pmd++, addr = next, addr != end);
}

static inline void sync_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
				unsigned long addr, unsigned long end)
{
	pud_t *pud;
	unsigned long next;

	pud = pud_offset(pgd, addr);
	do {
		next = pud_addr_end(addr, end);
		if (pud_none_or_clear_bad(pud))
			continue;
		sync_pmd_range(vma, pud, addr, next);
	} while (pud++, addr = next, addr != end);
}

static void sync_page_range(struct vm_area_struct *vma,
				unsigned long addr, unsigned long end)
{
	struct mm_struct *mm = vma->vm_mm;
	pgd_t *pgd;
	unsigned long next;

	/* For hugepages we can't go walking the page table normally,
	 * but that's ok, hugetlbfs is memory based, so we don't need
	 * to do anything more on an msync() */
	if (is_vm_hugetlb_page(vma))
		return;

	BUG_ON(addr >= end);
	pgd = pgd_offset(mm, addr);
	flush_cache_range(vma, addr, end);
	spin_lock(&mm->page_table_lock);
	do {
		next = pgd_addr_end(addr, end);
		if (pgd_none_or_clear_bad(pgd))
			continue;
		sync_pud_range(vma, pgd, addr, next);
	} while (pgd++, addr = next, addr != end);
	spin_unlock(&mm->page_table_lock);
}

#ifdef CONFIG_PREEMPT
static inline void filemap_sync(struct vm_area_struct *vma,
				unsigned long addr, unsigned long end)
{
	const size_t chunk = 64 * 1024;	/* bytes */
	unsigned long next;

	do {
		next = addr + chunk;
		if (next > end || next < addr)
			next = end;
		sync_page_range(vma, addr, next);
		cond_resched();
	} while (addr = next, addr != end);
}
#else
static inline void filemap_sync(struct vm_area_struct *vma,
				unsigned long addr, unsigned long end)
{
	sync_page_range(vma, addr, end);
}
#endif

/*
 * MS_SYNC syncs the entire file - including mappings.
 *
 * MS_ASYNC does not start I/O (it used to, up to 2.5.67).  Instead, it just
 * marks the relevant pages dirty.  The application may now run fsync() to
 * write out the dirty pages and wait on the writeout and check the result.
 * Or the application may run fadvise(FADV_DONTNEED) against the fd to start
 * async writeout immediately.
 * So by _not_ starting I/O in MS_ASYNC we provide complete flexibility to
 * applications.
 */
static int msync_interval(struct vm_area_struct *vma,
	unsigned long addr, unsigned long end, int flags)
{
	int ret = 0;
	struct file *file = vma->vm_file;

	if ((flags & MS_INVALIDATE) && (vma->vm_flags & VM_LOCKED))
		return -EBUSY;

	if (file && (vma->vm_flags & VM_SHARED)) {
		filemap_sync(vma, addr, end);

		if (flags & MS_SYNC) {
			struct address_space *mapping = file->f_mapping;
			int err;

			ret = filemap_fdatawrite(mapping);
			if (file->f_op && file->f_op->fsync) {
				/*
				 * We don't take i_sem here because mmap_sem
				 * is already held.
				 */
				err = file->f_op->fsync(file, file->f_dentry, 1);
				if (err && !ret)
					ret = err;
			}
			err = filemap_fdatawait(mapping);
			if (!ret)
				ret = err;
		}
	}
	return ret;
}

asmlinkage long sys_msync(unsigned long start, size_t len, int flags)
{
	unsigned long end;
	struct vm_area_struct *vma;
	int unmapped_error, error = -EINVAL;

	if (flags & MS_SYNC)
		current->flags |= PF_SYNCWRITE;

	down_read(&current->mm->mmap_sem);
	if (flags & ~(MS_ASYNC | MS_INVALIDATE | MS_SYNC))
		goto out;
	if (start & ~PAGE_MASK)
		goto out;
	if ((flags & MS_ASYNC) && (flags & MS_SYNC))
		goto out;
	error = -ENOMEM;
	len = (len + ~PAGE_MASK) & PAGE_MASK;
	end = start + len;
	if (end < start)
		goto out;
	error = 0;
	if (end == start)
		goto out;
	/*
	 * If the interval [start,end) covers some unmapped address ranges,
	 * just ignore them, but return -ENOMEM at the end.
	 */
	vma = find_vma(current->mm, start);
	unmapped_error = 0;
	for (;;) {
		/* Still start < end. */
		error = -ENOMEM;
		if (!vma)
			goto out;
		/* Here start < vma->vm_end. */
		if (start < vma->vm_start) {
			unmapped_error = -ENOMEM;
			start = vma->vm_start;
		}
		/* Here vma->vm_start <= start < vma->vm_end. */
		if (end <= vma->vm_end) {
			if (start < end) {
				error = msync_interval(vma, start, end, flags);
				if (error)
					goto out;
			}
			error = unmapped_error;
			goto out;
		}
		/* Here vma->vm_start <= start < vma->vm_end < end. */
		error = msync_interval(vma, start, vma->vm_end, flags);
		if (error)
			goto out;
		start = vma->vm_end;
		vma = vma->vm_next;
	}
out:
	up_read(&current->mm->mmap_sem);
	current->flags &= ~PF_SYNCWRITE;
	return error;
}
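
/*
 * Illustrative userspace sketch, not part of this file and compiled out
 * below: how the MS_ASYNC vs. MS_SYNC behaviour described above looks from
 * an application.  The file name "data.bin", the 4096-byte mapping and the
 * minimal error handling are assumptions made up for the example; it also
 * assumes the file already exists and is at least one page long.
 */
#if 0
#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	int fd = open("data.bin", O_RDWR);
	char *map;

	if (fd < 0)
		return 1;
	map = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (map == MAP_FAILED) {
		close(fd);
		return 1;
	}

	map[0] = 'x';			/* dirty the shared mapping */

	/*
	 * MS_ASYNC only marks the touched pages dirty; no I/O is started.
	 * The application chooses when to write out and wait, e.g. fsync().
	 */
	msync(map, 4096, MS_ASYNC);
	fsync(fd);

	/*
	 * MS_SYNC writes out the dirty pages and waits for the writeout to
	 * complete before msync() returns.
	 */
	map[0] = 'y';
	msync(map, 4096, MS_SYNC);

	munmap(map, 4096);
	close(fd);
	return 0;
}
#endif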