xref: /openbmc/linux/mm/msync.c (revision 1da177e4)
/*
 *	linux/mm/msync.c
 *
 * Copyright (C) 1994-1999  Linus Torvalds
 */

/*
 * The msync() system call.
 */
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/hugetlb.h>
#include <linux/syscalls.h>

#include <asm/pgtable.h>
#include <asm/tlbflush.h>

/*
 * Called with mm->page_table_lock held to protect against other
 * threads and the swapper ripping PTEs out from under us.
 */

static void sync_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
				unsigned long addr, unsigned long end)
{
	pte_t *pte;

	pte = pte_offset_map(pmd, addr);
	do {
		unsigned long pfn;
		struct page *page;

		if (!pte_present(*pte))
			continue;
		pfn = pte_pfn(*pte);
		if (!pfn_valid(pfn))
			continue;
		page = pfn_to_page(pfn);
		if (PageReserved(page))
			continue;

		if (ptep_clear_flush_dirty(vma, addr, pte) ||
		    page_test_and_clear_dirty(page))
			set_page_dirty(page);
	} while (pte++, addr += PAGE_SIZE, addr != end);
	pte_unmap(pte - 1);
}
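
/*
 * Note on the dirty check above: ptep_clear_flush_dirty() tests and
 * clears the pte's hardware dirty bit, flushing the stale TLB entry,
 * while page_test_and_clear_dirty() covers architectures such as s390
 * that keep the dirty state with the physical page instead.  Either
 * way, set_page_dirty() transfers the dirtiness to the struct page,
 * where the writeback path will find it.
 */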

static inline void sync_pmd_range(struct vm_area_struct *vma, pud_t *pud,
				unsigned long addr, unsigned long end)
{
	pmd_t *pmd;
	unsigned long next;

	pmd = pmd_offset(pud, addr);
	do {
		next = pmd_addr_end(addr, end);
		if (pmd_none_or_clear_bad(pmd))
			continue;
		sync_pte_range(vma, pmd, addr, next);
	} while (pmd++, addr = next, addr != end);
}

static inline void sync_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
				unsigned long addr, unsigned long end)
{
	pud_t *pud;
	unsigned long next;

	pud = pud_offset(pgd, addr);
	do {
		next = pud_addr_end(addr, end);
		if (pud_none_or_clear_bad(pud))
			continue;
		sync_pmd_range(vma, pud, addr, next);
	} while (pud++, addr = next, addr != end);
}

static void sync_page_range(struct vm_area_struct *vma,
				unsigned long addr, unsigned long end)
{
	struct mm_struct *mm = vma->vm_mm;
	pgd_t *pgd;
	unsigned long next;

	/* For hugepages we can't walk the page table normally, but
	 * that's OK: hugetlbfs is memory-based, so there is nothing
	 * more to do on an msync(). */
	if (is_vm_hugetlb_page(vma))
		return;

	BUG_ON(addr >= end);
	pgd = pgd_offset(mm, addr);
	flush_cache_range(vma, addr, end);
	spin_lock(&mm->page_table_lock);
	do {
		next = pgd_addr_end(addr, end);
		if (pgd_none_or_clear_bad(pgd))
			continue;
		sync_pud_range(vma, pgd, addr, next);
	} while (pgd++, addr = next, addr != end);
	spin_unlock(&mm->page_table_lock);
}
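
/*
 * The walk above is the standard four-level descent: pgd_offset()
 * locates the top-level entry for this mm, each level clamps `next'
 * with the matching addr_end() helper so it never crosses an entry
 * boundary or `end', and the none_or_clear_bad() checks skip (and
 * sanity-check) empty entries.
 */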

#ifdef CONFIG_PREEMPT
static inline void filemap_sync(struct vm_area_struct *vma,
				unsigned long addr, unsigned long end)
{
	const size_t chunk = 64 * 1024;	/* bytes */
	unsigned long next;

	do {
		next = addr + chunk;
		if (next > end || next < addr)
			next = end;
		sync_page_range(vma, addr, next);
		cond_resched();
	} while (addr = next, addr != end);
}
#else
static inline void filemap_sync(struct vm_area_struct *vma,
				unsigned long addr, unsigned long end)
{
	sync_page_range(vma, addr, end);
}
#endif
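
/*
 * Rough arithmetic for the 64KB chunk above: with 4KB pages each
 * chunk covers 16 ptes, so page_table_lock is held across at most
 * 16 pte updates between cond_resched() points.  (Page size varies
 * by architecture; the figures are illustrative.)
 */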

/*
 * MS_SYNC syncs the entire file - including mappings.
 *
 * MS_ASYNC does not start I/O (it used to, up to 2.5.67).  Instead, it just
 * marks the relevant pages dirty.  The application may now run fsync() to
 * write out the dirty pages and wait on the writeout and check the result.
 * Or the application may run fadvise(FADV_DONTNEED) against the fd to start
 * async writeout immediately.
 * So by _not_ starting I/O in MS_ASYNC we provide complete flexibility to
 * applications.
 */
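/*
 * For illustration, the userspace pattern this enables (a sketch, not
 * part of this file; assumes `map' is a shared file-backed mapping of
 * `len' bytes open on descriptor `fd'):
 *
 *	msync(map, len, MS_ASYNC);	(just mark the pages dirty)
 *	fsync(fd);			(start writeout and wait on it)
 */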
static int msync_interval(struct vm_area_struct *vma,
			unsigned long addr, unsigned long end, int flags)
{
	int ret = 0;
	struct file *file = vma->vm_file;

	if ((flags & MS_INVALIDATE) && (vma->vm_flags & VM_LOCKED))
		return -EBUSY;

	if (file && (vma->vm_flags & VM_SHARED)) {
		filemap_sync(vma, addr, end);

		if (flags & MS_SYNC) {
			struct address_space *mapping = file->f_mapping;
			int err;

			ret = filemap_fdatawrite(mapping);
			if (file->f_op && file->f_op->fsync) {
				/*
				 * We don't take i_sem here because mmap_sem
				 * is already held.
				 */
				err = file->f_op->fsync(file, file->f_dentry, 1);
				if (err && !ret)
					ret = err;
			}
			err = filemap_fdatawait(mapping);
			if (!ret)
				ret = err;
		}
	}
	return ret;
}

asmlinkage long sys_msync(unsigned long start, size_t len, int flags)
{
	unsigned long end;
	struct vm_area_struct *vma;
	int unmapped_error, error = -EINVAL;

	if (flags & MS_SYNC)
		current->flags |= PF_SYNCWRITE;

	down_read(&current->mm->mmap_sem);
	if (flags & ~(MS_ASYNC | MS_INVALIDATE | MS_SYNC))
		goto out;
	if (start & ~PAGE_MASK)
		goto out;
	if ((flags & MS_ASYNC) && (flags & MS_SYNC))
		goto out;
	error = -ENOMEM;
	len = (len + ~PAGE_MASK) & PAGE_MASK;
	end = start + len;
	if (end < start)
		goto out;
	error = 0;
	if (end == start)
		goto out;
	/*
	 * If the interval [start,end) covers some unmapped address ranges,
	 * just ignore them, but return -ENOMEM at the end.
	 */
	vma = find_vma(current->mm, start);
	unmapped_error = 0;
	for (;;) {
		/* Still start < end. */
		error = -ENOMEM;
		if (!vma)
			goto out;
		/* Here start < vma->vm_end. */
		if (start < vma->vm_start) {
			unmapped_error = -ENOMEM;
			start = vma->vm_start;
		}
		/* Here vma->vm_start <= start < vma->vm_end. */
		if (end <= vma->vm_end) {
			if (start < end) {
				error = msync_interval(vma, start, end, flags);
				if (error)
					goto out;
			}
			error = unmapped_error;
			goto out;
		}
		/* Here vma->vm_start <= start < vma->vm_end < end. */
		error = msync_interval(vma, start, vma->vm_end, flags);
		if (error)
			goto out;
		start = vma->vm_end;
		vma = vma->vm_next;
	}
out:
	up_read(&current->mm->mmap_sem);
	current->flags &= ~PF_SYNCWRITE;
	return error;
}
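
/*
 * Userspace view of the checks above (an illustrative sketch; `page'
 * stands for the system page size, `fd' for an open file descriptor):
 *
 *	char *map = mmap(NULL, 2 * page, PROT_READ | PROT_WRITE,
 *			 MAP_SHARED, fd, 0);
 *	map[0] = 1;
 *	msync(map, 2 * page, MS_SYNC);	(ok: aligned start, len rounded up)
 *	msync(map + 1, page, MS_SYNC);	(EINVAL: start not page-aligned)
 *
 * A range that crosses unmapped address space is synced where it is
 * mapped, but the call still returns -ENOMEM.
 */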