xref: /openbmc/linux/fs/vboxsf/file.c (revision aa017ab9)
1 // SPDX-License-Identifier: MIT
2 /*
3  * VirtualBox Guest Shared Folders support: Regular file inode and file ops.
4  *
5  * Copyright (C) 2006-2018 Oracle Corporation
6  */
7 
8 #include <linux/mm.h>
9 #include <linux/page-flags.h>
10 #include <linux/pagemap.h>
11 #include <linux/highmem.h>
12 #include <linux/sizes.h>
13 #include "vfsmod.h"
14 
/*
 * Per-open state of a regular file: one host-side handle plus the
 * bookkeeping that lets writeback borrow it.
 */
struct vboxsf_handle {
	u64 handle;		/* host handle, passed to vboxsf_read/write/close */
	u32 root;		/* copied from the superblock's sbi->root at open */
	u32 access_flags;	/* SHFL_CF_ACCESS_* bits requested at open */
	struct kref refcount;	/* last put runs vboxsf_handle_release() */
	struct list_head head;	/* entry in the owning inode's handle_list */
};
22 
23 static int vboxsf_file_open(struct inode *inode, struct file *file)
24 {
25 	struct vboxsf_inode *sf_i = VBOXSF_I(inode);
26 	struct shfl_createparms params = {};
27 	struct vboxsf_handle *sf_handle;
28 	u32 access_flags = 0;
29 	int err;
30 
31 	sf_handle = kmalloc(sizeof(*sf_handle), GFP_KERNEL);
32 	if (!sf_handle)
33 		return -ENOMEM;
34 
35 	/*
36 	 * We check the value of params.handle afterwards to find out if
37 	 * the call succeeded or failed, as the API does not seem to cleanly
38 	 * distinguish error and informational messages.
39 	 *
40 	 * Furthermore, we must set params.handle to SHFL_HANDLE_NIL to
41 	 * make the shared folders host service use our mode parameter.
42 	 */
43 	params.handle = SHFL_HANDLE_NIL;
44 	if (file->f_flags & O_CREAT) {
45 		params.create_flags |= SHFL_CF_ACT_CREATE_IF_NEW;
46 		/*
47 		 * We ignore O_EXCL, as the Linux kernel seems to call create
48 		 * beforehand itself, so O_EXCL should always fail.
49 		 */
50 		if (file->f_flags & O_TRUNC)
51 			params.create_flags |= SHFL_CF_ACT_OVERWRITE_IF_EXISTS;
52 		else
53 			params.create_flags |= SHFL_CF_ACT_OPEN_IF_EXISTS;
54 	} else {
55 		params.create_flags |= SHFL_CF_ACT_FAIL_IF_NEW;
56 		if (file->f_flags & O_TRUNC)
57 			params.create_flags |= SHFL_CF_ACT_OVERWRITE_IF_EXISTS;
58 	}
59 
60 	switch (file->f_flags & O_ACCMODE) {
61 	case O_RDONLY:
62 		access_flags |= SHFL_CF_ACCESS_READ;
63 		break;
64 
65 	case O_WRONLY:
66 		access_flags |= SHFL_CF_ACCESS_WRITE;
67 		break;
68 
69 	case O_RDWR:
70 		access_flags |= SHFL_CF_ACCESS_READWRITE;
71 		break;
72 
73 	default:
74 		WARN_ON(1);
75 	}
76 
77 	if (file->f_flags & O_APPEND)
78 		access_flags |= SHFL_CF_ACCESS_APPEND;
79 
80 	params.create_flags |= access_flags;
81 	params.info.attr.mode = inode->i_mode;
82 
83 	err = vboxsf_create_at_dentry(file_dentry(file), &params);
84 	if (err == 0 && params.handle == SHFL_HANDLE_NIL)
85 		err = (params.result == SHFL_FILE_EXISTS) ? -EEXIST : -ENOENT;
86 	if (err) {
87 		kfree(sf_handle);
88 		return err;
89 	}
90 
91 	/* the host may have given us different attr then requested */
92 	sf_i->force_restat = 1;
93 
94 	/* init our handle struct and add it to the inode's handles list */
95 	sf_handle->handle = params.handle;
96 	sf_handle->root = VBOXSF_SBI(inode->i_sb)->root;
97 	sf_handle->access_flags = access_flags;
98 	kref_init(&sf_handle->refcount);
99 
100 	mutex_lock(&sf_i->handle_list_mutex);
101 	list_add(&sf_handle->head, &sf_i->handle_list);
102 	mutex_unlock(&sf_i->handle_list_mutex);
103 
104 	file->private_data = sf_handle;
105 	return 0;
106 }
107 
108 static void vboxsf_handle_release(struct kref *refcount)
109 {
110 	struct vboxsf_handle *sf_handle =
111 		container_of(refcount, struct vboxsf_handle, refcount);
112 
113 	vboxsf_close(sf_handle->root, sf_handle->handle);
114 	kfree(sf_handle);
115 }
116 
117 static int vboxsf_file_release(struct inode *inode, struct file *file)
118 {
119 	struct vboxsf_inode *sf_i = VBOXSF_I(inode);
120 	struct vboxsf_handle *sf_handle = file->private_data;
121 
122 	/*
123 	 * When a file is closed on our (the guest) side, we want any subsequent
124 	 * accesses done on the host side to see all changes done from our side.
125 	 */
126 	filemap_write_and_wait(inode->i_mapping);
127 
128 	mutex_lock(&sf_i->handle_list_mutex);
129 	list_del(&sf_handle->head);
130 	mutex_unlock(&sf_i->handle_list_mutex);
131 
132 	kref_put(&sf_handle->refcount, vboxsf_handle_release);
133 	return 0;
134 }
135 
136 /*
137  * Write back dirty pages now, because there may not be any suitable
138  * open files later
139  */
140 static void vboxsf_vma_close(struct vm_area_struct *vma)
141 {
142 	filemap_write_and_wait(vma->vm_file->f_mapping);
143 }
144 
145 static const struct vm_operations_struct vboxsf_file_vm_ops = {
146 	.close		= vboxsf_vma_close,
147 	.fault		= filemap_fault,
148 	.map_pages	= filemap_map_pages,
149 };
150 
151 static int vboxsf_file_mmap(struct file *file, struct vm_area_struct *vma)
152 {
153 	int err;
154 
155 	err = generic_file_mmap(file, vma);
156 	if (!err)
157 		vma->vm_ops = &vboxsf_file_vm_ops;
158 
159 	return err;
160 }
161 
162 /*
163  * Note that since we are accessing files on the host's filesystem, files
164  * may always be changed underneath us by the host!
165  *
166  * The vboxsf API between the guest and the host does not offer any functions
167  * to deal with this. There is no inode-generation to check for changes, no
168  * events / callback on changes and no way to lock files.
169  *
170  * To avoid returning stale data when a file gets *opened* on our (the guest)
171  * side, we do a "stat" on the host side, then compare the mtime with the
172  * last known mtime and invalidate the page-cache if they differ.
173  * This is done from vboxsf_inode_revalidate().
174  *
 * When reads are done through the read_iter fop, it is possible to do
 * further cache revalidation at that point. There are 3 options for this:
177  *
178  * 1)  Rely solely on the revalidation done at open time
179  * 2)  Do another "stat" and compare mtime again. Unfortunately the vboxsf
180  *     host API does not allow stat on handles, so we would need to use
181  *     file->f_path.dentry and the stat will then fail if the file was unlinked
182  *     or renamed (and there is no thing like NFS' silly-rename). So we get:
183  * 2a) "stat" and compare mtime, on stat failure invalidate the cache
184  * 2b) "stat" and compare mtime, on stat failure do nothing
185  * 3)  Simply always call invalidate_inode_pages2_range on the range of the read
186  *
 * Currently we are keeping things KISS and using option 1. This allows
 * directly using generic_file_read_iter without wrapping it.
189  *
190  * This means that only data written on the host side before open() on
191  * the guest side is guaranteed to be seen by the guest. If necessary
192  * we may provide other read-cache strategies in the future and make this
193  * configurable through a mount option.
194  */
195 const struct file_operations vboxsf_reg_fops = {
196 	.llseek = generic_file_llseek,
197 	.read_iter = generic_file_read_iter,
198 	.write_iter = generic_file_write_iter,
199 	.mmap = vboxsf_file_mmap,
200 	.open = vboxsf_file_open,
201 	.release = vboxsf_file_release,
202 	.fsync = noop_fsync,
203 	.splice_read = generic_file_splice_read,
204 };
205 
206 const struct inode_operations vboxsf_reg_iops = {
207 	.getattr = vboxsf_getattr,
208 	.setattr = vboxsf_setattr
209 };
210 
211 static int vboxsf_readpage(struct file *file, struct page *page)
212 {
213 	struct vboxsf_handle *sf_handle = file->private_data;
214 	loff_t off = page_offset(page);
215 	u32 nread = PAGE_SIZE;
216 	u8 *buf;
217 	int err;
218 
219 	buf = kmap(page);
220 
221 	err = vboxsf_read(sf_handle->root, sf_handle->handle, off, &nread, buf);
222 	if (err == 0) {
223 		memset(&buf[nread], 0, PAGE_SIZE - nread);
224 		flush_dcache_page(page);
225 		SetPageUptodate(page);
226 	} else {
227 		SetPageError(page);
228 	}
229 
230 	kunmap(page);
231 	unlock_page(page);
232 	return err;
233 }
234 
235 static struct vboxsf_handle *vboxsf_get_write_handle(struct vboxsf_inode *sf_i)
236 {
237 	struct vboxsf_handle *h, *sf_handle = NULL;
238 
239 	mutex_lock(&sf_i->handle_list_mutex);
240 	list_for_each_entry(h, &sf_i->handle_list, head) {
241 		if (h->access_flags == SHFL_CF_ACCESS_WRITE ||
242 		    h->access_flags == SHFL_CF_ACCESS_READWRITE) {
243 			kref_get(&h->refcount);
244 			sf_handle = h;
245 			break;
246 		}
247 	}
248 	mutex_unlock(&sf_i->handle_list_mutex);
249 
250 	return sf_handle;
251 }
252 
253 static int vboxsf_writepage(struct page *page, struct writeback_control *wbc)
254 {
255 	struct inode *inode = page->mapping->host;
256 	struct vboxsf_inode *sf_i = VBOXSF_I(inode);
257 	struct vboxsf_handle *sf_handle;
258 	loff_t off = page_offset(page);
259 	loff_t size = i_size_read(inode);
260 	u32 nwrite = PAGE_SIZE;
261 	u8 *buf;
262 	int err;
263 
264 	if (off + PAGE_SIZE > size)
265 		nwrite = size & ~PAGE_MASK;
266 
267 	sf_handle = vboxsf_get_write_handle(sf_i);
268 	if (!sf_handle)
269 		return -EBADF;
270 
271 	buf = kmap(page);
272 	err = vboxsf_write(sf_handle->root, sf_handle->handle,
273 			   off, &nwrite, buf);
274 	kunmap(page);
275 
276 	kref_put(&sf_handle->refcount, vboxsf_handle_release);
277 
278 	if (err == 0) {
279 		ClearPageError(page);
280 		/* mtime changed */
281 		sf_i->force_restat = 1;
282 	} else {
283 		ClearPageUptodate(page);
284 	}
285 
286 	unlock_page(page);
287 	return err;
288 }
289 
290 static int vboxsf_write_end(struct file *file, struct address_space *mapping,
291 			    loff_t pos, unsigned int len, unsigned int copied,
292 			    struct page *page, void *fsdata)
293 {
294 	struct inode *inode = mapping->host;
295 	struct vboxsf_handle *sf_handle = file->private_data;
296 	unsigned int from = pos & ~PAGE_MASK;
297 	u32 nwritten = len;
298 	u8 *buf;
299 	int err;
300 
301 	/* zero the stale part of the page if we did a short copy */
302 	if (!PageUptodate(page) && copied < len)
303 		zero_user(page, from + copied, len - copied);
304 
305 	buf = kmap(page);
306 	err = vboxsf_write(sf_handle->root, sf_handle->handle,
307 			   pos, &nwritten, buf + from);
308 	kunmap(page);
309 
310 	if (err) {
311 		nwritten = 0;
312 		goto out;
313 	}
314 
315 	/* mtime changed */
316 	VBOXSF_I(inode)->force_restat = 1;
317 
318 	if (!PageUptodate(page) && nwritten == PAGE_SIZE)
319 		SetPageUptodate(page);
320 
321 	pos += nwritten;
322 	if (pos > inode->i_size)
323 		i_size_write(inode, pos);
324 
325 out:
326 	unlock_page(page);
327 	put_page(page);
328 
329 	return nwritten;
330 }
331 
332 /*
 * Note that simple_write_begin does not read the page from disk on partial
 * writes. This is ok, since vboxsf_write_end only writes the written parts
 * of the page and it does not call SetPageUptodate for partial writes.
336  */
337 const struct address_space_operations vboxsf_reg_aops = {
338 	.readpage = vboxsf_readpage,
339 	.writepage = vboxsf_writepage,
340 	.set_page_dirty = __set_page_dirty_nobuffers,
341 	.write_begin = simple_write_begin,
342 	.write_end = vboxsf_write_end,
343 };
344 
345 static const char *vboxsf_get_link(struct dentry *dentry, struct inode *inode,
346 				   struct delayed_call *done)
347 {
348 	struct vboxsf_sbi *sbi = VBOXSF_SBI(inode->i_sb);
349 	struct shfl_string *path;
350 	char *link;
351 	int err;
352 
353 	if (!dentry)
354 		return ERR_PTR(-ECHILD);
355 
356 	path = vboxsf_path_from_dentry(sbi, dentry);
357 	if (IS_ERR(path))
358 		return ERR_CAST(path);
359 
360 	link = kzalloc(PATH_MAX, GFP_KERNEL);
361 	if (!link) {
362 		__putname(path);
363 		return ERR_PTR(-ENOMEM);
364 	}
365 
366 	err = vboxsf_readlink(sbi->root, path, PATH_MAX, link);
367 	__putname(path);
368 	if (err) {
369 		kfree(link);
370 		return ERR_PTR(err);
371 	}
372 
373 	set_delayed_call(done, kfree_link, link);
374 	return link;
375 }
376 
377 const struct inode_operations vboxsf_lnk_iops = {
378 	.get_link = vboxsf_get_link
379 };
380