xref: /openbmc/qemu/block/export/fuse.c (revision f89f54d52bf8fdc6de1c90367f9bdd65e40fa382)
1 /*
2  * Present a block device as a raw image through FUSE
3  *
4  * Copyright (c) 2020 Max Reitz <mreitz@redhat.com>
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; under version 2 or later of the License.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program; if not, see <http://www.gnu.org/licenses/>.
17  */
18 
19 #define FUSE_USE_VERSION 31
20 
21 #include "qemu/osdep.h"
22 #include "qemu/memalign.h"
23 #include "block/aio.h"
24 #include "block/block_int-common.h"
25 #include "block/export.h"
26 #include "block/fuse.h"
27 #include "block/qapi.h"
28 #include "qapi/error.h"
29 #include "qapi/qapi-commands-block.h"
30 #include "qemu/main-loop.h"
31 #include "sysemu/block-backend.h"
32 
33 #include <fuse.h>
34 #include <fuse_lowlevel.h>
35 
36 #if defined(CONFIG_FALLOCATE_ZERO_RANGE)
37 #include <linux/falloc.h>
38 #endif
39 
40 #ifdef __linux__
41 #include <linux/fs.h>
42 #endif
43 
/*
 * Prevent overly long bounce buffer allocations.
 *
 * This is the largest read request size we accept from FUSE: it is passed
 * as the max_read mount option, set as conn->max_read in fuse_init(), and
 * checked again in fuse_read() before the bounce buffer is allocated.
 */
#define FUSE_MAX_BOUNCE_BYTES (MIN(BDRV_REQUEST_MAX_BYTES, 64 * 1024 * 1024))
46 
47 
48 typedef struct FuseExport {
49     BlockExport common;
50 
51     struct fuse_session *fuse_session;
52     struct fuse_buf fuse_buf;
53     unsigned int in_flight; /* atomic */
54     bool mounted, fd_handler_set_up;
55 
56     char *mountpoint;
57     bool writable;
58     bool growable;
59     /* Whether allow_other was used as a mount option or not */
60     bool allow_other;
61 
62     mode_t st_mode;
63     uid_t st_uid;
64     gid_t st_gid;
65 } FuseExport;
66 
67 static GHashTable *exports;
68 static const struct fuse_lowlevel_ops fuse_ops;
69 
70 static void fuse_export_shutdown(BlockExport *exp);
71 static void fuse_export_delete(BlockExport *exp);
72 
73 static void init_exports_table(void);
74 
75 static int setup_fuse_export(FuseExport *exp, const char *mountpoint,
76                              bool allow_other, Error **errp);
77 static void read_from_fuse_export(void *opaque);
78 
79 static bool is_regular_file(const char *path, Error **errp);
80 
81 
fuse_export_drained_begin(void * opaque)82 static void fuse_export_drained_begin(void *opaque)
83 {
84     FuseExport *exp = opaque;
85 
86     aio_set_fd_handler(exp->common.ctx,
87                        fuse_session_fd(exp->fuse_session),
88                        NULL, NULL, NULL, NULL, NULL);
89     exp->fd_handler_set_up = false;
90 }
91 
fuse_export_drained_end(void * opaque)92 static void fuse_export_drained_end(void *opaque)
93 {
94     FuseExport *exp = opaque;
95 
96     /* Refresh AioContext in case it changed */
97     exp->common.ctx = blk_get_aio_context(exp->common.blk);
98 
99     aio_set_fd_handler(exp->common.ctx,
100                        fuse_session_fd(exp->fuse_session),
101                        read_from_fuse_export, NULL, NULL, NULL, exp);
102     exp->fd_handler_set_up = true;
103 }
104 
fuse_export_drained_poll(void * opaque)105 static bool fuse_export_drained_poll(void *opaque)
106 {
107     FuseExport *exp = opaque;
108 
109     return qatomic_read(&exp->in_flight) > 0;
110 }
111 
112 static const BlockDevOps fuse_export_blk_dev_ops = {
113     .drained_begin = fuse_export_drained_begin,
114     .drained_end   = fuse_export_drained_end,
115     .drained_poll  = fuse_export_drained_poll,
116 };
117 
/**
 * Create callback of the FUSE export driver: validate the mount point,
 * initialise the export state and mount the FUSE session.
 *
 * @blk_exp: generic export object embedded in the FuseExport
 * @blk_exp_args: export options; must be of type BLOCK_EXPORT_TYPE_FUSE.
 *                If allow_other was not given, it is set to "auto" in place.
 * @errp: set on failure
 *
 * Returns 0 on success and a negative errno value on failure.  Failures
 * after the permission update clean up through fuse_export_delete().
 */
static int fuse_export_create(BlockExport *blk_exp,
                              BlockExportOptions *blk_exp_args,
                              Error **errp)
{
    FuseExport *exp = container_of(blk_exp, FuseExport, common);
    BlockExportOptionsFuse *args = &blk_exp_args->u.fuse;
    int ret;

    assert(blk_exp_args->type == BLOCK_EXPORT_TYPE_FUSE);

    /* Both writable and growable exports hold RESIZE permanently */
    if (blk_exp_args->writable || args->growable) {
        uint64_t perm, shared_perm;

        blk_get_perm(exp->common.blk, &perm, &shared_perm);

        ret = blk_set_perm(exp->common.blk, perm | BLK_PERM_RESIZE,
                           shared_perm, errp);
        if (ret < 0) {
            return ret;
        }
    }

    blk_set_dev_ops(exp->common.blk, &fuse_export_blk_dev_ops, exp);

    /*
     * Draining is implemented here with the in-flight counter and by
     * removing the FUSE fd handler.  BlockBackend requests must therefore
     * not be queued: they have to complete for the counter to drop to zero.
     */
    blk_set_disable_request_queuing(exp->common.blk, true);

    init_exports_table();

    /*
     * This lookup has to come before is_regular_file(): that function
     * stat()s the path, and if we already export something on it, we would
     * have to serve that stat() ourselves -- which we cannot do while we
     * are stuck in here.
     * (Relative paths are not resolved: bdrv_make_absolute_filename()
     * might do the wrong thing for paths containing colons, and realpath()
     * would resolve symlinks, although the mount point is the link itself
     * and not its destination.)
     * Consequently only the most common clash is caught, namely passing
     * the very same path string twice.
     */
    if (g_hash_table_contains(exports, args->mountpoint)) {
        error_setg(errp, "There already is a FUSE export on '%s'",
                   args->mountpoint);
        ret = -EEXIST;
        goto fail;
    }

    if (!is_regular_file(args->mountpoint, errp)) {
        ret = -EINVAL;
        goto fail;
    }

    exp->mountpoint = g_strdup(args->mountpoint);
    exp->writable = blk_exp_args->writable;
    exp->growable = args->growable;

    /* allow_other defaults to "auto" */
    if (!args->has_allow_other) {
        args->allow_other = FUSE_EXPORT_ALLOW_OTHER_AUTO;
    }

    /* Initial attributes: regular file, owner access only, owned by us */
    exp->st_mode = S_IFREG | S_IRUSR | (exp->writable ? S_IWUSR : 0);
    exp->st_uid = getuid();
    exp->st_gid = getgid();

    if (args->allow_other != FUSE_EXPORT_ALLOW_OTHER_AUTO) {
        exp->allow_other = (args->allow_other == FUSE_EXPORT_ALLOW_OTHER_ON);
        ret = setup_fuse_export(exp, args->mountpoint, exp->allow_other, errp);
    } else {
        /* Try with allow_other first and silently fall back to without */
        ret = setup_fuse_export(exp, args->mountpoint, true, NULL);
        exp->allow_other = (ret == 0);
        if (ret < 0) {
            ret = setup_fuse_export(exp, args->mountpoint, false, errp);
        }
    }
    if (ret >= 0) {
        return 0;
    }

fail:
    fuse_export_delete(blk_exp);
    return ret;
}
215 
216 /**
217  * Allocates the global @exports hash table.
218  */
init_exports_table(void)219 static void init_exports_table(void)
220 {
221     if (exports) {
222         return;
223     }
224 
225     exports = g_hash_table_new_full(g_str_hash, g_str_equal, g_free, NULL);
226 }
227 
228 /**
229  * Create exp->fuse_session and mount it.
230  */
setup_fuse_export(FuseExport * exp,const char * mountpoint,bool allow_other,Error ** errp)231 static int setup_fuse_export(FuseExport *exp, const char *mountpoint,
232                              bool allow_other, Error **errp)
233 {
234     const char *fuse_argv[4];
235     char *mount_opts;
236     struct fuse_args fuse_args;
237     int ret;
238 
239     /*
240      * max_read needs to match what fuse_init() sets.
241      * max_write need not be supplied.
242      */
243     mount_opts = g_strdup_printf("max_read=%zu,default_permissions%s",
244                                  FUSE_MAX_BOUNCE_BYTES,
245                                  allow_other ? ",allow_other" : "");
246 
247     fuse_argv[0] = ""; /* Dummy program name */
248     fuse_argv[1] = "-o";
249     fuse_argv[2] = mount_opts;
250     fuse_argv[3] = NULL;
251     fuse_args = (struct fuse_args)FUSE_ARGS_INIT(3, (char **)fuse_argv);
252 
253     exp->fuse_session = fuse_session_new(&fuse_args, &fuse_ops,
254                                          sizeof(fuse_ops), exp);
255     g_free(mount_opts);
256     if (!exp->fuse_session) {
257         error_setg(errp, "Failed to set up FUSE session");
258         ret = -EIO;
259         goto fail;
260     }
261 
262     ret = fuse_session_mount(exp->fuse_session, mountpoint);
263     if (ret < 0) {
264         error_setg(errp, "Failed to mount FUSE session to export");
265         ret = -EIO;
266         goto fail;
267     }
268     exp->mounted = true;
269 
270     g_hash_table_insert(exports, g_strdup(mountpoint), NULL);
271 
272     aio_set_fd_handler(exp->common.ctx,
273                        fuse_session_fd(exp->fuse_session),
274                        read_from_fuse_export, NULL, NULL, NULL, exp);
275     exp->fd_handler_set_up = true;
276 
277     return 0;
278 
279 fail:
280     fuse_export_shutdown(&exp->common);
281     return ret;
282 }
283 
284 /**
285  * Callback to be invoked when the FUSE session FD can be read from.
286  * (This is basically the FUSE event loop.)
287  */
read_from_fuse_export(void * opaque)288 static void read_from_fuse_export(void *opaque)
289 {
290     FuseExport *exp = opaque;
291     int ret;
292 
293     blk_exp_ref(&exp->common);
294 
295     qatomic_inc(&exp->in_flight);
296 
297     do {
298         ret = fuse_session_receive_buf(exp->fuse_session, &exp->fuse_buf);
299     } while (ret == -EINTR);
300     if (ret < 0) {
301         goto out;
302     }
303 
304     fuse_session_process_buf(exp->fuse_session, &exp->fuse_buf);
305 
306 out:
307     if (qatomic_fetch_dec(&exp->in_flight) == 1) {
308         aio_wait_kick(); /* wake AIO_WAIT_WHILE() */
309     }
310 
311     blk_exp_unref(&exp->common);
312 }
313 
/**
 * Request-shutdown callback: flag the FUSE session as exited, stop
 * listening on its FD and release the mount point entry in @exports.
 * The session itself is not unmounted or destroyed here.
 */
static void fuse_export_shutdown(BlockExport *blk_exp)
{
    FuseExport *exp = container_of(blk_exp, FuseExport, common);
    struct fuse_session *session = exp->fuse_session;

    if (session) {
        fuse_session_exit(session);
    }

    if (session && exp->fd_handler_set_up) {
        aio_set_fd_handler(exp->common.ctx, fuse_session_fd(session),
                           NULL, NULL, NULL, NULL, NULL);
        exp->fd_handler_set_up = false;
    }

    if (exp->mountpoint) {
        /*
         * No further requests will be handled for this export, so the
         * entry can safely be dropped at this point.
         */
        g_hash_table_remove(exports, exp->mountpoint);
    }
}
337 
/**
 * Delete callback: unmount (if mounted) and destroy the FUSE session, then
 * free the receive buffer and the mount point string.
 */
static void fuse_export_delete(BlockExport *blk_exp)
{
    FuseExport *exp = container_of(blk_exp, FuseExport, common);
    struct fuse_session *session = exp->fuse_session;

    if (session && exp->mounted) {
        fuse_session_unmount(session);
    }
    if (session) {
        fuse_session_destroy(session);
    }

    free(exp->fuse_buf.mem);
    g_free(exp->mountpoint);
}
353 
354 /**
355  * Check whether @path points to a regular file.  If not, put an
356  * appropriate message into *errp.
357  */
is_regular_file(const char * path,Error ** errp)358 static bool is_regular_file(const char *path, Error **errp)
359 {
360     struct stat statbuf;
361     int ret;
362 
363     ret = stat(path, &statbuf);
364     if (ret < 0) {
365         error_setg_errno(errp, errno, "Failed to stat '%s'", path);
366         return false;
367     }
368 
369     if (!S_ISREG(statbuf.st_mode)) {
370         error_setg(errp, "'%s' is not a regular file", path);
371         return false;
372     }
373 
374     return true;
375 }
376 
377 /**
378  * A chance to set change some parameters supplied to FUSE_INIT.
379  */
fuse_init(void * userdata,struct fuse_conn_info * conn)380 static void fuse_init(void *userdata, struct fuse_conn_info *conn)
381 {
382     /*
383      * MIN_NON_ZERO() would not be wrong here, but what we set here
384      * must equal what has been passed to fuse_session_new().
385      * Therefore, as long as max_read must be passed as a mount option
386      * (which libfuse claims will be changed at some point), we have
387      * to set max_read to a fixed value here.
388      */
389     conn->max_read = FUSE_MAX_BOUNCE_BYTES;
390 
391     conn->max_write = MIN_NON_ZERO(BDRV_REQUEST_MAX_BYTES, conn->max_write);
392 }
393 
/**
 * Let clients look up files.  Always return ENOENT because we only
 * care about the mountpoint itself.
 *
 * @parent and @name are ignored.
 */
static void fuse_lookup(fuse_req_t req, fuse_ino_t parent, const char *name)
{
    fuse_reply_err(req, ENOENT);
}
402 
403 /**
404  * Let clients get file attributes (i.e., stat() the file).
405  */
fuse_getattr(fuse_req_t req,fuse_ino_t inode,struct fuse_file_info * fi)406 static void fuse_getattr(fuse_req_t req, fuse_ino_t inode,
407                          struct fuse_file_info *fi)
408 {
409     struct stat statbuf;
410     int64_t length, allocated_blocks;
411     time_t now = time(NULL);
412     FuseExport *exp = fuse_req_userdata(req);
413 
414     length = blk_getlength(exp->common.blk);
415     if (length < 0) {
416         fuse_reply_err(req, -length);
417         return;
418     }
419 
420     allocated_blocks = bdrv_get_allocated_file_size(blk_bs(exp->common.blk));
421     if (allocated_blocks <= 0) {
422         allocated_blocks = DIV_ROUND_UP(length, 512);
423     } else {
424         allocated_blocks = DIV_ROUND_UP(allocated_blocks, 512);
425     }
426 
427     statbuf = (struct stat) {
428         .st_ino     = inode,
429         .st_mode    = exp->st_mode,
430         .st_nlink   = 1,
431         .st_uid     = exp->st_uid,
432         .st_gid     = exp->st_gid,
433         .st_size    = length,
434         .st_blksize = blk_bs(exp->common.blk)->bl.request_alignment,
435         .st_blocks  = allocated_blocks,
436         .st_atime   = now,
437         .st_mtime   = now,
438         .st_ctime   = now,
439     };
440 
441     fuse_reply_attr(req, &statbuf, 1.);
442 }
443 
/**
 * Resize the exported image to @size bytes.
 *
 * @req_zero_write: request BDRV_REQ_ZERO_WRITE for the truncate
 * @prealloc: preallocation mode passed on to blk_truncate()
 *
 * Exports that are neither growable nor writable do not hold the RESIZE
 * permission; for those it is taken for the duration of the call only,
 * which is refused with -EPERM outside of the main thread.
 *
 * Returns the blk_truncate() result, or a negative errno value if the
 * permission could not be obtained.
 */
static int fuse_do_truncate(const FuseExport *exp, int64_t size,
                            bool req_zero_write, PreallocMode prealloc)
{
    /* Growable and writable exports have a permanent RESIZE permission */
    const bool temp_resize_perm = !(exp->growable || exp->writable);
    BdrvRequestFlags flags = req_zero_write ? BDRV_REQ_ZERO_WRITE : 0;
    uint64_t perm, shared_perm;
    int ret;

    if (temp_resize_perm) {
        /* Changing permissions like below only works in the main thread */
        if (!qemu_in_main_thread()) {
            return -EPERM;
        }

        blk_get_perm(exp->common.blk, &perm, &shared_perm);

        ret = blk_set_perm(exp->common.blk, perm | BLK_PERM_RESIZE,
                           shared_perm, NULL);
        if (ret < 0) {
            return ret;
        }
    }

    ret = blk_truncate(exp->common.blk, size, true, prealloc, flags, NULL);

    if (temp_resize_perm) {
        /* Must succeed, because we are only giving up the RESIZE permission */
        int restore_ret = blk_set_perm(exp->common.blk, perm,
                                       shared_perm, &error_abort);
        assert(restore_ret == 0);
    }

    return ret;
}
487 
488 /**
489  * Let clients set file attributes.  Only resizing and changing
490  * permissions (st_mode, st_uid, st_gid) is allowed.
491  * Changing permissions is only allowed as far as it will actually
492  * permit access: Read-only exports cannot be given +w, and exports
493  * without allow_other cannot be given a different UID or GID, and
494  * they cannot be given non-owner access.
495  */
fuse_setattr(fuse_req_t req,fuse_ino_t inode,struct stat * statbuf,int to_set,struct fuse_file_info * fi)496 static void fuse_setattr(fuse_req_t req, fuse_ino_t inode, struct stat *statbuf,
497                          int to_set, struct fuse_file_info *fi)
498 {
499     FuseExport *exp = fuse_req_userdata(req);
500     int supported_attrs;
501     int ret;
502 
503     supported_attrs = FUSE_SET_ATTR_SIZE | FUSE_SET_ATTR_MODE;
504     if (exp->allow_other) {
505         supported_attrs |= FUSE_SET_ATTR_UID | FUSE_SET_ATTR_GID;
506     }
507 
508     if (to_set & ~supported_attrs) {
509         fuse_reply_err(req, ENOTSUP);
510         return;
511     }
512 
513     /* Do some argument checks first before committing to anything */
514     if (to_set & FUSE_SET_ATTR_MODE) {
515         /*
516          * Without allow_other, non-owners can never access the export, so do
517          * not allow setting permissions for them
518          */
519         if (!exp->allow_other &&
520             (statbuf->st_mode & (S_IRWXG | S_IRWXO)) != 0)
521         {
522             fuse_reply_err(req, EPERM);
523             return;
524         }
525 
526         /* +w for read-only exports makes no sense, disallow it */
527         if (!exp->writable &&
528             (statbuf->st_mode & (S_IWUSR | S_IWGRP | S_IWOTH)) != 0)
529         {
530             fuse_reply_err(req, EROFS);
531             return;
532         }
533     }
534 
535     if (to_set & FUSE_SET_ATTR_SIZE) {
536         if (!exp->writable) {
537             fuse_reply_err(req, EACCES);
538             return;
539         }
540 
541         ret = fuse_do_truncate(exp, statbuf->st_size, true, PREALLOC_MODE_OFF);
542         if (ret < 0) {
543             fuse_reply_err(req, -ret);
544             return;
545         }
546     }
547 
548     if (to_set & FUSE_SET_ATTR_MODE) {
549         /* Ignore FUSE-supplied file type, only change the mode */
550         exp->st_mode = (statbuf->st_mode & 07777) | S_IFREG;
551     }
552 
553     if (to_set & FUSE_SET_ATTR_UID) {
554         exp->st_uid = statbuf->st_uid;
555     }
556 
557     if (to_set & FUSE_SET_ATTR_GID) {
558         exp->st_gid = statbuf->st_gid;
559     }
560 
561     fuse_getattr(req, inode, fi);
562 }
563 
/**
 * Let clients open a file (i.e., the exported image).
 *
 * Always succeeds; @fi is passed back to the client unmodified.
 */
static void fuse_open(fuse_req_t req, fuse_ino_t inode,
                      struct fuse_file_info *fi)
{
    fuse_reply_open(req, fi);
}
572 
573 /**
574  * Handle client reads from the exported image.
575  */
fuse_read(fuse_req_t req,fuse_ino_t inode,size_t size,off_t offset,struct fuse_file_info * fi)576 static void fuse_read(fuse_req_t req, fuse_ino_t inode,
577                       size_t size, off_t offset, struct fuse_file_info *fi)
578 {
579     FuseExport *exp = fuse_req_userdata(req);
580     int64_t length;
581     void *buf;
582     int ret;
583 
584     /* Limited by max_read, should not happen */
585     if (size > FUSE_MAX_BOUNCE_BYTES) {
586         fuse_reply_err(req, EINVAL);
587         return;
588     }
589 
590     /**
591      * Clients will expect short reads at EOF, so we have to limit
592      * offset+size to the image length.
593      */
594     length = blk_getlength(exp->common.blk);
595     if (length < 0) {
596         fuse_reply_err(req, -length);
597         return;
598     }
599 
600     if (offset + size > length) {
601         size = length - offset;
602     }
603 
604     buf = qemu_try_blockalign(blk_bs(exp->common.blk), size);
605     if (!buf) {
606         fuse_reply_err(req, ENOMEM);
607         return;
608     }
609 
610     ret = blk_pread(exp->common.blk, offset, size, buf, 0);
611     if (ret >= 0) {
612         fuse_reply_buf(req, buf, size);
613     } else {
614         fuse_reply_err(req, -ret);
615     }
616 
617     qemu_vfree(buf);
618 }
619 
620 /**
621  * Handle client writes to the exported image.
622  */
fuse_write(fuse_req_t req,fuse_ino_t inode,const char * buf,size_t size,off_t offset,struct fuse_file_info * fi)623 static void fuse_write(fuse_req_t req, fuse_ino_t inode, const char *buf,
624                        size_t size, off_t offset, struct fuse_file_info *fi)
625 {
626     FuseExport *exp = fuse_req_userdata(req);
627     int64_t length;
628     int ret;
629 
630     /* Limited by max_write, should not happen */
631     if (size > BDRV_REQUEST_MAX_BYTES) {
632         fuse_reply_err(req, EINVAL);
633         return;
634     }
635 
636     if (!exp->writable) {
637         fuse_reply_err(req, EACCES);
638         return;
639     }
640 
641     /**
642      * Clients will expect short writes at EOF, so we have to limit
643      * offset+size to the image length.
644      */
645     length = blk_getlength(exp->common.blk);
646     if (length < 0) {
647         fuse_reply_err(req, -length);
648         return;
649     }
650 
651     if (offset + size > length) {
652         if (exp->growable) {
653             ret = fuse_do_truncate(exp, offset + size, true, PREALLOC_MODE_OFF);
654             if (ret < 0) {
655                 fuse_reply_err(req, -ret);
656                 return;
657             }
658         } else {
659             size = length - offset;
660         }
661     }
662 
663     ret = blk_pwrite(exp->common.blk, offset, size, buf, 0);
664     if (ret >= 0) {
665         fuse_reply_write(req, size);
666     } else {
667         fuse_reply_err(req, -ret);
668     }
669 }
670 
671 /**
672  * Let clients perform various fallocate() operations.
673  */
fuse_fallocate(fuse_req_t req,fuse_ino_t inode,int mode,off_t offset,off_t length,struct fuse_file_info * fi)674 static void fuse_fallocate(fuse_req_t req, fuse_ino_t inode, int mode,
675                            off_t offset, off_t length,
676                            struct fuse_file_info *fi)
677 {
678     FuseExport *exp = fuse_req_userdata(req);
679     int64_t blk_len;
680     int ret;
681 
682     if (!exp->writable) {
683         fuse_reply_err(req, EACCES);
684         return;
685     }
686 
687     blk_len = blk_getlength(exp->common.blk);
688     if (blk_len < 0) {
689         fuse_reply_err(req, -blk_len);
690         return;
691     }
692 
693 #ifdef CONFIG_FALLOCATE_PUNCH_HOLE
694     if (mode & FALLOC_FL_KEEP_SIZE) {
695         length = MIN(length, blk_len - offset);
696     }
697 #endif /* CONFIG_FALLOCATE_PUNCH_HOLE */
698 
699     if (!mode) {
700         /* We can only fallocate at the EOF with a truncate */
701         if (offset < blk_len) {
702             fuse_reply_err(req, EOPNOTSUPP);
703             return;
704         }
705 
706         if (offset > blk_len) {
707             /* No preallocation needed here */
708             ret = fuse_do_truncate(exp, offset, true, PREALLOC_MODE_OFF);
709             if (ret < 0) {
710                 fuse_reply_err(req, -ret);
711                 return;
712             }
713         }
714 
715         ret = fuse_do_truncate(exp, offset + length, true,
716                                PREALLOC_MODE_FALLOC);
717     }
718 #ifdef CONFIG_FALLOCATE_PUNCH_HOLE
719     else if (mode & FALLOC_FL_PUNCH_HOLE) {
720         if (!(mode & FALLOC_FL_KEEP_SIZE)) {
721             fuse_reply_err(req, EINVAL);
722             return;
723         }
724 
725         do {
726             int size = MIN(length, BDRV_REQUEST_MAX_BYTES);
727 
728             ret = blk_pwrite_zeroes(exp->common.blk, offset, size,
729                                     BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK);
730             if (ret == -ENOTSUP) {
731                 /*
732                  * fallocate() specifies to return EOPNOTSUPP for unsupported
733                  * operations
734                  */
735                 ret = -EOPNOTSUPP;
736             }
737 
738             offset += size;
739             length -= size;
740         } while (ret == 0 && length > 0);
741     }
742 #endif /* CONFIG_FALLOCATE_PUNCH_HOLE */
743 #ifdef CONFIG_FALLOCATE_ZERO_RANGE
744     else if (mode & FALLOC_FL_ZERO_RANGE) {
745         if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + length > blk_len) {
746             /* No need for zeroes, we are going to write them ourselves */
747             ret = fuse_do_truncate(exp, offset + length, false,
748                                    PREALLOC_MODE_OFF);
749             if (ret < 0) {
750                 fuse_reply_err(req, -ret);
751                 return;
752             }
753         }
754 
755         do {
756             int size = MIN(length, BDRV_REQUEST_MAX_BYTES);
757 
758             ret = blk_pwrite_zeroes(exp->common.blk,
759                                     offset, size, 0);
760             offset += size;
761             length -= size;
762         } while (ret == 0 && length > 0);
763     }
764 #endif /* CONFIG_FALLOCATE_ZERO_RANGE */
765     else {
766         ret = -EOPNOTSUPP;
767     }
768 
769     fuse_reply_err(req, ret < 0 ? -ret : 0);
770 }
771 
772 /**
773  * Let clients fsync the exported image.
774  */
fuse_fsync(fuse_req_t req,fuse_ino_t inode,int datasync,struct fuse_file_info * fi)775 static void fuse_fsync(fuse_req_t req, fuse_ino_t inode, int datasync,
776                        struct fuse_file_info *fi)
777 {
778     FuseExport *exp = fuse_req_userdata(req);
779     int ret;
780 
781     ret = blk_flush(exp->common.blk);
782     fuse_reply_err(req, ret < 0 ? -ret : 0);
783 }
784 
/**
 * Called before an FD to the exported image is closed.  (libfuse
 * notes this to be a way to return last-minute errors.)
 *
 * Handled exactly like fsync, with datasync set.
 */
static void fuse_flush(fuse_req_t req, fuse_ino_t inode,
                        struct fuse_file_info *fi)
{
    fuse_fsync(req, inode, 1, fi);
}
794 
#ifdef CONFIG_FUSE_LSEEK
/**
 * Let clients inquire allocation status.
 *
 * Only SEEK_DATA and SEEK_HOLE are supported (EINVAL otherwise).  Starting
 * at @offset, the block status is walked extent by extent until one of the
 * requested kind is found, whose start is then returned.  When the end of
 * the image is reached first, SEEK_HOLE reports the implicit hole there,
 * whereas SEEK_DATA fails with ENXIO.
 */
static void fuse_lseek(fuse_req_t req, fuse_ino_t inode, off_t offset,
                       int whence, struct fuse_file_info *fi)
{
    FuseExport *exp = fuse_req_userdata(req);
    const bool want_data = (whence == SEEK_DATA);

    if (!want_data && whence != SEEK_HOLE) {
        fuse_reply_err(req, EINVAL);
        return;
    }

    for (;;) {
        int64_t extent_len;
        bool is_data;
        int status;

        status = bdrv_block_status_above(blk_bs(exp->common.blk), NULL,
                                         offset, INT64_MAX, &extent_len,
                                         NULL, NULL);
        if (status < 0) {
            fuse_reply_err(req, -status);
            return;
        }

        if (!extent_len && (status & BDRV_BLOCK_EOF)) {
            int64_t blk_len;

            /*
             * blk_getlength() may round (e.g. to sectors), in which case
             * the export length is rounded as well.  The block status,
             * however, can hit EOF at an unaligned offset.  Clients must
             * not notice that, so a hole is always simulated between
             * @offset (the real EOF) and @blk_len (the EOF they see).
             */
            blk_len = blk_getlength(exp->common.blk);
            if (blk_len < 0) {
                fuse_reply_err(req, -blk_len);
                return;
            }

            if (want_data || offset > blk_len) {
                fuse_reply_err(req, ENXIO);
            } else {
                fuse_reply_lseek(req, offset);
            }
            return;
        }

        /* Done as soon as the current extent is of the requested kind */
        is_data = (status & BDRV_BLOCK_DATA) != 0;
        if (is_data == want_data) {
            fuse_reply_lseek(req, offset);
            return;
        }

        /* Safety check against infinite loops */
        if (!extent_len) {
            fuse_reply_err(req, ENXIO);
            return;
        }

        offset += extent_len;
    }
}
#endif
868 
869 static const struct fuse_lowlevel_ops fuse_ops = {
870     .init       = fuse_init,
871     .lookup     = fuse_lookup,
872     .getattr    = fuse_getattr,
873     .setattr    = fuse_setattr,
874     .open       = fuse_open,
875     .read       = fuse_read,
876     .write      = fuse_write,
877     .fallocate  = fuse_fallocate,
878     .flush      = fuse_flush,
879     .fsync      = fuse_fsync,
880 #ifdef CONFIG_FUSE_LSEEK
881     .lseek      = fuse_lseek,
882 #endif
883 };
884 
885 const BlockExportDriver blk_exp_fuse = {
886     .type               = BLOCK_EXPORT_TYPE_FUSE,
887     .instance_size      = sizeof(FuseExport),
888     .create             = fuse_export_create,
889     .delete             = fuse_export_delete,
890     .request_shutdown   = fuse_export_shutdown,
891 };
892