xref: /openbmc/qemu/block/export/fuse.c (revision f89f54d52bf8fdc6de1c90367f9bdd65e40fa382)
10c9b70d5SMax Reitz /*
20c9b70d5SMax Reitz  * Present a block device as a raw image through FUSE
30c9b70d5SMax Reitz  *
40c9b70d5SMax Reitz  * Copyright (c) 2020 Max Reitz <mreitz@redhat.com>
50c9b70d5SMax Reitz  *
60c9b70d5SMax Reitz  * This program is free software; you can redistribute it and/or modify
70c9b70d5SMax Reitz  * it under the terms of the GNU General Public License as published by
80c9b70d5SMax Reitz  * the Free Software Foundation; under version 2 or later of the License.
90c9b70d5SMax Reitz  *
100c9b70d5SMax Reitz  * This program is distributed in the hope that it will be useful,
110c9b70d5SMax Reitz  * but WITHOUT ANY WARRANTY; without even the implied warranty of
120c9b70d5SMax Reitz  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
130c9b70d5SMax Reitz  * GNU General Public License for more details.
140c9b70d5SMax Reitz  *
150c9b70d5SMax Reitz  * You should have received a copy of the GNU General Public License
160c9b70d5SMax Reitz  * along with this program; if not, see <http://www.gnu.org/licenses/>.
170c9b70d5SMax Reitz  */
180c9b70d5SMax Reitz 
190c9b70d5SMax Reitz #define FUSE_USE_VERSION 31
200c9b70d5SMax Reitz 
210c9b70d5SMax Reitz #include "qemu/osdep.h"
225df022cfSPeter Maydell #include "qemu/memalign.h"
230c9b70d5SMax Reitz #include "block/aio.h"
24e2c1c34fSMarkus Armbruster #include "block/block_int-common.h"
250c9b70d5SMax Reitz #include "block/export.h"
260c9b70d5SMax Reitz #include "block/fuse.h"
270c9b70d5SMax Reitz #include "block/qapi.h"
280c9b70d5SMax Reitz #include "qapi/error.h"
290c9b70d5SMax Reitz #include "qapi/qapi-commands-block.h"
30e2c1c34fSMarkus Armbruster #include "qemu/main-loop.h"
310c9b70d5SMax Reitz #include "sysemu/block-backend.h"
320c9b70d5SMax Reitz 
330c9b70d5SMax Reitz #include <fuse.h>
340c9b70d5SMax Reitz #include <fuse_lowlevel.h>
350c9b70d5SMax Reitz 
3630433203SFabrice Fontaine #if defined(CONFIG_FALLOCATE_ZERO_RANGE)
3730433203SFabrice Fontaine #include <linux/falloc.h>
3830433203SFabrice Fontaine #endif
3930433203SFabrice Fontaine 
4028031d5cSFabrice Fontaine #ifdef __linux__
4128031d5cSFabrice Fontaine #include <linux/fs.h>
4228031d5cSFabrice Fontaine #endif
430c9b70d5SMax Reitz 
/*
 * Prevent overly long bounce buffer allocations: upper bound for the size of
 * a single read request (also advertised to FUSE as max_read).
 */
#define FUSE_MAX_BOUNCE_BYTES (MIN(BDRV_REQUEST_MAX_BYTES, 64 * 1024 * 1024))
460c9b70d5SMax Reitz 
470c9b70d5SMax Reitz 
480c9b70d5SMax Reitz typedef struct FuseExport {
490c9b70d5SMax Reitz     BlockExport common;
500c9b70d5SMax Reitz 
510c9b70d5SMax Reitz     struct fuse_session *fuse_session;
520c9b70d5SMax Reitz     struct fuse_buf fuse_buf;
5317b69c0fSStefan Hajnoczi     unsigned int in_flight; /* atomic */
540c9b70d5SMax Reitz     bool mounted, fd_handler_set_up;
550c9b70d5SMax Reitz 
560c9b70d5SMax Reitz     char *mountpoint;
570c9b70d5SMax Reitz     bool writable;
584fba06d5SMax Reitz     bool growable;
598fc54f94SMax Reitz     /* Whether allow_other was used as a mount option or not */
608fc54f94SMax Reitz     bool allow_other;
616aeeaed2SMax Reitz 
626aeeaed2SMax Reitz     mode_t st_mode;
636aeeaed2SMax Reitz     uid_t st_uid;
646aeeaed2SMax Reitz     gid_t st_gid;
650c9b70d5SMax Reitz } FuseExport;
660c9b70d5SMax Reitz 
670c9b70d5SMax Reitz static GHashTable *exports;
680c9b70d5SMax Reitz static const struct fuse_lowlevel_ops fuse_ops;
690c9b70d5SMax Reitz 
700c9b70d5SMax Reitz static void fuse_export_shutdown(BlockExport *exp);
710c9b70d5SMax Reitz static void fuse_export_delete(BlockExport *exp);
720c9b70d5SMax Reitz 
730c9b70d5SMax Reitz static void init_exports_table(void);
740c9b70d5SMax Reitz 
750c9b70d5SMax Reitz static int setup_fuse_export(FuseExport *exp, const char *mountpoint,
768fc54f94SMax Reitz                              bool allow_other, Error **errp);
770c9b70d5SMax Reitz static void read_from_fuse_export(void *opaque);
780c9b70d5SMax Reitz 
790c9b70d5SMax Reitz static bool is_regular_file(const char *path, Error **errp);
800c9b70d5SMax Reitz 
810c9b70d5SMax Reitz 
fuse_export_drained_begin(void * opaque)8217b69c0fSStefan Hajnoczi static void fuse_export_drained_begin(void *opaque)
8317b69c0fSStefan Hajnoczi {
8417b69c0fSStefan Hajnoczi     FuseExport *exp = opaque;
8517b69c0fSStefan Hajnoczi 
8617b69c0fSStefan Hajnoczi     aio_set_fd_handler(exp->common.ctx,
87*60f782b6SStefan Hajnoczi                        fuse_session_fd(exp->fuse_session),
8817b69c0fSStefan Hajnoczi                        NULL, NULL, NULL, NULL, NULL);
8917b69c0fSStefan Hajnoczi     exp->fd_handler_set_up = false;
9017b69c0fSStefan Hajnoczi }
9117b69c0fSStefan Hajnoczi 
fuse_export_drained_end(void * opaque)9217b69c0fSStefan Hajnoczi static void fuse_export_drained_end(void *opaque)
9317b69c0fSStefan Hajnoczi {
9417b69c0fSStefan Hajnoczi     FuseExport *exp = opaque;
9517b69c0fSStefan Hajnoczi 
9617b69c0fSStefan Hajnoczi     /* Refresh AioContext in case it changed */
9717b69c0fSStefan Hajnoczi     exp->common.ctx = blk_get_aio_context(exp->common.blk);
9817b69c0fSStefan Hajnoczi 
9917b69c0fSStefan Hajnoczi     aio_set_fd_handler(exp->common.ctx,
100*60f782b6SStefan Hajnoczi                        fuse_session_fd(exp->fuse_session),
10117b69c0fSStefan Hajnoczi                        read_from_fuse_export, NULL, NULL, NULL, exp);
10217b69c0fSStefan Hajnoczi     exp->fd_handler_set_up = true;
10317b69c0fSStefan Hajnoczi }
10417b69c0fSStefan Hajnoczi 
fuse_export_drained_poll(void * opaque)10517b69c0fSStefan Hajnoczi static bool fuse_export_drained_poll(void *opaque)
10617b69c0fSStefan Hajnoczi {
10717b69c0fSStefan Hajnoczi     FuseExport *exp = opaque;
10817b69c0fSStefan Hajnoczi 
10917b69c0fSStefan Hajnoczi     return qatomic_read(&exp->in_flight) > 0;
11017b69c0fSStefan Hajnoczi }
11117b69c0fSStefan Hajnoczi 
11217b69c0fSStefan Hajnoczi static const BlockDevOps fuse_export_blk_dev_ops = {
11317b69c0fSStefan Hajnoczi     .drained_begin = fuse_export_drained_begin,
11417b69c0fSStefan Hajnoczi     .drained_end   = fuse_export_drained_end,
11517b69c0fSStefan Hajnoczi     .drained_poll  = fuse_export_drained_poll,
11617b69c0fSStefan Hajnoczi };
11717b69c0fSStefan Hajnoczi 
fuse_export_create(BlockExport * blk_exp,BlockExportOptions * blk_exp_args,Error ** errp)1180c9b70d5SMax Reitz static int fuse_export_create(BlockExport *blk_exp,
1190c9b70d5SMax Reitz                               BlockExportOptions *blk_exp_args,
1200c9b70d5SMax Reitz                               Error **errp)
1210c9b70d5SMax Reitz {
1220c9b70d5SMax Reitz     FuseExport *exp = container_of(blk_exp, FuseExport, common);
1230c9b70d5SMax Reitz     BlockExportOptionsFuse *args = &blk_exp_args->u.fuse;
1240c9b70d5SMax Reitz     int ret;
1250c9b70d5SMax Reitz 
1260c9b70d5SMax Reitz     assert(blk_exp_args->type == BLOCK_EXPORT_TYPE_FUSE);
1270c9b70d5SMax Reitz 
1288cc5882cSEmanuele Giuseppe Esposito     /* For growable and writable exports, take the RESIZE permission */
1298cc5882cSEmanuele Giuseppe Esposito     if (args->growable || blk_exp_args->writable) {
1304fba06d5SMax Reitz         uint64_t blk_perm, blk_shared_perm;
1314fba06d5SMax Reitz 
1324fba06d5SMax Reitz         blk_get_perm(exp->common.blk, &blk_perm, &blk_shared_perm);
1334fba06d5SMax Reitz 
1344fba06d5SMax Reitz         ret = blk_set_perm(exp->common.blk, blk_perm | BLK_PERM_RESIZE,
1354fba06d5SMax Reitz                            blk_shared_perm, errp);
1364fba06d5SMax Reitz         if (ret < 0) {
1374fba06d5SMax Reitz             return ret;
1384fba06d5SMax Reitz         }
1394fba06d5SMax Reitz     }
1404fba06d5SMax Reitz 
14117b69c0fSStefan Hajnoczi     blk_set_dev_ops(exp->common.blk, &fuse_export_blk_dev_ops, exp);
14217b69c0fSStefan Hajnoczi 
14317b69c0fSStefan Hajnoczi     /*
14417b69c0fSStefan Hajnoczi      * We handle draining ourselves using an in-flight counter and by disabling
14517b69c0fSStefan Hajnoczi      * the FUSE fd handler. Do not queue BlockBackend requests, they need to
14617b69c0fSStefan Hajnoczi      * complete so the in-flight counter reaches zero.
14717b69c0fSStefan Hajnoczi      */
14817b69c0fSStefan Hajnoczi     blk_set_disable_request_queuing(exp->common.blk, true);
14917b69c0fSStefan Hajnoczi 
1500c9b70d5SMax Reitz     init_exports_table();
1510c9b70d5SMax Reitz 
1520c9b70d5SMax Reitz     /*
1530c9b70d5SMax Reitz      * It is important to do this check before calling is_regular_file() --
1540c9b70d5SMax Reitz      * that function will do a stat(), which we would have to handle if we
1550c9b70d5SMax Reitz      * already exported something on @mountpoint.  But we cannot, because
1560c9b70d5SMax Reitz      * we are currently caught up here.
1570c9b70d5SMax Reitz      * (Note that ideally we would want to resolve relative paths here,
1580c9b70d5SMax Reitz      * but bdrv_make_absolute_filename() might do the wrong thing for
1590c9b70d5SMax Reitz      * paths that contain colons, and realpath() would resolve symlinks,
1600c9b70d5SMax Reitz      * which we do not want: The mount point is not going to be the
1610c9b70d5SMax Reitz      * symlink's destination, but the link itself.)
1620c9b70d5SMax Reitz      * So this will not catch all potential clashes, but hopefully at
1630c9b70d5SMax Reitz      * least the most common one of specifying exactly the same path
1640c9b70d5SMax Reitz      * string twice.
1650c9b70d5SMax Reitz      */
1660c9b70d5SMax Reitz     if (g_hash_table_contains(exports, args->mountpoint)) {
1670c9b70d5SMax Reitz         error_setg(errp, "There already is a FUSE export on '%s'",
1680c9b70d5SMax Reitz                    args->mountpoint);
1690c9b70d5SMax Reitz         ret = -EEXIST;
1700c9b70d5SMax Reitz         goto fail;
1710c9b70d5SMax Reitz     }
1720c9b70d5SMax Reitz 
1730c9b70d5SMax Reitz     if (!is_regular_file(args->mountpoint, errp)) {
1740c9b70d5SMax Reitz         ret = -EINVAL;
1750c9b70d5SMax Reitz         goto fail;
1760c9b70d5SMax Reitz     }
1770c9b70d5SMax Reitz 
1780c9b70d5SMax Reitz     exp->mountpoint = g_strdup(args->mountpoint);
1790c9b70d5SMax Reitz     exp->writable = blk_exp_args->writable;
1804fba06d5SMax Reitz     exp->growable = args->growable;
1810c9b70d5SMax Reitz 
1828fc54f94SMax Reitz     /* set default */
1838fc54f94SMax Reitz     if (!args->has_allow_other) {
1848fc54f94SMax Reitz         args->allow_other = FUSE_EXPORT_ALLOW_OTHER_AUTO;
1858fc54f94SMax Reitz     }
1868fc54f94SMax Reitz 
1876aeeaed2SMax Reitz     exp->st_mode = S_IFREG | S_IRUSR;
1886aeeaed2SMax Reitz     if (exp->writable) {
1896aeeaed2SMax Reitz         exp->st_mode |= S_IWUSR;
1906aeeaed2SMax Reitz     }
1916aeeaed2SMax Reitz     exp->st_uid = getuid();
1926aeeaed2SMax Reitz     exp->st_gid = getgid();
1936aeeaed2SMax Reitz 
1948fc54f94SMax Reitz     if (args->allow_other == FUSE_EXPORT_ALLOW_OTHER_AUTO) {
1958fc54f94SMax Reitz         /* Ignore errors on our first attempt */
1968fc54f94SMax Reitz         ret = setup_fuse_export(exp, args->mountpoint, true, NULL);
1978fc54f94SMax Reitz         exp->allow_other = ret == 0;
1988fc54f94SMax Reitz         if (ret < 0) {
1998fc54f94SMax Reitz             ret = setup_fuse_export(exp, args->mountpoint, false, errp);
2008fc54f94SMax Reitz         }
2018fc54f94SMax Reitz     } else {
2028fc54f94SMax Reitz         exp->allow_other = args->allow_other == FUSE_EXPORT_ALLOW_OTHER_ON;
2038fc54f94SMax Reitz         ret = setup_fuse_export(exp, args->mountpoint, exp->allow_other, errp);
2048fc54f94SMax Reitz     }
2050c9b70d5SMax Reitz     if (ret < 0) {
2060c9b70d5SMax Reitz         goto fail;
2070c9b70d5SMax Reitz     }
2080c9b70d5SMax Reitz 
2090c9b70d5SMax Reitz     return 0;
2100c9b70d5SMax Reitz 
2110c9b70d5SMax Reitz fail:
2120c9b70d5SMax Reitz     fuse_export_delete(blk_exp);
2130c9b70d5SMax Reitz     return ret;
2140c9b70d5SMax Reitz }
2150c9b70d5SMax Reitz 
2160c9b70d5SMax Reitz /**
2170c9b70d5SMax Reitz  * Allocates the global @exports hash table.
2180c9b70d5SMax Reitz  */
init_exports_table(void)2190c9b70d5SMax Reitz static void init_exports_table(void)
2200c9b70d5SMax Reitz {
2210c9b70d5SMax Reitz     if (exports) {
2220c9b70d5SMax Reitz         return;
2230c9b70d5SMax Reitz     }
2240c9b70d5SMax Reitz 
2250c9b70d5SMax Reitz     exports = g_hash_table_new_full(g_str_hash, g_str_equal, g_free, NULL);
2260c9b70d5SMax Reitz }
2270c9b70d5SMax Reitz 
2280c9b70d5SMax Reitz /**
2290c9b70d5SMax Reitz  * Create exp->fuse_session and mount it.
2300c9b70d5SMax Reitz  */
setup_fuse_export(FuseExport * exp,const char * mountpoint,bool allow_other,Error ** errp)2310c9b70d5SMax Reitz static int setup_fuse_export(FuseExport *exp, const char *mountpoint,
2328fc54f94SMax Reitz                              bool allow_other, Error **errp)
2330c9b70d5SMax Reitz {
2340c9b70d5SMax Reitz     const char *fuse_argv[4];
2350c9b70d5SMax Reitz     char *mount_opts;
2360c9b70d5SMax Reitz     struct fuse_args fuse_args;
2370c9b70d5SMax Reitz     int ret;
2380c9b70d5SMax Reitz 
2392c7dd057SMax Reitz     /*
2402c7dd057SMax Reitz      * max_read needs to match what fuse_init() sets.
2412c7dd057SMax Reitz      * max_write need not be supplied.
2422c7dd057SMax Reitz      */
2438fc54f94SMax Reitz     mount_opts = g_strdup_printf("max_read=%zu,default_permissions%s",
2448fc54f94SMax Reitz                                  FUSE_MAX_BOUNCE_BYTES,
2458fc54f94SMax Reitz                                  allow_other ? ",allow_other" : "");
2460c9b70d5SMax Reitz 
2470c9b70d5SMax Reitz     fuse_argv[0] = ""; /* Dummy program name */
2480c9b70d5SMax Reitz     fuse_argv[1] = "-o";
2490c9b70d5SMax Reitz     fuse_argv[2] = mount_opts;
2500c9b70d5SMax Reitz     fuse_argv[3] = NULL;
2510c9b70d5SMax Reitz     fuse_args = (struct fuse_args)FUSE_ARGS_INIT(3, (char **)fuse_argv);
2520c9b70d5SMax Reitz 
2530c9b70d5SMax Reitz     exp->fuse_session = fuse_session_new(&fuse_args, &fuse_ops,
2540c9b70d5SMax Reitz                                          sizeof(fuse_ops), exp);
2550c9b70d5SMax Reitz     g_free(mount_opts);
2560c9b70d5SMax Reitz     if (!exp->fuse_session) {
2570c9b70d5SMax Reitz         error_setg(errp, "Failed to set up FUSE session");
2580c9b70d5SMax Reitz         ret = -EIO;
2590c9b70d5SMax Reitz         goto fail;
2600c9b70d5SMax Reitz     }
2610c9b70d5SMax Reitz 
2620c9b70d5SMax Reitz     ret = fuse_session_mount(exp->fuse_session, mountpoint);
2630c9b70d5SMax Reitz     if (ret < 0) {
2640c9b70d5SMax Reitz         error_setg(errp, "Failed to mount FUSE session to export");
2650c9b70d5SMax Reitz         ret = -EIO;
2660c9b70d5SMax Reitz         goto fail;
2670c9b70d5SMax Reitz     }
2680c9b70d5SMax Reitz     exp->mounted = true;
2690c9b70d5SMax Reitz 
2700c9b70d5SMax Reitz     g_hash_table_insert(exports, g_strdup(mountpoint), NULL);
2710c9b70d5SMax Reitz 
2720c9b70d5SMax Reitz     aio_set_fd_handler(exp->common.ctx,
273*60f782b6SStefan Hajnoczi                        fuse_session_fd(exp->fuse_session),
274826cc324SStefan Hajnoczi                        read_from_fuse_export, NULL, NULL, NULL, exp);
2750c9b70d5SMax Reitz     exp->fd_handler_set_up = true;
2760c9b70d5SMax Reitz 
2770c9b70d5SMax Reitz     return 0;
2780c9b70d5SMax Reitz 
2790c9b70d5SMax Reitz fail:
2800c9b70d5SMax Reitz     fuse_export_shutdown(&exp->common);
2810c9b70d5SMax Reitz     return ret;
2820c9b70d5SMax Reitz }
2830c9b70d5SMax Reitz 
2840c9b70d5SMax Reitz /**
2850c9b70d5SMax Reitz  * Callback to be invoked when the FUSE session FD can be read from.
2860c9b70d5SMax Reitz  * (This is basically the FUSE event loop.)
2870c9b70d5SMax Reitz  */
read_from_fuse_export(void * opaque)2880c9b70d5SMax Reitz static void read_from_fuse_export(void *opaque)
2890c9b70d5SMax Reitz {
2900c9b70d5SMax Reitz     FuseExport *exp = opaque;
2910c9b70d5SMax Reitz     int ret;
2920c9b70d5SMax Reitz 
2930c9b70d5SMax Reitz     blk_exp_ref(&exp->common);
2940c9b70d5SMax Reitz 
29517b69c0fSStefan Hajnoczi     qatomic_inc(&exp->in_flight);
29617b69c0fSStefan Hajnoczi 
2970c9b70d5SMax Reitz     do {
2980c9b70d5SMax Reitz         ret = fuse_session_receive_buf(exp->fuse_session, &exp->fuse_buf);
2990c9b70d5SMax Reitz     } while (ret == -EINTR);
3000c9b70d5SMax Reitz     if (ret < 0) {
3010c9b70d5SMax Reitz         goto out;
3020c9b70d5SMax Reitz     }
3030c9b70d5SMax Reitz 
3040c9b70d5SMax Reitz     fuse_session_process_buf(exp->fuse_session, &exp->fuse_buf);
3050c9b70d5SMax Reitz 
3060c9b70d5SMax Reitz out:
30717b69c0fSStefan Hajnoczi     if (qatomic_fetch_dec(&exp->in_flight) == 1) {
30817b69c0fSStefan Hajnoczi         aio_wait_kick(); /* wake AIO_WAIT_WHILE() */
30917b69c0fSStefan Hajnoczi     }
31017b69c0fSStefan Hajnoczi 
3110c9b70d5SMax Reitz     blk_exp_unref(&exp->common);
3120c9b70d5SMax Reitz }
3130c9b70d5SMax Reitz 
/**
 * Stop serving requests for this export: flag the FUSE session as exited,
 * disable its FD handler (if installed) and drop the mount point from
 * @exports.  The session itself is neither unmounted nor destroyed here.
 */
static void fuse_export_shutdown(BlockExport *blk_exp)
{
    FuseExport *exp = container_of(blk_exp, FuseExport, common);
    struct fuse_session *session = exp->fuse_session;

    if (session) {
        fuse_session_exit(session);

        if (exp->fd_handler_set_up) {
            aio_set_fd_handler(exp->common.ctx, fuse_session_fd(session),
                               NULL, NULL, NULL, NULL, NULL);
            exp->fd_handler_set_up = false;
        }
    }

    /*
     * The table entry can go now, because no further requests will be
     * handled for this export anyway.
     */
    if (exp->mountpoint) {
        g_hash_table_remove(exports, exp->mountpoint);
    }
}
3370c9b70d5SMax Reitz 
/**
 * Release everything the export owns: unmount (if mounted) and destroy the
 * FUSE session, and free the receive buffer and the mount point string.
 */
static void fuse_export_delete(BlockExport *blk_exp)
{
    FuseExport *exp = container_of(blk_exp, FuseExport, common);
    struct fuse_session *session = exp->fuse_session;

    if (session) {
        if (exp->mounted) {
            fuse_session_unmount(session);
        }
        fuse_session_destroy(session);
    }

    /* fuse_buf.mem is released with free(), the mount point with g_free() */
    free(exp->fuse_buf.mem);
    g_free(exp->mountpoint);
}
3530c9b70d5SMax Reitz 
3540c9b70d5SMax Reitz /**
3550c9b70d5SMax Reitz  * Check whether @path points to a regular file.  If not, put an
3560c9b70d5SMax Reitz  * appropriate message into *errp.
3570c9b70d5SMax Reitz  */
is_regular_file(const char * path,Error ** errp)3580c9b70d5SMax Reitz static bool is_regular_file(const char *path, Error **errp)
3590c9b70d5SMax Reitz {
3600c9b70d5SMax Reitz     struct stat statbuf;
3610c9b70d5SMax Reitz     int ret;
3620c9b70d5SMax Reitz 
3630c9b70d5SMax Reitz     ret = stat(path, &statbuf);
3640c9b70d5SMax Reitz     if (ret < 0) {
3650c9b70d5SMax Reitz         error_setg_errno(errp, errno, "Failed to stat '%s'", path);
3660c9b70d5SMax Reitz         return false;
3670c9b70d5SMax Reitz     }
3680c9b70d5SMax Reitz 
3690c9b70d5SMax Reitz     if (!S_ISREG(statbuf.st_mode)) {
3700c9b70d5SMax Reitz         error_setg(errp, "'%s' is not a regular file", path);
3710c9b70d5SMax Reitz         return false;
3720c9b70d5SMax Reitz     }
3730c9b70d5SMax Reitz 
3740c9b70d5SMax Reitz     return true;
3750c9b70d5SMax Reitz }
3760c9b70d5SMax Reitz 
3770c9b70d5SMax Reitz /**
3780c9b70d5SMax Reitz  * A chance to set change some parameters supplied to FUSE_INIT.
3790c9b70d5SMax Reitz  */
fuse_init(void * userdata,struct fuse_conn_info * conn)3800c9b70d5SMax Reitz static void fuse_init(void *userdata, struct fuse_conn_info *conn)
3810c9b70d5SMax Reitz {
3820c9b70d5SMax Reitz     /*
3830c9b70d5SMax Reitz      * MIN_NON_ZERO() would not be wrong here, but what we set here
3840c9b70d5SMax Reitz      * must equal what has been passed to fuse_session_new().
3850c9b70d5SMax Reitz      * Therefore, as long as max_read must be passed as a mount option
3860c9b70d5SMax Reitz      * (which libfuse claims will be changed at some point), we have
3870c9b70d5SMax Reitz      * to set max_read to a fixed value here.
3880c9b70d5SMax Reitz      */
3890c9b70d5SMax Reitz     conn->max_read = FUSE_MAX_BOUNCE_BYTES;
3900c9b70d5SMax Reitz 
3910c9b70d5SMax Reitz     conn->max_write = MIN_NON_ZERO(BDRV_REQUEST_MAX_BYTES, conn->max_write);
3920c9b70d5SMax Reitz }
3930c9b70d5SMax Reitz 
39441429e3dSMax Reitz /**
39541429e3dSMax Reitz  * Let clients look up files.  Always return ENOENT because we only
39641429e3dSMax Reitz  * care about the mountpoint itself.
39741429e3dSMax Reitz  */
fuse_lookup(fuse_req_t req,fuse_ino_t parent,const char * name)39841429e3dSMax Reitz static void fuse_lookup(fuse_req_t req, fuse_ino_t parent, const char *name)
39941429e3dSMax Reitz {
40041429e3dSMax Reitz     fuse_reply_err(req, ENOENT);
40141429e3dSMax Reitz }
40241429e3dSMax Reitz 
40341429e3dSMax Reitz /**
40441429e3dSMax Reitz  * Let clients get file attributes (i.e., stat() the file).
40541429e3dSMax Reitz  */
fuse_getattr(fuse_req_t req,fuse_ino_t inode,struct fuse_file_info * fi)40641429e3dSMax Reitz static void fuse_getattr(fuse_req_t req, fuse_ino_t inode,
40741429e3dSMax Reitz                          struct fuse_file_info *fi)
40841429e3dSMax Reitz {
40941429e3dSMax Reitz     struct stat statbuf;
41041429e3dSMax Reitz     int64_t length, allocated_blocks;
41141429e3dSMax Reitz     time_t now = time(NULL);
41241429e3dSMax Reitz     FuseExport *exp = fuse_req_userdata(req);
41341429e3dSMax Reitz 
41441429e3dSMax Reitz     length = blk_getlength(exp->common.blk);
41541429e3dSMax Reitz     if (length < 0) {
41641429e3dSMax Reitz         fuse_reply_err(req, -length);
41741429e3dSMax Reitz         return;
41841429e3dSMax Reitz     }
41941429e3dSMax Reitz 
42041429e3dSMax Reitz     allocated_blocks = bdrv_get_allocated_file_size(blk_bs(exp->common.blk));
42141429e3dSMax Reitz     if (allocated_blocks <= 0) {
42241429e3dSMax Reitz         allocated_blocks = DIV_ROUND_UP(length, 512);
42341429e3dSMax Reitz     } else {
42441429e3dSMax Reitz         allocated_blocks = DIV_ROUND_UP(allocated_blocks, 512);
42541429e3dSMax Reitz     }
42641429e3dSMax Reitz 
42741429e3dSMax Reitz     statbuf = (struct stat) {
42841429e3dSMax Reitz         .st_ino     = inode,
4296aeeaed2SMax Reitz         .st_mode    = exp->st_mode,
43041429e3dSMax Reitz         .st_nlink   = 1,
4316aeeaed2SMax Reitz         .st_uid     = exp->st_uid,
4326aeeaed2SMax Reitz         .st_gid     = exp->st_gid,
43341429e3dSMax Reitz         .st_size    = length,
43441429e3dSMax Reitz         .st_blksize = blk_bs(exp->common.blk)->bl.request_alignment,
43541429e3dSMax Reitz         .st_blocks  = allocated_blocks,
43641429e3dSMax Reitz         .st_atime   = now,
43741429e3dSMax Reitz         .st_mtime   = now,
43841429e3dSMax Reitz         .st_ctime   = now,
43941429e3dSMax Reitz     };
44041429e3dSMax Reitz 
44141429e3dSMax Reitz     fuse_reply_attr(req, &statbuf, 1.);
44241429e3dSMax Reitz }
44341429e3dSMax Reitz 
/**
 * Resize the exported image to @size bytes.
 *
 * Exports that are neither growable nor writable do not hold the RESIZE
 * permission permanently; for those it is taken just for the duration of the
 * truncate, which only works in the main thread.
 *
 * @exp: the export to resize
 * @size: new image length in bytes
 * @req_zero_write: whether to request BDRV_REQ_ZERO_WRITE for the truncate
 * @prealloc: preallocation mode passed on to blk_truncate()
 *
 * Returns: the result of blk_truncate(), or a negative errno value if the
 *          RESIZE permission could not be obtained (-EPERM outside of the
 *          main thread).
 */
static int fuse_do_truncate(const FuseExport *exp, int64_t size,
                            bool req_zero_write, PreallocMode prealloc)
{
    /* Growable and writable exports have a permanent RESIZE permission */
    const bool add_resize_perm = !(exp->growable || exp->writable);
    BdrvRequestFlags truncate_flags = req_zero_write ? BDRV_REQ_ZERO_WRITE : 0;
    uint64_t blk_perm, blk_shared_perm;
    int ret, ret_check;

    if (add_resize_perm) {
        /* Changing permissions like below only works in the main thread */
        if (!qemu_in_main_thread()) {
            return -EPERM;
        }

        blk_get_perm(exp->common.blk, &blk_perm, &blk_shared_perm);

        ret = blk_set_perm(exp->common.blk, blk_perm | BLK_PERM_RESIZE,
                           blk_shared_perm, NULL);
        if (ret < 0) {
            return ret;
        }
    }

    ret = blk_truncate(exp->common.blk, size, true, prealloc,
                       truncate_flags, NULL);

    if (add_resize_perm) {
        /* Only the RESIZE permission is given up again, so this must succeed */
        ret_check = blk_set_perm(exp->common.blk, blk_perm,
                                 blk_shared_perm, &error_abort);
        assert(ret_check == 0);
    }

    return ret;
}
48741429e3dSMax Reitz 
48841429e3dSMax Reitz /**
4896aeeaed2SMax Reitz  * Let clients set file attributes.  Only resizing and changing
4906aeeaed2SMax Reitz  * permissions (st_mode, st_uid, st_gid) is allowed.
4916aeeaed2SMax Reitz  * Changing permissions is only allowed as far as it will actually
4926aeeaed2SMax Reitz  * permit access: Read-only exports cannot be given +w, and exports
4936aeeaed2SMax Reitz  * without allow_other cannot be given a different UID or GID, and
4946aeeaed2SMax Reitz  * they cannot be given non-owner access.
49541429e3dSMax Reitz  */
fuse_setattr(fuse_req_t req,fuse_ino_t inode,struct stat * statbuf,int to_set,struct fuse_file_info * fi)49641429e3dSMax Reitz static void fuse_setattr(fuse_req_t req, fuse_ino_t inode, struct stat *statbuf,
49741429e3dSMax Reitz                          int to_set, struct fuse_file_info *fi)
49841429e3dSMax Reitz {
49941429e3dSMax Reitz     FuseExport *exp = fuse_req_userdata(req);
5006aeeaed2SMax Reitz     int supported_attrs;
50141429e3dSMax Reitz     int ret;
50241429e3dSMax Reitz 
5036aeeaed2SMax Reitz     supported_attrs = FUSE_SET_ATTR_SIZE | FUSE_SET_ATTR_MODE;
5046aeeaed2SMax Reitz     if (exp->allow_other) {
5056aeeaed2SMax Reitz         supported_attrs |= FUSE_SET_ATTR_UID | FUSE_SET_ATTR_GID;
5066aeeaed2SMax Reitz     }
5076aeeaed2SMax Reitz 
5086aeeaed2SMax Reitz     if (to_set & ~supported_attrs) {
5099bad96a8SMax Reitz         fuse_reply_err(req, ENOTSUP);
51041429e3dSMax Reitz         return;
51141429e3dSMax Reitz     }
51241429e3dSMax Reitz 
5136aeeaed2SMax Reitz     /* Do some argument checks first before committing to anything */
5146aeeaed2SMax Reitz     if (to_set & FUSE_SET_ATTR_MODE) {
5156aeeaed2SMax Reitz         /*
5166aeeaed2SMax Reitz          * Without allow_other, non-owners can never access the export, so do
5176aeeaed2SMax Reitz          * not allow setting permissions for them
5186aeeaed2SMax Reitz          */
5196aeeaed2SMax Reitz         if (!exp->allow_other &&
5206aeeaed2SMax Reitz             (statbuf->st_mode & (S_IRWXG | S_IRWXO)) != 0)
5216aeeaed2SMax Reitz         {
5226aeeaed2SMax Reitz             fuse_reply_err(req, EPERM);
5236aeeaed2SMax Reitz             return;
5246aeeaed2SMax Reitz         }
5256aeeaed2SMax Reitz 
5266aeeaed2SMax Reitz         /* +w for read-only exports makes no sense, disallow it */
5276aeeaed2SMax Reitz         if (!exp->writable &&
5286aeeaed2SMax Reitz             (statbuf->st_mode & (S_IWUSR | S_IWGRP | S_IWOTH)) != 0)
5296aeeaed2SMax Reitz         {
5306aeeaed2SMax Reitz             fuse_reply_err(req, EROFS);
5316aeeaed2SMax Reitz             return;
5326aeeaed2SMax Reitz         }
5336aeeaed2SMax Reitz     }
5346aeeaed2SMax Reitz 
5359bad96a8SMax Reitz     if (to_set & FUSE_SET_ATTR_SIZE) {
5369bad96a8SMax Reitz         if (!exp->writable) {
5379bad96a8SMax Reitz             fuse_reply_err(req, EACCES);
53841429e3dSMax Reitz             return;
53941429e3dSMax Reitz         }
54041429e3dSMax Reitz 
54141429e3dSMax Reitz         ret = fuse_do_truncate(exp, statbuf->st_size, true, PREALLOC_MODE_OFF);
54241429e3dSMax Reitz         if (ret < 0) {
54341429e3dSMax Reitz             fuse_reply_err(req, -ret);
54441429e3dSMax Reitz             return;
54541429e3dSMax Reitz         }
5469bad96a8SMax Reitz     }
54741429e3dSMax Reitz 
5486aeeaed2SMax Reitz     if (to_set & FUSE_SET_ATTR_MODE) {
5496aeeaed2SMax Reitz         /* Ignore FUSE-supplied file type, only change the mode */
5506aeeaed2SMax Reitz         exp->st_mode = (statbuf->st_mode & 07777) | S_IFREG;
5516aeeaed2SMax Reitz     }
5526aeeaed2SMax Reitz 
5536aeeaed2SMax Reitz     if (to_set & FUSE_SET_ATTR_UID) {
5546aeeaed2SMax Reitz         exp->st_uid = statbuf->st_uid;
5556aeeaed2SMax Reitz     }
5566aeeaed2SMax Reitz 
5576aeeaed2SMax Reitz     if (to_set & FUSE_SET_ATTR_GID) {
5586aeeaed2SMax Reitz         exp->st_gid = statbuf->st_gid;
5596aeeaed2SMax Reitz     }
5606aeeaed2SMax Reitz 
56141429e3dSMax Reitz     fuse_getattr(req, inode, fi);
56241429e3dSMax Reitz }
56341429e3dSMax Reitz 
56441429e3dSMax Reitz /**
56541429e3dSMax Reitz  * Let clients open a file (i.e., the exported image).
56641429e3dSMax Reitz  */
fuse_open(fuse_req_t req,fuse_ino_t inode,struct fuse_file_info * fi)56741429e3dSMax Reitz static void fuse_open(fuse_req_t req, fuse_ino_t inode,
56841429e3dSMax Reitz                       struct fuse_file_info *fi)
56941429e3dSMax Reitz {
57041429e3dSMax Reitz     fuse_reply_open(req, fi);
57141429e3dSMax Reitz }
57241429e3dSMax Reitz 
57341429e3dSMax Reitz /**
57441429e3dSMax Reitz  * Handle client reads from the exported image.
57541429e3dSMax Reitz  */
fuse_read(fuse_req_t req,fuse_ino_t inode,size_t size,off_t offset,struct fuse_file_info * fi)57641429e3dSMax Reitz static void fuse_read(fuse_req_t req, fuse_ino_t inode,
57741429e3dSMax Reitz                       size_t size, off_t offset, struct fuse_file_info *fi)
57841429e3dSMax Reitz {
57941429e3dSMax Reitz     FuseExport *exp = fuse_req_userdata(req);
58041429e3dSMax Reitz     int64_t length;
58141429e3dSMax Reitz     void *buf;
58241429e3dSMax Reitz     int ret;
58341429e3dSMax Reitz 
58441429e3dSMax Reitz     /* Limited by max_read, should not happen */
58541429e3dSMax Reitz     if (size > FUSE_MAX_BOUNCE_BYTES) {
58641429e3dSMax Reitz         fuse_reply_err(req, EINVAL);
58741429e3dSMax Reitz         return;
58841429e3dSMax Reitz     }
58941429e3dSMax Reitz 
59041429e3dSMax Reitz     /**
59141429e3dSMax Reitz      * Clients will expect short reads at EOF, so we have to limit
59241429e3dSMax Reitz      * offset+size to the image length.
59341429e3dSMax Reitz      */
59441429e3dSMax Reitz     length = blk_getlength(exp->common.blk);
59541429e3dSMax Reitz     if (length < 0) {
59641429e3dSMax Reitz         fuse_reply_err(req, -length);
59741429e3dSMax Reitz         return;
59841429e3dSMax Reitz     }
59941429e3dSMax Reitz 
60041429e3dSMax Reitz     if (offset + size > length) {
60141429e3dSMax Reitz         size = length - offset;
60241429e3dSMax Reitz     }
60341429e3dSMax Reitz 
60441429e3dSMax Reitz     buf = qemu_try_blockalign(blk_bs(exp->common.blk), size);
60541429e3dSMax Reitz     if (!buf) {
60641429e3dSMax Reitz         fuse_reply_err(req, ENOMEM);
60741429e3dSMax Reitz         return;
60841429e3dSMax Reitz     }
60941429e3dSMax Reitz 
610a9262f55SAlberto Faria     ret = blk_pread(exp->common.blk, offset, size, buf, 0);
61141429e3dSMax Reitz     if (ret >= 0) {
61241429e3dSMax Reitz         fuse_reply_buf(req, buf, size);
61341429e3dSMax Reitz     } else {
61441429e3dSMax Reitz         fuse_reply_err(req, -ret);
61541429e3dSMax Reitz     }
61641429e3dSMax Reitz 
61741429e3dSMax Reitz     qemu_vfree(buf);
61841429e3dSMax Reitz }
61941429e3dSMax Reitz 
62041429e3dSMax Reitz /**
62141429e3dSMax Reitz  * Handle client writes to the exported image.
62241429e3dSMax Reitz  */
fuse_write(fuse_req_t req,fuse_ino_t inode,const char * buf,size_t size,off_t offset,struct fuse_file_info * fi)62341429e3dSMax Reitz static void fuse_write(fuse_req_t req, fuse_ino_t inode, const char *buf,
62441429e3dSMax Reitz                        size_t size, off_t offset, struct fuse_file_info *fi)
62541429e3dSMax Reitz {
62641429e3dSMax Reitz     FuseExport *exp = fuse_req_userdata(req);
62741429e3dSMax Reitz     int64_t length;
62841429e3dSMax Reitz     int ret;
62941429e3dSMax Reitz 
63041429e3dSMax Reitz     /* Limited by max_write, should not happen */
63141429e3dSMax Reitz     if (size > BDRV_REQUEST_MAX_BYTES) {
63241429e3dSMax Reitz         fuse_reply_err(req, EINVAL);
63341429e3dSMax Reitz         return;
63441429e3dSMax Reitz     }
63541429e3dSMax Reitz 
63641429e3dSMax Reitz     if (!exp->writable) {
63741429e3dSMax Reitz         fuse_reply_err(req, EACCES);
63841429e3dSMax Reitz         return;
63941429e3dSMax Reitz     }
64041429e3dSMax Reitz 
64141429e3dSMax Reitz     /**
64241429e3dSMax Reitz      * Clients will expect short writes at EOF, so we have to limit
64341429e3dSMax Reitz      * offset+size to the image length.
64441429e3dSMax Reitz      */
64541429e3dSMax Reitz     length = blk_getlength(exp->common.blk);
64641429e3dSMax Reitz     if (length < 0) {
64741429e3dSMax Reitz         fuse_reply_err(req, -length);
64841429e3dSMax Reitz         return;
64941429e3dSMax Reitz     }
65041429e3dSMax Reitz 
65141429e3dSMax Reitz     if (offset + size > length) {
6524fba06d5SMax Reitz         if (exp->growable) {
6534fba06d5SMax Reitz             ret = fuse_do_truncate(exp, offset + size, true, PREALLOC_MODE_OFF);
6544fba06d5SMax Reitz             if (ret < 0) {
6554fba06d5SMax Reitz                 fuse_reply_err(req, -ret);
6564fba06d5SMax Reitz                 return;
6574fba06d5SMax Reitz             }
6584fba06d5SMax Reitz         } else {
65941429e3dSMax Reitz             size = length - offset;
66041429e3dSMax Reitz         }
6614fba06d5SMax Reitz     }
66241429e3dSMax Reitz 
663a9262f55SAlberto Faria     ret = blk_pwrite(exp->common.blk, offset, size, buf, 0);
66441429e3dSMax Reitz     if (ret >= 0) {
66541429e3dSMax Reitz         fuse_reply_write(req, size);
66641429e3dSMax Reitz     } else {
66741429e3dSMax Reitz         fuse_reply_err(req, -ret);
66841429e3dSMax Reitz     }
66941429e3dSMax Reitz }
67041429e3dSMax Reitz 
67141429e3dSMax Reitz /**
6724ca37a96SMax Reitz  * Let clients perform various fallocate() operations.
6734ca37a96SMax Reitz  */
fuse_fallocate(fuse_req_t req,fuse_ino_t inode,int mode,off_t offset,off_t length,struct fuse_file_info * fi)6744ca37a96SMax Reitz static void fuse_fallocate(fuse_req_t req, fuse_ino_t inode, int mode,
6754ca37a96SMax Reitz                            off_t offset, off_t length,
6764ca37a96SMax Reitz                            struct fuse_file_info *fi)
6774ca37a96SMax Reitz {
6784ca37a96SMax Reitz     FuseExport *exp = fuse_req_userdata(req);
6794ca37a96SMax Reitz     int64_t blk_len;
6804ca37a96SMax Reitz     int ret;
6814ca37a96SMax Reitz 
6824ca37a96SMax Reitz     if (!exp->writable) {
6834ca37a96SMax Reitz         fuse_reply_err(req, EACCES);
6844ca37a96SMax Reitz         return;
6854ca37a96SMax Reitz     }
6864ca37a96SMax Reitz 
6874ca37a96SMax Reitz     blk_len = blk_getlength(exp->common.blk);
6884ca37a96SMax Reitz     if (blk_len < 0) {
6894ca37a96SMax Reitz         fuse_reply_err(req, -blk_len);
6904ca37a96SMax Reitz         return;
6914ca37a96SMax Reitz     }
6924ca37a96SMax Reitz 
6933c9c7034SPhilippe Mathieu-Daudé #ifdef CONFIG_FALLOCATE_PUNCH_HOLE
6944ca37a96SMax Reitz     if (mode & FALLOC_FL_KEEP_SIZE) {
6954ca37a96SMax Reitz         length = MIN(length, blk_len - offset);
6964ca37a96SMax Reitz     }
6973c9c7034SPhilippe Mathieu-Daudé #endif /* CONFIG_FALLOCATE_PUNCH_HOLE */
6984ca37a96SMax Reitz 
699ac504194SPhilippe Mathieu-Daudé     if (!mode) {
700ac504194SPhilippe Mathieu-Daudé         /* We can only fallocate at the EOF with a truncate */
701ac504194SPhilippe Mathieu-Daudé         if (offset < blk_len) {
702ac504194SPhilippe Mathieu-Daudé             fuse_reply_err(req, EOPNOTSUPP);
703ac504194SPhilippe Mathieu-Daudé             return;
704ac504194SPhilippe Mathieu-Daudé         }
705ac504194SPhilippe Mathieu-Daudé 
706ac504194SPhilippe Mathieu-Daudé         if (offset > blk_len) {
707ac504194SPhilippe Mathieu-Daudé             /* No preallocation needed here */
708ac504194SPhilippe Mathieu-Daudé             ret = fuse_do_truncate(exp, offset, true, PREALLOC_MODE_OFF);
709ac504194SPhilippe Mathieu-Daudé             if (ret < 0) {
710ac504194SPhilippe Mathieu-Daudé                 fuse_reply_err(req, -ret);
711ac504194SPhilippe Mathieu-Daudé                 return;
712ac504194SPhilippe Mathieu-Daudé             }
713ac504194SPhilippe Mathieu-Daudé         }
714ac504194SPhilippe Mathieu-Daudé 
715ac504194SPhilippe Mathieu-Daudé         ret = fuse_do_truncate(exp, offset + length, true,
716ac504194SPhilippe Mathieu-Daudé                                PREALLOC_MODE_FALLOC);
717ac504194SPhilippe Mathieu-Daudé     }
7183c9c7034SPhilippe Mathieu-Daudé #ifdef CONFIG_FALLOCATE_PUNCH_HOLE
719ac504194SPhilippe Mathieu-Daudé     else if (mode & FALLOC_FL_PUNCH_HOLE) {
7204ca37a96SMax Reitz         if (!(mode & FALLOC_FL_KEEP_SIZE)) {
7214ca37a96SMax Reitz             fuse_reply_err(req, EINVAL);
7224ca37a96SMax Reitz             return;
7234ca37a96SMax Reitz         }
7244ca37a96SMax Reitz 
7254ca37a96SMax Reitz         do {
7264ca37a96SMax Reitz             int size = MIN(length, BDRV_REQUEST_MAX_BYTES);
7274ca37a96SMax Reitz 
7281703eb1cSHanna Czenczek             ret = blk_pwrite_zeroes(exp->common.blk, offset, size,
7291703eb1cSHanna Czenczek                                     BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK);
7301703eb1cSHanna Czenczek             if (ret == -ENOTSUP) {
7311703eb1cSHanna Czenczek                 /*
7321703eb1cSHanna Czenczek                  * fallocate() specifies to return EOPNOTSUPP for unsupported
7331703eb1cSHanna Czenczek                  * operations
7341703eb1cSHanna Czenczek                  */
7351703eb1cSHanna Czenczek                 ret = -EOPNOTSUPP;
7361703eb1cSHanna Czenczek             }
7371703eb1cSHanna Czenczek 
7384ca37a96SMax Reitz             offset += size;
7394ca37a96SMax Reitz             length -= size;
7404ca37a96SMax Reitz         } while (ret == 0 && length > 0);
74150482fdaSFabrice Fontaine     }
7423c9c7034SPhilippe Mathieu-Daudé #endif /* CONFIG_FALLOCATE_PUNCH_HOLE */
74350482fdaSFabrice Fontaine #ifdef CONFIG_FALLOCATE_ZERO_RANGE
74450482fdaSFabrice Fontaine     else if (mode & FALLOC_FL_ZERO_RANGE) {
7454ca37a96SMax Reitz         if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + length > blk_len) {
7464ca37a96SMax Reitz             /* No need for zeroes, we are going to write them ourselves */
7474ca37a96SMax Reitz             ret = fuse_do_truncate(exp, offset + length, false,
7484ca37a96SMax Reitz                                    PREALLOC_MODE_OFF);
7494ca37a96SMax Reitz             if (ret < 0) {
7504ca37a96SMax Reitz                 fuse_reply_err(req, -ret);
7514ca37a96SMax Reitz                 return;
7524ca37a96SMax Reitz             }
7534ca37a96SMax Reitz         }
7544ca37a96SMax Reitz 
7554ca37a96SMax Reitz         do {
7564ca37a96SMax Reitz             int size = MIN(length, BDRV_REQUEST_MAX_BYTES);
7574ca37a96SMax Reitz 
7584ca37a96SMax Reitz             ret = blk_pwrite_zeroes(exp->common.blk,
7594ca37a96SMax Reitz                                     offset, size, 0);
7604ca37a96SMax Reitz             offset += size;
7614ca37a96SMax Reitz             length -= size;
7624ca37a96SMax Reitz         } while (ret == 0 && length > 0);
76350482fdaSFabrice Fontaine     }
76450482fdaSFabrice Fontaine #endif /* CONFIG_FALLOCATE_ZERO_RANGE */
765ac504194SPhilippe Mathieu-Daudé     else {
7664ca37a96SMax Reitz         ret = -EOPNOTSUPP;
7674ca37a96SMax Reitz     }
7684ca37a96SMax Reitz 
7694ca37a96SMax Reitz     fuse_reply_err(req, ret < 0 ? -ret : 0);
7704ca37a96SMax Reitz }
7714ca37a96SMax Reitz 
7724ca37a96SMax Reitz /**
77341429e3dSMax Reitz  * Let clients fsync the exported image.
77441429e3dSMax Reitz  */
fuse_fsync(fuse_req_t req,fuse_ino_t inode,int datasync,struct fuse_file_info * fi)77541429e3dSMax Reitz static void fuse_fsync(fuse_req_t req, fuse_ino_t inode, int datasync,
77641429e3dSMax Reitz                        struct fuse_file_info *fi)
77741429e3dSMax Reitz {
77841429e3dSMax Reitz     FuseExport *exp = fuse_req_userdata(req);
77941429e3dSMax Reitz     int ret;
78041429e3dSMax Reitz 
78141429e3dSMax Reitz     ret = blk_flush(exp->common.blk);
78241429e3dSMax Reitz     fuse_reply_err(req, ret < 0 ? -ret : 0);
78341429e3dSMax Reitz }
78441429e3dSMax Reitz 
78541429e3dSMax Reitz /**
78641429e3dSMax Reitz  * Called before an FD to the exported image is closed.  (libfuse
78741429e3dSMax Reitz  * notes this to be a way to return last-minute errors.)
78841429e3dSMax Reitz  */
fuse_flush(fuse_req_t req,fuse_ino_t inode,struct fuse_file_info * fi)78941429e3dSMax Reitz static void fuse_flush(fuse_req_t req, fuse_ino_t inode,
79041429e3dSMax Reitz                         struct fuse_file_info *fi)
79141429e3dSMax Reitz {
79241429e3dSMax Reitz     fuse_fsync(req, inode, 1, fi);
79341429e3dSMax Reitz }
79441429e3dSMax Reitz 
#ifdef CONFIG_FUSE_LSEEK
/**
 * Let clients inquire allocation status.
 *
 * @req:    request to answer; its userdata is the FuseExport
 * @inode:  not inspected
 * @offset: position from which to start searching
 * @whence: SEEK_DATA or SEEK_HOLE; anything else yields EINVAL
 * @fi:     not inspected
 *
 * Walks the block status of the exported node from @offset onwards until a
 * range of the wanted kind (data for SEEK_DATA, non-data for SEEK_HOLE) is
 * found and replies with its start via fuse_reply_lseek().  Reaching the
 * end of the image replies ENXIO for SEEK_DATA (or when the position lies
 * past the client-visible length), and the current position for SEEK_HOLE.
 */
static void fuse_lseek(fuse_req_t req, fuse_ino_t inode, off_t offset,
                       int whence, struct fuse_file_info *fi)
{
    FuseExport *exp = fuse_req_userdata(req);

    if (whence != SEEK_HOLE && whence != SEEK_DATA) {
        fuse_reply_err(req, EINVAL);
        return;
    }

    while (true) {
        int64_t pnum;
        int ret;

        /* Status of the range starting at @offset; @pnum is its length */
        ret = bdrv_block_status_above(blk_bs(exp->common.blk), NULL,
                                      offset, INT64_MAX, &pnum, NULL, NULL);
        if (ret < 0) {
            fuse_reply_err(req, -ret);
            return;
        }

        if (!pnum && (ret & BDRV_BLOCK_EOF)) {
            int64_t blk_len;

            /*
             * If blk_getlength() rounds (e.g. by sectors), then the
             * export length will be rounded, too.  However,
             * bdrv_block_status_above() may return EOF at unaligned
             * offsets.  We must not let this become visible and thus
             * always simulate a hole between @offset (the real EOF)
             * and @blk_len (the client-visible EOF).
             */

            blk_len = blk_getlength(exp->common.blk);
            if (blk_len < 0) {
                fuse_reply_err(req, -blk_len);
                return;
            }

            if (offset > blk_len || whence == SEEK_DATA) {
                fuse_reply_err(req, ENXIO);
            } else {
                fuse_reply_lseek(req, offset);
            }
            return;
        }

        if (ret & BDRV_BLOCK_DATA) {
            if (whence == SEEK_DATA) {
                fuse_reply_lseek(req, offset);
                return;
            }
        } else {
            if (whence == SEEK_HOLE) {
                fuse_reply_lseek(req, offset);
                return;
            }
        }

        /* Safety check against infinite loops */
        if (!pnum) {
            fuse_reply_err(req, ENXIO);
            return;
        }

        /* Not the kind of range we are looking for, skip past it */
        offset += pnum;
    }
}
#endif
868df4ea709SMax Reitz 
8690c9b70d5SMax Reitz static const struct fuse_lowlevel_ops fuse_ops = {
8700c9b70d5SMax Reitz     .init       = fuse_init,
87141429e3dSMax Reitz     .lookup     = fuse_lookup,
87241429e3dSMax Reitz     .getattr    = fuse_getattr,
87341429e3dSMax Reitz     .setattr    = fuse_setattr,
87441429e3dSMax Reitz     .open       = fuse_open,
87541429e3dSMax Reitz     .read       = fuse_read,
87641429e3dSMax Reitz     .write      = fuse_write,
8774ca37a96SMax Reitz     .fallocate  = fuse_fallocate,
87841429e3dSMax Reitz     .flush      = fuse_flush,
87941429e3dSMax Reitz     .fsync      = fuse_fsync,
880df4ea709SMax Reitz #ifdef CONFIG_FUSE_LSEEK
881df4ea709SMax Reitz     .lseek      = fuse_lseek,
882df4ea709SMax Reitz #endif
8830c9b70d5SMax Reitz };
8840c9b70d5SMax Reitz 
8850c9b70d5SMax Reitz const BlockExportDriver blk_exp_fuse = {
8860c9b70d5SMax Reitz     .type               = BLOCK_EXPORT_TYPE_FUSE,
8870c9b70d5SMax Reitz     .instance_size      = sizeof(FuseExport),
8880c9b70d5SMax Reitz     .create             = fuse_export_create,
8890c9b70d5SMax Reitz     .delete             = fuse_export_delete,
8900c9b70d5SMax Reitz     .request_shutdown   = fuse_export_shutdown,
8910c9b70d5SMax Reitz };
892