xref: /openbmc/qemu/block/export/fuse.c (revision d5657258)
1 /*
2  * Present a block device as a raw image through FUSE
3  *
4  * Copyright (c) 2020 Max Reitz <mreitz@redhat.com>
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; under version 2 or later of the License.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program; if not, see <http://www.gnu.org/licenses/>.
17  */
18 
19 #define FUSE_USE_VERSION 31
20 
21 #include "qemu/osdep.h"
22 #include "qemu/memalign.h"
23 #include "block/aio.h"
24 #include "block/block_int-common.h"
25 #include "block/export.h"
26 #include "block/fuse.h"
27 #include "block/qapi.h"
28 #include "qapi/error.h"
29 #include "qapi/qapi-commands-block.h"
30 #include "qemu/main-loop.h"
31 #include "sysemu/block-backend.h"
32 
33 #include <fuse.h>
34 #include <fuse_lowlevel.h>
35 
36 #if defined(CONFIG_FALLOCATE_ZERO_RANGE)
37 #include <linux/falloc.h>
38 #endif
39 
40 #ifdef __linux__
41 #include <linux/fs.h>
42 #endif
43 
/*
 * Prevent overly long bounce buffer allocations.
 *
 * This is the largest read request we accept: it is passed to FUSE as
 * the max_read mount option in setup_fuse_export(), set again in
 * fuse_init(), and enforced per request in fuse_read().
 */
#define FUSE_MAX_BOUNCE_BYTES (MIN(BDRV_REQUEST_MAX_BYTES, 64 * 1024 * 1024))
46 
47 
48 typedef struct FuseExport {
49     BlockExport common;
50 
51     struct fuse_session *fuse_session;
52     struct fuse_buf fuse_buf;
53     bool mounted, fd_handler_set_up;
54 
55     char *mountpoint;
56     bool writable;
57     bool growable;
58     /* Whether allow_other was used as a mount option or not */
59     bool allow_other;
60 
61     mode_t st_mode;
62     uid_t st_uid;
63     gid_t st_gid;
64 } FuseExport;
65 
/*
 * Set of mount point strings that currently have a FUSE export on them.
 * Only keys are stored (values are NULL); allocated lazily by
 * init_exports_table().
 */
static GHashTable *exports;
/* Low-level FUSE callback table, defined further below */
static const struct fuse_lowlevel_ops fuse_ops;

static void fuse_export_shutdown(BlockExport *exp);
static void fuse_export_delete(BlockExport *exp);

static void init_exports_table(void);

static int setup_fuse_export(FuseExport *exp, const char *mountpoint,
                             bool allow_other, Error **errp);
static void read_from_fuse_export(void *opaque);

static bool is_regular_file(const char *path, Error **errp);
79 
80 
81 static int fuse_export_create(BlockExport *blk_exp,
82                               BlockExportOptions *blk_exp_args,
83                               Error **errp)
84 {
85     FuseExport *exp = container_of(blk_exp, FuseExport, common);
86     BlockExportOptionsFuse *args = &blk_exp_args->u.fuse;
87     int ret;
88 
89     assert(blk_exp_args->type == BLOCK_EXPORT_TYPE_FUSE);
90 
91     /* For growable and writable exports, take the RESIZE permission */
92     if (args->growable || blk_exp_args->writable) {
93         uint64_t blk_perm, blk_shared_perm;
94 
95         blk_get_perm(exp->common.blk, &blk_perm, &blk_shared_perm);
96 
97         ret = blk_set_perm(exp->common.blk, blk_perm | BLK_PERM_RESIZE,
98                            blk_shared_perm, errp);
99         if (ret < 0) {
100             return ret;
101         }
102     }
103 
104     init_exports_table();
105 
106     /*
107      * It is important to do this check before calling is_regular_file() --
108      * that function will do a stat(), which we would have to handle if we
109      * already exported something on @mountpoint.  But we cannot, because
110      * we are currently caught up here.
111      * (Note that ideally we would want to resolve relative paths here,
112      * but bdrv_make_absolute_filename() might do the wrong thing for
113      * paths that contain colons, and realpath() would resolve symlinks,
114      * which we do not want: The mount point is not going to be the
115      * symlink's destination, but the link itself.)
116      * So this will not catch all potential clashes, but hopefully at
117      * least the most common one of specifying exactly the same path
118      * string twice.
119      */
120     if (g_hash_table_contains(exports, args->mountpoint)) {
121         error_setg(errp, "There already is a FUSE export on '%s'",
122                    args->mountpoint);
123         ret = -EEXIST;
124         goto fail;
125     }
126 
127     if (!is_regular_file(args->mountpoint, errp)) {
128         ret = -EINVAL;
129         goto fail;
130     }
131 
132     exp->mountpoint = g_strdup(args->mountpoint);
133     exp->writable = blk_exp_args->writable;
134     exp->growable = args->growable;
135 
136     /* set default */
137     if (!args->has_allow_other) {
138         args->allow_other = FUSE_EXPORT_ALLOW_OTHER_AUTO;
139     }
140 
141     exp->st_mode = S_IFREG | S_IRUSR;
142     if (exp->writable) {
143         exp->st_mode |= S_IWUSR;
144     }
145     exp->st_uid = getuid();
146     exp->st_gid = getgid();
147 
148     if (args->allow_other == FUSE_EXPORT_ALLOW_OTHER_AUTO) {
149         /* Ignore errors on our first attempt */
150         ret = setup_fuse_export(exp, args->mountpoint, true, NULL);
151         exp->allow_other = ret == 0;
152         if (ret < 0) {
153             ret = setup_fuse_export(exp, args->mountpoint, false, errp);
154         }
155     } else {
156         exp->allow_other = args->allow_other == FUSE_EXPORT_ALLOW_OTHER_ON;
157         ret = setup_fuse_export(exp, args->mountpoint, exp->allow_other, errp);
158     }
159     if (ret < 0) {
160         goto fail;
161     }
162 
163     return 0;
164 
165 fail:
166     fuse_export_delete(blk_exp);
167     return ret;
168 }
169 
170 /**
171  * Allocates the global @exports hash table.
172  */
173 static void init_exports_table(void)
174 {
175     if (exports) {
176         return;
177     }
178 
179     exports = g_hash_table_new_full(g_str_hash, g_str_equal, g_free, NULL);
180 }
181 
182 /**
183  * Create exp->fuse_session and mount it.
184  */
185 static int setup_fuse_export(FuseExport *exp, const char *mountpoint,
186                              bool allow_other, Error **errp)
187 {
188     const char *fuse_argv[4];
189     char *mount_opts;
190     struct fuse_args fuse_args;
191     int ret;
192 
193     /*
194      * max_read needs to match what fuse_init() sets.
195      * max_write need not be supplied.
196      */
197     mount_opts = g_strdup_printf("max_read=%zu,default_permissions%s",
198                                  FUSE_MAX_BOUNCE_BYTES,
199                                  allow_other ? ",allow_other" : "");
200 
201     fuse_argv[0] = ""; /* Dummy program name */
202     fuse_argv[1] = "-o";
203     fuse_argv[2] = mount_opts;
204     fuse_argv[3] = NULL;
205     fuse_args = (struct fuse_args)FUSE_ARGS_INIT(3, (char **)fuse_argv);
206 
207     exp->fuse_session = fuse_session_new(&fuse_args, &fuse_ops,
208                                          sizeof(fuse_ops), exp);
209     g_free(mount_opts);
210     if (!exp->fuse_session) {
211         error_setg(errp, "Failed to set up FUSE session");
212         ret = -EIO;
213         goto fail;
214     }
215 
216     ret = fuse_session_mount(exp->fuse_session, mountpoint);
217     if (ret < 0) {
218         error_setg(errp, "Failed to mount FUSE session to export");
219         ret = -EIO;
220         goto fail;
221     }
222     exp->mounted = true;
223 
224     g_hash_table_insert(exports, g_strdup(mountpoint), NULL);
225 
226     aio_set_fd_handler(exp->common.ctx,
227                        fuse_session_fd(exp->fuse_session), true,
228                        read_from_fuse_export, NULL, NULL, NULL, exp);
229     exp->fd_handler_set_up = true;
230 
231     return 0;
232 
233 fail:
234     fuse_export_shutdown(&exp->common);
235     return ret;
236 }
237 
238 /**
239  * Callback to be invoked when the FUSE session FD can be read from.
240  * (This is basically the FUSE event loop.)
241  */
242 static void read_from_fuse_export(void *opaque)
243 {
244     FuseExport *exp = opaque;
245     int ret;
246 
247     blk_exp_ref(&exp->common);
248 
249     do {
250         ret = fuse_session_receive_buf(exp->fuse_session, &exp->fuse_buf);
251     } while (ret == -EINTR);
252     if (ret < 0) {
253         goto out;
254     }
255 
256     fuse_session_process_buf(exp->fuse_session, &exp->fuse_buf);
257 
258 out:
259     blk_exp_unref(&exp->common);
260 }
261 
262 static void fuse_export_shutdown(BlockExport *blk_exp)
263 {
264     FuseExport *exp = container_of(blk_exp, FuseExport, common);
265 
266     if (exp->fuse_session) {
267         fuse_session_exit(exp->fuse_session);
268 
269         if (exp->fd_handler_set_up) {
270             aio_set_fd_handler(exp->common.ctx,
271                                fuse_session_fd(exp->fuse_session), true,
272                                NULL, NULL, NULL, NULL, NULL);
273             exp->fd_handler_set_up = false;
274         }
275     }
276 
277     if (exp->mountpoint) {
278         /*
279          * Safe to drop now, because we will not handle any requests
280          * for this export anymore anyway.
281          */
282         g_hash_table_remove(exports, exp->mountpoint);
283     }
284 }
285 
286 static void fuse_export_delete(BlockExport *blk_exp)
287 {
288     FuseExport *exp = container_of(blk_exp, FuseExport, common);
289 
290     if (exp->fuse_session) {
291         if (exp->mounted) {
292             fuse_session_unmount(exp->fuse_session);
293         }
294 
295         fuse_session_destroy(exp->fuse_session);
296     }
297 
298     free(exp->fuse_buf.mem);
299     g_free(exp->mountpoint);
300 }
301 
302 /**
303  * Check whether @path points to a regular file.  If not, put an
304  * appropriate message into *errp.
305  */
306 static bool is_regular_file(const char *path, Error **errp)
307 {
308     struct stat statbuf;
309     int ret;
310 
311     ret = stat(path, &statbuf);
312     if (ret < 0) {
313         error_setg_errno(errp, errno, "Failed to stat '%s'", path);
314         return false;
315     }
316 
317     if (!S_ISREG(statbuf.st_mode)) {
318         error_setg(errp, "'%s' is not a regular file", path);
319         return false;
320     }
321 
322     return true;
323 }
324 
325 /**
326  * A chance to set change some parameters supplied to FUSE_INIT.
327  */
328 static void fuse_init(void *userdata, struct fuse_conn_info *conn)
329 {
330     /*
331      * MIN_NON_ZERO() would not be wrong here, but what we set here
332      * must equal what has been passed to fuse_session_new().
333      * Therefore, as long as max_read must be passed as a mount option
334      * (which libfuse claims will be changed at some point), we have
335      * to set max_read to a fixed value here.
336      */
337     conn->max_read = FUSE_MAX_BOUNCE_BYTES;
338 
339     conn->max_write = MIN_NON_ZERO(BDRV_REQUEST_MAX_BYTES, conn->max_write);
340 }
341 
342 /**
343  * Let clients look up files.  Always return ENOENT because we only
344  * care about the mountpoint itself.
345  */
346 static void fuse_lookup(fuse_req_t req, fuse_ino_t parent, const char *name)
347 {
348     fuse_reply_err(req, ENOENT);
349 }
350 
351 /**
352  * Let clients get file attributes (i.e., stat() the file).
353  */
354 static void fuse_getattr(fuse_req_t req, fuse_ino_t inode,
355                          struct fuse_file_info *fi)
356 {
357     struct stat statbuf;
358     int64_t length, allocated_blocks;
359     time_t now = time(NULL);
360     FuseExport *exp = fuse_req_userdata(req);
361 
362     length = blk_getlength(exp->common.blk);
363     if (length < 0) {
364         fuse_reply_err(req, -length);
365         return;
366     }
367 
368     allocated_blocks = bdrv_get_allocated_file_size(blk_bs(exp->common.blk));
369     if (allocated_blocks <= 0) {
370         allocated_blocks = DIV_ROUND_UP(length, 512);
371     } else {
372         allocated_blocks = DIV_ROUND_UP(allocated_blocks, 512);
373     }
374 
375     statbuf = (struct stat) {
376         .st_ino     = inode,
377         .st_mode    = exp->st_mode,
378         .st_nlink   = 1,
379         .st_uid     = exp->st_uid,
380         .st_gid     = exp->st_gid,
381         .st_size    = length,
382         .st_blksize = blk_bs(exp->common.blk)->bl.request_alignment,
383         .st_blocks  = allocated_blocks,
384         .st_atime   = now,
385         .st_mtime   = now,
386         .st_ctime   = now,
387     };
388 
389     fuse_reply_attr(req, &statbuf, 1.);
390 }
391 
392 static int fuse_do_truncate(const FuseExport *exp, int64_t size,
393                             bool req_zero_write, PreallocMode prealloc)
394 {
395     uint64_t blk_perm, blk_shared_perm;
396     BdrvRequestFlags truncate_flags = 0;
397     bool add_resize_perm;
398     int ret, ret_check;
399 
400     /* Growable and writable exports have a permanent RESIZE permission */
401     add_resize_perm = !exp->growable && !exp->writable;
402 
403     if (req_zero_write) {
404         truncate_flags |= BDRV_REQ_ZERO_WRITE;
405     }
406 
407     if (add_resize_perm) {
408 
409         if (!qemu_in_main_thread()) {
410             /* Changing permissions like below only works in the main thread */
411             return -EPERM;
412         }
413 
414         blk_get_perm(exp->common.blk, &blk_perm, &blk_shared_perm);
415 
416         ret = blk_set_perm(exp->common.blk, blk_perm | BLK_PERM_RESIZE,
417                            blk_shared_perm, NULL);
418         if (ret < 0) {
419             return ret;
420         }
421     }
422 
423     ret = blk_truncate(exp->common.blk, size, true, prealloc,
424                        truncate_flags, NULL);
425 
426     if (add_resize_perm) {
427         /* Must succeed, because we are only giving up the RESIZE permission */
428         ret_check = blk_set_perm(exp->common.blk, blk_perm,
429                                  blk_shared_perm, &error_abort);
430         assert(ret_check == 0);
431     }
432 
433     return ret;
434 }
435 
436 /**
437  * Let clients set file attributes.  Only resizing and changing
438  * permissions (st_mode, st_uid, st_gid) is allowed.
439  * Changing permissions is only allowed as far as it will actually
440  * permit access: Read-only exports cannot be given +w, and exports
441  * without allow_other cannot be given a different UID or GID, and
442  * they cannot be given non-owner access.
443  */
444 static void fuse_setattr(fuse_req_t req, fuse_ino_t inode, struct stat *statbuf,
445                          int to_set, struct fuse_file_info *fi)
446 {
447     FuseExport *exp = fuse_req_userdata(req);
448     int supported_attrs;
449     int ret;
450 
451     supported_attrs = FUSE_SET_ATTR_SIZE | FUSE_SET_ATTR_MODE;
452     if (exp->allow_other) {
453         supported_attrs |= FUSE_SET_ATTR_UID | FUSE_SET_ATTR_GID;
454     }
455 
456     if (to_set & ~supported_attrs) {
457         fuse_reply_err(req, ENOTSUP);
458         return;
459     }
460 
461     /* Do some argument checks first before committing to anything */
462     if (to_set & FUSE_SET_ATTR_MODE) {
463         /*
464          * Without allow_other, non-owners can never access the export, so do
465          * not allow setting permissions for them
466          */
467         if (!exp->allow_other &&
468             (statbuf->st_mode & (S_IRWXG | S_IRWXO)) != 0)
469         {
470             fuse_reply_err(req, EPERM);
471             return;
472         }
473 
474         /* +w for read-only exports makes no sense, disallow it */
475         if (!exp->writable &&
476             (statbuf->st_mode & (S_IWUSR | S_IWGRP | S_IWOTH)) != 0)
477         {
478             fuse_reply_err(req, EROFS);
479             return;
480         }
481     }
482 
483     if (to_set & FUSE_SET_ATTR_SIZE) {
484         if (!exp->writable) {
485             fuse_reply_err(req, EACCES);
486             return;
487         }
488 
489         ret = fuse_do_truncate(exp, statbuf->st_size, true, PREALLOC_MODE_OFF);
490         if (ret < 0) {
491             fuse_reply_err(req, -ret);
492             return;
493         }
494     }
495 
496     if (to_set & FUSE_SET_ATTR_MODE) {
497         /* Ignore FUSE-supplied file type, only change the mode */
498         exp->st_mode = (statbuf->st_mode & 07777) | S_IFREG;
499     }
500 
501     if (to_set & FUSE_SET_ATTR_UID) {
502         exp->st_uid = statbuf->st_uid;
503     }
504 
505     if (to_set & FUSE_SET_ATTR_GID) {
506         exp->st_gid = statbuf->st_gid;
507     }
508 
509     fuse_getattr(req, inode, fi);
510 }
511 
512 /**
513  * Let clients open a file (i.e., the exported image).
514  */
515 static void fuse_open(fuse_req_t req, fuse_ino_t inode,
516                       struct fuse_file_info *fi)
517 {
518     fuse_reply_open(req, fi);
519 }
520 
521 /**
522  * Handle client reads from the exported image.
523  */
524 static void fuse_read(fuse_req_t req, fuse_ino_t inode,
525                       size_t size, off_t offset, struct fuse_file_info *fi)
526 {
527     FuseExport *exp = fuse_req_userdata(req);
528     int64_t length;
529     void *buf;
530     int ret;
531 
532     /* Limited by max_read, should not happen */
533     if (size > FUSE_MAX_BOUNCE_BYTES) {
534         fuse_reply_err(req, EINVAL);
535         return;
536     }
537 
538     /**
539      * Clients will expect short reads at EOF, so we have to limit
540      * offset+size to the image length.
541      */
542     length = blk_getlength(exp->common.blk);
543     if (length < 0) {
544         fuse_reply_err(req, -length);
545         return;
546     }
547 
548     if (offset + size > length) {
549         size = length - offset;
550     }
551 
552     buf = qemu_try_blockalign(blk_bs(exp->common.blk), size);
553     if (!buf) {
554         fuse_reply_err(req, ENOMEM);
555         return;
556     }
557 
558     ret = blk_pread(exp->common.blk, offset, size, buf, 0);
559     if (ret >= 0) {
560         fuse_reply_buf(req, buf, size);
561     } else {
562         fuse_reply_err(req, -ret);
563     }
564 
565     qemu_vfree(buf);
566 }
567 
568 /**
569  * Handle client writes to the exported image.
570  */
571 static void fuse_write(fuse_req_t req, fuse_ino_t inode, const char *buf,
572                        size_t size, off_t offset, struct fuse_file_info *fi)
573 {
574     FuseExport *exp = fuse_req_userdata(req);
575     int64_t length;
576     int ret;
577 
578     /* Limited by max_write, should not happen */
579     if (size > BDRV_REQUEST_MAX_BYTES) {
580         fuse_reply_err(req, EINVAL);
581         return;
582     }
583 
584     if (!exp->writable) {
585         fuse_reply_err(req, EACCES);
586         return;
587     }
588 
589     /**
590      * Clients will expect short writes at EOF, so we have to limit
591      * offset+size to the image length.
592      */
593     length = blk_getlength(exp->common.blk);
594     if (length < 0) {
595         fuse_reply_err(req, -length);
596         return;
597     }
598 
599     if (offset + size > length) {
600         if (exp->growable) {
601             ret = fuse_do_truncate(exp, offset + size, true, PREALLOC_MODE_OFF);
602             if (ret < 0) {
603                 fuse_reply_err(req, -ret);
604                 return;
605             }
606         } else {
607             size = length - offset;
608         }
609     }
610 
611     ret = blk_pwrite(exp->common.blk, offset, size, buf, 0);
612     if (ret >= 0) {
613         fuse_reply_write(req, size);
614     } else {
615         fuse_reply_err(req, -ret);
616     }
617 }
618 
619 /**
620  * Let clients perform various fallocate() operations.
621  */
622 static void fuse_fallocate(fuse_req_t req, fuse_ino_t inode, int mode,
623                            off_t offset, off_t length,
624                            struct fuse_file_info *fi)
625 {
626     FuseExport *exp = fuse_req_userdata(req);
627     int64_t blk_len;
628     int ret;
629 
630     if (!exp->writable) {
631         fuse_reply_err(req, EACCES);
632         return;
633     }
634 
635     blk_len = blk_getlength(exp->common.blk);
636     if (blk_len < 0) {
637         fuse_reply_err(req, -blk_len);
638         return;
639     }
640 
641 #ifdef CONFIG_FALLOCATE_PUNCH_HOLE
642     if (mode & FALLOC_FL_KEEP_SIZE) {
643         length = MIN(length, blk_len - offset);
644     }
645 #endif /* CONFIG_FALLOCATE_PUNCH_HOLE */
646 
647     if (!mode) {
648         /* We can only fallocate at the EOF with a truncate */
649         if (offset < blk_len) {
650             fuse_reply_err(req, EOPNOTSUPP);
651             return;
652         }
653 
654         if (offset > blk_len) {
655             /* No preallocation needed here */
656             ret = fuse_do_truncate(exp, offset, true, PREALLOC_MODE_OFF);
657             if (ret < 0) {
658                 fuse_reply_err(req, -ret);
659                 return;
660             }
661         }
662 
663         ret = fuse_do_truncate(exp, offset + length, true,
664                                PREALLOC_MODE_FALLOC);
665     }
666 #ifdef CONFIG_FALLOCATE_PUNCH_HOLE
667     else if (mode & FALLOC_FL_PUNCH_HOLE) {
668         if (!(mode & FALLOC_FL_KEEP_SIZE)) {
669             fuse_reply_err(req, EINVAL);
670             return;
671         }
672 
673         do {
674             int size = MIN(length, BDRV_REQUEST_MAX_BYTES);
675 
676             ret = blk_pwrite_zeroes(exp->common.blk, offset, size,
677                                     BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK);
678             if (ret == -ENOTSUP) {
679                 /*
680                  * fallocate() specifies to return EOPNOTSUPP for unsupported
681                  * operations
682                  */
683                 ret = -EOPNOTSUPP;
684             }
685 
686             offset += size;
687             length -= size;
688         } while (ret == 0 && length > 0);
689     }
690 #endif /* CONFIG_FALLOCATE_PUNCH_HOLE */
691 #ifdef CONFIG_FALLOCATE_ZERO_RANGE
692     else if (mode & FALLOC_FL_ZERO_RANGE) {
693         if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + length > blk_len) {
694             /* No need for zeroes, we are going to write them ourselves */
695             ret = fuse_do_truncate(exp, offset + length, false,
696                                    PREALLOC_MODE_OFF);
697             if (ret < 0) {
698                 fuse_reply_err(req, -ret);
699                 return;
700             }
701         }
702 
703         do {
704             int size = MIN(length, BDRV_REQUEST_MAX_BYTES);
705 
706             ret = blk_pwrite_zeroes(exp->common.blk,
707                                     offset, size, 0);
708             offset += size;
709             length -= size;
710         } while (ret == 0 && length > 0);
711     }
712 #endif /* CONFIG_FALLOCATE_ZERO_RANGE */
713     else {
714         ret = -EOPNOTSUPP;
715     }
716 
717     fuse_reply_err(req, ret < 0 ? -ret : 0);
718 }
719 
720 /**
721  * Let clients fsync the exported image.
722  */
723 static void fuse_fsync(fuse_req_t req, fuse_ino_t inode, int datasync,
724                        struct fuse_file_info *fi)
725 {
726     FuseExport *exp = fuse_req_userdata(req);
727     int ret;
728 
729     ret = blk_flush(exp->common.blk);
730     fuse_reply_err(req, ret < 0 ? -ret : 0);
731 }
732 
733 /**
734  * Called before an FD to the exported image is closed.  (libfuse
735  * notes this to be a way to return last-minute errors.)
736  */
737 static void fuse_flush(fuse_req_t req, fuse_ino_t inode,
738                         struct fuse_file_info *fi)
739 {
740     fuse_fsync(req, inode, 1, fi);
741 }
742 
743 #ifdef CONFIG_FUSE_LSEEK
744 /**
745  * Let clients inquire allocation status.
746  */
747 static void fuse_lseek(fuse_req_t req, fuse_ino_t inode, off_t offset,
748                        int whence, struct fuse_file_info *fi)
749 {
750     FuseExport *exp = fuse_req_userdata(req);
751 
752     if (whence != SEEK_HOLE && whence != SEEK_DATA) {
753         fuse_reply_err(req, EINVAL);
754         return;
755     }
756 
757     while (true) {
758         int64_t pnum;
759         int ret;
760 
761         ret = bdrv_block_status_above(blk_bs(exp->common.blk), NULL,
762                                       offset, INT64_MAX, &pnum, NULL, NULL);
763         if (ret < 0) {
764             fuse_reply_err(req, -ret);
765             return;
766         }
767 
768         if (!pnum && (ret & BDRV_BLOCK_EOF)) {
769             int64_t blk_len;
770 
771             /*
772              * If blk_getlength() rounds (e.g. by sectors), then the
773              * export length will be rounded, too.  However,
774              * bdrv_block_status_above() may return EOF at unaligned
775              * offsets.  We must not let this become visible and thus
776              * always simulate a hole between @offset (the real EOF)
777              * and @blk_len (the client-visible EOF).
778              */
779 
780             blk_len = blk_getlength(exp->common.blk);
781             if (blk_len < 0) {
782                 fuse_reply_err(req, -blk_len);
783                 return;
784             }
785 
786             if (offset > blk_len || whence == SEEK_DATA) {
787                 fuse_reply_err(req, ENXIO);
788             } else {
789                 fuse_reply_lseek(req, offset);
790             }
791             return;
792         }
793 
794         if (ret & BDRV_BLOCK_DATA) {
795             if (whence == SEEK_DATA) {
796                 fuse_reply_lseek(req, offset);
797                 return;
798             }
799         } else {
800             if (whence == SEEK_HOLE) {
801                 fuse_reply_lseek(req, offset);
802                 return;
803             }
804         }
805 
806         /* Safety check against infinite loops */
807         if (!pnum) {
808             fuse_reply_err(req, ENXIO);
809             return;
810         }
811 
812         offset += pnum;
813     }
814 }
815 #endif
816 
/*
 * Low-level FUSE operations implemented by this export; passed to
 * fuse_session_new() in setup_fuse_export().  The lseek callback is
 * only provided when built with CONFIG_FUSE_LSEEK.
 */
static const struct fuse_lowlevel_ops fuse_ops = {
    .init       = fuse_init,
    .lookup     = fuse_lookup,
    .getattr    = fuse_getattr,
    .setattr    = fuse_setattr,
    .open       = fuse_open,
    .read       = fuse_read,
    .write      = fuse_write,
    .fallocate  = fuse_fallocate,
    .flush      = fuse_flush,
    .fsync      = fuse_fsync,
#ifdef CONFIG_FUSE_LSEEK
    .lseek      = fuse_lseek,
#endif
};
832 
/*
 * Block export driver for BLOCK_EXPORT_TYPE_FUSE: exports are
 * FuseExport instances that are set up by fuse_export_create(), asked
 * to stop via fuse_export_shutdown(), and torn down by
 * fuse_export_delete().
 */
const BlockExportDriver blk_exp_fuse = {
    .type               = BLOCK_EXPORT_TYPE_FUSE,
    .instance_size      = sizeof(FuseExport),
    .create             = fuse_export_create,
    .delete             = fuse_export_delete,
    .request_shutdown   = fuse_export_shutdown,
};
840