xref: /openbmc/qemu/block/export/fuse.c (revision 8fc54f94)
1 /*
2  * Present a block device as a raw image through FUSE
3  *
4  * Copyright (c) 2020 Max Reitz <mreitz@redhat.com>
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; under version 2 or later of the License.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program; if not, see <http://www.gnu.org/licenses/>.
17  */
18 
19 #define FUSE_USE_VERSION 31
20 
21 #include "qemu/osdep.h"
22 #include "block/aio.h"
23 #include "block/block.h"
24 #include "block/export.h"
25 #include "block/fuse.h"
26 #include "block/qapi.h"
27 #include "qapi/error.h"
28 #include "qapi/qapi-commands-block.h"
29 #include "sysemu/block-backend.h"
30 
31 #include <fuse.h>
32 #include <fuse_lowlevel.h>
33 
34 
35 /* Prevent overly long bounce buffer allocations */
36 #define FUSE_MAX_BOUNCE_BYTES (MIN(BDRV_REQUEST_MAX_BYTES, 64 * 1024 * 1024))
37 
38 
39 typedef struct FuseExport {
40     BlockExport common;
41 
42     struct fuse_session *fuse_session;
43     struct fuse_buf fuse_buf;
44     bool mounted, fd_handler_set_up;
45 
46     char *mountpoint;
47     bool writable;
48     bool growable;
49     /* Whether allow_other was used as a mount option or not */
50     bool allow_other;
51 } FuseExport;
52 
53 static GHashTable *exports;
54 static const struct fuse_lowlevel_ops fuse_ops;
55 
56 static void fuse_export_shutdown(BlockExport *exp);
57 static void fuse_export_delete(BlockExport *exp);
58 
59 static void init_exports_table(void);
60 
61 static int setup_fuse_export(FuseExport *exp, const char *mountpoint,
62                              bool allow_other, Error **errp);
63 static void read_from_fuse_export(void *opaque);
64 
65 static bool is_regular_file(const char *path, Error **errp);
66 
67 
68 static int fuse_export_create(BlockExport *blk_exp,
69                               BlockExportOptions *blk_exp_args,
70                               Error **errp)
71 {
72     FuseExport *exp = container_of(blk_exp, FuseExport, common);
73     BlockExportOptionsFuse *args = &blk_exp_args->u.fuse;
74     int ret;
75 
76     assert(blk_exp_args->type == BLOCK_EXPORT_TYPE_FUSE);
77 
78     /* For growable exports, take the RESIZE permission */
79     if (args->growable) {
80         uint64_t blk_perm, blk_shared_perm;
81 
82         blk_get_perm(exp->common.blk, &blk_perm, &blk_shared_perm);
83 
84         ret = blk_set_perm(exp->common.blk, blk_perm | BLK_PERM_RESIZE,
85                            blk_shared_perm, errp);
86         if (ret < 0) {
87             return ret;
88         }
89     }
90 
91     init_exports_table();
92 
93     /*
94      * It is important to do this check before calling is_regular_file() --
95      * that function will do a stat(), which we would have to handle if we
96      * already exported something on @mountpoint.  But we cannot, because
97      * we are currently caught up here.
98      * (Note that ideally we would want to resolve relative paths here,
99      * but bdrv_make_absolute_filename() might do the wrong thing for
100      * paths that contain colons, and realpath() would resolve symlinks,
101      * which we do not want: The mount point is not going to be the
102      * symlink's destination, but the link itself.)
103      * So this will not catch all potential clashes, but hopefully at
104      * least the most common one of specifying exactly the same path
105      * string twice.
106      */
107     if (g_hash_table_contains(exports, args->mountpoint)) {
108         error_setg(errp, "There already is a FUSE export on '%s'",
109                    args->mountpoint);
110         ret = -EEXIST;
111         goto fail;
112     }
113 
114     if (!is_regular_file(args->mountpoint, errp)) {
115         ret = -EINVAL;
116         goto fail;
117     }
118 
119     exp->mountpoint = g_strdup(args->mountpoint);
120     exp->writable = blk_exp_args->writable;
121     exp->growable = args->growable;
122 
123     /* set default */
124     if (!args->has_allow_other) {
125         args->allow_other = FUSE_EXPORT_ALLOW_OTHER_AUTO;
126     }
127 
128     if (args->allow_other == FUSE_EXPORT_ALLOW_OTHER_AUTO) {
129         /* Ignore errors on our first attempt */
130         ret = setup_fuse_export(exp, args->mountpoint, true, NULL);
131         exp->allow_other = ret == 0;
132         if (ret < 0) {
133             ret = setup_fuse_export(exp, args->mountpoint, false, errp);
134         }
135     } else {
136         exp->allow_other = args->allow_other == FUSE_EXPORT_ALLOW_OTHER_ON;
137         ret = setup_fuse_export(exp, args->mountpoint, exp->allow_other, errp);
138     }
139     if (ret < 0) {
140         goto fail;
141     }
142 
143     return 0;
144 
145 fail:
146     fuse_export_delete(blk_exp);
147     return ret;
148 }
149 
150 /**
151  * Allocates the global @exports hash table.
152  */
153 static void init_exports_table(void)
154 {
155     if (exports) {
156         return;
157     }
158 
159     exports = g_hash_table_new_full(g_str_hash, g_str_equal, g_free, NULL);
160 }
161 
162 /**
163  * Create exp->fuse_session and mount it.
164  */
165 static int setup_fuse_export(FuseExport *exp, const char *mountpoint,
166                              bool allow_other, Error **errp)
167 {
168     const char *fuse_argv[4];
169     char *mount_opts;
170     struct fuse_args fuse_args;
171     int ret;
172 
173     /*
174      * max_read needs to match what fuse_init() sets.
175      * max_write need not be supplied.
176      */
177     mount_opts = g_strdup_printf("max_read=%zu,default_permissions%s",
178                                  FUSE_MAX_BOUNCE_BYTES,
179                                  allow_other ? ",allow_other" : "");
180 
181     fuse_argv[0] = ""; /* Dummy program name */
182     fuse_argv[1] = "-o";
183     fuse_argv[2] = mount_opts;
184     fuse_argv[3] = NULL;
185     fuse_args = (struct fuse_args)FUSE_ARGS_INIT(3, (char **)fuse_argv);
186 
187     exp->fuse_session = fuse_session_new(&fuse_args, &fuse_ops,
188                                          sizeof(fuse_ops), exp);
189     g_free(mount_opts);
190     if (!exp->fuse_session) {
191         error_setg(errp, "Failed to set up FUSE session");
192         ret = -EIO;
193         goto fail;
194     }
195 
196     ret = fuse_session_mount(exp->fuse_session, mountpoint);
197     if (ret < 0) {
198         error_setg(errp, "Failed to mount FUSE session to export");
199         ret = -EIO;
200         goto fail;
201     }
202     exp->mounted = true;
203 
204     g_hash_table_insert(exports, g_strdup(mountpoint), NULL);
205 
206     aio_set_fd_handler(exp->common.ctx,
207                        fuse_session_fd(exp->fuse_session), true,
208                        read_from_fuse_export, NULL, NULL, exp);
209     exp->fd_handler_set_up = true;
210 
211     return 0;
212 
213 fail:
214     fuse_export_shutdown(&exp->common);
215     return ret;
216 }
217 
218 /**
219  * Callback to be invoked when the FUSE session FD can be read from.
220  * (This is basically the FUSE event loop.)
221  */
222 static void read_from_fuse_export(void *opaque)
223 {
224     FuseExport *exp = opaque;
225     int ret;
226 
227     blk_exp_ref(&exp->common);
228 
229     do {
230         ret = fuse_session_receive_buf(exp->fuse_session, &exp->fuse_buf);
231     } while (ret == -EINTR);
232     if (ret < 0) {
233         goto out;
234     }
235 
236     fuse_session_process_buf(exp->fuse_session, &exp->fuse_buf);
237 
238 out:
239     blk_exp_unref(&exp->common);
240 }
241 
242 static void fuse_export_shutdown(BlockExport *blk_exp)
243 {
244     FuseExport *exp = container_of(blk_exp, FuseExport, common);
245 
246     if (exp->fuse_session) {
247         fuse_session_exit(exp->fuse_session);
248 
249         if (exp->fd_handler_set_up) {
250             aio_set_fd_handler(exp->common.ctx,
251                                fuse_session_fd(exp->fuse_session), true,
252                                NULL, NULL, NULL, NULL);
253             exp->fd_handler_set_up = false;
254         }
255     }
256 
257     if (exp->mountpoint) {
258         /*
259          * Safe to drop now, because we will not handle any requests
260          * for this export anymore anyway.
261          */
262         g_hash_table_remove(exports, exp->mountpoint);
263     }
264 }
265 
266 static void fuse_export_delete(BlockExport *blk_exp)
267 {
268     FuseExport *exp = container_of(blk_exp, FuseExport, common);
269 
270     if (exp->fuse_session) {
271         if (exp->mounted) {
272             fuse_session_unmount(exp->fuse_session);
273         }
274 
275         fuse_session_destroy(exp->fuse_session);
276     }
277 
278     free(exp->fuse_buf.mem);
279     g_free(exp->mountpoint);
280 }
281 
282 /**
283  * Check whether @path points to a regular file.  If not, put an
284  * appropriate message into *errp.
285  */
286 static bool is_regular_file(const char *path, Error **errp)
287 {
288     struct stat statbuf;
289     int ret;
290 
291     ret = stat(path, &statbuf);
292     if (ret < 0) {
293         error_setg_errno(errp, errno, "Failed to stat '%s'", path);
294         return false;
295     }
296 
297     if (!S_ISREG(statbuf.st_mode)) {
298         error_setg(errp, "'%s' is not a regular file", path);
299         return false;
300     }
301 
302     return true;
303 }
304 
305 /**
306  * A chance to set change some parameters supplied to FUSE_INIT.
307  */
308 static void fuse_init(void *userdata, struct fuse_conn_info *conn)
309 {
310     /*
311      * MIN_NON_ZERO() would not be wrong here, but what we set here
312      * must equal what has been passed to fuse_session_new().
313      * Therefore, as long as max_read must be passed as a mount option
314      * (which libfuse claims will be changed at some point), we have
315      * to set max_read to a fixed value here.
316      */
317     conn->max_read = FUSE_MAX_BOUNCE_BYTES;
318 
319     conn->max_write = MIN_NON_ZERO(BDRV_REQUEST_MAX_BYTES, conn->max_write);
320 }
321 
322 /**
323  * Let clients look up files.  Always return ENOENT because we only
324  * care about the mountpoint itself.
325  */
326 static void fuse_lookup(fuse_req_t req, fuse_ino_t parent, const char *name)
327 {
328     fuse_reply_err(req, ENOENT);
329 }
330 
331 /**
332  * Let clients get file attributes (i.e., stat() the file).
333  */
334 static void fuse_getattr(fuse_req_t req, fuse_ino_t inode,
335                          struct fuse_file_info *fi)
336 {
337     struct stat statbuf;
338     int64_t length, allocated_blocks;
339     time_t now = time(NULL);
340     FuseExport *exp = fuse_req_userdata(req);
341     mode_t mode;
342 
343     length = blk_getlength(exp->common.blk);
344     if (length < 0) {
345         fuse_reply_err(req, -length);
346         return;
347     }
348 
349     allocated_blocks = bdrv_get_allocated_file_size(blk_bs(exp->common.blk));
350     if (allocated_blocks <= 0) {
351         allocated_blocks = DIV_ROUND_UP(length, 512);
352     } else {
353         allocated_blocks = DIV_ROUND_UP(allocated_blocks, 512);
354     }
355 
356     mode = S_IFREG | S_IRUSR;
357     if (exp->writable) {
358         mode |= S_IWUSR;
359     }
360 
361     statbuf = (struct stat) {
362         .st_ino     = inode,
363         .st_mode    = mode,
364         .st_nlink   = 1,
365         .st_uid     = getuid(),
366         .st_gid     = getgid(),
367         .st_size    = length,
368         .st_blksize = blk_bs(exp->common.blk)->bl.request_alignment,
369         .st_blocks  = allocated_blocks,
370         .st_atime   = now,
371         .st_mtime   = now,
372         .st_ctime   = now,
373     };
374 
375     fuse_reply_attr(req, &statbuf, 1.);
376 }
377 
378 static int fuse_do_truncate(const FuseExport *exp, int64_t size,
379                             bool req_zero_write, PreallocMode prealloc)
380 {
381     uint64_t blk_perm, blk_shared_perm;
382     BdrvRequestFlags truncate_flags = 0;
383     int ret;
384 
385     if (req_zero_write) {
386         truncate_flags |= BDRV_REQ_ZERO_WRITE;
387     }
388 
389     /* Growable exports have a permanent RESIZE permission */
390     if (!exp->growable) {
391         blk_get_perm(exp->common.blk, &blk_perm, &blk_shared_perm);
392 
393         ret = blk_set_perm(exp->common.blk, blk_perm | BLK_PERM_RESIZE,
394                            blk_shared_perm, NULL);
395         if (ret < 0) {
396             return ret;
397         }
398     }
399 
400     ret = blk_truncate(exp->common.blk, size, true, prealloc,
401                        truncate_flags, NULL);
402 
403     if (!exp->growable) {
404         /* Must succeed, because we are only giving up the RESIZE permission */
405         blk_set_perm(exp->common.blk, blk_perm, blk_shared_perm, &error_abort);
406     }
407 
408     return ret;
409 }
410 
411 /**
412  * Let clients set file attributes.  Only resizing is supported.
413  */
414 static void fuse_setattr(fuse_req_t req, fuse_ino_t inode, struct stat *statbuf,
415                          int to_set, struct fuse_file_info *fi)
416 {
417     FuseExport *exp = fuse_req_userdata(req);
418     int ret;
419 
420     if (!exp->writable) {
421         fuse_reply_err(req, EACCES);
422         return;
423     }
424 
425     if (to_set & ~FUSE_SET_ATTR_SIZE) {
426         fuse_reply_err(req, ENOTSUP);
427         return;
428     }
429 
430     ret = fuse_do_truncate(exp, statbuf->st_size, true, PREALLOC_MODE_OFF);
431     if (ret < 0) {
432         fuse_reply_err(req, -ret);
433         return;
434     }
435 
436     fuse_getattr(req, inode, fi);
437 }
438 
439 /**
440  * Let clients open a file (i.e., the exported image).
441  */
442 static void fuse_open(fuse_req_t req, fuse_ino_t inode,
443                       struct fuse_file_info *fi)
444 {
445     fuse_reply_open(req, fi);
446 }
447 
448 /**
449  * Handle client reads from the exported image.
450  */
451 static void fuse_read(fuse_req_t req, fuse_ino_t inode,
452                       size_t size, off_t offset, struct fuse_file_info *fi)
453 {
454     FuseExport *exp = fuse_req_userdata(req);
455     int64_t length;
456     void *buf;
457     int ret;
458 
459     /* Limited by max_read, should not happen */
460     if (size > FUSE_MAX_BOUNCE_BYTES) {
461         fuse_reply_err(req, EINVAL);
462         return;
463     }
464 
465     /**
466      * Clients will expect short reads at EOF, so we have to limit
467      * offset+size to the image length.
468      */
469     length = blk_getlength(exp->common.blk);
470     if (length < 0) {
471         fuse_reply_err(req, -length);
472         return;
473     }
474 
475     if (offset + size > length) {
476         size = length - offset;
477     }
478 
479     buf = qemu_try_blockalign(blk_bs(exp->common.blk), size);
480     if (!buf) {
481         fuse_reply_err(req, ENOMEM);
482         return;
483     }
484 
485     ret = blk_pread(exp->common.blk, offset, buf, size);
486     if (ret >= 0) {
487         fuse_reply_buf(req, buf, size);
488     } else {
489         fuse_reply_err(req, -ret);
490     }
491 
492     qemu_vfree(buf);
493 }
494 
495 /**
496  * Handle client writes to the exported image.
497  */
498 static void fuse_write(fuse_req_t req, fuse_ino_t inode, const char *buf,
499                        size_t size, off_t offset, struct fuse_file_info *fi)
500 {
501     FuseExport *exp = fuse_req_userdata(req);
502     int64_t length;
503     int ret;
504 
505     /* Limited by max_write, should not happen */
506     if (size > BDRV_REQUEST_MAX_BYTES) {
507         fuse_reply_err(req, EINVAL);
508         return;
509     }
510 
511     if (!exp->writable) {
512         fuse_reply_err(req, EACCES);
513         return;
514     }
515 
516     /**
517      * Clients will expect short writes at EOF, so we have to limit
518      * offset+size to the image length.
519      */
520     length = blk_getlength(exp->common.blk);
521     if (length < 0) {
522         fuse_reply_err(req, -length);
523         return;
524     }
525 
526     if (offset + size > length) {
527         if (exp->growable) {
528             ret = fuse_do_truncate(exp, offset + size, true, PREALLOC_MODE_OFF);
529             if (ret < 0) {
530                 fuse_reply_err(req, -ret);
531                 return;
532             }
533         } else {
534             size = length - offset;
535         }
536     }
537 
538     ret = blk_pwrite(exp->common.blk, offset, buf, size, 0);
539     if (ret >= 0) {
540         fuse_reply_write(req, size);
541     } else {
542         fuse_reply_err(req, -ret);
543     }
544 }
545 
546 /**
547  * Let clients perform various fallocate() operations.
548  */
549 static void fuse_fallocate(fuse_req_t req, fuse_ino_t inode, int mode,
550                            off_t offset, off_t length,
551                            struct fuse_file_info *fi)
552 {
553     FuseExport *exp = fuse_req_userdata(req);
554     int64_t blk_len;
555     int ret;
556 
557     if (!exp->writable) {
558         fuse_reply_err(req, EACCES);
559         return;
560     }
561 
562     blk_len = blk_getlength(exp->common.blk);
563     if (blk_len < 0) {
564         fuse_reply_err(req, -blk_len);
565         return;
566     }
567 
568     if (mode & FALLOC_FL_KEEP_SIZE) {
569         length = MIN(length, blk_len - offset);
570     }
571 
572     if (mode & FALLOC_FL_PUNCH_HOLE) {
573         if (!(mode & FALLOC_FL_KEEP_SIZE)) {
574             fuse_reply_err(req, EINVAL);
575             return;
576         }
577 
578         do {
579             int size = MIN(length, BDRV_REQUEST_MAX_BYTES);
580 
581             ret = blk_pdiscard(exp->common.blk, offset, size);
582             offset += size;
583             length -= size;
584         } while (ret == 0 && length > 0);
585     } else if (mode & FALLOC_FL_ZERO_RANGE) {
586         if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + length > blk_len) {
587             /* No need for zeroes, we are going to write them ourselves */
588             ret = fuse_do_truncate(exp, offset + length, false,
589                                    PREALLOC_MODE_OFF);
590             if (ret < 0) {
591                 fuse_reply_err(req, -ret);
592                 return;
593             }
594         }
595 
596         do {
597             int size = MIN(length, BDRV_REQUEST_MAX_BYTES);
598 
599             ret = blk_pwrite_zeroes(exp->common.blk,
600                                     offset, size, 0);
601             offset += size;
602             length -= size;
603         } while (ret == 0 && length > 0);
604     } else if (!mode) {
605         /* We can only fallocate at the EOF with a truncate */
606         if (offset < blk_len) {
607             fuse_reply_err(req, EOPNOTSUPP);
608             return;
609         }
610 
611         if (offset > blk_len) {
612             /* No preallocation needed here */
613             ret = fuse_do_truncate(exp, offset, true, PREALLOC_MODE_OFF);
614             if (ret < 0) {
615                 fuse_reply_err(req, -ret);
616                 return;
617             }
618         }
619 
620         ret = fuse_do_truncate(exp, offset + length, true,
621                                PREALLOC_MODE_FALLOC);
622     } else {
623         ret = -EOPNOTSUPP;
624     }
625 
626     fuse_reply_err(req, ret < 0 ? -ret : 0);
627 }
628 
629 /**
630  * Let clients fsync the exported image.
631  */
632 static void fuse_fsync(fuse_req_t req, fuse_ino_t inode, int datasync,
633                        struct fuse_file_info *fi)
634 {
635     FuseExport *exp = fuse_req_userdata(req);
636     int ret;
637 
638     ret = blk_flush(exp->common.blk);
639     fuse_reply_err(req, ret < 0 ? -ret : 0);
640 }
641 
642 /**
643  * Called before an FD to the exported image is closed.  (libfuse
644  * notes this to be a way to return last-minute errors.)
645  */
646 static void fuse_flush(fuse_req_t req, fuse_ino_t inode,
647                         struct fuse_file_info *fi)
648 {
649     fuse_fsync(req, inode, 1, fi);
650 }
651 
#ifdef CONFIG_FUSE_LSEEK
/**
 * lseek handler for SEEK_DATA and SEEK_HOLE, i.e. a way for clients to
 * inquire the allocation status of the image.
 *
 * Starting at @offset, the block status is queried extent by extent
 * until one of the requested kind (data or hole) is found; the reply
 * is the offset where that extent starts.  Any other @whence yields
 * EINVAL.  ENXIO is the reply if SEEK_DATA reaches the end of the
 * image, if the search ends up past the client-visible image length,
 * or if the block status query makes no progress.
 */
static void fuse_lseek(fuse_req_t req, fuse_ino_t inode, off_t offset,
                       int whence, struct fuse_file_info *fi)
{
    FuseExport *exp = fuse_req_userdata(req);
    const bool want_data = whence == SEEK_DATA;
    const bool want_hole = whence == SEEK_HOLE;

    if (!want_data && !want_hole) {
        fuse_reply_err(req, EINVAL);
        return;
    }

    for (;;) {
        int64_t pnum;
        bool is_data;
        int ret;

        ret = bdrv_block_status_above(blk_bs(exp->common.blk), NULL,
                                      offset, INT64_MAX, &pnum, NULL, NULL);
        if (ret < 0) {
            fuse_reply_err(req, -ret);
            return;
        }

        if (pnum == 0 && (ret & BDRV_BLOCK_EOF)) {
            int64_t blk_len;

            /*
             * blk_getlength() may round (e.g. to sectors), in which
             * case the export length is rounded as well, while
             * bdrv_block_status_above() may report EOF at an unaligned
             * offset.  Clients must not see that, so a hole is always
             * simulated between @offset (the real EOF) and @blk_len
             * (the client-visible EOF).
             */
            blk_len = blk_getlength(exp->common.blk);
            if (blk_len < 0) {
                fuse_reply_err(req, -blk_len);
                return;
            }

            if (want_data || offset > blk_len) {
                fuse_reply_err(req, ENXIO);
            } else {
                fuse_reply_lseek(req, offset);
            }
            return;
        }

        /* Does the extent starting at @offset have the requested kind? */
        is_data = ret & BDRV_BLOCK_DATA;
        if ((is_data && want_data) || (!is_data && want_hole)) {
            fuse_reply_lseek(req, offset);
            return;
        }

        /* Safety check against infinite loops */
        if (pnum == 0) {
            fuse_reply_err(req, ENXIO);
            return;
        }

        offset += pnum;
    }
}
#endif
725 
726 static const struct fuse_lowlevel_ops fuse_ops = {
727     .init       = fuse_init,
728     .lookup     = fuse_lookup,
729     .getattr    = fuse_getattr,
730     .setattr    = fuse_setattr,
731     .open       = fuse_open,
732     .read       = fuse_read,
733     .write      = fuse_write,
734     .fallocate  = fuse_fallocate,
735     .flush      = fuse_flush,
736     .fsync      = fuse_fsync,
737 #ifdef CONFIG_FUSE_LSEEK
738     .lseek      = fuse_lseek,
739 #endif
740 };
741 
742 const BlockExportDriver blk_exp_fuse = {
743     .type               = BLOCK_EXPORT_TYPE_FUSE,
744     .instance_size      = sizeof(FuseExport),
745     .create             = fuse_export_create,
746     .delete             = fuse_export_delete,
747     .request_shutdown   = fuse_export_shutdown,
748 };
749