xref: /openbmc/qemu/block/export/fuse.c (revision 1da79ecc)
1 /*
2  * Present a block device as a raw image through FUSE
3  *
4  * Copyright (c) 2020 Max Reitz <mreitz@redhat.com>
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; under version 2 or later of the License.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program; if not, see <http://www.gnu.org/licenses/>.
17  */
18 
19 #define FUSE_USE_VERSION 31
20 
21 #include "qemu/osdep.h"
22 #include "block/aio.h"
23 #include "block/block.h"
24 #include "block/export.h"
25 #include "block/fuse.h"
26 #include "block/qapi.h"
27 #include "qapi/error.h"
28 #include "qapi/qapi-commands-block.h"
29 #include "sysemu/block-backend.h"
30 
31 #include <fuse.h>
32 #include <fuse_lowlevel.h>
33 
34 
35 /* Prevent overly long bounce buffer allocations */
36 #define FUSE_MAX_BOUNCE_BYTES (MIN(BDRV_REQUEST_MAX_BYTES, 64 * 1024 * 1024))
37 
38 
39 typedef struct FuseExport {
40     BlockExport common;
41 
42     struct fuse_session *fuse_session;
43     struct fuse_buf fuse_buf;
44     bool mounted, fd_handler_set_up;
45 
46     char *mountpoint;
47     bool writable;
48     bool growable;
49 } FuseExport;
50 
51 static GHashTable *exports;
52 static const struct fuse_lowlevel_ops fuse_ops;
53 
54 static void fuse_export_shutdown(BlockExport *exp);
55 static void fuse_export_delete(BlockExport *exp);
56 
57 static void init_exports_table(void);
58 
59 static int setup_fuse_export(FuseExport *exp, const char *mountpoint,
60                              Error **errp);
61 static void read_from_fuse_export(void *opaque);
62 
63 static bool is_regular_file(const char *path, Error **errp);
64 
65 
66 static int fuse_export_create(BlockExport *blk_exp,
67                               BlockExportOptions *blk_exp_args,
68                               Error **errp)
69 {
70     FuseExport *exp = container_of(blk_exp, FuseExport, common);
71     BlockExportOptionsFuse *args = &blk_exp_args->u.fuse;
72     int ret;
73 
74     assert(blk_exp_args->type == BLOCK_EXPORT_TYPE_FUSE);
75 
76     /* For growable exports, take the RESIZE permission */
77     if (args->growable) {
78         uint64_t blk_perm, blk_shared_perm;
79 
80         blk_get_perm(exp->common.blk, &blk_perm, &blk_shared_perm);
81 
82         ret = blk_set_perm(exp->common.blk, blk_perm | BLK_PERM_RESIZE,
83                            blk_shared_perm, errp);
84         if (ret < 0) {
85             return ret;
86         }
87     }
88 
89     init_exports_table();
90 
91     /*
92      * It is important to do this check before calling is_regular_file() --
93      * that function will do a stat(), which we would have to handle if we
94      * already exported something on @mountpoint.  But we cannot, because
95      * we are currently caught up here.
96      * (Note that ideally we would want to resolve relative paths here,
97      * but bdrv_make_absolute_filename() might do the wrong thing for
98      * paths that contain colons, and realpath() would resolve symlinks,
99      * which we do not want: The mount point is not going to be the
100      * symlink's destination, but the link itself.)
101      * So this will not catch all potential clashes, but hopefully at
102      * least the most common one of specifying exactly the same path
103      * string twice.
104      */
105     if (g_hash_table_contains(exports, args->mountpoint)) {
106         error_setg(errp, "There already is a FUSE export on '%s'",
107                    args->mountpoint);
108         ret = -EEXIST;
109         goto fail;
110     }
111 
112     if (!is_regular_file(args->mountpoint, errp)) {
113         ret = -EINVAL;
114         goto fail;
115     }
116 
117     exp->mountpoint = g_strdup(args->mountpoint);
118     exp->writable = blk_exp_args->writable;
119     exp->growable = args->growable;
120 
121     ret = setup_fuse_export(exp, args->mountpoint, errp);
122     if (ret < 0) {
123         goto fail;
124     }
125 
126     return 0;
127 
128 fail:
129     fuse_export_delete(blk_exp);
130     return ret;
131 }
132 
133 /**
134  * Allocates the global @exports hash table.
135  */
136 static void init_exports_table(void)
137 {
138     if (exports) {
139         return;
140     }
141 
142     exports = g_hash_table_new_full(g_str_hash, g_str_equal, g_free, NULL);
143 }
144 
145 /**
146  * Create exp->fuse_session and mount it.
147  */
148 static int setup_fuse_export(FuseExport *exp, const char *mountpoint,
149                              Error **errp)
150 {
151     const char *fuse_argv[4];
152     char *mount_opts;
153     struct fuse_args fuse_args;
154     int ret;
155 
156     /* Needs to match what fuse_init() sets.  Only max_read must be supplied. */
157     mount_opts = g_strdup_printf("max_read=%zu", FUSE_MAX_BOUNCE_BYTES);
158 
159     fuse_argv[0] = ""; /* Dummy program name */
160     fuse_argv[1] = "-o";
161     fuse_argv[2] = mount_opts;
162     fuse_argv[3] = NULL;
163     fuse_args = (struct fuse_args)FUSE_ARGS_INIT(3, (char **)fuse_argv);
164 
165     exp->fuse_session = fuse_session_new(&fuse_args, &fuse_ops,
166                                          sizeof(fuse_ops), exp);
167     g_free(mount_opts);
168     if (!exp->fuse_session) {
169         error_setg(errp, "Failed to set up FUSE session");
170         ret = -EIO;
171         goto fail;
172     }
173 
174     ret = fuse_session_mount(exp->fuse_session, mountpoint);
175     if (ret < 0) {
176         error_setg(errp, "Failed to mount FUSE session to export");
177         ret = -EIO;
178         goto fail;
179     }
180     exp->mounted = true;
181 
182     g_hash_table_insert(exports, g_strdup(mountpoint), NULL);
183 
184     aio_set_fd_handler(exp->common.ctx,
185                        fuse_session_fd(exp->fuse_session), true,
186                        read_from_fuse_export, NULL, NULL, exp);
187     exp->fd_handler_set_up = true;
188 
189     return 0;
190 
191 fail:
192     fuse_export_shutdown(&exp->common);
193     return ret;
194 }
195 
196 /**
197  * Callback to be invoked when the FUSE session FD can be read from.
198  * (This is basically the FUSE event loop.)
199  */
200 static void read_from_fuse_export(void *opaque)
201 {
202     FuseExport *exp = opaque;
203     int ret;
204 
205     blk_exp_ref(&exp->common);
206 
207     do {
208         ret = fuse_session_receive_buf(exp->fuse_session, &exp->fuse_buf);
209     } while (ret == -EINTR);
210     if (ret < 0) {
211         goto out;
212     }
213 
214     fuse_session_process_buf(exp->fuse_session, &exp->fuse_buf);
215 
216 out:
217     blk_exp_unref(&exp->common);
218 }
219 
220 static void fuse_export_shutdown(BlockExport *blk_exp)
221 {
222     FuseExport *exp = container_of(blk_exp, FuseExport, common);
223 
224     if (exp->fuse_session) {
225         fuse_session_exit(exp->fuse_session);
226 
227         if (exp->fd_handler_set_up) {
228             aio_set_fd_handler(exp->common.ctx,
229                                fuse_session_fd(exp->fuse_session), true,
230                                NULL, NULL, NULL, NULL);
231             exp->fd_handler_set_up = false;
232         }
233     }
234 
235     if (exp->mountpoint) {
236         /*
237          * Safe to drop now, because we will not handle any requests
238          * for this export anymore anyway.
239          */
240         g_hash_table_remove(exports, exp->mountpoint);
241     }
242 }
243 
244 static void fuse_export_delete(BlockExport *blk_exp)
245 {
246     FuseExport *exp = container_of(blk_exp, FuseExport, common);
247 
248     if (exp->fuse_session) {
249         if (exp->mounted) {
250             fuse_session_unmount(exp->fuse_session);
251         }
252 
253         fuse_session_destroy(exp->fuse_session);
254     }
255 
256     free(exp->fuse_buf.mem);
257     g_free(exp->mountpoint);
258 }
259 
260 /**
261  * Check whether @path points to a regular file.  If not, put an
262  * appropriate message into *errp.
263  */
264 static bool is_regular_file(const char *path, Error **errp)
265 {
266     struct stat statbuf;
267     int ret;
268 
269     ret = stat(path, &statbuf);
270     if (ret < 0) {
271         error_setg_errno(errp, errno, "Failed to stat '%s'", path);
272         return false;
273     }
274 
275     if (!S_ISREG(statbuf.st_mode)) {
276         error_setg(errp, "'%s' is not a regular file", path);
277         return false;
278     }
279 
280     return true;
281 }
282 
283 /**
284  * A chance to set change some parameters supplied to FUSE_INIT.
285  */
286 static void fuse_init(void *userdata, struct fuse_conn_info *conn)
287 {
288     /*
289      * MIN_NON_ZERO() would not be wrong here, but what we set here
290      * must equal what has been passed to fuse_session_new().
291      * Therefore, as long as max_read must be passed as a mount option
292      * (which libfuse claims will be changed at some point), we have
293      * to set max_read to a fixed value here.
294      */
295     conn->max_read = FUSE_MAX_BOUNCE_BYTES;
296 
297     conn->max_write = MIN_NON_ZERO(BDRV_REQUEST_MAX_BYTES, conn->max_write);
298 }
299 
300 /**
301  * Let clients look up files.  Always return ENOENT because we only
302  * care about the mountpoint itself.
303  */
304 static void fuse_lookup(fuse_req_t req, fuse_ino_t parent, const char *name)
305 {
306     fuse_reply_err(req, ENOENT);
307 }
308 
309 /**
310  * Let clients get file attributes (i.e., stat() the file).
311  */
312 static void fuse_getattr(fuse_req_t req, fuse_ino_t inode,
313                          struct fuse_file_info *fi)
314 {
315     struct stat statbuf;
316     int64_t length, allocated_blocks;
317     time_t now = time(NULL);
318     FuseExport *exp = fuse_req_userdata(req);
319     mode_t mode;
320 
321     length = blk_getlength(exp->common.blk);
322     if (length < 0) {
323         fuse_reply_err(req, -length);
324         return;
325     }
326 
327     allocated_blocks = bdrv_get_allocated_file_size(blk_bs(exp->common.blk));
328     if (allocated_blocks <= 0) {
329         allocated_blocks = DIV_ROUND_UP(length, 512);
330     } else {
331         allocated_blocks = DIV_ROUND_UP(allocated_blocks, 512);
332     }
333 
334     mode = S_IFREG | S_IRUSR;
335     if (exp->writable) {
336         mode |= S_IWUSR;
337     }
338 
339     statbuf = (struct stat) {
340         .st_ino     = inode,
341         .st_mode    = mode,
342         .st_nlink   = 1,
343         .st_uid     = getuid(),
344         .st_gid     = getgid(),
345         .st_size    = length,
346         .st_blksize = blk_bs(exp->common.blk)->bl.request_alignment,
347         .st_blocks  = allocated_blocks,
348         .st_atime   = now,
349         .st_mtime   = now,
350         .st_ctime   = now,
351     };
352 
353     fuse_reply_attr(req, &statbuf, 1.);
354 }
355 
356 static int fuse_do_truncate(const FuseExport *exp, int64_t size,
357                             bool req_zero_write, PreallocMode prealloc)
358 {
359     uint64_t blk_perm, blk_shared_perm;
360     BdrvRequestFlags truncate_flags = 0;
361     int ret;
362 
363     if (req_zero_write) {
364         truncate_flags |= BDRV_REQ_ZERO_WRITE;
365     }
366 
367     /* Growable exports have a permanent RESIZE permission */
368     if (!exp->growable) {
369         blk_get_perm(exp->common.blk, &blk_perm, &blk_shared_perm);
370 
371         ret = blk_set_perm(exp->common.blk, blk_perm | BLK_PERM_RESIZE,
372                            blk_shared_perm, NULL);
373         if (ret < 0) {
374             return ret;
375         }
376     }
377 
378     ret = blk_truncate(exp->common.blk, size, true, prealloc,
379                        truncate_flags, NULL);
380 
381     if (!exp->growable) {
382         /* Must succeed, because we are only giving up the RESIZE permission */
383         blk_set_perm(exp->common.blk, blk_perm, blk_shared_perm, &error_abort);
384     }
385 
386     return ret;
387 }
388 
389 /**
390  * Let clients set file attributes.  Only resizing is supported.
391  */
392 static void fuse_setattr(fuse_req_t req, fuse_ino_t inode, struct stat *statbuf,
393                          int to_set, struct fuse_file_info *fi)
394 {
395     FuseExport *exp = fuse_req_userdata(req);
396     int ret;
397 
398     if (!exp->writable) {
399         fuse_reply_err(req, EACCES);
400         return;
401     }
402 
403     if (to_set & ~FUSE_SET_ATTR_SIZE) {
404         fuse_reply_err(req, ENOTSUP);
405         return;
406     }
407 
408     ret = fuse_do_truncate(exp, statbuf->st_size, true, PREALLOC_MODE_OFF);
409     if (ret < 0) {
410         fuse_reply_err(req, -ret);
411         return;
412     }
413 
414     fuse_getattr(req, inode, fi);
415 }
416 
417 /**
418  * Let clients open a file (i.e., the exported image).
419  */
420 static void fuse_open(fuse_req_t req, fuse_ino_t inode,
421                       struct fuse_file_info *fi)
422 {
423     fuse_reply_open(req, fi);
424 }
425 
426 /**
427  * Handle client reads from the exported image.
428  */
429 static void fuse_read(fuse_req_t req, fuse_ino_t inode,
430                       size_t size, off_t offset, struct fuse_file_info *fi)
431 {
432     FuseExport *exp = fuse_req_userdata(req);
433     int64_t length;
434     void *buf;
435     int ret;
436 
437     /* Limited by max_read, should not happen */
438     if (size > FUSE_MAX_BOUNCE_BYTES) {
439         fuse_reply_err(req, EINVAL);
440         return;
441     }
442 
443     /**
444      * Clients will expect short reads at EOF, so we have to limit
445      * offset+size to the image length.
446      */
447     length = blk_getlength(exp->common.blk);
448     if (length < 0) {
449         fuse_reply_err(req, -length);
450         return;
451     }
452 
453     if (offset + size > length) {
454         size = length - offset;
455     }
456 
457     buf = qemu_try_blockalign(blk_bs(exp->common.blk), size);
458     if (!buf) {
459         fuse_reply_err(req, ENOMEM);
460         return;
461     }
462 
463     ret = blk_pread(exp->common.blk, offset, buf, size);
464     if (ret >= 0) {
465         fuse_reply_buf(req, buf, size);
466     } else {
467         fuse_reply_err(req, -ret);
468     }
469 
470     qemu_vfree(buf);
471 }
472 
473 /**
474  * Handle client writes to the exported image.
475  */
476 static void fuse_write(fuse_req_t req, fuse_ino_t inode, const char *buf,
477                        size_t size, off_t offset, struct fuse_file_info *fi)
478 {
479     FuseExport *exp = fuse_req_userdata(req);
480     int64_t length;
481     int ret;
482 
483     /* Limited by max_write, should not happen */
484     if (size > BDRV_REQUEST_MAX_BYTES) {
485         fuse_reply_err(req, EINVAL);
486         return;
487     }
488 
489     if (!exp->writable) {
490         fuse_reply_err(req, EACCES);
491         return;
492     }
493 
494     /**
495      * Clients will expect short writes at EOF, so we have to limit
496      * offset+size to the image length.
497      */
498     length = blk_getlength(exp->common.blk);
499     if (length < 0) {
500         fuse_reply_err(req, -length);
501         return;
502     }
503 
504     if (offset + size > length) {
505         if (exp->growable) {
506             ret = fuse_do_truncate(exp, offset + size, true, PREALLOC_MODE_OFF);
507             if (ret < 0) {
508                 fuse_reply_err(req, -ret);
509                 return;
510             }
511         } else {
512             size = length - offset;
513         }
514     }
515 
516     ret = blk_pwrite(exp->common.blk, offset, buf, size, 0);
517     if (ret >= 0) {
518         fuse_reply_write(req, size);
519     } else {
520         fuse_reply_err(req, -ret);
521     }
522 }
523 
524 /**
525  * Let clients perform various fallocate() operations.
526  */
527 static void fuse_fallocate(fuse_req_t req, fuse_ino_t inode, int mode,
528                            off_t offset, off_t length,
529                            struct fuse_file_info *fi)
530 {
531     FuseExport *exp = fuse_req_userdata(req);
532     int64_t blk_len;
533     int ret;
534 
535     if (!exp->writable) {
536         fuse_reply_err(req, EACCES);
537         return;
538     }
539 
540     blk_len = blk_getlength(exp->common.blk);
541     if (blk_len < 0) {
542         fuse_reply_err(req, -blk_len);
543         return;
544     }
545 
546     if (mode & FALLOC_FL_KEEP_SIZE) {
547         length = MIN(length, blk_len - offset);
548     }
549 
550     if (mode & FALLOC_FL_PUNCH_HOLE) {
551         if (!(mode & FALLOC_FL_KEEP_SIZE)) {
552             fuse_reply_err(req, EINVAL);
553             return;
554         }
555 
556         do {
557             int size = MIN(length, BDRV_REQUEST_MAX_BYTES);
558 
559             ret = blk_pdiscard(exp->common.blk, offset, size);
560             offset += size;
561             length -= size;
562         } while (ret == 0 && length > 0);
563     } else if (mode & FALLOC_FL_ZERO_RANGE) {
564         if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + length > blk_len) {
565             /* No need for zeroes, we are going to write them ourselves */
566             ret = fuse_do_truncate(exp, offset + length, false,
567                                    PREALLOC_MODE_OFF);
568             if (ret < 0) {
569                 fuse_reply_err(req, -ret);
570                 return;
571             }
572         }
573 
574         do {
575             int size = MIN(length, BDRV_REQUEST_MAX_BYTES);
576 
577             ret = blk_pwrite_zeroes(exp->common.blk,
578                                     offset, size, 0);
579             offset += size;
580             length -= size;
581         } while (ret == 0 && length > 0);
582     } else if (!mode) {
583         /* We can only fallocate at the EOF with a truncate */
584         if (offset < blk_len) {
585             fuse_reply_err(req, EOPNOTSUPP);
586             return;
587         }
588 
589         if (offset > blk_len) {
590             /* No preallocation needed here */
591             ret = fuse_do_truncate(exp, offset, true, PREALLOC_MODE_OFF);
592             if (ret < 0) {
593                 fuse_reply_err(req, -ret);
594                 return;
595             }
596         }
597 
598         ret = fuse_do_truncate(exp, offset + length, true,
599                                PREALLOC_MODE_FALLOC);
600     } else {
601         ret = -EOPNOTSUPP;
602     }
603 
604     fuse_reply_err(req, ret < 0 ? -ret : 0);
605 }
606 
607 /**
608  * Let clients fsync the exported image.
609  */
610 static void fuse_fsync(fuse_req_t req, fuse_ino_t inode, int datasync,
611                        struct fuse_file_info *fi)
612 {
613     FuseExport *exp = fuse_req_userdata(req);
614     int ret;
615 
616     ret = blk_flush(exp->common.blk);
617     fuse_reply_err(req, ret < 0 ? -ret : 0);
618 }
619 
620 /**
621  * Called before an FD to the exported image is closed.  (libfuse
622  * notes this to be a way to return last-minute errors.)
623  */
624 static void fuse_flush(fuse_req_t req, fuse_ino_t inode,
625                         struct fuse_file_info *fi)
626 {
627     fuse_fsync(req, inode, 1, fi);
628 }
629 
630 #ifdef CONFIG_FUSE_LSEEK
631 /**
632  * Let clients inquire allocation status.
633  */
634 static void fuse_lseek(fuse_req_t req, fuse_ino_t inode, off_t offset,
635                        int whence, struct fuse_file_info *fi)
636 {
637     FuseExport *exp = fuse_req_userdata(req);
638 
639     if (whence != SEEK_HOLE && whence != SEEK_DATA) {
640         fuse_reply_err(req, EINVAL);
641         return;
642     }
643 
644     while (true) {
645         int64_t pnum;
646         int ret;
647 
648         ret = bdrv_block_status_above(blk_bs(exp->common.blk), NULL,
649                                       offset, INT64_MAX, &pnum, NULL, NULL);
650         if (ret < 0) {
651             fuse_reply_err(req, -ret);
652             return;
653         }
654 
655         if (!pnum && (ret & BDRV_BLOCK_EOF)) {
656             int64_t blk_len;
657 
658             /*
659              * If blk_getlength() rounds (e.g. by sectors), then the
660              * export length will be rounded, too.  However,
661              * bdrv_block_status_above() may return EOF at unaligned
662              * offsets.  We must not let this become visible and thus
663              * always simulate a hole between @offset (the real EOF)
664              * and @blk_len (the client-visible EOF).
665              */
666 
667             blk_len = blk_getlength(exp->common.blk);
668             if (blk_len < 0) {
669                 fuse_reply_err(req, -blk_len);
670                 return;
671             }
672 
673             if (offset > blk_len || whence == SEEK_DATA) {
674                 fuse_reply_err(req, ENXIO);
675             } else {
676                 fuse_reply_lseek(req, offset);
677             }
678             return;
679         }
680 
681         if (ret & BDRV_BLOCK_DATA) {
682             if (whence == SEEK_DATA) {
683                 fuse_reply_lseek(req, offset);
684                 return;
685             }
686         } else {
687             if (whence == SEEK_HOLE) {
688                 fuse_reply_lseek(req, offset);
689                 return;
690             }
691         }
692 
693         /* Safety check against infinite loops */
694         if (!pnum) {
695             fuse_reply_err(req, ENXIO);
696             return;
697         }
698 
699         offset += pnum;
700     }
701 }
702 #endif
703 
704 static const struct fuse_lowlevel_ops fuse_ops = {
705     .init       = fuse_init,
706     .lookup     = fuse_lookup,
707     .getattr    = fuse_getattr,
708     .setattr    = fuse_setattr,
709     .open       = fuse_open,
710     .read       = fuse_read,
711     .write      = fuse_write,
712     .fallocate  = fuse_fallocate,
713     .flush      = fuse_flush,
714     .fsync      = fuse_fsync,
715 #ifdef CONFIG_FUSE_LSEEK
716     .lseek      = fuse_lseek,
717 #endif
718 };
719 
720 const BlockExportDriver blk_exp_fuse = {
721     .type               = BLOCK_EXPORT_TYPE_FUSE,
722     .instance_size      = sizeof(FuseExport),
723     .create             = fuse_export_create,
724     .delete             = fuse_export_delete,
725     .request_shutdown   = fuse_export_shutdown,
726 };
727