1 /* 2 * Present a block device as a raw image through FUSE 3 * 4 * Copyright (c) 2020 Max Reitz <mreitz@redhat.com> 5 * 6 * This program is free software; you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License as published by 8 * the Free Software Foundation; under version 2 or later of the License. 9 * 10 * This program is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 * GNU General Public License for more details. 14 * 15 * You should have received a copy of the GNU General Public License 16 * along with this program; if not, see <http://www.gnu.org/licenses/>. 17 */ 18 19 #define FUSE_USE_VERSION 31 20 21 #include "qemu/osdep.h" 22 #include "block/aio.h" 23 #include "block/block.h" 24 #include "block/export.h" 25 #include "block/fuse.h" 26 #include "block/qapi.h" 27 #include "qapi/error.h" 28 #include "qapi/qapi-commands-block.h" 29 #include "sysemu/block-backend.h" 30 31 #include <fuse.h> 32 #include <fuse_lowlevel.h> 33 34 35 /* Prevent overly long bounce buffer allocations */ 36 #define FUSE_MAX_BOUNCE_BYTES (MIN(BDRV_REQUEST_MAX_BYTES, 64 * 1024 * 1024)) 37 38 39 typedef struct FuseExport { 40 BlockExport common; 41 42 struct fuse_session *fuse_session; 43 struct fuse_buf fuse_buf; 44 bool mounted, fd_handler_set_up; 45 46 char *mountpoint; 47 bool writable; 48 bool growable; 49 /* Whether allow_other was used as a mount option or not */ 50 bool allow_other; 51 } FuseExport; 52 53 static GHashTable *exports; 54 static const struct fuse_lowlevel_ops fuse_ops; 55 56 static void fuse_export_shutdown(BlockExport *exp); 57 static void fuse_export_delete(BlockExport *exp); 58 59 static void init_exports_table(void); 60 61 static int setup_fuse_export(FuseExport *exp, const char *mountpoint, 62 bool allow_other, Error **errp); 63 static void read_from_fuse_export(void *opaque); 64 65 static bool is_regular_file(const char *path, Error **errp); 66 67 68 static int fuse_export_create(BlockExport *blk_exp, 69 BlockExportOptions *blk_exp_args, 70 Error **errp) 71 { 72 FuseExport *exp = container_of(blk_exp, FuseExport, common); 73 BlockExportOptionsFuse *args = &blk_exp_args->u.fuse; 74 int ret; 75 76 assert(blk_exp_args->type == BLOCK_EXPORT_TYPE_FUSE); 77 78 /* For growable exports, take the RESIZE permission */ 79 if (args->growable) { 80 uint64_t blk_perm, blk_shared_perm; 81 82 blk_get_perm(exp->common.blk, &blk_perm, &blk_shared_perm); 83 84 ret = blk_set_perm(exp->common.blk, blk_perm | BLK_PERM_RESIZE, 85 blk_shared_perm, errp); 86 if (ret < 0) { 87 return ret; 88 } 89 } 90 91 init_exports_table(); 92 93 /* 94 * It is important to do this check before calling is_regular_file() -- 95 * that function will do a stat(), which we would have to handle if we 96 * already exported something on @mountpoint. But we cannot, because 97 * we are currently caught up here. 98 * (Note that ideally we would want to resolve relative paths here, 99 * but bdrv_make_absolute_filename() might do the wrong thing for 100 * paths that contain colons, and realpath() would resolve symlinks, 101 * which we do not want: The mount point is not going to be the 102 * symlink's destination, but the link itself.) 103 * So this will not catch all potential clashes, but hopefully at 104 * least the most common one of specifying exactly the same path 105 * string twice. 106 */ 107 if (g_hash_table_contains(exports, args->mountpoint)) { 108 error_setg(errp, "There already is a FUSE export on '%s'", 109 args->mountpoint); 110 ret = -EEXIST; 111 goto fail; 112 } 113 114 if (!is_regular_file(args->mountpoint, errp)) { 115 ret = -EINVAL; 116 goto fail; 117 } 118 119 exp->mountpoint = g_strdup(args->mountpoint); 120 exp->writable = blk_exp_args->writable; 121 exp->growable = args->growable; 122 123 /* set default */ 124 if (!args->has_allow_other) { 125 args->allow_other = FUSE_EXPORT_ALLOW_OTHER_AUTO; 126 } 127 128 if (args->allow_other == FUSE_EXPORT_ALLOW_OTHER_AUTO) { 129 /* Ignore errors on our first attempt */ 130 ret = setup_fuse_export(exp, args->mountpoint, true, NULL); 131 exp->allow_other = ret == 0; 132 if (ret < 0) { 133 ret = setup_fuse_export(exp, args->mountpoint, false, errp); 134 } 135 } else { 136 exp->allow_other = args->allow_other == FUSE_EXPORT_ALLOW_OTHER_ON; 137 ret = setup_fuse_export(exp, args->mountpoint, exp->allow_other, errp); 138 } 139 if (ret < 0) { 140 goto fail; 141 } 142 143 return 0; 144 145 fail: 146 fuse_export_delete(blk_exp); 147 return ret; 148 } 149 150 /** 151 * Allocates the global @exports hash table. 152 */ 153 static void init_exports_table(void) 154 { 155 if (exports) { 156 return; 157 } 158 159 exports = g_hash_table_new_full(g_str_hash, g_str_equal, g_free, NULL); 160 } 161 162 /** 163 * Create exp->fuse_session and mount it. 164 */ 165 static int setup_fuse_export(FuseExport *exp, const char *mountpoint, 166 bool allow_other, Error **errp) 167 { 168 const char *fuse_argv[4]; 169 char *mount_opts; 170 struct fuse_args fuse_args; 171 int ret; 172 173 /* 174 * max_read needs to match what fuse_init() sets. 175 * max_write need not be supplied. 176 */ 177 mount_opts = g_strdup_printf("max_read=%zu,default_permissions%s", 178 FUSE_MAX_BOUNCE_BYTES, 179 allow_other ? ",allow_other" : ""); 180 181 fuse_argv[0] = ""; /* Dummy program name */ 182 fuse_argv[1] = "-o"; 183 fuse_argv[2] = mount_opts; 184 fuse_argv[3] = NULL; 185 fuse_args = (struct fuse_args)FUSE_ARGS_INIT(3, (char **)fuse_argv); 186 187 exp->fuse_session = fuse_session_new(&fuse_args, &fuse_ops, 188 sizeof(fuse_ops), exp); 189 g_free(mount_opts); 190 if (!exp->fuse_session) { 191 error_setg(errp, "Failed to set up FUSE session"); 192 ret = -EIO; 193 goto fail; 194 } 195 196 ret = fuse_session_mount(exp->fuse_session, mountpoint); 197 if (ret < 0) { 198 error_setg(errp, "Failed to mount FUSE session to export"); 199 ret = -EIO; 200 goto fail; 201 } 202 exp->mounted = true; 203 204 g_hash_table_insert(exports, g_strdup(mountpoint), NULL); 205 206 aio_set_fd_handler(exp->common.ctx, 207 fuse_session_fd(exp->fuse_session), true, 208 read_from_fuse_export, NULL, NULL, exp); 209 exp->fd_handler_set_up = true; 210 211 return 0; 212 213 fail: 214 fuse_export_shutdown(&exp->common); 215 return ret; 216 } 217 218 /** 219 * Callback to be invoked when the FUSE session FD can be read from. 220 * (This is basically the FUSE event loop.) 221 */ 222 static void read_from_fuse_export(void *opaque) 223 { 224 FuseExport *exp = opaque; 225 int ret; 226 227 blk_exp_ref(&exp->common); 228 229 do { 230 ret = fuse_session_receive_buf(exp->fuse_session, &exp->fuse_buf); 231 } while (ret == -EINTR); 232 if (ret < 0) { 233 goto out; 234 } 235 236 fuse_session_process_buf(exp->fuse_session, &exp->fuse_buf); 237 238 out: 239 blk_exp_unref(&exp->common); 240 } 241 242 static void fuse_export_shutdown(BlockExport *blk_exp) 243 { 244 FuseExport *exp = container_of(blk_exp, FuseExport, common); 245 246 if (exp->fuse_session) { 247 fuse_session_exit(exp->fuse_session); 248 249 if (exp->fd_handler_set_up) { 250 aio_set_fd_handler(exp->common.ctx, 251 fuse_session_fd(exp->fuse_session), true, 252 NULL, NULL, NULL, NULL); 253 exp->fd_handler_set_up = false; 254 } 255 } 256 257 if (exp->mountpoint) { 258 /* 259 * Safe to drop now, because we will not handle any requests 260 * for this export anymore anyway. 261 */ 262 g_hash_table_remove(exports, exp->mountpoint); 263 } 264 } 265 266 static void fuse_export_delete(BlockExport *blk_exp) 267 { 268 FuseExport *exp = container_of(blk_exp, FuseExport, common); 269 270 if (exp->fuse_session) { 271 if (exp->mounted) { 272 fuse_session_unmount(exp->fuse_session); 273 } 274 275 fuse_session_destroy(exp->fuse_session); 276 } 277 278 free(exp->fuse_buf.mem); 279 g_free(exp->mountpoint); 280 } 281 282 /** 283 * Check whether @path points to a regular file. If not, put an 284 * appropriate message into *errp. 285 */ 286 static bool is_regular_file(const char *path, Error **errp) 287 { 288 struct stat statbuf; 289 int ret; 290 291 ret = stat(path, &statbuf); 292 if (ret < 0) { 293 error_setg_errno(errp, errno, "Failed to stat '%s'", path); 294 return false; 295 } 296 297 if (!S_ISREG(statbuf.st_mode)) { 298 error_setg(errp, "'%s' is not a regular file", path); 299 return false; 300 } 301 302 return true; 303 } 304 305 /** 306 * A chance to set change some parameters supplied to FUSE_INIT. 307 */ 308 static void fuse_init(void *userdata, struct fuse_conn_info *conn) 309 { 310 /* 311 * MIN_NON_ZERO() would not be wrong here, but what we set here 312 * must equal what has been passed to fuse_session_new(). 313 * Therefore, as long as max_read must be passed as a mount option 314 * (which libfuse claims will be changed at some point), we have 315 * to set max_read to a fixed value here. 316 */ 317 conn->max_read = FUSE_MAX_BOUNCE_BYTES; 318 319 conn->max_write = MIN_NON_ZERO(BDRV_REQUEST_MAX_BYTES, conn->max_write); 320 } 321 322 /** 323 * Let clients look up files. Always return ENOENT because we only 324 * care about the mountpoint itself. 325 */ 326 static void fuse_lookup(fuse_req_t req, fuse_ino_t parent, const char *name) 327 { 328 fuse_reply_err(req, ENOENT); 329 } 330 331 /** 332 * Let clients get file attributes (i.e., stat() the file). 333 */ 334 static void fuse_getattr(fuse_req_t req, fuse_ino_t inode, 335 struct fuse_file_info *fi) 336 { 337 struct stat statbuf; 338 int64_t length, allocated_blocks; 339 time_t now = time(NULL); 340 FuseExport *exp = fuse_req_userdata(req); 341 mode_t mode; 342 343 length = blk_getlength(exp->common.blk); 344 if (length < 0) { 345 fuse_reply_err(req, -length); 346 return; 347 } 348 349 allocated_blocks = bdrv_get_allocated_file_size(blk_bs(exp->common.blk)); 350 if (allocated_blocks <= 0) { 351 allocated_blocks = DIV_ROUND_UP(length, 512); 352 } else { 353 allocated_blocks = DIV_ROUND_UP(allocated_blocks, 512); 354 } 355 356 mode = S_IFREG | S_IRUSR; 357 if (exp->writable) { 358 mode |= S_IWUSR; 359 } 360 361 statbuf = (struct stat) { 362 .st_ino = inode, 363 .st_mode = mode, 364 .st_nlink = 1, 365 .st_uid = getuid(), 366 .st_gid = getgid(), 367 .st_size = length, 368 .st_blksize = blk_bs(exp->common.blk)->bl.request_alignment, 369 .st_blocks = allocated_blocks, 370 .st_atime = now, 371 .st_mtime = now, 372 .st_ctime = now, 373 }; 374 375 fuse_reply_attr(req, &statbuf, 1.); 376 } 377 378 static int fuse_do_truncate(const FuseExport *exp, int64_t size, 379 bool req_zero_write, PreallocMode prealloc) 380 { 381 uint64_t blk_perm, blk_shared_perm; 382 BdrvRequestFlags truncate_flags = 0; 383 int ret; 384 385 if (req_zero_write) { 386 truncate_flags |= BDRV_REQ_ZERO_WRITE; 387 } 388 389 /* Growable exports have a permanent RESIZE permission */ 390 if (!exp->growable) { 391 blk_get_perm(exp->common.blk, &blk_perm, &blk_shared_perm); 392 393 ret = blk_set_perm(exp->common.blk, blk_perm | BLK_PERM_RESIZE, 394 blk_shared_perm, NULL); 395 if (ret < 0) { 396 return ret; 397 } 398 } 399 400 ret = blk_truncate(exp->common.blk, size, true, prealloc, 401 truncate_flags, NULL); 402 403 if (!exp->growable) { 404 /* Must succeed, because we are only giving up the RESIZE permission */ 405 blk_set_perm(exp->common.blk, blk_perm, blk_shared_perm, &error_abort); 406 } 407 408 return ret; 409 } 410 411 /** 412 * Let clients set file attributes. Only resizing is supported. 413 */ 414 static void fuse_setattr(fuse_req_t req, fuse_ino_t inode, struct stat *statbuf, 415 int to_set, struct fuse_file_info *fi) 416 { 417 FuseExport *exp = fuse_req_userdata(req); 418 int ret; 419 420 if (!exp->writable) { 421 fuse_reply_err(req, EACCES); 422 return; 423 } 424 425 if (to_set & ~FUSE_SET_ATTR_SIZE) { 426 fuse_reply_err(req, ENOTSUP); 427 return; 428 } 429 430 ret = fuse_do_truncate(exp, statbuf->st_size, true, PREALLOC_MODE_OFF); 431 if (ret < 0) { 432 fuse_reply_err(req, -ret); 433 return; 434 } 435 436 fuse_getattr(req, inode, fi); 437 } 438 439 /** 440 * Let clients open a file (i.e., the exported image). 441 */ 442 static void fuse_open(fuse_req_t req, fuse_ino_t inode, 443 struct fuse_file_info *fi) 444 { 445 fuse_reply_open(req, fi); 446 } 447 448 /** 449 * Handle client reads from the exported image. 450 */ 451 static void fuse_read(fuse_req_t req, fuse_ino_t inode, 452 size_t size, off_t offset, struct fuse_file_info *fi) 453 { 454 FuseExport *exp = fuse_req_userdata(req); 455 int64_t length; 456 void *buf; 457 int ret; 458 459 /* Limited by max_read, should not happen */ 460 if (size > FUSE_MAX_BOUNCE_BYTES) { 461 fuse_reply_err(req, EINVAL); 462 return; 463 } 464 465 /** 466 * Clients will expect short reads at EOF, so we have to limit 467 * offset+size to the image length. 468 */ 469 length = blk_getlength(exp->common.blk); 470 if (length < 0) { 471 fuse_reply_err(req, -length); 472 return; 473 } 474 475 if (offset + size > length) { 476 size = length - offset; 477 } 478 479 buf = qemu_try_blockalign(blk_bs(exp->common.blk), size); 480 if (!buf) { 481 fuse_reply_err(req, ENOMEM); 482 return; 483 } 484 485 ret = blk_pread(exp->common.blk, offset, buf, size); 486 if (ret >= 0) { 487 fuse_reply_buf(req, buf, size); 488 } else { 489 fuse_reply_err(req, -ret); 490 } 491 492 qemu_vfree(buf); 493 } 494 495 /** 496 * Handle client writes to the exported image. 497 */ 498 static void fuse_write(fuse_req_t req, fuse_ino_t inode, const char *buf, 499 size_t size, off_t offset, struct fuse_file_info *fi) 500 { 501 FuseExport *exp = fuse_req_userdata(req); 502 int64_t length; 503 int ret; 504 505 /* Limited by max_write, should not happen */ 506 if (size > BDRV_REQUEST_MAX_BYTES) { 507 fuse_reply_err(req, EINVAL); 508 return; 509 } 510 511 if (!exp->writable) { 512 fuse_reply_err(req, EACCES); 513 return; 514 } 515 516 /** 517 * Clients will expect short writes at EOF, so we have to limit 518 * offset+size to the image length. 519 */ 520 length = blk_getlength(exp->common.blk); 521 if (length < 0) { 522 fuse_reply_err(req, -length); 523 return; 524 } 525 526 if (offset + size > length) { 527 if (exp->growable) { 528 ret = fuse_do_truncate(exp, offset + size, true, PREALLOC_MODE_OFF); 529 if (ret < 0) { 530 fuse_reply_err(req, -ret); 531 return; 532 } 533 } else { 534 size = length - offset; 535 } 536 } 537 538 ret = blk_pwrite(exp->common.blk, offset, buf, size, 0); 539 if (ret >= 0) { 540 fuse_reply_write(req, size); 541 } else { 542 fuse_reply_err(req, -ret); 543 } 544 } 545 546 /** 547 * Let clients perform various fallocate() operations. 548 */ 549 static void fuse_fallocate(fuse_req_t req, fuse_ino_t inode, int mode, 550 off_t offset, off_t length, 551 struct fuse_file_info *fi) 552 { 553 FuseExport *exp = fuse_req_userdata(req); 554 int64_t blk_len; 555 int ret; 556 557 if (!exp->writable) { 558 fuse_reply_err(req, EACCES); 559 return; 560 } 561 562 blk_len = blk_getlength(exp->common.blk); 563 if (blk_len < 0) { 564 fuse_reply_err(req, -blk_len); 565 return; 566 } 567 568 if (mode & FALLOC_FL_KEEP_SIZE) { 569 length = MIN(length, blk_len - offset); 570 } 571 572 if (mode & FALLOC_FL_PUNCH_HOLE) { 573 if (!(mode & FALLOC_FL_KEEP_SIZE)) { 574 fuse_reply_err(req, EINVAL); 575 return; 576 } 577 578 do { 579 int size = MIN(length, BDRV_REQUEST_MAX_BYTES); 580 581 ret = blk_pdiscard(exp->common.blk, offset, size); 582 offset += size; 583 length -= size; 584 } while (ret == 0 && length > 0); 585 } else if (mode & FALLOC_FL_ZERO_RANGE) { 586 if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + length > blk_len) { 587 /* No need for zeroes, we are going to write them ourselves */ 588 ret = fuse_do_truncate(exp, offset + length, false, 589 PREALLOC_MODE_OFF); 590 if (ret < 0) { 591 fuse_reply_err(req, -ret); 592 return; 593 } 594 } 595 596 do { 597 int size = MIN(length, BDRV_REQUEST_MAX_BYTES); 598 599 ret = blk_pwrite_zeroes(exp->common.blk, 600 offset, size, 0); 601 offset += size; 602 length -= size; 603 } while (ret == 0 && length > 0); 604 } else if (!mode) { 605 /* We can only fallocate at the EOF with a truncate */ 606 if (offset < blk_len) { 607 fuse_reply_err(req, EOPNOTSUPP); 608 return; 609 } 610 611 if (offset > blk_len) { 612 /* No preallocation needed here */ 613 ret = fuse_do_truncate(exp, offset, true, PREALLOC_MODE_OFF); 614 if (ret < 0) { 615 fuse_reply_err(req, -ret); 616 return; 617 } 618 } 619 620 ret = fuse_do_truncate(exp, offset + length, true, 621 PREALLOC_MODE_FALLOC); 622 } else { 623 ret = -EOPNOTSUPP; 624 } 625 626 fuse_reply_err(req, ret < 0 ? -ret : 0); 627 } 628 629 /** 630 * Let clients fsync the exported image. 631 */ 632 static void fuse_fsync(fuse_req_t req, fuse_ino_t inode, int datasync, 633 struct fuse_file_info *fi) 634 { 635 FuseExport *exp = fuse_req_userdata(req); 636 int ret; 637 638 ret = blk_flush(exp->common.blk); 639 fuse_reply_err(req, ret < 0 ? -ret : 0); 640 } 641 642 /** 643 * Called before an FD to the exported image is closed. (libfuse 644 * notes this to be a way to return last-minute errors.) 645 */ 646 static void fuse_flush(fuse_req_t req, fuse_ino_t inode, 647 struct fuse_file_info *fi) 648 { 649 fuse_fsync(req, inode, 1, fi); 650 } 651 652 #ifdef CONFIG_FUSE_LSEEK 653 /** 654 * Let clients inquire allocation status. 655 */ 656 static void fuse_lseek(fuse_req_t req, fuse_ino_t inode, off_t offset, 657 int whence, struct fuse_file_info *fi) 658 { 659 FuseExport *exp = fuse_req_userdata(req); 660 661 if (whence != SEEK_HOLE && whence != SEEK_DATA) { 662 fuse_reply_err(req, EINVAL); 663 return; 664 } 665 666 while (true) { 667 int64_t pnum; 668 int ret; 669 670 ret = bdrv_block_status_above(blk_bs(exp->common.blk), NULL, 671 offset, INT64_MAX, &pnum, NULL, NULL); 672 if (ret < 0) { 673 fuse_reply_err(req, -ret); 674 return; 675 } 676 677 if (!pnum && (ret & BDRV_BLOCK_EOF)) { 678 int64_t blk_len; 679 680 /* 681 * If blk_getlength() rounds (e.g. by sectors), then the 682 * export length will be rounded, too. However, 683 * bdrv_block_status_above() may return EOF at unaligned 684 * offsets. We must not let this become visible and thus 685 * always simulate a hole between @offset (the real EOF) 686 * and @blk_len (the client-visible EOF). 687 */ 688 689 blk_len = blk_getlength(exp->common.blk); 690 if (blk_len < 0) { 691 fuse_reply_err(req, -blk_len); 692 return; 693 } 694 695 if (offset > blk_len || whence == SEEK_DATA) { 696 fuse_reply_err(req, ENXIO); 697 } else { 698 fuse_reply_lseek(req, offset); 699 } 700 return; 701 } 702 703 if (ret & BDRV_BLOCK_DATA) { 704 if (whence == SEEK_DATA) { 705 fuse_reply_lseek(req, offset); 706 return; 707 } 708 } else { 709 if (whence == SEEK_HOLE) { 710 fuse_reply_lseek(req, offset); 711 return; 712 } 713 } 714 715 /* Safety check against infinite loops */ 716 if (!pnum) { 717 fuse_reply_err(req, ENXIO); 718 return; 719 } 720 721 offset += pnum; 722 } 723 } 724 #endif 725 726 static const struct fuse_lowlevel_ops fuse_ops = { 727 .init = fuse_init, 728 .lookup = fuse_lookup, 729 .getattr = fuse_getattr, 730 .setattr = fuse_setattr, 731 .open = fuse_open, 732 .read = fuse_read, 733 .write = fuse_write, 734 .fallocate = fuse_fallocate, 735 .flush = fuse_flush, 736 .fsync = fuse_fsync, 737 #ifdef CONFIG_FUSE_LSEEK 738 .lseek = fuse_lseek, 739 #endif 740 }; 741 742 const BlockExportDriver blk_exp_fuse = { 743 .type = BLOCK_EXPORT_TYPE_FUSE, 744 .instance_size = sizeof(FuseExport), 745 .create = fuse_export_create, 746 .delete = fuse_export_delete, 747 .request_shutdown = fuse_export_shutdown, 748 }; 749