1 /* 2 * Present a block device as a raw image through FUSE 3 * 4 * Copyright (c) 2020 Max Reitz <mreitz@redhat.com> 5 * 6 * This program is free software; you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License as published by 8 * the Free Software Foundation; under version 2 or later of the License. 9 * 10 * This program is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 * GNU General Public License for more details. 14 * 15 * You should have received a copy of the GNU General Public License 16 * along with this program; if not, see <http://www.gnu.org/licenses/>. 17 */ 18 19 #define FUSE_USE_VERSION 31 20 21 #include "qemu/osdep.h" 22 #include "block/aio.h" 23 #include "block/block.h" 24 #include "block/export.h" 25 #include "block/fuse.h" 26 #include "block/qapi.h" 27 #include "qapi/error.h" 28 #include "qapi/qapi-commands-block.h" 29 #include "sysemu/block-backend.h" 30 31 #include <fuse.h> 32 #include <fuse_lowlevel.h> 33 34 35 /* Prevent overly long bounce buffer allocations */ 36 #define FUSE_MAX_BOUNCE_BYTES (MIN(BDRV_REQUEST_MAX_BYTES, 64 * 1024 * 1024)) 37 38 39 typedef struct FuseExport { 40 BlockExport common; 41 42 struct fuse_session *fuse_session; 43 struct fuse_buf fuse_buf; 44 bool mounted, fd_handler_set_up; 45 46 char *mountpoint; 47 bool writable; 48 bool growable; 49 } FuseExport; 50 51 static GHashTable *exports; 52 static const struct fuse_lowlevel_ops fuse_ops; 53 54 static void fuse_export_shutdown(BlockExport *exp); 55 static void fuse_export_delete(BlockExport *exp); 56 57 static void init_exports_table(void); 58 59 static int setup_fuse_export(FuseExport *exp, const char *mountpoint, 60 Error **errp); 61 static void read_from_fuse_export(void *opaque); 62 63 static bool is_regular_file(const char *path, Error **errp); 64 65 66 static int fuse_export_create(BlockExport *blk_exp, 67 BlockExportOptions *blk_exp_args, 68 Error **errp) 69 { 70 FuseExport *exp = container_of(blk_exp, FuseExport, common); 71 BlockExportOptionsFuse *args = &blk_exp_args->u.fuse; 72 int ret; 73 74 assert(blk_exp_args->type == BLOCK_EXPORT_TYPE_FUSE); 75 76 /* For growable exports, take the RESIZE permission */ 77 if (args->growable) { 78 uint64_t blk_perm, blk_shared_perm; 79 80 blk_get_perm(exp->common.blk, &blk_perm, &blk_shared_perm); 81 82 ret = blk_set_perm(exp->common.blk, blk_perm | BLK_PERM_RESIZE, 83 blk_shared_perm, errp); 84 if (ret < 0) { 85 return ret; 86 } 87 } 88 89 init_exports_table(); 90 91 /* 92 * It is important to do this check before calling is_regular_file() -- 93 * that function will do a stat(), which we would have to handle if we 94 * already exported something on @mountpoint. But we cannot, because 95 * we are currently caught up here. 96 * (Note that ideally we would want to resolve relative paths here, 97 * but bdrv_make_absolute_filename() might do the wrong thing for 98 * paths that contain colons, and realpath() would resolve symlinks, 99 * which we do not want: The mount point is not going to be the 100 * symlink's destination, but the link itself.) 101 * So this will not catch all potential clashes, but hopefully at 102 * least the most common one of specifying exactly the same path 103 * string twice. 104 */ 105 if (g_hash_table_contains(exports, args->mountpoint)) { 106 error_setg(errp, "There already is a FUSE export on '%s'", 107 args->mountpoint); 108 ret = -EEXIST; 109 goto fail; 110 } 111 112 if (!is_regular_file(args->mountpoint, errp)) { 113 ret = -EINVAL; 114 goto fail; 115 } 116 117 exp->mountpoint = g_strdup(args->mountpoint); 118 exp->writable = blk_exp_args->writable; 119 exp->growable = args->growable; 120 121 ret = setup_fuse_export(exp, args->mountpoint, errp); 122 if (ret < 0) { 123 goto fail; 124 } 125 126 return 0; 127 128 fail: 129 fuse_export_delete(blk_exp); 130 return ret; 131 } 132 133 /** 134 * Allocates the global @exports hash table. 135 */ 136 static void init_exports_table(void) 137 { 138 if (exports) { 139 return; 140 } 141 142 exports = g_hash_table_new_full(g_str_hash, g_str_equal, g_free, NULL); 143 } 144 145 /** 146 * Create exp->fuse_session and mount it. 147 */ 148 static int setup_fuse_export(FuseExport *exp, const char *mountpoint, 149 Error **errp) 150 { 151 const char *fuse_argv[4]; 152 char *mount_opts; 153 struct fuse_args fuse_args; 154 int ret; 155 156 /* Needs to match what fuse_init() sets. Only max_read must be supplied. */ 157 mount_opts = g_strdup_printf("max_read=%zu", FUSE_MAX_BOUNCE_BYTES); 158 159 fuse_argv[0] = ""; /* Dummy program name */ 160 fuse_argv[1] = "-o"; 161 fuse_argv[2] = mount_opts; 162 fuse_argv[3] = NULL; 163 fuse_args = (struct fuse_args)FUSE_ARGS_INIT(3, (char **)fuse_argv); 164 165 exp->fuse_session = fuse_session_new(&fuse_args, &fuse_ops, 166 sizeof(fuse_ops), exp); 167 g_free(mount_opts); 168 if (!exp->fuse_session) { 169 error_setg(errp, "Failed to set up FUSE session"); 170 ret = -EIO; 171 goto fail; 172 } 173 174 ret = fuse_session_mount(exp->fuse_session, mountpoint); 175 if (ret < 0) { 176 error_setg(errp, "Failed to mount FUSE session to export"); 177 ret = -EIO; 178 goto fail; 179 } 180 exp->mounted = true; 181 182 g_hash_table_insert(exports, g_strdup(mountpoint), NULL); 183 184 aio_set_fd_handler(exp->common.ctx, 185 fuse_session_fd(exp->fuse_session), true, 186 read_from_fuse_export, NULL, NULL, exp); 187 exp->fd_handler_set_up = true; 188 189 return 0; 190 191 fail: 192 fuse_export_shutdown(&exp->common); 193 return ret; 194 } 195 196 /** 197 * Callback to be invoked when the FUSE session FD can be read from. 198 * (This is basically the FUSE event loop.) 199 */ 200 static void read_from_fuse_export(void *opaque) 201 { 202 FuseExport *exp = opaque; 203 int ret; 204 205 blk_exp_ref(&exp->common); 206 207 do { 208 ret = fuse_session_receive_buf(exp->fuse_session, &exp->fuse_buf); 209 } while (ret == -EINTR); 210 if (ret < 0) { 211 goto out; 212 } 213 214 fuse_session_process_buf(exp->fuse_session, &exp->fuse_buf); 215 216 out: 217 blk_exp_unref(&exp->common); 218 } 219 220 static void fuse_export_shutdown(BlockExport *blk_exp) 221 { 222 FuseExport *exp = container_of(blk_exp, FuseExport, common); 223 224 if (exp->fuse_session) { 225 fuse_session_exit(exp->fuse_session); 226 227 if (exp->fd_handler_set_up) { 228 aio_set_fd_handler(exp->common.ctx, 229 fuse_session_fd(exp->fuse_session), true, 230 NULL, NULL, NULL, NULL); 231 exp->fd_handler_set_up = false; 232 } 233 } 234 235 if (exp->mountpoint) { 236 /* 237 * Safe to drop now, because we will not handle any requests 238 * for this export anymore anyway. 239 */ 240 g_hash_table_remove(exports, exp->mountpoint); 241 } 242 } 243 244 static void fuse_export_delete(BlockExport *blk_exp) 245 { 246 FuseExport *exp = container_of(blk_exp, FuseExport, common); 247 248 if (exp->fuse_session) { 249 if (exp->mounted) { 250 fuse_session_unmount(exp->fuse_session); 251 } 252 253 fuse_session_destroy(exp->fuse_session); 254 } 255 256 free(exp->fuse_buf.mem); 257 g_free(exp->mountpoint); 258 } 259 260 /** 261 * Check whether @path points to a regular file. If not, put an 262 * appropriate message into *errp. 263 */ 264 static bool is_regular_file(const char *path, Error **errp) 265 { 266 struct stat statbuf; 267 int ret; 268 269 ret = stat(path, &statbuf); 270 if (ret < 0) { 271 error_setg_errno(errp, errno, "Failed to stat '%s'", path); 272 return false; 273 } 274 275 if (!S_ISREG(statbuf.st_mode)) { 276 error_setg(errp, "'%s' is not a regular file", path); 277 return false; 278 } 279 280 return true; 281 } 282 283 /** 284 * A chance to set change some parameters supplied to FUSE_INIT. 285 */ 286 static void fuse_init(void *userdata, struct fuse_conn_info *conn) 287 { 288 /* 289 * MIN_NON_ZERO() would not be wrong here, but what we set here 290 * must equal what has been passed to fuse_session_new(). 291 * Therefore, as long as max_read must be passed as a mount option 292 * (which libfuse claims will be changed at some point), we have 293 * to set max_read to a fixed value here. 294 */ 295 conn->max_read = FUSE_MAX_BOUNCE_BYTES; 296 297 conn->max_write = MIN_NON_ZERO(BDRV_REQUEST_MAX_BYTES, conn->max_write); 298 } 299 300 /** 301 * Let clients look up files. Always return ENOENT because we only 302 * care about the mountpoint itself. 303 */ 304 static void fuse_lookup(fuse_req_t req, fuse_ino_t parent, const char *name) 305 { 306 fuse_reply_err(req, ENOENT); 307 } 308 309 /** 310 * Let clients get file attributes (i.e., stat() the file). 311 */ 312 static void fuse_getattr(fuse_req_t req, fuse_ino_t inode, 313 struct fuse_file_info *fi) 314 { 315 struct stat statbuf; 316 int64_t length, allocated_blocks; 317 time_t now = time(NULL); 318 FuseExport *exp = fuse_req_userdata(req); 319 mode_t mode; 320 321 length = blk_getlength(exp->common.blk); 322 if (length < 0) { 323 fuse_reply_err(req, -length); 324 return; 325 } 326 327 allocated_blocks = bdrv_get_allocated_file_size(blk_bs(exp->common.blk)); 328 if (allocated_blocks <= 0) { 329 allocated_blocks = DIV_ROUND_UP(length, 512); 330 } else { 331 allocated_blocks = DIV_ROUND_UP(allocated_blocks, 512); 332 } 333 334 mode = S_IFREG | S_IRUSR; 335 if (exp->writable) { 336 mode |= S_IWUSR; 337 } 338 339 statbuf = (struct stat) { 340 .st_ino = inode, 341 .st_mode = mode, 342 .st_nlink = 1, 343 .st_uid = getuid(), 344 .st_gid = getgid(), 345 .st_size = length, 346 .st_blksize = blk_bs(exp->common.blk)->bl.request_alignment, 347 .st_blocks = allocated_blocks, 348 .st_atime = now, 349 .st_mtime = now, 350 .st_ctime = now, 351 }; 352 353 fuse_reply_attr(req, &statbuf, 1.); 354 } 355 356 static int fuse_do_truncate(const FuseExport *exp, int64_t size, 357 bool req_zero_write, PreallocMode prealloc) 358 { 359 uint64_t blk_perm, blk_shared_perm; 360 BdrvRequestFlags truncate_flags = 0; 361 int ret; 362 363 if (req_zero_write) { 364 truncate_flags |= BDRV_REQ_ZERO_WRITE; 365 } 366 367 /* Growable exports have a permanent RESIZE permission */ 368 if (!exp->growable) { 369 blk_get_perm(exp->common.blk, &blk_perm, &blk_shared_perm); 370 371 ret = blk_set_perm(exp->common.blk, blk_perm | BLK_PERM_RESIZE, 372 blk_shared_perm, NULL); 373 if (ret < 0) { 374 return ret; 375 } 376 } 377 378 ret = blk_truncate(exp->common.blk, size, true, prealloc, 379 truncate_flags, NULL); 380 381 if (!exp->growable) { 382 /* Must succeed, because we are only giving up the RESIZE permission */ 383 blk_set_perm(exp->common.blk, blk_perm, blk_shared_perm, &error_abort); 384 } 385 386 return ret; 387 } 388 389 /** 390 * Let clients set file attributes. Only resizing is supported. 391 */ 392 static void fuse_setattr(fuse_req_t req, fuse_ino_t inode, struct stat *statbuf, 393 int to_set, struct fuse_file_info *fi) 394 { 395 FuseExport *exp = fuse_req_userdata(req); 396 int ret; 397 398 if (!exp->writable) { 399 fuse_reply_err(req, EACCES); 400 return; 401 } 402 403 if (to_set & ~FUSE_SET_ATTR_SIZE) { 404 fuse_reply_err(req, ENOTSUP); 405 return; 406 } 407 408 ret = fuse_do_truncate(exp, statbuf->st_size, true, PREALLOC_MODE_OFF); 409 if (ret < 0) { 410 fuse_reply_err(req, -ret); 411 return; 412 } 413 414 fuse_getattr(req, inode, fi); 415 } 416 417 /** 418 * Let clients open a file (i.e., the exported image). 419 */ 420 static void fuse_open(fuse_req_t req, fuse_ino_t inode, 421 struct fuse_file_info *fi) 422 { 423 fuse_reply_open(req, fi); 424 } 425 426 /** 427 * Handle client reads from the exported image. 428 */ 429 static void fuse_read(fuse_req_t req, fuse_ino_t inode, 430 size_t size, off_t offset, struct fuse_file_info *fi) 431 { 432 FuseExport *exp = fuse_req_userdata(req); 433 int64_t length; 434 void *buf; 435 int ret; 436 437 /* Limited by max_read, should not happen */ 438 if (size > FUSE_MAX_BOUNCE_BYTES) { 439 fuse_reply_err(req, EINVAL); 440 return; 441 } 442 443 /** 444 * Clients will expect short reads at EOF, so we have to limit 445 * offset+size to the image length. 446 */ 447 length = blk_getlength(exp->common.blk); 448 if (length < 0) { 449 fuse_reply_err(req, -length); 450 return; 451 } 452 453 if (offset + size > length) { 454 size = length - offset; 455 } 456 457 buf = qemu_try_blockalign(blk_bs(exp->common.blk), size); 458 if (!buf) { 459 fuse_reply_err(req, ENOMEM); 460 return; 461 } 462 463 ret = blk_pread(exp->common.blk, offset, buf, size); 464 if (ret >= 0) { 465 fuse_reply_buf(req, buf, size); 466 } else { 467 fuse_reply_err(req, -ret); 468 } 469 470 qemu_vfree(buf); 471 } 472 473 /** 474 * Handle client writes to the exported image. 475 */ 476 static void fuse_write(fuse_req_t req, fuse_ino_t inode, const char *buf, 477 size_t size, off_t offset, struct fuse_file_info *fi) 478 { 479 FuseExport *exp = fuse_req_userdata(req); 480 int64_t length; 481 int ret; 482 483 /* Limited by max_write, should not happen */ 484 if (size > BDRV_REQUEST_MAX_BYTES) { 485 fuse_reply_err(req, EINVAL); 486 return; 487 } 488 489 if (!exp->writable) { 490 fuse_reply_err(req, EACCES); 491 return; 492 } 493 494 /** 495 * Clients will expect short writes at EOF, so we have to limit 496 * offset+size to the image length. 497 */ 498 length = blk_getlength(exp->common.blk); 499 if (length < 0) { 500 fuse_reply_err(req, -length); 501 return; 502 } 503 504 if (offset + size > length) { 505 if (exp->growable) { 506 ret = fuse_do_truncate(exp, offset + size, true, PREALLOC_MODE_OFF); 507 if (ret < 0) { 508 fuse_reply_err(req, -ret); 509 return; 510 } 511 } else { 512 size = length - offset; 513 } 514 } 515 516 ret = blk_pwrite(exp->common.blk, offset, buf, size, 0); 517 if (ret >= 0) { 518 fuse_reply_write(req, size); 519 } else { 520 fuse_reply_err(req, -ret); 521 } 522 } 523 524 /** 525 * Let clients perform various fallocate() operations. 526 */ 527 static void fuse_fallocate(fuse_req_t req, fuse_ino_t inode, int mode, 528 off_t offset, off_t length, 529 struct fuse_file_info *fi) 530 { 531 FuseExport *exp = fuse_req_userdata(req); 532 int64_t blk_len; 533 int ret; 534 535 if (!exp->writable) { 536 fuse_reply_err(req, EACCES); 537 return; 538 } 539 540 blk_len = blk_getlength(exp->common.blk); 541 if (blk_len < 0) { 542 fuse_reply_err(req, -blk_len); 543 return; 544 } 545 546 if (mode & FALLOC_FL_KEEP_SIZE) { 547 length = MIN(length, blk_len - offset); 548 } 549 550 if (mode & FALLOC_FL_PUNCH_HOLE) { 551 if (!(mode & FALLOC_FL_KEEP_SIZE)) { 552 fuse_reply_err(req, EINVAL); 553 return; 554 } 555 556 do { 557 int size = MIN(length, BDRV_REQUEST_MAX_BYTES); 558 559 ret = blk_pdiscard(exp->common.blk, offset, size); 560 offset += size; 561 length -= size; 562 } while (ret == 0 && length > 0); 563 } else if (mode & FALLOC_FL_ZERO_RANGE) { 564 if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + length > blk_len) { 565 /* No need for zeroes, we are going to write them ourselves */ 566 ret = fuse_do_truncate(exp, offset + length, false, 567 PREALLOC_MODE_OFF); 568 if (ret < 0) { 569 fuse_reply_err(req, -ret); 570 return; 571 } 572 } 573 574 do { 575 int size = MIN(length, BDRV_REQUEST_MAX_BYTES); 576 577 ret = blk_pwrite_zeroes(exp->common.blk, 578 offset, size, 0); 579 offset += size; 580 length -= size; 581 } while (ret == 0 && length > 0); 582 } else if (!mode) { 583 /* We can only fallocate at the EOF with a truncate */ 584 if (offset < blk_len) { 585 fuse_reply_err(req, EOPNOTSUPP); 586 return; 587 } 588 589 if (offset > blk_len) { 590 /* No preallocation needed here */ 591 ret = fuse_do_truncate(exp, offset, true, PREALLOC_MODE_OFF); 592 if (ret < 0) { 593 fuse_reply_err(req, -ret); 594 return; 595 } 596 } 597 598 ret = fuse_do_truncate(exp, offset + length, true, 599 PREALLOC_MODE_FALLOC); 600 } else { 601 ret = -EOPNOTSUPP; 602 } 603 604 fuse_reply_err(req, ret < 0 ? -ret : 0); 605 } 606 607 /** 608 * Let clients fsync the exported image. 609 */ 610 static void fuse_fsync(fuse_req_t req, fuse_ino_t inode, int datasync, 611 struct fuse_file_info *fi) 612 { 613 FuseExport *exp = fuse_req_userdata(req); 614 int ret; 615 616 ret = blk_flush(exp->common.blk); 617 fuse_reply_err(req, ret < 0 ? -ret : 0); 618 } 619 620 /** 621 * Called before an FD to the exported image is closed. (libfuse 622 * notes this to be a way to return last-minute errors.) 623 */ 624 static void fuse_flush(fuse_req_t req, fuse_ino_t inode, 625 struct fuse_file_info *fi) 626 { 627 fuse_fsync(req, inode, 1, fi); 628 } 629 630 #ifdef CONFIG_FUSE_LSEEK 631 /** 632 * Let clients inquire allocation status. 633 */ 634 static void fuse_lseek(fuse_req_t req, fuse_ino_t inode, off_t offset, 635 int whence, struct fuse_file_info *fi) 636 { 637 FuseExport *exp = fuse_req_userdata(req); 638 639 if (whence != SEEK_HOLE && whence != SEEK_DATA) { 640 fuse_reply_err(req, EINVAL); 641 return; 642 } 643 644 while (true) { 645 int64_t pnum; 646 int ret; 647 648 ret = bdrv_block_status_above(blk_bs(exp->common.blk), NULL, 649 offset, INT64_MAX, &pnum, NULL, NULL); 650 if (ret < 0) { 651 fuse_reply_err(req, -ret); 652 return; 653 } 654 655 if (!pnum && (ret & BDRV_BLOCK_EOF)) { 656 int64_t blk_len; 657 658 /* 659 * If blk_getlength() rounds (e.g. by sectors), then the 660 * export length will be rounded, too. However, 661 * bdrv_block_status_above() may return EOF at unaligned 662 * offsets. We must not let this become visible and thus 663 * always simulate a hole between @offset (the real EOF) 664 * and @blk_len (the client-visible EOF). 665 */ 666 667 blk_len = blk_getlength(exp->common.blk); 668 if (blk_len < 0) { 669 fuse_reply_err(req, -blk_len); 670 return; 671 } 672 673 if (offset > blk_len || whence == SEEK_DATA) { 674 fuse_reply_err(req, ENXIO); 675 } else { 676 fuse_reply_lseek(req, offset); 677 } 678 return; 679 } 680 681 if (ret & BDRV_BLOCK_DATA) { 682 if (whence == SEEK_DATA) { 683 fuse_reply_lseek(req, offset); 684 return; 685 } 686 } else { 687 if (whence == SEEK_HOLE) { 688 fuse_reply_lseek(req, offset); 689 return; 690 } 691 } 692 693 /* Safety check against infinite loops */ 694 if (!pnum) { 695 fuse_reply_err(req, ENXIO); 696 return; 697 } 698 699 offset += pnum; 700 } 701 } 702 #endif 703 704 static const struct fuse_lowlevel_ops fuse_ops = { 705 .init = fuse_init, 706 .lookup = fuse_lookup, 707 .getattr = fuse_getattr, 708 .setattr = fuse_setattr, 709 .open = fuse_open, 710 .read = fuse_read, 711 .write = fuse_write, 712 .fallocate = fuse_fallocate, 713 .flush = fuse_flush, 714 .fsync = fuse_fsync, 715 #ifdef CONFIG_FUSE_LSEEK 716 .lseek = fuse_lseek, 717 #endif 718 }; 719 720 const BlockExportDriver blk_exp_fuse = { 721 .type = BLOCK_EXPORT_TYPE_FUSE, 722 .instance_size = sizeof(FuseExport), 723 .create = fuse_export_create, 724 .delete = fuse_export_delete, 725 .request_shutdown = fuse_export_shutdown, 726 }; 727