1 /* 2 * Copyright (C) 2005 Anthony Liguori <anthony@codemonkey.ws> 3 * 4 * Network Block Device 5 * 6 * This program is free software; you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License as published by 8 * the Free Software Foundation; under version 2 of the License. 9 * 10 * This program is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 * GNU General Public License for more details. 14 * 15 * You should have received a copy of the GNU General Public License 16 * along with this program; if not, see <http://www.gnu.org/licenses/>. 17 */ 18 19 #include "qemu-common.h" 20 #include "block/block.h" 21 #include "block/nbd.h" 22 #include "qemu/main-loop.h" 23 #include "block/snapshot.h" 24 25 #include <stdarg.h> 26 #include <stdio.h> 27 #include <getopt.h> 28 #include <err.h> 29 #include <sys/types.h> 30 #include <sys/socket.h> 31 #include <netinet/in.h> 32 #include <netinet/tcp.h> 33 #include <arpa/inet.h> 34 #include <signal.h> 35 #include <libgen.h> 36 #include <pthread.h> 37 38 #define SOCKET_PATH "/var/lock/qemu-nbd-%s" 39 #define QEMU_NBD_OPT_CACHE 1 40 #define QEMU_NBD_OPT_AIO 2 41 #define QEMU_NBD_OPT_DISCARD 3 42 43 static NBDExport *exp; 44 static int verbose; 45 static char *srcpath; 46 static char *sockpath; 47 static int persistent = 0; 48 static enum { RUNNING, TERMINATE, TERMINATING, TERMINATED } state; 49 static int shared = 1; 50 static int nb_fds; 51 52 static void usage(const char *name) 53 { 54 (printf) ( 55 "Usage: %s [OPTIONS] FILE\n" 56 "QEMU Disk Network Block Device Server\n" 57 "\n" 58 " -h, --help display this help and exit\n" 59 " -V, --version output version information and exit\n" 60 "\n" 61 "Connection properties:\n" 62 " -p, --port=PORT port to listen on (default `%d')\n" 63 " -b, --bind=IFACE interface to bind to (default `0.0.0.0')\n" 64 " -k, --socket=PATH path to the unix socket\n" 65 " (default '"SOCKET_PATH"')\n" 66 " -e, --shared=NUM device can be shared by NUM clients (default '1')\n" 67 " -t, --persistent don't exit on the last connection\n" 68 " -v, --verbose display extra debugging information\n" 69 "\n" 70 "Exposing part of the image:\n" 71 " -o, --offset=OFFSET offset into the image\n" 72 " -P, --partition=NUM only expose partition NUM\n" 73 "\n" 74 #ifdef __linux__ 75 "Kernel NBD client support:\n" 76 " -c, --connect=DEV connect FILE to the local NBD device DEV\n" 77 " -d, --disconnect disconnect the specified device\n" 78 "\n" 79 #endif 80 "\n" 81 "Block device options:\n" 82 " -f, --format=FORMAT set image format (raw, qcow2, ...)\n" 83 " -r, --read-only export read-only\n" 84 " -s, --snapshot use FILE as an external snapshot, create a temporary\n" 85 " file with backing_file=FILE, redirect the write to\n" 86 " the temporary one\n" 87 " -l, --load-snapshot=SNAPSHOT_PARAM\n" 88 " load an internal snapshot inside FILE and export it\n" 89 " as an read-only device, SNAPSHOT_PARAM format is\n" 90 " 'snapshot.id=[ID],snapshot.name=[NAME]', or\n" 91 " '[ID_OR_NAME]'\n" 92 " -n, --nocache disable host cache\n" 93 " --cache=MODE set cache mode (none, writeback, ...)\n" 94 #ifdef CONFIG_LINUX_AIO 95 " --aio=MODE set AIO mode (native or threads)\n" 96 #endif 97 "\n" 98 "Report bugs to <qemu-devel@nongnu.org>\n" 99 , name, NBD_DEFAULT_PORT, "DEVICE"); 100 } 101 102 static void version(const char *name) 103 { 104 printf( 105 "%s version 0.0.1\n" 106 "Written by Anthony Liguori.\n" 107 "\n" 108 "Copyright (C) 2006 Anthony Liguori <anthony@codemonkey.ws>.\n" 109 "This is free software; see the source for copying conditions. There is NO\n" 110 "warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n" 111 , name); 112 } 113 114 struct partition_record 115 { 116 uint8_t bootable; 117 uint8_t start_head; 118 uint32_t start_cylinder; 119 uint8_t start_sector; 120 uint8_t system; 121 uint8_t end_head; 122 uint8_t end_cylinder; 123 uint8_t end_sector; 124 uint32_t start_sector_abs; 125 uint32_t nb_sectors_abs; 126 }; 127 128 static void read_partition(uint8_t *p, struct partition_record *r) 129 { 130 r->bootable = p[0]; 131 r->start_head = p[1]; 132 r->start_cylinder = p[3] | ((p[2] << 2) & 0x0300); 133 r->start_sector = p[2] & 0x3f; 134 r->system = p[4]; 135 r->end_head = p[5]; 136 r->end_cylinder = p[7] | ((p[6] << 2) & 0x300); 137 r->end_sector = p[6] & 0x3f; 138 r->start_sector_abs = p[8] | p[9] << 8 | p[10] << 16 | p[11] << 24; 139 r->nb_sectors_abs = p[12] | p[13] << 8 | p[14] << 16 | p[15] << 24; 140 } 141 142 static int find_partition(BlockDriverState *bs, int partition, 143 off_t *offset, off_t *size) 144 { 145 struct partition_record mbr[4]; 146 uint8_t data[512]; 147 int i; 148 int ext_partnum = 4; 149 int ret; 150 151 if ((ret = bdrv_read(bs, 0, data, 1)) < 0) { 152 errno = -ret; 153 err(EXIT_FAILURE, "error while reading"); 154 } 155 156 if (data[510] != 0x55 || data[511] != 0xaa) { 157 return -EINVAL; 158 } 159 160 for (i = 0; i < 4; i++) { 161 read_partition(&data[446 + 16 * i], &mbr[i]); 162 163 if (!mbr[i].nb_sectors_abs) 164 continue; 165 166 if (mbr[i].system == 0xF || mbr[i].system == 0x5) { 167 struct partition_record ext[4]; 168 uint8_t data1[512]; 169 int j; 170 171 if ((ret = bdrv_read(bs, mbr[i].start_sector_abs, data1, 1)) < 0) { 172 errno = -ret; 173 err(EXIT_FAILURE, "error while reading"); 174 } 175 176 for (j = 0; j < 4; j++) { 177 read_partition(&data1[446 + 16 * j], &ext[j]); 178 if (!ext[j].nb_sectors_abs) 179 continue; 180 181 if ((ext_partnum + j + 1) == partition) { 182 *offset = (uint64_t)ext[j].start_sector_abs << 9; 183 *size = (uint64_t)ext[j].nb_sectors_abs << 9; 184 return 0; 185 } 186 } 187 ext_partnum += 4; 188 } else if ((i + 1) == partition) { 189 *offset = (uint64_t)mbr[i].start_sector_abs << 9; 190 *size = (uint64_t)mbr[i].nb_sectors_abs << 9; 191 return 0; 192 } 193 } 194 195 return -ENOENT; 196 } 197 198 static void termsig_handler(int signum) 199 { 200 state = TERMINATE; 201 qemu_notify_event(); 202 } 203 204 static void *show_parts(void *arg) 205 { 206 char *device = arg; 207 int nbd; 208 209 /* linux just needs an open() to trigger 210 * the partition table update 211 * but remember to load the module with max_part != 0 : 212 * modprobe nbd max_part=63 213 */ 214 nbd = open(device, O_RDWR); 215 if (nbd >= 0) { 216 close(nbd); 217 } 218 return NULL; 219 } 220 221 static void *nbd_client_thread(void *arg) 222 { 223 char *device = arg; 224 off_t size; 225 size_t blocksize; 226 uint32_t nbdflags; 227 int fd, sock; 228 int ret; 229 pthread_t show_parts_thread; 230 231 sock = unix_socket_outgoing(sockpath); 232 if (sock < 0) { 233 goto out; 234 } 235 236 ret = nbd_receive_negotiate(sock, NULL, &nbdflags, 237 &size, &blocksize); 238 if (ret < 0) { 239 goto out; 240 } 241 242 fd = open(device, O_RDWR); 243 if (fd < 0) { 244 /* Linux-only, we can use %m in printf. */ 245 fprintf(stderr, "Failed to open %s: %m", device); 246 goto out; 247 } 248 249 ret = nbd_init(fd, sock, nbdflags, size, blocksize); 250 if (ret < 0) { 251 goto out; 252 } 253 254 /* update partition table */ 255 pthread_create(&show_parts_thread, NULL, show_parts, device); 256 257 if (verbose) { 258 fprintf(stderr, "NBD device %s is now connected to %s\n", 259 device, srcpath); 260 } else { 261 /* Close stderr so that the qemu-nbd process exits. */ 262 dup2(STDOUT_FILENO, STDERR_FILENO); 263 } 264 265 ret = nbd_client(fd); 266 if (ret) { 267 goto out; 268 } 269 close(fd); 270 kill(getpid(), SIGTERM); 271 return (void *) EXIT_SUCCESS; 272 273 out: 274 kill(getpid(), SIGTERM); 275 return (void *) EXIT_FAILURE; 276 } 277 278 static int nbd_can_accept(void *opaque) 279 { 280 return nb_fds < shared; 281 } 282 283 static void nbd_export_closed(NBDExport *exp) 284 { 285 assert(state == TERMINATING); 286 state = TERMINATED; 287 } 288 289 static void nbd_client_closed(NBDClient *client) 290 { 291 nb_fds--; 292 if (nb_fds == 0 && !persistent && state == RUNNING) { 293 state = TERMINATE; 294 } 295 qemu_notify_event(); 296 nbd_client_put(client); 297 } 298 299 static void nbd_accept(void *opaque) 300 { 301 int server_fd = (uintptr_t) opaque; 302 struct sockaddr_in addr; 303 socklen_t addr_len = sizeof(addr); 304 305 int fd = accept(server_fd, (struct sockaddr *)&addr, &addr_len); 306 if (state >= TERMINATE) { 307 close(fd); 308 return; 309 } 310 311 if (fd >= 0 && nbd_client_new(exp, fd, nbd_client_closed)) { 312 nb_fds++; 313 } 314 } 315 316 int main(int argc, char **argv) 317 { 318 BlockDriverState *bs; 319 BlockDriver *drv; 320 off_t dev_offset = 0; 321 uint32_t nbdflags = 0; 322 bool disconnect = false; 323 const char *bindto = "0.0.0.0"; 324 char *device = NULL; 325 int port = NBD_DEFAULT_PORT; 326 off_t fd_size; 327 QemuOpts *sn_opts = NULL; 328 const char *sn_id_or_name = NULL; 329 const char *sopt = "hVb:o:p:rsnP:c:dvk:e:f:tl:"; 330 struct option lopt[] = { 331 { "help", 0, NULL, 'h' }, 332 { "version", 0, NULL, 'V' }, 333 { "bind", 1, NULL, 'b' }, 334 { "port", 1, NULL, 'p' }, 335 { "socket", 1, NULL, 'k' }, 336 { "offset", 1, NULL, 'o' }, 337 { "read-only", 0, NULL, 'r' }, 338 { "partition", 1, NULL, 'P' }, 339 { "connect", 1, NULL, 'c' }, 340 { "disconnect", 0, NULL, 'd' }, 341 { "snapshot", 0, NULL, 's' }, 342 { "load-snapshot", 1, NULL, 'l' }, 343 { "nocache", 0, NULL, 'n' }, 344 { "cache", 1, NULL, QEMU_NBD_OPT_CACHE }, 345 #ifdef CONFIG_LINUX_AIO 346 { "aio", 1, NULL, QEMU_NBD_OPT_AIO }, 347 #endif 348 { "discard", 1, NULL, QEMU_NBD_OPT_DISCARD }, 349 { "shared", 1, NULL, 'e' }, 350 { "format", 1, NULL, 'f' }, 351 { "persistent", 0, NULL, 't' }, 352 { "verbose", 0, NULL, 'v' }, 353 { NULL, 0, NULL, 0 } 354 }; 355 int ch; 356 int opt_ind = 0; 357 int li; 358 char *end; 359 int flags = BDRV_O_RDWR; 360 int partition = -1; 361 int ret; 362 int fd; 363 bool seen_cache = false; 364 bool seen_discard = false; 365 #ifdef CONFIG_LINUX_AIO 366 bool seen_aio = false; 367 #endif 368 pthread_t client_thread; 369 const char *fmt = NULL; 370 Error *local_err = NULL; 371 372 /* The client thread uses SIGTERM to interrupt the server. A signal 373 * handler ensures that "qemu-nbd -v -c" exits with a nice status code. 374 */ 375 struct sigaction sa_sigterm; 376 memset(&sa_sigterm, 0, sizeof(sa_sigterm)); 377 sa_sigterm.sa_handler = termsig_handler; 378 sigaction(SIGTERM, &sa_sigterm, NULL); 379 380 while ((ch = getopt_long(argc, argv, sopt, lopt, &opt_ind)) != -1) { 381 switch (ch) { 382 case 's': 383 flags |= BDRV_O_SNAPSHOT; 384 break; 385 case 'n': 386 optarg = (char *) "none"; 387 /* fallthrough */ 388 case QEMU_NBD_OPT_CACHE: 389 if (seen_cache) { 390 errx(EXIT_FAILURE, "-n and --cache can only be specified once"); 391 } 392 seen_cache = true; 393 if (bdrv_parse_cache_flags(optarg, &flags) == -1) { 394 errx(EXIT_FAILURE, "Invalid cache mode `%s'", optarg); 395 } 396 break; 397 #ifdef CONFIG_LINUX_AIO 398 case QEMU_NBD_OPT_AIO: 399 if (seen_aio) { 400 errx(EXIT_FAILURE, "--aio can only be specified once"); 401 } 402 seen_aio = true; 403 if (!strcmp(optarg, "native")) { 404 flags |= BDRV_O_NATIVE_AIO; 405 } else if (!strcmp(optarg, "threads")) { 406 /* this is the default */ 407 } else { 408 errx(EXIT_FAILURE, "invalid aio mode `%s'", optarg); 409 } 410 break; 411 #endif 412 case QEMU_NBD_OPT_DISCARD: 413 if (seen_discard) { 414 errx(EXIT_FAILURE, "--discard can only be specified once"); 415 } 416 seen_discard = true; 417 if (bdrv_parse_discard_flags(optarg, &flags) == -1) { 418 errx(EXIT_FAILURE, "Invalid discard mode `%s'", optarg); 419 } 420 break; 421 case 'b': 422 bindto = optarg; 423 break; 424 case 'p': 425 li = strtol(optarg, &end, 0); 426 if (*end) { 427 errx(EXIT_FAILURE, "Invalid port `%s'", optarg); 428 } 429 if (li < 1 || li > 65535) { 430 errx(EXIT_FAILURE, "Port out of range `%s'", optarg); 431 } 432 port = (uint16_t)li; 433 break; 434 case 'o': 435 dev_offset = strtoll (optarg, &end, 0); 436 if (*end) { 437 errx(EXIT_FAILURE, "Invalid offset `%s'", optarg); 438 } 439 if (dev_offset < 0) { 440 errx(EXIT_FAILURE, "Offset must be positive `%s'", optarg); 441 } 442 break; 443 case 'l': 444 if (strstart(optarg, SNAPSHOT_OPT_BASE, NULL)) { 445 sn_opts = qemu_opts_parse(&internal_snapshot_opts, optarg, 0); 446 if (!sn_opts) { 447 errx(EXIT_FAILURE, "Failed in parsing snapshot param `%s'", 448 optarg); 449 } 450 } else { 451 sn_id_or_name = optarg; 452 } 453 /* fall through */ 454 case 'r': 455 nbdflags |= NBD_FLAG_READ_ONLY; 456 flags &= ~BDRV_O_RDWR; 457 break; 458 case 'P': 459 partition = strtol(optarg, &end, 0); 460 if (*end) 461 errx(EXIT_FAILURE, "Invalid partition `%s'", optarg); 462 if (partition < 1 || partition > 8) 463 errx(EXIT_FAILURE, "Invalid partition %d", partition); 464 break; 465 case 'k': 466 sockpath = optarg; 467 if (sockpath[0] != '/') 468 errx(EXIT_FAILURE, "socket path must be absolute\n"); 469 break; 470 case 'd': 471 disconnect = true; 472 break; 473 case 'c': 474 device = optarg; 475 break; 476 case 'e': 477 shared = strtol(optarg, &end, 0); 478 if (*end) { 479 errx(EXIT_FAILURE, "Invalid shared device number '%s'", optarg); 480 } 481 if (shared < 1) { 482 errx(EXIT_FAILURE, "Shared device number must be greater than 0\n"); 483 } 484 break; 485 case 'f': 486 fmt = optarg; 487 break; 488 case 't': 489 persistent = 1; 490 break; 491 case 'v': 492 verbose = 1; 493 break; 494 case 'V': 495 version(argv[0]); 496 exit(0); 497 break; 498 case 'h': 499 usage(argv[0]); 500 exit(0); 501 break; 502 case '?': 503 errx(EXIT_FAILURE, "Try `%s --help' for more information.", 504 argv[0]); 505 } 506 } 507 508 if ((argc - optind) != 1) { 509 errx(EXIT_FAILURE, "Invalid number of argument.\n" 510 "Try `%s --help' for more information.", 511 argv[0]); 512 } 513 514 if (disconnect) { 515 fd = open(argv[optind], O_RDWR); 516 if (fd < 0) { 517 err(EXIT_FAILURE, "Cannot open %s", argv[optind]); 518 } 519 nbd_disconnect(fd); 520 521 close(fd); 522 523 printf("%s disconnected\n", argv[optind]); 524 525 return 0; 526 } 527 528 if (device && !verbose) { 529 int stderr_fd[2]; 530 pid_t pid; 531 int ret; 532 533 if (qemu_pipe(stderr_fd) < 0) { 534 err(EXIT_FAILURE, "Error setting up communication pipe"); 535 } 536 537 /* Now daemonize, but keep a communication channel open to 538 * print errors and exit with the proper status code. 539 */ 540 pid = fork(); 541 if (pid == 0) { 542 close(stderr_fd[0]); 543 ret = qemu_daemon(1, 0); 544 545 /* Temporarily redirect stderr to the parent's pipe... */ 546 dup2(stderr_fd[1], STDERR_FILENO); 547 if (ret < 0) { 548 err(EXIT_FAILURE, "Failed to daemonize"); 549 } 550 551 /* ... close the descriptor we inherited and go on. */ 552 close(stderr_fd[1]); 553 } else { 554 bool errors = false; 555 char *buf; 556 557 /* In the parent. Print error messages from the child until 558 * it closes the pipe. 559 */ 560 close(stderr_fd[1]); 561 buf = g_malloc(1024); 562 while ((ret = read(stderr_fd[0], buf, 1024)) > 0) { 563 errors = true; 564 ret = qemu_write_full(STDERR_FILENO, buf, ret); 565 if (ret < 0) { 566 exit(EXIT_FAILURE); 567 } 568 } 569 if (ret < 0) { 570 err(EXIT_FAILURE, "Cannot read from daemon"); 571 } 572 573 /* Usually the daemon should not print any message. 574 * Exit with zero status in that case. 575 */ 576 exit(errors); 577 } 578 } 579 580 if (device != NULL && sockpath == NULL) { 581 sockpath = g_malloc(128); 582 snprintf(sockpath, 128, SOCKET_PATH, basename(device)); 583 } 584 585 qemu_init_main_loop(); 586 bdrv_init(); 587 atexit(bdrv_close_all); 588 589 if (fmt) { 590 drv = bdrv_find_format(fmt); 591 if (!drv) { 592 errx(EXIT_FAILURE, "Unknown file format '%s'", fmt); 593 } 594 } else { 595 drv = NULL; 596 } 597 598 bs = bdrv_new("hda"); 599 srcpath = argv[optind]; 600 ret = bdrv_open(bs, srcpath, NULL, flags, drv, &local_err); 601 if (ret < 0) { 602 errno = -ret; 603 err(EXIT_FAILURE, "Failed to bdrv_open '%s': %s", argv[optind], 604 error_get_pretty(local_err)); 605 } 606 607 if (sn_opts) { 608 ret = bdrv_snapshot_load_tmp(bs, 609 qemu_opt_get(sn_opts, SNAPSHOT_OPT_ID), 610 qemu_opt_get(sn_opts, SNAPSHOT_OPT_NAME), 611 &local_err); 612 } else if (sn_id_or_name) { 613 ret = bdrv_snapshot_load_tmp_by_id_or_name(bs, sn_id_or_name, 614 &local_err); 615 } 616 if (ret < 0) { 617 errno = -ret; 618 err(EXIT_FAILURE, 619 "Failed to load snapshot: %s", 620 error_get_pretty(local_err)); 621 } 622 623 fd_size = bdrv_getlength(bs); 624 625 if (partition != -1) { 626 ret = find_partition(bs, partition, &dev_offset, &fd_size); 627 if (ret < 0) { 628 errno = -ret; 629 err(EXIT_FAILURE, "Could not find partition %d", partition); 630 } 631 } 632 633 exp = nbd_export_new(bs, dev_offset, fd_size, nbdflags, nbd_export_closed); 634 635 if (sockpath) { 636 fd = unix_socket_incoming(sockpath); 637 } else { 638 fd = tcp_socket_incoming(bindto, port); 639 } 640 641 if (fd < 0) { 642 return 1; 643 } 644 645 if (device) { 646 int ret; 647 648 ret = pthread_create(&client_thread, NULL, nbd_client_thread, device); 649 if (ret != 0) { 650 errx(EXIT_FAILURE, "Failed to create client thread: %s", 651 strerror(ret)); 652 } 653 } else { 654 /* Shut up GCC warnings. */ 655 memset(&client_thread, 0, sizeof(client_thread)); 656 } 657 658 qemu_set_fd_handler2(fd, nbd_can_accept, nbd_accept, NULL, 659 (void *)(uintptr_t)fd); 660 661 /* now when the initialization is (almost) complete, chdir("/") 662 * to free any busy filesystems */ 663 if (chdir("/") < 0) { 664 err(EXIT_FAILURE, "Could not chdir to root directory"); 665 } 666 667 state = RUNNING; 668 do { 669 main_loop_wait(false); 670 if (state == TERMINATE) { 671 state = TERMINATING; 672 nbd_export_close(exp); 673 nbd_export_put(exp); 674 exp = NULL; 675 } 676 } while (state != TERMINATED); 677 678 bdrv_close(bs); 679 if (sockpath) { 680 unlink(sockpath); 681 } 682 683 if (sn_opts) { 684 qemu_opts_del(sn_opts); 685 } 686 687 if (device) { 688 void *ret; 689 pthread_join(client_thread, &ret); 690 exit(ret != NULL); 691 } else { 692 exit(EXIT_SUCCESS); 693 } 694 } 695