1 /* 2 * Copyright (C) 2005 Anthony Liguori <anthony@codemonkey.ws> 3 * 4 * Network Block Device 5 * 6 * This program is free software; you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License as published by 8 * the Free Software Foundation; under version 2 of the License. 9 * 10 * This program is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 * GNU General Public License for more details. 14 * 15 * You should have received a copy of the GNU General Public License 16 * along with this program; if not, see <http://www.gnu.org/licenses/>. 17 */ 18 19 #include "qemu-common.h" 20 #include "block/block.h" 21 #include "block/nbd.h" 22 23 #include <stdarg.h> 24 #include <stdio.h> 25 #include <getopt.h> 26 #include <err.h> 27 #include <sys/types.h> 28 #include <sys/socket.h> 29 #include <netinet/in.h> 30 #include <netinet/tcp.h> 31 #include <arpa/inet.h> 32 #include <signal.h> 33 #include <libgen.h> 34 #include <pthread.h> 35 36 #define SOCKET_PATH "/var/lock/qemu-nbd-%s" 37 #define QEMU_NBD_OPT_CACHE 1 38 #define QEMU_NBD_OPT_AIO 2 39 #define QEMU_NBD_OPT_DISCARD 3 40 41 static NBDExport *exp; 42 static int verbose; 43 static char *srcpath; 44 static char *sockpath; 45 static int persistent = 0; 46 static enum { RUNNING, TERMINATE, TERMINATING, TERMINATED } state; 47 static int shared = 1; 48 static int nb_fds; 49 50 static void usage(const char *name) 51 { 52 (printf) ( 53 "Usage: %s [OPTIONS] FILE\n" 54 "QEMU Disk Network Block Device Server\n" 55 "\n" 56 " -h, --help display this help and exit\n" 57 " -V, --version output version information and exit\n" 58 "\n" 59 "Connection properties:\n" 60 " -p, --port=PORT port to listen on (default `%d')\n" 61 " -b, --bind=IFACE interface to bind to (default `0.0.0.0')\n" 62 " -k, --socket=PATH path to the unix socket\n" 63 " (default '"SOCKET_PATH"')\n" 64 " -e, --shared=NUM device can be shared by NUM clients (default '1')\n" 65 " -t, --persistent don't exit on the last connection\n" 66 " -v, --verbose display extra debugging information\n" 67 "\n" 68 "Exposing part of the image:\n" 69 " -o, --offset=OFFSET offset into the image\n" 70 " -P, --partition=NUM only expose partition NUM\n" 71 "\n" 72 #ifdef __linux__ 73 "Kernel NBD client support:\n" 74 " -c, --connect=DEV connect FILE to the local NBD device DEV\n" 75 " -d, --disconnect disconnect the specified device\n" 76 "\n" 77 #endif 78 "\n" 79 "Block device options:\n" 80 " -r, --read-only export read-only\n" 81 " -s, --snapshot use snapshot file\n" 82 " -n, --nocache disable host cache\n" 83 " --cache=MODE set cache mode (none, writeback, ...)\n" 84 #ifdef CONFIG_LINUX_AIO 85 " --aio=MODE set AIO mode (native or threads)\n" 86 #endif 87 "\n" 88 "Report bugs to <qemu-devel@nongnu.org>\n" 89 , name, NBD_DEFAULT_PORT, "DEVICE"); 90 } 91 92 static void version(const char *name) 93 { 94 printf( 95 "%s version 0.0.1\n" 96 "Written by Anthony Liguori.\n" 97 "\n" 98 "Copyright (C) 2006 Anthony Liguori <anthony@codemonkey.ws>.\n" 99 "This is free software; see the source for copying conditions. There is NO\n" 100 "warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n" 101 , name); 102 } 103 104 struct partition_record 105 { 106 uint8_t bootable; 107 uint8_t start_head; 108 uint32_t start_cylinder; 109 uint8_t start_sector; 110 uint8_t system; 111 uint8_t end_head; 112 uint8_t end_cylinder; 113 uint8_t end_sector; 114 uint32_t start_sector_abs; 115 uint32_t nb_sectors_abs; 116 }; 117 118 static void read_partition(uint8_t *p, struct partition_record *r) 119 { 120 r->bootable = p[0]; 121 r->start_head = p[1]; 122 r->start_cylinder = p[3] | ((p[2] << 2) & 0x0300); 123 r->start_sector = p[2] & 0x3f; 124 r->system = p[4]; 125 r->end_head = p[5]; 126 r->end_cylinder = p[7] | ((p[6] << 2) & 0x300); 127 r->end_sector = p[6] & 0x3f; 128 r->start_sector_abs = p[8] | p[9] << 8 | p[10] << 16 | p[11] << 24; 129 r->nb_sectors_abs = p[12] | p[13] << 8 | p[14] << 16 | p[15] << 24; 130 } 131 132 static int find_partition(BlockDriverState *bs, int partition, 133 off_t *offset, off_t *size) 134 { 135 struct partition_record mbr[4]; 136 uint8_t data[512]; 137 int i; 138 int ext_partnum = 4; 139 int ret; 140 141 if ((ret = bdrv_read(bs, 0, data, 1)) < 0) { 142 errno = -ret; 143 err(EXIT_FAILURE, "error while reading"); 144 } 145 146 if (data[510] != 0x55 || data[511] != 0xaa) { 147 return -EINVAL; 148 } 149 150 for (i = 0; i < 4; i++) { 151 read_partition(&data[446 + 16 * i], &mbr[i]); 152 153 if (!mbr[i].nb_sectors_abs) 154 continue; 155 156 if (mbr[i].system == 0xF || mbr[i].system == 0x5) { 157 struct partition_record ext[4]; 158 uint8_t data1[512]; 159 int j; 160 161 if ((ret = bdrv_read(bs, mbr[i].start_sector_abs, data1, 1)) < 0) { 162 errno = -ret; 163 err(EXIT_FAILURE, "error while reading"); 164 } 165 166 for (j = 0; j < 4; j++) { 167 read_partition(&data1[446 + 16 * j], &ext[j]); 168 if (!ext[j].nb_sectors_abs) 169 continue; 170 171 if ((ext_partnum + j + 1) == partition) { 172 *offset = (uint64_t)ext[j].start_sector_abs << 9; 173 *size = (uint64_t)ext[j].nb_sectors_abs << 9; 174 return 0; 175 } 176 } 177 ext_partnum += 4; 178 } else if ((i + 1) == partition) { 179 *offset = (uint64_t)mbr[i].start_sector_abs << 9; 180 *size = (uint64_t)mbr[i].nb_sectors_abs << 9; 181 return 0; 182 } 183 } 184 185 return -ENOENT; 186 } 187 188 static void termsig_handler(int signum) 189 { 190 state = TERMINATE; 191 qemu_notify_event(); 192 } 193 194 static void *show_parts(void *arg) 195 { 196 char *device = arg; 197 int nbd; 198 199 /* linux just needs an open() to trigger 200 * the partition table update 201 * but remember to load the module with max_part != 0 : 202 * modprobe nbd max_part=63 203 */ 204 nbd = open(device, O_RDWR); 205 if (nbd >= 0) { 206 close(nbd); 207 } 208 return NULL; 209 } 210 211 static void *nbd_client_thread(void *arg) 212 { 213 char *device = arg; 214 off_t size; 215 size_t blocksize; 216 uint32_t nbdflags; 217 int fd, sock; 218 int ret; 219 pthread_t show_parts_thread; 220 221 sock = unix_socket_outgoing(sockpath); 222 if (sock < 0) { 223 goto out; 224 } 225 226 ret = nbd_receive_negotiate(sock, NULL, &nbdflags, 227 &size, &blocksize); 228 if (ret < 0) { 229 goto out; 230 } 231 232 fd = open(device, O_RDWR); 233 if (fd < 0) { 234 /* Linux-only, we can use %m in printf. */ 235 fprintf(stderr, "Failed to open %s: %m", device); 236 goto out; 237 } 238 239 ret = nbd_init(fd, sock, nbdflags, size, blocksize); 240 if (ret < 0) { 241 goto out; 242 } 243 244 /* update partition table */ 245 pthread_create(&show_parts_thread, NULL, show_parts, device); 246 247 if (verbose) { 248 fprintf(stderr, "NBD device %s is now connected to %s\n", 249 device, srcpath); 250 } else { 251 /* Close stderr so that the qemu-nbd process exits. */ 252 dup2(STDOUT_FILENO, STDERR_FILENO); 253 } 254 255 ret = nbd_client(fd); 256 if (ret) { 257 goto out; 258 } 259 close(fd); 260 kill(getpid(), SIGTERM); 261 return (void *) EXIT_SUCCESS; 262 263 out: 264 kill(getpid(), SIGTERM); 265 return (void *) EXIT_FAILURE; 266 } 267 268 static int nbd_can_accept(void *opaque) 269 { 270 return nb_fds < shared; 271 } 272 273 static void nbd_export_closed(NBDExport *exp) 274 { 275 assert(state == TERMINATING); 276 state = TERMINATED; 277 } 278 279 static void nbd_client_closed(NBDClient *client) 280 { 281 nb_fds--; 282 if (nb_fds == 0 && !persistent && state == RUNNING) { 283 state = TERMINATE; 284 } 285 qemu_notify_event(); 286 nbd_client_put(client); 287 } 288 289 static void nbd_accept(void *opaque) 290 { 291 int server_fd = (uintptr_t) opaque; 292 struct sockaddr_in addr; 293 socklen_t addr_len = sizeof(addr); 294 295 int fd = accept(server_fd, (struct sockaddr *)&addr, &addr_len); 296 if (state >= TERMINATE) { 297 close(fd); 298 return; 299 } 300 301 if (fd >= 0 && nbd_client_new(exp, fd, nbd_client_closed)) { 302 nb_fds++; 303 } 304 } 305 306 int main(int argc, char **argv) 307 { 308 BlockDriverState *bs; 309 off_t dev_offset = 0; 310 uint32_t nbdflags = 0; 311 bool disconnect = false; 312 const char *bindto = "0.0.0.0"; 313 char *device = NULL; 314 int port = NBD_DEFAULT_PORT; 315 off_t fd_size; 316 const char *sopt = "hVb:o:p:rsnP:c:dvk:e:t"; 317 struct option lopt[] = { 318 { "help", 0, NULL, 'h' }, 319 { "version", 0, NULL, 'V' }, 320 { "bind", 1, NULL, 'b' }, 321 { "port", 1, NULL, 'p' }, 322 { "socket", 1, NULL, 'k' }, 323 { "offset", 1, NULL, 'o' }, 324 { "read-only", 0, NULL, 'r' }, 325 { "partition", 1, NULL, 'P' }, 326 { "connect", 1, NULL, 'c' }, 327 { "disconnect", 0, NULL, 'd' }, 328 { "snapshot", 0, NULL, 's' }, 329 { "nocache", 0, NULL, 'n' }, 330 { "cache", 1, NULL, QEMU_NBD_OPT_CACHE }, 331 #ifdef CONFIG_LINUX_AIO 332 { "aio", 1, NULL, QEMU_NBD_OPT_AIO }, 333 #endif 334 { "discard", 1, NULL, QEMU_NBD_OPT_DISCARD }, 335 { "shared", 1, NULL, 'e' }, 336 { "persistent", 0, NULL, 't' }, 337 { "verbose", 0, NULL, 'v' }, 338 { NULL, 0, NULL, 0 } 339 }; 340 int ch; 341 int opt_ind = 0; 342 int li; 343 char *end; 344 int flags = BDRV_O_RDWR; 345 int partition = -1; 346 int ret; 347 int fd; 348 bool seen_cache = false; 349 bool seen_discard = false; 350 #ifdef CONFIG_LINUX_AIO 351 bool seen_aio = false; 352 #endif 353 pthread_t client_thread; 354 355 /* The client thread uses SIGTERM to interrupt the server. A signal 356 * handler ensures that "qemu-nbd -v -c" exits with a nice status code. 357 */ 358 struct sigaction sa_sigterm; 359 memset(&sa_sigterm, 0, sizeof(sa_sigterm)); 360 sa_sigterm.sa_handler = termsig_handler; 361 sigaction(SIGTERM, &sa_sigterm, NULL); 362 363 while ((ch = getopt_long(argc, argv, sopt, lopt, &opt_ind)) != -1) { 364 switch (ch) { 365 case 's': 366 flags |= BDRV_O_SNAPSHOT; 367 break; 368 case 'n': 369 optarg = (char *) "none"; 370 /* fallthrough */ 371 case QEMU_NBD_OPT_CACHE: 372 if (seen_cache) { 373 errx(EXIT_FAILURE, "-n and --cache can only be specified once"); 374 } 375 seen_cache = true; 376 if (bdrv_parse_cache_flags(optarg, &flags) == -1) { 377 errx(EXIT_FAILURE, "Invalid cache mode `%s'", optarg); 378 } 379 break; 380 #ifdef CONFIG_LINUX_AIO 381 case QEMU_NBD_OPT_AIO: 382 if (seen_aio) { 383 errx(EXIT_FAILURE, "--aio can only be specified once"); 384 } 385 seen_aio = true; 386 if (!strcmp(optarg, "native")) { 387 flags |= BDRV_O_NATIVE_AIO; 388 } else if (!strcmp(optarg, "threads")) { 389 /* this is the default */ 390 } else { 391 errx(EXIT_FAILURE, "invalid aio mode `%s'", optarg); 392 } 393 break; 394 #endif 395 case QEMU_NBD_OPT_DISCARD: 396 if (seen_discard) { 397 errx(EXIT_FAILURE, "--discard can only be specified once"); 398 } 399 seen_discard = true; 400 if (bdrv_parse_discard_flags(optarg, &flags) == -1) { 401 errx(EXIT_FAILURE, "Invalid discard mode `%s'", optarg); 402 } 403 break; 404 case 'b': 405 bindto = optarg; 406 break; 407 case 'p': 408 li = strtol(optarg, &end, 0); 409 if (*end) { 410 errx(EXIT_FAILURE, "Invalid port `%s'", optarg); 411 } 412 if (li < 1 || li > 65535) { 413 errx(EXIT_FAILURE, "Port out of range `%s'", optarg); 414 } 415 port = (uint16_t)li; 416 break; 417 case 'o': 418 dev_offset = strtoll (optarg, &end, 0); 419 if (*end) { 420 errx(EXIT_FAILURE, "Invalid offset `%s'", optarg); 421 } 422 if (dev_offset < 0) { 423 errx(EXIT_FAILURE, "Offset must be positive `%s'", optarg); 424 } 425 break; 426 case 'r': 427 nbdflags |= NBD_FLAG_READ_ONLY; 428 flags &= ~BDRV_O_RDWR; 429 break; 430 case 'P': 431 partition = strtol(optarg, &end, 0); 432 if (*end) 433 errx(EXIT_FAILURE, "Invalid partition `%s'", optarg); 434 if (partition < 1 || partition > 8) 435 errx(EXIT_FAILURE, "Invalid partition %d", partition); 436 break; 437 case 'k': 438 sockpath = optarg; 439 if (sockpath[0] != '/') 440 errx(EXIT_FAILURE, "socket path must be absolute\n"); 441 break; 442 case 'd': 443 disconnect = true; 444 break; 445 case 'c': 446 device = optarg; 447 break; 448 case 'e': 449 shared = strtol(optarg, &end, 0); 450 if (*end) { 451 errx(EXIT_FAILURE, "Invalid shared device number '%s'", optarg); 452 } 453 if (shared < 1) { 454 errx(EXIT_FAILURE, "Shared device number must be greater than 0\n"); 455 } 456 break; 457 case 't': 458 persistent = 1; 459 break; 460 case 'v': 461 verbose = 1; 462 break; 463 case 'V': 464 version(argv[0]); 465 exit(0); 466 break; 467 case 'h': 468 usage(argv[0]); 469 exit(0); 470 break; 471 case '?': 472 errx(EXIT_FAILURE, "Try `%s --help' for more information.", 473 argv[0]); 474 } 475 } 476 477 if ((argc - optind) != 1) { 478 errx(EXIT_FAILURE, "Invalid number of argument.\n" 479 "Try `%s --help' for more information.", 480 argv[0]); 481 } 482 483 if (disconnect) { 484 fd = open(argv[optind], O_RDWR); 485 if (fd < 0) { 486 err(EXIT_FAILURE, "Cannot open %s", argv[optind]); 487 } 488 nbd_disconnect(fd); 489 490 close(fd); 491 492 printf("%s disconnected\n", argv[optind]); 493 494 return 0; 495 } 496 497 if (device && !verbose) { 498 int stderr_fd[2]; 499 pid_t pid; 500 int ret; 501 502 if (qemu_pipe(stderr_fd) < 0) { 503 err(EXIT_FAILURE, "Error setting up communication pipe"); 504 } 505 506 /* Now daemonize, but keep a communication channel open to 507 * print errors and exit with the proper status code. 508 */ 509 pid = fork(); 510 if (pid == 0) { 511 close(stderr_fd[0]); 512 ret = qemu_daemon(1, 0); 513 514 /* Temporarily redirect stderr to the parent's pipe... */ 515 dup2(stderr_fd[1], STDERR_FILENO); 516 if (ret < 0) { 517 err(EXIT_FAILURE, "Failed to daemonize"); 518 } 519 520 /* ... close the descriptor we inherited and go on. */ 521 close(stderr_fd[1]); 522 } else { 523 bool errors = false; 524 char *buf; 525 526 /* In the parent. Print error messages from the child until 527 * it closes the pipe. 528 */ 529 close(stderr_fd[1]); 530 buf = g_malloc(1024); 531 while ((ret = read(stderr_fd[0], buf, 1024)) > 0) { 532 errors = true; 533 ret = qemu_write_full(STDERR_FILENO, buf, ret); 534 if (ret < 0) { 535 exit(EXIT_FAILURE); 536 } 537 } 538 if (ret < 0) { 539 err(EXIT_FAILURE, "Cannot read from daemon"); 540 } 541 542 /* Usually the daemon should not print any message. 543 * Exit with zero status in that case. 544 */ 545 exit(errors); 546 } 547 } 548 549 if (device != NULL && sockpath == NULL) { 550 sockpath = g_malloc(128); 551 snprintf(sockpath, 128, SOCKET_PATH, basename(device)); 552 } 553 554 qemu_init_main_loop(); 555 bdrv_init(); 556 atexit(bdrv_close_all); 557 558 bs = bdrv_new("hda"); 559 srcpath = argv[optind]; 560 if ((ret = bdrv_open(bs, srcpath, flags, NULL)) < 0) { 561 errno = -ret; 562 err(EXIT_FAILURE, "Failed to bdrv_open '%s'", argv[optind]); 563 } 564 565 fd_size = bdrv_getlength(bs); 566 567 if (partition != -1) { 568 ret = find_partition(bs, partition, &dev_offset, &fd_size); 569 if (ret < 0) { 570 errno = -ret; 571 err(EXIT_FAILURE, "Could not find partition %d", partition); 572 } 573 } 574 575 exp = nbd_export_new(bs, dev_offset, fd_size, nbdflags, nbd_export_closed); 576 577 if (sockpath) { 578 fd = unix_socket_incoming(sockpath); 579 } else { 580 fd = tcp_socket_incoming(bindto, port); 581 } 582 583 if (fd < 0) { 584 return 1; 585 } 586 587 if (device) { 588 int ret; 589 590 ret = pthread_create(&client_thread, NULL, nbd_client_thread, device); 591 if (ret != 0) { 592 errx(EXIT_FAILURE, "Failed to create client thread: %s", 593 strerror(ret)); 594 } 595 } else { 596 /* Shut up GCC warnings. */ 597 memset(&client_thread, 0, sizeof(client_thread)); 598 } 599 600 qemu_set_fd_handler2(fd, nbd_can_accept, nbd_accept, NULL, 601 (void *)(uintptr_t)fd); 602 603 /* now when the initialization is (almost) complete, chdir("/") 604 * to free any busy filesystems */ 605 if (chdir("/") < 0) { 606 err(EXIT_FAILURE, "Could not chdir to root directory"); 607 } 608 609 state = RUNNING; 610 do { 611 main_loop_wait(false); 612 if (state == TERMINATE) { 613 state = TERMINATING; 614 nbd_export_close(exp); 615 nbd_export_put(exp); 616 exp = NULL; 617 } 618 } while (state != TERMINATED); 619 620 bdrv_close(bs); 621 if (sockpath) { 622 unlink(sockpath); 623 } 624 625 if (device) { 626 void *ret; 627 pthread_join(client_thread, &ret); 628 exit(ret != NULL); 629 } else { 630 exit(EXIT_SUCCESS); 631 } 632 } 633