1 /* 2 * Copyright (C) 2005 Anthony Liguori <anthony@codemonkey.ws> 3 * 4 * Network Block Device Client Side 5 * 6 * This program is free software; you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License as published by 8 * the Free Software Foundation; under version 2 of the License. 9 * 10 * This program is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 * GNU General Public License for more details. 14 * 15 * You should have received a copy of the GNU General Public License 16 * along with this program; if not, see <http://www.gnu.org/licenses/>. 17 */ 18 19 #include "qemu/osdep.h" 20 #include "qapi/error.h" 21 #include "nbd-internal.h" 22 23 static int nbd_errno_to_system_errno(int err) 24 { 25 switch (err) { 26 case NBD_SUCCESS: 27 return 0; 28 case NBD_EPERM: 29 return EPERM; 30 case NBD_EIO: 31 return EIO; 32 case NBD_ENOMEM: 33 return ENOMEM; 34 case NBD_ENOSPC: 35 return ENOSPC; 36 case NBD_EINVAL: 37 default: 38 return EINVAL; 39 } 40 } 41 42 /* Definitions for opaque data types */ 43 44 static QTAILQ_HEAD(, NBDExport) exports = QTAILQ_HEAD_INITIALIZER(exports); 45 46 /* That's all folks */ 47 48 /* Basic flow for negotiation 49 50 Server Client 51 Negotiate 52 53 or 54 55 Server Client 56 Negotiate #1 57 Option 58 Negotiate #2 59 60 ---- 61 62 followed by 63 64 Server Client 65 Request 66 Response 67 Request 68 Response 69 ... 70 ... 71 Request (type == 2) 72 73 */ 74 75 76 static int nbd_handle_reply_err(uint32_t opt, uint32_t type, Error **errp) 77 { 78 if (!(type & (1 << 31))) { 79 return 0; 80 } 81 82 switch (type) { 83 case NBD_REP_ERR_UNSUP: 84 error_setg(errp, "Unsupported option type %x", opt); 85 break; 86 87 case NBD_REP_ERR_POLICY: 88 error_setg(errp, "Denied by server for option %x", opt); 89 break; 90 91 case NBD_REP_ERR_INVALID: 92 error_setg(errp, "Invalid data length for option %x", opt); 93 break; 94 95 case NBD_REP_ERR_TLS_REQD: 96 error_setg(errp, "TLS negotiation required before option %x", opt); 97 break; 98 99 default: 100 error_setg(errp, "Unknown error code when asking for option %x", opt); 101 break; 102 } 103 104 return -1; 105 } 106 107 static int nbd_receive_list(QIOChannel *ioc, char **name, Error **errp) 108 { 109 uint64_t magic; 110 uint32_t opt; 111 uint32_t type; 112 uint32_t len; 113 uint32_t namelen; 114 115 *name = NULL; 116 if (read_sync(ioc, &magic, sizeof(magic)) != sizeof(magic)) { 117 error_setg(errp, "failed to read list option magic"); 118 return -1; 119 } 120 magic = be64_to_cpu(magic); 121 if (magic != NBD_REP_MAGIC) { 122 error_setg(errp, "Unexpected option list magic"); 123 return -1; 124 } 125 if (read_sync(ioc, &opt, sizeof(opt)) != sizeof(opt)) { 126 error_setg(errp, "failed to read list option"); 127 return -1; 128 } 129 opt = be32_to_cpu(opt); 130 if (opt != NBD_OPT_LIST) { 131 error_setg(errp, "Unexpected option type %x expected %x", 132 opt, NBD_OPT_LIST); 133 return -1; 134 } 135 136 if (read_sync(ioc, &type, sizeof(type)) != sizeof(type)) { 137 error_setg(errp, "failed to read list option type"); 138 return -1; 139 } 140 type = be32_to_cpu(type); 141 if (type == NBD_REP_ERR_UNSUP) { 142 return 0; 143 } 144 if (nbd_handle_reply_err(opt, type, errp) < 0) { 145 return -1; 146 } 147 148 if (read_sync(ioc, &len, sizeof(len)) != sizeof(len)) { 149 error_setg(errp, "failed to read option length"); 150 return -1; 151 } 152 len = be32_to_cpu(len); 153 154 if (type == NBD_REP_ACK) { 155 if (len != 0) { 156 error_setg(errp, "length too long for option end"); 157 return -1; 158 } 159 } else if (type == NBD_REP_SERVER) { 160 if (read_sync(ioc, &namelen, sizeof(namelen)) != sizeof(namelen)) { 161 error_setg(errp, "failed to read option name length"); 162 return -1; 163 } 164 namelen = be32_to_cpu(namelen); 165 if (len != (namelen + sizeof(namelen))) { 166 error_setg(errp, "incorrect option mame length"); 167 return -1; 168 } 169 if (namelen > 255) { 170 error_setg(errp, "export name length too long %d", namelen); 171 return -1; 172 } 173 174 *name = g_new0(char, namelen + 1); 175 if (read_sync(ioc, *name, namelen) != namelen) { 176 error_setg(errp, "failed to read export name"); 177 g_free(*name); 178 *name = NULL; 179 return -1; 180 } 181 (*name)[namelen] = '\0'; 182 } else { 183 error_setg(errp, "Unexpected reply type %x expected %x", 184 type, NBD_REP_SERVER); 185 return -1; 186 } 187 return 1; 188 } 189 190 191 static int nbd_receive_query_exports(QIOChannel *ioc, 192 const char *wantname, 193 Error **errp) 194 { 195 uint64_t magic = cpu_to_be64(NBD_OPTS_MAGIC); 196 uint32_t opt = cpu_to_be32(NBD_OPT_LIST); 197 uint32_t length = 0; 198 bool foundExport = false; 199 200 TRACE("Querying export list"); 201 if (write_sync(ioc, &magic, sizeof(magic)) != sizeof(magic)) { 202 error_setg(errp, "Failed to send list option magic"); 203 return -1; 204 } 205 206 if (write_sync(ioc, &opt, sizeof(opt)) != sizeof(opt)) { 207 error_setg(errp, "Failed to send list option number"); 208 return -1; 209 } 210 211 if (write_sync(ioc, &length, sizeof(length)) != sizeof(length)) { 212 error_setg(errp, "Failed to send list option length"); 213 return -1; 214 } 215 216 TRACE("Reading available export names"); 217 while (1) { 218 char *name = NULL; 219 int ret = nbd_receive_list(ioc, &name, errp); 220 221 if (ret < 0) { 222 g_free(name); 223 name = NULL; 224 return -1; 225 } 226 if (ret == 0) { 227 /* Server doesn't support export listing, so 228 * we will just assume an export with our 229 * wanted name exists */ 230 foundExport = true; 231 break; 232 } 233 if (name == NULL) { 234 TRACE("End of export name list"); 235 break; 236 } 237 if (g_str_equal(name, wantname)) { 238 foundExport = true; 239 TRACE("Found desired export name '%s'", name); 240 } else { 241 TRACE("Ignored export name '%s'", name); 242 } 243 g_free(name); 244 } 245 246 if (!foundExport) { 247 error_setg(errp, "No export with name '%s' available", wantname); 248 return -1; 249 } 250 251 return 0; 252 } 253 254 static QIOChannel *nbd_receive_starttls(QIOChannel *ioc, 255 QCryptoTLSCreds *tlscreds, 256 const char *hostname, Error **errp) 257 { 258 uint64_t magic = cpu_to_be64(NBD_OPTS_MAGIC); 259 uint32_t opt = cpu_to_be32(NBD_OPT_STARTTLS); 260 uint32_t length = 0; 261 uint32_t type; 262 QIOChannelTLS *tioc; 263 struct NBDTLSHandshakeData data = { 0 }; 264 265 TRACE("Requesting TLS from server"); 266 if (write_sync(ioc, &magic, sizeof(magic)) != sizeof(magic)) { 267 error_setg(errp, "Failed to send option magic"); 268 return NULL; 269 } 270 271 if (write_sync(ioc, &opt, sizeof(opt)) != sizeof(opt)) { 272 error_setg(errp, "Failed to send option number"); 273 return NULL; 274 } 275 276 if (write_sync(ioc, &length, sizeof(length)) != sizeof(length)) { 277 error_setg(errp, "Failed to send option length"); 278 return NULL; 279 } 280 281 TRACE("Getting TLS reply from server1"); 282 if (read_sync(ioc, &magic, sizeof(magic)) != sizeof(magic)) { 283 error_setg(errp, "failed to read option magic"); 284 return NULL; 285 } 286 magic = be64_to_cpu(magic); 287 if (magic != NBD_REP_MAGIC) { 288 error_setg(errp, "Unexpected option magic"); 289 return NULL; 290 } 291 TRACE("Getting TLS reply from server2"); 292 if (read_sync(ioc, &opt, sizeof(opt)) != sizeof(opt)) { 293 error_setg(errp, "failed to read option"); 294 return NULL; 295 } 296 opt = be32_to_cpu(opt); 297 if (opt != NBD_OPT_STARTTLS) { 298 error_setg(errp, "Unexpected option type %x expected %x", 299 opt, NBD_OPT_STARTTLS); 300 return NULL; 301 } 302 303 TRACE("Getting TLS reply from server"); 304 if (read_sync(ioc, &type, sizeof(type)) != sizeof(type)) { 305 error_setg(errp, "failed to read option type"); 306 return NULL; 307 } 308 type = be32_to_cpu(type); 309 if (type != NBD_REP_ACK) { 310 error_setg(errp, "Server rejected request to start TLS %x", 311 type); 312 return NULL; 313 } 314 315 TRACE("Getting TLS reply from server"); 316 if (read_sync(ioc, &length, sizeof(length)) != sizeof(length)) { 317 error_setg(errp, "failed to read option length"); 318 return NULL; 319 } 320 length = be32_to_cpu(length); 321 if (length != 0) { 322 error_setg(errp, "Start TLS reponse was not zero %x", 323 length); 324 return NULL; 325 } 326 327 TRACE("TLS request approved, setting up TLS"); 328 tioc = qio_channel_tls_new_client(ioc, tlscreds, hostname, errp); 329 if (!tioc) { 330 return NULL; 331 } 332 data.loop = g_main_loop_new(g_main_context_default(), FALSE); 333 TRACE("Starting TLS hanshake"); 334 qio_channel_tls_handshake(tioc, 335 nbd_tls_handshake, 336 &data, 337 NULL); 338 339 if (!data.complete) { 340 g_main_loop_run(data.loop); 341 } 342 g_main_loop_unref(data.loop); 343 if (data.error) { 344 error_propagate(errp, data.error); 345 object_unref(OBJECT(tioc)); 346 return NULL; 347 } 348 349 return QIO_CHANNEL(tioc); 350 } 351 352 353 int nbd_receive_negotiate(QIOChannel *ioc, const char *name, uint32_t *flags, 354 QCryptoTLSCreds *tlscreds, const char *hostname, 355 QIOChannel **outioc, 356 off_t *size, Error **errp) 357 { 358 char buf[256]; 359 uint64_t magic, s; 360 int rc; 361 362 TRACE("Receiving negotiation tlscreds=%p hostname=%s.", 363 tlscreds, hostname ? hostname : "<null>"); 364 365 rc = -EINVAL; 366 367 if (outioc) { 368 *outioc = NULL; 369 } 370 if (tlscreds && !outioc) { 371 error_setg(errp, "Output I/O channel required for TLS"); 372 goto fail; 373 } 374 375 if (read_sync(ioc, buf, 8) != 8) { 376 error_setg(errp, "Failed to read data"); 377 goto fail; 378 } 379 380 buf[8] = '\0'; 381 if (strlen(buf) == 0) { 382 error_setg(errp, "Server connection closed unexpectedly"); 383 goto fail; 384 } 385 386 TRACE("Magic is %c%c%c%c%c%c%c%c", 387 qemu_isprint(buf[0]) ? buf[0] : '.', 388 qemu_isprint(buf[1]) ? buf[1] : '.', 389 qemu_isprint(buf[2]) ? buf[2] : '.', 390 qemu_isprint(buf[3]) ? buf[3] : '.', 391 qemu_isprint(buf[4]) ? buf[4] : '.', 392 qemu_isprint(buf[5]) ? buf[5] : '.', 393 qemu_isprint(buf[6]) ? buf[6] : '.', 394 qemu_isprint(buf[7]) ? buf[7] : '.'); 395 396 if (memcmp(buf, "NBDMAGIC", 8) != 0) { 397 error_setg(errp, "Invalid magic received"); 398 goto fail; 399 } 400 401 if (read_sync(ioc, &magic, sizeof(magic)) != sizeof(magic)) { 402 error_setg(errp, "Failed to read magic"); 403 goto fail; 404 } 405 magic = be64_to_cpu(magic); 406 TRACE("Magic is 0x%" PRIx64, magic); 407 408 if (magic == NBD_OPTS_MAGIC) { 409 uint32_t clientflags = 0; 410 uint32_t opt; 411 uint32_t namesize; 412 uint16_t globalflags; 413 uint16_t exportflags; 414 bool fixedNewStyle = false; 415 416 if (read_sync(ioc, &globalflags, sizeof(globalflags)) != 417 sizeof(globalflags)) { 418 error_setg(errp, "Failed to read server flags"); 419 goto fail; 420 } 421 globalflags = be16_to_cpu(globalflags); 422 *flags = globalflags << 16; 423 TRACE("Global flags are %x", globalflags); 424 if (globalflags & NBD_FLAG_FIXED_NEWSTYLE) { 425 fixedNewStyle = true; 426 TRACE("Server supports fixed new style"); 427 clientflags |= NBD_FLAG_C_FIXED_NEWSTYLE; 428 } 429 /* client requested flags */ 430 clientflags = cpu_to_be32(clientflags); 431 if (write_sync(ioc, &clientflags, sizeof(clientflags)) != 432 sizeof(clientflags)) { 433 error_setg(errp, "Failed to send clientflags field"); 434 goto fail; 435 } 436 if (tlscreds) { 437 if (fixedNewStyle) { 438 *outioc = nbd_receive_starttls(ioc, tlscreds, hostname, errp); 439 if (!*outioc) { 440 goto fail; 441 } 442 ioc = *outioc; 443 } else { 444 error_setg(errp, "Server does not support STARTTLS"); 445 goto fail; 446 } 447 } 448 if (!name) { 449 TRACE("Using default NBD export name \"\""); 450 name = ""; 451 } 452 if (fixedNewStyle) { 453 /* Check our desired export is present in the 454 * server export list. Since NBD_OPT_EXPORT_NAME 455 * cannot return an error message, running this 456 * query gives us good error reporting if the 457 * server required TLS 458 */ 459 if (nbd_receive_query_exports(ioc, name, errp) < 0) { 460 goto fail; 461 } 462 } 463 /* write the export name */ 464 magic = cpu_to_be64(magic); 465 if (write_sync(ioc, &magic, sizeof(magic)) != sizeof(magic)) { 466 error_setg(errp, "Failed to send export name magic"); 467 goto fail; 468 } 469 opt = cpu_to_be32(NBD_OPT_EXPORT_NAME); 470 if (write_sync(ioc, &opt, sizeof(opt)) != sizeof(opt)) { 471 error_setg(errp, "Failed to send export name option number"); 472 goto fail; 473 } 474 namesize = cpu_to_be32(strlen(name)); 475 if (write_sync(ioc, &namesize, sizeof(namesize)) != 476 sizeof(namesize)) { 477 error_setg(errp, "Failed to send export name length"); 478 goto fail; 479 } 480 if (write_sync(ioc, (char *)name, strlen(name)) != strlen(name)) { 481 error_setg(errp, "Failed to send export name"); 482 goto fail; 483 } 484 485 if (read_sync(ioc, &s, sizeof(s)) != sizeof(s)) { 486 error_setg(errp, "Failed to read export length"); 487 goto fail; 488 } 489 *size = be64_to_cpu(s); 490 TRACE("Size is %" PRIu64, *size); 491 492 if (read_sync(ioc, &exportflags, sizeof(exportflags)) != 493 sizeof(exportflags)) { 494 error_setg(errp, "Failed to read export flags"); 495 goto fail; 496 } 497 exportflags = be16_to_cpu(exportflags); 498 *flags |= exportflags; 499 TRACE("Export flags are %x", exportflags); 500 } else if (magic == NBD_CLIENT_MAGIC) { 501 if (name) { 502 error_setg(errp, "Server does not support export names"); 503 goto fail; 504 } 505 if (tlscreds) { 506 error_setg(errp, "Server does not support STARTTLS"); 507 goto fail; 508 } 509 510 if (read_sync(ioc, &s, sizeof(s)) != sizeof(s)) { 511 error_setg(errp, "Failed to read export length"); 512 goto fail; 513 } 514 *size = be64_to_cpu(s); 515 TRACE("Size is %" PRIu64, *size); 516 517 if (read_sync(ioc, flags, sizeof(*flags)) != sizeof(*flags)) { 518 error_setg(errp, "Failed to read export flags"); 519 goto fail; 520 } 521 *flags = be32_to_cpup(flags); 522 } else { 523 error_setg(errp, "Bad magic received"); 524 goto fail; 525 } 526 527 if (read_sync(ioc, &buf, 124) != 124) { 528 error_setg(errp, "Failed to read reserved block"); 529 goto fail; 530 } 531 rc = 0; 532 533 fail: 534 return rc; 535 } 536 537 #ifdef __linux__ 538 int nbd_init(int fd, QIOChannelSocket *sioc, uint32_t flags, off_t size) 539 { 540 TRACE("Setting NBD socket"); 541 542 if (ioctl(fd, NBD_SET_SOCK, sioc->fd) < 0) { 543 int serrno = errno; 544 LOG("Failed to set NBD socket"); 545 return -serrno; 546 } 547 548 TRACE("Setting block size to %lu", (unsigned long)BDRV_SECTOR_SIZE); 549 550 if (ioctl(fd, NBD_SET_BLKSIZE, (size_t)BDRV_SECTOR_SIZE) < 0) { 551 int serrno = errno; 552 LOG("Failed setting NBD block size"); 553 return -serrno; 554 } 555 556 TRACE("Setting size to %zd block(s)", (size_t)(size / BDRV_SECTOR_SIZE)); 557 558 if (ioctl(fd, NBD_SET_SIZE_BLOCKS, (size_t)(size / BDRV_SECTOR_SIZE)) < 0) { 559 int serrno = errno; 560 LOG("Failed setting size (in blocks)"); 561 return -serrno; 562 } 563 564 if (ioctl(fd, NBD_SET_FLAGS, flags) < 0) { 565 if (errno == ENOTTY) { 566 int read_only = (flags & NBD_FLAG_READ_ONLY) != 0; 567 TRACE("Setting readonly attribute"); 568 569 if (ioctl(fd, BLKROSET, (unsigned long) &read_only) < 0) { 570 int serrno = errno; 571 LOG("Failed setting read-only attribute"); 572 return -serrno; 573 } 574 } else { 575 int serrno = errno; 576 LOG("Failed setting flags"); 577 return -serrno; 578 } 579 } 580 581 TRACE("Negotiation ended"); 582 583 return 0; 584 } 585 586 int nbd_client(int fd) 587 { 588 int ret; 589 int serrno; 590 591 TRACE("Doing NBD loop"); 592 593 ret = ioctl(fd, NBD_DO_IT); 594 if (ret < 0 && errno == EPIPE) { 595 /* NBD_DO_IT normally returns EPIPE when someone has disconnected 596 * the socket via NBD_DISCONNECT. We do not want to return 1 in 597 * that case. 598 */ 599 ret = 0; 600 } 601 serrno = errno; 602 603 TRACE("NBD loop returned %d: %s", ret, strerror(serrno)); 604 605 TRACE("Clearing NBD queue"); 606 ioctl(fd, NBD_CLEAR_QUE); 607 608 TRACE("Clearing NBD socket"); 609 ioctl(fd, NBD_CLEAR_SOCK); 610 611 errno = serrno; 612 return ret; 613 } 614 #else 615 int nbd_init(int fd, QIOChannelSocket *ioc, uint32_t flags, off_t size) 616 { 617 return -ENOTSUP; 618 } 619 620 int nbd_client(int fd) 621 { 622 return -ENOTSUP; 623 } 624 #endif 625 626 ssize_t nbd_send_request(QIOChannel *ioc, struct nbd_request *request) 627 { 628 uint8_t buf[NBD_REQUEST_SIZE]; 629 ssize_t ret; 630 631 cpu_to_be32w((uint32_t*)buf, NBD_REQUEST_MAGIC); 632 cpu_to_be32w((uint32_t*)(buf + 4), request->type); 633 cpu_to_be64w((uint64_t*)(buf + 8), request->handle); 634 cpu_to_be64w((uint64_t*)(buf + 16), request->from); 635 cpu_to_be32w((uint32_t*)(buf + 24), request->len); 636 637 TRACE("Sending request to client: " 638 "{ .from = %" PRIu64", .len = %u, .handle = %" PRIu64", .type=%i}", 639 request->from, request->len, request->handle, request->type); 640 641 ret = write_sync(ioc, buf, sizeof(buf)); 642 if (ret < 0) { 643 return ret; 644 } 645 646 if (ret != sizeof(buf)) { 647 LOG("writing to socket failed"); 648 return -EINVAL; 649 } 650 return 0; 651 } 652 653 ssize_t nbd_receive_reply(QIOChannel *ioc, struct nbd_reply *reply) 654 { 655 uint8_t buf[NBD_REPLY_SIZE]; 656 uint32_t magic; 657 ssize_t ret; 658 659 ret = read_sync(ioc, buf, sizeof(buf)); 660 if (ret < 0) { 661 return ret; 662 } 663 664 if (ret != sizeof(buf)) { 665 LOG("read failed"); 666 return -EINVAL; 667 } 668 669 /* Reply 670 [ 0 .. 3] magic (NBD_REPLY_MAGIC) 671 [ 4 .. 7] error (0 == no error) 672 [ 7 .. 15] handle 673 */ 674 675 magic = be32_to_cpup((uint32_t*)buf); 676 reply->error = be32_to_cpup((uint32_t*)(buf + 4)); 677 reply->handle = be64_to_cpup((uint64_t*)(buf + 8)); 678 679 reply->error = nbd_errno_to_system_errno(reply->error); 680 681 TRACE("Got reply: " 682 "{ magic = 0x%x, .error = %d, handle = %" PRIu64" }", 683 magic, reply->error, reply->handle); 684 685 if (magic != NBD_REPLY_MAGIC) { 686 LOG("invalid magic (got 0x%x)", magic); 687 return -EINVAL; 688 } 689 return 0; 690 } 691 692