1 /* 2 * Copyright (C) 2016 Red Hat, Inc. 3 * Copyright (C) 2005 Anthony Liguori <anthony@codemonkey.ws> 4 * 5 * Network Block Device Server Side 6 * 7 * This program is free software; you can redistribute it and/or modify 8 * it under the terms of the GNU General Public License as published by 9 * the Free Software Foundation; under version 2 of the License. 10 * 11 * This program is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 * GNU General Public License for more details. 15 * 16 * You should have received a copy of the GNU General Public License 17 * along with this program; if not, see <http://www.gnu.org/licenses/>. 18 */ 19 20 #include "qemu/osdep.h" 21 #include "qapi/error.h" 22 #include "nbd-internal.h" 23 24 static int system_errno_to_nbd_errno(int err) 25 { 26 switch (err) { 27 case 0: 28 return NBD_SUCCESS; 29 case EPERM: 30 case EROFS: 31 return NBD_EPERM; 32 case EIO: 33 return NBD_EIO; 34 case ENOMEM: 35 return NBD_ENOMEM; 36 #ifdef EDQUOT 37 case EDQUOT: 38 #endif 39 case EFBIG: 40 case ENOSPC: 41 return NBD_ENOSPC; 42 case ESHUTDOWN: 43 return NBD_ESHUTDOWN; 44 case EINVAL: 45 default: 46 return NBD_EINVAL; 47 } 48 } 49 50 /* Definitions for opaque data types */ 51 52 typedef struct NBDRequestData NBDRequestData; 53 54 struct NBDRequestData { 55 QSIMPLEQ_ENTRY(NBDRequestData) entry; 56 NBDClient *client; 57 uint8_t *data; 58 bool complete; 59 }; 60 61 struct NBDExport { 62 int refcount; 63 void (*close)(NBDExport *exp); 64 65 BlockBackend *blk; 66 char *name; 67 char *description; 68 off_t dev_offset; 69 off_t size; 70 uint16_t nbdflags; 71 QTAILQ_HEAD(, NBDClient) clients; 72 QTAILQ_ENTRY(NBDExport) next; 73 74 AioContext *ctx; 75 76 BlockBackend *eject_notifier_blk; 77 Notifier eject_notifier; 78 }; 79 80 static QTAILQ_HEAD(, NBDExport) exports = QTAILQ_HEAD_INITIALIZER(exports); 81 82 struct NBDClient { 83 int refcount; 84 void (*close_fn)(NBDClient *client, bool negotiated); 85 86 bool no_zeroes; 87 NBDExport *exp; 88 QCryptoTLSCreds *tlscreds; 89 char *tlsaclname; 90 QIOChannelSocket *sioc; /* The underlying data channel */ 91 QIOChannel *ioc; /* The current I/O channel which may differ (eg TLS) */ 92 93 Coroutine *recv_coroutine; 94 95 CoMutex send_lock; 96 Coroutine *send_coroutine; 97 98 QTAILQ_ENTRY(NBDClient) next; 99 int nb_requests; 100 bool closing; 101 }; 102 103 /* That's all folks */ 104 105 static void nbd_client_receive_next_request(NBDClient *client); 106 107 /* Basic flow for negotiation 108 109 Server Client 110 Negotiate 111 112 or 113 114 Server Client 115 Negotiate #1 116 Option 117 Negotiate #2 118 119 ---- 120 121 followed by 122 123 Server Client 124 Request 125 Response 126 Request 127 Response 128 ... 129 ... 130 Request (type == 2) 131 132 */ 133 134 /* Send a reply header, including length, but no payload. 135 * Return -errno on error, 0 on success. */ 136 static int nbd_negotiate_send_rep_len(QIOChannel *ioc, uint32_t type, 137 uint32_t opt, uint32_t len) 138 { 139 uint64_t magic; 140 141 TRACE("Reply opt=%" PRIx32 " type=%" PRIx32 " len=%" PRIu32, 142 type, opt, len); 143 144 magic = cpu_to_be64(NBD_REP_MAGIC); 145 if (nbd_write(ioc, &magic, sizeof(magic), NULL) < 0) { 146 LOG("write failed (rep magic)"); 147 return -EINVAL; 148 } 149 opt = cpu_to_be32(opt); 150 if (nbd_write(ioc, &opt, sizeof(opt), NULL) < 0) { 151 LOG("write failed (rep opt)"); 152 return -EINVAL; 153 } 154 type = cpu_to_be32(type); 155 if (nbd_write(ioc, &type, sizeof(type), NULL) < 0) { 156 LOG("write failed (rep type)"); 157 return -EINVAL; 158 } 159 len = cpu_to_be32(len); 160 if (nbd_write(ioc, &len, sizeof(len), NULL) < 0) { 161 LOG("write failed (rep data length)"); 162 return -EINVAL; 163 } 164 return 0; 165 } 166 167 /* Send a reply header with default 0 length. 168 * Return -errno on error, 0 on success. */ 169 static int nbd_negotiate_send_rep(QIOChannel *ioc, uint32_t type, uint32_t opt) 170 { 171 return nbd_negotiate_send_rep_len(ioc, type, opt, 0); 172 } 173 174 /* Send an error reply. 175 * Return -errno on error, 0 on success. */ 176 static int GCC_FMT_ATTR(4, 5) 177 nbd_negotiate_send_rep_err(QIOChannel *ioc, uint32_t type, 178 uint32_t opt, const char *fmt, ...) 179 { 180 va_list va; 181 char *msg; 182 int ret; 183 size_t len; 184 185 va_start(va, fmt); 186 msg = g_strdup_vprintf(fmt, va); 187 va_end(va); 188 len = strlen(msg); 189 assert(len < 4096); 190 TRACE("sending error message \"%s\"", msg); 191 ret = nbd_negotiate_send_rep_len(ioc, type, opt, len); 192 if (ret < 0) { 193 goto out; 194 } 195 if (nbd_write(ioc, msg, len, NULL) < 0) { 196 LOG("write failed (error message)"); 197 ret = -EIO; 198 } else { 199 ret = 0; 200 } 201 out: 202 g_free(msg); 203 return ret; 204 } 205 206 /* Send a single NBD_REP_SERVER reply to NBD_OPT_LIST, including payload. 207 * Return -errno on error, 0 on success. */ 208 static int nbd_negotiate_send_rep_list(QIOChannel *ioc, NBDExport *exp) 209 { 210 size_t name_len, desc_len; 211 uint32_t len; 212 const char *name = exp->name ? exp->name : ""; 213 const char *desc = exp->description ? exp->description : ""; 214 int ret; 215 216 TRACE("Advertising export name '%s' description '%s'", name, desc); 217 name_len = strlen(name); 218 desc_len = strlen(desc); 219 len = name_len + desc_len + sizeof(len); 220 ret = nbd_negotiate_send_rep_len(ioc, NBD_REP_SERVER, NBD_OPT_LIST, len); 221 if (ret < 0) { 222 return ret; 223 } 224 225 len = cpu_to_be32(name_len); 226 if (nbd_write(ioc, &len, sizeof(len), NULL) < 0) { 227 LOG("write failed (name length)"); 228 return -EINVAL; 229 } 230 if (nbd_write(ioc, name, name_len, NULL) < 0) { 231 LOG("write failed (name buffer)"); 232 return -EINVAL; 233 } 234 if (nbd_write(ioc, desc, desc_len, NULL) < 0) { 235 LOG("write failed (description buffer)"); 236 return -EINVAL; 237 } 238 return 0; 239 } 240 241 /* Process the NBD_OPT_LIST command, with a potential series of replies. 242 * Return -errno on error, 0 on success. */ 243 static int nbd_negotiate_handle_list(NBDClient *client, uint32_t length) 244 { 245 NBDExport *exp; 246 247 if (length) { 248 if (nbd_drop(client->ioc, length, NULL) < 0) { 249 return -EIO; 250 } 251 return nbd_negotiate_send_rep_err(client->ioc, 252 NBD_REP_ERR_INVALID, NBD_OPT_LIST, 253 "OPT_LIST should not have length"); 254 } 255 256 /* For each export, send a NBD_REP_SERVER reply. */ 257 QTAILQ_FOREACH(exp, &exports, next) { 258 if (nbd_negotiate_send_rep_list(client->ioc, exp)) { 259 return -EINVAL; 260 } 261 } 262 /* Finish with a NBD_REP_ACK. */ 263 return nbd_negotiate_send_rep(client->ioc, NBD_REP_ACK, NBD_OPT_LIST); 264 } 265 266 static int nbd_negotiate_handle_export_name(NBDClient *client, uint32_t length) 267 { 268 char name[NBD_MAX_NAME_SIZE + 1]; 269 270 /* Client sends: 271 [20 .. xx] export name (length bytes) 272 */ 273 TRACE("Checking length"); 274 if (length >= sizeof(name)) { 275 LOG("Bad length received"); 276 return -EINVAL; 277 } 278 if (nbd_read(client->ioc, name, length, NULL) < 0) { 279 LOG("read failed"); 280 return -EINVAL; 281 } 282 name[length] = '\0'; 283 284 TRACE("Client requested export '%s'", name); 285 286 client->exp = nbd_export_find(name); 287 if (!client->exp) { 288 LOG("export not found"); 289 return -EINVAL; 290 } 291 292 QTAILQ_INSERT_TAIL(&client->exp->clients, client, next); 293 nbd_export_get(client->exp); 294 295 return 0; 296 } 297 298 /* Handle NBD_OPT_STARTTLS. Return NULL to drop connection, or else the 299 * new channel for all further (now-encrypted) communication. */ 300 static QIOChannel *nbd_negotiate_handle_starttls(NBDClient *client, 301 uint32_t length) 302 { 303 QIOChannel *ioc; 304 QIOChannelTLS *tioc; 305 struct NBDTLSHandshakeData data = { 0 }; 306 307 TRACE("Setting up TLS"); 308 ioc = client->ioc; 309 if (length) { 310 if (nbd_drop(ioc, length, NULL) < 0) { 311 return NULL; 312 } 313 nbd_negotiate_send_rep_err(ioc, NBD_REP_ERR_INVALID, NBD_OPT_STARTTLS, 314 "OPT_STARTTLS should not have length"); 315 return NULL; 316 } 317 318 if (nbd_negotiate_send_rep(client->ioc, NBD_REP_ACK, 319 NBD_OPT_STARTTLS) < 0) { 320 return NULL; 321 } 322 323 tioc = qio_channel_tls_new_server(ioc, 324 client->tlscreds, 325 client->tlsaclname, 326 NULL); 327 if (!tioc) { 328 return NULL; 329 } 330 331 qio_channel_set_name(QIO_CHANNEL(tioc), "nbd-server-tls"); 332 TRACE("Starting TLS handshake"); 333 data.loop = g_main_loop_new(g_main_context_default(), FALSE); 334 qio_channel_tls_handshake(tioc, 335 nbd_tls_handshake, 336 &data, 337 NULL); 338 339 if (!data.complete) { 340 g_main_loop_run(data.loop); 341 } 342 g_main_loop_unref(data.loop); 343 if (data.error) { 344 object_unref(OBJECT(tioc)); 345 error_free(data.error); 346 return NULL; 347 } 348 349 return QIO_CHANNEL(tioc); 350 } 351 352 353 /* Process all NBD_OPT_* client option commands. 354 * Return -errno on error, 0 on success. */ 355 static int nbd_negotiate_options(NBDClient *client) 356 { 357 uint32_t flags; 358 bool fixedNewstyle = false; 359 360 /* Client sends: 361 [ 0 .. 3] client flags 362 363 [ 0 .. 7] NBD_OPTS_MAGIC 364 [ 8 .. 11] NBD option 365 [12 .. 15] Data length 366 ... Rest of request 367 368 [ 0 .. 7] NBD_OPTS_MAGIC 369 [ 8 .. 11] Second NBD option 370 [12 .. 15] Data length 371 ... Rest of request 372 */ 373 374 if (nbd_read(client->ioc, &flags, sizeof(flags), NULL) < 0) { 375 LOG("read failed"); 376 return -EIO; 377 } 378 TRACE("Checking client flags"); 379 be32_to_cpus(&flags); 380 if (flags & NBD_FLAG_C_FIXED_NEWSTYLE) { 381 TRACE("Client supports fixed newstyle handshake"); 382 fixedNewstyle = true; 383 flags &= ~NBD_FLAG_C_FIXED_NEWSTYLE; 384 } 385 if (flags & NBD_FLAG_C_NO_ZEROES) { 386 TRACE("Client supports no zeroes at handshake end"); 387 client->no_zeroes = true; 388 flags &= ~NBD_FLAG_C_NO_ZEROES; 389 } 390 if (flags != 0) { 391 TRACE("Unknown client flags 0x%" PRIx32 " received", flags); 392 return -EIO; 393 } 394 395 while (1) { 396 int ret; 397 uint32_t clientflags, length; 398 uint64_t magic; 399 400 if (nbd_read(client->ioc, &magic, sizeof(magic), NULL) < 0) { 401 LOG("read failed"); 402 return -EINVAL; 403 } 404 TRACE("Checking opts magic"); 405 if (magic != be64_to_cpu(NBD_OPTS_MAGIC)) { 406 LOG("Bad magic received"); 407 return -EINVAL; 408 } 409 410 if (nbd_read(client->ioc, &clientflags, 411 sizeof(clientflags), NULL) < 0) 412 { 413 LOG("read failed"); 414 return -EINVAL; 415 } 416 clientflags = be32_to_cpu(clientflags); 417 418 if (nbd_read(client->ioc, &length, sizeof(length), NULL) < 0) { 419 LOG("read failed"); 420 return -EINVAL; 421 } 422 length = be32_to_cpu(length); 423 424 TRACE("Checking option 0x%" PRIx32, clientflags); 425 if (client->tlscreds && 426 client->ioc == (QIOChannel *)client->sioc) { 427 QIOChannel *tioc; 428 if (!fixedNewstyle) { 429 TRACE("Unsupported option 0x%" PRIx32, clientflags); 430 return -EINVAL; 431 } 432 switch (clientflags) { 433 case NBD_OPT_STARTTLS: 434 tioc = nbd_negotiate_handle_starttls(client, length); 435 if (!tioc) { 436 return -EIO; 437 } 438 object_unref(OBJECT(client->ioc)); 439 client->ioc = QIO_CHANNEL(tioc); 440 break; 441 442 case NBD_OPT_EXPORT_NAME: 443 /* No way to return an error to client, so drop connection */ 444 TRACE("Option 0x%x not permitted before TLS", clientflags); 445 return -EINVAL; 446 447 default: 448 if (nbd_drop(client->ioc, length, NULL) < 0) { 449 return -EIO; 450 } 451 ret = nbd_negotiate_send_rep_err(client->ioc, 452 NBD_REP_ERR_TLS_REQD, 453 clientflags, 454 "Option 0x%" PRIx32 455 "not permitted before TLS", 456 clientflags); 457 if (ret < 0) { 458 return ret; 459 } 460 /* Let the client keep trying, unless they asked to quit */ 461 if (clientflags == NBD_OPT_ABORT) { 462 return -EINVAL; 463 } 464 break; 465 } 466 } else if (fixedNewstyle) { 467 switch (clientflags) { 468 case NBD_OPT_LIST: 469 ret = nbd_negotiate_handle_list(client, length); 470 if (ret < 0) { 471 return ret; 472 } 473 break; 474 475 case NBD_OPT_ABORT: 476 /* NBD spec says we must try to reply before 477 * disconnecting, but that we must also tolerate 478 * guests that don't wait for our reply. */ 479 nbd_negotiate_send_rep(client->ioc, NBD_REP_ACK, clientflags); 480 return -EINVAL; 481 482 case NBD_OPT_EXPORT_NAME: 483 return nbd_negotiate_handle_export_name(client, length); 484 485 case NBD_OPT_STARTTLS: 486 if (nbd_drop(client->ioc, length, NULL) < 0) { 487 return -EIO; 488 } 489 if (client->tlscreds) { 490 ret = nbd_negotiate_send_rep_err(client->ioc, 491 NBD_REP_ERR_INVALID, 492 clientflags, 493 "TLS already enabled"); 494 } else { 495 ret = nbd_negotiate_send_rep_err(client->ioc, 496 NBD_REP_ERR_POLICY, 497 clientflags, 498 "TLS not configured"); 499 } 500 if (ret < 0) { 501 return ret; 502 } 503 break; 504 default: 505 if (nbd_drop(client->ioc, length, NULL) < 0) { 506 return -EIO; 507 } 508 ret = nbd_negotiate_send_rep_err(client->ioc, 509 NBD_REP_ERR_UNSUP, 510 clientflags, 511 "Unsupported option 0x%" 512 PRIx32, 513 clientflags); 514 if (ret < 0) { 515 return ret; 516 } 517 break; 518 } 519 } else { 520 /* 521 * If broken new-style we should drop the connection 522 * for anything except NBD_OPT_EXPORT_NAME 523 */ 524 switch (clientflags) { 525 case NBD_OPT_EXPORT_NAME: 526 return nbd_negotiate_handle_export_name(client, length); 527 528 default: 529 TRACE("Unsupported option 0x%" PRIx32, clientflags); 530 return -EINVAL; 531 } 532 } 533 } 534 } 535 536 static coroutine_fn int nbd_negotiate(NBDClient *client) 537 { 538 char buf[8 + 8 + 8 + 128]; 539 int ret; 540 const uint16_t myflags = (NBD_FLAG_HAS_FLAGS | NBD_FLAG_SEND_TRIM | 541 NBD_FLAG_SEND_FLUSH | NBD_FLAG_SEND_FUA | 542 NBD_FLAG_SEND_WRITE_ZEROES); 543 bool oldStyle; 544 size_t len; 545 546 /* Old style negotiation header without options 547 [ 0 .. 7] passwd ("NBDMAGIC") 548 [ 8 .. 15] magic (NBD_CLIENT_MAGIC) 549 [16 .. 23] size 550 [24 .. 25] server flags (0) 551 [26 .. 27] export flags 552 [28 .. 151] reserved (0) 553 554 New style negotiation header with options 555 [ 0 .. 7] passwd ("NBDMAGIC") 556 [ 8 .. 15] magic (NBD_OPTS_MAGIC) 557 [16 .. 17] server flags (0) 558 ....options sent.... 559 [18 .. 25] size 560 [26 .. 27] export flags 561 [28 .. 151] reserved (0, omit if no_zeroes) 562 */ 563 564 qio_channel_set_blocking(client->ioc, false, NULL); 565 566 TRACE("Beginning negotiation."); 567 memset(buf, 0, sizeof(buf)); 568 memcpy(buf, "NBDMAGIC", 8); 569 570 oldStyle = client->exp != NULL && !client->tlscreds; 571 if (oldStyle) { 572 TRACE("advertising size %" PRIu64 " and flags %x", 573 client->exp->size, client->exp->nbdflags | myflags); 574 stq_be_p(buf + 8, NBD_CLIENT_MAGIC); 575 stq_be_p(buf + 16, client->exp->size); 576 stw_be_p(buf + 26, client->exp->nbdflags | myflags); 577 } else { 578 stq_be_p(buf + 8, NBD_OPTS_MAGIC); 579 stw_be_p(buf + 16, NBD_FLAG_FIXED_NEWSTYLE | NBD_FLAG_NO_ZEROES); 580 } 581 582 if (oldStyle) { 583 if (client->tlscreds) { 584 TRACE("TLS cannot be enabled with oldstyle protocol"); 585 return -EINVAL; 586 } 587 if (nbd_write(client->ioc, buf, sizeof(buf), NULL) < 0) { 588 LOG("write failed"); 589 return -EINVAL; 590 } 591 } else { 592 if (nbd_write(client->ioc, buf, 18, NULL) < 0) { 593 LOG("write failed"); 594 return -EINVAL; 595 } 596 ret = nbd_negotiate_options(client); 597 if (ret != 0) { 598 LOG("option negotiation failed"); 599 return ret; 600 } 601 602 TRACE("advertising size %" PRIu64 " and flags %x", 603 client->exp->size, client->exp->nbdflags | myflags); 604 stq_be_p(buf + 18, client->exp->size); 605 stw_be_p(buf + 26, client->exp->nbdflags | myflags); 606 len = client->no_zeroes ? 10 : sizeof(buf) - 18; 607 ret = nbd_write(client->ioc, buf + 18, len, NULL); 608 if (ret < 0) { 609 LOG("write failed"); 610 return ret; 611 } 612 } 613 614 TRACE("Negotiation succeeded."); 615 616 return 0; 617 } 618 619 static int nbd_receive_request(QIOChannel *ioc, NBDRequest *request) 620 { 621 uint8_t buf[NBD_REQUEST_SIZE]; 622 uint32_t magic; 623 int ret; 624 625 ret = nbd_read(ioc, buf, sizeof(buf), NULL); 626 if (ret < 0) { 627 return ret; 628 } 629 630 /* Request 631 [ 0 .. 3] magic (NBD_REQUEST_MAGIC) 632 [ 4 .. 5] flags (NBD_CMD_FLAG_FUA, ...) 633 [ 6 .. 7] type (NBD_CMD_READ, ...) 634 [ 8 .. 15] handle 635 [16 .. 23] from 636 [24 .. 27] len 637 */ 638 639 magic = ldl_be_p(buf); 640 request->flags = lduw_be_p(buf + 4); 641 request->type = lduw_be_p(buf + 6); 642 request->handle = ldq_be_p(buf + 8); 643 request->from = ldq_be_p(buf + 16); 644 request->len = ldl_be_p(buf + 24); 645 646 TRACE("Got request: { magic = 0x%" PRIx32 ", .flags = %" PRIx16 647 ", .type = %" PRIx16 ", from = %" PRIu64 ", len = %" PRIu32 " }", 648 magic, request->flags, request->type, request->from, request->len); 649 650 if (magic != NBD_REQUEST_MAGIC) { 651 LOG("invalid magic (got 0x%" PRIx32 ")", magic); 652 return -EINVAL; 653 } 654 return 0; 655 } 656 657 static int nbd_send_reply(QIOChannel *ioc, NBDReply *reply) 658 { 659 uint8_t buf[NBD_REPLY_SIZE]; 660 661 reply->error = system_errno_to_nbd_errno(reply->error); 662 663 TRACE("Sending response to client: { .error = %" PRId32 664 ", handle = %" PRIu64 " }", 665 reply->error, reply->handle); 666 667 /* Reply 668 [ 0 .. 3] magic (NBD_REPLY_MAGIC) 669 [ 4 .. 7] error (0 == no error) 670 [ 7 .. 15] handle 671 */ 672 stl_be_p(buf, NBD_REPLY_MAGIC); 673 stl_be_p(buf + 4, reply->error); 674 stq_be_p(buf + 8, reply->handle); 675 676 return nbd_write(ioc, buf, sizeof(buf), NULL); 677 } 678 679 #define MAX_NBD_REQUESTS 16 680 681 void nbd_client_get(NBDClient *client) 682 { 683 client->refcount++; 684 } 685 686 void nbd_client_put(NBDClient *client) 687 { 688 if (--client->refcount == 0) { 689 /* The last reference should be dropped by client->close, 690 * which is called by client_close. 691 */ 692 assert(client->closing); 693 694 qio_channel_detach_aio_context(client->ioc); 695 object_unref(OBJECT(client->sioc)); 696 object_unref(OBJECT(client->ioc)); 697 if (client->tlscreds) { 698 object_unref(OBJECT(client->tlscreds)); 699 } 700 g_free(client->tlsaclname); 701 if (client->exp) { 702 QTAILQ_REMOVE(&client->exp->clients, client, next); 703 nbd_export_put(client->exp); 704 } 705 g_free(client); 706 } 707 } 708 709 static void client_close(NBDClient *client, bool negotiated) 710 { 711 if (client->closing) { 712 return; 713 } 714 715 client->closing = true; 716 717 /* Force requests to finish. They will drop their own references, 718 * then we'll close the socket and free the NBDClient. 719 */ 720 qio_channel_shutdown(client->ioc, QIO_CHANNEL_SHUTDOWN_BOTH, 721 NULL); 722 723 /* Also tell the client, so that they release their reference. */ 724 if (client->close_fn) { 725 client->close_fn(client, negotiated); 726 } 727 } 728 729 static NBDRequestData *nbd_request_get(NBDClient *client) 730 { 731 NBDRequestData *req; 732 733 assert(client->nb_requests <= MAX_NBD_REQUESTS - 1); 734 client->nb_requests++; 735 736 req = g_new0(NBDRequestData, 1); 737 nbd_client_get(client); 738 req->client = client; 739 return req; 740 } 741 742 static void nbd_request_put(NBDRequestData *req) 743 { 744 NBDClient *client = req->client; 745 746 if (req->data) { 747 qemu_vfree(req->data); 748 } 749 g_free(req); 750 751 client->nb_requests--; 752 nbd_client_receive_next_request(client); 753 754 nbd_client_put(client); 755 } 756 757 static void blk_aio_attached(AioContext *ctx, void *opaque) 758 { 759 NBDExport *exp = opaque; 760 NBDClient *client; 761 762 TRACE("Export %s: Attaching clients to AIO context %p\n", exp->name, ctx); 763 764 exp->ctx = ctx; 765 766 QTAILQ_FOREACH(client, &exp->clients, next) { 767 qio_channel_attach_aio_context(client->ioc, ctx); 768 if (client->recv_coroutine) { 769 aio_co_schedule(ctx, client->recv_coroutine); 770 } 771 if (client->send_coroutine) { 772 aio_co_schedule(ctx, client->send_coroutine); 773 } 774 } 775 } 776 777 static void blk_aio_detach(void *opaque) 778 { 779 NBDExport *exp = opaque; 780 NBDClient *client; 781 782 TRACE("Export %s: Detaching clients from AIO context %p\n", exp->name, exp->ctx); 783 784 QTAILQ_FOREACH(client, &exp->clients, next) { 785 qio_channel_detach_aio_context(client->ioc); 786 } 787 788 exp->ctx = NULL; 789 } 790 791 static void nbd_eject_notifier(Notifier *n, void *data) 792 { 793 NBDExport *exp = container_of(n, NBDExport, eject_notifier); 794 nbd_export_close(exp); 795 } 796 797 NBDExport *nbd_export_new(BlockDriverState *bs, off_t dev_offset, off_t size, 798 uint16_t nbdflags, void (*close)(NBDExport *), 799 bool writethrough, BlockBackend *on_eject_blk, 800 Error **errp) 801 { 802 BlockBackend *blk; 803 NBDExport *exp = g_malloc0(sizeof(NBDExport)); 804 uint64_t perm; 805 int ret; 806 807 /* Don't allow resize while the NBD server is running, otherwise we don't 808 * care what happens with the node. */ 809 perm = BLK_PERM_CONSISTENT_READ; 810 if ((nbdflags & NBD_FLAG_READ_ONLY) == 0) { 811 perm |= BLK_PERM_WRITE; 812 } 813 blk = blk_new(perm, BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED | 814 BLK_PERM_WRITE | BLK_PERM_GRAPH_MOD); 815 ret = blk_insert_bs(blk, bs, errp); 816 if (ret < 0) { 817 goto fail; 818 } 819 blk_set_enable_write_cache(blk, !writethrough); 820 821 exp->refcount = 1; 822 QTAILQ_INIT(&exp->clients); 823 exp->blk = blk; 824 exp->dev_offset = dev_offset; 825 exp->nbdflags = nbdflags; 826 exp->size = size < 0 ? blk_getlength(blk) : size; 827 if (exp->size < 0) { 828 error_setg_errno(errp, -exp->size, 829 "Failed to determine the NBD export's length"); 830 goto fail; 831 } 832 exp->size -= exp->size % BDRV_SECTOR_SIZE; 833 834 exp->close = close; 835 exp->ctx = blk_get_aio_context(blk); 836 blk_add_aio_context_notifier(blk, blk_aio_attached, blk_aio_detach, exp); 837 838 if (on_eject_blk) { 839 blk_ref(on_eject_blk); 840 exp->eject_notifier_blk = on_eject_blk; 841 exp->eject_notifier.notify = nbd_eject_notifier; 842 blk_add_remove_bs_notifier(on_eject_blk, &exp->eject_notifier); 843 } 844 845 /* 846 * NBD exports are used for non-shared storage migration. Make sure 847 * that BDRV_O_INACTIVE is cleared and the image is ready for write 848 * access since the export could be available before migration handover. 849 */ 850 aio_context_acquire(exp->ctx); 851 blk_invalidate_cache(blk, NULL); 852 aio_context_release(exp->ctx); 853 return exp; 854 855 fail: 856 blk_unref(blk); 857 g_free(exp); 858 return NULL; 859 } 860 861 NBDExport *nbd_export_find(const char *name) 862 { 863 NBDExport *exp; 864 QTAILQ_FOREACH(exp, &exports, next) { 865 if (strcmp(name, exp->name) == 0) { 866 return exp; 867 } 868 } 869 870 return NULL; 871 } 872 873 void nbd_export_set_name(NBDExport *exp, const char *name) 874 { 875 if (exp->name == name) { 876 return; 877 } 878 879 nbd_export_get(exp); 880 if (exp->name != NULL) { 881 g_free(exp->name); 882 exp->name = NULL; 883 QTAILQ_REMOVE(&exports, exp, next); 884 nbd_export_put(exp); 885 } 886 if (name != NULL) { 887 nbd_export_get(exp); 888 exp->name = g_strdup(name); 889 QTAILQ_INSERT_TAIL(&exports, exp, next); 890 } 891 nbd_export_put(exp); 892 } 893 894 void nbd_export_set_description(NBDExport *exp, const char *description) 895 { 896 g_free(exp->description); 897 exp->description = g_strdup(description); 898 } 899 900 void nbd_export_close(NBDExport *exp) 901 { 902 NBDClient *client, *next; 903 904 nbd_export_get(exp); 905 QTAILQ_FOREACH_SAFE(client, &exp->clients, next, next) { 906 client_close(client, true); 907 } 908 nbd_export_set_name(exp, NULL); 909 nbd_export_set_description(exp, NULL); 910 nbd_export_put(exp); 911 } 912 913 void nbd_export_get(NBDExport *exp) 914 { 915 assert(exp->refcount > 0); 916 exp->refcount++; 917 } 918 919 void nbd_export_put(NBDExport *exp) 920 { 921 assert(exp->refcount > 0); 922 if (exp->refcount == 1) { 923 nbd_export_close(exp); 924 } 925 926 if (--exp->refcount == 0) { 927 assert(exp->name == NULL); 928 assert(exp->description == NULL); 929 930 if (exp->close) { 931 exp->close(exp); 932 } 933 934 if (exp->blk) { 935 if (exp->eject_notifier_blk) { 936 notifier_remove(&exp->eject_notifier); 937 blk_unref(exp->eject_notifier_blk); 938 } 939 blk_remove_aio_context_notifier(exp->blk, blk_aio_attached, 940 blk_aio_detach, exp); 941 blk_unref(exp->blk); 942 exp->blk = NULL; 943 } 944 945 g_free(exp); 946 } 947 } 948 949 BlockBackend *nbd_export_get_blockdev(NBDExport *exp) 950 { 951 return exp->blk; 952 } 953 954 void nbd_export_close_all(void) 955 { 956 NBDExport *exp, *next; 957 958 QTAILQ_FOREACH_SAFE(exp, &exports, next, next) { 959 nbd_export_close(exp); 960 } 961 } 962 963 static int nbd_co_send_reply(NBDRequestData *req, NBDReply *reply, int len) 964 { 965 NBDClient *client = req->client; 966 int ret; 967 968 g_assert(qemu_in_coroutine()); 969 qemu_co_mutex_lock(&client->send_lock); 970 client->send_coroutine = qemu_coroutine_self(); 971 972 if (!len) { 973 ret = nbd_send_reply(client->ioc, reply); 974 } else { 975 qio_channel_set_cork(client->ioc, true); 976 ret = nbd_send_reply(client->ioc, reply); 977 if (ret == 0) { 978 ret = nbd_write(client->ioc, req->data, len, NULL); 979 if (ret < 0) { 980 ret = -EIO; 981 } 982 } 983 qio_channel_set_cork(client->ioc, false); 984 } 985 986 client->send_coroutine = NULL; 987 qemu_co_mutex_unlock(&client->send_lock); 988 return ret; 989 } 990 991 /* nbd_co_receive_request 992 * Collect a client request. Return 0 if request looks valid, -EIO to drop 993 * connection right away, and any other negative value to report an error to 994 * the client (although the caller may still need to disconnect after reporting 995 * the error). 996 */ 997 static int nbd_co_receive_request(NBDRequestData *req, NBDRequest *request) 998 { 999 NBDClient *client = req->client; 1000 1001 g_assert(qemu_in_coroutine()); 1002 assert(client->recv_coroutine == qemu_coroutine_self()); 1003 if (nbd_receive_request(client->ioc, request) < 0) { 1004 return -EIO; 1005 } 1006 1007 TRACE("Decoding type"); 1008 1009 if (request->type != NBD_CMD_WRITE) { 1010 /* No payload, we are ready to read the next request. */ 1011 req->complete = true; 1012 } 1013 1014 if (request->type == NBD_CMD_DISC) { 1015 /* Special case: we're going to disconnect without a reply, 1016 * whether or not flags, from, or len are bogus */ 1017 TRACE("Request type is DISCONNECT"); 1018 return -EIO; 1019 } 1020 1021 /* Check for sanity in the parameters, part 1. Defer as many 1022 * checks as possible until after reading any NBD_CMD_WRITE 1023 * payload, so we can try and keep the connection alive. */ 1024 if ((request->from + request->len) < request->from) { 1025 LOG("integer overflow detected, you're probably being attacked"); 1026 return -EINVAL; 1027 } 1028 1029 if (request->type == NBD_CMD_READ || request->type == NBD_CMD_WRITE) { 1030 if (request->len > NBD_MAX_BUFFER_SIZE) { 1031 LOG("len (%" PRIu32" ) is larger than max len (%u)", 1032 request->len, NBD_MAX_BUFFER_SIZE); 1033 return -EINVAL; 1034 } 1035 1036 req->data = blk_try_blockalign(client->exp->blk, request->len); 1037 if (req->data == NULL) { 1038 return -ENOMEM; 1039 } 1040 } 1041 if (request->type == NBD_CMD_WRITE) { 1042 TRACE("Reading %" PRIu32 " byte(s)", request->len); 1043 1044 if (nbd_read(client->ioc, req->data, request->len, NULL) < 0) { 1045 LOG("reading from socket failed"); 1046 return -EIO; 1047 } 1048 req->complete = true; 1049 } 1050 1051 /* Sanity checks, part 2. */ 1052 if (request->from + request->len > client->exp->size) { 1053 LOG("operation past EOF; From: %" PRIu64 ", Len: %" PRIu32 1054 ", Size: %" PRIu64, request->from, request->len, 1055 (uint64_t)client->exp->size); 1056 return request->type == NBD_CMD_WRITE ? -ENOSPC : -EINVAL; 1057 } 1058 if (request->flags & ~(NBD_CMD_FLAG_FUA | NBD_CMD_FLAG_NO_HOLE)) { 1059 LOG("unsupported flags (got 0x%x)", request->flags); 1060 return -EINVAL; 1061 } 1062 if (request->type != NBD_CMD_WRITE_ZEROES && 1063 (request->flags & NBD_CMD_FLAG_NO_HOLE)) { 1064 LOG("unexpected flags (got 0x%x)", request->flags); 1065 return -EINVAL; 1066 } 1067 1068 return 0; 1069 } 1070 1071 /* Owns a reference to the NBDClient passed as opaque. */ 1072 static coroutine_fn void nbd_trip(void *opaque) 1073 { 1074 NBDClient *client = opaque; 1075 NBDExport *exp = client->exp; 1076 NBDRequestData *req; 1077 NBDRequest request = { 0 }; /* GCC thinks it can be used uninitialized */ 1078 NBDReply reply; 1079 int ret; 1080 int flags; 1081 int reply_data_len = 0; 1082 1083 TRACE("Reading request."); 1084 if (client->closing) { 1085 nbd_client_put(client); 1086 return; 1087 } 1088 1089 req = nbd_request_get(client); 1090 ret = nbd_co_receive_request(req, &request); 1091 client->recv_coroutine = NULL; 1092 nbd_client_receive_next_request(client); 1093 if (ret == -EIO) { 1094 goto disconnect; 1095 } 1096 1097 reply.handle = request.handle; 1098 reply.error = 0; 1099 1100 if (ret < 0) { 1101 reply.error = -ret; 1102 goto reply; 1103 } 1104 1105 if (client->closing) { 1106 /* 1107 * The client may be closed when we are blocked in 1108 * nbd_co_receive_request() 1109 */ 1110 goto done; 1111 } 1112 1113 switch (request.type) { 1114 case NBD_CMD_READ: 1115 TRACE("Request type is READ"); 1116 1117 /* XXX: NBD Protocol only documents use of FUA with WRITE */ 1118 if (request.flags & NBD_CMD_FLAG_FUA) { 1119 ret = blk_co_flush(exp->blk); 1120 if (ret < 0) { 1121 LOG("flush failed"); 1122 reply.error = -ret; 1123 break; 1124 } 1125 } 1126 1127 ret = blk_pread(exp->blk, request.from + exp->dev_offset, 1128 req->data, request.len); 1129 if (ret < 0) { 1130 LOG("reading from file failed"); 1131 reply.error = -ret; 1132 break; 1133 } 1134 1135 reply_data_len = request.len; 1136 TRACE("Read %" PRIu32" byte(s)", request.len); 1137 1138 break; 1139 case NBD_CMD_WRITE: 1140 TRACE("Request type is WRITE"); 1141 1142 if (exp->nbdflags & NBD_FLAG_READ_ONLY) { 1143 TRACE("Server is read-only, return error"); 1144 reply.error = EROFS; 1145 break; 1146 } 1147 1148 TRACE("Writing to device"); 1149 1150 flags = 0; 1151 if (request.flags & NBD_CMD_FLAG_FUA) { 1152 flags |= BDRV_REQ_FUA; 1153 } 1154 ret = blk_pwrite(exp->blk, request.from + exp->dev_offset, 1155 req->data, request.len, flags); 1156 if (ret < 0) { 1157 LOG("writing to file failed"); 1158 reply.error = -ret; 1159 } 1160 1161 break; 1162 case NBD_CMD_WRITE_ZEROES: 1163 TRACE("Request type is WRITE_ZEROES"); 1164 1165 if (exp->nbdflags & NBD_FLAG_READ_ONLY) { 1166 TRACE("Server is read-only, return error"); 1167 reply.error = EROFS; 1168 break; 1169 } 1170 1171 TRACE("Writing to device"); 1172 1173 flags = 0; 1174 if (request.flags & NBD_CMD_FLAG_FUA) { 1175 flags |= BDRV_REQ_FUA; 1176 } 1177 if (!(request.flags & NBD_CMD_FLAG_NO_HOLE)) { 1178 flags |= BDRV_REQ_MAY_UNMAP; 1179 } 1180 ret = blk_pwrite_zeroes(exp->blk, request.from + exp->dev_offset, 1181 request.len, flags); 1182 if (ret < 0) { 1183 LOG("writing to file failed"); 1184 reply.error = -ret; 1185 } 1186 1187 break; 1188 case NBD_CMD_DISC: 1189 /* unreachable, thanks to special case in nbd_co_receive_request() */ 1190 abort(); 1191 1192 case NBD_CMD_FLUSH: 1193 TRACE("Request type is FLUSH"); 1194 1195 ret = blk_co_flush(exp->blk); 1196 if (ret < 0) { 1197 LOG("flush failed"); 1198 reply.error = -ret; 1199 } 1200 1201 break; 1202 case NBD_CMD_TRIM: 1203 TRACE("Request type is TRIM"); 1204 ret = blk_co_pdiscard(exp->blk, request.from + exp->dev_offset, 1205 request.len); 1206 if (ret < 0) { 1207 LOG("discard failed"); 1208 reply.error = -ret; 1209 } 1210 1211 break; 1212 default: 1213 LOG("invalid request type (%" PRIu32 ") received", request.type); 1214 reply.error = EINVAL; 1215 } 1216 1217 reply: 1218 /* We must disconnect after NBD_CMD_WRITE if we did not 1219 * read the payload. 1220 */ 1221 if (nbd_co_send_reply(req, &reply, reply_data_len) < 0 || !req->complete) { 1222 goto disconnect; 1223 } 1224 1225 TRACE("Request/Reply complete"); 1226 1227 done: 1228 nbd_request_put(req); 1229 nbd_client_put(client); 1230 return; 1231 1232 disconnect: 1233 nbd_request_put(req); 1234 client_close(client, true); 1235 nbd_client_put(client); 1236 } 1237 1238 static void nbd_client_receive_next_request(NBDClient *client) 1239 { 1240 if (!client->recv_coroutine && client->nb_requests < MAX_NBD_REQUESTS) { 1241 nbd_client_get(client); 1242 client->recv_coroutine = qemu_coroutine_create(nbd_trip, client); 1243 aio_co_schedule(client->exp->ctx, client->recv_coroutine); 1244 } 1245 } 1246 1247 static coroutine_fn void nbd_co_client_start(void *opaque) 1248 { 1249 NBDClient *client = opaque; 1250 NBDExport *exp = client->exp; 1251 1252 if (exp) { 1253 nbd_export_get(exp); 1254 QTAILQ_INSERT_TAIL(&exp->clients, client, next); 1255 } 1256 qemu_co_mutex_init(&client->send_lock); 1257 1258 if (nbd_negotiate(client)) { 1259 client_close(client, false); 1260 return; 1261 } 1262 1263 nbd_client_receive_next_request(client); 1264 } 1265 1266 /* 1267 * Create a new client listener on the given export @exp, using the 1268 * given channel @sioc. Begin servicing it in a coroutine. When the 1269 * connection closes, call @close_fn with an indication of whether the 1270 * client completed negotiation. 1271 */ 1272 void nbd_client_new(NBDExport *exp, 1273 QIOChannelSocket *sioc, 1274 QCryptoTLSCreds *tlscreds, 1275 const char *tlsaclname, 1276 void (*close_fn)(NBDClient *, bool)) 1277 { 1278 NBDClient *client; 1279 Coroutine *co; 1280 1281 client = g_malloc0(sizeof(NBDClient)); 1282 client->refcount = 1; 1283 client->exp = exp; 1284 client->tlscreds = tlscreds; 1285 if (tlscreds) { 1286 object_ref(OBJECT(client->tlscreds)); 1287 } 1288 client->tlsaclname = g_strdup(tlsaclname); 1289 client->sioc = sioc; 1290 object_ref(OBJECT(client->sioc)); 1291 client->ioc = QIO_CHANNEL(sioc); 1292 object_ref(OBJECT(client->ioc)); 1293 client->close_fn = close_fn; 1294 1295 co = qemu_coroutine_create(nbd_co_client_start, client); 1296 qemu_coroutine_enter(co); 1297 } 1298