1 /* 2 * Copyright (C) 2005 Anthony Liguori <anthony@codemonkey.ws> 3 * 4 * Network Block Device Server Side 5 * 6 * This program is free software; you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License as published by 8 * the Free Software Foundation; under version 2 of the License. 9 * 10 * This program is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 * GNU General Public License for more details. 14 * 15 * You should have received a copy of the GNU General Public License 16 * along with this program; if not, see <http://www.gnu.org/licenses/>. 17 */ 18 19 #include "qemu/osdep.h" 20 #include "qapi/error.h" 21 #include "nbd-internal.h" 22 23 static int system_errno_to_nbd_errno(int err) 24 { 25 switch (err) { 26 case 0: 27 return NBD_SUCCESS; 28 case EPERM: 29 case EROFS: 30 return NBD_EPERM; 31 case EIO: 32 return NBD_EIO; 33 case ENOMEM: 34 return NBD_ENOMEM; 35 #ifdef EDQUOT 36 case EDQUOT: 37 #endif 38 case EFBIG: 39 case ENOSPC: 40 return NBD_ENOSPC; 41 case EINVAL: 42 default: 43 return NBD_EINVAL; 44 } 45 } 46 47 /* Definitions for opaque data types */ 48 49 typedef struct NBDRequest NBDRequest; 50 51 struct NBDRequest { 52 QSIMPLEQ_ENTRY(NBDRequest) entry; 53 NBDClient *client; 54 uint8_t *data; 55 bool complete; 56 }; 57 58 struct NBDExport { 59 int refcount; 60 void (*close)(NBDExport *exp); 61 62 BlockBackend *blk; 63 char *name; 64 off_t dev_offset; 65 off_t size; 66 uint16_t nbdflags; 67 QTAILQ_HEAD(, NBDClient) clients; 68 QTAILQ_ENTRY(NBDExport) next; 69 70 AioContext *ctx; 71 72 BlockBackend *eject_notifier_blk; 73 Notifier eject_notifier; 74 }; 75 76 static QTAILQ_HEAD(, NBDExport) exports = QTAILQ_HEAD_INITIALIZER(exports); 77 78 struct NBDClient { 79 int refcount; 80 void (*close)(NBDClient *client); 81 82 NBDExport *exp; 83 QCryptoTLSCreds *tlscreds; 84 char *tlsaclname; 85 QIOChannelSocket *sioc; /* The underlying data channel */ 86 QIOChannel *ioc; /* The current I/O channel which may differ (eg TLS) */ 87 88 Coroutine *recv_coroutine; 89 90 CoMutex send_lock; 91 Coroutine *send_coroutine; 92 93 bool can_read; 94 95 QTAILQ_ENTRY(NBDClient) next; 96 int nb_requests; 97 bool closing; 98 }; 99 100 /* That's all folks */ 101 102 static void nbd_set_handlers(NBDClient *client); 103 static void nbd_unset_handlers(NBDClient *client); 104 static void nbd_update_can_read(NBDClient *client); 105 106 static gboolean nbd_negotiate_continue(QIOChannel *ioc, 107 GIOCondition condition, 108 void *opaque) 109 { 110 qemu_coroutine_enter(opaque); 111 return TRUE; 112 } 113 114 static ssize_t nbd_negotiate_read(QIOChannel *ioc, void *buffer, size_t size) 115 { 116 ssize_t ret; 117 guint watch; 118 119 assert(qemu_in_coroutine()); 120 /* Negotiation are always in main loop. */ 121 watch = qio_channel_add_watch(ioc, 122 G_IO_IN, 123 nbd_negotiate_continue, 124 qemu_coroutine_self(), 125 NULL); 126 ret = read_sync(ioc, buffer, size); 127 g_source_remove(watch); 128 return ret; 129 130 } 131 132 static ssize_t nbd_negotiate_write(QIOChannel *ioc, void *buffer, size_t size) 133 { 134 ssize_t ret; 135 guint watch; 136 137 assert(qemu_in_coroutine()); 138 /* Negotiation are always in main loop. */ 139 watch = qio_channel_add_watch(ioc, 140 G_IO_OUT, 141 nbd_negotiate_continue, 142 qemu_coroutine_self(), 143 NULL); 144 ret = write_sync(ioc, buffer, size); 145 g_source_remove(watch); 146 return ret; 147 } 148 149 static ssize_t nbd_negotiate_drop_sync(QIOChannel *ioc, size_t size) 150 { 151 ssize_t ret, dropped = size; 152 uint8_t *buffer = g_malloc(MIN(65536, size)); 153 154 while (size > 0) { 155 ret = nbd_negotiate_read(ioc, buffer, MIN(65536, size)); 156 if (ret < 0) { 157 g_free(buffer); 158 return ret; 159 } 160 161 assert(ret <= size); 162 size -= ret; 163 } 164 165 g_free(buffer); 166 return dropped; 167 } 168 169 /* Basic flow for negotiation 170 171 Server Client 172 Negotiate 173 174 or 175 176 Server Client 177 Negotiate #1 178 Option 179 Negotiate #2 180 181 ---- 182 183 followed by 184 185 Server Client 186 Request 187 Response 188 Request 189 Response 190 ... 191 ... 192 Request (type == 2) 193 194 */ 195 196 static int nbd_negotiate_send_rep(QIOChannel *ioc, uint32_t type, uint32_t opt) 197 { 198 uint64_t magic; 199 uint32_t len; 200 201 TRACE("Reply opt=%" PRIx32 " type=%" PRIx32, type, opt); 202 203 magic = cpu_to_be64(NBD_REP_MAGIC); 204 if (nbd_negotiate_write(ioc, &magic, sizeof(magic)) != sizeof(magic)) { 205 LOG("write failed (rep magic)"); 206 return -EINVAL; 207 } 208 opt = cpu_to_be32(opt); 209 if (nbd_negotiate_write(ioc, &opt, sizeof(opt)) != sizeof(opt)) { 210 LOG("write failed (rep opt)"); 211 return -EINVAL; 212 } 213 type = cpu_to_be32(type); 214 if (nbd_negotiate_write(ioc, &type, sizeof(type)) != sizeof(type)) { 215 LOG("write failed (rep type)"); 216 return -EINVAL; 217 } 218 len = cpu_to_be32(0); 219 if (nbd_negotiate_write(ioc, &len, sizeof(len)) != sizeof(len)) { 220 LOG("write failed (rep data length)"); 221 return -EINVAL; 222 } 223 return 0; 224 } 225 226 static int nbd_negotiate_send_rep_list(QIOChannel *ioc, NBDExport *exp) 227 { 228 uint64_t magic, name_len; 229 uint32_t opt, type, len; 230 231 TRACE("Advertising export name '%s'", exp->name ? exp->name : ""); 232 name_len = strlen(exp->name); 233 magic = cpu_to_be64(NBD_REP_MAGIC); 234 if (nbd_negotiate_write(ioc, &magic, sizeof(magic)) != sizeof(magic)) { 235 LOG("write failed (magic)"); 236 return -EINVAL; 237 } 238 opt = cpu_to_be32(NBD_OPT_LIST); 239 if (nbd_negotiate_write(ioc, &opt, sizeof(opt)) != sizeof(opt)) { 240 LOG("write failed (opt)"); 241 return -EINVAL; 242 } 243 type = cpu_to_be32(NBD_REP_SERVER); 244 if (nbd_negotiate_write(ioc, &type, sizeof(type)) != sizeof(type)) { 245 LOG("write failed (reply type)"); 246 return -EINVAL; 247 } 248 len = cpu_to_be32(name_len + sizeof(len)); 249 if (nbd_negotiate_write(ioc, &len, sizeof(len)) != sizeof(len)) { 250 LOG("write failed (length)"); 251 return -EINVAL; 252 } 253 len = cpu_to_be32(name_len); 254 if (nbd_negotiate_write(ioc, &len, sizeof(len)) != sizeof(len)) { 255 LOG("write failed (length)"); 256 return -EINVAL; 257 } 258 if (nbd_negotiate_write(ioc, exp->name, name_len) != name_len) { 259 LOG("write failed (buffer)"); 260 return -EINVAL; 261 } 262 return 0; 263 } 264 265 static int nbd_negotiate_handle_list(NBDClient *client, uint32_t length) 266 { 267 NBDExport *exp; 268 269 if (length) { 270 if (nbd_negotiate_drop_sync(client->ioc, length) != length) { 271 return -EIO; 272 } 273 return nbd_negotiate_send_rep(client->ioc, 274 NBD_REP_ERR_INVALID, NBD_OPT_LIST); 275 } 276 277 /* For each export, send a NBD_REP_SERVER reply. */ 278 QTAILQ_FOREACH(exp, &exports, next) { 279 if (nbd_negotiate_send_rep_list(client->ioc, exp)) { 280 return -EINVAL; 281 } 282 } 283 /* Finish with a NBD_REP_ACK. */ 284 return nbd_negotiate_send_rep(client->ioc, NBD_REP_ACK, NBD_OPT_LIST); 285 } 286 287 static int nbd_negotiate_handle_export_name(NBDClient *client, uint32_t length) 288 { 289 int rc = -EINVAL; 290 char name[NBD_MAX_NAME_SIZE + 1]; 291 292 /* Client sends: 293 [20 .. xx] export name (length bytes) 294 */ 295 TRACE("Checking length"); 296 if (length >= sizeof(name)) { 297 LOG("Bad length received"); 298 goto fail; 299 } 300 if (nbd_negotiate_read(client->ioc, name, length) != length) { 301 LOG("read failed"); 302 goto fail; 303 } 304 name[length] = '\0'; 305 306 TRACE("Client requested export '%s'", name); 307 308 client->exp = nbd_export_find(name); 309 if (!client->exp) { 310 LOG("export not found"); 311 goto fail; 312 } 313 314 QTAILQ_INSERT_TAIL(&client->exp->clients, client, next); 315 nbd_export_get(client->exp); 316 rc = 0; 317 fail: 318 return rc; 319 } 320 321 322 static QIOChannel *nbd_negotiate_handle_starttls(NBDClient *client, 323 uint32_t length) 324 { 325 QIOChannel *ioc; 326 QIOChannelTLS *tioc; 327 struct NBDTLSHandshakeData data = { 0 }; 328 329 TRACE("Setting up TLS"); 330 ioc = client->ioc; 331 if (length) { 332 if (nbd_negotiate_drop_sync(ioc, length) != length) { 333 return NULL; 334 } 335 nbd_negotiate_send_rep(ioc, NBD_REP_ERR_INVALID, NBD_OPT_STARTTLS); 336 return NULL; 337 } 338 339 if (nbd_negotiate_send_rep(client->ioc, NBD_REP_ACK, 340 NBD_OPT_STARTTLS) < 0) { 341 return NULL; 342 } 343 344 tioc = qio_channel_tls_new_server(ioc, 345 client->tlscreds, 346 client->tlsaclname, 347 NULL); 348 if (!tioc) { 349 return NULL; 350 } 351 352 TRACE("Starting TLS handshake"); 353 data.loop = g_main_loop_new(g_main_context_default(), FALSE); 354 qio_channel_tls_handshake(tioc, 355 nbd_tls_handshake, 356 &data, 357 NULL); 358 359 if (!data.complete) { 360 g_main_loop_run(data.loop); 361 } 362 g_main_loop_unref(data.loop); 363 if (data.error) { 364 object_unref(OBJECT(tioc)); 365 error_free(data.error); 366 return NULL; 367 } 368 369 return QIO_CHANNEL(tioc); 370 } 371 372 373 static int nbd_negotiate_options(NBDClient *client) 374 { 375 uint32_t flags; 376 bool fixedNewstyle = false; 377 378 /* Client sends: 379 [ 0 .. 3] client flags 380 381 [ 0 .. 7] NBD_OPTS_MAGIC 382 [ 8 .. 11] NBD option 383 [12 .. 15] Data length 384 ... Rest of request 385 386 [ 0 .. 7] NBD_OPTS_MAGIC 387 [ 8 .. 11] Second NBD option 388 [12 .. 15] Data length 389 ... Rest of request 390 */ 391 392 if (nbd_negotiate_read(client->ioc, &flags, sizeof(flags)) != 393 sizeof(flags)) { 394 LOG("read failed"); 395 return -EIO; 396 } 397 TRACE("Checking client flags"); 398 be32_to_cpus(&flags); 399 if (flags & NBD_FLAG_C_FIXED_NEWSTYLE) { 400 TRACE("Client supports fixed newstyle handshake"); 401 fixedNewstyle = true; 402 flags &= ~NBD_FLAG_C_FIXED_NEWSTYLE; 403 } 404 if (flags != 0) { 405 TRACE("Unknown client flags 0x%" PRIx32 " received", flags); 406 return -EIO; 407 } 408 409 while (1) { 410 int ret; 411 uint32_t clientflags, length; 412 uint64_t magic; 413 414 if (nbd_negotiate_read(client->ioc, &magic, sizeof(magic)) != 415 sizeof(magic)) { 416 LOG("read failed"); 417 return -EINVAL; 418 } 419 TRACE("Checking opts magic"); 420 if (magic != be64_to_cpu(NBD_OPTS_MAGIC)) { 421 LOG("Bad magic received"); 422 return -EINVAL; 423 } 424 425 if (nbd_negotiate_read(client->ioc, &clientflags, 426 sizeof(clientflags)) != sizeof(clientflags)) { 427 LOG("read failed"); 428 return -EINVAL; 429 } 430 clientflags = be32_to_cpu(clientflags); 431 432 if (nbd_negotiate_read(client->ioc, &length, sizeof(length)) != 433 sizeof(length)) { 434 LOG("read failed"); 435 return -EINVAL; 436 } 437 length = be32_to_cpu(length); 438 439 TRACE("Checking option 0x%" PRIx32, clientflags); 440 if (client->tlscreds && 441 client->ioc == (QIOChannel *)client->sioc) { 442 QIOChannel *tioc; 443 if (!fixedNewstyle) { 444 TRACE("Unsupported option 0x%" PRIx32, clientflags); 445 return -EINVAL; 446 } 447 switch (clientflags) { 448 case NBD_OPT_STARTTLS: 449 tioc = nbd_negotiate_handle_starttls(client, length); 450 if (!tioc) { 451 return -EIO; 452 } 453 object_unref(OBJECT(client->ioc)); 454 client->ioc = QIO_CHANNEL(tioc); 455 break; 456 457 case NBD_OPT_EXPORT_NAME: 458 /* No way to return an error to client, so drop connection */ 459 TRACE("Option 0x%x not permitted before TLS", clientflags); 460 return -EINVAL; 461 462 default: 463 TRACE("Option 0x%" PRIx32 " not permitted before TLS", 464 clientflags); 465 if (nbd_negotiate_drop_sync(client->ioc, length) != length) { 466 return -EIO; 467 } 468 ret = nbd_negotiate_send_rep(client->ioc, NBD_REP_ERR_TLS_REQD, 469 clientflags); 470 if (ret < 0) { 471 return ret; 472 } 473 break; 474 } 475 } else if (fixedNewstyle) { 476 switch (clientflags) { 477 case NBD_OPT_LIST: 478 ret = nbd_negotiate_handle_list(client, length); 479 if (ret < 0) { 480 return ret; 481 } 482 break; 483 484 case NBD_OPT_ABORT: 485 return -EINVAL; 486 487 case NBD_OPT_EXPORT_NAME: 488 return nbd_negotiate_handle_export_name(client, length); 489 490 case NBD_OPT_STARTTLS: 491 if (nbd_negotiate_drop_sync(client->ioc, length) != length) { 492 return -EIO; 493 } 494 if (client->tlscreds) { 495 TRACE("TLS already enabled"); 496 ret = nbd_negotiate_send_rep(client->ioc, 497 NBD_REP_ERR_INVALID, 498 clientflags); 499 } else { 500 TRACE("TLS not configured"); 501 ret = nbd_negotiate_send_rep(client->ioc, 502 NBD_REP_ERR_POLICY, 503 clientflags); 504 } 505 if (ret < 0) { 506 return ret; 507 } 508 break; 509 default: 510 TRACE("Unsupported option 0x%" PRIx32, clientflags); 511 if (nbd_negotiate_drop_sync(client->ioc, length) != length) { 512 return -EIO; 513 } 514 ret = nbd_negotiate_send_rep(client->ioc, NBD_REP_ERR_UNSUP, 515 clientflags); 516 if (ret < 0) { 517 return ret; 518 } 519 break; 520 } 521 } else { 522 /* 523 * If broken new-style we should drop the connection 524 * for anything except NBD_OPT_EXPORT_NAME 525 */ 526 switch (clientflags) { 527 case NBD_OPT_EXPORT_NAME: 528 return nbd_negotiate_handle_export_name(client, length); 529 530 default: 531 TRACE("Unsupported option 0x%" PRIx32, clientflags); 532 return -EINVAL; 533 } 534 } 535 } 536 } 537 538 typedef struct { 539 NBDClient *client; 540 Coroutine *co; 541 } NBDClientNewData; 542 543 static coroutine_fn int nbd_negotiate(NBDClientNewData *data) 544 { 545 NBDClient *client = data->client; 546 char buf[8 + 8 + 8 + 128]; 547 int rc; 548 const uint16_t myflags = (NBD_FLAG_HAS_FLAGS | NBD_FLAG_SEND_TRIM | 549 NBD_FLAG_SEND_FLUSH | NBD_FLAG_SEND_FUA); 550 bool oldStyle; 551 552 /* Old style negotiation header without options 553 [ 0 .. 7] passwd ("NBDMAGIC") 554 [ 8 .. 15] magic (NBD_CLIENT_MAGIC) 555 [16 .. 23] size 556 [24 .. 25] server flags (0) 557 [26 .. 27] export flags 558 [28 .. 151] reserved (0) 559 560 New style negotiation header with options 561 [ 0 .. 7] passwd ("NBDMAGIC") 562 [ 8 .. 15] magic (NBD_OPTS_MAGIC) 563 [16 .. 17] server flags (0) 564 ....options sent.... 565 [18 .. 25] size 566 [26 .. 27] export flags 567 [28 .. 151] reserved (0) 568 */ 569 570 qio_channel_set_blocking(client->ioc, false, NULL); 571 rc = -EINVAL; 572 573 TRACE("Beginning negotiation."); 574 memset(buf, 0, sizeof(buf)); 575 memcpy(buf, "NBDMAGIC", 8); 576 577 oldStyle = client->exp != NULL && !client->tlscreds; 578 if (oldStyle) { 579 TRACE("advertising size %" PRIu64 " and flags %x", 580 client->exp->size, client->exp->nbdflags | myflags); 581 stq_be_p(buf + 8, NBD_CLIENT_MAGIC); 582 stq_be_p(buf + 16, client->exp->size); 583 stw_be_p(buf + 26, client->exp->nbdflags | myflags); 584 } else { 585 stq_be_p(buf + 8, NBD_OPTS_MAGIC); 586 stw_be_p(buf + 16, NBD_FLAG_FIXED_NEWSTYLE); 587 } 588 589 if (oldStyle) { 590 if (client->tlscreds) { 591 TRACE("TLS cannot be enabled with oldstyle protocol"); 592 goto fail; 593 } 594 if (nbd_negotiate_write(client->ioc, buf, sizeof(buf)) != sizeof(buf)) { 595 LOG("write failed"); 596 goto fail; 597 } 598 } else { 599 if (nbd_negotiate_write(client->ioc, buf, 18) != 18) { 600 LOG("write failed"); 601 goto fail; 602 } 603 rc = nbd_negotiate_options(client); 604 if (rc != 0) { 605 LOG("option negotiation failed"); 606 goto fail; 607 } 608 609 TRACE("advertising size %" PRIu64 " and flags %x", 610 client->exp->size, client->exp->nbdflags | myflags); 611 stq_be_p(buf + 18, client->exp->size); 612 stw_be_p(buf + 26, client->exp->nbdflags | myflags); 613 if (nbd_negotiate_write(client->ioc, buf + 18, sizeof(buf) - 18) != 614 sizeof(buf) - 18) { 615 LOG("write failed"); 616 goto fail; 617 } 618 } 619 620 TRACE("Negotiation succeeded."); 621 rc = 0; 622 fail: 623 return rc; 624 } 625 626 static ssize_t nbd_receive_request(QIOChannel *ioc, struct nbd_request *request) 627 { 628 uint8_t buf[NBD_REQUEST_SIZE]; 629 uint32_t magic; 630 ssize_t ret; 631 632 ret = read_sync(ioc, buf, sizeof(buf)); 633 if (ret < 0) { 634 return ret; 635 } 636 637 if (ret != sizeof(buf)) { 638 LOG("read failed"); 639 return -EINVAL; 640 } 641 642 /* Request 643 [ 0 .. 3] magic (NBD_REQUEST_MAGIC) 644 [ 4 .. 7] type (0 == READ, 1 == WRITE) 645 [ 8 .. 15] handle 646 [16 .. 23] from 647 [24 .. 27] len 648 */ 649 650 magic = ldl_be_p(buf); 651 request->type = ldl_be_p(buf + 4); 652 request->handle = ldq_be_p(buf + 8); 653 request->from = ldq_be_p(buf + 16); 654 request->len = ldl_be_p(buf + 24); 655 656 TRACE("Got request: { magic = 0x%" PRIx32 ", .type = %" PRIx32 657 ", from = %" PRIu64 " , len = %" PRIu32 " }", 658 magic, request->type, request->from, request->len); 659 660 if (magic != NBD_REQUEST_MAGIC) { 661 LOG("invalid magic (got 0x%" PRIx32 ")", magic); 662 return -EINVAL; 663 } 664 return 0; 665 } 666 667 static ssize_t nbd_send_reply(QIOChannel *ioc, struct nbd_reply *reply) 668 { 669 uint8_t buf[NBD_REPLY_SIZE]; 670 ssize_t ret; 671 672 reply->error = system_errno_to_nbd_errno(reply->error); 673 674 TRACE("Sending response to client: { .error = %" PRId32 675 ", handle = %" PRIu64 " }", 676 reply->error, reply->handle); 677 678 /* Reply 679 [ 0 .. 3] magic (NBD_REPLY_MAGIC) 680 [ 4 .. 7] error (0 == no error) 681 [ 7 .. 15] handle 682 */ 683 stl_be_p(buf, NBD_REPLY_MAGIC); 684 stl_be_p(buf + 4, reply->error); 685 stq_be_p(buf + 8, reply->handle); 686 687 ret = write_sync(ioc, buf, sizeof(buf)); 688 if (ret < 0) { 689 return ret; 690 } 691 692 if (ret != sizeof(buf)) { 693 LOG("writing to socket failed"); 694 return -EINVAL; 695 } 696 return 0; 697 } 698 699 #define MAX_NBD_REQUESTS 16 700 701 void nbd_client_get(NBDClient *client) 702 { 703 client->refcount++; 704 } 705 706 void nbd_client_put(NBDClient *client) 707 { 708 if (--client->refcount == 0) { 709 /* The last reference should be dropped by client->close, 710 * which is called by client_close. 711 */ 712 assert(client->closing); 713 714 nbd_unset_handlers(client); 715 object_unref(OBJECT(client->sioc)); 716 object_unref(OBJECT(client->ioc)); 717 if (client->tlscreds) { 718 object_unref(OBJECT(client->tlscreds)); 719 } 720 g_free(client->tlsaclname); 721 if (client->exp) { 722 QTAILQ_REMOVE(&client->exp->clients, client, next); 723 nbd_export_put(client->exp); 724 } 725 g_free(client); 726 } 727 } 728 729 static void client_close(NBDClient *client) 730 { 731 if (client->closing) { 732 return; 733 } 734 735 client->closing = true; 736 737 /* Force requests to finish. They will drop their own references, 738 * then we'll close the socket and free the NBDClient. 739 */ 740 qio_channel_shutdown(client->ioc, QIO_CHANNEL_SHUTDOWN_BOTH, 741 NULL); 742 743 /* Also tell the client, so that they release their reference. */ 744 if (client->close) { 745 client->close(client); 746 } 747 } 748 749 static NBDRequest *nbd_request_get(NBDClient *client) 750 { 751 NBDRequest *req; 752 753 assert(client->nb_requests <= MAX_NBD_REQUESTS - 1); 754 client->nb_requests++; 755 nbd_update_can_read(client); 756 757 req = g_new0(NBDRequest, 1); 758 nbd_client_get(client); 759 req->client = client; 760 return req; 761 } 762 763 static void nbd_request_put(NBDRequest *req) 764 { 765 NBDClient *client = req->client; 766 767 if (req->data) { 768 qemu_vfree(req->data); 769 } 770 g_free(req); 771 772 client->nb_requests--; 773 nbd_update_can_read(client); 774 nbd_client_put(client); 775 } 776 777 static void blk_aio_attached(AioContext *ctx, void *opaque) 778 { 779 NBDExport *exp = opaque; 780 NBDClient *client; 781 782 TRACE("Export %s: Attaching clients to AIO context %p\n", exp->name, ctx); 783 784 exp->ctx = ctx; 785 786 QTAILQ_FOREACH(client, &exp->clients, next) { 787 nbd_set_handlers(client); 788 } 789 } 790 791 static void blk_aio_detach(void *opaque) 792 { 793 NBDExport *exp = opaque; 794 NBDClient *client; 795 796 TRACE("Export %s: Detaching clients from AIO context %p\n", exp->name, exp->ctx); 797 798 QTAILQ_FOREACH(client, &exp->clients, next) { 799 nbd_unset_handlers(client); 800 } 801 802 exp->ctx = NULL; 803 } 804 805 static void nbd_eject_notifier(Notifier *n, void *data) 806 { 807 NBDExport *exp = container_of(n, NBDExport, eject_notifier); 808 nbd_export_close(exp); 809 } 810 811 NBDExport *nbd_export_new(BlockDriverState *bs, off_t dev_offset, off_t size, 812 uint16_t nbdflags, void (*close)(NBDExport *), 813 bool writethrough, BlockBackend *on_eject_blk, 814 Error **errp) 815 { 816 BlockBackend *blk; 817 NBDExport *exp = g_malloc0(sizeof(NBDExport)); 818 819 blk = blk_new(); 820 blk_insert_bs(blk, bs); 821 blk_set_enable_write_cache(blk, !writethrough); 822 823 exp->refcount = 1; 824 QTAILQ_INIT(&exp->clients); 825 exp->blk = blk; 826 exp->dev_offset = dev_offset; 827 exp->nbdflags = nbdflags; 828 exp->size = size < 0 ? blk_getlength(blk) : size; 829 if (exp->size < 0) { 830 error_setg_errno(errp, -exp->size, 831 "Failed to determine the NBD export's length"); 832 goto fail; 833 } 834 exp->size -= exp->size % BDRV_SECTOR_SIZE; 835 836 exp->close = close; 837 exp->ctx = blk_get_aio_context(blk); 838 blk_add_aio_context_notifier(blk, blk_aio_attached, blk_aio_detach, exp); 839 840 if (on_eject_blk) { 841 blk_ref(on_eject_blk); 842 exp->eject_notifier_blk = on_eject_blk; 843 exp->eject_notifier.notify = nbd_eject_notifier; 844 blk_add_remove_bs_notifier(on_eject_blk, &exp->eject_notifier); 845 } 846 847 /* 848 * NBD exports are used for non-shared storage migration. Make sure 849 * that BDRV_O_INACTIVE is cleared and the image is ready for write 850 * access since the export could be available before migration handover. 851 */ 852 aio_context_acquire(exp->ctx); 853 blk_invalidate_cache(blk, NULL); 854 aio_context_release(exp->ctx); 855 return exp; 856 857 fail: 858 blk_unref(blk); 859 g_free(exp); 860 return NULL; 861 } 862 863 NBDExport *nbd_export_find(const char *name) 864 { 865 NBDExport *exp; 866 QTAILQ_FOREACH(exp, &exports, next) { 867 if (strcmp(name, exp->name) == 0) { 868 return exp; 869 } 870 } 871 872 return NULL; 873 } 874 875 void nbd_export_set_name(NBDExport *exp, const char *name) 876 { 877 if (exp->name == name) { 878 return; 879 } 880 881 nbd_export_get(exp); 882 if (exp->name != NULL) { 883 g_free(exp->name); 884 exp->name = NULL; 885 QTAILQ_REMOVE(&exports, exp, next); 886 nbd_export_put(exp); 887 } 888 if (name != NULL) { 889 nbd_export_get(exp); 890 exp->name = g_strdup(name); 891 QTAILQ_INSERT_TAIL(&exports, exp, next); 892 } 893 nbd_export_put(exp); 894 } 895 896 void nbd_export_close(NBDExport *exp) 897 { 898 NBDClient *client, *next; 899 900 nbd_export_get(exp); 901 QTAILQ_FOREACH_SAFE(client, &exp->clients, next, next) { 902 client_close(client); 903 } 904 nbd_export_set_name(exp, NULL); 905 nbd_export_put(exp); 906 } 907 908 void nbd_export_get(NBDExport *exp) 909 { 910 assert(exp->refcount > 0); 911 exp->refcount++; 912 } 913 914 void nbd_export_put(NBDExport *exp) 915 { 916 assert(exp->refcount > 0); 917 if (exp->refcount == 1) { 918 nbd_export_close(exp); 919 } 920 921 if (--exp->refcount == 0) { 922 assert(exp->name == NULL); 923 924 if (exp->close) { 925 exp->close(exp); 926 } 927 928 if (exp->blk) { 929 if (exp->eject_notifier_blk) { 930 notifier_remove(&exp->eject_notifier); 931 blk_unref(exp->eject_notifier_blk); 932 } 933 blk_remove_aio_context_notifier(exp->blk, blk_aio_attached, 934 blk_aio_detach, exp); 935 blk_unref(exp->blk); 936 exp->blk = NULL; 937 } 938 939 g_free(exp); 940 } 941 } 942 943 BlockBackend *nbd_export_get_blockdev(NBDExport *exp) 944 { 945 return exp->blk; 946 } 947 948 void nbd_export_close_all(void) 949 { 950 NBDExport *exp, *next; 951 952 QTAILQ_FOREACH_SAFE(exp, &exports, next, next) { 953 nbd_export_close(exp); 954 } 955 } 956 957 static ssize_t nbd_co_send_reply(NBDRequest *req, struct nbd_reply *reply, 958 int len) 959 { 960 NBDClient *client = req->client; 961 ssize_t rc, ret; 962 963 g_assert(qemu_in_coroutine()); 964 qemu_co_mutex_lock(&client->send_lock); 965 client->send_coroutine = qemu_coroutine_self(); 966 nbd_set_handlers(client); 967 968 if (!len) { 969 rc = nbd_send_reply(client->ioc, reply); 970 } else { 971 qio_channel_set_cork(client->ioc, true); 972 rc = nbd_send_reply(client->ioc, reply); 973 if (rc >= 0) { 974 ret = write_sync(client->ioc, req->data, len); 975 if (ret != len) { 976 rc = -EIO; 977 } 978 } 979 qio_channel_set_cork(client->ioc, false); 980 } 981 982 client->send_coroutine = NULL; 983 nbd_set_handlers(client); 984 qemu_co_mutex_unlock(&client->send_lock); 985 return rc; 986 } 987 988 /* Collect a client request. Return 0 if request looks valid, -EAGAIN 989 * to keep trying the collection, -EIO to drop connection right away, 990 * and any other negative value to report an error to the client 991 * (although the caller may still need to disconnect after reporting 992 * the error). */ 993 static ssize_t nbd_co_receive_request(NBDRequest *req, 994 struct nbd_request *request) 995 { 996 NBDClient *client = req->client; 997 uint32_t command; 998 ssize_t rc; 999 1000 g_assert(qemu_in_coroutine()); 1001 client->recv_coroutine = qemu_coroutine_self(); 1002 nbd_update_can_read(client); 1003 1004 rc = nbd_receive_request(client->ioc, request); 1005 if (rc < 0) { 1006 if (rc != -EAGAIN) { 1007 rc = -EIO; 1008 } 1009 goto out; 1010 } 1011 1012 TRACE("Decoding type"); 1013 1014 command = request->type & NBD_CMD_MASK_COMMAND; 1015 if (command != NBD_CMD_WRITE) { 1016 /* No payload, we are ready to read the next request. */ 1017 req->complete = true; 1018 } 1019 1020 if (command == NBD_CMD_DISC) { 1021 /* Special case: we're going to disconnect without a reply, 1022 * whether or not flags, from, or len are bogus */ 1023 TRACE("Request type is DISCONNECT"); 1024 rc = -EIO; 1025 goto out; 1026 } 1027 1028 /* Check for sanity in the parameters, part 1. Defer as many 1029 * checks as possible until after reading any NBD_CMD_WRITE 1030 * payload, so we can try and keep the connection alive. */ 1031 if ((request->from + request->len) < request->from) { 1032 LOG("integer overflow detected, you're probably being attacked"); 1033 rc = -EINVAL; 1034 goto out; 1035 } 1036 1037 if (command == NBD_CMD_READ || command == NBD_CMD_WRITE) { 1038 if (request->len > NBD_MAX_BUFFER_SIZE) { 1039 LOG("len (%" PRIu32" ) is larger than max len (%u)", 1040 request->len, NBD_MAX_BUFFER_SIZE); 1041 rc = -EINVAL; 1042 goto out; 1043 } 1044 1045 req->data = blk_try_blockalign(client->exp->blk, request->len); 1046 if (req->data == NULL) { 1047 rc = -ENOMEM; 1048 goto out; 1049 } 1050 } 1051 if (command == NBD_CMD_WRITE) { 1052 TRACE("Reading %" PRIu32 " byte(s)", request->len); 1053 1054 if (read_sync(client->ioc, req->data, request->len) != request->len) { 1055 LOG("reading from socket failed"); 1056 rc = -EIO; 1057 goto out; 1058 } 1059 req->complete = true; 1060 } 1061 1062 /* Sanity checks, part 2. */ 1063 if (request->from + request->len > client->exp->size) { 1064 LOG("operation past EOF; From: %" PRIu64 ", Len: %" PRIu32 1065 ", Size: %" PRIu64, request->from, request->len, 1066 (uint64_t)client->exp->size); 1067 rc = command == NBD_CMD_WRITE ? -ENOSPC : -EINVAL; 1068 goto out; 1069 } 1070 if (request->type & ~NBD_CMD_MASK_COMMAND & ~NBD_CMD_FLAG_FUA) { 1071 LOG("unsupported flags (got 0x%x)", 1072 request->type & ~NBD_CMD_MASK_COMMAND); 1073 rc = -EINVAL; 1074 goto out; 1075 } 1076 1077 rc = 0; 1078 1079 out: 1080 client->recv_coroutine = NULL; 1081 nbd_update_can_read(client); 1082 1083 return rc; 1084 } 1085 1086 static void nbd_trip(void *opaque) 1087 { 1088 NBDClient *client = opaque; 1089 NBDExport *exp = client->exp; 1090 NBDRequest *req; 1091 struct nbd_request request; 1092 struct nbd_reply reply; 1093 ssize_t ret; 1094 uint32_t command; 1095 int flags; 1096 1097 TRACE("Reading request."); 1098 if (client->closing) { 1099 return; 1100 } 1101 1102 req = nbd_request_get(client); 1103 ret = nbd_co_receive_request(req, &request); 1104 if (ret == -EAGAIN) { 1105 goto done; 1106 } 1107 if (ret == -EIO) { 1108 goto out; 1109 } 1110 1111 reply.handle = request.handle; 1112 reply.error = 0; 1113 1114 if (ret < 0) { 1115 reply.error = -ret; 1116 goto error_reply; 1117 } 1118 command = request.type & NBD_CMD_MASK_COMMAND; 1119 1120 if (client->closing) { 1121 /* 1122 * The client may be closed when we are blocked in 1123 * nbd_co_receive_request() 1124 */ 1125 goto done; 1126 } 1127 1128 switch (command) { 1129 case NBD_CMD_READ: 1130 TRACE("Request type is READ"); 1131 1132 if (request.type & NBD_CMD_FLAG_FUA) { 1133 ret = blk_co_flush(exp->blk); 1134 if (ret < 0) { 1135 LOG("flush failed"); 1136 reply.error = -ret; 1137 goto error_reply; 1138 } 1139 } 1140 1141 ret = blk_pread(exp->blk, request.from + exp->dev_offset, 1142 req->data, request.len); 1143 if (ret < 0) { 1144 LOG("reading from file failed"); 1145 reply.error = -ret; 1146 goto error_reply; 1147 } 1148 1149 TRACE("Read %" PRIu32" byte(s)", request.len); 1150 if (nbd_co_send_reply(req, &reply, request.len) < 0) 1151 goto out; 1152 break; 1153 case NBD_CMD_WRITE: 1154 TRACE("Request type is WRITE"); 1155 1156 if (exp->nbdflags & NBD_FLAG_READ_ONLY) { 1157 TRACE("Server is read-only, return error"); 1158 reply.error = EROFS; 1159 goto error_reply; 1160 } 1161 1162 TRACE("Writing to device"); 1163 1164 flags = 0; 1165 if (request.type & NBD_CMD_FLAG_FUA) { 1166 flags |= BDRV_REQ_FUA; 1167 } 1168 ret = blk_pwrite(exp->blk, request.from + exp->dev_offset, 1169 req->data, request.len, flags); 1170 if (ret < 0) { 1171 LOG("writing to file failed"); 1172 reply.error = -ret; 1173 goto error_reply; 1174 } 1175 1176 if (nbd_co_send_reply(req, &reply, 0) < 0) { 1177 goto out; 1178 } 1179 break; 1180 1181 case NBD_CMD_DISC: 1182 /* unreachable, thanks to special case in nbd_co_receive_request() */ 1183 abort(); 1184 1185 case NBD_CMD_FLUSH: 1186 TRACE("Request type is FLUSH"); 1187 1188 ret = blk_co_flush(exp->blk); 1189 if (ret < 0) { 1190 LOG("flush failed"); 1191 reply.error = -ret; 1192 } 1193 if (nbd_co_send_reply(req, &reply, 0) < 0) { 1194 goto out; 1195 } 1196 break; 1197 case NBD_CMD_TRIM: 1198 TRACE("Request type is TRIM"); 1199 ret = blk_co_pdiscard(exp->blk, request.from + exp->dev_offset, 1200 request.len); 1201 if (ret < 0) { 1202 LOG("discard failed"); 1203 reply.error = -ret; 1204 } 1205 if (nbd_co_send_reply(req, &reply, 0) < 0) { 1206 goto out; 1207 } 1208 break; 1209 default: 1210 LOG("invalid request type (%" PRIu32 ") received", request.type); 1211 reply.error = EINVAL; 1212 error_reply: 1213 /* We must disconnect after NBD_CMD_WRITE if we did not 1214 * read the payload. 1215 */ 1216 if (nbd_co_send_reply(req, &reply, 0) < 0 || !req->complete) { 1217 goto out; 1218 } 1219 break; 1220 } 1221 1222 TRACE("Request/Reply complete"); 1223 1224 done: 1225 nbd_request_put(req); 1226 return; 1227 1228 out: 1229 nbd_request_put(req); 1230 client_close(client); 1231 } 1232 1233 static void nbd_read(void *opaque) 1234 { 1235 NBDClient *client = opaque; 1236 1237 if (client->recv_coroutine) { 1238 qemu_coroutine_enter(client->recv_coroutine); 1239 } else { 1240 qemu_coroutine_enter(qemu_coroutine_create(nbd_trip, client)); 1241 } 1242 } 1243 1244 static void nbd_restart_write(void *opaque) 1245 { 1246 NBDClient *client = opaque; 1247 1248 qemu_coroutine_enter(client->send_coroutine); 1249 } 1250 1251 static void nbd_set_handlers(NBDClient *client) 1252 { 1253 if (client->exp && client->exp->ctx) { 1254 aio_set_fd_handler(client->exp->ctx, client->sioc->fd, 1255 true, 1256 client->can_read ? nbd_read : NULL, 1257 client->send_coroutine ? nbd_restart_write : NULL, 1258 client); 1259 } 1260 } 1261 1262 static void nbd_unset_handlers(NBDClient *client) 1263 { 1264 if (client->exp && client->exp->ctx) { 1265 aio_set_fd_handler(client->exp->ctx, client->sioc->fd, 1266 true, NULL, NULL, NULL); 1267 } 1268 } 1269 1270 static void nbd_update_can_read(NBDClient *client) 1271 { 1272 bool can_read = client->recv_coroutine || 1273 client->nb_requests < MAX_NBD_REQUESTS; 1274 1275 if (can_read != client->can_read) { 1276 client->can_read = can_read; 1277 nbd_set_handlers(client); 1278 1279 /* There is no need to invoke aio_notify(), since aio_set_fd_handler() 1280 * in nbd_set_handlers() will have taken care of that */ 1281 } 1282 } 1283 1284 static coroutine_fn void nbd_co_client_start(void *opaque) 1285 { 1286 NBDClientNewData *data = opaque; 1287 NBDClient *client = data->client; 1288 NBDExport *exp = client->exp; 1289 1290 if (exp) { 1291 nbd_export_get(exp); 1292 } 1293 if (nbd_negotiate(data)) { 1294 client_close(client); 1295 goto out; 1296 } 1297 qemu_co_mutex_init(&client->send_lock); 1298 nbd_set_handlers(client); 1299 1300 if (exp) { 1301 QTAILQ_INSERT_TAIL(&exp->clients, client, next); 1302 } 1303 out: 1304 g_free(data); 1305 } 1306 1307 void nbd_client_new(NBDExport *exp, 1308 QIOChannelSocket *sioc, 1309 QCryptoTLSCreds *tlscreds, 1310 const char *tlsaclname, 1311 void (*close_fn)(NBDClient *)) 1312 { 1313 NBDClient *client; 1314 NBDClientNewData *data = g_new(NBDClientNewData, 1); 1315 1316 client = g_malloc0(sizeof(NBDClient)); 1317 client->refcount = 1; 1318 client->exp = exp; 1319 client->tlscreds = tlscreds; 1320 if (tlscreds) { 1321 object_ref(OBJECT(client->tlscreds)); 1322 } 1323 client->tlsaclname = g_strdup(tlsaclname); 1324 client->sioc = sioc; 1325 object_ref(OBJECT(client->sioc)); 1326 client->ioc = QIO_CHANNEL(sioc); 1327 object_ref(OBJECT(client->ioc)); 1328 client->can_read = true; 1329 client->close = close_fn; 1330 1331 data->client = client; 1332 data->co = qemu_coroutine_create(nbd_co_client_start, data); 1333 qemu_coroutine_enter(data->co); 1334 } 1335