1 /* 2 * Copyright (C) 2005 Anthony Liguori <anthony@codemonkey.ws> 3 * 4 * Network Block Device Server Side 5 * 6 * This program is free software; you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License as published by 8 * the Free Software Foundation; under version 2 of the License. 9 * 10 * This program is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 * GNU General Public License for more details. 14 * 15 * You should have received a copy of the GNU General Public License 16 * along with this program; if not, see <http://www.gnu.org/licenses/>. 17 */ 18 19 #include "qemu/osdep.h" 20 #include "nbd-internal.h" 21 22 static int system_errno_to_nbd_errno(int err) 23 { 24 switch (err) { 25 case 0: 26 return NBD_SUCCESS; 27 case EPERM: 28 return NBD_EPERM; 29 case EIO: 30 return NBD_EIO; 31 case ENOMEM: 32 return NBD_ENOMEM; 33 #ifdef EDQUOT 34 case EDQUOT: 35 #endif 36 case EFBIG: 37 case ENOSPC: 38 return NBD_ENOSPC; 39 case EINVAL: 40 default: 41 return NBD_EINVAL; 42 } 43 } 44 45 /* Definitions for opaque data types */ 46 47 typedef struct NBDRequest NBDRequest; 48 49 struct NBDRequest { 50 QSIMPLEQ_ENTRY(NBDRequest) entry; 51 NBDClient *client; 52 uint8_t *data; 53 }; 54 55 struct NBDExport { 56 int refcount; 57 void (*close)(NBDExport *exp); 58 59 BlockBackend *blk; 60 char *name; 61 off_t dev_offset; 62 off_t size; 63 uint32_t nbdflags; 64 QTAILQ_HEAD(, NBDClient) clients; 65 QTAILQ_ENTRY(NBDExport) next; 66 67 AioContext *ctx; 68 69 Notifier eject_notifier; 70 }; 71 72 static QTAILQ_HEAD(, NBDExport) exports = QTAILQ_HEAD_INITIALIZER(exports); 73 74 struct NBDClient { 75 int refcount; 76 void (*close)(NBDClient *client); 77 78 NBDExport *exp; 79 QCryptoTLSCreds *tlscreds; 80 char *tlsaclname; 81 QIOChannelSocket *sioc; /* The underlying data channel */ 82 QIOChannel *ioc; /* The current I/O channel which may differ (eg TLS) */ 83 84 Coroutine *recv_coroutine; 85 86 CoMutex send_lock; 87 Coroutine *send_coroutine; 88 89 bool can_read; 90 91 QTAILQ_ENTRY(NBDClient) next; 92 int nb_requests; 93 bool closing; 94 }; 95 96 /* That's all folks */ 97 98 static void nbd_set_handlers(NBDClient *client); 99 static void nbd_unset_handlers(NBDClient *client); 100 static void nbd_update_can_read(NBDClient *client); 101 102 static gboolean nbd_negotiate_continue(QIOChannel *ioc, 103 GIOCondition condition, 104 void *opaque) 105 { 106 qemu_coroutine_enter(opaque, NULL); 107 return TRUE; 108 } 109 110 static ssize_t nbd_negotiate_read(QIOChannel *ioc, void *buffer, size_t size) 111 { 112 ssize_t ret; 113 guint watch; 114 115 assert(qemu_in_coroutine()); 116 /* Negotiation are always in main loop. */ 117 watch = qio_channel_add_watch(ioc, 118 G_IO_IN, 119 nbd_negotiate_continue, 120 qemu_coroutine_self(), 121 NULL); 122 ret = read_sync(ioc, buffer, size); 123 g_source_remove(watch); 124 return ret; 125 126 } 127 128 static ssize_t nbd_negotiate_write(QIOChannel *ioc, void *buffer, size_t size) 129 { 130 ssize_t ret; 131 guint watch; 132 133 assert(qemu_in_coroutine()); 134 /* Negotiation are always in main loop. */ 135 watch = qio_channel_add_watch(ioc, 136 G_IO_OUT, 137 nbd_negotiate_continue, 138 qemu_coroutine_self(), 139 NULL); 140 ret = write_sync(ioc, buffer, size); 141 g_source_remove(watch); 142 return ret; 143 } 144 145 static ssize_t nbd_negotiate_drop_sync(QIOChannel *ioc, size_t size) 146 { 147 ssize_t ret, dropped = size; 148 uint8_t *buffer = g_malloc(MIN(65536, size)); 149 150 while (size > 0) { 151 ret = nbd_negotiate_read(ioc, buffer, MIN(65536, size)); 152 if (ret < 0) { 153 g_free(buffer); 154 return ret; 155 } 156 157 assert(ret <= size); 158 size -= ret; 159 } 160 161 g_free(buffer); 162 return dropped; 163 } 164 165 /* Basic flow for negotiation 166 167 Server Client 168 Negotiate 169 170 or 171 172 Server Client 173 Negotiate #1 174 Option 175 Negotiate #2 176 177 ---- 178 179 followed by 180 181 Server Client 182 Request 183 Response 184 Request 185 Response 186 ... 187 ... 188 Request (type == 2) 189 190 */ 191 192 static int nbd_negotiate_send_rep(QIOChannel *ioc, uint32_t type, uint32_t opt) 193 { 194 uint64_t magic; 195 uint32_t len; 196 197 TRACE("Reply opt=%x type=%x", type, opt); 198 199 magic = cpu_to_be64(NBD_REP_MAGIC); 200 if (nbd_negotiate_write(ioc, &magic, sizeof(magic)) != sizeof(magic)) { 201 LOG("write failed (rep magic)"); 202 return -EINVAL; 203 } 204 opt = cpu_to_be32(opt); 205 if (nbd_negotiate_write(ioc, &opt, sizeof(opt)) != sizeof(opt)) { 206 LOG("write failed (rep opt)"); 207 return -EINVAL; 208 } 209 type = cpu_to_be32(type); 210 if (nbd_negotiate_write(ioc, &type, sizeof(type)) != sizeof(type)) { 211 LOG("write failed (rep type)"); 212 return -EINVAL; 213 } 214 len = cpu_to_be32(0); 215 if (nbd_negotiate_write(ioc, &len, sizeof(len)) != sizeof(len)) { 216 LOG("write failed (rep data length)"); 217 return -EINVAL; 218 } 219 return 0; 220 } 221 222 static int nbd_negotiate_send_rep_list(QIOChannel *ioc, NBDExport *exp) 223 { 224 uint64_t magic, name_len; 225 uint32_t opt, type, len; 226 227 TRACE("Advertizing export name '%s'", exp->name ? exp->name : ""); 228 name_len = strlen(exp->name); 229 magic = cpu_to_be64(NBD_REP_MAGIC); 230 if (nbd_negotiate_write(ioc, &magic, sizeof(magic)) != sizeof(magic)) { 231 LOG("write failed (magic)"); 232 return -EINVAL; 233 } 234 opt = cpu_to_be32(NBD_OPT_LIST); 235 if (nbd_negotiate_write(ioc, &opt, sizeof(opt)) != sizeof(opt)) { 236 LOG("write failed (opt)"); 237 return -EINVAL; 238 } 239 type = cpu_to_be32(NBD_REP_SERVER); 240 if (nbd_negotiate_write(ioc, &type, sizeof(type)) != sizeof(type)) { 241 LOG("write failed (reply type)"); 242 return -EINVAL; 243 } 244 len = cpu_to_be32(name_len + sizeof(len)); 245 if (nbd_negotiate_write(ioc, &len, sizeof(len)) != sizeof(len)) { 246 LOG("write failed (length)"); 247 return -EINVAL; 248 } 249 len = cpu_to_be32(name_len); 250 if (nbd_negotiate_write(ioc, &len, sizeof(len)) != sizeof(len)) { 251 LOG("write failed (length)"); 252 return -EINVAL; 253 } 254 if (nbd_negotiate_write(ioc, exp->name, name_len) != name_len) { 255 LOG("write failed (buffer)"); 256 return -EINVAL; 257 } 258 return 0; 259 } 260 261 static int nbd_negotiate_handle_list(NBDClient *client, uint32_t length) 262 { 263 NBDExport *exp; 264 265 if (length) { 266 if (nbd_negotiate_drop_sync(client->ioc, length) != length) { 267 return -EIO; 268 } 269 return nbd_negotiate_send_rep(client->ioc, 270 NBD_REP_ERR_INVALID, NBD_OPT_LIST); 271 } 272 273 /* For each export, send a NBD_REP_SERVER reply. */ 274 QTAILQ_FOREACH(exp, &exports, next) { 275 if (nbd_negotiate_send_rep_list(client->ioc, exp)) { 276 return -EINVAL; 277 } 278 } 279 /* Finish with a NBD_REP_ACK. */ 280 return nbd_negotiate_send_rep(client->ioc, NBD_REP_ACK, NBD_OPT_LIST); 281 } 282 283 static int nbd_negotiate_handle_export_name(NBDClient *client, uint32_t length) 284 { 285 int rc = -EINVAL; 286 char name[256]; 287 288 /* Client sends: 289 [20 .. xx] export name (length bytes) 290 */ 291 TRACE("Checking length"); 292 if (length > 255) { 293 LOG("Bad length received"); 294 goto fail; 295 } 296 if (nbd_negotiate_read(client->ioc, name, length) != length) { 297 LOG("read failed"); 298 goto fail; 299 } 300 name[length] = '\0'; 301 302 TRACE("Client requested export '%s'", name); 303 304 client->exp = nbd_export_find(name); 305 if (!client->exp) { 306 LOG("export not found"); 307 goto fail; 308 } 309 310 QTAILQ_INSERT_TAIL(&client->exp->clients, client, next); 311 nbd_export_get(client->exp); 312 rc = 0; 313 fail: 314 return rc; 315 } 316 317 318 static QIOChannel *nbd_negotiate_handle_starttls(NBDClient *client, 319 uint32_t length) 320 { 321 QIOChannel *ioc; 322 QIOChannelTLS *tioc; 323 struct NBDTLSHandshakeData data = { 0 }; 324 325 TRACE("Setting up TLS"); 326 ioc = client->ioc; 327 if (length) { 328 if (nbd_negotiate_drop_sync(ioc, length) != length) { 329 return NULL; 330 } 331 nbd_negotiate_send_rep(ioc, NBD_REP_ERR_INVALID, NBD_OPT_STARTTLS); 332 return NULL; 333 } 334 335 nbd_negotiate_send_rep(client->ioc, NBD_REP_ACK, NBD_OPT_STARTTLS); 336 337 tioc = qio_channel_tls_new_server(ioc, 338 client->tlscreds, 339 client->tlsaclname, 340 NULL); 341 if (!tioc) { 342 return NULL; 343 } 344 345 TRACE("Starting TLS handshake"); 346 data.loop = g_main_loop_new(g_main_context_default(), FALSE); 347 qio_channel_tls_handshake(tioc, 348 nbd_tls_handshake, 349 &data, 350 NULL); 351 352 if (!data.complete) { 353 g_main_loop_run(data.loop); 354 } 355 g_main_loop_unref(data.loop); 356 if (data.error) { 357 object_unref(OBJECT(tioc)); 358 error_free(data.error); 359 return NULL; 360 } 361 362 return QIO_CHANNEL(tioc); 363 } 364 365 366 static int nbd_negotiate_options(NBDClient *client) 367 { 368 uint32_t flags; 369 bool fixedNewstyle = false; 370 371 /* Client sends: 372 [ 0 .. 3] client flags 373 374 [ 0 .. 7] NBD_OPTS_MAGIC 375 [ 8 .. 11] NBD option 376 [12 .. 15] Data length 377 ... Rest of request 378 379 [ 0 .. 7] NBD_OPTS_MAGIC 380 [ 8 .. 11] Second NBD option 381 [12 .. 15] Data length 382 ... Rest of request 383 */ 384 385 if (nbd_negotiate_read(client->ioc, &flags, sizeof(flags)) != 386 sizeof(flags)) { 387 LOG("read failed"); 388 return -EIO; 389 } 390 TRACE("Checking client flags"); 391 be32_to_cpus(&flags); 392 if (flags & NBD_FLAG_C_FIXED_NEWSTYLE) { 393 TRACE("Support supports fixed newstyle handshake"); 394 fixedNewstyle = true; 395 flags &= ~NBD_FLAG_C_FIXED_NEWSTYLE; 396 } 397 if (flags != 0) { 398 TRACE("Unknown client flags 0x%x received", flags); 399 return -EIO; 400 } 401 402 while (1) { 403 int ret; 404 uint32_t clientflags, length; 405 uint64_t magic; 406 407 if (nbd_negotiate_read(client->ioc, &magic, sizeof(magic)) != 408 sizeof(magic)) { 409 LOG("read failed"); 410 return -EINVAL; 411 } 412 TRACE("Checking opts magic"); 413 if (magic != be64_to_cpu(NBD_OPTS_MAGIC)) { 414 LOG("Bad magic received"); 415 return -EINVAL; 416 } 417 418 if (nbd_negotiate_read(client->ioc, &clientflags, 419 sizeof(clientflags)) != sizeof(clientflags)) { 420 LOG("read failed"); 421 return -EINVAL; 422 } 423 clientflags = be32_to_cpu(clientflags); 424 425 if (nbd_negotiate_read(client->ioc, &length, sizeof(length)) != 426 sizeof(length)) { 427 LOG("read failed"); 428 return -EINVAL; 429 } 430 length = be32_to_cpu(length); 431 432 TRACE("Checking option 0x%x", clientflags); 433 if (client->tlscreds && 434 client->ioc == (QIOChannel *)client->sioc) { 435 QIOChannel *tioc; 436 if (!fixedNewstyle) { 437 TRACE("Unsupported option 0x%x", clientflags); 438 return -EINVAL; 439 } 440 switch (clientflags) { 441 case NBD_OPT_STARTTLS: 442 tioc = nbd_negotiate_handle_starttls(client, length); 443 if (!tioc) { 444 return -EIO; 445 } 446 object_unref(OBJECT(client->ioc)); 447 client->ioc = QIO_CHANNEL(tioc); 448 break; 449 450 default: 451 TRACE("Option 0x%x not permitted before TLS", clientflags); 452 nbd_negotiate_send_rep(client->ioc, NBD_REP_ERR_TLS_REQD, 453 clientflags); 454 return -EINVAL; 455 } 456 } else if (fixedNewstyle) { 457 switch (clientflags) { 458 case NBD_OPT_LIST: 459 ret = nbd_negotiate_handle_list(client, length); 460 if (ret < 0) { 461 return ret; 462 } 463 break; 464 465 case NBD_OPT_ABORT: 466 return -EINVAL; 467 468 case NBD_OPT_EXPORT_NAME: 469 return nbd_negotiate_handle_export_name(client, length); 470 471 case NBD_OPT_STARTTLS: 472 if (client->tlscreds) { 473 TRACE("TLS already enabled"); 474 nbd_negotiate_send_rep(client->ioc, NBD_REP_ERR_INVALID, 475 clientflags); 476 } else { 477 TRACE("TLS not configured"); 478 nbd_negotiate_send_rep(client->ioc, NBD_REP_ERR_POLICY, 479 clientflags); 480 } 481 return -EINVAL; 482 default: 483 TRACE("Unsupported option 0x%x", clientflags); 484 nbd_negotiate_send_rep(client->ioc, NBD_REP_ERR_UNSUP, 485 clientflags); 486 return -EINVAL; 487 } 488 } else { 489 /* 490 * If broken new-style we should drop the connection 491 * for anything except NBD_OPT_EXPORT_NAME 492 */ 493 switch (clientflags) { 494 case NBD_OPT_EXPORT_NAME: 495 return nbd_negotiate_handle_export_name(client, length); 496 497 default: 498 TRACE("Unsupported option 0x%x", clientflags); 499 return -EINVAL; 500 } 501 } 502 } 503 } 504 505 typedef struct { 506 NBDClient *client; 507 Coroutine *co; 508 } NBDClientNewData; 509 510 static coroutine_fn int nbd_negotiate(NBDClientNewData *data) 511 { 512 NBDClient *client = data->client; 513 char buf[8 + 8 + 8 + 128]; 514 int rc; 515 const int myflags = (NBD_FLAG_HAS_FLAGS | NBD_FLAG_SEND_TRIM | 516 NBD_FLAG_SEND_FLUSH | NBD_FLAG_SEND_FUA); 517 bool oldStyle; 518 519 /* Old style negotiation header without options 520 [ 0 .. 7] passwd ("NBDMAGIC") 521 [ 8 .. 15] magic (NBD_CLIENT_MAGIC) 522 [16 .. 23] size 523 [24 .. 25] server flags (0) 524 [26 .. 27] export flags 525 [28 .. 151] reserved (0) 526 527 New style negotiation header with options 528 [ 0 .. 7] passwd ("NBDMAGIC") 529 [ 8 .. 15] magic (NBD_OPTS_MAGIC) 530 [16 .. 17] server flags (0) 531 ....options sent.... 532 [18 .. 25] size 533 [26 .. 27] export flags 534 [28 .. 151] reserved (0) 535 */ 536 537 qio_channel_set_blocking(client->ioc, false, NULL); 538 rc = -EINVAL; 539 540 TRACE("Beginning negotiation."); 541 memset(buf, 0, sizeof(buf)); 542 memcpy(buf, "NBDMAGIC", 8); 543 544 oldStyle = client->exp != NULL && !client->tlscreds; 545 if (oldStyle) { 546 assert ((client->exp->nbdflags & ~65535) == 0); 547 stq_be_p(buf + 8, NBD_CLIENT_MAGIC); 548 stq_be_p(buf + 16, client->exp->size); 549 stw_be_p(buf + 26, client->exp->nbdflags | myflags); 550 } else { 551 stq_be_p(buf + 8, NBD_OPTS_MAGIC); 552 stw_be_p(buf + 16, NBD_FLAG_FIXED_NEWSTYLE); 553 } 554 555 if (oldStyle) { 556 if (client->tlscreds) { 557 TRACE("TLS cannot be enabled with oldstyle protocol"); 558 goto fail; 559 } 560 if (nbd_negotiate_write(client->ioc, buf, sizeof(buf)) != sizeof(buf)) { 561 LOG("write failed"); 562 goto fail; 563 } 564 } else { 565 if (nbd_negotiate_write(client->ioc, buf, 18) != 18) { 566 LOG("write failed"); 567 goto fail; 568 } 569 rc = nbd_negotiate_options(client); 570 if (rc != 0) { 571 LOG("option negotiation failed"); 572 goto fail; 573 } 574 575 assert ((client->exp->nbdflags & ~65535) == 0); 576 stq_be_p(buf + 18, client->exp->size); 577 stw_be_p(buf + 26, client->exp->nbdflags | myflags); 578 if (nbd_negotiate_write(client->ioc, buf + 18, sizeof(buf) - 18) != 579 sizeof(buf) - 18) { 580 LOG("write failed"); 581 goto fail; 582 } 583 } 584 585 TRACE("Negotiation succeeded."); 586 rc = 0; 587 fail: 588 return rc; 589 } 590 591 #ifdef __linux__ 592 593 int nbd_disconnect(int fd) 594 { 595 ioctl(fd, NBD_CLEAR_QUE); 596 ioctl(fd, NBD_DISCONNECT); 597 ioctl(fd, NBD_CLEAR_SOCK); 598 return 0; 599 } 600 601 #else 602 603 int nbd_disconnect(int fd) 604 { 605 return -ENOTSUP; 606 } 607 #endif 608 609 static ssize_t nbd_receive_request(QIOChannel *ioc, struct nbd_request *request) 610 { 611 uint8_t buf[NBD_REQUEST_SIZE]; 612 uint32_t magic; 613 ssize_t ret; 614 615 ret = read_sync(ioc, buf, sizeof(buf)); 616 if (ret < 0) { 617 return ret; 618 } 619 620 if (ret != sizeof(buf)) { 621 LOG("read failed"); 622 return -EINVAL; 623 } 624 625 /* Request 626 [ 0 .. 3] magic (NBD_REQUEST_MAGIC) 627 [ 4 .. 7] type (0 == READ, 1 == WRITE) 628 [ 8 .. 15] handle 629 [16 .. 23] from 630 [24 .. 27] len 631 */ 632 633 magic = be32_to_cpup((uint32_t*)buf); 634 request->type = be32_to_cpup((uint32_t*)(buf + 4)); 635 request->handle = be64_to_cpup((uint64_t*)(buf + 8)); 636 request->from = be64_to_cpup((uint64_t*)(buf + 16)); 637 request->len = be32_to_cpup((uint32_t*)(buf + 24)); 638 639 TRACE("Got request: " 640 "{ magic = 0x%x, .type = %d, from = %" PRIu64" , len = %u }", 641 magic, request->type, request->from, request->len); 642 643 if (magic != NBD_REQUEST_MAGIC) { 644 LOG("invalid magic (got 0x%x)", magic); 645 return -EINVAL; 646 } 647 return 0; 648 } 649 650 static ssize_t nbd_send_reply(QIOChannel *ioc, struct nbd_reply *reply) 651 { 652 uint8_t buf[NBD_REPLY_SIZE]; 653 ssize_t ret; 654 655 reply->error = system_errno_to_nbd_errno(reply->error); 656 657 /* Reply 658 [ 0 .. 3] magic (NBD_REPLY_MAGIC) 659 [ 4 .. 7] error (0 == no error) 660 [ 7 .. 15] handle 661 */ 662 stl_be_p(buf, NBD_REPLY_MAGIC); 663 stl_be_p(buf + 4, reply->error); 664 stq_be_p(buf + 8, reply->handle); 665 666 TRACE("Sending response to client"); 667 668 ret = write_sync(ioc, buf, sizeof(buf)); 669 if (ret < 0) { 670 return ret; 671 } 672 673 if (ret != sizeof(buf)) { 674 LOG("writing to socket failed"); 675 return -EINVAL; 676 } 677 return 0; 678 } 679 680 #define MAX_NBD_REQUESTS 16 681 682 void nbd_client_get(NBDClient *client) 683 { 684 client->refcount++; 685 } 686 687 void nbd_client_put(NBDClient *client) 688 { 689 if (--client->refcount == 0) { 690 /* The last reference should be dropped by client->close, 691 * which is called by client_close. 692 */ 693 assert(client->closing); 694 695 nbd_unset_handlers(client); 696 object_unref(OBJECT(client->sioc)); 697 object_unref(OBJECT(client->ioc)); 698 if (client->tlscreds) { 699 object_unref(OBJECT(client->tlscreds)); 700 } 701 g_free(client->tlsaclname); 702 if (client->exp) { 703 QTAILQ_REMOVE(&client->exp->clients, client, next); 704 nbd_export_put(client->exp); 705 } 706 g_free(client); 707 } 708 } 709 710 static void client_close(NBDClient *client) 711 { 712 if (client->closing) { 713 return; 714 } 715 716 client->closing = true; 717 718 /* Force requests to finish. They will drop their own references, 719 * then we'll close the socket and free the NBDClient. 720 */ 721 qio_channel_shutdown(client->ioc, QIO_CHANNEL_SHUTDOWN_BOTH, 722 NULL); 723 724 /* Also tell the client, so that they release their reference. */ 725 if (client->close) { 726 client->close(client); 727 } 728 } 729 730 static NBDRequest *nbd_request_get(NBDClient *client) 731 { 732 NBDRequest *req; 733 734 assert(client->nb_requests <= MAX_NBD_REQUESTS - 1); 735 client->nb_requests++; 736 nbd_update_can_read(client); 737 738 req = g_new0(NBDRequest, 1); 739 nbd_client_get(client); 740 req->client = client; 741 return req; 742 } 743 744 static void nbd_request_put(NBDRequest *req) 745 { 746 NBDClient *client = req->client; 747 748 if (req->data) { 749 qemu_vfree(req->data); 750 } 751 g_free(req); 752 753 client->nb_requests--; 754 nbd_update_can_read(client); 755 nbd_client_put(client); 756 } 757 758 static void blk_aio_attached(AioContext *ctx, void *opaque) 759 { 760 NBDExport *exp = opaque; 761 NBDClient *client; 762 763 TRACE("Export %s: Attaching clients to AIO context %p\n", exp->name, ctx); 764 765 exp->ctx = ctx; 766 767 QTAILQ_FOREACH(client, &exp->clients, next) { 768 nbd_set_handlers(client); 769 } 770 } 771 772 static void blk_aio_detach(void *opaque) 773 { 774 NBDExport *exp = opaque; 775 NBDClient *client; 776 777 TRACE("Export %s: Detaching clients from AIO context %p\n", exp->name, exp->ctx); 778 779 QTAILQ_FOREACH(client, &exp->clients, next) { 780 nbd_unset_handlers(client); 781 } 782 783 exp->ctx = NULL; 784 } 785 786 static void nbd_eject_notifier(Notifier *n, void *data) 787 { 788 NBDExport *exp = container_of(n, NBDExport, eject_notifier); 789 nbd_export_close(exp); 790 } 791 792 NBDExport *nbd_export_new(BlockBackend *blk, off_t dev_offset, off_t size, 793 uint32_t nbdflags, void (*close)(NBDExport *), 794 Error **errp) 795 { 796 NBDExport *exp = g_malloc0(sizeof(NBDExport)); 797 exp->refcount = 1; 798 QTAILQ_INIT(&exp->clients); 799 exp->blk = blk; 800 exp->dev_offset = dev_offset; 801 exp->nbdflags = nbdflags; 802 exp->size = size < 0 ? blk_getlength(blk) : size; 803 if (exp->size < 0) { 804 error_setg_errno(errp, -exp->size, 805 "Failed to determine the NBD export's length"); 806 goto fail; 807 } 808 exp->size -= exp->size % BDRV_SECTOR_SIZE; 809 810 exp->close = close; 811 exp->ctx = blk_get_aio_context(blk); 812 blk_ref(blk); 813 blk_add_aio_context_notifier(blk, blk_aio_attached, blk_aio_detach, exp); 814 815 exp->eject_notifier.notify = nbd_eject_notifier; 816 blk_add_remove_bs_notifier(blk, &exp->eject_notifier); 817 818 /* 819 * NBD exports are used for non-shared storage migration. Make sure 820 * that BDRV_O_INACTIVE is cleared and the image is ready for write 821 * access since the export could be available before migration handover. 822 */ 823 aio_context_acquire(exp->ctx); 824 blk_invalidate_cache(blk, NULL); 825 aio_context_release(exp->ctx); 826 return exp; 827 828 fail: 829 g_free(exp); 830 return NULL; 831 } 832 833 NBDExport *nbd_export_find(const char *name) 834 { 835 NBDExport *exp; 836 QTAILQ_FOREACH(exp, &exports, next) { 837 if (strcmp(name, exp->name) == 0) { 838 return exp; 839 } 840 } 841 842 return NULL; 843 } 844 845 void nbd_export_set_name(NBDExport *exp, const char *name) 846 { 847 if (exp->name == name) { 848 return; 849 } 850 851 nbd_export_get(exp); 852 if (exp->name != NULL) { 853 g_free(exp->name); 854 exp->name = NULL; 855 QTAILQ_REMOVE(&exports, exp, next); 856 nbd_export_put(exp); 857 } 858 if (name != NULL) { 859 nbd_export_get(exp); 860 exp->name = g_strdup(name); 861 QTAILQ_INSERT_TAIL(&exports, exp, next); 862 } 863 nbd_export_put(exp); 864 } 865 866 void nbd_export_close(NBDExport *exp) 867 { 868 NBDClient *client, *next; 869 870 nbd_export_get(exp); 871 QTAILQ_FOREACH_SAFE(client, &exp->clients, next, next) { 872 client_close(client); 873 } 874 nbd_export_set_name(exp, NULL); 875 nbd_export_put(exp); 876 } 877 878 void nbd_export_get(NBDExport *exp) 879 { 880 assert(exp->refcount > 0); 881 exp->refcount++; 882 } 883 884 void nbd_export_put(NBDExport *exp) 885 { 886 assert(exp->refcount > 0); 887 if (exp->refcount == 1) { 888 nbd_export_close(exp); 889 } 890 891 if (--exp->refcount == 0) { 892 assert(exp->name == NULL); 893 894 if (exp->close) { 895 exp->close(exp); 896 } 897 898 if (exp->blk) { 899 notifier_remove(&exp->eject_notifier); 900 blk_remove_aio_context_notifier(exp->blk, blk_aio_attached, 901 blk_aio_detach, exp); 902 blk_unref(exp->blk); 903 exp->blk = NULL; 904 } 905 906 g_free(exp); 907 } 908 } 909 910 BlockBackend *nbd_export_get_blockdev(NBDExport *exp) 911 { 912 return exp->blk; 913 } 914 915 void nbd_export_close_all(void) 916 { 917 NBDExport *exp, *next; 918 919 QTAILQ_FOREACH_SAFE(exp, &exports, next, next) { 920 nbd_export_close(exp); 921 } 922 } 923 924 static ssize_t nbd_co_send_reply(NBDRequest *req, struct nbd_reply *reply, 925 int len) 926 { 927 NBDClient *client = req->client; 928 ssize_t rc, ret; 929 930 g_assert(qemu_in_coroutine()); 931 qemu_co_mutex_lock(&client->send_lock); 932 client->send_coroutine = qemu_coroutine_self(); 933 nbd_set_handlers(client); 934 935 if (!len) { 936 rc = nbd_send_reply(client->ioc, reply); 937 } else { 938 qio_channel_set_cork(client->ioc, true); 939 rc = nbd_send_reply(client->ioc, reply); 940 if (rc >= 0) { 941 ret = write_sync(client->ioc, req->data, len); 942 if (ret != len) { 943 rc = -EIO; 944 } 945 } 946 qio_channel_set_cork(client->ioc, false); 947 } 948 949 client->send_coroutine = NULL; 950 nbd_set_handlers(client); 951 qemu_co_mutex_unlock(&client->send_lock); 952 return rc; 953 } 954 955 static ssize_t nbd_co_receive_request(NBDRequest *req, struct nbd_request *request) 956 { 957 NBDClient *client = req->client; 958 uint32_t command; 959 ssize_t rc; 960 961 g_assert(qemu_in_coroutine()); 962 client->recv_coroutine = qemu_coroutine_self(); 963 nbd_update_can_read(client); 964 965 rc = nbd_receive_request(client->ioc, request); 966 if (rc < 0) { 967 if (rc != -EAGAIN) { 968 rc = -EIO; 969 } 970 goto out; 971 } 972 973 if ((request->from + request->len) < request->from) { 974 LOG("integer overflow detected! " 975 "you're probably being attacked"); 976 rc = -EINVAL; 977 goto out; 978 } 979 980 TRACE("Decoding type"); 981 982 command = request->type & NBD_CMD_MASK_COMMAND; 983 if (command == NBD_CMD_READ || command == NBD_CMD_WRITE) { 984 if (request->len > NBD_MAX_BUFFER_SIZE) { 985 LOG("len (%u) is larger than max len (%u)", 986 request->len, NBD_MAX_BUFFER_SIZE); 987 rc = -EINVAL; 988 goto out; 989 } 990 991 req->data = blk_try_blockalign(client->exp->blk, request->len); 992 if (req->data == NULL) { 993 rc = -ENOMEM; 994 goto out; 995 } 996 } 997 if (command == NBD_CMD_WRITE) { 998 TRACE("Reading %u byte(s)", request->len); 999 1000 if (read_sync(client->ioc, req->data, request->len) != request->len) { 1001 LOG("reading from socket failed"); 1002 rc = -EIO; 1003 goto out; 1004 } 1005 } 1006 rc = 0; 1007 1008 out: 1009 client->recv_coroutine = NULL; 1010 nbd_update_can_read(client); 1011 1012 return rc; 1013 } 1014 1015 static void nbd_trip(void *opaque) 1016 { 1017 NBDClient *client = opaque; 1018 NBDExport *exp = client->exp; 1019 NBDRequest *req; 1020 struct nbd_request request; 1021 struct nbd_reply reply; 1022 ssize_t ret; 1023 uint32_t command; 1024 1025 TRACE("Reading request."); 1026 if (client->closing) { 1027 return; 1028 } 1029 1030 req = nbd_request_get(client); 1031 ret = nbd_co_receive_request(req, &request); 1032 if (ret == -EAGAIN) { 1033 goto done; 1034 } 1035 if (ret == -EIO) { 1036 goto out; 1037 } 1038 1039 reply.handle = request.handle; 1040 reply.error = 0; 1041 1042 if (ret < 0) { 1043 reply.error = -ret; 1044 goto error_reply; 1045 } 1046 command = request.type & NBD_CMD_MASK_COMMAND; 1047 if (command != NBD_CMD_DISC && (request.from + request.len) > exp->size) { 1048 LOG("From: %" PRIu64 ", Len: %u, Size: %" PRIu64 1049 ", Offset: %" PRIu64 "\n", 1050 request.from, request.len, 1051 (uint64_t)exp->size, (uint64_t)exp->dev_offset); 1052 LOG("requested operation past EOF--bad client?"); 1053 goto invalid_request; 1054 } 1055 1056 if (client->closing) { 1057 /* 1058 * The client may be closed when we are blocked in 1059 * nbd_co_receive_request() 1060 */ 1061 goto done; 1062 } 1063 1064 switch (command) { 1065 case NBD_CMD_READ: 1066 TRACE("Request type is READ"); 1067 1068 if (request.type & NBD_CMD_FLAG_FUA) { 1069 ret = blk_co_flush(exp->blk); 1070 if (ret < 0) { 1071 LOG("flush failed"); 1072 reply.error = -ret; 1073 goto error_reply; 1074 } 1075 } 1076 1077 ret = blk_read(exp->blk, 1078 (request.from + exp->dev_offset) / BDRV_SECTOR_SIZE, 1079 req->data, request.len / BDRV_SECTOR_SIZE); 1080 if (ret < 0) { 1081 LOG("reading from file failed"); 1082 reply.error = -ret; 1083 goto error_reply; 1084 } 1085 1086 TRACE("Read %u byte(s)", request.len); 1087 if (nbd_co_send_reply(req, &reply, request.len) < 0) 1088 goto out; 1089 break; 1090 case NBD_CMD_WRITE: 1091 TRACE("Request type is WRITE"); 1092 1093 if (exp->nbdflags & NBD_FLAG_READ_ONLY) { 1094 TRACE("Server is read-only, return error"); 1095 reply.error = EROFS; 1096 goto error_reply; 1097 } 1098 1099 TRACE("Writing to device"); 1100 1101 ret = blk_write(exp->blk, 1102 (request.from + exp->dev_offset) / BDRV_SECTOR_SIZE, 1103 req->data, request.len / BDRV_SECTOR_SIZE); 1104 if (ret < 0) { 1105 LOG("writing to file failed"); 1106 reply.error = -ret; 1107 goto error_reply; 1108 } 1109 1110 if (request.type & NBD_CMD_FLAG_FUA) { 1111 ret = blk_co_flush(exp->blk); 1112 if (ret < 0) { 1113 LOG("flush failed"); 1114 reply.error = -ret; 1115 goto error_reply; 1116 } 1117 } 1118 1119 if (nbd_co_send_reply(req, &reply, 0) < 0) { 1120 goto out; 1121 } 1122 break; 1123 case NBD_CMD_DISC: 1124 TRACE("Request type is DISCONNECT"); 1125 errno = 0; 1126 goto out; 1127 case NBD_CMD_FLUSH: 1128 TRACE("Request type is FLUSH"); 1129 1130 ret = blk_co_flush(exp->blk); 1131 if (ret < 0) { 1132 LOG("flush failed"); 1133 reply.error = -ret; 1134 } 1135 if (nbd_co_send_reply(req, &reply, 0) < 0) { 1136 goto out; 1137 } 1138 break; 1139 case NBD_CMD_TRIM: 1140 TRACE("Request type is TRIM"); 1141 ret = blk_co_discard(exp->blk, (request.from + exp->dev_offset) 1142 / BDRV_SECTOR_SIZE, 1143 request.len / BDRV_SECTOR_SIZE); 1144 if (ret < 0) { 1145 LOG("discard failed"); 1146 reply.error = -ret; 1147 } 1148 if (nbd_co_send_reply(req, &reply, 0) < 0) { 1149 goto out; 1150 } 1151 break; 1152 default: 1153 LOG("invalid request type (%u) received", request.type); 1154 invalid_request: 1155 reply.error = EINVAL; 1156 error_reply: 1157 if (nbd_co_send_reply(req, &reply, 0) < 0) { 1158 goto out; 1159 } 1160 break; 1161 } 1162 1163 TRACE("Request/Reply complete"); 1164 1165 done: 1166 nbd_request_put(req); 1167 return; 1168 1169 out: 1170 nbd_request_put(req); 1171 client_close(client); 1172 } 1173 1174 static void nbd_read(void *opaque) 1175 { 1176 NBDClient *client = opaque; 1177 1178 if (client->recv_coroutine) { 1179 qemu_coroutine_enter(client->recv_coroutine, NULL); 1180 } else { 1181 qemu_coroutine_enter(qemu_coroutine_create(nbd_trip), client); 1182 } 1183 } 1184 1185 static void nbd_restart_write(void *opaque) 1186 { 1187 NBDClient *client = opaque; 1188 1189 qemu_coroutine_enter(client->send_coroutine, NULL); 1190 } 1191 1192 static void nbd_set_handlers(NBDClient *client) 1193 { 1194 if (client->exp && client->exp->ctx) { 1195 aio_set_fd_handler(client->exp->ctx, client->sioc->fd, 1196 true, 1197 client->can_read ? nbd_read : NULL, 1198 client->send_coroutine ? nbd_restart_write : NULL, 1199 client); 1200 } 1201 } 1202 1203 static void nbd_unset_handlers(NBDClient *client) 1204 { 1205 if (client->exp && client->exp->ctx) { 1206 aio_set_fd_handler(client->exp->ctx, client->sioc->fd, 1207 true, NULL, NULL, NULL); 1208 } 1209 } 1210 1211 static void nbd_update_can_read(NBDClient *client) 1212 { 1213 bool can_read = client->recv_coroutine || 1214 client->nb_requests < MAX_NBD_REQUESTS; 1215 1216 if (can_read != client->can_read) { 1217 client->can_read = can_read; 1218 nbd_set_handlers(client); 1219 1220 /* There is no need to invoke aio_notify(), since aio_set_fd_handler() 1221 * in nbd_set_handlers() will have taken care of that */ 1222 } 1223 } 1224 1225 static coroutine_fn void nbd_co_client_start(void *opaque) 1226 { 1227 NBDClientNewData *data = opaque; 1228 NBDClient *client = data->client; 1229 NBDExport *exp = client->exp; 1230 1231 if (exp) { 1232 nbd_export_get(exp); 1233 } 1234 if (nbd_negotiate(data)) { 1235 client_close(client); 1236 goto out; 1237 } 1238 qemu_co_mutex_init(&client->send_lock); 1239 nbd_set_handlers(client); 1240 1241 if (exp) { 1242 QTAILQ_INSERT_TAIL(&exp->clients, client, next); 1243 } 1244 out: 1245 g_free(data); 1246 } 1247 1248 void nbd_client_new(NBDExport *exp, 1249 QIOChannelSocket *sioc, 1250 QCryptoTLSCreds *tlscreds, 1251 const char *tlsaclname, 1252 void (*close_fn)(NBDClient *)) 1253 { 1254 NBDClient *client; 1255 NBDClientNewData *data = g_new(NBDClientNewData, 1); 1256 1257 client = g_malloc0(sizeof(NBDClient)); 1258 client->refcount = 1; 1259 client->exp = exp; 1260 client->tlscreds = tlscreds; 1261 if (tlscreds) { 1262 object_ref(OBJECT(client->tlscreds)); 1263 } 1264 client->tlsaclname = g_strdup(tlsaclname); 1265 client->sioc = sioc; 1266 object_ref(OBJECT(client->sioc)); 1267 client->ioc = QIO_CHANNEL(sioc); 1268 object_ref(OBJECT(client->ioc)); 1269 client->can_read = true; 1270 client->close = close_fn; 1271 1272 data->client = client; 1273 data->co = qemu_coroutine_create(nbd_co_client_start); 1274 qemu_coroutine_enter(data->co, data); 1275 } 1276