1 /* 2 * Copyright (C) 2005 Anthony Liguori <anthony@codemonkey.ws> 3 * 4 * Network Block Device Server Side 5 * 6 * This program is free software; you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License as published by 8 * the Free Software Foundation; under version 2 of the License. 9 * 10 * This program is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 * GNU General Public License for more details. 14 * 15 * You should have received a copy of the GNU General Public License 16 * along with this program; if not, see <http://www.gnu.org/licenses/>. 17 */ 18 19 #include "qemu/osdep.h" 20 #include "qapi/error.h" 21 #include "nbd-internal.h" 22 23 static int system_errno_to_nbd_errno(int err) 24 { 25 switch (err) { 26 case 0: 27 return NBD_SUCCESS; 28 case EPERM: 29 return NBD_EPERM; 30 case EIO: 31 return NBD_EIO; 32 case ENOMEM: 33 return NBD_ENOMEM; 34 #ifdef EDQUOT 35 case EDQUOT: 36 #endif 37 case EFBIG: 38 case ENOSPC: 39 return NBD_ENOSPC; 40 case EINVAL: 41 default: 42 return NBD_EINVAL; 43 } 44 } 45 46 /* Definitions for opaque data types */ 47 48 typedef struct NBDRequest NBDRequest; 49 50 struct NBDRequest { 51 QSIMPLEQ_ENTRY(NBDRequest) entry; 52 NBDClient *client; 53 uint8_t *data; 54 }; 55 56 struct NBDExport { 57 int refcount; 58 void (*close)(NBDExport *exp); 59 60 BlockBackend *blk; 61 char *name; 62 off_t dev_offset; 63 off_t size; 64 uint32_t nbdflags; 65 QTAILQ_HEAD(, NBDClient) clients; 66 QTAILQ_ENTRY(NBDExport) next; 67 68 AioContext *ctx; 69 70 Notifier eject_notifier; 71 }; 72 73 static QTAILQ_HEAD(, NBDExport) exports = QTAILQ_HEAD_INITIALIZER(exports); 74 75 struct NBDClient { 76 int refcount; 77 void (*close)(NBDClient *client); 78 79 NBDExport *exp; 80 QCryptoTLSCreds *tlscreds; 81 char *tlsaclname; 82 QIOChannelSocket *sioc; /* The underlying data channel */ 83 QIOChannel *ioc; /* The current I/O channel which may differ (eg TLS) */ 84 85 Coroutine *recv_coroutine; 86 87 CoMutex send_lock; 88 Coroutine *send_coroutine; 89 90 bool can_read; 91 92 QTAILQ_ENTRY(NBDClient) next; 93 int nb_requests; 94 bool closing; 95 }; 96 97 /* That's all folks */ 98 99 static void nbd_set_handlers(NBDClient *client); 100 static void nbd_unset_handlers(NBDClient *client); 101 static void nbd_update_can_read(NBDClient *client); 102 103 static gboolean nbd_negotiate_continue(QIOChannel *ioc, 104 GIOCondition condition, 105 void *opaque) 106 { 107 qemu_coroutine_enter(opaque, NULL); 108 return TRUE; 109 } 110 111 static ssize_t nbd_negotiate_read(QIOChannel *ioc, void *buffer, size_t size) 112 { 113 ssize_t ret; 114 guint watch; 115 116 assert(qemu_in_coroutine()); 117 /* Negotiation are always in main loop. */ 118 watch = qio_channel_add_watch(ioc, 119 G_IO_IN, 120 nbd_negotiate_continue, 121 qemu_coroutine_self(), 122 NULL); 123 ret = read_sync(ioc, buffer, size); 124 g_source_remove(watch); 125 return ret; 126 127 } 128 129 static ssize_t nbd_negotiate_write(QIOChannel *ioc, void *buffer, size_t size) 130 { 131 ssize_t ret; 132 guint watch; 133 134 assert(qemu_in_coroutine()); 135 /* Negotiation are always in main loop. */ 136 watch = qio_channel_add_watch(ioc, 137 G_IO_OUT, 138 nbd_negotiate_continue, 139 qemu_coroutine_self(), 140 NULL); 141 ret = write_sync(ioc, buffer, size); 142 g_source_remove(watch); 143 return ret; 144 } 145 146 static ssize_t nbd_negotiate_drop_sync(QIOChannel *ioc, size_t size) 147 { 148 ssize_t ret, dropped = size; 149 uint8_t *buffer = g_malloc(MIN(65536, size)); 150 151 while (size > 0) { 152 ret = nbd_negotiate_read(ioc, buffer, MIN(65536, size)); 153 if (ret < 0) { 154 g_free(buffer); 155 return ret; 156 } 157 158 assert(ret <= size); 159 size -= ret; 160 } 161 162 g_free(buffer); 163 return dropped; 164 } 165 166 /* Basic flow for negotiation 167 168 Server Client 169 Negotiate 170 171 or 172 173 Server Client 174 Negotiate #1 175 Option 176 Negotiate #2 177 178 ---- 179 180 followed by 181 182 Server Client 183 Request 184 Response 185 Request 186 Response 187 ... 188 ... 189 Request (type == 2) 190 191 */ 192 193 static int nbd_negotiate_send_rep(QIOChannel *ioc, uint32_t type, uint32_t opt) 194 { 195 uint64_t magic; 196 uint32_t len; 197 198 TRACE("Reply opt=%x type=%x", type, opt); 199 200 magic = cpu_to_be64(NBD_REP_MAGIC); 201 if (nbd_negotiate_write(ioc, &magic, sizeof(magic)) != sizeof(magic)) { 202 LOG("write failed (rep magic)"); 203 return -EINVAL; 204 } 205 opt = cpu_to_be32(opt); 206 if (nbd_negotiate_write(ioc, &opt, sizeof(opt)) != sizeof(opt)) { 207 LOG("write failed (rep opt)"); 208 return -EINVAL; 209 } 210 type = cpu_to_be32(type); 211 if (nbd_negotiate_write(ioc, &type, sizeof(type)) != sizeof(type)) { 212 LOG("write failed (rep type)"); 213 return -EINVAL; 214 } 215 len = cpu_to_be32(0); 216 if (nbd_negotiate_write(ioc, &len, sizeof(len)) != sizeof(len)) { 217 LOG("write failed (rep data length)"); 218 return -EINVAL; 219 } 220 return 0; 221 } 222 223 static int nbd_negotiate_send_rep_list(QIOChannel *ioc, NBDExport *exp) 224 { 225 uint64_t magic, name_len; 226 uint32_t opt, type, len; 227 228 TRACE("Advertizing export name '%s'", exp->name ? exp->name : ""); 229 name_len = strlen(exp->name); 230 magic = cpu_to_be64(NBD_REP_MAGIC); 231 if (nbd_negotiate_write(ioc, &magic, sizeof(magic)) != sizeof(magic)) { 232 LOG("write failed (magic)"); 233 return -EINVAL; 234 } 235 opt = cpu_to_be32(NBD_OPT_LIST); 236 if (nbd_negotiate_write(ioc, &opt, sizeof(opt)) != sizeof(opt)) { 237 LOG("write failed (opt)"); 238 return -EINVAL; 239 } 240 type = cpu_to_be32(NBD_REP_SERVER); 241 if (nbd_negotiate_write(ioc, &type, sizeof(type)) != sizeof(type)) { 242 LOG("write failed (reply type)"); 243 return -EINVAL; 244 } 245 len = cpu_to_be32(name_len + sizeof(len)); 246 if (nbd_negotiate_write(ioc, &len, sizeof(len)) != sizeof(len)) { 247 LOG("write failed (length)"); 248 return -EINVAL; 249 } 250 len = cpu_to_be32(name_len); 251 if (nbd_negotiate_write(ioc, &len, sizeof(len)) != sizeof(len)) { 252 LOG("write failed (length)"); 253 return -EINVAL; 254 } 255 if (nbd_negotiate_write(ioc, exp->name, name_len) != name_len) { 256 LOG("write failed (buffer)"); 257 return -EINVAL; 258 } 259 return 0; 260 } 261 262 static int nbd_negotiate_handle_list(NBDClient *client, uint32_t length) 263 { 264 NBDExport *exp; 265 266 if (length) { 267 if (nbd_negotiate_drop_sync(client->ioc, length) != length) { 268 return -EIO; 269 } 270 return nbd_negotiate_send_rep(client->ioc, 271 NBD_REP_ERR_INVALID, NBD_OPT_LIST); 272 } 273 274 /* For each export, send a NBD_REP_SERVER reply. */ 275 QTAILQ_FOREACH(exp, &exports, next) { 276 if (nbd_negotiate_send_rep_list(client->ioc, exp)) { 277 return -EINVAL; 278 } 279 } 280 /* Finish with a NBD_REP_ACK. */ 281 return nbd_negotiate_send_rep(client->ioc, NBD_REP_ACK, NBD_OPT_LIST); 282 } 283 284 static int nbd_negotiate_handle_export_name(NBDClient *client, uint32_t length) 285 { 286 int rc = -EINVAL; 287 char name[256]; 288 289 /* Client sends: 290 [20 .. xx] export name (length bytes) 291 */ 292 TRACE("Checking length"); 293 if (length > 255) { 294 LOG("Bad length received"); 295 goto fail; 296 } 297 if (nbd_negotiate_read(client->ioc, name, length) != length) { 298 LOG("read failed"); 299 goto fail; 300 } 301 name[length] = '\0'; 302 303 TRACE("Client requested export '%s'", name); 304 305 client->exp = nbd_export_find(name); 306 if (!client->exp) { 307 LOG("export not found"); 308 goto fail; 309 } 310 311 QTAILQ_INSERT_TAIL(&client->exp->clients, client, next); 312 nbd_export_get(client->exp); 313 rc = 0; 314 fail: 315 return rc; 316 } 317 318 319 static QIOChannel *nbd_negotiate_handle_starttls(NBDClient *client, 320 uint32_t length) 321 { 322 QIOChannel *ioc; 323 QIOChannelTLS *tioc; 324 struct NBDTLSHandshakeData data = { 0 }; 325 326 TRACE("Setting up TLS"); 327 ioc = client->ioc; 328 if (length) { 329 if (nbd_negotiate_drop_sync(ioc, length) != length) { 330 return NULL; 331 } 332 nbd_negotiate_send_rep(ioc, NBD_REP_ERR_INVALID, NBD_OPT_STARTTLS); 333 return NULL; 334 } 335 336 nbd_negotiate_send_rep(client->ioc, NBD_REP_ACK, NBD_OPT_STARTTLS); 337 338 tioc = qio_channel_tls_new_server(ioc, 339 client->tlscreds, 340 client->tlsaclname, 341 NULL); 342 if (!tioc) { 343 return NULL; 344 } 345 346 TRACE("Starting TLS handshake"); 347 data.loop = g_main_loop_new(g_main_context_default(), FALSE); 348 qio_channel_tls_handshake(tioc, 349 nbd_tls_handshake, 350 &data, 351 NULL); 352 353 if (!data.complete) { 354 g_main_loop_run(data.loop); 355 } 356 g_main_loop_unref(data.loop); 357 if (data.error) { 358 object_unref(OBJECT(tioc)); 359 error_free(data.error); 360 return NULL; 361 } 362 363 return QIO_CHANNEL(tioc); 364 } 365 366 367 static int nbd_negotiate_options(NBDClient *client) 368 { 369 uint32_t flags; 370 bool fixedNewstyle = false; 371 372 /* Client sends: 373 [ 0 .. 3] client flags 374 375 [ 0 .. 7] NBD_OPTS_MAGIC 376 [ 8 .. 11] NBD option 377 [12 .. 15] Data length 378 ... Rest of request 379 380 [ 0 .. 7] NBD_OPTS_MAGIC 381 [ 8 .. 11] Second NBD option 382 [12 .. 15] Data length 383 ... Rest of request 384 */ 385 386 if (nbd_negotiate_read(client->ioc, &flags, sizeof(flags)) != 387 sizeof(flags)) { 388 LOG("read failed"); 389 return -EIO; 390 } 391 TRACE("Checking client flags"); 392 be32_to_cpus(&flags); 393 if (flags & NBD_FLAG_C_FIXED_NEWSTYLE) { 394 TRACE("Support supports fixed newstyle handshake"); 395 fixedNewstyle = true; 396 flags &= ~NBD_FLAG_C_FIXED_NEWSTYLE; 397 } 398 if (flags != 0) { 399 TRACE("Unknown client flags 0x%x received", flags); 400 return -EIO; 401 } 402 403 while (1) { 404 int ret; 405 uint32_t clientflags, length; 406 uint64_t magic; 407 408 if (nbd_negotiate_read(client->ioc, &magic, sizeof(magic)) != 409 sizeof(magic)) { 410 LOG("read failed"); 411 return -EINVAL; 412 } 413 TRACE("Checking opts magic"); 414 if (magic != be64_to_cpu(NBD_OPTS_MAGIC)) { 415 LOG("Bad magic received"); 416 return -EINVAL; 417 } 418 419 if (nbd_negotiate_read(client->ioc, &clientflags, 420 sizeof(clientflags)) != sizeof(clientflags)) { 421 LOG("read failed"); 422 return -EINVAL; 423 } 424 clientflags = be32_to_cpu(clientflags); 425 426 if (nbd_negotiate_read(client->ioc, &length, sizeof(length)) != 427 sizeof(length)) { 428 LOG("read failed"); 429 return -EINVAL; 430 } 431 length = be32_to_cpu(length); 432 433 TRACE("Checking option 0x%x", clientflags); 434 if (client->tlscreds && 435 client->ioc == (QIOChannel *)client->sioc) { 436 QIOChannel *tioc; 437 if (!fixedNewstyle) { 438 TRACE("Unsupported option 0x%x", clientflags); 439 return -EINVAL; 440 } 441 switch (clientflags) { 442 case NBD_OPT_STARTTLS: 443 tioc = nbd_negotiate_handle_starttls(client, length); 444 if (!tioc) { 445 return -EIO; 446 } 447 object_unref(OBJECT(client->ioc)); 448 client->ioc = QIO_CHANNEL(tioc); 449 break; 450 451 default: 452 TRACE("Option 0x%x not permitted before TLS", clientflags); 453 nbd_negotiate_send_rep(client->ioc, NBD_REP_ERR_TLS_REQD, 454 clientflags); 455 return -EINVAL; 456 } 457 } else if (fixedNewstyle) { 458 switch (clientflags) { 459 case NBD_OPT_LIST: 460 ret = nbd_negotiate_handle_list(client, length); 461 if (ret < 0) { 462 return ret; 463 } 464 break; 465 466 case NBD_OPT_ABORT: 467 return -EINVAL; 468 469 case NBD_OPT_EXPORT_NAME: 470 return nbd_negotiate_handle_export_name(client, length); 471 472 case NBD_OPT_STARTTLS: 473 if (client->tlscreds) { 474 TRACE("TLS already enabled"); 475 nbd_negotiate_send_rep(client->ioc, NBD_REP_ERR_INVALID, 476 clientflags); 477 } else { 478 TRACE("TLS not configured"); 479 nbd_negotiate_send_rep(client->ioc, NBD_REP_ERR_POLICY, 480 clientflags); 481 } 482 return -EINVAL; 483 default: 484 TRACE("Unsupported option 0x%x", clientflags); 485 nbd_negotiate_send_rep(client->ioc, NBD_REP_ERR_UNSUP, 486 clientflags); 487 return -EINVAL; 488 } 489 } else { 490 /* 491 * If broken new-style we should drop the connection 492 * for anything except NBD_OPT_EXPORT_NAME 493 */ 494 switch (clientflags) { 495 case NBD_OPT_EXPORT_NAME: 496 return nbd_negotiate_handle_export_name(client, length); 497 498 default: 499 TRACE("Unsupported option 0x%x", clientflags); 500 return -EINVAL; 501 } 502 } 503 } 504 } 505 506 typedef struct { 507 NBDClient *client; 508 Coroutine *co; 509 } NBDClientNewData; 510 511 static coroutine_fn int nbd_negotiate(NBDClientNewData *data) 512 { 513 NBDClient *client = data->client; 514 char buf[8 + 8 + 8 + 128]; 515 int rc; 516 const int myflags = (NBD_FLAG_HAS_FLAGS | NBD_FLAG_SEND_TRIM | 517 NBD_FLAG_SEND_FLUSH | NBD_FLAG_SEND_FUA); 518 bool oldStyle; 519 520 /* Old style negotiation header without options 521 [ 0 .. 7] passwd ("NBDMAGIC") 522 [ 8 .. 15] magic (NBD_CLIENT_MAGIC) 523 [16 .. 23] size 524 [24 .. 25] server flags (0) 525 [26 .. 27] export flags 526 [28 .. 151] reserved (0) 527 528 New style negotiation header with options 529 [ 0 .. 7] passwd ("NBDMAGIC") 530 [ 8 .. 15] magic (NBD_OPTS_MAGIC) 531 [16 .. 17] server flags (0) 532 ....options sent.... 533 [18 .. 25] size 534 [26 .. 27] export flags 535 [28 .. 151] reserved (0) 536 */ 537 538 qio_channel_set_blocking(client->ioc, false, NULL); 539 rc = -EINVAL; 540 541 TRACE("Beginning negotiation."); 542 memset(buf, 0, sizeof(buf)); 543 memcpy(buf, "NBDMAGIC", 8); 544 545 oldStyle = client->exp != NULL && !client->tlscreds; 546 if (oldStyle) { 547 assert ((client->exp->nbdflags & ~65535) == 0); 548 stq_be_p(buf + 8, NBD_CLIENT_MAGIC); 549 stq_be_p(buf + 16, client->exp->size); 550 stw_be_p(buf + 26, client->exp->nbdflags | myflags); 551 } else { 552 stq_be_p(buf + 8, NBD_OPTS_MAGIC); 553 stw_be_p(buf + 16, NBD_FLAG_FIXED_NEWSTYLE); 554 } 555 556 if (oldStyle) { 557 if (client->tlscreds) { 558 TRACE("TLS cannot be enabled with oldstyle protocol"); 559 goto fail; 560 } 561 if (nbd_negotiate_write(client->ioc, buf, sizeof(buf)) != sizeof(buf)) { 562 LOG("write failed"); 563 goto fail; 564 } 565 } else { 566 if (nbd_negotiate_write(client->ioc, buf, 18) != 18) { 567 LOG("write failed"); 568 goto fail; 569 } 570 rc = nbd_negotiate_options(client); 571 if (rc != 0) { 572 LOG("option negotiation failed"); 573 goto fail; 574 } 575 576 assert ((client->exp->nbdflags & ~65535) == 0); 577 stq_be_p(buf + 18, client->exp->size); 578 stw_be_p(buf + 26, client->exp->nbdflags | myflags); 579 if (nbd_negotiate_write(client->ioc, buf + 18, sizeof(buf) - 18) != 580 sizeof(buf) - 18) { 581 LOG("write failed"); 582 goto fail; 583 } 584 } 585 586 TRACE("Negotiation succeeded."); 587 rc = 0; 588 fail: 589 return rc; 590 } 591 592 #ifdef __linux__ 593 594 int nbd_disconnect(int fd) 595 { 596 ioctl(fd, NBD_CLEAR_QUE); 597 ioctl(fd, NBD_DISCONNECT); 598 ioctl(fd, NBD_CLEAR_SOCK); 599 return 0; 600 } 601 602 #else 603 604 int nbd_disconnect(int fd) 605 { 606 return -ENOTSUP; 607 } 608 #endif 609 610 static ssize_t nbd_receive_request(QIOChannel *ioc, struct nbd_request *request) 611 { 612 uint8_t buf[NBD_REQUEST_SIZE]; 613 uint32_t magic; 614 ssize_t ret; 615 616 ret = read_sync(ioc, buf, sizeof(buf)); 617 if (ret < 0) { 618 return ret; 619 } 620 621 if (ret != sizeof(buf)) { 622 LOG("read failed"); 623 return -EINVAL; 624 } 625 626 /* Request 627 [ 0 .. 3] magic (NBD_REQUEST_MAGIC) 628 [ 4 .. 7] type (0 == READ, 1 == WRITE) 629 [ 8 .. 15] handle 630 [16 .. 23] from 631 [24 .. 27] len 632 */ 633 634 magic = be32_to_cpup((uint32_t*)buf); 635 request->type = be32_to_cpup((uint32_t*)(buf + 4)); 636 request->handle = be64_to_cpup((uint64_t*)(buf + 8)); 637 request->from = be64_to_cpup((uint64_t*)(buf + 16)); 638 request->len = be32_to_cpup((uint32_t*)(buf + 24)); 639 640 TRACE("Got request: " 641 "{ magic = 0x%x, .type = %d, from = %" PRIu64" , len = %u }", 642 magic, request->type, request->from, request->len); 643 644 if (magic != NBD_REQUEST_MAGIC) { 645 LOG("invalid magic (got 0x%x)", magic); 646 return -EINVAL; 647 } 648 return 0; 649 } 650 651 static ssize_t nbd_send_reply(QIOChannel *ioc, struct nbd_reply *reply) 652 { 653 uint8_t buf[NBD_REPLY_SIZE]; 654 ssize_t ret; 655 656 reply->error = system_errno_to_nbd_errno(reply->error); 657 658 /* Reply 659 [ 0 .. 3] magic (NBD_REPLY_MAGIC) 660 [ 4 .. 7] error (0 == no error) 661 [ 7 .. 15] handle 662 */ 663 stl_be_p(buf, NBD_REPLY_MAGIC); 664 stl_be_p(buf + 4, reply->error); 665 stq_be_p(buf + 8, reply->handle); 666 667 TRACE("Sending response to client"); 668 669 ret = write_sync(ioc, buf, sizeof(buf)); 670 if (ret < 0) { 671 return ret; 672 } 673 674 if (ret != sizeof(buf)) { 675 LOG("writing to socket failed"); 676 return -EINVAL; 677 } 678 return 0; 679 } 680 681 #define MAX_NBD_REQUESTS 16 682 683 void nbd_client_get(NBDClient *client) 684 { 685 client->refcount++; 686 } 687 688 void nbd_client_put(NBDClient *client) 689 { 690 if (--client->refcount == 0) { 691 /* The last reference should be dropped by client->close, 692 * which is called by client_close. 693 */ 694 assert(client->closing); 695 696 nbd_unset_handlers(client); 697 object_unref(OBJECT(client->sioc)); 698 object_unref(OBJECT(client->ioc)); 699 if (client->tlscreds) { 700 object_unref(OBJECT(client->tlscreds)); 701 } 702 g_free(client->tlsaclname); 703 if (client->exp) { 704 QTAILQ_REMOVE(&client->exp->clients, client, next); 705 nbd_export_put(client->exp); 706 } 707 g_free(client); 708 } 709 } 710 711 static void client_close(NBDClient *client) 712 { 713 if (client->closing) { 714 return; 715 } 716 717 client->closing = true; 718 719 /* Force requests to finish. They will drop their own references, 720 * then we'll close the socket and free the NBDClient. 721 */ 722 qio_channel_shutdown(client->ioc, QIO_CHANNEL_SHUTDOWN_BOTH, 723 NULL); 724 725 /* Also tell the client, so that they release their reference. */ 726 if (client->close) { 727 client->close(client); 728 } 729 } 730 731 static NBDRequest *nbd_request_get(NBDClient *client) 732 { 733 NBDRequest *req; 734 735 assert(client->nb_requests <= MAX_NBD_REQUESTS - 1); 736 client->nb_requests++; 737 nbd_update_can_read(client); 738 739 req = g_new0(NBDRequest, 1); 740 nbd_client_get(client); 741 req->client = client; 742 return req; 743 } 744 745 static void nbd_request_put(NBDRequest *req) 746 { 747 NBDClient *client = req->client; 748 749 if (req->data) { 750 qemu_vfree(req->data); 751 } 752 g_free(req); 753 754 client->nb_requests--; 755 nbd_update_can_read(client); 756 nbd_client_put(client); 757 } 758 759 static void blk_aio_attached(AioContext *ctx, void *opaque) 760 { 761 NBDExport *exp = opaque; 762 NBDClient *client; 763 764 TRACE("Export %s: Attaching clients to AIO context %p\n", exp->name, ctx); 765 766 exp->ctx = ctx; 767 768 QTAILQ_FOREACH(client, &exp->clients, next) { 769 nbd_set_handlers(client); 770 } 771 } 772 773 static void blk_aio_detach(void *opaque) 774 { 775 NBDExport *exp = opaque; 776 NBDClient *client; 777 778 TRACE("Export %s: Detaching clients from AIO context %p\n", exp->name, exp->ctx); 779 780 QTAILQ_FOREACH(client, &exp->clients, next) { 781 nbd_unset_handlers(client); 782 } 783 784 exp->ctx = NULL; 785 } 786 787 static void nbd_eject_notifier(Notifier *n, void *data) 788 { 789 NBDExport *exp = container_of(n, NBDExport, eject_notifier); 790 nbd_export_close(exp); 791 } 792 793 NBDExport *nbd_export_new(BlockBackend *blk, off_t dev_offset, off_t size, 794 uint32_t nbdflags, void (*close)(NBDExport *), 795 Error **errp) 796 { 797 NBDExport *exp = g_malloc0(sizeof(NBDExport)); 798 exp->refcount = 1; 799 QTAILQ_INIT(&exp->clients); 800 exp->blk = blk; 801 exp->dev_offset = dev_offset; 802 exp->nbdflags = nbdflags; 803 exp->size = size < 0 ? blk_getlength(blk) : size; 804 if (exp->size < 0) { 805 error_setg_errno(errp, -exp->size, 806 "Failed to determine the NBD export's length"); 807 goto fail; 808 } 809 exp->size -= exp->size % BDRV_SECTOR_SIZE; 810 811 exp->close = close; 812 exp->ctx = blk_get_aio_context(blk); 813 blk_ref(blk); 814 blk_add_aio_context_notifier(blk, blk_aio_attached, blk_aio_detach, exp); 815 816 exp->eject_notifier.notify = nbd_eject_notifier; 817 blk_add_remove_bs_notifier(blk, &exp->eject_notifier); 818 819 /* 820 * NBD exports are used for non-shared storage migration. Make sure 821 * that BDRV_O_INACTIVE is cleared and the image is ready for write 822 * access since the export could be available before migration handover. 823 */ 824 aio_context_acquire(exp->ctx); 825 blk_invalidate_cache(blk, NULL); 826 aio_context_release(exp->ctx); 827 return exp; 828 829 fail: 830 g_free(exp); 831 return NULL; 832 } 833 834 NBDExport *nbd_export_find(const char *name) 835 { 836 NBDExport *exp; 837 QTAILQ_FOREACH(exp, &exports, next) { 838 if (strcmp(name, exp->name) == 0) { 839 return exp; 840 } 841 } 842 843 return NULL; 844 } 845 846 void nbd_export_set_name(NBDExport *exp, const char *name) 847 { 848 if (exp->name == name) { 849 return; 850 } 851 852 nbd_export_get(exp); 853 if (exp->name != NULL) { 854 g_free(exp->name); 855 exp->name = NULL; 856 QTAILQ_REMOVE(&exports, exp, next); 857 nbd_export_put(exp); 858 } 859 if (name != NULL) { 860 nbd_export_get(exp); 861 exp->name = g_strdup(name); 862 QTAILQ_INSERT_TAIL(&exports, exp, next); 863 } 864 nbd_export_put(exp); 865 } 866 867 void nbd_export_close(NBDExport *exp) 868 { 869 NBDClient *client, *next; 870 871 nbd_export_get(exp); 872 QTAILQ_FOREACH_SAFE(client, &exp->clients, next, next) { 873 client_close(client); 874 } 875 nbd_export_set_name(exp, NULL); 876 nbd_export_put(exp); 877 } 878 879 void nbd_export_get(NBDExport *exp) 880 { 881 assert(exp->refcount > 0); 882 exp->refcount++; 883 } 884 885 void nbd_export_put(NBDExport *exp) 886 { 887 assert(exp->refcount > 0); 888 if (exp->refcount == 1) { 889 nbd_export_close(exp); 890 } 891 892 if (--exp->refcount == 0) { 893 assert(exp->name == NULL); 894 895 if (exp->close) { 896 exp->close(exp); 897 } 898 899 if (exp->blk) { 900 notifier_remove(&exp->eject_notifier); 901 blk_remove_aio_context_notifier(exp->blk, blk_aio_attached, 902 blk_aio_detach, exp); 903 blk_unref(exp->blk); 904 exp->blk = NULL; 905 } 906 907 g_free(exp); 908 } 909 } 910 911 BlockBackend *nbd_export_get_blockdev(NBDExport *exp) 912 { 913 return exp->blk; 914 } 915 916 void nbd_export_close_all(void) 917 { 918 NBDExport *exp, *next; 919 920 QTAILQ_FOREACH_SAFE(exp, &exports, next, next) { 921 nbd_export_close(exp); 922 } 923 } 924 925 static ssize_t nbd_co_send_reply(NBDRequest *req, struct nbd_reply *reply, 926 int len) 927 { 928 NBDClient *client = req->client; 929 ssize_t rc, ret; 930 931 g_assert(qemu_in_coroutine()); 932 qemu_co_mutex_lock(&client->send_lock); 933 client->send_coroutine = qemu_coroutine_self(); 934 nbd_set_handlers(client); 935 936 if (!len) { 937 rc = nbd_send_reply(client->ioc, reply); 938 } else { 939 qio_channel_set_cork(client->ioc, true); 940 rc = nbd_send_reply(client->ioc, reply); 941 if (rc >= 0) { 942 ret = write_sync(client->ioc, req->data, len); 943 if (ret != len) { 944 rc = -EIO; 945 } 946 } 947 qio_channel_set_cork(client->ioc, false); 948 } 949 950 client->send_coroutine = NULL; 951 nbd_set_handlers(client); 952 qemu_co_mutex_unlock(&client->send_lock); 953 return rc; 954 } 955 956 static ssize_t nbd_co_receive_request(NBDRequest *req, struct nbd_request *request) 957 { 958 NBDClient *client = req->client; 959 uint32_t command; 960 ssize_t rc; 961 962 g_assert(qemu_in_coroutine()); 963 client->recv_coroutine = qemu_coroutine_self(); 964 nbd_update_can_read(client); 965 966 rc = nbd_receive_request(client->ioc, request); 967 if (rc < 0) { 968 if (rc != -EAGAIN) { 969 rc = -EIO; 970 } 971 goto out; 972 } 973 974 if ((request->from + request->len) < request->from) { 975 LOG("integer overflow detected! " 976 "you're probably being attacked"); 977 rc = -EINVAL; 978 goto out; 979 } 980 981 TRACE("Decoding type"); 982 983 command = request->type & NBD_CMD_MASK_COMMAND; 984 if (command == NBD_CMD_READ || command == NBD_CMD_WRITE) { 985 if (request->len > NBD_MAX_BUFFER_SIZE) { 986 LOG("len (%u) is larger than max len (%u)", 987 request->len, NBD_MAX_BUFFER_SIZE); 988 rc = -EINVAL; 989 goto out; 990 } 991 992 req->data = blk_try_blockalign(client->exp->blk, request->len); 993 if (req->data == NULL) { 994 rc = -ENOMEM; 995 goto out; 996 } 997 } 998 if (command == NBD_CMD_WRITE) { 999 TRACE("Reading %u byte(s)", request->len); 1000 1001 if (read_sync(client->ioc, req->data, request->len) != request->len) { 1002 LOG("reading from socket failed"); 1003 rc = -EIO; 1004 goto out; 1005 } 1006 } 1007 rc = 0; 1008 1009 out: 1010 client->recv_coroutine = NULL; 1011 nbd_update_can_read(client); 1012 1013 return rc; 1014 } 1015 1016 static void nbd_trip(void *opaque) 1017 { 1018 NBDClient *client = opaque; 1019 NBDExport *exp = client->exp; 1020 NBDRequest *req; 1021 struct nbd_request request; 1022 struct nbd_reply reply; 1023 ssize_t ret; 1024 uint32_t command; 1025 1026 TRACE("Reading request."); 1027 if (client->closing) { 1028 return; 1029 } 1030 1031 req = nbd_request_get(client); 1032 ret = nbd_co_receive_request(req, &request); 1033 if (ret == -EAGAIN) { 1034 goto done; 1035 } 1036 if (ret == -EIO) { 1037 goto out; 1038 } 1039 1040 reply.handle = request.handle; 1041 reply.error = 0; 1042 1043 if (ret < 0) { 1044 reply.error = -ret; 1045 goto error_reply; 1046 } 1047 command = request.type & NBD_CMD_MASK_COMMAND; 1048 if (command != NBD_CMD_DISC && (request.from + request.len) > exp->size) { 1049 LOG("From: %" PRIu64 ", Len: %u, Size: %" PRIu64 1050 ", Offset: %" PRIu64 "\n", 1051 request.from, request.len, 1052 (uint64_t)exp->size, (uint64_t)exp->dev_offset); 1053 LOG("requested operation past EOF--bad client?"); 1054 goto invalid_request; 1055 } 1056 1057 if (client->closing) { 1058 /* 1059 * The client may be closed when we are blocked in 1060 * nbd_co_receive_request() 1061 */ 1062 goto done; 1063 } 1064 1065 switch (command) { 1066 case NBD_CMD_READ: 1067 TRACE("Request type is READ"); 1068 1069 if (request.type & NBD_CMD_FLAG_FUA) { 1070 ret = blk_co_flush(exp->blk); 1071 if (ret < 0) { 1072 LOG("flush failed"); 1073 reply.error = -ret; 1074 goto error_reply; 1075 } 1076 } 1077 1078 ret = blk_read(exp->blk, 1079 (request.from + exp->dev_offset) / BDRV_SECTOR_SIZE, 1080 req->data, request.len / BDRV_SECTOR_SIZE); 1081 if (ret < 0) { 1082 LOG("reading from file failed"); 1083 reply.error = -ret; 1084 goto error_reply; 1085 } 1086 1087 TRACE("Read %u byte(s)", request.len); 1088 if (nbd_co_send_reply(req, &reply, request.len) < 0) 1089 goto out; 1090 break; 1091 case NBD_CMD_WRITE: 1092 TRACE("Request type is WRITE"); 1093 1094 if (exp->nbdflags & NBD_FLAG_READ_ONLY) { 1095 TRACE("Server is read-only, return error"); 1096 reply.error = EROFS; 1097 goto error_reply; 1098 } 1099 1100 TRACE("Writing to device"); 1101 1102 ret = blk_write(exp->blk, 1103 (request.from + exp->dev_offset) / BDRV_SECTOR_SIZE, 1104 req->data, request.len / BDRV_SECTOR_SIZE); 1105 if (ret < 0) { 1106 LOG("writing to file failed"); 1107 reply.error = -ret; 1108 goto error_reply; 1109 } 1110 1111 if (request.type & NBD_CMD_FLAG_FUA) { 1112 ret = blk_co_flush(exp->blk); 1113 if (ret < 0) { 1114 LOG("flush failed"); 1115 reply.error = -ret; 1116 goto error_reply; 1117 } 1118 } 1119 1120 if (nbd_co_send_reply(req, &reply, 0) < 0) { 1121 goto out; 1122 } 1123 break; 1124 case NBD_CMD_DISC: 1125 TRACE("Request type is DISCONNECT"); 1126 errno = 0; 1127 goto out; 1128 case NBD_CMD_FLUSH: 1129 TRACE("Request type is FLUSH"); 1130 1131 ret = blk_co_flush(exp->blk); 1132 if (ret < 0) { 1133 LOG("flush failed"); 1134 reply.error = -ret; 1135 } 1136 if (nbd_co_send_reply(req, &reply, 0) < 0) { 1137 goto out; 1138 } 1139 break; 1140 case NBD_CMD_TRIM: 1141 TRACE("Request type is TRIM"); 1142 ret = blk_co_discard(exp->blk, (request.from + exp->dev_offset) 1143 / BDRV_SECTOR_SIZE, 1144 request.len / BDRV_SECTOR_SIZE); 1145 if (ret < 0) { 1146 LOG("discard failed"); 1147 reply.error = -ret; 1148 } 1149 if (nbd_co_send_reply(req, &reply, 0) < 0) { 1150 goto out; 1151 } 1152 break; 1153 default: 1154 LOG("invalid request type (%u) received", request.type); 1155 invalid_request: 1156 reply.error = EINVAL; 1157 error_reply: 1158 if (nbd_co_send_reply(req, &reply, 0) < 0) { 1159 goto out; 1160 } 1161 break; 1162 } 1163 1164 TRACE("Request/Reply complete"); 1165 1166 done: 1167 nbd_request_put(req); 1168 return; 1169 1170 out: 1171 nbd_request_put(req); 1172 client_close(client); 1173 } 1174 1175 static void nbd_read(void *opaque) 1176 { 1177 NBDClient *client = opaque; 1178 1179 if (client->recv_coroutine) { 1180 qemu_coroutine_enter(client->recv_coroutine, NULL); 1181 } else { 1182 qemu_coroutine_enter(qemu_coroutine_create(nbd_trip), client); 1183 } 1184 } 1185 1186 static void nbd_restart_write(void *opaque) 1187 { 1188 NBDClient *client = opaque; 1189 1190 qemu_coroutine_enter(client->send_coroutine, NULL); 1191 } 1192 1193 static void nbd_set_handlers(NBDClient *client) 1194 { 1195 if (client->exp && client->exp->ctx) { 1196 aio_set_fd_handler(client->exp->ctx, client->sioc->fd, 1197 true, 1198 client->can_read ? nbd_read : NULL, 1199 client->send_coroutine ? nbd_restart_write : NULL, 1200 client); 1201 } 1202 } 1203 1204 static void nbd_unset_handlers(NBDClient *client) 1205 { 1206 if (client->exp && client->exp->ctx) { 1207 aio_set_fd_handler(client->exp->ctx, client->sioc->fd, 1208 true, NULL, NULL, NULL); 1209 } 1210 } 1211 1212 static void nbd_update_can_read(NBDClient *client) 1213 { 1214 bool can_read = client->recv_coroutine || 1215 client->nb_requests < MAX_NBD_REQUESTS; 1216 1217 if (can_read != client->can_read) { 1218 client->can_read = can_read; 1219 nbd_set_handlers(client); 1220 1221 /* There is no need to invoke aio_notify(), since aio_set_fd_handler() 1222 * in nbd_set_handlers() will have taken care of that */ 1223 } 1224 } 1225 1226 static coroutine_fn void nbd_co_client_start(void *opaque) 1227 { 1228 NBDClientNewData *data = opaque; 1229 NBDClient *client = data->client; 1230 NBDExport *exp = client->exp; 1231 1232 if (exp) { 1233 nbd_export_get(exp); 1234 } 1235 if (nbd_negotiate(data)) { 1236 client_close(client); 1237 goto out; 1238 } 1239 qemu_co_mutex_init(&client->send_lock); 1240 nbd_set_handlers(client); 1241 1242 if (exp) { 1243 QTAILQ_INSERT_TAIL(&exp->clients, client, next); 1244 } 1245 out: 1246 g_free(data); 1247 } 1248 1249 void nbd_client_new(NBDExport *exp, 1250 QIOChannelSocket *sioc, 1251 QCryptoTLSCreds *tlscreds, 1252 const char *tlsaclname, 1253 void (*close_fn)(NBDClient *)) 1254 { 1255 NBDClient *client; 1256 NBDClientNewData *data = g_new(NBDClientNewData, 1); 1257 1258 client = g_malloc0(sizeof(NBDClient)); 1259 client->refcount = 1; 1260 client->exp = exp; 1261 client->tlscreds = tlscreds; 1262 if (tlscreds) { 1263 object_ref(OBJECT(client->tlscreds)); 1264 } 1265 client->tlsaclname = g_strdup(tlsaclname); 1266 client->sioc = sioc; 1267 object_ref(OBJECT(client->sioc)); 1268 client->ioc = QIO_CHANNEL(sioc); 1269 object_ref(OBJECT(client->ioc)); 1270 client->can_read = true; 1271 client->close = close_fn; 1272 1273 data->client = client; 1274 data->co = qemu_coroutine_create(nbd_co_client_start); 1275 qemu_coroutine_enter(data->co, data); 1276 } 1277