1 /* 2 * Copyright (C) 2005 Anthony Liguori <anthony@codemonkey.ws> 3 * 4 * Network Block Device Server Side 5 * 6 * This program is free software; you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License as published by 8 * the Free Software Foundation; under version 2 of the License. 9 * 10 * This program is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 * GNU General Public License for more details. 14 * 15 * You should have received a copy of the GNU General Public License 16 * along with this program; if not, see <http://www.gnu.org/licenses/>. 17 */ 18 19 #include "qemu/osdep.h" 20 #include "qapi/error.h" 21 #include "nbd-internal.h" 22 23 static int system_errno_to_nbd_errno(int err) 24 { 25 switch (err) { 26 case 0: 27 return NBD_SUCCESS; 28 case EPERM: 29 case EROFS: 30 return NBD_EPERM; 31 case EIO: 32 return NBD_EIO; 33 case ENOMEM: 34 return NBD_ENOMEM; 35 #ifdef EDQUOT 36 case EDQUOT: 37 #endif 38 case EFBIG: 39 case ENOSPC: 40 return NBD_ENOSPC; 41 case EINVAL: 42 default: 43 return NBD_EINVAL; 44 } 45 } 46 47 /* Definitions for opaque data types */ 48 49 typedef struct NBDRequest NBDRequest; 50 51 struct NBDRequest { 52 QSIMPLEQ_ENTRY(NBDRequest) entry; 53 NBDClient *client; 54 uint8_t *data; 55 }; 56 57 struct NBDExport { 58 int refcount; 59 void (*close)(NBDExport *exp); 60 61 BlockBackend *blk; 62 char *name; 63 off_t dev_offset; 64 off_t size; 65 uint32_t nbdflags; 66 QTAILQ_HEAD(, NBDClient) clients; 67 QTAILQ_ENTRY(NBDExport) next; 68 69 AioContext *ctx; 70 71 Notifier eject_notifier; 72 }; 73 74 static QTAILQ_HEAD(, NBDExport) exports = QTAILQ_HEAD_INITIALIZER(exports); 75 76 struct NBDClient { 77 int refcount; 78 void (*close)(NBDClient *client); 79 80 NBDExport *exp; 81 QCryptoTLSCreds *tlscreds; 82 char *tlsaclname; 83 QIOChannelSocket *sioc; /* The underlying data channel */ 84 QIOChannel *ioc; /* The current I/O channel which may differ (eg TLS) */ 85 86 Coroutine *recv_coroutine; 87 88 CoMutex send_lock; 89 Coroutine *send_coroutine; 90 91 bool can_read; 92 93 QTAILQ_ENTRY(NBDClient) next; 94 int nb_requests; 95 bool closing; 96 }; 97 98 /* That's all folks */ 99 100 static void nbd_set_handlers(NBDClient *client); 101 static void nbd_unset_handlers(NBDClient *client); 102 static void nbd_update_can_read(NBDClient *client); 103 104 static gboolean nbd_negotiate_continue(QIOChannel *ioc, 105 GIOCondition condition, 106 void *opaque) 107 { 108 qemu_coroutine_enter(opaque, NULL); 109 return TRUE; 110 } 111 112 static ssize_t nbd_negotiate_read(QIOChannel *ioc, void *buffer, size_t size) 113 { 114 ssize_t ret; 115 guint watch; 116 117 assert(qemu_in_coroutine()); 118 /* Negotiation are always in main loop. */ 119 watch = qio_channel_add_watch(ioc, 120 G_IO_IN, 121 nbd_negotiate_continue, 122 qemu_coroutine_self(), 123 NULL); 124 ret = read_sync(ioc, buffer, size); 125 g_source_remove(watch); 126 return ret; 127 128 } 129 130 static ssize_t nbd_negotiate_write(QIOChannel *ioc, void *buffer, size_t size) 131 { 132 ssize_t ret; 133 guint watch; 134 135 assert(qemu_in_coroutine()); 136 /* Negotiation are always in main loop. */ 137 watch = qio_channel_add_watch(ioc, 138 G_IO_OUT, 139 nbd_negotiate_continue, 140 qemu_coroutine_self(), 141 NULL); 142 ret = write_sync(ioc, buffer, size); 143 g_source_remove(watch); 144 return ret; 145 } 146 147 static ssize_t nbd_negotiate_drop_sync(QIOChannel *ioc, size_t size) 148 { 149 ssize_t ret, dropped = size; 150 uint8_t *buffer = g_malloc(MIN(65536, size)); 151 152 while (size > 0) { 153 ret = nbd_negotiate_read(ioc, buffer, MIN(65536, size)); 154 if (ret < 0) { 155 g_free(buffer); 156 return ret; 157 } 158 159 assert(ret <= size); 160 size -= ret; 161 } 162 163 g_free(buffer); 164 return dropped; 165 } 166 167 /* Basic flow for negotiation 168 169 Server Client 170 Negotiate 171 172 or 173 174 Server Client 175 Negotiate #1 176 Option 177 Negotiate #2 178 179 ---- 180 181 followed by 182 183 Server Client 184 Request 185 Response 186 Request 187 Response 188 ... 189 ... 190 Request (type == 2) 191 192 */ 193 194 static int nbd_negotiate_send_rep(QIOChannel *ioc, uint32_t type, uint32_t opt) 195 { 196 uint64_t magic; 197 uint32_t len; 198 199 TRACE("Reply opt=%x type=%x", type, opt); 200 201 magic = cpu_to_be64(NBD_REP_MAGIC); 202 if (nbd_negotiate_write(ioc, &magic, sizeof(magic)) != sizeof(magic)) { 203 LOG("write failed (rep magic)"); 204 return -EINVAL; 205 } 206 opt = cpu_to_be32(opt); 207 if (nbd_negotiate_write(ioc, &opt, sizeof(opt)) != sizeof(opt)) { 208 LOG("write failed (rep opt)"); 209 return -EINVAL; 210 } 211 type = cpu_to_be32(type); 212 if (nbd_negotiate_write(ioc, &type, sizeof(type)) != sizeof(type)) { 213 LOG("write failed (rep type)"); 214 return -EINVAL; 215 } 216 len = cpu_to_be32(0); 217 if (nbd_negotiate_write(ioc, &len, sizeof(len)) != sizeof(len)) { 218 LOG("write failed (rep data length)"); 219 return -EINVAL; 220 } 221 return 0; 222 } 223 224 static int nbd_negotiate_send_rep_list(QIOChannel *ioc, NBDExport *exp) 225 { 226 uint64_t magic, name_len; 227 uint32_t opt, type, len; 228 229 TRACE("Advertizing export name '%s'", exp->name ? exp->name : ""); 230 name_len = strlen(exp->name); 231 magic = cpu_to_be64(NBD_REP_MAGIC); 232 if (nbd_negotiate_write(ioc, &magic, sizeof(magic)) != sizeof(magic)) { 233 LOG("write failed (magic)"); 234 return -EINVAL; 235 } 236 opt = cpu_to_be32(NBD_OPT_LIST); 237 if (nbd_negotiate_write(ioc, &opt, sizeof(opt)) != sizeof(opt)) { 238 LOG("write failed (opt)"); 239 return -EINVAL; 240 } 241 type = cpu_to_be32(NBD_REP_SERVER); 242 if (nbd_negotiate_write(ioc, &type, sizeof(type)) != sizeof(type)) { 243 LOG("write failed (reply type)"); 244 return -EINVAL; 245 } 246 len = cpu_to_be32(name_len + sizeof(len)); 247 if (nbd_negotiate_write(ioc, &len, sizeof(len)) != sizeof(len)) { 248 LOG("write failed (length)"); 249 return -EINVAL; 250 } 251 len = cpu_to_be32(name_len); 252 if (nbd_negotiate_write(ioc, &len, sizeof(len)) != sizeof(len)) { 253 LOG("write failed (length)"); 254 return -EINVAL; 255 } 256 if (nbd_negotiate_write(ioc, exp->name, name_len) != name_len) { 257 LOG("write failed (buffer)"); 258 return -EINVAL; 259 } 260 return 0; 261 } 262 263 static int nbd_negotiate_handle_list(NBDClient *client, uint32_t length) 264 { 265 NBDExport *exp; 266 267 if (length) { 268 if (nbd_negotiate_drop_sync(client->ioc, length) != length) { 269 return -EIO; 270 } 271 return nbd_negotiate_send_rep(client->ioc, 272 NBD_REP_ERR_INVALID, NBD_OPT_LIST); 273 } 274 275 /* For each export, send a NBD_REP_SERVER reply. */ 276 QTAILQ_FOREACH(exp, &exports, next) { 277 if (nbd_negotiate_send_rep_list(client->ioc, exp)) { 278 return -EINVAL; 279 } 280 } 281 /* Finish with a NBD_REP_ACK. */ 282 return nbd_negotiate_send_rep(client->ioc, NBD_REP_ACK, NBD_OPT_LIST); 283 } 284 285 static int nbd_negotiate_handle_export_name(NBDClient *client, uint32_t length) 286 { 287 int rc = -EINVAL; 288 char name[256]; 289 290 /* Client sends: 291 [20 .. xx] export name (length bytes) 292 */ 293 TRACE("Checking length"); 294 if (length > 255) { 295 LOG("Bad length received"); 296 goto fail; 297 } 298 if (nbd_negotiate_read(client->ioc, name, length) != length) { 299 LOG("read failed"); 300 goto fail; 301 } 302 name[length] = '\0'; 303 304 TRACE("Client requested export '%s'", name); 305 306 client->exp = nbd_export_find(name); 307 if (!client->exp) { 308 LOG("export not found"); 309 goto fail; 310 } 311 312 QTAILQ_INSERT_TAIL(&client->exp->clients, client, next); 313 nbd_export_get(client->exp); 314 rc = 0; 315 fail: 316 return rc; 317 } 318 319 320 static QIOChannel *nbd_negotiate_handle_starttls(NBDClient *client, 321 uint32_t length) 322 { 323 QIOChannel *ioc; 324 QIOChannelTLS *tioc; 325 struct NBDTLSHandshakeData data = { 0 }; 326 327 TRACE("Setting up TLS"); 328 ioc = client->ioc; 329 if (length) { 330 if (nbd_negotiate_drop_sync(ioc, length) != length) { 331 return NULL; 332 } 333 nbd_negotiate_send_rep(ioc, NBD_REP_ERR_INVALID, NBD_OPT_STARTTLS); 334 return NULL; 335 } 336 337 nbd_negotiate_send_rep(client->ioc, NBD_REP_ACK, NBD_OPT_STARTTLS); 338 339 tioc = qio_channel_tls_new_server(ioc, 340 client->tlscreds, 341 client->tlsaclname, 342 NULL); 343 if (!tioc) { 344 return NULL; 345 } 346 347 TRACE("Starting TLS handshake"); 348 data.loop = g_main_loop_new(g_main_context_default(), FALSE); 349 qio_channel_tls_handshake(tioc, 350 nbd_tls_handshake, 351 &data, 352 NULL); 353 354 if (!data.complete) { 355 g_main_loop_run(data.loop); 356 } 357 g_main_loop_unref(data.loop); 358 if (data.error) { 359 object_unref(OBJECT(tioc)); 360 error_free(data.error); 361 return NULL; 362 } 363 364 return QIO_CHANNEL(tioc); 365 } 366 367 368 static int nbd_negotiate_options(NBDClient *client) 369 { 370 uint32_t flags; 371 bool fixedNewstyle = false; 372 373 /* Client sends: 374 [ 0 .. 3] client flags 375 376 [ 0 .. 7] NBD_OPTS_MAGIC 377 [ 8 .. 11] NBD option 378 [12 .. 15] Data length 379 ... Rest of request 380 381 [ 0 .. 7] NBD_OPTS_MAGIC 382 [ 8 .. 11] Second NBD option 383 [12 .. 15] Data length 384 ... Rest of request 385 */ 386 387 if (nbd_negotiate_read(client->ioc, &flags, sizeof(flags)) != 388 sizeof(flags)) { 389 LOG("read failed"); 390 return -EIO; 391 } 392 TRACE("Checking client flags"); 393 be32_to_cpus(&flags); 394 if (flags & NBD_FLAG_C_FIXED_NEWSTYLE) { 395 TRACE("Support supports fixed newstyle handshake"); 396 fixedNewstyle = true; 397 flags &= ~NBD_FLAG_C_FIXED_NEWSTYLE; 398 } 399 if (flags != 0) { 400 TRACE("Unknown client flags 0x%x received", flags); 401 return -EIO; 402 } 403 404 while (1) { 405 int ret; 406 uint32_t clientflags, length; 407 uint64_t magic; 408 409 if (nbd_negotiate_read(client->ioc, &magic, sizeof(magic)) != 410 sizeof(magic)) { 411 LOG("read failed"); 412 return -EINVAL; 413 } 414 TRACE("Checking opts magic"); 415 if (magic != be64_to_cpu(NBD_OPTS_MAGIC)) { 416 LOG("Bad magic received"); 417 return -EINVAL; 418 } 419 420 if (nbd_negotiate_read(client->ioc, &clientflags, 421 sizeof(clientflags)) != sizeof(clientflags)) { 422 LOG("read failed"); 423 return -EINVAL; 424 } 425 clientflags = be32_to_cpu(clientflags); 426 427 if (nbd_negotiate_read(client->ioc, &length, sizeof(length)) != 428 sizeof(length)) { 429 LOG("read failed"); 430 return -EINVAL; 431 } 432 length = be32_to_cpu(length); 433 434 TRACE("Checking option 0x%x", clientflags); 435 if (client->tlscreds && 436 client->ioc == (QIOChannel *)client->sioc) { 437 QIOChannel *tioc; 438 if (!fixedNewstyle) { 439 TRACE("Unsupported option 0x%x", clientflags); 440 return -EINVAL; 441 } 442 switch (clientflags) { 443 case NBD_OPT_STARTTLS: 444 tioc = nbd_negotiate_handle_starttls(client, length); 445 if (!tioc) { 446 return -EIO; 447 } 448 object_unref(OBJECT(client->ioc)); 449 client->ioc = QIO_CHANNEL(tioc); 450 break; 451 452 case NBD_OPT_EXPORT_NAME: 453 /* No way to return an error to client, so drop connection */ 454 TRACE("Option 0x%x not permitted before TLS", clientflags); 455 return -EINVAL; 456 457 default: 458 TRACE("Option 0x%x not permitted before TLS", clientflags); 459 if (nbd_negotiate_drop_sync(client->ioc, length) != length) { 460 return -EIO; 461 } 462 nbd_negotiate_send_rep(client->ioc, NBD_REP_ERR_TLS_REQD, 463 clientflags); 464 break; 465 } 466 } else if (fixedNewstyle) { 467 switch (clientflags) { 468 case NBD_OPT_LIST: 469 ret = nbd_negotiate_handle_list(client, length); 470 if (ret < 0) { 471 return ret; 472 } 473 break; 474 475 case NBD_OPT_ABORT: 476 return -EINVAL; 477 478 case NBD_OPT_EXPORT_NAME: 479 return nbd_negotiate_handle_export_name(client, length); 480 481 case NBD_OPT_STARTTLS: 482 if (nbd_negotiate_drop_sync(client->ioc, length) != length) { 483 return -EIO; 484 } 485 if (client->tlscreds) { 486 TRACE("TLS already enabled"); 487 nbd_negotiate_send_rep(client->ioc, NBD_REP_ERR_INVALID, 488 clientflags); 489 } else { 490 TRACE("TLS not configured"); 491 nbd_negotiate_send_rep(client->ioc, NBD_REP_ERR_POLICY, 492 clientflags); 493 } 494 break; 495 default: 496 TRACE("Unsupported option 0x%x", clientflags); 497 if (nbd_negotiate_drop_sync(client->ioc, length) != length) { 498 return -EIO; 499 } 500 nbd_negotiate_send_rep(client->ioc, NBD_REP_ERR_UNSUP, 501 clientflags); 502 break; 503 } 504 } else { 505 /* 506 * If broken new-style we should drop the connection 507 * for anything except NBD_OPT_EXPORT_NAME 508 */ 509 switch (clientflags) { 510 case NBD_OPT_EXPORT_NAME: 511 return nbd_negotiate_handle_export_name(client, length); 512 513 default: 514 TRACE("Unsupported option 0x%x", clientflags); 515 return -EINVAL; 516 } 517 } 518 } 519 } 520 521 typedef struct { 522 NBDClient *client; 523 Coroutine *co; 524 } NBDClientNewData; 525 526 static coroutine_fn int nbd_negotiate(NBDClientNewData *data) 527 { 528 NBDClient *client = data->client; 529 char buf[8 + 8 + 8 + 128]; 530 int rc; 531 const int myflags = (NBD_FLAG_HAS_FLAGS | NBD_FLAG_SEND_TRIM | 532 NBD_FLAG_SEND_FLUSH | NBD_FLAG_SEND_FUA); 533 bool oldStyle; 534 535 /* Old style negotiation header without options 536 [ 0 .. 7] passwd ("NBDMAGIC") 537 [ 8 .. 15] magic (NBD_CLIENT_MAGIC) 538 [16 .. 23] size 539 [24 .. 25] server flags (0) 540 [26 .. 27] export flags 541 [28 .. 151] reserved (0) 542 543 New style negotiation header with options 544 [ 0 .. 7] passwd ("NBDMAGIC") 545 [ 8 .. 15] magic (NBD_OPTS_MAGIC) 546 [16 .. 17] server flags (0) 547 ....options sent.... 548 [18 .. 25] size 549 [26 .. 27] export flags 550 [28 .. 151] reserved (0) 551 */ 552 553 qio_channel_set_blocking(client->ioc, false, NULL); 554 rc = -EINVAL; 555 556 TRACE("Beginning negotiation."); 557 memset(buf, 0, sizeof(buf)); 558 memcpy(buf, "NBDMAGIC", 8); 559 560 oldStyle = client->exp != NULL && !client->tlscreds; 561 if (oldStyle) { 562 assert ((client->exp->nbdflags & ~65535) == 0); 563 stq_be_p(buf + 8, NBD_CLIENT_MAGIC); 564 stq_be_p(buf + 16, client->exp->size); 565 stw_be_p(buf + 26, client->exp->nbdflags | myflags); 566 } else { 567 stq_be_p(buf + 8, NBD_OPTS_MAGIC); 568 stw_be_p(buf + 16, NBD_FLAG_FIXED_NEWSTYLE); 569 } 570 571 if (oldStyle) { 572 if (client->tlscreds) { 573 TRACE("TLS cannot be enabled with oldstyle protocol"); 574 goto fail; 575 } 576 if (nbd_negotiate_write(client->ioc, buf, sizeof(buf)) != sizeof(buf)) { 577 LOG("write failed"); 578 goto fail; 579 } 580 } else { 581 if (nbd_negotiate_write(client->ioc, buf, 18) != 18) { 582 LOG("write failed"); 583 goto fail; 584 } 585 rc = nbd_negotiate_options(client); 586 if (rc != 0) { 587 LOG("option negotiation failed"); 588 goto fail; 589 } 590 591 assert ((client->exp->nbdflags & ~65535) == 0); 592 stq_be_p(buf + 18, client->exp->size); 593 stw_be_p(buf + 26, client->exp->nbdflags | myflags); 594 if (nbd_negotiate_write(client->ioc, buf + 18, sizeof(buf) - 18) != 595 sizeof(buf) - 18) { 596 LOG("write failed"); 597 goto fail; 598 } 599 } 600 601 TRACE("Negotiation succeeded."); 602 rc = 0; 603 fail: 604 return rc; 605 } 606 607 #ifdef __linux__ 608 609 int nbd_disconnect(int fd) 610 { 611 ioctl(fd, NBD_CLEAR_QUE); 612 ioctl(fd, NBD_DISCONNECT); 613 ioctl(fd, NBD_CLEAR_SOCK); 614 return 0; 615 } 616 617 #else 618 619 int nbd_disconnect(int fd) 620 { 621 return -ENOTSUP; 622 } 623 #endif 624 625 static ssize_t nbd_receive_request(QIOChannel *ioc, struct nbd_request *request) 626 { 627 uint8_t buf[NBD_REQUEST_SIZE]; 628 uint32_t magic; 629 ssize_t ret; 630 631 ret = read_sync(ioc, buf, sizeof(buf)); 632 if (ret < 0) { 633 return ret; 634 } 635 636 if (ret != sizeof(buf)) { 637 LOG("read failed"); 638 return -EINVAL; 639 } 640 641 /* Request 642 [ 0 .. 3] magic (NBD_REQUEST_MAGIC) 643 [ 4 .. 7] type (0 == READ, 1 == WRITE) 644 [ 8 .. 15] handle 645 [16 .. 23] from 646 [24 .. 27] len 647 */ 648 649 magic = be32_to_cpup((uint32_t*)buf); 650 request->type = be32_to_cpup((uint32_t*)(buf + 4)); 651 request->handle = be64_to_cpup((uint64_t*)(buf + 8)); 652 request->from = be64_to_cpup((uint64_t*)(buf + 16)); 653 request->len = be32_to_cpup((uint32_t*)(buf + 24)); 654 655 TRACE("Got request: " 656 "{ magic = 0x%x, .type = %d, from = %" PRIu64" , len = %u }", 657 magic, request->type, request->from, request->len); 658 659 if (magic != NBD_REQUEST_MAGIC) { 660 LOG("invalid magic (got 0x%x)", magic); 661 return -EINVAL; 662 } 663 return 0; 664 } 665 666 static ssize_t nbd_send_reply(QIOChannel *ioc, struct nbd_reply *reply) 667 { 668 uint8_t buf[NBD_REPLY_SIZE]; 669 ssize_t ret; 670 671 reply->error = system_errno_to_nbd_errno(reply->error); 672 673 TRACE("Sending response to client: { .error = %d, handle = %" PRIu64 " }", 674 reply->error, reply->handle); 675 676 /* Reply 677 [ 0 .. 3] magic (NBD_REPLY_MAGIC) 678 [ 4 .. 7] error (0 == no error) 679 [ 7 .. 15] handle 680 */ 681 stl_be_p(buf, NBD_REPLY_MAGIC); 682 stl_be_p(buf + 4, reply->error); 683 stq_be_p(buf + 8, reply->handle); 684 685 ret = write_sync(ioc, buf, sizeof(buf)); 686 if (ret < 0) { 687 return ret; 688 } 689 690 if (ret != sizeof(buf)) { 691 LOG("writing to socket failed"); 692 return -EINVAL; 693 } 694 return 0; 695 } 696 697 #define MAX_NBD_REQUESTS 16 698 699 void nbd_client_get(NBDClient *client) 700 { 701 client->refcount++; 702 } 703 704 void nbd_client_put(NBDClient *client) 705 { 706 if (--client->refcount == 0) { 707 /* The last reference should be dropped by client->close, 708 * which is called by client_close. 709 */ 710 assert(client->closing); 711 712 nbd_unset_handlers(client); 713 object_unref(OBJECT(client->sioc)); 714 object_unref(OBJECT(client->ioc)); 715 if (client->tlscreds) { 716 object_unref(OBJECT(client->tlscreds)); 717 } 718 g_free(client->tlsaclname); 719 if (client->exp) { 720 QTAILQ_REMOVE(&client->exp->clients, client, next); 721 nbd_export_put(client->exp); 722 } 723 g_free(client); 724 } 725 } 726 727 static void client_close(NBDClient *client) 728 { 729 if (client->closing) { 730 return; 731 } 732 733 client->closing = true; 734 735 /* Force requests to finish. They will drop their own references, 736 * then we'll close the socket and free the NBDClient. 737 */ 738 qio_channel_shutdown(client->ioc, QIO_CHANNEL_SHUTDOWN_BOTH, 739 NULL); 740 741 /* Also tell the client, so that they release their reference. */ 742 if (client->close) { 743 client->close(client); 744 } 745 } 746 747 static NBDRequest *nbd_request_get(NBDClient *client) 748 { 749 NBDRequest *req; 750 751 assert(client->nb_requests <= MAX_NBD_REQUESTS - 1); 752 client->nb_requests++; 753 nbd_update_can_read(client); 754 755 req = g_new0(NBDRequest, 1); 756 nbd_client_get(client); 757 req->client = client; 758 return req; 759 } 760 761 static void nbd_request_put(NBDRequest *req) 762 { 763 NBDClient *client = req->client; 764 765 if (req->data) { 766 qemu_vfree(req->data); 767 } 768 g_free(req); 769 770 client->nb_requests--; 771 nbd_update_can_read(client); 772 nbd_client_put(client); 773 } 774 775 static void blk_aio_attached(AioContext *ctx, void *opaque) 776 { 777 NBDExport *exp = opaque; 778 NBDClient *client; 779 780 TRACE("Export %s: Attaching clients to AIO context %p\n", exp->name, ctx); 781 782 exp->ctx = ctx; 783 784 QTAILQ_FOREACH(client, &exp->clients, next) { 785 nbd_set_handlers(client); 786 } 787 } 788 789 static void blk_aio_detach(void *opaque) 790 { 791 NBDExport *exp = opaque; 792 NBDClient *client; 793 794 TRACE("Export %s: Detaching clients from AIO context %p\n", exp->name, exp->ctx); 795 796 QTAILQ_FOREACH(client, &exp->clients, next) { 797 nbd_unset_handlers(client); 798 } 799 800 exp->ctx = NULL; 801 } 802 803 static void nbd_eject_notifier(Notifier *n, void *data) 804 { 805 NBDExport *exp = container_of(n, NBDExport, eject_notifier); 806 nbd_export_close(exp); 807 } 808 809 NBDExport *nbd_export_new(BlockBackend *blk, off_t dev_offset, off_t size, 810 uint32_t nbdflags, void (*close)(NBDExport *), 811 Error **errp) 812 { 813 NBDExport *exp = g_malloc0(sizeof(NBDExport)); 814 exp->refcount = 1; 815 QTAILQ_INIT(&exp->clients); 816 exp->blk = blk; 817 exp->dev_offset = dev_offset; 818 exp->nbdflags = nbdflags; 819 exp->size = size < 0 ? blk_getlength(blk) : size; 820 if (exp->size < 0) { 821 error_setg_errno(errp, -exp->size, 822 "Failed to determine the NBD export's length"); 823 goto fail; 824 } 825 exp->size -= exp->size % BDRV_SECTOR_SIZE; 826 827 exp->close = close; 828 exp->ctx = blk_get_aio_context(blk); 829 blk_ref(blk); 830 blk_add_aio_context_notifier(blk, blk_aio_attached, blk_aio_detach, exp); 831 832 exp->eject_notifier.notify = nbd_eject_notifier; 833 blk_add_remove_bs_notifier(blk, &exp->eject_notifier); 834 835 /* 836 * NBD exports are used for non-shared storage migration. Make sure 837 * that BDRV_O_INACTIVE is cleared and the image is ready for write 838 * access since the export could be available before migration handover. 839 */ 840 aio_context_acquire(exp->ctx); 841 blk_invalidate_cache(blk, NULL); 842 aio_context_release(exp->ctx); 843 return exp; 844 845 fail: 846 g_free(exp); 847 return NULL; 848 } 849 850 NBDExport *nbd_export_find(const char *name) 851 { 852 NBDExport *exp; 853 QTAILQ_FOREACH(exp, &exports, next) { 854 if (strcmp(name, exp->name) == 0) { 855 return exp; 856 } 857 } 858 859 return NULL; 860 } 861 862 void nbd_export_set_name(NBDExport *exp, const char *name) 863 { 864 if (exp->name == name) { 865 return; 866 } 867 868 nbd_export_get(exp); 869 if (exp->name != NULL) { 870 g_free(exp->name); 871 exp->name = NULL; 872 QTAILQ_REMOVE(&exports, exp, next); 873 nbd_export_put(exp); 874 } 875 if (name != NULL) { 876 nbd_export_get(exp); 877 exp->name = g_strdup(name); 878 QTAILQ_INSERT_TAIL(&exports, exp, next); 879 } 880 nbd_export_put(exp); 881 } 882 883 void nbd_export_close(NBDExport *exp) 884 { 885 NBDClient *client, *next; 886 887 nbd_export_get(exp); 888 QTAILQ_FOREACH_SAFE(client, &exp->clients, next, next) { 889 client_close(client); 890 } 891 nbd_export_set_name(exp, NULL); 892 nbd_export_put(exp); 893 } 894 895 void nbd_export_get(NBDExport *exp) 896 { 897 assert(exp->refcount > 0); 898 exp->refcount++; 899 } 900 901 void nbd_export_put(NBDExport *exp) 902 { 903 assert(exp->refcount > 0); 904 if (exp->refcount == 1) { 905 nbd_export_close(exp); 906 } 907 908 if (--exp->refcount == 0) { 909 assert(exp->name == NULL); 910 911 if (exp->close) { 912 exp->close(exp); 913 } 914 915 if (exp->blk) { 916 notifier_remove(&exp->eject_notifier); 917 blk_remove_aio_context_notifier(exp->blk, blk_aio_attached, 918 blk_aio_detach, exp); 919 blk_unref(exp->blk); 920 exp->blk = NULL; 921 } 922 923 g_free(exp); 924 } 925 } 926 927 BlockBackend *nbd_export_get_blockdev(NBDExport *exp) 928 { 929 return exp->blk; 930 } 931 932 void nbd_export_close_all(void) 933 { 934 NBDExport *exp, *next; 935 936 QTAILQ_FOREACH_SAFE(exp, &exports, next, next) { 937 nbd_export_close(exp); 938 } 939 } 940 941 static ssize_t nbd_co_send_reply(NBDRequest *req, struct nbd_reply *reply, 942 int len) 943 { 944 NBDClient *client = req->client; 945 ssize_t rc, ret; 946 947 g_assert(qemu_in_coroutine()); 948 qemu_co_mutex_lock(&client->send_lock); 949 client->send_coroutine = qemu_coroutine_self(); 950 nbd_set_handlers(client); 951 952 if (!len) { 953 rc = nbd_send_reply(client->ioc, reply); 954 } else { 955 qio_channel_set_cork(client->ioc, true); 956 rc = nbd_send_reply(client->ioc, reply); 957 if (rc >= 0) { 958 ret = write_sync(client->ioc, req->data, len); 959 if (ret != len) { 960 rc = -EIO; 961 } 962 } 963 qio_channel_set_cork(client->ioc, false); 964 } 965 966 client->send_coroutine = NULL; 967 nbd_set_handlers(client); 968 qemu_co_mutex_unlock(&client->send_lock); 969 return rc; 970 } 971 972 static ssize_t nbd_co_receive_request(NBDRequest *req, struct nbd_request *request) 973 { 974 NBDClient *client = req->client; 975 uint32_t command; 976 ssize_t rc; 977 978 g_assert(qemu_in_coroutine()); 979 client->recv_coroutine = qemu_coroutine_self(); 980 nbd_update_can_read(client); 981 982 rc = nbd_receive_request(client->ioc, request); 983 if (rc < 0) { 984 if (rc != -EAGAIN) { 985 rc = -EIO; 986 } 987 goto out; 988 } 989 990 if ((request->from + request->len) < request->from) { 991 LOG("integer overflow detected! " 992 "you're probably being attacked"); 993 rc = -EINVAL; 994 goto out; 995 } 996 997 TRACE("Decoding type"); 998 999 command = request->type & NBD_CMD_MASK_COMMAND; 1000 if (command == NBD_CMD_READ || command == NBD_CMD_WRITE) { 1001 if (request->len > NBD_MAX_BUFFER_SIZE) { 1002 LOG("len (%u) is larger than max len (%u)", 1003 request->len, NBD_MAX_BUFFER_SIZE); 1004 rc = -EINVAL; 1005 goto out; 1006 } 1007 1008 req->data = blk_try_blockalign(client->exp->blk, request->len); 1009 if (req->data == NULL) { 1010 rc = -ENOMEM; 1011 goto out; 1012 } 1013 } 1014 if (command == NBD_CMD_WRITE) { 1015 TRACE("Reading %u byte(s)", request->len); 1016 1017 if (read_sync(client->ioc, req->data, request->len) != request->len) { 1018 LOG("reading from socket failed"); 1019 rc = -EIO; 1020 goto out; 1021 } 1022 } 1023 rc = 0; 1024 1025 out: 1026 client->recv_coroutine = NULL; 1027 nbd_update_can_read(client); 1028 1029 return rc; 1030 } 1031 1032 static void nbd_trip(void *opaque) 1033 { 1034 NBDClient *client = opaque; 1035 NBDExport *exp = client->exp; 1036 NBDRequest *req; 1037 struct nbd_request request; 1038 struct nbd_reply reply; 1039 ssize_t ret; 1040 uint32_t command; 1041 1042 TRACE("Reading request."); 1043 if (client->closing) { 1044 return; 1045 } 1046 1047 req = nbd_request_get(client); 1048 ret = nbd_co_receive_request(req, &request); 1049 if (ret == -EAGAIN) { 1050 goto done; 1051 } 1052 if (ret == -EIO) { 1053 goto out; 1054 } 1055 1056 reply.handle = request.handle; 1057 reply.error = 0; 1058 1059 if (ret < 0) { 1060 reply.error = -ret; 1061 goto error_reply; 1062 } 1063 command = request.type & NBD_CMD_MASK_COMMAND; 1064 if (command != NBD_CMD_DISC && (request.from + request.len) > exp->size) { 1065 LOG("From: %" PRIu64 ", Len: %u, Size: %" PRIu64 1066 ", Offset: %" PRIu64 "\n", 1067 request.from, request.len, 1068 (uint64_t)exp->size, (uint64_t)exp->dev_offset); 1069 LOG("requested operation past EOF--bad client?"); 1070 goto invalid_request; 1071 } 1072 1073 if (client->closing) { 1074 /* 1075 * The client may be closed when we are blocked in 1076 * nbd_co_receive_request() 1077 */ 1078 goto done; 1079 } 1080 1081 switch (command) { 1082 case NBD_CMD_READ: 1083 TRACE("Request type is READ"); 1084 1085 if (request.type & NBD_CMD_FLAG_FUA) { 1086 ret = blk_co_flush(exp->blk); 1087 if (ret < 0) { 1088 LOG("flush failed"); 1089 reply.error = -ret; 1090 goto error_reply; 1091 } 1092 } 1093 1094 ret = blk_pread(exp->blk, request.from + exp->dev_offset, 1095 req->data, request.len); 1096 if (ret < 0) { 1097 LOG("reading from file failed"); 1098 reply.error = -ret; 1099 goto error_reply; 1100 } 1101 1102 TRACE("Read %u byte(s)", request.len); 1103 if (nbd_co_send_reply(req, &reply, request.len) < 0) 1104 goto out; 1105 break; 1106 case NBD_CMD_WRITE: 1107 TRACE("Request type is WRITE"); 1108 1109 if (exp->nbdflags & NBD_FLAG_READ_ONLY) { 1110 TRACE("Server is read-only, return error"); 1111 reply.error = EROFS; 1112 goto error_reply; 1113 } 1114 1115 TRACE("Writing to device"); 1116 1117 ret = blk_pwrite(exp->blk, request.from + exp->dev_offset, 1118 req->data, request.len, 0); 1119 if (ret < 0) { 1120 LOG("writing to file failed"); 1121 reply.error = -ret; 1122 goto error_reply; 1123 } 1124 1125 if (request.type & NBD_CMD_FLAG_FUA) { 1126 ret = blk_co_flush(exp->blk); 1127 if (ret < 0) { 1128 LOG("flush failed"); 1129 reply.error = -ret; 1130 goto error_reply; 1131 } 1132 } 1133 1134 if (nbd_co_send_reply(req, &reply, 0) < 0) { 1135 goto out; 1136 } 1137 break; 1138 case NBD_CMD_DISC: 1139 TRACE("Request type is DISCONNECT"); 1140 errno = 0; 1141 goto out; 1142 case NBD_CMD_FLUSH: 1143 TRACE("Request type is FLUSH"); 1144 1145 ret = blk_co_flush(exp->blk); 1146 if (ret < 0) { 1147 LOG("flush failed"); 1148 reply.error = -ret; 1149 } 1150 if (nbd_co_send_reply(req, &reply, 0) < 0) { 1151 goto out; 1152 } 1153 break; 1154 case NBD_CMD_TRIM: 1155 TRACE("Request type is TRIM"); 1156 /* Ignore unaligned head or tail, until block layer adds byte 1157 * interface */ 1158 if (request.len >= BDRV_SECTOR_SIZE) { 1159 request.len -= (request.from + request.len) % BDRV_SECTOR_SIZE; 1160 ret = blk_co_discard(exp->blk, 1161 DIV_ROUND_UP(request.from + exp->dev_offset, 1162 BDRV_SECTOR_SIZE), 1163 request.len / BDRV_SECTOR_SIZE); 1164 if (ret < 0) { 1165 LOG("discard failed"); 1166 reply.error = -ret; 1167 } 1168 } else { 1169 TRACE("trim request too small, ignoring"); 1170 } 1171 if (nbd_co_send_reply(req, &reply, 0) < 0) { 1172 goto out; 1173 } 1174 break; 1175 default: 1176 LOG("invalid request type (%u) received", request.type); 1177 invalid_request: 1178 reply.error = EINVAL; 1179 error_reply: 1180 if (nbd_co_send_reply(req, &reply, 0) < 0) { 1181 goto out; 1182 } 1183 break; 1184 } 1185 1186 TRACE("Request/Reply complete"); 1187 1188 done: 1189 nbd_request_put(req); 1190 return; 1191 1192 out: 1193 nbd_request_put(req); 1194 client_close(client); 1195 } 1196 1197 static void nbd_read(void *opaque) 1198 { 1199 NBDClient *client = opaque; 1200 1201 if (client->recv_coroutine) { 1202 qemu_coroutine_enter(client->recv_coroutine, NULL); 1203 } else { 1204 qemu_coroutine_enter(qemu_coroutine_create(nbd_trip), client); 1205 } 1206 } 1207 1208 static void nbd_restart_write(void *opaque) 1209 { 1210 NBDClient *client = opaque; 1211 1212 qemu_coroutine_enter(client->send_coroutine, NULL); 1213 } 1214 1215 static void nbd_set_handlers(NBDClient *client) 1216 { 1217 if (client->exp && client->exp->ctx) { 1218 aio_set_fd_handler(client->exp->ctx, client->sioc->fd, 1219 true, 1220 client->can_read ? nbd_read : NULL, 1221 client->send_coroutine ? nbd_restart_write : NULL, 1222 client); 1223 } 1224 } 1225 1226 static void nbd_unset_handlers(NBDClient *client) 1227 { 1228 if (client->exp && client->exp->ctx) { 1229 aio_set_fd_handler(client->exp->ctx, client->sioc->fd, 1230 true, NULL, NULL, NULL); 1231 } 1232 } 1233 1234 static void nbd_update_can_read(NBDClient *client) 1235 { 1236 bool can_read = client->recv_coroutine || 1237 client->nb_requests < MAX_NBD_REQUESTS; 1238 1239 if (can_read != client->can_read) { 1240 client->can_read = can_read; 1241 nbd_set_handlers(client); 1242 1243 /* There is no need to invoke aio_notify(), since aio_set_fd_handler() 1244 * in nbd_set_handlers() will have taken care of that */ 1245 } 1246 } 1247 1248 static coroutine_fn void nbd_co_client_start(void *opaque) 1249 { 1250 NBDClientNewData *data = opaque; 1251 NBDClient *client = data->client; 1252 NBDExport *exp = client->exp; 1253 1254 if (exp) { 1255 nbd_export_get(exp); 1256 } 1257 if (nbd_negotiate(data)) { 1258 client_close(client); 1259 goto out; 1260 } 1261 qemu_co_mutex_init(&client->send_lock); 1262 nbd_set_handlers(client); 1263 1264 if (exp) { 1265 QTAILQ_INSERT_TAIL(&exp->clients, client, next); 1266 } 1267 out: 1268 g_free(data); 1269 } 1270 1271 void nbd_client_new(NBDExport *exp, 1272 QIOChannelSocket *sioc, 1273 QCryptoTLSCreds *tlscreds, 1274 const char *tlsaclname, 1275 void (*close_fn)(NBDClient *)) 1276 { 1277 NBDClient *client; 1278 NBDClientNewData *data = g_new(NBDClientNewData, 1); 1279 1280 client = g_malloc0(sizeof(NBDClient)); 1281 client->refcount = 1; 1282 client->exp = exp; 1283 client->tlscreds = tlscreds; 1284 if (tlscreds) { 1285 object_ref(OBJECT(client->tlscreds)); 1286 } 1287 client->tlsaclname = g_strdup(tlsaclname); 1288 client->sioc = sioc; 1289 object_ref(OBJECT(client->sioc)); 1290 client->ioc = QIO_CHANNEL(sioc); 1291 object_ref(OBJECT(client->ioc)); 1292 client->can_read = true; 1293 client->close = close_fn; 1294 1295 data->client = client; 1296 data->co = qemu_coroutine_create(nbd_co_client_start); 1297 qemu_coroutine_enter(data->co, data); 1298 } 1299