1 /* 2 * Copyright (C) 2016-2018 Red Hat, Inc. 3 * Copyright (C) 2005 Anthony Liguori <anthony@codemonkey.ws> 4 * 5 * Network Block Device Server Side 6 * 7 * This program is free software; you can redistribute it and/or modify 8 * it under the terms of the GNU General Public License as published by 9 * the Free Software Foundation; under version 2 of the License. 10 * 11 * This program is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 * GNU General Public License for more details. 15 * 16 * You should have received a copy of the GNU General Public License 17 * along with this program; if not, see <http://www.gnu.org/licenses/>. 18 */ 19 20 #include "qemu/osdep.h" 21 #include "qapi/error.h" 22 #include "trace.h" 23 #include "nbd-internal.h" 24 25 #define NBD_META_ID_BASE_ALLOCATION 0 26 27 static int system_errno_to_nbd_errno(int err) 28 { 29 switch (err) { 30 case 0: 31 return NBD_SUCCESS; 32 case EPERM: 33 case EROFS: 34 return NBD_EPERM; 35 case EIO: 36 return NBD_EIO; 37 case ENOMEM: 38 return NBD_ENOMEM; 39 #ifdef EDQUOT 40 case EDQUOT: 41 #endif 42 case EFBIG: 43 case ENOSPC: 44 return NBD_ENOSPC; 45 case EOVERFLOW: 46 return NBD_EOVERFLOW; 47 case ESHUTDOWN: 48 return NBD_ESHUTDOWN; 49 case EINVAL: 50 default: 51 return NBD_EINVAL; 52 } 53 } 54 55 /* Definitions for opaque data types */ 56 57 typedef struct NBDRequestData NBDRequestData; 58 59 struct NBDRequestData { 60 QSIMPLEQ_ENTRY(NBDRequestData) entry; 61 NBDClient *client; 62 uint8_t *data; 63 bool complete; 64 }; 65 66 struct NBDExport { 67 int refcount; 68 void (*close)(NBDExport *exp); 69 70 BlockBackend *blk; 71 char *name; 72 char *description; 73 off_t dev_offset; 74 off_t size; 75 uint16_t nbdflags; 76 QTAILQ_HEAD(, NBDClient) clients; 77 QTAILQ_ENTRY(NBDExport) next; 78 79 AioContext *ctx; 80 81 BlockBackend *eject_notifier_blk; 82 Notifier eject_notifier; 83 }; 84 85 static QTAILQ_HEAD(, NBDExport) exports = QTAILQ_HEAD_INITIALIZER(exports); 86 87 /* NBDExportMetaContexts represents a list of contexts to be exported, 88 * as selected by NBD_OPT_SET_META_CONTEXT. Also used for 89 * NBD_OPT_LIST_META_CONTEXT. */ 90 typedef struct NBDExportMetaContexts { 91 char export_name[NBD_MAX_NAME_SIZE + 1]; 92 bool valid; /* means that negotiation of the option finished without 93 errors */ 94 bool base_allocation; /* export base:allocation context (block status) */ 95 } NBDExportMetaContexts; 96 97 struct NBDClient { 98 int refcount; 99 void (*close_fn)(NBDClient *client, bool negotiated); 100 101 NBDExport *exp; 102 QCryptoTLSCreds *tlscreds; 103 char *tlsaclname; 104 QIOChannelSocket *sioc; /* The underlying data channel */ 105 QIOChannel *ioc; /* The current I/O channel which may differ (eg TLS) */ 106 107 Coroutine *recv_coroutine; 108 109 CoMutex send_lock; 110 Coroutine *send_coroutine; 111 112 QTAILQ_ENTRY(NBDClient) next; 113 int nb_requests; 114 bool closing; 115 116 bool structured_reply; 117 NBDExportMetaContexts export_meta; 118 119 uint32_t opt; /* Current option being negotiated */ 120 uint32_t optlen; /* remaining length of data in ioc for the option being 121 negotiated now */ 122 }; 123 124 static void nbd_client_receive_next_request(NBDClient *client); 125 126 /* Basic flow for negotiation 127 128 Server Client 129 Negotiate 130 131 or 132 133 Server Client 134 Negotiate #1 135 Option 136 Negotiate #2 137 138 ---- 139 140 followed by 141 142 Server Client 143 Request 144 Response 145 Request 146 Response 147 ... 148 ... 149 Request (type == 2) 150 151 */ 152 153 static inline void set_be_option_rep(NBDOptionReply *rep, uint32_t option, 154 uint32_t type, uint32_t length) 155 { 156 stq_be_p(&rep->magic, NBD_REP_MAGIC); 157 stl_be_p(&rep->option, option); 158 stl_be_p(&rep->type, type); 159 stl_be_p(&rep->length, length); 160 } 161 162 /* Send a reply header, including length, but no payload. 163 * Return -errno on error, 0 on success. */ 164 static int nbd_negotiate_send_rep_len(NBDClient *client, uint32_t type, 165 uint32_t len, Error **errp) 166 { 167 NBDOptionReply rep; 168 169 trace_nbd_negotiate_send_rep_len(client->opt, nbd_opt_lookup(client->opt), 170 type, nbd_rep_lookup(type), len); 171 172 assert(len < NBD_MAX_BUFFER_SIZE); 173 174 set_be_option_rep(&rep, client->opt, type, len); 175 return nbd_write(client->ioc, &rep, sizeof(rep), errp); 176 } 177 178 /* Send a reply header with default 0 length. 179 * Return -errno on error, 0 on success. */ 180 static int nbd_negotiate_send_rep(NBDClient *client, uint32_t type, 181 Error **errp) 182 { 183 return nbd_negotiate_send_rep_len(client, type, 0, errp); 184 } 185 186 /* Send an error reply. 187 * Return -errno on error, 0 on success. */ 188 static int GCC_FMT_ATTR(4, 0) 189 nbd_negotiate_send_rep_verr(NBDClient *client, uint32_t type, 190 Error **errp, const char *fmt, va_list va) 191 { 192 char *msg; 193 int ret; 194 size_t len; 195 196 msg = g_strdup_vprintf(fmt, va); 197 len = strlen(msg); 198 assert(len < 4096); 199 trace_nbd_negotiate_send_rep_err(msg); 200 ret = nbd_negotiate_send_rep_len(client, type, len, errp); 201 if (ret < 0) { 202 goto out; 203 } 204 if (nbd_write(client->ioc, msg, len, errp) < 0) { 205 error_prepend(errp, "write failed (error message): "); 206 ret = -EIO; 207 } else { 208 ret = 0; 209 } 210 211 out: 212 g_free(msg); 213 return ret; 214 } 215 216 /* Send an error reply. 217 * Return -errno on error, 0 on success. */ 218 static int GCC_FMT_ATTR(4, 5) 219 nbd_negotiate_send_rep_err(NBDClient *client, uint32_t type, 220 Error **errp, const char *fmt, ...) 221 { 222 va_list va; 223 int ret; 224 225 va_start(va, fmt); 226 ret = nbd_negotiate_send_rep_verr(client, type, errp, fmt, va); 227 va_end(va); 228 return ret; 229 } 230 231 /* Drop remainder of the current option, and send a reply with the 232 * given error type and message. Return -errno on read or write 233 * failure; or 0 if connection is still live. */ 234 static int GCC_FMT_ATTR(4, 0) 235 nbd_opt_vdrop(NBDClient *client, uint32_t type, Error **errp, 236 const char *fmt, va_list va) 237 { 238 int ret = nbd_drop(client->ioc, client->optlen, errp); 239 240 client->optlen = 0; 241 if (!ret) { 242 ret = nbd_negotiate_send_rep_verr(client, type, errp, fmt, va); 243 } 244 return ret; 245 } 246 247 static int GCC_FMT_ATTR(4, 5) 248 nbd_opt_drop(NBDClient *client, uint32_t type, Error **errp, 249 const char *fmt, ...) 250 { 251 int ret; 252 va_list va; 253 254 va_start(va, fmt); 255 ret = nbd_opt_vdrop(client, type, errp, fmt, va); 256 va_end(va); 257 258 return ret; 259 } 260 261 static int GCC_FMT_ATTR(3, 4) 262 nbd_opt_invalid(NBDClient *client, Error **errp, const char *fmt, ...) 263 { 264 int ret; 265 va_list va; 266 267 va_start(va, fmt); 268 ret = nbd_opt_vdrop(client, NBD_REP_ERR_INVALID, errp, fmt, va); 269 va_end(va); 270 271 return ret; 272 } 273 274 /* Read size bytes from the unparsed payload of the current option. 275 * Return -errno on I/O error, 0 if option was completely handled by 276 * sending a reply about inconsistent lengths, or 1 on success. */ 277 static int nbd_opt_read(NBDClient *client, void *buffer, size_t size, 278 Error **errp) 279 { 280 if (size > client->optlen) { 281 return nbd_opt_invalid(client, errp, 282 "Inconsistent lengths in option %s", 283 nbd_opt_lookup(client->opt)); 284 } 285 client->optlen -= size; 286 return qio_channel_read_all(client->ioc, buffer, size, errp) < 0 ? -EIO : 1; 287 } 288 289 /* Drop size bytes from the unparsed payload of the current option. 290 * Return -errno on I/O error, 0 if option was completely handled by 291 * sending a reply about inconsistent lengths, or 1 on success. */ 292 static int nbd_opt_skip(NBDClient *client, size_t size, Error **errp) 293 { 294 if (size > client->optlen) { 295 return nbd_opt_invalid(client, errp, 296 "Inconsistent lengths in option %s", 297 nbd_opt_lookup(client->opt)); 298 } 299 client->optlen -= size; 300 return nbd_drop(client->ioc, size, errp) < 0 ? -EIO : 1; 301 } 302 303 /* nbd_opt_read_name 304 * 305 * Read a string with the format: 306 * uint32_t len (<= NBD_MAX_NAME_SIZE) 307 * len bytes string (not 0-terminated) 308 * 309 * @name should be enough to store NBD_MAX_NAME_SIZE+1. 310 * If @length is non-null, it will be set to the actual string length. 311 * 312 * Return -errno on I/O error, 0 if option was completely handled by 313 * sending a reply about inconsistent lengths, or 1 on success. 314 */ 315 static int nbd_opt_read_name(NBDClient *client, char *name, uint32_t *length, 316 Error **errp) 317 { 318 int ret; 319 uint32_t len; 320 321 ret = nbd_opt_read(client, &len, sizeof(len), errp); 322 if (ret <= 0) { 323 return ret; 324 } 325 cpu_to_be32s(&len); 326 327 if (len > NBD_MAX_NAME_SIZE) { 328 return nbd_opt_invalid(client, errp, 329 "Invalid name length: %" PRIu32, len); 330 } 331 332 ret = nbd_opt_read(client, name, len, errp); 333 if (ret <= 0) { 334 return ret; 335 } 336 name[len] = '\0'; 337 338 if (length) { 339 *length = len; 340 } 341 342 return 1; 343 } 344 345 /* Send a single NBD_REP_SERVER reply to NBD_OPT_LIST, including payload. 346 * Return -errno on error, 0 on success. */ 347 static int nbd_negotiate_send_rep_list(NBDClient *client, NBDExport *exp, 348 Error **errp) 349 { 350 size_t name_len, desc_len; 351 uint32_t len; 352 const char *name = exp->name ? exp->name : ""; 353 const char *desc = exp->description ? exp->description : ""; 354 QIOChannel *ioc = client->ioc; 355 int ret; 356 357 trace_nbd_negotiate_send_rep_list(name, desc); 358 name_len = strlen(name); 359 desc_len = strlen(desc); 360 len = name_len + desc_len + sizeof(len); 361 ret = nbd_negotiate_send_rep_len(client, NBD_REP_SERVER, len, errp); 362 if (ret < 0) { 363 return ret; 364 } 365 366 len = cpu_to_be32(name_len); 367 if (nbd_write(ioc, &len, sizeof(len), errp) < 0) { 368 error_prepend(errp, "write failed (name length): "); 369 return -EINVAL; 370 } 371 372 if (nbd_write(ioc, name, name_len, errp) < 0) { 373 error_prepend(errp, "write failed (name buffer): "); 374 return -EINVAL; 375 } 376 377 if (nbd_write(ioc, desc, desc_len, errp) < 0) { 378 error_prepend(errp, "write failed (description buffer): "); 379 return -EINVAL; 380 } 381 382 return 0; 383 } 384 385 /* Process the NBD_OPT_LIST command, with a potential series of replies. 386 * Return -errno on error, 0 on success. */ 387 static int nbd_negotiate_handle_list(NBDClient *client, Error **errp) 388 { 389 NBDExport *exp; 390 assert(client->opt == NBD_OPT_LIST); 391 392 /* For each export, send a NBD_REP_SERVER reply. */ 393 QTAILQ_FOREACH(exp, &exports, next) { 394 if (nbd_negotiate_send_rep_list(client, exp, errp)) { 395 return -EINVAL; 396 } 397 } 398 /* Finish with a NBD_REP_ACK. */ 399 return nbd_negotiate_send_rep(client, NBD_REP_ACK, errp); 400 } 401 402 static void nbd_check_meta_export_name(NBDClient *client) 403 { 404 client->export_meta.valid &= !strcmp(client->exp->name, 405 client->export_meta.export_name); 406 } 407 408 /* Send a reply to NBD_OPT_EXPORT_NAME. 409 * Return -errno on error, 0 on success. */ 410 static int nbd_negotiate_handle_export_name(NBDClient *client, 411 uint16_t myflags, bool no_zeroes, 412 Error **errp) 413 { 414 char name[NBD_MAX_NAME_SIZE + 1]; 415 char buf[NBD_REPLY_EXPORT_NAME_SIZE] = ""; 416 size_t len; 417 int ret; 418 419 /* Client sends: 420 [20 .. xx] export name (length bytes) 421 Server replies: 422 [ 0 .. 7] size 423 [ 8 .. 9] export flags 424 [10 .. 133] reserved (0) [unless no_zeroes] 425 */ 426 trace_nbd_negotiate_handle_export_name(); 427 if (client->optlen >= sizeof(name)) { 428 error_setg(errp, "Bad length received"); 429 return -EINVAL; 430 } 431 if (nbd_read(client->ioc, name, client->optlen, errp) < 0) { 432 error_prepend(errp, "read failed: "); 433 return -EIO; 434 } 435 name[client->optlen] = '\0'; 436 client->optlen = 0; 437 438 trace_nbd_negotiate_handle_export_name_request(name); 439 440 client->exp = nbd_export_find(name); 441 if (!client->exp) { 442 error_setg(errp, "export not found"); 443 return -EINVAL; 444 } 445 446 trace_nbd_negotiate_new_style_size_flags(client->exp->size, 447 client->exp->nbdflags | myflags); 448 stq_be_p(buf, client->exp->size); 449 stw_be_p(buf + 8, client->exp->nbdflags | myflags); 450 len = no_zeroes ? 10 : sizeof(buf); 451 ret = nbd_write(client->ioc, buf, len, errp); 452 if (ret < 0) { 453 error_prepend(errp, "write failed: "); 454 return ret; 455 } 456 457 QTAILQ_INSERT_TAIL(&client->exp->clients, client, next); 458 nbd_export_get(client->exp); 459 nbd_check_meta_export_name(client); 460 461 return 0; 462 } 463 464 /* Send a single NBD_REP_INFO, with a buffer @buf of @length bytes. 465 * The buffer does NOT include the info type prefix. 466 * Return -errno on error, 0 if ready to send more. */ 467 static int nbd_negotiate_send_info(NBDClient *client, 468 uint16_t info, uint32_t length, void *buf, 469 Error **errp) 470 { 471 int rc; 472 473 trace_nbd_negotiate_send_info(info, nbd_info_lookup(info), length); 474 rc = nbd_negotiate_send_rep_len(client, NBD_REP_INFO, 475 sizeof(info) + length, errp); 476 if (rc < 0) { 477 return rc; 478 } 479 cpu_to_be16s(&info); 480 if (nbd_write(client->ioc, &info, sizeof(info), errp) < 0) { 481 return -EIO; 482 } 483 if (nbd_write(client->ioc, buf, length, errp) < 0) { 484 return -EIO; 485 } 486 return 0; 487 } 488 489 /* nbd_reject_length: Handle any unexpected payload. 490 * @fatal requests that we quit talking to the client, even if we are able 491 * to successfully send an error reply. 492 * Return: 493 * -errno transmission error occurred or @fatal was requested, errp is set 494 * 0 error message successfully sent to client, errp is not set 495 */ 496 static int nbd_reject_length(NBDClient *client, bool fatal, Error **errp) 497 { 498 int ret; 499 500 assert(client->optlen); 501 ret = nbd_opt_invalid(client, errp, "option '%s' has unexpected length", 502 nbd_opt_lookup(client->opt)); 503 if (fatal && !ret) { 504 error_setg(errp, "option '%s' has unexpected length", 505 nbd_opt_lookup(client->opt)); 506 return -EINVAL; 507 } 508 return ret; 509 } 510 511 /* Handle NBD_OPT_INFO and NBD_OPT_GO. 512 * Return -errno on error, 0 if ready for next option, and 1 to move 513 * into transmission phase. */ 514 static int nbd_negotiate_handle_info(NBDClient *client, uint16_t myflags, 515 Error **errp) 516 { 517 int rc; 518 char name[NBD_MAX_NAME_SIZE + 1]; 519 NBDExport *exp; 520 uint16_t requests; 521 uint16_t request; 522 uint32_t namelen; 523 bool sendname = false; 524 bool blocksize = false; 525 uint32_t sizes[3]; 526 char buf[sizeof(uint64_t) + sizeof(uint16_t)]; 527 528 /* Client sends: 529 4 bytes: L, name length (can be 0) 530 L bytes: export name 531 2 bytes: N, number of requests (can be 0) 532 N * 2 bytes: N requests 533 */ 534 rc = nbd_opt_read_name(client, name, &namelen, errp); 535 if (rc <= 0) { 536 return rc; 537 } 538 trace_nbd_negotiate_handle_export_name_request(name); 539 540 rc = nbd_opt_read(client, &requests, sizeof(requests), errp); 541 if (rc <= 0) { 542 return rc; 543 } 544 be16_to_cpus(&requests); 545 trace_nbd_negotiate_handle_info_requests(requests); 546 while (requests--) { 547 rc = nbd_opt_read(client, &request, sizeof(request), errp); 548 if (rc <= 0) { 549 return rc; 550 } 551 be16_to_cpus(&request); 552 trace_nbd_negotiate_handle_info_request(request, 553 nbd_info_lookup(request)); 554 /* We care about NBD_INFO_NAME and NBD_INFO_BLOCK_SIZE; 555 * everything else is either a request we don't know or 556 * something we send regardless of request */ 557 switch (request) { 558 case NBD_INFO_NAME: 559 sendname = true; 560 break; 561 case NBD_INFO_BLOCK_SIZE: 562 blocksize = true; 563 break; 564 } 565 } 566 if (client->optlen) { 567 return nbd_reject_length(client, false, errp); 568 } 569 570 exp = nbd_export_find(name); 571 if (!exp) { 572 return nbd_negotiate_send_rep_err(client, NBD_REP_ERR_UNKNOWN, 573 errp, "export '%s' not present", 574 name); 575 } 576 577 /* Don't bother sending NBD_INFO_NAME unless client requested it */ 578 if (sendname) { 579 rc = nbd_negotiate_send_info(client, NBD_INFO_NAME, namelen, name, 580 errp); 581 if (rc < 0) { 582 return rc; 583 } 584 } 585 586 /* Send NBD_INFO_DESCRIPTION only if available, regardless of 587 * client request */ 588 if (exp->description) { 589 size_t len = strlen(exp->description); 590 591 rc = nbd_negotiate_send_info(client, NBD_INFO_DESCRIPTION, 592 len, exp->description, errp); 593 if (rc < 0) { 594 return rc; 595 } 596 } 597 598 /* Send NBD_INFO_BLOCK_SIZE always, but tweak the minimum size 599 * according to whether the client requested it, and according to 600 * whether this is OPT_INFO or OPT_GO. */ 601 /* minimum - 1 for back-compat, or 512 if client is new enough. 602 * TODO: consult blk_bs(blk)->bl.request_alignment? */ 603 sizes[0] = 604 (client->opt == NBD_OPT_INFO || blocksize) ? BDRV_SECTOR_SIZE : 1; 605 /* preferred - Hard-code to 4096 for now. 606 * TODO: is blk_bs(blk)->bl.opt_transfer appropriate? */ 607 sizes[1] = 4096; 608 /* maximum - At most 32M, but smaller as appropriate. */ 609 sizes[2] = MIN(blk_get_max_transfer(exp->blk), NBD_MAX_BUFFER_SIZE); 610 trace_nbd_negotiate_handle_info_block_size(sizes[0], sizes[1], sizes[2]); 611 cpu_to_be32s(&sizes[0]); 612 cpu_to_be32s(&sizes[1]); 613 cpu_to_be32s(&sizes[2]); 614 rc = nbd_negotiate_send_info(client, NBD_INFO_BLOCK_SIZE, 615 sizeof(sizes), sizes, errp); 616 if (rc < 0) { 617 return rc; 618 } 619 620 /* Send NBD_INFO_EXPORT always */ 621 trace_nbd_negotiate_new_style_size_flags(exp->size, 622 exp->nbdflags | myflags); 623 stq_be_p(buf, exp->size); 624 stw_be_p(buf + 8, exp->nbdflags | myflags); 625 rc = nbd_negotiate_send_info(client, NBD_INFO_EXPORT, 626 sizeof(buf), buf, errp); 627 if (rc < 0) { 628 return rc; 629 } 630 631 /* If the client is just asking for NBD_OPT_INFO, but forgot to 632 * request block sizes, return an error. 633 * TODO: consult blk_bs(blk)->request_align, and only error if it 634 * is not 1? */ 635 if (client->opt == NBD_OPT_INFO && !blocksize) { 636 return nbd_negotiate_send_rep_err(client, 637 NBD_REP_ERR_BLOCK_SIZE_REQD, 638 errp, 639 "request NBD_INFO_BLOCK_SIZE to " 640 "use this export"); 641 } 642 643 /* Final reply */ 644 rc = nbd_negotiate_send_rep(client, NBD_REP_ACK, errp); 645 if (rc < 0) { 646 return rc; 647 } 648 649 if (client->opt == NBD_OPT_GO) { 650 client->exp = exp; 651 QTAILQ_INSERT_TAIL(&client->exp->clients, client, next); 652 nbd_export_get(client->exp); 653 nbd_check_meta_export_name(client); 654 rc = 1; 655 } 656 return rc; 657 } 658 659 660 /* Handle NBD_OPT_STARTTLS. Return NULL to drop connection, or else the 661 * new channel for all further (now-encrypted) communication. */ 662 static QIOChannel *nbd_negotiate_handle_starttls(NBDClient *client, 663 Error **errp) 664 { 665 QIOChannel *ioc; 666 QIOChannelTLS *tioc; 667 struct NBDTLSHandshakeData data = { 0 }; 668 669 assert(client->opt == NBD_OPT_STARTTLS); 670 671 trace_nbd_negotiate_handle_starttls(); 672 ioc = client->ioc; 673 674 if (nbd_negotiate_send_rep(client, NBD_REP_ACK, errp) < 0) { 675 return NULL; 676 } 677 678 tioc = qio_channel_tls_new_server(ioc, 679 client->tlscreds, 680 client->tlsaclname, 681 errp); 682 if (!tioc) { 683 return NULL; 684 } 685 686 qio_channel_set_name(QIO_CHANNEL(tioc), "nbd-server-tls"); 687 trace_nbd_negotiate_handle_starttls_handshake(); 688 data.loop = g_main_loop_new(g_main_context_default(), FALSE); 689 qio_channel_tls_handshake(tioc, 690 nbd_tls_handshake, 691 &data, 692 NULL, 693 NULL); 694 695 if (!data.complete) { 696 g_main_loop_run(data.loop); 697 } 698 g_main_loop_unref(data.loop); 699 if (data.error) { 700 object_unref(OBJECT(tioc)); 701 error_propagate(errp, data.error); 702 return NULL; 703 } 704 705 return QIO_CHANNEL(tioc); 706 } 707 708 /* nbd_negotiate_send_meta_context 709 * 710 * Send one chunk of reply to NBD_OPT_{LIST,SET}_META_CONTEXT 711 * 712 * For NBD_OPT_LIST_META_CONTEXT @context_id is ignored, 0 is used instead. 713 */ 714 static int nbd_negotiate_send_meta_context(NBDClient *client, 715 const char *context, 716 uint32_t context_id, 717 Error **errp) 718 { 719 NBDOptionReplyMetaContext opt; 720 struct iovec iov[] = { 721 {.iov_base = &opt, .iov_len = sizeof(opt)}, 722 {.iov_base = (void *)context, .iov_len = strlen(context)} 723 }; 724 725 if (client->opt == NBD_OPT_LIST_META_CONTEXT) { 726 context_id = 0; 727 } 728 729 trace_nbd_negotiate_meta_query_reply(context, context_id); 730 set_be_option_rep(&opt.h, client->opt, NBD_REP_META_CONTEXT, 731 sizeof(opt) - sizeof(opt.h) + iov[1].iov_len); 732 stl_be_p(&opt.context_id, context_id); 733 734 return qio_channel_writev_all(client->ioc, iov, 2, errp) < 0 ? -EIO : 0; 735 } 736 737 /* nbd_meta_base_query 738 * 739 * Handle query to 'base' namespace. For now, only base:allocation context is 740 * available in it. 'len' is the amount of text remaining to be read from 741 * the current name, after the 'base:' portion has been stripped. 742 * 743 * Return -errno on I/O error, 0 if option was completely handled by 744 * sending a reply about inconsistent lengths, or 1 on success. */ 745 static int nbd_meta_base_query(NBDClient *client, NBDExportMetaContexts *meta, 746 uint32_t len, Error **errp) 747 { 748 int ret; 749 char query[sizeof("allocation") - 1]; 750 size_t alen = strlen("allocation"); 751 752 if (len == 0) { 753 if (client->opt == NBD_OPT_LIST_META_CONTEXT) { 754 meta->base_allocation = true; 755 } 756 trace_nbd_negotiate_meta_query_parse("base:"); 757 return 1; 758 } 759 760 if (len != alen) { 761 trace_nbd_negotiate_meta_query_skip("not base:allocation"); 762 return nbd_opt_skip(client, len, errp); 763 } 764 765 ret = nbd_opt_read(client, query, len, errp); 766 if (ret <= 0) { 767 return ret; 768 } 769 770 if (strncmp(query, "allocation", alen) == 0) { 771 meta->base_allocation = true; 772 } 773 774 trace_nbd_negotiate_meta_query_parse("base:allocation"); 775 return 1; 776 } 777 778 /* nbd_negotiate_meta_query 779 * 780 * Parse namespace name and call corresponding function to parse body of the 781 * query. 782 * 783 * The only supported namespace now is 'base'. 784 * 785 * The function aims not wasting time and memory to read long unknown namespace 786 * names. 787 * 788 * Return -errno on I/O error, 0 if option was completely handled by 789 * sending a reply about inconsistent lengths, or 1 on success. */ 790 static int nbd_negotiate_meta_query(NBDClient *client, 791 NBDExportMetaContexts *meta, Error **errp) 792 { 793 int ret; 794 char query[sizeof("base:") - 1]; 795 size_t baselen = strlen("base:"); 796 uint32_t len; 797 798 ret = nbd_opt_read(client, &len, sizeof(len), errp); 799 if (ret <= 0) { 800 return ret; 801 } 802 cpu_to_be32s(&len); 803 804 /* The only supported namespace for now is 'base'. So query should start 805 * with 'base:'. Otherwise, we can ignore it and skip the remainder. */ 806 if (len < baselen) { 807 trace_nbd_negotiate_meta_query_skip("length too short"); 808 return nbd_opt_skip(client, len, errp); 809 } 810 811 len -= baselen; 812 ret = nbd_opt_read(client, query, baselen, errp); 813 if (ret <= 0) { 814 return ret; 815 } 816 if (strncmp(query, "base:", baselen) != 0) { 817 trace_nbd_negotiate_meta_query_skip("not for base: namespace"); 818 return nbd_opt_skip(client, len, errp); 819 } 820 821 return nbd_meta_base_query(client, meta, len, errp); 822 } 823 824 /* nbd_negotiate_meta_queries 825 * Handle NBD_OPT_LIST_META_CONTEXT and NBD_OPT_SET_META_CONTEXT 826 * 827 * Return -errno on I/O error, or 0 if option was completely handled. */ 828 static int nbd_negotiate_meta_queries(NBDClient *client, 829 NBDExportMetaContexts *meta, Error **errp) 830 { 831 int ret; 832 NBDExport *exp; 833 NBDExportMetaContexts local_meta; 834 uint32_t nb_queries; 835 int i; 836 837 if (!client->structured_reply) { 838 return nbd_opt_invalid(client, errp, 839 "request option '%s' when structured reply " 840 "is not negotiated", 841 nbd_opt_lookup(client->opt)); 842 } 843 844 if (client->opt == NBD_OPT_LIST_META_CONTEXT) { 845 /* Only change the caller's meta on SET. */ 846 meta = &local_meta; 847 } 848 849 memset(meta, 0, sizeof(*meta)); 850 851 ret = nbd_opt_read_name(client, meta->export_name, NULL, errp); 852 if (ret <= 0) { 853 return ret; 854 } 855 856 exp = nbd_export_find(meta->export_name); 857 if (exp == NULL) { 858 return nbd_opt_drop(client, NBD_REP_ERR_UNKNOWN, errp, 859 "export '%s' not present", meta->export_name); 860 } 861 862 ret = nbd_opt_read(client, &nb_queries, sizeof(nb_queries), errp); 863 if (ret <= 0) { 864 return ret; 865 } 866 cpu_to_be32s(&nb_queries); 867 trace_nbd_negotiate_meta_context(nbd_opt_lookup(client->opt), 868 meta->export_name, nb_queries); 869 870 if (client->opt == NBD_OPT_LIST_META_CONTEXT && !nb_queries) { 871 /* enable all known contexts */ 872 meta->base_allocation = true; 873 } else { 874 for (i = 0; i < nb_queries; ++i) { 875 ret = nbd_negotiate_meta_query(client, meta, errp); 876 if (ret <= 0) { 877 return ret; 878 } 879 } 880 } 881 882 if (meta->base_allocation) { 883 ret = nbd_negotiate_send_meta_context(client, "base:allocation", 884 NBD_META_ID_BASE_ALLOCATION, 885 errp); 886 if (ret < 0) { 887 return ret; 888 } 889 } 890 891 ret = nbd_negotiate_send_rep(client, NBD_REP_ACK, errp); 892 if (ret == 0) { 893 meta->valid = true; 894 } 895 896 return ret; 897 } 898 899 /* nbd_negotiate_options 900 * Process all NBD_OPT_* client option commands, during fixed newstyle 901 * negotiation. 902 * Return: 903 * -errno on error, errp is set 904 * 0 on successful negotiation, errp is not set 905 * 1 if client sent NBD_OPT_ABORT, i.e. on valid disconnect, 906 * errp is not set 907 */ 908 static int nbd_negotiate_options(NBDClient *client, uint16_t myflags, 909 Error **errp) 910 { 911 uint32_t flags; 912 bool fixedNewstyle = false; 913 bool no_zeroes = false; 914 915 /* Client sends: 916 [ 0 .. 3] client flags 917 918 Then we loop until NBD_OPT_EXPORT_NAME or NBD_OPT_GO: 919 [ 0 .. 7] NBD_OPTS_MAGIC 920 [ 8 .. 11] NBD option 921 [12 .. 15] Data length 922 ... Rest of request 923 924 [ 0 .. 7] NBD_OPTS_MAGIC 925 [ 8 .. 11] Second NBD option 926 [12 .. 15] Data length 927 ... Rest of request 928 */ 929 930 if (nbd_read(client->ioc, &flags, sizeof(flags), errp) < 0) { 931 error_prepend(errp, "read failed: "); 932 return -EIO; 933 } 934 be32_to_cpus(&flags); 935 trace_nbd_negotiate_options_flags(flags); 936 if (flags & NBD_FLAG_C_FIXED_NEWSTYLE) { 937 fixedNewstyle = true; 938 flags &= ~NBD_FLAG_C_FIXED_NEWSTYLE; 939 } 940 if (flags & NBD_FLAG_C_NO_ZEROES) { 941 no_zeroes = true; 942 flags &= ~NBD_FLAG_C_NO_ZEROES; 943 } 944 if (flags != 0) { 945 error_setg(errp, "Unknown client flags 0x%" PRIx32 " received", flags); 946 return -EINVAL; 947 } 948 949 while (1) { 950 int ret; 951 uint32_t option, length; 952 uint64_t magic; 953 954 if (nbd_read(client->ioc, &magic, sizeof(magic), errp) < 0) { 955 error_prepend(errp, "read failed: "); 956 return -EINVAL; 957 } 958 magic = be64_to_cpu(magic); 959 trace_nbd_negotiate_options_check_magic(magic); 960 if (magic != NBD_OPTS_MAGIC) { 961 error_setg(errp, "Bad magic received"); 962 return -EINVAL; 963 } 964 965 if (nbd_read(client->ioc, &option, 966 sizeof(option), errp) < 0) { 967 error_prepend(errp, "read failed: "); 968 return -EINVAL; 969 } 970 option = be32_to_cpu(option); 971 client->opt = option; 972 973 if (nbd_read(client->ioc, &length, sizeof(length), errp) < 0) { 974 error_prepend(errp, "read failed: "); 975 return -EINVAL; 976 } 977 length = be32_to_cpu(length); 978 assert(!client->optlen); 979 client->optlen = length; 980 981 if (length > NBD_MAX_BUFFER_SIZE) { 982 error_setg(errp, "len (%" PRIu32" ) is larger than max len (%u)", 983 length, NBD_MAX_BUFFER_SIZE); 984 return -EINVAL; 985 } 986 987 trace_nbd_negotiate_options_check_option(option, 988 nbd_opt_lookup(option)); 989 if (client->tlscreds && 990 client->ioc == (QIOChannel *)client->sioc) { 991 QIOChannel *tioc; 992 if (!fixedNewstyle) { 993 error_setg(errp, "Unsupported option 0x%" PRIx32, option); 994 return -EINVAL; 995 } 996 switch (option) { 997 case NBD_OPT_STARTTLS: 998 if (length) { 999 /* Unconditionally drop the connection if the client 1000 * can't start a TLS negotiation correctly */ 1001 return nbd_reject_length(client, true, errp); 1002 } 1003 tioc = nbd_negotiate_handle_starttls(client, errp); 1004 if (!tioc) { 1005 return -EIO; 1006 } 1007 ret = 0; 1008 object_unref(OBJECT(client->ioc)); 1009 client->ioc = QIO_CHANNEL(tioc); 1010 break; 1011 1012 case NBD_OPT_EXPORT_NAME: 1013 /* No way to return an error to client, so drop connection */ 1014 error_setg(errp, "Option 0x%x not permitted before TLS", 1015 option); 1016 return -EINVAL; 1017 1018 default: 1019 ret = nbd_opt_drop(client, NBD_REP_ERR_TLS_REQD, errp, 1020 "Option 0x%" PRIx32 1021 "not permitted before TLS", option); 1022 /* Let the client keep trying, unless they asked to 1023 * quit. In this mode, we've already sent an error, so 1024 * we can't ack the abort. */ 1025 if (option == NBD_OPT_ABORT) { 1026 return 1; 1027 } 1028 break; 1029 } 1030 } else if (fixedNewstyle) { 1031 switch (option) { 1032 case NBD_OPT_LIST: 1033 if (length) { 1034 ret = nbd_reject_length(client, false, errp); 1035 } else { 1036 ret = nbd_negotiate_handle_list(client, errp); 1037 } 1038 break; 1039 1040 case NBD_OPT_ABORT: 1041 /* NBD spec says we must try to reply before 1042 * disconnecting, but that we must also tolerate 1043 * guests that don't wait for our reply. */ 1044 nbd_negotiate_send_rep(client, NBD_REP_ACK, NULL); 1045 return 1; 1046 1047 case NBD_OPT_EXPORT_NAME: 1048 return nbd_negotiate_handle_export_name(client, 1049 myflags, no_zeroes, 1050 errp); 1051 1052 case NBD_OPT_INFO: 1053 case NBD_OPT_GO: 1054 ret = nbd_negotiate_handle_info(client, myflags, errp); 1055 if (ret == 1) { 1056 assert(option == NBD_OPT_GO); 1057 return 0; 1058 } 1059 break; 1060 1061 case NBD_OPT_STARTTLS: 1062 if (length) { 1063 ret = nbd_reject_length(client, false, errp); 1064 } else if (client->tlscreds) { 1065 ret = nbd_negotiate_send_rep_err(client, 1066 NBD_REP_ERR_INVALID, errp, 1067 "TLS already enabled"); 1068 } else { 1069 ret = nbd_negotiate_send_rep_err(client, 1070 NBD_REP_ERR_POLICY, errp, 1071 "TLS not configured"); 1072 } 1073 break; 1074 1075 case NBD_OPT_STRUCTURED_REPLY: 1076 if (length) { 1077 ret = nbd_reject_length(client, false, errp); 1078 } else if (client->structured_reply) { 1079 ret = nbd_negotiate_send_rep_err( 1080 client, NBD_REP_ERR_INVALID, errp, 1081 "structured reply already negotiated"); 1082 } else { 1083 ret = nbd_negotiate_send_rep(client, NBD_REP_ACK, errp); 1084 client->structured_reply = true; 1085 myflags |= NBD_FLAG_SEND_DF; 1086 } 1087 break; 1088 1089 case NBD_OPT_LIST_META_CONTEXT: 1090 case NBD_OPT_SET_META_CONTEXT: 1091 ret = nbd_negotiate_meta_queries(client, &client->export_meta, 1092 errp); 1093 break; 1094 1095 default: 1096 ret = nbd_opt_drop(client, NBD_REP_ERR_UNSUP, errp, 1097 "Unsupported option %" PRIu32 " (%s)", 1098 option, nbd_opt_lookup(option)); 1099 break; 1100 } 1101 } else { 1102 /* 1103 * If broken new-style we should drop the connection 1104 * for anything except NBD_OPT_EXPORT_NAME 1105 */ 1106 switch (option) { 1107 case NBD_OPT_EXPORT_NAME: 1108 return nbd_negotiate_handle_export_name(client, 1109 myflags, no_zeroes, 1110 errp); 1111 1112 default: 1113 error_setg(errp, "Unsupported option %" PRIu32 " (%s)", 1114 option, nbd_opt_lookup(option)); 1115 return -EINVAL; 1116 } 1117 } 1118 if (ret < 0) { 1119 return ret; 1120 } 1121 } 1122 } 1123 1124 /* nbd_negotiate 1125 * Return: 1126 * -errno on error, errp is set 1127 * 0 on successful negotiation, errp is not set 1128 * 1 if client sent NBD_OPT_ABORT, i.e. on valid disconnect, 1129 * errp is not set 1130 */ 1131 static coroutine_fn int nbd_negotiate(NBDClient *client, Error **errp) 1132 { 1133 char buf[NBD_OLDSTYLE_NEGOTIATE_SIZE] = ""; 1134 int ret; 1135 const uint16_t myflags = (NBD_FLAG_HAS_FLAGS | NBD_FLAG_SEND_TRIM | 1136 NBD_FLAG_SEND_FLUSH | NBD_FLAG_SEND_FUA | 1137 NBD_FLAG_SEND_WRITE_ZEROES); 1138 bool oldStyle; 1139 1140 /* Old style negotiation header, no room for options 1141 [ 0 .. 7] passwd ("NBDMAGIC") 1142 [ 8 .. 15] magic (NBD_CLIENT_MAGIC) 1143 [16 .. 23] size 1144 [24 .. 27] export flags (zero-extended) 1145 [28 .. 151] reserved (0) 1146 1147 New style negotiation header, client can send options 1148 [ 0 .. 7] passwd ("NBDMAGIC") 1149 [ 8 .. 15] magic (NBD_OPTS_MAGIC) 1150 [16 .. 17] server flags (0) 1151 ....options sent, ending in NBD_OPT_EXPORT_NAME or NBD_OPT_GO.... 1152 */ 1153 1154 qio_channel_set_blocking(client->ioc, false, NULL); 1155 1156 trace_nbd_negotiate_begin(); 1157 memcpy(buf, "NBDMAGIC", 8); 1158 1159 oldStyle = client->exp != NULL && !client->tlscreds; 1160 if (oldStyle) { 1161 trace_nbd_negotiate_old_style(client->exp->size, 1162 client->exp->nbdflags | myflags); 1163 stq_be_p(buf + 8, NBD_CLIENT_MAGIC); 1164 stq_be_p(buf + 16, client->exp->size); 1165 stl_be_p(buf + 24, client->exp->nbdflags | myflags); 1166 1167 if (nbd_write(client->ioc, buf, sizeof(buf), errp) < 0) { 1168 error_prepend(errp, "write failed: "); 1169 return -EINVAL; 1170 } 1171 } else { 1172 stq_be_p(buf + 8, NBD_OPTS_MAGIC); 1173 stw_be_p(buf + 16, NBD_FLAG_FIXED_NEWSTYLE | NBD_FLAG_NO_ZEROES); 1174 1175 if (nbd_write(client->ioc, buf, 18, errp) < 0) { 1176 error_prepend(errp, "write failed: "); 1177 return -EINVAL; 1178 } 1179 ret = nbd_negotiate_options(client, myflags, errp); 1180 if (ret != 0) { 1181 if (ret < 0) { 1182 error_prepend(errp, "option negotiation failed: "); 1183 } 1184 return ret; 1185 } 1186 } 1187 1188 assert(!client->optlen); 1189 trace_nbd_negotiate_success(); 1190 1191 return 0; 1192 } 1193 1194 static int nbd_receive_request(QIOChannel *ioc, NBDRequest *request, 1195 Error **errp) 1196 { 1197 uint8_t buf[NBD_REQUEST_SIZE]; 1198 uint32_t magic; 1199 int ret; 1200 1201 ret = nbd_read(ioc, buf, sizeof(buf), errp); 1202 if (ret < 0) { 1203 return ret; 1204 } 1205 1206 /* Request 1207 [ 0 .. 3] magic (NBD_REQUEST_MAGIC) 1208 [ 4 .. 5] flags (NBD_CMD_FLAG_FUA, ...) 1209 [ 6 .. 7] type (NBD_CMD_READ, ...) 1210 [ 8 .. 15] handle 1211 [16 .. 23] from 1212 [24 .. 27] len 1213 */ 1214 1215 magic = ldl_be_p(buf); 1216 request->flags = lduw_be_p(buf + 4); 1217 request->type = lduw_be_p(buf + 6); 1218 request->handle = ldq_be_p(buf + 8); 1219 request->from = ldq_be_p(buf + 16); 1220 request->len = ldl_be_p(buf + 24); 1221 1222 trace_nbd_receive_request(magic, request->flags, request->type, 1223 request->from, request->len); 1224 1225 if (magic != NBD_REQUEST_MAGIC) { 1226 error_setg(errp, "invalid magic (got 0x%" PRIx32 ")", magic); 1227 return -EINVAL; 1228 } 1229 return 0; 1230 } 1231 1232 #define MAX_NBD_REQUESTS 16 1233 1234 void nbd_client_get(NBDClient *client) 1235 { 1236 client->refcount++; 1237 } 1238 1239 void nbd_client_put(NBDClient *client) 1240 { 1241 if (--client->refcount == 0) { 1242 /* The last reference should be dropped by client->close, 1243 * which is called by client_close. 1244 */ 1245 assert(client->closing); 1246 1247 qio_channel_detach_aio_context(client->ioc); 1248 object_unref(OBJECT(client->sioc)); 1249 object_unref(OBJECT(client->ioc)); 1250 if (client->tlscreds) { 1251 object_unref(OBJECT(client->tlscreds)); 1252 } 1253 g_free(client->tlsaclname); 1254 if (client->exp) { 1255 QTAILQ_REMOVE(&client->exp->clients, client, next); 1256 nbd_export_put(client->exp); 1257 } 1258 g_free(client); 1259 } 1260 } 1261 1262 static void client_close(NBDClient *client, bool negotiated) 1263 { 1264 if (client->closing) { 1265 return; 1266 } 1267 1268 client->closing = true; 1269 1270 /* Force requests to finish. They will drop their own references, 1271 * then we'll close the socket and free the NBDClient. 1272 */ 1273 qio_channel_shutdown(client->ioc, QIO_CHANNEL_SHUTDOWN_BOTH, 1274 NULL); 1275 1276 /* Also tell the client, so that they release their reference. */ 1277 if (client->close_fn) { 1278 client->close_fn(client, negotiated); 1279 } 1280 } 1281 1282 static NBDRequestData *nbd_request_get(NBDClient *client) 1283 { 1284 NBDRequestData *req; 1285 1286 assert(client->nb_requests <= MAX_NBD_REQUESTS - 1); 1287 client->nb_requests++; 1288 1289 req = g_new0(NBDRequestData, 1); 1290 nbd_client_get(client); 1291 req->client = client; 1292 return req; 1293 } 1294 1295 static void nbd_request_put(NBDRequestData *req) 1296 { 1297 NBDClient *client = req->client; 1298 1299 if (req->data) { 1300 qemu_vfree(req->data); 1301 } 1302 g_free(req); 1303 1304 client->nb_requests--; 1305 nbd_client_receive_next_request(client); 1306 1307 nbd_client_put(client); 1308 } 1309 1310 static void blk_aio_attached(AioContext *ctx, void *opaque) 1311 { 1312 NBDExport *exp = opaque; 1313 NBDClient *client; 1314 1315 trace_nbd_blk_aio_attached(exp->name, ctx); 1316 1317 exp->ctx = ctx; 1318 1319 QTAILQ_FOREACH(client, &exp->clients, next) { 1320 qio_channel_attach_aio_context(client->ioc, ctx); 1321 if (client->recv_coroutine) { 1322 aio_co_schedule(ctx, client->recv_coroutine); 1323 } 1324 if (client->send_coroutine) { 1325 aio_co_schedule(ctx, client->send_coroutine); 1326 } 1327 } 1328 } 1329 1330 static void blk_aio_detach(void *opaque) 1331 { 1332 NBDExport *exp = opaque; 1333 NBDClient *client; 1334 1335 trace_nbd_blk_aio_detach(exp->name, exp->ctx); 1336 1337 QTAILQ_FOREACH(client, &exp->clients, next) { 1338 qio_channel_detach_aio_context(client->ioc); 1339 } 1340 1341 exp->ctx = NULL; 1342 } 1343 1344 static void nbd_eject_notifier(Notifier *n, void *data) 1345 { 1346 NBDExport *exp = container_of(n, NBDExport, eject_notifier); 1347 nbd_export_close(exp); 1348 } 1349 1350 NBDExport *nbd_export_new(BlockDriverState *bs, off_t dev_offset, off_t size, 1351 uint16_t nbdflags, void (*close)(NBDExport *), 1352 bool writethrough, BlockBackend *on_eject_blk, 1353 Error **errp) 1354 { 1355 AioContext *ctx; 1356 BlockBackend *blk; 1357 NBDExport *exp = g_new0(NBDExport, 1); 1358 uint64_t perm; 1359 int ret; 1360 1361 /* 1362 * NBD exports are used for non-shared storage migration. Make sure 1363 * that BDRV_O_INACTIVE is cleared and the image is ready for write 1364 * access since the export could be available before migration handover. 1365 */ 1366 ctx = bdrv_get_aio_context(bs); 1367 aio_context_acquire(ctx); 1368 bdrv_invalidate_cache(bs, NULL); 1369 aio_context_release(ctx); 1370 1371 /* Don't allow resize while the NBD server is running, otherwise we don't 1372 * care what happens with the node. */ 1373 perm = BLK_PERM_CONSISTENT_READ; 1374 if ((nbdflags & NBD_FLAG_READ_ONLY) == 0) { 1375 perm |= BLK_PERM_WRITE; 1376 } 1377 blk = blk_new(perm, BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED | 1378 BLK_PERM_WRITE | BLK_PERM_GRAPH_MOD); 1379 ret = blk_insert_bs(blk, bs, errp); 1380 if (ret < 0) { 1381 goto fail; 1382 } 1383 blk_set_enable_write_cache(blk, !writethrough); 1384 1385 exp->refcount = 1; 1386 QTAILQ_INIT(&exp->clients); 1387 exp->blk = blk; 1388 exp->dev_offset = dev_offset; 1389 exp->nbdflags = nbdflags; 1390 exp->size = size < 0 ? blk_getlength(blk) : size; 1391 if (exp->size < 0) { 1392 error_setg_errno(errp, -exp->size, 1393 "Failed to determine the NBD export's length"); 1394 goto fail; 1395 } 1396 exp->size -= exp->size % BDRV_SECTOR_SIZE; 1397 1398 exp->close = close; 1399 exp->ctx = blk_get_aio_context(blk); 1400 blk_add_aio_context_notifier(blk, blk_aio_attached, blk_aio_detach, exp); 1401 1402 if (on_eject_blk) { 1403 blk_ref(on_eject_blk); 1404 exp->eject_notifier_blk = on_eject_blk; 1405 exp->eject_notifier.notify = nbd_eject_notifier; 1406 blk_add_remove_bs_notifier(on_eject_blk, &exp->eject_notifier); 1407 } 1408 return exp; 1409 1410 fail: 1411 blk_unref(blk); 1412 g_free(exp); 1413 return NULL; 1414 } 1415 1416 NBDExport *nbd_export_find(const char *name) 1417 { 1418 NBDExport *exp; 1419 QTAILQ_FOREACH(exp, &exports, next) { 1420 if (strcmp(name, exp->name) == 0) { 1421 return exp; 1422 } 1423 } 1424 1425 return NULL; 1426 } 1427 1428 void nbd_export_set_name(NBDExport *exp, const char *name) 1429 { 1430 if (exp->name == name) { 1431 return; 1432 } 1433 1434 nbd_export_get(exp); 1435 if (exp->name != NULL) { 1436 g_free(exp->name); 1437 exp->name = NULL; 1438 QTAILQ_REMOVE(&exports, exp, next); 1439 nbd_export_put(exp); 1440 } 1441 if (name != NULL) { 1442 nbd_export_get(exp); 1443 exp->name = g_strdup(name); 1444 QTAILQ_INSERT_TAIL(&exports, exp, next); 1445 } 1446 nbd_export_put(exp); 1447 } 1448 1449 void nbd_export_set_description(NBDExport *exp, const char *description) 1450 { 1451 g_free(exp->description); 1452 exp->description = g_strdup(description); 1453 } 1454 1455 void nbd_export_close(NBDExport *exp) 1456 { 1457 NBDClient *client, *next; 1458 1459 nbd_export_get(exp); 1460 QTAILQ_FOREACH_SAFE(client, &exp->clients, next, next) { 1461 client_close(client, true); 1462 } 1463 nbd_export_set_name(exp, NULL); 1464 nbd_export_set_description(exp, NULL); 1465 nbd_export_put(exp); 1466 } 1467 1468 void nbd_export_remove(NBDExport *exp, NbdServerRemoveMode mode, Error **errp) 1469 { 1470 if (mode == NBD_SERVER_REMOVE_MODE_HARD || QTAILQ_EMPTY(&exp->clients)) { 1471 nbd_export_close(exp); 1472 return; 1473 } 1474 1475 assert(mode == NBD_SERVER_REMOVE_MODE_SAFE); 1476 1477 error_setg(errp, "export '%s' still in use", exp->name); 1478 error_append_hint(errp, "Use mode='hard' to force client disconnect\n"); 1479 } 1480 1481 void nbd_export_get(NBDExport *exp) 1482 { 1483 assert(exp->refcount > 0); 1484 exp->refcount++; 1485 } 1486 1487 void nbd_export_put(NBDExport *exp) 1488 { 1489 assert(exp->refcount > 0); 1490 if (exp->refcount == 1) { 1491 nbd_export_close(exp); 1492 } 1493 1494 /* nbd_export_close() may theoretically reduce refcount to 0. It may happen 1495 * if someone calls nbd_export_put() on named export not through 1496 * nbd_export_set_name() when refcount is 1. So, let's assert that 1497 * it is > 0. 1498 */ 1499 assert(exp->refcount > 0); 1500 if (--exp->refcount == 0) { 1501 assert(exp->name == NULL); 1502 assert(exp->description == NULL); 1503 1504 if (exp->close) { 1505 exp->close(exp); 1506 } 1507 1508 if (exp->blk) { 1509 if (exp->eject_notifier_blk) { 1510 notifier_remove(&exp->eject_notifier); 1511 blk_unref(exp->eject_notifier_blk); 1512 } 1513 blk_remove_aio_context_notifier(exp->blk, blk_aio_attached, 1514 blk_aio_detach, exp); 1515 blk_unref(exp->blk); 1516 exp->blk = NULL; 1517 } 1518 1519 g_free(exp); 1520 } 1521 } 1522 1523 BlockBackend *nbd_export_get_blockdev(NBDExport *exp) 1524 { 1525 return exp->blk; 1526 } 1527 1528 void nbd_export_close_all(void) 1529 { 1530 NBDExport *exp, *next; 1531 1532 QTAILQ_FOREACH_SAFE(exp, &exports, next, next) { 1533 nbd_export_close(exp); 1534 } 1535 } 1536 1537 static int coroutine_fn nbd_co_send_iov(NBDClient *client, struct iovec *iov, 1538 unsigned niov, Error **errp) 1539 { 1540 int ret; 1541 1542 g_assert(qemu_in_coroutine()); 1543 qemu_co_mutex_lock(&client->send_lock); 1544 client->send_coroutine = qemu_coroutine_self(); 1545 1546 ret = qio_channel_writev_all(client->ioc, iov, niov, errp) < 0 ? -EIO : 0; 1547 1548 client->send_coroutine = NULL; 1549 qemu_co_mutex_unlock(&client->send_lock); 1550 1551 return ret; 1552 } 1553 1554 static inline void set_be_simple_reply(NBDSimpleReply *reply, uint64_t error, 1555 uint64_t handle) 1556 { 1557 stl_be_p(&reply->magic, NBD_SIMPLE_REPLY_MAGIC); 1558 stl_be_p(&reply->error, error); 1559 stq_be_p(&reply->handle, handle); 1560 } 1561 1562 static int nbd_co_send_simple_reply(NBDClient *client, 1563 uint64_t handle, 1564 uint32_t error, 1565 void *data, 1566 size_t len, 1567 Error **errp) 1568 { 1569 NBDSimpleReply reply; 1570 int nbd_err = system_errno_to_nbd_errno(error); 1571 struct iovec iov[] = { 1572 {.iov_base = &reply, .iov_len = sizeof(reply)}, 1573 {.iov_base = data, .iov_len = len} 1574 }; 1575 1576 trace_nbd_co_send_simple_reply(handle, nbd_err, nbd_err_lookup(nbd_err), 1577 len); 1578 set_be_simple_reply(&reply, nbd_err, handle); 1579 1580 return nbd_co_send_iov(client, iov, len ? 2 : 1, errp); 1581 } 1582 1583 static inline void set_be_chunk(NBDStructuredReplyChunk *chunk, uint16_t flags, 1584 uint16_t type, uint64_t handle, uint32_t length) 1585 { 1586 stl_be_p(&chunk->magic, NBD_STRUCTURED_REPLY_MAGIC); 1587 stw_be_p(&chunk->flags, flags); 1588 stw_be_p(&chunk->type, type); 1589 stq_be_p(&chunk->handle, handle); 1590 stl_be_p(&chunk->length, length); 1591 } 1592 1593 static int coroutine_fn nbd_co_send_structured_done(NBDClient *client, 1594 uint64_t handle, 1595 Error **errp) 1596 { 1597 NBDStructuredReplyChunk chunk; 1598 struct iovec iov[] = { 1599 {.iov_base = &chunk, .iov_len = sizeof(chunk)}, 1600 }; 1601 1602 trace_nbd_co_send_structured_done(handle); 1603 set_be_chunk(&chunk, NBD_REPLY_FLAG_DONE, NBD_REPLY_TYPE_NONE, handle, 0); 1604 1605 return nbd_co_send_iov(client, iov, 1, errp); 1606 } 1607 1608 static int coroutine_fn nbd_co_send_structured_read(NBDClient *client, 1609 uint64_t handle, 1610 uint64_t offset, 1611 void *data, 1612 size_t size, 1613 bool final, 1614 Error **errp) 1615 { 1616 NBDStructuredReadData chunk; 1617 struct iovec iov[] = { 1618 {.iov_base = &chunk, .iov_len = sizeof(chunk)}, 1619 {.iov_base = data, .iov_len = size} 1620 }; 1621 1622 assert(size); 1623 trace_nbd_co_send_structured_read(handle, offset, data, size); 1624 set_be_chunk(&chunk.h, final ? NBD_REPLY_FLAG_DONE : 0, 1625 NBD_REPLY_TYPE_OFFSET_DATA, handle, 1626 sizeof(chunk) - sizeof(chunk.h) + size); 1627 stq_be_p(&chunk.offset, offset); 1628 1629 return nbd_co_send_iov(client, iov, 2, errp); 1630 } 1631 1632 static int coroutine_fn nbd_co_send_structured_error(NBDClient *client, 1633 uint64_t handle, 1634 uint32_t error, 1635 const char *msg, 1636 Error **errp) 1637 { 1638 NBDStructuredError chunk; 1639 int nbd_err = system_errno_to_nbd_errno(error); 1640 struct iovec iov[] = { 1641 {.iov_base = &chunk, .iov_len = sizeof(chunk)}, 1642 {.iov_base = (char *)msg, .iov_len = msg ? strlen(msg) : 0}, 1643 }; 1644 1645 assert(nbd_err); 1646 trace_nbd_co_send_structured_error(handle, nbd_err, 1647 nbd_err_lookup(nbd_err), msg ? msg : ""); 1648 set_be_chunk(&chunk.h, NBD_REPLY_FLAG_DONE, NBD_REPLY_TYPE_ERROR, handle, 1649 sizeof(chunk) - sizeof(chunk.h) + iov[1].iov_len); 1650 stl_be_p(&chunk.error, nbd_err); 1651 stw_be_p(&chunk.message_length, iov[1].iov_len); 1652 1653 return nbd_co_send_iov(client, iov, 1 + !!iov[1].iov_len, errp); 1654 } 1655 1656 /* Do a sparse read and send the structured reply to the client. 1657 * Returns -errno if sending fails. bdrv_block_status_above() failure is 1658 * reported to the client, at which point this function succeeds. 1659 */ 1660 static int coroutine_fn nbd_co_send_sparse_read(NBDClient *client, 1661 uint64_t handle, 1662 uint64_t offset, 1663 uint8_t *data, 1664 size_t size, 1665 Error **errp) 1666 { 1667 int ret = 0; 1668 NBDExport *exp = client->exp; 1669 size_t progress = 0; 1670 1671 while (progress < size) { 1672 int64_t pnum; 1673 int status = bdrv_block_status_above(blk_bs(exp->blk), NULL, 1674 offset + progress, 1675 size - progress, &pnum, NULL, 1676 NULL); 1677 bool final; 1678 1679 if (status < 0) { 1680 char *msg = g_strdup_printf("unable to check for holes: %s", 1681 strerror(-status)); 1682 1683 ret = nbd_co_send_structured_error(client, handle, -status, msg, 1684 errp); 1685 g_free(msg); 1686 return ret; 1687 } 1688 assert(pnum && pnum <= size - progress); 1689 final = progress + pnum == size; 1690 if (status & BDRV_BLOCK_ZERO) { 1691 NBDStructuredReadHole chunk; 1692 struct iovec iov[] = { 1693 {.iov_base = &chunk, .iov_len = sizeof(chunk)}, 1694 }; 1695 1696 trace_nbd_co_send_structured_read_hole(handle, offset + progress, 1697 pnum); 1698 set_be_chunk(&chunk.h, final ? NBD_REPLY_FLAG_DONE : 0, 1699 NBD_REPLY_TYPE_OFFSET_HOLE, 1700 handle, sizeof(chunk) - sizeof(chunk.h)); 1701 stq_be_p(&chunk.offset, offset + progress); 1702 stl_be_p(&chunk.length, pnum); 1703 ret = nbd_co_send_iov(client, iov, 1, errp); 1704 } else { 1705 ret = blk_pread(exp->blk, offset + progress + exp->dev_offset, 1706 data + progress, pnum); 1707 if (ret < 0) { 1708 error_setg_errno(errp, -ret, "reading from file failed"); 1709 break; 1710 } 1711 ret = nbd_co_send_structured_read(client, handle, offset + progress, 1712 data + progress, pnum, final, 1713 errp); 1714 } 1715 1716 if (ret < 0) { 1717 break; 1718 } 1719 progress += pnum; 1720 } 1721 return ret; 1722 } 1723 1724 static int blockstatus_to_extent_be(BlockDriverState *bs, uint64_t offset, 1725 uint64_t bytes, NBDExtent *extent) 1726 { 1727 uint64_t remaining_bytes = bytes; 1728 1729 while (remaining_bytes) { 1730 uint32_t flags; 1731 int64_t num; 1732 int ret = bdrv_block_status_above(bs, NULL, offset, remaining_bytes, 1733 &num, NULL, NULL); 1734 if (ret < 0) { 1735 return ret; 1736 } 1737 1738 flags = (ret & BDRV_BLOCK_ALLOCATED ? 0 : NBD_STATE_HOLE) | 1739 (ret & BDRV_BLOCK_ZERO ? NBD_STATE_ZERO : 0); 1740 1741 if (remaining_bytes == bytes) { 1742 extent->flags = flags; 1743 } 1744 1745 if (flags != extent->flags) { 1746 break; 1747 } 1748 1749 offset += num; 1750 remaining_bytes -= num; 1751 } 1752 1753 cpu_to_be32s(&extent->flags); 1754 extent->length = cpu_to_be32(bytes - remaining_bytes); 1755 1756 return 0; 1757 } 1758 1759 /* nbd_co_send_extents 1760 * @extents should be in big-endian */ 1761 static int nbd_co_send_extents(NBDClient *client, uint64_t handle, 1762 NBDExtent *extents, unsigned nb_extents, 1763 uint32_t context_id, Error **errp) 1764 { 1765 NBDStructuredMeta chunk; 1766 1767 struct iovec iov[] = { 1768 {.iov_base = &chunk, .iov_len = sizeof(chunk)}, 1769 {.iov_base = extents, .iov_len = nb_extents * sizeof(extents[0])} 1770 }; 1771 1772 set_be_chunk(&chunk.h, NBD_REPLY_FLAG_DONE, NBD_REPLY_TYPE_BLOCK_STATUS, 1773 handle, sizeof(chunk) - sizeof(chunk.h) + iov[1].iov_len); 1774 stl_be_p(&chunk.context_id, context_id); 1775 1776 return nbd_co_send_iov(client, iov, 2, errp); 1777 } 1778 1779 /* Get block status from the exported device and send it to the client */ 1780 static int nbd_co_send_block_status(NBDClient *client, uint64_t handle, 1781 BlockDriverState *bs, uint64_t offset, 1782 uint64_t length, uint32_t context_id, 1783 Error **errp) 1784 { 1785 int ret; 1786 NBDExtent extent; 1787 1788 ret = blockstatus_to_extent_be(bs, offset, length, &extent); 1789 if (ret < 0) { 1790 return nbd_co_send_structured_error( 1791 client, handle, -ret, "can't get block status", errp); 1792 } 1793 1794 return nbd_co_send_extents(client, handle, &extent, 1, context_id, errp); 1795 } 1796 1797 /* nbd_co_receive_request 1798 * Collect a client request. Return 0 if request looks valid, -EIO to drop 1799 * connection right away, and any other negative value to report an error to 1800 * the client (although the caller may still need to disconnect after reporting 1801 * the error). 1802 */ 1803 static int nbd_co_receive_request(NBDRequestData *req, NBDRequest *request, 1804 Error **errp) 1805 { 1806 NBDClient *client = req->client; 1807 int valid_flags; 1808 1809 g_assert(qemu_in_coroutine()); 1810 assert(client->recv_coroutine == qemu_coroutine_self()); 1811 if (nbd_receive_request(client->ioc, request, errp) < 0) { 1812 return -EIO; 1813 } 1814 1815 trace_nbd_co_receive_request_decode_type(request->handle, request->type, 1816 nbd_cmd_lookup(request->type)); 1817 1818 if (request->type != NBD_CMD_WRITE) { 1819 /* No payload, we are ready to read the next request. */ 1820 req->complete = true; 1821 } 1822 1823 if (request->type == NBD_CMD_DISC) { 1824 /* Special case: we're going to disconnect without a reply, 1825 * whether or not flags, from, or len are bogus */ 1826 return -EIO; 1827 } 1828 1829 if (request->type == NBD_CMD_READ || request->type == NBD_CMD_WRITE) { 1830 if (request->len > NBD_MAX_BUFFER_SIZE) { 1831 error_setg(errp, "len (%" PRIu32" ) is larger than max len (%u)", 1832 request->len, NBD_MAX_BUFFER_SIZE); 1833 return -EINVAL; 1834 } 1835 1836 req->data = blk_try_blockalign(client->exp->blk, request->len); 1837 if (req->data == NULL) { 1838 error_setg(errp, "No memory"); 1839 return -ENOMEM; 1840 } 1841 } 1842 if (request->type == NBD_CMD_WRITE) { 1843 if (nbd_read(client->ioc, req->data, request->len, errp) < 0) { 1844 error_prepend(errp, "reading from socket failed: "); 1845 return -EIO; 1846 } 1847 req->complete = true; 1848 1849 trace_nbd_co_receive_request_payload_received(request->handle, 1850 request->len); 1851 } 1852 1853 /* Sanity checks. */ 1854 if (client->exp->nbdflags & NBD_FLAG_READ_ONLY && 1855 (request->type == NBD_CMD_WRITE || 1856 request->type == NBD_CMD_WRITE_ZEROES || 1857 request->type == NBD_CMD_TRIM)) { 1858 error_setg(errp, "Export is read-only"); 1859 return -EROFS; 1860 } 1861 if (request->from > client->exp->size || 1862 request->from + request->len > client->exp->size) { 1863 error_setg(errp, "operation past EOF; From: %" PRIu64 ", Len: %" PRIu32 1864 ", Size: %" PRIu64, request->from, request->len, 1865 (uint64_t)client->exp->size); 1866 return (request->type == NBD_CMD_WRITE || 1867 request->type == NBD_CMD_WRITE_ZEROES) ? -ENOSPC : -EINVAL; 1868 } 1869 valid_flags = NBD_CMD_FLAG_FUA; 1870 if (request->type == NBD_CMD_READ && client->structured_reply) { 1871 valid_flags |= NBD_CMD_FLAG_DF; 1872 } else if (request->type == NBD_CMD_WRITE_ZEROES) { 1873 valid_flags |= NBD_CMD_FLAG_NO_HOLE; 1874 } else if (request->type == NBD_CMD_BLOCK_STATUS) { 1875 valid_flags |= NBD_CMD_FLAG_REQ_ONE; 1876 } 1877 if (request->flags & ~valid_flags) { 1878 error_setg(errp, "unsupported flags for command %s (got 0x%x)", 1879 nbd_cmd_lookup(request->type), request->flags); 1880 return -EINVAL; 1881 } 1882 1883 return 0; 1884 } 1885 1886 /* Send simple reply without a payload, or a structured error 1887 * @error_msg is ignored if @ret >= 0 1888 * Returns 0 if connection is still live, -errno on failure to talk to client 1889 */ 1890 static coroutine_fn int nbd_send_generic_reply(NBDClient *client, 1891 uint64_t handle, 1892 int ret, 1893 const char *error_msg, 1894 Error **errp) 1895 { 1896 if (client->structured_reply && ret < 0) { 1897 return nbd_co_send_structured_error(client, handle, -ret, error_msg, 1898 errp); 1899 } else { 1900 return nbd_co_send_simple_reply(client, handle, ret < 0 ? -ret : 0, 1901 NULL, 0, errp); 1902 } 1903 } 1904 1905 /* Handle NBD_CMD_READ request. 1906 * Return -errno if sending fails. Other errors are reported directly to the 1907 * client as an error reply. */ 1908 static coroutine_fn int nbd_do_cmd_read(NBDClient *client, NBDRequest *request, 1909 uint8_t *data, Error **errp) 1910 { 1911 int ret; 1912 NBDExport *exp = client->exp; 1913 1914 assert(request->type == NBD_CMD_READ); 1915 1916 /* XXX: NBD Protocol only documents use of FUA with WRITE */ 1917 if (request->flags & NBD_CMD_FLAG_FUA) { 1918 ret = blk_co_flush(exp->blk); 1919 if (ret < 0) { 1920 return nbd_send_generic_reply(client, request->handle, ret, 1921 "flush failed", errp); 1922 } 1923 } 1924 1925 if (client->structured_reply && !(request->flags & NBD_CMD_FLAG_DF) && 1926 request->len) { 1927 return nbd_co_send_sparse_read(client, request->handle, request->from, 1928 data, request->len, errp); 1929 } 1930 1931 ret = blk_pread(exp->blk, request->from + exp->dev_offset, data, 1932 request->len); 1933 if (ret < 0) { 1934 return nbd_send_generic_reply(client, request->handle, ret, 1935 "reading from file failed", errp); 1936 } 1937 1938 if (client->structured_reply) { 1939 if (request->len) { 1940 return nbd_co_send_structured_read(client, request->handle, 1941 request->from, data, 1942 request->len, true, errp); 1943 } else { 1944 return nbd_co_send_structured_done(client, request->handle, errp); 1945 } 1946 } else { 1947 return nbd_co_send_simple_reply(client, request->handle, 0, 1948 data, request->len, errp); 1949 } 1950 } 1951 1952 /* Handle NBD request. 1953 * Return -errno if sending fails. Other errors are reported directly to the 1954 * client as an error reply. */ 1955 static coroutine_fn int nbd_handle_request(NBDClient *client, 1956 NBDRequest *request, 1957 uint8_t *data, Error **errp) 1958 { 1959 int ret; 1960 int flags; 1961 NBDExport *exp = client->exp; 1962 char *msg; 1963 1964 switch (request->type) { 1965 case NBD_CMD_READ: 1966 return nbd_do_cmd_read(client, request, data, errp); 1967 1968 case NBD_CMD_WRITE: 1969 flags = 0; 1970 if (request->flags & NBD_CMD_FLAG_FUA) { 1971 flags |= BDRV_REQ_FUA; 1972 } 1973 ret = blk_pwrite(exp->blk, request->from + exp->dev_offset, 1974 data, request->len, flags); 1975 return nbd_send_generic_reply(client, request->handle, ret, 1976 "writing to file failed", errp); 1977 1978 case NBD_CMD_WRITE_ZEROES: 1979 flags = 0; 1980 if (request->flags & NBD_CMD_FLAG_FUA) { 1981 flags |= BDRV_REQ_FUA; 1982 } 1983 if (!(request->flags & NBD_CMD_FLAG_NO_HOLE)) { 1984 flags |= BDRV_REQ_MAY_UNMAP; 1985 } 1986 ret = blk_pwrite_zeroes(exp->blk, request->from + exp->dev_offset, 1987 request->len, flags); 1988 return nbd_send_generic_reply(client, request->handle, ret, 1989 "writing to file failed", errp); 1990 1991 case NBD_CMD_DISC: 1992 /* unreachable, thanks to special case in nbd_co_receive_request() */ 1993 abort(); 1994 1995 case NBD_CMD_FLUSH: 1996 ret = blk_co_flush(exp->blk); 1997 return nbd_send_generic_reply(client, request->handle, ret, 1998 "flush failed", errp); 1999 2000 case NBD_CMD_TRIM: 2001 ret = blk_co_pdiscard(exp->blk, request->from + exp->dev_offset, 2002 request->len); 2003 if (ret == 0 && request->flags & NBD_CMD_FLAG_FUA) { 2004 ret = blk_co_flush(exp->blk); 2005 } 2006 return nbd_send_generic_reply(client, request->handle, ret, 2007 "discard failed", errp); 2008 2009 case NBD_CMD_BLOCK_STATUS: 2010 if (client->export_meta.valid && client->export_meta.base_allocation) { 2011 return nbd_co_send_block_status(client, request->handle, 2012 blk_bs(exp->blk), request->from, 2013 request->len, 2014 NBD_META_ID_BASE_ALLOCATION, errp); 2015 } else { 2016 return nbd_send_generic_reply(client, request->handle, -EINVAL, 2017 "CMD_BLOCK_STATUS not negotiated", 2018 errp); 2019 } 2020 2021 default: 2022 msg = g_strdup_printf("invalid request type (%" PRIu32 ") received", 2023 request->type); 2024 ret = nbd_send_generic_reply(client, request->handle, -EINVAL, msg, 2025 errp); 2026 g_free(msg); 2027 return ret; 2028 } 2029 } 2030 2031 /* Owns a reference to the NBDClient passed as opaque. */ 2032 static coroutine_fn void nbd_trip(void *opaque) 2033 { 2034 NBDClient *client = opaque; 2035 NBDRequestData *req; 2036 NBDRequest request = { 0 }; /* GCC thinks it can be used uninitialized */ 2037 int ret; 2038 Error *local_err = NULL; 2039 2040 trace_nbd_trip(); 2041 if (client->closing) { 2042 nbd_client_put(client); 2043 return; 2044 } 2045 2046 req = nbd_request_get(client); 2047 ret = nbd_co_receive_request(req, &request, &local_err); 2048 client->recv_coroutine = NULL; 2049 2050 if (client->closing) { 2051 /* 2052 * The client may be closed when we are blocked in 2053 * nbd_co_receive_request() 2054 */ 2055 goto done; 2056 } 2057 2058 nbd_client_receive_next_request(client); 2059 if (ret == -EIO) { 2060 goto disconnect; 2061 } 2062 2063 if (ret < 0) { 2064 /* It wans't -EIO, so, according to nbd_co_receive_request() 2065 * semantics, we should return the error to the client. */ 2066 Error *export_err = local_err; 2067 2068 local_err = NULL; 2069 ret = nbd_send_generic_reply(client, request.handle, -EINVAL, 2070 error_get_pretty(export_err), &local_err); 2071 error_free(export_err); 2072 } else { 2073 ret = nbd_handle_request(client, &request, req->data, &local_err); 2074 } 2075 if (ret < 0) { 2076 error_prepend(&local_err, "Failed to send reply: "); 2077 goto disconnect; 2078 } 2079 2080 /* We must disconnect after NBD_CMD_WRITE if we did not 2081 * read the payload. 2082 */ 2083 if (!req->complete) { 2084 error_setg(&local_err, "Request handling failed in intermediate state"); 2085 goto disconnect; 2086 } 2087 2088 done: 2089 nbd_request_put(req); 2090 nbd_client_put(client); 2091 return; 2092 2093 disconnect: 2094 if (local_err) { 2095 error_reportf_err(local_err, "Disconnect client, due to: "); 2096 } 2097 nbd_request_put(req); 2098 client_close(client, true); 2099 nbd_client_put(client); 2100 } 2101 2102 static void nbd_client_receive_next_request(NBDClient *client) 2103 { 2104 if (!client->recv_coroutine && client->nb_requests < MAX_NBD_REQUESTS) { 2105 nbd_client_get(client); 2106 client->recv_coroutine = qemu_coroutine_create(nbd_trip, client); 2107 aio_co_schedule(client->exp->ctx, client->recv_coroutine); 2108 } 2109 } 2110 2111 static coroutine_fn void nbd_co_client_start(void *opaque) 2112 { 2113 NBDClient *client = opaque; 2114 NBDExport *exp = client->exp; 2115 Error *local_err = NULL; 2116 2117 if (exp) { 2118 nbd_export_get(exp); 2119 QTAILQ_INSERT_TAIL(&exp->clients, client, next); 2120 } 2121 qemu_co_mutex_init(&client->send_lock); 2122 2123 if (nbd_negotiate(client, &local_err)) { 2124 if (local_err) { 2125 error_report_err(local_err); 2126 } 2127 client_close(client, false); 2128 return; 2129 } 2130 2131 nbd_client_receive_next_request(client); 2132 } 2133 2134 /* 2135 * Create a new client listener on the given export @exp, using the 2136 * given channel @sioc. Begin servicing it in a coroutine. When the 2137 * connection closes, call @close_fn with an indication of whether the 2138 * client completed negotiation. 2139 */ 2140 void nbd_client_new(NBDExport *exp, 2141 QIOChannelSocket *sioc, 2142 QCryptoTLSCreds *tlscreds, 2143 const char *tlsaclname, 2144 void (*close_fn)(NBDClient *, bool)) 2145 { 2146 NBDClient *client; 2147 Coroutine *co; 2148 2149 client = g_new0(NBDClient, 1); 2150 client->refcount = 1; 2151 client->exp = exp; 2152 client->tlscreds = tlscreds; 2153 if (tlscreds) { 2154 object_ref(OBJECT(client->tlscreds)); 2155 } 2156 client->tlsaclname = g_strdup(tlsaclname); 2157 client->sioc = sioc; 2158 object_ref(OBJECT(client->sioc)); 2159 client->ioc = QIO_CHANNEL(sioc); 2160 object_ref(OBJECT(client->ioc)); 2161 client->close_fn = close_fn; 2162 2163 co = qemu_coroutine_create(nbd_co_client_start, client); 2164 qemu_coroutine_enter(co); 2165 } 2166