/*
 *  Copyright (C) 2016-2021 Red Hat, Inc.
 *  Copyright (C) 2005  Anthony Liguori <anthony@codemonkey.ws>
 *
 *  Network Block Device Server Side
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; under version 2 of the License.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"

#include "block/export.h"
#include "qapi/error.h"
#include "qemu/queue.h"
#include "trace.h"
#include "nbd-internal.h"
#include "qemu/units.h"

/*
 * Context ids reported to the client in meta context replies
 * (see nbd_negotiate_meta_queries()).
 */
#define NBD_META_ID_BASE_ALLOCATION 0
#define NBD_META_ID_ALLOCATION_DEPTH 1
/* Dirty bitmaps use 'NBD_META_ID_DIRTY_BITMAP + i', so keep this id last. */
#define NBD_META_ID_DIRTY_BITMAP 2

/*
 * NBD_MAX_BLOCK_STATUS_EXTENTS: 1 MiB of extents data. An empirical
 * constant. If an increase is needed, note that the NBD protocol
 * recommends no larger than 32 MiB, so that the client won't consider
 * the reply as a denial of service attack.
39 */ 40 #define NBD_MAX_BLOCK_STATUS_EXTENTS (1 * MiB / 8) 41 42 static int system_errno_to_nbd_errno(int err) 43 { 44 switch (err) { 45 case 0: 46 return NBD_SUCCESS; 47 case EPERM: 48 case EROFS: 49 return NBD_EPERM; 50 case EIO: 51 return NBD_EIO; 52 case ENOMEM: 53 return NBD_ENOMEM; 54 #ifdef EDQUOT 55 case EDQUOT: 56 #endif 57 case EFBIG: 58 case ENOSPC: 59 return NBD_ENOSPC; 60 case EOVERFLOW: 61 return NBD_EOVERFLOW; 62 case ENOTSUP: 63 #if ENOTSUP != EOPNOTSUPP 64 case EOPNOTSUPP: 65 #endif 66 return NBD_ENOTSUP; 67 case ESHUTDOWN: 68 return NBD_ESHUTDOWN; 69 case EINVAL: 70 default: 71 return NBD_EINVAL; 72 } 73 } 74 75 /* Definitions for opaque data types */ 76 77 typedef struct NBDRequestData NBDRequestData; 78 79 struct NBDRequestData { 80 NBDClient *client; 81 uint8_t *data; 82 bool complete; 83 }; 84 85 struct NBDExport { 86 BlockExport common; 87 88 char *name; 89 char *description; 90 uint64_t size; 91 uint16_t nbdflags; 92 QTAILQ_HEAD(, NBDClient) clients; 93 QTAILQ_ENTRY(NBDExport) next; 94 95 BlockBackend *eject_notifier_blk; 96 Notifier eject_notifier; 97 98 bool allocation_depth; 99 BdrvDirtyBitmap **export_bitmaps; 100 size_t nr_export_bitmaps; 101 }; 102 103 static QTAILQ_HEAD(, NBDExport) exports = QTAILQ_HEAD_INITIALIZER(exports); 104 105 /* NBDExportMetaContexts represents a list of contexts to be exported, 106 * as selected by NBD_OPT_SET_META_CONTEXT. Also used for 107 * NBD_OPT_LIST_META_CONTEXT. 
*/
typedef struct NBDExportMetaContexts {
    NBDExport *exp;        /* export the contexts were negotiated against */
    size_t count;          /* number of negotiated contexts */
    bool base_allocation;  /* export base:allocation context (block status) */
    bool allocation_depth; /* export qemu:allocation-depth */
    bool *bitmaps;         /*
                            * export qemu:dirty-bitmap:<export bitmap name>,
                            * sized by exp->nr_export_bitmaps
                            */
} NBDExportMetaContexts;

/* Per-connection server state, covering negotiation and transmission. */
struct NBDClient {
    int refcount;
    void (*close_fn)(NBDClient *client, bool negotiated);

    NBDExport *exp; /* export selected by NBD_OPT_EXPORT_NAME or NBD_OPT_GO */
    QCryptoTLSCreds *tlscreds;
    char *tlsauthz;
    QIOChannelSocket *sioc; /* The underlying data channel */
    QIOChannel *ioc; /* The current I/O channel which may differ (eg TLS) */

    Coroutine *recv_coroutine;

    CoMutex send_lock;
    Coroutine *send_coroutine;

    bool read_yielding;
    bool quiescing;

    QTAILQ_ENTRY(NBDClient) next; /* link in exp->clients */
    int nb_requests;
    bool closing;

    uint32_t check_align; /* If non-zero, check for aligned client requests */

    bool structured_reply; /* set once NBD_OPT_STRUCTURED_REPLY was acked */
    NBDExportMetaContexts export_meta;

    uint32_t opt; /* Current option being negotiated */
    uint32_t optlen; /* remaining length of data in ioc for the option being
                        negotiated now */
};

static void nbd_client_receive_next_request(NBDClient *client);

/* Basic flow for negotiation

   Server         Client
   Negotiate

   or

   Server         Client
   Negotiate #1
                  Option
   Negotiate #2

   ----

   followed by

   Server         Client
                  Request
   Response
                  Request
   Response
   ...
   ...
                  Request (type == 2)

*/

/*
 * Fill in the option reply header @rep: NBD_REP_MAGIC followed by
 * @option, @type and @length, each stored in big-endian byte order.
 */
static inline void set_be_option_rep(NBDOptionReply *rep, uint32_t option,
                                     uint32_t type, uint32_t length)
{
    stq_be_p(&rep->magic, NBD_REP_MAGIC);
    stl_be_p(&rep->option, option);
    stl_be_p(&rep->type, type);
    stl_be_p(&rep->length, length);
}

/* Send a reply header, including length, but no payload.
190 * Return -errno on error, 0 on success. */ 191 static int nbd_negotiate_send_rep_len(NBDClient *client, uint32_t type, 192 uint32_t len, Error **errp) 193 { 194 NBDOptionReply rep; 195 196 trace_nbd_negotiate_send_rep_len(client->opt, nbd_opt_lookup(client->opt), 197 type, nbd_rep_lookup(type), len); 198 199 assert(len < NBD_MAX_BUFFER_SIZE); 200 201 set_be_option_rep(&rep, client->opt, type, len); 202 return nbd_write(client->ioc, &rep, sizeof(rep), errp); 203 } 204 205 /* Send a reply header with default 0 length. 206 * Return -errno on error, 0 on success. */ 207 static int nbd_negotiate_send_rep(NBDClient *client, uint32_t type, 208 Error **errp) 209 { 210 return nbd_negotiate_send_rep_len(client, type, 0, errp); 211 } 212 213 /* Send an error reply. 214 * Return -errno on error, 0 on success. */ 215 static int GCC_FMT_ATTR(4, 0) 216 nbd_negotiate_send_rep_verr(NBDClient *client, uint32_t type, 217 Error **errp, const char *fmt, va_list va) 218 { 219 ERRP_GUARD(); 220 g_autofree char *msg = NULL; 221 int ret; 222 size_t len; 223 224 msg = g_strdup_vprintf(fmt, va); 225 len = strlen(msg); 226 assert(len < NBD_MAX_STRING_SIZE); 227 trace_nbd_negotiate_send_rep_err(msg); 228 ret = nbd_negotiate_send_rep_len(client, type, len, errp); 229 if (ret < 0) { 230 return ret; 231 } 232 if (nbd_write(client->ioc, msg, len, errp) < 0) { 233 error_prepend(errp, "write failed (error message): "); 234 return -EIO; 235 } 236 237 return 0; 238 } 239 240 /* 241 * Return a malloc'd copy of @name suitable for use in an error reply. 242 */ 243 static char * 244 nbd_sanitize_name(const char *name) 245 { 246 if (strnlen(name, 80) < 80) { 247 return g_strdup(name); 248 } 249 /* XXX Should we also try to sanitize any control characters? */ 250 return g_strdup_printf("%.80s...", name); 251 } 252 253 /* Send an error reply. 254 * Return -errno on error, 0 on success. 
*/ 255 static int GCC_FMT_ATTR(4, 5) 256 nbd_negotiate_send_rep_err(NBDClient *client, uint32_t type, 257 Error **errp, const char *fmt, ...) 258 { 259 va_list va; 260 int ret; 261 262 va_start(va, fmt); 263 ret = nbd_negotiate_send_rep_verr(client, type, errp, fmt, va); 264 va_end(va); 265 return ret; 266 } 267 268 /* Drop remainder of the current option, and send a reply with the 269 * given error type and message. Return -errno on read or write 270 * failure; or 0 if connection is still live. */ 271 static int GCC_FMT_ATTR(4, 0) 272 nbd_opt_vdrop(NBDClient *client, uint32_t type, Error **errp, 273 const char *fmt, va_list va) 274 { 275 int ret = nbd_drop(client->ioc, client->optlen, errp); 276 277 client->optlen = 0; 278 if (!ret) { 279 ret = nbd_negotiate_send_rep_verr(client, type, errp, fmt, va); 280 } 281 return ret; 282 } 283 284 static int GCC_FMT_ATTR(4, 5) 285 nbd_opt_drop(NBDClient *client, uint32_t type, Error **errp, 286 const char *fmt, ...) 287 { 288 int ret; 289 va_list va; 290 291 va_start(va, fmt); 292 ret = nbd_opt_vdrop(client, type, errp, fmt, va); 293 va_end(va); 294 295 return ret; 296 } 297 298 static int GCC_FMT_ATTR(3, 4) 299 nbd_opt_invalid(NBDClient *client, Error **errp, const char *fmt, ...) 300 { 301 int ret; 302 va_list va; 303 304 va_start(va, fmt); 305 ret = nbd_opt_vdrop(client, NBD_REP_ERR_INVALID, errp, fmt, va); 306 va_end(va); 307 308 return ret; 309 } 310 311 /* Read size bytes from the unparsed payload of the current option. 312 * If @check_nul, require that no NUL bytes appear in buffer. 313 * Return -errno on I/O error, 0 if option was completely handled by 314 * sending a reply about inconsistent lengths, or 1 on success. 
*/ 315 static int nbd_opt_read(NBDClient *client, void *buffer, size_t size, 316 bool check_nul, Error **errp) 317 { 318 if (size > client->optlen) { 319 return nbd_opt_invalid(client, errp, 320 "Inconsistent lengths in option %s", 321 nbd_opt_lookup(client->opt)); 322 } 323 client->optlen -= size; 324 if (qio_channel_read_all(client->ioc, buffer, size, errp) < 0) { 325 return -EIO; 326 } 327 328 if (check_nul && strnlen(buffer, size) != size) { 329 return nbd_opt_invalid(client, errp, 330 "Unexpected embedded NUL in option %s", 331 nbd_opt_lookup(client->opt)); 332 } 333 return 1; 334 } 335 336 /* Drop size bytes from the unparsed payload of the current option. 337 * Return -errno on I/O error, 0 if option was completely handled by 338 * sending a reply about inconsistent lengths, or 1 on success. */ 339 static int nbd_opt_skip(NBDClient *client, size_t size, Error **errp) 340 { 341 if (size > client->optlen) { 342 return nbd_opt_invalid(client, errp, 343 "Inconsistent lengths in option %s", 344 nbd_opt_lookup(client->opt)); 345 } 346 client->optlen -= size; 347 return nbd_drop(client->ioc, size, errp) < 0 ? -EIO : 1; 348 } 349 350 /* nbd_opt_read_name 351 * 352 * Read a string with the format: 353 * uint32_t len (<= NBD_MAX_STRING_SIZE) 354 * len bytes string (not 0-terminated) 355 * 356 * On success, @name will be allocated. 357 * If @length is non-null, it will be set to the actual string length. 358 * 359 * Return -errno on I/O error, 0 if option was completely handled by 360 * sending a reply about inconsistent lengths, or 1 on success. 
361 */ 362 static int nbd_opt_read_name(NBDClient *client, char **name, uint32_t *length, 363 Error **errp) 364 { 365 int ret; 366 uint32_t len; 367 g_autofree char *local_name = NULL; 368 369 *name = NULL; 370 ret = nbd_opt_read(client, &len, sizeof(len), false, errp); 371 if (ret <= 0) { 372 return ret; 373 } 374 len = cpu_to_be32(len); 375 376 if (len > NBD_MAX_STRING_SIZE) { 377 return nbd_opt_invalid(client, errp, 378 "Invalid name length: %" PRIu32, len); 379 } 380 381 local_name = g_malloc(len + 1); 382 ret = nbd_opt_read(client, local_name, len, true, errp); 383 if (ret <= 0) { 384 return ret; 385 } 386 local_name[len] = '\0'; 387 388 if (length) { 389 *length = len; 390 } 391 *name = g_steal_pointer(&local_name); 392 393 return 1; 394 } 395 396 /* Send a single NBD_REP_SERVER reply to NBD_OPT_LIST, including payload. 397 * Return -errno on error, 0 on success. */ 398 static int nbd_negotiate_send_rep_list(NBDClient *client, NBDExport *exp, 399 Error **errp) 400 { 401 ERRP_GUARD(); 402 size_t name_len, desc_len; 403 uint32_t len; 404 const char *name = exp->name ? exp->name : ""; 405 const char *desc = exp->description ? 
exp->description : ""; 406 QIOChannel *ioc = client->ioc; 407 int ret; 408 409 trace_nbd_negotiate_send_rep_list(name, desc); 410 name_len = strlen(name); 411 desc_len = strlen(desc); 412 assert(name_len <= NBD_MAX_STRING_SIZE && desc_len <= NBD_MAX_STRING_SIZE); 413 len = name_len + desc_len + sizeof(len); 414 ret = nbd_negotiate_send_rep_len(client, NBD_REP_SERVER, len, errp); 415 if (ret < 0) { 416 return ret; 417 } 418 419 len = cpu_to_be32(name_len); 420 if (nbd_write(ioc, &len, sizeof(len), errp) < 0) { 421 error_prepend(errp, "write failed (name length): "); 422 return -EINVAL; 423 } 424 425 if (nbd_write(ioc, name, name_len, errp) < 0) { 426 error_prepend(errp, "write failed (name buffer): "); 427 return -EINVAL; 428 } 429 430 if (nbd_write(ioc, desc, desc_len, errp) < 0) { 431 error_prepend(errp, "write failed (description buffer): "); 432 return -EINVAL; 433 } 434 435 return 0; 436 } 437 438 /* Process the NBD_OPT_LIST command, with a potential series of replies. 439 * Return -errno on error, 0 on success. */ 440 static int nbd_negotiate_handle_list(NBDClient *client, Error **errp) 441 { 442 NBDExport *exp; 443 assert(client->opt == NBD_OPT_LIST); 444 445 /* For each export, send a NBD_REP_SERVER reply. */ 446 QTAILQ_FOREACH(exp, &exports, next) { 447 if (nbd_negotiate_send_rep_list(client, exp, errp)) { 448 return -EINVAL; 449 } 450 } 451 /* Finish with a NBD_REP_ACK. */ 452 return nbd_negotiate_send_rep(client, NBD_REP_ACK, errp); 453 } 454 455 static void nbd_check_meta_export(NBDClient *client) 456 { 457 if (client->exp != client->export_meta.exp) { 458 client->export_meta.count = 0; 459 } 460 } 461 462 /* Send a reply to NBD_OPT_EXPORT_NAME. 463 * Return -errno on error, 0 on success. 
*/
static int nbd_negotiate_handle_export_name(NBDClient *client, bool no_zeroes,
                                            Error **errp)
{
    ERRP_GUARD();
    g_autofree char *export_name = NULL;
    char reply[NBD_REPLY_EXPORT_NAME_SIZE] = "";
    size_t reply_len = sizeof(reply);
    uint16_t flags;
    int rc;

    /* Client sends:
        [20 ..  xx]   export name (length bytes)
       Server replies:
        [ 0 ..   7]   size
        [ 8 ..   9]   export flags
        [10 .. 133]   reserved     (0) [unless no_zeroes]
     */
    trace_nbd_negotiate_handle_export_name();
    if (client->optlen > NBD_MAX_STRING_SIZE) {
        error_setg(errp, "Bad length received");
        return -EINVAL;
    }

    /* The whole option payload is the name; add room for a terminator. */
    export_name = g_malloc(client->optlen + 1);
    if (nbd_read(client->ioc, export_name, client->optlen, "export name",
                 errp) < 0) {
        return -EIO;
    }
    export_name[client->optlen] = '\0';
    client->optlen = 0;

    trace_nbd_negotiate_handle_export_name_request(export_name);

    client->exp = nbd_export_find(export_name);
    if (client->exp == NULL) {
        error_setg(errp, "export not found");
        return -EINVAL;
    }

    flags = client->exp->nbdflags;
    if (client->structured_reply) {
        flags |= NBD_FLAG_SEND_DF;
    }
    trace_nbd_negotiate_new_style_size_flags(client->exp->size, flags);
    stq_be_p(reply, client->exp->size);
    stw_be_p(reply + 8, flags);

    /* With no_zeroes only the size and flags (10 bytes) are sent. */
    if (no_zeroes) {
        reply_len = 10;
    }
    rc = nbd_write(client->ioc, reply, reply_len, errp);
    if (rc < 0) {
        error_prepend(errp, "write failed: ");
        return rc;
    }

    /* Attach the client to the export and take a reference on it. */
    QTAILQ_INSERT_TAIL(&client->exp->clients, client, next);
    blk_exp_ref(&client->exp->common);
    nbd_check_meta_export(client);

    return 0;
}

/* Send a single NBD_REP_INFO, with a buffer @buf of @length bytes.
 * The buffer does NOT include the info type prefix.
 * Return -errno on error, 0 if ready to send more.
*/ 525 static int nbd_negotiate_send_info(NBDClient *client, 526 uint16_t info, uint32_t length, void *buf, 527 Error **errp) 528 { 529 int rc; 530 531 trace_nbd_negotiate_send_info(info, nbd_info_lookup(info), length); 532 rc = nbd_negotiate_send_rep_len(client, NBD_REP_INFO, 533 sizeof(info) + length, errp); 534 if (rc < 0) { 535 return rc; 536 } 537 info = cpu_to_be16(info); 538 if (nbd_write(client->ioc, &info, sizeof(info), errp) < 0) { 539 return -EIO; 540 } 541 if (nbd_write(client->ioc, buf, length, errp) < 0) { 542 return -EIO; 543 } 544 return 0; 545 } 546 547 /* nbd_reject_length: Handle any unexpected payload. 548 * @fatal requests that we quit talking to the client, even if we are able 549 * to successfully send an error reply. 550 * Return: 551 * -errno transmission error occurred or @fatal was requested, errp is set 552 * 0 error message successfully sent to client, errp is not set 553 */ 554 static int nbd_reject_length(NBDClient *client, bool fatal, Error **errp) 555 { 556 int ret; 557 558 assert(client->optlen); 559 ret = nbd_opt_invalid(client, errp, "option '%s' has unexpected length", 560 nbd_opt_lookup(client->opt)); 561 if (fatal && !ret) { 562 error_setg(errp, "option '%s' has unexpected length", 563 nbd_opt_lookup(client->opt)); 564 return -EINVAL; 565 } 566 return ret; 567 } 568 569 /* Handle NBD_OPT_INFO and NBD_OPT_GO. 570 * Return -errno on error, 0 if ready for next option, and 1 to move 571 * into transmission phase. 
*/
static int nbd_negotiate_handle_info(NBDClient *client, Error **errp)
{
    g_autofree char *name = NULL;
    NBDExport *exp;
    uint32_t name_len = 0;
    uint16_t nr_requests;
    uint16_t i;
    bool want_name = false;
    bool want_block_size = false;
    uint32_t block_sizes[3];
    char export_info[sizeof(uint64_t) + sizeof(uint16_t)];
    uint32_t align = 0;
    uint16_t flags;
    int ret;

    /* Client sends:
        4 bytes: L, name length (can be 0)
        L bytes: export name
        2 bytes: N, number of requests (can be 0)
        N * 2 bytes: N requests
    */
    ret = nbd_opt_read_name(client, &name, &name_len, errp);
    if (ret <= 0) {
        return ret;
    }
    trace_nbd_negotiate_handle_export_name_request(name);

    ret = nbd_opt_read(client, &nr_requests, sizeof(nr_requests), false,
                       errp);
    if (ret <= 0) {
        return ret;
    }
    nr_requests = be16_to_cpu(nr_requests);
    trace_nbd_negotiate_handle_info_requests(nr_requests);

    for (i = 0; i < nr_requests; i++) {
        uint16_t request;

        ret = nbd_opt_read(client, &request, sizeof(request), false, errp);
        if (ret <= 0) {
            return ret;
        }
        request = be16_to_cpu(request);
        trace_nbd_negotiate_handle_info_request(request,
                                                nbd_info_lookup(request));
        /* We care about NBD_INFO_NAME and NBD_INFO_BLOCK_SIZE;
         * everything else is either a request we don't know or
         * something we send regardless of request */
        if (request == NBD_INFO_NAME) {
            want_name = true;
        } else if (request == NBD_INFO_BLOCK_SIZE) {
            want_block_size = true;
        }
    }

    /* Anything left over after the request list is a malformed option. */
    if (client->optlen) {
        return nbd_reject_length(client, false, errp);
    }

    exp = nbd_export_find(name);
    if (exp == NULL) {
        g_autofree char *sane_name = nbd_sanitize_name(name);

        return nbd_negotiate_send_rep_err(client, NBD_REP_ERR_UNKNOWN,
                                          errp, "export '%s' not present",
                                          sane_name);
    }

    /* Don't bother sending NBD_INFO_NAME unless client requested it */
    if (want_name) {
        ret = nbd_negotiate_send_info(client, NBD_INFO_NAME, name_len, name,
                                      errp);
        if (ret < 0) {
            return ret;
        }
    }

    /* Send NBD_INFO_DESCRIPTION only if available, regardless of
     * client request */
    if (exp->description) {
        size_t desc_len = strlen(exp->description);

        assert(desc_len <= NBD_MAX_STRING_SIZE);
        ret = nbd_negotiate_send_info(client, NBD_INFO_DESCRIPTION,
                                      desc_len, exp->description, errp);
        if (ret < 0) {
            return ret;
        }
    }

    /* Send NBD_INFO_BLOCK_SIZE always, but tweak the minimum size
     * according to whether the client requested it, and according to
     * whether this is OPT_INFO or OPT_GO. */
    /* minimum - 1 for back-compat, or actual if client will obey it. */
    if (client->opt == NBD_OPT_INFO || want_block_size) {
        block_sizes[0] = blk_get_request_alignment(exp->common.blk);
        align = block_sizes[0];
    } else {
        block_sizes[0] = 1;
    }
    assert(block_sizes[0] <= NBD_MAX_BUFFER_SIZE);
    /* preferred - Hard-code to 4096 for now.
     * TODO: is blk_bs(blk)->bl.opt_transfer appropriate? */
    block_sizes[1] = MAX(4096, block_sizes[0]);
    /* maximum - At most 32M, but smaller as appropriate. */
    block_sizes[2] = MIN(blk_get_max_transfer(exp->common.blk),
                         NBD_MAX_BUFFER_SIZE);
    trace_nbd_negotiate_handle_info_block_size(block_sizes[0], block_sizes[1],
                                               block_sizes[2]);
    /* Convert to wire order only after tracing the host-order values. */
    block_sizes[0] = cpu_to_be32(block_sizes[0]);
    block_sizes[1] = cpu_to_be32(block_sizes[1]);
    block_sizes[2] = cpu_to_be32(block_sizes[2]);
    ret = nbd_negotiate_send_info(client, NBD_INFO_BLOCK_SIZE,
                                  sizeof(block_sizes), block_sizes, errp);
    if (ret < 0) {
        return ret;
    }

    /* Send NBD_INFO_EXPORT always */
    flags = exp->nbdflags;
    if (client->structured_reply) {
        flags |= NBD_FLAG_SEND_DF;
    }
    trace_nbd_negotiate_new_style_size_flags(exp->size, flags);
    stq_be_p(export_info, exp->size);
    stw_be_p(export_info + 8, flags);
    ret = nbd_negotiate_send_info(client, NBD_INFO_EXPORT,
                                  sizeof(export_info), export_info, errp);
    if (ret < 0) {
        return ret;
    }

    /*
     * If the client is just asking for NBD_OPT_INFO, but forgot to
     * request block sizes in a situation that would impact
     * performance, then return an error. But for NBD_OPT_GO, we
     * tolerate all clients, regardless of alignments.
     */
    if (client->opt == NBD_OPT_INFO && !want_block_size &&
        blk_get_request_alignment(exp->common.blk) > 1) {
        return nbd_negotiate_send_rep_err(client,
                                          NBD_REP_ERR_BLOCK_SIZE_REQD,
                                          errp,
                                          "request NBD_INFO_BLOCK_SIZE to "
                                          "use this export");
    }

    /* Final reply */
    ret = nbd_negotiate_send_rep(client, NBD_REP_ACK, errp);
    if (ret < 0) {
        return ret;
    }

    if (client->opt != NBD_OPT_GO) {
        /* NBD_OPT_INFO: stay in negotiation. */
        return ret;
    }

    /* NBD_OPT_GO: bind the client to the export and enter transmission. */
    client->exp = exp;
    client->check_align = align;
    QTAILQ_INSERT_TAIL(&client->exp->clients, client, next);
    blk_exp_ref(&client->exp->common);
    nbd_check_meta_export(client);
    return 1;
}


/* Handle NBD_OPT_STARTTLS.  Return NULL to drop connection, or else the
 * new channel for all further (now-encrypted) communication.
*/
static QIOChannel *nbd_negotiate_handle_starttls(NBDClient *client,
                                                 Error **errp)
{
    struct NBDTLSHandshakeData handshake = { 0 };
    QIOChannelTLS *tls_chan;

    assert(client->opt == NBD_OPT_STARTTLS);

    trace_nbd_negotiate_handle_starttls();

    /* Acknowledge on the current channel before layering TLS on top. */
    if (nbd_negotiate_send_rep(client, NBD_REP_ACK, errp) < 0) {
        return NULL;
    }

    tls_chan = qio_channel_tls_new_server(client->ioc,
                                          client->tlscreds,
                                          client->tlsauthz,
                                          errp);
    if (tls_chan == NULL) {
        return NULL;
    }

    qio_channel_set_name(QIO_CHANNEL(tls_chan), "nbd-server-tls");
    trace_nbd_negotiate_handle_starttls_handshake();
    handshake.loop = g_main_loop_new(g_main_context_default(), FALSE);
    qio_channel_tls_handshake(tls_chan,
                              nbd_tls_handshake,
                              &handshake,
                              NULL,
                              NULL);

    /* Run the main loop unless the handshake already reported completion. */
    if (!handshake.complete) {
        g_main_loop_run(handshake.loop);
    }
    g_main_loop_unref(handshake.loop);

    if (handshake.error) {
        /* Handshake failed: release the TLS channel and report why. */
        object_unref(OBJECT(tls_chan));
        error_propagate(errp, handshake.error);
        return NULL;
    }

    return QIO_CHANNEL(tls_chan);
}

/* nbd_negotiate_send_meta_context
 *
 * Send one chunk of reply to NBD_OPT_{LIST,SET}_META_CONTEXT
 *
 * For NBD_OPT_LIST_META_CONTEXT @context_id is ignored, 0 is used instead.
785 */ 786 static int nbd_negotiate_send_meta_context(NBDClient *client, 787 const char *context, 788 uint32_t context_id, 789 Error **errp) 790 { 791 NBDOptionReplyMetaContext opt; 792 struct iovec iov[] = { 793 {.iov_base = &opt, .iov_len = sizeof(opt)}, 794 {.iov_base = (void *)context, .iov_len = strlen(context)} 795 }; 796 797 assert(iov[1].iov_len <= NBD_MAX_STRING_SIZE); 798 if (client->opt == NBD_OPT_LIST_META_CONTEXT) { 799 context_id = 0; 800 } 801 802 trace_nbd_negotiate_meta_query_reply(context, context_id); 803 set_be_option_rep(&opt.h, client->opt, NBD_REP_META_CONTEXT, 804 sizeof(opt) - sizeof(opt.h) + iov[1].iov_len); 805 stl_be_p(&opt.context_id, context_id); 806 807 return qio_channel_writev_all(client->ioc, iov, 2, errp) < 0 ? -EIO : 0; 808 } 809 810 /* 811 * Return true if @query matches @pattern, or if @query is empty when 812 * the @client is performing _LIST_. 813 */ 814 static bool nbd_meta_empty_or_pattern(NBDClient *client, const char *pattern, 815 const char *query) 816 { 817 if (!*query) { 818 trace_nbd_negotiate_meta_query_parse("empty"); 819 return client->opt == NBD_OPT_LIST_META_CONTEXT; 820 } 821 if (strcmp(query, pattern) == 0) { 822 trace_nbd_negotiate_meta_query_parse(pattern); 823 return true; 824 } 825 trace_nbd_negotiate_meta_query_skip("pattern not matched"); 826 return false; 827 } 828 829 /* 830 * Return true and adjust @str in place if it begins with @prefix. 831 */ 832 static bool nbd_strshift(const char **str, const char *prefix) 833 { 834 size_t len = strlen(prefix); 835 836 if (strncmp(*str, prefix, len) == 0) { 837 *str += len; 838 return true; 839 } 840 return false; 841 } 842 843 /* nbd_meta_base_query 844 * 845 * Handle queries to 'base' namespace. For now, only the base:allocation 846 * context is available. Return true if @query has been handled. 
847 */ 848 static bool nbd_meta_base_query(NBDClient *client, NBDExportMetaContexts *meta, 849 const char *query) 850 { 851 if (!nbd_strshift(&query, "base:")) { 852 return false; 853 } 854 trace_nbd_negotiate_meta_query_parse("base:"); 855 856 if (nbd_meta_empty_or_pattern(client, "allocation", query)) { 857 meta->base_allocation = true; 858 } 859 return true; 860 } 861 862 /* nbd_meta_qemu_query 863 * 864 * Handle queries to 'qemu' namespace. For now, only the qemu:dirty-bitmap: 865 * and qemu:allocation-depth contexts are available. Return true if @query 866 * has been handled. 867 */ 868 static bool nbd_meta_qemu_query(NBDClient *client, NBDExportMetaContexts *meta, 869 const char *query) 870 { 871 size_t i; 872 873 if (!nbd_strshift(&query, "qemu:")) { 874 return false; 875 } 876 trace_nbd_negotiate_meta_query_parse("qemu:"); 877 878 if (!*query) { 879 if (client->opt == NBD_OPT_LIST_META_CONTEXT) { 880 meta->allocation_depth = meta->exp->allocation_depth; 881 if (meta->exp->nr_export_bitmaps) { 882 memset(meta->bitmaps, 1, meta->exp->nr_export_bitmaps); 883 } 884 } 885 trace_nbd_negotiate_meta_query_parse("empty"); 886 return true; 887 } 888 889 if (strcmp(query, "allocation-depth") == 0) { 890 trace_nbd_negotiate_meta_query_parse("allocation-depth"); 891 meta->allocation_depth = meta->exp->allocation_depth; 892 return true; 893 } 894 895 if (nbd_strshift(&query, "dirty-bitmap:")) { 896 trace_nbd_negotiate_meta_query_parse("dirty-bitmap:"); 897 if (!*query) { 898 if (client->opt == NBD_OPT_LIST_META_CONTEXT && 899 meta->exp->nr_export_bitmaps) { 900 memset(meta->bitmaps, 1, meta->exp->nr_export_bitmaps); 901 } 902 trace_nbd_negotiate_meta_query_parse("empty"); 903 return true; 904 } 905 906 for (i = 0; i < meta->exp->nr_export_bitmaps; i++) { 907 const char *bm_name; 908 909 bm_name = bdrv_dirty_bitmap_name(meta->exp->export_bitmaps[i]); 910 if (strcmp(bm_name, query) == 0) { 911 meta->bitmaps[i] = true; 912 trace_nbd_negotiate_meta_query_parse(query); 913 
return true; 914 } 915 } 916 trace_nbd_negotiate_meta_query_skip("no dirty-bitmap match"); 917 return true; 918 } 919 920 trace_nbd_negotiate_meta_query_skip("unknown qemu context"); 921 return true; 922 } 923 924 /* nbd_negotiate_meta_query 925 * 926 * Parse namespace name and call corresponding function to parse body of the 927 * query. 928 * 929 * The only supported namespaces are 'base' and 'qemu'. 930 * 931 * Return -errno on I/O error, 0 if option was completely handled by 932 * sending a reply about inconsistent lengths, or 1 on success. */ 933 static int nbd_negotiate_meta_query(NBDClient *client, 934 NBDExportMetaContexts *meta, Error **errp) 935 { 936 int ret; 937 g_autofree char *query = NULL; 938 uint32_t len; 939 940 ret = nbd_opt_read(client, &len, sizeof(len), false, errp); 941 if (ret <= 0) { 942 return ret; 943 } 944 len = cpu_to_be32(len); 945 946 if (len > NBD_MAX_STRING_SIZE) { 947 trace_nbd_negotiate_meta_query_skip("length too long"); 948 return nbd_opt_skip(client, len, errp); 949 } 950 951 query = g_malloc(len + 1); 952 ret = nbd_opt_read(client, query, len, true, errp); 953 if (ret <= 0) { 954 return ret; 955 } 956 query[len] = '\0'; 957 958 if (nbd_meta_base_query(client, meta, query)) { 959 return 1; 960 } 961 if (nbd_meta_qemu_query(client, meta, query)) { 962 return 1; 963 } 964 965 trace_nbd_negotiate_meta_query_skip("unknown namespace"); 966 return 1; 967 } 968 969 /* nbd_negotiate_meta_queries 970 * Handle NBD_OPT_LIST_META_CONTEXT and NBD_OPT_SET_META_CONTEXT 971 * 972 * Return -errno on I/O error, or 0 if option was completely handled. 
*/ 973 static int nbd_negotiate_meta_queries(NBDClient *client, 974 NBDExportMetaContexts *meta, Error **errp) 975 { 976 int ret; 977 g_autofree char *export_name = NULL; 978 /* Mark unused to work around https://bugs.llvm.org/show_bug.cgi?id=3888 */ 979 g_autofree G_GNUC_UNUSED bool *bitmaps = NULL; 980 NBDExportMetaContexts local_meta = {0}; 981 uint32_t nb_queries; 982 size_t i; 983 size_t count = 0; 984 985 if (client->opt == NBD_OPT_SET_META_CONTEXT && !client->structured_reply) { 986 return nbd_opt_invalid(client, errp, 987 "request option '%s' when structured reply " 988 "is not negotiated", 989 nbd_opt_lookup(client->opt)); 990 } 991 992 if (client->opt == NBD_OPT_LIST_META_CONTEXT) { 993 /* Only change the caller's meta on SET. */ 994 meta = &local_meta; 995 } 996 997 g_free(meta->bitmaps); 998 memset(meta, 0, sizeof(*meta)); 999 1000 ret = nbd_opt_read_name(client, &export_name, NULL, errp); 1001 if (ret <= 0) { 1002 return ret; 1003 } 1004 1005 meta->exp = nbd_export_find(export_name); 1006 if (meta->exp == NULL) { 1007 g_autofree char *sane_name = nbd_sanitize_name(export_name); 1008 1009 return nbd_opt_drop(client, NBD_REP_ERR_UNKNOWN, errp, 1010 "export '%s' not present", sane_name); 1011 } 1012 meta->bitmaps = g_new0(bool, meta->exp->nr_export_bitmaps); 1013 if (client->opt == NBD_OPT_LIST_META_CONTEXT) { 1014 bitmaps = meta->bitmaps; 1015 } 1016 1017 ret = nbd_opt_read(client, &nb_queries, sizeof(nb_queries), false, errp); 1018 if (ret <= 0) { 1019 return ret; 1020 } 1021 nb_queries = cpu_to_be32(nb_queries); 1022 trace_nbd_negotiate_meta_context(nbd_opt_lookup(client->opt), 1023 export_name, nb_queries); 1024 1025 if (client->opt == NBD_OPT_LIST_META_CONTEXT && !nb_queries) { 1026 /* enable all known contexts */ 1027 meta->base_allocation = true; 1028 meta->allocation_depth = meta->exp->allocation_depth; 1029 if (meta->exp->nr_export_bitmaps) { 1030 memset(meta->bitmaps, 1, meta->exp->nr_export_bitmaps); 1031 } 1032 } else { 1033 for (i = 0; i < 
nb_queries; ++i) { 1034 ret = nbd_negotiate_meta_query(client, meta, errp); 1035 if (ret <= 0) { 1036 return ret; 1037 } 1038 } 1039 } 1040 1041 if (meta->base_allocation) { 1042 ret = nbd_negotiate_send_meta_context(client, "base:allocation", 1043 NBD_META_ID_BASE_ALLOCATION, 1044 errp); 1045 if (ret < 0) { 1046 return ret; 1047 } 1048 count++; 1049 } 1050 1051 if (meta->allocation_depth) { 1052 ret = nbd_negotiate_send_meta_context(client, "qemu:allocation-depth", 1053 NBD_META_ID_ALLOCATION_DEPTH, 1054 errp); 1055 if (ret < 0) { 1056 return ret; 1057 } 1058 count++; 1059 } 1060 1061 for (i = 0; i < meta->exp->nr_export_bitmaps; i++) { 1062 const char *bm_name; 1063 g_autofree char *context = NULL; 1064 1065 if (!meta->bitmaps[i]) { 1066 continue; 1067 } 1068 1069 bm_name = bdrv_dirty_bitmap_name(meta->exp->export_bitmaps[i]); 1070 context = g_strdup_printf("qemu:dirty-bitmap:%s", bm_name); 1071 1072 ret = nbd_negotiate_send_meta_context(client, context, 1073 NBD_META_ID_DIRTY_BITMAP + i, 1074 errp); 1075 if (ret < 0) { 1076 return ret; 1077 } 1078 count++; 1079 } 1080 1081 ret = nbd_negotiate_send_rep(client, NBD_REP_ACK, errp); 1082 if (ret == 0) { 1083 meta->count = count; 1084 } 1085 1086 return ret; 1087 } 1088 1089 /* nbd_negotiate_options 1090 * Process all NBD_OPT_* client option commands, during fixed newstyle 1091 * negotiation. 1092 * Return: 1093 * -errno on error, errp is set 1094 * 0 on successful negotiation, errp is not set 1095 * 1 if client sent NBD_OPT_ABORT, i.e. on valid disconnect, 1096 * errp is not set 1097 */ 1098 static int nbd_negotiate_options(NBDClient *client, Error **errp) 1099 { 1100 uint32_t flags; 1101 bool fixedNewstyle = false; 1102 bool no_zeroes = false; 1103 1104 /* Client sends: 1105 [ 0 .. 3] client flags 1106 1107 Then we loop until NBD_OPT_EXPORT_NAME or NBD_OPT_GO: 1108 [ 0 .. 7] NBD_OPTS_MAGIC 1109 [ 8 .. 11] NBD option 1110 [12 .. 15] Data length 1111 ... Rest of request 1112 1113 [ 0 .. 7] NBD_OPTS_MAGIC 1114 [ 8 .. 
11] Second NBD option 1115 [12 .. 15] Data length 1116 ... Rest of request 1117 */ 1118 1119 if (nbd_read32(client->ioc, &flags, "flags", errp) < 0) { 1120 return -EIO; 1121 } 1122 trace_nbd_negotiate_options_flags(flags); 1123 if (flags & NBD_FLAG_C_FIXED_NEWSTYLE) { 1124 fixedNewstyle = true; 1125 flags &= ~NBD_FLAG_C_FIXED_NEWSTYLE; 1126 } 1127 if (flags & NBD_FLAG_C_NO_ZEROES) { 1128 no_zeroes = true; 1129 flags &= ~NBD_FLAG_C_NO_ZEROES; 1130 } 1131 if (flags != 0) { 1132 error_setg(errp, "Unknown client flags 0x%" PRIx32 " received", flags); 1133 return -EINVAL; 1134 } 1135 1136 while (1) { 1137 int ret; 1138 uint32_t option, length; 1139 uint64_t magic; 1140 1141 if (nbd_read64(client->ioc, &magic, "opts magic", errp) < 0) { 1142 return -EINVAL; 1143 } 1144 trace_nbd_negotiate_options_check_magic(magic); 1145 if (magic != NBD_OPTS_MAGIC) { 1146 error_setg(errp, "Bad magic received"); 1147 return -EINVAL; 1148 } 1149 1150 if (nbd_read32(client->ioc, &option, "option", errp) < 0) { 1151 return -EINVAL; 1152 } 1153 client->opt = option; 1154 1155 if (nbd_read32(client->ioc, &length, "option length", errp) < 0) { 1156 return -EINVAL; 1157 } 1158 assert(!client->optlen); 1159 client->optlen = length; 1160 1161 if (length > NBD_MAX_BUFFER_SIZE) { 1162 error_setg(errp, "len (%" PRIu32" ) is larger than max len (%u)", 1163 length, NBD_MAX_BUFFER_SIZE); 1164 return -EINVAL; 1165 } 1166 1167 trace_nbd_negotiate_options_check_option(option, 1168 nbd_opt_lookup(option)); 1169 if (client->tlscreds && 1170 client->ioc == (QIOChannel *)client->sioc) { 1171 QIOChannel *tioc; 1172 if (!fixedNewstyle) { 1173 error_setg(errp, "Unsupported option 0x%" PRIx32, option); 1174 return -EINVAL; 1175 } 1176 switch (option) { 1177 case NBD_OPT_STARTTLS: 1178 if (length) { 1179 /* Unconditionally drop the connection if the client 1180 * can't start a TLS negotiation correctly */ 1181 return nbd_reject_length(client, true, errp); 1182 } 1183 tioc = nbd_negotiate_handle_starttls(client, 
errp); 1184 if (!tioc) { 1185 return -EIO; 1186 } 1187 ret = 0; 1188 object_unref(OBJECT(client->ioc)); 1189 client->ioc = QIO_CHANNEL(tioc); 1190 break; 1191 1192 case NBD_OPT_EXPORT_NAME: 1193 /* No way to return an error to client, so drop connection */ 1194 error_setg(errp, "Option 0x%x not permitted before TLS", 1195 option); 1196 return -EINVAL; 1197 1198 default: 1199 /* Let the client keep trying, unless they asked to 1200 * quit. Always try to give an error back to the 1201 * client; but when replying to OPT_ABORT, be aware 1202 * that the client may hang up before receiving the 1203 * error, in which case we are fine ignoring the 1204 * resulting EPIPE. */ 1205 ret = nbd_opt_drop(client, NBD_REP_ERR_TLS_REQD, 1206 option == NBD_OPT_ABORT ? NULL : errp, 1207 "Option 0x%" PRIx32 1208 " not permitted before TLS", option); 1209 if (option == NBD_OPT_ABORT) { 1210 return 1; 1211 } 1212 break; 1213 } 1214 } else if (fixedNewstyle) { 1215 switch (option) { 1216 case NBD_OPT_LIST: 1217 if (length) { 1218 ret = nbd_reject_length(client, false, errp); 1219 } else { 1220 ret = nbd_negotiate_handle_list(client, errp); 1221 } 1222 break; 1223 1224 case NBD_OPT_ABORT: 1225 /* NBD spec says we must try to reply before 1226 * disconnecting, but that we must also tolerate 1227 * guests that don't wait for our reply. 
*/ 1228 nbd_negotiate_send_rep(client, NBD_REP_ACK, NULL); 1229 return 1; 1230 1231 case NBD_OPT_EXPORT_NAME: 1232 return nbd_negotiate_handle_export_name(client, no_zeroes, 1233 errp); 1234 1235 case NBD_OPT_INFO: 1236 case NBD_OPT_GO: 1237 ret = nbd_negotiate_handle_info(client, errp); 1238 if (ret == 1) { 1239 assert(option == NBD_OPT_GO); 1240 return 0; 1241 } 1242 break; 1243 1244 case NBD_OPT_STARTTLS: 1245 if (length) { 1246 ret = nbd_reject_length(client, false, errp); 1247 } else if (client->tlscreds) { 1248 ret = nbd_negotiate_send_rep_err(client, 1249 NBD_REP_ERR_INVALID, errp, 1250 "TLS already enabled"); 1251 } else { 1252 ret = nbd_negotiate_send_rep_err(client, 1253 NBD_REP_ERR_POLICY, errp, 1254 "TLS not configured"); 1255 } 1256 break; 1257 1258 case NBD_OPT_STRUCTURED_REPLY: 1259 if (length) { 1260 ret = nbd_reject_length(client, false, errp); 1261 } else if (client->structured_reply) { 1262 ret = nbd_negotiate_send_rep_err( 1263 client, NBD_REP_ERR_INVALID, errp, 1264 "structured reply already negotiated"); 1265 } else { 1266 ret = nbd_negotiate_send_rep(client, NBD_REP_ACK, errp); 1267 client->structured_reply = true; 1268 } 1269 break; 1270 1271 case NBD_OPT_LIST_META_CONTEXT: 1272 case NBD_OPT_SET_META_CONTEXT: 1273 ret = nbd_negotiate_meta_queries(client, &client->export_meta, 1274 errp); 1275 break; 1276 1277 default: 1278 ret = nbd_opt_drop(client, NBD_REP_ERR_UNSUP, errp, 1279 "Unsupported option %" PRIu32 " (%s)", 1280 option, nbd_opt_lookup(option)); 1281 break; 1282 } 1283 } else { 1284 /* 1285 * If broken new-style we should drop the connection 1286 * for anything except NBD_OPT_EXPORT_NAME 1287 */ 1288 switch (option) { 1289 case NBD_OPT_EXPORT_NAME: 1290 return nbd_negotiate_handle_export_name(client, no_zeroes, 1291 errp); 1292 1293 default: 1294 error_setg(errp, "Unsupported option %" PRIu32 " (%s)", 1295 option, nbd_opt_lookup(option)); 1296 return -EINVAL; 1297 } 1298 } 1299 if (ret < 0) { 1300 return ret; 1301 } 1302 } 1303 } 
1304 1305 /* nbd_negotiate 1306 * Return: 1307 * -errno on error, errp is set 1308 * 0 on successful negotiation, errp is not set 1309 * 1 if client sent NBD_OPT_ABORT, i.e. on valid disconnect, 1310 * errp is not set 1311 */ 1312 static coroutine_fn int nbd_negotiate(NBDClient *client, Error **errp) 1313 { 1314 ERRP_GUARD(); 1315 char buf[NBD_OLDSTYLE_NEGOTIATE_SIZE] = ""; 1316 int ret; 1317 1318 /* Old style negotiation header, no room for options 1319 [ 0 .. 7] passwd ("NBDMAGIC") 1320 [ 8 .. 15] magic (NBD_CLIENT_MAGIC) 1321 [16 .. 23] size 1322 [24 .. 27] export flags (zero-extended) 1323 [28 .. 151] reserved (0) 1324 1325 New style negotiation header, client can send options 1326 [ 0 .. 7] passwd ("NBDMAGIC") 1327 [ 8 .. 15] magic (NBD_OPTS_MAGIC) 1328 [16 .. 17] server flags (0) 1329 ....options sent, ending in NBD_OPT_EXPORT_NAME or NBD_OPT_GO.... 1330 */ 1331 1332 qio_channel_set_blocking(client->ioc, false, NULL); 1333 1334 trace_nbd_negotiate_begin(); 1335 memcpy(buf, "NBDMAGIC", 8); 1336 1337 stq_be_p(buf + 8, NBD_OPTS_MAGIC); 1338 stw_be_p(buf + 16, NBD_FLAG_FIXED_NEWSTYLE | NBD_FLAG_NO_ZEROES); 1339 1340 if (nbd_write(client->ioc, buf, 18, errp) < 0) { 1341 error_prepend(errp, "write failed: "); 1342 return -EINVAL; 1343 } 1344 ret = nbd_negotiate_options(client, errp); 1345 if (ret != 0) { 1346 if (ret < 0) { 1347 error_prepend(errp, "option negotiation failed: "); 1348 } 1349 return ret; 1350 } 1351 1352 /* Attach the channel to the same AioContext as the export */ 1353 if (client->exp && client->exp->common.ctx) { 1354 qio_channel_attach_aio_context(client->ioc, client->exp->common.ctx); 1355 } 1356 1357 assert(!client->optlen); 1358 trace_nbd_negotiate_success(); 1359 1360 return 0; 1361 } 1362 1363 /* nbd_read_eof 1364 * Tries to read @size bytes from @ioc. This is a local implementation of 1365 * qio_channel_readv_all_eof. We have it here because we need it to be 1366 * interruptible and to know when the coroutine is yielding. 
1367 * Returns 1 on success 1368 * 0 on eof, when no data was read (errp is not set) 1369 * negative errno on failure (errp is set) 1370 */ 1371 static inline int coroutine_fn 1372 nbd_read_eof(NBDClient *client, void *buffer, size_t size, Error **errp) 1373 { 1374 bool partial = false; 1375 1376 assert(size); 1377 while (size > 0) { 1378 struct iovec iov = { .iov_base = buffer, .iov_len = size }; 1379 ssize_t len; 1380 1381 len = qio_channel_readv(client->ioc, &iov, 1, errp); 1382 if (len == QIO_CHANNEL_ERR_BLOCK) { 1383 client->read_yielding = true; 1384 qio_channel_yield(client->ioc, G_IO_IN); 1385 client->read_yielding = false; 1386 if (client->quiescing) { 1387 return -EAGAIN; 1388 } 1389 continue; 1390 } else if (len < 0) { 1391 return -EIO; 1392 } else if (len == 0) { 1393 if (partial) { 1394 error_setg(errp, 1395 "Unexpected end-of-file before all bytes were read"); 1396 return -EIO; 1397 } else { 1398 return 0; 1399 } 1400 } 1401 1402 partial = true; 1403 size -= len; 1404 buffer = (uint8_t *) buffer + len; 1405 } 1406 return 1; 1407 } 1408 1409 static int nbd_receive_request(NBDClient *client, NBDRequest *request, 1410 Error **errp) 1411 { 1412 uint8_t buf[NBD_REQUEST_SIZE]; 1413 uint32_t magic; 1414 int ret; 1415 1416 ret = nbd_read_eof(client, buf, sizeof(buf), errp); 1417 if (ret < 0) { 1418 return ret; 1419 } 1420 if (ret == 0) { 1421 return -EIO; 1422 } 1423 1424 /* Request 1425 [ 0 .. 3] magic (NBD_REQUEST_MAGIC) 1426 [ 4 .. 5] flags (NBD_CMD_FLAG_FUA, ...) 1427 [ 6 .. 7] type (NBD_CMD_READ, ...) 1428 [ 8 .. 15] handle 1429 [16 .. 23] from 1430 [24 .. 
27] len 1431 */ 1432 1433 magic = ldl_be_p(buf); 1434 request->flags = lduw_be_p(buf + 4); 1435 request->type = lduw_be_p(buf + 6); 1436 request->handle = ldq_be_p(buf + 8); 1437 request->from = ldq_be_p(buf + 16); 1438 request->len = ldl_be_p(buf + 24); 1439 1440 trace_nbd_receive_request(magic, request->flags, request->type, 1441 request->from, request->len); 1442 1443 if (magic != NBD_REQUEST_MAGIC) { 1444 error_setg(errp, "invalid magic (got 0x%" PRIx32 ")", magic); 1445 return -EINVAL; 1446 } 1447 return 0; 1448 } 1449 1450 #define MAX_NBD_REQUESTS 16 1451 1452 void nbd_client_get(NBDClient *client) 1453 { 1454 client->refcount++; 1455 } 1456 1457 void nbd_client_put(NBDClient *client) 1458 { 1459 if (--client->refcount == 0) { 1460 /* The last reference should be dropped by client->close, 1461 * which is called by client_close. 1462 */ 1463 assert(client->closing); 1464 1465 qio_channel_detach_aio_context(client->ioc); 1466 object_unref(OBJECT(client->sioc)); 1467 object_unref(OBJECT(client->ioc)); 1468 if (client->tlscreds) { 1469 object_unref(OBJECT(client->tlscreds)); 1470 } 1471 g_free(client->tlsauthz); 1472 if (client->exp) { 1473 QTAILQ_REMOVE(&client->exp->clients, client, next); 1474 blk_exp_unref(&client->exp->common); 1475 } 1476 g_free(client->export_meta.bitmaps); 1477 g_free(client); 1478 } 1479 } 1480 1481 static void client_close(NBDClient *client, bool negotiated) 1482 { 1483 if (client->closing) { 1484 return; 1485 } 1486 1487 client->closing = true; 1488 1489 /* Force requests to finish. They will drop their own references, 1490 * then we'll close the socket and free the NBDClient. 1491 */ 1492 qio_channel_shutdown(client->ioc, QIO_CHANNEL_SHUTDOWN_BOTH, 1493 NULL); 1494 1495 /* Also tell the client, so that they release their reference. 
*/ 1496 if (client->close_fn) { 1497 client->close_fn(client, negotiated); 1498 } 1499 } 1500 1501 static NBDRequestData *nbd_request_get(NBDClient *client) 1502 { 1503 NBDRequestData *req; 1504 1505 assert(client->nb_requests <= MAX_NBD_REQUESTS - 1); 1506 client->nb_requests++; 1507 1508 req = g_new0(NBDRequestData, 1); 1509 nbd_client_get(client); 1510 req->client = client; 1511 return req; 1512 } 1513 1514 static void nbd_request_put(NBDRequestData *req) 1515 { 1516 NBDClient *client = req->client; 1517 1518 if (req->data) { 1519 qemu_vfree(req->data); 1520 } 1521 g_free(req); 1522 1523 client->nb_requests--; 1524 1525 if (client->quiescing && client->nb_requests == 0) { 1526 aio_wait_kick(); 1527 } 1528 1529 nbd_client_receive_next_request(client); 1530 1531 nbd_client_put(client); 1532 } 1533 1534 static void blk_aio_attached(AioContext *ctx, void *opaque) 1535 { 1536 NBDExport *exp = opaque; 1537 NBDClient *client; 1538 1539 trace_nbd_blk_aio_attached(exp->name, ctx); 1540 1541 exp->common.ctx = ctx; 1542 1543 QTAILQ_FOREACH(client, &exp->clients, next) { 1544 qio_channel_attach_aio_context(client->ioc, ctx); 1545 1546 assert(client->nb_requests == 0); 1547 assert(client->recv_coroutine == NULL); 1548 assert(client->send_coroutine == NULL); 1549 } 1550 } 1551 1552 static void blk_aio_detach(void *opaque) 1553 { 1554 NBDExport *exp = opaque; 1555 NBDClient *client; 1556 1557 trace_nbd_blk_aio_detach(exp->name, exp->common.ctx); 1558 1559 QTAILQ_FOREACH(client, &exp->clients, next) { 1560 qio_channel_detach_aio_context(client->ioc); 1561 } 1562 1563 exp->common.ctx = NULL; 1564 } 1565 1566 static void nbd_drained_begin(void *opaque) 1567 { 1568 NBDExport *exp = opaque; 1569 NBDClient *client; 1570 1571 QTAILQ_FOREACH(client, &exp->clients, next) { 1572 client->quiescing = true; 1573 } 1574 } 1575 1576 static void nbd_drained_end(void *opaque) 1577 { 1578 NBDExport *exp = opaque; 1579 NBDClient *client; 1580 1581 QTAILQ_FOREACH(client, &exp->clients, next) { 
1582 client->quiescing = false; 1583 nbd_client_receive_next_request(client); 1584 } 1585 } 1586 1587 static bool nbd_drained_poll(void *opaque) 1588 { 1589 NBDExport *exp = opaque; 1590 NBDClient *client; 1591 1592 QTAILQ_FOREACH(client, &exp->clients, next) { 1593 if (client->nb_requests != 0) { 1594 /* 1595 * If there's a coroutine waiting for a request on nbd_read_eof() 1596 * enter it here so we don't depend on the client to wake it up. 1597 */ 1598 if (client->recv_coroutine != NULL && client->read_yielding) { 1599 qemu_aio_coroutine_enter(exp->common.ctx, 1600 client->recv_coroutine); 1601 } 1602 1603 return true; 1604 } 1605 } 1606 1607 return false; 1608 } 1609 1610 static void nbd_eject_notifier(Notifier *n, void *data) 1611 { 1612 NBDExport *exp = container_of(n, NBDExport, eject_notifier); 1613 1614 blk_exp_request_shutdown(&exp->common); 1615 } 1616 1617 void nbd_export_set_on_eject_blk(BlockExport *exp, BlockBackend *blk) 1618 { 1619 NBDExport *nbd_exp = container_of(exp, NBDExport, common); 1620 assert(exp->drv == &blk_exp_nbd); 1621 assert(nbd_exp->eject_notifier_blk == NULL); 1622 1623 blk_ref(blk); 1624 nbd_exp->eject_notifier_blk = blk; 1625 nbd_exp->eject_notifier.notify = nbd_eject_notifier; 1626 blk_add_remove_bs_notifier(blk, &nbd_exp->eject_notifier); 1627 } 1628 1629 static const BlockDevOps nbd_block_ops = { 1630 .drained_begin = nbd_drained_begin, 1631 .drained_end = nbd_drained_end, 1632 .drained_poll = nbd_drained_poll, 1633 }; 1634 1635 static int nbd_export_create(BlockExport *blk_exp, BlockExportOptions *exp_args, 1636 Error **errp) 1637 { 1638 NBDExport *exp = container_of(blk_exp, NBDExport, common); 1639 BlockExportOptionsNbd *arg = &exp_args->u.nbd; 1640 BlockBackend *blk = blk_exp->blk; 1641 int64_t size; 1642 uint64_t perm, shared_perm; 1643 bool readonly = !exp_args->writable; 1644 bool shared = !exp_args->writable; 1645 strList *bitmaps; 1646 size_t i; 1647 int ret; 1648 1649 assert(exp_args->type == BLOCK_EXPORT_TYPE_NBD); 
1650 1651 if (!nbd_server_is_running()) { 1652 error_setg(errp, "NBD server not running"); 1653 return -EINVAL; 1654 } 1655 1656 if (!arg->has_name) { 1657 arg->name = exp_args->node_name; 1658 } 1659 1660 if (strlen(arg->name) > NBD_MAX_STRING_SIZE) { 1661 error_setg(errp, "export name '%s' too long", arg->name); 1662 return -EINVAL; 1663 } 1664 1665 if (arg->description && strlen(arg->description) > NBD_MAX_STRING_SIZE) { 1666 error_setg(errp, "description '%s' too long", arg->description); 1667 return -EINVAL; 1668 } 1669 1670 if (nbd_export_find(arg->name)) { 1671 error_setg(errp, "NBD server already has export named '%s'", arg->name); 1672 return -EEXIST; 1673 } 1674 1675 size = blk_getlength(blk); 1676 if (size < 0) { 1677 error_setg_errno(errp, -size, 1678 "Failed to determine the NBD export's length"); 1679 return size; 1680 } 1681 1682 /* Don't allow resize while the NBD server is running, otherwise we don't 1683 * care what happens with the node. */ 1684 blk_get_perm(blk, &perm, &shared_perm); 1685 ret = blk_set_perm(blk, perm, shared_perm & ~BLK_PERM_RESIZE, errp); 1686 if (ret < 0) { 1687 return ret; 1688 } 1689 1690 QTAILQ_INIT(&exp->clients); 1691 exp->name = g_strdup(arg->name); 1692 exp->description = g_strdup(arg->description); 1693 exp->nbdflags = (NBD_FLAG_HAS_FLAGS | NBD_FLAG_SEND_FLUSH | 1694 NBD_FLAG_SEND_FUA | NBD_FLAG_SEND_CACHE); 1695 if (readonly) { 1696 exp->nbdflags |= NBD_FLAG_READ_ONLY; 1697 if (shared) { 1698 exp->nbdflags |= NBD_FLAG_CAN_MULTI_CONN; 1699 } 1700 } else { 1701 exp->nbdflags |= (NBD_FLAG_SEND_TRIM | NBD_FLAG_SEND_WRITE_ZEROES | 1702 NBD_FLAG_SEND_FAST_ZERO); 1703 } 1704 exp->size = QEMU_ALIGN_DOWN(size, BDRV_SECTOR_SIZE); 1705 1706 for (bitmaps = arg->bitmaps; bitmaps; bitmaps = bitmaps->next) { 1707 exp->nr_export_bitmaps++; 1708 } 1709 exp->export_bitmaps = g_new0(BdrvDirtyBitmap *, exp->nr_export_bitmaps); 1710 for (i = 0, bitmaps = arg->bitmaps; bitmaps; 1711 i++, bitmaps = bitmaps->next) { 1712 const char *bitmap = 
bitmaps->value; 1713 BlockDriverState *bs = blk_bs(blk); 1714 BdrvDirtyBitmap *bm = NULL; 1715 1716 while (bs) { 1717 bm = bdrv_find_dirty_bitmap(bs, bitmap); 1718 if (bm != NULL) { 1719 break; 1720 } 1721 1722 bs = bdrv_filter_or_cow_bs(bs); 1723 } 1724 1725 if (bm == NULL) { 1726 ret = -ENOENT; 1727 error_setg(errp, "Bitmap '%s' is not found", bitmap); 1728 goto fail; 1729 } 1730 1731 if (bdrv_dirty_bitmap_check(bm, BDRV_BITMAP_ALLOW_RO, errp)) { 1732 ret = -EINVAL; 1733 goto fail; 1734 } 1735 1736 if (readonly && bdrv_is_writable(bs) && 1737 bdrv_dirty_bitmap_enabled(bm)) { 1738 ret = -EINVAL; 1739 error_setg(errp, 1740 "Enabled bitmap '%s' incompatible with readonly export", 1741 bitmap); 1742 goto fail; 1743 } 1744 1745 exp->export_bitmaps[i] = bm; 1746 assert(strlen(bitmap) <= BDRV_BITMAP_MAX_NAME_SIZE); 1747 } 1748 1749 /* Mark bitmaps busy in a separate loop, to simplify roll-back concerns. */ 1750 for (i = 0; i < exp->nr_export_bitmaps; i++) { 1751 bdrv_dirty_bitmap_set_busy(exp->export_bitmaps[i], true); 1752 } 1753 1754 exp->allocation_depth = arg->allocation_depth; 1755 1756 /* 1757 * We need to inhibit request queuing in the block layer to ensure we can 1758 * be properly quiesced when entering a drained section, as our coroutines 1759 * servicing pending requests might enter blk_pread(). 
1760 */ 1761 blk_set_disable_request_queuing(blk, true); 1762 1763 blk_add_aio_context_notifier(blk, blk_aio_attached, blk_aio_detach, exp); 1764 1765 blk_set_dev_ops(blk, &nbd_block_ops, exp); 1766 1767 QTAILQ_INSERT_TAIL(&exports, exp, next); 1768 1769 return 0; 1770 1771 fail: 1772 g_free(exp->export_bitmaps); 1773 g_free(exp->name); 1774 g_free(exp->description); 1775 return ret; 1776 } 1777 1778 NBDExport *nbd_export_find(const char *name) 1779 { 1780 NBDExport *exp; 1781 QTAILQ_FOREACH(exp, &exports, next) { 1782 if (strcmp(name, exp->name) == 0) { 1783 return exp; 1784 } 1785 } 1786 1787 return NULL; 1788 } 1789 1790 AioContext * 1791 nbd_export_aio_context(NBDExport *exp) 1792 { 1793 return exp->common.ctx; 1794 } 1795 1796 static void nbd_export_request_shutdown(BlockExport *blk_exp) 1797 { 1798 NBDExport *exp = container_of(blk_exp, NBDExport, common); 1799 NBDClient *client, *next; 1800 1801 blk_exp_ref(&exp->common); 1802 /* 1803 * TODO: Should we expand QMP NbdServerRemoveNode enum to allow a 1804 * close mode that stops advertising the export to new clients but 1805 * still permits existing clients to run to completion? Because of 1806 * that possibility, nbd_export_close() can be called more than 1807 * once on an export. 
1808 */ 1809 QTAILQ_FOREACH_SAFE(client, &exp->clients, next, next) { 1810 client_close(client, true); 1811 } 1812 if (exp->name) { 1813 g_free(exp->name); 1814 exp->name = NULL; 1815 QTAILQ_REMOVE(&exports, exp, next); 1816 } 1817 blk_exp_unref(&exp->common); 1818 } 1819 1820 static void nbd_export_delete(BlockExport *blk_exp) 1821 { 1822 size_t i; 1823 NBDExport *exp = container_of(blk_exp, NBDExport, common); 1824 1825 assert(exp->name == NULL); 1826 assert(QTAILQ_EMPTY(&exp->clients)); 1827 1828 g_free(exp->description); 1829 exp->description = NULL; 1830 1831 if (exp->common.blk) { 1832 if (exp->eject_notifier_blk) { 1833 notifier_remove(&exp->eject_notifier); 1834 blk_unref(exp->eject_notifier_blk); 1835 } 1836 blk_remove_aio_context_notifier(exp->common.blk, blk_aio_attached, 1837 blk_aio_detach, exp); 1838 blk_set_disable_request_queuing(exp->common.blk, false); 1839 } 1840 1841 for (i = 0; i < exp->nr_export_bitmaps; i++) { 1842 bdrv_dirty_bitmap_set_busy(exp->export_bitmaps[i], false); 1843 } 1844 } 1845 1846 const BlockExportDriver blk_exp_nbd = { 1847 .type = BLOCK_EXPORT_TYPE_NBD, 1848 .instance_size = sizeof(NBDExport), 1849 .create = nbd_export_create, 1850 .delete = nbd_export_delete, 1851 .request_shutdown = nbd_export_request_shutdown, 1852 }; 1853 1854 static int coroutine_fn nbd_co_send_iov(NBDClient *client, struct iovec *iov, 1855 unsigned niov, Error **errp) 1856 { 1857 int ret; 1858 1859 g_assert(qemu_in_coroutine()); 1860 qemu_co_mutex_lock(&client->send_lock); 1861 client->send_coroutine = qemu_coroutine_self(); 1862 1863 ret = qio_channel_writev_all(client->ioc, iov, niov, errp) < 0 ? 
-EIO : 0; 1864 1865 client->send_coroutine = NULL; 1866 qemu_co_mutex_unlock(&client->send_lock); 1867 1868 return ret; 1869 } 1870 1871 static inline void set_be_simple_reply(NBDSimpleReply *reply, uint64_t error, 1872 uint64_t handle) 1873 { 1874 stl_be_p(&reply->magic, NBD_SIMPLE_REPLY_MAGIC); 1875 stl_be_p(&reply->error, error); 1876 stq_be_p(&reply->handle, handle); 1877 } 1878 1879 static int nbd_co_send_simple_reply(NBDClient *client, 1880 uint64_t handle, 1881 uint32_t error, 1882 void *data, 1883 size_t len, 1884 Error **errp) 1885 { 1886 NBDSimpleReply reply; 1887 int nbd_err = system_errno_to_nbd_errno(error); 1888 struct iovec iov[] = { 1889 {.iov_base = &reply, .iov_len = sizeof(reply)}, 1890 {.iov_base = data, .iov_len = len} 1891 }; 1892 1893 trace_nbd_co_send_simple_reply(handle, nbd_err, nbd_err_lookup(nbd_err), 1894 len); 1895 set_be_simple_reply(&reply, nbd_err, handle); 1896 1897 return nbd_co_send_iov(client, iov, len ? 2 : 1, errp); 1898 } 1899 1900 static inline void set_be_chunk(NBDStructuredReplyChunk *chunk, uint16_t flags, 1901 uint16_t type, uint64_t handle, uint32_t length) 1902 { 1903 stl_be_p(&chunk->magic, NBD_STRUCTURED_REPLY_MAGIC); 1904 stw_be_p(&chunk->flags, flags); 1905 stw_be_p(&chunk->type, type); 1906 stq_be_p(&chunk->handle, handle); 1907 stl_be_p(&chunk->length, length); 1908 } 1909 1910 static int coroutine_fn nbd_co_send_structured_done(NBDClient *client, 1911 uint64_t handle, 1912 Error **errp) 1913 { 1914 NBDStructuredReplyChunk chunk; 1915 struct iovec iov[] = { 1916 {.iov_base = &chunk, .iov_len = sizeof(chunk)}, 1917 }; 1918 1919 trace_nbd_co_send_structured_done(handle); 1920 set_be_chunk(&chunk, NBD_REPLY_FLAG_DONE, NBD_REPLY_TYPE_NONE, handle, 0); 1921 1922 return nbd_co_send_iov(client, iov, 1, errp); 1923 } 1924 1925 static int coroutine_fn nbd_co_send_structured_read(NBDClient *client, 1926 uint64_t handle, 1927 uint64_t offset, 1928 void *data, 1929 size_t size, 1930 bool final, 1931 Error **errp) 1932 { 1933 
NBDStructuredReadData chunk; 1934 struct iovec iov[] = { 1935 {.iov_base = &chunk, .iov_len = sizeof(chunk)}, 1936 {.iov_base = data, .iov_len = size} 1937 }; 1938 1939 assert(size); 1940 trace_nbd_co_send_structured_read(handle, offset, data, size); 1941 set_be_chunk(&chunk.h, final ? NBD_REPLY_FLAG_DONE : 0, 1942 NBD_REPLY_TYPE_OFFSET_DATA, handle, 1943 sizeof(chunk) - sizeof(chunk.h) + size); 1944 stq_be_p(&chunk.offset, offset); 1945 1946 return nbd_co_send_iov(client, iov, 2, errp); 1947 } 1948 1949 static int coroutine_fn nbd_co_send_structured_error(NBDClient *client, 1950 uint64_t handle, 1951 uint32_t error, 1952 const char *msg, 1953 Error **errp) 1954 { 1955 NBDStructuredError chunk; 1956 int nbd_err = system_errno_to_nbd_errno(error); 1957 struct iovec iov[] = { 1958 {.iov_base = &chunk, .iov_len = sizeof(chunk)}, 1959 {.iov_base = (char *)msg, .iov_len = msg ? strlen(msg) : 0}, 1960 }; 1961 1962 assert(nbd_err); 1963 trace_nbd_co_send_structured_error(handle, nbd_err, 1964 nbd_err_lookup(nbd_err), msg ? msg : ""); 1965 set_be_chunk(&chunk.h, NBD_REPLY_FLAG_DONE, NBD_REPLY_TYPE_ERROR, handle, 1966 sizeof(chunk) - sizeof(chunk.h) + iov[1].iov_len); 1967 stl_be_p(&chunk.error, nbd_err); 1968 stw_be_p(&chunk.message_length, iov[1].iov_len); 1969 1970 return nbd_co_send_iov(client, iov, 1 + !!iov[1].iov_len, errp); 1971 } 1972 1973 /* Do a sparse read and send the structured reply to the client. 1974 * Returns -errno if sending fails. bdrv_block_status_above() failure is 1975 * reported to the client, at which point this function succeeds. 
1976 */ 1977 static int coroutine_fn nbd_co_send_sparse_read(NBDClient *client, 1978 uint64_t handle, 1979 uint64_t offset, 1980 uint8_t *data, 1981 size_t size, 1982 Error **errp) 1983 { 1984 int ret = 0; 1985 NBDExport *exp = client->exp; 1986 size_t progress = 0; 1987 1988 while (progress < size) { 1989 int64_t pnum; 1990 int status = bdrv_block_status_above(blk_bs(exp->common.blk), NULL, 1991 offset + progress, 1992 size - progress, &pnum, NULL, 1993 NULL); 1994 bool final; 1995 1996 if (status < 0) { 1997 char *msg = g_strdup_printf("unable to check for holes: %s", 1998 strerror(-status)); 1999 2000 ret = nbd_co_send_structured_error(client, handle, -status, msg, 2001 errp); 2002 g_free(msg); 2003 return ret; 2004 } 2005 assert(pnum && pnum <= size - progress); 2006 final = progress + pnum == size; 2007 if (status & BDRV_BLOCK_ZERO) { 2008 NBDStructuredReadHole chunk; 2009 struct iovec iov[] = { 2010 {.iov_base = &chunk, .iov_len = sizeof(chunk)}, 2011 }; 2012 2013 trace_nbd_co_send_structured_read_hole(handle, offset + progress, 2014 pnum); 2015 set_be_chunk(&chunk.h, final ? 
NBD_REPLY_FLAG_DONE : 0, 2016 NBD_REPLY_TYPE_OFFSET_HOLE, 2017 handle, sizeof(chunk) - sizeof(chunk.h)); 2018 stq_be_p(&chunk.offset, offset + progress); 2019 stl_be_p(&chunk.length, pnum); 2020 ret = nbd_co_send_iov(client, iov, 1, errp); 2021 } else { 2022 ret = blk_pread(exp->common.blk, offset + progress, 2023 data + progress, pnum); 2024 if (ret < 0) { 2025 error_setg_errno(errp, -ret, "reading from file failed"); 2026 break; 2027 } 2028 ret = nbd_co_send_structured_read(client, handle, offset + progress, 2029 data + progress, pnum, final, 2030 errp); 2031 } 2032 2033 if (ret < 0) { 2034 break; 2035 } 2036 progress += pnum; 2037 } 2038 return ret; 2039 } 2040 2041 typedef struct NBDExtentArray { 2042 NBDExtent *extents; 2043 unsigned int nb_alloc; 2044 unsigned int count; 2045 uint64_t total_length; 2046 bool can_add; 2047 bool converted_to_be; 2048 } NBDExtentArray; 2049 2050 static NBDExtentArray *nbd_extent_array_new(unsigned int nb_alloc) 2051 { 2052 NBDExtentArray *ea = g_new0(NBDExtentArray, 1); 2053 2054 ea->nb_alloc = nb_alloc; 2055 ea->extents = g_new(NBDExtent, nb_alloc); 2056 ea->can_add = true; 2057 2058 return ea; 2059 } 2060 2061 static void nbd_extent_array_free(NBDExtentArray *ea) 2062 { 2063 g_free(ea->extents); 2064 g_free(ea); 2065 } 2066 G_DEFINE_AUTOPTR_CLEANUP_FUNC(NBDExtentArray, nbd_extent_array_free); 2067 2068 /* Further modifications of the array after conversion are abandoned */ 2069 static void nbd_extent_array_convert_to_be(NBDExtentArray *ea) 2070 { 2071 int i; 2072 2073 assert(!ea->converted_to_be); 2074 ea->can_add = false; 2075 ea->converted_to_be = true; 2076 2077 for (i = 0; i < ea->count; i++) { 2078 ea->extents[i].flags = cpu_to_be32(ea->extents[i].flags); 2079 ea->extents[i].length = cpu_to_be32(ea->extents[i].length); 2080 } 2081 } 2082 2083 /* 2084 * Add extent to NBDExtentArray. If extent can't be added (no available space), 2085 * return -1. 
2086 * For safety, when returning -1 for the first time, .can_add is set to false, 2087 * further call to nbd_extent_array_add() will crash. 2088 * (to avoid the situation, when after failing to add an extent (returned -1), 2089 * user miss this failure and add another extent, which is successfully added 2090 * (array is full, but new extent may be squashed into the last one), then we 2091 * have invalid array with skipped extent) 2092 */ 2093 static int nbd_extent_array_add(NBDExtentArray *ea, 2094 uint32_t length, uint32_t flags) 2095 { 2096 assert(ea->can_add); 2097 2098 if (!length) { 2099 return 0; 2100 } 2101 2102 /* Extend previous extent if flags are the same */ 2103 if (ea->count > 0 && flags == ea->extents[ea->count - 1].flags) { 2104 uint64_t sum = (uint64_t)length + ea->extents[ea->count - 1].length; 2105 2106 if (sum <= UINT32_MAX) { 2107 ea->extents[ea->count - 1].length = sum; 2108 ea->total_length += length; 2109 return 0; 2110 } 2111 } 2112 2113 if (ea->count >= ea->nb_alloc) { 2114 ea->can_add = false; 2115 return -1; 2116 } 2117 2118 ea->total_length += length; 2119 ea->extents[ea->count] = (NBDExtent) {.length = length, .flags = flags}; 2120 ea->count++; 2121 2122 return 0; 2123 } 2124 2125 static int blockstatus_to_extents(BlockDriverState *bs, uint64_t offset, 2126 uint64_t bytes, NBDExtentArray *ea) 2127 { 2128 while (bytes) { 2129 uint32_t flags; 2130 int64_t num; 2131 int ret = bdrv_block_status_above(bs, NULL, offset, bytes, &num, 2132 NULL, NULL); 2133 2134 if (ret < 0) { 2135 return ret; 2136 } 2137 2138 flags = (ret & BDRV_BLOCK_DATA ? 0 : NBD_STATE_HOLE) | 2139 (ret & BDRV_BLOCK_ZERO ? 
NBD_STATE_ZERO : 0); 2140 2141 if (nbd_extent_array_add(ea, num, flags) < 0) { 2142 return 0; 2143 } 2144 2145 offset += num; 2146 bytes -= num; 2147 } 2148 2149 return 0; 2150 } 2151 2152 static int blockalloc_to_extents(BlockDriverState *bs, uint64_t offset, 2153 uint64_t bytes, NBDExtentArray *ea) 2154 { 2155 while (bytes) { 2156 int64_t num; 2157 int ret = bdrv_is_allocated_above(bs, NULL, false, offset, bytes, 2158 &num); 2159 2160 if (ret < 0) { 2161 return ret; 2162 } 2163 2164 if (nbd_extent_array_add(ea, num, ret) < 0) { 2165 return 0; 2166 } 2167 2168 offset += num; 2169 bytes -= num; 2170 } 2171 2172 return 0; 2173 } 2174 2175 /* 2176 * nbd_co_send_extents 2177 * 2178 * @ea is converted to BE by the function 2179 * @last controls whether NBD_REPLY_FLAG_DONE is sent. 2180 */ 2181 static int nbd_co_send_extents(NBDClient *client, uint64_t handle, 2182 NBDExtentArray *ea, 2183 bool last, uint32_t context_id, Error **errp) 2184 { 2185 NBDStructuredMeta chunk; 2186 struct iovec iov[] = { 2187 {.iov_base = &chunk, .iov_len = sizeof(chunk)}, 2188 {.iov_base = ea->extents, .iov_len = ea->count * sizeof(ea->extents[0])} 2189 }; 2190 2191 nbd_extent_array_convert_to_be(ea); 2192 2193 trace_nbd_co_send_extents(handle, ea->count, context_id, ea->total_length, 2194 last); 2195 set_be_chunk(&chunk.h, last ? NBD_REPLY_FLAG_DONE : 0, 2196 NBD_REPLY_TYPE_BLOCK_STATUS, 2197 handle, sizeof(chunk) - sizeof(chunk.h) + iov[1].iov_len); 2198 stl_be_p(&chunk.context_id, context_id); 2199 2200 return nbd_co_send_iov(client, iov, 2, errp); 2201 } 2202 2203 /* Get block status from the exported device and send it to the client */ 2204 static int nbd_co_send_block_status(NBDClient *client, uint64_t handle, 2205 BlockDriverState *bs, uint64_t offset, 2206 uint32_t length, bool dont_fragment, 2207 bool last, uint32_t context_id, 2208 Error **errp) 2209 { 2210 int ret; 2211 unsigned int nb_extents = dont_fragment ? 
1 : NBD_MAX_BLOCK_STATUS_EXTENTS; 2212 g_autoptr(NBDExtentArray) ea = nbd_extent_array_new(nb_extents); 2213 2214 if (context_id == NBD_META_ID_BASE_ALLOCATION) { 2215 ret = blockstatus_to_extents(bs, offset, length, ea); 2216 } else { 2217 ret = blockalloc_to_extents(bs, offset, length, ea); 2218 } 2219 if (ret < 0) { 2220 return nbd_co_send_structured_error( 2221 client, handle, -ret, "can't get block status", errp); 2222 } 2223 2224 return nbd_co_send_extents(client, handle, ea, last, context_id, errp); 2225 } 2226 2227 /* Populate @ea from a dirty bitmap. */ 2228 static void bitmap_to_extents(BdrvDirtyBitmap *bitmap, 2229 uint64_t offset, uint64_t length, 2230 NBDExtentArray *es) 2231 { 2232 int64_t start, dirty_start, dirty_count; 2233 int64_t end = offset + length; 2234 bool full = false; 2235 2236 bdrv_dirty_bitmap_lock(bitmap); 2237 2238 for (start = offset; 2239 bdrv_dirty_bitmap_next_dirty_area(bitmap, start, end, INT32_MAX, 2240 &dirty_start, &dirty_count); 2241 start = dirty_start + dirty_count) 2242 { 2243 if ((nbd_extent_array_add(es, dirty_start - start, 0) < 0) || 2244 (nbd_extent_array_add(es, dirty_count, NBD_STATE_DIRTY) < 0)) 2245 { 2246 full = true; 2247 break; 2248 } 2249 } 2250 2251 if (!full) { 2252 /* last non dirty extent, nothing to do if array is now full */ 2253 (void) nbd_extent_array_add(es, end - start, 0); 2254 } 2255 2256 bdrv_dirty_bitmap_unlock(bitmap); 2257 } 2258 2259 static int nbd_co_send_bitmap(NBDClient *client, uint64_t handle, 2260 BdrvDirtyBitmap *bitmap, uint64_t offset, 2261 uint32_t length, bool dont_fragment, bool last, 2262 uint32_t context_id, Error **errp) 2263 { 2264 unsigned int nb_extents = dont_fragment ? 
1 : NBD_MAX_BLOCK_STATUS_EXTENTS; 2265 g_autoptr(NBDExtentArray) ea = nbd_extent_array_new(nb_extents); 2266 2267 bitmap_to_extents(bitmap, offset, length, ea); 2268 2269 return nbd_co_send_extents(client, handle, ea, last, context_id, errp); 2270 } 2271 2272 /* nbd_co_receive_request 2273 * Collect a client request. Return 0 if request looks valid, -EIO to drop 2274 * connection right away, -EAGAIN to indicate we were interrupted and the 2275 * channel should be quiesced, and any other negative value to report an error 2276 * to the client (although the caller may still need to disconnect after 2277 * reporting the error). 2278 */ 2279 static int nbd_co_receive_request(NBDRequestData *req, NBDRequest *request, 2280 Error **errp) 2281 { 2282 NBDClient *client = req->client; 2283 int valid_flags; 2284 int ret; 2285 2286 g_assert(qemu_in_coroutine()); 2287 assert(client->recv_coroutine == qemu_coroutine_self()); 2288 ret = nbd_receive_request(client, request, errp); 2289 if (ret < 0) { 2290 return ret; 2291 } 2292 2293 trace_nbd_co_receive_request_decode_type(request->handle, request->type, 2294 nbd_cmd_lookup(request->type)); 2295 2296 if (request->type != NBD_CMD_WRITE) { 2297 /* No payload, we are ready to read the next request. 
*/ 2298 req->complete = true; 2299 } 2300 2301 if (request->type == NBD_CMD_DISC) { 2302 /* Special case: we're going to disconnect without a reply, 2303 * whether or not flags, from, or len are bogus */ 2304 return -EIO; 2305 } 2306 2307 if (request->type == NBD_CMD_READ || request->type == NBD_CMD_WRITE || 2308 request->type == NBD_CMD_CACHE) 2309 { 2310 if (request->len > NBD_MAX_BUFFER_SIZE) { 2311 error_setg(errp, "len (%" PRIu32" ) is larger than max len (%u)", 2312 request->len, NBD_MAX_BUFFER_SIZE); 2313 return -EINVAL; 2314 } 2315 2316 if (request->type != NBD_CMD_CACHE) { 2317 req->data = blk_try_blockalign(client->exp->common.blk, 2318 request->len); 2319 if (req->data == NULL) { 2320 error_setg(errp, "No memory"); 2321 return -ENOMEM; 2322 } 2323 } 2324 } 2325 2326 if (request->type == NBD_CMD_WRITE) { 2327 if (nbd_read(client->ioc, req->data, request->len, "CMD_WRITE data", 2328 errp) < 0) 2329 { 2330 return -EIO; 2331 } 2332 req->complete = true; 2333 2334 trace_nbd_co_receive_request_payload_received(request->handle, 2335 request->len); 2336 } 2337 2338 /* Sanity checks. */ 2339 if (client->exp->nbdflags & NBD_FLAG_READ_ONLY && 2340 (request->type == NBD_CMD_WRITE || 2341 request->type == NBD_CMD_WRITE_ZEROES || 2342 request->type == NBD_CMD_TRIM)) { 2343 error_setg(errp, "Export is read-only"); 2344 return -EROFS; 2345 } 2346 if (request->from > client->exp->size || 2347 request->len > client->exp->size - request->from) { 2348 error_setg(errp, "operation past EOF; From: %" PRIu64 ", Len: %" PRIu32 2349 ", Size: %" PRIu64, request->from, request->len, 2350 client->exp->size); 2351 return (request->type == NBD_CMD_WRITE || 2352 request->type == NBD_CMD_WRITE_ZEROES) ? 
-ENOSPC : -EINVAL; 2353 } 2354 if (client->check_align && !QEMU_IS_ALIGNED(request->from | request->len, 2355 client->check_align)) { 2356 /* 2357 * The block layer gracefully handles unaligned requests, but 2358 * it's still worth tracing client non-compliance 2359 */ 2360 trace_nbd_co_receive_align_compliance(nbd_cmd_lookup(request->type), 2361 request->from, 2362 request->len, 2363 client->check_align); 2364 } 2365 valid_flags = NBD_CMD_FLAG_FUA; 2366 if (request->type == NBD_CMD_READ && client->structured_reply) { 2367 valid_flags |= NBD_CMD_FLAG_DF; 2368 } else if (request->type == NBD_CMD_WRITE_ZEROES) { 2369 valid_flags |= NBD_CMD_FLAG_NO_HOLE | NBD_CMD_FLAG_FAST_ZERO; 2370 } else if (request->type == NBD_CMD_BLOCK_STATUS) { 2371 valid_flags |= NBD_CMD_FLAG_REQ_ONE; 2372 } 2373 if (request->flags & ~valid_flags) { 2374 error_setg(errp, "unsupported flags for command %s (got 0x%x)", 2375 nbd_cmd_lookup(request->type), request->flags); 2376 return -EINVAL; 2377 } 2378 2379 return 0; 2380 } 2381 2382 /* Send simple reply without a payload, or a structured error 2383 * @error_msg is ignored if @ret >= 0 2384 * Returns 0 if connection is still live, -errno on failure to talk to client 2385 */ 2386 static coroutine_fn int nbd_send_generic_reply(NBDClient *client, 2387 uint64_t handle, 2388 int ret, 2389 const char *error_msg, 2390 Error **errp) 2391 { 2392 if (client->structured_reply && ret < 0) { 2393 return nbd_co_send_structured_error(client, handle, -ret, error_msg, 2394 errp); 2395 } else { 2396 return nbd_co_send_simple_reply(client, handle, ret < 0 ? -ret : 0, 2397 NULL, 0, errp); 2398 } 2399 } 2400 2401 /* Handle NBD_CMD_READ request. 2402 * Return -errno if sending fails. Other errors are reported directly to the 2403 * client as an error reply. 
*/
static coroutine_fn int nbd_do_cmd_read(NBDClient *client, NBDRequest *request,
                                        uint8_t *data, Error **errp)
{
    NBDExport *exp = client->exp;
    int rc;

    assert(request->type == NBD_CMD_READ);

    /* XXX: NBD Protocol only documents use of FUA with WRITE */
    if (request->flags & NBD_CMD_FLAG_FUA) {
        rc = blk_co_flush(exp->common.blk);
        if (rc < 0) {
            return nbd_send_generic_reply(client, request->handle, rc,
                                          "flush failed", errp);
        }
    }

    /*
     * A non-empty structured read that the client allows us to fragment
     * (no DF flag) goes through the sparse read path.
     */
    if (client->structured_reply && request->len &&
        !(request->flags & NBD_CMD_FLAG_DF))
    {
        return nbd_co_send_sparse_read(client, request->handle, request->from,
                                       data, request->len, errp);
    }

    rc = blk_pread(exp->common.blk, request->from, data, request->len);
    if (rc < 0) {
        return nbd_send_generic_reply(client, request->handle, rc,
                                      "reading from file failed", errp);
    }

    /* Without structured replies the data travels in a simple reply. */
    if (!client->structured_reply) {
        return nbd_co_send_simple_reply(client, request->handle, 0,
                                        data, request->len, errp);
    }

    /* A zero-length structured read has no data chunk, only "done". */
    if (!request->len) {
        return nbd_co_send_structured_done(client, request->handle, errp);
    }

    return nbd_co_send_structured_read(client, request->handle,
                                       request->from, data,
                                       request->len, true, errp);
}

/*
 * nbd_do_cmd_cache
 *
 * Handle NBD_CMD_CACHE request.
 * Return -errno if sending fails.  Other errors are reported directly to the
 * client as an error reply.
2454 */ 2455 static coroutine_fn int nbd_do_cmd_cache(NBDClient *client, NBDRequest *request, 2456 Error **errp) 2457 { 2458 int ret; 2459 NBDExport *exp = client->exp; 2460 2461 assert(request->type == NBD_CMD_CACHE); 2462 2463 ret = blk_co_preadv(exp->common.blk, request->from, request->len, 2464 NULL, BDRV_REQ_COPY_ON_READ | BDRV_REQ_PREFETCH); 2465 2466 return nbd_send_generic_reply(client, request->handle, ret, 2467 "caching data failed", errp); 2468 } 2469 2470 /* Handle NBD request. 2471 * Return -errno if sending fails. Other errors are reported directly to the 2472 * client as an error reply. */ 2473 static coroutine_fn int nbd_handle_request(NBDClient *client, 2474 NBDRequest *request, 2475 uint8_t *data, Error **errp) 2476 { 2477 int ret; 2478 int flags; 2479 NBDExport *exp = client->exp; 2480 char *msg; 2481 size_t i; 2482 2483 switch (request->type) { 2484 case NBD_CMD_CACHE: 2485 return nbd_do_cmd_cache(client, request, errp); 2486 2487 case NBD_CMD_READ: 2488 return nbd_do_cmd_read(client, request, data, errp); 2489 2490 case NBD_CMD_WRITE: 2491 flags = 0; 2492 if (request->flags & NBD_CMD_FLAG_FUA) { 2493 flags |= BDRV_REQ_FUA; 2494 } 2495 ret = blk_pwrite(exp->common.blk, request->from, data, request->len, 2496 flags); 2497 return nbd_send_generic_reply(client, request->handle, ret, 2498 "writing to file failed", errp); 2499 2500 case NBD_CMD_WRITE_ZEROES: 2501 flags = 0; 2502 if (request->flags & NBD_CMD_FLAG_FUA) { 2503 flags |= BDRV_REQ_FUA; 2504 } 2505 if (!(request->flags & NBD_CMD_FLAG_NO_HOLE)) { 2506 flags |= BDRV_REQ_MAY_UNMAP; 2507 } 2508 if (request->flags & NBD_CMD_FLAG_FAST_ZERO) { 2509 flags |= BDRV_REQ_NO_FALLBACK; 2510 } 2511 ret = blk_pwrite_zeroes(exp->common.blk, request->from, request->len, 2512 flags); 2513 return nbd_send_generic_reply(client, request->handle, ret, 2514 "writing to file failed", errp); 2515 2516 case NBD_CMD_DISC: 2517 /* unreachable, thanks to special case in nbd_co_receive_request() */ 2518 abort(); 2519 
2520 case NBD_CMD_FLUSH: 2521 ret = blk_co_flush(exp->common.blk); 2522 return nbd_send_generic_reply(client, request->handle, ret, 2523 "flush failed", errp); 2524 2525 case NBD_CMD_TRIM: 2526 ret = blk_co_pdiscard(exp->common.blk, request->from, request->len); 2527 if (ret >= 0 && request->flags & NBD_CMD_FLAG_FUA) { 2528 ret = blk_co_flush(exp->common.blk); 2529 } 2530 return nbd_send_generic_reply(client, request->handle, ret, 2531 "discard failed", errp); 2532 2533 case NBD_CMD_BLOCK_STATUS: 2534 if (!request->len) { 2535 return nbd_send_generic_reply(client, request->handle, -EINVAL, 2536 "need non-zero length", errp); 2537 } 2538 if (client->export_meta.count) { 2539 bool dont_fragment = request->flags & NBD_CMD_FLAG_REQ_ONE; 2540 int contexts_remaining = client->export_meta.count; 2541 2542 if (client->export_meta.base_allocation) { 2543 ret = nbd_co_send_block_status(client, request->handle, 2544 blk_bs(exp->common.blk), 2545 request->from, 2546 request->len, dont_fragment, 2547 !--contexts_remaining, 2548 NBD_META_ID_BASE_ALLOCATION, 2549 errp); 2550 if (ret < 0) { 2551 return ret; 2552 } 2553 } 2554 2555 if (client->export_meta.allocation_depth) { 2556 ret = nbd_co_send_block_status(client, request->handle, 2557 blk_bs(exp->common.blk), 2558 request->from, request->len, 2559 dont_fragment, 2560 !--contexts_remaining, 2561 NBD_META_ID_ALLOCATION_DEPTH, 2562 errp); 2563 if (ret < 0) { 2564 return ret; 2565 } 2566 } 2567 2568 for (i = 0; i < client->exp->nr_export_bitmaps; i++) { 2569 if (!client->export_meta.bitmaps[i]) { 2570 continue; 2571 } 2572 ret = nbd_co_send_bitmap(client, request->handle, 2573 client->exp->export_bitmaps[i], 2574 request->from, request->len, 2575 dont_fragment, !--contexts_remaining, 2576 NBD_META_ID_DIRTY_BITMAP + i, errp); 2577 if (ret < 0) { 2578 return ret; 2579 } 2580 } 2581 2582 assert(!contexts_remaining); 2583 2584 return 0; 2585 } else { 2586 return nbd_send_generic_reply(client, request->handle, -EINVAL, 2587 
"CMD_BLOCK_STATUS not negotiated", 2588 errp); 2589 } 2590 2591 default: 2592 msg = g_strdup_printf("invalid request type (%" PRIu32 ") received", 2593 request->type); 2594 ret = nbd_send_generic_reply(client, request->handle, -EINVAL, msg, 2595 errp); 2596 g_free(msg); 2597 return ret; 2598 } 2599 } 2600 2601 /* Owns a reference to the NBDClient passed as opaque. */ 2602 static coroutine_fn void nbd_trip(void *opaque) 2603 { 2604 NBDClient *client = opaque; 2605 NBDRequestData *req; 2606 NBDRequest request = { 0 }; /* GCC thinks it can be used uninitialized */ 2607 int ret; 2608 Error *local_err = NULL; 2609 2610 trace_nbd_trip(); 2611 if (client->closing) { 2612 nbd_client_put(client); 2613 return; 2614 } 2615 2616 if (client->quiescing) { 2617 /* 2618 * We're switching between AIO contexts. Don't attempt to receive a new 2619 * request and kick the main context which may be waiting for us. 2620 */ 2621 nbd_client_put(client); 2622 client->recv_coroutine = NULL; 2623 aio_wait_kick(); 2624 return; 2625 } 2626 2627 req = nbd_request_get(client); 2628 ret = nbd_co_receive_request(req, &request, &local_err); 2629 client->recv_coroutine = NULL; 2630 2631 if (client->closing) { 2632 /* 2633 * The client may be closed when we are blocked in 2634 * nbd_co_receive_request() 2635 */ 2636 goto done; 2637 } 2638 2639 if (ret == -EAGAIN) { 2640 assert(client->quiescing); 2641 goto done; 2642 } 2643 2644 nbd_client_receive_next_request(client); 2645 if (ret == -EIO) { 2646 goto disconnect; 2647 } 2648 2649 if (ret < 0) { 2650 /* It wans't -EIO, so, according to nbd_co_receive_request() 2651 * semantics, we should return the error to the client. 
*/ 2652 Error *export_err = local_err; 2653 2654 local_err = NULL; 2655 ret = nbd_send_generic_reply(client, request.handle, -EINVAL, 2656 error_get_pretty(export_err), &local_err); 2657 error_free(export_err); 2658 } else { 2659 ret = nbd_handle_request(client, &request, req->data, &local_err); 2660 } 2661 if (ret < 0) { 2662 error_prepend(&local_err, "Failed to send reply: "); 2663 goto disconnect; 2664 } 2665 2666 /* We must disconnect after NBD_CMD_WRITE if we did not 2667 * read the payload. 2668 */ 2669 if (!req->complete) { 2670 error_setg(&local_err, "Request handling failed in intermediate state"); 2671 goto disconnect; 2672 } 2673 2674 done: 2675 nbd_request_put(req); 2676 nbd_client_put(client); 2677 return; 2678 2679 disconnect: 2680 if (local_err) { 2681 error_reportf_err(local_err, "Disconnect client, due to: "); 2682 } 2683 nbd_request_put(req); 2684 client_close(client, true); 2685 nbd_client_put(client); 2686 } 2687 2688 static void nbd_client_receive_next_request(NBDClient *client) 2689 { 2690 if (!client->recv_coroutine && client->nb_requests < MAX_NBD_REQUESTS && 2691 !client->quiescing) { 2692 nbd_client_get(client); 2693 client->recv_coroutine = qemu_coroutine_create(nbd_trip, client); 2694 aio_co_schedule(client->exp->common.ctx, client->recv_coroutine); 2695 } 2696 } 2697 2698 static coroutine_fn void nbd_co_client_start(void *opaque) 2699 { 2700 NBDClient *client = opaque; 2701 Error *local_err = NULL; 2702 2703 qemu_co_mutex_init(&client->send_lock); 2704 2705 if (nbd_negotiate(client, &local_err)) { 2706 if (local_err) { 2707 error_report_err(local_err); 2708 } 2709 client_close(client, false); 2710 return; 2711 } 2712 2713 nbd_client_receive_next_request(client); 2714 } 2715 2716 /* 2717 * Create a new client listener using the given channel @sioc. 2718 * Begin servicing it in a coroutine. When the connection closes, call 2719 * @close_fn with an indication of whether the client completed negotiation. 
2720 */ 2721 void nbd_client_new(QIOChannelSocket *sioc, 2722 QCryptoTLSCreds *tlscreds, 2723 const char *tlsauthz, 2724 void (*close_fn)(NBDClient *, bool)) 2725 { 2726 NBDClient *client; 2727 Coroutine *co; 2728 2729 client = g_new0(NBDClient, 1); 2730 client->refcount = 1; 2731 client->tlscreds = tlscreds; 2732 if (tlscreds) { 2733 object_ref(OBJECT(client->tlscreds)); 2734 } 2735 client->tlsauthz = g_strdup(tlsauthz); 2736 client->sioc = sioc; 2737 object_ref(OBJECT(client->sioc)); 2738 client->ioc = QIO_CHANNEL(sioc); 2739 object_ref(OBJECT(client->ioc)); 2740 client->close_fn = close_fn; 2741 2742 co = qemu_coroutine_create(nbd_co_client_start, client); 2743 qemu_coroutine_enter(co); 2744 } 2745