1 /* 2 * Copyright (C) 2016 Red Hat, Inc. 3 * Copyright (C) 2005 Anthony Liguori <anthony@codemonkey.ws> 4 * 5 * Network Block Device Server Side 6 * 7 * This program is free software; you can redistribute it and/or modify 8 * it under the terms of the GNU General Public License as published by 9 * the Free Software Foundation; under version 2 of the License. 10 * 11 * This program is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 * GNU General Public License for more details. 15 * 16 * You should have received a copy of the GNU General Public License 17 * along with this program; if not, see <http://www.gnu.org/licenses/>. 18 */ 19 20 #include "qemu/osdep.h" 21 #include "qapi/error.h" 22 #include "trace.h" 23 #include "nbd-internal.h" 24 25 static int system_errno_to_nbd_errno(int err) 26 { 27 switch (err) { 28 case 0: 29 return NBD_SUCCESS; 30 case EPERM: 31 case EROFS: 32 return NBD_EPERM; 33 case EIO: 34 return NBD_EIO; 35 case ENOMEM: 36 return NBD_ENOMEM; 37 #ifdef EDQUOT 38 case EDQUOT: 39 #endif 40 case EFBIG: 41 case ENOSPC: 42 return NBD_ENOSPC; 43 case ESHUTDOWN: 44 return NBD_ESHUTDOWN; 45 case EINVAL: 46 default: 47 return NBD_EINVAL; 48 } 49 } 50 51 /* Definitions for opaque data types */ 52 53 typedef struct NBDRequestData NBDRequestData; 54 55 struct NBDRequestData { 56 QSIMPLEQ_ENTRY(NBDRequestData) entry; 57 NBDClient *client; 58 uint8_t *data; 59 bool complete; 60 }; 61 62 struct NBDExport { 63 int refcount; 64 void (*close)(NBDExport *exp); 65 66 BlockBackend *blk; 67 char *name; 68 char *description; 69 off_t dev_offset; 70 off_t size; 71 uint16_t nbdflags; 72 QTAILQ_HEAD(, NBDClient) clients; 73 QTAILQ_ENTRY(NBDExport) next; 74 75 AioContext *ctx; 76 77 BlockBackend *eject_notifier_blk; 78 Notifier eject_notifier; 79 }; 80 81 static QTAILQ_HEAD(, NBDExport) exports = QTAILQ_HEAD_INITIALIZER(exports); 82 83 struct NBDClient { 84 int refcount; 85 void (*close_fn)(NBDClient *client, bool negotiated); 86 87 bool no_zeroes; 88 NBDExport *exp; 89 QCryptoTLSCreds *tlscreds; 90 char *tlsaclname; 91 QIOChannelSocket *sioc; /* The underlying data channel */ 92 QIOChannel *ioc; /* The current I/O channel which may differ (eg TLS) */ 93 94 Coroutine *recv_coroutine; 95 96 CoMutex send_lock; 97 Coroutine *send_coroutine; 98 99 QTAILQ_ENTRY(NBDClient) next; 100 int nb_requests; 101 bool closing; 102 }; 103 104 /* That's all folks */ 105 106 static void nbd_client_receive_next_request(NBDClient *client); 107 108 /* Basic flow for negotiation 109 110 Server Client 111 Negotiate 112 113 or 114 115 Server Client 116 Negotiate #1 117 Option 118 Negotiate #2 119 120 ---- 121 122 followed by 123 124 Server Client 125 Request 126 Response 127 Request 128 Response 129 ... 130 ... 131 Request (type == 2) 132 133 */ 134 135 /* Send a reply header, including length, but no payload. 136 * Return -errno on error, 0 on success. */ 137 static int nbd_negotiate_send_rep_len(QIOChannel *ioc, uint32_t type, 138 uint32_t opt, uint32_t len, Error **errp) 139 { 140 uint64_t magic; 141 142 trace_nbd_negotiate_send_rep_len(opt, type, len); 143 144 magic = cpu_to_be64(NBD_REP_MAGIC); 145 if (nbd_write(ioc, &magic, sizeof(magic), errp) < 0) { 146 error_prepend(errp, "write failed (rep magic): "); 147 return -EINVAL; 148 } 149 150 opt = cpu_to_be32(opt); 151 if (nbd_write(ioc, &opt, sizeof(opt), errp) < 0) { 152 error_prepend(errp, "write failed (rep opt): "); 153 return -EINVAL; 154 } 155 156 type = cpu_to_be32(type); 157 if (nbd_write(ioc, &type, sizeof(type), errp) < 0) { 158 error_prepend(errp, "write failed (rep type): "); 159 return -EINVAL; 160 } 161 162 len = cpu_to_be32(len); 163 if (nbd_write(ioc, &len, sizeof(len), errp) < 0) { 164 error_prepend(errp, "write failed (rep data length): "); 165 return -EINVAL; 166 } 167 return 0; 168 } 169 170 /* Send a reply header with default 0 length. 171 * Return -errno on error, 0 on success. */ 172 static int nbd_negotiate_send_rep(QIOChannel *ioc, uint32_t type, uint32_t opt, 173 Error **errp) 174 { 175 return nbd_negotiate_send_rep_len(ioc, type, opt, 0, errp); 176 } 177 178 /* Send an error reply. 179 * Return -errno on error, 0 on success. */ 180 static int GCC_FMT_ATTR(5, 6) 181 nbd_negotiate_send_rep_err(QIOChannel *ioc, uint32_t type, 182 uint32_t opt, Error **errp, const char *fmt, ...) 183 { 184 va_list va; 185 char *msg; 186 int ret; 187 size_t len; 188 189 va_start(va, fmt); 190 msg = g_strdup_vprintf(fmt, va); 191 va_end(va); 192 len = strlen(msg); 193 assert(len < 4096); 194 trace_nbd_negotiate_send_rep_err(msg); 195 ret = nbd_negotiate_send_rep_len(ioc, type, opt, len, errp); 196 if (ret < 0) { 197 goto out; 198 } 199 if (nbd_write(ioc, msg, len, errp) < 0) { 200 error_prepend(errp, "write failed (error message): "); 201 ret = -EIO; 202 } else { 203 ret = 0; 204 } 205 206 out: 207 g_free(msg); 208 return ret; 209 } 210 211 /* Send a single NBD_REP_SERVER reply to NBD_OPT_LIST, including payload. 212 * Return -errno on error, 0 on success. */ 213 static int nbd_negotiate_send_rep_list(QIOChannel *ioc, NBDExport *exp, 214 Error **errp) 215 { 216 size_t name_len, desc_len; 217 uint32_t len; 218 const char *name = exp->name ? exp->name : ""; 219 const char *desc = exp->description ? exp->description : ""; 220 int ret; 221 222 trace_nbd_negotiate_send_rep_list(name, desc); 223 name_len = strlen(name); 224 desc_len = strlen(desc); 225 len = name_len + desc_len + sizeof(len); 226 ret = nbd_negotiate_send_rep_len(ioc, NBD_REP_SERVER, NBD_OPT_LIST, len, 227 errp); 228 if (ret < 0) { 229 return ret; 230 } 231 232 len = cpu_to_be32(name_len); 233 if (nbd_write(ioc, &len, sizeof(len), errp) < 0) { 234 error_prepend(errp, "write failed (name length): "); 235 return -EINVAL; 236 } 237 238 if (nbd_write(ioc, name, name_len, errp) < 0) { 239 error_prepend(errp, "write failed (name buffer): "); 240 return -EINVAL; 241 } 242 243 if (nbd_write(ioc, desc, desc_len, errp) < 0) { 244 error_prepend(errp, "write failed (description buffer): "); 245 return -EINVAL; 246 } 247 248 return 0; 249 } 250 251 /* Process the NBD_OPT_LIST command, with a potential series of replies. 252 * Return -errno on error, 0 on success. */ 253 static int nbd_negotiate_handle_list(NBDClient *client, uint32_t length, 254 Error **errp) 255 { 256 NBDExport *exp; 257 258 if (length) { 259 if (nbd_drop(client->ioc, length, errp) < 0) { 260 return -EIO; 261 } 262 return nbd_negotiate_send_rep_err(client->ioc, 263 NBD_REP_ERR_INVALID, NBD_OPT_LIST, 264 errp, 265 "OPT_LIST should not have length"); 266 } 267 268 /* For each export, send a NBD_REP_SERVER reply. */ 269 QTAILQ_FOREACH(exp, &exports, next) { 270 if (nbd_negotiate_send_rep_list(client->ioc, exp, errp)) { 271 return -EINVAL; 272 } 273 } 274 /* Finish with a NBD_REP_ACK. */ 275 return nbd_negotiate_send_rep(client->ioc, NBD_REP_ACK, NBD_OPT_LIST, errp); 276 } 277 278 static int nbd_negotiate_handle_export_name(NBDClient *client, uint32_t length, 279 Error **errp) 280 { 281 char name[NBD_MAX_NAME_SIZE + 1]; 282 283 /* Client sends: 284 [20 .. xx] export name (length bytes) 285 */ 286 trace_nbd_negotiate_handle_export_name(); 287 if (length >= sizeof(name)) { 288 error_setg(errp, "Bad length received"); 289 return -EINVAL; 290 } 291 if (nbd_read(client->ioc, name, length, errp) < 0) { 292 error_prepend(errp, "read failed: "); 293 return -EINVAL; 294 } 295 name[length] = '\0'; 296 297 trace_nbd_negotiate_handle_export_name_request(name); 298 299 client->exp = nbd_export_find(name); 300 if (!client->exp) { 301 error_setg(errp, "export not found"); 302 return -EINVAL; 303 } 304 305 QTAILQ_INSERT_TAIL(&client->exp->clients, client, next); 306 nbd_export_get(client->exp); 307 308 return 0; 309 } 310 311 /* Handle NBD_OPT_STARTTLS. Return NULL to drop connection, or else the 312 * new channel for all further (now-encrypted) communication. */ 313 static QIOChannel *nbd_negotiate_handle_starttls(NBDClient *client, 314 uint32_t length, 315 Error **errp) 316 { 317 QIOChannel *ioc; 318 QIOChannelTLS *tioc; 319 struct NBDTLSHandshakeData data = { 0 }; 320 321 trace_nbd_negotiate_handle_starttls(); 322 ioc = client->ioc; 323 if (length) { 324 if (nbd_drop(ioc, length, errp) < 0) { 325 return NULL; 326 } 327 nbd_negotiate_send_rep_err(ioc, NBD_REP_ERR_INVALID, NBD_OPT_STARTTLS, 328 errp, 329 "OPT_STARTTLS should not have length"); 330 return NULL; 331 } 332 333 if (nbd_negotiate_send_rep(client->ioc, NBD_REP_ACK, 334 NBD_OPT_STARTTLS, errp) < 0) { 335 return NULL; 336 } 337 338 tioc = qio_channel_tls_new_server(ioc, 339 client->tlscreds, 340 client->tlsaclname, 341 errp); 342 if (!tioc) { 343 return NULL; 344 } 345 346 qio_channel_set_name(QIO_CHANNEL(tioc), "nbd-server-tls"); 347 trace_nbd_negotiate_handle_starttls_handshake(); 348 data.loop = g_main_loop_new(g_main_context_default(), FALSE); 349 qio_channel_tls_handshake(tioc, 350 nbd_tls_handshake, 351 &data, 352 NULL); 353 354 if (!data.complete) { 355 g_main_loop_run(data.loop); 356 } 357 g_main_loop_unref(data.loop); 358 if (data.error) { 359 object_unref(OBJECT(tioc)); 360 error_propagate(errp, data.error); 361 return NULL; 362 } 363 364 return QIO_CHANNEL(tioc); 365 } 366 367 /* nbd_negotiate_options 368 * Process all NBD_OPT_* client option commands. 369 * Return: 370 * -errno on error, errp is set 371 * 0 on successful negotiation, errp is not set 372 * 1 if client sent NBD_OPT_ABORT, i.e. on valid disconnect, 373 * errp is not set 374 */ 375 static int nbd_negotiate_options(NBDClient *client, Error **errp) 376 { 377 uint32_t flags; 378 bool fixedNewstyle = false; 379 Error *local_err = NULL; 380 381 /* Client sends: 382 [ 0 .. 3] client flags 383 384 [ 0 .. 7] NBD_OPTS_MAGIC 385 [ 8 .. 11] NBD option 386 [12 .. 15] Data length 387 ... Rest of request 388 389 [ 0 .. 7] NBD_OPTS_MAGIC 390 [ 8 .. 11] Second NBD option 391 [12 .. 15] Data length 392 ... Rest of request 393 */ 394 395 if (nbd_read(client->ioc, &flags, sizeof(flags), errp) < 0) { 396 error_prepend(errp, "read failed: "); 397 return -EIO; 398 } 399 trace_nbd_negotiate_options_flags(); 400 be32_to_cpus(&flags); 401 if (flags & NBD_FLAG_C_FIXED_NEWSTYLE) { 402 trace_nbd_negotiate_options_newstyle(); 403 fixedNewstyle = true; 404 flags &= ~NBD_FLAG_C_FIXED_NEWSTYLE; 405 } 406 if (flags & NBD_FLAG_C_NO_ZEROES) { 407 trace_nbd_negotiate_options_no_zeroes(); 408 client->no_zeroes = true; 409 flags &= ~NBD_FLAG_C_NO_ZEROES; 410 } 411 if (flags != 0) { 412 error_setg(errp, "Unknown client flags 0x%" PRIx32 " received", flags); 413 return -EIO; 414 } 415 416 while (1) { 417 int ret; 418 uint32_t option, length; 419 uint64_t magic; 420 421 if (nbd_read(client->ioc, &magic, sizeof(magic), errp) < 0) { 422 error_prepend(errp, "read failed: "); 423 return -EINVAL; 424 } 425 magic = be64_to_cpu(magic); 426 trace_nbd_negotiate_options_check_magic(magic); 427 if (magic != NBD_OPTS_MAGIC) { 428 error_setg(errp, "Bad magic received"); 429 return -EINVAL; 430 } 431 432 if (nbd_read(client->ioc, &option, 433 sizeof(option), errp) < 0) { 434 error_prepend(errp, "read failed: "); 435 return -EINVAL; 436 } 437 option = be32_to_cpu(option); 438 439 if (nbd_read(client->ioc, &length, sizeof(length), errp) < 0) { 440 error_prepend(errp, "read failed: "); 441 return -EINVAL; 442 } 443 length = be32_to_cpu(length); 444 445 trace_nbd_negotiate_options_check_option(option); 446 if (client->tlscreds && 447 client->ioc == (QIOChannel *)client->sioc) { 448 QIOChannel *tioc; 449 if (!fixedNewstyle) { 450 error_setg(errp, "Unsupported option 0x%" PRIx32, option); 451 return -EINVAL; 452 } 453 switch (option) { 454 case NBD_OPT_STARTTLS: 455 tioc = nbd_negotiate_handle_starttls(client, length, errp); 456 if (!tioc) { 457 return -EIO; 458 } 459 object_unref(OBJECT(client->ioc)); 460 client->ioc = QIO_CHANNEL(tioc); 461 break; 462 463 case NBD_OPT_EXPORT_NAME: 464 /* No way to return an error to client, so drop connection */ 465 error_setg(errp, "Option 0x%x not permitted before TLS", 466 option); 467 return -EINVAL; 468 469 default: 470 if (nbd_drop(client->ioc, length, errp) < 0) { 471 return -EIO; 472 } 473 ret = nbd_negotiate_send_rep_err(client->ioc, 474 NBD_REP_ERR_TLS_REQD, 475 option, errp, 476 "Option 0x%" PRIx32 477 "not permitted before TLS", 478 option); 479 if (ret < 0) { 480 return ret; 481 } 482 /* Let the client keep trying, unless they asked to quit */ 483 if (option == NBD_OPT_ABORT) { 484 return 1; 485 } 486 break; 487 } 488 } else if (fixedNewstyle) { 489 switch (option) { 490 case NBD_OPT_LIST: 491 ret = nbd_negotiate_handle_list(client, length, errp); 492 if (ret < 0) { 493 return ret; 494 } 495 break; 496 497 case NBD_OPT_ABORT: 498 /* NBD spec says we must try to reply before 499 * disconnecting, but that we must also tolerate 500 * guests that don't wait for our reply. */ 501 nbd_negotiate_send_rep(client->ioc, NBD_REP_ACK, option, 502 &local_err); 503 504 if (local_err != NULL) { 505 const char *error = error_get_pretty(local_err); 506 trace_nbd_opt_abort_reply_failed(error); 507 error_free(local_err); 508 } 509 510 return 1; 511 512 case NBD_OPT_EXPORT_NAME: 513 return nbd_negotiate_handle_export_name(client, length, errp); 514 515 case NBD_OPT_STARTTLS: 516 if (nbd_drop(client->ioc, length, errp) < 0) { 517 return -EIO; 518 } 519 if (client->tlscreds) { 520 ret = nbd_negotiate_send_rep_err(client->ioc, 521 NBD_REP_ERR_INVALID, 522 option, errp, 523 "TLS already enabled"); 524 } else { 525 ret = nbd_negotiate_send_rep_err(client->ioc, 526 NBD_REP_ERR_POLICY, 527 option, errp, 528 "TLS not configured"); 529 } 530 if (ret < 0) { 531 return ret; 532 } 533 break; 534 default: 535 if (nbd_drop(client->ioc, length, errp) < 0) { 536 return -EIO; 537 } 538 ret = nbd_negotiate_send_rep_err(client->ioc, 539 NBD_REP_ERR_UNSUP, 540 option, errp, 541 "Unsupported option 0x%" 542 PRIx32, 543 option); 544 if (ret < 0) { 545 return ret; 546 } 547 break; 548 } 549 } else { 550 /* 551 * If broken new-style we should drop the connection 552 * for anything except NBD_OPT_EXPORT_NAME 553 */ 554 switch (option) { 555 case NBD_OPT_EXPORT_NAME: 556 return nbd_negotiate_handle_export_name(client, length, errp); 557 558 default: 559 error_setg(errp, "Unsupported option 0x%" PRIx32, option); 560 return -EINVAL; 561 } 562 } 563 } 564 } 565 566 /* nbd_negotiate 567 * Return: 568 * -errno on error, errp is set 569 * 0 on successful negotiation, errp is not set 570 * 1 if client sent NBD_OPT_ABORT, i.e. on valid disconnect, 571 * errp is not set 572 */ 573 static coroutine_fn int nbd_negotiate(NBDClient *client, Error **errp) 574 { 575 char buf[8 + 8 + 8 + 128]; 576 int ret; 577 const uint16_t myflags = (NBD_FLAG_HAS_FLAGS | NBD_FLAG_SEND_TRIM | 578 NBD_FLAG_SEND_FLUSH | NBD_FLAG_SEND_FUA | 579 NBD_FLAG_SEND_WRITE_ZEROES); 580 bool oldStyle; 581 size_t len; 582 583 /* Old style negotiation header without options 584 [ 0 .. 7] passwd ("NBDMAGIC") 585 [ 8 .. 15] magic (NBD_CLIENT_MAGIC) 586 [16 .. 23] size 587 [24 .. 25] server flags (0) 588 [26 .. 27] export flags 589 [28 .. 151] reserved (0) 590 591 New style negotiation header with options 592 [ 0 .. 7] passwd ("NBDMAGIC") 593 [ 8 .. 15] magic (NBD_OPTS_MAGIC) 594 [16 .. 17] server flags (0) 595 ....options sent.... 596 [18 .. 25] size 597 [26 .. 27] export flags 598 [28 .. 151] reserved (0, omit if no_zeroes) 599 */ 600 601 qio_channel_set_blocking(client->ioc, false, NULL); 602 603 trace_nbd_negotiate_begin(); 604 memset(buf, 0, sizeof(buf)); 605 memcpy(buf, "NBDMAGIC", 8); 606 607 oldStyle = client->exp != NULL && !client->tlscreds; 608 if (oldStyle) { 609 trace_nbd_negotiate_old_style(client->exp->size, 610 client->exp->nbdflags | myflags); 611 stq_be_p(buf + 8, NBD_CLIENT_MAGIC); 612 stq_be_p(buf + 16, client->exp->size); 613 stw_be_p(buf + 26, client->exp->nbdflags | myflags); 614 615 if (nbd_write(client->ioc, buf, sizeof(buf), errp) < 0) { 616 error_prepend(errp, "write failed: "); 617 return -EINVAL; 618 } 619 } else { 620 stq_be_p(buf + 8, NBD_OPTS_MAGIC); 621 stw_be_p(buf + 16, NBD_FLAG_FIXED_NEWSTYLE | NBD_FLAG_NO_ZEROES); 622 623 if (nbd_write(client->ioc, buf, 18, errp) < 0) { 624 error_prepend(errp, "write failed: "); 625 return -EINVAL; 626 } 627 ret = nbd_negotiate_options(client, errp); 628 if (ret != 0) { 629 if (ret < 0) { 630 error_prepend(errp, "option negotiation failed: "); 631 } 632 return ret; 633 } 634 635 trace_nbd_negotiate_new_style_size_flags( 636 client->exp->size, client->exp->nbdflags | myflags); 637 stq_be_p(buf + 18, client->exp->size); 638 stw_be_p(buf + 26, client->exp->nbdflags | myflags); 639 len = client->no_zeroes ? 10 : sizeof(buf) - 18; 640 ret = nbd_write(client->ioc, buf + 18, len, errp); 641 if (ret < 0) { 642 error_prepend(errp, "write failed: "); 643 return ret; 644 } 645 } 646 647 trace_nbd_negotiate_success(); 648 649 return 0; 650 } 651 652 static int nbd_receive_request(QIOChannel *ioc, NBDRequest *request, 653 Error **errp) 654 { 655 uint8_t buf[NBD_REQUEST_SIZE]; 656 uint32_t magic; 657 int ret; 658 659 ret = nbd_read(ioc, buf, sizeof(buf), errp); 660 if (ret < 0) { 661 return ret; 662 } 663 664 /* Request 665 [ 0 .. 3] magic (NBD_REQUEST_MAGIC) 666 [ 4 .. 5] flags (NBD_CMD_FLAG_FUA, ...) 667 [ 6 .. 7] type (NBD_CMD_READ, ...) 668 [ 8 .. 15] handle 669 [16 .. 23] from 670 [24 .. 27] len 671 */ 672 673 magic = ldl_be_p(buf); 674 request->flags = lduw_be_p(buf + 4); 675 request->type = lduw_be_p(buf + 6); 676 request->handle = ldq_be_p(buf + 8); 677 request->from = ldq_be_p(buf + 16); 678 request->len = ldl_be_p(buf + 24); 679 680 trace_nbd_receive_request(magic, request->flags, request->type, 681 request->from, request->len); 682 683 if (magic != NBD_REQUEST_MAGIC) { 684 error_setg(errp, "invalid magic (got 0x%" PRIx32 ")", magic); 685 return -EINVAL; 686 } 687 return 0; 688 } 689 690 static int nbd_send_reply(QIOChannel *ioc, NBDReply *reply, Error **errp) 691 { 692 uint8_t buf[NBD_REPLY_SIZE]; 693 694 reply->error = system_errno_to_nbd_errno(reply->error); 695 696 trace_nbd_send_reply(reply->error, reply->handle); 697 698 /* Reply 699 [ 0 .. 3] magic (NBD_REPLY_MAGIC) 700 [ 4 .. 7] error (0 == no error) 701 [ 7 .. 15] handle 702 */ 703 stl_be_p(buf, NBD_REPLY_MAGIC); 704 stl_be_p(buf + 4, reply->error); 705 stq_be_p(buf + 8, reply->handle); 706 707 return nbd_write(ioc, buf, sizeof(buf), errp); 708 } 709 710 #define MAX_NBD_REQUESTS 16 711 712 void nbd_client_get(NBDClient *client) 713 { 714 client->refcount++; 715 } 716 717 void nbd_client_put(NBDClient *client) 718 { 719 if (--client->refcount == 0) { 720 /* The last reference should be dropped by client->close, 721 * which is called by client_close. 722 */ 723 assert(client->closing); 724 725 qio_channel_detach_aio_context(client->ioc); 726 object_unref(OBJECT(client->sioc)); 727 object_unref(OBJECT(client->ioc)); 728 if (client->tlscreds) { 729 object_unref(OBJECT(client->tlscreds)); 730 } 731 g_free(client->tlsaclname); 732 if (client->exp) { 733 QTAILQ_REMOVE(&client->exp->clients, client, next); 734 nbd_export_put(client->exp); 735 } 736 g_free(client); 737 } 738 } 739 740 static void client_close(NBDClient *client, bool negotiated) 741 { 742 if (client->closing) { 743 return; 744 } 745 746 client->closing = true; 747 748 /* Force requests to finish. They will drop their own references, 749 * then we'll close the socket and free the NBDClient. 750 */ 751 qio_channel_shutdown(client->ioc, QIO_CHANNEL_SHUTDOWN_BOTH, 752 NULL); 753 754 /* Also tell the client, so that they release their reference. */ 755 if (client->close_fn) { 756 client->close_fn(client, negotiated); 757 } 758 } 759 760 static NBDRequestData *nbd_request_get(NBDClient *client) 761 { 762 NBDRequestData *req; 763 764 assert(client->nb_requests <= MAX_NBD_REQUESTS - 1); 765 client->nb_requests++; 766 767 req = g_new0(NBDRequestData, 1); 768 nbd_client_get(client); 769 req->client = client; 770 return req; 771 } 772 773 static void nbd_request_put(NBDRequestData *req) 774 { 775 NBDClient *client = req->client; 776 777 if (req->data) { 778 qemu_vfree(req->data); 779 } 780 g_free(req); 781 782 client->nb_requests--; 783 nbd_client_receive_next_request(client); 784 785 nbd_client_put(client); 786 } 787 788 static void blk_aio_attached(AioContext *ctx, void *opaque) 789 { 790 NBDExport *exp = opaque; 791 NBDClient *client; 792 793 trace_nbd_blk_aio_attached(exp->name, ctx); 794 795 exp->ctx = ctx; 796 797 QTAILQ_FOREACH(client, &exp->clients, next) { 798 qio_channel_attach_aio_context(client->ioc, ctx); 799 if (client->recv_coroutine) { 800 aio_co_schedule(ctx, client->recv_coroutine); 801 } 802 if (client->send_coroutine) { 803 aio_co_schedule(ctx, client->send_coroutine); 804 } 805 } 806 } 807 808 static void blk_aio_detach(void *opaque) 809 { 810 NBDExport *exp = opaque; 811 NBDClient *client; 812 813 trace_nbd_blk_aio_detach(exp->name, exp->ctx); 814 815 QTAILQ_FOREACH(client, &exp->clients, next) { 816 qio_channel_detach_aio_context(client->ioc); 817 } 818 819 exp->ctx = NULL; 820 } 821 822 static void nbd_eject_notifier(Notifier *n, void *data) 823 { 824 NBDExport *exp = container_of(n, NBDExport, eject_notifier); 825 nbd_export_close(exp); 826 } 827 828 NBDExport *nbd_export_new(BlockDriverState *bs, off_t dev_offset, off_t size, 829 uint16_t nbdflags, void (*close)(NBDExport *), 830 bool writethrough, BlockBackend *on_eject_blk, 831 Error **errp) 832 { 833 BlockBackend *blk; 834 NBDExport *exp = g_malloc0(sizeof(NBDExport)); 835 uint64_t perm; 836 int ret; 837 838 /* Don't allow resize while the NBD server is running, otherwise we don't 839 * care what happens with the node. */ 840 perm = BLK_PERM_CONSISTENT_READ; 841 if ((nbdflags & NBD_FLAG_READ_ONLY) == 0) { 842 perm |= BLK_PERM_WRITE; 843 } 844 blk = blk_new(perm, BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED | 845 BLK_PERM_WRITE | BLK_PERM_GRAPH_MOD); 846 ret = blk_insert_bs(blk, bs, errp); 847 if (ret < 0) { 848 goto fail; 849 } 850 blk_set_enable_write_cache(blk, !writethrough); 851 852 exp->refcount = 1; 853 QTAILQ_INIT(&exp->clients); 854 exp->blk = blk; 855 exp->dev_offset = dev_offset; 856 exp->nbdflags = nbdflags; 857 exp->size = size < 0 ? blk_getlength(blk) : size; 858 if (exp->size < 0) { 859 error_setg_errno(errp, -exp->size, 860 "Failed to determine the NBD export's length"); 861 goto fail; 862 } 863 exp->size -= exp->size % BDRV_SECTOR_SIZE; 864 865 exp->close = close; 866 exp->ctx = blk_get_aio_context(blk); 867 blk_add_aio_context_notifier(blk, blk_aio_attached, blk_aio_detach, exp); 868 869 if (on_eject_blk) { 870 blk_ref(on_eject_blk); 871 exp->eject_notifier_blk = on_eject_blk; 872 exp->eject_notifier.notify = nbd_eject_notifier; 873 blk_add_remove_bs_notifier(on_eject_blk, &exp->eject_notifier); 874 } 875 876 /* 877 * NBD exports are used for non-shared storage migration. Make sure 878 * that BDRV_O_INACTIVE is cleared and the image is ready for write 879 * access since the export could be available before migration handover. 880 */ 881 aio_context_acquire(exp->ctx); 882 blk_invalidate_cache(blk, NULL); 883 aio_context_release(exp->ctx); 884 return exp; 885 886 fail: 887 blk_unref(blk); 888 g_free(exp); 889 return NULL; 890 } 891 892 NBDExport *nbd_export_find(const char *name) 893 { 894 NBDExport *exp; 895 QTAILQ_FOREACH(exp, &exports, next) { 896 if (strcmp(name, exp->name) == 0) { 897 return exp; 898 } 899 } 900 901 return NULL; 902 } 903 904 void nbd_export_set_name(NBDExport *exp, const char *name) 905 { 906 if (exp->name == name) { 907 return; 908 } 909 910 nbd_export_get(exp); 911 if (exp->name != NULL) { 912 g_free(exp->name); 913 exp->name = NULL; 914 QTAILQ_REMOVE(&exports, exp, next); 915 nbd_export_put(exp); 916 } 917 if (name != NULL) { 918 nbd_export_get(exp); 919 exp->name = g_strdup(name); 920 QTAILQ_INSERT_TAIL(&exports, exp, next); 921 } 922 nbd_export_put(exp); 923 } 924 925 void nbd_export_set_description(NBDExport *exp, const char *description) 926 { 927 g_free(exp->description); 928 exp->description = g_strdup(description); 929 } 930 931 void nbd_export_close(NBDExport *exp) 932 { 933 NBDClient *client, *next; 934 935 nbd_export_get(exp); 936 QTAILQ_FOREACH_SAFE(client, &exp->clients, next, next) { 937 client_close(client, true); 938 } 939 nbd_export_set_name(exp, NULL); 940 nbd_export_set_description(exp, NULL); 941 nbd_export_put(exp); 942 } 943 944 void nbd_export_get(NBDExport *exp) 945 { 946 assert(exp->refcount > 0); 947 exp->refcount++; 948 } 949 950 void nbd_export_put(NBDExport *exp) 951 { 952 assert(exp->refcount > 0); 953 if (exp->refcount == 1) { 954 nbd_export_close(exp); 955 } 956 957 if (--exp->refcount == 0) { 958 assert(exp->name == NULL); 959 assert(exp->description == NULL); 960 961 if (exp->close) { 962 exp->close(exp); 963 } 964 965 if (exp->blk) { 966 if (exp->eject_notifier_blk) { 967 notifier_remove(&exp->eject_notifier); 968 blk_unref(exp->eject_notifier_blk); 969 } 970 blk_remove_aio_context_notifier(exp->blk, blk_aio_attached, 971 blk_aio_detach, exp); 972 blk_unref(exp->blk); 973 exp->blk = NULL; 974 } 975 976 g_free(exp); 977 } 978 } 979 980 BlockBackend *nbd_export_get_blockdev(NBDExport *exp) 981 { 982 return exp->blk; 983 } 984 985 void nbd_export_close_all(void) 986 { 987 NBDExport *exp, *next; 988 989 QTAILQ_FOREACH_SAFE(exp, &exports, next, next) { 990 nbd_export_close(exp); 991 } 992 } 993 994 static int nbd_co_send_reply(NBDRequestData *req, NBDReply *reply, int len, 995 Error **errp) 996 { 997 NBDClient *client = req->client; 998 int ret; 999 1000 g_assert(qemu_in_coroutine()); 1001 1002 trace_nbd_co_send_reply(reply->handle, reply->error, len); 1003 1004 qemu_co_mutex_lock(&client->send_lock); 1005 client->send_coroutine = qemu_coroutine_self(); 1006 1007 if (!len) { 1008 ret = nbd_send_reply(client->ioc, reply, errp); 1009 } else { 1010 qio_channel_set_cork(client->ioc, true); 1011 ret = nbd_send_reply(client->ioc, reply, errp); 1012 if (ret == 0) { 1013 ret = nbd_write(client->ioc, req->data, len, errp); 1014 if (ret < 0) { 1015 ret = -EIO; 1016 } 1017 } 1018 qio_channel_set_cork(client->ioc, false); 1019 } 1020 1021 client->send_coroutine = NULL; 1022 qemu_co_mutex_unlock(&client->send_lock); 1023 return ret; 1024 } 1025 1026 /* nbd_co_receive_request 1027 * Collect a client request. Return 0 if request looks valid, -EIO to drop 1028 * connection right away, and any other negative value to report an error to 1029 * the client (although the caller may still need to disconnect after reporting 1030 * the error). 1031 */ 1032 static int nbd_co_receive_request(NBDRequestData *req, NBDRequest *request, 1033 Error **errp) 1034 { 1035 NBDClient *client = req->client; 1036 1037 g_assert(qemu_in_coroutine()); 1038 assert(client->recv_coroutine == qemu_coroutine_self()); 1039 if (nbd_receive_request(client->ioc, request, errp) < 0) { 1040 return -EIO; 1041 } 1042 1043 trace_nbd_co_receive_request_decode_type(request->handle, request->type); 1044 1045 if (request->type != NBD_CMD_WRITE) { 1046 /* No payload, we are ready to read the next request. */ 1047 req->complete = true; 1048 } 1049 1050 if (request->type == NBD_CMD_DISC) { 1051 /* Special case: we're going to disconnect without a reply, 1052 * whether or not flags, from, or len are bogus */ 1053 return -EIO; 1054 } 1055 1056 /* Check for sanity in the parameters, part 1. Defer as many 1057 * checks as possible until after reading any NBD_CMD_WRITE 1058 * payload, so we can try and keep the connection alive. */ 1059 if ((request->from + request->len) < request->from) { 1060 error_setg(errp, 1061 "integer overflow detected, you're probably being attacked"); 1062 return -EINVAL; 1063 } 1064 1065 if (request->type == NBD_CMD_READ || request->type == NBD_CMD_WRITE) { 1066 if (request->len > NBD_MAX_BUFFER_SIZE) { 1067 error_setg(errp, "len (%" PRIu32" ) is larger than max len (%u)", 1068 request->len, NBD_MAX_BUFFER_SIZE); 1069 return -EINVAL; 1070 } 1071 1072 req->data = blk_try_blockalign(client->exp->blk, request->len); 1073 if (req->data == NULL) { 1074 error_setg(errp, "No memory"); 1075 return -ENOMEM; 1076 } 1077 } 1078 if (request->type == NBD_CMD_WRITE) { 1079 if (nbd_read(client->ioc, req->data, request->len, errp) < 0) { 1080 error_prepend(errp, "reading from socket failed: "); 1081 return -EIO; 1082 } 1083 req->complete = true; 1084 1085 trace_nbd_co_receive_request_payload_received(request->handle, 1086 request->len); 1087 } 1088 1089 /* Sanity checks, part 2. */ 1090 if (request->from + request->len > client->exp->size) { 1091 error_setg(errp, "operation past EOF; From: %" PRIu64 ", Len: %" PRIu32 1092 ", Size: %" PRIu64, request->from, request->len, 1093 (uint64_t)client->exp->size); 1094 return request->type == NBD_CMD_WRITE ? -ENOSPC : -EINVAL; 1095 } 1096 if (request->flags & ~(NBD_CMD_FLAG_FUA | NBD_CMD_FLAG_NO_HOLE)) { 1097 error_setg(errp, "unsupported flags (got 0x%x)", request->flags); 1098 return -EINVAL; 1099 } 1100 if (request->type != NBD_CMD_WRITE_ZEROES && 1101 (request->flags & NBD_CMD_FLAG_NO_HOLE)) { 1102 error_setg(errp, "unexpected flags (got 0x%x)", request->flags); 1103 return -EINVAL; 1104 } 1105 1106 return 0; 1107 } 1108 1109 /* Owns a reference to the NBDClient passed as opaque. */ 1110 static coroutine_fn void nbd_trip(void *opaque) 1111 { 1112 NBDClient *client = opaque; 1113 NBDExport *exp = client->exp; 1114 NBDRequestData *req; 1115 NBDRequest request = { 0 }; /* GCC thinks it can be used uninitialized */ 1116 NBDReply reply; 1117 int ret; 1118 int flags; 1119 int reply_data_len = 0; 1120 Error *local_err = NULL; 1121 1122 trace_nbd_trip(); 1123 if (client->closing) { 1124 nbd_client_put(client); 1125 return; 1126 } 1127 1128 req = nbd_request_get(client); 1129 ret = nbd_co_receive_request(req, &request, &local_err); 1130 client->recv_coroutine = NULL; 1131 nbd_client_receive_next_request(client); 1132 if (ret == -EIO) { 1133 goto disconnect; 1134 } 1135 1136 reply.handle = request.handle; 1137 reply.error = 0; 1138 1139 if (ret < 0) { 1140 reply.error = -ret; 1141 goto reply; 1142 } 1143 1144 if (client->closing) { 1145 /* 1146 * The client may be closed when we are blocked in 1147 * nbd_co_receive_request() 1148 */ 1149 goto done; 1150 } 1151 1152 switch (request.type) { 1153 case NBD_CMD_READ: 1154 /* XXX: NBD Protocol only documents use of FUA with WRITE */ 1155 if (request.flags & NBD_CMD_FLAG_FUA) { 1156 ret = blk_co_flush(exp->blk); 1157 if (ret < 0) { 1158 error_setg_errno(&local_err, -ret, "flush failed"); 1159 reply.error = -ret; 1160 break; 1161 } 1162 } 1163 1164 ret = blk_pread(exp->blk, request.from + exp->dev_offset, 1165 req->data, request.len); 1166 if (ret < 0) { 1167 error_setg_errno(&local_err, -ret, "reading from file failed"); 1168 reply.error = -ret; 1169 break; 1170 } 1171 1172 reply_data_len = request.len; 1173 1174 break; 1175 case NBD_CMD_WRITE: 1176 if (exp->nbdflags & NBD_FLAG_READ_ONLY) { 1177 reply.error = EROFS; 1178 break; 1179 } 1180 1181 flags = 0; 1182 if (request.flags & NBD_CMD_FLAG_FUA) { 1183 flags |= BDRV_REQ_FUA; 1184 } 1185 ret = blk_pwrite(exp->blk, request.from + exp->dev_offset, 1186 req->data, request.len, flags); 1187 if (ret < 0) { 1188 error_setg_errno(&local_err, -ret, "writing to file failed"); 1189 reply.error = -ret; 1190 } 1191 1192 break; 1193 case NBD_CMD_WRITE_ZEROES: 1194 if (exp->nbdflags & NBD_FLAG_READ_ONLY) { 1195 error_setg(&local_err, "Server is read-only, return error"); 1196 reply.error = EROFS; 1197 break; 1198 } 1199 1200 flags = 0; 1201 if (request.flags & NBD_CMD_FLAG_FUA) { 1202 flags |= BDRV_REQ_FUA; 1203 } 1204 if (!(request.flags & NBD_CMD_FLAG_NO_HOLE)) { 1205 flags |= BDRV_REQ_MAY_UNMAP; 1206 } 1207 ret = blk_pwrite_zeroes(exp->blk, request.from + exp->dev_offset, 1208 request.len, flags); 1209 if (ret < 0) { 1210 error_setg_errno(&local_err, -ret, "writing to file failed"); 1211 reply.error = -ret; 1212 } 1213 1214 break; 1215 case NBD_CMD_DISC: 1216 /* unreachable, thanks to special case in nbd_co_receive_request() */ 1217 abort(); 1218 1219 case NBD_CMD_FLUSH: 1220 ret = blk_co_flush(exp->blk); 1221 if (ret < 0) { 1222 error_setg_errno(&local_err, -ret, "flush failed"); 1223 reply.error = -ret; 1224 } 1225 1226 break; 1227 case NBD_CMD_TRIM: 1228 ret = blk_co_pdiscard(exp->blk, request.from + exp->dev_offset, 1229 request.len); 1230 if (ret < 0) { 1231 error_setg_errno(&local_err, -ret, "discard failed"); 1232 reply.error = -ret; 1233 } 1234 1235 break; 1236 default: 1237 error_setg(&local_err, "invalid request type (%" PRIu32 ") received", 1238 request.type); 1239 reply.error = EINVAL; 1240 } 1241 1242 reply: 1243 if (local_err) { 1244 /* If we are here local_err is not fatal error, already stored in 1245 * reply.error */ 1246 error_report_err(local_err); 1247 local_err = NULL; 1248 } 1249 1250 if (nbd_co_send_reply(req, &reply, reply_data_len, &local_err) < 0) { 1251 error_prepend(&local_err, "Failed to send reply: "); 1252 goto disconnect; 1253 } 1254 1255 /* We must disconnect after NBD_CMD_WRITE if we did not 1256 * read the payload. 1257 */ 1258 if (!req->complete) { 1259 error_setg(&local_err, "Request handling failed in intermediate state"); 1260 goto disconnect; 1261 } 1262 1263 done: 1264 nbd_request_put(req); 1265 nbd_client_put(client); 1266 return; 1267 1268 disconnect: 1269 if (local_err) { 1270 error_reportf_err(local_err, "Disconnect client, due to: "); 1271 } 1272 nbd_request_put(req); 1273 client_close(client, true); 1274 nbd_client_put(client); 1275 } 1276 1277 static void nbd_client_receive_next_request(NBDClient *client) 1278 { 1279 if (!client->recv_coroutine && client->nb_requests < MAX_NBD_REQUESTS) { 1280 nbd_client_get(client); 1281 client->recv_coroutine = qemu_coroutine_create(nbd_trip, client); 1282 aio_co_schedule(client->exp->ctx, client->recv_coroutine); 1283 } 1284 } 1285 1286 static coroutine_fn void nbd_co_client_start(void *opaque) 1287 { 1288 NBDClient *client = opaque; 1289 NBDExport *exp = client->exp; 1290 Error *local_err = NULL; 1291 1292 if (exp) { 1293 nbd_export_get(exp); 1294 QTAILQ_INSERT_TAIL(&exp->clients, client, next); 1295 } 1296 qemu_co_mutex_init(&client->send_lock); 1297 1298 if (nbd_negotiate(client, &local_err)) { 1299 if (local_err) { 1300 error_report_err(local_err); 1301 } 1302 client_close(client, false); 1303 return; 1304 } 1305 1306 nbd_client_receive_next_request(client); 1307 } 1308 1309 /* 1310 * Create a new client listener on the given export @exp, using the 1311 * given channel @sioc. Begin servicing it in a coroutine. When the 1312 * connection closes, call @close_fn with an indication of whether the 1313 * client completed negotiation. 1314 */ 1315 void nbd_client_new(NBDExport *exp, 1316 QIOChannelSocket *sioc, 1317 QCryptoTLSCreds *tlscreds, 1318 const char *tlsaclname, 1319 void (*close_fn)(NBDClient *, bool)) 1320 { 1321 NBDClient *client; 1322 Coroutine *co; 1323 1324 client = g_malloc0(sizeof(NBDClient)); 1325 client->refcount = 1; 1326 client->exp = exp; 1327 client->tlscreds = tlscreds; 1328 if (tlscreds) { 1329 object_ref(OBJECT(client->tlscreds)); 1330 } 1331 client->tlsaclname = g_strdup(tlsaclname); 1332 client->sioc = sioc; 1333 object_ref(OBJECT(client->sioc)); 1334 client->ioc = QIO_CHANNEL(sioc); 1335 object_ref(OBJECT(client->ioc)); 1336 client->close_fn = close_fn; 1337 1338 co = qemu_coroutine_create(nbd_co_client_start, client); 1339 qemu_coroutine_enter(co); 1340 } 1341