xref: /openbmc/qemu/nbd/server.c (revision af4c4fd128d3e73f7435a3723e9fcd2ec64c5f4c)
1798bfe00SFam Zheng /*
2a7c8ed36SEric Blake  *  Copyright Red Hat
3798bfe00SFam Zheng  *  Copyright (C) 2005  Anthony Liguori <anthony@codemonkey.ws>
4798bfe00SFam Zheng  *
5798bfe00SFam Zheng  *  Network Block Device Server Side
6798bfe00SFam Zheng  *
7798bfe00SFam Zheng  *  This program is free software; you can redistribute it and/or modify
8798bfe00SFam Zheng  *  it under the terms of the GNU General Public License as published by
9798bfe00SFam Zheng  *  the Free Software Foundation; under version 2 of the License.
10798bfe00SFam Zheng  *
11798bfe00SFam Zheng  *  This program is distributed in the hope that it will be useful,
12798bfe00SFam Zheng  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
13798bfe00SFam Zheng  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14798bfe00SFam Zheng  *  GNU General Public License for more details.
15798bfe00SFam Zheng  *
16798bfe00SFam Zheng  *  You should have received a copy of the GNU General Public License
17798bfe00SFam Zheng  *  along with this program; if not, see <http://www.gnu.org/licenses/>.
18798bfe00SFam Zheng  */
19798bfe00SFam Zheng 
20d38ea87aSPeter Maydell #include "qemu/osdep.h"
2156ee8626SKevin Wolf 
22e2c1c34fSMarkus Armbruster #include "block/block_int.h"
2356ee8626SKevin Wolf #include "block/export.h"
24e2c1c34fSMarkus Armbruster #include "block/dirty-bitmap.h"
25da34e65cSMarkus Armbruster #include "qapi/error.h"
26dc5e9ac7SMarkus Armbruster #include "qemu/queue.h"
279588463eSVladimir Sementsov-Ogievskiy #include "trace.h"
28798bfe00SFam Zheng #include "nbd-internal.h"
29416e34bdSEric Blake #include "qemu/units.h"
305df022cfSPeter Maydell #include "qemu/memalign.h"
31798bfe00SFam Zheng 
/*
 * Numeric ids of the meta contexts known to this server.  Dirty bitmaps
 * are numbered 'NBD_META_ID_DIRTY_BITMAP + i', which is why that id has
 * to remain the largest one.
 */
#define NBD_META_ID_BASE_ALLOCATION 0
#define NBD_META_ID_ALLOCATION_DEPTH 1
#define NBD_META_ID_DIRTY_BITMAP 2

/*
 * NBD_MAX_BLOCK_STATUS_EXTENTS: upper bound on extents, sized as 1 MiB
 * of extents data.  The value is empirical.  Should it ever need to
 * grow, keep in mind that the NBD protocol recommends no larger than
 * 32 MB, so that the client won't consider the reply as a denial of
 * service attack.
 */
#define NBD_MAX_BLOCK_STATUS_EXTENTS (1 * MiB / 8)
44e7b1948dSVladimir Sementsov-Ogievskiy 
system_errno_to_nbd_errno(int err)45798bfe00SFam Zheng static int system_errno_to_nbd_errno(int err)
46798bfe00SFam Zheng {
47798bfe00SFam Zheng     switch (err) {
48798bfe00SFam Zheng     case 0:
49798bfe00SFam Zheng         return NBD_SUCCESS;
50798bfe00SFam Zheng     case EPERM:
51c0301fccSEric Blake     case EROFS:
52798bfe00SFam Zheng         return NBD_EPERM;
53798bfe00SFam Zheng     case EIO:
54798bfe00SFam Zheng         return NBD_EIO;
55798bfe00SFam Zheng     case ENOMEM:
56798bfe00SFam Zheng         return NBD_ENOMEM;
57798bfe00SFam Zheng #ifdef EDQUOT
58798bfe00SFam Zheng     case EDQUOT:
59798bfe00SFam Zheng #endif
60798bfe00SFam Zheng     case EFBIG:
61798bfe00SFam Zheng     case ENOSPC:
62798bfe00SFam Zheng         return NBD_ENOSPC;
63bae245d1SEric Blake     case EOVERFLOW:
64bae245d1SEric Blake         return NBD_EOVERFLOW;
650a479545SEric Blake     case ENOTSUP:
660a479545SEric Blake #if ENOTSUP != EOPNOTSUPP
670a479545SEric Blake     case EOPNOTSUPP:
680a479545SEric Blake #endif
690a479545SEric Blake         return NBD_ENOTSUP;
70b6f5d3b5SEric Blake     case ESHUTDOWN:
71b6f5d3b5SEric Blake         return NBD_ESHUTDOWN;
72798bfe00SFam Zheng     case EINVAL:
73798bfe00SFam Zheng     default:
74798bfe00SFam Zheng         return NBD_EINVAL;
75798bfe00SFam Zheng     }
76798bfe00SFam Zheng }
77798bfe00SFam Zheng 
78798bfe00SFam Zheng /* Definitions for opaque data types */
79798bfe00SFam Zheng 
80315f78abSEric Blake typedef struct NBDRequestData NBDRequestData;
81798bfe00SFam Zheng 
82315f78abSEric Blake struct NBDRequestData {
83798bfe00SFam Zheng     NBDClient *client;
84798bfe00SFam Zheng     uint8_t *data;
8529b6c3b3SEric Blake     bool complete;
86798bfe00SFam Zheng };
87798bfe00SFam Zheng 
88798bfe00SFam Zheng struct NBDExport {
8956ee8626SKevin Wolf     BlockExport common;
90798bfe00SFam Zheng 
91798bfe00SFam Zheng     char *name;
92b1a75b33SEric Blake     char *description;
939d26dfcbSEric Blake     uint64_t size;
947423f417SEric Blake     uint16_t nbdflags;
95798bfe00SFam Zheng     QTAILQ_HEAD(, NBDClient) clients;
96798bfe00SFam Zheng     QTAILQ_ENTRY(NBDExport) next;
97798bfe00SFam Zheng 
98cd7fca95SKevin Wolf     BlockBackend *eject_notifier_blk;
99741cc431SMax Reitz     Notifier eject_notifier;
1003d068affSVladimir Sementsov-Ogievskiy 
10171719cd5SEric Blake     bool allocation_depth;
1023b1f244cSEric Blake     BdrvDirtyBitmap **export_bitmaps;
1033b1f244cSEric Blake     size_t nr_export_bitmaps;
104798bfe00SFam Zheng };
105798bfe00SFam Zheng 
106798bfe00SFam Zheng static QTAILQ_HEAD(, NBDExport) exports = QTAILQ_HEAD_INITIALIZER(exports);
107798bfe00SFam Zheng 
108fd358d83SEric Blake /*
109fd358d83SEric Blake  * NBDMetaContexts represents a list of meta contexts in use,
110e7b1948dSVladimir Sementsov-Ogievskiy  * as selected by NBD_OPT_SET_META_CONTEXT. Also used for
111fd358d83SEric Blake  * NBD_OPT_LIST_META_CONTEXT.
112fd358d83SEric Blake  */
113fd358d83SEric Blake struct NBDMetaContexts {
114fd358d83SEric Blake     const NBDExport *exp; /* associated export */
11547ec485eSEric Blake     size_t count; /* number of negotiated contexts */
116e7b1948dSVladimir Sementsov-Ogievskiy     bool base_allocation; /* export base:allocation context (block status) */
11771719cd5SEric Blake     bool allocation_depth; /* export qemu:allocation-depth */
1183b1f244cSEric Blake     bool *bitmaps; /*
1193b1f244cSEric Blake                     * export qemu:dirty-bitmap:<export bitmap name>,
1203b1f244cSEric Blake                     * sized by exp->nr_export_bitmaps
1213b1f244cSEric Blake                     */
122fd358d83SEric Blake };
123e7b1948dSVladimir Sementsov-Ogievskiy 
124798bfe00SFam Zheng struct NBDClient {
125f816310dSStefan Hajnoczi     int refcount; /* atomic */
1260c9390d9SEric Blake     void (*close_fn)(NBDClient *client, bool negotiated);
127fb1c2aaaSEric Blake     void *owner;
128798bfe00SFam Zheng 
1297075d235SStefan Hajnoczi     QemuMutex lock;
1307075d235SStefan Hajnoczi 
131798bfe00SFam Zheng     NBDExport *exp;
132f95910feSDaniel P. Berrange     QCryptoTLSCreds *tlscreds;
133b25e12daSDaniel P. Berrange     char *tlsauthz;
134fb1c2aaaSEric Blake     uint32_t handshake_max_secs;
1351c778ef7SDaniel P. Berrange     QIOChannelSocket *sioc; /* The underlying data channel */
1361c778ef7SDaniel P. Berrange     QIOChannel *ioc; /* The current I/O channel which may differ (eg TLS) */
137798bfe00SFam Zheng 
1387075d235SStefan Hajnoczi     Coroutine *recv_coroutine; /* protected by lock */
139798bfe00SFam Zheng 
140798bfe00SFam Zheng     CoMutex send_lock;
141798bfe00SFam Zheng     Coroutine *send_coroutine;
142798bfe00SFam Zheng 
1437075d235SStefan Hajnoczi     bool read_yielding; /* protected by lock */
1447075d235SStefan Hajnoczi     bool quiescing; /* protected by lock */
145f148ae7dSSergio Lopez 
146798bfe00SFam Zheng     QTAILQ_ENTRY(NBDClient) next;
1477075d235SStefan Hajnoczi     int nb_requests; /* protected by lock */
1487075d235SStefan Hajnoczi     bool closing; /* protected by lock */
1495c54e7faSVladimir Sementsov-Ogievskiy 
1506e280648SEric Blake     uint32_t check_align; /* If non-zero, check for aligned client requests */
1516e280648SEric Blake 
152ac132d05SEric Blake     NBDMode mode;
153fd358d83SEric Blake     NBDMetaContexts contexts; /* Negotiated meta contexts */
154798bfe00SFam Zheng 
1550cfae925SVladimir Sementsov-Ogievskiy     uint32_t opt; /* Current option being negotiated */
1560cfae925SVladimir Sementsov-Ogievskiy     uint32_t optlen; /* remaining length of data in ioc for the option being
1570cfae925SVladimir Sementsov-Ogievskiy                         negotiated now */
1580cfae925SVladimir Sementsov-Ogievskiy };
159798bfe00SFam Zheng 
160ff82911cSPaolo Bonzini static void nbd_client_receive_next_request(NBDClient *client);
161798bfe00SFam Zheng 
162798bfe00SFam Zheng /* Basic flow for negotiation
163798bfe00SFam Zheng 
164798bfe00SFam Zheng    Server         Client
165798bfe00SFam Zheng    Negotiate
166798bfe00SFam Zheng 
167798bfe00SFam Zheng    or
168798bfe00SFam Zheng 
169798bfe00SFam Zheng    Server         Client
170798bfe00SFam Zheng    Negotiate #1
171798bfe00SFam Zheng                   Option
172798bfe00SFam Zheng    Negotiate #2
173798bfe00SFam Zheng 
174798bfe00SFam Zheng    ----
175798bfe00SFam Zheng 
176798bfe00SFam Zheng    followed by
177798bfe00SFam Zheng 
178798bfe00SFam Zheng    Server         Client
179798bfe00SFam Zheng                   Request
180798bfe00SFam Zheng    Response
181798bfe00SFam Zheng                   Request
182798bfe00SFam Zheng    Response
183798bfe00SFam Zheng                   ...
184798bfe00SFam Zheng    ...
185798bfe00SFam Zheng                   Request (type == 2)
186798bfe00SFam Zheng 
187798bfe00SFam Zheng */
188798bfe00SFam Zheng 
/*
 * Fill in @rep as an option reply header: NBD_REP_MAGIC followed by
 * @option, @type and @length, each stored big-endian.
 */
static inline void set_be_option_rep(NBDOptionReply *rep, uint32_t option,
                                     uint32_t type, uint32_t length)
{
    /* The four stores hit distinct fields, so their order is irrelevant. */
    stl_be_p(&rep->length, length);
    stl_be_p(&rep->type, type);
    stl_be_p(&rep->option, option);
    stq_be_p(&rep->magic, NBD_REP_MAGIC);
}
1971d17922aSVladimir Sementsov-Ogievskiy 
198526e5c65SEric Blake /* Send a reply header, including length, but no payload.
199526e5c65SEric Blake  * Return -errno on error, 0 on success. */
2004fa333e0SEric Blake static coroutine_fn int
nbd_negotiate_send_rep_len(NBDClient * client,uint32_t type,uint32_t len,Error ** errp)2014fa333e0SEric Blake nbd_negotiate_send_rep_len(NBDClient *client, uint32_t type,
2020cfae925SVladimir Sementsov-Ogievskiy                            uint32_t len, Error **errp)
203798bfe00SFam Zheng {
2041d17922aSVladimir Sementsov-Ogievskiy     NBDOptionReply rep;
205798bfe00SFam Zheng 
2061d17922aSVladimir Sementsov-Ogievskiy     trace_nbd_negotiate_send_rep_len(client->opt, nbd_opt_lookup(client->opt),
2073736cc5bSEric Blake                                      type, nbd_rep_lookup(type), len);
208f95910feSDaniel P. Berrange 
209f37708f6SEric Blake     assert(len < NBD_MAX_BUFFER_SIZE);
2102fd2c840SVladimir Sementsov-Ogievskiy 
2111d17922aSVladimir Sementsov-Ogievskiy     set_be_option_rep(&rep, client->opt, type, len);
2121d17922aSVladimir Sementsov-Ogievskiy     return nbd_write(client->ioc, &rep, sizeof(rep), errp);
213798bfe00SFam Zheng }
214798bfe00SFam Zheng 
215526e5c65SEric Blake /* Send a reply header with default 0 length.
216526e5c65SEric Blake  * Return -errno on error, 0 on success. */
2174fa333e0SEric Blake static coroutine_fn int
nbd_negotiate_send_rep(NBDClient * client,uint32_t type,Error ** errp)2184fa333e0SEric Blake nbd_negotiate_send_rep(NBDClient *client, uint32_t type, Error **errp)
219526e5c65SEric Blake {
2200cfae925SVladimir Sementsov-Ogievskiy     return nbd_negotiate_send_rep_len(client, type, 0, errp);
221526e5c65SEric Blake }
222526e5c65SEric Blake 
22336683283SEric Blake /* Send an error reply.
22436683283SEric Blake  * Return -errno on error, 0 on success. */
2254fa333e0SEric Blake static coroutine_fn int G_GNUC_PRINTF(4, 0)
nbd_negotiate_send_rep_verr(NBDClient * client,uint32_t type,Error ** errp,const char * fmt,va_list va)22641f5dfafSEric Blake nbd_negotiate_send_rep_verr(NBDClient *client, uint32_t type,
22741f5dfafSEric Blake                             Error **errp, const char *fmt, va_list va)
22836683283SEric Blake {
229795d946dSVladimir Sementsov-Ogievskiy     ERRP_GUARD();
230df18c04eSEric Blake     g_autofree char *msg = NULL;
23136683283SEric Blake     int ret;
23236683283SEric Blake     size_t len;
23336683283SEric Blake 
23436683283SEric Blake     msg = g_strdup_vprintf(fmt, va);
23536683283SEric Blake     len = strlen(msg);
2365c4fe018SEric Blake     assert(len < NBD_MAX_STRING_SIZE);
2379588463eSVladimir Sementsov-Ogievskiy     trace_nbd_negotiate_send_rep_err(msg);
2380cfae925SVladimir Sementsov-Ogievskiy     ret = nbd_negotiate_send_rep_len(client, type, len, errp);
23936683283SEric Blake     if (ret < 0) {
240df18c04eSEric Blake         return ret;
24136683283SEric Blake     }
2420cfae925SVladimir Sementsov-Ogievskiy     if (nbd_write(client->ioc, msg, len, errp) < 0) {
2432fd2c840SVladimir Sementsov-Ogievskiy         error_prepend(errp, "write failed (error message): ");
244df18c04eSEric Blake         return -EIO;
24536683283SEric Blake     }
2462fd2c840SVladimir Sementsov-Ogievskiy 
247df18c04eSEric Blake     return 0;
24836683283SEric Blake }
24936683283SEric Blake 
/*
 * Produce a newly allocated copy of @name that is safe to embed in an
 * error reply: names shorter than 80 bytes are copied verbatim, longer
 * ones are cut to their first 80 bytes with "..." appended.
 */
static char *
nbd_sanitize_name(const char *name)
{
    bool short_enough = strnlen(name, 80) < 80;

    /* XXX Should we also try to sanitize any control characters? */
    return short_enough ? g_strdup(name)
                        : g_strdup_printf("%.80s...", name);
}
2625c4fe018SEric Blake 
26341f5dfafSEric Blake /* Send an error reply.
26441f5dfafSEric Blake  * Return -errno on error, 0 on success. */
2654fa333e0SEric Blake static coroutine_fn int G_GNUC_PRINTF(4, 5)
nbd_negotiate_send_rep_err(NBDClient * client,uint32_t type,Error ** errp,const char * fmt,...)26641f5dfafSEric Blake nbd_negotiate_send_rep_err(NBDClient *client, uint32_t type,
26741f5dfafSEric Blake                            Error **errp, const char *fmt, ...)
26841f5dfafSEric Blake {
26941f5dfafSEric Blake     va_list va;
27041f5dfafSEric Blake     int ret;
27141f5dfafSEric Blake 
27241f5dfafSEric Blake     va_start(va, fmt);
27341f5dfafSEric Blake     ret = nbd_negotiate_send_rep_verr(client, type, errp, fmt, va);
27441f5dfafSEric Blake     va_end(va);
27541f5dfafSEric Blake     return ret;
27641f5dfafSEric Blake }
27741f5dfafSEric Blake 
278894e0280SEric Blake /* Drop remainder of the current option, and send a reply with the
279894e0280SEric Blake  * given error type and message. Return -errno on read or write
280894e0280SEric Blake  * failure; or 0 if connection is still live. */
2814fa333e0SEric Blake static coroutine_fn int G_GNUC_PRINTF(4, 0)
nbd_opt_vdrop(NBDClient * client,uint32_t type,Error ** errp,const char * fmt,va_list va)2822e425fd5SVladimir Sementsov-Ogievskiy nbd_opt_vdrop(NBDClient *client, uint32_t type, Error **errp,
2832e425fd5SVladimir Sementsov-Ogievskiy               const char *fmt, va_list va)
2842e425fd5SVladimir Sementsov-Ogievskiy {
2852e425fd5SVladimir Sementsov-Ogievskiy     int ret = nbd_drop(client->ioc, client->optlen, errp);
2862e425fd5SVladimir Sementsov-Ogievskiy 
2872e425fd5SVladimir Sementsov-Ogievskiy     client->optlen = 0;
2882e425fd5SVladimir Sementsov-Ogievskiy     if (!ret) {
2892e425fd5SVladimir Sementsov-Ogievskiy         ret = nbd_negotiate_send_rep_verr(client, type, errp, fmt, va);
2902e425fd5SVladimir Sementsov-Ogievskiy     }
2912e425fd5SVladimir Sementsov-Ogievskiy     return ret;
2922e425fd5SVladimir Sementsov-Ogievskiy }
2932e425fd5SVladimir Sementsov-Ogievskiy 
2944fa333e0SEric Blake static coroutine_fn int G_GNUC_PRINTF(4, 5)
nbd_opt_drop(NBDClient * client,uint32_t type,Error ** errp,const char * fmt,...)295894e0280SEric Blake nbd_opt_drop(NBDClient *client, uint32_t type, Error **errp,
296894e0280SEric Blake              const char *fmt, ...)
297894e0280SEric Blake {
2982e425fd5SVladimir Sementsov-Ogievskiy     int ret;
299894e0280SEric Blake     va_list va;
300894e0280SEric Blake 
301894e0280SEric Blake     va_start(va, fmt);
3022e425fd5SVladimir Sementsov-Ogievskiy     ret = nbd_opt_vdrop(client, type, errp, fmt, va);
303894e0280SEric Blake     va_end(va);
3042e425fd5SVladimir Sementsov-Ogievskiy 
3052e425fd5SVladimir Sementsov-Ogievskiy     return ret;
306894e0280SEric Blake }
3072e425fd5SVladimir Sementsov-Ogievskiy 
3084fa333e0SEric Blake static coroutine_fn int G_GNUC_PRINTF(3, 4)
nbd_opt_invalid(NBDClient * client,Error ** errp,const char * fmt,...)3092e425fd5SVladimir Sementsov-Ogievskiy nbd_opt_invalid(NBDClient *client, Error **errp, const char *fmt, ...)
3102e425fd5SVladimir Sementsov-Ogievskiy {
3112e425fd5SVladimir Sementsov-Ogievskiy     int ret;
3122e425fd5SVladimir Sementsov-Ogievskiy     va_list va;
3132e425fd5SVladimir Sementsov-Ogievskiy 
3142e425fd5SVladimir Sementsov-Ogievskiy     va_start(va, fmt);
3152e425fd5SVladimir Sementsov-Ogievskiy     ret = nbd_opt_vdrop(client, NBD_REP_ERR_INVALID, errp, fmt, va);
3162e425fd5SVladimir Sementsov-Ogievskiy     va_end(va);
3172e425fd5SVladimir Sementsov-Ogievskiy 
318894e0280SEric Blake     return ret;
319894e0280SEric Blake }
320894e0280SEric Blake 
321894e0280SEric Blake /* Read size bytes from the unparsed payload of the current option.
322d1e2c3e7SEric Blake  * If @check_nul, require that no NUL bytes appear in buffer.
323894e0280SEric Blake  * Return -errno on I/O error, 0 if option was completely handled by
324894e0280SEric Blake  * sending a reply about inconsistent lengths, or 1 on success. */
3254fa333e0SEric Blake static coroutine_fn int
nbd_opt_read(NBDClient * client,void * buffer,size_t size,bool check_nul,Error ** errp)3264fa333e0SEric Blake nbd_opt_read(NBDClient *client, void *buffer, size_t size,
327d1e2c3e7SEric Blake              bool check_nul, Error **errp)
328894e0280SEric Blake {
329894e0280SEric Blake     if (size > client->optlen) {
3302e425fd5SVladimir Sementsov-Ogievskiy         return nbd_opt_invalid(client, errp,
331894e0280SEric Blake                                "Inconsistent lengths in option %s",
332894e0280SEric Blake                                nbd_opt_lookup(client->opt));
333894e0280SEric Blake     }
334894e0280SEric Blake     client->optlen -= size;
335d1e2c3e7SEric Blake     if (qio_channel_read_all(client->ioc, buffer, size, errp) < 0) {
336d1e2c3e7SEric Blake         return -EIO;
337d1e2c3e7SEric Blake     }
338d1e2c3e7SEric Blake 
339d1e2c3e7SEric Blake     if (check_nul && strnlen(buffer, size) != size) {
340d1e2c3e7SEric Blake         return nbd_opt_invalid(client, errp,
341d1e2c3e7SEric Blake                                "Unexpected embedded NUL in option %s",
342d1e2c3e7SEric Blake                                nbd_opt_lookup(client->opt));
343d1e2c3e7SEric Blake     }
344d1e2c3e7SEric Blake     return 1;
345894e0280SEric Blake }
346894e0280SEric Blake 
347e7b1948dSVladimir Sementsov-Ogievskiy /* Drop size bytes from the unparsed payload of the current option.
348e7b1948dSVladimir Sementsov-Ogievskiy  * Return -errno on I/O error, 0 if option was completely handled by
349e7b1948dSVladimir Sementsov-Ogievskiy  * sending a reply about inconsistent lengths, or 1 on success. */
3504fa333e0SEric Blake static coroutine_fn int
nbd_opt_skip(NBDClient * client,size_t size,Error ** errp)3514fa333e0SEric Blake nbd_opt_skip(NBDClient *client, size_t size, Error **errp)
352e7b1948dSVladimir Sementsov-Ogievskiy {
353e7b1948dSVladimir Sementsov-Ogievskiy     if (size > client->optlen) {
354e7b1948dSVladimir Sementsov-Ogievskiy         return nbd_opt_invalid(client, errp,
355e7b1948dSVladimir Sementsov-Ogievskiy                                "Inconsistent lengths in option %s",
356e7b1948dSVladimir Sementsov-Ogievskiy                                nbd_opt_lookup(client->opt));
357e7b1948dSVladimir Sementsov-Ogievskiy     }
358e7b1948dSVladimir Sementsov-Ogievskiy     client->optlen -= size;
359e7b1948dSVladimir Sementsov-Ogievskiy     return nbd_drop(client->ioc, size, errp) < 0 ? -EIO : 1;
360e7b1948dSVladimir Sementsov-Ogievskiy }
361e7b1948dSVladimir Sementsov-Ogievskiy 
36212296459SVladimir Sementsov-Ogievskiy /* nbd_opt_read_name
36312296459SVladimir Sementsov-Ogievskiy  *
36412296459SVladimir Sementsov-Ogievskiy  * Read a string with the format:
36593676c88SEric Blake  *   uint32_t len     (<= NBD_MAX_STRING_SIZE)
36612296459SVladimir Sementsov-Ogievskiy  *   len bytes string (not 0-terminated)
36712296459SVladimir Sementsov-Ogievskiy  *
3689d7ab222SEric Blake  * On success, @name will be allocated.
36912296459SVladimir Sementsov-Ogievskiy  * If @length is non-null, it will be set to the actual string length.
37012296459SVladimir Sementsov-Ogievskiy  *
37112296459SVladimir Sementsov-Ogievskiy  * Return -errno on I/O error, 0 if option was completely handled by
37212296459SVladimir Sementsov-Ogievskiy  * sending a reply about inconsistent lengths, or 1 on success.
37312296459SVladimir Sementsov-Ogievskiy  */
3744fa333e0SEric Blake static coroutine_fn int
nbd_opt_read_name(NBDClient * client,char ** name,uint32_t * length,Error ** errp)3754fa333e0SEric Blake nbd_opt_read_name(NBDClient *client, char **name, uint32_t *length,
37612296459SVladimir Sementsov-Ogievskiy                   Error **errp)
37712296459SVladimir Sementsov-Ogievskiy {
37812296459SVladimir Sementsov-Ogievskiy     int ret;
37912296459SVladimir Sementsov-Ogievskiy     uint32_t len;
3809d7ab222SEric Blake     g_autofree char *local_name = NULL;
38112296459SVladimir Sementsov-Ogievskiy 
3829d7ab222SEric Blake     *name = NULL;
383d1e2c3e7SEric Blake     ret = nbd_opt_read(client, &len, sizeof(len), false, errp);
38412296459SVladimir Sementsov-Ogievskiy     if (ret <= 0) {
38512296459SVladimir Sementsov-Ogievskiy         return ret;
38612296459SVladimir Sementsov-Ogievskiy     }
38780c7c2b0SPeter Maydell     len = cpu_to_be32(len);
38812296459SVladimir Sementsov-Ogievskiy 
38993676c88SEric Blake     if (len > NBD_MAX_STRING_SIZE) {
39012296459SVladimir Sementsov-Ogievskiy         return nbd_opt_invalid(client, errp,
39112296459SVladimir Sementsov-Ogievskiy                                "Invalid name length: %" PRIu32, len);
39212296459SVladimir Sementsov-Ogievskiy     }
39312296459SVladimir Sementsov-Ogievskiy 
3949d7ab222SEric Blake     local_name = g_malloc(len + 1);
395d1e2c3e7SEric Blake     ret = nbd_opt_read(client, local_name, len, true, errp);
39612296459SVladimir Sementsov-Ogievskiy     if (ret <= 0) {
39712296459SVladimir Sementsov-Ogievskiy         return ret;
39812296459SVladimir Sementsov-Ogievskiy     }
3999d7ab222SEric Blake     local_name[len] = '\0';
40012296459SVladimir Sementsov-Ogievskiy 
40112296459SVladimir Sementsov-Ogievskiy     if (length) {
40212296459SVladimir Sementsov-Ogievskiy         *length = len;
40312296459SVladimir Sementsov-Ogievskiy     }
4049d7ab222SEric Blake     *name = g_steal_pointer(&local_name);
40512296459SVladimir Sementsov-Ogievskiy 
40612296459SVladimir Sementsov-Ogievskiy     return 1;
40712296459SVladimir Sementsov-Ogievskiy }
40812296459SVladimir Sementsov-Ogievskiy 
409526e5c65SEric Blake /* Send a single NBD_REP_SERVER reply to NBD_OPT_LIST, including payload.
410526e5c65SEric Blake  * Return -errno on error, 0 on success. */
4114fa333e0SEric Blake static coroutine_fn int
nbd_negotiate_send_rep_list(NBDClient * client,NBDExport * exp,Error ** errp)4124fa333e0SEric Blake nbd_negotiate_send_rep_list(NBDClient *client, NBDExport *exp, Error **errp)
413798bfe00SFam Zheng {
414795d946dSVladimir Sementsov-Ogievskiy     ERRP_GUARD();
415b1a75b33SEric Blake     size_t name_len, desc_len;
416526e5c65SEric Blake     uint32_t len;
417b1a75b33SEric Blake     const char *name = exp->name ? exp->name : "";
418b1a75b33SEric Blake     const char *desc = exp->description ? exp->description : "";
4190cfae925SVladimir Sementsov-Ogievskiy     QIOChannel *ioc = client->ioc;
4202e5c9ad6SVladimir Sementsov-Ogievskiy     int ret;
421798bfe00SFam Zheng 
4229588463eSVladimir Sementsov-Ogievskiy     trace_nbd_negotiate_send_rep_list(name, desc);
423b1a75b33SEric Blake     name_len = strlen(name);
424b1a75b33SEric Blake     desc_len = strlen(desc);
42593676c88SEric Blake     assert(name_len <= NBD_MAX_STRING_SIZE && desc_len <= NBD_MAX_STRING_SIZE);
426526e5c65SEric Blake     len = name_len + desc_len + sizeof(len);
4270cfae925SVladimir Sementsov-Ogievskiy     ret = nbd_negotiate_send_rep_len(client, NBD_REP_SERVER, len, errp);
4282e5c9ad6SVladimir Sementsov-Ogievskiy     if (ret < 0) {
4292e5c9ad6SVladimir Sementsov-Ogievskiy         return ret;
430798bfe00SFam Zheng     }
431526e5c65SEric Blake 
432798bfe00SFam Zheng     len = cpu_to_be32(name_len);
4332fd2c840SVladimir Sementsov-Ogievskiy     if (nbd_write(ioc, &len, sizeof(len), errp) < 0) {
4342fd2c840SVladimir Sementsov-Ogievskiy         error_prepend(errp, "write failed (name length): ");
435798bfe00SFam Zheng         return -EINVAL;
436798bfe00SFam Zheng     }
4372fd2c840SVladimir Sementsov-Ogievskiy 
4382fd2c840SVladimir Sementsov-Ogievskiy     if (nbd_write(ioc, name, name_len, errp) < 0) {
4392fd2c840SVladimir Sementsov-Ogievskiy         error_prepend(errp, "write failed (name buffer): ");
440b1a75b33SEric Blake         return -EINVAL;
441b1a75b33SEric Blake     }
4422fd2c840SVladimir Sementsov-Ogievskiy 
4432fd2c840SVladimir Sementsov-Ogievskiy     if (nbd_write(ioc, desc, desc_len, errp) < 0) {
4442fd2c840SVladimir Sementsov-Ogievskiy         error_prepend(errp, "write failed (description buffer): ");
445798bfe00SFam Zheng         return -EINVAL;
446798bfe00SFam Zheng     }
4472fd2c840SVladimir Sementsov-Ogievskiy 
448798bfe00SFam Zheng     return 0;
449798bfe00SFam Zheng }
450798bfe00SFam Zheng 
451526e5c65SEric Blake /* Process the NBD_OPT_LIST command, with a potential series of replies.
452526e5c65SEric Blake  * Return -errno on error, 0 on success. */
4534fa333e0SEric Blake static coroutine_fn int
nbd_negotiate_handle_list(NBDClient * client,Error ** errp)4544fa333e0SEric Blake nbd_negotiate_handle_list(NBDClient *client, Error **errp)
455798bfe00SFam Zheng {
456798bfe00SFam Zheng     NBDExport *exp;
4570cfae925SVladimir Sementsov-Ogievskiy     assert(client->opt == NBD_OPT_LIST);
458798bfe00SFam Zheng 
459798bfe00SFam Zheng     /* For each export, send a NBD_REP_SERVER reply. */
460798bfe00SFam Zheng     QTAILQ_FOREACH(exp, &exports, next) {
4610cfae925SVladimir Sementsov-Ogievskiy         if (nbd_negotiate_send_rep_list(client, exp, errp)) {
462798bfe00SFam Zheng             return -EINVAL;
463798bfe00SFam Zheng         }
464798bfe00SFam Zheng     }
465798bfe00SFam Zheng     /* Finish with a NBD_REP_ACK. */
4660cfae925SVladimir Sementsov-Ogievskiy     return nbd_negotiate_send_rep(client, NBD_REP_ACK, errp);
467798bfe00SFam Zheng }
468798bfe00SFam Zheng 
4694fa333e0SEric Blake static coroutine_fn void
nbd_check_meta_export(NBDClient * client,NBDExport * exp)4704fa333e0SEric Blake nbd_check_meta_export(NBDClient *client, NBDExport *exp)
471e7b1948dSVladimir Sementsov-Ogievskiy {
472fd358d83SEric Blake     if (exp != client->contexts.exp) {
473fd358d83SEric Blake         client->contexts.count = 0;
47447ec485eSEric Blake     }
475e7b1948dSVladimir Sementsov-Ogievskiy }
476e7b1948dSVladimir Sementsov-Ogievskiy 
477f37708f6SEric Blake /* Send a reply to NBD_OPT_EXPORT_NAME.
478f37708f6SEric Blake  * Return -errno on error, 0 on success. */
4794fa333e0SEric Blake static coroutine_fn int
nbd_negotiate_handle_export_name(NBDClient * client,bool no_zeroes,Error ** errp)4804fa333e0SEric Blake nbd_negotiate_handle_export_name(NBDClient *client, bool no_zeroes,
4812fd2c840SVladimir Sementsov-Ogievskiy                                  Error **errp)
482798bfe00SFam Zheng {
483795d946dSVladimir Sementsov-Ogievskiy     ERRP_GUARD();
4849d7ab222SEric Blake     g_autofree char *name = NULL;
4855f66d060SEric Blake     char buf[NBD_REPLY_EXPORT_NAME_SIZE] = "";
48623e099c3SEric Blake     size_t len;
48723e099c3SEric Blake     int ret;
488dbb38caaSEric Blake     uint16_t myflags;
489798bfe00SFam Zheng 
490798bfe00SFam Zheng     /* Client sends:
491798bfe00SFam Zheng         [20 ..  xx]   export name (length bytes)
4925f66d060SEric Blake        Server replies:
4935f66d060SEric Blake         [ 0 ..   7]   size
4945f66d060SEric Blake         [ 8 ..   9]   export flags
4955f66d060SEric Blake         [10 .. 133]   reserved     (0) [unless no_zeroes]
496798bfe00SFam Zheng      */
4979588463eSVladimir Sementsov-Ogievskiy     trace_nbd_negotiate_handle_export_name();
4989c1d2614SEric Blake     if (client->mode >= NBD_MODE_EXTENDED) {
4999c1d2614SEric Blake         error_setg(errp, "Extended headers already negotiated");
5009c1d2614SEric Blake         return -EINVAL;
5019c1d2614SEric Blake     }
50293676c88SEric Blake     if (client->optlen > NBD_MAX_STRING_SIZE) {
5032fd2c840SVladimir Sementsov-Ogievskiy         error_setg(errp, "Bad length received");
504d9faeed8SVladimir Sementsov-Ogievskiy         return -EINVAL;
505798bfe00SFam Zheng     }
5069d7ab222SEric Blake     name = g_malloc(client->optlen + 1);
507e6798f06SVladimir Sementsov-Ogievskiy     if (nbd_read(client->ioc, name, client->optlen, "export name", errp) < 0) {
50832f158a6SEric Blake         return -EIO;
509798bfe00SFam Zheng     }
5100cfae925SVladimir Sementsov-Ogievskiy     name[client->optlen] = '\0';
5110cfae925SVladimir Sementsov-Ogievskiy     client->optlen = 0;
512798bfe00SFam Zheng 
5139588463eSVladimir Sementsov-Ogievskiy     trace_nbd_negotiate_handle_export_name_request(name);
5149344e5f5SDaniel P. Berrange 
515798bfe00SFam Zheng     client->exp = nbd_export_find(name);
516798bfe00SFam Zheng     if (!client->exp) {
5172fd2c840SVladimir Sementsov-Ogievskiy         error_setg(errp, "export not found");
518d9faeed8SVladimir Sementsov-Ogievskiy         return -EINVAL;
519798bfe00SFam Zheng     }
520fd358d83SEric Blake     nbd_check_meta_export(client, client->exp);
521798bfe00SFam Zheng 
522dbb38caaSEric Blake     myflags = client->exp->nbdflags;
523ac132d05SEric Blake     if (client->mode >= NBD_MODE_STRUCTURED) {
524dbb38caaSEric Blake         myflags |= NBD_FLAG_SEND_DF;
525dbb38caaSEric Blake     }
5262dcbb11bSEric Blake     if (client->mode >= NBD_MODE_EXTENDED && client->contexts.count) {
5272dcbb11bSEric Blake         myflags |= NBD_FLAG_BLOCK_STAT_PAYLOAD;
5282dcbb11bSEric Blake     }
529dbb38caaSEric Blake     trace_nbd_negotiate_new_style_size_flags(client->exp->size, myflags);
53023e099c3SEric Blake     stq_be_p(buf, client->exp->size);
531dbb38caaSEric Blake     stw_be_p(buf + 8, myflags);
53223e099c3SEric Blake     len = no_zeroes ? 10 : sizeof(buf);
53323e099c3SEric Blake     ret = nbd_write(client->ioc, buf, len, errp);
53423e099c3SEric Blake     if (ret < 0) {
53523e099c3SEric Blake         error_prepend(errp, "write failed: ");
53623e099c3SEric Blake         return ret;
53723e099c3SEric Blake     }
53823e099c3SEric Blake 
539798bfe00SFam Zheng     QTAILQ_INSERT_TAIL(&client->exp->clients, client, next);
540c69de1beSKevin Wolf     blk_exp_ref(&client->exp->common);
541d9faeed8SVladimir Sementsov-Ogievskiy 
542d9faeed8SVladimir Sementsov-Ogievskiy     return 0;
543798bfe00SFam Zheng }
544798bfe00SFam Zheng 
545f37708f6SEric Blake /* Send a single NBD_REP_INFO, with a buffer @buf of @length bytes.
546f37708f6SEric Blake  * The buffer does NOT include the info type prefix.
547f37708f6SEric Blake  * Return -errno on error, 0 if ready to send more. */
5484fa333e0SEric Blake static coroutine_fn int
nbd_negotiate_send_info(NBDClient * client,uint16_t info,uint32_t length,void * buf,Error ** errp)5494fa333e0SEric Blake nbd_negotiate_send_info(NBDClient *client, uint16_t info, uint32_t length,
5504fa333e0SEric Blake                         void *buf, Error **errp)
551f37708f6SEric Blake {
552f37708f6SEric Blake     int rc;
553f37708f6SEric Blake 
554f37708f6SEric Blake     trace_nbd_negotiate_send_info(info, nbd_info_lookup(info), length);
5550cfae925SVladimir Sementsov-Ogievskiy     rc = nbd_negotiate_send_rep_len(client, NBD_REP_INFO,
556f37708f6SEric Blake                                     sizeof(info) + length, errp);
557f37708f6SEric Blake     if (rc < 0) {
558f37708f6SEric Blake         return rc;
559f37708f6SEric Blake     }
56080c7c2b0SPeter Maydell     info = cpu_to_be16(info);
561f37708f6SEric Blake     if (nbd_write(client->ioc, &info, sizeof(info), errp) < 0) {
562f37708f6SEric Blake         return -EIO;
563f37708f6SEric Blake     }
564f37708f6SEric Blake     if (nbd_write(client->ioc, buf, length, errp) < 0) {
565f37708f6SEric Blake         return -EIO;
566f37708f6SEric Blake     }
567f37708f6SEric Blake     return 0;
568f37708f6SEric Blake }
569f37708f6SEric Blake 
570a16a7907SEric Blake /* nbd_reject_length: Handle any unexpected payload.
571a16a7907SEric Blake  * @fatal requests that we quit talking to the client, even if we are able
572a16a7907SEric Blake  * to successfully send an error reply.
573a16a7907SEric Blake  * Return:
574a16a7907SEric Blake  * -errno  transmission error occurred or @fatal was requested, errp is set
575a16a7907SEric Blake  * 0       error message successfully sent to client, errp is not set
576a16a7907SEric Blake  */
5774fa333e0SEric Blake static coroutine_fn int
nbd_reject_length(NBDClient * client,bool fatal,Error ** errp)5784fa333e0SEric Blake nbd_reject_length(NBDClient *client, bool fatal, Error **errp)
579a16a7907SEric Blake {
580a16a7907SEric Blake     int ret;
581a16a7907SEric Blake 
5820cfae925SVladimir Sementsov-Ogievskiy     assert(client->optlen);
5832e425fd5SVladimir Sementsov-Ogievskiy     ret = nbd_opt_invalid(client, errp, "option '%s' has unexpected length",
5840cfae925SVladimir Sementsov-Ogievskiy                           nbd_opt_lookup(client->opt));
585a16a7907SEric Blake     if (fatal && !ret) {
586894e0280SEric Blake         error_setg(errp, "option '%s' has unexpected length",
5870cfae925SVladimir Sementsov-Ogievskiy                    nbd_opt_lookup(client->opt));
588a16a7907SEric Blake         return -EINVAL;
589a16a7907SEric Blake     }
590a16a7907SEric Blake     return ret;
591a16a7907SEric Blake }
592a16a7907SEric Blake 
593f37708f6SEric Blake /* Handle NBD_OPT_INFO and NBD_OPT_GO.
594f37708f6SEric Blake  * Return -errno on error, 0 if ready for next option, and 1 to move
595f37708f6SEric Blake  * into transmission phase.  */
5964fa333e0SEric Blake static coroutine_fn int
nbd_negotiate_handle_info(NBDClient * client,Error ** errp)5974fa333e0SEric Blake nbd_negotiate_handle_info(NBDClient *client, Error **errp)
598f37708f6SEric Blake {
599f37708f6SEric Blake     int rc;
6009d7ab222SEric Blake     g_autofree char *name = NULL;
601f37708f6SEric Blake     NBDExport *exp;
602f37708f6SEric Blake     uint16_t requests;
603f37708f6SEric Blake     uint16_t request;
604bbc35fc2SChristian Borntraeger     uint32_t namelen = 0;
605f37708f6SEric Blake     bool sendname = false;
6060c1d50bdSEric Blake     bool blocksize = false;
6070c1d50bdSEric Blake     uint32_t sizes[3];
608f37708f6SEric Blake     char buf[sizeof(uint64_t) + sizeof(uint16_t)];
6096e280648SEric Blake     uint32_t check_align = 0;
610dbb38caaSEric Blake     uint16_t myflags;
611f37708f6SEric Blake 
612f37708f6SEric Blake     /* Client sends:
613f37708f6SEric Blake         4 bytes: L, name length (can be 0)
614f37708f6SEric Blake         L bytes: export name
615f37708f6SEric Blake         2 bytes: N, number of requests (can be 0)
616f37708f6SEric Blake         N * 2 bytes: N requests
617f37708f6SEric Blake     */
6189d7ab222SEric Blake     rc = nbd_opt_read_name(client, &name, &namelen, errp);
619894e0280SEric Blake     if (rc <= 0) {
620894e0280SEric Blake         return rc;
621f37708f6SEric Blake     }
622f37708f6SEric Blake     trace_nbd_negotiate_handle_export_name_request(name);
623f37708f6SEric Blake 
624d1e2c3e7SEric Blake     rc = nbd_opt_read(client, &requests, sizeof(requests), false, errp);
625894e0280SEric Blake     if (rc <= 0) {
626894e0280SEric Blake         return rc;
627f37708f6SEric Blake     }
62880c7c2b0SPeter Maydell     requests = be16_to_cpu(requests);
629f37708f6SEric Blake     trace_nbd_negotiate_handle_info_requests(requests);
630f37708f6SEric Blake     while (requests--) {
631d1e2c3e7SEric Blake         rc = nbd_opt_read(client, &request, sizeof(request), false, errp);
632894e0280SEric Blake         if (rc <= 0) {
633894e0280SEric Blake             return rc;
634f37708f6SEric Blake         }
63580c7c2b0SPeter Maydell         request = be16_to_cpu(request);
636f37708f6SEric Blake         trace_nbd_negotiate_handle_info_request(request,
637f37708f6SEric Blake                                                 nbd_info_lookup(request));
6380c1d50bdSEric Blake         /* We care about NBD_INFO_NAME and NBD_INFO_BLOCK_SIZE;
6390c1d50bdSEric Blake          * everything else is either a request we don't know or
6400c1d50bdSEric Blake          * something we send regardless of request */
6410c1d50bdSEric Blake         switch (request) {
6420c1d50bdSEric Blake         case NBD_INFO_NAME:
643f37708f6SEric Blake             sendname = true;
6440c1d50bdSEric Blake             break;
6450c1d50bdSEric Blake         case NBD_INFO_BLOCK_SIZE:
6460c1d50bdSEric Blake             blocksize = true;
6470c1d50bdSEric Blake             break;
648f37708f6SEric Blake         }
649f37708f6SEric Blake     }
650894e0280SEric Blake     if (client->optlen) {
651894e0280SEric Blake         return nbd_reject_length(client, false, errp);
652894e0280SEric Blake     }
653f37708f6SEric Blake 
654f37708f6SEric Blake     exp = nbd_export_find(name);
655f37708f6SEric Blake     if (!exp) {
6565c4fe018SEric Blake         g_autofree char *sane_name = nbd_sanitize_name(name);
6575c4fe018SEric Blake 
6580cfae925SVladimir Sementsov-Ogievskiy         return nbd_negotiate_send_rep_err(client, NBD_REP_ERR_UNKNOWN,
6590cfae925SVladimir Sementsov-Ogievskiy                                           errp, "export '%s' not present",
6605c4fe018SEric Blake                                           sane_name);
661f37708f6SEric Blake     }
662fd358d83SEric Blake     if (client->opt == NBD_OPT_GO) {
663fd358d83SEric Blake         nbd_check_meta_export(client, exp);
664fd358d83SEric Blake     }
665f37708f6SEric Blake 
666f37708f6SEric Blake     /* Don't bother sending NBD_INFO_NAME unless client requested it */
667f37708f6SEric Blake     if (sendname) {
6680cfae925SVladimir Sementsov-Ogievskiy         rc = nbd_negotiate_send_info(client, NBD_INFO_NAME, namelen, name,
669f37708f6SEric Blake                                      errp);
670f37708f6SEric Blake         if (rc < 0) {
671f37708f6SEric Blake             return rc;
672f37708f6SEric Blake         }
673f37708f6SEric Blake     }
674f37708f6SEric Blake 
675f37708f6SEric Blake     /* Send NBD_INFO_DESCRIPTION only if available, regardless of
676f37708f6SEric Blake      * client request */
677f37708f6SEric Blake     if (exp->description) {
678f37708f6SEric Blake         size_t len = strlen(exp->description);
679f37708f6SEric Blake 
68093676c88SEric Blake         assert(len <= NBD_MAX_STRING_SIZE);
6810cfae925SVladimir Sementsov-Ogievskiy         rc = nbd_negotiate_send_info(client, NBD_INFO_DESCRIPTION,
682f37708f6SEric Blake                                      len, exp->description, errp);
683f37708f6SEric Blake         if (rc < 0) {
684f37708f6SEric Blake             return rc;
685f37708f6SEric Blake         }
686f37708f6SEric Blake     }
687f37708f6SEric Blake 
6880c1d50bdSEric Blake     /* Send NBD_INFO_BLOCK_SIZE always, but tweak the minimum size
6890c1d50bdSEric Blake      * according to whether the client requested it, and according to
6900c1d50bdSEric Blake      * whether this is OPT_INFO or OPT_GO. */
691b0245d64SEric Blake     /* minimum - 1 for back-compat, or actual if client will obey it. */
692b0245d64SEric Blake     if (client->opt == NBD_OPT_INFO || blocksize) {
69337a4f70cSKevin Wolf         check_align = sizes[0] = blk_get_request_alignment(exp->common.blk);
694b0245d64SEric Blake     } else {
695b0245d64SEric Blake         sizes[0] = 1;
696b0245d64SEric Blake     }
697b0245d64SEric Blake     assert(sizes[0] <= NBD_MAX_BUFFER_SIZE);
6980c1d50bdSEric Blake     /* preferred - Hard-code to 4096 for now.
6990c1d50bdSEric Blake      * TODO: is blk_bs(blk)->bl.opt_transfer appropriate? */
700b0245d64SEric Blake     sizes[1] = MAX(4096, sizes[0]);
7010c1d50bdSEric Blake     /* maximum - At most 32M, but smaller as appropriate. */
70237a4f70cSKevin Wolf     sizes[2] = MIN(blk_get_max_transfer(exp->common.blk), NBD_MAX_BUFFER_SIZE);
7030c1d50bdSEric Blake     trace_nbd_negotiate_handle_info_block_size(sizes[0], sizes[1], sizes[2]);
70480c7c2b0SPeter Maydell     sizes[0] = cpu_to_be32(sizes[0]);
70580c7c2b0SPeter Maydell     sizes[1] = cpu_to_be32(sizes[1]);
70680c7c2b0SPeter Maydell     sizes[2] = cpu_to_be32(sizes[2]);
7070cfae925SVladimir Sementsov-Ogievskiy     rc = nbd_negotiate_send_info(client, NBD_INFO_BLOCK_SIZE,
7080c1d50bdSEric Blake                                  sizeof(sizes), sizes, errp);
7090c1d50bdSEric Blake     if (rc < 0) {
7100c1d50bdSEric Blake         return rc;
7110c1d50bdSEric Blake     }
7120c1d50bdSEric Blake 
713f37708f6SEric Blake     /* Send NBD_INFO_EXPORT always */
714dbb38caaSEric Blake     myflags = exp->nbdflags;
715ac132d05SEric Blake     if (client->mode >= NBD_MODE_STRUCTURED) {
716dbb38caaSEric Blake         myflags |= NBD_FLAG_SEND_DF;
717dbb38caaSEric Blake     }
7182dcbb11bSEric Blake     if (client->mode >= NBD_MODE_EXTENDED &&
7192dcbb11bSEric Blake         (client->contexts.count || client->opt == NBD_OPT_INFO)) {
7202dcbb11bSEric Blake         myflags |= NBD_FLAG_BLOCK_STAT_PAYLOAD;
7212dcbb11bSEric Blake     }
722dbb38caaSEric Blake     trace_nbd_negotiate_new_style_size_flags(exp->size, myflags);
723f37708f6SEric Blake     stq_be_p(buf, exp->size);
724dbb38caaSEric Blake     stw_be_p(buf + 8, myflags);
7250cfae925SVladimir Sementsov-Ogievskiy     rc = nbd_negotiate_send_info(client, NBD_INFO_EXPORT,
726f37708f6SEric Blake                                  sizeof(buf), buf, errp);
727f37708f6SEric Blake     if (rc < 0) {
728f37708f6SEric Blake         return rc;
729f37708f6SEric Blake     }
730f37708f6SEric Blake 
731099fbcd6SEric Blake     /*
732099fbcd6SEric Blake      * If the client is just asking for NBD_OPT_INFO, but forgot to
733099fbcd6SEric Blake      * request block sizes in a situation that would impact
734099fbcd6SEric Blake      * performance, then return an error. But for NBD_OPT_GO, we
735099fbcd6SEric Blake      * tolerate all clients, regardless of alignments.
736099fbcd6SEric Blake      */
737099fbcd6SEric Blake     if (client->opt == NBD_OPT_INFO && !blocksize &&
73837a4f70cSKevin Wolf         blk_get_request_alignment(exp->common.blk) > 1) {
7390cfae925SVladimir Sementsov-Ogievskiy         return nbd_negotiate_send_rep_err(client,
7400cfae925SVladimir Sementsov-Ogievskiy                                           NBD_REP_ERR_BLOCK_SIZE_REQD,
7410c1d50bdSEric Blake                                           errp,
7420c1d50bdSEric Blake                                           "request NBD_INFO_BLOCK_SIZE to "
7430c1d50bdSEric Blake                                           "use this export");
7440c1d50bdSEric Blake     }
7450c1d50bdSEric Blake 
746f37708f6SEric Blake     /* Final reply */
7470cfae925SVladimir Sementsov-Ogievskiy     rc = nbd_negotiate_send_rep(client, NBD_REP_ACK, errp);
748f37708f6SEric Blake     if (rc < 0) {
749f37708f6SEric Blake         return rc;
750f37708f6SEric Blake     }
751f37708f6SEric Blake 
7520cfae925SVladimir Sementsov-Ogievskiy     if (client->opt == NBD_OPT_GO) {
753f37708f6SEric Blake         client->exp = exp;
7546e280648SEric Blake         client->check_align = check_align;
755f37708f6SEric Blake         QTAILQ_INSERT_TAIL(&client->exp->clients, client, next);
756c69de1beSKevin Wolf         blk_exp_ref(&client->exp->common);
757f37708f6SEric Blake         rc = 1;
758f37708f6SEric Blake     }
759f37708f6SEric Blake     return rc;
760f37708f6SEric Blake }
761f37708f6SEric Blake 
762ae6d91a7SZhu Yangyang /* Callback to learn when QIO TLS upgrade is complete */
763ae6d91a7SZhu Yangyang struct NBDTLSServerHandshakeData {
764ae6d91a7SZhu Yangyang     bool complete;
765ae6d91a7SZhu Yangyang     Error *error;
766ae6d91a7SZhu Yangyang     Coroutine *co;
767ae6d91a7SZhu Yangyang };
768ae6d91a7SZhu Yangyang 
7694fa333e0SEric Blake static void
nbd_server_tls_handshake(QIOTask * task,void * opaque)7704fa333e0SEric Blake nbd_server_tls_handshake(QIOTask *task, void *opaque)
771ae6d91a7SZhu Yangyang {
772ae6d91a7SZhu Yangyang     struct NBDTLSServerHandshakeData *data = opaque;
773ae6d91a7SZhu Yangyang 
774ae6d91a7SZhu Yangyang     qio_task_propagate_error(task, &data->error);
775ae6d91a7SZhu Yangyang     data->complete = true;
776ae6d91a7SZhu Yangyang     if (!qemu_coroutine_entered(data->co)) {
777ae6d91a7SZhu Yangyang         aio_co_wake(data->co);
778ae6d91a7SZhu Yangyang     }
779ae6d91a7SZhu Yangyang }
780f37708f6SEric Blake 
78136683283SEric Blake /* Handle NBD_OPT_STARTTLS. Return NULL to drop connection, or else the
78236683283SEric Blake  * new channel for all further (now-encrypted) communication. */
7834fa333e0SEric Blake static coroutine_fn QIOChannel *
nbd_negotiate_handle_starttls(NBDClient * client,Error ** errp)7844fa333e0SEric Blake nbd_negotiate_handle_starttls(NBDClient *client, Error **errp)
785f95910feSDaniel P. Berrange {
786f95910feSDaniel P. Berrange     QIOChannel *ioc;
787f95910feSDaniel P. Berrange     QIOChannelTLS *tioc;
788ae6d91a7SZhu Yangyang     struct NBDTLSServerHandshakeData data = { 0 };
789f95910feSDaniel P. Berrange 
7900cfae925SVladimir Sementsov-Ogievskiy     assert(client->opt == NBD_OPT_STARTTLS);
7910cfae925SVladimir Sementsov-Ogievskiy 
7929588463eSVladimir Sementsov-Ogievskiy     trace_nbd_negotiate_handle_starttls();
793f95910feSDaniel P. Berrange     ioc = client->ioc;
794f95910feSDaniel P. Berrange 
7950cfae925SVladimir Sementsov-Ogievskiy     if (nbd_negotiate_send_rep(client, NBD_REP_ACK, errp) < 0) {
79663d5ef86SEric Blake         return NULL;
79763d5ef86SEric Blake     }
798f95910feSDaniel P. Berrange 
799f95910feSDaniel P. Berrange     tioc = qio_channel_tls_new_server(ioc,
800f95910feSDaniel P. Berrange                                       client->tlscreds,
801b25e12daSDaniel P. Berrange                                       client->tlsauthz,
8022fd2c840SVladimir Sementsov-Ogievskiy                                       errp);
803f95910feSDaniel P. Berrange     if (!tioc) {
804f95910feSDaniel P. Berrange         return NULL;
805f95910feSDaniel P. Berrange     }
806f95910feSDaniel P. Berrange 
8070d73f725SDaniel P. Berrange     qio_channel_set_name(QIO_CHANNEL(tioc), "nbd-server-tls");
8089588463eSVladimir Sementsov-Ogievskiy     trace_nbd_negotiate_handle_starttls_handshake();
809ae6d91a7SZhu Yangyang     data.co = qemu_coroutine_self();
810f95910feSDaniel P. Berrange     qio_channel_tls_handshake(tioc,
811ae6d91a7SZhu Yangyang                               nbd_server_tls_handshake,
812f95910feSDaniel P. Berrange                               &data,
8131939ccdaSPeter Xu                               NULL,
814f95910feSDaniel P. Berrange                               NULL);
815f95910feSDaniel P. Berrange 
816f95910feSDaniel P. Berrange     if (!data.complete) {
817ae6d91a7SZhu Yangyang         qemu_coroutine_yield();
818ae6d91a7SZhu Yangyang         assert(data.complete);
819f95910feSDaniel P. Berrange     }
820ae6d91a7SZhu Yangyang 
821f95910feSDaniel P. Berrange     if (data.error) {
822f95910feSDaniel P. Berrange         object_unref(OBJECT(tioc));
8232fd2c840SVladimir Sementsov-Ogievskiy         error_propagate(errp, data.error);
824f95910feSDaniel P. Berrange         return NULL;
825f95910feSDaniel P. Berrange     }
826f95910feSDaniel P. Berrange 
827f95910feSDaniel P. Berrange     return QIO_CHANNEL(tioc);
828f95910feSDaniel P. Berrange }
829f95910feSDaniel P. Berrange 
830e7b1948dSVladimir Sementsov-Ogievskiy /* nbd_negotiate_send_meta_context
831e7b1948dSVladimir Sementsov-Ogievskiy  *
832e7b1948dSVladimir Sementsov-Ogievskiy  * Send one chunk of reply to NBD_OPT_{LIST,SET}_META_CONTEXT
833e7b1948dSVladimir Sementsov-Ogievskiy  *
834e7b1948dSVladimir Sementsov-Ogievskiy  * For NBD_OPT_LIST_META_CONTEXT @context_id is ignored, 0 is used instead.
835e7b1948dSVladimir Sementsov-Ogievskiy  */
8364fa333e0SEric Blake static coroutine_fn int
nbd_negotiate_send_meta_context(NBDClient * client,const char * context,uint32_t context_id,Error ** errp)8374fa333e0SEric Blake nbd_negotiate_send_meta_context(NBDClient *client, const char *context,
8384fa333e0SEric Blake                                 uint32_t context_id, Error **errp)
839e7b1948dSVladimir Sementsov-Ogievskiy {
840e7b1948dSVladimir Sementsov-Ogievskiy     NBDOptionReplyMetaContext opt;
841e7b1948dSVladimir Sementsov-Ogievskiy     struct iovec iov[] = {
842e7b1948dSVladimir Sementsov-Ogievskiy         {.iov_base = &opt, .iov_len = sizeof(opt)},
843e7b1948dSVladimir Sementsov-Ogievskiy         {.iov_base = (void *)context, .iov_len = strlen(context)}
844e7b1948dSVladimir Sementsov-Ogievskiy     };
845e7b1948dSVladimir Sementsov-Ogievskiy 
84693676c88SEric Blake     assert(iov[1].iov_len <= NBD_MAX_STRING_SIZE);
847e7b1948dSVladimir Sementsov-Ogievskiy     if (client->opt == NBD_OPT_LIST_META_CONTEXT) {
848e7b1948dSVladimir Sementsov-Ogievskiy         context_id = 0;
849e7b1948dSVladimir Sementsov-Ogievskiy     }
850e7b1948dSVladimir Sementsov-Ogievskiy 
8512b53af25SEric Blake     trace_nbd_negotiate_meta_query_reply(context, context_id);
852e7b1948dSVladimir Sementsov-Ogievskiy     set_be_option_rep(&opt.h, client->opt, NBD_REP_META_CONTEXT,
853e7b1948dSVladimir Sementsov-Ogievskiy                       sizeof(opt) - sizeof(opt.h) + iov[1].iov_len);
854e7b1948dSVladimir Sementsov-Ogievskiy     stl_be_p(&opt.context_id, context_id);
855e7b1948dSVladimir Sementsov-Ogievskiy 
856e7b1948dSVladimir Sementsov-Ogievskiy     return qio_channel_writev_all(client->ioc, iov, 2, errp) < 0 ? -EIO : 0;
857e7b1948dSVladimir Sementsov-Ogievskiy }
858e7b1948dSVladimir Sementsov-Ogievskiy 
859ebd57062SEric Blake /*
860ebd57062SEric Blake  * Return true if @query matches @pattern, or if @query is empty when
861ebd57062SEric Blake  * the @client is performing _LIST_.
862b0769d8fSVladimir Sementsov-Ogievskiy  */
8634fa333e0SEric Blake static coroutine_fn bool
nbd_meta_empty_or_pattern(NBDClient * client,const char * pattern,const char * query)8644fa333e0SEric Blake nbd_meta_empty_or_pattern(NBDClient *client, const char *pattern,
865ebd57062SEric Blake                           const char *query)
866b0769d8fSVladimir Sementsov-Ogievskiy {
867ebd57062SEric Blake     if (!*query) {
868ebd57062SEric Blake         trace_nbd_negotiate_meta_query_parse("empty");
869ebd57062SEric Blake         return client->opt == NBD_OPT_LIST_META_CONTEXT;
870b0769d8fSVladimir Sementsov-Ogievskiy     }
871ebd57062SEric Blake     if (strcmp(query, pattern) == 0) {
872b0769d8fSVladimir Sementsov-Ogievskiy         trace_nbd_negotiate_meta_query_parse(pattern);
873ebd57062SEric Blake         return true;
874b0769d8fSVladimir Sementsov-Ogievskiy     }
875ebd57062SEric Blake     trace_nbd_negotiate_meta_query_skip("pattern not matched");
876ebd57062SEric Blake     return false;
877b0769d8fSVladimir Sementsov-Ogievskiy }
878b0769d8fSVladimir Sementsov-Ogievskiy 
879b0769d8fSVladimir Sementsov-Ogievskiy /*
880ebd57062SEric Blake  * Return true and adjust @str in place if it begins with @prefix.
881b0769d8fSVladimir Sementsov-Ogievskiy  */
8824fa333e0SEric Blake static coroutine_fn bool
nbd_strshift(const char ** str,const char * prefix)8834fa333e0SEric Blake nbd_strshift(const char **str, const char *prefix)
884b0769d8fSVladimir Sementsov-Ogievskiy {
885ebd57062SEric Blake     size_t len = strlen(prefix);
886b0769d8fSVladimir Sementsov-Ogievskiy 
887ebd57062SEric Blake     if (strncmp(*str, prefix, len) == 0) {
888ebd57062SEric Blake         *str += len;
889ebd57062SEric Blake         return true;
890b0769d8fSVladimir Sementsov-Ogievskiy     }
891ebd57062SEric Blake     return false;
892b0769d8fSVladimir Sementsov-Ogievskiy }
893b0769d8fSVladimir Sementsov-Ogievskiy 
894e7b1948dSVladimir Sementsov-Ogievskiy /* nbd_meta_base_query
895e7b1948dSVladimir Sementsov-Ogievskiy  *
896dbb8b396SVladimir Sementsov-Ogievskiy  * Handle queries to 'base' namespace. For now, only the base:allocation
897ebd57062SEric Blake  * context is available.  Return true if @query has been handled.
898dbb8b396SVladimir Sementsov-Ogievskiy  */
8994fa333e0SEric Blake static coroutine_fn bool
nbd_meta_base_query(NBDClient * client,NBDMetaContexts * meta,const char * query)9004fa333e0SEric Blake nbd_meta_base_query(NBDClient *client, NBDMetaContexts *meta,
901ebd57062SEric Blake                     const char *query)
902e7b1948dSVladimir Sementsov-Ogievskiy {
903ebd57062SEric Blake     if (!nbd_strshift(&query, "base:")) {
904ebd57062SEric Blake         return false;
905ebd57062SEric Blake     }
906ebd57062SEric Blake     trace_nbd_negotiate_meta_query_parse("base:");
907ebd57062SEric Blake 
908ebd57062SEric Blake     if (nbd_meta_empty_or_pattern(client, "allocation", query)) {
909ebd57062SEric Blake         meta->base_allocation = true;
910ebd57062SEric Blake     }
911ebd57062SEric Blake     return true;
912e7b1948dSVladimir Sementsov-Ogievskiy }
913e7b1948dSVladimir Sementsov-Ogievskiy 
914ebd57062SEric Blake /* nbd_meta_qemu_query
9153d068affSVladimir Sementsov-Ogievskiy  *
916ebd57062SEric Blake  * Handle queries to 'qemu' namespace. For now, only the qemu:dirty-bitmap:
91771719cd5SEric Blake  * and qemu:allocation-depth contexts are available.  Return true if @query
91871719cd5SEric Blake  * has been handled.
919ebd57062SEric Blake  */
9204fa333e0SEric Blake static coroutine_fn bool
nbd_meta_qemu_query(NBDClient * client,NBDMetaContexts * meta,const char * query)9214fa333e0SEric Blake nbd_meta_qemu_query(NBDClient *client, NBDMetaContexts *meta,
922ebd57062SEric Blake                     const char *query)
9233d068affSVladimir Sementsov-Ogievskiy {
9243b1f244cSEric Blake     size_t i;
9253b1f244cSEric Blake 
926ebd57062SEric Blake     if (!nbd_strshift(&query, "qemu:")) {
927ebd57062SEric Blake         return false;
9283d068affSVladimir Sementsov-Ogievskiy     }
929ebd57062SEric Blake     trace_nbd_negotiate_meta_query_parse("qemu:");
9303d068affSVladimir Sementsov-Ogievskiy 
931ebd57062SEric Blake     if (!*query) {
9323d068affSVladimir Sementsov-Ogievskiy         if (client->opt == NBD_OPT_LIST_META_CONTEXT) {
93371719cd5SEric Blake             meta->allocation_depth = meta->exp->allocation_depth;
93476df2b8dSEric Blake             if (meta->exp->nr_export_bitmaps) {
9353b1f244cSEric Blake                 memset(meta->bitmaps, 1, meta->exp->nr_export_bitmaps);
9363d068affSVladimir Sementsov-Ogievskiy             }
93776df2b8dSEric Blake         }
9383d068affSVladimir Sementsov-Ogievskiy         trace_nbd_negotiate_meta_query_parse("empty");
939ebd57062SEric Blake         return true;
9403d068affSVladimir Sementsov-Ogievskiy     }
9413d068affSVladimir Sementsov-Ogievskiy 
94271719cd5SEric Blake     if (strcmp(query, "allocation-depth") == 0) {
94371719cd5SEric Blake         trace_nbd_negotiate_meta_query_parse("allocation-depth");
94471719cd5SEric Blake         meta->allocation_depth = meta->exp->allocation_depth;
94571719cd5SEric Blake         return true;
94671719cd5SEric Blake     }
94771719cd5SEric Blake 
948ebd57062SEric Blake     if (nbd_strshift(&query, "dirty-bitmap:")) {
9493d068affSVladimir Sementsov-Ogievskiy         trace_nbd_negotiate_meta_query_parse("dirty-bitmap:");
9503b1f244cSEric Blake         if (!*query) {
95176df2b8dSEric Blake             if (client->opt == NBD_OPT_LIST_META_CONTEXT &&
95276df2b8dSEric Blake                 meta->exp->nr_export_bitmaps) {
9533b1f244cSEric Blake                 memset(meta->bitmaps, 1, meta->exp->nr_export_bitmaps);
9543b1f244cSEric Blake             }
9553b1f244cSEric Blake             trace_nbd_negotiate_meta_query_parse("empty");
956ebd57062SEric Blake             return true;
957ebd57062SEric Blake         }
9583b1f244cSEric Blake 
9593b1f244cSEric Blake         for (i = 0; i < meta->exp->nr_export_bitmaps; i++) {
9603b1f244cSEric Blake             const char *bm_name;
9613b1f244cSEric Blake 
9623b1f244cSEric Blake             bm_name = bdrv_dirty_bitmap_name(meta->exp->export_bitmaps[i]);
9633b1f244cSEric Blake             if (strcmp(bm_name, query) == 0) {
9643b1f244cSEric Blake                 meta->bitmaps[i] = true;
9653b1f244cSEric Blake                 trace_nbd_negotiate_meta_query_parse(query);
9663b1f244cSEric Blake                 return true;
967ebd57062SEric Blake             }
9683b1f244cSEric Blake         }
9693b1f244cSEric Blake         trace_nbd_negotiate_meta_query_skip("no dirty-bitmap match");
970ebd57062SEric Blake         return true;
971ebd57062SEric Blake     }
9723d068affSVladimir Sementsov-Ogievskiy 
97371719cd5SEric Blake     trace_nbd_negotiate_meta_query_skip("unknown qemu context");
974ebd57062SEric Blake     return true;
9753d068affSVladimir Sementsov-Ogievskiy }
9763d068affSVladimir Sementsov-Ogievskiy 
977e7b1948dSVladimir Sementsov-Ogievskiy /* nbd_negotiate_meta_query
978e7b1948dSVladimir Sementsov-Ogievskiy  *
979e7b1948dSVladimir Sementsov-Ogievskiy  * Parse namespace name and call corresponding function to parse body of the
980e7b1948dSVladimir Sementsov-Ogievskiy  * query.
981e7b1948dSVladimir Sementsov-Ogievskiy  *
98293676c88SEric Blake  * The only supported namespaces are 'base' and 'qemu'.
983e7b1948dSVladimir Sementsov-Ogievskiy  *
984e7b1948dSVladimir Sementsov-Ogievskiy  * Return -errno on I/O error, 0 if option was completely handled by
985e7b1948dSVladimir Sementsov-Ogievskiy  * sending a reply about inconsistent lengths, or 1 on success. */
9864fa333e0SEric Blake static coroutine_fn int
nbd_negotiate_meta_query(NBDClient * client,NBDMetaContexts * meta,Error ** errp)9874fa333e0SEric Blake nbd_negotiate_meta_query(NBDClient *client,
988fd358d83SEric Blake                          NBDMetaContexts *meta, Error **errp)
989e7b1948dSVladimir Sementsov-Ogievskiy {
990e7b1948dSVladimir Sementsov-Ogievskiy     int ret;
991ebd57062SEric Blake     g_autofree char *query = NULL;
992e7b1948dSVladimir Sementsov-Ogievskiy     uint32_t len;
993e7b1948dSVladimir Sementsov-Ogievskiy 
994d1e2c3e7SEric Blake     ret = nbd_opt_read(client, &len, sizeof(len), false, errp);
995e7b1948dSVladimir Sementsov-Ogievskiy     if (ret <= 0) {
996e7b1948dSVladimir Sementsov-Ogievskiy         return ret;
997e7b1948dSVladimir Sementsov-Ogievskiy     }
99880c7c2b0SPeter Maydell     len = cpu_to_be32(len);
999e7b1948dSVladimir Sementsov-Ogievskiy 
100093676c88SEric Blake     if (len > NBD_MAX_STRING_SIZE) {
100193676c88SEric Blake         trace_nbd_negotiate_meta_query_skip("length too long");
100293676c88SEric Blake         return nbd_opt_skip(client, len, errp);
100393676c88SEric Blake     }
1004e7b1948dSVladimir Sementsov-Ogievskiy 
1005ebd57062SEric Blake     query = g_malloc(len + 1);
1006ebd57062SEric Blake     ret = nbd_opt_read(client, query, len, true, errp);
1007e7b1948dSVladimir Sementsov-Ogievskiy     if (ret <= 0) {
1008e7b1948dSVladimir Sementsov-Ogievskiy         return ret;
1009e7b1948dSVladimir Sementsov-Ogievskiy     }
1010ebd57062SEric Blake     query[len] = '\0';
1011e7b1948dSVladimir Sementsov-Ogievskiy 
1012ebd57062SEric Blake     if (nbd_meta_base_query(client, meta, query)) {
1013ebd57062SEric Blake         return 1;
1014ebd57062SEric Blake     }
1015ebd57062SEric Blake     if (nbd_meta_qemu_query(client, meta, query)) {
1016ebd57062SEric Blake         return 1;
10173d068affSVladimir Sementsov-Ogievskiy     }
10183d068affSVladimir Sementsov-Ogievskiy 
10193d068affSVladimir Sementsov-Ogievskiy     trace_nbd_negotiate_meta_query_skip("unknown namespace");
1020ebd57062SEric Blake     return 1;
1021e7b1948dSVladimir Sementsov-Ogievskiy }
1022e7b1948dSVladimir Sementsov-Ogievskiy 
1023e7b1948dSVladimir Sementsov-Ogievskiy /* nbd_negotiate_meta_queries
1024e7b1948dSVladimir Sementsov-Ogievskiy  * Handle NBD_OPT_LIST_META_CONTEXT and NBD_OPT_SET_META_CONTEXT
1025e7b1948dSVladimir Sementsov-Ogievskiy  *
1026e7b1948dSVladimir Sementsov-Ogievskiy  * Return -errno on I/O error, or 0 if option was completely handled. */
10274fa333e0SEric Blake static coroutine_fn int
nbd_negotiate_meta_queries(NBDClient * client,Error ** errp)10284fa333e0SEric Blake nbd_negotiate_meta_queries(NBDClient *client, Error **errp)
1029e7b1948dSVladimir Sementsov-Ogievskiy {
1030e7b1948dSVladimir Sementsov-Ogievskiy     int ret;
10319d7ab222SEric Blake     g_autofree char *export_name = NULL;
1032cd1675f8SRichard Henderson     /* Mark unused to work around https://bugs.llvm.org/show_bug.cgi?id=3888 */
1033cd1675f8SRichard Henderson     g_autofree G_GNUC_UNUSED bool *bitmaps = NULL;
1034fd358d83SEric Blake     NBDMetaContexts local_meta = {0};
1035fd358d83SEric Blake     NBDMetaContexts *meta;
1036e7b1948dSVladimir Sementsov-Ogievskiy     uint32_t nb_queries;
10373b1f244cSEric Blake     size_t i;
103847ec485eSEric Blake     size_t count = 0;
1039e7b1948dSVladimir Sementsov-Ogievskiy 
1040ac132d05SEric Blake     if (client->opt == NBD_OPT_SET_META_CONTEXT &&
1041ac132d05SEric Blake         client->mode < NBD_MODE_STRUCTURED) {
1042e7b1948dSVladimir Sementsov-Ogievskiy         return nbd_opt_invalid(client, errp,
1043e7b1948dSVladimir Sementsov-Ogievskiy                                "request option '%s' when structured reply "
1044e7b1948dSVladimir Sementsov-Ogievskiy                                "is not negotiated",
1045e7b1948dSVladimir Sementsov-Ogievskiy                                nbd_opt_lookup(client->opt));
1046e7b1948dSVladimir Sementsov-Ogievskiy     }
1047e7b1948dSVladimir Sementsov-Ogievskiy 
1048e7b1948dSVladimir Sementsov-Ogievskiy     if (client->opt == NBD_OPT_LIST_META_CONTEXT) {
1049e7b1948dSVladimir Sementsov-Ogievskiy         /* Only change the caller's meta on SET. */
1050e7b1948dSVladimir Sementsov-Ogievskiy         meta = &local_meta;
1051fd358d83SEric Blake     } else {
1052fd358d83SEric Blake         meta = &client->contexts;
1053e7b1948dSVladimir Sementsov-Ogievskiy     }
1054e7b1948dSVladimir Sementsov-Ogievskiy 
10553b1f244cSEric Blake     g_free(meta->bitmaps);
1056e7b1948dSVladimir Sementsov-Ogievskiy     memset(meta, 0, sizeof(*meta));
1057e7b1948dSVladimir Sementsov-Ogievskiy 
10589d7ab222SEric Blake     ret = nbd_opt_read_name(client, &export_name, NULL, errp);
1059e7b1948dSVladimir Sementsov-Ogievskiy     if (ret <= 0) {
1060e7b1948dSVladimir Sementsov-Ogievskiy         return ret;
1061e7b1948dSVladimir Sementsov-Ogievskiy     }
1062e7b1948dSVladimir Sementsov-Ogievskiy 
1063af736e54SVladimir Sementsov-Ogievskiy     meta->exp = nbd_export_find(export_name);
1064af736e54SVladimir Sementsov-Ogievskiy     if (meta->exp == NULL) {
10655c4fe018SEric Blake         g_autofree char *sane_name = nbd_sanitize_name(export_name);
10665c4fe018SEric Blake 
1067e7b1948dSVladimir Sementsov-Ogievskiy         return nbd_opt_drop(client, NBD_REP_ERR_UNKNOWN, errp,
10685c4fe018SEric Blake                             "export '%s' not present", sane_name);
1069e7b1948dSVladimir Sementsov-Ogievskiy     }
10703b1f244cSEric Blake     meta->bitmaps = g_new0(bool, meta->exp->nr_export_bitmaps);
10713b1f244cSEric Blake     if (client->opt == NBD_OPT_LIST_META_CONTEXT) {
10723b1f244cSEric Blake         bitmaps = meta->bitmaps;
10733b1f244cSEric Blake     }
1074e7b1948dSVladimir Sementsov-Ogievskiy 
1075d1e2c3e7SEric Blake     ret = nbd_opt_read(client, &nb_queries, sizeof(nb_queries), false, errp);
1076e7b1948dSVladimir Sementsov-Ogievskiy     if (ret <= 0) {
1077e7b1948dSVladimir Sementsov-Ogievskiy         return ret;
1078e7b1948dSVladimir Sementsov-Ogievskiy     }
107980c7c2b0SPeter Maydell     nb_queries = cpu_to_be32(nb_queries);
10802b53af25SEric Blake     trace_nbd_negotiate_meta_context(nbd_opt_lookup(client->opt),
1081af736e54SVladimir Sementsov-Ogievskiy                                      export_name, nb_queries);
1082e7b1948dSVladimir Sementsov-Ogievskiy 
1083e7b1948dSVladimir Sementsov-Ogievskiy     if (client->opt == NBD_OPT_LIST_META_CONTEXT && !nb_queries) {
1084e7b1948dSVladimir Sementsov-Ogievskiy         /* enable all known contexts */
1085e7b1948dSVladimir Sementsov-Ogievskiy         meta->base_allocation = true;
108671719cd5SEric Blake         meta->allocation_depth = meta->exp->allocation_depth;
108776df2b8dSEric Blake         if (meta->exp->nr_export_bitmaps) {
10883b1f244cSEric Blake             memset(meta->bitmaps, 1, meta->exp->nr_export_bitmaps);
108976df2b8dSEric Blake         }
1090e7b1948dSVladimir Sementsov-Ogievskiy     } else {
1091e7b1948dSVladimir Sementsov-Ogievskiy         for (i = 0; i < nb_queries; ++i) {
1092e7b1948dSVladimir Sementsov-Ogievskiy             ret = nbd_negotiate_meta_query(client, meta, errp);
1093e7b1948dSVladimir Sementsov-Ogievskiy             if (ret <= 0) {
1094e7b1948dSVladimir Sementsov-Ogievskiy                 return ret;
1095e7b1948dSVladimir Sementsov-Ogievskiy             }
1096e7b1948dSVladimir Sementsov-Ogievskiy         }
1097e7b1948dSVladimir Sementsov-Ogievskiy     }
1098e7b1948dSVladimir Sementsov-Ogievskiy 
1099e7b1948dSVladimir Sementsov-Ogievskiy     if (meta->base_allocation) {
1100e7b1948dSVladimir Sementsov-Ogievskiy         ret = nbd_negotiate_send_meta_context(client, "base:allocation",
1101e7b1948dSVladimir Sementsov-Ogievskiy                                               NBD_META_ID_BASE_ALLOCATION,
1102e7b1948dSVladimir Sementsov-Ogievskiy                                               errp);
1103e7b1948dSVladimir Sementsov-Ogievskiy         if (ret < 0) {
1104e7b1948dSVladimir Sementsov-Ogievskiy             return ret;
1105e7b1948dSVladimir Sementsov-Ogievskiy         }
110647ec485eSEric Blake         count++;
1107e7b1948dSVladimir Sementsov-Ogievskiy     }
1108e7b1948dSVladimir Sementsov-Ogievskiy 
110971719cd5SEric Blake     if (meta->allocation_depth) {
111071719cd5SEric Blake         ret = nbd_negotiate_send_meta_context(client, "qemu:allocation-depth",
111171719cd5SEric Blake                                               NBD_META_ID_ALLOCATION_DEPTH,
111271719cd5SEric Blake                                               errp);
111371719cd5SEric Blake         if (ret < 0) {
111471719cd5SEric Blake             return ret;
111571719cd5SEric Blake         }
111671719cd5SEric Blake         count++;
111771719cd5SEric Blake     }
111871719cd5SEric Blake 
11193b1f244cSEric Blake     for (i = 0; i < meta->exp->nr_export_bitmaps; i++) {
11203b1f244cSEric Blake         const char *bm_name;
11213b1f244cSEric Blake         g_autofree char *context = NULL;
11223b1f244cSEric Blake 
11233b1f244cSEric Blake         if (!meta->bitmaps[i]) {
11243b1f244cSEric Blake             continue;
11253b1f244cSEric Blake         }
11263b1f244cSEric Blake 
11273b1f244cSEric Blake         bm_name = bdrv_dirty_bitmap_name(meta->exp->export_bitmaps[i]);
11283b1f244cSEric Blake         context = g_strdup_printf("qemu:dirty-bitmap:%s", bm_name);
112902e87e3bSEric Blake 
113002e87e3bSEric Blake         ret = nbd_negotiate_send_meta_context(client, context,
11313b1f244cSEric Blake                                               NBD_META_ID_DIRTY_BITMAP + i,
11323d068affSVladimir Sementsov-Ogievskiy                                               errp);
11333d068affSVladimir Sementsov-Ogievskiy         if (ret < 0) {
11343d068affSVladimir Sementsov-Ogievskiy             return ret;
11353d068affSVladimir Sementsov-Ogievskiy         }
113647ec485eSEric Blake         count++;
11373d068affSVladimir Sementsov-Ogievskiy     }
11383d068affSVladimir Sementsov-Ogievskiy 
1139e7b1948dSVladimir Sementsov-Ogievskiy     ret = nbd_negotiate_send_rep(client, NBD_REP_ACK, errp);
1140e7b1948dSVladimir Sementsov-Ogievskiy     if (ret == 0) {
114147ec485eSEric Blake         meta->count = count;
1142e7b1948dSVladimir Sementsov-Ogievskiy     }
1143e7b1948dSVladimir Sementsov-Ogievskiy 
1144e7b1948dSVladimir Sementsov-Ogievskiy     return ret;
1145e7b1948dSVladimir Sementsov-Ogievskiy }
1146e7b1948dSVladimir Sementsov-Ogievskiy 
11471e120ffeSVladimir Sementsov-Ogievskiy /* nbd_negotiate_options
1148f37708f6SEric Blake  * Process all NBD_OPT_* client option commands, during fixed newstyle
1149f37708f6SEric Blake  * negotiation.
11501e120ffeSVladimir Sementsov-Ogievskiy  * Return:
11512fd2c840SVladimir Sementsov-Ogievskiy  * -errno  on error, errp is set
11522fd2c840SVladimir Sementsov-Ogievskiy  * 0       on successful negotiation, errp is not set
1153*efd3dda3SEric Blake  * 1       if client sent NBD_OPT_ABORT (i.e. on valid disconnect) or never
1154*efd3dda3SEric Blake  *         wrote anything (i.e. port probe); errp is not set
11551e120ffeSVladimir Sementsov-Ogievskiy  */
11564fa333e0SEric Blake static coroutine_fn int
nbd_negotiate_options(NBDClient * client,Error ** errp)11574fa333e0SEric Blake nbd_negotiate_options(NBDClient *client, Error **errp)
1158798bfe00SFam Zheng {
1159798bfe00SFam Zheng     uint32_t flags;
116026afa868SDaniel P. Berrange     bool fixedNewstyle = false;
116123e099c3SEric Blake     bool no_zeroes = false;
1162798bfe00SFam Zheng 
1163798bfe00SFam Zheng     /* Client sends:
1164798bfe00SFam Zheng         [ 0 ..   3]   client flags
1165798bfe00SFam Zheng 
1166f37708f6SEric Blake        Then we loop until NBD_OPT_EXPORT_NAME or NBD_OPT_GO:
1167798bfe00SFam Zheng         [ 0 ..   7]   NBD_OPTS_MAGIC
1168798bfe00SFam Zheng         [ 8 ..  11]   NBD option
1169798bfe00SFam Zheng         [12 ..  15]   Data length
1170798bfe00SFam Zheng         ...           Rest of request
1171798bfe00SFam Zheng 
1172798bfe00SFam Zheng         [ 0 ..   7]   NBD_OPTS_MAGIC
1173798bfe00SFam Zheng         [ 8 ..  11]   Second NBD option
1174798bfe00SFam Zheng         [12 ..  15]   Data length
1175798bfe00SFam Zheng         ...           Rest of request
1176798bfe00SFam Zheng     */
1177798bfe00SFam Zheng 
1178*efd3dda3SEric Blake     /*
1179*efd3dda3SEric Blake      * Intentionally ignore errors on this first read - we do not want
1180*efd3dda3SEric Blake      * to be noisy about a mere port probe, but only for clients that
1181*efd3dda3SEric Blake      * start talking the protocol and then quit abruptly.
1182*efd3dda3SEric Blake      */
1183*efd3dda3SEric Blake     if (nbd_read32(client->ioc, &flags, "flags", NULL) < 0) {
1184*efd3dda3SEric Blake         return 1;
1185798bfe00SFam Zheng     }
1186ac132d05SEric Blake     client->mode = NBD_MODE_EXPORT_NAME;
1187621c4f4eSEric Blake     trace_nbd_negotiate_options_flags(flags);
118826afa868SDaniel P. Berrange     if (flags & NBD_FLAG_C_FIXED_NEWSTYLE) {
118926afa868SDaniel P. Berrange         fixedNewstyle = true;
119026afa868SDaniel P. Berrange         flags &= ~NBD_FLAG_C_FIXED_NEWSTYLE;
1191ac132d05SEric Blake         client->mode = NBD_MODE_SIMPLE;
119226afa868SDaniel P. Berrange     }
1193c203c59aSEric Blake     if (flags & NBD_FLAG_C_NO_ZEROES) {
119423e099c3SEric Blake         no_zeroes = true;
1195c203c59aSEric Blake         flags &= ~NBD_FLAG_C_NO_ZEROES;
1196c203c59aSEric Blake     }
119726afa868SDaniel P. Berrange     if (flags != 0) {
11982fd2c840SVladimir Sementsov-Ogievskiy         error_setg(errp, "Unknown client flags 0x%" PRIx32 " received", flags);
1199621c4f4eSEric Blake         return -EINVAL;
1200798bfe00SFam Zheng     }
1201798bfe00SFam Zheng 
1202798bfe00SFam Zheng     while (1) {
1203798bfe00SFam Zheng         int ret;
12047f9039cdSVladimir Sementsov-Ogievskiy         uint32_t option, length;
1205798bfe00SFam Zheng         uint64_t magic;
1206798bfe00SFam Zheng 
1207e6798f06SVladimir Sementsov-Ogievskiy         if (nbd_read64(client->ioc, &magic, "opts magic", errp) < 0) {
1208798bfe00SFam Zheng             return -EINVAL;
1209798bfe00SFam Zheng         }
12109588463eSVladimir Sementsov-Ogievskiy         trace_nbd_negotiate_options_check_magic(magic);
12119588463eSVladimir Sementsov-Ogievskiy         if (magic != NBD_OPTS_MAGIC) {
12122fd2c840SVladimir Sementsov-Ogievskiy             error_setg(errp, "Bad magic received");
1213798bfe00SFam Zheng             return -EINVAL;
1214798bfe00SFam Zheng         }
1215798bfe00SFam Zheng 
1216e6798f06SVladimir Sementsov-Ogievskiy         if (nbd_read32(client->ioc, &option, "option", errp) < 0) {
1217798bfe00SFam Zheng             return -EINVAL;
1218798bfe00SFam Zheng         }
12190cfae925SVladimir Sementsov-Ogievskiy         client->opt = option;
1220798bfe00SFam Zheng 
1221e6798f06SVladimir Sementsov-Ogievskiy         if (nbd_read32(client->ioc, &length, "option length", errp) < 0) {
1222798bfe00SFam Zheng             return -EINVAL;
1223798bfe00SFam Zheng         }
1224894e0280SEric Blake         assert(!client->optlen);
12250cfae925SVladimir Sementsov-Ogievskiy         client->optlen = length;
1226798bfe00SFam Zheng 
1227fdad35efSEric Blake         if (length > NBD_MAX_BUFFER_SIZE) {
1228fdad35efSEric Blake             error_setg(errp, "len (%" PRIu32 ") is larger than max len (%u)",
1229fdad35efSEric Blake                        length, NBD_MAX_BUFFER_SIZE);
1230fdad35efSEric Blake             return -EINVAL;
1231fdad35efSEric Blake         }
1232fdad35efSEric Blake 
12333736cc5bSEric Blake         trace_nbd_negotiate_options_check_option(option,
12343736cc5bSEric Blake                                                  nbd_opt_lookup(option));
1235f95910feSDaniel P. Berrange         if (client->tlscreds &&
1236f95910feSDaniel P. Berrange             client->ioc == (QIOChannel *)client->sioc) {
1237f95910feSDaniel P. Berrange             QIOChannel *tioc;
1238f95910feSDaniel P. Berrange             if (!fixedNewstyle) {
12397f9039cdSVladimir Sementsov-Ogievskiy                 error_setg(errp, "Unsupported option 0x%" PRIx32, option);
1240f95910feSDaniel P. Berrange                 return -EINVAL;
1241f95910feSDaniel P. Berrange             }
12427f9039cdSVladimir Sementsov-Ogievskiy             switch (option) {
1243f95910feSDaniel P. Berrange             case NBD_OPT_STARTTLS:
1244e68c35cfSEric Blake                 if (length) {
1245e68c35cfSEric Blake                     /* Unconditionally drop the connection if the client
1246e68c35cfSEric Blake                      * can't start a TLS negotiation correctly */
12470cfae925SVladimir Sementsov-Ogievskiy                     return nbd_reject_length(client, true, errp);
1248e68c35cfSEric Blake                 }
1249e68c35cfSEric Blake                 tioc = nbd_negotiate_handle_starttls(client, errp);
1250f95910feSDaniel P. Berrange                 if (!tioc) {
1251f95910feSDaniel P. Berrange                     return -EIO;
1252f95910feSDaniel P. Berrange                 }
12538cbee49eSEric Blake                 ret = 0;
1254f95910feSDaniel P. Berrange                 object_unref(OBJECT(client->ioc));
12557d5b0d68SPhilippe Mathieu-Daudé                 client->ioc = tioc;
1256f95910feSDaniel P. Berrange                 break;
1257f95910feSDaniel P. Berrange 
1258d1129a8aSEric Blake             case NBD_OPT_EXPORT_NAME:
1259d1129a8aSEric Blake                 /* No way to return an error to client, so drop connection */
12602fd2c840SVladimir Sementsov-Ogievskiy                 error_setg(errp, "Option 0x%x not permitted before TLS",
12617f9039cdSVladimir Sementsov-Ogievskiy                            option);
1262d1129a8aSEric Blake                 return -EINVAL;
1263d1129a8aSEric Blake 
1264f95910feSDaniel P. Berrange             default:
12653e99ebb9SEric Blake                 /* Let the client keep trying, unless they asked to
12663e99ebb9SEric Blake                  * quit. Always try to give an error back to the
12673e99ebb9SEric Blake                  * client; but when replying to OPT_ABORT, be aware
12683e99ebb9SEric Blake                  * that the client may hang up before receiving the
12693e99ebb9SEric Blake                  * error, in which case we are fine ignoring the
12703e99ebb9SEric Blake                  * resulting EPIPE. */
12713e99ebb9SEric Blake                 ret = nbd_opt_drop(client, NBD_REP_ERR_TLS_REQD,
12723e99ebb9SEric Blake                                    option == NBD_OPT_ABORT ? NULL : errp,
127336683283SEric Blake                                    "Option 0x%" PRIx32
1274894e0280SEric Blake                                    " not permitted before TLS", option);
12757f9039cdSVladimir Sementsov-Ogievskiy                 if (option == NBD_OPT_ABORT) {
12761e120ffeSVladimir Sementsov-Ogievskiy                     return 1;
1277b6f5d3b5SEric Blake                 }
1278d1129a8aSEric Blake                 break;
1279f95910feSDaniel P. Berrange             }
1280f95910feSDaniel P. Berrange         } else if (fixedNewstyle) {
12817f9039cdSVladimir Sementsov-Ogievskiy             switch (option) {
1282798bfe00SFam Zheng             case NBD_OPT_LIST:
1283e68c35cfSEric Blake                 if (length) {
12840cfae925SVladimir Sementsov-Ogievskiy                     ret = nbd_reject_length(client, false, errp);
1285e68c35cfSEric Blake                 } else {
1286e68c35cfSEric Blake                     ret = nbd_negotiate_handle_list(client, errp);
1287e68c35cfSEric Blake                 }
1288798bfe00SFam Zheng                 break;
1289798bfe00SFam Zheng 
1290798bfe00SFam Zheng             case NBD_OPT_ABORT:
1291b6f5d3b5SEric Blake                 /* NBD spec says we must try to reply before
1292b6f5d3b5SEric Blake                  * disconnecting, but that we must also tolerate
1293b6f5d3b5SEric Blake                  * guests that don't wait for our reply. */
12940cfae925SVladimir Sementsov-Ogievskiy                 nbd_negotiate_send_rep(client, NBD_REP_ACK, NULL);
12951e120ffeSVladimir Sementsov-Ogievskiy                 return 1;
1296798bfe00SFam Zheng 
1297798bfe00SFam Zheng             case NBD_OPT_EXPORT_NAME:
1298dbb38caaSEric Blake                 return nbd_negotiate_handle_export_name(client, no_zeroes,
129923e099c3SEric Blake                                                         errp);
1300798bfe00SFam Zheng 
1301f37708f6SEric Blake             case NBD_OPT_INFO:
1302f37708f6SEric Blake             case NBD_OPT_GO:
1303dbb38caaSEric Blake                 ret = nbd_negotiate_handle_info(client, errp);
1304f37708f6SEric Blake                 if (ret == 1) {
1305f37708f6SEric Blake                     assert(option == NBD_OPT_GO);
1306f37708f6SEric Blake                     return 0;
1307f37708f6SEric Blake                 }
1308f37708f6SEric Blake                 break;
1309f37708f6SEric Blake 
1310f95910feSDaniel P. Berrange             case NBD_OPT_STARTTLS:
1311e68c35cfSEric Blake                 if (length) {
13120cfae925SVladimir Sementsov-Ogievskiy                     ret = nbd_reject_length(client, false, errp);
1313e68c35cfSEric Blake                 } else if (client->tlscreds) {
13140cfae925SVladimir Sementsov-Ogievskiy                     ret = nbd_negotiate_send_rep_err(client,
13150cfae925SVladimir Sementsov-Ogievskiy                                                      NBD_REP_ERR_INVALID, errp,
131636683283SEric Blake                                                      "TLS already enabled");
1317f95910feSDaniel P. Berrange                 } else {
13180cfae925SVladimir Sementsov-Ogievskiy                     ret = nbd_negotiate_send_rep_err(client,
13190cfae925SVladimir Sementsov-Ogievskiy                                                      NBD_REP_ERR_POLICY, errp,
132036683283SEric Blake                                                      "TLS not configured");
1321f95910feSDaniel P. Berrange                 }
1322d1129a8aSEric Blake                 break;
13235c54e7faSVladimir Sementsov-Ogievskiy 
13245c54e7faSVladimir Sementsov-Ogievskiy             case NBD_OPT_STRUCTURED_REPLY:
13255c54e7faSVladimir Sementsov-Ogievskiy                 if (length) {
13260cfae925SVladimir Sementsov-Ogievskiy                     ret = nbd_reject_length(client, false, errp);
13279c1d2614SEric Blake                 } else if (client->mode >= NBD_MODE_EXTENDED) {
13289c1d2614SEric Blake                     ret = nbd_negotiate_send_rep_err(
13299c1d2614SEric Blake                         client, NBD_REP_ERR_EXT_HEADER_REQD, errp,
13309c1d2614SEric Blake                         "extended headers already negotiated");
1331ac132d05SEric Blake                 } else if (client->mode >= NBD_MODE_STRUCTURED) {
13325c54e7faSVladimir Sementsov-Ogievskiy                     ret = nbd_negotiate_send_rep_err(
13330cfae925SVladimir Sementsov-Ogievskiy                         client, NBD_REP_ERR_INVALID, errp,
13345c54e7faSVladimir Sementsov-Ogievskiy                         "structured reply already negotiated");
13355c54e7faSVladimir Sementsov-Ogievskiy                 } else {
13360cfae925SVladimir Sementsov-Ogievskiy                     ret = nbd_negotiate_send_rep(client, NBD_REP_ACK, errp);
1337ac132d05SEric Blake                     client->mode = NBD_MODE_STRUCTURED;
13385c54e7faSVladimir Sementsov-Ogievskiy                 }
13395c54e7faSVladimir Sementsov-Ogievskiy                 break;
13405c54e7faSVladimir Sementsov-Ogievskiy 
1341e7b1948dSVladimir Sementsov-Ogievskiy             case NBD_OPT_LIST_META_CONTEXT:
1342e7b1948dSVladimir Sementsov-Ogievskiy             case NBD_OPT_SET_META_CONTEXT:
1343fd358d83SEric Blake                 ret = nbd_negotiate_meta_queries(client, errp);
1344e7b1948dSVladimir Sementsov-Ogievskiy                 break;
1345e7b1948dSVladimir Sementsov-Ogievskiy 
13469c1d2614SEric Blake             case NBD_OPT_EXTENDED_HEADERS:
13479c1d2614SEric Blake                 if (length) {
13489c1d2614SEric Blake                     ret = nbd_reject_length(client, false, errp);
13499c1d2614SEric Blake                 } else if (client->mode >= NBD_MODE_EXTENDED) {
13509c1d2614SEric Blake                     ret = nbd_negotiate_send_rep_err(
13519c1d2614SEric Blake                         client, NBD_REP_ERR_INVALID, errp,
13529c1d2614SEric Blake                         "extended headers already negotiated");
13539c1d2614SEric Blake                 } else {
13549c1d2614SEric Blake                     ret = nbd_negotiate_send_rep(client, NBD_REP_ACK, errp);
13559c1d2614SEric Blake                     client->mode = NBD_MODE_EXTENDED;
13569c1d2614SEric Blake                 }
13579c1d2614SEric Blake                 break;
13589c1d2614SEric Blake 
1359798bfe00SFam Zheng             default:
1360894e0280SEric Blake                 ret = nbd_opt_drop(client, NBD_REP_ERR_UNSUP, errp,
136128fb494fSVladimir Sementsov-Ogievskiy                                    "Unsupported option %" PRIu32 " (%s)",
1362894e0280SEric Blake                                    option, nbd_opt_lookup(option));
1363156f6a10SEric Blake                 break;
1364798bfe00SFam Zheng             }
136526afa868SDaniel P. Berrange         } else {
136626afa868SDaniel P. Berrange             /*
136726afa868SDaniel P. Berrange              * If broken new-style we should drop the connection
136826afa868SDaniel P. Berrange              * for anything except NBD_OPT_EXPORT_NAME
136926afa868SDaniel P. Berrange              */
13707f9039cdSVladimir Sementsov-Ogievskiy             switch (option) {
137126afa868SDaniel P. Berrange             case NBD_OPT_EXPORT_NAME:
1372dbb38caaSEric Blake                 return nbd_negotiate_handle_export_name(client, no_zeroes,
137323e099c3SEric Blake                                                         errp);
137426afa868SDaniel P. Berrange 
137526afa868SDaniel P. Berrange             default:
137628fb494fSVladimir Sementsov-Ogievskiy                 error_setg(errp, "Unsupported option %" PRIu32 " (%s)",
13773736cc5bSEric Blake                            option, nbd_opt_lookup(option));
137826afa868SDaniel P. Berrange                 return -EINVAL;
137926afa868SDaniel P. Berrange             }
138026afa868SDaniel P. Berrange         }
13818cbee49eSEric Blake         if (ret < 0) {
13828cbee49eSEric Blake             return ret;
13838cbee49eSEric Blake         }
1384798bfe00SFam Zheng     }
1385798bfe00SFam Zheng }
1386798bfe00SFam Zheng 
13871e120ffeSVladimir Sementsov-Ogievskiy /* nbd_negotiate
13881e120ffeSVladimir Sementsov-Ogievskiy  * Return:
13892fd2c840SVladimir Sementsov-Ogievskiy  * -errno  on error, errp is set
13902fd2c840SVladimir Sementsov-Ogievskiy  * 0       on successful negotiation, errp is not set
1391*efd3dda3SEric Blake  * 1       if client sent NBD_OPT_ABORT (i.e. on valid disconnect) or never
1392*efd3dda3SEric Blake  *         wrote anything (i.e. port probe); errp is not set
13931e120ffeSVladimir Sementsov-Ogievskiy  */
nbd_negotiate(NBDClient * client,Error ** errp)13942fd2c840SVladimir Sementsov-Ogievskiy static coroutine_fn int nbd_negotiate(NBDClient *client, Error **errp)
1395798bfe00SFam Zheng {
1396795d946dSVladimir Sementsov-Ogievskiy     ERRP_GUARD();
13975f66d060SEric Blake     char buf[NBD_OLDSTYLE_NEGOTIATE_SIZE] = "";
13982e5c9ad6SVladimir Sementsov-Ogievskiy     int ret;
1399798bfe00SFam Zheng 
14005f66d060SEric Blake     /* Old style negotiation header, no room for options
1401798bfe00SFam Zheng         [ 0 ..   7]   passwd       ("NBDMAGIC")
1402798bfe00SFam Zheng         [ 8 ..  15]   magic        (NBD_CLIENT_MAGIC)
1403798bfe00SFam Zheng         [16 ..  23]   size
14045f66d060SEric Blake         [24 ..  27]   export flags (zero-extended)
1405798bfe00SFam Zheng         [28 .. 151]   reserved     (0)
1406798bfe00SFam Zheng 
14075f66d060SEric Blake        New style negotiation header, client can send options
1408798bfe00SFam Zheng         [ 0 ..   7]   passwd       ("NBDMAGIC")
1409798bfe00SFam Zheng         [ 8 ..  15]   magic        (NBD_OPTS_MAGIC)
1410798bfe00SFam Zheng         [16 ..  17]   server flags (0)
1411f37708f6SEric Blake         ....options sent, ending in NBD_OPT_EXPORT_NAME or NBD_OPT_GO....
1412798bfe00SFam Zheng      */
1413798bfe00SFam Zheng 
14141c778ef7SDaniel P. Berrange     qio_channel_set_blocking(client->ioc, false, NULL);
141506e0f098SStefan Hajnoczi     qio_channel_set_follow_coroutine_ctx(client->ioc, true);
1416798bfe00SFam Zheng 
14179588463eSVladimir Sementsov-Ogievskiy     trace_nbd_negotiate_begin();
1418798bfe00SFam Zheng     memcpy(buf, "NBDMAGIC", 8);
1419f95910feSDaniel P. Berrange 
142076ff081dSVladimir Sementsov-Ogievskiy     stq_be_p(buf + 8, NBD_OPTS_MAGIC);
142176ff081dSVladimir Sementsov-Ogievskiy     stw_be_p(buf + 16, NBD_FLAG_FIXED_NEWSTYLE | NBD_FLAG_NO_ZEROES);
142276ff081dSVladimir Sementsov-Ogievskiy 
1423*efd3dda3SEric Blake     /*
1424*efd3dda3SEric Blake      * Be silent about failure to write our greeting: there is nothing
1425*efd3dda3SEric Blake      * wrong with a client testing if our port is alive.
1426*efd3dda3SEric Blake      */
1427*efd3dda3SEric Blake     if (nbd_write(client->ioc, buf, 18, NULL) < 0) {
1428*efd3dda3SEric Blake         return 1;
1429798bfe00SFam Zheng     }
1430dbb38caaSEric Blake     ret = nbd_negotiate_options(client, errp);
14312e5c9ad6SVladimir Sementsov-Ogievskiy     if (ret != 0) {
14322fd2c840SVladimir Sementsov-Ogievskiy         if (ret < 0) {
14332fd2c840SVladimir Sementsov-Ogievskiy             error_prepend(errp, "option negotiation failed: ");
14342fd2c840SVladimir Sementsov-Ogievskiy         }
14352e5c9ad6SVladimir Sementsov-Ogievskiy         return ret;
1436798bfe00SFam Zheng     }
1437798bfe00SFam Zheng 
14380cfae925SVladimir Sementsov-Ogievskiy     assert(!client->optlen);
14399588463eSVladimir Sementsov-Ogievskiy     trace_nbd_negotiate_success();
1440d9faeed8SVladimir Sementsov-Ogievskiy 
1441d9faeed8SVladimir Sementsov-Ogievskiy     return 0;
1442798bfe00SFam Zheng }
1443798bfe00SFam Zheng 
1444f148ae7dSSergio Lopez /* nbd_read_eof
1445f148ae7dSSergio Lopez  * Tries to read @size bytes from @ioc. This is a local implementation of
1446f148ae7dSSergio Lopez  * qio_channel_readv_all_eof. We have it here because we need it to be
1447f148ae7dSSergio Lopez  * interruptible and to know when the coroutine is yielding.
1448f148ae7dSSergio Lopez  * Returns 1 on success
1449f148ae7dSSergio Lopez  *         0 on eof, when no data was read (errp is not set)
1450f148ae7dSSergio Lopez  *         negative errno on failure (errp is set)
1451f148ae7dSSergio Lopez  */
1452f148ae7dSSergio Lopez static inline int coroutine_fn
nbd_read_eof(NBDClient * client,void * buffer,size_t size,Error ** errp)1453f148ae7dSSergio Lopez nbd_read_eof(NBDClient *client, void *buffer, size_t size, Error **errp)
1454f148ae7dSSergio Lopez {
1455f148ae7dSSergio Lopez     bool partial = false;
1456f148ae7dSSergio Lopez 
1457f148ae7dSSergio Lopez     assert(size);
1458f148ae7dSSergio Lopez     while (size > 0) {
1459f148ae7dSSergio Lopez         struct iovec iov = { .iov_base = buffer, .iov_len = size };
1460f148ae7dSSergio Lopez         ssize_t len;
1461f148ae7dSSergio Lopez 
1462f148ae7dSSergio Lopez         len = qio_channel_readv(client->ioc, &iov, 1, errp);
1463f148ae7dSSergio Lopez         if (len == QIO_CHANNEL_ERR_BLOCK) {
14647075d235SStefan Hajnoczi             WITH_QEMU_LOCK_GUARD(&client->lock) {
1465f148ae7dSSergio Lopez                 client->read_yielding = true;
14667075d235SStefan Hajnoczi 
14677075d235SStefan Hajnoczi                 /* Prompt main loop thread to re-run nbd_drained_poll() */
14687075d235SStefan Hajnoczi                 aio_wait_kick();
14697075d235SStefan Hajnoczi             }
1470f148ae7dSSergio Lopez             qio_channel_yield(client->ioc, G_IO_IN);
14717075d235SStefan Hajnoczi             WITH_QEMU_LOCK_GUARD(&client->lock) {
1472f148ae7dSSergio Lopez                 client->read_yielding = false;
1473f148ae7dSSergio Lopez                 if (client->quiescing) {
1474f148ae7dSSergio Lopez                     return -EAGAIN;
1475f148ae7dSSergio Lopez                 }
14767075d235SStefan Hajnoczi             }
1477f148ae7dSSergio Lopez             continue;
1478f148ae7dSSergio Lopez         } else if (len < 0) {
1479f148ae7dSSergio Lopez             return -EIO;
1480f148ae7dSSergio Lopez         } else if (len == 0) {
1481f148ae7dSSergio Lopez             if (partial) {
1482f148ae7dSSergio Lopez                 error_setg(errp,
1483f148ae7dSSergio Lopez                            "Unexpected end-of-file before all bytes were read");
1484f148ae7dSSergio Lopez                 return -EIO;
1485f148ae7dSSergio Lopez             } else {
1486f148ae7dSSergio Lopez                 return 0;
1487f148ae7dSSergio Lopez             }
1488f148ae7dSSergio Lopez         }
1489f148ae7dSSergio Lopez 
1490f148ae7dSSergio Lopez         partial = true;
1491f148ae7dSSergio Lopez         size -= len;
1492f148ae7dSSergio Lopez         buffer = (uint8_t *) buffer + len;
1493f148ae7dSSergio Lopez     }
1494f148ae7dSSergio Lopez     return 1;
1495f148ae7dSSergio Lopez }
1496f148ae7dSSergio Lopez 
nbd_receive_request(NBDClient * client,NBDRequest * request,Error ** errp)1497d2223cddSPaolo Bonzini static int coroutine_fn nbd_receive_request(NBDClient *client, NBDRequest *request,
14982fd2c840SVladimir Sementsov-Ogievskiy                                             Error **errp)
1499798bfe00SFam Zheng {
1500c8720ca0SEric Blake     uint8_t buf[NBD_EXTENDED_REQUEST_SIZE];
1501c8720ca0SEric Blake     uint32_t magic, expect;
1502a0dc63a6SVladimir Sementsov-Ogievskiy     int ret;
1503c8720ca0SEric Blake     size_t size = client->mode >= NBD_MODE_EXTENDED ?
1504c8720ca0SEric Blake         NBD_EXTENDED_REQUEST_SIZE : NBD_REQUEST_SIZE;
1505798bfe00SFam Zheng 
1506c8720ca0SEric Blake     ret = nbd_read_eof(client, buf, size, errp);
1507798bfe00SFam Zheng     if (ret < 0) {
1508798bfe00SFam Zheng         return ret;
1509798bfe00SFam Zheng     }
15101644ccceSEric Blake     if (ret == 0) {
15111644ccceSEric Blake         return -EIO;
15121644ccceSEric Blake     }
1513798bfe00SFam Zheng 
1514c8720ca0SEric Blake     /*
1515c8720ca0SEric Blake      * Compact request
1516c8720ca0SEric Blake      *  [ 0 ..  3]   magic   (NBD_REQUEST_MAGIC)
1517c8720ca0SEric Blake      *  [ 4 ..  5]   flags   (NBD_CMD_FLAG_FUA, ...)
1518c8720ca0SEric Blake      *  [ 6 ..  7]   type    (NBD_CMD_READ, ...)
1519c8720ca0SEric Blake      *  [ 8 .. 15]   cookie
1520c8720ca0SEric Blake      *  [16 .. 23]   from
1521c8720ca0SEric Blake      *  [24 .. 27]   len
1522c8720ca0SEric Blake      * Extended request
1523c8720ca0SEric Blake      *  [ 0 ..  3]   magic   (NBD_EXTENDED_REQUEST_MAGIC)
1524c8720ca0SEric Blake      *  [ 4 ..  5]   flags   (NBD_CMD_FLAG_FUA, NBD_CMD_FLAG_PAYLOAD_LEN, ...)
1525c8720ca0SEric Blake      *  [ 6 ..  7]   type    (NBD_CMD_READ, ...)
1526c8720ca0SEric Blake      *  [ 8 .. 15]   cookie
1527c8720ca0SEric Blake      *  [16 .. 23]   from
1528c8720ca0SEric Blake      *  [24 .. 31]   len
1529798bfe00SFam Zheng      */
1530798bfe00SFam Zheng 
1531773dce3cSPeter Maydell     magic = ldl_be_p(buf);
1532b626b51aSEric Blake     request->flags  = lduw_be_p(buf + 4);
1533b626b51aSEric Blake     request->type   = lduw_be_p(buf + 6);
153422efd811SEric Blake     request->cookie = ldq_be_p(buf + 8);
1535773dce3cSPeter Maydell     request->from   = ldq_be_p(buf + 16);
1536c8720ca0SEric Blake     if (client->mode >= NBD_MODE_EXTENDED) {
1537c8720ca0SEric Blake         request->len = ldq_be_p(buf + 24);
1538c8720ca0SEric Blake         expect = NBD_EXTENDED_REQUEST_MAGIC;
1539c8720ca0SEric Blake     } else {
1540b2578459SEric Blake         request->len = (uint32_t)ldl_be_p(buf + 24); /* widen 32 to 64 bits */
1541c8720ca0SEric Blake         expect = NBD_REQUEST_MAGIC;
1542c8720ca0SEric Blake     }
1543798bfe00SFam Zheng 
15449588463eSVladimir Sementsov-Ogievskiy     trace_nbd_receive_request(magic, request->flags, request->type,
15459588463eSVladimir Sementsov-Ogievskiy                               request->from, request->len);
1546798bfe00SFam Zheng 
1547c8720ca0SEric Blake     if (magic != expect) {
1548c8720ca0SEric Blake         error_setg(errp, "invalid magic (got 0x%" PRIx32 ", expected 0x%"
1549c8720ca0SEric Blake                    PRIx32 ")", magic, expect);
1550798bfe00SFam Zheng         return -EINVAL;
1551798bfe00SFam Zheng     }
1552798bfe00SFam Zheng     return 0;
1553798bfe00SFam Zheng }
1554798bfe00SFam Zheng 
/*
 * NOTE(review): the use sites of this limit are outside this part of the
 * file; presumably it caps the number of requests one client may have in
 * flight at a time - confirm against the request allocation code.
 */
#define MAX_NBD_REQUESTS 16
1556798bfe00SFam Zheng 
1557f816310dSStefan Hajnoczi /* Runs in export AioContext and main loop thread */
nbd_client_get(NBDClient * client)1558798bfe00SFam Zheng void nbd_client_get(NBDClient *client)
1559798bfe00SFam Zheng {
1560f816310dSStefan Hajnoczi     qatomic_inc(&client->refcount);
1561798bfe00SFam Zheng }
1562798bfe00SFam Zheng 
/*
 * Drop one reference on @client; when the last reference goes away,
 * release everything the client holds and free it.
 *
 * Asserts that it is called from the main thread.
 */
void nbd_client_put(NBDClient *client)
{
    assert(qemu_in_main_thread());

    if (qatomic_fetch_dec(&client->refcount) != 1) {
        /* Other references remain; nothing to tear down yet. */
        return;
    }

    /*
     * The last reference should be dropped by client->close,
     * which is called by client_close.
     */
    assert(client->closing);

    object_unref(OBJECT(client->sioc));
    object_unref(OBJECT(client->ioc));
    if (client->tlscreds) {
        object_unref(OBJECT(client->tlscreds));
    }
    g_free(client->tlsauthz);

    if (client->exp) {
        /* Unlink from the export's client list before dropping the export. */
        QTAILQ_REMOVE(&client->exp->clients, client, next);
        blk_exp_unref(&client->exp->common);
    }

    g_free(client->contexts.bitmaps);
    qemu_mutex_destroy(&client->lock);
    g_free(client);
}
1588798bfe00SFam Zheng 
1589f816310dSStefan Hajnoczi /*
1590f816310dSStefan Hajnoczi  * Tries to release the reference to @client, but only if other references
1591f816310dSStefan Hajnoczi  * remain. This is an optimization for the common case where we want to avoid
1592f816310dSStefan Hajnoczi  * the expense of scheduling nbd_client_put() in the main loop thread.
1593f816310dSStefan Hajnoczi  *
1594f816310dSStefan Hajnoczi  * Returns true upon success or false if the reference was not released because
1595f816310dSStefan Hajnoczi  * it is the last reference.
1596f816310dSStefan Hajnoczi  */
nbd_client_put_nonzero(NBDClient * client)1597f816310dSStefan Hajnoczi static bool nbd_client_put_nonzero(NBDClient *client)
1598f816310dSStefan Hajnoczi {
1599f816310dSStefan Hajnoczi     int old = qatomic_read(&client->refcount);
1600f816310dSStefan Hajnoczi     int expected;
1601f816310dSStefan Hajnoczi 
1602f816310dSStefan Hajnoczi     do {
1603f816310dSStefan Hajnoczi         if (old == 1) {
1604f816310dSStefan Hajnoczi             return false;
1605f816310dSStefan Hajnoczi         }
1606f816310dSStefan Hajnoczi 
1607f816310dSStefan Hajnoczi         expected = old;
1608f816310dSStefan Hajnoczi         old = qatomic_cmpxchg(&client->refcount, expected, expected - 1);
1609f816310dSStefan Hajnoczi     } while (old != expected);
1610f816310dSStefan Hajnoczi 
1611f816310dSStefan Hajnoczi     return true;
1612f816310dSStefan Hajnoczi }
1613f816310dSStefan Hajnoczi 
/*
 * Begin closing @client.  The first call marks the client as closing, shuts
 * down its I/O channel in both directions and invokes the client's close_fn
 * callback (if set) with @negotiated; later calls return immediately.
 *
 * Must be called from the main thread (asserted).
 */
static void client_close(NBDClient *client, bool negotiated)
{
    assert(qemu_in_main_thread());

    WITH_QEMU_LOCK_GUARD(&client->lock) {
        if (client->closing) {
            /* Already being closed; nothing more to do. */
            return;
        }

        client->closing = true;
    }

    /*
     * Force requests to finish.  They will drop their own references,
     * then we'll close the socket and free the NBDClient.
     */
    qio_channel_shutdown(client->ioc, QIO_CHANNEL_SHUTDOWN_BOTH, NULL);

    /* Also tell the client, so that they release their reference. */
    if (client->close_fn) {
        client->close_fn(client, negotiated);
    }
}
1637798bfe00SFam Zheng 
16387075d235SStefan Hajnoczi /* Runs in export AioContext with client->lock held */
nbd_request_get(NBDClient * client)1639315f78abSEric Blake static NBDRequestData *nbd_request_get(NBDClient *client)
1640798bfe00SFam Zheng {
1641315f78abSEric Blake     NBDRequestData *req;
1642798bfe00SFam Zheng 
1643798bfe00SFam Zheng     assert(client->nb_requests <= MAX_NBD_REQUESTS - 1);
1644798bfe00SFam Zheng     client->nb_requests++;
1645798bfe00SFam Zheng 
1646315f78abSEric Blake     req = g_new0(NBDRequestData, 1);
1647798bfe00SFam Zheng     req->client = client;
1648798bfe00SFam Zheng     return req;
1649798bfe00SFam Zheng }
1650798bfe00SFam Zheng 
16517075d235SStefan Hajnoczi /* Runs in export AioContext with client->lock held */
nbd_request_put(NBDRequestData * req)1652315f78abSEric Blake static void nbd_request_put(NBDRequestData *req)
1653798bfe00SFam Zheng {
1654798bfe00SFam Zheng     NBDClient *client = req->client;
1655798bfe00SFam Zheng 
1656798bfe00SFam Zheng     if (req->data) {
1657798bfe00SFam Zheng         qemu_vfree(req->data);
1658798bfe00SFam Zheng     }
1659798bfe00SFam Zheng     g_free(req);
1660798bfe00SFam Zheng 
1661798bfe00SFam Zheng     client->nb_requests--;
1662fd6afc50SSergio Lopez 
1663fd6afc50SSergio Lopez     if (client->quiescing && client->nb_requests == 0) {
1664fd6afc50SSergio Lopez         aio_wait_kick();
1665fd6afc50SSergio Lopez     }
1666fd6afc50SSergio Lopez 
1667ff82911cSPaolo Bonzini     nbd_client_receive_next_request(client);
1668798bfe00SFam Zheng }
1669798bfe00SFam Zheng 
/*
 * AioContext notifier: records @ctx as the export's new AioContext.
 * @opaque is the NBDExport.
 *
 * Every client of the export is asserted to have no requests and no
 * receive/send coroutine at this point.  Must run in the main thread
 * (asserted).
 */
static void blk_aio_attached(AioContext *ctx, void *opaque)
{
    NBDExport *exp = opaque;
    NBDClient *client;

    assert(qemu_in_main_thread());

    trace_nbd_blk_aio_attached(exp->name, ctx);

    exp->common.ctx = ctx;

    QTAILQ_FOREACH(client, &exp->clients, next) {
        WITH_QEMU_LOCK_GUARD(&client->lock) {
            assert(client->nb_requests == 0);
            assert(client->recv_coroutine == NULL);
            assert(client->send_coroutine == NULL);
        }
    }
}
1689798bfe00SFam Zheng 
blk_aio_detach(void * opaque)1690798bfe00SFam Zheng static void blk_aio_detach(void *opaque)
1691798bfe00SFam Zheng {
1692798bfe00SFam Zheng     NBDExport *exp = opaque;
1693798bfe00SFam Zheng 
16947075d235SStefan Hajnoczi     assert(qemu_in_main_thread());
16957075d235SStefan Hajnoczi 
16968612c686SKevin Wolf     trace_nbd_blk_aio_detach(exp->name, exp->common.ctx);
1697798bfe00SFam Zheng 
16988612c686SKevin Wolf     exp->common.ctx = NULL;
1699798bfe00SFam Zheng }
1700798bfe00SFam Zheng 
nbd_drained_begin(void * opaque)1701fd6afc50SSergio Lopez static void nbd_drained_begin(void *opaque)
1702fd6afc50SSergio Lopez {
1703fd6afc50SSergio Lopez     NBDExport *exp = opaque;
1704fd6afc50SSergio Lopez     NBDClient *client;
1705fd6afc50SSergio Lopez 
17067075d235SStefan Hajnoczi     assert(qemu_in_main_thread());
17077075d235SStefan Hajnoczi 
1708fd6afc50SSergio Lopez     QTAILQ_FOREACH(client, &exp->clients, next) {
17097075d235SStefan Hajnoczi         WITH_QEMU_LOCK_GUARD(&client->lock) {
1710fd6afc50SSergio Lopez             client->quiescing = true;
1711fd6afc50SSergio Lopez         }
1712fd6afc50SSergio Lopez     }
17137075d235SStefan Hajnoczi }
1714fd6afc50SSergio Lopez 
nbd_drained_end(void * opaque)1715fd6afc50SSergio Lopez static void nbd_drained_end(void *opaque)
1716fd6afc50SSergio Lopez {
1717fd6afc50SSergio Lopez     NBDExport *exp = opaque;
1718fd6afc50SSergio Lopez     NBDClient *client;
1719fd6afc50SSergio Lopez 
17207075d235SStefan Hajnoczi     assert(qemu_in_main_thread());
17217075d235SStefan Hajnoczi 
1722fd6afc50SSergio Lopez     QTAILQ_FOREACH(client, &exp->clients, next) {
17237075d235SStefan Hajnoczi         WITH_QEMU_LOCK_GUARD(&client->lock) {
1724fd6afc50SSergio Lopez             client->quiescing = false;
1725fd6afc50SSergio Lopez             nbd_client_receive_next_request(client);
1726fd6afc50SSergio Lopez         }
1727fd6afc50SSergio Lopez     }
17287075d235SStefan Hajnoczi }
17297075d235SStefan Hajnoczi 
17307075d235SStefan Hajnoczi /* Runs in export AioContext */
nbd_wake_read_bh(void * opaque)17317075d235SStefan Hajnoczi static void nbd_wake_read_bh(void *opaque)
17327075d235SStefan Hajnoczi {
17337075d235SStefan Hajnoczi     NBDClient *client = opaque;
17347075d235SStefan Hajnoczi     qio_channel_wake_read(client->ioc);
17357075d235SStefan Hajnoczi }
1736fd6afc50SSergio Lopez 
nbd_drained_poll(void * opaque)1737fd6afc50SSergio Lopez static bool nbd_drained_poll(void *opaque)
1738fd6afc50SSergio Lopez {
1739fd6afc50SSergio Lopez     NBDExport *exp = opaque;
1740fd6afc50SSergio Lopez     NBDClient *client;
1741fd6afc50SSergio Lopez 
17427075d235SStefan Hajnoczi     assert(qemu_in_main_thread());
17437075d235SStefan Hajnoczi 
1744fd6afc50SSergio Lopez     QTAILQ_FOREACH(client, &exp->clients, next) {
17457075d235SStefan Hajnoczi         WITH_QEMU_LOCK_GUARD(&client->lock) {
1746fd6afc50SSergio Lopez             if (client->nb_requests != 0) {
1747fd6afc50SSergio Lopez                 /*
1748fd6afc50SSergio Lopez                  * If there's a coroutine waiting for a request on nbd_read_eof()
1749fd6afc50SSergio Lopez                  * enter it here so we don't depend on the client to wake it up.
17507075d235SStefan Hajnoczi                  *
17517075d235SStefan Hajnoczi                  * Schedule a BH in the export AioContext to avoid missing the
17527075d235SStefan Hajnoczi                  * wake up due to the race between qio_channel_wake_read() and
17537075d235SStefan Hajnoczi                  * qio_channel_yield().
1754fd6afc50SSergio Lopez                  */
1755fd6afc50SSergio Lopez                 if (client->recv_coroutine != NULL && client->read_yielding) {
17567075d235SStefan Hajnoczi                     aio_bh_schedule_oneshot(nbd_export_aio_context(client->exp),
17577075d235SStefan Hajnoczi                                             nbd_wake_read_bh, client);
1758fd6afc50SSergio Lopez                 }
1759fd6afc50SSergio Lopez 
1760fd6afc50SSergio Lopez                 return true;
1761fd6afc50SSergio Lopez             }
1762fd6afc50SSergio Lopez         }
17637075d235SStefan Hajnoczi     }
1764fd6afc50SSergio Lopez 
1765fd6afc50SSergio Lopez     return false;
1766fd6afc50SSergio Lopez }
1767fd6afc50SSergio Lopez 
/*
 * Notifier callback registered by nbd_export_set_on_eject_blk(): requests
 * shutdown of the export that embeds notifier @n.  @data is unused.
 * Must run in the main thread (asserted).
 */
static void nbd_eject_notifier(Notifier *n, void *data)
{
    NBDExport *exp = container_of(n, NBDExport, eject_notifier);

    assert(qemu_in_main_thread());

    blk_exp_request_shutdown(&exp->common);
}
1776741cc431SMax Reitz 
/*
 * Arrange for NBD export @exp to be shut down when the block node is
 * removed from @blk.
 *
 * Takes a reference on @blk, remembers it in the export, and registers the
 * export's eject notifier on it.  @exp must be an NBD export that has no
 * eject BlockBackend set yet (both asserted).
 */
void nbd_export_set_on_eject_blk(BlockExport *exp, BlockBackend *blk)
{
    NBDExport *nbd_exp = container_of(exp, NBDExport, common);

    assert(exp->drv == &blk_exp_nbd);
    assert(nbd_exp->eject_notifier_blk == NULL);

    /* The export keeps its own reference for as long as it is registered. */
    blk_ref(blk);
    nbd_exp->eject_notifier_blk = blk;

    nbd_exp->eject_notifier.notify = nbd_eject_notifier;
    blk_add_remove_bs_notifier(blk, &nbd_exp->eject_notifier);
}
17889b562c64SKevin Wolf 
1789fd6afc50SSergio Lopez static const BlockDevOps nbd_block_ops = {
1790fd6afc50SSergio Lopez     .drained_begin = nbd_drained_begin,
1791fd6afc50SSergio Lopez     .drained_end = nbd_drained_end,
1792fd6afc50SSergio Lopez     .drained_poll = nbd_drained_poll,
1793fd6afc50SSergio Lopez };
1794fd6afc50SSergio Lopez 
nbd_export_create(BlockExport * blk_exp,BlockExportOptions * exp_args,Error ** errp)17955b1cb497SKevin Wolf static int nbd_export_create(BlockExport *blk_exp, BlockExportOptions *exp_args,
1796331170e0SKevin Wolf                              Error **errp)
1797798bfe00SFam Zheng {
1798a6ff7989SKevin Wolf     NBDExport *exp = container_of(blk_exp, NBDExport, common);
17995b1cb497SKevin Wolf     BlockExportOptionsNbd *arg = &exp_args->u.nbd;
18008461b4d6SMarkus Armbruster     const char *name = arg->name ?: exp_args->node_name;
1801331170e0SKevin Wolf     BlockBackend *blk = blk_exp->blk;
1802b57e4de0SKevin Wolf     int64_t size;
1803331170e0SKevin Wolf     uint64_t perm, shared_perm;
18045b1cb497SKevin Wolf     bool readonly = !exp_args->writable;
1805e5fb29d5SVladimir Sementsov-Ogievskiy     BlockDirtyBitmapOrStrList *bitmaps;
18063b1f244cSEric Blake     size_t i;
1807d7086422SKevin Wolf     int ret;
1808cd7fca95SKevin Wolf 
1809372b69f5SKevin Wolf     GLOBAL_STATE_CODE();
18105b1cb497SKevin Wolf     assert(exp_args->type == BLOCK_EXPORT_TYPE_NBD);
18115b1cb497SKevin Wolf 
18125b1cb497SKevin Wolf     if (!nbd_server_is_running()) {
18135b1cb497SKevin Wolf         error_setg(errp, "NBD server not running");
18145b1cb497SKevin Wolf         return -EINVAL;
18155b1cb497SKevin Wolf     }
18165b1cb497SKevin Wolf 
18178461b4d6SMarkus Armbruster     if (strlen(name) > NBD_MAX_STRING_SIZE) {
18188461b4d6SMarkus Armbruster         error_setg(errp, "export name '%s' too long", name);
18195b1cb497SKevin Wolf         return -EINVAL;
18205b1cb497SKevin Wolf     }
18215b1cb497SKevin Wolf 
18225b1cb497SKevin Wolf     if (arg->description && strlen(arg->description) > NBD_MAX_STRING_SIZE) {
18235b1cb497SKevin Wolf         error_setg(errp, "description '%s' too long", arg->description);
18245b1cb497SKevin Wolf         return -EINVAL;
18255b1cb497SKevin Wolf     }
18265b1cb497SKevin Wolf 
18278461b4d6SMarkus Armbruster     if (nbd_export_find(name)) {
18288461b4d6SMarkus Armbruster         error_setg(errp, "NBD server already has export named '%s'", name);
18295b1cb497SKevin Wolf         return -EEXIST;
18305b1cb497SKevin Wolf     }
18315b1cb497SKevin Wolf 
1832331170e0SKevin Wolf     size = blk_getlength(blk);
1833b57e4de0SKevin Wolf     if (size < 0) {
1834b57e4de0SKevin Wolf         error_setg_errno(errp, -size,
1835b57e4de0SKevin Wolf                          "Failed to determine the NBD export's length");
1836a6ff7989SKevin Wolf         return size;
1837b57e4de0SKevin Wolf     }
1838b57e4de0SKevin Wolf 
18398a7ce4f9SKevin Wolf     /* Don't allow resize while the NBD server is running, otherwise we don't
18408a7ce4f9SKevin Wolf      * care what happens with the node. */
1841331170e0SKevin Wolf     blk_get_perm(blk, &perm, &shared_perm);
1842331170e0SKevin Wolf     ret = blk_set_perm(blk, perm, shared_perm & ~BLK_PERM_RESIZE, errp);
1843d7086422SKevin Wolf     if (ret < 0) {
1844331170e0SKevin Wolf         return ret;
1845d7086422SKevin Wolf     }
1846331170e0SKevin Wolf 
1847798bfe00SFam Zheng     QTAILQ_INIT(&exp->clients);
18488461b4d6SMarkus Armbruster     exp->name = g_strdup(name);
18495b1cb497SKevin Wolf     exp->description = g_strdup(arg->description);
1850dbb38caaSEric Blake     exp->nbdflags = (NBD_FLAG_HAS_FLAGS | NBD_FLAG_SEND_FLUSH |
1851dbb38caaSEric Blake                      NBD_FLAG_SEND_FUA | NBD_FLAG_SEND_CACHE);
185258a6fdccSEric Blake 
185358a6fdccSEric Blake     if (nbd_server_max_connections() != 1) {
1854dbb38caaSEric Blake         exp->nbdflags |= NBD_FLAG_CAN_MULTI_CONN;
1855dbb38caaSEric Blake     }
185658a6fdccSEric Blake     if (readonly) {
185758a6fdccSEric Blake         exp->nbdflags |= NBD_FLAG_READ_ONLY;
1858dbb38caaSEric Blake     } else {
1859b491dbb7SEric Blake         exp->nbdflags |= (NBD_FLAG_SEND_TRIM | NBD_FLAG_SEND_WRITE_ZEROES |
1860b491dbb7SEric Blake                           NBD_FLAG_SEND_FAST_ZERO);
1861dbb38caaSEric Blake     }
18627596bbb3SEric Blake     exp->size = QEMU_ALIGN_DOWN(size, BDRV_SECTOR_SIZE);
1863798bfe00SFam Zheng 
1864372b69f5SKevin Wolf     bdrv_graph_rdlock_main_loop();
1865372b69f5SKevin Wolf 
1866cbad81ceSEric Blake     for (bitmaps = arg->bitmaps; bitmaps; bitmaps = bitmaps->next) {
18673b1f244cSEric Blake         exp->nr_export_bitmaps++;
18683b1f244cSEric Blake     }
18693b1f244cSEric Blake     exp->export_bitmaps = g_new0(BdrvDirtyBitmap *, exp->nr_export_bitmaps);
18703b1f244cSEric Blake     for (i = 0, bitmaps = arg->bitmaps; bitmaps;
1871e5fb29d5SVladimir Sementsov-Ogievskiy          i++, bitmaps = bitmaps->next)
1872e5fb29d5SVladimir Sementsov-Ogievskiy     {
1873e5fb29d5SVladimir Sementsov-Ogievskiy         const char *bitmap;
1874331170e0SKevin Wolf         BlockDriverState *bs = blk_bs(blk);
1875678ba275SEric Blake         BdrvDirtyBitmap *bm = NULL;
1876678ba275SEric Blake 
1877e5fb29d5SVladimir Sementsov-Ogievskiy         switch (bitmaps->value->type) {
1878e5fb29d5SVladimir Sementsov-Ogievskiy         case QTYPE_QSTRING:
1879e5fb29d5SVladimir Sementsov-Ogievskiy             bitmap = bitmaps->value->u.local;
1880ee2f94caSMax Reitz             while (bs) {
1881cbad81ceSEric Blake                 bm = bdrv_find_dirty_bitmap(bs, bitmap);
1882ee2f94caSMax Reitz                 if (bm != NULL) {
1883678ba275SEric Blake                     break;
1884678ba275SEric Blake                 }
1885678ba275SEric Blake 
1886ee2f94caSMax Reitz                 bs = bdrv_filter_or_cow_bs(bs);
1887678ba275SEric Blake             }
1888678ba275SEric Blake 
1889678ba275SEric Blake             if (bm == NULL) {
1890a6ff7989SKevin Wolf                 ret = -ENOENT;
1891e5fb29d5SVladimir Sementsov-Ogievskiy                 error_setg(errp, "Bitmap '%s' is not found",
1892e5fb29d5SVladimir Sementsov-Ogievskiy                            bitmaps->value->u.local);
18933b78a927SJohn Snow                 goto fail;
18943b78a927SJohn Snow             }
18953b78a927SJohn Snow 
1896dbb38caaSEric Blake             if (readonly && bdrv_is_writable(bs) &&
1897678ba275SEric Blake                 bdrv_dirty_bitmap_enabled(bm)) {
1898a6ff7989SKevin Wolf                 ret = -EINVAL;
1899e5fb29d5SVladimir Sementsov-Ogievskiy                 error_setg(errp, "Enabled bitmap '%s' incompatible with "
1900e5fb29d5SVladimir Sementsov-Ogievskiy                            "readonly export", bitmap);
1901e5fb29d5SVladimir Sementsov-Ogievskiy                 goto fail;
1902e5fb29d5SVladimir Sementsov-Ogievskiy             }
1903e5fb29d5SVladimir Sementsov-Ogievskiy             break;
1904e5fb29d5SVladimir Sementsov-Ogievskiy         case QTYPE_QDICT:
1905e5fb29d5SVladimir Sementsov-Ogievskiy             bitmap = bitmaps->value->u.external.name;
1906e5fb29d5SVladimir Sementsov-Ogievskiy             bm = block_dirty_bitmap_lookup(bitmaps->value->u.external.node,
1907e5fb29d5SVladimir Sementsov-Ogievskiy                                            bitmap, NULL, errp);
1908e5fb29d5SVladimir Sementsov-Ogievskiy             if (!bm) {
1909e5fb29d5SVladimir Sementsov-Ogievskiy                 ret = -ENOENT;
1910e5fb29d5SVladimir Sementsov-Ogievskiy                 goto fail;
1911e5fb29d5SVladimir Sementsov-Ogievskiy             }
1912e5fb29d5SVladimir Sementsov-Ogievskiy             break;
1913e5fb29d5SVladimir Sementsov-Ogievskiy         default:
1914e5fb29d5SVladimir Sementsov-Ogievskiy             abort();
1915e5fb29d5SVladimir Sementsov-Ogievskiy         }
1916e5fb29d5SVladimir Sementsov-Ogievskiy 
1917e5fb29d5SVladimir Sementsov-Ogievskiy         assert(bm);
1918e5fb29d5SVladimir Sementsov-Ogievskiy 
1919e5fb29d5SVladimir Sementsov-Ogievskiy         if (bdrv_dirty_bitmap_check(bm, BDRV_BITMAP_ALLOW_RO, errp)) {
1920e5fb29d5SVladimir Sementsov-Ogievskiy             ret = -EINVAL;
1921678ba275SEric Blake             goto fail;
1922678ba275SEric Blake         }
1923678ba275SEric Blake 
19243b1f244cSEric Blake         exp->export_bitmaps[i] = bm;
1925cbad81ceSEric Blake         assert(strlen(bitmap) <= BDRV_BITMAP_MAX_NAME_SIZE);
1926678ba275SEric Blake     }
1927678ba275SEric Blake 
19283b1f244cSEric Blake     /* Mark bitmaps busy in a separate loop, to simplify roll-back concerns. */
19293b1f244cSEric Blake     for (i = 0; i < exp->nr_export_bitmaps; i++) {
19303b1f244cSEric Blake         bdrv_dirty_bitmap_set_busy(exp->export_bitmaps[i], true);
19313b1f244cSEric Blake     }
19323b1f244cSEric Blake 
1933dbc7b014SEric Blake     exp->allocation_depth = arg->allocation_depth;
1934dbc7b014SEric Blake 
1935fd6afc50SSergio Lopez     /*
1936fd6afc50SSergio Lopez      * We need to inhibit request queuing in the block layer to ensure we can
1937fd6afc50SSergio Lopez      * be properly quiesced when entering a drained section, as our coroutines
1938fd6afc50SSergio Lopez      * servicing pending requests might enter blk_pread().
1939fd6afc50SSergio Lopez      */
1940fd6afc50SSergio Lopez     blk_set_disable_request_queuing(blk, true);
1941fd6afc50SSergio Lopez 
1942798bfe00SFam Zheng     blk_add_aio_context_notifier(blk, blk_aio_attached, blk_aio_detach, exp);
1943741cc431SMax Reitz 
1944fd6afc50SSergio Lopez     blk_set_dev_ops(blk, &nbd_block_ops, exp);
1945fd6afc50SSergio Lopez 
19463fa4c765SEric Blake     QTAILQ_INSERT_TAIL(&exports, exp, next);
1947c69de1beSKevin Wolf 
1948372b69f5SKevin Wolf     bdrv_graph_rdunlock_main_loop();
1949372b69f5SKevin Wolf 
1950a6ff7989SKevin Wolf     return 0;
1951798bfe00SFam Zheng 
1952798bfe00SFam Zheng fail:
1953372b69f5SKevin Wolf     bdrv_graph_rdunlock_main_loop();
19543b1f244cSEric Blake     g_free(exp->export_bitmaps);
19553fa4c765SEric Blake     g_free(exp->name);
19563fa4c765SEric Blake     g_free(exp->description);
1957a6ff7989SKevin Wolf     return ret;
1958798bfe00SFam Zheng }
1959798bfe00SFam Zheng 
nbd_export_find(const char * name)1960798bfe00SFam Zheng NBDExport *nbd_export_find(const char *name)
1961798bfe00SFam Zheng {
1962798bfe00SFam Zheng     NBDExport *exp;
1963798bfe00SFam Zheng     QTAILQ_FOREACH(exp, &exports, next) {
1964798bfe00SFam Zheng         if (strcmp(name, exp->name) == 0) {
1965798bfe00SFam Zheng             return exp;
1966798bfe00SFam Zheng         }
1967798bfe00SFam Zheng     }
1968798bfe00SFam Zheng 
1969798bfe00SFam Zheng     return NULL;
1970798bfe00SFam Zheng }
1971798bfe00SFam Zheng 
197261bc846dSEric Blake AioContext *
nbd_export_aio_context(NBDExport * exp)197361bc846dSEric Blake nbd_export_aio_context(NBDExport *exp)
197461bc846dSEric Blake {
19758612c686SKevin Wolf     return exp->common.ctx;
197661bc846dSEric Blake }
197761bc846dSEric Blake 
/*
 * BlockExportDriver .request_shutdown callback: close every client of the
 * export and, if the export is still named, drop its name and remove it
 * from the global export list.
 *
 * A reference on the export is held for the duration of the call.
 */
static void nbd_export_request_shutdown(BlockExport *blk_exp)
{
    NBDExport *exp = container_of(blk_exp, NBDExport, common);
    NBDClient *client, *tmp;

    blk_exp_ref(&exp->common);

    /*
     * TODO: Should we expand QMP BlockExportRemoveMode enum to allow a
     * close mode that stops advertising the export to new clients but
     * still permits existing clients to run to completion? Because of
     * that possibility, this function can be called more than once on
     * an export; the name check below makes the unlisting happen only
     * once.
     */
    QTAILQ_FOREACH_SAFE(client, &exp->clients, next, tmp) {
        client_close(client, true);
    }

    if (exp->name) {
        QTAILQ_REMOVE(&exports, exp, next);
        g_free(exp->name);
        exp->name = NULL;
    }

    blk_exp_unref(&exp->common);
}
2001798bfe00SFam Zheng 
/*
 * BlockExportDriver .delete callback: release the resources that
 * nbd_export_create() and nbd_export_set_on_eject_blk() attached to the
 * export.
 *
 * The export must already have been shut down: its name is expected to be
 * NULL and its client list empty (both asserted).
 */
static void nbd_export_delete(BlockExport *blk_exp)
{
    size_t i;
    NBDExport *exp = container_of(blk_exp, NBDExport, common);

    assert(exp->name == NULL);
    assert(QTAILQ_EMPTY(&exp->clients));

    g_free(exp->description);
    exp->description = NULL;

    if (exp->eject_notifier_blk) {
        notifier_remove(&exp->eject_notifier);
        blk_unref(exp->eject_notifier_blk);
    }
    blk_remove_aio_context_notifier(exp->common.blk, blk_aio_attached,
                                    blk_aio_detach, exp);
    blk_set_disable_request_queuing(exp->common.blk, false);

    for (i = 0; i < exp->nr_export_bitmaps; i++) {
        bdrv_dirty_bitmap_set_busy(exp->export_bitmaps[i], false);
    }

    /*
     * The pointer array itself was allocated by nbd_export_create(); free it
     * here so it is not leaked, and reset the bookkeeping so nothing can
     * index a stale array afterwards.
     */
    g_free(exp->export_bitmaps);
    exp->export_bitmaps = NULL;
    exp->nr_export_bitmaps = 0;
}
2025798bfe00SFam Zheng 
202656ee8626SKevin Wolf const BlockExportDriver blk_exp_nbd = {
202756ee8626SKevin Wolf     .type               = BLOCK_EXPORT_TYPE_NBD,
2028a6ff7989SKevin Wolf     .instance_size      = sizeof(NBDExport),
202956ee8626SKevin Wolf     .create             = nbd_export_create,
2030c69de1beSKevin Wolf     .delete             = nbd_export_delete,
2031bc4ee65bSKevin Wolf     .request_shutdown   = nbd_export_request_shutdown,
203256ee8626SKevin Wolf };
203356ee8626SKevin Wolf 
nbd_co_send_iov(NBDClient * client,struct iovec * iov,unsigned niov,Error ** errp)2034de79bfc3SVladimir Sementsov-Ogievskiy static int coroutine_fn nbd_co_send_iov(NBDClient *client, struct iovec *iov,
2035de79bfc3SVladimir Sementsov-Ogievskiy                                         unsigned niov, Error **errp)
2036de79bfc3SVladimir Sementsov-Ogievskiy {
2037de79bfc3SVladimir Sementsov-Ogievskiy     int ret;
2038de79bfc3SVladimir Sementsov-Ogievskiy 
2039de79bfc3SVladimir Sementsov-Ogievskiy     g_assert(qemu_in_coroutine());
2040de79bfc3SVladimir Sementsov-Ogievskiy     qemu_co_mutex_lock(&client->send_lock);
2041de79bfc3SVladimir Sementsov-Ogievskiy     client->send_coroutine = qemu_coroutine_self();
2042de79bfc3SVladimir Sementsov-Ogievskiy 
2043de79bfc3SVladimir Sementsov-Ogievskiy     ret = qio_channel_writev_all(client->ioc, iov, niov, errp) < 0 ? -EIO : 0;
2044de79bfc3SVladimir Sementsov-Ogievskiy 
2045de79bfc3SVladimir Sementsov-Ogievskiy     client->send_coroutine = NULL;
2046de79bfc3SVladimir Sementsov-Ogievskiy     qemu_co_mutex_unlock(&client->send_lock);
2047de79bfc3SVladimir Sementsov-Ogievskiy 
2048de79bfc3SVladimir Sementsov-Ogievskiy     return ret;
2049de79bfc3SVladimir Sementsov-Ogievskiy }
2050de79bfc3SVladimir Sementsov-Ogievskiy 
set_be_simple_reply(NBDSimpleReply * reply,uint64_t error,uint64_t cookie)2051caad5384SVladimir Sementsov-Ogievskiy static inline void set_be_simple_reply(NBDSimpleReply *reply, uint64_t error,
205222efd811SEric Blake                                        uint64_t cookie)
2053caad5384SVladimir Sementsov-Ogievskiy {
2054caad5384SVladimir Sementsov-Ogievskiy     stl_be_p(&reply->magic, NBD_SIMPLE_REPLY_MAGIC);
2055caad5384SVladimir Sementsov-Ogievskiy     stl_be_p(&reply->error, error);
205622efd811SEric Blake     stq_be_p(&reply->cookie, cookie);
2057caad5384SVladimir Sementsov-Ogievskiy }
2058caad5384SVladimir Sementsov-Ogievskiy 
nbd_co_send_simple_reply(NBDClient * client,NBDRequest * request,uint32_t error,void * data,uint64_t len,Error ** errp)2059d2223cddSPaolo Bonzini static int coroutine_fn nbd_co_send_simple_reply(NBDClient *client,
206066d4f4feSEric Blake                                                  NBDRequest *request,
206114cea41dSVladimir Sementsov-Ogievskiy                                                  uint32_t error,
2062978df1b6SVladimir Sementsov-Ogievskiy                                                  void *data,
2063b2578459SEric Blake                                                  uint64_t len,
2064978df1b6SVladimir Sementsov-Ogievskiy                                                  Error **errp)
2065798bfe00SFam Zheng {
2066de79bfc3SVladimir Sementsov-Ogievskiy     NBDSimpleReply reply;
206714cea41dSVladimir Sementsov-Ogievskiy     int nbd_err = system_errno_to_nbd_errno(error);
2068de79bfc3SVladimir Sementsov-Ogievskiy     struct iovec iov[] = {
2069de79bfc3SVladimir Sementsov-Ogievskiy         {.iov_base = &reply, .iov_len = sizeof(reply)},
2070de79bfc3SVladimir Sementsov-Ogievskiy         {.iov_base = data, .iov_len = len}
2071de79bfc3SVladimir Sementsov-Ogievskiy     };
20726fb2b972SVladimir Sementsov-Ogievskiy 
2073a7c8ed36SEric Blake     assert(!len || !nbd_err);
2074b2578459SEric Blake     assert(len <= NBD_MAX_BUFFER_SIZE);
2075ac132d05SEric Blake     assert(client->mode < NBD_MODE_STRUCTURED ||
2076ac132d05SEric Blake            (client->mode == NBD_MODE_STRUCTURED &&
2077ac132d05SEric Blake             request->type != NBD_CMD_READ));
207822efd811SEric Blake     trace_nbd_co_send_simple_reply(request->cookie, nbd_err,
207966d4f4feSEric Blake                                    nbd_err_lookup(nbd_err), len);
208022efd811SEric Blake     set_be_simple_reply(&reply, nbd_err, request->cookie);
20816fb2b972SVladimir Sementsov-Ogievskiy 
2082a7c8ed36SEric Blake     return nbd_co_send_iov(client, iov, 2, errp);
2083798bfe00SFam Zheng }
2084798bfe00SFam Zheng 
2085a7c8ed36SEric Blake /*
2086a7c8ed36SEric Blake  * Prepare the header of a reply chunk for network transmission.
2087a7c8ed36SEric Blake  *
2088a7c8ed36SEric Blake  * On input, @iov is partially initialized: iov[0].iov_base must point
2089a7c8ed36SEric Blake  * to an uninitialized NBDReply, while the remaining @niov elements
2090a7c8ed36SEric Blake  * (if any) must be ready for transmission.  This function then
2091a7c8ed36SEric Blake  * populates iov[0] for transmission.
2092a7c8ed36SEric Blake  */
set_be_chunk(NBDClient * client,struct iovec * iov,size_t niov,uint16_t flags,uint16_t type,NBDRequest * request)2093a7c8ed36SEric Blake static inline void set_be_chunk(NBDClient *client, struct iovec *iov,
2094a7c8ed36SEric Blake                                 size_t niov, uint16_t flags, uint16_t type,
209566d4f4feSEric Blake                                 NBDRequest *request)
20965c54e7faSVladimir Sementsov-Ogievskiy {
2097a7c8ed36SEric Blake     size_t i, length = 0;
2098a7c8ed36SEric Blake 
2099a7c8ed36SEric Blake     for (i = 1; i < niov; i++) {
2100a7c8ed36SEric Blake         length += iov[i].iov_len;
2101a7c8ed36SEric Blake     }
2102a7c8ed36SEric Blake     assert(length <= NBD_MAX_BUFFER_SIZE + sizeof(NBDStructuredReadData));
2103a7c8ed36SEric Blake 
210411d3355fSEric Blake     if (client->mode >= NBD_MODE_EXTENDED) {
210511d3355fSEric Blake         NBDExtendedReplyChunk *chunk = iov->iov_base;
210611d3355fSEric Blake 
210711d3355fSEric Blake         iov[0].iov_len = sizeof(*chunk);
210811d3355fSEric Blake         stl_be_p(&chunk->magic, NBD_EXTENDED_REPLY_MAGIC);
210911d3355fSEric Blake         stw_be_p(&chunk->flags, flags);
211011d3355fSEric Blake         stw_be_p(&chunk->type, type);
211111d3355fSEric Blake         stq_be_p(&chunk->cookie, request->cookie);
211211d3355fSEric Blake         stq_be_p(&chunk->offset, request->from);
211311d3355fSEric Blake         stq_be_p(&chunk->length, length);
211411d3355fSEric Blake     } else {
211511d3355fSEric Blake         NBDStructuredReplyChunk *chunk = iov->iov_base;
211611d3355fSEric Blake 
2117a7c8ed36SEric Blake         iov[0].iov_len = sizeof(*chunk);
21185c54e7faSVladimir Sementsov-Ogievskiy         stl_be_p(&chunk->magic, NBD_STRUCTURED_REPLY_MAGIC);
21195c54e7faSVladimir Sementsov-Ogievskiy         stw_be_p(&chunk->flags, flags);
21205c54e7faSVladimir Sementsov-Ogievskiy         stw_be_p(&chunk->type, type);
212122efd811SEric Blake         stq_be_p(&chunk->cookie, request->cookie);
21225c54e7faSVladimir Sementsov-Ogievskiy         stl_be_p(&chunk->length, length);
21235c54e7faSVladimir Sementsov-Ogievskiy     }
212411d3355fSEric Blake }
21255c54e7faSVladimir Sementsov-Ogievskiy 
nbd_co_send_chunk_done(NBDClient * client,NBDRequest * request,Error ** errp)2126a7c8ed36SEric Blake static int coroutine_fn nbd_co_send_chunk_done(NBDClient *client,
212766d4f4feSEric Blake                                                NBDRequest *request,
2128ef8c887eSEric Blake                                                Error **errp)
2129ef8c887eSEric Blake {
2130a7c8ed36SEric Blake     NBDReply hdr;
2131ef8c887eSEric Blake     struct iovec iov[] = {
2132a7c8ed36SEric Blake         {.iov_base = &hdr},
2133ef8c887eSEric Blake     };
2134ef8c887eSEric Blake 
213522efd811SEric Blake     trace_nbd_co_send_chunk_done(request->cookie);
2136a7c8ed36SEric Blake     set_be_chunk(client, iov, 1, NBD_REPLY_FLAG_DONE,
213766d4f4feSEric Blake                  NBD_REPLY_TYPE_NONE, request);
2138ef8c887eSEric Blake     return nbd_co_send_iov(client, iov, 1, errp);
2139ef8c887eSEric Blake }
2140ef8c887eSEric Blake 
nbd_co_send_chunk_read(NBDClient * client,NBDRequest * request,uint64_t offset,void * data,uint64_t size,bool final,Error ** errp)2141a7c8ed36SEric Blake static int coroutine_fn nbd_co_send_chunk_read(NBDClient *client,
214266d4f4feSEric Blake                                                NBDRequest *request,
21435c54e7faSVladimir Sementsov-Ogievskiy                                                uint64_t offset,
21445c54e7faSVladimir Sementsov-Ogievskiy                                                void *data,
2145b2578459SEric Blake                                                uint64_t size,
2146418638d3SEric Blake                                                bool final,
21475c54e7faSVladimir Sementsov-Ogievskiy                                                Error **errp)
21485c54e7faSVladimir Sementsov-Ogievskiy {
2149a7c8ed36SEric Blake     NBDReply hdr;
2150efdc0c10SEric Blake     NBDStructuredReadData chunk;
21515c54e7faSVladimir Sementsov-Ogievskiy     struct iovec iov[] = {
2152a7c8ed36SEric Blake         {.iov_base = &hdr},
21535c54e7faSVladimir Sementsov-Ogievskiy         {.iov_base = &chunk, .iov_len = sizeof(chunk)},
21545c54e7faSVladimir Sementsov-Ogievskiy         {.iov_base = data, .iov_len = size}
21555c54e7faSVladimir Sementsov-Ogievskiy     };
21565c54e7faSVladimir Sementsov-Ogievskiy 
2157b2578459SEric Blake     assert(size && size <= NBD_MAX_BUFFER_SIZE);
215822efd811SEric Blake     trace_nbd_co_send_chunk_read(request->cookie, offset, data, size);
2159a7c8ed36SEric Blake     set_be_chunk(client, iov, 3, final ? NBD_REPLY_FLAG_DONE : 0,
216066d4f4feSEric Blake                  NBD_REPLY_TYPE_OFFSET_DATA, request);
21615c54e7faSVladimir Sementsov-Ogievskiy     stq_be_p(&chunk.offset, offset);
21625c54e7faSVladimir Sementsov-Ogievskiy 
2163a7c8ed36SEric Blake     return nbd_co_send_iov(client, iov, 3, errp);
21645c54e7faSVladimir Sementsov-Ogievskiy }
2165ac132d05SEric Blake 
nbd_co_send_chunk_error(NBDClient * client,NBDRequest * request,uint32_t error,const char * msg,Error ** errp)2166a7c8ed36SEric Blake static int coroutine_fn nbd_co_send_chunk_error(NBDClient *client,
216766d4f4feSEric Blake                                                 NBDRequest *request,
216860ace2baSVladimir Sementsov-Ogievskiy                                                 uint32_t error,
216960ace2baSVladimir Sementsov-Ogievskiy                                                 const char *msg,
217060ace2baSVladimir Sementsov-Ogievskiy                                                 Error **errp)
217160ace2baSVladimir Sementsov-Ogievskiy {
2172a7c8ed36SEric Blake     NBDReply hdr;
217360ace2baSVladimir Sementsov-Ogievskiy     NBDStructuredError chunk;
217460ace2baSVladimir Sementsov-Ogievskiy     int nbd_err = system_errno_to_nbd_errno(error);
217560ace2baSVladimir Sementsov-Ogievskiy     struct iovec iov[] = {
2176a7c8ed36SEric Blake         {.iov_base = &hdr},
217760ace2baSVladimir Sementsov-Ogievskiy         {.iov_base = &chunk, .iov_len = sizeof(chunk)},
217860ace2baSVladimir Sementsov-Ogievskiy         {.iov_base = (char *)msg, .iov_len = msg ? strlen(msg) : 0},
217960ace2baSVladimir Sementsov-Ogievskiy     };
218060ace2baSVladimir Sementsov-Ogievskiy 
218160ace2baSVladimir Sementsov-Ogievskiy     assert(nbd_err);
218222efd811SEric Blake     trace_nbd_co_send_chunk_error(request->cookie, nbd_err,
218360ace2baSVladimir Sementsov-Ogievskiy                                   nbd_err_lookup(nbd_err), msg ? msg : "");
2184a7c8ed36SEric Blake     set_be_chunk(client, iov, 3, NBD_REPLY_FLAG_DONE,
218566d4f4feSEric Blake                  NBD_REPLY_TYPE_ERROR, request);
218660ace2baSVladimir Sementsov-Ogievskiy     stl_be_p(&chunk.error, nbd_err);
2187a7c8ed36SEric Blake     stw_be_p(&chunk.message_length, iov[2].iov_len);
218860ace2baSVladimir Sementsov-Ogievskiy 
2189a7c8ed36SEric Blake     return nbd_co_send_iov(client, iov, 3, errp);
219060ace2baSVladimir Sementsov-Ogievskiy }
219160ace2baSVladimir Sementsov-Ogievskiy 
219237e02aebSVladimir Sementsov-Ogievskiy /* Do a sparse read and send the structured reply to the client.
2193ff7e261bSEmanuele Giuseppe Esposito  * Returns -errno if sending fails. blk_co_block_status_above() failure is
219437e02aebSVladimir Sementsov-Ogievskiy  * reported to the client, at which point this function succeeds.
219537e02aebSVladimir Sementsov-Ogievskiy  */
nbd_co_send_sparse_read(NBDClient * client,NBDRequest * request,uint64_t offset,uint8_t * data,uint64_t size,Error ** errp)2196418638d3SEric Blake static int coroutine_fn nbd_co_send_sparse_read(NBDClient *client,
219766d4f4feSEric Blake                                                 NBDRequest *request,
2198418638d3SEric Blake                                                 uint64_t offset,
2199418638d3SEric Blake                                                 uint8_t *data,
2200b2578459SEric Blake                                                 uint64_t size,
2201418638d3SEric Blake                                                 Error **errp)
2202418638d3SEric Blake {
2203418638d3SEric Blake     int ret = 0;
2204418638d3SEric Blake     NBDExport *exp = client->exp;
2205418638d3SEric Blake     size_t progress = 0;
2206418638d3SEric Blake 
2207b2578459SEric Blake     assert(size <= NBD_MAX_BUFFER_SIZE);
2208418638d3SEric Blake     while (progress < size) {
2209418638d3SEric Blake         int64_t pnum;
2210ff7e261bSEmanuele Giuseppe Esposito         int status = blk_co_block_status_above(exp->common.blk, NULL,
2211418638d3SEric Blake                                                offset + progress,
2212418638d3SEric Blake                                                size - progress, &pnum, NULL,
2213418638d3SEric Blake                                                NULL);
2214e2de3256SEric Blake         bool final;
2215418638d3SEric Blake 
2216418638d3SEric Blake         if (status < 0) {
221737e02aebSVladimir Sementsov-Ogievskiy             char *msg = g_strdup_printf("unable to check for holes: %s",
221837e02aebSVladimir Sementsov-Ogievskiy                                         strerror(-status));
221937e02aebSVladimir Sementsov-Ogievskiy 
222066d4f4feSEric Blake             ret = nbd_co_send_chunk_error(client, request, -status, msg, errp);
222137e02aebSVladimir Sementsov-Ogievskiy             g_free(msg);
222237e02aebSVladimir Sementsov-Ogievskiy             return ret;
2223418638d3SEric Blake         }
2224418638d3SEric Blake         assert(pnum && pnum <= size - progress);
2225e2de3256SEric Blake         final = progress + pnum == size;
2226418638d3SEric Blake         if (status & BDRV_BLOCK_ZERO) {
2227a7c8ed36SEric Blake             NBDReply hdr;
2228418638d3SEric Blake             NBDStructuredReadHole chunk;
2229418638d3SEric Blake             struct iovec iov[] = {
2230a7c8ed36SEric Blake                 {.iov_base = &hdr},
2231418638d3SEric Blake                 {.iov_base = &chunk, .iov_len = sizeof(chunk)},
2232418638d3SEric Blake             };
2233418638d3SEric Blake 
223422efd811SEric Blake             trace_nbd_co_send_chunk_read_hole(request->cookie,
223566d4f4feSEric Blake                                               offset + progress, pnum);
2236a7c8ed36SEric Blake             set_be_chunk(client, iov, 2,
2237a7c8ed36SEric Blake                          final ? NBD_REPLY_FLAG_DONE : 0,
223866d4f4feSEric Blake                          NBD_REPLY_TYPE_OFFSET_HOLE, request);
2239418638d3SEric Blake             stq_be_p(&chunk.offset, offset + progress);
2240418638d3SEric Blake             stl_be_p(&chunk.length, pnum);
2241a7c8ed36SEric Blake             ret = nbd_co_send_iov(client, iov, 2, errp);
2242418638d3SEric Blake         } else {
2243d2223cddSPaolo Bonzini             ret = blk_co_pread(exp->common.blk, offset + progress, pnum,
2244a9262f55SAlberto Faria                                data + progress, 0);
2245418638d3SEric Blake             if (ret < 0) {
2246418638d3SEric Blake                 error_setg_errno(errp, -ret, "reading from file failed");
2247418638d3SEric Blake                 break;
2248418638d3SEric Blake             }
224966d4f4feSEric Blake             ret = nbd_co_send_chunk_read(client, request, offset + progress,
2250a7c8ed36SEric Blake                                          data + progress, pnum, final, errp);
2251418638d3SEric Blake         }
2252418638d3SEric Blake 
2253418638d3SEric Blake         if (ret < 0) {
2254418638d3SEric Blake             break;
2255418638d3SEric Blake         }
2256418638d3SEric Blake         progress += pnum;
2257418638d3SEric Blake     }
2258418638d3SEric Blake     return ret;
2259418638d3SEric Blake }
2260418638d3SEric Blake 
226189cbc7e3SVladimir Sementsov-Ogievskiy typedef struct NBDExtentArray {
2262bcc16cc1SEric Blake     NBDExtent64 *extents;
226389cbc7e3SVladimir Sementsov-Ogievskiy     unsigned int nb_alloc;
226489cbc7e3SVladimir Sementsov-Ogievskiy     unsigned int count;
226589cbc7e3SVladimir Sementsov-Ogievskiy     uint64_t total_length;
2266bcc16cc1SEric Blake     bool extended;
226789cbc7e3SVladimir Sementsov-Ogievskiy     bool can_add;
226889cbc7e3SVladimir Sementsov-Ogievskiy     bool converted_to_be;
226989cbc7e3SVladimir Sementsov-Ogievskiy } NBDExtentArray;
2270e7b1948dSVladimir Sementsov-Ogievskiy 
nbd_extent_array_new(unsigned int nb_alloc,NBDMode mode)2271bcc16cc1SEric Blake static NBDExtentArray *nbd_extent_array_new(unsigned int nb_alloc,
2272bcc16cc1SEric Blake                                             NBDMode mode)
227389cbc7e3SVladimir Sementsov-Ogievskiy {
227489cbc7e3SVladimir Sementsov-Ogievskiy     NBDExtentArray *ea = g_new0(NBDExtentArray, 1);
227589cbc7e3SVladimir Sementsov-Ogievskiy 
2276bcc16cc1SEric Blake     assert(mode >= NBD_MODE_STRUCTURED);
227789cbc7e3SVladimir Sementsov-Ogievskiy     ea->nb_alloc = nb_alloc;
2278bcc16cc1SEric Blake     ea->extents = g_new(NBDExtent64, nb_alloc);
2279bcc16cc1SEric Blake     ea->extended = mode >= NBD_MODE_EXTENDED;
228089cbc7e3SVladimir Sementsov-Ogievskiy     ea->can_add = true;
228189cbc7e3SVladimir Sementsov-Ogievskiy 
228289cbc7e3SVladimir Sementsov-Ogievskiy     return ea;
228389cbc7e3SVladimir Sementsov-Ogievskiy }
228489cbc7e3SVladimir Sementsov-Ogievskiy 
nbd_extent_array_free(NBDExtentArray * ea)228589cbc7e3SVladimir Sementsov-Ogievskiy static void nbd_extent_array_free(NBDExtentArray *ea)
228689cbc7e3SVladimir Sementsov-Ogievskiy {
228789cbc7e3SVladimir Sementsov-Ogievskiy     g_free(ea->extents);
228889cbc7e3SVladimir Sementsov-Ogievskiy     g_free(ea);
228989cbc7e3SVladimir Sementsov-Ogievskiy }
G_DEFINE_AUTOPTR_CLEANUP_FUNC(NBDExtentArray,nbd_extent_array_free)2290e0e7fe07SMarc-André Lureau G_DEFINE_AUTOPTR_CLEANUP_FUNC(NBDExtentArray, nbd_extent_array_free)
229189cbc7e3SVladimir Sementsov-Ogievskiy 
229289cbc7e3SVladimir Sementsov-Ogievskiy /* Further modifications of the array after conversion are abandoned */
229389cbc7e3SVladimir Sementsov-Ogievskiy static void nbd_extent_array_convert_to_be(NBDExtentArray *ea)
229489cbc7e3SVladimir Sementsov-Ogievskiy {
229589cbc7e3SVladimir Sementsov-Ogievskiy     int i;
229689cbc7e3SVladimir Sementsov-Ogievskiy 
229789cbc7e3SVladimir Sementsov-Ogievskiy     assert(!ea->converted_to_be);
2298bcc16cc1SEric Blake     assert(ea->extended);
229989cbc7e3SVladimir Sementsov-Ogievskiy     ea->can_add = false;
230089cbc7e3SVladimir Sementsov-Ogievskiy     ea->converted_to_be = true;
230189cbc7e3SVladimir Sementsov-Ogievskiy 
230289cbc7e3SVladimir Sementsov-Ogievskiy     for (i = 0; i < ea->count; i++) {
2303bcc16cc1SEric Blake         ea->extents[i].length = cpu_to_be64(ea->extents[i].length);
2304bcc16cc1SEric Blake         ea->extents[i].flags = cpu_to_be64(ea->extents[i].flags);
230589cbc7e3SVladimir Sementsov-Ogievskiy     }
230689cbc7e3SVladimir Sementsov-Ogievskiy }
230789cbc7e3SVladimir Sementsov-Ogievskiy 
2308bcc16cc1SEric Blake /* Further modifications of the array after conversion are abandoned */
nbd_extent_array_convert_to_narrow(NBDExtentArray * ea)2309bcc16cc1SEric Blake static NBDExtent32 *nbd_extent_array_convert_to_narrow(NBDExtentArray *ea)
2310bcc16cc1SEric Blake {
2311bcc16cc1SEric Blake     int i;
2312bcc16cc1SEric Blake     NBDExtent32 *extents = g_new(NBDExtent32, ea->count);
2313bcc16cc1SEric Blake 
2314bcc16cc1SEric Blake     assert(!ea->converted_to_be);
2315bcc16cc1SEric Blake     assert(!ea->extended);
2316bcc16cc1SEric Blake     ea->can_add = false;
2317bcc16cc1SEric Blake     ea->converted_to_be = true;
2318bcc16cc1SEric Blake 
2319bcc16cc1SEric Blake     for (i = 0; i < ea->count; i++) {
2320bcc16cc1SEric Blake         assert((ea->extents[i].length | ea->extents[i].flags) <= UINT32_MAX);
2321bcc16cc1SEric Blake         extents[i].length = cpu_to_be32(ea->extents[i].length);
2322bcc16cc1SEric Blake         extents[i].flags = cpu_to_be32(ea->extents[i].flags);
2323bcc16cc1SEric Blake     }
2324bcc16cc1SEric Blake 
2325bcc16cc1SEric Blake     return extents;
2326bcc16cc1SEric Blake }
2327bcc16cc1SEric Blake 
232889cbc7e3SVladimir Sementsov-Ogievskiy /*
232989cbc7e3SVladimir Sementsov-Ogievskiy  * Add extent to NBDExtentArray. If extent can't be added (no available space),
233089cbc7e3SVladimir Sementsov-Ogievskiy  * return -1.
233189cbc7e3SVladimir Sementsov-Ogievskiy  * For safety, when returning -1 for the first time, .can_add is set to false,
2332314b9026SEric Blake  * and further calls to nbd_extent_array_add() will crash.
2333314b9026SEric Blake  * (this avoids the situation where a caller ignores failure to add one extent,
2334314b9026SEric Blake  * where adding another extent that would squash into the last array entry
2335314b9026SEric Blake  * would result in an incorrect range reported to the client)
233689cbc7e3SVladimir Sementsov-Ogievskiy  */
nbd_extent_array_add(NBDExtentArray * ea,uint64_t length,uint32_t flags)233789cbc7e3SVladimir Sementsov-Ogievskiy static int nbd_extent_array_add(NBDExtentArray *ea,
2338bcc16cc1SEric Blake                                 uint64_t length, uint32_t flags)
233989cbc7e3SVladimir Sementsov-Ogievskiy {
234089cbc7e3SVladimir Sementsov-Ogievskiy     assert(ea->can_add);
234189cbc7e3SVladimir Sementsov-Ogievskiy 
234289cbc7e3SVladimir Sementsov-Ogievskiy     if (!length) {
234389cbc7e3SVladimir Sementsov-Ogievskiy         return 0;
234489cbc7e3SVladimir Sementsov-Ogievskiy     }
2345bcc16cc1SEric Blake     if (!ea->extended) {
2346bcc16cc1SEric Blake         assert(length <= UINT32_MAX);
2347bcc16cc1SEric Blake     }
234889cbc7e3SVladimir Sementsov-Ogievskiy 
234989cbc7e3SVladimir Sementsov-Ogievskiy     /* Extend previous extent if flags are the same */
235089cbc7e3SVladimir Sementsov-Ogievskiy     if (ea->count > 0 && flags == ea->extents[ea->count - 1].flags) {
2351bcc16cc1SEric Blake         uint64_t sum = length + ea->extents[ea->count - 1].length;
235289cbc7e3SVladimir Sementsov-Ogievskiy 
2353bcc16cc1SEric Blake         /*
2354bcc16cc1SEric Blake          * sum cannot overflow: the block layer bounds image size at
2355bcc16cc1SEric Blake          * 2^63, and ea->extents[].length comes from the block layer.
2356bcc16cc1SEric Blake          */
2357bcc16cc1SEric Blake         assert(sum >= length);
2358bcc16cc1SEric Blake         if (sum <= UINT32_MAX || ea->extended) {
235989cbc7e3SVladimir Sementsov-Ogievskiy             ea->extents[ea->count - 1].length = sum;
236089cbc7e3SVladimir Sementsov-Ogievskiy             ea->total_length += length;
236189cbc7e3SVladimir Sementsov-Ogievskiy             return 0;
236289cbc7e3SVladimir Sementsov-Ogievskiy         }
236389cbc7e3SVladimir Sementsov-Ogievskiy     }
236489cbc7e3SVladimir Sementsov-Ogievskiy 
236589cbc7e3SVladimir Sementsov-Ogievskiy     if (ea->count >= ea->nb_alloc) {
236689cbc7e3SVladimir Sementsov-Ogievskiy         ea->can_add = false;
236789cbc7e3SVladimir Sementsov-Ogievskiy         return -1;
236889cbc7e3SVladimir Sementsov-Ogievskiy     }
236989cbc7e3SVladimir Sementsov-Ogievskiy 
237089cbc7e3SVladimir Sementsov-Ogievskiy     ea->total_length += length;
2371bcc16cc1SEric Blake     ea->extents[ea->count] = (NBDExtent64) {.length = length, .flags = flags};
237289cbc7e3SVladimir Sementsov-Ogievskiy     ea->count++;
237389cbc7e3SVladimir Sementsov-Ogievskiy 
237489cbc7e3SVladimir Sementsov-Ogievskiy     return 0;
237589cbc7e3SVladimir Sementsov-Ogievskiy }
237689cbc7e3SVladimir Sementsov-Ogievskiy 
blockstatus_to_extents(BlockBackend * blk,uint64_t offset,uint64_t bytes,NBDExtentArray * ea)2377ff7e261bSEmanuele Giuseppe Esposito static int coroutine_fn blockstatus_to_extents(BlockBackend *blk,
23786f58ac55SEmanuele Giuseppe Esposito                                                uint64_t offset, uint64_t bytes,
23796f58ac55SEmanuele Giuseppe Esposito                                                NBDExtentArray *ea)
238089cbc7e3SVladimir Sementsov-Ogievskiy {
238189cbc7e3SVladimir Sementsov-Ogievskiy     while (bytes) {
2382e7b1948dSVladimir Sementsov-Ogievskiy         uint32_t flags;
2383e7b1948dSVladimir Sementsov-Ogievskiy         int64_t num;
2384ff7e261bSEmanuele Giuseppe Esposito         int ret = blk_co_block_status_above(blk, NULL, offset, bytes, &num,
238589cbc7e3SVladimir Sementsov-Ogievskiy                                             NULL, NULL);
2386fb7afc79SVladimir Sementsov-Ogievskiy 
2387e7b1948dSVladimir Sementsov-Ogievskiy         if (ret < 0) {
2388e7b1948dSVladimir Sementsov-Ogievskiy             return ret;
2389e7b1948dSVladimir Sementsov-Ogievskiy         }
2390e7b1948dSVladimir Sementsov-Ogievskiy 
23910da98568SNir Soffer         flags = (ret & BDRV_BLOCK_DATA ? 0 : NBD_STATE_HOLE) |
2392e7b1948dSVladimir Sementsov-Ogievskiy                 (ret & BDRV_BLOCK_ZERO ? NBD_STATE_ZERO : 0);
2393e7b1948dSVladimir Sementsov-Ogievskiy 
239489cbc7e3SVladimir Sementsov-Ogievskiy         if (nbd_extent_array_add(ea, num, flags) < 0) {
239589cbc7e3SVladimir Sementsov-Ogievskiy             return 0;
2396e7b1948dSVladimir Sementsov-Ogievskiy         }
2397e7b1948dSVladimir Sementsov-Ogievskiy 
23982178a569SEric Blake         offset += num;
239989cbc7e3SVladimir Sementsov-Ogievskiy         bytes -= num;
2400e7b1948dSVladimir Sementsov-Ogievskiy     }
2401e7b1948dSVladimir Sementsov-Ogievskiy 
2402e7b1948dSVladimir Sementsov-Ogievskiy     return 0;
2403e7b1948dSVladimir Sementsov-Ogievskiy }
2404e7b1948dSVladimir Sementsov-Ogievskiy 
blockalloc_to_extents(BlockBackend * blk,uint64_t offset,uint64_t bytes,NBDExtentArray * ea)2405ff7e261bSEmanuele Giuseppe Esposito static int coroutine_fn blockalloc_to_extents(BlockBackend *blk,
24066f58ac55SEmanuele Giuseppe Esposito                                               uint64_t offset, uint64_t bytes,
24076f58ac55SEmanuele Giuseppe Esposito                                               NBDExtentArray *ea)
240871719cd5SEric Blake {
240971719cd5SEric Blake     while (bytes) {
241071719cd5SEric Blake         int64_t num;
2411ff7e261bSEmanuele Giuseppe Esposito         int ret = blk_co_is_allocated_above(blk, NULL, false, offset, bytes,
241271719cd5SEric Blake                                             &num);
241371719cd5SEric Blake 
241471719cd5SEric Blake         if (ret < 0) {
241571719cd5SEric Blake             return ret;
241671719cd5SEric Blake         }
241771719cd5SEric Blake 
241871719cd5SEric Blake         if (nbd_extent_array_add(ea, num, ret) < 0) {
241971719cd5SEric Blake             return 0;
242071719cd5SEric Blake         }
242171719cd5SEric Blake 
242271719cd5SEric Blake         offset += num;
242371719cd5SEric Blake         bytes -= num;
242471719cd5SEric Blake     }
242571719cd5SEric Blake 
242671719cd5SEric Blake     return 0;
242771719cd5SEric Blake }
242871719cd5SEric Blake 
242989cbc7e3SVladimir Sementsov-Ogievskiy /*
243089cbc7e3SVladimir Sementsov-Ogievskiy  * nbd_co_send_extents
24313d068affSVladimir Sementsov-Ogievskiy  *
243289cbc7e3SVladimir Sementsov-Ogievskiy  * @ea is converted to BE by the function
243389cbc7e3SVladimir Sementsov-Ogievskiy  * @last controls whether NBD_REPLY_FLAG_DONE is sent.
24343d068affSVladimir Sementsov-Ogievskiy  */
2435d2223cddSPaolo Bonzini static int coroutine_fn
nbd_co_send_extents(NBDClient * client,NBDRequest * request,NBDExtentArray * ea,bool last,uint32_t context_id,Error ** errp)243666d4f4feSEric Blake nbd_co_send_extents(NBDClient *client, NBDRequest *request, NBDExtentArray *ea,
243789cbc7e3SVladimir Sementsov-Ogievskiy                     bool last, uint32_t context_id, Error **errp)
2438e7b1948dSVladimir Sementsov-Ogievskiy {
2439a7c8ed36SEric Blake     NBDReply hdr;
2440bcc16cc1SEric Blake     NBDStructuredMeta meta;
2441bcc16cc1SEric Blake     NBDExtendedMeta meta_ext;
2442bcc16cc1SEric Blake     g_autofree NBDExtent32 *extents = NULL;
2443bcc16cc1SEric Blake     uint16_t type;
2444bcc16cc1SEric Blake     struct iovec iov[] = { {.iov_base = &hdr}, {0}, {0} };
2445bcc16cc1SEric Blake 
2446bcc16cc1SEric Blake     if (client->mode >= NBD_MODE_EXTENDED) {
2447bcc16cc1SEric Blake         type = NBD_REPLY_TYPE_BLOCK_STATUS_EXT;
2448bcc16cc1SEric Blake 
2449bcc16cc1SEric Blake         iov[1].iov_base = &meta_ext;
2450bcc16cc1SEric Blake         iov[1].iov_len = sizeof(meta_ext);
2451bcc16cc1SEric Blake         stl_be_p(&meta_ext.context_id, context_id);
2452bcc16cc1SEric Blake         stl_be_p(&meta_ext.count, ea->count);
2453e7b1948dSVladimir Sementsov-Ogievskiy 
245489cbc7e3SVladimir Sementsov-Ogievskiy         nbd_extent_array_convert_to_be(ea);
2455bcc16cc1SEric Blake         iov[2].iov_base = ea->extents;
2456bcc16cc1SEric Blake         iov[2].iov_len = ea->count * sizeof(ea->extents[0]);
2457bcc16cc1SEric Blake     } else {
2458bcc16cc1SEric Blake         type = NBD_REPLY_TYPE_BLOCK_STATUS;
2459bcc16cc1SEric Blake 
2460bcc16cc1SEric Blake         iov[1].iov_base = &meta;
2461bcc16cc1SEric Blake         iov[1].iov_len = sizeof(meta);
2462bcc16cc1SEric Blake         stl_be_p(&meta.context_id, context_id);
2463bcc16cc1SEric Blake 
2464bcc16cc1SEric Blake         extents = nbd_extent_array_convert_to_narrow(ea);
2465bcc16cc1SEric Blake         iov[2].iov_base = extents;
2466bcc16cc1SEric Blake         iov[2].iov_len = ea->count * sizeof(extents[0]);
2467bcc16cc1SEric Blake     }
246889cbc7e3SVladimir Sementsov-Ogievskiy 
246922efd811SEric Blake     trace_nbd_co_send_extents(request->cookie, ea->count, context_id,
247066d4f4feSEric Blake                               ea->total_length, last);
2471bcc16cc1SEric Blake     set_be_chunk(client, iov, 3, last ? NBD_REPLY_FLAG_DONE : 0, type,
2472bcc16cc1SEric Blake                  request);
2473e7b1948dSVladimir Sementsov-Ogievskiy 
2474a7c8ed36SEric Blake     return nbd_co_send_iov(client, iov, 3, errp);
2475e7b1948dSVladimir Sementsov-Ogievskiy }
2476e7b1948dSVladimir Sementsov-Ogievskiy 
2477e7b1948dSVladimir Sementsov-Ogievskiy /* Get block status from the exported device and send it to the client */
24786f58ac55SEmanuele Giuseppe Esposito static int
nbd_co_send_block_status(NBDClient * client,NBDRequest * request,BlockBackend * blk,uint64_t offset,uint64_t length,bool dont_fragment,bool last,uint32_t context_id,Error ** errp)247966d4f4feSEric Blake coroutine_fn nbd_co_send_block_status(NBDClient *client, NBDRequest *request,
2480ff7e261bSEmanuele Giuseppe Esposito                                       BlockBackend *blk, uint64_t offset,
2481bcc16cc1SEric Blake                                       uint64_t length, bool dont_fragment,
2482fb7afc79SVladimir Sementsov-Ogievskiy                                       bool last, uint32_t context_id,
2483fb7afc79SVladimir Sementsov-Ogievskiy                                       Error **errp)
2484e7b1948dSVladimir Sementsov-Ogievskiy {
2485e7b1948dSVladimir Sementsov-Ogievskiy     int ret;
2486416e34bdSEric Blake     unsigned int nb_extents = dont_fragment ? 1 : NBD_MAX_BLOCK_STATUS_EXTENTS;
2487bcc16cc1SEric Blake     g_autoptr(NBDExtentArray) ea =
2488bcc16cc1SEric Blake         nbd_extent_array_new(nb_extents, client->mode);
2489e7b1948dSVladimir Sementsov-Ogievskiy 
249071719cd5SEric Blake     if (context_id == NBD_META_ID_BASE_ALLOCATION) {
2491ff7e261bSEmanuele Giuseppe Esposito         ret = blockstatus_to_extents(blk, offset, length, ea);
249271719cd5SEric Blake     } else {
2493ff7e261bSEmanuele Giuseppe Esposito         ret = blockalloc_to_extents(blk, offset, length, ea);
249471719cd5SEric Blake     }
2495e7b1948dSVladimir Sementsov-Ogievskiy     if (ret < 0) {
249666d4f4feSEric Blake         return nbd_co_send_chunk_error(client, request, -ret,
2497a7c8ed36SEric Blake                                        "can't get block status", errp);
2498e7b1948dSVladimir Sementsov-Ogievskiy     }
2499e7b1948dSVladimir Sementsov-Ogievskiy 
250066d4f4feSEric Blake     return nbd_co_send_extents(client, request, ea, last, context_id, errp);
25013d068affSVladimir Sementsov-Ogievskiy }
25023d068affSVladimir Sementsov-Ogievskiy 
2503dacbb6ebSVladimir Sementsov-Ogievskiy /* Populate @ea from a dirty bitmap. */
bitmap_to_extents(BdrvDirtyBitmap * bitmap,uint64_t offset,uint64_t length,NBDExtentArray * es)250489cbc7e3SVladimir Sementsov-Ogievskiy static void bitmap_to_extents(BdrvDirtyBitmap *bitmap,
250589cbc7e3SVladimir Sementsov-Ogievskiy                               uint64_t offset, uint64_t length,
2506dacbb6ebSVladimir Sementsov-Ogievskiy                               NBDExtentArray *es)
25073d068affSVladimir Sementsov-Ogievskiy {
2508dacbb6ebSVladimir Sementsov-Ogievskiy     int64_t start, dirty_start, dirty_count;
2509dacbb6ebSVladimir Sementsov-Ogievskiy     int64_t end = offset + length;
2510dacbb6ebSVladimir Sementsov-Ogievskiy     bool full = false;
2511bcc16cc1SEric Blake     int64_t bound = es->extended ? INT64_MAX : INT32_MAX;
25123d068affSVladimir Sementsov-Ogievskiy 
25133d068affSVladimir Sementsov-Ogievskiy     bdrv_dirty_bitmap_lock(bitmap);
25143d068affSVladimir Sementsov-Ogievskiy 
2515dacbb6ebSVladimir Sementsov-Ogievskiy     for (start = offset;
2516bcc16cc1SEric Blake          bdrv_dirty_bitmap_next_dirty_area(bitmap, start, end, bound,
2517dacbb6ebSVladimir Sementsov-Ogievskiy                                            &dirty_start, &dirty_count);
2518dacbb6ebSVladimir Sementsov-Ogievskiy          start = dirty_start + dirty_count)
2519dacbb6ebSVladimir Sementsov-Ogievskiy     {
2520dacbb6ebSVladimir Sementsov-Ogievskiy         if ((nbd_extent_array_add(es, dirty_start - start, 0) < 0) ||
2521dacbb6ebSVladimir Sementsov-Ogievskiy             (nbd_extent_array_add(es, dirty_count, NBD_STATE_DIRTY) < 0))
2522dacbb6ebSVladimir Sementsov-Ogievskiy         {
2523dacbb6ebSVladimir Sementsov-Ogievskiy             full = true;
252489cbc7e3SVladimir Sementsov-Ogievskiy             break;
252589cbc7e3SVladimir Sementsov-Ogievskiy         }
25263d068affSVladimir Sementsov-Ogievskiy     }
25273d068affSVladimir Sementsov-Ogievskiy 
2528dacbb6ebSVladimir Sementsov-Ogievskiy     if (!full) {
2529c0b21f2eSEric Blake         /* last non dirty extent, nothing to do if array is now full */
2530c0b21f2eSEric Blake         (void) nbd_extent_array_add(es, end - start, 0);
2531dacbb6ebSVladimir Sementsov-Ogievskiy     }
25323d068affSVladimir Sementsov-Ogievskiy 
25333d068affSVladimir Sementsov-Ogievskiy     bdrv_dirty_bitmap_unlock(bitmap);
25343d068affSVladimir Sementsov-Ogievskiy }
25353d068affSVladimir Sementsov-Ogievskiy 
nbd_co_send_bitmap(NBDClient * client,NBDRequest * request,BdrvDirtyBitmap * bitmap,uint64_t offset,uint64_t length,bool dont_fragment,bool last,uint32_t context_id,Error ** errp)253666d4f4feSEric Blake static int coroutine_fn nbd_co_send_bitmap(NBDClient *client,
253766d4f4feSEric Blake                                            NBDRequest *request,
253866d4f4feSEric Blake                                            BdrvDirtyBitmap *bitmap,
253966d4f4feSEric Blake                                            uint64_t offset,
2540bcc16cc1SEric Blake                                            uint64_t length, bool dont_fragment,
254166d4f4feSEric Blake                                            bool last, uint32_t context_id,
254266d4f4feSEric Blake                                            Error **errp)
25433d068affSVladimir Sementsov-Ogievskiy {
2544416e34bdSEric Blake     unsigned int nb_extents = dont_fragment ? 1 : NBD_MAX_BLOCK_STATUS_EXTENTS;
2545bcc16cc1SEric Blake     g_autoptr(NBDExtentArray) ea =
2546bcc16cc1SEric Blake         nbd_extent_array_new(nb_extents, client->mode);
25473d068affSVladimir Sementsov-Ogievskiy 
2548dacbb6ebSVladimir Sementsov-Ogievskiy     bitmap_to_extents(bitmap, offset, length, ea);
25493d068affSVladimir Sementsov-Ogievskiy 
255066d4f4feSEric Blake     return nbd_co_send_extents(client, request, ea, last, context_id, errp);
2551e7b1948dSVladimir Sementsov-Ogievskiy }
2552e7b1948dSVladimir Sementsov-Ogievskiy 
25532dcbb11bSEric Blake /*
25542dcbb11bSEric Blake  * nbd_co_block_status_payload_read
25552dcbb11bSEric Blake  * Called when a client wants a subset of negotiated contexts via a
25562dcbb11bSEric Blake  * BLOCK_STATUS payload.  Check the payload for valid length and
25572dcbb11bSEric Blake  * contents.  On success, return 0 with request updated to effective
25582dcbb11bSEric Blake  * length.  If request was invalid but all payload consumed, return 0
25592dcbb11bSEric Blake  * with request->len and request->contexts->count set to 0 (which will
25602dcbb11bSEric Blake  * trigger an appropriate NBD_EINVAL response later on).  Return
25612dcbb11bSEric Blake  * negative errno if the payload was not fully consumed.
25622dcbb11bSEric Blake  */
25632dcbb11bSEric Blake static int
nbd_co_block_status_payload_read(NBDClient * client,NBDRequest * request,Error ** errp)25642dcbb11bSEric Blake nbd_co_block_status_payload_read(NBDClient *client, NBDRequest *request,
25652dcbb11bSEric Blake                                  Error **errp)
25662dcbb11bSEric Blake {
25672dcbb11bSEric Blake     uint64_t payload_len = request->len;
25682dcbb11bSEric Blake     g_autofree char *buf = NULL;
25692dcbb11bSEric Blake     size_t count, i, nr_bitmaps;
25702dcbb11bSEric Blake     uint32_t id;
25712dcbb11bSEric Blake 
25722dcbb11bSEric Blake     if (payload_len > NBD_MAX_BUFFER_SIZE) {
25732dcbb11bSEric Blake         error_setg(errp, "len (%" PRIu64 ") is larger than max len (%u)",
25742dcbb11bSEric Blake                    request->len, NBD_MAX_BUFFER_SIZE);
25752dcbb11bSEric Blake         return -EINVAL;
25762dcbb11bSEric Blake     }
25772dcbb11bSEric Blake 
25782dcbb11bSEric Blake     assert(client->contexts.exp == client->exp);
25792dcbb11bSEric Blake     nr_bitmaps = client->exp->nr_export_bitmaps;
25802dcbb11bSEric Blake     request->contexts = g_new0(NBDMetaContexts, 1);
25812dcbb11bSEric Blake     request->contexts->exp = client->exp;
25822dcbb11bSEric Blake 
25832dcbb11bSEric Blake     if (payload_len % sizeof(uint32_t) ||
25842dcbb11bSEric Blake         payload_len < sizeof(NBDBlockStatusPayload) ||
25852dcbb11bSEric Blake         payload_len > (sizeof(NBDBlockStatusPayload) +
25862dcbb11bSEric Blake                        sizeof(id) * client->contexts.count)) {
25872dcbb11bSEric Blake         goto skip;
25882dcbb11bSEric Blake     }
25892dcbb11bSEric Blake 
25902dcbb11bSEric Blake     buf = g_malloc(payload_len);
25912dcbb11bSEric Blake     if (nbd_read(client->ioc, buf, payload_len,
25922dcbb11bSEric Blake                  "CMD_BLOCK_STATUS data", errp) < 0) {
25932dcbb11bSEric Blake         return -EIO;
25942dcbb11bSEric Blake     }
25952dcbb11bSEric Blake     trace_nbd_co_receive_request_payload_received(request->cookie,
25962dcbb11bSEric Blake                                                   payload_len);
25972dcbb11bSEric Blake     request->contexts->bitmaps = g_new0(bool, nr_bitmaps);
25982dcbb11bSEric Blake     count = (payload_len - sizeof(NBDBlockStatusPayload)) / sizeof(id);
25992dcbb11bSEric Blake     payload_len = 0;
26002dcbb11bSEric Blake 
26012dcbb11bSEric Blake     for (i = 0; i < count; i++) {
26022dcbb11bSEric Blake         id = ldl_be_p(buf + sizeof(NBDBlockStatusPayload) + sizeof(id) * i);
26032dcbb11bSEric Blake         if (id == NBD_META_ID_BASE_ALLOCATION) {
26042dcbb11bSEric Blake             if (!client->contexts.base_allocation ||
26052dcbb11bSEric Blake                 request->contexts->base_allocation) {
26062dcbb11bSEric Blake                 goto skip;
26072dcbb11bSEric Blake             }
26082dcbb11bSEric Blake             request->contexts->base_allocation = true;
26092dcbb11bSEric Blake         } else if (id == NBD_META_ID_ALLOCATION_DEPTH) {
26102dcbb11bSEric Blake             if (!client->contexts.allocation_depth ||
26112dcbb11bSEric Blake                 request->contexts->allocation_depth) {
26122dcbb11bSEric Blake                 goto skip;
26132dcbb11bSEric Blake             }
26142dcbb11bSEric Blake             request->contexts->allocation_depth = true;
26152dcbb11bSEric Blake         } else {
26162dcbb11bSEric Blake             unsigned idx = id - NBD_META_ID_DIRTY_BITMAP;
26172dcbb11bSEric Blake 
26182dcbb11bSEric Blake             if (idx >= nr_bitmaps || !client->contexts.bitmaps[idx] ||
26192dcbb11bSEric Blake                 request->contexts->bitmaps[idx]) {
26202dcbb11bSEric Blake                 goto skip;
26212dcbb11bSEric Blake             }
26222dcbb11bSEric Blake             request->contexts->bitmaps[idx] = true;
26232dcbb11bSEric Blake         }
26242dcbb11bSEric Blake     }
26252dcbb11bSEric Blake 
26262dcbb11bSEric Blake     request->len = ldq_be_p(buf);
26272dcbb11bSEric Blake     request->contexts->count = count;
26282dcbb11bSEric Blake     return 0;
26292dcbb11bSEric Blake 
26302dcbb11bSEric Blake  skip:
26312dcbb11bSEric Blake     trace_nbd_co_receive_block_status_payload_compliance(request->from,
26322dcbb11bSEric Blake                                                          request->len);
26332dcbb11bSEric Blake     request->len = request->contexts->count = 0;
26342dcbb11bSEric Blake     return nbd_drop(client->ioc, payload_len, errp);
26352dcbb11bSEric Blake }
26362dcbb11bSEric Blake 
26372a6e128bSVladimir Sementsov-Ogievskiy /* nbd_co_receive_request
26382a6e128bSVladimir Sementsov-Ogievskiy  * Collect a client request. Return 0 if request looks valid, -EIO to drop
2639f148ae7dSSergio Lopez  * connection right away, -EAGAIN to indicate we were interrupted and the
2640f148ae7dSSergio Lopez  * channel should be quiesced, and any other negative value to report an error
2641f148ae7dSSergio Lopez  * to the client (although the caller may still need to disconnect after
2642f148ae7dSSergio Lopez  * reporting the error).
26432a6e128bSVladimir Sementsov-Ogievskiy  */
nbd_co_receive_request(NBDRequestData * req,NBDRequest * request,Error ** errp)26448db7e2d6SEric Blake static int coroutine_fn nbd_co_receive_request(NBDRequestData *req,
26458db7e2d6SEric Blake                                                NBDRequest *request,
26462fd2c840SVladimir Sementsov-Ogievskiy                                                Error **errp)
2647798bfe00SFam Zheng {
2648798bfe00SFam Zheng     NBDClient *client = req->client;
2649009cd866SEric Blake     bool extended_with_payload;
26508db7e2d6SEric Blake     bool check_length = false;
26518db7e2d6SEric Blake     bool check_rofs = false;
26528db7e2d6SEric Blake     bool allocate_buffer = false;
2653009cd866SEric Blake     bool payload_okay = false;
2654009cd866SEric Blake     uint64_t payload_len = 0;
26558db7e2d6SEric Blake     int valid_flags = NBD_CMD_FLAG_FUA;
2656f148ae7dSSergio Lopez     int ret;
2657798bfe00SFam Zheng 
26581c778ef7SDaniel P. Berrange     g_assert(qemu_in_coroutine());
2659f148ae7dSSergio Lopez     ret = nbd_receive_request(client, request, errp);
2660f148ae7dSSergio Lopez     if (ret < 0) {
2661f148ae7dSSergio Lopez         return ret;
2662798bfe00SFam Zheng     }
2663798bfe00SFam Zheng 
266422efd811SEric Blake     trace_nbd_co_receive_request_decode_type(request->cookie, request->type,
26653736cc5bSEric Blake                                              nbd_cmd_lookup(request->type));
2666009cd866SEric Blake     extended_with_payload = client->mode >= NBD_MODE_EXTENDED &&
2667009cd866SEric Blake         request->flags & NBD_CMD_FLAG_PAYLOAD_LEN;
2668009cd866SEric Blake     if (extended_with_payload) {
2669009cd866SEric Blake         payload_len = request->len;
2670009cd866SEric Blake         check_length = true;
2671009cd866SEric Blake     }
2672009cd866SEric Blake 
26738db7e2d6SEric Blake     switch (request->type) {
26748db7e2d6SEric Blake     case NBD_CMD_DISC:
267529b6c3b3SEric Blake         /* Special case: we're going to disconnect without a reply,
267629b6c3b3SEric Blake          * whether or not flags, from, or len are bogus */
26778db7e2d6SEric Blake         req->complete = true;
2678ee898b87SVladimir Sementsov-Ogievskiy         return -EIO;
26798db7e2d6SEric Blake 
26808db7e2d6SEric Blake     case NBD_CMD_READ:
26818db7e2d6SEric Blake         if (client->mode >= NBD_MODE_STRUCTURED) {
26828db7e2d6SEric Blake             valid_flags |= NBD_CMD_FLAG_DF;
26838db7e2d6SEric Blake         }
26848db7e2d6SEric Blake         check_length = true;
26858db7e2d6SEric Blake         allocate_buffer = true;
26868db7e2d6SEric Blake         break;
26878db7e2d6SEric Blake 
26888db7e2d6SEric Blake     case NBD_CMD_WRITE:
2689009cd866SEric Blake         if (client->mode >= NBD_MODE_EXTENDED) {
2690009cd866SEric Blake             if (!extended_with_payload) {
2691009cd866SEric Blake                 /* The client is noncompliant. Trace it, but proceed. */
2692009cd866SEric Blake                 trace_nbd_co_receive_ext_payload_compliance(request->from,
2693009cd866SEric Blake                                                             request->len);
2694009cd866SEric Blake             }
2695009cd866SEric Blake             valid_flags |= NBD_CMD_FLAG_PAYLOAD_LEN;
2696009cd866SEric Blake         }
2697009cd866SEric Blake         payload_okay = true;
26988db7e2d6SEric Blake         payload_len = request->len;
26998db7e2d6SEric Blake         check_length = true;
27008db7e2d6SEric Blake         allocate_buffer = true;
27018db7e2d6SEric Blake         check_rofs = true;
27028db7e2d6SEric Blake         break;
27038db7e2d6SEric Blake 
27048db7e2d6SEric Blake     case NBD_CMD_FLUSH:
27058db7e2d6SEric Blake         break;
27068db7e2d6SEric Blake 
27078db7e2d6SEric Blake     case NBD_CMD_TRIM:
27088db7e2d6SEric Blake         check_rofs = true;
27098db7e2d6SEric Blake         break;
27108db7e2d6SEric Blake 
27118db7e2d6SEric Blake     case NBD_CMD_CACHE:
27128db7e2d6SEric Blake         check_length = true;
27138db7e2d6SEric Blake         break;
27148db7e2d6SEric Blake 
27158db7e2d6SEric Blake     case NBD_CMD_WRITE_ZEROES:
27168db7e2d6SEric Blake         valid_flags |= NBD_CMD_FLAG_NO_HOLE | NBD_CMD_FLAG_FAST_ZERO;
27178db7e2d6SEric Blake         check_rofs = true;
27188db7e2d6SEric Blake         break;
27198db7e2d6SEric Blake 
27208db7e2d6SEric Blake     case NBD_CMD_BLOCK_STATUS:
27212dcbb11bSEric Blake         if (extended_with_payload) {
27222dcbb11bSEric Blake             ret = nbd_co_block_status_payload_read(client, request, errp);
27232dcbb11bSEric Blake             if (ret < 0) {
27242dcbb11bSEric Blake                 return ret;
27252dcbb11bSEric Blake             }
27262dcbb11bSEric Blake             /* payload now consumed */
27272dcbb11bSEric Blake             check_length = false;
27282dcbb11bSEric Blake             payload_len = 0;
27292dcbb11bSEric Blake             valid_flags |= NBD_CMD_FLAG_PAYLOAD_LEN;
27302dcbb11bSEric Blake         } else {
27311dec4643SEric Blake             request->contexts = &client->contexts;
27322dcbb11bSEric Blake         }
27338db7e2d6SEric Blake         valid_flags |= NBD_CMD_FLAG_REQ_ONE;
27348db7e2d6SEric Blake         break;
27358db7e2d6SEric Blake 
27368db7e2d6SEric Blake     default:
27378db7e2d6SEric Blake         /* Unrecognized, will fail later */
27388db7e2d6SEric Blake         ;
273929b6c3b3SEric Blake     }
274029b6c3b3SEric Blake 
27418db7e2d6SEric Blake     /* Payload and buffer handling. */
27428db7e2d6SEric Blake     if (!payload_len) {
27438db7e2d6SEric Blake         req->complete = true;
27448db7e2d6SEric Blake     }
27458db7e2d6SEric Blake     if (check_length && request->len > NBD_MAX_BUFFER_SIZE) {
27468db7e2d6SEric Blake         /* READ, WRITE, CACHE */
2747b2578459SEric Blake         error_setg(errp, "len (%" PRIu64 ") is larger than max len (%u)",
2748eb38c3b6SPaolo Bonzini                    request->len, NBD_MAX_BUFFER_SIZE);
2749ee898b87SVladimir Sementsov-Ogievskiy         return -EINVAL;
2750eb38c3b6SPaolo Bonzini     }
2751009cd866SEric Blake     if (payload_len && !payload_okay) {
2752009cd866SEric Blake         /*
2753009cd866SEric Blake          * For now, we don't support payloads on other commands; but
2754009cd866SEric Blake          * we can keep the connection alive by ignoring the payload.
2755009cd866SEric Blake          * We will fail the command later with NBD_EINVAL for the use
2756009cd866SEric Blake          * of an unsupported flag (and not for access beyond bounds).
2757009cd866SEric Blake          */
2758009cd866SEric Blake         assert(request->type != NBD_CMD_WRITE);
2759009cd866SEric Blake         request->len = 0;
2760009cd866SEric Blake     }
27618db7e2d6SEric Blake     if (allocate_buffer) {
27628db7e2d6SEric Blake         /* READ, WRITE */
276337a4f70cSKevin Wolf         req->data = blk_try_blockalign(client->exp->common.blk,
276437a4f70cSKevin Wolf                                        request->len);
2765f1c17521SPaolo Bonzini         if (req->data == NULL) {
27662fd2c840SVladimir Sementsov-Ogievskiy             error_setg(errp, "No memory");
2767ee898b87SVladimir Sementsov-Ogievskiy             return -ENOMEM;
2768f1c17521SPaolo Bonzini         }
2769798bfe00SFam Zheng     }
27708db7e2d6SEric Blake     if (payload_len) {
2771009cd866SEric Blake         if (payload_okay) {
27728db7e2d6SEric Blake             /* WRITE */
27738db7e2d6SEric Blake             assert(req->data);
27748db7e2d6SEric Blake             ret = nbd_read(client->ioc, req->data, payload_len,
27758db7e2d6SEric Blake                            "CMD_WRITE data", errp);
2776009cd866SEric Blake         } else {
2777009cd866SEric Blake             ret = nbd_drop(client->ioc, payload_len, errp);
2778009cd866SEric Blake         }
27798db7e2d6SEric Blake         if (ret < 0) {
2780ee898b87SVladimir Sementsov-Ogievskiy             return -EIO;
2781798bfe00SFam Zheng         }
278229b6c3b3SEric Blake         req->complete = true;
278322efd811SEric Blake         trace_nbd_co_receive_request_payload_received(request->cookie,
27848db7e2d6SEric Blake                                                       payload_len);
2785798bfe00SFam Zheng     }
278629b6c3b3SEric Blake 
2787fed5f8f8SEric Blake     /* Sanity checks. */
27888db7e2d6SEric Blake     if (client->exp->nbdflags & NBD_FLAG_READ_ONLY && check_rofs) {
27898db7e2d6SEric Blake         /* WRITE, TRIM, WRITE_ZEROES */
2790fed5f8f8SEric Blake         error_setg(errp, "Export is read-only");
2791fed5f8f8SEric Blake         return -EROFS;
2792fed5f8f8SEric Blake     }
2793fed5f8f8SEric Blake     if (request->from > client->exp->size ||
27949d26dfcbSEric Blake         request->len > client->exp->size - request->from) {
2795b2578459SEric Blake         error_setg(errp, "operation past EOF; From: %" PRIu64 ", Len: %" PRIu64
279629b6c3b3SEric Blake                    ", Size: %" PRIu64, request->from, request->len,
27979d26dfcbSEric Blake                    client->exp->size);
2798fed5f8f8SEric Blake         return (request->type == NBD_CMD_WRITE ||
2799fed5f8f8SEric Blake                 request->type == NBD_CMD_WRITE_ZEROES) ? -ENOSPC : -EINVAL;
280029b6c3b3SEric Blake     }
28016e280648SEric Blake     if (client->check_align && !QEMU_IS_ALIGNED(request->from | request->len,
28026e280648SEric Blake                                                 client->check_align)) {
28036e280648SEric Blake         /*
28046e280648SEric Blake          * The block layer gracefully handles unaligned requests, but
28056e280648SEric Blake          * it's still worth tracing client non-compliance
28066e280648SEric Blake          */
28076e280648SEric Blake         trace_nbd_co_receive_align_compliance(nbd_cmd_lookup(request->type),
28086e280648SEric Blake                                               request->from,
28096e280648SEric Blake                                               request->len,
28106e280648SEric Blake                                               client->check_align);
28116e280648SEric Blake     }
28125c54e7faSVladimir Sementsov-Ogievskiy     if (request->flags & ~valid_flags) {
28135c54e7faSVladimir Sementsov-Ogievskiy         error_setg(errp, "unsupported flags for command %s (got 0x%x)",
28145c54e7faSVladimir Sementsov-Ogievskiy                    nbd_cmd_lookup(request->type), request->flags);
2815ee898b87SVladimir Sementsov-Ogievskiy         return -EINVAL;
28161f4d6d18SEric Blake     }
281729b6c3b3SEric Blake 
2818ee898b87SVladimir Sementsov-Ogievskiy     return 0;
2819798bfe00SFam Zheng }
2820798bfe00SFam Zheng 
28216a417599SVladimir Sementsov-Ogievskiy /* Send simple reply without a payload, or a structured error
28226a417599SVladimir Sementsov-Ogievskiy  * @error_msg is ignored if @ret >= 0
28236a417599SVladimir Sementsov-Ogievskiy  * Returns 0 if connection is still live, -errno on failure to talk to client
28246a417599SVladimir Sementsov-Ogievskiy  */
nbd_send_generic_reply(NBDClient * client,NBDRequest * request,int ret,const char * error_msg,Error ** errp)28256a417599SVladimir Sementsov-Ogievskiy static coroutine_fn int nbd_send_generic_reply(NBDClient *client,
282666d4f4feSEric Blake                                                NBDRequest *request,
28276a417599SVladimir Sementsov-Ogievskiy                                                int ret,
28286a417599SVladimir Sementsov-Ogievskiy                                                const char *error_msg,
28296a417599SVladimir Sementsov-Ogievskiy                                                Error **errp)
28306a417599SVladimir Sementsov-Ogievskiy {
2831ac132d05SEric Blake     if (client->mode >= NBD_MODE_STRUCTURED && ret < 0) {
283266d4f4feSEric Blake         return nbd_co_send_chunk_error(client, request, -ret, error_msg, errp);
283311d3355fSEric Blake     } else if (client->mode >= NBD_MODE_EXTENDED) {
283411d3355fSEric Blake         return nbd_co_send_chunk_done(client, request, errp);
28356a417599SVladimir Sementsov-Ogievskiy     } else {
283666d4f4feSEric Blake         return nbd_co_send_simple_reply(client, request, ret < 0 ? -ret : 0,
28376a417599SVladimir Sementsov-Ogievskiy                                         NULL, 0, errp);
28386a417599SVladimir Sementsov-Ogievskiy     }
28396a417599SVladimir Sementsov-Ogievskiy }
28406a417599SVladimir Sementsov-Ogievskiy 
28416a417599SVladimir Sementsov-Ogievskiy /* Handle NBD_CMD_READ request.
28426a417599SVladimir Sementsov-Ogievskiy  * Return -errno if sending fails. Other errors are reported directly to the
28436a417599SVladimir Sementsov-Ogievskiy  * client as an error reply. */
nbd_do_cmd_read(NBDClient * client,NBDRequest * request,uint8_t * data,Error ** errp)28446a417599SVladimir Sementsov-Ogievskiy static coroutine_fn int nbd_do_cmd_read(NBDClient *client, NBDRequest *request,
28456a417599SVladimir Sementsov-Ogievskiy                                         uint8_t *data, Error **errp)
28466a417599SVladimir Sementsov-Ogievskiy {
28476a417599SVladimir Sementsov-Ogievskiy     int ret;
28486a417599SVladimir Sementsov-Ogievskiy     NBDExport *exp = client->exp;
28496a417599SVladimir Sementsov-Ogievskiy 
28507fa5c565SVladimir Sementsov-Ogievskiy     assert(request->type == NBD_CMD_READ);
2851b2578459SEric Blake     assert(request->len <= NBD_MAX_BUFFER_SIZE);
28526a417599SVladimir Sementsov-Ogievskiy 
28536a417599SVladimir Sementsov-Ogievskiy     /* XXX: NBD Protocol only documents use of FUA with WRITE */
28546a417599SVladimir Sementsov-Ogievskiy     if (request->flags & NBD_CMD_FLAG_FUA) {
285537a4f70cSKevin Wolf         ret = blk_co_flush(exp->common.blk);
28566a417599SVladimir Sementsov-Ogievskiy         if (ret < 0) {
285766d4f4feSEric Blake             return nbd_send_generic_reply(client, request, ret,
28586a417599SVladimir Sementsov-Ogievskiy                                           "flush failed", errp);
28596a417599SVladimir Sementsov-Ogievskiy         }
28606a417599SVladimir Sementsov-Ogievskiy     }
28616a417599SVladimir Sementsov-Ogievskiy 
2862ac132d05SEric Blake     if (client->mode >= NBD_MODE_STRUCTURED &&
2863ac132d05SEric Blake         !(request->flags & NBD_CMD_FLAG_DF) && request->len)
28642f454defSVladimir Sementsov-Ogievskiy     {
286566d4f4feSEric Blake         return nbd_co_send_sparse_read(client, request, request->from,
28666a417599SVladimir Sementsov-Ogievskiy                                        data, request->len, errp);
28676a417599SVladimir Sementsov-Ogievskiy     }
28686a417599SVladimir Sementsov-Ogievskiy 
2869d2223cddSPaolo Bonzini     ret = blk_co_pread(exp->common.blk, request->from, request->len, data, 0);
28707fa5c565SVladimir Sementsov-Ogievskiy     if (ret < 0) {
287166d4f4feSEric Blake         return nbd_send_generic_reply(client, request, ret,
28726a417599SVladimir Sementsov-Ogievskiy                                       "reading from file failed", errp);
28736a417599SVladimir Sementsov-Ogievskiy     }
28746a417599SVladimir Sementsov-Ogievskiy 
2875ac132d05SEric Blake     if (client->mode >= NBD_MODE_STRUCTURED) {
28766a417599SVladimir Sementsov-Ogievskiy         if (request->len) {
287766d4f4feSEric Blake             return nbd_co_send_chunk_read(client, request, request->from, data,
28786a417599SVladimir Sementsov-Ogievskiy                                           request->len, true, errp);
28796a417599SVladimir Sementsov-Ogievskiy         } else {
288066d4f4feSEric Blake             return nbd_co_send_chunk_done(client, request, errp);
28816a417599SVladimir Sementsov-Ogievskiy         }
28826a417599SVladimir Sementsov-Ogievskiy     } else {
288366d4f4feSEric Blake         return nbd_co_send_simple_reply(client, request, 0,
28846a417599SVladimir Sementsov-Ogievskiy                                         data, request->len, errp);
28856a417599SVladimir Sementsov-Ogievskiy     }
28866a417599SVladimir Sementsov-Ogievskiy }
28876a417599SVladimir Sementsov-Ogievskiy 
28887fa5c565SVladimir Sementsov-Ogievskiy /*
28897fa5c565SVladimir Sementsov-Ogievskiy  * nbd_do_cmd_cache
28907fa5c565SVladimir Sementsov-Ogievskiy  *
28917fa5c565SVladimir Sementsov-Ogievskiy  * Handle NBD_CMD_CACHE request.
28927fa5c565SVladimir Sementsov-Ogievskiy  * Return -errno if sending fails. Other errors are reported directly to the
28937fa5c565SVladimir Sementsov-Ogievskiy  * client as an error reply.
28947fa5c565SVladimir Sementsov-Ogievskiy  */
nbd_do_cmd_cache(NBDClient * client,NBDRequest * request,Error ** errp)28957fa5c565SVladimir Sementsov-Ogievskiy static coroutine_fn int nbd_do_cmd_cache(NBDClient *client, NBDRequest *request,
28967fa5c565SVladimir Sementsov-Ogievskiy                                          Error **errp)
28977fa5c565SVladimir Sementsov-Ogievskiy {
28987fa5c565SVladimir Sementsov-Ogievskiy     int ret;
28997fa5c565SVladimir Sementsov-Ogievskiy     NBDExport *exp = client->exp;
29007fa5c565SVladimir Sementsov-Ogievskiy 
29017fa5c565SVladimir Sementsov-Ogievskiy     assert(request->type == NBD_CMD_CACHE);
2902b2578459SEric Blake     assert(request->len <= NBD_MAX_BUFFER_SIZE);
29037fa5c565SVladimir Sementsov-Ogievskiy 
290437a4f70cSKevin Wolf     ret = blk_co_preadv(exp->common.blk, request->from, request->len,
29057fa5c565SVladimir Sementsov-Ogievskiy                         NULL, BDRV_REQ_COPY_ON_READ | BDRV_REQ_PREFETCH);
29067fa5c565SVladimir Sementsov-Ogievskiy 
290766d4f4feSEric Blake     return nbd_send_generic_reply(client, request, ret,
29087fa5c565SVladimir Sementsov-Ogievskiy                                   "caching data failed", errp);
29097fa5c565SVladimir Sementsov-Ogievskiy }
29107fa5c565SVladimir Sementsov-Ogievskiy 
29116f302e60SVladimir Sementsov-Ogievskiy /* Handle NBD request.
29126f302e60SVladimir Sementsov-Ogievskiy  * Return -errno if sending fails. Other errors are reported directly to the
29136f302e60SVladimir Sementsov-Ogievskiy  * client as an error reply. */
nbd_handle_request(NBDClient * client,NBDRequest * request,uint8_t * data,Error ** errp)29146f302e60SVladimir Sementsov-Ogievskiy static coroutine_fn int nbd_handle_request(NBDClient *client,
29156f302e60SVladimir Sementsov-Ogievskiy                                            NBDRequest *request,
29166f302e60SVladimir Sementsov-Ogievskiy                                            uint8_t *data, Error **errp)
29176f302e60SVladimir Sementsov-Ogievskiy {
29186f302e60SVladimir Sementsov-Ogievskiy     int ret;
29196f302e60SVladimir Sementsov-Ogievskiy     int flags;
29206f302e60SVladimir Sementsov-Ogievskiy     NBDExport *exp = client->exp;
29216f302e60SVladimir Sementsov-Ogievskiy     char *msg;
29223b1f244cSEric Blake     size_t i;
29236f302e60SVladimir Sementsov-Ogievskiy 
29246f302e60SVladimir Sementsov-Ogievskiy     switch (request->type) {
2925bc37b06aSVladimir Sementsov-Ogievskiy     case NBD_CMD_CACHE:
29267fa5c565SVladimir Sementsov-Ogievskiy         return nbd_do_cmd_cache(client, request, errp);
29277fa5c565SVladimir Sementsov-Ogievskiy 
29287fa5c565SVladimir Sementsov-Ogievskiy     case NBD_CMD_READ:
29296f302e60SVladimir Sementsov-Ogievskiy         return nbd_do_cmd_read(client, request, data, errp);
29306f302e60SVladimir Sementsov-Ogievskiy 
29316f302e60SVladimir Sementsov-Ogievskiy     case NBD_CMD_WRITE:
29326f302e60SVladimir Sementsov-Ogievskiy         flags = 0;
29336f302e60SVladimir Sementsov-Ogievskiy         if (request->flags & NBD_CMD_FLAG_FUA) {
29346f302e60SVladimir Sementsov-Ogievskiy             flags |= BDRV_REQ_FUA;
29356f302e60SVladimir Sementsov-Ogievskiy         }
2936b2578459SEric Blake         assert(request->len <= NBD_MAX_BUFFER_SIZE);
2937d2223cddSPaolo Bonzini         ret = blk_co_pwrite(exp->common.blk, request->from, request->len, data,
293837a4f70cSKevin Wolf                             flags);
293966d4f4feSEric Blake         return nbd_send_generic_reply(client, request, ret,
29406f302e60SVladimir Sementsov-Ogievskiy                                       "writing to file failed", errp);
29416f302e60SVladimir Sementsov-Ogievskiy 
29426f302e60SVladimir Sementsov-Ogievskiy     case NBD_CMD_WRITE_ZEROES:
29436f302e60SVladimir Sementsov-Ogievskiy         flags = 0;
29446f302e60SVladimir Sementsov-Ogievskiy         if (request->flags & NBD_CMD_FLAG_FUA) {
29456f302e60SVladimir Sementsov-Ogievskiy             flags |= BDRV_REQ_FUA;
29466f302e60SVladimir Sementsov-Ogievskiy         }
29476f302e60SVladimir Sementsov-Ogievskiy         if (!(request->flags & NBD_CMD_FLAG_NO_HOLE)) {
29486f302e60SVladimir Sementsov-Ogievskiy             flags |= BDRV_REQ_MAY_UNMAP;
29496f302e60SVladimir Sementsov-Ogievskiy         }
2950b491dbb7SEric Blake         if (request->flags & NBD_CMD_FLAG_FAST_ZERO) {
2951b491dbb7SEric Blake             flags |= BDRV_REQ_NO_FALLBACK;
2952b491dbb7SEric Blake         }
2953d2223cddSPaolo Bonzini         ret = blk_co_pwrite_zeroes(exp->common.blk, request->from, request->len,
2954e3557422SEric Blake                                    flags);
295566d4f4feSEric Blake         return nbd_send_generic_reply(client, request, ret,
29566f302e60SVladimir Sementsov-Ogievskiy                                       "writing to file failed", errp);
29576f302e60SVladimir Sementsov-Ogievskiy 
29586f302e60SVladimir Sementsov-Ogievskiy     case NBD_CMD_DISC:
29596f302e60SVladimir Sementsov-Ogievskiy         /* unreachable, thanks to special case in nbd_co_receive_request() */
29606f302e60SVladimir Sementsov-Ogievskiy         abort();
29616f302e60SVladimir Sementsov-Ogievskiy 
29626f302e60SVladimir Sementsov-Ogievskiy     case NBD_CMD_FLUSH:
296337a4f70cSKevin Wolf         ret = blk_co_flush(exp->common.blk);
296466d4f4feSEric Blake         return nbd_send_generic_reply(client, request, ret,
29656f302e60SVladimir Sementsov-Ogievskiy                                       "flush failed", errp);
29666f302e60SVladimir Sementsov-Ogievskiy 
29676f302e60SVladimir Sementsov-Ogievskiy     case NBD_CMD_TRIM:
2968e3557422SEric Blake         ret = blk_co_pdiscard(exp->common.blk, request->from, request->len);
2969890cbccbSEric Blake         if (ret >= 0 && request->flags & NBD_CMD_FLAG_FUA) {
297037a4f70cSKevin Wolf             ret = blk_co_flush(exp->common.blk);
297165529782SEric Blake         }
297266d4f4feSEric Blake         return nbd_send_generic_reply(client, request, ret,
29736f302e60SVladimir Sementsov-Ogievskiy                                       "discard failed", errp);
29746f302e60SVladimir Sementsov-Ogievskiy 
2975e7b1948dSVladimir Sementsov-Ogievskiy     case NBD_CMD_BLOCK_STATUS:
29761dec4643SEric Blake         assert(request->contexts);
2977bcc16cc1SEric Blake         assert(client->mode >= NBD_MODE_EXTENDED ||
2978bcc16cc1SEric Blake                request->len <= UINT32_MAX);
29791dec4643SEric Blake         if (request->contexts->count) {
2980fb7afc79SVladimir Sementsov-Ogievskiy             bool dont_fragment = request->flags & NBD_CMD_FLAG_REQ_ONE;
29811dec4643SEric Blake             int contexts_remaining = request->contexts->count;
2982fb7afc79SVladimir Sementsov-Ogievskiy 
29832dcbb11bSEric Blake             if (!request->len) {
29842dcbb11bSEric Blake                 return nbd_send_generic_reply(client, request, -EINVAL,
29852dcbb11bSEric Blake                                               "need non-zero length", errp);
29862dcbb11bSEric Blake             }
29871dec4643SEric Blake             if (request->contexts->base_allocation) {
298866d4f4feSEric Blake                 ret = nbd_co_send_block_status(client, request,
2989ff7e261bSEmanuele Giuseppe Esposito                                                exp->common.blk,
299037a4f70cSKevin Wolf                                                request->from,
2991fb7afc79SVladimir Sementsov-Ogievskiy                                                request->len, dont_fragment,
299247ec485eSEric Blake                                                !--contexts_remaining,
29933d068affSVladimir Sementsov-Ogievskiy                                                NBD_META_ID_BASE_ALLOCATION,
29943d068affSVladimir Sementsov-Ogievskiy                                                errp);
299573e064ccSEric Blake                 if (ret < 0) {
299673e064ccSEric Blake                     return ret;
299773e064ccSEric Blake                 }
299873e064ccSEric Blake             }
299973e064ccSEric Blake 
30001dec4643SEric Blake             if (request->contexts->allocation_depth) {
300166d4f4feSEric Blake                 ret = nbd_co_send_block_status(client, request,
3002ff7e261bSEmanuele Giuseppe Esposito                                                exp->common.blk,
300371719cd5SEric Blake                                                request->from, request->len,
300471719cd5SEric Blake                                                dont_fragment,
300571719cd5SEric Blake                                                !--contexts_remaining,
300671719cd5SEric Blake                                                NBD_META_ID_ALLOCATION_DEPTH,
300771719cd5SEric Blake                                                errp);
300871719cd5SEric Blake                 if (ret < 0) {
300971719cd5SEric Blake                     return ret;
301071719cd5SEric Blake                 }
301171719cd5SEric Blake             }
301271719cd5SEric Blake 
30131dec4643SEric Blake             assert(request->contexts->exp == client->exp);
30143b1f244cSEric Blake             for (i = 0; i < client->exp->nr_export_bitmaps; i++) {
30151dec4643SEric Blake                 if (!request->contexts->bitmaps[i]) {
30163b1f244cSEric Blake                     continue;
30173b1f244cSEric Blake                 }
301866d4f4feSEric Blake                 ret = nbd_co_send_bitmap(client, request,
30193b1f244cSEric Blake                                          client->exp->export_bitmaps[i],
30203d068affSVladimir Sementsov-Ogievskiy                                          request->from, request->len,
302147ec485eSEric Blake                                          dont_fragment, !--contexts_remaining,
30223b1f244cSEric Blake                                          NBD_META_ID_DIRTY_BITMAP + i, errp);
302373e064ccSEric Blake                 if (ret < 0) {
302473e064ccSEric Blake                     return ret;
302573e064ccSEric Blake                 }
30263d068affSVladimir Sementsov-Ogievskiy             }
30273d068affSVladimir Sementsov-Ogievskiy 
302847ec485eSEric Blake             assert(!contexts_remaining);
302947ec485eSEric Blake 
303073e064ccSEric Blake             return 0;
30311dec4643SEric Blake         } else if (client->contexts.count) {
30321dec4643SEric Blake             return nbd_send_generic_reply(client, request, -EINVAL,
30331dec4643SEric Blake                                           "CMD_BLOCK_STATUS payload not valid",
30341dec4643SEric Blake                                           errp);
3035e7b1948dSVladimir Sementsov-Ogievskiy         } else {
303666d4f4feSEric Blake             return nbd_send_generic_reply(client, request, -EINVAL,
3037e7b1948dSVladimir Sementsov-Ogievskiy                                           "CMD_BLOCK_STATUS not negotiated",
3038e7b1948dSVladimir Sementsov-Ogievskiy                                           errp);
3039e7b1948dSVladimir Sementsov-Ogievskiy         }
3040e7b1948dSVladimir Sementsov-Ogievskiy 
30416f302e60SVladimir Sementsov-Ogievskiy     default:
30426f302e60SVladimir Sementsov-Ogievskiy         msg = g_strdup_printf("invalid request type (%" PRIu32 ") received",
30436f302e60SVladimir Sementsov-Ogievskiy                               request->type);
304466d4f4feSEric Blake         ret = nbd_send_generic_reply(client, request, -EINVAL, msg,
30456f302e60SVladimir Sementsov-Ogievskiy                                      errp);
30466f302e60SVladimir Sementsov-Ogievskiy         g_free(msg);
30476f302e60SVladimir Sementsov-Ogievskiy         return ret;
30486f302e60SVladimir Sementsov-Ogievskiy     }
30496f302e60SVladimir Sementsov-Ogievskiy }
30506f302e60SVladimir Sementsov-Ogievskiy 
3051ff82911cSPaolo Bonzini /* Owns a reference to the NBDClient passed as opaque.  */
nbd_trip(void * opaque)3052ff82911cSPaolo Bonzini static coroutine_fn void nbd_trip(void *opaque)
3053798bfe00SFam Zheng {
30549c707525SKevin Wolf     NBDRequestData *req = opaque;
30559c707525SKevin Wolf     NBDClient *client = req->client;
3056ff82911cSPaolo Bonzini     NBDRequest request = { 0 };    /* GCC thinks it can be used uninitialized */
3057a0dc63a6SVladimir Sementsov-Ogievskiy     int ret;
30582fd2c840SVladimir Sementsov-Ogievskiy     Error *local_err = NULL;
3059798bfe00SFam Zheng 
3060f816310dSStefan Hajnoczi     /*
3061f816310dSStefan Hajnoczi      * Note that nbd_client_put() and client_close() must be called from the
3062f816310dSStefan Hajnoczi      * main loop thread. Use aio_co_reschedule_self() to switch AioContext
3063f816310dSStefan Hajnoczi      * before calling these functions.
3064f816310dSStefan Hajnoczi      */
3065f816310dSStefan Hajnoczi 
30669588463eSVladimir Sementsov-Ogievskiy     trace_nbd_trip();
30677075d235SStefan Hajnoczi 
30687075d235SStefan Hajnoczi     qemu_mutex_lock(&client->lock);
30697075d235SStefan Hajnoczi 
3070798bfe00SFam Zheng     if (client->closing) {
3071f816310dSStefan Hajnoczi         goto done;
3072798bfe00SFam Zheng     }
3073798bfe00SFam Zheng 
3074f148ae7dSSergio Lopez     if (client->quiescing) {
3075f148ae7dSSergio Lopez         /*
3076f148ae7dSSergio Lopez          * We're switching between AIO contexts. Don't attempt to receive a new
3077f148ae7dSSergio Lopez          * request and kick the main context which may be waiting for us.
3078f148ae7dSSergio Lopez          */
3079f148ae7dSSergio Lopez         client->recv_coroutine = NULL;
3080f148ae7dSSergio Lopez         aio_wait_kick();
3081f816310dSStefan Hajnoczi         goto done;
3082f148ae7dSSergio Lopez     }
3083f148ae7dSSergio Lopez 
30847075d235SStefan Hajnoczi     /*
30857075d235SStefan Hajnoczi      * nbd_co_receive_request() returns -EAGAIN when nbd_drained_begin() has
30867075d235SStefan Hajnoczi      * set client->quiescing but by the time we get back nbd_drained_end() may
30877075d235SStefan Hajnoczi      * have already cleared client->quiescing. In that case we try again
30887075d235SStefan Hajnoczi      * because nothing else will spawn an nbd_trip() coroutine until we set
30897075d235SStefan Hajnoczi      * client->recv_coroutine = NULL further down.
30907075d235SStefan Hajnoczi      */
30917075d235SStefan Hajnoczi     do {
30927075d235SStefan Hajnoczi         assert(client->recv_coroutine == qemu_coroutine_self());
30937075d235SStefan Hajnoczi         qemu_mutex_unlock(&client->lock);
30942fd2c840SVladimir Sementsov-Ogievskiy         ret = nbd_co_receive_request(req, &request, &local_err);
30957075d235SStefan Hajnoczi         qemu_mutex_lock(&client->lock);
30967075d235SStefan Hajnoczi     } while (ret == -EAGAIN && !client->quiescing);
30977075d235SStefan Hajnoczi 
3098ee898b87SVladimir Sementsov-Ogievskiy     client->recv_coroutine = NULL;
3099798bfe00SFam Zheng 
3100798bfe00SFam Zheng     if (client->closing) {
3101798bfe00SFam Zheng         /*
3102798bfe00SFam Zheng          * The client may be closed when we are blocked in
3103798bfe00SFam Zheng          * nbd_co_receive_request()
3104798bfe00SFam Zheng          */
3105798bfe00SFam Zheng         goto done;
3106798bfe00SFam Zheng     }
3107798bfe00SFam Zheng 
3108f148ae7dSSergio Lopez     if (ret == -EAGAIN) {
3109f148ae7dSSergio Lopez         goto done;
3110f148ae7dSSergio Lopez     }
3111f148ae7dSSergio Lopez 
3112a0d7ce20SVladimir Sementsov-Ogievskiy     nbd_client_receive_next_request(client);
31137075d235SStefan Hajnoczi 
3114a0d7ce20SVladimir Sementsov-Ogievskiy     if (ret == -EIO) {
3115a0d7ce20SVladimir Sementsov-Ogievskiy         goto disconnect;
3116a0d7ce20SVladimir Sementsov-Ogievskiy     }
3117a0d7ce20SVladimir Sementsov-Ogievskiy 
31187075d235SStefan Hajnoczi     qemu_mutex_unlock(&client->lock);
3119bd2cd4a4SFlorian Westphal     qio_channel_set_cork(client->ioc, true);
3120bd2cd4a4SFlorian Westphal 
3121a0d7ce20SVladimir Sementsov-Ogievskiy     if (ret < 0) {
3122314b9026SEric Blake         /* It wasn't -EIO, so, according to nbd_co_receive_request()
31236a417599SVladimir Sementsov-Ogievskiy          * semantics, we should return the error to the client. */
31246a417599SVladimir Sementsov-Ogievskiy         Error *export_err = local_err;
31256a417599SVladimir Sementsov-Ogievskiy 
31266a417599SVladimir Sementsov-Ogievskiy         local_err = NULL;
312766d4f4feSEric Blake         ret = nbd_send_generic_reply(client, &request, -EINVAL,
31286a417599SVladimir Sementsov-Ogievskiy                                      error_get_pretty(export_err), &local_err);
31296a417599SVladimir Sementsov-Ogievskiy         error_free(export_err);
31306f302e60SVladimir Sementsov-Ogievskiy     } else {
31316f302e60SVladimir Sementsov-Ogievskiy         ret = nbd_handle_request(client, &request, req->data, &local_err);
3132a0d7ce20SVladimir Sementsov-Ogievskiy     }
31331dec4643SEric Blake     if (request.contexts && request.contexts != &client->contexts) {
31341dec4643SEric Blake         assert(request.type == NBD_CMD_BLOCK_STATUS);
31351dec4643SEric Blake         g_free(request.contexts->bitmaps);
31361dec4643SEric Blake         g_free(request.contexts);
31371dec4643SEric Blake     }
31387075d235SStefan Hajnoczi 
31397075d235SStefan Hajnoczi     qio_channel_set_cork(client->ioc, false);
31407075d235SStefan Hajnoczi     qemu_mutex_lock(&client->lock);
31417075d235SStefan Hajnoczi 
31425c54e7faSVladimir Sementsov-Ogievskiy     if (ret < 0) {
3143c7b97282SVladimir Sementsov-Ogievskiy         error_prepend(&local_err, "Failed to send reply: ");
31442fd2c840SVladimir Sementsov-Ogievskiy         goto disconnect;
31452fd2c840SVladimir Sementsov-Ogievskiy     }
31462fd2c840SVladimir Sementsov-Ogievskiy 
31472dcbb11bSEric Blake     /*
31482dcbb11bSEric Blake      * We must disconnect after NBD_CMD_WRITE or BLOCK_STATUS with
31492dcbb11bSEric Blake      * payload if we did not read the payload.
315029b6c3b3SEric Blake      */
31512fd2c840SVladimir Sementsov-Ogievskiy     if (!req->complete) {
31522fd2c840SVladimir Sementsov-Ogievskiy         error_setg(&local_err, "Request handling failed in intermediate state");
31538c372a02SVladimir Sementsov-Ogievskiy         goto disconnect;
3154798bfe00SFam Zheng     }
3155798bfe00SFam Zheng 
3156798bfe00SFam Zheng done:
3157798bfe00SFam Zheng     nbd_request_put(req);
31587075d235SStefan Hajnoczi 
31597075d235SStefan Hajnoczi     qemu_mutex_unlock(&client->lock);
31607075d235SStefan Hajnoczi 
3161f816310dSStefan Hajnoczi     if (!nbd_client_put_nonzero(client)) {
3162f816310dSStefan Hajnoczi         aio_co_reschedule_self(qemu_get_aio_context());
3163ff82911cSPaolo Bonzini         nbd_client_put(client);
3164f816310dSStefan Hajnoczi     }
3165798bfe00SFam Zheng     return;
3166798bfe00SFam Zheng 
31678c372a02SVladimir Sementsov-Ogievskiy disconnect:
31682fd2c840SVladimir Sementsov-Ogievskiy     if (local_err) {
31692fd2c840SVladimir Sementsov-Ogievskiy         error_reportf_err(local_err, "Disconnect client, due to: ");
31702fd2c840SVladimir Sementsov-Ogievskiy     }
31717075d235SStefan Hajnoczi 
3172798bfe00SFam Zheng     nbd_request_put(req);
31737075d235SStefan Hajnoczi     qemu_mutex_unlock(&client->lock);
3174f816310dSStefan Hajnoczi 
3175f816310dSStefan Hajnoczi     aio_co_reschedule_self(qemu_get_aio_context());
31760c9390d9SEric Blake     client_close(client, true);
3177ff82911cSPaolo Bonzini     nbd_client_put(client);
3178798bfe00SFam Zheng }
3179798bfe00SFam Zheng 
31807075d235SStefan Hajnoczi /*
31817075d235SStefan Hajnoczi  * Runs in export AioContext and main loop thread. Caller must hold
31827075d235SStefan Hajnoczi  * client->lock.
31837075d235SStefan Hajnoczi  */
nbd_client_receive_next_request(NBDClient * client)3184ff82911cSPaolo Bonzini static void nbd_client_receive_next_request(NBDClient *client)
3185798bfe00SFam Zheng {
31869c707525SKevin Wolf     NBDRequestData *req;
31879c707525SKevin Wolf 
3188f148ae7dSSergio Lopez     if (!client->recv_coroutine && client->nb_requests < MAX_NBD_REQUESTS &&
3189f148ae7dSSergio Lopez         !client->quiescing) {
3190ff82911cSPaolo Bonzini         nbd_client_get(client);
31919c707525SKevin Wolf         req = nbd_request_get(client);
31929c707525SKevin Wolf         client->recv_coroutine = qemu_coroutine_create(nbd_trip, req);
31938612c686SKevin Wolf         aio_co_schedule(client->exp->common.ctx, client->recv_coroutine);
3194798bfe00SFam Zheng     }
3195798bfe00SFam Zheng }
3196798bfe00SFam Zheng 
nbd_handshake_timer_cb(void * opaque)3197b9b72cb3SEric Blake static void nbd_handshake_timer_cb(void *opaque)
3198b9b72cb3SEric Blake {
3199b9b72cb3SEric Blake     QIOChannel *ioc = opaque;
3200b9b72cb3SEric Blake 
3201b9b72cb3SEric Blake     trace_nbd_handshake_timer_cb();
3202b9b72cb3SEric Blake     qio_channel_shutdown(ioc, QIO_CHANNEL_SHUTDOWN_BOTH, NULL);
3203b9b72cb3SEric Blake }
3204b9b72cb3SEric Blake 
nbd_co_client_start(void * opaque)32051a6245a5SFam Zheng static coroutine_fn void nbd_co_client_start(void *opaque)
3206798bfe00SFam Zheng {
3207c84087f2SVladimir Sementsov-Ogievskiy     NBDClient *client = opaque;
32082fd2c840SVladimir Sementsov-Ogievskiy     Error *local_err = NULL;
3209b9b72cb3SEric Blake     QEMUTimer *handshake_timer = NULL;
32101a6245a5SFam Zheng 
3211798bfe00SFam Zheng     qemu_co_mutex_init(&client->send_lock);
3212798bfe00SFam Zheng 
3213b9b72cb3SEric Blake     /*
3214b9b72cb3SEric Blake      * Create a timer to bound the time spent in negotiation. If the
3215b9b72cb3SEric Blake      * timer expires, it is likely nbd_negotiate will fail because the
3216b9b72cb3SEric Blake      * socket was shutdown.
3217b9b72cb3SEric Blake      */
3218b9b72cb3SEric Blake     if (client->handshake_max_secs > 0) {
3219b9b72cb3SEric Blake         handshake_timer = aio_timer_new(qemu_get_aio_context(),
3220b9b72cb3SEric Blake                                         QEMU_CLOCK_REALTIME,
3221b9b72cb3SEric Blake                                         SCALE_NS,
3222b9b72cb3SEric Blake                                         nbd_handshake_timer_cb,
3223b9b72cb3SEric Blake                                         client->sioc);
3224b9b72cb3SEric Blake         timer_mod(handshake_timer,
3225b9b72cb3SEric Blake                   qemu_clock_get_ns(QEMU_CLOCK_REALTIME) +
3226b9b72cb3SEric Blake                   client->handshake_max_secs * NANOSECONDS_PER_SECOND);
3227b9b72cb3SEric Blake     }
3228b9b72cb3SEric Blake 
32292fd2c840SVladimir Sementsov-Ogievskiy     if (nbd_negotiate(client, &local_err)) {
32302fd2c840SVladimir Sementsov-Ogievskiy         if (local_err) {
32312fd2c840SVladimir Sementsov-Ogievskiy             error_report_err(local_err);
32322fd2c840SVladimir Sementsov-Ogievskiy         }
3233b9b72cb3SEric Blake         timer_free(handshake_timer);
32340c9390d9SEric Blake         client_close(client, false);
3235c84087f2SVladimir Sementsov-Ogievskiy         return;
3236798bfe00SFam Zheng     }
3237ff82911cSPaolo Bonzini 
3238b9b72cb3SEric Blake     timer_free(handshake_timer);
32397075d235SStefan Hajnoczi     WITH_QEMU_LOCK_GUARD(&client->lock) {
3240ff82911cSPaolo Bonzini         nbd_client_receive_next_request(client);
32411a6245a5SFam Zheng     }
32427075d235SStefan Hajnoczi }
32431a6245a5SFam Zheng 
32440c9390d9SEric Blake /*
3245fb1c2aaaSEric Blake  * Create a new client listener using the given channel @sioc and @owner.
32467f7dfe2aSVladimir Sementsov-Ogievskiy  * Begin servicing it in a coroutine.  When the connection closes, call
3247fb1c2aaaSEric Blake  * @close_fn with an indication of whether the client completed negotiation
3248fb1c2aaaSEric Blake  * within @handshake_max_secs seconds (0 for unbounded).
32490c9390d9SEric Blake  */
nbd_client_new(QIOChannelSocket * sioc,uint32_t handshake_max_secs,QCryptoTLSCreds * tlscreds,const char * tlsauthz,void (* close_fn)(NBDClient *,bool),void * owner)32507f7dfe2aSVladimir Sementsov-Ogievskiy void nbd_client_new(QIOChannelSocket *sioc,
3251fb1c2aaaSEric Blake                     uint32_t handshake_max_secs,
3252f95910feSDaniel P. Berrange                     QCryptoTLSCreds *tlscreds,
3253b25e12daSDaniel P. Berrange                     const char *tlsauthz,
3254fb1c2aaaSEric Blake                     void (*close_fn)(NBDClient *, bool),
3255fb1c2aaaSEric Blake                     void *owner)
32561a6245a5SFam Zheng {
32571a6245a5SFam Zheng     NBDClient *client;
3258c84087f2SVladimir Sementsov-Ogievskiy     Coroutine *co;
32591a6245a5SFam Zheng 
3260e8d3eb74SMarc-André Lureau     client = g_new0(NBDClient, 1);
32617075d235SStefan Hajnoczi     qemu_mutex_init(&client->lock);
32621a6245a5SFam Zheng     client->refcount = 1;
3263f95910feSDaniel P. Berrange     client->tlscreds = tlscreds;
3264f95910feSDaniel P. Berrange     if (tlscreds) {
3265f95910feSDaniel P. Berrange         object_ref(OBJECT(client->tlscreds));
3266f95910feSDaniel P. Berrange     }
3267b25e12daSDaniel P. Berrange     client->tlsauthz = g_strdup(tlsauthz);
3268fb1c2aaaSEric Blake     client->handshake_max_secs = handshake_max_secs;
32691c778ef7SDaniel P. Berrange     client->sioc = sioc;
3270f1426881SEric Blake     qio_channel_set_delay(QIO_CHANNEL(sioc), false);
32711c778ef7SDaniel P. Berrange     object_ref(OBJECT(client->sioc));
32721c778ef7SDaniel P. Berrange     client->ioc = QIO_CHANNEL(sioc);
32731c778ef7SDaniel P. Berrange     object_ref(OBJECT(client->ioc));
32740c9390d9SEric Blake     client->close_fn = close_fn;
3275fb1c2aaaSEric Blake     client->owner = owner;
32761a6245a5SFam Zheng 
3277c84087f2SVladimir Sementsov-Ogievskiy     co = qemu_coroutine_create(nbd_co_client_start, client);
3278c84087f2SVladimir Sementsov-Ogievskiy     qemu_coroutine_enter(co);
3279798bfe00SFam Zheng }
3280fb1c2aaaSEric Blake 
3281fb1c2aaaSEric Blake void *
nbd_client_owner(NBDClient * client)3282fb1c2aaaSEric Blake nbd_client_owner(NBDClient *client)
3283fb1c2aaaSEric Blake {
3284fb1c2aaaSEric Blake     return client->owner;
3285fb1c2aaaSEric Blake }
3286