1798bfe00SFam Zheng /*
2a7c8ed36SEric Blake * Copyright Red Hat
3798bfe00SFam Zheng * Copyright (C) 2005 Anthony Liguori <anthony@codemonkey.ws>
4798bfe00SFam Zheng *
5798bfe00SFam Zheng * Network Block Device Server Side
6798bfe00SFam Zheng *
7798bfe00SFam Zheng * This program is free software; you can redistribute it and/or modify
8798bfe00SFam Zheng * it under the terms of the GNU General Public License as published by
9798bfe00SFam Zheng * the Free Software Foundation; under version 2 of the License.
10798bfe00SFam Zheng *
11798bfe00SFam Zheng * This program is distributed in the hope that it will be useful,
12798bfe00SFam Zheng * but WITHOUT ANY WARRANTY; without even the implied warranty of
13798bfe00SFam Zheng * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14798bfe00SFam Zheng * GNU General Public License for more details.
15798bfe00SFam Zheng *
16798bfe00SFam Zheng * You should have received a copy of the GNU General Public License
17798bfe00SFam Zheng * along with this program; if not, see <http://www.gnu.org/licenses/>.
18798bfe00SFam Zheng */
19798bfe00SFam Zheng
20d38ea87aSPeter Maydell #include "qemu/osdep.h"
2156ee8626SKevin Wolf
22e2c1c34fSMarkus Armbruster #include "block/block_int.h"
2356ee8626SKevin Wolf #include "block/export.h"
24e2c1c34fSMarkus Armbruster #include "block/dirty-bitmap.h"
25da34e65cSMarkus Armbruster #include "qapi/error.h"
26dc5e9ac7SMarkus Armbruster #include "qemu/queue.h"
279588463eSVladimir Sementsov-Ogievskiy #include "trace.h"
28798bfe00SFam Zheng #include "nbd-internal.h"
29416e34bdSEric Blake #include "qemu/units.h"
305df022cfSPeter Maydell #include "qemu/memalign.h"
31798bfe00SFam Zheng
/* Numeric ids of the meta contexts this server knows about. */
#define NBD_META_ID_BASE_ALLOCATION 0
#define NBD_META_ID_ALLOCATION_DEPTH 1
/* Dirty bitmaps use 'NBD_META_ID_DIRTY_BITMAP + i', so keep this id last. */
#define NBD_META_ID_DIRTY_BITMAP 2

/*
 * NBD_MAX_BLOCK_STATUS_EXTENTS: upper bound on the number of extents in
 * one block status reply, chosen so that the extents data amounts to
 * 1 MiB (the division by 8 presumably reflects the size in bytes of one
 * extent on the wire).  This is an empirical constant.  If it ever needs
 * to grow, note that the NBD protocol recommends replies no larger than
 * 32 MiB, so that the client won't consider the reply a denial of
 * service attack.
 */
#define NBD_MAX_BLOCK_STATUS_EXTENTS (1 * MiB / 8)
44e7b1948dSVladimir Sementsov-Ogievskiy
system_errno_to_nbd_errno(int err)45798bfe00SFam Zheng static int system_errno_to_nbd_errno(int err)
46798bfe00SFam Zheng {
47798bfe00SFam Zheng switch (err) {
48798bfe00SFam Zheng case 0:
49798bfe00SFam Zheng return NBD_SUCCESS;
50798bfe00SFam Zheng case EPERM:
51c0301fccSEric Blake case EROFS:
52798bfe00SFam Zheng return NBD_EPERM;
53798bfe00SFam Zheng case EIO:
54798bfe00SFam Zheng return NBD_EIO;
55798bfe00SFam Zheng case ENOMEM:
56798bfe00SFam Zheng return NBD_ENOMEM;
57798bfe00SFam Zheng #ifdef EDQUOT
58798bfe00SFam Zheng case EDQUOT:
59798bfe00SFam Zheng #endif
60798bfe00SFam Zheng case EFBIG:
61798bfe00SFam Zheng case ENOSPC:
62798bfe00SFam Zheng return NBD_ENOSPC;
63bae245d1SEric Blake case EOVERFLOW:
64bae245d1SEric Blake return NBD_EOVERFLOW;
650a479545SEric Blake case ENOTSUP:
660a479545SEric Blake #if ENOTSUP != EOPNOTSUPP
670a479545SEric Blake case EOPNOTSUPP:
680a479545SEric Blake #endif
690a479545SEric Blake return NBD_ENOTSUP;
70b6f5d3b5SEric Blake case ESHUTDOWN:
71b6f5d3b5SEric Blake return NBD_ESHUTDOWN;
72798bfe00SFam Zheng case EINVAL:
73798bfe00SFam Zheng default:
74798bfe00SFam Zheng return NBD_EINVAL;
75798bfe00SFam Zheng }
76798bfe00SFam Zheng }
77798bfe00SFam Zheng
78798bfe00SFam Zheng /* Definitions for opaque data types */
79798bfe00SFam Zheng
80315f78abSEric Blake typedef struct NBDRequestData NBDRequestData;
81798bfe00SFam Zheng
82315f78abSEric Blake struct NBDRequestData {
83798bfe00SFam Zheng NBDClient *client;
84798bfe00SFam Zheng uint8_t *data;
8529b6c3b3SEric Blake bool complete;
86798bfe00SFam Zheng };
87798bfe00SFam Zheng
88798bfe00SFam Zheng struct NBDExport {
8956ee8626SKevin Wolf BlockExport common;
90798bfe00SFam Zheng
91798bfe00SFam Zheng char *name;
92b1a75b33SEric Blake char *description;
939d26dfcbSEric Blake uint64_t size;
947423f417SEric Blake uint16_t nbdflags;
95798bfe00SFam Zheng QTAILQ_HEAD(, NBDClient) clients;
96798bfe00SFam Zheng QTAILQ_ENTRY(NBDExport) next;
97798bfe00SFam Zheng
98cd7fca95SKevin Wolf BlockBackend *eject_notifier_blk;
99741cc431SMax Reitz Notifier eject_notifier;
1003d068affSVladimir Sementsov-Ogievskiy
10171719cd5SEric Blake bool allocation_depth;
1023b1f244cSEric Blake BdrvDirtyBitmap **export_bitmaps;
1033b1f244cSEric Blake size_t nr_export_bitmaps;
104798bfe00SFam Zheng };
105798bfe00SFam Zheng
106798bfe00SFam Zheng static QTAILQ_HEAD(, NBDExport) exports = QTAILQ_HEAD_INITIALIZER(exports);
107798bfe00SFam Zheng
108fd358d83SEric Blake /*
109fd358d83SEric Blake * NBDMetaContexts represents a list of meta contexts in use,
110e7b1948dSVladimir Sementsov-Ogievskiy * as selected by NBD_OPT_SET_META_CONTEXT. Also used for
111fd358d83SEric Blake * NBD_OPT_LIST_META_CONTEXT.
112fd358d83SEric Blake */
113fd358d83SEric Blake struct NBDMetaContexts {
114fd358d83SEric Blake const NBDExport *exp; /* associated export */
11547ec485eSEric Blake size_t count; /* number of negotiated contexts */
116e7b1948dSVladimir Sementsov-Ogievskiy bool base_allocation; /* export base:allocation context (block status) */
11771719cd5SEric Blake bool allocation_depth; /* export qemu:allocation-depth */
1183b1f244cSEric Blake bool *bitmaps; /*
1193b1f244cSEric Blake * export qemu:dirty-bitmap:<export bitmap name>,
1203b1f244cSEric Blake * sized by exp->nr_export_bitmaps
1213b1f244cSEric Blake */
122fd358d83SEric Blake };
123e7b1948dSVladimir Sementsov-Ogievskiy
124798bfe00SFam Zheng struct NBDClient {
125f816310dSStefan Hajnoczi int refcount; /* atomic */
1260c9390d9SEric Blake void (*close_fn)(NBDClient *client, bool negotiated);
127fb1c2aaaSEric Blake void *owner;
128798bfe00SFam Zheng
1297075d235SStefan Hajnoczi QemuMutex lock;
1307075d235SStefan Hajnoczi
131798bfe00SFam Zheng NBDExport *exp;
132f95910feSDaniel P. Berrange QCryptoTLSCreds *tlscreds;
133b25e12daSDaniel P. Berrange char *tlsauthz;
134fb1c2aaaSEric Blake uint32_t handshake_max_secs;
1351c778ef7SDaniel P. Berrange QIOChannelSocket *sioc; /* The underlying data channel */
1361c778ef7SDaniel P. Berrange QIOChannel *ioc; /* The current I/O channel which may differ (eg TLS) */
137798bfe00SFam Zheng
1387075d235SStefan Hajnoczi Coroutine *recv_coroutine; /* protected by lock */
139798bfe00SFam Zheng
140798bfe00SFam Zheng CoMutex send_lock;
141798bfe00SFam Zheng Coroutine *send_coroutine;
142798bfe00SFam Zheng
1437075d235SStefan Hajnoczi bool read_yielding; /* protected by lock */
1447075d235SStefan Hajnoczi bool quiescing; /* protected by lock */
145f148ae7dSSergio Lopez
146798bfe00SFam Zheng QTAILQ_ENTRY(NBDClient) next;
1477075d235SStefan Hajnoczi int nb_requests; /* protected by lock */
1487075d235SStefan Hajnoczi bool closing; /* protected by lock */
1495c54e7faSVladimir Sementsov-Ogievskiy
1506e280648SEric Blake uint32_t check_align; /* If non-zero, check for aligned client requests */
1516e280648SEric Blake
152ac132d05SEric Blake NBDMode mode;
153fd358d83SEric Blake NBDMetaContexts contexts; /* Negotiated meta contexts */
154798bfe00SFam Zheng
1550cfae925SVladimir Sementsov-Ogievskiy uint32_t opt; /* Current option being negotiated */
1560cfae925SVladimir Sementsov-Ogievskiy uint32_t optlen; /* remaining length of data in ioc for the option being
1570cfae925SVladimir Sementsov-Ogievskiy negotiated now */
1580cfae925SVladimir Sementsov-Ogievskiy };
159798bfe00SFam Zheng
160ff82911cSPaolo Bonzini static void nbd_client_receive_next_request(NBDClient *client);
161798bfe00SFam Zheng
/* Basic flow for negotiation

   Server         Client
   Negotiate

   or

   Server         Client
   Negotiate #1
                  Option
   Negotiate #2

   ----

   followed by

   Server         Client
                  Request
   Response
                  Request
   Response
   ...
   ...
                  Request (type == 2)

*/
188798bfe00SFam Zheng
/*
 * Fill in the option reply header @rep: the fixed reply magic followed
 * by @option, @type and @length, each stored with the big-endian store
 * helpers so the struct can be written to the channel as-is.
 */
static inline void set_be_option_rep(NBDOptionReply *rep, uint32_t option,
                                     uint32_t type, uint32_t length)
{
    /* 64-bit magic first, then the three 32-bit fields. */
    stq_be_p(&rep->magic, NBD_REP_MAGIC);

    stl_be_p(&rep->option, option);
    stl_be_p(&rep->type, type);
    stl_be_p(&rep->length, length);
}
1971d17922aSVladimir Sementsov-Ogievskiy
198526e5c65SEric Blake /* Send a reply header, including length, but no payload.
199526e5c65SEric Blake * Return -errno on error, 0 on success. */
2004fa333e0SEric Blake static coroutine_fn int
nbd_negotiate_send_rep_len(NBDClient * client,uint32_t type,uint32_t len,Error ** errp)2014fa333e0SEric Blake nbd_negotiate_send_rep_len(NBDClient *client, uint32_t type,
2020cfae925SVladimir Sementsov-Ogievskiy uint32_t len, Error **errp)
203798bfe00SFam Zheng {
2041d17922aSVladimir Sementsov-Ogievskiy NBDOptionReply rep;
205798bfe00SFam Zheng
2061d17922aSVladimir Sementsov-Ogievskiy trace_nbd_negotiate_send_rep_len(client->opt, nbd_opt_lookup(client->opt),
2073736cc5bSEric Blake type, nbd_rep_lookup(type), len);
208f95910feSDaniel P. Berrange
209f37708f6SEric Blake assert(len < NBD_MAX_BUFFER_SIZE);
2102fd2c840SVladimir Sementsov-Ogievskiy
2111d17922aSVladimir Sementsov-Ogievskiy set_be_option_rep(&rep, client->opt, type, len);
2121d17922aSVladimir Sementsov-Ogievskiy return nbd_write(client->ioc, &rep, sizeof(rep), errp);
213798bfe00SFam Zheng }
214798bfe00SFam Zheng
215526e5c65SEric Blake /* Send a reply header with default 0 length.
216526e5c65SEric Blake * Return -errno on error, 0 on success. */
2174fa333e0SEric Blake static coroutine_fn int
nbd_negotiate_send_rep(NBDClient * client,uint32_t type,Error ** errp)2184fa333e0SEric Blake nbd_negotiate_send_rep(NBDClient *client, uint32_t type, Error **errp)
219526e5c65SEric Blake {
2200cfae925SVladimir Sementsov-Ogievskiy return nbd_negotiate_send_rep_len(client, type, 0, errp);
221526e5c65SEric Blake }
222526e5c65SEric Blake
22336683283SEric Blake /* Send an error reply.
22436683283SEric Blake * Return -errno on error, 0 on success. */
2254fa333e0SEric Blake static coroutine_fn int G_GNUC_PRINTF(4, 0)
nbd_negotiate_send_rep_verr(NBDClient * client,uint32_t type,Error ** errp,const char * fmt,va_list va)22641f5dfafSEric Blake nbd_negotiate_send_rep_verr(NBDClient *client, uint32_t type,
22741f5dfafSEric Blake Error **errp, const char *fmt, va_list va)
22836683283SEric Blake {
229795d946dSVladimir Sementsov-Ogievskiy ERRP_GUARD();
230df18c04eSEric Blake g_autofree char *msg = NULL;
23136683283SEric Blake int ret;
23236683283SEric Blake size_t len;
23336683283SEric Blake
23436683283SEric Blake msg = g_strdup_vprintf(fmt, va);
23536683283SEric Blake len = strlen(msg);
2365c4fe018SEric Blake assert(len < NBD_MAX_STRING_SIZE);
2379588463eSVladimir Sementsov-Ogievskiy trace_nbd_negotiate_send_rep_err(msg);
2380cfae925SVladimir Sementsov-Ogievskiy ret = nbd_negotiate_send_rep_len(client, type, len, errp);
23936683283SEric Blake if (ret < 0) {
240df18c04eSEric Blake return ret;
24136683283SEric Blake }
2420cfae925SVladimir Sementsov-Ogievskiy if (nbd_write(client->ioc, msg, len, errp) < 0) {
2432fd2c840SVladimir Sementsov-Ogievskiy error_prepend(errp, "write failed (error message): ");
244df18c04eSEric Blake return -EIO;
24536683283SEric Blake }
2462fd2c840SVladimir Sementsov-Ogievskiy
247df18c04eSEric Blake return 0;
24836683283SEric Blake }
24936683283SEric Blake
/*
 * Return a newly allocated copy of @name suitable for use in an error
 * reply: names of 80 bytes or more are cut to their first 80 bytes
 * with "..." appended, shorter names are copied unchanged.
 */
static char *
nbd_sanitize_name(const char *name)
{
    /* XXX Should we also try to sanitize any control characters? */
    if (strnlen(name, 80) >= 80) {
        return g_strdup_printf("%.80s...", name);
    }

    return g_strdup(name);
}
2625c4fe018SEric Blake
26341f5dfafSEric Blake /* Send an error reply.
26441f5dfafSEric Blake * Return -errno on error, 0 on success. */
2654fa333e0SEric Blake static coroutine_fn int G_GNUC_PRINTF(4, 5)
nbd_negotiate_send_rep_err(NBDClient * client,uint32_t type,Error ** errp,const char * fmt,...)26641f5dfafSEric Blake nbd_negotiate_send_rep_err(NBDClient *client, uint32_t type,
26741f5dfafSEric Blake Error **errp, const char *fmt, ...)
26841f5dfafSEric Blake {
26941f5dfafSEric Blake va_list va;
27041f5dfafSEric Blake int ret;
27141f5dfafSEric Blake
27241f5dfafSEric Blake va_start(va, fmt);
27341f5dfafSEric Blake ret = nbd_negotiate_send_rep_verr(client, type, errp, fmt, va);
27441f5dfafSEric Blake va_end(va);
27541f5dfafSEric Blake return ret;
27641f5dfafSEric Blake }
27741f5dfafSEric Blake
278894e0280SEric Blake /* Drop remainder of the current option, and send a reply with the
279894e0280SEric Blake * given error type and message. Return -errno on read or write
280894e0280SEric Blake * failure; or 0 if connection is still live. */
2814fa333e0SEric Blake static coroutine_fn int G_GNUC_PRINTF(4, 0)
nbd_opt_vdrop(NBDClient * client,uint32_t type,Error ** errp,const char * fmt,va_list va)2822e425fd5SVladimir Sementsov-Ogievskiy nbd_opt_vdrop(NBDClient *client, uint32_t type, Error **errp,
2832e425fd5SVladimir Sementsov-Ogievskiy const char *fmt, va_list va)
2842e425fd5SVladimir Sementsov-Ogievskiy {
2852e425fd5SVladimir Sementsov-Ogievskiy int ret = nbd_drop(client->ioc, client->optlen, errp);
2862e425fd5SVladimir Sementsov-Ogievskiy
2872e425fd5SVladimir Sementsov-Ogievskiy client->optlen = 0;
2882e425fd5SVladimir Sementsov-Ogievskiy if (!ret) {
2892e425fd5SVladimir Sementsov-Ogievskiy ret = nbd_negotiate_send_rep_verr(client, type, errp, fmt, va);
2902e425fd5SVladimir Sementsov-Ogievskiy }
2912e425fd5SVladimir Sementsov-Ogievskiy return ret;
2922e425fd5SVladimir Sementsov-Ogievskiy }
2932e425fd5SVladimir Sementsov-Ogievskiy
2944fa333e0SEric Blake static coroutine_fn int G_GNUC_PRINTF(4, 5)
nbd_opt_drop(NBDClient * client,uint32_t type,Error ** errp,const char * fmt,...)295894e0280SEric Blake nbd_opt_drop(NBDClient *client, uint32_t type, Error **errp,
296894e0280SEric Blake const char *fmt, ...)
297894e0280SEric Blake {
2982e425fd5SVladimir Sementsov-Ogievskiy int ret;
299894e0280SEric Blake va_list va;
300894e0280SEric Blake
301894e0280SEric Blake va_start(va, fmt);
3022e425fd5SVladimir Sementsov-Ogievskiy ret = nbd_opt_vdrop(client, type, errp, fmt, va);
303894e0280SEric Blake va_end(va);
3042e425fd5SVladimir Sementsov-Ogievskiy
3052e425fd5SVladimir Sementsov-Ogievskiy return ret;
306894e0280SEric Blake }
3072e425fd5SVladimir Sementsov-Ogievskiy
3084fa333e0SEric Blake static coroutine_fn int G_GNUC_PRINTF(3, 4)
nbd_opt_invalid(NBDClient * client,Error ** errp,const char * fmt,...)3092e425fd5SVladimir Sementsov-Ogievskiy nbd_opt_invalid(NBDClient *client, Error **errp, const char *fmt, ...)
3102e425fd5SVladimir Sementsov-Ogievskiy {
3112e425fd5SVladimir Sementsov-Ogievskiy int ret;
3122e425fd5SVladimir Sementsov-Ogievskiy va_list va;
3132e425fd5SVladimir Sementsov-Ogievskiy
3142e425fd5SVladimir Sementsov-Ogievskiy va_start(va, fmt);
3152e425fd5SVladimir Sementsov-Ogievskiy ret = nbd_opt_vdrop(client, NBD_REP_ERR_INVALID, errp, fmt, va);
3162e425fd5SVladimir Sementsov-Ogievskiy va_end(va);
3172e425fd5SVladimir Sementsov-Ogievskiy
318894e0280SEric Blake return ret;
319894e0280SEric Blake }
320894e0280SEric Blake
321894e0280SEric Blake /* Read size bytes from the unparsed payload of the current option.
322d1e2c3e7SEric Blake * If @check_nul, require that no NUL bytes appear in buffer.
323894e0280SEric Blake * Return -errno on I/O error, 0 if option was completely handled by
324894e0280SEric Blake * sending a reply about inconsistent lengths, or 1 on success. */
3254fa333e0SEric Blake static coroutine_fn int
nbd_opt_read(NBDClient * client,void * buffer,size_t size,bool check_nul,Error ** errp)3264fa333e0SEric Blake nbd_opt_read(NBDClient *client, void *buffer, size_t size,
327d1e2c3e7SEric Blake bool check_nul, Error **errp)
328894e0280SEric Blake {
329894e0280SEric Blake if (size > client->optlen) {
3302e425fd5SVladimir Sementsov-Ogievskiy return nbd_opt_invalid(client, errp,
331894e0280SEric Blake "Inconsistent lengths in option %s",
332894e0280SEric Blake nbd_opt_lookup(client->opt));
333894e0280SEric Blake }
334894e0280SEric Blake client->optlen -= size;
335d1e2c3e7SEric Blake if (qio_channel_read_all(client->ioc, buffer, size, errp) < 0) {
336d1e2c3e7SEric Blake return -EIO;
337d1e2c3e7SEric Blake }
338d1e2c3e7SEric Blake
339d1e2c3e7SEric Blake if (check_nul && strnlen(buffer, size) != size) {
340d1e2c3e7SEric Blake return nbd_opt_invalid(client, errp,
341d1e2c3e7SEric Blake "Unexpected embedded NUL in option %s",
342d1e2c3e7SEric Blake nbd_opt_lookup(client->opt));
343d1e2c3e7SEric Blake }
344d1e2c3e7SEric Blake return 1;
345894e0280SEric Blake }
346894e0280SEric Blake
347e7b1948dSVladimir Sementsov-Ogievskiy /* Drop size bytes from the unparsed payload of the current option.
348e7b1948dSVladimir Sementsov-Ogievskiy * Return -errno on I/O error, 0 if option was completely handled by
349e7b1948dSVladimir Sementsov-Ogievskiy * sending a reply about inconsistent lengths, or 1 on success. */
3504fa333e0SEric Blake static coroutine_fn int
nbd_opt_skip(NBDClient * client,size_t size,Error ** errp)3514fa333e0SEric Blake nbd_opt_skip(NBDClient *client, size_t size, Error **errp)
352e7b1948dSVladimir Sementsov-Ogievskiy {
353e7b1948dSVladimir Sementsov-Ogievskiy if (size > client->optlen) {
354e7b1948dSVladimir Sementsov-Ogievskiy return nbd_opt_invalid(client, errp,
355e7b1948dSVladimir Sementsov-Ogievskiy "Inconsistent lengths in option %s",
356e7b1948dSVladimir Sementsov-Ogievskiy nbd_opt_lookup(client->opt));
357e7b1948dSVladimir Sementsov-Ogievskiy }
358e7b1948dSVladimir Sementsov-Ogievskiy client->optlen -= size;
359e7b1948dSVladimir Sementsov-Ogievskiy return nbd_drop(client->ioc, size, errp) < 0 ? -EIO : 1;
360e7b1948dSVladimir Sementsov-Ogievskiy }
361e7b1948dSVladimir Sementsov-Ogievskiy
36212296459SVladimir Sementsov-Ogievskiy /* nbd_opt_read_name
36312296459SVladimir Sementsov-Ogievskiy *
36412296459SVladimir Sementsov-Ogievskiy * Read a string with the format:
36593676c88SEric Blake * uint32_t len (<= NBD_MAX_STRING_SIZE)
36612296459SVladimir Sementsov-Ogievskiy * len bytes string (not 0-terminated)
36712296459SVladimir Sementsov-Ogievskiy *
3689d7ab222SEric Blake * On success, @name will be allocated.
36912296459SVladimir Sementsov-Ogievskiy * If @length is non-null, it will be set to the actual string length.
37012296459SVladimir Sementsov-Ogievskiy *
37112296459SVladimir Sementsov-Ogievskiy * Return -errno on I/O error, 0 if option was completely handled by
37212296459SVladimir Sementsov-Ogievskiy * sending a reply about inconsistent lengths, or 1 on success.
37312296459SVladimir Sementsov-Ogievskiy */
3744fa333e0SEric Blake static coroutine_fn int
nbd_opt_read_name(NBDClient * client,char ** name,uint32_t * length,Error ** errp)3754fa333e0SEric Blake nbd_opt_read_name(NBDClient *client, char **name, uint32_t *length,
37612296459SVladimir Sementsov-Ogievskiy Error **errp)
37712296459SVladimir Sementsov-Ogievskiy {
37812296459SVladimir Sementsov-Ogievskiy int ret;
37912296459SVladimir Sementsov-Ogievskiy uint32_t len;
3809d7ab222SEric Blake g_autofree char *local_name = NULL;
38112296459SVladimir Sementsov-Ogievskiy
3829d7ab222SEric Blake *name = NULL;
383d1e2c3e7SEric Blake ret = nbd_opt_read(client, &len, sizeof(len), false, errp);
38412296459SVladimir Sementsov-Ogievskiy if (ret <= 0) {
38512296459SVladimir Sementsov-Ogievskiy return ret;
38612296459SVladimir Sementsov-Ogievskiy }
38780c7c2b0SPeter Maydell len = cpu_to_be32(len);
38812296459SVladimir Sementsov-Ogievskiy
38993676c88SEric Blake if (len > NBD_MAX_STRING_SIZE) {
39012296459SVladimir Sementsov-Ogievskiy return nbd_opt_invalid(client, errp,
39112296459SVladimir Sementsov-Ogievskiy "Invalid name length: %" PRIu32, len);
39212296459SVladimir Sementsov-Ogievskiy }
39312296459SVladimir Sementsov-Ogievskiy
3949d7ab222SEric Blake local_name = g_malloc(len + 1);
395d1e2c3e7SEric Blake ret = nbd_opt_read(client, local_name, len, true, errp);
39612296459SVladimir Sementsov-Ogievskiy if (ret <= 0) {
39712296459SVladimir Sementsov-Ogievskiy return ret;
39812296459SVladimir Sementsov-Ogievskiy }
3999d7ab222SEric Blake local_name[len] = '\0';
40012296459SVladimir Sementsov-Ogievskiy
40112296459SVladimir Sementsov-Ogievskiy if (length) {
40212296459SVladimir Sementsov-Ogievskiy *length = len;
40312296459SVladimir Sementsov-Ogievskiy }
4049d7ab222SEric Blake *name = g_steal_pointer(&local_name);
40512296459SVladimir Sementsov-Ogievskiy
40612296459SVladimir Sementsov-Ogievskiy return 1;
40712296459SVladimir Sementsov-Ogievskiy }
40812296459SVladimir Sementsov-Ogievskiy
409526e5c65SEric Blake /* Send a single NBD_REP_SERVER reply to NBD_OPT_LIST, including payload.
410526e5c65SEric Blake * Return -errno on error, 0 on success. */
4114fa333e0SEric Blake static coroutine_fn int
nbd_negotiate_send_rep_list(NBDClient * client,NBDExport * exp,Error ** errp)4124fa333e0SEric Blake nbd_negotiate_send_rep_list(NBDClient *client, NBDExport *exp, Error **errp)
413798bfe00SFam Zheng {
414795d946dSVladimir Sementsov-Ogievskiy ERRP_GUARD();
415b1a75b33SEric Blake size_t name_len, desc_len;
416526e5c65SEric Blake uint32_t len;
417b1a75b33SEric Blake const char *name = exp->name ? exp->name : "";
418b1a75b33SEric Blake const char *desc = exp->description ? exp->description : "";
4190cfae925SVladimir Sementsov-Ogievskiy QIOChannel *ioc = client->ioc;
4202e5c9ad6SVladimir Sementsov-Ogievskiy int ret;
421798bfe00SFam Zheng
4229588463eSVladimir Sementsov-Ogievskiy trace_nbd_negotiate_send_rep_list(name, desc);
423b1a75b33SEric Blake name_len = strlen(name);
424b1a75b33SEric Blake desc_len = strlen(desc);
42593676c88SEric Blake assert(name_len <= NBD_MAX_STRING_SIZE && desc_len <= NBD_MAX_STRING_SIZE);
426526e5c65SEric Blake len = name_len + desc_len + sizeof(len);
4270cfae925SVladimir Sementsov-Ogievskiy ret = nbd_negotiate_send_rep_len(client, NBD_REP_SERVER, len, errp);
4282e5c9ad6SVladimir Sementsov-Ogievskiy if (ret < 0) {
4292e5c9ad6SVladimir Sementsov-Ogievskiy return ret;
430798bfe00SFam Zheng }
431526e5c65SEric Blake
432798bfe00SFam Zheng len = cpu_to_be32(name_len);
4332fd2c840SVladimir Sementsov-Ogievskiy if (nbd_write(ioc, &len, sizeof(len), errp) < 0) {
4342fd2c840SVladimir Sementsov-Ogievskiy error_prepend(errp, "write failed (name length): ");
435798bfe00SFam Zheng return -EINVAL;
436798bfe00SFam Zheng }
4372fd2c840SVladimir Sementsov-Ogievskiy
4382fd2c840SVladimir Sementsov-Ogievskiy if (nbd_write(ioc, name, name_len, errp) < 0) {
4392fd2c840SVladimir Sementsov-Ogievskiy error_prepend(errp, "write failed (name buffer): ");
440b1a75b33SEric Blake return -EINVAL;
441b1a75b33SEric Blake }
4422fd2c840SVladimir Sementsov-Ogievskiy
4432fd2c840SVladimir Sementsov-Ogievskiy if (nbd_write(ioc, desc, desc_len, errp) < 0) {
4442fd2c840SVladimir Sementsov-Ogievskiy error_prepend(errp, "write failed (description buffer): ");
445798bfe00SFam Zheng return -EINVAL;
446798bfe00SFam Zheng }
4472fd2c840SVladimir Sementsov-Ogievskiy
448798bfe00SFam Zheng return 0;
449798bfe00SFam Zheng }
450798bfe00SFam Zheng
451526e5c65SEric Blake /* Process the NBD_OPT_LIST command, with a potential series of replies.
452526e5c65SEric Blake * Return -errno on error, 0 on success. */
4534fa333e0SEric Blake static coroutine_fn int
nbd_negotiate_handle_list(NBDClient * client,Error ** errp)4544fa333e0SEric Blake nbd_negotiate_handle_list(NBDClient *client, Error **errp)
455798bfe00SFam Zheng {
456798bfe00SFam Zheng NBDExport *exp;
4570cfae925SVladimir Sementsov-Ogievskiy assert(client->opt == NBD_OPT_LIST);
458798bfe00SFam Zheng
459798bfe00SFam Zheng /* For each export, send a NBD_REP_SERVER reply. */
460798bfe00SFam Zheng QTAILQ_FOREACH(exp, &exports, next) {
4610cfae925SVladimir Sementsov-Ogievskiy if (nbd_negotiate_send_rep_list(client, exp, errp)) {
462798bfe00SFam Zheng return -EINVAL;
463798bfe00SFam Zheng }
464798bfe00SFam Zheng }
465798bfe00SFam Zheng /* Finish with a NBD_REP_ACK. */
4660cfae925SVladimir Sementsov-Ogievskiy return nbd_negotiate_send_rep(client, NBD_REP_ACK, errp);
467798bfe00SFam Zheng }
468798bfe00SFam Zheng
4694fa333e0SEric Blake static coroutine_fn void
nbd_check_meta_export(NBDClient * client,NBDExport * exp)4704fa333e0SEric Blake nbd_check_meta_export(NBDClient *client, NBDExport *exp)
471e7b1948dSVladimir Sementsov-Ogievskiy {
472fd358d83SEric Blake if (exp != client->contexts.exp) {
473fd358d83SEric Blake client->contexts.count = 0;
47447ec485eSEric Blake }
475e7b1948dSVladimir Sementsov-Ogievskiy }
476e7b1948dSVladimir Sementsov-Ogievskiy
477f37708f6SEric Blake /* Send a reply to NBD_OPT_EXPORT_NAME.
478f37708f6SEric Blake * Return -errno on error, 0 on success. */
4794fa333e0SEric Blake static coroutine_fn int
nbd_negotiate_handle_export_name(NBDClient * client,bool no_zeroes,Error ** errp)4804fa333e0SEric Blake nbd_negotiate_handle_export_name(NBDClient *client, bool no_zeroes,
4812fd2c840SVladimir Sementsov-Ogievskiy Error **errp)
482798bfe00SFam Zheng {
483795d946dSVladimir Sementsov-Ogievskiy ERRP_GUARD();
4849d7ab222SEric Blake g_autofree char *name = NULL;
4855f66d060SEric Blake char buf[NBD_REPLY_EXPORT_NAME_SIZE] = "";
48623e099c3SEric Blake size_t len;
48723e099c3SEric Blake int ret;
488dbb38caaSEric Blake uint16_t myflags;
489798bfe00SFam Zheng
490798bfe00SFam Zheng /* Client sends:
491798bfe00SFam Zheng [20 .. xx] export name (length bytes)
4925f66d060SEric Blake Server replies:
4935f66d060SEric Blake [ 0 .. 7] size
4945f66d060SEric Blake [ 8 .. 9] export flags
4955f66d060SEric Blake [10 .. 133] reserved (0) [unless no_zeroes]
496798bfe00SFam Zheng */
4979588463eSVladimir Sementsov-Ogievskiy trace_nbd_negotiate_handle_export_name();
4989c1d2614SEric Blake if (client->mode >= NBD_MODE_EXTENDED) {
4999c1d2614SEric Blake error_setg(errp, "Extended headers already negotiated");
5009c1d2614SEric Blake return -EINVAL;
5019c1d2614SEric Blake }
50293676c88SEric Blake if (client->optlen > NBD_MAX_STRING_SIZE) {
5032fd2c840SVladimir Sementsov-Ogievskiy error_setg(errp, "Bad length received");
504d9faeed8SVladimir Sementsov-Ogievskiy return -EINVAL;
505798bfe00SFam Zheng }
5069d7ab222SEric Blake name = g_malloc(client->optlen + 1);
507e6798f06SVladimir Sementsov-Ogievskiy if (nbd_read(client->ioc, name, client->optlen, "export name", errp) < 0) {
50832f158a6SEric Blake return -EIO;
509798bfe00SFam Zheng }
5100cfae925SVladimir Sementsov-Ogievskiy name[client->optlen] = '\0';
5110cfae925SVladimir Sementsov-Ogievskiy client->optlen = 0;
512798bfe00SFam Zheng
5139588463eSVladimir Sementsov-Ogievskiy trace_nbd_negotiate_handle_export_name_request(name);
5149344e5f5SDaniel P. Berrange
515798bfe00SFam Zheng client->exp = nbd_export_find(name);
516798bfe00SFam Zheng if (!client->exp) {
5172fd2c840SVladimir Sementsov-Ogievskiy error_setg(errp, "export not found");
518d9faeed8SVladimir Sementsov-Ogievskiy return -EINVAL;
519798bfe00SFam Zheng }
520fd358d83SEric Blake nbd_check_meta_export(client, client->exp);
521798bfe00SFam Zheng
522dbb38caaSEric Blake myflags = client->exp->nbdflags;
523ac132d05SEric Blake if (client->mode >= NBD_MODE_STRUCTURED) {
524dbb38caaSEric Blake myflags |= NBD_FLAG_SEND_DF;
525dbb38caaSEric Blake }
5262dcbb11bSEric Blake if (client->mode >= NBD_MODE_EXTENDED && client->contexts.count) {
5272dcbb11bSEric Blake myflags |= NBD_FLAG_BLOCK_STAT_PAYLOAD;
5282dcbb11bSEric Blake }
529dbb38caaSEric Blake trace_nbd_negotiate_new_style_size_flags(client->exp->size, myflags);
53023e099c3SEric Blake stq_be_p(buf, client->exp->size);
531dbb38caaSEric Blake stw_be_p(buf + 8, myflags);
53223e099c3SEric Blake len = no_zeroes ? 10 : sizeof(buf);
53323e099c3SEric Blake ret = nbd_write(client->ioc, buf, len, errp);
53423e099c3SEric Blake if (ret < 0) {
53523e099c3SEric Blake error_prepend(errp, "write failed: ");
53623e099c3SEric Blake return ret;
53723e099c3SEric Blake }
53823e099c3SEric Blake
539798bfe00SFam Zheng QTAILQ_INSERT_TAIL(&client->exp->clients, client, next);
540c69de1beSKevin Wolf blk_exp_ref(&client->exp->common);
541d9faeed8SVladimir Sementsov-Ogievskiy
542d9faeed8SVladimir Sementsov-Ogievskiy return 0;
543798bfe00SFam Zheng }
544798bfe00SFam Zheng
545f37708f6SEric Blake /* Send a single NBD_REP_INFO, with a buffer @buf of @length bytes.
546f37708f6SEric Blake * The buffer does NOT include the info type prefix.
547f37708f6SEric Blake * Return -errno on error, 0 if ready to send more. */
5484fa333e0SEric Blake static coroutine_fn int
nbd_negotiate_send_info(NBDClient * client,uint16_t info,uint32_t length,void * buf,Error ** errp)5494fa333e0SEric Blake nbd_negotiate_send_info(NBDClient *client, uint16_t info, uint32_t length,
5504fa333e0SEric Blake void *buf, Error **errp)
551f37708f6SEric Blake {
552f37708f6SEric Blake int rc;
553f37708f6SEric Blake
554f37708f6SEric Blake trace_nbd_negotiate_send_info(info, nbd_info_lookup(info), length);
5550cfae925SVladimir Sementsov-Ogievskiy rc = nbd_negotiate_send_rep_len(client, NBD_REP_INFO,
556f37708f6SEric Blake sizeof(info) + length, errp);
557f37708f6SEric Blake if (rc < 0) {
558f37708f6SEric Blake return rc;
559f37708f6SEric Blake }
56080c7c2b0SPeter Maydell info = cpu_to_be16(info);
561f37708f6SEric Blake if (nbd_write(client->ioc, &info, sizeof(info), errp) < 0) {
562f37708f6SEric Blake return -EIO;
563f37708f6SEric Blake }
564f37708f6SEric Blake if (nbd_write(client->ioc, buf, length, errp) < 0) {
565f37708f6SEric Blake return -EIO;
566f37708f6SEric Blake }
567f37708f6SEric Blake return 0;
568f37708f6SEric Blake }
569f37708f6SEric Blake
570a16a7907SEric Blake /* nbd_reject_length: Handle any unexpected payload.
571a16a7907SEric Blake * @fatal requests that we quit talking to the client, even if we are able
572a16a7907SEric Blake * to successfully send an error reply.
573a16a7907SEric Blake * Return:
574a16a7907SEric Blake * -errno transmission error occurred or @fatal was requested, errp is set
575a16a7907SEric Blake * 0 error message successfully sent to client, errp is not set
576a16a7907SEric Blake */
5774fa333e0SEric Blake static coroutine_fn int
nbd_reject_length(NBDClient * client,bool fatal,Error ** errp)5784fa333e0SEric Blake nbd_reject_length(NBDClient *client, bool fatal, Error **errp)
579a16a7907SEric Blake {
580a16a7907SEric Blake int ret;
581a16a7907SEric Blake
5820cfae925SVladimir Sementsov-Ogievskiy assert(client->optlen);
5832e425fd5SVladimir Sementsov-Ogievskiy ret = nbd_opt_invalid(client, errp, "option '%s' has unexpected length",
5840cfae925SVladimir Sementsov-Ogievskiy nbd_opt_lookup(client->opt));
585a16a7907SEric Blake if (fatal && !ret) {
586894e0280SEric Blake error_setg(errp, "option '%s' has unexpected length",
5870cfae925SVladimir Sementsov-Ogievskiy nbd_opt_lookup(client->opt));
588a16a7907SEric Blake return -EINVAL;
589a16a7907SEric Blake }
590a16a7907SEric Blake return ret;
591a16a7907SEric Blake }
592a16a7907SEric Blake
593f37708f6SEric Blake /* Handle NBD_OPT_INFO and NBD_OPT_GO.
594f37708f6SEric Blake * Return -errno on error, 0 if ready for next option, and 1 to move
595f37708f6SEric Blake * into transmission phase. */
5964fa333e0SEric Blake static coroutine_fn int
nbd_negotiate_handle_info(NBDClient * client,Error ** errp)5974fa333e0SEric Blake nbd_negotiate_handle_info(NBDClient *client, Error **errp)
598f37708f6SEric Blake {
599f37708f6SEric Blake int rc;
6009d7ab222SEric Blake g_autofree char *name = NULL;
601f37708f6SEric Blake NBDExport *exp;
602f37708f6SEric Blake uint16_t requests;
603f37708f6SEric Blake uint16_t request;
604bbc35fc2SChristian Borntraeger uint32_t namelen = 0;
605f37708f6SEric Blake bool sendname = false;
6060c1d50bdSEric Blake bool blocksize = false;
6070c1d50bdSEric Blake uint32_t sizes[3];
608f37708f6SEric Blake char buf[sizeof(uint64_t) + sizeof(uint16_t)];
6096e280648SEric Blake uint32_t check_align = 0;
610dbb38caaSEric Blake uint16_t myflags;
611f37708f6SEric Blake
612f37708f6SEric Blake /* Client sends:
613f37708f6SEric Blake 4 bytes: L, name length (can be 0)
614f37708f6SEric Blake L bytes: export name
615f37708f6SEric Blake 2 bytes: N, number of requests (can be 0)
616f37708f6SEric Blake N * 2 bytes: N requests
617f37708f6SEric Blake */
6189d7ab222SEric Blake rc = nbd_opt_read_name(client, &name, &namelen, errp);
619894e0280SEric Blake if (rc <= 0) {
620894e0280SEric Blake return rc;
621f37708f6SEric Blake }
622f37708f6SEric Blake trace_nbd_negotiate_handle_export_name_request(name);
623f37708f6SEric Blake
624d1e2c3e7SEric Blake rc = nbd_opt_read(client, &requests, sizeof(requests), false, errp);
625894e0280SEric Blake if (rc <= 0) {
626894e0280SEric Blake return rc;
627f37708f6SEric Blake }
62880c7c2b0SPeter Maydell requests = be16_to_cpu(requests);
629f37708f6SEric Blake trace_nbd_negotiate_handle_info_requests(requests);
630f37708f6SEric Blake while (requests--) {
631d1e2c3e7SEric Blake rc = nbd_opt_read(client, &request, sizeof(request), false, errp);
632894e0280SEric Blake if (rc <= 0) {
633894e0280SEric Blake return rc;
634f37708f6SEric Blake }
63580c7c2b0SPeter Maydell request = be16_to_cpu(request);
636f37708f6SEric Blake trace_nbd_negotiate_handle_info_request(request,
637f37708f6SEric Blake nbd_info_lookup(request));
6380c1d50bdSEric Blake /* We care about NBD_INFO_NAME and NBD_INFO_BLOCK_SIZE;
6390c1d50bdSEric Blake * everything else is either a request we don't know or
6400c1d50bdSEric Blake * something we send regardless of request */
6410c1d50bdSEric Blake switch (request) {
6420c1d50bdSEric Blake case NBD_INFO_NAME:
643f37708f6SEric Blake sendname = true;
6440c1d50bdSEric Blake break;
6450c1d50bdSEric Blake case NBD_INFO_BLOCK_SIZE:
6460c1d50bdSEric Blake blocksize = true;
6470c1d50bdSEric Blake break;
648f37708f6SEric Blake }
649f37708f6SEric Blake }
650894e0280SEric Blake if (client->optlen) {
651894e0280SEric Blake return nbd_reject_length(client, false, errp);
652894e0280SEric Blake }
653f37708f6SEric Blake
654f37708f6SEric Blake exp = nbd_export_find(name);
655f37708f6SEric Blake if (!exp) {
6565c4fe018SEric Blake g_autofree char *sane_name = nbd_sanitize_name(name);
6575c4fe018SEric Blake
6580cfae925SVladimir Sementsov-Ogievskiy return nbd_negotiate_send_rep_err(client, NBD_REP_ERR_UNKNOWN,
6590cfae925SVladimir Sementsov-Ogievskiy errp, "export '%s' not present",
6605c4fe018SEric Blake sane_name);
661f37708f6SEric Blake }
662fd358d83SEric Blake if (client->opt == NBD_OPT_GO) {
663fd358d83SEric Blake nbd_check_meta_export(client, exp);
664fd358d83SEric Blake }
665f37708f6SEric Blake
666f37708f6SEric Blake /* Don't bother sending NBD_INFO_NAME unless client requested it */
667f37708f6SEric Blake if (sendname) {
6680cfae925SVladimir Sementsov-Ogievskiy rc = nbd_negotiate_send_info(client, NBD_INFO_NAME, namelen, name,
669f37708f6SEric Blake errp);
670f37708f6SEric Blake if (rc < 0) {
671f37708f6SEric Blake return rc;
672f37708f6SEric Blake }
673f37708f6SEric Blake }
674f37708f6SEric Blake
675f37708f6SEric Blake /* Send NBD_INFO_DESCRIPTION only if available, regardless of
676f37708f6SEric Blake * client request */
677f37708f6SEric Blake if (exp->description) {
678f37708f6SEric Blake size_t len = strlen(exp->description);
679f37708f6SEric Blake
68093676c88SEric Blake assert(len <= NBD_MAX_STRING_SIZE);
6810cfae925SVladimir Sementsov-Ogievskiy rc = nbd_negotiate_send_info(client, NBD_INFO_DESCRIPTION,
682f37708f6SEric Blake len, exp->description, errp);
683f37708f6SEric Blake if (rc < 0) {
684f37708f6SEric Blake return rc;
685f37708f6SEric Blake }
686f37708f6SEric Blake }
687f37708f6SEric Blake
6880c1d50bdSEric Blake /* Send NBD_INFO_BLOCK_SIZE always, but tweak the minimum size
6890c1d50bdSEric Blake * according to whether the client requested it, and according to
6900c1d50bdSEric Blake * whether this is OPT_INFO or OPT_GO. */
691b0245d64SEric Blake /* minimum - 1 for back-compat, or actual if client will obey it. */
692b0245d64SEric Blake if (client->opt == NBD_OPT_INFO || blocksize) {
69337a4f70cSKevin Wolf check_align = sizes[0] = blk_get_request_alignment(exp->common.blk);
694b0245d64SEric Blake } else {
695b0245d64SEric Blake sizes[0] = 1;
696b0245d64SEric Blake }
697b0245d64SEric Blake assert(sizes[0] <= NBD_MAX_BUFFER_SIZE);
6980c1d50bdSEric Blake /* preferred - Hard-code to 4096 for now.
6990c1d50bdSEric Blake * TODO: is blk_bs(blk)->bl.opt_transfer appropriate? */
700b0245d64SEric Blake sizes[1] = MAX(4096, sizes[0]);
7010c1d50bdSEric Blake /* maximum - At most 32M, but smaller as appropriate. */
70237a4f70cSKevin Wolf sizes[2] = MIN(blk_get_max_transfer(exp->common.blk), NBD_MAX_BUFFER_SIZE);
7030c1d50bdSEric Blake trace_nbd_negotiate_handle_info_block_size(sizes[0], sizes[1], sizes[2]);
70480c7c2b0SPeter Maydell sizes[0] = cpu_to_be32(sizes[0]);
70580c7c2b0SPeter Maydell sizes[1] = cpu_to_be32(sizes[1]);
70680c7c2b0SPeter Maydell sizes[2] = cpu_to_be32(sizes[2]);
7070cfae925SVladimir Sementsov-Ogievskiy rc = nbd_negotiate_send_info(client, NBD_INFO_BLOCK_SIZE,
7080c1d50bdSEric Blake sizeof(sizes), sizes, errp);
7090c1d50bdSEric Blake if (rc < 0) {
7100c1d50bdSEric Blake return rc;
7110c1d50bdSEric Blake }
7120c1d50bdSEric Blake
713f37708f6SEric Blake /* Send NBD_INFO_EXPORT always */
714dbb38caaSEric Blake myflags = exp->nbdflags;
715ac132d05SEric Blake if (client->mode >= NBD_MODE_STRUCTURED) {
716dbb38caaSEric Blake myflags |= NBD_FLAG_SEND_DF;
717dbb38caaSEric Blake }
7182dcbb11bSEric Blake if (client->mode >= NBD_MODE_EXTENDED &&
7192dcbb11bSEric Blake (client->contexts.count || client->opt == NBD_OPT_INFO)) {
7202dcbb11bSEric Blake myflags |= NBD_FLAG_BLOCK_STAT_PAYLOAD;
7212dcbb11bSEric Blake }
722dbb38caaSEric Blake trace_nbd_negotiate_new_style_size_flags(exp->size, myflags);
723f37708f6SEric Blake stq_be_p(buf, exp->size);
724dbb38caaSEric Blake stw_be_p(buf + 8, myflags);
7250cfae925SVladimir Sementsov-Ogievskiy rc = nbd_negotiate_send_info(client, NBD_INFO_EXPORT,
726f37708f6SEric Blake sizeof(buf), buf, errp);
727f37708f6SEric Blake if (rc < 0) {
728f37708f6SEric Blake return rc;
729f37708f6SEric Blake }
730f37708f6SEric Blake
731099fbcd6SEric Blake /*
732099fbcd6SEric Blake * If the client is just asking for NBD_OPT_INFO, but forgot to
733099fbcd6SEric Blake * request block sizes in a situation that would impact
734099fbcd6SEric Blake * performance, then return an error. But for NBD_OPT_GO, we
735099fbcd6SEric Blake * tolerate all clients, regardless of alignments.
736099fbcd6SEric Blake */
737099fbcd6SEric Blake if (client->opt == NBD_OPT_INFO && !blocksize &&
73837a4f70cSKevin Wolf blk_get_request_alignment(exp->common.blk) > 1) {
7390cfae925SVladimir Sementsov-Ogievskiy return nbd_negotiate_send_rep_err(client,
7400cfae925SVladimir Sementsov-Ogievskiy NBD_REP_ERR_BLOCK_SIZE_REQD,
7410c1d50bdSEric Blake errp,
7420c1d50bdSEric Blake "request NBD_INFO_BLOCK_SIZE to "
7430c1d50bdSEric Blake "use this export");
7440c1d50bdSEric Blake }
7450c1d50bdSEric Blake
746f37708f6SEric Blake /* Final reply */
7470cfae925SVladimir Sementsov-Ogievskiy rc = nbd_negotiate_send_rep(client, NBD_REP_ACK, errp);
748f37708f6SEric Blake if (rc < 0) {
749f37708f6SEric Blake return rc;
750f37708f6SEric Blake }
751f37708f6SEric Blake
7520cfae925SVladimir Sementsov-Ogievskiy if (client->opt == NBD_OPT_GO) {
753f37708f6SEric Blake client->exp = exp;
7546e280648SEric Blake client->check_align = check_align;
755f37708f6SEric Blake QTAILQ_INSERT_TAIL(&client->exp->clients, client, next);
756c69de1beSKevin Wolf blk_exp_ref(&client->exp->common);
757f37708f6SEric Blake rc = 1;
758f37708f6SEric Blake }
759f37708f6SEric Blake return rc;
760f37708f6SEric Blake }
761f37708f6SEric Blake
762ae6d91a7SZhu Yangyang /* Callback to learn when QIO TLS upgrade is complete */
763ae6d91a7SZhu Yangyang struct NBDTLSServerHandshakeData {
764ae6d91a7SZhu Yangyang bool complete;
765ae6d91a7SZhu Yangyang Error *error;
766ae6d91a7SZhu Yangyang Coroutine *co;
767ae6d91a7SZhu Yangyang };
768ae6d91a7SZhu Yangyang
7694fa333e0SEric Blake static void
nbd_server_tls_handshake(QIOTask * task,void * opaque)7704fa333e0SEric Blake nbd_server_tls_handshake(QIOTask *task, void *opaque)
771ae6d91a7SZhu Yangyang {
772ae6d91a7SZhu Yangyang struct NBDTLSServerHandshakeData *data = opaque;
773ae6d91a7SZhu Yangyang
774ae6d91a7SZhu Yangyang qio_task_propagate_error(task, &data->error);
775ae6d91a7SZhu Yangyang data->complete = true;
776ae6d91a7SZhu Yangyang if (!qemu_coroutine_entered(data->co)) {
777ae6d91a7SZhu Yangyang aio_co_wake(data->co);
778ae6d91a7SZhu Yangyang }
779ae6d91a7SZhu Yangyang }
780f37708f6SEric Blake
78136683283SEric Blake /* Handle NBD_OPT_STARTTLS. Return NULL to drop connection, or else the
78236683283SEric Blake * new channel for all further (now-encrypted) communication. */
7834fa333e0SEric Blake static coroutine_fn QIOChannel *
nbd_negotiate_handle_starttls(NBDClient * client,Error ** errp)7844fa333e0SEric Blake nbd_negotiate_handle_starttls(NBDClient *client, Error **errp)
785f95910feSDaniel P. Berrange {
786f95910feSDaniel P. Berrange QIOChannel *ioc;
787f95910feSDaniel P. Berrange QIOChannelTLS *tioc;
788ae6d91a7SZhu Yangyang struct NBDTLSServerHandshakeData data = { 0 };
789f95910feSDaniel P. Berrange
7900cfae925SVladimir Sementsov-Ogievskiy assert(client->opt == NBD_OPT_STARTTLS);
7910cfae925SVladimir Sementsov-Ogievskiy
7929588463eSVladimir Sementsov-Ogievskiy trace_nbd_negotiate_handle_starttls();
793f95910feSDaniel P. Berrange ioc = client->ioc;
794f95910feSDaniel P. Berrange
7950cfae925SVladimir Sementsov-Ogievskiy if (nbd_negotiate_send_rep(client, NBD_REP_ACK, errp) < 0) {
79663d5ef86SEric Blake return NULL;
79763d5ef86SEric Blake }
798f95910feSDaniel P. Berrange
799f95910feSDaniel P. Berrange tioc = qio_channel_tls_new_server(ioc,
800f95910feSDaniel P. Berrange client->tlscreds,
801b25e12daSDaniel P. Berrange client->tlsauthz,
8022fd2c840SVladimir Sementsov-Ogievskiy errp);
803f95910feSDaniel P. Berrange if (!tioc) {
804f95910feSDaniel P. Berrange return NULL;
805f95910feSDaniel P. Berrange }
806f95910feSDaniel P. Berrange
8070d73f725SDaniel P. Berrange qio_channel_set_name(QIO_CHANNEL(tioc), "nbd-server-tls");
8089588463eSVladimir Sementsov-Ogievskiy trace_nbd_negotiate_handle_starttls_handshake();
809ae6d91a7SZhu Yangyang data.co = qemu_coroutine_self();
810f95910feSDaniel P. Berrange qio_channel_tls_handshake(tioc,
811ae6d91a7SZhu Yangyang nbd_server_tls_handshake,
812f95910feSDaniel P. Berrange &data,
8131939ccdaSPeter Xu NULL,
814f95910feSDaniel P. Berrange NULL);
815f95910feSDaniel P. Berrange
816f95910feSDaniel P. Berrange if (!data.complete) {
817ae6d91a7SZhu Yangyang qemu_coroutine_yield();
818ae6d91a7SZhu Yangyang assert(data.complete);
819f95910feSDaniel P. Berrange }
820ae6d91a7SZhu Yangyang
821f95910feSDaniel P. Berrange if (data.error) {
822f95910feSDaniel P. Berrange object_unref(OBJECT(tioc));
8232fd2c840SVladimir Sementsov-Ogievskiy error_propagate(errp, data.error);
824f95910feSDaniel P. Berrange return NULL;
825f95910feSDaniel P. Berrange }
826f95910feSDaniel P. Berrange
827f95910feSDaniel P. Berrange return QIO_CHANNEL(tioc);
828f95910feSDaniel P. Berrange }
829f95910feSDaniel P. Berrange
830e7b1948dSVladimir Sementsov-Ogievskiy /* nbd_negotiate_send_meta_context
831e7b1948dSVladimir Sementsov-Ogievskiy *
832e7b1948dSVladimir Sementsov-Ogievskiy * Send one chunk of reply to NBD_OPT_{LIST,SET}_META_CONTEXT
833e7b1948dSVladimir Sementsov-Ogievskiy *
834e7b1948dSVladimir Sementsov-Ogievskiy * For NBD_OPT_LIST_META_CONTEXT @context_id is ignored, 0 is used instead.
835e7b1948dSVladimir Sementsov-Ogievskiy */
8364fa333e0SEric Blake static coroutine_fn int
nbd_negotiate_send_meta_context(NBDClient * client,const char * context,uint32_t context_id,Error ** errp)8374fa333e0SEric Blake nbd_negotiate_send_meta_context(NBDClient *client, const char *context,
8384fa333e0SEric Blake uint32_t context_id, Error **errp)
839e7b1948dSVladimir Sementsov-Ogievskiy {
840e7b1948dSVladimir Sementsov-Ogievskiy NBDOptionReplyMetaContext opt;
841e7b1948dSVladimir Sementsov-Ogievskiy struct iovec iov[] = {
842e7b1948dSVladimir Sementsov-Ogievskiy {.iov_base = &opt, .iov_len = sizeof(opt)},
843e7b1948dSVladimir Sementsov-Ogievskiy {.iov_base = (void *)context, .iov_len = strlen(context)}
844e7b1948dSVladimir Sementsov-Ogievskiy };
845e7b1948dSVladimir Sementsov-Ogievskiy
84693676c88SEric Blake assert(iov[1].iov_len <= NBD_MAX_STRING_SIZE);
847e7b1948dSVladimir Sementsov-Ogievskiy if (client->opt == NBD_OPT_LIST_META_CONTEXT) {
848e7b1948dSVladimir Sementsov-Ogievskiy context_id = 0;
849e7b1948dSVladimir Sementsov-Ogievskiy }
850e7b1948dSVladimir Sementsov-Ogievskiy
8512b53af25SEric Blake trace_nbd_negotiate_meta_query_reply(context, context_id);
852e7b1948dSVladimir Sementsov-Ogievskiy set_be_option_rep(&opt.h, client->opt, NBD_REP_META_CONTEXT,
853e7b1948dSVladimir Sementsov-Ogievskiy sizeof(opt) - sizeof(opt.h) + iov[1].iov_len);
854e7b1948dSVladimir Sementsov-Ogievskiy stl_be_p(&opt.context_id, context_id);
855e7b1948dSVladimir Sementsov-Ogievskiy
856e7b1948dSVladimir Sementsov-Ogievskiy return qio_channel_writev_all(client->ioc, iov, 2, errp) < 0 ? -EIO : 0;
857e7b1948dSVladimir Sementsov-Ogievskiy }
858e7b1948dSVladimir Sementsov-Ogievskiy
859ebd57062SEric Blake /*
860ebd57062SEric Blake * Return true if @query matches @pattern, or if @query is empty when
861ebd57062SEric Blake * the @client is performing _LIST_.
862b0769d8fSVladimir Sementsov-Ogievskiy */
8634fa333e0SEric Blake static coroutine_fn bool
nbd_meta_empty_or_pattern(NBDClient * client,const char * pattern,const char * query)8644fa333e0SEric Blake nbd_meta_empty_or_pattern(NBDClient *client, const char *pattern,
865ebd57062SEric Blake const char *query)
866b0769d8fSVladimir Sementsov-Ogievskiy {
867ebd57062SEric Blake if (!*query) {
868ebd57062SEric Blake trace_nbd_negotiate_meta_query_parse("empty");
869ebd57062SEric Blake return client->opt == NBD_OPT_LIST_META_CONTEXT;
870b0769d8fSVladimir Sementsov-Ogievskiy }
871ebd57062SEric Blake if (strcmp(query, pattern) == 0) {
872b0769d8fSVladimir Sementsov-Ogievskiy trace_nbd_negotiate_meta_query_parse(pattern);
873ebd57062SEric Blake return true;
874b0769d8fSVladimir Sementsov-Ogievskiy }
875ebd57062SEric Blake trace_nbd_negotiate_meta_query_skip("pattern not matched");
876ebd57062SEric Blake return false;
877b0769d8fSVladimir Sementsov-Ogievskiy }
878b0769d8fSVladimir Sementsov-Ogievskiy
/*
 * Return true and adjust @str in place if it begins with @prefix.
 *
 * On a match *@str is advanced past the prefix; otherwise it is left
 * untouched and false is returned.  An empty @prefix always matches.
 */
static coroutine_fn bool
nbd_strshift(const char **str, const char *prefix)
{
    size_t len = strlen(prefix);

    if (strncmp(*str, prefix, len) == 0) {
        *str += len;
        return true;
    }
    return false;
}
893b0769d8fSVladimir Sementsov-Ogievskiy
894e7b1948dSVladimir Sementsov-Ogievskiy /* nbd_meta_base_query
895e7b1948dSVladimir Sementsov-Ogievskiy *
896dbb8b396SVladimir Sementsov-Ogievskiy * Handle queries to 'base' namespace. For now, only the base:allocation
897ebd57062SEric Blake * context is available. Return true if @query has been handled.
898dbb8b396SVladimir Sementsov-Ogievskiy */
8994fa333e0SEric Blake static coroutine_fn bool
nbd_meta_base_query(NBDClient * client,NBDMetaContexts * meta,const char * query)9004fa333e0SEric Blake nbd_meta_base_query(NBDClient *client, NBDMetaContexts *meta,
901ebd57062SEric Blake const char *query)
902e7b1948dSVladimir Sementsov-Ogievskiy {
903ebd57062SEric Blake if (!nbd_strshift(&query, "base:")) {
904ebd57062SEric Blake return false;
905ebd57062SEric Blake }
906ebd57062SEric Blake trace_nbd_negotiate_meta_query_parse("base:");
907ebd57062SEric Blake
908ebd57062SEric Blake if (nbd_meta_empty_or_pattern(client, "allocation", query)) {
909ebd57062SEric Blake meta->base_allocation = true;
910ebd57062SEric Blake }
911ebd57062SEric Blake return true;
912e7b1948dSVladimir Sementsov-Ogievskiy }
913e7b1948dSVladimir Sementsov-Ogievskiy
914ebd57062SEric Blake /* nbd_meta_qemu_query
9153d068affSVladimir Sementsov-Ogievskiy *
916ebd57062SEric Blake * Handle queries to 'qemu' namespace. For now, only the qemu:dirty-bitmap:
91771719cd5SEric Blake * and qemu:allocation-depth contexts are available. Return true if @query
91871719cd5SEric Blake * has been handled.
919ebd57062SEric Blake */
9204fa333e0SEric Blake static coroutine_fn bool
nbd_meta_qemu_query(NBDClient * client,NBDMetaContexts * meta,const char * query)9214fa333e0SEric Blake nbd_meta_qemu_query(NBDClient *client, NBDMetaContexts *meta,
922ebd57062SEric Blake const char *query)
9233d068affSVladimir Sementsov-Ogievskiy {
9243b1f244cSEric Blake size_t i;
9253b1f244cSEric Blake
926ebd57062SEric Blake if (!nbd_strshift(&query, "qemu:")) {
927ebd57062SEric Blake return false;
9283d068affSVladimir Sementsov-Ogievskiy }
929ebd57062SEric Blake trace_nbd_negotiate_meta_query_parse("qemu:");
9303d068affSVladimir Sementsov-Ogievskiy
931ebd57062SEric Blake if (!*query) {
9323d068affSVladimir Sementsov-Ogievskiy if (client->opt == NBD_OPT_LIST_META_CONTEXT) {
93371719cd5SEric Blake meta->allocation_depth = meta->exp->allocation_depth;
93476df2b8dSEric Blake if (meta->exp->nr_export_bitmaps) {
9353b1f244cSEric Blake memset(meta->bitmaps, 1, meta->exp->nr_export_bitmaps);
9363d068affSVladimir Sementsov-Ogievskiy }
93776df2b8dSEric Blake }
9383d068affSVladimir Sementsov-Ogievskiy trace_nbd_negotiate_meta_query_parse("empty");
939ebd57062SEric Blake return true;
9403d068affSVladimir Sementsov-Ogievskiy }
9413d068affSVladimir Sementsov-Ogievskiy
94271719cd5SEric Blake if (strcmp(query, "allocation-depth") == 0) {
94371719cd5SEric Blake trace_nbd_negotiate_meta_query_parse("allocation-depth");
94471719cd5SEric Blake meta->allocation_depth = meta->exp->allocation_depth;
94571719cd5SEric Blake return true;
94671719cd5SEric Blake }
94771719cd5SEric Blake
948ebd57062SEric Blake if (nbd_strshift(&query, "dirty-bitmap:")) {
9493d068affSVladimir Sementsov-Ogievskiy trace_nbd_negotiate_meta_query_parse("dirty-bitmap:");
9503b1f244cSEric Blake if (!*query) {
95176df2b8dSEric Blake if (client->opt == NBD_OPT_LIST_META_CONTEXT &&
95276df2b8dSEric Blake meta->exp->nr_export_bitmaps) {
9533b1f244cSEric Blake memset(meta->bitmaps, 1, meta->exp->nr_export_bitmaps);
9543b1f244cSEric Blake }
9553b1f244cSEric Blake trace_nbd_negotiate_meta_query_parse("empty");
956ebd57062SEric Blake return true;
957ebd57062SEric Blake }
9583b1f244cSEric Blake
9593b1f244cSEric Blake for (i = 0; i < meta->exp->nr_export_bitmaps; i++) {
9603b1f244cSEric Blake const char *bm_name;
9613b1f244cSEric Blake
9623b1f244cSEric Blake bm_name = bdrv_dirty_bitmap_name(meta->exp->export_bitmaps[i]);
9633b1f244cSEric Blake if (strcmp(bm_name, query) == 0) {
9643b1f244cSEric Blake meta->bitmaps[i] = true;
9653b1f244cSEric Blake trace_nbd_negotiate_meta_query_parse(query);
9663b1f244cSEric Blake return true;
967ebd57062SEric Blake }
9683b1f244cSEric Blake }
9693b1f244cSEric Blake trace_nbd_negotiate_meta_query_skip("no dirty-bitmap match");
970ebd57062SEric Blake return true;
971ebd57062SEric Blake }
9723d068affSVladimir Sementsov-Ogievskiy
97371719cd5SEric Blake trace_nbd_negotiate_meta_query_skip("unknown qemu context");
974ebd57062SEric Blake return true;
9753d068affSVladimir Sementsov-Ogievskiy }
9763d068affSVladimir Sementsov-Ogievskiy
977e7b1948dSVladimir Sementsov-Ogievskiy /* nbd_negotiate_meta_query
978e7b1948dSVladimir Sementsov-Ogievskiy *
979e7b1948dSVladimir Sementsov-Ogievskiy * Parse namespace name and call corresponding function to parse body of the
980e7b1948dSVladimir Sementsov-Ogievskiy * query.
981e7b1948dSVladimir Sementsov-Ogievskiy *
98293676c88SEric Blake * The only supported namespaces are 'base' and 'qemu'.
983e7b1948dSVladimir Sementsov-Ogievskiy *
984e7b1948dSVladimir Sementsov-Ogievskiy * Return -errno on I/O error, 0 if option was completely handled by
985e7b1948dSVladimir Sementsov-Ogievskiy * sending a reply about inconsistent lengths, or 1 on success. */
9864fa333e0SEric Blake static coroutine_fn int
nbd_negotiate_meta_query(NBDClient * client,NBDMetaContexts * meta,Error ** errp)9874fa333e0SEric Blake nbd_negotiate_meta_query(NBDClient *client,
988fd358d83SEric Blake NBDMetaContexts *meta, Error **errp)
989e7b1948dSVladimir Sementsov-Ogievskiy {
990e7b1948dSVladimir Sementsov-Ogievskiy int ret;
991ebd57062SEric Blake g_autofree char *query = NULL;
992e7b1948dSVladimir Sementsov-Ogievskiy uint32_t len;
993e7b1948dSVladimir Sementsov-Ogievskiy
994d1e2c3e7SEric Blake ret = nbd_opt_read(client, &len, sizeof(len), false, errp);
995e7b1948dSVladimir Sementsov-Ogievskiy if (ret <= 0) {
996e7b1948dSVladimir Sementsov-Ogievskiy return ret;
997e7b1948dSVladimir Sementsov-Ogievskiy }
99880c7c2b0SPeter Maydell len = cpu_to_be32(len);
999e7b1948dSVladimir Sementsov-Ogievskiy
100093676c88SEric Blake if (len > NBD_MAX_STRING_SIZE) {
100193676c88SEric Blake trace_nbd_negotiate_meta_query_skip("length too long");
100293676c88SEric Blake return nbd_opt_skip(client, len, errp);
100393676c88SEric Blake }
1004e7b1948dSVladimir Sementsov-Ogievskiy
1005ebd57062SEric Blake query = g_malloc(len + 1);
1006ebd57062SEric Blake ret = nbd_opt_read(client, query, len, true, errp);
1007e7b1948dSVladimir Sementsov-Ogievskiy if (ret <= 0) {
1008e7b1948dSVladimir Sementsov-Ogievskiy return ret;
1009e7b1948dSVladimir Sementsov-Ogievskiy }
1010ebd57062SEric Blake query[len] = '\0';
1011e7b1948dSVladimir Sementsov-Ogievskiy
1012ebd57062SEric Blake if (nbd_meta_base_query(client, meta, query)) {
1013ebd57062SEric Blake return 1;
1014ebd57062SEric Blake }
1015ebd57062SEric Blake if (nbd_meta_qemu_query(client, meta, query)) {
1016ebd57062SEric Blake return 1;
10173d068affSVladimir Sementsov-Ogievskiy }
10183d068affSVladimir Sementsov-Ogievskiy
10193d068affSVladimir Sementsov-Ogievskiy trace_nbd_negotiate_meta_query_skip("unknown namespace");
1020ebd57062SEric Blake return 1;
1021e7b1948dSVladimir Sementsov-Ogievskiy }
1022e7b1948dSVladimir Sementsov-Ogievskiy
1023e7b1948dSVladimir Sementsov-Ogievskiy /* nbd_negotiate_meta_queries
1024e7b1948dSVladimir Sementsov-Ogievskiy * Handle NBD_OPT_LIST_META_CONTEXT and NBD_OPT_SET_META_CONTEXT
1025e7b1948dSVladimir Sementsov-Ogievskiy *
1026e7b1948dSVladimir Sementsov-Ogievskiy * Return -errno on I/O error, or 0 if option was completely handled. */
10274fa333e0SEric Blake static coroutine_fn int
nbd_negotiate_meta_queries(NBDClient * client,Error ** errp)10284fa333e0SEric Blake nbd_negotiate_meta_queries(NBDClient *client, Error **errp)
1029e7b1948dSVladimir Sementsov-Ogievskiy {
1030e7b1948dSVladimir Sementsov-Ogievskiy int ret;
10319d7ab222SEric Blake g_autofree char *export_name = NULL;
1032cd1675f8SRichard Henderson /* Mark unused to work around https://bugs.llvm.org/show_bug.cgi?id=3888 */
1033cd1675f8SRichard Henderson g_autofree G_GNUC_UNUSED bool *bitmaps = NULL;
1034fd358d83SEric Blake NBDMetaContexts local_meta = {0};
1035fd358d83SEric Blake NBDMetaContexts *meta;
1036e7b1948dSVladimir Sementsov-Ogievskiy uint32_t nb_queries;
10373b1f244cSEric Blake size_t i;
103847ec485eSEric Blake size_t count = 0;
1039e7b1948dSVladimir Sementsov-Ogievskiy
1040ac132d05SEric Blake if (client->opt == NBD_OPT_SET_META_CONTEXT &&
1041ac132d05SEric Blake client->mode < NBD_MODE_STRUCTURED) {
1042e7b1948dSVladimir Sementsov-Ogievskiy return nbd_opt_invalid(client, errp,
1043e7b1948dSVladimir Sementsov-Ogievskiy "request option '%s' when structured reply "
1044e7b1948dSVladimir Sementsov-Ogievskiy "is not negotiated",
1045e7b1948dSVladimir Sementsov-Ogievskiy nbd_opt_lookup(client->opt));
1046e7b1948dSVladimir Sementsov-Ogievskiy }
1047e7b1948dSVladimir Sementsov-Ogievskiy
1048e7b1948dSVladimir Sementsov-Ogievskiy if (client->opt == NBD_OPT_LIST_META_CONTEXT) {
1049e7b1948dSVladimir Sementsov-Ogievskiy /* Only change the caller's meta on SET. */
1050e7b1948dSVladimir Sementsov-Ogievskiy meta = &local_meta;
1051fd358d83SEric Blake } else {
1052fd358d83SEric Blake meta = &client->contexts;
1053e7b1948dSVladimir Sementsov-Ogievskiy }
1054e7b1948dSVladimir Sementsov-Ogievskiy
10553b1f244cSEric Blake g_free(meta->bitmaps);
1056e7b1948dSVladimir Sementsov-Ogievskiy memset(meta, 0, sizeof(*meta));
1057e7b1948dSVladimir Sementsov-Ogievskiy
10589d7ab222SEric Blake ret = nbd_opt_read_name(client, &export_name, NULL, errp);
1059e7b1948dSVladimir Sementsov-Ogievskiy if (ret <= 0) {
1060e7b1948dSVladimir Sementsov-Ogievskiy return ret;
1061e7b1948dSVladimir Sementsov-Ogievskiy }
1062e7b1948dSVladimir Sementsov-Ogievskiy
1063af736e54SVladimir Sementsov-Ogievskiy meta->exp = nbd_export_find(export_name);
1064af736e54SVladimir Sementsov-Ogievskiy if (meta->exp == NULL) {
10655c4fe018SEric Blake g_autofree char *sane_name = nbd_sanitize_name(export_name);
10665c4fe018SEric Blake
1067e7b1948dSVladimir Sementsov-Ogievskiy return nbd_opt_drop(client, NBD_REP_ERR_UNKNOWN, errp,
10685c4fe018SEric Blake "export '%s' not present", sane_name);
1069e7b1948dSVladimir Sementsov-Ogievskiy }
10703b1f244cSEric Blake meta->bitmaps = g_new0(bool, meta->exp->nr_export_bitmaps);
10713b1f244cSEric Blake if (client->opt == NBD_OPT_LIST_META_CONTEXT) {
10723b1f244cSEric Blake bitmaps = meta->bitmaps;
10733b1f244cSEric Blake }
1074e7b1948dSVladimir Sementsov-Ogievskiy
1075d1e2c3e7SEric Blake ret = nbd_opt_read(client, &nb_queries, sizeof(nb_queries), false, errp);
1076e7b1948dSVladimir Sementsov-Ogievskiy if (ret <= 0) {
1077e7b1948dSVladimir Sementsov-Ogievskiy return ret;
1078e7b1948dSVladimir Sementsov-Ogievskiy }
107980c7c2b0SPeter Maydell nb_queries = cpu_to_be32(nb_queries);
10802b53af25SEric Blake trace_nbd_negotiate_meta_context(nbd_opt_lookup(client->opt),
1081af736e54SVladimir Sementsov-Ogievskiy export_name, nb_queries);
1082e7b1948dSVladimir Sementsov-Ogievskiy
1083e7b1948dSVladimir Sementsov-Ogievskiy if (client->opt == NBD_OPT_LIST_META_CONTEXT && !nb_queries) {
1084e7b1948dSVladimir Sementsov-Ogievskiy /* enable all known contexts */
1085e7b1948dSVladimir Sementsov-Ogievskiy meta->base_allocation = true;
108671719cd5SEric Blake meta->allocation_depth = meta->exp->allocation_depth;
108776df2b8dSEric Blake if (meta->exp->nr_export_bitmaps) {
10883b1f244cSEric Blake memset(meta->bitmaps, 1, meta->exp->nr_export_bitmaps);
108976df2b8dSEric Blake }
1090e7b1948dSVladimir Sementsov-Ogievskiy } else {
1091e7b1948dSVladimir Sementsov-Ogievskiy for (i = 0; i < nb_queries; ++i) {
1092e7b1948dSVladimir Sementsov-Ogievskiy ret = nbd_negotiate_meta_query(client, meta, errp);
1093e7b1948dSVladimir Sementsov-Ogievskiy if (ret <= 0) {
1094e7b1948dSVladimir Sementsov-Ogievskiy return ret;
1095e7b1948dSVladimir Sementsov-Ogievskiy }
1096e7b1948dSVladimir Sementsov-Ogievskiy }
1097e7b1948dSVladimir Sementsov-Ogievskiy }
1098e7b1948dSVladimir Sementsov-Ogievskiy
1099e7b1948dSVladimir Sementsov-Ogievskiy if (meta->base_allocation) {
1100e7b1948dSVladimir Sementsov-Ogievskiy ret = nbd_negotiate_send_meta_context(client, "base:allocation",
1101e7b1948dSVladimir Sementsov-Ogievskiy NBD_META_ID_BASE_ALLOCATION,
1102e7b1948dSVladimir Sementsov-Ogievskiy errp);
1103e7b1948dSVladimir Sementsov-Ogievskiy if (ret < 0) {
1104e7b1948dSVladimir Sementsov-Ogievskiy return ret;
1105e7b1948dSVladimir Sementsov-Ogievskiy }
110647ec485eSEric Blake count++;
1107e7b1948dSVladimir Sementsov-Ogievskiy }
1108e7b1948dSVladimir Sementsov-Ogievskiy
110971719cd5SEric Blake if (meta->allocation_depth) {
111071719cd5SEric Blake ret = nbd_negotiate_send_meta_context(client, "qemu:allocation-depth",
111171719cd5SEric Blake NBD_META_ID_ALLOCATION_DEPTH,
111271719cd5SEric Blake errp);
111371719cd5SEric Blake if (ret < 0) {
111471719cd5SEric Blake return ret;
111571719cd5SEric Blake }
111671719cd5SEric Blake count++;
111771719cd5SEric Blake }
111871719cd5SEric Blake
11193b1f244cSEric Blake for (i = 0; i < meta->exp->nr_export_bitmaps; i++) {
11203b1f244cSEric Blake const char *bm_name;
11213b1f244cSEric Blake g_autofree char *context = NULL;
11223b1f244cSEric Blake
11233b1f244cSEric Blake if (!meta->bitmaps[i]) {
11243b1f244cSEric Blake continue;
11253b1f244cSEric Blake }
11263b1f244cSEric Blake
11273b1f244cSEric Blake bm_name = bdrv_dirty_bitmap_name(meta->exp->export_bitmaps[i]);
11283b1f244cSEric Blake context = g_strdup_printf("qemu:dirty-bitmap:%s", bm_name);
112902e87e3bSEric Blake
113002e87e3bSEric Blake ret = nbd_negotiate_send_meta_context(client, context,
11313b1f244cSEric Blake NBD_META_ID_DIRTY_BITMAP + i,
11323d068affSVladimir Sementsov-Ogievskiy errp);
11333d068affSVladimir Sementsov-Ogievskiy if (ret < 0) {
11343d068affSVladimir Sementsov-Ogievskiy return ret;
11353d068affSVladimir Sementsov-Ogievskiy }
113647ec485eSEric Blake count++;
11373d068affSVladimir Sementsov-Ogievskiy }
11383d068affSVladimir Sementsov-Ogievskiy
1139e7b1948dSVladimir Sementsov-Ogievskiy ret = nbd_negotiate_send_rep(client, NBD_REP_ACK, errp);
1140e7b1948dSVladimir Sementsov-Ogievskiy if (ret == 0) {
114147ec485eSEric Blake meta->count = count;
1142e7b1948dSVladimir Sementsov-Ogievskiy }
1143e7b1948dSVladimir Sementsov-Ogievskiy
1144e7b1948dSVladimir Sementsov-Ogievskiy return ret;
1145e7b1948dSVladimir Sementsov-Ogievskiy }
1146e7b1948dSVladimir Sementsov-Ogievskiy
11471e120ffeSVladimir Sementsov-Ogievskiy /* nbd_negotiate_options
1148f37708f6SEric Blake * Process all NBD_OPT_* client option commands, during fixed newstyle
1149f37708f6SEric Blake * negotiation.
11501e120ffeSVladimir Sementsov-Ogievskiy * Return:
11512fd2c840SVladimir Sementsov-Ogievskiy * -errno on error, errp is set
11522fd2c840SVladimir Sementsov-Ogievskiy * 0 on successful negotiation, errp is not set
1153*efd3dda3SEric Blake * 1 if client sent NBD_OPT_ABORT (i.e. on valid disconnect) or never
1154*efd3dda3SEric Blake * wrote anything (i.e. port probe); errp is not set
11551e120ffeSVladimir Sementsov-Ogievskiy */
11564fa333e0SEric Blake static coroutine_fn int
nbd_negotiate_options(NBDClient * client,Error ** errp)11574fa333e0SEric Blake nbd_negotiate_options(NBDClient *client, Error **errp)
1158798bfe00SFam Zheng {
1159798bfe00SFam Zheng uint32_t flags;
116026afa868SDaniel P. Berrange bool fixedNewstyle = false;
116123e099c3SEric Blake bool no_zeroes = false;
1162798bfe00SFam Zheng
1163798bfe00SFam Zheng /* Client sends:
1164798bfe00SFam Zheng [ 0 .. 3] client flags
1165798bfe00SFam Zheng
1166f37708f6SEric Blake Then we loop until NBD_OPT_EXPORT_NAME or NBD_OPT_GO:
1167798bfe00SFam Zheng [ 0 .. 7] NBD_OPTS_MAGIC
1168798bfe00SFam Zheng [ 8 .. 11] NBD option
1169798bfe00SFam Zheng [12 .. 15] Data length
1170798bfe00SFam Zheng ... Rest of request
1171798bfe00SFam Zheng
1172798bfe00SFam Zheng [ 0 .. 7] NBD_OPTS_MAGIC
1173798bfe00SFam Zheng [ 8 .. 11] Second NBD option
1174798bfe00SFam Zheng [12 .. 15] Data length
1175798bfe00SFam Zheng ... Rest of request
1176798bfe00SFam Zheng */
1177798bfe00SFam Zheng
1178*efd3dda3SEric Blake /*
1179*efd3dda3SEric Blake * Intentionally ignore errors on this first read - we do not want
1180*efd3dda3SEric Blake * to be noisy about a mere port probe, but only for clients that
1181*efd3dda3SEric Blake * start talking the protocol and then quit abruptly.
1182*efd3dda3SEric Blake */
1183*efd3dda3SEric Blake if (nbd_read32(client->ioc, &flags, "flags", NULL) < 0) {
1184*efd3dda3SEric Blake return 1;
1185798bfe00SFam Zheng }
1186ac132d05SEric Blake client->mode = NBD_MODE_EXPORT_NAME;
1187621c4f4eSEric Blake trace_nbd_negotiate_options_flags(flags);
118826afa868SDaniel P. Berrange if (flags & NBD_FLAG_C_FIXED_NEWSTYLE) {
118926afa868SDaniel P. Berrange fixedNewstyle = true;
119026afa868SDaniel P. Berrange flags &= ~NBD_FLAG_C_FIXED_NEWSTYLE;
1191ac132d05SEric Blake client->mode = NBD_MODE_SIMPLE;
119226afa868SDaniel P. Berrange }
1193c203c59aSEric Blake if (flags & NBD_FLAG_C_NO_ZEROES) {
119423e099c3SEric Blake no_zeroes = true;
1195c203c59aSEric Blake flags &= ~NBD_FLAG_C_NO_ZEROES;
1196c203c59aSEric Blake }
119726afa868SDaniel P. Berrange if (flags != 0) {
11982fd2c840SVladimir Sementsov-Ogievskiy error_setg(errp, "Unknown client flags 0x%" PRIx32 " received", flags);
1199621c4f4eSEric Blake return -EINVAL;
1200798bfe00SFam Zheng }
1201798bfe00SFam Zheng
1202798bfe00SFam Zheng while (1) {
1203798bfe00SFam Zheng int ret;
12047f9039cdSVladimir Sementsov-Ogievskiy uint32_t option, length;
1205798bfe00SFam Zheng uint64_t magic;
1206798bfe00SFam Zheng
1207e6798f06SVladimir Sementsov-Ogievskiy if (nbd_read64(client->ioc, &magic, "opts magic", errp) < 0) {
1208798bfe00SFam Zheng return -EINVAL;
1209798bfe00SFam Zheng }
12109588463eSVladimir Sementsov-Ogievskiy trace_nbd_negotiate_options_check_magic(magic);
12119588463eSVladimir Sementsov-Ogievskiy if (magic != NBD_OPTS_MAGIC) {
12122fd2c840SVladimir Sementsov-Ogievskiy error_setg(errp, "Bad magic received");
1213798bfe00SFam Zheng return -EINVAL;
1214798bfe00SFam Zheng }
1215798bfe00SFam Zheng
1216e6798f06SVladimir Sementsov-Ogievskiy if (nbd_read32(client->ioc, &option, "option", errp) < 0) {
1217798bfe00SFam Zheng return -EINVAL;
1218798bfe00SFam Zheng }
12190cfae925SVladimir Sementsov-Ogievskiy client->opt = option;
1220798bfe00SFam Zheng
1221e6798f06SVladimir Sementsov-Ogievskiy if (nbd_read32(client->ioc, &length, "option length", errp) < 0) {
1222798bfe00SFam Zheng return -EINVAL;
1223798bfe00SFam Zheng }
1224894e0280SEric Blake assert(!client->optlen);
12250cfae925SVladimir Sementsov-Ogievskiy client->optlen = length;
1226798bfe00SFam Zheng
1227fdad35efSEric Blake if (length > NBD_MAX_BUFFER_SIZE) {
1228fdad35efSEric Blake error_setg(errp, "len (%" PRIu32 ") is larger than max len (%u)",
1229fdad35efSEric Blake length, NBD_MAX_BUFFER_SIZE);
1230fdad35efSEric Blake return -EINVAL;
1231fdad35efSEric Blake }
1232fdad35efSEric Blake
12333736cc5bSEric Blake trace_nbd_negotiate_options_check_option(option,
12343736cc5bSEric Blake nbd_opt_lookup(option));
1235f95910feSDaniel P. Berrange if (client->tlscreds &&
1236f95910feSDaniel P. Berrange client->ioc == (QIOChannel *)client->sioc) {
1237f95910feSDaniel P. Berrange QIOChannel *tioc;
1238f95910feSDaniel P. Berrange if (!fixedNewstyle) {
12397f9039cdSVladimir Sementsov-Ogievskiy error_setg(errp, "Unsupported option 0x%" PRIx32, option);
1240f95910feSDaniel P. Berrange return -EINVAL;
1241f95910feSDaniel P. Berrange }
12427f9039cdSVladimir Sementsov-Ogievskiy switch (option) {
1243f95910feSDaniel P. Berrange case NBD_OPT_STARTTLS:
1244e68c35cfSEric Blake if (length) {
1245e68c35cfSEric Blake /* Unconditionally drop the connection if the client
1246e68c35cfSEric Blake * can't start a TLS negotiation correctly */
12470cfae925SVladimir Sementsov-Ogievskiy return nbd_reject_length(client, true, errp);
1248e68c35cfSEric Blake }
1249e68c35cfSEric Blake tioc = nbd_negotiate_handle_starttls(client, errp);
1250f95910feSDaniel P. Berrange if (!tioc) {
1251f95910feSDaniel P. Berrange return -EIO;
1252f95910feSDaniel P. Berrange }
12538cbee49eSEric Blake ret = 0;
1254f95910feSDaniel P. Berrange object_unref(OBJECT(client->ioc));
12557d5b0d68SPhilippe Mathieu-Daudé client->ioc = tioc;
1256f95910feSDaniel P. Berrange break;
1257f95910feSDaniel P. Berrange
1258d1129a8aSEric Blake case NBD_OPT_EXPORT_NAME:
1259d1129a8aSEric Blake /* No way to return an error to client, so drop connection */
12602fd2c840SVladimir Sementsov-Ogievskiy error_setg(errp, "Option 0x%x not permitted before TLS",
12617f9039cdSVladimir Sementsov-Ogievskiy option);
1262d1129a8aSEric Blake return -EINVAL;
1263d1129a8aSEric Blake
1264f95910feSDaniel P. Berrange default:
12653e99ebb9SEric Blake /* Let the client keep trying, unless they asked to
12663e99ebb9SEric Blake * quit. Always try to give an error back to the
12673e99ebb9SEric Blake * client; but when replying to OPT_ABORT, be aware
12683e99ebb9SEric Blake * that the client may hang up before receiving the
12693e99ebb9SEric Blake * error, in which case we are fine ignoring the
12703e99ebb9SEric Blake * resulting EPIPE. */
12713e99ebb9SEric Blake ret = nbd_opt_drop(client, NBD_REP_ERR_TLS_REQD,
12723e99ebb9SEric Blake option == NBD_OPT_ABORT ? NULL : errp,
127336683283SEric Blake "Option 0x%" PRIx32
1274894e0280SEric Blake " not permitted before TLS", option);
12757f9039cdSVladimir Sementsov-Ogievskiy if (option == NBD_OPT_ABORT) {
12761e120ffeSVladimir Sementsov-Ogievskiy return 1;
1277b6f5d3b5SEric Blake }
1278d1129a8aSEric Blake break;
1279f95910feSDaniel P. Berrange }
1280f95910feSDaniel P. Berrange } else if (fixedNewstyle) {
12817f9039cdSVladimir Sementsov-Ogievskiy switch (option) {
1282798bfe00SFam Zheng case NBD_OPT_LIST:
1283e68c35cfSEric Blake if (length) {
12840cfae925SVladimir Sementsov-Ogievskiy ret = nbd_reject_length(client, false, errp);
1285e68c35cfSEric Blake } else {
1286e68c35cfSEric Blake ret = nbd_negotiate_handle_list(client, errp);
1287e68c35cfSEric Blake }
1288798bfe00SFam Zheng break;
1289798bfe00SFam Zheng
1290798bfe00SFam Zheng case NBD_OPT_ABORT:
1291b6f5d3b5SEric Blake /* NBD spec says we must try to reply before
1292b6f5d3b5SEric Blake * disconnecting, but that we must also tolerate
1293b6f5d3b5SEric Blake * guests that don't wait for our reply. */
12940cfae925SVladimir Sementsov-Ogievskiy nbd_negotiate_send_rep(client, NBD_REP_ACK, NULL);
12951e120ffeSVladimir Sementsov-Ogievskiy return 1;
1296798bfe00SFam Zheng
1297798bfe00SFam Zheng case NBD_OPT_EXPORT_NAME:
1298dbb38caaSEric Blake return nbd_negotiate_handle_export_name(client, no_zeroes,
129923e099c3SEric Blake errp);
1300798bfe00SFam Zheng
1301f37708f6SEric Blake case NBD_OPT_INFO:
1302f37708f6SEric Blake case NBD_OPT_GO:
1303dbb38caaSEric Blake ret = nbd_negotiate_handle_info(client, errp);
1304f37708f6SEric Blake if (ret == 1) {
1305f37708f6SEric Blake assert(option == NBD_OPT_GO);
1306f37708f6SEric Blake return 0;
1307f37708f6SEric Blake }
1308f37708f6SEric Blake break;
1309f37708f6SEric Blake
1310f95910feSDaniel P. Berrange case NBD_OPT_STARTTLS:
1311e68c35cfSEric Blake if (length) {
13120cfae925SVladimir Sementsov-Ogievskiy ret = nbd_reject_length(client, false, errp);
1313e68c35cfSEric Blake } else if (client->tlscreds) {
13140cfae925SVladimir Sementsov-Ogievskiy ret = nbd_negotiate_send_rep_err(client,
13150cfae925SVladimir Sementsov-Ogievskiy NBD_REP_ERR_INVALID, errp,
131636683283SEric Blake "TLS already enabled");
1317f95910feSDaniel P. Berrange } else {
13180cfae925SVladimir Sementsov-Ogievskiy ret = nbd_negotiate_send_rep_err(client,
13190cfae925SVladimir Sementsov-Ogievskiy NBD_REP_ERR_POLICY, errp,
132036683283SEric Blake "TLS not configured");
1321f95910feSDaniel P. Berrange }
1322d1129a8aSEric Blake break;
13235c54e7faSVladimir Sementsov-Ogievskiy
13245c54e7faSVladimir Sementsov-Ogievskiy case NBD_OPT_STRUCTURED_REPLY:
13255c54e7faSVladimir Sementsov-Ogievskiy if (length) {
13260cfae925SVladimir Sementsov-Ogievskiy ret = nbd_reject_length(client, false, errp);
13279c1d2614SEric Blake } else if (client->mode >= NBD_MODE_EXTENDED) {
13289c1d2614SEric Blake ret = nbd_negotiate_send_rep_err(
13299c1d2614SEric Blake client, NBD_REP_ERR_EXT_HEADER_REQD, errp,
13309c1d2614SEric Blake "extended headers already negotiated");
1331ac132d05SEric Blake } else if (client->mode >= NBD_MODE_STRUCTURED) {
13325c54e7faSVladimir Sementsov-Ogievskiy ret = nbd_negotiate_send_rep_err(
13330cfae925SVladimir Sementsov-Ogievskiy client, NBD_REP_ERR_INVALID, errp,
13345c54e7faSVladimir Sementsov-Ogievskiy "structured reply already negotiated");
13355c54e7faSVladimir Sementsov-Ogievskiy } else {
13360cfae925SVladimir Sementsov-Ogievskiy ret = nbd_negotiate_send_rep(client, NBD_REP_ACK, errp);
1337ac132d05SEric Blake client->mode = NBD_MODE_STRUCTURED;
13385c54e7faSVladimir Sementsov-Ogievskiy }
13395c54e7faSVladimir Sementsov-Ogievskiy break;
13405c54e7faSVladimir Sementsov-Ogievskiy
1341e7b1948dSVladimir Sementsov-Ogievskiy case NBD_OPT_LIST_META_CONTEXT:
1342e7b1948dSVladimir Sementsov-Ogievskiy case NBD_OPT_SET_META_CONTEXT:
1343fd358d83SEric Blake ret = nbd_negotiate_meta_queries(client, errp);
1344e7b1948dSVladimir Sementsov-Ogievskiy break;
1345e7b1948dSVladimir Sementsov-Ogievskiy
13469c1d2614SEric Blake case NBD_OPT_EXTENDED_HEADERS:
13479c1d2614SEric Blake if (length) {
13489c1d2614SEric Blake ret = nbd_reject_length(client, false, errp);
13499c1d2614SEric Blake } else if (client->mode >= NBD_MODE_EXTENDED) {
13509c1d2614SEric Blake ret = nbd_negotiate_send_rep_err(
13519c1d2614SEric Blake client, NBD_REP_ERR_INVALID, errp,
13529c1d2614SEric Blake "extended headers already negotiated");
13539c1d2614SEric Blake } else {
13549c1d2614SEric Blake ret = nbd_negotiate_send_rep(client, NBD_REP_ACK, errp);
13559c1d2614SEric Blake client->mode = NBD_MODE_EXTENDED;
13569c1d2614SEric Blake }
13579c1d2614SEric Blake break;
13589c1d2614SEric Blake
1359798bfe00SFam Zheng default:
1360894e0280SEric Blake ret = nbd_opt_drop(client, NBD_REP_ERR_UNSUP, errp,
136128fb494fSVladimir Sementsov-Ogievskiy "Unsupported option %" PRIu32 " (%s)",
1362894e0280SEric Blake option, nbd_opt_lookup(option));
1363156f6a10SEric Blake break;
1364798bfe00SFam Zheng }
136526afa868SDaniel P. Berrange } else {
136626afa868SDaniel P. Berrange /*
136726afa868SDaniel P. Berrange * If broken new-style we should drop the connection
136826afa868SDaniel P. Berrange * for anything except NBD_OPT_EXPORT_NAME
136926afa868SDaniel P. Berrange */
13707f9039cdSVladimir Sementsov-Ogievskiy switch (option) {
137126afa868SDaniel P. Berrange case NBD_OPT_EXPORT_NAME:
1372dbb38caaSEric Blake return nbd_negotiate_handle_export_name(client, no_zeroes,
137323e099c3SEric Blake errp);
137426afa868SDaniel P. Berrange
137526afa868SDaniel P. Berrange default:
137628fb494fSVladimir Sementsov-Ogievskiy error_setg(errp, "Unsupported option %" PRIu32 " (%s)",
13773736cc5bSEric Blake option, nbd_opt_lookup(option));
137826afa868SDaniel P. Berrange return -EINVAL;
137926afa868SDaniel P. Berrange }
138026afa868SDaniel P. Berrange }
13818cbee49eSEric Blake if (ret < 0) {
13828cbee49eSEric Blake return ret;
13838cbee49eSEric Blake }
1384798bfe00SFam Zheng }
1385798bfe00SFam Zheng }
1386798bfe00SFam Zheng
13871e120ffeSVladimir Sementsov-Ogievskiy /* nbd_negotiate
13881e120ffeSVladimir Sementsov-Ogievskiy * Return:
13892fd2c840SVladimir Sementsov-Ogievskiy * -errno on error, errp is set
13902fd2c840SVladimir Sementsov-Ogievskiy * 0 on successful negotiation, errp is not set
1391*efd3dda3SEric Blake * 1 if client sent NBD_OPT_ABORT (i.e. on valid disconnect) or never
1392*efd3dda3SEric Blake * wrote anything (i.e. port probe); errp is not set
13931e120ffeSVladimir Sementsov-Ogievskiy */
nbd_negotiate(NBDClient * client,Error ** errp)13942fd2c840SVladimir Sementsov-Ogievskiy static coroutine_fn int nbd_negotiate(NBDClient *client, Error **errp)
1395798bfe00SFam Zheng {
1396795d946dSVladimir Sementsov-Ogievskiy ERRP_GUARD();
13975f66d060SEric Blake char buf[NBD_OLDSTYLE_NEGOTIATE_SIZE] = "";
13982e5c9ad6SVladimir Sementsov-Ogievskiy int ret;
1399798bfe00SFam Zheng
14005f66d060SEric Blake /* Old style negotiation header, no room for options
1401798bfe00SFam Zheng [ 0 .. 7] passwd ("NBDMAGIC")
1402798bfe00SFam Zheng [ 8 .. 15] magic (NBD_CLIENT_MAGIC)
1403798bfe00SFam Zheng [16 .. 23] size
14045f66d060SEric Blake [24 .. 27] export flags (zero-extended)
1405798bfe00SFam Zheng [28 .. 151] reserved (0)
1406798bfe00SFam Zheng
14075f66d060SEric Blake New style negotiation header, client can send options
1408798bfe00SFam Zheng [ 0 .. 7] passwd ("NBDMAGIC")
1409798bfe00SFam Zheng [ 8 .. 15] magic (NBD_OPTS_MAGIC)
1410798bfe00SFam Zheng [16 .. 17] server flags (0)
1411f37708f6SEric Blake ....options sent, ending in NBD_OPT_EXPORT_NAME or NBD_OPT_GO....
1412798bfe00SFam Zheng */
1413798bfe00SFam Zheng
14141c778ef7SDaniel P. Berrange qio_channel_set_blocking(client->ioc, false, NULL);
141506e0f098SStefan Hajnoczi qio_channel_set_follow_coroutine_ctx(client->ioc, true);
1416798bfe00SFam Zheng
14179588463eSVladimir Sementsov-Ogievskiy trace_nbd_negotiate_begin();
1418798bfe00SFam Zheng memcpy(buf, "NBDMAGIC", 8);
1419f95910feSDaniel P. Berrange
142076ff081dSVladimir Sementsov-Ogievskiy stq_be_p(buf + 8, NBD_OPTS_MAGIC);
142176ff081dSVladimir Sementsov-Ogievskiy stw_be_p(buf + 16, NBD_FLAG_FIXED_NEWSTYLE | NBD_FLAG_NO_ZEROES);
142276ff081dSVladimir Sementsov-Ogievskiy
1423*efd3dda3SEric Blake /*
1424*efd3dda3SEric Blake * Be silent about failure to write our greeting: there is nothing
1425*efd3dda3SEric Blake * wrong with a client testing if our port is alive.
1426*efd3dda3SEric Blake */
1427*efd3dda3SEric Blake if (nbd_write(client->ioc, buf, 18, NULL) < 0) {
1428*efd3dda3SEric Blake return 1;
1429798bfe00SFam Zheng }
1430dbb38caaSEric Blake ret = nbd_negotiate_options(client, errp);
14312e5c9ad6SVladimir Sementsov-Ogievskiy if (ret != 0) {
14322fd2c840SVladimir Sementsov-Ogievskiy if (ret < 0) {
14332fd2c840SVladimir Sementsov-Ogievskiy error_prepend(errp, "option negotiation failed: ");
14342fd2c840SVladimir Sementsov-Ogievskiy }
14352e5c9ad6SVladimir Sementsov-Ogievskiy return ret;
1436798bfe00SFam Zheng }
1437798bfe00SFam Zheng
14380cfae925SVladimir Sementsov-Ogievskiy assert(!client->optlen);
14399588463eSVladimir Sementsov-Ogievskiy trace_nbd_negotiate_success();
1440d9faeed8SVladimir Sementsov-Ogievskiy
1441d9faeed8SVladimir Sementsov-Ogievskiy return 0;
1442798bfe00SFam Zheng }
1443798bfe00SFam Zheng
1444f148ae7dSSergio Lopez /* nbd_read_eof
1445f148ae7dSSergio Lopez * Tries to read @size bytes from @ioc. This is a local implementation of
1446f148ae7dSSergio Lopez * qio_channel_readv_all_eof. We have it here because we need it to be
1447f148ae7dSSergio Lopez * interruptible and to know when the coroutine is yielding.
1448f148ae7dSSergio Lopez * Returns 1 on success
1449f148ae7dSSergio Lopez * 0 on eof, when no data was read (errp is not set)
1450f148ae7dSSergio Lopez * negative errno on failure (errp is set)
1451f148ae7dSSergio Lopez */
1452f148ae7dSSergio Lopez static inline int coroutine_fn
nbd_read_eof(NBDClient * client,void * buffer,size_t size,Error ** errp)1453f148ae7dSSergio Lopez nbd_read_eof(NBDClient *client, void *buffer, size_t size, Error **errp)
1454f148ae7dSSergio Lopez {
1455f148ae7dSSergio Lopez bool partial = false;
1456f148ae7dSSergio Lopez
1457f148ae7dSSergio Lopez assert(size);
1458f148ae7dSSergio Lopez while (size > 0) {
1459f148ae7dSSergio Lopez struct iovec iov = { .iov_base = buffer, .iov_len = size };
1460f148ae7dSSergio Lopez ssize_t len;
1461f148ae7dSSergio Lopez
1462f148ae7dSSergio Lopez len = qio_channel_readv(client->ioc, &iov, 1, errp);
1463f148ae7dSSergio Lopez if (len == QIO_CHANNEL_ERR_BLOCK) {
14647075d235SStefan Hajnoczi WITH_QEMU_LOCK_GUARD(&client->lock) {
1465f148ae7dSSergio Lopez client->read_yielding = true;
14667075d235SStefan Hajnoczi
14677075d235SStefan Hajnoczi /* Prompt main loop thread to re-run nbd_drained_poll() */
14687075d235SStefan Hajnoczi aio_wait_kick();
14697075d235SStefan Hajnoczi }
1470f148ae7dSSergio Lopez qio_channel_yield(client->ioc, G_IO_IN);
14717075d235SStefan Hajnoczi WITH_QEMU_LOCK_GUARD(&client->lock) {
1472f148ae7dSSergio Lopez client->read_yielding = false;
1473f148ae7dSSergio Lopez if (client->quiescing) {
1474f148ae7dSSergio Lopez return -EAGAIN;
1475f148ae7dSSergio Lopez }
14767075d235SStefan Hajnoczi }
1477f148ae7dSSergio Lopez continue;
1478f148ae7dSSergio Lopez } else if (len < 0) {
1479f148ae7dSSergio Lopez return -EIO;
1480f148ae7dSSergio Lopez } else if (len == 0) {
1481f148ae7dSSergio Lopez if (partial) {
1482f148ae7dSSergio Lopez error_setg(errp,
1483f148ae7dSSergio Lopez "Unexpected end-of-file before all bytes were read");
1484f148ae7dSSergio Lopez return -EIO;
1485f148ae7dSSergio Lopez } else {
1486f148ae7dSSergio Lopez return 0;
1487f148ae7dSSergio Lopez }
1488f148ae7dSSergio Lopez }
1489f148ae7dSSergio Lopez
1490f148ae7dSSergio Lopez partial = true;
1491f148ae7dSSergio Lopez size -= len;
1492f148ae7dSSergio Lopez buffer = (uint8_t *) buffer + len;
1493f148ae7dSSergio Lopez }
1494f148ae7dSSergio Lopez return 1;
1495f148ae7dSSergio Lopez }
1496f148ae7dSSergio Lopez
nbd_receive_request(NBDClient * client,NBDRequest * request,Error ** errp)1497d2223cddSPaolo Bonzini static int coroutine_fn nbd_receive_request(NBDClient *client, NBDRequest *request,
14982fd2c840SVladimir Sementsov-Ogievskiy Error **errp)
1499798bfe00SFam Zheng {
1500c8720ca0SEric Blake uint8_t buf[NBD_EXTENDED_REQUEST_SIZE];
1501c8720ca0SEric Blake uint32_t magic, expect;
1502a0dc63a6SVladimir Sementsov-Ogievskiy int ret;
1503c8720ca0SEric Blake size_t size = client->mode >= NBD_MODE_EXTENDED ?
1504c8720ca0SEric Blake NBD_EXTENDED_REQUEST_SIZE : NBD_REQUEST_SIZE;
1505798bfe00SFam Zheng
1506c8720ca0SEric Blake ret = nbd_read_eof(client, buf, size, errp);
1507798bfe00SFam Zheng if (ret < 0) {
1508798bfe00SFam Zheng return ret;
1509798bfe00SFam Zheng }
15101644ccceSEric Blake if (ret == 0) {
15111644ccceSEric Blake return -EIO;
15121644ccceSEric Blake }
1513798bfe00SFam Zheng
1514c8720ca0SEric Blake /*
1515c8720ca0SEric Blake * Compact request
1516c8720ca0SEric Blake * [ 0 .. 3] magic (NBD_REQUEST_MAGIC)
1517c8720ca0SEric Blake * [ 4 .. 5] flags (NBD_CMD_FLAG_FUA, ...)
1518c8720ca0SEric Blake * [ 6 .. 7] type (NBD_CMD_READ, ...)
1519c8720ca0SEric Blake * [ 8 .. 15] cookie
1520c8720ca0SEric Blake * [16 .. 23] from
1521c8720ca0SEric Blake * [24 .. 27] len
1522c8720ca0SEric Blake * Extended request
1523c8720ca0SEric Blake * [ 0 .. 3] magic (NBD_EXTENDED_REQUEST_MAGIC)
1524c8720ca0SEric Blake * [ 4 .. 5] flags (NBD_CMD_FLAG_FUA, NBD_CMD_FLAG_PAYLOAD_LEN, ...)
1525c8720ca0SEric Blake * [ 6 .. 7] type (NBD_CMD_READ, ...)
1526c8720ca0SEric Blake * [ 8 .. 15] cookie
1527c8720ca0SEric Blake * [16 .. 23] from
1528c8720ca0SEric Blake * [24 .. 31] len
1529798bfe00SFam Zheng */
1530798bfe00SFam Zheng
1531773dce3cSPeter Maydell magic = ldl_be_p(buf);
1532b626b51aSEric Blake request->flags = lduw_be_p(buf + 4);
1533b626b51aSEric Blake request->type = lduw_be_p(buf + 6);
153422efd811SEric Blake request->cookie = ldq_be_p(buf + 8);
1535773dce3cSPeter Maydell request->from = ldq_be_p(buf + 16);
1536c8720ca0SEric Blake if (client->mode >= NBD_MODE_EXTENDED) {
1537c8720ca0SEric Blake request->len = ldq_be_p(buf + 24);
1538c8720ca0SEric Blake expect = NBD_EXTENDED_REQUEST_MAGIC;
1539c8720ca0SEric Blake } else {
1540b2578459SEric Blake request->len = (uint32_t)ldl_be_p(buf + 24); /* widen 32 to 64 bits */
1541c8720ca0SEric Blake expect = NBD_REQUEST_MAGIC;
1542c8720ca0SEric Blake }
1543798bfe00SFam Zheng
15449588463eSVladimir Sementsov-Ogievskiy trace_nbd_receive_request(magic, request->flags, request->type,
15459588463eSVladimir Sementsov-Ogievskiy request->from, request->len);
1546798bfe00SFam Zheng
1547c8720ca0SEric Blake if (magic != expect) {
1548c8720ca0SEric Blake error_setg(errp, "invalid magic (got 0x%" PRIx32 ", expected 0x%"
1549c8720ca0SEric Blake PRIx32 ")", magic, expect);
1550798bfe00SFam Zheng return -EINVAL;
1551798bfe00SFam Zheng }
1552798bfe00SFam Zheng return 0;
1553798bfe00SFam Zheng }
1554798bfe00SFam Zheng
/* Upper bound on a client's nb_requests, asserted in nbd_request_get(). */
#define MAX_NBD_REQUESTS 16
1556798bfe00SFam Zheng
1557f816310dSStefan Hajnoczi /* Runs in export AioContext and main loop thread */
nbd_client_get(NBDClient * client)1558798bfe00SFam Zheng void nbd_client_get(NBDClient *client)
1559798bfe00SFam Zheng {
1560f816310dSStefan Hajnoczi qatomic_inc(&client->refcount);
1561798bfe00SFam Zheng }
1562798bfe00SFam Zheng
/*
 * Drop a reference on @client; asserts that it runs in the main thread.
 *
 * When the reference dropped was the last one, the client must already be
 * closing, and everything it owns is released before @client itself is
 * freed.
 */
void nbd_client_put(NBDClient *client)
{
    assert(qemu_in_main_thread());

    if (qatomic_fetch_dec(&client->refcount) != 1) {
        /* Other references remain; nothing to tear down yet. */
        return;
    }

    /*
     * The last reference should be dropped by client->close,
     * which is called by client_close.
     */
    assert(client->closing);

    object_unref(OBJECT(client->sioc));
    object_unref(OBJECT(client->ioc));
    if (client->tlscreds) {
        object_unref(OBJECT(client->tlscreds));
    }
    g_free(client->tlsauthz);

    if (client->exp) {
        /* Unlink from the export's client list before dropping the export. */
        QTAILQ_REMOVE(&client->exp->clients, client, next);
        blk_exp_unref(&client->exp->common);
    }

    g_free(client->contexts.bitmaps);
    qemu_mutex_destroy(&client->lock);
    g_free(client);
}
1588798bfe00SFam Zheng
1589f816310dSStefan Hajnoczi /*
1590f816310dSStefan Hajnoczi * Tries to release the reference to @client, but only if other references
1591f816310dSStefan Hajnoczi * remain. This is an optimization for the common case where we want to avoid
1592f816310dSStefan Hajnoczi * the expense of scheduling nbd_client_put() in the main loop thread.
1593f816310dSStefan Hajnoczi *
1594f816310dSStefan Hajnoczi * Returns true upon success or false if the reference was not released because
1595f816310dSStefan Hajnoczi * it is the last reference.
1596f816310dSStefan Hajnoczi */
nbd_client_put_nonzero(NBDClient * client)1597f816310dSStefan Hajnoczi static bool nbd_client_put_nonzero(NBDClient *client)
1598f816310dSStefan Hajnoczi {
1599f816310dSStefan Hajnoczi int old = qatomic_read(&client->refcount);
1600f816310dSStefan Hajnoczi int expected;
1601f816310dSStefan Hajnoczi
1602f816310dSStefan Hajnoczi do {
1603f816310dSStefan Hajnoczi if (old == 1) {
1604f816310dSStefan Hajnoczi return false;
1605f816310dSStefan Hajnoczi }
1606f816310dSStefan Hajnoczi
1607f816310dSStefan Hajnoczi expected = old;
1608f816310dSStefan Hajnoczi old = qatomic_cmpxchg(&client->refcount, expected, expected - 1);
1609f816310dSStefan Hajnoczi } while (old != expected);
1610f816310dSStefan Hajnoczi
1611f816310dSStefan Hajnoczi return true;
1612f816310dSStefan Hajnoczi }
1613f816310dSStefan Hajnoczi
/*
 * Begin closing @client; asserts that it runs in the main thread.
 *
 * Only the first call has any effect: client->closing is checked and set
 * under client->lock. The channel is then shut down in both directions and
 * client->close_fn, if set, is called with @negotiated.
 */
static void client_close(NBDClient *client, bool negotiated)
{
    bool was_closing = false;

    assert(qemu_in_main_thread());

    WITH_QEMU_LOCK_GUARD(&client->lock) {
        was_closing = client->closing;
        client->closing = true;
    }
    if (was_closing) {
        return;
    }

    /*
     * Force requests to finish.  They will drop their own references,
     * then we'll close the socket and free the NBDClient.
     */
    qio_channel_shutdown(client->ioc, QIO_CHANNEL_SHUTDOWN_BOTH, NULL);

    /* Also tell the client, so that they release their reference.  */
    if (client->close_fn) {
        client->close_fn(client, negotiated);
    }
}
1637798bfe00SFam Zheng
16387075d235SStefan Hajnoczi /* Runs in export AioContext with client->lock held */
nbd_request_get(NBDClient * client)1639315f78abSEric Blake static NBDRequestData *nbd_request_get(NBDClient *client)
1640798bfe00SFam Zheng {
1641315f78abSEric Blake NBDRequestData *req;
1642798bfe00SFam Zheng
1643798bfe00SFam Zheng assert(client->nb_requests <= MAX_NBD_REQUESTS - 1);
1644798bfe00SFam Zheng client->nb_requests++;
1645798bfe00SFam Zheng
1646315f78abSEric Blake req = g_new0(NBDRequestData, 1);
1647798bfe00SFam Zheng req->client = client;
1648798bfe00SFam Zheng return req;
1649798bfe00SFam Zheng }
1650798bfe00SFam Zheng
16517075d235SStefan Hajnoczi /* Runs in export AioContext with client->lock held */
nbd_request_put(NBDRequestData * req)1652315f78abSEric Blake static void nbd_request_put(NBDRequestData *req)
1653798bfe00SFam Zheng {
1654798bfe00SFam Zheng NBDClient *client = req->client;
1655798bfe00SFam Zheng
1656798bfe00SFam Zheng if (req->data) {
1657798bfe00SFam Zheng qemu_vfree(req->data);
1658798bfe00SFam Zheng }
1659798bfe00SFam Zheng g_free(req);
1660798bfe00SFam Zheng
1661798bfe00SFam Zheng client->nb_requests--;
1662fd6afc50SSergio Lopez
1663fd6afc50SSergio Lopez if (client->quiescing && client->nb_requests == 0) {
1664fd6afc50SSergio Lopez aio_wait_kick();
1665fd6afc50SSergio Lopez }
1666fd6afc50SSergio Lopez
1667ff82911cSPaolo Bonzini nbd_client_receive_next_request(client);
1668798bfe00SFam Zheng }
1669798bfe00SFam Zheng
/*
 * Record @ctx as the AioContext of the NBD export passed in @opaque;
 * asserts that it runs in the main thread.
 *
 * Every client of the export must be idle at this point: no requests in
 * flight and no receive or send coroutine.
 */
static void blk_aio_attached(AioContext *ctx, void *opaque)
{
    NBDExport *export = opaque;
    NBDClient *c;

    assert(qemu_in_main_thread());

    trace_nbd_blk_aio_attached(export->name, ctx);

    export->common.ctx = ctx;

    QTAILQ_FOREACH(c, &export->clients, next) {
        WITH_QEMU_LOCK_GUARD(&c->lock) {
            assert(c->nb_requests == 0);
            assert(c->recv_coroutine == NULL);
            assert(c->send_coroutine == NULL);
        }
    }
}
1689798bfe00SFam Zheng
blk_aio_detach(void * opaque)1690798bfe00SFam Zheng static void blk_aio_detach(void *opaque)
1691798bfe00SFam Zheng {
1692798bfe00SFam Zheng NBDExport *exp = opaque;
1693798bfe00SFam Zheng
16947075d235SStefan Hajnoczi assert(qemu_in_main_thread());
16957075d235SStefan Hajnoczi
16968612c686SKevin Wolf trace_nbd_blk_aio_detach(exp->name, exp->common.ctx);
1697798bfe00SFam Zheng
16988612c686SKevin Wolf exp->common.ctx = NULL;
1699798bfe00SFam Zheng }
1700798bfe00SFam Zheng
nbd_drained_begin(void * opaque)1701fd6afc50SSergio Lopez static void nbd_drained_begin(void *opaque)
1702fd6afc50SSergio Lopez {
1703fd6afc50SSergio Lopez NBDExport *exp = opaque;
1704fd6afc50SSergio Lopez NBDClient *client;
1705fd6afc50SSergio Lopez
17067075d235SStefan Hajnoczi assert(qemu_in_main_thread());
17077075d235SStefan Hajnoczi
1708fd6afc50SSergio Lopez QTAILQ_FOREACH(client, &exp->clients, next) {
17097075d235SStefan Hajnoczi WITH_QEMU_LOCK_GUARD(&client->lock) {
1710fd6afc50SSergio Lopez client->quiescing = true;
1711fd6afc50SSergio Lopez }
1712fd6afc50SSergio Lopez }
17137075d235SStefan Hajnoczi }
1714fd6afc50SSergio Lopez
nbd_drained_end(void * opaque)1715fd6afc50SSergio Lopez static void nbd_drained_end(void *opaque)
1716fd6afc50SSergio Lopez {
1717fd6afc50SSergio Lopez NBDExport *exp = opaque;
1718fd6afc50SSergio Lopez NBDClient *client;
1719fd6afc50SSergio Lopez
17207075d235SStefan Hajnoczi assert(qemu_in_main_thread());
17217075d235SStefan Hajnoczi
1722fd6afc50SSergio Lopez QTAILQ_FOREACH(client, &exp->clients, next) {
17237075d235SStefan Hajnoczi WITH_QEMU_LOCK_GUARD(&client->lock) {
1724fd6afc50SSergio Lopez client->quiescing = false;
1725fd6afc50SSergio Lopez nbd_client_receive_next_request(client);
1726fd6afc50SSergio Lopez }
1727fd6afc50SSergio Lopez }
17287075d235SStefan Hajnoczi }
17297075d235SStefan Hajnoczi
17307075d235SStefan Hajnoczi /* Runs in export AioContext */
nbd_wake_read_bh(void * opaque)17317075d235SStefan Hajnoczi static void nbd_wake_read_bh(void *opaque)
17327075d235SStefan Hajnoczi {
17337075d235SStefan Hajnoczi NBDClient *client = opaque;
17347075d235SStefan Hajnoczi qio_channel_wake_read(client->ioc);
17357075d235SStefan Hajnoczi }
1736fd6afc50SSergio Lopez
nbd_drained_poll(void * opaque)1737fd6afc50SSergio Lopez static bool nbd_drained_poll(void *opaque)
1738fd6afc50SSergio Lopez {
1739fd6afc50SSergio Lopez NBDExport *exp = opaque;
1740fd6afc50SSergio Lopez NBDClient *client;
1741fd6afc50SSergio Lopez
17427075d235SStefan Hajnoczi assert(qemu_in_main_thread());
17437075d235SStefan Hajnoczi
1744fd6afc50SSergio Lopez QTAILQ_FOREACH(client, &exp->clients, next) {
17457075d235SStefan Hajnoczi WITH_QEMU_LOCK_GUARD(&client->lock) {
1746fd6afc50SSergio Lopez if (client->nb_requests != 0) {
1747fd6afc50SSergio Lopez /*
1748fd6afc50SSergio Lopez * If there's a coroutine waiting for a request on nbd_read_eof()
1749fd6afc50SSergio Lopez * enter it here so we don't depend on the client to wake it up.
17507075d235SStefan Hajnoczi *
17517075d235SStefan Hajnoczi * Schedule a BH in the export AioContext to avoid missing the
17527075d235SStefan Hajnoczi * wake up due to the race between qio_channel_wake_read() and
17537075d235SStefan Hajnoczi * qio_channel_yield().
1754fd6afc50SSergio Lopez */
1755fd6afc50SSergio Lopez if (client->recv_coroutine != NULL && client->read_yielding) {
17567075d235SStefan Hajnoczi aio_bh_schedule_oneshot(nbd_export_aio_context(client->exp),
17577075d235SStefan Hajnoczi nbd_wake_read_bh, client);
1758fd6afc50SSergio Lopez }
1759fd6afc50SSergio Lopez
1760fd6afc50SSergio Lopez return true;
1761fd6afc50SSergio Lopez }
1762fd6afc50SSergio Lopez }
17637075d235SStefan Hajnoczi }
1764fd6afc50SSergio Lopez
1765fd6afc50SSergio Lopez return false;
1766fd6afc50SSergio Lopez }
1767fd6afc50SSergio Lopez
/*
 * Notifier callback installed by nbd_export_set_on_eject_blk(): request
 * shutdown of the export that embeds @n as its eject_notifier. @data is
 * unused. Asserts that it runs in the main thread.
 */
static void nbd_eject_notifier(Notifier *n, void *data)
{
    NBDExport *export = container_of(n, NBDExport, eject_notifier);

    assert(qemu_in_main_thread());

    blk_exp_request_shutdown(&export->common);
}
1776741cc431SMax Reitz
/*
 * Register nbd_eject_notifier() on @blk for the NBD export @exp.
 *
 * @exp must be an NBD export (its driver is blk_exp_nbd) that has no eject
 * notifier backend yet. A reference on @blk is taken and stored in the
 * export's eject_notifier_blk.
 */
void nbd_export_set_on_eject_blk(BlockExport *exp, BlockBackend *blk)
{
    NBDExport *owner = container_of(exp, NBDExport, common);

    assert(exp->drv == &blk_exp_nbd);
    assert(owner->eject_notifier_blk == NULL);

    blk_ref(blk);
    owner->eject_notifier_blk = blk;

    owner->eject_notifier.notify = nbd_eject_notifier;
    blk_add_remove_bs_notifier(blk, &owner->eject_notifier);
}
17889b562c64SKevin Wolf
1789fd6afc50SSergio Lopez static const BlockDevOps nbd_block_ops = {
1790fd6afc50SSergio Lopez .drained_begin = nbd_drained_begin,
1791fd6afc50SSergio Lopez .drained_end = nbd_drained_end,
1792fd6afc50SSergio Lopez .drained_poll = nbd_drained_poll,
1793fd6afc50SSergio Lopez };
1794fd6afc50SSergio Lopez
nbd_export_create(BlockExport * blk_exp,BlockExportOptions * exp_args,Error ** errp)17955b1cb497SKevin Wolf static int nbd_export_create(BlockExport *blk_exp, BlockExportOptions *exp_args,
1796331170e0SKevin Wolf Error **errp)
1797798bfe00SFam Zheng {
1798a6ff7989SKevin Wolf NBDExport *exp = container_of(blk_exp, NBDExport, common);
17995b1cb497SKevin Wolf BlockExportOptionsNbd *arg = &exp_args->u.nbd;
18008461b4d6SMarkus Armbruster const char *name = arg->name ?: exp_args->node_name;
1801331170e0SKevin Wolf BlockBackend *blk = blk_exp->blk;
1802b57e4de0SKevin Wolf int64_t size;
1803331170e0SKevin Wolf uint64_t perm, shared_perm;
18045b1cb497SKevin Wolf bool readonly = !exp_args->writable;
1805e5fb29d5SVladimir Sementsov-Ogievskiy BlockDirtyBitmapOrStrList *bitmaps;
18063b1f244cSEric Blake size_t i;
1807d7086422SKevin Wolf int ret;
1808cd7fca95SKevin Wolf
1809372b69f5SKevin Wolf GLOBAL_STATE_CODE();
18105b1cb497SKevin Wolf assert(exp_args->type == BLOCK_EXPORT_TYPE_NBD);
18115b1cb497SKevin Wolf
18125b1cb497SKevin Wolf if (!nbd_server_is_running()) {
18135b1cb497SKevin Wolf error_setg(errp, "NBD server not running");
18145b1cb497SKevin Wolf return -EINVAL;
18155b1cb497SKevin Wolf }
18165b1cb497SKevin Wolf
18178461b4d6SMarkus Armbruster if (strlen(name) > NBD_MAX_STRING_SIZE) {
18188461b4d6SMarkus Armbruster error_setg(errp, "export name '%s' too long", name);
18195b1cb497SKevin Wolf return -EINVAL;
18205b1cb497SKevin Wolf }
18215b1cb497SKevin Wolf
18225b1cb497SKevin Wolf if (arg->description && strlen(arg->description) > NBD_MAX_STRING_SIZE) {
18235b1cb497SKevin Wolf error_setg(errp, "description '%s' too long", arg->description);
18245b1cb497SKevin Wolf return -EINVAL;
18255b1cb497SKevin Wolf }
18265b1cb497SKevin Wolf
18278461b4d6SMarkus Armbruster if (nbd_export_find(name)) {
18288461b4d6SMarkus Armbruster error_setg(errp, "NBD server already has export named '%s'", name);
18295b1cb497SKevin Wolf return -EEXIST;
18305b1cb497SKevin Wolf }
18315b1cb497SKevin Wolf
1832331170e0SKevin Wolf size = blk_getlength(blk);
1833b57e4de0SKevin Wolf if (size < 0) {
1834b57e4de0SKevin Wolf error_setg_errno(errp, -size,
1835b57e4de0SKevin Wolf "Failed to determine the NBD export's length");
1836a6ff7989SKevin Wolf return size;
1837b57e4de0SKevin Wolf }
1838b57e4de0SKevin Wolf
18398a7ce4f9SKevin Wolf /* Don't allow resize while the NBD server is running, otherwise we don't
18408a7ce4f9SKevin Wolf * care what happens with the node. */
1841331170e0SKevin Wolf blk_get_perm(blk, &perm, &shared_perm);
1842331170e0SKevin Wolf ret = blk_set_perm(blk, perm, shared_perm & ~BLK_PERM_RESIZE, errp);
1843d7086422SKevin Wolf if (ret < 0) {
1844331170e0SKevin Wolf return ret;
1845d7086422SKevin Wolf }
1846331170e0SKevin Wolf
1847798bfe00SFam Zheng QTAILQ_INIT(&exp->clients);
18488461b4d6SMarkus Armbruster exp->name = g_strdup(name);
18495b1cb497SKevin Wolf exp->description = g_strdup(arg->description);
1850dbb38caaSEric Blake exp->nbdflags = (NBD_FLAG_HAS_FLAGS | NBD_FLAG_SEND_FLUSH |
1851dbb38caaSEric Blake NBD_FLAG_SEND_FUA | NBD_FLAG_SEND_CACHE);
185258a6fdccSEric Blake
185358a6fdccSEric Blake if (nbd_server_max_connections() != 1) {
1854dbb38caaSEric Blake exp->nbdflags |= NBD_FLAG_CAN_MULTI_CONN;
1855dbb38caaSEric Blake }
185658a6fdccSEric Blake if (readonly) {
185758a6fdccSEric Blake exp->nbdflags |= NBD_FLAG_READ_ONLY;
1858dbb38caaSEric Blake } else {
1859b491dbb7SEric Blake exp->nbdflags |= (NBD_FLAG_SEND_TRIM | NBD_FLAG_SEND_WRITE_ZEROES |
1860b491dbb7SEric Blake NBD_FLAG_SEND_FAST_ZERO);
1861dbb38caaSEric Blake }
18627596bbb3SEric Blake exp->size = QEMU_ALIGN_DOWN(size, BDRV_SECTOR_SIZE);
1863798bfe00SFam Zheng
1864372b69f5SKevin Wolf bdrv_graph_rdlock_main_loop();
1865372b69f5SKevin Wolf
1866cbad81ceSEric Blake for (bitmaps = arg->bitmaps; bitmaps; bitmaps = bitmaps->next) {
18673b1f244cSEric Blake exp->nr_export_bitmaps++;
18683b1f244cSEric Blake }
18693b1f244cSEric Blake exp->export_bitmaps = g_new0(BdrvDirtyBitmap *, exp->nr_export_bitmaps);
18703b1f244cSEric Blake for (i = 0, bitmaps = arg->bitmaps; bitmaps;
1871e5fb29d5SVladimir Sementsov-Ogievskiy i++, bitmaps = bitmaps->next)
1872e5fb29d5SVladimir Sementsov-Ogievskiy {
1873e5fb29d5SVladimir Sementsov-Ogievskiy const char *bitmap;
1874331170e0SKevin Wolf BlockDriverState *bs = blk_bs(blk);
1875678ba275SEric Blake BdrvDirtyBitmap *bm = NULL;
1876678ba275SEric Blake
1877e5fb29d5SVladimir Sementsov-Ogievskiy switch (bitmaps->value->type) {
1878e5fb29d5SVladimir Sementsov-Ogievskiy case QTYPE_QSTRING:
1879e5fb29d5SVladimir Sementsov-Ogievskiy bitmap = bitmaps->value->u.local;
1880ee2f94caSMax Reitz while (bs) {
1881cbad81ceSEric Blake bm = bdrv_find_dirty_bitmap(bs, bitmap);
1882ee2f94caSMax Reitz if (bm != NULL) {
1883678ba275SEric Blake break;
1884678ba275SEric Blake }
1885678ba275SEric Blake
1886ee2f94caSMax Reitz bs = bdrv_filter_or_cow_bs(bs);
1887678ba275SEric Blake }
1888678ba275SEric Blake
1889678ba275SEric Blake if (bm == NULL) {
1890a6ff7989SKevin Wolf ret = -ENOENT;
1891e5fb29d5SVladimir Sementsov-Ogievskiy error_setg(errp, "Bitmap '%s' is not found",
1892e5fb29d5SVladimir Sementsov-Ogievskiy bitmaps->value->u.local);
18933b78a927SJohn Snow goto fail;
18943b78a927SJohn Snow }
18953b78a927SJohn Snow
1896dbb38caaSEric Blake if (readonly && bdrv_is_writable(bs) &&
1897678ba275SEric Blake bdrv_dirty_bitmap_enabled(bm)) {
1898a6ff7989SKevin Wolf ret = -EINVAL;
1899e5fb29d5SVladimir Sementsov-Ogievskiy error_setg(errp, "Enabled bitmap '%s' incompatible with "
1900e5fb29d5SVladimir Sementsov-Ogievskiy "readonly export", bitmap);
1901e5fb29d5SVladimir Sementsov-Ogievskiy goto fail;
1902e5fb29d5SVladimir Sementsov-Ogievskiy }
1903e5fb29d5SVladimir Sementsov-Ogievskiy break;
1904e5fb29d5SVladimir Sementsov-Ogievskiy case QTYPE_QDICT:
1905e5fb29d5SVladimir Sementsov-Ogievskiy bitmap = bitmaps->value->u.external.name;
1906e5fb29d5SVladimir Sementsov-Ogievskiy bm = block_dirty_bitmap_lookup(bitmaps->value->u.external.node,
1907e5fb29d5SVladimir Sementsov-Ogievskiy bitmap, NULL, errp);
1908e5fb29d5SVladimir Sementsov-Ogievskiy if (!bm) {
1909e5fb29d5SVladimir Sementsov-Ogievskiy ret = -ENOENT;
1910e5fb29d5SVladimir Sementsov-Ogievskiy goto fail;
1911e5fb29d5SVladimir Sementsov-Ogievskiy }
1912e5fb29d5SVladimir Sementsov-Ogievskiy break;
1913e5fb29d5SVladimir Sementsov-Ogievskiy default:
1914e5fb29d5SVladimir Sementsov-Ogievskiy abort();
1915e5fb29d5SVladimir Sementsov-Ogievskiy }
1916e5fb29d5SVladimir Sementsov-Ogievskiy
1917e5fb29d5SVladimir Sementsov-Ogievskiy assert(bm);
1918e5fb29d5SVladimir Sementsov-Ogievskiy
1919e5fb29d5SVladimir Sementsov-Ogievskiy if (bdrv_dirty_bitmap_check(bm, BDRV_BITMAP_ALLOW_RO, errp)) {
1920e5fb29d5SVladimir Sementsov-Ogievskiy ret = -EINVAL;
1921678ba275SEric Blake goto fail;
1922678ba275SEric Blake }
1923678ba275SEric Blake
19243b1f244cSEric Blake exp->export_bitmaps[i] = bm;
1925cbad81ceSEric Blake assert(strlen(bitmap) <= BDRV_BITMAP_MAX_NAME_SIZE);
1926678ba275SEric Blake }
1927678ba275SEric Blake
19283b1f244cSEric Blake /* Mark bitmaps busy in a separate loop, to simplify roll-back concerns. */
19293b1f244cSEric Blake for (i = 0; i < exp->nr_export_bitmaps; i++) {
19303b1f244cSEric Blake bdrv_dirty_bitmap_set_busy(exp->export_bitmaps[i], true);
19313b1f244cSEric Blake }
19323b1f244cSEric Blake
1933dbc7b014SEric Blake exp->allocation_depth = arg->allocation_depth;
1934dbc7b014SEric Blake
1935fd6afc50SSergio Lopez /*
1936fd6afc50SSergio Lopez * We need to inhibit request queuing in the block layer to ensure we can
1937fd6afc50SSergio Lopez * be properly quiesced when entering a drained section, as our coroutines
1938fd6afc50SSergio Lopez * servicing pending requests might enter blk_pread().
1939fd6afc50SSergio Lopez */
1940fd6afc50SSergio Lopez blk_set_disable_request_queuing(blk, true);
1941fd6afc50SSergio Lopez
1942798bfe00SFam Zheng blk_add_aio_context_notifier(blk, blk_aio_attached, blk_aio_detach, exp);
1943741cc431SMax Reitz
1944fd6afc50SSergio Lopez blk_set_dev_ops(blk, &nbd_block_ops, exp);
1945fd6afc50SSergio Lopez
19463fa4c765SEric Blake QTAILQ_INSERT_TAIL(&exports, exp, next);
1947c69de1beSKevin Wolf
1948372b69f5SKevin Wolf bdrv_graph_rdunlock_main_loop();
1949372b69f5SKevin Wolf
1950a6ff7989SKevin Wolf return 0;
1951798bfe00SFam Zheng
1952798bfe00SFam Zheng fail:
1953372b69f5SKevin Wolf bdrv_graph_rdunlock_main_loop();
19543b1f244cSEric Blake g_free(exp->export_bitmaps);
19553fa4c765SEric Blake g_free(exp->name);
19563fa4c765SEric Blake g_free(exp->description);
1957a6ff7989SKevin Wolf return ret;
1958798bfe00SFam Zheng }
1959798bfe00SFam Zheng
nbd_export_find(const char * name)1960798bfe00SFam Zheng NBDExport *nbd_export_find(const char *name)
1961798bfe00SFam Zheng {
1962798bfe00SFam Zheng NBDExport *exp;
1963798bfe00SFam Zheng QTAILQ_FOREACH(exp, &exports, next) {
1964798bfe00SFam Zheng if (strcmp(name, exp->name) == 0) {
1965798bfe00SFam Zheng return exp;
1966798bfe00SFam Zheng }
1967798bfe00SFam Zheng }
1968798bfe00SFam Zheng
1969798bfe00SFam Zheng return NULL;
1970798bfe00SFam Zheng }
1971798bfe00SFam Zheng
197261bc846dSEric Blake AioContext *
nbd_export_aio_context(NBDExport * exp)197361bc846dSEric Blake nbd_export_aio_context(NBDExport *exp)
197461bc846dSEric Blake {
19758612c686SKevin Wolf return exp->common.ctx;
197661bc846dSEric Blake }
197761bc846dSEric Blake
/*
 * BlockExportDriver.request_shutdown callback.
 *
 * Closes every client currently attached to the export and, if the
 * export is still named, frees the name and removes the export from the
 * global list.  The name check makes repeated calls harmless.
 */
static void nbd_export_request_shutdown(BlockExport *blk_exp)
{
    NBDExport *exp = container_of(blk_exp, NBDExport, common);
    NBDClient *cur, *tmp;

    /* Keep the export referenced for the duration of the teardown below. */
    blk_exp_ref(&exp->common);

    /*
     * TODO: Should we expand QMP BlockExportRemoveMode enum to allow a
     * close mode that stops advertising the export to new clients but
     * still permits existing clients to run to completion?  Because of
     * that possibility, this function can be called more than once on
     * an export.
     */
    QTAILQ_FOREACH_SAFE(cur, &exp->clients, next, tmp) {
        client_close(cur, true);
    }

    if (exp->name != NULL) {
        QTAILQ_REMOVE(&exports, exp, next);
        g_free(exp->name);
        exp->name = NULL;
    }

    blk_exp_unref(&exp->common);
}
2001798bfe00SFam Zheng
/*
 * BlockExportDriver.delete callback: release everything the NBD export
 * still owns.
 *
 * Expects the export to be already shut down: its name must have been
 * cleared and its client list must be empty.  Frees the description,
 * removes the eject notifier (dropping the reference taken on its
 * backend), removes the AioContext notifier, re-enables request queuing
 * on the exported backend, clears the busy mark on all exported dirty
 * bitmaps and frees the bitmap array.
 */
static void nbd_export_delete(BlockExport *blk_exp)
{
    NBDExport *exp = container_of(blk_exp, NBDExport, common);
    size_t i;

    assert(exp->name == NULL);
    assert(QTAILQ_EMPTY(&exp->clients));

    g_free(exp->description);
    exp->description = NULL;

    if (exp->eject_notifier_blk) {
        notifier_remove(&exp->eject_notifier);
        blk_unref(exp->eject_notifier_blk);
    }
    blk_remove_aio_context_notifier(exp->common.blk, blk_aio_attached,
                                    blk_aio_detach, exp);
    blk_set_disable_request_queuing(exp->common.blk, false);

    for (i = 0; i < exp->nr_export_bitmaps; i++) {
        bdrv_dirty_bitmap_set_busy(exp->export_bitmaps[i], false);
    }

    /*
     * The pointer array was allocated with g_new0() in nbd_export_create();
     * release it here so it does not leak, and reset the bookkeeping so a
     * stale count can never index a freed array.
     */
    g_free(exp->export_bitmaps);
    exp->export_bitmaps = NULL;
    exp->nr_export_bitmaps = 0;
}
2025798bfe00SFam Zheng
202656ee8626SKevin Wolf const BlockExportDriver blk_exp_nbd = {
202756ee8626SKevin Wolf .type = BLOCK_EXPORT_TYPE_NBD,
2028a6ff7989SKevin Wolf .instance_size = sizeof(NBDExport),
202956ee8626SKevin Wolf .create = nbd_export_create,
2030c69de1beSKevin Wolf .delete = nbd_export_delete,
2031bc4ee65bSKevin Wolf .request_shutdown = nbd_export_request_shutdown,
203256ee8626SKevin Wolf };
203356ee8626SKevin Wolf
nbd_co_send_iov(NBDClient * client,struct iovec * iov,unsigned niov,Error ** errp)2034de79bfc3SVladimir Sementsov-Ogievskiy static int coroutine_fn nbd_co_send_iov(NBDClient *client, struct iovec *iov,
2035de79bfc3SVladimir Sementsov-Ogievskiy unsigned niov, Error **errp)
2036de79bfc3SVladimir Sementsov-Ogievskiy {
2037de79bfc3SVladimir Sementsov-Ogievskiy int ret;
2038de79bfc3SVladimir Sementsov-Ogievskiy
2039de79bfc3SVladimir Sementsov-Ogievskiy g_assert(qemu_in_coroutine());
2040de79bfc3SVladimir Sementsov-Ogievskiy qemu_co_mutex_lock(&client->send_lock);
2041de79bfc3SVladimir Sementsov-Ogievskiy client->send_coroutine = qemu_coroutine_self();
2042de79bfc3SVladimir Sementsov-Ogievskiy
2043de79bfc3SVladimir Sementsov-Ogievskiy ret = qio_channel_writev_all(client->ioc, iov, niov, errp) < 0 ? -EIO : 0;
2044de79bfc3SVladimir Sementsov-Ogievskiy
2045de79bfc3SVladimir Sementsov-Ogievskiy client->send_coroutine = NULL;
2046de79bfc3SVladimir Sementsov-Ogievskiy qemu_co_mutex_unlock(&client->send_lock);
2047de79bfc3SVladimir Sementsov-Ogievskiy
2048de79bfc3SVladimir Sementsov-Ogievskiy return ret;
2049de79bfc3SVladimir Sementsov-Ogievskiy }
2050de79bfc3SVladimir Sementsov-Ogievskiy
set_be_simple_reply(NBDSimpleReply * reply,uint64_t error,uint64_t cookie)2051caad5384SVladimir Sementsov-Ogievskiy static inline void set_be_simple_reply(NBDSimpleReply *reply, uint64_t error,
205222efd811SEric Blake uint64_t cookie)
2053caad5384SVladimir Sementsov-Ogievskiy {
2054caad5384SVladimir Sementsov-Ogievskiy stl_be_p(&reply->magic, NBD_SIMPLE_REPLY_MAGIC);
2055caad5384SVladimir Sementsov-Ogievskiy stl_be_p(&reply->error, error);
205622efd811SEric Blake stq_be_p(&reply->cookie, cookie);
2057caad5384SVladimir Sementsov-Ogievskiy }
2058caad5384SVladimir Sementsov-Ogievskiy
nbd_co_send_simple_reply(NBDClient * client,NBDRequest * request,uint32_t error,void * data,uint64_t len,Error ** errp)2059d2223cddSPaolo Bonzini static int coroutine_fn nbd_co_send_simple_reply(NBDClient *client,
206066d4f4feSEric Blake NBDRequest *request,
206114cea41dSVladimir Sementsov-Ogievskiy uint32_t error,
2062978df1b6SVladimir Sementsov-Ogievskiy void *data,
2063b2578459SEric Blake uint64_t len,
2064978df1b6SVladimir Sementsov-Ogievskiy Error **errp)
2065798bfe00SFam Zheng {
2066de79bfc3SVladimir Sementsov-Ogievskiy NBDSimpleReply reply;
206714cea41dSVladimir Sementsov-Ogievskiy int nbd_err = system_errno_to_nbd_errno(error);
2068de79bfc3SVladimir Sementsov-Ogievskiy struct iovec iov[] = {
2069de79bfc3SVladimir Sementsov-Ogievskiy {.iov_base = &reply, .iov_len = sizeof(reply)},
2070de79bfc3SVladimir Sementsov-Ogievskiy {.iov_base = data, .iov_len = len}
2071de79bfc3SVladimir Sementsov-Ogievskiy };
20726fb2b972SVladimir Sementsov-Ogievskiy
2073a7c8ed36SEric Blake assert(!len || !nbd_err);
2074b2578459SEric Blake assert(len <= NBD_MAX_BUFFER_SIZE);
2075ac132d05SEric Blake assert(client->mode < NBD_MODE_STRUCTURED ||
2076ac132d05SEric Blake (client->mode == NBD_MODE_STRUCTURED &&
2077ac132d05SEric Blake request->type != NBD_CMD_READ));
207822efd811SEric Blake trace_nbd_co_send_simple_reply(request->cookie, nbd_err,
207966d4f4feSEric Blake nbd_err_lookup(nbd_err), len);
208022efd811SEric Blake set_be_simple_reply(&reply, nbd_err, request->cookie);
20816fb2b972SVladimir Sementsov-Ogievskiy
2082a7c8ed36SEric Blake return nbd_co_send_iov(client, iov, 2, errp);
2083798bfe00SFam Zheng }
2084798bfe00SFam Zheng
2085a7c8ed36SEric Blake /*
2086a7c8ed36SEric Blake * Prepare the header of a reply chunk for network transmission.
2087a7c8ed36SEric Blake *
2088a7c8ed36SEric Blake * On input, @iov is partially initialized: iov[0].iov_base must point
2089a7c8ed36SEric Blake * to an uninitialized NBDReply, while the remaining @niov elements
2090a7c8ed36SEric Blake * (if any) must be ready for transmission. This function then
2091a7c8ed36SEric Blake * populates iov[0] for transmission.
2092a7c8ed36SEric Blake */
set_be_chunk(NBDClient * client,struct iovec * iov,size_t niov,uint16_t flags,uint16_t type,NBDRequest * request)2093a7c8ed36SEric Blake static inline void set_be_chunk(NBDClient *client, struct iovec *iov,
2094a7c8ed36SEric Blake size_t niov, uint16_t flags, uint16_t type,
209566d4f4feSEric Blake NBDRequest *request)
20965c54e7faSVladimir Sementsov-Ogievskiy {
2097a7c8ed36SEric Blake size_t i, length = 0;
2098a7c8ed36SEric Blake
2099a7c8ed36SEric Blake for (i = 1; i < niov; i++) {
2100a7c8ed36SEric Blake length += iov[i].iov_len;
2101a7c8ed36SEric Blake }
2102a7c8ed36SEric Blake assert(length <= NBD_MAX_BUFFER_SIZE + sizeof(NBDStructuredReadData));
2103a7c8ed36SEric Blake
210411d3355fSEric Blake if (client->mode >= NBD_MODE_EXTENDED) {
210511d3355fSEric Blake NBDExtendedReplyChunk *chunk = iov->iov_base;
210611d3355fSEric Blake
210711d3355fSEric Blake iov[0].iov_len = sizeof(*chunk);
210811d3355fSEric Blake stl_be_p(&chunk->magic, NBD_EXTENDED_REPLY_MAGIC);
210911d3355fSEric Blake stw_be_p(&chunk->flags, flags);
211011d3355fSEric Blake stw_be_p(&chunk->type, type);
211111d3355fSEric Blake stq_be_p(&chunk->cookie, request->cookie);
211211d3355fSEric Blake stq_be_p(&chunk->offset, request->from);
211311d3355fSEric Blake stq_be_p(&chunk->length, length);
211411d3355fSEric Blake } else {
211511d3355fSEric Blake NBDStructuredReplyChunk *chunk = iov->iov_base;
211611d3355fSEric Blake
2117a7c8ed36SEric Blake iov[0].iov_len = sizeof(*chunk);
21185c54e7faSVladimir Sementsov-Ogievskiy stl_be_p(&chunk->magic, NBD_STRUCTURED_REPLY_MAGIC);
21195c54e7faSVladimir Sementsov-Ogievskiy stw_be_p(&chunk->flags, flags);
21205c54e7faSVladimir Sementsov-Ogievskiy stw_be_p(&chunk->type, type);
212122efd811SEric Blake stq_be_p(&chunk->cookie, request->cookie);
21225c54e7faSVladimir Sementsov-Ogievskiy stl_be_p(&chunk->length, length);
21235c54e7faSVladimir Sementsov-Ogievskiy }
212411d3355fSEric Blake }
21255c54e7faSVladimir Sementsov-Ogievskiy
nbd_co_send_chunk_done(NBDClient * client,NBDRequest * request,Error ** errp)2126a7c8ed36SEric Blake static int coroutine_fn nbd_co_send_chunk_done(NBDClient *client,
212766d4f4feSEric Blake NBDRequest *request,
2128ef8c887eSEric Blake Error **errp)
2129ef8c887eSEric Blake {
2130a7c8ed36SEric Blake NBDReply hdr;
2131ef8c887eSEric Blake struct iovec iov[] = {
2132a7c8ed36SEric Blake {.iov_base = &hdr},
2133ef8c887eSEric Blake };
2134ef8c887eSEric Blake
213522efd811SEric Blake trace_nbd_co_send_chunk_done(request->cookie);
2136a7c8ed36SEric Blake set_be_chunk(client, iov, 1, NBD_REPLY_FLAG_DONE,
213766d4f4feSEric Blake NBD_REPLY_TYPE_NONE, request);
2138ef8c887eSEric Blake return nbd_co_send_iov(client, iov, 1, errp);
2139ef8c887eSEric Blake }
2140ef8c887eSEric Blake
nbd_co_send_chunk_read(NBDClient * client,NBDRequest * request,uint64_t offset,void * data,uint64_t size,bool final,Error ** errp)2141a7c8ed36SEric Blake static int coroutine_fn nbd_co_send_chunk_read(NBDClient *client,
214266d4f4feSEric Blake NBDRequest *request,
21435c54e7faSVladimir Sementsov-Ogievskiy uint64_t offset,
21445c54e7faSVladimir Sementsov-Ogievskiy void *data,
2145b2578459SEric Blake uint64_t size,
2146418638d3SEric Blake bool final,
21475c54e7faSVladimir Sementsov-Ogievskiy Error **errp)
21485c54e7faSVladimir Sementsov-Ogievskiy {
2149a7c8ed36SEric Blake NBDReply hdr;
2150efdc0c10SEric Blake NBDStructuredReadData chunk;
21515c54e7faSVladimir Sementsov-Ogievskiy struct iovec iov[] = {
2152a7c8ed36SEric Blake {.iov_base = &hdr},
21535c54e7faSVladimir Sementsov-Ogievskiy {.iov_base = &chunk, .iov_len = sizeof(chunk)},
21545c54e7faSVladimir Sementsov-Ogievskiy {.iov_base = data, .iov_len = size}
21555c54e7faSVladimir Sementsov-Ogievskiy };
21565c54e7faSVladimir Sementsov-Ogievskiy
2157b2578459SEric Blake assert(size && size <= NBD_MAX_BUFFER_SIZE);
215822efd811SEric Blake trace_nbd_co_send_chunk_read(request->cookie, offset, data, size);
2159a7c8ed36SEric Blake set_be_chunk(client, iov, 3, final ? NBD_REPLY_FLAG_DONE : 0,
216066d4f4feSEric Blake NBD_REPLY_TYPE_OFFSET_DATA, request);
21615c54e7faSVladimir Sementsov-Ogievskiy stq_be_p(&chunk.offset, offset);
21625c54e7faSVladimir Sementsov-Ogievskiy
2163a7c8ed36SEric Blake return nbd_co_send_iov(client, iov, 3, errp);
21645c54e7faSVladimir Sementsov-Ogievskiy }
2165ac132d05SEric Blake
nbd_co_send_chunk_error(NBDClient * client,NBDRequest * request,uint32_t error,const char * msg,Error ** errp)2166a7c8ed36SEric Blake static int coroutine_fn nbd_co_send_chunk_error(NBDClient *client,
216766d4f4feSEric Blake NBDRequest *request,
216860ace2baSVladimir Sementsov-Ogievskiy uint32_t error,
216960ace2baSVladimir Sementsov-Ogievskiy const char *msg,
217060ace2baSVladimir Sementsov-Ogievskiy Error **errp)
217160ace2baSVladimir Sementsov-Ogievskiy {
2172a7c8ed36SEric Blake NBDReply hdr;
217360ace2baSVladimir Sementsov-Ogievskiy NBDStructuredError chunk;
217460ace2baSVladimir Sementsov-Ogievskiy int nbd_err = system_errno_to_nbd_errno(error);
217560ace2baSVladimir Sementsov-Ogievskiy struct iovec iov[] = {
2176a7c8ed36SEric Blake {.iov_base = &hdr},
217760ace2baSVladimir Sementsov-Ogievskiy {.iov_base = &chunk, .iov_len = sizeof(chunk)},
217860ace2baSVladimir Sementsov-Ogievskiy {.iov_base = (char *)msg, .iov_len = msg ? strlen(msg) : 0},
217960ace2baSVladimir Sementsov-Ogievskiy };
218060ace2baSVladimir Sementsov-Ogievskiy
218160ace2baSVladimir Sementsov-Ogievskiy assert(nbd_err);
218222efd811SEric Blake trace_nbd_co_send_chunk_error(request->cookie, nbd_err,
218360ace2baSVladimir Sementsov-Ogievskiy nbd_err_lookup(nbd_err), msg ? msg : "");
2184a7c8ed36SEric Blake set_be_chunk(client, iov, 3, NBD_REPLY_FLAG_DONE,
218566d4f4feSEric Blake NBD_REPLY_TYPE_ERROR, request);
218660ace2baSVladimir Sementsov-Ogievskiy stl_be_p(&chunk.error, nbd_err);
2187a7c8ed36SEric Blake stw_be_p(&chunk.message_length, iov[2].iov_len);
218860ace2baSVladimir Sementsov-Ogievskiy
2189a7c8ed36SEric Blake return nbd_co_send_iov(client, iov, 3, errp);
219060ace2baSVladimir Sementsov-Ogievskiy }
219160ace2baSVladimir Sementsov-Ogievskiy
219237e02aebSVladimir Sementsov-Ogievskiy /* Do a sparse read and send the structured reply to the client.
2193ff7e261bSEmanuele Giuseppe Esposito * Returns -errno if sending fails. blk_co_block_status_above() failure is
219437e02aebSVladimir Sementsov-Ogievskiy * reported to the client, at which point this function succeeds.
219537e02aebSVladimir Sementsov-Ogievskiy */
nbd_co_send_sparse_read(NBDClient * client,NBDRequest * request,uint64_t offset,uint8_t * data,uint64_t size,Error ** errp)2196418638d3SEric Blake static int coroutine_fn nbd_co_send_sparse_read(NBDClient *client,
219766d4f4feSEric Blake NBDRequest *request,
2198418638d3SEric Blake uint64_t offset,
2199418638d3SEric Blake uint8_t *data,
2200b2578459SEric Blake uint64_t size,
2201418638d3SEric Blake Error **errp)
2202418638d3SEric Blake {
2203418638d3SEric Blake int ret = 0;
2204418638d3SEric Blake NBDExport *exp = client->exp;
2205418638d3SEric Blake size_t progress = 0;
2206418638d3SEric Blake
2207b2578459SEric Blake assert(size <= NBD_MAX_BUFFER_SIZE);
2208418638d3SEric Blake while (progress < size) {
2209418638d3SEric Blake int64_t pnum;
2210ff7e261bSEmanuele Giuseppe Esposito int status = blk_co_block_status_above(exp->common.blk, NULL,
2211418638d3SEric Blake offset + progress,
2212418638d3SEric Blake size - progress, &pnum, NULL,
2213418638d3SEric Blake NULL);
2214e2de3256SEric Blake bool final;
2215418638d3SEric Blake
2216418638d3SEric Blake if (status < 0) {
221737e02aebSVladimir Sementsov-Ogievskiy char *msg = g_strdup_printf("unable to check for holes: %s",
221837e02aebSVladimir Sementsov-Ogievskiy strerror(-status));
221937e02aebSVladimir Sementsov-Ogievskiy
222066d4f4feSEric Blake ret = nbd_co_send_chunk_error(client, request, -status, msg, errp);
222137e02aebSVladimir Sementsov-Ogievskiy g_free(msg);
222237e02aebSVladimir Sementsov-Ogievskiy return ret;
2223418638d3SEric Blake }
2224418638d3SEric Blake assert(pnum && pnum <= size - progress);
2225e2de3256SEric Blake final = progress + pnum == size;
2226418638d3SEric Blake if (status & BDRV_BLOCK_ZERO) {
2227a7c8ed36SEric Blake NBDReply hdr;
2228418638d3SEric Blake NBDStructuredReadHole chunk;
2229418638d3SEric Blake struct iovec iov[] = {
2230a7c8ed36SEric Blake {.iov_base = &hdr},
2231418638d3SEric Blake {.iov_base = &chunk, .iov_len = sizeof(chunk)},
2232418638d3SEric Blake };
2233418638d3SEric Blake
223422efd811SEric Blake trace_nbd_co_send_chunk_read_hole(request->cookie,
223566d4f4feSEric Blake offset + progress, pnum);
2236a7c8ed36SEric Blake set_be_chunk(client, iov, 2,
2237a7c8ed36SEric Blake final ? NBD_REPLY_FLAG_DONE : 0,
223866d4f4feSEric Blake NBD_REPLY_TYPE_OFFSET_HOLE, request);
2239418638d3SEric Blake stq_be_p(&chunk.offset, offset + progress);
2240418638d3SEric Blake stl_be_p(&chunk.length, pnum);
2241a7c8ed36SEric Blake ret = nbd_co_send_iov(client, iov, 2, errp);
2242418638d3SEric Blake } else {
2243d2223cddSPaolo Bonzini ret = blk_co_pread(exp->common.blk, offset + progress, pnum,
2244a9262f55SAlberto Faria data + progress, 0);
2245418638d3SEric Blake if (ret < 0) {
2246418638d3SEric Blake error_setg_errno(errp, -ret, "reading from file failed");
2247418638d3SEric Blake break;
2248418638d3SEric Blake }
224966d4f4feSEric Blake ret = nbd_co_send_chunk_read(client, request, offset + progress,
2250a7c8ed36SEric Blake data + progress, pnum, final, errp);
2251418638d3SEric Blake }
2252418638d3SEric Blake
2253418638d3SEric Blake if (ret < 0) {
2254418638d3SEric Blake break;
2255418638d3SEric Blake }
2256418638d3SEric Blake progress += pnum;
2257418638d3SEric Blake }
2258418638d3SEric Blake return ret;
2259418638d3SEric Blake }
2260418638d3SEric Blake
226189cbc7e3SVladimir Sementsov-Ogievskiy typedef struct NBDExtentArray {
2262bcc16cc1SEric Blake NBDExtent64 *extents;
226389cbc7e3SVladimir Sementsov-Ogievskiy unsigned int nb_alloc;
226489cbc7e3SVladimir Sementsov-Ogievskiy unsigned int count;
226589cbc7e3SVladimir Sementsov-Ogievskiy uint64_t total_length;
2266bcc16cc1SEric Blake bool extended;
226789cbc7e3SVladimir Sementsov-Ogievskiy bool can_add;
226889cbc7e3SVladimir Sementsov-Ogievskiy bool converted_to_be;
226989cbc7e3SVladimir Sementsov-Ogievskiy } NBDExtentArray;
2270e7b1948dSVladimir Sementsov-Ogievskiy
nbd_extent_array_new(unsigned int nb_alloc,NBDMode mode)2271bcc16cc1SEric Blake static NBDExtentArray *nbd_extent_array_new(unsigned int nb_alloc,
2272bcc16cc1SEric Blake NBDMode mode)
227389cbc7e3SVladimir Sementsov-Ogievskiy {
227489cbc7e3SVladimir Sementsov-Ogievskiy NBDExtentArray *ea = g_new0(NBDExtentArray, 1);
227589cbc7e3SVladimir Sementsov-Ogievskiy
2276bcc16cc1SEric Blake assert(mode >= NBD_MODE_STRUCTURED);
227789cbc7e3SVladimir Sementsov-Ogievskiy ea->nb_alloc = nb_alloc;
2278bcc16cc1SEric Blake ea->extents = g_new(NBDExtent64, nb_alloc);
2279bcc16cc1SEric Blake ea->extended = mode >= NBD_MODE_EXTENDED;
228089cbc7e3SVladimir Sementsov-Ogievskiy ea->can_add = true;
228189cbc7e3SVladimir Sementsov-Ogievskiy
228289cbc7e3SVladimir Sementsov-Ogievskiy return ea;
228389cbc7e3SVladimir Sementsov-Ogievskiy }
228489cbc7e3SVladimir Sementsov-Ogievskiy
nbd_extent_array_free(NBDExtentArray * ea)228589cbc7e3SVladimir Sementsov-Ogievskiy static void nbd_extent_array_free(NBDExtentArray *ea)
228689cbc7e3SVladimir Sementsov-Ogievskiy {
228789cbc7e3SVladimir Sementsov-Ogievskiy g_free(ea->extents);
228889cbc7e3SVladimir Sementsov-Ogievskiy g_free(ea);
228989cbc7e3SVladimir Sementsov-Ogievskiy }
G_DEFINE_AUTOPTR_CLEANUP_FUNC(NBDExtentArray,nbd_extent_array_free)2290e0e7fe07SMarc-André Lureau G_DEFINE_AUTOPTR_CLEANUP_FUNC(NBDExtentArray, nbd_extent_array_free)
229189cbc7e3SVladimir Sementsov-Ogievskiy
229289cbc7e3SVladimir Sementsov-Ogievskiy /* Further modifications of the array after conversion are abandoned */
229389cbc7e3SVladimir Sementsov-Ogievskiy static void nbd_extent_array_convert_to_be(NBDExtentArray *ea)
229489cbc7e3SVladimir Sementsov-Ogievskiy {
229589cbc7e3SVladimir Sementsov-Ogievskiy int i;
229689cbc7e3SVladimir Sementsov-Ogievskiy
229789cbc7e3SVladimir Sementsov-Ogievskiy assert(!ea->converted_to_be);
2298bcc16cc1SEric Blake assert(ea->extended);
229989cbc7e3SVladimir Sementsov-Ogievskiy ea->can_add = false;
230089cbc7e3SVladimir Sementsov-Ogievskiy ea->converted_to_be = true;
230189cbc7e3SVladimir Sementsov-Ogievskiy
230289cbc7e3SVladimir Sementsov-Ogievskiy for (i = 0; i < ea->count; i++) {
2303bcc16cc1SEric Blake ea->extents[i].length = cpu_to_be64(ea->extents[i].length);
2304bcc16cc1SEric Blake ea->extents[i].flags = cpu_to_be64(ea->extents[i].flags);
230589cbc7e3SVladimir Sementsov-Ogievskiy }
230689cbc7e3SVladimir Sementsov-Ogievskiy }
230789cbc7e3SVladimir Sementsov-Ogievskiy
2308bcc16cc1SEric Blake /* Further modifications of the array after conversion are abandoned */
nbd_extent_array_convert_to_narrow(NBDExtentArray * ea)2309bcc16cc1SEric Blake static NBDExtent32 *nbd_extent_array_convert_to_narrow(NBDExtentArray *ea)
2310bcc16cc1SEric Blake {
2311bcc16cc1SEric Blake int i;
2312bcc16cc1SEric Blake NBDExtent32 *extents = g_new(NBDExtent32, ea->count);
2313bcc16cc1SEric Blake
2314bcc16cc1SEric Blake assert(!ea->converted_to_be);
2315bcc16cc1SEric Blake assert(!ea->extended);
2316bcc16cc1SEric Blake ea->can_add = false;
2317bcc16cc1SEric Blake ea->converted_to_be = true;
2318bcc16cc1SEric Blake
2319bcc16cc1SEric Blake for (i = 0; i < ea->count; i++) {
2320bcc16cc1SEric Blake assert((ea->extents[i].length | ea->extents[i].flags) <= UINT32_MAX);
2321bcc16cc1SEric Blake extents[i].length = cpu_to_be32(ea->extents[i].length);
2322bcc16cc1SEric Blake extents[i].flags = cpu_to_be32(ea->extents[i].flags);
2323bcc16cc1SEric Blake }
2324bcc16cc1SEric Blake
2325bcc16cc1SEric Blake return extents;
2326bcc16cc1SEric Blake }
2327bcc16cc1SEric Blake
232889cbc7e3SVladimir Sementsov-Ogievskiy /*
232989cbc7e3SVladimir Sementsov-Ogievskiy * Add extent to NBDExtentArray. If extent can't be added (no available space),
233089cbc7e3SVladimir Sementsov-Ogievskiy * return -1.
233189cbc7e3SVladimir Sementsov-Ogievskiy * For safety, when returning -1 for the first time, .can_add is set to false,
2332314b9026SEric Blake * and further calls to nbd_extent_array_add() will crash.
2333314b9026SEric Blake * (this avoids the situation where a caller ignores failure to add one extent,
2334314b9026SEric Blake * where adding another extent that would squash into the last array entry
2335314b9026SEric Blake * would result in an incorrect range reported to the client)
233689cbc7e3SVladimir Sementsov-Ogievskiy */
nbd_extent_array_add(NBDExtentArray * ea,uint64_t length,uint32_t flags)233789cbc7e3SVladimir Sementsov-Ogievskiy static int nbd_extent_array_add(NBDExtentArray *ea,
2338bcc16cc1SEric Blake uint64_t length, uint32_t flags)
233989cbc7e3SVladimir Sementsov-Ogievskiy {
234089cbc7e3SVladimir Sementsov-Ogievskiy assert(ea->can_add);
234189cbc7e3SVladimir Sementsov-Ogievskiy
234289cbc7e3SVladimir Sementsov-Ogievskiy if (!length) {
234389cbc7e3SVladimir Sementsov-Ogievskiy return 0;
234489cbc7e3SVladimir Sementsov-Ogievskiy }
2345bcc16cc1SEric Blake if (!ea->extended) {
2346bcc16cc1SEric Blake assert(length <= UINT32_MAX);
2347bcc16cc1SEric Blake }
234889cbc7e3SVladimir Sementsov-Ogievskiy
234989cbc7e3SVladimir Sementsov-Ogievskiy /* Extend previous extent if flags are the same */
235089cbc7e3SVladimir Sementsov-Ogievskiy if (ea->count > 0 && flags == ea->extents[ea->count - 1].flags) {
2351bcc16cc1SEric Blake uint64_t sum = length + ea->extents[ea->count - 1].length;
235289cbc7e3SVladimir Sementsov-Ogievskiy
2353bcc16cc1SEric Blake /*
2354bcc16cc1SEric Blake * sum cannot overflow: the block layer bounds image size at
2355bcc16cc1SEric Blake * 2^63, and ea->extents[].length comes from the block layer.
2356bcc16cc1SEric Blake */
2357bcc16cc1SEric Blake assert(sum >= length);
2358bcc16cc1SEric Blake if (sum <= UINT32_MAX || ea->extended) {
235989cbc7e3SVladimir Sementsov-Ogievskiy ea->extents[ea->count - 1].length = sum;
236089cbc7e3SVladimir Sementsov-Ogievskiy ea->total_length += length;
236189cbc7e3SVladimir Sementsov-Ogievskiy return 0;
236289cbc7e3SVladimir Sementsov-Ogievskiy }
236389cbc7e3SVladimir Sementsov-Ogievskiy }
236489cbc7e3SVladimir Sementsov-Ogievskiy
236589cbc7e3SVladimir Sementsov-Ogievskiy if (ea->count >= ea->nb_alloc) {
236689cbc7e3SVladimir Sementsov-Ogievskiy ea->can_add = false;
236789cbc7e3SVladimir Sementsov-Ogievskiy return -1;
236889cbc7e3SVladimir Sementsov-Ogievskiy }
236989cbc7e3SVladimir Sementsov-Ogievskiy
237089cbc7e3SVladimir Sementsov-Ogievskiy ea->total_length += length;
2371bcc16cc1SEric Blake ea->extents[ea->count] = (NBDExtent64) {.length = length, .flags = flags};
237289cbc7e3SVladimir Sementsov-Ogievskiy ea->count++;
237389cbc7e3SVladimir Sementsov-Ogievskiy
237489cbc7e3SVladimir Sementsov-Ogievskiy return 0;
237589cbc7e3SVladimir Sementsov-Ogievskiy }
237689cbc7e3SVladimir Sementsov-Ogievskiy
blockstatus_to_extents(BlockBackend * blk,uint64_t offset,uint64_t bytes,NBDExtentArray * ea)2377ff7e261bSEmanuele Giuseppe Esposito static int coroutine_fn blockstatus_to_extents(BlockBackend *blk,
23786f58ac55SEmanuele Giuseppe Esposito uint64_t offset, uint64_t bytes,
23796f58ac55SEmanuele Giuseppe Esposito NBDExtentArray *ea)
238089cbc7e3SVladimir Sementsov-Ogievskiy {
238189cbc7e3SVladimir Sementsov-Ogievskiy while (bytes) {
2382e7b1948dSVladimir Sementsov-Ogievskiy uint32_t flags;
2383e7b1948dSVladimir Sementsov-Ogievskiy int64_t num;
2384ff7e261bSEmanuele Giuseppe Esposito int ret = blk_co_block_status_above(blk, NULL, offset, bytes, &num,
238589cbc7e3SVladimir Sementsov-Ogievskiy NULL, NULL);
2386fb7afc79SVladimir Sementsov-Ogievskiy
2387e7b1948dSVladimir Sementsov-Ogievskiy if (ret < 0) {
2388e7b1948dSVladimir Sementsov-Ogievskiy return ret;
2389e7b1948dSVladimir Sementsov-Ogievskiy }
2390e7b1948dSVladimir Sementsov-Ogievskiy
23910da98568SNir Soffer flags = (ret & BDRV_BLOCK_DATA ? 0 : NBD_STATE_HOLE) |
2392e7b1948dSVladimir Sementsov-Ogievskiy (ret & BDRV_BLOCK_ZERO ? NBD_STATE_ZERO : 0);
2393e7b1948dSVladimir Sementsov-Ogievskiy
239489cbc7e3SVladimir Sementsov-Ogievskiy if (nbd_extent_array_add(ea, num, flags) < 0) {
239589cbc7e3SVladimir Sementsov-Ogievskiy return 0;
2396e7b1948dSVladimir Sementsov-Ogievskiy }
2397e7b1948dSVladimir Sementsov-Ogievskiy
23982178a569SEric Blake offset += num;
239989cbc7e3SVladimir Sementsov-Ogievskiy bytes -= num;
2400e7b1948dSVladimir Sementsov-Ogievskiy }
2401e7b1948dSVladimir Sementsov-Ogievskiy
2402e7b1948dSVladimir Sementsov-Ogievskiy return 0;
2403e7b1948dSVladimir Sementsov-Ogievskiy }
2404e7b1948dSVladimir Sementsov-Ogievskiy
blockalloc_to_extents(BlockBackend * blk,uint64_t offset,uint64_t bytes,NBDExtentArray * ea)2405ff7e261bSEmanuele Giuseppe Esposito static int coroutine_fn blockalloc_to_extents(BlockBackend *blk,
24066f58ac55SEmanuele Giuseppe Esposito uint64_t offset, uint64_t bytes,
24076f58ac55SEmanuele Giuseppe Esposito NBDExtentArray *ea)
240871719cd5SEric Blake {
240971719cd5SEric Blake while (bytes) {
241071719cd5SEric Blake int64_t num;
2411ff7e261bSEmanuele Giuseppe Esposito int ret = blk_co_is_allocated_above(blk, NULL, false, offset, bytes,
241271719cd5SEric Blake &num);
241371719cd5SEric Blake
241471719cd5SEric Blake if (ret < 0) {
241571719cd5SEric Blake return ret;
241671719cd5SEric Blake }
241771719cd5SEric Blake
241871719cd5SEric Blake if (nbd_extent_array_add(ea, num, ret) < 0) {
241971719cd5SEric Blake return 0;
242071719cd5SEric Blake }
242171719cd5SEric Blake
242271719cd5SEric Blake offset += num;
242371719cd5SEric Blake bytes -= num;
242471719cd5SEric Blake }
242571719cd5SEric Blake
242671719cd5SEric Blake return 0;
242771719cd5SEric Blake }
242871719cd5SEric Blake
242989cbc7e3SVladimir Sementsov-Ogievskiy /*
243089cbc7e3SVladimir Sementsov-Ogievskiy * nbd_co_send_extents
24313d068affSVladimir Sementsov-Ogievskiy *
243289cbc7e3SVladimir Sementsov-Ogievskiy * @ea is converted to BE by the function
243389cbc7e3SVladimir Sementsov-Ogievskiy * @last controls whether NBD_REPLY_FLAG_DONE is sent.
24343d068affSVladimir Sementsov-Ogievskiy */
2435d2223cddSPaolo Bonzini static int coroutine_fn
nbd_co_send_extents(NBDClient * client,NBDRequest * request,NBDExtentArray * ea,bool last,uint32_t context_id,Error ** errp)243666d4f4feSEric Blake nbd_co_send_extents(NBDClient *client, NBDRequest *request, NBDExtentArray *ea,
243789cbc7e3SVladimir Sementsov-Ogievskiy bool last, uint32_t context_id, Error **errp)
2438e7b1948dSVladimir Sementsov-Ogievskiy {
2439a7c8ed36SEric Blake NBDReply hdr;
2440bcc16cc1SEric Blake NBDStructuredMeta meta;
2441bcc16cc1SEric Blake NBDExtendedMeta meta_ext;
2442bcc16cc1SEric Blake g_autofree NBDExtent32 *extents = NULL;
2443bcc16cc1SEric Blake uint16_t type;
2444bcc16cc1SEric Blake struct iovec iov[] = { {.iov_base = &hdr}, {0}, {0} };
2445bcc16cc1SEric Blake
2446bcc16cc1SEric Blake if (client->mode >= NBD_MODE_EXTENDED) {
2447bcc16cc1SEric Blake type = NBD_REPLY_TYPE_BLOCK_STATUS_EXT;
2448bcc16cc1SEric Blake
2449bcc16cc1SEric Blake iov[1].iov_base = &meta_ext;
2450bcc16cc1SEric Blake iov[1].iov_len = sizeof(meta_ext);
2451bcc16cc1SEric Blake stl_be_p(&meta_ext.context_id, context_id);
2452bcc16cc1SEric Blake stl_be_p(&meta_ext.count, ea->count);
2453e7b1948dSVladimir Sementsov-Ogievskiy
245489cbc7e3SVladimir Sementsov-Ogievskiy nbd_extent_array_convert_to_be(ea);
2455bcc16cc1SEric Blake iov[2].iov_base = ea->extents;
2456bcc16cc1SEric Blake iov[2].iov_len = ea->count * sizeof(ea->extents[0]);
2457bcc16cc1SEric Blake } else {
2458bcc16cc1SEric Blake type = NBD_REPLY_TYPE_BLOCK_STATUS;
2459bcc16cc1SEric Blake
2460bcc16cc1SEric Blake iov[1].iov_base = &meta;
2461bcc16cc1SEric Blake iov[1].iov_len = sizeof(meta);
2462bcc16cc1SEric Blake stl_be_p(&meta.context_id, context_id);
2463bcc16cc1SEric Blake
2464bcc16cc1SEric Blake extents = nbd_extent_array_convert_to_narrow(ea);
2465bcc16cc1SEric Blake iov[2].iov_base = extents;
2466bcc16cc1SEric Blake iov[2].iov_len = ea->count * sizeof(extents[0]);
2467bcc16cc1SEric Blake }
246889cbc7e3SVladimir Sementsov-Ogievskiy
246922efd811SEric Blake trace_nbd_co_send_extents(request->cookie, ea->count, context_id,
247066d4f4feSEric Blake ea->total_length, last);
2471bcc16cc1SEric Blake set_be_chunk(client, iov, 3, last ? NBD_REPLY_FLAG_DONE : 0, type,
2472bcc16cc1SEric Blake request);
2473e7b1948dSVladimir Sementsov-Ogievskiy
2474a7c8ed36SEric Blake return nbd_co_send_iov(client, iov, 3, errp);
2475e7b1948dSVladimir Sementsov-Ogievskiy }
2476e7b1948dSVladimir Sementsov-Ogievskiy
2477e7b1948dSVladimir Sementsov-Ogievskiy /* Get block status from the exported device and send it to the client */
24786f58ac55SEmanuele Giuseppe Esposito static int
nbd_co_send_block_status(NBDClient * client,NBDRequest * request,BlockBackend * blk,uint64_t offset,uint64_t length,bool dont_fragment,bool last,uint32_t context_id,Error ** errp)247966d4f4feSEric Blake coroutine_fn nbd_co_send_block_status(NBDClient *client, NBDRequest *request,
2480ff7e261bSEmanuele Giuseppe Esposito BlockBackend *blk, uint64_t offset,
2481bcc16cc1SEric Blake uint64_t length, bool dont_fragment,
2482fb7afc79SVladimir Sementsov-Ogievskiy bool last, uint32_t context_id,
2483fb7afc79SVladimir Sementsov-Ogievskiy Error **errp)
2484e7b1948dSVladimir Sementsov-Ogievskiy {
2485e7b1948dSVladimir Sementsov-Ogievskiy int ret;
2486416e34bdSEric Blake unsigned int nb_extents = dont_fragment ? 1 : NBD_MAX_BLOCK_STATUS_EXTENTS;
2487bcc16cc1SEric Blake g_autoptr(NBDExtentArray) ea =
2488bcc16cc1SEric Blake nbd_extent_array_new(nb_extents, client->mode);
2489e7b1948dSVladimir Sementsov-Ogievskiy
249071719cd5SEric Blake if (context_id == NBD_META_ID_BASE_ALLOCATION) {
2491ff7e261bSEmanuele Giuseppe Esposito ret = blockstatus_to_extents(blk, offset, length, ea);
249271719cd5SEric Blake } else {
2493ff7e261bSEmanuele Giuseppe Esposito ret = blockalloc_to_extents(blk, offset, length, ea);
249471719cd5SEric Blake }
2495e7b1948dSVladimir Sementsov-Ogievskiy if (ret < 0) {
249666d4f4feSEric Blake return nbd_co_send_chunk_error(client, request, -ret,
2497a7c8ed36SEric Blake "can't get block status", errp);
2498e7b1948dSVladimir Sementsov-Ogievskiy }
2499e7b1948dSVladimir Sementsov-Ogievskiy
250066d4f4feSEric Blake return nbd_co_send_extents(client, request, ea, last, context_id, errp);
25013d068affSVladimir Sementsov-Ogievskiy }
25023d068affSVladimir Sementsov-Ogievskiy
2503dacbb6ebSVladimir Sementsov-Ogievskiy /* Populate @ea from a dirty bitmap. */
bitmap_to_extents(BdrvDirtyBitmap * bitmap,uint64_t offset,uint64_t length,NBDExtentArray * es)250489cbc7e3SVladimir Sementsov-Ogievskiy static void bitmap_to_extents(BdrvDirtyBitmap *bitmap,
250589cbc7e3SVladimir Sementsov-Ogievskiy uint64_t offset, uint64_t length,
2506dacbb6ebSVladimir Sementsov-Ogievskiy NBDExtentArray *es)
25073d068affSVladimir Sementsov-Ogievskiy {
2508dacbb6ebSVladimir Sementsov-Ogievskiy int64_t start, dirty_start, dirty_count;
2509dacbb6ebSVladimir Sementsov-Ogievskiy int64_t end = offset + length;
2510dacbb6ebSVladimir Sementsov-Ogievskiy bool full = false;
2511bcc16cc1SEric Blake int64_t bound = es->extended ? INT64_MAX : INT32_MAX;
25123d068affSVladimir Sementsov-Ogievskiy
25133d068affSVladimir Sementsov-Ogievskiy bdrv_dirty_bitmap_lock(bitmap);
25143d068affSVladimir Sementsov-Ogievskiy
2515dacbb6ebSVladimir Sementsov-Ogievskiy for (start = offset;
2516bcc16cc1SEric Blake bdrv_dirty_bitmap_next_dirty_area(bitmap, start, end, bound,
2517dacbb6ebSVladimir Sementsov-Ogievskiy &dirty_start, &dirty_count);
2518dacbb6ebSVladimir Sementsov-Ogievskiy start = dirty_start + dirty_count)
2519dacbb6ebSVladimir Sementsov-Ogievskiy {
2520dacbb6ebSVladimir Sementsov-Ogievskiy if ((nbd_extent_array_add(es, dirty_start - start, 0) < 0) ||
2521dacbb6ebSVladimir Sementsov-Ogievskiy (nbd_extent_array_add(es, dirty_count, NBD_STATE_DIRTY) < 0))
2522dacbb6ebSVladimir Sementsov-Ogievskiy {
2523dacbb6ebSVladimir Sementsov-Ogievskiy full = true;
252489cbc7e3SVladimir Sementsov-Ogievskiy break;
252589cbc7e3SVladimir Sementsov-Ogievskiy }
25263d068affSVladimir Sementsov-Ogievskiy }
25273d068affSVladimir Sementsov-Ogievskiy
2528dacbb6ebSVladimir Sementsov-Ogievskiy if (!full) {
2529c0b21f2eSEric Blake /* last non dirty extent, nothing to do if array is now full */
2530c0b21f2eSEric Blake (void) nbd_extent_array_add(es, end - start, 0);
2531dacbb6ebSVladimir Sementsov-Ogievskiy }
25323d068affSVladimir Sementsov-Ogievskiy
25333d068affSVladimir Sementsov-Ogievskiy bdrv_dirty_bitmap_unlock(bitmap);
25343d068affSVladimir Sementsov-Ogievskiy }
25353d068affSVladimir Sementsov-Ogievskiy
nbd_co_send_bitmap(NBDClient * client,NBDRequest * request,BdrvDirtyBitmap * bitmap,uint64_t offset,uint64_t length,bool dont_fragment,bool last,uint32_t context_id,Error ** errp)253666d4f4feSEric Blake static int coroutine_fn nbd_co_send_bitmap(NBDClient *client,
253766d4f4feSEric Blake NBDRequest *request,
253866d4f4feSEric Blake BdrvDirtyBitmap *bitmap,
253966d4f4feSEric Blake uint64_t offset,
2540bcc16cc1SEric Blake uint64_t length, bool dont_fragment,
254166d4f4feSEric Blake bool last, uint32_t context_id,
254266d4f4feSEric Blake Error **errp)
25433d068affSVladimir Sementsov-Ogievskiy {
2544416e34bdSEric Blake unsigned int nb_extents = dont_fragment ? 1 : NBD_MAX_BLOCK_STATUS_EXTENTS;
2545bcc16cc1SEric Blake g_autoptr(NBDExtentArray) ea =
2546bcc16cc1SEric Blake nbd_extent_array_new(nb_extents, client->mode);
25473d068affSVladimir Sementsov-Ogievskiy
2548dacbb6ebSVladimir Sementsov-Ogievskiy bitmap_to_extents(bitmap, offset, length, ea);
25493d068affSVladimir Sementsov-Ogievskiy
255066d4f4feSEric Blake return nbd_co_send_extents(client, request, ea, last, context_id, errp);
2551e7b1948dSVladimir Sementsov-Ogievskiy }
2552e7b1948dSVladimir Sementsov-Ogievskiy
25532dcbb11bSEric Blake /*
25542dcbb11bSEric Blake * nbd_co_block_status_payload_read
25552dcbb11bSEric Blake * Called when a client wants a subset of negotiated contexts via a
25562dcbb11bSEric Blake * BLOCK_STATUS payload. Check the payload for valid length and
25572dcbb11bSEric Blake * contents. On success, return 0 with request updated to effective
25582dcbb11bSEric Blake * length. If request was invalid but all payload consumed, return 0
25592dcbb11bSEric Blake * with request->len and request->contexts->count set to 0 (which will
25602dcbb11bSEric Blake * trigger an appropriate NBD_EINVAL response later on). Return
25612dcbb11bSEric Blake * negative errno if the payload was not fully consumed.
25622dcbb11bSEric Blake */
25632dcbb11bSEric Blake static int
nbd_co_block_status_payload_read(NBDClient * client,NBDRequest * request,Error ** errp)25642dcbb11bSEric Blake nbd_co_block_status_payload_read(NBDClient *client, NBDRequest *request,
25652dcbb11bSEric Blake Error **errp)
25662dcbb11bSEric Blake {
25672dcbb11bSEric Blake uint64_t payload_len = request->len;
25682dcbb11bSEric Blake g_autofree char *buf = NULL;
25692dcbb11bSEric Blake size_t count, i, nr_bitmaps;
25702dcbb11bSEric Blake uint32_t id;
25712dcbb11bSEric Blake
25722dcbb11bSEric Blake if (payload_len > NBD_MAX_BUFFER_SIZE) {
25732dcbb11bSEric Blake error_setg(errp, "len (%" PRIu64 ") is larger than max len (%u)",
25742dcbb11bSEric Blake request->len, NBD_MAX_BUFFER_SIZE);
25752dcbb11bSEric Blake return -EINVAL;
25762dcbb11bSEric Blake }
25772dcbb11bSEric Blake
25782dcbb11bSEric Blake assert(client->contexts.exp == client->exp);
25792dcbb11bSEric Blake nr_bitmaps = client->exp->nr_export_bitmaps;
25802dcbb11bSEric Blake request->contexts = g_new0(NBDMetaContexts, 1);
25812dcbb11bSEric Blake request->contexts->exp = client->exp;
25822dcbb11bSEric Blake
25832dcbb11bSEric Blake if (payload_len % sizeof(uint32_t) ||
25842dcbb11bSEric Blake payload_len < sizeof(NBDBlockStatusPayload) ||
25852dcbb11bSEric Blake payload_len > (sizeof(NBDBlockStatusPayload) +
25862dcbb11bSEric Blake sizeof(id) * client->contexts.count)) {
25872dcbb11bSEric Blake goto skip;
25882dcbb11bSEric Blake }
25892dcbb11bSEric Blake
25902dcbb11bSEric Blake buf = g_malloc(payload_len);
25912dcbb11bSEric Blake if (nbd_read(client->ioc, buf, payload_len,
25922dcbb11bSEric Blake "CMD_BLOCK_STATUS data", errp) < 0) {
25932dcbb11bSEric Blake return -EIO;
25942dcbb11bSEric Blake }
25952dcbb11bSEric Blake trace_nbd_co_receive_request_payload_received(request->cookie,
25962dcbb11bSEric Blake payload_len);
25972dcbb11bSEric Blake request->contexts->bitmaps = g_new0(bool, nr_bitmaps);
25982dcbb11bSEric Blake count = (payload_len - sizeof(NBDBlockStatusPayload)) / sizeof(id);
25992dcbb11bSEric Blake payload_len = 0;
26002dcbb11bSEric Blake
26012dcbb11bSEric Blake for (i = 0; i < count; i++) {
26022dcbb11bSEric Blake id = ldl_be_p(buf + sizeof(NBDBlockStatusPayload) + sizeof(id) * i);
26032dcbb11bSEric Blake if (id == NBD_META_ID_BASE_ALLOCATION) {
26042dcbb11bSEric Blake if (!client->contexts.base_allocation ||
26052dcbb11bSEric Blake request->contexts->base_allocation) {
26062dcbb11bSEric Blake goto skip;
26072dcbb11bSEric Blake }
26082dcbb11bSEric Blake request->contexts->base_allocation = true;
26092dcbb11bSEric Blake } else if (id == NBD_META_ID_ALLOCATION_DEPTH) {
26102dcbb11bSEric Blake if (!client->contexts.allocation_depth ||
26112dcbb11bSEric Blake request->contexts->allocation_depth) {
26122dcbb11bSEric Blake goto skip;
26132dcbb11bSEric Blake }
26142dcbb11bSEric Blake request->contexts->allocation_depth = true;
26152dcbb11bSEric Blake } else {
26162dcbb11bSEric Blake unsigned idx = id - NBD_META_ID_DIRTY_BITMAP;
26172dcbb11bSEric Blake
26182dcbb11bSEric Blake if (idx >= nr_bitmaps || !client->contexts.bitmaps[idx] ||
26192dcbb11bSEric Blake request->contexts->bitmaps[idx]) {
26202dcbb11bSEric Blake goto skip;
26212dcbb11bSEric Blake }
26222dcbb11bSEric Blake request->contexts->bitmaps[idx] = true;
26232dcbb11bSEric Blake }
26242dcbb11bSEric Blake }
26252dcbb11bSEric Blake
26262dcbb11bSEric Blake request->len = ldq_be_p(buf);
26272dcbb11bSEric Blake request->contexts->count = count;
26282dcbb11bSEric Blake return 0;
26292dcbb11bSEric Blake
26302dcbb11bSEric Blake skip:
26312dcbb11bSEric Blake trace_nbd_co_receive_block_status_payload_compliance(request->from,
26322dcbb11bSEric Blake request->len);
26332dcbb11bSEric Blake request->len = request->contexts->count = 0;
26342dcbb11bSEric Blake return nbd_drop(client->ioc, payload_len, errp);
26352dcbb11bSEric Blake }
26362dcbb11bSEric Blake
26372a6e128bSVladimir Sementsov-Ogievskiy /* nbd_co_receive_request
26382a6e128bSVladimir Sementsov-Ogievskiy * Collect a client request. Return 0 if request looks valid, -EIO to drop
2639f148ae7dSSergio Lopez * connection right away, -EAGAIN to indicate we were interrupted and the
2640f148ae7dSSergio Lopez * channel should be quiesced, and any other negative value to report an error
2641f148ae7dSSergio Lopez * to the client (although the caller may still need to disconnect after
2642f148ae7dSSergio Lopez * reporting the error).
26432a6e128bSVladimir Sementsov-Ogievskiy */
nbd_co_receive_request(NBDRequestData * req,NBDRequest * request,Error ** errp)26448db7e2d6SEric Blake static int coroutine_fn nbd_co_receive_request(NBDRequestData *req,
26458db7e2d6SEric Blake NBDRequest *request,
26462fd2c840SVladimir Sementsov-Ogievskiy Error **errp)
2647798bfe00SFam Zheng {
2648798bfe00SFam Zheng NBDClient *client = req->client;
2649009cd866SEric Blake bool extended_with_payload;
26508db7e2d6SEric Blake bool check_length = false;
26518db7e2d6SEric Blake bool check_rofs = false;
26528db7e2d6SEric Blake bool allocate_buffer = false;
2653009cd866SEric Blake bool payload_okay = false;
2654009cd866SEric Blake uint64_t payload_len = 0;
26558db7e2d6SEric Blake int valid_flags = NBD_CMD_FLAG_FUA;
2656f148ae7dSSergio Lopez int ret;
2657798bfe00SFam Zheng
26581c778ef7SDaniel P. Berrange g_assert(qemu_in_coroutine());
2659f148ae7dSSergio Lopez ret = nbd_receive_request(client, request, errp);
2660f148ae7dSSergio Lopez if (ret < 0) {
2661f148ae7dSSergio Lopez return ret;
2662798bfe00SFam Zheng }
2663798bfe00SFam Zheng
266422efd811SEric Blake trace_nbd_co_receive_request_decode_type(request->cookie, request->type,
26653736cc5bSEric Blake nbd_cmd_lookup(request->type));
2666009cd866SEric Blake extended_with_payload = client->mode >= NBD_MODE_EXTENDED &&
2667009cd866SEric Blake request->flags & NBD_CMD_FLAG_PAYLOAD_LEN;
2668009cd866SEric Blake if (extended_with_payload) {
2669009cd866SEric Blake payload_len = request->len;
2670009cd866SEric Blake check_length = true;
2671009cd866SEric Blake }
2672009cd866SEric Blake
26738db7e2d6SEric Blake switch (request->type) {
26748db7e2d6SEric Blake case NBD_CMD_DISC:
267529b6c3b3SEric Blake /* Special case: we're going to disconnect without a reply,
267629b6c3b3SEric Blake * whether or not flags, from, or len are bogus */
26778db7e2d6SEric Blake req->complete = true;
2678ee898b87SVladimir Sementsov-Ogievskiy return -EIO;
26798db7e2d6SEric Blake
26808db7e2d6SEric Blake case NBD_CMD_READ:
26818db7e2d6SEric Blake if (client->mode >= NBD_MODE_STRUCTURED) {
26828db7e2d6SEric Blake valid_flags |= NBD_CMD_FLAG_DF;
26838db7e2d6SEric Blake }
26848db7e2d6SEric Blake check_length = true;
26858db7e2d6SEric Blake allocate_buffer = true;
26868db7e2d6SEric Blake break;
26878db7e2d6SEric Blake
26888db7e2d6SEric Blake case NBD_CMD_WRITE:
2689009cd866SEric Blake if (client->mode >= NBD_MODE_EXTENDED) {
2690009cd866SEric Blake if (!extended_with_payload) {
2691009cd866SEric Blake /* The client is noncompliant. Trace it, but proceed. */
2692009cd866SEric Blake trace_nbd_co_receive_ext_payload_compliance(request->from,
2693009cd866SEric Blake request->len);
2694009cd866SEric Blake }
2695009cd866SEric Blake valid_flags |= NBD_CMD_FLAG_PAYLOAD_LEN;
2696009cd866SEric Blake }
2697009cd866SEric Blake payload_okay = true;
26988db7e2d6SEric Blake payload_len = request->len;
26998db7e2d6SEric Blake check_length = true;
27008db7e2d6SEric Blake allocate_buffer = true;
27018db7e2d6SEric Blake check_rofs = true;
27028db7e2d6SEric Blake break;
27038db7e2d6SEric Blake
27048db7e2d6SEric Blake case NBD_CMD_FLUSH:
27058db7e2d6SEric Blake break;
27068db7e2d6SEric Blake
27078db7e2d6SEric Blake case NBD_CMD_TRIM:
27088db7e2d6SEric Blake check_rofs = true;
27098db7e2d6SEric Blake break;
27108db7e2d6SEric Blake
27118db7e2d6SEric Blake case NBD_CMD_CACHE:
27128db7e2d6SEric Blake check_length = true;
27138db7e2d6SEric Blake break;
27148db7e2d6SEric Blake
27158db7e2d6SEric Blake case NBD_CMD_WRITE_ZEROES:
27168db7e2d6SEric Blake valid_flags |= NBD_CMD_FLAG_NO_HOLE | NBD_CMD_FLAG_FAST_ZERO;
27178db7e2d6SEric Blake check_rofs = true;
27188db7e2d6SEric Blake break;
27198db7e2d6SEric Blake
27208db7e2d6SEric Blake case NBD_CMD_BLOCK_STATUS:
27212dcbb11bSEric Blake if (extended_with_payload) {
27222dcbb11bSEric Blake ret = nbd_co_block_status_payload_read(client, request, errp);
27232dcbb11bSEric Blake if (ret < 0) {
27242dcbb11bSEric Blake return ret;
27252dcbb11bSEric Blake }
27262dcbb11bSEric Blake /* payload now consumed */
27272dcbb11bSEric Blake check_length = false;
27282dcbb11bSEric Blake payload_len = 0;
27292dcbb11bSEric Blake valid_flags |= NBD_CMD_FLAG_PAYLOAD_LEN;
27302dcbb11bSEric Blake } else {
27311dec4643SEric Blake request->contexts = &client->contexts;
27322dcbb11bSEric Blake }
27338db7e2d6SEric Blake valid_flags |= NBD_CMD_FLAG_REQ_ONE;
27348db7e2d6SEric Blake break;
27358db7e2d6SEric Blake
27368db7e2d6SEric Blake default:
27378db7e2d6SEric Blake /* Unrecognized, will fail later */
27388db7e2d6SEric Blake ;
273929b6c3b3SEric Blake }
274029b6c3b3SEric Blake
27418db7e2d6SEric Blake /* Payload and buffer handling. */
27428db7e2d6SEric Blake if (!payload_len) {
27438db7e2d6SEric Blake req->complete = true;
27448db7e2d6SEric Blake }
27458db7e2d6SEric Blake if (check_length && request->len > NBD_MAX_BUFFER_SIZE) {
27468db7e2d6SEric Blake /* READ, WRITE, CACHE */
2747b2578459SEric Blake error_setg(errp, "len (%" PRIu64 ") is larger than max len (%u)",
2748eb38c3b6SPaolo Bonzini request->len, NBD_MAX_BUFFER_SIZE);
2749ee898b87SVladimir Sementsov-Ogievskiy return -EINVAL;
2750eb38c3b6SPaolo Bonzini }
2751009cd866SEric Blake if (payload_len && !payload_okay) {
2752009cd866SEric Blake /*
2753009cd866SEric Blake * For now, we don't support payloads on other commands; but
2754009cd866SEric Blake * we can keep the connection alive by ignoring the payload.
2755009cd866SEric Blake * We will fail the command later with NBD_EINVAL for the use
2756009cd866SEric Blake * of an unsupported flag (and not for access beyond bounds).
2757009cd866SEric Blake */
2758009cd866SEric Blake assert(request->type != NBD_CMD_WRITE);
2759009cd866SEric Blake request->len = 0;
2760009cd866SEric Blake }
27618db7e2d6SEric Blake if (allocate_buffer) {
27628db7e2d6SEric Blake /* READ, WRITE */
276337a4f70cSKevin Wolf req->data = blk_try_blockalign(client->exp->common.blk,
276437a4f70cSKevin Wolf request->len);
2765f1c17521SPaolo Bonzini if (req->data == NULL) {
27662fd2c840SVladimir Sementsov-Ogievskiy error_setg(errp, "No memory");
2767ee898b87SVladimir Sementsov-Ogievskiy return -ENOMEM;
2768f1c17521SPaolo Bonzini }
2769798bfe00SFam Zheng }
27708db7e2d6SEric Blake if (payload_len) {
2771009cd866SEric Blake if (payload_okay) {
27728db7e2d6SEric Blake /* WRITE */
27738db7e2d6SEric Blake assert(req->data);
27748db7e2d6SEric Blake ret = nbd_read(client->ioc, req->data, payload_len,
27758db7e2d6SEric Blake "CMD_WRITE data", errp);
2776009cd866SEric Blake } else {
2777009cd866SEric Blake ret = nbd_drop(client->ioc, payload_len, errp);
2778009cd866SEric Blake }
27798db7e2d6SEric Blake if (ret < 0) {
2780ee898b87SVladimir Sementsov-Ogievskiy return -EIO;
2781798bfe00SFam Zheng }
278229b6c3b3SEric Blake req->complete = true;
278322efd811SEric Blake trace_nbd_co_receive_request_payload_received(request->cookie,
27848db7e2d6SEric Blake payload_len);
2785798bfe00SFam Zheng }
278629b6c3b3SEric Blake
2787fed5f8f8SEric Blake /* Sanity checks. */
27888db7e2d6SEric Blake if (client->exp->nbdflags & NBD_FLAG_READ_ONLY && check_rofs) {
27898db7e2d6SEric Blake /* WRITE, TRIM, WRITE_ZEROES */
2790fed5f8f8SEric Blake error_setg(errp, "Export is read-only");
2791fed5f8f8SEric Blake return -EROFS;
2792fed5f8f8SEric Blake }
2793fed5f8f8SEric Blake if (request->from > client->exp->size ||
27949d26dfcbSEric Blake request->len > client->exp->size - request->from) {
2795b2578459SEric Blake error_setg(errp, "operation past EOF; From: %" PRIu64 ", Len: %" PRIu64
279629b6c3b3SEric Blake ", Size: %" PRIu64, request->from, request->len,
27979d26dfcbSEric Blake client->exp->size);
2798fed5f8f8SEric Blake return (request->type == NBD_CMD_WRITE ||
2799fed5f8f8SEric Blake request->type == NBD_CMD_WRITE_ZEROES) ? -ENOSPC : -EINVAL;
280029b6c3b3SEric Blake }
28016e280648SEric Blake if (client->check_align && !QEMU_IS_ALIGNED(request->from | request->len,
28026e280648SEric Blake client->check_align)) {
28036e280648SEric Blake /*
28046e280648SEric Blake * The block layer gracefully handles unaligned requests, but
28056e280648SEric Blake * it's still worth tracing client non-compliance
28066e280648SEric Blake */
28076e280648SEric Blake trace_nbd_co_receive_align_compliance(nbd_cmd_lookup(request->type),
28086e280648SEric Blake request->from,
28096e280648SEric Blake request->len,
28106e280648SEric Blake client->check_align);
28116e280648SEric Blake }
28125c54e7faSVladimir Sementsov-Ogievskiy if (request->flags & ~valid_flags) {
28135c54e7faSVladimir Sementsov-Ogievskiy error_setg(errp, "unsupported flags for command %s (got 0x%x)",
28145c54e7faSVladimir Sementsov-Ogievskiy nbd_cmd_lookup(request->type), request->flags);
2815ee898b87SVladimir Sementsov-Ogievskiy return -EINVAL;
28161f4d6d18SEric Blake }
281729b6c3b3SEric Blake
2818ee898b87SVladimir Sementsov-Ogievskiy return 0;
2819798bfe00SFam Zheng }
2820798bfe00SFam Zheng
28216a417599SVladimir Sementsov-Ogievskiy /* Send simple reply without a payload, or a structured error
28226a417599SVladimir Sementsov-Ogievskiy * @error_msg is ignored if @ret >= 0
28236a417599SVladimir Sementsov-Ogievskiy * Returns 0 if connection is still live, -errno on failure to talk to client
28246a417599SVladimir Sementsov-Ogievskiy */
nbd_send_generic_reply(NBDClient * client,NBDRequest * request,int ret,const char * error_msg,Error ** errp)28256a417599SVladimir Sementsov-Ogievskiy static coroutine_fn int nbd_send_generic_reply(NBDClient *client,
282666d4f4feSEric Blake NBDRequest *request,
28276a417599SVladimir Sementsov-Ogievskiy int ret,
28286a417599SVladimir Sementsov-Ogievskiy const char *error_msg,
28296a417599SVladimir Sementsov-Ogievskiy Error **errp)
28306a417599SVladimir Sementsov-Ogievskiy {
2831ac132d05SEric Blake if (client->mode >= NBD_MODE_STRUCTURED && ret < 0) {
283266d4f4feSEric Blake return nbd_co_send_chunk_error(client, request, -ret, error_msg, errp);
283311d3355fSEric Blake } else if (client->mode >= NBD_MODE_EXTENDED) {
283411d3355fSEric Blake return nbd_co_send_chunk_done(client, request, errp);
28356a417599SVladimir Sementsov-Ogievskiy } else {
283666d4f4feSEric Blake return nbd_co_send_simple_reply(client, request, ret < 0 ? -ret : 0,
28376a417599SVladimir Sementsov-Ogievskiy NULL, 0, errp);
28386a417599SVladimir Sementsov-Ogievskiy }
28396a417599SVladimir Sementsov-Ogievskiy }
28406a417599SVladimir Sementsov-Ogievskiy
28416a417599SVladimir Sementsov-Ogievskiy /* Handle NBD_CMD_READ request.
28426a417599SVladimir Sementsov-Ogievskiy * Return -errno if sending fails. Other errors are reported directly to the
28436a417599SVladimir Sementsov-Ogievskiy * client as an error reply. */
nbd_do_cmd_read(NBDClient * client,NBDRequest * request,uint8_t * data,Error ** errp)28446a417599SVladimir Sementsov-Ogievskiy static coroutine_fn int nbd_do_cmd_read(NBDClient *client, NBDRequest *request,
28456a417599SVladimir Sementsov-Ogievskiy uint8_t *data, Error **errp)
28466a417599SVladimir Sementsov-Ogievskiy {
28476a417599SVladimir Sementsov-Ogievskiy int ret;
28486a417599SVladimir Sementsov-Ogievskiy NBDExport *exp = client->exp;
28496a417599SVladimir Sementsov-Ogievskiy
28507fa5c565SVladimir Sementsov-Ogievskiy assert(request->type == NBD_CMD_READ);
2851b2578459SEric Blake assert(request->len <= NBD_MAX_BUFFER_SIZE);
28526a417599SVladimir Sementsov-Ogievskiy
28536a417599SVladimir Sementsov-Ogievskiy /* XXX: NBD Protocol only documents use of FUA with WRITE */
28546a417599SVladimir Sementsov-Ogievskiy if (request->flags & NBD_CMD_FLAG_FUA) {
285537a4f70cSKevin Wolf ret = blk_co_flush(exp->common.blk);
28566a417599SVladimir Sementsov-Ogievskiy if (ret < 0) {
285766d4f4feSEric Blake return nbd_send_generic_reply(client, request, ret,
28586a417599SVladimir Sementsov-Ogievskiy "flush failed", errp);
28596a417599SVladimir Sementsov-Ogievskiy }
28606a417599SVladimir Sementsov-Ogievskiy }
28616a417599SVladimir Sementsov-Ogievskiy
2862ac132d05SEric Blake if (client->mode >= NBD_MODE_STRUCTURED &&
2863ac132d05SEric Blake !(request->flags & NBD_CMD_FLAG_DF) && request->len)
28642f454defSVladimir Sementsov-Ogievskiy {
286566d4f4feSEric Blake return nbd_co_send_sparse_read(client, request, request->from,
28666a417599SVladimir Sementsov-Ogievskiy data, request->len, errp);
28676a417599SVladimir Sementsov-Ogievskiy }
28686a417599SVladimir Sementsov-Ogievskiy
2869d2223cddSPaolo Bonzini ret = blk_co_pread(exp->common.blk, request->from, request->len, data, 0);
28707fa5c565SVladimir Sementsov-Ogievskiy if (ret < 0) {
287166d4f4feSEric Blake return nbd_send_generic_reply(client, request, ret,
28726a417599SVladimir Sementsov-Ogievskiy "reading from file failed", errp);
28736a417599SVladimir Sementsov-Ogievskiy }
28746a417599SVladimir Sementsov-Ogievskiy
2875ac132d05SEric Blake if (client->mode >= NBD_MODE_STRUCTURED) {
28766a417599SVladimir Sementsov-Ogievskiy if (request->len) {
287766d4f4feSEric Blake return nbd_co_send_chunk_read(client, request, request->from, data,
28786a417599SVladimir Sementsov-Ogievskiy request->len, true, errp);
28796a417599SVladimir Sementsov-Ogievskiy } else {
288066d4f4feSEric Blake return nbd_co_send_chunk_done(client, request, errp);
28816a417599SVladimir Sementsov-Ogievskiy }
28826a417599SVladimir Sementsov-Ogievskiy } else {
288366d4f4feSEric Blake return nbd_co_send_simple_reply(client, request, 0,
28846a417599SVladimir Sementsov-Ogievskiy data, request->len, errp);
28856a417599SVladimir Sementsov-Ogievskiy }
28866a417599SVladimir Sementsov-Ogievskiy }
28876a417599SVladimir Sementsov-Ogievskiy
28887fa5c565SVladimir Sementsov-Ogievskiy /*
28897fa5c565SVladimir Sementsov-Ogievskiy * nbd_do_cmd_cache
28907fa5c565SVladimir Sementsov-Ogievskiy *
28917fa5c565SVladimir Sementsov-Ogievskiy * Handle NBD_CMD_CACHE request.
28927fa5c565SVladimir Sementsov-Ogievskiy * Return -errno if sending fails. Other errors are reported directly to the
28937fa5c565SVladimir Sementsov-Ogievskiy * client as an error reply.
28947fa5c565SVladimir Sementsov-Ogievskiy */
nbd_do_cmd_cache(NBDClient * client,NBDRequest * request,Error ** errp)28957fa5c565SVladimir Sementsov-Ogievskiy static coroutine_fn int nbd_do_cmd_cache(NBDClient *client, NBDRequest *request,
28967fa5c565SVladimir Sementsov-Ogievskiy Error **errp)
28977fa5c565SVladimir Sementsov-Ogievskiy {
28987fa5c565SVladimir Sementsov-Ogievskiy int ret;
28997fa5c565SVladimir Sementsov-Ogievskiy NBDExport *exp = client->exp;
29007fa5c565SVladimir Sementsov-Ogievskiy
29017fa5c565SVladimir Sementsov-Ogievskiy assert(request->type == NBD_CMD_CACHE);
2902b2578459SEric Blake assert(request->len <= NBD_MAX_BUFFER_SIZE);
29037fa5c565SVladimir Sementsov-Ogievskiy
290437a4f70cSKevin Wolf ret = blk_co_preadv(exp->common.blk, request->from, request->len,
29057fa5c565SVladimir Sementsov-Ogievskiy NULL, BDRV_REQ_COPY_ON_READ | BDRV_REQ_PREFETCH);
29067fa5c565SVladimir Sementsov-Ogievskiy
290766d4f4feSEric Blake return nbd_send_generic_reply(client, request, ret,
29087fa5c565SVladimir Sementsov-Ogievskiy "caching data failed", errp);
29097fa5c565SVladimir Sementsov-Ogievskiy }
29107fa5c565SVladimir Sementsov-Ogievskiy
29116f302e60SVladimir Sementsov-Ogievskiy /* Handle NBD request.
29126f302e60SVladimir Sementsov-Ogievskiy * Return -errno if sending fails. Other errors are reported directly to the
29136f302e60SVladimir Sementsov-Ogievskiy * client as an error reply. */
nbd_handle_request(NBDClient * client,NBDRequest * request,uint8_t * data,Error ** errp)29146f302e60SVladimir Sementsov-Ogievskiy static coroutine_fn int nbd_handle_request(NBDClient *client,
29156f302e60SVladimir Sementsov-Ogievskiy NBDRequest *request,
29166f302e60SVladimir Sementsov-Ogievskiy uint8_t *data, Error **errp)
29176f302e60SVladimir Sementsov-Ogievskiy {
29186f302e60SVladimir Sementsov-Ogievskiy int ret;
29196f302e60SVladimir Sementsov-Ogievskiy int flags;
29206f302e60SVladimir Sementsov-Ogievskiy NBDExport *exp = client->exp;
29216f302e60SVladimir Sementsov-Ogievskiy char *msg;
29223b1f244cSEric Blake size_t i;
29236f302e60SVladimir Sementsov-Ogievskiy
29246f302e60SVladimir Sementsov-Ogievskiy switch (request->type) {
2925bc37b06aSVladimir Sementsov-Ogievskiy case NBD_CMD_CACHE:
29267fa5c565SVladimir Sementsov-Ogievskiy return nbd_do_cmd_cache(client, request, errp);
29277fa5c565SVladimir Sementsov-Ogievskiy
29287fa5c565SVladimir Sementsov-Ogievskiy case NBD_CMD_READ:
29296f302e60SVladimir Sementsov-Ogievskiy return nbd_do_cmd_read(client, request, data, errp);
29306f302e60SVladimir Sementsov-Ogievskiy
29316f302e60SVladimir Sementsov-Ogievskiy case NBD_CMD_WRITE:
29326f302e60SVladimir Sementsov-Ogievskiy flags = 0;
29336f302e60SVladimir Sementsov-Ogievskiy if (request->flags & NBD_CMD_FLAG_FUA) {
29346f302e60SVladimir Sementsov-Ogievskiy flags |= BDRV_REQ_FUA;
29356f302e60SVladimir Sementsov-Ogievskiy }
2936b2578459SEric Blake assert(request->len <= NBD_MAX_BUFFER_SIZE);
2937d2223cddSPaolo Bonzini ret = blk_co_pwrite(exp->common.blk, request->from, request->len, data,
293837a4f70cSKevin Wolf flags);
293966d4f4feSEric Blake return nbd_send_generic_reply(client, request, ret,
29406f302e60SVladimir Sementsov-Ogievskiy "writing to file failed", errp);
29416f302e60SVladimir Sementsov-Ogievskiy
29426f302e60SVladimir Sementsov-Ogievskiy case NBD_CMD_WRITE_ZEROES:
29436f302e60SVladimir Sementsov-Ogievskiy flags = 0;
29446f302e60SVladimir Sementsov-Ogievskiy if (request->flags & NBD_CMD_FLAG_FUA) {
29456f302e60SVladimir Sementsov-Ogievskiy flags |= BDRV_REQ_FUA;
29466f302e60SVladimir Sementsov-Ogievskiy }
29476f302e60SVladimir Sementsov-Ogievskiy if (!(request->flags & NBD_CMD_FLAG_NO_HOLE)) {
29486f302e60SVladimir Sementsov-Ogievskiy flags |= BDRV_REQ_MAY_UNMAP;
29496f302e60SVladimir Sementsov-Ogievskiy }
2950b491dbb7SEric Blake if (request->flags & NBD_CMD_FLAG_FAST_ZERO) {
2951b491dbb7SEric Blake flags |= BDRV_REQ_NO_FALLBACK;
2952b491dbb7SEric Blake }
2953d2223cddSPaolo Bonzini ret = blk_co_pwrite_zeroes(exp->common.blk, request->from, request->len,
2954e3557422SEric Blake flags);
295566d4f4feSEric Blake return nbd_send_generic_reply(client, request, ret,
29566f302e60SVladimir Sementsov-Ogievskiy "writing to file failed", errp);
29576f302e60SVladimir Sementsov-Ogievskiy
29586f302e60SVladimir Sementsov-Ogievskiy case NBD_CMD_DISC:
29596f302e60SVladimir Sementsov-Ogievskiy /* unreachable, thanks to special case in nbd_co_receive_request() */
29606f302e60SVladimir Sementsov-Ogievskiy abort();
29616f302e60SVladimir Sementsov-Ogievskiy
29626f302e60SVladimir Sementsov-Ogievskiy case NBD_CMD_FLUSH:
296337a4f70cSKevin Wolf ret = blk_co_flush(exp->common.blk);
296466d4f4feSEric Blake return nbd_send_generic_reply(client, request, ret,
29656f302e60SVladimir Sementsov-Ogievskiy "flush failed", errp);
29666f302e60SVladimir Sementsov-Ogievskiy
29676f302e60SVladimir Sementsov-Ogievskiy case NBD_CMD_TRIM:
2968e3557422SEric Blake ret = blk_co_pdiscard(exp->common.blk, request->from, request->len);
2969890cbccbSEric Blake if (ret >= 0 && request->flags & NBD_CMD_FLAG_FUA) {
297037a4f70cSKevin Wolf ret = blk_co_flush(exp->common.blk);
297165529782SEric Blake }
297266d4f4feSEric Blake return nbd_send_generic_reply(client, request, ret,
29736f302e60SVladimir Sementsov-Ogievskiy "discard failed", errp);
29746f302e60SVladimir Sementsov-Ogievskiy
2975e7b1948dSVladimir Sementsov-Ogievskiy case NBD_CMD_BLOCK_STATUS:
29761dec4643SEric Blake assert(request->contexts);
2977bcc16cc1SEric Blake assert(client->mode >= NBD_MODE_EXTENDED ||
2978bcc16cc1SEric Blake request->len <= UINT32_MAX);
29791dec4643SEric Blake if (request->contexts->count) {
2980fb7afc79SVladimir Sementsov-Ogievskiy bool dont_fragment = request->flags & NBD_CMD_FLAG_REQ_ONE;
29811dec4643SEric Blake int contexts_remaining = request->contexts->count;
2982fb7afc79SVladimir Sementsov-Ogievskiy
29832dcbb11bSEric Blake if (!request->len) {
29842dcbb11bSEric Blake return nbd_send_generic_reply(client, request, -EINVAL,
29852dcbb11bSEric Blake "need non-zero length", errp);
29862dcbb11bSEric Blake }
29871dec4643SEric Blake if (request->contexts->base_allocation) {
298866d4f4feSEric Blake ret = nbd_co_send_block_status(client, request,
2989ff7e261bSEmanuele Giuseppe Esposito exp->common.blk,
299037a4f70cSKevin Wolf request->from,
2991fb7afc79SVladimir Sementsov-Ogievskiy request->len, dont_fragment,
299247ec485eSEric Blake !--contexts_remaining,
29933d068affSVladimir Sementsov-Ogievskiy NBD_META_ID_BASE_ALLOCATION,
29943d068affSVladimir Sementsov-Ogievskiy errp);
299573e064ccSEric Blake if (ret < 0) {
299673e064ccSEric Blake return ret;
299773e064ccSEric Blake }
299873e064ccSEric Blake }
299973e064ccSEric Blake
30001dec4643SEric Blake if (request->contexts->allocation_depth) {
300166d4f4feSEric Blake ret = nbd_co_send_block_status(client, request,
3002ff7e261bSEmanuele Giuseppe Esposito exp->common.blk,
300371719cd5SEric Blake request->from, request->len,
300471719cd5SEric Blake dont_fragment,
300571719cd5SEric Blake !--contexts_remaining,
300671719cd5SEric Blake NBD_META_ID_ALLOCATION_DEPTH,
300771719cd5SEric Blake errp);
300871719cd5SEric Blake if (ret < 0) {
300971719cd5SEric Blake return ret;
301071719cd5SEric Blake }
301171719cd5SEric Blake }
301271719cd5SEric Blake
30131dec4643SEric Blake assert(request->contexts->exp == client->exp);
30143b1f244cSEric Blake for (i = 0; i < client->exp->nr_export_bitmaps; i++) {
30151dec4643SEric Blake if (!request->contexts->bitmaps[i]) {
30163b1f244cSEric Blake continue;
30173b1f244cSEric Blake }
301866d4f4feSEric Blake ret = nbd_co_send_bitmap(client, request,
30193b1f244cSEric Blake client->exp->export_bitmaps[i],
30203d068affSVladimir Sementsov-Ogievskiy request->from, request->len,
302147ec485eSEric Blake dont_fragment, !--contexts_remaining,
30223b1f244cSEric Blake NBD_META_ID_DIRTY_BITMAP + i, errp);
302373e064ccSEric Blake if (ret < 0) {
302473e064ccSEric Blake return ret;
302573e064ccSEric Blake }
30263d068affSVladimir Sementsov-Ogievskiy }
30273d068affSVladimir Sementsov-Ogievskiy
302847ec485eSEric Blake assert(!contexts_remaining);
302947ec485eSEric Blake
303073e064ccSEric Blake return 0;
30311dec4643SEric Blake } else if (client->contexts.count) {
30321dec4643SEric Blake return nbd_send_generic_reply(client, request, -EINVAL,
30331dec4643SEric Blake "CMD_BLOCK_STATUS payload not valid",
30341dec4643SEric Blake errp);
3035e7b1948dSVladimir Sementsov-Ogievskiy } else {
303666d4f4feSEric Blake return nbd_send_generic_reply(client, request, -EINVAL,
3037e7b1948dSVladimir Sementsov-Ogievskiy "CMD_BLOCK_STATUS not negotiated",
3038e7b1948dSVladimir Sementsov-Ogievskiy errp);
3039e7b1948dSVladimir Sementsov-Ogievskiy }
3040e7b1948dSVladimir Sementsov-Ogievskiy
30416f302e60SVladimir Sementsov-Ogievskiy default:
30426f302e60SVladimir Sementsov-Ogievskiy msg = g_strdup_printf("invalid request type (%" PRIu32 ") received",
30436f302e60SVladimir Sementsov-Ogievskiy request->type);
304466d4f4feSEric Blake ret = nbd_send_generic_reply(client, request, -EINVAL, msg,
30456f302e60SVladimir Sementsov-Ogievskiy errp);
30466f302e60SVladimir Sementsov-Ogievskiy g_free(msg);
30476f302e60SVladimir Sementsov-Ogievskiy return ret;
30486f302e60SVladimir Sementsov-Ogievskiy }
30496f302e60SVladimir Sementsov-Ogievskiy }
30506f302e60SVladimir Sementsov-Ogievskiy
3051ff82911cSPaolo Bonzini /* Owns a reference to the NBDClient passed as opaque. */
nbd_trip(void * opaque)3052ff82911cSPaolo Bonzini static coroutine_fn void nbd_trip(void *opaque)
3053798bfe00SFam Zheng {
30549c707525SKevin Wolf NBDRequestData *req = opaque;
30559c707525SKevin Wolf NBDClient *client = req->client;
3056ff82911cSPaolo Bonzini NBDRequest request = { 0 }; /* GCC thinks it can be used uninitialized */
3057a0dc63a6SVladimir Sementsov-Ogievskiy int ret;
30582fd2c840SVladimir Sementsov-Ogievskiy Error *local_err = NULL;
3059798bfe00SFam Zheng
3060f816310dSStefan Hajnoczi /*
3061f816310dSStefan Hajnoczi * Note that nbd_client_put() and client_close() must be called from the
3062f816310dSStefan Hajnoczi * main loop thread. Use aio_co_reschedule_self() to switch AioContext
3063f816310dSStefan Hajnoczi * before calling these functions.
3064f816310dSStefan Hajnoczi */
3065f816310dSStefan Hajnoczi
30669588463eSVladimir Sementsov-Ogievskiy trace_nbd_trip();
30677075d235SStefan Hajnoczi
30687075d235SStefan Hajnoczi qemu_mutex_lock(&client->lock);
30697075d235SStefan Hajnoczi
3070798bfe00SFam Zheng if (client->closing) {
3071f816310dSStefan Hajnoczi goto done;
3072798bfe00SFam Zheng }
3073798bfe00SFam Zheng
3074f148ae7dSSergio Lopez if (client->quiescing) {
3075f148ae7dSSergio Lopez /*
3076f148ae7dSSergio Lopez * We're switching between AIO contexts. Don't attempt to receive a new
3077f148ae7dSSergio Lopez * request and kick the main context which may be waiting for us.
3078f148ae7dSSergio Lopez */
3079f148ae7dSSergio Lopez client->recv_coroutine = NULL;
3080f148ae7dSSergio Lopez aio_wait_kick();
3081f816310dSStefan Hajnoczi goto done;
3082f148ae7dSSergio Lopez }
3083f148ae7dSSergio Lopez
30847075d235SStefan Hajnoczi /*
30857075d235SStefan Hajnoczi * nbd_co_receive_request() returns -EAGAIN when nbd_drained_begin() has
30867075d235SStefan Hajnoczi * set client->quiescing but by the time we get back nbd_drained_end() may
30877075d235SStefan Hajnoczi * have already cleared client->quiescing. In that case we try again
30887075d235SStefan Hajnoczi * because nothing else will spawn an nbd_trip() coroutine until we set
30897075d235SStefan Hajnoczi * client->recv_coroutine = NULL further down.
30907075d235SStefan Hajnoczi */
30917075d235SStefan Hajnoczi do {
30927075d235SStefan Hajnoczi assert(client->recv_coroutine == qemu_coroutine_self());
30937075d235SStefan Hajnoczi qemu_mutex_unlock(&client->lock);
30942fd2c840SVladimir Sementsov-Ogievskiy ret = nbd_co_receive_request(req, &request, &local_err);
30957075d235SStefan Hajnoczi qemu_mutex_lock(&client->lock);
30967075d235SStefan Hajnoczi } while (ret == -EAGAIN && !client->quiescing);
30977075d235SStefan Hajnoczi
3098ee898b87SVladimir Sementsov-Ogievskiy client->recv_coroutine = NULL;
3099798bfe00SFam Zheng
3100798bfe00SFam Zheng if (client->closing) {
3101798bfe00SFam Zheng /*
3102798bfe00SFam Zheng * The client may be closed when we are blocked in
3103798bfe00SFam Zheng * nbd_co_receive_request()
3104798bfe00SFam Zheng */
3105798bfe00SFam Zheng goto done;
3106798bfe00SFam Zheng }
3107798bfe00SFam Zheng
3108f148ae7dSSergio Lopez if (ret == -EAGAIN) {
3109f148ae7dSSergio Lopez goto done;
3110f148ae7dSSergio Lopez }
3111f148ae7dSSergio Lopez
3112a0d7ce20SVladimir Sementsov-Ogievskiy nbd_client_receive_next_request(client);
31137075d235SStefan Hajnoczi
3114a0d7ce20SVladimir Sementsov-Ogievskiy if (ret == -EIO) {
3115a0d7ce20SVladimir Sementsov-Ogievskiy goto disconnect;
3116a0d7ce20SVladimir Sementsov-Ogievskiy }
3117a0d7ce20SVladimir Sementsov-Ogievskiy
31187075d235SStefan Hajnoczi qemu_mutex_unlock(&client->lock);
3119bd2cd4a4SFlorian Westphal qio_channel_set_cork(client->ioc, true);
3120bd2cd4a4SFlorian Westphal
3121a0d7ce20SVladimir Sementsov-Ogievskiy if (ret < 0) {
3122314b9026SEric Blake /* It wasn't -EIO, so, according to nbd_co_receive_request()
31236a417599SVladimir Sementsov-Ogievskiy * semantics, we should return the error to the client. */
31246a417599SVladimir Sementsov-Ogievskiy Error *export_err = local_err;
31256a417599SVladimir Sementsov-Ogievskiy
31266a417599SVladimir Sementsov-Ogievskiy local_err = NULL;
312766d4f4feSEric Blake ret = nbd_send_generic_reply(client, &request, -EINVAL,
31286a417599SVladimir Sementsov-Ogievskiy error_get_pretty(export_err), &local_err);
31296a417599SVladimir Sementsov-Ogievskiy error_free(export_err);
31306f302e60SVladimir Sementsov-Ogievskiy } else {
31316f302e60SVladimir Sementsov-Ogievskiy ret = nbd_handle_request(client, &request, req->data, &local_err);
3132a0d7ce20SVladimir Sementsov-Ogievskiy }
31331dec4643SEric Blake if (request.contexts && request.contexts != &client->contexts) {
31341dec4643SEric Blake assert(request.type == NBD_CMD_BLOCK_STATUS);
31351dec4643SEric Blake g_free(request.contexts->bitmaps);
31361dec4643SEric Blake g_free(request.contexts);
31371dec4643SEric Blake }
31387075d235SStefan Hajnoczi
31397075d235SStefan Hajnoczi qio_channel_set_cork(client->ioc, false);
31407075d235SStefan Hajnoczi qemu_mutex_lock(&client->lock);
31417075d235SStefan Hajnoczi
31425c54e7faSVladimir Sementsov-Ogievskiy if (ret < 0) {
3143c7b97282SVladimir Sementsov-Ogievskiy error_prepend(&local_err, "Failed to send reply: ");
31442fd2c840SVladimir Sementsov-Ogievskiy goto disconnect;
31452fd2c840SVladimir Sementsov-Ogievskiy }
31462fd2c840SVladimir Sementsov-Ogievskiy
31472dcbb11bSEric Blake /*
31482dcbb11bSEric Blake * We must disconnect after NBD_CMD_WRITE or BLOCK_STATUS with
31492dcbb11bSEric Blake * payload if we did not read the payload.
315029b6c3b3SEric Blake */
31512fd2c840SVladimir Sementsov-Ogievskiy if (!req->complete) {
31522fd2c840SVladimir Sementsov-Ogievskiy error_setg(&local_err, "Request handling failed in intermediate state");
31538c372a02SVladimir Sementsov-Ogievskiy goto disconnect;
3154798bfe00SFam Zheng }
3155798bfe00SFam Zheng
3156798bfe00SFam Zheng done:
3157798bfe00SFam Zheng nbd_request_put(req);
31587075d235SStefan Hajnoczi
31597075d235SStefan Hajnoczi qemu_mutex_unlock(&client->lock);
31607075d235SStefan Hajnoczi
3161f816310dSStefan Hajnoczi if (!nbd_client_put_nonzero(client)) {
3162f816310dSStefan Hajnoczi aio_co_reschedule_self(qemu_get_aio_context());
3163ff82911cSPaolo Bonzini nbd_client_put(client);
3164f816310dSStefan Hajnoczi }
3165798bfe00SFam Zheng return;
3166798bfe00SFam Zheng
31678c372a02SVladimir Sementsov-Ogievskiy disconnect:
31682fd2c840SVladimir Sementsov-Ogievskiy if (local_err) {
31692fd2c840SVladimir Sementsov-Ogievskiy error_reportf_err(local_err, "Disconnect client, due to: ");
31702fd2c840SVladimir Sementsov-Ogievskiy }
31717075d235SStefan Hajnoczi
3172798bfe00SFam Zheng nbd_request_put(req);
31737075d235SStefan Hajnoczi qemu_mutex_unlock(&client->lock);
3174f816310dSStefan Hajnoczi
3175f816310dSStefan Hajnoczi aio_co_reschedule_self(qemu_get_aio_context());
31760c9390d9SEric Blake client_close(client, true);
3177ff82911cSPaolo Bonzini nbd_client_put(client);
3178798bfe00SFam Zheng }
3179798bfe00SFam Zheng
31807075d235SStefan Hajnoczi /*
31817075d235SStefan Hajnoczi * Runs in export AioContext and main loop thread. Caller must hold
31827075d235SStefan Hajnoczi * client->lock.
31837075d235SStefan Hajnoczi */
nbd_client_receive_next_request(NBDClient * client)3184ff82911cSPaolo Bonzini static void nbd_client_receive_next_request(NBDClient *client)
3185798bfe00SFam Zheng {
31869c707525SKevin Wolf NBDRequestData *req;
31879c707525SKevin Wolf
3188f148ae7dSSergio Lopez if (!client->recv_coroutine && client->nb_requests < MAX_NBD_REQUESTS &&
3189f148ae7dSSergio Lopez !client->quiescing) {
3190ff82911cSPaolo Bonzini nbd_client_get(client);
31919c707525SKevin Wolf req = nbd_request_get(client);
31929c707525SKevin Wolf client->recv_coroutine = qemu_coroutine_create(nbd_trip, req);
31938612c686SKevin Wolf aio_co_schedule(client->exp->common.ctx, client->recv_coroutine);
3194798bfe00SFam Zheng }
3195798bfe00SFam Zheng }
3196798bfe00SFam Zheng
nbd_handshake_timer_cb(void * opaque)3197b9b72cb3SEric Blake static void nbd_handshake_timer_cb(void *opaque)
3198b9b72cb3SEric Blake {
3199b9b72cb3SEric Blake QIOChannel *ioc = opaque;
3200b9b72cb3SEric Blake
3201b9b72cb3SEric Blake trace_nbd_handshake_timer_cb();
3202b9b72cb3SEric Blake qio_channel_shutdown(ioc, QIO_CHANNEL_SHUTDOWN_BOTH, NULL);
3203b9b72cb3SEric Blake }
3204b9b72cb3SEric Blake
nbd_co_client_start(void * opaque)32051a6245a5SFam Zheng static coroutine_fn void nbd_co_client_start(void *opaque)
3206798bfe00SFam Zheng {
3207c84087f2SVladimir Sementsov-Ogievskiy NBDClient *client = opaque;
32082fd2c840SVladimir Sementsov-Ogievskiy Error *local_err = NULL;
3209b9b72cb3SEric Blake QEMUTimer *handshake_timer = NULL;
32101a6245a5SFam Zheng
3211798bfe00SFam Zheng qemu_co_mutex_init(&client->send_lock);
3212798bfe00SFam Zheng
3213b9b72cb3SEric Blake /*
3214b9b72cb3SEric Blake * Create a timer to bound the time spent in negotiation. If the
3215b9b72cb3SEric Blake * timer expires, it is likely nbd_negotiate will fail because the
3216b9b72cb3SEric Blake * socket was shutdown.
3217b9b72cb3SEric Blake */
3218b9b72cb3SEric Blake if (client->handshake_max_secs > 0) {
3219b9b72cb3SEric Blake handshake_timer = aio_timer_new(qemu_get_aio_context(),
3220b9b72cb3SEric Blake QEMU_CLOCK_REALTIME,
3221b9b72cb3SEric Blake SCALE_NS,
3222b9b72cb3SEric Blake nbd_handshake_timer_cb,
3223b9b72cb3SEric Blake client->sioc);
3224b9b72cb3SEric Blake timer_mod(handshake_timer,
3225b9b72cb3SEric Blake qemu_clock_get_ns(QEMU_CLOCK_REALTIME) +
3226b9b72cb3SEric Blake client->handshake_max_secs * NANOSECONDS_PER_SECOND);
3227b9b72cb3SEric Blake }
3228b9b72cb3SEric Blake
32292fd2c840SVladimir Sementsov-Ogievskiy if (nbd_negotiate(client, &local_err)) {
32302fd2c840SVladimir Sementsov-Ogievskiy if (local_err) {
32312fd2c840SVladimir Sementsov-Ogievskiy error_report_err(local_err);
32322fd2c840SVladimir Sementsov-Ogievskiy }
3233b9b72cb3SEric Blake timer_free(handshake_timer);
32340c9390d9SEric Blake client_close(client, false);
3235c84087f2SVladimir Sementsov-Ogievskiy return;
3236798bfe00SFam Zheng }
3237ff82911cSPaolo Bonzini
3238b9b72cb3SEric Blake timer_free(handshake_timer);
32397075d235SStefan Hajnoczi WITH_QEMU_LOCK_GUARD(&client->lock) {
3240ff82911cSPaolo Bonzini nbd_client_receive_next_request(client);
32411a6245a5SFam Zheng }
32427075d235SStefan Hajnoczi }
32431a6245a5SFam Zheng
32440c9390d9SEric Blake /*
3245fb1c2aaaSEric Blake * Create a new client listener using the given channel @sioc and @owner.
32467f7dfe2aSVladimir Sementsov-Ogievskiy * Begin servicing it in a coroutine. When the connection closes, call
3247fb1c2aaaSEric Blake * @close_fn with an indication of whether the client completed negotiation
3248fb1c2aaaSEric Blake * within @handshake_max_secs seconds (0 for unbounded).
32490c9390d9SEric Blake */
nbd_client_new(QIOChannelSocket * sioc,uint32_t handshake_max_secs,QCryptoTLSCreds * tlscreds,const char * tlsauthz,void (* close_fn)(NBDClient *,bool),void * owner)32507f7dfe2aSVladimir Sementsov-Ogievskiy void nbd_client_new(QIOChannelSocket *sioc,
3251fb1c2aaaSEric Blake uint32_t handshake_max_secs,
3252f95910feSDaniel P. Berrange QCryptoTLSCreds *tlscreds,
3253b25e12daSDaniel P. Berrange const char *tlsauthz,
3254fb1c2aaaSEric Blake void (*close_fn)(NBDClient *, bool),
3255fb1c2aaaSEric Blake void *owner)
32561a6245a5SFam Zheng {
32571a6245a5SFam Zheng NBDClient *client;
3258c84087f2SVladimir Sementsov-Ogievskiy Coroutine *co;
32591a6245a5SFam Zheng
3260e8d3eb74SMarc-André Lureau client = g_new0(NBDClient, 1);
32617075d235SStefan Hajnoczi qemu_mutex_init(&client->lock);
32621a6245a5SFam Zheng client->refcount = 1;
3263f95910feSDaniel P. Berrange client->tlscreds = tlscreds;
3264f95910feSDaniel P. Berrange if (tlscreds) {
3265f95910feSDaniel P. Berrange object_ref(OBJECT(client->tlscreds));
3266f95910feSDaniel P. Berrange }
3267b25e12daSDaniel P. Berrange client->tlsauthz = g_strdup(tlsauthz);
3268fb1c2aaaSEric Blake client->handshake_max_secs = handshake_max_secs;
32691c778ef7SDaniel P. Berrange client->sioc = sioc;
3270f1426881SEric Blake qio_channel_set_delay(QIO_CHANNEL(sioc), false);
32711c778ef7SDaniel P. Berrange object_ref(OBJECT(client->sioc));
32721c778ef7SDaniel P. Berrange client->ioc = QIO_CHANNEL(sioc);
32731c778ef7SDaniel P. Berrange object_ref(OBJECT(client->ioc));
32740c9390d9SEric Blake client->close_fn = close_fn;
3275fb1c2aaaSEric Blake client->owner = owner;
32761a6245a5SFam Zheng
3277c84087f2SVladimir Sementsov-Ogievskiy co = qemu_coroutine_create(nbd_co_client_start, client);
3278c84087f2SVladimir Sementsov-Ogievskiy qemu_coroutine_enter(co);
3279798bfe00SFam Zheng }
3280fb1c2aaaSEric Blake
3281fb1c2aaaSEric Blake void *
nbd_client_owner(NBDClient * client)3282fb1c2aaaSEric Blake nbd_client_owner(NBDClient *client)
3283fb1c2aaaSEric Blake {
3284fb1c2aaaSEric Blake return client->owner;
3285fb1c2aaaSEric Blake }
3286