xref: /openbmc/qemu/nbd/client.c (revision 9c489ea6)
1 /*
2  *  Copyright (C) 2016-2017 Red Hat, Inc.
3  *  Copyright (C) 2005  Anthony Liguori <anthony@codemonkey.ws>
4  *
5  *  Network Block Device Client Side
6  *
7  *  This program is free software; you can redistribute it and/or modify
8  *  it under the terms of the GNU General Public License as published by
9  *  the Free Software Foundation; under version 2 of the License.
10  *
11  *  This program is distributed in the hope that it will be useful,
12  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  *  GNU General Public License for more details.
15  *
16  *  You should have received a copy of the GNU General Public License
17  *  along with this program; if not, see <http://www.gnu.org/licenses/>.
18  */
19 
20 #include "qemu/osdep.h"
21 #include "qapi/error.h"
22 #include "trace.h"
23 #include "nbd-internal.h"
24 
25 static int nbd_errno_to_system_errno(int err)
26 {
27     int ret;
28     switch (err) {
29     case NBD_SUCCESS:
30         ret = 0;
31         break;
32     case NBD_EPERM:
33         ret = EPERM;
34         break;
35     case NBD_EIO:
36         ret = EIO;
37         break;
38     case NBD_ENOMEM:
39         ret = ENOMEM;
40         break;
41     case NBD_ENOSPC:
42         ret = ENOSPC;
43         break;
44     case NBD_ESHUTDOWN:
45         ret = ESHUTDOWN;
46         break;
47     default:
48         trace_nbd_unknown_error(err);
49         /* fallthrough */
50     case NBD_EINVAL:
51         ret = EINVAL;
52         break;
53     }
54     return ret;
55 }
56 
57 /* Definitions for opaque data types */
58 
/* File-local tail-queue head for NBDExport entries, initialised empty.
 * NOTE(review): nothing in the visible part of this file references
 * 'exports'; confirm whether the client side still needs it. */
59 static QTAILQ_HEAD(, NBDExport) exports = QTAILQ_HEAD_INITIALIZER(exports);
60 
61 /* That's all folks */
62 
63 /* Basic flow for negotiation
64 
65    Server         Client
66    Negotiate
67 
68    or
69 
70    Server         Client
71    Negotiate #1
72                   Option
73    Negotiate #2
74 
75    ----
76 
77    followed by
78 
79    Server         Client
80                   Request
81    Response
82                   Request
83    Response
84                   ...
85    ...
86                   Request (type == 2)
87 
88 */
89 
90 /* Send an option request.
91  *
92  * The request is for option @opt, with @data containing @len bytes of
93  * additional payload for the request (@len may be -1 to treat @data as
94  * a C string; and @data may be NULL if @len is 0).
95  * Return 0 if successful, -1 with errp set if it is impossible to
96  * continue. */
97 static int nbd_send_option_request(QIOChannel *ioc, uint32_t opt,
98                                    uint32_t len, const char *data,
99                                    Error **errp)
100 {
101     nbd_option req;
102     QEMU_BUILD_BUG_ON(sizeof(req) != 16);
103 
104     if (len == -1) {
105         req.length = len = strlen(data);
106     }
107     trace_nbd_send_option_request(opt, nbd_opt_lookup(opt), len);
108 
109     stq_be_p(&req.magic, NBD_OPTS_MAGIC);
110     stl_be_p(&req.option, opt);
111     stl_be_p(&req.length, len);
112 
113     if (nbd_write(ioc, &req, sizeof(req), errp) < 0) {
114         error_prepend(errp, "Failed to send option request header");
115         return -1;
116     }
117 
118     if (len && nbd_write(ioc, (char *) data, len, errp) < 0) {
119         error_prepend(errp, "Failed to send option request data");
120         return -1;
121     }
122 
123     return 0;
124 }
125 
126 /* Send NBD_OPT_ABORT as a courtesy to let the server know that we are
127  * not going to attempt further negotiation. */
128 static void nbd_send_opt_abort(QIOChannel *ioc)
129 {
130     /* Technically, a compliant server is supposed to reply to us; but
131      * older servers disconnected instead. At any rate, we're allowed
132      * to disconnect without waiting for the server reply, so we don't
133      * even care if the request makes it to the server, let alone
134      * waiting around for whether the server replies. */
135     nbd_send_option_request(ioc, NBD_OPT_ABORT, 0, NULL, NULL);
136 }
137 
138 
139 /* Receive the header of an option reply, which should match the given
140  * opt.  Read through the length field, but NOT the length bytes of
141  * payload. Return 0 if successful, -1 with errp set if it is
142  * impossible to continue. */
143 static int nbd_receive_option_reply(QIOChannel *ioc, uint32_t opt,
144                                     nbd_opt_reply *reply, Error **errp)
145 {
146     QEMU_BUILD_BUG_ON(sizeof(*reply) != 20);
147     if (nbd_read(ioc, reply, sizeof(*reply), errp) < 0) {
148         error_prepend(errp, "failed to read option reply");
149         nbd_send_opt_abort(ioc);
150         return -1;
151     }
152     be64_to_cpus(&reply->magic);
153     be32_to_cpus(&reply->option);
154     be32_to_cpus(&reply->type);
155     be32_to_cpus(&reply->length);
156 
157     trace_nbd_receive_option_reply(reply->option, nbd_opt_lookup(reply->option),
158                                    reply->type, nbd_rep_lookup(reply->type),
159                                    reply->length);
160 
161     if (reply->magic != NBD_REP_MAGIC) {
162         error_setg(errp, "Unexpected option reply magic");
163         nbd_send_opt_abort(ioc);
164         return -1;
165     }
166     if (reply->option != opt) {
167         error_setg(errp, "Unexpected option type %x expected %x",
168                    reply->option, opt);
169         nbd_send_opt_abort(ioc);
170         return -1;
171     }
172     return 0;
173 }
174 
175 /* If reply represents success, return 1 without further action.
176  * If reply represents an error, consume the optional payload of
177  * the packet on ioc.  Then return 0 for unsupported (so the client
178  * can fall back to other approaches), or -1 with errp set for other
179  * errors.
180  */
181 static int nbd_handle_reply_err(QIOChannel *ioc, nbd_opt_reply *reply,
182                                 Error **errp)
183 {
184     char *msg = NULL;
185     int result = -1;
186 
187     if (!(reply->type & (1 << 31))) {
188         return 1;
189     }
190 
191     if (reply->length) {
192         if (reply->length > NBD_MAX_BUFFER_SIZE) {
193             error_setg(errp, "server error 0x%" PRIx32
194                        " (%s) message is too long",
195                        reply->type, nbd_rep_lookup(reply->type));
196             goto cleanup;
197         }
198         msg = g_malloc(reply->length + 1);
199         if (nbd_read(ioc, msg, reply->length, errp) < 0) {
200             error_prepend(errp, "failed to read option error 0x%" PRIx32
201                           " (%s) message",
202                           reply->type, nbd_rep_lookup(reply->type));
203             goto cleanup;
204         }
205         msg[reply->length] = '\0';
206     }
207 
208     switch (reply->type) {
209     case NBD_REP_ERR_UNSUP:
210         trace_nbd_reply_err_unsup(reply->option, nbd_opt_lookup(reply->option));
211         result = 0;
212         goto cleanup;
213 
214     case NBD_REP_ERR_POLICY:
215         error_setg(errp, "Denied by server for option %" PRIx32 " (%s)",
216                    reply->option, nbd_opt_lookup(reply->option));
217         break;
218 
219     case NBD_REP_ERR_INVALID:
220         error_setg(errp, "Invalid data length for option %" PRIx32 " (%s)",
221                    reply->option, nbd_opt_lookup(reply->option));
222         break;
223 
224     case NBD_REP_ERR_PLATFORM:
225         error_setg(errp, "Server lacks support for option %" PRIx32 " (%s)",
226                    reply->option, nbd_opt_lookup(reply->option));
227         break;
228 
229     case NBD_REP_ERR_TLS_REQD:
230         error_setg(errp, "TLS negotiation required before option %" PRIx32
231                    " (%s)", reply->option, nbd_opt_lookup(reply->option));
232         break;
233 
234     case NBD_REP_ERR_UNKNOWN:
235         error_setg(errp, "Requested export not available");
236         break;
237 
238     case NBD_REP_ERR_SHUTDOWN:
239         error_setg(errp, "Server shutting down before option %" PRIx32 " (%s)",
240                    reply->option, nbd_opt_lookup(reply->option));
241         break;
242 
243     case NBD_REP_ERR_BLOCK_SIZE_REQD:
244         error_setg(errp, "Server requires INFO_BLOCK_SIZE for option %" PRIx32
245                    " (%s)", reply->option, nbd_opt_lookup(reply->option));
246         break;
247 
248     default:
249         error_setg(errp, "Unknown error code when asking for option %" PRIx32
250                    " (%s)", reply->option, nbd_opt_lookup(reply->option));
251         break;
252     }
253 
254     if (msg) {
255         error_append_hint(errp, "server reported: %s\n", msg);
256     }
257 
258  cleanup:
259     g_free(msg);
260     if (result < 0) {
261         nbd_send_opt_abort(ioc);
262     }
263     return result;
264 }
265 
266 /* Process another portion of the NBD_OPT_LIST reply.  Set *@match if
267  * the current reply matches @want or if the server does not support
268  * NBD_OPT_LIST, otherwise leave @match alone.  Return 0 if iteration
269  * is complete, positive if more replies are expected, or negative
270  * with @errp set if an unrecoverable error occurred. */
271 static int nbd_receive_list(QIOChannel *ioc, const char *want, bool *match,
272                             Error **errp)
273 {
274     nbd_opt_reply reply;
275     uint32_t len;
276     uint32_t namelen;
277     char name[NBD_MAX_NAME_SIZE + 1];
278     int error;
279 
280     if (nbd_receive_option_reply(ioc, NBD_OPT_LIST, &reply, errp) < 0) {
281         return -1;
282     }
283     error = nbd_handle_reply_err(ioc, &reply, errp);
284     if (error <= 0) {
285         /* The server did not support NBD_OPT_LIST, so set *match on
286          * the assumption that any name will be accepted.  */
287         *match = true;
288         return error;
289     }
290     len = reply.length;
291 
292     if (reply.type == NBD_REP_ACK) {
293         if (len != 0) {
294             error_setg(errp, "length too long for option end");
295             nbd_send_opt_abort(ioc);
296             return -1;
297         }
298         return 0;
299     } else if (reply.type != NBD_REP_SERVER) {
300         error_setg(errp, "Unexpected reply type %" PRIx32 " expected %x",
301                    reply.type, NBD_REP_SERVER);
302         nbd_send_opt_abort(ioc);
303         return -1;
304     }
305 
306     if (len < sizeof(namelen) || len > NBD_MAX_BUFFER_SIZE) {
307         error_setg(errp, "incorrect option length %" PRIu32, len);
308         nbd_send_opt_abort(ioc);
309         return -1;
310     }
311     if (nbd_read(ioc, &namelen, sizeof(namelen), errp) < 0) {
312         error_prepend(errp, "failed to read option name length");
313         nbd_send_opt_abort(ioc);
314         return -1;
315     }
316     namelen = be32_to_cpu(namelen);
317     len -= sizeof(namelen);
318     if (len < namelen) {
319         error_setg(errp, "incorrect option name length");
320         nbd_send_opt_abort(ioc);
321         return -1;
322     }
323     if (namelen != strlen(want)) {
324         if (nbd_drop(ioc, len, errp) < 0) {
325             error_prepend(errp, "failed to skip export name with wrong length");
326             nbd_send_opt_abort(ioc);
327             return -1;
328         }
329         return 1;
330     }
331 
332     assert(namelen < sizeof(name));
333     if (nbd_read(ioc, name, namelen, errp) < 0) {
334         error_prepend(errp, "failed to read export name");
335         nbd_send_opt_abort(ioc);
336         return -1;
337     }
338     name[namelen] = '\0';
339     len -= namelen;
340     if (nbd_drop(ioc, len, errp) < 0) {
341         error_prepend(errp, "failed to read export description");
342         nbd_send_opt_abort(ioc);
343         return -1;
344     }
345     if (!strcmp(name, want)) {
346         *match = true;
347     }
348     return 1;
349 }
350 
351 
352 /* Returns -1 if NBD_OPT_GO proves the export @wantname cannot be
353  * used, 0 if NBD_OPT_GO is unsupported (fall back to NBD_OPT_LIST and
354  * NBD_OPT_EXPORT_NAME in that case), and > 0 if the export is good to
355  * go (with @info populated). */
356 static int nbd_opt_go(QIOChannel *ioc, const char *wantname,
357                       NBDExportInfo *info, Error **errp)
358 {
359     nbd_opt_reply reply;
360     uint32_t len = strlen(wantname);
361     uint16_t type;
362     int error;
363     char *buf;
364 
365     /* The protocol requires that the server send NBD_INFO_EXPORT with
366      * a non-zero flags (at least NBD_FLAG_HAS_FLAGS must be set); so
367      * flags still 0 is a witness of a broken server. */
368     info->flags = 0;
369 
370     trace_nbd_opt_go_start(wantname);
371     buf = g_malloc(4 + len + 2 + 2 * info->request_sizes + 1);
372     stl_be_p(buf, len);
373     memcpy(buf + 4, wantname, len);
374     /* At most one request, everything else up to server */
375     stw_be_p(buf + 4 + len, info->request_sizes);
376     if (info->request_sizes) {
377         stw_be_p(buf + 4 + len + 2, NBD_INFO_BLOCK_SIZE);
378     }
379     if (nbd_send_option_request(ioc, NBD_OPT_GO,
380                                 4 + len + 2 + 2 * info->request_sizes, buf,
381                                 errp) < 0) {
382         return -1;
383     }
384 
385     while (1) {
386         if (nbd_receive_option_reply(ioc, NBD_OPT_GO, &reply, errp) < 0) {
387             return -1;
388         }
389         error = nbd_handle_reply_err(ioc, &reply, errp);
390         if (error <= 0) {
391             return error;
392         }
393         len = reply.length;
394 
395         if (reply.type == NBD_REP_ACK) {
396             /* Server is done sending info and moved into transmission
397                phase, but make sure it sent flags */
398             if (len) {
399                 error_setg(errp, "server sent invalid NBD_REP_ACK");
400                 nbd_send_opt_abort(ioc);
401                 return -1;
402             }
403             if (!info->flags) {
404                 error_setg(errp, "broken server omitted NBD_INFO_EXPORT");
405                 nbd_send_opt_abort(ioc);
406                 return -1;
407             }
408             trace_nbd_opt_go_success();
409             return 1;
410         }
411         if (reply.type != NBD_REP_INFO) {
412             error_setg(errp, "unexpected reply type %" PRIx32
413                        " (%s), expected %x",
414                        reply.type, nbd_rep_lookup(reply.type), NBD_REP_INFO);
415             nbd_send_opt_abort(ioc);
416             return -1;
417         }
418         if (len < sizeof(type)) {
419             error_setg(errp, "NBD_REP_INFO length %" PRIu32 " is too short",
420                        len);
421             nbd_send_opt_abort(ioc);
422             return -1;
423         }
424         if (nbd_read(ioc, &type, sizeof(type), errp) < 0) {
425             error_prepend(errp, "failed to read info type");
426             nbd_send_opt_abort(ioc);
427             return -1;
428         }
429         len -= sizeof(type);
430         be16_to_cpus(&type);
431         switch (type) {
432         case NBD_INFO_EXPORT:
433             if (len != sizeof(info->size) + sizeof(info->flags)) {
434                 error_setg(errp, "remaining export info len %" PRIu32
435                            " is unexpected size", len);
436                 nbd_send_opt_abort(ioc);
437                 return -1;
438             }
439             if (nbd_read(ioc, &info->size, sizeof(info->size), errp) < 0) {
440                 error_prepend(errp, "failed to read info size");
441                 nbd_send_opt_abort(ioc);
442                 return -1;
443             }
444             be64_to_cpus(&info->size);
445             if (nbd_read(ioc, &info->flags, sizeof(info->flags), errp) < 0) {
446                 error_prepend(errp, "failed to read info flags");
447                 nbd_send_opt_abort(ioc);
448                 return -1;
449             }
450             be16_to_cpus(&info->flags);
451             trace_nbd_receive_negotiate_size_flags(info->size, info->flags);
452             break;
453 
454         case NBD_INFO_BLOCK_SIZE:
455             if (len != sizeof(info->min_block) * 3) {
456                 error_setg(errp, "remaining export info len %" PRIu32
457                            " is unexpected size", len);
458                 nbd_send_opt_abort(ioc);
459                 return -1;
460             }
461             if (nbd_read(ioc, &info->min_block, sizeof(info->min_block),
462                          errp) < 0) {
463                 error_prepend(errp, "failed to read info minimum block size");
464                 nbd_send_opt_abort(ioc);
465                 return -1;
466             }
467             be32_to_cpus(&info->min_block);
468             if (!is_power_of_2(info->min_block)) {
469                 error_setg(errp, "server minimum block size %" PRId32
470                            "is not a power of two", info->min_block);
471                 nbd_send_opt_abort(ioc);
472                 return -1;
473             }
474             if (nbd_read(ioc, &info->opt_block, sizeof(info->opt_block),
475                          errp) < 0) {
476                 error_prepend(errp, "failed to read info preferred block size");
477                 nbd_send_opt_abort(ioc);
478                 return -1;
479             }
480             be32_to_cpus(&info->opt_block);
481             if (!is_power_of_2(info->opt_block) ||
482                 info->opt_block < info->min_block) {
483                 error_setg(errp, "server preferred block size %" PRId32
484                            "is not valid", info->opt_block);
485                 nbd_send_opt_abort(ioc);
486                 return -1;
487             }
488             if (nbd_read(ioc, &info->max_block, sizeof(info->max_block),
489                          errp) < 0) {
490                 error_prepend(errp, "failed to read info maximum block size");
491                 nbd_send_opt_abort(ioc);
492                 return -1;
493             }
494             be32_to_cpus(&info->max_block);
495             trace_nbd_opt_go_info_block_size(info->min_block, info->opt_block,
496                                              info->max_block);
497             break;
498 
499         default:
500             trace_nbd_opt_go_info_unknown(type, nbd_info_lookup(type));
501             if (nbd_drop(ioc, len, errp) < 0) {
502                 error_prepend(errp, "Failed to read info payload");
503                 nbd_send_opt_abort(ioc);
504                 return -1;
505             }
506             break;
507         }
508     }
509 }
510 
511 /* Return -1 on failure, 0 if wantname is an available export. */
512 static int nbd_receive_query_exports(QIOChannel *ioc,
513                                      const char *wantname,
514                                      Error **errp)
515 {
516     bool foundExport = false;
517 
518     trace_nbd_receive_query_exports_start(wantname);
519     if (nbd_send_option_request(ioc, NBD_OPT_LIST, 0, NULL, errp) < 0) {
520         return -1;
521     }
522 
523     while (1) {
524         int ret = nbd_receive_list(ioc, wantname, &foundExport, errp);
525 
526         if (ret < 0) {
527             /* Server gave unexpected reply */
528             return -1;
529         } else if (ret == 0) {
530             /* Done iterating. */
531             if (!foundExport) {
532                 error_setg(errp, "No export with name '%s' available",
533                            wantname);
534                 nbd_send_opt_abort(ioc);
535                 return -1;
536             }
537             trace_nbd_receive_query_exports_success(wantname);
538             return 0;
539         }
540     }
541 }
542 
543 static QIOChannel *nbd_receive_starttls(QIOChannel *ioc,
544                                         QCryptoTLSCreds *tlscreds,
545                                         const char *hostname, Error **errp)
546 {
547     nbd_opt_reply reply;
548     QIOChannelTLS *tioc;
549     struct NBDTLSHandshakeData data = { 0 };
550 
551     trace_nbd_receive_starttls_request();
552     if (nbd_send_option_request(ioc, NBD_OPT_STARTTLS, 0, NULL, errp) < 0) {
553         return NULL;
554     }
555 
556     trace_nbd_receive_starttls_reply();
557     if (nbd_receive_option_reply(ioc, NBD_OPT_STARTTLS, &reply, errp) < 0) {
558         return NULL;
559     }
560 
561     if (reply.type != NBD_REP_ACK) {
562         error_setg(errp, "Server rejected request to start TLS %" PRIx32,
563                    reply.type);
564         nbd_send_opt_abort(ioc);
565         return NULL;
566     }
567 
568     if (reply.length != 0) {
569         error_setg(errp, "Start TLS response was not zero %" PRIu32,
570                    reply.length);
571         nbd_send_opt_abort(ioc);
572         return NULL;
573     }
574 
575     trace_nbd_receive_starttls_new_client();
576     tioc = qio_channel_tls_new_client(ioc, tlscreds, hostname, errp);
577     if (!tioc) {
578         return NULL;
579     }
580     qio_channel_set_name(QIO_CHANNEL(tioc), "nbd-client-tls");
581     data.loop = g_main_loop_new(g_main_context_default(), FALSE);
582     trace_nbd_receive_starttls_tls_handshake();
583     qio_channel_tls_handshake(tioc,
584                               nbd_tls_handshake,
585                               &data,
586                               NULL);
587 
588     if (!data.complete) {
589         g_main_loop_run(data.loop);
590     }
591     g_main_loop_unref(data.loop);
592     if (data.error) {
593         error_propagate(errp, data.error);
594         object_unref(OBJECT(tioc));
595         return NULL;
596     }
597 
598     return QIO_CHANNEL(tioc);
599 }
600 
601 
602 int nbd_receive_negotiate(QIOChannel *ioc, const char *name,
603                           QCryptoTLSCreds *tlscreds, const char *hostname,
604                           QIOChannel **outioc, NBDExportInfo *info,
605                           Error **errp)
606 {
607     char buf[256];
608     uint64_t magic;
609     int rc;
610     bool zeroes = true;
611 
612     trace_nbd_receive_negotiate(tlscreds, hostname ? hostname : "<null>");
613 
614     rc = -EINVAL;
615 
616     if (outioc) {
617         *outioc = NULL;
618     }
619     if (tlscreds && !outioc) {
620         error_setg(errp, "Output I/O channel required for TLS");
621         goto fail;
622     }
623 
624     if (nbd_read(ioc, buf, 8, errp) < 0) {
625         error_prepend(errp, "Failed to read data");
626         goto fail;
627     }
628 
629     buf[8] = '\0';
630     if (strlen(buf) == 0) {
631         error_setg(errp, "Server connection closed unexpectedly");
632         goto fail;
633     }
634 
635     magic = ldq_be_p(buf);
636     trace_nbd_receive_negotiate_magic(magic);
637 
638     if (memcmp(buf, "NBDMAGIC", 8) != 0) {
639         error_setg(errp, "Invalid magic received");
640         goto fail;
641     }
642 
643     if (nbd_read(ioc, &magic, sizeof(magic), errp) < 0) {
644         error_prepend(errp, "Failed to read magic");
645         goto fail;
646     }
647     magic = be64_to_cpu(magic);
648     trace_nbd_receive_negotiate_magic(magic);
649 
650     if (magic == NBD_OPTS_MAGIC) {
651         uint32_t clientflags = 0;
652         uint16_t globalflags;
653         bool fixedNewStyle = false;
654 
655         if (nbd_read(ioc, &globalflags, sizeof(globalflags), errp) < 0) {
656             error_prepend(errp, "Failed to read server flags");
657             goto fail;
658         }
659         globalflags = be16_to_cpu(globalflags);
660         trace_nbd_receive_negotiate_server_flags(globalflags);
661         if (globalflags & NBD_FLAG_FIXED_NEWSTYLE) {
662             fixedNewStyle = true;
663             clientflags |= NBD_FLAG_C_FIXED_NEWSTYLE;
664         }
665         if (globalflags & NBD_FLAG_NO_ZEROES) {
666             zeroes = false;
667             clientflags |= NBD_FLAG_C_NO_ZEROES;
668         }
669         /* client requested flags */
670         clientflags = cpu_to_be32(clientflags);
671         if (nbd_write(ioc, &clientflags, sizeof(clientflags), errp) < 0) {
672             error_prepend(errp, "Failed to send clientflags field");
673             goto fail;
674         }
675         if (tlscreds) {
676             if (fixedNewStyle) {
677                 *outioc = nbd_receive_starttls(ioc, tlscreds, hostname, errp);
678                 if (!*outioc) {
679                     goto fail;
680                 }
681                 ioc = *outioc;
682             } else {
683                 error_setg(errp, "Server does not support STARTTLS");
684                 goto fail;
685             }
686         }
687         if (!name) {
688             trace_nbd_receive_negotiate_default_name();
689             name = "";
690         }
691         if (fixedNewStyle) {
692             int result;
693 
694             /* Try NBD_OPT_GO first - if it works, we are done (it
695              * also gives us a good message if the server requires
696              * TLS).  If it is not available, fall back to
697              * NBD_OPT_LIST for nicer error messages about a missing
698              * export, then use NBD_OPT_EXPORT_NAME.  */
699             result = nbd_opt_go(ioc, name, info, errp);
700             if (result < 0) {
701                 goto fail;
702             }
703             if (result > 0) {
704                 return 0;
705             }
706             /* Check our desired export is present in the
707              * server export list. Since NBD_OPT_EXPORT_NAME
708              * cannot return an error message, running this
709              * query gives us better error reporting if the
710              * export name is not available.
711              */
712             if (nbd_receive_query_exports(ioc, name, errp) < 0) {
713                 goto fail;
714             }
715         }
716         /* write the export name request */
717         if (nbd_send_option_request(ioc, NBD_OPT_EXPORT_NAME, -1, name,
718                                     errp) < 0) {
719             goto fail;
720         }
721 
722         /* Read the response */
723         if (nbd_read(ioc, &info->size, sizeof(info->size), errp) < 0) {
724             error_prepend(errp, "Failed to read export length");
725             goto fail;
726         }
727         be64_to_cpus(&info->size);
728 
729         if (nbd_read(ioc, &info->flags, sizeof(info->flags), errp) < 0) {
730             error_prepend(errp, "Failed to read export flags");
731             goto fail;
732         }
733         be16_to_cpus(&info->flags);
734     } else if (magic == NBD_CLIENT_MAGIC) {
735         uint32_t oldflags;
736 
737         if (name) {
738             error_setg(errp, "Server does not support export names");
739             goto fail;
740         }
741         if (tlscreds) {
742             error_setg(errp, "Server does not support STARTTLS");
743             goto fail;
744         }
745 
746         if (nbd_read(ioc, &info->size, sizeof(info->size), errp) < 0) {
747             error_prepend(errp, "Failed to read export length");
748             goto fail;
749         }
750         be64_to_cpus(&info->size);
751 
752         if (nbd_read(ioc, &oldflags, sizeof(oldflags), errp) < 0) {
753             error_prepend(errp, "Failed to read export flags");
754             goto fail;
755         }
756         be32_to_cpus(&oldflags);
757         if (oldflags & ~0xffff) {
758             error_setg(errp, "Unexpected export flags %0x" PRIx32, oldflags);
759             goto fail;
760         }
761         info->flags = oldflags;
762     } else {
763         error_setg(errp, "Bad magic received");
764         goto fail;
765     }
766 
767     trace_nbd_receive_negotiate_size_flags(info->size, info->flags);
768     if (zeroes && nbd_drop(ioc, 124, errp) < 0) {
769         error_prepend(errp, "Failed to read reserved block");
770         goto fail;
771     }
772     rc = 0;
773 
774 fail:
775     return rc;
776 }
777 
778 #ifdef __linux__
779 int nbd_init(int fd, QIOChannelSocket *sioc, NBDExportInfo *info,
780              Error **errp)
781 {
782     unsigned long sector_size = MAX(BDRV_SECTOR_SIZE, info->min_block);
783     unsigned long sectors = info->size / sector_size;
784 
785     /* FIXME: Once the kernel module is patched to honor block sizes,
786      * and to advertise that fact to user space, we should update the
787      * hand-off to the kernel to use any block sizes we learned. */
788     assert(!info->request_sizes);
789     if (info->size / sector_size != sectors) {
790         error_setg(errp, "Export size %" PRIu64 " too large for 32-bit kernel",
791                    info->size);
792         return -E2BIG;
793     }
794 
795     trace_nbd_init_set_socket();
796 
797     if (ioctl(fd, NBD_SET_SOCK, (unsigned long) sioc->fd) < 0) {
798         int serrno = errno;
799         error_setg(errp, "Failed to set NBD socket");
800         return -serrno;
801     }
802 
803     trace_nbd_init_set_block_size(sector_size);
804 
805     if (ioctl(fd, NBD_SET_BLKSIZE, sector_size) < 0) {
806         int serrno = errno;
807         error_setg(errp, "Failed setting NBD block size");
808         return -serrno;
809     }
810 
811     trace_nbd_init_set_size(sectors);
812     if (info->size % sector_size) {
813         trace_nbd_init_trailing_bytes(info->size % sector_size);
814     }
815 
816     if (ioctl(fd, NBD_SET_SIZE_BLOCKS, sectors) < 0) {
817         int serrno = errno;
818         error_setg(errp, "Failed setting size (in blocks)");
819         return -serrno;
820     }
821 
822     if (ioctl(fd, NBD_SET_FLAGS, (unsigned long) info->flags) < 0) {
823         if (errno == ENOTTY) {
824             int read_only = (info->flags & NBD_FLAG_READ_ONLY) != 0;
825             trace_nbd_init_set_readonly();
826 
827             if (ioctl(fd, BLKROSET, (unsigned long) &read_only) < 0) {
828                 int serrno = errno;
829                 error_setg(errp, "Failed setting read-only attribute");
830                 return -serrno;
831             }
832         } else {
833             int serrno = errno;
834             error_setg(errp, "Failed setting flags");
835             return -serrno;
836         }
837     }
838 
839     trace_nbd_init_finish();
840 
841     return 0;
842 }
843 
844 int nbd_client(int fd)
845 {
846     int ret;
847     int serrno;
848 
849     trace_nbd_client_loop();
850 
851     ret = ioctl(fd, NBD_DO_IT);
852     if (ret < 0 && errno == EPIPE) {
853         /* NBD_DO_IT normally returns EPIPE when someone has disconnected
854          * the socket via NBD_DISCONNECT.  We do not want to return 1 in
855          * that case.
856          */
857         ret = 0;
858     }
859     serrno = errno;
860 
861     trace_nbd_client_loop_ret(ret, strerror(serrno));
862 
863     trace_nbd_client_clear_queue();
864     ioctl(fd, NBD_CLEAR_QUE);
865 
866     trace_nbd_client_clear_socket();
867     ioctl(fd, NBD_CLEAR_SOCK);
868 
869     errno = serrno;
870     return ret;
871 }
872 
873 int nbd_disconnect(int fd)
874 {
875     ioctl(fd, NBD_CLEAR_QUE);
876     ioctl(fd, NBD_DISCONNECT);
877     ioctl(fd, NBD_CLEAR_SOCK);
878     return 0;
879 }
880 
881 #else
882 int nbd_init(int fd, QIOChannelSocket *ioc, NBDExportInfo *info,
883 	     Error **errp)
884 {
885     error_setg(errp, "nbd_init is only supported on Linux");
886     return -ENOTSUP;
887 }
888 
/* Stub for builds without __linux__: always fails with -ENOTSUP. */
int nbd_client(int fd)
{
    (void) fd;

    return -ENOTSUP;
}
/* Stub for builds without __linux__: always fails with -ENOTSUP. */
int nbd_disconnect(int fd)
{
    (void) fd;

    return -ENOTSUP;
}
897 #endif
898 
899 ssize_t nbd_send_request(QIOChannel *ioc, NBDRequest *request)
900 {
901     uint8_t buf[NBD_REQUEST_SIZE];
902 
903     trace_nbd_send_request(request->from, request->len, request->handle,
904                            request->flags, request->type,
905                            nbd_cmd_lookup(request->type));
906 
907     stl_be_p(buf, NBD_REQUEST_MAGIC);
908     stw_be_p(buf + 4, request->flags);
909     stw_be_p(buf + 6, request->type);
910     stq_be_p(buf + 8, request->handle);
911     stq_be_p(buf + 16, request->from);
912     stl_be_p(buf + 24, request->len);
913 
914     return nbd_write(ioc, buf, sizeof(buf), NULL);
915 }
916 
917 ssize_t nbd_receive_reply(QIOChannel *ioc, NBDReply *reply, Error **errp)
918 {
919     uint8_t buf[NBD_REPLY_SIZE];
920     uint32_t magic;
921     ssize_t ret;
922 
923     ret = nbd_read_eof(ioc, buf, sizeof(buf), errp);
924     if (ret <= 0) {
925         return ret;
926     }
927 
928     if (ret != sizeof(buf)) {
929         error_setg(errp, "read failed");
930         return -EINVAL;
931     }
932 
933     /* Reply
934        [ 0 ..  3]    magic   (NBD_REPLY_MAGIC)
935        [ 4 ..  7]    error   (0 == no error)
936        [ 7 .. 15]    handle
937      */
938 
939     magic = ldl_be_p(buf);
940     reply->error  = ldl_be_p(buf + 4);
941     reply->handle = ldq_be_p(buf + 8);
942 
943     reply->error = nbd_errno_to_system_errno(reply->error);
944 
945     if (reply->error == ESHUTDOWN) {
946         /* This works even on mingw which lacks a native ESHUTDOWN */
947         error_setg(errp, "server shutting down");
948         return -EINVAL;
949     }
950     trace_nbd_receive_reply(magic, reply->error, reply->handle);
951 
952     if (magic != NBD_REPLY_MAGIC) {
953         error_setg(errp, "invalid magic (got 0x%" PRIx32 ")", magic);
954         return -EINVAL;
955     }
956     return sizeof(buf);
957 }
958 
959