xref: /openbmc/qemu/block/iscsi.c (revision 89de4b91)
1 /*
2  * QEMU Block driver for iSCSI images
3  *
4  * Copyright (c) 2010-2011 Ronnie Sahlberg <ronniesahlberg@gmail.com>
5  * Copyright (c) 2012-2016 Peter Lieven <pl@kamp.de>
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a copy
8  * of this software and associated documentation files (the "Software"), to deal
9  * in the Software without restriction, including without limitation the rights
10  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11  * copies of the Software, and to permit persons to whom the Software is
12  * furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice shall be included in
15  * all copies or substantial portions of the Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23  * THE SOFTWARE.
24  */
25 
26 #include "qemu/osdep.h"
27 
28 #include <poll.h>
29 #include <math.h>
30 #include <arpa/inet.h>
31 #include "qemu-common.h"
32 #include "qemu/config-file.h"
33 #include "qemu/error-report.h"
34 #include "qemu/bitops.h"
35 #include "qemu/bitmap.h"
36 #include "block/block_int.h"
37 #include "scsi/constants.h"
38 #include "qemu/iov.h"
39 #include "qemu/uuid.h"
40 #include "qmp-commands.h"
41 #include "qapi/qmp/qstring.h"
42 #include "crypto/secret.h"
43 #include "scsi/utils.h"
44 
45 /* Conflict between scsi/utils.h and libiscsi! :( */
46 #define SCSI_XFER_NONE ISCSI_XFER_NONE
47 #include <iscsi/iscsi.h>
48 #include <iscsi/scsi-lowlevel.h>
49 #undef SCSI_XFER_NONE
50 QEMU_BUILD_BUG_ON((int)SCSI_XFER_NONE != (int)ISCSI_XFER_NONE);
51 
52 #ifdef __linux__
53 #include <scsi/sg.h>
54 #endif
55 
56 typedef struct IscsiLun {
57     struct iscsi_context *iscsi;
58     AioContext *aio_context;
59     int lun;
60     enum scsi_inquiry_peripheral_device_type type;
61     int block_size;
62     uint64_t num_blocks;
63     int events;
64     QEMUTimer *nop_timer;
65     QEMUTimer *event_timer;
66     QemuMutex mutex;
67     struct scsi_inquiry_logical_block_provisioning lbp;
68     struct scsi_inquiry_block_limits bl;
69     unsigned char *zeroblock;
70     /* The allocmap tracks which clusters (pages) on the iSCSI target are
71      * allocated and which are not. If the target returns zeros for
72      * unallocated pages (iscsilun->lbprz), we can directly return zeros
73      * instead of reading them over the wire when a read request falls within
74      * an unallocated block. As there are 3 possible states, we need 2 bitmaps
75      * to track them. allocmap_valid tracks whether QEMU's information about a
76      * page is valid; allocmap tracks whether a page is allocated or not. If
77      * QEMU has no valid information about a page, the corresponding allocmap
78      * entry should also be switched to unallocated to force a new lookup of
79      * the allocation status, because lookups are generally skipped if a page
80      * is suspected to be allocated. If an iSCSI target is opened with
81      * cache.direct = on, allocmap_valid is not allocated at all, which makes
82      * all cached information invalid, so a fresh lookup is made for any page
83      * even if its allocmap entry says it is unallocated. */
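    /* A rough summary of the resulting per-cluster states:
     *   allocmap_valid = 0                -> status unknown, must be looked up
     *   allocmap_valid = 1, allocmap = 1  -> possibly allocated
     *   allocmap_valid = 1, allocmap = 0  -> unallocated (reads as zeros if lbprz)
     */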
84     unsigned long *allocmap;
85     unsigned long *allocmap_valid;
86     long allocmap_size;
87     int cluster_sectors;
88     bool use_16_for_rw;
89     bool write_protected;
90     bool lbpme;
91     bool lbprz;
92     bool dpofua;
93     bool has_write_same;
94     bool request_timed_out;
95 } IscsiLun;
96 
97 typedef struct IscsiTask {
98     int status;
99     int complete;
100     int retries;
101     int do_retry;
102     struct scsi_task *task;
103     Coroutine *co;
104     IscsiLun *iscsilun;
105     QEMUTimer retry_timer;
106     int err_code;
107 } IscsiTask;
108 
109 typedef struct IscsiAIOCB {
110     BlockAIOCB common;
111     QEMUBH *bh;
112     IscsiLun *iscsilun;
113     struct scsi_task *task;
114     uint8_t *buf;
115     int status;
116     int64_t sector_num;
117     int nb_sectors;
118     int ret;
119 #ifdef __linux__
120     sg_io_hdr_t *ioh;
121 #endif
122 } IscsiAIOCB;
123 
124 /* libiscsi uses time_t, so it is enough to process events once per second */
125 #define EVENT_INTERVAL 1000
126 #define NOP_INTERVAL 5000
127 #define MAX_NOP_FAILURES 3
128 #define ISCSI_CMD_RETRIES ARRAY_SIZE(iscsi_retry_times)
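/* Mean retry delays in milliseconds, fed through exp_random() for jitter */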
129 static const unsigned iscsi_retry_times[] = {8, 32, 128, 512, 2048, 8192, 32768};
130 
131 /* This threshold is a trade-off knob to choose between
132  * the potential additional overhead of an extra GET_LBA_STATUS request
133  * vs. unnecessarily reading a lot of zero sectors over the wire.
134  * If a read request is greater than or equal to ISCSI_CHECKALLOC_THRES
135  * sectors (64 sectors = 32 KiB with 512-byte QEMU sectors), we first check
136  * the allocation status of the area covered by the request if the allocmap
137  * indicates that the area might be unallocated. */
138 #define ISCSI_CHECKALLOC_THRES 64
139 
140 static void
141 iscsi_bh_cb(void *p)
142 {
143     IscsiAIOCB *acb = p;
144 
145     qemu_bh_delete(acb->bh);
146 
147     g_free(acb->buf);
148     acb->buf = NULL;
149 
150     acb->common.cb(acb->common.opaque, acb->status);
151 
152     if (acb->task != NULL) {
153         scsi_free_scsi_task(acb->task);
154         acb->task = NULL;
155     }
156 
157     qemu_aio_unref(acb);
158 }
159 
160 static void
161 iscsi_schedule_bh(IscsiAIOCB *acb)
162 {
163     if (acb->bh) {
164         return;
165     }
166     acb->bh = aio_bh_new(acb->iscsilun->aio_context, iscsi_bh_cb, acb);
167     qemu_bh_schedule(acb->bh);
168 }
169 
170 static void iscsi_co_generic_bh_cb(void *opaque)
171 {
172     struct IscsiTask *iTask = opaque;
173 
174     iTask->complete = 1;
175     aio_co_wake(iTask->co);
176 }
177 
178 static void iscsi_retry_timer_expired(void *opaque)
179 {
180     struct IscsiTask *iTask = opaque;
181     iTask->complete = 1;
182     if (iTask->co) {
183         aio_co_wake(iTask->co);
184     }
185 }
186 
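/* Draw an exponentially distributed random value with the given mean; used
 * to add jitter to the retry back-off delays in iscsi_co_generic_cb(). */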
187 static inline unsigned exp_random(double mean)
188 {
189     return -mean * log((double)rand() / RAND_MAX);
190 }
191 
192 /* SCSI_SENSE_ASCQ_INVALID_FIELD_IN_PARAMETER_LIST was introduced in
193  * libiscsi 1.10.0, together with other constants we need.  Use it as
194  * a hint that we have to define them ourselves if needed, to keep the
195  * minimum required libiscsi version at 1.9.0.  We use an ASCQ macro for
196  * the test because SCSI_STATUS_* is an enum.
197  *
198  * To guard against future changes where SCSI_SENSE_ASCQ_* also becomes
199  * an enum, check against the LIBISCSI_API_VERSION macro, which was
200  * introduced in 1.11.0.  If it is present, there is no need to define
201  * anything.
202  */
203 #if !defined(SCSI_SENSE_ASCQ_INVALID_FIELD_IN_PARAMETER_LIST) && \
204     !defined(LIBISCSI_API_VERSION)
205 #define SCSI_STATUS_TASK_SET_FULL                          0x28
206 #define SCSI_STATUS_TIMEOUT                                0x0f000002
207 #define SCSI_SENSE_ASCQ_INVALID_FIELD_IN_PARAMETER_LIST    0x2600
208 #define SCSI_SENSE_ASCQ_PARAMETER_LIST_LENGTH_ERROR        0x1a00
209 #endif
210 
211 #ifndef LIBISCSI_API_VERSION
212 #define LIBISCSI_API_VERSION 20130701
213 #endif
214 
215 static int iscsi_translate_sense(struct scsi_sense *sense)
216 {
217     return - scsi_sense_to_errno(sense->key,
218                                  (sense->ascq & 0xFF00) >> 8,
219                                  sense->ascq & 0xFF);
220 }
221 
222 /* Called (via iscsi_service) with QemuMutex held.  */
223 static void
224 iscsi_co_generic_cb(struct iscsi_context *iscsi, int status,
225                         void *command_data, void *opaque)
226 {
227     struct IscsiTask *iTask = opaque;
228     struct scsi_task *task = command_data;
229 
230     iTask->status = status;
231     iTask->do_retry = 0;
232     iTask->task = task;
233 
234     if (status != SCSI_STATUS_GOOD) {
235         if (iTask->retries++ < ISCSI_CMD_RETRIES) {
236             if (status == SCSI_STATUS_CHECK_CONDITION
237                 && task->sense.key == SCSI_SENSE_UNIT_ATTENTION) {
238                 error_report("iSCSI CheckCondition: %s",
239                              iscsi_get_error(iscsi));
240                 iTask->do_retry = 1;
241                 goto out;
242             }
243             if (status == SCSI_STATUS_BUSY ||
244                 status == SCSI_STATUS_TIMEOUT ||
245                 status == SCSI_STATUS_TASK_SET_FULL) {
246                 unsigned retry_time =
247                     exp_random(iscsi_retry_times[iTask->retries - 1]);
248                 if (status == SCSI_STATUS_TIMEOUT) {
249                     /* make sure the request is rescheduled AFTER the
250                      * reconnect is initiated */
251                     retry_time = EVENT_INTERVAL * 2;
252                     iTask->iscsilun->request_timed_out = true;
253                 }
254                 error_report("iSCSI Busy/TaskSetFull/TimeOut"
255                              " (retry #%u in %u ms): %s",
256                              iTask->retries, retry_time,
257                              iscsi_get_error(iscsi));
258                 aio_timer_init(iTask->iscsilun->aio_context,
259                                &iTask->retry_timer, QEMU_CLOCK_REALTIME,
260                                SCALE_MS, iscsi_retry_timer_expired, iTask);
261                 timer_mod(&iTask->retry_timer,
262                           qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + retry_time);
263                 iTask->do_retry = 1;
264                 return;
265             }
266         }
267         iTask->err_code = iscsi_translate_sense(&task->sense);
268         error_report("iSCSI Failure: %s", iscsi_get_error(iscsi));
269     }
270 
271 out:
272     if (iTask->co) {
273         aio_bh_schedule_oneshot(iTask->iscsilun->aio_context,
274                                  iscsi_co_generic_bh_cb, iTask);
275     } else {
276         iTask->complete = 1;
277     }
278 }
279 
280 static void iscsi_co_init_iscsitask(IscsiLun *iscsilun, struct IscsiTask *iTask)
281 {
282     *iTask = (struct IscsiTask) {
283         .co         = qemu_coroutine_self(),
284         .iscsilun   = iscsilun,
285     };
286 }
287 
288 static void
289 iscsi_abort_task_cb(struct iscsi_context *iscsi, int status, void *command_data,
290                     void *private_data)
291 {
292     IscsiAIOCB *acb = private_data;
293 
294     acb->status = -ECANCELED;
295     iscsi_schedule_bh(acb);
296 }
297 
298 static void
299 iscsi_aio_cancel(BlockAIOCB *blockacb)
300 {
301     IscsiAIOCB *acb = (IscsiAIOCB *)blockacb;
302     IscsiLun *iscsilun = acb->iscsilun;
303 
304     if (acb->status != -EINPROGRESS) {
305         return;
306     }
307 
308     /* send a task management call to the target to cancel the outstanding task */
309     iscsi_task_mgmt_abort_task_async(iscsilun->iscsi, acb->task,
310                                      iscsi_abort_task_cb, acb);
311 
312 }
313 
314 static const AIOCBInfo iscsi_aiocb_info = {
315     .aiocb_size         = sizeof(IscsiAIOCB),
316     .cancel_async       = iscsi_aio_cancel,
317 };
318 
319 
320 static void iscsi_process_read(void *arg);
321 static void iscsi_process_write(void *arg);
322 
323 /* Called with QemuMutex held.  */
324 static void
325 iscsi_set_events(IscsiLun *iscsilun)
326 {
327     struct iscsi_context *iscsi = iscsilun->iscsi;
328     int ev = iscsi_which_events(iscsi);
329 
330     if (ev != iscsilun->events) {
331         aio_set_fd_handler(iscsilun->aio_context, iscsi_get_fd(iscsi),
332                            false,
333                            (ev & POLLIN) ? iscsi_process_read : NULL,
334                            (ev & POLLOUT) ? iscsi_process_write : NULL,
335                            NULL,
336                            iscsilun);
337         iscsilun->events = ev;
338     }
339 }
340 
341 static void iscsi_timed_check_events(void *opaque)
342 {
343     IscsiLun *iscsilun = opaque;
344 
345     /* check for timed out requests */
346     iscsi_service(iscsilun->iscsi, 0);
347 
348     if (iscsilun->request_timed_out) {
349         iscsilun->request_timed_out = false;
350         iscsi_reconnect(iscsilun->iscsi);
351     }
352 
353     /* newer versions of libiscsi may return zero events. Ensure we are able
354      * to return to service once this situation changes. */
355     iscsi_set_events(iscsilun);
356 
357     timer_mod(iscsilun->event_timer,
358               qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + EVENT_INTERVAL);
359 }
360 
361 static void
362 iscsi_process_read(void *arg)
363 {
364     IscsiLun *iscsilun = arg;
365     struct iscsi_context *iscsi = iscsilun->iscsi;
366 
367     qemu_mutex_lock(&iscsilun->mutex);
368     iscsi_service(iscsi, POLLIN);
369     iscsi_set_events(iscsilun);
370     qemu_mutex_unlock(&iscsilun->mutex);
371 }
372 
373 static void
374 iscsi_process_write(void *arg)
375 {
376     IscsiLun *iscsilun = arg;
377     struct iscsi_context *iscsi = iscsilun->iscsi;
378 
379     qemu_mutex_lock(&iscsilun->mutex);
380     iscsi_service(iscsi, POLLOUT);
381     iscsi_set_events(iscsilun);
382     qemu_mutex_unlock(&iscsilun->mutex);
383 }
384 
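/* Convert between LUN block units and QEMU's 512-byte sector units; e.g. with
 * a 4096-byte target block size, one LUN block corresponds to 8 QEMU sectors. */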
385 static int64_t sector_lun2qemu(int64_t sector, IscsiLun *iscsilun)
386 {
387     return sector * iscsilun->block_size / BDRV_SECTOR_SIZE;
388 }
389 
390 static int64_t sector_qemu2lun(int64_t sector, IscsiLun *iscsilun)
391 {
392     return sector * BDRV_SECTOR_SIZE / iscsilun->block_size;
393 }
394 
395 static bool is_byte_request_lun_aligned(int64_t offset, int count,
396                                         IscsiLun *iscsilun)
397 {
398     if (offset % iscsilun->block_size || count % iscsilun->block_size) {
399         error_report("iSCSI misaligned request: "
400                      "iscsilun->block_size %u, offset %" PRIi64
401                      ", count %d",
402                      iscsilun->block_size, offset, count);
403         return false;
404     }
405     return true;
406 }
407 
408 static bool is_sector_request_lun_aligned(int64_t sector_num, int nb_sectors,
409                                           IscsiLun *iscsilun)
410 {
411     assert(nb_sectors <= BDRV_REQUEST_MAX_SECTORS);
412     return is_byte_request_lun_aligned(sector_num << BDRV_SECTOR_BITS,
413                                        nb_sectors << BDRV_SECTOR_BITS,
414                                        iscsilun);
415 }
416 
417 static void iscsi_allocmap_free(IscsiLun *iscsilun)
418 {
419     g_free(iscsilun->allocmap);
420     g_free(iscsilun->allocmap_valid);
421     iscsilun->allocmap = NULL;
422     iscsilun->allocmap_valid = NULL;
423 }
424 
425 
426 static int iscsi_allocmap_init(IscsiLun *iscsilun, int open_flags)
427 {
428     iscsi_allocmap_free(iscsilun);
429 
430     iscsilun->allocmap_size =
431         DIV_ROUND_UP(sector_lun2qemu(iscsilun->num_blocks, iscsilun),
432                      iscsilun->cluster_sectors);
433 
434     iscsilun->allocmap = bitmap_try_new(iscsilun->allocmap_size);
435     if (!iscsilun->allocmap) {
436         return -ENOMEM;
437     }
438 
439     if (open_flags & BDRV_O_NOCACHE) {
440         /* If cache.direct = on, all allocmap entries are treated as
441          * invalid to force a fresh lookup of the block status on
442          * every read request */
443         return 0;
444     }
445 
446     iscsilun->allocmap_valid = bitmap_try_new(iscsilun->allocmap_size);
447     if (!iscsilun->allocmap_valid) {
448         /* if we are under memory pressure free the allocmap as well */
449         iscsi_allocmap_free(iscsilun);
450         return -ENOMEM;
451     }
452 
453     return 0;
454 }
455 
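/* Update the allocation map for a sector range.  The range is expanded
 * outwards to whole clusters when marking clusters allocated or invalid, and
 * shrunk inwards when marking them unallocated or valid, so partial clusters
 * are always treated conservatively.  A small worked example (assuming
 * cluster_sectors = 8): sector_num = 4, nb_sectors = 16 covers clusters 0..2
 * when expanded, but only cluster 1 when shrunk. */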
456 static void
457 iscsi_allocmap_update(IscsiLun *iscsilun, int64_t sector_num,
458                       int nb_sectors, bool allocated, bool valid)
459 {
460     int64_t cl_num_expanded, nb_cls_expanded, cl_num_shrunk, nb_cls_shrunk;
461 
462     if (iscsilun->allocmap == NULL) {
463         return;
464     }
465     /* expand to entirely contain all affected clusters */
466     cl_num_expanded = sector_num / iscsilun->cluster_sectors;
467     nb_cls_expanded = DIV_ROUND_UP(sector_num + nb_sectors,
468                                    iscsilun->cluster_sectors) - cl_num_expanded;
469     /* shrink to touch only completely contained clusters */
470     cl_num_shrunk = DIV_ROUND_UP(sector_num, iscsilun->cluster_sectors);
471     nb_cls_shrunk = (sector_num + nb_sectors) / iscsilun->cluster_sectors
472                       - cl_num_shrunk;
473     if (allocated) {
474         bitmap_set(iscsilun->allocmap, cl_num_expanded, nb_cls_expanded);
475     } else {
476         if (nb_cls_shrunk > 0) {
477             bitmap_clear(iscsilun->allocmap, cl_num_shrunk, nb_cls_shrunk);
478         }
479     }
480 
481     if (iscsilun->allocmap_valid == NULL) {
482         return;
483     }
484     if (valid) {
485         if (nb_cls_shrunk > 0) {
486             bitmap_set(iscsilun->allocmap_valid, cl_num_shrunk, nb_cls_shrunk);
487         }
488     } else {
489         bitmap_clear(iscsilun->allocmap_valid, cl_num_expanded,
490                      nb_cls_expanded);
491     }
492 }
493 
494 static void
495 iscsi_allocmap_set_allocated(IscsiLun *iscsilun, int64_t sector_num,
496                              int nb_sectors)
497 {
498     iscsi_allocmap_update(iscsilun, sector_num, nb_sectors, true, true);
499 }
500 
501 static void
502 iscsi_allocmap_set_unallocated(IscsiLun *iscsilun, int64_t sector_num,
503                                int nb_sectors)
504 {
505     /* Note: if cache.direct=on the fifth argument to iscsi_allocmap_update
506      * is ignored, so this will in effect be an iscsi_allocmap_set_invalid.
507      */
508     iscsi_allocmap_update(iscsilun, sector_num, nb_sectors, false, true);
509 }
510 
511 static void iscsi_allocmap_set_invalid(IscsiLun *iscsilun, int64_t sector_num,
512                                        int nb_sectors)
513 {
514     iscsi_allocmap_update(iscsilun, sector_num, nb_sectors, false, false);
515 }
516 
517 static void iscsi_allocmap_invalidate(IscsiLun *iscsilun)
518 {
519     if (iscsilun->allocmap) {
520         bitmap_zero(iscsilun->allocmap, iscsilun->allocmap_size);
521     }
522     if (iscsilun->allocmap_valid) {
523         bitmap_zero(iscsilun->allocmap_valid, iscsilun->allocmap_size);
524     }
525 }
526 
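/* Returns true if any cluster touched by the sector range may be allocated,
 * or if no allocmap exists (in which case allocation must be assumed). */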
527 static inline bool
528 iscsi_allocmap_is_allocated(IscsiLun *iscsilun, int64_t sector_num,
529                             int nb_sectors)
530 {
531     unsigned long size;
532     if (iscsilun->allocmap == NULL) {
533         return true;
534     }
535     size = DIV_ROUND_UP(sector_num + nb_sectors, iscsilun->cluster_sectors);
536     return !(find_next_bit(iscsilun->allocmap, size,
537                            sector_num / iscsilun->cluster_sectors) == size);
538 }
539 
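/* Returns true only if every cluster touched by the sector range is marked
 * valid in allocmap_valid (always false when cache.direct = on). */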
540 static inline bool iscsi_allocmap_is_valid(IscsiLun *iscsilun,
541                                            int64_t sector_num, int nb_sectors)
542 {
543     unsigned long size;
544     if (iscsilun->allocmap_valid == NULL) {
545         return false;
546     }
547     size = DIV_ROUND_UP(sector_num + nb_sectors, iscsilun->cluster_sectors);
548     return (find_next_zero_bit(iscsilun->allocmap_valid, size,
549                                sector_num / iscsilun->cluster_sectors) == size);
550 }
551 
552 static int coroutine_fn
553 iscsi_co_writev_flags(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
554                       QEMUIOVector *iov, int flags)
555 {
556     IscsiLun *iscsilun = bs->opaque;
557     struct IscsiTask iTask;
558     uint64_t lba;
559     uint32_t num_sectors;
560     bool fua = flags & BDRV_REQ_FUA;
561     int r = 0;
562 
563     if (fua) {
564         assert(iscsilun->dpofua);
565     }
566     if (!is_sector_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
567         return -EINVAL;
568     }
569 
570     if (bs->bl.max_transfer) {
571         assert(nb_sectors << BDRV_SECTOR_BITS <= bs->bl.max_transfer);
572     }
573 
574     lba = sector_qemu2lun(sector_num, iscsilun);
575     num_sectors = sector_qemu2lun(nb_sectors, iscsilun);
576     iscsi_co_init_iscsitask(iscsilun, &iTask);
577     qemu_mutex_lock(&iscsilun->mutex);
578 retry:
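    /* With LIBISCSI_API_VERSION >= 20160603 the request carries the iovec
     * directly via the *_iov_task variants; older versions use the plain task
     * functions and attach the iovec below with scsi_task_set_iov_out(). */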
579     if (iscsilun->use_16_for_rw) {
580 #if LIBISCSI_API_VERSION >= (20160603)
581         iTask.task = iscsi_write16_iov_task(iscsilun->iscsi, iscsilun->lun, lba,
582                                             NULL, num_sectors * iscsilun->block_size,
583                                             iscsilun->block_size, 0, 0, fua, 0, 0,
584                                             iscsi_co_generic_cb, &iTask,
585                                             (struct scsi_iovec *)iov->iov, iov->niov);
586     } else {
587         iTask.task = iscsi_write10_iov_task(iscsilun->iscsi, iscsilun->lun, lba,
588                                             NULL, num_sectors * iscsilun->block_size,
589                                             iscsilun->block_size, 0, 0, fua, 0, 0,
590                                             iscsi_co_generic_cb, &iTask,
591                                             (struct scsi_iovec *)iov->iov, iov->niov);
592     }
593 #else
594         iTask.task = iscsi_write16_task(iscsilun->iscsi, iscsilun->lun, lba,
595                                         NULL, num_sectors * iscsilun->block_size,
596                                         iscsilun->block_size, 0, 0, fua, 0, 0,
597                                         iscsi_co_generic_cb, &iTask);
598     } else {
599         iTask.task = iscsi_write10_task(iscsilun->iscsi, iscsilun->lun, lba,
600                                         NULL, num_sectors * iscsilun->block_size,
601                                         iscsilun->block_size, 0, 0, fua, 0, 0,
602                                         iscsi_co_generic_cb, &iTask);
603     }
604 #endif
605     if (iTask.task == NULL) {
606         qemu_mutex_unlock(&iscsilun->mutex);
607         return -ENOMEM;
608     }
609 #if LIBISCSI_API_VERSION < (20160603)
610     scsi_task_set_iov_out(iTask.task, (struct scsi_iovec *) iov->iov,
611                           iov->niov);
612 #endif
613     while (!iTask.complete) {
614         iscsi_set_events(iscsilun);
615         qemu_mutex_unlock(&iscsilun->mutex);
616         qemu_coroutine_yield();
617         qemu_mutex_lock(&iscsilun->mutex);
618     }
619 
620     if (iTask.task != NULL) {
621         scsi_free_scsi_task(iTask.task);
622         iTask.task = NULL;
623     }
624 
625     if (iTask.do_retry) {
626         iTask.complete = 0;
627         goto retry;
628     }
629 
630     if (iTask.status != SCSI_STATUS_GOOD) {
631         iscsi_allocmap_set_invalid(iscsilun, sector_num, nb_sectors);
632         r = iTask.err_code;
633         goto out_unlock;
634     }
635 
636     iscsi_allocmap_set_allocated(iscsilun, sector_num, nb_sectors);
637 
638 out_unlock:
639     qemu_mutex_unlock(&iscsilun->mutex);
640     return r;
641 }
642 
643 
644 
645 static int64_t coroutine_fn iscsi_co_get_block_status(BlockDriverState *bs,
646                                                   int64_t sector_num,
647                                                   int nb_sectors, int *pnum,
648                                                   BlockDriverState **file)
649 {
650     IscsiLun *iscsilun = bs->opaque;
651     struct scsi_get_lba_status *lbas = NULL;
652     struct scsi_lba_status_descriptor *lbasd = NULL;
653     struct IscsiTask iTask;
654     int64_t ret;
655 
656     iscsi_co_init_iscsitask(iscsilun, &iTask);
657 
658     if (!is_sector_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
659         ret = -EINVAL;
660         goto out;
661     }
662 
663     /* default to all sectors allocated */
664     ret = BDRV_BLOCK_DATA;
665     ret |= (sector_num << BDRV_SECTOR_BITS) | BDRV_BLOCK_OFFSET_VALID;
666     *pnum = nb_sectors;
667 
668     /* LUN does not support logical block provisioning */
669     if (!iscsilun->lbpme) {
670         goto out;
671     }
672 
673     qemu_mutex_lock(&iscsilun->mutex);
674 retry:
675     if (iscsi_get_lba_status_task(iscsilun->iscsi, iscsilun->lun,
676                                   sector_qemu2lun(sector_num, iscsilun),
677                                   8 + 16, iscsi_co_generic_cb,
678                                   &iTask) == NULL) {
679         ret = -ENOMEM;
680         goto out_unlock;
681     }
682 
683     while (!iTask.complete) {
684         iscsi_set_events(iscsilun);
685         qemu_mutex_unlock(&iscsilun->mutex);
686         qemu_coroutine_yield();
687         qemu_mutex_lock(&iscsilun->mutex);
688     }
689 
690     if (iTask.do_retry) {
691         if (iTask.task != NULL) {
692             scsi_free_scsi_task(iTask.task);
693             iTask.task = NULL;
694         }
695         iTask.complete = 0;
696         goto retry;
697     }
698 
699     if (iTask.status != SCSI_STATUS_GOOD) {
700         /* If the get_lba_status callout fails (e.g. because
701          * the device is busy or the command is not
702          * supported), we pretend all blocks are allocated,
703          * for backwards compatibility */
704         goto out_unlock;
705     }
706 
707     lbas = scsi_datain_unmarshall(iTask.task);
708     if (lbas == NULL) {
709         ret = -EIO;
710         goto out_unlock;
711     }
712 
713     lbasd = &lbas->descriptors[0];
714 
715     if (sector_qemu2lun(sector_num, iscsilun) != lbasd->lba) {
716         ret = -EIO;
717         goto out_unlock;
718     }
719 
720     *pnum = sector_lun2qemu(lbasd->num_blocks, iscsilun);
721 
722     if (lbasd->provisioning == SCSI_PROVISIONING_TYPE_DEALLOCATED ||
723         lbasd->provisioning == SCSI_PROVISIONING_TYPE_ANCHORED) {
724         ret &= ~BDRV_BLOCK_DATA;
725         if (iscsilun->lbprz) {
726             ret |= BDRV_BLOCK_ZERO;
727         }
728     }
729 
730     if (ret & BDRV_BLOCK_ZERO) {
731         iscsi_allocmap_set_unallocated(iscsilun, sector_num, *pnum);
732     } else {
733         iscsi_allocmap_set_allocated(iscsilun, sector_num, *pnum);
734     }
735 
736     if (*pnum > nb_sectors) {
737         *pnum = nb_sectors;
738     }
739 out_unlock:
740     qemu_mutex_unlock(&iscsilun->mutex);
741 out:
742     if (iTask.task != NULL) {
743         scsi_free_scsi_task(iTask.task);
744     }
745     if (ret > 0 && ret & BDRV_BLOCK_OFFSET_VALID) {
746         *file = bs;
747     }
748     return ret;
749 }
750 
751 static int coroutine_fn iscsi_co_readv(BlockDriverState *bs,
752                                        int64_t sector_num, int nb_sectors,
753                                        QEMUIOVector *iov)
754 {
755     IscsiLun *iscsilun = bs->opaque;
756     struct IscsiTask iTask;
757     uint64_t lba;
758     uint32_t num_sectors;
759 
760     if (!is_sector_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
761         return -EINVAL;
762     }
763 
764     if (bs->bl.max_transfer) {
765         assert(nb_sectors << BDRV_SECTOR_BITS <= bs->bl.max_transfer);
766     }
767 
768     /* If cache.direct is off and we have a valid entry in our allocation
769      * map, we can skip checking the block status and directly return zeroes
770      * if the request falls within an unallocated area */
771     if (iscsi_allocmap_is_valid(iscsilun, sector_num, nb_sectors) &&
772         !iscsi_allocmap_is_allocated(iscsilun, sector_num, nb_sectors)) {
773             qemu_iovec_memset(iov, 0, 0x00, iov->size);
774             return 0;
775     }
776 
777     if (nb_sectors >= ISCSI_CHECKALLOC_THRES &&
778         !iscsi_allocmap_is_valid(iscsilun, sector_num, nb_sectors) &&
779         !iscsi_allocmap_is_allocated(iscsilun, sector_num, nb_sectors)) {
780         int pnum;
781         BlockDriverState *file;
782         /* check the block status from the beginning of the cluster
783          * containing the start sector */
784         int64_t ret = iscsi_co_get_block_status(bs,
785                           sector_num - sector_num % iscsilun->cluster_sectors,
786                           BDRV_REQUEST_MAX_SECTORS, &pnum, &file);
787         if (ret < 0) {
788             return ret;
789         }
790         /* if the whole request falls into an unallocated area we can
791          * avoid the read and directly return zeroes instead */
792         if (ret & BDRV_BLOCK_ZERO &&
793             pnum >= nb_sectors + sector_num % iscsilun->cluster_sectors) {
794             qemu_iovec_memset(iov, 0, 0x00, iov->size);
795             return 0;
796         }
797     }
798 
799     lba = sector_qemu2lun(sector_num, iscsilun);
800     num_sectors = sector_qemu2lun(nb_sectors, iscsilun);
801 
802     iscsi_co_init_iscsitask(iscsilun, &iTask);
803     qemu_mutex_lock(&iscsilun->mutex);
804 retry:
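    /* As in iscsi_co_writev_flags(): libiscsi with API version >= 20160603
     * takes the iovec directly via the *_iov_task variants, while older
     * versions attach it afterwards with scsi_task_set_iov_in(). */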
805     if (iscsilun->use_16_for_rw) {
806 #if LIBISCSI_API_VERSION >= (20160603)
807         iTask.task = iscsi_read16_iov_task(iscsilun->iscsi, iscsilun->lun, lba,
808                                            num_sectors * iscsilun->block_size,
809                                            iscsilun->block_size, 0, 0, 0, 0, 0,
810                                            iscsi_co_generic_cb, &iTask,
811                                            (struct scsi_iovec *)iov->iov, iov->niov);
812     } else {
813         iTask.task = iscsi_read10_iov_task(iscsilun->iscsi, iscsilun->lun, lba,
814                                            num_sectors * iscsilun->block_size,
815                                            iscsilun->block_size,
816                                            0, 0, 0, 0, 0,
817                                            iscsi_co_generic_cb, &iTask,
818                                            (struct scsi_iovec *)iov->iov, iov->niov);
819     }
820 #else
821         iTask.task = iscsi_read16_task(iscsilun->iscsi, iscsilun->lun, lba,
822                                        num_sectors * iscsilun->block_size,
823                                        iscsilun->block_size, 0, 0, 0, 0, 0,
824                                        iscsi_co_generic_cb, &iTask);
825     } else {
826         iTask.task = iscsi_read10_task(iscsilun->iscsi, iscsilun->lun, lba,
827                                        num_sectors * iscsilun->block_size,
828                                        iscsilun->block_size,
829                                        0, 0, 0, 0, 0,
830                                        iscsi_co_generic_cb, &iTask);
831     }
832 #endif
833     if (iTask.task == NULL) {
834         qemu_mutex_unlock(&iscsilun->mutex);
835         return -ENOMEM;
836     }
837 #if LIBISCSI_API_VERSION < (20160603)
838     scsi_task_set_iov_in(iTask.task, (struct scsi_iovec *) iov->iov, iov->niov);
839 #endif
840     while (!iTask.complete) {
841         iscsi_set_events(iscsilun);
842         qemu_mutex_unlock(&iscsilun->mutex);
843         qemu_coroutine_yield();
844         qemu_mutex_lock(&iscsilun->mutex);
845     }
846 
847     if (iTask.task != NULL) {
848         scsi_free_scsi_task(iTask.task);
849         iTask.task = NULL;
850     }
851 
852     if (iTask.do_retry) {
853         iTask.complete = 0;
854         goto retry;
855     }
856     qemu_mutex_unlock(&iscsilun->mutex);
857 
858     if (iTask.status != SCSI_STATUS_GOOD) {
859         return iTask.err_code;
860     }
861 
862     return 0;
863 }
864 
865 static int coroutine_fn iscsi_co_flush(BlockDriverState *bs)
866 {
867     IscsiLun *iscsilun = bs->opaque;
868     struct IscsiTask iTask;
869 
870     iscsi_co_init_iscsitask(iscsilun, &iTask);
871     qemu_mutex_lock(&iscsilun->mutex);
872 retry:
873     if (iscsi_synchronizecache10_task(iscsilun->iscsi, iscsilun->lun, 0, 0, 0,
874                                       0, iscsi_co_generic_cb, &iTask) == NULL) {
875         qemu_mutex_unlock(&iscsilun->mutex);
876         return -ENOMEM;
877     }
878 
879     while (!iTask.complete) {
880         iscsi_set_events(iscsilun);
881         qemu_mutex_unlock(&iscsilun->mutex);
882         qemu_coroutine_yield();
883         qemu_mutex_lock(&iscsilun->mutex);
884     }
885 
886     if (iTask.task != NULL) {
887         scsi_free_scsi_task(iTask.task);
888         iTask.task = NULL;
889     }
890 
891     if (iTask.do_retry) {
892         iTask.complete = 0;
893         goto retry;
894     }
895     qemu_mutex_unlock(&iscsilun->mutex);
896 
897     if (iTask.status != SCSI_STATUS_GOOD) {
898         return iTask.err_code;
899     }
900 
901     return 0;
902 }
903 
904 #ifdef __linux__
905 /* Called (via iscsi_service) with QemuMutex held.  */
906 static void
907 iscsi_aio_ioctl_cb(struct iscsi_context *iscsi, int status,
908                      void *command_data, void *opaque)
909 {
910     IscsiAIOCB *acb = opaque;
911 
912     g_free(acb->buf);
913     acb->buf = NULL;
914 
915     acb->status = 0;
916     if (status < 0) {
917         error_report("Failed to ioctl(SG_IO) to iSCSI lun. %s",
918                      iscsi_get_error(iscsi));
919         acb->status = iscsi_translate_sense(&acb->task->sense);
920     }
921 
922     acb->ioh->driver_status = 0;
923     acb->ioh->host_status   = 0;
924     acb->ioh->resid         = 0;
925     acb->ioh->status        = status;
926 
927 #define SG_ERR_DRIVER_SENSE    0x08
928 
929     if (status == SCSI_STATUS_CHECK_CONDITION && acb->task->datain.size >= 2) {
930         int ss;
931 
932         acb->ioh->driver_status |= SG_ERR_DRIVER_SENSE;
933 
934         acb->ioh->sb_len_wr = acb->task->datain.size - 2;
935         ss = (acb->ioh->mx_sb_len >= acb->ioh->sb_len_wr) ?
936              acb->ioh->mx_sb_len : acb->ioh->sb_len_wr;
937         memcpy(acb->ioh->sbp, &acb->task->datain.data[2], ss);
938     }
939 
940     iscsi_schedule_bh(acb);
941 }
942 
943 static void iscsi_ioctl_bh_completion(void *opaque)
944 {
945     IscsiAIOCB *acb = opaque;
946 
947     qemu_bh_delete(acb->bh);
948     acb->common.cb(acb->common.opaque, acb->ret);
949     qemu_aio_unref(acb);
950 }
951 
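/* Handle the few SG ioctls that are emulated locally (nothing is sent to the
 * target) and complete the request from a bottom half. */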
952 static void iscsi_ioctl_handle_emulated(IscsiAIOCB *acb, int req, void *buf)
953 {
954     BlockDriverState *bs = acb->common.bs;
955     IscsiLun *iscsilun = bs->opaque;
956     int ret = 0;
957 
958     switch (req) {
959     case SG_GET_VERSION_NUM:
960         *(int *)buf = 30000;
961         break;
962     case SG_GET_SCSI_ID:
963         ((struct sg_scsi_id *)buf)->scsi_type = iscsilun->type;
964         break;
965     default:
966         ret = -EINVAL;
967     }
968     assert(!acb->bh);
969     acb->bh = aio_bh_new(bdrv_get_aio_context(bs),
970                          iscsi_ioctl_bh_completion, acb);
971     acb->ret = ret;
972     qemu_bh_schedule(acb->bh);
973 }
974 
975 static BlockAIOCB *iscsi_aio_ioctl(BlockDriverState *bs,
976         unsigned long int req, void *buf,
977         BlockCompletionFunc *cb, void *opaque)
978 {
979     IscsiLun *iscsilun = bs->opaque;
980     struct iscsi_context *iscsi = iscsilun->iscsi;
981     struct iscsi_data data;
982     IscsiAIOCB *acb;
983 
984     acb = qemu_aio_get(&iscsi_aiocb_info, bs, cb, opaque);
985 
986     acb->iscsilun = iscsilun;
987     acb->bh          = NULL;
988     acb->status      = -EINPROGRESS;
989     acb->buf         = NULL;
990     acb->ioh         = buf;
991 
992     if (req != SG_IO) {
993         iscsi_ioctl_handle_emulated(acb, req, buf);
994         return &acb->common;
995     }
996 
997     if (acb->ioh->cmd_len > SCSI_CDB_MAX_SIZE) {
998         error_report("iSCSI: ioctl error CDB exceeds max size (%d > %d)",
999                      acb->ioh->cmd_len, SCSI_CDB_MAX_SIZE);
1000         qemu_aio_unref(acb);
1001         return NULL;
1002     }
1003 
1004     acb->task = malloc(sizeof(struct scsi_task));
1005     if (acb->task == NULL) {
1006         error_report("iSCSI: Failed to allocate task for scsi command. %s",
1007                      iscsi_get_error(iscsi));
1008         qemu_aio_unref(acb);
1009         return NULL;
1010     }
1011     memset(acb->task, 0, sizeof(struct scsi_task));
1012 
1013     switch (acb->ioh->dxfer_direction) {
1014     case SG_DXFER_TO_DEV:
1015         acb->task->xfer_dir = SCSI_XFER_WRITE;
1016         break;
1017     case SG_DXFER_FROM_DEV:
1018         acb->task->xfer_dir = SCSI_XFER_READ;
1019         break;
1020     default:
1021         acb->task->xfer_dir = SCSI_XFER_NONE;
1022         break;
1023     }
1024 
1025     acb->task->cdb_size = acb->ioh->cmd_len;
1026     memcpy(&acb->task->cdb[0], acb->ioh->cmdp, acb->ioh->cmd_len);
1027     acb->task->expxferlen = acb->ioh->dxfer_len;
1028 
1029     data.size = 0;
1030     qemu_mutex_lock(&iscsilun->mutex);
1031     if (acb->task->xfer_dir == SCSI_XFER_WRITE) {
1032         if (acb->ioh->iovec_count == 0) {
1033             data.data = acb->ioh->dxferp;
1034             data.size = acb->ioh->dxfer_len;
1035         } else {
1036             scsi_task_set_iov_out(acb->task,
1037                                  (struct scsi_iovec *) acb->ioh->dxferp,
1038                                  acb->ioh->iovec_count);
1039         }
1040     }
1041 
1042     if (iscsi_scsi_command_async(iscsi, iscsilun->lun, acb->task,
1043                                  iscsi_aio_ioctl_cb,
1044                                  (data.size > 0) ? &data : NULL,
1045                                  acb) != 0) {
1046         qemu_mutex_unlock(&iscsilun->mutex);
1047         scsi_free_scsi_task(acb->task);
1048         qemu_aio_unref(acb);
1049         return NULL;
1050     }
1051 
1052     /* tell libiscsi to read straight into the buffer we got from ioctl */
1053     if (acb->task->xfer_dir == SCSI_XFER_READ) {
1054         if (acb->ioh->iovec_count == 0) {
1055             scsi_task_add_data_in_buffer(acb->task,
1056                                          acb->ioh->dxfer_len,
1057                                          acb->ioh->dxferp);
1058         } else {
1059             scsi_task_set_iov_in(acb->task,
1060                                  (struct scsi_iovec *) acb->ioh->dxferp,
1061                                  acb->ioh->iovec_count);
1062         }
1063     }
1064 
1065     iscsi_set_events(iscsilun);
1066     qemu_mutex_unlock(&iscsilun->mutex);
1067 
1068     return &acb->common;
1069 }
1070 
1071 #endif
1072 
1073 static int64_t
1074 iscsi_getlength(BlockDriverState *bs)
1075 {
1076     IscsiLun *iscsilun = bs->opaque;
1077     int64_t len;
1078 
1079     len  = iscsilun->num_blocks;
1080     len *= iscsilun->block_size;
1081 
1082     return len;
1083 }
1084 
1085 static int
1086 coroutine_fn iscsi_co_pdiscard(BlockDriverState *bs, int64_t offset, int bytes)
1087 {
1088     IscsiLun *iscsilun = bs->opaque;
1089     struct IscsiTask iTask;
1090     struct unmap_list list;
1091     int r = 0;
1092 
1093     if (!is_byte_request_lun_aligned(offset, bytes, iscsilun)) {
1094         return -ENOTSUP;
1095     }
1096 
1097     if (!iscsilun->lbp.lbpu) {
1098         /* UNMAP is not supported by the target */
1099         return 0;
1100     }
1101 
1102     list.lba = offset / iscsilun->block_size;
1103     list.num = bytes / iscsilun->block_size;
1104 
1105     iscsi_co_init_iscsitask(iscsilun, &iTask);
1106     qemu_mutex_lock(&iscsilun->mutex);
1107 retry:
1108     if (iscsi_unmap_task(iscsilun->iscsi, iscsilun->lun, 0, 0, &list, 1,
1109                          iscsi_co_generic_cb, &iTask) == NULL) {
1110         r = -ENOMEM;
1111         goto out_unlock;
1112     }
1113 
1114     while (!iTask.complete) {
1115         iscsi_set_events(iscsilun);
1116         qemu_mutex_unlock(&iscsilun->mutex);
1117         qemu_coroutine_yield();
1118         qemu_mutex_lock(&iscsilun->mutex);
1119     }
1120 
1121     if (iTask.task != NULL) {
1122         scsi_free_scsi_task(iTask.task);
1123         iTask.task = NULL;
1124     }
1125 
1126     if (iTask.do_retry) {
1127         iTask.complete = 0;
1128         goto retry;
1129     }
1130 
1131     if (iTask.status == SCSI_STATUS_CHECK_CONDITION) {
1132         /* the target might fail with a check condition if it
1133            is not happy with the alignment of the UNMAP request;
1134            we silently ignore the failure in this case */
1135         goto out_unlock;
1136     }
1137 
1138     if (iTask.status != SCSI_STATUS_GOOD) {
1139         r = iTask.err_code;
1140         goto out_unlock;
1141     }
1142 
1143     iscsi_allocmap_set_invalid(iscsilun, offset >> BDRV_SECTOR_BITS,
1144                                bytes >> BDRV_SECTOR_BITS);
1145 
1146 out_unlock:
1147     qemu_mutex_unlock(&iscsilun->mutex);
1148     return r;
1149 }
1150 
1151 static int
1152 coroutine_fn iscsi_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
1153                                     int bytes, BdrvRequestFlags flags)
1154 {
1155     IscsiLun *iscsilun = bs->opaque;
1156     struct IscsiTask iTask;
1157     uint64_t lba;
1158     uint32_t nb_blocks;
1159     bool use_16_for_ws = iscsilun->use_16_for_rw;
1160     int r = 0;
1161 
1162     if (!is_byte_request_lun_aligned(offset, bytes, iscsilun)) {
1163         return -ENOTSUP;
1164     }
1165 
1166     if (flags & BDRV_REQ_MAY_UNMAP) {
1167         if (!use_16_for_ws && !iscsilun->lbp.lbpws10) {
1168             /* WRITESAME10 with UNMAP is unsupported, try WRITESAME16 */
1169             use_16_for_ws = true;
1170         }
1171         if (use_16_for_ws && !iscsilun->lbp.lbpws) {
1172             /* WRITESAME16 with UNMAP is not supported by the target,
1173              * fall back and try WRITESAME10/16 without UNMAP */
1174             flags &= ~BDRV_REQ_MAY_UNMAP;
1175             use_16_for_ws = iscsilun->use_16_for_rw;
1176         }
1177     }
1178 
1179     if (!(flags & BDRV_REQ_MAY_UNMAP) && !iscsilun->has_write_same) {
1180         /* WRITESAME without UNMAP is not supported by the target */
1181         return -ENOTSUP;
1182     }
1183 
1184     lba = offset / iscsilun->block_size;
1185     nb_blocks = bytes / iscsilun->block_size;
1186 
1187     if (iscsilun->zeroblock == NULL) {
1188         iscsilun->zeroblock = g_try_malloc0(iscsilun->block_size);
1189         if (iscsilun->zeroblock == NULL) {
1190             return -ENOMEM;
1191         }
1192     }
1193 
1194     qemu_mutex_lock(&iscsilun->mutex);
1195     iscsi_co_init_iscsitask(iscsilun, &iTask);
1196 retry:
1197     if (use_16_for_ws) {
1198         iTask.task = iscsi_writesame16_task(iscsilun->iscsi, iscsilun->lun, lba,
1199                                             iscsilun->zeroblock, iscsilun->block_size,
1200                                             nb_blocks, 0, !!(flags & BDRV_REQ_MAY_UNMAP),
1201                                             0, 0, iscsi_co_generic_cb, &iTask);
1202     } else {
1203         iTask.task = iscsi_writesame10_task(iscsilun->iscsi, iscsilun->lun, lba,
1204                                             iscsilun->zeroblock, iscsilun->block_size,
1205                                             nb_blocks, 0, !!(flags & BDRV_REQ_MAY_UNMAP),
1206                                             0, 0, iscsi_co_generic_cb, &iTask);
1207     }
1208     if (iTask.task == NULL) {
1209         qemu_mutex_unlock(&iscsilun->mutex);
1210         return -ENOMEM;
1211     }
1212 
1213     while (!iTask.complete) {
1214         iscsi_set_events(iscsilun);
1215         qemu_mutex_unlock(&iscsilun->mutex);
1216         qemu_coroutine_yield();
1217         qemu_mutex_lock(&iscsilun->mutex);
1218     }
1219 
1220     if (iTask.status == SCSI_STATUS_CHECK_CONDITION &&
1221         iTask.task->sense.key == SCSI_SENSE_ILLEGAL_REQUEST &&
1222         (iTask.task->sense.ascq == SCSI_SENSE_ASCQ_INVALID_OPERATION_CODE ||
1223          iTask.task->sense.ascq == SCSI_SENSE_ASCQ_INVALID_FIELD_IN_CDB)) {
1224         /* WRITE SAME is not supported by the target */
1225         iscsilun->has_write_same = false;
1226         scsi_free_scsi_task(iTask.task);
1227         r = -ENOTSUP;
1228         goto out_unlock;
1229     }
1230 
1231     if (iTask.task != NULL) {
1232         scsi_free_scsi_task(iTask.task);
1233         iTask.task = NULL;
1234     }
1235 
1236     if (iTask.do_retry) {
1237         iTask.complete = 0;
1238         goto retry;
1239     }
1240 
1241     if (iTask.status != SCSI_STATUS_GOOD) {
1242         iscsi_allocmap_set_invalid(iscsilun, offset >> BDRV_SECTOR_BITS,
1243                                    bytes >> BDRV_SECTOR_BITS);
1244         r = iTask.err_code;
1245         goto out_unlock;
1246     }
1247 
1248     if (flags & BDRV_REQ_MAY_UNMAP) {
1249         iscsi_allocmap_set_invalid(iscsilun, offset >> BDRV_SECTOR_BITS,
1250                                    bytes >> BDRV_SECTOR_BITS);
1251     } else {
1252         iscsi_allocmap_set_allocated(iscsilun, offset >> BDRV_SECTOR_BITS,
1253                                      bytes >> BDRV_SECTOR_BITS);
1254     }
1255 
1256 out_unlock:
1257     qemu_mutex_unlock(&iscsilun->mutex);
1258     return r;
1259 }
1260 
1261 static void apply_chap(struct iscsi_context *iscsi, QemuOpts *opts,
1262                        Error **errp)
1263 {
1264     const char *user = NULL;
1265     const char *password = NULL;
1266     const char *secretid;
1267     char *secret = NULL;
1268 
1269     user = qemu_opt_get(opts, "user");
1270     if (!user) {
1271         return;
1272     }
1273 
1274     secretid = qemu_opt_get(opts, "password-secret");
1275     password = qemu_opt_get(opts, "password");
1276     if (secretid && password) {
1277         error_setg(errp, "'password' and 'password-secret' properties are "
1278                    "mutually exclusive");
1279         return;
1280     }
1281     if (secretid) {
1282         secret = qcrypto_secret_lookup_as_utf8(secretid, errp);
1283         if (!secret) {
1284             return;
1285         }
1286         password = secret;
1287     } else if (!password) {
1288         error_setg(errp, "CHAP username specified but no password was given");
1289         return;
1290     }
1291 
1292     if (iscsi_set_initiator_username_pwd(iscsi, user, password)) {
1293         error_setg(errp, "Failed to set initiator username and password");
1294     }
1295 
1296     g_free(secret);
1297 }
1298 
1299 static void apply_header_digest(struct iscsi_context *iscsi, QemuOpts *opts,
1300                                 Error **errp)
1301 {
1302     const char *digest = NULL;
1303 
1304     digest = qemu_opt_get(opts, "header-digest");
1305     if (!digest) {
1306         iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE_CRC32C);
1307     } else if (!strcmp(digest, "crc32c")) {
1308         iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_CRC32C);
1309     } else if (!strcmp(digest, "none")) {
1310         iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE);
1311     } else if (!strcmp(digest, "crc32c-none")) {
1312         iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_CRC32C_NONE);
1313     } else if (!strcmp(digest, "none-crc32c")) {
1314         iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE_CRC32C);
1315     } else {
1316         error_setg(errp, "Invalid header-digest setting : %s", digest);
1317     }
1318 }
1319 
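/* Use the initiator-name from the options if given; otherwise derive one from
 * the VM UUID or name, e.g. "iqn.2008-11.org.linux-kvm:<uuid-or-vm-name>"
 * (the suffix is omitted if neither is available). */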
1320 static char *get_initiator_name(QemuOpts *opts)
1321 {
1322     const char *name;
1323     char *iscsi_name;
1324     UuidInfo *uuid_info;
1325 
1326     name = qemu_opt_get(opts, "initiator-name");
1327     if (name) {
1328         return g_strdup(name);
1329     }
1330 
1331     uuid_info = qmp_query_uuid(NULL);
1332     if (strcmp(uuid_info->UUID, UUID_NONE) == 0) {
1333         name = qemu_get_vm_name();
1334     } else {
1335         name = uuid_info->UUID;
1336     }
1337     iscsi_name = g_strdup_printf("iqn.2008-11.org.linux-kvm%s%s",
1338                                  name ? ":" : "", name ? name : "");
1339     qapi_free_UuidInfo(uuid_info);
1340     return iscsi_name;
1341 }
1342 
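/* NOP timer callback: send a NOP-Out every NOP_INTERVAL ms.  If
 * MAX_NOP_FAILURES NOPs are still outstanding, flag the session so that
 * iscsi_timed_check_events() initiates a reconnect. */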
1343 static void iscsi_nop_timed_event(void *opaque)
1344 {
1345     IscsiLun *iscsilun = opaque;
1346 
1347     qemu_mutex_lock(&iscsilun->mutex);
1348     if (iscsi_get_nops_in_flight(iscsilun->iscsi) >= MAX_NOP_FAILURES) {
1349         error_report("iSCSI: NOP timeout. Reconnecting...");
1350         iscsilun->request_timed_out = true;
1351     } else if (iscsi_nop_out_async(iscsilun->iscsi, NULL, NULL, 0, NULL) != 0) {
1352         error_report("iSCSI: failed to send NOP-Out. Disabling NOP messages.");
1353         goto out;
1354     }
1355 
1356     timer_mod(iscsilun->nop_timer, qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + NOP_INTERVAL);
1357     iscsi_set_events(iscsilun);
1358 
1359 out:
1360     qemu_mutex_unlock(&iscsilun->mutex);
1361 }
1362 
1363 static void iscsi_readcapacity_sync(IscsiLun *iscsilun, Error **errp)
1364 {
1365     struct scsi_task *task = NULL;
1366     struct scsi_readcapacity10 *rc10 = NULL;
1367     struct scsi_readcapacity16 *rc16 = NULL;
1368     int retries = ISCSI_CMD_RETRIES;
1369 
1370     do {
1371         if (task != NULL) {
1372             scsi_free_scsi_task(task);
1373             task = NULL;
1374         }
1375 
1376         switch (iscsilun->type) {
1377         case TYPE_DISK:
1378             task = iscsi_readcapacity16_sync(iscsilun->iscsi, iscsilun->lun);
1379             if (task != NULL && task->status == SCSI_STATUS_GOOD) {
1380                 rc16 = scsi_datain_unmarshall(task);
1381                 if (rc16 == NULL) {
1382                     error_setg(errp, "iSCSI: Failed to unmarshall readcapacity16 data.");
1383                 } else {
1384                     iscsilun->block_size = rc16->block_length;
1385                     iscsilun->num_blocks = rc16->returned_lba + 1;
1386                     iscsilun->lbpme = !!rc16->lbpme;
1387                     iscsilun->lbprz = !!rc16->lbprz;
1388                     iscsilun->use_16_for_rw = (rc16->returned_lba > 0xffffffff);
1389                 }
1390                 break;
1391             }
1392             if (task != NULL && task->status == SCSI_STATUS_CHECK_CONDITION
1393                 && task->sense.key == SCSI_SENSE_UNIT_ATTENTION) {
1394                 break;
1395             }
1396             /* Fall through and try READ CAPACITY(10) instead.  */
1397         case TYPE_ROM:
1398             task = iscsi_readcapacity10_sync(iscsilun->iscsi, iscsilun->lun, 0, 0);
1399             if (task != NULL && task->status == SCSI_STATUS_GOOD) {
1400                 rc10 = scsi_datain_unmarshall(task);
1401                 if (rc10 == NULL) {
1402                     error_setg(errp, "iSCSI: Failed to unmarshall readcapacity10 data.");
1403                 } else {
1404                     iscsilun->block_size = rc10->block_size;
1405                     if (rc10->lba == 0) {
1406                         /* blank disk loaded */
1407                         iscsilun->num_blocks = 0;
1408                     } else {
1409                         iscsilun->num_blocks = rc10->lba + 1;
1410                     }
1411                 }
1412             }
1413             break;
1414         default:
1415             return;
1416         }
1417     } while (task != NULL && task->status == SCSI_STATUS_CHECK_CONDITION
1418              && task->sense.key == SCSI_SENSE_UNIT_ATTENTION
1419              && retries-- > 0);
1420 
1421     if (task == NULL || task->status != SCSI_STATUS_GOOD) {
1422         error_setg(errp, "iSCSI: failed to send readcapacity10/16 command");
1423     } else if (!iscsilun->block_size ||
1424                iscsilun->block_size % BDRV_SECTOR_SIZE) {
1425         error_setg(errp, "iSCSI: the target returned an invalid "
1426                    "block size of %d.", iscsilun->block_size);
1427     }
1428     if (task) {
1429         scsi_free_scsi_task(task);
1430     }
1431 }
1432 
1433 static struct scsi_task *iscsi_do_inquiry(struct iscsi_context *iscsi, int lun,
1434                                           int evpd, int pc, void **inq, Error **errp)
1435 {
1436     int full_size;
1437     struct scsi_task *task = NULL;
1438     task = iscsi_inquiry_sync(iscsi, lun, evpd, pc, 64);
1439     if (task == NULL || task->status != SCSI_STATUS_GOOD) {
1440         goto fail;
1441     }
1442     full_size = scsi_datain_getfullsize(task);
1443     if (full_size > task->datain.size) {
1444         scsi_free_scsi_task(task);
1445 
1446         /* we need more data for the full list */
1447         task = iscsi_inquiry_sync(iscsi, lun, evpd, pc, full_size);
1448         if (task == NULL || task->status != SCSI_STATUS_GOOD) {
1449             goto fail;
1450         }
1451     }
1452 
1453     *inq = scsi_datain_unmarshall(task);
1454     if (*inq == NULL) {
1455         error_setg(errp, "iSCSI: failed to unmarshall inquiry datain blob");
1456         goto fail_with_err;
1457     }
1458 
1459     return task;
1460 
1461 fail:
1462     error_setg(errp, "iSCSI: Inquiry command failed : %s",
1463                iscsi_get_error(iscsi));
1464 fail_with_err:
1465     if (task != NULL) {
1466         scsi_free_scsi_task(task);
1467     }
1468     return NULL;
1469 }
1470 
1471 static void iscsi_detach_aio_context(BlockDriverState *bs)
1472 {
1473     IscsiLun *iscsilun = bs->opaque;
1474 
1475     aio_set_fd_handler(iscsilun->aio_context, iscsi_get_fd(iscsilun->iscsi),
1476                        false, NULL, NULL, NULL, NULL);
1477     iscsilun->events = 0;
1478 
1479     if (iscsilun->nop_timer) {
1480         timer_del(iscsilun->nop_timer);
1481         timer_free(iscsilun->nop_timer);
1482         iscsilun->nop_timer = NULL;
1483     }
1484     if (iscsilun->event_timer) {
1485         timer_del(iscsilun->event_timer);
1486         timer_free(iscsilun->event_timer);
1487         iscsilun->event_timer = NULL;
1488     }
1489 }
1490 
1491 static void iscsi_attach_aio_context(BlockDriverState *bs,
1492                                      AioContext *new_context)
1493 {
1494     IscsiLun *iscsilun = bs->opaque;
1495 
1496     iscsilun->aio_context = new_context;
1497     iscsi_set_events(iscsilun);
1498 
1499     /* Set up a timer for sending out iSCSI NOPs */
1500     iscsilun->nop_timer = aio_timer_new(iscsilun->aio_context,
1501                                         QEMU_CLOCK_REALTIME, SCALE_MS,
1502                                         iscsi_nop_timed_event, iscsilun);
1503     timer_mod(iscsilun->nop_timer,
1504               qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + NOP_INTERVAL);
1505 
1506     /* Set up a timer for periodic calls to iscsi_set_events and to
1507      * scan for command timeout */
1508     iscsilun->event_timer = aio_timer_new(iscsilun->aio_context,
1509                                           QEMU_CLOCK_REALTIME, SCALE_MS,
1510                                           iscsi_timed_check_events, iscsilun);
1511     timer_mod(iscsilun->event_timer,
1512               qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + EVENT_INTERVAL);
1513 }
1514 
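/* Issue MODE SENSE(6) for all mode pages (0x3F) to learn whether the LUN is
 * write-protected and whether it supports DPO/FUA. */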
1515 static void iscsi_modesense_sync(IscsiLun *iscsilun)
1516 {
1517     struct scsi_task *task;
1518     struct scsi_mode_sense *ms = NULL;
1519     iscsilun->write_protected = false;
1520     iscsilun->dpofua = false;
1521 
1522     task = iscsi_modesense6_sync(iscsilun->iscsi, iscsilun->lun,
1523                                  1, SCSI_MODESENSE_PC_CURRENT,
1524                                  0x3F, 0, 255);
1525     if (task == NULL) {
1526         error_report("iSCSI: Failed to send MODE_SENSE(6) command: %s",
1527                      iscsi_get_error(iscsilun->iscsi));
1528         goto out;
1529     }
1530 
1531     if (task->status != SCSI_STATUS_GOOD) {
1532         error_report("iSCSI: Failed MODE_SENSE(6), LUN assumed writable");
1533         goto out;
1534     }
1535     ms = scsi_datain_unmarshall(task);
1536     if (!ms) {
1537         error_report("iSCSI: Failed to unmarshall MODE_SENSE(6) data: %s",
1538                      iscsi_get_error(iscsilun->iscsi));
1539         goto out;
1540     }
1541     iscsilun->write_protected = ms->device_specific_parameter & 0x80;
1542     iscsilun->dpofua          = ms->device_specific_parameter & 0x10;
1543 
1544 out:
1545     if (task) {
1546         scsi_free_scsi_task(task);
1547     }
1548 }
1549 
1550 static void iscsi_parse_iscsi_option(const char *target, QDict *options)
1551 {
1552     QemuOptsList *list;
1553     QemuOpts *opts;
1554     const char *user, *password, *password_secret, *initiator_name,
1555                *header_digest, *timeout;
1556 
1557     list = qemu_find_opts("iscsi");
1558     if (!list) {
1559         return;
1560     }
1561 
1562     opts = qemu_opts_find(list, target);
1563     if (opts == NULL) {
1564         opts = QTAILQ_FIRST(&list->head);
1565         if (!opts) {
1566             return;
1567         }
1568     }
1569 
1570     user = qemu_opt_get(opts, "user");
1571     if (user) {
1572         qdict_set_default_str(options, "user", user);
1573     }
1574 
1575     password = qemu_opt_get(opts, "password");
1576     if (password) {
1577         qdict_set_default_str(options, "password", password);
1578     }
1579 
1580     password_secret = qemu_opt_get(opts, "password-secret");
1581     if (password_secret) {
1582         qdict_set_default_str(options, "password-secret", password_secret);
1583     }
1584 
1585     initiator_name = qemu_opt_get(opts, "initiator-name");
1586     if (initiator_name) {
1587         qdict_set_default_str(options, "initiator-name", initiator_name);
1588     }
1589 
1590     header_digest = qemu_opt_get(opts, "header-digest");
1591     if (header_digest) {
1592         /* -iscsi takes upper case values, but QAPI only supports lower case
1593          * enum constant names, so we have to convert here. */
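        /* For example, "CRC32C" from -iscsi becomes the QAPI value "crc32c". */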
1594         char *qapi_value = g_ascii_strdown(header_digest, -1);
1595         qdict_set_default_str(options, "header-digest", qapi_value);
1596         g_free(qapi_value);
1597     }
1598 
1599     timeout = qemu_opt_get(opts, "timeout");
1600     if (timeout) {
1601         qdict_set_default_str(options, "timeout", timeout);
1602     }
1603 }
1604 
1605 /*
1606  * We support iSCSI URLs of the form
1607  * iscsi://[<username>%<password>@]<host>[:<port>]/<targetname>/<lun>
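 *
 * For example (hypothetical portal and target name):
 * iscsi://user%secret@192.0.2.1:3260/iqn.2001-04.com.example:storage/1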
1608  */
1609 static void iscsi_parse_filename(const char *filename, QDict *options,
1610                                  Error **errp)
1611 {
1612     struct iscsi_url *iscsi_url;
1613     const char *transport_name;
1614     char *lun_str;
1615 
1616     iscsi_url = iscsi_parse_full_url(NULL, filename);
1617     if (iscsi_url == NULL) {
1618         error_setg(errp, "Failed to parse URL : %s", filename);
1619         return;
1620     }
1621 
1622 #if LIBISCSI_API_VERSION >= (20160603)
1623     switch (iscsi_url->transport) {
1624     case TCP_TRANSPORT:
1625         transport_name = "tcp";
1626         break;
1627     case ISER_TRANSPORT:
1628         transport_name = "iser";
1629         break;
1630     default:
1631         error_setg(errp, "Unknown transport type (%d)",
1632                    iscsi_url->transport);
1633         return;
1634     }
1635 #else
1636     transport_name = "tcp";
1637 #endif
1638 
1639     qdict_set_default_str(options, "transport", transport_name);
1640     qdict_set_default_str(options, "portal", iscsi_url->portal);
1641     qdict_set_default_str(options, "target", iscsi_url->target);
1642 
1643     lun_str = g_strdup_printf("%d", iscsi_url->lun);
1644     qdict_set_default_str(options, "lun", lun_str);
1645     g_free(lun_str);
1646 
1647     /* User/password from -iscsi take precedence over those from the URL */
1648     iscsi_parse_iscsi_option(iscsi_url->target, options);
1649 
1650     if (iscsi_url->user[0] != '\0') {
1651         qdict_set_default_str(options, "user", iscsi_url->user);
1652         qdict_set_default_str(options, "password", iscsi_url->passwd);
1653     }
1654 
1655     iscsi_destroy_url(iscsi_url);
1656 }
1657 
1658 static QemuOptsList runtime_opts = {
1659     .name = "iscsi",
1660     .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
1661     .desc = {
1662         {
1663             .name = "transport",
1664             .type = QEMU_OPT_STRING,
1665         },
1666         {
1667             .name = "portal",
1668             .type = QEMU_OPT_STRING,
1669         },
1670         {
1671             .name = "target",
1672             .type = QEMU_OPT_STRING,
1673         },
1674         {
1675             .name = "user",
1676             .type = QEMU_OPT_STRING,
1677         },
1678         {
1679             .name = "password",
1680             .type = QEMU_OPT_STRING,
1681         },
1682         {
1683             .name = "password-secret",
1684             .type = QEMU_OPT_STRING,
1685         },
1686         {
1687             .name = "lun",
1688             .type = QEMU_OPT_NUMBER,
1689         },
1690         {
1691             .name = "initiator-name",
1692             .type = QEMU_OPT_STRING,
1693         },
1694         {
1695             .name = "header-digest",
1696             .type = QEMU_OPT_STRING,
1697         },
1698         {
1699             .name = "timeout",
1700             .type = QEMU_OPT_NUMBER,
1701         },
1702         {
1703             .name = "filename",
1704             .type = QEMU_OPT_STRING,
1705         },
1706         { /* end of list */ }
1707     },
1708 };
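
/*
 * A minimal sketch (with hypothetical portal and target values) of supplying
 * these runtime options directly, without a URL:
 *
 *   -blockdev driver=iscsi,node-name=disk0,transport=tcp,lun=0,\
 *             portal=192.0.2.1:3260,target=iqn.2001-04.com.example:storage
 */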
1709 
1710 static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
1711                       Error **errp)
1712 {
1713     IscsiLun *iscsilun = bs->opaque;
1714     struct iscsi_context *iscsi = NULL;
1715     struct scsi_task *task = NULL;
1716     struct scsi_inquiry_standard *inq = NULL;
1717     struct scsi_inquiry_supported_pages *inq_vpd;
1718     char *initiator_name = NULL;
1719     QemuOpts *opts;
1720     Error *local_err = NULL;
1721     const char *transport_name, *portal, *target, *filename;
1722 #if LIBISCSI_API_VERSION >= (20160603)
1723     enum iscsi_transport_type transport;
1724 #endif
1725     int i, ret = 0, timeout = 0, lun;
1726 
1727     /* If we are given a filename, parse the filename, with precedence given to
1728      * filename encoded options */
1729     filename = qdict_get_try_str(options, "filename");
1730     if (filename) {
1731         warn_report("'filename' option specified. "
1732                     "This is an unsupported option, and may be deprecated "
1733                     "in the future");
1734         iscsi_parse_filename(filename, options, &local_err);
1735         if (local_err) {
1736             ret = -EINVAL;
1737             error_propagate(errp, local_err);
1738             goto exit;
1739         }
1740     }
1741 
1742     opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
1743     qemu_opts_absorb_qdict(opts, options, &local_err);
1744     if (local_err) {
1745         error_propagate(errp, local_err);
1746         ret = -EINVAL;
1747         goto out;
1748     }
1749 
1750     transport_name = qemu_opt_get(opts, "transport");
1751     portal = qemu_opt_get(opts, "portal");
1752     target = qemu_opt_get(opts, "target");
1753     lun = qemu_opt_get_number(opts, "lun", 0);
1754 
1755     if (!transport_name || !portal || !target) {
1756         error_setg(errp, "Need all of transport, portal and target options");
1757         ret = -EINVAL;
1758         goto out;
1759     }
1760 
1761     if (!strcmp(transport_name, "tcp")) {
1762 #if LIBISCSI_API_VERSION >= (20160603)
1763         transport = TCP_TRANSPORT;
1764     } else if (!strcmp(transport_name, "iser")) {
1765         transport = ISER_TRANSPORT;
1766 #else
1767         /* TCP is what older libiscsi versions always use */
1768 #endif
1769     } else {
1770         error_setg(errp, "Unknown transport: %s", transport_name);
1771         ret = -EINVAL;
1772         goto out;
1773     }
1774 
1775     memset(iscsilun, 0, sizeof(IscsiLun));
1776 
1777     initiator_name = get_initiator_name(opts);
1778 
1779     iscsi = iscsi_create_context(initiator_name);
1780     if (iscsi == NULL) {
1781         error_setg(errp, "iSCSI: Failed to create iSCSI context.");
1782         ret = -ENOMEM;
1783         goto out;
1784     }
1785 #if LIBISCSI_API_VERSION >= (20160603)
1786     if (iscsi_init_transport(iscsi, transport)) {
1787         error_setg(errp, "Error initializing transport.");
1788         ret = -EINVAL;
1789         goto out;
1790     }
1791 #endif
1792     if (iscsi_set_targetname(iscsi, target)) {
1793         error_setg(errp, "iSCSI: Failed to set target name.");
1794         ret = -EINVAL;
1795         goto out;
1796     }
1797 
1798     /* check if we got CHAP username/password via the options */
1799     apply_chap(iscsi, opts, &local_err);
1800     if (local_err != NULL) {
1801         error_propagate(errp, local_err);
1802         ret = -EINVAL;
1803         goto out;
1804     }
1805 
1806     if (iscsi_set_session_type(iscsi, ISCSI_SESSION_NORMAL) != 0) {
1807         error_setg(errp, "iSCSI: Failed to set session type to normal.");
1808         ret = -EINVAL;
1809         goto out;
1810     }
1811 
1812     /* check if we got HEADER_DIGEST via the options */
1813     apply_header_digest(iscsi, opts, &local_err);
1814     if (local_err != NULL) {
1815         error_propagate(errp, local_err);
1816         ret = -EINVAL;
1817         goto out;
1818     }
1819 
1820     /* timeout handling is broken in libiscsi before 1.15.0 */
1821     timeout = qemu_opt_get_number(opts, "timeout", 0);
1822 #if LIBISCSI_API_VERSION >= 20150621
1823     iscsi_set_timeout(iscsi, timeout);
1824 #else
1825     if (timeout) {
1826         error_report("iSCSI: ignoring timeout value for libiscsi <1.15.0");
1827     }
1828 #endif
1829 
1830     if (iscsi_full_connect_sync(iscsi, portal, lun) != 0) {
1831         error_setg(errp, "iSCSI: Failed to connect to LUN : %s",
1832             iscsi_get_error(iscsi));
1833         ret = -EINVAL;
1834         goto out;
1835     }
1836 
1837     iscsilun->iscsi = iscsi;
1838     iscsilun->aio_context = bdrv_get_aio_context(bs);
1839     iscsilun->lun = lun;
1840     iscsilun->has_write_same = true;
1841 
1842     task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 0, 0,
1843                             (void **) &inq, errp);
1844     if (task == NULL) {
1845         ret = -EINVAL;
1846         goto out;
1847     }
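    /* Note: "periperal" [sic] is how libiscsi spells this field in
     * struct scsi_inquiry_standard. */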
1848     iscsilun->type = inq->periperal_device_type;
1849     scsi_free_scsi_task(task);
1850     task = NULL;
1851 
1852     iscsi_modesense_sync(iscsilun);
1853     if (iscsilun->dpofua) {
1854         bs->supported_write_flags = BDRV_REQ_FUA;
1855     }
1856     bs->supported_zero_flags = BDRV_REQ_MAY_UNMAP;
1857 
1858     /* Check the write protect flag of the LUN if we want to write */
1859     if (iscsilun->type == TYPE_DISK && (flags & BDRV_O_RDWR) &&
1860         iscsilun->write_protected) {
1861         error_setg(errp, "Cannot open a write protected LUN as read-write");
1862         ret = -EACCES;
1863         goto out;
1864     }
1865 
1866     iscsi_readcapacity_sync(iscsilun, &local_err);
1867     if (local_err != NULL) {
1868         error_propagate(errp, local_err);
1869         ret = -EINVAL;
1870         goto out;
1871     }
1872     bs->total_sectors = sector_lun2qemu(iscsilun->num_blocks, iscsilun);
1873 
1874     /* We don't have any emulation for devices other than disks and CD-ROMs, so
1875      * this must be sg ioctl compatible. We force it to be sg, otherwise qemu
1876      * will try to read from the device to guess the image format.
1877      */
1878     if (iscsilun->type != TYPE_DISK && iscsilun->type != TYPE_ROM) {
1879         bs->sg = true;
1880     }
1881 
1882     task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 1,
1883                             SCSI_INQUIRY_PAGECODE_SUPPORTED_VPD_PAGES,
1884                             (void **) &inq_vpd, errp);
1885     if (task == NULL) {
1886         ret = -EINVAL;
1887         goto out;
1888     }
1889     for (i = 0; i < inq_vpd->num_pages; i++) {
1890         struct scsi_task *inq_task;
1891         struct scsi_inquiry_logical_block_provisioning *inq_lbp;
1892         struct scsi_inquiry_block_limits *inq_bl;
1893         switch (inq_vpd->pages[i]) {
1894         case SCSI_INQUIRY_PAGECODE_LOGICAL_BLOCK_PROVISIONING:
1895             inq_task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 1,
1896                                         SCSI_INQUIRY_PAGECODE_LOGICAL_BLOCK_PROVISIONING,
1897                                         (void **) &inq_lbp, errp);
1898             if (inq_task == NULL) {
1899                 ret = -EINVAL;
1900                 goto out;
1901             }
1902             memcpy(&iscsilun->lbp, inq_lbp,
1903                    sizeof(struct scsi_inquiry_logical_block_provisioning));
1904             scsi_free_scsi_task(inq_task);
1905             break;
1906         case SCSI_INQUIRY_PAGECODE_BLOCK_LIMITS:
1907             inq_task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 1,
1908                                     SCSI_INQUIRY_PAGECODE_BLOCK_LIMITS,
1909                                     (void **) &inq_bl, errp);
1910             if (inq_task == NULL) {
1911                 ret = -EINVAL;
1912                 goto out;
1913             }
1914             memcpy(&iscsilun->bl, inq_bl,
1915                    sizeof(struct scsi_inquiry_block_limits));
1916             scsi_free_scsi_task(inq_task);
1917             break;
1918         default:
1919             break;
1920         }
1921     }
1922     scsi_free_scsi_task(task);
1923     task = NULL;
1924 
1925     qemu_mutex_init(&iscsilun->mutex);
1926     iscsi_attach_aio_context(bs, iscsilun->aio_context);
1927 
1928     /* Guess the internal cluster (page) size of the iSCSI target by means of
1929      * opt_unmap_gran.  Use the unmap granularity only if it has a reasonable
1930      * size, i.e. between 4 KiB and 16 MiB (see the check below). */
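    /* For example, with a 512-byte block size and an opt_unmap_gran of 2048
     * blocks, the guessed cluster size is 1 MiB (cluster_sectors = 2048). */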
1931     if (iscsilun->bl.opt_unmap_gran * iscsilun->block_size >= 4 * 1024 &&
1932         iscsilun->bl.opt_unmap_gran * iscsilun->block_size <= 16 * 1024 * 1024) {
1933         iscsilun->cluster_sectors = (iscsilun->bl.opt_unmap_gran *
1934                                      iscsilun->block_size) >> BDRV_SECTOR_BITS;
1935         if (iscsilun->lbprz) {
1936             ret = iscsi_allocmap_init(iscsilun, bs->open_flags);
1937         }
1938     }
1939 
1940 out:
1941     qemu_opts_del(opts);
1942     g_free(initiator_name);
1943     if (task != NULL) {
1944         scsi_free_scsi_task(task);
1945     }
1946 
1947     if (ret) {
1948         if (iscsi != NULL) {
1949             if (iscsi_is_logged_in(iscsi)) {
1950                 iscsi_logout_sync(iscsi);
1951             }
1952             iscsi_destroy_context(iscsi);
1953         }
1954         memset(iscsilun, 0, sizeof(IscsiLun));
1955     }
1956 exit:
1957     return ret;
1958 }
1959 
1960 static void iscsi_close(BlockDriverState *bs)
1961 {
1962     IscsiLun *iscsilun = bs->opaque;
1963     struct iscsi_context *iscsi = iscsilun->iscsi;
1964 
1965     iscsi_detach_aio_context(bs);
1966     if (iscsi_is_logged_in(iscsi)) {
1967         iscsi_logout_sync(iscsi);
1968     }
1969     iscsi_destroy_context(iscsi);
1970     g_free(iscsilun->zeroblock);
1971     iscsi_allocmap_free(iscsilun);
1972     qemu_mutex_destroy(&iscsilun->mutex);
1973     memset(iscsilun, 0, sizeof(IscsiLun));
1974 }
1975 
1976 static void iscsi_refresh_limits(BlockDriverState *bs, Error **errp)
1977 {
1978     /* We don't actually refresh here, but just return data queried in
1979      * iscsi_open(): iscsi targets don't change their limits. */
1980 
1981     IscsiLun *iscsilun = bs->opaque;
1982     uint64_t max_xfer_len = iscsilun->use_16_for_rw ? 0xffffffff : 0xffff;
1983     unsigned int block_size = MAX(BDRV_SECTOR_SIZE, iscsilun->block_size);
1984 
1985     assert(iscsilun->block_size >= BDRV_SECTOR_SIZE || bs->sg);
1986 
1987     bs->bl.request_alignment = block_size;
1988 
1989     if (iscsilun->bl.max_xfer_len) {
1990         max_xfer_len = MIN(max_xfer_len, iscsilun->bl.max_xfer_len);
1991     }
1992 
1993     if (max_xfer_len * block_size < INT_MAX) {
1994         bs->bl.max_transfer = max_xfer_len * iscsilun->block_size;
1995     }
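    /* For example, a target reporting bl.max_xfer_len of 65535 with a
     * 512-byte block size yields max_transfer = 65535 * 512 = 33553920 bytes. */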
1996 
1997     if (iscsilun->lbp.lbpu) {
1998         if (iscsilun->bl.max_unmap < 0xffffffff / block_size) {
1999             bs->bl.max_pdiscard =
2000                 iscsilun->bl.max_unmap * iscsilun->block_size;
2001         }
2002         bs->bl.pdiscard_alignment =
2003             iscsilun->bl.opt_unmap_gran * iscsilun->block_size;
2004     } else {
2005         bs->bl.pdiscard_alignment = iscsilun->block_size;
2006     }
2007 
2008     if (iscsilun->bl.max_ws_len < 0xffffffff / block_size) {
2009         bs->bl.max_pwrite_zeroes =
2010             iscsilun->bl.max_ws_len * iscsilun->block_size;
2011     }
2012     if (iscsilun->lbp.lbpws) {
2013         bs->bl.pwrite_zeroes_alignment =
2014             iscsilun->bl.opt_unmap_gran * iscsilun->block_size;
2015     } else {
2016         bs->bl.pwrite_zeroes_alignment = iscsilun->block_size;
2017     }
2018     if (iscsilun->bl.opt_xfer_len &&
2019         iscsilun->bl.opt_xfer_len < INT_MAX / block_size) {
2020         bs->bl.opt_transfer = pow2floor(iscsilun->bl.opt_xfer_len *
2021                                         iscsilun->block_size);
2022     }
2023 }
2024 
2025 /* Note that this will not re-establish a connection with an iSCSI target - it
2026  * is effectively a NOP.  */
2027 static int iscsi_reopen_prepare(BDRVReopenState *state,
2028                                 BlockReopenQueue *queue, Error **errp)
2029 {
2030     IscsiLun *iscsilun = state->bs->opaque;
2031 
2032     if (state->flags & BDRV_O_RDWR && iscsilun->write_protected) {
2033         error_setg(errp, "Cannot open a write protected LUN as read-write");
2034         return -EACCES;
2035     }
2036     return 0;
2037 }
2038 
2039 static void iscsi_reopen_commit(BDRVReopenState *reopen_state)
2040 {
2041     IscsiLun *iscsilun = reopen_state->bs->opaque;
2042 
2043     /* the cache.direct status might have changed */
2044     if (iscsilun->allocmap != NULL) {
2045         iscsi_allocmap_init(iscsilun, reopen_state->flags);
2046     }
2047 }
2048 
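/*
 * An iSCSI LUN cannot be resized from the initiator side.  Truncation only
 * succeeds if the target already reports at least @offset bytes (the capacity
 * is re-read first); growing beyond the reported size fails, and a successful
 * call merely re-initialises the allocation map, if one is in use.
 */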
2049 static int iscsi_truncate(BlockDriverState *bs, int64_t offset,
2050                           PreallocMode prealloc, Error **errp)
2051 {
2052     IscsiLun *iscsilun = bs->opaque;
2053     Error *local_err = NULL;
2054 
2055     if (prealloc != PREALLOC_MODE_OFF) {
2056         error_setg(errp, "Unsupported preallocation mode '%s'",
2057                    PreallocMode_str(prealloc));
2058         return -ENOTSUP;
2059     }
2060 
2061     if (iscsilun->type != TYPE_DISK) {
2062         error_setg(errp, "Cannot resize non-disk iSCSI devices");
2063         return -ENOTSUP;
2064     }
2065 
2066     iscsi_readcapacity_sync(iscsilun, &local_err);
2067     if (local_err != NULL) {
2068         error_propagate(errp, local_err);
2069         return -EIO;
2070     }
2071 
2072     if (offset > iscsi_getlength(bs)) {
2073         error_setg(errp, "Cannot grow iSCSI devices");
2074         return -EINVAL;
2075     }
2076 
2077     if (iscsilun->allocmap != NULL) {
2078         iscsi_allocmap_init(iscsilun, bs->open_flags);
2079     }
2080 
2081     return 0;
2082 }
2083 
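/*
 * "Creating" an iSCSI image cannot allocate anything on the target.  This
 * only opens the given URL and checks that it refers to a disk LUN at least
 * as large as the requested size, returning -ENODEV or -ENOSPC otherwise.
 */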
2084 static int iscsi_create(const char *filename, QemuOpts *opts, Error **errp)
2085 {
2086     int ret = 0;
2087     int64_t total_size = 0;
2088     BlockDriverState *bs;
2089     IscsiLun *iscsilun = NULL;
2090     QDict *bs_options;
2091     Error *local_err = NULL;
2092 
2093     bs = bdrv_new();
2094 
2095     /* Read out options */
2096     total_size = DIV_ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
2097                               BDRV_SECTOR_SIZE);
2098     bs->opaque = g_new0(struct IscsiLun, 1);
2099     iscsilun = bs->opaque;
2100 
2101     bs_options = qdict_new();
2102     iscsi_parse_filename(filename, bs_options, &local_err);
2103     if (local_err) {
2104         error_propagate(errp, local_err);
2105         ret = -EINVAL;
2106     } else {
2107         ret = iscsi_open(bs, bs_options, 0, NULL);
2108     }
2109     QDECREF(bs_options);
2110 
2111     if (ret != 0) {
2112         goto out;
2113     }
2114     iscsi_detach_aio_context(bs);
2115     if (iscsilun->type != TYPE_DISK) {
2116         ret = -ENODEV;
2117         goto out;
2118     }
2119     if (bs->total_sectors < total_size) {
2120         ret = -ENOSPC;
2121         goto out;
2122     }
2123 
2124     ret = 0;
2125 out:
2126     if (iscsilun->iscsi != NULL) {
2127         iscsi_destroy_context(iscsilun->iscsi);
2128     }
2129     g_free(bs->opaque);
2130     bs->opaque = NULL;
2131     bdrv_unref(bs);
2132     return ret;
2133 }
2134 
2135 static int iscsi_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
2136 {
2137     IscsiLun *iscsilun = bs->opaque;
2138     bdi->unallocated_blocks_are_zero = iscsilun->lbprz;
2139     bdi->can_write_zeroes_with_unmap = iscsilun->lbprz && iscsilun->lbp.lbpws;
2140     bdi->cluster_size = iscsilun->cluster_sectors * BDRV_SECTOR_SIZE;
2141     return 0;
2142 }
2143 
2144 static void iscsi_invalidate_cache(BlockDriverState *bs,
2145                                    Error **errp)
2146 {
2147     IscsiLun *iscsilun = bs->opaque;
2148     iscsi_allocmap_invalidate(iscsilun);
2149 }
2150 
2151 static QemuOptsList iscsi_create_opts = {
2152     .name = "iscsi-create-opts",
2153     .head = QTAILQ_HEAD_INITIALIZER(iscsi_create_opts.head),
2154     .desc = {
2155         {
2156             .name = BLOCK_OPT_SIZE,
2157             .type = QEMU_OPT_SIZE,
2158             .help = "Virtual disk size"
2159         },
2160         { /* end of list */ }
2161     }
2162 };
2163 
2164 static BlockDriver bdrv_iscsi = {
2165     .format_name     = "iscsi",
2166     .protocol_name   = "iscsi",
2167 
2168     .instance_size          = sizeof(IscsiLun),
2169     .bdrv_parse_filename    = iscsi_parse_filename,
2170     .bdrv_file_open         = iscsi_open,
2171     .bdrv_close             = iscsi_close,
2172     .bdrv_create            = iscsi_create,
2173     .create_opts            = &iscsi_create_opts,
2174     .bdrv_reopen_prepare    = iscsi_reopen_prepare,
2175     .bdrv_reopen_commit     = iscsi_reopen_commit,
2176     .bdrv_invalidate_cache  = iscsi_invalidate_cache,
2177 
2178     .bdrv_getlength  = iscsi_getlength,
2179     .bdrv_get_info   = iscsi_get_info,
2180     .bdrv_truncate   = iscsi_truncate,
2181     .bdrv_refresh_limits = iscsi_refresh_limits,
2182 
2183     .bdrv_co_get_block_status = iscsi_co_get_block_status,
2184     .bdrv_co_pdiscard      = iscsi_co_pdiscard,
2185     .bdrv_co_pwrite_zeroes = iscsi_co_pwrite_zeroes,
2186     .bdrv_co_readv         = iscsi_co_readv,
2187     .bdrv_co_writev_flags  = iscsi_co_writev_flags,
2188     .bdrv_co_flush_to_disk = iscsi_co_flush,
2189 
2190 #ifdef __linux__
2191     .bdrv_aio_ioctl   = iscsi_aio_ioctl,
2192 #endif
2193 
2194     .bdrv_detach_aio_context = iscsi_detach_aio_context,
2195     .bdrv_attach_aio_context = iscsi_attach_aio_context,
2196 };
2197 
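/*
 * With newer libiscsi (API version 20160603 or later) we also register an
 * "iser" protocol driver.  It shares all callbacks with bdrv_iscsi; the
 * separate protocol name lets iser:// URLs select the iSER (RDMA) transport
 * in iscsi_parse_filename().
 */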
2198 #if LIBISCSI_API_VERSION >= (20160603)
2199 static BlockDriver bdrv_iser = {
2200     .format_name     = "iser",
2201     .protocol_name   = "iser",
2202 
2203     .instance_size          = sizeof(IscsiLun),
2204     .bdrv_parse_filename    = iscsi_parse_filename,
2205     .bdrv_file_open         = iscsi_open,
2206     .bdrv_close             = iscsi_close,
2207     .bdrv_create            = iscsi_create,
2208     .create_opts            = &iscsi_create_opts,
2209     .bdrv_reopen_prepare    = iscsi_reopen_prepare,
2210     .bdrv_reopen_commit     = iscsi_reopen_commit,
2211     .bdrv_invalidate_cache  = iscsi_invalidate_cache,
2212 
2213     .bdrv_getlength  = iscsi_getlength,
2214     .bdrv_get_info   = iscsi_get_info,
2215     .bdrv_truncate   = iscsi_truncate,
2216     .bdrv_refresh_limits = iscsi_refresh_limits,
2217 
2218     .bdrv_co_get_block_status = iscsi_co_get_block_status,
2219     .bdrv_co_pdiscard      = iscsi_co_pdiscard,
2220     .bdrv_co_pwrite_zeroes = iscsi_co_pwrite_zeroes,
2221     .bdrv_co_readv         = iscsi_co_readv,
2222     .bdrv_co_writev_flags  = iscsi_co_writev_flags,
2223     .bdrv_co_flush_to_disk = iscsi_co_flush,
2224 
2225 #ifdef __linux__
2226     .bdrv_aio_ioctl   = iscsi_aio_ioctl,
2227 #endif
2228 
2229     .bdrv_detach_aio_context = iscsi_detach_aio_context,
2230     .bdrv_attach_aio_context = iscsi_attach_aio_context,
2231 };
2232 #endif
2233 
2234 static void iscsi_block_init(void)
2235 {
2236     bdrv_register(&bdrv_iscsi);
2237 #if LIBISCSI_API_VERSION >= (20160603)
2238     bdrv_register(&bdrv_iser);
2239 #endif
2240 }
2241 
2242 block_init(iscsi_block_init);
2243