xref: /openbmc/qemu/block/iscsi.c (revision 000c4dff)
1 /*
2  * QEMU Block driver for iSCSI images
3  *
4  * Copyright (c) 2010-2011 Ronnie Sahlberg <ronniesahlberg@gmail.com>
5  * Copyright (c) 2012-2014 Peter Lieven <pl@kamp.de>
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a copy
8  * of this software and associated documentation files (the "Software"), to deal
9  * in the Software without restriction, including without limitation the rights
10  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11  * copies of the Software, and to permit persons to whom the Software is
12  * furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice shall be included in
15  * all copies or substantial portions of the Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23  * THE SOFTWARE.
24  */
25 
26 #include "config-host.h"
27 
28 #include <poll.h>
29 #include <math.h>
30 #include <arpa/inet.h>
31 #include "qemu-common.h"
32 #include "qemu/config-file.h"
33 #include "qemu/error-report.h"
34 #include "qemu/bitops.h"
35 #include "qemu/bitmap.h"
36 #include "block/block_int.h"
37 #include "trace.h"
38 #include "block/scsi.h"
39 #include "qemu/iov.h"
40 #include "sysemu/sysemu.h"
41 #include "qmp-commands.h"
42 
43 #include <iscsi/iscsi.h>
44 #include <iscsi/scsi-lowlevel.h>
45 
46 #ifdef __linux__
47 #include <scsi/sg.h>
48 #include <block/scsi.h>
49 #endif
50 
/* Per-LUN driver state; the block functions in this file obtain it from
 * BlockDriverState.opaque. */
typedef struct IscsiLun {
    struct iscsi_context *iscsi;    /* libiscsi context all commands go through */
    AioContext *aio_context;        /* context used for fd handlers, BHs, timers */
    int lun;                        /* LUN number passed to every libiscsi call */
    enum scsi_inquiry_peripheral_device_type type;
    int block_size;                 /* LUN block size in bytes */
    uint64_t num_blocks;            /* LUN size in units of block_size */
    int events;                     /* poll events last seen by iscsi_set_events() */
    QEMUTimer *nop_timer;
    uint8_t lbpme;                  /* 0: get_block_status reports all allocated */
    uint8_t lbprz;                  /* non-zero: unmapped blocks read as zeroes */
    uint8_t has_write_same;         /* cleared once the target rejects WRITE SAME */
    struct scsi_inquiry_logical_block_provisioning lbp;
    struct scsi_inquiry_block_limits bl;
    unsigned char *zeroblock;       /* lazily allocated block_size bytes of zeroes */
    unsigned long *allocationmap;   /* bitmap, one bit per cluster; NULL if unused */
    int cluster_sectors;            /* sectors covered by one allocationmap bit */
    bool use_16_for_rw;             /* use the 16-byte CDB read/write variants */
} IscsiLun;
70 
/* State of one coroutine-issued SCSI command, shared between the issuing
 * coroutine and iscsi_co_generic_cb(). */
typedef struct IscsiTask {
    int status;             /* SCSI status handed to the completion callback */
    int complete;           /* set to 1 when the waiting loop may stop yielding */
    int retries;            /* number of failed attempts counted so far */
    int do_retry;           /* set by the callback when the command must be reissued */
    struct scsi_task *task; /* libiscsi task; freed by the issuing function */
    Coroutine *co;          /* coroutine to re-enter on completion */
    QEMUBH *bh;             /* bottom half used to re-enter the coroutine */
    IscsiLun *iscsilun;
    QEMUTimer retry_timer;  /* delays the retry after a BUSY status */
} IscsiTask;
82 
/* AIO control block for requests issued through the callback-based AIO
 * interface (used by the SG_IO ioctl path in this file). */
typedef struct IscsiAIOCB {
    BlockDriverAIOCB common;    /* must stay first: iscsi_aio_cancel() casts to it */
    QEMUIOVector *qiov;
    QEMUBH *bh;                 /* completion bottom half, NULL until scheduled */
    IscsiLun *iscsilun;
    struct scsi_task *task;     /* freed in iscsi_bh_cb() */
    uint8_t *buf;               /* g_free()d on completion */
    int status;                 /* -EINPROGRESS while the request is in flight */
    int canceled;               /* non-zero suppresses the completion callback */
    int64_t sector_num;
    int nb_sectors;
#ifdef __linux__
    sg_io_hdr_t *ioh;           /* caller's SG_IO header, updated on completion */
#endif
} IscsiAIOCB;
98 
/* NOTE(review): NOP_INTERVAL and MAX_NOP_FAILURES are presumably the NOP-Out
 * keepalive period (ms) and the tolerated number of missed replies; their
 * users are not visible in this part of the file -- confirm. */
#define NOP_INTERVAL 5000
#define MAX_NOP_FAILURES 3
/* Maximum number of retries per command: one per entry of iscsi_retry_times. */
#define ISCSI_CMD_RETRIES ARRAY_SIZE(iscsi_retry_times)
/* Mean delay in milliseconds before the n-th retry of a command that failed
 * with BUSY; the actual delay is drawn with exp_random(). */
static const unsigned iscsi_retry_times[] = {8, 32, 128, 512, 2048};

/* This threshold is a trade-off knob to choose between
 * the potential additional overhead of an extra GET_LBA_STATUS request
 * vs. unnecessarily reading a lot of zero sectors over the wire.
 * If a read request is greater than or equal to ISCSI_CHECKALLOC_THRES
 * sectors, we first check the allocation status of the area covered by
 * the request, provided the allocationmap indicates that the area might
 * be unallocated. */
#define ISCSI_CHECKALLOC_THRES 64
112 
113 static void
114 iscsi_bh_cb(void *p)
115 {
116     IscsiAIOCB *acb = p;
117 
118     qemu_bh_delete(acb->bh);
119 
120     g_free(acb->buf);
121     acb->buf = NULL;
122 
123     if (acb->canceled == 0) {
124         acb->common.cb(acb->common.opaque, acb->status);
125     }
126 
127     if (acb->task != NULL) {
128         scsi_free_scsi_task(acb->task);
129         acb->task = NULL;
130     }
131 
132     qemu_aio_release(acb);
133 }
134 
135 static void
136 iscsi_schedule_bh(IscsiAIOCB *acb)
137 {
138     if (acb->bh) {
139         return;
140     }
141     acb->bh = aio_bh_new(acb->iscsilun->aio_context, iscsi_bh_cb, acb);
142     qemu_bh_schedule(acb->bh);
143 }
144 
145 static void iscsi_co_generic_bh_cb(void *opaque)
146 {
147     struct IscsiTask *iTask = opaque;
148     iTask->complete = 1;
149     qemu_bh_delete(iTask->bh);
150     qemu_coroutine_enter(iTask->co, NULL);
151 }
152 
153 static void iscsi_retry_timer_expired(void *opaque)
154 {
155     struct IscsiTask *iTask = opaque;
156     iTask->complete = 1;
157     if (iTask->co) {
158         qemu_coroutine_enter(iTask->co, NULL);
159     }
160 }
161 
162 static inline unsigned exp_random(double mean)
163 {
164     return -mean * log((double)rand() / RAND_MAX);
165 }
166 
167 static void
168 iscsi_co_generic_cb(struct iscsi_context *iscsi, int status,
169                         void *command_data, void *opaque)
170 {
171     struct IscsiTask *iTask = opaque;
172     struct scsi_task *task = command_data;
173 
174     iTask->status = status;
175     iTask->do_retry = 0;
176     iTask->task = task;
177 
178     if (status != SCSI_STATUS_GOOD) {
179         if (iTask->retries++ < ISCSI_CMD_RETRIES) {
180             if (status == SCSI_STATUS_CHECK_CONDITION
181                 && task->sense.key == SCSI_SENSE_UNIT_ATTENTION) {
182                 error_report("iSCSI CheckCondition: %s",
183                              iscsi_get_error(iscsi));
184                 iTask->do_retry = 1;
185                 goto out;
186             }
187             if (status == SCSI_STATUS_BUSY) {
188                 unsigned retry_time =
189                     exp_random(iscsi_retry_times[iTask->retries - 1]);
190                 error_report("iSCSI Busy (retry #%u in %u ms): %s",
191                              iTask->retries, retry_time,
192                              iscsi_get_error(iscsi));
193                 aio_timer_init(iTask->iscsilun->aio_context,
194                                &iTask->retry_timer, QEMU_CLOCK_REALTIME,
195                                SCALE_MS, iscsi_retry_timer_expired, iTask);
196                 timer_mod(&iTask->retry_timer,
197                           qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + retry_time);
198                 iTask->do_retry = 1;
199                 return;
200             }
201         }
202         error_report("iSCSI Failure: %s", iscsi_get_error(iscsi));
203     }
204 
205 out:
206     if (iTask->co) {
207         iTask->bh = aio_bh_new(iTask->iscsilun->aio_context,
208                                iscsi_co_generic_bh_cb, iTask);
209         qemu_bh_schedule(iTask->bh);
210     } else {
211         iTask->complete = 1;
212     }
213 }
214 
215 static void iscsi_co_init_iscsitask(IscsiLun *iscsilun, struct IscsiTask *iTask)
216 {
217     *iTask = (struct IscsiTask) {
218         .co         = qemu_coroutine_self(),
219         .iscsilun   = iscsilun,
220     };
221 }
222 
223 static void
224 iscsi_abort_task_cb(struct iscsi_context *iscsi, int status, void *command_data,
225                     void *private_data)
226 {
227     IscsiAIOCB *acb = private_data;
228 
229     acb->status = -ECANCELED;
230     iscsi_schedule_bh(acb);
231 }
232 
233 static void
234 iscsi_aio_cancel(BlockDriverAIOCB *blockacb)
235 {
236     IscsiAIOCB *acb = (IscsiAIOCB *)blockacb;
237     IscsiLun *iscsilun = acb->iscsilun;
238 
239     if (acb->status != -EINPROGRESS) {
240         return;
241     }
242 
243     acb->canceled = 1;
244 
245     /* send a task mgmt call to the target to cancel the task on the target */
246     iscsi_task_mgmt_abort_task_async(iscsilun->iscsi, acb->task,
247                                      iscsi_abort_task_cb, acb);
248 
249     while (acb->status == -EINPROGRESS) {
250         aio_poll(iscsilun->aio_context, true);
251     }
252 }
253 
/* AIOCB descriptor passed to qemu_aio_get(): allocation size of IscsiAIOCB
 * and the cancel hook for requests created from it. */
static const AIOCBInfo iscsi_aiocb_info = {
    .aiocb_size         = sizeof(IscsiAIOCB),
    .cancel             = iscsi_aio_cancel,
};
258 
259 
/* Forward declarations: iscsi_set_events() registers these fd handlers
 * before their definitions appear. */
static void iscsi_process_read(void *arg);
static void iscsi_process_write(void *arg);
262 
263 static void
264 iscsi_set_events(IscsiLun *iscsilun)
265 {
266     struct iscsi_context *iscsi = iscsilun->iscsi;
267     int ev;
268 
269     /* We always register a read handler.  */
270     ev = POLLIN;
271     ev |= iscsi_which_events(iscsi);
272     if (ev != iscsilun->events) {
273         aio_set_fd_handler(iscsilun->aio_context,
274                            iscsi_get_fd(iscsi),
275                            iscsi_process_read,
276                            (ev & POLLOUT) ? iscsi_process_write : NULL,
277                            iscsilun);
278 
279     }
280 
281     iscsilun->events = ev;
282 }
283 
284 static void
285 iscsi_process_read(void *arg)
286 {
287     IscsiLun *iscsilun = arg;
288     struct iscsi_context *iscsi = iscsilun->iscsi;
289 
290     iscsi_service(iscsi, POLLIN);
291     iscsi_set_events(iscsilun);
292 }
293 
294 static void
295 iscsi_process_write(void *arg)
296 {
297     IscsiLun *iscsilun = arg;
298     struct iscsi_context *iscsi = iscsilun->iscsi;
299 
300     iscsi_service(iscsi, POLLOUT);
301     iscsi_set_events(iscsilun);
302 }
303 
304 static int64_t sector_lun2qemu(int64_t sector, IscsiLun *iscsilun)
305 {
306     return sector * iscsilun->block_size / BDRV_SECTOR_SIZE;
307 }
308 
309 static int64_t sector_qemu2lun(int64_t sector, IscsiLun *iscsilun)
310 {
311     return sector * BDRV_SECTOR_SIZE / iscsilun->block_size;
312 }
313 
314 static bool is_request_lun_aligned(int64_t sector_num, int nb_sectors,
315                                       IscsiLun *iscsilun)
316 {
317     if ((sector_num * BDRV_SECTOR_SIZE) % iscsilun->block_size ||
318         (nb_sectors * BDRV_SECTOR_SIZE) % iscsilun->block_size) {
319             error_report("iSCSI misaligned request: "
320                          "iscsilun->block_size %u, sector_num %" PRIi64
321                          ", nb_sectors %d",
322                          iscsilun->block_size, sector_num, nb_sectors);
323             return 0;
324     }
325     return 1;
326 }
327 
328 static void iscsi_allocationmap_set(IscsiLun *iscsilun, int64_t sector_num,
329                                     int nb_sectors)
330 {
331     if (iscsilun->allocationmap == NULL) {
332         return;
333     }
334     bitmap_set(iscsilun->allocationmap,
335                sector_num / iscsilun->cluster_sectors,
336                DIV_ROUND_UP(nb_sectors, iscsilun->cluster_sectors));
337 }
338 
339 static void iscsi_allocationmap_clear(IscsiLun *iscsilun, int64_t sector_num,
340                                       int nb_sectors)
341 {
342     int64_t cluster_num, nb_clusters;
343     if (iscsilun->allocationmap == NULL) {
344         return;
345     }
346     cluster_num = DIV_ROUND_UP(sector_num, iscsilun->cluster_sectors);
347     nb_clusters = (sector_num + nb_sectors) / iscsilun->cluster_sectors
348                   - cluster_num;
349     if (nb_clusters > 0) {
350         bitmap_clear(iscsilun->allocationmap, cluster_num, nb_clusters);
351     }
352 }
353 
354 static int coroutine_fn iscsi_co_writev(BlockDriverState *bs,
355                                         int64_t sector_num, int nb_sectors,
356                                         QEMUIOVector *iov)
357 {
358     IscsiLun *iscsilun = bs->opaque;
359     struct IscsiTask iTask;
360     uint64_t lba;
361     uint32_t num_sectors;
362 
363     if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
364         return -EINVAL;
365     }
366 
367     lba = sector_qemu2lun(sector_num, iscsilun);
368     num_sectors = sector_qemu2lun(nb_sectors, iscsilun);
369     iscsi_co_init_iscsitask(iscsilun, &iTask);
370 retry:
371     if (iscsilun->use_16_for_rw) {
372         iTask.task = iscsi_write16_task(iscsilun->iscsi, iscsilun->lun, lba,
373                                         NULL, num_sectors * iscsilun->block_size,
374                                         iscsilun->block_size, 0, 0, 0, 0, 0,
375                                         iscsi_co_generic_cb, &iTask);
376     } else {
377         iTask.task = iscsi_write10_task(iscsilun->iscsi, iscsilun->lun, lba,
378                                         NULL, num_sectors * iscsilun->block_size,
379                                         iscsilun->block_size, 0, 0, 0, 0, 0,
380                                         iscsi_co_generic_cb, &iTask);
381     }
382     if (iTask.task == NULL) {
383         return -ENOMEM;
384     }
385     scsi_task_set_iov_out(iTask.task, (struct scsi_iovec *) iov->iov,
386                           iov->niov);
387     while (!iTask.complete) {
388         iscsi_set_events(iscsilun);
389         qemu_coroutine_yield();
390     }
391 
392     if (iTask.task != NULL) {
393         scsi_free_scsi_task(iTask.task);
394         iTask.task = NULL;
395     }
396 
397     if (iTask.do_retry) {
398         iTask.complete = 0;
399         goto retry;
400     }
401 
402     if (iTask.status != SCSI_STATUS_GOOD) {
403         return -EIO;
404     }
405 
406     iscsi_allocationmap_set(iscsilun, sector_num, nb_sectors);
407 
408     return 0;
409 }
410 
411 
412 static bool iscsi_allocationmap_is_allocated(IscsiLun *iscsilun,
413                                              int64_t sector_num, int nb_sectors)
414 {
415     unsigned long size;
416     if (iscsilun->allocationmap == NULL) {
417         return true;
418     }
419     size = DIV_ROUND_UP(sector_num + nb_sectors, iscsilun->cluster_sectors);
420     return !(find_next_bit(iscsilun->allocationmap, size,
421                            sector_num / iscsilun->cluster_sectors) == size);
422 }
423 
/*
 * Report the allocation status of the extent starting at @sector_num.
 *
 * On success returns a BDRV_BLOCK_* flag set combined with the byte offset
 * of @sector_num and BDRV_BLOCK_OFFSET_VALID, and stores in @pnum the number
 * of sectors (at most @nb_sectors) the status applies to.  Returns -EINVAL
 * for a request not aligned to the LUN block size, -ENOMEM when the task
 * cannot be created and -EIO when the reply cannot be decoded or does not
 * start at the requested LBA.
 *
 * If the LUN has lbpme == 0 or the status query fails, the whole range is
 * reported as allocated data.  The allocation map is updated from the
 * extent reported by the target.
 */
static int64_t coroutine_fn iscsi_co_get_block_status(BlockDriverState *bs,
                                                  int64_t sector_num,
                                                  int nb_sectors, int *pnum)
{
    IscsiLun *iscsilun = bs->opaque;
    struct scsi_get_lba_status *lbas = NULL;
    struct scsi_lba_status_descriptor *lbasd = NULL;
    struct IscsiTask iTask;
    int64_t ret;

    /* initialised before any "goto out" so the cleanup sees task == NULL */
    iscsi_co_init_iscsitask(iscsilun, &iTask);

    if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
        ret = -EINVAL;
        goto out;
    }

    /* default to all sectors allocated */
    ret = BDRV_BLOCK_DATA;
    ret |= (sector_num << BDRV_SECTOR_BITS) | BDRV_BLOCK_OFFSET_VALID;
    *pnum = nb_sectors;

    /* LUN does not support logical block provisioning */
    if (iscsilun->lbpme == 0) {
        goto out;
    }

retry:
    /* NOTE(review): 8 + 16 is presumably the reply header plus a single
     * descriptor; only descriptors[0] is read below -- confirm. */
    if (iscsi_get_lba_status_task(iscsilun->iscsi, iscsilun->lun,
                                  sector_qemu2lun(sector_num, iscsilun),
                                  8 + 16, iscsi_co_generic_cb,
                                  &iTask) == NULL) {
        ret = -ENOMEM;
        goto out;
    }

    /* sleep until the completion path flags the task as done */
    while (!iTask.complete) {
        iscsi_set_events(iscsilun);
        qemu_coroutine_yield();
    }

    if (iTask.do_retry) {
        /* drop the failed task before reissuing the command */
        if (iTask.task != NULL) {
            scsi_free_scsi_task(iTask.task);
            iTask.task = NULL;
        }
        iTask.complete = 0;
        goto retry;
    }

    if (iTask.status != SCSI_STATUS_GOOD) {
        /* in case the get_lba_status_callout fails (i.e.
         * because the device is busy or the cmd is not
         * supported) we pretend all blocks are allocated
         * for backwards compatibility */
        goto out;
    }

    lbas = scsi_datain_unmarshall(iTask.task);
    if (lbas == NULL) {
        ret = -EIO;
        goto out;
    }

    lbasd = &lbas->descriptors[0];

    /* the first descriptor must describe exactly the LBA we asked about */
    if (sector_qemu2lun(sector_num, iscsilun) != lbasd->lba) {
        ret = -EIO;
        goto out;
    }

    *pnum = sector_lun2qemu(lbasd->num_blocks, iscsilun);

    if (lbasd->provisioning == SCSI_PROVISIONING_TYPE_DEALLOCATED ||
        lbasd->provisioning == SCSI_PROVISIONING_TYPE_ANCHORED) {
        ret &= ~BDRV_BLOCK_DATA;
        if (iscsilun->lbprz) {
            ret |= BDRV_BLOCK_ZERO;
        }
    }

    /* the map is updated with the full extent reported by the target,
     * i.e. before *pnum is clamped to the requested length */
    if (ret & BDRV_BLOCK_ZERO) {
        iscsi_allocationmap_clear(iscsilun, sector_num, *pnum);
    } else {
        iscsi_allocationmap_set(iscsilun, sector_num, *pnum);
    }

    if (*pnum > nb_sectors) {
        *pnum = nb_sectors;
    }
out:
    /* common exit: release whatever task is still attached */
    if (iTask.task != NULL) {
        scsi_free_scsi_task(iTask.task);
    }
    return ret;
}
520 
521 static int coroutine_fn iscsi_co_readv(BlockDriverState *bs,
522                                        int64_t sector_num, int nb_sectors,
523                                        QEMUIOVector *iov)
524 {
525     IscsiLun *iscsilun = bs->opaque;
526     struct IscsiTask iTask;
527     uint64_t lba;
528     uint32_t num_sectors;
529 
530     if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
531         return -EINVAL;
532     }
533 
534     if (iscsilun->lbprz && nb_sectors >= ISCSI_CHECKALLOC_THRES &&
535         !iscsi_allocationmap_is_allocated(iscsilun, sector_num, nb_sectors)) {
536         int64_t ret;
537         int pnum;
538         ret = iscsi_co_get_block_status(bs, sector_num, INT_MAX, &pnum);
539         if (ret < 0) {
540             return ret;
541         }
542         if (ret & BDRV_BLOCK_ZERO && pnum >= nb_sectors) {
543             qemu_iovec_memset(iov, 0, 0x00, iov->size);
544             return 0;
545         }
546     }
547 
548     lba = sector_qemu2lun(sector_num, iscsilun);
549     num_sectors = sector_qemu2lun(nb_sectors, iscsilun);
550 
551     iscsi_co_init_iscsitask(iscsilun, &iTask);
552 retry:
553     if (iscsilun->use_16_for_rw) {
554         iTask.task = iscsi_read16_task(iscsilun->iscsi, iscsilun->lun, lba,
555                                        num_sectors * iscsilun->block_size,
556                                        iscsilun->block_size, 0, 0, 0, 0, 0,
557                                        iscsi_co_generic_cb, &iTask);
558     } else {
559         iTask.task = iscsi_read10_task(iscsilun->iscsi, iscsilun->lun, lba,
560                                        num_sectors * iscsilun->block_size,
561                                        iscsilun->block_size,
562                                        0, 0, 0, 0, 0,
563                                        iscsi_co_generic_cb, &iTask);
564     }
565     if (iTask.task == NULL) {
566         return -ENOMEM;
567     }
568     scsi_task_set_iov_in(iTask.task, (struct scsi_iovec *) iov->iov, iov->niov);
569 
570     while (!iTask.complete) {
571         iscsi_set_events(iscsilun);
572         qemu_coroutine_yield();
573     }
574 
575     if (iTask.task != NULL) {
576         scsi_free_scsi_task(iTask.task);
577         iTask.task = NULL;
578     }
579 
580     if (iTask.do_retry) {
581         iTask.complete = 0;
582         goto retry;
583     }
584 
585     if (iTask.status != SCSI_STATUS_GOOD) {
586         return -EIO;
587     }
588 
589     return 0;
590 }
591 
592 static int coroutine_fn iscsi_co_flush(BlockDriverState *bs)
593 {
594     IscsiLun *iscsilun = bs->opaque;
595     struct IscsiTask iTask;
596 
597     if (bs->sg) {
598         return 0;
599     }
600 
601     iscsi_co_init_iscsitask(iscsilun, &iTask);
602 
603 retry:
604     if (iscsi_synchronizecache10_task(iscsilun->iscsi, iscsilun->lun, 0, 0, 0,
605                                       0, iscsi_co_generic_cb, &iTask) == NULL) {
606         return -ENOMEM;
607     }
608 
609     while (!iTask.complete) {
610         iscsi_set_events(iscsilun);
611         qemu_coroutine_yield();
612     }
613 
614     if (iTask.task != NULL) {
615         scsi_free_scsi_task(iTask.task);
616         iTask.task = NULL;
617     }
618 
619     if (iTask.do_retry) {
620         iTask.complete = 0;
621         goto retry;
622     }
623 
624     if (iTask.status != SCSI_STATUS_GOOD) {
625         return -EIO;
626     }
627 
628     return 0;
629 }
630 
631 #ifdef __linux__
632 static void
633 iscsi_aio_ioctl_cb(struct iscsi_context *iscsi, int status,
634                      void *command_data, void *opaque)
635 {
636     IscsiAIOCB *acb = opaque;
637 
638     g_free(acb->buf);
639     acb->buf = NULL;
640 
641     if (acb->canceled != 0) {
642         return;
643     }
644 
645     acb->status = 0;
646     if (status < 0) {
647         error_report("Failed to ioctl(SG_IO) to iSCSI lun. %s",
648                      iscsi_get_error(iscsi));
649         acb->status = -EIO;
650     }
651 
652     acb->ioh->driver_status = 0;
653     acb->ioh->host_status   = 0;
654     acb->ioh->resid         = 0;
655 
656 #define SG_ERR_DRIVER_SENSE    0x08
657 
658     if (status == SCSI_STATUS_CHECK_CONDITION && acb->task->datain.size >= 2) {
659         int ss;
660 
661         acb->ioh->driver_status |= SG_ERR_DRIVER_SENSE;
662 
663         acb->ioh->sb_len_wr = acb->task->datain.size - 2;
664         ss = (acb->ioh->mx_sb_len >= acb->ioh->sb_len_wr) ?
665              acb->ioh->mx_sb_len : acb->ioh->sb_len_wr;
666         memcpy(acb->ioh->sbp, &acb->task->datain.data[2], ss);
667     }
668 
669     iscsi_schedule_bh(acb);
670 }
671 
672 static BlockDriverAIOCB *iscsi_aio_ioctl(BlockDriverState *bs,
673         unsigned long int req, void *buf,
674         BlockDriverCompletionFunc *cb, void *opaque)
675 {
676     IscsiLun *iscsilun = bs->opaque;
677     struct iscsi_context *iscsi = iscsilun->iscsi;
678     struct iscsi_data data;
679     IscsiAIOCB *acb;
680 
681     assert(req == SG_IO);
682 
683     acb = qemu_aio_get(&iscsi_aiocb_info, bs, cb, opaque);
684 
685     acb->iscsilun = iscsilun;
686     acb->canceled    = 0;
687     acb->bh          = NULL;
688     acb->status      = -EINPROGRESS;
689     acb->buf         = NULL;
690     acb->ioh         = buf;
691 
692     acb->task = malloc(sizeof(struct scsi_task));
693     if (acb->task == NULL) {
694         error_report("iSCSI: Failed to allocate task for scsi command. %s",
695                      iscsi_get_error(iscsi));
696         qemu_aio_release(acb);
697         return NULL;
698     }
699     memset(acb->task, 0, sizeof(struct scsi_task));
700 
701     switch (acb->ioh->dxfer_direction) {
702     case SG_DXFER_TO_DEV:
703         acb->task->xfer_dir = SCSI_XFER_WRITE;
704         break;
705     case SG_DXFER_FROM_DEV:
706         acb->task->xfer_dir = SCSI_XFER_READ;
707         break;
708     default:
709         acb->task->xfer_dir = SCSI_XFER_NONE;
710         break;
711     }
712 
713     acb->task->cdb_size = acb->ioh->cmd_len;
714     memcpy(&acb->task->cdb[0], acb->ioh->cmdp, acb->ioh->cmd_len);
715     acb->task->expxferlen = acb->ioh->dxfer_len;
716 
717     data.size = 0;
718     if (acb->task->xfer_dir == SCSI_XFER_WRITE) {
719         if (acb->ioh->iovec_count == 0) {
720             data.data = acb->ioh->dxferp;
721             data.size = acb->ioh->dxfer_len;
722         } else {
723             scsi_task_set_iov_out(acb->task,
724                                  (struct scsi_iovec *) acb->ioh->dxferp,
725                                  acb->ioh->iovec_count);
726         }
727     }
728 
729     if (iscsi_scsi_command_async(iscsi, iscsilun->lun, acb->task,
730                                  iscsi_aio_ioctl_cb,
731                                  (data.size > 0) ? &data : NULL,
732                                  acb) != 0) {
733         scsi_free_scsi_task(acb->task);
734         qemu_aio_release(acb);
735         return NULL;
736     }
737 
738     /* tell libiscsi to read straight into the buffer we got from ioctl */
739     if (acb->task->xfer_dir == SCSI_XFER_READ) {
740         if (acb->ioh->iovec_count == 0) {
741             scsi_task_add_data_in_buffer(acb->task,
742                                          acb->ioh->dxfer_len,
743                                          acb->ioh->dxferp);
744         } else {
745             scsi_task_set_iov_in(acb->task,
746                                  (struct scsi_iovec *) acb->ioh->dxferp,
747                                  acb->ioh->iovec_count);
748         }
749     }
750 
751     iscsi_set_events(iscsilun);
752 
753     return &acb->common;
754 }
755 
/*
 * Completion callback used by the synchronous ioctl path: @opaque points to
 * an int that receives the request's final status.
 */
static void ioctl_cb(void *opaque, int status)
{
    *(int *)opaque = status;
}
761 
762 static int iscsi_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
763 {
764     IscsiLun *iscsilun = bs->opaque;
765     int status;
766 
767     switch (req) {
768     case SG_GET_VERSION_NUM:
769         *(int *)buf = 30000;
770         break;
771     case SG_GET_SCSI_ID:
772         ((struct sg_scsi_id *)buf)->scsi_type = iscsilun->type;
773         break;
774     case SG_IO:
775         status = -EINPROGRESS;
776         iscsi_aio_ioctl(bs, req, buf, ioctl_cb, &status);
777 
778         while (status == -EINPROGRESS) {
779             aio_poll(iscsilun->aio_context, true);
780         }
781 
782         return 0;
783     default:
784         return -1;
785     }
786     return 0;
787 }
788 #endif
789 
790 static int64_t
791 iscsi_getlength(BlockDriverState *bs)
792 {
793     IscsiLun *iscsilun = bs->opaque;
794     int64_t len;
795 
796     len  = iscsilun->num_blocks;
797     len *= iscsilun->block_size;
798 
799     return len;
800 }
801 
802 static int
803 coroutine_fn iscsi_co_discard(BlockDriverState *bs, int64_t sector_num,
804                                    int nb_sectors)
805 {
806     IscsiLun *iscsilun = bs->opaque;
807     struct IscsiTask iTask;
808     struct unmap_list list;
809 
810     if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
811         return -EINVAL;
812     }
813 
814     if (!iscsilun->lbp.lbpu) {
815         /* UNMAP is not supported by the target */
816         return 0;
817     }
818 
819     list.lba = sector_qemu2lun(sector_num, iscsilun);
820     list.num = sector_qemu2lun(nb_sectors, iscsilun);
821 
822     iscsi_co_init_iscsitask(iscsilun, &iTask);
823 retry:
824     if (iscsi_unmap_task(iscsilun->iscsi, iscsilun->lun, 0, 0, &list, 1,
825                      iscsi_co_generic_cb, &iTask) == NULL) {
826         return -ENOMEM;
827     }
828 
829     while (!iTask.complete) {
830         iscsi_set_events(iscsilun);
831         qemu_coroutine_yield();
832     }
833 
834     if (iTask.task != NULL) {
835         scsi_free_scsi_task(iTask.task);
836         iTask.task = NULL;
837     }
838 
839     if (iTask.do_retry) {
840         iTask.complete = 0;
841         goto retry;
842     }
843 
844     if (iTask.status == SCSI_STATUS_CHECK_CONDITION) {
845         /* the target might fail with a check condition if it
846            is not happy with the alignment of the UNMAP request
847            we silently fail in this case */
848         return 0;
849     }
850 
851     if (iTask.status != SCSI_STATUS_GOOD) {
852         return -EIO;
853     }
854 
855     iscsi_allocationmap_clear(iscsilun, sector_num, nb_sectors);
856 
857     return 0;
858 }
859 
860 static int
861 coroutine_fn iscsi_co_write_zeroes(BlockDriverState *bs, int64_t sector_num,
862                                    int nb_sectors, BdrvRequestFlags flags)
863 {
864     IscsiLun *iscsilun = bs->opaque;
865     struct IscsiTask iTask;
866     uint64_t lba;
867     uint32_t nb_blocks;
868     bool use_16_for_ws = iscsilun->use_16_for_rw;
869 
870     if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
871         return -EINVAL;
872     }
873 
874     if (flags & BDRV_REQ_MAY_UNMAP) {
875         if (!use_16_for_ws && !iscsilun->lbp.lbpws10) {
876             /* WRITESAME10 with UNMAP is unsupported try WRITESAME16 */
877             use_16_for_ws = true;
878         }
879         if (use_16_for_ws && !iscsilun->lbp.lbpws) {
880             /* WRITESAME16 with UNMAP is not supported by the target,
881              * fall back and try WRITESAME10/16 without UNMAP */
882             flags &= ~BDRV_REQ_MAY_UNMAP;
883             use_16_for_ws = iscsilun->use_16_for_rw;
884         }
885     }
886 
887     if (!(flags & BDRV_REQ_MAY_UNMAP) && !iscsilun->has_write_same) {
888         /* WRITESAME without UNMAP is not supported by the target */
889         return -ENOTSUP;
890     }
891 
892     lba = sector_qemu2lun(sector_num, iscsilun);
893     nb_blocks = sector_qemu2lun(nb_sectors, iscsilun);
894 
895     if (iscsilun->zeroblock == NULL) {
896         iscsilun->zeroblock = g_malloc0(iscsilun->block_size);
897     }
898 
899     iscsi_co_init_iscsitask(iscsilun, &iTask);
900 retry:
901     if (use_16_for_ws) {
902         iTask.task = iscsi_writesame16_task(iscsilun->iscsi, iscsilun->lun, lba,
903                                             iscsilun->zeroblock, iscsilun->block_size,
904                                             nb_blocks, 0, !!(flags & BDRV_REQ_MAY_UNMAP),
905                                             0, 0, iscsi_co_generic_cb, &iTask);
906     } else {
907         iTask.task = iscsi_writesame10_task(iscsilun->iscsi, iscsilun->lun, lba,
908                                             iscsilun->zeroblock, iscsilun->block_size,
909                                             nb_blocks, 0, !!(flags & BDRV_REQ_MAY_UNMAP),
910                                             0, 0, iscsi_co_generic_cb, &iTask);
911     }
912     if (iTask.task == NULL) {
913         return -ENOMEM;
914     }
915 
916     while (!iTask.complete) {
917         iscsi_set_events(iscsilun);
918         qemu_coroutine_yield();
919     }
920 
921     if (iTask.status == SCSI_STATUS_CHECK_CONDITION &&
922         iTask.task->sense.key == SCSI_SENSE_ILLEGAL_REQUEST &&
923         (iTask.task->sense.ascq == SCSI_SENSE_ASCQ_INVALID_OPERATION_CODE ||
924          iTask.task->sense.ascq == SCSI_SENSE_ASCQ_INVALID_FIELD_IN_CDB)) {
925         /* WRITE SAME is not supported by the target */
926         iscsilun->has_write_same = false;
927         scsi_free_scsi_task(iTask.task);
928         return -ENOTSUP;
929     }
930 
931     if (iTask.task != NULL) {
932         scsi_free_scsi_task(iTask.task);
933         iTask.task = NULL;
934     }
935 
936     if (iTask.do_retry) {
937         iTask.complete = 0;
938         goto retry;
939     }
940 
941     if (iTask.status != SCSI_STATUS_GOOD) {
942         return -EIO;
943     }
944 
945     if (flags & BDRV_REQ_MAY_UNMAP) {
946         iscsi_allocationmap_clear(iscsilun, sector_num, nb_sectors);
947     } else {
948         iscsi_allocationmap_set(iscsilun, sector_num, nb_sectors);
949     }
950 
951     return 0;
952 }
953 
954 static void parse_chap(struct iscsi_context *iscsi, const char *target,
955                        Error **errp)
956 {
957     QemuOptsList *list;
958     QemuOpts *opts;
959     const char *user = NULL;
960     const char *password = NULL;
961 
962     list = qemu_find_opts("iscsi");
963     if (!list) {
964         return;
965     }
966 
967     opts = qemu_opts_find(list, target);
968     if (opts == NULL) {
969         opts = QTAILQ_FIRST(&list->head);
970         if (!opts) {
971             return;
972         }
973     }
974 
975     user = qemu_opt_get(opts, "user");
976     if (!user) {
977         return;
978     }
979 
980     password = qemu_opt_get(opts, "password");
981     if (!password) {
982         error_setg(errp, "CHAP username specified but no password was given");
983         return;
984     }
985 
986     if (iscsi_set_initiator_username_pwd(iscsi, user, password)) {
987         error_setg(errp, "Failed to set initiator username and password");
988     }
989 }
990 
991 static void parse_header_digest(struct iscsi_context *iscsi, const char *target,
992                                 Error **errp)
993 {
994     QemuOptsList *list;
995     QemuOpts *opts;
996     const char *digest = NULL;
997 
998     list = qemu_find_opts("iscsi");
999     if (!list) {
1000         return;
1001     }
1002 
1003     opts = qemu_opts_find(list, target);
1004     if (opts == NULL) {
1005         opts = QTAILQ_FIRST(&list->head);
1006         if (!opts) {
1007             return;
1008         }
1009     }
1010 
1011     digest = qemu_opt_get(opts, "header-digest");
1012     if (!digest) {
1013         return;
1014     }
1015 
1016     if (!strcmp(digest, "CRC32C")) {
1017         iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_CRC32C);
1018     } else if (!strcmp(digest, "NONE")) {
1019         iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE);
1020     } else if (!strcmp(digest, "CRC32C-NONE")) {
1021         iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_CRC32C_NONE);
1022     } else if (!strcmp(digest, "NONE-CRC32C")) {
1023         iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE_CRC32C);
1024     } else {
1025         error_setg(errp, "Invalid header-digest setting : %s", digest);
1026     }
1027 }
1028 
1029 static char *parse_initiator_name(const char *target)
1030 {
1031     QemuOptsList *list;
1032     QemuOpts *opts;
1033     const char *name;
1034     char *iscsi_name;
1035     UuidInfo *uuid_info;
1036 
1037     list = qemu_find_opts("iscsi");
1038     if (list) {
1039         opts = qemu_opts_find(list, target);
1040         if (!opts) {
1041             opts = QTAILQ_FIRST(&list->head);
1042         }
1043         if (opts) {
1044             name = qemu_opt_get(opts, "initiator-name");
1045             if (name) {
1046                 return g_strdup(name);
1047             }
1048         }
1049     }
1050 
1051     uuid_info = qmp_query_uuid(NULL);
1052     if (strcmp(uuid_info->UUID, UUID_NONE) == 0) {
1053         name = qemu_get_vm_name();
1054     } else {
1055         name = uuid_info->UUID;
1056     }
1057     iscsi_name = g_strdup_printf("iqn.2008-11.org.linux-kvm%s%s",
1058                                  name ? ":" : "", name ? name : "");
1059     qapi_free_UuidInfo(uuid_info);
1060     return iscsi_name;
1061 }
1062 
1063 static void iscsi_nop_timed_event(void *opaque)
1064 {
1065     IscsiLun *iscsilun = opaque;
1066 
1067     if (iscsi_get_nops_in_flight(iscsilun->iscsi) > MAX_NOP_FAILURES) {
1068         error_report("iSCSI: NOP timeout. Reconnecting...");
1069         iscsi_reconnect(iscsilun->iscsi);
1070     }
1071 
1072     if (iscsi_nop_out_async(iscsilun->iscsi, NULL, NULL, 0, NULL) != 0) {
1073         error_report("iSCSI: failed to sent NOP-Out. Disabling NOP messages.");
1074         return;
1075     }
1076 
1077     timer_mod(iscsilun->nop_timer, qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + NOP_INTERVAL);
1078     iscsi_set_events(iscsilun);
1079 }
1080 
1081 static void iscsi_readcapacity_sync(IscsiLun *iscsilun, Error **errp)
1082 {
1083     struct scsi_task *task = NULL;
1084     struct scsi_readcapacity10 *rc10 = NULL;
1085     struct scsi_readcapacity16 *rc16 = NULL;
1086     int retries = ISCSI_CMD_RETRIES;
1087 
1088     do {
1089         if (task != NULL) {
1090             scsi_free_scsi_task(task);
1091             task = NULL;
1092         }
1093 
1094         switch (iscsilun->type) {
1095         case TYPE_DISK:
1096             task = iscsi_readcapacity16_sync(iscsilun->iscsi, iscsilun->lun);
1097             if (task != NULL && task->status == SCSI_STATUS_GOOD) {
1098                 rc16 = scsi_datain_unmarshall(task);
1099                 if (rc16 == NULL) {
1100                     error_setg(errp, "iSCSI: Failed to unmarshall readcapacity16 data.");
1101                 } else {
1102                     iscsilun->block_size = rc16->block_length;
1103                     iscsilun->num_blocks = rc16->returned_lba + 1;
1104                     iscsilun->lbpme = rc16->lbpme;
1105                     iscsilun->lbprz = rc16->lbprz;
1106                     iscsilun->use_16_for_rw = (rc16->returned_lba > 0xffffffff);
1107                 }
1108             }
1109             break;
1110         case TYPE_ROM:
1111             task = iscsi_readcapacity10_sync(iscsilun->iscsi, iscsilun->lun, 0, 0);
1112             if (task != NULL && task->status == SCSI_STATUS_GOOD) {
1113                 rc10 = scsi_datain_unmarshall(task);
1114                 if (rc10 == NULL) {
1115                     error_setg(errp, "iSCSI: Failed to unmarshall readcapacity10 data.");
1116                 } else {
1117                     iscsilun->block_size = rc10->block_size;
1118                     if (rc10->lba == 0) {
1119                         /* blank disk loaded */
1120                         iscsilun->num_blocks = 0;
1121                     } else {
1122                         iscsilun->num_blocks = rc10->lba + 1;
1123                     }
1124                 }
1125             }
1126             break;
1127         default:
1128             return;
1129         }
1130     } while (task != NULL && task->status == SCSI_STATUS_CHECK_CONDITION
1131              && task->sense.key == SCSI_SENSE_UNIT_ATTENTION
1132              && retries-- > 0);
1133 
1134     if (task == NULL || task->status != SCSI_STATUS_GOOD) {
1135         error_setg(errp, "iSCSI: failed to send readcapacity10 command.");
1136     }
1137     if (task) {
1138         scsi_free_scsi_task(task);
1139     }
1140 }
1141 
1142 /* TODO Convert to fine grained options */
1143 static QemuOptsList runtime_opts = {
1144     .name = "iscsi",
1145     .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
1146     .desc = {
1147         {
1148             .name = "filename",
1149             .type = QEMU_OPT_STRING,
1150             .help = "URL to the iscsi image",
1151         },
1152         { /* end of list */ }
1153     },
1154 };
1155 
1156 static struct scsi_task *iscsi_do_inquiry(struct iscsi_context *iscsi, int lun,
1157                                           int evpd, int pc, void **inq, Error **errp)
1158 {
1159     int full_size;
1160     struct scsi_task *task = NULL;
1161     task = iscsi_inquiry_sync(iscsi, lun, evpd, pc, 64);
1162     if (task == NULL || task->status != SCSI_STATUS_GOOD) {
1163         goto fail;
1164     }
1165     full_size = scsi_datain_getfullsize(task);
1166     if (full_size > task->datain.size) {
1167         scsi_free_scsi_task(task);
1168 
1169         /* we need more data for the full list */
1170         task = iscsi_inquiry_sync(iscsi, lun, evpd, pc, full_size);
1171         if (task == NULL || task->status != SCSI_STATUS_GOOD) {
1172             goto fail;
1173         }
1174     }
1175 
1176     *inq = scsi_datain_unmarshall(task);
1177     if (*inq == NULL) {
1178         error_setg(errp, "iSCSI: failed to unmarshall inquiry datain blob");
1179         goto fail_with_err;
1180     }
1181 
1182     return task;
1183 
1184 fail:
1185     error_setg(errp, "iSCSI: Inquiry command failed : %s",
1186                iscsi_get_error(iscsi));
1187 fail_with_err:
1188     if (task != NULL) {
1189         scsi_free_scsi_task(task);
1190     }
1191     return NULL;
1192 }
1193 
1194 static void iscsi_detach_aio_context(BlockDriverState *bs)
1195 {
1196     IscsiLun *iscsilun = bs->opaque;
1197 
1198     aio_set_fd_handler(iscsilun->aio_context,
1199                        iscsi_get_fd(iscsilun->iscsi),
1200                        NULL, NULL, NULL);
1201     iscsilun->events = 0;
1202 
1203     if (iscsilun->nop_timer) {
1204         timer_del(iscsilun->nop_timer);
1205         timer_free(iscsilun->nop_timer);
1206         iscsilun->nop_timer = NULL;
1207     }
1208 }
1209 
1210 static void iscsi_attach_aio_context(BlockDriverState *bs,
1211                                      AioContext *new_context)
1212 {
1213     IscsiLun *iscsilun = bs->opaque;
1214 
1215     iscsilun->aio_context = new_context;
1216     iscsi_set_events(iscsilun);
1217 
1218     /* Set up a timer for sending out iSCSI NOPs */
1219     iscsilun->nop_timer = aio_timer_new(iscsilun->aio_context,
1220                                         QEMU_CLOCK_REALTIME, SCALE_MS,
1221                                         iscsi_nop_timed_event, iscsilun);
1222     timer_mod(iscsilun->nop_timer,
1223               qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + NOP_INTERVAL);
1224 }
1225 
1226 /*
1227  * We support iscsi url's on the form
1228  * iscsi://[<username>%<password>@]<host>[:<port>]/<targetname>/<lun>
1229  *
1230  * Note: flags are currently not used by iscsi_open.  If this function
1231  * is changed such that flags are used, please examine iscsi_reopen_prepare()
1232  * to see if needs to be changed as well.
1233  */
1234 static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
1235                       Error **errp)
1236 {
1237     IscsiLun *iscsilun = bs->opaque;
1238     struct iscsi_context *iscsi = NULL;
1239     struct iscsi_url *iscsi_url = NULL;
1240     struct scsi_task *task = NULL;
1241     struct scsi_inquiry_standard *inq = NULL;
1242     struct scsi_inquiry_supported_pages *inq_vpd;
1243     char *initiator_name = NULL;
1244     QemuOpts *opts;
1245     Error *local_err = NULL;
1246     const char *filename;
1247     int i, ret;
1248 
1249     if ((BDRV_SECTOR_SIZE % 512) != 0) {
1250         error_setg(errp, "iSCSI: Invalid BDRV_SECTOR_SIZE. "
1251                    "BDRV_SECTOR_SIZE(%lld) is not a multiple "
1252                    "of 512", BDRV_SECTOR_SIZE);
1253         return -EINVAL;
1254     }
1255 
1256     opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
1257     qemu_opts_absorb_qdict(opts, options, &local_err);
1258     if (local_err) {
1259         error_propagate(errp, local_err);
1260         ret = -EINVAL;
1261         goto out;
1262     }
1263 
1264     filename = qemu_opt_get(opts, "filename");
1265 
1266     iscsi_url = iscsi_parse_full_url(iscsi, filename);
1267     if (iscsi_url == NULL) {
1268         error_setg(errp, "Failed to parse URL : %s", filename);
1269         ret = -EINVAL;
1270         goto out;
1271     }
1272 
1273     memset(iscsilun, 0, sizeof(IscsiLun));
1274 
1275     initiator_name = parse_initiator_name(iscsi_url->target);
1276 
1277     iscsi = iscsi_create_context(initiator_name);
1278     if (iscsi == NULL) {
1279         error_setg(errp, "iSCSI: Failed to create iSCSI context.");
1280         ret = -ENOMEM;
1281         goto out;
1282     }
1283 
1284     if (iscsi_set_targetname(iscsi, iscsi_url->target)) {
1285         error_setg(errp, "iSCSI: Failed to set target name.");
1286         ret = -EINVAL;
1287         goto out;
1288     }
1289 
1290     if (iscsi_url->user != NULL) {
1291         ret = iscsi_set_initiator_username_pwd(iscsi, iscsi_url->user,
1292                                               iscsi_url->passwd);
1293         if (ret != 0) {
1294             error_setg(errp, "Failed to set initiator username and password");
1295             ret = -EINVAL;
1296             goto out;
1297         }
1298     }
1299 
1300     /* check if we got CHAP username/password via the options */
1301     parse_chap(iscsi, iscsi_url->target, &local_err);
1302     if (local_err != NULL) {
1303         error_propagate(errp, local_err);
1304         ret = -EINVAL;
1305         goto out;
1306     }
1307 
1308     if (iscsi_set_session_type(iscsi, ISCSI_SESSION_NORMAL) != 0) {
1309         error_setg(errp, "iSCSI: Failed to set session type to normal.");
1310         ret = -EINVAL;
1311         goto out;
1312     }
1313 
1314     iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE_CRC32C);
1315 
1316     /* check if we got HEADER_DIGEST via the options */
1317     parse_header_digest(iscsi, iscsi_url->target, &local_err);
1318     if (local_err != NULL) {
1319         error_propagate(errp, local_err);
1320         ret = -EINVAL;
1321         goto out;
1322     }
1323 
1324     if (iscsi_full_connect_sync(iscsi, iscsi_url->portal, iscsi_url->lun) != 0) {
1325         error_setg(errp, "iSCSI: Failed to connect to LUN : %s",
1326             iscsi_get_error(iscsi));
1327         ret = -EINVAL;
1328         goto out;
1329     }
1330 
1331     iscsilun->iscsi = iscsi;
1332     iscsilun->aio_context = bdrv_get_aio_context(bs);
1333     iscsilun->lun   = iscsi_url->lun;
1334     iscsilun->has_write_same = true;
1335 
1336     task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 0, 0,
1337                             (void **) &inq, errp);
1338     if (task == NULL) {
1339         ret = -EINVAL;
1340         goto out;
1341     }
1342     iscsilun->type = inq->periperal_device_type;
1343     scsi_free_scsi_task(task);
1344     task = NULL;
1345 
1346     iscsi_readcapacity_sync(iscsilun, &local_err);
1347     if (local_err != NULL) {
1348         error_propagate(errp, local_err);
1349         ret = -EINVAL;
1350         goto out;
1351     }
1352     bs->total_sectors = sector_lun2qemu(iscsilun->num_blocks, iscsilun);
1353     bs->request_alignment = iscsilun->block_size;
1354 
1355     /* We don't have any emulation for devices other than disks and CD-ROMs, so
1356      * this must be sg ioctl compatible. We force it to be sg, otherwise qemu
1357      * will try to read from the device to guess the image format.
1358      */
1359     if (iscsilun->type != TYPE_DISK && iscsilun->type != TYPE_ROM) {
1360         bs->sg = 1;
1361     }
1362 
1363     task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 1,
1364                             SCSI_INQUIRY_PAGECODE_SUPPORTED_VPD_PAGES,
1365                             (void **) &inq_vpd, errp);
1366     if (task == NULL) {
1367         ret = -EINVAL;
1368         goto out;
1369     }
1370     for (i = 0; i < inq_vpd->num_pages; i++) {
1371         struct scsi_task *inq_task;
1372         struct scsi_inquiry_logical_block_provisioning *inq_lbp;
1373         struct scsi_inquiry_block_limits *inq_bl;
1374         switch (inq_vpd->pages[i]) {
1375         case SCSI_INQUIRY_PAGECODE_LOGICAL_BLOCK_PROVISIONING:
1376             inq_task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 1,
1377                                         SCSI_INQUIRY_PAGECODE_LOGICAL_BLOCK_PROVISIONING,
1378                                         (void **) &inq_lbp, errp);
1379             if (inq_task == NULL) {
1380                 ret = -EINVAL;
1381                 goto out;
1382             }
1383             memcpy(&iscsilun->lbp, inq_lbp,
1384                    sizeof(struct scsi_inquiry_logical_block_provisioning));
1385             scsi_free_scsi_task(inq_task);
1386             break;
1387         case SCSI_INQUIRY_PAGECODE_BLOCK_LIMITS:
1388             inq_task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 1,
1389                                     SCSI_INQUIRY_PAGECODE_BLOCK_LIMITS,
1390                                     (void **) &inq_bl, errp);
1391             if (inq_task == NULL) {
1392                 ret = -EINVAL;
1393                 goto out;
1394             }
1395             memcpy(&iscsilun->bl, inq_bl,
1396                    sizeof(struct scsi_inquiry_block_limits));
1397             scsi_free_scsi_task(inq_task);
1398             break;
1399         default:
1400             break;
1401         }
1402     }
1403     scsi_free_scsi_task(task);
1404     task = NULL;
1405 
1406     iscsi_attach_aio_context(bs, iscsilun->aio_context);
1407 
1408     /* Guess the internal cluster (page) size of the iscsi target by the means
1409      * of opt_unmap_gran. Transfer the unmap granularity only if it has a
1410      * reasonable size */
1411     if (iscsilun->bl.opt_unmap_gran * iscsilun->block_size >= 4 * 1024 &&
1412         iscsilun->bl.opt_unmap_gran * iscsilun->block_size <= 16 * 1024 * 1024) {
1413         iscsilun->cluster_sectors = (iscsilun->bl.opt_unmap_gran *
1414                                      iscsilun->block_size) >> BDRV_SECTOR_BITS;
1415         if (iscsilun->lbprz && !(bs->open_flags & BDRV_O_NOCACHE)) {
1416             iscsilun->allocationmap =
1417                 bitmap_new(DIV_ROUND_UP(bs->total_sectors,
1418                                         iscsilun->cluster_sectors));
1419         }
1420     }
1421 
1422 out:
1423     qemu_opts_del(opts);
1424     g_free(initiator_name);
1425     if (iscsi_url != NULL) {
1426         iscsi_destroy_url(iscsi_url);
1427     }
1428     if (task != NULL) {
1429         scsi_free_scsi_task(task);
1430     }
1431 
1432     if (ret) {
1433         if (iscsi != NULL) {
1434             iscsi_destroy_context(iscsi);
1435         }
1436         memset(iscsilun, 0, sizeof(IscsiLun));
1437     }
1438     return ret;
1439 }
1440 
1441 static void iscsi_close(BlockDriverState *bs)
1442 {
1443     IscsiLun *iscsilun = bs->opaque;
1444     struct iscsi_context *iscsi = iscsilun->iscsi;
1445 
1446     iscsi_detach_aio_context(bs);
1447     iscsi_destroy_context(iscsi);
1448     g_free(iscsilun->zeroblock);
1449     g_free(iscsilun->allocationmap);
1450     memset(iscsilun, 0, sizeof(IscsiLun));
1451 }
1452 
1453 static void iscsi_refresh_limits(BlockDriverState *bs, Error **errp)
1454 {
1455     IscsiLun *iscsilun = bs->opaque;
1456 
1457     /* We don't actually refresh here, but just return data queried in
1458      * iscsi_open(): iscsi targets don't change their limits. */
1459     if (iscsilun->lbp.lbpu) {
1460         if (iscsilun->bl.max_unmap < 0xffffffff) {
1461             bs->bl.max_discard = sector_lun2qemu(iscsilun->bl.max_unmap,
1462                                                  iscsilun);
1463         }
1464         bs->bl.discard_alignment = sector_lun2qemu(iscsilun->bl.opt_unmap_gran,
1465                                                    iscsilun);
1466     }
1467 
1468     if (iscsilun->bl.max_ws_len < 0xffffffff) {
1469         bs->bl.max_write_zeroes = sector_lun2qemu(iscsilun->bl.max_ws_len,
1470                                                   iscsilun);
1471     }
1472     if (iscsilun->lbp.lbpws) {
1473         bs->bl.write_zeroes_alignment = sector_lun2qemu(iscsilun->bl.opt_unmap_gran,
1474                                                         iscsilun);
1475     }
1476     bs->bl.opt_transfer_length = sector_lun2qemu(iscsilun->bl.opt_xfer_len,
1477                                                  iscsilun);
1478 }
1479 
1480 /* Since iscsi_open() ignores bdrv_flags, there is nothing to do here in
1481  * prepare.  Note that this will not re-establish a connection with an iSCSI
1482  * target - it is effectively a NOP.  */
1483 static int iscsi_reopen_prepare(BDRVReopenState *state,
1484                                 BlockReopenQueue *queue, Error **errp)
1485 {
1486     /* NOP */
1487     return 0;
1488 }
1489 
1490 static int iscsi_truncate(BlockDriverState *bs, int64_t offset)
1491 {
1492     IscsiLun *iscsilun = bs->opaque;
1493     Error *local_err = NULL;
1494 
1495     if (iscsilun->type != TYPE_DISK) {
1496         return -ENOTSUP;
1497     }
1498 
1499     iscsi_readcapacity_sync(iscsilun, &local_err);
1500     if (local_err != NULL) {
1501         error_free(local_err);
1502         return -EIO;
1503     }
1504 
1505     if (offset > iscsi_getlength(bs)) {
1506         return -EINVAL;
1507     }
1508 
1509     if (iscsilun->allocationmap != NULL) {
1510         g_free(iscsilun->allocationmap);
1511         iscsilun->allocationmap =
1512             bitmap_new(DIV_ROUND_UP(bs->total_sectors,
1513                                     iscsilun->cluster_sectors));
1514     }
1515 
1516     return 0;
1517 }
1518 
1519 static int iscsi_create(const char *filename, QemuOpts *opts, Error **errp)
1520 {
1521     int ret = 0;
1522     int64_t total_size = 0;
1523     BlockDriverState *bs;
1524     IscsiLun *iscsilun = NULL;
1525     QDict *bs_options;
1526 
1527     bs = bdrv_new("", &error_abort);
1528 
1529     /* Read out options */
1530     total_size =
1531         qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0) / BDRV_SECTOR_SIZE;
1532     bs->opaque = g_malloc0(sizeof(struct IscsiLun));
1533     iscsilun = bs->opaque;
1534 
1535     bs_options = qdict_new();
1536     qdict_put(bs_options, "filename", qstring_from_str(filename));
1537     ret = iscsi_open(bs, bs_options, 0, NULL);
1538     QDECREF(bs_options);
1539 
1540     if (ret != 0) {
1541         goto out;
1542     }
1543     iscsi_detach_aio_context(bs);
1544     if (iscsilun->type != TYPE_DISK) {
1545         ret = -ENODEV;
1546         goto out;
1547     }
1548     if (bs->total_sectors < total_size) {
1549         ret = -ENOSPC;
1550         goto out;
1551     }
1552 
1553     ret = 0;
1554 out:
1555     if (iscsilun->iscsi != NULL) {
1556         iscsi_destroy_context(iscsilun->iscsi);
1557     }
1558     g_free(bs->opaque);
1559     bs->opaque = NULL;
1560     bdrv_unref(bs);
1561     return ret;
1562 }
1563 
1564 static int iscsi_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
1565 {
1566     IscsiLun *iscsilun = bs->opaque;
1567     bdi->unallocated_blocks_are_zero = !!iscsilun->lbprz;
1568     bdi->can_write_zeroes_with_unmap = iscsilun->lbprz && iscsilun->lbp.lbpws;
1569     bdi->cluster_size = iscsilun->cluster_sectors * BDRV_SECTOR_SIZE;
1570     return 0;
1571 }
1572 
1573 static QemuOptsList iscsi_create_opts = {
1574     .name = "iscsi-create-opts",
1575     .head = QTAILQ_HEAD_INITIALIZER(iscsi_create_opts.head),
1576     .desc = {
1577         {
1578             .name = BLOCK_OPT_SIZE,
1579             .type = QEMU_OPT_SIZE,
1580             .help = "Virtual disk size"
1581         },
1582         { /* end of list */ }
1583     }
1584 };
1585 
1586 static BlockDriver bdrv_iscsi = {
1587     .format_name     = "iscsi",
1588     .protocol_name   = "iscsi",
1589 
1590     .instance_size   = sizeof(IscsiLun),
1591     .bdrv_needs_filename = true,
1592     .bdrv_file_open  = iscsi_open,
1593     .bdrv_close      = iscsi_close,
1594     .bdrv_create     = iscsi_create,
1595     .create_opts     = &iscsi_create_opts,
1596     .bdrv_reopen_prepare  = iscsi_reopen_prepare,
1597 
1598     .bdrv_getlength  = iscsi_getlength,
1599     .bdrv_get_info   = iscsi_get_info,
1600     .bdrv_truncate   = iscsi_truncate,
1601     .bdrv_refresh_limits = iscsi_refresh_limits,
1602 
1603     .bdrv_co_get_block_status = iscsi_co_get_block_status,
1604     .bdrv_co_discard      = iscsi_co_discard,
1605     .bdrv_co_write_zeroes = iscsi_co_write_zeroes,
1606     .bdrv_co_readv         = iscsi_co_readv,
1607     .bdrv_co_writev        = iscsi_co_writev,
1608     .bdrv_co_flush_to_disk = iscsi_co_flush,
1609 
1610 #ifdef __linux__
1611     .bdrv_ioctl       = iscsi_ioctl,
1612     .bdrv_aio_ioctl   = iscsi_aio_ioctl,
1613 #endif
1614 
1615     .bdrv_detach_aio_context = iscsi_detach_aio_context,
1616     .bdrv_attach_aio_context = iscsi_attach_aio_context,
1617 };
1618 
1619 static QemuOptsList qemu_iscsi_opts = {
1620     .name = "iscsi",
1621     .head = QTAILQ_HEAD_INITIALIZER(qemu_iscsi_opts.head),
1622     .desc = {
1623         {
1624             .name = "user",
1625             .type = QEMU_OPT_STRING,
1626             .help = "username for CHAP authentication to target",
1627         },{
1628             .name = "password",
1629             .type = QEMU_OPT_STRING,
1630             .help = "password for CHAP authentication to target",
1631         },{
1632             .name = "header-digest",
1633             .type = QEMU_OPT_STRING,
1634             .help = "HeaderDigest setting. "
1635                     "{CRC32C|CRC32C-NONE|NONE-CRC32C|NONE}",
1636         },{
1637             .name = "initiator-name",
1638             .type = QEMU_OPT_STRING,
1639             .help = "Initiator iqn name to use when connecting",
1640         },
1641         { /* end of list */ }
1642     },
1643 };
1644 
1645 static void iscsi_block_init(void)
1646 {
1647     bdrv_register(&bdrv_iscsi);
1648     qemu_add_opts(&qemu_iscsi_opts);
1649 }
1650 
1651 block_init(iscsi_block_init);
1652