xref: /openbmc/qemu/block/iscsi.c (revision 13cc2c3e)
1 /*
2  * QEMU Block driver for iSCSI images
3  *
4  * Copyright (c) 2010-2011 Ronnie Sahlberg <ronniesahlberg@gmail.com>
5  * Copyright (c) 2012-2014 Peter Lieven <pl@kamp.de>
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a copy
8  * of this software and associated documentation files (the "Software"), to deal
9  * in the Software without restriction, including without limitation the rights
10  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11  * copies of the Software, and to permit persons to whom the Software is
12  * furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice shall be included in
15  * all copies or substantial portions of the Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23  * THE SOFTWARE.
24  */
25 
26 #include "config-host.h"
27 
28 #include <poll.h>
29 #include <arpa/inet.h>
30 #include "qemu-common.h"
31 #include "qemu/config-file.h"
32 #include "qemu/error-report.h"
33 #include "qemu/bitops.h"
34 #include "qemu/bitmap.h"
35 #include "block/block_int.h"
36 #include "trace.h"
37 #include "block/scsi.h"
38 #include "qemu/iov.h"
39 #include "sysemu/sysemu.h"
40 #include "qmp-commands.h"
41 
42 #include <iscsi/iscsi.h>
43 #include <iscsi/scsi-lowlevel.h>
44 
45 #ifdef __linux__
46 #include <scsi/sg.h>
47 #include <block/scsi.h>
48 #endif
49 
/* Per-LUN driver state; the coroutine entry points fetch it from bs->opaque. */
typedef struct IscsiLun {
    struct iscsi_context *iscsi;    /* libiscsi context used for every request */
    int lun;                        /* LUN number passed to each libiscsi task */
    /* device type; TYPE_DISK reads use READ(16), everything else READ(10) */
    enum scsi_inquiry_peripheral_device_type type;
    int block_size;                 /* LUN block size in bytes */
    uint64_t num_blocks;            /* LUN size in blocks of block_size */
    int events;                     /* poll mask last computed by iscsi_set_events() */
    /* NOTE(review): presumably the periodic NOP-Out timer; armed outside this view */
    QEMUTimer *nop_timer;
    uint8_t lbpme;      /* 0: block status reports every sector as allocated */
    uint8_t lbprz;      /* non-zero: deallocated/anchored blocks are reported as zero */
    uint8_t has_write_same; /* cleared once the target rejects WRITE SAME(16) */
    /* provisioning capabilities; lbpu gates UNMAP, lbpws gates WRITE SAME+UNMAP */
    struct scsi_inquiry_logical_block_provisioning lbp;
    /* NOTE(review): block limits; not referenced in the visible part of the file */
    struct scsi_inquiry_block_limits bl;
    unsigned char *zeroblock;       /* lazily allocated block_size bytes of zeroes */
    /* optional bitmap, one bit per cluster_sectors sectors; NULL disables tracking */
    unsigned long *allocationmap;
    int cluster_sectors;            /* QEMU sectors covered by one allocationmap bit */
} IscsiLun;
67 
/* State shared between a coroutine request and iscsi_co_generic_cb(). */
typedef struct IscsiTask {
    int status;             /* SCSI status reported to the completion callback */
    int complete;           /* set to 1 by the callback; request loops wait on it */
    int retries;            /* remaining retry budget, starts at ISCSI_CMD_RETRIES */
    int do_retry;           /* set to 1 by the callback when the command must be reissued */
    struct scsi_task *task; /* libiscsi task; freed by the request function */
    Coroutine *co;          /* coroutine re-entered from the bottom half */
    QEMUBH *bh;             /* bottom half scheduled by the callback */
} IscsiTask;
77 
/* AIO control block for callback-style requests (the SG_IO ioctl path). */
typedef struct IscsiAIOCB {
    BlockDriverAIOCB common;    /* generic AIOCB; holds the completion cb/opaque */
    QEMUIOVector *qiov;         /* NOTE(review): not referenced in the visible code */
    QEMUBH *bh;                 /* completion bottom half; non-NULL once scheduled */
    IscsiLun *iscsilun;         /* LUN the request was issued on */
    struct scsi_task *task;     /* libiscsi task; freed in iscsi_bh_cb() */
    uint8_t *buf;               /* optional bounce buffer, released with g_free() */
    int status;                 /* -EINPROGRESS until completed or cancelled */
    int canceled;               /* non-zero suppresses the completion callback */
    int retries;                /* NOTE(review): not referenced in the visible code */
    int64_t sector_num;         /* NOTE(review): not referenced in the visible code */
    int nb_sectors;             /* NOTE(review): not referenced in the visible code */
#ifdef __linux__
    sg_io_hdr_t *ioh;           /* caller's SG_IO header, filled in on completion */
#endif
} IscsiAIOCB;
94 
/* NOTE(review): presumably the NOP-Out period in milliseconds; confirm at the
 * timer setup, which is outside this view */
#define NOP_INTERVAL 5000
/* more NOPs in flight than this triggers a reconnect */
#define MAX_NOP_FAILURES 3
/* initial retry budget given to each coroutine request */
#define ISCSI_CMD_RETRIES 5

/* This threshold is a trade-off knob to choose between
 * the potential additional overhead of an extra GET_LBA_STATUS request
 * vs. unnecessarily reading a lot of zero sectors over the wire.
 * If a read request is greater than or equal to ISCSI_CHECKALLOC_THRES
 * sectors, we first check the allocation status of the area covered by the
 * request if the allocationmap indicates that the area might be
 * unallocated. */
#define ISCSI_CHECKALLOC_THRES 64
107 
108 static void
109 iscsi_bh_cb(void *p)
110 {
111     IscsiAIOCB *acb = p;
112 
113     qemu_bh_delete(acb->bh);
114 
115     g_free(acb->buf);
116     acb->buf = NULL;
117 
118     if (acb->canceled == 0) {
119         acb->common.cb(acb->common.opaque, acb->status);
120     }
121 
122     if (acb->task != NULL) {
123         scsi_free_scsi_task(acb->task);
124         acb->task = NULL;
125     }
126 
127     qemu_aio_release(acb);
128 }
129 
130 static void
131 iscsi_schedule_bh(IscsiAIOCB *acb)
132 {
133     if (acb->bh) {
134         return;
135     }
136     acb->bh = qemu_bh_new(iscsi_bh_cb, acb);
137     qemu_bh_schedule(acb->bh);
138 }
139 
140 static void iscsi_co_generic_bh_cb(void *opaque)
141 {
142     struct IscsiTask *iTask = opaque;
143     qemu_bh_delete(iTask->bh);
144     qemu_coroutine_enter(iTask->co, NULL);
145 }
146 
147 static void
148 iscsi_co_generic_cb(struct iscsi_context *iscsi, int status,
149                         void *command_data, void *opaque)
150 {
151     struct IscsiTask *iTask = opaque;
152     struct scsi_task *task = command_data;
153 
154     iTask->complete = 1;
155     iTask->status = status;
156     iTask->do_retry = 0;
157     iTask->task = task;
158 
159     if (iTask->retries-- > 0 && status == SCSI_STATUS_CHECK_CONDITION
160         && task->sense.key == SCSI_SENSE_UNIT_ATTENTION) {
161         error_report("iSCSI CheckCondition: %s", iscsi_get_error(iscsi));
162         iTask->do_retry = 1;
163         goto out;
164     }
165 
166     if (status != SCSI_STATUS_GOOD) {
167         error_report("iSCSI Failure: %s", iscsi_get_error(iscsi));
168     }
169 
170 out:
171     if (iTask->co) {
172         iTask->bh = qemu_bh_new(iscsi_co_generic_bh_cb, iTask);
173         qemu_bh_schedule(iTask->bh);
174     }
175 }
176 
177 static void iscsi_co_init_iscsitask(IscsiLun *iscsilun, struct IscsiTask *iTask)
178 {
179     *iTask = (struct IscsiTask) {
180         .co         = qemu_coroutine_self(),
181         .retries    = ISCSI_CMD_RETRIES,
182     };
183 }
184 
185 static void
186 iscsi_abort_task_cb(struct iscsi_context *iscsi, int status, void *command_data,
187                     void *private_data)
188 {
189     IscsiAIOCB *acb = private_data;
190 
191     acb->status = -ECANCELED;
192     iscsi_schedule_bh(acb);
193 }
194 
195 static void
196 iscsi_aio_cancel(BlockDriverAIOCB *blockacb)
197 {
198     IscsiAIOCB *acb = (IscsiAIOCB *)blockacb;
199     IscsiLun *iscsilun = acb->iscsilun;
200 
201     if (acb->status != -EINPROGRESS) {
202         return;
203     }
204 
205     acb->canceled = 1;
206 
207     /* send a task mgmt call to the target to cancel the task on the target */
208     iscsi_task_mgmt_abort_task_async(iscsilun->iscsi, acb->task,
209                                      iscsi_abort_task_cb, acb);
210 
211     while (acb->status == -EINPROGRESS) {
212         qemu_aio_wait();
213     }
214 }
215 
/* AIOCB descriptor passed to qemu_aio_get(): allocation size of an
 * IscsiAIOCB and the cancel hook for requests of this driver. */
static const AIOCBInfo iscsi_aiocb_info = {
    .aiocb_size         = sizeof(IscsiAIOCB),
    .cancel             = iscsi_aio_cancel,
};
220 
221 
/* Forward declarations: iscsi_set_events() registers these fd handlers
 * before their definitions appear. */
static void iscsi_process_read(void *arg);
static void iscsi_process_write(void *arg);
224 
225 static void
226 iscsi_set_events(IscsiLun *iscsilun)
227 {
228     struct iscsi_context *iscsi = iscsilun->iscsi;
229     int ev;
230 
231     /* We always register a read handler.  */
232     ev = POLLIN;
233     ev |= iscsi_which_events(iscsi);
234     if (ev != iscsilun->events) {
235         qemu_aio_set_fd_handler(iscsi_get_fd(iscsi),
236                       iscsi_process_read,
237                       (ev & POLLOUT) ? iscsi_process_write : NULL,
238                       iscsilun);
239 
240     }
241 
242     iscsilun->events = ev;
243 }
244 
245 static void
246 iscsi_process_read(void *arg)
247 {
248     IscsiLun *iscsilun = arg;
249     struct iscsi_context *iscsi = iscsilun->iscsi;
250 
251     iscsi_service(iscsi, POLLIN);
252     iscsi_set_events(iscsilun);
253 }
254 
255 static void
256 iscsi_process_write(void *arg)
257 {
258     IscsiLun *iscsilun = arg;
259     struct iscsi_context *iscsi = iscsilun->iscsi;
260 
261     iscsi_service(iscsi, POLLOUT);
262     iscsi_set_events(iscsilun);
263 }
264 
265 static int64_t sector_lun2qemu(int64_t sector, IscsiLun *iscsilun)
266 {
267     return sector * iscsilun->block_size / BDRV_SECTOR_SIZE;
268 }
269 
270 static int64_t sector_qemu2lun(int64_t sector, IscsiLun *iscsilun)
271 {
272     return sector * BDRV_SECTOR_SIZE / iscsilun->block_size;
273 }
274 
275 static bool is_request_lun_aligned(int64_t sector_num, int nb_sectors,
276                                       IscsiLun *iscsilun)
277 {
278     if ((sector_num * BDRV_SECTOR_SIZE) % iscsilun->block_size ||
279         (nb_sectors * BDRV_SECTOR_SIZE) % iscsilun->block_size) {
280             error_report("iSCSI misaligned request: "
281                          "iscsilun->block_size %u, sector_num %" PRIi64
282                          ", nb_sectors %d",
283                          iscsilun->block_size, sector_num, nb_sectors);
284             return 0;
285     }
286     return 1;
287 }
288 
289 static void iscsi_allocationmap_set(IscsiLun *iscsilun, int64_t sector_num,
290                                     int nb_sectors)
291 {
292     if (iscsilun->allocationmap == NULL) {
293         return;
294     }
295     bitmap_set(iscsilun->allocationmap,
296                sector_num / iscsilun->cluster_sectors,
297                DIV_ROUND_UP(nb_sectors, iscsilun->cluster_sectors));
298 }
299 
300 static void iscsi_allocationmap_clear(IscsiLun *iscsilun, int64_t sector_num,
301                                       int nb_sectors)
302 {
303     int64_t cluster_num, nb_clusters;
304     if (iscsilun->allocationmap == NULL) {
305         return;
306     }
307     cluster_num = DIV_ROUND_UP(sector_num, iscsilun->cluster_sectors);
308     nb_clusters = (sector_num + nb_sectors) / iscsilun->cluster_sectors
309                   - cluster_num;
310     if (nb_clusters > 0) {
311         bitmap_clear(iscsilun->allocationmap, cluster_num, nb_clusters);
312     }
313 }
314 
/*
 * Coroutine write of nb_sectors QEMU sectors starting at sector_num.
 *
 * The request is converted to LUN blocks and sent with
 * iscsi_write16_task(). The coroutine yields until iscsi_co_generic_cb()
 * marks the task complete and reissues the command while the callback
 * asks for a retry.
 *
 * Returns 0 on success (the range is then marked allocated in the
 * allocation map), -EINVAL for a request not aligned to the LUN block
 * size, -ENOMEM if the task could not be created and -EIO if the final
 * status is not GOOD.
 */
static int coroutine_fn iscsi_co_writev(BlockDriverState *bs,
                                        int64_t sector_num, int nb_sectors,
                                        QEMUIOVector *iov)
{
    IscsiLun *iscsilun = bs->opaque;
    struct IscsiTask iTask;
    uint64_t lba;
    uint32_t num_sectors;
    uint8_t *data = NULL;   /* payload pointer handed to libiscsi (may stay NULL) */
    uint8_t *buf = NULL;    /* bounce buffer owned by this function, if any */

    if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
        return -EINVAL;
    }

    lba = sector_qemu2lun(sector_num, iscsilun);
    num_sectors = sector_qemu2lun(nb_sectors, iscsilun);
#if !defined(LIBISCSI_FEATURE_IOVECTOR)
    /* if the iovec only contains one buffer we can pass it directly */
    if (iov->niov == 1) {
        data = iov->iov[0].iov_base;
    } else {
        /* otherwise linearize the vector into a temporary buffer */
        size_t size = MIN(nb_sectors * BDRV_SECTOR_SIZE, iov->size);
        buf = g_malloc(size);
        qemu_iovec_to_buf(iov, 0, buf, size);
        data = buf;
    }
#endif
    iscsi_co_init_iscsitask(iscsilun, &iTask);
retry:
    iTask.task = iscsi_write16_task(iscsilun->iscsi, iscsilun->lun, lba,
                                    data, num_sectors * iscsilun->block_size,
                                    iscsilun->block_size, 0, 0, 0, 0, 0,
                                    iscsi_co_generic_cb, &iTask);
    if (iTask.task == NULL) {
        g_free(buf);
        return -ENOMEM;
    }
#if defined(LIBISCSI_FEATURE_IOVECTOR)
    /* with iovector support the data is attached to the task directly */
    scsi_task_set_iov_out(iTask.task, (struct scsi_iovec *) iov->iov,
                          iov->niov);
#endif
    /* wait for the completion callback to re-enter this coroutine */
    while (!iTask.complete) {
        iscsi_set_events(iscsilun);
        qemu_coroutine_yield();
    }

    if (iTask.task != NULL) {
        scsi_free_scsi_task(iTask.task);
        iTask.task = NULL;
    }

    if (iTask.do_retry) {
        iTask.complete = 0;
        goto retry;
    }

    g_free(buf);

    if (iTask.status != SCSI_STATUS_GOOD) {
        return -EIO;
    }

    iscsi_allocationmap_set(iscsilun, sector_num, nb_sectors);

    return 0;
}
382 
383 
384 static bool iscsi_allocationmap_is_allocated(IscsiLun *iscsilun,
385                                              int64_t sector_num, int nb_sectors)
386 {
387     unsigned long size;
388     if (iscsilun->allocationmap == NULL) {
389         return true;
390     }
391     size = DIV_ROUND_UP(sector_num + nb_sectors, iscsilun->cluster_sectors);
392     return !(find_next_bit(iscsilun->allocationmap, size,
393                            sector_num / iscsilun->cluster_sectors) == size);
394 }
395 
396 
397 #if defined(LIBISCSI_FEATURE_IOVECTOR)
398 
/*
 * Query the allocation status of the range starting at sector_num.
 *
 * Issues GET LBA STATUS for the first LBA of the request and evaluates the
 * first returned descriptor. *pnum receives the number of QEMU sectors the
 * answer is valid for, clamped to nb_sectors.
 *
 * Returns a BDRV_BLOCK_* bit mask combined with the byte offset, or a
 * negative errno: -EINVAL for a misaligned request, -ENOMEM if the task
 * could not be created, -EIO if the response cannot be unmarshalled or
 * does not start at the requested LBA. If the LUN has no logical block
 * provisioning, or the command completes with a non-GOOD status, the
 * whole range is reported as allocated data.
 *
 * The allocation map is updated from the unclamped descriptor length.
 */
static int64_t coroutine_fn iscsi_co_get_block_status(BlockDriverState *bs,
                                                  int64_t sector_num,
                                                  int nb_sectors, int *pnum)
{
    IscsiLun *iscsilun = bs->opaque;
    struct scsi_get_lba_status *lbas = NULL;
    struct scsi_lba_status_descriptor *lbasd = NULL;
    struct IscsiTask iTask;
    int64_t ret;

    /* initialised first so the cleanup at "out" always sees a valid task pointer */
    iscsi_co_init_iscsitask(iscsilun, &iTask);

    if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
        ret = -EINVAL;
        goto out;
    }

    /* default to all sectors allocated */
    ret = BDRV_BLOCK_DATA;
    ret |= (sector_num << BDRV_SECTOR_BITS) | BDRV_BLOCK_OFFSET_VALID;
    *pnum = nb_sectors;

    /* LUN does not support logical block provisioning */
    if (iscsilun->lbpme == 0) {
        goto out;
    }

retry:
    /* the task pointer is delivered to iTask.task by the completion callback */
    if (iscsi_get_lba_status_task(iscsilun->iscsi, iscsilun->lun,
                                  sector_qemu2lun(sector_num, iscsilun),
                                  8 + 16, iscsi_co_generic_cb,
                                  &iTask) == NULL) {
        ret = -ENOMEM;
        goto out;
    }

    while (!iTask.complete) {
        iscsi_set_events(iscsilun);
        qemu_coroutine_yield();
    }

    if (iTask.do_retry) {
        if (iTask.task != NULL) {
            scsi_free_scsi_task(iTask.task);
            iTask.task = NULL;
        }
        iTask.complete = 0;
        goto retry;
    }

    if (iTask.status != SCSI_STATUS_GOOD) {
        /* in case the get_lba_status_callout fails (i.e.
         * because the device is busy or the cmd is not
         * supported) we pretend all blocks are allocated
         * for backwards compatibility */
        goto out;
    }

    lbas = scsi_datain_unmarshall(iTask.task);
    if (lbas == NULL) {
        ret = -EIO;
        goto out;
    }

    /* only the first descriptor is evaluated */
    lbasd = &lbas->descriptors[0];

    if (sector_qemu2lun(sector_num, iscsilun) != lbasd->lba) {
        ret = -EIO;
        goto out;
    }

    *pnum = sector_lun2qemu(lbasd->num_blocks, iscsilun);

    if (lbasd->provisioning == SCSI_PROVISIONING_TYPE_DEALLOCATED ||
        lbasd->provisioning == SCSI_PROVISIONING_TYPE_ANCHORED) {
        ret &= ~BDRV_BLOCK_DATA;
        /* unallocated blocks read as zero only if the LUN sets lbprz */
        if (iscsilun->lbprz) {
            ret |= BDRV_BLOCK_ZERO;
        }
    }

    if (ret & BDRV_BLOCK_ZERO) {
        iscsi_allocationmap_clear(iscsilun, sector_num, *pnum);
    } else {
        iscsi_allocationmap_set(iscsilun, sector_num, *pnum);
    }

    /* never report more sectors than the caller asked about */
    if (*pnum > nb_sectors) {
        *pnum = nb_sectors;
    }
out:
    if (iTask.task != NULL) {
        scsi_free_scsi_task(iTask.task);
    }
    return ret;
}
495 
496 #endif /* LIBISCSI_FEATURE_IOVECTOR */
497 
498 
/*
 * Coroutine read of nb_sectors QEMU sectors starting at sector_num into
 * iov.
 *
 * With iovector support, large reads (>= ISCSI_CHECKALLOC_THRES sectors)
 * on a LUN with lbprz set first consult the allocation map; if the range
 * may be unallocated, the block status is queried and the buffer is simply
 * zero-filled when the whole range reads as zero.
 *
 * Otherwise READ(16) is used for TYPE_DISK devices and READ(10) for all
 * other device types, with the command reissued while the completion
 * callback requests a retry.
 *
 * Returns 0 on success, -EINVAL for a misaligned request, -ENOMEM if the
 * task could not be created, -EIO if the final status is not GOOD, or the
 * negative value returned by the block status query.
 */
static int coroutine_fn iscsi_co_readv(BlockDriverState *bs,
                                       int64_t sector_num, int nb_sectors,
                                       QEMUIOVector *iov)
{
    IscsiLun *iscsilun = bs->opaque;
    struct IscsiTask iTask;
    uint64_t lba;
    uint32_t num_sectors;
#if !defined(LIBISCSI_FEATURE_IOVECTOR)
    int i;
#endif

    if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
        return -EINVAL;
    }

#if defined(LIBISCSI_FEATURE_IOVECTOR)
    if (iscsilun->lbprz && nb_sectors >= ISCSI_CHECKALLOC_THRES &&
        !iscsi_allocationmap_is_allocated(iscsilun, sector_num, nb_sectors)) {
        int64_t ret;
        int pnum;
        ret = iscsi_co_get_block_status(bs, sector_num, INT_MAX, &pnum);
        if (ret < 0) {
            return ret;
        }
        /* the complete request is known to read as zero: skip the wire */
        if (ret & BDRV_BLOCK_ZERO && pnum >= nb_sectors) {
            qemu_iovec_memset(iov, 0, 0x00, iov->size);
            return 0;
        }
    }
#endif

    lba = sector_qemu2lun(sector_num, iscsilun);
    num_sectors = sector_qemu2lun(nb_sectors, iscsilun);

    iscsi_co_init_iscsitask(iscsilun, &iTask);
retry:
    switch (iscsilun->type) {
    case TYPE_DISK:
        iTask.task = iscsi_read16_task(iscsilun->iscsi, iscsilun->lun, lba,
                                       num_sectors * iscsilun->block_size,
                                       iscsilun->block_size, 0, 0, 0, 0, 0,
                                       iscsi_co_generic_cb, &iTask);
        break;
    default:
        iTask.task = iscsi_read10_task(iscsilun->iscsi, iscsilun->lun, lba,
                                       num_sectors * iscsilun->block_size,
                                       iscsilun->block_size,
#if !defined(CONFIG_LIBISCSI_1_4) /* API change from 1.4.0 to 1.5.0 */
                                       0, 0, 0, 0, 0,
#endif
                                       iscsi_co_generic_cb, &iTask);
        break;
    }
    if (iTask.task == NULL) {
        return -ENOMEM;
    }
    /* attach the destination buffers so libiscsi reads straight into them */
#if defined(LIBISCSI_FEATURE_IOVECTOR)
    scsi_task_set_iov_in(iTask.task, (struct scsi_iovec *) iov->iov, iov->niov);
#else
    for (i = 0; i < iov->niov; i++) {
        scsi_task_add_data_in_buffer(iTask.task,
                                     iov->iov[i].iov_len,
                                     iov->iov[i].iov_base);
    }
#endif

    /* wait for the completion callback to re-enter this coroutine */
    while (!iTask.complete) {
        iscsi_set_events(iscsilun);
        qemu_coroutine_yield();
    }

    if (iTask.task != NULL) {
        scsi_free_scsi_task(iTask.task);
        iTask.task = NULL;
    }

    if (iTask.do_retry) {
        iTask.complete = 0;
        goto retry;
    }

    if (iTask.status != SCSI_STATUS_GOOD) {
        return -EIO;
    }

    return 0;
}
587 
588 static int coroutine_fn iscsi_co_flush(BlockDriverState *bs)
589 {
590     IscsiLun *iscsilun = bs->opaque;
591     struct IscsiTask iTask;
592 
593     if (bs->sg) {
594         return 0;
595     }
596 
597     iscsi_co_init_iscsitask(iscsilun, &iTask);
598 
599 retry:
600     if (iscsi_synchronizecache10_task(iscsilun->iscsi, iscsilun->lun, 0, 0, 0,
601                                       0, iscsi_co_generic_cb, &iTask) == NULL) {
602         return -ENOMEM;
603     }
604 
605     while (!iTask.complete) {
606         iscsi_set_events(iscsilun);
607         qemu_coroutine_yield();
608     }
609 
610     if (iTask.task != NULL) {
611         scsi_free_scsi_task(iTask.task);
612         iTask.task = NULL;
613     }
614 
615     if (iTask.do_retry) {
616         iTask.complete = 0;
617         goto retry;
618     }
619 
620     if (iTask.status != SCSI_STATUS_GOOD) {
621         return -EIO;
622     }
623 
624     return 0;
625 }
626 
627 #ifdef __linux__
628 static void
629 iscsi_aio_ioctl_cb(struct iscsi_context *iscsi, int status,
630                      void *command_data, void *opaque)
631 {
632     IscsiAIOCB *acb = opaque;
633 
634     g_free(acb->buf);
635     acb->buf = NULL;
636 
637     if (acb->canceled != 0) {
638         return;
639     }
640 
641     acb->status = 0;
642     if (status < 0) {
643         error_report("Failed to ioctl(SG_IO) to iSCSI lun. %s",
644                      iscsi_get_error(iscsi));
645         acb->status = -EIO;
646     }
647 
648     acb->ioh->driver_status = 0;
649     acb->ioh->host_status   = 0;
650     acb->ioh->resid         = 0;
651 
652 #define SG_ERR_DRIVER_SENSE    0x08
653 
654     if (status == SCSI_STATUS_CHECK_CONDITION && acb->task->datain.size >= 2) {
655         int ss;
656 
657         acb->ioh->driver_status |= SG_ERR_DRIVER_SENSE;
658 
659         acb->ioh->sb_len_wr = acb->task->datain.size - 2;
660         ss = (acb->ioh->mx_sb_len >= acb->ioh->sb_len_wr) ?
661              acb->ioh->mx_sb_len : acb->ioh->sb_len_wr;
662         memcpy(acb->ioh->sbp, &acb->task->datain.data[2], ss);
663     }
664 
665     iscsi_schedule_bh(acb);
666 }
667 
668 static BlockDriverAIOCB *iscsi_aio_ioctl(BlockDriverState *bs,
669         unsigned long int req, void *buf,
670         BlockDriverCompletionFunc *cb, void *opaque)
671 {
672     IscsiLun *iscsilun = bs->opaque;
673     struct iscsi_context *iscsi = iscsilun->iscsi;
674     struct iscsi_data data;
675     IscsiAIOCB *acb;
676 
677     assert(req == SG_IO);
678 
679     acb = qemu_aio_get(&iscsi_aiocb_info, bs, cb, opaque);
680 
681     acb->iscsilun = iscsilun;
682     acb->canceled    = 0;
683     acb->bh          = NULL;
684     acb->status      = -EINPROGRESS;
685     acb->buf         = NULL;
686     acb->ioh         = buf;
687 
688     acb->task = malloc(sizeof(struct scsi_task));
689     if (acb->task == NULL) {
690         error_report("iSCSI: Failed to allocate task for scsi command. %s",
691                      iscsi_get_error(iscsi));
692         qemu_aio_release(acb);
693         return NULL;
694     }
695     memset(acb->task, 0, sizeof(struct scsi_task));
696 
697     switch (acb->ioh->dxfer_direction) {
698     case SG_DXFER_TO_DEV:
699         acb->task->xfer_dir = SCSI_XFER_WRITE;
700         break;
701     case SG_DXFER_FROM_DEV:
702         acb->task->xfer_dir = SCSI_XFER_READ;
703         break;
704     default:
705         acb->task->xfer_dir = SCSI_XFER_NONE;
706         break;
707     }
708 
709     acb->task->cdb_size = acb->ioh->cmd_len;
710     memcpy(&acb->task->cdb[0], acb->ioh->cmdp, acb->ioh->cmd_len);
711     acb->task->expxferlen = acb->ioh->dxfer_len;
712 
713     data.size = 0;
714     if (acb->task->xfer_dir == SCSI_XFER_WRITE) {
715         if (acb->ioh->iovec_count == 0) {
716             data.data = acb->ioh->dxferp;
717             data.size = acb->ioh->dxfer_len;
718         } else {
719 #if defined(LIBISCSI_FEATURE_IOVECTOR)
720             scsi_task_set_iov_out(acb->task,
721                                  (struct scsi_iovec *) acb->ioh->dxferp,
722                                  acb->ioh->iovec_count);
723 #else
724             struct iovec *iov = (struct iovec *)acb->ioh->dxferp;
725 
726             acb->buf = g_malloc(acb->ioh->dxfer_len);
727             data.data = acb->buf;
728             data.size = iov_to_buf(iov, acb->ioh->iovec_count, 0,
729                                    acb->buf, acb->ioh->dxfer_len);
730 #endif
731         }
732     }
733 
734     if (iscsi_scsi_command_async(iscsi, iscsilun->lun, acb->task,
735                                  iscsi_aio_ioctl_cb,
736                                  (data.size > 0) ? &data : NULL,
737                                  acb) != 0) {
738         scsi_free_scsi_task(acb->task);
739         qemu_aio_release(acb);
740         return NULL;
741     }
742 
743     /* tell libiscsi to read straight into the buffer we got from ioctl */
744     if (acb->task->xfer_dir == SCSI_XFER_READ) {
745         if (acb->ioh->iovec_count == 0) {
746             scsi_task_add_data_in_buffer(acb->task,
747                                          acb->ioh->dxfer_len,
748                                          acb->ioh->dxferp);
749         } else {
750 #if defined(LIBISCSI_FEATURE_IOVECTOR)
751             scsi_task_set_iov_in(acb->task,
752                                  (struct scsi_iovec *) acb->ioh->dxferp,
753                                  acb->ioh->iovec_count);
754 #else
755             int i;
756             for (i = 0; i < acb->ioh->iovec_count; i++) {
757                 struct iovec *iov = (struct iovec *)acb->ioh->dxferp;
758 
759                 scsi_task_add_data_in_buffer(acb->task,
760                     iov[i].iov_len,
761                     iov[i].iov_base);
762             }
763 #endif
764         }
765     }
766 
767     iscsi_set_events(iscsilun);
768 
769     return &acb->common;
770 }
771 
772 
/* Completion callback for the synchronous SG_IO path: stores the final
 * status in the int that @opaque points to. */
static void ioctl_cb(void *opaque, int status)
{
    *(int *)opaque = status;
}
778 
779 static int iscsi_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
780 {
781     IscsiLun *iscsilun = bs->opaque;
782     int status;
783 
784     switch (req) {
785     case SG_GET_VERSION_NUM:
786         *(int *)buf = 30000;
787         break;
788     case SG_GET_SCSI_ID:
789         ((struct sg_scsi_id *)buf)->scsi_type = iscsilun->type;
790         break;
791     case SG_IO:
792         status = -EINPROGRESS;
793         iscsi_aio_ioctl(bs, req, buf, ioctl_cb, &status);
794 
795         while (status == -EINPROGRESS) {
796             qemu_aio_wait();
797         }
798 
799         return 0;
800     default:
801         return -1;
802     }
803     return 0;
804 }
805 #endif
806 
807 static int64_t
808 iscsi_getlength(BlockDriverState *bs)
809 {
810     IscsiLun *iscsilun = bs->opaque;
811     int64_t len;
812 
813     len  = iscsilun->num_blocks;
814     len *= iscsilun->block_size;
815 
816     return len;
817 }
818 
819 static int
820 coroutine_fn iscsi_co_discard(BlockDriverState *bs, int64_t sector_num,
821                                    int nb_sectors)
822 {
823     IscsiLun *iscsilun = bs->opaque;
824     struct IscsiTask iTask;
825     struct unmap_list list;
826 
827     if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
828         return -EINVAL;
829     }
830 
831     if (!iscsilun->lbp.lbpu) {
832         /* UNMAP is not supported by the target */
833         return 0;
834     }
835 
836     list.lba = sector_qemu2lun(sector_num, iscsilun);
837     list.num = sector_qemu2lun(nb_sectors, iscsilun);
838 
839     iscsi_co_init_iscsitask(iscsilun, &iTask);
840 retry:
841     if (iscsi_unmap_task(iscsilun->iscsi, iscsilun->lun, 0, 0, &list, 1,
842                      iscsi_co_generic_cb, &iTask) == NULL) {
843         return -ENOMEM;
844     }
845 
846     while (!iTask.complete) {
847         iscsi_set_events(iscsilun);
848         qemu_coroutine_yield();
849     }
850 
851     if (iTask.task != NULL) {
852         scsi_free_scsi_task(iTask.task);
853         iTask.task = NULL;
854     }
855 
856     if (iTask.do_retry) {
857         iTask.complete = 0;
858         goto retry;
859     }
860 
861     if (iTask.status == SCSI_STATUS_CHECK_CONDITION) {
862         /* the target might fail with a check condition if it
863            is not happy with the alignment of the UNMAP request
864            we silently fail in this case */
865         return 0;
866     }
867 
868     if (iTask.status != SCSI_STATUS_GOOD) {
869         return -EIO;
870     }
871 
872     iscsi_allocationmap_clear(iscsilun, sector_num, nb_sectors);
873 
874     return 0;
875 }
876 
877 #if defined(SCSI_SENSE_ASCQ_CAPACITY_DATA_HAS_CHANGED)
878 
879 static int
880 coroutine_fn iscsi_co_write_zeroes(BlockDriverState *bs, int64_t sector_num,
881                                    int nb_sectors, BdrvRequestFlags flags)
882 {
883     IscsiLun *iscsilun = bs->opaque;
884     struct IscsiTask iTask;
885     uint64_t lba;
886     uint32_t nb_blocks;
887 
888     if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
889         return -EINVAL;
890     }
891 
892     if ((flags & BDRV_REQ_MAY_UNMAP) && !iscsilun->lbp.lbpws) {
893         /* WRITE SAME with UNMAP is not supported by the target,
894          * fall back and try WRITE SAME without UNMAP */
895         flags &= ~BDRV_REQ_MAY_UNMAP;
896     }
897 
898     if (!(flags & BDRV_REQ_MAY_UNMAP) && !iscsilun->has_write_same) {
899         /* WRITE SAME without UNMAP is not supported by the target */
900         return -ENOTSUP;
901     }
902 
903     lba = sector_qemu2lun(sector_num, iscsilun);
904     nb_blocks = sector_qemu2lun(nb_sectors, iscsilun);
905 
906     if (iscsilun->zeroblock == NULL) {
907         iscsilun->zeroblock = g_malloc0(iscsilun->block_size);
908     }
909 
910     iscsi_co_init_iscsitask(iscsilun, &iTask);
911 retry:
912     if (iscsi_writesame16_task(iscsilun->iscsi, iscsilun->lun, lba,
913                                iscsilun->zeroblock, iscsilun->block_size,
914                                nb_blocks, 0, !!(flags & BDRV_REQ_MAY_UNMAP),
915                                0, 0, iscsi_co_generic_cb, &iTask) == NULL) {
916         return -ENOMEM;
917     }
918 
919     while (!iTask.complete) {
920         iscsi_set_events(iscsilun);
921         qemu_coroutine_yield();
922     }
923 
924     if (iTask.status == SCSI_STATUS_CHECK_CONDITION &&
925         iTask.task->sense.key == SCSI_SENSE_ILLEGAL_REQUEST &&
926         (iTask.task->sense.ascq == SCSI_SENSE_ASCQ_INVALID_OPERATION_CODE ||
927          iTask.task->sense.ascq == SCSI_SENSE_ASCQ_INVALID_FIELD_IN_CDB)) {
928         /* WRITE SAME is not supported by the target */
929         iscsilun->has_write_same = false;
930         scsi_free_scsi_task(iTask.task);
931         return -ENOTSUP;
932     }
933 
934     if (iTask.task != NULL) {
935         scsi_free_scsi_task(iTask.task);
936         iTask.task = NULL;
937     }
938 
939     if (iTask.do_retry) {
940         iTask.complete = 0;
941         goto retry;
942     }
943 
944     if (iTask.status != SCSI_STATUS_GOOD) {
945         return -EIO;
946     }
947 
948     if (flags & BDRV_REQ_MAY_UNMAP) {
949         iscsi_allocationmap_clear(iscsilun, sector_num, nb_sectors);
950     } else {
951         iscsi_allocationmap_set(iscsilun, sector_num, nb_sectors);
952     }
953 
954     return 0;
955 }
956 
957 #endif /* SCSI_SENSE_ASCQ_CAPACITY_DATA_HAS_CHANGED */
958 
959 static void parse_chap(struct iscsi_context *iscsi, const char *target,
960                        Error **errp)
961 {
962     QemuOptsList *list;
963     QemuOpts *opts;
964     const char *user = NULL;
965     const char *password = NULL;
966 
967     list = qemu_find_opts("iscsi");
968     if (!list) {
969         return;
970     }
971 
972     opts = qemu_opts_find(list, target);
973     if (opts == NULL) {
974         opts = QTAILQ_FIRST(&list->head);
975         if (!opts) {
976             return;
977         }
978     }
979 
980     user = qemu_opt_get(opts, "user");
981     if (!user) {
982         return;
983     }
984 
985     password = qemu_opt_get(opts, "password");
986     if (!password) {
987         error_setg(errp, "CHAP username specified but no password was given");
988         return;
989     }
990 
991     if (iscsi_set_initiator_username_pwd(iscsi, user, password)) {
992         error_setg(errp, "Failed to set initiator username and password");
993     }
994 }
995 
996 static void parse_header_digest(struct iscsi_context *iscsi, const char *target,
997                                 Error **errp)
998 {
999     QemuOptsList *list;
1000     QemuOpts *opts;
1001     const char *digest = NULL;
1002 
1003     list = qemu_find_opts("iscsi");
1004     if (!list) {
1005         return;
1006     }
1007 
1008     opts = qemu_opts_find(list, target);
1009     if (opts == NULL) {
1010         opts = QTAILQ_FIRST(&list->head);
1011         if (!opts) {
1012             return;
1013         }
1014     }
1015 
1016     digest = qemu_opt_get(opts, "header-digest");
1017     if (!digest) {
1018         return;
1019     }
1020 
1021     if (!strcmp(digest, "CRC32C")) {
1022         iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_CRC32C);
1023     } else if (!strcmp(digest, "NONE")) {
1024         iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE);
1025     } else if (!strcmp(digest, "CRC32C-NONE")) {
1026         iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_CRC32C_NONE);
1027     } else if (!strcmp(digest, "NONE-CRC32C")) {
1028         iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE_CRC32C);
1029     } else {
1030         error_setg(errp, "Invalid header-digest setting : %s", digest);
1031     }
1032 }
1033 
1034 static char *parse_initiator_name(const char *target)
1035 {
1036     QemuOptsList *list;
1037     QemuOpts *opts;
1038     const char *name;
1039     char *iscsi_name;
1040     UuidInfo *uuid_info;
1041 
1042     list = qemu_find_opts("iscsi");
1043     if (list) {
1044         opts = qemu_opts_find(list, target);
1045         if (!opts) {
1046             opts = QTAILQ_FIRST(&list->head);
1047         }
1048         if (opts) {
1049             name = qemu_opt_get(opts, "initiator-name");
1050             if (name) {
1051                 return g_strdup(name);
1052             }
1053         }
1054     }
1055 
1056     uuid_info = qmp_query_uuid(NULL);
1057     if (strcmp(uuid_info->UUID, UUID_NONE) == 0) {
1058         name = qemu_get_vm_name();
1059     } else {
1060         name = uuid_info->UUID;
1061     }
1062     iscsi_name = g_strdup_printf("iqn.2008-11.org.linux-kvm%s%s",
1063                                  name ? ":" : "", name ? name : "");
1064     qapi_free_UuidInfo(uuid_info);
1065     return iscsi_name;
1066 }
1067 
1068 #if defined(LIBISCSI_FEATURE_NOP_COUNTER)
1069 static void iscsi_nop_timed_event(void *opaque)
1070 {
1071     IscsiLun *iscsilun = opaque;
1072 
1073     if (iscsi_get_nops_in_flight(iscsilun->iscsi) > MAX_NOP_FAILURES) {
1074         error_report("iSCSI: NOP timeout. Reconnecting...");
1075         iscsi_reconnect(iscsilun->iscsi);
1076     }
1077 
1078     if (iscsi_nop_out_async(iscsilun->iscsi, NULL, NULL, 0, NULL) != 0) {
1079         error_report("iSCSI: failed to sent NOP-Out. Disabling NOP messages.");
1080         return;
1081     }
1082 
1083     timer_mod(iscsilun->nop_timer, qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + NOP_INTERVAL);
1084     iscsi_set_events(iscsilun);
1085 }
1086 #endif
1087 
1088 static void iscsi_readcapacity_sync(IscsiLun *iscsilun, Error **errp)
1089 {
1090     struct scsi_task *task = NULL;
1091     struct scsi_readcapacity10 *rc10 = NULL;
1092     struct scsi_readcapacity16 *rc16 = NULL;
1093     int retries = ISCSI_CMD_RETRIES;
1094 
1095     do {
1096         if (task != NULL) {
1097             scsi_free_scsi_task(task);
1098             task = NULL;
1099         }
1100 
1101         switch (iscsilun->type) {
1102         case TYPE_DISK:
1103             task = iscsi_readcapacity16_sync(iscsilun->iscsi, iscsilun->lun);
1104             if (task != NULL && task->status == SCSI_STATUS_GOOD) {
1105                 rc16 = scsi_datain_unmarshall(task);
1106                 if (rc16 == NULL) {
1107                     error_setg(errp, "iSCSI: Failed to unmarshall readcapacity16 data.");
1108                 } else {
1109                     iscsilun->block_size = rc16->block_length;
1110                     iscsilun->num_blocks = rc16->returned_lba + 1;
1111                     iscsilun->lbpme = rc16->lbpme;
1112                     iscsilun->lbprz = rc16->lbprz;
1113                 }
1114             }
1115             break;
1116         case TYPE_ROM:
1117             task = iscsi_readcapacity10_sync(iscsilun->iscsi, iscsilun->lun, 0, 0);
1118             if (task != NULL && task->status == SCSI_STATUS_GOOD) {
1119                 rc10 = scsi_datain_unmarshall(task);
1120                 if (rc10 == NULL) {
1121                     error_setg(errp, "iSCSI: Failed to unmarshall readcapacity10 data.");
1122                 } else {
1123                     iscsilun->block_size = rc10->block_size;
1124                     if (rc10->lba == 0) {
1125                         /* blank disk loaded */
1126                         iscsilun->num_blocks = 0;
1127                     } else {
1128                         iscsilun->num_blocks = rc10->lba + 1;
1129                     }
1130                 }
1131             }
1132             break;
1133         default:
1134             return;
1135         }
1136     } while (task != NULL && task->status == SCSI_STATUS_CHECK_CONDITION
1137              && task->sense.key == SCSI_SENSE_UNIT_ATTENTION
1138              && retries-- > 0);
1139 
1140     if (task == NULL || task->status != SCSI_STATUS_GOOD) {
1141         error_setg(errp, "iSCSI: failed to send readcapacity10 command.");
1142     }
1143     if (task) {
1144         scsi_free_scsi_task(task);
1145     }
1146 }
1147 
1148 /* TODO Convert to fine grained options */
1149 static QemuOptsList runtime_opts = {
1150     .name = "iscsi",
1151     .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
1152     .desc = {
1153         {
1154             .name = "filename",
1155             .type = QEMU_OPT_STRING,
1156             .help = "URL to the iscsi image",
1157         },
1158         { /* end of list */ }
1159     },
1160 };
1161 
1162 static struct scsi_task *iscsi_do_inquiry(struct iscsi_context *iscsi, int lun,
1163                                           int evpd, int pc, void **inq, Error **errp)
1164 {
1165     int full_size;
1166     struct scsi_task *task = NULL;
1167     task = iscsi_inquiry_sync(iscsi, lun, evpd, pc, 64);
1168     if (task == NULL || task->status != SCSI_STATUS_GOOD) {
1169         goto fail;
1170     }
1171     full_size = scsi_datain_getfullsize(task);
1172     if (full_size > task->datain.size) {
1173         scsi_free_scsi_task(task);
1174 
1175         /* we need more data for the full list */
1176         task = iscsi_inquiry_sync(iscsi, lun, evpd, pc, full_size);
1177         if (task == NULL || task->status != SCSI_STATUS_GOOD) {
1178             goto fail;
1179         }
1180     }
1181 
1182     *inq = scsi_datain_unmarshall(task);
1183     if (*inq == NULL) {
1184         error_setg(errp, "iSCSI: failed to unmarshall inquiry datain blob");
1185         goto fail_with_err;
1186     }
1187 
1188     return task;
1189 
1190 fail:
1191     error_setg(errp, "iSCSI: Inquiry command failed : %s",
1192                iscsi_get_error(iscsi));
1193 fail_with_err:
1194     if (task != NULL) {
1195         scsi_free_scsi_task(task);
1196     }
1197     return NULL;
1198 }
1199 
1200 /*
1201  * We support iscsi url's on the form
1202  * iscsi://[<username>%<password>@]<host>[:<port>]/<targetname>/<lun>
1203  *
1204  * Note: flags are currently not used by iscsi_open.  If this function
1205  * is changed such that flags are used, please examine iscsi_reopen_prepare()
1206  * to see if needs to be changed as well.
1207  */
1208 static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
1209                       Error **errp)
1210 {
1211     IscsiLun *iscsilun = bs->opaque;
1212     struct iscsi_context *iscsi = NULL;
1213     struct iscsi_url *iscsi_url = NULL;
1214     struct scsi_task *task = NULL;
1215     struct scsi_inquiry_standard *inq = NULL;
1216     struct scsi_inquiry_supported_pages *inq_vpd;
1217     char *initiator_name = NULL;
1218     QemuOpts *opts;
1219     Error *local_err = NULL;
1220     const char *filename;
1221     int i, ret;
1222 
1223     if ((BDRV_SECTOR_SIZE % 512) != 0) {
1224         error_setg(errp, "iSCSI: Invalid BDRV_SECTOR_SIZE. "
1225                    "BDRV_SECTOR_SIZE(%lld) is not a multiple "
1226                    "of 512", BDRV_SECTOR_SIZE);
1227         return -EINVAL;
1228     }
1229 
1230     opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
1231     qemu_opts_absorb_qdict(opts, options, &local_err);
1232     if (local_err) {
1233         error_propagate(errp, local_err);
1234         ret = -EINVAL;
1235         goto out;
1236     }
1237 
1238     filename = qemu_opt_get(opts, "filename");
1239 
1240     iscsi_url = iscsi_parse_full_url(iscsi, filename);
1241     if (iscsi_url == NULL) {
1242         error_setg(errp, "Failed to parse URL : %s", filename);
1243         ret = -EINVAL;
1244         goto out;
1245     }
1246 
1247     memset(iscsilun, 0, sizeof(IscsiLun));
1248 
1249     initiator_name = parse_initiator_name(iscsi_url->target);
1250 
1251     iscsi = iscsi_create_context(initiator_name);
1252     if (iscsi == NULL) {
1253         error_setg(errp, "iSCSI: Failed to create iSCSI context.");
1254         ret = -ENOMEM;
1255         goto out;
1256     }
1257 
1258     if (iscsi_set_targetname(iscsi, iscsi_url->target)) {
1259         error_setg(errp, "iSCSI: Failed to set target name.");
1260         ret = -EINVAL;
1261         goto out;
1262     }
1263 
1264     if (iscsi_url->user != NULL) {
1265         ret = iscsi_set_initiator_username_pwd(iscsi, iscsi_url->user,
1266                                               iscsi_url->passwd);
1267         if (ret != 0) {
1268             error_setg(errp, "Failed to set initiator username and password");
1269             ret = -EINVAL;
1270             goto out;
1271         }
1272     }
1273 
1274     /* check if we got CHAP username/password via the options */
1275     parse_chap(iscsi, iscsi_url->target, &local_err);
1276     if (local_err != NULL) {
1277         error_propagate(errp, local_err);
1278         ret = -EINVAL;
1279         goto out;
1280     }
1281 
1282     if (iscsi_set_session_type(iscsi, ISCSI_SESSION_NORMAL) != 0) {
1283         error_setg(errp, "iSCSI: Failed to set session type to normal.");
1284         ret = -EINVAL;
1285         goto out;
1286     }
1287 
1288     iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE_CRC32C);
1289 
1290     /* check if we got HEADER_DIGEST via the options */
1291     parse_header_digest(iscsi, iscsi_url->target, &local_err);
1292     if (local_err != NULL) {
1293         error_propagate(errp, local_err);
1294         ret = -EINVAL;
1295         goto out;
1296     }
1297 
1298     if (iscsi_full_connect_sync(iscsi, iscsi_url->portal, iscsi_url->lun) != 0) {
1299         error_setg(errp, "iSCSI: Failed to connect to LUN : %s",
1300             iscsi_get_error(iscsi));
1301         ret = -EINVAL;
1302         goto out;
1303     }
1304 
1305     iscsilun->iscsi = iscsi;
1306     iscsilun->lun   = iscsi_url->lun;
1307     iscsilun->has_write_same = true;
1308 
1309     task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 0, 0,
1310                             (void **) &inq, errp);
1311     if (task == NULL) {
1312         ret = -EINVAL;
1313         goto out;
1314     }
1315     iscsilun->type = inq->periperal_device_type;
1316     scsi_free_scsi_task(task);
1317     task = NULL;
1318 
1319     iscsi_readcapacity_sync(iscsilun, &local_err);
1320     if (local_err != NULL) {
1321         error_propagate(errp, local_err);
1322         ret = -EINVAL;
1323         goto out;
1324     }
1325     bs->total_sectors = sector_lun2qemu(iscsilun->num_blocks, iscsilun);
1326     bs->request_alignment = iscsilun->block_size;
1327 
1328     /* We don't have any emulation for devices other than disks and CD-ROMs, so
1329      * this must be sg ioctl compatible. We force it to be sg, otherwise qemu
1330      * will try to read from the device to guess the image format.
1331      */
1332     if (iscsilun->type != TYPE_DISK && iscsilun->type != TYPE_ROM) {
1333         bs->sg = 1;
1334     }
1335 
1336     task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 1,
1337                             SCSI_INQUIRY_PAGECODE_SUPPORTED_VPD_PAGES,
1338                             (void **) &inq_vpd, errp);
1339     if (task == NULL) {
1340         ret = -EINVAL;
1341         goto out;
1342     }
1343     for (i = 0; i < inq_vpd->num_pages; i++) {
1344         struct scsi_task *inq_task;
1345         struct scsi_inquiry_logical_block_provisioning *inq_lbp;
1346         struct scsi_inquiry_block_limits *inq_bl;
1347         switch (inq_vpd->pages[i]) {
1348         case SCSI_INQUIRY_PAGECODE_LOGICAL_BLOCK_PROVISIONING:
1349             inq_task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 1,
1350                                         SCSI_INQUIRY_PAGECODE_LOGICAL_BLOCK_PROVISIONING,
1351                                         (void **) &inq_lbp, errp);
1352             if (inq_task == NULL) {
1353                 ret = -EINVAL;
1354                 goto out;
1355             }
1356             memcpy(&iscsilun->lbp, inq_lbp,
1357                    sizeof(struct scsi_inquiry_logical_block_provisioning));
1358             scsi_free_scsi_task(inq_task);
1359             break;
1360         case SCSI_INQUIRY_PAGECODE_BLOCK_LIMITS:
1361             inq_task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 1,
1362                                     SCSI_INQUIRY_PAGECODE_BLOCK_LIMITS,
1363                                     (void **) &inq_bl, errp);
1364             if (inq_task == NULL) {
1365                 ret = -EINVAL;
1366                 goto out;
1367             }
1368             memcpy(&iscsilun->bl, inq_bl,
1369                    sizeof(struct scsi_inquiry_block_limits));
1370             scsi_free_scsi_task(inq_task);
1371             break;
1372         default:
1373             break;
1374         }
1375     }
1376     scsi_free_scsi_task(task);
1377     task = NULL;
1378 
1379 #if defined(LIBISCSI_FEATURE_NOP_COUNTER)
1380     /* Set up a timer for sending out iSCSI NOPs */
1381     iscsilun->nop_timer = timer_new_ms(QEMU_CLOCK_REALTIME, iscsi_nop_timed_event, iscsilun);
1382     timer_mod(iscsilun->nop_timer, qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + NOP_INTERVAL);
1383 #endif
1384 
1385     /* Guess the internal cluster (page) size of the iscsi target by the means
1386      * of opt_unmap_gran. Transfer the unmap granularity only if it has a
1387      * reasonable size */
1388     if (iscsilun->bl.opt_unmap_gran * iscsilun->block_size >= 4 * 1024 &&
1389         iscsilun->bl.opt_unmap_gran * iscsilun->block_size <= 16 * 1024 * 1024) {
1390         iscsilun->cluster_sectors = (iscsilun->bl.opt_unmap_gran *
1391                                      iscsilun->block_size) >> BDRV_SECTOR_BITS;
1392 #if defined(LIBISCSI_FEATURE_IOVECTOR)
1393         if (iscsilun->lbprz && !(bs->open_flags & BDRV_O_NOCACHE)) {
1394             iscsilun->allocationmap =
1395                 bitmap_new(DIV_ROUND_UP(bs->total_sectors,
1396                                         iscsilun->cluster_sectors));
1397         }
1398 #endif
1399     }
1400 
1401 out:
1402     qemu_opts_del(opts);
1403     if (initiator_name != NULL) {
1404         g_free(initiator_name);
1405     }
1406     if (iscsi_url != NULL) {
1407         iscsi_destroy_url(iscsi_url);
1408     }
1409     if (task != NULL) {
1410         scsi_free_scsi_task(task);
1411     }
1412 
1413     if (ret) {
1414         if (iscsi != NULL) {
1415             iscsi_destroy_context(iscsi);
1416         }
1417         memset(iscsilun, 0, sizeof(IscsiLun));
1418     }
1419     return ret;
1420 }
1421 
1422 static void iscsi_close(BlockDriverState *bs)
1423 {
1424     IscsiLun *iscsilun = bs->opaque;
1425     struct iscsi_context *iscsi = iscsilun->iscsi;
1426 
1427     if (iscsilun->nop_timer) {
1428         timer_del(iscsilun->nop_timer);
1429         timer_free(iscsilun->nop_timer);
1430     }
1431     qemu_aio_set_fd_handler(iscsi_get_fd(iscsi), NULL, NULL, NULL);
1432     iscsi_destroy_context(iscsi);
1433     g_free(iscsilun->zeroblock);
1434     g_free(iscsilun->allocationmap);
1435     memset(iscsilun, 0, sizeof(IscsiLun));
1436 }
1437 
1438 static int iscsi_refresh_limits(BlockDriverState *bs)
1439 {
1440     IscsiLun *iscsilun = bs->opaque;
1441 
1442     /* We don't actually refresh here, but just return data queried in
1443      * iscsi_open(): iscsi targets don't change their limits. */
1444     if (iscsilun->lbp.lbpu) {
1445         if (iscsilun->bl.max_unmap < 0xffffffff) {
1446             bs->bl.max_discard = sector_lun2qemu(iscsilun->bl.max_unmap,
1447                                                  iscsilun);
1448         }
1449         bs->bl.discard_alignment = sector_lun2qemu(iscsilun->bl.opt_unmap_gran,
1450                                                    iscsilun);
1451     }
1452 
1453     if (iscsilun->bl.max_ws_len < 0xffffffff) {
1454         bs->bl.max_write_zeroes = sector_lun2qemu(iscsilun->bl.max_ws_len,
1455                                                   iscsilun);
1456     }
1457     if (iscsilun->lbp.lbpws) {
1458         bs->bl.write_zeroes_alignment = sector_lun2qemu(iscsilun->bl.opt_unmap_gran,
1459                                                         iscsilun);
1460     }
1461     bs->bl.opt_transfer_length = sector_lun2qemu(iscsilun->bl.opt_xfer_len,
1462                                                  iscsilun);
1463     return 0;
1464 }
1465 
1466 /* Since iscsi_open() ignores bdrv_flags, there is nothing to do here in
1467  * prepare.  Note that this will not re-establish a connection with an iSCSI
1468  * target - it is effectively a NOP.  */
1469 static int iscsi_reopen_prepare(BDRVReopenState *state,
1470                                 BlockReopenQueue *queue, Error **errp)
1471 {
1472     /* NOP */
1473     return 0;
1474 }
1475 
1476 static int iscsi_truncate(BlockDriverState *bs, int64_t offset)
1477 {
1478     IscsiLun *iscsilun = bs->opaque;
1479     Error *local_err = NULL;
1480 
1481     if (iscsilun->type != TYPE_DISK) {
1482         return -ENOTSUP;
1483     }
1484 
1485     iscsi_readcapacity_sync(iscsilun, &local_err);
1486     if (local_err != NULL) {
1487         error_free(local_err);
1488         return -EIO;
1489     }
1490 
1491     if (offset > iscsi_getlength(bs)) {
1492         return -EINVAL;
1493     }
1494 
1495     if (iscsilun->allocationmap != NULL) {
1496         g_free(iscsilun->allocationmap);
1497         iscsilun->allocationmap =
1498             bitmap_new(DIV_ROUND_UP(bs->total_sectors,
1499                                     iscsilun->cluster_sectors));
1500     }
1501 
1502     return 0;
1503 }
1504 
1505 static int iscsi_create(const char *filename, QEMUOptionParameter *options,
1506                         Error **errp)
1507 {
1508     int ret = 0;
1509     int64_t total_size = 0;
1510     BlockDriverState *bs;
1511     IscsiLun *iscsilun = NULL;
1512     QDict *bs_options;
1513 
1514     bs = bdrv_new("", &error_abort);
1515 
1516     /* Read out options */
1517     while (options && options->name) {
1518         if (!strcmp(options->name, "size")) {
1519             total_size = options->value.n / BDRV_SECTOR_SIZE;
1520         }
1521         options++;
1522     }
1523 
1524     bs->opaque = g_malloc0(sizeof(struct IscsiLun));
1525     iscsilun = bs->opaque;
1526 
1527     bs_options = qdict_new();
1528     qdict_put(bs_options, "filename", qstring_from_str(filename));
1529     ret = iscsi_open(bs, bs_options, 0, NULL);
1530     QDECREF(bs_options);
1531 
1532     if (ret != 0) {
1533         goto out;
1534     }
1535     if (iscsilun->nop_timer) {
1536         timer_del(iscsilun->nop_timer);
1537         timer_free(iscsilun->nop_timer);
1538     }
1539     if (iscsilun->type != TYPE_DISK) {
1540         ret = -ENODEV;
1541         goto out;
1542     }
1543     if (bs->total_sectors < total_size) {
1544         ret = -ENOSPC;
1545         goto out;
1546     }
1547 
1548     ret = 0;
1549 out:
1550     if (iscsilun->iscsi != NULL) {
1551         iscsi_destroy_context(iscsilun->iscsi);
1552     }
1553     g_free(bs->opaque);
1554     bs->opaque = NULL;
1555     bdrv_unref(bs);
1556     return ret;
1557 }
1558 
1559 static int iscsi_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
1560 {
1561     IscsiLun *iscsilun = bs->opaque;
1562     bdi->unallocated_blocks_are_zero = !!iscsilun->lbprz;
1563     bdi->can_write_zeroes_with_unmap = iscsilun->lbprz && iscsilun->lbp.lbpws;
1564     bdi->cluster_size = iscsilun->cluster_sectors * BDRV_SECTOR_SIZE;
1565     return 0;
1566 }
1567 
1568 static QEMUOptionParameter iscsi_create_options[] = {
1569     {
1570         .name = BLOCK_OPT_SIZE,
1571         .type = OPT_SIZE,
1572         .help = "Virtual disk size"
1573     },
1574     { NULL }
1575 };
1576 
1577 static BlockDriver bdrv_iscsi = {
1578     .format_name     = "iscsi",
1579     .protocol_name   = "iscsi",
1580 
1581     .instance_size   = sizeof(IscsiLun),
1582     .bdrv_needs_filename = true,
1583     .bdrv_file_open  = iscsi_open,
1584     .bdrv_close      = iscsi_close,
1585     .bdrv_create     = iscsi_create,
1586     .create_options  = iscsi_create_options,
1587     .bdrv_reopen_prepare  = iscsi_reopen_prepare,
1588 
1589     .bdrv_getlength  = iscsi_getlength,
1590     .bdrv_get_info   = iscsi_get_info,
1591     .bdrv_truncate   = iscsi_truncate,
1592     .bdrv_refresh_limits = iscsi_refresh_limits,
1593 
1594 #if defined(LIBISCSI_FEATURE_IOVECTOR)
1595     .bdrv_co_get_block_status = iscsi_co_get_block_status,
1596 #endif
1597     .bdrv_co_discard      = iscsi_co_discard,
1598 #if defined(SCSI_SENSE_ASCQ_CAPACITY_DATA_HAS_CHANGED)
1599     .bdrv_co_write_zeroes = iscsi_co_write_zeroes,
1600 #endif
1601     .bdrv_co_readv         = iscsi_co_readv,
1602     .bdrv_co_writev        = iscsi_co_writev,
1603     .bdrv_co_flush_to_disk = iscsi_co_flush,
1604 
1605 #ifdef __linux__
1606     .bdrv_ioctl       = iscsi_ioctl,
1607     .bdrv_aio_ioctl   = iscsi_aio_ioctl,
1608 #endif
1609 };
1610 
1611 static QemuOptsList qemu_iscsi_opts = {
1612     .name = "iscsi",
1613     .head = QTAILQ_HEAD_INITIALIZER(qemu_iscsi_opts.head),
1614     .desc = {
1615         {
1616             .name = "user",
1617             .type = QEMU_OPT_STRING,
1618             .help = "username for CHAP authentication to target",
1619         },{
1620             .name = "password",
1621             .type = QEMU_OPT_STRING,
1622             .help = "password for CHAP authentication to target",
1623         },{
1624             .name = "header-digest",
1625             .type = QEMU_OPT_STRING,
1626             .help = "HeaderDigest setting. "
1627                     "{CRC32C|CRC32C-NONE|NONE-CRC32C|NONE}",
1628         },{
1629             .name = "initiator-name",
1630             .type = QEMU_OPT_STRING,
1631             .help = "Initiator iqn name to use when connecting",
1632         },
1633         { /* end of list */ }
1634     },
1635 };
1636 
1637 static void iscsi_block_init(void)
1638 {
1639     bdrv_register(&bdrv_iscsi);
1640     qemu_add_opts(&qemu_iscsi_opts);
1641 }
1642 
1643 block_init(iscsi_block_init);
1644