xref: /openbmc/qemu/block/iscsi.c (revision 95faaa73)
1 /*
2  * QEMU Block driver for iSCSI images
3  *
4  * Copyright (c) 2010-2011 Ronnie Sahlberg <ronniesahlberg@gmail.com>
5  * Copyright (c) 2012-2014 Peter Lieven <pl@kamp.de>
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a copy
8  * of this software and associated documentation files (the "Software"), to deal
9  * in the Software without restriction, including without limitation the rights
10  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11  * copies of the Software, and to permit persons to whom the Software is
12  * furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice shall be included in
15  * all copies or substantial portions of the Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23  * THE SOFTWARE.
24  */
25 
26 #include "config-host.h"
27 
28 #include <poll.h>
29 #include <arpa/inet.h>
30 #include "qemu-common.h"
31 #include "qemu/config-file.h"
32 #include "qemu/error-report.h"
33 #include "qemu/bitops.h"
34 #include "qemu/bitmap.h"
35 #include "block/block_int.h"
36 #include "trace.h"
37 #include "block/scsi.h"
38 #include "qemu/iov.h"
39 #include "sysemu/sysemu.h"
40 #include "qmp-commands.h"
41 
42 #include <iscsi/iscsi.h>
43 #include <iscsi/scsi-lowlevel.h>
44 
45 #ifdef __linux__
46 #include <scsi/sg.h>
47 #include <block/scsi.h>
48 #endif
49 
/* Per-LUN driver state; stored as the BlockDriverState's opaque pointer. */
typedef struct IscsiLun {
    struct iscsi_context *iscsi;    /* libiscsi session used for all commands */
    int lun;                        /* LUN number passed to every libiscsi call */
    enum scsi_inquiry_peripheral_device_type type; /* TYPE_DISK selects READ16 */
    int block_size;                 /* LUN block size, in bytes */
    uint64_t num_blocks;            /* LUN capacity, in LUN blocks */
    int events;                     /* poll mask last seen by iscsi_set_events() */
    QEMUTimer *nop_timer;           /* presumably drives periodic NOP-Outs */
    uint8_t lbpme;                  /* nonzero: GET LBA STATUS is attempted */
    uint8_t lbprz;                  /* nonzero: unmapped blocks read as zero */
    uint8_t has_write_same;         /* cleared once WRITE SAME is rejected */
    struct scsi_inquiry_logical_block_provisioning lbp; /* lbpu/lbpws flags */
    struct scsi_inquiry_block_limits bl;
    unsigned char *zeroblock;       /* lazily allocated zero block for WRITE SAME */
    unsigned long *allocationmap;   /* one bit per cluster; NULL when unused */
    int cluster_sectors;            /* 512-byte sectors covered by one map bit */
} IscsiLun;
67 
/* Bookkeeping for one SCSI command issued from coroutine context. */
typedef struct IscsiTask {
    int status;             /* SCSI status handed to iscsi_co_generic_cb() */
    int complete;           /* set to 1 by the completion callback */
    int retries;            /* remaining retry budget (ISCSI_CMD_RETRIES) */
    int do_retry;           /* set when the caller should reissue the command */
    struct scsi_task *task; /* libiscsi task; freed by the issuing coroutine */
    Coroutine *co;          /* coroutine to re-enter on completion */
    QEMUBH *bh;             /* bottom half used to re-enter the coroutine */
} IscsiTask;
77 
/* AIO control block for requests that use the callback-based interface. */
typedef struct IscsiAIOCB {
    BlockDriverAIOCB common;    /* generic AIOCB header (cb, opaque) */
    QEMUIOVector *qiov;
    QEMUBH *bh;                 /* completion bottom half, NULL until scheduled */
    IscsiLun *iscsilun;
    struct scsi_task *task;     /* libiscsi task, freed in iscsi_bh_cb() */
    uint8_t *buf;               /* optional bounce buffer, freed on completion */
    int status;                 /* -EINPROGRESS until the request finishes */
    int canceled;               /* nonzero suppresses the completion callback */
    int retries;
    int64_t sector_num;
    int nb_sectors;
#ifdef __linux__
    sg_io_hdr_t *ioh;           /* caller's SG_IO header for ioctl requests */
#endif
} IscsiAIOCB;
94 
95 #define NOP_INTERVAL 5000
96 #define MAX_NOP_FAILURES 3
97 #define ISCSI_CMD_RETRIES 5
98 
/* This threshold is a trade-off knob to choose between
 * the potential additional overhead of an extra GET_LBA_STATUS request
 * vs. unnecessarily reading a lot of zero sectors over the wire.
 * If a read request is greater than or equal to ISCSI_CHECKALLOC_THRES
 * sectors, we first check the allocation status of the area covered by
 * the request if the allocationmap indicates that the area might be
 * unallocated. */
106 #define ISCSI_CHECKALLOC_THRES 64
107 
108 static void
109 iscsi_bh_cb(void *p)
110 {
111     IscsiAIOCB *acb = p;
112 
113     qemu_bh_delete(acb->bh);
114 
115     g_free(acb->buf);
116     acb->buf = NULL;
117 
118     if (acb->canceled == 0) {
119         acb->common.cb(acb->common.opaque, acb->status);
120     }
121 
122     if (acb->task != NULL) {
123         scsi_free_scsi_task(acb->task);
124         acb->task = NULL;
125     }
126 
127     qemu_aio_release(acb);
128 }
129 
130 static void
131 iscsi_schedule_bh(IscsiAIOCB *acb)
132 {
133     if (acb->bh) {
134         return;
135     }
136     acb->bh = qemu_bh_new(iscsi_bh_cb, acb);
137     qemu_bh_schedule(acb->bh);
138 }
139 
140 static void iscsi_co_generic_bh_cb(void *opaque)
141 {
142     struct IscsiTask *iTask = opaque;
143     qemu_bh_delete(iTask->bh);
144     qemu_coroutine_enter(iTask->co, NULL);
145 }
146 
147 static void
148 iscsi_co_generic_cb(struct iscsi_context *iscsi, int status,
149                         void *command_data, void *opaque)
150 {
151     struct IscsiTask *iTask = opaque;
152     struct scsi_task *task = command_data;
153 
154     iTask->complete = 1;
155     iTask->status = status;
156     iTask->do_retry = 0;
157     iTask->task = task;
158 
159     if (iTask->retries-- > 0 && status == SCSI_STATUS_CHECK_CONDITION
160         && task->sense.key == SCSI_SENSE_UNIT_ATTENTION) {
161         error_report("iSCSI CheckCondition: %s", iscsi_get_error(iscsi));
162         iTask->do_retry = 1;
163         goto out;
164     }
165 
166     if (status != SCSI_STATUS_GOOD) {
167         error_report("iSCSI Failure: %s", iscsi_get_error(iscsi));
168     }
169 
170 out:
171     if (iTask->co) {
172         iTask->bh = qemu_bh_new(iscsi_co_generic_bh_cb, iTask);
173         qemu_bh_schedule(iTask->bh);
174     }
175 }
176 
177 static void iscsi_co_init_iscsitask(IscsiLun *iscsilun, struct IscsiTask *iTask)
178 {
179     *iTask = (struct IscsiTask) {
180         .co         = qemu_coroutine_self(),
181         .retries    = ISCSI_CMD_RETRIES,
182     };
183 }
184 
185 static void
186 iscsi_abort_task_cb(struct iscsi_context *iscsi, int status, void *command_data,
187                     void *private_data)
188 {
189     IscsiAIOCB *acb = private_data;
190 
191     acb->status = -ECANCELED;
192     iscsi_schedule_bh(acb);
193 }
194 
195 static void
196 iscsi_aio_cancel(BlockDriverAIOCB *blockacb)
197 {
198     IscsiAIOCB *acb = (IscsiAIOCB *)blockacb;
199     IscsiLun *iscsilun = acb->iscsilun;
200 
201     if (acb->status != -EINPROGRESS) {
202         return;
203     }
204 
205     acb->canceled = 1;
206 
207     /* send a task mgmt call to the target to cancel the task on the target */
208     iscsi_task_mgmt_abort_task_async(iscsilun->iscsi, acb->task,
209                                      iscsi_abort_task_cb, acb);
210 
211     while (acb->status == -EINPROGRESS) {
212         qemu_aio_wait();
213     }
214 }
215 
/* AIOCB descriptor passed to qemu_aio_get(): size of our control block
 * and the handler used to cancel it. */
static const AIOCBInfo iscsi_aiocb_info = {
    .aiocb_size         = sizeof(IscsiAIOCB),
    .cancel             = iscsi_aio_cancel,
};
220 
221 
/* Forward declarations: iscsi_set_events() registers these fd handlers
 * before their definitions appear. */
static void iscsi_process_read(void *arg);
static void iscsi_process_write(void *arg);
224 
225 static void
226 iscsi_set_events(IscsiLun *iscsilun)
227 {
228     struct iscsi_context *iscsi = iscsilun->iscsi;
229     int ev;
230 
231     /* We always register a read handler.  */
232     ev = POLLIN;
233     ev |= iscsi_which_events(iscsi);
234     if (ev != iscsilun->events) {
235         qemu_aio_set_fd_handler(iscsi_get_fd(iscsi),
236                       iscsi_process_read,
237                       (ev & POLLOUT) ? iscsi_process_write : NULL,
238                       iscsilun);
239 
240     }
241 
242     iscsilun->events = ev;
243 }
244 
245 static void
246 iscsi_process_read(void *arg)
247 {
248     IscsiLun *iscsilun = arg;
249     struct iscsi_context *iscsi = iscsilun->iscsi;
250 
251     iscsi_service(iscsi, POLLIN);
252     iscsi_set_events(iscsilun);
253 }
254 
255 static void
256 iscsi_process_write(void *arg)
257 {
258     IscsiLun *iscsilun = arg;
259     struct iscsi_context *iscsi = iscsilun->iscsi;
260 
261     iscsi_service(iscsi, POLLOUT);
262     iscsi_set_events(iscsilun);
263 }
264 
265 static int64_t sector_lun2qemu(int64_t sector, IscsiLun *iscsilun)
266 {
267     return sector * iscsilun->block_size / BDRV_SECTOR_SIZE;
268 }
269 
270 static int64_t sector_qemu2lun(int64_t sector, IscsiLun *iscsilun)
271 {
272     return sector * BDRV_SECTOR_SIZE / iscsilun->block_size;
273 }
274 
275 static bool is_request_lun_aligned(int64_t sector_num, int nb_sectors,
276                                       IscsiLun *iscsilun)
277 {
278     if ((sector_num * BDRV_SECTOR_SIZE) % iscsilun->block_size ||
279         (nb_sectors * BDRV_SECTOR_SIZE) % iscsilun->block_size) {
280             error_report("iSCSI misaligned request: "
281                          "iscsilun->block_size %u, sector_num %" PRIi64
282                          ", nb_sectors %d",
283                          iscsilun->block_size, sector_num, nb_sectors);
284             return 0;
285     }
286     return 1;
287 }
288 
289 static void iscsi_allocationmap_set(IscsiLun *iscsilun, int64_t sector_num,
290                                     int nb_sectors)
291 {
292     if (iscsilun->allocationmap == NULL) {
293         return;
294     }
295     bitmap_set(iscsilun->allocationmap,
296                sector_num / iscsilun->cluster_sectors,
297                DIV_ROUND_UP(nb_sectors, iscsilun->cluster_sectors));
298 }
299 
300 static void iscsi_allocationmap_clear(IscsiLun *iscsilun, int64_t sector_num,
301                                       int nb_sectors)
302 {
303     int64_t cluster_num, nb_clusters;
304     if (iscsilun->allocationmap == NULL) {
305         return;
306     }
307     cluster_num = DIV_ROUND_UP(sector_num, iscsilun->cluster_sectors);
308     nb_clusters = (sector_num + nb_sectors) / iscsilun->cluster_sectors
309                   - cluster_num;
310     if (nb_clusters > 0) {
311         bitmap_clear(iscsilun->allocationmap, cluster_num, nb_clusters);
312     }
313 }
314 
315 static int coroutine_fn iscsi_co_writev(BlockDriverState *bs,
316                                         int64_t sector_num, int nb_sectors,
317                                         QEMUIOVector *iov)
318 {
319     IscsiLun *iscsilun = bs->opaque;
320     struct IscsiTask iTask;
321     uint64_t lba;
322     uint32_t num_sectors;
323     uint8_t *data = NULL;
324     uint8_t *buf = NULL;
325 
326     if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
327         return -EINVAL;
328     }
329 
330     lba = sector_qemu2lun(sector_num, iscsilun);
331     num_sectors = sector_qemu2lun(nb_sectors, iscsilun);
332 #if !defined(LIBISCSI_FEATURE_IOVECTOR)
333     /* if the iovec only contains one buffer we can pass it directly */
334     if (iov->niov == 1) {
335         data = iov->iov[0].iov_base;
336     } else {
337         size_t size = MIN(nb_sectors * BDRV_SECTOR_SIZE, iov->size);
338         buf = g_malloc(size);
339         qemu_iovec_to_buf(iov, 0, buf, size);
340         data = buf;
341     }
342 #endif
343     iscsi_co_init_iscsitask(iscsilun, &iTask);
344 retry:
345     iTask.task = iscsi_write16_task(iscsilun->iscsi, iscsilun->lun, lba,
346                                     data, num_sectors * iscsilun->block_size,
347                                     iscsilun->block_size, 0, 0, 0, 0, 0,
348                                     iscsi_co_generic_cb, &iTask);
349     if (iTask.task == NULL) {
350         g_free(buf);
351         return -ENOMEM;
352     }
353 #if defined(LIBISCSI_FEATURE_IOVECTOR)
354     scsi_task_set_iov_out(iTask.task, (struct scsi_iovec *) iov->iov,
355                           iov->niov);
356 #endif
357     while (!iTask.complete) {
358         iscsi_set_events(iscsilun);
359         qemu_coroutine_yield();
360     }
361 
362     if (iTask.task != NULL) {
363         scsi_free_scsi_task(iTask.task);
364         iTask.task = NULL;
365     }
366 
367     if (iTask.do_retry) {
368         iTask.complete = 0;
369         goto retry;
370     }
371 
372     g_free(buf);
373 
374     if (iTask.status != SCSI_STATUS_GOOD) {
375         return -EIO;
376     }
377 
378     iscsi_allocationmap_set(iscsilun, sector_num, nb_sectors);
379 
380     return 0;
381 }
382 
383 
384 #if defined(LIBISCSI_FEATURE_IOVECTOR)
385 static bool iscsi_allocationmap_is_allocated(IscsiLun *iscsilun,
386                                              int64_t sector_num, int nb_sectors)
387 {
388     unsigned long size;
389     if (iscsilun->allocationmap == NULL) {
390         return true;
391     }
392     size = DIV_ROUND_UP(sector_num + nb_sectors, iscsilun->cluster_sectors);
393     return !(find_next_bit(iscsilun->allocationmap, size,
394                            sector_num / iscsilun->cluster_sectors) == size);
395 }
396 
397 static int64_t coroutine_fn iscsi_co_get_block_status(BlockDriverState *bs,
398                                                   int64_t sector_num,
399                                                   int nb_sectors, int *pnum)
400 {
401     IscsiLun *iscsilun = bs->opaque;
402     struct scsi_get_lba_status *lbas = NULL;
403     struct scsi_lba_status_descriptor *lbasd = NULL;
404     struct IscsiTask iTask;
405     int64_t ret;
406 
407     iscsi_co_init_iscsitask(iscsilun, &iTask);
408 
409     if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
410         ret = -EINVAL;
411         goto out;
412     }
413 
414     /* default to all sectors allocated */
415     ret = BDRV_BLOCK_DATA;
416     ret |= (sector_num << BDRV_SECTOR_BITS) | BDRV_BLOCK_OFFSET_VALID;
417     *pnum = nb_sectors;
418 
419     /* LUN does not support logical block provisioning */
420     if (iscsilun->lbpme == 0) {
421         goto out;
422     }
423 
424 retry:
425     if (iscsi_get_lba_status_task(iscsilun->iscsi, iscsilun->lun,
426                                   sector_qemu2lun(sector_num, iscsilun),
427                                   8 + 16, iscsi_co_generic_cb,
428                                   &iTask) == NULL) {
429         ret = -ENOMEM;
430         goto out;
431     }
432 
433     while (!iTask.complete) {
434         iscsi_set_events(iscsilun);
435         qemu_coroutine_yield();
436     }
437 
438     if (iTask.do_retry) {
439         if (iTask.task != NULL) {
440             scsi_free_scsi_task(iTask.task);
441             iTask.task = NULL;
442         }
443         iTask.complete = 0;
444         goto retry;
445     }
446 
447     if (iTask.status != SCSI_STATUS_GOOD) {
448         /* in case the get_lba_status_callout fails (i.e.
449          * because the device is busy or the cmd is not
450          * supported) we pretend all blocks are allocated
451          * for backwards compatibility */
452         goto out;
453     }
454 
455     lbas = scsi_datain_unmarshall(iTask.task);
456     if (lbas == NULL) {
457         ret = -EIO;
458         goto out;
459     }
460 
461     lbasd = &lbas->descriptors[0];
462 
463     if (sector_qemu2lun(sector_num, iscsilun) != lbasd->lba) {
464         ret = -EIO;
465         goto out;
466     }
467 
468     *pnum = sector_lun2qemu(lbasd->num_blocks, iscsilun);
469 
470     if (lbasd->provisioning == SCSI_PROVISIONING_TYPE_DEALLOCATED ||
471         lbasd->provisioning == SCSI_PROVISIONING_TYPE_ANCHORED) {
472         ret &= ~BDRV_BLOCK_DATA;
473         if (iscsilun->lbprz) {
474             ret |= BDRV_BLOCK_ZERO;
475         }
476     }
477 
478     if (ret & BDRV_BLOCK_ZERO) {
479         iscsi_allocationmap_clear(iscsilun, sector_num, *pnum);
480     } else {
481         iscsi_allocationmap_set(iscsilun, sector_num, *pnum);
482     }
483 
484     if (*pnum > nb_sectors) {
485         *pnum = nb_sectors;
486     }
487 out:
488     if (iTask.task != NULL) {
489         scsi_free_scsi_task(iTask.task);
490     }
491     return ret;
492 }
493 
494 #endif /* LIBISCSI_FEATURE_IOVECTOR */
495 
496 
497 static int coroutine_fn iscsi_co_readv(BlockDriverState *bs,
498                                        int64_t sector_num, int nb_sectors,
499                                        QEMUIOVector *iov)
500 {
501     IscsiLun *iscsilun = bs->opaque;
502     struct IscsiTask iTask;
503     uint64_t lba;
504     uint32_t num_sectors;
505 #if !defined(LIBISCSI_FEATURE_IOVECTOR)
506     int i;
507 #endif
508 
509     if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
510         return -EINVAL;
511     }
512 
513 #if defined(LIBISCSI_FEATURE_IOVECTOR)
514     if (iscsilun->lbprz && nb_sectors >= ISCSI_CHECKALLOC_THRES &&
515         !iscsi_allocationmap_is_allocated(iscsilun, sector_num, nb_sectors)) {
516         int64_t ret;
517         int pnum;
518         ret = iscsi_co_get_block_status(bs, sector_num, INT_MAX, &pnum);
519         if (ret < 0) {
520             return ret;
521         }
522         if (ret & BDRV_BLOCK_ZERO && pnum >= nb_sectors) {
523             qemu_iovec_memset(iov, 0, 0x00, iov->size);
524             return 0;
525         }
526     }
527 #endif
528 
529     lba = sector_qemu2lun(sector_num, iscsilun);
530     num_sectors = sector_qemu2lun(nb_sectors, iscsilun);
531 
532     iscsi_co_init_iscsitask(iscsilun, &iTask);
533 retry:
534     switch (iscsilun->type) {
535     case TYPE_DISK:
536         iTask.task = iscsi_read16_task(iscsilun->iscsi, iscsilun->lun, lba,
537                                        num_sectors * iscsilun->block_size,
538                                        iscsilun->block_size, 0, 0, 0, 0, 0,
539                                        iscsi_co_generic_cb, &iTask);
540         break;
541     default:
542         iTask.task = iscsi_read10_task(iscsilun->iscsi, iscsilun->lun, lba,
543                                        num_sectors * iscsilun->block_size,
544                                        iscsilun->block_size,
545 #if !defined(CONFIG_LIBISCSI_1_4) /* API change from 1.4.0 to 1.5.0 */
546                                        0, 0, 0, 0, 0,
547 #endif
548                                        iscsi_co_generic_cb, &iTask);
549         break;
550     }
551     if (iTask.task == NULL) {
552         return -ENOMEM;
553     }
554 #if defined(LIBISCSI_FEATURE_IOVECTOR)
555     scsi_task_set_iov_in(iTask.task, (struct scsi_iovec *) iov->iov, iov->niov);
556 #else
557     for (i = 0; i < iov->niov; i++) {
558         scsi_task_add_data_in_buffer(iTask.task,
559                                      iov->iov[i].iov_len,
560                                      iov->iov[i].iov_base);
561     }
562 #endif
563 
564     while (!iTask.complete) {
565         iscsi_set_events(iscsilun);
566         qemu_coroutine_yield();
567     }
568 
569     if (iTask.task != NULL) {
570         scsi_free_scsi_task(iTask.task);
571         iTask.task = NULL;
572     }
573 
574     if (iTask.do_retry) {
575         iTask.complete = 0;
576         goto retry;
577     }
578 
579     if (iTask.status != SCSI_STATUS_GOOD) {
580         return -EIO;
581     }
582 
583     return 0;
584 }
585 
586 static int coroutine_fn iscsi_co_flush(BlockDriverState *bs)
587 {
588     IscsiLun *iscsilun = bs->opaque;
589     struct IscsiTask iTask;
590 
591     if (bs->sg) {
592         return 0;
593     }
594 
595     iscsi_co_init_iscsitask(iscsilun, &iTask);
596 
597 retry:
598     if (iscsi_synchronizecache10_task(iscsilun->iscsi, iscsilun->lun, 0, 0, 0,
599                                       0, iscsi_co_generic_cb, &iTask) == NULL) {
600         return -ENOMEM;
601     }
602 
603     while (!iTask.complete) {
604         iscsi_set_events(iscsilun);
605         qemu_coroutine_yield();
606     }
607 
608     if (iTask.task != NULL) {
609         scsi_free_scsi_task(iTask.task);
610         iTask.task = NULL;
611     }
612 
613     if (iTask.do_retry) {
614         iTask.complete = 0;
615         goto retry;
616     }
617 
618     if (iTask.status != SCSI_STATUS_GOOD) {
619         return -EIO;
620     }
621 
622     return 0;
623 }
624 
625 #ifdef __linux__
626 static void
627 iscsi_aio_ioctl_cb(struct iscsi_context *iscsi, int status,
628                      void *command_data, void *opaque)
629 {
630     IscsiAIOCB *acb = opaque;
631 
632     g_free(acb->buf);
633     acb->buf = NULL;
634 
635     if (acb->canceled != 0) {
636         return;
637     }
638 
639     acb->status = 0;
640     if (status < 0) {
641         error_report("Failed to ioctl(SG_IO) to iSCSI lun. %s",
642                      iscsi_get_error(iscsi));
643         acb->status = -EIO;
644     }
645 
646     acb->ioh->driver_status = 0;
647     acb->ioh->host_status   = 0;
648     acb->ioh->resid         = 0;
649 
650 #define SG_ERR_DRIVER_SENSE    0x08
651 
652     if (status == SCSI_STATUS_CHECK_CONDITION && acb->task->datain.size >= 2) {
653         int ss;
654 
655         acb->ioh->driver_status |= SG_ERR_DRIVER_SENSE;
656 
657         acb->ioh->sb_len_wr = acb->task->datain.size - 2;
658         ss = (acb->ioh->mx_sb_len >= acb->ioh->sb_len_wr) ?
659              acb->ioh->mx_sb_len : acb->ioh->sb_len_wr;
660         memcpy(acb->ioh->sbp, &acb->task->datain.data[2], ss);
661     }
662 
663     iscsi_schedule_bh(acb);
664 }
665 
666 static BlockDriverAIOCB *iscsi_aio_ioctl(BlockDriverState *bs,
667         unsigned long int req, void *buf,
668         BlockDriverCompletionFunc *cb, void *opaque)
669 {
670     IscsiLun *iscsilun = bs->opaque;
671     struct iscsi_context *iscsi = iscsilun->iscsi;
672     struct iscsi_data data;
673     IscsiAIOCB *acb;
674 
675     assert(req == SG_IO);
676 
677     acb = qemu_aio_get(&iscsi_aiocb_info, bs, cb, opaque);
678 
679     acb->iscsilun = iscsilun;
680     acb->canceled    = 0;
681     acb->bh          = NULL;
682     acb->status      = -EINPROGRESS;
683     acb->buf         = NULL;
684     acb->ioh         = buf;
685 
686     acb->task = malloc(sizeof(struct scsi_task));
687     if (acb->task == NULL) {
688         error_report("iSCSI: Failed to allocate task for scsi command. %s",
689                      iscsi_get_error(iscsi));
690         qemu_aio_release(acb);
691         return NULL;
692     }
693     memset(acb->task, 0, sizeof(struct scsi_task));
694 
695     switch (acb->ioh->dxfer_direction) {
696     case SG_DXFER_TO_DEV:
697         acb->task->xfer_dir = SCSI_XFER_WRITE;
698         break;
699     case SG_DXFER_FROM_DEV:
700         acb->task->xfer_dir = SCSI_XFER_READ;
701         break;
702     default:
703         acb->task->xfer_dir = SCSI_XFER_NONE;
704         break;
705     }
706 
707     acb->task->cdb_size = acb->ioh->cmd_len;
708     memcpy(&acb->task->cdb[0], acb->ioh->cmdp, acb->ioh->cmd_len);
709     acb->task->expxferlen = acb->ioh->dxfer_len;
710 
711     data.size = 0;
712     if (acb->task->xfer_dir == SCSI_XFER_WRITE) {
713         if (acb->ioh->iovec_count == 0) {
714             data.data = acb->ioh->dxferp;
715             data.size = acb->ioh->dxfer_len;
716         } else {
717 #if defined(LIBISCSI_FEATURE_IOVECTOR)
718             scsi_task_set_iov_out(acb->task,
719                                  (struct scsi_iovec *) acb->ioh->dxferp,
720                                  acb->ioh->iovec_count);
721 #else
722             struct iovec *iov = (struct iovec *)acb->ioh->dxferp;
723 
724             acb->buf = g_malloc(acb->ioh->dxfer_len);
725             data.data = acb->buf;
726             data.size = iov_to_buf(iov, acb->ioh->iovec_count, 0,
727                                    acb->buf, acb->ioh->dxfer_len);
728 #endif
729         }
730     }
731 
732     if (iscsi_scsi_command_async(iscsi, iscsilun->lun, acb->task,
733                                  iscsi_aio_ioctl_cb,
734                                  (data.size > 0) ? &data : NULL,
735                                  acb) != 0) {
736         scsi_free_scsi_task(acb->task);
737         qemu_aio_release(acb);
738         return NULL;
739     }
740 
741     /* tell libiscsi to read straight into the buffer we got from ioctl */
742     if (acb->task->xfer_dir == SCSI_XFER_READ) {
743         if (acb->ioh->iovec_count == 0) {
744             scsi_task_add_data_in_buffer(acb->task,
745                                          acb->ioh->dxfer_len,
746                                          acb->ioh->dxferp);
747         } else {
748 #if defined(LIBISCSI_FEATURE_IOVECTOR)
749             scsi_task_set_iov_in(acb->task,
750                                  (struct scsi_iovec *) acb->ioh->dxferp,
751                                  acb->ioh->iovec_count);
752 #else
753             int i;
754             for (i = 0; i < acb->ioh->iovec_count; i++) {
755                 struct iovec *iov = (struct iovec *)acb->ioh->dxferp;
756 
757                 scsi_task_add_data_in_buffer(acb->task,
758                     iov[i].iov_len,
759                     iov[i].iov_base);
760             }
761 #endif
762         }
763     }
764 
765     iscsi_set_events(iscsilun);
766 
767     return &acb->common;
768 }
769 
770 
/* Completion callback used by the synchronous SG_IO path: stores the
 * final status in the int that @opaque points to. */
static void ioctl_cb(void *opaque, int status)
{
    *(int *)opaque = status;
}
776 
777 static int iscsi_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
778 {
779     IscsiLun *iscsilun = bs->opaque;
780     int status;
781 
782     switch (req) {
783     case SG_GET_VERSION_NUM:
784         *(int *)buf = 30000;
785         break;
786     case SG_GET_SCSI_ID:
787         ((struct sg_scsi_id *)buf)->scsi_type = iscsilun->type;
788         break;
789     case SG_IO:
790         status = -EINPROGRESS;
791         iscsi_aio_ioctl(bs, req, buf, ioctl_cb, &status);
792 
793         while (status == -EINPROGRESS) {
794             qemu_aio_wait();
795         }
796 
797         return 0;
798     default:
799         return -1;
800     }
801     return 0;
802 }
803 #endif
804 
805 static int64_t
806 iscsi_getlength(BlockDriverState *bs)
807 {
808     IscsiLun *iscsilun = bs->opaque;
809     int64_t len;
810 
811     len  = iscsilun->num_blocks;
812     len *= iscsilun->block_size;
813 
814     return len;
815 }
816 
817 static int
818 coroutine_fn iscsi_co_discard(BlockDriverState *bs, int64_t sector_num,
819                                    int nb_sectors)
820 {
821     IscsiLun *iscsilun = bs->opaque;
822     struct IscsiTask iTask;
823     struct unmap_list list;
824 
825     if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
826         return -EINVAL;
827     }
828 
829     if (!iscsilun->lbp.lbpu) {
830         /* UNMAP is not supported by the target */
831         return 0;
832     }
833 
834     list.lba = sector_qemu2lun(sector_num, iscsilun);
835     list.num = sector_qemu2lun(nb_sectors, iscsilun);
836 
837     iscsi_co_init_iscsitask(iscsilun, &iTask);
838 retry:
839     if (iscsi_unmap_task(iscsilun->iscsi, iscsilun->lun, 0, 0, &list, 1,
840                      iscsi_co_generic_cb, &iTask) == NULL) {
841         return -ENOMEM;
842     }
843 
844     while (!iTask.complete) {
845         iscsi_set_events(iscsilun);
846         qemu_coroutine_yield();
847     }
848 
849     if (iTask.task != NULL) {
850         scsi_free_scsi_task(iTask.task);
851         iTask.task = NULL;
852     }
853 
854     if (iTask.do_retry) {
855         iTask.complete = 0;
856         goto retry;
857     }
858 
859     if (iTask.status == SCSI_STATUS_CHECK_CONDITION) {
860         /* the target might fail with a check condition if it
861            is not happy with the alignment of the UNMAP request
862            we silently fail in this case */
863         return 0;
864     }
865 
866     if (iTask.status != SCSI_STATUS_GOOD) {
867         return -EIO;
868     }
869 
870     iscsi_allocationmap_clear(iscsilun, sector_num, nb_sectors);
871 
872     return 0;
873 }
874 
875 #if defined(SCSI_SENSE_ASCQ_CAPACITY_DATA_HAS_CHANGED)
876 
877 static int
878 coroutine_fn iscsi_co_write_zeroes(BlockDriverState *bs, int64_t sector_num,
879                                    int nb_sectors, BdrvRequestFlags flags)
880 {
881     IscsiLun *iscsilun = bs->opaque;
882     struct IscsiTask iTask;
883     uint64_t lba;
884     uint32_t nb_blocks;
885 
886     if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
887         return -EINVAL;
888     }
889 
890     if ((flags & BDRV_REQ_MAY_UNMAP) && !iscsilun->lbp.lbpws) {
891         /* WRITE SAME with UNMAP is not supported by the target,
892          * fall back and try WRITE SAME without UNMAP */
893         flags &= ~BDRV_REQ_MAY_UNMAP;
894     }
895 
896     if (!(flags & BDRV_REQ_MAY_UNMAP) && !iscsilun->has_write_same) {
897         /* WRITE SAME without UNMAP is not supported by the target */
898         return -ENOTSUP;
899     }
900 
901     lba = sector_qemu2lun(sector_num, iscsilun);
902     nb_blocks = sector_qemu2lun(nb_sectors, iscsilun);
903 
904     if (iscsilun->zeroblock == NULL) {
905         iscsilun->zeroblock = g_malloc0(iscsilun->block_size);
906     }
907 
908     iscsi_co_init_iscsitask(iscsilun, &iTask);
909 retry:
910     if (iscsi_writesame16_task(iscsilun->iscsi, iscsilun->lun, lba,
911                                iscsilun->zeroblock, iscsilun->block_size,
912                                nb_blocks, 0, !!(flags & BDRV_REQ_MAY_UNMAP),
913                                0, 0, iscsi_co_generic_cb, &iTask) == NULL) {
914         return -ENOMEM;
915     }
916 
917     while (!iTask.complete) {
918         iscsi_set_events(iscsilun);
919         qemu_coroutine_yield();
920     }
921 
922     if (iTask.status == SCSI_STATUS_CHECK_CONDITION &&
923         iTask.task->sense.key == SCSI_SENSE_ILLEGAL_REQUEST &&
924         (iTask.task->sense.ascq == SCSI_SENSE_ASCQ_INVALID_OPERATION_CODE ||
925          iTask.task->sense.ascq == SCSI_SENSE_ASCQ_INVALID_FIELD_IN_CDB)) {
926         /* WRITE SAME is not supported by the target */
927         iscsilun->has_write_same = false;
928         scsi_free_scsi_task(iTask.task);
929         return -ENOTSUP;
930     }
931 
932     if (iTask.task != NULL) {
933         scsi_free_scsi_task(iTask.task);
934         iTask.task = NULL;
935     }
936 
937     if (iTask.do_retry) {
938         iTask.complete = 0;
939         goto retry;
940     }
941 
942     if (iTask.status != SCSI_STATUS_GOOD) {
943         return -EIO;
944     }
945 
946     if (flags & BDRV_REQ_MAY_UNMAP) {
947         iscsi_allocationmap_clear(iscsilun, sector_num, nb_sectors);
948     } else {
949         iscsi_allocationmap_set(iscsilun, sector_num, nb_sectors);
950     }
951 
952     return 0;
953 }
954 
955 #endif /* SCSI_SENSE_ASCQ_CAPACITY_DATA_HAS_CHANGED */
956 
957 static void parse_chap(struct iscsi_context *iscsi, const char *target,
958                        Error **errp)
959 {
960     QemuOptsList *list;
961     QemuOpts *opts;
962     const char *user = NULL;
963     const char *password = NULL;
964 
965     list = qemu_find_opts("iscsi");
966     if (!list) {
967         return;
968     }
969 
970     opts = qemu_opts_find(list, target);
971     if (opts == NULL) {
972         opts = QTAILQ_FIRST(&list->head);
973         if (!opts) {
974             return;
975         }
976     }
977 
978     user = qemu_opt_get(opts, "user");
979     if (!user) {
980         return;
981     }
982 
983     password = qemu_opt_get(opts, "password");
984     if (!password) {
985         error_setg(errp, "CHAP username specified but no password was given");
986         return;
987     }
988 
989     if (iscsi_set_initiator_username_pwd(iscsi, user, password)) {
990         error_setg(errp, "Failed to set initiator username and password");
991     }
992 }
993 
994 static void parse_header_digest(struct iscsi_context *iscsi, const char *target,
995                                 Error **errp)
996 {
997     QemuOptsList *list;
998     QemuOpts *opts;
999     const char *digest = NULL;
1000 
1001     list = qemu_find_opts("iscsi");
1002     if (!list) {
1003         return;
1004     }
1005 
1006     opts = qemu_opts_find(list, target);
1007     if (opts == NULL) {
1008         opts = QTAILQ_FIRST(&list->head);
1009         if (!opts) {
1010             return;
1011         }
1012     }
1013 
1014     digest = qemu_opt_get(opts, "header-digest");
1015     if (!digest) {
1016         return;
1017     }
1018 
1019     if (!strcmp(digest, "CRC32C")) {
1020         iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_CRC32C);
1021     } else if (!strcmp(digest, "NONE")) {
1022         iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE);
1023     } else if (!strcmp(digest, "CRC32C-NONE")) {
1024         iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_CRC32C_NONE);
1025     } else if (!strcmp(digest, "NONE-CRC32C")) {
1026         iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE_CRC32C);
1027     } else {
1028         error_setg(errp, "Invalid header-digest setting : %s", digest);
1029     }
1030 }
1031 
1032 static char *parse_initiator_name(const char *target)
1033 {
1034     QemuOptsList *list;
1035     QemuOpts *opts;
1036     const char *name;
1037     char *iscsi_name;
1038     UuidInfo *uuid_info;
1039 
1040     list = qemu_find_opts("iscsi");
1041     if (list) {
1042         opts = qemu_opts_find(list, target);
1043         if (!opts) {
1044             opts = QTAILQ_FIRST(&list->head);
1045         }
1046         if (opts) {
1047             name = qemu_opt_get(opts, "initiator-name");
1048             if (name) {
1049                 return g_strdup(name);
1050             }
1051         }
1052     }
1053 
1054     uuid_info = qmp_query_uuid(NULL);
1055     if (strcmp(uuid_info->UUID, UUID_NONE) == 0) {
1056         name = qemu_get_vm_name();
1057     } else {
1058         name = uuid_info->UUID;
1059     }
1060     iscsi_name = g_strdup_printf("iqn.2008-11.org.linux-kvm%s%s",
1061                                  name ? ":" : "", name ? name : "");
1062     qapi_free_UuidInfo(uuid_info);
1063     return iscsi_name;
1064 }
1065 
1066 #if defined(LIBISCSI_FEATURE_NOP_COUNTER)
1067 static void iscsi_nop_timed_event(void *opaque)
1068 {
1069     IscsiLun *iscsilun = opaque;
1070 
1071     if (iscsi_get_nops_in_flight(iscsilun->iscsi) > MAX_NOP_FAILURES) {
1072         error_report("iSCSI: NOP timeout. Reconnecting...");
1073         iscsi_reconnect(iscsilun->iscsi);
1074     }
1075 
1076     if (iscsi_nop_out_async(iscsilun->iscsi, NULL, NULL, 0, NULL) != 0) {
1077         error_report("iSCSI: failed to sent NOP-Out. Disabling NOP messages.");
1078         return;
1079     }
1080 
1081     timer_mod(iscsilun->nop_timer, qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + NOP_INTERVAL);
1082     iscsi_set_events(iscsilun);
1083 }
1084 #endif
1085 
1086 static void iscsi_readcapacity_sync(IscsiLun *iscsilun, Error **errp)
1087 {
1088     struct scsi_task *task = NULL;
1089     struct scsi_readcapacity10 *rc10 = NULL;
1090     struct scsi_readcapacity16 *rc16 = NULL;
1091     int retries = ISCSI_CMD_RETRIES;
1092 
1093     do {
1094         if (task != NULL) {
1095             scsi_free_scsi_task(task);
1096             task = NULL;
1097         }
1098 
1099         switch (iscsilun->type) {
1100         case TYPE_DISK:
1101             task = iscsi_readcapacity16_sync(iscsilun->iscsi, iscsilun->lun);
1102             if (task != NULL && task->status == SCSI_STATUS_GOOD) {
1103                 rc16 = scsi_datain_unmarshall(task);
1104                 if (rc16 == NULL) {
1105                     error_setg(errp, "iSCSI: Failed to unmarshall readcapacity16 data.");
1106                 } else {
1107                     iscsilun->block_size = rc16->block_length;
1108                     iscsilun->num_blocks = rc16->returned_lba + 1;
1109                     iscsilun->lbpme = rc16->lbpme;
1110                     iscsilun->lbprz = rc16->lbprz;
1111                 }
1112             }
1113             break;
1114         case TYPE_ROM:
1115             task = iscsi_readcapacity10_sync(iscsilun->iscsi, iscsilun->lun, 0, 0);
1116             if (task != NULL && task->status == SCSI_STATUS_GOOD) {
1117                 rc10 = scsi_datain_unmarshall(task);
1118                 if (rc10 == NULL) {
1119                     error_setg(errp, "iSCSI: Failed to unmarshall readcapacity10 data.");
1120                 } else {
1121                     iscsilun->block_size = rc10->block_size;
1122                     if (rc10->lba == 0) {
1123                         /* blank disk loaded */
1124                         iscsilun->num_blocks = 0;
1125                     } else {
1126                         iscsilun->num_blocks = rc10->lba + 1;
1127                     }
1128                 }
1129             }
1130             break;
1131         default:
1132             return;
1133         }
1134     } while (task != NULL && task->status == SCSI_STATUS_CHECK_CONDITION
1135              && task->sense.key == SCSI_SENSE_UNIT_ATTENTION
1136              && retries-- > 0);
1137 
1138     if (task == NULL || task->status != SCSI_STATUS_GOOD) {
1139         error_setg(errp, "iSCSI: failed to send readcapacity10 command.");
1140     }
1141     if (task) {
1142         scsi_free_scsi_task(task);
1143     }
1144 }
1145 
1146 /* TODO Convert to fine grained options */
1147 static QemuOptsList runtime_opts = {
1148     .name = "iscsi",
1149     .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
1150     .desc = {
1151         {
1152             .name = "filename",
1153             .type = QEMU_OPT_STRING,
1154             .help = "URL to the iscsi image",
1155         },
1156         { /* end of list */ }
1157     },
1158 };
1159 
1160 static struct scsi_task *iscsi_do_inquiry(struct iscsi_context *iscsi, int lun,
1161                                           int evpd, int pc, void **inq, Error **errp)
1162 {
1163     int full_size;
1164     struct scsi_task *task = NULL;
1165     task = iscsi_inquiry_sync(iscsi, lun, evpd, pc, 64);
1166     if (task == NULL || task->status != SCSI_STATUS_GOOD) {
1167         goto fail;
1168     }
1169     full_size = scsi_datain_getfullsize(task);
1170     if (full_size > task->datain.size) {
1171         scsi_free_scsi_task(task);
1172 
1173         /* we need more data for the full list */
1174         task = iscsi_inquiry_sync(iscsi, lun, evpd, pc, full_size);
1175         if (task == NULL || task->status != SCSI_STATUS_GOOD) {
1176             goto fail;
1177         }
1178     }
1179 
1180     *inq = scsi_datain_unmarshall(task);
1181     if (*inq == NULL) {
1182         error_setg(errp, "iSCSI: failed to unmarshall inquiry datain blob");
1183         goto fail_with_err;
1184     }
1185 
1186     return task;
1187 
1188 fail:
1189     error_setg(errp, "iSCSI: Inquiry command failed : %s",
1190                iscsi_get_error(iscsi));
1191 fail_with_err:
1192     if (task != NULL) {
1193         scsi_free_scsi_task(task);
1194     }
1195     return NULL;
1196 }
1197 
1198 /*
1199  * We support iscsi url's on the form
1200  * iscsi://[<username>%<password>@]<host>[:<port>]/<targetname>/<lun>
1201  *
1202  * Note: flags are currently not used by iscsi_open.  If this function
1203  * is changed such that flags are used, please examine iscsi_reopen_prepare()
1204  * to see if needs to be changed as well.
1205  */
1206 static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
1207                       Error **errp)
1208 {
1209     IscsiLun *iscsilun = bs->opaque;
1210     struct iscsi_context *iscsi = NULL;
1211     struct iscsi_url *iscsi_url = NULL;
1212     struct scsi_task *task = NULL;
1213     struct scsi_inquiry_standard *inq = NULL;
1214     struct scsi_inquiry_supported_pages *inq_vpd;
1215     char *initiator_name = NULL;
1216     QemuOpts *opts;
1217     Error *local_err = NULL;
1218     const char *filename;
1219     int i, ret;
1220 
1221     if ((BDRV_SECTOR_SIZE % 512) != 0) {
1222         error_setg(errp, "iSCSI: Invalid BDRV_SECTOR_SIZE. "
1223                    "BDRV_SECTOR_SIZE(%lld) is not a multiple "
1224                    "of 512", BDRV_SECTOR_SIZE);
1225         return -EINVAL;
1226     }
1227 
1228     opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
1229     qemu_opts_absorb_qdict(opts, options, &local_err);
1230     if (local_err) {
1231         error_propagate(errp, local_err);
1232         ret = -EINVAL;
1233         goto out;
1234     }
1235 
1236     filename = qemu_opt_get(opts, "filename");
1237 
1238     iscsi_url = iscsi_parse_full_url(iscsi, filename);
1239     if (iscsi_url == NULL) {
1240         error_setg(errp, "Failed to parse URL : %s", filename);
1241         ret = -EINVAL;
1242         goto out;
1243     }
1244 
1245     memset(iscsilun, 0, sizeof(IscsiLun));
1246 
1247     initiator_name = parse_initiator_name(iscsi_url->target);
1248 
1249     iscsi = iscsi_create_context(initiator_name);
1250     if (iscsi == NULL) {
1251         error_setg(errp, "iSCSI: Failed to create iSCSI context.");
1252         ret = -ENOMEM;
1253         goto out;
1254     }
1255 
1256     if (iscsi_set_targetname(iscsi, iscsi_url->target)) {
1257         error_setg(errp, "iSCSI: Failed to set target name.");
1258         ret = -EINVAL;
1259         goto out;
1260     }
1261 
1262     if (iscsi_url->user != NULL) {
1263         ret = iscsi_set_initiator_username_pwd(iscsi, iscsi_url->user,
1264                                               iscsi_url->passwd);
1265         if (ret != 0) {
1266             error_setg(errp, "Failed to set initiator username and password");
1267             ret = -EINVAL;
1268             goto out;
1269         }
1270     }
1271 
1272     /* check if we got CHAP username/password via the options */
1273     parse_chap(iscsi, iscsi_url->target, &local_err);
1274     if (local_err != NULL) {
1275         error_propagate(errp, local_err);
1276         ret = -EINVAL;
1277         goto out;
1278     }
1279 
1280     if (iscsi_set_session_type(iscsi, ISCSI_SESSION_NORMAL) != 0) {
1281         error_setg(errp, "iSCSI: Failed to set session type to normal.");
1282         ret = -EINVAL;
1283         goto out;
1284     }
1285 
1286     iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE_CRC32C);
1287 
1288     /* check if we got HEADER_DIGEST via the options */
1289     parse_header_digest(iscsi, iscsi_url->target, &local_err);
1290     if (local_err != NULL) {
1291         error_propagate(errp, local_err);
1292         ret = -EINVAL;
1293         goto out;
1294     }
1295 
1296     if (iscsi_full_connect_sync(iscsi, iscsi_url->portal, iscsi_url->lun) != 0) {
1297         error_setg(errp, "iSCSI: Failed to connect to LUN : %s",
1298             iscsi_get_error(iscsi));
1299         ret = -EINVAL;
1300         goto out;
1301     }
1302 
1303     iscsilun->iscsi = iscsi;
1304     iscsilun->lun   = iscsi_url->lun;
1305     iscsilun->has_write_same = true;
1306 
1307     task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 0, 0,
1308                             (void **) &inq, errp);
1309     if (task == NULL) {
1310         ret = -EINVAL;
1311         goto out;
1312     }
1313     iscsilun->type = inq->periperal_device_type;
1314     scsi_free_scsi_task(task);
1315     task = NULL;
1316 
1317     iscsi_readcapacity_sync(iscsilun, &local_err);
1318     if (local_err != NULL) {
1319         error_propagate(errp, local_err);
1320         ret = -EINVAL;
1321         goto out;
1322     }
1323     bs->total_sectors = sector_lun2qemu(iscsilun->num_blocks, iscsilun);
1324     bs->request_alignment = iscsilun->block_size;
1325 
1326     /* We don't have any emulation for devices other than disks and CD-ROMs, so
1327      * this must be sg ioctl compatible. We force it to be sg, otherwise qemu
1328      * will try to read from the device to guess the image format.
1329      */
1330     if (iscsilun->type != TYPE_DISK && iscsilun->type != TYPE_ROM) {
1331         bs->sg = 1;
1332     }
1333 
1334     task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 1,
1335                             SCSI_INQUIRY_PAGECODE_SUPPORTED_VPD_PAGES,
1336                             (void **) &inq_vpd, errp);
1337     if (task == NULL) {
1338         ret = -EINVAL;
1339         goto out;
1340     }
1341     for (i = 0; i < inq_vpd->num_pages; i++) {
1342         struct scsi_task *inq_task;
1343         struct scsi_inquiry_logical_block_provisioning *inq_lbp;
1344         struct scsi_inquiry_block_limits *inq_bl;
1345         switch (inq_vpd->pages[i]) {
1346         case SCSI_INQUIRY_PAGECODE_LOGICAL_BLOCK_PROVISIONING:
1347             inq_task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 1,
1348                                         SCSI_INQUIRY_PAGECODE_LOGICAL_BLOCK_PROVISIONING,
1349                                         (void **) &inq_lbp, errp);
1350             if (inq_task == NULL) {
1351                 ret = -EINVAL;
1352                 goto out;
1353             }
1354             memcpy(&iscsilun->lbp, inq_lbp,
1355                    sizeof(struct scsi_inquiry_logical_block_provisioning));
1356             scsi_free_scsi_task(inq_task);
1357             break;
1358         case SCSI_INQUIRY_PAGECODE_BLOCK_LIMITS:
1359             inq_task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 1,
1360                                     SCSI_INQUIRY_PAGECODE_BLOCK_LIMITS,
1361                                     (void **) &inq_bl, errp);
1362             if (inq_task == NULL) {
1363                 ret = -EINVAL;
1364                 goto out;
1365             }
1366             memcpy(&iscsilun->bl, inq_bl,
1367                    sizeof(struct scsi_inquiry_block_limits));
1368             scsi_free_scsi_task(inq_task);
1369             break;
1370         default:
1371             break;
1372         }
1373     }
1374     scsi_free_scsi_task(task);
1375     task = NULL;
1376 
1377 #if defined(LIBISCSI_FEATURE_NOP_COUNTER)
1378     /* Set up a timer for sending out iSCSI NOPs */
1379     iscsilun->nop_timer = timer_new_ms(QEMU_CLOCK_REALTIME, iscsi_nop_timed_event, iscsilun);
1380     timer_mod(iscsilun->nop_timer, qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + NOP_INTERVAL);
1381 #endif
1382 
1383     /* Guess the internal cluster (page) size of the iscsi target by the means
1384      * of opt_unmap_gran. Transfer the unmap granularity only if it has a
1385      * reasonable size */
1386     if (iscsilun->bl.opt_unmap_gran * iscsilun->block_size >= 4 * 1024 &&
1387         iscsilun->bl.opt_unmap_gran * iscsilun->block_size <= 16 * 1024 * 1024) {
1388         iscsilun->cluster_sectors = (iscsilun->bl.opt_unmap_gran *
1389                                      iscsilun->block_size) >> BDRV_SECTOR_BITS;
1390 #if defined(LIBISCSI_FEATURE_IOVECTOR)
1391         if (iscsilun->lbprz && !(bs->open_flags & BDRV_O_NOCACHE)) {
1392             iscsilun->allocationmap =
1393                 bitmap_new(DIV_ROUND_UP(bs->total_sectors,
1394                                         iscsilun->cluster_sectors));
1395         }
1396 #endif
1397     }
1398 
1399 out:
1400     qemu_opts_del(opts);
1401     if (initiator_name != NULL) {
1402         g_free(initiator_name);
1403     }
1404     if (iscsi_url != NULL) {
1405         iscsi_destroy_url(iscsi_url);
1406     }
1407     if (task != NULL) {
1408         scsi_free_scsi_task(task);
1409     }
1410 
1411     if (ret) {
1412         if (iscsi != NULL) {
1413             iscsi_destroy_context(iscsi);
1414         }
1415         memset(iscsilun, 0, sizeof(IscsiLun));
1416     }
1417     return ret;
1418 }
1419 
1420 static void iscsi_close(BlockDriverState *bs)
1421 {
1422     IscsiLun *iscsilun = bs->opaque;
1423     struct iscsi_context *iscsi = iscsilun->iscsi;
1424 
1425     if (iscsilun->nop_timer) {
1426         timer_del(iscsilun->nop_timer);
1427         timer_free(iscsilun->nop_timer);
1428     }
1429     qemu_aio_set_fd_handler(iscsi_get_fd(iscsi), NULL, NULL, NULL);
1430     iscsi_destroy_context(iscsi);
1431     g_free(iscsilun->zeroblock);
1432     g_free(iscsilun->allocationmap);
1433     memset(iscsilun, 0, sizeof(IscsiLun));
1434 }
1435 
1436 static int iscsi_refresh_limits(BlockDriverState *bs)
1437 {
1438     IscsiLun *iscsilun = bs->opaque;
1439 
1440     /* We don't actually refresh here, but just return data queried in
1441      * iscsi_open(): iscsi targets don't change their limits. */
1442     if (iscsilun->lbp.lbpu) {
1443         if (iscsilun->bl.max_unmap < 0xffffffff) {
1444             bs->bl.max_discard = sector_lun2qemu(iscsilun->bl.max_unmap,
1445                                                  iscsilun);
1446         }
1447         bs->bl.discard_alignment = sector_lun2qemu(iscsilun->bl.opt_unmap_gran,
1448                                                    iscsilun);
1449     }
1450 
1451     if (iscsilun->bl.max_ws_len < 0xffffffff) {
1452         bs->bl.max_write_zeroes = sector_lun2qemu(iscsilun->bl.max_ws_len,
1453                                                   iscsilun);
1454     }
1455     if (iscsilun->lbp.lbpws) {
1456         bs->bl.write_zeroes_alignment = sector_lun2qemu(iscsilun->bl.opt_unmap_gran,
1457                                                         iscsilun);
1458     }
1459     bs->bl.opt_transfer_length = sector_lun2qemu(iscsilun->bl.opt_xfer_len,
1460                                                  iscsilun);
1461     return 0;
1462 }
1463 
1464 /* Since iscsi_open() ignores bdrv_flags, there is nothing to do here in
1465  * prepare.  Note that this will not re-establish a connection with an iSCSI
1466  * target - it is effectively a NOP.  */
1467 static int iscsi_reopen_prepare(BDRVReopenState *state,
1468                                 BlockReopenQueue *queue, Error **errp)
1469 {
1470     /* NOP */
1471     return 0;
1472 }
1473 
1474 static int iscsi_truncate(BlockDriverState *bs, int64_t offset)
1475 {
1476     IscsiLun *iscsilun = bs->opaque;
1477     Error *local_err = NULL;
1478 
1479     if (iscsilun->type != TYPE_DISK) {
1480         return -ENOTSUP;
1481     }
1482 
1483     iscsi_readcapacity_sync(iscsilun, &local_err);
1484     if (local_err != NULL) {
1485         error_free(local_err);
1486         return -EIO;
1487     }
1488 
1489     if (offset > iscsi_getlength(bs)) {
1490         return -EINVAL;
1491     }
1492 
1493     if (iscsilun->allocationmap != NULL) {
1494         g_free(iscsilun->allocationmap);
1495         iscsilun->allocationmap =
1496             bitmap_new(DIV_ROUND_UP(bs->total_sectors,
1497                                     iscsilun->cluster_sectors));
1498     }
1499 
1500     return 0;
1501 }
1502 
1503 static int iscsi_create(const char *filename, QEMUOptionParameter *options,
1504                         Error **errp)
1505 {
1506     int ret = 0;
1507     int64_t total_size = 0;
1508     BlockDriverState *bs;
1509     IscsiLun *iscsilun = NULL;
1510     QDict *bs_options;
1511 
1512     bs = bdrv_new("", &error_abort);
1513 
1514     /* Read out options */
1515     while (options && options->name) {
1516         if (!strcmp(options->name, "size")) {
1517             total_size = options->value.n / BDRV_SECTOR_SIZE;
1518         }
1519         options++;
1520     }
1521 
1522     bs->opaque = g_malloc0(sizeof(struct IscsiLun));
1523     iscsilun = bs->opaque;
1524 
1525     bs_options = qdict_new();
1526     qdict_put(bs_options, "filename", qstring_from_str(filename));
1527     ret = iscsi_open(bs, bs_options, 0, NULL);
1528     QDECREF(bs_options);
1529 
1530     if (ret != 0) {
1531         goto out;
1532     }
1533     if (iscsilun->nop_timer) {
1534         timer_del(iscsilun->nop_timer);
1535         timer_free(iscsilun->nop_timer);
1536     }
1537     if (iscsilun->type != TYPE_DISK) {
1538         ret = -ENODEV;
1539         goto out;
1540     }
1541     if (bs->total_sectors < total_size) {
1542         ret = -ENOSPC;
1543         goto out;
1544     }
1545 
1546     ret = 0;
1547 out:
1548     if (iscsilun->iscsi != NULL) {
1549         iscsi_destroy_context(iscsilun->iscsi);
1550     }
1551     g_free(bs->opaque);
1552     bs->opaque = NULL;
1553     bdrv_unref(bs);
1554     return ret;
1555 }
1556 
1557 static int iscsi_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
1558 {
1559     IscsiLun *iscsilun = bs->opaque;
1560     bdi->unallocated_blocks_are_zero = !!iscsilun->lbprz;
1561     bdi->can_write_zeroes_with_unmap = iscsilun->lbprz && iscsilun->lbp.lbpws;
1562     bdi->cluster_size = iscsilun->cluster_sectors * BDRV_SECTOR_SIZE;
1563     return 0;
1564 }
1565 
1566 static QEMUOptionParameter iscsi_create_options[] = {
1567     {
1568         .name = BLOCK_OPT_SIZE,
1569         .type = OPT_SIZE,
1570         .help = "Virtual disk size"
1571     },
1572     { NULL }
1573 };
1574 
1575 static BlockDriver bdrv_iscsi = {
1576     .format_name     = "iscsi",
1577     .protocol_name   = "iscsi",
1578 
1579     .instance_size   = sizeof(IscsiLun),
1580     .bdrv_needs_filename = true,
1581     .bdrv_file_open  = iscsi_open,
1582     .bdrv_close      = iscsi_close,
1583     .bdrv_create     = iscsi_create,
1584     .create_options  = iscsi_create_options,
1585     .bdrv_reopen_prepare  = iscsi_reopen_prepare,
1586 
1587     .bdrv_getlength  = iscsi_getlength,
1588     .bdrv_get_info   = iscsi_get_info,
1589     .bdrv_truncate   = iscsi_truncate,
1590     .bdrv_refresh_limits = iscsi_refresh_limits,
1591 
1592 #if defined(LIBISCSI_FEATURE_IOVECTOR)
1593     .bdrv_co_get_block_status = iscsi_co_get_block_status,
1594 #endif
1595     .bdrv_co_discard      = iscsi_co_discard,
1596 #if defined(SCSI_SENSE_ASCQ_CAPACITY_DATA_HAS_CHANGED)
1597     .bdrv_co_write_zeroes = iscsi_co_write_zeroes,
1598 #endif
1599     .bdrv_co_readv         = iscsi_co_readv,
1600     .bdrv_co_writev        = iscsi_co_writev,
1601     .bdrv_co_flush_to_disk = iscsi_co_flush,
1602 
1603 #ifdef __linux__
1604     .bdrv_ioctl       = iscsi_ioctl,
1605     .bdrv_aio_ioctl   = iscsi_aio_ioctl,
1606 #endif
1607 };
1608 
1609 static QemuOptsList qemu_iscsi_opts = {
1610     .name = "iscsi",
1611     .head = QTAILQ_HEAD_INITIALIZER(qemu_iscsi_opts.head),
1612     .desc = {
1613         {
1614             .name = "user",
1615             .type = QEMU_OPT_STRING,
1616             .help = "username for CHAP authentication to target",
1617         },{
1618             .name = "password",
1619             .type = QEMU_OPT_STRING,
1620             .help = "password for CHAP authentication to target",
1621         },{
1622             .name = "header-digest",
1623             .type = QEMU_OPT_STRING,
1624             .help = "HeaderDigest setting. "
1625                     "{CRC32C|CRC32C-NONE|NONE-CRC32C|NONE}",
1626         },{
1627             .name = "initiator-name",
1628             .type = QEMU_OPT_STRING,
1629             .help = "Initiator iqn name to use when connecting",
1630         },
1631         { /* end of list */ }
1632     },
1633 };
1634 
1635 static void iscsi_block_init(void)
1636 {
1637     bdrv_register(&bdrv_iscsi);
1638     qemu_add_opts(&qemu_iscsi_opts);
1639 }
1640 
1641 block_init(iscsi_block_init);
1642