xref: /openbmc/qemu/block/iscsi.c (revision 786a4ea8)
1 /*
2  * QEMU Block driver for iSCSI images
3  *
4  * Copyright (c) 2010-2011 Ronnie Sahlberg <ronniesahlberg@gmail.com>
5  * Copyright (c) 2012-2014 Peter Lieven <pl@kamp.de>
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a copy
8  * of this software and associated documentation files (the "Software"), to deal
9  * in the Software without restriction, including without limitation the rights
10  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11  * copies of the Software, and to permit persons to whom the Software is
12  * furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice shall be included in
15  * all copies or substantial portions of the Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23  * THE SOFTWARE.
24  */
25 
26 #include "config-host.h"
27 
28 #include <poll.h>
29 #include <math.h>
30 #include <arpa/inet.h>
31 #include "qemu-common.h"
32 #include "qemu/config-file.h"
33 #include "qemu/error-report.h"
34 #include "qemu/bitops.h"
35 #include "qemu/bitmap.h"
36 #include "block/block_int.h"
37 #include "block/scsi.h"
38 #include "qemu/iov.h"
39 #include "sysemu/sysemu.h"
40 #include "qmp-commands.h"
41 
42 #include <iscsi/iscsi.h>
43 #include <iscsi/scsi-lowlevel.h>
44 
45 #ifdef __linux__
46 #include <scsi/sg.h>
47 #include <block/scsi.h>
48 #endif
49 
/* Per-LUN driver state; stored in bs->opaque of the block device. */
typedef struct IscsiLun {
    /* libiscsi connection context used for every command on this LUN */
    struct iscsi_context *iscsi;
    /* AioContext in which fd handlers, bottom halves and timers run */
    AioContext *aio_context;
    /* LUN number passed to the libiscsi task helpers */
    int lun;
    /* peripheral device type; reported through SG_GET_SCSI_ID */
    enum scsi_inquiry_peripheral_device_type type;
    /* logical block size of the LUN in bytes */
    int block_size;
    /* capacity of the LUN in logical blocks */
    uint64_t num_blocks;
    /* event mask last registered by iscsi_set_events() */
    int events;
    /* NOTE(review): presumably drives periodic NOP-Out keepalives; not
     * referenced in this part of the file */
    QEMUTimer *nop_timer;
    /* re-armed by iscsi_set_events() while libiscsi reports no events */
    QEMUTimer *event_timer;
    /* non-zero if the LUN supports logical block provisioning */
    uint8_t lbpme;
    /* non-zero if deallocated blocks are reported as reading zeroes */
    uint8_t lbprz;
    /* cleared once the target rejects a WRITE SAME command */
    uint8_t has_write_same;
    /* logical block provisioning VPD data (lbpu, lbpws, lbpws10 are used) */
    struct scsi_inquiry_logical_block_provisioning lbp;
    /* block limits VPD data; NOTE(review): not referenced in this part of
     * the file */
    struct scsi_inquiry_block_limits bl;
    /* lazily allocated block_size bytes of zeroes used for WRITE SAME */
    unsigned char *zeroblock;
    /* one bit per cluster, set if the cluster may contain data;
     * NULL disables allocation tracking */
    unsigned long *allocationmap;
    /* number of qemu sectors covered by one allocationmap bit */
    int cluster_sectors;
    /* use the 16 byte READ/WRITE CDBs instead of the 10 byte variants */
    bool use_16_for_rw;
    /* NOTE(review): presumably set when the LUN is read-only; not
     * referenced in this part of the file */
    bool write_protected;
} IscsiLun;
71 
/* State of one coroutine-based SCSI command, shared between the issuing
 * coroutine and iscsi_co_generic_cb(). */
typedef struct IscsiTask {
    /* SCSI status passed to the completion callback */
    int status;
    /* set to 1 once the issuing coroutine may stop waiting */
    int complete;
    /* number of failed completions seen so far for this request */
    int retries;
    /* set to 1 by the callback if the command should be re-issued */
    int do_retry;
    /* libiscsi task of the command; owned (and freed) by the issuer */
    struct scsi_task *task;
    /* coroutine to re-enter on completion; may be NULL */
    Coroutine *co;
    /* bottom half used to re-enter the coroutine */
    QEMUBH *bh;
    IscsiLun *iscsilun;
    /* delays the retry of a command that failed with BUSY status */
    QEMUTimer retry_timer;
} IscsiTask;
83 
/* AIO control block for callback-based requests (used by the SG_IO path). */
typedef struct IscsiAIOCB {
    /* generic AIOCB header; carries the completion callback and opaque */
    BlockAIOCB common;
    /* NOTE(review): not referenced in this part of the file */
    QEMUIOVector *qiov;
    /* bottom half that runs iscsi_bh_cb() to complete the request */
    QEMUBH *bh;
    IscsiLun *iscsilun;
    /* libiscsi task of the command; freed in iscsi_bh_cb() */
    struct scsi_task *task;
    /* buffer released with g_free() when the request completes */
    uint8_t *buf;
    /* -EINPROGRESS while pending, final result afterwards */
    int status;
    /* NOTE(review): sector_num/nb_sectors are not referenced in this part
     * of the file */
    int64_t sector_num;
    int nb_sectors;
#ifdef __linux__
    /* SG_IO header supplied by the ioctl caller */
    sg_io_hdr_t *ioh;
#endif
} IscsiAIOCB;
98 
/* delay in ms before iscsi_set_events() polls libiscsi again when it
 * reported no events */
#define EVENT_INTERVAL 250
/* NOTE(review): NOP_INTERVAL and MAX_NOP_FAILURES are not referenced in
 * this part of the file; presumably keepalive period (ms) and failure limit */
#define NOP_INTERVAL 5000
#define MAX_NOP_FAILURES 3
/* maximum number of retries per command: one per entry of the table below */
#define ISCSI_CMD_RETRIES ARRAY_SIZE(iscsi_retry_times)
/* mean delay in ms (input to exp_random()) before the n-th retry of a
 * command that failed with BUSY status */
static const unsigned iscsi_retry_times[] = {8, 32, 128, 512, 2048};
104 
/* This threshold is a trade-off knob to choose between
 * the potential additional overhead of an extra GET_LBA_STATUS request
 * vs. unnecessarily reading a lot of zero sectors over the wire.
 * If a read request is greater than or equal to ISCSI_CHECKALLOC_THRES
 * sectors, we first check the allocation status of the area covered by
 * the request if the allocationmap indicates that the area might be
 * unallocated. */
112 #define ISCSI_CHECKALLOC_THRES 64
113 
114 static void
115 iscsi_bh_cb(void *p)
116 {
117     IscsiAIOCB *acb = p;
118 
119     qemu_bh_delete(acb->bh);
120 
121     g_free(acb->buf);
122     acb->buf = NULL;
123 
124     acb->common.cb(acb->common.opaque, acb->status);
125 
126     if (acb->task != NULL) {
127         scsi_free_scsi_task(acb->task);
128         acb->task = NULL;
129     }
130 
131     qemu_aio_unref(acb);
132 }
133 
134 static void
135 iscsi_schedule_bh(IscsiAIOCB *acb)
136 {
137     if (acb->bh) {
138         return;
139     }
140     acb->bh = aio_bh_new(acb->iscsilun->aio_context, iscsi_bh_cb, acb);
141     qemu_bh_schedule(acb->bh);
142 }
143 
144 static void iscsi_co_generic_bh_cb(void *opaque)
145 {
146     struct IscsiTask *iTask = opaque;
147     iTask->complete = 1;
148     qemu_bh_delete(iTask->bh);
149     qemu_coroutine_enter(iTask->co, NULL);
150 }
151 
152 static void iscsi_retry_timer_expired(void *opaque)
153 {
154     struct IscsiTask *iTask = opaque;
155     iTask->complete = 1;
156     if (iTask->co) {
157         qemu_coroutine_enter(iTask->co, NULL);
158     }
159 }
160 
161 static inline unsigned exp_random(double mean)
162 {
163     return -mean * log((double)rand() / RAND_MAX);
164 }
165 
166 static void
167 iscsi_co_generic_cb(struct iscsi_context *iscsi, int status,
168                         void *command_data, void *opaque)
169 {
170     struct IscsiTask *iTask = opaque;
171     struct scsi_task *task = command_data;
172 
173     iTask->status = status;
174     iTask->do_retry = 0;
175     iTask->task = task;
176 
177     if (status != SCSI_STATUS_GOOD) {
178         if (iTask->retries++ < ISCSI_CMD_RETRIES) {
179             if (status == SCSI_STATUS_CHECK_CONDITION
180                 && task->sense.key == SCSI_SENSE_UNIT_ATTENTION) {
181                 error_report("iSCSI CheckCondition: %s",
182                              iscsi_get_error(iscsi));
183                 iTask->do_retry = 1;
184                 goto out;
185             }
186             if (status == SCSI_STATUS_BUSY) {
187                 unsigned retry_time =
188                     exp_random(iscsi_retry_times[iTask->retries - 1]);
189                 error_report("iSCSI Busy (retry #%u in %u ms): %s",
190                              iTask->retries, retry_time,
191                              iscsi_get_error(iscsi));
192                 aio_timer_init(iTask->iscsilun->aio_context,
193                                &iTask->retry_timer, QEMU_CLOCK_REALTIME,
194                                SCALE_MS, iscsi_retry_timer_expired, iTask);
195                 timer_mod(&iTask->retry_timer,
196                           qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + retry_time);
197                 iTask->do_retry = 1;
198                 return;
199             }
200         }
201         error_report("iSCSI Failure: %s", iscsi_get_error(iscsi));
202     }
203 
204 out:
205     if (iTask->co) {
206         iTask->bh = aio_bh_new(iTask->iscsilun->aio_context,
207                                iscsi_co_generic_bh_cb, iTask);
208         qemu_bh_schedule(iTask->bh);
209     } else {
210         iTask->complete = 1;
211     }
212 }
213 
214 static void iscsi_co_init_iscsitask(IscsiLun *iscsilun, struct IscsiTask *iTask)
215 {
216     *iTask = (struct IscsiTask) {
217         .co         = qemu_coroutine_self(),
218         .iscsilun   = iscsilun,
219     };
220 }
221 
222 static void
223 iscsi_abort_task_cb(struct iscsi_context *iscsi, int status, void *command_data,
224                     void *private_data)
225 {
226     IscsiAIOCB *acb = private_data;
227 
228     acb->status = -ECANCELED;
229     iscsi_schedule_bh(acb);
230 }
231 
232 static void
233 iscsi_aio_cancel(BlockAIOCB *blockacb)
234 {
235     IscsiAIOCB *acb = (IscsiAIOCB *)blockacb;
236     IscsiLun *iscsilun = acb->iscsilun;
237 
238     if (acb->status != -EINPROGRESS) {
239         return;
240     }
241 
242     /* send a task mgmt call to the target to cancel the task on the target */
243     iscsi_task_mgmt_abort_task_async(iscsilun->iscsi, acb->task,
244                                      iscsi_abort_task_cb, acb);
245 
246 }
247 
/* AIOCB type descriptor passed to qemu_aio_get(): gives the size of
 * IscsiAIOCB and the asynchronous cancel handler. */
static const AIOCBInfo iscsi_aiocb_info = {
    .aiocb_size         = sizeof(IscsiAIOCB),
    .cancel_async       = iscsi_aio_cancel,
};
252 
253 
254 static void iscsi_process_read(void *arg);
255 static void iscsi_process_write(void *arg);
256 
257 static void
258 iscsi_set_events(IscsiLun *iscsilun)
259 {
260     struct iscsi_context *iscsi = iscsilun->iscsi;
261     int ev = iscsi_which_events(iscsi);
262 
263     if (ev != iscsilun->events) {
264         aio_set_fd_handler(iscsilun->aio_context,
265                            iscsi_get_fd(iscsi),
266                            (ev & POLLIN) ? iscsi_process_read : NULL,
267                            (ev & POLLOUT) ? iscsi_process_write : NULL,
268                            iscsilun);
269         iscsilun->events = ev;
270     }
271 
272     /* newer versions of libiscsi may return zero events. In this
273      * case start a timer to ensure we are able to return to service
274      * once this situation changes. */
275     if (!ev) {
276         timer_mod(iscsilun->event_timer,
277                   qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + EVENT_INTERVAL);
278     }
279 }
280 
281 static void iscsi_timed_set_events(void *opaque)
282 {
283     IscsiLun *iscsilun = opaque;
284     iscsi_set_events(iscsilun);
285 }
286 
287 static void
288 iscsi_process_read(void *arg)
289 {
290     IscsiLun *iscsilun = arg;
291     struct iscsi_context *iscsi = iscsilun->iscsi;
292 
293     iscsi_service(iscsi, POLLIN);
294     iscsi_set_events(iscsilun);
295 }
296 
297 static void
298 iscsi_process_write(void *arg)
299 {
300     IscsiLun *iscsilun = arg;
301     struct iscsi_context *iscsi = iscsilun->iscsi;
302 
303     iscsi_service(iscsi, POLLOUT);
304     iscsi_set_events(iscsilun);
305 }
306 
307 static int64_t sector_lun2qemu(int64_t sector, IscsiLun *iscsilun)
308 {
309     return sector * iscsilun->block_size / BDRV_SECTOR_SIZE;
310 }
311 
312 static int64_t sector_qemu2lun(int64_t sector, IscsiLun *iscsilun)
313 {
314     return sector * BDRV_SECTOR_SIZE / iscsilun->block_size;
315 }
316 
317 static bool is_request_lun_aligned(int64_t sector_num, int nb_sectors,
318                                       IscsiLun *iscsilun)
319 {
320     if ((sector_num * BDRV_SECTOR_SIZE) % iscsilun->block_size ||
321         (nb_sectors * BDRV_SECTOR_SIZE) % iscsilun->block_size) {
322             error_report("iSCSI misaligned request: "
323                          "iscsilun->block_size %u, sector_num %" PRIi64
324                          ", nb_sectors %d",
325                          iscsilun->block_size, sector_num, nb_sectors);
326             return 0;
327     }
328     return 1;
329 }
330 
331 static unsigned long *iscsi_allocationmap_init(IscsiLun *iscsilun)
332 {
333     return bitmap_try_new(DIV_ROUND_UP(sector_lun2qemu(iscsilun->num_blocks,
334                                                        iscsilun),
335                                        iscsilun->cluster_sectors));
336 }
337 
338 static void iscsi_allocationmap_set(IscsiLun *iscsilun, int64_t sector_num,
339                                     int nb_sectors)
340 {
341     if (iscsilun->allocationmap == NULL) {
342         return;
343     }
344     bitmap_set(iscsilun->allocationmap,
345                sector_num / iscsilun->cluster_sectors,
346                DIV_ROUND_UP(nb_sectors, iscsilun->cluster_sectors));
347 }
348 
349 static void iscsi_allocationmap_clear(IscsiLun *iscsilun, int64_t sector_num,
350                                       int nb_sectors)
351 {
352     int64_t cluster_num, nb_clusters;
353     if (iscsilun->allocationmap == NULL) {
354         return;
355     }
356     cluster_num = DIV_ROUND_UP(sector_num, iscsilun->cluster_sectors);
357     nb_clusters = (sector_num + nb_sectors) / iscsilun->cluster_sectors
358                   - cluster_num;
359     if (nb_clusters > 0) {
360         bitmap_clear(iscsilun->allocationmap, cluster_num, nb_clusters);
361     }
362 }
363 
364 static int coroutine_fn iscsi_co_writev(BlockDriverState *bs,
365                                         int64_t sector_num, int nb_sectors,
366                                         QEMUIOVector *iov)
367 {
368     IscsiLun *iscsilun = bs->opaque;
369     struct IscsiTask iTask;
370     uint64_t lba;
371     uint32_t num_sectors;
372 
373     if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
374         return -EINVAL;
375     }
376 
377     if (bs->bl.max_transfer_length && nb_sectors > bs->bl.max_transfer_length) {
378         error_report("iSCSI Error: Write of %d sectors exceeds max_xfer_len "
379                      "of %d sectors", nb_sectors, bs->bl.max_transfer_length);
380         return -EINVAL;
381     }
382 
383     lba = sector_qemu2lun(sector_num, iscsilun);
384     num_sectors = sector_qemu2lun(nb_sectors, iscsilun);
385     iscsi_co_init_iscsitask(iscsilun, &iTask);
386 retry:
387     if (iscsilun->use_16_for_rw) {
388         iTask.task = iscsi_write16_task(iscsilun->iscsi, iscsilun->lun, lba,
389                                         NULL, num_sectors * iscsilun->block_size,
390                                         iscsilun->block_size, 0, 0, 0, 0, 0,
391                                         iscsi_co_generic_cb, &iTask);
392     } else {
393         iTask.task = iscsi_write10_task(iscsilun->iscsi, iscsilun->lun, lba,
394                                         NULL, num_sectors * iscsilun->block_size,
395                                         iscsilun->block_size, 0, 0, 0, 0, 0,
396                                         iscsi_co_generic_cb, &iTask);
397     }
398     if (iTask.task == NULL) {
399         return -ENOMEM;
400     }
401     scsi_task_set_iov_out(iTask.task, (struct scsi_iovec *) iov->iov,
402                           iov->niov);
403     while (!iTask.complete) {
404         iscsi_set_events(iscsilun);
405         qemu_coroutine_yield();
406     }
407 
408     if (iTask.task != NULL) {
409         scsi_free_scsi_task(iTask.task);
410         iTask.task = NULL;
411     }
412 
413     if (iTask.do_retry) {
414         iTask.complete = 0;
415         goto retry;
416     }
417 
418     if (iTask.status != SCSI_STATUS_GOOD) {
419         return -EIO;
420     }
421 
422     iscsi_allocationmap_set(iscsilun, sector_num, nb_sectors);
423 
424     return 0;
425 }
426 
427 
428 static bool iscsi_allocationmap_is_allocated(IscsiLun *iscsilun,
429                                              int64_t sector_num, int nb_sectors)
430 {
431     unsigned long size;
432     if (iscsilun->allocationmap == NULL) {
433         return true;
434     }
435     size = DIV_ROUND_UP(sector_num + nb_sectors, iscsilun->cluster_sectors);
436     return !(find_next_bit(iscsilun->allocationmap, size,
437                            sector_num / iscsilun->cluster_sectors) == size);
438 }
439 
440 static int64_t coroutine_fn iscsi_co_get_block_status(BlockDriverState *bs,
441                                                   int64_t sector_num,
442                                                   int nb_sectors, int *pnum)
443 {
444     IscsiLun *iscsilun = bs->opaque;
445     struct scsi_get_lba_status *lbas = NULL;
446     struct scsi_lba_status_descriptor *lbasd = NULL;
447     struct IscsiTask iTask;
448     int64_t ret;
449 
450     iscsi_co_init_iscsitask(iscsilun, &iTask);
451 
452     if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
453         ret = -EINVAL;
454         goto out;
455     }
456 
457     /* default to all sectors allocated */
458     ret = BDRV_BLOCK_DATA;
459     ret |= (sector_num << BDRV_SECTOR_BITS) | BDRV_BLOCK_OFFSET_VALID;
460     *pnum = nb_sectors;
461 
462     /* LUN does not support logical block provisioning */
463     if (iscsilun->lbpme == 0) {
464         goto out;
465     }
466 
467 retry:
468     if (iscsi_get_lba_status_task(iscsilun->iscsi, iscsilun->lun,
469                                   sector_qemu2lun(sector_num, iscsilun),
470                                   8 + 16, iscsi_co_generic_cb,
471                                   &iTask) == NULL) {
472         ret = -ENOMEM;
473         goto out;
474     }
475 
476     while (!iTask.complete) {
477         iscsi_set_events(iscsilun);
478         qemu_coroutine_yield();
479     }
480 
481     if (iTask.do_retry) {
482         if (iTask.task != NULL) {
483             scsi_free_scsi_task(iTask.task);
484             iTask.task = NULL;
485         }
486         iTask.complete = 0;
487         goto retry;
488     }
489 
490     if (iTask.status != SCSI_STATUS_GOOD) {
491         /* in case the get_lba_status_callout fails (i.e.
492          * because the device is busy or the cmd is not
493          * supported) we pretend all blocks are allocated
494          * for backwards compatibility */
495         goto out;
496     }
497 
498     lbas = scsi_datain_unmarshall(iTask.task);
499     if (lbas == NULL) {
500         ret = -EIO;
501         goto out;
502     }
503 
504     lbasd = &lbas->descriptors[0];
505 
506     if (sector_qemu2lun(sector_num, iscsilun) != lbasd->lba) {
507         ret = -EIO;
508         goto out;
509     }
510 
511     *pnum = sector_lun2qemu(lbasd->num_blocks, iscsilun);
512 
513     if (lbasd->provisioning == SCSI_PROVISIONING_TYPE_DEALLOCATED ||
514         lbasd->provisioning == SCSI_PROVISIONING_TYPE_ANCHORED) {
515         ret &= ~BDRV_BLOCK_DATA;
516         if (iscsilun->lbprz) {
517             ret |= BDRV_BLOCK_ZERO;
518         }
519     }
520 
521     if (ret & BDRV_BLOCK_ZERO) {
522         iscsi_allocationmap_clear(iscsilun, sector_num, *pnum);
523     } else {
524         iscsi_allocationmap_set(iscsilun, sector_num, *pnum);
525     }
526 
527     if (*pnum > nb_sectors) {
528         *pnum = nb_sectors;
529     }
530 out:
531     if (iTask.task != NULL) {
532         scsi_free_scsi_task(iTask.task);
533     }
534     return ret;
535 }
536 
537 static int coroutine_fn iscsi_co_readv(BlockDriverState *bs,
538                                        int64_t sector_num, int nb_sectors,
539                                        QEMUIOVector *iov)
540 {
541     IscsiLun *iscsilun = bs->opaque;
542     struct IscsiTask iTask;
543     uint64_t lba;
544     uint32_t num_sectors;
545 
546     if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
547         return -EINVAL;
548     }
549 
550     if (bs->bl.max_transfer_length && nb_sectors > bs->bl.max_transfer_length) {
551         error_report("iSCSI Error: Read of %d sectors exceeds max_xfer_len "
552                      "of %d sectors", nb_sectors, bs->bl.max_transfer_length);
553         return -EINVAL;
554     }
555 
556     if (iscsilun->lbprz && nb_sectors >= ISCSI_CHECKALLOC_THRES &&
557         !iscsi_allocationmap_is_allocated(iscsilun, sector_num, nb_sectors)) {
558         int64_t ret;
559         int pnum;
560         ret = iscsi_co_get_block_status(bs, sector_num, INT_MAX, &pnum);
561         if (ret < 0) {
562             return ret;
563         }
564         if (ret & BDRV_BLOCK_ZERO && pnum >= nb_sectors) {
565             qemu_iovec_memset(iov, 0, 0x00, iov->size);
566             return 0;
567         }
568     }
569 
570     lba = sector_qemu2lun(sector_num, iscsilun);
571     num_sectors = sector_qemu2lun(nb_sectors, iscsilun);
572 
573     iscsi_co_init_iscsitask(iscsilun, &iTask);
574 retry:
575     if (iscsilun->use_16_for_rw) {
576         iTask.task = iscsi_read16_task(iscsilun->iscsi, iscsilun->lun, lba,
577                                        num_sectors * iscsilun->block_size,
578                                        iscsilun->block_size, 0, 0, 0, 0, 0,
579                                        iscsi_co_generic_cb, &iTask);
580     } else {
581         iTask.task = iscsi_read10_task(iscsilun->iscsi, iscsilun->lun, lba,
582                                        num_sectors * iscsilun->block_size,
583                                        iscsilun->block_size,
584                                        0, 0, 0, 0, 0,
585                                        iscsi_co_generic_cb, &iTask);
586     }
587     if (iTask.task == NULL) {
588         return -ENOMEM;
589     }
590     scsi_task_set_iov_in(iTask.task, (struct scsi_iovec *) iov->iov, iov->niov);
591 
592     while (!iTask.complete) {
593         iscsi_set_events(iscsilun);
594         qemu_coroutine_yield();
595     }
596 
597     if (iTask.task != NULL) {
598         scsi_free_scsi_task(iTask.task);
599         iTask.task = NULL;
600     }
601 
602     if (iTask.do_retry) {
603         iTask.complete = 0;
604         goto retry;
605     }
606 
607     if (iTask.status != SCSI_STATUS_GOOD) {
608         return -EIO;
609     }
610 
611     return 0;
612 }
613 
614 static int coroutine_fn iscsi_co_flush(BlockDriverState *bs)
615 {
616     IscsiLun *iscsilun = bs->opaque;
617     struct IscsiTask iTask;
618 
619     if (bs->sg) {
620         return 0;
621     }
622 
623     iscsi_co_init_iscsitask(iscsilun, &iTask);
624 
625 retry:
626     if (iscsi_synchronizecache10_task(iscsilun->iscsi, iscsilun->lun, 0, 0, 0,
627                                       0, iscsi_co_generic_cb, &iTask) == NULL) {
628         return -ENOMEM;
629     }
630 
631     while (!iTask.complete) {
632         iscsi_set_events(iscsilun);
633         qemu_coroutine_yield();
634     }
635 
636     if (iTask.task != NULL) {
637         scsi_free_scsi_task(iTask.task);
638         iTask.task = NULL;
639     }
640 
641     if (iTask.do_retry) {
642         iTask.complete = 0;
643         goto retry;
644     }
645 
646     if (iTask.status != SCSI_STATUS_GOOD) {
647         return -EIO;
648     }
649 
650     return 0;
651 }
652 
653 #ifdef __linux__
654 static void
655 iscsi_aio_ioctl_cb(struct iscsi_context *iscsi, int status,
656                      void *command_data, void *opaque)
657 {
658     IscsiAIOCB *acb = opaque;
659 
660     g_free(acb->buf);
661     acb->buf = NULL;
662 
663     acb->status = 0;
664     if (status < 0) {
665         error_report("Failed to ioctl(SG_IO) to iSCSI lun. %s",
666                      iscsi_get_error(iscsi));
667         acb->status = -EIO;
668     }
669 
670     acb->ioh->driver_status = 0;
671     acb->ioh->host_status   = 0;
672     acb->ioh->resid         = 0;
673 
674 #define SG_ERR_DRIVER_SENSE    0x08
675 
676     if (status == SCSI_STATUS_CHECK_CONDITION && acb->task->datain.size >= 2) {
677         int ss;
678 
679         acb->ioh->driver_status |= SG_ERR_DRIVER_SENSE;
680 
681         acb->ioh->sb_len_wr = acb->task->datain.size - 2;
682         ss = (acb->ioh->mx_sb_len >= acb->ioh->sb_len_wr) ?
683              acb->ioh->mx_sb_len : acb->ioh->sb_len_wr;
684         memcpy(acb->ioh->sbp, &acb->task->datain.data[2], ss);
685     }
686 
687     iscsi_schedule_bh(acb);
688 }
689 
690 static BlockAIOCB *iscsi_aio_ioctl(BlockDriverState *bs,
691         unsigned long int req, void *buf,
692         BlockCompletionFunc *cb, void *opaque)
693 {
694     IscsiLun *iscsilun = bs->opaque;
695     struct iscsi_context *iscsi = iscsilun->iscsi;
696     struct iscsi_data data;
697     IscsiAIOCB *acb;
698 
699     assert(req == SG_IO);
700 
701     acb = qemu_aio_get(&iscsi_aiocb_info, bs, cb, opaque);
702 
703     acb->iscsilun = iscsilun;
704     acb->bh          = NULL;
705     acb->status      = -EINPROGRESS;
706     acb->buf         = NULL;
707     acb->ioh         = buf;
708 
709     acb->task = malloc(sizeof(struct scsi_task));
710     if (acb->task == NULL) {
711         error_report("iSCSI: Failed to allocate task for scsi command. %s",
712                      iscsi_get_error(iscsi));
713         qemu_aio_unref(acb);
714         return NULL;
715     }
716     memset(acb->task, 0, sizeof(struct scsi_task));
717 
718     switch (acb->ioh->dxfer_direction) {
719     case SG_DXFER_TO_DEV:
720         acb->task->xfer_dir = SCSI_XFER_WRITE;
721         break;
722     case SG_DXFER_FROM_DEV:
723         acb->task->xfer_dir = SCSI_XFER_READ;
724         break;
725     default:
726         acb->task->xfer_dir = SCSI_XFER_NONE;
727         break;
728     }
729 
730     acb->task->cdb_size = acb->ioh->cmd_len;
731     memcpy(&acb->task->cdb[0], acb->ioh->cmdp, acb->ioh->cmd_len);
732     acb->task->expxferlen = acb->ioh->dxfer_len;
733 
734     data.size = 0;
735     if (acb->task->xfer_dir == SCSI_XFER_WRITE) {
736         if (acb->ioh->iovec_count == 0) {
737             data.data = acb->ioh->dxferp;
738             data.size = acb->ioh->dxfer_len;
739         } else {
740             scsi_task_set_iov_out(acb->task,
741                                  (struct scsi_iovec *) acb->ioh->dxferp,
742                                  acb->ioh->iovec_count);
743         }
744     }
745 
746     if (iscsi_scsi_command_async(iscsi, iscsilun->lun, acb->task,
747                                  iscsi_aio_ioctl_cb,
748                                  (data.size > 0) ? &data : NULL,
749                                  acb) != 0) {
750         scsi_free_scsi_task(acb->task);
751         qemu_aio_unref(acb);
752         return NULL;
753     }
754 
755     /* tell libiscsi to read straight into the buffer we got from ioctl */
756     if (acb->task->xfer_dir == SCSI_XFER_READ) {
757         if (acb->ioh->iovec_count == 0) {
758             scsi_task_add_data_in_buffer(acb->task,
759                                          acb->ioh->dxfer_len,
760                                          acb->ioh->dxferp);
761         } else {
762             scsi_task_set_iov_in(acb->task,
763                                  (struct scsi_iovec *) acb->ioh->dxferp,
764                                  acb->ioh->iovec_count);
765         }
766     }
767 
768     iscsi_set_events(iscsilun);
769 
770     return &acb->common;
771 }
772 
/* Completion callback of the synchronous SG_IO path: stores the final
 * status in the int that opaque points to. */
static void ioctl_cb(void *opaque, int status)
{
    *(int *)opaque = status;
}
778 
779 static int iscsi_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
780 {
781     IscsiLun *iscsilun = bs->opaque;
782     int status;
783 
784     switch (req) {
785     case SG_GET_VERSION_NUM:
786         *(int *)buf = 30000;
787         break;
788     case SG_GET_SCSI_ID:
789         ((struct sg_scsi_id *)buf)->scsi_type = iscsilun->type;
790         break;
791     case SG_IO:
792         status = -EINPROGRESS;
793         iscsi_aio_ioctl(bs, req, buf, ioctl_cb, &status);
794 
795         while (status == -EINPROGRESS) {
796             aio_poll(iscsilun->aio_context, true);
797         }
798 
799         return 0;
800     default:
801         return -1;
802     }
803     return 0;
804 }
805 #endif
806 
807 static int64_t
808 iscsi_getlength(BlockDriverState *bs)
809 {
810     IscsiLun *iscsilun = bs->opaque;
811     int64_t len;
812 
813     len  = iscsilun->num_blocks;
814     len *= iscsilun->block_size;
815 
816     return len;
817 }
818 
819 static int
820 coroutine_fn iscsi_co_discard(BlockDriverState *bs, int64_t sector_num,
821                                    int nb_sectors)
822 {
823     IscsiLun *iscsilun = bs->opaque;
824     struct IscsiTask iTask;
825     struct unmap_list list;
826 
827     if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
828         return -EINVAL;
829     }
830 
831     if (!iscsilun->lbp.lbpu) {
832         /* UNMAP is not supported by the target */
833         return 0;
834     }
835 
836     list.lba = sector_qemu2lun(sector_num, iscsilun);
837     list.num = sector_qemu2lun(nb_sectors, iscsilun);
838 
839     iscsi_co_init_iscsitask(iscsilun, &iTask);
840 retry:
841     if (iscsi_unmap_task(iscsilun->iscsi, iscsilun->lun, 0, 0, &list, 1,
842                      iscsi_co_generic_cb, &iTask) == NULL) {
843         return -ENOMEM;
844     }
845 
846     while (!iTask.complete) {
847         iscsi_set_events(iscsilun);
848         qemu_coroutine_yield();
849     }
850 
851     if (iTask.task != NULL) {
852         scsi_free_scsi_task(iTask.task);
853         iTask.task = NULL;
854     }
855 
856     if (iTask.do_retry) {
857         iTask.complete = 0;
858         goto retry;
859     }
860 
861     if (iTask.status == SCSI_STATUS_CHECK_CONDITION) {
862         /* the target might fail with a check condition if it
863            is not happy with the alignment of the UNMAP request
864            we silently fail in this case */
865         return 0;
866     }
867 
868     if (iTask.status != SCSI_STATUS_GOOD) {
869         return -EIO;
870     }
871 
872     iscsi_allocationmap_clear(iscsilun, sector_num, nb_sectors);
873 
874     return 0;
875 }
876 
877 static int
878 coroutine_fn iscsi_co_write_zeroes(BlockDriverState *bs, int64_t sector_num,
879                                    int nb_sectors, BdrvRequestFlags flags)
880 {
881     IscsiLun *iscsilun = bs->opaque;
882     struct IscsiTask iTask;
883     uint64_t lba;
884     uint32_t nb_blocks;
885     bool use_16_for_ws = iscsilun->use_16_for_rw;
886 
887     if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
888         return -EINVAL;
889     }
890 
891     if (flags & BDRV_REQ_MAY_UNMAP) {
892         if (!use_16_for_ws && !iscsilun->lbp.lbpws10) {
893             /* WRITESAME10 with UNMAP is unsupported try WRITESAME16 */
894             use_16_for_ws = true;
895         }
896         if (use_16_for_ws && !iscsilun->lbp.lbpws) {
897             /* WRITESAME16 with UNMAP is not supported by the target,
898              * fall back and try WRITESAME10/16 without UNMAP */
899             flags &= ~BDRV_REQ_MAY_UNMAP;
900             use_16_for_ws = iscsilun->use_16_for_rw;
901         }
902     }
903 
904     if (!(flags & BDRV_REQ_MAY_UNMAP) && !iscsilun->has_write_same) {
905         /* WRITESAME without UNMAP is not supported by the target */
906         return -ENOTSUP;
907     }
908 
909     lba = sector_qemu2lun(sector_num, iscsilun);
910     nb_blocks = sector_qemu2lun(nb_sectors, iscsilun);
911 
912     if (iscsilun->zeroblock == NULL) {
913         iscsilun->zeroblock = g_try_malloc0(iscsilun->block_size);
914         if (iscsilun->zeroblock == NULL) {
915             return -ENOMEM;
916         }
917     }
918 
919     iscsi_co_init_iscsitask(iscsilun, &iTask);
920 retry:
921     if (use_16_for_ws) {
922         iTask.task = iscsi_writesame16_task(iscsilun->iscsi, iscsilun->lun, lba,
923                                             iscsilun->zeroblock, iscsilun->block_size,
924                                             nb_blocks, 0, !!(flags & BDRV_REQ_MAY_UNMAP),
925                                             0, 0, iscsi_co_generic_cb, &iTask);
926     } else {
927         iTask.task = iscsi_writesame10_task(iscsilun->iscsi, iscsilun->lun, lba,
928                                             iscsilun->zeroblock, iscsilun->block_size,
929                                             nb_blocks, 0, !!(flags & BDRV_REQ_MAY_UNMAP),
930                                             0, 0, iscsi_co_generic_cb, &iTask);
931     }
932     if (iTask.task == NULL) {
933         return -ENOMEM;
934     }
935 
936     while (!iTask.complete) {
937         iscsi_set_events(iscsilun);
938         qemu_coroutine_yield();
939     }
940 
941     if (iTask.status == SCSI_STATUS_CHECK_CONDITION &&
942         iTask.task->sense.key == SCSI_SENSE_ILLEGAL_REQUEST &&
943         (iTask.task->sense.ascq == SCSI_SENSE_ASCQ_INVALID_OPERATION_CODE ||
944          iTask.task->sense.ascq == SCSI_SENSE_ASCQ_INVALID_FIELD_IN_CDB)) {
945         /* WRITE SAME is not supported by the target */
946         iscsilun->has_write_same = false;
947         scsi_free_scsi_task(iTask.task);
948         return -ENOTSUP;
949     }
950 
951     if (iTask.task != NULL) {
952         scsi_free_scsi_task(iTask.task);
953         iTask.task = NULL;
954     }
955 
956     if (iTask.do_retry) {
957         iTask.complete = 0;
958         goto retry;
959     }
960 
961     if (iTask.status != SCSI_STATUS_GOOD) {
962         return -EIO;
963     }
964 
965     if (flags & BDRV_REQ_MAY_UNMAP) {
966         iscsi_allocationmap_clear(iscsilun, sector_num, nb_sectors);
967     } else {
968         iscsi_allocationmap_set(iscsilun, sector_num, nb_sectors);
969     }
970 
971     return 0;
972 }
973 
974 static void parse_chap(struct iscsi_context *iscsi, const char *target,
975                        Error **errp)
976 {
977     QemuOptsList *list;
978     QemuOpts *opts;
979     const char *user = NULL;
980     const char *password = NULL;
981 
982     list = qemu_find_opts("iscsi");
983     if (!list) {
984         return;
985     }
986 
987     opts = qemu_opts_find(list, target);
988     if (opts == NULL) {
989         opts = QTAILQ_FIRST(&list->head);
990         if (!opts) {
991             return;
992         }
993     }
994 
995     user = qemu_opt_get(opts, "user");
996     if (!user) {
997         return;
998     }
999 
1000     password = qemu_opt_get(opts, "password");
1001     if (!password) {
1002         error_setg(errp, "CHAP username specified but no password was given");
1003         return;
1004     }
1005 
1006     if (iscsi_set_initiator_username_pwd(iscsi, user, password)) {
1007         error_setg(errp, "Failed to set initiator username and password");
1008     }
1009 }
1010 
1011 static void parse_header_digest(struct iscsi_context *iscsi, const char *target,
1012                                 Error **errp)
1013 {
1014     QemuOptsList *list;
1015     QemuOpts *opts;
1016     const char *digest = NULL;
1017 
1018     list = qemu_find_opts("iscsi");
1019     if (!list) {
1020         return;
1021     }
1022 
1023     opts = qemu_opts_find(list, target);
1024     if (opts == NULL) {
1025         opts = QTAILQ_FIRST(&list->head);
1026         if (!opts) {
1027             return;
1028         }
1029     }
1030 
1031     digest = qemu_opt_get(opts, "header-digest");
1032     if (!digest) {
1033         return;
1034     }
1035 
1036     if (!strcmp(digest, "CRC32C")) {
1037         iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_CRC32C);
1038     } else if (!strcmp(digest, "NONE")) {
1039         iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE);
1040     } else if (!strcmp(digest, "CRC32C-NONE")) {
1041         iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_CRC32C_NONE);
1042     } else if (!strcmp(digest, "NONE-CRC32C")) {
1043         iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE_CRC32C);
1044     } else {
1045         error_setg(errp, "Invalid header-digest setting : %s", digest);
1046     }
1047 }
1048 
1049 static char *parse_initiator_name(const char *target)
1050 {
1051     QemuOptsList *list;
1052     QemuOpts *opts;
1053     const char *name;
1054     char *iscsi_name;
1055     UuidInfo *uuid_info;
1056 
1057     list = qemu_find_opts("iscsi");
1058     if (list) {
1059         opts = qemu_opts_find(list, target);
1060         if (!opts) {
1061             opts = QTAILQ_FIRST(&list->head);
1062         }
1063         if (opts) {
1064             name = qemu_opt_get(opts, "initiator-name");
1065             if (name) {
1066                 return g_strdup(name);
1067             }
1068         }
1069     }
1070 
1071     uuid_info = qmp_query_uuid(NULL);
1072     if (strcmp(uuid_info->UUID, UUID_NONE) == 0) {
1073         name = qemu_get_vm_name();
1074     } else {
1075         name = uuid_info->UUID;
1076     }
1077     iscsi_name = g_strdup_printf("iqn.2008-11.org.linux-kvm%s%s",
1078                                  name ? ":" : "", name ? name : "");
1079     qapi_free_UuidInfo(uuid_info);
1080     return iscsi_name;
1081 }
1082 
1083 static void iscsi_nop_timed_event(void *opaque)
1084 {
1085     IscsiLun *iscsilun = opaque;
1086 
1087     if (iscsi_get_nops_in_flight(iscsilun->iscsi) > MAX_NOP_FAILURES) {
1088         error_report("iSCSI: NOP timeout. Reconnecting...");
1089         iscsi_reconnect(iscsilun->iscsi);
1090     }
1091 
1092     if (iscsi_nop_out_async(iscsilun->iscsi, NULL, NULL, 0, NULL) != 0) {
1093         error_report("iSCSI: failed to sent NOP-Out. Disabling NOP messages.");
1094         return;
1095     }
1096 
1097     timer_mod(iscsilun->nop_timer, qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + NOP_INTERVAL);
1098     iscsi_set_events(iscsilun);
1099 }
1100 
1101 static void iscsi_readcapacity_sync(IscsiLun *iscsilun, Error **errp)
1102 {
1103     struct scsi_task *task = NULL;
1104     struct scsi_readcapacity10 *rc10 = NULL;
1105     struct scsi_readcapacity16 *rc16 = NULL;
1106     int retries = ISCSI_CMD_RETRIES;
1107 
1108     do {
1109         if (task != NULL) {
1110             scsi_free_scsi_task(task);
1111             task = NULL;
1112         }
1113 
1114         switch (iscsilun->type) {
1115         case TYPE_DISK:
1116             task = iscsi_readcapacity16_sync(iscsilun->iscsi, iscsilun->lun);
1117             if (task != NULL && task->status == SCSI_STATUS_GOOD) {
1118                 rc16 = scsi_datain_unmarshall(task);
1119                 if (rc16 == NULL) {
1120                     error_setg(errp, "iSCSI: Failed to unmarshall readcapacity16 data.");
1121                 } else {
1122                     iscsilun->block_size = rc16->block_length;
1123                     iscsilun->num_blocks = rc16->returned_lba + 1;
1124                     iscsilun->lbpme = rc16->lbpme;
1125                     iscsilun->lbprz = rc16->lbprz;
1126                     iscsilun->use_16_for_rw = (rc16->returned_lba > 0xffffffff);
1127                 }
1128             }
1129             break;
1130         case TYPE_ROM:
1131             task = iscsi_readcapacity10_sync(iscsilun->iscsi, iscsilun->lun, 0, 0);
1132             if (task != NULL && task->status == SCSI_STATUS_GOOD) {
1133                 rc10 = scsi_datain_unmarshall(task);
1134                 if (rc10 == NULL) {
1135                     error_setg(errp, "iSCSI: Failed to unmarshall readcapacity10 data.");
1136                 } else {
1137                     iscsilun->block_size = rc10->block_size;
1138                     if (rc10->lba == 0) {
1139                         /* blank disk loaded */
1140                         iscsilun->num_blocks = 0;
1141                     } else {
1142                         iscsilun->num_blocks = rc10->lba + 1;
1143                     }
1144                 }
1145             }
1146             break;
1147         default:
1148             return;
1149         }
1150     } while (task != NULL && task->status == SCSI_STATUS_CHECK_CONDITION
1151              && task->sense.key == SCSI_SENSE_UNIT_ATTENTION
1152              && retries-- > 0);
1153 
1154     if (task == NULL || task->status != SCSI_STATUS_GOOD) {
1155         error_setg(errp, "iSCSI: failed to send readcapacity10 command.");
1156     }
1157     if (task) {
1158         scsi_free_scsi_task(task);
1159     }
1160 }
1161 
1162 /* TODO Convert to fine grained options */
1163 static QemuOptsList runtime_opts = {
1164     .name = "iscsi",
1165     .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
1166     .desc = {
1167         {
1168             .name = "filename",
1169             .type = QEMU_OPT_STRING,
1170             .help = "URL to the iscsi image",
1171         },
1172         { /* end of list */ }
1173     },
1174 };
1175 
1176 static struct scsi_task *iscsi_do_inquiry(struct iscsi_context *iscsi, int lun,
1177                                           int evpd, int pc, void **inq, Error **errp)
1178 {
1179     int full_size;
1180     struct scsi_task *task = NULL;
1181     task = iscsi_inquiry_sync(iscsi, lun, evpd, pc, 64);
1182     if (task == NULL || task->status != SCSI_STATUS_GOOD) {
1183         goto fail;
1184     }
1185     full_size = scsi_datain_getfullsize(task);
1186     if (full_size > task->datain.size) {
1187         scsi_free_scsi_task(task);
1188 
1189         /* we need more data for the full list */
1190         task = iscsi_inquiry_sync(iscsi, lun, evpd, pc, full_size);
1191         if (task == NULL || task->status != SCSI_STATUS_GOOD) {
1192             goto fail;
1193         }
1194     }
1195 
1196     *inq = scsi_datain_unmarshall(task);
1197     if (*inq == NULL) {
1198         error_setg(errp, "iSCSI: failed to unmarshall inquiry datain blob");
1199         goto fail_with_err;
1200     }
1201 
1202     return task;
1203 
1204 fail:
1205     error_setg(errp, "iSCSI: Inquiry command failed : %s",
1206                iscsi_get_error(iscsi));
1207 fail_with_err:
1208     if (task != NULL) {
1209         scsi_free_scsi_task(task);
1210     }
1211     return NULL;
1212 }
1213 
1214 static void iscsi_detach_aio_context(BlockDriverState *bs)
1215 {
1216     IscsiLun *iscsilun = bs->opaque;
1217 
1218     aio_set_fd_handler(iscsilun->aio_context,
1219                        iscsi_get_fd(iscsilun->iscsi),
1220                        NULL, NULL, NULL);
1221     iscsilun->events = 0;
1222 
1223     if (iscsilun->nop_timer) {
1224         timer_del(iscsilun->nop_timer);
1225         timer_free(iscsilun->nop_timer);
1226         iscsilun->nop_timer = NULL;
1227     }
1228     if (iscsilun->event_timer) {
1229         timer_del(iscsilun->event_timer);
1230         timer_free(iscsilun->event_timer);
1231         iscsilun->event_timer = NULL;
1232     }
1233 }
1234 
1235 static void iscsi_attach_aio_context(BlockDriverState *bs,
1236                                      AioContext *new_context)
1237 {
1238     IscsiLun *iscsilun = bs->opaque;
1239 
1240     iscsilun->aio_context = new_context;
1241     iscsi_set_events(iscsilun);
1242 
1243     /* Set up a timer for sending out iSCSI NOPs */
1244     iscsilun->nop_timer = aio_timer_new(iscsilun->aio_context,
1245                                         QEMU_CLOCK_REALTIME, SCALE_MS,
1246                                         iscsi_nop_timed_event, iscsilun);
1247     timer_mod(iscsilun->nop_timer,
1248               qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + NOP_INTERVAL);
1249 
1250     /* Prepare a timer for a delayed call to iscsi_set_events */
1251     iscsilun->event_timer = aio_timer_new(iscsilun->aio_context,
1252                                           QEMU_CLOCK_REALTIME, SCALE_MS,
1253                                           iscsi_timed_set_events, iscsilun);
1254 }
1255 
1256 static bool iscsi_is_write_protected(IscsiLun *iscsilun)
1257 {
1258     struct scsi_task *task;
1259     struct scsi_mode_sense *ms = NULL;
1260     bool wrprotected = false;
1261 
1262     task = iscsi_modesense6_sync(iscsilun->iscsi, iscsilun->lun,
1263                                  1, SCSI_MODESENSE_PC_CURRENT,
1264                                  0x3F, 0, 255);
1265     if (task == NULL) {
1266         error_report("iSCSI: Failed to send MODE_SENSE(6) command: %s",
1267                      iscsi_get_error(iscsilun->iscsi));
1268         goto out;
1269     }
1270 
1271     if (task->status != SCSI_STATUS_GOOD) {
1272         error_report("iSCSI: Failed MODE_SENSE(6), LUN assumed writable");
1273         goto out;
1274     }
1275     ms = scsi_datain_unmarshall(task);
1276     if (!ms) {
1277         error_report("iSCSI: Failed to unmarshall MODE_SENSE(6) data: %s",
1278                      iscsi_get_error(iscsilun->iscsi));
1279         goto out;
1280     }
1281     wrprotected = ms->device_specific_parameter & 0x80;
1282 
1283 out:
1284     if (task) {
1285         scsi_free_scsi_task(task);
1286     }
1287     return wrprotected;
1288 }
1289 
1290 /*
1291  * We support iscsi url's on the form
1292  * iscsi://[<username>%<password>@]<host>[:<port>]/<targetname>/<lun>
1293  */
1294 static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
1295                       Error **errp)
1296 {
1297     IscsiLun *iscsilun = bs->opaque;
1298     struct iscsi_context *iscsi = NULL;
1299     struct iscsi_url *iscsi_url = NULL;
1300     struct scsi_task *task = NULL;
1301     struct scsi_inquiry_standard *inq = NULL;
1302     struct scsi_inquiry_supported_pages *inq_vpd;
1303     char *initiator_name = NULL;
1304     QemuOpts *opts;
1305     Error *local_err = NULL;
1306     const char *filename;
1307     int i, ret = 0;
1308 
1309     if ((BDRV_SECTOR_SIZE % 512) != 0) {
1310         error_setg(errp, "iSCSI: Invalid BDRV_SECTOR_SIZE. "
1311                    "BDRV_SECTOR_SIZE(%lld) is not a multiple "
1312                    "of 512", BDRV_SECTOR_SIZE);
1313         return -EINVAL;
1314     }
1315 
1316     opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
1317     qemu_opts_absorb_qdict(opts, options, &local_err);
1318     if (local_err) {
1319         error_propagate(errp, local_err);
1320         ret = -EINVAL;
1321         goto out;
1322     }
1323 
1324     filename = qemu_opt_get(opts, "filename");
1325 
1326     iscsi_url = iscsi_parse_full_url(iscsi, filename);
1327     if (iscsi_url == NULL) {
1328         error_setg(errp, "Failed to parse URL : %s", filename);
1329         ret = -EINVAL;
1330         goto out;
1331     }
1332 
1333     memset(iscsilun, 0, sizeof(IscsiLun));
1334 
1335     initiator_name = parse_initiator_name(iscsi_url->target);
1336 
1337     iscsi = iscsi_create_context(initiator_name);
1338     if (iscsi == NULL) {
1339         error_setg(errp, "iSCSI: Failed to create iSCSI context.");
1340         ret = -ENOMEM;
1341         goto out;
1342     }
1343 
1344     if (iscsi_set_targetname(iscsi, iscsi_url->target)) {
1345         error_setg(errp, "iSCSI: Failed to set target name.");
1346         ret = -EINVAL;
1347         goto out;
1348     }
1349 
1350     if (iscsi_url->user[0] != '\0') {
1351         ret = iscsi_set_initiator_username_pwd(iscsi, iscsi_url->user,
1352                                               iscsi_url->passwd);
1353         if (ret != 0) {
1354             error_setg(errp, "Failed to set initiator username and password");
1355             ret = -EINVAL;
1356             goto out;
1357         }
1358     }
1359 
1360     /* check if we got CHAP username/password via the options */
1361     parse_chap(iscsi, iscsi_url->target, &local_err);
1362     if (local_err != NULL) {
1363         error_propagate(errp, local_err);
1364         ret = -EINVAL;
1365         goto out;
1366     }
1367 
1368     if (iscsi_set_session_type(iscsi, ISCSI_SESSION_NORMAL) != 0) {
1369         error_setg(errp, "iSCSI: Failed to set session type to normal.");
1370         ret = -EINVAL;
1371         goto out;
1372     }
1373 
1374     iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE_CRC32C);
1375 
1376     /* check if we got HEADER_DIGEST via the options */
1377     parse_header_digest(iscsi, iscsi_url->target, &local_err);
1378     if (local_err != NULL) {
1379         error_propagate(errp, local_err);
1380         ret = -EINVAL;
1381         goto out;
1382     }
1383 
1384     if (iscsi_full_connect_sync(iscsi, iscsi_url->portal, iscsi_url->lun) != 0) {
1385         error_setg(errp, "iSCSI: Failed to connect to LUN : %s",
1386             iscsi_get_error(iscsi));
1387         ret = -EINVAL;
1388         goto out;
1389     }
1390 
1391     iscsilun->iscsi = iscsi;
1392     iscsilun->aio_context = bdrv_get_aio_context(bs);
1393     iscsilun->lun   = iscsi_url->lun;
1394     iscsilun->has_write_same = true;
1395 
1396     task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 0, 0,
1397                             (void **) &inq, errp);
1398     if (task == NULL) {
1399         ret = -EINVAL;
1400         goto out;
1401     }
1402     iscsilun->type = inq->periperal_device_type;
1403     scsi_free_scsi_task(task);
1404     task = NULL;
1405 
1406     iscsilun->write_protected = iscsi_is_write_protected(iscsilun);
1407     /* Check the write protect flag of the LUN if we want to write */
1408     if (iscsilun->type == TYPE_DISK && (flags & BDRV_O_RDWR) &&
1409         iscsilun->write_protected) {
1410         error_setg(errp, "Cannot open a write protected LUN as read-write");
1411         ret = -EACCES;
1412         goto out;
1413     }
1414 
1415     iscsi_readcapacity_sync(iscsilun, &local_err);
1416     if (local_err != NULL) {
1417         error_propagate(errp, local_err);
1418         ret = -EINVAL;
1419         goto out;
1420     }
1421     bs->total_sectors = sector_lun2qemu(iscsilun->num_blocks, iscsilun);
1422     bs->request_alignment = iscsilun->block_size;
1423 
1424     /* We don't have any emulation for devices other than disks and CD-ROMs, so
1425      * this must be sg ioctl compatible. We force it to be sg, otherwise qemu
1426      * will try to read from the device to guess the image format.
1427      */
1428     if (iscsilun->type != TYPE_DISK && iscsilun->type != TYPE_ROM) {
1429         bs->sg = 1;
1430     }
1431 
1432     task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 1,
1433                             SCSI_INQUIRY_PAGECODE_SUPPORTED_VPD_PAGES,
1434                             (void **) &inq_vpd, errp);
1435     if (task == NULL) {
1436         ret = -EINVAL;
1437         goto out;
1438     }
1439     for (i = 0; i < inq_vpd->num_pages; i++) {
1440         struct scsi_task *inq_task;
1441         struct scsi_inquiry_logical_block_provisioning *inq_lbp;
1442         struct scsi_inquiry_block_limits *inq_bl;
1443         switch (inq_vpd->pages[i]) {
1444         case SCSI_INQUIRY_PAGECODE_LOGICAL_BLOCK_PROVISIONING:
1445             inq_task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 1,
1446                                         SCSI_INQUIRY_PAGECODE_LOGICAL_BLOCK_PROVISIONING,
1447                                         (void **) &inq_lbp, errp);
1448             if (inq_task == NULL) {
1449                 ret = -EINVAL;
1450                 goto out;
1451             }
1452             memcpy(&iscsilun->lbp, inq_lbp,
1453                    sizeof(struct scsi_inquiry_logical_block_provisioning));
1454             scsi_free_scsi_task(inq_task);
1455             break;
1456         case SCSI_INQUIRY_PAGECODE_BLOCK_LIMITS:
1457             inq_task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 1,
1458                                     SCSI_INQUIRY_PAGECODE_BLOCK_LIMITS,
1459                                     (void **) &inq_bl, errp);
1460             if (inq_task == NULL) {
1461                 ret = -EINVAL;
1462                 goto out;
1463             }
1464             memcpy(&iscsilun->bl, inq_bl,
1465                    sizeof(struct scsi_inquiry_block_limits));
1466             scsi_free_scsi_task(inq_task);
1467             break;
1468         default:
1469             break;
1470         }
1471     }
1472     scsi_free_scsi_task(task);
1473     task = NULL;
1474 
1475     iscsi_attach_aio_context(bs, iscsilun->aio_context);
1476 
1477     /* Guess the internal cluster (page) size of the iscsi target by the means
1478      * of opt_unmap_gran. Transfer the unmap granularity only if it has a
1479      * reasonable size */
1480     if (iscsilun->bl.opt_unmap_gran * iscsilun->block_size >= 4 * 1024 &&
1481         iscsilun->bl.opt_unmap_gran * iscsilun->block_size <= 16 * 1024 * 1024) {
1482         iscsilun->cluster_sectors = (iscsilun->bl.opt_unmap_gran *
1483                                      iscsilun->block_size) >> BDRV_SECTOR_BITS;
1484         if (iscsilun->lbprz && !(bs->open_flags & BDRV_O_NOCACHE)) {
1485             iscsilun->allocationmap = iscsi_allocationmap_init(iscsilun);
1486             if (iscsilun->allocationmap == NULL) {
1487                 ret = -ENOMEM;
1488             }
1489         }
1490     }
1491 
1492 out:
1493     qemu_opts_del(opts);
1494     g_free(initiator_name);
1495     if (iscsi_url != NULL) {
1496         iscsi_destroy_url(iscsi_url);
1497     }
1498     if (task != NULL) {
1499         scsi_free_scsi_task(task);
1500     }
1501 
1502     if (ret) {
1503         if (iscsi != NULL) {
1504             iscsi_destroy_context(iscsi);
1505         }
1506         memset(iscsilun, 0, sizeof(IscsiLun));
1507     }
1508     return ret;
1509 }
1510 
1511 static void iscsi_close(BlockDriverState *bs)
1512 {
1513     IscsiLun *iscsilun = bs->opaque;
1514     struct iscsi_context *iscsi = iscsilun->iscsi;
1515 
1516     iscsi_detach_aio_context(bs);
1517     iscsi_destroy_context(iscsi);
1518     g_free(iscsilun->zeroblock);
1519     g_free(iscsilun->allocationmap);
1520     memset(iscsilun, 0, sizeof(IscsiLun));
1521 }
1522 
1523 static int sector_limits_lun2qemu(int64_t sector, IscsiLun *iscsilun)
1524 {
1525     return MIN(sector_lun2qemu(sector, iscsilun), INT_MAX / 2 + 1);
1526 }
1527 
1528 static void iscsi_refresh_limits(BlockDriverState *bs, Error **errp)
1529 {
1530     /* We don't actually refresh here, but just return data queried in
1531      * iscsi_open(): iscsi targets don't change their limits. */
1532 
1533     IscsiLun *iscsilun = bs->opaque;
1534     uint32_t max_xfer_len = iscsilun->use_16_for_rw ? 0xffffffff : 0xffff;
1535 
1536     if (iscsilun->bl.max_xfer_len) {
1537         max_xfer_len = MIN(max_xfer_len, iscsilun->bl.max_xfer_len);
1538     }
1539 
1540     bs->bl.max_transfer_length = sector_limits_lun2qemu(max_xfer_len, iscsilun);
1541 
1542     if (iscsilun->lbp.lbpu) {
1543         if (iscsilun->bl.max_unmap < 0xffffffff) {
1544             bs->bl.max_discard =
1545                 sector_limits_lun2qemu(iscsilun->bl.max_unmap, iscsilun);
1546         }
1547         bs->bl.discard_alignment =
1548             sector_limits_lun2qemu(iscsilun->bl.opt_unmap_gran, iscsilun);
1549     }
1550 
1551     if (iscsilun->bl.max_ws_len < 0xffffffff) {
1552         bs->bl.max_write_zeroes =
1553             sector_limits_lun2qemu(iscsilun->bl.max_ws_len, iscsilun);
1554     }
1555     if (iscsilun->lbp.lbpws) {
1556         bs->bl.write_zeroes_alignment =
1557             sector_limits_lun2qemu(iscsilun->bl.opt_unmap_gran, iscsilun);
1558     }
1559     bs->bl.opt_transfer_length =
1560         sector_limits_lun2qemu(iscsilun->bl.opt_xfer_len, iscsilun);
1561 }
1562 
1563 /* Note that this will not re-establish a connection with an iSCSI target - it
1564  * is effectively a NOP.  */
1565 static int iscsi_reopen_prepare(BDRVReopenState *state,
1566                                 BlockReopenQueue *queue, Error **errp)
1567 {
1568     IscsiLun *iscsilun = state->bs->opaque;
1569 
1570     if (state->flags & BDRV_O_RDWR && iscsilun->write_protected) {
1571         error_setg(errp, "Cannot open a write protected LUN as read-write");
1572         return -EACCES;
1573     }
1574     return 0;
1575 }
1576 
1577 static int iscsi_truncate(BlockDriverState *bs, int64_t offset)
1578 {
1579     IscsiLun *iscsilun = bs->opaque;
1580     Error *local_err = NULL;
1581 
1582     if (iscsilun->type != TYPE_DISK) {
1583         return -ENOTSUP;
1584     }
1585 
1586     iscsi_readcapacity_sync(iscsilun, &local_err);
1587     if (local_err != NULL) {
1588         error_free(local_err);
1589         return -EIO;
1590     }
1591 
1592     if (offset > iscsi_getlength(bs)) {
1593         return -EINVAL;
1594     }
1595 
1596     if (iscsilun->allocationmap != NULL) {
1597         g_free(iscsilun->allocationmap);
1598         iscsilun->allocationmap = iscsi_allocationmap_init(iscsilun);
1599     }
1600 
1601     return 0;
1602 }
1603 
1604 static int iscsi_create(const char *filename, QemuOpts *opts, Error **errp)
1605 {
1606     int ret = 0;
1607     int64_t total_size = 0;
1608     BlockDriverState *bs;
1609     IscsiLun *iscsilun = NULL;
1610     QDict *bs_options;
1611 
1612     bs = bdrv_new();
1613 
1614     /* Read out options */
1615     total_size = DIV_ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
1616                               BDRV_SECTOR_SIZE);
1617     bs->opaque = g_new0(struct IscsiLun, 1);
1618     iscsilun = bs->opaque;
1619 
1620     bs_options = qdict_new();
1621     qdict_put(bs_options, "filename", qstring_from_str(filename));
1622     ret = iscsi_open(bs, bs_options, 0, NULL);
1623     QDECREF(bs_options);
1624 
1625     if (ret != 0) {
1626         goto out;
1627     }
1628     iscsi_detach_aio_context(bs);
1629     if (iscsilun->type != TYPE_DISK) {
1630         ret = -ENODEV;
1631         goto out;
1632     }
1633     if (bs->total_sectors < total_size) {
1634         ret = -ENOSPC;
1635         goto out;
1636     }
1637 
1638     ret = 0;
1639 out:
1640     if (iscsilun->iscsi != NULL) {
1641         iscsi_destroy_context(iscsilun->iscsi);
1642     }
1643     g_free(bs->opaque);
1644     bs->opaque = NULL;
1645     bdrv_unref(bs);
1646     return ret;
1647 }
1648 
1649 static int iscsi_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
1650 {
1651     IscsiLun *iscsilun = bs->opaque;
1652     bdi->unallocated_blocks_are_zero = !!iscsilun->lbprz;
1653     bdi->can_write_zeroes_with_unmap = iscsilun->lbprz && iscsilun->lbp.lbpws;
1654     bdi->cluster_size = iscsilun->cluster_sectors * BDRV_SECTOR_SIZE;
1655     return 0;
1656 }
1657 
1658 static QemuOptsList iscsi_create_opts = {
1659     .name = "iscsi-create-opts",
1660     .head = QTAILQ_HEAD_INITIALIZER(iscsi_create_opts.head),
1661     .desc = {
1662         {
1663             .name = BLOCK_OPT_SIZE,
1664             .type = QEMU_OPT_SIZE,
1665             .help = "Virtual disk size"
1666         },
1667         { /* end of list */ }
1668     }
1669 };
1670 
1671 static BlockDriver bdrv_iscsi = {
1672     .format_name     = "iscsi",
1673     .protocol_name   = "iscsi",
1674 
1675     .instance_size   = sizeof(IscsiLun),
1676     .bdrv_needs_filename = true,
1677     .bdrv_file_open  = iscsi_open,
1678     .bdrv_close      = iscsi_close,
1679     .bdrv_create     = iscsi_create,
1680     .create_opts     = &iscsi_create_opts,
1681     .bdrv_reopen_prepare  = iscsi_reopen_prepare,
1682 
1683     .bdrv_getlength  = iscsi_getlength,
1684     .bdrv_get_info   = iscsi_get_info,
1685     .bdrv_truncate   = iscsi_truncate,
1686     .bdrv_refresh_limits = iscsi_refresh_limits,
1687 
1688     .bdrv_co_get_block_status = iscsi_co_get_block_status,
1689     .bdrv_co_discard      = iscsi_co_discard,
1690     .bdrv_co_write_zeroes = iscsi_co_write_zeroes,
1691     .bdrv_co_readv         = iscsi_co_readv,
1692     .bdrv_co_writev        = iscsi_co_writev,
1693     .bdrv_co_flush_to_disk = iscsi_co_flush,
1694 
1695 #ifdef __linux__
1696     .bdrv_ioctl       = iscsi_ioctl,
1697     .bdrv_aio_ioctl   = iscsi_aio_ioctl,
1698 #endif
1699 
1700     .bdrv_detach_aio_context = iscsi_detach_aio_context,
1701     .bdrv_attach_aio_context = iscsi_attach_aio_context,
1702 };
1703 
1704 static QemuOptsList qemu_iscsi_opts = {
1705     .name = "iscsi",
1706     .head = QTAILQ_HEAD_INITIALIZER(qemu_iscsi_opts.head),
1707     .desc = {
1708         {
1709             .name = "user",
1710             .type = QEMU_OPT_STRING,
1711             .help = "username for CHAP authentication to target",
1712         },{
1713             .name = "password",
1714             .type = QEMU_OPT_STRING,
1715             .help = "password for CHAP authentication to target",
1716         },{
1717             .name = "header-digest",
1718             .type = QEMU_OPT_STRING,
1719             .help = "HeaderDigest setting. "
1720                     "{CRC32C|CRC32C-NONE|NONE-CRC32C|NONE}",
1721         },{
1722             .name = "initiator-name",
1723             .type = QEMU_OPT_STRING,
1724             .help = "Initiator iqn name to use when connecting",
1725         },
1726         { /* end of list */ }
1727     },
1728 };
1729 
1730 static void iscsi_block_init(void)
1731 {
1732     bdrv_register(&bdrv_iscsi);
1733     qemu_add_opts(&qemu_iscsi_opts);
1734 }
1735 
1736 block_init(iscsi_block_init);
1737