xref: /openbmc/qemu/block/iscsi.c (revision ef6dbf1e)
1 /*
2  * QEMU Block driver for iSCSI images
3  *
4  * Copyright (c) 2010-2011 Ronnie Sahlberg <ronniesahlberg@gmail.com>
5  * Copyright (c) 2012-2014 Peter Lieven <pl@kamp.de>
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a copy
8  * of this software and associated documentation files (the "Software"), to deal
9  * in the Software without restriction, including without limitation the rights
10  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11  * copies of the Software, and to permit persons to whom the Software is
12  * furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice shall be included in
15  * all copies or substantial portions of the Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23  * THE SOFTWARE.
24  */
25 
26 #include "config-host.h"
27 
28 #include <poll.h>
29 #include <math.h>
30 #include <arpa/inet.h>
31 #include "qemu-common.h"
32 #include "qemu/config-file.h"
33 #include "qemu/error-report.h"
34 #include "qemu/bitops.h"
35 #include "qemu/bitmap.h"
36 #include "block/block_int.h"
37 #include "block/scsi.h"
38 #include "qemu/iov.h"
39 #include "sysemu/sysemu.h"
40 #include "qmp-commands.h"
41 
42 #include <iscsi/iscsi.h>
43 #include <iscsi/scsi-lowlevel.h>
44 
45 #ifdef __linux__
46 #include <scsi/sg.h>
47 #include <block/scsi.h>
48 #endif
49 
/*
 * Per-LUN driver state; the block driver callbacks obtain it from
 * bs->opaque.
 */
typedef struct IscsiLun {
    /* libiscsi connection all commands for this LUN are issued on */
    struct iscsi_context *iscsi;
    /* AioContext the fd handlers, bottom halves and timers are attached to */
    AioContext *aio_context;
    /* LUN number passed to every libiscsi command */
    int lun;
    /* peripheral device type; reported through SG_GET_SCSI_ID */
    enum scsi_inquiry_peripheral_device_type type;
    /* logical block size of the LUN in bytes */
    int block_size;
    /* capacity of the LUN in units of block_size */
    uint64_t num_blocks;
    /* poll event mask most recently registered by iscsi_set_events() */
    int events;
    /* NOTE(review): not referenced in this part of the file */
    QEMUTimer *nop_timer;
    /* non-zero if the LUN supports logical block provisioning */
    uint8_t lbpme;
    /* non-zero if deallocated/anchored blocks are reported as zero */
    uint8_t lbprz;
    /* cleared once the target rejects WRITE SAME as unsupported */
    uint8_t has_write_same;
    /* provisioning capabilities (lbpu, lbpws, lbpws10) of the target */
    struct scsi_inquiry_logical_block_provisioning lbp;
    struct scsi_inquiry_block_limits bl;
    /* block_size bytes of zeroes, allocated lazily for WRITE SAME */
    unsigned char *zeroblock;
    /* one bit per cluster; NULL means "treat everything as allocated" */
    unsigned long *allocationmap;
    /* number of 512-byte qemu sectors covered by one allocationmap bit */
    int cluster_sectors;
    /* use the 16-byte READ/WRITE (and WRITE SAME) CDB variants */
    bool use_16_for_rw;
} IscsiLun;
69 
/*
 * State of one coroutine-based request; lives on the stack of the issuing
 * coroutine and is handed to iscsi_co_generic_cb() as opaque pointer.
 */
typedef struct IscsiTask {
    /* SCSI status reported by the completion callback */
    int status;
    /* set to 1 once the waiting coroutine may continue */
    int complete;
    /* number of retries accounted for so far */
    int retries;
    /* set by the completion callback if the command must be reissued */
    int do_retry;
    /* libiscsi task of the command; freed by the issuing function */
    struct scsi_task *task;
    /* coroutine to re-enter on completion (may be NULL) */
    Coroutine *co;
    /* bottom half used to re-enter the coroutine */
    QEMUBH *bh;
    IscsiLun *iscsilun;
    /* timer used to delay the retry after a BUSY status */
    QEMUTimer retry_timer;
} IscsiTask;
81 
/*
 * AIO control block for callback-based requests (used by the SG_IO ioctl
 * path in this part of the file).
 */
typedef struct IscsiAIOCB {
    /* generic AIOCB header; carries the completion callback and opaque */
    BlockAIOCB common;
    QEMUIOVector *qiov;
    /* bottom half that delivers the completion, NULL until scheduled */
    QEMUBH *bh;
    IscsiLun *iscsilun;
    /* libiscsi task of the command; released in iscsi_bh_cb() */
    struct scsi_task *task;
    /* optional bounce buffer, released with g_free() on completion */
    uint8_t *buf;
    /* -EINPROGRESS while pending, afterwards the result for the callback */
    int status;
    int64_t sector_num;
    int nb_sectors;
#ifdef __linux__
    /* SG_IO header supplied by the ioctl caller */
    sg_io_hdr_t *ioh;
#endif
} IscsiAIOCB;
96 
/* NOTE(review): NOP_INTERVAL and MAX_NOP_FAILURES are not used in this part
 * of the file; presumably they configure the keep-alive handled through
 * nop_timer -- confirm against the rest of the file. */
#define NOP_INTERVAL 5000
#define MAX_NOP_FAILURES 3
/* maximum number of retries per command: one per iscsi_retry_times entry */
#define ISCSI_CMD_RETRIES ARRAY_SIZE(iscsi_retry_times)
/* mean back-off in ms before the n-th retry of a command that got BUSY */
static const unsigned iscsi_retry_times[] = {8, 32, 128, 512, 2048};
101 
/* This threshold is a trade-off knob to choose between
 * the potential additional overhead of an extra GET_LBA_STATUS request
 * vs. unnecessarily reading a lot of zero sectors over the wire.
 * If a read request is greater than or equal to ISCSI_CHECKALLOC_THRES
 * sectors, we first check the allocation status of the area covered by
 * the request if the allocationmap indicates that the area might be
 * unallocated. */
#define ISCSI_CHECKALLOC_THRES 64
110 
111 static void
112 iscsi_bh_cb(void *p)
113 {
114     IscsiAIOCB *acb = p;
115 
116     qemu_bh_delete(acb->bh);
117 
118     g_free(acb->buf);
119     acb->buf = NULL;
120 
121     acb->common.cb(acb->common.opaque, acb->status);
122 
123     if (acb->task != NULL) {
124         scsi_free_scsi_task(acb->task);
125         acb->task = NULL;
126     }
127 
128     qemu_aio_unref(acb);
129 }
130 
131 static void
132 iscsi_schedule_bh(IscsiAIOCB *acb)
133 {
134     if (acb->bh) {
135         return;
136     }
137     acb->bh = aio_bh_new(acb->iscsilun->aio_context, iscsi_bh_cb, acb);
138     qemu_bh_schedule(acb->bh);
139 }
140 
141 static void iscsi_co_generic_bh_cb(void *opaque)
142 {
143     struct IscsiTask *iTask = opaque;
144     iTask->complete = 1;
145     qemu_bh_delete(iTask->bh);
146     qemu_coroutine_enter(iTask->co, NULL);
147 }
148 
149 static void iscsi_retry_timer_expired(void *opaque)
150 {
151     struct IscsiTask *iTask = opaque;
152     iTask->complete = 1;
153     if (iTask->co) {
154         qemu_coroutine_enter(iTask->co, NULL);
155     }
156 }
157 
158 static inline unsigned exp_random(double mean)
159 {
160     return -mean * log((double)rand() / RAND_MAX);
161 }
162 
/*
 * Completion callback shared by all coroutine-based requests.
 *
 * Records the SCSI status and the finished task in the IscsiTask passed as
 * opaque and then decides how the waiting coroutine is resumed:
 *  - CHECK CONDITION with sense key UNIT ATTENTION: do_retry is set and the
 *    coroutine is resumed through a bottom half so that it reissues the
 *    command;
 *  - BUSY: do_retry is set and retry_timer is armed with a randomized delay
 *    whose mean is taken from iscsi_retry_times[]; the coroutine is resumed
 *    by iscsi_retry_timer_expired(), no bottom half is scheduled;
 *  - any other failure, or when ISCSI_CMD_RETRIES retries are used up: the
 *    error is reported and the coroutine is resumed with do_retry == 0.
 * If no coroutine is attached (co == NULL) the task is only marked complete.
 */
static void
iscsi_co_generic_cb(struct iscsi_context *iscsi, int status,
                        void *command_data, void *opaque)
{
    struct IscsiTask *iTask = opaque;
    struct scsi_task *task = command_data;

    iTask->status = status;
    iTask->do_retry = 0;
    iTask->task = task;

    if (status != SCSI_STATUS_GOOD) {
        /* every failed completion counts against the retry budget */
        if (iTask->retries++ < ISCSI_CMD_RETRIES) {
            if (status == SCSI_STATUS_CHECK_CONDITION
                && task->sense.key == SCSI_SENSE_UNIT_ATTENTION) {
                error_report("iSCSI CheckCondition: %s",
                             iscsi_get_error(iscsi));
                iTask->do_retry = 1;
                goto out;
            }
            if (status == SCSI_STATUS_BUSY) {
                /* retries was already incremented, hence the -1 */
                unsigned retry_time =
                    exp_random(iscsi_retry_times[iTask->retries - 1]);
                error_report("iSCSI Busy (retry #%u in %u ms): %s",
                             iTask->retries, retry_time,
                             iscsi_get_error(iscsi));
                aio_timer_init(iTask->iscsilun->aio_context,
                               &iTask->retry_timer, QEMU_CLOCK_REALTIME,
                               SCALE_MS, iscsi_retry_timer_expired, iTask);
                timer_mod(&iTask->retry_timer,
                          qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + retry_time);
                iTask->do_retry = 1;
                /* the timer callback resumes the coroutine, not a BH */
                return;
            }
        }
        error_report("iSCSI Failure: %s", iscsi_get_error(iscsi));
    }

out:
    if (iTask->co) {
        /* resume the coroutine from a bottom half rather than from here */
        iTask->bh = aio_bh_new(iTask->iscsilun->aio_context,
                               iscsi_co_generic_bh_cb, iTask);
        qemu_bh_schedule(iTask->bh);
    } else {
        iTask->complete = 1;
    }
}
210 
211 static void iscsi_co_init_iscsitask(IscsiLun *iscsilun, struct IscsiTask *iTask)
212 {
213     *iTask = (struct IscsiTask) {
214         .co         = qemu_coroutine_self(),
215         .iscsilun   = iscsilun,
216     };
217 }
218 
219 static void
220 iscsi_abort_task_cb(struct iscsi_context *iscsi, int status, void *command_data,
221                     void *private_data)
222 {
223     IscsiAIOCB *acb = private_data;
224 
225     acb->status = -ECANCELED;
226     iscsi_schedule_bh(acb);
227 }
228 
229 static void
230 iscsi_aio_cancel(BlockAIOCB *blockacb)
231 {
232     IscsiAIOCB *acb = (IscsiAIOCB *)blockacb;
233     IscsiLun *iscsilun = acb->iscsilun;
234 
235     if (acb->status != -EINPROGRESS) {
236         return;
237     }
238 
239     /* send a task mgmt call to the target to cancel the task on the target */
240     iscsi_task_mgmt_abort_task_async(iscsilun->iscsi, acb->task,
241                                      iscsi_abort_task_cb, acb);
242 
243 }
244 
/* AIOCB descriptor used with qemu_aio_get(): size of the driver's control
 * block and the hook invoked for asynchronous cancellation. */
static const AIOCBInfo iscsi_aiocb_info = {
    .aiocb_size         = sizeof(IscsiAIOCB),
    .cancel_async       = iscsi_aio_cancel,
};
249 
250 
/* fd handlers; declared here because iscsi_set_events() registers them
 * before their definitions */
static void iscsi_process_read(void *arg);
static void iscsi_process_write(void *arg);
253 
254 static void
255 iscsi_set_events(IscsiLun *iscsilun)
256 {
257     struct iscsi_context *iscsi = iscsilun->iscsi;
258     int ev;
259 
260     /* We always register a read handler.  */
261     ev = POLLIN;
262     ev |= iscsi_which_events(iscsi);
263     if (ev != iscsilun->events) {
264         aio_set_fd_handler(iscsilun->aio_context,
265                            iscsi_get_fd(iscsi),
266                            iscsi_process_read,
267                            (ev & POLLOUT) ? iscsi_process_write : NULL,
268                            iscsilun);
269 
270     }
271 
272     iscsilun->events = ev;
273 }
274 
275 static void
276 iscsi_process_read(void *arg)
277 {
278     IscsiLun *iscsilun = arg;
279     struct iscsi_context *iscsi = iscsilun->iscsi;
280 
281     iscsi_service(iscsi, POLLIN);
282     iscsi_set_events(iscsilun);
283 }
284 
285 static void
286 iscsi_process_write(void *arg)
287 {
288     IscsiLun *iscsilun = arg;
289     struct iscsi_context *iscsi = iscsilun->iscsi;
290 
291     iscsi_service(iscsi, POLLOUT);
292     iscsi_set_events(iscsilun);
293 }
294 
295 static int64_t sector_lun2qemu(int64_t sector, IscsiLun *iscsilun)
296 {
297     return sector * iscsilun->block_size / BDRV_SECTOR_SIZE;
298 }
299 
300 static int64_t sector_qemu2lun(int64_t sector, IscsiLun *iscsilun)
301 {
302     return sector * BDRV_SECTOR_SIZE / iscsilun->block_size;
303 }
304 
305 static bool is_request_lun_aligned(int64_t sector_num, int nb_sectors,
306                                       IscsiLun *iscsilun)
307 {
308     if ((sector_num * BDRV_SECTOR_SIZE) % iscsilun->block_size ||
309         (nb_sectors * BDRV_SECTOR_SIZE) % iscsilun->block_size) {
310             error_report("iSCSI misaligned request: "
311                          "iscsilun->block_size %u, sector_num %" PRIi64
312                          ", nb_sectors %d",
313                          iscsilun->block_size, sector_num, nb_sectors);
314             return 0;
315     }
316     return 1;
317 }
318 
319 static unsigned long *iscsi_allocationmap_init(IscsiLun *iscsilun)
320 {
321     return bitmap_try_new(DIV_ROUND_UP(sector_lun2qemu(iscsilun->num_blocks,
322                                                        iscsilun),
323                                        iscsilun->cluster_sectors));
324 }
325 
326 static void iscsi_allocationmap_set(IscsiLun *iscsilun, int64_t sector_num,
327                                     int nb_sectors)
328 {
329     if (iscsilun->allocationmap == NULL) {
330         return;
331     }
332     bitmap_set(iscsilun->allocationmap,
333                sector_num / iscsilun->cluster_sectors,
334                DIV_ROUND_UP(nb_sectors, iscsilun->cluster_sectors));
335 }
336 
337 static void iscsi_allocationmap_clear(IscsiLun *iscsilun, int64_t sector_num,
338                                       int nb_sectors)
339 {
340     int64_t cluster_num, nb_clusters;
341     if (iscsilun->allocationmap == NULL) {
342         return;
343     }
344     cluster_num = DIV_ROUND_UP(sector_num, iscsilun->cluster_sectors);
345     nb_clusters = (sector_num + nb_sectors) / iscsilun->cluster_sectors
346                   - cluster_num;
347     if (nb_clusters > 0) {
348         bitmap_clear(iscsilun->allocationmap, cluster_num, nb_clusters);
349     }
350 }
351 
/*
 * Write nb_sectors qemu sectors starting at sector_num from iov.
 *
 * The request must be aligned to the LUN block size and must not exceed
 * bs->bl.max_transfer_length (if set). Issues WRITE16 or WRITE10 depending
 * on use_16_for_rw, waits for completion in the calling coroutine and
 * reissues the command while the completion callback requests a retry.
 * On success the written range is marked allocated in the allocation map.
 *
 * Returns 0 on success, -EINVAL for misaligned or oversized requests,
 * -ENOMEM if the task could not be created and -EIO on any other failure.
 */
static int coroutine_fn iscsi_co_writev(BlockDriverState *bs,
                                        int64_t sector_num, int nb_sectors,
                                        QEMUIOVector *iov)
{
    IscsiLun *iscsilun = bs->opaque;
    struct IscsiTask iTask;
    uint64_t lba;
    uint32_t num_sectors;

    if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
        return -EINVAL;
    }

    if (bs->bl.max_transfer_length && nb_sectors > bs->bl.max_transfer_length) {
        error_report("iSCSI Error: Write of %d sectors exceeds max_xfer_len "
                     "of %d sectors", nb_sectors, bs->bl.max_transfer_length);
        return -EINVAL;
    }

    /* from here on address and length are in LUN blocks */
    lba = sector_qemu2lun(sector_num, iscsilun);
    num_sectors = sector_qemu2lun(nb_sectors, iscsilun);
    iscsi_co_init_iscsitask(iscsilun, &iTask);
retry:
    if (iscsilun->use_16_for_rw) {
        iTask.task = iscsi_write16_task(iscsilun->iscsi, iscsilun->lun, lba,
                                        NULL, num_sectors * iscsilun->block_size,
                                        iscsilun->block_size, 0, 0, 0, 0, 0,
                                        iscsi_co_generic_cb, &iTask);
    } else {
        iTask.task = iscsi_write10_task(iscsilun->iscsi, iscsilun->lun, lba,
                                        NULL, num_sectors * iscsilun->block_size,
                                        iscsilun->block_size, 0, 0, 0, 0, 0,
                                        iscsi_co_generic_cb, &iTask);
    }
    if (iTask.task == NULL) {
        return -ENOMEM;
    }
    /* no data buffer was passed above; the payload comes from the iovec */
    scsi_task_set_iov_out(iTask.task, (struct scsi_iovec *) iov->iov,
                          iov->niov);
    /* sleep until the completion callback (or retry timer) wakes us up */
    while (!iTask.complete) {
        iscsi_set_events(iscsilun);
        qemu_coroutine_yield();
    }

    if (iTask.task != NULL) {
        scsi_free_scsi_task(iTask.task);
        iTask.task = NULL;
    }

    if (iTask.do_retry) {
        iTask.complete = 0;
        goto retry;
    }

    if (iTask.status != SCSI_STATUS_GOOD) {
        return -EIO;
    }

    iscsi_allocationmap_set(iscsilun, sector_num, nb_sectors);

    return 0;
}
414 
415 
416 static bool iscsi_allocationmap_is_allocated(IscsiLun *iscsilun,
417                                              int64_t sector_num, int nb_sectors)
418 {
419     unsigned long size;
420     if (iscsilun->allocationmap == NULL) {
421         return true;
422     }
423     size = DIV_ROUND_UP(sector_num + nb_sectors, iscsilun->cluster_sectors);
424     return !(find_next_bit(iscsilun->allocationmap, size,
425                            sector_num / iscsilun->cluster_sectors) == size);
426 }
427 
/*
 * Query the allocation status of the area starting at sector_num.
 *
 * Returns a combination of BDRV_BLOCK_* flags together with the byte
 * offset of sector_num, or a negative errno value. *pnum receives the
 * number of sectors (at most nb_sectors) the returned status applies to.
 *
 * If the LUN does not support logical block provisioning, or the
 * GET LBA STATUS command fails, the whole request is reported as
 * allocated data. Otherwise the first descriptor returned by the target is
 * evaluated and the allocation map is updated for the full extent of that
 * descriptor, even if it exceeds nb_sectors.
 */
static int64_t coroutine_fn iscsi_co_get_block_status(BlockDriverState *bs,
                                                  int64_t sector_num,
                                                  int nb_sectors, int *pnum)
{
    IscsiLun *iscsilun = bs->opaque;
    struct scsi_get_lba_status *lbas = NULL;
    struct scsi_lba_status_descriptor *lbasd = NULL;
    struct IscsiTask iTask;
    int64_t ret;

    /* initialized first so that the cleanup at "out" always sees a valid
     * iTask.task */
    iscsi_co_init_iscsitask(iscsilun, &iTask);

    if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
        ret = -EINVAL;
        goto out;
    }

    /* default to all sectors allocated */
    ret = BDRV_BLOCK_DATA;
    ret |= (sector_num << BDRV_SECTOR_BITS) | BDRV_BLOCK_OFFSET_VALID;
    *pnum = nb_sectors;

    /* LUN does not support logical block provisioning */
    if (iscsilun->lbpme == 0) {
        goto out;
    }

retry:
    /* 8 + 16 bytes: room for exactly one LBA status descriptor */
    if (iscsi_get_lba_status_task(iscsilun->iscsi, iscsilun->lun,
                                  sector_qemu2lun(sector_num, iscsilun),
                                  8 + 16, iscsi_co_generic_cb,
                                  &iTask) == NULL) {
        ret = -ENOMEM;
        goto out;
    }

    while (!iTask.complete) {
        iscsi_set_events(iscsilun);
        qemu_coroutine_yield();
    }

    if (iTask.do_retry) {
        if (iTask.task != NULL) {
            scsi_free_scsi_task(iTask.task);
            iTask.task = NULL;
        }
        iTask.complete = 0;
        goto retry;
    }

    if (iTask.status != SCSI_STATUS_GOOD) {
        /* in case the get_lba_status_callout fails (i.e.
         * because the device is busy or the cmd is not
         * supported) we pretend all blocks are allocated
         * for backwards compatibility */
        goto out;
    }

    /* the unmarshalled data belongs to the task, which is freed at "out" */
    lbas = scsi_datain_unmarshall(iTask.task);
    if (lbas == NULL) {
        ret = -EIO;
        goto out;
    }

    lbasd = &lbas->descriptors[0];

    /* the descriptor must describe the area we asked about */
    if (sector_qemu2lun(sector_num, iscsilun) != lbasd->lba) {
        ret = -EIO;
        goto out;
    }

    *pnum = sector_lun2qemu(lbasd->num_blocks, iscsilun);

    if (lbasd->provisioning == SCSI_PROVISIONING_TYPE_DEALLOCATED ||
        lbasd->provisioning == SCSI_PROVISIONING_TYPE_ANCHORED) {
        ret &= ~BDRV_BLOCK_DATA;
        if (iscsilun->lbprz) {
            ret |= BDRV_BLOCK_ZERO;
        }
    }

    /* update the map for the whole extent before clamping *pnum */
    if (ret & BDRV_BLOCK_ZERO) {
        iscsi_allocationmap_clear(iscsilun, sector_num, *pnum);
    } else {
        iscsi_allocationmap_set(iscsilun, sector_num, *pnum);
    }

    if (*pnum > nb_sectors) {
        *pnum = nb_sectors;
    }
out:
    if (iTask.task != NULL) {
        scsi_free_scsi_task(iTask.task);
    }
    return ret;
}
524 
525 static int coroutine_fn iscsi_co_readv(BlockDriverState *bs,
526                                        int64_t sector_num, int nb_sectors,
527                                        QEMUIOVector *iov)
528 {
529     IscsiLun *iscsilun = bs->opaque;
530     struct IscsiTask iTask;
531     uint64_t lba;
532     uint32_t num_sectors;
533 
534     if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
535         return -EINVAL;
536     }
537 
538     if (bs->bl.max_transfer_length && nb_sectors > bs->bl.max_transfer_length) {
539         error_report("iSCSI Error: Read of %d sectors exceeds max_xfer_len "
540                      "of %d sectors", nb_sectors, bs->bl.max_transfer_length);
541         return -EINVAL;
542     }
543 
544     if (iscsilun->lbprz && nb_sectors >= ISCSI_CHECKALLOC_THRES &&
545         !iscsi_allocationmap_is_allocated(iscsilun, sector_num, nb_sectors)) {
546         int64_t ret;
547         int pnum;
548         ret = iscsi_co_get_block_status(bs, sector_num, INT_MAX, &pnum);
549         if (ret < 0) {
550             return ret;
551         }
552         if (ret & BDRV_BLOCK_ZERO && pnum >= nb_sectors) {
553             qemu_iovec_memset(iov, 0, 0x00, iov->size);
554             return 0;
555         }
556     }
557 
558     lba = sector_qemu2lun(sector_num, iscsilun);
559     num_sectors = sector_qemu2lun(nb_sectors, iscsilun);
560 
561     iscsi_co_init_iscsitask(iscsilun, &iTask);
562 retry:
563     if (iscsilun->use_16_for_rw) {
564         iTask.task = iscsi_read16_task(iscsilun->iscsi, iscsilun->lun, lba,
565                                        num_sectors * iscsilun->block_size,
566                                        iscsilun->block_size, 0, 0, 0, 0, 0,
567                                        iscsi_co_generic_cb, &iTask);
568     } else {
569         iTask.task = iscsi_read10_task(iscsilun->iscsi, iscsilun->lun, lba,
570                                        num_sectors * iscsilun->block_size,
571                                        iscsilun->block_size,
572                                        0, 0, 0, 0, 0,
573                                        iscsi_co_generic_cb, &iTask);
574     }
575     if (iTask.task == NULL) {
576         return -ENOMEM;
577     }
578     scsi_task_set_iov_in(iTask.task, (struct scsi_iovec *) iov->iov, iov->niov);
579 
580     while (!iTask.complete) {
581         iscsi_set_events(iscsilun);
582         qemu_coroutine_yield();
583     }
584 
585     if (iTask.task != NULL) {
586         scsi_free_scsi_task(iTask.task);
587         iTask.task = NULL;
588     }
589 
590     if (iTask.do_retry) {
591         iTask.complete = 0;
592         goto retry;
593     }
594 
595     if (iTask.status != SCSI_STATUS_GOOD) {
596         return -EIO;
597     }
598 
599     return 0;
600 }
601 
602 static int coroutine_fn iscsi_co_flush(BlockDriverState *bs)
603 {
604     IscsiLun *iscsilun = bs->opaque;
605     struct IscsiTask iTask;
606 
607     if (bs->sg) {
608         return 0;
609     }
610 
611     iscsi_co_init_iscsitask(iscsilun, &iTask);
612 
613 retry:
614     if (iscsi_synchronizecache10_task(iscsilun->iscsi, iscsilun->lun, 0, 0, 0,
615                                       0, iscsi_co_generic_cb, &iTask) == NULL) {
616         return -ENOMEM;
617     }
618 
619     while (!iTask.complete) {
620         iscsi_set_events(iscsilun);
621         qemu_coroutine_yield();
622     }
623 
624     if (iTask.task != NULL) {
625         scsi_free_scsi_task(iTask.task);
626         iTask.task = NULL;
627     }
628 
629     if (iTask.do_retry) {
630         iTask.complete = 0;
631         goto retry;
632     }
633 
634     if (iTask.status != SCSI_STATUS_GOOD) {
635         return -EIO;
636     }
637 
638     return 0;
639 }
640 
641 #ifdef __linux__
642 static void
643 iscsi_aio_ioctl_cb(struct iscsi_context *iscsi, int status,
644                      void *command_data, void *opaque)
645 {
646     IscsiAIOCB *acb = opaque;
647 
648     g_free(acb->buf);
649     acb->buf = NULL;
650 
651     acb->status = 0;
652     if (status < 0) {
653         error_report("Failed to ioctl(SG_IO) to iSCSI lun. %s",
654                      iscsi_get_error(iscsi));
655         acb->status = -EIO;
656     }
657 
658     acb->ioh->driver_status = 0;
659     acb->ioh->host_status   = 0;
660     acb->ioh->resid         = 0;
661 
662 #define SG_ERR_DRIVER_SENSE    0x08
663 
664     if (status == SCSI_STATUS_CHECK_CONDITION && acb->task->datain.size >= 2) {
665         int ss;
666 
667         acb->ioh->driver_status |= SG_ERR_DRIVER_SENSE;
668 
669         acb->ioh->sb_len_wr = acb->task->datain.size - 2;
670         ss = (acb->ioh->mx_sb_len >= acb->ioh->sb_len_wr) ?
671              acb->ioh->mx_sb_len : acb->ioh->sb_len_wr;
672         memcpy(acb->ioh->sbp, &acb->task->datain.data[2], ss);
673     }
674 
675     iscsi_schedule_bh(acb);
676 }
677 
678 static BlockAIOCB *iscsi_aio_ioctl(BlockDriverState *bs,
679         unsigned long int req, void *buf,
680         BlockCompletionFunc *cb, void *opaque)
681 {
682     IscsiLun *iscsilun = bs->opaque;
683     struct iscsi_context *iscsi = iscsilun->iscsi;
684     struct iscsi_data data;
685     IscsiAIOCB *acb;
686 
687     assert(req == SG_IO);
688 
689     acb = qemu_aio_get(&iscsi_aiocb_info, bs, cb, opaque);
690 
691     acb->iscsilun = iscsilun;
692     acb->bh          = NULL;
693     acb->status      = -EINPROGRESS;
694     acb->buf         = NULL;
695     acb->ioh         = buf;
696 
697     acb->task = malloc(sizeof(struct scsi_task));
698     if (acb->task == NULL) {
699         error_report("iSCSI: Failed to allocate task for scsi command. %s",
700                      iscsi_get_error(iscsi));
701         qemu_aio_unref(acb);
702         return NULL;
703     }
704     memset(acb->task, 0, sizeof(struct scsi_task));
705 
706     switch (acb->ioh->dxfer_direction) {
707     case SG_DXFER_TO_DEV:
708         acb->task->xfer_dir = SCSI_XFER_WRITE;
709         break;
710     case SG_DXFER_FROM_DEV:
711         acb->task->xfer_dir = SCSI_XFER_READ;
712         break;
713     default:
714         acb->task->xfer_dir = SCSI_XFER_NONE;
715         break;
716     }
717 
718     acb->task->cdb_size = acb->ioh->cmd_len;
719     memcpy(&acb->task->cdb[0], acb->ioh->cmdp, acb->ioh->cmd_len);
720     acb->task->expxferlen = acb->ioh->dxfer_len;
721 
722     data.size = 0;
723     if (acb->task->xfer_dir == SCSI_XFER_WRITE) {
724         if (acb->ioh->iovec_count == 0) {
725             data.data = acb->ioh->dxferp;
726             data.size = acb->ioh->dxfer_len;
727         } else {
728             scsi_task_set_iov_out(acb->task,
729                                  (struct scsi_iovec *) acb->ioh->dxferp,
730                                  acb->ioh->iovec_count);
731         }
732     }
733 
734     if (iscsi_scsi_command_async(iscsi, iscsilun->lun, acb->task,
735                                  iscsi_aio_ioctl_cb,
736                                  (data.size > 0) ? &data : NULL,
737                                  acb) != 0) {
738         scsi_free_scsi_task(acb->task);
739         qemu_aio_unref(acb);
740         return NULL;
741     }
742 
743     /* tell libiscsi to read straight into the buffer we got from ioctl */
744     if (acb->task->xfer_dir == SCSI_XFER_READ) {
745         if (acb->ioh->iovec_count == 0) {
746             scsi_task_add_data_in_buffer(acb->task,
747                                          acb->ioh->dxfer_len,
748                                          acb->ioh->dxferp);
749         } else {
750             scsi_task_set_iov_in(acb->task,
751                                  (struct scsi_iovec *) acb->ioh->dxferp,
752                                  acb->ioh->iovec_count);
753         }
754     }
755 
756     iscsi_set_events(iscsilun);
757 
758     return &acb->common;
759 }
760 
/*
 * Completion callback of the synchronous ioctl path: stores the request
 * status in the int that opaque points to.
 */
static void ioctl_cb(void *opaque, int status)
{
    *(int *)opaque = status;
}
766 
767 static int iscsi_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
768 {
769     IscsiLun *iscsilun = bs->opaque;
770     int status;
771 
772     switch (req) {
773     case SG_GET_VERSION_NUM:
774         *(int *)buf = 30000;
775         break;
776     case SG_GET_SCSI_ID:
777         ((struct sg_scsi_id *)buf)->scsi_type = iscsilun->type;
778         break;
779     case SG_IO:
780         status = -EINPROGRESS;
781         iscsi_aio_ioctl(bs, req, buf, ioctl_cb, &status);
782 
783         while (status == -EINPROGRESS) {
784             aio_poll(iscsilun->aio_context, true);
785         }
786 
787         return 0;
788     default:
789         return -1;
790     }
791     return 0;
792 }
793 #endif
794 
795 static int64_t
796 iscsi_getlength(BlockDriverState *bs)
797 {
798     IscsiLun *iscsilun = bs->opaque;
799     int64_t len;
800 
801     len  = iscsilun->num_blocks;
802     len *= iscsilun->block_size;
803 
804     return len;
805 }
806 
807 static int
808 coroutine_fn iscsi_co_discard(BlockDriverState *bs, int64_t sector_num,
809                                    int nb_sectors)
810 {
811     IscsiLun *iscsilun = bs->opaque;
812     struct IscsiTask iTask;
813     struct unmap_list list;
814 
815     if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
816         return -EINVAL;
817     }
818 
819     if (!iscsilun->lbp.lbpu) {
820         /* UNMAP is not supported by the target */
821         return 0;
822     }
823 
824     list.lba = sector_qemu2lun(sector_num, iscsilun);
825     list.num = sector_qemu2lun(nb_sectors, iscsilun);
826 
827     iscsi_co_init_iscsitask(iscsilun, &iTask);
828 retry:
829     if (iscsi_unmap_task(iscsilun->iscsi, iscsilun->lun, 0, 0, &list, 1,
830                      iscsi_co_generic_cb, &iTask) == NULL) {
831         return -ENOMEM;
832     }
833 
834     while (!iTask.complete) {
835         iscsi_set_events(iscsilun);
836         qemu_coroutine_yield();
837     }
838 
839     if (iTask.task != NULL) {
840         scsi_free_scsi_task(iTask.task);
841         iTask.task = NULL;
842     }
843 
844     if (iTask.do_retry) {
845         iTask.complete = 0;
846         goto retry;
847     }
848 
849     if (iTask.status == SCSI_STATUS_CHECK_CONDITION) {
850         /* the target might fail with a check condition if it
851            is not happy with the alignment of the UNMAP request
852            we silently fail in this case */
853         return 0;
854     }
855 
856     if (iTask.status != SCSI_STATUS_GOOD) {
857         return -EIO;
858     }
859 
860     iscsi_allocationmap_clear(iscsilun, sector_num, nb_sectors);
861 
862     return 0;
863 }
864 
/*
 * Zero nb_sectors qemu sectors starting at sector_num using WRITE SAME.
 *
 * With BDRV_REQ_MAY_UNMAP the UNMAP bit is set if the target supports it
 * for WRITE SAME(10) or WRITE SAME(16); otherwise the flag is dropped and
 * a plain WRITE SAME is used. The command is reissued while the completion
 * callback requests a retry. On success the allocation map is updated:
 * cleared if the blocks may have been unmapped, set otherwise.
 *
 * Returns 0 on success, -EINVAL for misaligned requests, -ENOTSUP if the
 * target does not support the required WRITE SAME variant, -ENOMEM if the
 * zero block or the task could not be allocated and -EIO on any other
 * failure.
 */
static int
coroutine_fn iscsi_co_write_zeroes(BlockDriverState *bs, int64_t sector_num,
                                   int nb_sectors, BdrvRequestFlags flags)
{
    IscsiLun *iscsilun = bs->opaque;
    struct IscsiTask iTask;
    uint64_t lba;
    uint32_t nb_blocks;
    bool use_16_for_ws = iscsilun->use_16_for_rw;

    if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
        return -EINVAL;
    }

    if (flags & BDRV_REQ_MAY_UNMAP) {
        if (!use_16_for_ws && !iscsilun->lbp.lbpws10) {
            /* WRITESAME10 with UNMAP is unsupported try WRITESAME16 */
            use_16_for_ws = true;
        }
        if (use_16_for_ws && !iscsilun->lbp.lbpws) {
            /* WRITESAME16 with UNMAP is not supported by the target,
             * fall back and try WRITESAME10/16 without UNMAP */
            flags &= ~BDRV_REQ_MAY_UNMAP;
            use_16_for_ws = iscsilun->use_16_for_rw;
        }
    }

    if (!(flags & BDRV_REQ_MAY_UNMAP) && !iscsilun->has_write_same) {
        /* WRITESAME without UNMAP is not supported by the target */
        return -ENOTSUP;
    }

    /* from here on address and length are in LUN blocks */
    lba = sector_qemu2lun(sector_num, iscsilun);
    nb_blocks = sector_qemu2lun(nb_sectors, iscsilun);

    /* the block of zeroes is allocated on first use and kept in the LUN
     * state */
    if (iscsilun->zeroblock == NULL) {
        iscsilun->zeroblock = g_try_malloc0(iscsilun->block_size);
        if (iscsilun->zeroblock == NULL) {
            return -ENOMEM;
        }
    }

    iscsi_co_init_iscsitask(iscsilun, &iTask);
retry:
    if (use_16_for_ws) {
        iTask.task = iscsi_writesame16_task(iscsilun->iscsi, iscsilun->lun, lba,
                                            iscsilun->zeroblock, iscsilun->block_size,
                                            nb_blocks, 0, !!(flags & BDRV_REQ_MAY_UNMAP),
                                            0, 0, iscsi_co_generic_cb, &iTask);
    } else {
        iTask.task = iscsi_writesame10_task(iscsilun->iscsi, iscsilun->lun, lba,
                                            iscsilun->zeroblock, iscsilun->block_size,
                                            nb_blocks, 0, !!(flags & BDRV_REQ_MAY_UNMAP),
                                            0, 0, iscsi_co_generic_cb, &iTask);
    }
    if (iTask.task == NULL) {
        return -ENOMEM;
    }

    /* sleep until the completion callback (or retry timer) wakes us up */
    while (!iTask.complete) {
        iscsi_set_events(iscsilun);
        qemu_coroutine_yield();
    }

    /* the sense data must be inspected before the task is freed below */
    if (iTask.status == SCSI_STATUS_CHECK_CONDITION &&
        iTask.task->sense.key == SCSI_SENSE_ILLEGAL_REQUEST &&
        (iTask.task->sense.ascq == SCSI_SENSE_ASCQ_INVALID_OPERATION_CODE ||
         iTask.task->sense.ascq == SCSI_SENSE_ASCQ_INVALID_FIELD_IN_CDB)) {
        /* WRITE SAME is not supported by the target */
        iscsilun->has_write_same = false;
        scsi_free_scsi_task(iTask.task);
        return -ENOTSUP;
    }

    if (iTask.task != NULL) {
        scsi_free_scsi_task(iTask.task);
        iTask.task = NULL;
    }

    if (iTask.do_retry) {
        iTask.complete = 0;
        goto retry;
    }

    if (iTask.status != SCSI_STATUS_GOOD) {
        return -EIO;
    }

    if (flags & BDRV_REQ_MAY_UNMAP) {
        iscsi_allocationmap_clear(iscsilun, sector_num, nb_sectors);
    } else {
        iscsi_allocationmap_set(iscsilun, sector_num, nb_sectors);
    }

    return 0;
}
961 
962 static void parse_chap(struct iscsi_context *iscsi, const char *target,
963                        Error **errp)
964 {
965     QemuOptsList *list;
966     QemuOpts *opts;
967     const char *user = NULL;
968     const char *password = NULL;
969 
970     list = qemu_find_opts("iscsi");
971     if (!list) {
972         return;
973     }
974 
975     opts = qemu_opts_find(list, target);
976     if (opts == NULL) {
977         opts = QTAILQ_FIRST(&list->head);
978         if (!opts) {
979             return;
980         }
981     }
982 
983     user = qemu_opt_get(opts, "user");
984     if (!user) {
985         return;
986     }
987 
988     password = qemu_opt_get(opts, "password");
989     if (!password) {
990         error_setg(errp, "CHAP username specified but no password was given");
991         return;
992     }
993 
994     if (iscsi_set_initiator_username_pwd(iscsi, user, password)) {
995         error_setg(errp, "Failed to set initiator username and password");
996     }
997 }
998 
999 static void parse_header_digest(struct iscsi_context *iscsi, const char *target,
1000                                 Error **errp)
1001 {
1002     QemuOptsList *list;
1003     QemuOpts *opts;
1004     const char *digest = NULL;
1005 
1006     list = qemu_find_opts("iscsi");
1007     if (!list) {
1008         return;
1009     }
1010 
1011     opts = qemu_opts_find(list, target);
1012     if (opts == NULL) {
1013         opts = QTAILQ_FIRST(&list->head);
1014         if (!opts) {
1015             return;
1016         }
1017     }
1018 
1019     digest = qemu_opt_get(opts, "header-digest");
1020     if (!digest) {
1021         return;
1022     }
1023 
1024     if (!strcmp(digest, "CRC32C")) {
1025         iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_CRC32C);
1026     } else if (!strcmp(digest, "NONE")) {
1027         iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE);
1028     } else if (!strcmp(digest, "CRC32C-NONE")) {
1029         iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_CRC32C_NONE);
1030     } else if (!strcmp(digest, "NONE-CRC32C")) {
1031         iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE_CRC32C);
1032     } else {
1033         error_setg(errp, "Invalid header-digest setting : %s", digest);
1034     }
1035 }
1036 
1037 static char *parse_initiator_name(const char *target)
1038 {
1039     QemuOptsList *list;
1040     QemuOpts *opts;
1041     const char *name;
1042     char *iscsi_name;
1043     UuidInfo *uuid_info;
1044 
1045     list = qemu_find_opts("iscsi");
1046     if (list) {
1047         opts = qemu_opts_find(list, target);
1048         if (!opts) {
1049             opts = QTAILQ_FIRST(&list->head);
1050         }
1051         if (opts) {
1052             name = qemu_opt_get(opts, "initiator-name");
1053             if (name) {
1054                 return g_strdup(name);
1055             }
1056         }
1057     }
1058 
1059     uuid_info = qmp_query_uuid(NULL);
1060     if (strcmp(uuid_info->UUID, UUID_NONE) == 0) {
1061         name = qemu_get_vm_name();
1062     } else {
1063         name = uuid_info->UUID;
1064     }
1065     iscsi_name = g_strdup_printf("iqn.2008-11.org.linux-kvm%s%s",
1066                                  name ? ":" : "", name ? name : "");
1067     qapi_free_UuidInfo(uuid_info);
1068     return iscsi_name;
1069 }
1070 
1071 static void iscsi_nop_timed_event(void *opaque)
1072 {
1073     IscsiLun *iscsilun = opaque;
1074 
1075     if (iscsi_get_nops_in_flight(iscsilun->iscsi) > MAX_NOP_FAILURES) {
1076         error_report("iSCSI: NOP timeout. Reconnecting...");
1077         iscsi_reconnect(iscsilun->iscsi);
1078     }
1079 
1080     if (iscsi_nop_out_async(iscsilun->iscsi, NULL, NULL, 0, NULL) != 0) {
1081         error_report("iSCSI: failed to sent NOP-Out. Disabling NOP messages.");
1082         return;
1083     }
1084 
1085     timer_mod(iscsilun->nop_timer, qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + NOP_INTERVAL);
1086     iscsi_set_events(iscsilun);
1087 }
1088 
1089 static void iscsi_readcapacity_sync(IscsiLun *iscsilun, Error **errp)
1090 {
1091     struct scsi_task *task = NULL;
1092     struct scsi_readcapacity10 *rc10 = NULL;
1093     struct scsi_readcapacity16 *rc16 = NULL;
1094     int retries = ISCSI_CMD_RETRIES;
1095 
1096     do {
1097         if (task != NULL) {
1098             scsi_free_scsi_task(task);
1099             task = NULL;
1100         }
1101 
1102         switch (iscsilun->type) {
1103         case TYPE_DISK:
1104             task = iscsi_readcapacity16_sync(iscsilun->iscsi, iscsilun->lun);
1105             if (task != NULL && task->status == SCSI_STATUS_GOOD) {
1106                 rc16 = scsi_datain_unmarshall(task);
1107                 if (rc16 == NULL) {
1108                     error_setg(errp, "iSCSI: Failed to unmarshall readcapacity16 data.");
1109                 } else {
1110                     iscsilun->block_size = rc16->block_length;
1111                     iscsilun->num_blocks = rc16->returned_lba + 1;
1112                     iscsilun->lbpme = rc16->lbpme;
1113                     iscsilun->lbprz = rc16->lbprz;
1114                     iscsilun->use_16_for_rw = (rc16->returned_lba > 0xffffffff);
1115                 }
1116             }
1117             break;
1118         case TYPE_ROM:
1119             task = iscsi_readcapacity10_sync(iscsilun->iscsi, iscsilun->lun, 0, 0);
1120             if (task != NULL && task->status == SCSI_STATUS_GOOD) {
1121                 rc10 = scsi_datain_unmarshall(task);
1122                 if (rc10 == NULL) {
1123                     error_setg(errp, "iSCSI: Failed to unmarshall readcapacity10 data.");
1124                 } else {
1125                     iscsilun->block_size = rc10->block_size;
1126                     if (rc10->lba == 0) {
1127                         /* blank disk loaded */
1128                         iscsilun->num_blocks = 0;
1129                     } else {
1130                         iscsilun->num_blocks = rc10->lba + 1;
1131                     }
1132                 }
1133             }
1134             break;
1135         default:
1136             return;
1137         }
1138     } while (task != NULL && task->status == SCSI_STATUS_CHECK_CONDITION
1139              && task->sense.key == SCSI_SENSE_UNIT_ATTENTION
1140              && retries-- > 0);
1141 
1142     if (task == NULL || task->status != SCSI_STATUS_GOOD) {
1143         error_setg(errp, "iSCSI: failed to send readcapacity10 command.");
1144     }
1145     if (task) {
1146         scsi_free_scsi_task(task);
1147     }
1148 }
1149 
1150 /* TODO Convert to fine grained options */
1151 static QemuOptsList runtime_opts = {
1152     .name = "iscsi",
1153     .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
1154     .desc = {
1155         {
1156             .name = "filename",
1157             .type = QEMU_OPT_STRING,
1158             .help = "URL to the iscsi image",
1159         },
1160         { /* end of list */ }
1161     },
1162 };
1163 
1164 static struct scsi_task *iscsi_do_inquiry(struct iscsi_context *iscsi, int lun,
1165                                           int evpd, int pc, void **inq, Error **errp)
1166 {
1167     int full_size;
1168     struct scsi_task *task = NULL;
1169     task = iscsi_inquiry_sync(iscsi, lun, evpd, pc, 64);
1170     if (task == NULL || task->status != SCSI_STATUS_GOOD) {
1171         goto fail;
1172     }
1173     full_size = scsi_datain_getfullsize(task);
1174     if (full_size > task->datain.size) {
1175         scsi_free_scsi_task(task);
1176 
1177         /* we need more data for the full list */
1178         task = iscsi_inquiry_sync(iscsi, lun, evpd, pc, full_size);
1179         if (task == NULL || task->status != SCSI_STATUS_GOOD) {
1180             goto fail;
1181         }
1182     }
1183 
1184     *inq = scsi_datain_unmarshall(task);
1185     if (*inq == NULL) {
1186         error_setg(errp, "iSCSI: failed to unmarshall inquiry datain blob");
1187         goto fail_with_err;
1188     }
1189 
1190     return task;
1191 
1192 fail:
1193     error_setg(errp, "iSCSI: Inquiry command failed : %s",
1194                iscsi_get_error(iscsi));
1195 fail_with_err:
1196     if (task != NULL) {
1197         scsi_free_scsi_task(task);
1198     }
1199     return NULL;
1200 }
1201 
1202 static void iscsi_detach_aio_context(BlockDriverState *bs)
1203 {
1204     IscsiLun *iscsilun = bs->opaque;
1205 
1206     aio_set_fd_handler(iscsilun->aio_context,
1207                        iscsi_get_fd(iscsilun->iscsi),
1208                        NULL, NULL, NULL);
1209     iscsilun->events = 0;
1210 
1211     if (iscsilun->nop_timer) {
1212         timer_del(iscsilun->nop_timer);
1213         timer_free(iscsilun->nop_timer);
1214         iscsilun->nop_timer = NULL;
1215     }
1216 }
1217 
1218 static void iscsi_attach_aio_context(BlockDriverState *bs,
1219                                      AioContext *new_context)
1220 {
1221     IscsiLun *iscsilun = bs->opaque;
1222 
1223     iscsilun->aio_context = new_context;
1224     iscsi_set_events(iscsilun);
1225 
1226     /* Set up a timer for sending out iSCSI NOPs */
1227     iscsilun->nop_timer = aio_timer_new(iscsilun->aio_context,
1228                                         QEMU_CLOCK_REALTIME, SCALE_MS,
1229                                         iscsi_nop_timed_event, iscsilun);
1230     timer_mod(iscsilun->nop_timer,
1231               qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + NOP_INTERVAL);
1232 }
1233 
1234 /*
1235  * We support iscsi url's on the form
1236  * iscsi://[<username>%<password>@]<host>[:<port>]/<targetname>/<lun>
1237  *
1238  * Note: flags are currently not used by iscsi_open.  If this function
1239  * is changed such that flags are used, please examine iscsi_reopen_prepare()
1240  * to see if needs to be changed as well.
1241  */
1242 static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
1243                       Error **errp)
1244 {
1245     IscsiLun *iscsilun = bs->opaque;
1246     struct iscsi_context *iscsi = NULL;
1247     struct iscsi_url *iscsi_url = NULL;
1248     struct scsi_task *task = NULL;
1249     struct scsi_inquiry_standard *inq = NULL;
1250     struct scsi_inquiry_supported_pages *inq_vpd;
1251     char *initiator_name = NULL;
1252     QemuOpts *opts;
1253     Error *local_err = NULL;
1254     const char *filename;
1255     int i, ret;
1256 
1257     if ((BDRV_SECTOR_SIZE % 512) != 0) {
1258         error_setg(errp, "iSCSI: Invalid BDRV_SECTOR_SIZE. "
1259                    "BDRV_SECTOR_SIZE(%lld) is not a multiple "
1260                    "of 512", BDRV_SECTOR_SIZE);
1261         return -EINVAL;
1262     }
1263 
1264     opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
1265     qemu_opts_absorb_qdict(opts, options, &local_err);
1266     if (local_err) {
1267         error_propagate(errp, local_err);
1268         ret = -EINVAL;
1269         goto out;
1270     }
1271 
1272     filename = qemu_opt_get(opts, "filename");
1273 
1274     iscsi_url = iscsi_parse_full_url(iscsi, filename);
1275     if (iscsi_url == NULL) {
1276         error_setg(errp, "Failed to parse URL : %s", filename);
1277         ret = -EINVAL;
1278         goto out;
1279     }
1280 
1281     memset(iscsilun, 0, sizeof(IscsiLun));
1282 
1283     initiator_name = parse_initiator_name(iscsi_url->target);
1284 
1285     iscsi = iscsi_create_context(initiator_name);
1286     if (iscsi == NULL) {
1287         error_setg(errp, "iSCSI: Failed to create iSCSI context.");
1288         ret = -ENOMEM;
1289         goto out;
1290     }
1291 
1292     if (iscsi_set_targetname(iscsi, iscsi_url->target)) {
1293         error_setg(errp, "iSCSI: Failed to set target name.");
1294         ret = -EINVAL;
1295         goto out;
1296     }
1297 
1298     if (iscsi_url->user != NULL) {
1299         ret = iscsi_set_initiator_username_pwd(iscsi, iscsi_url->user,
1300                                               iscsi_url->passwd);
1301         if (ret != 0) {
1302             error_setg(errp, "Failed to set initiator username and password");
1303             ret = -EINVAL;
1304             goto out;
1305         }
1306     }
1307 
1308     /* check if we got CHAP username/password via the options */
1309     parse_chap(iscsi, iscsi_url->target, &local_err);
1310     if (local_err != NULL) {
1311         error_propagate(errp, local_err);
1312         ret = -EINVAL;
1313         goto out;
1314     }
1315 
1316     if (iscsi_set_session_type(iscsi, ISCSI_SESSION_NORMAL) != 0) {
1317         error_setg(errp, "iSCSI: Failed to set session type to normal.");
1318         ret = -EINVAL;
1319         goto out;
1320     }
1321 
1322     iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE_CRC32C);
1323 
1324     /* check if we got HEADER_DIGEST via the options */
1325     parse_header_digest(iscsi, iscsi_url->target, &local_err);
1326     if (local_err != NULL) {
1327         error_propagate(errp, local_err);
1328         ret = -EINVAL;
1329         goto out;
1330     }
1331 
1332     if (iscsi_full_connect_sync(iscsi, iscsi_url->portal, iscsi_url->lun) != 0) {
1333         error_setg(errp, "iSCSI: Failed to connect to LUN : %s",
1334             iscsi_get_error(iscsi));
1335         ret = -EINVAL;
1336         goto out;
1337     }
1338 
1339     iscsilun->iscsi = iscsi;
1340     iscsilun->aio_context = bdrv_get_aio_context(bs);
1341     iscsilun->lun   = iscsi_url->lun;
1342     iscsilun->has_write_same = true;
1343 
1344     task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 0, 0,
1345                             (void **) &inq, errp);
1346     if (task == NULL) {
1347         ret = -EINVAL;
1348         goto out;
1349     }
1350     iscsilun->type = inq->periperal_device_type;
1351     scsi_free_scsi_task(task);
1352     task = NULL;
1353 
1354     iscsi_readcapacity_sync(iscsilun, &local_err);
1355     if (local_err != NULL) {
1356         error_propagate(errp, local_err);
1357         ret = -EINVAL;
1358         goto out;
1359     }
1360     bs->total_sectors = sector_lun2qemu(iscsilun->num_blocks, iscsilun);
1361     bs->request_alignment = iscsilun->block_size;
1362 
1363     /* We don't have any emulation for devices other than disks and CD-ROMs, so
1364      * this must be sg ioctl compatible. We force it to be sg, otherwise qemu
1365      * will try to read from the device to guess the image format.
1366      */
1367     if (iscsilun->type != TYPE_DISK && iscsilun->type != TYPE_ROM) {
1368         bs->sg = 1;
1369     }
1370 
1371     task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 1,
1372                             SCSI_INQUIRY_PAGECODE_SUPPORTED_VPD_PAGES,
1373                             (void **) &inq_vpd, errp);
1374     if (task == NULL) {
1375         ret = -EINVAL;
1376         goto out;
1377     }
1378     for (i = 0; i < inq_vpd->num_pages; i++) {
1379         struct scsi_task *inq_task;
1380         struct scsi_inquiry_logical_block_provisioning *inq_lbp;
1381         struct scsi_inquiry_block_limits *inq_bl;
1382         switch (inq_vpd->pages[i]) {
1383         case SCSI_INQUIRY_PAGECODE_LOGICAL_BLOCK_PROVISIONING:
1384             inq_task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 1,
1385                                         SCSI_INQUIRY_PAGECODE_LOGICAL_BLOCK_PROVISIONING,
1386                                         (void **) &inq_lbp, errp);
1387             if (inq_task == NULL) {
1388                 ret = -EINVAL;
1389                 goto out;
1390             }
1391             memcpy(&iscsilun->lbp, inq_lbp,
1392                    sizeof(struct scsi_inquiry_logical_block_provisioning));
1393             scsi_free_scsi_task(inq_task);
1394             break;
1395         case SCSI_INQUIRY_PAGECODE_BLOCK_LIMITS:
1396             inq_task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 1,
1397                                     SCSI_INQUIRY_PAGECODE_BLOCK_LIMITS,
1398                                     (void **) &inq_bl, errp);
1399             if (inq_task == NULL) {
1400                 ret = -EINVAL;
1401                 goto out;
1402             }
1403             memcpy(&iscsilun->bl, inq_bl,
1404                    sizeof(struct scsi_inquiry_block_limits));
1405             scsi_free_scsi_task(inq_task);
1406             break;
1407         default:
1408             break;
1409         }
1410     }
1411     scsi_free_scsi_task(task);
1412     task = NULL;
1413 
1414     iscsi_attach_aio_context(bs, iscsilun->aio_context);
1415 
1416     /* Guess the internal cluster (page) size of the iscsi target by the means
1417      * of opt_unmap_gran. Transfer the unmap granularity only if it has a
1418      * reasonable size */
1419     if (iscsilun->bl.opt_unmap_gran * iscsilun->block_size >= 4 * 1024 &&
1420         iscsilun->bl.opt_unmap_gran * iscsilun->block_size <= 16 * 1024 * 1024) {
1421         iscsilun->cluster_sectors = (iscsilun->bl.opt_unmap_gran *
1422                                      iscsilun->block_size) >> BDRV_SECTOR_BITS;
1423         if (iscsilun->lbprz && !(bs->open_flags & BDRV_O_NOCACHE)) {
1424             iscsilun->allocationmap = iscsi_allocationmap_init(iscsilun);
1425             if (iscsilun->allocationmap == NULL) {
1426                 ret = -ENOMEM;
1427             }
1428         }
1429     }
1430 
1431 out:
1432     qemu_opts_del(opts);
1433     g_free(initiator_name);
1434     if (iscsi_url != NULL) {
1435         iscsi_destroy_url(iscsi_url);
1436     }
1437     if (task != NULL) {
1438         scsi_free_scsi_task(task);
1439     }
1440 
1441     if (ret) {
1442         if (iscsi != NULL) {
1443             iscsi_destroy_context(iscsi);
1444         }
1445         memset(iscsilun, 0, sizeof(IscsiLun));
1446     }
1447     return ret;
1448 }
1449 
1450 static void iscsi_close(BlockDriverState *bs)
1451 {
1452     IscsiLun *iscsilun = bs->opaque;
1453     struct iscsi_context *iscsi = iscsilun->iscsi;
1454 
1455     iscsi_detach_aio_context(bs);
1456     iscsi_destroy_context(iscsi);
1457     g_free(iscsilun->zeroblock);
1458     g_free(iscsilun->allocationmap);
1459     memset(iscsilun, 0, sizeof(IscsiLun));
1460 }
1461 
1462 static int sector_limits_lun2qemu(int64_t sector, IscsiLun *iscsilun)
1463 {
1464     return MIN(sector_lun2qemu(sector, iscsilun), INT_MAX / 2 + 1);
1465 }
1466 
1467 static void iscsi_refresh_limits(BlockDriverState *bs, Error **errp)
1468 {
1469     /* We don't actually refresh here, but just return data queried in
1470      * iscsi_open(): iscsi targets don't change their limits. */
1471 
1472     IscsiLun *iscsilun = bs->opaque;
1473     uint32_t max_xfer_len = iscsilun->use_16_for_rw ? 0xffffffff : 0xffff;
1474 
1475     if (iscsilun->bl.max_xfer_len) {
1476         max_xfer_len = MIN(max_xfer_len, iscsilun->bl.max_xfer_len);
1477     }
1478 
1479     bs->bl.max_transfer_length = sector_limits_lun2qemu(max_xfer_len, iscsilun);
1480 
1481     if (iscsilun->lbp.lbpu) {
1482         if (iscsilun->bl.max_unmap < 0xffffffff) {
1483             bs->bl.max_discard =
1484                 sector_limits_lun2qemu(iscsilun->bl.max_unmap, iscsilun);
1485         }
1486         bs->bl.discard_alignment =
1487             sector_limits_lun2qemu(iscsilun->bl.opt_unmap_gran, iscsilun);
1488     }
1489 
1490     if (iscsilun->bl.max_ws_len < 0xffffffff) {
1491         bs->bl.max_write_zeroes =
1492             sector_limits_lun2qemu(iscsilun->bl.max_ws_len, iscsilun);
1493     }
1494     if (iscsilun->lbp.lbpws) {
1495         bs->bl.write_zeroes_alignment =
1496             sector_limits_lun2qemu(iscsilun->bl.opt_unmap_gran, iscsilun);
1497     }
1498     bs->bl.opt_transfer_length =
1499         sector_limits_lun2qemu(iscsilun->bl.opt_xfer_len, iscsilun);
1500 }
1501 
1502 /* Since iscsi_open() ignores bdrv_flags, there is nothing to do here in
1503  * prepare.  Note that this will not re-establish a connection with an iSCSI
1504  * target - it is effectively a NOP.  */
1505 static int iscsi_reopen_prepare(BDRVReopenState *state,
1506                                 BlockReopenQueue *queue, Error **errp)
1507 {
1508     /* NOP */
1509     return 0;
1510 }
1511 
1512 static int iscsi_truncate(BlockDriverState *bs, int64_t offset)
1513 {
1514     IscsiLun *iscsilun = bs->opaque;
1515     Error *local_err = NULL;
1516 
1517     if (iscsilun->type != TYPE_DISK) {
1518         return -ENOTSUP;
1519     }
1520 
1521     iscsi_readcapacity_sync(iscsilun, &local_err);
1522     if (local_err != NULL) {
1523         error_free(local_err);
1524         return -EIO;
1525     }
1526 
1527     if (offset > iscsi_getlength(bs)) {
1528         return -EINVAL;
1529     }
1530 
1531     if (iscsilun->allocationmap != NULL) {
1532         g_free(iscsilun->allocationmap);
1533         iscsilun->allocationmap = iscsi_allocationmap_init(iscsilun);
1534     }
1535 
1536     return 0;
1537 }
1538 
1539 static int iscsi_create(const char *filename, QemuOpts *opts, Error **errp)
1540 {
1541     int ret = 0;
1542     int64_t total_size = 0;
1543     BlockDriverState *bs;
1544     IscsiLun *iscsilun = NULL;
1545     QDict *bs_options;
1546 
1547     bs = bdrv_new();
1548 
1549     /* Read out options */
1550     total_size = DIV_ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
1551                               BDRV_SECTOR_SIZE);
1552     bs->opaque = g_new0(struct IscsiLun, 1);
1553     iscsilun = bs->opaque;
1554 
1555     bs_options = qdict_new();
1556     qdict_put(bs_options, "filename", qstring_from_str(filename));
1557     ret = iscsi_open(bs, bs_options, 0, NULL);
1558     QDECREF(bs_options);
1559 
1560     if (ret != 0) {
1561         goto out;
1562     }
1563     iscsi_detach_aio_context(bs);
1564     if (iscsilun->type != TYPE_DISK) {
1565         ret = -ENODEV;
1566         goto out;
1567     }
1568     if (bs->total_sectors < total_size) {
1569         ret = -ENOSPC;
1570         goto out;
1571     }
1572 
1573     ret = 0;
1574 out:
1575     if (iscsilun->iscsi != NULL) {
1576         iscsi_destroy_context(iscsilun->iscsi);
1577     }
1578     g_free(bs->opaque);
1579     bs->opaque = NULL;
1580     bdrv_unref(bs);
1581     return ret;
1582 }
1583 
1584 static int iscsi_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
1585 {
1586     IscsiLun *iscsilun = bs->opaque;
1587     bdi->unallocated_blocks_are_zero = !!iscsilun->lbprz;
1588     bdi->can_write_zeroes_with_unmap = iscsilun->lbprz && iscsilun->lbp.lbpws;
1589     bdi->cluster_size = iscsilun->cluster_sectors * BDRV_SECTOR_SIZE;
1590     return 0;
1591 }
1592 
1593 static QemuOptsList iscsi_create_opts = {
1594     .name = "iscsi-create-opts",
1595     .head = QTAILQ_HEAD_INITIALIZER(iscsi_create_opts.head),
1596     .desc = {
1597         {
1598             .name = BLOCK_OPT_SIZE,
1599             .type = QEMU_OPT_SIZE,
1600             .help = "Virtual disk size"
1601         },
1602         { /* end of list */ }
1603     }
1604 };
1605 
1606 static BlockDriver bdrv_iscsi = {
1607     .format_name     = "iscsi",
1608     .protocol_name   = "iscsi",
1609 
1610     .instance_size   = sizeof(IscsiLun),
1611     .bdrv_needs_filename = true,
1612     .bdrv_file_open  = iscsi_open,
1613     .bdrv_close      = iscsi_close,
1614     .bdrv_create     = iscsi_create,
1615     .create_opts     = &iscsi_create_opts,
1616     .bdrv_reopen_prepare  = iscsi_reopen_prepare,
1617 
1618     .bdrv_getlength  = iscsi_getlength,
1619     .bdrv_get_info   = iscsi_get_info,
1620     .bdrv_truncate   = iscsi_truncate,
1621     .bdrv_refresh_limits = iscsi_refresh_limits,
1622 
1623     .bdrv_co_get_block_status = iscsi_co_get_block_status,
1624     .bdrv_co_discard      = iscsi_co_discard,
1625     .bdrv_co_write_zeroes = iscsi_co_write_zeroes,
1626     .bdrv_co_readv         = iscsi_co_readv,
1627     .bdrv_co_writev        = iscsi_co_writev,
1628     .bdrv_co_flush_to_disk = iscsi_co_flush,
1629 
1630 #ifdef __linux__
1631     .bdrv_ioctl       = iscsi_ioctl,
1632     .bdrv_aio_ioctl   = iscsi_aio_ioctl,
1633 #endif
1634 
1635     .bdrv_detach_aio_context = iscsi_detach_aio_context,
1636     .bdrv_attach_aio_context = iscsi_attach_aio_context,
1637 };
1638 
1639 static QemuOptsList qemu_iscsi_opts = {
1640     .name = "iscsi",
1641     .head = QTAILQ_HEAD_INITIALIZER(qemu_iscsi_opts.head),
1642     .desc = {
1643         {
1644             .name = "user",
1645             .type = QEMU_OPT_STRING,
1646             .help = "username for CHAP authentication to target",
1647         },{
1648             .name = "password",
1649             .type = QEMU_OPT_STRING,
1650             .help = "password for CHAP authentication to target",
1651         },{
1652             .name = "header-digest",
1653             .type = QEMU_OPT_STRING,
1654             .help = "HeaderDigest setting. "
1655                     "{CRC32C|CRC32C-NONE|NONE-CRC32C|NONE}",
1656         },{
1657             .name = "initiator-name",
1658             .type = QEMU_OPT_STRING,
1659             .help = "Initiator iqn name to use when connecting",
1660         },
1661         { /* end of list */ }
1662     },
1663 };
1664 
1665 static void iscsi_block_init(void)
1666 {
1667     bdrv_register(&bdrv_iscsi);
1668     qemu_add_opts(&qemu_iscsi_opts);
1669 }
1670 
1671 block_init(iscsi_block_init);
1672