xref: /openbmc/qemu/block/iscsi.c (revision 1dde0f48d53ad39401ec5064a61162d6784aad44)
1 /*
2  * QEMU Block driver for iSCSI images
3  *
4  * Copyright (c) 2010-2011 Ronnie Sahlberg <ronniesahlberg@gmail.com>
5  * Copyright (c) 2012-2014 Peter Lieven <pl@kamp.de>
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a copy
8  * of this software and associated documentation files (the "Software"), to deal
9  * in the Software without restriction, including without limitation the rights
10  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11  * copies of the Software, and to permit persons to whom the Software is
12  * furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice shall be included in
15  * all copies or substantial portions of the Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23  * THE SOFTWARE.
24  */
25 
26 #include "config-host.h"
27 
28 #include <poll.h>
29 #include <math.h>
30 #include <arpa/inet.h>
31 #include "qemu-common.h"
32 #include "qemu/config-file.h"
33 #include "qemu/error-report.h"
34 #include "qemu/bitops.h"
35 #include "qemu/bitmap.h"
36 #include "block/block_int.h"
37 #include "trace.h"
38 #include "block/scsi.h"
39 #include "qemu/iov.h"
40 #include "sysemu/sysemu.h"
41 #include "qmp-commands.h"
42 
43 #include <iscsi/iscsi.h>
44 #include <iscsi/scsi-lowlevel.h>
45 
46 #ifdef __linux__
47 #include <scsi/sg.h>
48 #include <block/scsi.h>
49 #endif
50 
51 typedef struct IscsiLun {
52     struct iscsi_context *iscsi;
53     AioContext *aio_context;
54     int lun;
55     enum scsi_inquiry_peripheral_device_type type;
56     int block_size;
57     uint64_t num_blocks;
58     int events;
59     QEMUTimer *nop_timer;
60     uint8_t lbpme;
61     uint8_t lbprz;
62     uint8_t has_write_same;
63     struct scsi_inquiry_logical_block_provisioning lbp;
64     struct scsi_inquiry_block_limits bl;
65     unsigned char *zeroblock;
66     unsigned long *allocationmap;
67     int cluster_sectors;
68     bool use_16_for_rw;
69 } IscsiLun;
70 
71 typedef struct IscsiTask {
72     int status;
73     int complete;
74     int retries;
75     int do_retry;
76     struct scsi_task *task;
77     Coroutine *co;
78     QEMUBH *bh;
79     IscsiLun *iscsilun;
80     QEMUTimer retry_timer;
81 } IscsiTask;
82 
83 typedef struct IscsiAIOCB {
84     BlockDriverAIOCB common;
85     QEMUIOVector *qiov;
86     QEMUBH *bh;
87     IscsiLun *iscsilun;
88     struct scsi_task *task;
89     uint8_t *buf;
90     int status;
91     int64_t sector_num;
92     int nb_sectors;
93 #ifdef __linux__
94     sg_io_hdr_t *ioh;
95 #endif
96 } IscsiAIOCB;
97 
/* NOP keep-alive tuning; the users of these two are not in this part of
 * the file. */
#define NOP_INTERVAL 5000
#define MAX_NOP_FAILURES 3

/* Mean delay in milliseconds before the n-th retry of a command that was
 * answered with BUSY; the number of entries is also the retry limit. */
static const unsigned iscsi_retry_times[] = {
    8, 32, 128, 512, 2048
};
#define ISCSI_CMD_RETRIES ARRAY_SIZE(iscsi_retry_times)
102 
/* This threshold is a trade-off knob between the potential overhead of
 * an extra GET_LBA_STATUS request and needlessly reading a lot of zero
 * sectors over the wire.
 * If a read request covers at least ISCSI_CHECKALLOC_THRES sectors and
 * the allocationmap indicates that the area might be unallocated, the
 * allocation status of the area is checked before the data is read. */
#define ISCSI_CHECKALLOC_THRES 64
111 
112 static void
113 iscsi_bh_cb(void *p)
114 {
115     IscsiAIOCB *acb = p;
116 
117     qemu_bh_delete(acb->bh);
118 
119     g_free(acb->buf);
120     acb->buf = NULL;
121 
122     acb->common.cb(acb->common.opaque, acb->status);
123 
124     if (acb->task != NULL) {
125         scsi_free_scsi_task(acb->task);
126         acb->task = NULL;
127     }
128 
129     qemu_aio_unref(acb);
130 }
131 
132 static void
133 iscsi_schedule_bh(IscsiAIOCB *acb)
134 {
135     if (acb->bh) {
136         return;
137     }
138     acb->bh = aio_bh_new(acb->iscsilun->aio_context, iscsi_bh_cb, acb);
139     qemu_bh_schedule(acb->bh);
140 }
141 
142 static void iscsi_co_generic_bh_cb(void *opaque)
143 {
144     struct IscsiTask *iTask = opaque;
145     iTask->complete = 1;
146     qemu_bh_delete(iTask->bh);
147     qemu_coroutine_enter(iTask->co, NULL);
148 }
149 
150 static void iscsi_retry_timer_expired(void *opaque)
151 {
152     struct IscsiTask *iTask = opaque;
153     iTask->complete = 1;
154     if (iTask->co) {
155         qemu_coroutine_enter(iTask->co, NULL);
156     }
157 }
158 
159 static inline unsigned exp_random(double mean)
160 {
161     return -mean * log((double)rand() / RAND_MAX);
162 }
163 
164 static void
165 iscsi_co_generic_cb(struct iscsi_context *iscsi, int status,
166                         void *command_data, void *opaque)
167 {
168     struct IscsiTask *iTask = opaque;
169     struct scsi_task *task = command_data;
170 
171     iTask->status = status;
172     iTask->do_retry = 0;
173     iTask->task = task;
174 
175     if (status != SCSI_STATUS_GOOD) {
176         if (iTask->retries++ < ISCSI_CMD_RETRIES) {
177             if (status == SCSI_STATUS_CHECK_CONDITION
178                 && task->sense.key == SCSI_SENSE_UNIT_ATTENTION) {
179                 error_report("iSCSI CheckCondition: %s",
180                              iscsi_get_error(iscsi));
181                 iTask->do_retry = 1;
182                 goto out;
183             }
184             if (status == SCSI_STATUS_BUSY) {
185                 unsigned retry_time =
186                     exp_random(iscsi_retry_times[iTask->retries - 1]);
187                 error_report("iSCSI Busy (retry #%u in %u ms): %s",
188                              iTask->retries, retry_time,
189                              iscsi_get_error(iscsi));
190                 aio_timer_init(iTask->iscsilun->aio_context,
191                                &iTask->retry_timer, QEMU_CLOCK_REALTIME,
192                                SCALE_MS, iscsi_retry_timer_expired, iTask);
193                 timer_mod(&iTask->retry_timer,
194                           qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + retry_time);
195                 iTask->do_retry = 1;
196                 return;
197             }
198         }
199         error_report("iSCSI Failure: %s", iscsi_get_error(iscsi));
200     }
201 
202 out:
203     if (iTask->co) {
204         iTask->bh = aio_bh_new(iTask->iscsilun->aio_context,
205                                iscsi_co_generic_bh_cb, iTask);
206         qemu_bh_schedule(iTask->bh);
207     } else {
208         iTask->complete = 1;
209     }
210 }
211 
212 static void iscsi_co_init_iscsitask(IscsiLun *iscsilun, struct IscsiTask *iTask)
213 {
214     *iTask = (struct IscsiTask) {
215         .co         = qemu_coroutine_self(),
216         .iscsilun   = iscsilun,
217     };
218 }
219 
220 static void
221 iscsi_abort_task_cb(struct iscsi_context *iscsi, int status, void *command_data,
222                     void *private_data)
223 {
224     IscsiAIOCB *acb = private_data;
225 
226     acb->status = -ECANCELED;
227     iscsi_schedule_bh(acb);
228 }
229 
230 static void
231 iscsi_aio_cancel(BlockDriverAIOCB *blockacb)
232 {
233     IscsiAIOCB *acb = (IscsiAIOCB *)blockacb;
234     IscsiLun *iscsilun = acb->iscsilun;
235 
236     if (acb->status != -EINPROGRESS) {
237         return;
238     }
239 
240     /* send a task mgmt call to the target to cancel the task on the target */
241     iscsi_task_mgmt_abort_task_async(iscsilun->iscsi, acb->task,
242                                      iscsi_abort_task_cb, acb);
243 
244 }
245 
246 static const AIOCBInfo iscsi_aiocb_info = {
247     .aiocb_size         = sizeof(IscsiAIOCB),
248     .cancel_async       = iscsi_aio_cancel,
249 };
250 
251 
/* fd handlers, declared early because iscsi_set_events() registers them */
static void iscsi_process_write(void *arg);
static void iscsi_process_read(void *arg);
254 
255 static void
256 iscsi_set_events(IscsiLun *iscsilun)
257 {
258     struct iscsi_context *iscsi = iscsilun->iscsi;
259     int ev;
260 
261     /* We always register a read handler.  */
262     ev = POLLIN;
263     ev |= iscsi_which_events(iscsi);
264     if (ev != iscsilun->events) {
265         aio_set_fd_handler(iscsilun->aio_context,
266                            iscsi_get_fd(iscsi),
267                            iscsi_process_read,
268                            (ev & POLLOUT) ? iscsi_process_write : NULL,
269                            iscsilun);
270 
271     }
272 
273     iscsilun->events = ev;
274 }
275 
276 static void
277 iscsi_process_read(void *arg)
278 {
279     IscsiLun *iscsilun = arg;
280     struct iscsi_context *iscsi = iscsilun->iscsi;
281 
282     iscsi_service(iscsi, POLLIN);
283     iscsi_set_events(iscsilun);
284 }
285 
286 static void
287 iscsi_process_write(void *arg)
288 {
289     IscsiLun *iscsilun = arg;
290     struct iscsi_context *iscsi = iscsilun->iscsi;
291 
292     iscsi_service(iscsi, POLLOUT);
293     iscsi_set_events(iscsilun);
294 }
295 
296 static int64_t sector_lun2qemu(int64_t sector, IscsiLun *iscsilun)
297 {
298     return sector * iscsilun->block_size / BDRV_SECTOR_SIZE;
299 }
300 
301 static int64_t sector_qemu2lun(int64_t sector, IscsiLun *iscsilun)
302 {
303     return sector * BDRV_SECTOR_SIZE / iscsilun->block_size;
304 }
305 
306 static bool is_request_lun_aligned(int64_t sector_num, int nb_sectors,
307                                       IscsiLun *iscsilun)
308 {
309     if ((sector_num * BDRV_SECTOR_SIZE) % iscsilun->block_size ||
310         (nb_sectors * BDRV_SECTOR_SIZE) % iscsilun->block_size) {
311             error_report("iSCSI misaligned request: "
312                          "iscsilun->block_size %u, sector_num %" PRIi64
313                          ", nb_sectors %d",
314                          iscsilun->block_size, sector_num, nb_sectors);
315             return 0;
316     }
317     return 1;
318 }
319 
320 static void iscsi_allocationmap_set(IscsiLun *iscsilun, int64_t sector_num,
321                                     int nb_sectors)
322 {
323     if (iscsilun->allocationmap == NULL) {
324         return;
325     }
326     bitmap_set(iscsilun->allocationmap,
327                sector_num / iscsilun->cluster_sectors,
328                DIV_ROUND_UP(nb_sectors, iscsilun->cluster_sectors));
329 }
330 
331 static void iscsi_allocationmap_clear(IscsiLun *iscsilun, int64_t sector_num,
332                                       int nb_sectors)
333 {
334     int64_t cluster_num, nb_clusters;
335     if (iscsilun->allocationmap == NULL) {
336         return;
337     }
338     cluster_num = DIV_ROUND_UP(sector_num, iscsilun->cluster_sectors);
339     nb_clusters = (sector_num + nb_sectors) / iscsilun->cluster_sectors
340                   - cluster_num;
341     if (nb_clusters > 0) {
342         bitmap_clear(iscsilun->allocationmap, cluster_num, nb_clusters);
343     }
344 }
345 
346 static int coroutine_fn iscsi_co_writev(BlockDriverState *bs,
347                                         int64_t sector_num, int nb_sectors,
348                                         QEMUIOVector *iov)
349 {
350     IscsiLun *iscsilun = bs->opaque;
351     struct IscsiTask iTask;
352     uint64_t lba;
353     uint32_t num_sectors;
354 
355     if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
356         return -EINVAL;
357     }
358 
359     lba = sector_qemu2lun(sector_num, iscsilun);
360     num_sectors = sector_qemu2lun(nb_sectors, iscsilun);
361     iscsi_co_init_iscsitask(iscsilun, &iTask);
362 retry:
363     if (iscsilun->use_16_for_rw) {
364         iTask.task = iscsi_write16_task(iscsilun->iscsi, iscsilun->lun, lba,
365                                         NULL, num_sectors * iscsilun->block_size,
366                                         iscsilun->block_size, 0, 0, 0, 0, 0,
367                                         iscsi_co_generic_cb, &iTask);
368     } else {
369         iTask.task = iscsi_write10_task(iscsilun->iscsi, iscsilun->lun, lba,
370                                         NULL, num_sectors * iscsilun->block_size,
371                                         iscsilun->block_size, 0, 0, 0, 0, 0,
372                                         iscsi_co_generic_cb, &iTask);
373     }
374     if (iTask.task == NULL) {
375         return -ENOMEM;
376     }
377     scsi_task_set_iov_out(iTask.task, (struct scsi_iovec *) iov->iov,
378                           iov->niov);
379     while (!iTask.complete) {
380         iscsi_set_events(iscsilun);
381         qemu_coroutine_yield();
382     }
383 
384     if (iTask.task != NULL) {
385         scsi_free_scsi_task(iTask.task);
386         iTask.task = NULL;
387     }
388 
389     if (iTask.do_retry) {
390         iTask.complete = 0;
391         goto retry;
392     }
393 
394     if (iTask.status != SCSI_STATUS_GOOD) {
395         return -EIO;
396     }
397 
398     iscsi_allocationmap_set(iscsilun, sector_num, nb_sectors);
399 
400     return 0;
401 }
402 
403 
404 static bool iscsi_allocationmap_is_allocated(IscsiLun *iscsilun,
405                                              int64_t sector_num, int nb_sectors)
406 {
407     unsigned long size;
408     if (iscsilun->allocationmap == NULL) {
409         return true;
410     }
411     size = DIV_ROUND_UP(sector_num + nb_sectors, iscsilun->cluster_sectors);
412     return !(find_next_bit(iscsilun->allocationmap, size,
413                            sector_num / iscsilun->cluster_sectors) == size);
414 }
415 
416 static int64_t coroutine_fn iscsi_co_get_block_status(BlockDriverState *bs,
417                                                   int64_t sector_num,
418                                                   int nb_sectors, int *pnum)
419 {
420     IscsiLun *iscsilun = bs->opaque;
421     struct scsi_get_lba_status *lbas = NULL;
422     struct scsi_lba_status_descriptor *lbasd = NULL;
423     struct IscsiTask iTask;
424     int64_t ret;
425 
426     iscsi_co_init_iscsitask(iscsilun, &iTask);
427 
428     if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
429         ret = -EINVAL;
430         goto out;
431     }
432 
433     /* default to all sectors allocated */
434     ret = BDRV_BLOCK_DATA;
435     ret |= (sector_num << BDRV_SECTOR_BITS) | BDRV_BLOCK_OFFSET_VALID;
436     *pnum = nb_sectors;
437 
438     /* LUN does not support logical block provisioning */
439     if (iscsilun->lbpme == 0) {
440         goto out;
441     }
442 
443 retry:
444     if (iscsi_get_lba_status_task(iscsilun->iscsi, iscsilun->lun,
445                                   sector_qemu2lun(sector_num, iscsilun),
446                                   8 + 16, iscsi_co_generic_cb,
447                                   &iTask) == NULL) {
448         ret = -ENOMEM;
449         goto out;
450     }
451 
452     while (!iTask.complete) {
453         iscsi_set_events(iscsilun);
454         qemu_coroutine_yield();
455     }
456 
457     if (iTask.do_retry) {
458         if (iTask.task != NULL) {
459             scsi_free_scsi_task(iTask.task);
460             iTask.task = NULL;
461         }
462         iTask.complete = 0;
463         goto retry;
464     }
465 
466     if (iTask.status != SCSI_STATUS_GOOD) {
467         /* in case the get_lba_status_callout fails (i.e.
468          * because the device is busy or the cmd is not
469          * supported) we pretend all blocks are allocated
470          * for backwards compatibility */
471         goto out;
472     }
473 
474     lbas = scsi_datain_unmarshall(iTask.task);
475     if (lbas == NULL) {
476         ret = -EIO;
477         goto out;
478     }
479 
480     lbasd = &lbas->descriptors[0];
481 
482     if (sector_qemu2lun(sector_num, iscsilun) != lbasd->lba) {
483         ret = -EIO;
484         goto out;
485     }
486 
487     *pnum = sector_lun2qemu(lbasd->num_blocks, iscsilun);
488 
489     if (lbasd->provisioning == SCSI_PROVISIONING_TYPE_DEALLOCATED ||
490         lbasd->provisioning == SCSI_PROVISIONING_TYPE_ANCHORED) {
491         ret &= ~BDRV_BLOCK_DATA;
492         if (iscsilun->lbprz) {
493             ret |= BDRV_BLOCK_ZERO;
494         }
495     }
496 
497     if (ret & BDRV_BLOCK_ZERO) {
498         iscsi_allocationmap_clear(iscsilun, sector_num, *pnum);
499     } else {
500         iscsi_allocationmap_set(iscsilun, sector_num, *pnum);
501     }
502 
503     if (*pnum > nb_sectors) {
504         *pnum = nb_sectors;
505     }
506 out:
507     if (iTask.task != NULL) {
508         scsi_free_scsi_task(iTask.task);
509     }
510     return ret;
511 }
512 
513 static int coroutine_fn iscsi_co_readv(BlockDriverState *bs,
514                                        int64_t sector_num, int nb_sectors,
515                                        QEMUIOVector *iov)
516 {
517     IscsiLun *iscsilun = bs->opaque;
518     struct IscsiTask iTask;
519     uint64_t lba;
520     uint32_t num_sectors;
521 
522     if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
523         return -EINVAL;
524     }
525 
526     if (iscsilun->lbprz && nb_sectors >= ISCSI_CHECKALLOC_THRES &&
527         !iscsi_allocationmap_is_allocated(iscsilun, sector_num, nb_sectors)) {
528         int64_t ret;
529         int pnum;
530         ret = iscsi_co_get_block_status(bs, sector_num, INT_MAX, &pnum);
531         if (ret < 0) {
532             return ret;
533         }
534         if (ret & BDRV_BLOCK_ZERO && pnum >= nb_sectors) {
535             qemu_iovec_memset(iov, 0, 0x00, iov->size);
536             return 0;
537         }
538     }
539 
540     lba = sector_qemu2lun(sector_num, iscsilun);
541     num_sectors = sector_qemu2lun(nb_sectors, iscsilun);
542 
543     iscsi_co_init_iscsitask(iscsilun, &iTask);
544 retry:
545     if (iscsilun->use_16_for_rw) {
546         iTask.task = iscsi_read16_task(iscsilun->iscsi, iscsilun->lun, lba,
547                                        num_sectors * iscsilun->block_size,
548                                        iscsilun->block_size, 0, 0, 0, 0, 0,
549                                        iscsi_co_generic_cb, &iTask);
550     } else {
551         iTask.task = iscsi_read10_task(iscsilun->iscsi, iscsilun->lun, lba,
552                                        num_sectors * iscsilun->block_size,
553                                        iscsilun->block_size,
554                                        0, 0, 0, 0, 0,
555                                        iscsi_co_generic_cb, &iTask);
556     }
557     if (iTask.task == NULL) {
558         return -ENOMEM;
559     }
560     scsi_task_set_iov_in(iTask.task, (struct scsi_iovec *) iov->iov, iov->niov);
561 
562     while (!iTask.complete) {
563         iscsi_set_events(iscsilun);
564         qemu_coroutine_yield();
565     }
566 
567     if (iTask.task != NULL) {
568         scsi_free_scsi_task(iTask.task);
569         iTask.task = NULL;
570     }
571 
572     if (iTask.do_retry) {
573         iTask.complete = 0;
574         goto retry;
575     }
576 
577     if (iTask.status != SCSI_STATUS_GOOD) {
578         return -EIO;
579     }
580 
581     return 0;
582 }
583 
584 static int coroutine_fn iscsi_co_flush(BlockDriverState *bs)
585 {
586     IscsiLun *iscsilun = bs->opaque;
587     struct IscsiTask iTask;
588 
589     if (bs->sg) {
590         return 0;
591     }
592 
593     iscsi_co_init_iscsitask(iscsilun, &iTask);
594 
595 retry:
596     if (iscsi_synchronizecache10_task(iscsilun->iscsi, iscsilun->lun, 0, 0, 0,
597                                       0, iscsi_co_generic_cb, &iTask) == NULL) {
598         return -ENOMEM;
599     }
600 
601     while (!iTask.complete) {
602         iscsi_set_events(iscsilun);
603         qemu_coroutine_yield();
604     }
605 
606     if (iTask.task != NULL) {
607         scsi_free_scsi_task(iTask.task);
608         iTask.task = NULL;
609     }
610 
611     if (iTask.do_retry) {
612         iTask.complete = 0;
613         goto retry;
614     }
615 
616     if (iTask.status != SCSI_STATUS_GOOD) {
617         return -EIO;
618     }
619 
620     return 0;
621 }
622 
623 #ifdef __linux__
624 static void
625 iscsi_aio_ioctl_cb(struct iscsi_context *iscsi, int status,
626                      void *command_data, void *opaque)
627 {
628     IscsiAIOCB *acb = opaque;
629 
630     g_free(acb->buf);
631     acb->buf = NULL;
632 
633     acb->status = 0;
634     if (status < 0) {
635         error_report("Failed to ioctl(SG_IO) to iSCSI lun. %s",
636                      iscsi_get_error(iscsi));
637         acb->status = -EIO;
638     }
639 
640     acb->ioh->driver_status = 0;
641     acb->ioh->host_status   = 0;
642     acb->ioh->resid         = 0;
643 
644 #define SG_ERR_DRIVER_SENSE    0x08
645 
646     if (status == SCSI_STATUS_CHECK_CONDITION && acb->task->datain.size >= 2) {
647         int ss;
648 
649         acb->ioh->driver_status |= SG_ERR_DRIVER_SENSE;
650 
651         acb->ioh->sb_len_wr = acb->task->datain.size - 2;
652         ss = (acb->ioh->mx_sb_len >= acb->ioh->sb_len_wr) ?
653              acb->ioh->mx_sb_len : acb->ioh->sb_len_wr;
654         memcpy(acb->ioh->sbp, &acb->task->datain.data[2], ss);
655     }
656 
657     iscsi_schedule_bh(acb);
658 }
659 
660 static BlockDriverAIOCB *iscsi_aio_ioctl(BlockDriverState *bs,
661         unsigned long int req, void *buf,
662         BlockDriverCompletionFunc *cb, void *opaque)
663 {
664     IscsiLun *iscsilun = bs->opaque;
665     struct iscsi_context *iscsi = iscsilun->iscsi;
666     struct iscsi_data data;
667     IscsiAIOCB *acb;
668 
669     assert(req == SG_IO);
670 
671     acb = qemu_aio_get(&iscsi_aiocb_info, bs, cb, opaque);
672 
673     acb->iscsilun = iscsilun;
674     acb->bh          = NULL;
675     acb->status      = -EINPROGRESS;
676     acb->buf         = NULL;
677     acb->ioh         = buf;
678 
679     acb->task = malloc(sizeof(struct scsi_task));
680     if (acb->task == NULL) {
681         error_report("iSCSI: Failed to allocate task for scsi command. %s",
682                      iscsi_get_error(iscsi));
683         qemu_aio_unref(acb);
684         return NULL;
685     }
686     memset(acb->task, 0, sizeof(struct scsi_task));
687 
688     switch (acb->ioh->dxfer_direction) {
689     case SG_DXFER_TO_DEV:
690         acb->task->xfer_dir = SCSI_XFER_WRITE;
691         break;
692     case SG_DXFER_FROM_DEV:
693         acb->task->xfer_dir = SCSI_XFER_READ;
694         break;
695     default:
696         acb->task->xfer_dir = SCSI_XFER_NONE;
697         break;
698     }
699 
700     acb->task->cdb_size = acb->ioh->cmd_len;
701     memcpy(&acb->task->cdb[0], acb->ioh->cmdp, acb->ioh->cmd_len);
702     acb->task->expxferlen = acb->ioh->dxfer_len;
703 
704     data.size = 0;
705     if (acb->task->xfer_dir == SCSI_XFER_WRITE) {
706         if (acb->ioh->iovec_count == 0) {
707             data.data = acb->ioh->dxferp;
708             data.size = acb->ioh->dxfer_len;
709         } else {
710             scsi_task_set_iov_out(acb->task,
711                                  (struct scsi_iovec *) acb->ioh->dxferp,
712                                  acb->ioh->iovec_count);
713         }
714     }
715 
716     if (iscsi_scsi_command_async(iscsi, iscsilun->lun, acb->task,
717                                  iscsi_aio_ioctl_cb,
718                                  (data.size > 0) ? &data : NULL,
719                                  acb) != 0) {
720         scsi_free_scsi_task(acb->task);
721         qemu_aio_unref(acb);
722         return NULL;
723     }
724 
725     /* tell libiscsi to read straight into the buffer we got from ioctl */
726     if (acb->task->xfer_dir == SCSI_XFER_READ) {
727         if (acb->ioh->iovec_count == 0) {
728             scsi_task_add_data_in_buffer(acb->task,
729                                          acb->ioh->dxfer_len,
730                                          acb->ioh->dxferp);
731         } else {
732             scsi_task_set_iov_in(acb->task,
733                                  (struct scsi_iovec *) acb->ioh->dxferp,
734                                  acb->ioh->iovec_count);
735         }
736     }
737 
738     iscsi_set_events(iscsilun);
739 
740     return &acb->common;
741 }
742 
/* Completion callback of the synchronous SG_IO path: stores the final
 * status in the int that @opaque points to. */
static void ioctl_cb(void *opaque, int status)
{
    *(int *)opaque = status;
}
748 
749 static int iscsi_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
750 {
751     IscsiLun *iscsilun = bs->opaque;
752     int status;
753 
754     switch (req) {
755     case SG_GET_VERSION_NUM:
756         *(int *)buf = 30000;
757         break;
758     case SG_GET_SCSI_ID:
759         ((struct sg_scsi_id *)buf)->scsi_type = iscsilun->type;
760         break;
761     case SG_IO:
762         status = -EINPROGRESS;
763         iscsi_aio_ioctl(bs, req, buf, ioctl_cb, &status);
764 
765         while (status == -EINPROGRESS) {
766             aio_poll(iscsilun->aio_context, true);
767         }
768 
769         return 0;
770     default:
771         return -1;
772     }
773     return 0;
774 }
775 #endif
776 
777 static int64_t
778 iscsi_getlength(BlockDriverState *bs)
779 {
780     IscsiLun *iscsilun = bs->opaque;
781     int64_t len;
782 
783     len  = iscsilun->num_blocks;
784     len *= iscsilun->block_size;
785 
786     return len;
787 }
788 
789 static int
790 coroutine_fn iscsi_co_discard(BlockDriverState *bs, int64_t sector_num,
791                                    int nb_sectors)
792 {
793     IscsiLun *iscsilun = bs->opaque;
794     struct IscsiTask iTask;
795     struct unmap_list list;
796 
797     if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
798         return -EINVAL;
799     }
800 
801     if (!iscsilun->lbp.lbpu) {
802         /* UNMAP is not supported by the target */
803         return 0;
804     }
805 
806     list.lba = sector_qemu2lun(sector_num, iscsilun);
807     list.num = sector_qemu2lun(nb_sectors, iscsilun);
808 
809     iscsi_co_init_iscsitask(iscsilun, &iTask);
810 retry:
811     if (iscsi_unmap_task(iscsilun->iscsi, iscsilun->lun, 0, 0, &list, 1,
812                      iscsi_co_generic_cb, &iTask) == NULL) {
813         return -ENOMEM;
814     }
815 
816     while (!iTask.complete) {
817         iscsi_set_events(iscsilun);
818         qemu_coroutine_yield();
819     }
820 
821     if (iTask.task != NULL) {
822         scsi_free_scsi_task(iTask.task);
823         iTask.task = NULL;
824     }
825 
826     if (iTask.do_retry) {
827         iTask.complete = 0;
828         goto retry;
829     }
830 
831     if (iTask.status == SCSI_STATUS_CHECK_CONDITION) {
832         /* the target might fail with a check condition if it
833            is not happy with the alignment of the UNMAP request
834            we silently fail in this case */
835         return 0;
836     }
837 
838     if (iTask.status != SCSI_STATUS_GOOD) {
839         return -EIO;
840     }
841 
842     iscsi_allocationmap_clear(iscsilun, sector_num, nb_sectors);
843 
844     return 0;
845 }
846 
847 static int
848 coroutine_fn iscsi_co_write_zeroes(BlockDriverState *bs, int64_t sector_num,
849                                    int nb_sectors, BdrvRequestFlags flags)
850 {
851     IscsiLun *iscsilun = bs->opaque;
852     struct IscsiTask iTask;
853     uint64_t lba;
854     uint32_t nb_blocks;
855     bool use_16_for_ws = iscsilun->use_16_for_rw;
856 
857     if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
858         return -EINVAL;
859     }
860 
861     if (flags & BDRV_REQ_MAY_UNMAP) {
862         if (!use_16_for_ws && !iscsilun->lbp.lbpws10) {
863             /* WRITESAME10 with UNMAP is unsupported try WRITESAME16 */
864             use_16_for_ws = true;
865         }
866         if (use_16_for_ws && !iscsilun->lbp.lbpws) {
867             /* WRITESAME16 with UNMAP is not supported by the target,
868              * fall back and try WRITESAME10/16 without UNMAP */
869             flags &= ~BDRV_REQ_MAY_UNMAP;
870             use_16_for_ws = iscsilun->use_16_for_rw;
871         }
872     }
873 
874     if (!(flags & BDRV_REQ_MAY_UNMAP) && !iscsilun->has_write_same) {
875         /* WRITESAME without UNMAP is not supported by the target */
876         return -ENOTSUP;
877     }
878 
879     lba = sector_qemu2lun(sector_num, iscsilun);
880     nb_blocks = sector_qemu2lun(nb_sectors, iscsilun);
881 
882     if (iscsilun->zeroblock == NULL) {
883         iscsilun->zeroblock = g_try_malloc0(iscsilun->block_size);
884         if (iscsilun->zeroblock == NULL) {
885             return -ENOMEM;
886         }
887     }
888 
889     iscsi_co_init_iscsitask(iscsilun, &iTask);
890 retry:
891     if (use_16_for_ws) {
892         iTask.task = iscsi_writesame16_task(iscsilun->iscsi, iscsilun->lun, lba,
893                                             iscsilun->zeroblock, iscsilun->block_size,
894                                             nb_blocks, 0, !!(flags & BDRV_REQ_MAY_UNMAP),
895                                             0, 0, iscsi_co_generic_cb, &iTask);
896     } else {
897         iTask.task = iscsi_writesame10_task(iscsilun->iscsi, iscsilun->lun, lba,
898                                             iscsilun->zeroblock, iscsilun->block_size,
899                                             nb_blocks, 0, !!(flags & BDRV_REQ_MAY_UNMAP),
900                                             0, 0, iscsi_co_generic_cb, &iTask);
901     }
902     if (iTask.task == NULL) {
903         return -ENOMEM;
904     }
905 
906     while (!iTask.complete) {
907         iscsi_set_events(iscsilun);
908         qemu_coroutine_yield();
909     }
910 
911     if (iTask.status == SCSI_STATUS_CHECK_CONDITION &&
912         iTask.task->sense.key == SCSI_SENSE_ILLEGAL_REQUEST &&
913         (iTask.task->sense.ascq == SCSI_SENSE_ASCQ_INVALID_OPERATION_CODE ||
914          iTask.task->sense.ascq == SCSI_SENSE_ASCQ_INVALID_FIELD_IN_CDB)) {
915         /* WRITE SAME is not supported by the target */
916         iscsilun->has_write_same = false;
917         scsi_free_scsi_task(iTask.task);
918         return -ENOTSUP;
919     }
920 
921     if (iTask.task != NULL) {
922         scsi_free_scsi_task(iTask.task);
923         iTask.task = NULL;
924     }
925 
926     if (iTask.do_retry) {
927         iTask.complete = 0;
928         goto retry;
929     }
930 
931     if (iTask.status != SCSI_STATUS_GOOD) {
932         return -EIO;
933     }
934 
935     if (flags & BDRV_REQ_MAY_UNMAP) {
936         iscsi_allocationmap_clear(iscsilun, sector_num, nb_sectors);
937     } else {
938         iscsi_allocationmap_set(iscsilun, sector_num, nb_sectors);
939     }
940 
941     return 0;
942 }
943 
944 static void parse_chap(struct iscsi_context *iscsi, const char *target,
945                        Error **errp)
946 {
947     QemuOptsList *list;
948     QemuOpts *opts;
949     const char *user = NULL;
950     const char *password = NULL;
951 
952     list = qemu_find_opts("iscsi");
953     if (!list) {
954         return;
955     }
956 
957     opts = qemu_opts_find(list, target);
958     if (opts == NULL) {
959         opts = QTAILQ_FIRST(&list->head);
960         if (!opts) {
961             return;
962         }
963     }
964 
965     user = qemu_opt_get(opts, "user");
966     if (!user) {
967         return;
968     }
969 
970     password = qemu_opt_get(opts, "password");
971     if (!password) {
972         error_setg(errp, "CHAP username specified but no password was given");
973         return;
974     }
975 
976     if (iscsi_set_initiator_username_pwd(iscsi, user, password)) {
977         error_setg(errp, "Failed to set initiator username and password");
978     }
979 }
980 
981 static void parse_header_digest(struct iscsi_context *iscsi, const char *target,
982                                 Error **errp)
983 {
984     QemuOptsList *list;
985     QemuOpts *opts;
986     const char *digest = NULL;
987 
988     list = qemu_find_opts("iscsi");
989     if (!list) {
990         return;
991     }
992 
993     opts = qemu_opts_find(list, target);
994     if (opts == NULL) {
995         opts = QTAILQ_FIRST(&list->head);
996         if (!opts) {
997             return;
998         }
999     }
1000 
1001     digest = qemu_opt_get(opts, "header-digest");
1002     if (!digest) {
1003         return;
1004     }
1005 
1006     if (!strcmp(digest, "CRC32C")) {
1007         iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_CRC32C);
1008     } else if (!strcmp(digest, "NONE")) {
1009         iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE);
1010     } else if (!strcmp(digest, "CRC32C-NONE")) {
1011         iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_CRC32C_NONE);
1012     } else if (!strcmp(digest, "NONE-CRC32C")) {
1013         iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE_CRC32C);
1014     } else {
1015         error_setg(errp, "Invalid header-digest setting : %s", digest);
1016     }
1017 }
1018 
1019 static char *parse_initiator_name(const char *target)
1020 {
1021     QemuOptsList *list;
1022     QemuOpts *opts;
1023     const char *name;
1024     char *iscsi_name;
1025     UuidInfo *uuid_info;
1026 
1027     list = qemu_find_opts("iscsi");
1028     if (list) {
1029         opts = qemu_opts_find(list, target);
1030         if (!opts) {
1031             opts = QTAILQ_FIRST(&list->head);
1032         }
1033         if (opts) {
1034             name = qemu_opt_get(opts, "initiator-name");
1035             if (name) {
1036                 return g_strdup(name);
1037             }
1038         }
1039     }
1040 
1041     uuid_info = qmp_query_uuid(NULL);
1042     if (strcmp(uuid_info->UUID, UUID_NONE) == 0) {
1043         name = qemu_get_vm_name();
1044     } else {
1045         name = uuid_info->UUID;
1046     }
1047     iscsi_name = g_strdup_printf("iqn.2008-11.org.linux-kvm%s%s",
1048                                  name ? ":" : "", name ? name : "");
1049     qapi_free_UuidInfo(uuid_info);
1050     return iscsi_name;
1051 }
1052 
1053 static void iscsi_nop_timed_event(void *opaque)
1054 {
1055     IscsiLun *iscsilun = opaque;
1056 
1057     if (iscsi_get_nops_in_flight(iscsilun->iscsi) > MAX_NOP_FAILURES) {
1058         error_report("iSCSI: NOP timeout. Reconnecting...");
1059         iscsi_reconnect(iscsilun->iscsi);
1060     }
1061 
1062     if (iscsi_nop_out_async(iscsilun->iscsi, NULL, NULL, 0, NULL) != 0) {
1063         error_report("iSCSI: failed to sent NOP-Out. Disabling NOP messages.");
1064         return;
1065     }
1066 
1067     timer_mod(iscsilun->nop_timer, qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + NOP_INTERVAL);
1068     iscsi_set_events(iscsilun);
1069 }
1070 
1071 static void iscsi_readcapacity_sync(IscsiLun *iscsilun, Error **errp)
1072 {
1073     struct scsi_task *task = NULL;
1074     struct scsi_readcapacity10 *rc10 = NULL;
1075     struct scsi_readcapacity16 *rc16 = NULL;
1076     int retries = ISCSI_CMD_RETRIES;
1077 
1078     do {
1079         if (task != NULL) {
1080             scsi_free_scsi_task(task);
1081             task = NULL;
1082         }
1083 
1084         switch (iscsilun->type) {
1085         case TYPE_DISK:
1086             task = iscsi_readcapacity16_sync(iscsilun->iscsi, iscsilun->lun);
1087             if (task != NULL && task->status == SCSI_STATUS_GOOD) {
1088                 rc16 = scsi_datain_unmarshall(task);
1089                 if (rc16 == NULL) {
1090                     error_setg(errp, "iSCSI: Failed to unmarshall readcapacity16 data.");
1091                 } else {
1092                     iscsilun->block_size = rc16->block_length;
1093                     iscsilun->num_blocks = rc16->returned_lba + 1;
1094                     iscsilun->lbpme = rc16->lbpme;
1095                     iscsilun->lbprz = rc16->lbprz;
1096                     iscsilun->use_16_for_rw = (rc16->returned_lba > 0xffffffff);
1097                 }
1098             }
1099             break;
1100         case TYPE_ROM:
1101             task = iscsi_readcapacity10_sync(iscsilun->iscsi, iscsilun->lun, 0, 0);
1102             if (task != NULL && task->status == SCSI_STATUS_GOOD) {
1103                 rc10 = scsi_datain_unmarshall(task);
1104                 if (rc10 == NULL) {
1105                     error_setg(errp, "iSCSI: Failed to unmarshall readcapacity10 data.");
1106                 } else {
1107                     iscsilun->block_size = rc10->block_size;
1108                     if (rc10->lba == 0) {
1109                         /* blank disk loaded */
1110                         iscsilun->num_blocks = 0;
1111                     } else {
1112                         iscsilun->num_blocks = rc10->lba + 1;
1113                     }
1114                 }
1115             }
1116             break;
1117         default:
1118             return;
1119         }
1120     } while (task != NULL && task->status == SCSI_STATUS_CHECK_CONDITION
1121              && task->sense.key == SCSI_SENSE_UNIT_ATTENTION
1122              && retries-- > 0);
1123 
1124     if (task == NULL || task->status != SCSI_STATUS_GOOD) {
1125         error_setg(errp, "iSCSI: failed to send readcapacity10 command.");
1126     }
1127     if (task) {
1128         scsi_free_scsi_task(task);
1129     }
1130 }
1131 
/*
 * Options that iscsi_open() absorbs from the options QDict it is given.
 * Only "filename", the iSCSI URL, is defined.
 *
 * TODO Convert to fine grained options
 */
static QemuOptsList runtime_opts = {
    .name = "iscsi",
    .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
    .desc = {
        {
            .name = "filename",
            .type = QEMU_OPT_STRING,
            .help = "URL to the iscsi image",
        },
        { /* end of list */ }
    },
};
1145 
1146 static struct scsi_task *iscsi_do_inquiry(struct iscsi_context *iscsi, int lun,
1147                                           int evpd, int pc, void **inq, Error **errp)
1148 {
1149     int full_size;
1150     struct scsi_task *task = NULL;
1151     task = iscsi_inquiry_sync(iscsi, lun, evpd, pc, 64);
1152     if (task == NULL || task->status != SCSI_STATUS_GOOD) {
1153         goto fail;
1154     }
1155     full_size = scsi_datain_getfullsize(task);
1156     if (full_size > task->datain.size) {
1157         scsi_free_scsi_task(task);
1158 
1159         /* we need more data for the full list */
1160         task = iscsi_inquiry_sync(iscsi, lun, evpd, pc, full_size);
1161         if (task == NULL || task->status != SCSI_STATUS_GOOD) {
1162             goto fail;
1163         }
1164     }
1165 
1166     *inq = scsi_datain_unmarshall(task);
1167     if (*inq == NULL) {
1168         error_setg(errp, "iSCSI: failed to unmarshall inquiry datain blob");
1169         goto fail_with_err;
1170     }
1171 
1172     return task;
1173 
1174 fail:
1175     error_setg(errp, "iSCSI: Inquiry command failed : %s",
1176                iscsi_get_error(iscsi));
1177 fail_with_err:
1178     if (task != NULL) {
1179         scsi_free_scsi_task(task);
1180     }
1181     return NULL;
1182 }
1183 
1184 static void iscsi_detach_aio_context(BlockDriverState *bs)
1185 {
1186     IscsiLun *iscsilun = bs->opaque;
1187 
1188     aio_set_fd_handler(iscsilun->aio_context,
1189                        iscsi_get_fd(iscsilun->iscsi),
1190                        NULL, NULL, NULL);
1191     iscsilun->events = 0;
1192 
1193     if (iscsilun->nop_timer) {
1194         timer_del(iscsilun->nop_timer);
1195         timer_free(iscsilun->nop_timer);
1196         iscsilun->nop_timer = NULL;
1197     }
1198 }
1199 
1200 static void iscsi_attach_aio_context(BlockDriverState *bs,
1201                                      AioContext *new_context)
1202 {
1203     IscsiLun *iscsilun = bs->opaque;
1204 
1205     iscsilun->aio_context = new_context;
1206     iscsi_set_events(iscsilun);
1207 
1208     /* Set up a timer for sending out iSCSI NOPs */
1209     iscsilun->nop_timer = aio_timer_new(iscsilun->aio_context,
1210                                         QEMU_CLOCK_REALTIME, SCALE_MS,
1211                                         iscsi_nop_timed_event, iscsilun);
1212     timer_mod(iscsilun->nop_timer,
1213               qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + NOP_INTERVAL);
1214 }
1215 
1216 /*
1217  * We support iscsi url's on the form
1218  * iscsi://[<username>%<password>@]<host>[:<port>]/<targetname>/<lun>
1219  *
1220  * Note: flags are currently not used by iscsi_open.  If this function
1221  * is changed such that flags are used, please examine iscsi_reopen_prepare()
1222  * to see if needs to be changed as well.
1223  */
1224 static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
1225                       Error **errp)
1226 {
1227     IscsiLun *iscsilun = bs->opaque;
1228     struct iscsi_context *iscsi = NULL;
1229     struct iscsi_url *iscsi_url = NULL;
1230     struct scsi_task *task = NULL;
1231     struct scsi_inquiry_standard *inq = NULL;
1232     struct scsi_inquiry_supported_pages *inq_vpd;
1233     char *initiator_name = NULL;
1234     QemuOpts *opts;
1235     Error *local_err = NULL;
1236     const char *filename;
1237     int i, ret;
1238 
1239     if ((BDRV_SECTOR_SIZE % 512) != 0) {
1240         error_setg(errp, "iSCSI: Invalid BDRV_SECTOR_SIZE. "
1241                    "BDRV_SECTOR_SIZE(%lld) is not a multiple "
1242                    "of 512", BDRV_SECTOR_SIZE);
1243         return -EINVAL;
1244     }
1245 
1246     opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
1247     qemu_opts_absorb_qdict(opts, options, &local_err);
1248     if (local_err) {
1249         error_propagate(errp, local_err);
1250         ret = -EINVAL;
1251         goto out;
1252     }
1253 
1254     filename = qemu_opt_get(opts, "filename");
1255 
1256     iscsi_url = iscsi_parse_full_url(iscsi, filename);
1257     if (iscsi_url == NULL) {
1258         error_setg(errp, "Failed to parse URL : %s", filename);
1259         ret = -EINVAL;
1260         goto out;
1261     }
1262 
1263     memset(iscsilun, 0, sizeof(IscsiLun));
1264 
1265     initiator_name = parse_initiator_name(iscsi_url->target);
1266 
1267     iscsi = iscsi_create_context(initiator_name);
1268     if (iscsi == NULL) {
1269         error_setg(errp, "iSCSI: Failed to create iSCSI context.");
1270         ret = -ENOMEM;
1271         goto out;
1272     }
1273 
1274     if (iscsi_set_targetname(iscsi, iscsi_url->target)) {
1275         error_setg(errp, "iSCSI: Failed to set target name.");
1276         ret = -EINVAL;
1277         goto out;
1278     }
1279 
1280     if (iscsi_url->user != NULL) {
1281         ret = iscsi_set_initiator_username_pwd(iscsi, iscsi_url->user,
1282                                               iscsi_url->passwd);
1283         if (ret != 0) {
1284             error_setg(errp, "Failed to set initiator username and password");
1285             ret = -EINVAL;
1286             goto out;
1287         }
1288     }
1289 
1290     /* check if we got CHAP username/password via the options */
1291     parse_chap(iscsi, iscsi_url->target, &local_err);
1292     if (local_err != NULL) {
1293         error_propagate(errp, local_err);
1294         ret = -EINVAL;
1295         goto out;
1296     }
1297 
1298     if (iscsi_set_session_type(iscsi, ISCSI_SESSION_NORMAL) != 0) {
1299         error_setg(errp, "iSCSI: Failed to set session type to normal.");
1300         ret = -EINVAL;
1301         goto out;
1302     }
1303 
1304     iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE_CRC32C);
1305 
1306     /* check if we got HEADER_DIGEST via the options */
1307     parse_header_digest(iscsi, iscsi_url->target, &local_err);
1308     if (local_err != NULL) {
1309         error_propagate(errp, local_err);
1310         ret = -EINVAL;
1311         goto out;
1312     }
1313 
1314     if (iscsi_full_connect_sync(iscsi, iscsi_url->portal, iscsi_url->lun) != 0) {
1315         error_setg(errp, "iSCSI: Failed to connect to LUN : %s",
1316             iscsi_get_error(iscsi));
1317         ret = -EINVAL;
1318         goto out;
1319     }
1320 
1321     iscsilun->iscsi = iscsi;
1322     iscsilun->aio_context = bdrv_get_aio_context(bs);
1323     iscsilun->lun   = iscsi_url->lun;
1324     iscsilun->has_write_same = true;
1325 
1326     task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 0, 0,
1327                             (void **) &inq, errp);
1328     if (task == NULL) {
1329         ret = -EINVAL;
1330         goto out;
1331     }
1332     iscsilun->type = inq->periperal_device_type;
1333     scsi_free_scsi_task(task);
1334     task = NULL;
1335 
1336     iscsi_readcapacity_sync(iscsilun, &local_err);
1337     if (local_err != NULL) {
1338         error_propagate(errp, local_err);
1339         ret = -EINVAL;
1340         goto out;
1341     }
1342     bs->total_sectors = sector_lun2qemu(iscsilun->num_blocks, iscsilun);
1343     bs->request_alignment = iscsilun->block_size;
1344 
1345     /* We don't have any emulation for devices other than disks and CD-ROMs, so
1346      * this must be sg ioctl compatible. We force it to be sg, otherwise qemu
1347      * will try to read from the device to guess the image format.
1348      */
1349     if (iscsilun->type != TYPE_DISK && iscsilun->type != TYPE_ROM) {
1350         bs->sg = 1;
1351     }
1352 
1353     task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 1,
1354                             SCSI_INQUIRY_PAGECODE_SUPPORTED_VPD_PAGES,
1355                             (void **) &inq_vpd, errp);
1356     if (task == NULL) {
1357         ret = -EINVAL;
1358         goto out;
1359     }
1360     for (i = 0; i < inq_vpd->num_pages; i++) {
1361         struct scsi_task *inq_task;
1362         struct scsi_inquiry_logical_block_provisioning *inq_lbp;
1363         struct scsi_inquiry_block_limits *inq_bl;
1364         switch (inq_vpd->pages[i]) {
1365         case SCSI_INQUIRY_PAGECODE_LOGICAL_BLOCK_PROVISIONING:
1366             inq_task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 1,
1367                                         SCSI_INQUIRY_PAGECODE_LOGICAL_BLOCK_PROVISIONING,
1368                                         (void **) &inq_lbp, errp);
1369             if (inq_task == NULL) {
1370                 ret = -EINVAL;
1371                 goto out;
1372             }
1373             memcpy(&iscsilun->lbp, inq_lbp,
1374                    sizeof(struct scsi_inquiry_logical_block_provisioning));
1375             scsi_free_scsi_task(inq_task);
1376             break;
1377         case SCSI_INQUIRY_PAGECODE_BLOCK_LIMITS:
1378             inq_task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 1,
1379                                     SCSI_INQUIRY_PAGECODE_BLOCK_LIMITS,
1380                                     (void **) &inq_bl, errp);
1381             if (inq_task == NULL) {
1382                 ret = -EINVAL;
1383                 goto out;
1384             }
1385             memcpy(&iscsilun->bl, inq_bl,
1386                    sizeof(struct scsi_inquiry_block_limits));
1387             scsi_free_scsi_task(inq_task);
1388             break;
1389         default:
1390             break;
1391         }
1392     }
1393     scsi_free_scsi_task(task);
1394     task = NULL;
1395 
1396     iscsi_attach_aio_context(bs, iscsilun->aio_context);
1397 
1398     /* Guess the internal cluster (page) size of the iscsi target by the means
1399      * of opt_unmap_gran. Transfer the unmap granularity only if it has a
1400      * reasonable size */
1401     if (iscsilun->bl.opt_unmap_gran * iscsilun->block_size >= 4 * 1024 &&
1402         iscsilun->bl.opt_unmap_gran * iscsilun->block_size <= 16 * 1024 * 1024) {
1403         iscsilun->cluster_sectors = (iscsilun->bl.opt_unmap_gran *
1404                                      iscsilun->block_size) >> BDRV_SECTOR_BITS;
1405         if (iscsilun->lbprz && !(bs->open_flags & BDRV_O_NOCACHE)) {
1406             iscsilun->allocationmap =
1407                 bitmap_new(DIV_ROUND_UP(bs->total_sectors,
1408                                         iscsilun->cluster_sectors));
1409         }
1410     }
1411 
1412 out:
1413     qemu_opts_del(opts);
1414     g_free(initiator_name);
1415     if (iscsi_url != NULL) {
1416         iscsi_destroy_url(iscsi_url);
1417     }
1418     if (task != NULL) {
1419         scsi_free_scsi_task(task);
1420     }
1421 
1422     if (ret) {
1423         if (iscsi != NULL) {
1424             iscsi_destroy_context(iscsi);
1425         }
1426         memset(iscsilun, 0, sizeof(IscsiLun));
1427     }
1428     return ret;
1429 }
1430 
1431 static void iscsi_close(BlockDriverState *bs)
1432 {
1433     IscsiLun *iscsilun = bs->opaque;
1434     struct iscsi_context *iscsi = iscsilun->iscsi;
1435 
1436     iscsi_detach_aio_context(bs);
1437     iscsi_destroy_context(iscsi);
1438     g_free(iscsilun->zeroblock);
1439     g_free(iscsilun->allocationmap);
1440     memset(iscsilun, 0, sizeof(IscsiLun));
1441 }
1442 
1443 static void iscsi_refresh_limits(BlockDriverState *bs, Error **errp)
1444 {
1445     IscsiLun *iscsilun = bs->opaque;
1446 
1447     /* We don't actually refresh here, but just return data queried in
1448      * iscsi_open(): iscsi targets don't change their limits. */
1449     if (iscsilun->lbp.lbpu) {
1450         if (iscsilun->bl.max_unmap < 0xffffffff) {
1451             bs->bl.max_discard = sector_lun2qemu(iscsilun->bl.max_unmap,
1452                                                  iscsilun);
1453         }
1454         bs->bl.discard_alignment = sector_lun2qemu(iscsilun->bl.opt_unmap_gran,
1455                                                    iscsilun);
1456     }
1457 
1458     if (iscsilun->bl.max_ws_len < 0xffffffff) {
1459         bs->bl.max_write_zeroes = sector_lun2qemu(iscsilun->bl.max_ws_len,
1460                                                   iscsilun);
1461     }
1462     if (iscsilun->lbp.lbpws) {
1463         bs->bl.write_zeroes_alignment = sector_lun2qemu(iscsilun->bl.opt_unmap_gran,
1464                                                         iscsilun);
1465     }
1466     bs->bl.opt_transfer_length = sector_lun2qemu(iscsilun->bl.opt_xfer_len,
1467                                                  iscsilun);
1468 }
1469 
/* Since iscsi_open() ignores bdrv_flags, there is nothing to do here in
 * prepare.  Note that this will not re-establish a connection with an iSCSI
 * target - it is effectively a NOP.
 *
 * Always returns 0; @state, @queue and @errp are not used. */
static int iscsi_reopen_prepare(BDRVReopenState *state,
                                BlockReopenQueue *queue, Error **errp)
{
    /* NOP */
    return 0;
}
1479 
1480 static int iscsi_truncate(BlockDriverState *bs, int64_t offset)
1481 {
1482     IscsiLun *iscsilun = bs->opaque;
1483     Error *local_err = NULL;
1484 
1485     if (iscsilun->type != TYPE_DISK) {
1486         return -ENOTSUP;
1487     }
1488 
1489     iscsi_readcapacity_sync(iscsilun, &local_err);
1490     if (local_err != NULL) {
1491         error_free(local_err);
1492         return -EIO;
1493     }
1494 
1495     if (offset > iscsi_getlength(bs)) {
1496         return -EINVAL;
1497     }
1498 
1499     if (iscsilun->allocationmap != NULL) {
1500         g_free(iscsilun->allocationmap);
1501         iscsilun->allocationmap =
1502             bitmap_new(DIV_ROUND_UP(sector_lun2qemu(iscsilun->num_blocks,
1503                                                     iscsilun),
1504                                     iscsilun->cluster_sectors));
1505     }
1506 
1507     return 0;
1508 }
1509 
1510 static int iscsi_create(const char *filename, QemuOpts *opts, Error **errp)
1511 {
1512     int ret = 0;
1513     int64_t total_size = 0;
1514     BlockDriverState *bs;
1515     IscsiLun *iscsilun = NULL;
1516     QDict *bs_options;
1517 
1518     bs = bdrv_new("", &error_abort);
1519 
1520     /* Read out options */
1521     total_size = DIV_ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
1522                               BDRV_SECTOR_SIZE);
1523     bs->opaque = g_new0(struct IscsiLun, 1);
1524     iscsilun = bs->opaque;
1525 
1526     bs_options = qdict_new();
1527     qdict_put(bs_options, "filename", qstring_from_str(filename));
1528     ret = iscsi_open(bs, bs_options, 0, NULL);
1529     QDECREF(bs_options);
1530 
1531     if (ret != 0) {
1532         goto out;
1533     }
1534     iscsi_detach_aio_context(bs);
1535     if (iscsilun->type != TYPE_DISK) {
1536         ret = -ENODEV;
1537         goto out;
1538     }
1539     if (bs->total_sectors < total_size) {
1540         ret = -ENOSPC;
1541         goto out;
1542     }
1543 
1544     ret = 0;
1545 out:
1546     if (iscsilun->iscsi != NULL) {
1547         iscsi_destroy_context(iscsilun->iscsi);
1548     }
1549     g_free(bs->opaque);
1550     bs->opaque = NULL;
1551     bdrv_unref(bs);
1552     return ret;
1553 }
1554 
1555 static int iscsi_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
1556 {
1557     IscsiLun *iscsilun = bs->opaque;
1558     bdi->unallocated_blocks_are_zero = !!iscsilun->lbprz;
1559     bdi->can_write_zeroes_with_unmap = iscsilun->lbprz && iscsilun->lbp.lbpws;
1560     bdi->cluster_size = iscsilun->cluster_sectors * BDRV_SECTOR_SIZE;
1561     return 0;
1562 }
1563 
/*
 * Creation options of the driver, referenced by bdrv_iscsi.create_opts.
 * Only the size is defined; iscsi_create() reads it as BLOCK_OPT_SIZE.
 */
static QemuOptsList iscsi_create_opts = {
    .name = "iscsi-create-opts",
    .head = QTAILQ_HEAD_INITIALIZER(iscsi_create_opts.head),
    .desc = {
        {
            .name = BLOCK_OPT_SIZE,
            .type = QEMU_OPT_SIZE,
            .help = "Virtual disk size"
        },
        { /* end of list */ }
    }
};
1576 
/*
 * Block driver definition for the "iscsi" protocol.  Registered by
 * iscsi_block_init().
 */
static BlockDriver bdrv_iscsi = {
    .format_name     = "iscsi",
    .protocol_name   = "iscsi",

    /* Open, close and create */
    .instance_size   = sizeof(IscsiLun),
    .bdrv_needs_filename = true,
    .bdrv_file_open  = iscsi_open,
    .bdrv_close      = iscsi_close,
    .bdrv_create     = iscsi_create,
    .create_opts     = &iscsi_create_opts,
    .bdrv_reopen_prepare  = iscsi_reopen_prepare,

    /* Size, info and limits */
    .bdrv_getlength  = iscsi_getlength,
    .bdrv_get_info   = iscsi_get_info,
    .bdrv_truncate   = iscsi_truncate,
    .bdrv_refresh_limits = iscsi_refresh_limits,

    /* Coroutine based I/O */
    .bdrv_co_get_block_status = iscsi_co_get_block_status,
    .bdrv_co_discard      = iscsi_co_discard,
    .bdrv_co_write_zeroes = iscsi_co_write_zeroes,
    .bdrv_co_readv         = iscsi_co_readv,
    .bdrv_co_writev        = iscsi_co_writev,
    .bdrv_co_flush_to_disk = iscsi_co_flush,

    /* ioctl callbacks are only provided on Linux */
#ifdef __linux__
    .bdrv_ioctl       = iscsi_ioctl,
    .bdrv_aio_ioctl   = iscsi_aio_ioctl,
#endif

    /* AioContext migration */
    .bdrv_detach_aio_context = iscsi_detach_aio_context,
    .bdrv_attach_aio_context = iscsi_attach_aio_context,
};
1609 
/*
 * The "iscsi" option group, registered by iscsi_block_init().  Its options
 * are looked up through qemu_find_opts("iscsi") by parse_chap(),
 * parse_header_digest() and parse_initiator_name().
 */
static QemuOptsList qemu_iscsi_opts = {
    .name = "iscsi",
    .head = QTAILQ_HEAD_INITIALIZER(qemu_iscsi_opts.head),
    .desc = {
        {
            .name = "user",
            .type = QEMU_OPT_STRING,
            .help = "username for CHAP authentication to target",
        },{
            .name = "password",
            .type = QEMU_OPT_STRING,
            .help = "password for CHAP authentication to target",
        },{
            .name = "header-digest",
            .type = QEMU_OPT_STRING,
            .help = "HeaderDigest setting. "
                    "{CRC32C|CRC32C-NONE|NONE-CRC32C|NONE}",
        },{
            .name = "initiator-name",
            .type = QEMU_OPT_STRING,
            .help = "Initiator iqn name to use when connecting",
        },
        { /* end of list */ }
    },
};
1635 
/*
 * Module init hook: register the iSCSI block driver and the "iscsi"
 * option group.
 */
static void iscsi_block_init(void)
{
    bdrv_register(&bdrv_iscsi);
    qemu_add_opts(&qemu_iscsi_opts);
}

/* Hand the hook to the block module init machinery */
block_init(iscsi_block_init);
1643