xref: /openbmc/qemu/block/iscsi.c (revision 070c7607)
1 /*
2  * QEMU Block driver for iSCSI images
3  *
4  * Copyright (c) 2010-2011 Ronnie Sahlberg <ronniesahlberg@gmail.com>
5  * Copyright (c) 2012-2015 Peter Lieven <pl@kamp.de>
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a copy
8  * of this software and associated documentation files (the "Software"), to deal
9  * in the Software without restriction, including without limitation the rights
10  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11  * copies of the Software, and to permit persons to whom the Software is
12  * furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice shall be included in
15  * all copies or substantial portions of the Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23  * THE SOFTWARE.
24  */
25 
26 #include "config-host.h"
27 
28 #include <poll.h>
29 #include <math.h>
30 #include <arpa/inet.h>
31 #include "qemu-common.h"
32 #include "qemu/config-file.h"
33 #include "qemu/error-report.h"
34 #include "qemu/bitops.h"
35 #include "qemu/bitmap.h"
36 #include "block/block_int.h"
37 #include "block/scsi.h"
38 #include "qemu/iov.h"
39 #include "sysemu/sysemu.h"
40 #include "qmp-commands.h"
41 
42 #include <iscsi/iscsi.h>
43 #include <iscsi/scsi-lowlevel.h>
44 
45 #ifdef __linux__
46 #include <scsi/sg.h>
47 #include <block/scsi.h>
48 #endif
49 
/*
 * Per-LUN driver state. Stored as bs->opaque of the BlockDriverState and
 * shared by all requests issued against that LUN.
 */
typedef struct IscsiLun {
    struct iscsi_context *iscsi;    /* libiscsi session used for all commands */
    AioContext *aio_context;        /* context for fd handlers, BHs and timers */
    int lun;                        /* LUN number passed to every libiscsi call */
    enum scsi_inquiry_peripheral_device_type type; /* reported by SG_GET_SCSI_ID */
    int block_size;                 /* LUN block size in bytes */
    uint64_t num_blocks;            /* LUN size in units of block_size */
    int events;                     /* event mask last registered by
                                     * iscsi_set_events() */
    QEMUTimer *nop_timer;           /* NOTE(review): presumably the NOP keepalive
                                     * timer; not used in this part of the file */
    QEMUTimer *event_timer;         /* re-armed while libiscsi reports no events */
    struct scsi_inquiry_logical_block_provisioning lbp; /* lbpu/lbpws/lbpws10 */
    struct scsi_inquiry_block_limits bl;
    unsigned char *zeroblock;       /* lazily allocated zeroed block used as
                                     * WRITE SAME payload */
    unsigned long *allocationmap;   /* one bit per cluster, set = may be
                                     * allocated; NULL disables tracking */
    int cluster_sectors;            /* qemu sectors covered by one map bit */
    bool use_16_for_rw;             /* use the 16 byte CDB variants, not 10 */
    bool write_protected;           /* NOTE(review): presumably set from mode
                                     * sense; not used in this part of the file */
    bool lbpme;                     /* false: block status always says "data" */
    bool lbprz;                     /* unmapped blocks are reported as zero */
    bool dpofua;                    /* FUA may be requested on writes */
    bool has_write_same;            /* cleared once WRITE SAME is rejected */
    bool force_next_flush;          /* a flush is needed since the last one */
} IscsiLun;
73 
/*
 * State of one coroutine-based SCSI command. Lives on the stack of the
 * issuing coroutine and is filled in by iscsi_co_generic_cb().
 */
typedef struct IscsiTask {
    int status;             /* SCSI status reported by the completion callback */
    int complete;           /* non-zero once the coroutine may continue */
    int retries;            /* number of failed attempts counted so far */
    int do_retry;           /* non-zero: the caller must reissue the command */
    struct scsi_task *task; /* libiscsi task; freed by the issuing function */
    Coroutine *co;          /* coroutine to re-enter on completion */
    QEMUBH *bh;             /* bottom half used to re-enter the coroutine */
    IscsiLun *iscsilun;     /* LUN the command was issued against */
    QEMUTimer retry_timer;  /* delays the retry after BUSY/TASK SET FULL */
    bool force_next_flush;  /* ORed into iscsilun->force_next_flush on success */
} IscsiTask;
86 
/*
 * AIO control block for callback-based requests (in this part of the file
 * only the SG_IO ioctl path uses it).
 */
typedef struct IscsiAIOCB {
    BlockAIOCB common;      /* generic part; holds the completion cb/opaque */
    QEMUIOVector *qiov;
    QEMUBH *bh;             /* completion bottom half, NULL until scheduled */
    IscsiLun *iscsilun;     /* LUN the request was issued against */
    struct scsi_task *task; /* libiscsi task; freed in iscsi_bh_cb() */
    uint8_t *buf;           /* optional bounce buffer, released with g_free() */
    int status;             /* -EINPROGRESS while pending, final result after */
    int64_t sector_num;
    int nb_sectors;
#ifdef __linux__
    sg_io_hdr_t *ioh;       /* caller's SG_IO header, updated on completion */
#endif
} IscsiAIOCB;
101 
/* delay in ms before libiscsi is polled again after it reported no events */
#define EVENT_INTERVAL 250
/* NOTE(review): presumably the NOP keepalive period in ms and the number of
 * tolerated NOP failures; their users are not in this part of the file */
#define NOP_INTERVAL 5000
#define MAX_NOP_FAILURES 3
/* a failed command is retried at most once per entry of iscsi_retry_times */
#define ISCSI_CMD_RETRIES ARRAY_SIZE(iscsi_retry_times)
/* mean back-off delay in ms for the n-th retry after BUSY/TASK SET FULL */
static const unsigned iscsi_retry_times[] = {8, 32, 128, 512, 2048, 8192, 32768};

/* This threshold is a trade-off knob to choose between
 * the potential additional overhead of an extra GET_LBA_STATUS request
 * and unnecessarily reading a lot of zero sectors over the wire.
 * If a read request is greater than or equal to ISCSI_CHECKALLOC_THRES
 * sectors, we first check the allocation status of the area covered by
 * the request if the allocationmap indicates that the area might be
 * unallocated. */
#define ISCSI_CHECKALLOC_THRES 64
116 
117 static void
118 iscsi_bh_cb(void *p)
119 {
120     IscsiAIOCB *acb = p;
121 
122     qemu_bh_delete(acb->bh);
123 
124     g_free(acb->buf);
125     acb->buf = NULL;
126 
127     acb->common.cb(acb->common.opaque, acb->status);
128 
129     if (acb->task != NULL) {
130         scsi_free_scsi_task(acb->task);
131         acb->task = NULL;
132     }
133 
134     qemu_aio_unref(acb);
135 }
136 
137 static void
138 iscsi_schedule_bh(IscsiAIOCB *acb)
139 {
140     if (acb->bh) {
141         return;
142     }
143     acb->bh = aio_bh_new(acb->iscsilun->aio_context, iscsi_bh_cb, acb);
144     qemu_bh_schedule(acb->bh);
145 }
146 
147 static void iscsi_co_generic_bh_cb(void *opaque)
148 {
149     struct IscsiTask *iTask = opaque;
150     iTask->complete = 1;
151     qemu_bh_delete(iTask->bh);
152     qemu_coroutine_enter(iTask->co, NULL);
153 }
154 
155 static void iscsi_retry_timer_expired(void *opaque)
156 {
157     struct IscsiTask *iTask = opaque;
158     iTask->complete = 1;
159     if (iTask->co) {
160         qemu_coroutine_enter(iTask->co, NULL);
161     }
162 }
163 
164 static inline unsigned exp_random(double mean)
165 {
166     return -mean * log((double)rand() / RAND_MAX);
167 }
168 
/*
 * Generic libiscsi completion callback for coroutine-based commands.
 *
 * Records the status and the task in the IscsiTask passed as @opaque and
 * decides whether the command has to be reissued:
 *  - CHECK CONDITION with a UNIT ATTENTION sense key: retry immediately.
 *  - BUSY / TASK SET FULL: retry after a randomized back-off; completion is
 *    then signalled by iscsi_retry_timer_expired() instead of from here.
 *  - any other failure, or retries exhausted: report the error.
 * On success the task's force_next_flush flag is merged into the LUN.
 *
 * Unless the back-off timer was armed, the waiting coroutine is woken via a
 * bottom half; without a coroutine the task is simply flagged complete.
 */
static void
iscsi_co_generic_cb(struct iscsi_context *iscsi, int status,
                        void *command_data, void *opaque)
{
    struct IscsiTask *iTask = opaque;
    struct scsi_task *task = command_data;

    iTask->status = status;
    iTask->do_retry = 0;
    iTask->task = task;

    if (status != SCSI_STATUS_GOOD) {
        /* retries is incremented for every failure, even once exhausted */
        if (iTask->retries++ < ISCSI_CMD_RETRIES) {
            if (status == SCSI_STATUS_CHECK_CONDITION
                && task->sense.key == SCSI_SENSE_UNIT_ATTENTION) {
                error_report("iSCSI CheckCondition: %s",
                             iscsi_get_error(iscsi));
                iTask->do_retry = 1;
                goto out;
            }
            /* status 0x28 is SCSI_TASK_SET_FULL. It was first introduced
             * in libiscsi 1.10.0. Hardcode this value here to avoid
             * the need to bump the libiscsi requirement to 1.10.0 */
            if (status == SCSI_STATUS_BUSY || status == 0x28) {
                /* retries is at least 1 here, so the index is in range */
                unsigned retry_time =
                    exp_random(iscsi_retry_times[iTask->retries - 1]);
                error_report("iSCSI Busy/TaskSetFull (retry #%u in %u ms): %s",
                             iTask->retries, retry_time,
                             iscsi_get_error(iscsi));
                aio_timer_init(iTask->iscsilun->aio_context,
                               &iTask->retry_timer, QEMU_CLOCK_REALTIME,
                               SCALE_MS, iscsi_retry_timer_expired, iTask);
                timer_mod(&iTask->retry_timer,
                          qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + retry_time);
                iTask->do_retry = 1;
                /* the timer callback completes the task, not this function */
                return;
            }
        }
        error_report("iSCSI Failure: %s", iscsi_get_error(iscsi));
    } else {
        iTask->iscsilun->force_next_flush |= iTask->force_next_flush;
    }

out:
    if (iTask->co) {
        /* re-enter the coroutine from a bottom half, not from this callback */
        iTask->bh = aio_bh_new(iTask->iscsilun->aio_context,
                               iscsi_co_generic_bh_cb, iTask);
        qemu_bh_schedule(iTask->bh);
    } else {
        iTask->complete = 1;
    }
}
221 
222 static void iscsi_co_init_iscsitask(IscsiLun *iscsilun, struct IscsiTask *iTask)
223 {
224     *iTask = (struct IscsiTask) {
225         .co         = qemu_coroutine_self(),
226         .iscsilun   = iscsilun,
227     };
228 }
229 
230 static void
231 iscsi_abort_task_cb(struct iscsi_context *iscsi, int status, void *command_data,
232                     void *private_data)
233 {
234     IscsiAIOCB *acb = private_data;
235 
236     acb->status = -ECANCELED;
237     iscsi_schedule_bh(acb);
238 }
239 
240 static void
241 iscsi_aio_cancel(BlockAIOCB *blockacb)
242 {
243     IscsiAIOCB *acb = (IscsiAIOCB *)blockacb;
244     IscsiLun *iscsilun = acb->iscsilun;
245 
246     if (acb->status != -EINPROGRESS) {
247         return;
248     }
249 
250     /* send a task mgmt call to the target to cancel the task on the target */
251     iscsi_task_mgmt_abort_task_async(iscsilun->iscsi, acb->task,
252                                      iscsi_abort_task_cb, acb);
253 
254 }
255 
/* AIOCB description handed to qemu_aio_get() for every IscsiAIOCB */
static const AIOCBInfo iscsi_aiocb_info = {
    .aiocb_size         = sizeof(IscsiAIOCB),
    .cancel_async       = iscsi_aio_cancel,
};
260 
261 
262 static void iscsi_process_read(void *arg);
263 static void iscsi_process_write(void *arg);
264 
265 static void
266 iscsi_set_events(IscsiLun *iscsilun)
267 {
268     struct iscsi_context *iscsi = iscsilun->iscsi;
269     int ev = iscsi_which_events(iscsi);
270 
271     if (ev != iscsilun->events) {
272         aio_set_fd_handler(iscsilun->aio_context,
273                            iscsi_get_fd(iscsi),
274                            (ev & POLLIN) ? iscsi_process_read : NULL,
275                            (ev & POLLOUT) ? iscsi_process_write : NULL,
276                            iscsilun);
277         iscsilun->events = ev;
278     }
279 
280     /* newer versions of libiscsi may return zero events. In this
281      * case start a timer to ensure we are able to return to service
282      * once this situation changes. */
283     if (!ev) {
284         timer_mod(iscsilun->event_timer,
285                   qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + EVENT_INTERVAL);
286     }
287 }
288 
289 static void iscsi_timed_set_events(void *opaque)
290 {
291     IscsiLun *iscsilun = opaque;
292     iscsi_set_events(iscsilun);
293 }
294 
295 static void
296 iscsi_process_read(void *arg)
297 {
298     IscsiLun *iscsilun = arg;
299     struct iscsi_context *iscsi = iscsilun->iscsi;
300 
301     iscsi_service(iscsi, POLLIN);
302     iscsi_set_events(iscsilun);
303 }
304 
305 static void
306 iscsi_process_write(void *arg)
307 {
308     IscsiLun *iscsilun = arg;
309     struct iscsi_context *iscsi = iscsilun->iscsi;
310 
311     iscsi_service(iscsi, POLLOUT);
312     iscsi_set_events(iscsilun);
313 }
314 
315 static int64_t sector_lun2qemu(int64_t sector, IscsiLun *iscsilun)
316 {
317     return sector * iscsilun->block_size / BDRV_SECTOR_SIZE;
318 }
319 
320 static int64_t sector_qemu2lun(int64_t sector, IscsiLun *iscsilun)
321 {
322     return sector * BDRV_SECTOR_SIZE / iscsilun->block_size;
323 }
324 
325 static bool is_request_lun_aligned(int64_t sector_num, int nb_sectors,
326                                       IscsiLun *iscsilun)
327 {
328     if ((sector_num * BDRV_SECTOR_SIZE) % iscsilun->block_size ||
329         (nb_sectors * BDRV_SECTOR_SIZE) % iscsilun->block_size) {
330             error_report("iSCSI misaligned request: "
331                          "iscsilun->block_size %u, sector_num %" PRIi64
332                          ", nb_sectors %d",
333                          iscsilun->block_size, sector_num, nb_sectors);
334             return 0;
335     }
336     return 1;
337 }
338 
339 static unsigned long *iscsi_allocationmap_init(IscsiLun *iscsilun)
340 {
341     return bitmap_try_new(DIV_ROUND_UP(sector_lun2qemu(iscsilun->num_blocks,
342                                                        iscsilun),
343                                        iscsilun->cluster_sectors));
344 }
345 
346 static void iscsi_allocationmap_set(IscsiLun *iscsilun, int64_t sector_num,
347                                     int nb_sectors)
348 {
349     if (iscsilun->allocationmap == NULL) {
350         return;
351     }
352     bitmap_set(iscsilun->allocationmap,
353                sector_num / iscsilun->cluster_sectors,
354                DIV_ROUND_UP(nb_sectors, iscsilun->cluster_sectors));
355 }
356 
357 static void iscsi_allocationmap_clear(IscsiLun *iscsilun, int64_t sector_num,
358                                       int nb_sectors)
359 {
360     int64_t cluster_num, nb_clusters;
361     if (iscsilun->allocationmap == NULL) {
362         return;
363     }
364     cluster_num = DIV_ROUND_UP(sector_num, iscsilun->cluster_sectors);
365     nb_clusters = (sector_num + nb_sectors) / iscsilun->cluster_sectors
366                   - cluster_num;
367     if (nb_clusters > 0) {
368         bitmap_clear(iscsilun->allocationmap, cluster_num, nb_clusters);
369     }
370 }
371 
372 static int coroutine_fn iscsi_co_writev(BlockDriverState *bs,
373                                         int64_t sector_num, int nb_sectors,
374                                         QEMUIOVector *iov)
375 {
376     IscsiLun *iscsilun = bs->opaque;
377     struct IscsiTask iTask;
378     uint64_t lba;
379     uint32_t num_sectors;
380     int fua;
381 
382     if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
383         return -EINVAL;
384     }
385 
386     if (bs->bl.max_transfer_length && nb_sectors > bs->bl.max_transfer_length) {
387         error_report("iSCSI Error: Write of %d sectors exceeds max_xfer_len "
388                      "of %d sectors", nb_sectors, bs->bl.max_transfer_length);
389         return -EINVAL;
390     }
391 
392     lba = sector_qemu2lun(sector_num, iscsilun);
393     num_sectors = sector_qemu2lun(nb_sectors, iscsilun);
394     iscsi_co_init_iscsitask(iscsilun, &iTask);
395 retry:
396     fua = iscsilun->dpofua && !bs->enable_write_cache;
397     iTask.force_next_flush = !fua;
398     if (iscsilun->use_16_for_rw) {
399         iTask.task = iscsi_write16_task(iscsilun->iscsi, iscsilun->lun, lba,
400                                         NULL, num_sectors * iscsilun->block_size,
401                                         iscsilun->block_size, 0, 0, fua, 0, 0,
402                                         iscsi_co_generic_cb, &iTask);
403     } else {
404         iTask.task = iscsi_write10_task(iscsilun->iscsi, iscsilun->lun, lba,
405                                         NULL, num_sectors * iscsilun->block_size,
406                                         iscsilun->block_size, 0, 0, fua, 0, 0,
407                                         iscsi_co_generic_cb, &iTask);
408     }
409     if (iTask.task == NULL) {
410         return -ENOMEM;
411     }
412     scsi_task_set_iov_out(iTask.task, (struct scsi_iovec *) iov->iov,
413                           iov->niov);
414     while (!iTask.complete) {
415         iscsi_set_events(iscsilun);
416         qemu_coroutine_yield();
417     }
418 
419     if (iTask.task != NULL) {
420         scsi_free_scsi_task(iTask.task);
421         iTask.task = NULL;
422     }
423 
424     if (iTask.do_retry) {
425         iTask.complete = 0;
426         goto retry;
427     }
428 
429     if (iTask.status != SCSI_STATUS_GOOD) {
430         return -EIO;
431     }
432 
433     iscsi_allocationmap_set(iscsilun, sector_num, nb_sectors);
434 
435     return 0;
436 }
437 
438 
439 static bool iscsi_allocationmap_is_allocated(IscsiLun *iscsilun,
440                                              int64_t sector_num, int nb_sectors)
441 {
442     unsigned long size;
443     if (iscsilun->allocationmap == NULL) {
444         return true;
445     }
446     size = DIV_ROUND_UP(sector_num + nb_sectors, iscsilun->cluster_sectors);
447     return !(find_next_bit(iscsilun->allocationmap, size,
448                            sector_num / iscsilun->cluster_sectors) == size);
449 }
450 
451 static int64_t coroutine_fn iscsi_co_get_block_status(BlockDriverState *bs,
452                                                   int64_t sector_num,
453                                                   int nb_sectors, int *pnum)
454 {
455     IscsiLun *iscsilun = bs->opaque;
456     struct scsi_get_lba_status *lbas = NULL;
457     struct scsi_lba_status_descriptor *lbasd = NULL;
458     struct IscsiTask iTask;
459     int64_t ret;
460 
461     iscsi_co_init_iscsitask(iscsilun, &iTask);
462 
463     if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
464         ret = -EINVAL;
465         goto out;
466     }
467 
468     /* default to all sectors allocated */
469     ret = BDRV_BLOCK_DATA;
470     ret |= (sector_num << BDRV_SECTOR_BITS) | BDRV_BLOCK_OFFSET_VALID;
471     *pnum = nb_sectors;
472 
473     /* LUN does not support logical block provisioning */
474     if (!iscsilun->lbpme) {
475         goto out;
476     }
477 
478 retry:
479     if (iscsi_get_lba_status_task(iscsilun->iscsi, iscsilun->lun,
480                                   sector_qemu2lun(sector_num, iscsilun),
481                                   8 + 16, iscsi_co_generic_cb,
482                                   &iTask) == NULL) {
483         ret = -ENOMEM;
484         goto out;
485     }
486 
487     while (!iTask.complete) {
488         iscsi_set_events(iscsilun);
489         qemu_coroutine_yield();
490     }
491 
492     if (iTask.do_retry) {
493         if (iTask.task != NULL) {
494             scsi_free_scsi_task(iTask.task);
495             iTask.task = NULL;
496         }
497         iTask.complete = 0;
498         goto retry;
499     }
500 
501     if (iTask.status != SCSI_STATUS_GOOD) {
502         /* in case the get_lba_status_callout fails (i.e.
503          * because the device is busy or the cmd is not
504          * supported) we pretend all blocks are allocated
505          * for backwards compatibility */
506         goto out;
507     }
508 
509     lbas = scsi_datain_unmarshall(iTask.task);
510     if (lbas == NULL) {
511         ret = -EIO;
512         goto out;
513     }
514 
515     lbasd = &lbas->descriptors[0];
516 
517     if (sector_qemu2lun(sector_num, iscsilun) != lbasd->lba) {
518         ret = -EIO;
519         goto out;
520     }
521 
522     *pnum = sector_lun2qemu(lbasd->num_blocks, iscsilun);
523 
524     if (lbasd->provisioning == SCSI_PROVISIONING_TYPE_DEALLOCATED ||
525         lbasd->provisioning == SCSI_PROVISIONING_TYPE_ANCHORED) {
526         ret &= ~BDRV_BLOCK_DATA;
527         if (iscsilun->lbprz) {
528             ret |= BDRV_BLOCK_ZERO;
529         }
530     }
531 
532     if (ret & BDRV_BLOCK_ZERO) {
533         iscsi_allocationmap_clear(iscsilun, sector_num, *pnum);
534     } else {
535         iscsi_allocationmap_set(iscsilun, sector_num, *pnum);
536     }
537 
538     if (*pnum > nb_sectors) {
539         *pnum = nb_sectors;
540     }
541 out:
542     if (iTask.task != NULL) {
543         scsi_free_scsi_task(iTask.task);
544     }
545     return ret;
546 }
547 
548 static int coroutine_fn iscsi_co_readv(BlockDriverState *bs,
549                                        int64_t sector_num, int nb_sectors,
550                                        QEMUIOVector *iov)
551 {
552     IscsiLun *iscsilun = bs->opaque;
553     struct IscsiTask iTask;
554     uint64_t lba;
555     uint32_t num_sectors;
556 
557     if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
558         return -EINVAL;
559     }
560 
561     if (bs->bl.max_transfer_length && nb_sectors > bs->bl.max_transfer_length) {
562         error_report("iSCSI Error: Read of %d sectors exceeds max_xfer_len "
563                      "of %d sectors", nb_sectors, bs->bl.max_transfer_length);
564         return -EINVAL;
565     }
566 
567     if (iscsilun->lbprz && nb_sectors >= ISCSI_CHECKALLOC_THRES &&
568         !iscsi_allocationmap_is_allocated(iscsilun, sector_num, nb_sectors)) {
569         int64_t ret;
570         int pnum;
571         ret = iscsi_co_get_block_status(bs, sector_num, INT_MAX, &pnum);
572         if (ret < 0) {
573             return ret;
574         }
575         if (ret & BDRV_BLOCK_ZERO && pnum >= nb_sectors) {
576             qemu_iovec_memset(iov, 0, 0x00, iov->size);
577             return 0;
578         }
579     }
580 
581     lba = sector_qemu2lun(sector_num, iscsilun);
582     num_sectors = sector_qemu2lun(nb_sectors, iscsilun);
583 
584     iscsi_co_init_iscsitask(iscsilun, &iTask);
585 retry:
586     if (iscsilun->use_16_for_rw) {
587         iTask.task = iscsi_read16_task(iscsilun->iscsi, iscsilun->lun, lba,
588                                        num_sectors * iscsilun->block_size,
589                                        iscsilun->block_size, 0, 0, 0, 0, 0,
590                                        iscsi_co_generic_cb, &iTask);
591     } else {
592         iTask.task = iscsi_read10_task(iscsilun->iscsi, iscsilun->lun, lba,
593                                        num_sectors * iscsilun->block_size,
594                                        iscsilun->block_size,
595                                        0, 0, 0, 0, 0,
596                                        iscsi_co_generic_cb, &iTask);
597     }
598     if (iTask.task == NULL) {
599         return -ENOMEM;
600     }
601     scsi_task_set_iov_in(iTask.task, (struct scsi_iovec *) iov->iov, iov->niov);
602 
603     while (!iTask.complete) {
604         iscsi_set_events(iscsilun);
605         qemu_coroutine_yield();
606     }
607 
608     if (iTask.task != NULL) {
609         scsi_free_scsi_task(iTask.task);
610         iTask.task = NULL;
611     }
612 
613     if (iTask.do_retry) {
614         iTask.complete = 0;
615         goto retry;
616     }
617 
618     if (iTask.status != SCSI_STATUS_GOOD) {
619         return -EIO;
620     }
621 
622     return 0;
623 }
624 
625 static int coroutine_fn iscsi_co_flush(BlockDriverState *bs)
626 {
627     IscsiLun *iscsilun = bs->opaque;
628     struct IscsiTask iTask;
629 
630     if (bs->sg) {
631         return 0;
632     }
633 
634     if (!iscsilun->force_next_flush) {
635         return 0;
636     }
637     iscsilun->force_next_flush = false;
638 
639     iscsi_co_init_iscsitask(iscsilun, &iTask);
640 retry:
641     if (iscsi_synchronizecache10_task(iscsilun->iscsi, iscsilun->lun, 0, 0, 0,
642                                       0, iscsi_co_generic_cb, &iTask) == NULL) {
643         return -ENOMEM;
644     }
645 
646     while (!iTask.complete) {
647         iscsi_set_events(iscsilun);
648         qemu_coroutine_yield();
649     }
650 
651     if (iTask.task != NULL) {
652         scsi_free_scsi_task(iTask.task);
653         iTask.task = NULL;
654     }
655 
656     if (iTask.do_retry) {
657         iTask.complete = 0;
658         goto retry;
659     }
660 
661     if (iTask.status != SCSI_STATUS_GOOD) {
662         return -EIO;
663     }
664 
665     return 0;
666 }
667 
668 #ifdef __linux__
669 static void
670 iscsi_aio_ioctl_cb(struct iscsi_context *iscsi, int status,
671                      void *command_data, void *opaque)
672 {
673     IscsiAIOCB *acb = opaque;
674 
675     g_free(acb->buf);
676     acb->buf = NULL;
677 
678     acb->status = 0;
679     if (status < 0) {
680         error_report("Failed to ioctl(SG_IO) to iSCSI lun. %s",
681                      iscsi_get_error(iscsi));
682         acb->status = -EIO;
683     }
684 
685     acb->ioh->driver_status = 0;
686     acb->ioh->host_status   = 0;
687     acb->ioh->resid         = 0;
688 
689 #define SG_ERR_DRIVER_SENSE    0x08
690 
691     if (status == SCSI_STATUS_CHECK_CONDITION && acb->task->datain.size >= 2) {
692         int ss;
693 
694         acb->ioh->driver_status |= SG_ERR_DRIVER_SENSE;
695 
696         acb->ioh->sb_len_wr = acb->task->datain.size - 2;
697         ss = (acb->ioh->mx_sb_len >= acb->ioh->sb_len_wr) ?
698              acb->ioh->mx_sb_len : acb->ioh->sb_len_wr;
699         memcpy(acb->ioh->sbp, &acb->task->datain.data[2], ss);
700     }
701 
702     iscsi_schedule_bh(acb);
703 }
704 
705 static BlockAIOCB *iscsi_aio_ioctl(BlockDriverState *bs,
706         unsigned long int req, void *buf,
707         BlockCompletionFunc *cb, void *opaque)
708 {
709     IscsiLun *iscsilun = bs->opaque;
710     struct iscsi_context *iscsi = iscsilun->iscsi;
711     struct iscsi_data data;
712     IscsiAIOCB *acb;
713 
714     assert(req == SG_IO);
715 
716     acb = qemu_aio_get(&iscsi_aiocb_info, bs, cb, opaque);
717 
718     acb->iscsilun = iscsilun;
719     acb->bh          = NULL;
720     acb->status      = -EINPROGRESS;
721     acb->buf         = NULL;
722     acb->ioh         = buf;
723 
724     acb->task = malloc(sizeof(struct scsi_task));
725     if (acb->task == NULL) {
726         error_report("iSCSI: Failed to allocate task for scsi command. %s",
727                      iscsi_get_error(iscsi));
728         qemu_aio_unref(acb);
729         return NULL;
730     }
731     memset(acb->task, 0, sizeof(struct scsi_task));
732 
733     switch (acb->ioh->dxfer_direction) {
734     case SG_DXFER_TO_DEV:
735         acb->task->xfer_dir = SCSI_XFER_WRITE;
736         break;
737     case SG_DXFER_FROM_DEV:
738         acb->task->xfer_dir = SCSI_XFER_READ;
739         break;
740     default:
741         acb->task->xfer_dir = SCSI_XFER_NONE;
742         break;
743     }
744 
745     acb->task->cdb_size = acb->ioh->cmd_len;
746     memcpy(&acb->task->cdb[0], acb->ioh->cmdp, acb->ioh->cmd_len);
747     acb->task->expxferlen = acb->ioh->dxfer_len;
748 
749     data.size = 0;
750     if (acb->task->xfer_dir == SCSI_XFER_WRITE) {
751         if (acb->ioh->iovec_count == 0) {
752             data.data = acb->ioh->dxferp;
753             data.size = acb->ioh->dxfer_len;
754         } else {
755             scsi_task_set_iov_out(acb->task,
756                                  (struct scsi_iovec *) acb->ioh->dxferp,
757                                  acb->ioh->iovec_count);
758         }
759     }
760 
761     if (iscsi_scsi_command_async(iscsi, iscsilun->lun, acb->task,
762                                  iscsi_aio_ioctl_cb,
763                                  (data.size > 0) ? &data : NULL,
764                                  acb) != 0) {
765         scsi_free_scsi_task(acb->task);
766         qemu_aio_unref(acb);
767         return NULL;
768     }
769 
770     /* tell libiscsi to read straight into the buffer we got from ioctl */
771     if (acb->task->xfer_dir == SCSI_XFER_READ) {
772         if (acb->ioh->iovec_count == 0) {
773             scsi_task_add_data_in_buffer(acb->task,
774                                          acb->ioh->dxfer_len,
775                                          acb->ioh->dxferp);
776         } else {
777             scsi_task_set_iov_in(acb->task,
778                                  (struct scsi_iovec *) acb->ioh->dxferp,
779                                  acb->ioh->iovec_count);
780         }
781     }
782 
783     iscsi_set_events(iscsilun);
784 
785     return &acb->common;
786 }
787 
/*
 * Completion callback of the synchronous SG_IO path: stores the final
 * status in the int that @opaque points to.
 */
static void ioctl_cb(void *opaque, int status)
{
    *(int *)opaque = status;
}
793 
794 static int iscsi_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
795 {
796     IscsiLun *iscsilun = bs->opaque;
797     int status;
798 
799     switch (req) {
800     case SG_GET_VERSION_NUM:
801         *(int *)buf = 30000;
802         break;
803     case SG_GET_SCSI_ID:
804         ((struct sg_scsi_id *)buf)->scsi_type = iscsilun->type;
805         break;
806     case SG_IO:
807         status = -EINPROGRESS;
808         iscsi_aio_ioctl(bs, req, buf, ioctl_cb, &status);
809 
810         while (status == -EINPROGRESS) {
811             aio_poll(iscsilun->aio_context, true);
812         }
813 
814         return 0;
815     default:
816         return -1;
817     }
818     return 0;
819 }
820 #endif
821 
822 static int64_t
823 iscsi_getlength(BlockDriverState *bs)
824 {
825     IscsiLun *iscsilun = bs->opaque;
826     int64_t len;
827 
828     len  = iscsilun->num_blocks;
829     len *= iscsilun->block_size;
830 
831     return len;
832 }
833 
834 static int
835 coroutine_fn iscsi_co_discard(BlockDriverState *bs, int64_t sector_num,
836                                    int nb_sectors)
837 {
838     IscsiLun *iscsilun = bs->opaque;
839     struct IscsiTask iTask;
840     struct unmap_list list;
841 
842     if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
843         return -EINVAL;
844     }
845 
846     if (!iscsilun->lbp.lbpu) {
847         /* UNMAP is not supported by the target */
848         return 0;
849     }
850 
851     list.lba = sector_qemu2lun(sector_num, iscsilun);
852     list.num = sector_qemu2lun(nb_sectors, iscsilun);
853 
854     iscsi_co_init_iscsitask(iscsilun, &iTask);
855 retry:
856     if (iscsi_unmap_task(iscsilun->iscsi, iscsilun->lun, 0, 0, &list, 1,
857                      iscsi_co_generic_cb, &iTask) == NULL) {
858         return -ENOMEM;
859     }
860 
861     while (!iTask.complete) {
862         iscsi_set_events(iscsilun);
863         qemu_coroutine_yield();
864     }
865 
866     if (iTask.task != NULL) {
867         scsi_free_scsi_task(iTask.task);
868         iTask.task = NULL;
869     }
870 
871     if (iTask.do_retry) {
872         iTask.complete = 0;
873         goto retry;
874     }
875 
876     if (iTask.status == SCSI_STATUS_CHECK_CONDITION) {
877         /* the target might fail with a check condition if it
878            is not happy with the alignment of the UNMAP request
879            we silently fail in this case */
880         return 0;
881     }
882 
883     if (iTask.status != SCSI_STATUS_GOOD) {
884         return -EIO;
885     }
886 
887     iscsi_allocationmap_clear(iscsilun, sector_num, nb_sectors);
888 
889     return 0;
890 }
891 
/*
 * Coroutine write of zeroes to @nb_sectors qemu sectors starting at
 * @sector_num, implemented with WRITE SAME(10/16) and a single zeroed block
 * as payload.
 *
 * If BDRV_REQ_MAY_UNMAP is set, the UNMAP bit is requested when the target
 * advertises support for it in the chosen WRITE SAME variant; otherwise the
 * flag is dropped and a plain WRITE SAME is used.
 *
 * Returns 0 on success, -EINVAL for a misaligned request, -ENOTSUP if the
 * target does not support the required WRITE SAME, -ENOMEM if the zero
 * block or the command could not be allocated and -EIO for other failures.
 */
static int
coroutine_fn iscsi_co_write_zeroes(BlockDriverState *bs, int64_t sector_num,
                                   int nb_sectors, BdrvRequestFlags flags)
{
    IscsiLun *iscsilun = bs->opaque;
    struct IscsiTask iTask;
    uint64_t lba;
    uint32_t nb_blocks;
    bool use_16_for_ws = iscsilun->use_16_for_rw;

    if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
        return -EINVAL;
    }

    if (flags & BDRV_REQ_MAY_UNMAP) {
        if (!use_16_for_ws && !iscsilun->lbp.lbpws10) {
            /* WRITESAME10 with UNMAP is unsupported try WRITESAME16 */
            use_16_for_ws = true;
        }
        if (use_16_for_ws && !iscsilun->lbp.lbpws) {
            /* WRITESAME16 with UNMAP is not supported by the target,
             * fall back and try WRITESAME10/16 without UNMAP */
            flags &= ~BDRV_REQ_MAY_UNMAP;
            use_16_for_ws = iscsilun->use_16_for_rw;
        }
    }

    if (!(flags & BDRV_REQ_MAY_UNMAP) && !iscsilun->has_write_same) {
        /* WRITESAME without UNMAP is not supported by the target */
        return -ENOTSUP;
    }

    lba = sector_qemu2lun(sector_num, iscsilun);
    nb_blocks = sector_qemu2lun(nb_sectors, iscsilun);

    /* the zeroed payload block is allocated once and kept with the LUN */
    if (iscsilun->zeroblock == NULL) {
        iscsilun->zeroblock = g_try_malloc0(iscsilun->block_size);
        if (iscsilun->zeroblock == NULL) {
            return -ENOMEM;
        }
    }

    iscsi_co_init_iscsitask(iscsilun, &iTask);
    /* WRITE SAME is issued without FUA, so a later flush is required */
    iTask.force_next_flush = true;
retry:
    if (use_16_for_ws) {
        iTask.task = iscsi_writesame16_task(iscsilun->iscsi, iscsilun->lun, lba,
                                            iscsilun->zeroblock, iscsilun->block_size,
                                            nb_blocks, 0, !!(flags & BDRV_REQ_MAY_UNMAP),
                                            0, 0, iscsi_co_generic_cb, &iTask);
    } else {
        iTask.task = iscsi_writesame10_task(iscsilun->iscsi, iscsilun->lun, lba,
                                            iscsilun->zeroblock, iscsilun->block_size,
                                            nb_blocks, 0, !!(flags & BDRV_REQ_MAY_UNMAP),
                                            0, 0, iscsi_co_generic_cb, &iTask);
    }
    if (iTask.task == NULL) {
        return -ENOMEM;
    }

    while (!iTask.complete) {
        iscsi_set_events(iscsilun);
        qemu_coroutine_yield();
    }

    /* the sense data must be inspected before the task is freed below */
    if (iTask.status == SCSI_STATUS_CHECK_CONDITION &&
        iTask.task->sense.key == SCSI_SENSE_ILLEGAL_REQUEST &&
        (iTask.task->sense.ascq == SCSI_SENSE_ASCQ_INVALID_OPERATION_CODE ||
         iTask.task->sense.ascq == SCSI_SENSE_ASCQ_INVALID_FIELD_IN_CDB)) {
        /* WRITE SAME is not supported by the target */
        iscsilun->has_write_same = false;
        scsi_free_scsi_task(iTask.task);
        return -ENOTSUP;
    }

    if (iTask.task != NULL) {
        scsi_free_scsi_task(iTask.task);
        iTask.task = NULL;
    }

    if (iTask.do_retry) {
        iTask.complete = 0;
        goto retry;
    }

    if (iTask.status != SCSI_STATUS_GOOD) {
        return -EIO;
    }

    /* with UNMAP the blocks are recorded as unallocated, else as allocated */
    if (flags & BDRV_REQ_MAY_UNMAP) {
        iscsi_allocationmap_clear(iscsilun, sector_num, nb_sectors);
    } else {
        iscsi_allocationmap_set(iscsilun, sector_num, nb_sectors);
    }

    return 0;
}
989 
990 static void parse_chap(struct iscsi_context *iscsi, const char *target,
991                        Error **errp)
992 {
993     QemuOptsList *list;
994     QemuOpts *opts;
995     const char *user = NULL;
996     const char *password = NULL;
997 
998     list = qemu_find_opts("iscsi");
999     if (!list) {
1000         return;
1001     }
1002 
1003     opts = qemu_opts_find(list, target);
1004     if (opts == NULL) {
1005         opts = QTAILQ_FIRST(&list->head);
1006         if (!opts) {
1007             return;
1008         }
1009     }
1010 
1011     user = qemu_opt_get(opts, "user");
1012     if (!user) {
1013         return;
1014     }
1015 
1016     password = qemu_opt_get(opts, "password");
1017     if (!password) {
1018         error_setg(errp, "CHAP username specified but no password was given");
1019         return;
1020     }
1021 
1022     if (iscsi_set_initiator_username_pwd(iscsi, user, password)) {
1023         error_setg(errp, "Failed to set initiator username and password");
1024     }
1025 }
1026 
1027 static void parse_header_digest(struct iscsi_context *iscsi, const char *target,
1028                                 Error **errp)
1029 {
1030     QemuOptsList *list;
1031     QemuOpts *opts;
1032     const char *digest = NULL;
1033 
1034     list = qemu_find_opts("iscsi");
1035     if (!list) {
1036         return;
1037     }
1038 
1039     opts = qemu_opts_find(list, target);
1040     if (opts == NULL) {
1041         opts = QTAILQ_FIRST(&list->head);
1042         if (!opts) {
1043             return;
1044         }
1045     }
1046 
1047     digest = qemu_opt_get(opts, "header-digest");
1048     if (!digest) {
1049         return;
1050     }
1051 
1052     if (!strcmp(digest, "CRC32C")) {
1053         iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_CRC32C);
1054     } else if (!strcmp(digest, "NONE")) {
1055         iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE);
1056     } else if (!strcmp(digest, "CRC32C-NONE")) {
1057         iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_CRC32C_NONE);
1058     } else if (!strcmp(digest, "NONE-CRC32C")) {
1059         iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE_CRC32C);
1060     } else {
1061         error_setg(errp, "Invalid header-digest setting : %s", digest);
1062     }
1063 }
1064 
1065 static char *parse_initiator_name(const char *target)
1066 {
1067     QemuOptsList *list;
1068     QemuOpts *opts;
1069     const char *name;
1070     char *iscsi_name;
1071     UuidInfo *uuid_info;
1072 
1073     list = qemu_find_opts("iscsi");
1074     if (list) {
1075         opts = qemu_opts_find(list, target);
1076         if (!opts) {
1077             opts = QTAILQ_FIRST(&list->head);
1078         }
1079         if (opts) {
1080             name = qemu_opt_get(opts, "initiator-name");
1081             if (name) {
1082                 return g_strdup(name);
1083             }
1084         }
1085     }
1086 
1087     uuid_info = qmp_query_uuid(NULL);
1088     if (strcmp(uuid_info->UUID, UUID_NONE) == 0) {
1089         name = qemu_get_vm_name();
1090     } else {
1091         name = uuid_info->UUID;
1092     }
1093     iscsi_name = g_strdup_printf("iqn.2008-11.org.linux-kvm%s%s",
1094                                  name ? ":" : "", name ? name : "");
1095     qapi_free_UuidInfo(uuid_info);
1096     return iscsi_name;
1097 }
1098 
1099 static void iscsi_nop_timed_event(void *opaque)
1100 {
1101     IscsiLun *iscsilun = opaque;
1102 
1103     if (iscsi_get_nops_in_flight(iscsilun->iscsi) > MAX_NOP_FAILURES) {
1104         error_report("iSCSI: NOP timeout. Reconnecting...");
1105         iscsi_reconnect(iscsilun->iscsi);
1106     }
1107 
1108     if (iscsi_nop_out_async(iscsilun->iscsi, NULL, NULL, 0, NULL) != 0) {
1109         error_report("iSCSI: failed to sent NOP-Out. Disabling NOP messages.");
1110         return;
1111     }
1112 
1113     timer_mod(iscsilun->nop_timer, qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + NOP_INTERVAL);
1114     iscsi_set_events(iscsilun);
1115 }
1116 
1117 static void iscsi_readcapacity_sync(IscsiLun *iscsilun, Error **errp)
1118 {
1119     struct scsi_task *task = NULL;
1120     struct scsi_readcapacity10 *rc10 = NULL;
1121     struct scsi_readcapacity16 *rc16 = NULL;
1122     int retries = ISCSI_CMD_RETRIES;
1123 
1124     do {
1125         if (task != NULL) {
1126             scsi_free_scsi_task(task);
1127             task = NULL;
1128         }
1129 
1130         switch (iscsilun->type) {
1131         case TYPE_DISK:
1132             task = iscsi_readcapacity16_sync(iscsilun->iscsi, iscsilun->lun);
1133             if (task != NULL && task->status == SCSI_STATUS_GOOD) {
1134                 rc16 = scsi_datain_unmarshall(task);
1135                 if (rc16 == NULL) {
1136                     error_setg(errp, "iSCSI: Failed to unmarshall readcapacity16 data.");
1137                 } else {
1138                     iscsilun->block_size = rc16->block_length;
1139                     iscsilun->num_blocks = rc16->returned_lba + 1;
1140                     iscsilun->lbpme = !!rc16->lbpme;
1141                     iscsilun->lbprz = !!rc16->lbprz;
1142                     iscsilun->use_16_for_rw = (rc16->returned_lba > 0xffffffff);
1143                 }
1144             }
1145             break;
1146         case TYPE_ROM:
1147             task = iscsi_readcapacity10_sync(iscsilun->iscsi, iscsilun->lun, 0, 0);
1148             if (task != NULL && task->status == SCSI_STATUS_GOOD) {
1149                 rc10 = scsi_datain_unmarshall(task);
1150                 if (rc10 == NULL) {
1151                     error_setg(errp, "iSCSI: Failed to unmarshall readcapacity10 data.");
1152                 } else {
1153                     iscsilun->block_size = rc10->block_size;
1154                     if (rc10->lba == 0) {
1155                         /* blank disk loaded */
1156                         iscsilun->num_blocks = 0;
1157                     } else {
1158                         iscsilun->num_blocks = rc10->lba + 1;
1159                     }
1160                 }
1161             }
1162             break;
1163         default:
1164             return;
1165         }
1166     } while (task != NULL && task->status == SCSI_STATUS_CHECK_CONDITION
1167              && task->sense.key == SCSI_SENSE_UNIT_ATTENTION
1168              && retries-- > 0);
1169 
1170     if (task == NULL || task->status != SCSI_STATUS_GOOD) {
1171         error_setg(errp, "iSCSI: failed to send readcapacity10 command.");
1172     }
1173     if (task) {
1174         scsi_free_scsi_task(task);
1175     }
1176 }
1177 
1178 /* TODO Convert to fine grained options */
1179 static QemuOptsList runtime_opts = {
1180     .name = "iscsi",
1181     .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
1182     .desc = {
1183         {
1184             .name = "filename",
1185             .type = QEMU_OPT_STRING,
1186             .help = "URL to the iscsi image",
1187         },
1188         { /* end of list */ }
1189     },
1190 };
1191 
1192 static struct scsi_task *iscsi_do_inquiry(struct iscsi_context *iscsi, int lun,
1193                                           int evpd, int pc, void **inq, Error **errp)
1194 {
1195     int full_size;
1196     struct scsi_task *task = NULL;
1197     task = iscsi_inquiry_sync(iscsi, lun, evpd, pc, 64);
1198     if (task == NULL || task->status != SCSI_STATUS_GOOD) {
1199         goto fail;
1200     }
1201     full_size = scsi_datain_getfullsize(task);
1202     if (full_size > task->datain.size) {
1203         scsi_free_scsi_task(task);
1204 
1205         /* we need more data for the full list */
1206         task = iscsi_inquiry_sync(iscsi, lun, evpd, pc, full_size);
1207         if (task == NULL || task->status != SCSI_STATUS_GOOD) {
1208             goto fail;
1209         }
1210     }
1211 
1212     *inq = scsi_datain_unmarshall(task);
1213     if (*inq == NULL) {
1214         error_setg(errp, "iSCSI: failed to unmarshall inquiry datain blob");
1215         goto fail_with_err;
1216     }
1217 
1218     return task;
1219 
1220 fail:
1221     error_setg(errp, "iSCSI: Inquiry command failed : %s",
1222                iscsi_get_error(iscsi));
1223 fail_with_err:
1224     if (task != NULL) {
1225         scsi_free_scsi_task(task);
1226     }
1227     return NULL;
1228 }
1229 
1230 static void iscsi_detach_aio_context(BlockDriverState *bs)
1231 {
1232     IscsiLun *iscsilun = bs->opaque;
1233 
1234     aio_set_fd_handler(iscsilun->aio_context,
1235                        iscsi_get_fd(iscsilun->iscsi),
1236                        NULL, NULL, NULL);
1237     iscsilun->events = 0;
1238 
1239     if (iscsilun->nop_timer) {
1240         timer_del(iscsilun->nop_timer);
1241         timer_free(iscsilun->nop_timer);
1242         iscsilun->nop_timer = NULL;
1243     }
1244     if (iscsilun->event_timer) {
1245         timer_del(iscsilun->event_timer);
1246         timer_free(iscsilun->event_timer);
1247         iscsilun->event_timer = NULL;
1248     }
1249 }
1250 
1251 static void iscsi_attach_aio_context(BlockDriverState *bs,
1252                                      AioContext *new_context)
1253 {
1254     IscsiLun *iscsilun = bs->opaque;
1255 
1256     iscsilun->aio_context = new_context;
1257     iscsi_set_events(iscsilun);
1258 
1259     /* Set up a timer for sending out iSCSI NOPs */
1260     iscsilun->nop_timer = aio_timer_new(iscsilun->aio_context,
1261                                         QEMU_CLOCK_REALTIME, SCALE_MS,
1262                                         iscsi_nop_timed_event, iscsilun);
1263     timer_mod(iscsilun->nop_timer,
1264               qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + NOP_INTERVAL);
1265 
1266     /* Prepare a timer for a delayed call to iscsi_set_events */
1267     iscsilun->event_timer = aio_timer_new(iscsilun->aio_context,
1268                                           QEMU_CLOCK_REALTIME, SCALE_MS,
1269                                           iscsi_timed_set_events, iscsilun);
1270 }
1271 
1272 static void iscsi_modesense_sync(IscsiLun *iscsilun)
1273 {
1274     struct scsi_task *task;
1275     struct scsi_mode_sense *ms = NULL;
1276     iscsilun->write_protected = false;
1277     iscsilun->dpofua = false;
1278 
1279     task = iscsi_modesense6_sync(iscsilun->iscsi, iscsilun->lun,
1280                                  1, SCSI_MODESENSE_PC_CURRENT,
1281                                  0x3F, 0, 255);
1282     if (task == NULL) {
1283         error_report("iSCSI: Failed to send MODE_SENSE(6) command: %s",
1284                      iscsi_get_error(iscsilun->iscsi));
1285         goto out;
1286     }
1287 
1288     if (task->status != SCSI_STATUS_GOOD) {
1289         error_report("iSCSI: Failed MODE_SENSE(6), LUN assumed writable");
1290         goto out;
1291     }
1292     ms = scsi_datain_unmarshall(task);
1293     if (!ms) {
1294         error_report("iSCSI: Failed to unmarshall MODE_SENSE(6) data: %s",
1295                      iscsi_get_error(iscsilun->iscsi));
1296         goto out;
1297     }
1298     iscsilun->write_protected = ms->device_specific_parameter & 0x80;
1299     iscsilun->dpofua          = ms->device_specific_parameter & 0x10;
1300 
1301 out:
1302     if (task) {
1303         scsi_free_scsi_task(task);
1304     }
1305 }
1306 
1307 /*
1308  * We support iscsi url's on the form
1309  * iscsi://[<username>%<password>@]<host>[:<port>]/<targetname>/<lun>
1310  */
1311 static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
1312                       Error **errp)
1313 {
1314     IscsiLun *iscsilun = bs->opaque;
1315     struct iscsi_context *iscsi = NULL;
1316     struct iscsi_url *iscsi_url = NULL;
1317     struct scsi_task *task = NULL;
1318     struct scsi_inquiry_standard *inq = NULL;
1319     struct scsi_inquiry_supported_pages *inq_vpd;
1320     char *initiator_name = NULL;
1321     QemuOpts *opts;
1322     Error *local_err = NULL;
1323     const char *filename;
1324     int i, ret = 0;
1325 
1326     if ((BDRV_SECTOR_SIZE % 512) != 0) {
1327         error_setg(errp, "iSCSI: Invalid BDRV_SECTOR_SIZE. "
1328                    "BDRV_SECTOR_SIZE(%lld) is not a multiple "
1329                    "of 512", BDRV_SECTOR_SIZE);
1330         return -EINVAL;
1331     }
1332 
1333     opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
1334     qemu_opts_absorb_qdict(opts, options, &local_err);
1335     if (local_err) {
1336         error_propagate(errp, local_err);
1337         ret = -EINVAL;
1338         goto out;
1339     }
1340 
1341     filename = qemu_opt_get(opts, "filename");
1342 
1343     iscsi_url = iscsi_parse_full_url(iscsi, filename);
1344     if (iscsi_url == NULL) {
1345         error_setg(errp, "Failed to parse URL : %s", filename);
1346         ret = -EINVAL;
1347         goto out;
1348     }
1349 
1350     memset(iscsilun, 0, sizeof(IscsiLun));
1351 
1352     initiator_name = parse_initiator_name(iscsi_url->target);
1353 
1354     iscsi = iscsi_create_context(initiator_name);
1355     if (iscsi == NULL) {
1356         error_setg(errp, "iSCSI: Failed to create iSCSI context.");
1357         ret = -ENOMEM;
1358         goto out;
1359     }
1360 
1361     if (iscsi_set_targetname(iscsi, iscsi_url->target)) {
1362         error_setg(errp, "iSCSI: Failed to set target name.");
1363         ret = -EINVAL;
1364         goto out;
1365     }
1366 
1367     if (iscsi_url->user[0] != '\0') {
1368         ret = iscsi_set_initiator_username_pwd(iscsi, iscsi_url->user,
1369                                               iscsi_url->passwd);
1370         if (ret != 0) {
1371             error_setg(errp, "Failed to set initiator username and password");
1372             ret = -EINVAL;
1373             goto out;
1374         }
1375     }
1376 
1377     /* check if we got CHAP username/password via the options */
1378     parse_chap(iscsi, iscsi_url->target, &local_err);
1379     if (local_err != NULL) {
1380         error_propagate(errp, local_err);
1381         ret = -EINVAL;
1382         goto out;
1383     }
1384 
1385     if (iscsi_set_session_type(iscsi, ISCSI_SESSION_NORMAL) != 0) {
1386         error_setg(errp, "iSCSI: Failed to set session type to normal.");
1387         ret = -EINVAL;
1388         goto out;
1389     }
1390 
1391     iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE_CRC32C);
1392 
1393     /* check if we got HEADER_DIGEST via the options */
1394     parse_header_digest(iscsi, iscsi_url->target, &local_err);
1395     if (local_err != NULL) {
1396         error_propagate(errp, local_err);
1397         ret = -EINVAL;
1398         goto out;
1399     }
1400 
1401     if (iscsi_full_connect_sync(iscsi, iscsi_url->portal, iscsi_url->lun) != 0) {
1402         error_setg(errp, "iSCSI: Failed to connect to LUN : %s",
1403             iscsi_get_error(iscsi));
1404         ret = -EINVAL;
1405         goto out;
1406     }
1407 
1408     iscsilun->iscsi = iscsi;
1409     iscsilun->aio_context = bdrv_get_aio_context(bs);
1410     iscsilun->lun   = iscsi_url->lun;
1411     iscsilun->has_write_same = true;
1412 
1413     task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 0, 0,
1414                             (void **) &inq, errp);
1415     if (task == NULL) {
1416         ret = -EINVAL;
1417         goto out;
1418     }
1419     iscsilun->type = inq->periperal_device_type;
1420     scsi_free_scsi_task(task);
1421     task = NULL;
1422 
1423     iscsi_modesense_sync(iscsilun);
1424 
1425     /* Check the write protect flag of the LUN if we want to write */
1426     if (iscsilun->type == TYPE_DISK && (flags & BDRV_O_RDWR) &&
1427         iscsilun->write_protected) {
1428         error_setg(errp, "Cannot open a write protected LUN as read-write");
1429         ret = -EACCES;
1430         goto out;
1431     }
1432 
1433     iscsi_readcapacity_sync(iscsilun, &local_err);
1434     if (local_err != NULL) {
1435         error_propagate(errp, local_err);
1436         ret = -EINVAL;
1437         goto out;
1438     }
1439     bs->total_sectors = sector_lun2qemu(iscsilun->num_blocks, iscsilun);
1440     bs->request_alignment = iscsilun->block_size;
1441 
1442     /* We don't have any emulation for devices other than disks and CD-ROMs, so
1443      * this must be sg ioctl compatible. We force it to be sg, otherwise qemu
1444      * will try to read from the device to guess the image format.
1445      */
1446     if (iscsilun->type != TYPE_DISK && iscsilun->type != TYPE_ROM) {
1447         bs->sg = 1;
1448     }
1449 
1450     task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 1,
1451                             SCSI_INQUIRY_PAGECODE_SUPPORTED_VPD_PAGES,
1452                             (void **) &inq_vpd, errp);
1453     if (task == NULL) {
1454         ret = -EINVAL;
1455         goto out;
1456     }
1457     for (i = 0; i < inq_vpd->num_pages; i++) {
1458         struct scsi_task *inq_task;
1459         struct scsi_inquiry_logical_block_provisioning *inq_lbp;
1460         struct scsi_inquiry_block_limits *inq_bl;
1461         switch (inq_vpd->pages[i]) {
1462         case SCSI_INQUIRY_PAGECODE_LOGICAL_BLOCK_PROVISIONING:
1463             inq_task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 1,
1464                                         SCSI_INQUIRY_PAGECODE_LOGICAL_BLOCK_PROVISIONING,
1465                                         (void **) &inq_lbp, errp);
1466             if (inq_task == NULL) {
1467                 ret = -EINVAL;
1468                 goto out;
1469             }
1470             memcpy(&iscsilun->lbp, inq_lbp,
1471                    sizeof(struct scsi_inquiry_logical_block_provisioning));
1472             scsi_free_scsi_task(inq_task);
1473             break;
1474         case SCSI_INQUIRY_PAGECODE_BLOCK_LIMITS:
1475             inq_task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 1,
1476                                     SCSI_INQUIRY_PAGECODE_BLOCK_LIMITS,
1477                                     (void **) &inq_bl, errp);
1478             if (inq_task == NULL) {
1479                 ret = -EINVAL;
1480                 goto out;
1481             }
1482             memcpy(&iscsilun->bl, inq_bl,
1483                    sizeof(struct scsi_inquiry_block_limits));
1484             scsi_free_scsi_task(inq_task);
1485             break;
1486         default:
1487             break;
1488         }
1489     }
1490     scsi_free_scsi_task(task);
1491     task = NULL;
1492 
1493     iscsi_attach_aio_context(bs, iscsilun->aio_context);
1494 
1495     /* Guess the internal cluster (page) size of the iscsi target by the means
1496      * of opt_unmap_gran. Transfer the unmap granularity only if it has a
1497      * reasonable size */
1498     if (iscsilun->bl.opt_unmap_gran * iscsilun->block_size >= 4 * 1024 &&
1499         iscsilun->bl.opt_unmap_gran * iscsilun->block_size <= 16 * 1024 * 1024) {
1500         iscsilun->cluster_sectors = (iscsilun->bl.opt_unmap_gran *
1501                                      iscsilun->block_size) >> BDRV_SECTOR_BITS;
1502         if (iscsilun->lbprz) {
1503             iscsilun->allocationmap = iscsi_allocationmap_init(iscsilun);
1504             if (iscsilun->allocationmap == NULL) {
1505                 ret = -ENOMEM;
1506             }
1507         }
1508     }
1509 
1510 out:
1511     qemu_opts_del(opts);
1512     g_free(initiator_name);
1513     if (iscsi_url != NULL) {
1514         iscsi_destroy_url(iscsi_url);
1515     }
1516     if (task != NULL) {
1517         scsi_free_scsi_task(task);
1518     }
1519 
1520     if (ret) {
1521         if (iscsi != NULL) {
1522             if (iscsi_is_logged_in(iscsi)) {
1523                 iscsi_logout_sync(iscsi);
1524             }
1525             iscsi_destroy_context(iscsi);
1526         }
1527         memset(iscsilun, 0, sizeof(IscsiLun));
1528     }
1529     return ret;
1530 }
1531 
1532 static void iscsi_close(BlockDriverState *bs)
1533 {
1534     IscsiLun *iscsilun = bs->opaque;
1535     struct iscsi_context *iscsi = iscsilun->iscsi;
1536 
1537     iscsi_detach_aio_context(bs);
1538     if (iscsi_is_logged_in(iscsi)) {
1539         iscsi_logout_sync(iscsi);
1540     }
1541     iscsi_destroy_context(iscsi);
1542     g_free(iscsilun->zeroblock);
1543     g_free(iscsilun->allocationmap);
1544     memset(iscsilun, 0, sizeof(IscsiLun));
1545 }
1546 
1547 static int sector_limits_lun2qemu(int64_t sector, IscsiLun *iscsilun)
1548 {
1549     return MIN(sector_lun2qemu(sector, iscsilun), INT_MAX / 2 + 1);
1550 }
1551 
1552 static void iscsi_refresh_limits(BlockDriverState *bs, Error **errp)
1553 {
1554     /* We don't actually refresh here, but just return data queried in
1555      * iscsi_open(): iscsi targets don't change their limits. */
1556 
1557     IscsiLun *iscsilun = bs->opaque;
1558     uint32_t max_xfer_len = iscsilun->use_16_for_rw ? 0xffffffff : 0xffff;
1559 
1560     if (iscsilun->bl.max_xfer_len) {
1561         max_xfer_len = MIN(max_xfer_len, iscsilun->bl.max_xfer_len);
1562     }
1563 
1564     bs->bl.max_transfer_length = sector_limits_lun2qemu(max_xfer_len, iscsilun);
1565 
1566     if (iscsilun->lbp.lbpu) {
1567         if (iscsilun->bl.max_unmap < 0xffffffff) {
1568             bs->bl.max_discard =
1569                 sector_limits_lun2qemu(iscsilun->bl.max_unmap, iscsilun);
1570         }
1571         bs->bl.discard_alignment =
1572             sector_limits_lun2qemu(iscsilun->bl.opt_unmap_gran, iscsilun);
1573     }
1574 
1575     if (iscsilun->bl.max_ws_len < 0xffffffff) {
1576         bs->bl.max_write_zeroes =
1577             sector_limits_lun2qemu(iscsilun->bl.max_ws_len, iscsilun);
1578     }
1579     if (iscsilun->lbp.lbpws) {
1580         bs->bl.write_zeroes_alignment =
1581             sector_limits_lun2qemu(iscsilun->bl.opt_unmap_gran, iscsilun);
1582     }
1583     bs->bl.opt_transfer_length =
1584         sector_limits_lun2qemu(iscsilun->bl.opt_xfer_len, iscsilun);
1585 }
1586 
1587 /* Note that this will not re-establish a connection with an iSCSI target - it
1588  * is effectively a NOP.  */
1589 static int iscsi_reopen_prepare(BDRVReopenState *state,
1590                                 BlockReopenQueue *queue, Error **errp)
1591 {
1592     IscsiLun *iscsilun = state->bs->opaque;
1593 
1594     if (state->flags & BDRV_O_RDWR && iscsilun->write_protected) {
1595         error_setg(errp, "Cannot open a write protected LUN as read-write");
1596         return -EACCES;
1597     }
1598     return 0;
1599 }
1600 
1601 static int iscsi_truncate(BlockDriverState *bs, int64_t offset)
1602 {
1603     IscsiLun *iscsilun = bs->opaque;
1604     Error *local_err = NULL;
1605 
1606     if (iscsilun->type != TYPE_DISK) {
1607         return -ENOTSUP;
1608     }
1609 
1610     iscsi_readcapacity_sync(iscsilun, &local_err);
1611     if (local_err != NULL) {
1612         error_free(local_err);
1613         return -EIO;
1614     }
1615 
1616     if (offset > iscsi_getlength(bs)) {
1617         return -EINVAL;
1618     }
1619 
1620     if (iscsilun->allocationmap != NULL) {
1621         g_free(iscsilun->allocationmap);
1622         iscsilun->allocationmap = iscsi_allocationmap_init(iscsilun);
1623     }
1624 
1625     return 0;
1626 }
1627 
1628 static int iscsi_create(const char *filename, QemuOpts *opts, Error **errp)
1629 {
1630     int ret = 0;
1631     int64_t total_size = 0;
1632     BlockDriverState *bs;
1633     IscsiLun *iscsilun = NULL;
1634     QDict *bs_options;
1635 
1636     bs = bdrv_new();
1637 
1638     /* Read out options */
1639     total_size = DIV_ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
1640                               BDRV_SECTOR_SIZE);
1641     bs->opaque = g_new0(struct IscsiLun, 1);
1642     iscsilun = bs->opaque;
1643 
1644     bs_options = qdict_new();
1645     qdict_put(bs_options, "filename", qstring_from_str(filename));
1646     ret = iscsi_open(bs, bs_options, 0, NULL);
1647     QDECREF(bs_options);
1648 
1649     if (ret != 0) {
1650         goto out;
1651     }
1652     iscsi_detach_aio_context(bs);
1653     if (iscsilun->type != TYPE_DISK) {
1654         ret = -ENODEV;
1655         goto out;
1656     }
1657     if (bs->total_sectors < total_size) {
1658         ret = -ENOSPC;
1659         goto out;
1660     }
1661 
1662     ret = 0;
1663 out:
1664     if (iscsilun->iscsi != NULL) {
1665         iscsi_destroy_context(iscsilun->iscsi);
1666     }
1667     g_free(bs->opaque);
1668     bs->opaque = NULL;
1669     bdrv_unref(bs);
1670     return ret;
1671 }
1672 
1673 static int iscsi_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
1674 {
1675     IscsiLun *iscsilun = bs->opaque;
1676     bdi->unallocated_blocks_are_zero = iscsilun->lbprz;
1677     bdi->can_write_zeroes_with_unmap = iscsilun->lbprz && iscsilun->lbp.lbpws;
1678     bdi->cluster_size = iscsilun->cluster_sectors * BDRV_SECTOR_SIZE;
1679     return 0;
1680 }
1681 
1682 static QemuOptsList iscsi_create_opts = {
1683     .name = "iscsi-create-opts",
1684     .head = QTAILQ_HEAD_INITIALIZER(iscsi_create_opts.head),
1685     .desc = {
1686         {
1687             .name = BLOCK_OPT_SIZE,
1688             .type = QEMU_OPT_SIZE,
1689             .help = "Virtual disk size"
1690         },
1691         { /* end of list */ }
1692     }
1693 };
1694 
1695 static BlockDriver bdrv_iscsi = {
1696     .format_name     = "iscsi",
1697     .protocol_name   = "iscsi",
1698 
1699     .instance_size   = sizeof(IscsiLun),
1700     .bdrv_needs_filename = true,
1701     .bdrv_file_open  = iscsi_open,
1702     .bdrv_close      = iscsi_close,
1703     .bdrv_create     = iscsi_create,
1704     .create_opts     = &iscsi_create_opts,
1705     .bdrv_reopen_prepare  = iscsi_reopen_prepare,
1706 
1707     .bdrv_getlength  = iscsi_getlength,
1708     .bdrv_get_info   = iscsi_get_info,
1709     .bdrv_truncate   = iscsi_truncate,
1710     .bdrv_refresh_limits = iscsi_refresh_limits,
1711 
1712     .bdrv_co_get_block_status = iscsi_co_get_block_status,
1713     .bdrv_co_discard      = iscsi_co_discard,
1714     .bdrv_co_write_zeroes = iscsi_co_write_zeroes,
1715     .bdrv_co_readv         = iscsi_co_readv,
1716     .bdrv_co_writev        = iscsi_co_writev,
1717     .bdrv_co_flush_to_disk = iscsi_co_flush,
1718 
1719 #ifdef __linux__
1720     .bdrv_ioctl       = iscsi_ioctl,
1721     .bdrv_aio_ioctl   = iscsi_aio_ioctl,
1722 #endif
1723 
1724     .bdrv_detach_aio_context = iscsi_detach_aio_context,
1725     .bdrv_attach_aio_context = iscsi_attach_aio_context,
1726 };
1727 
1728 static QemuOptsList qemu_iscsi_opts = {
1729     .name = "iscsi",
1730     .head = QTAILQ_HEAD_INITIALIZER(qemu_iscsi_opts.head),
1731     .desc = {
1732         {
1733             .name = "user",
1734             .type = QEMU_OPT_STRING,
1735             .help = "username for CHAP authentication to target",
1736         },{
1737             .name = "password",
1738             .type = QEMU_OPT_STRING,
1739             .help = "password for CHAP authentication to target",
1740         },{
1741             .name = "header-digest",
1742             .type = QEMU_OPT_STRING,
1743             .help = "HeaderDigest setting. "
1744                     "{CRC32C|CRC32C-NONE|NONE-CRC32C|NONE}",
1745         },{
1746             .name = "initiator-name",
1747             .type = QEMU_OPT_STRING,
1748             .help = "Initiator iqn name to use when connecting",
1749         },
1750         { /* end of list */ }
1751     },
1752 };
1753 
1754 static void iscsi_block_init(void)
1755 {
1756     bdrv_register(&bdrv_iscsi);
1757     qemu_add_opts(&qemu_iscsi_opts);
1758 }
1759 
1760 block_init(iscsi_block_init);
1761