xref: /openbmc/qemu/block/iscsi.c (revision 0bc12c4f)
1 /*
2  * QEMU Block driver for iSCSI images
3  *
4  * Copyright (c) 2010-2011 Ronnie Sahlberg <ronniesahlberg@gmail.com>
5  * Copyright (c) 2012-2015 Peter Lieven <pl@kamp.de>
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a copy
8  * of this software and associated documentation files (the "Software"), to deal
9  * in the Software without restriction, including without limitation the rights
10  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11  * copies of the Software, and to permit persons to whom the Software is
12  * furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice shall be included in
15  * all copies or substantial portions of the Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23  * THE SOFTWARE.
24  */
25 
26 #include "config-host.h"
27 
28 #include <poll.h>
29 #include <math.h>
30 #include <arpa/inet.h>
31 #include "qemu-common.h"
32 #include "qemu/config-file.h"
33 #include "qemu/error-report.h"
34 #include "qemu/bitops.h"
35 #include "qemu/bitmap.h"
36 #include "block/block_int.h"
37 #include "block/scsi.h"
38 #include "qemu/iov.h"
39 #include "sysemu/sysemu.h"
40 #include "qmp-commands.h"
41 
42 #include <iscsi/iscsi.h>
43 #include <iscsi/scsi-lowlevel.h>
44 
45 #ifdef __linux__
46 #include <scsi/sg.h>
47 #include <block/scsi.h>
48 #endif
49 
/* Per-LUN driver state; the I/O paths reach it through bs->opaque. */
typedef struct IscsiLun {
    struct iscsi_context *iscsi;    /* libiscsi session all commands go through */
    AioContext *aio_context;        /* context for fd handlers, bottom halves, timers */
    int lun;                        /* LUN number passed to every libiscsi call */
    enum scsi_inquiry_peripheral_device_type type; /* reported by SG_GET_SCSI_ID */
    int block_size;                 /* LUN logical block size in bytes */
    uint64_t num_blocks;            /* LUN capacity in logical blocks */
    int events;                     /* poll events last registered by iscsi_set_events() */
    QEMUTimer *nop_timer;
    QEMUTimer *event_timer;         /* re-armed while libiscsi asks for no events */
    struct scsi_inquiry_logical_block_provisioning lbp; /* lbpu/lbpws/lbpws10 gate UNMAP use */
    struct scsi_inquiry_block_limits bl;
    unsigned char *zeroblock;       /* lazily allocated block of zeroes for WRITE SAME */
    unsigned long *allocationmap;   /* one bit per cluster; NULL disables tracking */
    int cluster_sectors;            /* QEMU sectors covered by one allocationmap bit */
    bool use_16_for_rw;             /* use the 16-byte READ/WRITE variants, else 10-byte */
    bool write_protected;
    bool lbpme;                     /* when false GET LBA STATUS is never issued */
    bool lbprz;                     /* unmapped blocks are reported as reading zero */
    bool dpofua;                    /* FUA may be set on writes */
    bool has_write_same;            /* cleared once the target rejects WRITE SAME */
    bool force_next_flush;          /* a completed command requires a later cache flush */
} IscsiLun;
73 
/* State of one coroutine-issued SCSI command; filled in by iscsi_co_generic_cb(). */
typedef struct IscsiTask {
    int status;             /* SCSI status reported by the last completion */
    int complete;           /* non-zero once the issuing coroutine may proceed */
    int retries;            /* number of retries already consumed */
    int do_retry;           /* completion asks the issuer to resubmit the command */
    struct scsi_task *task; /* libiscsi task; the issuing function frees it */
    Coroutine *co;          /* coroutine to re-enter on completion (may be NULL) */
    QEMUBH *bh;             /* bottom half used to re-enter the coroutine */
    IscsiLun *iscsilun;
    QEMUTimer retry_timer;  /* delays the retry after Busy/TaskSetFull */
    bool force_next_flush;  /* merged into iscsilun->force_next_flush on success */
} IscsiTask;
86 
/* AIO control block used by the SG_IO ioctl path. */
typedef struct IscsiAIOCB {
    BlockAIOCB common;      /* must stay first: iscsi_aio_cancel() casts BlockAIOCB * to this type */
    QEMUIOVector *qiov;
    QEMUBH *bh;             /* completion bottom half; non-NULL once scheduled */
    IscsiLun *iscsilun;
    struct scsi_task *task; /* freed by iscsi_bh_cb() after the completion callback */
    uint8_t *buf;           /* optional buffer, released with g_free() on completion */
    int status;             /* -EINPROGRESS until the request finishes */
    int64_t sector_num;
    int nb_sectors;
#ifdef __linux__
    sg_io_hdr_t *ioh;       /* caller's SG_IO header, updated on completion */
#endif
} IscsiAIOCB;
101 
/* Delay in ms before iscsi_set_events() asks libiscsi again after it
 * requested no events at all. */
#define EVENT_INTERVAL 250
/* NOTE(review): presumably the keep-alive period (ms) and failure limit for
 * nop_timer; their users are outside this part of the file. */
#define NOP_INTERVAL 5000
#define MAX_NOP_FAILURES 3
/* A failing command is retried at most once per entry of iscsi_retry_times. */
#define ISCSI_CMD_RETRIES ARRAY_SIZE(iscsi_retry_times)
/* Mean back-off in ms for the n-th Busy/TaskSetFull retry; the actual delay
 * is drawn from exp_random() with this mean. */
static const unsigned iscsi_retry_times[] = {8, 32, 128, 512, 2048, 8192, 32768};

/* This threshold is a trade-off knob to choose between
 * the potential additional overhead of an extra GET_LBA_STATUS request
 * vs. unnecessarily reading a lot of zero sectors over the wire.
 * If a read request is greater than or equal to ISCSI_CHECKALLOC_THRES
 * sectors, we first check the allocation status of the area covered by
 * the request, provided the allocationmap indicates that the area might
 * be unallocated. */
#define ISCSI_CHECKALLOC_THRES 64
116 
117 static void
118 iscsi_bh_cb(void *p)
119 {
120     IscsiAIOCB *acb = p;
121 
122     qemu_bh_delete(acb->bh);
123 
124     g_free(acb->buf);
125     acb->buf = NULL;
126 
127     acb->common.cb(acb->common.opaque, acb->status);
128 
129     if (acb->task != NULL) {
130         scsi_free_scsi_task(acb->task);
131         acb->task = NULL;
132     }
133 
134     qemu_aio_unref(acb);
135 }
136 
137 static void
138 iscsi_schedule_bh(IscsiAIOCB *acb)
139 {
140     if (acb->bh) {
141         return;
142     }
143     acb->bh = aio_bh_new(acb->iscsilun->aio_context, iscsi_bh_cb, acb);
144     qemu_bh_schedule(acb->bh);
145 }
146 
147 static void iscsi_co_generic_bh_cb(void *opaque)
148 {
149     struct IscsiTask *iTask = opaque;
150     iTask->complete = 1;
151     qemu_bh_delete(iTask->bh);
152     qemu_coroutine_enter(iTask->co, NULL);
153 }
154 
155 static void iscsi_retry_timer_expired(void *opaque)
156 {
157     struct IscsiTask *iTask = opaque;
158     iTask->complete = 1;
159     if (iTask->co) {
160         qemu_coroutine_enter(iTask->co, NULL);
161     }
162 }
163 
164 static inline unsigned exp_random(double mean)
165 {
166     return -mean * log((double)rand() / RAND_MAX);
167 }
168 
/*
 * Completion callback shared by all coroutine-based commands.
 *
 * Records the SCSI status and task in the IscsiTask passed as @opaque and
 * decides whether the issuer should resubmit the command:
 *  - CHECK CONDITION with UNIT ATTENTION sense: retry immediately;
 *  - BUSY / TASK SET FULL: retry after a randomized back-off; the retry
 *    timer, not this function, wakes the coroutine in that case;
 *  - any other failure, or retries exhausted: report the error.
 * Unless the retry timer was armed, the waiting coroutine is resumed via a
 * bottom half; without a coroutine the task is simply marked complete.
 */
static void
iscsi_co_generic_cb(struct iscsi_context *iscsi, int status,
                        void *command_data, void *opaque)
{
    struct IscsiTask *iTask = opaque;
    struct scsi_task *task = command_data;

    iTask->status = status;
    iTask->do_retry = 0;
    iTask->task = task;

    if (status != SCSI_STATUS_GOOD) {
        /* retries is incremented even when the limit has been reached */
        if (iTask->retries++ < ISCSI_CMD_RETRIES) {
            if (status == SCSI_STATUS_CHECK_CONDITION
                && task->sense.key == SCSI_SENSE_UNIT_ATTENTION) {
                error_report("iSCSI CheckCondition: %s",
                             iscsi_get_error(iscsi));
                iTask->do_retry = 1;
                goto out;
            }
            /* status 0x28 is SCSI_TASK_SET_FULL. It was first introduced
             * in libiscsi 1.10.0. Hardcode this value here to avoid
             * the need to bump the libiscsi requirement to 1.10.0 */
            if (status == SCSI_STATUS_BUSY || status == 0x28) {
                /* retries was already incremented, hence the "- 1" */
                unsigned retry_time =
                    exp_random(iscsi_retry_times[iTask->retries - 1]);
                error_report("iSCSI Busy/TaskSetFull (retry #%u in %u ms): %s",
                             iTask->retries, retry_time,
                             iscsi_get_error(iscsi));
                aio_timer_init(iTask->iscsilun->aio_context,
                               &iTask->retry_timer, QEMU_CLOCK_REALTIME,
                               SCALE_MS, iscsi_retry_timer_expired, iTask);
                timer_mod(&iTask->retry_timer,
                          qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + retry_time);
                iTask->do_retry = 1;
                /* iscsi_retry_timer_expired() completes the task later */
                return;
            }
        }
        error_report("iSCSI Failure: %s", iscsi_get_error(iscsi));
    } else {
        /* remember that a successful command may require a later flush */
        iTask->iscsilun->force_next_flush |= iTask->force_next_flush;
    }

out:
    if (iTask->co) {
        /* the coroutine is re-entered from a bottom half, not from here */
        iTask->bh = aio_bh_new(iTask->iscsilun->aio_context,
                               iscsi_co_generic_bh_cb, iTask);
        qemu_bh_schedule(iTask->bh);
    } else {
        iTask->complete = 1;
    }
}
221 
222 static void iscsi_co_init_iscsitask(IscsiLun *iscsilun, struct IscsiTask *iTask)
223 {
224     *iTask = (struct IscsiTask) {
225         .co         = qemu_coroutine_self(),
226         .iscsilun   = iscsilun,
227     };
228 }
229 
230 static void
231 iscsi_abort_task_cb(struct iscsi_context *iscsi, int status, void *command_data,
232                     void *private_data)
233 {
234     IscsiAIOCB *acb = private_data;
235 
236     acb->status = -ECANCELED;
237     iscsi_schedule_bh(acb);
238 }
239 
240 static void
241 iscsi_aio_cancel(BlockAIOCB *blockacb)
242 {
243     IscsiAIOCB *acb = (IscsiAIOCB *)blockacb;
244     IscsiLun *iscsilun = acb->iscsilun;
245 
246     if (acb->status != -EINPROGRESS) {
247         return;
248     }
249 
250     /* send a task mgmt call to the target to cancel the task on the target */
251     iscsi_task_mgmt_abort_task_async(iscsilun->iscsi, acb->task,
252                                      iscsi_abort_task_cb, acb);
253 
254 }
255 
/* AIOCB description handed to qemu_aio_get() by the SG_IO ioctl path. */
static const AIOCBInfo iscsi_aiocb_info = {
    .aiocb_size         = sizeof(IscsiAIOCB),
    .cancel_async       = iscsi_aio_cancel,
};


/* fd handlers; declared early because iscsi_set_events() installs them */
static void iscsi_process_read(void *arg);
static void iscsi_process_write(void *arg);
264 
265 static void
266 iscsi_set_events(IscsiLun *iscsilun)
267 {
268     struct iscsi_context *iscsi = iscsilun->iscsi;
269     int ev = iscsi_which_events(iscsi);
270 
271     if (ev != iscsilun->events) {
272         aio_set_fd_handler(iscsilun->aio_context,
273                            iscsi_get_fd(iscsi),
274                            (ev & POLLIN) ? iscsi_process_read : NULL,
275                            (ev & POLLOUT) ? iscsi_process_write : NULL,
276                            iscsilun);
277         iscsilun->events = ev;
278     }
279 
280     /* newer versions of libiscsi may return zero events. In this
281      * case start a timer to ensure we are able to return to service
282      * once this situation changes. */
283     if (!ev) {
284         timer_mod(iscsilun->event_timer,
285                   qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + EVENT_INTERVAL);
286     }
287 }
288 
289 static void iscsi_timed_set_events(void *opaque)
290 {
291     IscsiLun *iscsilun = opaque;
292     iscsi_set_events(iscsilun);
293 }
294 
295 static void
296 iscsi_process_read(void *arg)
297 {
298     IscsiLun *iscsilun = arg;
299     struct iscsi_context *iscsi = iscsilun->iscsi;
300 
301     iscsi_service(iscsi, POLLIN);
302     iscsi_set_events(iscsilun);
303 }
304 
305 static void
306 iscsi_process_write(void *arg)
307 {
308     IscsiLun *iscsilun = arg;
309     struct iscsi_context *iscsi = iscsilun->iscsi;
310 
311     iscsi_service(iscsi, POLLOUT);
312     iscsi_set_events(iscsilun);
313 }
314 
315 static int64_t sector_lun2qemu(int64_t sector, IscsiLun *iscsilun)
316 {
317     return sector * iscsilun->block_size / BDRV_SECTOR_SIZE;
318 }
319 
320 static int64_t sector_qemu2lun(int64_t sector, IscsiLun *iscsilun)
321 {
322     return sector * BDRV_SECTOR_SIZE / iscsilun->block_size;
323 }
324 
325 static bool is_request_lun_aligned(int64_t sector_num, int nb_sectors,
326                                       IscsiLun *iscsilun)
327 {
328     if ((sector_num * BDRV_SECTOR_SIZE) % iscsilun->block_size ||
329         (nb_sectors * BDRV_SECTOR_SIZE) % iscsilun->block_size) {
330             error_report("iSCSI misaligned request: "
331                          "iscsilun->block_size %u, sector_num %" PRIi64
332                          ", nb_sectors %d",
333                          iscsilun->block_size, sector_num, nb_sectors);
334             return 0;
335     }
336     return 1;
337 }
338 
339 static unsigned long *iscsi_allocationmap_init(IscsiLun *iscsilun)
340 {
341     return bitmap_try_new(DIV_ROUND_UP(sector_lun2qemu(iscsilun->num_blocks,
342                                                        iscsilun),
343                                        iscsilun->cluster_sectors));
344 }
345 
346 static void iscsi_allocationmap_set(IscsiLun *iscsilun, int64_t sector_num,
347                                     int nb_sectors)
348 {
349     if (iscsilun->allocationmap == NULL) {
350         return;
351     }
352     bitmap_set(iscsilun->allocationmap,
353                sector_num / iscsilun->cluster_sectors,
354                DIV_ROUND_UP(nb_sectors, iscsilun->cluster_sectors));
355 }
356 
357 static void iscsi_allocationmap_clear(IscsiLun *iscsilun, int64_t sector_num,
358                                       int nb_sectors)
359 {
360     int64_t cluster_num, nb_clusters;
361     if (iscsilun->allocationmap == NULL) {
362         return;
363     }
364     cluster_num = DIV_ROUND_UP(sector_num, iscsilun->cluster_sectors);
365     nb_clusters = (sector_num + nb_sectors) / iscsilun->cluster_sectors
366                   - cluster_num;
367     if (nb_clusters > 0) {
368         bitmap_clear(iscsilun->allocationmap, cluster_num, nb_clusters);
369     }
370 }
371 
372 static int coroutine_fn iscsi_co_writev(BlockDriverState *bs,
373                                         int64_t sector_num, int nb_sectors,
374                                         QEMUIOVector *iov)
375 {
376     IscsiLun *iscsilun = bs->opaque;
377     struct IscsiTask iTask;
378     uint64_t lba;
379     uint32_t num_sectors;
380     int fua;
381 
382     if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
383         return -EINVAL;
384     }
385 
386     if (bs->bl.max_transfer_length && nb_sectors > bs->bl.max_transfer_length) {
387         error_report("iSCSI Error: Write of %d sectors exceeds max_xfer_len "
388                      "of %d sectors", nb_sectors, bs->bl.max_transfer_length);
389         return -EINVAL;
390     }
391 
392     lba = sector_qemu2lun(sector_num, iscsilun);
393     num_sectors = sector_qemu2lun(nb_sectors, iscsilun);
394     iscsi_co_init_iscsitask(iscsilun, &iTask);
395 retry:
396     fua = iscsilun->dpofua && !bs->enable_write_cache;
397     iTask.force_next_flush = !fua;
398     if (iscsilun->use_16_for_rw) {
399         iTask.task = iscsi_write16_task(iscsilun->iscsi, iscsilun->lun, lba,
400                                         NULL, num_sectors * iscsilun->block_size,
401                                         iscsilun->block_size, 0, 0, fua, 0, 0,
402                                         iscsi_co_generic_cb, &iTask);
403     } else {
404         iTask.task = iscsi_write10_task(iscsilun->iscsi, iscsilun->lun, lba,
405                                         NULL, num_sectors * iscsilun->block_size,
406                                         iscsilun->block_size, 0, 0, fua, 0, 0,
407                                         iscsi_co_generic_cb, &iTask);
408     }
409     if (iTask.task == NULL) {
410         return -ENOMEM;
411     }
412     scsi_task_set_iov_out(iTask.task, (struct scsi_iovec *) iov->iov,
413                           iov->niov);
414     while (!iTask.complete) {
415         iscsi_set_events(iscsilun);
416         qemu_coroutine_yield();
417     }
418 
419     if (iTask.task != NULL) {
420         scsi_free_scsi_task(iTask.task);
421         iTask.task = NULL;
422     }
423 
424     if (iTask.do_retry) {
425         iTask.complete = 0;
426         goto retry;
427     }
428 
429     if (iTask.status != SCSI_STATUS_GOOD) {
430         return -EIO;
431     }
432 
433     iscsi_allocationmap_set(iscsilun, sector_num, nb_sectors);
434 
435     return 0;
436 }
437 
438 
439 static bool iscsi_allocationmap_is_allocated(IscsiLun *iscsilun,
440                                              int64_t sector_num, int nb_sectors)
441 {
442     unsigned long size;
443     if (iscsilun->allocationmap == NULL) {
444         return true;
445     }
446     size = DIV_ROUND_UP(sector_num + nb_sectors, iscsilun->cluster_sectors);
447     return !(find_next_bit(iscsilun->allocationmap, size,
448                            sector_num / iscsilun->cluster_sectors) == size);
449 }
450 
451 static int64_t coroutine_fn iscsi_co_get_block_status(BlockDriverState *bs,
452                                                   int64_t sector_num,
453                                                   int nb_sectors, int *pnum)
454 {
455     IscsiLun *iscsilun = bs->opaque;
456     struct scsi_get_lba_status *lbas = NULL;
457     struct scsi_lba_status_descriptor *lbasd = NULL;
458     struct IscsiTask iTask;
459     int64_t ret;
460 
461     iscsi_co_init_iscsitask(iscsilun, &iTask);
462 
463     if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
464         ret = -EINVAL;
465         goto out;
466     }
467 
468     /* default to all sectors allocated */
469     ret = BDRV_BLOCK_DATA;
470     ret |= (sector_num << BDRV_SECTOR_BITS) | BDRV_BLOCK_OFFSET_VALID;
471     *pnum = nb_sectors;
472 
473     /* LUN does not support logical block provisioning */
474     if (!iscsilun->lbpme) {
475         goto out;
476     }
477 
478 retry:
479     if (iscsi_get_lba_status_task(iscsilun->iscsi, iscsilun->lun,
480                                   sector_qemu2lun(sector_num, iscsilun),
481                                   8 + 16, iscsi_co_generic_cb,
482                                   &iTask) == NULL) {
483         ret = -ENOMEM;
484         goto out;
485     }
486 
487     while (!iTask.complete) {
488         iscsi_set_events(iscsilun);
489         qemu_coroutine_yield();
490     }
491 
492     if (iTask.do_retry) {
493         if (iTask.task != NULL) {
494             scsi_free_scsi_task(iTask.task);
495             iTask.task = NULL;
496         }
497         iTask.complete = 0;
498         goto retry;
499     }
500 
501     if (iTask.status != SCSI_STATUS_GOOD) {
502         /* in case the get_lba_status_callout fails (i.e.
503          * because the device is busy or the cmd is not
504          * supported) we pretend all blocks are allocated
505          * for backwards compatibility */
506         goto out;
507     }
508 
509     lbas = scsi_datain_unmarshall(iTask.task);
510     if (lbas == NULL) {
511         ret = -EIO;
512         goto out;
513     }
514 
515     lbasd = &lbas->descriptors[0];
516 
517     if (sector_qemu2lun(sector_num, iscsilun) != lbasd->lba) {
518         ret = -EIO;
519         goto out;
520     }
521 
522     *pnum = sector_lun2qemu(lbasd->num_blocks, iscsilun);
523 
524     if (lbasd->provisioning == SCSI_PROVISIONING_TYPE_DEALLOCATED ||
525         lbasd->provisioning == SCSI_PROVISIONING_TYPE_ANCHORED) {
526         ret &= ~BDRV_BLOCK_DATA;
527         if (iscsilun->lbprz) {
528             ret |= BDRV_BLOCK_ZERO;
529         }
530     }
531 
532     if (ret & BDRV_BLOCK_ZERO) {
533         iscsi_allocationmap_clear(iscsilun, sector_num, *pnum);
534     } else {
535         iscsi_allocationmap_set(iscsilun, sector_num, *pnum);
536     }
537 
538     if (*pnum > nb_sectors) {
539         *pnum = nb_sectors;
540     }
541 out:
542     if (iTask.task != NULL) {
543         scsi_free_scsi_task(iTask.task);
544     }
545     return ret;
546 }
547 
548 static int coroutine_fn iscsi_co_readv(BlockDriverState *bs,
549                                        int64_t sector_num, int nb_sectors,
550                                        QEMUIOVector *iov)
551 {
552     IscsiLun *iscsilun = bs->opaque;
553     struct IscsiTask iTask;
554     uint64_t lba;
555     uint32_t num_sectors;
556 
557     if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
558         return -EINVAL;
559     }
560 
561     if (bs->bl.max_transfer_length && nb_sectors > bs->bl.max_transfer_length) {
562         error_report("iSCSI Error: Read of %d sectors exceeds max_xfer_len "
563                      "of %d sectors", nb_sectors, bs->bl.max_transfer_length);
564         return -EINVAL;
565     }
566 
567     if (iscsilun->lbprz && nb_sectors >= ISCSI_CHECKALLOC_THRES &&
568         !iscsi_allocationmap_is_allocated(iscsilun, sector_num, nb_sectors)) {
569         int64_t ret;
570         int pnum;
571         ret = iscsi_co_get_block_status(bs, sector_num, INT_MAX, &pnum);
572         if (ret < 0) {
573             return ret;
574         }
575         if (ret & BDRV_BLOCK_ZERO && pnum >= nb_sectors) {
576             qemu_iovec_memset(iov, 0, 0x00, iov->size);
577             return 0;
578         }
579     }
580 
581     lba = sector_qemu2lun(sector_num, iscsilun);
582     num_sectors = sector_qemu2lun(nb_sectors, iscsilun);
583 
584     iscsi_co_init_iscsitask(iscsilun, &iTask);
585 retry:
586     if (iscsilun->use_16_for_rw) {
587         iTask.task = iscsi_read16_task(iscsilun->iscsi, iscsilun->lun, lba,
588                                        num_sectors * iscsilun->block_size,
589                                        iscsilun->block_size, 0, 0, 0, 0, 0,
590                                        iscsi_co_generic_cb, &iTask);
591     } else {
592         iTask.task = iscsi_read10_task(iscsilun->iscsi, iscsilun->lun, lba,
593                                        num_sectors * iscsilun->block_size,
594                                        iscsilun->block_size,
595                                        0, 0, 0, 0, 0,
596                                        iscsi_co_generic_cb, &iTask);
597     }
598     if (iTask.task == NULL) {
599         return -ENOMEM;
600     }
601     scsi_task_set_iov_in(iTask.task, (struct scsi_iovec *) iov->iov, iov->niov);
602 
603     while (!iTask.complete) {
604         iscsi_set_events(iscsilun);
605         qemu_coroutine_yield();
606     }
607 
608     if (iTask.task != NULL) {
609         scsi_free_scsi_task(iTask.task);
610         iTask.task = NULL;
611     }
612 
613     if (iTask.do_retry) {
614         iTask.complete = 0;
615         goto retry;
616     }
617 
618     if (iTask.status != SCSI_STATUS_GOOD) {
619         return -EIO;
620     }
621 
622     return 0;
623 }
624 
625 static int coroutine_fn iscsi_co_flush(BlockDriverState *bs)
626 {
627     IscsiLun *iscsilun = bs->opaque;
628     struct IscsiTask iTask;
629 
630     if (bs->sg) {
631         return 0;
632     }
633 
634     if (!iscsilun->force_next_flush) {
635         return 0;
636     }
637     iscsilun->force_next_flush = false;
638 
639     iscsi_co_init_iscsitask(iscsilun, &iTask);
640 retry:
641     if (iscsi_synchronizecache10_task(iscsilun->iscsi, iscsilun->lun, 0, 0, 0,
642                                       0, iscsi_co_generic_cb, &iTask) == NULL) {
643         return -ENOMEM;
644     }
645 
646     while (!iTask.complete) {
647         iscsi_set_events(iscsilun);
648         qemu_coroutine_yield();
649     }
650 
651     if (iTask.task != NULL) {
652         scsi_free_scsi_task(iTask.task);
653         iTask.task = NULL;
654     }
655 
656     if (iTask.do_retry) {
657         iTask.complete = 0;
658         goto retry;
659     }
660 
661     if (iTask.status != SCSI_STATUS_GOOD) {
662         return -EIO;
663     }
664 
665     return 0;
666 }
667 
668 #ifdef __linux__
669 static void
670 iscsi_aio_ioctl_cb(struct iscsi_context *iscsi, int status,
671                      void *command_data, void *opaque)
672 {
673     IscsiAIOCB *acb = opaque;
674 
675     g_free(acb->buf);
676     acb->buf = NULL;
677 
678     acb->status = 0;
679     if (status < 0) {
680         error_report("Failed to ioctl(SG_IO) to iSCSI lun. %s",
681                      iscsi_get_error(iscsi));
682         acb->status = -EIO;
683     }
684 
685     acb->ioh->driver_status = 0;
686     acb->ioh->host_status   = 0;
687     acb->ioh->resid         = 0;
688 
689 #define SG_ERR_DRIVER_SENSE    0x08
690 
691     if (status == SCSI_STATUS_CHECK_CONDITION && acb->task->datain.size >= 2) {
692         int ss;
693 
694         acb->ioh->driver_status |= SG_ERR_DRIVER_SENSE;
695 
696         acb->ioh->sb_len_wr = acb->task->datain.size - 2;
697         ss = (acb->ioh->mx_sb_len >= acb->ioh->sb_len_wr) ?
698              acb->ioh->mx_sb_len : acb->ioh->sb_len_wr;
699         memcpy(acb->ioh->sbp, &acb->task->datain.data[2], ss);
700     }
701 
702     iscsi_schedule_bh(acb);
703 }
704 
705 static BlockAIOCB *iscsi_aio_ioctl(BlockDriverState *bs,
706         unsigned long int req, void *buf,
707         BlockCompletionFunc *cb, void *opaque)
708 {
709     IscsiLun *iscsilun = bs->opaque;
710     struct iscsi_context *iscsi = iscsilun->iscsi;
711     struct iscsi_data data;
712     IscsiAIOCB *acb;
713 
714     assert(req == SG_IO);
715 
716     acb = qemu_aio_get(&iscsi_aiocb_info, bs, cb, opaque);
717 
718     acb->iscsilun = iscsilun;
719     acb->bh          = NULL;
720     acb->status      = -EINPROGRESS;
721     acb->buf         = NULL;
722     acb->ioh         = buf;
723 
724     acb->task = malloc(sizeof(struct scsi_task));
725     if (acb->task == NULL) {
726         error_report("iSCSI: Failed to allocate task for scsi command. %s",
727                      iscsi_get_error(iscsi));
728         qemu_aio_unref(acb);
729         return NULL;
730     }
731     memset(acb->task, 0, sizeof(struct scsi_task));
732 
733     switch (acb->ioh->dxfer_direction) {
734     case SG_DXFER_TO_DEV:
735         acb->task->xfer_dir = SCSI_XFER_WRITE;
736         break;
737     case SG_DXFER_FROM_DEV:
738         acb->task->xfer_dir = SCSI_XFER_READ;
739         break;
740     default:
741         acb->task->xfer_dir = SCSI_XFER_NONE;
742         break;
743     }
744 
745     acb->task->cdb_size = acb->ioh->cmd_len;
746     memcpy(&acb->task->cdb[0], acb->ioh->cmdp, acb->ioh->cmd_len);
747     acb->task->expxferlen = acb->ioh->dxfer_len;
748 
749     data.size = 0;
750     if (acb->task->xfer_dir == SCSI_XFER_WRITE) {
751         if (acb->ioh->iovec_count == 0) {
752             data.data = acb->ioh->dxferp;
753             data.size = acb->ioh->dxfer_len;
754         } else {
755             scsi_task_set_iov_out(acb->task,
756                                  (struct scsi_iovec *) acb->ioh->dxferp,
757                                  acb->ioh->iovec_count);
758         }
759     }
760 
761     if (iscsi_scsi_command_async(iscsi, iscsilun->lun, acb->task,
762                                  iscsi_aio_ioctl_cb,
763                                  (data.size > 0) ? &data : NULL,
764                                  acb) != 0) {
765         scsi_free_scsi_task(acb->task);
766         qemu_aio_unref(acb);
767         return NULL;
768     }
769 
770     /* tell libiscsi to read straight into the buffer we got from ioctl */
771     if (acb->task->xfer_dir == SCSI_XFER_READ) {
772         if (acb->ioh->iovec_count == 0) {
773             scsi_task_add_data_in_buffer(acb->task,
774                                          acb->ioh->dxfer_len,
775                                          acb->ioh->dxferp);
776         } else {
777             scsi_task_set_iov_in(acb->task,
778                                  (struct scsi_iovec *) acb->ioh->dxferp,
779                                  acb->ioh->iovec_count);
780         }
781     }
782 
783     iscsi_set_events(iscsilun);
784 
785     return &acb->common;
786 }
787 
788 static void ioctl_cb(void *opaque, int status)
789 {
790     int *p_status = opaque;
791     *p_status = status;
792 }
793 
794 static int iscsi_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
795 {
796     IscsiLun *iscsilun = bs->opaque;
797     int status;
798 
799     switch (req) {
800     case SG_GET_VERSION_NUM:
801         *(int *)buf = 30000;
802         break;
803     case SG_GET_SCSI_ID:
804         ((struct sg_scsi_id *)buf)->scsi_type = iscsilun->type;
805         break;
806     case SG_IO:
807         status = -EINPROGRESS;
808         iscsi_aio_ioctl(bs, req, buf, ioctl_cb, &status);
809 
810         while (status == -EINPROGRESS) {
811             aio_poll(iscsilun->aio_context, true);
812         }
813 
814         return 0;
815     default:
816         return -1;
817     }
818     return 0;
819 }
820 #endif
821 
822 static int64_t
823 iscsi_getlength(BlockDriverState *bs)
824 {
825     IscsiLun *iscsilun = bs->opaque;
826     int64_t len;
827 
828     len  = iscsilun->num_blocks;
829     len *= iscsilun->block_size;
830 
831     return len;
832 }
833 
834 static int
835 coroutine_fn iscsi_co_discard(BlockDriverState *bs, int64_t sector_num,
836                                    int nb_sectors)
837 {
838     IscsiLun *iscsilun = bs->opaque;
839     struct IscsiTask iTask;
840     struct unmap_list list;
841 
842     if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
843         return -EINVAL;
844     }
845 
846     if (!iscsilun->lbp.lbpu) {
847         /* UNMAP is not supported by the target */
848         return 0;
849     }
850 
851     list.lba = sector_qemu2lun(sector_num, iscsilun);
852     list.num = sector_qemu2lun(nb_sectors, iscsilun);
853 
854     iscsi_co_init_iscsitask(iscsilun, &iTask);
855 retry:
856     if (iscsi_unmap_task(iscsilun->iscsi, iscsilun->lun, 0, 0, &list, 1,
857                      iscsi_co_generic_cb, &iTask) == NULL) {
858         return -ENOMEM;
859     }
860 
861     while (!iTask.complete) {
862         iscsi_set_events(iscsilun);
863         qemu_coroutine_yield();
864     }
865 
866     if (iTask.task != NULL) {
867         scsi_free_scsi_task(iTask.task);
868         iTask.task = NULL;
869     }
870 
871     if (iTask.do_retry) {
872         iTask.complete = 0;
873         goto retry;
874     }
875 
876     if (iTask.status == SCSI_STATUS_CHECK_CONDITION) {
877         /* the target might fail with a check condition if it
878            is not happy with the alignment of the UNMAP request
879            we silently fail in this case */
880         return 0;
881     }
882 
883     if (iTask.status != SCSI_STATUS_GOOD) {
884         return -EIO;
885     }
886 
887     iscsi_allocationmap_clear(iscsilun, sector_num, nb_sectors);
888 
889     return 0;
890 }
891 
/*
 * Coroutine zero-write of @nb_sectors QEMU sectors starting at @sector_num,
 * implemented with WRITE SAME(10/16) of a single zero block.
 *
 * With BDRV_REQ_MAY_UNMAP the UNMAP bit is set if the target advertises
 * support for it on one of the two command variants; otherwise a plain
 * WRITE SAME is attempted.
 *
 * Returns 0 on success, -EINVAL for a misaligned request, -ENOTSUP when
 * WRITE SAME is unavailable or rejected by the target, -ENOMEM on
 * allocation failure, and -EIO for any other failure.
 */
static int
coroutine_fn iscsi_co_write_zeroes(BlockDriverState *bs, int64_t sector_num,
                                   int nb_sectors, BdrvRequestFlags flags)
{
    IscsiLun *iscsilun = bs->opaque;
    struct IscsiTask iTask;
    uint64_t lba;
    uint32_t nb_blocks;
    bool use_16_for_ws = iscsilun->use_16_for_rw;

    if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
        return -EINVAL;
    }

    if (flags & BDRV_REQ_MAY_UNMAP) {
        if (!use_16_for_ws && !iscsilun->lbp.lbpws10) {
            /* WRITESAME10 with UNMAP is unsupported try WRITESAME16 */
            use_16_for_ws = true;
        }
        if (use_16_for_ws && !iscsilun->lbp.lbpws) {
            /* WRITESAME16 with UNMAP is not supported by the target,
             * fall back and try WRITESAME10/16 without UNMAP */
            flags &= ~BDRV_REQ_MAY_UNMAP;
            use_16_for_ws = iscsilun->use_16_for_rw;
        }
    }

    if (!(flags & BDRV_REQ_MAY_UNMAP) && !iscsilun->has_write_same) {
        /* WRITESAME without UNMAP is not supported by the target */
        return -ENOTSUP;
    }

    lba = sector_qemu2lun(sector_num, iscsilun);
    nb_blocks = sector_qemu2lun(nb_sectors, iscsilun);

    /* the zero block is allocated on first use and kept in the LUN state */
    if (iscsilun->zeroblock == NULL) {
        iscsilun->zeroblock = g_try_malloc0(iscsilun->block_size);
        if (iscsilun->zeroblock == NULL) {
            return -ENOMEM;
        }
    }

    iscsi_co_init_iscsitask(iscsilun, &iTask);
    /* a successful WRITE SAME always requests a later cache flush */
    iTask.force_next_flush = true;
retry:
    /* NOTE(review): nb_blocks is 32 bits wide; confirm that callers keep it
     * within what the WRITE SAME(10) variant can encode. */
    if (use_16_for_ws) {
        iTask.task = iscsi_writesame16_task(iscsilun->iscsi, iscsilun->lun, lba,
                                            iscsilun->zeroblock, iscsilun->block_size,
                                            nb_blocks, 0, !!(flags & BDRV_REQ_MAY_UNMAP),
                                            0, 0, iscsi_co_generic_cb, &iTask);
    } else {
        iTask.task = iscsi_writesame10_task(iscsilun->iscsi, iscsilun->lun, lba,
                                            iscsilun->zeroblock, iscsilun->block_size,
                                            nb_blocks, 0, !!(flags & BDRV_REQ_MAY_UNMAP),
                                            0, 0, iscsi_co_generic_cb, &iTask);
    }
    if (iTask.task == NULL) {
        return -ENOMEM;
    }

    while (!iTask.complete) {
        iscsi_set_events(iscsilun);
        qemu_coroutine_yield();
    }

    /* the sense data must be inspected before the task is freed below */
    if (iTask.status == SCSI_STATUS_CHECK_CONDITION &&
        iTask.task->sense.key == SCSI_SENSE_ILLEGAL_REQUEST &&
        (iTask.task->sense.ascq == SCSI_SENSE_ASCQ_INVALID_OPERATION_CODE ||
         iTask.task->sense.ascq == SCSI_SENSE_ASCQ_INVALID_FIELD_IN_CDB)) {
        /* WRITE SAME is not supported by the target */
        iscsilun->has_write_same = false;
        scsi_free_scsi_task(iTask.task);
        return -ENOTSUP;
    }

    if (iTask.task != NULL) {
        scsi_free_scsi_task(iTask.task);
        iTask.task = NULL;
    }

    if (iTask.do_retry) {
        iTask.complete = 0;
        goto retry;
    }

    if (iTask.status != SCSI_STATUS_GOOD) {
        return -EIO;
    }

    /* zeroing with UNMAP leaves the range unallocated, without it allocated */
    if (flags & BDRV_REQ_MAY_UNMAP) {
        iscsi_allocationmap_clear(iscsilun, sector_num, nb_sectors);
    } else {
        iscsi_allocationmap_set(iscsilun, sector_num, nb_sectors);
    }

    return 0;
}
989 
990 static void parse_chap(struct iscsi_context *iscsi, const char *target,
991                        Error **errp)
992 {
993     QemuOptsList *list;
994     QemuOpts *opts;
995     const char *user = NULL;
996     const char *password = NULL;
997 
998     list = qemu_find_opts("iscsi");
999     if (!list) {
1000         return;
1001     }
1002 
1003     opts = qemu_opts_find(list, target);
1004     if (opts == NULL) {
1005         opts = QTAILQ_FIRST(&list->head);
1006         if (!opts) {
1007             return;
1008         }
1009     }
1010 
1011     user = qemu_opt_get(opts, "user");
1012     if (!user) {
1013         return;
1014     }
1015 
1016     password = qemu_opt_get(opts, "password");
1017     if (!password) {
1018         error_setg(errp, "CHAP username specified but no password was given");
1019         return;
1020     }
1021 
1022     if (iscsi_set_initiator_username_pwd(iscsi, user, password)) {
1023         error_setg(errp, "Failed to set initiator username and password");
1024     }
1025 }
1026 
1027 static void parse_header_digest(struct iscsi_context *iscsi, const char *target,
1028                                 Error **errp)
1029 {
1030     QemuOptsList *list;
1031     QemuOpts *opts;
1032     const char *digest = NULL;
1033 
1034     list = qemu_find_opts("iscsi");
1035     if (!list) {
1036         return;
1037     }
1038 
1039     opts = qemu_opts_find(list, target);
1040     if (opts == NULL) {
1041         opts = QTAILQ_FIRST(&list->head);
1042         if (!opts) {
1043             return;
1044         }
1045     }
1046 
1047     digest = qemu_opt_get(opts, "header-digest");
1048     if (!digest) {
1049         return;
1050     }
1051 
1052     if (!strcmp(digest, "CRC32C")) {
1053         iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_CRC32C);
1054     } else if (!strcmp(digest, "NONE")) {
1055         iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE);
1056     } else if (!strcmp(digest, "CRC32C-NONE")) {
1057         iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_CRC32C_NONE);
1058     } else if (!strcmp(digest, "NONE-CRC32C")) {
1059         iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE_CRC32C);
1060     } else {
1061         error_setg(errp, "Invalid header-digest setting : %s", digest);
1062     }
1063 }
1064 
1065 static char *parse_initiator_name(const char *target)
1066 {
1067     QemuOptsList *list;
1068     QemuOpts *opts;
1069     const char *name;
1070     char *iscsi_name;
1071     UuidInfo *uuid_info;
1072 
1073     list = qemu_find_opts("iscsi");
1074     if (list) {
1075         opts = qemu_opts_find(list, target);
1076         if (!opts) {
1077             opts = QTAILQ_FIRST(&list->head);
1078         }
1079         if (opts) {
1080             name = qemu_opt_get(opts, "initiator-name");
1081             if (name) {
1082                 return g_strdup(name);
1083             }
1084         }
1085     }
1086 
1087     uuid_info = qmp_query_uuid(NULL);
1088     if (strcmp(uuid_info->UUID, UUID_NONE) == 0) {
1089         name = qemu_get_vm_name();
1090     } else {
1091         name = uuid_info->UUID;
1092     }
1093     iscsi_name = g_strdup_printf("iqn.2008-11.org.linux-kvm%s%s",
1094                                  name ? ":" : "", name ? name : "");
1095     qapi_free_UuidInfo(uuid_info);
1096     return iscsi_name;
1097 }
1098 
1099 static void iscsi_nop_timed_event(void *opaque)
1100 {
1101     IscsiLun *iscsilun = opaque;
1102 
1103     if (iscsi_get_nops_in_flight(iscsilun->iscsi) > MAX_NOP_FAILURES) {
1104         error_report("iSCSI: NOP timeout. Reconnecting...");
1105         iscsi_reconnect(iscsilun->iscsi);
1106     }
1107 
1108     if (iscsi_nop_out_async(iscsilun->iscsi, NULL, NULL, 0, NULL) != 0) {
1109         error_report("iSCSI: failed to sent NOP-Out. Disabling NOP messages.");
1110         return;
1111     }
1112 
1113     timer_mod(iscsilun->nop_timer, qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + NOP_INTERVAL);
1114     iscsi_set_events(iscsilun);
1115 }
1116 
1117 static void iscsi_readcapacity_sync(IscsiLun *iscsilun, Error **errp)
1118 {
1119     struct scsi_task *task = NULL;
1120     struct scsi_readcapacity10 *rc10 = NULL;
1121     struct scsi_readcapacity16 *rc16 = NULL;
1122     int retries = ISCSI_CMD_RETRIES;
1123 
1124     do {
1125         if (task != NULL) {
1126             scsi_free_scsi_task(task);
1127             task = NULL;
1128         }
1129 
1130         switch (iscsilun->type) {
1131         case TYPE_DISK:
1132             task = iscsi_readcapacity16_sync(iscsilun->iscsi, iscsilun->lun);
1133             if (task != NULL && task->status == SCSI_STATUS_GOOD) {
1134                 rc16 = scsi_datain_unmarshall(task);
1135                 if (rc16 == NULL) {
1136                     error_setg(errp, "iSCSI: Failed to unmarshall readcapacity16 data.");
1137                 } else {
1138                     iscsilun->block_size = rc16->block_length;
1139                     iscsilun->num_blocks = rc16->returned_lba + 1;
1140                     iscsilun->lbpme = !!rc16->lbpme;
1141                     iscsilun->lbprz = !!rc16->lbprz;
1142                     iscsilun->use_16_for_rw = (rc16->returned_lba > 0xffffffff);
1143                 }
1144             }
1145             break;
1146         case TYPE_ROM:
1147             task = iscsi_readcapacity10_sync(iscsilun->iscsi, iscsilun->lun, 0, 0);
1148             if (task != NULL && task->status == SCSI_STATUS_GOOD) {
1149                 rc10 = scsi_datain_unmarshall(task);
1150                 if (rc10 == NULL) {
1151                     error_setg(errp, "iSCSI: Failed to unmarshall readcapacity10 data.");
1152                 } else {
1153                     iscsilun->block_size = rc10->block_size;
1154                     if (rc10->lba == 0) {
1155                         /* blank disk loaded */
1156                         iscsilun->num_blocks = 0;
1157                     } else {
1158                         iscsilun->num_blocks = rc10->lba + 1;
1159                     }
1160                 }
1161             }
1162             break;
1163         default:
1164             return;
1165         }
1166     } while (task != NULL && task->status == SCSI_STATUS_CHECK_CONDITION
1167              && task->sense.key == SCSI_SENSE_UNIT_ATTENTION
1168              && retries-- > 0);
1169 
1170     if (task == NULL || task->status != SCSI_STATUS_GOOD) {
1171         error_setg(errp, "iSCSI: failed to send readcapacity10 command.");
1172     }
1173     if (task) {
1174         scsi_free_scsi_task(task);
1175     }
1176 }
1177 
1178 /* TODO Convert to fine grained options */
1179 static QemuOptsList runtime_opts = {
1180     .name = "iscsi",
1181     .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
1182     .desc = {
1183         {
1184             .name = "filename",
1185             .type = QEMU_OPT_STRING,
1186             .help = "URL to the iscsi image",
1187         },
1188         { /* end of list */ }
1189     },
1190 };
1191 
1192 static struct scsi_task *iscsi_do_inquiry(struct iscsi_context *iscsi, int lun,
1193                                           int evpd, int pc, void **inq, Error **errp)
1194 {
1195     int full_size;
1196     struct scsi_task *task = NULL;
1197     task = iscsi_inquiry_sync(iscsi, lun, evpd, pc, 64);
1198     if (task == NULL || task->status != SCSI_STATUS_GOOD) {
1199         goto fail;
1200     }
1201     full_size = scsi_datain_getfullsize(task);
1202     if (full_size > task->datain.size) {
1203         scsi_free_scsi_task(task);
1204 
1205         /* we need more data for the full list */
1206         task = iscsi_inquiry_sync(iscsi, lun, evpd, pc, full_size);
1207         if (task == NULL || task->status != SCSI_STATUS_GOOD) {
1208             goto fail;
1209         }
1210     }
1211 
1212     *inq = scsi_datain_unmarshall(task);
1213     if (*inq == NULL) {
1214         error_setg(errp, "iSCSI: failed to unmarshall inquiry datain blob");
1215         goto fail_with_err;
1216     }
1217 
1218     return task;
1219 
1220 fail:
1221     error_setg(errp, "iSCSI: Inquiry command failed : %s",
1222                iscsi_get_error(iscsi));
1223 fail_with_err:
1224     if (task != NULL) {
1225         scsi_free_scsi_task(task);
1226     }
1227     return NULL;
1228 }
1229 
1230 static void iscsi_detach_aio_context(BlockDriverState *bs)
1231 {
1232     IscsiLun *iscsilun = bs->opaque;
1233 
1234     aio_set_fd_handler(iscsilun->aio_context,
1235                        iscsi_get_fd(iscsilun->iscsi),
1236                        NULL, NULL, NULL);
1237     iscsilun->events = 0;
1238 
1239     if (iscsilun->nop_timer) {
1240         timer_del(iscsilun->nop_timer);
1241         timer_free(iscsilun->nop_timer);
1242         iscsilun->nop_timer = NULL;
1243     }
1244     if (iscsilun->event_timer) {
1245         timer_del(iscsilun->event_timer);
1246         timer_free(iscsilun->event_timer);
1247         iscsilun->event_timer = NULL;
1248     }
1249 }
1250 
1251 static void iscsi_attach_aio_context(BlockDriverState *bs,
1252                                      AioContext *new_context)
1253 {
1254     IscsiLun *iscsilun = bs->opaque;
1255 
1256     iscsilun->aio_context = new_context;
1257     iscsi_set_events(iscsilun);
1258 
1259     /* Set up a timer for sending out iSCSI NOPs */
1260     iscsilun->nop_timer = aio_timer_new(iscsilun->aio_context,
1261                                         QEMU_CLOCK_REALTIME, SCALE_MS,
1262                                         iscsi_nop_timed_event, iscsilun);
1263     timer_mod(iscsilun->nop_timer,
1264               qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + NOP_INTERVAL);
1265 
1266     /* Prepare a timer for a delayed call to iscsi_set_events */
1267     iscsilun->event_timer = aio_timer_new(iscsilun->aio_context,
1268                                           QEMU_CLOCK_REALTIME, SCALE_MS,
1269                                           iscsi_timed_set_events, iscsilun);
1270 }
1271 
1272 static void iscsi_modesense_sync(IscsiLun *iscsilun)
1273 {
1274     struct scsi_task *task;
1275     struct scsi_mode_sense *ms = NULL;
1276     iscsilun->write_protected = false;
1277     iscsilun->dpofua = false;
1278 
1279     task = iscsi_modesense6_sync(iscsilun->iscsi, iscsilun->lun,
1280                                  1, SCSI_MODESENSE_PC_CURRENT,
1281                                  0x3F, 0, 255);
1282     if (task == NULL) {
1283         error_report("iSCSI: Failed to send MODE_SENSE(6) command: %s",
1284                      iscsi_get_error(iscsilun->iscsi));
1285         goto out;
1286     }
1287 
1288     if (task->status != SCSI_STATUS_GOOD) {
1289         error_report("iSCSI: Failed MODE_SENSE(6), LUN assumed writable");
1290         goto out;
1291     }
1292     ms = scsi_datain_unmarshall(task);
1293     if (!ms) {
1294         error_report("iSCSI: Failed to unmarshall MODE_SENSE(6) data: %s",
1295                      iscsi_get_error(iscsilun->iscsi));
1296         goto out;
1297     }
1298     iscsilun->write_protected = ms->device_specific_parameter & 0x80;
1299     iscsilun->dpofua          = ms->device_specific_parameter & 0x10;
1300 
1301 out:
1302     if (task) {
1303         scsi_free_scsi_task(task);
1304     }
1305 }
1306 
1307 /*
1308  * We support iscsi url's on the form
1309  * iscsi://[<username>%<password>@]<host>[:<port>]/<targetname>/<lun>
1310  */
1311 static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
1312                       Error **errp)
1313 {
1314     IscsiLun *iscsilun = bs->opaque;
1315     struct iscsi_context *iscsi = NULL;
1316     struct iscsi_url *iscsi_url = NULL;
1317     struct scsi_task *task = NULL;
1318     struct scsi_inquiry_standard *inq = NULL;
1319     struct scsi_inquiry_supported_pages *inq_vpd;
1320     char *initiator_name = NULL;
1321     QemuOpts *opts;
1322     Error *local_err = NULL;
1323     const char *filename;
1324     int i, ret = 0;
1325 
1326     opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
1327     qemu_opts_absorb_qdict(opts, options, &local_err);
1328     if (local_err) {
1329         error_propagate(errp, local_err);
1330         ret = -EINVAL;
1331         goto out;
1332     }
1333 
1334     filename = qemu_opt_get(opts, "filename");
1335 
1336     iscsi_url = iscsi_parse_full_url(iscsi, filename);
1337     if (iscsi_url == NULL) {
1338         error_setg(errp, "Failed to parse URL : %s", filename);
1339         ret = -EINVAL;
1340         goto out;
1341     }
1342 
1343     memset(iscsilun, 0, sizeof(IscsiLun));
1344 
1345     initiator_name = parse_initiator_name(iscsi_url->target);
1346 
1347     iscsi = iscsi_create_context(initiator_name);
1348     if (iscsi == NULL) {
1349         error_setg(errp, "iSCSI: Failed to create iSCSI context.");
1350         ret = -ENOMEM;
1351         goto out;
1352     }
1353 
1354     if (iscsi_set_targetname(iscsi, iscsi_url->target)) {
1355         error_setg(errp, "iSCSI: Failed to set target name.");
1356         ret = -EINVAL;
1357         goto out;
1358     }
1359 
1360     if (iscsi_url->user[0] != '\0') {
1361         ret = iscsi_set_initiator_username_pwd(iscsi, iscsi_url->user,
1362                                               iscsi_url->passwd);
1363         if (ret != 0) {
1364             error_setg(errp, "Failed to set initiator username and password");
1365             ret = -EINVAL;
1366             goto out;
1367         }
1368     }
1369 
1370     /* check if we got CHAP username/password via the options */
1371     parse_chap(iscsi, iscsi_url->target, &local_err);
1372     if (local_err != NULL) {
1373         error_propagate(errp, local_err);
1374         ret = -EINVAL;
1375         goto out;
1376     }
1377 
1378     if (iscsi_set_session_type(iscsi, ISCSI_SESSION_NORMAL) != 0) {
1379         error_setg(errp, "iSCSI: Failed to set session type to normal.");
1380         ret = -EINVAL;
1381         goto out;
1382     }
1383 
1384     iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE_CRC32C);
1385 
1386     /* check if we got HEADER_DIGEST via the options */
1387     parse_header_digest(iscsi, iscsi_url->target, &local_err);
1388     if (local_err != NULL) {
1389         error_propagate(errp, local_err);
1390         ret = -EINVAL;
1391         goto out;
1392     }
1393 
1394     if (iscsi_full_connect_sync(iscsi, iscsi_url->portal, iscsi_url->lun) != 0) {
1395         error_setg(errp, "iSCSI: Failed to connect to LUN : %s",
1396             iscsi_get_error(iscsi));
1397         ret = -EINVAL;
1398         goto out;
1399     }
1400 
1401     iscsilun->iscsi = iscsi;
1402     iscsilun->aio_context = bdrv_get_aio_context(bs);
1403     iscsilun->lun   = iscsi_url->lun;
1404     iscsilun->has_write_same = true;
1405 
1406     task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 0, 0,
1407                             (void **) &inq, errp);
1408     if (task == NULL) {
1409         ret = -EINVAL;
1410         goto out;
1411     }
1412     iscsilun->type = inq->periperal_device_type;
1413     scsi_free_scsi_task(task);
1414     task = NULL;
1415 
1416     iscsi_modesense_sync(iscsilun);
1417 
1418     /* Check the write protect flag of the LUN if we want to write */
1419     if (iscsilun->type == TYPE_DISK && (flags & BDRV_O_RDWR) &&
1420         iscsilun->write_protected) {
1421         error_setg(errp, "Cannot open a write protected LUN as read-write");
1422         ret = -EACCES;
1423         goto out;
1424     }
1425 
1426     iscsi_readcapacity_sync(iscsilun, &local_err);
1427     if (local_err != NULL) {
1428         error_propagate(errp, local_err);
1429         ret = -EINVAL;
1430         goto out;
1431     }
1432     bs->total_sectors = sector_lun2qemu(iscsilun->num_blocks, iscsilun);
1433     bs->request_alignment = iscsilun->block_size;
1434 
1435     /* We don't have any emulation for devices other than disks and CD-ROMs, so
1436      * this must be sg ioctl compatible. We force it to be sg, otherwise qemu
1437      * will try to read from the device to guess the image format.
1438      */
1439     if (iscsilun->type != TYPE_DISK && iscsilun->type != TYPE_ROM) {
1440         bs->sg = 1;
1441     }
1442 
1443     task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 1,
1444                             SCSI_INQUIRY_PAGECODE_SUPPORTED_VPD_PAGES,
1445                             (void **) &inq_vpd, errp);
1446     if (task == NULL) {
1447         ret = -EINVAL;
1448         goto out;
1449     }
1450     for (i = 0; i < inq_vpd->num_pages; i++) {
1451         struct scsi_task *inq_task;
1452         struct scsi_inquiry_logical_block_provisioning *inq_lbp;
1453         struct scsi_inquiry_block_limits *inq_bl;
1454         switch (inq_vpd->pages[i]) {
1455         case SCSI_INQUIRY_PAGECODE_LOGICAL_BLOCK_PROVISIONING:
1456             inq_task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 1,
1457                                         SCSI_INQUIRY_PAGECODE_LOGICAL_BLOCK_PROVISIONING,
1458                                         (void **) &inq_lbp, errp);
1459             if (inq_task == NULL) {
1460                 ret = -EINVAL;
1461                 goto out;
1462             }
1463             memcpy(&iscsilun->lbp, inq_lbp,
1464                    sizeof(struct scsi_inquiry_logical_block_provisioning));
1465             scsi_free_scsi_task(inq_task);
1466             break;
1467         case SCSI_INQUIRY_PAGECODE_BLOCK_LIMITS:
1468             inq_task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 1,
1469                                     SCSI_INQUIRY_PAGECODE_BLOCK_LIMITS,
1470                                     (void **) &inq_bl, errp);
1471             if (inq_task == NULL) {
1472                 ret = -EINVAL;
1473                 goto out;
1474             }
1475             memcpy(&iscsilun->bl, inq_bl,
1476                    sizeof(struct scsi_inquiry_block_limits));
1477             scsi_free_scsi_task(inq_task);
1478             break;
1479         default:
1480             break;
1481         }
1482     }
1483     scsi_free_scsi_task(task);
1484     task = NULL;
1485 
1486     iscsi_attach_aio_context(bs, iscsilun->aio_context);
1487 
1488     /* Guess the internal cluster (page) size of the iscsi target by the means
1489      * of opt_unmap_gran. Transfer the unmap granularity only if it has a
1490      * reasonable size */
1491     if (iscsilun->bl.opt_unmap_gran * iscsilun->block_size >= 4 * 1024 &&
1492         iscsilun->bl.opt_unmap_gran * iscsilun->block_size <= 16 * 1024 * 1024) {
1493         iscsilun->cluster_sectors = (iscsilun->bl.opt_unmap_gran *
1494                                      iscsilun->block_size) >> BDRV_SECTOR_BITS;
1495         if (iscsilun->lbprz) {
1496             iscsilun->allocationmap = iscsi_allocationmap_init(iscsilun);
1497             if (iscsilun->allocationmap == NULL) {
1498                 ret = -ENOMEM;
1499             }
1500         }
1501     }
1502 
1503 out:
1504     qemu_opts_del(opts);
1505     g_free(initiator_name);
1506     if (iscsi_url != NULL) {
1507         iscsi_destroy_url(iscsi_url);
1508     }
1509     if (task != NULL) {
1510         scsi_free_scsi_task(task);
1511     }
1512 
1513     if (ret) {
1514         if (iscsi != NULL) {
1515             if (iscsi_is_logged_in(iscsi)) {
1516                 iscsi_logout_sync(iscsi);
1517             }
1518             iscsi_destroy_context(iscsi);
1519         }
1520         memset(iscsilun, 0, sizeof(IscsiLun));
1521     }
1522     return ret;
1523 }
1524 
1525 static void iscsi_close(BlockDriverState *bs)
1526 {
1527     IscsiLun *iscsilun = bs->opaque;
1528     struct iscsi_context *iscsi = iscsilun->iscsi;
1529 
1530     iscsi_detach_aio_context(bs);
1531     if (iscsi_is_logged_in(iscsi)) {
1532         iscsi_logout_sync(iscsi);
1533     }
1534     iscsi_destroy_context(iscsi);
1535     g_free(iscsilun->zeroblock);
1536     g_free(iscsilun->allocationmap);
1537     memset(iscsilun, 0, sizeof(IscsiLun));
1538 }
1539 
1540 static int sector_limits_lun2qemu(int64_t sector, IscsiLun *iscsilun)
1541 {
1542     return MIN(sector_lun2qemu(sector, iscsilun), INT_MAX / 2 + 1);
1543 }
1544 
1545 static void iscsi_refresh_limits(BlockDriverState *bs, Error **errp)
1546 {
1547     /* We don't actually refresh here, but just return data queried in
1548      * iscsi_open(): iscsi targets don't change their limits. */
1549 
1550     IscsiLun *iscsilun = bs->opaque;
1551     uint32_t max_xfer_len = iscsilun->use_16_for_rw ? 0xffffffff : 0xffff;
1552 
1553     if (iscsilun->bl.max_xfer_len) {
1554         max_xfer_len = MIN(max_xfer_len, iscsilun->bl.max_xfer_len);
1555     }
1556 
1557     bs->bl.max_transfer_length = sector_limits_lun2qemu(max_xfer_len, iscsilun);
1558 
1559     if (iscsilun->lbp.lbpu) {
1560         if (iscsilun->bl.max_unmap < 0xffffffff) {
1561             bs->bl.max_discard =
1562                 sector_limits_lun2qemu(iscsilun->bl.max_unmap, iscsilun);
1563         }
1564         bs->bl.discard_alignment =
1565             sector_limits_lun2qemu(iscsilun->bl.opt_unmap_gran, iscsilun);
1566     }
1567 
1568     if (iscsilun->bl.max_ws_len < 0xffffffff) {
1569         bs->bl.max_write_zeroes =
1570             sector_limits_lun2qemu(iscsilun->bl.max_ws_len, iscsilun);
1571     }
1572     if (iscsilun->lbp.lbpws) {
1573         bs->bl.write_zeroes_alignment =
1574             sector_limits_lun2qemu(iscsilun->bl.opt_unmap_gran, iscsilun);
1575     }
1576     bs->bl.opt_transfer_length =
1577         sector_limits_lun2qemu(iscsilun->bl.opt_xfer_len, iscsilun);
1578 }
1579 
1580 /* Note that this will not re-establish a connection with an iSCSI target - it
1581  * is effectively a NOP.  */
1582 static int iscsi_reopen_prepare(BDRVReopenState *state,
1583                                 BlockReopenQueue *queue, Error **errp)
1584 {
1585     IscsiLun *iscsilun = state->bs->opaque;
1586 
1587     if (state->flags & BDRV_O_RDWR && iscsilun->write_protected) {
1588         error_setg(errp, "Cannot open a write protected LUN as read-write");
1589         return -EACCES;
1590     }
1591     return 0;
1592 }
1593 
1594 static int iscsi_truncate(BlockDriverState *bs, int64_t offset)
1595 {
1596     IscsiLun *iscsilun = bs->opaque;
1597     Error *local_err = NULL;
1598 
1599     if (iscsilun->type != TYPE_DISK) {
1600         return -ENOTSUP;
1601     }
1602 
1603     iscsi_readcapacity_sync(iscsilun, &local_err);
1604     if (local_err != NULL) {
1605         error_free(local_err);
1606         return -EIO;
1607     }
1608 
1609     if (offset > iscsi_getlength(bs)) {
1610         return -EINVAL;
1611     }
1612 
1613     if (iscsilun->allocationmap != NULL) {
1614         g_free(iscsilun->allocationmap);
1615         iscsilun->allocationmap = iscsi_allocationmap_init(iscsilun);
1616     }
1617 
1618     return 0;
1619 }
1620 
1621 static int iscsi_create(const char *filename, QemuOpts *opts, Error **errp)
1622 {
1623     int ret = 0;
1624     int64_t total_size = 0;
1625     BlockDriverState *bs;
1626     IscsiLun *iscsilun = NULL;
1627     QDict *bs_options;
1628 
1629     bs = bdrv_new();
1630 
1631     /* Read out options */
1632     total_size = DIV_ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
1633                               BDRV_SECTOR_SIZE);
1634     bs->opaque = g_new0(struct IscsiLun, 1);
1635     iscsilun = bs->opaque;
1636 
1637     bs_options = qdict_new();
1638     qdict_put(bs_options, "filename", qstring_from_str(filename));
1639     ret = iscsi_open(bs, bs_options, 0, NULL);
1640     QDECREF(bs_options);
1641 
1642     if (ret != 0) {
1643         goto out;
1644     }
1645     iscsi_detach_aio_context(bs);
1646     if (iscsilun->type != TYPE_DISK) {
1647         ret = -ENODEV;
1648         goto out;
1649     }
1650     if (bs->total_sectors < total_size) {
1651         ret = -ENOSPC;
1652         goto out;
1653     }
1654 
1655     ret = 0;
1656 out:
1657     if (iscsilun->iscsi != NULL) {
1658         iscsi_destroy_context(iscsilun->iscsi);
1659     }
1660     g_free(bs->opaque);
1661     bs->opaque = NULL;
1662     bdrv_unref(bs);
1663     return ret;
1664 }
1665 
1666 static int iscsi_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
1667 {
1668     IscsiLun *iscsilun = bs->opaque;
1669     bdi->unallocated_blocks_are_zero = iscsilun->lbprz;
1670     bdi->can_write_zeroes_with_unmap = iscsilun->lbprz && iscsilun->lbp.lbpws;
1671     bdi->cluster_size = iscsilun->cluster_sectors * BDRV_SECTOR_SIZE;
1672     return 0;
1673 }
1674 
1675 static QemuOptsList iscsi_create_opts = {
1676     .name = "iscsi-create-opts",
1677     .head = QTAILQ_HEAD_INITIALIZER(iscsi_create_opts.head),
1678     .desc = {
1679         {
1680             .name = BLOCK_OPT_SIZE,
1681             .type = QEMU_OPT_SIZE,
1682             .help = "Virtual disk size"
1683         },
1684         { /* end of list */ }
1685     }
1686 };
1687 
1688 static BlockDriver bdrv_iscsi = {
1689     .format_name     = "iscsi",
1690     .protocol_name   = "iscsi",
1691 
1692     .instance_size   = sizeof(IscsiLun),
1693     .bdrv_needs_filename = true,
1694     .bdrv_file_open  = iscsi_open,
1695     .bdrv_close      = iscsi_close,
1696     .bdrv_create     = iscsi_create,
1697     .create_opts     = &iscsi_create_opts,
1698     .bdrv_reopen_prepare  = iscsi_reopen_prepare,
1699 
1700     .bdrv_getlength  = iscsi_getlength,
1701     .bdrv_get_info   = iscsi_get_info,
1702     .bdrv_truncate   = iscsi_truncate,
1703     .bdrv_refresh_limits = iscsi_refresh_limits,
1704 
1705     .bdrv_co_get_block_status = iscsi_co_get_block_status,
1706     .bdrv_co_discard      = iscsi_co_discard,
1707     .bdrv_co_write_zeroes = iscsi_co_write_zeroes,
1708     .bdrv_co_readv         = iscsi_co_readv,
1709     .bdrv_co_writev        = iscsi_co_writev,
1710     .bdrv_co_flush_to_disk = iscsi_co_flush,
1711 
1712 #ifdef __linux__
1713     .bdrv_ioctl       = iscsi_ioctl,
1714     .bdrv_aio_ioctl   = iscsi_aio_ioctl,
1715 #endif
1716 
1717     .bdrv_detach_aio_context = iscsi_detach_aio_context,
1718     .bdrv_attach_aio_context = iscsi_attach_aio_context,
1719 };
1720 
1721 static QemuOptsList qemu_iscsi_opts = {
1722     .name = "iscsi",
1723     .head = QTAILQ_HEAD_INITIALIZER(qemu_iscsi_opts.head),
1724     .desc = {
1725         {
1726             .name = "user",
1727             .type = QEMU_OPT_STRING,
1728             .help = "username for CHAP authentication to target",
1729         },{
1730             .name = "password",
1731             .type = QEMU_OPT_STRING,
1732             .help = "password for CHAP authentication to target",
1733         },{
1734             .name = "header-digest",
1735             .type = QEMU_OPT_STRING,
1736             .help = "HeaderDigest setting. "
1737                     "{CRC32C|CRC32C-NONE|NONE-CRC32C|NONE}",
1738         },{
1739             .name = "initiator-name",
1740             .type = QEMU_OPT_STRING,
1741             .help = "Initiator iqn name to use when connecting",
1742         },
1743         { /* end of list */ }
1744     },
1745 };
1746 
1747 static void iscsi_block_init(void)
1748 {
1749     bdrv_register(&bdrv_iscsi);
1750     qemu_add_opts(&qemu_iscsi_opts);
1751 }
1752 
1753 block_init(iscsi_block_init);
1754