xref: /openbmc/qemu/block/iscsi.c (revision 77a8257e)
1 /*
2  * QEMU Block driver for iSCSI images
3  *
4  * Copyright (c) 2010-2011 Ronnie Sahlberg <ronniesahlberg@gmail.com>
5  * Copyright (c) 2012-2014 Peter Lieven <pl@kamp.de>
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a copy
8  * of this software and associated documentation files (the "Software"), to deal
9  * in the Software without restriction, including without limitation the rights
10  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11  * copies of the Software, and to permit persons to whom the Software is
12  * furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice shall be included in
15  * all copies or substantial portions of the Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23  * THE SOFTWARE.
24  */
25 
26 #include "config-host.h"
27 
28 #include <poll.h>
29 #include <math.h>
30 #include <arpa/inet.h>
31 #include "qemu-common.h"
32 #include "qemu/config-file.h"
33 #include "qemu/error-report.h"
34 #include "qemu/bitops.h"
35 #include "qemu/bitmap.h"
36 #include "block/block_int.h"
37 #include "block/scsi.h"
38 #include "qemu/iov.h"
39 #include "sysemu/sysemu.h"
40 #include "qmp-commands.h"
41 
42 #include <iscsi/iscsi.h>
43 #include <iscsi/scsi-lowlevel.h>
44 
45 #ifdef __linux__
46 #include <scsi/sg.h>
47 #include <block/scsi.h>
48 #endif
49 
/* Per-LUN driver state; the block callbacks in this file fetch it from
 * bs->opaque. */
typedef struct IscsiLun {
    /* libiscsi context every command for this LUN is issued on */
    struct iscsi_context *iscsi;
    /* AioContext in which fd handlers, bottom halves and timers are created */
    AioContext *aio_context;
    int lun;
    enum scsi_inquiry_peripheral_device_type type;
    /* LUN block size in bytes (see the sector_* conversion helpers) */
    int block_size;
    /* LUN capacity, counted in blocks of block_size bytes */
    uint64_t num_blocks;
    /* poll event mask last computed by iscsi_set_events() */
    int events;
    QEMUTimer *nop_timer;
    /* zero means the LUN does not support logical block provisioning */
    uint8_t lbpme;
    /* non-zero: deallocated/anchored extents are reported as reading zeroes */
    uint8_t lbprz;
    /* cleared once the target rejects WRITE SAME with ILLEGAL REQUEST */
    uint8_t has_write_same;
    struct scsi_inquiry_logical_block_provisioning lbp;
    struct scsi_inquiry_block_limits bl;
    /* lazily allocated block_size bytes of zeroes, payload for WRITE SAME */
    unsigned char *zeroblock;
    /* one bit per cluster of cluster_sectors sectors; NULL disables tracking */
    unsigned long *allocationmap;
    int cluster_sectors;
    /* true: use the 16-byte READ/WRITE CDBs, false: the 10-byte variants */
    bool use_16_for_rw;
    bool write_protected;
} IscsiLun;
70 
/* Bookkeeping for one coroutine-driven SCSI command; filled in by
 * iscsi_co_generic_cb() and polled by the issuing coroutine. */
typedef struct IscsiTask {
    /* SCSI status handed to the completion callback */
    int status;
    /* set to 1 once the issuing coroutine may continue */
    int complete;
    /* number of non-GOOD completions seen so far for this request */
    int retries;
    /* set to 1 by the callback when the command should be re-issued */
    int do_retry;
    /* libiscsi task of the last completion; freed by the issuing coroutine */
    struct scsi_task *task;
    /* coroutine to re-enter on completion; may be NULL */
    Coroutine *co;
    /* bottom half used to re-enter co outside of the libiscsi callback */
    QEMUBH *bh;
    IscsiLun *iscsilun;
    /* delays the retry after a BUSY status */
    QEMUTimer retry_timer;
} IscsiTask;
82 
/* AIO control block used by the SG_IO ioctl path. */
typedef struct IscsiAIOCB {
    BlockAIOCB common;
    QEMUIOVector *qiov;
    /* completion bottom half; non-NULL once completion has been scheduled */
    QEMUBH *bh;
    IscsiLun *iscsilun;
    /* libiscsi task for the command; released in iscsi_bh_cb() */
    struct scsi_task *task;
    /* optional buffer released with g_free() on completion */
    uint8_t *buf;
    /* -EINPROGRESS while pending, otherwise the value passed to common.cb */
    int status;
    int64_t sector_num;
    int nb_sectors;
#ifdef __linux__
    /* caller's SG_IO header; result fields are filled in on completion */
    sg_io_hdr_t *ioh;
#endif
} IscsiAIOCB;
97 
/* NOTE(review): NOP_INTERVAL is presumably in milliseconds; its users are
 * not part of this excerpt - confirm. */
#define NOP_INTERVAL 5000
#define MAX_NOP_FAILURES 3
/* Maximum number of retries per command: one per entry of the table below. */
#define ISCSI_CMD_RETRIES ARRAY_SIZE(iscsi_retry_times)
/* Mean back-off in milliseconds before retry #1, #2, ... after a BUSY
 * status; the actual delay is drawn by exp_random(). */
static const unsigned iscsi_retry_times[] = {8, 32, 128, 512, 2048};

/* This threshold is a trade-off knob to choose between the potential
 * additional overhead of an extra GET_LBA_STATUS request and
 * unnecessarily reading a lot of zero sectors over the wire.
 * If a read request covers at least ISCSI_CHECKALLOC_THRES sectors and
 * the allocationmap indicates that the area might be unallocated, the
 * allocation status of the area is checked first. */
#define ISCSI_CHECKALLOC_THRES 64
111 
112 static void
113 iscsi_bh_cb(void *p)
114 {
115     IscsiAIOCB *acb = p;
116 
117     qemu_bh_delete(acb->bh);
118 
119     g_free(acb->buf);
120     acb->buf = NULL;
121 
122     acb->common.cb(acb->common.opaque, acb->status);
123 
124     if (acb->task != NULL) {
125         scsi_free_scsi_task(acb->task);
126         acb->task = NULL;
127     }
128 
129     qemu_aio_unref(acb);
130 }
131 
132 static void
133 iscsi_schedule_bh(IscsiAIOCB *acb)
134 {
135     if (acb->bh) {
136         return;
137     }
138     acb->bh = aio_bh_new(acb->iscsilun->aio_context, iscsi_bh_cb, acb);
139     qemu_bh_schedule(acb->bh);
140 }
141 
142 static void iscsi_co_generic_bh_cb(void *opaque)
143 {
144     struct IscsiTask *iTask = opaque;
145     iTask->complete = 1;
146     qemu_bh_delete(iTask->bh);
147     qemu_coroutine_enter(iTask->co, NULL);
148 }
149 
150 static void iscsi_retry_timer_expired(void *opaque)
151 {
152     struct IscsiTask *iTask = opaque;
153     iTask->complete = 1;
154     if (iTask->co) {
155         qemu_coroutine_enter(iTask->co, NULL);
156     }
157 }
158 
159 static inline unsigned exp_random(double mean)
160 {
161     return -mean * log((double)rand() / RAND_MAX);
162 }
163 
164 static void
165 iscsi_co_generic_cb(struct iscsi_context *iscsi, int status,
166                         void *command_data, void *opaque)
167 {
168     struct IscsiTask *iTask = opaque;
169     struct scsi_task *task = command_data;
170 
171     iTask->status = status;
172     iTask->do_retry = 0;
173     iTask->task = task;
174 
175     if (status != SCSI_STATUS_GOOD) {
176         if (iTask->retries++ < ISCSI_CMD_RETRIES) {
177             if (status == SCSI_STATUS_CHECK_CONDITION
178                 && task->sense.key == SCSI_SENSE_UNIT_ATTENTION) {
179                 error_report("iSCSI CheckCondition: %s",
180                              iscsi_get_error(iscsi));
181                 iTask->do_retry = 1;
182                 goto out;
183             }
184             if (status == SCSI_STATUS_BUSY) {
185                 unsigned retry_time =
186                     exp_random(iscsi_retry_times[iTask->retries - 1]);
187                 error_report("iSCSI Busy (retry #%u in %u ms): %s",
188                              iTask->retries, retry_time,
189                              iscsi_get_error(iscsi));
190                 aio_timer_init(iTask->iscsilun->aio_context,
191                                &iTask->retry_timer, QEMU_CLOCK_REALTIME,
192                                SCALE_MS, iscsi_retry_timer_expired, iTask);
193                 timer_mod(&iTask->retry_timer,
194                           qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + retry_time);
195                 iTask->do_retry = 1;
196                 return;
197             }
198         }
199         error_report("iSCSI Failure: %s", iscsi_get_error(iscsi));
200     }
201 
202 out:
203     if (iTask->co) {
204         iTask->bh = aio_bh_new(iTask->iscsilun->aio_context,
205                                iscsi_co_generic_bh_cb, iTask);
206         qemu_bh_schedule(iTask->bh);
207     } else {
208         iTask->complete = 1;
209     }
210 }
211 
212 static void iscsi_co_init_iscsitask(IscsiLun *iscsilun, struct IscsiTask *iTask)
213 {
214     *iTask = (struct IscsiTask) {
215         .co         = qemu_coroutine_self(),
216         .iscsilun   = iscsilun,
217     };
218 }
219 
220 static void
221 iscsi_abort_task_cb(struct iscsi_context *iscsi, int status, void *command_data,
222                     void *private_data)
223 {
224     IscsiAIOCB *acb = private_data;
225 
226     acb->status = -ECANCELED;
227     iscsi_schedule_bh(acb);
228 }
229 
230 static void
231 iscsi_aio_cancel(BlockAIOCB *blockacb)
232 {
233     IscsiAIOCB *acb = (IscsiAIOCB *)blockacb;
234     IscsiLun *iscsilun = acb->iscsilun;
235 
236     if (acb->status != -EINPROGRESS) {
237         return;
238     }
239 
240     /* send a task mgmt call to the target to cancel the task on the target */
241     iscsi_task_mgmt_abort_task_async(iscsilun->iscsi, acb->task,
242                                      iscsi_abort_task_cb, acb);
243 
244 }
245 
/* AIOCB description used with qemu_aio_get(): allocation size of an
 * IscsiAIOCB and its asynchronous cancel hook. */
static const AIOCBInfo iscsi_aiocb_info = {
    .aiocb_size         = sizeof(IscsiAIOCB),
    .cancel_async       = iscsi_aio_cancel,
};
250 
251 
/* Forward declarations: iscsi_set_events() registers these fd handlers,
 * and each of them calls iscsi_set_events() in turn. */
static void iscsi_process_read(void *arg);
static void iscsi_process_write(void *arg);
254 
255 static void
256 iscsi_set_events(IscsiLun *iscsilun)
257 {
258     struct iscsi_context *iscsi = iscsilun->iscsi;
259     int ev;
260 
261     /* We always register a read handler.  */
262     ev = POLLIN;
263     ev |= iscsi_which_events(iscsi);
264     if (ev != iscsilun->events) {
265         aio_set_fd_handler(iscsilun->aio_context,
266                            iscsi_get_fd(iscsi),
267                            iscsi_process_read,
268                            (ev & POLLOUT) ? iscsi_process_write : NULL,
269                            iscsilun);
270 
271     }
272 
273     iscsilun->events = ev;
274 }
275 
276 static void
277 iscsi_process_read(void *arg)
278 {
279     IscsiLun *iscsilun = arg;
280     struct iscsi_context *iscsi = iscsilun->iscsi;
281 
282     iscsi_service(iscsi, POLLIN);
283     iscsi_set_events(iscsilun);
284 }
285 
286 static void
287 iscsi_process_write(void *arg)
288 {
289     IscsiLun *iscsilun = arg;
290     struct iscsi_context *iscsi = iscsilun->iscsi;
291 
292     iscsi_service(iscsi, POLLOUT);
293     iscsi_set_events(iscsilun);
294 }
295 
296 static int64_t sector_lun2qemu(int64_t sector, IscsiLun *iscsilun)
297 {
298     return sector * iscsilun->block_size / BDRV_SECTOR_SIZE;
299 }
300 
301 static int64_t sector_qemu2lun(int64_t sector, IscsiLun *iscsilun)
302 {
303     return sector * BDRV_SECTOR_SIZE / iscsilun->block_size;
304 }
305 
306 static bool is_request_lun_aligned(int64_t sector_num, int nb_sectors,
307                                       IscsiLun *iscsilun)
308 {
309     if ((sector_num * BDRV_SECTOR_SIZE) % iscsilun->block_size ||
310         (nb_sectors * BDRV_SECTOR_SIZE) % iscsilun->block_size) {
311             error_report("iSCSI misaligned request: "
312                          "iscsilun->block_size %u, sector_num %" PRIi64
313                          ", nb_sectors %d",
314                          iscsilun->block_size, sector_num, nb_sectors);
315             return 0;
316     }
317     return 1;
318 }
319 
320 static unsigned long *iscsi_allocationmap_init(IscsiLun *iscsilun)
321 {
322     return bitmap_try_new(DIV_ROUND_UP(sector_lun2qemu(iscsilun->num_blocks,
323                                                        iscsilun),
324                                        iscsilun->cluster_sectors));
325 }
326 
327 static void iscsi_allocationmap_set(IscsiLun *iscsilun, int64_t sector_num,
328                                     int nb_sectors)
329 {
330     if (iscsilun->allocationmap == NULL) {
331         return;
332     }
333     bitmap_set(iscsilun->allocationmap,
334                sector_num / iscsilun->cluster_sectors,
335                DIV_ROUND_UP(nb_sectors, iscsilun->cluster_sectors));
336 }
337 
338 static void iscsi_allocationmap_clear(IscsiLun *iscsilun, int64_t sector_num,
339                                       int nb_sectors)
340 {
341     int64_t cluster_num, nb_clusters;
342     if (iscsilun->allocationmap == NULL) {
343         return;
344     }
345     cluster_num = DIV_ROUND_UP(sector_num, iscsilun->cluster_sectors);
346     nb_clusters = (sector_num + nb_sectors) / iscsilun->cluster_sectors
347                   - cluster_num;
348     if (nb_clusters > 0) {
349         bitmap_clear(iscsilun->allocationmap, cluster_num, nb_clusters);
350     }
351 }
352 
353 static int coroutine_fn iscsi_co_writev(BlockDriverState *bs,
354                                         int64_t sector_num, int nb_sectors,
355                                         QEMUIOVector *iov)
356 {
357     IscsiLun *iscsilun = bs->opaque;
358     struct IscsiTask iTask;
359     uint64_t lba;
360     uint32_t num_sectors;
361 
362     if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
363         return -EINVAL;
364     }
365 
366     if (bs->bl.max_transfer_length && nb_sectors > bs->bl.max_transfer_length) {
367         error_report("iSCSI Error: Write of %d sectors exceeds max_xfer_len "
368                      "of %d sectors", nb_sectors, bs->bl.max_transfer_length);
369         return -EINVAL;
370     }
371 
372     lba = sector_qemu2lun(sector_num, iscsilun);
373     num_sectors = sector_qemu2lun(nb_sectors, iscsilun);
374     iscsi_co_init_iscsitask(iscsilun, &iTask);
375 retry:
376     if (iscsilun->use_16_for_rw) {
377         iTask.task = iscsi_write16_task(iscsilun->iscsi, iscsilun->lun, lba,
378                                         NULL, num_sectors * iscsilun->block_size,
379                                         iscsilun->block_size, 0, 0, 0, 0, 0,
380                                         iscsi_co_generic_cb, &iTask);
381     } else {
382         iTask.task = iscsi_write10_task(iscsilun->iscsi, iscsilun->lun, lba,
383                                         NULL, num_sectors * iscsilun->block_size,
384                                         iscsilun->block_size, 0, 0, 0, 0, 0,
385                                         iscsi_co_generic_cb, &iTask);
386     }
387     if (iTask.task == NULL) {
388         return -ENOMEM;
389     }
390     scsi_task_set_iov_out(iTask.task, (struct scsi_iovec *) iov->iov,
391                           iov->niov);
392     while (!iTask.complete) {
393         iscsi_set_events(iscsilun);
394         qemu_coroutine_yield();
395     }
396 
397     if (iTask.task != NULL) {
398         scsi_free_scsi_task(iTask.task);
399         iTask.task = NULL;
400     }
401 
402     if (iTask.do_retry) {
403         iTask.complete = 0;
404         goto retry;
405     }
406 
407     if (iTask.status != SCSI_STATUS_GOOD) {
408         return -EIO;
409     }
410 
411     iscsi_allocationmap_set(iscsilun, sector_num, nb_sectors);
412 
413     return 0;
414 }
415 
416 
417 static bool iscsi_allocationmap_is_allocated(IscsiLun *iscsilun,
418                                              int64_t sector_num, int nb_sectors)
419 {
420     unsigned long size;
421     if (iscsilun->allocationmap == NULL) {
422         return true;
423     }
424     size = DIV_ROUND_UP(sector_num + nb_sectors, iscsilun->cluster_sectors);
425     return !(find_next_bit(iscsilun->allocationmap, size,
426                            sector_num / iscsilun->cluster_sectors) == size);
427 }
428 
429 static int64_t coroutine_fn iscsi_co_get_block_status(BlockDriverState *bs,
430                                                   int64_t sector_num,
431                                                   int nb_sectors, int *pnum)
432 {
433     IscsiLun *iscsilun = bs->opaque;
434     struct scsi_get_lba_status *lbas = NULL;
435     struct scsi_lba_status_descriptor *lbasd = NULL;
436     struct IscsiTask iTask;
437     int64_t ret;
438 
439     iscsi_co_init_iscsitask(iscsilun, &iTask);
440 
441     if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
442         ret = -EINVAL;
443         goto out;
444     }
445 
446     /* default to all sectors allocated */
447     ret = BDRV_BLOCK_DATA;
448     ret |= (sector_num << BDRV_SECTOR_BITS) | BDRV_BLOCK_OFFSET_VALID;
449     *pnum = nb_sectors;
450 
451     /* LUN does not support logical block provisioning */
452     if (iscsilun->lbpme == 0) {
453         goto out;
454     }
455 
456 retry:
457     if (iscsi_get_lba_status_task(iscsilun->iscsi, iscsilun->lun,
458                                   sector_qemu2lun(sector_num, iscsilun),
459                                   8 + 16, iscsi_co_generic_cb,
460                                   &iTask) == NULL) {
461         ret = -ENOMEM;
462         goto out;
463     }
464 
465     while (!iTask.complete) {
466         iscsi_set_events(iscsilun);
467         qemu_coroutine_yield();
468     }
469 
470     if (iTask.do_retry) {
471         if (iTask.task != NULL) {
472             scsi_free_scsi_task(iTask.task);
473             iTask.task = NULL;
474         }
475         iTask.complete = 0;
476         goto retry;
477     }
478 
479     if (iTask.status != SCSI_STATUS_GOOD) {
480         /* in case the get_lba_status_callout fails (i.e.
481          * because the device is busy or the cmd is not
482          * supported) we pretend all blocks are allocated
483          * for backwards compatibility */
484         goto out;
485     }
486 
487     lbas = scsi_datain_unmarshall(iTask.task);
488     if (lbas == NULL) {
489         ret = -EIO;
490         goto out;
491     }
492 
493     lbasd = &lbas->descriptors[0];
494 
495     if (sector_qemu2lun(sector_num, iscsilun) != lbasd->lba) {
496         ret = -EIO;
497         goto out;
498     }
499 
500     *pnum = sector_lun2qemu(lbasd->num_blocks, iscsilun);
501 
502     if (lbasd->provisioning == SCSI_PROVISIONING_TYPE_DEALLOCATED ||
503         lbasd->provisioning == SCSI_PROVISIONING_TYPE_ANCHORED) {
504         ret &= ~BDRV_BLOCK_DATA;
505         if (iscsilun->lbprz) {
506             ret |= BDRV_BLOCK_ZERO;
507         }
508     }
509 
510     if (ret & BDRV_BLOCK_ZERO) {
511         iscsi_allocationmap_clear(iscsilun, sector_num, *pnum);
512     } else {
513         iscsi_allocationmap_set(iscsilun, sector_num, *pnum);
514     }
515 
516     if (*pnum > nb_sectors) {
517         *pnum = nb_sectors;
518     }
519 out:
520     if (iTask.task != NULL) {
521         scsi_free_scsi_task(iTask.task);
522     }
523     return ret;
524 }
525 
526 static int coroutine_fn iscsi_co_readv(BlockDriverState *bs,
527                                        int64_t sector_num, int nb_sectors,
528                                        QEMUIOVector *iov)
529 {
530     IscsiLun *iscsilun = bs->opaque;
531     struct IscsiTask iTask;
532     uint64_t lba;
533     uint32_t num_sectors;
534 
535     if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
536         return -EINVAL;
537     }
538 
539     if (bs->bl.max_transfer_length && nb_sectors > bs->bl.max_transfer_length) {
540         error_report("iSCSI Error: Read of %d sectors exceeds max_xfer_len "
541                      "of %d sectors", nb_sectors, bs->bl.max_transfer_length);
542         return -EINVAL;
543     }
544 
545     if (iscsilun->lbprz && nb_sectors >= ISCSI_CHECKALLOC_THRES &&
546         !iscsi_allocationmap_is_allocated(iscsilun, sector_num, nb_sectors)) {
547         int64_t ret;
548         int pnum;
549         ret = iscsi_co_get_block_status(bs, sector_num, INT_MAX, &pnum);
550         if (ret < 0) {
551             return ret;
552         }
553         if (ret & BDRV_BLOCK_ZERO && pnum >= nb_sectors) {
554             qemu_iovec_memset(iov, 0, 0x00, iov->size);
555             return 0;
556         }
557     }
558 
559     lba = sector_qemu2lun(sector_num, iscsilun);
560     num_sectors = sector_qemu2lun(nb_sectors, iscsilun);
561 
562     iscsi_co_init_iscsitask(iscsilun, &iTask);
563 retry:
564     if (iscsilun->use_16_for_rw) {
565         iTask.task = iscsi_read16_task(iscsilun->iscsi, iscsilun->lun, lba,
566                                        num_sectors * iscsilun->block_size,
567                                        iscsilun->block_size, 0, 0, 0, 0, 0,
568                                        iscsi_co_generic_cb, &iTask);
569     } else {
570         iTask.task = iscsi_read10_task(iscsilun->iscsi, iscsilun->lun, lba,
571                                        num_sectors * iscsilun->block_size,
572                                        iscsilun->block_size,
573                                        0, 0, 0, 0, 0,
574                                        iscsi_co_generic_cb, &iTask);
575     }
576     if (iTask.task == NULL) {
577         return -ENOMEM;
578     }
579     scsi_task_set_iov_in(iTask.task, (struct scsi_iovec *) iov->iov, iov->niov);
580 
581     while (!iTask.complete) {
582         iscsi_set_events(iscsilun);
583         qemu_coroutine_yield();
584     }
585 
586     if (iTask.task != NULL) {
587         scsi_free_scsi_task(iTask.task);
588         iTask.task = NULL;
589     }
590 
591     if (iTask.do_retry) {
592         iTask.complete = 0;
593         goto retry;
594     }
595 
596     if (iTask.status != SCSI_STATUS_GOOD) {
597         return -EIO;
598     }
599 
600     return 0;
601 }
602 
603 static int coroutine_fn iscsi_co_flush(BlockDriverState *bs)
604 {
605     IscsiLun *iscsilun = bs->opaque;
606     struct IscsiTask iTask;
607 
608     if (bs->sg) {
609         return 0;
610     }
611 
612     iscsi_co_init_iscsitask(iscsilun, &iTask);
613 
614 retry:
615     if (iscsi_synchronizecache10_task(iscsilun->iscsi, iscsilun->lun, 0, 0, 0,
616                                       0, iscsi_co_generic_cb, &iTask) == NULL) {
617         return -ENOMEM;
618     }
619 
620     while (!iTask.complete) {
621         iscsi_set_events(iscsilun);
622         qemu_coroutine_yield();
623     }
624 
625     if (iTask.task != NULL) {
626         scsi_free_scsi_task(iTask.task);
627         iTask.task = NULL;
628     }
629 
630     if (iTask.do_retry) {
631         iTask.complete = 0;
632         goto retry;
633     }
634 
635     if (iTask.status != SCSI_STATUS_GOOD) {
636         return -EIO;
637     }
638 
639     return 0;
640 }
641 
642 #ifdef __linux__
643 static void
644 iscsi_aio_ioctl_cb(struct iscsi_context *iscsi, int status,
645                      void *command_data, void *opaque)
646 {
647     IscsiAIOCB *acb = opaque;
648 
649     g_free(acb->buf);
650     acb->buf = NULL;
651 
652     acb->status = 0;
653     if (status < 0) {
654         error_report("Failed to ioctl(SG_IO) to iSCSI lun. %s",
655                      iscsi_get_error(iscsi));
656         acb->status = -EIO;
657     }
658 
659     acb->ioh->driver_status = 0;
660     acb->ioh->host_status   = 0;
661     acb->ioh->resid         = 0;
662 
663 #define SG_ERR_DRIVER_SENSE    0x08
664 
665     if (status == SCSI_STATUS_CHECK_CONDITION && acb->task->datain.size >= 2) {
666         int ss;
667 
668         acb->ioh->driver_status |= SG_ERR_DRIVER_SENSE;
669 
670         acb->ioh->sb_len_wr = acb->task->datain.size - 2;
671         ss = (acb->ioh->mx_sb_len >= acb->ioh->sb_len_wr) ?
672              acb->ioh->mx_sb_len : acb->ioh->sb_len_wr;
673         memcpy(acb->ioh->sbp, &acb->task->datain.data[2], ss);
674     }
675 
676     iscsi_schedule_bh(acb);
677 }
678 
679 static BlockAIOCB *iscsi_aio_ioctl(BlockDriverState *bs,
680         unsigned long int req, void *buf,
681         BlockCompletionFunc *cb, void *opaque)
682 {
683     IscsiLun *iscsilun = bs->opaque;
684     struct iscsi_context *iscsi = iscsilun->iscsi;
685     struct iscsi_data data;
686     IscsiAIOCB *acb;
687 
688     assert(req == SG_IO);
689 
690     acb = qemu_aio_get(&iscsi_aiocb_info, bs, cb, opaque);
691 
692     acb->iscsilun = iscsilun;
693     acb->bh          = NULL;
694     acb->status      = -EINPROGRESS;
695     acb->buf         = NULL;
696     acb->ioh         = buf;
697 
698     acb->task = malloc(sizeof(struct scsi_task));
699     if (acb->task == NULL) {
700         error_report("iSCSI: Failed to allocate task for scsi command. %s",
701                      iscsi_get_error(iscsi));
702         qemu_aio_unref(acb);
703         return NULL;
704     }
705     memset(acb->task, 0, sizeof(struct scsi_task));
706 
707     switch (acb->ioh->dxfer_direction) {
708     case SG_DXFER_TO_DEV:
709         acb->task->xfer_dir = SCSI_XFER_WRITE;
710         break;
711     case SG_DXFER_FROM_DEV:
712         acb->task->xfer_dir = SCSI_XFER_READ;
713         break;
714     default:
715         acb->task->xfer_dir = SCSI_XFER_NONE;
716         break;
717     }
718 
719     acb->task->cdb_size = acb->ioh->cmd_len;
720     memcpy(&acb->task->cdb[0], acb->ioh->cmdp, acb->ioh->cmd_len);
721     acb->task->expxferlen = acb->ioh->dxfer_len;
722 
723     data.size = 0;
724     if (acb->task->xfer_dir == SCSI_XFER_WRITE) {
725         if (acb->ioh->iovec_count == 0) {
726             data.data = acb->ioh->dxferp;
727             data.size = acb->ioh->dxfer_len;
728         } else {
729             scsi_task_set_iov_out(acb->task,
730                                  (struct scsi_iovec *) acb->ioh->dxferp,
731                                  acb->ioh->iovec_count);
732         }
733     }
734 
735     if (iscsi_scsi_command_async(iscsi, iscsilun->lun, acb->task,
736                                  iscsi_aio_ioctl_cb,
737                                  (data.size > 0) ? &data : NULL,
738                                  acb) != 0) {
739         scsi_free_scsi_task(acb->task);
740         qemu_aio_unref(acb);
741         return NULL;
742     }
743 
744     /* tell libiscsi to read straight into the buffer we got from ioctl */
745     if (acb->task->xfer_dir == SCSI_XFER_READ) {
746         if (acb->ioh->iovec_count == 0) {
747             scsi_task_add_data_in_buffer(acb->task,
748                                          acb->ioh->dxfer_len,
749                                          acb->ioh->dxferp);
750         } else {
751             scsi_task_set_iov_in(acb->task,
752                                  (struct scsi_iovec *) acb->ioh->dxferp,
753                                  acb->ioh->iovec_count);
754         }
755     }
756 
757     iscsi_set_events(iscsilun);
758 
759     return &acb->common;
760 }
761 
/*
 * Completion callback for the synchronous SG_IO path: stores the final
 * status in the int that @opaque points to.
 */
static void ioctl_cb(void *opaque, int status)
{
    *(int *)opaque = status;
}
767 
768 static int iscsi_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
769 {
770     IscsiLun *iscsilun = bs->opaque;
771     int status;
772 
773     switch (req) {
774     case SG_GET_VERSION_NUM:
775         *(int *)buf = 30000;
776         break;
777     case SG_GET_SCSI_ID:
778         ((struct sg_scsi_id *)buf)->scsi_type = iscsilun->type;
779         break;
780     case SG_IO:
781         status = -EINPROGRESS;
782         iscsi_aio_ioctl(bs, req, buf, ioctl_cb, &status);
783 
784         while (status == -EINPROGRESS) {
785             aio_poll(iscsilun->aio_context, true);
786         }
787 
788         return 0;
789     default:
790         return -1;
791     }
792     return 0;
793 }
794 #endif
795 
796 static int64_t
797 iscsi_getlength(BlockDriverState *bs)
798 {
799     IscsiLun *iscsilun = bs->opaque;
800     int64_t len;
801 
802     len  = iscsilun->num_blocks;
803     len *= iscsilun->block_size;
804 
805     return len;
806 }
807 
808 static int
809 coroutine_fn iscsi_co_discard(BlockDriverState *bs, int64_t sector_num,
810                                    int nb_sectors)
811 {
812     IscsiLun *iscsilun = bs->opaque;
813     struct IscsiTask iTask;
814     struct unmap_list list;
815 
816     if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
817         return -EINVAL;
818     }
819 
820     if (!iscsilun->lbp.lbpu) {
821         /* UNMAP is not supported by the target */
822         return 0;
823     }
824 
825     list.lba = sector_qemu2lun(sector_num, iscsilun);
826     list.num = sector_qemu2lun(nb_sectors, iscsilun);
827 
828     iscsi_co_init_iscsitask(iscsilun, &iTask);
829 retry:
830     if (iscsi_unmap_task(iscsilun->iscsi, iscsilun->lun, 0, 0, &list, 1,
831                      iscsi_co_generic_cb, &iTask) == NULL) {
832         return -ENOMEM;
833     }
834 
835     while (!iTask.complete) {
836         iscsi_set_events(iscsilun);
837         qemu_coroutine_yield();
838     }
839 
840     if (iTask.task != NULL) {
841         scsi_free_scsi_task(iTask.task);
842         iTask.task = NULL;
843     }
844 
845     if (iTask.do_retry) {
846         iTask.complete = 0;
847         goto retry;
848     }
849 
850     if (iTask.status == SCSI_STATUS_CHECK_CONDITION) {
851         /* the target might fail with a check condition if it
852            is not happy with the alignment of the UNMAP request
853            we silently fail in this case */
854         return 0;
855     }
856 
857     if (iTask.status != SCSI_STATUS_GOOD) {
858         return -EIO;
859     }
860 
861     iscsi_allocationmap_clear(iscsilun, sector_num, nb_sectors);
862 
863     return 0;
864 }
865 
866 static int
867 coroutine_fn iscsi_co_write_zeroes(BlockDriverState *bs, int64_t sector_num,
868                                    int nb_sectors, BdrvRequestFlags flags)
869 {
870     IscsiLun *iscsilun = bs->opaque;
871     struct IscsiTask iTask;
872     uint64_t lba;
873     uint32_t nb_blocks;
874     bool use_16_for_ws = iscsilun->use_16_for_rw;
875 
876     if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
877         return -EINVAL;
878     }
879 
880     if (flags & BDRV_REQ_MAY_UNMAP) {
881         if (!use_16_for_ws && !iscsilun->lbp.lbpws10) {
882             /* WRITESAME10 with UNMAP is unsupported try WRITESAME16 */
883             use_16_for_ws = true;
884         }
885         if (use_16_for_ws && !iscsilun->lbp.lbpws) {
886             /* WRITESAME16 with UNMAP is not supported by the target,
887              * fall back and try WRITESAME10/16 without UNMAP */
888             flags &= ~BDRV_REQ_MAY_UNMAP;
889             use_16_for_ws = iscsilun->use_16_for_rw;
890         }
891     }
892 
893     if (!(flags & BDRV_REQ_MAY_UNMAP) && !iscsilun->has_write_same) {
894         /* WRITESAME without UNMAP is not supported by the target */
895         return -ENOTSUP;
896     }
897 
898     lba = sector_qemu2lun(sector_num, iscsilun);
899     nb_blocks = sector_qemu2lun(nb_sectors, iscsilun);
900 
901     if (iscsilun->zeroblock == NULL) {
902         iscsilun->zeroblock = g_try_malloc0(iscsilun->block_size);
903         if (iscsilun->zeroblock == NULL) {
904             return -ENOMEM;
905         }
906     }
907 
908     iscsi_co_init_iscsitask(iscsilun, &iTask);
909 retry:
910     if (use_16_for_ws) {
911         iTask.task = iscsi_writesame16_task(iscsilun->iscsi, iscsilun->lun, lba,
912                                             iscsilun->zeroblock, iscsilun->block_size,
913                                             nb_blocks, 0, !!(flags & BDRV_REQ_MAY_UNMAP),
914                                             0, 0, iscsi_co_generic_cb, &iTask);
915     } else {
916         iTask.task = iscsi_writesame10_task(iscsilun->iscsi, iscsilun->lun, lba,
917                                             iscsilun->zeroblock, iscsilun->block_size,
918                                             nb_blocks, 0, !!(flags & BDRV_REQ_MAY_UNMAP),
919                                             0, 0, iscsi_co_generic_cb, &iTask);
920     }
921     if (iTask.task == NULL) {
922         return -ENOMEM;
923     }
924 
925     while (!iTask.complete) {
926         iscsi_set_events(iscsilun);
927         qemu_coroutine_yield();
928     }
929 
930     if (iTask.status == SCSI_STATUS_CHECK_CONDITION &&
931         iTask.task->sense.key == SCSI_SENSE_ILLEGAL_REQUEST &&
932         (iTask.task->sense.ascq == SCSI_SENSE_ASCQ_INVALID_OPERATION_CODE ||
933          iTask.task->sense.ascq == SCSI_SENSE_ASCQ_INVALID_FIELD_IN_CDB)) {
934         /* WRITE SAME is not supported by the target */
935         iscsilun->has_write_same = false;
936         scsi_free_scsi_task(iTask.task);
937         return -ENOTSUP;
938     }
939 
940     if (iTask.task != NULL) {
941         scsi_free_scsi_task(iTask.task);
942         iTask.task = NULL;
943     }
944 
945     if (iTask.do_retry) {
946         iTask.complete = 0;
947         goto retry;
948     }
949 
950     if (iTask.status != SCSI_STATUS_GOOD) {
951         return -EIO;
952     }
953 
954     if (flags & BDRV_REQ_MAY_UNMAP) {
955         iscsi_allocationmap_clear(iscsilun, sector_num, nb_sectors);
956     } else {
957         iscsi_allocationmap_set(iscsilun, sector_num, nb_sectors);
958     }
959 
960     return 0;
961 }
962 
963 static void parse_chap(struct iscsi_context *iscsi, const char *target,
964                        Error **errp)
965 {
966     QemuOptsList *list;
967     QemuOpts *opts;
968     const char *user = NULL;
969     const char *password = NULL;
970 
971     list = qemu_find_opts("iscsi");
972     if (!list) {
973         return;
974     }
975 
976     opts = qemu_opts_find(list, target);
977     if (opts == NULL) {
978         opts = QTAILQ_FIRST(&list->head);
979         if (!opts) {
980             return;
981         }
982     }
983 
984     user = qemu_opt_get(opts, "user");
985     if (!user) {
986         return;
987     }
988 
989     password = qemu_opt_get(opts, "password");
990     if (!password) {
991         error_setg(errp, "CHAP username specified but no password was given");
992         return;
993     }
994 
995     if (iscsi_set_initiator_username_pwd(iscsi, user, password)) {
996         error_setg(errp, "Failed to set initiator username and password");
997     }
998 }
999 
1000 static void parse_header_digest(struct iscsi_context *iscsi, const char *target,
1001                                 Error **errp)
1002 {
1003     QemuOptsList *list;
1004     QemuOpts *opts;
1005     const char *digest = NULL;
1006 
1007     list = qemu_find_opts("iscsi");
1008     if (!list) {
1009         return;
1010     }
1011 
1012     opts = qemu_opts_find(list, target);
1013     if (opts == NULL) {
1014         opts = QTAILQ_FIRST(&list->head);
1015         if (!opts) {
1016             return;
1017         }
1018     }
1019 
1020     digest = qemu_opt_get(opts, "header-digest");
1021     if (!digest) {
1022         return;
1023     }
1024 
1025     if (!strcmp(digest, "CRC32C")) {
1026         iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_CRC32C);
1027     } else if (!strcmp(digest, "NONE")) {
1028         iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE);
1029     } else if (!strcmp(digest, "CRC32C-NONE")) {
1030         iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_CRC32C_NONE);
1031     } else if (!strcmp(digest, "NONE-CRC32C")) {
1032         iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE_CRC32C);
1033     } else {
1034         error_setg(errp, "Invalid header-digest setting : %s", digest);
1035     }
1036 }
1037 
1038 static char *parse_initiator_name(const char *target)
1039 {
1040     QemuOptsList *list;
1041     QemuOpts *opts;
1042     const char *name;
1043     char *iscsi_name;
1044     UuidInfo *uuid_info;
1045 
1046     list = qemu_find_opts("iscsi");
1047     if (list) {
1048         opts = qemu_opts_find(list, target);
1049         if (!opts) {
1050             opts = QTAILQ_FIRST(&list->head);
1051         }
1052         if (opts) {
1053             name = qemu_opt_get(opts, "initiator-name");
1054             if (name) {
1055                 return g_strdup(name);
1056             }
1057         }
1058     }
1059 
1060     uuid_info = qmp_query_uuid(NULL);
1061     if (strcmp(uuid_info->UUID, UUID_NONE) == 0) {
1062         name = qemu_get_vm_name();
1063     } else {
1064         name = uuid_info->UUID;
1065     }
1066     iscsi_name = g_strdup_printf("iqn.2008-11.org.linux-kvm%s%s",
1067                                  name ? ":" : "", name ? name : "");
1068     qapi_free_UuidInfo(uuid_info);
1069     return iscsi_name;
1070 }
1071 
1072 static void iscsi_nop_timed_event(void *opaque)
1073 {
1074     IscsiLun *iscsilun = opaque;
1075 
1076     if (iscsi_get_nops_in_flight(iscsilun->iscsi) > MAX_NOP_FAILURES) {
1077         error_report("iSCSI: NOP timeout. Reconnecting...");
1078         iscsi_reconnect(iscsilun->iscsi);
1079     }
1080 
1081     if (iscsi_nop_out_async(iscsilun->iscsi, NULL, NULL, 0, NULL) != 0) {
1082         error_report("iSCSI: failed to sent NOP-Out. Disabling NOP messages.");
1083         return;
1084     }
1085 
1086     timer_mod(iscsilun->nop_timer, qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + NOP_INTERVAL);
1087     iscsi_set_events(iscsilun);
1088 }
1089 
1090 static void iscsi_readcapacity_sync(IscsiLun *iscsilun, Error **errp)
1091 {
1092     struct scsi_task *task = NULL;
1093     struct scsi_readcapacity10 *rc10 = NULL;
1094     struct scsi_readcapacity16 *rc16 = NULL;
1095     int retries = ISCSI_CMD_RETRIES;
1096 
1097     do {
1098         if (task != NULL) {
1099             scsi_free_scsi_task(task);
1100             task = NULL;
1101         }
1102 
1103         switch (iscsilun->type) {
1104         case TYPE_DISK:
1105             task = iscsi_readcapacity16_sync(iscsilun->iscsi, iscsilun->lun);
1106             if (task != NULL && task->status == SCSI_STATUS_GOOD) {
1107                 rc16 = scsi_datain_unmarshall(task);
1108                 if (rc16 == NULL) {
1109                     error_setg(errp, "iSCSI: Failed to unmarshall readcapacity16 data.");
1110                 } else {
1111                     iscsilun->block_size = rc16->block_length;
1112                     iscsilun->num_blocks = rc16->returned_lba + 1;
1113                     iscsilun->lbpme = rc16->lbpme;
1114                     iscsilun->lbprz = rc16->lbprz;
1115                     iscsilun->use_16_for_rw = (rc16->returned_lba > 0xffffffff);
1116                 }
1117             }
1118             break;
1119         case TYPE_ROM:
1120             task = iscsi_readcapacity10_sync(iscsilun->iscsi, iscsilun->lun, 0, 0);
1121             if (task != NULL && task->status == SCSI_STATUS_GOOD) {
1122                 rc10 = scsi_datain_unmarshall(task);
1123                 if (rc10 == NULL) {
1124                     error_setg(errp, "iSCSI: Failed to unmarshall readcapacity10 data.");
1125                 } else {
1126                     iscsilun->block_size = rc10->block_size;
1127                     if (rc10->lba == 0) {
1128                         /* blank disk loaded */
1129                         iscsilun->num_blocks = 0;
1130                     } else {
1131                         iscsilun->num_blocks = rc10->lba + 1;
1132                     }
1133                 }
1134             }
1135             break;
1136         default:
1137             return;
1138         }
1139     } while (task != NULL && task->status == SCSI_STATUS_CHECK_CONDITION
1140              && task->sense.key == SCSI_SENSE_UNIT_ATTENTION
1141              && retries-- > 0);
1142 
1143     if (task == NULL || task->status != SCSI_STATUS_GOOD) {
1144         error_setg(errp, "iSCSI: failed to send readcapacity10 command.");
1145     }
1146     if (task) {
1147         scsi_free_scsi_task(task);
1148     }
1149 }
1150 
1151 /* TODO Convert to fine grained options */
1152 static QemuOptsList runtime_opts = {
1153     .name = "iscsi",
1154     .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
1155     .desc = {
1156         {
1157             .name = "filename",
1158             .type = QEMU_OPT_STRING,
1159             .help = "URL to the iscsi image",
1160         },
1161         { /* end of list */ }
1162     },
1163 };
1164 
1165 static struct scsi_task *iscsi_do_inquiry(struct iscsi_context *iscsi, int lun,
1166                                           int evpd, int pc, void **inq, Error **errp)
1167 {
1168     int full_size;
1169     struct scsi_task *task = NULL;
1170     task = iscsi_inquiry_sync(iscsi, lun, evpd, pc, 64);
1171     if (task == NULL || task->status != SCSI_STATUS_GOOD) {
1172         goto fail;
1173     }
1174     full_size = scsi_datain_getfullsize(task);
1175     if (full_size > task->datain.size) {
1176         scsi_free_scsi_task(task);
1177 
1178         /* we need more data for the full list */
1179         task = iscsi_inquiry_sync(iscsi, lun, evpd, pc, full_size);
1180         if (task == NULL || task->status != SCSI_STATUS_GOOD) {
1181             goto fail;
1182         }
1183     }
1184 
1185     *inq = scsi_datain_unmarshall(task);
1186     if (*inq == NULL) {
1187         error_setg(errp, "iSCSI: failed to unmarshall inquiry datain blob");
1188         goto fail_with_err;
1189     }
1190 
1191     return task;
1192 
1193 fail:
1194     error_setg(errp, "iSCSI: Inquiry command failed : %s",
1195                iscsi_get_error(iscsi));
1196 fail_with_err:
1197     if (task != NULL) {
1198         scsi_free_scsi_task(task);
1199     }
1200     return NULL;
1201 }
1202 
1203 static void iscsi_detach_aio_context(BlockDriverState *bs)
1204 {
1205     IscsiLun *iscsilun = bs->opaque;
1206 
1207     aio_set_fd_handler(iscsilun->aio_context,
1208                        iscsi_get_fd(iscsilun->iscsi),
1209                        NULL, NULL, NULL);
1210     iscsilun->events = 0;
1211 
1212     if (iscsilun->nop_timer) {
1213         timer_del(iscsilun->nop_timer);
1214         timer_free(iscsilun->nop_timer);
1215         iscsilun->nop_timer = NULL;
1216     }
1217 }
1218 
1219 static void iscsi_attach_aio_context(BlockDriverState *bs,
1220                                      AioContext *new_context)
1221 {
1222     IscsiLun *iscsilun = bs->opaque;
1223 
1224     iscsilun->aio_context = new_context;
1225     iscsi_set_events(iscsilun);
1226 
1227     /* Set up a timer for sending out iSCSI NOPs */
1228     iscsilun->nop_timer = aio_timer_new(iscsilun->aio_context,
1229                                         QEMU_CLOCK_REALTIME, SCALE_MS,
1230                                         iscsi_nop_timed_event, iscsilun);
1231     timer_mod(iscsilun->nop_timer,
1232               qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + NOP_INTERVAL);
1233 }
1234 
1235 static bool iscsi_is_write_protected(IscsiLun *iscsilun)
1236 {
1237     struct scsi_task *task;
1238     struct scsi_mode_sense *ms = NULL;
1239     bool wrprotected = false;
1240 
1241     task = iscsi_modesense6_sync(iscsilun->iscsi, iscsilun->lun,
1242                                  1, SCSI_MODESENSE_PC_CURRENT,
1243                                  0x3F, 0, 255);
1244     if (task == NULL) {
1245         error_report("iSCSI: Failed to send MODE_SENSE(6) command: %s",
1246                      iscsi_get_error(iscsilun->iscsi));
1247         goto out;
1248     }
1249 
1250     if (task->status != SCSI_STATUS_GOOD) {
1251         error_report("iSCSI: Failed MODE_SENSE(6), LUN assumed writable");
1252         goto out;
1253     }
1254     ms = scsi_datain_unmarshall(task);
1255     if (!ms) {
1256         error_report("iSCSI: Failed to unmarshall MODE_SENSE(6) data: %s",
1257                      iscsi_get_error(iscsilun->iscsi));
1258         goto out;
1259     }
1260     wrprotected = ms->device_specific_parameter & 0x80;
1261 
1262 out:
1263     if (task) {
1264         scsi_free_scsi_task(task);
1265     }
1266     return wrprotected;
1267 }
1268 
1269 /*
1270  * We support iscsi url's on the form
1271  * iscsi://[<username>%<password>@]<host>[:<port>]/<targetname>/<lun>
1272  */
1273 static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
1274                       Error **errp)
1275 {
1276     IscsiLun *iscsilun = bs->opaque;
1277     struct iscsi_context *iscsi = NULL;
1278     struct iscsi_url *iscsi_url = NULL;
1279     struct scsi_task *task = NULL;
1280     struct scsi_inquiry_standard *inq = NULL;
1281     struct scsi_inquiry_supported_pages *inq_vpd;
1282     char *initiator_name = NULL;
1283     QemuOpts *opts;
1284     Error *local_err = NULL;
1285     const char *filename;
1286     int i, ret = 0;
1287 
1288     if ((BDRV_SECTOR_SIZE % 512) != 0) {
1289         error_setg(errp, "iSCSI: Invalid BDRV_SECTOR_SIZE. "
1290                    "BDRV_SECTOR_SIZE(%lld) is not a multiple "
1291                    "of 512", BDRV_SECTOR_SIZE);
1292         return -EINVAL;
1293     }
1294 
1295     opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
1296     qemu_opts_absorb_qdict(opts, options, &local_err);
1297     if (local_err) {
1298         error_propagate(errp, local_err);
1299         ret = -EINVAL;
1300         goto out;
1301     }
1302 
1303     filename = qemu_opt_get(opts, "filename");
1304 
1305     iscsi_url = iscsi_parse_full_url(iscsi, filename);
1306     if (iscsi_url == NULL) {
1307         error_setg(errp, "Failed to parse URL : %s", filename);
1308         ret = -EINVAL;
1309         goto out;
1310     }
1311 
1312     memset(iscsilun, 0, sizeof(IscsiLun));
1313 
1314     initiator_name = parse_initiator_name(iscsi_url->target);
1315 
1316     iscsi = iscsi_create_context(initiator_name);
1317     if (iscsi == NULL) {
1318         error_setg(errp, "iSCSI: Failed to create iSCSI context.");
1319         ret = -ENOMEM;
1320         goto out;
1321     }
1322 
1323     if (iscsi_set_targetname(iscsi, iscsi_url->target)) {
1324         error_setg(errp, "iSCSI: Failed to set target name.");
1325         ret = -EINVAL;
1326         goto out;
1327     }
1328 
1329     if (iscsi_url->user[0] != '\0') {
1330         ret = iscsi_set_initiator_username_pwd(iscsi, iscsi_url->user,
1331                                               iscsi_url->passwd);
1332         if (ret != 0) {
1333             error_setg(errp, "Failed to set initiator username and password");
1334             ret = -EINVAL;
1335             goto out;
1336         }
1337     }
1338 
1339     /* check if we got CHAP username/password via the options */
1340     parse_chap(iscsi, iscsi_url->target, &local_err);
1341     if (local_err != NULL) {
1342         error_propagate(errp, local_err);
1343         ret = -EINVAL;
1344         goto out;
1345     }
1346 
1347     if (iscsi_set_session_type(iscsi, ISCSI_SESSION_NORMAL) != 0) {
1348         error_setg(errp, "iSCSI: Failed to set session type to normal.");
1349         ret = -EINVAL;
1350         goto out;
1351     }
1352 
1353     iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE_CRC32C);
1354 
1355     /* check if we got HEADER_DIGEST via the options */
1356     parse_header_digest(iscsi, iscsi_url->target, &local_err);
1357     if (local_err != NULL) {
1358         error_propagate(errp, local_err);
1359         ret = -EINVAL;
1360         goto out;
1361     }
1362 
1363     if (iscsi_full_connect_sync(iscsi, iscsi_url->portal, iscsi_url->lun) != 0) {
1364         error_setg(errp, "iSCSI: Failed to connect to LUN : %s",
1365             iscsi_get_error(iscsi));
1366         ret = -EINVAL;
1367         goto out;
1368     }
1369 
1370     iscsilun->iscsi = iscsi;
1371     iscsilun->aio_context = bdrv_get_aio_context(bs);
1372     iscsilun->lun   = iscsi_url->lun;
1373     iscsilun->has_write_same = true;
1374 
1375     task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 0, 0,
1376                             (void **) &inq, errp);
1377     if (task == NULL) {
1378         ret = -EINVAL;
1379         goto out;
1380     }
1381     iscsilun->type = inq->periperal_device_type;
1382     scsi_free_scsi_task(task);
1383     task = NULL;
1384 
1385     iscsilun->write_protected = iscsi_is_write_protected(iscsilun);
1386     /* Check the write protect flag of the LUN if we want to write */
1387     if (iscsilun->type == TYPE_DISK && (flags & BDRV_O_RDWR) &&
1388         iscsilun->write_protected) {
1389         error_setg(errp, "Cannot open a write protected LUN as read-write");
1390         ret = -EACCES;
1391         goto out;
1392     }
1393 
1394     iscsi_readcapacity_sync(iscsilun, &local_err);
1395     if (local_err != NULL) {
1396         error_propagate(errp, local_err);
1397         ret = -EINVAL;
1398         goto out;
1399     }
1400     bs->total_sectors = sector_lun2qemu(iscsilun->num_blocks, iscsilun);
1401     bs->request_alignment = iscsilun->block_size;
1402 
1403     /* We don't have any emulation for devices other than disks and CD-ROMs, so
1404      * this must be sg ioctl compatible. We force it to be sg, otherwise qemu
1405      * will try to read from the device to guess the image format.
1406      */
1407     if (iscsilun->type != TYPE_DISK && iscsilun->type != TYPE_ROM) {
1408         bs->sg = 1;
1409     }
1410 
1411     task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 1,
1412                             SCSI_INQUIRY_PAGECODE_SUPPORTED_VPD_PAGES,
1413                             (void **) &inq_vpd, errp);
1414     if (task == NULL) {
1415         ret = -EINVAL;
1416         goto out;
1417     }
1418     for (i = 0; i < inq_vpd->num_pages; i++) {
1419         struct scsi_task *inq_task;
1420         struct scsi_inquiry_logical_block_provisioning *inq_lbp;
1421         struct scsi_inquiry_block_limits *inq_bl;
1422         switch (inq_vpd->pages[i]) {
1423         case SCSI_INQUIRY_PAGECODE_LOGICAL_BLOCK_PROVISIONING:
1424             inq_task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 1,
1425                                         SCSI_INQUIRY_PAGECODE_LOGICAL_BLOCK_PROVISIONING,
1426                                         (void **) &inq_lbp, errp);
1427             if (inq_task == NULL) {
1428                 ret = -EINVAL;
1429                 goto out;
1430             }
1431             memcpy(&iscsilun->lbp, inq_lbp,
1432                    sizeof(struct scsi_inquiry_logical_block_provisioning));
1433             scsi_free_scsi_task(inq_task);
1434             break;
1435         case SCSI_INQUIRY_PAGECODE_BLOCK_LIMITS:
1436             inq_task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 1,
1437                                     SCSI_INQUIRY_PAGECODE_BLOCK_LIMITS,
1438                                     (void **) &inq_bl, errp);
1439             if (inq_task == NULL) {
1440                 ret = -EINVAL;
1441                 goto out;
1442             }
1443             memcpy(&iscsilun->bl, inq_bl,
1444                    sizeof(struct scsi_inquiry_block_limits));
1445             scsi_free_scsi_task(inq_task);
1446             break;
1447         default:
1448             break;
1449         }
1450     }
1451     scsi_free_scsi_task(task);
1452     task = NULL;
1453 
1454     iscsi_attach_aio_context(bs, iscsilun->aio_context);
1455 
1456     /* Guess the internal cluster (page) size of the iscsi target by the means
1457      * of opt_unmap_gran. Transfer the unmap granularity only if it has a
1458      * reasonable size */
1459     if (iscsilun->bl.opt_unmap_gran * iscsilun->block_size >= 4 * 1024 &&
1460         iscsilun->bl.opt_unmap_gran * iscsilun->block_size <= 16 * 1024 * 1024) {
1461         iscsilun->cluster_sectors = (iscsilun->bl.opt_unmap_gran *
1462                                      iscsilun->block_size) >> BDRV_SECTOR_BITS;
1463         if (iscsilun->lbprz && !(bs->open_flags & BDRV_O_NOCACHE)) {
1464             iscsilun->allocationmap = iscsi_allocationmap_init(iscsilun);
1465             if (iscsilun->allocationmap == NULL) {
1466                 ret = -ENOMEM;
1467             }
1468         }
1469     }
1470 
1471 out:
1472     qemu_opts_del(opts);
1473     g_free(initiator_name);
1474     if (iscsi_url != NULL) {
1475         iscsi_destroy_url(iscsi_url);
1476     }
1477     if (task != NULL) {
1478         scsi_free_scsi_task(task);
1479     }
1480 
1481     if (ret) {
1482         if (iscsi != NULL) {
1483             iscsi_destroy_context(iscsi);
1484         }
1485         memset(iscsilun, 0, sizeof(IscsiLun));
1486     }
1487     return ret;
1488 }
1489 
1490 static void iscsi_close(BlockDriverState *bs)
1491 {
1492     IscsiLun *iscsilun = bs->opaque;
1493     struct iscsi_context *iscsi = iscsilun->iscsi;
1494 
1495     iscsi_detach_aio_context(bs);
1496     iscsi_destroy_context(iscsi);
1497     g_free(iscsilun->zeroblock);
1498     g_free(iscsilun->allocationmap);
1499     memset(iscsilun, 0, sizeof(IscsiLun));
1500 }
1501 
1502 static int sector_limits_lun2qemu(int64_t sector, IscsiLun *iscsilun)
1503 {
1504     return MIN(sector_lun2qemu(sector, iscsilun), INT_MAX / 2 + 1);
1505 }
1506 
1507 static void iscsi_refresh_limits(BlockDriverState *bs, Error **errp)
1508 {
1509     /* We don't actually refresh here, but just return data queried in
1510      * iscsi_open(): iscsi targets don't change their limits. */
1511 
1512     IscsiLun *iscsilun = bs->opaque;
1513     uint32_t max_xfer_len = iscsilun->use_16_for_rw ? 0xffffffff : 0xffff;
1514 
1515     if (iscsilun->bl.max_xfer_len) {
1516         max_xfer_len = MIN(max_xfer_len, iscsilun->bl.max_xfer_len);
1517     }
1518 
1519     bs->bl.max_transfer_length = sector_limits_lun2qemu(max_xfer_len, iscsilun);
1520 
1521     if (iscsilun->lbp.lbpu) {
1522         if (iscsilun->bl.max_unmap < 0xffffffff) {
1523             bs->bl.max_discard =
1524                 sector_limits_lun2qemu(iscsilun->bl.max_unmap, iscsilun);
1525         }
1526         bs->bl.discard_alignment =
1527             sector_limits_lun2qemu(iscsilun->bl.opt_unmap_gran, iscsilun);
1528     }
1529 
1530     if (iscsilun->bl.max_ws_len < 0xffffffff) {
1531         bs->bl.max_write_zeroes =
1532             sector_limits_lun2qemu(iscsilun->bl.max_ws_len, iscsilun);
1533     }
1534     if (iscsilun->lbp.lbpws) {
1535         bs->bl.write_zeroes_alignment =
1536             sector_limits_lun2qemu(iscsilun->bl.opt_unmap_gran, iscsilun);
1537     }
1538     bs->bl.opt_transfer_length =
1539         sector_limits_lun2qemu(iscsilun->bl.opt_xfer_len, iscsilun);
1540 }
1541 
1542 /* Note that this will not re-establish a connection with an iSCSI target - it
1543  * is effectively a NOP.  */
1544 static int iscsi_reopen_prepare(BDRVReopenState *state,
1545                                 BlockReopenQueue *queue, Error **errp)
1546 {
1547     IscsiLun *iscsilun = state->bs->opaque;
1548 
1549     if (state->flags & BDRV_O_RDWR && iscsilun->write_protected) {
1550         error_setg(errp, "Cannot open a write protected LUN as read-write");
1551         return -EACCES;
1552     }
1553     return 0;
1554 }
1555 
1556 static int iscsi_truncate(BlockDriverState *bs, int64_t offset)
1557 {
1558     IscsiLun *iscsilun = bs->opaque;
1559     Error *local_err = NULL;
1560 
1561     if (iscsilun->type != TYPE_DISK) {
1562         return -ENOTSUP;
1563     }
1564 
1565     iscsi_readcapacity_sync(iscsilun, &local_err);
1566     if (local_err != NULL) {
1567         error_free(local_err);
1568         return -EIO;
1569     }
1570 
1571     if (offset > iscsi_getlength(bs)) {
1572         return -EINVAL;
1573     }
1574 
1575     if (iscsilun->allocationmap != NULL) {
1576         g_free(iscsilun->allocationmap);
1577         iscsilun->allocationmap = iscsi_allocationmap_init(iscsilun);
1578     }
1579 
1580     return 0;
1581 }
1582 
1583 static int iscsi_create(const char *filename, QemuOpts *opts, Error **errp)
1584 {
1585     int ret = 0;
1586     int64_t total_size = 0;
1587     BlockDriverState *bs;
1588     IscsiLun *iscsilun = NULL;
1589     QDict *bs_options;
1590 
1591     bs = bdrv_new();
1592 
1593     /* Read out options */
1594     total_size = DIV_ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
1595                               BDRV_SECTOR_SIZE);
1596     bs->opaque = g_new0(struct IscsiLun, 1);
1597     iscsilun = bs->opaque;
1598 
1599     bs_options = qdict_new();
1600     qdict_put(bs_options, "filename", qstring_from_str(filename));
1601     ret = iscsi_open(bs, bs_options, 0, NULL);
1602     QDECREF(bs_options);
1603 
1604     if (ret != 0) {
1605         goto out;
1606     }
1607     iscsi_detach_aio_context(bs);
1608     if (iscsilun->type != TYPE_DISK) {
1609         ret = -ENODEV;
1610         goto out;
1611     }
1612     if (bs->total_sectors < total_size) {
1613         ret = -ENOSPC;
1614         goto out;
1615     }
1616 
1617     ret = 0;
1618 out:
1619     if (iscsilun->iscsi != NULL) {
1620         iscsi_destroy_context(iscsilun->iscsi);
1621     }
1622     g_free(bs->opaque);
1623     bs->opaque = NULL;
1624     bdrv_unref(bs);
1625     return ret;
1626 }
1627 
1628 static int iscsi_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
1629 {
1630     IscsiLun *iscsilun = bs->opaque;
1631     bdi->unallocated_blocks_are_zero = !!iscsilun->lbprz;
1632     bdi->can_write_zeroes_with_unmap = iscsilun->lbprz && iscsilun->lbp.lbpws;
1633     bdi->cluster_size = iscsilun->cluster_sectors * BDRV_SECTOR_SIZE;
1634     return 0;
1635 }
1636 
1637 static QemuOptsList iscsi_create_opts = {
1638     .name = "iscsi-create-opts",
1639     .head = QTAILQ_HEAD_INITIALIZER(iscsi_create_opts.head),
1640     .desc = {
1641         {
1642             .name = BLOCK_OPT_SIZE,
1643             .type = QEMU_OPT_SIZE,
1644             .help = "Virtual disk size"
1645         },
1646         { /* end of list */ }
1647     }
1648 };
1649 
1650 static BlockDriver bdrv_iscsi = {
1651     .format_name     = "iscsi",
1652     .protocol_name   = "iscsi",
1653 
1654     .instance_size   = sizeof(IscsiLun),
1655     .bdrv_needs_filename = true,
1656     .bdrv_file_open  = iscsi_open,
1657     .bdrv_close      = iscsi_close,
1658     .bdrv_create     = iscsi_create,
1659     .create_opts     = &iscsi_create_opts,
1660     .bdrv_reopen_prepare  = iscsi_reopen_prepare,
1661 
1662     .bdrv_getlength  = iscsi_getlength,
1663     .bdrv_get_info   = iscsi_get_info,
1664     .bdrv_truncate   = iscsi_truncate,
1665     .bdrv_refresh_limits = iscsi_refresh_limits,
1666 
1667     .bdrv_co_get_block_status = iscsi_co_get_block_status,
1668     .bdrv_co_discard      = iscsi_co_discard,
1669     .bdrv_co_write_zeroes = iscsi_co_write_zeroes,
1670     .bdrv_co_readv         = iscsi_co_readv,
1671     .bdrv_co_writev        = iscsi_co_writev,
1672     .bdrv_co_flush_to_disk = iscsi_co_flush,
1673 
1674 #ifdef __linux__
1675     .bdrv_ioctl       = iscsi_ioctl,
1676     .bdrv_aio_ioctl   = iscsi_aio_ioctl,
1677 #endif
1678 
1679     .bdrv_detach_aio_context = iscsi_detach_aio_context,
1680     .bdrv_attach_aio_context = iscsi_attach_aio_context,
1681 };
1682 
1683 static QemuOptsList qemu_iscsi_opts = {
1684     .name = "iscsi",
1685     .head = QTAILQ_HEAD_INITIALIZER(qemu_iscsi_opts.head),
1686     .desc = {
1687         {
1688             .name = "user",
1689             .type = QEMU_OPT_STRING,
1690             .help = "username for CHAP authentication to target",
1691         },{
1692             .name = "password",
1693             .type = QEMU_OPT_STRING,
1694             .help = "password for CHAP authentication to target",
1695         },{
1696             .name = "header-digest",
1697             .type = QEMU_OPT_STRING,
1698             .help = "HeaderDigest setting. "
1699                     "{CRC32C|CRC32C-NONE|NONE-CRC32C|NONE}",
1700         },{
1701             .name = "initiator-name",
1702             .type = QEMU_OPT_STRING,
1703             .help = "Initiator iqn name to use when connecting",
1704         },
1705         { /* end of list */ }
1706     },
1707 };
1708 
1709 static void iscsi_block_init(void)
1710 {
1711     bdrv_register(&bdrv_iscsi);
1712     qemu_add_opts(&qemu_iscsi_opts);
1713 }
1714 
1715 block_init(iscsi_block_init);
1716