xref: /openbmc/qemu/block/iscsi.c (revision b917da4c)
1 /*
2  * QEMU Block driver for iSCSI images
3  *
4  * Copyright (c) 2010-2011 Ronnie Sahlberg <ronniesahlberg@gmail.com>
5  * Copyright (c) 2012-2015 Peter Lieven <pl@kamp.de>
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a copy
8  * of this software and associated documentation files (the "Software"), to deal
9  * in the Software without restriction, including without limitation the rights
10  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11  * copies of the Software, and to permit persons to whom the Software is
12  * furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice shall be included in
15  * all copies or substantial portions of the Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23  * THE SOFTWARE.
24  */
25 
26 #include "qemu/osdep.h"
27 
28 #include <poll.h>
29 #include <math.h>
30 #include <arpa/inet.h>
31 #include "qemu-common.h"
32 #include "qemu/config-file.h"
33 #include "qemu/error-report.h"
34 #include "qemu/bitops.h"
35 #include "qemu/bitmap.h"
36 #include "block/block_int.h"
37 #include "block/scsi.h"
38 #include "qemu/iov.h"
39 #include "sysemu/sysemu.h"
40 #include "qmp-commands.h"
41 #include "qapi/qmp/qstring.h"
42 #include "crypto/secret.h"
43 
44 #include <iscsi/iscsi.h>
45 #include <iscsi/scsi-lowlevel.h>
46 
47 #ifdef __linux__
48 #include <scsi/sg.h>
49 #include <block/scsi.h>
50 #endif
51 
/*
 * Per-LUN driver state; the block layer hands it back as bs->opaque.
 */
typedef struct IscsiLun {
    /* libiscsi connection context used for every request */
    struct iscsi_context *iscsi;
    /* AioContext in which fd handlers, bottom halves and timers run */
    AioContext *aio_context;
    int lun;
    /* device type, reported to the guest through SG_GET_SCSI_ID */
    enum scsi_inquiry_peripheral_device_type type;
    /* size of one LUN block in bytes */
    int block_size;
    /* LUN capacity counted in blocks of block_size bytes */
    uint64_t num_blocks;
    /* poll event mask last registered by iscsi_set_events() */
    int events;
    QEMUTimer *nop_timer;
    /* periodic timer re-armed by iscsi_timed_check_events() */
    QEMUTimer *event_timer;
    /* lbp.lbpu / lbp.lbpws / lbp.lbpws10 gate UNMAP and WRITE SAME usage */
    struct scsi_inquiry_logical_block_provisioning lbp;
    struct scsi_inquiry_block_limits bl;
    unsigned char *zeroblock;
    /* one bit per cluster of cluster_sectors sectors; NULL disables the map */
    unsigned long *allocationmap;
    int cluster_sectors;
    /* issue READ16/WRITE16 instead of READ10/WRITE10 */
    bool use_16_for_rw;
    bool write_protected;
    /* false: LUN does not support logical block provisioning */
    bool lbpme;
    /* true: deallocated/anchored blocks are reported as reading zero */
    bool lbprz;
    /* writes set the FUA bit when this is set and the write cache is off */
    bool dpofua;
    /* false: WRITE SAME without UNMAP is not available */
    bool has_write_same;
    /* set after a successful non-FUA write; iscsi_co_flush() is a no-op
     * while it is false */
    bool force_next_flush;
    /* set when a request timed out; triggers a reconnect from
     * iscsi_timed_check_events() */
    bool request_timed_out;
} IscsiLun;
76 
/*
 * State of one coroutine-based request, shared between the issuing
 * coroutine and iscsi_co_generic_cb().
 */
typedef struct IscsiTask {
    /* SCSI status passed to the completion callback */
    int status;
    /* non-zero once the coroutine may stop yielding */
    int complete;
    /* number of failed attempts seen so far */
    int retries;
    /* non-zero: the caller has to re-issue the command */
    int do_retry;
    /* libiscsi task; freed by the issuing coroutine */
    struct scsi_task *task;
    /* coroutine to re-enter on completion */
    Coroutine *co;
    /* bottom half used to re-enter the coroutine */
    QEMUBH *bh;
    IscsiLun *iscsilun;
    /* delays the retry of BUSY / TIMEOUT / TASK SET FULL requests */
    QEMUTimer retry_timer;
    /* merged into iscsilun->force_next_flush when the request succeeds */
    bool force_next_flush;
    /* negative errno from iscsi_translate_sense() on failure */
    int err_code;
} IscsiTask;
90 
/*
 * AIO control block for callback-based requests (the ioctl path).
 */
typedef struct IscsiAIOCB {
    /* generic AIOCB header; carries the completion callback and opaque */
    BlockAIOCB common;
    QEMUIOVector *qiov;
    /* completion bottom half; non-NULL once completion is scheduled */
    QEMUBH *bh;
    IscsiLun *iscsilun;
    /* libiscsi task backing the request */
    struct scsi_task *task;
    /* scratch buffer released with g_free() on completion */
    uint8_t *buf;
    /* -EINPROGRESS while pending, then the result passed to the callback */
    int status;
    int64_t sector_num;
    int nb_sectors;
    /* result of an emulated (non SG_IO) ioctl */
    int ret;
#ifdef __linux__
    /* SG_IO header supplied by the ioctl caller */
    sg_io_hdr_t *ioh;
#endif
} IscsiAIOCB;
106 
/* libiscsi uses time_t so it's enough to process events every second */
#define EVENT_INTERVAL 1000 /* in ms; added to qemu_clock_get_ms() */
#define NOP_INTERVAL 5000
#define MAX_NOP_FAILURES 3
/* a request is retried at most once per entry of iscsi_retry_times */
#define ISCSI_CMD_RETRIES ARRAY_SIZE(iscsi_retry_times)
/* mean delay in ms before retry #1, #2, ...; randomized by exp_random() */
static const unsigned iscsi_retry_times[] = {8, 32, 128, 512, 2048, 8192, 32768};
113 
/* This threshold is a trade-off knob to choose between
 * the potential additional overhead of an extra GET_LBA_STATUS request
 * vs. unnecessarily reading a lot of zero sectors over the wire.
 * If a read request covers at least ISCSI_CHECKALLOC_THRES sectors and
 * the allocationmap indicates that the area might be unallocated, the
 * allocation status of the area is checked before the data is read. */
#define ISCSI_CHECKALLOC_THRES 64
122 
123 static void
124 iscsi_bh_cb(void *p)
125 {
126     IscsiAIOCB *acb = p;
127 
128     qemu_bh_delete(acb->bh);
129 
130     g_free(acb->buf);
131     acb->buf = NULL;
132 
133     acb->common.cb(acb->common.opaque, acb->status);
134 
135     if (acb->task != NULL) {
136         scsi_free_scsi_task(acb->task);
137         acb->task = NULL;
138     }
139 
140     qemu_aio_unref(acb);
141 }
142 
143 static void
144 iscsi_schedule_bh(IscsiAIOCB *acb)
145 {
146     if (acb->bh) {
147         return;
148     }
149     acb->bh = aio_bh_new(acb->iscsilun->aio_context, iscsi_bh_cb, acb);
150     qemu_bh_schedule(acb->bh);
151 }
152 
153 static void iscsi_co_generic_bh_cb(void *opaque)
154 {
155     struct IscsiTask *iTask = opaque;
156     iTask->complete = 1;
157     qemu_bh_delete(iTask->bh);
158     qemu_coroutine_enter(iTask->co, NULL);
159 }
160 
161 static void iscsi_retry_timer_expired(void *opaque)
162 {
163     struct IscsiTask *iTask = opaque;
164     iTask->complete = 1;
165     if (iTask->co) {
166         qemu_coroutine_enter(iTask->co, NULL);
167     }
168 }
169 
170 static inline unsigned exp_random(double mean)
171 {
172     return -mean * log((double)rand() / RAND_MAX);
173 }
174 
/* SCSI_SENSE_ASCQ_INVALID_FIELD_IN_PARAMETER_LIST was introduced in
 * libiscsi 1.10.0, together with other constants we need.  Use it as
 * a hint that we have to define them ourselves if needed, to keep the
 * minimum required libiscsi version at 1.9.0.  We use an ASCQ macro for
 * the test because SCSI_STATUS_* is an enum.
 *
 * To guard against future changes where SCSI_SENSE_ASCQ_* also becomes
 * an enum, check against the LIBISCSI_API_VERSION macro, which was
 * introduced in 1.11.0.  If it is present, there is no need to define
 * anything.
 */
#if !defined(SCSI_SENSE_ASCQ_INVALID_FIELD_IN_PARAMETER_LIST) && \
    !defined(LIBISCSI_API_VERSION)
/* fallback definitions for libiscsi releases that lack these constants */
#define SCSI_STATUS_TASK_SET_FULL                          0x28
#define SCSI_STATUS_TIMEOUT                                0x0f000002
#define SCSI_SENSE_ASCQ_INVALID_FIELD_IN_PARAMETER_LIST    0x2600
#define SCSI_SENSE_ASCQ_PARAMETER_LIST_LENGTH_ERROR        0x1a00
#endif
193 
194 static int iscsi_translate_sense(struct scsi_sense *sense)
195 {
196     int ret;
197 
198     switch (sense->key) {
199     case SCSI_SENSE_NOT_READY:
200         return -EBUSY;
201     case SCSI_SENSE_DATA_PROTECTION:
202         return -EACCES;
203     case SCSI_SENSE_COMMAND_ABORTED:
204         return -ECANCELED;
205     case SCSI_SENSE_ILLEGAL_REQUEST:
206         /* Parse ASCQ */
207         break;
208     default:
209         return -EIO;
210     }
211     switch (sense->ascq) {
212     case SCSI_SENSE_ASCQ_PARAMETER_LIST_LENGTH_ERROR:
213     case SCSI_SENSE_ASCQ_INVALID_OPERATION_CODE:
214     case SCSI_SENSE_ASCQ_INVALID_FIELD_IN_CDB:
215     case SCSI_SENSE_ASCQ_INVALID_FIELD_IN_PARAMETER_LIST:
216         ret = -EINVAL;
217         break;
218     case SCSI_SENSE_ASCQ_LBA_OUT_OF_RANGE:
219         ret = -ENOSPC;
220         break;
221     case SCSI_SENSE_ASCQ_LOGICAL_UNIT_NOT_SUPPORTED:
222         ret = -ENOTSUP;
223         break;
224     case SCSI_SENSE_ASCQ_MEDIUM_NOT_PRESENT:
225     case SCSI_SENSE_ASCQ_MEDIUM_NOT_PRESENT_TRAY_CLOSED:
226     case SCSI_SENSE_ASCQ_MEDIUM_NOT_PRESENT_TRAY_OPEN:
227         ret = -ENOMEDIUM;
228         break;
229     case SCSI_SENSE_ASCQ_WRITE_PROTECTED:
230         ret = -EACCES;
231         break;
232     default:
233         ret = -EIO;
234         break;
235     }
236     return ret;
237 }
238 
/*
 * Completion callback passed to libiscsi by the coroutine-based requests.
 *
 * Stores the SCSI status and the finished task in the IscsiTask (@opaque)
 * and decides whether the request has to be re-issued:
 *  - a CHECK CONDITION with a UNIT ATTENTION sense key is retried right
 *    away;
 *  - BUSY, TIMEOUT and TASK SET FULL are retried after a delay driven by
 *    iTask->retry_timer;
 *  - any other failure, or running out of retries, ends the request with
 *    iTask->err_code derived from the sense data.
 * On success the request's force_next_flush flag is merged into the LUN.
 */
static void
iscsi_co_generic_cb(struct iscsi_context *iscsi, int status,
                        void *command_data, void *opaque)
{
    struct IscsiTask *iTask = opaque;
    struct scsi_task *task = command_data;

    iTask->status = status;
    iTask->do_retry = 0;
    iTask->task = task;

    if (status != SCSI_STATUS_GOOD) {
        if (iTask->retries++ < ISCSI_CMD_RETRIES) {
            if (status == SCSI_STATUS_CHECK_CONDITION
                && task->sense.key == SCSI_SENSE_UNIT_ATTENTION) {
                error_report("iSCSI CheckCondition: %s",
                             iscsi_get_error(iscsi));
                iTask->do_retry = 1;
                goto out;
            }
            if (status == SCSI_STATUS_BUSY ||
                status == SCSI_STATUS_TIMEOUT ||
                status == SCSI_STATUS_TASK_SET_FULL) {
                /* retries was incremented above, so retries - 1 is the
                 * zero-based index of this attempt */
                unsigned retry_time =
                    exp_random(iscsi_retry_times[iTask->retries - 1]);
                if (status == SCSI_STATUS_TIMEOUT) {
                    /* make sure the request is rescheduled AFTER the
                     * reconnect is initiated */
                    retry_time = EVENT_INTERVAL * 2;
                    iTask->iscsilun->request_timed_out = true;
                }
                error_report("iSCSI Busy/TaskSetFull/TimeOut"
                             " (retry #%u in %u ms): %s",
                             iTask->retries, retry_time,
                             iscsi_get_error(iscsi));
                aio_timer_init(iTask->iscsilun->aio_context,
                               &iTask->retry_timer, QEMU_CLOCK_REALTIME,
                               SCALE_MS, iscsi_retry_timer_expired, iTask);
                timer_mod(&iTask->retry_timer,
                          qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + retry_time);
                iTask->do_retry = 1;
                /* no BH here: iscsi_retry_timer_expired() marks the request
                 * complete and resumes the coroutine */
                return;
            }
        }
        iTask->err_code = iscsi_translate_sense(&task->sense);
        error_report("iSCSI Failure: %s", iscsi_get_error(iscsi));
    } else {
        iTask->iscsilun->force_next_flush |= iTask->force_next_flush;
    }

out:
    if (iTask->co) {
        /* resume the coroutine from a bottom half; the BH sets complete */
        iTask->bh = aio_bh_new(iTask->iscsilun->aio_context,
                               iscsi_co_generic_bh_cb, iTask);
        qemu_bh_schedule(iTask->bh);
    } else {
        iTask->complete = 1;
    }
}
298 
299 static void iscsi_co_init_iscsitask(IscsiLun *iscsilun, struct IscsiTask *iTask)
300 {
301     *iTask = (struct IscsiTask) {
302         .co         = qemu_coroutine_self(),
303         .iscsilun   = iscsilun,
304     };
305 }
306 
307 static void
308 iscsi_abort_task_cb(struct iscsi_context *iscsi, int status, void *command_data,
309                     void *private_data)
310 {
311     IscsiAIOCB *acb = private_data;
312 
313     acb->status = -ECANCELED;
314     iscsi_schedule_bh(acb);
315 }
316 
317 static void
318 iscsi_aio_cancel(BlockAIOCB *blockacb)
319 {
320     IscsiAIOCB *acb = (IscsiAIOCB *)blockacb;
321     IscsiLun *iscsilun = acb->iscsilun;
322 
323     if (acb->status != -EINPROGRESS) {
324         return;
325     }
326 
327     /* send a task mgmt call to the target to cancel the task on the target */
328     iscsi_task_mgmt_abort_task_async(iscsilun->iscsi, acb->task,
329                                      iscsi_abort_task_cb, acb);
330 
331 }
332 
/* AIOCB description handed to qemu_aio_get() when an IscsiAIOCB is created */
static const AIOCBInfo iscsi_aiocb_info = {
    .aiocb_size         = sizeof(IscsiAIOCB),
    .cancel_async       = iscsi_aio_cancel,
};
337 
338 
/* fd handlers; declared early because iscsi_set_events() registers them */
static void iscsi_process_read(void *arg);
static void iscsi_process_write(void *arg);
341 
342 static void
343 iscsi_set_events(IscsiLun *iscsilun)
344 {
345     struct iscsi_context *iscsi = iscsilun->iscsi;
346     int ev = iscsi_which_events(iscsi);
347 
348     if (ev != iscsilun->events) {
349         aio_set_fd_handler(iscsilun->aio_context, iscsi_get_fd(iscsi),
350                            false,
351                            (ev & POLLIN) ? iscsi_process_read : NULL,
352                            (ev & POLLOUT) ? iscsi_process_write : NULL,
353                            iscsilun);
354         iscsilun->events = ev;
355     }
356 }
357 
358 static void iscsi_timed_check_events(void *opaque)
359 {
360     IscsiLun *iscsilun = opaque;
361 
362     /* check for timed out requests */
363     iscsi_service(iscsilun->iscsi, 0);
364 
365     if (iscsilun->request_timed_out) {
366         iscsilun->request_timed_out = false;
367         iscsi_reconnect(iscsilun->iscsi);
368     }
369 
370     /* newer versions of libiscsi may return zero events. Ensure we are able
371      * to return to service once this situation changes. */
372     iscsi_set_events(iscsilun);
373 
374     timer_mod(iscsilun->event_timer,
375               qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + EVENT_INTERVAL);
376 }
377 
378 static void
379 iscsi_process_read(void *arg)
380 {
381     IscsiLun *iscsilun = arg;
382     struct iscsi_context *iscsi = iscsilun->iscsi;
383 
384     iscsi_service(iscsi, POLLIN);
385     iscsi_set_events(iscsilun);
386 }
387 
388 static void
389 iscsi_process_write(void *arg)
390 {
391     IscsiLun *iscsilun = arg;
392     struct iscsi_context *iscsi = iscsilun->iscsi;
393 
394     iscsi_service(iscsi, POLLOUT);
395     iscsi_set_events(iscsilun);
396 }
397 
398 static int64_t sector_lun2qemu(int64_t sector, IscsiLun *iscsilun)
399 {
400     return sector * iscsilun->block_size / BDRV_SECTOR_SIZE;
401 }
402 
403 static int64_t sector_qemu2lun(int64_t sector, IscsiLun *iscsilun)
404 {
405     return sector * BDRV_SECTOR_SIZE / iscsilun->block_size;
406 }
407 
408 static bool is_request_lun_aligned(int64_t sector_num, int nb_sectors,
409                                       IscsiLun *iscsilun)
410 {
411     if ((sector_num * BDRV_SECTOR_SIZE) % iscsilun->block_size ||
412         (nb_sectors * BDRV_SECTOR_SIZE) % iscsilun->block_size) {
413             error_report("iSCSI misaligned request: "
414                          "iscsilun->block_size %u, sector_num %" PRIi64
415                          ", nb_sectors %d",
416                          iscsilun->block_size, sector_num, nb_sectors);
417             return 0;
418     }
419     return 1;
420 }
421 
422 static unsigned long *iscsi_allocationmap_init(IscsiLun *iscsilun)
423 {
424     return bitmap_try_new(DIV_ROUND_UP(sector_lun2qemu(iscsilun->num_blocks,
425                                                        iscsilun),
426                                        iscsilun->cluster_sectors));
427 }
428 
429 static void iscsi_allocationmap_set(IscsiLun *iscsilun, int64_t sector_num,
430                                     int nb_sectors)
431 {
432     if (iscsilun->allocationmap == NULL) {
433         return;
434     }
435     bitmap_set(iscsilun->allocationmap,
436                sector_num / iscsilun->cluster_sectors,
437                DIV_ROUND_UP(nb_sectors, iscsilun->cluster_sectors));
438 }
439 
440 static void iscsi_allocationmap_clear(IscsiLun *iscsilun, int64_t sector_num,
441                                       int nb_sectors)
442 {
443     int64_t cluster_num, nb_clusters;
444     if (iscsilun->allocationmap == NULL) {
445         return;
446     }
447     cluster_num = DIV_ROUND_UP(sector_num, iscsilun->cluster_sectors);
448     nb_clusters = (sector_num + nb_sectors) / iscsilun->cluster_sectors
449                   - cluster_num;
450     if (nb_clusters > 0) {
451         bitmap_clear(iscsilun->allocationmap, cluster_num, nb_clusters);
452     }
453 }
454 
/*
 * Coroutine write of @nb_sectors QEMU sectors starting at @sector_num from
 * the buffers in @iov.
 *
 * Returns 0 on success, -EINVAL for a request that is not LUN aligned or
 * exceeds bs->bl.max_transfer_length, -ENOMEM if libiscsi cannot create
 * the task, and otherwise the error code derived from the sense data.
 * On success the written range is marked as allocated.
 */
static int coroutine_fn iscsi_co_writev(BlockDriverState *bs,
                                        int64_t sector_num, int nb_sectors,
                                        QEMUIOVector *iov)
{
    IscsiLun *iscsilun = bs->opaque;
    struct IscsiTask iTask;
    uint64_t lba;
    uint32_t num_sectors;
    int fua;

    if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
        return -EINVAL;
    }

    if (bs->bl.max_transfer_length && nb_sectors > bs->bl.max_transfer_length) {
        error_report("iSCSI Error: Write of %d sectors exceeds max_xfer_len "
                     "of %d sectors", nb_sectors, bs->bl.max_transfer_length);
        return -EINVAL;
    }

    lba = sector_qemu2lun(sector_num, iscsilun);
    num_sectors = sector_qemu2lun(nb_sectors, iscsilun);
    iscsi_co_init_iscsitask(iscsilun, &iTask);
retry:
    /* use FUA only if the LUN supports it and the write cache is disabled;
     * a write without FUA requires a later flush */
    fua = iscsilun->dpofua && !bs->enable_write_cache;
    iTask.force_next_flush = !fua;
    if (iscsilun->use_16_for_rw) {
        iTask.task = iscsi_write16_task(iscsilun->iscsi, iscsilun->lun, lba,
                                        NULL, num_sectors * iscsilun->block_size,
                                        iscsilun->block_size, 0, 0, fua, 0, 0,
                                        iscsi_co_generic_cb, &iTask);
    } else {
        iTask.task = iscsi_write10_task(iscsilun->iscsi, iscsilun->lun, lba,
                                        NULL, num_sectors * iscsilun->block_size,
                                        iscsilun->block_size, 0, 0, fua, 0, 0,
                                        iscsi_co_generic_cb, &iTask);
    }
    if (iTask.task == NULL) {
        return -ENOMEM;
    }
    /* the data buffer argument above is NULL; the payload comes from iov */
    scsi_task_set_iov_out(iTask.task, (struct scsi_iovec *) iov->iov,
                          iov->niov);
    /* wait until the completion callback or retry timer resumes us */
    while (!iTask.complete) {
        iscsi_set_events(iscsilun);
        qemu_coroutine_yield();
    }

    if (iTask.task != NULL) {
        scsi_free_scsi_task(iTask.task);
        iTask.task = NULL;
    }

    if (iTask.do_retry) {
        iTask.complete = 0;
        goto retry;
    }

    if (iTask.status != SCSI_STATUS_GOOD) {
        return iTask.err_code;
    }

    iscsi_allocationmap_set(iscsilun, sector_num, nb_sectors);

    return 0;
}
520 
521 
522 static bool iscsi_allocationmap_is_allocated(IscsiLun *iscsilun,
523                                              int64_t sector_num, int nb_sectors)
524 {
525     unsigned long size;
526     if (iscsilun->allocationmap == NULL) {
527         return true;
528     }
529     size = DIV_ROUND_UP(sector_num + nb_sectors, iscsilun->cluster_sectors);
530     return !(find_next_bit(iscsilun->allocationmap, size,
531                            sector_num / iscsilun->cluster_sectors) == size);
532 }
533 
534 static int64_t coroutine_fn iscsi_co_get_block_status(BlockDriverState *bs,
535                                                   int64_t sector_num,
536                                                   int nb_sectors, int *pnum,
537                                                   BlockDriverState **file)
538 {
539     IscsiLun *iscsilun = bs->opaque;
540     struct scsi_get_lba_status *lbas = NULL;
541     struct scsi_lba_status_descriptor *lbasd = NULL;
542     struct IscsiTask iTask;
543     int64_t ret;
544 
545     iscsi_co_init_iscsitask(iscsilun, &iTask);
546 
547     if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
548         ret = -EINVAL;
549         goto out;
550     }
551 
552     /* default to all sectors allocated */
553     ret = BDRV_BLOCK_DATA;
554     ret |= (sector_num << BDRV_SECTOR_BITS) | BDRV_BLOCK_OFFSET_VALID;
555     *pnum = nb_sectors;
556 
557     /* LUN does not support logical block provisioning */
558     if (!iscsilun->lbpme) {
559         goto out;
560     }
561 
562 retry:
563     if (iscsi_get_lba_status_task(iscsilun->iscsi, iscsilun->lun,
564                                   sector_qemu2lun(sector_num, iscsilun),
565                                   8 + 16, iscsi_co_generic_cb,
566                                   &iTask) == NULL) {
567         ret = -ENOMEM;
568         goto out;
569     }
570 
571     while (!iTask.complete) {
572         iscsi_set_events(iscsilun);
573         qemu_coroutine_yield();
574     }
575 
576     if (iTask.do_retry) {
577         if (iTask.task != NULL) {
578             scsi_free_scsi_task(iTask.task);
579             iTask.task = NULL;
580         }
581         iTask.complete = 0;
582         goto retry;
583     }
584 
585     if (iTask.status != SCSI_STATUS_GOOD) {
586         /* in case the get_lba_status_callout fails (i.e.
587          * because the device is busy or the cmd is not
588          * supported) we pretend all blocks are allocated
589          * for backwards compatibility */
590         goto out;
591     }
592 
593     lbas = scsi_datain_unmarshall(iTask.task);
594     if (lbas == NULL) {
595         ret = -EIO;
596         goto out;
597     }
598 
599     lbasd = &lbas->descriptors[0];
600 
601     if (sector_qemu2lun(sector_num, iscsilun) != lbasd->lba) {
602         ret = -EIO;
603         goto out;
604     }
605 
606     *pnum = sector_lun2qemu(lbasd->num_blocks, iscsilun);
607 
608     if (lbasd->provisioning == SCSI_PROVISIONING_TYPE_DEALLOCATED ||
609         lbasd->provisioning == SCSI_PROVISIONING_TYPE_ANCHORED) {
610         ret &= ~BDRV_BLOCK_DATA;
611         if (iscsilun->lbprz) {
612             ret |= BDRV_BLOCK_ZERO;
613         }
614     }
615 
616     if (ret & BDRV_BLOCK_ZERO) {
617         iscsi_allocationmap_clear(iscsilun, sector_num, *pnum);
618     } else {
619         iscsi_allocationmap_set(iscsilun, sector_num, *pnum);
620     }
621 
622     if (*pnum > nb_sectors) {
623         *pnum = nb_sectors;
624     }
625 out:
626     if (iTask.task != NULL) {
627         scsi_free_scsi_task(iTask.task);
628     }
629     if (ret > 0 && ret & BDRV_BLOCK_OFFSET_VALID) {
630         *file = bs;
631     }
632     return ret;
633 }
634 
635 static int coroutine_fn iscsi_co_readv(BlockDriverState *bs,
636                                        int64_t sector_num, int nb_sectors,
637                                        QEMUIOVector *iov)
638 {
639     IscsiLun *iscsilun = bs->opaque;
640     struct IscsiTask iTask;
641     uint64_t lba;
642     uint32_t num_sectors;
643 
644     if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
645         return -EINVAL;
646     }
647 
648     if (bs->bl.max_transfer_length && nb_sectors > bs->bl.max_transfer_length) {
649         error_report("iSCSI Error: Read of %d sectors exceeds max_xfer_len "
650                      "of %d sectors", nb_sectors, bs->bl.max_transfer_length);
651         return -EINVAL;
652     }
653 
654     if (iscsilun->lbprz && nb_sectors >= ISCSI_CHECKALLOC_THRES &&
655         !iscsi_allocationmap_is_allocated(iscsilun, sector_num, nb_sectors)) {
656         int64_t ret;
657         int pnum;
658         BlockDriverState *file;
659         ret = iscsi_co_get_block_status(bs, sector_num, INT_MAX, &pnum, &file);
660         if (ret < 0) {
661             return ret;
662         }
663         if (ret & BDRV_BLOCK_ZERO && pnum >= nb_sectors) {
664             qemu_iovec_memset(iov, 0, 0x00, iov->size);
665             return 0;
666         }
667     }
668 
669     lba = sector_qemu2lun(sector_num, iscsilun);
670     num_sectors = sector_qemu2lun(nb_sectors, iscsilun);
671 
672     iscsi_co_init_iscsitask(iscsilun, &iTask);
673 retry:
674     if (iscsilun->use_16_for_rw) {
675         iTask.task = iscsi_read16_task(iscsilun->iscsi, iscsilun->lun, lba,
676                                        num_sectors * iscsilun->block_size,
677                                        iscsilun->block_size, 0, 0, 0, 0, 0,
678                                        iscsi_co_generic_cb, &iTask);
679     } else {
680         iTask.task = iscsi_read10_task(iscsilun->iscsi, iscsilun->lun, lba,
681                                        num_sectors * iscsilun->block_size,
682                                        iscsilun->block_size,
683                                        0, 0, 0, 0, 0,
684                                        iscsi_co_generic_cb, &iTask);
685     }
686     if (iTask.task == NULL) {
687         return -ENOMEM;
688     }
689     scsi_task_set_iov_in(iTask.task, (struct scsi_iovec *) iov->iov, iov->niov);
690 
691     while (!iTask.complete) {
692         iscsi_set_events(iscsilun);
693         qemu_coroutine_yield();
694     }
695 
696     if (iTask.task != NULL) {
697         scsi_free_scsi_task(iTask.task);
698         iTask.task = NULL;
699     }
700 
701     if (iTask.do_retry) {
702         iTask.complete = 0;
703         goto retry;
704     }
705 
706     if (iTask.status != SCSI_STATUS_GOOD) {
707         return iTask.err_code;
708     }
709 
710     return 0;
711 }
712 
713 static int coroutine_fn iscsi_co_flush(BlockDriverState *bs)
714 {
715     IscsiLun *iscsilun = bs->opaque;
716     struct IscsiTask iTask;
717 
718     if (!iscsilun->force_next_flush) {
719         return 0;
720     }
721     iscsilun->force_next_flush = false;
722 
723     iscsi_co_init_iscsitask(iscsilun, &iTask);
724 retry:
725     if (iscsi_synchronizecache10_task(iscsilun->iscsi, iscsilun->lun, 0, 0, 0,
726                                       0, iscsi_co_generic_cb, &iTask) == NULL) {
727         return -ENOMEM;
728     }
729 
730     while (!iTask.complete) {
731         iscsi_set_events(iscsilun);
732         qemu_coroutine_yield();
733     }
734 
735     if (iTask.task != NULL) {
736         scsi_free_scsi_task(iTask.task);
737         iTask.task = NULL;
738     }
739 
740     if (iTask.do_retry) {
741         iTask.complete = 0;
742         goto retry;
743     }
744 
745     if (iTask.status != SCSI_STATUS_GOOD) {
746         return iTask.err_code;
747     }
748 
749     return 0;
750 }
751 
752 #ifdef __linux__
753 static void
754 iscsi_aio_ioctl_cb(struct iscsi_context *iscsi, int status,
755                      void *command_data, void *opaque)
756 {
757     IscsiAIOCB *acb = opaque;
758 
759     g_free(acb->buf);
760     acb->buf = NULL;
761 
762     acb->status = 0;
763     if (status < 0) {
764         error_report("Failed to ioctl(SG_IO) to iSCSI lun. %s",
765                      iscsi_get_error(iscsi));
766         acb->status = iscsi_translate_sense(&acb->task->sense);
767     }
768 
769     acb->ioh->driver_status = 0;
770     acb->ioh->host_status   = 0;
771     acb->ioh->resid         = 0;
772 
773 #define SG_ERR_DRIVER_SENSE    0x08
774 
775     if (status == SCSI_STATUS_CHECK_CONDITION && acb->task->datain.size >= 2) {
776         int ss;
777 
778         acb->ioh->driver_status |= SG_ERR_DRIVER_SENSE;
779 
780         acb->ioh->sb_len_wr = acb->task->datain.size - 2;
781         ss = (acb->ioh->mx_sb_len >= acb->ioh->sb_len_wr) ?
782              acb->ioh->mx_sb_len : acb->ioh->sb_len_wr;
783         memcpy(acb->ioh->sbp, &acb->task->datain.data[2], ss);
784     }
785 
786     iscsi_schedule_bh(acb);
787 }
788 
789 static void iscsi_ioctl_bh_completion(void *opaque)
790 {
791     IscsiAIOCB *acb = opaque;
792 
793     qemu_bh_delete(acb->bh);
794     acb->common.cb(acb->common.opaque, acb->ret);
795     qemu_aio_unref(acb);
796 }
797 
798 static void iscsi_ioctl_handle_emulated(IscsiAIOCB *acb, int req, void *buf)
799 {
800     BlockDriverState *bs = acb->common.bs;
801     IscsiLun *iscsilun = bs->opaque;
802     int ret = 0;
803 
804     switch (req) {
805     case SG_GET_VERSION_NUM:
806         *(int *)buf = 30000;
807         break;
808     case SG_GET_SCSI_ID:
809         ((struct sg_scsi_id *)buf)->scsi_type = iscsilun->type;
810         break;
811     default:
812         ret = -EINVAL;
813     }
814     assert(!acb->bh);
815     acb->bh = aio_bh_new(bdrv_get_aio_context(bs),
816                          iscsi_ioctl_bh_completion, acb);
817     acb->ret = ret;
818     qemu_bh_schedule(acb->bh);
819 }
820 
821 static BlockAIOCB *iscsi_aio_ioctl(BlockDriverState *bs,
822         unsigned long int req, void *buf,
823         BlockCompletionFunc *cb, void *opaque)
824 {
825     IscsiLun *iscsilun = bs->opaque;
826     struct iscsi_context *iscsi = iscsilun->iscsi;
827     struct iscsi_data data;
828     IscsiAIOCB *acb;
829 
830     acb = qemu_aio_get(&iscsi_aiocb_info, bs, cb, opaque);
831 
832     acb->iscsilun = iscsilun;
833     acb->bh          = NULL;
834     acb->status      = -EINPROGRESS;
835     acb->buf         = NULL;
836     acb->ioh         = buf;
837 
838     if (req != SG_IO) {
839         iscsi_ioctl_handle_emulated(acb, req, buf);
840         return &acb->common;
841     }
842 
843     acb->task = malloc(sizeof(struct scsi_task));
844     if (acb->task == NULL) {
845         error_report("iSCSI: Failed to allocate task for scsi command. %s",
846                      iscsi_get_error(iscsi));
847         qemu_aio_unref(acb);
848         return NULL;
849     }
850     memset(acb->task, 0, sizeof(struct scsi_task));
851 
852     switch (acb->ioh->dxfer_direction) {
853     case SG_DXFER_TO_DEV:
854         acb->task->xfer_dir = SCSI_XFER_WRITE;
855         break;
856     case SG_DXFER_FROM_DEV:
857         acb->task->xfer_dir = SCSI_XFER_READ;
858         break;
859     default:
860         acb->task->xfer_dir = SCSI_XFER_NONE;
861         break;
862     }
863 
864     acb->task->cdb_size = acb->ioh->cmd_len;
865     memcpy(&acb->task->cdb[0], acb->ioh->cmdp, acb->ioh->cmd_len);
866     acb->task->expxferlen = acb->ioh->dxfer_len;
867 
868     data.size = 0;
869     if (acb->task->xfer_dir == SCSI_XFER_WRITE) {
870         if (acb->ioh->iovec_count == 0) {
871             data.data = acb->ioh->dxferp;
872             data.size = acb->ioh->dxfer_len;
873         } else {
874             scsi_task_set_iov_out(acb->task,
875                                  (struct scsi_iovec *) acb->ioh->dxferp,
876                                  acb->ioh->iovec_count);
877         }
878     }
879 
880     if (iscsi_scsi_command_async(iscsi, iscsilun->lun, acb->task,
881                                  iscsi_aio_ioctl_cb,
882                                  (data.size > 0) ? &data : NULL,
883                                  acb) != 0) {
884         scsi_free_scsi_task(acb->task);
885         qemu_aio_unref(acb);
886         return NULL;
887     }
888 
889     /* tell libiscsi to read straight into the buffer we got from ioctl */
890     if (acb->task->xfer_dir == SCSI_XFER_READ) {
891         if (acb->ioh->iovec_count == 0) {
892             scsi_task_add_data_in_buffer(acb->task,
893                                          acb->ioh->dxfer_len,
894                                          acb->ioh->dxferp);
895         } else {
896             scsi_task_set_iov_in(acb->task,
897                                  (struct scsi_iovec *) acb->ioh->dxferp,
898                                  acb->ioh->iovec_count);
899         }
900     }
901 
902     iscsi_set_events(iscsilun);
903 
904     return &acb->common;
905 }
906 
907 #endif
908 
909 static int64_t
910 iscsi_getlength(BlockDriverState *bs)
911 {
912     IscsiLun *iscsilun = bs->opaque;
913     int64_t len;
914 
915     len  = iscsilun->num_blocks;
916     len *= iscsilun->block_size;
917 
918     return len;
919 }
920 
/*
 * Coroutine discard of @nb_sectors QEMU sectors starting at @sector_num,
 * implemented with a single-descriptor UNMAP.
 *
 * Returns 0 on success and also when the target does not support UNMAP or
 * answers with a CHECK CONDITION; -EINVAL for a misaligned request,
 * -ENOMEM if libiscsi cannot create the task, and otherwise the error code
 * derived from the sense data.  Only a successful UNMAP updates the
 * allocation map.
 */
static int
coroutine_fn iscsi_co_discard(BlockDriverState *bs, int64_t sector_num,
                                   int nb_sectors)
{
    IscsiLun *iscsilun = bs->opaque;
    struct IscsiTask iTask;
    struct unmap_list list;

    if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
        return -EINVAL;
    }

    if (!iscsilun->lbp.lbpu) {
        /* UNMAP is not supported by the target */
        return 0;
    }

    /* the UNMAP descriptor is expressed in LUN blocks */
    list.lba = sector_qemu2lun(sector_num, iscsilun);
    list.num = sector_qemu2lun(nb_sectors, iscsilun);

    iscsi_co_init_iscsitask(iscsilun, &iTask);
retry:
    if (iscsi_unmap_task(iscsilun->iscsi, iscsilun->lun, 0, 0, &list, 1,
                     iscsi_co_generic_cb, &iTask) == NULL) {
        return -ENOMEM;
    }

    /* wait until the completion callback or retry timer resumes us */
    while (!iTask.complete) {
        iscsi_set_events(iscsilun);
        qemu_coroutine_yield();
    }

    if (iTask.task != NULL) {
        scsi_free_scsi_task(iTask.task);
        iTask.task = NULL;
    }

    if (iTask.do_retry) {
        iTask.complete = 0;
        goto retry;
    }

    if (iTask.status == SCSI_STATUS_CHECK_CONDITION) {
        /* the target might fail with a check condition if it
           is not happy with the alignment of the UNMAP request
           we silently fail in this case */
        return 0;
    }

    if (iTask.status != SCSI_STATUS_GOOD) {
        return iTask.err_code;
    }

    iscsi_allocationmap_clear(iscsilun, sector_num, nb_sectors);

    return 0;
}
978 
979 static int
980 coroutine_fn iscsi_co_write_zeroes(BlockDriverState *bs, int64_t sector_num,
981                                    int nb_sectors, BdrvRequestFlags flags)
982 {
983     IscsiLun *iscsilun = bs->opaque;
984     struct IscsiTask iTask;
985     uint64_t lba;
986     uint32_t nb_blocks;
987     bool use_16_for_ws = iscsilun->use_16_for_rw;
988 
989     if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
990         return -EINVAL;
991     }
992 
993     if (flags & BDRV_REQ_MAY_UNMAP) {
994         if (!use_16_for_ws && !iscsilun->lbp.lbpws10) {
995             /* WRITESAME10 with UNMAP is unsupported try WRITESAME16 */
996             use_16_for_ws = true;
997         }
998         if (use_16_for_ws && !iscsilun->lbp.lbpws) {
999             /* WRITESAME16 with UNMAP is not supported by the target,
1000              * fall back and try WRITESAME10/16 without UNMAP */
1001             flags &= ~BDRV_REQ_MAY_UNMAP;
1002             use_16_for_ws = iscsilun->use_16_for_rw;
1003         }
1004     }
1005 
1006     if (!(flags & BDRV_REQ_MAY_UNMAP) && !iscsilun->has_write_same) {
1007         /* WRITESAME without UNMAP is not supported by the target */
1008         return -ENOTSUP;
1009     }
1010 
1011     lba = sector_qemu2lun(sector_num, iscsilun);
1012     nb_blocks = sector_qemu2lun(nb_sectors, iscsilun);
1013 
1014     if (iscsilun->zeroblock == NULL) {
1015         iscsilun->zeroblock = g_try_malloc0(iscsilun->block_size);
1016         if (iscsilun->zeroblock == NULL) {
1017             return -ENOMEM;
1018         }
1019     }
1020 
1021     iscsi_co_init_iscsitask(iscsilun, &iTask);
1022     iTask.force_next_flush = true;
1023 retry:
1024     if (use_16_for_ws) {
1025         iTask.task = iscsi_writesame16_task(iscsilun->iscsi, iscsilun->lun, lba,
1026                                             iscsilun->zeroblock, iscsilun->block_size,
1027                                             nb_blocks, 0, !!(flags & BDRV_REQ_MAY_UNMAP),
1028                                             0, 0, iscsi_co_generic_cb, &iTask);
1029     } else {
1030         iTask.task = iscsi_writesame10_task(iscsilun->iscsi, iscsilun->lun, lba,
1031                                             iscsilun->zeroblock, iscsilun->block_size,
1032                                             nb_blocks, 0, !!(flags & BDRV_REQ_MAY_UNMAP),
1033                                             0, 0, iscsi_co_generic_cb, &iTask);
1034     }
1035     if (iTask.task == NULL) {
1036         return -ENOMEM;
1037     }
1038 
1039     while (!iTask.complete) {
1040         iscsi_set_events(iscsilun);
1041         qemu_coroutine_yield();
1042     }
1043 
1044     if (iTask.status == SCSI_STATUS_CHECK_CONDITION &&
1045         iTask.task->sense.key == SCSI_SENSE_ILLEGAL_REQUEST &&
1046         (iTask.task->sense.ascq == SCSI_SENSE_ASCQ_INVALID_OPERATION_CODE ||
1047          iTask.task->sense.ascq == SCSI_SENSE_ASCQ_INVALID_FIELD_IN_CDB)) {
1048         /* WRITE SAME is not supported by the target */
1049         iscsilun->has_write_same = false;
1050         scsi_free_scsi_task(iTask.task);
1051         return -ENOTSUP;
1052     }
1053 
1054     if (iTask.task != NULL) {
1055         scsi_free_scsi_task(iTask.task);
1056         iTask.task = NULL;
1057     }
1058 
1059     if (iTask.do_retry) {
1060         iTask.complete = 0;
1061         goto retry;
1062     }
1063 
1064     if (iTask.status != SCSI_STATUS_GOOD) {
1065         return iTask.err_code;
1066     }
1067 
1068     if (flags & BDRV_REQ_MAY_UNMAP) {
1069         iscsi_allocationmap_clear(iscsilun, sector_num, nb_sectors);
1070     } else {
1071         iscsi_allocationmap_set(iscsilun, sector_num, nb_sectors);
1072     }
1073 
1074     return 0;
1075 }
1076 
1077 static void parse_chap(struct iscsi_context *iscsi, const char *target,
1078                        Error **errp)
1079 {
1080     QemuOptsList *list;
1081     QemuOpts *opts;
1082     const char *user = NULL;
1083     const char *password = NULL;
1084     const char *secretid;
1085     char *secret = NULL;
1086 
1087     list = qemu_find_opts("iscsi");
1088     if (!list) {
1089         return;
1090     }
1091 
1092     opts = qemu_opts_find(list, target);
1093     if (opts == NULL) {
1094         opts = QTAILQ_FIRST(&list->head);
1095         if (!opts) {
1096             return;
1097         }
1098     }
1099 
1100     user = qemu_opt_get(opts, "user");
1101     if (!user) {
1102         return;
1103     }
1104 
1105     secretid = qemu_opt_get(opts, "password-secret");
1106     password = qemu_opt_get(opts, "password");
1107     if (secretid && password) {
1108         error_setg(errp, "'password' and 'password-secret' properties are "
1109                    "mutually exclusive");
1110         return;
1111     }
1112     if (secretid) {
1113         secret = qcrypto_secret_lookup_as_utf8(secretid, errp);
1114         if (!secret) {
1115             return;
1116         }
1117         password = secret;
1118     } else if (!password) {
1119         error_setg(errp, "CHAP username specified but no password was given");
1120         return;
1121     }
1122 
1123     if (iscsi_set_initiator_username_pwd(iscsi, user, password)) {
1124         error_setg(errp, "Failed to set initiator username and password");
1125     }
1126 
1127     g_free(secret);
1128 }
1129 
1130 static void parse_header_digest(struct iscsi_context *iscsi, const char *target,
1131                                 Error **errp)
1132 {
1133     QemuOptsList *list;
1134     QemuOpts *opts;
1135     const char *digest = NULL;
1136 
1137     list = qemu_find_opts("iscsi");
1138     if (!list) {
1139         return;
1140     }
1141 
1142     opts = qemu_opts_find(list, target);
1143     if (opts == NULL) {
1144         opts = QTAILQ_FIRST(&list->head);
1145         if (!opts) {
1146             return;
1147         }
1148     }
1149 
1150     digest = qemu_opt_get(opts, "header-digest");
1151     if (!digest) {
1152         return;
1153     }
1154 
1155     if (!strcmp(digest, "CRC32C")) {
1156         iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_CRC32C);
1157     } else if (!strcmp(digest, "NONE")) {
1158         iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE);
1159     } else if (!strcmp(digest, "CRC32C-NONE")) {
1160         iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_CRC32C_NONE);
1161     } else if (!strcmp(digest, "NONE-CRC32C")) {
1162         iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE_CRC32C);
1163     } else {
1164         error_setg(errp, "Invalid header-digest setting : %s", digest);
1165     }
1166 }
1167 
1168 static char *parse_initiator_name(const char *target)
1169 {
1170     QemuOptsList *list;
1171     QemuOpts *opts;
1172     const char *name;
1173     char *iscsi_name;
1174     UuidInfo *uuid_info;
1175 
1176     list = qemu_find_opts("iscsi");
1177     if (list) {
1178         opts = qemu_opts_find(list, target);
1179         if (!opts) {
1180             opts = QTAILQ_FIRST(&list->head);
1181         }
1182         if (opts) {
1183             name = qemu_opt_get(opts, "initiator-name");
1184             if (name) {
1185                 return g_strdup(name);
1186             }
1187         }
1188     }
1189 
1190     uuid_info = qmp_query_uuid(NULL);
1191     if (strcmp(uuid_info->UUID, UUID_NONE) == 0) {
1192         name = qemu_get_vm_name();
1193     } else {
1194         name = uuid_info->UUID;
1195     }
1196     iscsi_name = g_strdup_printf("iqn.2008-11.org.linux-kvm%s%s",
1197                                  name ? ":" : "", name ? name : "");
1198     qapi_free_UuidInfo(uuid_info);
1199     return iscsi_name;
1200 }
1201 
1202 static int parse_timeout(const char *target)
1203 {
1204     QemuOptsList *list;
1205     QemuOpts *opts;
1206     const char *timeout;
1207 
1208     list = qemu_find_opts("iscsi");
1209     if (list) {
1210         opts = qemu_opts_find(list, target);
1211         if (!opts) {
1212             opts = QTAILQ_FIRST(&list->head);
1213         }
1214         if (opts) {
1215             timeout = qemu_opt_get(opts, "timeout");
1216             if (timeout) {
1217                 return atoi(timeout);
1218             }
1219         }
1220     }
1221 
1222     return 0;
1223 }
1224 
1225 static void iscsi_nop_timed_event(void *opaque)
1226 {
1227     IscsiLun *iscsilun = opaque;
1228 
1229     if (iscsi_get_nops_in_flight(iscsilun->iscsi) >= MAX_NOP_FAILURES) {
1230         error_report("iSCSI: NOP timeout. Reconnecting...");
1231         iscsilun->request_timed_out = true;
1232     } else if (iscsi_nop_out_async(iscsilun->iscsi, NULL, NULL, 0, NULL) != 0) {
1233         error_report("iSCSI: failed to sent NOP-Out. Disabling NOP messages.");
1234         return;
1235     }
1236 
1237     timer_mod(iscsilun->nop_timer, qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + NOP_INTERVAL);
1238     iscsi_set_events(iscsilun);
1239 }
1240 
1241 static void iscsi_readcapacity_sync(IscsiLun *iscsilun, Error **errp)
1242 {
1243     struct scsi_task *task = NULL;
1244     struct scsi_readcapacity10 *rc10 = NULL;
1245     struct scsi_readcapacity16 *rc16 = NULL;
1246     int retries = ISCSI_CMD_RETRIES;
1247 
1248     do {
1249         if (task != NULL) {
1250             scsi_free_scsi_task(task);
1251             task = NULL;
1252         }
1253 
1254         switch (iscsilun->type) {
1255         case TYPE_DISK:
1256             task = iscsi_readcapacity16_sync(iscsilun->iscsi, iscsilun->lun);
1257             if (task != NULL && task->status == SCSI_STATUS_GOOD) {
1258                 rc16 = scsi_datain_unmarshall(task);
1259                 if (rc16 == NULL) {
1260                     error_setg(errp, "iSCSI: Failed to unmarshall readcapacity16 data.");
1261                 } else {
1262                     iscsilun->block_size = rc16->block_length;
1263                     iscsilun->num_blocks = rc16->returned_lba + 1;
1264                     iscsilun->lbpme = !!rc16->lbpme;
1265                     iscsilun->lbprz = !!rc16->lbprz;
1266                     iscsilun->use_16_for_rw = (rc16->returned_lba > 0xffffffff);
1267                 }
1268                 break;
1269             }
1270             if (task != NULL && task->status == SCSI_STATUS_CHECK_CONDITION
1271                 && task->sense.key == SCSI_SENSE_UNIT_ATTENTION) {
1272                 break;
1273             }
1274             /* Fall through and try READ CAPACITY(10) instead.  */
1275         case TYPE_ROM:
1276             task = iscsi_readcapacity10_sync(iscsilun->iscsi, iscsilun->lun, 0, 0);
1277             if (task != NULL && task->status == SCSI_STATUS_GOOD) {
1278                 rc10 = scsi_datain_unmarshall(task);
1279                 if (rc10 == NULL) {
1280                     error_setg(errp, "iSCSI: Failed to unmarshall readcapacity10 data.");
1281                 } else {
1282                     iscsilun->block_size = rc10->block_size;
1283                     if (rc10->lba == 0) {
1284                         /* blank disk loaded */
1285                         iscsilun->num_blocks = 0;
1286                     } else {
1287                         iscsilun->num_blocks = rc10->lba + 1;
1288                     }
1289                 }
1290             }
1291             break;
1292         default:
1293             return;
1294         }
1295     } while (task != NULL && task->status == SCSI_STATUS_CHECK_CONDITION
1296              && task->sense.key == SCSI_SENSE_UNIT_ATTENTION
1297              && retries-- > 0);
1298 
1299     if (task == NULL || task->status != SCSI_STATUS_GOOD) {
1300         error_setg(errp, "iSCSI: failed to send readcapacity10/16 command");
1301     } else if (!iscsilun->block_size ||
1302                iscsilun->block_size % BDRV_SECTOR_SIZE) {
1303         error_setg(errp, "iSCSI: the target returned an invalid "
1304                    "block size of %d.", iscsilun->block_size);
1305     }
1306     if (task) {
1307         scsi_free_scsi_task(task);
1308     }
1309 }
1310 
1311 /* TODO Convert to fine grained options */
1312 static QemuOptsList runtime_opts = {
1313     .name = "iscsi",
1314     .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
1315     .desc = {
1316         {
1317             .name = "filename",
1318             .type = QEMU_OPT_STRING,
1319             .help = "URL to the iscsi image",
1320         },
1321         { /* end of list */ }
1322     },
1323 };
1324 
1325 static struct scsi_task *iscsi_do_inquiry(struct iscsi_context *iscsi, int lun,
1326                                           int evpd, int pc, void **inq, Error **errp)
1327 {
1328     int full_size;
1329     struct scsi_task *task = NULL;
1330     task = iscsi_inquiry_sync(iscsi, lun, evpd, pc, 64);
1331     if (task == NULL || task->status != SCSI_STATUS_GOOD) {
1332         goto fail;
1333     }
1334     full_size = scsi_datain_getfullsize(task);
1335     if (full_size > task->datain.size) {
1336         scsi_free_scsi_task(task);
1337 
1338         /* we need more data for the full list */
1339         task = iscsi_inquiry_sync(iscsi, lun, evpd, pc, full_size);
1340         if (task == NULL || task->status != SCSI_STATUS_GOOD) {
1341             goto fail;
1342         }
1343     }
1344 
1345     *inq = scsi_datain_unmarshall(task);
1346     if (*inq == NULL) {
1347         error_setg(errp, "iSCSI: failed to unmarshall inquiry datain blob");
1348         goto fail_with_err;
1349     }
1350 
1351     return task;
1352 
1353 fail:
1354     error_setg(errp, "iSCSI: Inquiry command failed : %s",
1355                iscsi_get_error(iscsi));
1356 fail_with_err:
1357     if (task != NULL) {
1358         scsi_free_scsi_task(task);
1359     }
1360     return NULL;
1361 }
1362 
1363 static void iscsi_detach_aio_context(BlockDriverState *bs)
1364 {
1365     IscsiLun *iscsilun = bs->opaque;
1366 
1367     aio_set_fd_handler(iscsilun->aio_context, iscsi_get_fd(iscsilun->iscsi),
1368                        false, NULL, NULL, NULL);
1369     iscsilun->events = 0;
1370 
1371     if (iscsilun->nop_timer) {
1372         timer_del(iscsilun->nop_timer);
1373         timer_free(iscsilun->nop_timer);
1374         iscsilun->nop_timer = NULL;
1375     }
1376     if (iscsilun->event_timer) {
1377         timer_del(iscsilun->event_timer);
1378         timer_free(iscsilun->event_timer);
1379         iscsilun->event_timer = NULL;
1380     }
1381 }
1382 
1383 static void iscsi_attach_aio_context(BlockDriverState *bs,
1384                                      AioContext *new_context)
1385 {
1386     IscsiLun *iscsilun = bs->opaque;
1387 
1388     iscsilun->aio_context = new_context;
1389     iscsi_set_events(iscsilun);
1390 
1391     /* Set up a timer for sending out iSCSI NOPs */
1392     iscsilun->nop_timer = aio_timer_new(iscsilun->aio_context,
1393                                         QEMU_CLOCK_REALTIME, SCALE_MS,
1394                                         iscsi_nop_timed_event, iscsilun);
1395     timer_mod(iscsilun->nop_timer,
1396               qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + NOP_INTERVAL);
1397 
1398     /* Set up a timer for periodic calls to iscsi_set_events and to
1399      * scan for command timeout */
1400     iscsilun->event_timer = aio_timer_new(iscsilun->aio_context,
1401                                           QEMU_CLOCK_REALTIME, SCALE_MS,
1402                                           iscsi_timed_check_events, iscsilun);
1403     timer_mod(iscsilun->event_timer,
1404               qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + EVENT_INTERVAL);
1405 }
1406 
1407 static void iscsi_modesense_sync(IscsiLun *iscsilun)
1408 {
1409     struct scsi_task *task;
1410     struct scsi_mode_sense *ms = NULL;
1411     iscsilun->write_protected = false;
1412     iscsilun->dpofua = false;
1413 
1414     task = iscsi_modesense6_sync(iscsilun->iscsi, iscsilun->lun,
1415                                  1, SCSI_MODESENSE_PC_CURRENT,
1416                                  0x3F, 0, 255);
1417     if (task == NULL) {
1418         error_report("iSCSI: Failed to send MODE_SENSE(6) command: %s",
1419                      iscsi_get_error(iscsilun->iscsi));
1420         goto out;
1421     }
1422 
1423     if (task->status != SCSI_STATUS_GOOD) {
1424         error_report("iSCSI: Failed MODE_SENSE(6), LUN assumed writable");
1425         goto out;
1426     }
1427     ms = scsi_datain_unmarshall(task);
1428     if (!ms) {
1429         error_report("iSCSI: Failed to unmarshall MODE_SENSE(6) data: %s",
1430                      iscsi_get_error(iscsilun->iscsi));
1431         goto out;
1432     }
1433     iscsilun->write_protected = ms->device_specific_parameter & 0x80;
1434     iscsilun->dpofua          = ms->device_specific_parameter & 0x10;
1435 
1436 out:
1437     if (task) {
1438         scsi_free_scsi_task(task);
1439     }
1440 }
1441 
1442 /*
1443  * We support iscsi url's on the form
1444  * iscsi://[<username>%<password>@]<host>[:<port>]/<targetname>/<lun>
1445  */
1446 static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
1447                       Error **errp)
1448 {
1449     IscsiLun *iscsilun = bs->opaque;
1450     struct iscsi_context *iscsi = NULL;
1451     struct iscsi_url *iscsi_url = NULL;
1452     struct scsi_task *task = NULL;
1453     struct scsi_inquiry_standard *inq = NULL;
1454     struct scsi_inquiry_supported_pages *inq_vpd;
1455     char *initiator_name = NULL;
1456     QemuOpts *opts;
1457     Error *local_err = NULL;
1458     const char *filename;
1459     int i, ret = 0, timeout = 0;
1460 
1461     opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
1462     qemu_opts_absorb_qdict(opts, options, &local_err);
1463     if (local_err) {
1464         error_propagate(errp, local_err);
1465         ret = -EINVAL;
1466         goto out;
1467     }
1468 
1469     filename = qemu_opt_get(opts, "filename");
1470 
1471     iscsi_url = iscsi_parse_full_url(iscsi, filename);
1472     if (iscsi_url == NULL) {
1473         error_setg(errp, "Failed to parse URL : %s", filename);
1474         ret = -EINVAL;
1475         goto out;
1476     }
1477 
1478     memset(iscsilun, 0, sizeof(IscsiLun));
1479 
1480     initiator_name = parse_initiator_name(iscsi_url->target);
1481 
1482     iscsi = iscsi_create_context(initiator_name);
1483     if (iscsi == NULL) {
1484         error_setg(errp, "iSCSI: Failed to create iSCSI context.");
1485         ret = -ENOMEM;
1486         goto out;
1487     }
1488 
1489     if (iscsi_set_targetname(iscsi, iscsi_url->target)) {
1490         error_setg(errp, "iSCSI: Failed to set target name.");
1491         ret = -EINVAL;
1492         goto out;
1493     }
1494 
1495     if (iscsi_url->user[0] != '\0') {
1496         ret = iscsi_set_initiator_username_pwd(iscsi, iscsi_url->user,
1497                                               iscsi_url->passwd);
1498         if (ret != 0) {
1499             error_setg(errp, "Failed to set initiator username and password");
1500             ret = -EINVAL;
1501             goto out;
1502         }
1503     }
1504 
1505     /* check if we got CHAP username/password via the options */
1506     parse_chap(iscsi, iscsi_url->target, &local_err);
1507     if (local_err != NULL) {
1508         error_propagate(errp, local_err);
1509         ret = -EINVAL;
1510         goto out;
1511     }
1512 
1513     if (iscsi_set_session_type(iscsi, ISCSI_SESSION_NORMAL) != 0) {
1514         error_setg(errp, "iSCSI: Failed to set session type to normal.");
1515         ret = -EINVAL;
1516         goto out;
1517     }
1518 
1519     iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE_CRC32C);
1520 
1521     /* check if we got HEADER_DIGEST via the options */
1522     parse_header_digest(iscsi, iscsi_url->target, &local_err);
1523     if (local_err != NULL) {
1524         error_propagate(errp, local_err);
1525         ret = -EINVAL;
1526         goto out;
1527     }
1528 
1529     /* timeout handling is broken in libiscsi before 1.15.0 */
1530     timeout = parse_timeout(iscsi_url->target);
1531 #if defined(LIBISCSI_API_VERSION) && LIBISCSI_API_VERSION >= 20150621
1532     iscsi_set_timeout(iscsi, timeout);
1533 #else
1534     if (timeout) {
1535         error_report("iSCSI: ignoring timeout value for libiscsi <1.15.0");
1536     }
1537 #endif
1538 
1539     if (iscsi_full_connect_sync(iscsi, iscsi_url->portal, iscsi_url->lun) != 0) {
1540         error_setg(errp, "iSCSI: Failed to connect to LUN : %s",
1541             iscsi_get_error(iscsi));
1542         ret = -EINVAL;
1543         goto out;
1544     }
1545 
1546     iscsilun->iscsi = iscsi;
1547     iscsilun->aio_context = bdrv_get_aio_context(bs);
1548     iscsilun->lun   = iscsi_url->lun;
1549     iscsilun->has_write_same = true;
1550 
1551     task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 0, 0,
1552                             (void **) &inq, errp);
1553     if (task == NULL) {
1554         ret = -EINVAL;
1555         goto out;
1556     }
1557     iscsilun->type = inq->periperal_device_type;
1558     scsi_free_scsi_task(task);
1559     task = NULL;
1560 
1561     iscsi_modesense_sync(iscsilun);
1562 
1563     /* Check the write protect flag of the LUN if we want to write */
1564     if (iscsilun->type == TYPE_DISK && (flags & BDRV_O_RDWR) &&
1565         iscsilun->write_protected) {
1566         error_setg(errp, "Cannot open a write protected LUN as read-write");
1567         ret = -EACCES;
1568         goto out;
1569     }
1570 
1571     iscsi_readcapacity_sync(iscsilun, &local_err);
1572     if (local_err != NULL) {
1573         error_propagate(errp, local_err);
1574         ret = -EINVAL;
1575         goto out;
1576     }
1577     bs->total_sectors = sector_lun2qemu(iscsilun->num_blocks, iscsilun);
1578     bs->request_alignment = iscsilun->block_size;
1579 
1580     /* We don't have any emulation for devices other than disks and CD-ROMs, so
1581      * this must be sg ioctl compatible. We force it to be sg, otherwise qemu
1582      * will try to read from the device to guess the image format.
1583      */
1584     if (iscsilun->type != TYPE_DISK && iscsilun->type != TYPE_ROM) {
1585         bs->sg = 1;
1586     }
1587 
1588     task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 1,
1589                             SCSI_INQUIRY_PAGECODE_SUPPORTED_VPD_PAGES,
1590                             (void **) &inq_vpd, errp);
1591     if (task == NULL) {
1592         ret = -EINVAL;
1593         goto out;
1594     }
1595     for (i = 0; i < inq_vpd->num_pages; i++) {
1596         struct scsi_task *inq_task;
1597         struct scsi_inquiry_logical_block_provisioning *inq_lbp;
1598         struct scsi_inquiry_block_limits *inq_bl;
1599         switch (inq_vpd->pages[i]) {
1600         case SCSI_INQUIRY_PAGECODE_LOGICAL_BLOCK_PROVISIONING:
1601             inq_task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 1,
1602                                         SCSI_INQUIRY_PAGECODE_LOGICAL_BLOCK_PROVISIONING,
1603                                         (void **) &inq_lbp, errp);
1604             if (inq_task == NULL) {
1605                 ret = -EINVAL;
1606                 goto out;
1607             }
1608             memcpy(&iscsilun->lbp, inq_lbp,
1609                    sizeof(struct scsi_inquiry_logical_block_provisioning));
1610             scsi_free_scsi_task(inq_task);
1611             break;
1612         case SCSI_INQUIRY_PAGECODE_BLOCK_LIMITS:
1613             inq_task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 1,
1614                                     SCSI_INQUIRY_PAGECODE_BLOCK_LIMITS,
1615                                     (void **) &inq_bl, errp);
1616             if (inq_task == NULL) {
1617                 ret = -EINVAL;
1618                 goto out;
1619             }
1620             memcpy(&iscsilun->bl, inq_bl,
1621                    sizeof(struct scsi_inquiry_block_limits));
1622             scsi_free_scsi_task(inq_task);
1623             break;
1624         default:
1625             break;
1626         }
1627     }
1628     scsi_free_scsi_task(task);
1629     task = NULL;
1630 
1631     iscsi_attach_aio_context(bs, iscsilun->aio_context);
1632 
1633     /* Guess the internal cluster (page) size of the iscsi target by the means
1634      * of opt_unmap_gran. Transfer the unmap granularity only if it has a
1635      * reasonable size */
1636     if (iscsilun->bl.opt_unmap_gran * iscsilun->block_size >= 4 * 1024 &&
1637         iscsilun->bl.opt_unmap_gran * iscsilun->block_size <= 16 * 1024 * 1024) {
1638         iscsilun->cluster_sectors = (iscsilun->bl.opt_unmap_gran *
1639                                      iscsilun->block_size) >> BDRV_SECTOR_BITS;
1640         if (iscsilun->lbprz) {
1641             iscsilun->allocationmap = iscsi_allocationmap_init(iscsilun);
1642             if (iscsilun->allocationmap == NULL) {
1643                 ret = -ENOMEM;
1644             }
1645         }
1646     }
1647 
1648 out:
1649     qemu_opts_del(opts);
1650     g_free(initiator_name);
1651     if (iscsi_url != NULL) {
1652         iscsi_destroy_url(iscsi_url);
1653     }
1654     if (task != NULL) {
1655         scsi_free_scsi_task(task);
1656     }
1657 
1658     if (ret) {
1659         if (iscsi != NULL) {
1660             if (iscsi_is_logged_in(iscsi)) {
1661                 iscsi_logout_sync(iscsi);
1662             }
1663             iscsi_destroy_context(iscsi);
1664         }
1665         memset(iscsilun, 0, sizeof(IscsiLun));
1666     }
1667     return ret;
1668 }
1669 
1670 static void iscsi_close(BlockDriverState *bs)
1671 {
1672     IscsiLun *iscsilun = bs->opaque;
1673     struct iscsi_context *iscsi = iscsilun->iscsi;
1674 
1675     iscsi_detach_aio_context(bs);
1676     if (iscsi_is_logged_in(iscsi)) {
1677         iscsi_logout_sync(iscsi);
1678     }
1679     iscsi_destroy_context(iscsi);
1680     g_free(iscsilun->zeroblock);
1681     g_free(iscsilun->allocationmap);
1682     memset(iscsilun, 0, sizeof(IscsiLun));
1683 }
1684 
1685 static int sector_limits_lun2qemu(int64_t sector, IscsiLun *iscsilun)
1686 {
1687     return MIN(sector_lun2qemu(sector, iscsilun), INT_MAX / 2 + 1);
1688 }
1689 
1690 static void iscsi_refresh_limits(BlockDriverState *bs, Error **errp)
1691 {
1692     /* We don't actually refresh here, but just return data queried in
1693      * iscsi_open(): iscsi targets don't change their limits. */
1694 
1695     IscsiLun *iscsilun = bs->opaque;
1696     uint32_t max_xfer_len = iscsilun->use_16_for_rw ? 0xffffffff : 0xffff;
1697 
1698     if (iscsilun->bl.max_xfer_len) {
1699         max_xfer_len = MIN(max_xfer_len, iscsilun->bl.max_xfer_len);
1700     }
1701 
1702     bs->bl.max_transfer_length = sector_limits_lun2qemu(max_xfer_len, iscsilun);
1703 
1704     if (iscsilun->lbp.lbpu) {
1705         if (iscsilun->bl.max_unmap < 0xffffffff) {
1706             bs->bl.max_discard =
1707                 sector_limits_lun2qemu(iscsilun->bl.max_unmap, iscsilun);
1708         }
1709         bs->bl.discard_alignment =
1710             sector_limits_lun2qemu(iscsilun->bl.opt_unmap_gran, iscsilun);
1711     }
1712 
1713     if (iscsilun->bl.max_ws_len < 0xffffffff) {
1714         bs->bl.max_write_zeroes =
1715             sector_limits_lun2qemu(iscsilun->bl.max_ws_len, iscsilun);
1716     }
1717     if (iscsilun->lbp.lbpws) {
1718         bs->bl.write_zeroes_alignment =
1719             sector_limits_lun2qemu(iscsilun->bl.opt_unmap_gran, iscsilun);
1720     }
1721     bs->bl.opt_transfer_length =
1722         sector_limits_lun2qemu(iscsilun->bl.opt_xfer_len, iscsilun);
1723 }
1724 
1725 /* Note that this will not re-establish a connection with an iSCSI target - it
1726  * is effectively a NOP.  */
1727 static int iscsi_reopen_prepare(BDRVReopenState *state,
1728                                 BlockReopenQueue *queue, Error **errp)
1729 {
1730     IscsiLun *iscsilun = state->bs->opaque;
1731 
1732     if (state->flags & BDRV_O_RDWR && iscsilun->write_protected) {
1733         error_setg(errp, "Cannot open a write protected LUN as read-write");
1734         return -EACCES;
1735     }
1736     return 0;
1737 }
1738 
1739 static int iscsi_truncate(BlockDriverState *bs, int64_t offset)
1740 {
1741     IscsiLun *iscsilun = bs->opaque;
1742     Error *local_err = NULL;
1743 
1744     if (iscsilun->type != TYPE_DISK) {
1745         return -ENOTSUP;
1746     }
1747 
1748     iscsi_readcapacity_sync(iscsilun, &local_err);
1749     if (local_err != NULL) {
1750         error_free(local_err);
1751         return -EIO;
1752     }
1753 
1754     if (offset > iscsi_getlength(bs)) {
1755         return -EINVAL;
1756     }
1757 
1758     if (iscsilun->allocationmap != NULL) {
1759         g_free(iscsilun->allocationmap);
1760         iscsilun->allocationmap = iscsi_allocationmap_init(iscsilun);
1761     }
1762 
1763     return 0;
1764 }
1765 
1766 static int iscsi_create(const char *filename, QemuOpts *opts, Error **errp)
1767 {
1768     int ret = 0;
1769     int64_t total_size = 0;
1770     BlockDriverState *bs;
1771     IscsiLun *iscsilun = NULL;
1772     QDict *bs_options;
1773 
1774     bs = bdrv_new();
1775 
1776     /* Read out options */
1777     total_size = DIV_ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
1778                               BDRV_SECTOR_SIZE);
1779     bs->opaque = g_new0(struct IscsiLun, 1);
1780     iscsilun = bs->opaque;
1781 
1782     bs_options = qdict_new();
1783     qdict_put(bs_options, "filename", qstring_from_str(filename));
1784     ret = iscsi_open(bs, bs_options, 0, NULL);
1785     QDECREF(bs_options);
1786 
1787     if (ret != 0) {
1788         goto out;
1789     }
1790     iscsi_detach_aio_context(bs);
1791     if (iscsilun->type != TYPE_DISK) {
1792         ret = -ENODEV;
1793         goto out;
1794     }
1795     if (bs->total_sectors < total_size) {
1796         ret = -ENOSPC;
1797         goto out;
1798     }
1799 
1800     ret = 0;
1801 out:
1802     if (iscsilun->iscsi != NULL) {
1803         iscsi_destroy_context(iscsilun->iscsi);
1804     }
1805     g_free(bs->opaque);
1806     bs->opaque = NULL;
1807     bdrv_unref(bs);
1808     return ret;
1809 }
1810 
1811 static int iscsi_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
1812 {
1813     IscsiLun *iscsilun = bs->opaque;
1814     bdi->unallocated_blocks_are_zero = iscsilun->lbprz;
1815     bdi->can_write_zeroes_with_unmap = iscsilun->lbprz && iscsilun->lbp.lbpws;
1816     bdi->cluster_size = iscsilun->cluster_sectors * BDRV_SECTOR_SIZE;
1817     return 0;
1818 }
1819 
1820 static QemuOptsList iscsi_create_opts = {
1821     .name = "iscsi-create-opts",
1822     .head = QTAILQ_HEAD_INITIALIZER(iscsi_create_opts.head),
1823     .desc = {
1824         {
1825             .name = BLOCK_OPT_SIZE,
1826             .type = QEMU_OPT_SIZE,
1827             .help = "Virtual disk size"
1828         },
1829         { /* end of list */ }
1830     }
1831 };
1832 
1833 static BlockDriver bdrv_iscsi = {
1834     .format_name     = "iscsi",
1835     .protocol_name   = "iscsi",
1836 
1837     .instance_size   = sizeof(IscsiLun),
1838     .bdrv_needs_filename = true,
1839     .bdrv_file_open  = iscsi_open,
1840     .bdrv_close      = iscsi_close,
1841     .bdrv_create     = iscsi_create,
1842     .create_opts     = &iscsi_create_opts,
1843     .bdrv_reopen_prepare  = iscsi_reopen_prepare,
1844 
1845     .bdrv_getlength  = iscsi_getlength,
1846     .bdrv_get_info   = iscsi_get_info,
1847     .bdrv_truncate   = iscsi_truncate,
1848     .bdrv_refresh_limits = iscsi_refresh_limits,
1849 
1850     .bdrv_co_get_block_status = iscsi_co_get_block_status,
1851     .bdrv_co_discard      = iscsi_co_discard,
1852     .bdrv_co_write_zeroes = iscsi_co_write_zeroes,
1853     .bdrv_co_readv         = iscsi_co_readv,
1854     .bdrv_co_writev        = iscsi_co_writev,
1855     .bdrv_co_flush_to_disk = iscsi_co_flush,
1856 
1857 #ifdef __linux__
1858     .bdrv_aio_ioctl   = iscsi_aio_ioctl,
1859 #endif
1860 
1861     .bdrv_detach_aio_context = iscsi_detach_aio_context,
1862     .bdrv_attach_aio_context = iscsi_attach_aio_context,
1863 };
1864 
1865 static QemuOptsList qemu_iscsi_opts = {
1866     .name = "iscsi",
1867     .head = QTAILQ_HEAD_INITIALIZER(qemu_iscsi_opts.head),
1868     .desc = {
1869         {
1870             .name = "user",
1871             .type = QEMU_OPT_STRING,
1872             .help = "username for CHAP authentication to target",
1873         },{
1874             .name = "password",
1875             .type = QEMU_OPT_STRING,
1876             .help = "password for CHAP authentication to target",
1877         },{
1878             .name = "password-secret",
1879             .type = QEMU_OPT_STRING,
1880             .help = "ID of the secret providing password for CHAP "
1881                     "authentication to target",
1882         },{
1883             .name = "header-digest",
1884             .type = QEMU_OPT_STRING,
1885             .help = "HeaderDigest setting. "
1886                     "{CRC32C|CRC32C-NONE|NONE-CRC32C|NONE}",
1887         },{
1888             .name = "initiator-name",
1889             .type = QEMU_OPT_STRING,
1890             .help = "Initiator iqn name to use when connecting",
1891         },{
1892             .name = "timeout",
1893             .type = QEMU_OPT_NUMBER,
1894             .help = "Request timeout in seconds (default 0 = no timeout)",
1895         },
1896         { /* end of list */ }
1897     },
1898 };
1899 
/*
 * Initialisation hook for this driver: passes the bdrv_iscsi driver table to
 * bdrv_register() and the "iscsi" option list (qemu_iscsi_opts) to
 * qemu_add_opts(). Takes no arguments and returns nothing.
 */
static void iscsi_block_init(void)
{
    bdrv_register(&bdrv_iscsi);
    qemu_add_opts(&qemu_iscsi_opts);
}
1905 
/*
 * NOTE(review): block_init() is defined outside this file; presumably it
 * arranges for iscsi_block_init() to be called during block-layer module
 * initialisation -- confirm against the macro's definition.
 */
block_init(iscsi_block_init);
1907