xref: /openbmc/qemu/block/iscsi.c (revision 56c4bfb3)
1 /*
2  * QEMU Block driver for iSCSI images
3  *
4  * Copyright (c) 2010-2011 Ronnie Sahlberg <ronniesahlberg@gmail.com>
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 #include "config-host.h"
26 
27 #include <poll.h>
28 #include <arpa/inet.h>
29 #include "qemu-common.h"
30 #include "qemu/config-file.h"
31 #include "qemu/error-report.h"
32 #include "block/block_int.h"
33 #include "trace.h"
34 #include "block/scsi.h"
35 #include "qemu/iov.h"
36 
37 #include <iscsi/iscsi.h>
38 #include <iscsi/scsi-lowlevel.h>
39 
40 #ifdef __linux__
41 #include <scsi/sg.h>
42 #include <block/scsi.h>
43 #endif
44 
45 typedef struct IscsiLun {
46     struct iscsi_context *iscsi;
47     int lun;
48     enum scsi_inquiry_peripheral_device_type type;
49     int block_size;
50     uint64_t num_blocks;
51     int events;
52     QEMUTimer *nop_timer;
53 } IscsiLun;
54 
55 typedef struct IscsiAIOCB {
56     BlockDriverAIOCB common;
57     QEMUIOVector *qiov;
58     QEMUBH *bh;
59     IscsiLun *iscsilun;
60     struct scsi_task *task;
61     uint8_t *buf;
62     int status;
63     int canceled;
64     int retries;
65     int64_t sector_num;
66     int nb_sectors;
67 #ifdef __linux__
68     sg_io_hdr_t *ioh;
69 #endif
70 } IscsiAIOCB;
71 
/* Delay between two NOP-Out keep-alives, in milliseconds. */
#define NOP_INTERVAL        5000
/* Unanswered NOPs tolerated before the session is reconnected. */
#define MAX_NOP_FAILURES    3
/* Resubmissions allowed per command after a UNIT ATTENTION condition. */
#define ISCSI_CMD_RETRIES   5
75 
76 static void
77 iscsi_bh_cb(void *p)
78 {
79     IscsiAIOCB *acb = p;
80 
81     qemu_bh_delete(acb->bh);
82 
83     g_free(acb->buf);
84     acb->buf = NULL;
85 
86     if (acb->canceled == 0) {
87         acb->common.cb(acb->common.opaque, acb->status);
88     }
89 
90     if (acb->task != NULL) {
91         scsi_free_scsi_task(acb->task);
92         acb->task = NULL;
93     }
94 
95     qemu_aio_release(acb);
96 }
97 
98 static void
99 iscsi_schedule_bh(IscsiAIOCB *acb)
100 {
101     if (acb->bh) {
102         return;
103     }
104     acb->bh = qemu_bh_new(iscsi_bh_cb, acb);
105     qemu_bh_schedule(acb->bh);
106 }
107 
108 
109 static void
110 iscsi_abort_task_cb(struct iscsi_context *iscsi, int status, void *command_data,
111                     void *private_data)
112 {
113     IscsiAIOCB *acb = private_data;
114 
115     acb->status = -ECANCELED;
116     iscsi_schedule_bh(acb);
117 }
118 
119 static void
120 iscsi_aio_cancel(BlockDriverAIOCB *blockacb)
121 {
122     IscsiAIOCB *acb = (IscsiAIOCB *)blockacb;
123     IscsiLun *iscsilun = acb->iscsilun;
124 
125     if (acb->status != -EINPROGRESS) {
126         return;
127     }
128 
129     acb->canceled = 1;
130 
131     /* send a task mgmt call to the target to cancel the task on the target */
132     iscsi_task_mgmt_abort_task_async(iscsilun->iscsi, acb->task,
133                                      iscsi_abort_task_cb, acb);
134 
135     while (acb->status == -EINPROGRESS) {
136         qemu_aio_wait();
137     }
138 }
139 
140 static const AIOCBInfo iscsi_aiocb_info = {
141     .aiocb_size         = sizeof(IscsiAIOCB),
142     .cancel             = iscsi_aio_cancel,
143 };
144 
145 
/* fd handlers, declared early because iscsi_set_events() registers them */
static void iscsi_process_read(void *arg);
static void iscsi_process_write(void *arg);
148 
149 static int iscsi_process_flush(void *arg)
150 {
151     IscsiLun *iscsilun = arg;
152 
153     return iscsi_queue_length(iscsilun->iscsi) > 0;
154 }
155 
156 static void
157 iscsi_set_events(IscsiLun *iscsilun)
158 {
159     struct iscsi_context *iscsi = iscsilun->iscsi;
160     int ev;
161 
162     /* We always register a read handler.  */
163     ev = POLLIN;
164     ev |= iscsi_which_events(iscsi);
165     if (ev != iscsilun->events) {
166         qemu_aio_set_fd_handler(iscsi_get_fd(iscsi),
167                       iscsi_process_read,
168                       (ev & POLLOUT) ? iscsi_process_write : NULL,
169                       iscsi_process_flush,
170                       iscsilun);
171 
172     }
173 
174     iscsilun->events = ev;
175 }
176 
177 static void
178 iscsi_process_read(void *arg)
179 {
180     IscsiLun *iscsilun = arg;
181     struct iscsi_context *iscsi = iscsilun->iscsi;
182 
183     iscsi_service(iscsi, POLLIN);
184     iscsi_set_events(iscsilun);
185 }
186 
187 static void
188 iscsi_process_write(void *arg)
189 {
190     IscsiLun *iscsilun = arg;
191     struct iscsi_context *iscsi = iscsilun->iscsi;
192 
193     iscsi_service(iscsi, POLLOUT);
194     iscsi_set_events(iscsilun);
195 }
196 
197 static int
198 iscsi_aio_writev_acb(IscsiAIOCB *acb);
199 
200 static void
201 iscsi_aio_write16_cb(struct iscsi_context *iscsi, int status,
202                      void *command_data, void *opaque)
203 {
204     IscsiAIOCB *acb = opaque;
205 
206     trace_iscsi_aio_write16_cb(iscsi, status, acb, acb->canceled);
207 
208     g_free(acb->buf);
209     acb->buf = NULL;
210 
211     if (acb->canceled != 0) {
212         return;
213     }
214 
215     acb->status = 0;
216     if (status != 0) {
217         if (status == SCSI_STATUS_CHECK_CONDITION
218             && acb->task->sense.key == SCSI_SENSE_UNIT_ATTENTION
219             && acb->retries-- > 0) {
220             scsi_free_scsi_task(acb->task);
221             acb->task = NULL;
222             if (iscsi_aio_writev_acb(acb) == 0) {
223                 iscsi_set_events(acb->iscsilun);
224                 return;
225             }
226         }
227         error_report("Failed to write16 data to iSCSI lun. %s",
228                      iscsi_get_error(iscsi));
229         acb->status = -EIO;
230     }
231 
232     iscsi_schedule_bh(acb);
233 }
234 
235 static int64_t sector_lun2qemu(int64_t sector, IscsiLun *iscsilun)
236 {
237     return sector * iscsilun->block_size / BDRV_SECTOR_SIZE;
238 }
239 
240 static int64_t sector_qemu2lun(int64_t sector, IscsiLun *iscsilun)
241 {
242     return sector * BDRV_SECTOR_SIZE / iscsilun->block_size;
243 }
244 
245 static bool is_request_lun_aligned(int64_t sector_num, int nb_sectors,
246                                       IscsiLun *iscsilun)
247 {
248     if ((sector_num * BDRV_SECTOR_SIZE) % iscsilun->block_size ||
249         (nb_sectors * BDRV_SECTOR_SIZE) % iscsilun->block_size) {
250             error_report("iSCSI misaligned request: "
251                          "iscsilun->block_size %u, sector_num %" PRIi64
252                          ", nb_sectors %d",
253                          iscsilun->block_size, sector_num, nb_sectors);
254             return 0;
255     }
256     return 1;
257 }
258 
259 static int
260 iscsi_aio_writev_acb(IscsiAIOCB *acb)
261 {
262     struct iscsi_context *iscsi = acb->iscsilun->iscsi;
263     size_t size;
264     uint32_t num_sectors;
265     uint64_t lba;
266 #if !defined(LIBISCSI_FEATURE_IOVECTOR)
267     struct iscsi_data data;
268 #endif
269     int ret;
270 
271     acb->canceled   = 0;
272     acb->bh         = NULL;
273     acb->status     = -EINPROGRESS;
274     acb->buf        = NULL;
275 
276     /* this will allow us to get rid of 'buf' completely */
277     size = acb->nb_sectors * BDRV_SECTOR_SIZE;
278 
279 #if !defined(LIBISCSI_FEATURE_IOVECTOR)
280     data.size = MIN(size, acb->qiov->size);
281 
282     /* if the iovec only contains one buffer we can pass it directly */
283     if (acb->qiov->niov == 1) {
284         data.data = acb->qiov->iov[0].iov_base;
285     } else {
286         acb->buf = g_malloc(data.size);
287         qemu_iovec_to_buf(acb->qiov, 0, acb->buf, data.size);
288         data.data = acb->buf;
289     }
290 #endif
291 
292     acb->task = malloc(sizeof(struct scsi_task));
293     if (acb->task == NULL) {
294         error_report("iSCSI: Failed to allocate task for scsi WRITE16 "
295                      "command. %s", iscsi_get_error(iscsi));
296         return -1;
297     }
298     memset(acb->task, 0, sizeof(struct scsi_task));
299 
300     acb->task->xfer_dir = SCSI_XFER_WRITE;
301     acb->task->cdb_size = 16;
302     acb->task->cdb[0] = 0x8a;
303     lba = sector_qemu2lun(acb->sector_num, acb->iscsilun);
304     *(uint32_t *)&acb->task->cdb[2]  = htonl(lba >> 32);
305     *(uint32_t *)&acb->task->cdb[6]  = htonl(lba & 0xffffffff);
306     num_sectors = sector_qemu2lun(acb->nb_sectors, acb->iscsilun);
307     *(uint32_t *)&acb->task->cdb[10] = htonl(num_sectors);
308     acb->task->expxferlen = size;
309 
310 #if defined(LIBISCSI_FEATURE_IOVECTOR)
311     ret = iscsi_scsi_command_async(iscsi, acb->iscsilun->lun, acb->task,
312                                    iscsi_aio_write16_cb,
313                                    NULL,
314                                    acb);
315 #else
316     ret = iscsi_scsi_command_async(iscsi, acb->iscsilun->lun, acb->task,
317                                    iscsi_aio_write16_cb,
318                                    &data,
319                                    acb);
320 #endif
321     if (ret != 0) {
322         scsi_free_scsi_task(acb->task);
323         g_free(acb->buf);
324         return -1;
325     }
326 
327 #if defined(LIBISCSI_FEATURE_IOVECTOR)
328     scsi_task_set_iov_out(acb->task, (struct scsi_iovec*) acb->qiov->iov, acb->qiov->niov);
329 #endif
330 
331     return 0;
332 }
333 
334 static BlockDriverAIOCB *
335 iscsi_aio_writev(BlockDriverState *bs, int64_t sector_num,
336                  QEMUIOVector *qiov, int nb_sectors,
337                  BlockDriverCompletionFunc *cb,
338                  void *opaque)
339 {
340     IscsiLun *iscsilun = bs->opaque;
341     IscsiAIOCB *acb;
342 
343     if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
344         return NULL;
345     }
346 
347     acb = qemu_aio_get(&iscsi_aiocb_info, bs, cb, opaque);
348     trace_iscsi_aio_writev(iscsilun->iscsi, sector_num, nb_sectors, opaque, acb);
349 
350     acb->iscsilun    = iscsilun;
351     acb->qiov        = qiov;
352     acb->nb_sectors  = nb_sectors;
353     acb->sector_num  = sector_num;
354     acb->retries     = ISCSI_CMD_RETRIES;
355 
356     if (iscsi_aio_writev_acb(acb) != 0) {
357         qemu_aio_release(acb);
358         return NULL;
359     }
360 
361     iscsi_set_events(iscsilun);
362     return &acb->common;
363 }
364 
365 static int
366 iscsi_aio_readv_acb(IscsiAIOCB *acb);
367 
368 static void
369 iscsi_aio_read16_cb(struct iscsi_context *iscsi, int status,
370                     void *command_data, void *opaque)
371 {
372     IscsiAIOCB *acb = opaque;
373 
374     trace_iscsi_aio_read16_cb(iscsi, status, acb, acb->canceled);
375 
376     if (acb->canceled != 0) {
377         return;
378     }
379 
380     acb->status = 0;
381     if (status != 0) {
382         if (status == SCSI_STATUS_CHECK_CONDITION
383             && acb->task->sense.key == SCSI_SENSE_UNIT_ATTENTION
384             && acb->retries-- > 0) {
385             scsi_free_scsi_task(acb->task);
386             acb->task = NULL;
387             if (iscsi_aio_readv_acb(acb) == 0) {
388                 iscsi_set_events(acb->iscsilun);
389                 return;
390             }
391         }
392         error_report("Failed to read16 data from iSCSI lun. %s",
393                      iscsi_get_error(iscsi));
394         acb->status = -EIO;
395     }
396 
397     iscsi_schedule_bh(acb);
398 }
399 
400 static int
401 iscsi_aio_readv_acb(IscsiAIOCB *acb)
402 {
403     struct iscsi_context *iscsi = acb->iscsilun->iscsi;
404     size_t size;
405     uint64_t lba;
406     uint32_t num_sectors;
407     int ret;
408 #if !defined(LIBISCSI_FEATURE_IOVECTOR)
409     int i;
410 #endif
411 
412     acb->canceled    = 0;
413     acb->bh          = NULL;
414     acb->status      = -EINPROGRESS;
415     acb->buf         = NULL;
416 
417     size = acb->nb_sectors * BDRV_SECTOR_SIZE;
418 
419     acb->task = malloc(sizeof(struct scsi_task));
420     if (acb->task == NULL) {
421         error_report("iSCSI: Failed to allocate task for scsi READ16 "
422                      "command. %s", iscsi_get_error(iscsi));
423         return -1;
424     }
425     memset(acb->task, 0, sizeof(struct scsi_task));
426 
427     acb->task->xfer_dir = SCSI_XFER_READ;
428     acb->task->expxferlen = size;
429     lba = sector_qemu2lun(acb->sector_num, acb->iscsilun);
430     num_sectors = sector_qemu2lun(acb->nb_sectors, acb->iscsilun);
431 
432     switch (acb->iscsilun->type) {
433     case TYPE_DISK:
434         acb->task->cdb_size = 16;
435         acb->task->cdb[0]  = 0x88;
436         *(uint32_t *)&acb->task->cdb[2]  = htonl(lba >> 32);
437         *(uint32_t *)&acb->task->cdb[6]  = htonl(lba & 0xffffffff);
438         *(uint32_t *)&acb->task->cdb[10] = htonl(num_sectors);
439         break;
440     default:
441         acb->task->cdb_size = 10;
442         acb->task->cdb[0]  = 0x28;
443         *(uint32_t *)&acb->task->cdb[2] = htonl(lba);
444         *(uint16_t *)&acb->task->cdb[7] = htons(num_sectors);
445         break;
446     }
447 
448     ret = iscsi_scsi_command_async(iscsi, acb->iscsilun->lun, acb->task,
449                                    iscsi_aio_read16_cb,
450                                    NULL,
451                                    acb);
452     if (ret != 0) {
453         scsi_free_scsi_task(acb->task);
454         return -1;
455     }
456 
457 #if defined(LIBISCSI_FEATURE_IOVECTOR)
458     scsi_task_set_iov_in(acb->task, (struct scsi_iovec*) acb->qiov->iov, acb->qiov->niov);
459 #else
460     for (i = 0; i < acb->qiov->niov; i++) {
461         scsi_task_add_data_in_buffer(acb->task,
462                 acb->qiov->iov[i].iov_len,
463                 acb->qiov->iov[i].iov_base);
464     }
465 #endif
466     return 0;
467 }
468 
469 static BlockDriverAIOCB *
470 iscsi_aio_readv(BlockDriverState *bs, int64_t sector_num,
471                 QEMUIOVector *qiov, int nb_sectors,
472                 BlockDriverCompletionFunc *cb,
473                 void *opaque)
474 {
475     IscsiLun *iscsilun = bs->opaque;
476     IscsiAIOCB *acb;
477 
478     if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
479         return NULL;
480     }
481 
482     acb = qemu_aio_get(&iscsi_aiocb_info, bs, cb, opaque);
483     trace_iscsi_aio_readv(iscsilun->iscsi, sector_num, nb_sectors, opaque, acb);
484 
485     acb->nb_sectors  = nb_sectors;
486     acb->sector_num  = sector_num;
487     acb->iscsilun    = iscsilun;
488     acb->qiov        = qiov;
489     acb->retries     = ISCSI_CMD_RETRIES;
490 
491     if (iscsi_aio_readv_acb(acb) != 0) {
492         qemu_aio_release(acb);
493         return NULL;
494     }
495 
496     iscsi_set_events(iscsilun);
497     return &acb->common;
498 }
499 
500 static int
501 iscsi_aio_flush_acb(IscsiAIOCB *acb);
502 
503 static void
504 iscsi_synccache10_cb(struct iscsi_context *iscsi, int status,
505                      void *command_data, void *opaque)
506 {
507     IscsiAIOCB *acb = opaque;
508 
509     if (acb->canceled != 0) {
510         return;
511     }
512 
513     acb->status = 0;
514     if (status != 0) {
515         if (status == SCSI_STATUS_CHECK_CONDITION
516             && acb->task->sense.key == SCSI_SENSE_UNIT_ATTENTION
517             && acb->retries-- > 0) {
518             scsi_free_scsi_task(acb->task);
519             acb->task = NULL;
520             if (iscsi_aio_flush_acb(acb) == 0) {
521                 iscsi_set_events(acb->iscsilun);
522                 return;
523             }
524         }
525         error_report("Failed to sync10 data on iSCSI lun. %s",
526                      iscsi_get_error(iscsi));
527         acb->status = -EIO;
528     }
529 
530     iscsi_schedule_bh(acb);
531 }
532 
533 static int
534 iscsi_aio_flush_acb(IscsiAIOCB *acb)
535 {
536     struct iscsi_context *iscsi = acb->iscsilun->iscsi;
537 
538     acb->canceled   = 0;
539     acb->bh         = NULL;
540     acb->status     = -EINPROGRESS;
541     acb->buf        = NULL;
542 
543     acb->task = iscsi_synchronizecache10_task(iscsi, acb->iscsilun->lun,
544                                          0, 0, 0, 0,
545                                          iscsi_synccache10_cb,
546                                          acb);
547     if (acb->task == NULL) {
548         error_report("iSCSI: Failed to send synchronizecache10 command. %s",
549                      iscsi_get_error(iscsi));
550         return -1;
551     }
552 
553     return 0;
554 }
555 
556 static BlockDriverAIOCB *
557 iscsi_aio_flush(BlockDriverState *bs,
558                 BlockDriverCompletionFunc *cb, void *opaque)
559 {
560     IscsiLun *iscsilun = bs->opaque;
561 
562     IscsiAIOCB *acb;
563 
564     acb = qemu_aio_get(&iscsi_aiocb_info, bs, cb, opaque);
565 
566     acb->iscsilun    = iscsilun;
567     acb->retries     = ISCSI_CMD_RETRIES;
568 
569     if (iscsi_aio_flush_acb(acb) != 0) {
570         qemu_aio_release(acb);
571         return NULL;
572     }
573 
574     iscsi_set_events(iscsilun);
575 
576     return &acb->common;
577 }
578 
579 static int iscsi_aio_discard_acb(IscsiAIOCB *acb);
580 
581 static void
582 iscsi_unmap_cb(struct iscsi_context *iscsi, int status,
583                      void *command_data, void *opaque)
584 {
585     IscsiAIOCB *acb = opaque;
586 
587     if (acb->canceled != 0) {
588         return;
589     }
590 
591     acb->status = 0;
592     if (status != 0) {
593         if (status == SCSI_STATUS_CHECK_CONDITION
594             && acb->task->sense.key == SCSI_SENSE_UNIT_ATTENTION
595             && acb->retries-- > 0) {
596             scsi_free_scsi_task(acb->task);
597             acb->task = NULL;
598             if (iscsi_aio_discard_acb(acb) == 0) {
599                 iscsi_set_events(acb->iscsilun);
600                 return;
601             }
602         }
603         error_report("Failed to unmap data on iSCSI lun. %s",
604                      iscsi_get_error(iscsi));
605         acb->status = -EIO;
606     }
607 
608     iscsi_schedule_bh(acb);
609 }
610 
611 static int iscsi_aio_discard_acb(IscsiAIOCB *acb) {
612     struct iscsi_context *iscsi = acb->iscsilun->iscsi;
613     struct unmap_list list[1];
614 
615     acb->canceled   = 0;
616     acb->bh         = NULL;
617     acb->status     = -EINPROGRESS;
618     acb->buf        = NULL;
619 
620     list[0].lba = sector_qemu2lun(acb->sector_num, acb->iscsilun);
621     list[0].num = acb->nb_sectors * BDRV_SECTOR_SIZE / acb->iscsilun->block_size;
622 
623     acb->task = iscsi_unmap_task(iscsi, acb->iscsilun->lun,
624                                  0, 0, &list[0], 1,
625                                  iscsi_unmap_cb,
626                                  acb);
627     if (acb->task == NULL) {
628         error_report("iSCSI: Failed to send unmap command. %s",
629                      iscsi_get_error(iscsi));
630         return -1;
631     }
632 
633     return 0;
634 }
635 
636 static BlockDriverAIOCB *
637 iscsi_aio_discard(BlockDriverState *bs,
638                   int64_t sector_num, int nb_sectors,
639                   BlockDriverCompletionFunc *cb, void *opaque)
640 {
641     IscsiLun *iscsilun = bs->opaque;
642     IscsiAIOCB *acb;
643 
644     acb = qemu_aio_get(&iscsi_aiocb_info, bs, cb, opaque);
645 
646     acb->iscsilun    = iscsilun;
647     acb->nb_sectors  = nb_sectors;
648     acb->sector_num  = sector_num;
649     acb->retries     = ISCSI_CMD_RETRIES;
650 
651     if (iscsi_aio_discard_acb(acb) != 0) {
652         qemu_aio_release(acb);
653         return NULL;
654     }
655 
656     iscsi_set_events(iscsilun);
657 
658     return &acb->common;
659 }
660 
661 #ifdef __linux__
662 static void
663 iscsi_aio_ioctl_cb(struct iscsi_context *iscsi, int status,
664                      void *command_data, void *opaque)
665 {
666     IscsiAIOCB *acb = opaque;
667 
668     g_free(acb->buf);
669     acb->buf = NULL;
670 
671     if (acb->canceled != 0) {
672         return;
673     }
674 
675     acb->status = 0;
676     if (status < 0) {
677         error_report("Failed to ioctl(SG_IO) to iSCSI lun. %s",
678                      iscsi_get_error(iscsi));
679         acb->status = -EIO;
680     }
681 
682     acb->ioh->driver_status = 0;
683     acb->ioh->host_status   = 0;
684     acb->ioh->resid         = 0;
685 
686 #define SG_ERR_DRIVER_SENSE    0x08
687 
688     if (status == SCSI_STATUS_CHECK_CONDITION && acb->task->datain.size >= 2) {
689         int ss;
690 
691         acb->ioh->driver_status |= SG_ERR_DRIVER_SENSE;
692 
693         acb->ioh->sb_len_wr = acb->task->datain.size - 2;
694         ss = (acb->ioh->mx_sb_len >= acb->ioh->sb_len_wr) ?
695              acb->ioh->mx_sb_len : acb->ioh->sb_len_wr;
696         memcpy(acb->ioh->sbp, &acb->task->datain.data[2], ss);
697     }
698 
699     iscsi_schedule_bh(acb);
700 }
701 
702 static BlockDriverAIOCB *iscsi_aio_ioctl(BlockDriverState *bs,
703         unsigned long int req, void *buf,
704         BlockDriverCompletionFunc *cb, void *opaque)
705 {
706     IscsiLun *iscsilun = bs->opaque;
707     struct iscsi_context *iscsi = iscsilun->iscsi;
708     struct iscsi_data data;
709     IscsiAIOCB *acb;
710 
711     assert(req == SG_IO);
712 
713     acb = qemu_aio_get(&iscsi_aiocb_info, bs, cb, opaque);
714 
715     acb->iscsilun = iscsilun;
716     acb->canceled    = 0;
717     acb->bh          = NULL;
718     acb->status      = -EINPROGRESS;
719     acb->buf         = NULL;
720     acb->ioh         = buf;
721 
722     acb->task = malloc(sizeof(struct scsi_task));
723     if (acb->task == NULL) {
724         error_report("iSCSI: Failed to allocate task for scsi command. %s",
725                      iscsi_get_error(iscsi));
726         qemu_aio_release(acb);
727         return NULL;
728     }
729     memset(acb->task, 0, sizeof(struct scsi_task));
730 
731     switch (acb->ioh->dxfer_direction) {
732     case SG_DXFER_TO_DEV:
733         acb->task->xfer_dir = SCSI_XFER_WRITE;
734         break;
735     case SG_DXFER_FROM_DEV:
736         acb->task->xfer_dir = SCSI_XFER_READ;
737         break;
738     default:
739         acb->task->xfer_dir = SCSI_XFER_NONE;
740         break;
741     }
742 
743     acb->task->cdb_size = acb->ioh->cmd_len;
744     memcpy(&acb->task->cdb[0], acb->ioh->cmdp, acb->ioh->cmd_len);
745     acb->task->expxferlen = acb->ioh->dxfer_len;
746 
747     data.size = 0;
748     if (acb->task->xfer_dir == SCSI_XFER_WRITE) {
749         if (acb->ioh->iovec_count == 0) {
750             data.data = acb->ioh->dxferp;
751             data.size = acb->ioh->dxfer_len;
752         } else {
753 #if defined(LIBISCSI_FEATURE_IOVECTOR)
754             scsi_task_set_iov_out(acb->task,
755                                  (struct scsi_iovec *) acb->ioh->dxferp,
756                                  acb->ioh->iovec_count);
757 #else
758             struct iovec *iov = (struct iovec *)acb->ioh->dxferp;
759 
760             acb->buf = g_malloc(acb->ioh->dxfer_len);
761             data.data = acb->buf;
762             data.size = iov_to_buf(iov, acb->ioh->iovec_count, 0,
763                                    acb->buf, acb->ioh->dxfer_len);
764 #endif
765         }
766     }
767 
768     if (iscsi_scsi_command_async(iscsi, iscsilun->lun, acb->task,
769                                  iscsi_aio_ioctl_cb,
770                                  (data.size > 0) ? &data : NULL,
771                                  acb) != 0) {
772         scsi_free_scsi_task(acb->task);
773         qemu_aio_release(acb);
774         return NULL;
775     }
776 
777     /* tell libiscsi to read straight into the buffer we got from ioctl */
778     if (acb->task->xfer_dir == SCSI_XFER_READ) {
779         if (acb->ioh->iovec_count == 0) {
780             scsi_task_add_data_in_buffer(acb->task,
781                                          acb->ioh->dxfer_len,
782                                          acb->ioh->dxferp);
783         } else {
784 #if defined(LIBISCSI_FEATURE_IOVECTOR)
785             scsi_task_set_iov_in(acb->task,
786                                  (struct scsi_iovec *) acb->ioh->dxferp,
787                                  acb->ioh->iovec_count);
788 #else
789             int i;
790             for (i = 0; i < acb->ioh->iovec_count; i++) {
791                 struct iovec *iov = (struct iovec *)acb->ioh->dxferp;
792 
793                 scsi_task_add_data_in_buffer(acb->task,
794                     iov[i].iov_len,
795                     iov[i].iov_base);
796             }
797 #endif
798         }
799     }
800 
801     iscsi_set_events(iscsilun);
802 
803     return &acb->common;
804 }
805 
806 
/*
 * Completion callback used by the synchronous ioctl path: stores the final
 * status in the int that @opaque points to.
 */
static void ioctl_cb(void *opaque, int status)
{
    *(int *)opaque = status;
}
812 
813 static int iscsi_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
814 {
815     IscsiLun *iscsilun = bs->opaque;
816     int status;
817 
818     switch (req) {
819     case SG_GET_VERSION_NUM:
820         *(int *)buf = 30000;
821         break;
822     case SG_GET_SCSI_ID:
823         ((struct sg_scsi_id *)buf)->scsi_type = iscsilun->type;
824         break;
825     case SG_IO:
826         status = -EINPROGRESS;
827         iscsi_aio_ioctl(bs, req, buf, ioctl_cb, &status);
828 
829         while (status == -EINPROGRESS) {
830             qemu_aio_wait();
831         }
832 
833         return 0;
834     default:
835         return -1;
836     }
837     return 0;
838 }
839 #endif
840 
841 static int64_t
842 iscsi_getlength(BlockDriverState *bs)
843 {
844     IscsiLun *iscsilun = bs->opaque;
845     int64_t len;
846 
847     len  = iscsilun->num_blocks;
848     len *= iscsilun->block_size;
849 
850     return len;
851 }
852 
853 static int parse_chap(struct iscsi_context *iscsi, const char *target)
854 {
855     QemuOptsList *list;
856     QemuOpts *opts;
857     const char *user = NULL;
858     const char *password = NULL;
859 
860     list = qemu_find_opts("iscsi");
861     if (!list) {
862         return 0;
863     }
864 
865     opts = qemu_opts_find(list, target);
866     if (opts == NULL) {
867         opts = QTAILQ_FIRST(&list->head);
868         if (!opts) {
869             return 0;
870         }
871     }
872 
873     user = qemu_opt_get(opts, "user");
874     if (!user) {
875         return 0;
876     }
877 
878     password = qemu_opt_get(opts, "password");
879     if (!password) {
880         error_report("CHAP username specified but no password was given");
881         return -1;
882     }
883 
884     if (iscsi_set_initiator_username_pwd(iscsi, user, password)) {
885         error_report("Failed to set initiator username and password");
886         return -1;
887     }
888 
889     return 0;
890 }
891 
892 static void parse_header_digest(struct iscsi_context *iscsi, const char *target)
893 {
894     QemuOptsList *list;
895     QemuOpts *opts;
896     const char *digest = NULL;
897 
898     list = qemu_find_opts("iscsi");
899     if (!list) {
900         return;
901     }
902 
903     opts = qemu_opts_find(list, target);
904     if (opts == NULL) {
905         opts = QTAILQ_FIRST(&list->head);
906         if (!opts) {
907             return;
908         }
909     }
910 
911     digest = qemu_opt_get(opts, "header-digest");
912     if (!digest) {
913         return;
914     }
915 
916     if (!strcmp(digest, "CRC32C")) {
917         iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_CRC32C);
918     } else if (!strcmp(digest, "NONE")) {
919         iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE);
920     } else if (!strcmp(digest, "CRC32C-NONE")) {
921         iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_CRC32C_NONE);
922     } else if (!strcmp(digest, "NONE-CRC32C")) {
923         iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE_CRC32C);
924     } else {
925         error_report("Invalid header-digest setting : %s", digest);
926     }
927 }
928 
929 static char *parse_initiator_name(const char *target)
930 {
931     QemuOptsList *list;
932     QemuOpts *opts;
933     const char *name = NULL;
934     const char *iscsi_name = qemu_get_vm_name();
935 
936     list = qemu_find_opts("iscsi");
937     if (list) {
938         opts = qemu_opts_find(list, target);
939         if (!opts) {
940             opts = QTAILQ_FIRST(&list->head);
941         }
942         if (opts) {
943             name = qemu_opt_get(opts, "initiator-name");
944         }
945     }
946 
947     if (name) {
948         return g_strdup(name);
949     } else {
950         return g_strdup_printf("iqn.2008-11.org.linux-kvm%s%s",
951                                iscsi_name ? ":" : "",
952                                iscsi_name ? iscsi_name : "");
953     }
954 }
955 
956 #if defined(LIBISCSI_FEATURE_NOP_COUNTER)
957 static void iscsi_nop_timed_event(void *opaque)
958 {
959     IscsiLun *iscsilun = opaque;
960 
961     if (iscsi_get_nops_in_flight(iscsilun->iscsi) > MAX_NOP_FAILURES) {
962         error_report("iSCSI: NOP timeout. Reconnecting...");
963         iscsi_reconnect(iscsilun->iscsi);
964     }
965 
966     if (iscsi_nop_out_async(iscsilun->iscsi, NULL, NULL, 0, NULL) != 0) {
967         error_report("iSCSI: failed to sent NOP-Out. Disabling NOP messages.");
968         return;
969     }
970 
971     qemu_mod_timer(iscsilun->nop_timer, qemu_get_clock_ms(rt_clock) + NOP_INTERVAL);
972     iscsi_set_events(iscsilun);
973 }
974 #endif
975 
976 static int iscsi_readcapacity_sync(IscsiLun *iscsilun)
977 {
978     struct scsi_task *task = NULL;
979     struct scsi_readcapacity10 *rc10 = NULL;
980     struct scsi_readcapacity16 *rc16 = NULL;
981     int ret = 0;
982     int retries = ISCSI_CMD_RETRIES;
983 
984     do {
985         if (task != NULL) {
986             scsi_free_scsi_task(task);
987             task = NULL;
988         }
989 
990         switch (iscsilun->type) {
991         case TYPE_DISK:
992             task = iscsi_readcapacity16_sync(iscsilun->iscsi, iscsilun->lun);
993             if (task != NULL && task->status == SCSI_STATUS_GOOD) {
994                 rc16 = scsi_datain_unmarshall(task);
995                 if (rc16 == NULL) {
996                     error_report("iSCSI: Failed to unmarshall readcapacity16 data.");
997                     ret = -EINVAL;
998                 } else {
999                     iscsilun->block_size = rc16->block_length;
1000                     iscsilun->num_blocks = rc16->returned_lba + 1;
1001                 }
1002             }
1003             break;
1004         case TYPE_ROM:
1005             task = iscsi_readcapacity10_sync(iscsilun->iscsi, iscsilun->lun, 0, 0);
1006             if (task != NULL && task->status == SCSI_STATUS_GOOD) {
1007                 rc10 = scsi_datain_unmarshall(task);
1008                 if (rc10 == NULL) {
1009                     error_report("iSCSI: Failed to unmarshall readcapacity10 data.");
1010                     ret = -EINVAL;
1011                 } else {
1012                     iscsilun->block_size = rc10->block_size;
1013                     if (rc10->lba == 0) {
1014                         /* blank disk loaded */
1015                         iscsilun->num_blocks = 0;
1016                     } else {
1017                         iscsilun->num_blocks = rc10->lba + 1;
1018                     }
1019                 }
1020             }
1021             break;
1022         default:
1023             return 0;
1024         }
1025     } while (task != NULL && task->status == SCSI_STATUS_CHECK_CONDITION
1026              && task->sense.key == SCSI_SENSE_UNIT_ATTENTION
1027              && retries-- > 0);
1028 
1029     if (task == NULL || task->status != SCSI_STATUS_GOOD) {
1030         error_report("iSCSI: failed to send readcapacity10 command.");
1031         ret = -EINVAL;
1032     }
1033     if (task) {
1034         scsi_free_scsi_task(task);
1035     }
1036     return ret;
1037 }
1038 
1039 /* TODO Convert to fine grained options */
1040 static QemuOptsList runtime_opts = {
1041     .name = "iscsi",
1042     .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
1043     .desc = {
1044         {
1045             .name = "filename",
1046             .type = QEMU_OPT_STRING,
1047             .help = "URL to the iscsi image",
1048         },
1049         { /* end of list */ }
1050     },
1051 };
1052 
1053 /*
1054  * We support iscsi url's on the form
1055  * iscsi://[<username>%<password>@]<host>[:<port>]/<targetname>/<lun>
1056  */
1057 static int iscsi_open(BlockDriverState *bs, QDict *options, int flags)
1058 {
1059     IscsiLun *iscsilun = bs->opaque;
1060     struct iscsi_context *iscsi = NULL;
1061     struct iscsi_url *iscsi_url = NULL;
1062     struct scsi_task *task = NULL;
1063     struct scsi_inquiry_standard *inq = NULL;
1064     char *initiator_name = NULL;
1065     QemuOpts *opts;
1066     Error *local_err = NULL;
1067     const char *filename;
1068     int ret;
1069 
1070     if ((BDRV_SECTOR_SIZE % 512) != 0) {
1071         error_report("iSCSI: Invalid BDRV_SECTOR_SIZE. "
1072                      "BDRV_SECTOR_SIZE(%lld) is not a multiple "
1073                      "of 512", BDRV_SECTOR_SIZE);
1074         return -EINVAL;
1075     }
1076 
1077     opts = qemu_opts_create_nofail(&runtime_opts);
1078     qemu_opts_absorb_qdict(opts, options, &local_err);
1079     if (error_is_set(&local_err)) {
1080         qerror_report_err(local_err);
1081         error_free(local_err);
1082         ret = -EINVAL;
1083         goto out;
1084     }
1085 
1086     filename = qemu_opt_get(opts, "filename");
1087 
1088 
1089     iscsi_url = iscsi_parse_full_url(iscsi, filename);
1090     if (iscsi_url == NULL) {
1091         error_report("Failed to parse URL : %s", filename);
1092         ret = -EINVAL;
1093         goto out;
1094     }
1095 
1096     memset(iscsilun, 0, sizeof(IscsiLun));
1097 
1098     initiator_name = parse_initiator_name(iscsi_url->target);
1099 
1100     iscsi = iscsi_create_context(initiator_name);
1101     if (iscsi == NULL) {
1102         error_report("iSCSI: Failed to create iSCSI context.");
1103         ret = -ENOMEM;
1104         goto out;
1105     }
1106 
1107     if (iscsi_set_targetname(iscsi, iscsi_url->target)) {
1108         error_report("iSCSI: Failed to set target name.");
1109         ret = -EINVAL;
1110         goto out;
1111     }
1112 
1113     if (iscsi_url->user != NULL) {
1114         ret = iscsi_set_initiator_username_pwd(iscsi, iscsi_url->user,
1115                                               iscsi_url->passwd);
1116         if (ret != 0) {
1117             error_report("Failed to set initiator username and password");
1118             ret = -EINVAL;
1119             goto out;
1120         }
1121     }
1122 
1123     /* check if we got CHAP username/password via the options */
1124     if (parse_chap(iscsi, iscsi_url->target) != 0) {
1125         error_report("iSCSI: Failed to set CHAP user/password");
1126         ret = -EINVAL;
1127         goto out;
1128     }
1129 
1130     if (iscsi_set_session_type(iscsi, ISCSI_SESSION_NORMAL) != 0) {
1131         error_report("iSCSI: Failed to set session type to normal.");
1132         ret = -EINVAL;
1133         goto out;
1134     }
1135 
1136     iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE_CRC32C);
1137 
1138     /* check if we got HEADER_DIGEST via the options */
1139     parse_header_digest(iscsi, iscsi_url->target);
1140 
1141     if (iscsi_full_connect_sync(iscsi, iscsi_url->portal, iscsi_url->lun) != 0) {
1142         error_report("iSCSI: Failed to connect to LUN : %s",
1143             iscsi_get_error(iscsi));
1144         ret = -EINVAL;
1145         goto out;
1146     }
1147 
1148     iscsilun->iscsi = iscsi;
1149     iscsilun->lun   = iscsi_url->lun;
1150 
1151     task = iscsi_inquiry_sync(iscsi, iscsilun->lun, 0, 0, 36);
1152 
1153     if (task == NULL || task->status != SCSI_STATUS_GOOD) {
1154         error_report("iSCSI: failed to send inquiry command.");
1155         ret = -EINVAL;
1156         goto out;
1157     }
1158 
1159     inq = scsi_datain_unmarshall(task);
1160     if (inq == NULL) {
1161         error_report("iSCSI: Failed to unmarshall inquiry data.");
1162         ret = -EINVAL;
1163         goto out;
1164     }
1165 
1166     iscsilun->type = inq->periperal_device_type;
1167 
1168     if ((ret = iscsi_readcapacity_sync(iscsilun)) != 0) {
1169         goto out;
1170     }
1171     bs->total_sectors = sector_lun2qemu(iscsilun->num_blocks, iscsilun);
1172 
1173     /* Medium changer or tape. We dont have any emulation for this so this must
1174      * be sg ioctl compatible. We force it to be sg, otherwise qemu will try
1175      * to read from the device to guess the image format.
1176      */
1177     if (iscsilun->type == TYPE_MEDIUM_CHANGER ||
1178         iscsilun->type == TYPE_TAPE) {
1179         bs->sg = 1;
1180     }
1181 
1182 #if defined(LIBISCSI_FEATURE_NOP_COUNTER)
1183     /* Set up a timer for sending out iSCSI NOPs */
1184     iscsilun->nop_timer = qemu_new_timer_ms(rt_clock, iscsi_nop_timed_event, iscsilun);
1185     qemu_mod_timer(iscsilun->nop_timer, qemu_get_clock_ms(rt_clock) + NOP_INTERVAL);
1186 #endif
1187 
1188 out:
1189     qemu_opts_del(opts);
1190     if (initiator_name != NULL) {
1191         g_free(initiator_name);
1192     }
1193     if (iscsi_url != NULL) {
1194         iscsi_destroy_url(iscsi_url);
1195     }
1196     if (task != NULL) {
1197         scsi_free_scsi_task(task);
1198     }
1199 
1200     if (ret) {
1201         if (iscsi != NULL) {
1202             iscsi_destroy_context(iscsi);
1203         }
1204         memset(iscsilun, 0, sizeof(IscsiLun));
1205     }
1206     return ret;
1207 }
1208 
1209 static void iscsi_close(BlockDriverState *bs)
1210 {
1211     IscsiLun *iscsilun = bs->opaque;
1212     struct iscsi_context *iscsi = iscsilun->iscsi;
1213 
1214     if (iscsilun->nop_timer) {
1215         qemu_del_timer(iscsilun->nop_timer);
1216         qemu_free_timer(iscsilun->nop_timer);
1217     }
1218     qemu_aio_set_fd_handler(iscsi_get_fd(iscsi), NULL, NULL, NULL, NULL);
1219     iscsi_destroy_context(iscsi);
1220     memset(iscsilun, 0, sizeof(IscsiLun));
1221 }
1222 
1223 static int iscsi_truncate(BlockDriverState *bs, int64_t offset)
1224 {
1225     IscsiLun *iscsilun = bs->opaque;
1226     int ret = 0;
1227 
1228     if (iscsilun->type != TYPE_DISK) {
1229         return -ENOTSUP;
1230     }
1231 
1232     if ((ret = iscsi_readcapacity_sync(iscsilun)) != 0) {
1233         return ret;
1234     }
1235 
1236     if (offset > iscsi_getlength(bs)) {
1237         return -EINVAL;
1238     }
1239 
1240     return 0;
1241 }
1242 
1243 static int iscsi_has_zero_init(BlockDriverState *bs)
1244 {
1245     return 0;
1246 }
1247 
1248 static int iscsi_create(const char *filename, QEMUOptionParameter *options)
1249 {
1250     int ret = 0;
1251     int64_t total_size = 0;
1252     BlockDriverState bs;
1253     IscsiLun *iscsilun = NULL;
1254     QDict *bs_options;
1255 
1256     memset(&bs, 0, sizeof(BlockDriverState));
1257 
1258     /* Read out options */
1259     while (options && options->name) {
1260         if (!strcmp(options->name, "size")) {
1261             total_size = options->value.n / BDRV_SECTOR_SIZE;
1262         }
1263         options++;
1264     }
1265 
1266     bs.opaque = g_malloc0(sizeof(struct IscsiLun));
1267     iscsilun = bs.opaque;
1268 
1269     bs_options = qdict_new();
1270     qdict_put(bs_options, "filename", qstring_from_str(filename));
1271     ret = iscsi_open(&bs, bs_options, 0);
1272     QDECREF(bs_options);
1273 
1274     if (ret != 0) {
1275         goto out;
1276     }
1277     if (iscsilun->nop_timer) {
1278         qemu_del_timer(iscsilun->nop_timer);
1279         qemu_free_timer(iscsilun->nop_timer);
1280     }
1281     if (iscsilun->type != TYPE_DISK) {
1282         ret = -ENODEV;
1283         goto out;
1284     }
1285     if (bs.total_sectors < total_size) {
1286         ret = -ENOSPC;
1287         goto out;
1288     }
1289 
1290     ret = 0;
1291 out:
1292     if (iscsilun->iscsi != NULL) {
1293         iscsi_destroy_context(iscsilun->iscsi);
1294     }
1295     g_free(bs.opaque);
1296     return ret;
1297 }
1298 
1299 static QEMUOptionParameter iscsi_create_options[] = {
1300     {
1301         .name = BLOCK_OPT_SIZE,
1302         .type = OPT_SIZE,
1303         .help = "Virtual disk size"
1304     },
1305     { NULL }
1306 };
1307 
1308 static BlockDriver bdrv_iscsi = {
1309     .format_name     = "iscsi",
1310     .protocol_name   = "iscsi",
1311 
1312     .instance_size   = sizeof(IscsiLun),
1313     .bdrv_file_open  = iscsi_open,
1314     .bdrv_close      = iscsi_close,
1315     .bdrv_create     = iscsi_create,
1316     .create_options  = iscsi_create_options,
1317 
1318     .bdrv_getlength  = iscsi_getlength,
1319     .bdrv_truncate   = iscsi_truncate,
1320 
1321     .bdrv_aio_readv  = iscsi_aio_readv,
1322     .bdrv_aio_writev = iscsi_aio_writev,
1323     .bdrv_aio_flush  = iscsi_aio_flush,
1324 
1325     .bdrv_aio_discard = iscsi_aio_discard,
1326     .bdrv_has_zero_init = iscsi_has_zero_init,
1327 
1328 #ifdef __linux__
1329     .bdrv_ioctl       = iscsi_ioctl,
1330     .bdrv_aio_ioctl   = iscsi_aio_ioctl,
1331 #endif
1332 };
1333 
1334 static QemuOptsList qemu_iscsi_opts = {
1335     .name = "iscsi",
1336     .head = QTAILQ_HEAD_INITIALIZER(qemu_iscsi_opts.head),
1337     .desc = {
1338         {
1339             .name = "user",
1340             .type = QEMU_OPT_STRING,
1341             .help = "username for CHAP authentication to target",
1342         },{
1343             .name = "password",
1344             .type = QEMU_OPT_STRING,
1345             .help = "password for CHAP authentication to target",
1346         },{
1347             .name = "header-digest",
1348             .type = QEMU_OPT_STRING,
1349             .help = "HeaderDigest setting. "
1350                     "{CRC32C|CRC32C-NONE|NONE-CRC32C|NONE}",
1351         },{
1352             .name = "initiator-name",
1353             .type = QEMU_OPT_STRING,
1354             .help = "Initiator iqn name to use when connecting",
1355         },
1356         { /* end of list */ }
1357     },
1358 };
1359 
1360 static void iscsi_block_init(void)
1361 {
1362     bdrv_register(&bdrv_iscsi);
1363     qemu_add_opts(&qemu_iscsi_opts);
1364 }
1365 
1366 block_init(iscsi_block_init);
1367