xref: /openbmc/qemu/block/iscsi.c (revision 878096ee)
1 /*
2  * QEMU Block driver for iSCSI images
3  *
4  * Copyright (c) 2010-2011 Ronnie Sahlberg <ronniesahlberg@gmail.com>
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 #include "config-host.h"
26 
27 #include <poll.h>
28 #include <arpa/inet.h>
29 #include "qemu-common.h"
30 #include "qemu/config-file.h"
31 #include "qemu/error-report.h"
32 #include "block/block_int.h"
33 #include "trace.h"
34 #include "block/scsi.h"
35 
36 #include <iscsi/iscsi.h>
37 #include <iscsi/scsi-lowlevel.h>
38 
39 #ifdef __linux__
40 #include <scsi/sg.h>
41 #include <block/scsi.h>
42 #endif
43 
44 typedef struct IscsiLun {
45     struct iscsi_context *iscsi;
46     int lun;
47     enum scsi_inquiry_peripheral_device_type type;
48     int block_size;
49     uint64_t num_blocks;
50     int events;
51     QEMUTimer *nop_timer;
52 } IscsiLun;
53 
54 typedef struct IscsiAIOCB {
55     BlockDriverAIOCB common;
56     QEMUIOVector *qiov;
57     QEMUBH *bh;
58     IscsiLun *iscsilun;
59     struct scsi_task *task;
60     uint8_t *buf;
61     int status;
62     int canceled;
63     int retries;
64     size_t read_size;
65     size_t read_offset;
66     int64_t sector_num;
67     int nb_sectors;
68 #ifdef __linux__
69     sg_io_hdr_t *ioh;
70 #endif
71 } IscsiAIOCB;
72 
/* Driver tunables, expressed as int constants. */
enum {
    NOP_INTERVAL      = 5000,   /* added to the rt_clock millisecond time when re-arming the NOP timer */
    MAX_NOP_FAILURES  = 3,      /* NOPs in flight tolerated before a reconnect */
    ISCSI_CMD_RETRIES = 5       /* initial value of the per-request retry counter */
};
76 
77 static void
78 iscsi_bh_cb(void *p)
79 {
80     IscsiAIOCB *acb = p;
81 
82     qemu_bh_delete(acb->bh);
83 
84     g_free(acb->buf);
85     acb->buf = NULL;
86 
87     if (acb->canceled == 0) {
88         acb->common.cb(acb->common.opaque, acb->status);
89     }
90 
91     if (acb->task != NULL) {
92         scsi_free_scsi_task(acb->task);
93         acb->task = NULL;
94     }
95 
96     qemu_aio_release(acb);
97 }
98 
99 static void
100 iscsi_schedule_bh(IscsiAIOCB *acb)
101 {
102     if (acb->bh) {
103         return;
104     }
105     acb->bh = qemu_bh_new(iscsi_bh_cb, acb);
106     qemu_bh_schedule(acb->bh);
107 }
108 
109 
110 static void
111 iscsi_abort_task_cb(struct iscsi_context *iscsi, int status, void *command_data,
112                     void *private_data)
113 {
114     IscsiAIOCB *acb = private_data;
115 
116     acb->status = -ECANCELED;
117     iscsi_schedule_bh(acb);
118 }
119 
120 static void
121 iscsi_aio_cancel(BlockDriverAIOCB *blockacb)
122 {
123     IscsiAIOCB *acb = (IscsiAIOCB *)blockacb;
124     IscsiLun *iscsilun = acb->iscsilun;
125 
126     if (acb->status != -EINPROGRESS) {
127         return;
128     }
129 
130     acb->canceled = 1;
131 
132     /* send a task mgmt call to the target to cancel the task on the target */
133     iscsi_task_mgmt_abort_task_async(iscsilun->iscsi, acb->task,
134                                      iscsi_abort_task_cb, acb);
135 
136     while (acb->status == -EINPROGRESS) {
137         qemu_aio_wait();
138     }
139 }
140 
141 static const AIOCBInfo iscsi_aiocb_info = {
142     .aiocb_size         = sizeof(IscsiAIOCB),
143     .cancel             = iscsi_aio_cancel,
144 };
145 
146 
147 static void iscsi_process_read(void *arg);
148 static void iscsi_process_write(void *arg);
149 
150 static int iscsi_process_flush(void *arg)
151 {
152     IscsiLun *iscsilun = arg;
153 
154     return iscsi_queue_length(iscsilun->iscsi) > 0;
155 }
156 
157 static void
158 iscsi_set_events(IscsiLun *iscsilun)
159 {
160     struct iscsi_context *iscsi = iscsilun->iscsi;
161     int ev;
162 
163     /* We always register a read handler.  */
164     ev = POLLIN;
165     ev |= iscsi_which_events(iscsi);
166     if (ev != iscsilun->events) {
167         qemu_aio_set_fd_handler(iscsi_get_fd(iscsi),
168                       iscsi_process_read,
169                       (ev & POLLOUT) ? iscsi_process_write : NULL,
170                       iscsi_process_flush,
171                       iscsilun);
172 
173     }
174 
175     iscsilun->events = ev;
176 }
177 
178 static void
179 iscsi_process_read(void *arg)
180 {
181     IscsiLun *iscsilun = arg;
182     struct iscsi_context *iscsi = iscsilun->iscsi;
183 
184     iscsi_service(iscsi, POLLIN);
185     iscsi_set_events(iscsilun);
186 }
187 
188 static void
189 iscsi_process_write(void *arg)
190 {
191     IscsiLun *iscsilun = arg;
192     struct iscsi_context *iscsi = iscsilun->iscsi;
193 
194     iscsi_service(iscsi, POLLOUT);
195     iscsi_set_events(iscsilun);
196 }
197 
198 static int
199 iscsi_aio_writev_acb(IscsiAIOCB *acb);
200 
201 static void
202 iscsi_aio_write16_cb(struct iscsi_context *iscsi, int status,
203                      void *command_data, void *opaque)
204 {
205     IscsiAIOCB *acb = opaque;
206 
207     trace_iscsi_aio_write16_cb(iscsi, status, acb, acb->canceled);
208 
209     g_free(acb->buf);
210     acb->buf = NULL;
211 
212     if (acb->canceled != 0) {
213         return;
214     }
215 
216     acb->status = 0;
217     if (status != 0) {
218         if (status == SCSI_STATUS_CHECK_CONDITION
219             && acb->task->sense.key == SCSI_SENSE_UNIT_ATTENTION
220             && acb->retries-- > 0) {
221             scsi_free_scsi_task(acb->task);
222             acb->task = NULL;
223             if (iscsi_aio_writev_acb(acb) == 0) {
224                 iscsi_set_events(acb->iscsilun);
225                 return;
226             }
227         }
228         error_report("Failed to write16 data to iSCSI lun. %s",
229                      iscsi_get_error(iscsi));
230         acb->status = -EIO;
231     }
232 
233     iscsi_schedule_bh(acb);
234 }
235 
236 static int64_t sector_qemu2lun(int64_t sector, IscsiLun *iscsilun)
237 {
238     return sector * BDRV_SECTOR_SIZE / iscsilun->block_size;
239 }
240 
241 static int
242 iscsi_aio_writev_acb(IscsiAIOCB *acb)
243 {
244     struct iscsi_context *iscsi = acb->iscsilun->iscsi;
245     size_t size;
246     uint32_t num_sectors;
247     uint64_t lba;
248 #if !defined(LIBISCSI_FEATURE_IOVECTOR)
249     struct iscsi_data data;
250 #endif
251     int ret;
252 
253     acb->canceled   = 0;
254     acb->bh         = NULL;
255     acb->status     = -EINPROGRESS;
256     acb->buf        = NULL;
257 
258     /* this will allow us to get rid of 'buf' completely */
259     size = acb->nb_sectors * BDRV_SECTOR_SIZE;
260 
261 #if !defined(LIBISCSI_FEATURE_IOVECTOR)
262     data.size = MIN(size, acb->qiov->size);
263 
264     /* if the iovec only contains one buffer we can pass it directly */
265     if (acb->qiov->niov == 1) {
266         data.data = acb->qiov->iov[0].iov_base;
267     } else {
268         acb->buf = g_malloc(data.size);
269         qemu_iovec_to_buf(acb->qiov, 0, acb->buf, data.size);
270         data.data = acb->buf;
271     }
272 #endif
273 
274     acb->task = malloc(sizeof(struct scsi_task));
275     if (acb->task == NULL) {
276         error_report("iSCSI: Failed to allocate task for scsi WRITE16 "
277                      "command. %s", iscsi_get_error(iscsi));
278         return -1;
279     }
280     memset(acb->task, 0, sizeof(struct scsi_task));
281 
282     acb->task->xfer_dir = SCSI_XFER_WRITE;
283     acb->task->cdb_size = 16;
284     acb->task->cdb[0] = 0x8a;
285     lba = sector_qemu2lun(acb->sector_num, acb->iscsilun);
286     *(uint32_t *)&acb->task->cdb[2]  = htonl(lba >> 32);
287     *(uint32_t *)&acb->task->cdb[6]  = htonl(lba & 0xffffffff);
288     num_sectors = size / acb->iscsilun->block_size;
289     *(uint32_t *)&acb->task->cdb[10] = htonl(num_sectors);
290     acb->task->expxferlen = size;
291 
292 #if defined(LIBISCSI_FEATURE_IOVECTOR)
293     ret = iscsi_scsi_command_async(iscsi, acb->iscsilun->lun, acb->task,
294                                    iscsi_aio_write16_cb,
295                                    NULL,
296                                    acb);
297 #else
298     ret = iscsi_scsi_command_async(iscsi, acb->iscsilun->lun, acb->task,
299                                    iscsi_aio_write16_cb,
300                                    &data,
301                                    acb);
302 #endif
303     if (ret != 0) {
304         scsi_free_scsi_task(acb->task);
305         g_free(acb->buf);
306         return -1;
307     }
308 
309 #if defined(LIBISCSI_FEATURE_IOVECTOR)
310     scsi_task_set_iov_out(acb->task, (struct scsi_iovec*) acb->qiov->iov, acb->qiov->niov);
311 #endif
312 
313     return 0;
314 }
315 
316 static BlockDriverAIOCB *
317 iscsi_aio_writev(BlockDriverState *bs, int64_t sector_num,
318                  QEMUIOVector *qiov, int nb_sectors,
319                  BlockDriverCompletionFunc *cb,
320                  void *opaque)
321 {
322     IscsiLun *iscsilun = bs->opaque;
323     IscsiAIOCB *acb;
324 
325     acb = qemu_aio_get(&iscsi_aiocb_info, bs, cb, opaque);
326     trace_iscsi_aio_writev(iscsilun->iscsi, sector_num, nb_sectors, opaque, acb);
327 
328     acb->iscsilun    = iscsilun;
329     acb->qiov        = qiov;
330     acb->nb_sectors  = nb_sectors;
331     acb->sector_num  = sector_num;
332     acb->retries     = ISCSI_CMD_RETRIES;
333 
334     if (iscsi_aio_writev_acb(acb) != 0) {
335         qemu_aio_release(acb);
336         return NULL;
337     }
338 
339     iscsi_set_events(iscsilun);
340     return &acb->common;
341 }
342 
343 static int
344 iscsi_aio_readv_acb(IscsiAIOCB *acb);
345 
346 static void
347 iscsi_aio_read16_cb(struct iscsi_context *iscsi, int status,
348                     void *command_data, void *opaque)
349 {
350     IscsiAIOCB *acb = opaque;
351 
352     trace_iscsi_aio_read16_cb(iscsi, status, acb, acb->canceled);
353 
354     if (acb->canceled != 0) {
355         return;
356     }
357 
358     acb->status = 0;
359     if (status != 0) {
360         if (status == SCSI_STATUS_CHECK_CONDITION
361             && acb->task->sense.key == SCSI_SENSE_UNIT_ATTENTION
362             && acb->retries-- > 0) {
363             scsi_free_scsi_task(acb->task);
364             acb->task = NULL;
365             if (iscsi_aio_readv_acb(acb) == 0) {
366                 iscsi_set_events(acb->iscsilun);
367                 return;
368             }
369         }
370         error_report("Failed to read16 data from iSCSI lun. %s",
371                      iscsi_get_error(iscsi));
372         acb->status = -EIO;
373     }
374 
375     iscsi_schedule_bh(acb);
376 }
377 
378 static int
379 iscsi_aio_readv_acb(IscsiAIOCB *acb)
380 {
381     struct iscsi_context *iscsi = acb->iscsilun->iscsi;
382     uint64_t lba;
383     uint32_t num_sectors;
384     int ret;
385 #if !defined(LIBISCSI_FEATURE_IOVECTOR)
386     int i;
387 #endif
388 
389     acb->canceled    = 0;
390     acb->bh          = NULL;
391     acb->status      = -EINPROGRESS;
392     acb->buf         = NULL;
393 
394     /* If LUN blocksize is bigger than BDRV_BLOCK_SIZE a read from QEMU
395      * may be misaligned to the LUN, so we may need to read some extra
396      * data.
397      */
398     acb->read_offset = 0;
399     if (acb->iscsilun->block_size > BDRV_SECTOR_SIZE) {
400         uint64_t bdrv_offset = BDRV_SECTOR_SIZE * acb->sector_num;
401 
402         acb->read_offset  = bdrv_offset % acb->iscsilun->block_size;
403     }
404 
405     num_sectors  = (acb->read_size + acb->iscsilun->block_size
406                     + acb->read_offset - 1)
407                     / acb->iscsilun->block_size;
408 
409     acb->task = malloc(sizeof(struct scsi_task));
410     if (acb->task == NULL) {
411         error_report("iSCSI: Failed to allocate task for scsi READ16 "
412                      "command. %s", iscsi_get_error(iscsi));
413         return -1;
414     }
415     memset(acb->task, 0, sizeof(struct scsi_task));
416 
417     acb->task->xfer_dir = SCSI_XFER_READ;
418     lba = sector_qemu2lun(acb->sector_num, acb->iscsilun);
419     acb->task->expxferlen = acb->read_size;
420 
421     switch (acb->iscsilun->type) {
422     case TYPE_DISK:
423         acb->task->cdb_size = 16;
424         acb->task->cdb[0]  = 0x88;
425         *(uint32_t *)&acb->task->cdb[2]  = htonl(lba >> 32);
426         *(uint32_t *)&acb->task->cdb[6]  = htonl(lba & 0xffffffff);
427         *(uint32_t *)&acb->task->cdb[10] = htonl(num_sectors);
428         break;
429     default:
430         acb->task->cdb_size = 10;
431         acb->task->cdb[0]  = 0x28;
432         *(uint32_t *)&acb->task->cdb[2] = htonl(lba);
433         *(uint16_t *)&acb->task->cdb[7] = htons(num_sectors);
434         break;
435     }
436 
437     ret = iscsi_scsi_command_async(iscsi, acb->iscsilun->lun, acb->task,
438                                    iscsi_aio_read16_cb,
439                                    NULL,
440                                    acb);
441     if (ret != 0) {
442         scsi_free_scsi_task(acb->task);
443         return -1;
444     }
445 
446 #if defined(LIBISCSI_FEATURE_IOVECTOR)
447     scsi_task_set_iov_in(acb->task, (struct scsi_iovec*) acb->qiov->iov, acb->qiov->niov);
448 #else
449     for (i = 0; i < acb->qiov->niov; i++) {
450         scsi_task_add_data_in_buffer(acb->task,
451                 acb->qiov->iov[i].iov_len,
452                 acb->qiov->iov[i].iov_base);
453     }
454 #endif
455     return 0;
456 }
457 
458 static BlockDriverAIOCB *
459 iscsi_aio_readv(BlockDriverState *bs, int64_t sector_num,
460                 QEMUIOVector *qiov, int nb_sectors,
461                 BlockDriverCompletionFunc *cb,
462                 void *opaque)
463 {
464     IscsiLun *iscsilun = bs->opaque;
465     IscsiAIOCB *acb;
466 
467     acb = qemu_aio_get(&iscsi_aiocb_info, bs, cb, opaque);
468     trace_iscsi_aio_readv(iscsilun->iscsi, sector_num, nb_sectors, opaque, acb);
469 
470     acb->nb_sectors  = nb_sectors;
471     acb->sector_num  = sector_num;
472     acb->iscsilun    = iscsilun;
473     acb->qiov        = qiov;
474     acb->read_size   = BDRV_SECTOR_SIZE * (size_t)acb->nb_sectors;
475     acb->retries     = ISCSI_CMD_RETRIES;
476 
477     if (iscsi_aio_readv_acb(acb) != 0) {
478         qemu_aio_release(acb);
479         return NULL;
480     }
481 
482     iscsi_set_events(iscsilun);
483     return &acb->common;
484 }
485 
486 static int
487 iscsi_aio_flush_acb(IscsiAIOCB *acb);
488 
489 static void
490 iscsi_synccache10_cb(struct iscsi_context *iscsi, int status,
491                      void *command_data, void *opaque)
492 {
493     IscsiAIOCB *acb = opaque;
494 
495     if (acb->canceled != 0) {
496         return;
497     }
498 
499     acb->status = 0;
500     if (status != 0) {
501         if (status == SCSI_STATUS_CHECK_CONDITION
502             && acb->task->sense.key == SCSI_SENSE_UNIT_ATTENTION
503             && acb->retries-- > 0) {
504             scsi_free_scsi_task(acb->task);
505             acb->task = NULL;
506             if (iscsi_aio_flush_acb(acb) == 0) {
507                 iscsi_set_events(acb->iscsilun);
508                 return;
509             }
510         }
511         error_report("Failed to sync10 data on iSCSI lun. %s",
512                      iscsi_get_error(iscsi));
513         acb->status = -EIO;
514     }
515 
516     iscsi_schedule_bh(acb);
517 }
518 
519 static int
520 iscsi_aio_flush_acb(IscsiAIOCB *acb)
521 {
522     struct iscsi_context *iscsi = acb->iscsilun->iscsi;
523 
524     acb->canceled   = 0;
525     acb->bh         = NULL;
526     acb->status     = -EINPROGRESS;
527     acb->buf        = NULL;
528 
529     acb->task = iscsi_synchronizecache10_task(iscsi, acb->iscsilun->lun,
530                                          0, 0, 0, 0,
531                                          iscsi_synccache10_cb,
532                                          acb);
533     if (acb->task == NULL) {
534         error_report("iSCSI: Failed to send synchronizecache10 command. %s",
535                      iscsi_get_error(iscsi));
536         return -1;
537     }
538 
539     return 0;
540 }
541 
542 static BlockDriverAIOCB *
543 iscsi_aio_flush(BlockDriverState *bs,
544                 BlockDriverCompletionFunc *cb, void *opaque)
545 {
546     IscsiLun *iscsilun = bs->opaque;
547 
548     IscsiAIOCB *acb;
549 
550     acb = qemu_aio_get(&iscsi_aiocb_info, bs, cb, opaque);
551 
552     acb->iscsilun    = iscsilun;
553     acb->retries     = ISCSI_CMD_RETRIES;
554 
555     if (iscsi_aio_flush_acb(acb) != 0) {
556         qemu_aio_release(acb);
557         return NULL;
558     }
559 
560     iscsi_set_events(iscsilun);
561 
562     return &acb->common;
563 }
564 
565 static int iscsi_aio_discard_acb(IscsiAIOCB *acb);
566 
567 static void
568 iscsi_unmap_cb(struct iscsi_context *iscsi, int status,
569                      void *command_data, void *opaque)
570 {
571     IscsiAIOCB *acb = opaque;
572 
573     if (acb->canceled != 0) {
574         return;
575     }
576 
577     acb->status = 0;
578     if (status != 0) {
579         if (status == SCSI_STATUS_CHECK_CONDITION
580             && acb->task->sense.key == SCSI_SENSE_UNIT_ATTENTION
581             && acb->retries-- > 0) {
582             scsi_free_scsi_task(acb->task);
583             acb->task = NULL;
584             if (iscsi_aio_discard_acb(acb) == 0) {
585                 iscsi_set_events(acb->iscsilun);
586                 return;
587             }
588         }
589         error_report("Failed to unmap data on iSCSI lun. %s",
590                      iscsi_get_error(iscsi));
591         acb->status = -EIO;
592     }
593 
594     iscsi_schedule_bh(acb);
595 }
596 
597 static int iscsi_aio_discard_acb(IscsiAIOCB *acb) {
598     struct iscsi_context *iscsi = acb->iscsilun->iscsi;
599     struct unmap_list list[1];
600 
601     acb->canceled   = 0;
602     acb->bh         = NULL;
603     acb->status     = -EINPROGRESS;
604     acb->buf        = NULL;
605 
606     list[0].lba = sector_qemu2lun(acb->sector_num, acb->iscsilun);
607     list[0].num = acb->nb_sectors * BDRV_SECTOR_SIZE / acb->iscsilun->block_size;
608 
609     acb->task = iscsi_unmap_task(iscsi, acb->iscsilun->lun,
610                                  0, 0, &list[0], 1,
611                                  iscsi_unmap_cb,
612                                  acb);
613     if (acb->task == NULL) {
614         error_report("iSCSI: Failed to send unmap command. %s",
615                      iscsi_get_error(iscsi));
616         return -1;
617     }
618 
619     return 0;
620 }
621 
622 static BlockDriverAIOCB *
623 iscsi_aio_discard(BlockDriverState *bs,
624                   int64_t sector_num, int nb_sectors,
625                   BlockDriverCompletionFunc *cb, void *opaque)
626 {
627     IscsiLun *iscsilun = bs->opaque;
628     IscsiAIOCB *acb;
629 
630     acb = qemu_aio_get(&iscsi_aiocb_info, bs, cb, opaque);
631 
632     acb->iscsilun    = iscsilun;
633     acb->nb_sectors  = nb_sectors;
634     acb->sector_num  = sector_num;
635     acb->retries     = ISCSI_CMD_RETRIES;
636 
637     if (iscsi_aio_discard_acb(acb) != 0) {
638         qemu_aio_release(acb);
639         return NULL;
640     }
641 
642     iscsi_set_events(iscsilun);
643 
644     return &acb->common;
645 }
646 
647 #ifdef __linux__
648 static void
649 iscsi_aio_ioctl_cb(struct iscsi_context *iscsi, int status,
650                      void *command_data, void *opaque)
651 {
652     IscsiAIOCB *acb = opaque;
653 
654     if (acb->canceled != 0) {
655         return;
656     }
657 
658     acb->status = 0;
659     if (status < 0) {
660         error_report("Failed to ioctl(SG_IO) to iSCSI lun. %s",
661                      iscsi_get_error(iscsi));
662         acb->status = -EIO;
663     }
664 
665     acb->ioh->driver_status = 0;
666     acb->ioh->host_status   = 0;
667     acb->ioh->resid         = 0;
668 
669 #define SG_ERR_DRIVER_SENSE    0x08
670 
671     if (status == SCSI_STATUS_CHECK_CONDITION && acb->task->datain.size >= 2) {
672         int ss;
673 
674         acb->ioh->driver_status |= SG_ERR_DRIVER_SENSE;
675 
676         acb->ioh->sb_len_wr = acb->task->datain.size - 2;
677         ss = (acb->ioh->mx_sb_len >= acb->ioh->sb_len_wr) ?
678              acb->ioh->mx_sb_len : acb->ioh->sb_len_wr;
679         memcpy(acb->ioh->sbp, &acb->task->datain.data[2], ss);
680     }
681 
682     iscsi_schedule_bh(acb);
683 }
684 
685 static BlockDriverAIOCB *iscsi_aio_ioctl(BlockDriverState *bs,
686         unsigned long int req, void *buf,
687         BlockDriverCompletionFunc *cb, void *opaque)
688 {
689     IscsiLun *iscsilun = bs->opaque;
690     struct iscsi_context *iscsi = iscsilun->iscsi;
691     struct iscsi_data data;
692     IscsiAIOCB *acb;
693 
694     assert(req == SG_IO);
695 
696     acb = qemu_aio_get(&iscsi_aiocb_info, bs, cb, opaque);
697 
698     acb->iscsilun = iscsilun;
699     acb->canceled    = 0;
700     acb->bh          = NULL;
701     acb->status      = -EINPROGRESS;
702     acb->buf         = NULL;
703     acb->ioh         = buf;
704 
705     acb->task = malloc(sizeof(struct scsi_task));
706     if (acb->task == NULL) {
707         error_report("iSCSI: Failed to allocate task for scsi command. %s",
708                      iscsi_get_error(iscsi));
709         qemu_aio_release(acb);
710         return NULL;
711     }
712     memset(acb->task, 0, sizeof(struct scsi_task));
713 
714     switch (acb->ioh->dxfer_direction) {
715     case SG_DXFER_TO_DEV:
716         acb->task->xfer_dir = SCSI_XFER_WRITE;
717         break;
718     case SG_DXFER_FROM_DEV:
719         acb->task->xfer_dir = SCSI_XFER_READ;
720         break;
721     default:
722         acb->task->xfer_dir = SCSI_XFER_NONE;
723         break;
724     }
725 
726     acb->task->cdb_size = acb->ioh->cmd_len;
727     memcpy(&acb->task->cdb[0], acb->ioh->cmdp, acb->ioh->cmd_len);
728     acb->task->expxferlen = acb->ioh->dxfer_len;
729 
730     if (acb->task->xfer_dir == SCSI_XFER_WRITE) {
731         data.data = acb->ioh->dxferp;
732         data.size = acb->ioh->dxfer_len;
733     }
734     if (iscsi_scsi_command_async(iscsi, iscsilun->lun, acb->task,
735                                  iscsi_aio_ioctl_cb,
736                                  (acb->task->xfer_dir == SCSI_XFER_WRITE) ?
737                                      &data : NULL,
738                                  acb) != 0) {
739         scsi_free_scsi_task(acb->task);
740         qemu_aio_release(acb);
741         return NULL;
742     }
743 
744     /* tell libiscsi to read straight into the buffer we got from ioctl */
745     if (acb->task->xfer_dir == SCSI_XFER_READ) {
746         scsi_task_add_data_in_buffer(acb->task,
747                                      acb->ioh->dxfer_len,
748                                      acb->ioh->dxferp);
749     }
750 
751     iscsi_set_events(iscsilun);
752 
753     return &acb->common;
754 }
755 
756 
/*
 * Completion callback used by the synchronous SG_IO path: stores @status in
 * the int that @opaque points to.
 */
static void ioctl_cb(void *opaque, int status)
{
    *(int *)opaque = status;
}
762 
763 static int iscsi_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
764 {
765     IscsiLun *iscsilun = bs->opaque;
766     int status;
767 
768     switch (req) {
769     case SG_GET_VERSION_NUM:
770         *(int *)buf = 30000;
771         break;
772     case SG_GET_SCSI_ID:
773         ((struct sg_scsi_id *)buf)->scsi_type = iscsilun->type;
774         break;
775     case SG_IO:
776         status = -EINPROGRESS;
777         iscsi_aio_ioctl(bs, req, buf, ioctl_cb, &status);
778 
779         while (status == -EINPROGRESS) {
780             qemu_aio_wait();
781         }
782 
783         return 0;
784     default:
785         return -1;
786     }
787     return 0;
788 }
789 #endif
790 
791 static int64_t
792 iscsi_getlength(BlockDriverState *bs)
793 {
794     IscsiLun *iscsilun = bs->opaque;
795     int64_t len;
796 
797     len  = iscsilun->num_blocks;
798     len *= iscsilun->block_size;
799 
800     return len;
801 }
802 
803 static int parse_chap(struct iscsi_context *iscsi, const char *target)
804 {
805     QemuOptsList *list;
806     QemuOpts *opts;
807     const char *user = NULL;
808     const char *password = NULL;
809 
810     list = qemu_find_opts("iscsi");
811     if (!list) {
812         return 0;
813     }
814 
815     opts = qemu_opts_find(list, target);
816     if (opts == NULL) {
817         opts = QTAILQ_FIRST(&list->head);
818         if (!opts) {
819             return 0;
820         }
821     }
822 
823     user = qemu_opt_get(opts, "user");
824     if (!user) {
825         return 0;
826     }
827 
828     password = qemu_opt_get(opts, "password");
829     if (!password) {
830         error_report("CHAP username specified but no password was given");
831         return -1;
832     }
833 
834     if (iscsi_set_initiator_username_pwd(iscsi, user, password)) {
835         error_report("Failed to set initiator username and password");
836         return -1;
837     }
838 
839     return 0;
840 }
841 
842 static void parse_header_digest(struct iscsi_context *iscsi, const char *target)
843 {
844     QemuOptsList *list;
845     QemuOpts *opts;
846     const char *digest = NULL;
847 
848     list = qemu_find_opts("iscsi");
849     if (!list) {
850         return;
851     }
852 
853     opts = qemu_opts_find(list, target);
854     if (opts == NULL) {
855         opts = QTAILQ_FIRST(&list->head);
856         if (!opts) {
857             return;
858         }
859     }
860 
861     digest = qemu_opt_get(opts, "header-digest");
862     if (!digest) {
863         return;
864     }
865 
866     if (!strcmp(digest, "CRC32C")) {
867         iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_CRC32C);
868     } else if (!strcmp(digest, "NONE")) {
869         iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE);
870     } else if (!strcmp(digest, "CRC32C-NONE")) {
871         iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_CRC32C_NONE);
872     } else if (!strcmp(digest, "NONE-CRC32C")) {
873         iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE_CRC32C);
874     } else {
875         error_report("Invalid header-digest setting : %s", digest);
876     }
877 }
878 
879 static char *parse_initiator_name(const char *target)
880 {
881     QemuOptsList *list;
882     QemuOpts *opts;
883     const char *name = NULL;
884     const char *iscsi_name = qemu_get_vm_name();
885 
886     list = qemu_find_opts("iscsi");
887     if (list) {
888         opts = qemu_opts_find(list, target);
889         if (!opts) {
890             opts = QTAILQ_FIRST(&list->head);
891         }
892         if (opts) {
893             name = qemu_opt_get(opts, "initiator-name");
894         }
895     }
896 
897     if (name) {
898         return g_strdup(name);
899     } else {
900         return g_strdup_printf("iqn.2008-11.org.linux-kvm%s%s",
901                                iscsi_name ? ":" : "",
902                                iscsi_name ? iscsi_name : "");
903     }
904 }
905 
906 #if defined(LIBISCSI_FEATURE_NOP_COUNTER)
907 static void iscsi_nop_timed_event(void *opaque)
908 {
909     IscsiLun *iscsilun = opaque;
910 
911     if (iscsi_get_nops_in_flight(iscsilun->iscsi) > MAX_NOP_FAILURES) {
912         error_report("iSCSI: NOP timeout. Reconnecting...");
913         iscsi_reconnect(iscsilun->iscsi);
914     }
915 
916     if (iscsi_nop_out_async(iscsilun->iscsi, NULL, NULL, 0, NULL) != 0) {
917         error_report("iSCSI: failed to sent NOP-Out. Disabling NOP messages.");
918         return;
919     }
920 
921     qemu_mod_timer(iscsilun->nop_timer, qemu_get_clock_ms(rt_clock) + NOP_INTERVAL);
922     iscsi_set_events(iscsilun);
923 }
924 #endif
925 
926 static int iscsi_readcapacity_sync(IscsiLun *iscsilun)
927 {
928     struct scsi_task *task = NULL;
929     struct scsi_readcapacity10 *rc10 = NULL;
930     struct scsi_readcapacity16 *rc16 = NULL;
931     int ret = 0;
932     int retries = ISCSI_CMD_RETRIES;
933 
934     do {
935         if (task != NULL) {
936             scsi_free_scsi_task(task);
937             task = NULL;
938         }
939 
940         switch (iscsilun->type) {
941         case TYPE_DISK:
942             task = iscsi_readcapacity16_sync(iscsilun->iscsi, iscsilun->lun);
943             if (task != NULL && task->status == SCSI_STATUS_GOOD) {
944                 rc16 = scsi_datain_unmarshall(task);
945                 if (rc16 == NULL) {
946                     error_report("iSCSI: Failed to unmarshall readcapacity16 data.");
947                     ret = -EINVAL;
948                 } else {
949                     iscsilun->block_size = rc16->block_length;
950                     iscsilun->num_blocks = rc16->returned_lba + 1;
951                 }
952             }
953             break;
954         case TYPE_ROM:
955             task = iscsi_readcapacity10_sync(iscsilun->iscsi, iscsilun->lun, 0, 0);
956             if (task != NULL && task->status == SCSI_STATUS_GOOD) {
957                 rc10 = scsi_datain_unmarshall(task);
958                 if (rc10 == NULL) {
959                     error_report("iSCSI: Failed to unmarshall readcapacity10 data.");
960                     ret = -EINVAL;
961                 } else {
962                     iscsilun->block_size = rc10->block_size;
963                     if (rc10->lba == 0) {
964                         /* blank disk loaded */
965                         iscsilun->num_blocks = 0;
966                     } else {
967                         iscsilun->num_blocks = rc10->lba + 1;
968                     }
969                 }
970             }
971             break;
972         default:
973             return 0;
974         }
975     } while (task != NULL && task->status == SCSI_STATUS_CHECK_CONDITION
976              && task->sense.key == SCSI_SENSE_UNIT_ATTENTION
977              && retries-- > 0);
978 
979     if (task == NULL || task->status != SCSI_STATUS_GOOD) {
980         error_report("iSCSI: failed to send readcapacity10 command.");
981         ret = -EINVAL;
982     }
983     if (task) {
984         scsi_free_scsi_task(task);
985     }
986     return ret;
987 }
988 
989 /* TODO Convert to fine grained options */
990 static QemuOptsList runtime_opts = {
991     .name = "iscsi",
992     .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
993     .desc = {
994         {
995             .name = "filename",
996             .type = QEMU_OPT_STRING,
997             .help = "URL to the iscsi image",
998         },
999         { /* end of list */ }
1000     },
1001 };
1002 
/*
 * We support iSCSI URLs of the form
 * iscsi://[<username>%<password>@]<host>[:<port>]/<targetname>/<lun>
 */
1007 static int iscsi_open(BlockDriverState *bs, QDict *options, int flags)
1008 {
1009     IscsiLun *iscsilun = bs->opaque;
1010     struct iscsi_context *iscsi = NULL;
1011     struct iscsi_url *iscsi_url = NULL;
1012     struct scsi_task *task = NULL;
1013     struct scsi_inquiry_standard *inq = NULL;
1014     char *initiator_name = NULL;
1015     QemuOpts *opts;
1016     Error *local_err = NULL;
1017     const char *filename;
1018     int ret;
1019 
1020     if ((BDRV_SECTOR_SIZE % 512) != 0) {
1021         error_report("iSCSI: Invalid BDRV_SECTOR_SIZE. "
1022                      "BDRV_SECTOR_SIZE(%lld) is not a multiple "
1023                      "of 512", BDRV_SECTOR_SIZE);
1024         return -EINVAL;
1025     }
1026 
1027     opts = qemu_opts_create_nofail(&runtime_opts);
1028     qemu_opts_absorb_qdict(opts, options, &local_err);
1029     if (error_is_set(&local_err)) {
1030         qerror_report_err(local_err);
1031         error_free(local_err);
1032         ret = -EINVAL;
1033         goto out;
1034     }
1035 
1036     filename = qemu_opt_get(opts, "filename");
1037 
1038 
1039     iscsi_url = iscsi_parse_full_url(iscsi, filename);
1040     if (iscsi_url == NULL) {
1041         error_report("Failed to parse URL : %s", filename);
1042         ret = -EINVAL;
1043         goto out;
1044     }
1045 
1046     memset(iscsilun, 0, sizeof(IscsiLun));
1047 
1048     initiator_name = parse_initiator_name(iscsi_url->target);
1049 
1050     iscsi = iscsi_create_context(initiator_name);
1051     if (iscsi == NULL) {
1052         error_report("iSCSI: Failed to create iSCSI context.");
1053         ret = -ENOMEM;
1054         goto out;
1055     }
1056 
1057     if (iscsi_set_targetname(iscsi, iscsi_url->target)) {
1058         error_report("iSCSI: Failed to set target name.");
1059         ret = -EINVAL;
1060         goto out;
1061     }
1062 
1063     if (iscsi_url->user != NULL) {
1064         ret = iscsi_set_initiator_username_pwd(iscsi, iscsi_url->user,
1065                                               iscsi_url->passwd);
1066         if (ret != 0) {
1067             error_report("Failed to set initiator username and password");
1068             ret = -EINVAL;
1069             goto out;
1070         }
1071     }
1072 
1073     /* check if we got CHAP username/password via the options */
1074     if (parse_chap(iscsi, iscsi_url->target) != 0) {
1075         error_report("iSCSI: Failed to set CHAP user/password");
1076         ret = -EINVAL;
1077         goto out;
1078     }
1079 
1080     if (iscsi_set_session_type(iscsi, ISCSI_SESSION_NORMAL) != 0) {
1081         error_report("iSCSI: Failed to set session type to normal.");
1082         ret = -EINVAL;
1083         goto out;
1084     }
1085 
1086     iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE_CRC32C);
1087 
1088     /* check if we got HEADER_DIGEST via the options */
1089     parse_header_digest(iscsi, iscsi_url->target);
1090 
1091     if (iscsi_full_connect_sync(iscsi, iscsi_url->portal, iscsi_url->lun) != 0) {
1092         error_report("iSCSI: Failed to connect to LUN : %s",
1093             iscsi_get_error(iscsi));
1094         ret = -EINVAL;
1095         goto out;
1096     }
1097 
1098     iscsilun->iscsi = iscsi;
1099     iscsilun->lun   = iscsi_url->lun;
1100 
1101     task = iscsi_inquiry_sync(iscsi, iscsilun->lun, 0, 0, 36);
1102 
1103     if (task == NULL || task->status != SCSI_STATUS_GOOD) {
1104         error_report("iSCSI: failed to send inquiry command.");
1105         ret = -EINVAL;
1106         goto out;
1107     }
1108 
1109     inq = scsi_datain_unmarshall(task);
1110     if (inq == NULL) {
1111         error_report("iSCSI: Failed to unmarshall inquiry data.");
1112         ret = -EINVAL;
1113         goto out;
1114     }
1115 
1116     iscsilun->type = inq->periperal_device_type;
1117 
1118     if ((ret = iscsi_readcapacity_sync(iscsilun)) != 0) {
1119         goto out;
1120     }
1121     bs->total_sectors    = iscsilun->num_blocks *
1122                            iscsilun->block_size / BDRV_SECTOR_SIZE ;
1123 
1124     /* Medium changer or tape. We dont have any emulation for this so this must
1125      * be sg ioctl compatible. We force it to be sg, otherwise qemu will try
1126      * to read from the device to guess the image format.
1127      */
1128     if (iscsilun->type == TYPE_MEDIUM_CHANGER ||
1129         iscsilun->type == TYPE_TAPE) {
1130         bs->sg = 1;
1131     }
1132 
1133 #if defined(LIBISCSI_FEATURE_NOP_COUNTER)
1134     /* Set up a timer for sending out iSCSI NOPs */
1135     iscsilun->nop_timer = qemu_new_timer_ms(rt_clock, iscsi_nop_timed_event, iscsilun);
1136     qemu_mod_timer(iscsilun->nop_timer, qemu_get_clock_ms(rt_clock) + NOP_INTERVAL);
1137 #endif
1138 
1139 out:
1140     qemu_opts_del(opts);
1141     if (initiator_name != NULL) {
1142         g_free(initiator_name);
1143     }
1144     if (iscsi_url != NULL) {
1145         iscsi_destroy_url(iscsi_url);
1146     }
1147     if (task != NULL) {
1148         scsi_free_scsi_task(task);
1149     }
1150 
1151     if (ret) {
1152         if (iscsi != NULL) {
1153             iscsi_destroy_context(iscsi);
1154         }
1155         memset(iscsilun, 0, sizeof(IscsiLun));
1156     }
1157     return ret;
1158 }
1159 
1160 static void iscsi_close(BlockDriverState *bs)
1161 {
1162     IscsiLun *iscsilun = bs->opaque;
1163     struct iscsi_context *iscsi = iscsilun->iscsi;
1164 
1165     if (iscsilun->nop_timer) {
1166         qemu_del_timer(iscsilun->nop_timer);
1167         qemu_free_timer(iscsilun->nop_timer);
1168     }
1169     qemu_aio_set_fd_handler(iscsi_get_fd(iscsi), NULL, NULL, NULL, NULL);
1170     iscsi_destroy_context(iscsi);
1171     memset(iscsilun, 0, sizeof(IscsiLun));
1172 }
1173 
1174 static int iscsi_truncate(BlockDriverState *bs, int64_t offset)
1175 {
1176     IscsiLun *iscsilun = bs->opaque;
1177     int ret = 0;
1178 
1179     if (iscsilun->type != TYPE_DISK) {
1180         return -ENOTSUP;
1181     }
1182 
1183     if ((ret = iscsi_readcapacity_sync(iscsilun)) != 0) {
1184         return ret;
1185     }
1186 
1187     if (offset > iscsi_getlength(bs)) {
1188         return -EINVAL;
1189     }
1190 
1191     return 0;
1192 }
1193 
1194 static int iscsi_has_zero_init(BlockDriverState *bs)
1195 {
1196     return 0;
1197 }
1198 
1199 static int iscsi_create(const char *filename, QEMUOptionParameter *options)
1200 {
1201     int ret = 0;
1202     int64_t total_size = 0;
1203     BlockDriverState bs;
1204     IscsiLun *iscsilun = NULL;
1205     QDict *bs_options;
1206 
1207     memset(&bs, 0, sizeof(BlockDriverState));
1208 
1209     /* Read out options */
1210     while (options && options->name) {
1211         if (!strcmp(options->name, "size")) {
1212             total_size = options->value.n / BDRV_SECTOR_SIZE;
1213         }
1214         options++;
1215     }
1216 
1217     bs.opaque = g_malloc0(sizeof(struct IscsiLun));
1218     iscsilun = bs.opaque;
1219 
1220     bs_options = qdict_new();
1221     qdict_put(bs_options, "filename", qstring_from_str(filename));
1222     ret = iscsi_open(&bs, bs_options, 0);
1223     QDECREF(bs_options);
1224 
1225     if (ret != 0) {
1226         goto out;
1227     }
1228     if (iscsilun->nop_timer) {
1229         qemu_del_timer(iscsilun->nop_timer);
1230         qemu_free_timer(iscsilun->nop_timer);
1231     }
1232     if (iscsilun->type != TYPE_DISK) {
1233         ret = -ENODEV;
1234         goto out;
1235     }
1236     if (bs.total_sectors < total_size) {
1237         ret = -ENOSPC;
1238     }
1239 
1240     ret = 0;
1241 out:
1242     if (iscsilun->iscsi != NULL) {
1243         iscsi_destroy_context(iscsilun->iscsi);
1244     }
1245     g_free(bs.opaque);
1246     return ret;
1247 }
1248 
1249 static QEMUOptionParameter iscsi_create_options[] = {
1250     {
1251         .name = BLOCK_OPT_SIZE,
1252         .type = OPT_SIZE,
1253         .help = "Virtual disk size"
1254     },
1255     { NULL }
1256 };
1257 
1258 static BlockDriver bdrv_iscsi = {
1259     .format_name     = "iscsi",
1260     .protocol_name   = "iscsi",
1261 
1262     .instance_size   = sizeof(IscsiLun),
1263     .bdrv_file_open  = iscsi_open,
1264     .bdrv_close      = iscsi_close,
1265     .bdrv_create     = iscsi_create,
1266     .create_options  = iscsi_create_options,
1267 
1268     .bdrv_getlength  = iscsi_getlength,
1269     .bdrv_truncate   = iscsi_truncate,
1270 
1271     .bdrv_aio_readv  = iscsi_aio_readv,
1272     .bdrv_aio_writev = iscsi_aio_writev,
1273     .bdrv_aio_flush  = iscsi_aio_flush,
1274 
1275     .bdrv_aio_discard = iscsi_aio_discard,
1276     .bdrv_has_zero_init = iscsi_has_zero_init,
1277 
1278 #ifdef __linux__
1279     .bdrv_ioctl       = iscsi_ioctl,
1280     .bdrv_aio_ioctl   = iscsi_aio_ioctl,
1281 #endif
1282 };
1283 
1284 static QemuOptsList qemu_iscsi_opts = {
1285     .name = "iscsi",
1286     .head = QTAILQ_HEAD_INITIALIZER(qemu_iscsi_opts.head),
1287     .desc = {
1288         {
1289             .name = "user",
1290             .type = QEMU_OPT_STRING,
1291             .help = "username for CHAP authentication to target",
1292         },{
1293             .name = "password",
1294             .type = QEMU_OPT_STRING,
1295             .help = "password for CHAP authentication to target",
1296         },{
1297             .name = "header-digest",
1298             .type = QEMU_OPT_STRING,
1299             .help = "HeaderDigest setting. "
1300                     "{CRC32C|CRC32C-NONE|NONE-CRC32C|NONE}",
1301         },{
1302             .name = "initiator-name",
1303             .type = QEMU_OPT_STRING,
1304             .help = "Initiator iqn name to use when connecting",
1305         },
1306         { /* end of list */ }
1307     },
1308 };
1309 
1310 static void iscsi_block_init(void)
1311 {
1312     bdrv_register(&bdrv_iscsi);
1313     qemu_add_opts(&qemu_iscsi_opts);
1314 }
1315 
1316 block_init(iscsi_block_init);
1317