xref: /openbmc/qemu/block/iscsi.c (revision c71c3e99)
1 /*
2  * QEMU Block driver for iSCSI images
3  *
4  * Copyright (c) 2010-2011 Ronnie Sahlberg <ronniesahlberg@gmail.com>
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 #include "config-host.h"
26 
27 #include <poll.h>
28 #include <arpa/inet.h>
29 #include "qemu-common.h"
30 #include "qemu/config-file.h"
31 #include "qemu/error-report.h"
32 #include "block/block_int.h"
33 #include "trace.h"
34 #include "hw/scsi-defs.h"
35 
36 #include <iscsi/iscsi.h>
37 #include <iscsi/scsi-lowlevel.h>
38 
39 #ifdef __linux__
40 #include <scsi/sg.h>
41 #include <hw/scsi-defs.h>
42 #endif
43 
44 typedef struct IscsiLun {
45     struct iscsi_context *iscsi;
46     int lun;
47     enum scsi_inquiry_peripheral_device_type type;
48     int block_size;
49     uint64_t num_blocks;
50     int events;
51     QEMUTimer *nop_timer;
52 } IscsiLun;
53 
54 typedef struct IscsiAIOCB {
55     BlockDriverAIOCB common;
56     QEMUIOVector *qiov;
57     QEMUBH *bh;
58     IscsiLun *iscsilun;
59     struct scsi_task *task;
60     uint8_t *buf;
61     int status;
62     int canceled;
63     int retries;
64     size_t read_size;
65     size_t read_offset;
66     int64_t sector_num;
67     int nb_sectors;
68 #ifdef __linux__
69     sg_io_hdr_t *ioh;
70 #endif
71 } IscsiAIOCB;
72 
/* Driver tuning constants. */
enum {
    /* offset added to the millisecond clock when re-arming the NOP timer */
    NOP_INTERVAL = 5000,
    /* NOPs-in-flight count above which the session is reconnected */
    MAX_NOP_FAILURES = 3,
    /* initial retry budget given to each command */
    ISCSI_CMD_RETRIES = 5
};
76 
77 static void
78 iscsi_bh_cb(void *p)
79 {
80     IscsiAIOCB *acb = p;
81 
82     qemu_bh_delete(acb->bh);
83 
84     g_free(acb->buf);
85     acb->buf = NULL;
86 
87     if (acb->canceled == 0) {
88         acb->common.cb(acb->common.opaque, acb->status);
89     }
90 
91     if (acb->task != NULL) {
92         scsi_free_scsi_task(acb->task);
93         acb->task = NULL;
94     }
95 
96     qemu_aio_release(acb);
97 }
98 
99 static void
100 iscsi_schedule_bh(IscsiAIOCB *acb)
101 {
102     if (acb->bh) {
103         return;
104     }
105     acb->bh = qemu_bh_new(iscsi_bh_cb, acb);
106     qemu_bh_schedule(acb->bh);
107 }
108 
109 
110 static void
111 iscsi_abort_task_cb(struct iscsi_context *iscsi, int status, void *command_data,
112                     void *private_data)
113 {
114     IscsiAIOCB *acb = private_data;
115 
116     acb->status = -ECANCELED;
117     iscsi_schedule_bh(acb);
118 }
119 
120 static void
121 iscsi_aio_cancel(BlockDriverAIOCB *blockacb)
122 {
123     IscsiAIOCB *acb = (IscsiAIOCB *)blockacb;
124     IscsiLun *iscsilun = acb->iscsilun;
125 
126     if (acb->status != -EINPROGRESS) {
127         return;
128     }
129 
130     acb->canceled = 1;
131 
132     /* send a task mgmt call to the target to cancel the task on the target */
133     iscsi_task_mgmt_abort_task_async(iscsilun->iscsi, acb->task,
134                                      iscsi_abort_task_cb, acb);
135 
136     while (acb->status == -EINPROGRESS) {
137         qemu_aio_wait();
138     }
139 }
140 
141 static const AIOCBInfo iscsi_aiocb_info = {
142     .aiocb_size         = sizeof(IscsiAIOCB),
143     .cancel             = iscsi_aio_cancel,
144 };
145 
146 
147 static void iscsi_process_read(void *arg);
148 static void iscsi_process_write(void *arg);
149 
150 static int iscsi_process_flush(void *arg)
151 {
152     IscsiLun *iscsilun = arg;
153 
154     return iscsi_queue_length(iscsilun->iscsi) > 0;
155 }
156 
157 static void
158 iscsi_set_events(IscsiLun *iscsilun)
159 {
160     struct iscsi_context *iscsi = iscsilun->iscsi;
161     int ev;
162 
163     /* We always register a read handler.  */
164     ev = POLLIN;
165     ev |= iscsi_which_events(iscsi);
166     if (ev != iscsilun->events) {
167         qemu_aio_set_fd_handler(iscsi_get_fd(iscsi),
168                       iscsi_process_read,
169                       (ev & POLLOUT) ? iscsi_process_write : NULL,
170                       iscsi_process_flush,
171                       iscsilun);
172 
173     }
174 
175     iscsilun->events = ev;
176 }
177 
178 static void
179 iscsi_process_read(void *arg)
180 {
181     IscsiLun *iscsilun = arg;
182     struct iscsi_context *iscsi = iscsilun->iscsi;
183 
184     iscsi_service(iscsi, POLLIN);
185     iscsi_set_events(iscsilun);
186 }
187 
188 static void
189 iscsi_process_write(void *arg)
190 {
191     IscsiLun *iscsilun = arg;
192     struct iscsi_context *iscsi = iscsilun->iscsi;
193 
194     iscsi_service(iscsi, POLLOUT);
195     iscsi_set_events(iscsilun);
196 }
197 
198 static int
199 iscsi_aio_writev_acb(IscsiAIOCB *acb);
200 
201 static void
202 iscsi_aio_write16_cb(struct iscsi_context *iscsi, int status,
203                      void *command_data, void *opaque)
204 {
205     IscsiAIOCB *acb = opaque;
206 
207     trace_iscsi_aio_write16_cb(iscsi, status, acb, acb->canceled);
208 
209     g_free(acb->buf);
210     acb->buf = NULL;
211 
212     if (acb->canceled != 0) {
213         return;
214     }
215 
216     acb->status = 0;
217     if (status != 0) {
218         if (status == SCSI_STATUS_CHECK_CONDITION
219             && acb->task->sense.key == SCSI_SENSE_UNIT_ATTENTION
220             && acb->retries-- > 0) {
221             if (acb->task != NULL) {
222                 scsi_free_scsi_task(acb->task);
223                 acb->task = NULL;
224             }
225             if (iscsi_aio_writev_acb(acb) == 0) {
226                 iscsi_set_events(acb->iscsilun);
227                 return;
228             }
229         }
230         error_report("Failed to write16 data to iSCSI lun. %s",
231                      iscsi_get_error(iscsi));
232         acb->status = -EIO;
233     }
234 
235     iscsi_schedule_bh(acb);
236 }
237 
238 static int64_t sector_qemu2lun(int64_t sector, IscsiLun *iscsilun)
239 {
240     return sector * BDRV_SECTOR_SIZE / iscsilun->block_size;
241 }
242 
243 static int
244 iscsi_aio_writev_acb(IscsiAIOCB *acb)
245 {
246     struct iscsi_context *iscsi = acb->iscsilun->iscsi;
247     size_t size;
248     uint32_t num_sectors;
249     uint64_t lba;
250 #if !defined(LIBISCSI_FEATURE_IOVECTOR)
251     struct iscsi_data data;
252 #endif
253     int ret;
254 
255     acb->canceled   = 0;
256     acb->bh         = NULL;
257     acb->status     = -EINPROGRESS;
258     acb->buf        = NULL;
259 
260     /* this will allow us to get rid of 'buf' completely */
261     size = acb->nb_sectors * BDRV_SECTOR_SIZE;
262 
263 #if !defined(LIBISCSI_FEATURE_IOVECTOR)
264     data.size = MIN(size, acb->qiov->size);
265 
266     /* if the iovec only contains one buffer we can pass it directly */
267     if (acb->qiov->niov == 1) {
268         data.data = acb->qiov->iov[0].iov_base;
269     } else {
270         acb->buf = g_malloc(data.size);
271         qemu_iovec_to_buf(acb->qiov, 0, acb->buf, data.size);
272         data.data = acb->buf;
273     }
274 #endif
275 
276     acb->task = malloc(sizeof(struct scsi_task));
277     if (acb->task == NULL) {
278         error_report("iSCSI: Failed to allocate task for scsi WRITE16 "
279                      "command. %s", iscsi_get_error(iscsi));
280         return -1;
281     }
282     memset(acb->task, 0, sizeof(struct scsi_task));
283 
284     acb->task->xfer_dir = SCSI_XFER_WRITE;
285     acb->task->cdb_size = 16;
286     acb->task->cdb[0] = 0x8a;
287     lba = sector_qemu2lun(acb->sector_num, acb->iscsilun);
288     *(uint32_t *)&acb->task->cdb[2]  = htonl(lba >> 32);
289     *(uint32_t *)&acb->task->cdb[6]  = htonl(lba & 0xffffffff);
290     num_sectors = size / acb->iscsilun->block_size;
291     *(uint32_t *)&acb->task->cdb[10] = htonl(num_sectors);
292     acb->task->expxferlen = size;
293 
294 #if defined(LIBISCSI_FEATURE_IOVECTOR)
295     ret = iscsi_scsi_command_async(iscsi, acb->iscsilun->lun, acb->task,
296                                    iscsi_aio_write16_cb,
297                                    NULL,
298                                    acb);
299 #else
300     ret = iscsi_scsi_command_async(iscsi, acb->iscsilun->lun, acb->task,
301                                    iscsi_aio_write16_cb,
302                                    &data,
303                                    acb);
304 #endif
305     if (ret != 0) {
306         g_free(acb->buf);
307         return -1;
308     }
309 
310 #if defined(LIBISCSI_FEATURE_IOVECTOR)
311     scsi_task_set_iov_out(acb->task, (struct scsi_iovec*) acb->qiov->iov, acb->qiov->niov);
312 #endif
313 
314     return 0;
315 }
316 
317 static BlockDriverAIOCB *
318 iscsi_aio_writev(BlockDriverState *bs, int64_t sector_num,
319                  QEMUIOVector *qiov, int nb_sectors,
320                  BlockDriverCompletionFunc *cb,
321                  void *opaque)
322 {
323     IscsiLun *iscsilun = bs->opaque;
324     IscsiAIOCB *acb;
325 
326     acb = qemu_aio_get(&iscsi_aiocb_info, bs, cb, opaque);
327     trace_iscsi_aio_writev(iscsilun->iscsi, sector_num, nb_sectors, opaque, acb);
328 
329     acb->iscsilun    = iscsilun;
330     acb->qiov        = qiov;
331     acb->nb_sectors  = nb_sectors;
332     acb->sector_num  = sector_num;
333     acb->retries     = ISCSI_CMD_RETRIES;
334 
335     if (iscsi_aio_writev_acb(acb) != 0) {
336         if (acb->task) {
337             scsi_free_scsi_task(acb->task);
338         }
339         qemu_aio_release(acb);
340         return NULL;
341     }
342 
343     iscsi_set_events(iscsilun);
344     return &acb->common;
345 }
346 
347 static int
348 iscsi_aio_readv_acb(IscsiAIOCB *acb);
349 
350 static void
351 iscsi_aio_read16_cb(struct iscsi_context *iscsi, int status,
352                     void *command_data, void *opaque)
353 {
354     IscsiAIOCB *acb = opaque;
355 
356     trace_iscsi_aio_read16_cb(iscsi, status, acb, acb->canceled);
357 
358     if (acb->canceled != 0) {
359         return;
360     }
361 
362     acb->status = 0;
363     if (status != 0) {
364         if (status == SCSI_STATUS_CHECK_CONDITION
365             && acb->task->sense.key == SCSI_SENSE_UNIT_ATTENTION
366             && acb->retries-- > 0) {
367             if (acb->task != NULL) {
368                 scsi_free_scsi_task(acb->task);
369                 acb->task = NULL;
370             }
371             if (iscsi_aio_readv_acb(acb) == 0) {
372                 iscsi_set_events(acb->iscsilun);
373                 return;
374             }
375         }
376         error_report("Failed to read16 data from iSCSI lun. %s",
377                      iscsi_get_error(iscsi));
378         acb->status = -EIO;
379     }
380 
381     iscsi_schedule_bh(acb);
382 }
383 
384 static int
385 iscsi_aio_readv_acb(IscsiAIOCB *acb)
386 {
387     struct iscsi_context *iscsi = acb->iscsilun->iscsi;
388     uint64_t lba;
389     uint32_t num_sectors;
390     int ret;
391 #if !defined(LIBISCSI_FEATURE_IOVECTOR)
392     int i;
393 #endif
394 
395     acb->canceled    = 0;
396     acb->bh          = NULL;
397     acb->status      = -EINPROGRESS;
398     acb->buf         = NULL;
399 
400     /* If LUN blocksize is bigger than BDRV_BLOCK_SIZE a read from QEMU
401      * may be misaligned to the LUN, so we may need to read some extra
402      * data.
403      */
404     acb->read_offset = 0;
405     if (acb->iscsilun->block_size > BDRV_SECTOR_SIZE) {
406         uint64_t bdrv_offset = BDRV_SECTOR_SIZE * acb->sector_num;
407 
408         acb->read_offset  = bdrv_offset % acb->iscsilun->block_size;
409     }
410 
411     num_sectors  = (acb->read_size + acb->iscsilun->block_size
412                     + acb->read_offset - 1)
413                     / acb->iscsilun->block_size;
414 
415     acb->task = malloc(sizeof(struct scsi_task));
416     if (acb->task == NULL) {
417         error_report("iSCSI: Failed to allocate task for scsi READ16 "
418                      "command. %s", iscsi_get_error(iscsi));
419         return -1;
420     }
421     memset(acb->task, 0, sizeof(struct scsi_task));
422 
423     acb->task->xfer_dir = SCSI_XFER_READ;
424     lba = sector_qemu2lun(acb->sector_num, acb->iscsilun);
425     acb->task->expxferlen = acb->read_size;
426 
427     switch (acb->iscsilun->type) {
428     case TYPE_DISK:
429         acb->task->cdb_size = 16;
430         acb->task->cdb[0]  = 0x88;
431         *(uint32_t *)&acb->task->cdb[2]  = htonl(lba >> 32);
432         *(uint32_t *)&acb->task->cdb[6]  = htonl(lba & 0xffffffff);
433         *(uint32_t *)&acb->task->cdb[10] = htonl(num_sectors);
434         break;
435     default:
436         acb->task->cdb_size = 10;
437         acb->task->cdb[0]  = 0x28;
438         *(uint32_t *)&acb->task->cdb[2] = htonl(lba);
439         *(uint16_t *)&acb->task->cdb[7] = htons(num_sectors);
440         break;
441     }
442 
443     ret = iscsi_scsi_command_async(iscsi, acb->iscsilun->lun, acb->task,
444                                    iscsi_aio_read16_cb,
445                                    NULL,
446                                    acb);
447     if (ret != 0) {
448         return -1;
449     }
450 
451 #if defined(LIBISCSI_FEATURE_IOVECTOR)
452     scsi_task_set_iov_in(acb->task, (struct scsi_iovec*) acb->qiov->iov, acb->qiov->niov);
453 #else
454     for (i = 0; i < acb->qiov->niov; i++) {
455         scsi_task_add_data_in_buffer(acb->task,
456                 acb->qiov->iov[i].iov_len,
457                 acb->qiov->iov[i].iov_base);
458     }
459 #endif
460     return 0;
461 }
462 
463 static BlockDriverAIOCB *
464 iscsi_aio_readv(BlockDriverState *bs, int64_t sector_num,
465                 QEMUIOVector *qiov, int nb_sectors,
466                 BlockDriverCompletionFunc *cb,
467                 void *opaque)
468 {
469     IscsiLun *iscsilun = bs->opaque;
470     IscsiAIOCB *acb;
471 
472     acb = qemu_aio_get(&iscsi_aiocb_info, bs, cb, opaque);
473     trace_iscsi_aio_readv(iscsilun->iscsi, sector_num, nb_sectors, opaque, acb);
474 
475     acb->nb_sectors  = nb_sectors;
476     acb->sector_num  = sector_num;
477     acb->iscsilun    = iscsilun;
478     acb->qiov        = qiov;
479     acb->read_size   = BDRV_SECTOR_SIZE * (size_t)acb->nb_sectors;
480     acb->retries     = ISCSI_CMD_RETRIES;
481 
482     if (iscsi_aio_readv_acb(acb) != 0) {
483         if (acb->task) {
484             scsi_free_scsi_task(acb->task);
485         }
486         qemu_aio_release(acb);
487         return NULL;
488     }
489 
490     iscsi_set_events(iscsilun);
491     return &acb->common;
492 }
493 
494 static int
495 iscsi_aio_flush_acb(IscsiAIOCB *acb);
496 
497 static void
498 iscsi_synccache10_cb(struct iscsi_context *iscsi, int status,
499                      void *command_data, void *opaque)
500 {
501     IscsiAIOCB *acb = opaque;
502 
503     if (acb->canceled != 0) {
504         return;
505     }
506 
507     acb->status = 0;
508     if (status != 0) {
509         if (status == SCSI_STATUS_CHECK_CONDITION
510             && acb->task->sense.key == SCSI_SENSE_UNIT_ATTENTION
511             && acb->retries-- > 0) {
512             if (acb->task != NULL) {
513                 scsi_free_scsi_task(acb->task);
514                 acb->task = NULL;
515             }
516             if (iscsi_aio_flush_acb(acb) == 0) {
517                 iscsi_set_events(acb->iscsilun);
518                 return;
519             }
520         }
521         error_report("Failed to sync10 data on iSCSI lun. %s",
522                      iscsi_get_error(iscsi));
523         acb->status = -EIO;
524     }
525 
526     iscsi_schedule_bh(acb);
527 }
528 
529 static int
530 iscsi_aio_flush_acb(IscsiAIOCB *acb)
531 {
532     struct iscsi_context *iscsi = acb->iscsilun->iscsi;
533 
534     acb->canceled   = 0;
535     acb->bh         = NULL;
536     acb->status     = -EINPROGRESS;
537     acb->buf        = NULL;
538 
539     acb->task = iscsi_synchronizecache10_task(iscsi, acb->iscsilun->lun,
540                                          0, 0, 0, 0,
541                                          iscsi_synccache10_cb,
542                                          acb);
543     if (acb->task == NULL) {
544         error_report("iSCSI: Failed to send synchronizecache10 command. %s",
545                      iscsi_get_error(iscsi));
546         return -1;
547     }
548 
549     return 0;
550 }
551 
552 static BlockDriverAIOCB *
553 iscsi_aio_flush(BlockDriverState *bs,
554                 BlockDriverCompletionFunc *cb, void *opaque)
555 {
556     IscsiLun *iscsilun = bs->opaque;
557 
558     IscsiAIOCB *acb;
559 
560     acb = qemu_aio_get(&iscsi_aiocb_info, bs, cb, opaque);
561 
562     acb->iscsilun    = iscsilun;
563     acb->retries     = ISCSI_CMD_RETRIES;
564 
565     if (iscsi_aio_flush_acb(acb) != 0) {
566         qemu_aio_release(acb);
567         return NULL;
568     }
569 
570     iscsi_set_events(iscsilun);
571 
572     return &acb->common;
573 }
574 
575 static int iscsi_aio_discard_acb(IscsiAIOCB *acb);
576 
577 static void
578 iscsi_unmap_cb(struct iscsi_context *iscsi, int status,
579                      void *command_data, void *opaque)
580 {
581     IscsiAIOCB *acb = opaque;
582 
583     if (acb->canceled != 0) {
584         return;
585     }
586 
587     acb->status = 0;
588     if (status != 0) {
589         if (status == SCSI_STATUS_CHECK_CONDITION
590             && acb->task->sense.key == SCSI_SENSE_UNIT_ATTENTION
591             && acb->retries-- > 0) {
592             if (acb->task != NULL) {
593                 scsi_free_scsi_task(acb->task);
594                 acb->task = NULL;
595             }
596             if (iscsi_aio_discard_acb(acb) == 0) {
597                 iscsi_set_events(acb->iscsilun);
598                 return;
599             }
600         }
601         error_report("Failed to unmap data on iSCSI lun. %s",
602                      iscsi_get_error(iscsi));
603         acb->status = -EIO;
604     }
605 
606     iscsi_schedule_bh(acb);
607 }
608 
609 static int iscsi_aio_discard_acb(IscsiAIOCB *acb) {
610     struct iscsi_context *iscsi = acb->iscsilun->iscsi;
611     struct unmap_list list[1];
612 
613     acb->canceled   = 0;
614     acb->bh         = NULL;
615     acb->status     = -EINPROGRESS;
616     acb->buf        = NULL;
617 
618     list[0].lba = sector_qemu2lun(acb->sector_num, acb->iscsilun);
619     list[0].num = acb->nb_sectors * BDRV_SECTOR_SIZE / acb->iscsilun->block_size;
620 
621     acb->task = iscsi_unmap_task(iscsi, acb->iscsilun->lun,
622                                  0, 0, &list[0], 1,
623                                  iscsi_unmap_cb,
624                                  acb);
625     if (acb->task == NULL) {
626         error_report("iSCSI: Failed to send unmap command. %s",
627                      iscsi_get_error(iscsi));
628         return -1;
629     }
630 
631     return 0;
632 }
633 
634 static BlockDriverAIOCB *
635 iscsi_aio_discard(BlockDriverState *bs,
636                   int64_t sector_num, int nb_sectors,
637                   BlockDriverCompletionFunc *cb, void *opaque)
638 {
639     IscsiLun *iscsilun = bs->opaque;
640     IscsiAIOCB *acb;
641 
642     acb = qemu_aio_get(&iscsi_aiocb_info, bs, cb, opaque);
643 
644     acb->iscsilun    = iscsilun;
645     acb->nb_sectors  = nb_sectors;
646     acb->sector_num  = sector_num;
647     acb->retries     = ISCSI_CMD_RETRIES;
648 
649     if (iscsi_aio_discard_acb(acb) != 0) {
650         if (acb->task) {
651             scsi_free_scsi_task(acb->task);
652         }
653         qemu_aio_release(acb);
654         return NULL;
655     }
656 
657     iscsi_set_events(iscsilun);
658 
659     return &acb->common;
660 }
661 
662 #ifdef __linux__
663 static void
664 iscsi_aio_ioctl_cb(struct iscsi_context *iscsi, int status,
665                      void *command_data, void *opaque)
666 {
667     IscsiAIOCB *acb = opaque;
668 
669     if (acb->canceled != 0) {
670         return;
671     }
672 
673     acb->status = 0;
674     if (status < 0) {
675         error_report("Failed to ioctl(SG_IO) to iSCSI lun. %s",
676                      iscsi_get_error(iscsi));
677         acb->status = -EIO;
678     }
679 
680     acb->ioh->driver_status = 0;
681     acb->ioh->host_status   = 0;
682     acb->ioh->resid         = 0;
683 
684 #define SG_ERR_DRIVER_SENSE    0x08
685 
686     if (status == SCSI_STATUS_CHECK_CONDITION && acb->task->datain.size >= 2) {
687         int ss;
688 
689         acb->ioh->driver_status |= SG_ERR_DRIVER_SENSE;
690 
691         acb->ioh->sb_len_wr = acb->task->datain.size - 2;
692         ss = (acb->ioh->mx_sb_len >= acb->ioh->sb_len_wr) ?
693              acb->ioh->mx_sb_len : acb->ioh->sb_len_wr;
694         memcpy(acb->ioh->sbp, &acb->task->datain.data[2], ss);
695     }
696 
697     iscsi_schedule_bh(acb);
698 }
699 
700 static BlockDriverAIOCB *iscsi_aio_ioctl(BlockDriverState *bs,
701         unsigned long int req, void *buf,
702         BlockDriverCompletionFunc *cb, void *opaque)
703 {
704     IscsiLun *iscsilun = bs->opaque;
705     struct iscsi_context *iscsi = iscsilun->iscsi;
706     struct iscsi_data data;
707     IscsiAIOCB *acb;
708 
709     assert(req == SG_IO);
710 
711     acb = qemu_aio_get(&iscsi_aiocb_info, bs, cb, opaque);
712 
713     acb->iscsilun = iscsilun;
714     acb->canceled    = 0;
715     acb->bh          = NULL;
716     acb->status      = -EINPROGRESS;
717     acb->buf         = NULL;
718     acb->ioh         = buf;
719 
720     acb->task = malloc(sizeof(struct scsi_task));
721     if (acb->task == NULL) {
722         error_report("iSCSI: Failed to allocate task for scsi command. %s",
723                      iscsi_get_error(iscsi));
724         qemu_aio_release(acb);
725         return NULL;
726     }
727     memset(acb->task, 0, sizeof(struct scsi_task));
728 
729     switch (acb->ioh->dxfer_direction) {
730     case SG_DXFER_TO_DEV:
731         acb->task->xfer_dir = SCSI_XFER_WRITE;
732         break;
733     case SG_DXFER_FROM_DEV:
734         acb->task->xfer_dir = SCSI_XFER_READ;
735         break;
736     default:
737         acb->task->xfer_dir = SCSI_XFER_NONE;
738         break;
739     }
740 
741     acb->task->cdb_size = acb->ioh->cmd_len;
742     memcpy(&acb->task->cdb[0], acb->ioh->cmdp, acb->ioh->cmd_len);
743     acb->task->expxferlen = acb->ioh->dxfer_len;
744 
745     if (acb->task->xfer_dir == SCSI_XFER_WRITE) {
746         data.data = acb->ioh->dxferp;
747         data.size = acb->ioh->dxfer_len;
748     }
749     if (iscsi_scsi_command_async(iscsi, iscsilun->lun, acb->task,
750                                  iscsi_aio_ioctl_cb,
751                                  (acb->task->xfer_dir == SCSI_XFER_WRITE) ?
752                                      &data : NULL,
753                                  acb) != 0) {
754         scsi_free_scsi_task(acb->task);
755         qemu_aio_release(acb);
756         return NULL;
757     }
758 
759     /* tell libiscsi to read straight into the buffer we got from ioctl */
760     if (acb->task->xfer_dir == SCSI_XFER_READ) {
761         scsi_task_add_data_in_buffer(acb->task,
762                                      acb->ioh->dxfer_len,
763                                      acb->ioh->dxferp);
764     }
765 
766     iscsi_set_events(iscsilun);
767 
768     return &acb->common;
769 }
770 
771 
/*
 * Completion callback used by the synchronous ioctl path: stores the
 * final status in the int that @opaque points to.
 */
static void ioctl_cb(void *opaque, int status)
{
    *(int *)opaque = status;
}
777 
778 static int iscsi_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
779 {
780     IscsiLun *iscsilun = bs->opaque;
781     int status;
782 
783     switch (req) {
784     case SG_GET_VERSION_NUM:
785         *(int *)buf = 30000;
786         break;
787     case SG_GET_SCSI_ID:
788         ((struct sg_scsi_id *)buf)->scsi_type = iscsilun->type;
789         break;
790     case SG_IO:
791         status = -EINPROGRESS;
792         iscsi_aio_ioctl(bs, req, buf, ioctl_cb, &status);
793 
794         while (status == -EINPROGRESS) {
795             qemu_aio_wait();
796         }
797 
798         return 0;
799     default:
800         return -1;
801     }
802     return 0;
803 }
804 #endif
805 
806 static int64_t
807 iscsi_getlength(BlockDriverState *bs)
808 {
809     IscsiLun *iscsilun = bs->opaque;
810     int64_t len;
811 
812     len  = iscsilun->num_blocks;
813     len *= iscsilun->block_size;
814 
815     return len;
816 }
817 
818 static int parse_chap(struct iscsi_context *iscsi, const char *target)
819 {
820     QemuOptsList *list;
821     QemuOpts *opts;
822     const char *user = NULL;
823     const char *password = NULL;
824 
825     list = qemu_find_opts("iscsi");
826     if (!list) {
827         return 0;
828     }
829 
830     opts = qemu_opts_find(list, target);
831     if (opts == NULL) {
832         opts = QTAILQ_FIRST(&list->head);
833         if (!opts) {
834             return 0;
835         }
836     }
837 
838     user = qemu_opt_get(opts, "user");
839     if (!user) {
840         return 0;
841     }
842 
843     password = qemu_opt_get(opts, "password");
844     if (!password) {
845         error_report("CHAP username specified but no password was given");
846         return -1;
847     }
848 
849     if (iscsi_set_initiator_username_pwd(iscsi, user, password)) {
850         error_report("Failed to set initiator username and password");
851         return -1;
852     }
853 
854     return 0;
855 }
856 
857 static void parse_header_digest(struct iscsi_context *iscsi, const char *target)
858 {
859     QemuOptsList *list;
860     QemuOpts *opts;
861     const char *digest = NULL;
862 
863     list = qemu_find_opts("iscsi");
864     if (!list) {
865         return;
866     }
867 
868     opts = qemu_opts_find(list, target);
869     if (opts == NULL) {
870         opts = QTAILQ_FIRST(&list->head);
871         if (!opts) {
872             return;
873         }
874     }
875 
876     digest = qemu_opt_get(opts, "header-digest");
877     if (!digest) {
878         return;
879     }
880 
881     if (!strcmp(digest, "CRC32C")) {
882         iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_CRC32C);
883     } else if (!strcmp(digest, "NONE")) {
884         iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE);
885     } else if (!strcmp(digest, "CRC32C-NONE")) {
886         iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_CRC32C_NONE);
887     } else if (!strcmp(digest, "NONE-CRC32C")) {
888         iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE_CRC32C);
889     } else {
890         error_report("Invalid header-digest setting : %s", digest);
891     }
892 }
893 
894 static char *parse_initiator_name(const char *target)
895 {
896     QemuOptsList *list;
897     QemuOpts *opts;
898     const char *name = NULL;
899     const char *iscsi_name = qemu_get_vm_name();
900 
901     list = qemu_find_opts("iscsi");
902     if (list) {
903         opts = qemu_opts_find(list, target);
904         if (!opts) {
905             opts = QTAILQ_FIRST(&list->head);
906         }
907         if (opts) {
908             name = qemu_opt_get(opts, "initiator-name");
909         }
910     }
911 
912     if (name) {
913         return g_strdup(name);
914     } else {
915         return g_strdup_printf("iqn.2008-11.org.linux-kvm%s%s",
916                                iscsi_name ? ":" : "",
917                                iscsi_name ? iscsi_name : "");
918     }
919 }
920 
#if defined(LIBISCSI_FEATURE_NOP_COUNTER)
/*
 * Timer callback that keeps the session alive with NOP-Out requests.
 *
 * Reconnects when more than MAX_NOP_FAILURES NOPs are in flight, then
 * sends another NOP-Out.  When sending fails the timer is not re-armed;
 * otherwise it is re-armed NOP_INTERVAL ms from now and the fd handlers
 * are refreshed.
 */
static void iscsi_nop_timed_event(void *opaque)
{
    IscsiLun *lun = opaque;
    struct iscsi_context *ctx = lun->iscsi;

    if (iscsi_get_nops_in_flight(ctx) > MAX_NOP_FAILURES) {
        error_report("iSCSI: NOP timeout. Reconnecting...");
        iscsi_reconnect(lun->iscsi);
    }

    /* lun->iscsi is re-read here on purpose, as in the call above */
    if (iscsi_nop_out_async(lun->iscsi, NULL, NULL, 0, NULL)) {
        error_report("iSCSI: failed to sent NOP-Out. Disabling NOP messages.");
        return;
    }

    qemu_mod_timer(lun->nop_timer,
                   qemu_get_clock_ms(rt_clock) + NOP_INTERVAL);
    iscsi_set_events(lun);
}
#endif
940 
941 static int iscsi_readcapacity_sync(IscsiLun *iscsilun)
942 {
943     struct scsi_task *task = NULL;
944     struct scsi_readcapacity10 *rc10 = NULL;
945     struct scsi_readcapacity16 *rc16 = NULL;
946     int ret = 0;
947     int retries = ISCSI_CMD_RETRIES;
948 
949 try_again:
950     switch (iscsilun->type) {
951     case TYPE_DISK:
952         task = iscsi_readcapacity16_sync(iscsilun->iscsi, iscsilun->lun);
953         if (task == NULL || task->status != SCSI_STATUS_GOOD) {
954             if (task != NULL && task->status == SCSI_STATUS_CHECK_CONDITION
955                     && task->sense.key == SCSI_SENSE_UNIT_ATTENTION
956                     && retries-- > 0) {
957                 scsi_free_scsi_task(task);
958                 goto try_again;
959             }
960             error_report("iSCSI: failed to send readcapacity16 command.");
961             ret = -EINVAL;
962             goto out;
963         }
964         rc16 = scsi_datain_unmarshall(task);
965         if (rc16 == NULL) {
966             error_report("iSCSI: Failed to unmarshall readcapacity16 data.");
967             ret = -EINVAL;
968             goto out;
969         }
970         iscsilun->block_size = rc16->block_length;
971         iscsilun->num_blocks = rc16->returned_lba + 1;
972         break;
973     case TYPE_ROM:
974         task = iscsi_readcapacity10_sync(iscsilun->iscsi, iscsilun->lun, 0, 0);
975         if (task == NULL || task->status != SCSI_STATUS_GOOD) {
976             error_report("iSCSI: failed to send readcapacity10 command.");
977             ret = -EINVAL;
978             goto out;
979         }
980         rc10 = scsi_datain_unmarshall(task);
981         if (rc10 == NULL) {
982             error_report("iSCSI: Failed to unmarshall readcapacity10 data.");
983             ret = -EINVAL;
984             goto out;
985         }
986         iscsilun->block_size = rc10->block_size;
987         if (rc10->lba == 0) {
988             /* blank disk loaded */
989             iscsilun->num_blocks = 0;
990         } else {
991             iscsilun->num_blocks = rc10->lba + 1;
992         }
993         break;
994     default:
995         break;
996     }
997 
998 out:
999     if (task) {
1000         scsi_free_scsi_task(task);
1001     }
1002 
1003     return ret;
1004 }
1005 
1006 /*
1007  * We support iscsi url's on the form
1008  * iscsi://[<username>%<password>@]<host>[:<port>]/<targetname>/<lun>
1009  */
1010 static int iscsi_open(BlockDriverState *bs, const char *filename, int flags)
1011 {
1012     IscsiLun *iscsilun = bs->opaque;
1013     struct iscsi_context *iscsi = NULL;
1014     struct iscsi_url *iscsi_url = NULL;
1015     struct scsi_task *task = NULL;
1016     struct scsi_inquiry_standard *inq = NULL;
1017     char *initiator_name = NULL;
1018     int ret;
1019 
1020     if ((BDRV_SECTOR_SIZE % 512) != 0) {
1021         error_report("iSCSI: Invalid BDRV_SECTOR_SIZE. "
1022                      "BDRV_SECTOR_SIZE(%lld) is not a multiple "
1023                      "of 512", BDRV_SECTOR_SIZE);
1024         return -EINVAL;
1025     }
1026 
1027     iscsi_url = iscsi_parse_full_url(iscsi, filename);
1028     if (iscsi_url == NULL) {
1029         error_report("Failed to parse URL : %s", filename);
1030         ret = -EINVAL;
1031         goto out;
1032     }
1033 
1034     memset(iscsilun, 0, sizeof(IscsiLun));
1035 
1036     initiator_name = parse_initiator_name(iscsi_url->target);
1037 
1038     iscsi = iscsi_create_context(initiator_name);
1039     if (iscsi == NULL) {
1040         error_report("iSCSI: Failed to create iSCSI context.");
1041         ret = -ENOMEM;
1042         goto out;
1043     }
1044 
1045     if (iscsi_set_targetname(iscsi, iscsi_url->target)) {
1046         error_report("iSCSI: Failed to set target name.");
1047         ret = -EINVAL;
1048         goto out;
1049     }
1050 
1051     if (iscsi_url->user != NULL) {
1052         ret = iscsi_set_initiator_username_pwd(iscsi, iscsi_url->user,
1053                                               iscsi_url->passwd);
1054         if (ret != 0) {
1055             error_report("Failed to set initiator username and password");
1056             ret = -EINVAL;
1057             goto out;
1058         }
1059     }
1060 
1061     /* check if we got CHAP username/password via the options */
1062     if (parse_chap(iscsi, iscsi_url->target) != 0) {
1063         error_report("iSCSI: Failed to set CHAP user/password");
1064         ret = -EINVAL;
1065         goto out;
1066     }
1067 
1068     if (iscsi_set_session_type(iscsi, ISCSI_SESSION_NORMAL) != 0) {
1069         error_report("iSCSI: Failed to set session type to normal.");
1070         ret = -EINVAL;
1071         goto out;
1072     }
1073 
1074     iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE_CRC32C);
1075 
1076     /* check if we got HEADER_DIGEST via the options */
1077     parse_header_digest(iscsi, iscsi_url->target);
1078 
1079     if (iscsi_full_connect_sync(iscsi, iscsi_url->portal, iscsi_url->lun) != 0) {
1080         error_report("iSCSI: Failed to connect to LUN : %s",
1081             iscsi_get_error(iscsi));
1082         ret = -EINVAL;
1083         goto out;
1084     }
1085 
1086     iscsilun->iscsi = iscsi;
1087     iscsilun->lun   = iscsi_url->lun;
1088 
1089     task = iscsi_inquiry_sync(iscsi, iscsilun->lun, 0, 0, 36);
1090 
1091     if (task == NULL || task->status != SCSI_STATUS_GOOD) {
1092         error_report("iSCSI: failed to send inquiry command.");
1093         ret = -EINVAL;
1094         goto out;
1095     }
1096 
1097     inq = scsi_datain_unmarshall(task);
1098     if (inq == NULL) {
1099         error_report("iSCSI: Failed to unmarshall inquiry data.");
1100         ret = -EINVAL;
1101         goto out;
1102     }
1103 
1104     iscsilun->type = inq->periperal_device_type;
1105 
1106     if ((ret = iscsi_readcapacity_sync(iscsilun)) != 0) {
1107         goto out;
1108     }
1109     bs->total_sectors    = iscsilun->num_blocks *
1110                            iscsilun->block_size / BDRV_SECTOR_SIZE ;
1111 
1112     /* Medium changer or tape. We dont have any emulation for this so this must
1113      * be sg ioctl compatible. We force it to be sg, otherwise qemu will try
1114      * to read from the device to guess the image format.
1115      */
1116     if (iscsilun->type == TYPE_MEDIUM_CHANGER ||
1117         iscsilun->type == TYPE_TAPE) {
1118         bs->sg = 1;
1119     }
1120 
1121 #if defined(LIBISCSI_FEATURE_NOP_COUNTER)
1122     /* Set up a timer for sending out iSCSI NOPs */
1123     iscsilun->nop_timer = qemu_new_timer_ms(rt_clock, iscsi_nop_timed_event, iscsilun);
1124     qemu_mod_timer(iscsilun->nop_timer, qemu_get_clock_ms(rt_clock) + NOP_INTERVAL);
1125 #endif
1126 
1127 out:
1128     if (initiator_name != NULL) {
1129         g_free(initiator_name);
1130     }
1131     if (iscsi_url != NULL) {
1132         iscsi_destroy_url(iscsi_url);
1133     }
1134     if (task != NULL) {
1135         scsi_free_scsi_task(task);
1136     }
1137 
1138     if (ret) {
1139         if (iscsi != NULL) {
1140             iscsi_destroy_context(iscsi);
1141         }
1142         memset(iscsilun, 0, sizeof(IscsiLun));
1143     }
1144     return ret;
1145 }
1146 
1147 static void iscsi_close(BlockDriverState *bs)
1148 {
1149     IscsiLun *iscsilun = bs->opaque;
1150     struct iscsi_context *iscsi = iscsilun->iscsi;
1151 
1152     if (iscsilun->nop_timer) {
1153         qemu_del_timer(iscsilun->nop_timer);
1154         qemu_free_timer(iscsilun->nop_timer);
1155     }
1156     qemu_aio_set_fd_handler(iscsi_get_fd(iscsi), NULL, NULL, NULL, NULL);
1157     iscsi_destroy_context(iscsi);
1158     memset(iscsilun, 0, sizeof(IscsiLun));
1159 }
1160 
1161 static int iscsi_truncate(BlockDriverState *bs, int64_t offset)
1162 {
1163     IscsiLun *iscsilun = bs->opaque;
1164     int ret = 0;
1165 
1166     if (iscsilun->type != TYPE_DISK) {
1167         return -ENOTSUP;
1168     }
1169 
1170     if ((ret = iscsi_readcapacity_sync(iscsilun)) != 0) {
1171         return ret;
1172     }
1173 
1174     if (offset > iscsi_getlength(bs)) {
1175         return -EINVAL;
1176     }
1177 
1178     return 0;
1179 }
1180 
1181 static int iscsi_has_zero_init(BlockDriverState *bs)
1182 {
1183     return 0;
1184 }
1185 
1186 static int iscsi_create(const char *filename, QEMUOptionParameter *options)
1187 {
1188     int ret = 0;
1189     int64_t total_size = 0;
1190     BlockDriverState bs;
1191     IscsiLun *iscsilun = NULL;
1192 
1193     memset(&bs, 0, sizeof(BlockDriverState));
1194 
1195     /* Read out options */
1196     while (options && options->name) {
1197         if (!strcmp(options->name, "size")) {
1198             total_size = options->value.n / BDRV_SECTOR_SIZE;
1199         }
1200         options++;
1201     }
1202 
1203     bs.opaque = g_malloc0(sizeof(struct IscsiLun));
1204     iscsilun = bs.opaque;
1205 
1206     ret = iscsi_open(&bs, filename, 0);
1207     if (ret != 0) {
1208         goto out;
1209     }
1210     if (iscsilun->nop_timer) {
1211         qemu_del_timer(iscsilun->nop_timer);
1212         qemu_free_timer(iscsilun->nop_timer);
1213     }
1214     if (iscsilun->type != TYPE_DISK) {
1215         ret = -ENODEV;
1216         goto out;
1217     }
1218     if (bs.total_sectors < total_size) {
1219         ret = -ENOSPC;
1220     }
1221 
1222     ret = 0;
1223 out:
1224     if (iscsilun->iscsi != NULL) {
1225         iscsi_destroy_context(iscsilun->iscsi);
1226     }
1227     g_free(bs.opaque);
1228     return ret;
1229 }
1230 
1231 static QEMUOptionParameter iscsi_create_options[] = {
1232     {
1233         .name = BLOCK_OPT_SIZE,
1234         .type = OPT_SIZE,
1235         .help = "Virtual disk size"
1236     },
1237     { NULL }
1238 };
1239 
1240 static BlockDriver bdrv_iscsi = {
1241     .format_name     = "iscsi",
1242     .protocol_name   = "iscsi",
1243 
1244     .instance_size   = sizeof(IscsiLun),
1245     .bdrv_file_open  = iscsi_open,
1246     .bdrv_close      = iscsi_close,
1247     .bdrv_create     = iscsi_create,
1248     .create_options  = iscsi_create_options,
1249 
1250     .bdrv_getlength  = iscsi_getlength,
1251     .bdrv_truncate   = iscsi_truncate,
1252 
1253     .bdrv_aio_readv  = iscsi_aio_readv,
1254     .bdrv_aio_writev = iscsi_aio_writev,
1255     .bdrv_aio_flush  = iscsi_aio_flush,
1256 
1257     .bdrv_aio_discard = iscsi_aio_discard,
1258     .bdrv_has_zero_init = iscsi_has_zero_init,
1259 
1260 #ifdef __linux__
1261     .bdrv_ioctl       = iscsi_ioctl,
1262     .bdrv_aio_ioctl   = iscsi_aio_ioctl,
1263 #endif
1264 };
1265 
1266 static QemuOptsList qemu_iscsi_opts = {
1267     .name = "iscsi",
1268     .head = QTAILQ_HEAD_INITIALIZER(qemu_iscsi_opts.head),
1269     .desc = {
1270         {
1271             .name = "user",
1272             .type = QEMU_OPT_STRING,
1273             .help = "username for CHAP authentication to target",
1274         },{
1275             .name = "password",
1276             .type = QEMU_OPT_STRING,
1277             .help = "password for CHAP authentication to target",
1278         },{
1279             .name = "header-digest",
1280             .type = QEMU_OPT_STRING,
1281             .help = "HeaderDigest setting. "
1282                     "{CRC32C|CRC32C-NONE|NONE-CRC32C|NONE}",
1283         },{
1284             .name = "initiator-name",
1285             .type = QEMU_OPT_STRING,
1286             .help = "Initiator iqn name to use when connecting",
1287         },
1288         { /* end of list */ }
1289     },
1290 };
1291 
1292 static void iscsi_block_init(void)
1293 {
1294     bdrv_register(&bdrv_iscsi);
1295     qemu_add_opts(&qemu_iscsi_opts);
1296 }
1297 
1298 block_init(iscsi_block_init);
1299