xref: /openbmc/qemu/block/iscsi.c (revision bc72ad67)
1 /*
2  * QEMU Block driver for iSCSI images
3  *
4  * Copyright (c) 2010-2011 Ronnie Sahlberg <ronniesahlberg@gmail.com>
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 #include "config-host.h"
26 
27 #include <poll.h>
28 #include <arpa/inet.h>
29 #include "qemu-common.h"
30 #include "qemu/config-file.h"
31 #include "qemu/error-report.h"
32 #include "block/block_int.h"
33 #include "trace.h"
34 #include "block/scsi.h"
35 #include "qemu/iov.h"
36 
37 #include <iscsi/iscsi.h>
38 #include <iscsi/scsi-lowlevel.h>
39 
40 #ifdef __linux__
41 #include <scsi/sg.h>
42 #include <block/scsi.h>
43 #endif
44 
/* Per-LUN driver state; the block layer hands it back through bs->opaque. */
typedef struct IscsiLun {
    struct iscsi_context *iscsi;    /* libiscsi context used for every command */
    int lun;                        /* LUN number passed to libiscsi calls */
    enum scsi_inquiry_peripheral_device_type type; /* selects READ(16) vs READ(10) etc. */
    int block_size;                 /* LUN block size in bytes, from READ CAPACITY */
    uint64_t num_blocks;            /* number of LUN blocks, from READ CAPACITY */
    int events;                     /* poll event mask last seen by iscsi_set_events() */
    QEMUTimer *nop_timer;           /* timer re-armed by iscsi_nop_timed_event() */
} IscsiLun;
54 
/* State of one in-flight asynchronous request. */
typedef struct IscsiAIOCB {
    BlockDriverAIOCB common;    /* must stay first: iscsi_aio_cancel() casts to IscsiAIOCB */
    QEMUIOVector *qiov;         /* caller's I/O vector for read/write requests */
    QEMUBH *bh;                 /* completion bottom half, NULL until scheduled */
    IscsiLun *iscsilun;         /* LUN the request targets */
    struct scsi_task *task;     /* libiscsi task, released in iscsi_bh_cb() */
    uint8_t *buf;               /* optional bounce buffer for linearized write data */
    int status;                 /* -EINPROGRESS while in flight, then 0 or -errno */
    int canceled;               /* non-zero once iscsi_aio_cancel() has been called */
    int retries;                /* retries left for UNIT ATTENTION check conditions */
    int64_t sector_num;         /* first sector, in BDRV_SECTOR_SIZE units */
    int nb_sectors;             /* sector count, in BDRV_SECTOR_SIZE units */
#ifdef __linux__
    sg_io_hdr_t *ioh;           /* SG_IO header supplied to iscsi_aio_ioctl() */
#endif
} IscsiAIOCB;
71 
/* Delay, in milliseconds, between NOP-Out keepalives (see iscsi_nop_timed_event). */
#define NOP_INTERVAL 5000
/* Reconnect once more than this many NOPs are in flight without a reply. */
#define MAX_NOP_FAILURES 3
/* Initial retry budget for commands that fail with UNIT ATTENTION. */
#define ISCSI_CMD_RETRIES 5
75 
76 static void
77 iscsi_bh_cb(void *p)
78 {
79     IscsiAIOCB *acb = p;
80 
81     qemu_bh_delete(acb->bh);
82 
83     g_free(acb->buf);
84     acb->buf = NULL;
85 
86     if (acb->canceled == 0) {
87         acb->common.cb(acb->common.opaque, acb->status);
88     }
89 
90     if (acb->task != NULL) {
91         scsi_free_scsi_task(acb->task);
92         acb->task = NULL;
93     }
94 
95     qemu_aio_release(acb);
96 }
97 
98 static void
99 iscsi_schedule_bh(IscsiAIOCB *acb)
100 {
101     if (acb->bh) {
102         return;
103     }
104     acb->bh = qemu_bh_new(iscsi_bh_cb, acb);
105     qemu_bh_schedule(acb->bh);
106 }
107 
108 
109 static void
110 iscsi_abort_task_cb(struct iscsi_context *iscsi, int status, void *command_data,
111                     void *private_data)
112 {
113     IscsiAIOCB *acb = private_data;
114 
115     acb->status = -ECANCELED;
116     iscsi_schedule_bh(acb);
117 }
118 
119 static void
120 iscsi_aio_cancel(BlockDriverAIOCB *blockacb)
121 {
122     IscsiAIOCB *acb = (IscsiAIOCB *)blockacb;
123     IscsiLun *iscsilun = acb->iscsilun;
124 
125     if (acb->status != -EINPROGRESS) {
126         return;
127     }
128 
129     acb->canceled = 1;
130 
131     /* send a task mgmt call to the target to cancel the task on the target */
132     iscsi_task_mgmt_abort_task_async(iscsilun->iscsi, acb->task,
133                                      iscsi_abort_task_cb, acb);
134 
135     while (acb->status == -EINPROGRESS) {
136         qemu_aio_wait();
137     }
138 }
139 
/*
 * AIOCB descriptor handed to qemu_aio_get(): gives the size of the
 * driver-private IscsiAIOCB and the cancel hook for in-flight requests.
 */
static const AIOCBInfo iscsi_aiocb_info = {
    .aiocb_size         = sizeof(IscsiAIOCB),
    .cancel             = iscsi_aio_cancel,
};
144 
145 
146 static void iscsi_process_read(void *arg);
147 static void iscsi_process_write(void *arg);
148 
149 static void
150 iscsi_set_events(IscsiLun *iscsilun)
151 {
152     struct iscsi_context *iscsi = iscsilun->iscsi;
153     int ev;
154 
155     /* We always register a read handler.  */
156     ev = POLLIN;
157     ev |= iscsi_which_events(iscsi);
158     if (ev != iscsilun->events) {
159         qemu_aio_set_fd_handler(iscsi_get_fd(iscsi),
160                       iscsi_process_read,
161                       (ev & POLLOUT) ? iscsi_process_write : NULL,
162                       iscsilun);
163 
164     }
165 
166     iscsilun->events = ev;
167 }
168 
169 static void
170 iscsi_process_read(void *arg)
171 {
172     IscsiLun *iscsilun = arg;
173     struct iscsi_context *iscsi = iscsilun->iscsi;
174 
175     iscsi_service(iscsi, POLLIN);
176     iscsi_set_events(iscsilun);
177 }
178 
179 static void
180 iscsi_process_write(void *arg)
181 {
182     IscsiLun *iscsilun = arg;
183     struct iscsi_context *iscsi = iscsilun->iscsi;
184 
185     iscsi_service(iscsi, POLLOUT);
186     iscsi_set_events(iscsilun);
187 }
188 
189 static int
190 iscsi_aio_writev_acb(IscsiAIOCB *acb);
191 
192 static void
193 iscsi_aio_write16_cb(struct iscsi_context *iscsi, int status,
194                      void *command_data, void *opaque)
195 {
196     IscsiAIOCB *acb = opaque;
197 
198     trace_iscsi_aio_write16_cb(iscsi, status, acb, acb->canceled);
199 
200     g_free(acb->buf);
201     acb->buf = NULL;
202 
203     if (acb->canceled != 0) {
204         return;
205     }
206 
207     acb->status = 0;
208     if (status != 0) {
209         if (status == SCSI_STATUS_CHECK_CONDITION
210             && acb->task->sense.key == SCSI_SENSE_UNIT_ATTENTION
211             && acb->retries-- > 0) {
212             scsi_free_scsi_task(acb->task);
213             acb->task = NULL;
214             if (iscsi_aio_writev_acb(acb) == 0) {
215                 iscsi_set_events(acb->iscsilun);
216                 return;
217             }
218         }
219         error_report("Failed to write16 data to iSCSI lun. %s",
220                      iscsi_get_error(iscsi));
221         acb->status = -EIO;
222     }
223 
224     iscsi_schedule_bh(acb);
225 }
226 
227 static int64_t sector_lun2qemu(int64_t sector, IscsiLun *iscsilun)
228 {
229     return sector * iscsilun->block_size / BDRV_SECTOR_SIZE;
230 }
231 
232 static int64_t sector_qemu2lun(int64_t sector, IscsiLun *iscsilun)
233 {
234     return sector * BDRV_SECTOR_SIZE / iscsilun->block_size;
235 }
236 
237 static bool is_request_lun_aligned(int64_t sector_num, int nb_sectors,
238                                       IscsiLun *iscsilun)
239 {
240     if ((sector_num * BDRV_SECTOR_SIZE) % iscsilun->block_size ||
241         (nb_sectors * BDRV_SECTOR_SIZE) % iscsilun->block_size) {
242             error_report("iSCSI misaligned request: "
243                          "iscsilun->block_size %u, sector_num %" PRIi64
244                          ", nb_sectors %d",
245                          iscsilun->block_size, sector_num, nb_sectors);
246             return 0;
247     }
248     return 1;
249 }
250 
251 static int
252 iscsi_aio_writev_acb(IscsiAIOCB *acb)
253 {
254     struct iscsi_context *iscsi = acb->iscsilun->iscsi;
255     size_t size;
256     uint32_t num_sectors;
257     uint64_t lba;
258 #if !defined(LIBISCSI_FEATURE_IOVECTOR)
259     struct iscsi_data data;
260 #endif
261     int ret;
262 
263     acb->canceled   = 0;
264     acb->bh         = NULL;
265     acb->status     = -EINPROGRESS;
266     acb->buf        = NULL;
267 
268     /* this will allow us to get rid of 'buf' completely */
269     size = acb->nb_sectors * BDRV_SECTOR_SIZE;
270 
271 #if !defined(LIBISCSI_FEATURE_IOVECTOR)
272     data.size = MIN(size, acb->qiov->size);
273 
274     /* if the iovec only contains one buffer we can pass it directly */
275     if (acb->qiov->niov == 1) {
276         data.data = acb->qiov->iov[0].iov_base;
277     } else {
278         acb->buf = g_malloc(data.size);
279         qemu_iovec_to_buf(acb->qiov, 0, acb->buf, data.size);
280         data.data = acb->buf;
281     }
282 #endif
283 
284     acb->task = malloc(sizeof(struct scsi_task));
285     if (acb->task == NULL) {
286         error_report("iSCSI: Failed to allocate task for scsi WRITE16 "
287                      "command. %s", iscsi_get_error(iscsi));
288         return -1;
289     }
290     memset(acb->task, 0, sizeof(struct scsi_task));
291 
292     acb->task->xfer_dir = SCSI_XFER_WRITE;
293     acb->task->cdb_size = 16;
294     acb->task->cdb[0] = 0x8a;
295     lba = sector_qemu2lun(acb->sector_num, acb->iscsilun);
296     *(uint32_t *)&acb->task->cdb[2]  = htonl(lba >> 32);
297     *(uint32_t *)&acb->task->cdb[6]  = htonl(lba & 0xffffffff);
298     num_sectors = sector_qemu2lun(acb->nb_sectors, acb->iscsilun);
299     *(uint32_t *)&acb->task->cdb[10] = htonl(num_sectors);
300     acb->task->expxferlen = size;
301 
302 #if defined(LIBISCSI_FEATURE_IOVECTOR)
303     ret = iscsi_scsi_command_async(iscsi, acb->iscsilun->lun, acb->task,
304                                    iscsi_aio_write16_cb,
305                                    NULL,
306                                    acb);
307 #else
308     ret = iscsi_scsi_command_async(iscsi, acb->iscsilun->lun, acb->task,
309                                    iscsi_aio_write16_cb,
310                                    &data,
311                                    acb);
312 #endif
313     if (ret != 0) {
314         scsi_free_scsi_task(acb->task);
315         g_free(acb->buf);
316         return -1;
317     }
318 
319 #if defined(LIBISCSI_FEATURE_IOVECTOR)
320     scsi_task_set_iov_out(acb->task, (struct scsi_iovec*) acb->qiov->iov, acb->qiov->niov);
321 #endif
322 
323     return 0;
324 }
325 
326 static BlockDriverAIOCB *
327 iscsi_aio_writev(BlockDriverState *bs, int64_t sector_num,
328                  QEMUIOVector *qiov, int nb_sectors,
329                  BlockDriverCompletionFunc *cb,
330                  void *opaque)
331 {
332     IscsiLun *iscsilun = bs->opaque;
333     IscsiAIOCB *acb;
334 
335     if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
336         return NULL;
337     }
338 
339     acb = qemu_aio_get(&iscsi_aiocb_info, bs, cb, opaque);
340     trace_iscsi_aio_writev(iscsilun->iscsi, sector_num, nb_sectors, opaque, acb);
341 
342     acb->iscsilun    = iscsilun;
343     acb->qiov        = qiov;
344     acb->nb_sectors  = nb_sectors;
345     acb->sector_num  = sector_num;
346     acb->retries     = ISCSI_CMD_RETRIES;
347 
348     if (iscsi_aio_writev_acb(acb) != 0) {
349         qemu_aio_release(acb);
350         return NULL;
351     }
352 
353     iscsi_set_events(iscsilun);
354     return &acb->common;
355 }
356 
357 static int
358 iscsi_aio_readv_acb(IscsiAIOCB *acb);
359 
360 static void
361 iscsi_aio_read16_cb(struct iscsi_context *iscsi, int status,
362                     void *command_data, void *opaque)
363 {
364     IscsiAIOCB *acb = opaque;
365 
366     trace_iscsi_aio_read16_cb(iscsi, status, acb, acb->canceled);
367 
368     if (acb->canceled != 0) {
369         return;
370     }
371 
372     acb->status = 0;
373     if (status != 0) {
374         if (status == SCSI_STATUS_CHECK_CONDITION
375             && acb->task->sense.key == SCSI_SENSE_UNIT_ATTENTION
376             && acb->retries-- > 0) {
377             scsi_free_scsi_task(acb->task);
378             acb->task = NULL;
379             if (iscsi_aio_readv_acb(acb) == 0) {
380                 iscsi_set_events(acb->iscsilun);
381                 return;
382             }
383         }
384         error_report("Failed to read16 data from iSCSI lun. %s",
385                      iscsi_get_error(iscsi));
386         acb->status = -EIO;
387     }
388 
389     iscsi_schedule_bh(acb);
390 }
391 
392 static int
393 iscsi_aio_readv_acb(IscsiAIOCB *acb)
394 {
395     struct iscsi_context *iscsi = acb->iscsilun->iscsi;
396     size_t size;
397     uint64_t lba;
398     uint32_t num_sectors;
399     int ret;
400 #if !defined(LIBISCSI_FEATURE_IOVECTOR)
401     int i;
402 #endif
403 
404     acb->canceled    = 0;
405     acb->bh          = NULL;
406     acb->status      = -EINPROGRESS;
407     acb->buf         = NULL;
408 
409     size = acb->nb_sectors * BDRV_SECTOR_SIZE;
410 
411     acb->task = malloc(sizeof(struct scsi_task));
412     if (acb->task == NULL) {
413         error_report("iSCSI: Failed to allocate task for scsi READ16 "
414                      "command. %s", iscsi_get_error(iscsi));
415         return -1;
416     }
417     memset(acb->task, 0, sizeof(struct scsi_task));
418 
419     acb->task->xfer_dir = SCSI_XFER_READ;
420     acb->task->expxferlen = size;
421     lba = sector_qemu2lun(acb->sector_num, acb->iscsilun);
422     num_sectors = sector_qemu2lun(acb->nb_sectors, acb->iscsilun);
423 
424     switch (acb->iscsilun->type) {
425     case TYPE_DISK:
426         acb->task->cdb_size = 16;
427         acb->task->cdb[0]  = 0x88;
428         *(uint32_t *)&acb->task->cdb[2]  = htonl(lba >> 32);
429         *(uint32_t *)&acb->task->cdb[6]  = htonl(lba & 0xffffffff);
430         *(uint32_t *)&acb->task->cdb[10] = htonl(num_sectors);
431         break;
432     default:
433         acb->task->cdb_size = 10;
434         acb->task->cdb[0]  = 0x28;
435         *(uint32_t *)&acb->task->cdb[2] = htonl(lba);
436         *(uint16_t *)&acb->task->cdb[7] = htons(num_sectors);
437         break;
438     }
439 
440     ret = iscsi_scsi_command_async(iscsi, acb->iscsilun->lun, acb->task,
441                                    iscsi_aio_read16_cb,
442                                    NULL,
443                                    acb);
444     if (ret != 0) {
445         scsi_free_scsi_task(acb->task);
446         return -1;
447     }
448 
449 #if defined(LIBISCSI_FEATURE_IOVECTOR)
450     scsi_task_set_iov_in(acb->task, (struct scsi_iovec*) acb->qiov->iov, acb->qiov->niov);
451 #else
452     for (i = 0; i < acb->qiov->niov; i++) {
453         scsi_task_add_data_in_buffer(acb->task,
454                 acb->qiov->iov[i].iov_len,
455                 acb->qiov->iov[i].iov_base);
456     }
457 #endif
458     return 0;
459 }
460 
461 static BlockDriverAIOCB *
462 iscsi_aio_readv(BlockDriverState *bs, int64_t sector_num,
463                 QEMUIOVector *qiov, int nb_sectors,
464                 BlockDriverCompletionFunc *cb,
465                 void *opaque)
466 {
467     IscsiLun *iscsilun = bs->opaque;
468     IscsiAIOCB *acb;
469 
470     if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
471         return NULL;
472     }
473 
474     acb = qemu_aio_get(&iscsi_aiocb_info, bs, cb, opaque);
475     trace_iscsi_aio_readv(iscsilun->iscsi, sector_num, nb_sectors, opaque, acb);
476 
477     acb->nb_sectors  = nb_sectors;
478     acb->sector_num  = sector_num;
479     acb->iscsilun    = iscsilun;
480     acb->qiov        = qiov;
481     acb->retries     = ISCSI_CMD_RETRIES;
482 
483     if (iscsi_aio_readv_acb(acb) != 0) {
484         qemu_aio_release(acb);
485         return NULL;
486     }
487 
488     iscsi_set_events(iscsilun);
489     return &acb->common;
490 }
491 
492 static int
493 iscsi_aio_flush_acb(IscsiAIOCB *acb);
494 
495 static void
496 iscsi_synccache10_cb(struct iscsi_context *iscsi, int status,
497                      void *command_data, void *opaque)
498 {
499     IscsiAIOCB *acb = opaque;
500 
501     if (acb->canceled != 0) {
502         return;
503     }
504 
505     acb->status = 0;
506     if (status != 0) {
507         if (status == SCSI_STATUS_CHECK_CONDITION
508             && acb->task->sense.key == SCSI_SENSE_UNIT_ATTENTION
509             && acb->retries-- > 0) {
510             scsi_free_scsi_task(acb->task);
511             acb->task = NULL;
512             if (iscsi_aio_flush_acb(acb) == 0) {
513                 iscsi_set_events(acb->iscsilun);
514                 return;
515             }
516         }
517         error_report("Failed to sync10 data on iSCSI lun. %s",
518                      iscsi_get_error(iscsi));
519         acb->status = -EIO;
520     }
521 
522     iscsi_schedule_bh(acb);
523 }
524 
525 static int
526 iscsi_aio_flush_acb(IscsiAIOCB *acb)
527 {
528     struct iscsi_context *iscsi = acb->iscsilun->iscsi;
529 
530     acb->canceled   = 0;
531     acb->bh         = NULL;
532     acb->status     = -EINPROGRESS;
533     acb->buf        = NULL;
534 
535     acb->task = iscsi_synchronizecache10_task(iscsi, acb->iscsilun->lun,
536                                          0, 0, 0, 0,
537                                          iscsi_synccache10_cb,
538                                          acb);
539     if (acb->task == NULL) {
540         error_report("iSCSI: Failed to send synchronizecache10 command. %s",
541                      iscsi_get_error(iscsi));
542         return -1;
543     }
544 
545     return 0;
546 }
547 
548 static BlockDriverAIOCB *
549 iscsi_aio_flush(BlockDriverState *bs,
550                 BlockDriverCompletionFunc *cb, void *opaque)
551 {
552     IscsiLun *iscsilun = bs->opaque;
553 
554     IscsiAIOCB *acb;
555 
556     acb = qemu_aio_get(&iscsi_aiocb_info, bs, cb, opaque);
557 
558     acb->iscsilun    = iscsilun;
559     acb->retries     = ISCSI_CMD_RETRIES;
560 
561     if (iscsi_aio_flush_acb(acb) != 0) {
562         qemu_aio_release(acb);
563         return NULL;
564     }
565 
566     iscsi_set_events(iscsilun);
567 
568     return &acb->common;
569 }
570 
571 static int iscsi_aio_discard_acb(IscsiAIOCB *acb);
572 
573 static void
574 iscsi_unmap_cb(struct iscsi_context *iscsi, int status,
575                      void *command_data, void *opaque)
576 {
577     IscsiAIOCB *acb = opaque;
578 
579     if (acb->canceled != 0) {
580         return;
581     }
582 
583     acb->status = 0;
584     if (status != 0) {
585         if (status == SCSI_STATUS_CHECK_CONDITION
586             && acb->task->sense.key == SCSI_SENSE_UNIT_ATTENTION
587             && acb->retries-- > 0) {
588             scsi_free_scsi_task(acb->task);
589             acb->task = NULL;
590             if (iscsi_aio_discard_acb(acb) == 0) {
591                 iscsi_set_events(acb->iscsilun);
592                 return;
593             }
594         }
595         error_report("Failed to unmap data on iSCSI lun. %s",
596                      iscsi_get_error(iscsi));
597         acb->status = -EIO;
598     }
599 
600     iscsi_schedule_bh(acb);
601 }
602 
603 static int iscsi_aio_discard_acb(IscsiAIOCB *acb) {
604     struct iscsi_context *iscsi = acb->iscsilun->iscsi;
605     struct unmap_list list[1];
606 
607     acb->canceled   = 0;
608     acb->bh         = NULL;
609     acb->status     = -EINPROGRESS;
610     acb->buf        = NULL;
611 
612     list[0].lba = sector_qemu2lun(acb->sector_num, acb->iscsilun);
613     list[0].num = acb->nb_sectors * BDRV_SECTOR_SIZE / acb->iscsilun->block_size;
614 
615     acb->task = iscsi_unmap_task(iscsi, acb->iscsilun->lun,
616                                  0, 0, &list[0], 1,
617                                  iscsi_unmap_cb,
618                                  acb);
619     if (acb->task == NULL) {
620         error_report("iSCSI: Failed to send unmap command. %s",
621                      iscsi_get_error(iscsi));
622         return -1;
623     }
624 
625     return 0;
626 }
627 
628 static BlockDriverAIOCB *
629 iscsi_aio_discard(BlockDriverState *bs,
630                   int64_t sector_num, int nb_sectors,
631                   BlockDriverCompletionFunc *cb, void *opaque)
632 {
633     IscsiLun *iscsilun = bs->opaque;
634     IscsiAIOCB *acb;
635 
636     acb = qemu_aio_get(&iscsi_aiocb_info, bs, cb, opaque);
637 
638     acb->iscsilun    = iscsilun;
639     acb->nb_sectors  = nb_sectors;
640     acb->sector_num  = sector_num;
641     acb->retries     = ISCSI_CMD_RETRIES;
642 
643     if (iscsi_aio_discard_acb(acb) != 0) {
644         qemu_aio_release(acb);
645         return NULL;
646     }
647 
648     iscsi_set_events(iscsilun);
649 
650     return &acb->common;
651 }
652 
653 #ifdef __linux__
654 static void
655 iscsi_aio_ioctl_cb(struct iscsi_context *iscsi, int status,
656                      void *command_data, void *opaque)
657 {
658     IscsiAIOCB *acb = opaque;
659 
660     g_free(acb->buf);
661     acb->buf = NULL;
662 
663     if (acb->canceled != 0) {
664         return;
665     }
666 
667     acb->status = 0;
668     if (status < 0) {
669         error_report("Failed to ioctl(SG_IO) to iSCSI lun. %s",
670                      iscsi_get_error(iscsi));
671         acb->status = -EIO;
672     }
673 
674     acb->ioh->driver_status = 0;
675     acb->ioh->host_status   = 0;
676     acb->ioh->resid         = 0;
677 
678 #define SG_ERR_DRIVER_SENSE    0x08
679 
680     if (status == SCSI_STATUS_CHECK_CONDITION && acb->task->datain.size >= 2) {
681         int ss;
682 
683         acb->ioh->driver_status |= SG_ERR_DRIVER_SENSE;
684 
685         acb->ioh->sb_len_wr = acb->task->datain.size - 2;
686         ss = (acb->ioh->mx_sb_len >= acb->ioh->sb_len_wr) ?
687              acb->ioh->mx_sb_len : acb->ioh->sb_len_wr;
688         memcpy(acb->ioh->sbp, &acb->task->datain.data[2], ss);
689     }
690 
691     iscsi_schedule_bh(acb);
692 }
693 
694 static BlockDriverAIOCB *iscsi_aio_ioctl(BlockDriverState *bs,
695         unsigned long int req, void *buf,
696         BlockDriverCompletionFunc *cb, void *opaque)
697 {
698     IscsiLun *iscsilun = bs->opaque;
699     struct iscsi_context *iscsi = iscsilun->iscsi;
700     struct iscsi_data data;
701     IscsiAIOCB *acb;
702 
703     assert(req == SG_IO);
704 
705     acb = qemu_aio_get(&iscsi_aiocb_info, bs, cb, opaque);
706 
707     acb->iscsilun = iscsilun;
708     acb->canceled    = 0;
709     acb->bh          = NULL;
710     acb->status      = -EINPROGRESS;
711     acb->buf         = NULL;
712     acb->ioh         = buf;
713 
714     acb->task = malloc(sizeof(struct scsi_task));
715     if (acb->task == NULL) {
716         error_report("iSCSI: Failed to allocate task for scsi command. %s",
717                      iscsi_get_error(iscsi));
718         qemu_aio_release(acb);
719         return NULL;
720     }
721     memset(acb->task, 0, sizeof(struct scsi_task));
722 
723     switch (acb->ioh->dxfer_direction) {
724     case SG_DXFER_TO_DEV:
725         acb->task->xfer_dir = SCSI_XFER_WRITE;
726         break;
727     case SG_DXFER_FROM_DEV:
728         acb->task->xfer_dir = SCSI_XFER_READ;
729         break;
730     default:
731         acb->task->xfer_dir = SCSI_XFER_NONE;
732         break;
733     }
734 
735     acb->task->cdb_size = acb->ioh->cmd_len;
736     memcpy(&acb->task->cdb[0], acb->ioh->cmdp, acb->ioh->cmd_len);
737     acb->task->expxferlen = acb->ioh->dxfer_len;
738 
739     data.size = 0;
740     if (acb->task->xfer_dir == SCSI_XFER_WRITE) {
741         if (acb->ioh->iovec_count == 0) {
742             data.data = acb->ioh->dxferp;
743             data.size = acb->ioh->dxfer_len;
744         } else {
745 #if defined(LIBISCSI_FEATURE_IOVECTOR)
746             scsi_task_set_iov_out(acb->task,
747                                  (struct scsi_iovec *) acb->ioh->dxferp,
748                                  acb->ioh->iovec_count);
749 #else
750             struct iovec *iov = (struct iovec *)acb->ioh->dxferp;
751 
752             acb->buf = g_malloc(acb->ioh->dxfer_len);
753             data.data = acb->buf;
754             data.size = iov_to_buf(iov, acb->ioh->iovec_count, 0,
755                                    acb->buf, acb->ioh->dxfer_len);
756 #endif
757         }
758     }
759 
760     if (iscsi_scsi_command_async(iscsi, iscsilun->lun, acb->task,
761                                  iscsi_aio_ioctl_cb,
762                                  (data.size > 0) ? &data : NULL,
763                                  acb) != 0) {
764         scsi_free_scsi_task(acb->task);
765         qemu_aio_release(acb);
766         return NULL;
767     }
768 
769     /* tell libiscsi to read straight into the buffer we got from ioctl */
770     if (acb->task->xfer_dir == SCSI_XFER_READ) {
771         if (acb->ioh->iovec_count == 0) {
772             scsi_task_add_data_in_buffer(acb->task,
773                                          acb->ioh->dxfer_len,
774                                          acb->ioh->dxferp);
775         } else {
776 #if defined(LIBISCSI_FEATURE_IOVECTOR)
777             scsi_task_set_iov_in(acb->task,
778                                  (struct scsi_iovec *) acb->ioh->dxferp,
779                                  acb->ioh->iovec_count);
780 #else
781             int i;
782             for (i = 0; i < acb->ioh->iovec_count; i++) {
783                 struct iovec *iov = (struct iovec *)acb->ioh->dxferp;
784 
785                 scsi_task_add_data_in_buffer(acb->task,
786                     iov[i].iov_len,
787                     iov[i].iov_base);
788             }
789 #endif
790         }
791     }
792 
793     iscsi_set_events(iscsilun);
794 
795     return &acb->common;
796 }
797 
798 
/*
 * Completion callback used by the synchronous SG_IO path: opaque points
 * to an int that receives the final status of the request.
 */
static void ioctl_cb(void *opaque, int status)
{
    *(int *)opaque = status;
}
804 
805 static int iscsi_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
806 {
807     IscsiLun *iscsilun = bs->opaque;
808     int status;
809 
810     switch (req) {
811     case SG_GET_VERSION_NUM:
812         *(int *)buf = 30000;
813         break;
814     case SG_GET_SCSI_ID:
815         ((struct sg_scsi_id *)buf)->scsi_type = iscsilun->type;
816         break;
817     case SG_IO:
818         status = -EINPROGRESS;
819         iscsi_aio_ioctl(bs, req, buf, ioctl_cb, &status);
820 
821         while (status == -EINPROGRESS) {
822             qemu_aio_wait();
823         }
824 
825         return 0;
826     default:
827         return -1;
828     }
829     return 0;
830 }
831 #endif
832 
833 static int64_t
834 iscsi_getlength(BlockDriverState *bs)
835 {
836     IscsiLun *iscsilun = bs->opaque;
837     int64_t len;
838 
839     len  = iscsilun->num_blocks;
840     len *= iscsilun->block_size;
841 
842     return len;
843 }
844 
845 static int parse_chap(struct iscsi_context *iscsi, const char *target)
846 {
847     QemuOptsList *list;
848     QemuOpts *opts;
849     const char *user = NULL;
850     const char *password = NULL;
851 
852     list = qemu_find_opts("iscsi");
853     if (!list) {
854         return 0;
855     }
856 
857     opts = qemu_opts_find(list, target);
858     if (opts == NULL) {
859         opts = QTAILQ_FIRST(&list->head);
860         if (!opts) {
861             return 0;
862         }
863     }
864 
865     user = qemu_opt_get(opts, "user");
866     if (!user) {
867         return 0;
868     }
869 
870     password = qemu_opt_get(opts, "password");
871     if (!password) {
872         error_report("CHAP username specified but no password was given");
873         return -1;
874     }
875 
876     if (iscsi_set_initiator_username_pwd(iscsi, user, password)) {
877         error_report("Failed to set initiator username and password");
878         return -1;
879     }
880 
881     return 0;
882 }
883 
884 static void parse_header_digest(struct iscsi_context *iscsi, const char *target)
885 {
886     QemuOptsList *list;
887     QemuOpts *opts;
888     const char *digest = NULL;
889 
890     list = qemu_find_opts("iscsi");
891     if (!list) {
892         return;
893     }
894 
895     opts = qemu_opts_find(list, target);
896     if (opts == NULL) {
897         opts = QTAILQ_FIRST(&list->head);
898         if (!opts) {
899             return;
900         }
901     }
902 
903     digest = qemu_opt_get(opts, "header-digest");
904     if (!digest) {
905         return;
906     }
907 
908     if (!strcmp(digest, "CRC32C")) {
909         iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_CRC32C);
910     } else if (!strcmp(digest, "NONE")) {
911         iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE);
912     } else if (!strcmp(digest, "CRC32C-NONE")) {
913         iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_CRC32C_NONE);
914     } else if (!strcmp(digest, "NONE-CRC32C")) {
915         iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE_CRC32C);
916     } else {
917         error_report("Invalid header-digest setting : %s", digest);
918     }
919 }
920 
921 static char *parse_initiator_name(const char *target)
922 {
923     QemuOptsList *list;
924     QemuOpts *opts;
925     const char *name = NULL;
926     const char *iscsi_name = qemu_get_vm_name();
927 
928     list = qemu_find_opts("iscsi");
929     if (list) {
930         opts = qemu_opts_find(list, target);
931         if (!opts) {
932             opts = QTAILQ_FIRST(&list->head);
933         }
934         if (opts) {
935             name = qemu_opt_get(opts, "initiator-name");
936         }
937     }
938 
939     if (name) {
940         return g_strdup(name);
941     } else {
942         return g_strdup_printf("iqn.2008-11.org.linux-kvm%s%s",
943                                iscsi_name ? ":" : "",
944                                iscsi_name ? iscsi_name : "");
945     }
946 }
947 
#if defined(LIBISCSI_FEATURE_NOP_COUNTER)
/*
 * Timer callback that sends a NOP-Out keepalive.
 *
 * If more than MAX_NOP_FAILURES NOPs are still in flight the
 * connection is re-established first.  When the NOP-Out cannot be
 * queued the timer is not re-armed; otherwise it fires again after
 * NOP_INTERVAL milliseconds.
 */
static void iscsi_nop_timed_event(void *opaque)
{
    IscsiLun *lun = opaque;
    int64_t next_run;

    if (iscsi_get_nops_in_flight(lun->iscsi) > MAX_NOP_FAILURES) {
        error_report("iSCSI: NOP timeout. Reconnecting...");
        iscsi_reconnect(lun->iscsi);
    }

    if (iscsi_nop_out_async(lun->iscsi, NULL, NULL, 0, NULL) != 0) {
        error_report("iSCSI: failed to sent NOP-Out. Disabling NOP messages.");
        return;
    }

    next_run = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + NOP_INTERVAL;
    timer_mod(lun->nop_timer, next_run);
    iscsi_set_events(lun);
}
#endif
967 
968 static int iscsi_readcapacity_sync(IscsiLun *iscsilun)
969 {
970     struct scsi_task *task = NULL;
971     struct scsi_readcapacity10 *rc10 = NULL;
972     struct scsi_readcapacity16 *rc16 = NULL;
973     int ret = 0;
974     int retries = ISCSI_CMD_RETRIES;
975 
976     do {
977         if (task != NULL) {
978             scsi_free_scsi_task(task);
979             task = NULL;
980         }
981 
982         switch (iscsilun->type) {
983         case TYPE_DISK:
984             task = iscsi_readcapacity16_sync(iscsilun->iscsi, iscsilun->lun);
985             if (task != NULL && task->status == SCSI_STATUS_GOOD) {
986                 rc16 = scsi_datain_unmarshall(task);
987                 if (rc16 == NULL) {
988                     error_report("iSCSI: Failed to unmarshall readcapacity16 data.");
989                     ret = -EINVAL;
990                 } else {
991                     iscsilun->block_size = rc16->block_length;
992                     iscsilun->num_blocks = rc16->returned_lba + 1;
993                 }
994             }
995             break;
996         case TYPE_ROM:
997             task = iscsi_readcapacity10_sync(iscsilun->iscsi, iscsilun->lun, 0, 0);
998             if (task != NULL && task->status == SCSI_STATUS_GOOD) {
999                 rc10 = scsi_datain_unmarshall(task);
1000                 if (rc10 == NULL) {
1001                     error_report("iSCSI: Failed to unmarshall readcapacity10 data.");
1002                     ret = -EINVAL;
1003                 } else {
1004                     iscsilun->block_size = rc10->block_size;
1005                     if (rc10->lba == 0) {
1006                         /* blank disk loaded */
1007                         iscsilun->num_blocks = 0;
1008                     } else {
1009                         iscsilun->num_blocks = rc10->lba + 1;
1010                     }
1011                 }
1012             }
1013             break;
1014         default:
1015             return 0;
1016         }
1017     } while (task != NULL && task->status == SCSI_STATUS_CHECK_CONDITION
1018              && task->sense.key == SCSI_SENSE_UNIT_ATTENTION
1019              && retries-- > 0);
1020 
1021     if (task == NULL || task->status != SCSI_STATUS_GOOD) {
1022         error_report("iSCSI: failed to send readcapacity10 command.");
1023         ret = -EINVAL;
1024     }
1025     if (task) {
1026         scsi_free_scsi_task(task);
1027     }
1028     return ret;
1029 }
1030 
1031 /* TODO Convert to fine grained options */
1032 static QemuOptsList runtime_opts = {
1033     .name = "iscsi",
1034     .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
1035     .desc = {
1036         {
1037             .name = "filename",
1038             .type = QEMU_OPT_STRING,
1039             .help = "URL to the iscsi image",
1040         },
1041         { /* end of list */ }
1042     },
1043 };
1044 
1045 /*
1046  * We support iscsi url's on the form
1047  * iscsi://[<username>%<password>@]<host>[:<port>]/<targetname>/<lun>
1048  */
1049 static int iscsi_open(BlockDriverState *bs, QDict *options, int flags)
1050 {
1051     IscsiLun *iscsilun = bs->opaque;
1052     struct iscsi_context *iscsi = NULL;
1053     struct iscsi_url *iscsi_url = NULL;
1054     struct scsi_task *task = NULL;
1055     struct scsi_inquiry_standard *inq = NULL;
1056     char *initiator_name = NULL;
1057     QemuOpts *opts;
1058     Error *local_err = NULL;
1059     const char *filename;
1060     int ret;
1061 
1062     if ((BDRV_SECTOR_SIZE % 512) != 0) {
1063         error_report("iSCSI: Invalid BDRV_SECTOR_SIZE. "
1064                      "BDRV_SECTOR_SIZE(%lld) is not a multiple "
1065                      "of 512", BDRV_SECTOR_SIZE);
1066         return -EINVAL;
1067     }
1068 
1069     opts = qemu_opts_create_nofail(&runtime_opts);
1070     qemu_opts_absorb_qdict(opts, options, &local_err);
1071     if (error_is_set(&local_err)) {
1072         qerror_report_err(local_err);
1073         error_free(local_err);
1074         ret = -EINVAL;
1075         goto out;
1076     }
1077 
1078     filename = qemu_opt_get(opts, "filename");
1079 
1080 
1081     iscsi_url = iscsi_parse_full_url(iscsi, filename);
1082     if (iscsi_url == NULL) {
1083         error_report("Failed to parse URL : %s", filename);
1084         ret = -EINVAL;
1085         goto out;
1086     }
1087 
1088     memset(iscsilun, 0, sizeof(IscsiLun));
1089 
1090     initiator_name = parse_initiator_name(iscsi_url->target);
1091 
1092     iscsi = iscsi_create_context(initiator_name);
1093     if (iscsi == NULL) {
1094         error_report("iSCSI: Failed to create iSCSI context.");
1095         ret = -ENOMEM;
1096         goto out;
1097     }
1098 
1099     if (iscsi_set_targetname(iscsi, iscsi_url->target)) {
1100         error_report("iSCSI: Failed to set target name.");
1101         ret = -EINVAL;
1102         goto out;
1103     }
1104 
1105     if (iscsi_url->user != NULL) {
1106         ret = iscsi_set_initiator_username_pwd(iscsi, iscsi_url->user,
1107                                               iscsi_url->passwd);
1108         if (ret != 0) {
1109             error_report("Failed to set initiator username and password");
1110             ret = -EINVAL;
1111             goto out;
1112         }
1113     }
1114 
1115     /* check if we got CHAP username/password via the options */
1116     if (parse_chap(iscsi, iscsi_url->target) != 0) {
1117         error_report("iSCSI: Failed to set CHAP user/password");
1118         ret = -EINVAL;
1119         goto out;
1120     }
1121 
1122     if (iscsi_set_session_type(iscsi, ISCSI_SESSION_NORMAL) != 0) {
1123         error_report("iSCSI: Failed to set session type to normal.");
1124         ret = -EINVAL;
1125         goto out;
1126     }
1127 
1128     iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE_CRC32C);
1129 
1130     /* check if we got HEADER_DIGEST via the options */
1131     parse_header_digest(iscsi, iscsi_url->target);
1132 
1133     if (iscsi_full_connect_sync(iscsi, iscsi_url->portal, iscsi_url->lun) != 0) {
1134         error_report("iSCSI: Failed to connect to LUN : %s",
1135             iscsi_get_error(iscsi));
1136         ret = -EINVAL;
1137         goto out;
1138     }
1139 
1140     iscsilun->iscsi = iscsi;
1141     iscsilun->lun   = iscsi_url->lun;
1142 
1143     task = iscsi_inquiry_sync(iscsi, iscsilun->lun, 0, 0, 36);
1144 
1145     if (task == NULL || task->status != SCSI_STATUS_GOOD) {
1146         error_report("iSCSI: failed to send inquiry command.");
1147         ret = -EINVAL;
1148         goto out;
1149     }
1150 
1151     inq = scsi_datain_unmarshall(task);
1152     if (inq == NULL) {
1153         error_report("iSCSI: Failed to unmarshall inquiry data.");
1154         ret = -EINVAL;
1155         goto out;
1156     }
1157 
1158     iscsilun->type = inq->periperal_device_type;
1159 
1160     if ((ret = iscsi_readcapacity_sync(iscsilun)) != 0) {
1161         goto out;
1162     }
1163     bs->total_sectors = sector_lun2qemu(iscsilun->num_blocks, iscsilun);
1164 
1165     /* Medium changer or tape. We dont have any emulation for this so this must
1166      * be sg ioctl compatible. We force it to be sg, otherwise qemu will try
1167      * to read from the device to guess the image format.
1168      */
1169     if (iscsilun->type == TYPE_MEDIUM_CHANGER ||
1170         iscsilun->type == TYPE_TAPE) {
1171         bs->sg = 1;
1172     }
1173 
1174 #if defined(LIBISCSI_FEATURE_NOP_COUNTER)
1175     /* Set up a timer for sending out iSCSI NOPs */
1176     iscsilun->nop_timer = timer_new_ms(QEMU_CLOCK_REALTIME, iscsi_nop_timed_event, iscsilun);
1177     timer_mod(iscsilun->nop_timer, qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + NOP_INTERVAL);
1178 #endif
1179 
1180 out:
1181     qemu_opts_del(opts);
1182     if (initiator_name != NULL) {
1183         g_free(initiator_name);
1184     }
1185     if (iscsi_url != NULL) {
1186         iscsi_destroy_url(iscsi_url);
1187     }
1188     if (task != NULL) {
1189         scsi_free_scsi_task(task);
1190     }
1191 
1192     if (ret) {
1193         if (iscsi != NULL) {
1194             iscsi_destroy_context(iscsi);
1195         }
1196         memset(iscsilun, 0, sizeof(IscsiLun));
1197     }
1198     return ret;
1199 }
1200 
1201 static void iscsi_close(BlockDriverState *bs)
1202 {
1203     IscsiLun *iscsilun = bs->opaque;
1204     struct iscsi_context *iscsi = iscsilun->iscsi;
1205 
1206     if (iscsilun->nop_timer) {
1207         timer_del(iscsilun->nop_timer);
1208         timer_free(iscsilun->nop_timer);
1209     }
1210     qemu_aio_set_fd_handler(iscsi_get_fd(iscsi), NULL, NULL, NULL);
1211     iscsi_destroy_context(iscsi);
1212     memset(iscsilun, 0, sizeof(IscsiLun));
1213 }
1214 
1215 static int iscsi_truncate(BlockDriverState *bs, int64_t offset)
1216 {
1217     IscsiLun *iscsilun = bs->opaque;
1218     int ret = 0;
1219 
1220     if (iscsilun->type != TYPE_DISK) {
1221         return -ENOTSUP;
1222     }
1223 
1224     if ((ret = iscsi_readcapacity_sync(iscsilun)) != 0) {
1225         return ret;
1226     }
1227 
1228     if (offset > iscsi_getlength(bs)) {
1229         return -EINVAL;
1230     }
1231 
1232     return 0;
1233 }
1234 
1235 static int iscsi_has_zero_init(BlockDriverState *bs)
1236 {
1237     return 0;
1238 }
1239 
1240 static int iscsi_create(const char *filename, QEMUOptionParameter *options)
1241 {
1242     int ret = 0;
1243     int64_t total_size = 0;
1244     BlockDriverState bs;
1245     IscsiLun *iscsilun = NULL;
1246     QDict *bs_options;
1247 
1248     memset(&bs, 0, sizeof(BlockDriverState));
1249 
1250     /* Read out options */
1251     while (options && options->name) {
1252         if (!strcmp(options->name, "size")) {
1253             total_size = options->value.n / BDRV_SECTOR_SIZE;
1254         }
1255         options++;
1256     }
1257 
1258     bs.opaque = g_malloc0(sizeof(struct IscsiLun));
1259     iscsilun = bs.opaque;
1260 
1261     bs_options = qdict_new();
1262     qdict_put(bs_options, "filename", qstring_from_str(filename));
1263     ret = iscsi_open(&bs, bs_options, 0);
1264     QDECREF(bs_options);
1265 
1266     if (ret != 0) {
1267         goto out;
1268     }
1269     if (iscsilun->nop_timer) {
1270         timer_del(iscsilun->nop_timer);
1271         timer_free(iscsilun->nop_timer);
1272     }
1273     if (iscsilun->type != TYPE_DISK) {
1274         ret = -ENODEV;
1275         goto out;
1276     }
1277     if (bs.total_sectors < total_size) {
1278         ret = -ENOSPC;
1279         goto out;
1280     }
1281 
1282     ret = 0;
1283 out:
1284     if (iscsilun->iscsi != NULL) {
1285         iscsi_destroy_context(iscsilun->iscsi);
1286     }
1287     g_free(bs.opaque);
1288     return ret;
1289 }
1290 
1291 static QEMUOptionParameter iscsi_create_options[] = {
1292     {
1293         .name = BLOCK_OPT_SIZE,
1294         .type = OPT_SIZE,
1295         .help = "Virtual disk size"
1296     },
1297     { NULL }
1298 };
1299 
1300 static BlockDriver bdrv_iscsi = {
1301     .format_name     = "iscsi",
1302     .protocol_name   = "iscsi",
1303 
1304     .instance_size   = sizeof(IscsiLun),
1305     .bdrv_file_open  = iscsi_open,
1306     .bdrv_close      = iscsi_close,
1307     .bdrv_create     = iscsi_create,
1308     .create_options  = iscsi_create_options,
1309 
1310     .bdrv_getlength  = iscsi_getlength,
1311     .bdrv_truncate   = iscsi_truncate,
1312 
1313     .bdrv_aio_readv  = iscsi_aio_readv,
1314     .bdrv_aio_writev = iscsi_aio_writev,
1315     .bdrv_aio_flush  = iscsi_aio_flush,
1316 
1317     .bdrv_aio_discard = iscsi_aio_discard,
1318     .bdrv_has_zero_init = iscsi_has_zero_init,
1319 
1320 #ifdef __linux__
1321     .bdrv_ioctl       = iscsi_ioctl,
1322     .bdrv_aio_ioctl   = iscsi_aio_ioctl,
1323 #endif
1324 };
1325 
1326 static QemuOptsList qemu_iscsi_opts = {
1327     .name = "iscsi",
1328     .head = QTAILQ_HEAD_INITIALIZER(qemu_iscsi_opts.head),
1329     .desc = {
1330         {
1331             .name = "user",
1332             .type = QEMU_OPT_STRING,
1333             .help = "username for CHAP authentication to target",
1334         },{
1335             .name = "password",
1336             .type = QEMU_OPT_STRING,
1337             .help = "password for CHAP authentication to target",
1338         },{
1339             .name = "header-digest",
1340             .type = QEMU_OPT_STRING,
1341             .help = "HeaderDigest setting. "
1342                     "{CRC32C|CRC32C-NONE|NONE-CRC32C|NONE}",
1343         },{
1344             .name = "initiator-name",
1345             .type = QEMU_OPT_STRING,
1346             .help = "Initiator iqn name to use when connecting",
1347         },
1348         { /* end of list */ }
1349     },
1350 };
1351 
1352 static void iscsi_block_init(void)
1353 {
1354     bdrv_register(&bdrv_iscsi);
1355     qemu_add_opts(&qemu_iscsi_opts);
1356 }
1357 
1358 block_init(iscsi_block_init);
1359