xref: /openbmc/qemu/block/iscsi.c (revision 5b24c641)
1 /*
2  * QEMU Block driver for iSCSI images
3  *
4  * Copyright (c) 2010-2011 Ronnie Sahlberg <ronniesahlberg@gmail.com>
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 #include "config-host.h"
26 
27 #include <poll.h>
28 #include <arpa/inet.h>
29 #include "qemu-common.h"
30 #include "qemu/config-file.h"
31 #include "qemu/error-report.h"
32 #include "block/block_int.h"
33 #include "trace.h"
34 #include "block/scsi.h"
35 #include "qemu/iov.h"
36 
37 #include <iscsi/iscsi.h>
38 #include <iscsi/scsi-lowlevel.h>
39 
40 #ifdef __linux__
41 #include <scsi/sg.h>
42 #include <block/scsi.h>
43 #endif
44 
/* Per-LUN driver state; the block layer hands it to us as bs->opaque. */
typedef struct IscsiLun {
    struct iscsi_context *iscsi;    /* libiscsi context used for all commands */
    int lun;                        /* LUN number passed to libiscsi calls */
    enum scsi_inquiry_peripheral_device_type type; /* e.g. TYPE_DISK, TYPE_ROM */
    int block_size;                 /* LUN block size, filled in by READ CAPACITY */
    uint64_t num_blocks;            /* LUN block count, filled in by READ CAPACITY */
    int events;                     /* event mask last computed by iscsi_set_events() */
    QEMUTimer *nop_timer;           /* timer re-armed by iscsi_nop_timed_event() */
} IscsiLun;
54 
/* State of one asynchronous request issued through this driver. */
typedef struct IscsiAIOCB {
    BlockDriverAIOCB common;    /* must stay first: cancel casts the base pointer */
    QEMUIOVector *qiov;         /* caller's I/O vector for reads and writes */
    QEMUBH *bh;                 /* completion bottom half, NULL until scheduled */
    IscsiLun *iscsilun;         /* LUN the request is addressed to */
    struct scsi_task *task;     /* libiscsi task, freed in iscsi_bh_cb() */
    uint8_t *buf;               /* optional linear bounce buffer, g_malloc'ed */
    int status;                 /* -EINPROGRESS while pending, else final result */
    int canceled;               /* nonzero once iscsi_aio_cancel() ran */
    int retries;                /* remaining retries after a UNIT ATTENTION */
    int64_t sector_num;         /* first sector, in BDRV_SECTOR_SIZE units */
    int nb_sectors;             /* sector count, in BDRV_SECTOR_SIZE units */
#ifdef __linux__
    sg_io_hdr_t *ioh;           /* SG_IO header supplied by the ioctl caller */
#endif
} IscsiAIOCB;
71 
/* Delay between NOP-Out pings, added to a millisecond clock value. */
#define NOP_INTERVAL 5000
/* Reconnect once more than this many NOPs are in flight. */
#define MAX_NOP_FAILURES 3
/* How often a command is re-issued after a UNIT ATTENTION check condition. */
#define ISCSI_CMD_RETRIES 5
75 
76 static void
77 iscsi_bh_cb(void *p)
78 {
79     IscsiAIOCB *acb = p;
80 
81     qemu_bh_delete(acb->bh);
82 
83     g_free(acb->buf);
84     acb->buf = NULL;
85 
86     if (acb->canceled == 0) {
87         acb->common.cb(acb->common.opaque, acb->status);
88     }
89 
90     if (acb->task != NULL) {
91         scsi_free_scsi_task(acb->task);
92         acb->task = NULL;
93     }
94 
95     qemu_aio_release(acb);
96 }
97 
98 static void
99 iscsi_schedule_bh(IscsiAIOCB *acb)
100 {
101     if (acb->bh) {
102         return;
103     }
104     acb->bh = qemu_bh_new(iscsi_bh_cb, acb);
105     qemu_bh_schedule(acb->bh);
106 }
107 
108 
109 static void
110 iscsi_abort_task_cb(struct iscsi_context *iscsi, int status, void *command_data,
111                     void *private_data)
112 {
113     IscsiAIOCB *acb = private_data;
114 
115     acb->status = -ECANCELED;
116     iscsi_schedule_bh(acb);
117 }
118 
/*
 * AIOCBInfo.cancel hook: abort a request that is still in flight and wait
 * until it has left the -EINPROGRESS state.
 */
static void
iscsi_aio_cancel(BlockDriverAIOCB *blockacb)
{
    /* Valid because 'common' is the first member of IscsiAIOCB. */
    IscsiAIOCB *acb = (IscsiAIOCB *)blockacb;
    IscsiLun *iscsilun = acb->iscsilun;

    /* Nothing to do once the request is no longer in progress. */
    if (acb->status != -EINPROGRESS) {
        return;
    }

    /* Makes the command callbacks return early and suppresses the
     * completion callback in iscsi_bh_cb(). */
    acb->canceled = 1;

    /* send a task mgmt call to the target to cancel the task on the target */
    iscsi_task_mgmt_abort_task_async(iscsilun->iscsi, acb->task,
                                     iscsi_abort_task_cb, acb);

    /* iscsi_abort_task_cb() ends this loop by storing -ECANCELED. */
    while (acb->status == -EINPROGRESS) {
        qemu_aio_wait();
    }
}
139 
/* AIOCB descriptor passed to qemu_aio_get() for every request of this driver. */
static const AIOCBInfo iscsi_aiocb_info = {
    .aiocb_size         = sizeof(IscsiAIOCB),
    .cancel             = iscsi_aio_cancel,
};
144 
145 
146 static void iscsi_process_read(void *arg);
147 static void iscsi_process_write(void *arg);
148 
149 static int iscsi_process_flush(void *arg)
150 {
151     IscsiLun *iscsilun = arg;
152 
153     return iscsi_queue_length(iscsilun->iscsi) > 0;
154 }
155 
156 static void
157 iscsi_set_events(IscsiLun *iscsilun)
158 {
159     struct iscsi_context *iscsi = iscsilun->iscsi;
160     int ev;
161 
162     /* We always register a read handler.  */
163     ev = POLLIN;
164     ev |= iscsi_which_events(iscsi);
165     if (ev != iscsilun->events) {
166         qemu_aio_set_fd_handler(iscsi_get_fd(iscsi),
167                       iscsi_process_read,
168                       (ev & POLLOUT) ? iscsi_process_write : NULL,
169                       iscsi_process_flush,
170                       iscsilun);
171 
172     }
173 
174     iscsilun->events = ev;
175 }
176 
177 static void
178 iscsi_process_read(void *arg)
179 {
180     IscsiLun *iscsilun = arg;
181     struct iscsi_context *iscsi = iscsilun->iscsi;
182 
183     iscsi_service(iscsi, POLLIN);
184     iscsi_set_events(iscsilun);
185 }
186 
187 static void
188 iscsi_process_write(void *arg)
189 {
190     IscsiLun *iscsilun = arg;
191     struct iscsi_context *iscsi = iscsilun->iscsi;
192 
193     iscsi_service(iscsi, POLLOUT);
194     iscsi_set_events(iscsilun);
195 }
196 
197 static int
198 iscsi_aio_writev_acb(IscsiAIOCB *acb);
199 
200 static void
201 iscsi_aio_write16_cb(struct iscsi_context *iscsi, int status,
202                      void *command_data, void *opaque)
203 {
204     IscsiAIOCB *acb = opaque;
205 
206     trace_iscsi_aio_write16_cb(iscsi, status, acb, acb->canceled);
207 
208     g_free(acb->buf);
209     acb->buf = NULL;
210 
211     if (acb->canceled != 0) {
212         return;
213     }
214 
215     acb->status = 0;
216     if (status != 0) {
217         if (status == SCSI_STATUS_CHECK_CONDITION
218             && acb->task->sense.key == SCSI_SENSE_UNIT_ATTENTION
219             && acb->retries-- > 0) {
220             scsi_free_scsi_task(acb->task);
221             acb->task = NULL;
222             if (iscsi_aio_writev_acb(acb) == 0) {
223                 iscsi_set_events(acb->iscsilun);
224                 return;
225             }
226         }
227         error_report("Failed to write16 data to iSCSI lun. %s",
228                      iscsi_get_error(iscsi));
229         acb->status = -EIO;
230     }
231 
232     iscsi_schedule_bh(acb);
233 }
234 
235 static int64_t sector_lun2qemu(int64_t sector, IscsiLun *iscsilun)
236 {
237     return sector * iscsilun->block_size / BDRV_SECTOR_SIZE;
238 }
239 
240 static int64_t sector_qemu2lun(int64_t sector, IscsiLun *iscsilun)
241 {
242     return sector * BDRV_SECTOR_SIZE / iscsilun->block_size;
243 }
244 
245 static bool is_request_lun_aligned(int64_t sector_num, int nb_sectors,
246                                       IscsiLun *iscsilun)
247 {
248     if ((sector_num * BDRV_SECTOR_SIZE) % iscsilun->block_size ||
249         (nb_sectors * BDRV_SECTOR_SIZE) % iscsilun->block_size) {
250             error_report("iSCSI misaligned request: iscsilun->block_size %u, sector_num %ld, nb_sectors %d",
251                          iscsilun->block_size, sector_num, nb_sectors);
252             return 0;
253     }
254     return 1;
255 }
256 
257 static int
258 iscsi_aio_writev_acb(IscsiAIOCB *acb)
259 {
260     struct iscsi_context *iscsi = acb->iscsilun->iscsi;
261     size_t size;
262     uint32_t num_sectors;
263     uint64_t lba;
264 #if !defined(LIBISCSI_FEATURE_IOVECTOR)
265     struct iscsi_data data;
266 #endif
267     int ret;
268 
269     acb->canceled   = 0;
270     acb->bh         = NULL;
271     acb->status     = -EINPROGRESS;
272     acb->buf        = NULL;
273 
274     /* this will allow us to get rid of 'buf' completely */
275     size = acb->nb_sectors * BDRV_SECTOR_SIZE;
276 
277 #if !defined(LIBISCSI_FEATURE_IOVECTOR)
278     data.size = MIN(size, acb->qiov->size);
279 
280     /* if the iovec only contains one buffer we can pass it directly */
281     if (acb->qiov->niov == 1) {
282         data.data = acb->qiov->iov[0].iov_base;
283     } else {
284         acb->buf = g_malloc(data.size);
285         qemu_iovec_to_buf(acb->qiov, 0, acb->buf, data.size);
286         data.data = acb->buf;
287     }
288 #endif
289 
290     acb->task = malloc(sizeof(struct scsi_task));
291     if (acb->task == NULL) {
292         error_report("iSCSI: Failed to allocate task for scsi WRITE16 "
293                      "command. %s", iscsi_get_error(iscsi));
294         return -1;
295     }
296     memset(acb->task, 0, sizeof(struct scsi_task));
297 
298     acb->task->xfer_dir = SCSI_XFER_WRITE;
299     acb->task->cdb_size = 16;
300     acb->task->cdb[0] = 0x8a;
301     lba = sector_qemu2lun(acb->sector_num, acb->iscsilun);
302     *(uint32_t *)&acb->task->cdb[2]  = htonl(lba >> 32);
303     *(uint32_t *)&acb->task->cdb[6]  = htonl(lba & 0xffffffff);
304     num_sectors = sector_qemu2lun(acb->nb_sectors, acb->iscsilun);
305     *(uint32_t *)&acb->task->cdb[10] = htonl(num_sectors);
306     acb->task->expxferlen = size;
307 
308 #if defined(LIBISCSI_FEATURE_IOVECTOR)
309     ret = iscsi_scsi_command_async(iscsi, acb->iscsilun->lun, acb->task,
310                                    iscsi_aio_write16_cb,
311                                    NULL,
312                                    acb);
313 #else
314     ret = iscsi_scsi_command_async(iscsi, acb->iscsilun->lun, acb->task,
315                                    iscsi_aio_write16_cb,
316                                    &data,
317                                    acb);
318 #endif
319     if (ret != 0) {
320         scsi_free_scsi_task(acb->task);
321         g_free(acb->buf);
322         return -1;
323     }
324 
325 #if defined(LIBISCSI_FEATURE_IOVECTOR)
326     scsi_task_set_iov_out(acb->task, (struct scsi_iovec*) acb->qiov->iov, acb->qiov->niov);
327 #endif
328 
329     return 0;
330 }
331 
332 static BlockDriverAIOCB *
333 iscsi_aio_writev(BlockDriverState *bs, int64_t sector_num,
334                  QEMUIOVector *qiov, int nb_sectors,
335                  BlockDriverCompletionFunc *cb,
336                  void *opaque)
337 {
338     IscsiLun *iscsilun = bs->opaque;
339     IscsiAIOCB *acb;
340 
341     if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
342         return NULL;
343     }
344 
345     acb = qemu_aio_get(&iscsi_aiocb_info, bs, cb, opaque);
346     trace_iscsi_aio_writev(iscsilun->iscsi, sector_num, nb_sectors, opaque, acb);
347 
348     acb->iscsilun    = iscsilun;
349     acb->qiov        = qiov;
350     acb->nb_sectors  = nb_sectors;
351     acb->sector_num  = sector_num;
352     acb->retries     = ISCSI_CMD_RETRIES;
353 
354     if (iscsi_aio_writev_acb(acb) != 0) {
355         qemu_aio_release(acb);
356         return NULL;
357     }
358 
359     iscsi_set_events(iscsilun);
360     return &acb->common;
361 }
362 
363 static int
364 iscsi_aio_readv_acb(IscsiAIOCB *acb);
365 
366 static void
367 iscsi_aio_read16_cb(struct iscsi_context *iscsi, int status,
368                     void *command_data, void *opaque)
369 {
370     IscsiAIOCB *acb = opaque;
371 
372     trace_iscsi_aio_read16_cb(iscsi, status, acb, acb->canceled);
373 
374     if (acb->canceled != 0) {
375         return;
376     }
377 
378     acb->status = 0;
379     if (status != 0) {
380         if (status == SCSI_STATUS_CHECK_CONDITION
381             && acb->task->sense.key == SCSI_SENSE_UNIT_ATTENTION
382             && acb->retries-- > 0) {
383             scsi_free_scsi_task(acb->task);
384             acb->task = NULL;
385             if (iscsi_aio_readv_acb(acb) == 0) {
386                 iscsi_set_events(acb->iscsilun);
387                 return;
388             }
389         }
390         error_report("Failed to read16 data from iSCSI lun. %s",
391                      iscsi_get_error(iscsi));
392         acb->status = -EIO;
393     }
394 
395     iscsi_schedule_bh(acb);
396 }
397 
398 static int
399 iscsi_aio_readv_acb(IscsiAIOCB *acb)
400 {
401     struct iscsi_context *iscsi = acb->iscsilun->iscsi;
402     size_t size;
403     uint64_t lba;
404     uint32_t num_sectors;
405     int ret;
406 #if !defined(LIBISCSI_FEATURE_IOVECTOR)
407     int i;
408 #endif
409 
410     acb->canceled    = 0;
411     acb->bh          = NULL;
412     acb->status      = -EINPROGRESS;
413     acb->buf         = NULL;
414 
415     size = acb->nb_sectors * BDRV_SECTOR_SIZE;
416 
417     acb->task = malloc(sizeof(struct scsi_task));
418     if (acb->task == NULL) {
419         error_report("iSCSI: Failed to allocate task for scsi READ16 "
420                      "command. %s", iscsi_get_error(iscsi));
421         return -1;
422     }
423     memset(acb->task, 0, sizeof(struct scsi_task));
424 
425     acb->task->xfer_dir = SCSI_XFER_READ;
426     acb->task->expxferlen = size;
427     lba = sector_qemu2lun(acb->sector_num, acb->iscsilun);
428     num_sectors = sector_qemu2lun(acb->nb_sectors, acb->iscsilun);
429 
430     switch (acb->iscsilun->type) {
431     case TYPE_DISK:
432         acb->task->cdb_size = 16;
433         acb->task->cdb[0]  = 0x88;
434         *(uint32_t *)&acb->task->cdb[2]  = htonl(lba >> 32);
435         *(uint32_t *)&acb->task->cdb[6]  = htonl(lba & 0xffffffff);
436         *(uint32_t *)&acb->task->cdb[10] = htonl(num_sectors);
437         break;
438     default:
439         acb->task->cdb_size = 10;
440         acb->task->cdb[0]  = 0x28;
441         *(uint32_t *)&acb->task->cdb[2] = htonl(lba);
442         *(uint16_t *)&acb->task->cdb[7] = htons(num_sectors);
443         break;
444     }
445 
446     ret = iscsi_scsi_command_async(iscsi, acb->iscsilun->lun, acb->task,
447                                    iscsi_aio_read16_cb,
448                                    NULL,
449                                    acb);
450     if (ret != 0) {
451         scsi_free_scsi_task(acb->task);
452         return -1;
453     }
454 
455 #if defined(LIBISCSI_FEATURE_IOVECTOR)
456     scsi_task_set_iov_in(acb->task, (struct scsi_iovec*) acb->qiov->iov, acb->qiov->niov);
457 #else
458     for (i = 0; i < acb->qiov->niov; i++) {
459         scsi_task_add_data_in_buffer(acb->task,
460                 acb->qiov->iov[i].iov_len,
461                 acb->qiov->iov[i].iov_base);
462     }
463 #endif
464     return 0;
465 }
466 
467 static BlockDriverAIOCB *
468 iscsi_aio_readv(BlockDriverState *bs, int64_t sector_num,
469                 QEMUIOVector *qiov, int nb_sectors,
470                 BlockDriverCompletionFunc *cb,
471                 void *opaque)
472 {
473     IscsiLun *iscsilun = bs->opaque;
474     IscsiAIOCB *acb;
475 
476     if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
477         return NULL;
478     }
479 
480     acb = qemu_aio_get(&iscsi_aiocb_info, bs, cb, opaque);
481     trace_iscsi_aio_readv(iscsilun->iscsi, sector_num, nb_sectors, opaque, acb);
482 
483     acb->nb_sectors  = nb_sectors;
484     acb->sector_num  = sector_num;
485     acb->iscsilun    = iscsilun;
486     acb->qiov        = qiov;
487     acb->retries     = ISCSI_CMD_RETRIES;
488 
489     if (iscsi_aio_readv_acb(acb) != 0) {
490         qemu_aio_release(acb);
491         return NULL;
492     }
493 
494     iscsi_set_events(iscsilun);
495     return &acb->common;
496 }
497 
498 static int
499 iscsi_aio_flush_acb(IscsiAIOCB *acb);
500 
501 static void
502 iscsi_synccache10_cb(struct iscsi_context *iscsi, int status,
503                      void *command_data, void *opaque)
504 {
505     IscsiAIOCB *acb = opaque;
506 
507     if (acb->canceled != 0) {
508         return;
509     }
510 
511     acb->status = 0;
512     if (status != 0) {
513         if (status == SCSI_STATUS_CHECK_CONDITION
514             && acb->task->sense.key == SCSI_SENSE_UNIT_ATTENTION
515             && acb->retries-- > 0) {
516             scsi_free_scsi_task(acb->task);
517             acb->task = NULL;
518             if (iscsi_aio_flush_acb(acb) == 0) {
519                 iscsi_set_events(acb->iscsilun);
520                 return;
521             }
522         }
523         error_report("Failed to sync10 data on iSCSI lun. %s",
524                      iscsi_get_error(iscsi));
525         acb->status = -EIO;
526     }
527 
528     iscsi_schedule_bh(acb);
529 }
530 
531 static int
532 iscsi_aio_flush_acb(IscsiAIOCB *acb)
533 {
534     struct iscsi_context *iscsi = acb->iscsilun->iscsi;
535 
536     acb->canceled   = 0;
537     acb->bh         = NULL;
538     acb->status     = -EINPROGRESS;
539     acb->buf        = NULL;
540 
541     acb->task = iscsi_synchronizecache10_task(iscsi, acb->iscsilun->lun,
542                                          0, 0, 0, 0,
543                                          iscsi_synccache10_cb,
544                                          acb);
545     if (acb->task == NULL) {
546         error_report("iSCSI: Failed to send synchronizecache10 command. %s",
547                      iscsi_get_error(iscsi));
548         return -1;
549     }
550 
551     return 0;
552 }
553 
554 static BlockDriverAIOCB *
555 iscsi_aio_flush(BlockDriverState *bs,
556                 BlockDriverCompletionFunc *cb, void *opaque)
557 {
558     IscsiLun *iscsilun = bs->opaque;
559 
560     IscsiAIOCB *acb;
561 
562     acb = qemu_aio_get(&iscsi_aiocb_info, bs, cb, opaque);
563 
564     acb->iscsilun    = iscsilun;
565     acb->retries     = ISCSI_CMD_RETRIES;
566 
567     if (iscsi_aio_flush_acb(acb) != 0) {
568         qemu_aio_release(acb);
569         return NULL;
570     }
571 
572     iscsi_set_events(iscsilun);
573 
574     return &acb->common;
575 }
576 
577 static int iscsi_aio_discard_acb(IscsiAIOCB *acb);
578 
579 static void
580 iscsi_unmap_cb(struct iscsi_context *iscsi, int status,
581                      void *command_data, void *opaque)
582 {
583     IscsiAIOCB *acb = opaque;
584 
585     if (acb->canceled != 0) {
586         return;
587     }
588 
589     acb->status = 0;
590     if (status != 0) {
591         if (status == SCSI_STATUS_CHECK_CONDITION
592             && acb->task->sense.key == SCSI_SENSE_UNIT_ATTENTION
593             && acb->retries-- > 0) {
594             scsi_free_scsi_task(acb->task);
595             acb->task = NULL;
596             if (iscsi_aio_discard_acb(acb) == 0) {
597                 iscsi_set_events(acb->iscsilun);
598                 return;
599             }
600         }
601         error_report("Failed to unmap data on iSCSI lun. %s",
602                      iscsi_get_error(iscsi));
603         acb->status = -EIO;
604     }
605 
606     iscsi_schedule_bh(acb);
607 }
608 
609 static int iscsi_aio_discard_acb(IscsiAIOCB *acb) {
610     struct iscsi_context *iscsi = acb->iscsilun->iscsi;
611     struct unmap_list list[1];
612 
613     acb->canceled   = 0;
614     acb->bh         = NULL;
615     acb->status     = -EINPROGRESS;
616     acb->buf        = NULL;
617 
618     list[0].lba = sector_qemu2lun(acb->sector_num, acb->iscsilun);
619     list[0].num = acb->nb_sectors * BDRV_SECTOR_SIZE / acb->iscsilun->block_size;
620 
621     acb->task = iscsi_unmap_task(iscsi, acb->iscsilun->lun,
622                                  0, 0, &list[0], 1,
623                                  iscsi_unmap_cb,
624                                  acb);
625     if (acb->task == NULL) {
626         error_report("iSCSI: Failed to send unmap command. %s",
627                      iscsi_get_error(iscsi));
628         return -1;
629     }
630 
631     return 0;
632 }
633 
634 static BlockDriverAIOCB *
635 iscsi_aio_discard(BlockDriverState *bs,
636                   int64_t sector_num, int nb_sectors,
637                   BlockDriverCompletionFunc *cb, void *opaque)
638 {
639     IscsiLun *iscsilun = bs->opaque;
640     IscsiAIOCB *acb;
641 
642     acb = qemu_aio_get(&iscsi_aiocb_info, bs, cb, opaque);
643 
644     acb->iscsilun    = iscsilun;
645     acb->nb_sectors  = nb_sectors;
646     acb->sector_num  = sector_num;
647     acb->retries     = ISCSI_CMD_RETRIES;
648 
649     if (iscsi_aio_discard_acb(acb) != 0) {
650         qemu_aio_release(acb);
651         return NULL;
652     }
653 
654     iscsi_set_events(iscsilun);
655 
656     return &acb->common;
657 }
658 
659 #ifdef __linux__
660 static void
661 iscsi_aio_ioctl_cb(struct iscsi_context *iscsi, int status,
662                      void *command_data, void *opaque)
663 {
664     IscsiAIOCB *acb = opaque;
665 
666     g_free(acb->buf);
667     acb->buf = NULL;
668 
669     if (acb->canceled != 0) {
670         return;
671     }
672 
673     acb->status = 0;
674     if (status < 0) {
675         error_report("Failed to ioctl(SG_IO) to iSCSI lun. %s",
676                      iscsi_get_error(iscsi));
677         acb->status = -EIO;
678     }
679 
680     acb->ioh->driver_status = 0;
681     acb->ioh->host_status   = 0;
682     acb->ioh->resid         = 0;
683 
684 #define SG_ERR_DRIVER_SENSE    0x08
685 
686     if (status == SCSI_STATUS_CHECK_CONDITION && acb->task->datain.size >= 2) {
687         int ss;
688 
689         acb->ioh->driver_status |= SG_ERR_DRIVER_SENSE;
690 
691         acb->ioh->sb_len_wr = acb->task->datain.size - 2;
692         ss = (acb->ioh->mx_sb_len >= acb->ioh->sb_len_wr) ?
693              acb->ioh->mx_sb_len : acb->ioh->sb_len_wr;
694         memcpy(acb->ioh->sbp, &acb->task->datain.data[2], ss);
695     }
696 
697     iscsi_schedule_bh(acb);
698 }
699 
700 static BlockDriverAIOCB *iscsi_aio_ioctl(BlockDriverState *bs,
701         unsigned long int req, void *buf,
702         BlockDriverCompletionFunc *cb, void *opaque)
703 {
704     IscsiLun *iscsilun = bs->opaque;
705     struct iscsi_context *iscsi = iscsilun->iscsi;
706     struct iscsi_data data;
707     IscsiAIOCB *acb;
708 
709     assert(req == SG_IO);
710 
711     acb = qemu_aio_get(&iscsi_aiocb_info, bs, cb, opaque);
712 
713     acb->iscsilun = iscsilun;
714     acb->canceled    = 0;
715     acb->bh          = NULL;
716     acb->status      = -EINPROGRESS;
717     acb->buf         = NULL;
718     acb->ioh         = buf;
719 
720     acb->task = malloc(sizeof(struct scsi_task));
721     if (acb->task == NULL) {
722         error_report("iSCSI: Failed to allocate task for scsi command. %s",
723                      iscsi_get_error(iscsi));
724         qemu_aio_release(acb);
725         return NULL;
726     }
727     memset(acb->task, 0, sizeof(struct scsi_task));
728 
729     switch (acb->ioh->dxfer_direction) {
730     case SG_DXFER_TO_DEV:
731         acb->task->xfer_dir = SCSI_XFER_WRITE;
732         break;
733     case SG_DXFER_FROM_DEV:
734         acb->task->xfer_dir = SCSI_XFER_READ;
735         break;
736     default:
737         acb->task->xfer_dir = SCSI_XFER_NONE;
738         break;
739     }
740 
741     acb->task->cdb_size = acb->ioh->cmd_len;
742     memcpy(&acb->task->cdb[0], acb->ioh->cmdp, acb->ioh->cmd_len);
743     acb->task->expxferlen = acb->ioh->dxfer_len;
744 
745     data.size = 0;
746     if (acb->task->xfer_dir == SCSI_XFER_WRITE) {
747         if (acb->ioh->iovec_count == 0) {
748             data.data = acb->ioh->dxferp;
749             data.size = acb->ioh->dxfer_len;
750         } else {
751 #if defined(LIBISCSI_FEATURE_IOVECTOR)
752             scsi_task_set_iov_out(acb->task,
753                                  (struct scsi_iovec *) acb->ioh->dxferp,
754                                  acb->ioh->iovec_count);
755 #else
756             struct iovec *iov = (struct iovec *)acb->ioh->dxferp;
757 
758             acb->buf = g_malloc(acb->ioh->dxfer_len);
759             data.data = acb->buf;
760             data.size = iov_to_buf(iov, acb->ioh->iovec_count, 0,
761                                    acb->buf, acb->ioh->dxfer_len);
762 #endif
763         }
764     }
765 
766     if (iscsi_scsi_command_async(iscsi, iscsilun->lun, acb->task,
767                                  iscsi_aio_ioctl_cb,
768                                  (data.size > 0) ? &data : NULL,
769                                  acb) != 0) {
770         scsi_free_scsi_task(acb->task);
771         qemu_aio_release(acb);
772         return NULL;
773     }
774 
775     /* tell libiscsi to read straight into the buffer we got from ioctl */
776     if (acb->task->xfer_dir == SCSI_XFER_READ) {
777         if (acb->ioh->iovec_count == 0) {
778             scsi_task_add_data_in_buffer(acb->task,
779                                          acb->ioh->dxfer_len,
780                                          acb->ioh->dxferp);
781         } else {
782 #if defined(LIBISCSI_FEATURE_IOVECTOR)
783             scsi_task_set_iov_in(acb->task,
784                                  (struct scsi_iovec *) acb->ioh->dxferp,
785                                  acb->ioh->iovec_count);
786 #else
787             int i;
788             for (i = 0; i < acb->ioh->iovec_count; i++) {
789                 struct iovec *iov = (struct iovec *)acb->ioh->dxferp;
790 
791                 scsi_task_add_data_in_buffer(acb->task,
792                     iov[i].iov_len,
793                     iov[i].iov_base);
794             }
795 #endif
796         }
797     }
798 
799     iscsi_set_events(iscsilun);
800 
801     return &acb->common;
802 }
803 
804 
/* Completion callback that stores the status in the int that opaque points to. */
static void ioctl_cb(void *opaque, int status)
{
    *(int *)opaque = status;
}
810 
811 static int iscsi_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
812 {
813     IscsiLun *iscsilun = bs->opaque;
814     int status;
815 
816     switch (req) {
817     case SG_GET_VERSION_NUM:
818         *(int *)buf = 30000;
819         break;
820     case SG_GET_SCSI_ID:
821         ((struct sg_scsi_id *)buf)->scsi_type = iscsilun->type;
822         break;
823     case SG_IO:
824         status = -EINPROGRESS;
825         iscsi_aio_ioctl(bs, req, buf, ioctl_cb, &status);
826 
827         while (status == -EINPROGRESS) {
828             qemu_aio_wait();
829         }
830 
831         return 0;
832     default:
833         return -1;
834     }
835     return 0;
836 }
837 #endif
838 
839 static int64_t
840 iscsi_getlength(BlockDriverState *bs)
841 {
842     IscsiLun *iscsilun = bs->opaque;
843     int64_t len;
844 
845     len  = iscsilun->num_blocks;
846     len *= iscsilun->block_size;
847 
848     return len;
849 }
850 
851 static int parse_chap(struct iscsi_context *iscsi, const char *target)
852 {
853     QemuOptsList *list;
854     QemuOpts *opts;
855     const char *user = NULL;
856     const char *password = NULL;
857 
858     list = qemu_find_opts("iscsi");
859     if (!list) {
860         return 0;
861     }
862 
863     opts = qemu_opts_find(list, target);
864     if (opts == NULL) {
865         opts = QTAILQ_FIRST(&list->head);
866         if (!opts) {
867             return 0;
868         }
869     }
870 
871     user = qemu_opt_get(opts, "user");
872     if (!user) {
873         return 0;
874     }
875 
876     password = qemu_opt_get(opts, "password");
877     if (!password) {
878         error_report("CHAP username specified but no password was given");
879         return -1;
880     }
881 
882     if (iscsi_set_initiator_username_pwd(iscsi, user, password)) {
883         error_report("Failed to set initiator username and password");
884         return -1;
885     }
886 
887     return 0;
888 }
889 
890 static void parse_header_digest(struct iscsi_context *iscsi, const char *target)
891 {
892     QemuOptsList *list;
893     QemuOpts *opts;
894     const char *digest = NULL;
895 
896     list = qemu_find_opts("iscsi");
897     if (!list) {
898         return;
899     }
900 
901     opts = qemu_opts_find(list, target);
902     if (opts == NULL) {
903         opts = QTAILQ_FIRST(&list->head);
904         if (!opts) {
905             return;
906         }
907     }
908 
909     digest = qemu_opt_get(opts, "header-digest");
910     if (!digest) {
911         return;
912     }
913 
914     if (!strcmp(digest, "CRC32C")) {
915         iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_CRC32C);
916     } else if (!strcmp(digest, "NONE")) {
917         iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE);
918     } else if (!strcmp(digest, "CRC32C-NONE")) {
919         iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_CRC32C_NONE);
920     } else if (!strcmp(digest, "NONE-CRC32C")) {
921         iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE_CRC32C);
922     } else {
923         error_report("Invalid header-digest setting : %s", digest);
924     }
925 }
926 
927 static char *parse_initiator_name(const char *target)
928 {
929     QemuOptsList *list;
930     QemuOpts *opts;
931     const char *name = NULL;
932     const char *iscsi_name = qemu_get_vm_name();
933 
934     list = qemu_find_opts("iscsi");
935     if (list) {
936         opts = qemu_opts_find(list, target);
937         if (!opts) {
938             opts = QTAILQ_FIRST(&list->head);
939         }
940         if (opts) {
941             name = qemu_opt_get(opts, "initiator-name");
942         }
943     }
944 
945     if (name) {
946         return g_strdup(name);
947     } else {
948         return g_strdup_printf("iqn.2008-11.org.linux-kvm%s%s",
949                                iscsi_name ? ":" : "",
950                                iscsi_name ? iscsi_name : "");
951     }
952 }
953 
954 #if defined(LIBISCSI_FEATURE_NOP_COUNTER)
955 static void iscsi_nop_timed_event(void *opaque)
956 {
957     IscsiLun *iscsilun = opaque;
958 
959     if (iscsi_get_nops_in_flight(iscsilun->iscsi) > MAX_NOP_FAILURES) {
960         error_report("iSCSI: NOP timeout. Reconnecting...");
961         iscsi_reconnect(iscsilun->iscsi);
962     }
963 
964     if (iscsi_nop_out_async(iscsilun->iscsi, NULL, NULL, 0, NULL) != 0) {
965         error_report("iSCSI: failed to sent NOP-Out. Disabling NOP messages.");
966         return;
967     }
968 
969     qemu_mod_timer(iscsilun->nop_timer, qemu_get_clock_ms(rt_clock) + NOP_INTERVAL);
970     iscsi_set_events(iscsilun);
971 }
972 #endif
973 
/*
 * Synchronously query the LUN capacity and store block_size and
 * num_blocks in iscsilun.  TYPE_DISK uses READ CAPACITY(16), TYPE_ROM
 * uses READ CAPACITY(10); any other device type is left untouched and
 * reported as success.  The command is repeated while it fails with a
 * UNIT ATTENTION check condition and retries remain.
 *
 * Returns 0 on success and -EINVAL when the command failed or its data
 * could not be unmarshalled.
 */
static int iscsi_readcapacity_sync(IscsiLun *iscsilun)
{
    struct scsi_task *task = NULL;
    struct scsi_readcapacity10 *rc10 = NULL;
    struct scsi_readcapacity16 *rc16 = NULL;
    int ret = 0;
    int retries = ISCSI_CMD_RETRIES;

    do {
        /* Drop the task of the previous attempt before issuing a new one. */
        if (task != NULL) {
            scsi_free_scsi_task(task);
            task = NULL;
        }

        switch (iscsilun->type) {
        case TYPE_DISK:
            task = iscsi_readcapacity16_sync(iscsilun->iscsi, iscsilun->lun);
            if (task != NULL && task->status == SCSI_STATUS_GOOD) {
                rc16 = scsi_datain_unmarshall(task);
                if (rc16 == NULL) {
                    error_report("iSCSI: Failed to unmarshall readcapacity16 data.");
                    ret = -EINVAL;
                } else {
                    iscsilun->block_size = rc16->block_length;
                    /* returned_lba is the last block, so add one for the count */
                    iscsilun->num_blocks = rc16->returned_lba + 1;
                }
            }
            break;
        case TYPE_ROM:
            task = iscsi_readcapacity10_sync(iscsilun->iscsi, iscsilun->lun, 0, 0);
            if (task != NULL && task->status == SCSI_STATUS_GOOD) {
                rc10 = scsi_datain_unmarshall(task);
                if (rc10 == NULL) {
                    error_report("iSCSI: Failed to unmarshall readcapacity10 data.");
                    ret = -EINVAL;
                } else {
                    iscsilun->block_size = rc10->block_size;
                    if (rc10->lba == 0) {
                        /* blank disk loaded */
                        iscsilun->num_blocks = 0;
                    } else {
                        iscsilun->num_blocks = rc10->lba + 1;
                    }
                }
            }
            break;
        default:
            /* task is always NULL here, so there is nothing to free. */
            return 0;
        }
    } while (task != NULL && task->status == SCSI_STATUS_CHECK_CONDITION
             && task->sense.key == SCSI_SENSE_UNIT_ATTENTION
             && retries-- > 0);

    /* NOTE(review): the message names readcapacity10 even when the
     * READ CAPACITY(16) path was taken. */
    if (task == NULL || task->status != SCSI_STATUS_GOOD) {
        error_report("iSCSI: failed to send readcapacity10 command.");
        ret = -EINVAL;
    }
    if (task) {
        scsi_free_scsi_task(task);
    }
    return ret;
}
1036 
1037 /* TODO Convert to fine grained options */
1038 static QemuOptsList runtime_opts = {
1039     .name = "iscsi",
1040     .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
1041     .desc = {
1042         {
1043             .name = "filename",
1044             .type = QEMU_OPT_STRING,
1045             .help = "URL to the iscsi image",
1046         },
1047         { /* end of list */ }
1048     },
1049 };
1050 
1051 /*
1052  * We support iscsi url's on the form
1053  * iscsi://[<username>%<password>@]<host>[:<port>]/<targetname>/<lun>
1054  */
1055 static int iscsi_open(BlockDriverState *bs, QDict *options, int flags)
1056 {
1057     IscsiLun *iscsilun = bs->opaque;
1058     struct iscsi_context *iscsi = NULL;
1059     struct iscsi_url *iscsi_url = NULL;
1060     struct scsi_task *task = NULL;
1061     struct scsi_inquiry_standard *inq = NULL;
1062     char *initiator_name = NULL;
1063     QemuOpts *opts;
1064     Error *local_err = NULL;
1065     const char *filename;
1066     int ret;
1067 
1068     if ((BDRV_SECTOR_SIZE % 512) != 0) {
1069         error_report("iSCSI: Invalid BDRV_SECTOR_SIZE. "
1070                      "BDRV_SECTOR_SIZE(%lld) is not a multiple "
1071                      "of 512", BDRV_SECTOR_SIZE);
1072         return -EINVAL;
1073     }
1074 
1075     opts = qemu_opts_create_nofail(&runtime_opts);
1076     qemu_opts_absorb_qdict(opts, options, &local_err);
1077     if (error_is_set(&local_err)) {
1078         qerror_report_err(local_err);
1079         error_free(local_err);
1080         ret = -EINVAL;
1081         goto out;
1082     }
1083 
1084     filename = qemu_opt_get(opts, "filename");
1085 
1086 
1087     iscsi_url = iscsi_parse_full_url(iscsi, filename);
1088     if (iscsi_url == NULL) {
1089         error_report("Failed to parse URL : %s", filename);
1090         ret = -EINVAL;
1091         goto out;
1092     }
1093 
1094     memset(iscsilun, 0, sizeof(IscsiLun));
1095 
1096     initiator_name = parse_initiator_name(iscsi_url->target);
1097 
1098     iscsi = iscsi_create_context(initiator_name);
1099     if (iscsi == NULL) {
1100         error_report("iSCSI: Failed to create iSCSI context.");
1101         ret = -ENOMEM;
1102         goto out;
1103     }
1104 
1105     if (iscsi_set_targetname(iscsi, iscsi_url->target)) {
1106         error_report("iSCSI: Failed to set target name.");
1107         ret = -EINVAL;
1108         goto out;
1109     }
1110 
1111     if (iscsi_url->user != NULL) {
1112         ret = iscsi_set_initiator_username_pwd(iscsi, iscsi_url->user,
1113                                               iscsi_url->passwd);
1114         if (ret != 0) {
1115             error_report("Failed to set initiator username and password");
1116             ret = -EINVAL;
1117             goto out;
1118         }
1119     }
1120 
1121     /* check if we got CHAP username/password via the options */
1122     if (parse_chap(iscsi, iscsi_url->target) != 0) {
1123         error_report("iSCSI: Failed to set CHAP user/password");
1124         ret = -EINVAL;
1125         goto out;
1126     }
1127 
1128     if (iscsi_set_session_type(iscsi, ISCSI_SESSION_NORMAL) != 0) {
1129         error_report("iSCSI: Failed to set session type to normal.");
1130         ret = -EINVAL;
1131         goto out;
1132     }
1133 
1134     iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE_CRC32C);
1135 
1136     /* check if we got HEADER_DIGEST via the options */
1137     parse_header_digest(iscsi, iscsi_url->target);
1138 
1139     if (iscsi_full_connect_sync(iscsi, iscsi_url->portal, iscsi_url->lun) != 0) {
1140         error_report("iSCSI: Failed to connect to LUN : %s",
1141             iscsi_get_error(iscsi));
1142         ret = -EINVAL;
1143         goto out;
1144     }
1145 
1146     iscsilun->iscsi = iscsi;
1147     iscsilun->lun   = iscsi_url->lun;
1148 
1149     task = iscsi_inquiry_sync(iscsi, iscsilun->lun, 0, 0, 36);
1150 
1151     if (task == NULL || task->status != SCSI_STATUS_GOOD) {
1152         error_report("iSCSI: failed to send inquiry command.");
1153         ret = -EINVAL;
1154         goto out;
1155     }
1156 
1157     inq = scsi_datain_unmarshall(task);
1158     if (inq == NULL) {
1159         error_report("iSCSI: Failed to unmarshall inquiry data.");
1160         ret = -EINVAL;
1161         goto out;
1162     }
1163 
1164     iscsilun->type = inq->periperal_device_type;
1165 
1166     if ((ret = iscsi_readcapacity_sync(iscsilun)) != 0) {
1167         goto out;
1168     }
1169     bs->total_sectors = sector_lun2qemu(iscsilun->num_blocks, iscsilun);
1170 
1171     /* Medium changer or tape. We dont have any emulation for this so this must
1172      * be sg ioctl compatible. We force it to be sg, otherwise qemu will try
1173      * to read from the device to guess the image format.
1174      */
1175     if (iscsilun->type == TYPE_MEDIUM_CHANGER ||
1176         iscsilun->type == TYPE_TAPE) {
1177         bs->sg = 1;
1178     }
1179 
1180 #if defined(LIBISCSI_FEATURE_NOP_COUNTER)
1181     /* Set up a timer for sending out iSCSI NOPs */
1182     iscsilun->nop_timer = qemu_new_timer_ms(rt_clock, iscsi_nop_timed_event, iscsilun);
1183     qemu_mod_timer(iscsilun->nop_timer, qemu_get_clock_ms(rt_clock) + NOP_INTERVAL);
1184 #endif
1185 
1186 out:
1187     qemu_opts_del(opts);
1188     if (initiator_name != NULL) {
1189         g_free(initiator_name);
1190     }
1191     if (iscsi_url != NULL) {
1192         iscsi_destroy_url(iscsi_url);
1193     }
1194     if (task != NULL) {
1195         scsi_free_scsi_task(task);
1196     }
1197 
1198     if (ret) {
1199         if (iscsi != NULL) {
1200             iscsi_destroy_context(iscsi);
1201         }
1202         memset(iscsilun, 0, sizeof(IscsiLun));
1203     }
1204     return ret;
1205 }
1206 
1207 static void iscsi_close(BlockDriverState *bs)
1208 {
1209     IscsiLun *iscsilun = bs->opaque;
1210     struct iscsi_context *iscsi = iscsilun->iscsi;
1211 
1212     if (iscsilun->nop_timer) {
1213         qemu_del_timer(iscsilun->nop_timer);
1214         qemu_free_timer(iscsilun->nop_timer);
1215     }
1216     qemu_aio_set_fd_handler(iscsi_get_fd(iscsi), NULL, NULL, NULL, NULL);
1217     iscsi_destroy_context(iscsi);
1218     memset(iscsilun, 0, sizeof(IscsiLun));
1219 }
1220 
1221 static int iscsi_truncate(BlockDriverState *bs, int64_t offset)
1222 {
1223     IscsiLun *iscsilun = bs->opaque;
1224     int ret = 0;
1225 
1226     if (iscsilun->type != TYPE_DISK) {
1227         return -ENOTSUP;
1228     }
1229 
1230     if ((ret = iscsi_readcapacity_sync(iscsilun)) != 0) {
1231         return ret;
1232     }
1233 
1234     if (offset > iscsi_getlength(bs)) {
1235         return -EINVAL;
1236     }
1237 
1238     return 0;
1239 }
1240 
1241 static int iscsi_has_zero_init(BlockDriverState *bs)
1242 {
1243     return 0;
1244 }
1245 
1246 static int iscsi_create(const char *filename, QEMUOptionParameter *options)
1247 {
1248     int ret = 0;
1249     int64_t total_size = 0;
1250     BlockDriverState bs;
1251     IscsiLun *iscsilun = NULL;
1252     QDict *bs_options;
1253 
1254     memset(&bs, 0, sizeof(BlockDriverState));
1255 
1256     /* Read out options */
1257     while (options && options->name) {
1258         if (!strcmp(options->name, "size")) {
1259             total_size = options->value.n / BDRV_SECTOR_SIZE;
1260         }
1261         options++;
1262     }
1263 
1264     bs.opaque = g_malloc0(sizeof(struct IscsiLun));
1265     iscsilun = bs.opaque;
1266 
1267     bs_options = qdict_new();
1268     qdict_put(bs_options, "filename", qstring_from_str(filename));
1269     ret = iscsi_open(&bs, bs_options, 0);
1270     QDECREF(bs_options);
1271 
1272     if (ret != 0) {
1273         goto out;
1274     }
1275     if (iscsilun->nop_timer) {
1276         qemu_del_timer(iscsilun->nop_timer);
1277         qemu_free_timer(iscsilun->nop_timer);
1278     }
1279     if (iscsilun->type != TYPE_DISK) {
1280         ret = -ENODEV;
1281         goto out;
1282     }
1283     if (bs.total_sectors < total_size) {
1284         ret = -ENOSPC;
1285         goto out;
1286     }
1287 
1288     ret = 0;
1289 out:
1290     if (iscsilun->iscsi != NULL) {
1291         iscsi_destroy_context(iscsilun->iscsi);
1292     }
1293     g_free(bs.opaque);
1294     return ret;
1295 }
1296 
1297 static QEMUOptionParameter iscsi_create_options[] = {
1298     {
1299         .name = BLOCK_OPT_SIZE,
1300         .type = OPT_SIZE,
1301         .help = "Virtual disk size"
1302     },
1303     { NULL }
1304 };
1305 
1306 static BlockDriver bdrv_iscsi = {
1307     .format_name     = "iscsi",
1308     .protocol_name   = "iscsi",
1309 
1310     .instance_size   = sizeof(IscsiLun),
1311     .bdrv_file_open  = iscsi_open,
1312     .bdrv_close      = iscsi_close,
1313     .bdrv_create     = iscsi_create,
1314     .create_options  = iscsi_create_options,
1315 
1316     .bdrv_getlength  = iscsi_getlength,
1317     .bdrv_truncate   = iscsi_truncate,
1318 
1319     .bdrv_aio_readv  = iscsi_aio_readv,
1320     .bdrv_aio_writev = iscsi_aio_writev,
1321     .bdrv_aio_flush  = iscsi_aio_flush,
1322 
1323     .bdrv_aio_discard = iscsi_aio_discard,
1324     .bdrv_has_zero_init = iscsi_has_zero_init,
1325 
1326 #ifdef __linux__
1327     .bdrv_ioctl       = iscsi_ioctl,
1328     .bdrv_aio_ioctl   = iscsi_aio_ioctl,
1329 #endif
1330 };
1331 
1332 static QemuOptsList qemu_iscsi_opts = {
1333     .name = "iscsi",
1334     .head = QTAILQ_HEAD_INITIALIZER(qemu_iscsi_opts.head),
1335     .desc = {
1336         {
1337             .name = "user",
1338             .type = QEMU_OPT_STRING,
1339             .help = "username for CHAP authentication to target",
1340         },{
1341             .name = "password",
1342             .type = QEMU_OPT_STRING,
1343             .help = "password for CHAP authentication to target",
1344         },{
1345             .name = "header-digest",
1346             .type = QEMU_OPT_STRING,
1347             .help = "HeaderDigest setting. "
1348                     "{CRC32C|CRC32C-NONE|NONE-CRC32C|NONE}",
1349         },{
1350             .name = "initiator-name",
1351             .type = QEMU_OPT_STRING,
1352             .help = "Initiator iqn name to use when connecting",
1353         },
1354         { /* end of list */ }
1355     },
1356 };
1357 
1358 static void iscsi_block_init(void)
1359 {
1360     bdrv_register(&bdrv_iscsi);
1361     qemu_add_opts(&qemu_iscsi_opts);
1362 }
1363 
1364 block_init(iscsi_block_init);
1365