xref: /openbmc/qemu/block/iscsi.c (revision 3665dd6bb9043bef181c91e2dce9e1efff47ed51)
1  /*
2   * QEMU Block driver for iSCSI images
3   *
4   * Copyright (c) 2010-2011 Ronnie Sahlberg <ronniesahlberg@gmail.com>
5   * Copyright (c) 2012-2017 Peter Lieven <pl@kamp.de>
6   *
7   * Permission is hereby granted, free of charge, to any person obtaining a copy
8   * of this software and associated documentation files (the "Software"), to deal
9   * in the Software without restriction, including without limitation the rights
10   * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11   * copies of the Software, and to permit persons to whom the Software is
12   * furnished to do so, subject to the following conditions:
13   *
14   * The above copyright notice and this permission notice shall be included in
15   * all copies or substantial portions of the Software.
16   *
17   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18   * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19   * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20   * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21   * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22   * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23   * THE SOFTWARE.
24   */
25  
26  #include "qemu/osdep.h"
27  
28  #include <poll.h>
29  #include <math.h>
30  #include <arpa/inet.h>
31  #include "sysemu/sysemu.h"
32  #include "qemu/config-file.h"
33  #include "qemu/error-report.h"
34  #include "qemu/bitops.h"
35  #include "qemu/bitmap.h"
36  #include "block/block-io.h"
37  #include "block/block_int.h"
38  #include "block/qdict.h"
39  #include "scsi/constants.h"
40  #include "qemu/iov.h"
41  #include "qemu/module.h"
42  #include "qemu/option.h"
43  #include "qemu/uuid.h"
44  #include "sysemu/replay.h"
45  #include "qapi/error.h"
46  #include "qapi/qapi-commands-machine.h"
47  #include "qapi/qmp/qdict.h"
48  #include "qapi/qmp/qstring.h"
49  #include "crypto/secret.h"
50  #include "scsi/utils.h"
51  #include "trace.h"
52  
53  /* Conflict between scsi/utils.h and libiscsi! :( */
54  #define SCSI_XFER_NONE ISCSI_XFER_NONE
55  #include <iscsi/iscsi.h>
56  #define inline __attribute__((gnu_inline))  /* required for libiscsi v1.9.0 */
57  #include <iscsi/scsi-lowlevel.h>
58  #undef inline
59  #undef SCSI_XFER_NONE
60  QEMU_BUILD_BUG_ON((int)SCSI_XFER_NONE != (int)ISCSI_XFER_NONE);
61  
62  #ifdef __linux__
63  #include <scsi/sg.h>
64  #endif
65  
66  typedef struct IscsiLun {
67      struct iscsi_context *iscsi;
68      AioContext *aio_context;
69      int lun;
70      enum scsi_inquiry_peripheral_device_type type;
71      int block_size;
72      uint64_t num_blocks;
73      int events;
74      QEMUTimer *nop_timer;
75      QEMUTimer *event_timer;
76      QemuMutex mutex;
77      struct scsi_inquiry_logical_block_provisioning lbp;
78      struct scsi_inquiry_block_limits bl;
79      struct scsi_inquiry_device_designator *dd;
80      unsigned char *zeroblock;
81      /* The allocmap tracks which clusters (pages) on the iSCSI target are
82       * allocated and which are not. If the target returns zeros for
83       * unallocated pages (iscsilun->lbprz) we can directly return zeros instead
84       * of reading zeros over the wire whenever a read request falls within an
85       * unallocated block. As there are 3 possible states we need 2 bitmaps to
86       * track them. allocmap_valid records whether QEMU's information about a
87       * page is valid. allocmap records whether a page is allocated. If QEMU has
88       * no valid information about a page, the corresponding allocmap entry
89       * should be switched to unallocated as well to force a new lookup of the
90       * allocation status, as lookups are generally skipped if a page is
91       * suspected to be allocated. If an iSCSI target is opened with
92       * cache.direct = on, allocmap_valid does not exist, which renders all
93       * cached information invalid, so a fresh lookup is made for any page even
94       * if the allocmap entry reports it as unallocated. */
95      unsigned long *allocmap;
96      unsigned long *allocmap_valid;
97      long allocmap_size;
98      int cluster_size;
99      bool use_16_for_rw;
100      bool write_protected;
101      bool lbpme;
102      bool lbprz;
103      bool dpofua;
104      bool has_write_same;
105      bool request_timed_out;
106  } IscsiLun;
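/*
 * Illustrative sketch only (not part of the original file): how the two
 * bitmaps above combine into the three per-cluster states described in the
 * comment. The enum and helper names are hypothetical; test_bit() is the
 * helper from "qemu/bitops.h" already included above.
 */
enum iscsi_alloc_state_sketch {
    ISCSI_CL_ALLOCATED,     /* data must be read from the target */
    ISCSI_CL_UNALLOCATED,   /* reads may be answered with zeroes locally
                             * (see iscsi_co_readv()) */
    ISCSI_CL_UNKNOWN,       /* no cached knowledge, query GET LBA STATUS */
};

static inline enum iscsi_alloc_state_sketch
iscsi_cluster_state_sketch(const IscsiLun *iscsilun, long cl)
{
    if (!iscsilun->allocmap_valid || !test_bit(cl, iscsilun->allocmap_valid)) {
        return ISCSI_CL_UNKNOWN;
    }
    return test_bit(cl, iscsilun->allocmap) ? ISCSI_CL_ALLOCATED
                                            : ISCSI_CL_UNALLOCATED;
}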
107  
108  typedef struct IscsiTask {
109      int status;
110      int complete;
111      int retries;
112      int do_retry;
113      struct scsi_task *task;
114      Coroutine *co;
115      IscsiLun *iscsilun;
116      QEMUTimer retry_timer;
117      int err_code;
118      char *err_str;
119  } IscsiTask;
120  
121  typedef struct IscsiAIOCB {
122      BlockAIOCB common;
123      QEMUBH *bh;
124      IscsiLun *iscsilun;
125      struct scsi_task *task;
126      int status;
127      int64_t sector_num;
128      int nb_sectors;
129      int ret;
130  #ifdef __linux__
131      sg_io_hdr_t *ioh;
132  #endif
133      bool cancelled;
134  } IscsiAIOCB;
135  
136  /* libiscsi uses time_t so it's enough to process events every second */
137  #define EVENT_INTERVAL 1000
138  #define NOP_INTERVAL 5000
139  #define MAX_NOP_FAILURES 3
140  #define ISCSI_CMD_RETRIES ARRAY_SIZE(iscsi_retry_times)
141  static const unsigned iscsi_retry_times[] = {8, 32, 128, 512, 2048, 8192, 32768};
142  
143  /* This threshold is a trade-off knob to choose between
144   * the potential additional overhead of an extra GET_LBA_STATUS request
145   * vs. unnecessarily reading a lot of zero sectors over the wire.
146   * If a read request is greater than or equal to ISCSI_CHECKALLOC_THRES
147   * sectors, we first check the allocation status of the area covered by
148   * the request if the allocation map indicates that the area might be
149   * unallocated. */
150  #define ISCSI_CHECKALLOC_THRES 64
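/*
 * Worked example (illustrative, not part of the original file): with QEMU's
 * 512-byte sectors, ISCSI_CHECKALLOC_THRES corresponds to 64 * 512 = 32 KiB.
 * Reads of at least that size whose allocation status is unknown may pay for
 * one extra GET LBA STATUS round trip before any data is transferred (see
 * iscsi_co_readv() below).
 */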
151  
152  #ifdef __linux__
153  
154  static void
155  iscsi_bh_cb(void *p)
156  {
157      IscsiAIOCB *acb = p;
158  
159      qemu_bh_delete(acb->bh);
160  
161      acb->common.cb(acb->common.opaque, acb->status);
162  
163      if (acb->task != NULL) {
164          scsi_free_scsi_task(acb->task);
165          acb->task = NULL;
166      }
167  
168      qemu_aio_unref(acb);
169  }
170  
171  static void
172  iscsi_schedule_bh(IscsiAIOCB *acb)
173  {
174      if (acb->bh) {
175          return;
176      }
177      acb->bh = aio_bh_new(acb->iscsilun->aio_context, iscsi_bh_cb, acb);
178      qemu_bh_schedule(acb->bh);
179  }
180  
181  #endif
182  
183  static void iscsi_co_generic_bh_cb(void *opaque)
184  {
185      struct IscsiTask *iTask = opaque;
186  
187      iTask->complete = 1;
188      aio_co_wake(iTask->co);
189  }
190  
191  static void iscsi_retry_timer_expired(void *opaque)
192  {
193      struct IscsiTask *iTask = opaque;
194      iTask->complete = 1;
195      if (iTask->co) {
196          aio_co_wake(iTask->co);
197      }
198  }
199  
200  static inline unsigned exp_random(double mean)
201  {
202      return -mean * log((double)rand() / RAND_MAX);
203  }
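/*
 * Illustrative only (not part of the original file): the retry delay for
 * attempt n (1-based) is drawn from an exponential distribution whose mean is
 * iscsi_retry_times[n - 1] milliseconds, e.g.
 *
 *     unsigned delay_ms = exp_random(iscsi_retry_times[2]);  /- mean ~128 ms -/
 *
 * so successive retries back off roughly geometrically, from ~8 ms up to
 * ~32.8 s on the last attempt.
 */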
204  
205  /* SCSI_SENSE_ASCQ_INVALID_FIELD_IN_PARAMETER_LIST was introduced in
206   * libiscsi 1.10.0, together with other constants we need.  Use it as
207   * a hint that we have to define them ourselves if needed, to keep the
208   * minimum required libiscsi version at 1.9.0.  We use an ASCQ macro for
209   * the test because SCSI_STATUS_* is an enum.
210   *
211   * To guard against future changes where SCSI_SENSE_ASCQ_* also becomes
212   * an enum, check against the LIBISCSI_API_VERSION macro, which was
213   * introduced in 1.11.0.  If it is present, there is no need to define
214   * anything.
215   */
216  #if !defined(SCSI_SENSE_ASCQ_INVALID_FIELD_IN_PARAMETER_LIST) && \
217      !defined(LIBISCSI_API_VERSION)
218  #define SCSI_STATUS_TASK_SET_FULL                          0x28
219  #define SCSI_STATUS_TIMEOUT                                0x0f000002
220  #define SCSI_SENSE_ASCQ_INVALID_FIELD_IN_PARAMETER_LIST    0x2600
221  #define SCSI_SENSE_ASCQ_PARAMETER_LIST_LENGTH_ERROR        0x1a00
222  #endif
223  
224  #ifndef LIBISCSI_API_VERSION
225  #define LIBISCSI_API_VERSION 20130701
226  #endif
227  
228  static int iscsi_translate_sense(struct scsi_sense *sense)
229  {
230      return scsi_sense_to_errno(sense->key,
231                                 (sense->ascq & 0xFF00) >> 8,
232                                 sense->ascq & 0xFF);
233  }
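/*
 * Worked example (illustrative, not part of the original file): libiscsi
 * packs ASC and ASCQ into a single 16-bit value, so sense->ascq == 0x2600
 * (INVALID FIELD IN PARAMETER LIST) is split into ASC 0x26 and ASCQ 0x00
 * before being handed to scsi_sense_to_errno() above.
 */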
234  
235  /* Called (via iscsi_service) with QemuMutex held.  */
236  static void
237  iscsi_co_generic_cb(struct iscsi_context *iscsi, int status,
238                          void *command_data, void *opaque)
239  {
240      struct IscsiTask *iTask = opaque;
241      struct scsi_task *task = command_data;
242  
243      iTask->status = status;
244      iTask->do_retry = 0;
245      iTask->err_code = 0;
246      iTask->task = task;
247  
248      if (status != SCSI_STATUS_GOOD) {
249          iTask->err_code = -EIO;
250          if (iTask->retries++ < ISCSI_CMD_RETRIES) {
251              if (status == SCSI_STATUS_BUSY ||
252                  status == SCSI_STATUS_TIMEOUT ||
253                  status == SCSI_STATUS_TASK_SET_FULL) {
254                  unsigned retry_time =
255                      exp_random(iscsi_retry_times[iTask->retries - 1]);
256                  if (status == SCSI_STATUS_TIMEOUT) {
257                      /* make sure the request is rescheduled AFTER the
258                       * reconnect is initiated */
259                      retry_time = EVENT_INTERVAL * 2;
260                      iTask->iscsilun->request_timed_out = true;
261                  }
262                  error_report("iSCSI Busy/TaskSetFull/TimeOut"
263                               " (retry #%u in %u ms): %s",
264                               iTask->retries, retry_time,
265                               iscsi_get_error(iscsi));
266                  aio_timer_init(iTask->iscsilun->aio_context,
267                                 &iTask->retry_timer, QEMU_CLOCK_REALTIME,
268                                 SCALE_MS, iscsi_retry_timer_expired, iTask);
269                  timer_mod(&iTask->retry_timer,
270                            qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + retry_time);
271                  iTask->do_retry = 1;
272                  return;
273              } else if (status == SCSI_STATUS_CHECK_CONDITION) {
274                  int error = iscsi_translate_sense(&task->sense);
275                  if (error == EAGAIN) {
276                      error_report("iSCSI CheckCondition: %s",
277                                   iscsi_get_error(iscsi));
278                      iTask->do_retry = 1;
279                  } else {
280                      iTask->err_code = -error;
281                      iTask->err_str = g_strdup(iscsi_get_error(iscsi));
282                  }
283              }
284          }
285      }
286  
287      if (iTask->co) {
288          replay_bh_schedule_oneshot_event(iTask->iscsilun->aio_context,
289                                           iscsi_co_generic_bh_cb, iTask);
290      } else {
291          iTask->complete = 1;
292      }
293  }
294  
295  static void coroutine_fn
296  iscsi_co_init_iscsitask(IscsiLun *iscsilun, struct IscsiTask *iTask)
297  {
298      *iTask = (struct IscsiTask) {
299          .co         = qemu_coroutine_self(),
300          .iscsilun   = iscsilun,
301      };
302  }
303  
304  #ifdef __linux__
305  
306  /* Called (via iscsi_service) with QemuMutex held. */
307  static void
308  iscsi_abort_task_cb(struct iscsi_context *iscsi, int status, void *command_data,
309                      void *private_data)
310  {
311      IscsiAIOCB *acb = private_data;
312  
313      /* If the command callback hasn't been called yet, drop the task */
314      if (!acb->bh) {
315          /* Call iscsi_aio_ioctl_cb() with SCSI_STATUS_CANCELLED */
316          iscsi_scsi_cancel_task(iscsi, acb->task);
317      }
318  
319      qemu_aio_unref(acb); /* acquired in iscsi_aio_cancel() */
320  }
321  
322  static void
323  iscsi_aio_cancel(BlockAIOCB *blockacb)
324  {
325      IscsiAIOCB *acb = (IscsiAIOCB *)blockacb;
326      IscsiLun *iscsilun = acb->iscsilun;
327  
328      WITH_QEMU_LOCK_GUARD(&iscsilun->mutex) {
329  
330          /* If it was cancelled or completed already, our work is done here */
331          if (acb->cancelled || acb->status != -EINPROGRESS) {
332              return;
333          }
334  
335          acb->cancelled = true;
336  
337          qemu_aio_ref(acb); /* released in iscsi_abort_task_cb() */
338  
339          /* send a task mgmt call to the target to cancel the task */
340          if (iscsi_task_mgmt_abort_task_async(iscsilun->iscsi, acb->task,
341                                               iscsi_abort_task_cb, acb) < 0) {
342              qemu_aio_unref(acb); /* since iscsi_abort_task_cb() won't be called */
343          }
344      }
345  }
346  
347  static const AIOCBInfo iscsi_aiocb_info = {
348      .aiocb_size         = sizeof(IscsiAIOCB),
349      .cancel_async       = iscsi_aio_cancel,
350  };
351  
352  #endif
353  
354  static void iscsi_process_read(void *arg);
355  static void iscsi_process_write(void *arg);
356  
357  /* Called with QemuMutex held.  */
358  static void
359  iscsi_set_events(IscsiLun *iscsilun)
360  {
361      struct iscsi_context *iscsi = iscsilun->iscsi;
362      int ev = iscsi_which_events(iscsi);
363  
364      if (ev != iscsilun->events) {
365          aio_set_fd_handler(iscsilun->aio_context, iscsi_get_fd(iscsi),
366                             (ev & POLLIN) ? iscsi_process_read : NULL,
367                             (ev & POLLOUT) ? iscsi_process_write : NULL,
368                             NULL, NULL,
369                             iscsilun);
370          iscsilun->events = ev;
371      }
372  }
373  
374  static void iscsi_timed_check_events(void *opaque)
375  {
376      IscsiLun *iscsilun = opaque;
377  
378      WITH_QEMU_LOCK_GUARD(&iscsilun->mutex) {
379          /* check for timed out requests */
380          iscsi_service(iscsilun->iscsi, 0);
381  
382          if (iscsilun->request_timed_out) {
383              iscsilun->request_timed_out = false;
384              iscsi_reconnect(iscsilun->iscsi);
385          }
386  
387          /*
388           * newer versions of libiscsi may return zero events. Ensure we are
389           * able to return to service once this situation changes.
390           */
391          iscsi_set_events(iscsilun);
392      }
393  
394      timer_mod(iscsilun->event_timer,
395                qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + EVENT_INTERVAL);
396  }
397  
398  static void
399  iscsi_process_read(void *arg)
400  {
401      IscsiLun *iscsilun = arg;
402      struct iscsi_context *iscsi = iscsilun->iscsi;
403  
404      qemu_mutex_lock(&iscsilun->mutex);
405      iscsi_service(iscsi, POLLIN);
406      iscsi_set_events(iscsilun);
407      qemu_mutex_unlock(&iscsilun->mutex);
408  }
409  
410  static void
411  iscsi_process_write(void *arg)
412  {
413      IscsiLun *iscsilun = arg;
414      struct iscsi_context *iscsi = iscsilun->iscsi;
415  
416      qemu_mutex_lock(&iscsilun->mutex);
417      iscsi_service(iscsi, POLLOUT);
418      iscsi_set_events(iscsilun);
419      qemu_mutex_unlock(&iscsilun->mutex);
420  }
421  
422  static int64_t sector_lun2qemu(int64_t sector, IscsiLun *iscsilun)
423  {
424      return sector * iscsilun->block_size / BDRV_SECTOR_SIZE;
425  }
426  
427  static int64_t sector_qemu2lun(int64_t sector, IscsiLun *iscsilun)
428  {
429      return sector * BDRV_SECTOR_SIZE / iscsilun->block_size;
430  }
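/*
 * Worked example (illustrative, not part of the original file): with a
 * 4096-byte LUN block size and QEMU's 512-byte sectors,
 *
 *     sector_qemu2lun(8, iscsilun) == 8 * 512 / 4096 == 1   (LUN LBA)
 *     sector_lun2qemu(1, iscsilun) == 1 * 4096 / 512 == 8   (QEMU sector)
 */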
431  
432  static bool is_byte_request_lun_aligned(int64_t offset, int64_t bytes,
433                                          IscsiLun *iscsilun)
434  {
435      if (offset % iscsilun->block_size || bytes % iscsilun->block_size) {
436          error_report("iSCSI misaligned request: "
437                       "iscsilun->block_size %u, offset %" PRIi64
438                       ", bytes %" PRIi64,
439                       iscsilun->block_size, offset, bytes);
440          return false;
441      }
442      return true;
443  }
444  
445  static bool is_sector_request_lun_aligned(int64_t sector_num, int nb_sectors,
446                                            IscsiLun *iscsilun)
447  {
448      assert(nb_sectors <= BDRV_REQUEST_MAX_SECTORS);
449      return is_byte_request_lun_aligned(sector_num << BDRV_SECTOR_BITS,
450                                         nb_sectors << BDRV_SECTOR_BITS,
451                                         iscsilun);
452  }
453  
454  static void iscsi_allocmap_free(IscsiLun *iscsilun)
455  {
456      g_free(iscsilun->allocmap);
457      g_free(iscsilun->allocmap_valid);
458      iscsilun->allocmap = NULL;
459      iscsilun->allocmap_valid = NULL;
460  }
461  
462  
463  static int iscsi_allocmap_init(IscsiLun *iscsilun, int open_flags)
464  {
465      iscsi_allocmap_free(iscsilun);
466  
467      assert(iscsilun->cluster_size);
468      iscsilun->allocmap_size =
469          DIV_ROUND_UP(iscsilun->num_blocks * iscsilun->block_size,
470                       iscsilun->cluster_size);
471  
472      iscsilun->allocmap = bitmap_try_new(iscsilun->allocmap_size);
473      if (!iscsilun->allocmap) {
474          return -ENOMEM;
475      }
476  
477      if (open_flags & BDRV_O_NOCACHE) {
477          /* when cache.direct = on, all allocmap entries are
478           * treated as invalid to force a fresh lookup of the block
479           * status on every read request */
481          return 0;
482      }
483  
484      iscsilun->allocmap_valid = bitmap_try_new(iscsilun->allocmap_size);
485      if (!iscsilun->allocmap_valid) {
486          /* if we are under memory pressure free the allocmap as well */
487          iscsi_allocmap_free(iscsilun);
488          return -ENOMEM;
489      }
490  
491      return 0;
492  }
493  
494  static void
495  iscsi_allocmap_update(IscsiLun *iscsilun, int64_t offset,
496                        int64_t bytes, bool allocated, bool valid)
497  {
498      int64_t cl_num_expanded, nb_cls_expanded, cl_num_shrunk, nb_cls_shrunk;
499  
500      if (iscsilun->allocmap == NULL) {
501          return;
502      }
503      /* expand to entirely contain all affected clusters */
504      assert(iscsilun->cluster_size);
505      cl_num_expanded = offset / iscsilun->cluster_size;
506      nb_cls_expanded = DIV_ROUND_UP(offset + bytes,
507                                     iscsilun->cluster_size) - cl_num_expanded;
508      /* shrink to touch only completely contained clusters */
509      cl_num_shrunk = DIV_ROUND_UP(offset, iscsilun->cluster_size);
510      nb_cls_shrunk = (offset + bytes) / iscsilun->cluster_size - cl_num_shrunk;
511      if (allocated) {
512          bitmap_set(iscsilun->allocmap, cl_num_expanded, nb_cls_expanded);
513      } else {
514          if (nb_cls_shrunk > 0) {
515              bitmap_clear(iscsilun->allocmap, cl_num_shrunk, nb_cls_shrunk);
516          }
517      }
518  
519      if (iscsilun->allocmap_valid == NULL) {
520          return;
521      }
522      if (valid) {
523          if (nb_cls_shrunk > 0) {
524              bitmap_set(iscsilun->allocmap_valid, cl_num_shrunk, nb_cls_shrunk);
525          }
526      } else {
527          bitmap_clear(iscsilun->allocmap_valid, cl_num_expanded,
528                       nb_cls_expanded);
529      }
530  }
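/*
 * Worked example (illustrative, not part of the original file): with
 * cluster_size = 64 KiB, a request covering [70 KiB, 230 KiB) yields
 *
 *     expanded range: clusters 1..3  (every cluster the request touches,
 *                                     used when marking allocated/invalid)
 *     shrunk range:   cluster 2 only (clusters fully contained in the
 *                                     request, used when marking
 *                                     unallocated/valid)
 */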
531  
532  static void
533  iscsi_allocmap_set_allocated(IscsiLun *iscsilun, int64_t offset,
534                               int64_t bytes)
535  {
536      iscsi_allocmap_update(iscsilun, offset, bytes, true, true);
537  }
538  
539  static void
540  iscsi_allocmap_set_unallocated(IscsiLun *iscsilun, int64_t offset,
541                                 int64_t bytes)
542  {
543      /* Note: if cache.direct=on the fifth argument to iscsi_allocmap_update
544       * is ignored, so this will in effect be an iscsi_allocmap_set_invalid.
545       */
546      iscsi_allocmap_update(iscsilun, offset, bytes, false, true);
547  }
548  
549  static void iscsi_allocmap_set_invalid(IscsiLun *iscsilun, int64_t offset,
550                                         int64_t bytes)
551  {
552      iscsi_allocmap_update(iscsilun, offset, bytes, false, false);
553  }
554  
555  static void iscsi_allocmap_invalidate(IscsiLun *iscsilun)
556  {
557      if (iscsilun->allocmap) {
558          bitmap_zero(iscsilun->allocmap, iscsilun->allocmap_size);
559      }
560      if (iscsilun->allocmap_valid) {
561          bitmap_zero(iscsilun->allocmap_valid, iscsilun->allocmap_size);
562      }
563  }
564  
565  static inline bool
566  iscsi_allocmap_is_allocated(IscsiLun *iscsilun, int64_t offset,
567                              int64_t bytes)
568  {
569      unsigned long size;
570      if (iscsilun->allocmap == NULL) {
571          return true;
572      }
573      assert(iscsilun->cluster_size);
574      size = DIV_ROUND_UP(offset + bytes, iscsilun->cluster_size);
575      return !(find_next_bit(iscsilun->allocmap, size,
576                             offset / iscsilun->cluster_size) == size);
577  }
578  
579  static inline bool iscsi_allocmap_is_valid(IscsiLun *iscsilun,
580                                             int64_t offset, int64_t bytes)
581  {
582      unsigned long size;
583      if (iscsilun->allocmap_valid == NULL) {
584          return false;
585      }
586      assert(iscsilun->cluster_size);
587      size = DIV_ROUND_UP(offset + bytes, iscsilun->cluster_size);
588      return (find_next_zero_bit(iscsilun->allocmap_valid, size,
589                                 offset / iscsilun->cluster_size) == size);
590  }
591  
592  static void coroutine_fn iscsi_co_wait_for_task(IscsiTask *iTask,
593                                                  IscsiLun *iscsilun)
594  {
595      while (!iTask->complete) {
596          iscsi_set_events(iscsilun);
597          qemu_mutex_unlock(&iscsilun->mutex);
598          qemu_coroutine_yield();
599          qemu_mutex_lock(&iscsilun->mutex);
600      }
601  }
602  
603  static int coroutine_fn
604  iscsi_co_writev(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
605                  QEMUIOVector *iov, int flags)
606  {
607      IscsiLun *iscsilun = bs->opaque;
608      struct IscsiTask iTask;
609      uint64_t lba;
610      uint32_t num_sectors;
611      bool fua = flags & BDRV_REQ_FUA;
612      int r = 0;
613  
614      if (fua) {
615          assert(iscsilun->dpofua);
616      }
617      if (!is_sector_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
618          return -EINVAL;
619      }
620  
621      if (bs->bl.max_transfer) {
622          assert(nb_sectors << BDRV_SECTOR_BITS <= bs->bl.max_transfer);
623      }
624  
625      lba = sector_qemu2lun(sector_num, iscsilun);
626      num_sectors = sector_qemu2lun(nb_sectors, iscsilun);
627      iscsi_co_init_iscsitask(iscsilun, &iTask);
628      qemu_mutex_lock(&iscsilun->mutex);
629  retry:
630      if (iscsilun->use_16_for_rw) {
631  #if LIBISCSI_API_VERSION >= (20160603)
632          iTask.task = iscsi_write16_iov_task(iscsilun->iscsi, iscsilun->lun, lba,
633                                              NULL, num_sectors * iscsilun->block_size,
634                                              iscsilun->block_size, 0, 0, fua, 0, 0,
635                                              iscsi_co_generic_cb, &iTask,
636                                              (struct scsi_iovec *)iov->iov, iov->niov);
637      } else {
638          iTask.task = iscsi_write10_iov_task(iscsilun->iscsi, iscsilun->lun, lba,
639                                              NULL, num_sectors * iscsilun->block_size,
640                                              iscsilun->block_size, 0, 0, fua, 0, 0,
641                                              iscsi_co_generic_cb, &iTask,
642                                              (struct scsi_iovec *)iov->iov, iov->niov);
643      }
644  #else
645          iTask.task = iscsi_write16_task(iscsilun->iscsi, iscsilun->lun, lba,
646                                          NULL, num_sectors * iscsilun->block_size,
647                                          iscsilun->block_size, 0, 0, fua, 0, 0,
648                                          iscsi_co_generic_cb, &iTask);
649      } else {
650          iTask.task = iscsi_write10_task(iscsilun->iscsi, iscsilun->lun, lba,
651                                          NULL, num_sectors * iscsilun->block_size,
652                                          iscsilun->block_size, 0, 0, fua, 0, 0,
653                                          iscsi_co_generic_cb, &iTask);
654      }
655  #endif
656      if (iTask.task == NULL) {
657          qemu_mutex_unlock(&iscsilun->mutex);
658          return -ENOMEM;
659      }
660  #if LIBISCSI_API_VERSION < (20160603)
661      scsi_task_set_iov_out(iTask.task, (struct scsi_iovec *) iov->iov,
662                            iov->niov);
663  #endif
664      iscsi_co_wait_for_task(&iTask, iscsilun);
665  
666      if (iTask.task != NULL) {
667          scsi_free_scsi_task(iTask.task);
668          iTask.task = NULL;
669      }
670  
671      if (iTask.do_retry) {
672          iTask.complete = 0;
673          goto retry;
674      }
675  
676      if (iTask.status != SCSI_STATUS_GOOD) {
677          iscsi_allocmap_set_invalid(iscsilun, sector_num * BDRV_SECTOR_SIZE,
678                                     nb_sectors * BDRV_SECTOR_SIZE);
679          error_report("iSCSI WRITE10/16 failed at lba %" PRIu64 ": %s", lba,
680                       iTask.err_str);
681          r = iTask.err_code;
682          goto out_unlock;
683      }
684  
685      iscsi_allocmap_set_allocated(iscsilun, sector_num * BDRV_SECTOR_SIZE,
686                                   nb_sectors * BDRV_SECTOR_SIZE);
687  
688  out_unlock:
689      qemu_mutex_unlock(&iscsilun->mutex);
690      g_free(iTask.err_str);
691      return r;
692  }
693  
694  
695  
696  static int coroutine_fn iscsi_co_block_status(BlockDriverState *bs,
697                                                bool want_zero, int64_t offset,
698                                                int64_t bytes, int64_t *pnum,
699                                                int64_t *map,
700                                                BlockDriverState **file)
701  {
702      IscsiLun *iscsilun = bs->opaque;
703      struct scsi_get_lba_status *lbas = NULL;
704      struct scsi_lba_status_descriptor *lbasd = NULL;
705      struct IscsiTask iTask;
706      uint64_t lba, max_bytes;
707      int ret;
708  
709      iscsi_co_init_iscsitask(iscsilun, &iTask);
710  
711      assert(QEMU_IS_ALIGNED(offset | bytes, iscsilun->block_size));
712  
713      /* default to all sectors allocated */
714      ret = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID;
715      if (map) {
716          *map = offset;
717      }
718      *pnum = bytes;
719  
720      /* LUN does not support logical block provisioning */
721      if (!iscsilun->lbpme) {
722          goto out;
723      }
724  
725      lba = offset / iscsilun->block_size;
726      max_bytes = (iscsilun->num_blocks - lba) * iscsilun->block_size;
727  
728      qemu_mutex_lock(&iscsilun->mutex);
729  retry:
730      if (iscsi_get_lba_status_task(iscsilun->iscsi, iscsilun->lun,
731                                    lba, 8 + 16, iscsi_co_generic_cb,
732                                    &iTask) == NULL) {
733          ret = -ENOMEM;
734          goto out_unlock;
735      }
736      iscsi_co_wait_for_task(&iTask, iscsilun);
737  
738      if (iTask.do_retry) {
739          if (iTask.task != NULL) {
740              scsi_free_scsi_task(iTask.task);
741              iTask.task = NULL;
742          }
743          iTask.complete = 0;
744          goto retry;
745      }
746  
747      if (iTask.status != SCSI_STATUS_GOOD) {
748          /* in case the get_lba_status_callout fails (e.g.
749           * because the device is busy or the cmd is not
750           * supported) we pretend all blocks are allocated
751           * for backwards compatibility */
752          error_report("iSCSI GET_LBA_STATUS failed at lba %" PRIu64 ": %s",
753                       lba, iTask.err_str);
754          goto out_unlock;
755      }
756  
757      lbas = scsi_datain_unmarshall(iTask.task);
758      if (lbas == NULL || lbas->num_descriptors == 0) {
759          ret = -EIO;
760          goto out_unlock;
761      }
762  
763      lbasd = &lbas->descriptors[0];
764  
765      if (lba != lbasd->lba) {
766          ret = -EIO;
767          goto out_unlock;
768      }
769  
770      *pnum = MIN((int64_t) lbasd->num_blocks * iscsilun->block_size, max_bytes);
771  
772      if (lbasd->provisioning == SCSI_PROVISIONING_TYPE_DEALLOCATED ||
773          lbasd->provisioning == SCSI_PROVISIONING_TYPE_ANCHORED) {
774          ret &= ~BDRV_BLOCK_DATA;
775          if (iscsilun->lbprz) {
776              ret |= BDRV_BLOCK_ZERO;
777          }
778      }
779  
780      if (ret & BDRV_BLOCK_ZERO) {
781          iscsi_allocmap_set_unallocated(iscsilun, offset, *pnum);
782      } else {
783          iscsi_allocmap_set_allocated(iscsilun, offset, *pnum);
784      }
785  
786  out_unlock:
787      qemu_mutex_unlock(&iscsilun->mutex);
788      g_free(iTask.err_str);
789  out:
790      if (iTask.task != NULL) {
791          scsi_free_scsi_task(iTask.task);
792      }
793      if (ret > 0 && ret & BDRV_BLOCK_OFFSET_VALID && file) {
794          *file = bs;
795      }
796      return ret;
797  }
798  
799  static int coroutine_fn iscsi_co_readv(BlockDriverState *bs,
800                                         int64_t sector_num, int nb_sectors,
801                                         QEMUIOVector *iov)
802  {
803      IscsiLun *iscsilun = bs->opaque;
804      struct IscsiTask iTask;
805      uint64_t lba;
806      uint32_t num_sectors;
807      int r = 0;
808  
809      if (!is_sector_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
810          return -EINVAL;
811      }
812  
813      if (bs->bl.max_transfer) {
814          assert(nb_sectors << BDRV_SECTOR_BITS <= bs->bl.max_transfer);
815      }
816  
817      /* if cache.direct is off and we have a valid entry in our allocation map
818       * we can skip checking the block status and directly return zeroes if
819       * the request falls within an unallocated area */
820      if (iscsi_allocmap_is_valid(iscsilun, sector_num * BDRV_SECTOR_SIZE,
821                                  nb_sectors * BDRV_SECTOR_SIZE) &&
822          !iscsi_allocmap_is_allocated(iscsilun, sector_num * BDRV_SECTOR_SIZE,
823                                       nb_sectors * BDRV_SECTOR_SIZE)) {
824              qemu_iovec_memset(iov, 0, 0x00, iov->size);
825              return 0;
826      }
827  
828      if (nb_sectors >= ISCSI_CHECKALLOC_THRES &&
829          !iscsi_allocmap_is_valid(iscsilun, sector_num * BDRV_SECTOR_SIZE,
830                                   nb_sectors * BDRV_SECTOR_SIZE) &&
831          !iscsi_allocmap_is_allocated(iscsilun, sector_num * BDRV_SECTOR_SIZE,
832                                       nb_sectors * BDRV_SECTOR_SIZE)) {
833          int64_t pnum;
834          /* check the block status from the beginning of the cluster
835           * containing the start sector */
836          int64_t head;
837          int ret;
838  
839          assert(iscsilun->cluster_size);
840          head = (sector_num * BDRV_SECTOR_SIZE) % iscsilun->cluster_size;
841          ret = iscsi_co_block_status(bs, true,
842                                      sector_num * BDRV_SECTOR_SIZE - head,
843                                      BDRV_REQUEST_MAX_BYTES, &pnum, NULL, NULL);
844          if (ret < 0) {
845              return ret;
846          }
847          /* if the whole request falls into an unallocated area we can avoid
848           * reading and directly return zeroes instead */
849          if (ret & BDRV_BLOCK_ZERO &&
850              pnum >= nb_sectors * BDRV_SECTOR_SIZE + head) {
851              qemu_iovec_memset(iov, 0, 0x00, iov->size);
852              return 0;
853          }
854      }
855  
856      lba = sector_qemu2lun(sector_num, iscsilun);
857      num_sectors = sector_qemu2lun(nb_sectors, iscsilun);
858  
859      iscsi_co_init_iscsitask(iscsilun, &iTask);
860      qemu_mutex_lock(&iscsilun->mutex);
861  retry:
862      if (iscsilun->use_16_for_rw) {
863  #if LIBISCSI_API_VERSION >= (20160603)
864          iTask.task = iscsi_read16_iov_task(iscsilun->iscsi, iscsilun->lun, lba,
865                                             num_sectors * iscsilun->block_size,
866                                             iscsilun->block_size, 0, 0, 0, 0, 0,
867                                             iscsi_co_generic_cb, &iTask,
868                                             (struct scsi_iovec *)iov->iov, iov->niov);
869      } else {
870          iTask.task = iscsi_read10_iov_task(iscsilun->iscsi, iscsilun->lun, lba,
871                                             num_sectors * iscsilun->block_size,
872                                             iscsilun->block_size,
873                                             0, 0, 0, 0, 0,
874                                             iscsi_co_generic_cb, &iTask,
875                                             (struct scsi_iovec *)iov->iov, iov->niov);
876      }
877  #else
878          iTask.task = iscsi_read16_task(iscsilun->iscsi, iscsilun->lun, lba,
879                                         num_sectors * iscsilun->block_size,
880                                         iscsilun->block_size, 0, 0, 0, 0, 0,
881                                         iscsi_co_generic_cb, &iTask);
882      } else {
883          iTask.task = iscsi_read10_task(iscsilun->iscsi, iscsilun->lun, lba,
884                                         num_sectors * iscsilun->block_size,
885                                         iscsilun->block_size,
886                                         0, 0, 0, 0, 0,
887                                         iscsi_co_generic_cb, &iTask);
888      }
889  #endif
890      if (iTask.task == NULL) {
891          qemu_mutex_unlock(&iscsilun->mutex);
892          return -ENOMEM;
893      }
894  #if LIBISCSI_API_VERSION < (20160603)
895      scsi_task_set_iov_in(iTask.task, (struct scsi_iovec *) iov->iov, iov->niov);
896  #endif
897  
898      iscsi_co_wait_for_task(&iTask, iscsilun);
899      if (iTask.task != NULL) {
900          scsi_free_scsi_task(iTask.task);
901          iTask.task = NULL;
902      }
903  
904      if (iTask.do_retry) {
905          iTask.complete = 0;
906          goto retry;
907      }
908  
909      if (iTask.status != SCSI_STATUS_GOOD) {
910          error_report("iSCSI READ10/16 failed at lba %" PRIu64 ": %s",
911                       lba, iTask.err_str);
912          r = iTask.err_code;
913      }
914  
915      qemu_mutex_unlock(&iscsilun->mutex);
916      g_free(iTask.err_str);
917      return r;
918  }
919  
920  static int coroutine_fn iscsi_co_flush(BlockDriverState *bs)
921  {
922      IscsiLun *iscsilun = bs->opaque;
923      struct IscsiTask iTask;
924      int r = 0;
925  
926      iscsi_co_init_iscsitask(iscsilun, &iTask);
927      qemu_mutex_lock(&iscsilun->mutex);
928  retry:
929      if (iscsi_synchronizecache10_task(iscsilun->iscsi, iscsilun->lun, 0, 0, 0,
930                                        0, iscsi_co_generic_cb, &iTask) == NULL) {
931          qemu_mutex_unlock(&iscsilun->mutex);
932          return -ENOMEM;
933      }
934  
935      iscsi_co_wait_for_task(&iTask, iscsilun);
936  
937      if (iTask.task != NULL) {
938          scsi_free_scsi_task(iTask.task);
939          iTask.task = NULL;
940      }
941  
942      if (iTask.do_retry) {
943          iTask.complete = 0;
944          goto retry;
945      }
946  
947      if (iTask.status != SCSI_STATUS_GOOD) {
948          error_report("iSCSI SYNCHRONIZECACHE10 failed: %s", iTask.err_str);
949          r = iTask.err_code;
950      }
951  
952      qemu_mutex_unlock(&iscsilun->mutex);
953      g_free(iTask.err_str);
954      return r;
955  }
956  
957  #ifdef __linux__
958  /* Called (via iscsi_service) with QemuMutex held.  */
959  static void
960  iscsi_aio_ioctl_cb(struct iscsi_context *iscsi, int status,
961                       void *command_data, void *opaque)
962  {
963      IscsiAIOCB *acb = opaque;
964  
965      if (status == SCSI_STATUS_CANCELLED) {
966          if (!acb->bh) {
967              acb->status = -ECANCELED;
968              iscsi_schedule_bh(acb);
969          }
970          return;
971      }
972  
973      acb->status = 0;
974      if (status < 0) {
975          error_report("Failed to ioctl(SG_IO) to iSCSI lun. %s",
976                       iscsi_get_error(iscsi));
977          acb->status = -iscsi_translate_sense(&acb->task->sense);
978      }
979  
980      acb->ioh->driver_status = 0;
981      acb->ioh->host_status   = 0;
982      acb->ioh->resid         = 0;
983      acb->ioh->status        = status;
984  
985  #define SG_ERR_DRIVER_SENSE    0x08
986  
987      if (status == SCSI_STATUS_CHECK_CONDITION && acb->task->datain.size >= 2) {
988          int ss;
989  
990          acb->ioh->driver_status |= SG_ERR_DRIVER_SENSE;
991  
992          acb->ioh->sb_len_wr = acb->task->datain.size - 2;
993          ss = MIN(acb->ioh->mx_sb_len, acb->ioh->sb_len_wr);
994          memcpy(acb->ioh->sbp, &acb->task->datain.data[2], ss);
995      }
996  
997      iscsi_schedule_bh(acb);
998  }
999  
1000  static void iscsi_ioctl_bh_completion(void *opaque)
1001  {
1002      IscsiAIOCB *acb = opaque;
1003  
1004      qemu_bh_delete(acb->bh);
1005      acb->common.cb(acb->common.opaque, acb->ret);
1006      qemu_aio_unref(acb);
1007  }
1008  
1009  static void iscsi_ioctl_handle_emulated(IscsiAIOCB *acb, int req, void *buf)
1010  {
1011      BlockDriverState *bs = acb->common.bs;
1012      IscsiLun *iscsilun = bs->opaque;
1013      int ret = 0;
1014  
1015      switch (req) {
1016      case SG_GET_VERSION_NUM:
1017          *(int *)buf = 30000;
1018          break;
1019      case SG_GET_SCSI_ID:
1020          ((struct sg_scsi_id *)buf)->scsi_type = iscsilun->type;
1021          break;
1022      default:
1023          ret = -EINVAL;
1024      }
1025      assert(!acb->bh);
1026      acb->bh = aio_bh_new(bdrv_get_aio_context(bs),
1027                           iscsi_ioctl_bh_completion, acb);
1028      acb->ret = ret;
1029      qemu_bh_schedule(acb->bh);
1030  }
1031  
1032  static BlockAIOCB *iscsi_aio_ioctl(BlockDriverState *bs,
1033          unsigned long int req, void *buf,
1034          BlockCompletionFunc *cb, void *opaque)
1035  {
1036      IscsiLun *iscsilun = bs->opaque;
1037      struct iscsi_context *iscsi = iscsilun->iscsi;
1038      struct iscsi_data data;
1039      IscsiAIOCB *acb;
1040  
1041      acb = qemu_aio_get(&iscsi_aiocb_info, bs, cb, opaque);
1042  
1043      acb->iscsilun = iscsilun;
1044      acb->bh          = NULL;
1045      acb->status      = -EINPROGRESS;
1046      acb->ioh         = buf;
1047      acb->cancelled   = false;
1048  
1049      if (req != SG_IO) {
1050          iscsi_ioctl_handle_emulated(acb, req, buf);
1051          return &acb->common;
1052      }
1053  
1054      if (acb->ioh->cmd_len > SCSI_CDB_MAX_SIZE) {
1055          error_report("iSCSI: ioctl error CDB exceeds max size (%d > %d)",
1056                       acb->ioh->cmd_len, SCSI_CDB_MAX_SIZE);
1057          qemu_aio_unref(acb);
1058          return NULL;
1059      }
1060  
1061      /* Must use malloc(): this is freed via scsi_free_scsi_task() */
1062      acb->task = malloc(sizeof(struct scsi_task));
1063      if (acb->task == NULL) {
1064          error_report("iSCSI: Failed to allocate task for scsi command. %s",
1065                       iscsi_get_error(iscsi));
1066          qemu_aio_unref(acb);
1067          return NULL;
1068      }
1069      memset(acb->task, 0, sizeof(struct scsi_task));
1070  
1071      switch (acb->ioh->dxfer_direction) {
1072      case SG_DXFER_TO_DEV:
1073          acb->task->xfer_dir = SCSI_XFER_WRITE;
1074          break;
1075      case SG_DXFER_FROM_DEV:
1076          acb->task->xfer_dir = SCSI_XFER_READ;
1077          break;
1078      default:
1079          acb->task->xfer_dir = SCSI_XFER_NONE;
1080          break;
1081      }
1082  
1083      acb->task->cdb_size = acb->ioh->cmd_len;
1084      memcpy(&acb->task->cdb[0], acb->ioh->cmdp, acb->ioh->cmd_len);
1085      acb->task->expxferlen = acb->ioh->dxfer_len;
1086  
1087      data.size = 0;
1088      qemu_mutex_lock(&iscsilun->mutex);
1089      if (acb->task->xfer_dir == SCSI_XFER_WRITE) {
1090          if (acb->ioh->iovec_count == 0) {
1091              data.data = acb->ioh->dxferp;
1092              data.size = acb->ioh->dxfer_len;
1093          } else {
1094              scsi_task_set_iov_out(acb->task,
1095                                   (struct scsi_iovec *) acb->ioh->dxferp,
1096                                   acb->ioh->iovec_count);
1097          }
1098      }
1099  
1100      if (iscsi_scsi_command_async(iscsi, iscsilun->lun, acb->task,
1101                                   iscsi_aio_ioctl_cb,
1102                                   (data.size > 0) ? &data : NULL,
1103                                   acb) != 0) {
1104          qemu_mutex_unlock(&iscsilun->mutex);
1105          scsi_free_scsi_task(acb->task);
1106          qemu_aio_unref(acb);
1107          return NULL;
1108      }
1109  
1110      /* tell libiscsi to read straight into the buffer we got from ioctl */
1111      if (acb->task->xfer_dir == SCSI_XFER_READ) {
1112          if (acb->ioh->iovec_count == 0) {
1113              scsi_task_add_data_in_buffer(acb->task,
1114                                           acb->ioh->dxfer_len,
1115                                           acb->ioh->dxferp);
1116          } else {
1117              scsi_task_set_iov_in(acb->task,
1118                                   (struct scsi_iovec *) acb->ioh->dxferp,
1119                                   acb->ioh->iovec_count);
1120          }
1121      }
1122  
1123      iscsi_set_events(iscsilun);
1124      qemu_mutex_unlock(&iscsilun->mutex);
1125  
1126      return &acb->common;
1127  }
1128  
1129  #endif
1130  
1131  static int64_t coroutine_fn
1132  iscsi_co_getlength(BlockDriverState *bs)
1133  {
1134      IscsiLun *iscsilun = bs->opaque;
1135      int64_t len;
1136  
1137      len  = iscsilun->num_blocks;
1138      len *= iscsilun->block_size;
1139  
1140      return len;
1141  }
1142  
1143  static int
1144  coroutine_fn iscsi_co_pdiscard(BlockDriverState *bs, int64_t offset,
1145                                 int64_t bytes)
1146  {
1147      IscsiLun *iscsilun = bs->opaque;
1148      struct IscsiTask iTask;
1149      struct unmap_list list;
1150      int r = 0;
1151  
1152      if (!is_byte_request_lun_aligned(offset, bytes, iscsilun)) {
1153          return -ENOTSUP;
1154      }
1155  
1156      if (!iscsilun->lbp.lbpu) {
1157          /* UNMAP is not supported by the target */
1158          return 0;
1159      }
1160  
1161      /*
1162       * We don't want to overflow list.num which is uint32_t.
1163       * We rely on our max_pdiscard.
1164       */
1165      assert(bytes / iscsilun->block_size <= UINT32_MAX);
1166  
1167      list.lba = offset / iscsilun->block_size;
1168      list.num = bytes / iscsilun->block_size;
1169  
1170      iscsi_co_init_iscsitask(iscsilun, &iTask);
1171      qemu_mutex_lock(&iscsilun->mutex);
1172  retry:
1173      if (iscsi_unmap_task(iscsilun->iscsi, iscsilun->lun, 0, 0, &list, 1,
1174                           iscsi_co_generic_cb, &iTask) == NULL) {
1175          r = -ENOMEM;
1176          goto out_unlock;
1177      }
1178  
1179      iscsi_co_wait_for_task(&iTask, iscsilun);
1180  
1181      if (iTask.task != NULL) {
1182          scsi_free_scsi_task(iTask.task);
1183          iTask.task = NULL;
1184      }
1185  
1186      if (iTask.do_retry) {
1187          iTask.complete = 0;
1188          goto retry;
1189      }
1190  
1191      iscsi_allocmap_set_invalid(iscsilun, offset, bytes);
1192  
1193      if (iTask.status == SCSI_STATUS_CHECK_CONDITION) {
1194          /* the target might fail with a check condition if it
1195             is not happy with the alignment of the UNMAP request;
1196             we silently ignore the failure in this case */
1197          goto out_unlock;
1198      }
1199  
1200      if (iTask.status != SCSI_STATUS_GOOD) {
1201          error_report("iSCSI UNMAP failed at lba %" PRIu64 ": %s",
1202                       list.lba, iTask.err_str);
1203          r = iTask.err_code;
1204          goto out_unlock;
1205      }
1206  
1207  out_unlock:
1208      qemu_mutex_unlock(&iscsilun->mutex);
1209      g_free(iTask.err_str);
1210      return r;
1211  }
1212  
1213  static int
1214  coroutine_fn iscsi_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
1215                                      int64_t bytes, BdrvRequestFlags flags)
1216  {
1217      IscsiLun *iscsilun = bs->opaque;
1218      struct IscsiTask iTask;
1219      uint64_t lba;
1220      uint64_t nb_blocks;
1221      bool use_16_for_ws = iscsilun->use_16_for_rw;
1222      int r = 0;
1223  
1224      if (!is_byte_request_lun_aligned(offset, bytes, iscsilun)) {
1225          return -ENOTSUP;
1226      }
1227  
1228      if (flags & BDRV_REQ_MAY_UNMAP) {
1229          if (!use_16_for_ws && !iscsilun->lbp.lbpws10) {
1230              /* WRITESAME10 with UNMAP is not supported, try WRITESAME16 */
1231              use_16_for_ws = true;
1232          }
1233          if (use_16_for_ws && !iscsilun->lbp.lbpws) {
1234              /* WRITESAME16 with UNMAP is not supported by the target,
1235               * fall back and try WRITESAME10/16 without UNMAP */
1236              flags &= ~BDRV_REQ_MAY_UNMAP;
1237              use_16_for_ws = iscsilun->use_16_for_rw;
1238          }
1239      }
1240  
1241      if (!(flags & BDRV_REQ_MAY_UNMAP) && !iscsilun->has_write_same) {
1242          /* WRITESAME without UNMAP is not supported by the target */
1243          return -ENOTSUP;
1244      }
1245  
1246      lba = offset / iscsilun->block_size;
1247      nb_blocks = bytes / iscsilun->block_size;
1248  
1249      if (iscsilun->zeroblock == NULL) {
1250          iscsilun->zeroblock = g_try_malloc0(iscsilun->block_size);
1251          if (iscsilun->zeroblock == NULL) {
1252              return -ENOMEM;
1253          }
1254      }
1255  
1256      qemu_mutex_lock(&iscsilun->mutex);
1257      iscsi_co_init_iscsitask(iscsilun, &iTask);
1258  retry:
1259      if (use_16_for_ws) {
1260          /*
1261           * iscsi_writesame16_task num_blocks argument is uint32_t. We rely here
1262           * on our max_pwrite_zeroes limit.
1263           */
1264          assert(nb_blocks <= UINT32_MAX);
1265          iTask.task = iscsi_writesame16_task(iscsilun->iscsi, iscsilun->lun, lba,
1266                                              iscsilun->zeroblock, iscsilun->block_size,
1267                                              nb_blocks, 0, !!(flags & BDRV_REQ_MAY_UNMAP),
1268                                              0, 0, iscsi_co_generic_cb, &iTask);
1269      } else {
1270          /*
1271           * iscsi_writesame10_task num_blocks argument is uint16_t. We rely here
1272           * on our max_pwrite_zeroes limit.
1273           */
1274          assert(nb_blocks <= UINT16_MAX);
1275          iTask.task = iscsi_writesame10_task(iscsilun->iscsi, iscsilun->lun, lba,
1276                                              iscsilun->zeroblock, iscsilun->block_size,
1277                                              nb_blocks, 0, !!(flags & BDRV_REQ_MAY_UNMAP),
1278                                              0, 0, iscsi_co_generic_cb, &iTask);
1279      }
1280      if (iTask.task == NULL) {
1281          qemu_mutex_unlock(&iscsilun->mutex);
1282          return -ENOMEM;
1283      }
1284  
1285      iscsi_co_wait_for_task(&iTask, iscsilun);
1286  
1287      if (iTask.status == SCSI_STATUS_CHECK_CONDITION &&
1288          iTask.task->sense.key == SCSI_SENSE_ILLEGAL_REQUEST &&
1289          (iTask.task->sense.ascq == SCSI_SENSE_ASCQ_INVALID_OPERATION_CODE ||
1290           iTask.task->sense.ascq == SCSI_SENSE_ASCQ_INVALID_FIELD_IN_CDB)) {
1291          /* WRITE SAME is not supported by the target */
1292          iscsilun->has_write_same = false;
1293          scsi_free_scsi_task(iTask.task);
1294          r = -ENOTSUP;
1295          goto out_unlock;
1296      }
1297  
1298      if (iTask.task != NULL) {
1299          scsi_free_scsi_task(iTask.task);
1300          iTask.task = NULL;
1301      }
1302  
1303      if (iTask.do_retry) {
1304          iTask.complete = 0;
1305          goto retry;
1306      }
1307  
1308      if (iTask.status != SCSI_STATUS_GOOD) {
1309          iscsi_allocmap_set_invalid(iscsilun, offset, bytes);
1310          error_report("iSCSI WRITESAME10/16 failed at lba %" PRIu64 ": %s",
1311                       lba, iTask.err_str);
1312          r = iTask.err_code;
1313          goto out_unlock;
1314      }
1315  
1316      if (flags & BDRV_REQ_MAY_UNMAP) {
1317          iscsi_allocmap_set_invalid(iscsilun, offset, bytes);
1318      } else {
1319          iscsi_allocmap_set_allocated(iscsilun, offset, bytes);
1320      }
1321  
1322  out_unlock:
1323      qemu_mutex_unlock(&iscsilun->mutex);
1324      g_free(iTask.err_str);
1325      return r;
1326  }
1327  
1328  static void apply_chap(struct iscsi_context *iscsi, QemuOpts *opts,
1329                         Error **errp)
1330  {
1331      const char *user = NULL;
1332      const char *password = NULL;
1333      const char *secretid;
1334      char *secret = NULL;
1335  
1336      user = qemu_opt_get(opts, "user");
1337      if (!user) {
1338          return;
1339      }
1340  
1341      secretid = qemu_opt_get(opts, "password-secret");
1342      password = qemu_opt_get(opts, "password");
1343      if (secretid && password) {
1344          error_setg(errp, "'password' and 'password-secret' properties are "
1345                     "mutually exclusive");
1346          return;
1347      }
1348      if (secretid) {
1349          secret = qcrypto_secret_lookup_as_utf8(secretid, errp);
1350          if (!secret) {
1351              return;
1352          }
1353          password = secret;
1354      } else if (!password) {
1355          error_setg(errp, "CHAP username specified but no password was given");
1356          return;
1357      } else {
1358          warn_report("iSCSI block driver 'password' option is deprecated, "
1359                      "use 'password-secret' instead");
1360      }
1361  
1362      if (iscsi_set_initiator_username_pwd(iscsi, user, password)) {
1363          error_setg(errp, "Failed to set initiator username and password");
1364      }
1365  
1366      g_free(secret);
1367  }
1368  
1369  static void apply_header_digest(struct iscsi_context *iscsi, QemuOpts *opts,
1370                                  Error **errp)
1371  {
1372      const char *digest = NULL;
1373  
1374      digest = qemu_opt_get(opts, "header-digest");
1375      if (!digest) {
1376          iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE_CRC32C);
1377      } else if (!strcmp(digest, "crc32c")) {
1378          iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_CRC32C);
1379      } else if (!strcmp(digest, "none")) {
1380          iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE);
1381      } else if (!strcmp(digest, "crc32c-none")) {
1382          iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_CRC32C_NONE);
1383      } else if (!strcmp(digest, "none-crc32c")) {
1384          iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE_CRC32C);
1385      } else {
1386          error_setg(errp, "Invalid header-digest setting : %s", digest);
1387      }
1388  }
1389  
1390  static char *get_initiator_name(QemuOpts *opts)
1391  {
1392      const char *name;
1393      char *iscsi_name;
1394      UuidInfo *uuid_info;
1395  
1396      name = qemu_opt_get(opts, "initiator-name");
1397      if (name) {
1398          return g_strdup(name);
1399      }
1400  
1401      uuid_info = qmp_query_uuid(NULL);
1402      if (strcmp(uuid_info->UUID, UUID_NONE) == 0) {
1403          name = qemu_get_vm_name();
1404      } else {
1405          name = uuid_info->UUID;
1406      }
1407      iscsi_name = g_strdup_printf("iqn.2008-11.org.linux-kvm%s%s",
1408                                   name ? ":" : "", name ? name : "");
1409      qapi_free_UuidInfo(uuid_info);
1410      return iscsi_name;
1411  }
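/*
 * Illustrative only (not part of the original file): without an explicit
 * "initiator-name" option this produces, e.g.,
 *     iqn.2008-11.org.linux-kvm:550e8400-e29b-41d4-a716-446655440000
 * when the VM has a UUID, falling back to the VM name or to the bare
 * "iqn.2008-11.org.linux-kvm" prefix when neither is available.
 */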
1412  
1413  static void iscsi_nop_timed_event(void *opaque)
1414  {
1415      IscsiLun *iscsilun = opaque;
1416  
1417      QEMU_LOCK_GUARD(&iscsilun->mutex);
1418      if (iscsi_get_nops_in_flight(iscsilun->iscsi) >= MAX_NOP_FAILURES) {
1419          error_report("iSCSI: NOP timeout. Reconnecting...");
1420          iscsilun->request_timed_out = true;
1421      } else if (iscsi_nop_out_async(iscsilun->iscsi, NULL, NULL, 0, NULL) != 0) {
1422          error_report("iSCSI: failed to sent NOP-Out. Disabling NOP messages.");
1423          return;
1424      }
1425  
1426      timer_mod(iscsilun->nop_timer, qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + NOP_INTERVAL);
1427      iscsi_set_events(iscsilun);
1428  }
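/*
 * Illustrative only (not part of the original file): with NOP_INTERVAL =
 * 5000 ms and MAX_NOP_FAILURES = 3, a target that stops answering NOP-Outs
 * is flagged as timed out after roughly 15 seconds, after which
 * iscsi_timed_check_events() initiates the reconnect.
 */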
1429  
1430  static void iscsi_readcapacity_sync(IscsiLun *iscsilun, Error **errp)
1431  {
1432      struct scsi_task *task = NULL;
1433      struct scsi_readcapacity10 *rc10 = NULL;
1434      struct scsi_readcapacity16 *rc16 = NULL;
1435      int retries = ISCSI_CMD_RETRIES;
1436  
1437      do {
1438          if (task != NULL) {
1439              scsi_free_scsi_task(task);
1440              task = NULL;
1441          }
1442  
1443          switch (iscsilun->type) {
1444          case TYPE_DISK:
1445              task = iscsi_readcapacity16_sync(iscsilun->iscsi, iscsilun->lun);
1446              if (task != NULL && task->status == SCSI_STATUS_GOOD) {
1447                  rc16 = scsi_datain_unmarshall(task);
1448                  if (rc16 == NULL) {
1449                      error_setg(errp, "iSCSI: Failed to unmarshall readcapacity16 data.");
1450                  } else {
1451                      iscsilun->block_size = rc16->block_length;
1452                      iscsilun->num_blocks = rc16->returned_lba + 1;
1453                      iscsilun->lbpme = !!rc16->lbpme;
1454                      iscsilun->lbprz = !!rc16->lbprz;
1455                      iscsilun->use_16_for_rw = (rc16->returned_lba > 0xffffffff);
1456                  }
1457                  break;
1458              }
1459              if (task != NULL && task->status == SCSI_STATUS_CHECK_CONDITION
1460                  && task->sense.key == SCSI_SENSE_UNIT_ATTENTION) {
1461                  break;
1462              }
1463              /* Fall through and try READ CAPACITY(10) instead.  */
1464          case TYPE_ROM:
1465              task = iscsi_readcapacity10_sync(iscsilun->iscsi, iscsilun->lun, 0, 0);
1466              if (task != NULL && task->status == SCSI_STATUS_GOOD) {
1467                  rc10 = scsi_datain_unmarshall(task);
1468                  if (rc10 == NULL) {
1469                      error_setg(errp, "iSCSI: Failed to unmarshall readcapacity10 data.");
1470                  } else {
1471                      iscsilun->block_size = rc10->block_size;
1472                      if (rc10->lba == 0) {
1473                          /* blank disk loaded */
1474                          iscsilun->num_blocks = 0;
1475                      } else {
1476                          iscsilun->num_blocks = rc10->lba + 1;
1477                      }
1478                  }
1479              }
1480              break;
1481          default:
1482              return;
1483          }
1484      } while (task != NULL && task->status == SCSI_STATUS_CHECK_CONDITION
1485               && task->sense.key == SCSI_SENSE_UNIT_ATTENTION
1486               && retries-- > 0);
1487  
1488      if (task == NULL || task->status != SCSI_STATUS_GOOD) {
1489          error_setg(errp, "iSCSI: failed to send readcapacity10/16 command");
1490      } else if (!iscsilun->block_size ||
1491                 iscsilun->block_size % BDRV_SECTOR_SIZE) {
1492          error_setg(errp, "iSCSI: the target returned an invalid "
1493                     "block size of %d.", iscsilun->block_size);
1494      }
1495      if (task) {
1496          scsi_free_scsi_task(task);
1497      }
1498  }
1499  
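/*
 * Send an INQUIRY and unmarshall the response.  The command may be issued
 * twice: a first pass with a 64-byte allocation length and, if
 * scsi_datain_getfullsize() reports a larger payload, a second pass sized
 * to the full data.
 */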
1500  static struct scsi_task *iscsi_do_inquiry(struct iscsi_context *iscsi, int lun,
1501                                            int evpd, int pc, void **inq, Error **errp)
1502  {
1503      int full_size;
1504      struct scsi_task *task = NULL;
1505      task = iscsi_inquiry_sync(iscsi, lun, evpd, pc, 64);
1506      if (task == NULL || task->status != SCSI_STATUS_GOOD) {
1507          goto fail;
1508      }
1509      full_size = scsi_datain_getfullsize(task);
1510      if (full_size > task->datain.size) {
1511          scsi_free_scsi_task(task);
1512  
1513          /* we need more data for the full list */
1514          task = iscsi_inquiry_sync(iscsi, lun, evpd, pc, full_size);
1515          if (task == NULL || task->status != SCSI_STATUS_GOOD) {
1516              goto fail;
1517          }
1518      }
1519  
1520      *inq = scsi_datain_unmarshall(task);
1521      if (*inq == NULL) {
1522          error_setg(errp, "iSCSI: failed to unmarshall inquiry datain blob");
1523          goto fail_with_err;
1524      }
1525  
1526      return task;
1527  
1528  fail:
1529      error_setg(errp, "iSCSI: Inquiry command failed : %s",
1530                 iscsi_get_error(iscsi));
1531  fail_with_err:
1532      if (task != NULL) {
1533          scsi_free_scsi_task(task);
1534      }
1535      return NULL;
1536  }
1537  
1538  static void iscsi_detach_aio_context(BlockDriverState *bs)
1539  {
1540      IscsiLun *iscsilun = bs->opaque;
1541  
1542      aio_set_fd_handler(iscsilun->aio_context, iscsi_get_fd(iscsilun->iscsi),
1543                         NULL, NULL, NULL, NULL, NULL);
1544      iscsilun->events = 0;
1545  
1546      if (iscsilun->nop_timer) {
1547          timer_free(iscsilun->nop_timer);
1548          iscsilun->nop_timer = NULL;
1549      }
1550      if (iscsilun->event_timer) {
1551          timer_free(iscsilun->event_timer);
1552          iscsilun->event_timer = NULL;
1553      }
1554  }
1555  
1556  static void iscsi_attach_aio_context(BlockDriverState *bs,
1557                                       AioContext *new_context)
1558  {
1559      IscsiLun *iscsilun = bs->opaque;
1560  
1561      iscsilun->aio_context = new_context;
1562      iscsi_set_events(iscsilun);
1563  
1564      /* Set up a timer for sending out iSCSI NOPs */
1565      iscsilun->nop_timer = aio_timer_new(iscsilun->aio_context,
1566                                          QEMU_CLOCK_REALTIME, SCALE_MS,
1567                                          iscsi_nop_timed_event, iscsilun);
1568      timer_mod(iscsilun->nop_timer,
1569                qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + NOP_INTERVAL);
1570  
1571      /* Set up a timer for periodic calls to iscsi_set_events and to
1572       * scan for command timeouts */
1573      iscsilun->event_timer = aio_timer_new(iscsilun->aio_context,
1574                                            QEMU_CLOCK_REALTIME, SCALE_MS,
1575                                            iscsi_timed_check_events, iscsilun);
1576      timer_mod(iscsilun->event_timer,
1577                qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + EVENT_INTERVAL);
1578  }
1579  
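/*
 * MODE SENSE(6) for all pages (0x3F): the device-specific parameter byte
 * of the mode parameter header carries the write-protect flag in bit 7
 * (0x80) and DPOFUA in bit 4 (0x10); the latter is what enables
 * BDRV_REQ_FUA support in iscsi_open().
 */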
1580  static void iscsi_modesense_sync(IscsiLun *iscsilun)
1581  {
1582      struct scsi_task *task;
1583      struct scsi_mode_sense *ms = NULL;
1584      iscsilun->write_protected = false;
1585      iscsilun->dpofua = false;
1586  
1587      task = iscsi_modesense6_sync(iscsilun->iscsi, iscsilun->lun,
1588                                   1, SCSI_MODESENSE_PC_CURRENT,
1589                                   0x3F, 0, 255);
1590      if (task == NULL) {
1591          error_report("iSCSI: Failed to send MODE_SENSE(6) command: %s",
1592                       iscsi_get_error(iscsilun->iscsi));
1593          goto out;
1594      }
1595  
1596      if (task->status != SCSI_STATUS_GOOD) {
1597          error_report("iSCSI: Failed MODE_SENSE(6), LUN assumed writable");
1598          goto out;
1599      }
1600      ms = scsi_datain_unmarshall(task);
1601      if (!ms) {
1602          error_report("iSCSI: Failed to unmarshall MODE_SENSE(6) data: %s",
1603                       iscsi_get_error(iscsilun->iscsi));
1604          goto out;
1605      }
1606      iscsilun->write_protected = ms->device_specific_parameter & 0x80;
1607      iscsilun->dpofua          = ms->device_specific_parameter & 0x10;
1608  
1609  out:
1610      if (task) {
1611          scsi_free_scsi_task(task);
1612      }
1613  }
1614  
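/*
 * Merge legacy -iscsi settings into the blockdev options, matching the
 * QemuOpts id against the target IQN and falling back to the first -iscsi
 * group.  A hypothetical example:
 *   -iscsi id=iqn.2010-01.com.example:storage,user=alice,password-secret=sec0
 * supplies defaults for "user" and "password-secret" of that target.
 */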
1615  static void iscsi_parse_iscsi_option(const char *target, QDict *options)
1616  {
1617      QemuOptsList *list;
1618      QemuOpts *opts;
1619      const char *user, *password, *password_secret, *initiator_name,
1620                 *header_digest, *timeout;
1621  
1622      list = qemu_find_opts("iscsi");
1623      if (!list) {
1624          return;
1625      }
1626  
1627      opts = qemu_opts_find(list, target);
1628      if (opts == NULL) {
1629          opts = QTAILQ_FIRST(&list->head);
1630          if (!opts) {
1631              return;
1632          }
1633      }
1634  
1635      user = qemu_opt_get(opts, "user");
1636      if (user) {
1637          qdict_set_default_str(options, "user", user);
1638      }
1639  
1640      password = qemu_opt_get(opts, "password");
1641      if (password) {
1642          qdict_set_default_str(options, "password", password);
1643      }
1644  
1645      password_secret = qemu_opt_get(opts, "password-secret");
1646      if (password_secret) {
1647          qdict_set_default_str(options, "password-secret", password_secret);
1648      }
1649  
1650      initiator_name = qemu_opt_get(opts, "initiator-name");
1651      if (initiator_name) {
1652          qdict_set_default_str(options, "initiator-name", initiator_name);
1653      }
1654  
1655      header_digest = qemu_opt_get(opts, "header-digest");
1656      if (header_digest) {
1657          /* -iscsi takes upper case values, but QAPI only supports lower case
1658           * enum constant names, so we have to convert here. */
1659          char *qapi_value = g_ascii_strdown(header_digest, -1);
1660          qdict_set_default_str(options, "header-digest", qapi_value);
1661          g_free(qapi_value);
1662      }
1663  
1664      timeout = qemu_opt_get(opts, "timeout");
1665      if (timeout) {
1666          qdict_set_default_str(options, "timeout", timeout);
1667      }
1668  }
1669  
1670  /*
1671   * We support iSCSI URLs of the form
1672   * iscsi://[<username>%<password>@]<host>[:<port>]/<targetname>/<lun>
1673   */
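/*
 * For illustration (hypothetical values), the filename
 *   iscsi://alice%secret@192.0.2.1:3260/iqn.2010-01.com.example:storage/1
 * parses into transport=tcp, portal=192.0.2.1:3260,
 * target=iqn.2010-01.com.example:storage, lun=1, user=alice and
 * password=secret, with -iscsi settings taking precedence over the
 * credentials embedded in the URL.
 */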
1674  static void iscsi_parse_filename(const char *filename, QDict *options,
1675                                   Error **errp)
1676  {
1677      struct iscsi_url *iscsi_url;
1678      const char *transport_name;
1679      char *lun_str;
1680  
1681      iscsi_url = iscsi_parse_full_url(NULL, filename);
1682      if (iscsi_url == NULL) {
1683          error_setg(errp, "Failed to parse URL : %s", filename);
1684          return;
1685      }
1686  
1687  #if LIBISCSI_API_VERSION >= (20160603)
1688      switch (iscsi_url->transport) {
1689      case TCP_TRANSPORT:
1690          transport_name = "tcp";
1691          break;
1692      case ISER_TRANSPORT:
1693          transport_name = "iser";
1694          break;
1695      default:
1696          error_setg(errp, "Unknown transport type (%d)",
1697                     iscsi_url->transport);
1698          return;
1699      }
1700  #else
1701      transport_name = "tcp";
1702  #endif
1703  
1704      qdict_set_default_str(options, "transport", transport_name);
1705      qdict_set_default_str(options, "portal", iscsi_url->portal);
1706      qdict_set_default_str(options, "target", iscsi_url->target);
1707  
1708      lun_str = g_strdup_printf("%d", iscsi_url->lun);
1709      qdict_set_default_str(options, "lun", lun_str);
1710      g_free(lun_str);
1711  
1712      /* User/password from -iscsi take precedence over those from the URL */
1713      iscsi_parse_iscsi_option(iscsi_url->target, options);
1714  
1715      if (iscsi_url->user[0] != '\0') {
1716          qdict_set_default_str(options, "user", iscsi_url->user);
1717          qdict_set_default_str(options, "password", iscsi_url->passwd);
1718      }
1719  
1720      iscsi_destroy_url(iscsi_url);
1721  }
1722  
1723  static QemuOptsList runtime_opts = {
1724      .name = "iscsi",
1725      .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
1726      .desc = {
1727          {
1728              .name = "transport",
1729              .type = QEMU_OPT_STRING,
1730          },
1731          {
1732              .name = "portal",
1733              .type = QEMU_OPT_STRING,
1734          },
1735          {
1736              .name = "target",
1737              .type = QEMU_OPT_STRING,
1738          },
1739          {
1740              .name = "user",
1741              .type = QEMU_OPT_STRING,
1742          },
1743          {
1744              .name = "password",
1745              .type = QEMU_OPT_STRING,
1746          },
1747          {
1748              .name = "password-secret",
1749              .type = QEMU_OPT_STRING,
1750          },
1751          {
1752              .name = "lun",
1753              .type = QEMU_OPT_NUMBER,
1754          },
1755          {
1756              .name = "initiator-name",
1757              .type = QEMU_OPT_STRING,
1758          },
1759          {
1760              .name = "header-digest",
1761              .type = QEMU_OPT_STRING,
1762          },
1763          {
1764              .name = "timeout",
1765              .type = QEMU_OPT_NUMBER,
1766          },
1767          { /* end of list */ }
1768      },
1769  };
1770  
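/*
 * Remember one LUN-associated designator from the Device Identification
 * VPD page for later use as an EXTENDED COPY target descriptor.  Higher
 * designator types are preferred, so NAA (type 3) wins over EUI-64 (2) and
 * T10 vendor ID (1).
 */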
1771  static void iscsi_save_designator(IscsiLun *lun,
1772                                    struct scsi_inquiry_device_identification *inq_di)
1773  {
1774      struct scsi_inquiry_device_designator *desig, *copy = NULL;
1775  
1776      for (desig = inq_di->designators; desig; desig = desig->next) {
1777          if (desig->association ||
1778              desig->designator_type > SCSI_DESIGNATOR_TYPE_NAA) {
1779              continue;
1780          }
1781          /* NAA works better than T10 vendor ID based designator. */
1782          if (!copy || copy->designator_type < desig->designator_type) {
1783              copy = desig;
1784          }
1785      }
1786      if (copy) {
1787          lun->dd = g_new(struct scsi_inquiry_device_designator, 1);
1788          *lun->dd = *copy;
1789          lun->dd->next = NULL;
1790          lun->dd->designator = g_malloc(copy->designator_length);
1791          memcpy(lun->dd->designator, copy->designator, copy->designator_length);
1792      }
1793  }
1794  
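/*
 * Open one iSCSI LUN.  A minimal (hypothetical) blockdev invocation that
 * ends up here:
 *   -blockdev driver=iscsi,node-name=lun0,transport=tcp,portal=192.0.2.1:3260,\
 *             target=iqn.2010-01.com.example:storage,lun=0
 */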
1795  static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
1796                        Error **errp)
1797  {
1798      IscsiLun *iscsilun = bs->opaque;
1799      struct iscsi_context *iscsi = NULL;
1800      struct scsi_task *task = NULL;
1801      struct scsi_inquiry_standard *inq = NULL;
1802      struct scsi_inquiry_supported_pages *inq_vpd;
1803      char *initiator_name = NULL;
1804      QemuOpts *opts;
1805      Error *local_err = NULL;
1806      const char *transport_name, *portal, *target;
1807  #if LIBISCSI_API_VERSION >= (20160603)
1808      enum iscsi_transport_type transport;
1809  #endif
1810      int i, ret = 0, timeout = 0, lun;
1811  
1812      opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
1813      if (!qemu_opts_absorb_qdict(opts, options, errp)) {
1814          ret = -EINVAL;
1815          goto out;
1816      }
1817  
1818      transport_name = qemu_opt_get(opts, "transport");
1819      portal = qemu_opt_get(opts, "portal");
1820      target = qemu_opt_get(opts, "target");
1821      lun = qemu_opt_get_number(opts, "lun", 0);
1822  
1823      if (!transport_name || !portal || !target) {
1824          error_setg(errp, "Need all of transport, portal and target options");
1825          ret = -EINVAL;
1826          goto out;
1827      }
1828  
1829      if (!strcmp(transport_name, "tcp")) {
1830  #if LIBISCSI_API_VERSION >= (20160603)
1831          transport = TCP_TRANSPORT;
1832      } else if (!strcmp(transport_name, "iser")) {
1833          transport = ISER_TRANSPORT;
1834  #else
1835          /* TCP is what older libiscsi versions always use */
1836  #endif
1837      } else {
1838          error_setg(errp, "Unknown transport: %s", transport_name);
1839          ret = -EINVAL;
1840          goto out;
1841      }
1842  
1843      memset(iscsilun, 0, sizeof(IscsiLun));
1844  
1845      initiator_name = get_initiator_name(opts);
1846  
1847      iscsi = iscsi_create_context(initiator_name);
1848      if (iscsi == NULL) {
1849          error_setg(errp, "iSCSI: Failed to create iSCSI context.");
1850          ret = -ENOMEM;
1851          goto out;
1852      }
1853  #if LIBISCSI_API_VERSION >= (20160603)
1854      if (iscsi_init_transport(iscsi, transport)) {
1855          error_setg(errp, "Error initializing transport.");
1856          ret = -EINVAL;
1857          goto out;
1858      }
1859  #endif
1860      if (iscsi_set_targetname(iscsi, target)) {
1861          error_setg(errp, "iSCSI: Failed to set target name.");
1862          ret = -EINVAL;
1863          goto out;
1864      }
1865  
1866      /* check if we got CHAP username/password via the options */
1867      apply_chap(iscsi, opts, &local_err);
1868      if (local_err != NULL) {
1869          error_propagate(errp, local_err);
1870          ret = -EINVAL;
1871          goto out;
1872      }
1873  
1874      if (iscsi_set_session_type(iscsi, ISCSI_SESSION_NORMAL) != 0) {
1875          error_setg(errp, "iSCSI: Failed to set session type to normal.");
1876          ret = -EINVAL;
1877          goto out;
1878      }
1879  
1880      /* check if we got HEADER_DIGEST via the options */
1881      apply_header_digest(iscsi, opts, &local_err);
1882      if (local_err != NULL) {
1883          error_propagate(errp, local_err);
1884          ret = -EINVAL;
1885          goto out;
1886      }
1887  
1888      /* timeout handling is broken in libiscsi before 1.15.0 */
1889      timeout = qemu_opt_get_number(opts, "timeout", 0);
1890  #if LIBISCSI_API_VERSION >= 20150621
1891      iscsi_set_timeout(iscsi, timeout);
1892  #else
1893      if (timeout) {
1894          warn_report("iSCSI: ignoring timeout value for libiscsi <1.15.0");
1895      }
1896  #endif
1897  
1898      if (iscsi_full_connect_sync(iscsi, portal, lun) != 0) {
1899          error_setg(errp, "iSCSI: Failed to connect to LUN : %s",
1900              iscsi_get_error(iscsi));
1901          ret = -EINVAL;
1902          goto out;
1903      }
1904  
1905      iscsilun->iscsi = iscsi;
1906      iscsilun->aio_context = bdrv_get_aio_context(bs);
1907      iscsilun->lun = lun;
1908      iscsilun->has_write_same = true;
1909  
1910      task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 0, 0,
1911                              (void **) &inq, errp);
1912      if (task == NULL) {
1913          ret = -EINVAL;
1914          goto out;
1915      }
1916      iscsilun->type = inq->periperal_device_type;
1917      scsi_free_scsi_task(task);
1918      task = NULL;
1919  
1920      iscsi_modesense_sync(iscsilun);
1921      if (iscsilun->dpofua) {
1922          bs->supported_write_flags = BDRV_REQ_FUA;
1923      }
1924  
1925      /* Check the write protect flag of the LUN if we want to write */
1926      if (iscsilun->type == TYPE_DISK && (flags & BDRV_O_RDWR) &&
1927          iscsilun->write_protected) {
1928          bdrv_graph_rdlock_main_loop();
1929          ret = bdrv_apply_auto_read_only(bs, "LUN is write protected", errp);
1930          bdrv_graph_rdunlock_main_loop();
1931          if (ret < 0) {
1932              goto out;
1933          }
1934          flags &= ~BDRV_O_RDWR;
1935      }
1936  
1937      iscsi_readcapacity_sync(iscsilun, &local_err);
1938      if (local_err != NULL) {
1939          error_propagate(errp, local_err);
1940          ret = -EINVAL;
1941          goto out;
1942      }
1943      bs->total_sectors = sector_lun2qemu(iscsilun->num_blocks, iscsilun);
1944  
1945      /* We don't have any emulation for devices other than disks and CD-ROMs, so
1946       * this must be sg ioctl compatible. We force it to be sg, otherwise qemu
1947       * will try to read from the device to guess the image format.
1948       */
1949      if (iscsilun->type != TYPE_DISK && iscsilun->type != TYPE_ROM) {
1950          bs->sg = true;
1951      }
1952  
1953      task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 1,
1954                              SCSI_INQUIRY_PAGECODE_SUPPORTED_VPD_PAGES,
1955                              (void **) &inq_vpd, errp);
1956      if (task == NULL) {
1957          ret = -EINVAL;
1958          goto out;
1959      }
1960      for (i = 0; i < inq_vpd->num_pages; i++) {
1961          struct scsi_task *inq_task;
1962          struct scsi_inquiry_logical_block_provisioning *inq_lbp;
1963          struct scsi_inquiry_block_limits *inq_bl;
1964          struct scsi_inquiry_device_identification *inq_di;
1965          switch (inq_vpd->pages[i]) {
1966          case SCSI_INQUIRY_PAGECODE_LOGICAL_BLOCK_PROVISIONING:
1967              inq_task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 1,
1968                                          SCSI_INQUIRY_PAGECODE_LOGICAL_BLOCK_PROVISIONING,
1969                                          (void **) &inq_lbp, errp);
1970              if (inq_task == NULL) {
1971                  ret = -EINVAL;
1972                  goto out;
1973              }
1974              memcpy(&iscsilun->lbp, inq_lbp,
1975                     sizeof(struct scsi_inquiry_logical_block_provisioning));
1976              scsi_free_scsi_task(inq_task);
1977              break;
1978          case SCSI_INQUIRY_PAGECODE_BLOCK_LIMITS:
1979              inq_task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 1,
1980                                      SCSI_INQUIRY_PAGECODE_BLOCK_LIMITS,
1981                                      (void **) &inq_bl, errp);
1982              if (inq_task == NULL) {
1983                  ret = -EINVAL;
1984                  goto out;
1985              }
1986              memcpy(&iscsilun->bl, inq_bl,
1987                     sizeof(struct scsi_inquiry_block_limits));
1988              scsi_free_scsi_task(inq_task);
1989              break;
1990          case SCSI_INQUIRY_PAGECODE_DEVICE_IDENTIFICATION:
1991              inq_task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 1,
1992                                      SCSI_INQUIRY_PAGECODE_DEVICE_IDENTIFICATION,
1993                                      (void **) &inq_di, errp);
1994              if (inq_task == NULL) {
1995                  ret = -EINVAL;
1996                  goto out;
1997              }
1998              iscsi_save_designator(iscsilun, inq_di);
1999              scsi_free_scsi_task(inq_task);
2000              break;
2001          default:
2002              break;
2003          }
2004      }
2005      scsi_free_scsi_task(task);
2006      task = NULL;
2007  
2008      qemu_mutex_init(&iscsilun->mutex);
2009      iscsi_attach_aio_context(bs, iscsilun->aio_context);
2010  
2011      /* Guess the internal cluster (page) size of the iSCSI target by means
2012       * of opt_unmap_gran. Transfer the unmap granularity only if it has a
2013       * reasonable size */
2014      if (iscsilun->bl.opt_unmap_gran * iscsilun->block_size >= 4 * 1024 &&
2015          iscsilun->bl.opt_unmap_gran * iscsilun->block_size <= 16 * 1024 * 1024) {
2016          iscsilun->cluster_size = iscsilun->bl.opt_unmap_gran *
2017              iscsilun->block_size;
2018          if (iscsilun->lbprz) {
2019              ret = iscsi_allocmap_init(iscsilun, flags);
2020          }
2021      }
2022  
2023      if (iscsilun->lbprz && iscsilun->lbp.lbpws) {
2024          bs->supported_zero_flags = BDRV_REQ_MAY_UNMAP;
2025      }
2026  
2027  out:
2028      qemu_opts_del(opts);
2029      g_free(initiator_name);
2030      if (task != NULL) {
2031          scsi_free_scsi_task(task);
2032      }
2033  
2034      if (ret) {
2035          if (iscsi != NULL) {
2036              if (iscsi_is_logged_in(iscsi)) {
2037                  iscsi_logout_sync(iscsi);
2038              }
2039              iscsi_destroy_context(iscsi);
2040          }
2041          memset(iscsilun, 0, sizeof(IscsiLun));
2042      }
2043  
2044      return ret;
2045  }
2046  
2047  static void iscsi_close(BlockDriverState *bs)
2048  {
2049      IscsiLun *iscsilun = bs->opaque;
2050      struct iscsi_context *iscsi = iscsilun->iscsi;
2051  
2052      iscsi_detach_aio_context(bs);
2053      if (iscsi_is_logged_in(iscsi)) {
2054          iscsi_logout_sync(iscsi);
2055      }
2056      iscsi_destroy_context(iscsi);
2057      if (iscsilun->dd) {
2058          g_free(iscsilun->dd->designator);
2059          g_free(iscsilun->dd);
2060      }
2061      g_free(iscsilun->zeroblock);
2062      iscsi_allocmap_free(iscsilun);
2063      qemu_mutex_destroy(&iscsilun->mutex);
2064      memset(iscsilun, 0, sizeof(IscsiLun));
2065  }
2066  
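/*
 * Example of the limit arithmetic below (hypothetical target): with a
 * 512-byte block size, READ/WRITE(10) addressing and a reported
 * bl.max_xfer_len of 0x10000, max_transfer becomes
 * MIN(0xffff, 0x10000) * 512 bytes, i.e. just under 32 MiB; discard and
 * write-zeroes alignments fall back to one block when the LBP VPD bits
 * are clear.
 */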
2067  static void iscsi_refresh_limits(BlockDriverState *bs, Error **errp)
2068  {
2069      /* We don't actually refresh here, but just return data queried in
2070       * iscsi_open(): iscsi targets don't change their limits. */
2071  
2072      IscsiLun *iscsilun = bs->opaque;
2073      uint64_t max_xfer_len = iscsilun->use_16_for_rw ? 0xffffffff : 0xffff;
2074      unsigned int block_size = MAX(BDRV_SECTOR_SIZE, iscsilun->block_size);
2075  
2076      assert(iscsilun->block_size >= BDRV_SECTOR_SIZE || bdrv_is_sg(bs));
2077  
2078      bs->bl.request_alignment = block_size;
2079  
2080      if (iscsilun->bl.max_xfer_len) {
2081          max_xfer_len = MIN(max_xfer_len, iscsilun->bl.max_xfer_len);
2082      }
2083  
2084      if (max_xfer_len * block_size < INT_MAX) {
2085          bs->bl.max_transfer = max_xfer_len * iscsilun->block_size;
2086      }
2087  
2088      if (iscsilun->lbp.lbpu) {
2089          bs->bl.max_pdiscard =
2090              MIN_NON_ZERO(iscsilun->bl.max_unmap * iscsilun->block_size,
2091                           (uint64_t)UINT32_MAX * iscsilun->block_size);
2092          bs->bl.pdiscard_alignment =
2093              iscsilun->bl.opt_unmap_gran * iscsilun->block_size;
2094      } else {
2095          bs->bl.pdiscard_alignment = iscsilun->block_size;
2096      }
2097  
2098      bs->bl.max_pwrite_zeroes =
2099          MIN_NON_ZERO(iscsilun->bl.max_ws_len * iscsilun->block_size,
2100                       max_xfer_len * iscsilun->block_size);
2101  
2102      if (iscsilun->lbp.lbpws) {
2103          bs->bl.pwrite_zeroes_alignment =
2104              iscsilun->bl.opt_unmap_gran * iscsilun->block_size;
2105      } else {
2106          bs->bl.pwrite_zeroes_alignment = iscsilun->block_size;
2107      }
2108      if (iscsilun->bl.opt_xfer_len &&
2109          iscsilun->bl.opt_xfer_len < INT_MAX / block_size) {
2110          bs->bl.opt_transfer = pow2floor(iscsilun->bl.opt_xfer_len *
2111                                          iscsilun->block_size);
2112      }
2113  }
2114  
2115  /* Note that this will not re-establish a connection with an iSCSI target - it
2116   * is effectively a NOP.  */
2117  static int iscsi_reopen_prepare(BDRVReopenState *state,
2118                                  BlockReopenQueue *queue, Error **errp)
2119  {
2120      IscsiLun *iscsilun = state->bs->opaque;
2121  
2122      if (state->flags & BDRV_O_RDWR && iscsilun->write_protected) {
2123          error_setg(errp, "Cannot open a write protected LUN as read-write");
2124          return -EACCES;
2125      }
2126      return 0;
2127  }
2128  
2129  static void iscsi_reopen_commit(BDRVReopenState *reopen_state)
2130  {
2131      IscsiLun *iscsilun = reopen_state->bs->opaque;
2132  
2133      /* the cache.direct status might have changed */
2134      if (iscsilun->allocmap != NULL) {
2135          iscsi_allocmap_init(iscsilun, reopen_state->flags);
2136      }
2137  }
2138  
2139  static int coroutine_fn iscsi_co_truncate(BlockDriverState *bs, int64_t offset,
2140                                            bool exact, PreallocMode prealloc,
2141                                            BdrvRequestFlags flags, Error **errp)
2142  {
2143      IscsiLun *iscsilun = bs->opaque;
2144      int64_t cur_length;
2145      Error *local_err = NULL;
2146  
2147      if (prealloc != PREALLOC_MODE_OFF) {
2148          error_setg(errp, "Unsupported preallocation mode '%s'",
2149                     PreallocMode_str(prealloc));
2150          return -ENOTSUP;
2151      }
2152  
2153      if (iscsilun->type != TYPE_DISK) {
2154          error_setg(errp, "Cannot resize non-disk iSCSI devices");
2155          return -ENOTSUP;
2156      }
2157  
2158      iscsi_readcapacity_sync(iscsilun, &local_err);
2159      if (local_err != NULL) {
2160          error_propagate(errp, local_err);
2161          return -EIO;
2162      }
2163  
2164      cur_length = iscsi_co_getlength(bs);
2165      if (offset != cur_length && exact) {
2166          error_setg(errp, "Cannot resize iSCSI devices");
2167          return -ENOTSUP;
2168      } else if (offset > cur_length) {
2169          error_setg(errp, "Cannot grow iSCSI devices");
2170          return -EINVAL;
2171      }
2172  
2173      if (iscsilun->allocmap != NULL) {
2174          iscsi_allocmap_init(iscsilun, bs->open_flags);
2175      }
2176  
2177      return 0;
2178  }
2179  
2180  static int coroutine_fn
2181  iscsi_co_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
2182  {
2183      IscsiLun *iscsilun = bs->opaque;
2184      bdi->cluster_size = iscsilun->cluster_size;
2185      return 0;
2186  }
2187  
2188  static void coroutine_fn iscsi_co_invalidate_cache(BlockDriverState *bs,
2189                                                     Error **errp)
2190  {
2191      IscsiLun *iscsilun = bs->opaque;
2192      iscsi_allocmap_invalidate(iscsilun);
2193  }
2194  
2195  static int coroutine_fn GRAPH_RDLOCK
2196  iscsi_co_copy_range_from(BlockDriverState *bs,
2197                           BdrvChild *src, int64_t src_offset,
2198                           BdrvChild *dst, int64_t dst_offset,
2199                           int64_t bytes, BdrvRequestFlags read_flags,
2200                           BdrvRequestFlags write_flags)
2201  {
2202      return bdrv_co_copy_range_to(src, src_offset, dst, dst_offset, bytes,
2203                                   read_flags, write_flags);
2204  }
2205  
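/*
 * Build an EXTENDED COPY (LID1) CDB by hand: the EXTENDED COPY opcode
 * (0x83) with the parameter list length in bytes 10..13 (big-endian); the
 * parameter list itself is sent in the data-out phase.
 */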
2206  static struct scsi_task *iscsi_xcopy_task(int param_len)
2207  {
2208      struct scsi_task *task;
2209  
2210      task = g_new0(struct scsi_task, 1);
2211  
2212      task->cdb[0]     = EXTENDED_COPY;
2213      task->cdb[10]    = (param_len >> 24) & 0xFF;
2214      task->cdb[11]    = (param_len >> 16) & 0xFF;
2215      task->cdb[12]    = (param_len >> 8) & 0xFF;
2216      task->cdb[13]    = param_len & 0xFF;
2217      task->cdb_size   = 16;
2218      task->xfer_dir   = SCSI_XFER_WRITE;
2219      task->expxferlen = param_len;
2220  
2221      return task;
2222  }
2223  
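/*
 * Fill one 32-byte identification descriptor CSCD (target) descriptor:
 * descriptor type code 0xE4, followed by the designator saved at open time
 * and the LUN's block length in the device type specific parameters.
 */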
2224  static void iscsi_populate_target_desc(unsigned char *desc, IscsiLun *lun)
2225  {
2226      struct scsi_inquiry_device_designator *dd = lun->dd;
2227  
2228      memset(desc, 0, 32);
2229      desc[0] = 0xE4; /* IDENT_DESCR_TGT_DESCR */
2230      desc[4] = dd->code_set;
2231      desc[5] = (dd->designator_type & 0xF)
2232          | ((dd->association & 3) << 4);
2233      desc[7] = dd->designator_length;
2234      memcpy(desc + 8, dd->designator, MIN(dd->designator_length, 20));
2235  
2236      desc[28] = 0;
2237      desc[29] = (lun->block_size >> 16) & 0xFF;
2238      desc[30] = (lun->block_size >> 8) & 0xFF;
2239      desc[31] = lun->block_size & 0xFF;
2240  }
2241  
2242  static void iscsi_xcopy_desc_hdr(uint8_t *hdr, int dc, int cat, int src_index,
2243                                   int dst_index)
2244  {
2245      hdr[0] = 0x02; /* BLK_TO_BLK_SEG_DESCR */
2246      hdr[1] = ((dc << 1) | cat) & 0xFF;
2247      hdr[2] = (XCOPY_BLK2BLK_SEG_DESC_SIZE >> 8) & 0xFF;
2248      /* don't account for the first 4 bytes in descriptor header */
2249      hdr[3] = (XCOPY_BLK2BLK_SEG_DESC_SIZE - 4 /* SEG_DESC_SRC_INDEX_OFFSET */) & 0xFF;
2250      hdr[4] = (src_index >> 8) & 0xFF;
2251      hdr[5] = src_index & 0xFF;
2252      hdr[6] = (dst_index >> 8) & 0xFF;
2253      hdr[7] = dst_index & 0xFF;
2254  }
2255  
2256  static void iscsi_xcopy_populate_desc(uint8_t *desc, int dc, int cat,
2257                                        int src_index, int dst_index, int num_blks,
2258                                        uint64_t src_lba, uint64_t dst_lba)
2259  {
2260      iscsi_xcopy_desc_hdr(desc, dc, cat, src_index, dst_index);
2261  
2262      /* The caller should verify the request size */
2263      assert(num_blks < 65536);
2264      desc[10] = (num_blks >> 8) & 0xFF;
2265      desc[11] = num_blks & 0xFF;
2266      desc[12] = (src_lba >> 56) & 0xFF;
2267      desc[13] = (src_lba >> 48) & 0xFF;
2268      desc[14] = (src_lba >> 40) & 0xFF;
2269      desc[15] = (src_lba >> 32) & 0xFF;
2270      desc[16] = (src_lba >> 24) & 0xFF;
2271      desc[17] = (src_lba >> 16) & 0xFF;
2272      desc[18] = (src_lba >> 8) & 0xFF;
2273      desc[19] = src_lba & 0xFF;
2274      desc[20] = (dst_lba >> 56) & 0xFF;
2275      desc[21] = (dst_lba >> 48) & 0xFF;
2276      desc[22] = (dst_lba >> 40) & 0xFF;
2277      desc[23] = (dst_lba >> 32) & 0xFF;
2278      desc[24] = (dst_lba >> 24) & 0xFF;
2279      desc[25] = (dst_lba >> 16) & 0xFF;
2280      desc[26] = (dst_lba >> 8) & 0xFF;
2281      desc[27] = dst_lba & 0xFF;
2282  }
2283  
2284  static void iscsi_xcopy_populate_header(unsigned char *buf, int list_id, int str,
2285                                          int list_id_usage, int prio,
2286                                          int tgt_desc_len,
2287                                          int seg_desc_len, int inline_data_len)
2288  {
2289      buf[0] = list_id;
2290      buf[1] = ((str & 1) << 5) | ((list_id_usage & 3) << 3) | (prio & 7);
2291      buf[2] = (tgt_desc_len >> 8) & 0xFF;
2292      buf[3] = tgt_desc_len & 0xFF;
2293      buf[8] = (seg_desc_len >> 24) & 0xFF;
2294      buf[9] = (seg_desc_len >> 16) & 0xFF;
2295      buf[10] = (seg_desc_len >> 8) & 0xFF;
2296      buf[11] = seg_desc_len & 0xFF;
2297      buf[12] = (inline_data_len >> 24) & 0xFF;
2298      buf[13] = (inline_data_len >> 16) & 0xFF;
2299      buf[14] = (inline_data_len >> 8) & 0xFF;
2300      buf[15] = inline_data_len & 0xFF;
2301  }
2302  
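/*
 * Lay out the EXTENDED COPY parameter list for a single block-to-block
 * copy: a 16-byte header, two 32-byte identification CSCD descriptors
 * (source at index 0, destination at index 1) and one 28-byte
 * block-to-block segment descriptor, 108 bytes in total with the sizes
 * used here.
 */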
2303  static void iscsi_xcopy_data(struct iscsi_data *data,
2304                               IscsiLun *src, int64_t src_lba,
2305                               IscsiLun *dst, int64_t dst_lba,
2306                               uint16_t num_blocks)
2307  {
2308      uint8_t *buf;
2309      const int src_offset = XCOPY_DESC_OFFSET;
2310      const int dst_offset = XCOPY_DESC_OFFSET + IDENT_DESCR_TGT_DESCR_SIZE;
2311      const int seg_offset = dst_offset + IDENT_DESCR_TGT_DESCR_SIZE;
2312  
2313      data->size = XCOPY_DESC_OFFSET +
2314                   IDENT_DESCR_TGT_DESCR_SIZE * 2 +
2315                   XCOPY_BLK2BLK_SEG_DESC_SIZE;
2316      data->data = g_malloc0(data->size);
2317      buf = data->data;
2318  
2319      /* Initialise the parameter list header */
2320      iscsi_xcopy_populate_header(buf, 1, 0, 2 /* LIST_ID_USAGE_DISCARD */,
2321                                  0, 2 * IDENT_DESCR_TGT_DESCR_SIZE,
2322                                  XCOPY_BLK2BLK_SEG_DESC_SIZE,
2323                                  0);
2324  
2325      /* Initialise CSCD list with one src + one dst descriptor */
2326      iscsi_populate_target_desc(&buf[src_offset], src);
2327      iscsi_populate_target_desc(&buf[dst_offset], dst);
2328  
2329      /* Initialise one segment descriptor */
2330      iscsi_xcopy_populate_desc(&buf[seg_offset], 0, 0, 0, 1, num_blocks,
2331                                src_lba, dst_lba);
2332  }
2333  
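/*
 * Offload a copy between two iSCSI LUNs with EXTENDED COPY, issued on the
 * destination LUN.  Returns -ENOTSUP (leaving it to the caller to fall
 * back) unless both ends are this driver, both have a saved designator,
 * offsets and length are block aligned, the block sizes match and the copy
 * fits into one segment descriptor (at most 65535 blocks).
 */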
2334  static int coroutine_fn GRAPH_RDLOCK
2335  iscsi_co_copy_range_to(BlockDriverState *bs,
2336                         BdrvChild *src, int64_t src_offset,
2337                         BdrvChild *dst, int64_t dst_offset,
2338                         int64_t bytes, BdrvRequestFlags read_flags,
2339                         BdrvRequestFlags write_flags)
2340  {
2341      IscsiLun *dst_lun = dst->bs->opaque;
2342      IscsiLun *src_lun;
2343      struct IscsiTask iscsi_task;
2344      struct iscsi_data data;
2345      int r = 0;
2346      int block_size;
2347  
2348      if (src->bs->drv->bdrv_co_copy_range_to != iscsi_co_copy_range_to) {
2349          return -ENOTSUP;
2350      }
2351      src_lun = src->bs->opaque;
2352  
2353      if (!src_lun->dd || !dst_lun->dd) {
2354          return -ENOTSUP;
2355      }
2356      if (!is_byte_request_lun_aligned(dst_offset, bytes, dst_lun)) {
2357          return -ENOTSUP;
2358      }
2359      if (!is_byte_request_lun_aligned(src_offset, bytes, src_lun)) {
2360          return -ENOTSUP;
2361      }
2362      if (dst_lun->block_size != src_lun->block_size ||
2363          !dst_lun->block_size) {
2364          return -ENOTSUP;
2365      }
2366  
2367      block_size = dst_lun->block_size;
2368      if (bytes / block_size > 65535) {
2369          return -ENOTSUP;
2370      }
2371  
2372      iscsi_xcopy_data(&data,
2373                       src_lun, src_offset / block_size,
2374                       dst_lun, dst_offset / block_size,
2375                       bytes / block_size);
2376  
2377      iscsi_co_init_iscsitask(dst_lun, &iscsi_task);
2378  
2379      qemu_mutex_lock(&dst_lun->mutex);
2380      iscsi_task.task = iscsi_xcopy_task(data.size);
2381  retry:
2382      if (iscsi_scsi_command_async(dst_lun->iscsi, dst_lun->lun,
2383                                   iscsi_task.task, iscsi_co_generic_cb,
2384                                   &data,
2385                                   &iscsi_task) != 0) {
2386          r = -EIO;
2387          goto out_unlock;
2388      }
2389  
2390      iscsi_co_wait_for_task(&iscsi_task, dst_lun);
2391  
2392      if (iscsi_task.do_retry) {
2393          iscsi_task.complete = 0;
2394          goto retry;
2395      }
2396  
2397      if (iscsi_task.status != SCSI_STATUS_GOOD) {
2398          r = iscsi_task.err_code;
2399          goto out_unlock;
2400      }
2401  
2402  out_unlock:
2403  
2404      trace_iscsi_xcopy(src_lun, src_offset, dst_lun, dst_offset, bytes, r);
2405      g_free(iscsi_task.task);
2406      qemu_mutex_unlock(&dst_lun->mutex);
2407      g_free(iscsi_task.err_str);
2408      return r;
2409  }
2410  
2411  
2412  static const char *const iscsi_strong_runtime_opts[] = {
2413      "transport",
2414      "portal",
2415      "target",
2416      "user",
2417      "password",
2418      "password-secret",
2419      "lun",
2420      "initiator-name",
2421      "header-digest",
2422  
2423      NULL
2424  };
2425  
2426  static BlockDriver bdrv_iscsi = {
2427      .format_name     = "iscsi",
2428      .protocol_name   = "iscsi",
2429  
2430      .instance_size          = sizeof(IscsiLun),
2431      .bdrv_parse_filename    = iscsi_parse_filename,
2432      .bdrv_open              = iscsi_open,
2433      .bdrv_close             = iscsi_close,
2434      .bdrv_co_create_opts    = bdrv_co_create_opts_simple,
2435      .create_opts            = &bdrv_create_opts_simple,
2436      .bdrv_reopen_prepare    = iscsi_reopen_prepare,
2437      .bdrv_reopen_commit     = iscsi_reopen_commit,
2438      .bdrv_co_invalidate_cache = iscsi_co_invalidate_cache,
2439  
2440      .bdrv_co_getlength   = iscsi_co_getlength,
2441      .bdrv_co_get_info    = iscsi_co_get_info,
2442      .bdrv_co_truncate    = iscsi_co_truncate,
2443      .bdrv_refresh_limits = iscsi_refresh_limits,
2444  
2445      .bdrv_co_block_status  = iscsi_co_block_status,
2446      .bdrv_co_pdiscard      = iscsi_co_pdiscard,
2447      .bdrv_co_copy_range_from = iscsi_co_copy_range_from,
2448      .bdrv_co_copy_range_to  = iscsi_co_copy_range_to,
2449      .bdrv_co_pwrite_zeroes = iscsi_co_pwrite_zeroes,
2450      .bdrv_co_readv         = iscsi_co_readv,
2451      .bdrv_co_writev        = iscsi_co_writev,
2452      .bdrv_co_flush_to_disk = iscsi_co_flush,
2453  
2454  #ifdef __linux__
2455      .bdrv_aio_ioctl   = iscsi_aio_ioctl,
2456  #endif
2457  
2458      .bdrv_detach_aio_context = iscsi_detach_aio_context,
2459      .bdrv_attach_aio_context = iscsi_attach_aio_context,
2460  
2461      .strong_runtime_opts = iscsi_strong_runtime_opts,
2462  };
2463  
2464  #if LIBISCSI_API_VERSION >= (20160603)
2465  static BlockDriver bdrv_iser = {
2466      .format_name     = "iser",
2467      .protocol_name   = "iser",
2468  
2469      .instance_size          = sizeof(IscsiLun),
2470      .bdrv_parse_filename    = iscsi_parse_filename,
2471      .bdrv_open              = iscsi_open,
2472      .bdrv_close             = iscsi_close,
2473      .bdrv_co_create_opts    = bdrv_co_create_opts_simple,
2474      .create_opts            = &bdrv_create_opts_simple,
2475      .bdrv_reopen_prepare    = iscsi_reopen_prepare,
2476      .bdrv_reopen_commit     = iscsi_reopen_commit,
2477      .bdrv_co_invalidate_cache  = iscsi_co_invalidate_cache,
2478  
2479      .bdrv_co_getlength   = iscsi_co_getlength,
2480      .bdrv_co_get_info    = iscsi_co_get_info,
2481      .bdrv_co_truncate    = iscsi_co_truncate,
2482      .bdrv_refresh_limits = iscsi_refresh_limits,
2483  
2484      .bdrv_co_block_status  = iscsi_co_block_status,
2485      .bdrv_co_pdiscard      = iscsi_co_pdiscard,
2486      .bdrv_co_copy_range_from = iscsi_co_copy_range_from,
2487      .bdrv_co_copy_range_to  = iscsi_co_copy_range_to,
2488      .bdrv_co_pwrite_zeroes = iscsi_co_pwrite_zeroes,
2489      .bdrv_co_readv         = iscsi_co_readv,
2490      .bdrv_co_writev        = iscsi_co_writev,
2491      .bdrv_co_flush_to_disk = iscsi_co_flush,
2492  
2493  #ifdef __linux__
2494      .bdrv_aio_ioctl   = iscsi_aio_ioctl,
2495  #endif
2496  
2497      .bdrv_detach_aio_context = iscsi_detach_aio_context,
2498      .bdrv_attach_aio_context = iscsi_attach_aio_context,
2499  
2500      .strong_runtime_opts = iscsi_strong_runtime_opts,
2501  };
2502  #endif
2503  
2504  static void iscsi_block_init(void)
2505  {
2506      bdrv_register(&bdrv_iscsi);
2507  #if LIBISCSI_API_VERSION >= (20160603)
2508      bdrv_register(&bdrv_iser);
2509  #endif
2510  }
2511  
2512  block_init(iscsi_block_init);
2513