xref: /openbmc/qemu/hw/scsi/scsi-disk.c (revision 7d87775f)
1 /*
2  * SCSI Device emulation
3  *
4  * Copyright (c) 2006 CodeSourcery.
5  * Based on code by Fabrice Bellard
6  *
7  * Written by Paul Brook
8  * Modifications:
9  *  2009-Dec-12 Artyom Tarasenko : implemented standard inquiry for the case
10  *                                 when the allocation length of CDB is smaller
11  *                                 than 36.
12  *  2009-Oct-13 Artyom Tarasenko : implemented the block descriptor in the
13  *                                 MODE SENSE response.
14  *
15  * This code is licensed under the LGPL.
16  *
17  * Note that this file only handles the SCSI architecture model and device
18  * commands.  Emulation of interface/link layer protocols is handled by
19  * the host adapter emulator.
20  */
21 
22 #include "qemu/osdep.h"
23 #include "qemu/units.h"
24 #include "qapi/error.h"
25 #include "qemu/error-report.h"
26 #include "qemu/main-loop.h"
27 #include "qemu/module.h"
28 #include "qemu/hw-version.h"
29 #include "qemu/memalign.h"
30 #include "hw/scsi/scsi.h"
31 #include "migration/qemu-file-types.h"
32 #include "migration/vmstate.h"
33 #include "hw/scsi/emulation.h"
34 #include "scsi/constants.h"
35 #include "sysemu/arch_init.h"
36 #include "sysemu/block-backend.h"
37 #include "sysemu/blockdev.h"
38 #include "hw/block/block.h"
39 #include "hw/qdev-properties.h"
40 #include "hw/qdev-properties-system.h"
41 #include "sysemu/dma.h"
42 #include "sysemu/sysemu.h"
43 #include "qemu/cutils.h"
44 #include "trace.h"
45 #include "qom/object.h"
46 
47 #ifdef __linux__
48 #include <scsi/sg.h>
49 #endif
50 
51 #define SCSI_WRITE_SAME_MAX         (512 * KiB)
52 #define SCSI_DMA_BUF_SIZE           (128 * KiB)
53 #define SCSI_MAX_INQUIRY_LEN        256
54 #define SCSI_MAX_MODE_LEN           256
55 
56 #define DEFAULT_DISCARD_GRANULARITY (4 * KiB)
57 #define DEFAULT_MAX_UNMAP_SIZE      (1 * GiB)
58 #define DEFAULT_MAX_IO_SIZE         INT_MAX     /* 2 GB - 1 block */
59 
60 #define TYPE_SCSI_DISK_BASE         "scsi-disk-base"
61 
62 #define MAX_SERIAL_LEN              36
63 #define MAX_SERIAL_LEN_FOR_DEVID    20
64 
65 OBJECT_DECLARE_TYPE(SCSIDiskState, SCSIDiskClass, SCSI_DISK_BASE)
66 
67 struct SCSIDiskClass {
68     SCSIDeviceClass parent_class;
69     /*
70      * Callbacks receive ret == 0 for success. Errors are represented either as
71      * negative errno values, or as positive SAM status codes.
72      *
73      * Beware: For errors returned in host_status, the function may directly
74      * complete the request and never call the callback.
75      */
76     DMAIOFunc       *dma_readv;
77     DMAIOFunc       *dma_writev;
78     bool            (*need_fua_emulation)(SCSICommand *cmd);
79     void            (*update_sense)(SCSIRequest *r);
80 };
81 
82 typedef struct SCSIDiskReq {
83     SCSIRequest req;
84     /* Both sector and sector_count are in terms of BDRV_SECTOR_SIZE bytes.  */
85     uint64_t sector;
86     uint32_t sector_count;
87     uint32_t buflen;
88     bool started;
89     bool need_fua_emulation;
90     struct iovec iov;
91     QEMUIOVector qiov;
92     BlockAcctCookie acct;
93 } SCSIDiskReq;
94 
95 #define SCSI_DISK_F_REMOVABLE             0
96 #define SCSI_DISK_F_DPOFUA                1
97 #define SCSI_DISK_F_NO_REMOVABLE_DEVOPS   2
98 
99 struct SCSIDiskState {
100     SCSIDevice qdev;
101     uint32_t features;
102     bool media_changed;
103     bool media_event;
104     bool eject_request;
105     uint16_t port_index;
106     uint64_t max_unmap_size;
107     uint64_t max_io_size;
108     uint32_t quirks;
109     QEMUBH *bh;
110     char *version;
111     char *serial;
112     char *vendor;
113     char *product;
114     char *device_id;
115     char *loadparm;     /* only for s390x */
116     bool tray_open;
117     bool tray_locked;
118     /*
119      * 0x0000        - rotation rate not reported
120      * 0x0001        - non-rotating medium (SSD)
121      * 0x0002-0x0400 - reserved
122  * 0x0401-0xfffe - rotations per minute
123      * 0xffff        - reserved
124      */
125     uint16_t rotation_rate;
126     bool migrate_emulated_scsi_request;
127 };
128 
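/* Free the request's bounce buffer, if one was allocated.  */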
129 static void scsi_free_request(SCSIRequest *req)
130 {
131     SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
132 
133     qemu_vfree(r->iov.iov_base);
134 }
135 
136 /* Helper function for command completion with sense.  */
137 static void scsi_check_condition(SCSIDiskReq *r, SCSISense sense)
138 {
139     trace_scsi_disk_check_condition(r->req.tag, sense.key, sense.asc,
140                                     sense.ascq);
141     scsi_req_build_sense(&r->req, sense);
142     scsi_req_complete(&r->req, CHECK_CONDITION);
143 }
144 
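/*
 * Prepare the request's bounce buffer: allocate it on first use and cap
 * the iovec length at the remaining sector count.
 */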
145 static void scsi_init_iovec(SCSIDiskReq *r, size_t size)
146 {
147     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
148 
149     if (!r->iov.iov_base) {
150         r->buflen = size;
151         r->iov.iov_base = blk_blockalign(s->qdev.conf.blk, r->buflen);
152     }
153     r->iov.iov_len = MIN(r->sector_count * BDRV_SECTOR_SIZE, r->buflen);
154     qemu_iovec_init_external(&r->qiov, &r->iov, 1);
155 }
156 
157 static void scsi_disk_save_request(QEMUFile *f, SCSIRequest *req)
158 {
159     SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
160 
161     qemu_put_be64s(f, &r->sector);
162     qemu_put_be32s(f, &r->sector_count);
163     qemu_put_be32s(f, &r->buflen);
164     if (r->buflen) {
165         if (r->req.cmd.mode == SCSI_XFER_TO_DEV) {
166             qemu_put_buffer(f, r->iov.iov_base, r->iov.iov_len);
167         } else if (!req->retry) {
168             uint32_t len = r->iov.iov_len;
169             qemu_put_be32s(f, &len);
170             qemu_put_buffer(f, r->iov.iov_base, r->iov.iov_len);
171         }
172     }
173 }
174 
175 static void scsi_disk_emulate_save_request(QEMUFile *f, SCSIRequest *req)
176 {
177     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
178 
179     if (s->migrate_emulated_scsi_request) {
180         scsi_disk_save_request(f, req);
181     }
182 }
183 
184 static void scsi_disk_load_request(QEMUFile *f, SCSIRequest *req)
185 {
186     SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
187 
188     qemu_get_be64s(f, &r->sector);
189     qemu_get_be32s(f, &r->sector_count);
190     qemu_get_be32s(f, &r->buflen);
191     if (r->buflen) {
192         scsi_init_iovec(r, r->buflen);
193         if (r->req.cmd.mode == SCSI_XFER_TO_DEV) {
194             qemu_get_buffer(f, r->iov.iov_base, r->iov.iov_len);
195         } else if (!r->req.retry) {
196             uint32_t len;
197             qemu_get_be32s(f, &len);
198             r->iov.iov_len = len;
199             assert(r->iov.iov_len <= r->buflen);
200             qemu_get_buffer(f, r->iov.iov_base, r->iov.iov_len);
201         }
202     }
203 
204     qemu_iovec_init_external(&r->qiov, &r->iov, 1);
205 }
206 
207 static void scsi_disk_emulate_load_request(QEMUFile *f, SCSIRequest *req)
208 {
209     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
210 
211     if (s->migrate_emulated_scsi_request) {
212         scsi_disk_load_request(f, req);
213     }
214 }
215 
216 /*
217  * scsi_handle_rw_error has two return values.  False means that the error
218  * must be ignored, true means that the error has been processed and the
219  * caller should not do anything else for this request.  Note that
220  * scsi_handle_rw_error always manages its reference counts, independent
221  * of the return value.
222  */
223 static bool scsi_handle_rw_error(SCSIDiskReq *r, int ret, bool acct_failed)
224 {
225     bool is_read = (r->req.cmd.mode == SCSI_XFER_FROM_DEV);
226     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
227     SCSIDiskClass *sdc = (SCSIDiskClass *) object_get_class(OBJECT(s));
228     SCSISense sense = SENSE_CODE(NO_SENSE);
229     int error;
230     bool req_has_sense = false;
231     BlockErrorAction action;
232     int status;
233 
234     if (ret < 0) {
235         status = scsi_sense_from_errno(-ret, &sense);
236         error = -ret;
237     } else {
238         /* A passthrough command has completed with nonzero status.  */
239         status = ret;
240         switch (status) {
241         case CHECK_CONDITION:
242             req_has_sense = true;
243             error = scsi_sense_buf_to_errno(r->req.sense, sizeof(r->req.sense));
244             break;
245         case RESERVATION_CONFLICT:
246             /*
247              * Don't apply the error policy, always report to the guest.
248              *
249              * This is a passthrough code path, so it's not a backend error, but
250              * a response to an invalid guest request.
251              *
252              * Windows Failover Cluster validation intentionally sends invalid
253              * requests to verify that reservations work as intended. It is
254              * crucial that it sees the resulting errors.
255              *
256              * Treating a reservation conflict as a guest-side error is obvious
257              * when a pr-manager is in use. Without one, the situation is less
258              * clear, but there might be nothing that can be fixed on the host
259              * (like in the above example), and we don't want to be stuck in a
260              * loop where resuming the VM and retrying the request immediately
261              * stops it again. So always reporting is still the safer option in
262              * this case, too.
263              */
264             error = 0;
265             break;
266         default:
267             error = EINVAL;
268             break;
269         }
270     }
271 
272     /*
273      * Check whether the error has to be handled by the guest or should
274      * rather follow the rerror=/werror= settings.  Guest-handled errors
275      * are usually retried immediately, so do not post them to QMP and
276      * do not account them as failed I/O.
277      */
278     if (!error || (req_has_sense &&
279                    scsi_sense_buf_is_guest_recoverable(r->req.sense,
280                                                        sizeof(r->req.sense)))) {
281         action = BLOCK_ERROR_ACTION_REPORT;
282         acct_failed = false;
283     } else {
284         action = blk_get_error_action(s->qdev.conf.blk, is_read, error);
285         blk_error_action(s->qdev.conf.blk, action, is_read, error);
286     }
287 
288     switch (action) {
289     case BLOCK_ERROR_ACTION_REPORT:
290         if (acct_failed) {
291             block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct);
292         }
293         if (req_has_sense) {
294             sdc->update_sense(&r->req);
295         } else if (status == CHECK_CONDITION) {
296             scsi_req_build_sense(&r->req, sense);
297         }
298         scsi_req_complete(&r->req, status);
299         return true;
300 
301     case BLOCK_ERROR_ACTION_IGNORE:
302         return false;
303 
304     case BLOCK_ERROR_ACTION_STOP:
305         scsi_req_retry(&r->req);
306         return true;
307 
308     default:
309         g_assert_not_reached();
310     }
311 }
312 
313 static bool scsi_disk_req_check_error(SCSIDiskReq *r, int ret, bool acct_failed)
314 {
315     if (r->req.io_canceled) {
316         scsi_req_cancel_complete(&r->req);
317         return true;
318     }
319 
320     if (ret != 0) {
321         return scsi_handle_rw_error(r, ret, acct_failed);
322     }
323 
324     return false;
325 }
326 
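/*
 * Generic AIO completion callback: account the I/O and finish the request
 * with GOOD status unless the error path has already completed it.
 */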
327 static void scsi_aio_complete(void *opaque, int ret)
328 {
329     SCSIDiskReq *r = (SCSIDiskReq *)opaque;
330     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
331 
332     /* The request must only run in the BlockBackend's AioContext */
333     assert(blk_get_aio_context(s->qdev.conf.blk) ==
334            qemu_get_current_aio_context());
335 
336     assert(r->req.aiocb != NULL);
337     r->req.aiocb = NULL;
338 
339     if (scsi_disk_req_check_error(r, ret, true)) {
340         goto done;
341     }
342 
343     block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct);
344     scsi_req_complete(&r->req, GOOD);
345 
346 done:
347     scsi_req_unref(&r->req);
348 }
349 
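/*
 * Does the CDB request Force Unit Access semantics?  The FUA bit applies to
 * READ/WRITE (10/12/16); VERIFY and WRITE AND VERIFY are always treated as FUA.
 */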
350 static bool scsi_is_cmd_fua(SCSICommand *cmd)
351 {
352     switch (cmd->buf[0]) {
353     case READ_10:
354     case READ_12:
355     case READ_16:
356     case WRITE_10:
357     case WRITE_12:
358     case WRITE_16:
359         return (cmd->buf[1] & 8) != 0;
360 
361     case VERIFY_10:
362     case VERIFY_12:
363     case VERIFY_16:
364     case WRITE_VERIFY_10:
365     case WRITE_VERIFY_12:
366     case WRITE_VERIFY_16:
367         return true;
368 
369     case READ_6:
370     case WRITE_6:
371     default:
372         return false;
373     }
374 }
375 
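/*
 * Complete a write request; if FUA has to be emulated, flush the backend
 * first and finish the request in scsi_aio_complete().
 */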
376 static void scsi_write_do_fua(SCSIDiskReq *r)
377 {
378     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
379 
380     assert(r->req.aiocb == NULL);
381     assert(!r->req.io_canceled);
382 
383     if (r->need_fua_emulation) {
384         block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct, 0,
385                          BLOCK_ACCT_FLUSH);
386         r->req.aiocb = blk_aio_flush(s->qdev.conf.blk, scsi_aio_complete, r);
387         return;
388     }
389 
390     scsi_req_complete(&r->req, GOOD);
391     scsi_req_unref(&r->req);
392 }
393 
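/* Common completion path for scatter/gather transfers (reads and writes).  */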
394 static void scsi_dma_complete_noio(SCSIDiskReq *r, int ret)
395 {
396     assert(r->req.aiocb == NULL);
397     if (scsi_disk_req_check_error(r, ret, ret > 0)) {
398         goto done;
399     }
400 
401     r->sector += r->sector_count;
402     r->sector_count = 0;
403     if (r->req.cmd.mode == SCSI_XFER_TO_DEV) {
404         scsi_write_do_fua(r);
405         return;
406     } else {
407         scsi_req_complete(&r->req, GOOD);
408     }
409 
410 done:
411     scsi_req_unref(&r->req);
412 }
413 
414 /* May not be called in all error cases, don't rely on cleanup here */
415 static void scsi_dma_complete(void *opaque, int ret)
416 {
417     SCSIDiskReq *r = (SCSIDiskReq *)opaque;
418     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
419 
420     assert(r->req.aiocb != NULL);
421     r->req.aiocb = NULL;
422 
423     /* ret > 0 is accounted for in scsi_disk_req_check_error() */
424     if (ret < 0) {
425         block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct);
426     } else if (ret == 0) {
427         block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct);
428     }
429     scsi_dma_complete_noio(r, ret);
430 }
431 
432 static void scsi_read_complete_noio(SCSIDiskReq *r, int ret)
433 {
434     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
435     uint32_t n;
436 
437     /* The request must only run in the BlockBackend's AioContext */
438     assert(blk_get_aio_context(s->qdev.conf.blk) ==
439            qemu_get_current_aio_context());
440 
441     assert(r->req.aiocb == NULL);
442     if (scsi_disk_req_check_error(r, ret, ret > 0)) {
443         goto done;
444     }
445 
446     n = r->qiov.size / BDRV_SECTOR_SIZE;
447     r->sector += n;
448     r->sector_count -= n;
449     scsi_req_data(&r->req, r->qiov.size);
450 
451 done:
452     scsi_req_unref(&r->req);
453 }
454 
455 /* May not be called in all error cases, don't rely on cleanup here */
456 static void scsi_read_complete(void *opaque, int ret)
457 {
458     SCSIDiskReq *r = (SCSIDiskReq *)opaque;
459     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
460 
461     assert(r->req.aiocb != NULL);
462     r->req.aiocb = NULL;
463 
464     /* ret > 0 is accounted for in scsi_disk_req_check_error() */
465     if (ret < 0) {
466         block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct);
467     } else if (ret == 0) {
468         block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct);
469         trace_scsi_disk_read_complete(r->req.tag, r->qiov.size);
470     }
471     scsi_read_complete_noio(r, ret);
472 }
473 
474 /* Actually issue a read to the block device.  */
475 static void scsi_do_read(SCSIDiskReq *r, int ret)
476 {
477     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
478     SCSIDiskClass *sdc = (SCSIDiskClass *) object_get_class(OBJECT(s));
479 
480     assert(r->req.aiocb == NULL);
481     if (scsi_disk_req_check_error(r, ret, false)) {
482         goto done;
483     }
484 
485     /* The request is used as the AIO opaque value, so add a ref.  */
486     scsi_req_ref(&r->req);
487 
488     if (r->req.sg) {
489         dma_acct_start(s->qdev.conf.blk, &r->acct, r->req.sg, BLOCK_ACCT_READ);
490         r->req.residual -= r->req.sg->size;
491         r->req.aiocb = dma_blk_io(blk_get_aio_context(s->qdev.conf.blk),
492                                   r->req.sg, r->sector << BDRV_SECTOR_BITS,
493                                   BDRV_SECTOR_SIZE,
494                                   sdc->dma_readv, r, scsi_dma_complete, r,
495                                   DMA_DIRECTION_FROM_DEVICE);
496     } else {
497         scsi_init_iovec(r, SCSI_DMA_BUF_SIZE);
498         block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct,
499                          r->qiov.size, BLOCK_ACCT_READ);
500         r->req.aiocb = sdc->dma_readv(r->sector << BDRV_SECTOR_BITS, &r->qiov,
501                                       scsi_read_complete, r, r);
502     }
503 
504 done:
505     scsi_req_unref(&r->req);
506 }
507 
508 static void scsi_do_read_cb(void *opaque, int ret)
509 {
510     SCSIDiskReq *r = (SCSIDiskReq *)opaque;
511     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
512 
513     assert(r->req.aiocb != NULL);
514     r->req.aiocb = NULL;
515 
516     if (ret < 0) {
517         block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct);
518     } else {
519         block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct);
520     }
521     scsi_do_read(opaque, ret);
522 }
523 
524 /* Read more data from the SCSI device into the buffer.  */
525 static void scsi_read_data(SCSIRequest *req)
526 {
527     SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
528     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
529     bool first;
530 
531     trace_scsi_disk_read_data_count(r->sector_count);
532     if (r->sector_count == 0) {
533         /* This also clears the sense buffer for REQUEST SENSE.  */
534         scsi_req_complete(&r->req, GOOD);
535         return;
536     }
537 
538     /* No data transfer may already be in progress */
539     assert(r->req.aiocb == NULL);
540 
541     /* The request is used as the AIO opaque value, so add a ref.  */
542     scsi_req_ref(&r->req);
543     if (r->req.cmd.mode == SCSI_XFER_TO_DEV) {
544         trace_scsi_disk_read_data_invalid();
545         scsi_read_complete_noio(r, -EINVAL);
546         return;
547     }
548 
549     if (!blk_is_available(req->dev->conf.blk)) {
550         scsi_read_complete_noio(r, -ENOMEDIUM);
551         return;
552     }
553 
554     first = !r->started;
555     r->started = true;
556     if (first && r->need_fua_emulation) {
557         block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct, 0,
558                          BLOCK_ACCT_FLUSH);
559         r->req.aiocb = blk_aio_flush(s->qdev.conf.blk, scsi_do_read_cb, r);
560     } else {
561         scsi_do_read(r, 0);
562     }
563 }
564 
565 static void scsi_write_complete_noio(SCSIDiskReq *r, int ret)
566 {
567     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
568     uint32_t n;
569 
570     /* The request must only run in the BlockBackend's AioContext */
571     assert(blk_get_aio_context(s->qdev.conf.blk) ==
572            qemu_get_current_aio_context());
573 
574     assert(r->req.aiocb == NULL);
575     if (scsi_disk_req_check_error(r, ret, ret > 0)) {
576         goto done;
577     }
578 
579     n = r->qiov.size / BDRV_SECTOR_SIZE;
580     r->sector += n;
581     r->sector_count -= n;
582     if (r->sector_count == 0) {
583         scsi_write_do_fua(r);
584         return;
585     } else {
586         scsi_init_iovec(r, SCSI_DMA_BUF_SIZE);
587         trace_scsi_disk_write_complete_noio(r->req.tag, r->qiov.size);
588         scsi_req_data(&r->req, r->qiov.size);
589     }
590 
591 done:
592     scsi_req_unref(&r->req);
593 }
594 
595 /* May not be called in all error cases, don't rely on cleanup here */
596 static void scsi_write_complete(void *opaque, int ret)
597 {
598     SCSIDiskReq *r = (SCSIDiskReq *)opaque;
599     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
600 
601     assert(r->req.aiocb != NULL);
602     r->req.aiocb = NULL;
603 
604     /* ret > 0 is accounted for in scsi_disk_req_check_error() */
605     if (ret < 0) {
606         block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct);
607     } else if (ret == 0) {
608         block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct);
609     }
610     scsi_write_complete_noio(r, ret);
611 }
612 
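/* Write out data received from the HBA, or ask it for the first chunk.  */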
613 static void scsi_write_data(SCSIRequest *req)
614 {
615     SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
616     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
617     SCSIDiskClass *sdc = (SCSIDiskClass *) object_get_class(OBJECT(s));
618 
619     /* No data transfer may already be in progress */
620     assert(r->req.aiocb == NULL);
621 
622     /* The request is used as the AIO opaque value, so add a ref.  */
623     scsi_req_ref(&r->req);
624     if (r->req.cmd.mode != SCSI_XFER_TO_DEV) {
625         trace_scsi_disk_write_data_invalid();
626         scsi_write_complete_noio(r, -EINVAL);
627         return;
628     }
629 
630     if (!r->req.sg && !r->qiov.size) {
631         /* Called for the first time.  Ask the driver to send us more data.  */
632         r->started = true;
633         scsi_write_complete_noio(r, 0);
634         return;
635     }
636     if (!blk_is_available(req->dev->conf.blk)) {
637         scsi_write_complete_noio(r, -ENOMEDIUM);
638         return;
639     }
640 
641     if (r->req.cmd.buf[0] == VERIFY_10 || r->req.cmd.buf[0] == VERIFY_12 ||
642         r->req.cmd.buf[0] == VERIFY_16) {
643         if (r->req.sg) {
644             scsi_dma_complete_noio(r, 0);
645         } else {
646             scsi_write_complete_noio(r, 0);
647         }
648         return;
649     }
650 
651     if (r->req.sg) {
652         dma_acct_start(s->qdev.conf.blk, &r->acct, r->req.sg, BLOCK_ACCT_WRITE);
653         r->req.residual -= r->req.sg->size;
654         r->req.aiocb = dma_blk_io(blk_get_aio_context(s->qdev.conf.blk),
655                                   r->req.sg, r->sector << BDRV_SECTOR_BITS,
656                                   BDRV_SECTOR_SIZE,
657                                   sdc->dma_writev, r, scsi_dma_complete, r,
658                                   DMA_DIRECTION_TO_DEVICE);
659     } else {
660         block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct,
661                          r->qiov.size, BLOCK_ACCT_WRITE);
662         r->req.aiocb = sdc->dma_writev(r->sector << BDRV_SECTOR_BITS, &r->qiov,
663                                        scsi_write_complete, r, r);
664     }
665 }
666 
667 /* Return a pointer to the data buffer.  */
668 static uint8_t *scsi_get_buf(SCSIRequest *req)
669 {
670     SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
671 
672     return (uint8_t *)r->iov.iov_base;
673 }
674 
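/*
 * Build an INQUIRY Vital Product Data page.  Returns the response length,
 * or -1 if the requested page is not supported.
 */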
675 static int scsi_disk_emulate_vpd_page(SCSIRequest *req, uint8_t *outbuf)
676 {
677     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
678     uint8_t page_code = req->cmd.buf[2];
679     int start, buflen = 0;
680 
681     outbuf[buflen++] = s->qdev.type & 0x1f;
682     outbuf[buflen++] = page_code;
683     outbuf[buflen++] = 0x00;
684     outbuf[buflen++] = 0x00;
685     start = buflen;
686 
687     switch (page_code) {
688     case 0x00: /* Supported page codes, mandatory */
689     {
690         trace_scsi_disk_emulate_vpd_page_00(req->cmd.xfer);
691         outbuf[buflen++] = 0x00; /* list of supported pages (this page) */
692         if (s->serial) {
693             outbuf[buflen++] = 0x80; /* unit serial number */
694         }
695         outbuf[buflen++] = 0x83; /* device identification */
696         if (s->qdev.type == TYPE_DISK) {
697             outbuf[buflen++] = 0xb0; /* block limits */
698             outbuf[buflen++] = 0xb1; /* block device characteristics */
699             outbuf[buflen++] = 0xb2; /* thin provisioning */
700         }
701         break;
702     }
703     case 0x80: /* Device serial number, optional */
704     {
705         int l;
706 
707         if (!s->serial) {
708             trace_scsi_disk_emulate_vpd_page_80_not_supported();
709             return -1;
710         }
711 
712         l = strlen(s->serial);
713         if (l > MAX_SERIAL_LEN) {
714             l = MAX_SERIAL_LEN;
715         }
716 
717         trace_scsi_disk_emulate_vpd_page_80(req->cmd.xfer);
718         memcpy(outbuf + buflen, s->serial, l);
719         buflen += l;
720         break;
721     }
722 
723     case 0x83: /* Device identification page, mandatory */
724     {
725         int id_len = s->device_id ? MIN(strlen(s->device_id), 255 - 8) : 0;
726 
727         trace_scsi_disk_emulate_vpd_page_83(req->cmd.xfer);
728 
729         if (id_len) {
730             outbuf[buflen++] = 0x2; /* ASCII */
731             outbuf[buflen++] = 0;   /* not officially assigned */
732             outbuf[buflen++] = 0;   /* reserved */
733             outbuf[buflen++] = id_len; /* length of data following */
734             memcpy(outbuf + buflen, s->device_id, id_len);
735             buflen += id_len;
736         }
737 
738         if (s->qdev.wwn) {
739             outbuf[buflen++] = 0x1; /* Binary */
740             outbuf[buflen++] = 0x3; /* NAA */
741             outbuf[buflen++] = 0;   /* reserved */
742             outbuf[buflen++] = 8;
743             stq_be_p(&outbuf[buflen], s->qdev.wwn);
744             buflen += 8;
745         }
746 
747         if (s->qdev.port_wwn) {
748             outbuf[buflen++] = 0x61; /* SAS / Binary */
749             outbuf[buflen++] = 0x93; /* PIV / Target port / NAA */
750             outbuf[buflen++] = 0;    /* reserved */
751             outbuf[buflen++] = 8;
752             stq_be_p(&outbuf[buflen], s->qdev.port_wwn);
753             buflen += 8;
754         }
755 
756         if (s->port_index) {
757             outbuf[buflen++] = 0x61; /* SAS / Binary */
758 
759             /* PIV/Target port/relative target port */
760             outbuf[buflen++] = 0x94;
761 
762             outbuf[buflen++] = 0;    /* reserved */
763             outbuf[buflen++] = 4;
764             stw_be_p(&outbuf[buflen + 2], s->port_index);
765             buflen += 4;
766         }
767         break;
768     }
769     case 0xb0: /* block limits */
770     {
771         SCSIBlockLimits bl = {};
772 
773         if (s->qdev.type == TYPE_ROM) {
774             trace_scsi_disk_emulate_vpd_page_b0_not_supported();
775             return -1;
776         }
777         bl.wsnz = 1;
778         bl.unmap_sectors =
779             s->qdev.conf.discard_granularity / s->qdev.blocksize;
780         bl.min_io_size =
781             s->qdev.conf.min_io_size / s->qdev.blocksize;
782         bl.opt_io_size =
783             s->qdev.conf.opt_io_size / s->qdev.blocksize;
784         bl.max_unmap_sectors =
785             s->max_unmap_size / s->qdev.blocksize;
786         bl.max_io_sectors =
787             s->max_io_size / s->qdev.blocksize;
788         /* 255 descriptors fit in 4 KiB with an 8-byte header */
789         bl.max_unmap_descr = 255;
790 
791         if (s->qdev.type == TYPE_DISK) {
792             int max_transfer_blk = blk_get_max_transfer(s->qdev.conf.blk);
793             int max_io_sectors_blk =
794                 max_transfer_blk / s->qdev.blocksize;
795 
796             bl.max_io_sectors =
797                 MIN_NON_ZERO(max_io_sectors_blk, bl.max_io_sectors);
798         }
799         buflen += scsi_emulate_block_limits(outbuf + buflen, &bl);
800         break;
801     }
802     case 0xb1: /* block device characteristics */
803     {
804         buflen = 0x40;
805         outbuf[4] = (s->rotation_rate >> 8) & 0xff;
806         outbuf[5] = s->rotation_rate & 0xff;
807         outbuf[6] = 0; /* PRODUCT TYPE */
808         outbuf[7] = 0; /* WABEREQ | WACEREQ | NOMINAL FORM FACTOR */
809         outbuf[8] = 0; /* VBULS */
810         break;
811     }
812     case 0xb2: /* thin provisioning */
813     {
814         buflen = 8;
815         outbuf[4] = 0;
816         outbuf[5] = 0xe0; /* unmap & write_same 10/16 all supported */
817         outbuf[6] = s->qdev.conf.discard_granularity ? 2 : 1;
818         outbuf[7] = 0;
819         break;
820     }
821     default:
822         return -1;
823     }
824     /* done with EVPD */
825     assert(buflen - start <= 255);
826     outbuf[start - 1] = buflen - start;
827     return buflen;
828 }
829 
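/*
 * Emulate INQUIRY: either a VPD page (EVPD set) or the standard inquiry
 * data.  Returns the response length, or -1 for invalid CDB fields.
 */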
830 static int scsi_disk_emulate_inquiry(SCSIRequest *req, uint8_t *outbuf)
831 {
832     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
833     int buflen = 0;
834 
835     if (req->cmd.buf[1] & 0x1) {
836         /* Vital product data */
837         return scsi_disk_emulate_vpd_page(req, outbuf);
838     }
839 
840     /* Standard INQUIRY data */
841     if (req->cmd.buf[2] != 0) {
842         return -1;
843     }
844 
845     /* PAGE CODE == 0 */
846     buflen = req->cmd.xfer;
847     if (buflen > SCSI_MAX_INQUIRY_LEN) {
848         buflen = SCSI_MAX_INQUIRY_LEN;
849     }
850 
851     outbuf[0] = s->qdev.type & 0x1f;
852     outbuf[1] = (s->features & (1 << SCSI_DISK_F_REMOVABLE)) ? 0x80 : 0;
853 
854     strpadcpy((char *) &outbuf[16], 16, s->product, ' ');
855     strpadcpy((char *) &outbuf[8], 8, s->vendor, ' ');
856 
857     memset(&outbuf[32], 0, 4);
858     memcpy(&outbuf[32], s->version, MIN(4, strlen(s->version)));
859     /*
860      * We claim conformance to SPC-3, which is required for guests
861      * to ask for modern features like READ CAPACITY(16) or the
862      * block characteristics VPD page by default.  Not all of SPC-3
863      * is actually implemented, but we're good enough.
864      */
865     outbuf[2] = s->qdev.default_scsi_version;
866     outbuf[3] = 2 | 0x10; /* Format 2, HiSup */
867 
868     if (buflen > 36) {
869         outbuf[4] = buflen - 5; /* Additional Length = (Len - 1) - 4 */
870     } else {
871         /* If the allocation length of the CDB is too small,
872            the additional length is not adjusted */
873         outbuf[4] = 36 - 5;
874     }
875 
876     /* Sync data transfer and TCQ.  */
877     outbuf[7] = 0x10 | (req->bus->info->tcq ? 0x02 : 0);
878     return buflen;
879 }
880 
881 static inline bool media_is_dvd(SCSIDiskState *s)
882 {
883     uint64_t nb_sectors;
884     if (s->qdev.type != TYPE_ROM) {
885         return false;
886     }
887     if (!blk_is_available(s->qdev.conf.blk)) {
888         return false;
889     }
890     blk_get_geometry(s->qdev.conf.blk, &nb_sectors);
891     return nb_sectors > CD_MAX_SECTORS;
892 }
893 
894 static inline bool media_is_cd(SCSIDiskState *s)
895 {
896     uint64_t nb_sectors;
897     if (s->qdev.type != TYPE_ROM) {
898         return false;
899     }
900     if (!blk_is_available(s->qdev.conf.blk)) {
901         return false;
902     }
903     blk_get_geometry(s->qdev.conf.blk, &nb_sectors);
904     return nb_sectors <= CD_MAX_SECTORS;
905 }
906 
907 static int scsi_read_disc_information(SCSIDiskState *s, SCSIDiskReq *r,
908                                       uint8_t *outbuf)
909 {
910     uint8_t type = r->req.cmd.buf[1] & 7;
911 
912     if (s->qdev.type != TYPE_ROM) {
913         return -1;
914     }
915 
916     /* Types 1/2 are only defined for Blu-Ray.  */
917     if (type != 0) {
918         scsi_check_condition(r, SENSE_CODE(INVALID_FIELD));
919         return -1;
920     }
921 
922     memset(outbuf, 0, 34);
923     outbuf[1] = 32;
924     outbuf[2] = 0xe; /* last session complete, disc finalized */
925     outbuf[3] = 1;   /* first track on disc */
926     outbuf[4] = 1;   /* # of sessions */
927     outbuf[5] = 1;   /* first track of last session */
928     outbuf[6] = 1;   /* last track of last session */
929     outbuf[7] = 0x20; /* unrestricted use */
930     outbuf[8] = 0x00; /* CD-ROM or DVD-ROM */
931     /* 9-10-11: most significant bytes of the fields in bytes 4-5-6 */
932     /* 12-23: not meaningful for CD-ROM or DVD-ROM */
933     /* 24-31: disc bar code */
934     /* 32: disc application code */
935     /* 33: number of OPC tables */
936 
937     return 34;
938 }
939 
940 static int scsi_read_dvd_structure(SCSIDiskState *s, SCSIDiskReq *r,
941                                    uint8_t *outbuf)
942 {
943     static const int rds_caps_size[5] = {
944         [0] = 2048 + 4,
945         [1] = 4 + 4,
946         [3] = 188 + 4,
947         [4] = 2048 + 4,
948     };
949 
950     uint8_t media = r->req.cmd.buf[1];
951     uint8_t layer = r->req.cmd.buf[6];
952     uint8_t format = r->req.cmd.buf[7];
953     int size = -1;
954 
955     if (s->qdev.type != TYPE_ROM) {
956         return -1;
957     }
958     if (media != 0) {
959         scsi_check_condition(r, SENSE_CODE(INVALID_FIELD));
960         return -1;
961     }
962 
963     if (format != 0xff) {
964         if (!blk_is_available(s->qdev.conf.blk)) {
965             scsi_check_condition(r, SENSE_CODE(NO_MEDIUM));
966             return -1;
967         }
968         if (media_is_cd(s)) {
969             scsi_check_condition(r, SENSE_CODE(INCOMPATIBLE_FORMAT));
970             return -1;
971         }
972         if (format >= ARRAY_SIZE(rds_caps_size)) {
973             return -1;
974         }
975         size = rds_caps_size[format];
976         memset(outbuf, 0, size);
977     }
978 
979     switch (format) {
980     case 0x00: {
981         /* Physical format information */
982         uint64_t nb_sectors;
983         if (layer != 0) {
984             goto fail;
985         }
986         blk_get_geometry(s->qdev.conf.blk, &nb_sectors);
987 
988         outbuf[4] = 1;   /* DVD-ROM, part version 1 */
989         outbuf[5] = 0xf; /* 120mm disc, minimum rate unspecified */
990         outbuf[6] = 1;   /* one layer, read-only (per MMC-2 spec) */
991         outbuf[7] = 0;   /* default densities */
992 
993         stl_be_p(&outbuf[12], (nb_sectors >> 2) - 1); /* end sector */
994         stl_be_p(&outbuf[16], (nb_sectors >> 2) - 1); /* l0 end sector */
995         break;
996     }
997 
998     case 0x01: /* DVD copyright information, all zeros */
999         break;
1000 
1001     case 0x03: /* BCA information - invalid field for no BCA info */
1002         return -1;
1003 
1004     case 0x04: /* DVD disc manufacturing information, all zeros */
1005         break;
1006 
1007     case 0xff: { /* List capabilities */
1008         int i;
1009         size = 4;
1010         for (i = 0; i < ARRAY_SIZE(rds_caps_size); i++) {
1011             if (!rds_caps_size[i]) {
1012                 continue;
1013             }
1014             outbuf[size] = i;
1015             outbuf[size + 1] = 0x40; /* Not writable, readable */
1016             stw_be_p(&outbuf[size + 2], rds_caps_size[i]);
1017             size += 4;
1018         }
1019         break;
1020     }
1021 
1022     default:
1023         return -1;
1024     }
1025 
1026     /* Size of buffer, not including 2 byte size field */
1027     stw_be_p(outbuf, size - 2);
1028     return size;
1029 
1030 fail:
1031     return -1;
1032 }
1033 
1034 static int scsi_event_status_media(SCSIDiskState *s, uint8_t *outbuf)
1035 {
1036     uint8_t event_code, media_status;
1037 
1038     media_status = 0;
1039     if (s->tray_open) {
1040         media_status = MS_TRAY_OPEN;
1041     } else if (blk_is_inserted(s->qdev.conf.blk)) {
1042         media_status = MS_MEDIA_PRESENT;
1043     }
1044 
1045     /* Event notification descriptor */
1046     event_code = MEC_NO_CHANGE;
1047     if (media_status != MS_TRAY_OPEN) {
1048         if (s->media_event) {
1049             event_code = MEC_NEW_MEDIA;
1050             s->media_event = false;
1051         } else if (s->eject_request) {
1052             event_code = MEC_EJECT_REQUESTED;
1053             s->eject_request = false;
1054         }
1055     }
1056 
1057     outbuf[0] = event_code;
1058     outbuf[1] = media_status;
1059 
1060     /* These fields are reserved, just clear them. */
1061     outbuf[2] = 0;
1062     outbuf[3] = 0;
1063     return 4;
1064 }
1065 
1066 static int scsi_get_event_status_notification(SCSIDiskState *s, SCSIDiskReq *r,
1067                                               uint8_t *outbuf)
1068 {
1069     int size;
1070     uint8_t *buf = r->req.cmd.buf;
1071     uint8_t notification_class_request = buf[4];
1072     if (s->qdev.type != TYPE_ROM) {
1073         return -1;
1074     }
1075     if ((buf[1] & 1) == 0) {
1076         /* asynchronous */
1077         return -1;
1078     }
1079 
1080     size = 4;
1081     outbuf[0] = outbuf[1] = 0;
1082     outbuf[3] = 1 << GESN_MEDIA; /* supported events */
1083     if (notification_class_request & (1 << GESN_MEDIA)) {
1084         outbuf[2] = GESN_MEDIA;
1085         size += scsi_event_status_media(s, &outbuf[size]);
1086     } else {
1087         outbuf[2] = 0x80;
1088     }
1089     stw_be_p(outbuf, size - 4);
1090     return size;
1091 }
1092 
1093 static int scsi_get_configuration(SCSIDiskState *s, uint8_t *outbuf)
1094 {
1095     int current;
1096 
1097     if (s->qdev.type != TYPE_ROM) {
1098         return -1;
1099     }
1100 
1101     if (media_is_dvd(s)) {
1102         current = MMC_PROFILE_DVD_ROM;
1103     } else if (media_is_cd(s)) {
1104         current = MMC_PROFILE_CD_ROM;
1105     } else {
1106         current = MMC_PROFILE_NONE;
1107     }
1108 
1109     memset(outbuf, 0, 40);
1110     stl_be_p(&outbuf[0], 36); /* Bytes after the data length field */
1111     stw_be_p(&outbuf[6], current);
1112     /* outbuf[8] - outbuf[19]: Feature 0 - Profile list */
1113     outbuf[10] = 0x03; /* persistent, current */
1114     outbuf[11] = 8; /* two profiles */
1115     stw_be_p(&outbuf[12], MMC_PROFILE_DVD_ROM);
1116     outbuf[14] = (current == MMC_PROFILE_DVD_ROM);
1117     stw_be_p(&outbuf[16], MMC_PROFILE_CD_ROM);
1118     outbuf[18] = (current == MMC_PROFILE_CD_ROM);
1119     /* outbuf[20] - outbuf[31]: Feature 1 - Core feature */
1120     stw_be_p(&outbuf[20], 1);
1121     outbuf[22] = 0x08 | 0x03; /* version 2, persistent, current */
1122     outbuf[23] = 8;
1123     stl_be_p(&outbuf[24], 1); /* SCSI */
1124     outbuf[28] = 1; /* DBE = 1, mandatory */
1125     /* outbuf[32] - outbuf[39]: Feature 3 - Removable media feature */
1126     stw_be_p(&outbuf[32], 3);
1127     outbuf[34] = 0x08 | 0x03; /* version 2, persistent, current */
1128     outbuf[35] = 4;
1129     outbuf[36] = 0x39; /* tray, load=1, eject=1, unlocked at powerup, lock=1 */
1130     /* TODO: Random readable, CD read, DVD read, drive serial number,
1131        power management */
1132     return 40;
1133 }
1134 
1135 static int scsi_emulate_mechanism_status(SCSIDiskState *s, uint8_t *outbuf)
1136 {
1137     if (s->qdev.type != TYPE_ROM) {
1138         return -1;
1139     }
1140     memset(outbuf, 0, 8);
1141     outbuf[5] = 1; /* CD-ROM */
1142     return 8;
1143 }
1144 
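/*
 * Fill in a single mode page at *p_outbuf and advance the pointer.
 * Returns the number of bytes added (including the 2-byte page header),
 * or -1 if the page is not valid for this device type.
 */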
1145 static int mode_sense_page(SCSIDiskState *s, int page, uint8_t **p_outbuf,
1146                            int page_control)
1147 {
1148     static const int mode_sense_valid[0x3f] = {
1149         [MODE_PAGE_VENDOR_SPECIFIC]        = (1 << TYPE_DISK) | (1 << TYPE_ROM),
1150         [MODE_PAGE_HD_GEOMETRY]            = (1 << TYPE_DISK),
1151         [MODE_PAGE_FLEXIBLE_DISK_GEOMETRY] = (1 << TYPE_DISK),
1152         [MODE_PAGE_CACHING]                = (1 << TYPE_DISK) | (1 << TYPE_ROM),
1153         [MODE_PAGE_R_W_ERROR]              = (1 << TYPE_DISK) | (1 << TYPE_ROM),
1154         [MODE_PAGE_AUDIO_CTL]              = (1 << TYPE_ROM),
1155         [MODE_PAGE_CAPABILITIES]           = (1 << TYPE_ROM),
1156         [MODE_PAGE_APPLE_VENDOR]           = (1 << TYPE_ROM),
1157     };
1158 
1159     uint8_t *p = *p_outbuf + 2;
1160     int length;
1161 
1162     assert(page < ARRAY_SIZE(mode_sense_valid));
1163     if ((mode_sense_valid[page] & (1 << s->qdev.type)) == 0) {
1164         return -1;
1165     }
1166 
1167     /*
1168      * If Changeable Values are requested, a mask denoting those mode parameters
1169      * that are changeable shall be returned. As we currently don't support
1170      * parameter changes via MODE_SELECT, all bits are returned set to zero.
1171      * The buffer was already memset to zero by the caller of this function.
1172      *
1173      * The offsets here are off by two compared to the descriptions in the
1174      * SCSI specs, because those include a 2-byte header.  This is unfortunate,
1175      * but it is done so that offsets are consistent within our implementation
1176      * of MODE SENSE and MODE SELECT.  MODE SELECT has to deal with both
1177      * 2-byte and 4-byte headers.
1178      */
1179     switch (page) {
1180     case MODE_PAGE_HD_GEOMETRY:
1181         length = 0x16;
1182         if (page_control == 1) { /* Changeable Values */
1183             break;
1184         }
1185         /* if a geometry hint is available, use it */
1186         p[0] = (s->qdev.conf.cyls >> 16) & 0xff;
1187         p[1] = (s->qdev.conf.cyls >> 8) & 0xff;
1188         p[2] = s->qdev.conf.cyls & 0xff;
1189         p[3] = s->qdev.conf.heads & 0xff;
1190         /* Write precomp start cylinder, disabled */
1191         p[4] = (s->qdev.conf.cyls >> 16) & 0xff;
1192         p[5] = (s->qdev.conf.cyls >> 8) & 0xff;
1193         p[6] = s->qdev.conf.cyls & 0xff;
1194         /* Reduced current start cylinder, disabled */
1195         p[7] = (s->qdev.conf.cyls >> 16) & 0xff;
1196         p[8] = (s->qdev.conf.cyls >> 8) & 0xff;
1197         p[9] = s->qdev.conf.cyls & 0xff;
1198         /* Device step rate [ns], 200ns */
1199         p[10] = 0;
1200         p[11] = 200;
1201         /* Landing zone cylinder */
1202         p[12] = 0xff;
1203         p[13] = 0xff;
1204         p[14] = 0xff;
1205         /* Medium rotation rate [rpm], 5400 rpm */
1206         p[18] = (5400 >> 8) & 0xff;
1207         p[19] = 5400 & 0xff;
1208         break;
1209 
1210     case MODE_PAGE_FLEXIBLE_DISK_GEOMETRY:
1211         length = 0x1e;
1212         if (page_control == 1) { /* Changeable Values */
1213             break;
1214         }
1215         /* Transfer rate [kbit/s], 5Mbit/s */
1216         p[0] = 5000 >> 8;
1217         p[1] = 5000 & 0xff;
1218         /* if a geometry hint is available, use it */
1219         p[2] = s->qdev.conf.heads & 0xff;
1220         p[3] = s->qdev.conf.secs & 0xff;
1221         p[4] = s->qdev.blocksize >> 8;
1222         p[6] = (s->qdev.conf.cyls >> 8) & 0xff;
1223         p[7] = s->qdev.conf.cyls & 0xff;
1224         /* Write precomp start cylinder, disabled */
1225         p[8] = (s->qdev.conf.cyls >> 8) & 0xff;
1226         p[9] = s->qdev.conf.cyls & 0xff;
1227         /* Reduced current start cylinder, disabled */
1228         p[10] = (s->qdev.conf.cyls >> 8) & 0xff;
1229         p[11] = s->qdev.conf.cyls & 0xff;
1230         /* Device step rate [100us], 100us */
1231         p[12] = 0;
1232         p[13] = 1;
1233         /* Device step pulse width [us], 1us */
1234         p[14] = 1;
1235         /* Device head settle delay [100us], 100us */
1236         p[15] = 0;
1237         p[16] = 1;
1238         /* Motor on delay [0.1s], 0.1s */
1239         p[17] = 1;
1240         /* Motor off delay [0.1s], 0.1s */
1241         p[18] = 1;
1242         /* Medium rotation rate [rpm], 5400 rpm */
1243         p[26] = (5400 >> 8) & 0xff;
1244         p[27] = 5400 & 0xff;
1245         break;
1246 
1247     case MODE_PAGE_CACHING:
1248         length = 0x12;
1249         if (page_control == 1 || /* Changeable Values */
1250             blk_enable_write_cache(s->qdev.conf.blk)) {
1251             p[0] = 4; /* WCE */
1252         }
1253         break;
1254 
1255     case MODE_PAGE_R_W_ERROR:
1256         length = 10;
1257         if (page_control == 1) { /* Changeable Values */
1258             if (s->qdev.type == TYPE_ROM) {
1259                 /* Automatic Write Reallocation Enabled */
1260                 p[0] = 0x80;
1261             }
1262             break;
1263         }
1264         p[0] = 0x80; /* Automatic Write Reallocation Enabled */
1265         if (s->qdev.type == TYPE_ROM) {
1266             p[1] = 0x20; /* Read Retry Count */
1267         }
1268         break;
1269 
1270     case MODE_PAGE_AUDIO_CTL:
1271         length = 14;
1272         break;
1273 
1274     case MODE_PAGE_CAPABILITIES:
1275         length = 0x14;
1276         if (page_control == 1) { /* Changeable Values */
1277             break;
1278         }
1279 
1280         p[0] = 0x3b; /* CD-R & CD-RW read */
1281         p[1] = 0; /* Writing not supported */
1282         p[2] = 0x7f; /* Audio, composite, digital out,
1283                         mode 2 form 1&2, multi session */
1284         p[3] = 0xff; /* CD DA, DA accurate, RW supported,
1285                         RW corrected, C2 errors, ISRC,
1286                         UPC, Bar code */
1287         p[4] = 0x2d | (s->tray_locked ? 2 : 0);
1288         /* Locking supported, jumper present, eject, tray */
1289         p[5] = 0; /* no volume & mute control, no
1290                      changer */
1291         p[6] = (50 * 176) >> 8; /* 50x read speed */
1292         p[7] = (50 * 176) & 0xff;
1293         p[8] = 2 >> 8; /* Two volume levels */
1294         p[9] = 2 & 0xff;
1295         p[10] = 2048 >> 8; /* 2M buffer */
1296         p[11] = 2048 & 0xff;
1297         p[12] = (16 * 176) >> 8; /* 16x read speed current */
1298         p[13] = (16 * 176) & 0xff;
1299         p[16] = (16 * 176) >> 8; /* 16x write speed */
1300         p[17] = (16 * 176) & 0xff;
1301         p[18] = (16 * 176) >> 8; /* 16x write speed current */
1302         p[19] = (16 * 176) & 0xff;
1303         break;
1304 
1305     case MODE_PAGE_APPLE_VENDOR:
1306         if (s->quirks & (1 << SCSI_DISK_QUIRK_MODE_PAGE_APPLE_VENDOR)) {
1307             length = 0x1e;
1308             if (page_control == 1) { /* Changeable Values */
1309                 break;
1310             }
1311 
1312             memset(p, 0, length);
1313             strcpy((char *)p + 8, "APPLE COMPUTER, INC   ");
1314             break;
1315         } else {
1316             return -1;
1317         }
1318 
1319     case MODE_PAGE_VENDOR_SPECIFIC:
1320         if (s->qdev.type == TYPE_DISK && (s->quirks &
1321             (1 << SCSI_DISK_QUIRK_MODE_PAGE_VENDOR_SPECIFIC_APPLE))) {
1322             length = 0x2;
1323             if (page_control == 1) { /* Changeable Values */
1324                 p[0] = 0xff;
1325                 p[1] = 0xff;
1326                 break;
1327             }
1328             p[0] = 0;
1329             p[1] = 0;
1330             break;
1331         } else {
1332             return -1;
1333         }
1334 
1335     default:
1336         return -1;
1337     }
1338 
1339     assert(length < 256);
1340     (*p_outbuf)[0] = page;
1341     (*p_outbuf)[1] = length;
1342     *p_outbuf += length + 2;
1343     return length + 2;
1344 }
1345 
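/*
 * Emulate MODE SENSE (6/10): build the mode parameter header, an optional
 * block descriptor and the requested mode page(s).
 */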
1346 static int scsi_disk_emulate_mode_sense(SCSIDiskReq *r, uint8_t *outbuf)
1347 {
1348     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
1349     uint64_t nb_sectors;
1350     bool dbd;
1351     int page, buflen, ret, page_control;
1352     uint8_t *p;
1353     uint8_t dev_specific_param;
1354 
1355     dbd = (r->req.cmd.buf[1] & 0x8) != 0;
1356     page = r->req.cmd.buf[2] & 0x3f;
1357     page_control = (r->req.cmd.buf[2] & 0xc0) >> 6;
1358 
1359     trace_scsi_disk_emulate_mode_sense((r->req.cmd.buf[0] == MODE_SENSE) ? 6 :
1360                                        10, page, r->req.cmd.xfer, page_control);
1361     memset(outbuf, 0, r->req.cmd.xfer);
1362     p = outbuf;
1363 
1364     if (s->qdev.type == TYPE_DISK) {
1365         dev_specific_param = s->features & (1 << SCSI_DISK_F_DPOFUA) ? 0x10 : 0;
1366         if (!blk_is_writable(s->qdev.conf.blk)) {
1367             dev_specific_param |= 0x80; /* Readonly.  */
1368         }
1369     } else {
1370         if (s->quirks & (1 << SCSI_DISK_QUIRK_MODE_SENSE_ROM_USE_DBD)) {
1371             /* Use DBD from the request... */
1372             dev_specific_param = 0x00;
1373 
1374             /*
1375              * ... unless we receive a request for MODE_PAGE_APPLE_VENDOR
1376              * ... unless we receive a request for MODE_PAGE_APPLE_VENDOR,
1377              * which should never return a block descriptor even though DBD is
1378              * not set; otherwise CDROM detection fails in MacOS.
1379             if (s->quirks & (1 << SCSI_DISK_QUIRK_MODE_PAGE_APPLE_VENDOR) &&
1380                 page == MODE_PAGE_APPLE_VENDOR) {
1381                 dbd = true;
1382             }
1383         } else {
1384             /*
1385              * MMC prescribes that CD/DVD drives have no block descriptors,
1386              * and defines no device-specific parameter.
1387              */
1388             dev_specific_param = 0x00;
1389             dbd = true;
1390         }
1391     }
1392 
1393     if (r->req.cmd.buf[0] == MODE_SENSE) {
1394         p[1] = 0; /* Default media type.  */
1395         p[2] = dev_specific_param;
1396         p[3] = 0; /* Block descriptor length.  */
1397         p += 4;
1398     } else { /* MODE_SENSE_10 */
1399         p[2] = 0; /* Default media type.  */
1400         p[3] = dev_specific_param;
1401         p[6] = p[7] = 0; /* Block descriptor length.  */
1402         p += 8;
1403     }
1404 
1405     blk_get_geometry(s->qdev.conf.blk, &nb_sectors);
1406     if (!dbd && nb_sectors) {
1407         if (r->req.cmd.buf[0] == MODE_SENSE) {
1408             outbuf[3] = 8; /* Block descriptor length  */
1409         } else { /* MODE_SENSE_10 */
1410             outbuf[7] = 8; /* Block descriptor length  */
1411         }
1412         nb_sectors /= (s->qdev.blocksize / BDRV_SECTOR_SIZE);
1413         if (nb_sectors > 0xffffff) {
1414             nb_sectors = 0;
1415         }
1416         p[0] = 0; /* media density code */
1417         p[1] = (nb_sectors >> 16) & 0xff;
1418         p[2] = (nb_sectors >> 8) & 0xff;
1419         p[3] = nb_sectors & 0xff;
1420         p[4] = 0; /* reserved */
1421         p[5] = 0; /* bytes 5-7 are the sector size in bytes */
1422         p[6] = s->qdev.blocksize >> 8;
1423         p[7] = 0;
1424         p += 8;
1425     }
1426 
1427     if (page_control == 3) {
1428         /* Saved Values */
1429         scsi_check_condition(r, SENSE_CODE(SAVING_PARAMS_NOT_SUPPORTED));
1430         return -1;
1431     }
1432 
1433     if (page == 0x3f) {
1434         for (page = 0; page <= 0x3e; page++) {
1435             mode_sense_page(s, page, &p, page_control);
1436         }
1437     } else {
1438         ret = mode_sense_page(s, page, &p, page_control);
1439         if (ret == -1) {
1440             return -1;
1441         }
1442     }
1443 
1444     buflen = p - outbuf;
1445     /*
1446      * The mode data length field specifies the length in bytes of the
1447      * following data that is available to be transferred. The mode data
1448      * length does not include itself.
1449      */
1450     if (r->req.cmd.buf[0] == MODE_SENSE) {
1451         outbuf[0] = buflen - 1;
1452     } else { /* MODE_SENSE_10 */
1453         outbuf[0] = ((buflen - 2) >> 8) & 0xff;
1454         outbuf[1] = (buflen - 2) & 0xff;
1455     }
1456     return buflen;
1457 }
1458 
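/* Emulate READ TOC/PMA/ATIP; formats 0, 1 and 2 are supported.  */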
1459 static int scsi_disk_emulate_read_toc(SCSIRequest *req, uint8_t *outbuf)
1460 {
1461     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
1462     int start_track, format, msf, toclen;
1463     uint64_t nb_sectors;
1464 
1465     msf = req->cmd.buf[1] & 2;
1466     format = req->cmd.buf[2] & 0xf;
1467     start_track = req->cmd.buf[6];
1468     blk_get_geometry(s->qdev.conf.blk, &nb_sectors);
1469     trace_scsi_disk_emulate_read_toc(start_track, format, msf >> 1);
1470     nb_sectors /= s->qdev.blocksize / BDRV_SECTOR_SIZE;
1471     switch (format) {
1472     case 0:
1473         toclen = cdrom_read_toc(nb_sectors, outbuf, msf, start_track);
1474         break;
1475     case 1:
1476         /* multi session : only a single session defined */
1477         toclen = 12;
1478         memset(outbuf, 0, 12);
1479         outbuf[1] = 0x0a;
1480         outbuf[2] = 0x01;
1481         outbuf[3] = 0x01;
1482         break;
1483     case 2:
1484         toclen = cdrom_read_toc_raw(nb_sectors, outbuf, msf, start_track);
1485         break;
1486     default:
1487         return -1;
1488     }
1489     return toclen;
1490 }
1491 
1492 static int scsi_disk_emulate_start_stop(SCSIDiskReq *r)
1493 {
1494     SCSIRequest *req = &r->req;
1495     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
1496     bool start = req->cmd.buf[4] & 1;
1497     bool loej = req->cmd.buf[4] & 2; /* load on start, eject on !start */
1498     int pwrcnd = req->cmd.buf[4] & 0xf0;
1499 
1500     if (pwrcnd) {
1501         /* eject/load only happens for power condition == 0 */
1502         return 0;
1503     }
1504 
1505     if ((s->features & (1 << SCSI_DISK_F_REMOVABLE)) && loej) {
1506         if (!start && !s->tray_open && s->tray_locked) {
1507             scsi_check_condition(r,
1508                                  blk_is_inserted(s->qdev.conf.blk)
1509                                  ? SENSE_CODE(ILLEGAL_REQ_REMOVAL_PREVENTED)
1510                                  : SENSE_CODE(NOT_READY_REMOVAL_PREVENTED));
1511             return -1;
1512         }
1513 
1514         if (s->tray_open != !start) {
1515             blk_eject(s->qdev.conf.blk, !start);
1516             s->tray_open = !start;
1517         }
1518     }
1519     return 0;
1520 }
1521 
1522 static void scsi_disk_emulate_read_data(SCSIRequest *req)
1523 {
1524     SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
1525     int buflen = r->iov.iov_len;
1526 
1527     if (buflen) {
1528         trace_scsi_disk_emulate_read_data(buflen);
1529         r->iov.iov_len = 0;
1530         r->started = true;
1531         scsi_req_data(&r->req, buflen);
1532         return;
1533     }
1534 
1535     /* This also clears the sense buffer for REQUEST SENSE.  */
1536     scsi_req_complete(&r->req, GOOD);
1537 }
1538 
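/*
 * Check a MODE SELECT page against MODE SENSE data: every bit that is not
 * changeable must match the current value.  Returns 0 on success, -1 on
 * mismatch or unknown page.
 */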
1539 static int scsi_disk_check_mode_select(SCSIDiskState *s, int page,
1540                                        uint8_t *inbuf, int inlen)
1541 {
1542     uint8_t mode_current[SCSI_MAX_MODE_LEN];
1543     uint8_t mode_changeable[SCSI_MAX_MODE_LEN];
1544     uint8_t *p;
1545     int len, expected_len, changeable_len, i;
1546 
1547     /* The input buffer does not include the page header, so it is
1548      * off by 2 bytes.
1549      */
1550     expected_len = inlen + 2;
1551     if (expected_len > SCSI_MAX_MODE_LEN) {
1552         return -1;
1553     }
1554 
1555     /* MODE_PAGE_ALLS is only valid for MODE SENSE commands */
1556     if (page == MODE_PAGE_ALLS) {
1557         return -1;
1558     }
1559 
1560     p = mode_current;
1561     memset(mode_current, 0, inlen + 2);
1562     len = mode_sense_page(s, page, &p, 0);
1563     if (len < 0 || len != expected_len) {
1564         return -1;
1565     }
1566 
1567     p = mode_changeable;
1568     memset(mode_changeable, 0, inlen + 2);
1569     changeable_len = mode_sense_page(s, page, &p, 1);
1570     assert(changeable_len == len);
1571 
1572     /* Check that unchangeable bits are the same as what MODE SENSE
1573      * would return.
1574      */
1575     for (i = 2; i < len; i++) {
1576         if (((mode_current[i] ^ inbuf[i - 2]) & ~mode_changeable[i]) != 0) {
1577             return -1;
1578         }
1579     }
1580     return 0;
1581 }
1582 
1583 static void scsi_disk_apply_mode_select(SCSIDiskState *s, int page, uint8_t *p)
1584 {
1585     switch (page) {
1586     case MODE_PAGE_CACHING:
1587         blk_set_enable_write_cache(s->qdev.conf.blk, (p[0] & 4) != 0);
1588         break;
1589 
1590     default:
1591         break;
1592     }
1593 }
1594 
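/*
 * Walk the mode pages in a MODE SELECT parameter list.  With change=false
 * the pages are only validated; with change=true they are applied.
 */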
1595 static int mode_select_pages(SCSIDiskReq *r, uint8_t *p, int len, bool change)
1596 {
1597     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
1598 
1599     while (len > 0) {
1600         int page, subpage, page_len;
1601 
1602         /* Parse both possible formats for the mode page headers.  */
1603         page = p[0] & 0x3f;
1604         if (p[0] & 0x40) {
1605             if (len < 4) {
1606                 goto invalid_param_len;
1607             }
1608             subpage = p[1];
1609             page_len = lduw_be_p(&p[2]);
1610             p += 4;
1611             len -= 4;
1612         } else {
1613             if (len < 2) {
1614                 goto invalid_param_len;
1615             }
1616             subpage = 0;
1617             page_len = p[1];
1618             p += 2;
1619             len -= 2;
1620         }
1621 
1622         if (subpage) {
1623             goto invalid_param;
1624         }
1625         if (page_len > len) {
1626             if (!(s->quirks & SCSI_DISK_QUIRK_MODE_PAGE_TRUNCATED)) {
1627                 goto invalid_param_len;
1628             }
1629             trace_scsi_disk_mode_select_page_truncated(page, page_len, len);
1630         }
1631 
1632         if (!change) {
1633             if (scsi_disk_check_mode_select(s, page, p, page_len) < 0) {
1634                 goto invalid_param;
1635             }
1636         } else {
1637             scsi_disk_apply_mode_select(s, page, p);
1638         }
1639 
1640         p += page_len;
1641         len -= page_len;
1642     }
1643     return 0;
1644 
1645 invalid_param:
1646     scsi_check_condition(r, SENSE_CODE(INVALID_PARAM));
1647     return -1;
1648 
1649 invalid_param_len:
1650     scsi_check_condition(r, SENSE_CODE(INVALID_PARAM_LEN));
1651     return -1;
1652 }
1653 
1654 static void scsi_disk_emulate_mode_select(SCSIDiskReq *r, uint8_t *inbuf)
1655 {
1656     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
1657     uint8_t *p = inbuf;
1658     int cmd = r->req.cmd.buf[0];
1659     int len = r->req.cmd.xfer;
1660     int hdr_len = (cmd == MODE_SELECT ? 4 : 8);
1661     int bd_len, bs;
1662     int pass;
1663 
1664     if ((r->req.cmd.buf[1] & 0x11) != 0x10) {
1665         if (!(s->quirks &
1666             (1 << SCSI_DISK_QUIRK_MODE_PAGE_VENDOR_SPECIFIC_APPLE))) {
1667             /* We only support PF=1, SP=0.  */
1668             goto invalid_field;
1669         }
1670     }
1671 
1672     if (len < hdr_len) {
1673         goto invalid_param_len;
1674     }
1675 
1676     bd_len = (cmd == MODE_SELECT ? p[3] : lduw_be_p(&p[6]));
1677     len -= hdr_len;
1678     p += hdr_len;
1679     if (len < bd_len) {
1680         goto invalid_param_len;
1681     }
1682     if (bd_len != 0 && bd_len != 8) {
1683         goto invalid_param;
1684     }
1685 
1686     /* Allow changing the block size */
1687     if (bd_len) {
1688         bs = p[5] << 16 | p[6] << 8 | p[7];
1689 
1690         /*
1691          * Since the existing code only checks/updates bits 8-15 of the block
1692          * size, restrict ourselves to the same requirement for now to ensure
1693          * that a block size set by a block descriptor and then read back by
1694          * a subsequent SCSI command will be the same. Also disallow a block
1695          * size of 256 since we cannot handle anything below BDRV_SECTOR_SIZE.
1696          */
1697         if (bs && !(bs & ~0xfe00) && bs != s->qdev.blocksize) {
1698             s->qdev.blocksize = bs;
1699             trace_scsi_disk_mode_select_set_blocksize(s->qdev.blocksize);
1700         }
1701     }
1702 
1703     len -= bd_len;
1704     p += bd_len;
1705 
1706     /* Ensure no change is made if there is an error!  */
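         /* Pass 0 only validates the pages against what MODE SENSE reports;
          * pass 1 actually applies them (see mode_select_pages).  */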
1707     for (pass = 0; pass < 2; pass++) {
1708         if (mode_select_pages(r, p, len, pass == 1) < 0) {
1709             assert(pass == 0);
1710             return;
1711         }
1712     }
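         /* With the write cache disabled (possibly just now via the caching
          * mode page), flush before reporting completion so that previously
          * cached data reaches the medium.  */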
1713     if (!blk_enable_write_cache(s->qdev.conf.blk)) {
1714         /* The request is used as the AIO opaque value, so add a ref.  */
1715         scsi_req_ref(&r->req);
1716         block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct, 0,
1717                          BLOCK_ACCT_FLUSH);
1718         r->req.aiocb = blk_aio_flush(s->qdev.conf.blk, scsi_aio_complete, r);
1719         return;
1720     }
1721 
1722     scsi_req_complete(&r->req, GOOD);
1723     return;
1724 
1725 invalid_param:
1726     scsi_check_condition(r, SENSE_CODE(INVALID_PARAM));
1727     return;
1728 
1729 invalid_param_len:
1730     scsi_check_condition(r, SENSE_CODE(INVALID_PARAM_LEN));
1731     return;
1732 
1733 invalid_field:
1734     scsi_check_condition(r, SENSE_CODE(INVALID_FIELD));
1735 }
1736 
1737 /* sector_num and nb_sectors are expected to be in units of the qdev blocksize */
1738 static inline bool check_lba_range(SCSIDiskState *s,
1739                                    uint64_t sector_num, uint32_t nb_sectors)
1740 {
1741     /*
1742      * The first line tests that no overflow happens when computing the last
1743      * sector.  The second line tests that the last accessed sector is in
1744      * range.
1745      *
1746      * Careful, the computations should not underflow for nb_sectors == 0,
1747      * and a 0-block read to the first LBA beyond the end of device is
1748      * valid.
1749      */
1750     return (sector_num <= sector_num + nb_sectors &&
1751             sector_num + nb_sectors <= s->qdev.max_lba + 1);
1752 }
1753 
1754 typedef struct UnmapCBData {
1755     SCSIDiskReq *r;
1756     uint8_t *inbuf;
1757     int count;
1758 } UnmapCBData;
1759 
1760 static void scsi_unmap_complete(void *opaque, int ret);
1761 
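     /*
      * Process one UNMAP block descriptor per call; the discard completion
      * callback (scsi_unmap_complete) re-enters this function until
      * data->count reaches zero, at which point the request completes
      * with GOOD status.
      */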
1762 static void scsi_unmap_complete_noio(UnmapCBData *data, int ret)
1763 {
1764     SCSIDiskReq *r = data->r;
1765     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
1766 
1767     assert(r->req.aiocb == NULL);
1768 
1769     if (data->count > 0) {
1770         uint64_t sector_num = ldq_be_p(&data->inbuf[0]);
1771         uint32_t nb_sectors = ldl_be_p(&data->inbuf[8]) & 0xffffffffULL;
1772         r->sector = sector_num * (s->qdev.blocksize / BDRV_SECTOR_SIZE);
1773         r->sector_count = nb_sectors * (s->qdev.blocksize / BDRV_SECTOR_SIZE);
1774 
1775         if (!check_lba_range(s, sector_num, nb_sectors)) {
1776             block_acct_invalid(blk_get_stats(s->qdev.conf.blk),
1777                                BLOCK_ACCT_UNMAP);
1778             scsi_check_condition(r, SENSE_CODE(LBA_OUT_OF_RANGE));
1779             goto done;
1780         }
1781 
1782         block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct,
1783                          r->sector_count * BDRV_SECTOR_SIZE,
1784                          BLOCK_ACCT_UNMAP);
1785 
1786         r->req.aiocb = blk_aio_pdiscard(s->qdev.conf.blk,
1787                                         r->sector * BDRV_SECTOR_SIZE,
1788                                         r->sector_count * BDRV_SECTOR_SIZE,
1789                                         scsi_unmap_complete, data);
1790         data->count--;
1791         data->inbuf += 16;
1792         return;
1793     }
1794 
1795     scsi_req_complete(&r->req, GOOD);
1796 
1797 done:
1798     scsi_req_unref(&r->req);
1799     g_free(data);
1800 }
1801 
1802 static void scsi_unmap_complete(void *opaque, int ret)
1803 {
1804     UnmapCBData *data = opaque;
1805     SCSIDiskReq *r = data->r;
1806     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
1807 
1808     assert(r->req.aiocb != NULL);
1809     r->req.aiocb = NULL;
1810 
1811     if (scsi_disk_req_check_error(r, ret, true)) {
1812         scsi_req_unref(&r->req);
1813         g_free(data);
1814     } else {
1815         block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct);
1816         scsi_unmap_complete_noio(data, ret);
1817     }
1818 }
1819 
1820 static void scsi_disk_emulate_unmap(SCSIDiskReq *r, uint8_t *inbuf)
1821 {
1822     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
1823     uint8_t *p = inbuf;
1824     int len = r->req.cmd.xfer;
1825     UnmapCBData *data;
1826 
1827     /* Reject ANCHOR=1.  */
1828     if (r->req.cmd.buf[1] & 0x1) {
1829         goto invalid_field;
1830     }
1831 
1832     if (len < 8) {
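         /*
          * UNMAP parameter list: bytes 0-1 = UNMAP data length, bytes 2-3 =
          * UNMAP block descriptor data length; descriptors start at byte 8
          * and are 16 bytes each (hence the "& 15" and ">> 4" below).
          */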
1833         goto invalid_param_len;
1834     }
1835     if (len < lduw_be_p(&p[0]) + 2) {
1836         goto invalid_param_len;
1837     }
1838     if (len < lduw_be_p(&p[2]) + 8) {
1839         goto invalid_param_len;
1840     }
1841     if (lduw_be_p(&p[2]) & 15) {
1842         goto invalid_param_len;
1843     }
1844 
1845     if (!blk_is_writable(s->qdev.conf.blk)) {
1846         block_acct_invalid(blk_get_stats(s->qdev.conf.blk), BLOCK_ACCT_UNMAP);
1847         scsi_check_condition(r, SENSE_CODE(WRITE_PROTECTED));
1848         return;
1849     }
1850 
1851     data = g_new0(UnmapCBData, 1);
1852     data->r = r;
1853     data->inbuf = &p[8];
1854     data->count = lduw_be_p(&p[2]) >> 4;
1855 
1856     /* The matching unref is in scsi_unmap_complete, before data is freed.  */
1857     scsi_req_ref(&r->req);
1858     scsi_unmap_complete_noio(data, 0);
1859     return;
1860 
1861 invalid_param_len:
1862     block_acct_invalid(blk_get_stats(s->qdev.conf.blk), BLOCK_ACCT_UNMAP);
1863     scsi_check_condition(r, SENSE_CODE(INVALID_PARAM_LEN));
1864     return;
1865 
1866 invalid_field:
1867     block_acct_invalid(blk_get_stats(s->qdev.conf.blk), BLOCK_ACCT_UNMAP);
1868     scsi_check_condition(r, SENSE_CODE(INVALID_FIELD));
1869 }
1870 
1871 typedef struct WriteSameCBData {
1872     SCSIDiskReq *r;
1873     int64_t sector;
1874     int nb_sectors;
1875     QEMUIOVector qiov;
1876     struct iovec iov;
1877 } WriteSameCBData;
1878 
1879 static void scsi_write_same_complete(void *opaque, int ret)
1880 {
1881     WriteSameCBData *data = opaque;
1882     SCSIDiskReq *r = data->r;
1883     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
1884 
1885     assert(r->req.aiocb != NULL);
1886     r->req.aiocb = NULL;
1887 
1888     if (scsi_disk_req_check_error(r, ret, true)) {
1889         goto done;
1890     }
1891 
1892     block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct);
1893 
1894     data->nb_sectors -= data->iov.iov_len / BDRV_SECTOR_SIZE;
1895     data->sector += data->iov.iov_len / BDRV_SECTOR_SIZE;
1896     data->iov.iov_len = MIN(data->nb_sectors * BDRV_SECTOR_SIZE,
1897                             data->iov.iov_len);
1898     if (data->iov.iov_len) {
1899         block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct,
1900                          data->iov.iov_len, BLOCK_ACCT_WRITE);
1901         /* Reinitialize qiov to handle an unaligned WRITE SAME request,
1902          * where the final qiov may need a smaller size.  */
1903         qemu_iovec_init_external(&data->qiov, &data->iov, 1);
1904         r->req.aiocb = blk_aio_pwritev(s->qdev.conf.blk,
1905                                        data->sector << BDRV_SECTOR_BITS,
1906                                        &data->qiov, 0,
1907                                        scsi_write_same_complete, data);
1908         return;
1909     }
1910 
1911     scsi_req_complete(&r->req, GOOD);
1912 
1913 done:
1914     scsi_req_unref(&r->req);
1915     qemu_vfree(data->iov.iov_base);
1916     g_free(data);
1917 }
1918 
1919 static void scsi_disk_emulate_write_same(SCSIDiskReq *r, uint8_t *inbuf)
1920 {
1921     SCSIRequest *req = &r->req;
1922     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
1923     uint32_t nb_sectors = scsi_data_cdb_xfer(r->req.cmd.buf);
1924     WriteSameCBData *data;
1925     uint8_t *buf;
1926     int i, l;
1927 
1928     /* Fail if PBDATA=1 or LBDATA=1 or ANCHOR=1.  */
1929     if (nb_sectors == 0 || (req->cmd.buf[1] & 0x16)) {
1930         scsi_check_condition(r, SENSE_CODE(INVALID_FIELD));
1931         return;
1932     }
1933 
1934     if (!blk_is_writable(s->qdev.conf.blk)) {
1935         scsi_check_condition(r, SENSE_CODE(WRITE_PROTECTED));
1936         return;
1937     }
1938     if (!check_lba_range(s, r->req.cmd.lba, nb_sectors)) {
1939         scsi_check_condition(r, SENSE_CODE(LBA_OUT_OF_RANGE));
1940         return;
1941     }
1942 
1943     if ((req->cmd.buf[1] & 0x1) || buffer_is_zero(inbuf, s->qdev.blocksize)) {
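         /*
          * If the NDOB bit (0x01, WRITE SAME(16)) is set or the payload block
          * is all zeros, issue a single blk_aio_pwrite_zeroes (passing
          * BDRV_REQ_MAY_UNMAP when the UNMAP bit, 0x08, is set).  Otherwise
          * replicate the pattern into a bounce buffer of at most
          * SCSI_WRITE_SAME_MAX bytes and write it out chunk by chunk from
          * scsi_write_same_complete.
          */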
1944         int flags = (req->cmd.buf[1] & 0x8) ? BDRV_REQ_MAY_UNMAP : 0;
1945 
1946         /* The request is used as the AIO opaque value, so add a ref.  */
1947         scsi_req_ref(&r->req);
1948         block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct,
1949                          nb_sectors * s->qdev.blocksize,
1950                          BLOCK_ACCT_WRITE);
1951         r->req.aiocb = blk_aio_pwrite_zeroes(s->qdev.conf.blk,
1952                                 r->req.cmd.lba * s->qdev.blocksize,
1953                                 nb_sectors * s->qdev.blocksize,
1954                                 flags, scsi_aio_complete, r);
1955         return;
1956     }
1957 
1958     data = g_new0(WriteSameCBData, 1);
1959     data->r = r;
1960     data->sector = r->req.cmd.lba * (s->qdev.blocksize / BDRV_SECTOR_SIZE);
1961     data->nb_sectors = nb_sectors * (s->qdev.blocksize / BDRV_SECTOR_SIZE);
1962     data->iov.iov_len = MIN(data->nb_sectors * BDRV_SECTOR_SIZE,
1963                             SCSI_WRITE_SAME_MAX);
1964     data->iov.iov_base = buf = blk_blockalign(s->qdev.conf.blk,
1965                                               data->iov.iov_len);
1966     qemu_iovec_init_external(&data->qiov, &data->iov, 1);
1967 
1968     for (i = 0; i < data->iov.iov_len; i += l) {
1969         l = MIN(s->qdev.blocksize, data->iov.iov_len - i);
1970         memcpy(&buf[i], inbuf, l);
1971     }
1972 
1973     scsi_req_ref(&r->req);
1974     block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct,
1975                      data->iov.iov_len, BLOCK_ACCT_WRITE);
1976     r->req.aiocb = blk_aio_pwritev(s->qdev.conf.blk,
1977                                    data->sector << BDRV_SECTOR_BITS,
1978                                    &data->qiov, 0,
1979                                    scsi_write_same_complete, data);
1980 }
1981 
1982 static void scsi_disk_emulate_write_data(SCSIRequest *req)
1983 {
1984     SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
1985 
1986     if (r->iov.iov_len) {
1987         int buflen = r->iov.iov_len;
1988         trace_scsi_disk_emulate_write_data(buflen);
1989         r->iov.iov_len = 0;
1990         scsi_req_data(&r->req, buflen);
1991         return;
1992     }
1993 
1994     switch (req->cmd.buf[0]) {
1995     case MODE_SELECT:
1996     case MODE_SELECT_10:
1997         /* This also clears the sense buffer for REQUEST SENSE.  */
1998         scsi_disk_emulate_mode_select(r, r->iov.iov_base);
1999         break;
2000 
2001     case UNMAP:
2002         scsi_disk_emulate_unmap(r, r->iov.iov_base);
2003         break;
2004 
2005     case VERIFY_10:
2006     case VERIFY_12:
2007     case VERIFY_16:
2008         if (r->req.status == -1) {
2009             scsi_check_condition(r, SENSE_CODE(INVALID_FIELD));
2010         }
2011         break;
2012 
2013     case WRITE_SAME_10:
2014     case WRITE_SAME_16:
2015         scsi_disk_emulate_write_same(r, r->iov.iov_base);
2016         break;
2017 
2018     case FORMAT_UNIT:
2019         scsi_req_complete(&r->req, GOOD);
2020         break;
2021 
2022     default:
2023         abort();
2024     }
2025 }
2026 
2027 static int32_t scsi_disk_emulate_command(SCSIRequest *req, uint8_t *buf)
2028 {
2029     SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
2030     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
2031     uint64_t nb_sectors;
2032     uint8_t *outbuf;
2033     int buflen;
2034 
2035     switch (req->cmd.buf[0]) {
2036     case INQUIRY:
2037     case MODE_SENSE:
2038     case MODE_SENSE_10:
2039     case RESERVE:
2040     case RESERVE_10:
2041     case RELEASE:
2042     case RELEASE_10:
2043     case START_STOP:
2044     case ALLOW_MEDIUM_REMOVAL:
2045     case GET_CONFIGURATION:
2046     case GET_EVENT_STATUS_NOTIFICATION:
2047     case MECHANISM_STATUS:
2048     case REQUEST_SENSE:
2049         break;
2050 
2051     default:
2052         if (!blk_is_available(s->qdev.conf.blk)) {
2053             scsi_check_condition(r, SENSE_CODE(NO_MEDIUM));
2054             return 0;
2055         }
2056         break;
2057     }
2058 
2059     /*
2060      * FIXME: we shouldn't return anything bigger than 4k, but the code
2061      * requires the buffer to be as big as req->cmd.xfer in several
2062      * places.  So, do not allow CDBs with a very large ALLOCATION
2063      * LENGTH.  The real fix would be to modify scsi_read_data and
2064      * dma_buf_read, so that they return data beyond the buflen
2065      * as all zeros.
2066      */
2067     if (req->cmd.xfer > 65536) {
2068         goto illegal_request;
2069     }
2070     r->buflen = MAX(4096, req->cmd.xfer);
2071 
2072     if (!r->iov.iov_base) {
2073         r->iov.iov_base = blk_blockalign(s->qdev.conf.blk, r->buflen);
2074     }
2075 
2076     outbuf = r->iov.iov_base;
2077     memset(outbuf, 0, r->buflen);
2078     switch (req->cmd.buf[0]) {
2079     case TEST_UNIT_READY:
2080         assert(blk_is_available(s->qdev.conf.blk));
2081         break;
2082     case INQUIRY:
2083         buflen = scsi_disk_emulate_inquiry(req, outbuf);
2084         if (buflen < 0) {
2085             goto illegal_request;
2086         }
2087         break;
2088     case MODE_SENSE:
2089     case MODE_SENSE_10:
2090         buflen = scsi_disk_emulate_mode_sense(r, outbuf);
2091         if (buflen < 0) {
2092             goto illegal_request;
2093         }
2094         break;
2095     case READ_TOC:
2096         buflen = scsi_disk_emulate_read_toc(req, outbuf);
2097         if (buflen < 0) {
2098             goto illegal_request;
2099         }
2100         break;
2101     case RESERVE:
2102         if (req->cmd.buf[1] & 1) {
2103             goto illegal_request;
2104         }
2105         break;
2106     case RESERVE_10:
2107         if (req->cmd.buf[1] & 3) {
2108             goto illegal_request;
2109         }
2110         break;
2111     case RELEASE:
2112         if (req->cmd.buf[1] & 1) {
2113             goto illegal_request;
2114         }
2115         break;
2116     case RELEASE_10:
2117         if (req->cmd.buf[1] & 3) {
2118             goto illegal_request;
2119         }
2120         break;
2121     case START_STOP:
2122         if (scsi_disk_emulate_start_stop(r) < 0) {
2123             return 0;
2124         }
2125         break;
2126     case ALLOW_MEDIUM_REMOVAL:
2127         s->tray_locked = req->cmd.buf[4] & 1;
2128         blk_lock_medium(s->qdev.conf.blk, req->cmd.buf[4] & 1);
2129         break;
2130     case READ_CAPACITY_10:
2131         /* The normal LEN field for this command is zero.  */
2132         memset(outbuf, 0, 8);
2133         blk_get_geometry(s->qdev.conf.blk, &nb_sectors);
2134         if (!nb_sectors) {
2135             scsi_check_condition(r, SENSE_CODE(LUN_NOT_READY));
2136             return 0;
2137         }
2138         if ((req->cmd.buf[8] & 1) == 0 && req->cmd.lba) {
2139             goto illegal_request;
2140         }
2141         nb_sectors /= s->qdev.blocksize / BDRV_SECTOR_SIZE;
2142         /* Returned value is the address of the last sector.  */
2143         nb_sectors--;
2144         /* Remember the new size for read/write sanity checking. */
2145         s->qdev.max_lba = nb_sectors;
2146         /* Clip to 2TB, instead of returning capacity modulo 2TB. */
2147         if (nb_sectors > UINT32_MAX) {
2148             nb_sectors = UINT32_MAX;
2149         }
2150         outbuf[0] = (nb_sectors >> 24) & 0xff;
2151         outbuf[1] = (nb_sectors >> 16) & 0xff;
2152         outbuf[2] = (nb_sectors >> 8) & 0xff;
2153         outbuf[3] = nb_sectors & 0xff;
2154         outbuf[4] = 0;
2155         outbuf[5] = 0;
2156         outbuf[6] = s->qdev.blocksize >> 8;
2157         outbuf[7] = 0;
2158         break;
2159     case REQUEST_SENSE:
2160         /* Just return "NO SENSE".  */
2161         buflen = scsi_convert_sense(NULL, 0, outbuf, r->buflen,
2162                                     (req->cmd.buf[1] & 1) == 0);
2163         if (buflen < 0) {
2164             goto illegal_request;
2165         }
2166         break;
2167     case MECHANISM_STATUS:
2168         buflen = scsi_emulate_mechanism_status(s, outbuf);
2169         if (buflen < 0) {
2170             goto illegal_request;
2171         }
2172         break;
2173     case GET_CONFIGURATION:
2174         buflen = scsi_get_configuration(s, outbuf);
2175         if (buflen < 0) {
2176             goto illegal_request;
2177         }
2178         break;
2179     case GET_EVENT_STATUS_NOTIFICATION:
2180         buflen = scsi_get_event_status_notification(s, r, outbuf);
2181         if (buflen < 0) {
2182             goto illegal_request;
2183         }
2184         break;
2185     case READ_DISC_INFORMATION:
2186         buflen = scsi_read_disc_information(s, r, outbuf);
2187         if (buflen < 0) {
2188             goto illegal_request;
2189         }
2190         break;
2191     case READ_DVD_STRUCTURE:
2192         buflen = scsi_read_dvd_structure(s, r, outbuf);
2193         if (buflen < 0) {
2194             goto illegal_request;
2195         }
2196         break;
2197     case SERVICE_ACTION_IN_16:
2198         /* Service Action In subcommands. */
2199         if ((req->cmd.buf[1] & 31) == SAI_READ_CAPACITY_16) {
2200             trace_scsi_disk_emulate_command_SAI_16();
2201             memset(outbuf, 0, req->cmd.xfer);
2202             blk_get_geometry(s->qdev.conf.blk, &nb_sectors);
2203             if (!nb_sectors) {
2204                 scsi_check_condition(r, SENSE_CODE(LUN_NOT_READY));
2205                 return 0;
2206             }
2207             if ((req->cmd.buf[14] & 1) == 0 && req->cmd.lba) {
2208                 goto illegal_request;
2209             }
2210             nb_sectors /= s->qdev.blocksize / BDRV_SECTOR_SIZE;
2211             /* Returned value is the address of the last sector.  */
2212             nb_sectors--;
2213             /* Remember the new size for read/write sanity checking. */
2214             s->qdev.max_lba = nb_sectors;
2215             outbuf[0] = (nb_sectors >> 56) & 0xff;
2216             outbuf[1] = (nb_sectors >> 48) & 0xff;
2217             outbuf[2] = (nb_sectors >> 40) & 0xff;
2218             outbuf[3] = (nb_sectors >> 32) & 0xff;
2219             outbuf[4] = (nb_sectors >> 24) & 0xff;
2220             outbuf[5] = (nb_sectors >> 16) & 0xff;
2221             outbuf[6] = (nb_sectors >> 8) & 0xff;
2222             outbuf[7] = nb_sectors & 0xff;
2223             outbuf[8] = 0;
2224             outbuf[9] = 0;
2225             outbuf[10] = s->qdev.blocksize >> 8;
2226             outbuf[11] = 0;
2227             outbuf[12] = 0;
2228             outbuf[13] = get_physical_block_exp(&s->qdev.conf);
2229 
2230             /* set TPE bit if the format supports discard */
2231             if (s->qdev.conf.discard_granularity) {
2232                 outbuf[14] = 0x80;
2233             }
2234 
2235             /* Protection, exponent and lowest lba field left blank. */
2236             break;
2237         }
2238         trace_scsi_disk_emulate_command_SAI_unsupported();
2239         goto illegal_request;
2240     case SYNCHRONIZE_CACHE:
2241         /* The request is used as the AIO opaque value, so add a ref.  */
2242         scsi_req_ref(&r->req);
2243         block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct, 0,
2244                          BLOCK_ACCT_FLUSH);
2245         r->req.aiocb = blk_aio_flush(s->qdev.conf.blk, scsi_aio_complete, r);
2246         return 0;
2247     case SEEK_10:
2248         trace_scsi_disk_emulate_command_SEEK_10(r->req.cmd.lba);
2249         if (r->req.cmd.lba > s->qdev.max_lba) {
2250             goto illegal_lba;
2251         }
2252         break;
2253     case MODE_SELECT:
2254         trace_scsi_disk_emulate_command_MODE_SELECT(r->req.cmd.xfer);
2255         break;
2256     case MODE_SELECT_10:
2257         trace_scsi_disk_emulate_command_MODE_SELECT_10(r->req.cmd.xfer);
2258         break;
2259     case UNMAP:
2260         trace_scsi_disk_emulate_command_UNMAP(r->req.cmd.xfer);
2261         break;
2262     case VERIFY_10:
2263     case VERIFY_12:
2264     case VERIFY_16:
2265         trace_scsi_disk_emulate_command_VERIFY((req->cmd.buf[1] >> 1) & 3);
2266         if (req->cmd.buf[1] & 6) {
2267             goto illegal_request;
2268         }
2269         break;
2270     case WRITE_SAME_10:
2271     case WRITE_SAME_16:
2272         trace_scsi_disk_emulate_command_WRITE_SAME(
2273                 req->cmd.buf[0] == WRITE_SAME_10 ? 10 : 16, r->req.cmd.xfer);
2274         break;
2275     case FORMAT_UNIT:
2276         trace_scsi_disk_emulate_command_FORMAT_UNIT(r->req.cmd.xfer);
2277         break;
2278     default:
2279         trace_scsi_disk_emulate_command_UNKNOWN(buf[0],
2280                                                 scsi_command_name(buf[0]));
2281         scsi_check_condition(r, SENSE_CODE(INVALID_OPCODE));
2282         return 0;
2283     }
2284     assert(!r->req.aiocb);
2285     r->iov.iov_len = MIN(r->buflen, req->cmd.xfer);
2286     if (r->iov.iov_len == 0) {
2287         scsi_req_complete(&r->req, GOOD);
2288     }
2289     if (r->req.cmd.mode == SCSI_XFER_TO_DEV) {
2290         assert(r->iov.iov_len == req->cmd.xfer);
2291         return -r->iov.iov_len;
2292     } else {
2293         return r->iov.iov_len;
2294     }
2295 
2296 illegal_request:
2297     if (r->req.status == -1) {
2298         scsi_check_condition(r, SENSE_CODE(INVALID_FIELD));
2299     }
2300     return 0;
2301 
2302 illegal_lba:
2303     scsi_check_condition(r, SENSE_CODE(LBA_OUT_OF_RANGE));
2304     return 0;
2305 }
2306 
2307 /* Execute a SCSI command.  Returns the length of the data expected by the
2308    command: positive for data transfers from the device (e.g. disk reads),
2309    negative for transfers to the device (e.g. disk writes), and zero if the
2310    command does not transfer any data.  */
2311 
2312 static int32_t scsi_disk_dma_command(SCSIRequest *req, uint8_t *buf)
2313 {
2314     SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
2315     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
2316     SCSIDiskClass *sdc = (SCSIDiskClass *) object_get_class(OBJECT(s));
2317     uint32_t len;
2318     uint8_t command;
2319 
2320     command = buf[0];
2321 
2322     if (!blk_is_available(s->qdev.conf.blk)) {
2323         scsi_check_condition(r, SENSE_CODE(NO_MEDIUM));
2324         return 0;
2325     }
2326 
2327     len = scsi_data_cdb_xfer(r->req.cmd.buf);
2328     switch (command) {
2329     case READ_6:
2330     case READ_10:
2331     case READ_12:
2332     case READ_16:
2333         trace_scsi_disk_dma_command_READ(r->req.cmd.lba, len);
2334         /* Protection information is not supported.  For SCSI versions 2 and
2335          * older (as determined by snooping the guest's INQUIRY commands),
2336          * there is no RD/WR/VRPROTECT, so skip this check in these versions.
2337          */
2338         if (s->qdev.scsi_version > 2 && (r->req.cmd.buf[1] & 0xe0)) {
2339             goto illegal_request;
2340         }
2341         if (!check_lba_range(s, r->req.cmd.lba, len)) {
2342             goto illegal_lba;
2343         }
2344         r->sector = r->req.cmd.lba * (s->qdev.blocksize / BDRV_SECTOR_SIZE);
2345         r->sector_count = len * (s->qdev.blocksize / BDRV_SECTOR_SIZE);
2346         break;
2347     case WRITE_6:
2348     case WRITE_10:
2349     case WRITE_12:
2350     case WRITE_16:
2351     case WRITE_VERIFY_10:
2352     case WRITE_VERIFY_12:
2353     case WRITE_VERIFY_16:
2354         if (!blk_is_writable(s->qdev.conf.blk)) {
2355             scsi_check_condition(r, SENSE_CODE(WRITE_PROTECTED));
2356             return 0;
2357         }
2358         trace_scsi_disk_dma_command_WRITE(
2359                 (command & 0xe) == 0xe ? "And Verify " : "",
2360                 r->req.cmd.lba, len);
2361         /* fall through */
2362     case VERIFY_10:
2363     case VERIFY_12:
2364     case VERIFY_16:
2365         /* We get here only for BYTCHK == 0x01 and only for scsi-block.
2366          * As far as DMA is concerned, we can treat it the same as a write;
2367          * scsi_block_do_sgio will send VERIFY commands.
2368          */
2369         if (s->qdev.scsi_version > 2 && (r->req.cmd.buf[1] & 0xe0)) {
2370             goto illegal_request;
2371         }
2372         if (!check_lba_range(s, r->req.cmd.lba, len)) {
2373             goto illegal_lba;
2374         }
2375         r->sector = r->req.cmd.lba * (s->qdev.blocksize / BDRV_SECTOR_SIZE);
2376         r->sector_count = len * (s->qdev.blocksize / BDRV_SECTOR_SIZE);
2377         break;
2378     default:
2379         abort();
2380     illegal_request:
2381         scsi_check_condition(r, SENSE_CODE(INVALID_FIELD));
2382         return 0;
2383     illegal_lba:
2384         scsi_check_condition(r, SENSE_CODE(LBA_OUT_OF_RANGE));
2385         return 0;
2386     }
2387     r->need_fua_emulation = sdc->need_fua_emulation(&r->req.cmd);
2388     if (r->sector_count == 0) {
2389         scsi_req_complete(&r->req, GOOD);
2390     }
2391     assert(r->iov.iov_len == 0);
2392     if (r->req.cmd.mode == SCSI_XFER_TO_DEV) {
2393         return -r->sector_count * BDRV_SECTOR_SIZE;
2394     } else {
2395         return r->sector_count * BDRV_SECTOR_SIZE;
2396     }
2397 }
2398 
2399 static void scsi_disk_reset(DeviceState *dev)
2400 {
2401     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev.qdev, dev);
2402     uint64_t nb_sectors;
2403 
2404     scsi_device_purge_requests(&s->qdev, SENSE_CODE(RESET));
2405 
2406     blk_get_geometry(s->qdev.conf.blk, &nb_sectors);
2407 
2408     nb_sectors /= s->qdev.blocksize / BDRV_SECTOR_SIZE;
2409     if (nb_sectors) {
2410         nb_sectors--;
2411     }
2412     s->qdev.max_lba = nb_sectors;
2413     /* reset tray statuses */
2414     s->tray_locked = 0;
2415     s->tray_open = 0;
2416 
2417     s->qdev.scsi_version = s->qdev.default_scsi_version;
2418 }
2419 
2420 static void scsi_disk_drained_begin(void *opaque)
2421 {
2422     SCSIDiskState *s = opaque;
2423 
2424     scsi_device_drained_begin(&s->qdev);
2425 }
2426 
2427 static void scsi_disk_drained_end(void *opaque)
2428 {
2429     SCSIDiskState *s = opaque;
2430 
2431     scsi_device_drained_end(&s->qdev);
2432 }
2433 
2434 static void scsi_disk_resize_cb(void *opaque)
2435 {
2436     SCSIDiskState *s = opaque;
2437 
2438     /* SPC lists this sense code as available only for
2439      * direct-access devices.
2440      */
2441     if (s->qdev.type == TYPE_DISK) {
2442         scsi_device_report_change(&s->qdev, SENSE_CODE(CAPACITY_CHANGED));
2443     }
2444 }
2445 
2446 static void scsi_cd_change_media_cb(void *opaque, bool load, Error **errp)
2447 {
2448     SCSIDiskState *s = opaque;
2449 
2450     /*
2451      * When a CD gets changed, we have to report an ejected state and
2452      * then a loaded state to guests so that they detect tray
2453      * open/close and media change events.  Guests that do not use
2454      * GET_EVENT_STATUS_NOTIFICATION to detect such tray open/close
2455      * states rely on this behavior.
2456      *
2457      * media_changed governs the state machine used for unit attention
2458      * report.  media_event is used by GET EVENT STATUS NOTIFICATION.
2459      */
2460     s->media_changed = load;
2461     s->tray_open = !load;
2462     scsi_device_set_ua(&s->qdev, SENSE_CODE(UNIT_ATTENTION_NO_MEDIUM));
2463     s->media_event = true;
2464     s->eject_request = false;
2465 }
2466 
2467 static void scsi_cd_eject_request_cb(void *opaque, bool force)
2468 {
2469     SCSIDiskState *s = opaque;
2470 
2471     s->eject_request = true;
2472     if (force) {
2473         s->tray_locked = false;
2474     }
2475 }
2476 
2477 static bool scsi_cd_is_tray_open(void *opaque)
2478 {
2479     return ((SCSIDiskState *)opaque)->tray_open;
2480 }
2481 
2482 static bool scsi_cd_is_medium_locked(void *opaque)
2483 {
2484     return ((SCSIDiskState *)opaque)->tray_locked;
2485 }
2486 
2487 static const BlockDevOps scsi_disk_removable_block_ops = {
2488     .change_media_cb  = scsi_cd_change_media_cb,
2489     .drained_begin    = scsi_disk_drained_begin,
2490     .drained_end      = scsi_disk_drained_end,
2491     .eject_request_cb = scsi_cd_eject_request_cb,
2492     .is_medium_locked = scsi_cd_is_medium_locked,
2493     .is_tray_open     = scsi_cd_is_tray_open,
2494     .resize_cb        = scsi_disk_resize_cb,
2495 };
2496 
2497 static const BlockDevOps scsi_disk_block_ops = {
2498     .drained_begin = scsi_disk_drained_begin,
2499     .drained_end   = scsi_disk_drained_end,
2500     .resize_cb     = scsi_disk_resize_cb,
2501 };
2502 
2503 static void scsi_disk_unit_attention_reported(SCSIDevice *dev)
2504 {
2505     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, dev);
2506     if (s->media_changed) {
2507         s->media_changed = false;
2508         scsi_device_set_ua(&s->qdev, SENSE_CODE(MEDIUM_CHANGED));
2509     }
2510 }
2511 
2512 static void scsi_realize(SCSIDevice *dev, Error **errp)
2513 {
2514     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, dev);
2515     bool read_only;
2516 
2517     if (!s->qdev.conf.blk) {
2518         error_setg(errp, "drive property not set");
2519         return;
2520     }
2521 
2522     if (!(s->features & (1 << SCSI_DISK_F_REMOVABLE)) &&
2523         !blk_is_inserted(s->qdev.conf.blk)) {
2524         error_setg(errp, "Device needs media, but drive is empty");
2525         return;
2526     }
2527 
2528     if (!blkconf_blocksizes(&s->qdev.conf, errp)) {
2529         return;
2530     }
2531 
2532     if (blk_get_aio_context(s->qdev.conf.blk) != qemu_get_aio_context() &&
2533         !s->qdev.hba_supports_iothread)
2534     {
2535         error_setg(errp, "HBA does not support iothreads");
2536         return;
2537     }
2538 
2539     if (dev->type == TYPE_DISK) {
2540         if (!blkconf_geometry(&dev->conf, NULL, 65535, 255, 255, errp)) {
2541             return;
2542         }
2543     }
2544 
2545     read_only = !blk_supports_write_perm(s->qdev.conf.blk);
2546     if (dev->type == TYPE_ROM) {
2547         read_only = true;
2548     }
2549 
2550     if (!blkconf_apply_backend_options(&dev->conf, read_only,
2551                                        dev->type == TYPE_DISK, errp)) {
2552         return;
2553     }
2554 
2555     if (s->qdev.conf.discard_granularity == -1) {
2556         s->qdev.conf.discard_granularity =
2557             MAX(s->qdev.conf.logical_block_size, DEFAULT_DISCARD_GRANULARITY);
2558     }
2559 
2560     if (!s->version) {
2561         s->version = g_strdup(qemu_hw_version());
2562     }
2563     if (!s->vendor) {
2564         s->vendor = g_strdup("QEMU");
2565     }
2566     if (s->serial && strlen(s->serial) > MAX_SERIAL_LEN) {
2567         error_setg(errp, "The serial number can't be longer than %d characters",
2568                    MAX_SERIAL_LEN);
2569         return;
2570     }
2571     if (!s->device_id) {
2572         if (s->serial) {
2573             if (strlen(s->serial) > MAX_SERIAL_LEN_FOR_DEVID) {
2574                 error_setg(errp, "The serial number can't be longer than %d "
2575                            "characters when it is also used as the default for "
2576                            "device_id", MAX_SERIAL_LEN_FOR_DEVID);
2577                 return;
2578             }
2579             s->device_id = g_strdup(s->serial);
2580         } else {
2581             const char *str = blk_name(s->qdev.conf.blk);
2582             if (str && *str) {
2583                 s->device_id = g_strdup(str);
2584             }
2585         }
2586     }
2587 
2588     if (blk_is_sg(s->qdev.conf.blk)) {
2589         error_setg(errp, "unwanted /dev/sg*");
2590         return;
2591     }
2592 
2593     if ((s->features & (1 << SCSI_DISK_F_REMOVABLE)) &&
2594             !(s->features & (1 << SCSI_DISK_F_NO_REMOVABLE_DEVOPS))) {
2595         blk_set_dev_ops(s->qdev.conf.blk, &scsi_disk_removable_block_ops, s);
2596     } else {
2597         blk_set_dev_ops(s->qdev.conf.blk, &scsi_disk_block_ops, s);
2598     }
2599 
2600     blk_iostatus_enable(s->qdev.conf.blk);
2601 
2602     add_boot_device_lchs(&dev->qdev, NULL,
2603                          dev->conf.lcyls,
2604                          dev->conf.lheads,
2605                          dev->conf.lsecs);
2606 }
2607 
2608 static void scsi_unrealize(SCSIDevice *dev)
2609 {
2610     del_boot_device_lchs(&dev->qdev, NULL);
2611 }
2612 
2613 static void scsi_hd_realize(SCSIDevice *dev, Error **errp)
2614 {
2615     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, dev);
2616 
2617     /* This can happen for devices without a drive.  The error message for
2618      * the missing backend will be issued in scsi_realize.
2619      */
2620     if (s->qdev.conf.blk) {
2621         if (!blkconf_blocksizes(&s->qdev.conf, errp)) {
2622             return;
2623         }
2624     }
2625     s->qdev.blocksize = s->qdev.conf.logical_block_size;
2626     s->qdev.type = TYPE_DISK;
2627     if (!s->product) {
2628         s->product = g_strdup("QEMU HARDDISK");
2629     }
2630     scsi_realize(&s->qdev, errp);
2631 }
2632 
2633 static void scsi_cd_realize(SCSIDevice *dev, Error **errp)
2634 {
2635     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, dev);
2636     int ret;
2637     uint32_t blocksize = 2048;
2638 
2639     if (!dev->conf.blk) {
2640         /* Anonymous BlockBackend for an empty drive. As we put it into
2641          * dev->conf, qdev takes care of detaching on unplug. */
2642         dev->conf.blk = blk_new(qemu_get_aio_context(), 0, BLK_PERM_ALL);
2643         ret = blk_attach_dev(dev->conf.blk, &dev->qdev);
2644         assert(ret == 0);
2645     }
2646 
2647     if (dev->conf.physical_block_size != 0) {
2648         blocksize = dev->conf.physical_block_size;
2649     }
2650 
2651     s->qdev.blocksize = blocksize;
2652     s->qdev.type = TYPE_ROM;
2653     s->features |= 1 << SCSI_DISK_F_REMOVABLE;
2654     if (!s->product) {
2655         s->product = g_strdup("QEMU CD-ROM");
2656     }
2657     scsi_realize(&s->qdev, errp);
2658 }
2659 
2660 
2661 static const SCSIReqOps scsi_disk_emulate_reqops = {
2662     .size         = sizeof(SCSIDiskReq),
2663     .free_req     = scsi_free_request,
2664     .send_command = scsi_disk_emulate_command,
2665     .read_data    = scsi_disk_emulate_read_data,
2666     .write_data   = scsi_disk_emulate_write_data,
2667     .get_buf      = scsi_get_buf,
2668     .load_request = scsi_disk_emulate_load_request,
2669     .save_request = scsi_disk_emulate_save_request,
2670 };
2671 
2672 static const SCSIReqOps scsi_disk_dma_reqops = {
2673     .size         = sizeof(SCSIDiskReq),
2674     .free_req     = scsi_free_request,
2675     .send_command = scsi_disk_dma_command,
2676     .read_data    = scsi_read_data,
2677     .write_data   = scsi_write_data,
2678     .get_buf      = scsi_get_buf,
2679     .load_request = scsi_disk_load_request,
2680     .save_request = scsi_disk_save_request,
2681 };
2682 
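     /*
      * Per-opcode reqops dispatch: opcodes not listed here fall back to
      * scsi_disk_emulate_reqops in scsi_new_request().
      */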
2683 static const SCSIReqOps *const scsi_disk_reqops_dispatch[256] = {
2684     [TEST_UNIT_READY]                 = &scsi_disk_emulate_reqops,
2685     [INQUIRY]                         = &scsi_disk_emulate_reqops,
2686     [MODE_SENSE]                      = &scsi_disk_emulate_reqops,
2687     [MODE_SENSE_10]                   = &scsi_disk_emulate_reqops,
2688     [START_STOP]                      = &scsi_disk_emulate_reqops,
2689     [ALLOW_MEDIUM_REMOVAL]            = &scsi_disk_emulate_reqops,
2690     [READ_CAPACITY_10]                = &scsi_disk_emulate_reqops,
2691     [READ_TOC]                        = &scsi_disk_emulate_reqops,
2692     [READ_DVD_STRUCTURE]              = &scsi_disk_emulate_reqops,
2693     [READ_DISC_INFORMATION]           = &scsi_disk_emulate_reqops,
2694     [GET_CONFIGURATION]               = &scsi_disk_emulate_reqops,
2695     [GET_EVENT_STATUS_NOTIFICATION]   = &scsi_disk_emulate_reqops,
2696     [MECHANISM_STATUS]                = &scsi_disk_emulate_reqops,
2697     [SERVICE_ACTION_IN_16]            = &scsi_disk_emulate_reqops,
2698     [REQUEST_SENSE]                   = &scsi_disk_emulate_reqops,
2699     [SYNCHRONIZE_CACHE]               = &scsi_disk_emulate_reqops,
2700     [SEEK_10]                         = &scsi_disk_emulate_reqops,
2701     [MODE_SELECT]                     = &scsi_disk_emulate_reqops,
2702     [MODE_SELECT_10]                  = &scsi_disk_emulate_reqops,
2703     [UNMAP]                           = &scsi_disk_emulate_reqops,
2704     [WRITE_SAME_10]                   = &scsi_disk_emulate_reqops,
2705     [WRITE_SAME_16]                   = &scsi_disk_emulate_reqops,
2706     [VERIFY_10]                       = &scsi_disk_emulate_reqops,
2707     [VERIFY_12]                       = &scsi_disk_emulate_reqops,
2708     [VERIFY_16]                       = &scsi_disk_emulate_reqops,
2709     [FORMAT_UNIT]                     = &scsi_disk_emulate_reqops,
2710 
2711     [READ_6]                          = &scsi_disk_dma_reqops,
2712     [READ_10]                         = &scsi_disk_dma_reqops,
2713     [READ_12]                         = &scsi_disk_dma_reqops,
2714     [READ_16]                         = &scsi_disk_dma_reqops,
2715     [WRITE_6]                         = &scsi_disk_dma_reqops,
2716     [WRITE_10]                        = &scsi_disk_dma_reqops,
2717     [WRITE_12]                        = &scsi_disk_dma_reqops,
2718     [WRITE_16]                        = &scsi_disk_dma_reqops,
2719     [WRITE_VERIFY_10]                 = &scsi_disk_dma_reqops,
2720     [WRITE_VERIFY_12]                 = &scsi_disk_dma_reqops,
2721     [WRITE_VERIFY_16]                 = &scsi_disk_dma_reqops,
2722 };
2723 
2724 static void scsi_disk_new_request_dump(uint32_t lun, uint32_t tag, uint8_t *buf)
2725 {
2726     int len = scsi_cdb_length(buf);
2727     g_autoptr(GString) str = NULL;
2728 
2729     assert(len > 0 && len <= 16);
2730     str = qemu_hexdump_line(NULL, buf, len, 1, 0);
2731     trace_scsi_disk_new_request(lun, tag, str->str);
2732 }
2733 
2734 static SCSIRequest *scsi_new_request(SCSIDevice *d, uint32_t tag, uint32_t lun,
2735                                      uint8_t *buf, void *hba_private)
2736 {
2737     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, d);
2738     SCSIRequest *req;
2739     const SCSIReqOps *ops;
2740     uint8_t command;
2741 
2742     command = buf[0];
2743     ops = scsi_disk_reqops_dispatch[command];
2744     if (!ops) {
2745         ops = &scsi_disk_emulate_reqops;
2746     }
2747     req = scsi_req_alloc(ops, &s->qdev, tag, lun, hba_private);
2748 
2749     if (trace_event_get_state_backends(TRACE_SCSI_DISK_NEW_REQUEST)) {
2750         scsi_disk_new_request_dump(lun, tag, buf);
2751     }
2752 
2753     return req;
2754 }
2755 
2756 #ifdef __linux__
2757 static int get_device_type(SCSIDiskState *s)
2758 {
2759     uint8_t cmd[16];
2760     uint8_t buf[36];
2761     int ret;
2762 
2763     memset(cmd, 0, sizeof(cmd));
2764     memset(buf, 0, sizeof(buf));
2765     cmd[0] = INQUIRY;
2766     cmd[4] = sizeof(buf);
2767 
2768     ret = scsi_SG_IO_FROM_DEV(s->qdev.conf.blk, cmd, sizeof(cmd),
2769                               buf, sizeof(buf), s->qdev.io_timeout);
2770     if (ret < 0) {
2771         return -1;
2772     }
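         /* Standard INQUIRY data: byte 0 carries the peripheral device type,
          * bit 7 of byte 1 is the RMB (removable medium) bit.  */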
2773     s->qdev.type = buf[0];
2774     if (buf[1] & 0x80) {
2775         s->features |= 1 << SCSI_DISK_F_REMOVABLE;
2776     }
2777     return 0;
2778 }
2779 
2780 static void scsi_block_realize(SCSIDevice *dev, Error **errp)
2781 {
2782     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, dev);
2783     int sg_version;
2784     int rc;
2785 
2786     if (!s->qdev.conf.blk) {
2787         error_setg(errp, "drive property not set");
2788         return;
2789     }
2790 
2791     if (s->rotation_rate) {
2792         error_report_once("rotation_rate is specified for scsi-block but is "
2793                           "not implemented. This option is deprecated and will "
2794                           "be removed in a future version");
2795     }
2796 
2797     /* Check that we are using a driver that supports SG_IO (version 3 and later) */
2798     rc = blk_ioctl(s->qdev.conf.blk, SG_GET_VERSION_NUM, &sg_version);
2799     if (rc < 0) {
2800         error_setg_errno(errp, -rc, "cannot get SG_IO version number");
2801         if (rc != -EPERM) {
2802             error_append_hint(errp, "Is this a SCSI device?\n");
2803         }
2804         return;
2805     }
2806     if (sg_version < 30000) {
2807         error_setg(errp, "scsi generic interface too old");
2808         return;
2809     }
2810 
2811     /* get device type from INQUIRY data */
2812     rc = get_device_type(s);
2813     if (rc < 0) {
2814         error_setg(errp, "INQUIRY failed");
2815         return;
2816     }
2817 
2818     /* Make a guess for the block size; we'll fix it when the guest sends
2819      * READ CAPACITY.  If it doesn't, it would likely assume these sizes
2820      * anyway.  (TODO: check in /sys.)
2821      */
2822     if (s->qdev.type == TYPE_ROM || s->qdev.type == TYPE_WORM) {
2823         s->qdev.blocksize = 2048;
2824     } else {
2825         s->qdev.blocksize = 512;
2826     }
2827 
2828     /* Prevent the scsi-block device from being ejected through the HMP
2829      * and QMP eject commands.
2830      */
2831     s->features |= (1 << SCSI_DISK_F_NO_REMOVABLE_DEVOPS);
2832 
2833     scsi_realize(&s->qdev, errp);
2834     scsi_generic_read_device_inquiry(&s->qdev);
2835 }
2836 
2837 typedef struct SCSIBlockReq {
2838     SCSIDiskReq req;
2839     sg_io_hdr_t io_header;
2840 
2841     /* Selected bytes of the original CDB, copied into our own CDB.  */
2842     uint8_t cmd, cdb1, group_number;
2843 
2844     /* CDB passed to SG_IO.  */
2845     uint8_t cdb[16];
2846     BlockCompletionFunc *cb;
2847     void *cb_opaque;
2848 } SCSIBlockReq;
2849 
2850 static void scsi_block_sgio_complete(void *opaque, int ret)
2851 {
2852     SCSIBlockReq *req = (SCSIBlockReq *)opaque;
2853     SCSIDiskReq *r = &req->req;
2854     sg_io_hdr_t *io_hdr = &req->io_header;
2855 
2856     if (ret == 0) {
2857         /* FIXME This skips calling req->cb() and any cleanup in it */
2858         if (io_hdr->host_status != SCSI_HOST_OK) {
2859             scsi_req_complete_failed(&r->req, io_hdr->host_status);
2860             scsi_req_unref(&r->req);
2861             return;
2862         }
2863 
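             /* Report a driver-detected timeout to the guest as BUSY;
              * otherwise propagate the SCSI status from the device.  */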
2864         if (io_hdr->driver_status & SG_ERR_DRIVER_TIMEOUT) {
2865             ret = BUSY;
2866         } else {
2867             ret = io_hdr->status;
2868         }
2869     }
2870 
2871     req->cb(req->cb_opaque, ret);
2872 }
2873 
2874 static BlockAIOCB *scsi_block_do_sgio(SCSIBlockReq *req,
2875                                       int64_t offset, QEMUIOVector *iov,
2876                                       int direction,
2877                                       BlockCompletionFunc *cb, void *opaque)
2878 {
2879     sg_io_hdr_t *io_header = &req->io_header;
2880     SCSIDiskReq *r = &req->req;
2881     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
2882     int nb_logical_blocks;
2883     uint64_t lba;
2884     BlockAIOCB *aiocb;
2885 
2886     /* This is not supported yet.  It can only happen if the guest does
2887      * reads and writes that are not aligned to the logical sector size
2888      * _and_ cover multiple MemoryRegions.
2889      */
2890     assert(offset % s->qdev.blocksize == 0);
2891     assert(iov->size % s->qdev.blocksize == 0);
2892 
2893     io_header->interface_id = 'S';
2894 
2895     /* The data transfer comes from the QEMUIOVector.  */
2896     io_header->dxfer_direction = direction;
2897     io_header->dxfer_len = iov->size;
2898     io_header->dxferp = (void *)iov->iov;
2899     io_header->iovec_count = iov->niov;
2900     assert(io_header->iovec_count == iov->niov); /* no overflow! */
2901 
2902     /* Build a new CDB with the LBA and length patched in, in case
2903      * DMA helpers split the transfer into multiple segments.  Do not
2904      * build a CDB smaller than what the guest wanted, and only build
2905      * a larger one if strictly necessary.
2906      */
2907     io_header->cmdp = req->cdb;
2908     lba = offset / s->qdev.blocksize;
2909     nb_logical_blocks = io_header->dxfer_len / s->qdev.blocksize;
2910 
2911     if ((req->cmd >> 5) == 0 && lba <= 0x1ffff) {
2912         /* 6-byte CDB */
2913         stl_be_p(&req->cdb[0], lba | (req->cmd << 24));
2914         req->cdb[4] = nb_logical_blocks;
2915         req->cdb[5] = 0;
2916         io_header->cmd_len = 6;
2917     } else if ((req->cmd >> 5) <= 1 && lba <= 0xffffffffULL) {
2918         /* 10-byte CDB */
2919         req->cdb[0] = (req->cmd & 0x1f) | 0x20;
2920         req->cdb[1] = req->cdb1;
2921         stl_be_p(&req->cdb[2], lba);
2922         req->cdb[6] = req->group_number;
2923         stw_be_p(&req->cdb[7], nb_logical_blocks);
2924         req->cdb[9] = 0;
2925         io_header->cmd_len = 10;
2926     } else if ((req->cmd >> 5) != 4 && lba <= 0xffffffffULL) {
2927         /* 12-byte CDB */
2928         req->cdb[0] = (req->cmd & 0x1f) | 0xA0;
2929         req->cdb[1] = req->cdb1;
2930         stl_be_p(&req->cdb[2], lba);
2931         stl_be_p(&req->cdb[6], nb_logical_blocks);
2932         req->cdb[10] = req->group_number;
2933         req->cdb[11] = 0;
2934         io_header->cmd_len = 12;
2935     } else {
2936         /* 16-byte CDB */
2937         req->cdb[0] = (req->cmd & 0x1f) | 0x80;
2938         req->cdb[1] = req->cdb1;
2939         stq_be_p(&req->cdb[2], lba);
2940         stl_be_p(&req->cdb[10], nb_logical_blocks);
2941         req->cdb[14] = req->group_number;
2942         req->cdb[15] = 0;
2943         io_header->cmd_len = 16;
2944     }
2945 
2946     /* The rest is as in scsi-generic.c.  */
2947     io_header->mx_sb_len = sizeof(r->req.sense);
2948     io_header->sbp = r->req.sense;
2949     io_header->timeout = s->qdev.io_timeout * 1000;
2950     io_header->usr_ptr = r;
2951     io_header->flags |= SG_FLAG_DIRECT_IO;
2952     req->cb = cb;
2953     req->cb_opaque = opaque;
2954     trace_scsi_disk_aio_sgio_command(r->req.tag, req->cdb[0], lba,
2955                                      nb_logical_blocks, io_header->timeout);
2956     aiocb = blk_aio_ioctl(s->qdev.conf.blk, SG_IO, io_header, scsi_block_sgio_complete, req);
2957     assert(aiocb != NULL);
2958     return aiocb;
2959 }
2960 
2961 static bool scsi_block_no_fua(SCSICommand *cmd)
2962 {
2963     return false;
2964 }
2965 
2966 static BlockAIOCB *scsi_block_dma_readv(int64_t offset,
2967                                         QEMUIOVector *iov,
2968                                         BlockCompletionFunc *cb, void *cb_opaque,
2969                                         void *opaque)
2970 {
2971     SCSIBlockReq *r = opaque;
2972     return scsi_block_do_sgio(r, offset, iov,
2973                               SG_DXFER_FROM_DEV, cb, cb_opaque);
2974 }
2975 
2976 static BlockAIOCB *scsi_block_dma_writev(int64_t offset,
2977                                          QEMUIOVector *iov,
2978                                          BlockCompletionFunc *cb, void *cb_opaque,
2979                                          void *opaque)
2980 {
2981     SCSIBlockReq *r = opaque;
2982     return scsi_block_do_sgio(r, offset, iov,
2983                               SG_DXFER_TO_DEV, cb, cb_opaque);
2984 }
2985 
2986 static bool scsi_block_is_passthrough(SCSIDiskState *s, uint8_t *buf)
2987 {
2988     switch (buf[0]) {
2989     case VERIFY_10:
2990     case VERIFY_12:
2991     case VERIFY_16:
2992         /* Check if BYTCHK == 0x01 (data-out buffer contains data
2993          * for the number of logical blocks specified in the length
2994          * field).  For other modes, do not use scatter/gather operation.
2995          */
2996         if ((buf[1] & 6) == 2) {
2997             return false;
2998         }
2999         break;
3000 
3001     case READ_6:
3002     case READ_10:
3003     case READ_12:
3004     case READ_16:
3005     case WRITE_6:
3006     case WRITE_10:
3007     case WRITE_12:
3008     case WRITE_16:
3009     case WRITE_VERIFY_10:
3010     case WRITE_VERIFY_12:
3011     case WRITE_VERIFY_16:
3012         /* MMC writing cannot be done via DMA helpers, because it sometimes
3013          * involves writing beyond the maximum LBA or to negative LBA (lead-in).
3014          * We might use scsi_block_dma_reqops as long as no writing commands are
3015          * seen, but performance usually isn't paramount on optical media.  So,
3016          * just make scsi-block operate the same as scsi-generic for them.
3017          */
3018         if (s->qdev.type != TYPE_ROM) {
3019             return false;
3020         }
3021         break;
3022 
3023     default:
3024         break;
3025     }
3026 
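         /* Everything else goes to the device as-is through the scsi-generic
          * passthrough reqops (see scsi_block_new_request).  */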
3027     return true;
3028 }
3029 
3030 
3031 static int32_t scsi_block_dma_command(SCSIRequest *req, uint8_t *buf)
3032 {
3033     SCSIBlockReq *r = (SCSIBlockReq *)req;
3034     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
3035 
3036     r->cmd = req->cmd.buf[0];
3037     switch (r->cmd >> 5) {
3038     case 0:
3039         /* 6-byte CDB.  */
3040         r->cdb1 = r->group_number = 0;
3041         break;
3042     case 1:
3043         /* 10-byte CDB.  */
3044         r->cdb1 = req->cmd.buf[1];
3045         r->group_number = req->cmd.buf[6];
3046         break;
3047     case 4:
3048         /* 12-byte CDB.  */
3049         r->cdb1 = req->cmd.buf[1];
3050         r->group_number = req->cmd.buf[10];
3051         break;
3052     case 5:
3053         /* 16-byte CDB.  */
3054         r->cdb1 = req->cmd.buf[1];
3055         r->group_number = req->cmd.buf[14];
3056         break;
3057     default:
3058         abort();
3059     }
3060 
3061     /* Protection information is not supported.  For SCSI versions 2 and
3062      * older (as determined by snooping the guest's INQUIRY commands),
3063      * there is no RD/WR/VRPROTECT, so skip this check in these versions.
3064      */
3065     if (s->qdev.scsi_version > 2 && (req->cmd.buf[1] & 0xe0)) {
3066         scsi_check_condition(&r->req, SENSE_CODE(INVALID_FIELD));
3067         return 0;
3068     }
3069 
3070     return scsi_disk_dma_command(req, buf);
3071 }
3072 
3073 static const SCSIReqOps scsi_block_dma_reqops = {
3074     .size         = sizeof(SCSIBlockReq),
3075     .free_req     = scsi_free_request,
3076     .send_command = scsi_block_dma_command,
3077     .read_data    = scsi_read_data,
3078     .write_data   = scsi_write_data,
3079     .get_buf      = scsi_get_buf,
3080     .load_request = scsi_disk_load_request,
3081     .save_request = scsi_disk_save_request,
3082 };
3083 
3084 static SCSIRequest *scsi_block_new_request(SCSIDevice *d, uint32_t tag,
3085                                            uint32_t lun, uint8_t *buf,
3086                                            void *hba_private)
3087 {
3088     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, d);
3089 
3090     if (scsi_block_is_passthrough(s, buf)) {
3091         return scsi_req_alloc(&scsi_generic_req_ops, &s->qdev, tag, lun,
3092                               hba_private);
3093     } else {
3094         return scsi_req_alloc(&scsi_block_dma_reqops, &s->qdev, tag, lun,
3095                               hba_private);
3096     }
3097 }
3098 
3099 static int scsi_block_parse_cdb(SCSIDevice *d, SCSICommand *cmd,
3100                                   uint8_t *buf, size_t buf_len,
3101                                   void *hba_private)
3102 {
3103     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, d);
3104 
3105     if (scsi_block_is_passthrough(s, buf)) {
3106         return scsi_bus_parse_cdb(&s->qdev, cmd, buf, buf_len, hba_private);
3107     } else {
3108         return scsi_req_parse_cdb(&s->qdev, cmd, buf, buf_len);
3109     }
3110 }
3111 
3112 static void scsi_block_update_sense(SCSIRequest *req)
3113 {
3114     SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
3115     SCSIBlockReq *br = DO_UPCAST(SCSIBlockReq, req, r);
3116     r->req.sense_len = MIN(br->io_header.sb_len_wr, sizeof(r->req.sense));
3117 }
3118 #endif
3119 
3120 static
3121 BlockAIOCB *scsi_dma_readv(int64_t offset, QEMUIOVector *iov,
3122                            BlockCompletionFunc *cb, void *cb_opaque,
3123                            void *opaque)
3124 {
3125     SCSIDiskReq *r = opaque;
3126     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
3127     return blk_aio_preadv(s->qdev.conf.blk, offset, iov, 0, cb, cb_opaque);
3128 }
3129 
3130 static
3131 BlockAIOCB *scsi_dma_writev(int64_t offset, QEMUIOVector *iov,
3132                             BlockCompletionFunc *cb, void *cb_opaque,
3133                             void *opaque)
3134 {
3135     SCSIDiskReq *r = opaque;
3136     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
3137     return blk_aio_pwritev(s->qdev.conf.blk, offset, iov, 0, cb, cb_opaque);
3138 }
3139 
3140 static char *scsi_property_get_loadparm(Object *obj, Error **errp)
3141 {
3142     return g_strdup(SCSI_DISK_BASE(obj)->loadparm);
3143 }
3144 
3145 static void scsi_property_set_loadparm(Object *obj, const char *value,
3146                                        Error **errp)
3147 {
3148     void *lp_str;
3149 
3150     if (object_property_get_int(obj, "bootindex", NULL) < 0) {
3151         error_setg(errp, "'loadparm' is only valid for boot devices");
3152         return;
3153     }
3154 
3155     lp_str = g_malloc0(strlen(value) + 1);
3156     if (!qdev_prop_sanitize_s390x_loadparm(lp_str, value, errp)) {
3157         g_free(lp_str);
3158         return;
3159     }
3160     SCSI_DISK_BASE(obj)->loadparm = lp_str;
3161 }
3162 
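/* Per-architecture extras; called from the scsi-hd and scsi-cd class init. */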
3163 static void scsi_property_add_specifics(DeviceClass *dc)
3164 {
3165     ObjectClass *oc = OBJECT_CLASS(dc);
3166 
3167     /* The loadparm property is only supported on s390x */
3168     if (arch_type & QEMU_ARCH_S390X) {
3169         object_class_property_add_str(oc, "loadparm",
3170                                       scsi_property_get_loadparm,
3171                                       scsi_property_set_loadparm);
3172         object_class_property_set_description(oc, "loadparm",
3173                                               "load parameter (s390x only)");
3174     }
3175 }
3176 
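/*
 * Abstract base class: installs the block-layer DMA helpers and FUA
 * detection used by scsi-hd/scsi-cd; scsi-block overrides these in its own
 * class_init below.
 */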
3177 static void scsi_disk_base_class_initfn(ObjectClass *klass, void *data)
3178 {
3179     DeviceClass *dc = DEVICE_CLASS(klass);
3180     SCSIDiskClass *sdc = SCSI_DISK_BASE_CLASS(klass);
3181 
3182     dc->fw_name = "disk";
3183     device_class_set_legacy_reset(dc, scsi_disk_reset);
3184     sdc->dma_readv = scsi_dma_readv;
3185     sdc->dma_writev = scsi_dma_writev;
3186     sdc->need_fua_emulation = scsi_is_cmd_fua;
3187 }
3188 
3189 static const TypeInfo scsi_disk_base_info = {
3190     .name          = TYPE_SCSI_DISK_BASE,
3191     .parent        = TYPE_SCSI_DEVICE,
3192     .class_init    = scsi_disk_base_class_initfn,
3193     .instance_size = sizeof(SCSIDiskState),
3194     .class_size    = sizeof(SCSIDiskClass),
3195     .abstract      = true,
3196 };
3197 
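/*
 * Properties shared by the emulated disk types (scsi-hd, scsi-cd): the
 * backing drive, block-layer error policy, and the identification strings
 * reported in INQUIRY/VPD data (version, serial, vendor, product,
 * device_id).
 */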
3198 #define DEFINE_SCSI_DISK_PROPERTIES()                                   \
3199     DEFINE_PROP_DRIVE_IOTHREAD("drive", SCSIDiskState, qdev.conf.blk),  \
3200     DEFINE_BLOCK_PROPERTIES_BASE(SCSIDiskState, qdev.conf),             \
3201     DEFINE_BLOCK_ERROR_PROPERTIES(SCSIDiskState, qdev.conf),            \
3202     DEFINE_PROP_STRING("ver", SCSIDiskState, version),                  \
3203     DEFINE_PROP_STRING("serial", SCSIDiskState, serial),                \
3204     DEFINE_PROP_STRING("vendor", SCSIDiskState, vendor),                \
3205     DEFINE_PROP_STRING("product", SCSIDiskState, product),              \
3206     DEFINE_PROP_STRING("device_id", SCSIDiskState, device_id),          \
3207     DEFINE_PROP_BOOL("migrate-emulated-scsi-request", SCSIDiskState,    \
                          migrate_emulated_scsi_request, true)
3208 
3209 
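/*
 * scsi-hd adds the write-oriented knobs on top of the common set:
 * removable/DPOFUA bits, WWNs, UNMAP and I/O size limits, rotation rate,
 * and an emulated SCSI level defaulting to 5 (SPC-3).
 */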
3210 static Property scsi_hd_properties[] = {
3211     DEFINE_SCSI_DISK_PROPERTIES(),
3212     DEFINE_PROP_BIT("removable", SCSIDiskState, features,
3213                     SCSI_DISK_F_REMOVABLE, false),
3214     DEFINE_PROP_BIT("dpofua", SCSIDiskState, features,
3215                     SCSI_DISK_F_DPOFUA, false),
3216     DEFINE_PROP_UINT64("wwn", SCSIDiskState, qdev.wwn, 0),
3217     DEFINE_PROP_UINT64("port_wwn", SCSIDiskState, qdev.port_wwn, 0),
3218     DEFINE_PROP_UINT16("port_index", SCSIDiskState, port_index, 0),
3219     DEFINE_PROP_UINT64("max_unmap_size", SCSIDiskState, max_unmap_size,
3220                        DEFAULT_MAX_UNMAP_SIZE),
3221     DEFINE_PROP_UINT64("max_io_size", SCSIDiskState, max_io_size,
3222                        DEFAULT_MAX_IO_SIZE),
3223     DEFINE_PROP_UINT16("rotation_rate", SCSIDiskState, rotation_rate, 0),
3224     DEFINE_PROP_INT32("scsi_version", SCSIDiskState, qdev.default_scsi_version,
3225                       5),
3226     DEFINE_PROP_BIT("quirk_mode_page_vendor_specific_apple", SCSIDiskState,
3227                     quirks, SCSI_DISK_QUIRK_MODE_PAGE_VENDOR_SPECIFIC_APPLE,
3228                     0),
3229     DEFINE_BLOCK_CHS_PROPERTIES(SCSIDiskState, qdev.conf),
3230     DEFINE_PROP_END_OF_LIST(),
3231 };
3232 
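/*
 * Migration state shared by all scsi-disk variants: the generic SCSI
 * device state plus removable-media bookkeeping (media change/eject
 * events and tray status).
 */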
3233 static const VMStateDescription vmstate_scsi_disk_state = {
3234     .name = "scsi-disk",
3235     .version_id = 1,
3236     .minimum_version_id = 1,
3237     .fields = (const VMStateField[]) {
3238         VMSTATE_SCSI_DEVICE(qdev, SCSIDiskState),
3239         VMSTATE_BOOL(media_changed, SCSIDiskState),
3240         VMSTATE_BOOL(media_event, SCSIDiskState),
3241         VMSTATE_BOOL(eject_request, SCSIDiskState),
3242         VMSTATE_BOOL(tray_open, SCSIDiskState),
3243         VMSTATE_BOOL(tray_locked, SCSIDiskState),
3244         VMSTATE_END_OF_LIST()
3245     }
3246 };
3247 
3248 static void scsi_hd_class_initfn(ObjectClass *klass, void *data)
3249 {
3250     DeviceClass *dc = DEVICE_CLASS(klass);
3251     SCSIDeviceClass *sc = SCSI_DEVICE_CLASS(klass);
3252 
3253     sc->realize      = scsi_hd_realize;
3254     sc->unrealize    = scsi_unrealize;
3255     sc->alloc_req    = scsi_new_request;
3256     sc->unit_attention_reported = scsi_disk_unit_attention_reported;
3257     dc->desc = "virtual SCSI disk";
3258     device_class_set_props(dc, scsi_hd_properties);
3259     dc->vmsd  = &vmstate_scsi_disk_state;
3260 
3261     scsi_property_add_specifics(dc);
3262 }
3263 
3264 static const TypeInfo scsi_hd_info = {
3265     .name          = "scsi-hd",
3266     .parent        = TYPE_SCSI_DISK_BASE,
3267     .class_init    = scsi_hd_class_initfn,
3268 };
3269 
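/*
 * scsi-cd reuses the common properties but drops the write-related ones
 * and adds quirk bits for guests with non-standard MODE SENSE/MODE SELECT
 * expectations (apparently mainly old Apple firmware, going by the quirk
 * names).
 */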
3270 static Property scsi_cd_properties[] = {
3271     DEFINE_SCSI_DISK_PROPERTIES(),
3272     DEFINE_PROP_UINT64("wwn", SCSIDiskState, qdev.wwn, 0),
3273     DEFINE_PROP_UINT64("port_wwn", SCSIDiskState, qdev.port_wwn, 0),
3274     DEFINE_PROP_UINT16("port_index", SCSIDiskState, port_index, 0),
3275     DEFINE_PROP_UINT64("max_io_size", SCSIDiskState, max_io_size,
3276                        DEFAULT_MAX_IO_SIZE),
3277     DEFINE_PROP_INT32("scsi_version", SCSIDiskState, qdev.default_scsi_version,
3278                       5),
3279     DEFINE_PROP_BIT("quirk_mode_page_apple_vendor", SCSIDiskState, quirks,
3280                     SCSI_DISK_QUIRK_MODE_PAGE_APPLE_VENDOR, 0),
3281     DEFINE_PROP_BIT("quirk_mode_sense_rom_use_dbd", SCSIDiskState, quirks,
3282                     SCSI_DISK_QUIRK_MODE_SENSE_ROM_USE_DBD, 0),
3283     DEFINE_PROP_BIT("quirk_mode_page_vendor_specific_apple", SCSIDiskState,
3284                     quirks, SCSI_DISK_QUIRK_MODE_PAGE_VENDOR_SPECIFIC_APPLE,
3285                     0),
3286     DEFINE_PROP_BIT("quirk_mode_page_truncated", SCSIDiskState, quirks,
3287                     SCSI_DISK_QUIRK_MODE_PAGE_TRUNCATED, 0),
3288     DEFINE_PROP_END_OF_LIST(),
3289 };
3290 
3291 static void scsi_cd_class_initfn(ObjectClass *klass, void *data)
3292 {
3293     DeviceClass *dc = DEVICE_CLASS(klass);
3294     SCSIDeviceClass *sc = SCSI_DEVICE_CLASS(klass);
3295 
3296     sc->realize      = scsi_cd_realize;
3297     sc->alloc_req    = scsi_new_request;
3298     sc->unit_attention_reported = scsi_disk_unit_attention_reported;
3299     dc->desc = "virtual SCSI CD-ROM";
3300     device_class_set_props(dc, scsi_cd_properties);
3301     dc->vmsd  = &vmstate_scsi_disk_state;
3302 
3303     scsi_property_add_specifics(dc);
3304 }
3305 
3306 static const TypeInfo scsi_cd_info = {
3307     .name          = "scsi-cd",
3308     .parent        = TYPE_SCSI_DISK_BASE,
3309     .class_init    = scsi_cd_class_initfn,
3310 };
3311 
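/*
 * scsi-block: passthrough to a real host SCSI device.  Linux-only because
 * it relies on the SG_IO ioctl.  scsi_version defaults to -1, which among
 * other things skips the RD/WR/VRPROTECT check in scsi_block_dma_command()
 * and defers such validation to the real device.
 *
 * Illustrative usage (backend names assumed):
 *   -blockdev driver=host_device,node-name=hd0,filename=/dev/sdb
 *   -device scsi-block,drive=hd0
 */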
3312 #ifdef __linux__
3313 static Property scsi_block_properties[] = {
3314     DEFINE_BLOCK_ERROR_PROPERTIES(SCSIDiskState, qdev.conf),
3315     DEFINE_PROP_DRIVE("drive", SCSIDiskState, qdev.conf.blk),
3316     DEFINE_PROP_BOOL("share-rw", SCSIDiskState, qdev.conf.share_rw, false),
3317     DEFINE_PROP_UINT16("rotation_rate", SCSIDiskState, rotation_rate, 0),
3318     DEFINE_PROP_UINT64("max_unmap_size", SCSIDiskState, max_unmap_size,
3319                        DEFAULT_MAX_UNMAP_SIZE),
3320     DEFINE_PROP_UINT64("max_io_size", SCSIDiskState, max_io_size,
3321                        DEFAULT_MAX_IO_SIZE),
3322     DEFINE_PROP_INT32("scsi_version", SCSIDiskState, qdev.default_scsi_version,
3323                       -1),
3324     DEFINE_PROP_UINT32("io_timeout", SCSIDiskState, qdev.io_timeout,
3325                        DEFAULT_IO_TIMEOUT),
3326     DEFINE_PROP_END_OF_LIST(),
3327 };
3328 
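/*
 * scsi-block overrides the base class hooks: reads and writes go through
 * the SG_IO based dma_readv/dma_writev, sense data is taken from the SG
 * header after completion, and FUA emulation is skipped because the
 * original CDB (including any FUA bit) is sent to the real device.
 */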
3329 static void scsi_block_class_initfn(ObjectClass *klass, void *data)
3330 {
3331     DeviceClass *dc = DEVICE_CLASS(klass);
3332     SCSIDeviceClass *sc = SCSI_DEVICE_CLASS(klass);
3333     SCSIDiskClass *sdc = SCSI_DISK_BASE_CLASS(klass);
3334 
3335     sc->realize      = scsi_block_realize;
3336     sc->alloc_req    = scsi_block_new_request;
3337     sc->parse_cdb    = scsi_block_parse_cdb;
3338     sdc->dma_readv   = scsi_block_dma_readv;
3339     sdc->dma_writev  = scsi_block_dma_writev;
3340     sdc->update_sense = scsi_block_update_sense;
3341     sdc->need_fua_emulation = scsi_block_no_fua;
3342     dc->desc = "SCSI block device passthrough";
3343     device_class_set_props(dc, scsi_block_properties);
3344     dc->vmsd  = &vmstate_scsi_disk_state;
3345 }
3346 
3347 static const TypeInfo scsi_block_info = {
3348     .name          = "scsi-block",
3349     .parent        = TYPE_SCSI_DISK_BASE,
3350     .class_init    = scsi_block_class_initfn,
3351 };
3352 #endif
3353 
3354 static void scsi_disk_register_types(void)
3355 {
3356     type_register_static(&scsi_disk_base_info);
3357     type_register_static(&scsi_hd_info);
3358     type_register_static(&scsi_cd_info);
3359 #ifdef __linux__
3360     type_register_static(&scsi_block_info);
3361 #endif
3362 }
3363 
3364 type_init(scsi_disk_register_types)
3365