xref: /openbmc/qemu/hw/scsi/scsi-disk.c (revision c700d068)
1 /*
2  * SCSI Device emulation
3  *
4  * Copyright (c) 2006 CodeSourcery.
5  * Based on code by Fabrice Bellard
6  *
7  * Written by Paul Brook
8  * Modifications:
9  *  2009-Dec-12 Artyom Tarasenko : implemented stamdard inquiry for the case
10  *                                 when the allocation length of CDB is smaller
11  *                                 than 36.
12  *  2009-Oct-13 Artyom Tarasenko : implemented the block descriptor in the
13  *                                 MODE SENSE response.
14  *
15  * This code is licensed under the LGPL.
16  *
17  * Note that this file only handles the SCSI architecture model and device
18  * commands.  Emulation of interface/link layer protocols is handled by
19  * the host adapter emulator.
20  */
21 
22 #include "qemu/osdep.h"
23 #include "qemu/units.h"
24 #include "qapi/error.h"
25 #include "qemu/error-report.h"
26 #include "qemu/main-loop.h"
27 #include "qemu/module.h"
28 #include "qemu/hw-version.h"
29 #include "qemu/memalign.h"
30 #include "hw/scsi/scsi.h"
31 #include "migration/qemu-file-types.h"
32 #include "migration/vmstate.h"
33 #include "hw/scsi/emulation.h"
34 #include "scsi/constants.h"
35 #include "sysemu/block-backend.h"
36 #include "sysemu/blockdev.h"
37 #include "hw/block/block.h"
38 #include "hw/qdev-properties.h"
39 #include "hw/qdev-properties-system.h"
40 #include "sysemu/dma.h"
41 #include "sysemu/sysemu.h"
42 #include "qemu/cutils.h"
43 #include "trace.h"
44 #include "qom/object.h"
45 
/*
 * __linux__ is the standard predefined macro on Linux targets; the bare
 * __linux spelling is a GNU extension that is absent in strict-conformance
 * modes, so test the portable form.
 */
#ifdef __linux__
#include <scsi/sg.h>
#endif
49 
50 #define SCSI_WRITE_SAME_MAX         (512 * KiB)
51 #define SCSI_DMA_BUF_SIZE           (128 * KiB)
52 #define SCSI_MAX_INQUIRY_LEN        256
53 #define SCSI_MAX_MODE_LEN           256
54 
55 #define DEFAULT_DISCARD_GRANULARITY (4 * KiB)
56 #define DEFAULT_MAX_UNMAP_SIZE      (1 * GiB)
57 #define DEFAULT_MAX_IO_SIZE         INT_MAX     /* 2 GB - 1 block */
58 
59 #define TYPE_SCSI_DISK_BASE         "scsi-disk-base"
60 
61 #define MAX_SERIAL_LEN              36
62 #define MAX_SERIAL_LEN_FOR_DEVID    20
63 
64 OBJECT_DECLARE_TYPE(SCSIDiskState, SCSIDiskClass, SCSI_DISK_BASE)
65 
struct SCSIDiskClass {
    SCSIDeviceClass parent_class;
    /*
     * Callbacks receive ret == 0 for success. Errors are represented either as
     * negative errno values, or as positive SAM status codes.
     *
     * Beware: For errors returned in host_status, the function may directly
     * complete the request and never call the callback.
     */
    DMAIOFunc       *dma_readv;   /* submit a read for a request's qiov */
    DMAIOFunc       *dma_writev;  /* submit a write for a request's qiov */
    /* Whether FUA for this command must be emulated with an explicit flush */
    bool            (*need_fua_emulation)(SCSICommand *cmd);
    /* Refresh req->sense after a passthrough CHECK CONDITION completion */
    void            (*update_sense)(SCSIRequest *r);
};
80 
typedef struct SCSIDiskReq {
    SCSIRequest req;
    /* Both sector and sector_count are in terms of BDRV_SECTOR_SIZE bytes.  */
    uint64_t sector;           /* next sector to transfer */
    uint32_t sector_count;     /* sectors still to be transferred */
    uint32_t buflen;           /* allocated size of the bounce buffer below */
    bool started;              /* first chunk of the transfer was issued */
    bool need_fua_emulation;   /* emulate FUA with an explicit flush */
    struct iovec iov;          /* bounce buffer for the non-scatter/gather path
                                * (allocated lazily by scsi_init_iovec) */
    QEMUIOVector qiov;         /* single-element qiov wrapping 'iov' */
    BlockAcctCookie acct;      /* block-layer accounting cookie */
} SCSIDiskReq;
93 
94 #define SCSI_DISK_F_REMOVABLE             0
95 #define SCSI_DISK_F_DPOFUA                1
96 #define SCSI_DISK_F_NO_REMOVABLE_DEVOPS   2
97 
struct SCSIDiskState {
    SCSIDevice qdev;
    uint32_t features;          /* bitmask of SCSI_DISK_F_* flags */
    bool media_changed;
    bool media_event;
    bool eject_request;
    uint16_t port_index;        /* reported in VPD page 0x83 when nonzero */
    uint64_t max_unmap_size;    /* reported in VPD page 0xb0 (block limits) */
    uint64_t max_io_size;       /* reported in VPD page 0xb0 (block limits) */
    uint32_t quirks;
    QEMUBH *bh;
    char *version;              /* standard INQUIRY revision (4 bytes used) */
    char *serial;               /* VPD page 0x80 unit serial number */
    char *vendor;               /* standard INQUIRY vendor identification */
    char *product;              /* standard INQUIRY product identification */
    char *device_id;            /* VPD page 0x83 ASCII designator */
    bool tray_open;
    bool tray_locked;
    /*
     * 0x0000        - rotation rate not reported
     * 0x0001        - non-rotating medium (SSD)
     * 0x0002-0x0400 - reserved
     * 0x0401-0xffe  - rotations per minute
     * 0xffff        - reserved
     */
    uint16_t rotation_rate;
    /* Include emulated-command request state in migration streams */
    bool migrate_emulated_scsi_request;
};
126 
127 static void scsi_free_request(SCSIRequest *req)
128 {
129     SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
130 
131     qemu_vfree(r->iov.iov_base);
132 }
133 
134 /* Helper function for command completion with sense.  */
135 static void scsi_check_condition(SCSIDiskReq *r, SCSISense sense)
136 {
137     trace_scsi_disk_check_condition(r->req.tag, sense.key, sense.asc,
138                                     sense.ascq);
139     scsi_req_build_sense(&r->req, sense);
140     scsi_req_complete(&r->req, CHECK_CONDITION);
141 }
142 
143 static void scsi_init_iovec(SCSIDiskReq *r, size_t size)
144 {
145     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
146 
147     if (!r->iov.iov_base) {
148         r->buflen = size;
149         r->iov.iov_base = blk_blockalign(s->qdev.conf.blk, r->buflen);
150     }
151     r->iov.iov_len = MIN(r->sector_count * BDRV_SECTOR_SIZE, r->buflen);
152     qemu_iovec_init_external(&r->qiov, &r->iov, 1);
153 }
154 
/*
 * Migration: serialize per-request state.  The wire format is sector (be64),
 * sector_count (be32), buflen (be32); when a bounce buffer is in use, it is
 * followed by the buffer contents (writes), or by a be32 length plus the
 * contents (reads that will not be retried from scratch).  Must stay in
 * sync with scsi_disk_load_request().
 */
static void scsi_disk_save_request(QEMUFile *f, SCSIRequest *req)
{
    SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);

    qemu_put_be64s(f, &r->sector);
    qemu_put_be32s(f, &r->sector_count);
    qemu_put_be32s(f, &r->buflen);
    if (r->buflen) {
        if (r->req.cmd.mode == SCSI_XFER_TO_DEV) {
            qemu_put_buffer(f, r->iov.iov_base, r->iov.iov_len);
        } else if (!req->retry) {
            uint32_t len = r->iov.iov_len;
            qemu_put_be32s(f, &len);
            qemu_put_buffer(f, r->iov.iov_base, r->iov.iov_len);
        }
    }
}
172 
173 static void scsi_disk_emulate_save_request(QEMUFile *f, SCSIRequest *req)
174 {
175     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
176 
177     if (s->migrate_emulated_scsi_request) {
178         scsi_disk_save_request(f, req);
179     }
180 }
181 
/*
 * Migration: restore per-request state written by scsi_disk_save_request().
 * Re-allocates the bounce buffer (via scsi_init_iovec) before reading the
 * payload back, then rebuilds the qiov around it.
 */
static void scsi_disk_load_request(QEMUFile *f, SCSIRequest *req)
{
    SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);

    qemu_get_be64s(f, &r->sector);
    qemu_get_be32s(f, &r->sector_count);
    qemu_get_be32s(f, &r->buflen);
    if (r->buflen) {
        scsi_init_iovec(r, r->buflen);
        if (r->req.cmd.mode == SCSI_XFER_TO_DEV) {
            qemu_get_buffer(f, r->iov.iov_base, r->iov.iov_len);
        } else if (!r->req.retry) {
            uint32_t len;
            qemu_get_be32s(f, &len);
            r->iov.iov_len = len;
            /* A corrupt stream must not claim more than the buffer holds. */
            assert(r->iov.iov_len <= r->buflen);
            qemu_get_buffer(f, r->iov.iov_base, r->iov.iov_len);
        }
    }

    qemu_iovec_init_external(&r->qiov, &r->iov, 1);
}
204 
205 static void scsi_disk_emulate_load_request(QEMUFile *f, SCSIRequest *req)
206 {
207     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
208 
209     if (s->migrate_emulated_scsi_request) {
210         scsi_disk_load_request(f, req);
211     }
212 }
213 
/*
 * scsi_handle_rw_error has two return values.  False means that the error
 * must be ignored, true means that the error has been processed and the
 * caller should not do anything else for this request.  Note that
 * scsi_handle_rw_error always manages its reference counts, independent
 * of the return value.
 */
static bool scsi_handle_rw_error(SCSIDiskReq *r, int ret, bool acct_failed)
{
    bool is_read = (r->req.cmd.mode == SCSI_XFER_FROM_DEV);
    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
    SCSIDiskClass *sdc = (SCSIDiskClass *) object_get_class(OBJECT(s));
    SCSISense sense = SENSE_CODE(NO_SENSE);
    int error;
    bool req_has_sense = false;
    BlockErrorAction action;
    int status;

    if (ret < 0) {
        /* Host-side errno from the block layer: derive status and sense. */
        status = scsi_sense_from_errno(-ret, &sense);
        error = -ret;
    } else {
        /* A passthrough command has completed with nonzero status.  */
        status = ret;
        switch (status) {
        case CHECK_CONDITION:
            /* The device already supplied sense data; map it to an errno
             * only for the rerror=/werror= policy decision below. */
            req_has_sense = true;
            error = scsi_sense_buf_to_errno(r->req.sense, sizeof(r->req.sense));
            break;
        case RESERVATION_CONFLICT:
            /*
             * Don't apply the error policy, always report to the guest.
             *
             * This is a passthrough code path, so it's not a backend error, but
             * a response to an invalid guest request.
             *
             * Windows Failover Cluster validation intentionally sends invalid
             * requests to verify that reservations work as intended. It is
             * crucial that it sees the resulting errors.
             *
             * Treating a reservation conflict as a guest-side error is obvious
             * when a pr-manager is in use. Without one, the situation is less
             * clear, but there might be nothing that can be fixed on the host
             * (like in the above example), and we don't want to be stuck in a
             * loop where resuming the VM and retrying the request immediately
             * stops it again. So always reporting is still the safer option in
             * this case, too.
             */
            error = 0;
            break;
        default:
            /* Other nonzero statuses carry no sense; use a generic error. */
            error = EINVAL;
            break;
        }
    }

    /*
     * Check whether the error has to be handled by the guest or should
     * rather follow the rerror=/werror= settings.  Guest-handled errors
     * are usually retried immediately, so do not post them to QMP and
     * do not account them as failed I/O.
     */
    if (!error || (req_has_sense &&
                   scsi_sense_buf_is_guest_recoverable(r->req.sense,
                                                       sizeof(r->req.sense)))) {
        action = BLOCK_ERROR_ACTION_REPORT;
        acct_failed = false;
    } else {
        action = blk_get_error_action(s->qdev.conf.blk, is_read, error);
        blk_error_action(s->qdev.conf.blk, action, is_read, error);
    }

    switch (action) {
    case BLOCK_ERROR_ACTION_REPORT:
        if (acct_failed) {
            block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct);
        }
        if (req_has_sense) {
            /* Keep the device-provided sense, letting the subclass adjust. */
            sdc->update_sense(&r->req);
        } else if (status == CHECK_CONDITION) {
            /* Sense was synthesized from errno above. */
            scsi_req_build_sense(&r->req, sense);
        }
        scsi_req_complete(&r->req, status);
        return true;

    case BLOCK_ERROR_ACTION_IGNORE:
        return false;

    case BLOCK_ERROR_ACTION_STOP:
        /* VM is stopped by the policy; queue the request for a retry. */
        scsi_req_retry(&r->req);
        return true;

    default:
        g_assert_not_reached();
    }
}
310 
311 static bool scsi_disk_req_check_error(SCSIDiskReq *r, int ret, bool acct_failed)
312 {
313     if (r->req.io_canceled) {
314         scsi_req_cancel_complete(&r->req);
315         return true;
316     }
317 
318     if (ret != 0) {
319         return scsi_handle_rw_error(r, ret, acct_failed);
320     }
321 
322     return false;
323 }
324 
325 static void scsi_aio_complete(void *opaque, int ret)
326 {
327     SCSIDiskReq *r = (SCSIDiskReq *)opaque;
328     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
329 
330     /* The request must only run in the BlockBackend's AioContext */
331     assert(blk_get_aio_context(s->qdev.conf.blk) ==
332            qemu_get_current_aio_context());
333 
334     assert(r->req.aiocb != NULL);
335     r->req.aiocb = NULL;
336 
337     if (scsi_disk_req_check_error(r, ret, true)) {
338         goto done;
339     }
340 
341     block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct);
342     scsi_req_complete(&r->req, GOOD);
343 
344 done:
345     scsi_req_unref(&r->req);
346 }
347 
348 static bool scsi_is_cmd_fua(SCSICommand *cmd)
349 {
350     switch (cmd->buf[0]) {
351     case READ_10:
352     case READ_12:
353     case READ_16:
354     case WRITE_10:
355     case WRITE_12:
356     case WRITE_16:
357         return (cmd->buf[1] & 8) != 0;
358 
359     case VERIFY_10:
360     case VERIFY_12:
361     case VERIFY_16:
362     case WRITE_VERIFY_10:
363     case WRITE_VERIFY_12:
364     case WRITE_VERIFY_16:
365         return true;
366 
367     case READ_6:
368     case WRITE_6:
369     default:
370         return false;
371     }
372 }
373 
374 static void scsi_write_do_fua(SCSIDiskReq *r)
375 {
376     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
377 
378     assert(r->req.aiocb == NULL);
379     assert(!r->req.io_canceled);
380 
381     if (r->need_fua_emulation) {
382         block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct, 0,
383                          BLOCK_ACCT_FLUSH);
384         r->req.aiocb = blk_aio_flush(s->qdev.conf.blk, scsi_aio_complete, r);
385         return;
386     }
387 
388     scsi_req_complete(&r->req, GOOD);
389     scsi_req_unref(&r->req);
390 }
391 
/*
 * Completion for the scatter/gather path: the whole remaining range was
 * transferred in one operation, so consume it, then either run write-FUA
 * handling or complete with GOOD.  Drops the submitter's request reference.
 */
static void scsi_dma_complete_noio(SCSIDiskReq *r, int ret)
{
    assert(r->req.aiocb == NULL);
    /* ret > 0 is a passthrough SAM status: account it as a failure. */
    if (scsi_disk_req_check_error(r, ret, ret > 0)) {
        goto done;
    }

    r->sector += r->sector_count;
    r->sector_count = 0;
    if (r->req.cmd.mode == SCSI_XFER_TO_DEV) {
        scsi_write_do_fua(r);   /* completes and unrefs the request itself */
        return;
    } else {
        scsi_req_complete(&r->req, GOOD);
    }

done:
    scsi_req_unref(&r->req);
}
411 
412 /* May not be called in all error cases, don't rely on cleanup here */
413 static void scsi_dma_complete(void *opaque, int ret)
414 {
415     SCSIDiskReq *r = (SCSIDiskReq *)opaque;
416     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
417 
418     assert(r->req.aiocb != NULL);
419     r->req.aiocb = NULL;
420 
421     /* ret > 0 is accounted for in scsi_disk_req_check_error() */
422     if (ret < 0) {
423         block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct);
424     } else if (ret == 0) {
425         block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct);
426     }
427     scsi_dma_complete_noio(r, ret);
428 }
429 
/*
 * Hand a completed bounce-buffer read chunk to the HBA and advance the
 * request position.  Drops the reference taken when the I/O was submitted.
 */
static void scsi_read_complete_noio(SCSIDiskReq *r, int ret)
{
    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
    uint32_t n;

    /* The request must only run in the BlockBackend's AioContext */
    assert(blk_get_aio_context(s->qdev.conf.blk) ==
           qemu_get_current_aio_context());

    assert(r->req.aiocb == NULL);
    if (scsi_disk_req_check_error(r, ret, ret > 0)) {
        goto done;
    }

    /* Consume the sectors covered by this chunk, then pass the data on. */
    n = r->qiov.size / BDRV_SECTOR_SIZE;
    r->sector += n;
    r->sector_count -= n;
    scsi_req_data(&r->req, r->qiov.size);

done:
    scsi_req_unref(&r->req);
}
452 
453 /* May not be called in all error cases, don't rely on cleanup here */
454 static void scsi_read_complete(void *opaque, int ret)
455 {
456     SCSIDiskReq *r = (SCSIDiskReq *)opaque;
457     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
458 
459     assert(r->req.aiocb != NULL);
460     r->req.aiocb = NULL;
461 
462     /* ret > 0 is accounted for in scsi_disk_req_check_error() */
463     if (ret < 0) {
464         block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct);
465     } else if (ret == 0) {
466         block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct);
467         trace_scsi_disk_read_complete(r->req.tag, r->qiov.size);
468     }
469     scsi_read_complete_noio(r, ret);
470 }
471 
/* Actually issue a read to the block device.  */
static void scsi_do_read(SCSIDiskReq *r, int ret)
{
    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
    SCSIDiskClass *sdc = (SCSIDiskClass *) object_get_class(OBJECT(s));

    assert (r->req.aiocb == NULL);
    /* 'ret' is the result of a preceding FUA flush, if one was issued. */
    if (scsi_disk_req_check_error(r, ret, false)) {
        goto done;
    }

    /* The request is used as the AIO opaque value, so add a ref.  */
    scsi_req_ref(&r->req);

    if (r->req.sg) {
        /* Scatter/gather path: DMA the whole remaining range at once. */
        dma_acct_start(s->qdev.conf.blk, &r->acct, r->req.sg, BLOCK_ACCT_READ);
        r->req.residual -= r->req.sg->size;
        r->req.aiocb = dma_blk_io(blk_get_aio_context(s->qdev.conf.blk),
                                  r->req.sg, r->sector << BDRV_SECTOR_BITS,
                                  BDRV_SECTOR_SIZE,
                                  sdc->dma_readv, r, scsi_dma_complete, r,
                                  DMA_DIRECTION_FROM_DEVICE);
    } else {
        /* Bounce-buffer path: read up to SCSI_DMA_BUF_SIZE bytes per chunk. */
        scsi_init_iovec(r, SCSI_DMA_BUF_SIZE);
        block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct,
                         r->qiov.size, BLOCK_ACCT_READ);
        r->req.aiocb = sdc->dma_readv(r->sector << BDRV_SECTOR_BITS, &r->qiov,
                                      scsi_read_complete, r, r);
    }

done:
    scsi_req_unref(&r->req);
}
505 
506 static void scsi_do_read_cb(void *opaque, int ret)
507 {
508     SCSIDiskReq *r = (SCSIDiskReq *)opaque;
509     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
510 
511     assert (r->req.aiocb != NULL);
512     r->req.aiocb = NULL;
513 
514     if (ret < 0) {
515         block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct);
516     } else {
517         block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct);
518     }
519     scsi_do_read(opaque, ret);
520 }
521 
/* Read more data from scsi device into buffer.  */
static void scsi_read_data(SCSIRequest *req)
{
    SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
    bool first;

    trace_scsi_disk_read_data_count(r->sector_count);
    if (r->sector_count == 0) {
        /* This also clears the sense buffer for REQUEST SENSE.  */
        scsi_req_complete(&r->req, GOOD);
        return;
    }

    /* No data transfer may already be in progress */
    assert(r->req.aiocb == NULL);

    /* The request is used as the AIO opaque value, so add a ref.  */
    scsi_req_ref(&r->req);
    if (r->req.cmd.mode == SCSI_XFER_TO_DEV) {
        /* A write-direction command must never reach the read path. */
        trace_scsi_disk_read_data_invalid();
        scsi_read_complete_noio(r, -EINVAL);
        return;
    }

    if (!blk_is_available(req->dev->conf.blk)) {
        /* No medium (e.g. tray open): fail the transfer. */
        scsi_read_complete_noio(r, -ENOMEDIUM);
        return;
    }

    first = !r->started;
    r->started = true;
    if (first && r->need_fua_emulation) {
        /*
         * Emulated FUA read: flush the write cache before the first chunk
         * so the data comes from the medium rather than a dirty cache.
         */
        block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct, 0,
                         BLOCK_ACCT_FLUSH);
        r->req.aiocb = blk_aio_flush(s->qdev.conf.blk, scsi_do_read_cb, r);
    } else {
        scsi_do_read(r, 0);
    }
}
562 
/*
 * Advance the request after a chunk of data was written (or after the HBA
 * primed the transfer).  When the transfer is finished, move on to FUA
 * handling; otherwise set up the next chunk and ask the HBA for more data.
 * Drops the reference taken by the submission path.
 */
static void scsi_write_complete_noio(SCSIDiskReq *r, int ret)
{
    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
    uint32_t n;

    /* The request must only run in the BlockBackend's AioContext */
    assert(blk_get_aio_context(s->qdev.conf.blk) ==
           qemu_get_current_aio_context());

    assert (r->req.aiocb == NULL);
    if (scsi_disk_req_check_error(r, ret, ret > 0)) {
        goto done;
    }

    n = r->qiov.size / BDRV_SECTOR_SIZE;
    r->sector += n;
    r->sector_count -= n;
    if (r->sector_count == 0) {
        scsi_write_do_fua(r);   /* completes and unrefs the request itself */
        return;
    } else {
        /* Prepare the next chunk and request more data from the HBA. */
        scsi_init_iovec(r, SCSI_DMA_BUF_SIZE);
        trace_scsi_disk_write_complete_noio(r->req.tag, r->qiov.size);
        scsi_req_data(&r->req, r->qiov.size);
    }

done:
    scsi_req_unref(&r->req);
}
592 
593 /* May not be called in all error cases, don't rely on cleanup here */
594 static void scsi_write_complete(void * opaque, int ret)
595 {
596     SCSIDiskReq *r = (SCSIDiskReq *)opaque;
597     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
598 
599     assert (r->req.aiocb != NULL);
600     r->req.aiocb = NULL;
601 
602     /* ret > 0 is accounted for in scsi_disk_req_check_error() */
603     if (ret < 0) {
604         block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct);
605     } else if (ret == 0) {
606         block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct);
607     }
608     scsi_write_complete_noio(r, ret);
609 }
610 
/*
 * Accept more data from the HBA for a write-direction command.  The first
 * call only primes the transfer; later calls issue the actual I/O for the
 * chunk that the HBA placed in the bounce buffer or scatter/gather list.
 */
static void scsi_write_data(SCSIRequest *req)
{
    SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
    SCSIDiskClass *sdc = (SCSIDiskClass *) object_get_class(OBJECT(s));

    /* No data transfer may already be in progress */
    assert(r->req.aiocb == NULL);

    /* The request is used as the AIO opaque value, so add a ref.  */
    scsi_req_ref(&r->req);
    if (r->req.cmd.mode != SCSI_XFER_TO_DEV) {
        /* A read-direction command must never reach the write path. */
        trace_scsi_disk_write_data_invalid();
        scsi_write_complete_noio(r, -EINVAL);
        return;
    }

    if (!r->req.sg && !r->qiov.size) {
        /* Called for the first time.  Ask the driver to send us more data.  */
        r->started = true;
        scsi_write_complete_noio(r, 0);
        return;
    }
    if (!blk_is_available(req->dev->conf.blk)) {
        scsi_write_complete_noio(r, -ENOMEDIUM);
        return;
    }

    if (r->req.cmd.buf[0] == VERIFY_10 || r->req.cmd.buf[0] == VERIFY_12 ||
        r->req.cmd.buf[0] == VERIFY_16) {
        /*
         * VERIFY: nothing is written; the data is just consumed.
         * NOTE(review): no actual byte comparison is performed here.
         */
        if (r->req.sg) {
            scsi_dma_complete_noio(r, 0);
        } else {
            scsi_write_complete_noio(r, 0);
        }
        return;
    }

    if (r->req.sg) {
        /* Scatter/gather path: DMA directly from guest memory. */
        dma_acct_start(s->qdev.conf.blk, &r->acct, r->req.sg, BLOCK_ACCT_WRITE);
        r->req.residual -= r->req.sg->size;
        r->req.aiocb = dma_blk_io(blk_get_aio_context(s->qdev.conf.blk),
                                  r->req.sg, r->sector << BDRV_SECTOR_BITS,
                                  BDRV_SECTOR_SIZE,
                                  sdc->dma_writev, r, scsi_dma_complete, r,
                                  DMA_DIRECTION_TO_DEVICE);
    } else {
        /* Bounce-buffer path: write the chunk the HBA just filled. */
        block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct,
                         r->qiov.size, BLOCK_ACCT_WRITE);
        r->req.aiocb = sdc->dma_writev(r->sector << BDRV_SECTOR_BITS, &r->qiov,
                                       scsi_write_complete, r, r);
    }
}
664 
665 /* Return a pointer to the data buffer.  */
666 static uint8_t *scsi_get_buf(SCSIRequest *req)
667 {
668     SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
669 
670     return (uint8_t *)r->iov.iov_base;
671 }
672 
/*
 * Build the response for INQUIRY with EVPD set (Vital Product Data pages).
 * Fills 'outbuf' with a 4-byte page header followed by the page payload and
 * returns the total length, or -1 when the requested page is unsupported
 * (the caller turns that into a check condition).  The page-length byte is
 * patched in at the end, once the payload size is known.
 */
static int scsi_disk_emulate_vpd_page(SCSIRequest *req, uint8_t *outbuf)
{
    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
    uint8_t page_code = req->cmd.buf[2];
    int start, buflen = 0;

    /* Common 4-byte header: device type, page code, 2-byte page length
     * (filled in at the end). */
    outbuf[buflen++] = s->qdev.type & 0x1f;
    outbuf[buflen++] = page_code;
    outbuf[buflen++] = 0x00;
    outbuf[buflen++] = 0x00;
    start = buflen;

    switch (page_code) {
    case 0x00: /* Supported page codes, mandatory */
    {
        trace_scsi_disk_emulate_vpd_page_00(req->cmd.xfer);
        outbuf[buflen++] = 0x00; /* list of supported pages (this page) */
        if (s->serial) {
            outbuf[buflen++] = 0x80; /* unit serial number */
        }
        outbuf[buflen++] = 0x83; /* device identification */
        if (s->qdev.type == TYPE_DISK) {
            outbuf[buflen++] = 0xb0; /* block limits */
            outbuf[buflen++] = 0xb1; /* block device characteristics */
            outbuf[buflen++] = 0xb2; /* thin provisioning */
        }
        break;
    }
    case 0x80: /* Device serial number, optional */
    {
        int l;

        if (!s->serial) {
            trace_scsi_disk_emulate_vpd_page_80_not_supported();
            return -1;
        }

        /* Clamp the user-supplied serial to the maximum reportable length. */
        l = strlen(s->serial);
        if (l > MAX_SERIAL_LEN) {
            l = MAX_SERIAL_LEN;
        }

        trace_scsi_disk_emulate_vpd_page_80(req->cmd.xfer);
        memcpy(outbuf + buflen, s->serial, l);
        buflen += l;
        break;
    }

    case 0x83: /* Device identification page, mandatory */
    {
        /* Cap so the designator (4-byte header + data) fits the page. */
        int id_len = s->device_id ? MIN(strlen(s->device_id), 255 - 8) : 0;

        trace_scsi_disk_emulate_vpd_page_83(req->cmd.xfer);

        if (id_len) {
            outbuf[buflen++] = 0x2; /* ASCII */
            outbuf[buflen++] = 0;   /* not officially assigned */
            outbuf[buflen++] = 0;   /* reserved */
            outbuf[buflen++] = id_len; /* length of data following */
            memcpy(outbuf + buflen, s->device_id, id_len);
            buflen += id_len;
        }

        if (s->qdev.wwn) {
            outbuf[buflen++] = 0x1; /* Binary */
            outbuf[buflen++] = 0x3; /* NAA */
            outbuf[buflen++] = 0;   /* reserved */
            outbuf[buflen++] = 8;
            stq_be_p(&outbuf[buflen], s->qdev.wwn);
            buflen += 8;
        }

        if (s->qdev.port_wwn) {
            outbuf[buflen++] = 0x61; /* SAS / Binary */
            outbuf[buflen++] = 0x93; /* PIV / Target port / NAA */
            outbuf[buflen++] = 0;    /* reserved */
            outbuf[buflen++] = 8;
            stq_be_p(&outbuf[buflen], s->qdev.port_wwn);
            buflen += 8;
        }

        if (s->port_index) {
            outbuf[buflen++] = 0x61; /* SAS / Binary */

            /* PIV/Target port/relative target port */
            outbuf[buflen++] = 0x94;

            outbuf[buflen++] = 0;    /* reserved */
            outbuf[buflen++] = 4;
            stw_be_p(&outbuf[buflen + 2], s->port_index);
            buflen += 4;
        }
        break;
    }
    case 0xb0: /* block limits */
    {
        SCSIBlockLimits bl = {};

        if (s->qdev.type == TYPE_ROM) {
            trace_scsi_disk_emulate_vpd_page_b0_not_supported();
            return -1;
        }
        bl.wsnz = 1;
        /* All limits below are converted from bytes to logical blocks. */
        bl.unmap_sectors =
            s->qdev.conf.discard_granularity / s->qdev.blocksize;
        bl.min_io_size =
            s->qdev.conf.min_io_size / s->qdev.blocksize;
        bl.opt_io_size =
            s->qdev.conf.opt_io_size / s->qdev.blocksize;
        bl.max_unmap_sectors =
            s->max_unmap_size / s->qdev.blocksize;
        bl.max_io_sectors =
            s->max_io_size / s->qdev.blocksize;
        /* 255 descriptors fit in 4 KiB with an 8-byte header */
        bl.max_unmap_descr = 255;

        if (s->qdev.type == TYPE_DISK) {
            /* Honor the backend's transfer limit when it is stricter. */
            int max_transfer_blk = blk_get_max_transfer(s->qdev.conf.blk);
            int max_io_sectors_blk =
                max_transfer_blk / s->qdev.blocksize;

            bl.max_io_sectors =
                MIN_NON_ZERO(max_io_sectors_blk, bl.max_io_sectors);
        }
        buflen += scsi_emulate_block_limits(outbuf + buflen, &bl);
        break;
    }
    case 0xb1: /* block device characteristics */
    {
        buflen = 0x40;
        outbuf[4] = (s->rotation_rate >> 8) & 0xff;
        outbuf[5] = s->rotation_rate & 0xff;
        outbuf[6] = 0; /* PRODUCT TYPE */
        outbuf[7] = 0; /* WABEREQ | WACEREQ | NOMINAL FORM FACTOR */
        outbuf[8] = 0; /* VBULS */
        break;
    }
    case 0xb2: /* thin provisioning */
    {
        buflen = 8;
        outbuf[4] = 0;
        outbuf[5] = 0xe0; /* unmap & write_same 10/16 all supported */
        outbuf[6] = s->qdev.conf.discard_granularity ? 2 : 1;
        outbuf[7] = 0;
        break;
    }
    default:
        return -1;
    }
    /* done with EVPD */
    assert(buflen - start <= 255);
    outbuf[start - 1] = buflen - start;
    return buflen;
}
827 
/*
 * Handle the INQUIRY command: dispatch to the VPD handler when EVPD is set,
 * otherwise fill in standard INQUIRY data.  Returns the response length, or
 * -1 for unsupported/invalid CDB fields.
 */
static int scsi_disk_emulate_inquiry(SCSIRequest *req, uint8_t *outbuf)
{
    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
    int buflen = 0;

    if (req->cmd.buf[1] & 0x1) {
        /* Vital product data */
        return scsi_disk_emulate_vpd_page(req, outbuf);
    }

    /* Standard INQUIRY data */
    if (req->cmd.buf[2] != 0) {
        /* PAGE CODE must be zero when EVPD is not set. */
        return -1;
    }

    /* PAGE CODE == 0 */
    buflen = req->cmd.xfer;
    if (buflen > SCSI_MAX_INQUIRY_LEN) {
        buflen = SCSI_MAX_INQUIRY_LEN;
    }

    outbuf[0] = s->qdev.type & 0x1f;
    outbuf[1] = (s->features & (1 << SCSI_DISK_F_REMOVABLE)) ? 0x80 : 0;

    /* Vendor/product/revision strings are space-padded to fixed widths. */
    strpadcpy((char *) &outbuf[16], 16, s->product, ' ');
    strpadcpy((char *) &outbuf[8], 8, s->vendor, ' ');

    memset(&outbuf[32], 0, 4);
    memcpy(&outbuf[32], s->version, MIN(4, strlen(s->version)));
    /*
     * We claim conformance to SPC-3, which is required for guests
     * to ask for modern features like READ CAPACITY(16) or the
     * block characteristics VPD page by default.  Not all of SPC-3
     * is actually implemented, but we're good enough.
     */
    outbuf[2] = s->qdev.default_scsi_version;
    outbuf[3] = 2 | 0x10; /* Format 2, HiSup */

    if (buflen > 36) {
        outbuf[4] = buflen - 5; /* Additional Length = (Len - 1) - 4 */
    } else {
        /* If the allocation length of CDB is too small,
               the additional length is not adjusted */
        outbuf[4] = 36 - 5;
    }

    /* Sync data transfer and TCQ.  */
    outbuf[7] = 0x10 | (req->bus->info->tcq ? 0x02 : 0);
    return buflen;
}
878 
879 static inline bool media_is_dvd(SCSIDiskState *s)
880 {
881     uint64_t nb_sectors;
882     if (s->qdev.type != TYPE_ROM) {
883         return false;
884     }
885     if (!blk_is_available(s->qdev.conf.blk)) {
886         return false;
887     }
888     blk_get_geometry(s->qdev.conf.blk, &nb_sectors);
889     return nb_sectors > CD_MAX_SECTORS;
890 }
891 
892 static inline bool media_is_cd(SCSIDiskState *s)
893 {
894     uint64_t nb_sectors;
895     if (s->qdev.type != TYPE_ROM) {
896         return false;
897     }
898     if (!blk_is_available(s->qdev.conf.blk)) {
899         return false;
900     }
901     blk_get_geometry(s->qdev.conf.blk, &nb_sectors);
902     return nb_sectors <= CD_MAX_SECTORS;
903 }
904 
/*
 * READ DISC INFORMATION (MMC): only standard disc information (type 0) is
 * supported, and only for CD/DVD devices.  Fills a fixed 34-byte response
 * describing a single-session, finalized disc.  Returns 34 on success, -1
 * on failure (with a check condition raised for an invalid type field).
 */
static int scsi_read_disc_information(SCSIDiskState *s, SCSIDiskReq *r,
                                      uint8_t *outbuf)
{
    uint8_t type = r->req.cmd.buf[1] & 7;

    if (s->qdev.type != TYPE_ROM) {
        return -1;
    }

    /* Types 1/2 are only defined for Blu-Ray.  */
    if (type != 0) {
        scsi_check_condition(r, SENSE_CODE(INVALID_FIELD));
        return -1;
    }

    memset(outbuf, 0, 34);
    outbuf[1] = 32;
    outbuf[2] = 0xe; /* last session complete, disc finalized */
    outbuf[3] = 1;   /* first track on disc */
    outbuf[4] = 1;   /* # of sessions */
    outbuf[5] = 1;   /* first track of last session */
    outbuf[6] = 1;   /* last track of last session */
    outbuf[7] = 0x20; /* unrestricted use */
    outbuf[8] = 0x00; /* CD-ROM or DVD-ROM */
    /* 9-10-11: most significant byte corresponding bytes 4-5-6 */
    /* 12-23: not meaningful for CD-ROM or DVD-ROM */
    /* 24-31: disc bar code */
    /* 32: disc application code */
    /* 33: number of OPC tables */

    return 34;
}
937 
/*
 * READ DVD STRUCTURE (MMC).  Only media type 0 (DVD) is accepted, and only
 * a subset of format codes is implemented.  Returns the number of bytes
 * stored into @outbuf, or -1 on error (a CHECK CONDITION may already have
 * been posted; otherwise the caller maps -1 to INVALID_FIELD).
 */
static int scsi_read_dvd_structure(SCSIDiskState *s, SCSIDiskReq *r,
                                   uint8_t *outbuf)
{
    /* Reply sizes (payload + 4-byte header) per supported format code;
       a zero entry means the format is not implemented. */
    static const int rds_caps_size[5] = {
        [0] = 2048 + 4,
        [1] = 4 + 4,
        [3] = 188 + 4,
        [4] = 2048 + 4,
    };

    uint8_t media = r->req.cmd.buf[1];
    uint8_t layer = r->req.cmd.buf[6];
    uint8_t format = r->req.cmd.buf[7];
    int size = -1;

    if (s->qdev.type != TYPE_ROM) {
        return -1;
    }
    if (media != 0) {
        scsi_check_condition(r, SENSE_CODE(INVALID_FIELD));
        return -1;
    }

    /* Every format except 0xff (capability list) requires a DVD medium. */
    if (format != 0xff) {
        if (!blk_is_available(s->qdev.conf.blk)) {
            scsi_check_condition(r, SENSE_CODE(NO_MEDIUM));
            return -1;
        }
        if (media_is_cd(s)) {
            scsi_check_condition(r, SENSE_CODE(INCOMPATIBLE_FORMAT));
            return -1;
        }
        if (format >= ARRAY_SIZE(rds_caps_size)) {
            return -1;
        }
        size = rds_caps_size[format];
        memset(outbuf, 0, size);
    }

    switch (format) {
    case 0x00: {
        /* Physical format information */
        uint64_t nb_sectors;
        if (layer != 0) {
            goto fail;
        }
        blk_get_geometry(s->qdev.conf.blk, &nb_sectors);

        outbuf[4] = 1;   /* DVD-ROM, part version 1 */
        outbuf[5] = 0xf; /* 120mm disc, minimum rate unspecified */
        outbuf[6] = 1;   /* one layer, read-only (per MMC-2 spec) */
        outbuf[7] = 0;   /* default densities */

        /* >> 2 converts 512-byte sectors to 2048-byte DVD blocks. */
        stl_be_p(&outbuf[12], (nb_sectors >> 2) - 1); /* end sector */
        stl_be_p(&outbuf[16], (nb_sectors >> 2) - 1); /* l0 end sector */
        break;
    }

    case 0x01: /* DVD copyright information, all zeros */
        break;

    case 0x03: /* BCA information - invalid field for no BCA info */
        return -1;

    case 0x04: /* DVD disc manufacturing information, all zeros */
        break;

    case 0xff: { /* List capabilities */
        /* One 4-byte entry per implemented format, after a 4-byte header. */
        int i;
        size = 4;
        for (i = 0; i < ARRAY_SIZE(rds_caps_size); i++) {
            if (!rds_caps_size[i]) {
                continue;
            }
            outbuf[size] = i;
            outbuf[size + 1] = 0x40; /* Not writable, readable */
            stw_be_p(&outbuf[size + 2], rds_caps_size[i]);
            size += 4;
        }
        break;
     }

    default:
        return -1;
    }

    /* Size of buffer, not including 2 byte size field */
    stw_be_p(outbuf, size - 2);
    return size;

fail:
    return -1;
}
1031 
1032 static int scsi_event_status_media(SCSIDiskState *s, uint8_t *outbuf)
1033 {
1034     uint8_t event_code, media_status;
1035 
1036     media_status = 0;
1037     if (s->tray_open) {
1038         media_status = MS_TRAY_OPEN;
1039     } else if (blk_is_inserted(s->qdev.conf.blk)) {
1040         media_status = MS_MEDIA_PRESENT;
1041     }
1042 
1043     /* Event notification descriptor */
1044     event_code = MEC_NO_CHANGE;
1045     if (media_status != MS_TRAY_OPEN) {
1046         if (s->media_event) {
1047             event_code = MEC_NEW_MEDIA;
1048             s->media_event = false;
1049         } else if (s->eject_request) {
1050             event_code = MEC_EJECT_REQUESTED;
1051             s->eject_request = false;
1052         }
1053     }
1054 
1055     outbuf[0] = event_code;
1056     outbuf[1] = media_status;
1057 
1058     /* These fields are reserved, just clear them. */
1059     outbuf[2] = 0;
1060     outbuf[3] = 0;
1061     return 4;
1062 }
1063 
1064 static int scsi_get_event_status_notification(SCSIDiskState *s, SCSIDiskReq *r,
1065                                               uint8_t *outbuf)
1066 {
1067     int size;
1068     uint8_t *buf = r->req.cmd.buf;
1069     uint8_t notification_class_request = buf[4];
1070     if (s->qdev.type != TYPE_ROM) {
1071         return -1;
1072     }
1073     if ((buf[1] & 1) == 0) {
1074         /* asynchronous */
1075         return -1;
1076     }
1077 
1078     size = 4;
1079     outbuf[0] = outbuf[1] = 0;
1080     outbuf[3] = 1 << GESN_MEDIA; /* supported events */
1081     if (notification_class_request & (1 << GESN_MEDIA)) {
1082         outbuf[2] = GESN_MEDIA;
1083         size += scsi_event_status_media(s, &outbuf[size]);
1084     } else {
1085         outbuf[2] = 0x80;
1086     }
1087     stw_be_p(outbuf, size - 4);
1088     return size;
1089 }
1090 
/*
 * GET CONFIGURATION (MMC): report the current profile (DVD-ROM, CD-ROM or
 * none, depending on the inserted medium) plus a fixed feature list with
 * the Profile List, Core and Removable Medium features.  Always fills and
 * returns 40 bytes; returns -1 if the device is not a CD/DVD drive.
 */
static int scsi_get_configuration(SCSIDiskState *s, uint8_t *outbuf)
{
    int current;

    if (s->qdev.type != TYPE_ROM) {
        return -1;
    }

    /* Derive the current profile from the size of the inserted medium. */
    if (media_is_dvd(s)) {
        current = MMC_PROFILE_DVD_ROM;
    } else if (media_is_cd(s)) {
        current = MMC_PROFILE_CD_ROM;
    } else {
        current = MMC_PROFILE_NONE;
    }

    memset(outbuf, 0, 40);
    stl_be_p(&outbuf[0], 36); /* Bytes after the data length field */
    stw_be_p(&outbuf[6], current);
    /* outbuf[8] - outbuf[19]: Feature 0 - Profile list */
    outbuf[10] = 0x03; /* persistent, current */
    outbuf[11] = 8; /* two profiles */
    stw_be_p(&outbuf[12], MMC_PROFILE_DVD_ROM);
    outbuf[14] = (current == MMC_PROFILE_DVD_ROM); /* "current" flag */
    stw_be_p(&outbuf[16], MMC_PROFILE_CD_ROM);
    outbuf[18] = (current == MMC_PROFILE_CD_ROM); /* "current" flag */
    /* outbuf[20] - outbuf[31]: Feature 1 - Core feature */
    stw_be_p(&outbuf[20], 1);
    outbuf[22] = 0x08 | 0x03; /* version 2, persistent, current */
    outbuf[23] = 8;
    stl_be_p(&outbuf[24], 1); /* SCSI */
    outbuf[28] = 1; /* DBE = 1, mandatory */
    /* outbuf[32] - outbuf[39]: Feature 3 - Removable media feature */
    stw_be_p(&outbuf[32], 3);
    outbuf[34] = 0x08 | 0x03; /* version 2, persistent, current */
    outbuf[35] = 4;
    outbuf[36] = 0x39; /* tray, load=1, eject=1, unlocked at powerup, lock=1 */
    /* TODO: Random readable, CD read, DVD read, drive serial number,
       power management */
    return 40;
}
1132 
1133 static int scsi_emulate_mechanism_status(SCSIDiskState *s, uint8_t *outbuf)
1134 {
1135     if (s->qdev.type != TYPE_ROM) {
1136         return -1;
1137     }
1138     memset(outbuf, 0, 8);
1139     outbuf[5] = 1; /* CD-ROM */
1140     return 8;
1141 }
1142 
/*
 * Fill in one mode page at *p_outbuf and advance the pointer past it.
 *
 * @page: mode page code (must be < 0x3f; asserted).
 * @page_control: 0 = current values, 1 = changeable values (all-zero mask,
 *                since MODE SELECT cannot change parameters here).
 *
 * Returns the number of bytes written (page length + 2-byte header), or -1
 * if the page is not valid for this device type.
 */
static int mode_sense_page(SCSIDiskState *s, int page, uint8_t **p_outbuf,
                           int page_control)
{
    /* Bitmask of device types (TYPE_DISK/TYPE_ROM) each page applies to. */
    static const int mode_sense_valid[0x3f] = {
        [MODE_PAGE_VENDOR_SPECIFIC]        = (1 << TYPE_DISK) | (1 << TYPE_ROM),
        [MODE_PAGE_HD_GEOMETRY]            = (1 << TYPE_DISK),
        [MODE_PAGE_FLEXIBLE_DISK_GEOMETRY] = (1 << TYPE_DISK),
        [MODE_PAGE_CACHING]                = (1 << TYPE_DISK) | (1 << TYPE_ROM),
        [MODE_PAGE_R_W_ERROR]              = (1 << TYPE_DISK) | (1 << TYPE_ROM),
        [MODE_PAGE_AUDIO_CTL]              = (1 << TYPE_ROM),
        [MODE_PAGE_CAPABILITIES]           = (1 << TYPE_ROM),
        [MODE_PAGE_APPLE_VENDOR]           = (1 << TYPE_ROM),
    };

    uint8_t *p = *p_outbuf + 2;  /* skip the 2-byte page header */
    int length;

    assert(page < ARRAY_SIZE(mode_sense_valid));
    if ((mode_sense_valid[page] & (1 << s->qdev.type)) == 0) {
        return -1;
    }

    /*
     * If Changeable Values are requested, a mask denoting those mode parameters
     * that are changeable shall be returned. As we currently don't support
     * parameter changes via MODE_SELECT all bits are returned set to zero.
     * The buffer was already memset to zero by the caller of this function.
     *
     * The offsets here are off by two compared to the descriptions in the
     * SCSI specs, because those include a 2-byte header.  This is unfortunate,
     * but it is done so that offsets are consistent within our implementation
     * of MODE SENSE and MODE SELECT.  MODE SELECT has to deal with both
     * 2-byte and 4-byte headers.
     */
    switch (page) {
    case MODE_PAGE_HD_GEOMETRY:
        length = 0x16;
        if (page_control == 1) { /* Changeable Values */
            break;
        }
        /* if a geometry hint is available, use it */
        p[0] = (s->qdev.conf.cyls >> 16) & 0xff;
        p[1] = (s->qdev.conf.cyls >> 8) & 0xff;
        p[2] = s->qdev.conf.cyls & 0xff;
        p[3] = s->qdev.conf.heads & 0xff;
        /* Write precomp start cylinder, disabled */
        p[4] = (s->qdev.conf.cyls >> 16) & 0xff;
        p[5] = (s->qdev.conf.cyls >> 8) & 0xff;
        p[6] = s->qdev.conf.cyls & 0xff;
        /* Reduced current start cylinder, disabled */
        p[7] = (s->qdev.conf.cyls >> 16) & 0xff;
        p[8] = (s->qdev.conf.cyls >> 8) & 0xff;
        p[9] = s->qdev.conf.cyls & 0xff;
        /* Device step rate [ns], 200ns */
        p[10] = 0;
        p[11] = 200;
        /* Landing zone cylinder */
        p[12] = 0xff;
        p[13] =  0xff;
        p[14] = 0xff;
        /* Medium rotation rate [rpm], 5400 rpm */
        p[18] = (5400 >> 8) & 0xff;
        p[19] = 5400 & 0xff;
        break;

    case MODE_PAGE_FLEXIBLE_DISK_GEOMETRY:
        length = 0x1e;
        if (page_control == 1) { /* Changeable Values */
            break;
        }
        /* Transfer rate [kbit/s], 5Mbit/s */
        p[0] = 5000 >> 8;
        p[1] = 5000 & 0xff;
        /* if a geometry hint is available, use it */
        p[2] = s->qdev.conf.heads & 0xff;
        p[3] = s->qdev.conf.secs & 0xff;
        p[4] = s->qdev.blocksize >> 8;
        p[6] = (s->qdev.conf.cyls >> 8) & 0xff;
        p[7] = s->qdev.conf.cyls & 0xff;
        /* Write precomp start cylinder, disabled */
        p[8] = (s->qdev.conf.cyls >> 8) & 0xff;
        p[9] = s->qdev.conf.cyls & 0xff;
        /* Reduced current start cylinder, disabled */
        p[10] = (s->qdev.conf.cyls >> 8) & 0xff;
        p[11] = s->qdev.conf.cyls & 0xff;
        /* Device step rate [100us], 100us */
        p[12] = 0;
        p[13] = 1;
        /* Device step pulse width [us], 1us */
        p[14] = 1;
        /* Device head settle delay [100us], 100us */
        p[15] = 0;
        p[16] = 1;
        /* Motor on delay [0.1s], 0.1s */
        p[17] = 1;
        /* Motor off delay [0.1s], 0.1s */
        p[18] = 1;
        /* Medium rotation rate [rpm], 5400 rpm */
        p[26] = (5400 >> 8) & 0xff;
        p[27] = 5400 & 0xff;
        break;

    case MODE_PAGE_CACHING:
        length = 0x12;
        /* WCE is the single changeable bit; report it set when the backend
         * write cache is enabled. */
        if (page_control == 1 || /* Changeable Values */
            blk_enable_write_cache(s->qdev.conf.blk)) {
            p[0] = 4; /* WCE */
        }
        break;

    case MODE_PAGE_R_W_ERROR:
        length = 10;
        if (page_control == 1) { /* Changeable Values */
            if (s->qdev.type == TYPE_ROM) {
                /* Automatic Write Reallocation Enabled */
                p[0] = 0x80;
            }
            break;
        }
        p[0] = 0x80; /* Automatic Write Reallocation Enabled */
        if (s->qdev.type == TYPE_ROM) {
            p[1] = 0x20; /* Read Retry Count */
        }
        break;

    case MODE_PAGE_AUDIO_CTL:
        /* All-zero page; only the length is reported. */
        length = 14;
        break;

    case MODE_PAGE_CAPABILITIES:
        length = 0x14;
        if (page_control == 1) { /* Changeable Values */
            break;
        }

        p[0] = 0x3b; /* CD-R & CD-RW read */
        p[1] = 0; /* Writing not supported */
        p[2] = 0x7f; /* Audio, composite, digital out,
                        mode 2 form 1&2, multi session */
        p[3] = 0xff; /* CD DA, DA accurate, RW supported,
                        RW corrected, C2 errors, ISRC,
                        UPC, Bar code */
        p[4] = 0x2d | (s->tray_locked ? 2 : 0);
        /* Locking supported, jumper present, eject, tray */
        p[5] = 0; /* no volume & mute control, no
                     changer */
        p[6] = (50 * 176) >> 8; /* 50x read speed */
        p[7] = (50 * 176) & 0xff;
        p[8] = 2 >> 8; /* Two volume levels */
        p[9] = 2 & 0xff;
        p[10] = 2048 >> 8; /* 2M buffer */
        p[11] = 2048 & 0xff;
        p[12] = (16 * 176) >> 8; /* 16x read speed current */
        p[13] = (16 * 176) & 0xff;
        p[16] = (16 * 176) >> 8; /* 16x write speed */
        p[17] = (16 * 176) & 0xff;
        p[18] = (16 * 176) >> 8; /* 16x write speed current */
        p[19] = (16 * 176) & 0xff;
        break;

     case MODE_PAGE_APPLE_VENDOR:
        /* Only exposed when the Apple quirk is enabled (MacOS probes it). */
        if (s->quirks & (1 << SCSI_DISK_QUIRK_MODE_PAGE_APPLE_VENDOR)) {
            length = 0x1e;
            if (page_control == 1) { /* Changeable Values */
                break;
            }

            memset(p, 0, length);
            /* NOTE(review): the 22-char string + NUL spans p[8]..p[30],
             * one byte past this page's 0x1e-byte payload — verify the
             * stray NUL cannot clobber a following page. */
            strcpy((char *)p + 8, "APPLE COMPUTER, INC   ");
            break;
        } else {
            return -1;
        }

    case MODE_PAGE_VENDOR_SPECIFIC:
        if (s->qdev.type == TYPE_DISK && (s->quirks &
            (1 << SCSI_DISK_QUIRK_MODE_PAGE_VENDOR_SPECIFIC_APPLE))) {
            length = 0x2;
            if (page_control == 1) { /* Changeable Values */
                p[0] = 0xff;
                p[1] = 0xff;
                break;
            }
            p[0] = 0;
            p[1] = 0;
            break;
        } else {
            return -1;
        }

    default:
        return -1;
    }

    /* Write the 2-byte header and advance past the whole page. */
    assert(length < 256);
    (*p_outbuf)[0] = page;
    (*p_outbuf)[1] = length;
    *p_outbuf += length + 2;
    return length + 2;
}
1343 
/*
 * Emulate MODE SENSE (6) and MODE SENSE (10).  Builds the mode parameter
 * header, an optional 8-byte block descriptor, and the requested mode
 * page(s) in @outbuf.  Returns the total length, or -1 with sense set on
 * error (invalid page, or PC=3 saved values which are unsupported).
 */
static int scsi_disk_emulate_mode_sense(SCSIDiskReq *r, uint8_t *outbuf)
{
    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
    uint64_t nb_sectors;
    bool dbd;
    int page, buflen, ret, page_control;
    uint8_t *p;
    uint8_t dev_specific_param;

    /* CDB byte 1 bit 3 = DBD (disable block descriptors);
     * byte 2 = PC (bits 7-6) and page code (bits 5-0). */
    dbd = (r->req.cmd.buf[1] & 0x8) != 0;
    page = r->req.cmd.buf[2] & 0x3f;
    page_control = (r->req.cmd.buf[2] & 0xc0) >> 6;

    trace_scsi_disk_emulate_mode_sense((r->req.cmd.buf[0] == MODE_SENSE) ? 6 :
                                       10, page, r->req.cmd.xfer, page_control);
    memset(outbuf, 0, r->req.cmd.xfer);
    p = outbuf;

    if (s->qdev.type == TYPE_DISK) {
        dev_specific_param = s->features & (1 << SCSI_DISK_F_DPOFUA) ? 0x10 : 0;
        if (!blk_is_writable(s->qdev.conf.blk)) {
            dev_specific_param |= 0x80; /* Readonly.  */
        }
    } else {
        if (s->quirks & (1 << SCSI_DISK_QUIRK_MODE_SENSE_ROM_USE_DBD)) {
            /* Use DBD from the request... */
            dev_specific_param = 0x00;

            /*
             * ... unless we receive a request for MODE_PAGE_APPLE_VENDOR
             * which should never return a block descriptor even though DBD is
             * not set, otherwise CDROM detection fails in MacOS
             */
            if (s->quirks & (1 << SCSI_DISK_QUIRK_MODE_PAGE_APPLE_VENDOR) &&
                page == MODE_PAGE_APPLE_VENDOR) {
                dbd = true;
            }
        } else {
            /*
             * MMC prescribes that CD/DVD drives have no block descriptors,
             * and defines no device-specific parameter.
             */
            dev_specific_param = 0x00;
            dbd = true;
        }
    }

    /* Mode parameter header: 4 bytes for the 6-byte CDB, 8 for the 10-byte
     * one.  The data length field(s) are filled in at the end. */
    if (r->req.cmd.buf[0] == MODE_SENSE) {
        p[1] = 0; /* Default media type.  */
        p[2] = dev_specific_param;
        p[3] = 0; /* Block descriptor length.  */
        p += 4;
    } else { /* MODE_SENSE_10 */
        p[2] = 0; /* Default media type.  */
        p[3] = dev_specific_param;
        p[6] = p[7] = 0; /* Block descriptor length.  */
        p += 8;
    }

    blk_get_geometry(s->qdev.conf.blk, &nb_sectors);
    if (!dbd && nb_sectors) {
        if (r->req.cmd.buf[0] == MODE_SENSE) {
            outbuf[3] = 8; /* Block descriptor length  */
        } else { /* MODE_SENSE_10 */
            outbuf[7] = 8; /* Block descriptor length  */
        }
        /* Convert 512-byte sectors to logical blocks; the descriptor's
         * 24-bit count field reports 0 if the device is too large. */
        nb_sectors /= (s->qdev.blocksize / BDRV_SECTOR_SIZE);
        if (nb_sectors > 0xffffff) {
            nb_sectors = 0;
        }
        p[0] = 0; /* media density code */
        p[1] = (nb_sectors >> 16) & 0xff;
        p[2] = (nb_sectors >> 8) & 0xff;
        p[3] = nb_sectors & 0xff;
        p[4] = 0; /* reserved */
        p[5] = 0; /* bytes 5-7 are the sector size in bytes */
        p[6] = s->qdev.blocksize >> 8;
        p[7] = 0;
        p += 8;
    }

    if (page_control == 3) {
        /* Saved Values */
        scsi_check_condition(r, SENSE_CODE(SAVING_PARAMS_NOT_SUPPORTED));
        return -1;
    }

    if (page == 0x3f) {
        /* Page 0x3f: return all pages this device type supports; pages
         * that return -1 simply contribute nothing. */
        for (page = 0; page <= 0x3e; page++) {
            mode_sense_page(s, page, &p, page_control);
        }
    } else {
        ret = mode_sense_page(s, page, &p, page_control);
        if (ret == -1) {
            return -1;
        }
    }

    buflen = p - outbuf;
    /*
     * The mode data length field specifies the length in bytes of the
     * following data that is available to be transferred. The mode data
     * length does not include itself.
     */
    if (r->req.cmd.buf[0] == MODE_SENSE) {
        outbuf[0] = buflen - 1;
    } else { /* MODE_SENSE_10 */
        outbuf[0] = ((buflen - 2) >> 8) & 0xff;
        outbuf[1] = (buflen - 2) & 0xff;
    }
    return buflen;
}
1456 
1457 static int scsi_disk_emulate_read_toc(SCSIRequest *req, uint8_t *outbuf)
1458 {
1459     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
1460     int start_track, format, msf, toclen;
1461     uint64_t nb_sectors;
1462 
1463     msf = req->cmd.buf[1] & 2;
1464     format = req->cmd.buf[2] & 0xf;
1465     start_track = req->cmd.buf[6];
1466     blk_get_geometry(s->qdev.conf.blk, &nb_sectors);
1467     trace_scsi_disk_emulate_read_toc(start_track, format, msf >> 1);
1468     nb_sectors /= s->qdev.blocksize / BDRV_SECTOR_SIZE;
1469     switch (format) {
1470     case 0:
1471         toclen = cdrom_read_toc(nb_sectors, outbuf, msf, start_track);
1472         break;
1473     case 1:
1474         /* multi session : only a single session defined */
1475         toclen = 12;
1476         memset(outbuf, 0, 12);
1477         outbuf[1] = 0x0a;
1478         outbuf[2] = 0x01;
1479         outbuf[3] = 0x01;
1480         break;
1481     case 2:
1482         toclen = cdrom_read_toc_raw(nb_sectors, outbuf, msf, start_track);
1483         break;
1484     default:
1485         return -1;
1486     }
1487     return toclen;
1488 }
1489 
1490 static int scsi_disk_emulate_start_stop(SCSIDiskReq *r)
1491 {
1492     SCSIRequest *req = &r->req;
1493     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
1494     bool start = req->cmd.buf[4] & 1;
1495     bool loej = req->cmd.buf[4] & 2; /* load on start, eject on !start */
1496     int pwrcnd = req->cmd.buf[4] & 0xf0;
1497 
1498     if (pwrcnd) {
1499         /* eject/load only happens for power condition == 0 */
1500         return 0;
1501     }
1502 
1503     if ((s->features & (1 << SCSI_DISK_F_REMOVABLE)) && loej) {
1504         if (!start && !s->tray_open && s->tray_locked) {
1505             scsi_check_condition(r,
1506                                  blk_is_inserted(s->qdev.conf.blk)
1507                                  ? SENSE_CODE(ILLEGAL_REQ_REMOVAL_PREVENTED)
1508                                  : SENSE_CODE(NOT_READY_REMOVAL_PREVENTED));
1509             return -1;
1510         }
1511 
1512         if (s->tray_open != !start) {
1513             blk_eject(s->qdev.conf.blk, !start);
1514             s->tray_open = !start;
1515         }
1516     }
1517     return 0;
1518 }
1519 
1520 static void scsi_disk_emulate_read_data(SCSIRequest *req)
1521 {
1522     SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
1523     int buflen = r->iov.iov_len;
1524 
1525     if (buflen) {
1526         trace_scsi_disk_emulate_read_data(buflen);
1527         r->iov.iov_len = 0;
1528         r->started = true;
1529         scsi_req_data(&r->req, buflen);
1530         return;
1531     }
1532 
1533     /* This also clears the sense buffer for REQUEST SENSE.  */
1534     scsi_req_complete(&r->req, GOOD);
1535 }
1536 
1537 static int scsi_disk_check_mode_select(SCSIDiskState *s, int page,
1538                                        uint8_t *inbuf, int inlen)
1539 {
1540     uint8_t mode_current[SCSI_MAX_MODE_LEN];
1541     uint8_t mode_changeable[SCSI_MAX_MODE_LEN];
1542     uint8_t *p;
1543     int len, expected_len, changeable_len, i;
1544 
1545     /* The input buffer does not include the page header, so it is
1546      * off by 2 bytes.
1547      */
1548     expected_len = inlen + 2;
1549     if (expected_len > SCSI_MAX_MODE_LEN) {
1550         return -1;
1551     }
1552 
1553     /* MODE_PAGE_ALLS is only valid for MODE SENSE commands */
1554     if (page == MODE_PAGE_ALLS) {
1555         return -1;
1556     }
1557 
1558     p = mode_current;
1559     memset(mode_current, 0, inlen + 2);
1560     len = mode_sense_page(s, page, &p, 0);
1561     if (len < 0 || len != expected_len) {
1562         return -1;
1563     }
1564 
1565     p = mode_changeable;
1566     memset(mode_changeable, 0, inlen + 2);
1567     changeable_len = mode_sense_page(s, page, &p, 1);
1568     assert(changeable_len == len);
1569 
1570     /* Check that unchangeable bits are the same as what MODE SENSE
1571      * would return.
1572      */
1573     for (i = 2; i < len; i++) {
1574         if (((mode_current[i] ^ inbuf[i - 2]) & ~mode_changeable[i]) != 0) {
1575             return -1;
1576         }
1577     }
1578     return 0;
1579 }
1580 
1581 static void scsi_disk_apply_mode_select(SCSIDiskState *s, int page, uint8_t *p)
1582 {
1583     switch (page) {
1584     case MODE_PAGE_CACHING:
1585         blk_set_enable_write_cache(s->qdev.conf.blk, (p[0] & 4) != 0);
1586         break;
1587 
1588     default:
1589         break;
1590     }
1591 }
1592 
1593 static int mode_select_pages(SCSIDiskReq *r, uint8_t *p, int len, bool change)
1594 {
1595     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
1596 
1597     while (len > 0) {
1598         int page, subpage, page_len;
1599 
1600         /* Parse both possible formats for the mode page headers.  */
1601         page = p[0] & 0x3f;
1602         if (p[0] & 0x40) {
1603             if (len < 4) {
1604                 goto invalid_param_len;
1605             }
1606             subpage = p[1];
1607             page_len = lduw_be_p(&p[2]);
1608             p += 4;
1609             len -= 4;
1610         } else {
1611             if (len < 2) {
1612                 goto invalid_param_len;
1613             }
1614             subpage = 0;
1615             page_len = p[1];
1616             p += 2;
1617             len -= 2;
1618         }
1619 
1620         if (subpage) {
1621             goto invalid_param;
1622         }
1623         if (page_len > len) {
1624             if (!(s->quirks & SCSI_DISK_QUIRK_MODE_PAGE_TRUNCATED)) {
1625                 goto invalid_param_len;
1626             }
1627             trace_scsi_disk_mode_select_page_truncated(page, page_len, len);
1628         }
1629 
1630         if (!change) {
1631             if (scsi_disk_check_mode_select(s, page, p, page_len) < 0) {
1632                 goto invalid_param;
1633             }
1634         } else {
1635             scsi_disk_apply_mode_select(s, page, p);
1636         }
1637 
1638         p += page_len;
1639         len -= page_len;
1640     }
1641     return 0;
1642 
1643 invalid_param:
1644     scsi_check_condition(r, SENSE_CODE(INVALID_PARAM));
1645     return -1;
1646 
1647 invalid_param_len:
1648     scsi_check_condition(r, SENSE_CODE(INVALID_PARAM_LEN));
1649     return -1;
1650 }
1651 
/*
 * Emulate MODE SELECT (6) and MODE SELECT (10).  Parses the parameter
 * header and optional block descriptor from @inbuf, then processes the
 * mode pages in two passes (validate, then apply) so that a bad page
 * leaves the device state untouched.  Completes the request itself,
 * either directly or via an asynchronous flush.
 */
static void scsi_disk_emulate_mode_select(SCSIDiskReq *r, uint8_t *inbuf)
{
    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
    uint8_t *p = inbuf;
    int cmd = r->req.cmd.buf[0];
    int len = r->req.cmd.xfer;
    int hdr_len = (cmd == MODE_SELECT ? 4 : 8);  /* 6- vs 10-byte CDB header */
    int bd_len, bs;
    int pass;

    /* CDB byte 1: PF (bit 4) must be 1, SP (bit 0) must be 0. */
    if ((r->req.cmd.buf[1] & 0x11) != 0x10) {
        if (!(s->quirks &
            (1 << SCSI_DISK_QUIRK_MODE_PAGE_VENDOR_SPECIFIC_APPLE))) {
            /* We only support PF=1, SP=0.  */
            goto invalid_field;
        }
    }

    if (len < hdr_len) {
        goto invalid_param_len;
    }

    /* Block descriptor length: byte 3 (6-byte) or bytes 6-7 (10-byte). */
    bd_len = (cmd == MODE_SELECT ? p[3] : lduw_be_p(&p[6]));
    len -= hdr_len;
    p += hdr_len;
    if (len < bd_len) {
        goto invalid_param_len;
    }
    /* At most a single 8-byte block descriptor is accepted. */
    if (bd_len != 0 && bd_len != 8) {
        goto invalid_param;
    }

    /* Allow changing the block size */
    if (bd_len) {
        bs = p[5] << 16 | p[6] << 8 | p[7];

        /*
         * Since the existing code only checks/updates bits 8-15 of the block
         * size, restrict ourselves to the same requirement for now to ensure
         * that a block size set by a block descriptor and then read back by
         * a subsequent SCSI command will be the same. Also disallow a block
         * size of 256 since we cannot handle anything below BDRV_SECTOR_SIZE.
         */
        if (bs && !(bs & ~0xfe00) && bs != s->qdev.blocksize) {
            s->qdev.blocksize = bs;
            trace_scsi_disk_mode_select_set_blocksize(s->qdev.blocksize);
        }
    }

    len -= bd_len;
    p += bd_len;

    /* Ensure no change is made if there is an error!  */
    for (pass = 0; pass < 2; pass++) {
        if (mode_select_pages(r, p, len, pass == 1) < 0) {
            /* Validation (pass 0) already set the sense code. */
            assert(pass == 0);
            return;
        }
    }
    /* Writethrough mode: persist any cache-setting change with a flush. */
    if (!blk_enable_write_cache(s->qdev.conf.blk)) {
        /* The request is used as the AIO opaque value, so add a ref.  */
        scsi_req_ref(&r->req);
        block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct, 0,
                         BLOCK_ACCT_FLUSH);
        r->req.aiocb = blk_aio_flush(s->qdev.conf.blk, scsi_aio_complete, r);
        return;
    }

    scsi_req_complete(&r->req, GOOD);
    return;

invalid_param:
    scsi_check_condition(r, SENSE_CODE(INVALID_PARAM));
    return;

invalid_param_len:
    scsi_check_condition(r, SENSE_CODE(INVALID_PARAM_LEN));
    return;

invalid_field:
    scsi_check_condition(r, SENSE_CODE(INVALID_FIELD));
}
1734 
1735 /* sector_num and nb_sectors expected to be in qdev blocksize */
1736 static inline bool check_lba_range(SCSIDiskState *s,
1737                                    uint64_t sector_num, uint32_t nb_sectors)
1738 {
1739     /*
1740      * The first line tests that no overflow happens when computing the last
1741      * sector.  The second line tests that the last accessed sector is in
1742      * range.
1743      *
1744      * Careful, the computations should not underflow for nb_sectors == 0,
1745      * and a 0-block read to the first LBA beyond the end of device is
1746      * valid.
1747      */
1748     return (sector_num <= sector_num + nb_sectors &&
1749             sector_num + nb_sectors <= s->qdev.max_lba + 1);
1750 }
1751 
/* Per-UNMAP-command state threaded through the discard completion chain. */
typedef struct UnmapCBData {
    SCSIDiskReq *r;   /* request being serviced; a ref is held until done */
    uint8_t *inbuf;   /* next 16-byte UNMAP block descriptor to process */
    int count;        /* descriptors still to be processed */
} UnmapCBData;
1757 
1758 static void scsi_unmap_complete(void *opaque, int ret);
1759 
/*
 * Issue the discard for the next UNMAP block descriptor, or complete the
 * request once all descriptors are done.  Called directly for the first
 * descriptor and re-entered from scsi_unmap_complete() for the rest; the
 * request must have no in-flight AIO on entry.  On exit paths it drops
 * the reference taken in scsi_disk_emulate_unmap() and frees @data.
 */
static void scsi_unmap_complete_noio(UnmapCBData *data, int ret)
{
    SCSIDiskReq *r = data->r;
    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);

    assert(r->req.aiocb == NULL);

    if (data->count > 0) {
        /* Descriptor layout: 8-byte LBA followed by a 4-byte block count. */
        uint64_t sector_num = ldq_be_p(&data->inbuf[0]);
        uint32_t nb_sectors = ldl_be_p(&data->inbuf[8]) & 0xffffffffULL;
        /* Convert logical blocks to 512-byte sectors. */
        r->sector = sector_num * (s->qdev.blocksize / BDRV_SECTOR_SIZE);
        r->sector_count = nb_sectors * (s->qdev.blocksize / BDRV_SECTOR_SIZE);

        if (!check_lba_range(s, sector_num, nb_sectors)) {
            block_acct_invalid(blk_get_stats(s->qdev.conf.blk),
                               BLOCK_ACCT_UNMAP);
            scsi_check_condition(r, SENSE_CODE(LBA_OUT_OF_RANGE));
            goto done;
        }

        block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct,
                         r->sector_count * BDRV_SECTOR_SIZE,
                         BLOCK_ACCT_UNMAP);

        /* scsi_unmap_complete() re-enters us for the next descriptor. */
        r->req.aiocb = blk_aio_pdiscard(s->qdev.conf.blk,
                                        r->sector * BDRV_SECTOR_SIZE,
                                        r->sector_count * BDRV_SECTOR_SIZE,
                                        scsi_unmap_complete, data);
        data->count--;
        data->inbuf += 16;
        return;
    }

    scsi_req_complete(&r->req, GOOD);

done:
    scsi_req_unref(&r->req);
    g_free(data);
}
1799 
1800 static void scsi_unmap_complete(void *opaque, int ret)
1801 {
1802     UnmapCBData *data = opaque;
1803     SCSIDiskReq *r = data->r;
1804     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
1805 
1806     assert(r->req.aiocb != NULL);
1807     r->req.aiocb = NULL;
1808 
1809     if (scsi_disk_req_check_error(r, ret, true)) {
1810         scsi_req_unref(&r->req);
1811         g_free(data);
1812     } else {
1813         block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct);
1814         scsi_unmap_complete_noio(data, ret);
1815     }
1816 }
1817 
/*
 * Emulate the UNMAP command.  Validates the parameter list in @inbuf
 * (2-byte data length, 2-byte block descriptor data length, then 16-byte
 * descriptors starting at offset 8) and kicks off the asynchronous
 * per-descriptor discard chain via scsi_unmap_complete_noio().
 */
static void scsi_disk_emulate_unmap(SCSIDiskReq *r, uint8_t *inbuf)
{
    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
    uint8_t *p = inbuf;
    int len = r->req.cmd.xfer;
    UnmapCBData *data;

    /* Reject ANCHOR=1.  */
    if (r->req.cmd.buf[1] & 0x1) {
        goto invalid_field;
    }

    /* Need at least the 8-byte parameter list header. */
    if (len < 8) {
        goto invalid_param_len;
    }
    /* UNMAP data length (bytes 0-1) must fit in the transfer. */
    if (len < lduw_be_p(&p[0]) + 2) {
        goto invalid_param_len;
    }
    /* Block descriptor data length (bytes 2-3) must fit as well... */
    if (len < lduw_be_p(&p[2]) + 8) {
        goto invalid_param_len;
    }
    /* ... and be a whole number of 16-byte descriptors. */
    if (lduw_be_p(&p[2]) & 15) {
        goto invalid_param_len;
    }

    if (!blk_is_writable(s->qdev.conf.blk)) {
        block_acct_invalid(blk_get_stats(s->qdev.conf.blk), BLOCK_ACCT_UNMAP);
        scsi_check_condition(r, SENSE_CODE(WRITE_PROTECTED));
        return;
    }

    data = g_new0(UnmapCBData, 1);
    data->r = r;
    data->inbuf = &p[8];
    data->count = lduw_be_p(&p[2]) >> 4;  /* descriptor bytes / 16 */

    /* The matching unref is in scsi_unmap_complete, before data is freed.  */
    scsi_req_ref(&r->req);
    scsi_unmap_complete_noio(data, 0);
    return;

invalid_param_len:
    block_acct_invalid(blk_get_stats(s->qdev.conf.blk), BLOCK_ACCT_UNMAP);
    scsi_check_condition(r, SENSE_CODE(INVALID_PARAM_LEN));
    return;

invalid_field:
    block_acct_invalid(blk_get_stats(s->qdev.conf.blk), BLOCK_ACCT_UNMAP);
    scsi_check_condition(r, SENSE_CODE(INVALID_FIELD));
}
1868 
/* State carried across the chunked writes of an emulated WRITE SAME. */
typedef struct WriteSameCBData {
    SCSIDiskReq *r;        /* request being served; a ref is held while in flight */
    int64_t sector;        /* next 512-byte sector to write */
    int nb_sectors;        /* 512-byte sectors still left to write */
    QEMUIOVector qiov;     /* single-element vector wrapping iov for each chunk */
    struct iovec iov;      /* pattern bounce buffer; iov_len shrinks at the tail */
} WriteSameCBData;
1876 
/*
 * Completion callback for one chunk of an emulated WRITE SAME.  Either
 * resubmits the next (possibly shorter) chunk or finishes the request
 * and releases the bounce buffer.
 */
static void scsi_write_same_complete(void *opaque, int ret)
{
    WriteSameCBData *data = opaque;
    SCSIDiskReq *r = data->r;
    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);

    assert(r->req.aiocb != NULL);
    r->req.aiocb = NULL;

    if (scsi_disk_req_check_error(r, ret, true)) {
        goto done;
    }

    block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct);

    /* Advance past the chunk that just completed.  */
    data->nb_sectors -= data->iov.iov_len / BDRV_SECTOR_SIZE;
    data->sector += data->iov.iov_len / BDRV_SECTOR_SIZE;
    data->iov.iov_len = MIN(data->nb_sectors * BDRV_SECTOR_SIZE,
                            data->iov.iov_len);
    if (data->iov.iov_len) {
        block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct,
                         data->iov.iov_len, BLOCK_ACCT_WRITE);
        /* Reinitialize qiov, to handle unaligned WRITE SAME request
         * where final qiov may need smaller size */
        qemu_iovec_init_external(&data->qiov, &data->iov, 1);
        r->req.aiocb = blk_aio_pwritev(s->qdev.conf.blk,
                                       data->sector << BDRV_SECTOR_BITS,
                                       &data->qiov, 0,
                                       scsi_write_same_complete, data);
        return;
    }

    scsi_req_complete(&r->req, GOOD);

done:
    /* Matches the ref taken when the first chunk was submitted.  */
    scsi_req_unref(&r->req);
    qemu_vfree(data->iov.iov_base);
    g_free(data);
}
1916 
1917 static void scsi_disk_emulate_write_same(SCSIDiskReq *r, uint8_t *inbuf)
1918 {
1919     SCSIRequest *req = &r->req;
1920     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
1921     uint32_t nb_sectors = scsi_data_cdb_xfer(r->req.cmd.buf);
1922     WriteSameCBData *data;
1923     uint8_t *buf;
1924     int i, l;
1925 
1926     /* Fail if PBDATA=1 or LBDATA=1 or ANCHOR=1.  */
1927     if (nb_sectors == 0 || (req->cmd.buf[1] & 0x16)) {
1928         scsi_check_condition(r, SENSE_CODE(INVALID_FIELD));
1929         return;
1930     }
1931 
1932     if (!blk_is_writable(s->qdev.conf.blk)) {
1933         scsi_check_condition(r, SENSE_CODE(WRITE_PROTECTED));
1934         return;
1935     }
1936     if (!check_lba_range(s, r->req.cmd.lba, nb_sectors)) {
1937         scsi_check_condition(r, SENSE_CODE(LBA_OUT_OF_RANGE));
1938         return;
1939     }
1940 
1941     if ((req->cmd.buf[1] & 0x1) || buffer_is_zero(inbuf, s->qdev.blocksize)) {
1942         int flags = (req->cmd.buf[1] & 0x8) ? BDRV_REQ_MAY_UNMAP : 0;
1943 
1944         /* The request is used as the AIO opaque value, so add a ref.  */
1945         scsi_req_ref(&r->req);
1946         block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct,
1947                          nb_sectors * s->qdev.blocksize,
1948                         BLOCK_ACCT_WRITE);
1949         r->req.aiocb = blk_aio_pwrite_zeroes(s->qdev.conf.blk,
1950                                 r->req.cmd.lba * s->qdev.blocksize,
1951                                 nb_sectors * s->qdev.blocksize,
1952                                 flags, scsi_aio_complete, r);
1953         return;
1954     }
1955 
1956     data = g_new0(WriteSameCBData, 1);
1957     data->r = r;
1958     data->sector = r->req.cmd.lba * (s->qdev.blocksize / BDRV_SECTOR_SIZE);
1959     data->nb_sectors = nb_sectors * (s->qdev.blocksize / BDRV_SECTOR_SIZE);
1960     data->iov.iov_len = MIN(data->nb_sectors * BDRV_SECTOR_SIZE,
1961                             SCSI_WRITE_SAME_MAX);
1962     data->iov.iov_base = buf = blk_blockalign(s->qdev.conf.blk,
1963                                               data->iov.iov_len);
1964     qemu_iovec_init_external(&data->qiov, &data->iov, 1);
1965 
1966     for (i = 0; i < data->iov.iov_len; i += l) {
1967         l = MIN(s->qdev.blocksize, data->iov.iov_len - i);
1968         memcpy(&buf[i], inbuf, l);
1969     }
1970 
1971     scsi_req_ref(&r->req);
1972     block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct,
1973                      data->iov.iov_len, BLOCK_ACCT_WRITE);
1974     r->req.aiocb = blk_aio_pwritev(s->qdev.conf.blk,
1975                                    data->sector << BDRV_SECTOR_BITS,
1976                                    &data->qiov, 0,
1977                                    scsi_write_same_complete, data);
1978 }
1979 
1980 static void scsi_disk_emulate_write_data(SCSIRequest *req)
1981 {
1982     SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
1983 
1984     if (r->iov.iov_len) {
1985         int buflen = r->iov.iov_len;
1986         trace_scsi_disk_emulate_write_data(buflen);
1987         r->iov.iov_len = 0;
1988         scsi_req_data(&r->req, buflen);
1989         return;
1990     }
1991 
1992     switch (req->cmd.buf[0]) {
1993     case MODE_SELECT:
1994     case MODE_SELECT_10:
1995         /* This also clears the sense buffer for REQUEST SENSE.  */
1996         scsi_disk_emulate_mode_select(r, r->iov.iov_base);
1997         break;
1998 
1999     case UNMAP:
2000         scsi_disk_emulate_unmap(r, r->iov.iov_base);
2001         break;
2002 
2003     case VERIFY_10:
2004     case VERIFY_12:
2005     case VERIFY_16:
2006         if (r->req.status == -1) {
2007             scsi_check_condition(r, SENSE_CODE(INVALID_FIELD));
2008         }
2009         break;
2010 
2011     case WRITE_SAME_10:
2012     case WRITE_SAME_16:
2013         scsi_disk_emulate_write_same(r, r->iov.iov_base);
2014         break;
2015 
2016     case FORMAT_UNIT:
2017         scsi_req_complete(&r->req, GOOD);
2018         break;
2019 
2020     default:
2021         abort();
2022     }
2023 }
2024 
/*
 * Emulate a SCSI command that is served entirely within QEMU (no medium
 * data transfer).  Returns the number of bytes to transfer: positive
 * when the device sends data to the HBA, negative when it expects data
 * from the HBA, and 0 when the command has already been completed.
 */
static int32_t scsi_disk_emulate_command(SCSIRequest *req, uint8_t *buf)
{
    SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
    uint64_t nb_sectors;
    uint8_t *outbuf;
    int buflen;

    /* These commands are allowed to run even with no medium inserted.  */
    switch (req->cmd.buf[0]) {
    case INQUIRY:
    case MODE_SENSE:
    case MODE_SENSE_10:
    case RESERVE:
    case RESERVE_10:
    case RELEASE:
    case RELEASE_10:
    case START_STOP:
    case ALLOW_MEDIUM_REMOVAL:
    case GET_CONFIGURATION:
    case GET_EVENT_STATUS_NOTIFICATION:
    case MECHANISM_STATUS:
    case REQUEST_SENSE:
        break;

    default:
        if (!blk_is_available(s->qdev.conf.blk)) {
            scsi_check_condition(r, SENSE_CODE(NO_MEDIUM));
            return 0;
        }
        break;
    }

    /*
     * FIXME: we shouldn't return anything bigger than 4k, but the code
     * requires the buffer to be as big as req->cmd.xfer in several
     * places.  So, do not allow CDBs with a very large ALLOCATION
     * LENGTH.  The real fix would be to modify scsi_read_data and
     * dma_buf_read, so that they return data beyond the buflen
     * as all zeros.
     */
    if (req->cmd.xfer > 65536) {
        goto illegal_request;
    }
    r->buflen = MAX(4096, req->cmd.xfer);

    if (!r->iov.iov_base) {
        r->iov.iov_base = blk_blockalign(s->qdev.conf.blk, r->buflen);
    }

    outbuf = r->iov.iov_base;
    memset(outbuf, 0, r->buflen);
    switch (req->cmd.buf[0]) {
    case TEST_UNIT_READY:
        /* Media presence was already verified in the switch above.  */
        assert(blk_is_available(s->qdev.conf.blk));
        break;
    case INQUIRY:
        buflen = scsi_disk_emulate_inquiry(req, outbuf);
        if (buflen < 0) {
            goto illegal_request;
        }
        break;
    case MODE_SENSE:
    case MODE_SENSE_10:
        buflen = scsi_disk_emulate_mode_sense(r, outbuf);
        if (buflen < 0) {
            goto illegal_request;
        }
        break;
    case READ_TOC:
        buflen = scsi_disk_emulate_read_toc(req, outbuf);
        if (buflen < 0) {
            goto illegal_request;
        }
        break;
    case RESERVE:
        if (req->cmd.buf[1] & 1) {
            goto illegal_request;
        }
        break;
    case RESERVE_10:
        if (req->cmd.buf[1] & 3) {
            goto illegal_request;
        }
        break;
    case RELEASE:
        if (req->cmd.buf[1] & 1) {
            goto illegal_request;
        }
        break;
    case RELEASE_10:
        if (req->cmd.buf[1] & 3) {
            goto illegal_request;
        }
        break;
    case START_STOP:
        if (scsi_disk_emulate_start_stop(r) < 0) {
            return 0;
        }
        break;
    case ALLOW_MEDIUM_REMOVAL:
        s->tray_locked = req->cmd.buf[4] & 1;
        blk_lock_medium(s->qdev.conf.blk, req->cmd.buf[4] & 1);
        break;
    case READ_CAPACITY_10:
        /* The normal LEN field for this command is zero.  */
        memset(outbuf, 0, 8);
        blk_get_geometry(s->qdev.conf.blk, &nb_sectors);
        if (!nb_sectors) {
            scsi_check_condition(r, SENSE_CODE(LUN_NOT_READY));
            return 0;
        }
        /* PMI=0 requires the LBA field to be zero.  */
        if ((req->cmd.buf[8] & 1) == 0 && req->cmd.lba) {
            goto illegal_request;
        }
        nb_sectors /= s->qdev.blocksize / BDRV_SECTOR_SIZE;
        /* Returned value is the address of the last sector.  */
        nb_sectors--;
        /* Remember the new size for read/write sanity checking. */
        s->qdev.max_lba = nb_sectors;
        /* Clip to 2TB, instead of returning capacity modulo 2TB. */
        if (nb_sectors > UINT32_MAX) {
            nb_sectors = UINT32_MAX;
        }
        outbuf[0] = (nb_sectors >> 24) & 0xff;
        outbuf[1] = (nb_sectors >> 16) & 0xff;
        outbuf[2] = (nb_sectors >> 8) & 0xff;
        outbuf[3] = nb_sectors & 0xff;
        outbuf[4] = 0;
        outbuf[5] = 0;
        outbuf[6] = s->qdev.blocksize >> 8;
        outbuf[7] = 0;
        break;
    case REQUEST_SENSE:
        /* Just return "NO SENSE".  */
        buflen = scsi_convert_sense(NULL, 0, outbuf, r->buflen,
                                    (req->cmd.buf[1] & 1) == 0);
        if (buflen < 0) {
            goto illegal_request;
        }
        break;
    case MECHANISM_STATUS:
        buflen = scsi_emulate_mechanism_status(s, outbuf);
        if (buflen < 0) {
            goto illegal_request;
        }
        break;
    case GET_CONFIGURATION:
        buflen = scsi_get_configuration(s, outbuf);
        if (buflen < 0) {
            goto illegal_request;
        }
        break;
    case GET_EVENT_STATUS_NOTIFICATION:
        buflen = scsi_get_event_status_notification(s, r, outbuf);
        if (buflen < 0) {
            goto illegal_request;
        }
        break;
    case READ_DISC_INFORMATION:
        buflen = scsi_read_disc_information(s, r, outbuf);
        if (buflen < 0) {
            goto illegal_request;
        }
        break;
    case READ_DVD_STRUCTURE:
        buflen = scsi_read_dvd_structure(s, r, outbuf);
        if (buflen < 0) {
            goto illegal_request;
        }
        break;
    case SERVICE_ACTION_IN_16:
        /* Service Action In subcommands. */
        if ((req->cmd.buf[1] & 31) == SAI_READ_CAPACITY_16) {
            trace_scsi_disk_emulate_command_SAI_16();
            memset(outbuf, 0, req->cmd.xfer);
            blk_get_geometry(s->qdev.conf.blk, &nb_sectors);
            if (!nb_sectors) {
                scsi_check_condition(r, SENSE_CODE(LUN_NOT_READY));
                return 0;
            }
            /* PMI=0 requires the LBA field to be zero.  */
            if ((req->cmd.buf[14] & 1) == 0 && req->cmd.lba) {
                goto illegal_request;
            }
            nb_sectors /= s->qdev.blocksize / BDRV_SECTOR_SIZE;
            /* Returned value is the address of the last sector.  */
            nb_sectors--;
            /* Remember the new size for read/write sanity checking. */
            s->qdev.max_lba = nb_sectors;
            outbuf[0] = (nb_sectors >> 56) & 0xff;
            outbuf[1] = (nb_sectors >> 48) & 0xff;
            outbuf[2] = (nb_sectors >> 40) & 0xff;
            outbuf[3] = (nb_sectors >> 32) & 0xff;
            outbuf[4] = (nb_sectors >> 24) & 0xff;
            outbuf[5] = (nb_sectors >> 16) & 0xff;
            outbuf[6] = (nb_sectors >> 8) & 0xff;
            outbuf[7] = nb_sectors & 0xff;
            outbuf[8] = 0;
            outbuf[9] = 0;
            outbuf[10] = s->qdev.blocksize >> 8;
            outbuf[11] = 0;
            outbuf[12] = 0;
            outbuf[13] = get_physical_block_exp(&s->qdev.conf);

            /* set TPE bit if the format supports discard */
            if (s->qdev.conf.discard_granularity) {
                outbuf[14] = 0x80;
            }

            /* Protection, exponent and lowest lba field left blank. */
            break;
        }
        trace_scsi_disk_emulate_command_SAI_unsupported();
        goto illegal_request;
    case SYNCHRONIZE_CACHE:
        /* The request is used as the AIO opaque value, so add a ref.  */
        scsi_req_ref(&r->req);
        block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct, 0,
                         BLOCK_ACCT_FLUSH);
        r->req.aiocb = blk_aio_flush(s->qdev.conf.blk, scsi_aio_complete, r);
        return 0;
    case SEEK_10:
        trace_scsi_disk_emulate_command_SEEK_10(r->req.cmd.lba);
        if (r->req.cmd.lba > s->qdev.max_lba) {
            goto illegal_lba;
        }
        break;
    case MODE_SELECT:
        trace_scsi_disk_emulate_command_MODE_SELECT(r->req.cmd.xfer);
        break;
    case MODE_SELECT_10:
        trace_scsi_disk_emulate_command_MODE_SELECT_10(r->req.cmd.xfer);
        break;
    case UNMAP:
        trace_scsi_disk_emulate_command_UNMAP(r->req.cmd.xfer);
        break;
    case VERIFY_10:
    case VERIFY_12:
    case VERIFY_16:
        trace_scsi_disk_emulate_command_VERIFY((req->cmd.buf[1] >> 1) & 3);
        if (req->cmd.buf[1] & 6) {
            goto illegal_request;
        }
        break;
    case WRITE_SAME_10:
    case WRITE_SAME_16:
        trace_scsi_disk_emulate_command_WRITE_SAME(
                req->cmd.buf[0] == WRITE_SAME_10 ? 10 : 16, r->req.cmd.xfer);
        break;
    case FORMAT_UNIT:
        trace_scsi_disk_emulate_command_FORMAT_UNIT(r->req.cmd.xfer);
        break;
    default:
        trace_scsi_disk_emulate_command_UNKNOWN(buf[0],
                                                scsi_command_name(buf[0]));
        scsi_check_condition(r, SENSE_CODE(INVALID_OPCODE));
        return 0;
    }
    assert(!r->req.aiocb);
    /* Report how much of the prepared buffer will actually transfer.  */
    r->iov.iov_len = MIN(r->buflen, req->cmd.xfer);
    if (r->iov.iov_len == 0) {
        scsi_req_complete(&r->req, GOOD);
    }
    if (r->req.cmd.mode == SCSI_XFER_TO_DEV) {
        assert(r->iov.iov_len == req->cmd.xfer);
        return -r->iov.iov_len;
    } else {
        return r->iov.iov_len;
    }

illegal_request:
    if (r->req.status == -1) {
        scsi_check_condition(r, SENSE_CODE(INVALID_FIELD));
    }
    return 0;

illegal_lba:
    scsi_check_condition(r, SENSE_CODE(LBA_OUT_OF_RANGE));
    return 0;
}
2304 
/* Execute a SCSI command.  Returns the length of the data expected by the
   command.  This will be positive for data transfers from the device
   (e.g. disk reads), negative for transfers to the device (e.g. disk writes),
   and zero if the command does not transfer any data.  */
2309 
/*
 * Validate a READ/WRITE/VERIFY CDB and compute the affected sector
 * range.  The actual I/O is started later through the reqops
 * read_data/write_data callbacks.  Return convention matches the
 * comment above: positive = device-to-HBA, negative = HBA-to-device.
 */
static int32_t scsi_disk_dma_command(SCSIRequest *req, uint8_t *buf)
{
    SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
    SCSIDiskClass *sdc = (SCSIDiskClass *) object_get_class(OBJECT(s));
    uint32_t len;
    uint8_t command;

    command = buf[0];

    if (!blk_is_available(s->qdev.conf.blk)) {
        scsi_check_condition(r, SENSE_CODE(NO_MEDIUM));
        return 0;
    }

    /* Transfer length in logical blocks, taken from the CDB.  */
    len = scsi_data_cdb_xfer(r->req.cmd.buf);
    switch (command) {
    case READ_6:
    case READ_10:
    case READ_12:
    case READ_16:
        trace_scsi_disk_dma_command_READ(r->req.cmd.lba, len);
        /* Protection information is not supported.  For SCSI versions 2 and
         * older (as determined by snooping the guest's INQUIRY commands),
         * there is no RD/WR/VRPROTECT, so skip this check in these versions.
         */
        if (s->qdev.scsi_version > 2 && (r->req.cmd.buf[1] & 0xe0)) {
            goto illegal_request;
        }
        if (!check_lba_range(s, r->req.cmd.lba, len)) {
            goto illegal_lba;
        }
        /* Convert logical blocks to 512-byte sectors.  */
        r->sector = r->req.cmd.lba * (s->qdev.blocksize / BDRV_SECTOR_SIZE);
        r->sector_count = len * (s->qdev.blocksize / BDRV_SECTOR_SIZE);
        break;
    case WRITE_6:
    case WRITE_10:
    case WRITE_12:
    case WRITE_16:
    case WRITE_VERIFY_10:
    case WRITE_VERIFY_12:
    case WRITE_VERIFY_16:
        if (!blk_is_writable(s->qdev.conf.blk)) {
            scsi_check_condition(r, SENSE_CODE(WRITE_PROTECTED));
            return 0;
        }
        trace_scsi_disk_dma_command_WRITE(
                (command & 0xe) == 0xe ? "And Verify " : "",
                r->req.cmd.lba, len);
        /* fall through */
    case VERIFY_10:
    case VERIFY_12:
    case VERIFY_16:
        /* We get here only for BYTCHK == 0x01 and only for scsi-block.
         * As far as DMA is concerned, we can treat it the same as a write;
         * scsi_block_do_sgio will send VERIFY commands.
         */
        if (s->qdev.scsi_version > 2 && (r->req.cmd.buf[1] & 0xe0)) {
            goto illegal_request;
        }
        if (!check_lba_range(s, r->req.cmd.lba, len)) {
            goto illegal_lba;
        }
        r->sector = r->req.cmd.lba * (s->qdev.blocksize / BDRV_SECTOR_SIZE);
        r->sector_count = len * (s->qdev.blocksize / BDRV_SECTOR_SIZE);
        break;
    default:
        abort();
    illegal_request:
        scsi_check_condition(r, SENSE_CODE(INVALID_FIELD));
        return 0;
    illegal_lba:
        scsi_check_condition(r, SENSE_CODE(LBA_OUT_OF_RANGE));
        return 0;
    }
    r->need_fua_emulation = sdc->need_fua_emulation(&r->req.cmd);
    if (r->sector_count == 0) {
        scsi_req_complete(&r->req, GOOD);
    }
    assert(r->iov.iov_len == 0);
    if (r->req.cmd.mode == SCSI_XFER_TO_DEV) {
        return -r->sector_count * BDRV_SECTOR_SIZE;
    } else {
        return r->sector_count * BDRV_SECTOR_SIZE;
    }
}
2396 
2397 static void scsi_disk_reset(DeviceState *dev)
2398 {
2399     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev.qdev, dev);
2400     uint64_t nb_sectors;
2401 
2402     scsi_device_purge_requests(&s->qdev, SENSE_CODE(RESET));
2403 
2404     blk_get_geometry(s->qdev.conf.blk, &nb_sectors);
2405 
2406     nb_sectors /= s->qdev.blocksize / BDRV_SECTOR_SIZE;
2407     if (nb_sectors) {
2408         nb_sectors--;
2409     }
2410     s->qdev.max_lba = nb_sectors;
2411     /* reset tray statuses */
2412     s->tray_locked = 0;
2413     s->tray_open = 0;
2414 
2415     s->qdev.scsi_version = s->qdev.default_scsi_version;
2416 }
2417 
2418 static void scsi_disk_drained_begin(void *opaque)
2419 {
2420     SCSIDiskState *s = opaque;
2421 
2422     scsi_device_drained_begin(&s->qdev);
2423 }
2424 
2425 static void scsi_disk_drained_end(void *opaque)
2426 {
2427     SCSIDiskState *s = opaque;
2428 
2429     scsi_device_drained_end(&s->qdev);
2430 }
2431 
2432 static void scsi_disk_resize_cb(void *opaque)
2433 {
2434     SCSIDiskState *s = opaque;
2435 
2436     /* SPC lists this sense code as available only for
2437      * direct-access devices.
2438      */
2439     if (s->qdev.type == TYPE_DISK) {
2440         scsi_device_report_change(&s->qdev, SENSE_CODE(CAPACITY_CHANGED));
2441     }
2442 }
2443 
static void scsi_cd_change_media_cb(void *opaque, bool load, Error **errp)
{
    SCSIDiskState *s = opaque;

    /*
     * When a CD gets changed, we have to report an ejected state and
     * then a loaded state to guests so that they detect tray
     * open/close and media change events.  Guests that do not use
     * GET_EVENT_STATUS_NOTIFICATION to detect such tray open/close
     * states rely on this behavior.
     *
     * media_changed governs the state machine used for unit attention
     * report.  media_event is used by GET EVENT STATUS NOTIFICATION.
     */
    s->media_changed = load;     /* enables the follow-up MEDIUM_CHANGED UA */
    s->tray_open = !load;
    /* First step of the two-step sequence: report "no medium".  */
    scsi_device_set_ua(&s->qdev, SENSE_CODE(UNIT_ATTENTION_NO_MEDIUM));
    s->media_event = true;
    s->eject_request = false;    /* a real load/eject cancels pending requests */
}
2464 
2465 static void scsi_cd_eject_request_cb(void *opaque, bool force)
2466 {
2467     SCSIDiskState *s = opaque;
2468 
2469     s->eject_request = true;
2470     if (force) {
2471         s->tray_locked = false;
2472     }
2473 }
2474 
2475 static bool scsi_cd_is_tray_open(void *opaque)
2476 {
2477     return ((SCSIDiskState *)opaque)->tray_open;
2478 }
2479 
2480 static bool scsi_cd_is_medium_locked(void *opaque)
2481 {
2482     return ((SCSIDiskState *)opaque)->tray_locked;
2483 }
2484 
/* Block-backend callbacks for removable media: adds tray/eject handling. */
static const BlockDevOps scsi_disk_removable_block_ops = {
    .change_media_cb  = scsi_cd_change_media_cb,
    .drained_begin    = scsi_disk_drained_begin,
    .drained_end      = scsi_disk_drained_end,
    .eject_request_cb = scsi_cd_eject_request_cb,
    .is_medium_locked = scsi_cd_is_medium_locked,
    .is_tray_open     = scsi_cd_is_tray_open,
    .resize_cb        = scsi_disk_resize_cb,
};
2494 
/* Block-backend callbacks for non-removable devices.  */
static const BlockDevOps scsi_disk_block_ops = {
    .drained_begin = scsi_disk_drained_begin,
    .drained_end   = scsi_disk_drained_end,
    .resize_cb     = scsi_disk_resize_cb,
};
2500 
2501 static void scsi_disk_unit_attention_reported(SCSIDevice *dev)
2502 {
2503     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, dev);
2504     if (s->media_changed) {
2505         s->media_changed = false;
2506         scsi_device_set_ua(&s->qdev, SENSE_CODE(MEDIUM_CHANGED));
2507     }
2508 }
2509 
/*
 * Realize step shared by the scsi-disk variants: validates the
 * configured block backend, fills in default identification strings,
 * and wires up the BlockDevOps callbacks.
 */
static void scsi_realize(SCSIDevice *dev, Error **errp)
{
    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, dev);
    bool read_only;

    if (!s->qdev.conf.blk) {
        error_setg(errp, "drive property not set");
        return;
    }

    /* Only removable devices may start out with an empty drive.  */
    if (!(s->features & (1 << SCSI_DISK_F_REMOVABLE)) &&
        !blk_is_inserted(s->qdev.conf.blk)) {
        error_setg(errp, "Device needs media, but drive is empty");
        return;
    }

    if (!blkconf_blocksizes(&s->qdev.conf, errp)) {
        return;
    }

    if (blk_get_aio_context(s->qdev.conf.blk) != qemu_get_aio_context() &&
        !s->qdev.hba_supports_iothread)
    {
        error_setg(errp, "HBA does not support iothreads");
        return;
    }

    if (dev->type == TYPE_DISK) {
        if (!blkconf_geometry(&dev->conf, NULL, 65535, 255, 255, errp)) {
            return;
        }
    }

    /* CD-ROMs are always read-only regardless of backend permissions.  */
    read_only = !blk_supports_write_perm(s->qdev.conf.blk);
    if (dev->type == TYPE_ROM) {
        read_only = true;
    }

    if (!blkconf_apply_backend_options(&dev->conf, read_only,
                                       dev->type == TYPE_DISK, errp)) {
        return;
    }

    /* -1 means the user did not configure a discard granularity.  */
    if (s->qdev.conf.discard_granularity == -1) {
        s->qdev.conf.discard_granularity =
            MAX(s->qdev.conf.logical_block_size, DEFAULT_DISCARD_GRANULARITY);
    }

    /* Default identification strings used by INQUIRY emulation.  */
    if (!s->version) {
        s->version = g_strdup(qemu_hw_version());
    }
    if (!s->vendor) {
        s->vendor = g_strdup("QEMU");
    }
    if (s->serial && strlen(s->serial) > MAX_SERIAL_LEN) {
        error_setg(errp, "The serial number can't be longer than %d characters",
                   MAX_SERIAL_LEN);
        return;
    }
    if (!s->device_id) {
        if (s->serial) {
            if (strlen(s->serial) > MAX_SERIAL_LEN_FOR_DEVID) {
                error_setg(errp, "The serial number can't be longer than %d "
                           "characters when it is also used as the default for "
                           "device_id", MAX_SERIAL_LEN_FOR_DEVID);
                return;
            }
            s->device_id = g_strdup(s->serial);
        } else {
            const char *str = blk_name(s->qdev.conf.blk);
            if (str && *str) {
                s->device_id = g_strdup(str);
            }
        }
    }

    /* A host /dev/sg* node cannot back this emulated device.  */
    if (blk_is_sg(s->qdev.conf.blk)) {
        error_setg(errp, "unwanted /dev/sg*");
        return;
    }

    /* Removable devices additionally get the tray/eject callbacks.  */
    if ((s->features & (1 << SCSI_DISK_F_REMOVABLE)) &&
            !(s->features & (1 << SCSI_DISK_F_NO_REMOVABLE_DEVOPS))) {
        blk_set_dev_ops(s->qdev.conf.blk, &scsi_disk_removable_block_ops, s);
    } else {
        blk_set_dev_ops(s->qdev.conf.blk, &scsi_disk_block_ops, s);
    }

    blk_iostatus_enable(s->qdev.conf.blk);

    add_boot_device_lchs(&dev->qdev, NULL,
                         dev->conf.lcyls,
                         dev->conf.lheads,
                         dev->conf.lsecs);
}
2605 
static void scsi_unrealize(SCSIDevice *dev)
{
    /* Undo the boot-order registration done at the end of scsi_realize. */
    del_boot_device_lchs(&dev->qdev, NULL);
}
2610 
2611 static void scsi_hd_realize(SCSIDevice *dev, Error **errp)
2612 {
2613     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, dev);
2614 
2615     /* can happen for devices without drive. The error message for missing
2616      * backend will be issued in scsi_realize
2617      */
2618     if (s->qdev.conf.blk) {
2619         if (!blkconf_blocksizes(&s->qdev.conf, errp)) {
2620             return;
2621         }
2622     }
2623     s->qdev.blocksize = s->qdev.conf.logical_block_size;
2624     s->qdev.type = TYPE_DISK;
2625     if (!s->product) {
2626         s->product = g_strdup("QEMU HARDDISK");
2627     }
2628     scsi_realize(&s->qdev, errp);
2629 }
2630 
2631 static void scsi_cd_realize(SCSIDevice *dev, Error **errp)
2632 {
2633     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, dev);
2634     int ret;
2635     uint32_t blocksize = 2048;
2636 
2637     if (!dev->conf.blk) {
2638         /* Anonymous BlockBackend for an empty drive. As we put it into
2639          * dev->conf, qdev takes care of detaching on unplug. */
2640         dev->conf.blk = blk_new(qemu_get_aio_context(), 0, BLK_PERM_ALL);
2641         ret = blk_attach_dev(dev->conf.blk, &dev->qdev);
2642         assert(ret == 0);
2643     }
2644 
2645     if (dev->conf.physical_block_size != 0) {
2646         blocksize = dev->conf.physical_block_size;
2647     }
2648 
2649     s->qdev.blocksize = blocksize;
2650     s->qdev.type = TYPE_ROM;
2651     s->features |= 1 << SCSI_DISK_F_REMOVABLE;
2652     if (!s->product) {
2653         s->product = g_strdup("QEMU CD-ROM");
2654     }
2655     scsi_realize(&s->qdev, errp);
2656 }
2657 
2658 
/* Request ops for commands emulated entirely within QEMU.  */
static const SCSIReqOps scsi_disk_emulate_reqops = {
    .size         = sizeof(SCSIDiskReq),
    .free_req     = scsi_free_request,
    .send_command = scsi_disk_emulate_command,
    .read_data    = scsi_disk_emulate_read_data,
    .write_data   = scsi_disk_emulate_write_data,
    .get_buf      = scsi_get_buf,
    .load_request = scsi_disk_emulate_load_request,
    .save_request = scsi_disk_emulate_save_request,
};
2669 
/* Request ops for READ/WRITE/VERIFY commands that access the medium.  */
static const SCSIReqOps scsi_disk_dma_reqops = {
    .size         = sizeof(SCSIDiskReq),
    .free_req     = scsi_free_request,
    .send_command = scsi_disk_dma_command,
    .read_data    = scsi_read_data,
    .write_data   = scsi_write_data,
    .get_buf      = scsi_get_buf,
    .load_request = scsi_disk_load_request,
    .save_request = scsi_disk_save_request,
};
2680 
/*
 * Per-opcode dispatch table: fully emulated commands use
 * scsi_disk_emulate_reqops, medium-data commands use
 * scsi_disk_dma_reqops.  NOTE(review): entries left NULL here are
 * handled by the caller of this table — confirm in the request setup
 * code, which is outside this chunk.
 */
static const SCSIReqOps *const scsi_disk_reqops_dispatch[256] = {
    [TEST_UNIT_READY]                 = &scsi_disk_emulate_reqops,
    [INQUIRY]                         = &scsi_disk_emulate_reqops,
    [MODE_SENSE]                      = &scsi_disk_emulate_reqops,
    [MODE_SENSE_10]                   = &scsi_disk_emulate_reqops,
    [START_STOP]                      = &scsi_disk_emulate_reqops,
    [ALLOW_MEDIUM_REMOVAL]            = &scsi_disk_emulate_reqops,
    [READ_CAPACITY_10]                = &scsi_disk_emulate_reqops,
    [READ_TOC]                        = &scsi_disk_emulate_reqops,
    [READ_DVD_STRUCTURE]              = &scsi_disk_emulate_reqops,
    [READ_DISC_INFORMATION]           = &scsi_disk_emulate_reqops,
    [GET_CONFIGURATION]               = &scsi_disk_emulate_reqops,
    [GET_EVENT_STATUS_NOTIFICATION]   = &scsi_disk_emulate_reqops,
    [MECHANISM_STATUS]                = &scsi_disk_emulate_reqops,
    [SERVICE_ACTION_IN_16]            = &scsi_disk_emulate_reqops,
    [REQUEST_SENSE]                   = &scsi_disk_emulate_reqops,
    [SYNCHRONIZE_CACHE]               = &scsi_disk_emulate_reqops,
    [SEEK_10]                         = &scsi_disk_emulate_reqops,
    [MODE_SELECT]                     = &scsi_disk_emulate_reqops,
    [MODE_SELECT_10]                  = &scsi_disk_emulate_reqops,
    [UNMAP]                           = &scsi_disk_emulate_reqops,
    [WRITE_SAME_10]                   = &scsi_disk_emulate_reqops,
    [WRITE_SAME_16]                   = &scsi_disk_emulate_reqops,
    [VERIFY_10]                       = &scsi_disk_emulate_reqops,
    [VERIFY_12]                       = &scsi_disk_emulate_reqops,
    [VERIFY_16]                       = &scsi_disk_emulate_reqops,
    [FORMAT_UNIT]                     = &scsi_disk_emulate_reqops,

    [READ_6]                          = &scsi_disk_dma_reqops,
    [READ_10]                         = &scsi_disk_dma_reqops,
    [READ_12]                         = &scsi_disk_dma_reqops,
    [READ_16]                         = &scsi_disk_dma_reqops,
    [WRITE_6]                         = &scsi_disk_dma_reqops,
    [WRITE_10]                        = &scsi_disk_dma_reqops,
    [WRITE_12]                        = &scsi_disk_dma_reqops,
    [WRITE_16]                        = &scsi_disk_dma_reqops,
    [WRITE_VERIFY_10]                 = &scsi_disk_dma_reqops,
    [WRITE_VERIFY_12]                 = &scsi_disk_dma_reqops,
    [WRITE_VERIFY_16]                 = &scsi_disk_dma_reqops,
};
2721 
2722 static void scsi_disk_new_request_dump(uint32_t lun, uint32_t tag, uint8_t *buf)
2723 {
2724     int len = scsi_cdb_length(buf);
2725     g_autoptr(GString) str = NULL;
2726 
2727     assert(len > 0 && len <= 16);
2728     str = qemu_hexdump_line(NULL, buf, len, 1, 0);
2729     trace_scsi_disk_new_request(lun, tag, str->str);
2730 }
2731 
2732 static SCSIRequest *scsi_new_request(SCSIDevice *d, uint32_t tag, uint32_t lun,
2733                                      uint8_t *buf, void *hba_private)
2734 {
2735     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, d);
2736     SCSIRequest *req;
2737     const SCSIReqOps *ops;
2738     uint8_t command;
2739 
2740     command = buf[0];
2741     ops = scsi_disk_reqops_dispatch[command];
2742     if (!ops) {
2743         ops = &scsi_disk_emulate_reqops;
2744     }
2745     req = scsi_req_alloc(ops, &s->qdev, tag, lun, hba_private);
2746 
2747     if (trace_event_get_state_backends(TRACE_SCSI_DISK_NEW_REQUEST)) {
2748         scsi_disk_new_request_dump(lun, tag, buf);
2749     }
2750 
2751     return req;
2752 }
2753 
2754 #ifdef __linux__
2755 static int get_device_type(SCSIDiskState *s)
2756 {
2757     uint8_t cmd[16];
2758     uint8_t buf[36];
2759     int ret;
2760 
2761     memset(cmd, 0, sizeof(cmd));
2762     memset(buf, 0, sizeof(buf));
2763     cmd[0] = INQUIRY;
2764     cmd[4] = sizeof(buf);
2765 
2766     ret = scsi_SG_IO_FROM_DEV(s->qdev.conf.blk, cmd, sizeof(cmd),
2767                               buf, sizeof(buf), s->qdev.io_timeout);
2768     if (ret < 0) {
2769         return -1;
2770     }
2771     s->qdev.type = buf[0];
2772     if (buf[1] & 0x80) {
2773         s->features |= 1 << SCSI_DISK_F_REMOVABLE;
2774     }
2775     return 0;
2776 }
2777 
2778 static void scsi_block_realize(SCSIDevice *dev, Error **errp)
2779 {
2780     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, dev);
2781     int sg_version;
2782     int rc;
2783 
2784     if (!s->qdev.conf.blk) {
2785         error_setg(errp, "drive property not set");
2786         return;
2787     }
2788 
2789     if (s->rotation_rate) {
2790         error_report_once("rotation_rate is specified for scsi-block but is "
2791                           "not implemented. This option is deprecated and will "
2792                           "be removed in a future version");
2793     }
2794 
2795     /* check we are using a driver managing SG_IO (version 3 and after) */
2796     rc = blk_ioctl(s->qdev.conf.blk, SG_GET_VERSION_NUM, &sg_version);
2797     if (rc < 0) {
2798         error_setg_errno(errp, -rc, "cannot get SG_IO version number");
2799         if (rc != -EPERM) {
2800             error_append_hint(errp, "Is this a SCSI device?\n");
2801         }
2802         return;
2803     }
2804     if (sg_version < 30000) {
2805         error_setg(errp, "scsi generic interface too old");
2806         return;
2807     }
2808 
2809     /* get device type from INQUIRY data */
2810     rc = get_device_type(s);
2811     if (rc < 0) {
2812         error_setg(errp, "INQUIRY failed");
2813         return;
2814     }
2815 
2816     /* Make a guess for the block size, we'll fix it when the guest sends.
2817      * READ CAPACITY.  If they don't, they likely would assume these sizes
2818      * anyway. (TODO: check in /sys).
2819      */
2820     if (s->qdev.type == TYPE_ROM || s->qdev.type == TYPE_WORM) {
2821         s->qdev.blocksize = 2048;
2822     } else {
2823         s->qdev.blocksize = 512;
2824     }
2825 
2826     /* Makes the scsi-block device not removable by using HMP and QMP eject
2827      * command.
2828      */
2829     s->features |= (1 << SCSI_DISK_F_NO_REMOVABLE_DEVOPS);
2830 
2831     scsi_realize(&s->qdev, errp);
2832     scsi_generic_read_device_inquiry(&s->qdev);
2833 }
2834 
/* Per-request state for the scsi-block SG_IO DMA path. */
typedef struct SCSIBlockReq {
    SCSIDiskReq req;
    sg_io_hdr_t io_header;

    /* Selected bytes of the original CDB, copied into our own CDB.  */
    uint8_t cmd, cdb1, group_number;

    /* CDB passed to SG_IO.  */
    uint8_t cdb[16];
    /* Completion callback and its opaque, invoked from scsi_block_sgio_complete. */
    BlockCompletionFunc *cb;
    void *cb_opaque;
} SCSIBlockReq;
2847 
/*
 * SG_IO ioctl completion.  Translates the sg_io_hdr status fields into a
 * SCSI status value and forwards it to the callback that was stashed in
 * the request, except for host errors which fail the request directly.
 */
static void scsi_block_sgio_complete(void *opaque, int ret)
{
    SCSIBlockReq *req = (SCSIBlockReq *)opaque;
    SCSIDiskReq *r = &req->req;
    sg_io_hdr_t *io_hdr = &req->io_header;

    if (ret == 0) {
        /* FIXME This skips calling req->cb() and any cleanup in it */
        if (io_hdr->host_status != SCSI_HOST_OK) {
            scsi_req_complete_failed(&r->req, io_hdr->host_status);
            scsi_req_unref(&r->req);
            return;
        }

        /* Driver timeout is reported to the guest as a BUSY status. */
        if (io_hdr->driver_status & SG_ERR_DRIVER_TIMEOUT) {
            ret = BUSY;
        } else {
            ret = io_hdr->status;
        }
    }

    req->cb(req->cb_opaque, ret);
}
2871 
/*
 * Submit one segment of a read/write as an SG_IO ioctl.  Rebuilds the CDB
 * from the saved opcode/flag/group bytes with the segment's LBA and block
 * count patched in, choosing the smallest CDB size that fits them (but
 * never smaller than the guest's original CDB group).  Returns the AIOCB
 * of the ioctl; completion goes through scsi_block_sgio_complete().
 */
static BlockAIOCB *scsi_block_do_sgio(SCSIBlockReq *req,
                                      int64_t offset, QEMUIOVector *iov,
                                      int direction,
                                      BlockCompletionFunc *cb, void *opaque)
{
    sg_io_hdr_t *io_header = &req->io_header;
    SCSIDiskReq *r = &req->req;
    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
    int nb_logical_blocks;
    uint64_t lba;
    BlockAIOCB *aiocb;

    /* This is not supported yet.  It can only happen if the guest does
     * reads and writes that are not aligned to one logical sectors
     * _and_ cover multiple MemoryRegions.
     */
    assert(offset % s->qdev.blocksize == 0);
    assert(iov->size % s->qdev.blocksize == 0);

    io_header->interface_id = 'S';

    /* The data transfer comes from the QEMUIOVector.  */
    io_header->dxfer_direction = direction;
    io_header->dxfer_len = iov->size;
    io_header->dxferp = (void *)iov->iov;
    io_header->iovec_count = iov->niov;
    assert(io_header->iovec_count == iov->niov); /* no overflow! */

    /* Build a new CDB with the LBA and length patched in, in case
     * DMA helpers split the transfer in multiple segments.  Do not
     * build a CDB smaller than what the guest wanted, and only build
     * a larger one if strictly necessary.
     */
    io_header->cmdp = req->cdb;
    lba = offset / s->qdev.blocksize;
    nb_logical_blocks = io_header->dxfer_len / s->qdev.blocksize;

    if ((req->cmd >> 5) == 0 && lba <= 0x1ffff) {
        /* 6-byte CDB */
        stl_be_p(&req->cdb[0], lba | (req->cmd << 24));
        req->cdb[4] = nb_logical_blocks;
        req->cdb[5] = 0;
        io_header->cmd_len = 6;
    } else if ((req->cmd >> 5) <= 1 && lba <= 0xffffffffULL) {
        /* 10-byte CDB */
        req->cdb[0] = (req->cmd & 0x1f) | 0x20;
        req->cdb[1] = req->cdb1;
        stl_be_p(&req->cdb[2], lba);
        req->cdb[6] = req->group_number;
        stw_be_p(&req->cdb[7], nb_logical_blocks);
        req->cdb[9] = 0;
        io_header->cmd_len = 10;
    } else if ((req->cmd >> 5) != 4 && lba <= 0xffffffffULL) {
        /* 12-byte CDB */
        req->cdb[0] = (req->cmd & 0x1f) | 0xA0;
        req->cdb[1] = req->cdb1;
        stl_be_p(&req->cdb[2], lba);
        stl_be_p(&req->cdb[6], nb_logical_blocks);
        req->cdb[10] = req->group_number;
        req->cdb[11] = 0;
        io_header->cmd_len = 12;
    } else {
        /* 16-byte CDB */
        req->cdb[0] = (req->cmd & 0x1f) | 0x80;
        req->cdb[1] = req->cdb1;
        stq_be_p(&req->cdb[2], lba);
        stl_be_p(&req->cdb[10], nb_logical_blocks);
        req->cdb[14] = req->group_number;
        req->cdb[15] = 0;
        io_header->cmd_len = 16;
    }

    /* The rest is as in scsi-generic.c.  */
    io_header->mx_sb_len = sizeof(r->req.sense);
    io_header->sbp = r->req.sense;
    io_header->timeout = s->qdev.io_timeout * 1000;
    io_header->usr_ptr = r;
    io_header->flags |= SG_FLAG_DIRECT_IO;
    req->cb = cb;
    req->cb_opaque = opaque;
    trace_scsi_disk_aio_sgio_command(r->req.tag, req->cdb[0], lba,
                                     nb_logical_blocks, io_header->timeout);
    aiocb = blk_aio_ioctl(s->qdev.conf.blk, SG_IO, io_header, scsi_block_sgio_complete, req);
    assert(aiocb != NULL);
    return aiocb;
}
2958 
/*
 * scsi-block passes the CDB through to the real device, so the FUA bit
 * is honored by the device itself and never needs QEMU-side emulation.
 */
static bool scsi_block_no_fua(SCSICommand *cmd)
{
    return false;
}
2963 
2964 static BlockAIOCB *scsi_block_dma_readv(int64_t offset,
2965                                         QEMUIOVector *iov,
2966                                         BlockCompletionFunc *cb, void *cb_opaque,
2967                                         void *opaque)
2968 {
2969     SCSIBlockReq *r = opaque;
2970     return scsi_block_do_sgio(r, offset, iov,
2971                               SG_DXFER_FROM_DEV, cb, cb_opaque);
2972 }
2973 
2974 static BlockAIOCB *scsi_block_dma_writev(int64_t offset,
2975                                          QEMUIOVector *iov,
2976                                          BlockCompletionFunc *cb, void *cb_opaque,
2977                                          void *opaque)
2978 {
2979     SCSIBlockReq *r = opaque;
2980     return scsi_block_do_sgio(r, offset, iov,
2981                               SG_DXFER_TO_DEV, cb, cb_opaque);
2982 }
2983 
2984 static bool scsi_block_is_passthrough(SCSIDiskState *s, uint8_t *buf)
2985 {
2986     switch (buf[0]) {
2987     case VERIFY_10:
2988     case VERIFY_12:
2989     case VERIFY_16:
2990         /* Check if BYTCHK == 0x01 (data-out buffer contains data
2991          * for the number of logical blocks specified in the length
2992          * field).  For other modes, do not use scatter/gather operation.
2993          */
2994         if ((buf[1] & 6) == 2) {
2995             return false;
2996         }
2997         break;
2998 
2999     case READ_6:
3000     case READ_10:
3001     case READ_12:
3002     case READ_16:
3003     case WRITE_6:
3004     case WRITE_10:
3005     case WRITE_12:
3006     case WRITE_16:
3007     case WRITE_VERIFY_10:
3008     case WRITE_VERIFY_12:
3009     case WRITE_VERIFY_16:
3010         /* MMC writing cannot be done via DMA helpers, because it sometimes
3011          * involves writing beyond the maximum LBA or to negative LBA (lead-in).
3012          * We might use scsi_block_dma_reqops as long as no writing commands are
3013          * seen, but performance usually isn't paramount on optical media.  So,
3014          * just make scsi-block operate the same as scsi-generic for them.
3015          */
3016         if (s->qdev.type != TYPE_ROM) {
3017             return false;
3018         }
3019         break;
3020 
3021     default:
3022         break;
3023     }
3024 
3025     return true;
3026 }
3027 
3028 
/*
 * send_command hook for the scsi-block DMA path.  Saves the CDB bytes
 * that scsi_block_do_sgio() later needs to rebuild per-segment CDBs
 * (opcode, byte 1 flags, group number), then defers to the common
 * scsi-disk DMA command handling.
 */
static int32_t scsi_block_dma_command(SCSIRequest *req, uint8_t *buf)
{
    SCSIBlockReq *r = (SCSIBlockReq *)req;
    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);

    /* The top 3 opcode bits encode the CDB group and thus its length. */
    r->cmd = req->cmd.buf[0];
    switch (r->cmd >> 5) {
    case 0:
        /* 6-byte CDB.  */
        r->cdb1 = r->group_number = 0;
        break;
    case 1:
        /* 10-byte CDB.  */
        r->cdb1 = req->cmd.buf[1];
        r->group_number = req->cmd.buf[6];
        break;
    case 4:
        /* 12-byte CDB.  */
        r->cdb1 = req->cmd.buf[1];
        r->group_number = req->cmd.buf[10];
        break;
    case 5:
        /* 16-byte CDB.  */
        r->cdb1 = req->cmd.buf[1];
        r->group_number = req->cmd.buf[14];
        break;
    default:
        /* Other groups never reach this path (see scsi_block_is_passthrough). */
        abort();
    }

    /* Protection information is not supported.  For SCSI versions 2 and
     * older (as determined by snooping the guest's INQUIRY commands),
     * there is no RD/WR/VRPROTECT, so skip this check in these versions.
     */
    if (s->qdev.scsi_version > 2 && (req->cmd.buf[1] & 0xe0)) {
        scsi_check_condition(&r->req, SENSE_CODE(INVALID_FIELD));
        return 0;
    }

    return scsi_disk_dma_command(req, buf);
}
3070 
/*
 * Request ops for the scsi-block DMA path: like scsi_disk_dma_reqops
 * but with a larger request struct and SG_IO-aware command handling.
 */
static const SCSIReqOps scsi_block_dma_reqops = {
    .size         = sizeof(SCSIBlockReq),
    .free_req     = scsi_free_request,
    .send_command = scsi_block_dma_command,
    .read_data    = scsi_read_data,
    .write_data   = scsi_write_data,
    .get_buf      = scsi_get_buf,
    .load_request = scsi_disk_load_request,
    .save_request = scsi_disk_save_request,
};
3081 
3082 static SCSIRequest *scsi_block_new_request(SCSIDevice *d, uint32_t tag,
3083                                            uint32_t lun, uint8_t *buf,
3084                                            void *hba_private)
3085 {
3086     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, d);
3087 
3088     if (scsi_block_is_passthrough(s, buf)) {
3089         return scsi_req_alloc(&scsi_generic_req_ops, &s->qdev, tag, lun,
3090                               hba_private);
3091     } else {
3092         return scsi_req_alloc(&scsi_block_dma_reqops, &s->qdev, tag, lun,
3093                               hba_private);
3094     }
3095 }
3096 
3097 static int scsi_block_parse_cdb(SCSIDevice *d, SCSICommand *cmd,
3098                                   uint8_t *buf, size_t buf_len,
3099                                   void *hba_private)
3100 {
3101     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, d);
3102 
3103     if (scsi_block_is_passthrough(s, buf)) {
3104         return scsi_bus_parse_cdb(&s->qdev, cmd, buf, buf_len, hba_private);
3105     } else {
3106         return scsi_req_parse_cdb(&s->qdev, cmd, buf, buf_len);
3107     }
3108 }
3109 
/*
 * Copy the sense length reported by the kernel in the sg_io_hdr into
 * the request, clamped to the size of the request's sense buffer.
 */
static void scsi_block_update_sense(SCSIRequest *req)
{
    SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
    SCSIBlockReq *br = DO_UPCAST(SCSIBlockReq, req, r);
    r->req.sense_len = MIN(br->io_header.sb_len_wr, sizeof(r->req.sense));
}
3116 #endif
3117 
3118 static
3119 BlockAIOCB *scsi_dma_readv(int64_t offset, QEMUIOVector *iov,
3120                            BlockCompletionFunc *cb, void *cb_opaque,
3121                            void *opaque)
3122 {
3123     SCSIDiskReq *r = opaque;
3124     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
3125     return blk_aio_preadv(s->qdev.conf.blk, offset, iov, 0, cb, cb_opaque);
3126 }
3127 
3128 static
3129 BlockAIOCB *scsi_dma_writev(int64_t offset, QEMUIOVector *iov,
3130                             BlockCompletionFunc *cb, void *cb_opaque,
3131                             void *opaque)
3132 {
3133     SCSIDiskReq *r = opaque;
3134     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
3135     return blk_aio_pwritev(s->qdev.conf.blk, offset, iov, 0, cb, cb_opaque);
3136 }
3137 
/*
 * Class init for the abstract scsi-disk base type: install the default
 * block-backend DMA callbacks and FUA-emulation policy that concrete
 * subclasses (scsi-hd, scsi-cd, scsi-block) may override.
 */
static void scsi_disk_base_class_initfn(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);
    SCSIDiskClass *sdc = SCSI_DISK_BASE_CLASS(klass);

    dc->fw_name = "disk";
    device_class_set_legacy_reset(dc, scsi_disk_reset);
    sdc->dma_readv = scsi_dma_readv;
    sdc->dma_writev = scsi_dma_writev;
    sdc->need_fua_emulation = scsi_is_cmd_fua;
}
3149 
/* Abstract QOM base type shared by scsi-hd, scsi-cd and scsi-block. */
static const TypeInfo scsi_disk_base_info = {
    .name          = TYPE_SCSI_DISK_BASE,
    .parent        = TYPE_SCSI_DEVICE,
    .class_init    = scsi_disk_base_class_initfn,
    .instance_size = sizeof(SCSIDiskState),
    .class_size    = sizeof(SCSIDiskClass),
    .abstract      = true,
};
3158 
/* Properties common to scsi-hd and scsi-cd (drive, identification strings,
 * error policy and migration of emulated requests). */
#define DEFINE_SCSI_DISK_PROPERTIES()                                   \
    DEFINE_PROP_DRIVE_IOTHREAD("drive", SCSIDiskState, qdev.conf.blk),  \
    DEFINE_BLOCK_PROPERTIES_BASE(SCSIDiskState, qdev.conf),             \
    DEFINE_BLOCK_ERROR_PROPERTIES(SCSIDiskState, qdev.conf),            \
    DEFINE_PROP_STRING("ver", SCSIDiskState, version),                  \
    DEFINE_PROP_STRING("serial", SCSIDiskState, serial),                \
    DEFINE_PROP_STRING("vendor", SCSIDiskState, vendor),                \
    DEFINE_PROP_STRING("product", SCSIDiskState, product),              \
    DEFINE_PROP_STRING("device_id", SCSIDiskState, device_id),          \
    DEFINE_PROP_BOOL("migrate-emulated-scsi-request", SCSIDiskState, migrate_emulated_scsi_request, true)
3169 
3170 
/* qdev properties of the scsi-hd device. */
static Property scsi_hd_properties[] = {
    DEFINE_SCSI_DISK_PROPERTIES(),
    DEFINE_PROP_BIT("removable", SCSIDiskState, features,
                    SCSI_DISK_F_REMOVABLE, false),
    DEFINE_PROP_BIT("dpofua", SCSIDiskState, features,
                    SCSI_DISK_F_DPOFUA, false),
    DEFINE_PROP_UINT64("wwn", SCSIDiskState, qdev.wwn, 0),
    DEFINE_PROP_UINT64("port_wwn", SCSIDiskState, qdev.port_wwn, 0),
    DEFINE_PROP_UINT16("port_index", SCSIDiskState, port_index, 0),
    DEFINE_PROP_UINT64("max_unmap_size", SCSIDiskState, max_unmap_size,
                       DEFAULT_MAX_UNMAP_SIZE),
    DEFINE_PROP_UINT64("max_io_size", SCSIDiskState, max_io_size,
                       DEFAULT_MAX_IO_SIZE),
    DEFINE_PROP_UINT16("rotation_rate", SCSIDiskState, rotation_rate, 0),
    /* Default SCSI version 5 corresponds to SPC-3. */
    DEFINE_PROP_INT32("scsi_version", SCSIDiskState, qdev.default_scsi_version,
                      5),
    DEFINE_PROP_BIT("quirk_mode_page_vendor_specific_apple", SCSIDiskState,
                    quirks, SCSI_DISK_QUIRK_MODE_PAGE_VENDOR_SPECIFIC_APPLE,
                    0),
    DEFINE_BLOCK_CHS_PROPERTIES(SCSIDiskState, qdev.conf),
    DEFINE_PROP_END_OF_LIST(),
};
3193 
/* Migration state shared by all scsi-disk variants (media/tray flags). */
static const VMStateDescription vmstate_scsi_disk_state = {
    .name = "scsi-disk",
    .version_id = 1,
    .minimum_version_id = 1,
    .fields = (const VMStateField[]) {
        VMSTATE_SCSI_DEVICE(qdev, SCSIDiskState),
        VMSTATE_BOOL(media_changed, SCSIDiskState),
        VMSTATE_BOOL(media_event, SCSIDiskState),
        VMSTATE_BOOL(eject_request, SCSIDiskState),
        VMSTATE_BOOL(tray_open, SCSIDiskState),
        VMSTATE_BOOL(tray_locked, SCSIDiskState),
        VMSTATE_END_OF_LIST()
    }
};
3208 
/* Class init for scsi-hd: hook up realize/unrealize and request allocation. */
static void scsi_hd_class_initfn(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);
    SCSIDeviceClass *sc = SCSI_DEVICE_CLASS(klass);

    sc->realize      = scsi_hd_realize;
    sc->unrealize    = scsi_unrealize;
    sc->alloc_req    = scsi_new_request;
    sc->unit_attention_reported = scsi_disk_unit_attention_reported;
    dc->desc = "virtual SCSI disk";
    device_class_set_props(dc, scsi_hd_properties);
    dc->vmsd  = &vmstate_scsi_disk_state;
}
3222 
/* QOM registration of the scsi-hd device type. */
static const TypeInfo scsi_hd_info = {
    .name          = "scsi-hd",
    .parent        = TYPE_SCSI_DISK_BASE,
    .class_init    = scsi_hd_class_initfn,
};
3228 
/* qdev properties of the scsi-cd device (no write-related options). */
static Property scsi_cd_properties[] = {
    DEFINE_SCSI_DISK_PROPERTIES(),
    DEFINE_PROP_UINT64("wwn", SCSIDiskState, qdev.wwn, 0),
    DEFINE_PROP_UINT64("port_wwn", SCSIDiskState, qdev.port_wwn, 0),
    DEFINE_PROP_UINT16("port_index", SCSIDiskState, port_index, 0),
    DEFINE_PROP_UINT64("max_io_size", SCSIDiskState, max_io_size,
                       DEFAULT_MAX_IO_SIZE),
    DEFINE_PROP_INT32("scsi_version", SCSIDiskState, qdev.default_scsi_version,
                      5),
    /* Quirk bits emulate behavior of specific legacy CD-ROM firmware. */
    DEFINE_PROP_BIT("quirk_mode_page_apple_vendor", SCSIDiskState, quirks,
                    SCSI_DISK_QUIRK_MODE_PAGE_APPLE_VENDOR, 0),
    DEFINE_PROP_BIT("quirk_mode_sense_rom_use_dbd", SCSIDiskState, quirks,
                    SCSI_DISK_QUIRK_MODE_SENSE_ROM_USE_DBD, 0),
    DEFINE_PROP_BIT("quirk_mode_page_vendor_specific_apple", SCSIDiskState,
                    quirks, SCSI_DISK_QUIRK_MODE_PAGE_VENDOR_SPECIFIC_APPLE,
                    0),
    DEFINE_PROP_BIT("quirk_mode_page_truncated", SCSIDiskState, quirks,
                    SCSI_DISK_QUIRK_MODE_PAGE_TRUNCATED, 0),
    DEFINE_PROP_END_OF_LIST(),
};
3249 
/* Class init for scsi-cd: like scsi-hd but with the CD-ROM realize hook. */
static void scsi_cd_class_initfn(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);
    SCSIDeviceClass *sc = SCSI_DEVICE_CLASS(klass);

    sc->realize      = scsi_cd_realize;
    sc->alloc_req    = scsi_new_request;
    sc->unit_attention_reported = scsi_disk_unit_attention_reported;
    dc->desc = "virtual SCSI CD-ROM";
    device_class_set_props(dc, scsi_cd_properties);
    dc->vmsd  = &vmstate_scsi_disk_state;
}
3262 
/* QOM registration of the scsi-cd device type. */
static const TypeInfo scsi_cd_info = {
    .name          = "scsi-cd",
    .parent        = TYPE_SCSI_DISK_BASE,
    .class_init    = scsi_cd_class_initfn,
};
3268 
3269 #ifdef __linux__
/* qdev properties of the scsi-block passthrough device (Linux only). */
static Property scsi_block_properties[] = {
    DEFINE_BLOCK_ERROR_PROPERTIES(SCSIDiskState, qdev.conf),
    DEFINE_PROP_DRIVE("drive", SCSIDiskState, qdev.conf.blk),
    DEFINE_PROP_BOOL("share-rw", SCSIDiskState, qdev.conf.share_rw, false),
    DEFINE_PROP_UINT16("rotation_rate", SCSIDiskState, rotation_rate, 0),
    DEFINE_PROP_UINT64("max_unmap_size", SCSIDiskState, max_unmap_size,
                       DEFAULT_MAX_UNMAP_SIZE),
    DEFINE_PROP_UINT64("max_io_size", SCSIDiskState, max_io_size,
                       DEFAULT_MAX_IO_SIZE),
    /* -1: report whatever SCSI version the real device advertises. */
    DEFINE_PROP_INT32("scsi_version", SCSIDiskState, qdev.default_scsi_version,
                      -1),
    DEFINE_PROP_UINT32("io_timeout", SCSIDiskState, qdev.io_timeout,
                       DEFAULT_IO_TIMEOUT),
    DEFINE_PROP_END_OF_LIST(),
};
3285 
/*
 * Class init for scsi-block: override the base DMA callbacks with the
 * SG_IO-backed ones, pull sense data from the sg_io_hdr, and disable
 * FUA emulation (the real device handles FUA itself).
 */
static void scsi_block_class_initfn(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);
    SCSIDeviceClass *sc = SCSI_DEVICE_CLASS(klass);
    SCSIDiskClass *sdc = SCSI_DISK_BASE_CLASS(klass);

    sc->realize      = scsi_block_realize;
    sc->alloc_req    = scsi_block_new_request;
    sc->parse_cdb    = scsi_block_parse_cdb;
    sdc->dma_readv   = scsi_block_dma_readv;
    sdc->dma_writev  = scsi_block_dma_writev;
    sdc->update_sense = scsi_block_update_sense;
    sdc->need_fua_emulation = scsi_block_no_fua;
    dc->desc = "SCSI block device passthrough";
    device_class_set_props(dc, scsi_block_properties);
    dc->vmsd  = &vmstate_scsi_disk_state;
}
3303 
/* QOM registration of the scsi-block device type (Linux only). */
static const TypeInfo scsi_block_info = {
    .name          = "scsi-block",
    .parent        = TYPE_SCSI_DISK_BASE,
    .class_init    = scsi_block_class_initfn,
};
3309 #endif
3310 
/* Register all scsi-disk QOM types; scsi-block exists only on Linux
 * because it depends on the SG_IO ioctl. */
static void scsi_disk_register_types(void)
{
    type_register_static(&scsi_disk_base_info);
    type_register_static(&scsi_hd_info);
    type_register_static(&scsi_cd_info);
#ifdef __linux__
    type_register_static(&scsi_block_info);
#endif
}

type_init(scsi_disk_register_types)
3322