xref: /openbmc/qemu/hw/scsi/scsi-disk.c (revision a89c3c9b2cc4107658c7260ecf329d869888fd51)
1 /*
2  * SCSI Device emulation
3  *
4  * Copyright (c) 2006 CodeSourcery.
5  * Based on code by Fabrice Bellard
6  *
7  * Written by Paul Brook
8  * Modifications:
9  *  2009-Dec-12 Artyom Tarasenko : implemented standard inquiry for the case
10  *                                 when the allocation length of CDB is smaller
11  *                                 than 36.
12  *  2009-Oct-13 Artyom Tarasenko : implemented the block descriptor in the
13  *                                 MODE SENSE response.
14  *
15  * This code is licensed under the LGPL.
16  *
17  * Note that this file only handles the SCSI architecture model and device
18  * commands.  Emulation of interface/link layer protocols is handled by
19  * the host adapter emulator.
20  */
21 
22 #include "qemu/osdep.h"
23 #include "qemu/units.h"
24 #include "qapi/error.h"
25 #include "qemu/error-report.h"
26 #include "qemu/main-loop.h"
27 #include "qemu/module.h"
28 #include "qemu/hw-version.h"
29 #include "qemu/memalign.h"
30 #include "hw/scsi/scsi.h"
31 #include "migration/qemu-file-types.h"
32 #include "migration/vmstate.h"
33 #include "hw/scsi/emulation.h"
34 #include "scsi/constants.h"
35 #include "system/arch_init.h"
36 #include "system/block-backend.h"
37 #include "system/blockdev.h"
38 #include "hw/block/block.h"
39 #include "hw/qdev-properties.h"
40 #include "hw/qdev-properties-system.h"
41 #include "system/dma.h"
42 #include "system/system.h"
43 #include "qemu/cutils.h"
44 #include "trace.h"
45 #include "qom/object.h"
46 
47 #ifdef __linux__
48 #include <scsi/sg.h>
49 #endif
50 
51 #define SCSI_WRITE_SAME_MAX         (512 * KiB)
52 #define SCSI_DMA_BUF_SIZE           (128 * KiB)
53 #define SCSI_MAX_INQUIRY_LEN        256
54 #define SCSI_MAX_MODE_LEN           256
55 
56 #define DEFAULT_DISCARD_GRANULARITY (4 * KiB)
57 #define DEFAULT_MAX_UNMAP_SIZE      (1 * GiB)
58 #define DEFAULT_MAX_IO_SIZE         INT_MAX     /* 2 GB - 1 block */
59 
60 #define TYPE_SCSI_DISK_BASE         "scsi-disk-base"
61 
62 #define MAX_SERIAL_LEN              36
63 #define MAX_SERIAL_LEN_FOR_DEVID    20
64 
65 OBJECT_DECLARE_TYPE(SCSIDiskState, SCSIDiskClass, SCSI_DISK_BASE)
66 
67 struct SCSIDiskClass {
68     SCSIDeviceClass parent_class;
69     /*
70      * Callbacks receive ret == 0 for success. Errors are represented either as
71      * negative errno values, or as positive SAM status codes.
72      *
73      * Beware: For errors returned in host_status, the function may directly
74      * complete the request and never call the callback.
75      */
76     DMAIOFunc       *dma_readv;
77     DMAIOFunc       *dma_writev;
78     bool            (*need_fua_emulation)(SCSICommand *cmd);
79     void            (*update_sense)(SCSIRequest *r);
80 };
81 
82 typedef struct SCSIDiskReq {
83     SCSIRequest req;
84     /* Both sector and sector_count are in terms of BDRV_SECTOR_SIZE bytes.  */
85     uint64_t sector;
86     uint32_t sector_count;
87     uint32_t buflen;
88     bool started;
89     bool need_fua_emulation;
90     struct iovec iov;
91     QEMUIOVector qiov;
92     BlockAcctCookie acct;
93 } SCSIDiskReq;
94 
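/*
 * Bit numbers within SCSIDiskState.features; tested below as
 * (s->features & (1 << SCSI_DISK_F_xxx)).
 */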
95 #define SCSI_DISK_F_REMOVABLE             0
96 #define SCSI_DISK_F_DPOFUA                1
97 #define SCSI_DISK_F_NO_REMOVABLE_DEVOPS   2
98 
99 struct SCSIDiskState {
100     SCSIDevice qdev;
101     uint32_t features;
102     bool media_changed;
103     bool media_event;
104     bool eject_request;
105     uint16_t port_index;
106     uint64_t max_unmap_size;
107     uint64_t max_io_size;
108     uint32_t quirks;
109     char *version;
110     char *serial;
111     char *vendor;
112     char *product;
113     char *device_id;
114     char *loadparm;     /* only for s390x */
115     bool tray_open;
116     bool tray_locked;
117     /*
118      * 0x0000        - rotation rate not reported
119      * 0x0001        - non-rotating medium (SSD)
120      * 0x0002-0x0400 - reserved
121  * 0x0401-0xfffe - rotations per minute
122      * 0xffff        - reserved
123      */
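    /* Reported in the Block Device Characteristics VPD page (0xb1) below. */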
124     uint16_t rotation_rate;
125     bool migrate_emulated_scsi_request;
126 };
127 
128 static void scsi_free_request(SCSIRequest *req)
129 {
130     SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
131 
132     qemu_vfree(r->iov.iov_base);
133 }
134 
135 /* Helper function for command completion with sense.  */
136 static void scsi_check_condition(SCSIDiskReq *r, SCSISense sense)
137 {
138     trace_scsi_disk_check_condition(r->req.tag, sense.key, sense.asc,
139                                     sense.ascq);
140     scsi_req_build_sense(&r->req, sense);
141     scsi_req_complete(&r->req, CHECK_CONDITION);
142 }
143 
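/*
 * Allocate the bounce buffer lazily on first use and reuse it for the
 * remaining chunks of the request; iov_len is clamped to what is left.
 */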
144 static void scsi_init_iovec(SCSIDiskReq *r, size_t size)
145 {
146     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
147 
148     if (!r->iov.iov_base) {
149         r->buflen = size;
150         r->iov.iov_base = blk_blockalign(s->qdev.conf.blk, r->buflen);
151     }
152     r->iov.iov_len = MIN(r->sector_count * BDRV_SECTOR_SIZE, r->buflen);
153     qemu_iovec_init_external(&r->qiov, &r->iov, 1);
154 }
155 
156 static void scsi_disk_save_request(QEMUFile *f, SCSIRequest *req)
157 {
158     SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
159 
160     qemu_put_be64s(f, &r->sector);
161     qemu_put_be32s(f, &r->sector_count);
162     qemu_put_be32s(f, &r->buflen);
163     if (r->buflen) {
164         if (r->req.cmd.mode == SCSI_XFER_TO_DEV) {
165             qemu_put_buffer(f, r->iov.iov_base, r->iov.iov_len);
166         } else if (!req->retry) {
167             uint32_t len = r->iov.iov_len;
168             qemu_put_be32s(f, &len);
169             qemu_put_buffer(f, r->iov.iov_base, r->iov.iov_len);
170         }
171     }
172 }
173 
174 static void scsi_disk_emulate_save_request(QEMUFile *f, SCSIRequest *req)
175 {
176     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
177 
178     if (s->migrate_emulated_scsi_request) {
179         scsi_disk_save_request(f, req);
180     }
181 }
182 
183 static void scsi_disk_load_request(QEMUFile *f, SCSIRequest *req)
184 {
185     SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
186 
187     qemu_get_be64s(f, &r->sector);
188     qemu_get_be32s(f, &r->sector_count);
189     qemu_get_be32s(f, &r->buflen);
190     if (r->buflen) {
191         scsi_init_iovec(r, r->buflen);
192         if (r->req.cmd.mode == SCSI_XFER_TO_DEV) {
193             qemu_get_buffer(f, r->iov.iov_base, r->iov.iov_len);
194         } else if (!r->req.retry) {
195             uint32_t len;
196             qemu_get_be32s(f, &len);
197             r->iov.iov_len = len;
198             assert(r->iov.iov_len <= r->buflen);
199             qemu_get_buffer(f, r->iov.iov_base, r->iov.iov_len);
200         }
201     }
202 
203     qemu_iovec_init_external(&r->qiov, &r->iov, 1);
204 }
205 
206 static void scsi_disk_emulate_load_request(QEMUFile *f, SCSIRequest *req)
207 {
208     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
209 
210     if (s->migrate_emulated_scsi_request) {
211         scsi_disk_load_request(f, req);
212     }
213 }
214 
215 /*
216  * scsi_handle_rw_error has two return values.  False means that the error
217  * must be ignored, true means that the error has been processed and the
218  * caller should not do anything else for this request.  Note that
219  * scsi_handle_rw_error always manages its reference counts, independent
220  * of the return value.
221  */
222 static bool scsi_handle_rw_error(SCSIDiskReq *r, int ret, bool acct_failed)
223 {
224     bool is_read = (r->req.cmd.mode == SCSI_XFER_FROM_DEV);
225     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
226     SCSIDiskClass *sdc = (SCSIDiskClass *) object_get_class(OBJECT(s));
227     SCSISense sense = SENSE_CODE(NO_SENSE);
228     int error;
229     bool req_has_sense = false;
230     BlockErrorAction action;
231     int status;
232 
233     if (ret < 0) {
234         status = scsi_sense_from_errno(-ret, &sense);
235         error = -ret;
236     } else {
237         /* A passthrough command has completed with nonzero status.  */
238         status = ret;
239         switch (status) {
240         case CHECK_CONDITION:
241             req_has_sense = true;
242             error = scsi_sense_buf_to_errno(r->req.sense, sizeof(r->req.sense));
243             break;
244         case RESERVATION_CONFLICT:
245             /*
246              * Don't apply the error policy, always report to the guest.
247              *
248              * This is a passthrough code path, so it's not a backend error, but
249              * a response to an invalid guest request.
250              *
251              * Windows Failover Cluster validation intentionally sends invalid
252              * requests to verify that reservations work as intended. It is
253              * crucial that it sees the resulting errors.
254              *
255              * Treating a reservation conflict as a guest-side error is obvious
256              * when a pr-manager is in use. Without one, the situation is less
257              * clear, but there might be nothing that can be fixed on the host
258              * (like in the above example), and we don't want to be stuck in a
259              * loop where resuming the VM and retrying the request immediately
260              * stops it again. So always reporting is still the safer option in
261              * this case, too.
262              */
263             error = 0;
264             break;
265         default:
266             error = EINVAL;
267             break;
268         }
269     }
270 
271     /*
272      * Check whether the error has to be handled by the guest or should
273      * rather follow the rerror=/werror= settings.  Guest-handled errors
274      * are usually retried immediately, so do not post them to QMP and
275      * do not account them as failed I/O.
276      */
277     if (!error || (req_has_sense &&
278                    scsi_sense_buf_is_guest_recoverable(r->req.sense,
279                                                        sizeof(r->req.sense)))) {
280         action = BLOCK_ERROR_ACTION_REPORT;
281         acct_failed = false;
282     } else {
283         action = blk_get_error_action(s->qdev.conf.blk, is_read, error);
284         blk_error_action(s->qdev.conf.blk, action, is_read, error);
285     }
286 
287     switch (action) {
288     case BLOCK_ERROR_ACTION_REPORT:
289         if (acct_failed) {
290             block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct);
291         }
292         if (req_has_sense) {
293             sdc->update_sense(&r->req);
294         } else if (status == CHECK_CONDITION) {
295             scsi_req_build_sense(&r->req, sense);
296         }
297         scsi_req_complete(&r->req, status);
298         return true;
299 
300     case BLOCK_ERROR_ACTION_IGNORE:
301         return false;
302 
303     case BLOCK_ERROR_ACTION_STOP:
304         scsi_req_retry(&r->req);
305         return true;
306 
307     default:
308         g_assert_not_reached();
309     }
310 }
311 
312 static bool scsi_disk_req_check_error(SCSIDiskReq *r, int ret, bool acct_failed)
313 {
314     if (r->req.io_canceled) {
315         scsi_req_cancel_complete(&r->req);
316         return true;
317     }
318 
319     if (ret != 0) {
320         return scsi_handle_rw_error(r, ret, acct_failed);
321     }
322 
323     return false;
324 }
325 
326 static void scsi_aio_complete(void *opaque, int ret)
327 {
328     SCSIDiskReq *r = (SCSIDiskReq *)opaque;
329     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
330 
331     /* The request must only run in the BlockBackend's AioContext */
332     assert(blk_get_aio_context(s->qdev.conf.blk) ==
333            qemu_get_current_aio_context());
334 
335     assert(r->req.aiocb != NULL);
336     r->req.aiocb = NULL;
337 
338     if (scsi_disk_req_check_error(r, ret, true)) {
339         goto done;
340     }
341 
342     block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct);
343     scsi_req_complete(&r->req, GOOD);
344 
345 done:
346     scsi_req_unref(&r->req);
347 }
348 
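/*
 * FUA (Force Unit Access) is bit 3 of CDB byte 1 for the 10/12/16-byte
 * READ and WRITE commands; READ(6)/WRITE(6) have no FUA bit.  VERIFY and
 * WRITE AND VERIFY are treated here as always needing FUA semantics.
 */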
349 static bool scsi_is_cmd_fua(SCSICommand *cmd)
350 {
351     switch (cmd->buf[0]) {
352     case READ_10:
353     case READ_12:
354     case READ_16:
355     case WRITE_10:
356     case WRITE_12:
357     case WRITE_16:
358         return (cmd->buf[1] & 8) != 0;
359 
360     case VERIFY_10:
361     case VERIFY_12:
362     case VERIFY_16:
363     case WRITE_VERIFY_10:
364     case WRITE_VERIFY_12:
365     case WRITE_VERIFY_16:
366         return true;
367 
368     case READ_6:
369     case WRITE_6:
370     default:
371         return false;
372     }
373 }
374 
375 static void scsi_write_do_fua(SCSIDiskReq *r)
376 {
377     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
378 
379     assert(r->req.aiocb == NULL);
380     assert(!r->req.io_canceled);
381 
382     if (r->need_fua_emulation) {
383         block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct, 0,
384                          BLOCK_ACCT_FLUSH);
385         r->req.aiocb = blk_aio_flush(s->qdev.conf.blk, scsi_aio_complete, r);
386         return;
387     }
388 
389     scsi_req_complete(&r->req, GOOD);
390     scsi_req_unref(&r->req);
391 }
392 
393 static void scsi_dma_complete_noio(SCSIDiskReq *r, int ret)
394 {
395     assert(r->req.aiocb == NULL);
396     if (scsi_disk_req_check_error(r, ret, ret > 0)) {
397         goto done;
398     }
399 
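    /* The scatter/gather DMA path transfers the whole request in one go. */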
400     r->sector += r->sector_count;
401     r->sector_count = 0;
402     if (r->req.cmd.mode == SCSI_XFER_TO_DEV) {
403         scsi_write_do_fua(r);
404         return;
405     } else {
406         scsi_req_complete(&r->req, GOOD);
407     }
408 
409 done:
410     scsi_req_unref(&r->req);
411 }
412 
413 /* May not be called in all error cases, don't rely on cleanup here */
414 static void scsi_dma_complete(void *opaque, int ret)
415 {
416     SCSIDiskReq *r = (SCSIDiskReq *)opaque;
417     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
418 
419     assert(r->req.aiocb != NULL);
420     r->req.aiocb = NULL;
421 
422     /* ret > 0 is accounted for in scsi_disk_req_check_error() */
423     if (ret < 0) {
424         block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct);
425     } else if (ret == 0) {
426         block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct);
427     }
428     scsi_dma_complete_noio(r, ret);
429 }
430 
431 static void scsi_read_complete_noio(SCSIDiskReq *r, int ret)
432 {
433     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
434     uint32_t n;
435 
436     /* The request must only run in the BlockBackend's AioContext */
437     assert(blk_get_aio_context(s->qdev.conf.blk) ==
438            qemu_get_current_aio_context());
439 
440     assert(r->req.aiocb == NULL);
441     if (scsi_disk_req_check_error(r, ret, ret > 0)) {
442         goto done;
443     }
444 
445     n = r->qiov.size / BDRV_SECTOR_SIZE;
446     r->sector += n;
447     r->sector_count -= n;
448     scsi_req_data(&r->req, r->qiov.size);
449 
450 done:
451     scsi_req_unref(&r->req);
452 }
453 
454 /* May not be called in all error cases, don't rely on cleanup here */
455 static void scsi_read_complete(void *opaque, int ret)
456 {
457     SCSIDiskReq *r = (SCSIDiskReq *)opaque;
458     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
459 
460     assert(r->req.aiocb != NULL);
461     r->req.aiocb = NULL;
462 
463     /* ret > 0 is accounted for in scsi_disk_req_check_error() */
464     if (ret < 0) {
465         block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct);
466     } else if (ret == 0) {
467         block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct);
468         trace_scsi_disk_read_complete(r->req.tag, r->qiov.size);
469     }
470     scsi_read_complete_noio(r, ret);
471 }
472 
473 /* Actually issue a read to the block device.  */
474 static void scsi_do_read(SCSIDiskReq *r, int ret)
475 {
476     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
477     SCSIDiskClass *sdc = (SCSIDiskClass *) object_get_class(OBJECT(s));
478 
479     assert(r->req.aiocb == NULL);
480     if (scsi_disk_req_check_error(r, ret, false)) {
481         goto done;
482     }
483 
484     /* The request is used as the AIO opaque value, so add a ref.  */
485     scsi_req_ref(&r->req);
486 
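    /*
     * If the HBA supplied a scatter/gather list, DMA straight into guest
     * memory; otherwise read into the bounce buffer and hand the data to
     * the HBA via scsi_req_data() from the completion callback.
     */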
487     if (r->req.sg) {
488         dma_acct_start(s->qdev.conf.blk, &r->acct, r->req.sg, BLOCK_ACCT_READ);
489         r->req.residual -= r->req.sg->size;
490         r->req.aiocb = dma_blk_io(r->req.sg, r->sector << BDRV_SECTOR_BITS,
491                                   BDRV_SECTOR_SIZE,
492                                   sdc->dma_readv, r, scsi_dma_complete, r,
493                                   DMA_DIRECTION_FROM_DEVICE);
494     } else {
495         scsi_init_iovec(r, SCSI_DMA_BUF_SIZE);
496         block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct,
497                          r->qiov.size, BLOCK_ACCT_READ);
498         r->req.aiocb = sdc->dma_readv(r->sector << BDRV_SECTOR_BITS, &r->qiov,
499                                       scsi_read_complete, r, r);
500     }
501 
502 done:
503     scsi_req_unref(&r->req);
504 }
505 
506 static void scsi_do_read_cb(void *opaque, int ret)
507 {
508     SCSIDiskReq *r = (SCSIDiskReq *)opaque;
509     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
510 
511     assert(r->req.aiocb != NULL);
512     r->req.aiocb = NULL;
513 
514     if (ret < 0) {
515         block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct);
516     } else {
517         block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct);
518     }
519     scsi_do_read(opaque, ret);
520 }
521 
522 /* Read more data from scsi device into buffer.  */
523 static void scsi_read_data(SCSIRequest *req)
524 {
525     SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
526     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
527     bool first;
528 
529     trace_scsi_disk_read_data_count(r->sector_count);
530     if (r->sector_count == 0) {
531         /* This also clears the sense buffer for REQUEST SENSE.  */
532         scsi_req_complete(&r->req, GOOD);
533         return;
534     }
535 
536     /* No data transfer may already be in progress */
537     assert(r->req.aiocb == NULL);
538 
539     /* The request is used as the AIO opaque value, so add a ref.  */
540     scsi_req_ref(&r->req);
541     if (r->req.cmd.mode == SCSI_XFER_TO_DEV) {
542         trace_scsi_disk_read_data_invalid();
543         scsi_read_complete_noio(r, -EINVAL);
544         return;
545     }
546 
547     if (!blk_is_available(req->dev->conf.blk)) {
548         scsi_read_complete_noio(r, -ENOMEDIUM);
549         return;
550     }
551 
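    /*
     * A read with the FUA bit set is emulated by flushing the write cache
     * before the first chunk is read, so that cached writes reach the
     * medium first.
     */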
552     first = !r->started;
553     r->started = true;
554     if (first && r->need_fua_emulation) {
555         block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct, 0,
556                          BLOCK_ACCT_FLUSH);
557         r->req.aiocb = blk_aio_flush(s->qdev.conf.blk, scsi_do_read_cb, r);
558     } else {
559         scsi_do_read(r, 0);
560     }
561 }
562 
563 static void scsi_write_complete_noio(SCSIDiskReq *r, int ret)
564 {
565     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
566     uint32_t n;
567 
568     /* The request must only run in the BlockBackend's AioContext */
569     assert(blk_get_aio_context(s->qdev.conf.blk) ==
570            qemu_get_current_aio_context());
571 
572     assert(r->req.aiocb == NULL);
573     if (scsi_disk_req_check_error(r, ret, ret > 0)) {
574         goto done;
575     }
576 
577     n = r->qiov.size / BDRV_SECTOR_SIZE;
578     r->sector += n;
579     r->sector_count -= n;
580     if (r->sector_count == 0) {
581         scsi_write_do_fua(r);
582         return;
583     } else {
584         scsi_init_iovec(r, SCSI_DMA_BUF_SIZE);
585         trace_scsi_disk_write_complete_noio(r->req.tag, r->qiov.size);
586         scsi_req_data(&r->req, r->qiov.size);
587     }
588 
589 done:
590     scsi_req_unref(&r->req);
591 }
592 
593 /* May not be called in all error cases, don't rely on cleanup here */
594 static void scsi_write_complete(void *opaque, int ret)
595 {
596     SCSIDiskReq *r = (SCSIDiskReq *)opaque;
597     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
598 
599     assert(r->req.aiocb != NULL);
600     r->req.aiocb = NULL;
601 
602     /* ret > 0 is accounted for in scsi_disk_req_check_error() */
603     if (ret < 0) {
604         block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct);
605     } else if (ret == 0) {
606         block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct);
607     }
608     scsi_write_complete_noio(r, ret);
609 }
610 
611 static void scsi_write_data(SCSIRequest *req)
612 {
613     SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
614     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
615     SCSIDiskClass *sdc = (SCSIDiskClass *) object_get_class(OBJECT(s));
616 
617     /* No data transfer may already be in progress */
618     assert(r->req.aiocb == NULL);
619 
620     /* The request is used as the AIO opaque value, so add a ref.  */
621     scsi_req_ref(&r->req);
622     if (r->req.cmd.mode != SCSI_XFER_TO_DEV) {
623         trace_scsi_disk_write_data_invalid();
624         scsi_write_complete_noio(r, -EINVAL);
625         return;
626     }
627 
628     if (!r->req.sg && !r->qiov.size) {
629         /* Called for the first time.  Ask the driver to send us more data.  */
630         r->started = true;
631         scsi_write_complete_noio(r, 0);
632         return;
633     }
634     if (!blk_is_available(req->dev->conf.blk)) {
635         scsi_write_complete_noio(r, -ENOMEDIUM);
636         return;
637     }
638 
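    /*
     * Data received for VERIFY is neither written nor compared against
     * the medium; just complete the transfer.
     */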
639     if (r->req.cmd.buf[0] == VERIFY_10 || r->req.cmd.buf[0] == VERIFY_12 ||
640         r->req.cmd.buf[0] == VERIFY_16) {
641         if (r->req.sg) {
642             scsi_dma_complete_noio(r, 0);
643         } else {
644             scsi_write_complete_noio(r, 0);
645         }
646         return;
647     }
648 
649     if (r->req.sg) {
650         dma_acct_start(s->qdev.conf.blk, &r->acct, r->req.sg, BLOCK_ACCT_WRITE);
651         r->req.residual -= r->req.sg->size;
652         r->req.aiocb = dma_blk_io(r->req.sg, r->sector << BDRV_SECTOR_BITS,
653                                   BDRV_SECTOR_SIZE,
654                                   sdc->dma_writev, r, scsi_dma_complete, r,
655                                   DMA_DIRECTION_TO_DEVICE);
656     } else {
657         block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct,
658                          r->qiov.size, BLOCK_ACCT_WRITE);
659         r->req.aiocb = sdc->dma_writev(r->sector << BDRV_SECTOR_BITS, &r->qiov,
660                                        scsi_write_complete, r, r);
661     }
662 }
663 
664 /* Return a pointer to the data buffer.  */
665 static uint8_t *scsi_get_buf(SCSIRequest *req)
666 {
667     SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
668 
669     return (uint8_t *)r->iov.iov_base;
670 }
671 
672 static int scsi_disk_emulate_vpd_page(SCSIRequest *req, uint8_t *outbuf)
673 {
674     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
675     uint8_t page_code = req->cmd.buf[2];
676     int start, buflen = 0;
677 
678     outbuf[buflen++] = s->qdev.type & 0x1f;
679     outbuf[buflen++] = page_code;
680     outbuf[buflen++] = 0x00;
681     outbuf[buflen++] = 0x00;
682     start = buflen;
683 
684     switch (page_code) {
685     case 0x00: /* Supported page codes, mandatory */
686     {
687         trace_scsi_disk_emulate_vpd_page_00(req->cmd.xfer);
688         outbuf[buflen++] = 0x00; /* list of supported pages (this page) */
689         if (s->serial) {
690             outbuf[buflen++] = 0x80; /* unit serial number */
691         }
692         outbuf[buflen++] = 0x83; /* device identification */
693         if (s->qdev.type == TYPE_DISK) {
694             outbuf[buflen++] = 0xb0; /* block limits */
695             outbuf[buflen++] = 0xb1; /* block device characteristics */
696             outbuf[buflen++] = 0xb2; /* thin provisioning */
697         }
698         break;
699     }
700     case 0x80: /* Device serial number, optional */
701     {
702         int l;
703 
704         if (!s->serial) {
705             trace_scsi_disk_emulate_vpd_page_80_not_supported();
706             return -1;
707         }
708 
709         l = strlen(s->serial);
710         if (l > MAX_SERIAL_LEN) {
711             l = MAX_SERIAL_LEN;
712         }
713 
714         trace_scsi_disk_emulate_vpd_page_80(req->cmd.xfer);
715         memcpy(outbuf + buflen, s->serial, l);
716         buflen += l;
717         break;
718     }
719 
720     case 0x83: /* Device identification page, mandatory */
721     {
722         int id_len = s->device_id ? MIN(strlen(s->device_id), 255 - 8) : 0;
723 
724         trace_scsi_disk_emulate_vpd_page_83(req->cmd.xfer);
725 
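        /*
         * Each identification descriptor is a 4-byte header (protocol id/
         * code set, PIV/association/designator type, reserved, designator
         * length) followed by the designator itself.
         */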
726         if (id_len) {
727             outbuf[buflen++] = 0x2; /* ASCII */
728             outbuf[buflen++] = 0;   /* not officially assigned */
729             outbuf[buflen++] = 0;   /* reserved */
730             outbuf[buflen++] = id_len; /* length of data following */
731             memcpy(outbuf + buflen, s->device_id, id_len);
732             buflen += id_len;
733         }
734 
735         if (s->qdev.wwn) {
736             outbuf[buflen++] = 0x1; /* Binary */
737             outbuf[buflen++] = 0x3; /* NAA */
738             outbuf[buflen++] = 0;   /* reserved */
739             outbuf[buflen++] = 8;
740             stq_be_p(&outbuf[buflen], s->qdev.wwn);
741             buflen += 8;
742         }
743 
744         if (s->qdev.port_wwn) {
745             outbuf[buflen++] = 0x61; /* SAS / Binary */
746             outbuf[buflen++] = 0x93; /* PIV / Target port / NAA */
747             outbuf[buflen++] = 0;    /* reserved */
748             outbuf[buflen++] = 8;
749             stq_be_p(&outbuf[buflen], s->qdev.port_wwn);
750             buflen += 8;
751         }
752 
753         if (s->port_index) {
754             outbuf[buflen++] = 0x61; /* SAS / Binary */
755 
756             /* PIV/Target port/relative target port */
757             outbuf[buflen++] = 0x94;
758 
759             outbuf[buflen++] = 0;    /* reserved */
760             outbuf[buflen++] = 4;
761             stw_be_p(&outbuf[buflen + 2], s->port_index);
762             buflen += 4;
763         }
764         break;
765     }
766     case 0xb0: /* block limits */
767     {
768         SCSIBlockLimits bl = {};
769 
770         if (s->qdev.type == TYPE_ROM) {
771             trace_scsi_disk_emulate_vpd_page_b0_not_supported();
772             return -1;
773         }
774         bl.wsnz = 1;
775         bl.unmap_sectors =
776             s->qdev.conf.discard_granularity / s->qdev.blocksize;
777         bl.min_io_size =
778             s->qdev.conf.min_io_size / s->qdev.blocksize;
779         bl.opt_io_size =
780             s->qdev.conf.opt_io_size / s->qdev.blocksize;
781         bl.max_unmap_sectors =
782             s->max_unmap_size / s->qdev.blocksize;
783         bl.max_io_sectors =
784             s->max_io_size / s->qdev.blocksize;
785         /* 255 descriptors fit in 4 KiB with an 8-byte header */
786         bl.max_unmap_descr = 255;
787 
788         if (s->qdev.type == TYPE_DISK) {
789             int max_transfer_blk = blk_get_max_transfer(s->qdev.conf.blk);
790             int max_io_sectors_blk =
791                 max_transfer_blk / s->qdev.blocksize;
792 
793             bl.max_io_sectors =
794                 MIN_NON_ZERO(max_io_sectors_blk, bl.max_io_sectors);
795         }
796         buflen += scsi_emulate_block_limits(outbuf + buflen, &bl);
797         break;
798     }
799     case 0xb1: /* block device characteristics */
800     {
801         buflen = 0x40;
802         outbuf[4] = (s->rotation_rate >> 8) & 0xff;
803         outbuf[5] = s->rotation_rate & 0xff;
804         outbuf[6] = 0; /* PRODUCT TYPE */
805         outbuf[7] = 0; /* WABEREQ | WACEREQ | NOMINAL FORM FACTOR */
806         outbuf[8] = 0; /* VBULS */
807         break;
808     }
809     case 0xb2: /* thin provisioning */
810     {
811         buflen = 8;
812         outbuf[4] = 0;
813         outbuf[5] = 0xe0; /* unmap & write_same 10/16 all supported */
814         outbuf[6] = s->qdev.conf.discard_granularity ? 2 : 1;
815         outbuf[7] = 0;
816         break;
817     }
818     default:
819         return -1;
820     }
821     /* done with EVPD */
822     assert(buflen - start <= 255);
823     outbuf[start - 1] = buflen - start;
824     return buflen;
825 }
826 
827 static int scsi_disk_emulate_inquiry(SCSIRequest *req, uint8_t *outbuf)
828 {
829     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
830     int buflen = 0;
831 
832     if (req->cmd.buf[1] & 0x1) {
833         /* Vital product data */
834         return scsi_disk_emulate_vpd_page(req, outbuf);
835     }
836 
837     /* Standard INQUIRY data */
838     if (req->cmd.buf[2] != 0) {
839         return -1;
840     }
841 
842     /* PAGE CODE == 0 */
843     buflen = req->cmd.xfer;
844     if (buflen > SCSI_MAX_INQUIRY_LEN) {
845         buflen = SCSI_MAX_INQUIRY_LEN;
846     }
847 
848     outbuf[0] = s->qdev.type & 0x1f;
849     outbuf[1] = (s->features & (1 << SCSI_DISK_F_REMOVABLE)) ? 0x80 : 0;
850 
851     strpadcpy((char *) &outbuf[16], 16, s->product, ' ');
852     strpadcpy((char *) &outbuf[8], 8, s->vendor, ' ');
853 
854     memset(&outbuf[32], 0, 4);
855     memcpy(&outbuf[32], s->version, MIN(4, strlen(s->version)));
856     /*
857      * We claim conformance to SPC-3, which is required for guests
858      * to ask for modern features like READ CAPACITY(16) or the
859      * block characteristics VPD page by default.  Not all of SPC-3
860      * is actually implemented, but we're good enough.
861      */
862     outbuf[2] = s->qdev.default_scsi_version;
863     outbuf[3] = 2 | 0x10; /* Format 2, HiSup */
864 
865     if (buflen > 36) {
866         outbuf[4] = buflen - 5; /* Additional Length = (Len - 1) - 4 */
867     } else {
868         /* If the allocation length of the CDB is too small,
869          * the additional length is not adjusted.  */
870         outbuf[4] = 36 - 5;
871     }
872 
873     /* Sync data transfer and TCQ.  */
874     outbuf[7] = 0x10 | (req->bus->info->tcq ? 0x02 : 0);
875     return buflen;
876 }
877 
878 static inline bool media_is_dvd(SCSIDiskState *s)
879 {
880     uint64_t nb_sectors;
881     if (s->qdev.type != TYPE_ROM) {
882         return false;
883     }
884     if (!blk_is_available(s->qdev.conf.blk)) {
885         return false;
886     }
887     blk_get_geometry(s->qdev.conf.blk, &nb_sectors);
888     return nb_sectors > CD_MAX_SECTORS;
889 }
890 
891 static inline bool media_is_cd(SCSIDiskState *s)
892 {
893     uint64_t nb_sectors;
894     if (s->qdev.type != TYPE_ROM) {
895         return false;
896     }
897     if (!blk_is_available(s->qdev.conf.blk)) {
898         return false;
899     }
900     blk_get_geometry(s->qdev.conf.blk, &nb_sectors);
901     return nb_sectors <= CD_MAX_SECTORS;
902 }
903 
904 static int scsi_read_disc_information(SCSIDiskState *s, SCSIDiskReq *r,
905                                       uint8_t *outbuf)
906 {
907     uint8_t type = r->req.cmd.buf[1] & 7;
908 
909     if (s->qdev.type != TYPE_ROM) {
910         return -1;
911     }
912 
913     /* Types 1/2 are only defined for Blu-Ray.  */
914     if (type != 0) {
915         scsi_check_condition(r, SENSE_CODE(INVALID_FIELD));
916         return -1;
917     }
918 
919     memset(outbuf, 0, 34);
920     outbuf[1] = 32;
921     outbuf[2] = 0xe; /* last session complete, disc finalized */
922     outbuf[3] = 1;   /* first track on disc */
923     outbuf[4] = 1;   /* # of sessions */
924     outbuf[5] = 1;   /* first track of last session */
925     outbuf[6] = 1;   /* last track of last session */
926     outbuf[7] = 0x20; /* unrestricted use */
927     outbuf[8] = 0x00; /* CD-ROM or DVD-ROM */
928     /* 9-10-11: most significant bytes corresponding to bytes 4-5-6 */
929     /* 12-23: not meaningful for CD-ROM or DVD-ROM */
930     /* 24-31: disc bar code */
931     /* 32: disc application code */
932     /* 33: number of OPC tables */
933 
934     return 34;
935 }
936 
937 static int scsi_read_dvd_structure(SCSIDiskState *s, SCSIDiskReq *r,
938                                    uint8_t *outbuf)
939 {
940     static const int rds_caps_size[5] = {
941         [0] = 2048 + 4,
942         [1] = 4 + 4,
943         [3] = 188 + 4,
944         [4] = 2048 + 4,
945     };
946 
947     uint8_t media = r->req.cmd.buf[1];
948     uint8_t layer = r->req.cmd.buf[6];
949     uint8_t format = r->req.cmd.buf[7];
950     int size = -1;
951 
952     if (s->qdev.type != TYPE_ROM) {
953         return -1;
954     }
955     if (media != 0) {
956         scsi_check_condition(r, SENSE_CODE(INVALID_FIELD));
957         return -1;
958     }
959 
960     if (format != 0xff) {
961         if (!blk_is_available(s->qdev.conf.blk)) {
962             scsi_check_condition(r, SENSE_CODE(NO_MEDIUM));
963             return -1;
964         }
965         if (media_is_cd(s)) {
966             scsi_check_condition(r, SENSE_CODE(INCOMPATIBLE_FORMAT));
967             return -1;
968         }
969         if (format >= ARRAY_SIZE(rds_caps_size)) {
970             return -1;
971         }
972         size = rds_caps_size[format];
973         memset(outbuf, 0, size);
974     }
975 
976     switch (format) {
977     case 0x00: {
978         /* Physical format information */
979         uint64_t nb_sectors;
980         if (layer != 0) {
981             goto fail;
982         }
983         blk_get_geometry(s->qdev.conf.blk, &nb_sectors);
984 
985         outbuf[4] = 1;   /* DVD-ROM, part version 1 */
986         outbuf[5] = 0xf; /* 120mm disc, minimum rate unspecified */
987         outbuf[6] = 1;   /* one layer, read-only (per MMC-2 spec) */
988         outbuf[7] = 0;   /* default densities */
989 
990         stl_be_p(&outbuf[12], (nb_sectors >> 2) - 1); /* end sector */
991         stl_be_p(&outbuf[16], (nb_sectors >> 2) - 1); /* l0 end sector */
992         break;
993     }
994 
995     case 0x01: /* DVD copyright information, all zeros */
996         break;
997 
998     case 0x03: /* BCA information - invalid field for no BCA info */
999         return -1;
1000 
1001     case 0x04: /* DVD disc manufacturing information, all zeros */
1002         break;
1003 
1004     case 0xff: { /* List capabilities */
1005         int i;
1006         size = 4;
1007         for (i = 0; i < ARRAY_SIZE(rds_caps_size); i++) {
1008             if (!rds_caps_size[i]) {
1009                 continue;
1010             }
1011             outbuf[size] = i;
1012             outbuf[size + 1] = 0x40; /* Not writable, readable */
1013             stw_be_p(&outbuf[size + 2], rds_caps_size[i]);
1014             size += 4;
1015         }
1016         break;
1017     }
1018 
1019     default:
1020         return -1;
1021     }
1022 
1023     /* Size of buffer, not including 2 byte size field */
1024     stw_be_p(outbuf, size - 2);
1025     return size;
1026 
1027 fail:
1028     return -1;
1029 }
1030 
1031 static int scsi_event_status_media(SCSIDiskState *s, uint8_t *outbuf)
1032 {
1033     uint8_t event_code, media_status;
1034 
1035     media_status = 0;
1036     if (s->tray_open) {
1037         media_status = MS_TRAY_OPEN;
1038     } else if (blk_is_inserted(s->qdev.conf.blk)) {
1039         media_status = MS_MEDIA_PRESENT;
1040     }
1041 
1042     /* Event notification descriptor */
1043     event_code = MEC_NO_CHANGE;
1044     if (media_status != MS_TRAY_OPEN) {
1045         if (s->media_event) {
1046             event_code = MEC_NEW_MEDIA;
1047             s->media_event = false;
1048         } else if (s->eject_request) {
1049             event_code = MEC_EJECT_REQUESTED;
1050             s->eject_request = false;
1051         }
1052     }
1053 
1054     outbuf[0] = event_code;
1055     outbuf[1] = media_status;
1056 
1057     /* These fields are reserved, just clear them. */
1058     outbuf[2] = 0;
1059     outbuf[3] = 0;
1060     return 4;
1061 }
1062 
1063 static int scsi_get_event_status_notification(SCSIDiskState *s, SCSIDiskReq *r,
1064                                               uint8_t *outbuf)
1065 {
1066     int size;
1067     uint8_t *buf = r->req.cmd.buf;
1068     uint8_t notification_class_request = buf[4];
1069     if (s->qdev.type != TYPE_ROM) {
1070         return -1;
1071     }
1072     if ((buf[1] & 1) == 0) {
1073         /* asynchronous */
1074         return -1;
1075     }
1076 
1077     size = 4;
1078     outbuf[0] = outbuf[1] = 0;
1079     outbuf[3] = 1 << GESN_MEDIA; /* supported events */
1080     if (notification_class_request & (1 << GESN_MEDIA)) {
1081         outbuf[2] = GESN_MEDIA;
1082         size += scsi_event_status_media(s, &outbuf[size]);
1083     } else {
1084         outbuf[2] = 0x80;
1085     }
1086     stw_be_p(outbuf, size - 4);
1087     return size;
1088 }
1089 
1090 static int scsi_get_configuration(SCSIDiskState *s, uint8_t *outbuf)
1091 {
1092     int current;
1093 
1094     if (s->qdev.type != TYPE_ROM) {
1095         return -1;
1096     }
1097 
1098     if (media_is_dvd(s)) {
1099         current = MMC_PROFILE_DVD_ROM;
1100     } else if (media_is_cd(s)) {
1101         current = MMC_PROFILE_CD_ROM;
1102     } else {
1103         current = MMC_PROFILE_NONE;
1104     }
1105 
1106     memset(outbuf, 0, 40);
1107     stl_be_p(&outbuf[0], 36); /* Bytes after the data length field */
1108     stw_be_p(&outbuf[6], current);
1109     /* outbuf[8] - outbuf[19]: Feature 0 - Profile list */
1110     outbuf[10] = 0x03; /* persistent, current */
1111     outbuf[11] = 8; /* two profiles */
1112     stw_be_p(&outbuf[12], MMC_PROFILE_DVD_ROM);
1113     outbuf[14] = (current == MMC_PROFILE_DVD_ROM);
1114     stw_be_p(&outbuf[16], MMC_PROFILE_CD_ROM);
1115     outbuf[18] = (current == MMC_PROFILE_CD_ROM);
1116     /* outbuf[20] - outbuf[31]: Feature 1 - Core feature */
1117     stw_be_p(&outbuf[20], 1);
1118     outbuf[22] = 0x08 | 0x03; /* version 2, persistent, current */
1119     outbuf[23] = 8;
1120     stl_be_p(&outbuf[24], 1); /* SCSI */
1121     outbuf[28] = 1; /* DBE = 1, mandatory */
1122     /* outbuf[32] - outbuf[39]: Feature 3 - Removable media feature */
1123     stw_be_p(&outbuf[32], 3);
1124     outbuf[34] = 0x08 | 0x03; /* version 2, persistent, current */
1125     outbuf[35] = 4;
1126     outbuf[36] = 0x39; /* tray, load=1, eject=1, unlocked at powerup, lock=1 */
1127     /* TODO: Random readable, CD read, DVD read, drive serial number,
1128        power management */
1129     return 40;
1130 }
1131 
1132 static int scsi_emulate_mechanism_status(SCSIDiskState *s, uint8_t *outbuf)
1133 {
1134     if (s->qdev.type != TYPE_ROM) {
1135         return -1;
1136     }
1137     memset(outbuf, 0, 8);
1138     outbuf[5] = 1; /* CD-ROM */
1139     return 8;
1140 }
1141 
1142 static int mode_sense_page(SCSIDiskState *s, int page, uint8_t **p_outbuf,
1143                            int page_control)
1144 {
1145     static const int mode_sense_valid[0x3f] = {
1146         [MODE_PAGE_VENDOR_SPECIFIC]        = (1 << TYPE_DISK) | (1 << TYPE_ROM),
1147         [MODE_PAGE_HD_GEOMETRY]            = (1 << TYPE_DISK),
1148         [MODE_PAGE_FLEXIBLE_DISK_GEOMETRY] = (1 << TYPE_DISK),
1149         [MODE_PAGE_CACHING]                = (1 << TYPE_DISK) | (1 << TYPE_ROM),
1150         [MODE_PAGE_R_W_ERROR]              = (1 << TYPE_DISK) | (1 << TYPE_ROM),
1151         [MODE_PAGE_AUDIO_CTL]              = (1 << TYPE_ROM),
1152         [MODE_PAGE_CAPABILITIES]           = (1 << TYPE_ROM),
1153         [MODE_PAGE_APPLE_VENDOR]           = (1 << TYPE_ROM),
1154     };
1155 
1156     uint8_t *p = *p_outbuf + 2;
1157     int length;
1158 
1159     assert(page < ARRAY_SIZE(mode_sense_valid));
1160     if ((mode_sense_valid[page] & (1 << s->qdev.type)) == 0) {
1161         return -1;
1162     }
1163 
1164     /*
1165      * If Changeable Values are requested, a mask denoting those mode parameters
1166      * that are changeable shall be returned. As we currently don't support
1167      * parameter changes via MODE_SELECT, all bits are returned set to zero.
1168      * The buffer was already memset to zero by the caller of this function.
1169      *
1170      * The offsets here are off by two compared to the descriptions in the
1171      * SCSI specs, because those include a 2-byte header.  This is unfortunate,
1172      * but it is done so that offsets are consistent within our implementation
1173      * of MODE SENSE and MODE SELECT.  MODE SELECT has to deal with both
1174      * 2-byte and 4-byte headers.
1175      */
1176     switch (page) {
1177     case MODE_PAGE_HD_GEOMETRY:
1178         length = 0x16;
1179         if (page_control == 1) { /* Changeable Values */
1180             break;
1181         }
1182         /* if a geometry hint is available, use it */
1183         p[0] = (s->qdev.conf.cyls >> 16) & 0xff;
1184         p[1] = (s->qdev.conf.cyls >> 8) & 0xff;
1185         p[2] = s->qdev.conf.cyls & 0xff;
1186         p[3] = s->qdev.conf.heads & 0xff;
1187         /* Write precomp start cylinder, disabled */
1188         p[4] = (s->qdev.conf.cyls >> 16) & 0xff;
1189         p[5] = (s->qdev.conf.cyls >> 8) & 0xff;
1190         p[6] = s->qdev.conf.cyls & 0xff;
1191         /* Reduced current start cylinder, disabled */
1192         p[7] = (s->qdev.conf.cyls >> 16) & 0xff;
1193         p[8] = (s->qdev.conf.cyls >> 8) & 0xff;
1194         p[9] = s->qdev.conf.cyls & 0xff;
1195         /* Device step rate [ns], 200ns */
1196         p[10] = 0;
1197         p[11] = 200;
1198         /* Landing zone cylinder */
1199         p[12] = 0xff;
1200         p[13] = 0xff;
1201         p[14] = 0xff;
1202         /* Medium rotation rate [rpm], 5400 rpm */
1203         p[18] = (5400 >> 8) & 0xff;
1204         p[19] = 5400 & 0xff;
1205         break;
1206 
1207     case MODE_PAGE_FLEXIBLE_DISK_GEOMETRY:
1208         length = 0x1e;
1209         if (page_control == 1) { /* Changeable Values */
1210             break;
1211         }
1212         /* Transfer rate [kbit/s], 5Mbit/s */
1213         p[0] = 5000 >> 8;
1214         p[1] = 5000 & 0xff;
1215         /* if a geometry hint is available, use it */
1216         p[2] = s->qdev.conf.heads & 0xff;
1217         p[3] = s->qdev.conf.secs & 0xff;
1218         p[4] = s->qdev.blocksize >> 8;
1219         p[6] = (s->qdev.conf.cyls >> 8) & 0xff;
1220         p[7] = s->qdev.conf.cyls & 0xff;
1221         /* Write precomp start cylinder, disabled */
1222         p[8] = (s->qdev.conf.cyls >> 8) & 0xff;
1223         p[9] = s->qdev.conf.cyls & 0xff;
1224         /* Reduced current start cylinder, disabled */
1225         p[10] = (s->qdev.conf.cyls >> 8) & 0xff;
1226         p[11] = s->qdev.conf.cyls & 0xff;
1227         /* Device step rate [100us], 100us */
1228         p[12] = 0;
1229         p[13] = 1;
1230         /* Device step pulse width [us], 1us */
1231         p[14] = 1;
1232         /* Device head settle delay [100us], 100us */
1233         p[15] = 0;
1234         p[16] = 1;
1235         /* Motor on delay [0.1s], 0.1s */
1236         p[17] = 1;
1237         /* Motor off delay [0.1s], 0.1s */
1238         p[18] = 1;
1239         /* Medium rotation rate [rpm], 5400 rpm */
1240         p[26] = (5400 >> 8) & 0xff;
1241         p[27] = 5400 & 0xff;
1242         break;
1243 
1244     case MODE_PAGE_CACHING:
1245         length = 0x12;
1246         if (page_control == 1 || /* Changeable Values */
1247             blk_enable_write_cache(s->qdev.conf.blk)) {
1248             p[0] = 4; /* WCE */
1249         }
1250         break;
1251 
1252     case MODE_PAGE_R_W_ERROR:
1253         length = 10;
1254         if (page_control == 1) { /* Changeable Values */
1255             if (s->qdev.type == TYPE_ROM) {
1256                 /* Automatic Write Reallocation Enabled */
1257                 p[0] = 0x80;
1258             }
1259             break;
1260         }
1261         p[0] = 0x80; /* Automatic Write Reallocation Enabled */
1262         if (s->qdev.type == TYPE_ROM) {
1263             p[1] = 0x20; /* Read Retry Count */
1264         }
1265         break;
1266 
1267     case MODE_PAGE_AUDIO_CTL:
1268         length = 14;
1269         break;
1270 
1271     case MODE_PAGE_CAPABILITIES:
1272         length = 0x14;
1273         if (page_control == 1) { /* Changeable Values */
1274             break;
1275         }
1276 
1277         p[0] = 0x3b; /* CD-R & CD-RW read */
1278         p[1] = 0; /* Writing not supported */
1279         p[2] = 0x7f; /* Audio, composite, digital out,
1280                         mode 2 form 1&2, multi session */
1281         p[3] = 0xff; /* CD DA, DA accurate, RW supported,
1282                         RW corrected, C2 errors, ISRC,
1283                         UPC, Bar code */
1284         p[4] = 0x2d | (s->tray_locked ? 2 : 0);
1285         /* Locking supported, jumper present, eject, tray */
1286         p[5] = 0; /* no volume & mute control, no
1287                      changer */
1288         p[6] = (50 * 176) >> 8; /* 50x read speed */
1289         p[7] = (50 * 176) & 0xff;
1290         p[8] = 2 >> 8; /* Two volume levels */
1291         p[9] = 2 & 0xff;
1292         p[10] = 2048 >> 8; /* 2M buffer */
1293         p[11] = 2048 & 0xff;
1294         p[12] = (16 * 176) >> 8; /* 16x read speed current */
1295         p[13] = (16 * 176) & 0xff;
1296         p[16] = (16 * 176) >> 8; /* 16x write speed */
1297         p[17] = (16 * 176) & 0xff;
1298         p[18] = (16 * 176) >> 8; /* 16x write speed current */
1299         p[19] = (16 * 176) & 0xff;
1300         break;
1301 
1302     case MODE_PAGE_APPLE_VENDOR:
1303         if (s->quirks & (1 << SCSI_DISK_QUIRK_MODE_PAGE_APPLE_VENDOR)) {
1304             length = 0x1e;
1305             if (page_control == 1) { /* Changeable Values */
1306                 break;
1307             }
1308 
1309             memset(p, 0, length);
1310             strcpy((char *)p + 8, "APPLE COMPUTER, INC   ");
1311             break;
1312         } else {
1313             return -1;
1314         }
1315 
1316     case MODE_PAGE_VENDOR_SPECIFIC:
1317         if (s->qdev.type == TYPE_DISK && (s->quirks &
1318             (1 << SCSI_DISK_QUIRK_MODE_PAGE_VENDOR_SPECIFIC_APPLE))) {
1319             length = 0x2;
1320             if (page_control == 1) { /* Changeable Values */
1321                 p[0] = 0xff;
1322                 p[1] = 0xff;
1323                 break;
1324             }
1325             p[0] = 0;
1326             p[1] = 0;
1327             break;
1328         } else {
1329             return -1;
1330         }
1331 
1332     default:
1333         return -1;
1334     }
1335 
1336     assert(length < 256);
1337     (*p_outbuf)[0] = page;
1338     (*p_outbuf)[1] = length;
1339     *p_outbuf += length + 2;
1340     return length + 2;
1341 }
1342 
1343 static int scsi_disk_emulate_mode_sense(SCSIDiskReq *r, uint8_t *outbuf)
1344 {
1345     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
1346     uint64_t nb_sectors;
1347     bool dbd;
1348     int page, buflen, ret, page_control;
1349     uint8_t *p;
1350     uint8_t dev_specific_param;
1351 
1352     dbd = (r->req.cmd.buf[1] & 0x8) != 0;
1353     page = r->req.cmd.buf[2] & 0x3f;
1354     page_control = (r->req.cmd.buf[2] & 0xc0) >> 6;
1355 
1356     trace_scsi_disk_emulate_mode_sense((r->req.cmd.buf[0] == MODE_SENSE) ? 6 :
1357                                        10, page, r->req.cmd.xfer, page_control);
1358     memset(outbuf, 0, r->req.cmd.xfer);
1359     p = outbuf;
1360 
1361     if (s->qdev.type == TYPE_DISK) {
1362         dev_specific_param = s->features & (1 << SCSI_DISK_F_DPOFUA) ? 0x10 : 0;
1363         if (!blk_is_writable(s->qdev.conf.blk)) {
1364             dev_specific_param |= 0x80; /* Readonly.  */
1365         }
1366     } else {
1367         if (s->quirks & (1 << SCSI_DISK_QUIRK_MODE_SENSE_ROM_USE_DBD)) {
1368             /* Use DBD from the request... */
1369             dev_specific_param = 0x00;
1370 
1371             /*
1372              * ... unless we receive a request for MODE_PAGE_APPLE_VENDOR
1373              * which should never return a block descriptor even though DBD is
1374              * not set; otherwise CDROM detection fails in MacOS
1375              */
1376             if (s->quirks & (1 << SCSI_DISK_QUIRK_MODE_PAGE_APPLE_VENDOR) &&
1377                 page == MODE_PAGE_APPLE_VENDOR) {
1378                 dbd = true;
1379             }
1380         } else {
1381             /*
1382              * MMC prescribes that CD/DVD drives have no block descriptors,
1383              * and defines no device-specific parameter.
1384              */
1385             dev_specific_param = 0x00;
1386             dbd = true;
1387         }
1388     }
1389 
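    /*
     * Mode parameter header: 4 bytes for MODE SENSE(6), 8 bytes for
     * MODE SENSE(10).  The mode data length in byte 0 (or bytes 0-1) is
     * filled in at the end, once the total length is known.
     */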
1390     if (r->req.cmd.buf[0] == MODE_SENSE) {
1391         p[1] = 0; /* Default media type.  */
1392         p[2] = dev_specific_param;
1393         p[3] = 0; /* Block descriptor length.  */
1394         p += 4;
1395     } else { /* MODE_SENSE_10 */
1396         p[2] = 0; /* Default media type.  */
1397         p[3] = dev_specific_param;
1398         p[6] = p[7] = 0; /* Block descriptor length.  */
1399         p += 8;
1400     }
1401 
1402     blk_get_geometry(s->qdev.conf.blk, &nb_sectors);
1403     if (!dbd && nb_sectors) {
1404         if (r->req.cmd.buf[0] == MODE_SENSE) {
1405             outbuf[3] = 8; /* Block descriptor length  */
1406         } else { /* MODE_SENSE_10 */
1407             outbuf[7] = 8; /* Block descriptor length  */
1408         }
1409         nb_sectors /= (s->qdev.blocksize / BDRV_SECTOR_SIZE);
1410         if (nb_sectors > 0xffffff) {
1411             nb_sectors = 0;
1412         }
1413         p[0] = 0; /* media density code */
1414         p[1] = (nb_sectors >> 16) & 0xff;
1415         p[2] = (nb_sectors >> 8) & 0xff;
1416         p[3] = nb_sectors & 0xff;
1417         p[4] = 0; /* reserved */
1418         p[5] = 0; /* bytes 5-7 are the sector size in bytes */
1419         p[6] = s->qdev.blocksize >> 8;
1420         p[7] = 0;
1421         p += 8;
1422     }
1423 
1424     if (page_control == 3) {
1425         /* Saved Values */
1426         scsi_check_condition(r, SENSE_CODE(SAVING_PARAMS_NOT_SUPPORTED));
1427         return -1;
1428     }
1429 
1430     if (page == 0x3f) {
1431         for (page = 0; page <= 0x3e; page++) {
1432             mode_sense_page(s, page, &p, page_control);
1433         }
1434     } else {
1435         ret = mode_sense_page(s, page, &p, page_control);
1436         if (ret == -1) {
1437             return -1;
1438         }
1439     }
1440 
1441     buflen = p - outbuf;
1442     /*
1443      * The mode data length field specifies the length in bytes of the
1444      * following data that is available to be transferred. The mode data
1445      * length does not include itself.
1446      */
1447     if (r->req.cmd.buf[0] == MODE_SENSE) {
1448         outbuf[0] = buflen - 1;
1449     } else { /* MODE_SENSE_10 */
1450         outbuf[0] = ((buflen - 2) >> 8) & 0xff;
1451         outbuf[1] = (buflen - 2) & 0xff;
1452     }
1453     return buflen;
1454 }
1455 
1456 static int scsi_disk_emulate_read_toc(SCSIRequest *req, uint8_t *outbuf)
1457 {
1458     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
1459     int start_track, format, msf, toclen;
1460     uint64_t nb_sectors;
1461 
1462     msf = req->cmd.buf[1] & 2;
1463     format = req->cmd.buf[2] & 0xf;
1464     start_track = req->cmd.buf[6];
1465     blk_get_geometry(s->qdev.conf.blk, &nb_sectors);
1466     trace_scsi_disk_emulate_read_toc(start_track, format, msf >> 1);
1467     nb_sectors /= s->qdev.blocksize / BDRV_SECTOR_SIZE;
1468     switch (format) {
1469     case 0:
1470         toclen = cdrom_read_toc(nb_sectors, outbuf, msf, start_track);
1471         break;
1472     case 1:
1473         /* multi session : only a single session defined */
1474         toclen = 12;
1475         memset(outbuf, 0, 12);
1476         outbuf[1] = 0x0a;
1477         outbuf[2] = 0x01;
1478         outbuf[3] = 0x01;
1479         break;
1480     case 2:
1481         toclen = cdrom_read_toc_raw(nb_sectors, outbuf, msf, start_track);
1482         break;
1483     default:
1484         return -1;
1485     }
1486     return toclen;
1487 }
1488 
1489 static int scsi_disk_emulate_start_stop(SCSIDiskReq *r)
1490 {
1491     SCSIRequest *req = &r->req;
1492     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
1493     bool start = req->cmd.buf[4] & 1;
1494     bool loej = req->cmd.buf[4] & 2; /* load on start, eject on !start */
1495     int pwrcnd = req->cmd.buf[4] & 0xf0;
1496 
1497     if (pwrcnd) {
1498         /* eject/load only happens for power condition == 0 */
1499         return 0;
1500     }
1501 
1502     if ((s->features & (1 << SCSI_DISK_F_REMOVABLE)) && loej) {
1503         if (!start && !s->tray_open && s->tray_locked) {
1504             scsi_check_condition(r,
1505                                  blk_is_inserted(s->qdev.conf.blk)
1506                                  ? SENSE_CODE(ILLEGAL_REQ_REMOVAL_PREVENTED)
1507                                  : SENSE_CODE(NOT_READY_REMOVAL_PREVENTED));
1508             return -1;
1509         }
1510 
1511         if (s->tray_open != !start) {
1512             blk_eject(s->qdev.conf.blk, !start);
1513             s->tray_open = !start;
1514         }
1515     }
1516     return 0;
1517 }
1518 
1519 static void scsi_disk_emulate_read_data(SCSIRequest *req)
1520 {
1521     SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
1522     int buflen = r->iov.iov_len;
1523 
1524     if (buflen) {
1525         trace_scsi_disk_emulate_read_data(buflen);
1526         r->iov.iov_len = 0;
1527         r->started = true;
1528         scsi_req_data(&r->req, buflen);
1529         return;
1530     }
1531 
1532     /* This also clears the sense buffer for REQUEST SENSE.  */
1533     scsi_req_complete(&r->req, GOOD);
1534 }
1535 
1536 static int scsi_disk_check_mode_select(SCSIDiskState *s, int page,
1537                                        uint8_t *inbuf, int inlen)
1538 {
1539     uint8_t mode_current[SCSI_MAX_MODE_LEN];
1540     uint8_t mode_changeable[SCSI_MAX_MODE_LEN];
1541     uint8_t *p;
1542     int len, expected_len, changeable_len, i;
1543 
1544     /* The input buffer does not include the page header, so it is
1545      * off by 2 bytes.
1546      */
1547     expected_len = inlen + 2;
1548     if (expected_len > SCSI_MAX_MODE_LEN) {
1549         return -1;
1550     }
1551 
1552     /* MODE_PAGE_ALLS is only valid for MODE SENSE commands */
1553     if (page == MODE_PAGE_ALLS) {
1554         return -1;
1555     }
1556 
1557     p = mode_current;
1558     memset(mode_current, 0, inlen + 2);
1559     len = mode_sense_page(s, page, &p, 0);
1560     if (len < 0 || len != expected_len) {
1561         return -1;
1562     }
1563 
1564     p = mode_changeable;
1565     memset(mode_changeable, 0, inlen + 2);
1566     changeable_len = mode_sense_page(s, page, &p, 1);
1567     assert(changeable_len == len);
1568 
1569     /* Check that unchangeable bits are the same as what MODE SENSE
1570      * would return.
1571      */
1572     for (i = 2; i < len; i++) {
1573         if (((mode_current[i] ^ inbuf[i - 2]) & ~mode_changeable[i]) != 0) {
1574             return -1;
1575         }
1576     }
1577     return 0;
1578 }
1579 
1580 static void scsi_disk_apply_mode_select(SCSIDiskState *s, int page, uint8_t *p)
1581 {
1582     switch (page) {
1583     case MODE_PAGE_CACHING:
1584         blk_set_enable_write_cache(s->qdev.conf.blk, (p[0] & 4) != 0);
1585         break;
1586 
1587     default:
1588         break;
1589     }
1590 }
1591 
1592 static int mode_select_pages(SCSIDiskReq *r, uint8_t *p, int len, bool change)
1593 {
1594     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
1595 
1596     while (len > 0) {
1597         int page, subpage, page_len;
1598 
1599         /* Parse both possible formats for the mode page headers.  */
1600         page = p[0] & 0x3f;
1601         if (p[0] & 0x40) {
1602             if (len < 4) {
1603                 goto invalid_param_len;
1604             }
1605             subpage = p[1];
1606             page_len = lduw_be_p(&p[2]);
1607             p += 4;
1608             len -= 4;
1609         } else {
1610             if (len < 2) {
1611                 goto invalid_param_len;
1612             }
1613             subpage = 0;
1614             page_len = p[1];
1615             p += 2;
1616             len -= 2;
1617         }
1618 
1619         if (subpage) {
1620             goto invalid_param;
1621         }
1622         if (page_len > len) {
1623             if (!(s->quirks & SCSI_DISK_QUIRK_MODE_PAGE_TRUNCATED)) {
1624                 goto invalid_param_len;
1625             }
1626             trace_scsi_disk_mode_select_page_truncated(page, page_len, len);
1627         }
1628 
1629         if (!change) {
1630             if (scsi_disk_check_mode_select(s, page, p, page_len) < 0) {
1631                 goto invalid_param;
1632             }
1633         } else {
1634             scsi_disk_apply_mode_select(s, page, p);
1635         }
1636 
1637         p += page_len;
1638         len -= page_len;
1639     }
1640     return 0;
1641 
1642 invalid_param:
1643     scsi_check_condition(r, SENSE_CODE(INVALID_PARAM));
1644     return -1;
1645 
1646 invalid_param_len:
1647     scsi_check_condition(r, SENSE_CODE(INVALID_PARAM_LEN));
1648     return -1;
1649 }
1650 
1651 static void scsi_disk_emulate_mode_select(SCSIDiskReq *r, uint8_t *inbuf)
1652 {
1653     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
1654     uint8_t *p = inbuf;
1655     int cmd = r->req.cmd.buf[0];
1656     int len = r->req.cmd.xfer;
1657     int hdr_len = (cmd == MODE_SELECT ? 4 : 8);
1658     int bd_len, bs;
1659     int pass;
1660 
1661     if ((r->req.cmd.buf[1] & 0x11) != 0x10) {
1662         if (!(s->quirks &
1663             (1 << SCSI_DISK_QUIRK_MODE_PAGE_VENDOR_SPECIFIC_APPLE))) {
1664             /* We only support PF=1, SP=0.  */
1665             goto invalid_field;
1666         }
1667     }
1668 
1669     if (len < hdr_len) {
1670         goto invalid_param_len;
1671     }
1672 
1673     bd_len = (cmd == MODE_SELECT ? p[3] : lduw_be_p(&p[6]));
1674     len -= hdr_len;
1675     p += hdr_len;
1676     if (len < bd_len) {
1677         goto invalid_param_len;
1678     }
1679     if (bd_len != 0 && bd_len != 8) {
1680         goto invalid_param;
1681     }
1682 
1683     /* Allow changing the block size */
1684     if (bd_len) {
1685         bs = p[5] << 16 | p[6] << 8 | p[7];
1686 
1687         /*
1688          * Since the existing code only checks/updates bits 8-15 of the block
1689          * size, restrict ourselves to the same requirement for now to ensure
1690          * that a block size set by a block descriptor and then read back by
1691          * a subsequent SCSI command will be the same. Also disallow a block
1692          * size of 256 since we cannot handle anything below BDRV_SECTOR_SIZE.
1693          */
1694         if (bs && !(bs & ~0xfe00) && bs != s->qdev.blocksize) {
1695             s->qdev.blocksize = bs;
1696             trace_scsi_disk_mode_select_set_blocksize(s->qdev.blocksize);
1697         }
1698     }
1699 
1700     len -= bd_len;
1701     p += bd_len;
1702 
1703     /* Ensure no change is made if there is an error!  */
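         /*
          * Pass 0 only validates the pages against what MODE SENSE reports
          * as changeable; pass 1 applies them.  A failure can therefore only
          * occur in pass 0, before anything has been modified.
          */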
1704     for (pass = 0; pass < 2; pass++) {
1705         if (mode_select_pages(r, p, len, pass == 1) < 0) {
1706             assert(pass == 0);
1707             return;
1708         }
1709     }
1710     if (!blk_enable_write_cache(s->qdev.conf.blk)) {
1711         /* The request is used as the AIO opaque value, so add a ref.  */
1712         scsi_req_ref(&r->req);
1713         block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct, 0,
1714                          BLOCK_ACCT_FLUSH);
1715         r->req.aiocb = blk_aio_flush(s->qdev.conf.blk, scsi_aio_complete, r);
1716         return;
1717     }
1718 
1719     scsi_req_complete(&r->req, GOOD);
1720     return;
1721 
1722 invalid_param:
1723     scsi_check_condition(r, SENSE_CODE(INVALID_PARAM));
1724     return;
1725 
1726 invalid_param_len:
1727     scsi_check_condition(r, SENSE_CODE(INVALID_PARAM_LEN));
1728     return;
1729 
1730 invalid_field:
1731     scsi_check_condition(r, SENSE_CODE(INVALID_FIELD));
1732 }
1733 
1734 /* sector_num and nb_sectors expected to be in qdev blocksize */
1735 static inline bool check_lba_range(SCSIDiskState *s,
1736                                    uint64_t sector_num, uint32_t nb_sectors)
1737 {
1738     /*
1739      * The first line tests that no overflow happens when computing the last
1740      * sector.  The second line tests that the last accessed sector is in
1741      * range.
1742      *
1743      * Careful, the computations should not underflow for nb_sectors == 0,
1744      * and a 0-block read to the first LBA beyond the end of device is
1745      * valid.
1746      */
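         /*
          * For example, with max_lba == 99 a 0-block request at LBA 100 is
          * accepted, while a 2-block request at LBA 99 is rejected.
          */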
1747     return (sector_num <= sector_num + nb_sectors &&
1748             sector_num + nb_sectors <= s->qdev.max_lba + 1);
1749 }
1750 
1751 typedef struct UnmapCBData {
1752     SCSIDiskReq *r;
1753     uint8_t *inbuf;
1754     int count;
1755 } UnmapCBData;
1756 
1757 static void scsi_unmap_complete(void *opaque, int ret);
1758 
1759 static void scsi_unmap_complete_noio(UnmapCBData *data, int ret)
1760 {
1761     SCSIDiskReq *r = data->r;
1762     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
1763 
1764     assert(r->req.aiocb == NULL);
1765 
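         /*
          * Block descriptors are processed one at a time: issue a discard for
          * the current descriptor and let scsi_unmap_complete() re-enter this
          * function for the next one, completing the request with GOOD status
          * once all descriptors have been consumed.
          */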
1766     if (data->count > 0) {
1767         uint64_t sector_num = ldq_be_p(&data->inbuf[0]);
1768         uint32_t nb_sectors = ldl_be_p(&data->inbuf[8]) & 0xffffffffULL;
1769         r->sector = sector_num * (s->qdev.blocksize / BDRV_SECTOR_SIZE);
1770         r->sector_count = nb_sectors * (s->qdev.blocksize / BDRV_SECTOR_SIZE);
1771 
1772         if (!check_lba_range(s, sector_num, nb_sectors)) {
1773             block_acct_invalid(blk_get_stats(s->qdev.conf.blk),
1774                                BLOCK_ACCT_UNMAP);
1775             scsi_check_condition(r, SENSE_CODE(LBA_OUT_OF_RANGE));
1776             goto done;
1777         }
1778 
1779         block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct,
1780                          r->sector_count * BDRV_SECTOR_SIZE,
1781                          BLOCK_ACCT_UNMAP);
1782 
1783         r->req.aiocb = blk_aio_pdiscard(s->qdev.conf.blk,
1784                                         r->sector * BDRV_SECTOR_SIZE,
1785                                         r->sector_count * BDRV_SECTOR_SIZE,
1786                                         scsi_unmap_complete, data);
1787         data->count--;
1788         data->inbuf += 16;
1789         return;
1790     }
1791 
1792     scsi_req_complete(&r->req, GOOD);
1793 
1794 done:
1795     scsi_req_unref(&r->req);
1796     g_free(data);
1797 }
1798 
1799 static void scsi_unmap_complete(void *opaque, int ret)
1800 {
1801     UnmapCBData *data = opaque;
1802     SCSIDiskReq *r = data->r;
1803     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
1804 
1805     assert(r->req.aiocb != NULL);
1806     r->req.aiocb = NULL;
1807 
1808     if (scsi_disk_req_check_error(r, ret, true)) {
1809         scsi_req_unref(&r->req);
1810         g_free(data);
1811     } else {
1812         block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct);
1813         scsi_unmap_complete_noio(data, ret);
1814     }
1815 }
1816 
1817 static void scsi_disk_emulate_unmap(SCSIDiskReq *r, uint8_t *inbuf)
1818 {
1819     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
1820     uint8_t *p = inbuf;
1821     int len = r->req.cmd.xfer;
1822     UnmapCBData *data;
1823 
1824     /* Reject ANCHOR=1.  */
1825     if (r->req.cmd.buf[1] & 0x1) {
1826         goto invalid_field;
1827     }
1828 
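         /*
          * The parameter list starts with an 8-byte header: a 16-bit UNMAP
          * data length at offset 0 and a 16-bit block descriptor data length
          * at offset 2, followed by 16-byte block descriptors (an 8-byte LBA,
          * a 4-byte block count and 4 reserved bytes).  The descriptor data
          * length must therefore be a multiple of 16.
          */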
1829     if (len < 8) {
1830         goto invalid_param_len;
1831     }
1832     if (len < lduw_be_p(&p[0]) + 2) {
1833         goto invalid_param_len;
1834     }
1835     if (len < lduw_be_p(&p[2]) + 8) {
1836         goto invalid_param_len;
1837     }
1838     if (lduw_be_p(&p[2]) & 15) {
1839         goto invalid_param_len;
1840     }
1841 
1842     if (!blk_is_writable(s->qdev.conf.blk)) {
1843         block_acct_invalid(blk_get_stats(s->qdev.conf.blk), BLOCK_ACCT_UNMAP);
1844         scsi_check_condition(r, SENSE_CODE(WRITE_PROTECTED));
1845         return;
1846     }
1847 
1848     data = g_new0(UnmapCBData, 1);
1849     data->r = r;
1850     data->inbuf = &p[8];
1851     data->count = lduw_be_p(&p[2]) >> 4;
1852 
1853     /* The matching unref is in scsi_unmap_complete, before data is freed.  */
1854     scsi_req_ref(&r->req);
1855     scsi_unmap_complete_noio(data, 0);
1856     return;
1857 
1858 invalid_param_len:
1859     block_acct_invalid(blk_get_stats(s->qdev.conf.blk), BLOCK_ACCT_UNMAP);
1860     scsi_check_condition(r, SENSE_CODE(INVALID_PARAM_LEN));
1861     return;
1862 
1863 invalid_field:
1864     block_acct_invalid(blk_get_stats(s->qdev.conf.blk), BLOCK_ACCT_UNMAP);
1865     scsi_check_condition(r, SENSE_CODE(INVALID_FIELD));
1866 }
1867 
1868 typedef struct WriteSameCBData {
1869     SCSIDiskReq *r;
1870     int64_t sector;
1871     int nb_sectors;
1872     QEMUIOVector qiov;
1873     struct iovec iov;
1874 } WriteSameCBData;
1875 
1876 static void scsi_write_same_complete(void *opaque, int ret)
1877 {
1878     WriteSameCBData *data = opaque;
1879     SCSIDiskReq *r = data->r;
1880     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
1881 
1882     assert(r->req.aiocb != NULL);
1883     r->req.aiocb = NULL;
1884 
1885     if (scsi_disk_req_check_error(r, ret, true)) {
1886         goto done;
1887     }
1888 
1889     block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct);
1890 
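         /*
          * Advance past the chunk that just completed and issue the next one,
          * writing at most SCSI_WRITE_SAME_MAX bytes per request until the
          * whole range has been covered.
          */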
1891     data->nb_sectors -= data->iov.iov_len / BDRV_SECTOR_SIZE;
1892     data->sector += data->iov.iov_len / BDRV_SECTOR_SIZE;
1893     data->iov.iov_len = MIN(data->nb_sectors * BDRV_SECTOR_SIZE,
1894                             data->iov.iov_len);
1895     if (data->iov.iov_len) {
1896         block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct,
1897                          data->iov.iov_len, BLOCK_ACCT_WRITE);
1898         /* Reinitialize qiov to handle an unaligned WRITE SAME request,
1899          * where the final qiov may need a smaller size */
1900         qemu_iovec_init_external(&data->qiov, &data->iov, 1);
1901         r->req.aiocb = blk_aio_pwritev(s->qdev.conf.blk,
1902                                        data->sector << BDRV_SECTOR_BITS,
1903                                        &data->qiov, 0,
1904                                        scsi_write_same_complete, data);
1905         return;
1906     }
1907 
1908     scsi_req_complete(&r->req, GOOD);
1909 
1910 done:
1911     scsi_req_unref(&r->req);
1912     qemu_vfree(data->iov.iov_base);
1913     g_free(data);
1914 }
1915 
1916 static void scsi_disk_emulate_write_same(SCSIDiskReq *r, uint8_t *inbuf)
1917 {
1918     SCSIRequest *req = &r->req;
1919     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
1920     uint32_t nb_sectors = scsi_data_cdb_xfer(r->req.cmd.buf);
1921     WriteSameCBData *data;
1922     uint8_t *buf;
1923     int i, l;
1924 
1925     /* Fail if PBDATA=1 or LBDATA=1 or ANCHOR=1.  */
1926     if (nb_sectors == 0 || (req->cmd.buf[1] & 0x16)) {
1927         scsi_check_condition(r, SENSE_CODE(INVALID_FIELD));
1928         return;
1929     }
1930 
1931     if (!blk_is_writable(s->qdev.conf.blk)) {
1932         scsi_check_condition(r, SENSE_CODE(WRITE_PROTECTED));
1933         return;
1934     }
1935     if (!check_lba_range(s, r->req.cmd.lba, nb_sectors)) {
1936         scsi_check_condition(r, SENSE_CODE(LBA_OUT_OF_RANGE));
1937         return;
1938     }
1939 
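         /*
          * Fast path: an all-zero payload (or a request with bit 0 of byte 1
          * set) is emulated with a single write-zeroes operation over the
          * whole range; the UNMAP bit (byte 1, bit 3) additionally allows the
          * block layer to discard the blocks instead of writing zeroes.
          */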
1940     if ((req->cmd.buf[1] & 0x1) || buffer_is_zero(inbuf, s->qdev.blocksize)) {
1941         int flags = (req->cmd.buf[1] & 0x8) ? BDRV_REQ_MAY_UNMAP : 0;
1942 
1943         /* The request is used as the AIO opaque value, so add a ref.  */
1944         scsi_req_ref(&r->req);
1945         block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct,
1946                          nb_sectors * s->qdev.blocksize,
1947                         BLOCK_ACCT_WRITE);
1948         r->req.aiocb = blk_aio_pwrite_zeroes(s->qdev.conf.blk,
1949                                 r->req.cmd.lba * s->qdev.blocksize,
1950                                 nb_sectors * s->qdev.blocksize,
1951                                 flags, scsi_aio_complete, r);
1952         return;
1953     }
1954 
1955     data = g_new0(WriteSameCBData, 1);
1956     data->r = r;
1957     data->sector = r->req.cmd.lba * (s->qdev.blocksize / BDRV_SECTOR_SIZE);
1958     data->nb_sectors = nb_sectors * (s->qdev.blocksize / BDRV_SECTOR_SIZE);
1959     data->iov.iov_len = MIN(data->nb_sectors * BDRV_SECTOR_SIZE,
1960                             SCSI_WRITE_SAME_MAX);
1961     data->iov.iov_base = buf = blk_blockalign(s->qdev.conf.blk,
1962                                               data->iov.iov_len);
1963     qemu_iovec_init_external(&data->qiov, &data->iov, 1);
1964 
1965     for (i = 0; i < data->iov.iov_len; i += l) {
1966         l = MIN(s->qdev.blocksize, data->iov.iov_len - i);
1967         memcpy(&buf[i], inbuf, l);
1968     }
1969 
1970     scsi_req_ref(&r->req);
1971     block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct,
1972                      data->iov.iov_len, BLOCK_ACCT_WRITE);
1973     r->req.aiocb = blk_aio_pwritev(s->qdev.conf.blk,
1974                                    data->sector << BDRV_SECTOR_BITS,
1975                                    &data->qiov, 0,
1976                                    scsi_write_same_complete, data);
1977 }
1978 
1979 static void scsi_disk_emulate_write_data(SCSIRequest *req)
1980 {
1981     SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
1982 
1983     if (r->iov.iov_len) {
1984         int buflen = r->iov.iov_len;
1985         trace_scsi_disk_emulate_write_data(buflen);
1986         r->iov.iov_len = 0;
1987         scsi_req_data(&r->req, buflen);
1988         return;
1989     }
1990 
1991     switch (req->cmd.buf[0]) {
1992     case MODE_SELECT:
1993     case MODE_SELECT_10:
1994         /* This also clears the sense buffer for REQUEST SENSE.  */
1995         scsi_disk_emulate_mode_select(r, r->iov.iov_base);
1996         break;
1997 
1998     case UNMAP:
1999         scsi_disk_emulate_unmap(r, r->iov.iov_base);
2000         break;
2001 
2002     case VERIFY_10:
2003     case VERIFY_12:
2004     case VERIFY_16:
2005         if (r->req.status == -1) {
2006             scsi_check_condition(r, SENSE_CODE(INVALID_FIELD));
2007         }
2008         break;
2009 
2010     case WRITE_SAME_10:
2011     case WRITE_SAME_16:
2012         scsi_disk_emulate_write_same(r, r->iov.iov_base);
2013         break;
2014 
2015     case FORMAT_UNIT:
2016         scsi_req_complete(&r->req, GOOD);
2017         break;
2018 
2019     default:
2020         abort();
2021     }
2022 }
2023 
2024 static int32_t scsi_disk_emulate_command(SCSIRequest *req, uint8_t *buf)
2025 {
2026     SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
2027     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
2028     uint64_t nb_sectors;
2029     uint8_t *outbuf;
2030     int buflen;
2031 
2032     switch (req->cmd.buf[0]) {
2033     case INQUIRY:
2034     case MODE_SENSE:
2035     case MODE_SENSE_10:
2036     case RESERVE:
2037     case RESERVE_10:
2038     case RELEASE:
2039     case RELEASE_10:
2040     case START_STOP:
2041     case ALLOW_MEDIUM_REMOVAL:
2042     case GET_CONFIGURATION:
2043     case GET_EVENT_STATUS_NOTIFICATION:
2044     case MECHANISM_STATUS:
2045     case REQUEST_SENSE:
2046         break;
2047 
2048     default:
2049         if (!blk_is_available(s->qdev.conf.blk)) {
2050             scsi_check_condition(r, SENSE_CODE(NO_MEDIUM));
2051             return 0;
2052         }
2053         break;
2054     }
2055 
2056     /*
2057      * FIXME: we shouldn't return anything bigger than 4k, but the code
2058      * requires the buffer to be as big as req->cmd.xfer in several
2059      * places.  So, do not allow CDBs with a very large ALLOCATION
2060      * LENGTH.  The real fix would be to modify scsi_read_data and
2061      * dma_buf_read, so that they return data beyond the buflen
2062      * as all zeros.
2063      */
2064     if (req->cmd.xfer > 65536) {
2065         goto illegal_request;
2066     }
2067     r->buflen = MAX(4096, req->cmd.xfer);
2068 
2069     if (!r->iov.iov_base) {
2070         r->iov.iov_base = blk_blockalign(s->qdev.conf.blk, r->buflen);
2071     }
2072 
2073     outbuf = r->iov.iov_base;
2074     memset(outbuf, 0, r->buflen);
2075     switch (req->cmd.buf[0]) {
2076     case TEST_UNIT_READY:
2077         assert(blk_is_available(s->qdev.conf.blk));
2078         break;
2079     case INQUIRY:
2080         buflen = scsi_disk_emulate_inquiry(req, outbuf);
2081         if (buflen < 0) {
2082             goto illegal_request;
2083         }
2084         break;
2085     case MODE_SENSE:
2086     case MODE_SENSE_10:
2087         buflen = scsi_disk_emulate_mode_sense(r, outbuf);
2088         if (buflen < 0) {
2089             goto illegal_request;
2090         }
2091         break;
2092     case READ_TOC:
2093         buflen = scsi_disk_emulate_read_toc(req, outbuf);
2094         if (buflen < 0) {
2095             goto illegal_request;
2096         }
2097         break;
2098     case RESERVE:
2099         if (req->cmd.buf[1] & 1) {
2100             goto illegal_request;
2101         }
2102         break;
2103     case RESERVE_10:
2104         if (req->cmd.buf[1] & 3) {
2105             goto illegal_request;
2106         }
2107         break;
2108     case RELEASE:
2109         if (req->cmd.buf[1] & 1) {
2110             goto illegal_request;
2111         }
2112         break;
2113     case RELEASE_10:
2114         if (req->cmd.buf[1] & 3) {
2115             goto illegal_request;
2116         }
2117         break;
2118     case START_STOP:
2119         if (scsi_disk_emulate_start_stop(r) < 0) {
2120             return 0;
2121         }
2122         break;
2123     case ALLOW_MEDIUM_REMOVAL:
2124         s->tray_locked = req->cmd.buf[4] & 1;
2125         blk_lock_medium(s->qdev.conf.blk, req->cmd.buf[4] & 1);
2126         break;
2127     case READ_CAPACITY_10:
2128         /* The normal LEN field for this command is zero.  */
2129         memset(outbuf, 0, 8);
2130         blk_get_geometry(s->qdev.conf.blk, &nb_sectors);
2131         if (!nb_sectors) {
2132             scsi_check_condition(r, SENSE_CODE(LUN_NOT_READY));
2133             return 0;
2134         }
2135         if ((req->cmd.buf[8] & 1) == 0 && req->cmd.lba) {
2136             goto illegal_request;
2137         }
2138         nb_sectors /= s->qdev.blocksize / BDRV_SECTOR_SIZE;
2139         /* Returned value is the address of the last sector.  */
2140         nb_sectors--;
2141         /* Remember the new size for read/write sanity checking. */
2142         s->qdev.max_lba = nb_sectors;
2143         /* Clip to 2TB, instead of returning capacity modulo 2TB. */
2144         if (nb_sectors > UINT32_MAX) {
2145             nb_sectors = UINT32_MAX;
2146         }
2147         outbuf[0] = (nb_sectors >> 24) & 0xff;
2148         outbuf[1] = (nb_sectors >> 16) & 0xff;
2149         outbuf[2] = (nb_sectors >> 8) & 0xff;
2150         outbuf[3] = nb_sectors & 0xff;
2151         outbuf[4] = 0;
2152         outbuf[5] = 0;
2153         outbuf[6] = s->qdev.blocksize >> 8;
2154         outbuf[7] = 0;
2155         break;
2156     case REQUEST_SENSE:
2157         /* Just return "NO SENSE".  */
2158         buflen = scsi_convert_sense(NULL, 0, outbuf, r->buflen,
2159                                     (req->cmd.buf[1] & 1) == 0);
2160         if (buflen < 0) {
2161             goto illegal_request;
2162         }
2163         break;
2164     case MECHANISM_STATUS:
2165         buflen = scsi_emulate_mechanism_status(s, outbuf);
2166         if (buflen < 0) {
2167             goto illegal_request;
2168         }
2169         break;
2170     case GET_CONFIGURATION:
2171         buflen = scsi_get_configuration(s, outbuf);
2172         if (buflen < 0) {
2173             goto illegal_request;
2174         }
2175         break;
2176     case GET_EVENT_STATUS_NOTIFICATION:
2177         buflen = scsi_get_event_status_notification(s, r, outbuf);
2178         if (buflen < 0) {
2179             goto illegal_request;
2180         }
2181         break;
2182     case READ_DISC_INFORMATION:
2183         buflen = scsi_read_disc_information(s, r, outbuf);
2184         if (buflen < 0) {
2185             goto illegal_request;
2186         }
2187         break;
2188     case READ_DVD_STRUCTURE:
2189         buflen = scsi_read_dvd_structure(s, r, outbuf);
2190         if (buflen < 0) {
2191             goto illegal_request;
2192         }
2193         break;
2194     case SERVICE_ACTION_IN_16:
2195         /* Service Action In subcommands. */
2196         if ((req->cmd.buf[1] & 31) == SAI_READ_CAPACITY_16) {
2197             trace_scsi_disk_emulate_command_SAI_16();
2198             memset(outbuf, 0, req->cmd.xfer);
2199             blk_get_geometry(s->qdev.conf.blk, &nb_sectors);
2200             if (!nb_sectors) {
2201                 scsi_check_condition(r, SENSE_CODE(LUN_NOT_READY));
2202                 return 0;
2203             }
2204             if ((req->cmd.buf[14] & 1) == 0 && req->cmd.lba) {
2205                 goto illegal_request;
2206             }
2207             nb_sectors /= s->qdev.blocksize / BDRV_SECTOR_SIZE;
2208             /* Returned value is the address of the last sector.  */
2209             nb_sectors--;
2210             /* Remember the new size for read/write sanity checking. */
2211             s->qdev.max_lba = nb_sectors;
2212             outbuf[0] = (nb_sectors >> 56) & 0xff;
2213             outbuf[1] = (nb_sectors >> 48) & 0xff;
2214             outbuf[2] = (nb_sectors >> 40) & 0xff;
2215             outbuf[3] = (nb_sectors >> 32) & 0xff;
2216             outbuf[4] = (nb_sectors >> 24) & 0xff;
2217             outbuf[5] = (nb_sectors >> 16) & 0xff;
2218             outbuf[6] = (nb_sectors >> 8) & 0xff;
2219             outbuf[7] = nb_sectors & 0xff;
2220             outbuf[8] = 0;
2221             outbuf[9] = 0;
2222             outbuf[10] = s->qdev.blocksize >> 8;
2223             outbuf[11] = 0;
2224             outbuf[12] = 0;
2225             outbuf[13] = get_physical_block_exp(&s->qdev.conf);
2226 
2227             /* set TPE bit if the format supports discard */
2228             if (s->qdev.conf.discard_granularity) {
2229                 outbuf[14] = 0x80;
2230             }
2231 
2232             /* Protection, exponent and lowest lba field left blank. */
2233             break;
2234         }
2235         trace_scsi_disk_emulate_command_SAI_unsupported();
2236         goto illegal_request;
2237     case SYNCHRONIZE_CACHE:
2238         /* The request is used as the AIO opaque value, so add a ref.  */
2239         scsi_req_ref(&r->req);
2240         block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct, 0,
2241                          BLOCK_ACCT_FLUSH);
2242         r->req.aiocb = blk_aio_flush(s->qdev.conf.blk, scsi_aio_complete, r);
2243         return 0;
2244     case SEEK_10:
2245         trace_scsi_disk_emulate_command_SEEK_10(r->req.cmd.lba);
2246         if (r->req.cmd.lba > s->qdev.max_lba) {
2247             goto illegal_lba;
2248         }
2249         break;
2250     case MODE_SELECT:
2251         trace_scsi_disk_emulate_command_MODE_SELECT(r->req.cmd.xfer);
2252         break;
2253     case MODE_SELECT_10:
2254         trace_scsi_disk_emulate_command_MODE_SELECT_10(r->req.cmd.xfer);
2255         break;
2256     case UNMAP:
2257         trace_scsi_disk_emulate_command_UNMAP(r->req.cmd.xfer);
2258         break;
2259     case VERIFY_10:
2260     case VERIFY_12:
2261     case VERIFY_16:
2262         trace_scsi_disk_emulate_command_VERIFY((req->cmd.buf[1] >> 1) & 3);
2263         if (req->cmd.buf[1] & 6) {
2264             goto illegal_request;
2265         }
2266         break;
2267     case WRITE_SAME_10:
2268     case WRITE_SAME_16:
2269         trace_scsi_disk_emulate_command_WRITE_SAME(
2270                 req->cmd.buf[0] == WRITE_SAME_10 ? 10 : 16, r->req.cmd.xfer);
2271         break;
2272     case FORMAT_UNIT:
2273         trace_scsi_disk_emulate_command_FORMAT_UNIT(r->req.cmd.xfer);
2274         break;
2275     default:
2276         trace_scsi_disk_emulate_command_UNKNOWN(buf[0],
2277                                                 scsi_command_name(buf[0]));
2278         scsi_check_condition(r, SENSE_CODE(INVALID_OPCODE));
2279         return 0;
2280     }
2281     assert(!r->req.aiocb);
2282     r->iov.iov_len = MIN(r->buflen, req->cmd.xfer);
2283     if (r->iov.iov_len == 0) {
2284         scsi_req_complete(&r->req, GOOD);
2285     }
2286     if (r->req.cmd.mode == SCSI_XFER_TO_DEV) {
2287         assert(r->iov.iov_len == req->cmd.xfer);
2288         return -r->iov.iov_len;
2289     } else {
2290         return r->iov.iov_len;
2291     }
2292 
2293 illegal_request:
2294     if (r->req.status == -1) {
2295         scsi_check_condition(r, SENSE_CODE(INVALID_FIELD));
2296     }
2297     return 0;
2298 
2299 illegal_lba:
2300     scsi_check_condition(r, SENSE_CODE(LBA_OUT_OF_RANGE));
2301     return 0;
2302 }
2303 
2304 /* Execute a SCSI command.  Returns the length of the data expected by the
2305    command.  This will be positive for data transfers from the device
2306    (e.g. disk reads), negative for transfers to the device (e.g. disk writes),
2307    and zero if the command does not transfer any data.  */
2308 
2309 static int32_t scsi_disk_dma_command(SCSIRequest *req, uint8_t *buf)
2310 {
2311     SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
2312     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
2313     SCSIDiskClass *sdc = (SCSIDiskClass *) object_get_class(OBJECT(s));
2314     uint32_t len;
2315     uint8_t command;
2316 
2317     command = buf[0];
2318 
2319     if (!blk_is_available(s->qdev.conf.blk)) {
2320         scsi_check_condition(r, SENSE_CODE(NO_MEDIUM));
2321         return 0;
2322     }
2323 
2324     len = scsi_data_cdb_xfer(r->req.cmd.buf);
2325     switch (command) {
2326     case READ_6:
2327     case READ_10:
2328     case READ_12:
2329     case READ_16:
2330         trace_scsi_disk_dma_command_READ(r->req.cmd.lba, len);
2331         /* Protection information is not supported.  For SCSI versions 2 and
2332          * older (as determined by snooping the guest's INQUIRY commands),
2333          * there is no RD/WR/VRPROTECT, so skip this check in these versions.
2334          */
2335         if (s->qdev.scsi_version > 2 && (r->req.cmd.buf[1] & 0xe0)) {
2336             goto illegal_request;
2337         }
2338         if (!check_lba_range(s, r->req.cmd.lba, len)) {
2339             goto illegal_lba;
2340         }
2341         r->sector = r->req.cmd.lba * (s->qdev.blocksize / BDRV_SECTOR_SIZE);
2342         r->sector_count = len * (s->qdev.blocksize / BDRV_SECTOR_SIZE);
2343         break;
2344     case WRITE_6:
2345     case WRITE_10:
2346     case WRITE_12:
2347     case WRITE_16:
2348     case WRITE_VERIFY_10:
2349     case WRITE_VERIFY_12:
2350     case WRITE_VERIFY_16:
2351         if (!blk_is_writable(s->qdev.conf.blk)) {
2352             scsi_check_condition(r, SENSE_CODE(WRITE_PROTECTED));
2353             return 0;
2354         }
2355         trace_scsi_disk_dma_command_WRITE(
2356                 (command & 0xe) == 0xe ? "And Verify " : "",
2357                 r->req.cmd.lba, len);
2358         /* fall through */
2359     case VERIFY_10:
2360     case VERIFY_12:
2361     case VERIFY_16:
2362         /* We get here only for BYTCHK == 0x01 and only for scsi-block.
2363          * As far as DMA is concerned, we can treat it the same as a write;
2364          * scsi_block_do_sgio will send VERIFY commands.
2365          */
2366         if (s->qdev.scsi_version > 2 && (r->req.cmd.buf[1] & 0xe0)) {
2367             goto illegal_request;
2368         }
2369         if (!check_lba_range(s, r->req.cmd.lba, len)) {
2370             goto illegal_lba;
2371         }
2372         r->sector = r->req.cmd.lba * (s->qdev.blocksize / BDRV_SECTOR_SIZE);
2373         r->sector_count = len * (s->qdev.blocksize / BDRV_SECTOR_SIZE);
2374         break;
2375     default:
2376         abort();
2377     illegal_request:
2378         scsi_check_condition(r, SENSE_CODE(INVALID_FIELD));
2379         return 0;
2380     illegal_lba:
2381         scsi_check_condition(r, SENSE_CODE(LBA_OUT_OF_RANGE));
2382         return 0;
2383     }
2384     r->need_fua_emulation = sdc->need_fua_emulation(&r->req.cmd);
2385     if (r->sector_count == 0) {
2386         scsi_req_complete(&r->req, GOOD);
2387     }
2388     assert(r->iov.iov_len == 0);
2389     if (r->req.cmd.mode == SCSI_XFER_TO_DEV) {
2390         return -r->sector_count * BDRV_SECTOR_SIZE;
2391     } else {
2392         return r->sector_count * BDRV_SECTOR_SIZE;
2393     }
2394 }
2395 
2396 static void scsi_disk_reset(DeviceState *dev)
2397 {
2398     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev.qdev, dev);
2399     uint64_t nb_sectors;
2400 
2401     scsi_device_purge_requests(&s->qdev, SENSE_CODE(RESET));
2402 
2403     blk_get_geometry(s->qdev.conf.blk, &nb_sectors);
2404 
2405     nb_sectors /= s->qdev.blocksize / BDRV_SECTOR_SIZE;
2406     if (nb_sectors) {
2407         nb_sectors--;
2408     }
2409     s->qdev.max_lba = nb_sectors;
2410     /* reset tray statuses */
2411     s->tray_locked = 0;
2412     s->tray_open = 0;
2413 
2414     s->qdev.scsi_version = s->qdev.default_scsi_version;
2415 }
2416 
2417 static void scsi_disk_drained_begin(void *opaque)
2418 {
2419     SCSIDiskState *s = opaque;
2420 
2421     scsi_device_drained_begin(&s->qdev);
2422 }
2423 
2424 static void scsi_disk_drained_end(void *opaque)
2425 {
2426     SCSIDiskState *s = opaque;
2427 
2428     scsi_device_drained_end(&s->qdev);
2429 }
2430 
2431 static void scsi_disk_resize_cb(void *opaque)
2432 {
2433     SCSIDiskState *s = opaque;
2434 
2435     /* SPC lists this sense code as available only for
2436      * direct-access devices.
2437      */
2438     if (s->qdev.type == TYPE_DISK) {
2439         scsi_device_report_change(&s->qdev, SENSE_CODE(CAPACITY_CHANGED));
2440     }
2441 }
2442 
2443 static void scsi_cd_change_media_cb(void *opaque, bool load, Error **errp)
2444 {
2445     SCSIDiskState *s = opaque;
2446 
2447     /*
2448      * When a CD gets changed, we have to report an ejected state and
2449      * then a loaded state to guests so that they detect tray
2450      * open/close and media change events.  Guests that do not use
2451      * GET_EVENT_STATUS_NOTIFICATION to detect such tray open/close
2452      * states rely on this behavior.
2453      *
2454      * media_changed governs the state machine used for unit attention
2455      * report.  media_event is used by GET EVENT STATUS NOTIFICATION.
2456      */
2457     s->media_changed = load;
2458     s->tray_open = !load;
2459     scsi_device_set_ua(&s->qdev, SENSE_CODE(UNIT_ATTENTION_NO_MEDIUM));
2460     s->media_event = true;
2461     s->eject_request = false;
2462 }
2463 
2464 static void scsi_cd_eject_request_cb(void *opaque, bool force)
2465 {
2466     SCSIDiskState *s = opaque;
2467 
2468     s->eject_request = true;
2469     if (force) {
2470         s->tray_locked = false;
2471     }
2472 }
2473 
2474 static bool scsi_cd_is_tray_open(void *opaque)
2475 {
2476     return ((SCSIDiskState *)opaque)->tray_open;
2477 }
2478 
2479 static bool scsi_cd_is_medium_locked(void *opaque)
2480 {
2481     return ((SCSIDiskState *)opaque)->tray_locked;
2482 }
2483 
2484 static const BlockDevOps scsi_disk_removable_block_ops = {
2485     .change_media_cb  = scsi_cd_change_media_cb,
2486     .drained_begin    = scsi_disk_drained_begin,
2487     .drained_end      = scsi_disk_drained_end,
2488     .eject_request_cb = scsi_cd_eject_request_cb,
2489     .is_medium_locked = scsi_cd_is_medium_locked,
2490     .is_tray_open     = scsi_cd_is_tray_open,
2491     .resize_cb        = scsi_disk_resize_cb,
2492 };
2493 
2494 static const BlockDevOps scsi_disk_block_ops = {
2495     .drained_begin = scsi_disk_drained_begin,
2496     .drained_end   = scsi_disk_drained_end,
2497     .resize_cb     = scsi_disk_resize_cb,
2498 };
2499 
2500 static void scsi_disk_unit_attention_reported(SCSIDevice *dev)
2501 {
2502     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, dev);
2503     if (s->media_changed) {
2504         s->media_changed = false;
2505         scsi_device_set_ua(&s->qdev, SENSE_CODE(MEDIUM_CHANGED));
2506     }
2507 }
2508 
2509 static void scsi_realize(SCSIDevice *dev, Error **errp)
2510 {
2511     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, dev);
2512     bool read_only;
2513 
2514     if (!s->qdev.conf.blk) {
2515         error_setg(errp, "drive property not set");
2516         return;
2517     }
2518 
2519     if (!(s->features & (1 << SCSI_DISK_F_REMOVABLE)) &&
2520         !blk_is_inserted(s->qdev.conf.blk)) {
2521         error_setg(errp, "Device needs media, but drive is empty");
2522         return;
2523     }
2524 
2525     if (!blkconf_blocksizes(&s->qdev.conf, errp)) {
2526         return;
2527     }
2528 
2529     if (blk_get_aio_context(s->qdev.conf.blk) != qemu_get_aio_context() &&
2530         !s->qdev.hba_supports_iothread)
2531     {
2532         error_setg(errp, "HBA does not support iothreads");
2533         return;
2534     }
2535 
2536     if (dev->type == TYPE_DISK) {
2537         if (!blkconf_geometry(&dev->conf, NULL, 65535, 255, 255, errp)) {
2538             return;
2539         }
2540     }
2541 
2542     read_only = !blk_supports_write_perm(s->qdev.conf.blk);
2543     if (dev->type == TYPE_ROM) {
2544         read_only = true;
2545     }
2546 
2547     if (!blkconf_apply_backend_options(&dev->conf, read_only,
2548                                        dev->type == TYPE_DISK, errp)) {
2549         return;
2550     }
2551 
2552     if (s->qdev.conf.discard_granularity == -1) {
2553         s->qdev.conf.discard_granularity =
2554             MAX(s->qdev.conf.logical_block_size, DEFAULT_DISCARD_GRANULARITY);
2555     }
2556 
2557     if (!s->version) {
2558         s->version = g_strdup(qemu_hw_version());
2559     }
2560     if (!s->vendor) {
2561         s->vendor = g_strdup("QEMU");
2562     }
2563     if (s->serial && strlen(s->serial) > MAX_SERIAL_LEN) {
2564         error_setg(errp, "The serial number can't be longer than %d characters",
2565                    MAX_SERIAL_LEN);
2566         return;
2567     }
2568     if (!s->device_id) {
2569         if (s->serial) {
2570             if (strlen(s->serial) > MAX_SERIAL_LEN_FOR_DEVID) {
2571                 error_setg(errp, "The serial number can't be longer than %d "
2572                            "characters when it is also used as the default for "
2573                            "device_id", MAX_SERIAL_LEN_FOR_DEVID);
2574                 return;
2575             }
2576             s->device_id = g_strdup(s->serial);
2577         } else {
2578             const char *str = blk_name(s->qdev.conf.blk);
2579             if (str && *str) {
2580                 s->device_id = g_strdup(str);
2581             }
2582         }
2583     }
2584 
2585     if (blk_is_sg(s->qdev.conf.blk)) {
2586         error_setg(errp, "unwanted /dev/sg*");
2587         return;
2588     }
2589 
2590     if ((s->features & (1 << SCSI_DISK_F_REMOVABLE)) &&
2591             !(s->features & (1 << SCSI_DISK_F_NO_REMOVABLE_DEVOPS))) {
2592         blk_set_dev_ops(s->qdev.conf.blk, &scsi_disk_removable_block_ops, s);
2593     } else {
2594         blk_set_dev_ops(s->qdev.conf.blk, &scsi_disk_block_ops, s);
2595     }
2596 
2597     blk_iostatus_enable(s->qdev.conf.blk);
2598 
2599     add_boot_device_lchs(&dev->qdev, NULL,
2600                          dev->conf.lcyls,
2601                          dev->conf.lheads,
2602                          dev->conf.lsecs);
2603 }
2604 
2605 static void scsi_unrealize(SCSIDevice *dev)
2606 {
2607     del_boot_device_lchs(&dev->qdev, NULL);
2608 }
2609 
2610 static void scsi_hd_realize(SCSIDevice *dev, Error **errp)
2611 {
2612     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, dev);
2613 
2614     /* This can happen for devices without a drive.  The error message for
2615      * the missing backend will be issued in scsi_realize.
2616      */
2617     if (s->qdev.conf.blk) {
2618         if (!blkconf_blocksizes(&s->qdev.conf, errp)) {
2619             return;
2620         }
2621     }
2622     s->qdev.blocksize = s->qdev.conf.logical_block_size;
2623     s->qdev.type = TYPE_DISK;
2624     if (!s->product) {
2625         s->product = g_strdup("QEMU HARDDISK");
2626     }
2627     scsi_realize(&s->qdev, errp);
2628 }
2629 
2630 static void scsi_cd_realize(SCSIDevice *dev, Error **errp)
2631 {
2632     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, dev);
2633     int ret;
2634     uint32_t blocksize = 2048;
2635 
2636     if (!dev->conf.blk) {
2637         /* Anonymous BlockBackend for an empty drive. As we put it into
2638          * dev->conf, qdev takes care of detaching on unplug. */
2639         dev->conf.blk = blk_new(qemu_get_aio_context(), 0, BLK_PERM_ALL);
2640         ret = blk_attach_dev(dev->conf.blk, &dev->qdev);
2641         assert(ret == 0);
2642     }
2643 
2644     if (dev->conf.physical_block_size != 0) {
2645         blocksize = dev->conf.physical_block_size;
2646     }
2647 
2648     s->qdev.blocksize = blocksize;
2649     s->qdev.type = TYPE_ROM;
2650     s->features |= 1 << SCSI_DISK_F_REMOVABLE;
2651     if (!s->product) {
2652         s->product = g_strdup("QEMU CD-ROM");
2653     }
2654     scsi_realize(&s->qdev, errp);
2655 }
2656 
2657 
2658 static const SCSIReqOps scsi_disk_emulate_reqops = {
2659     .size         = sizeof(SCSIDiskReq),
2660     .free_req     = scsi_free_request,
2661     .send_command = scsi_disk_emulate_command,
2662     .read_data    = scsi_disk_emulate_read_data,
2663     .write_data   = scsi_disk_emulate_write_data,
2664     .get_buf      = scsi_get_buf,
2665     .load_request = scsi_disk_emulate_load_request,
2666     .save_request = scsi_disk_emulate_save_request,
2667 };
2668 
2669 static const SCSIReqOps scsi_disk_dma_reqops = {
2670     .size         = sizeof(SCSIDiskReq),
2671     .free_req     = scsi_free_request,
2672     .send_command = scsi_disk_dma_command,
2673     .read_data    = scsi_read_data,
2674     .write_data   = scsi_write_data,
2675     .get_buf      = scsi_get_buf,
2676     .load_request = scsi_disk_load_request,
2677     .save_request = scsi_disk_save_request,
2678 };
2679 
2680 static const SCSIReqOps *const scsi_disk_reqops_dispatch[256] = {
2681     [TEST_UNIT_READY]                 = &scsi_disk_emulate_reqops,
2682     [INQUIRY]                         = &scsi_disk_emulate_reqops,
2683     [MODE_SENSE]                      = &scsi_disk_emulate_reqops,
2684     [MODE_SENSE_10]                   = &scsi_disk_emulate_reqops,
2685     [START_STOP]                      = &scsi_disk_emulate_reqops,
2686     [ALLOW_MEDIUM_REMOVAL]            = &scsi_disk_emulate_reqops,
2687     [READ_CAPACITY_10]                = &scsi_disk_emulate_reqops,
2688     [READ_TOC]                        = &scsi_disk_emulate_reqops,
2689     [READ_DVD_STRUCTURE]              = &scsi_disk_emulate_reqops,
2690     [READ_DISC_INFORMATION]           = &scsi_disk_emulate_reqops,
2691     [GET_CONFIGURATION]               = &scsi_disk_emulate_reqops,
2692     [GET_EVENT_STATUS_NOTIFICATION]   = &scsi_disk_emulate_reqops,
2693     [MECHANISM_STATUS]                = &scsi_disk_emulate_reqops,
2694     [SERVICE_ACTION_IN_16]            = &scsi_disk_emulate_reqops,
2695     [REQUEST_SENSE]                   = &scsi_disk_emulate_reqops,
2696     [SYNCHRONIZE_CACHE]               = &scsi_disk_emulate_reqops,
2697     [SEEK_10]                         = &scsi_disk_emulate_reqops,
2698     [MODE_SELECT]                     = &scsi_disk_emulate_reqops,
2699     [MODE_SELECT_10]                  = &scsi_disk_emulate_reqops,
2700     [UNMAP]                           = &scsi_disk_emulate_reqops,
2701     [WRITE_SAME_10]                   = &scsi_disk_emulate_reqops,
2702     [WRITE_SAME_16]                   = &scsi_disk_emulate_reqops,
2703     [VERIFY_10]                       = &scsi_disk_emulate_reqops,
2704     [VERIFY_12]                       = &scsi_disk_emulate_reqops,
2705     [VERIFY_16]                       = &scsi_disk_emulate_reqops,
2706     [FORMAT_UNIT]                     = &scsi_disk_emulate_reqops,
2707 
2708     [READ_6]                          = &scsi_disk_dma_reqops,
2709     [READ_10]                         = &scsi_disk_dma_reqops,
2710     [READ_12]                         = &scsi_disk_dma_reqops,
2711     [READ_16]                         = &scsi_disk_dma_reqops,
2712     [WRITE_6]                         = &scsi_disk_dma_reqops,
2713     [WRITE_10]                        = &scsi_disk_dma_reqops,
2714     [WRITE_12]                        = &scsi_disk_dma_reqops,
2715     [WRITE_16]                        = &scsi_disk_dma_reqops,
2716     [WRITE_VERIFY_10]                 = &scsi_disk_dma_reqops,
2717     [WRITE_VERIFY_12]                 = &scsi_disk_dma_reqops,
2718     [WRITE_VERIFY_16]                 = &scsi_disk_dma_reqops,
2719 };
2720 
2721 static void scsi_disk_new_request_dump(uint32_t lun, uint32_t tag, uint8_t *buf)
2722 {
2723     int len = scsi_cdb_length(buf);
2724     g_autoptr(GString) str = NULL;
2725 
2726     assert(len > 0 && len <= 16);
2727     str = qemu_hexdump_line(NULL, buf, len, 1, 0);
2728     trace_scsi_disk_new_request(lun, tag, str->str);
2729 }
2730 
2731 static SCSIRequest *scsi_new_request(SCSIDevice *d, uint32_t tag, uint32_t lun,
2732                                      uint8_t *buf, void *hba_private)
2733 {
2734     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, d);
2735     SCSIRequest *req;
2736     const SCSIReqOps *ops;
2737     uint8_t command;
2738 
2739     command = buf[0];
2740     ops = scsi_disk_reqops_dispatch[command];
2741     if (!ops) {
2742         ops = &scsi_disk_emulate_reqops;
2743     }
2744     req = scsi_req_alloc(ops, &s->qdev, tag, lun, hba_private);
2745 
2746     if (trace_event_get_state_backends(TRACE_SCSI_DISK_NEW_REQUEST)) {
2747         scsi_disk_new_request_dump(lun, tag, buf);
2748     }
2749 
2750     return req;
2751 }
2752 
2753 #ifdef __linux__
2754 static int get_device_type(SCSIDiskState *s)
2755 {
2756     uint8_t cmd[16];
2757     uint8_t buf[36];
2758     int ret;
2759 
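         /*
          * Issue a 6-byte INQUIRY with an allocation length of 36 bytes, the
          * size of the standard INQUIRY data.  Byte 0 of the response holds
          * the peripheral device type and bit 7 of byte 1 the RMB (removable
          * medium) flag.
          */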
2760     memset(cmd, 0, sizeof(cmd));
2761     memset(buf, 0, sizeof(buf));
2762     cmd[0] = INQUIRY;
2763     cmd[4] = sizeof(buf);
2764 
2765     ret = scsi_SG_IO_FROM_DEV(s->qdev.conf.blk, cmd, sizeof(cmd),
2766                               buf, sizeof(buf), s->qdev.io_timeout);
2767     if (ret < 0) {
2768         return -1;
2769     }
2770     s->qdev.type = buf[0];
2771     if (buf[1] & 0x80) {
2772         s->features |= 1 << SCSI_DISK_F_REMOVABLE;
2773     }
2774     return 0;
2775 }
2776 
2777 static void scsi_block_realize(SCSIDevice *dev, Error **errp)
2778 {
2779     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, dev);
2780     int sg_version;
2781     int rc;
2782 
2783     if (!s->qdev.conf.blk) {
2784         error_setg(errp, "drive property not set");
2785         return;
2786     }
2787 
2788     if (s->rotation_rate) {
2789         error_report_once("rotation_rate is specified for scsi-block but is "
2790                           "not implemented. This option is deprecated and will "
2791                           "be removed in a future version");
2792     }
2793 
2794     /* Check that the driver supports SG_IO (version 3 and later) */
2795     rc = blk_ioctl(s->qdev.conf.blk, SG_GET_VERSION_NUM, &sg_version);
2796     if (rc < 0) {
2797         error_setg_errno(errp, -rc, "cannot get SG_IO version number");
2798         if (rc != -EPERM) {
2799             error_append_hint(errp, "Is this a SCSI device?\n");
2800         }
2801         return;
2802     }
2803     if (sg_version < 30000) {
2804         error_setg(errp, "scsi generic interface too old");
2805         return;
2806     }
2807 
2808     /* get device type from INQUIRY data */
2809     rc = get_device_type(s);
2810     if (rc < 0) {
2811         error_setg(errp, "INQUIRY failed");
2812         return;
2813     }
2814 
2815     /* Make a guess for the block size; we'll fix it when the guest sends
2816      * READ CAPACITY.  If the guest never does, it would likely assume these
2817      * sizes anyway.  (TODO: check in /sys).
2818      */
2819     if (s->qdev.type == TYPE_ROM || s->qdev.type == TYPE_WORM) {
2820         s->qdev.blocksize = 2048;
2821     } else {
2822         s->qdev.blocksize = 512;
2823     }
2824 
2825     /* Prevent the scsi-block device from being removed with the HMP and
2826      * QMP eject commands.
2827      */
2828     s->features |= (1 << SCSI_DISK_F_NO_REMOVABLE_DEVOPS);
2829 
2830     scsi_realize(&s->qdev, errp);
2831     scsi_generic_read_device_inquiry(&s->qdev);
2832 }
2833 
2834 typedef struct SCSIBlockReq {
2835     SCSIDiskReq req;
2836     sg_io_hdr_t io_header;
2837 
2838     /* Selected bytes of the original CDB, copied into our own CDB.  */
2839     uint8_t cmd, cdb1, group_number;
2840 
2841     /* CDB passed to SG_IO.  */
2842     uint8_t cdb[16];
2843     BlockCompletionFunc *cb;
2844     void *cb_opaque;
2845 } SCSIBlockReq;
2846 
2847 static void scsi_block_sgio_complete(void *opaque, int ret)
2848 {
2849     SCSIBlockReq *req = (SCSIBlockReq *)opaque;
2850     SCSIDiskReq *r = &req->req;
2851     sg_io_hdr_t *io_hdr = &req->io_header;
2852 
2853     if (ret == 0) {
2854         /* FIXME This skips calling req->cb() and any cleanup in it */
2855         if (io_hdr->host_status != SCSI_HOST_OK) {
2856             scsi_req_complete_failed(&r->req, io_hdr->host_status);
2857             scsi_req_unref(&r->req);
2858             return;
2859         }
2860 
2861         if (io_hdr->driver_status & SG_ERR_DRIVER_TIMEOUT) {
2862             ret = BUSY;
2863         } else {
2864             ret = io_hdr->status;
2865         }
2866     }
2867 
2868     req->cb(req->cb_opaque, ret);
2869 }
2870 
2871 static BlockAIOCB *scsi_block_do_sgio(SCSIBlockReq *req,
2872                                       int64_t offset, QEMUIOVector *iov,
2873                                       int direction,
2874                                       BlockCompletionFunc *cb, void *opaque)
2875 {
2876     sg_io_hdr_t *io_header = &req->io_header;
2877     SCSIDiskReq *r = &req->req;
2878     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
2879     int nb_logical_blocks;
2880     uint64_t lba;
2881     BlockAIOCB *aiocb;
2882 
2883     /* This is not supported yet.  It can only happen if the guest does
2884      * reads and writes that are not aligned to the logical sector size
2885      * _and_ cover multiple MemoryRegions.
2886      */
2887     assert(offset % s->qdev.blocksize == 0);
2888     assert(iov->size % s->qdev.blocksize == 0);
2889 
2890     io_header->interface_id = 'S';
2891 
2892     /* The data transfer comes from the QEMUIOVector.  */
2893     io_header->dxfer_direction = direction;
2894     io_header->dxfer_len = iov->size;
2895     io_header->dxferp = (void *)iov->iov;
2896     io_header->iovec_count = iov->niov;
2897     assert(io_header->iovec_count == iov->niov); /* no overflow! */
2898 
2899     /* Build a new CDB with the LBA and length patched in, in case
2900      * the DMA helpers split the transfer into multiple segments.  Do not
2901      * build a CDB smaller than what the guest wanted, and only build
2902      * a larger one if strictly necessary.
2903      */
2904     io_header->cmdp = req->cdb;
2905     lba = offset / s->qdev.blocksize;
2906     nb_logical_blocks = io_header->dxfer_len / s->qdev.blocksize;
2907 
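         /*
          * The top three bits of the opcode encode the command group, which
          * gives the guest's original CDB size: group 0 is 6-byte, groups 1
          * and 2 are 10-byte, group 5 is 12-byte and group 4 is 16-byte.  A
          * larger CDB is only used when the LBA does not fit in the smaller
          * format.
          */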
2908     if ((req->cmd >> 5) == 0 && lba <= 0x1ffff) {
2909         /* 6-byte CDB */
2910         stl_be_p(&req->cdb[0], lba | (req->cmd << 24));
2911         req->cdb[4] = nb_logical_blocks;
2912         req->cdb[5] = 0;
2913         io_header->cmd_len = 6;
2914     } else if ((req->cmd >> 5) <= 1 && lba <= 0xffffffffULL) {
2915         /* 10-byte CDB */
2916         req->cdb[0] = (req->cmd & 0x1f) | 0x20;
2917         req->cdb[1] = req->cdb1;
2918         stl_be_p(&req->cdb[2], lba);
2919         req->cdb[6] = req->group_number;
2920         stw_be_p(&req->cdb[7], nb_logical_blocks);
2921         req->cdb[9] = 0;
2922         io_header->cmd_len = 10;
2923     } else if ((req->cmd >> 5) != 4 && lba <= 0xffffffffULL) {
2924         /* 12-byte CDB */
2925         req->cdb[0] = (req->cmd & 0x1f) | 0xA0;
2926         req->cdb[1] = req->cdb1;
2927         stl_be_p(&req->cdb[2], lba);
2928         stl_be_p(&req->cdb[6], nb_logical_blocks);
2929         req->cdb[10] = req->group_number;
2930         req->cdb[11] = 0;
2931         io_header->cmd_len = 12;
2932     } else {
2933         /* 16-byte CDB */
2934         req->cdb[0] = (req->cmd & 0x1f) | 0x80;
2935         req->cdb[1] = req->cdb1;
2936         stq_be_p(&req->cdb[2], lba);
2937         stl_be_p(&req->cdb[10], nb_logical_blocks);
2938         req->cdb[14] = req->group_number;
2939         req->cdb[15] = 0;
2940         io_header->cmd_len = 16;
2941     }
2942 
2943     /* The rest is as in scsi-generic.c.  */
2944     io_header->mx_sb_len = sizeof(r->req.sense);
2945     io_header->sbp = r->req.sense;
2946     io_header->timeout = s->qdev.io_timeout * 1000;
2947     io_header->usr_ptr = r;
2948     io_header->flags |= SG_FLAG_DIRECT_IO;
2949     req->cb = cb;
2950     req->cb_opaque = opaque;
2951     trace_scsi_disk_aio_sgio_command(r->req.tag, req->cdb[0], lba,
2952                                      nb_logical_blocks, io_header->timeout);
2953     aiocb = blk_aio_ioctl(s->qdev.conf.blk, SG_IO, io_header, scsi_block_sgio_complete, req);
2954     assert(aiocb != NULL);
2955     return aiocb;
2956 }
2957 
2958 static bool scsi_block_no_fua(SCSICommand *cmd)
2959 {
2960     return false;
2961 }
2962 
2963 static BlockAIOCB *scsi_block_dma_readv(int64_t offset,
2964                                         QEMUIOVector *iov,
2965                                         BlockCompletionFunc *cb, void *cb_opaque,
2966                                         void *opaque)
2967 {
2968     SCSIBlockReq *r = opaque;
2969     return scsi_block_do_sgio(r, offset, iov,
2970                               SG_DXFER_FROM_DEV, cb, cb_opaque);
2971 }
2972 
2973 static BlockAIOCB *scsi_block_dma_writev(int64_t offset,
2974                                          QEMUIOVector *iov,
2975                                          BlockCompletionFunc *cb, void *cb_opaque,
2976                                          void *opaque)
2977 {
2978     SCSIBlockReq *r = opaque;
2979     return scsi_block_do_sgio(r, offset, iov,
2980                               SG_DXFER_TO_DEV, cb, cb_opaque);
2981 }
2982 
2983 static bool scsi_block_is_passthrough(SCSIDiskState *s, uint8_t *buf)
2984 {
2985     switch (buf[0]) {
2986     case VERIFY_10:
2987     case VERIFY_12:
2988     case VERIFY_16:
2989         /* Check if BYTCHK == 0x01 (data-out buffer contains data
2990          * for the number of logical blocks specified in the length
2991          * field).  For other modes, do not use scatter/gather operation.
2992          */
2993         if ((buf[1] & 6) == 2) {
2994             return false;
2995         }
2996         break;
2997 
2998     case READ_6:
2999     case READ_10:
3000     case READ_12:
3001     case READ_16:
3002     case WRITE_6:
3003     case WRITE_10:
3004     case WRITE_12:
3005     case WRITE_16:
3006     case WRITE_VERIFY_10:
3007     case WRITE_VERIFY_12:
3008     case WRITE_VERIFY_16:
3009         /* MMC writing cannot be done via DMA helpers, because it sometimes
3010          * involves writing beyond the maximum LBA or to negative LBA (lead-in).
3011          * We might use scsi_block_dma_reqops as long as no writing commands are
3012          * seen, but performance usually isn't paramount on optical media.  So,
3013          * just make scsi-block operate the same as scsi-generic for them.
3014          */
3015         if (s->qdev.type != TYPE_ROM) {
3016             return false;
3017         }
3018         break;
3019 
3020     default:
3021         break;
3022     }
3023 
3024     return true;
3025 }
3026 
3027 
3028 static int32_t scsi_block_dma_command(SCSIRequest *req, uint8_t *buf)
3029 {
3030     SCSIBlockReq *r = (SCSIBlockReq *)req;
3031     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
3032 
3033     r->cmd = req->cmd.buf[0];
3034     switch (r->cmd >> 5) {
3035     case 0:
3036         /* 6-byte CDB.  */
3037         r->cdb1 = r->group_number = 0;
3038         break;
3039     case 1:
3040         /* 10-byte CDB.  */
3041         r->cdb1 = req->cmd.buf[1];
3042         r->group_number = req->cmd.buf[6];
3043         break;
3044     case 4:
3045         /* 16-byte CDB.  */
3046         r->cdb1 = req->cmd.buf[1];
3047         r->group_number = req->cmd.buf[14];
3048         break;
3049     case 5:
3050         /* 12-byte CDB.  */
3051         r->cdb1 = req->cmd.buf[1];
3052         r->group_number = req->cmd.buf[10];
3053         break;
3054     default:
3055         abort();
3056     }
3057 
3058     /* Protection information is not supported.  For SCSI versions 2 and
3059      * older (as determined by snooping the guest's INQUIRY commands),
3060      * there is no RD/WR/VRPROTECT, so skip this check in these versions.
3061      */
3062     if (s->qdev.scsi_version > 2 && (req->cmd.buf[1] & 0xe0)) {
3063         scsi_check_condition(&r->req, SENSE_CODE(INVALID_FIELD));
3064         return 0;
3065     }
3066 
3067     return scsi_disk_dma_command(req, buf);
3068 }
3069 
3070 static const SCSIReqOps scsi_block_dma_reqops = {
3071     .size         = sizeof(SCSIBlockReq),
3072     .free_req     = scsi_free_request,
3073     .send_command = scsi_block_dma_command,
3074     .read_data    = scsi_read_data,
3075     .write_data   = scsi_write_data,
3076     .get_buf      = scsi_get_buf,
3077     .load_request = scsi_disk_load_request,
3078     .save_request = scsi_disk_save_request,
3079 };
3080 
3081 static SCSIRequest *scsi_block_new_request(SCSIDevice *d, uint32_t tag,
3082                                            uint32_t lun, uint8_t *buf,
3083                                            void *hba_private)
3084 {
3085     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, d);
3086 
3087     if (scsi_block_is_passthrough(s, buf)) {
3088         return scsi_req_alloc(&scsi_generic_req_ops, &s->qdev, tag, lun,
3089                               hba_private);
3090     } else {
3091         return scsi_req_alloc(&scsi_block_dma_reqops, &s->qdev, tag, lun,
3092                               hba_private);
3093     }
3094 }
3095 
3096 static int scsi_block_parse_cdb(SCSIDevice *d, SCSICommand *cmd,
3097                                   uint8_t *buf, size_t buf_len,
3098                                   void *hba_private)
3099 {
3100     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, d);
3101 
3102     if (scsi_block_is_passthrough(s, buf)) {
3103         return scsi_bus_parse_cdb(&s->qdev, cmd, buf, buf_len, hba_private);
3104     } else {
3105         return scsi_req_parse_cdb(&s->qdev, cmd, buf, buf_len);
3106     }
3107 }
3108 
3109 static void scsi_block_update_sense(SCSIRequest *req)
3110 {
3111     SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
3112     SCSIBlockReq *br = DO_UPCAST(SCSIBlockReq, req, r);
3113     r->req.sense_len = MIN(br->io_header.sb_len_wr, sizeof(r->req.sense));
3114 }
3115 #endif
3116 
3117 static
3118 BlockAIOCB *scsi_dma_readv(int64_t offset, QEMUIOVector *iov,
3119                            BlockCompletionFunc *cb, void *cb_opaque,
3120                            void *opaque)
3121 {
3122     SCSIDiskReq *r = opaque;
3123     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
3124     return blk_aio_preadv(s->qdev.conf.blk, offset, iov, 0, cb, cb_opaque);
3125 }
3126 
3127 static
3128 BlockAIOCB *scsi_dma_writev(int64_t offset, QEMUIOVector *iov,
3129                             BlockCompletionFunc *cb, void *cb_opaque,
3130                             void *opaque)
3131 {
3132     SCSIDiskReq *r = opaque;
3133     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
3134     return blk_aio_pwritev(s->qdev.conf.blk, offset, iov, 0, cb, cb_opaque);
3135 }
3136 
3137 static char *scsi_property_get_loadparm(Object *obj, Error **errp)
3138 {
3139     return g_strdup(SCSI_DISK_BASE(obj)->loadparm);
3140 }
3141 
3142 static void scsi_property_set_loadparm(Object *obj, const char *value,
3143                                        Error **errp)
3144 {
3145     void *lp_str;
3146 
3147     if (object_property_get_int(obj, "bootindex", NULL) < 0) {
3148         error_setg(errp, "'loadparm' is only valid for boot devices");
3149         return;
3150     }
3151 
3152     lp_str = g_malloc0(strlen(value) + 1);
3153     if (!qdev_prop_sanitize_s390x_loadparm(lp_str, value, errp)) {
3154         g_free(lp_str);
3155         return;
3156     }
3157     SCSI_DISK_BASE(obj)->loadparm = lp_str;
3158 }
3159 
3160 static void scsi_property_add_specifics(DeviceClass *dc)
3161 {
3162     ObjectClass *oc = OBJECT_CLASS(dc);
3163 
3164     /* The loadparm property is only supported on s390x */
3165     if (arch_type & QEMU_ARCH_S390X) {
3166         object_class_property_add_str(oc, "loadparm",
3167                                       scsi_property_get_loadparm,
3168                                       scsi_property_set_loadparm);
3169         object_class_property_set_description(oc, "loadparm",
3170                                               "load parameter (s390x only)");
3171     }
3172 }
3173 
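/*
 * Common class init for every scsi-disk variant: firmware device name,
 * reset handling, and the default DMA and FUA-emulation hooks that
 * scsi-hd/scsi-cd use as-is and scsi-block overrides.
 */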
3174 static void scsi_disk_base_class_initfn(ObjectClass *klass, void *data)
3175 {
3176     DeviceClass *dc = DEVICE_CLASS(klass);
3177     SCSIDiskClass *sdc = SCSI_DISK_BASE_CLASS(klass);
3178 
3179     dc->fw_name = "disk";
3180     device_class_set_legacy_reset(dc, scsi_disk_reset);
3181     sdc->dma_readv = scsi_dma_readv;
3182     sdc->dma_writev = scsi_dma_writev;
3183     sdc->need_fua_emulation = scsi_is_cmd_fua;
3184 }
3185 
3186 static const TypeInfo scsi_disk_base_info = {
3187     .name          = TYPE_SCSI_DISK_BASE,
3188     .parent        = TYPE_SCSI_DEVICE,
3189     .class_init    = scsi_disk_base_class_initfn,
3190     .instance_size = sizeof(SCSIDiskState),
3191     .class_size    = sizeof(SCSIDiskClass),
3192     .abstract      = true,
3193 };
3194 
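/* Properties shared by the scsi-hd and scsi-cd device models. */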
3195 #define DEFINE_SCSI_DISK_PROPERTIES()                                   \
3196     DEFINE_PROP_DRIVE_IOTHREAD("drive", SCSIDiskState, qdev.conf.blk),  \
3197     DEFINE_BLOCK_PROPERTIES_BASE(SCSIDiskState, qdev.conf),             \
3198     DEFINE_BLOCK_ERROR_PROPERTIES(SCSIDiskState, qdev.conf),            \
3199     DEFINE_PROP_STRING("ver", SCSIDiskState, version),                  \
3200     DEFINE_PROP_STRING("serial", SCSIDiskState, serial),                \
3201     DEFINE_PROP_STRING("vendor", SCSIDiskState, vendor),                \
3202     DEFINE_PROP_STRING("product", SCSIDiskState, product),              \
3203     DEFINE_PROP_STRING("device_id", SCSIDiskState, device_id),          \
3204     DEFINE_PROP_BOOL("migrate-emulated-scsi-request", SCSIDiskState,   \
                          migrate_emulated_scsi_request, true)
3205 
3206 
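/*
 * scsi-hd: the shared properties plus disk-specific knobs ("removable",
 * "dpofua", unmap/io size limits, "rotation_rate", CHS geometry).
 */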
3207 static const Property scsi_hd_properties[] = {
3208     DEFINE_SCSI_DISK_PROPERTIES(),
3209     DEFINE_PROP_BIT("removable", SCSIDiskState, features,
3210                     SCSI_DISK_F_REMOVABLE, false),
3211     DEFINE_PROP_BIT("dpofua", SCSIDiskState, features,
3212                     SCSI_DISK_F_DPOFUA, false),
3213     DEFINE_PROP_UINT64("wwn", SCSIDiskState, qdev.wwn, 0),
3214     DEFINE_PROP_UINT64("port_wwn", SCSIDiskState, qdev.port_wwn, 0),
3215     DEFINE_PROP_UINT16("port_index", SCSIDiskState, port_index, 0),
3216     DEFINE_PROP_UINT64("max_unmap_size", SCSIDiskState, max_unmap_size,
3217                        DEFAULT_MAX_UNMAP_SIZE),
3218     DEFINE_PROP_UINT64("max_io_size", SCSIDiskState, max_io_size,
3219                        DEFAULT_MAX_IO_SIZE),
3220     DEFINE_PROP_UINT16("rotation_rate", SCSIDiskState, rotation_rate, 0),
3221     DEFINE_PROP_INT32("scsi_version", SCSIDiskState, qdev.default_scsi_version,
3222                       5),
3223     DEFINE_PROP_BIT("quirk_mode_page_vendor_specific_apple", SCSIDiskState,
3224                     quirks, SCSI_DISK_QUIRK_MODE_PAGE_VENDOR_SPECIFIC_APPLE,
3225                     0),
3226     DEFINE_BLOCK_CHS_PROPERTIES(SCSIDiskState, qdev.conf),
3227 };
3228 
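/*
 * Migration state: the generic SCSI device section plus the removable
 * media and tray state flags.
 */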
3229 static const VMStateDescription vmstate_scsi_disk_state = {
3230     .name = "scsi-disk",
3231     .version_id = 1,
3232     .minimum_version_id = 1,
3233     .fields = (const VMStateField[]) {
3234         VMSTATE_SCSI_DEVICE(qdev, SCSIDiskState),
3235         VMSTATE_BOOL(media_changed, SCSIDiskState),
3236         VMSTATE_BOOL(media_event, SCSIDiskState),
3237         VMSTATE_BOOL(eject_request, SCSIDiskState),
3238         VMSTATE_BOOL(tray_open, SCSIDiskState),
3239         VMSTATE_BOOL(tray_locked, SCSIDiskState),
3240         VMSTATE_END_OF_LIST()
3241     }
3242 };
3243 
3244 static void scsi_hd_class_initfn(ObjectClass *klass, void *data)
3245 {
3246     DeviceClass *dc = DEVICE_CLASS(klass);
3247     SCSIDeviceClass *sc = SCSI_DEVICE_CLASS(klass);
3248 
3249     sc->realize      = scsi_hd_realize;
3250     sc->unrealize    = scsi_unrealize;
3251     sc->alloc_req    = scsi_new_request;
3252     sc->unit_attention_reported = scsi_disk_unit_attention_reported;
3253     dc->desc = "virtual SCSI disk";
3254     device_class_set_props(dc, scsi_hd_properties);
3255     dc->vmsd  = &vmstate_scsi_disk_state;
3256 
3257     scsi_property_add_specifics(dc);
3258 }
3259 
3260 static const TypeInfo scsi_hd_info = {
3261     .name          = "scsi-hd",
3262     .parent        = TYPE_SCSI_DISK_BASE,
3263     .class_init    = scsi_hd_class_initfn,
3264 };
3265 
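/*
 * scsi-cd: reuses the shared properties but drops the hard-disk-only
 * ones and adds several mode-page quirk bits for picky guest drivers.
 */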
3266 static const Property scsi_cd_properties[] = {
3267     DEFINE_SCSI_DISK_PROPERTIES(),
3268     DEFINE_PROP_UINT64("wwn", SCSIDiskState, qdev.wwn, 0),
3269     DEFINE_PROP_UINT64("port_wwn", SCSIDiskState, qdev.port_wwn, 0),
3270     DEFINE_PROP_UINT16("port_index", SCSIDiskState, port_index, 0),
3271     DEFINE_PROP_UINT64("max_io_size", SCSIDiskState, max_io_size,
3272                        DEFAULT_MAX_IO_SIZE),
3273     DEFINE_PROP_INT32("scsi_version", SCSIDiskState, qdev.default_scsi_version,
3274                       5),
3275     DEFINE_PROP_BIT("quirk_mode_page_apple_vendor", SCSIDiskState, quirks,
3276                     SCSI_DISK_QUIRK_MODE_PAGE_APPLE_VENDOR, 0),
3277     DEFINE_PROP_BIT("quirk_mode_sense_rom_use_dbd", SCSIDiskState, quirks,
3278                     SCSI_DISK_QUIRK_MODE_SENSE_ROM_USE_DBD, 0),
3279     DEFINE_PROP_BIT("quirk_mode_page_vendor_specific_apple", SCSIDiskState,
3280                     quirks, SCSI_DISK_QUIRK_MODE_PAGE_VENDOR_SPECIFIC_APPLE,
3281                     0),
3282     DEFINE_PROP_BIT("quirk_mode_page_truncated", SCSIDiskState, quirks,
3283                     SCSI_DISK_QUIRK_MODE_PAGE_TRUNCATED, 0),
3284 };
3285 
3286 static void scsi_cd_class_initfn(ObjectClass *klass, void *data)
3287 {
3288     DeviceClass *dc = DEVICE_CLASS(klass);
3289     SCSIDeviceClass *sc = SCSI_DEVICE_CLASS(klass);
3290 
3291     sc->realize      = scsi_cd_realize;
3292     sc->alloc_req    = scsi_new_request;
3293     sc->unit_attention_reported = scsi_disk_unit_attention_reported;
3294     dc->desc = "virtual SCSI CD-ROM";
3295     device_class_set_props(dc, scsi_cd_properties);
3296     dc->vmsd  = &vmstate_scsi_disk_state;
3297 
3298     scsi_property_add_specifics(dc);
3299 }
3300 
3301 static const TypeInfo scsi_cd_info = {
3302     .name          = "scsi-cd",
3303     .parent        = TYPE_SCSI_DISK_BASE,
3304     .class_init    = scsi_cd_class_initfn,
3305 };
3306 
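/*
 * scsi-block: passthrough of a host SCSI block device.  Linux-only,
 * because the passthrough path relies on SG_IO.
 */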
3307 #ifdef __linux__
3308 static const Property scsi_block_properties[] = {
3309     DEFINE_BLOCK_ERROR_PROPERTIES(SCSIDiskState, qdev.conf),
3310     DEFINE_PROP_DRIVE("drive", SCSIDiskState, qdev.conf.blk),
3311     DEFINE_PROP_BOOL("share-rw", SCSIDiskState, qdev.conf.share_rw, false),
3312     DEFINE_PROP_UINT16("rotation_rate", SCSIDiskState, rotation_rate, 0),
3313     DEFINE_PROP_UINT64("max_unmap_size", SCSIDiskState, max_unmap_size,
3314                        DEFAULT_MAX_UNMAP_SIZE),
3315     DEFINE_PROP_UINT64("max_io_size", SCSIDiskState, max_io_size,
3316                        DEFAULT_MAX_IO_SIZE),
3317     DEFINE_PROP_INT32("scsi_version", SCSIDiskState, qdev.default_scsi_version,
3318                       -1),
3319     DEFINE_PROP_UINT32("io_timeout", SCSIDiskState, qdev.io_timeout,
3320                        DEFAULT_IO_TIMEOUT),
3321 };
3322 
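/*
 * scsi-block wires in its own request allocation, CDB parsing and DMA
 * hooks, and disables FUA emulation (scsi_block_no_fua): the command is
 * executed by the real device, which is expected to honour FUA itself.
 */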
3323 static void scsi_block_class_initfn(ObjectClass *klass, void *data)
3324 {
3325     DeviceClass *dc = DEVICE_CLASS(klass);
3326     SCSIDeviceClass *sc = SCSI_DEVICE_CLASS(klass);
3327     SCSIDiskClass *sdc = SCSI_DISK_BASE_CLASS(klass);
3328 
3329     sc->realize      = scsi_block_realize;
3330     sc->alloc_req    = scsi_block_new_request;
3331     sc->parse_cdb    = scsi_block_parse_cdb;
3332     sdc->dma_readv   = scsi_block_dma_readv;
3333     sdc->dma_writev  = scsi_block_dma_writev;
3334     sdc->update_sense = scsi_block_update_sense;
3335     sdc->need_fua_emulation = scsi_block_no_fua;
3336     dc->desc = "SCSI block device passthrough";
3337     device_class_set_props(dc, scsi_block_properties);
3338     dc->vmsd  = &vmstate_scsi_disk_state;
3339 }
3340 
3341 static const TypeInfo scsi_block_info = {
3342     .name          = "scsi-block",
3343     .parent        = TYPE_SCSI_DISK_BASE,
3344     .class_init    = scsi_block_class_initfn,
3345 };
3346 #endif
3347 
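/*
 * Register the abstract base type plus scsi-hd, scsi-cd and, on Linux,
 * scsi-block.
 */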
3348 static void scsi_disk_register_types(void)
3349 {
3350     type_register_static(&scsi_disk_base_info);
3351     type_register_static(&scsi_hd_info);
3352     type_register_static(&scsi_cd_info);
3353 #ifdef __linux__
3354     type_register_static(&scsi_block_info);
3355 #endif
3356 }
3357 
3358 type_init(scsi_disk_register_types)
3359