xref: /openbmc/qemu/hw/scsi/scsi-disk.c (revision b45c03f5)
1 /*
2  * SCSI Device emulation
3  *
4  * Copyright (c) 2006 CodeSourcery.
5  * Based on code by Fabrice Bellard
6  *
7  * Written by Paul Brook
8  * Modifications:
9  *  2009-Dec-12 Artyom Tarasenko : implemented standard inquiry for the case
10  *                                 when the allocation length of CDB is smaller
11  *                                 than 36.
12  *  2009-Oct-13 Artyom Tarasenko : implemented the block descriptor in the
13  *                                 MODE SENSE response.
14  *
15  * This code is licensed under the LGPL.
16  *
17  * Note that this file only handles the SCSI architecture model and device
18  * commands.  Emulation of interface/link layer protocols is handled by
19  * the host adapter emulator.
20  */
21 
22 //#define DEBUG_SCSI
23 
/*
 * Debug tracing.  With DEBUG_SCSI defined, DPRINTF() printf()s its arguments
 * prefixed with "scsi-disk: "; otherwise it expands to an empty statement and
 * its arguments are not evaluated.
 */
#ifdef DEBUG_SCSI
#define DPRINTF(fmt, ...) \
do { printf("scsi-disk: " fmt , ## __VA_ARGS__); } while (0)
#else
#define DPRINTF(fmt, ...) do {} while(0)
#endif
30 
31 #include "qemu-common.h"
32 #include "qemu/error-report.h"
33 #include "hw/scsi/scsi.h"
34 #include "block/scsi.h"
35 #include "sysemu/sysemu.h"
36 #include "sysemu/block-backend.h"
37 #include "sysemu/blockdev.h"
38 #include "hw/block/block.h"
39 #include "sysemu/dma.h"
40 
41 #ifdef __linux
42 #include <scsi/sg.h>
43 #endif
44 
#define SCSI_WRITE_SAME_MAX         524288
/* Bounce buffer size (bytes) requested for transfers without a sg list. */
#define SCSI_DMA_BUF_SIZE           131072
/* Cap applied to the allocation length of a standard INQUIRY response. */
#define SCSI_MAX_INQUIRY_LEN        256
#define SCSI_MAX_MODE_LEN           256

#define DEFAULT_DISCARD_GRANULARITY 4096
#define DEFAULT_MAX_UNMAP_SIZE      (1 << 30)   /* 1 GB */
#define DEFAULT_MAX_IO_SIZE         INT_MAX     /* 2 GB - 1 block */
53 
54 typedef struct SCSIDiskState SCSIDiskState;
55 
/*
 * Per-command state of the emulated disk.  The generic SCSIRequest is
 * embedded as "req" and the containing SCSIDiskReq is recovered from it
 * with DO_UPCAST() throughout this file.
 */
typedef struct SCSIDiskReq {
    SCSIRequest req;
    /* Both sector and sector_count are in terms of qemu 512 byte blocks.  */
    uint64_t sector;        /* next sector to transfer; advanced per chunk */
    uint32_t sector_count;  /* sectors still to be transferred */
    uint32_t buflen;        /* size in bytes of the buffer at iov.iov_base */
    bool started;           /* set when the first data phase is entered */
    struct iovec iov;       /* bounce buffer used when there is no sg list */
    QEMUIOVector qiov;      /* one-element I/O vector wrapping iov */
    BlockAcctCookie acct;   /* cookie for block_acct_start()/block_acct_done() */
} SCSIDiskReq;
67 
/* Bit numbers in SCSIDiskState.features; tested as (1 << SCSI_DISK_F_xxx). */
#define SCSI_DISK_F_REMOVABLE             0
#define SCSI_DISK_F_DPOFUA                1
#define SCSI_DISK_F_NO_REMOVABLE_DEVOPS   2

/*
 * Device state of an emulated SCSI disk or CD-ROM.  The generic SCSIDevice
 * is embedded as "qdev" and the containing SCSIDiskState is recovered from
 * it with DO_UPCAST().
 */
struct SCSIDiskState
{
    SCSIDevice qdev;
    uint32_t features;          /* mask of (1 << SCSI_DISK_F_xxx) bits */
    bool media_changed;
    bool media_event;           /* pending "new media" event, cleared once reported */
    bool eject_request;         /* pending eject request event, cleared once reported */
    uint64_t wwn;               /* if non-zero, reported as NAA designator in VPD 0x83 */
    uint64_t port_wwn;          /* if non-zero, reported as target port designator in VPD 0x83 */
    uint16_t port_index;        /* if non-zero, reported as relative target port in VPD 0x83 */
    uint64_t max_unmap_size;    /* divided by the block size for the block limits VPD page */
    uint64_t max_io_size;       /* divided by the block size for the block limits VPD page */
    QEMUBH *bh;
    char *version;              /* up to 4 characters are copied into standard INQUIRY data */
    char *serial;               /* optional; used for VPD pages 0x80 and 0x83 */
    char *vendor;               /* space padded into standard INQUIRY data */
    char *product;              /* space padded into standard INQUIRY data */
    bool tray_open;             /* while set, reads and writes fail with ENOMEDIUM */
    bool tray_locked;
};
92 
93 static int scsi_handle_rw_error(SCSIDiskReq *r, int error);
94 
95 static void scsi_free_request(SCSIRequest *req)
96 {
97     SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
98 
99     qemu_vfree(r->iov.iov_base);
100 }
101 
102 /* Helper function for command completion with sense.  */
103 static void scsi_check_condition(SCSIDiskReq *r, SCSISense sense)
104 {
105     DPRINTF("Command complete tag=0x%x sense=%d/%d/%d\n",
106             r->req.tag, sense.key, sense.asc, sense.ascq);
107     scsi_req_build_sense(&r->req, sense);
108     scsi_req_complete(&r->req, CHECK_CONDITION);
109 }
110 
111 static uint32_t scsi_init_iovec(SCSIDiskReq *r, size_t size)
112 {
113     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
114 
115     if (!r->iov.iov_base) {
116         r->buflen = size;
117         r->iov.iov_base = blk_blockalign(s->qdev.conf.blk, r->buflen);
118     }
119     r->iov.iov_len = MIN(r->sector_count * 512, r->buflen);
120     qemu_iovec_init_external(&r->qiov, &r->iov, 1);
121     return r->qiov.size / 512;
122 }
123 
124 static void scsi_disk_save_request(QEMUFile *f, SCSIRequest *req)
125 {
126     SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
127 
128     qemu_put_be64s(f, &r->sector);
129     qemu_put_be32s(f, &r->sector_count);
130     qemu_put_be32s(f, &r->buflen);
131     if (r->buflen) {
132         if (r->req.cmd.mode == SCSI_XFER_TO_DEV) {
133             qemu_put_buffer(f, r->iov.iov_base, r->iov.iov_len);
134         } else if (!req->retry) {
135             uint32_t len = r->iov.iov_len;
136             qemu_put_be32s(f, &len);
137             qemu_put_buffer(f, r->iov.iov_base, r->iov.iov_len);
138         }
139     }
140 }
141 
142 static void scsi_disk_load_request(QEMUFile *f, SCSIRequest *req)
143 {
144     SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
145 
146     qemu_get_be64s(f, &r->sector);
147     qemu_get_be32s(f, &r->sector_count);
148     qemu_get_be32s(f, &r->buflen);
149     if (r->buflen) {
150         scsi_init_iovec(r, r->buflen);
151         if (r->req.cmd.mode == SCSI_XFER_TO_DEV) {
152             qemu_get_buffer(f, r->iov.iov_base, r->iov.iov_len);
153         } else if (!r->req.retry) {
154             uint32_t len;
155             qemu_get_be32s(f, &len);
156             r->iov.iov_len = len;
157             assert(r->iov.iov_len <= r->buflen);
158             qemu_get_buffer(f, r->iov.iov_base, r->iov.iov_len);
159         }
160     }
161 
162     qemu_iovec_init_external(&r->qiov, &r->iov, 1);
163 }
164 
165 static void scsi_aio_complete(void *opaque, int ret)
166 {
167     SCSIDiskReq *r = (SCSIDiskReq *)opaque;
168     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
169 
170     assert(r->req.aiocb != NULL);
171     r->req.aiocb = NULL;
172     block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct);
173     if (r->req.io_canceled) {
174         scsi_req_cancel_complete(&r->req);
175         goto done;
176     }
177 
178     if (ret < 0) {
179         if (scsi_handle_rw_error(r, -ret)) {
180             goto done;
181         }
182     }
183 
184     scsi_req_complete(&r->req, GOOD);
185 
186 done:
187     scsi_req_unref(&r->req);
188 }
189 
190 static bool scsi_is_cmd_fua(SCSICommand *cmd)
191 {
192     switch (cmd->buf[0]) {
193     case READ_10:
194     case READ_12:
195     case READ_16:
196     case WRITE_10:
197     case WRITE_12:
198     case WRITE_16:
199         return (cmd->buf[1] & 8) != 0;
200 
201     case VERIFY_10:
202     case VERIFY_12:
203     case VERIFY_16:
204     case WRITE_VERIFY_10:
205     case WRITE_VERIFY_12:
206     case WRITE_VERIFY_16:
207         return true;
208 
209     case READ_6:
210     case WRITE_6:
211     default:
212         return false;
213     }
214 }
215 
216 static void scsi_write_do_fua(SCSIDiskReq *r)
217 {
218     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
219 
220     assert(r->req.aiocb == NULL);
221 
222     if (r->req.io_canceled) {
223         scsi_req_cancel_complete(&r->req);
224         goto done;
225     }
226 
227     if (scsi_is_cmd_fua(&r->req.cmd)) {
228         block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct, 0,
229                          BLOCK_ACCT_FLUSH);
230         r->req.aiocb = blk_aio_flush(s->qdev.conf.blk, scsi_aio_complete, r);
231         return;
232     }
233 
234     scsi_req_complete(&r->req, GOOD);
235 
236 done:
237     scsi_req_unref(&r->req);
238 }
239 
240 static void scsi_dma_complete_noio(SCSIDiskReq *r, int ret)
241 {
242     assert(r->req.aiocb == NULL);
243 
244     if (r->req.io_canceled) {
245         scsi_req_cancel_complete(&r->req);
246         goto done;
247     }
248 
249     if (ret < 0) {
250         if (scsi_handle_rw_error(r, -ret)) {
251             goto done;
252         }
253     }
254 
255     r->sector += r->sector_count;
256     r->sector_count = 0;
257     if (r->req.cmd.mode == SCSI_XFER_TO_DEV) {
258         scsi_write_do_fua(r);
259         return;
260     } else {
261         scsi_req_complete(&r->req, GOOD);
262     }
263 
264 done:
265     scsi_req_unref(&r->req);
266 }
267 
268 static void scsi_dma_complete(void *opaque, int ret)
269 {
270     SCSIDiskReq *r = (SCSIDiskReq *)opaque;
271     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
272 
273     assert(r->req.aiocb != NULL);
274     r->req.aiocb = NULL;
275 
276     block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct);
277     scsi_dma_complete_noio(r, ret);
278 }
279 
280 static void scsi_read_complete(void * opaque, int ret)
281 {
282     SCSIDiskReq *r = (SCSIDiskReq *)opaque;
283     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
284     int n;
285 
286     assert(r->req.aiocb != NULL);
287     r->req.aiocb = NULL;
288     block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct);
289     if (r->req.io_canceled) {
290         scsi_req_cancel_complete(&r->req);
291         goto done;
292     }
293 
294     if (ret < 0) {
295         if (scsi_handle_rw_error(r, -ret)) {
296             goto done;
297         }
298     }
299 
300     DPRINTF("Data ready tag=0x%x len=%zd\n", r->req.tag, r->qiov.size);
301 
302     n = r->qiov.size / 512;
303     r->sector += n;
304     r->sector_count -= n;
305     scsi_req_data(&r->req, r->qiov.size);
306 
307 done:
308     scsi_req_unref(&r->req);
309 }
310 
311 /* Actually issue a read to the block device.  */
312 static void scsi_do_read(SCSIDiskReq *r, int ret)
313 {
314     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
315     uint32_t n;
316 
317     assert (r->req.aiocb == NULL);
318 
319     if (r->req.io_canceled) {
320         scsi_req_cancel_complete(&r->req);
321         goto done;
322     }
323 
324     if (ret < 0) {
325         if (scsi_handle_rw_error(r, -ret)) {
326             goto done;
327         }
328     }
329 
330     /* The request is used as the AIO opaque value, so add a ref.  */
331     scsi_req_ref(&r->req);
332 
333     if (r->req.sg) {
334         dma_acct_start(s->qdev.conf.blk, &r->acct, r->req.sg, BLOCK_ACCT_READ);
335         r->req.resid -= r->req.sg->size;
336         r->req.aiocb = dma_blk_read(s->qdev.conf.blk, r->req.sg, r->sector,
337                                     scsi_dma_complete, r);
338     } else {
339         n = scsi_init_iovec(r, SCSI_DMA_BUF_SIZE);
340         block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct,
341                          n * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
342         r->req.aiocb = blk_aio_readv(s->qdev.conf.blk, r->sector, &r->qiov, n,
343                                      scsi_read_complete, r);
344     }
345 
346 done:
347     scsi_req_unref(&r->req);
348 }
349 
350 static void scsi_do_read_cb(void *opaque, int ret)
351 {
352     SCSIDiskReq *r = (SCSIDiskReq *)opaque;
353     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
354 
355     assert (r->req.aiocb != NULL);
356     r->req.aiocb = NULL;
357 
358     block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct);
359     scsi_do_read(opaque, ret);
360 }
361 
362 /* Read more data from scsi device into buffer.  */
363 static void scsi_read_data(SCSIRequest *req)
364 {
365     SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
366     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
367     bool first;
368 
369     DPRINTF("Read sector_count=%d\n", r->sector_count);
370     if (r->sector_count == 0) {
371         /* This also clears the sense buffer for REQUEST SENSE.  */
372         scsi_req_complete(&r->req, GOOD);
373         return;
374     }
375 
376     /* No data transfer may already be in progress */
377     assert(r->req.aiocb == NULL);
378 
379     /* The request is used as the AIO opaque value, so add a ref.  */
380     scsi_req_ref(&r->req);
381     if (r->req.cmd.mode == SCSI_XFER_TO_DEV) {
382         DPRINTF("Data transfer direction invalid\n");
383         scsi_read_complete(r, -EINVAL);
384         return;
385     }
386 
387     if (s->tray_open) {
388         scsi_read_complete(r, -ENOMEDIUM);
389         return;
390     }
391 
392     first = !r->started;
393     r->started = true;
394     if (first && scsi_is_cmd_fua(&r->req.cmd)) {
395         block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct, 0,
396                          BLOCK_ACCT_FLUSH);
397         r->req.aiocb = blk_aio_flush(s->qdev.conf.blk, scsi_do_read_cb, r);
398     } else {
399         scsi_do_read(r, 0);
400     }
401 }
402 
403 /*
404  * scsi_handle_rw_error has two return values.  0 means that the error
405  * must be ignored, 1 means that the error has been processed and the
406  * caller should not do anything else for this request.  Note that
407  * scsi_handle_rw_error always manages its reference counts, independent
408  * of the return value.
409  */
410 static int scsi_handle_rw_error(SCSIDiskReq *r, int error)
411 {
412     bool is_read = (r->req.cmd.mode == SCSI_XFER_FROM_DEV);
413     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
414     BlockErrorAction action = blk_get_error_action(s->qdev.conf.blk,
415                                                    is_read, error);
416 
417     if (action == BLOCK_ERROR_ACTION_REPORT) {
418         switch (error) {
419         case ENOMEDIUM:
420             scsi_check_condition(r, SENSE_CODE(NO_MEDIUM));
421             break;
422         case ENOMEM:
423             scsi_check_condition(r, SENSE_CODE(TARGET_FAILURE));
424             break;
425         case EINVAL:
426             scsi_check_condition(r, SENSE_CODE(INVALID_FIELD));
427             break;
428         case ENOSPC:
429             scsi_check_condition(r, SENSE_CODE(SPACE_ALLOC_FAILED));
430             break;
431         default:
432             scsi_check_condition(r, SENSE_CODE(IO_ERROR));
433             break;
434         }
435     }
436     blk_error_action(s->qdev.conf.blk, action, is_read, error);
437     if (action == BLOCK_ERROR_ACTION_STOP) {
438         scsi_req_retry(&r->req);
439     }
440     return action != BLOCK_ERROR_ACTION_IGNORE;
441 }
442 
443 static void scsi_write_complete_noio(SCSIDiskReq *r, int ret)
444 {
445     uint32_t n;
446 
447     assert (r->req.aiocb == NULL);
448 
449     if (r->req.io_canceled) {
450         scsi_req_cancel_complete(&r->req);
451         goto done;
452     }
453 
454     if (ret < 0) {
455         if (scsi_handle_rw_error(r, -ret)) {
456             goto done;
457         }
458     }
459 
460     n = r->qiov.size / 512;
461     r->sector += n;
462     r->sector_count -= n;
463     if (r->sector_count == 0) {
464         scsi_write_do_fua(r);
465         return;
466     } else {
467         scsi_init_iovec(r, SCSI_DMA_BUF_SIZE);
468         DPRINTF("Write complete tag=0x%x more=%zd\n", r->req.tag, r->qiov.size);
469         scsi_req_data(&r->req, r->qiov.size);
470     }
471 
472 done:
473     scsi_req_unref(&r->req);
474 }
475 
476 static void scsi_write_complete(void * opaque, int ret)
477 {
478     SCSIDiskReq *r = (SCSIDiskReq *)opaque;
479     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
480 
481     assert (r->req.aiocb != NULL);
482     r->req.aiocb = NULL;
483 
484     block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct);
485     scsi_write_complete_noio(r, ret);
486 }
487 
488 static void scsi_write_data(SCSIRequest *req)
489 {
490     SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
491     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
492     uint32_t n;
493 
494     /* No data transfer may already be in progress */
495     assert(r->req.aiocb == NULL);
496 
497     /* The request is used as the AIO opaque value, so add a ref.  */
498     scsi_req_ref(&r->req);
499     if (r->req.cmd.mode != SCSI_XFER_TO_DEV) {
500         DPRINTF("Data transfer direction invalid\n");
501         scsi_write_complete_noio(r, -EINVAL);
502         return;
503     }
504 
505     if (!r->req.sg && !r->qiov.size) {
506         /* Called for the first time.  Ask the driver to send us more data.  */
507         r->started = true;
508         scsi_write_complete_noio(r, 0);
509         return;
510     }
511     if (s->tray_open) {
512         scsi_write_complete_noio(r, -ENOMEDIUM);
513         return;
514     }
515 
516     if (r->req.cmd.buf[0] == VERIFY_10 || r->req.cmd.buf[0] == VERIFY_12 ||
517         r->req.cmd.buf[0] == VERIFY_16) {
518         if (r->req.sg) {
519             scsi_dma_complete_noio(r, 0);
520         } else {
521             scsi_write_complete_noio(r, 0);
522         }
523         return;
524     }
525 
526     if (r->req.sg) {
527         dma_acct_start(s->qdev.conf.blk, &r->acct, r->req.sg, BLOCK_ACCT_WRITE);
528         r->req.resid -= r->req.sg->size;
529         r->req.aiocb = dma_blk_write(s->qdev.conf.blk, r->req.sg, r->sector,
530                                      scsi_dma_complete, r);
531     } else {
532         n = r->qiov.size / 512;
533         block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct,
534                          n * BDRV_SECTOR_SIZE, BLOCK_ACCT_WRITE);
535         r->req.aiocb = blk_aio_writev(s->qdev.conf.blk, r->sector, &r->qiov, n,
536                                       scsi_write_complete, r);
537     }
538 }
539 
540 /* Return a pointer to the data buffer.  */
541 static uint8_t *scsi_get_buf(SCSIRequest *req)
542 {
543     SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
544 
545     return (uint8_t *)r->iov.iov_base;
546 }
547 
548 static int scsi_disk_emulate_inquiry(SCSIRequest *req, uint8_t *outbuf)
549 {
550     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
551     int buflen = 0;
552     int start;
553 
554     if (req->cmd.buf[1] & 0x1) {
555         /* Vital product data */
556         uint8_t page_code = req->cmd.buf[2];
557 
558         outbuf[buflen++] = s->qdev.type & 0x1f;
559         outbuf[buflen++] = page_code ; // this page
560         outbuf[buflen++] = 0x00;
561         outbuf[buflen++] = 0x00;
562         start = buflen;
563 
564         switch (page_code) {
565         case 0x00: /* Supported page codes, mandatory */
566         {
567             DPRINTF("Inquiry EVPD[Supported pages] "
568                     "buffer size %zd\n", req->cmd.xfer);
569             outbuf[buflen++] = 0x00; // list of supported pages (this page)
570             if (s->serial) {
571                 outbuf[buflen++] = 0x80; // unit serial number
572             }
573             outbuf[buflen++] = 0x83; // device identification
574             if (s->qdev.type == TYPE_DISK) {
575                 outbuf[buflen++] = 0xb0; // block limits
576                 outbuf[buflen++] = 0xb2; // thin provisioning
577             }
578             break;
579         }
580         case 0x80: /* Device serial number, optional */
581         {
582             int l;
583 
584             if (!s->serial) {
585                 DPRINTF("Inquiry (EVPD[Serial number] not supported\n");
586                 return -1;
587             }
588 
589             l = strlen(s->serial);
590             if (l > 20) {
591                 l = 20;
592             }
593 
594             DPRINTF("Inquiry EVPD[Serial number] "
595                     "buffer size %zd\n", req->cmd.xfer);
596             memcpy(outbuf+buflen, s->serial, l);
597             buflen += l;
598             break;
599         }
600 
601         case 0x83: /* Device identification page, mandatory */
602         {
603             const char *str = s->serial ?: blk_name(s->qdev.conf.blk);
604             int max_len = s->serial ? 20 : 255 - 8;
605             int id_len = strlen(str);
606 
607             if (id_len > max_len) {
608                 id_len = max_len;
609             }
610             DPRINTF("Inquiry EVPD[Device identification] "
611                     "buffer size %zd\n", req->cmd.xfer);
612 
613             outbuf[buflen++] = 0x2; // ASCII
614             outbuf[buflen++] = 0;   // not officially assigned
615             outbuf[buflen++] = 0;   // reserved
616             outbuf[buflen++] = id_len; // length of data following
617             memcpy(outbuf+buflen, str, id_len);
618             buflen += id_len;
619 
620             if (s->wwn) {
621                 outbuf[buflen++] = 0x1; // Binary
622                 outbuf[buflen++] = 0x3; // NAA
623                 outbuf[buflen++] = 0;   // reserved
624                 outbuf[buflen++] = 8;
625                 stq_be_p(&outbuf[buflen], s->wwn);
626                 buflen += 8;
627             }
628 
629             if (s->port_wwn) {
630                 outbuf[buflen++] = 0x61; // SAS / Binary
631                 outbuf[buflen++] = 0x93; // PIV / Target port / NAA
632                 outbuf[buflen++] = 0;    // reserved
633                 outbuf[buflen++] = 8;
634                 stq_be_p(&outbuf[buflen], s->port_wwn);
635                 buflen += 8;
636             }
637 
638             if (s->port_index) {
639                 outbuf[buflen++] = 0x61; // SAS / Binary
640                 outbuf[buflen++] = 0x94; // PIV / Target port / relative target port
641                 outbuf[buflen++] = 0;    // reserved
642                 outbuf[buflen++] = 4;
643                 stw_be_p(&outbuf[buflen + 2], s->port_index);
644                 buflen += 4;
645             }
646             break;
647         }
648         case 0xb0: /* block limits */
649         {
650             unsigned int unmap_sectors =
651                     s->qdev.conf.discard_granularity / s->qdev.blocksize;
652             unsigned int min_io_size =
653                     s->qdev.conf.min_io_size / s->qdev.blocksize;
654             unsigned int opt_io_size =
655                     s->qdev.conf.opt_io_size / s->qdev.blocksize;
656             unsigned int max_unmap_sectors =
657                     s->max_unmap_size / s->qdev.blocksize;
658             unsigned int max_io_sectors =
659                     s->max_io_size / s->qdev.blocksize;
660 
661             if (s->qdev.type == TYPE_ROM) {
662                 DPRINTF("Inquiry (EVPD[%02X] not supported for CDROM\n",
663                         page_code);
664                 return -1;
665             }
666             /* required VPD size with unmap support */
667             buflen = 0x40;
668             memset(outbuf + 4, 0, buflen - 4);
669 
670             outbuf[4] = 0x1; /* wsnz */
671 
672             /* optimal transfer length granularity */
673             outbuf[6] = (min_io_size >> 8) & 0xff;
674             outbuf[7] = min_io_size & 0xff;
675 
676             /* maximum transfer length */
677             outbuf[8] = (max_io_sectors >> 24) & 0xff;
678             outbuf[9] = (max_io_sectors >> 16) & 0xff;
679             outbuf[10] = (max_io_sectors >> 8) & 0xff;
680             outbuf[11] = max_io_sectors & 0xff;
681 
682             /* optimal transfer length */
683             outbuf[12] = (opt_io_size >> 24) & 0xff;
684             outbuf[13] = (opt_io_size >> 16) & 0xff;
685             outbuf[14] = (opt_io_size >> 8) & 0xff;
686             outbuf[15] = opt_io_size & 0xff;
687 
688             /* max unmap LBA count, default is 1GB */
689             outbuf[20] = (max_unmap_sectors >> 24) & 0xff;
690             outbuf[21] = (max_unmap_sectors >> 16) & 0xff;
691             outbuf[22] = (max_unmap_sectors >> 8) & 0xff;
692             outbuf[23] = max_unmap_sectors & 0xff;
693 
694             /* max unmap descriptors, 255 fit in 4 kb with an 8-byte header.  */
695             outbuf[24] = 0;
696             outbuf[25] = 0;
697             outbuf[26] = 0;
698             outbuf[27] = 255;
699 
700             /* optimal unmap granularity */
701             outbuf[28] = (unmap_sectors >> 24) & 0xff;
702             outbuf[29] = (unmap_sectors >> 16) & 0xff;
703             outbuf[30] = (unmap_sectors >> 8) & 0xff;
704             outbuf[31] = unmap_sectors & 0xff;
705 
706             /* max write same size */
707             outbuf[36] = 0;
708             outbuf[37] = 0;
709             outbuf[38] = 0;
710             outbuf[39] = 0;
711 
712             outbuf[40] = (max_io_sectors >> 24) & 0xff;
713             outbuf[41] = (max_io_sectors >> 16) & 0xff;
714             outbuf[42] = (max_io_sectors >> 8) & 0xff;
715             outbuf[43] = max_io_sectors & 0xff;
716             break;
717         }
718         case 0xb2: /* thin provisioning */
719         {
720             buflen = 8;
721             outbuf[4] = 0;
722             outbuf[5] = 0xe0; /* unmap & write_same 10/16 all supported */
723             outbuf[6] = s->qdev.conf.discard_granularity ? 2 : 1;
724             outbuf[7] = 0;
725             break;
726         }
727         default:
728             return -1;
729         }
730         /* done with EVPD */
731         assert(buflen - start <= 255);
732         outbuf[start - 1] = buflen - start;
733         return buflen;
734     }
735 
736     /* Standard INQUIRY data */
737     if (req->cmd.buf[2] != 0) {
738         return -1;
739     }
740 
741     /* PAGE CODE == 0 */
742     buflen = req->cmd.xfer;
743     if (buflen > SCSI_MAX_INQUIRY_LEN) {
744         buflen = SCSI_MAX_INQUIRY_LEN;
745     }
746 
747     outbuf[0] = s->qdev.type & 0x1f;
748     outbuf[1] = (s->features & (1 << SCSI_DISK_F_REMOVABLE)) ? 0x80 : 0;
749 
750     strpadcpy((char *) &outbuf[16], 16, s->product, ' ');
751     strpadcpy((char *) &outbuf[8], 8, s->vendor, ' ');
752 
753     memset(&outbuf[32], 0, 4);
754     memcpy(&outbuf[32], s->version, MIN(4, strlen(s->version)));
755     /*
756      * We claim conformance to SPC-3, which is required for guests
757      * to ask for modern features like READ CAPACITY(16) or the
758      * block characteristics VPD page by default.  Not all of SPC-3
759      * is actually implemented, but we're good enough.
760      */
761     outbuf[2] = 5;
762     outbuf[3] = 2 | 0x10; /* Format 2, HiSup */
763 
764     if (buflen > 36) {
765         outbuf[4] = buflen - 5; /* Additional Length = (Len - 1) - 4 */
766     } else {
767         /* If the allocation length of CDB is too small,
768                the additional length is not adjusted */
769         outbuf[4] = 36 - 5;
770     }
771 
772     /* Sync data transfer and TCQ.  */
773     outbuf[7] = 0x10 | (req->bus->info->tcq ? 0x02 : 0);
774     return buflen;
775 }
776 
777 static inline bool media_is_dvd(SCSIDiskState *s)
778 {
779     uint64_t nb_sectors;
780     if (s->qdev.type != TYPE_ROM) {
781         return false;
782     }
783     if (!blk_is_inserted(s->qdev.conf.blk)) {
784         return false;
785     }
786     if (s->tray_open) {
787         return false;
788     }
789     blk_get_geometry(s->qdev.conf.blk, &nb_sectors);
790     return nb_sectors > CD_MAX_SECTORS;
791 }
792 
793 static inline bool media_is_cd(SCSIDiskState *s)
794 {
795     uint64_t nb_sectors;
796     if (s->qdev.type != TYPE_ROM) {
797         return false;
798     }
799     if (!blk_is_inserted(s->qdev.conf.blk)) {
800         return false;
801     }
802     if (s->tray_open) {
803         return false;
804     }
805     blk_get_geometry(s->qdev.conf.blk, &nb_sectors);
806     return nb_sectors <= CD_MAX_SECTORS;
807 }
808 
809 static int scsi_read_disc_information(SCSIDiskState *s, SCSIDiskReq *r,
810                                       uint8_t *outbuf)
811 {
812     uint8_t type = r->req.cmd.buf[1] & 7;
813 
814     if (s->qdev.type != TYPE_ROM) {
815         return -1;
816     }
817 
818     /* Types 1/2 are only defined for Blu-Ray.  */
819     if (type != 0) {
820         scsi_check_condition(r, SENSE_CODE(INVALID_FIELD));
821         return -1;
822     }
823 
824     memset(outbuf, 0, 34);
825     outbuf[1] = 32;
826     outbuf[2] = 0xe; /* last session complete, disc finalized */
827     outbuf[3] = 1;   /* first track on disc */
828     outbuf[4] = 1;   /* # of sessions */
829     outbuf[5] = 1;   /* first track of last session */
830     outbuf[6] = 1;   /* last track of last session */
831     outbuf[7] = 0x20; /* unrestricted use */
832     outbuf[8] = 0x00; /* CD-ROM or DVD-ROM */
833     /* 9-10-11: most significant byte corresponding bytes 4-5-6 */
834     /* 12-23: not meaningful for CD-ROM or DVD-ROM */
835     /* 24-31: disc bar code */
836     /* 32: disc application code */
837     /* 33: number of OPC tables */
838 
839     return 34;
840 }
841 
842 static int scsi_read_dvd_structure(SCSIDiskState *s, SCSIDiskReq *r,
843                                    uint8_t *outbuf)
844 {
845     static const int rds_caps_size[5] = {
846         [0] = 2048 + 4,
847         [1] = 4 + 4,
848         [3] = 188 + 4,
849         [4] = 2048 + 4,
850     };
851 
852     uint8_t media = r->req.cmd.buf[1];
853     uint8_t layer = r->req.cmd.buf[6];
854     uint8_t format = r->req.cmd.buf[7];
855     int size = -1;
856 
857     if (s->qdev.type != TYPE_ROM) {
858         return -1;
859     }
860     if (media != 0) {
861         scsi_check_condition(r, SENSE_CODE(INVALID_FIELD));
862         return -1;
863     }
864 
865     if (format != 0xff) {
866         if (s->tray_open || !blk_is_inserted(s->qdev.conf.blk)) {
867             scsi_check_condition(r, SENSE_CODE(NO_MEDIUM));
868             return -1;
869         }
870         if (media_is_cd(s)) {
871             scsi_check_condition(r, SENSE_CODE(INCOMPATIBLE_FORMAT));
872             return -1;
873         }
874         if (format >= ARRAY_SIZE(rds_caps_size)) {
875             return -1;
876         }
877         size = rds_caps_size[format];
878         memset(outbuf, 0, size);
879     }
880 
881     switch (format) {
882     case 0x00: {
883         /* Physical format information */
884         uint64_t nb_sectors;
885         if (layer != 0) {
886             goto fail;
887         }
888         blk_get_geometry(s->qdev.conf.blk, &nb_sectors);
889 
890         outbuf[4] = 1;   /* DVD-ROM, part version 1 */
891         outbuf[5] = 0xf; /* 120mm disc, minimum rate unspecified */
892         outbuf[6] = 1;   /* one layer, read-only (per MMC-2 spec) */
893         outbuf[7] = 0;   /* default densities */
894 
895         stl_be_p(&outbuf[12], (nb_sectors >> 2) - 1); /* end sector */
896         stl_be_p(&outbuf[16], (nb_sectors >> 2) - 1); /* l0 end sector */
897         break;
898     }
899 
900     case 0x01: /* DVD copyright information, all zeros */
901         break;
902 
903     case 0x03: /* BCA information - invalid field for no BCA info */
904         return -1;
905 
906     case 0x04: /* DVD disc manufacturing information, all zeros */
907         break;
908 
909     case 0xff: { /* List capabilities */
910         int i;
911         size = 4;
912         for (i = 0; i < ARRAY_SIZE(rds_caps_size); i++) {
913             if (!rds_caps_size[i]) {
914                 continue;
915             }
916             outbuf[size] = i;
917             outbuf[size + 1] = 0x40; /* Not writable, readable */
918             stw_be_p(&outbuf[size + 2], rds_caps_size[i]);
919             size += 4;
920         }
921         break;
922      }
923 
924     default:
925         return -1;
926     }
927 
928     /* Size of buffer, not including 2 byte size field */
929     stw_be_p(outbuf, size - 2);
930     return size;
931 
932 fail:
933     return -1;
934 }
935 
936 static int scsi_event_status_media(SCSIDiskState *s, uint8_t *outbuf)
937 {
938     uint8_t event_code, media_status;
939 
940     media_status = 0;
941     if (s->tray_open) {
942         media_status = MS_TRAY_OPEN;
943     } else if (blk_is_inserted(s->qdev.conf.blk)) {
944         media_status = MS_MEDIA_PRESENT;
945     }
946 
947     /* Event notification descriptor */
948     event_code = MEC_NO_CHANGE;
949     if (media_status != MS_TRAY_OPEN) {
950         if (s->media_event) {
951             event_code = MEC_NEW_MEDIA;
952             s->media_event = false;
953         } else if (s->eject_request) {
954             event_code = MEC_EJECT_REQUESTED;
955             s->eject_request = false;
956         }
957     }
958 
959     outbuf[0] = event_code;
960     outbuf[1] = media_status;
961 
962     /* These fields are reserved, just clear them. */
963     outbuf[2] = 0;
964     outbuf[3] = 0;
965     return 4;
966 }
967 
968 static int scsi_get_event_status_notification(SCSIDiskState *s, SCSIDiskReq *r,
969                                               uint8_t *outbuf)
970 {
971     int size;
972     uint8_t *buf = r->req.cmd.buf;
973     uint8_t notification_class_request = buf[4];
974     if (s->qdev.type != TYPE_ROM) {
975         return -1;
976     }
977     if ((buf[1] & 1) == 0) {
978         /* asynchronous */
979         return -1;
980     }
981 
982     size = 4;
983     outbuf[0] = outbuf[1] = 0;
984     outbuf[3] = 1 << GESN_MEDIA; /* supported events */
985     if (notification_class_request & (1 << GESN_MEDIA)) {
986         outbuf[2] = GESN_MEDIA;
987         size += scsi_event_status_media(s, &outbuf[size]);
988     } else {
989         outbuf[2] = 0x80;
990     }
991     stw_be_p(outbuf, size - 4);
992     return size;
993 }
994 
995 static int scsi_get_configuration(SCSIDiskState *s, uint8_t *outbuf)
996 {
997     int current;
998 
999     if (s->qdev.type != TYPE_ROM) {
1000         return -1;
1001     }
1002 
1003     if (media_is_dvd(s)) {
1004         current = MMC_PROFILE_DVD_ROM;
1005     } else if (media_is_cd(s)) {
1006         current = MMC_PROFILE_CD_ROM;
1007     } else {
1008         current = MMC_PROFILE_NONE;
1009     }
1010 
1011     memset(outbuf, 0, 40);
1012     stl_be_p(&outbuf[0], 36); /* Bytes after the data length field */
1013     stw_be_p(&outbuf[6], current);
1014     /* outbuf[8] - outbuf[19]: Feature 0 - Profile list */
1015     outbuf[10] = 0x03; /* persistent, current */
1016     outbuf[11] = 8; /* two profiles */
1017     stw_be_p(&outbuf[12], MMC_PROFILE_DVD_ROM);
1018     outbuf[14] = (current == MMC_PROFILE_DVD_ROM);
1019     stw_be_p(&outbuf[16], MMC_PROFILE_CD_ROM);
1020     outbuf[18] = (current == MMC_PROFILE_CD_ROM);
1021     /* outbuf[20] - outbuf[31]: Feature 1 - Core feature */
1022     stw_be_p(&outbuf[20], 1);
1023     outbuf[22] = 0x08 | 0x03; /* version 2, persistent, current */
1024     outbuf[23] = 8;
1025     stl_be_p(&outbuf[24], 1); /* SCSI */
1026     outbuf[28] = 1; /* DBE = 1, mandatory */
1027     /* outbuf[32] - outbuf[39]: Feature 3 - Removable media feature */
1028     stw_be_p(&outbuf[32], 3);
1029     outbuf[34] = 0x08 | 0x03; /* version 2, persistent, current */
1030     outbuf[35] = 4;
1031     outbuf[36] = 0x39; /* tray, load=1, eject=1, unlocked at powerup, lock=1 */
1032     /* TODO: Random readable, CD read, DVD read, drive serial number,
1033        power management */
1034     return 40;
1035 }
1036 
1037 static int scsi_emulate_mechanism_status(SCSIDiskState *s, uint8_t *outbuf)
1038 {
1039     if (s->qdev.type != TYPE_ROM) {
1040         return -1;
1041     }
1042     memset(outbuf, 0, 8);
1043     outbuf[5] = 1; /* CD-ROM */
1044     return 8;
1045 }
1046 
1047 static int mode_sense_page(SCSIDiskState *s, int page, uint8_t **p_outbuf,
1048                            int page_control)
1049 {
1050     static const int mode_sense_valid[0x3f] = {
1051         [MODE_PAGE_HD_GEOMETRY]            = (1 << TYPE_DISK),
1052         [MODE_PAGE_FLEXIBLE_DISK_GEOMETRY] = (1 << TYPE_DISK),
1053         [MODE_PAGE_CACHING]                = (1 << TYPE_DISK) | (1 << TYPE_ROM),
1054         [MODE_PAGE_R_W_ERROR]              = (1 << TYPE_DISK) | (1 << TYPE_ROM),
1055         [MODE_PAGE_AUDIO_CTL]              = (1 << TYPE_ROM),
1056         [MODE_PAGE_CAPABILITIES]           = (1 << TYPE_ROM),
1057     };
1058 
1059     uint8_t *p = *p_outbuf + 2;
1060     int length;
1061 
1062     if ((mode_sense_valid[page] & (1 << s->qdev.type)) == 0) {
1063         return -1;
1064     }
1065 
1066     /*
1067      * If Changeable Values are requested, a mask denoting those mode parameters
1068      * that are changeable shall be returned. As we currently don't support
1069      * parameter changes via MODE_SELECT all bits are returned set to zero.
1070      * The buffer was already menset to zero by the caller of this function.
1071      *
1072      * The offsets here are off by two compared to the descriptions in the
1073      * SCSI specs, because those include a 2-byte header.  This is unfortunate,
1074      * but it is done so that offsets are consistent within our implementation
1075      * of MODE SENSE and MODE SELECT.  MODE SELECT has to deal with both
1076      * 2-byte and 4-byte headers.
1077      */
1078     switch (page) {
1079     case MODE_PAGE_HD_GEOMETRY:
1080         length = 0x16;
1081         if (page_control == 1) { /* Changeable Values */
1082             break;
1083         }
1084         /* if a geometry hint is available, use it */
1085         p[0] = (s->qdev.conf.cyls >> 16) & 0xff;
1086         p[1] = (s->qdev.conf.cyls >> 8) & 0xff;
1087         p[2] = s->qdev.conf.cyls & 0xff;
1088         p[3] = s->qdev.conf.heads & 0xff;
1089         /* Write precomp start cylinder, disabled */
1090         p[4] = (s->qdev.conf.cyls >> 16) & 0xff;
1091         p[5] = (s->qdev.conf.cyls >> 8) & 0xff;
1092         p[6] = s->qdev.conf.cyls & 0xff;
1093         /* Reduced current start cylinder, disabled */
1094         p[7] = (s->qdev.conf.cyls >> 16) & 0xff;
1095         p[8] = (s->qdev.conf.cyls >> 8) & 0xff;
1096         p[9] = s->qdev.conf.cyls & 0xff;
1097         /* Device step rate [ns], 200ns */
1098         p[10] = 0;
1099         p[11] = 200;
1100         /* Landing zone cylinder */
1101         p[12] = 0xff;
1102         p[13] =  0xff;
1103         p[14] = 0xff;
1104         /* Medium rotation rate [rpm], 5400 rpm */
1105         p[18] = (5400 >> 8) & 0xff;
1106         p[19] = 5400 & 0xff;
1107         break;
1108 
1109     case MODE_PAGE_FLEXIBLE_DISK_GEOMETRY:
1110         length = 0x1e;
1111         if (page_control == 1) { /* Changeable Values */
1112             break;
1113         }
1114         /* Transfer rate [kbit/s], 5Mbit/s */
1115         p[0] = 5000 >> 8;
1116         p[1] = 5000 & 0xff;
1117         /* if a geometry hint is available, use it */
1118         p[2] = s->qdev.conf.heads & 0xff;
1119         p[3] = s->qdev.conf.secs & 0xff;
1120         p[4] = s->qdev.blocksize >> 8;
1121         p[6] = (s->qdev.conf.cyls >> 8) & 0xff;
1122         p[7] = s->qdev.conf.cyls & 0xff;
1123         /* Write precomp start cylinder, disabled */
1124         p[8] = (s->qdev.conf.cyls >> 8) & 0xff;
1125         p[9] = s->qdev.conf.cyls & 0xff;
1126         /* Reduced current start cylinder, disabled */
1127         p[10] = (s->qdev.conf.cyls >> 8) & 0xff;
1128         p[11] = s->qdev.conf.cyls & 0xff;
1129         /* Device step rate [100us], 100us */
1130         p[12] = 0;
1131         p[13] = 1;
1132         /* Device step pulse width [us], 1us */
1133         p[14] = 1;
1134         /* Device head settle delay [100us], 100us */
1135         p[15] = 0;
1136         p[16] = 1;
1137         /* Motor on delay [0.1s], 0.1s */
1138         p[17] = 1;
1139         /* Motor off delay [0.1s], 0.1s */
1140         p[18] = 1;
1141         /* Medium rotation rate [rpm], 5400 rpm */
1142         p[26] = (5400 >> 8) & 0xff;
1143         p[27] = 5400 & 0xff;
1144         break;
1145 
1146     case MODE_PAGE_CACHING:
1147         length = 0x12;
1148         if (page_control == 1 || /* Changeable Values */
1149             blk_enable_write_cache(s->qdev.conf.blk)) {
1150             p[0] = 4; /* WCE */
1151         }
1152         break;
1153 
1154     case MODE_PAGE_R_W_ERROR:
1155         length = 10;
1156         if (page_control == 1) { /* Changeable Values */
1157             break;
1158         }
1159         p[0] = 0x80; /* Automatic Write Reallocation Enabled */
1160         if (s->qdev.type == TYPE_ROM) {
1161             p[1] = 0x20; /* Read Retry Count */
1162         }
1163         break;
1164 
1165     case MODE_PAGE_AUDIO_CTL:
1166         length = 14;
1167         break;
1168 
1169     case MODE_PAGE_CAPABILITIES:
1170         length = 0x14;
1171         if (page_control == 1) { /* Changeable Values */
1172             break;
1173         }
1174 
1175         p[0] = 0x3b; /* CD-R & CD-RW read */
1176         p[1] = 0; /* Writing not supported */
1177         p[2] = 0x7f; /* Audio, composite, digital out,
1178                         mode 2 form 1&2, multi session */
1179         p[3] = 0xff; /* CD DA, DA accurate, RW supported,
1180                         RW corrected, C2 errors, ISRC,
1181                         UPC, Bar code */
1182         p[4] = 0x2d | (s->tray_locked ? 2 : 0);
1183         /* Locking supported, jumper present, eject, tray */
1184         p[5] = 0; /* no volume & mute control, no
1185                      changer */
1186         p[6] = (50 * 176) >> 8; /* 50x read speed */
1187         p[7] = (50 * 176) & 0xff;
1188         p[8] = 2 >> 8; /* Two volume levels */
1189         p[9] = 2 & 0xff;
1190         p[10] = 2048 >> 8; /* 2M buffer */
1191         p[11] = 2048 & 0xff;
1192         p[12] = (16 * 176) >> 8; /* 16x read speed current */
1193         p[13] = (16 * 176) & 0xff;
1194         p[16] = (16 * 176) >> 8; /* 16x write speed */
1195         p[17] = (16 * 176) & 0xff;
1196         p[18] = (16 * 176) >> 8; /* 16x write speed current */
1197         p[19] = (16 * 176) & 0xff;
1198         break;
1199 
1200     default:
1201         return -1;
1202     }
1203 
1204     assert(length < 256);
1205     (*p_outbuf)[0] = page;
1206     (*p_outbuf)[1] = length;
1207     *p_outbuf += length + 2;
1208     return length + 2;
1209 }
1210 
1211 static int scsi_disk_emulate_mode_sense(SCSIDiskReq *r, uint8_t *outbuf)
1212 {
1213     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
1214     uint64_t nb_sectors;
1215     bool dbd;
1216     int page, buflen, ret, page_control;
1217     uint8_t *p;
1218     uint8_t dev_specific_param;
1219 
1220     dbd = (r->req.cmd.buf[1] & 0x8) != 0;
1221     page = r->req.cmd.buf[2] & 0x3f;
1222     page_control = (r->req.cmd.buf[2] & 0xc0) >> 6;
1223     DPRINTF("Mode Sense(%d) (page %d, xfer %zd, page_control %d)\n",
1224         (r->req.cmd.buf[0] == MODE_SENSE) ? 6 : 10, page, r->req.cmd.xfer, page_control);
1225     memset(outbuf, 0, r->req.cmd.xfer);
1226     p = outbuf;
1227 
1228     if (s->qdev.type == TYPE_DISK) {
1229         dev_specific_param = s->features & (1 << SCSI_DISK_F_DPOFUA) ? 0x10 : 0;
1230         if (blk_is_read_only(s->qdev.conf.blk)) {
1231             dev_specific_param |= 0x80; /* Readonly.  */
1232         }
1233     } else {
1234         /* MMC prescribes that CD/DVD drives have no block descriptors,
1235          * and defines no device-specific parameter.  */
1236         dev_specific_param = 0x00;
1237         dbd = true;
1238     }
1239 
1240     if (r->req.cmd.buf[0] == MODE_SENSE) {
1241         p[1] = 0; /* Default media type.  */
1242         p[2] = dev_specific_param;
1243         p[3] = 0; /* Block descriptor length.  */
1244         p += 4;
1245     } else { /* MODE_SENSE_10 */
1246         p[2] = 0; /* Default media type.  */
1247         p[3] = dev_specific_param;
1248         p[6] = p[7] = 0; /* Block descriptor length.  */
1249         p += 8;
1250     }
1251 
1252     blk_get_geometry(s->qdev.conf.blk, &nb_sectors);
1253     if (!dbd && nb_sectors) {
1254         if (r->req.cmd.buf[0] == MODE_SENSE) {
1255             outbuf[3] = 8; /* Block descriptor length  */
1256         } else { /* MODE_SENSE_10 */
1257             outbuf[7] = 8; /* Block descriptor length  */
1258         }
1259         nb_sectors /= (s->qdev.blocksize / 512);
1260         if (nb_sectors > 0xffffff) {
1261             nb_sectors = 0;
1262         }
1263         p[0] = 0; /* media density code */
1264         p[1] = (nb_sectors >> 16) & 0xff;
1265         p[2] = (nb_sectors >> 8) & 0xff;
1266         p[3] = nb_sectors & 0xff;
1267         p[4] = 0; /* reserved */
1268         p[5] = 0; /* bytes 5-7 are the sector size in bytes */
1269         p[6] = s->qdev.blocksize >> 8;
1270         p[7] = 0;
1271         p += 8;
1272     }
1273 
1274     if (page_control == 3) {
1275         /* Saved Values */
1276         scsi_check_condition(r, SENSE_CODE(SAVING_PARAMS_NOT_SUPPORTED));
1277         return -1;
1278     }
1279 
1280     if (page == 0x3f) {
1281         for (page = 0; page <= 0x3e; page++) {
1282             mode_sense_page(s, page, &p, page_control);
1283         }
1284     } else {
1285         ret = mode_sense_page(s, page, &p, page_control);
1286         if (ret == -1) {
1287             return -1;
1288         }
1289     }
1290 
1291     buflen = p - outbuf;
1292     /*
1293      * The mode data length field specifies the length in bytes of the
1294      * following data that is available to be transferred. The mode data
1295      * length does not include itself.
1296      */
1297     if (r->req.cmd.buf[0] == MODE_SENSE) {
1298         outbuf[0] = buflen - 1;
1299     } else { /* MODE_SENSE_10 */
1300         outbuf[0] = ((buflen - 2) >> 8) & 0xff;
1301         outbuf[1] = (buflen - 2) & 0xff;
1302     }
1303     return buflen;
1304 }
1305 
1306 static int scsi_disk_emulate_read_toc(SCSIRequest *req, uint8_t *outbuf)
1307 {
1308     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
1309     int start_track, format, msf, toclen;
1310     uint64_t nb_sectors;
1311 
1312     msf = req->cmd.buf[1] & 2;
1313     format = req->cmd.buf[2] & 0xf;
1314     start_track = req->cmd.buf[6];
1315     blk_get_geometry(s->qdev.conf.blk, &nb_sectors);
1316     DPRINTF("Read TOC (track %d format %d msf %d)\n", start_track, format, msf >> 1);
1317     nb_sectors /= s->qdev.blocksize / 512;
1318     switch (format) {
1319     case 0:
1320         toclen = cdrom_read_toc(nb_sectors, outbuf, msf, start_track);
1321         break;
1322     case 1:
1323         /* multi session : only a single session defined */
1324         toclen = 12;
1325         memset(outbuf, 0, 12);
1326         outbuf[1] = 0x0a;
1327         outbuf[2] = 0x01;
1328         outbuf[3] = 0x01;
1329         break;
1330     case 2:
1331         toclen = cdrom_read_toc_raw(nb_sectors, outbuf, msf, start_track);
1332         break;
1333     default:
1334         return -1;
1335     }
1336     return toclen;
1337 }
1338 
1339 static int scsi_disk_emulate_start_stop(SCSIDiskReq *r)
1340 {
1341     SCSIRequest *req = &r->req;
1342     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
1343     bool start = req->cmd.buf[4] & 1;
1344     bool loej = req->cmd.buf[4] & 2; /* load on start, eject on !start */
1345     int pwrcnd = req->cmd.buf[4] & 0xf0;
1346 
1347     if (pwrcnd) {
1348         /* eject/load only happens for power condition == 0 */
1349         return 0;
1350     }
1351 
1352     if ((s->features & (1 << SCSI_DISK_F_REMOVABLE)) && loej) {
1353         if (!start && !s->tray_open && s->tray_locked) {
1354             scsi_check_condition(r,
1355                                  blk_is_inserted(s->qdev.conf.blk)
1356                                  ? SENSE_CODE(ILLEGAL_REQ_REMOVAL_PREVENTED)
1357                                  : SENSE_CODE(NOT_READY_REMOVAL_PREVENTED));
1358             return -1;
1359         }
1360 
1361         if (s->tray_open != !start) {
1362             blk_eject(s->qdev.conf.blk, !start);
1363             s->tray_open = !start;
1364         }
1365     }
1366     return 0;
1367 }
1368 
1369 static void scsi_disk_emulate_read_data(SCSIRequest *req)
1370 {
1371     SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
1372     int buflen = r->iov.iov_len;
1373 
1374     if (buflen) {
1375         DPRINTF("Read buf_len=%d\n", buflen);
1376         r->iov.iov_len = 0;
1377         r->started = true;
1378         scsi_req_data(&r->req, buflen);
1379         return;
1380     }
1381 
1382     /* This also clears the sense buffer for REQUEST SENSE.  */
1383     scsi_req_complete(&r->req, GOOD);
1384 }
1385 
1386 static int scsi_disk_check_mode_select(SCSIDiskState *s, int page,
1387                                        uint8_t *inbuf, int inlen)
1388 {
1389     uint8_t mode_current[SCSI_MAX_MODE_LEN];
1390     uint8_t mode_changeable[SCSI_MAX_MODE_LEN];
1391     uint8_t *p;
1392     int len, expected_len, changeable_len, i;
1393 
1394     /* The input buffer does not include the page header, so it is
1395      * off by 2 bytes.
1396      */
1397     expected_len = inlen + 2;
1398     if (expected_len > SCSI_MAX_MODE_LEN) {
1399         return -1;
1400     }
1401 
1402     p = mode_current;
1403     memset(mode_current, 0, inlen + 2);
1404     len = mode_sense_page(s, page, &p, 0);
1405     if (len < 0 || len != expected_len) {
1406         return -1;
1407     }
1408 
1409     p = mode_changeable;
1410     memset(mode_changeable, 0, inlen + 2);
1411     changeable_len = mode_sense_page(s, page, &p, 1);
1412     assert(changeable_len == len);
1413 
1414     /* Check that unchangeable bits are the same as what MODE SENSE
1415      * would return.
1416      */
1417     for (i = 2; i < len; i++) {
1418         if (((mode_current[i] ^ inbuf[i - 2]) & ~mode_changeable[i]) != 0) {
1419             return -1;
1420         }
1421     }
1422     return 0;
1423 }
1424 
1425 static void scsi_disk_apply_mode_select(SCSIDiskState *s, int page, uint8_t *p)
1426 {
1427     switch (page) {
1428     case MODE_PAGE_CACHING:
1429         blk_set_enable_write_cache(s->qdev.conf.blk, (p[0] & 4) != 0);
1430         break;
1431 
1432     default:
1433         break;
1434     }
1435 }
1436 
1437 static int mode_select_pages(SCSIDiskReq *r, uint8_t *p, int len, bool change)
1438 {
1439     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
1440 
1441     while (len > 0) {
1442         int page, subpage, page_len;
1443 
1444         /* Parse both possible formats for the mode page headers.  */
1445         page = p[0] & 0x3f;
1446         if (p[0] & 0x40) {
1447             if (len < 4) {
1448                 goto invalid_param_len;
1449             }
1450             subpage = p[1];
1451             page_len = lduw_be_p(&p[2]);
1452             p += 4;
1453             len -= 4;
1454         } else {
1455             if (len < 2) {
1456                 goto invalid_param_len;
1457             }
1458             subpage = 0;
1459             page_len = p[1];
1460             p += 2;
1461             len -= 2;
1462         }
1463 
1464         if (subpage) {
1465             goto invalid_param;
1466         }
1467         if (page_len > len) {
1468             goto invalid_param_len;
1469         }
1470 
1471         if (!change) {
1472             if (scsi_disk_check_mode_select(s, page, p, page_len) < 0) {
1473                 goto invalid_param;
1474             }
1475         } else {
1476             scsi_disk_apply_mode_select(s, page, p);
1477         }
1478 
1479         p += page_len;
1480         len -= page_len;
1481     }
1482     return 0;
1483 
1484 invalid_param:
1485     scsi_check_condition(r, SENSE_CODE(INVALID_PARAM));
1486     return -1;
1487 
1488 invalid_param_len:
1489     scsi_check_condition(r, SENSE_CODE(INVALID_PARAM_LEN));
1490     return -1;
1491 }
1492 
1493 static void scsi_disk_emulate_mode_select(SCSIDiskReq *r, uint8_t *inbuf)
1494 {
1495     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
1496     uint8_t *p = inbuf;
1497     int cmd = r->req.cmd.buf[0];
1498     int len = r->req.cmd.xfer;
1499     int hdr_len = (cmd == MODE_SELECT ? 4 : 8);
1500     int bd_len;
1501     int pass;
1502 
1503     /* We only support PF=1, SP=0.  */
1504     if ((r->req.cmd.buf[1] & 0x11) != 0x10) {
1505         goto invalid_field;
1506     }
1507 
1508     if (len < hdr_len) {
1509         goto invalid_param_len;
1510     }
1511 
1512     bd_len = (cmd == MODE_SELECT ? p[3] : lduw_be_p(&p[6]));
1513     len -= hdr_len;
1514     p += hdr_len;
1515     if (len < bd_len) {
1516         goto invalid_param_len;
1517     }
1518     if (bd_len != 0 && bd_len != 8) {
1519         goto invalid_param;
1520     }
1521 
1522     len -= bd_len;
1523     p += bd_len;
1524 
1525     /* Ensure no change is made if there is an error!  */
1526     for (pass = 0; pass < 2; pass++) {
1527         if (mode_select_pages(r, p, len, pass == 1) < 0) {
1528             assert(pass == 0);
1529             return;
1530         }
1531     }
1532     if (!blk_enable_write_cache(s->qdev.conf.blk)) {
1533         /* The request is used as the AIO opaque value, so add a ref.  */
1534         scsi_req_ref(&r->req);
1535         block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct, 0,
1536                          BLOCK_ACCT_FLUSH);
1537         r->req.aiocb = blk_aio_flush(s->qdev.conf.blk, scsi_aio_complete, r);
1538         return;
1539     }
1540 
1541     scsi_req_complete(&r->req, GOOD);
1542     return;
1543 
1544 invalid_param:
1545     scsi_check_condition(r, SENSE_CODE(INVALID_PARAM));
1546     return;
1547 
1548 invalid_param_len:
1549     scsi_check_condition(r, SENSE_CODE(INVALID_PARAM_LEN));
1550     return;
1551 
1552 invalid_field:
1553     scsi_check_condition(r, SENSE_CODE(INVALID_FIELD));
1554 }
1555 
1556 static inline bool check_lba_range(SCSIDiskState *s,
1557                                    uint64_t sector_num, uint32_t nb_sectors)
1558 {
1559     /*
1560      * The first line tests that no overflow happens when computing the last
1561      * sector.  The second line tests that the last accessed sector is in
1562      * range.
1563      *
1564      * Careful, the computations should not underflow for nb_sectors == 0,
1565      * and a 0-block read to the first LBA beyond the end of device is
1566      * valid.
1567      */
1568     return (sector_num <= sector_num + nb_sectors &&
1569             sector_num + nb_sectors <= s->qdev.max_lba + 1);
1570 }
1571 
1572 typedef struct UnmapCBData {
1573     SCSIDiskReq *r;
1574     uint8_t *inbuf;
1575     int count;
1576 } UnmapCBData;
1577 
1578 static void scsi_unmap_complete(void *opaque, int ret);
1579 
1580 static void scsi_unmap_complete_noio(UnmapCBData *data, int ret)
1581 {
1582     SCSIDiskReq *r = data->r;
1583     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
1584     uint64_t sector_num;
1585     uint32_t nb_sectors;
1586 
1587     assert(r->req.aiocb == NULL);
1588 
1589     if (r->req.io_canceled) {
1590         scsi_req_cancel_complete(&r->req);
1591         goto done;
1592     }
1593 
1594     if (ret < 0) {
1595         if (scsi_handle_rw_error(r, -ret)) {
1596             goto done;
1597         }
1598     }
1599 
1600     if (data->count > 0) {
1601         sector_num = ldq_be_p(&data->inbuf[0]);
1602         nb_sectors = ldl_be_p(&data->inbuf[8]) & 0xffffffffULL;
1603         if (!check_lba_range(s, sector_num, nb_sectors)) {
1604             scsi_check_condition(r, SENSE_CODE(LBA_OUT_OF_RANGE));
1605             goto done;
1606         }
1607 
1608         r->req.aiocb = blk_aio_discard(s->qdev.conf.blk,
1609                                        sector_num * (s->qdev.blocksize / 512),
1610                                        nb_sectors * (s->qdev.blocksize / 512),
1611                                        scsi_unmap_complete, data);
1612         data->count--;
1613         data->inbuf += 16;
1614         return;
1615     }
1616 
1617     scsi_req_complete(&r->req, GOOD);
1618 
1619 done:
1620     scsi_req_unref(&r->req);
1621     g_free(data);
1622 }
1623 
1624 static void scsi_unmap_complete(void *opaque, int ret)
1625 {
1626     UnmapCBData *data = opaque;
1627     SCSIDiskReq *r = data->r;
1628 
1629     assert(r->req.aiocb != NULL);
1630     r->req.aiocb = NULL;
1631 
1632     scsi_unmap_complete_noio(data, ret);
1633 }
1634 
1635 static void scsi_disk_emulate_unmap(SCSIDiskReq *r, uint8_t *inbuf)
1636 {
1637     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
1638     uint8_t *p = inbuf;
1639     int len = r->req.cmd.xfer;
1640     UnmapCBData *data;
1641 
1642     /* Reject ANCHOR=1.  */
1643     if (r->req.cmd.buf[1] & 0x1) {
1644         goto invalid_field;
1645     }
1646 
1647     if (len < 8) {
1648         goto invalid_param_len;
1649     }
1650     if (len < lduw_be_p(&p[0]) + 2) {
1651         goto invalid_param_len;
1652     }
1653     if (len < lduw_be_p(&p[2]) + 8) {
1654         goto invalid_param_len;
1655     }
1656     if (lduw_be_p(&p[2]) & 15) {
1657         goto invalid_param_len;
1658     }
1659 
1660     if (blk_is_read_only(s->qdev.conf.blk)) {
1661         scsi_check_condition(r, SENSE_CODE(WRITE_PROTECTED));
1662         return;
1663     }
1664 
1665     data = g_new0(UnmapCBData, 1);
1666     data->r = r;
1667     data->inbuf = &p[8];
1668     data->count = lduw_be_p(&p[2]) >> 4;
1669 
1670     /* The matching unref is in scsi_unmap_complete, before data is freed.  */
1671     scsi_req_ref(&r->req);
1672     scsi_unmap_complete_noio(data, 0);
1673     return;
1674 
1675 invalid_param_len:
1676     scsi_check_condition(r, SENSE_CODE(INVALID_PARAM_LEN));
1677     return;
1678 
1679 invalid_field:
1680     scsi_check_condition(r, SENSE_CODE(INVALID_FIELD));
1681 }
1682 
1683 typedef struct WriteSameCBData {
1684     SCSIDiskReq *r;
1685     int64_t sector;
1686     int nb_sectors;
1687     QEMUIOVector qiov;
1688     struct iovec iov;
1689 } WriteSameCBData;
1690 
1691 static void scsi_write_same_complete(void *opaque, int ret)
1692 {
1693     WriteSameCBData *data = opaque;
1694     SCSIDiskReq *r = data->r;
1695     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
1696 
1697     assert(r->req.aiocb != NULL);
1698     r->req.aiocb = NULL;
1699     block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct);
1700     if (r->req.io_canceled) {
1701         scsi_req_cancel_complete(&r->req);
1702         goto done;
1703     }
1704 
1705     if (ret < 0) {
1706         if (scsi_handle_rw_error(r, -ret)) {
1707             goto done;
1708         }
1709     }
1710 
1711     data->nb_sectors -= data->iov.iov_len / 512;
1712     data->sector += data->iov.iov_len / 512;
1713     data->iov.iov_len = MIN(data->nb_sectors * 512, data->iov.iov_len);
1714     if (data->iov.iov_len) {
1715         block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct,
1716                          data->iov.iov_len, BLOCK_ACCT_WRITE);
1717         /* blk_aio_write doesn't like the qiov size being different from
1718          * nb_sectors, make sure they match.
1719          */
1720         qemu_iovec_init_external(&data->qiov, &data->iov, 1);
1721         r->req.aiocb = blk_aio_writev(s->qdev.conf.blk, data->sector,
1722                                       &data->qiov, data->iov.iov_len / 512,
1723                                       scsi_write_same_complete, data);
1724         return;
1725     }
1726 
1727     scsi_req_complete(&r->req, GOOD);
1728 
1729 done:
1730     scsi_req_unref(&r->req);
1731     qemu_vfree(data->iov.iov_base);
1732     g_free(data);
1733 }
1734 
1735 static void scsi_disk_emulate_write_same(SCSIDiskReq *r, uint8_t *inbuf)
1736 {
1737     SCSIRequest *req = &r->req;
1738     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
1739     uint32_t nb_sectors = scsi_data_cdb_xfer(r->req.cmd.buf);
1740     WriteSameCBData *data;
1741     uint8_t *buf;
1742     int i;
1743 
1744     /* Fail if PBDATA=1 or LBDATA=1 or ANCHOR=1.  */
1745     if (nb_sectors == 0 || (req->cmd.buf[1] & 0x16)) {
1746         scsi_check_condition(r, SENSE_CODE(INVALID_FIELD));
1747         return;
1748     }
1749 
1750     if (blk_is_read_only(s->qdev.conf.blk)) {
1751         scsi_check_condition(r, SENSE_CODE(WRITE_PROTECTED));
1752         return;
1753     }
1754     if (!check_lba_range(s, r->req.cmd.lba, nb_sectors)) {
1755         scsi_check_condition(r, SENSE_CODE(LBA_OUT_OF_RANGE));
1756         return;
1757     }
1758 
1759     if (buffer_is_zero(inbuf, s->qdev.blocksize)) {
1760         int flags = (req->cmd.buf[1] & 0x8) ? BDRV_REQ_MAY_UNMAP : 0;
1761 
1762         /* The request is used as the AIO opaque value, so add a ref.  */
1763         scsi_req_ref(&r->req);
1764         block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct,
1765                          nb_sectors * s->qdev.blocksize,
1766                         BLOCK_ACCT_WRITE);
1767         r->req.aiocb = blk_aio_write_zeroes(s->qdev.conf.blk,
1768                                 r->req.cmd.lba * (s->qdev.blocksize / 512),
1769                                 nb_sectors * (s->qdev.blocksize / 512),
1770                                 flags, scsi_aio_complete, r);
1771         return;
1772     }
1773 
1774     data = g_new0(WriteSameCBData, 1);
1775     data->r = r;
1776     data->sector = r->req.cmd.lba * (s->qdev.blocksize / 512);
1777     data->nb_sectors = nb_sectors * (s->qdev.blocksize / 512);
1778     data->iov.iov_len = MIN(data->nb_sectors * 512, SCSI_WRITE_SAME_MAX);
1779     data->iov.iov_base = buf = blk_blockalign(s->qdev.conf.blk,
1780                                               data->iov.iov_len);
1781     qemu_iovec_init_external(&data->qiov, &data->iov, 1);
1782 
1783     for (i = 0; i < data->iov.iov_len; i += s->qdev.blocksize) {
1784         memcpy(&buf[i], inbuf, s->qdev.blocksize);
1785     }
1786 
1787     scsi_req_ref(&r->req);
1788     block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct,
1789                      data->iov.iov_len, BLOCK_ACCT_WRITE);
1790     r->req.aiocb = blk_aio_writev(s->qdev.conf.blk, data->sector,
1791                                   &data->qiov, data->iov.iov_len / 512,
1792                                   scsi_write_same_complete, data);
1793 }
1794 
1795 static void scsi_disk_emulate_write_data(SCSIRequest *req)
1796 {
1797     SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
1798 
1799     if (r->iov.iov_len) {
1800         int buflen = r->iov.iov_len;
1801         DPRINTF("Write buf_len=%d\n", buflen);
1802         r->iov.iov_len = 0;
1803         scsi_req_data(&r->req, buflen);
1804         return;
1805     }
1806 
1807     switch (req->cmd.buf[0]) {
1808     case MODE_SELECT:
1809     case MODE_SELECT_10:
1810         /* This also clears the sense buffer for REQUEST SENSE.  */
1811         scsi_disk_emulate_mode_select(r, r->iov.iov_base);
1812         break;
1813 
1814     case UNMAP:
1815         scsi_disk_emulate_unmap(r, r->iov.iov_base);
1816         break;
1817 
1818     case VERIFY_10:
1819     case VERIFY_12:
1820     case VERIFY_16:
1821         if (r->req.status == -1) {
1822             scsi_check_condition(r, SENSE_CODE(INVALID_FIELD));
1823         }
1824         break;
1825 
1826     case WRITE_SAME_10:
1827     case WRITE_SAME_16:
1828         scsi_disk_emulate_write_same(r, r->iov.iov_base);
1829         break;
1830 
1831     default:
1832         abort();
1833     }
1834 }
1835 
1836 static int32_t scsi_disk_emulate_command(SCSIRequest *req, uint8_t *buf)
1837 {
1838     SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
1839     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
1840     uint64_t nb_sectors;
1841     uint8_t *outbuf;
1842     int buflen;
1843 
1844     switch (req->cmd.buf[0]) {
1845     case INQUIRY:
1846     case MODE_SENSE:
1847     case MODE_SENSE_10:
1848     case RESERVE:
1849     case RESERVE_10:
1850     case RELEASE:
1851     case RELEASE_10:
1852     case START_STOP:
1853     case ALLOW_MEDIUM_REMOVAL:
1854     case GET_CONFIGURATION:
1855     case GET_EVENT_STATUS_NOTIFICATION:
1856     case MECHANISM_STATUS:
1857     case REQUEST_SENSE:
1858         break;
1859 
1860     default:
1861         if (s->tray_open || !blk_is_inserted(s->qdev.conf.blk)) {
1862             scsi_check_condition(r, SENSE_CODE(NO_MEDIUM));
1863             return 0;
1864         }
1865         break;
1866     }
1867 
1868     /*
1869      * FIXME: we shouldn't return anything bigger than 4k, but the code
1870      * requires the buffer to be as big as req->cmd.xfer in several
1871      * places.  So, do not allow CDBs with a very large ALLOCATION
1872      * LENGTH.  The real fix would be to modify scsi_read_data and
1873      * dma_buf_read, so that they return data beyond the buflen
1874      * as all zeros.
1875      */
1876     if (req->cmd.xfer > 65536) {
1877         goto illegal_request;
1878     }
1879     r->buflen = MAX(4096, req->cmd.xfer);
1880 
1881     if (!r->iov.iov_base) {
1882         r->iov.iov_base = blk_blockalign(s->qdev.conf.blk, r->buflen);
1883     }
1884 
1885     buflen = req->cmd.xfer;
1886     outbuf = r->iov.iov_base;
1887     memset(outbuf, 0, r->buflen);
1888     switch (req->cmd.buf[0]) {
1889     case TEST_UNIT_READY:
1890         assert(!s->tray_open && blk_is_inserted(s->qdev.conf.blk));
1891         break;
1892     case INQUIRY:
1893         buflen = scsi_disk_emulate_inquiry(req, outbuf);
1894         if (buflen < 0) {
1895             goto illegal_request;
1896         }
1897         break;
1898     case MODE_SENSE:
1899     case MODE_SENSE_10:
1900         buflen = scsi_disk_emulate_mode_sense(r, outbuf);
1901         if (buflen < 0) {
1902             goto illegal_request;
1903         }
1904         break;
1905     case READ_TOC:
1906         buflen = scsi_disk_emulate_read_toc(req, outbuf);
1907         if (buflen < 0) {
1908             goto illegal_request;
1909         }
1910         break;
1911     case RESERVE:
1912         if (req->cmd.buf[1] & 1) {
1913             goto illegal_request;
1914         }
1915         break;
1916     case RESERVE_10:
1917         if (req->cmd.buf[1] & 3) {
1918             goto illegal_request;
1919         }
1920         break;
1921     case RELEASE:
1922         if (req->cmd.buf[1] & 1) {
1923             goto illegal_request;
1924         }
1925         break;
1926     case RELEASE_10:
1927         if (req->cmd.buf[1] & 3) {
1928             goto illegal_request;
1929         }
1930         break;
1931     case START_STOP:
1932         if (scsi_disk_emulate_start_stop(r) < 0) {
1933             return 0;
1934         }
1935         break;
1936     case ALLOW_MEDIUM_REMOVAL:
1937         s->tray_locked = req->cmd.buf[4] & 1;
1938         blk_lock_medium(s->qdev.conf.blk, req->cmd.buf[4] & 1);
1939         break;
1940     case READ_CAPACITY_10:
1941         /* The normal LEN field for this command is zero.  */
1942         memset(outbuf, 0, 8);
1943         blk_get_geometry(s->qdev.conf.blk, &nb_sectors);
1944         if (!nb_sectors) {
1945             scsi_check_condition(r, SENSE_CODE(LUN_NOT_READY));
1946             return 0;
1947         }
1948         if ((req->cmd.buf[8] & 1) == 0 && req->cmd.lba) {
1949             goto illegal_request;
1950         }
1951         nb_sectors /= s->qdev.blocksize / 512;
1952         /* Returned value is the address of the last sector.  */
1953         nb_sectors--;
1954         /* Remember the new size for read/write sanity checking. */
1955         s->qdev.max_lba = nb_sectors;
1956         /* Clip to 2TB, instead of returning capacity modulo 2TB. */
1957         if (nb_sectors > UINT32_MAX) {
1958             nb_sectors = UINT32_MAX;
1959         }
1960         outbuf[0] = (nb_sectors >> 24) & 0xff;
1961         outbuf[1] = (nb_sectors >> 16) & 0xff;
1962         outbuf[2] = (nb_sectors >> 8) & 0xff;
1963         outbuf[3] = nb_sectors & 0xff;
1964         outbuf[4] = 0;
1965         outbuf[5] = 0;
1966         outbuf[6] = s->qdev.blocksize >> 8;
1967         outbuf[7] = 0;
1968         break;
1969     case REQUEST_SENSE:
1970         /* Just return "NO SENSE".  */
1971         buflen = scsi_build_sense(NULL, 0, outbuf, r->buflen,
1972                                   (req->cmd.buf[1] & 1) == 0);
1973         if (buflen < 0) {
1974             goto illegal_request;
1975         }
1976         break;
1977     case MECHANISM_STATUS:
1978         buflen = scsi_emulate_mechanism_status(s, outbuf);
1979         if (buflen < 0) {
1980             goto illegal_request;
1981         }
1982         break;
1983     case GET_CONFIGURATION:
1984         buflen = scsi_get_configuration(s, outbuf);
1985         if (buflen < 0) {
1986             goto illegal_request;
1987         }
1988         break;
1989     case GET_EVENT_STATUS_NOTIFICATION:
1990         buflen = scsi_get_event_status_notification(s, r, outbuf);
1991         if (buflen < 0) {
1992             goto illegal_request;
1993         }
1994         break;
1995     case READ_DISC_INFORMATION:
1996         buflen = scsi_read_disc_information(s, r, outbuf);
1997         if (buflen < 0) {
1998             goto illegal_request;
1999         }
2000         break;
2001     case READ_DVD_STRUCTURE:
2002         buflen = scsi_read_dvd_structure(s, r, outbuf);
2003         if (buflen < 0) {
2004             goto illegal_request;
2005         }
2006         break;
2007     case SERVICE_ACTION_IN_16:
2008         /* Service Action In subcommands. */
2009         if ((req->cmd.buf[1] & 31) == SAI_READ_CAPACITY_16) {
2010             DPRINTF("SAI READ CAPACITY(16)\n");
2011             memset(outbuf, 0, req->cmd.xfer);
2012             blk_get_geometry(s->qdev.conf.blk, &nb_sectors);
2013             if (!nb_sectors) {
2014                 scsi_check_condition(r, SENSE_CODE(LUN_NOT_READY));
2015                 return 0;
2016             }
2017             if ((req->cmd.buf[14] & 1) == 0 && req->cmd.lba) {
2018                 goto illegal_request;
2019             }
2020             nb_sectors /= s->qdev.blocksize / 512;
2021             /* Returned value is the address of the last sector.  */
2022             nb_sectors--;
2023             /* Remember the new size for read/write sanity checking. */
2024             s->qdev.max_lba = nb_sectors;
2025             outbuf[0] = (nb_sectors >> 56) & 0xff;
2026             outbuf[1] = (nb_sectors >> 48) & 0xff;
2027             outbuf[2] = (nb_sectors >> 40) & 0xff;
2028             outbuf[3] = (nb_sectors >> 32) & 0xff;
2029             outbuf[4] = (nb_sectors >> 24) & 0xff;
2030             outbuf[5] = (nb_sectors >> 16) & 0xff;
2031             outbuf[6] = (nb_sectors >> 8) & 0xff;
2032             outbuf[7] = nb_sectors & 0xff;
2033             outbuf[8] = 0;
2034             outbuf[9] = 0;
2035             outbuf[10] = s->qdev.blocksize >> 8;
2036             outbuf[11] = 0;
2037             outbuf[12] = 0;
2038             outbuf[13] = get_physical_block_exp(&s->qdev.conf);
2039 
2040             /* set TPE bit if the format supports discard */
2041             if (s->qdev.conf.discard_granularity) {
2042                 outbuf[14] = 0x80;
2043             }
2044 
2045             /* Protection, exponent and lowest lba field left blank. */
2046             break;
2047         }
2048         DPRINTF("Unsupported Service Action In\n");
2049         goto illegal_request;
2050     case SYNCHRONIZE_CACHE:
2051         /* The request is used as the AIO opaque value, so add a ref.  */
2052         scsi_req_ref(&r->req);
2053         block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct, 0,
2054                          BLOCK_ACCT_FLUSH);
2055         r->req.aiocb = blk_aio_flush(s->qdev.conf.blk, scsi_aio_complete, r);
2056         return 0;
2057     case SEEK_10:
2058         DPRINTF("Seek(10) (sector %" PRId64 ")\n", r->req.cmd.lba);
2059         if (r->req.cmd.lba > s->qdev.max_lba) {
2060             goto illegal_lba;
2061         }
2062         break;
2063     case MODE_SELECT:
2064         DPRINTF("Mode Select(6) (len %lu)\n", (long)r->req.cmd.xfer);
2065         break;
2066     case MODE_SELECT_10:
2067         DPRINTF("Mode Select(10) (len %lu)\n", (long)r->req.cmd.xfer);
2068         break;
2069     case UNMAP:
2070         DPRINTF("Unmap (len %lu)\n", (long)r->req.cmd.xfer);
2071         break;
2072     case VERIFY_10:
2073     case VERIFY_12:
2074     case VERIFY_16:
2075         DPRINTF("Verify (bytchk %d)\n", (req->cmd.buf[1] >> 1) & 3);
2076         if (req->cmd.buf[1] & 6) {
2077             goto illegal_request;
2078         }
2079         break;
2080     case WRITE_SAME_10:
2081     case WRITE_SAME_16:
2082         DPRINTF("WRITE SAME %d (len %lu)\n",
2083                 req->cmd.buf[0] == WRITE_SAME_10 ? 10 : 16,
2084                 (long)r->req.cmd.xfer);
2085         break;
2086     default:
2087         DPRINTF("Unknown SCSI command (%2.2x=%s)\n", buf[0],
2088                 scsi_command_name(buf[0]));
2089         scsi_check_condition(r, SENSE_CODE(INVALID_OPCODE));
2090         return 0;
2091     }
2092     assert(!r->req.aiocb);
2093     r->iov.iov_len = MIN(r->buflen, req->cmd.xfer);
2094     if (r->iov.iov_len == 0) {
2095         scsi_req_complete(&r->req, GOOD);
2096     }
2097     if (r->req.cmd.mode == SCSI_XFER_TO_DEV) {
2098         assert(r->iov.iov_len == req->cmd.xfer);
2099         return -r->iov.iov_len;
2100     } else {
2101         return r->iov.iov_len;
2102     }
2103 
2104 illegal_request:
2105     if (r->req.status == -1) {
2106         scsi_check_condition(r, SENSE_CODE(INVALID_FIELD));
2107     }
2108     return 0;
2109 
2110 illegal_lba:
2111     scsi_check_condition(r, SENSE_CODE(LBA_OUT_OF_RANGE));
2112     return 0;
2113 }
2114 
2115 /* Execute a scsi command.  Returns the length of the data expected by the
2116    command.  This will be Positive for data transfers from the device
2117    (eg. disk reads), negative for transfers to the device (eg. disk writes),
2118    and zero if the command does not transfer any data.  */
2119 
2120 static int32_t scsi_disk_dma_command(SCSIRequest *req, uint8_t *buf)
2121 {
2122     SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
2123     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
2124     uint32_t len;
2125     uint8_t command;
2126 
2127     command = buf[0];
2128 
2129     if (s->tray_open || !blk_is_inserted(s->qdev.conf.blk)) {
2130         scsi_check_condition(r, SENSE_CODE(NO_MEDIUM));
2131         return 0;
2132     }
2133 
2134     len = scsi_data_cdb_xfer(r->req.cmd.buf);
2135     switch (command) {
2136     case READ_6:
2137     case READ_10:
2138     case READ_12:
2139     case READ_16:
2140         DPRINTF("Read (sector %" PRId64 ", count %u)\n", r->req.cmd.lba, len);
2141         if (r->req.cmd.buf[1] & 0xe0) {
2142             goto illegal_request;
2143         }
2144         if (!check_lba_range(s, r->req.cmd.lba, len)) {
2145             goto illegal_lba;
2146         }
2147         r->sector = r->req.cmd.lba * (s->qdev.blocksize / 512);
2148         r->sector_count = len * (s->qdev.blocksize / 512);
2149         break;
2150     case WRITE_6:
2151     case WRITE_10:
2152     case WRITE_12:
2153     case WRITE_16:
2154     case WRITE_VERIFY_10:
2155     case WRITE_VERIFY_12:
2156     case WRITE_VERIFY_16:
2157         if (blk_is_read_only(s->qdev.conf.blk)) {
2158             scsi_check_condition(r, SENSE_CODE(WRITE_PROTECTED));
2159             return 0;
2160         }
2161         DPRINTF("Write %s(sector %" PRId64 ", count %u)\n",
2162                 (command & 0xe) == 0xe ? "And Verify " : "",
2163                 r->req.cmd.lba, len);
2164         if (r->req.cmd.buf[1] & 0xe0) {
2165             goto illegal_request;
2166         }
2167         if (!check_lba_range(s, r->req.cmd.lba, len)) {
2168             goto illegal_lba;
2169         }
2170         r->sector = r->req.cmd.lba * (s->qdev.blocksize / 512);
2171         r->sector_count = len * (s->qdev.blocksize / 512);
2172         break;
2173     default:
2174         abort();
2175     illegal_request:
2176         scsi_check_condition(r, SENSE_CODE(INVALID_FIELD));
2177         return 0;
2178     illegal_lba:
2179         scsi_check_condition(r, SENSE_CODE(LBA_OUT_OF_RANGE));
2180         return 0;
2181     }
2182     if (r->sector_count == 0) {
2183         scsi_req_complete(&r->req, GOOD);
2184     }
2185     assert(r->iov.iov_len == 0);
2186     if (r->req.cmd.mode == SCSI_XFER_TO_DEV) {
2187         return -r->sector_count * 512;
2188     } else {
2189         return r->sector_count * 512;
2190     }
2191 }
2192 
2193 static void scsi_disk_reset(DeviceState *dev)
2194 {
2195     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev.qdev, dev);
2196     uint64_t nb_sectors;
2197 
2198     scsi_device_purge_requests(&s->qdev, SENSE_CODE(RESET));
2199 
2200     blk_get_geometry(s->qdev.conf.blk, &nb_sectors);
2201     nb_sectors /= s->qdev.blocksize / 512;
2202     if (nb_sectors) {
2203         nb_sectors--;
2204     }
2205     s->qdev.max_lba = nb_sectors;
2206     /* reset tray statuses */
2207     s->tray_locked = 0;
2208     s->tray_open = 0;
2209 }
2210 
2211 static void scsi_disk_resize_cb(void *opaque)
2212 {
2213     SCSIDiskState *s = opaque;
2214 
2215     /* SPC lists this sense code as available only for
2216      * direct-access devices.
2217      */
2218     if (s->qdev.type == TYPE_DISK) {
2219         scsi_device_report_change(&s->qdev, SENSE_CODE(CAPACITY_CHANGED));
2220     }
2221 }
2222 
2223 static void scsi_cd_change_media_cb(void *opaque, bool load)
2224 {
2225     SCSIDiskState *s = opaque;
2226 
2227     /*
2228      * When a CD gets changed, we have to report an ejected state and
2229      * then a loaded state to guests so that they detect tray
2230      * open/close and media change events.  Guests that do not use
2231      * GET_EVENT_STATUS_NOTIFICATION to detect such tray open/close
2232      * states rely on this behavior.
2233      *
2234      * media_changed governs the state machine used for unit attention
2235      * report.  media_event is used by GET EVENT STATUS NOTIFICATION.
2236      */
2237     s->media_changed = load;
2238     s->tray_open = !load;
2239     scsi_device_set_ua(&s->qdev, SENSE_CODE(UNIT_ATTENTION_NO_MEDIUM));
2240     s->media_event = true;
2241     s->eject_request = false;
2242 }
2243 
2244 static void scsi_cd_eject_request_cb(void *opaque, bool force)
2245 {
2246     SCSIDiskState *s = opaque;
2247 
2248     s->eject_request = true;
2249     if (force) {
2250         s->tray_locked = false;
2251     }
2252 }
2253 
2254 static bool scsi_cd_is_tray_open(void *opaque)
2255 {
2256     return ((SCSIDiskState *)opaque)->tray_open;
2257 }
2258 
2259 static bool scsi_cd_is_medium_locked(void *opaque)
2260 {
2261     return ((SCSIDiskState *)opaque)->tray_locked;
2262 }
2263 
2264 static const BlockDevOps scsi_disk_removable_block_ops = {
2265     .change_media_cb = scsi_cd_change_media_cb,
2266     .eject_request_cb = scsi_cd_eject_request_cb,
2267     .is_tray_open = scsi_cd_is_tray_open,
2268     .is_medium_locked = scsi_cd_is_medium_locked,
2269 
2270     .resize_cb = scsi_disk_resize_cb,
2271 };
2272 
2273 static const BlockDevOps scsi_disk_block_ops = {
2274     .resize_cb = scsi_disk_resize_cb,
2275 };
2276 
2277 static void scsi_disk_unit_attention_reported(SCSIDevice *dev)
2278 {
2279     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, dev);
2280     if (s->media_changed) {
2281         s->media_changed = false;
2282         scsi_device_set_ua(&s->qdev, SENSE_CODE(MEDIUM_CHANGED));
2283     }
2284 }
2285 
2286 static void scsi_realize(SCSIDevice *dev, Error **errp)
2287 {
2288     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, dev);
2289     Error *err = NULL;
2290 
2291     if (!s->qdev.conf.blk) {
2292         error_setg(errp, "drive property not set");
2293         return;
2294     }
2295 
2296     if (!(s->features & (1 << SCSI_DISK_F_REMOVABLE)) &&
2297         !blk_is_inserted(s->qdev.conf.blk)) {
2298         error_setg(errp, "Device needs media, but drive is empty");
2299         return;
2300     }
2301 
2302     blkconf_serial(&s->qdev.conf, &s->serial);
2303     blkconf_blocksizes(&s->qdev.conf);
2304     if (dev->type == TYPE_DISK) {
2305         blkconf_geometry(&dev->conf, NULL, 65535, 255, 255, &err);
2306         if (err) {
2307             error_propagate(errp, err);
2308             return;
2309         }
2310     }
2311 
2312     if (s->qdev.conf.discard_granularity == -1) {
2313         s->qdev.conf.discard_granularity =
2314             MAX(s->qdev.conf.logical_block_size, DEFAULT_DISCARD_GRANULARITY);
2315     }
2316 
2317     if (!s->version) {
2318         s->version = g_strdup(qemu_get_version());
2319     }
2320     if (!s->vendor) {
2321         s->vendor = g_strdup("QEMU");
2322     }
2323 
2324     if (blk_is_sg(s->qdev.conf.blk)) {
2325         error_setg(errp, "unwanted /dev/sg*");
2326         return;
2327     }
2328 
2329     if ((s->features & (1 << SCSI_DISK_F_REMOVABLE)) &&
2330             !(s->features & (1 << SCSI_DISK_F_NO_REMOVABLE_DEVOPS))) {
2331         blk_set_dev_ops(s->qdev.conf.blk, &scsi_disk_removable_block_ops, s);
2332     } else {
2333         blk_set_dev_ops(s->qdev.conf.blk, &scsi_disk_block_ops, s);
2334     }
2335     blk_set_guest_block_size(s->qdev.conf.blk, s->qdev.blocksize);
2336 
2337     blk_iostatus_enable(s->qdev.conf.blk);
2338 }
2339 
2340 static void scsi_hd_realize(SCSIDevice *dev, Error **errp)
2341 {
2342     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, dev);
2343     /* can happen for devices without drive. The error message for missing
2344      * backend will be issued in scsi_realize
2345      */
2346     if (s->qdev.conf.blk) {
2347         blkconf_blocksizes(&s->qdev.conf);
2348     }
2349     s->qdev.blocksize = s->qdev.conf.logical_block_size;
2350     s->qdev.type = TYPE_DISK;
2351     if (!s->product) {
2352         s->product = g_strdup("QEMU HARDDISK");
2353     }
2354     scsi_realize(&s->qdev, errp);
2355 }
2356 
2357 static void scsi_cd_realize(SCSIDevice *dev, Error **errp)
2358 {
2359     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, dev);
2360     s->qdev.blocksize = 2048;
2361     s->qdev.type = TYPE_ROM;
2362     s->features |= 1 << SCSI_DISK_F_REMOVABLE;
2363     if (!s->product) {
2364         s->product = g_strdup("QEMU CD-ROM");
2365     }
2366     scsi_realize(&s->qdev, errp);
2367 }
2368 
2369 static void scsi_disk_realize(SCSIDevice *dev, Error **errp)
2370 {
2371     DriveInfo *dinfo;
2372     Error *local_err = NULL;
2373 
2374     if (!dev->conf.blk) {
2375         scsi_realize(dev, &local_err);
2376         assert(local_err);
2377         error_propagate(errp, local_err);
2378         return;
2379     }
2380 
2381     dinfo = blk_legacy_dinfo(dev->conf.blk);
2382     if (dinfo && dinfo->media_cd) {
2383         scsi_cd_realize(dev, errp);
2384     } else {
2385         scsi_hd_realize(dev, errp);
2386     }
2387 }
2388 
2389 static const SCSIReqOps scsi_disk_emulate_reqops = {
2390     .size         = sizeof(SCSIDiskReq),
2391     .free_req     = scsi_free_request,
2392     .send_command = scsi_disk_emulate_command,
2393     .read_data    = scsi_disk_emulate_read_data,
2394     .write_data   = scsi_disk_emulate_write_data,
2395     .get_buf      = scsi_get_buf,
2396 };
2397 
2398 static const SCSIReqOps scsi_disk_dma_reqops = {
2399     .size         = sizeof(SCSIDiskReq),
2400     .free_req     = scsi_free_request,
2401     .send_command = scsi_disk_dma_command,
2402     .read_data    = scsi_read_data,
2403     .write_data   = scsi_write_data,
2404     .get_buf      = scsi_get_buf,
2405     .load_request = scsi_disk_load_request,
2406     .save_request = scsi_disk_save_request,
2407 };
2408 
2409 static const SCSIReqOps *const scsi_disk_reqops_dispatch[256] = {
2410     [TEST_UNIT_READY]                 = &scsi_disk_emulate_reqops,
2411     [INQUIRY]                         = &scsi_disk_emulate_reqops,
2412     [MODE_SENSE]                      = &scsi_disk_emulate_reqops,
2413     [MODE_SENSE_10]                   = &scsi_disk_emulate_reqops,
2414     [START_STOP]                      = &scsi_disk_emulate_reqops,
2415     [ALLOW_MEDIUM_REMOVAL]            = &scsi_disk_emulate_reqops,
2416     [READ_CAPACITY_10]                = &scsi_disk_emulate_reqops,
2417     [READ_TOC]                        = &scsi_disk_emulate_reqops,
2418     [READ_DVD_STRUCTURE]              = &scsi_disk_emulate_reqops,
2419     [READ_DISC_INFORMATION]           = &scsi_disk_emulate_reqops,
2420     [GET_CONFIGURATION]               = &scsi_disk_emulate_reqops,
2421     [GET_EVENT_STATUS_NOTIFICATION]   = &scsi_disk_emulate_reqops,
2422     [MECHANISM_STATUS]                = &scsi_disk_emulate_reqops,
2423     [SERVICE_ACTION_IN_16]            = &scsi_disk_emulate_reqops,
2424     [REQUEST_SENSE]                   = &scsi_disk_emulate_reqops,
2425     [SYNCHRONIZE_CACHE]               = &scsi_disk_emulate_reqops,
2426     [SEEK_10]                         = &scsi_disk_emulate_reqops,
2427     [MODE_SELECT]                     = &scsi_disk_emulate_reqops,
2428     [MODE_SELECT_10]                  = &scsi_disk_emulate_reqops,
2429     [UNMAP]                           = &scsi_disk_emulate_reqops,
2430     [WRITE_SAME_10]                   = &scsi_disk_emulate_reqops,
2431     [WRITE_SAME_16]                   = &scsi_disk_emulate_reqops,
2432     [VERIFY_10]                       = &scsi_disk_emulate_reqops,
2433     [VERIFY_12]                       = &scsi_disk_emulate_reqops,
2434     [VERIFY_16]                       = &scsi_disk_emulate_reqops,
2435 
2436     [READ_6]                          = &scsi_disk_dma_reqops,
2437     [READ_10]                         = &scsi_disk_dma_reqops,
2438     [READ_12]                         = &scsi_disk_dma_reqops,
2439     [READ_16]                         = &scsi_disk_dma_reqops,
2440     [WRITE_6]                         = &scsi_disk_dma_reqops,
2441     [WRITE_10]                        = &scsi_disk_dma_reqops,
2442     [WRITE_12]                        = &scsi_disk_dma_reqops,
2443     [WRITE_16]                        = &scsi_disk_dma_reqops,
2444     [WRITE_VERIFY_10]                 = &scsi_disk_dma_reqops,
2445     [WRITE_VERIFY_12]                 = &scsi_disk_dma_reqops,
2446     [WRITE_VERIFY_16]                 = &scsi_disk_dma_reqops,
2447 };
2448 
2449 static SCSIRequest *scsi_new_request(SCSIDevice *d, uint32_t tag, uint32_t lun,
2450                                      uint8_t *buf, void *hba_private)
2451 {
2452     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, d);
2453     SCSIRequest *req;
2454     const SCSIReqOps *ops;
2455     uint8_t command;
2456 
2457     command = buf[0];
2458     ops = scsi_disk_reqops_dispatch[command];
2459     if (!ops) {
2460         ops = &scsi_disk_emulate_reqops;
2461     }
2462     req = scsi_req_alloc(ops, &s->qdev, tag, lun, hba_private);
2463 
2464 #ifdef DEBUG_SCSI
2465     DPRINTF("Command: lun=%d tag=0x%x data=0x%02x", lun, tag, buf[0]);
2466     {
2467         int i;
2468         for (i = 1; i < scsi_cdb_length(buf); i++) {
2469             printf(" 0x%02x", buf[i]);
2470         }
2471         printf("\n");
2472     }
2473 #endif
2474 
2475     return req;
2476 }
2477 
2478 #ifdef __linux__
2479 static int get_device_type(SCSIDiskState *s)
2480 {
2481     uint8_t cmd[16];
2482     uint8_t buf[36];
2483     uint8_t sensebuf[8];
2484     sg_io_hdr_t io_header;
2485     int ret;
2486 
2487     memset(cmd, 0, sizeof(cmd));
2488     memset(buf, 0, sizeof(buf));
2489     cmd[0] = INQUIRY;
2490     cmd[4] = sizeof(buf);
2491 
2492     memset(&io_header, 0, sizeof(io_header));
2493     io_header.interface_id = 'S';
2494     io_header.dxfer_direction = SG_DXFER_FROM_DEV;
2495     io_header.dxfer_len = sizeof(buf);
2496     io_header.dxferp = buf;
2497     io_header.cmdp = cmd;
2498     io_header.cmd_len = sizeof(cmd);
2499     io_header.mx_sb_len = sizeof(sensebuf);
2500     io_header.sbp = sensebuf;
2501     io_header.timeout = 6000; /* XXX */
2502 
2503     ret = blk_ioctl(s->qdev.conf.blk, SG_IO, &io_header);
2504     if (ret < 0 || io_header.driver_status || io_header.host_status) {
2505         return -1;
2506     }
2507     s->qdev.type = buf[0];
2508     if (buf[1] & 0x80) {
2509         s->features |= 1 << SCSI_DISK_F_REMOVABLE;
2510     }
2511     return 0;
2512 }
2513 
2514 static void scsi_block_realize(SCSIDevice *dev, Error **errp)
2515 {
2516     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, dev);
2517     int sg_version;
2518     int rc;
2519 
2520     if (!s->qdev.conf.blk) {
2521         error_setg(errp, "drive property not set");
2522         return;
2523     }
2524 
2525     /* check we are using a driver managing SG_IO (version 3 and after) */
2526     rc = blk_ioctl(s->qdev.conf.blk, SG_GET_VERSION_NUM, &sg_version);
2527     if (rc < 0) {
2528         error_setg(errp, "cannot get SG_IO version number: %s.  "
2529                      "Is this a SCSI device?",
2530                      strerror(-rc));
2531         return;
2532     }
2533     if (sg_version < 30000) {
2534         error_setg(errp, "scsi generic interface too old");
2535         return;
2536     }
2537 
2538     /* get device type from INQUIRY data */
2539     rc = get_device_type(s);
2540     if (rc < 0) {
2541         error_setg(errp, "INQUIRY failed");
2542         return;
2543     }
2544 
2545     /* Make a guess for the block size, we'll fix it when the guest sends.
2546      * READ CAPACITY.  If they don't, they likely would assume these sizes
2547      * anyway. (TODO: check in /sys).
2548      */
2549     if (s->qdev.type == TYPE_ROM || s->qdev.type == TYPE_WORM) {
2550         s->qdev.blocksize = 2048;
2551     } else {
2552         s->qdev.blocksize = 512;
2553     }
2554 
2555     /* Makes the scsi-block device not removable by using HMP and QMP eject
2556      * command.
2557      */
2558     s->features |= (1 << SCSI_DISK_F_NO_REMOVABLE_DEVOPS);
2559 
2560     scsi_realize(&s->qdev, errp);
2561 }
2562 
2563 static bool scsi_block_is_passthrough(SCSIDiskState *s, uint8_t *buf)
2564 {
2565     switch (buf[0]) {
2566     case READ_6:
2567     case READ_10:
2568     case READ_12:
2569     case READ_16:
2570     case VERIFY_10:
2571     case VERIFY_12:
2572     case VERIFY_16:
2573     case WRITE_6:
2574     case WRITE_10:
2575     case WRITE_12:
2576     case WRITE_16:
2577     case WRITE_VERIFY_10:
2578     case WRITE_VERIFY_12:
2579     case WRITE_VERIFY_16:
2580         /* If we are not using O_DIRECT, we might read stale data from the
2581          * host cache if writes were made using other commands than these
2582          * ones (such as WRITE SAME or EXTENDED COPY, etc.).  So, without
2583          * O_DIRECT everything must go through SG_IO.
2584          */
2585         if (!(blk_get_flags(s->qdev.conf.blk) & BDRV_O_NOCACHE)) {
2586             break;
2587         }
2588 
2589         /* MMC writing cannot be done via pread/pwrite, because it sometimes
2590          * involves writing beyond the maximum LBA or to negative LBA (lead-in).
2591          * And once you do these writes, reading from the block device is
2592          * unreliable, too.  It is even possible that reads deliver random data
2593          * from the host page cache (this is probably a Linux bug).
2594          *
2595          * We might use scsi_disk_dma_reqops as long as no writing commands are
2596          * seen, but performance usually isn't paramount on optical media.  So,
2597          * just make scsi-block operate the same as scsi-generic for them.
2598          */
2599         if (s->qdev.type != TYPE_ROM) {
2600             return false;
2601         }
2602         break;
2603 
2604     default:
2605         break;
2606     }
2607 
2608     return true;
2609 }
2610 
2611 
2612 static SCSIRequest *scsi_block_new_request(SCSIDevice *d, uint32_t tag,
2613                                            uint32_t lun, uint8_t *buf,
2614                                            void *hba_private)
2615 {
2616     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, d);
2617 
2618     if (scsi_block_is_passthrough(s, buf)) {
2619         return scsi_req_alloc(&scsi_generic_req_ops, &s->qdev, tag, lun,
2620                               hba_private);
2621     } else {
2622         return scsi_req_alloc(&scsi_disk_dma_reqops, &s->qdev, tag, lun,
2623                               hba_private);
2624     }
2625 }
2626 
2627 static int scsi_block_parse_cdb(SCSIDevice *d, SCSICommand *cmd,
2628                                   uint8_t *buf, void *hba_private)
2629 {
2630     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, d);
2631 
2632     if (scsi_block_is_passthrough(s, buf)) {
2633         return scsi_bus_parse_cdb(&s->qdev, cmd, buf, hba_private);
2634     } else {
2635         return scsi_req_parse_cdb(&s->qdev, cmd, buf);
2636     }
2637 }
2638 
2639 #endif
2640 
/*
 * Properties common to the scsi-hd, scsi-cd and scsi-disk property lists:
 * the block configuration plus the "ver", "serial", "vendor" and "product"
 * strings.
 */
#define DEFINE_SCSI_DISK_PROPERTIES()                       \
    DEFINE_BLOCK_PROPERTIES(SCSIDiskState, qdev.conf),      \
    DEFINE_PROP_STRING("ver", SCSIDiskState, version),      \
    DEFINE_PROP_STRING("serial", SCSIDiskState, serial),    \
    DEFINE_PROP_STRING("vendor", SCSIDiskState, vendor),    \
    DEFINE_PROP_STRING("product", SCSIDiskState, product)
2647 
2648 static Property scsi_hd_properties[] = {
2649     DEFINE_SCSI_DISK_PROPERTIES(),
2650     DEFINE_PROP_BIT("removable", SCSIDiskState, features,
2651                     SCSI_DISK_F_REMOVABLE, false),
2652     DEFINE_PROP_BIT("dpofua", SCSIDiskState, features,
2653                     SCSI_DISK_F_DPOFUA, false),
2654     DEFINE_PROP_UINT64("wwn", SCSIDiskState, wwn, 0),
2655     DEFINE_PROP_UINT64("port_wwn", SCSIDiskState, port_wwn, 0),
2656     DEFINE_PROP_UINT16("port_index", SCSIDiskState, port_index, 0),
2657     DEFINE_PROP_UINT64("max_unmap_size", SCSIDiskState, max_unmap_size,
2658                        DEFAULT_MAX_UNMAP_SIZE),
2659     DEFINE_PROP_UINT64("max_io_size", SCSIDiskState, max_io_size,
2660                        DEFAULT_MAX_IO_SIZE),
2661     DEFINE_BLOCK_CHS_PROPERTIES(SCSIDiskState, qdev.conf),
2662     DEFINE_PROP_END_OF_LIST(),
2663 };
2664 
2665 static const VMStateDescription vmstate_scsi_disk_state = {
2666     .name = "scsi-disk",
2667     .version_id = 1,
2668     .minimum_version_id = 1,
2669     .fields = (VMStateField[]) {
2670         VMSTATE_SCSI_DEVICE(qdev, SCSIDiskState),
2671         VMSTATE_BOOL(media_changed, SCSIDiskState),
2672         VMSTATE_BOOL(media_event, SCSIDiskState),
2673         VMSTATE_BOOL(eject_request, SCSIDiskState),
2674         VMSTATE_BOOL(tray_open, SCSIDiskState),
2675         VMSTATE_BOOL(tray_locked, SCSIDiskState),
2676         VMSTATE_END_OF_LIST()
2677     }
2678 };
2679 
2680 static void scsi_hd_class_initfn(ObjectClass *klass, void *data)
2681 {
2682     DeviceClass *dc = DEVICE_CLASS(klass);
2683     SCSIDeviceClass *sc = SCSI_DEVICE_CLASS(klass);
2684 
2685     sc->realize      = scsi_hd_realize;
2686     sc->alloc_req    = scsi_new_request;
2687     sc->unit_attention_reported = scsi_disk_unit_attention_reported;
2688     dc->fw_name = "disk";
2689     dc->desc = "virtual SCSI disk";
2690     dc->reset = scsi_disk_reset;
2691     dc->props = scsi_hd_properties;
2692     dc->vmsd  = &vmstate_scsi_disk_state;
2693 }
2694 
2695 static const TypeInfo scsi_hd_info = {
2696     .name          = "scsi-hd",
2697     .parent        = TYPE_SCSI_DEVICE,
2698     .instance_size = sizeof(SCSIDiskState),
2699     .class_init    = scsi_hd_class_initfn,
2700 };
2701 
2702 static Property scsi_cd_properties[] = {
2703     DEFINE_SCSI_DISK_PROPERTIES(),
2704     DEFINE_PROP_UINT64("wwn", SCSIDiskState, wwn, 0),
2705     DEFINE_PROP_UINT64("port_wwn", SCSIDiskState, port_wwn, 0),
2706     DEFINE_PROP_UINT16("port_index", SCSIDiskState, port_index, 0),
2707     DEFINE_PROP_UINT64("max_io_size", SCSIDiskState, max_io_size,
2708                        DEFAULT_MAX_IO_SIZE),
2709     DEFINE_PROP_END_OF_LIST(),
2710 };
2711 
2712 static void scsi_cd_class_initfn(ObjectClass *klass, void *data)
2713 {
2714     DeviceClass *dc = DEVICE_CLASS(klass);
2715     SCSIDeviceClass *sc = SCSI_DEVICE_CLASS(klass);
2716 
2717     sc->realize      = scsi_cd_realize;
2718     sc->alloc_req    = scsi_new_request;
2719     sc->unit_attention_reported = scsi_disk_unit_attention_reported;
2720     dc->fw_name = "disk";
2721     dc->desc = "virtual SCSI CD-ROM";
2722     dc->reset = scsi_disk_reset;
2723     dc->props = scsi_cd_properties;
2724     dc->vmsd  = &vmstate_scsi_disk_state;
2725 }
2726 
2727 static const TypeInfo scsi_cd_info = {
2728     .name          = "scsi-cd",
2729     .parent        = TYPE_SCSI_DEVICE,
2730     .instance_size = sizeof(SCSIDiskState),
2731     .class_init    = scsi_cd_class_initfn,
2732 };
2733 
2734 #ifdef __linux__
2735 static Property scsi_block_properties[] = {
2736     DEFINE_PROP_DRIVE("drive", SCSIDiskState, qdev.conf.blk),
2737     DEFINE_PROP_END_OF_LIST(),
2738 };
2739 
2740 static void scsi_block_class_initfn(ObjectClass *klass, void *data)
2741 {
2742     DeviceClass *dc = DEVICE_CLASS(klass);
2743     SCSIDeviceClass *sc = SCSI_DEVICE_CLASS(klass);
2744 
2745     sc->realize      = scsi_block_realize;
2746     sc->alloc_req    = scsi_block_new_request;
2747     sc->parse_cdb    = scsi_block_parse_cdb;
2748     dc->fw_name = "disk";
2749     dc->desc = "SCSI block device passthrough";
2750     dc->reset = scsi_disk_reset;
2751     dc->props = scsi_block_properties;
2752     dc->vmsd  = &vmstate_scsi_disk_state;
2753 }
2754 
2755 static const TypeInfo scsi_block_info = {
2756     .name          = "scsi-block",
2757     .parent        = TYPE_SCSI_DEVICE,
2758     .instance_size = sizeof(SCSIDiskState),
2759     .class_init    = scsi_block_class_initfn,
2760 };
2761 #endif
2762 
2763 static Property scsi_disk_properties[] = {
2764     DEFINE_SCSI_DISK_PROPERTIES(),
2765     DEFINE_PROP_BIT("removable", SCSIDiskState, features,
2766                     SCSI_DISK_F_REMOVABLE, false),
2767     DEFINE_PROP_BIT("dpofua", SCSIDiskState, features,
2768                     SCSI_DISK_F_DPOFUA, false),
2769     DEFINE_PROP_UINT64("wwn", SCSIDiskState, wwn, 0),
2770     DEFINE_PROP_UINT64("port_wwn", SCSIDiskState, port_wwn, 0),
2771     DEFINE_PROP_UINT16("port_index", SCSIDiskState, port_index, 0),
2772     DEFINE_PROP_UINT64("max_unmap_size", SCSIDiskState, max_unmap_size,
2773                        DEFAULT_MAX_UNMAP_SIZE),
2774     DEFINE_PROP_UINT64("max_io_size", SCSIDiskState, max_io_size,
2775                        DEFAULT_MAX_IO_SIZE),
2776     DEFINE_PROP_END_OF_LIST(),
2777 };
2778 
2779 static void scsi_disk_class_initfn(ObjectClass *klass, void *data)
2780 {
2781     DeviceClass *dc = DEVICE_CLASS(klass);
2782     SCSIDeviceClass *sc = SCSI_DEVICE_CLASS(klass);
2783 
2784     sc->realize      = scsi_disk_realize;
2785     sc->alloc_req    = scsi_new_request;
2786     sc->unit_attention_reported = scsi_disk_unit_attention_reported;
2787     dc->fw_name = "disk";
2788     dc->desc = "virtual SCSI disk or CD-ROM (legacy)";
2789     dc->reset = scsi_disk_reset;
2790     dc->props = scsi_disk_properties;
2791     dc->vmsd  = &vmstate_scsi_disk_state;
2792 }
2793 
2794 static const TypeInfo scsi_disk_info = {
2795     .name          = "scsi-disk",
2796     .parent        = TYPE_SCSI_DEVICE,
2797     .instance_size = sizeof(SCSIDiskState),
2798     .class_init    = scsi_disk_class_initfn,
2799 };
2800 
2801 static void scsi_disk_register_types(void)
2802 {
2803     type_register_static(&scsi_hd_info);
2804     type_register_static(&scsi_cd_info);
2805 #ifdef __linux__
2806     type_register_static(&scsi_block_info);
2807 #endif
2808     type_register_static(&scsi_disk_info);
2809 }
2810 
2811 type_init(scsi_disk_register_types)
2812