/*
 * Handler for virtio-blk I/O
 *
 * Copyright (c) 2020 Red Hat, Inc.
 * Copyright (C) 2022 Bytedance Inc. and/or its affiliates. All rights reserved.
 *
 * Author:
 *   Coiby Xu <coiby.xu@gmail.com>
 *   Xie Yongji <xieyongji@bytedance.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or
 * later.  See the COPYING file in the top-level directory.
 */
145c368029SXie Yongji 
155c368029SXie Yongji #include "qemu/osdep.h"
165c368029SXie Yongji #include "qemu/error-report.h"
175c368029SXie Yongji #include "virtio-blk-handler.h"
185c368029SXie Yongji 
195c368029SXie Yongji #include "standard-headers/linux/virtio_blk.h"
205c368029SXie Yongji 
/*
 * Trailer of a virtio-blk request: a single status byte that the request
 * handler fills in with one of the VIRTIO_BLK_S_* codes.
 */
struct virtio_blk_inhdr {
    unsigned char status;
};
245c368029SXie Yongji 
25*d8fbf9aaSKevin Wolf static bool coroutine_fn
virtio_blk_sect_range_ok(BlockBackend * blk,uint32_t block_size,uint64_t sector,size_t size)26*d8fbf9aaSKevin Wolf virtio_blk_sect_range_ok(BlockBackend *blk, uint32_t block_size,
275c368029SXie Yongji                          uint64_t sector, size_t size)
285c368029SXie Yongji {
295c368029SXie Yongji     uint64_t nb_sectors;
305c368029SXie Yongji     uint64_t total_sectors;
315c368029SXie Yongji 
325c368029SXie Yongji     if (size % VIRTIO_BLK_SECTOR_SIZE) {
335c368029SXie Yongji         return false;
345c368029SXie Yongji     }
355c368029SXie Yongji 
365c368029SXie Yongji     nb_sectors = size >> VIRTIO_BLK_SECTOR_BITS;
375c368029SXie Yongji 
385c368029SXie Yongji     QEMU_BUILD_BUG_ON(BDRV_SECTOR_SIZE != VIRTIO_BLK_SECTOR_SIZE);
395c368029SXie Yongji     if (nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
405c368029SXie Yongji         return false;
415c368029SXie Yongji     }
425c368029SXie Yongji     if ((sector << VIRTIO_BLK_SECTOR_BITS) % block_size) {
435c368029SXie Yongji         return false;
445c368029SXie Yongji     }
45*d8fbf9aaSKevin Wolf     blk_co_get_geometry(blk, &total_sectors);
465c368029SXie Yongji     if (sector > total_sectors || nb_sectors > total_sectors - sector) {
475c368029SXie Yongji         return false;
485c368029SXie Yongji     }
495c368029SXie Yongji     return true;
505c368029SXie Yongji }
515c368029SXie Yongji 
525c368029SXie Yongji static int coroutine_fn
virtio_blk_discard_write_zeroes(VirtioBlkHandler * handler,struct iovec * iov,uint32_t iovcnt,uint32_t type)535c368029SXie Yongji virtio_blk_discard_write_zeroes(VirtioBlkHandler *handler, struct iovec *iov,
545c368029SXie Yongji                                 uint32_t iovcnt, uint32_t type)
555c368029SXie Yongji {
565c368029SXie Yongji     BlockBackend *blk = handler->blk;
575c368029SXie Yongji     struct virtio_blk_discard_write_zeroes desc;
585c368029SXie Yongji     ssize_t size;
595c368029SXie Yongji     uint64_t sector;
605c368029SXie Yongji     uint32_t num_sectors;
615c368029SXie Yongji     uint32_t max_sectors;
625c368029SXie Yongji     uint32_t flags;
635c368029SXie Yongji     int bytes;
645c368029SXie Yongji 
655c368029SXie Yongji     /* Only one desc is currently supported */
665c368029SXie Yongji     if (unlikely(iov_size(iov, iovcnt) > sizeof(desc))) {
675c368029SXie Yongji         return VIRTIO_BLK_S_UNSUPP;
685c368029SXie Yongji     }
695c368029SXie Yongji 
705c368029SXie Yongji     size = iov_to_buf(iov, iovcnt, 0, &desc, sizeof(desc));
715c368029SXie Yongji     if (unlikely(size != sizeof(desc))) {
725c368029SXie Yongji         error_report("Invalid size %zd, expected %zu", size, sizeof(desc));
735c368029SXie Yongji         return VIRTIO_BLK_S_IOERR;
745c368029SXie Yongji     }
755c368029SXie Yongji 
765c368029SXie Yongji     sector = le64_to_cpu(desc.sector);
775c368029SXie Yongji     num_sectors = le32_to_cpu(desc.num_sectors);
785c368029SXie Yongji     flags = le32_to_cpu(desc.flags);
795c368029SXie Yongji     max_sectors = (type == VIRTIO_BLK_T_WRITE_ZEROES) ?
805c368029SXie Yongji                   VIRTIO_BLK_MAX_WRITE_ZEROES_SECTORS :
815c368029SXie Yongji                   VIRTIO_BLK_MAX_DISCARD_SECTORS;
825c368029SXie Yongji 
835c368029SXie Yongji     /* This check ensures that 'bytes' fits in an int */
845c368029SXie Yongji     if (unlikely(num_sectors > max_sectors)) {
855c368029SXie Yongji         return VIRTIO_BLK_S_IOERR;
865c368029SXie Yongji     }
875c368029SXie Yongji 
885c368029SXie Yongji     bytes = num_sectors << VIRTIO_BLK_SECTOR_BITS;
895c368029SXie Yongji 
905c368029SXie Yongji     if (unlikely(!virtio_blk_sect_range_ok(blk, handler->logical_block_size,
915c368029SXie Yongji                                            sector, bytes))) {
925c368029SXie Yongji         return VIRTIO_BLK_S_IOERR;
935c368029SXie Yongji     }
945c368029SXie Yongji 
955c368029SXie Yongji     /*
965c368029SXie Yongji      * The device MUST set the status byte to VIRTIO_BLK_S_UNSUPP for discard
975c368029SXie Yongji      * and write zeroes commands if any unknown flag is set.
985c368029SXie Yongji      */
995c368029SXie Yongji     if (unlikely(flags & ~VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP)) {
1005c368029SXie Yongji         return VIRTIO_BLK_S_UNSUPP;
1015c368029SXie Yongji     }
1025c368029SXie Yongji 
1035c368029SXie Yongji     if (type == VIRTIO_BLK_T_WRITE_ZEROES) {
1045c368029SXie Yongji         int blk_flags = 0;
1055c368029SXie Yongji 
1065c368029SXie Yongji         if (flags & VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP) {
1075c368029SXie Yongji             blk_flags |= BDRV_REQ_MAY_UNMAP;
1085c368029SXie Yongji         }
1095c368029SXie Yongji 
1105c368029SXie Yongji         if (blk_co_pwrite_zeroes(blk, sector << VIRTIO_BLK_SECTOR_BITS,
1115c368029SXie Yongji                                  bytes, blk_flags) == 0) {
1125c368029SXie Yongji             return VIRTIO_BLK_S_OK;
1135c368029SXie Yongji         }
1145c368029SXie Yongji     } else if (type == VIRTIO_BLK_T_DISCARD) {
1155c368029SXie Yongji         /*
1165c368029SXie Yongji          * The device MUST set the status byte to VIRTIO_BLK_S_UNSUPP for
1175c368029SXie Yongji          * discard commands if the unmap flag is set.
1185c368029SXie Yongji          */
1195c368029SXie Yongji         if (unlikely(flags & VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP)) {
1205c368029SXie Yongji             return VIRTIO_BLK_S_UNSUPP;
1215c368029SXie Yongji         }
1225c368029SXie Yongji 
1235c368029SXie Yongji         if (blk_co_pdiscard(blk, sector << VIRTIO_BLK_SECTOR_BITS,
1245c368029SXie Yongji                             bytes) == 0) {
1255c368029SXie Yongji             return VIRTIO_BLK_S_OK;
1265c368029SXie Yongji         }
1275c368029SXie Yongji     }
1285c368029SXie Yongji 
1295c368029SXie Yongji     return VIRTIO_BLK_S_IOERR;
1305c368029SXie Yongji }
1315c368029SXie Yongji 
virtio_blk_process_req(VirtioBlkHandler * handler,struct iovec * in_iov,struct iovec * out_iov,unsigned int in_num,unsigned int out_num)1325c368029SXie Yongji int coroutine_fn virtio_blk_process_req(VirtioBlkHandler *handler,
1335c368029SXie Yongji                                         struct iovec *in_iov,
1345c368029SXie Yongji                                         struct iovec *out_iov,
1355c368029SXie Yongji                                         unsigned int in_num,
1365c368029SXie Yongji                                         unsigned int out_num)
1375c368029SXie Yongji {
1385c368029SXie Yongji     BlockBackend *blk = handler->blk;
1395c368029SXie Yongji     struct virtio_blk_inhdr *in;
1405c368029SXie Yongji     struct virtio_blk_outhdr out;
1415c368029SXie Yongji     uint32_t type;
1425c368029SXie Yongji     int in_len;
1435c368029SXie Yongji 
1445c368029SXie Yongji     if (out_num < 1 || in_num < 1) {
1455c368029SXie Yongji         error_report("virtio-blk request missing headers");
1465c368029SXie Yongji         return -EINVAL;
1475c368029SXie Yongji     }
1485c368029SXie Yongji 
1495c368029SXie Yongji     if (unlikely(iov_to_buf(out_iov, out_num, 0, &out,
1505c368029SXie Yongji                             sizeof(out)) != sizeof(out))) {
1515c368029SXie Yongji         error_report("virtio-blk request outhdr too short");
1525c368029SXie Yongji         return -EINVAL;
1535c368029SXie Yongji     }
1545c368029SXie Yongji 
1555c368029SXie Yongji     iov_discard_front(&out_iov, &out_num, sizeof(out));
1565c368029SXie Yongji 
1575c368029SXie Yongji     if (in_iov[in_num - 1].iov_len < sizeof(struct virtio_blk_inhdr)) {
1585c368029SXie Yongji         error_report("virtio-blk request inhdr too short");
1595c368029SXie Yongji         return -EINVAL;
1605c368029SXie Yongji     }
1615c368029SXie Yongji 
1625c368029SXie Yongji     /* We always touch the last byte, so just see how big in_iov is. */
1635c368029SXie Yongji     in_len = iov_size(in_iov, in_num);
1645c368029SXie Yongji     in = (void *)in_iov[in_num - 1].iov_base
1655c368029SXie Yongji                  + in_iov[in_num - 1].iov_len
1665c368029SXie Yongji                  - sizeof(struct virtio_blk_inhdr);
1675c368029SXie Yongji     iov_discard_back(in_iov, &in_num, sizeof(struct virtio_blk_inhdr));
1685c368029SXie Yongji 
1695c368029SXie Yongji     type = le32_to_cpu(out.type);
1705c368029SXie Yongji     switch (type & ~VIRTIO_BLK_T_BARRIER) {
1715c368029SXie Yongji     case VIRTIO_BLK_T_IN:
1725c368029SXie Yongji     case VIRTIO_BLK_T_OUT: {
1735c368029SXie Yongji         QEMUIOVector qiov;
1745c368029SXie Yongji         int64_t offset;
1755c368029SXie Yongji         ssize_t ret = 0;
1765c368029SXie Yongji         bool is_write = type & VIRTIO_BLK_T_OUT;
1775c368029SXie Yongji         int64_t sector_num = le64_to_cpu(out.sector);
1785c368029SXie Yongji 
1795c368029SXie Yongji         if (is_write && !handler->writable) {
1805c368029SXie Yongji             in->status = VIRTIO_BLK_S_IOERR;
1815c368029SXie Yongji             break;
1825c368029SXie Yongji         }
1835c368029SXie Yongji 
1845c368029SXie Yongji         if (is_write) {
1855c368029SXie Yongji             qemu_iovec_init_external(&qiov, out_iov, out_num);
1865c368029SXie Yongji         } else {
1875c368029SXie Yongji             qemu_iovec_init_external(&qiov, in_iov, in_num);
1885c368029SXie Yongji         }
1895c368029SXie Yongji 
1905c368029SXie Yongji         if (unlikely(!virtio_blk_sect_range_ok(blk,
1915c368029SXie Yongji                                                handler->logical_block_size,
1925c368029SXie Yongji                                                sector_num, qiov.size))) {
1935c368029SXie Yongji             in->status = VIRTIO_BLK_S_IOERR;
1945c368029SXie Yongji             break;
1955c368029SXie Yongji         }
1965c368029SXie Yongji 
1975c368029SXie Yongji         offset = sector_num << VIRTIO_BLK_SECTOR_BITS;
1985c368029SXie Yongji 
1995c368029SXie Yongji         if (is_write) {
2005c368029SXie Yongji             ret = blk_co_pwritev(blk, offset, qiov.size, &qiov, 0);
2015c368029SXie Yongji         } else {
2025c368029SXie Yongji             ret = blk_co_preadv(blk, offset, qiov.size, &qiov, 0);
2035c368029SXie Yongji         }
2045c368029SXie Yongji         if (ret >= 0) {
2055c368029SXie Yongji             in->status = VIRTIO_BLK_S_OK;
2065c368029SXie Yongji         } else {
2075c368029SXie Yongji             in->status = VIRTIO_BLK_S_IOERR;
2085c368029SXie Yongji         }
2095c368029SXie Yongji         break;
2105c368029SXie Yongji     }
2115c368029SXie Yongji     case VIRTIO_BLK_T_FLUSH:
2125c368029SXie Yongji         if (blk_co_flush(blk) == 0) {
2135c368029SXie Yongji             in->status = VIRTIO_BLK_S_OK;
2145c368029SXie Yongji         } else {
2155c368029SXie Yongji             in->status = VIRTIO_BLK_S_IOERR;
2165c368029SXie Yongji         }
2175c368029SXie Yongji         break;
2185c368029SXie Yongji     case VIRTIO_BLK_T_GET_ID: {
2195c368029SXie Yongji         size_t size = MIN(strlen(handler->serial) + 1,
2205c368029SXie Yongji                           MIN(iov_size(in_iov, in_num),
2215c368029SXie Yongji                               VIRTIO_BLK_ID_BYTES));
2225c368029SXie Yongji         iov_from_buf(in_iov, in_num, 0, handler->serial, size);
2235c368029SXie Yongji         in->status = VIRTIO_BLK_S_OK;
2245c368029SXie Yongji         break;
2255c368029SXie Yongji     }
2265c368029SXie Yongji     case VIRTIO_BLK_T_DISCARD:
2275c368029SXie Yongji     case VIRTIO_BLK_T_WRITE_ZEROES:
2285c368029SXie Yongji         if (!handler->writable) {
2295c368029SXie Yongji             in->status = VIRTIO_BLK_S_IOERR;
2305c368029SXie Yongji             break;
2315c368029SXie Yongji         }
2325c368029SXie Yongji         in->status = virtio_blk_discard_write_zeroes(handler, out_iov,
2335c368029SXie Yongji                                                      out_num, type);
2345c368029SXie Yongji         break;
2355c368029SXie Yongji     default:
2365c368029SXie Yongji         in->status = VIRTIO_BLK_S_UNSUPP;
2375c368029SXie Yongji         break;
2385c368029SXie Yongji     }
2395c368029SXie Yongji 
2405c368029SXie Yongji     return in_len;
2415c368029SXie Yongji }
242