/*
 * Handler for virtio-blk I/O
 *
 * Copyright (c) 2020 Red Hat, Inc.
 * Copyright (C) 2022 Bytedance Inc. and/or its affiliates. All rights reserved.
 *
 * Author:
 *   Coiby Xu <coiby.xu@gmail.com>
 *   Xie Yongji <xieyongji@bytedance.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or
 * later. See the COPYING file in the top-level directory.
 */
145c368029SXie Yongji
155c368029SXie Yongji #include "qemu/osdep.h"
165c368029SXie Yongji #include "qemu/error-report.h"
175c368029SXie Yongji #include "virtio-blk-handler.h"
185c368029SXie Yongji
195c368029SXie Yongji #include "standard-headers/linux/virtio_blk.h"
205c368029SXie Yongji
/*
 * Trailer of a virtio-blk request as seen by this handler: a single status
 * byte located at the very end of the device-writable buffers.
 */
struct virtio_blk_inhdr {
    unsigned char status;   /* one of the VIRTIO_BLK_S_* result codes */
};
245c368029SXie Yongji
25*d8fbf9aaSKevin Wolf static bool coroutine_fn
virtio_blk_sect_range_ok(BlockBackend * blk,uint32_t block_size,uint64_t sector,size_t size)26*d8fbf9aaSKevin Wolf virtio_blk_sect_range_ok(BlockBackend *blk, uint32_t block_size,
275c368029SXie Yongji uint64_t sector, size_t size)
285c368029SXie Yongji {
295c368029SXie Yongji uint64_t nb_sectors;
305c368029SXie Yongji uint64_t total_sectors;
315c368029SXie Yongji
325c368029SXie Yongji if (size % VIRTIO_BLK_SECTOR_SIZE) {
335c368029SXie Yongji return false;
345c368029SXie Yongji }
355c368029SXie Yongji
365c368029SXie Yongji nb_sectors = size >> VIRTIO_BLK_SECTOR_BITS;
375c368029SXie Yongji
385c368029SXie Yongji QEMU_BUILD_BUG_ON(BDRV_SECTOR_SIZE != VIRTIO_BLK_SECTOR_SIZE);
395c368029SXie Yongji if (nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
405c368029SXie Yongji return false;
415c368029SXie Yongji }
425c368029SXie Yongji if ((sector << VIRTIO_BLK_SECTOR_BITS) % block_size) {
435c368029SXie Yongji return false;
445c368029SXie Yongji }
45*d8fbf9aaSKevin Wolf blk_co_get_geometry(blk, &total_sectors);
465c368029SXie Yongji if (sector > total_sectors || nb_sectors > total_sectors - sector) {
475c368029SXie Yongji return false;
485c368029SXie Yongji }
495c368029SXie Yongji return true;
505c368029SXie Yongji }
515c368029SXie Yongji
525c368029SXie Yongji static int coroutine_fn
virtio_blk_discard_write_zeroes(VirtioBlkHandler * handler,struct iovec * iov,uint32_t iovcnt,uint32_t type)535c368029SXie Yongji virtio_blk_discard_write_zeroes(VirtioBlkHandler *handler, struct iovec *iov,
545c368029SXie Yongji uint32_t iovcnt, uint32_t type)
555c368029SXie Yongji {
565c368029SXie Yongji BlockBackend *blk = handler->blk;
575c368029SXie Yongji struct virtio_blk_discard_write_zeroes desc;
585c368029SXie Yongji ssize_t size;
595c368029SXie Yongji uint64_t sector;
605c368029SXie Yongji uint32_t num_sectors;
615c368029SXie Yongji uint32_t max_sectors;
625c368029SXie Yongji uint32_t flags;
635c368029SXie Yongji int bytes;
645c368029SXie Yongji
655c368029SXie Yongji /* Only one desc is currently supported */
665c368029SXie Yongji if (unlikely(iov_size(iov, iovcnt) > sizeof(desc))) {
675c368029SXie Yongji return VIRTIO_BLK_S_UNSUPP;
685c368029SXie Yongji }
695c368029SXie Yongji
705c368029SXie Yongji size = iov_to_buf(iov, iovcnt, 0, &desc, sizeof(desc));
715c368029SXie Yongji if (unlikely(size != sizeof(desc))) {
725c368029SXie Yongji error_report("Invalid size %zd, expected %zu", size, sizeof(desc));
735c368029SXie Yongji return VIRTIO_BLK_S_IOERR;
745c368029SXie Yongji }
755c368029SXie Yongji
765c368029SXie Yongji sector = le64_to_cpu(desc.sector);
775c368029SXie Yongji num_sectors = le32_to_cpu(desc.num_sectors);
785c368029SXie Yongji flags = le32_to_cpu(desc.flags);
795c368029SXie Yongji max_sectors = (type == VIRTIO_BLK_T_WRITE_ZEROES) ?
805c368029SXie Yongji VIRTIO_BLK_MAX_WRITE_ZEROES_SECTORS :
815c368029SXie Yongji VIRTIO_BLK_MAX_DISCARD_SECTORS;
825c368029SXie Yongji
835c368029SXie Yongji /* This check ensures that 'bytes' fits in an int */
845c368029SXie Yongji if (unlikely(num_sectors > max_sectors)) {
855c368029SXie Yongji return VIRTIO_BLK_S_IOERR;
865c368029SXie Yongji }
875c368029SXie Yongji
885c368029SXie Yongji bytes = num_sectors << VIRTIO_BLK_SECTOR_BITS;
895c368029SXie Yongji
905c368029SXie Yongji if (unlikely(!virtio_blk_sect_range_ok(blk, handler->logical_block_size,
915c368029SXie Yongji sector, bytes))) {
925c368029SXie Yongji return VIRTIO_BLK_S_IOERR;
935c368029SXie Yongji }
945c368029SXie Yongji
955c368029SXie Yongji /*
965c368029SXie Yongji * The device MUST set the status byte to VIRTIO_BLK_S_UNSUPP for discard
975c368029SXie Yongji * and write zeroes commands if any unknown flag is set.
985c368029SXie Yongji */
995c368029SXie Yongji if (unlikely(flags & ~VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP)) {
1005c368029SXie Yongji return VIRTIO_BLK_S_UNSUPP;
1015c368029SXie Yongji }
1025c368029SXie Yongji
1035c368029SXie Yongji if (type == VIRTIO_BLK_T_WRITE_ZEROES) {
1045c368029SXie Yongji int blk_flags = 0;
1055c368029SXie Yongji
1065c368029SXie Yongji if (flags & VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP) {
1075c368029SXie Yongji blk_flags |= BDRV_REQ_MAY_UNMAP;
1085c368029SXie Yongji }
1095c368029SXie Yongji
1105c368029SXie Yongji if (blk_co_pwrite_zeroes(blk, sector << VIRTIO_BLK_SECTOR_BITS,
1115c368029SXie Yongji bytes, blk_flags) == 0) {
1125c368029SXie Yongji return VIRTIO_BLK_S_OK;
1135c368029SXie Yongji }
1145c368029SXie Yongji } else if (type == VIRTIO_BLK_T_DISCARD) {
1155c368029SXie Yongji /*
1165c368029SXie Yongji * The device MUST set the status byte to VIRTIO_BLK_S_UNSUPP for
1175c368029SXie Yongji * discard commands if the unmap flag is set.
1185c368029SXie Yongji */
1195c368029SXie Yongji if (unlikely(flags & VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP)) {
1205c368029SXie Yongji return VIRTIO_BLK_S_UNSUPP;
1215c368029SXie Yongji }
1225c368029SXie Yongji
1235c368029SXie Yongji if (blk_co_pdiscard(blk, sector << VIRTIO_BLK_SECTOR_BITS,
1245c368029SXie Yongji bytes) == 0) {
1255c368029SXie Yongji return VIRTIO_BLK_S_OK;
1265c368029SXie Yongji }
1275c368029SXie Yongji }
1285c368029SXie Yongji
1295c368029SXie Yongji return VIRTIO_BLK_S_IOERR;
1305c368029SXie Yongji }
1315c368029SXie Yongji
virtio_blk_process_req(VirtioBlkHandler * handler,struct iovec * in_iov,struct iovec * out_iov,unsigned int in_num,unsigned int out_num)1325c368029SXie Yongji int coroutine_fn virtio_blk_process_req(VirtioBlkHandler *handler,
1335c368029SXie Yongji struct iovec *in_iov,
1345c368029SXie Yongji struct iovec *out_iov,
1355c368029SXie Yongji unsigned int in_num,
1365c368029SXie Yongji unsigned int out_num)
1375c368029SXie Yongji {
1385c368029SXie Yongji BlockBackend *blk = handler->blk;
1395c368029SXie Yongji struct virtio_blk_inhdr *in;
1405c368029SXie Yongji struct virtio_blk_outhdr out;
1415c368029SXie Yongji uint32_t type;
1425c368029SXie Yongji int in_len;
1435c368029SXie Yongji
1445c368029SXie Yongji if (out_num < 1 || in_num < 1) {
1455c368029SXie Yongji error_report("virtio-blk request missing headers");
1465c368029SXie Yongji return -EINVAL;
1475c368029SXie Yongji }
1485c368029SXie Yongji
1495c368029SXie Yongji if (unlikely(iov_to_buf(out_iov, out_num, 0, &out,
1505c368029SXie Yongji sizeof(out)) != sizeof(out))) {
1515c368029SXie Yongji error_report("virtio-blk request outhdr too short");
1525c368029SXie Yongji return -EINVAL;
1535c368029SXie Yongji }
1545c368029SXie Yongji
1555c368029SXie Yongji iov_discard_front(&out_iov, &out_num, sizeof(out));
1565c368029SXie Yongji
1575c368029SXie Yongji if (in_iov[in_num - 1].iov_len < sizeof(struct virtio_blk_inhdr)) {
1585c368029SXie Yongji error_report("virtio-blk request inhdr too short");
1595c368029SXie Yongji return -EINVAL;
1605c368029SXie Yongji }
1615c368029SXie Yongji
1625c368029SXie Yongji /* We always touch the last byte, so just see how big in_iov is. */
1635c368029SXie Yongji in_len = iov_size(in_iov, in_num);
1645c368029SXie Yongji in = (void *)in_iov[in_num - 1].iov_base
1655c368029SXie Yongji + in_iov[in_num - 1].iov_len
1665c368029SXie Yongji - sizeof(struct virtio_blk_inhdr);
1675c368029SXie Yongji iov_discard_back(in_iov, &in_num, sizeof(struct virtio_blk_inhdr));
1685c368029SXie Yongji
1695c368029SXie Yongji type = le32_to_cpu(out.type);
1705c368029SXie Yongji switch (type & ~VIRTIO_BLK_T_BARRIER) {
1715c368029SXie Yongji case VIRTIO_BLK_T_IN:
1725c368029SXie Yongji case VIRTIO_BLK_T_OUT: {
1735c368029SXie Yongji QEMUIOVector qiov;
1745c368029SXie Yongji int64_t offset;
1755c368029SXie Yongji ssize_t ret = 0;
1765c368029SXie Yongji bool is_write = type & VIRTIO_BLK_T_OUT;
1775c368029SXie Yongji int64_t sector_num = le64_to_cpu(out.sector);
1785c368029SXie Yongji
1795c368029SXie Yongji if (is_write && !handler->writable) {
1805c368029SXie Yongji in->status = VIRTIO_BLK_S_IOERR;
1815c368029SXie Yongji break;
1825c368029SXie Yongji }
1835c368029SXie Yongji
1845c368029SXie Yongji if (is_write) {
1855c368029SXie Yongji qemu_iovec_init_external(&qiov, out_iov, out_num);
1865c368029SXie Yongji } else {
1875c368029SXie Yongji qemu_iovec_init_external(&qiov, in_iov, in_num);
1885c368029SXie Yongji }
1895c368029SXie Yongji
1905c368029SXie Yongji if (unlikely(!virtio_blk_sect_range_ok(blk,
1915c368029SXie Yongji handler->logical_block_size,
1925c368029SXie Yongji sector_num, qiov.size))) {
1935c368029SXie Yongji in->status = VIRTIO_BLK_S_IOERR;
1945c368029SXie Yongji break;
1955c368029SXie Yongji }
1965c368029SXie Yongji
1975c368029SXie Yongji offset = sector_num << VIRTIO_BLK_SECTOR_BITS;
1985c368029SXie Yongji
1995c368029SXie Yongji if (is_write) {
2005c368029SXie Yongji ret = blk_co_pwritev(blk, offset, qiov.size, &qiov, 0);
2015c368029SXie Yongji } else {
2025c368029SXie Yongji ret = blk_co_preadv(blk, offset, qiov.size, &qiov, 0);
2035c368029SXie Yongji }
2045c368029SXie Yongji if (ret >= 0) {
2055c368029SXie Yongji in->status = VIRTIO_BLK_S_OK;
2065c368029SXie Yongji } else {
2075c368029SXie Yongji in->status = VIRTIO_BLK_S_IOERR;
2085c368029SXie Yongji }
2095c368029SXie Yongji break;
2105c368029SXie Yongji }
2115c368029SXie Yongji case VIRTIO_BLK_T_FLUSH:
2125c368029SXie Yongji if (blk_co_flush(blk) == 0) {
2135c368029SXie Yongji in->status = VIRTIO_BLK_S_OK;
2145c368029SXie Yongji } else {
2155c368029SXie Yongji in->status = VIRTIO_BLK_S_IOERR;
2165c368029SXie Yongji }
2175c368029SXie Yongji break;
2185c368029SXie Yongji case VIRTIO_BLK_T_GET_ID: {
2195c368029SXie Yongji size_t size = MIN(strlen(handler->serial) + 1,
2205c368029SXie Yongji MIN(iov_size(in_iov, in_num),
2215c368029SXie Yongji VIRTIO_BLK_ID_BYTES));
2225c368029SXie Yongji iov_from_buf(in_iov, in_num, 0, handler->serial, size);
2235c368029SXie Yongji in->status = VIRTIO_BLK_S_OK;
2245c368029SXie Yongji break;
2255c368029SXie Yongji }
2265c368029SXie Yongji case VIRTIO_BLK_T_DISCARD:
2275c368029SXie Yongji case VIRTIO_BLK_T_WRITE_ZEROES:
2285c368029SXie Yongji if (!handler->writable) {
2295c368029SXie Yongji in->status = VIRTIO_BLK_S_IOERR;
2305c368029SXie Yongji break;
2315c368029SXie Yongji }
2325c368029SXie Yongji in->status = virtio_blk_discard_write_zeroes(handler, out_iov,
2335c368029SXie Yongji out_num, type);
2345c368029SXie Yongji break;
2355c368029SXie Yongji default:
2365c368029SXie Yongji in->status = VIRTIO_BLK_S_UNSUPP;
2375c368029SXie Yongji break;
2385c368029SXie Yongji }
2395c368029SXie Yongji
2405c368029SXie Yongji return in_len;
2415c368029SXie Yongji }
242