1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * VDPA simulator for block device. 4 * 5 * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 6 * Copyright (c) 2021, Red Hat Inc. All rights reserved. 7 * 8 */ 9 10 #include <linux/init.h> 11 #include <linux/module.h> 12 #include <linux/device.h> 13 #include <linux/kernel.h> 14 #include <linux/sched.h> 15 #include <linux/blkdev.h> 16 #include <linux/vringh.h> 17 #include <linux/vdpa.h> 18 #include <uapi/linux/virtio_blk.h> 19 20 #include "vdpa_sim.h" 21 22 #define DRV_VERSION "0.1" 23 #define DRV_AUTHOR "Max Gurtovoy <mgurtovoy@nvidia.com>" 24 #define DRV_DESC "vDPA Device Simulator for block device" 25 #define DRV_LICENSE "GPL v2" 26 27 #define VDPASIM_BLK_FEATURES (VDPASIM_FEATURES | \ 28 (1ULL << VIRTIO_BLK_F_FLUSH) | \ 29 (1ULL << VIRTIO_BLK_F_SIZE_MAX) | \ 30 (1ULL << VIRTIO_BLK_F_SEG_MAX) | \ 31 (1ULL << VIRTIO_BLK_F_BLK_SIZE) | \ 32 (1ULL << VIRTIO_BLK_F_TOPOLOGY) | \ 33 (1ULL << VIRTIO_BLK_F_MQ) | \ 34 (1ULL << VIRTIO_BLK_F_DISCARD) | \ 35 (1ULL << VIRTIO_BLK_F_WRITE_ZEROES)) 36 37 #define VDPASIM_BLK_CAPACITY 0x40000 38 #define VDPASIM_BLK_SIZE_MAX 0x1000 39 #define VDPASIM_BLK_SEG_MAX 32 40 #define VDPASIM_BLK_DWZ_MAX_SECTORS UINT_MAX 41 42 /* 1 virtqueue, 1 address space, 1 virtqueue group */ 43 #define VDPASIM_BLK_VQ_NUM 1 44 #define VDPASIM_BLK_AS_NUM 1 45 #define VDPASIM_BLK_GROUP_NUM 1 46 47 static char vdpasim_blk_id[VIRTIO_BLK_ID_BYTES] = "vdpa_blk_sim"; 48 49 static bool vdpasim_blk_check_range(struct vdpasim *vdpasim, u64 start_sector, 50 u64 num_sectors, u64 max_sectors) 51 { 52 if (start_sector > VDPASIM_BLK_CAPACITY) { 53 dev_dbg(&vdpasim->vdpa.dev, 54 "starting sector exceeds the capacity - start: 0x%llx capacity: 0x%x\n", 55 start_sector, VDPASIM_BLK_CAPACITY); 56 } 57 58 if (num_sectors > max_sectors) { 59 dev_dbg(&vdpasim->vdpa.dev, 60 "number of sectors exceeds the max allowed in a request - num: 0x%llx max: 0x%llx\n", 61 num_sectors, max_sectors); 62 return false; 63 } 64 65 if (num_sectors > VDPASIM_BLK_CAPACITY - start_sector) { 66 dev_dbg(&vdpasim->vdpa.dev, 67 "request exceeds the capacity - start: 0x%llx num: 0x%llx capacity: 0x%x\n", 68 start_sector, num_sectors, VDPASIM_BLK_CAPACITY); 69 return false; 70 } 71 72 return true; 73 } 74 75 /* Returns 'true' if the request is handled (with or without an I/O error) 76 * and the status is correctly written in the last byte of the 'in iov', 77 * 'false' otherwise. 78 */ 79 static bool vdpasim_blk_handle_req(struct vdpasim *vdpasim, 80 struct vdpasim_virtqueue *vq) 81 { 82 size_t pushed = 0, to_pull, to_push; 83 struct virtio_blk_outhdr hdr; 84 bool handled = false; 85 ssize_t bytes; 86 loff_t offset; 87 u64 sector; 88 u8 status; 89 u32 type; 90 int ret; 91 92 ret = vringh_getdesc_iotlb(&vq->vring, &vq->out_iov, &vq->in_iov, 93 &vq->head, GFP_ATOMIC); 94 if (ret != 1) 95 return false; 96 97 if (vq->out_iov.used < 1 || vq->in_iov.used < 1) { 98 dev_dbg(&vdpasim->vdpa.dev, "missing headers - out_iov: %u in_iov %u\n", 99 vq->out_iov.used, vq->in_iov.used); 100 goto err; 101 } 102 103 if (vq->in_iov.iov[vq->in_iov.used - 1].iov_len < 1) { 104 dev_dbg(&vdpasim->vdpa.dev, "request in header too short\n"); 105 goto err; 106 } 107 108 /* The last byte is the status and we checked if the last iov has 109 * enough room for it. 110 */ 111 to_push = vringh_kiov_length(&vq->in_iov) - 1; 112 113 to_pull = vringh_kiov_length(&vq->out_iov); 114 115 bytes = vringh_iov_pull_iotlb(&vq->vring, &vq->out_iov, &hdr, 116 sizeof(hdr)); 117 if (bytes != sizeof(hdr)) { 118 dev_dbg(&vdpasim->vdpa.dev, "request out header too short\n"); 119 goto err; 120 } 121 122 to_pull -= bytes; 123 124 type = vdpasim32_to_cpu(vdpasim, hdr.type); 125 sector = vdpasim64_to_cpu(vdpasim, hdr.sector); 126 offset = sector << SECTOR_SHIFT; 127 status = VIRTIO_BLK_S_OK; 128 129 if (type != VIRTIO_BLK_T_IN && type != VIRTIO_BLK_T_OUT && 130 sector != 0) { 131 dev_dbg(&vdpasim->vdpa.dev, 132 "sector must be 0 for %u request - sector: 0x%llx\n", 133 type, sector); 134 status = VIRTIO_BLK_S_IOERR; 135 goto err_status; 136 } 137 138 switch (type) { 139 case VIRTIO_BLK_T_IN: 140 if (!vdpasim_blk_check_range(vdpasim, sector, 141 to_push >> SECTOR_SHIFT, 142 VDPASIM_BLK_SIZE_MAX * VDPASIM_BLK_SEG_MAX)) { 143 status = VIRTIO_BLK_S_IOERR; 144 break; 145 } 146 147 bytes = vringh_iov_push_iotlb(&vq->vring, &vq->in_iov, 148 vdpasim->buffer + offset, 149 to_push); 150 if (bytes < 0) { 151 dev_dbg(&vdpasim->vdpa.dev, 152 "vringh_iov_push_iotlb() error: %zd offset: 0x%llx len: 0x%zx\n", 153 bytes, offset, to_push); 154 status = VIRTIO_BLK_S_IOERR; 155 break; 156 } 157 158 pushed += bytes; 159 break; 160 161 case VIRTIO_BLK_T_OUT: 162 if (!vdpasim_blk_check_range(vdpasim, sector, 163 to_pull >> SECTOR_SHIFT, 164 VDPASIM_BLK_SIZE_MAX * VDPASIM_BLK_SEG_MAX)) { 165 status = VIRTIO_BLK_S_IOERR; 166 break; 167 } 168 169 bytes = vringh_iov_pull_iotlb(&vq->vring, &vq->out_iov, 170 vdpasim->buffer + offset, 171 to_pull); 172 if (bytes < 0) { 173 dev_dbg(&vdpasim->vdpa.dev, 174 "vringh_iov_pull_iotlb() error: %zd offset: 0x%llx len: 0x%zx\n", 175 bytes, offset, to_pull); 176 status = VIRTIO_BLK_S_IOERR; 177 break; 178 } 179 break; 180 181 case VIRTIO_BLK_T_GET_ID: 182 bytes = vringh_iov_push_iotlb(&vq->vring, &vq->in_iov, 183 vdpasim_blk_id, 184 VIRTIO_BLK_ID_BYTES); 185 if (bytes < 0) { 186 dev_dbg(&vdpasim->vdpa.dev, 187 "vringh_iov_push_iotlb() error: %zd\n", bytes); 188 status = VIRTIO_BLK_S_IOERR; 189 break; 190 } 191 192 pushed += bytes; 193 break; 194 195 case VIRTIO_BLK_T_FLUSH: 196 /* nothing to do */ 197 break; 198 199 case VIRTIO_BLK_T_DISCARD: 200 case VIRTIO_BLK_T_WRITE_ZEROES: { 201 struct virtio_blk_discard_write_zeroes range; 202 u32 num_sectors, flags; 203 204 if (to_pull != sizeof(range)) { 205 dev_dbg(&vdpasim->vdpa.dev, 206 "discard/write_zeroes header len: 0x%zx [expected: 0x%zx]\n", 207 to_pull, sizeof(range)); 208 status = VIRTIO_BLK_S_IOERR; 209 break; 210 } 211 212 bytes = vringh_iov_pull_iotlb(&vq->vring, &vq->out_iov, &range, 213 to_pull); 214 if (bytes < 0) { 215 dev_dbg(&vdpasim->vdpa.dev, 216 "vringh_iov_pull_iotlb() error: %zd offset: 0x%llx len: 0x%zx\n", 217 bytes, offset, to_pull); 218 status = VIRTIO_BLK_S_IOERR; 219 break; 220 } 221 222 sector = le64_to_cpu(range.sector); 223 offset = sector << SECTOR_SHIFT; 224 num_sectors = le32_to_cpu(range.num_sectors); 225 flags = le32_to_cpu(range.flags); 226 227 if (type == VIRTIO_BLK_T_DISCARD && flags != 0) { 228 dev_dbg(&vdpasim->vdpa.dev, 229 "discard unexpected flags set - flags: 0x%x\n", 230 flags); 231 status = VIRTIO_BLK_S_UNSUPP; 232 break; 233 } 234 235 if (type == VIRTIO_BLK_T_WRITE_ZEROES && 236 flags & ~VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP) { 237 dev_dbg(&vdpasim->vdpa.dev, 238 "write_zeroes unexpected flags set - flags: 0x%x\n", 239 flags); 240 status = VIRTIO_BLK_S_UNSUPP; 241 break; 242 } 243 244 if (!vdpasim_blk_check_range(vdpasim, sector, num_sectors, 245 VDPASIM_BLK_DWZ_MAX_SECTORS)) { 246 status = VIRTIO_BLK_S_IOERR; 247 break; 248 } 249 250 if (type == VIRTIO_BLK_T_WRITE_ZEROES) { 251 memset(vdpasim->buffer + offset, 0, 252 num_sectors << SECTOR_SHIFT); 253 } 254 255 break; 256 } 257 default: 258 dev_dbg(&vdpasim->vdpa.dev, 259 "Unsupported request type %d\n", type); 260 status = VIRTIO_BLK_S_IOERR; 261 break; 262 } 263 264 err_status: 265 /* If some operations fail, we need to skip the remaining bytes 266 * to put the status in the last byte 267 */ 268 if (to_push - pushed > 0) 269 vringh_kiov_advance(&vq->in_iov, to_push - pushed); 270 271 /* Last byte is the status */ 272 bytes = vringh_iov_push_iotlb(&vq->vring, &vq->in_iov, &status, 1); 273 if (bytes != 1) 274 goto err; 275 276 pushed += bytes; 277 278 /* Make sure data is wrote before advancing index */ 279 smp_wmb(); 280 281 handled = true; 282 283 err: 284 vringh_complete_iotlb(&vq->vring, vq->head, pushed); 285 286 return handled; 287 } 288 289 static void vdpasim_blk_work(struct work_struct *work) 290 { 291 struct vdpasim *vdpasim = container_of(work, struct vdpasim, work); 292 bool reschedule = false; 293 int i; 294 295 spin_lock(&vdpasim->lock); 296 297 if (!(vdpasim->status & VIRTIO_CONFIG_S_DRIVER_OK)) 298 goto out; 299 300 if (!vdpasim->running) 301 goto out; 302 303 for (i = 0; i < VDPASIM_BLK_VQ_NUM; i++) { 304 struct vdpasim_virtqueue *vq = &vdpasim->vqs[i]; 305 int reqs = 0; 306 307 if (!vq->ready) 308 continue; 309 310 while (vdpasim_blk_handle_req(vdpasim, vq)) { 311 /* Make sure used is visible before rasing the interrupt. */ 312 smp_wmb(); 313 314 local_bh_disable(); 315 if (vringh_need_notify_iotlb(&vq->vring) > 0) 316 vringh_notify(&vq->vring); 317 local_bh_enable(); 318 319 if (++reqs > 4) { 320 reschedule = true; 321 break; 322 } 323 } 324 } 325 out: 326 spin_unlock(&vdpasim->lock); 327 328 if (reschedule) 329 schedule_work(&vdpasim->work); 330 } 331 332 static void vdpasim_blk_get_config(struct vdpasim *vdpasim, void *config) 333 { 334 struct virtio_blk_config *blk_config = config; 335 336 memset(config, 0, sizeof(struct virtio_blk_config)); 337 338 blk_config->capacity = cpu_to_vdpasim64(vdpasim, VDPASIM_BLK_CAPACITY); 339 blk_config->size_max = cpu_to_vdpasim32(vdpasim, VDPASIM_BLK_SIZE_MAX); 340 blk_config->seg_max = cpu_to_vdpasim32(vdpasim, VDPASIM_BLK_SEG_MAX); 341 blk_config->num_queues = cpu_to_vdpasim16(vdpasim, VDPASIM_BLK_VQ_NUM); 342 blk_config->min_io_size = cpu_to_vdpasim16(vdpasim, 1); 343 blk_config->opt_io_size = cpu_to_vdpasim32(vdpasim, 1); 344 blk_config->blk_size = cpu_to_vdpasim32(vdpasim, SECTOR_SIZE); 345 /* VIRTIO_BLK_F_DISCARD */ 346 blk_config->discard_sector_alignment = 347 cpu_to_vdpasim32(vdpasim, SECTOR_SIZE); 348 blk_config->max_discard_sectors = 349 cpu_to_vdpasim32(vdpasim, VDPASIM_BLK_DWZ_MAX_SECTORS); 350 blk_config->max_discard_seg = cpu_to_vdpasim32(vdpasim, 1); 351 /* VIRTIO_BLK_F_WRITE_ZEROES */ 352 blk_config->max_write_zeroes_sectors = 353 cpu_to_vdpasim32(vdpasim, VDPASIM_BLK_DWZ_MAX_SECTORS); 354 blk_config->max_write_zeroes_seg = cpu_to_vdpasim32(vdpasim, 1); 355 356 } 357 358 static void vdpasim_blk_mgmtdev_release(struct device *dev) 359 { 360 } 361 362 static struct device vdpasim_blk_mgmtdev = { 363 .init_name = "vdpasim_blk", 364 .release = vdpasim_blk_mgmtdev_release, 365 }; 366 367 static int vdpasim_blk_dev_add(struct vdpa_mgmt_dev *mdev, const char *name, 368 const struct vdpa_dev_set_config *config) 369 { 370 struct vdpasim_dev_attr dev_attr = {}; 371 struct vdpasim *simdev; 372 int ret; 373 374 dev_attr.mgmt_dev = mdev; 375 dev_attr.name = name; 376 dev_attr.id = VIRTIO_ID_BLOCK; 377 dev_attr.supported_features = VDPASIM_BLK_FEATURES; 378 dev_attr.nvqs = VDPASIM_BLK_VQ_NUM; 379 dev_attr.ngroups = VDPASIM_BLK_GROUP_NUM; 380 dev_attr.nas = VDPASIM_BLK_AS_NUM; 381 dev_attr.config_size = sizeof(struct virtio_blk_config); 382 dev_attr.get_config = vdpasim_blk_get_config; 383 dev_attr.work_fn = vdpasim_blk_work; 384 dev_attr.buffer_size = VDPASIM_BLK_CAPACITY << SECTOR_SHIFT; 385 386 simdev = vdpasim_create(&dev_attr); 387 if (IS_ERR(simdev)) 388 return PTR_ERR(simdev); 389 390 ret = _vdpa_register_device(&simdev->vdpa, VDPASIM_BLK_VQ_NUM); 391 if (ret) 392 goto put_dev; 393 394 return 0; 395 396 put_dev: 397 put_device(&simdev->vdpa.dev); 398 return ret; 399 } 400 401 static void vdpasim_blk_dev_del(struct vdpa_mgmt_dev *mdev, 402 struct vdpa_device *dev) 403 { 404 struct vdpasim *simdev = container_of(dev, struct vdpasim, vdpa); 405 406 _vdpa_unregister_device(&simdev->vdpa); 407 } 408 409 static const struct vdpa_mgmtdev_ops vdpasim_blk_mgmtdev_ops = { 410 .dev_add = vdpasim_blk_dev_add, 411 .dev_del = vdpasim_blk_dev_del 412 }; 413 414 static struct virtio_device_id id_table[] = { 415 { VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID }, 416 { 0 }, 417 }; 418 419 static struct vdpa_mgmt_dev mgmt_dev = { 420 .device = &vdpasim_blk_mgmtdev, 421 .id_table = id_table, 422 .ops = &vdpasim_blk_mgmtdev_ops, 423 }; 424 425 static int __init vdpasim_blk_init(void) 426 { 427 int ret; 428 429 ret = device_register(&vdpasim_blk_mgmtdev); 430 if (ret) 431 return ret; 432 433 ret = vdpa_mgmtdev_register(&mgmt_dev); 434 if (ret) 435 goto parent_err; 436 437 return 0; 438 439 parent_err: 440 device_unregister(&vdpasim_blk_mgmtdev); 441 return ret; 442 } 443 444 static void __exit vdpasim_blk_exit(void) 445 { 446 vdpa_mgmtdev_unregister(&mgmt_dev); 447 device_unregister(&vdpasim_blk_mgmtdev); 448 } 449 450 module_init(vdpasim_blk_init) 451 module_exit(vdpasim_blk_exit) 452 453 MODULE_VERSION(DRV_VERSION); 454 MODULE_LICENSE(DRV_LICENSE); 455 MODULE_AUTHOR(DRV_AUTHOR); 456 MODULE_DESCRIPTION(DRV_DESC); 457