// SPDX-License-Identifier: GPL-2.0-only
/*
 * VDPA simulator for block device.
 *
 * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
 * Copyright (c) 2021, Red Hat Inc. All rights reserved.
 *
 */

#include <linux/init.h>
#include <linux/module.h>
#include <linux/device.h>
#include <linux/kernel.h>
#include <linux/blkdev.h>
#include <linux/vringh.h>
#include <linux/vdpa.h>
#include <uapi/linux/virtio_blk.h>

#include "vdpa_sim.h"

#define DRV_VERSION  "0.1"
#define DRV_AUTHOR   "Max Gurtovoy <mgurtovoy@nvidia.com>"
#define DRV_DESC     "vDPA Device Simulator for block device"
#define DRV_LICENSE  "GPL v2"

#define VDPASIM_BLK_FEATURES	(VDPASIM_FEATURES | \
				 (1ULL << VIRTIO_BLK_F_FLUSH) | \
				 (1ULL << VIRTIO_BLK_F_SIZE_MAX) | \
				 (1ULL << VIRTIO_BLK_F_SEG_MAX) | \
				 (1ULL << VIRTIO_BLK_F_BLK_SIZE) | \
				 (1ULL << VIRTIO_BLK_F_TOPOLOGY) | \
				 (1ULL << VIRTIO_BLK_F_MQ) | \
				 (1ULL << VIRTIO_BLK_F_DISCARD) | \
				 (1ULL << VIRTIO_BLK_F_WRITE_ZEROES))

#define VDPASIM_BLK_CAPACITY	0x40000
#define VDPASIM_BLK_SIZE_MAX	0x1000
#define VDPASIM_BLK_SEG_MAX	32
#define VDPASIM_BLK_DWZ_MAX_SECTORS	UINT_MAX

/* 1 virtqueue, 1 address space, 1 virtqueue group */
#define VDPASIM_BLK_VQ_NUM	1
#define VDPASIM_BLK_AS_NUM	1
#define VDPASIM_BLK_GROUP_NUM	1

struct vdpasim_blk {
	struct vdpasim vdpasim;
	void *buffer;
	bool shared_backend;
};

static struct vdpasim_blk *sim_to_blk(struct vdpasim *vdpasim)
{
	return container_of(vdpasim, struct vdpasim_blk, vdpasim);
}

static char vdpasim_blk_id[VIRTIO_BLK_ID_BYTES] = "vdpa_blk_sim";

static bool shared_backend;
module_param(shared_backend, bool, 0444);
MODULE_PARM_DESC(shared_backend, "Enable the shared backend between virtio-blk devices");

static void *shared_buffer;
/* mutex to synchronize shared_buffer access */
static DEFINE_MUTEX(shared_buffer_mutex);

static void vdpasim_blk_buffer_lock(struct vdpasim_blk *blk)
{
	if (blk->shared_backend)
		mutex_lock(&shared_buffer_mutex);
}

static void vdpasim_blk_buffer_unlock(struct vdpasim_blk *blk)
{
	if (blk->shared_backend)
		mutex_unlock(&shared_buffer_mutex);
}

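/* Check that a request stays within the simulated disk.
 *
 * All values are in 512-byte sectors: 'start_sector' and 'num_sectors'
 * describe the request, while 'max_sectors' is the caller's per-request
 * limit (e.g. VDPASIM_BLK_SIZE_MAX * VDPASIM_BLK_SEG_MAX for reads and
 * writes). Returns 'false' if the range exceeds either the limit or
 * VDPASIM_BLK_CAPACITY.
 */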
static bool vdpasim_blk_check_range(struct vdpasim *vdpasim, u64 start_sector,
				    u64 num_sectors, u64 max_sectors)
{
	if (start_sector > VDPASIM_BLK_CAPACITY) {
		dev_dbg(&vdpasim->vdpa.dev,
			"starting sector exceeds the capacity - start: 0x%llx capacity: 0x%x\n",
			start_sector, VDPASIM_BLK_CAPACITY);
		return false;
	}

	if (num_sectors > max_sectors) {
		dev_dbg(&vdpasim->vdpa.dev,
			"number of sectors exceeds the max allowed in a request - num: 0x%llx max: 0x%llx\n",
			num_sectors, max_sectors);
		return false;
	}

	if (num_sectors > VDPASIM_BLK_CAPACITY - start_sector) {
		dev_dbg(&vdpasim->vdpa.dev,
			"request exceeds the capacity - start: 0x%llx num: 0x%llx capacity: 0x%x\n",
			start_sector, num_sectors, VDPASIM_BLK_CAPACITY);
		return false;
	}

	return true;
}

/* Returns 'true' if the request is handled (with or without an I/O error)
 * and the status is correctly written in the last byte of the 'in iov',
 * 'false' otherwise.
 */
static bool vdpasim_blk_handle_req(struct vdpasim *vdpasim,
				   struct vdpasim_virtqueue *vq)
{
	struct vdpasim_blk *blk = sim_to_blk(vdpasim);
	size_t pushed = 0, to_pull, to_push;
	struct virtio_blk_outhdr hdr;
	bool handled = false;
	ssize_t bytes;
	loff_t offset;
	u64 sector;
	u8 status;
	u32 type;
	int ret;

	ret = vringh_getdesc_iotlb(&vq->vring, &vq->out_iov, &vq->in_iov,
				   &vq->head, GFP_ATOMIC);
	if (ret != 1)
		return false;

	if (vq->out_iov.used < 1 || vq->in_iov.used < 1) {
		dev_dbg(&vdpasim->vdpa.dev, "missing headers - out_iov: %u in_iov %u\n",
			vq->out_iov.used, vq->in_iov.used);
		goto err;
	}

	if (vq->in_iov.iov[vq->in_iov.used - 1].iov_len < 1) {
		dev_dbg(&vdpasim->vdpa.dev, "request in header too short\n");
		goto err;
	}

	/* The last byte is the status and we checked if the last iov has
	 * enough room for it.
	 */
	to_push = vringh_kiov_length(&vq->in_iov) - 1;

	to_pull = vringh_kiov_length(&vq->out_iov);

	bytes = vringh_iov_pull_iotlb(&vq->vring, &vq->out_iov, &hdr,
				      sizeof(hdr));
	if (bytes != sizeof(hdr)) {
		dev_dbg(&vdpasim->vdpa.dev, "request out header too short\n");
		goto err;
	}

	to_pull -= bytes;

	type = vdpasim32_to_cpu(vdpasim, hdr.type);
	sector = vdpasim64_to_cpu(vdpasim, hdr.sector);
	offset = sector << SECTOR_SHIFT;
	status = VIRTIO_BLK_S_OK;

	if (type != VIRTIO_BLK_T_IN && type != VIRTIO_BLK_T_OUT &&
	    sector != 0) {
		dev_dbg(&vdpasim->vdpa.dev,
			"sector must be 0 for %u request - sector: 0x%llx\n",
			type, sector);
		status = VIRTIO_BLK_S_IOERR;
		goto err_status;
	}

	switch (type) {
	case VIRTIO_BLK_T_IN:
		if (!vdpasim_blk_check_range(vdpasim, sector,
					     to_push >> SECTOR_SHIFT,
					     VDPASIM_BLK_SIZE_MAX * VDPASIM_BLK_SEG_MAX)) {
			status = VIRTIO_BLK_S_IOERR;
			break;
		}

		vdpasim_blk_buffer_lock(blk);
		bytes = vringh_iov_push_iotlb(&vq->vring, &vq->in_iov,
					      blk->buffer + offset, to_push);
		vdpasim_blk_buffer_unlock(blk);
		if (bytes < 0) {
			dev_dbg(&vdpasim->vdpa.dev,
				"vringh_iov_push_iotlb() error: %zd offset: 0x%llx len: 0x%zx\n",
				bytes, offset, to_push);
			status = VIRTIO_BLK_S_IOERR;
			break;
		}

		pushed += bytes;
		break;

	case VIRTIO_BLK_T_OUT:
		if (!vdpasim_blk_check_range(vdpasim, sector,
					     to_pull >> SECTOR_SHIFT,
					     VDPASIM_BLK_SIZE_MAX * VDPASIM_BLK_SEG_MAX)) {
			status = VIRTIO_BLK_S_IOERR;
			break;
		}

		vdpasim_blk_buffer_lock(blk);
		bytes = vringh_iov_pull_iotlb(&vq->vring, &vq->out_iov,
					      blk->buffer + offset, to_pull);
		vdpasim_blk_buffer_unlock(blk);
		if (bytes < 0) {
			dev_dbg(&vdpasim->vdpa.dev,
				"vringh_iov_pull_iotlb() error: %zd offset: 0x%llx len: 0x%zx\n",
				bytes, offset, to_pull);
			status = VIRTIO_BLK_S_IOERR;
			break;
		}
		break;

	case VIRTIO_BLK_T_GET_ID:
		bytes = vringh_iov_push_iotlb(&vq->vring, &vq->in_iov,
					      vdpasim_blk_id,
					      VIRTIO_BLK_ID_BYTES);
		if (bytes < 0) {
			dev_dbg(&vdpasim->vdpa.dev,
				"vringh_iov_push_iotlb() error: %zd\n", bytes);
			status = VIRTIO_BLK_S_IOERR;
			break;
		}

		pushed += bytes;
		break;

	case VIRTIO_BLK_T_FLUSH:
		/* nothing to do */
		break;

	case VIRTIO_BLK_T_DISCARD:
	case VIRTIO_BLK_T_WRITE_ZEROES: {
		struct virtio_blk_discard_write_zeroes range;
		u32 num_sectors, flags;

		if (to_pull != sizeof(range)) {
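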
			dev_dbg(&vdpasim->vdpa.dev,
				"discard/write_zeroes header len: 0x%zx [expected: 0x%zx]\n",
				to_pull, sizeof(range));
			status = VIRTIO_BLK_S_IOERR;
			break;
		}

		bytes = vringh_iov_pull_iotlb(&vq->vring, &vq->out_iov, &range,
					      to_pull);
		if (bytes < 0) {
			dev_dbg(&vdpasim->vdpa.dev,
				"vringh_iov_pull_iotlb() error: %zd offset: 0x%llx len: 0x%zx\n",
				bytes, offset, to_pull);
			status = VIRTIO_BLK_S_IOERR;
			break;
		}

		sector = le64_to_cpu(range.sector);
		offset = sector << SECTOR_SHIFT;
		num_sectors = le32_to_cpu(range.num_sectors);
		flags = le32_to_cpu(range.flags);

		if (type == VIRTIO_BLK_T_DISCARD && flags != 0) {
			dev_dbg(&vdpasim->vdpa.dev,
				"discard unexpected flags set - flags: 0x%x\n",
				flags);
			status = VIRTIO_BLK_S_UNSUPP;
			break;
		}

		if (type == VIRTIO_BLK_T_WRITE_ZEROES &&
		    flags & ~VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP) {
			dev_dbg(&vdpasim->vdpa.dev,
				"write_zeroes unexpected flags set - flags: 0x%x\n",
				flags);
			status = VIRTIO_BLK_S_UNSUPP;
			break;
		}

		if (!vdpasim_blk_check_range(vdpasim, sector, num_sectors,
					     VDPASIM_BLK_DWZ_MAX_SECTORS)) {
			status = VIRTIO_BLK_S_IOERR;
			break;
		}

		if (type == VIRTIO_BLK_T_WRITE_ZEROES) {
			vdpasim_blk_buffer_lock(blk);
			memset(blk->buffer + offset, 0,
			       num_sectors << SECTOR_SHIFT);
			vdpasim_blk_buffer_unlock(blk);
		}

		break;
	}
	default:
		dev_dbg(&vdpasim->vdpa.dev,
			"Unsupported request type %d\n", type);
		status = VIRTIO_BLK_S_IOERR;
		break;
	}

err_status:
	/* If some operations fail, we need to skip the remaining bytes
	 * to put the status in the last byte
	 */
	if (to_push - pushed > 0)
		vringh_kiov_advance(&vq->in_iov, to_push - pushed);

	/* Last byte is the status */
	bytes = vringh_iov_push_iotlb(&vq->vring, &vq->in_iov, &status, 1);
	if (bytes != 1)
		goto err;

	pushed += bytes;

	/* Make sure data is written before advancing index */
	smp_wmb();

	handled = true;

err:
	vringh_complete_iotlb(&vq->vring, vq->head, pushed);

	return handled;
}

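/* Process pending requests on every ready virtqueue.
 *
 * Each queue handles at most 5 requests per invocation; if more are
 * pending, the work is rescheduled so a busy queue cannot monopolize
 * the worker.
 */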
static void vdpasim_blk_work(struct vdpasim *vdpasim)
{
	bool reschedule = false;
	int i;

	mutex_lock(&vdpasim->mutex);

	if (!(vdpasim->status & VIRTIO_CONFIG_S_DRIVER_OK))
		goto out;

	if (!vdpasim->running)
		goto out;

	for (i = 0; i < VDPASIM_BLK_VQ_NUM; i++) {
		struct vdpasim_virtqueue *vq = &vdpasim->vqs[i];
		int reqs = 0;

		if (!vq->ready)
			continue;

		while (vdpasim_blk_handle_req(vdpasim, vq)) {
			/* Make sure used is visible before raising the interrupt. */
			smp_wmb();

			local_bh_disable();
			if (vringh_need_notify_iotlb(&vq->vring) > 0)
				vringh_notify(&vq->vring);
			local_bh_enable();

			if (++reqs > 4) {
				reschedule = true;
				break;
			}
		}
	}
out:
	mutex_unlock(&vdpasim->mutex);

	if (reschedule)
		vdpasim_schedule_work(vdpasim);
}

static void vdpasim_blk_get_config(struct vdpasim *vdpasim, void *config)
{
	struct virtio_blk_config *blk_config = config;

	memset(config, 0, sizeof(struct virtio_blk_config));

	blk_config->capacity = cpu_to_vdpasim64(vdpasim, VDPASIM_BLK_CAPACITY);
	blk_config->size_max = cpu_to_vdpasim32(vdpasim, VDPASIM_BLK_SIZE_MAX);
	blk_config->seg_max = cpu_to_vdpasim32(vdpasim, VDPASIM_BLK_SEG_MAX);
	blk_config->num_queues = cpu_to_vdpasim16(vdpasim, VDPASIM_BLK_VQ_NUM);
	blk_config->min_io_size = cpu_to_vdpasim16(vdpasim, 1);
	blk_config->opt_io_size = cpu_to_vdpasim32(vdpasim, 1);
	blk_config->blk_size = cpu_to_vdpasim32(vdpasim, SECTOR_SIZE);
	/* VIRTIO_BLK_F_DISCARD */
	blk_config->discard_sector_alignment =
		cpu_to_vdpasim32(vdpasim, SECTOR_SIZE);
	blk_config->max_discard_sectors =
		cpu_to_vdpasim32(vdpasim, VDPASIM_BLK_DWZ_MAX_SECTORS);
	blk_config->max_discard_seg = cpu_to_vdpasim32(vdpasim, 1);
	/* VIRTIO_BLK_F_WRITE_ZEROES */
	blk_config->max_write_zeroes_sectors =
		cpu_to_vdpasim32(vdpasim, VDPASIM_BLK_DWZ_MAX_SECTORS);
	blk_config->max_write_zeroes_seg = cpu_to_vdpasim32(vdpasim, 1);
}

static void vdpasim_blk_free(struct vdpasim *vdpasim)
{
	struct vdpasim_blk *blk = sim_to_blk(vdpasim);

	if (!blk->shared_backend)
		kvfree(blk->buffer);
}

static void vdpasim_blk_mgmtdev_release(struct device *dev)
{
}

static struct device vdpasim_blk_mgmtdev = {
	.init_name = "vdpasim_blk",
	.release = vdpasim_blk_mgmtdev_release,
};

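/* Create a new simulated block device.
 *
 * Each device normally gets its own backing buffer; when the
 * 'shared_backend' module parameter is set, every device instead uses
 * the single 'shared_buffer' allocated at module init, with accesses
 * serialized by 'shared_buffer_mutex'.
 */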
static int vdpasim_blk_dev_add(struct vdpa_mgmt_dev *mdev, const char *name,
			       const struct vdpa_dev_set_config *config)
{
	struct vdpasim_dev_attr dev_attr = {};
	struct vdpasim_blk *blk;
	struct vdpasim *simdev;
	int ret;

	dev_attr.mgmt_dev = mdev;
	dev_attr.name = name;
	dev_attr.id = VIRTIO_ID_BLOCK;
	dev_attr.supported_features = VDPASIM_BLK_FEATURES;
	dev_attr.nvqs = VDPASIM_BLK_VQ_NUM;
	dev_attr.ngroups = VDPASIM_BLK_GROUP_NUM;
	dev_attr.nas = VDPASIM_BLK_AS_NUM;
	dev_attr.alloc_size = sizeof(struct vdpasim_blk);
	dev_attr.config_size = sizeof(struct virtio_blk_config);
	dev_attr.get_config = vdpasim_blk_get_config;
	dev_attr.work_fn = vdpasim_blk_work;
	dev_attr.free = vdpasim_blk_free;

	simdev = vdpasim_create(&dev_attr, config);
	if (IS_ERR(simdev))
		return PTR_ERR(simdev);

	blk = sim_to_blk(simdev);
	blk->shared_backend = shared_backend;

	if (blk->shared_backend) {
		blk->buffer = shared_buffer;
	} else {
		blk->buffer = kvmalloc(VDPASIM_BLK_CAPACITY << SECTOR_SHIFT,
				       GFP_KERNEL);
		if (!blk->buffer) {
			ret = -ENOMEM;
			goto put_dev;
		}
	}

	ret = _vdpa_register_device(&simdev->vdpa, VDPASIM_BLK_VQ_NUM);
	if (ret)
		goto put_dev;

	return 0;

put_dev:
	put_device(&simdev->vdpa.dev);
	return ret;
}

static void vdpasim_blk_dev_del(struct vdpa_mgmt_dev *mdev,
				struct vdpa_device *dev)
{
	struct vdpasim *simdev = container_of(dev, struct vdpasim, vdpa);

	_vdpa_unregister_device(&simdev->vdpa);
}

static const struct vdpa_mgmtdev_ops vdpasim_blk_mgmtdev_ops = {
	.dev_add = vdpasim_blk_dev_add,
	.dev_del = vdpasim_blk_dev_del
};

static struct virtio_device_id id_table[] = {
	{ VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID },
	{ 0 },
};

static struct vdpa_mgmt_dev mgmt_dev = {
	.device = &vdpasim_blk_mgmtdev,
	.id_table = id_table,
	.ops = &vdpasim_blk_mgmtdev_ops,
};

static int __init vdpasim_blk_init(void)
{
	int ret;

	ret = device_register(&vdpasim_blk_mgmtdev);
	if (ret) {
		put_device(&vdpasim_blk_mgmtdev);
		return ret;
	}

	ret = vdpa_mgmtdev_register(&mgmt_dev);
	if (ret)
		goto parent_err;

	if (shared_backend) {
		shared_buffer = kvmalloc(VDPASIM_BLK_CAPACITY << SECTOR_SHIFT,
					 GFP_KERNEL);
		if (!shared_buffer) {
			ret = -ENOMEM;
			goto mgmt_dev_err;
		}
	}

	return 0;
mgmt_dev_err:
	vdpa_mgmtdev_unregister(&mgmt_dev);
parent_err:
	device_unregister(&vdpasim_blk_mgmtdev);
	return ret;
}

static void __exit vdpasim_blk_exit(void)
{
	kvfree(shared_buffer);
	vdpa_mgmtdev_unregister(&mgmt_dev);
	device_unregister(&vdpasim_blk_mgmtdev);
}

module_init(vdpasim_blk_init)
module_exit(vdpasim_blk_exit)

MODULE_VERSION(DRV_VERSION);
MODULE_LICENSE(DRV_LICENSE);
MODULE_AUTHOR(DRV_AUTHOR);
MODULE_DESCRIPTION(DRV_DESC);