1 /* BlockDriver implementation for "raw" format driver 2 * 3 * Copyright (C) 2010-2016 Red Hat, Inc. 4 * Copyright (C) 2010, Blue Swirl <blauwirbel@gmail.com> 5 * Copyright (C) 2009, Anthony Liguori <aliguori@us.ibm.com> 6 * 7 * Author: 8 * Laszlo Ersek <lersek@redhat.com> 9 * 10 * Permission is hereby granted, free of charge, to any person obtaining a copy 11 * of this software and associated documentation files (the "Software"), to 12 * deal in the Software without restriction, including without limitation the 13 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 14 * sell copies of the Software, and to permit persons to whom the Software is 15 * furnished to do so, subject to the following conditions: 16 * 17 * The above copyright notice and this permission notice shall be included in 18 * all copies or substantial portions of the Software. 19 * 20 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 21 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 23 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 24 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 25 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 26 * IN THE SOFTWARE. 27 */ 28 29 #include "qemu/osdep.h" 30 #include "block/block_int.h" 31 #include "qapi/error.h" 32 #include "qemu/option.h" 33 34 typedef struct BDRVRawState { 35 uint64_t offset; 36 uint64_t size; 37 bool has_size; 38 } BDRVRawState; 39 40 static const char *const mutable_opts[] = { "offset", "size", NULL }; 41 42 static QemuOptsList raw_runtime_opts = { 43 .name = "raw", 44 .head = QTAILQ_HEAD_INITIALIZER(raw_runtime_opts.head), 45 .desc = { 46 { 47 .name = "offset", 48 .type = QEMU_OPT_SIZE, 49 .help = "offset in the disk where the image starts", 50 }, 51 { 52 .name = "size", 53 .type = QEMU_OPT_SIZE, 54 .help = "virtual disk size", 55 }, 56 { /* end of list */ } 57 }, 58 }; 59 60 static QemuOptsList raw_create_opts = { 61 .name = "raw-create-opts", 62 .head = QTAILQ_HEAD_INITIALIZER(raw_create_opts.head), 63 .desc = { 64 { 65 .name = BLOCK_OPT_SIZE, 66 .type = QEMU_OPT_SIZE, 67 .help = "Virtual disk size" 68 }, 69 { /* end of list */ } 70 } 71 }; 72 73 static int raw_read_options(QDict *options, BlockDriverState *bs, 74 BDRVRawState *s, Error **errp) 75 { 76 Error *local_err = NULL; 77 QemuOpts *opts = NULL; 78 int64_t real_size = 0; 79 int ret; 80 81 real_size = bdrv_getlength(bs->file->bs); 82 if (real_size < 0) { 83 error_setg_errno(errp, -real_size, "Could not get image size"); 84 return real_size; 85 } 86 87 opts = qemu_opts_create(&raw_runtime_opts, NULL, 0, &error_abort); 88 qemu_opts_absorb_qdict(opts, options, &local_err); 89 if (local_err) { 90 error_propagate(errp, local_err); 91 ret = -EINVAL; 92 goto end; 93 } 94 95 s->offset = qemu_opt_get_size(opts, "offset", 0); 96 if (s->offset > real_size) { 97 error_setg(errp, "Offset (%" PRIu64 ") cannot be greater than " 98 "size of the containing file (%" PRId64 ")", 99 s->offset, real_size); 100 ret = -EINVAL; 101 goto end; 102 } 103 104 if (qemu_opt_find(opts, "size") != NULL) { 105 s->size = qemu_opt_get_size(opts, "size", 0); 106 s->has_size = true; 107 } else { 108 s->has_size = false; 109 s->size = real_size - s->offset; 110 } 111 112 /* Check size and offset */ 113 if ((real_size - s->offset) < s->size) { 114 error_setg(errp, "The sum of offset (%" PRIu64 ") and size " 115 "(%" PRIu64 ") has to be smaller or equal to the " 116 " actual size of the containing file (%" PRId64 ")", 117 s->offset, s->size, real_size); 118 ret = -EINVAL; 119 goto end; 120 } 121 122 /* Make sure size is multiple of BDRV_SECTOR_SIZE to prevent rounding 123 * up and leaking out of the specified area. */ 124 if (s->has_size && !QEMU_IS_ALIGNED(s->size, BDRV_SECTOR_SIZE)) { 125 error_setg(errp, "Specified size is not multiple of %llu", 126 BDRV_SECTOR_SIZE); 127 ret = -EINVAL; 128 goto end; 129 } 130 131 ret = 0; 132 133 end: 134 135 qemu_opts_del(opts); 136 137 return ret; 138 } 139 140 static int raw_reopen_prepare(BDRVReopenState *reopen_state, 141 BlockReopenQueue *queue, Error **errp) 142 { 143 assert(reopen_state != NULL); 144 assert(reopen_state->bs != NULL); 145 146 reopen_state->opaque = g_new0(BDRVRawState, 1); 147 148 return raw_read_options( 149 reopen_state->options, 150 reopen_state->bs, 151 reopen_state->opaque, 152 errp); 153 } 154 155 static void raw_reopen_commit(BDRVReopenState *state) 156 { 157 BDRVRawState *new_s = state->opaque; 158 BDRVRawState *s = state->bs->opaque; 159 160 memcpy(s, new_s, sizeof(BDRVRawState)); 161 162 g_free(state->opaque); 163 state->opaque = NULL; 164 } 165 166 static void raw_reopen_abort(BDRVReopenState *state) 167 { 168 g_free(state->opaque); 169 state->opaque = NULL; 170 } 171 172 /* Check and adjust the offset, against 'offset' and 'size' options. */ 173 static inline int raw_adjust_offset(BlockDriverState *bs, uint64_t *offset, 174 uint64_t bytes, bool is_write) 175 { 176 BDRVRawState *s = bs->opaque; 177 178 if (s->has_size && (*offset > s->size || bytes > (s->size - *offset))) { 179 /* There's not enough space for the write, or the read request is 180 * out-of-range. Don't read/write anything to prevent leaking out of 181 * the size specified in options. */ 182 return is_write ? -ENOSPC : -EINVAL; 183 } 184 185 if (*offset > INT64_MAX - s->offset) { 186 return -EINVAL; 187 } 188 *offset += s->offset; 189 190 return 0; 191 } 192 193 static int coroutine_fn raw_co_preadv(BlockDriverState *bs, uint64_t offset, 194 uint64_t bytes, QEMUIOVector *qiov, 195 int flags) 196 { 197 int ret; 198 199 ret = raw_adjust_offset(bs, &offset, bytes, false); 200 if (ret) { 201 return ret; 202 } 203 204 BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO); 205 return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags); 206 } 207 208 static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, uint64_t offset, 209 uint64_t bytes, QEMUIOVector *qiov, 210 int flags) 211 { 212 void *buf = NULL; 213 BlockDriver *drv; 214 QEMUIOVector local_qiov; 215 int ret; 216 217 if (bs->probed && offset < BLOCK_PROBE_BUF_SIZE && bytes) { 218 /* Handling partial writes would be a pain - so we just 219 * require that guests have 512-byte request alignment if 220 * probing occurred */ 221 QEMU_BUILD_BUG_ON(BLOCK_PROBE_BUF_SIZE != 512); 222 QEMU_BUILD_BUG_ON(BDRV_SECTOR_SIZE != 512); 223 assert(offset == 0 && bytes >= BLOCK_PROBE_BUF_SIZE); 224 225 buf = qemu_try_blockalign(bs->file->bs, 512); 226 if (!buf) { 227 ret = -ENOMEM; 228 goto fail; 229 } 230 231 ret = qemu_iovec_to_buf(qiov, 0, buf, 512); 232 if (ret != 512) { 233 ret = -EINVAL; 234 goto fail; 235 } 236 237 drv = bdrv_probe_all(buf, 512, NULL); 238 if (drv != bs->drv) { 239 ret = -EPERM; 240 goto fail; 241 } 242 243 /* Use the checked buffer, a malicious guest might be overwriting its 244 * original buffer in the background. */ 245 qemu_iovec_init(&local_qiov, qiov->niov + 1); 246 qemu_iovec_add(&local_qiov, buf, 512); 247 qemu_iovec_concat(&local_qiov, qiov, 512, qiov->size - 512); 248 qiov = &local_qiov; 249 } 250 251 ret = raw_adjust_offset(bs, &offset, bytes, true); 252 if (ret) { 253 goto fail; 254 } 255 256 BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO); 257 ret = bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags); 258 259 fail: 260 if (qiov == &local_qiov) { 261 qemu_iovec_destroy(&local_qiov); 262 } 263 qemu_vfree(buf); 264 return ret; 265 } 266 267 static int coroutine_fn raw_co_block_status(BlockDriverState *bs, 268 bool want_zero, int64_t offset, 269 int64_t bytes, int64_t *pnum, 270 int64_t *map, 271 BlockDriverState **file) 272 { 273 BDRVRawState *s = bs->opaque; 274 *pnum = bytes; 275 *file = bs->file->bs; 276 *map = offset + s->offset; 277 return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID; 278 } 279 280 static int coroutine_fn raw_co_pwrite_zeroes(BlockDriverState *bs, 281 int64_t offset, int bytes, 282 BdrvRequestFlags flags) 283 { 284 int ret; 285 286 ret = raw_adjust_offset(bs, (uint64_t *)&offset, bytes, true); 287 if (ret) { 288 return ret; 289 } 290 return bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags); 291 } 292 293 static int coroutine_fn raw_co_pdiscard(BlockDriverState *bs, 294 int64_t offset, int bytes) 295 { 296 int ret; 297 298 ret = raw_adjust_offset(bs, (uint64_t *)&offset, bytes, true); 299 if (ret) { 300 return ret; 301 } 302 return bdrv_co_pdiscard(bs->file, offset, bytes); 303 } 304 305 static int64_t raw_getlength(BlockDriverState *bs) 306 { 307 int64_t len; 308 BDRVRawState *s = bs->opaque; 309 310 /* Update size. It should not change unless the file was externally 311 * modified. */ 312 len = bdrv_getlength(bs->file->bs); 313 if (len < 0) { 314 return len; 315 } 316 317 if (len < s->offset) { 318 s->size = 0; 319 } else { 320 if (s->has_size) { 321 /* Try to honour the size */ 322 s->size = MIN(s->size, len - s->offset); 323 } else { 324 s->size = len - s->offset; 325 } 326 } 327 328 return s->size; 329 } 330 331 static BlockMeasureInfo *raw_measure(QemuOpts *opts, BlockDriverState *in_bs, 332 Error **errp) 333 { 334 BlockMeasureInfo *info; 335 int64_t required; 336 337 if (in_bs) { 338 required = bdrv_getlength(in_bs); 339 if (required < 0) { 340 error_setg_errno(errp, -required, "Unable to get image size"); 341 return NULL; 342 } 343 } else { 344 required = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0), 345 BDRV_SECTOR_SIZE); 346 } 347 348 info = g_new(BlockMeasureInfo, 1); 349 info->required = required; 350 351 /* Unallocated sectors count towards the file size in raw images */ 352 info->fully_allocated = info->required; 353 return info; 354 } 355 356 static int raw_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) 357 { 358 return bdrv_get_info(bs->file->bs, bdi); 359 } 360 361 static void raw_refresh_limits(BlockDriverState *bs, Error **errp) 362 { 363 if (bs->probed) { 364 /* To make it easier to protect the first sector, any probed 365 * image is restricted to read-modify-write on sub-sector 366 * operations. */ 367 bs->bl.request_alignment = BDRV_SECTOR_SIZE; 368 } 369 } 370 371 static int coroutine_fn raw_co_truncate(BlockDriverState *bs, int64_t offset, 372 PreallocMode prealloc, Error **errp) 373 { 374 BDRVRawState *s = bs->opaque; 375 376 if (s->has_size) { 377 error_setg(errp, "Cannot resize fixed-size raw disks"); 378 return -ENOTSUP; 379 } 380 381 if (INT64_MAX - offset < s->offset) { 382 error_setg(errp, "Disk size too large for the chosen offset"); 383 return -EINVAL; 384 } 385 386 s->size = offset; 387 offset += s->offset; 388 return bdrv_co_truncate(bs->file, offset, prealloc, errp); 389 } 390 391 static void raw_eject(BlockDriverState *bs, bool eject_flag) 392 { 393 bdrv_eject(bs->file->bs, eject_flag); 394 } 395 396 static void raw_lock_medium(BlockDriverState *bs, bool locked) 397 { 398 bdrv_lock_medium(bs->file->bs, locked); 399 } 400 401 static int raw_co_ioctl(BlockDriverState *bs, unsigned long int req, void *buf) 402 { 403 BDRVRawState *s = bs->opaque; 404 if (s->offset || s->has_size) { 405 return -ENOTSUP; 406 } 407 return bdrv_co_ioctl(bs->file->bs, req, buf); 408 } 409 410 static int raw_has_zero_init(BlockDriverState *bs) 411 { 412 return bdrv_has_zero_init(bs->file->bs); 413 } 414 415 static int coroutine_fn raw_co_create_opts(const char *filename, QemuOpts *opts, 416 Error **errp) 417 { 418 return bdrv_create_file(filename, opts, errp); 419 } 420 421 static int raw_open(BlockDriverState *bs, QDict *options, int flags, 422 Error **errp) 423 { 424 BDRVRawState *s = bs->opaque; 425 int ret; 426 427 bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file, 428 false, errp); 429 if (!bs->file) { 430 return -EINVAL; 431 } 432 433 bs->sg = bs->file->bs->sg; 434 bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED | 435 (BDRV_REQ_FUA & bs->file->bs->supported_write_flags); 436 bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED | 437 ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) & 438 bs->file->bs->supported_zero_flags); 439 440 if (bs->probed && !bdrv_is_read_only(bs)) { 441 bdrv_refresh_filename(bs->file->bs); 442 fprintf(stderr, 443 "WARNING: Image format was not specified for '%s' and probing " 444 "guessed raw.\n" 445 " Automatically detecting the format is dangerous for " 446 "raw images, write operations on block 0 will be restricted.\n" 447 " Specify the 'raw' format explicitly to remove the " 448 "restrictions.\n", 449 bs->file->bs->filename); 450 } 451 452 ret = raw_read_options(options, bs, s, errp); 453 if (ret < 0) { 454 return ret; 455 } 456 457 if (bs->sg && (s->offset || s->has_size)) { 458 error_setg(errp, "Cannot use offset/size with SCSI generic devices"); 459 return -EINVAL; 460 } 461 462 return 0; 463 } 464 465 static int raw_probe(const uint8_t *buf, int buf_size, const char *filename) 466 { 467 /* smallest possible positive score so that raw is used if and only if no 468 * other block driver works 469 */ 470 return 1; 471 } 472 473 static int raw_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz) 474 { 475 BDRVRawState *s = bs->opaque; 476 int ret; 477 478 ret = bdrv_probe_blocksizes(bs->file->bs, bsz); 479 if (ret < 0) { 480 return ret; 481 } 482 483 if (!QEMU_IS_ALIGNED(s->offset, MAX(bsz->log, bsz->phys))) { 484 return -ENOTSUP; 485 } 486 487 return 0; 488 } 489 490 static int raw_probe_geometry(BlockDriverState *bs, HDGeometry *geo) 491 { 492 BDRVRawState *s = bs->opaque; 493 if (s->offset || s->has_size) { 494 return -ENOTSUP; 495 } 496 return bdrv_probe_geometry(bs->file->bs, geo); 497 } 498 499 static int coroutine_fn raw_co_copy_range_from(BlockDriverState *bs, 500 BdrvChild *src, 501 uint64_t src_offset, 502 BdrvChild *dst, 503 uint64_t dst_offset, 504 uint64_t bytes, 505 BdrvRequestFlags read_flags, 506 BdrvRequestFlags write_flags) 507 { 508 int ret; 509 510 ret = raw_adjust_offset(bs, &src_offset, bytes, false); 511 if (ret) { 512 return ret; 513 } 514 return bdrv_co_copy_range_from(bs->file, src_offset, dst, dst_offset, 515 bytes, read_flags, write_flags); 516 } 517 518 static int coroutine_fn raw_co_copy_range_to(BlockDriverState *bs, 519 BdrvChild *src, 520 uint64_t src_offset, 521 BdrvChild *dst, 522 uint64_t dst_offset, 523 uint64_t bytes, 524 BdrvRequestFlags read_flags, 525 BdrvRequestFlags write_flags) 526 { 527 int ret; 528 529 ret = raw_adjust_offset(bs, &dst_offset, bytes, true); 530 if (ret) { 531 return ret; 532 } 533 return bdrv_co_copy_range_to(src, src_offset, bs->file, dst_offset, bytes, 534 read_flags, write_flags); 535 } 536 537 static const char *const raw_strong_runtime_opts[] = { 538 "offset", 539 "size", 540 541 NULL 542 }; 543 544 BlockDriver bdrv_raw = { 545 .format_name = "raw", 546 .instance_size = sizeof(BDRVRawState), 547 .bdrv_probe = &raw_probe, 548 .bdrv_reopen_prepare = &raw_reopen_prepare, 549 .bdrv_reopen_commit = &raw_reopen_commit, 550 .bdrv_reopen_abort = &raw_reopen_abort, 551 .bdrv_open = &raw_open, 552 .bdrv_child_perm = bdrv_filter_default_perms, 553 .bdrv_co_create_opts = &raw_co_create_opts, 554 .bdrv_co_preadv = &raw_co_preadv, 555 .bdrv_co_pwritev = &raw_co_pwritev, 556 .bdrv_co_pwrite_zeroes = &raw_co_pwrite_zeroes, 557 .bdrv_co_pdiscard = &raw_co_pdiscard, 558 .bdrv_co_block_status = &raw_co_block_status, 559 .bdrv_co_copy_range_from = &raw_co_copy_range_from, 560 .bdrv_co_copy_range_to = &raw_co_copy_range_to, 561 .bdrv_co_truncate = &raw_co_truncate, 562 .bdrv_getlength = &raw_getlength, 563 .has_variable_length = true, 564 .bdrv_measure = &raw_measure, 565 .bdrv_get_info = &raw_get_info, 566 .bdrv_refresh_limits = &raw_refresh_limits, 567 .bdrv_probe_blocksizes = &raw_probe_blocksizes, 568 .bdrv_probe_geometry = &raw_probe_geometry, 569 .bdrv_eject = &raw_eject, 570 .bdrv_lock_medium = &raw_lock_medium, 571 .bdrv_co_ioctl = &raw_co_ioctl, 572 .create_opts = &raw_create_opts, 573 .bdrv_has_zero_init = &raw_has_zero_init, 574 .strong_runtime_opts = raw_strong_runtime_opts, 575 .mutable_opts = mutable_opts, 576 }; 577 578 static void bdrv_raw_init(void) 579 { 580 bdrv_register(&bdrv_raw); 581 } 582 583 block_init(bdrv_raw_init); 584