/*
 * QEMU System Emulator block driver
 *
 * Copyright (c) 2011 IBM Corp.
 * Copyright (c) 2012 Red Hat, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"
#include "qemu-common.h"
#include "trace.h"
#include "block/block.h"
#include "block/blockjob.h"
#include "block/block_int.h"
#include "sysemu/block-backend.h"
#include "qapi/qmp/qerror.h"
#include "qapi/qmp/qjson.h"
#include "qemu/coroutine.h"
#include "qmp-commands.h"
#include "qemu/timer.h"
#include "qapi-event.h"

/* Transactional group of block jobs */
struct BlockJobTxn {

    /* Is this txn being cancelled? */
    bool aborting;

    /* List of jobs */
    QLIST_HEAD(, BlockJob) jobs;

    /* Reference count */
    int refcnt;
};

static QLIST_HEAD(, BlockJob) block_jobs = QLIST_HEAD_INITIALIZER(block_jobs);

BlockJob *block_job_next(BlockJob *job)
{
    if (!job) {
        return QLIST_FIRST(&block_jobs);
    }
    return QLIST_NEXT(job, job_list);
}

/* Normally the job runs in its BlockBackend's AioContext.  The exception is
 * block_job_defer_to_main_loop() where it runs in the QEMU main loop.  Code
 * that supports both cases uses this helper function.
 */
static AioContext *block_job_get_aio_context(BlockJob *job)
{
    return job->deferred_to_main_loop ?
           qemu_get_aio_context() :
           blk_get_aio_context(job->blk);
}

static void block_job_attached_aio_context(AioContext *new_context,
                                           void *opaque)
{
    BlockJob *job = opaque;

    if (job->driver->attached_aio_context) {
        job->driver->attached_aio_context(job, new_context);
    }

    block_job_resume(job);
}

static void block_job_detach_aio_context(void *opaque)
{
    BlockJob *job = opaque;

    /* In case the job terminates during aio_poll()... */
    block_job_ref(job);

    block_job_pause(job);

    if (!job->paused) {
        /* If the job is sleeping (!job->busy), this kicks it into the next
         * pause point. */
        block_job_enter(job);
    }
    while (!job->paused && !job->completed) {
        aio_poll(block_job_get_aio_context(job), true);
    }

    block_job_unref(job);
}
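
/* Create a new block job driving @bs.  The caller must provide a completion
 * callback @cb, which is invoked with @opaque and the job's return code once
 * the job finishes.  While the job exists, most operations on @bs are blocked
 * (dataplane remains allowed).  Fails and returns NULL if @bs already has a
 * job attached, or if the initial @speed is rejected by the driver.
 */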
void *block_job_create(const BlockJobDriver *driver, BlockDriverState *bs,
                       int64_t speed, BlockCompletionFunc *cb,
                       void *opaque, Error **errp)
{
    BlockBackend *blk;
    BlockJob *job;

    assert(cb);
    if (bs->job) {
        error_setg(errp, QERR_DEVICE_IN_USE, bdrv_get_device_name(bs));
        return NULL;
    }

    blk = blk_new();
    blk_insert_bs(blk, bs);

    job = g_malloc0(driver->instance_size);
    error_setg(&job->blocker, "block device is in use by block job: %s",
               BlockJobType_lookup[driver->job_type]);
    bdrv_op_block_all(bs, job->blocker);
    bdrv_op_unblock(bs, BLOCK_OP_TYPE_DATAPLANE, job->blocker);

    job->driver = driver;
    job->id     = g_strdup(bdrv_get_device_name(bs));
    job->blk    = blk;
    job->cb     = cb;
    job->opaque = opaque;
    job->busy   = true;
    job->refcnt = 1;
    bs->job = job;

    QLIST_INSERT_HEAD(&block_jobs, job, job_list);

    blk_add_aio_context_notifier(blk, block_job_attached_aio_context,
                                 block_job_detach_aio_context, job);

    /* Only set speed when necessary to avoid NotSupported error */
    if (speed != 0) {
        Error *local_err = NULL;

        block_job_set_speed(job, speed, &local_err);
        if (local_err) {
            block_job_unref(job);
            error_propagate(errp, local_err);
            return NULL;
        }
    }
    return job;
}

void block_job_ref(BlockJob *job)
{
    ++job->refcnt;
}

void block_job_unref(BlockJob *job)
{
    if (--job->refcnt == 0) {
        BlockDriverState *bs = blk_bs(job->blk);
        bs->job = NULL;
        bdrv_op_unblock_all(bs, job->blocker);
        blk_remove_aio_context_notifier(job->blk,
                                        block_job_attached_aio_context,
                                        block_job_detach_aio_context, job);
        blk_unref(job->blk);
        error_free(job->blocker);
        g_free(job->id);
        QLIST_REMOVE(job, job_list);
        g_free(job);
    }
}

static void block_job_completed_single(BlockJob *job)
{
    if (!job->ret) {
        if (job->driver->commit) {
            job->driver->commit(job);
        }
    } else {
        if (job->driver->abort) {
            job->driver->abort(job);
        }
    }
    job->cb(job->opaque, job->ret);
    if (job->txn) {
        block_job_txn_unref(job->txn);
    }
    block_job_unref(job);
}
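
/* Abort path for transactional completion: invoked when any job in the
 * transaction fails or is cancelled.  The first job to fail marks the txn as
 * aborting, synchronously cancels every other member, and then completes all
 * of them, so each member's abort callback runs via
 * block_job_completed_single().
 */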
static void block_job_completed_txn_abort(BlockJob *job)
{
    AioContext *ctx;
    BlockJobTxn *txn = job->txn;
    BlockJob *other_job, *next;

    if (txn->aborting) {
        /*
         * We are cancelled by another job, which will handle everything.
         */
        return;
    }
    txn->aborting = true;
    /* We are the first failed job. Cancel other jobs. */
    QLIST_FOREACH(other_job, &txn->jobs, txn_list) {
        ctx = blk_get_aio_context(other_job->blk);
        aio_context_acquire(ctx);
    }
    QLIST_FOREACH(other_job, &txn->jobs, txn_list) {
        if (other_job == job || other_job->completed) {
            /* Other jobs are "effectively" cancelled by us, set the status for
             * them; this job, however, may or may not be cancelled, depending
             * on the caller, so leave it. */
            if (other_job != job) {
                other_job->cancelled = true;
            }
            continue;
        }
        block_job_cancel_sync(other_job);
        assert(other_job->completed);
    }
    QLIST_FOREACH_SAFE(other_job, &txn->jobs, txn_list, next) {
        ctx = blk_get_aio_context(other_job->blk);
        block_job_completed_single(other_job);
        aio_context_release(ctx);
    }
}

static void block_job_completed_txn_success(BlockJob *job)
{
    AioContext *ctx;
    BlockJobTxn *txn = job->txn;
    BlockJob *other_job, *next;
    /*
     * Successful completion, see if there are other running jobs in this
     * txn.
     */
    QLIST_FOREACH(other_job, &txn->jobs, txn_list) {
        if (!other_job->completed) {
            return;
        }
    }
    /* We are the last completed job, commit the transaction. */
    QLIST_FOREACH_SAFE(other_job, &txn->jobs, txn_list, next) {
        ctx = blk_get_aio_context(other_job->blk);
        aio_context_acquire(ctx);
        assert(other_job->ret == 0);
        block_job_completed_single(other_job);
        aio_context_release(ctx);
    }
}

void block_job_completed(BlockJob *job, int ret)
{
    assert(blk_bs(job->blk)->job == job);
    assert(!job->completed);
    job->completed = true;
    job->ret = ret;
    if (!job->txn) {
        block_job_completed_single(job);
    } else if (ret < 0 || block_job_is_cancelled(job)) {
        block_job_completed_txn_abort(job);
    } else {
        block_job_completed_txn_success(job);
    }
}

void block_job_set_speed(BlockJob *job, int64_t speed, Error **errp)
{
    Error *local_err = NULL;

    if (!job->driver->set_speed) {
        error_setg(errp, QERR_UNSUPPORTED);
        return;
    }
    job->driver->set_speed(job, speed, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        return;
    }

    job->speed = speed;
}

void block_job_complete(BlockJob *job, Error **errp)
{
    if (job->pause_count || job->cancelled || !job->driver->complete) {
        error_setg(errp, QERR_BLOCK_JOB_NOT_READY, job->id);
        return;
    }

    job->driver->complete(job, errp);
}

void block_job_pause(BlockJob *job)
{
    job->pause_count++;
}

static bool block_job_should_pause(BlockJob *job)
{
    return job->pause_count > 0;
}

void coroutine_fn block_job_pause_point(BlockJob *job)
{
    if (!block_job_should_pause(job)) {
        return;
    }
    if (block_job_is_cancelled(job)) {
        return;
    }

    if (job->driver->pause) {
        job->driver->pause(job);
    }

    if (block_job_should_pause(job) && !block_job_is_cancelled(job)) {
        job->paused = true;
        job->busy = false;
        qemu_coroutine_yield(); /* wait for block_job_resume() */
        job->busy = true;
        job->paused = false;
    }

    if (job->driver->resume) {
        job->driver->resume(job);
    }
}

void block_job_resume(BlockJob *job)
{
    assert(job->pause_count > 0);
    job->pause_count--;
    if (job->pause_count) {
        return;
    }
    block_job_enter(job);
}

void block_job_enter(BlockJob *job)
{
    if (job->co && !job->busy) {
        qemu_coroutine_enter(job->co, NULL);
    }
}

void block_job_cancel(BlockJob *job)
{
    job->cancelled = true;
    block_job_iostatus_reset(job);
    block_job_enter(job);
}

bool block_job_is_cancelled(BlockJob *job)
{
    return job->cancelled;
}

void block_job_iostatus_reset(BlockJob *job)
{
    job->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
    if (job->driver->iostatus_reset) {
        job->driver->iostatus_reset(job);
    }
}
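
/* Synchronously run @finish on @job and wait for the job to complete,
 * polling the job's AioContext.  An extra job reference is held so the job
 * object survives even if it terminates inside aio_poll().  Returns the
 * job's return code, -ECANCELED if it was cancelled, or -EBUSY if @finish
 * itself failed.
 */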
static int block_job_finish_sync(BlockJob *job,
                                 void (*finish)(BlockJob *, Error **errp),
                                 Error **errp)
{
    Error *local_err = NULL;
    int ret;

    assert(blk_bs(job->blk)->job == job);

    block_job_ref(job);
    finish(job, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        block_job_unref(job);
        return -EBUSY;
    }
    while (!job->completed) {
        aio_poll(block_job_get_aio_context(job), true);
    }
    ret = (job->cancelled && job->ret == 0) ? -ECANCELED : job->ret;
    block_job_unref(job);
    return ret;
}

/* A wrapper around block_job_cancel() taking an Error ** parameter so it may be
 * used with block_job_finish_sync() without the need for (rather nasty)
 * function pointer casts there. */
static void block_job_cancel_err(BlockJob *job, Error **errp)
{
    block_job_cancel(job);
}

int block_job_cancel_sync(BlockJob *job)
{
    return block_job_finish_sync(job, &block_job_cancel_err, NULL);
}

void block_job_cancel_sync_all(void)
{
    BlockJob *job;
    AioContext *aio_context;

    while ((job = QLIST_FIRST(&block_jobs))) {
        aio_context = blk_get_aio_context(job->blk);
        aio_context_acquire(aio_context);
        block_job_cancel_sync(job);
        aio_context_release(aio_context);
    }
}

int block_job_complete_sync(BlockJob *job, Error **errp)
{
    return block_job_finish_sync(job, &block_job_complete, errp);
}

void block_job_sleep_ns(BlockJob *job, QEMUClockType type, int64_t ns)
{
    assert(job->busy);

    /* Check cancellation *before* setting busy = false, too! */
    if (block_job_is_cancelled(job)) {
        return;
    }

    job->busy = false;
    if (!block_job_should_pause(job)) {
        co_aio_sleep_ns(blk_get_aio_context(job->blk), type, ns);
    }
    job->busy = true;

    block_job_pause_point(job);
}

void block_job_yield(BlockJob *job)
{
    assert(job->busy);

    /* Check cancellation *before* setting busy = false, too! */
    if (block_job_is_cancelled(job)) {
        return;
    }

    job->busy = false;
    if (!block_job_should_pause(job)) {
        qemu_coroutine_yield();
    }
    job->busy = true;

    block_job_pause_point(job);
}

BlockJobInfo *block_job_query(BlockJob *job)
{
    BlockJobInfo *info = g_new0(BlockJobInfo, 1);
    info->type      = g_strdup(BlockJobType_lookup[job->driver->job_type]);
    info->device    = g_strdup(job->id);
    info->len       = job->len;
    info->busy      = job->busy;
    info->paused    = job->pause_count > 0;
    info->offset    = job->offset;
    info->speed     = job->speed;
    info->io_status = job->iostatus;
    info->ready     = job->ready;
    return info;
}

static void block_job_iostatus_set_err(BlockJob *job, int error)
{
    if (job->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
        job->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
                                          BLOCK_DEVICE_IO_STATUS_FAILED;
    }
}
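
/* The block_job_event_* helpers below report job state transitions to the
 * management layer as QMP events (BLOCK_JOB_CANCELLED, BLOCK_JOB_COMPLETED,
 * BLOCK_JOB_READY), carrying the job's type, device name, progress and speed.
 */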
void block_job_event_cancelled(BlockJob *job)
{
    qapi_event_send_block_job_cancelled(job->driver->job_type,
                                        job->id,
                                        job->len,
                                        job->offset,
                                        job->speed,
                                        &error_abort);
}

void block_job_event_completed(BlockJob *job, const char *msg)
{
    qapi_event_send_block_job_completed(job->driver->job_type,
                                        job->id,
                                        job->len,
                                        job->offset,
                                        job->speed,
                                        !!msg,
                                        msg,
                                        &error_abort);
}

void block_job_event_ready(BlockJob *job)
{
    job->ready = true;

    qapi_event_send_block_job_ready(job->driver->job_type,
                                    job->id,
                                    job->len,
                                    job->offset,
                                    job->speed, &error_abort);
}

BlockErrorAction block_job_error_action(BlockJob *job, BlockdevOnError on_err,
                                        int is_read, int error)
{
    BlockErrorAction action;

    switch (on_err) {
    case BLOCKDEV_ON_ERROR_ENOSPC:
        action = (error == ENOSPC) ?
                 BLOCK_ERROR_ACTION_STOP : BLOCK_ERROR_ACTION_REPORT;
        break;
    case BLOCKDEV_ON_ERROR_STOP:
        action = BLOCK_ERROR_ACTION_STOP;
        break;
    case BLOCKDEV_ON_ERROR_REPORT:
        action = BLOCK_ERROR_ACTION_REPORT;
        break;
    case BLOCKDEV_ON_ERROR_IGNORE:
        action = BLOCK_ERROR_ACTION_IGNORE;
        break;
    default:
        abort();
    }
    qapi_event_send_block_job_error(job->id,
                                    is_read ? IO_OPERATION_TYPE_READ :
                                    IO_OPERATION_TYPE_WRITE,
                                    action, &error_abort);
    if (action == BLOCK_ERROR_ACTION_STOP) {
        /* Make the pause user-visible; it can be resumed from QMP. */
        job->user_paused = true;
        block_job_pause(job);
        block_job_iostatus_set_err(job, error);
    }
    return action;
}

typedef struct {
    BlockJob *job;
    QEMUBH *bh;
    AioContext *aio_context;
    BlockJobDeferToMainLoopFn *fn;
    void *opaque;
} BlockJobDeferToMainLoopData;

static void block_job_defer_to_main_loop_bh(void *opaque)
{
    BlockJobDeferToMainLoopData *data = opaque;
    AioContext *aio_context;

    qemu_bh_delete(data->bh);

    /* Prevent race with block_job_defer_to_main_loop() */
    aio_context_acquire(data->aio_context);

    /* Fetch BDS AioContext again, in case it has changed */
    aio_context = blk_get_aio_context(data->job->blk);
    aio_context_acquire(aio_context);

    data->job->deferred_to_main_loop = false;
    data->fn(data->job, data->opaque);

    aio_context_release(aio_context);

    aio_context_release(data->aio_context);

    g_free(data);
}

void block_job_defer_to_main_loop(BlockJob *job,
                                  BlockJobDeferToMainLoopFn *fn,
                                  void *opaque)
{
    BlockJobDeferToMainLoopData *data = g_malloc(sizeof(*data));
    data->job = job;
    data->bh = qemu_bh_new(block_job_defer_to_main_loop_bh, data);
    data->aio_context = blk_get_aio_context(job->blk);
    data->fn = fn;
    data->opaque = opaque;
    job->deferred_to_main_loop = true;

    qemu_bh_schedule(data->bh);
}

BlockJobTxn *block_job_txn_new(void)
{
    BlockJobTxn *txn = g_new0(BlockJobTxn, 1);
    QLIST_INIT(&txn->jobs);
    txn->refcnt = 1;
    return txn;
}

static void block_job_txn_ref(BlockJobTxn *txn)
{
    txn->refcnt++;
}

void block_job_txn_unref(BlockJobTxn *txn)
{
    if (txn && --txn->refcnt == 0) {
        g_free(txn);
    }
}
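
/* Add @job to @txn.  Each added job takes a reference to the transaction,
 * which is dropped again in block_job_completed_single() when the job
 * finishes.  A NULL @txn is ignored, so callers may pass it unconditionally.
 */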
void block_job_txn_add_job(BlockJobTxn *txn, BlockJob *job)
{
    if (!txn) {
        return;
    }

    assert(!job->txn);
    job->txn = txn;

    QLIST_INSERT_HEAD(&txn->jobs, job, txn_list);
    block_job_txn_ref(txn);
}
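
/* Usage sketch (illustrative only; job1 and job2 stand for jobs created
 * elsewhere with block_job_create()):
 *
 *     BlockJobTxn *txn = block_job_txn_new();
 *     block_job_txn_add_job(txn, job1);
 *     block_job_txn_add_job(txn, job2);
 *     block_job_txn_unref(txn);   // each member now holds its own reference
 *
 * The transaction commits only once every member job has completed
 * successfully, and aborts all members as soon as any one of them fails or
 * is cancelled (see block_job_completed_txn_success/_abort above).
 */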