/*
 * Background jobs (long-running operations)
 *
 * Copyright (c) 2011 IBM Corp.
 * Copyright (c) 2012, 2018 Red Hat, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"
#include "qapi/error.h"
#include "qemu/job.h"
#include "qemu/id.h"
#include "qemu/main-loop.h"
#include "block/aio-wait.h"
#include "trace/trace-root.h"
#include "qapi/qapi-events-job.h"

/*
 * The job API is composed of two categories of functions.
 *
 * The first includes functions used by the monitor. The monitor is
 * peculiar in that it accesses the job list with job_get, and
 * therefore needs consistency across job_get and the actual operation
 * (e.g. job_user_cancel). To achieve this consistency, the caller
 * calls job_lock/job_unlock itself around the whole operation.
 *
 * The second includes functions used by the job drivers and sometimes
 * by the core block layer. These delegate the locking to the callee instead.
 */
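/*
 * As an illustration of the first category, a monitor command wraps the
 * lookup and the operation in a single critical section.  This is only a
 * sketch, not a function in this file; it follows roughly the shape of
 * qmp_job_cancel() in job-qmp.c:
 *
 *     void qmp_job_cancel(const char *id, Error **errp)
 *     {
 *         JOB_LOCK_GUARD();
 *         Job *job = job_get_locked(id);
 *
 *         if (!job) {
 *             error_setg(errp, "Job not found: %s", id);
 *             return;
 *         }
 *         job_user_cancel_locked(job, true, errp);
 *     }
 */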
/*
 * job_mutex protects the jobs list, but also makes the
 * struct job fields thread-safe.
 */
QemuMutex job_mutex;

/* Protected by job_mutex */
static QLIST_HEAD(, Job) jobs = QLIST_HEAD_INITIALIZER(jobs);

/* Job State Transition Table */
bool JobSTT[JOB_STATUS__MAX][JOB_STATUS__MAX] = {
                                    /* U, C, R, P, Y, S, W, D, X, E, N */
    /* U: */ [JOB_STATUS_UNDEFINED] = {0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0},
    /* C: */ [JOB_STATUS_CREATED]   = {0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1},
    /* R: */ [JOB_STATUS_RUNNING]   = {0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0},
    /* P: */ [JOB_STATUS_PAUSED]    = {0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0},
    /* Y: */ [JOB_STATUS_READY]     = {0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0},
    /* S: */ [JOB_STATUS_STANDBY]   = {0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0},
    /* W: */ [JOB_STATUS_WAITING]   = {0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0},
    /* D: */ [JOB_STATUS_PENDING]   = {0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0},
    /* X: */ [JOB_STATUS_ABORTING]  = {0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0},
    /* E: */ [JOB_STATUS_CONCLUDED] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1},
    /* N: */ [JOB_STATUS_NULL]      = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
};

bool JobVerbTable[JOB_VERB__MAX][JOB_STATUS__MAX] = {
                                    /* U, C, R, P, Y, S, W, D, X, E, N */
    [JOB_VERB_CANCEL]               = {0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0},
    [JOB_VERB_PAUSE]                = {0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0},
    [JOB_VERB_RESUME]               = {0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0},
    [JOB_VERB_SET_SPEED]            = {0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0},
    [JOB_VERB_COMPLETE]             = {0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0},
    [JOB_VERB_FINALIZE]             = {0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0},
    [JOB_VERB_DISMISS]              = {0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0},
    [JOB_VERB_CHANGE]               = {0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0},
};

/* Transactional group of jobs */
struct JobTxn {

    /* Is this txn being cancelled? */
    bool aborting;

    /* List of jobs */
    QLIST_HEAD(, Job) jobs;

    /* Reference count */
    int refcnt;
};

void job_lock(void)
{
    qemu_mutex_lock(&job_mutex);
}

void job_unlock(void)
{
    qemu_mutex_unlock(&job_mutex);
}

static void __attribute__((__constructor__)) job_init(void)
{
    qemu_mutex_init(&job_mutex);
}

JobTxn *job_txn_new(void)
{
    JobTxn *txn = g_new0(JobTxn, 1);
    QLIST_INIT(&txn->jobs);
    txn->refcnt = 1;
    return txn;
}

/* Called with job_mutex held. */
static void job_txn_ref_locked(JobTxn *txn)
{
    txn->refcnt++;
}

void job_txn_unref_locked(JobTxn *txn)
{
    if (txn && --txn->refcnt == 0) {
        g_free(txn);
    }
}

void job_txn_unref(JobTxn *txn)
{
    JOB_LOCK_GUARD();
    job_txn_unref_locked(txn);
}

/**
 * @txn: The transaction (may be NULL)
 * @job: Job to add to the transaction
 *
 * Add @job to the transaction. The @job must not already be in a transaction.
 * The caller must call either job_txn_unref() or job_completed() to release
 * the reference that is automatically grabbed here.
 *
 * If @txn is NULL, the function does nothing.
 *
 * Called with job_mutex held.
 */
static void job_txn_add_job_locked(JobTxn *txn, Job *job)
{
    if (!txn) {
        return;
    }

    assert(!job->txn);
    job->txn = txn;

    QLIST_INSERT_HEAD(&txn->jobs, job, txn_list);
    job_txn_ref_locked(txn);
}

/* Called with job_mutex held. */
static void job_txn_del_job_locked(Job *job)
{
    if (job->txn) {
        QLIST_REMOVE(job, txn_list);
        job_txn_unref_locked(job->txn);
        job->txn = NULL;
    }
}
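/*
 * Illustrative sketch of the transaction lifecycle (hypothetical caller):
 * jobs created with the same JobTxn complete or abort together, and the
 * creator drops its own reference once all jobs have been added:
 *
 *     JobTxn *txn = job_txn_new();
 *     job1 = job_create(id1, driver, txn, ctx, flags, cb, opaque, errp);
 *     job2 = job_create(id2, driver, txn, ctx, flags, cb, opaque, errp);
 *     job_txn_unref(txn);
 */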
/* Called with job_mutex held, but releases it temporarily. */
static int job_txn_apply_locked(Job *job, int fn(Job *))
{
    Job *other_job, *next;
    JobTxn *txn = job->txn;
    int rc = 0;

    /*
     * Take a reference on @job: fn() may temporarily drop job_mutex (for
     * example to run AIO_WAIT_WHILE), so the job could otherwise go away
     * while we iterate over its transaction.
     */
    job_ref_locked(job);

    QLIST_FOREACH_SAFE(other_job, &txn->jobs, txn_list, next) {
        rc = fn(other_job);
        if (rc) {
            break;
        }
    }

    job_unref_locked(job);
    return rc;
}

bool job_is_internal(Job *job)
{
    return (job->id == NULL);
}

/* Called with job_mutex held. */
static void job_state_transition_locked(Job *job, JobStatus s1)
{
    JobStatus s0 = job->status;
    assert(s1 >= 0 && s1 < JOB_STATUS__MAX);
    trace_job_state_transition(job, job->ret,
                               JobSTT[s0][s1] ? "allowed" : "disallowed",
                               JobStatus_str(s0), JobStatus_str(s1));
    assert(JobSTT[s0][s1]);
    job->status = s1;

    if (!job_is_internal(job) && s1 != s0) {
        qapi_event_send_job_status_change(job->id, job->status);
    }
}

int job_apply_verb_locked(Job *job, JobVerb verb, Error **errp)
{
    JobStatus s0 = job->status;
    assert(verb >= 0 && verb < JOB_VERB__MAX);
    trace_job_apply_verb(job, JobStatus_str(s0), JobVerb_str(verb),
                         JobVerbTable[verb][s0] ? "allowed" : "prohibited");
    if (JobVerbTable[verb][s0]) {
        return 0;
    }
    error_setg(errp, "Job '%s' in state '%s' cannot accept command verb '%s'",
               job->id, JobStatus_str(s0), JobVerb_str(verb));
    return -EPERM;
}

JobType job_type(const Job *job)
{
    return job->driver->job_type;
}

const char *job_type_str(const Job *job)
{
    return JobType_str(job_type(job));
}

bool job_is_cancelled_locked(Job *job)
{
    /* force_cancel may be true only if cancelled is true, too */
    assert(job->cancelled || !job->force_cancel);
    return job->force_cancel;
}

bool job_is_cancelled(Job *job)
{
    JOB_LOCK_GUARD();
    return job_is_cancelled_locked(job);
}
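/*
 * The predicate above differs from job_cancel_requested() below for soft
 * cancel requests.  Illustrative sequence, assuming a driver whose
 * .cancel() declines to escalate to a force-cancel (e.g. mirror while
 * READY):
 *
 *     job_cancel_locked(job, false);
 *     assert(job_cancel_requested_locked(job));   // user asked to cancel
 *     assert(!job_is_cancelled_locked(job));      // but not force-cancelled
 */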
/* Called with job_mutex held. */
static bool job_cancel_requested_locked(Job *job)
{
    return job->cancelled;
}

bool job_cancel_requested(Job *job)
{
    JOB_LOCK_GUARD();
    return job_cancel_requested_locked(job);
}

bool job_is_ready_locked(Job *job)
{
    switch (job->status) {
    case JOB_STATUS_UNDEFINED:
    case JOB_STATUS_CREATED:
    case JOB_STATUS_RUNNING:
    case JOB_STATUS_PAUSED:
    case JOB_STATUS_WAITING:
    case JOB_STATUS_PENDING:
    case JOB_STATUS_ABORTING:
    case JOB_STATUS_CONCLUDED:
    case JOB_STATUS_NULL:
        return false;
    case JOB_STATUS_READY:
    case JOB_STATUS_STANDBY:
        return true;
    default:
        g_assert_not_reached();
    }
    return false;
}

bool job_is_ready(Job *job)
{
    JOB_LOCK_GUARD();
    return job_is_ready_locked(job);
}

bool job_is_completed_locked(Job *job)
{
    switch (job->status) {
    case JOB_STATUS_UNDEFINED:
    case JOB_STATUS_CREATED:
    case JOB_STATUS_RUNNING:
    case JOB_STATUS_PAUSED:
    case JOB_STATUS_READY:
    case JOB_STATUS_STANDBY:
        return false;
    case JOB_STATUS_WAITING:
    case JOB_STATUS_PENDING:
    case JOB_STATUS_ABORTING:
    case JOB_STATUS_CONCLUDED:
    case JOB_STATUS_NULL:
        return true;
    default:
        g_assert_not_reached();
    }
    return false;
}

static bool job_is_completed(Job *job)
{
    JOB_LOCK_GUARD();
    return job_is_completed_locked(job);
}

static bool job_started_locked(Job *job)
{
    return job->co;
}

/* Called with job_mutex held. */
static bool job_should_pause_locked(Job *job)
{
    return job->pause_count > 0;
}

Job *job_next_locked(Job *job)
{
    if (!job) {
        return QLIST_FIRST(&jobs);
    }
    return QLIST_NEXT(job, job_list);
}

Job *job_next(Job *job)
{
    JOB_LOCK_GUARD();
    return job_next_locked(job);
}

Job *job_get_locked(const char *id)
{
    Job *job;

    QLIST_FOREACH(job, &jobs, job_list) {
        if (job->id && !strcmp(id, job->id)) {
            return job;
        }
    }

    return NULL;
}

void job_set_aio_context(Job *job, AioContext *ctx)
{
    /* protect against read in job_finish_sync_locked and job_start */
    GLOBAL_STATE_CODE();
    /* protect against read in job_do_yield_locked */
    JOB_LOCK_GUARD();
    /* ensure the job is quiescent while the AioContext is changed */
    assert(job->paused || job_is_completed_locked(job));
    job->aio_context = ctx;
}
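/*
 * Illustrative sketch of walking the job list under the lock (roughly the
 * shape of qmp_query_jobs() in job-qmp.c):
 *
 *     JOB_LOCK_GUARD();
 *     for (Job *job = job_next_locked(NULL); job;
 *          job = job_next_locked(job)) {
 *         if (job_is_internal(job)) {
 *             continue;
 *         }
 *         ... inspect job ...
 *     }
 */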
/* Called with job_mutex *not* held. */
static void job_sleep_timer_cb(void *opaque)
{
    Job *job = opaque;

    job_enter(job);
}

void *job_create(const char *job_id, const JobDriver *driver, JobTxn *txn,
                 AioContext *ctx, int flags, BlockCompletionFunc *cb,
                 void *opaque, Error **errp)
{
    Job *job;

    JOB_LOCK_GUARD();

    if (job_id) {
        if (flags & JOB_INTERNAL) {
            error_setg(errp, "Cannot specify job ID for internal job");
            return NULL;
        }
        if (!id_wellformed(job_id)) {
            error_setg(errp, "Invalid job ID '%s'", job_id);
            return NULL;
        }
        if (job_get_locked(job_id)) {
            error_setg(errp, "Job ID '%s' already in use", job_id);
            return NULL;
        }
    } else if (!(flags & JOB_INTERNAL)) {
        error_setg(errp, "An explicit job ID is required");
        return NULL;
    }

    job = g_malloc0(driver->instance_size);
    job->driver        = driver;
    job->id            = g_strdup(job_id);
    job->refcnt        = 1;
    job->aio_context   = ctx;
    job->busy          = false;
    job->paused        = true;
    job->pause_count   = 1;
    job->auto_finalize = !(flags & JOB_MANUAL_FINALIZE);
    job->auto_dismiss  = !(flags & JOB_MANUAL_DISMISS);
    job->cb            = cb;
    job->opaque        = opaque;

    progress_init(&job->progress);

    notifier_list_init(&job->on_finalize_cancelled);
    notifier_list_init(&job->on_finalize_completed);
    notifier_list_init(&job->on_pending);
    notifier_list_init(&job->on_ready);
    notifier_list_init(&job->on_idle);

    job_state_transition_locked(job, JOB_STATUS_CREATED);
    aio_timer_init(qemu_get_aio_context(), &job->sleep_timer,
                   QEMU_CLOCK_REALTIME, SCALE_NS,
                   job_sleep_timer_cb, job);

    QLIST_INSERT_HEAD(&jobs, job, job_list);

    /* Single jobs are modeled as single-job transactions for the sake of
     * consolidating the job management logic */
    if (!txn) {
        txn = job_txn_new();
        job_txn_add_job_locked(txn, job);
        job_txn_unref_locked(txn);
    } else {
        job_txn_add_job_locked(txn, job);
    }

    return job;
}

void job_ref_locked(Job *job)
{
    ++job->refcnt;
}

void job_unref_locked(Job *job)
{
    GLOBAL_STATE_CODE();

    if (--job->refcnt == 0) {
        assert(job->status == JOB_STATUS_NULL);
        assert(!timer_pending(&job->sleep_timer));
        assert(!job->txn);

        if (job->driver->free) {
            AioContext *aio_context = job->aio_context;
            job_unlock();
            /* FIXME: aiocontext lock is required because cb calls blk_unref */
            aio_context_acquire(aio_context);
            job->driver->free(job);
            aio_context_release(aio_context);
            job_lock();
        }

        QLIST_REMOVE(job, job_list);

        progress_destroy(&job->progress);
        error_free(job->err);
        g_free(job->id);
        g_free(job);
    }
}

void job_progress_update(Job *job, uint64_t done)
{
    progress_work_done(&job->progress, done);
}

void job_progress_set_remaining(Job *job, uint64_t remaining)
{
    progress_set_remaining(&job->progress, remaining);
}

void job_progress_increase_remaining(Job *job, uint64_t delta)
{
    progress_increase_remaining(&job->progress, delta);
}

/**
 * To be called when a cancelled job is finalised.
 * Called with job_mutex held.
 */
static void job_event_cancelled_locked(Job *job)
{
    notifier_list_notify(&job->on_finalize_cancelled, job);
}
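/*
 * Illustrative sketch of the progress API as used from a driver's .run
 * callback (hypothetical driver; do_some_work() is made up; .run executes
 * without job_mutex held):
 *
 *     job_progress_set_remaining(job, total_bytes);
 *     while (offset < total_bytes) {
 *         n = do_some_work(job, offset);
 *         offset += n;
 *         job_progress_update(job, n);
 *     }
 */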
/**
 * To be called when a successfully completed job is finalised.
 * Called with job_mutex held.
 */
static void job_event_completed_locked(Job *job)
{
    notifier_list_notify(&job->on_finalize_completed, job);
}

/* Called with job_mutex held. */
static void job_event_pending_locked(Job *job)
{
    notifier_list_notify(&job->on_pending, job);
}

/* Called with job_mutex held. */
static void job_event_ready_locked(Job *job)
{
    notifier_list_notify(&job->on_ready, job);
}

/* Called with job_mutex held. */
static void job_event_idle_locked(Job *job)
{
    notifier_list_notify(&job->on_idle, job);
}

void job_enter_cond_locked(Job *job, bool(*fn)(Job *job))
{
    if (!job_started_locked(job)) {
        return;
    }
    if (job->deferred_to_main_loop) {
        return;
    }

    if (job->busy) {
        return;
    }

    if (fn && !fn(job)) {
        return;
    }

    assert(!job->deferred_to_main_loop);
    timer_del(&job->sleep_timer);
    job->busy = true;
    job_unlock();
    aio_co_wake(job->co);
    job_lock();
}

void job_enter(Job *job)
{
    JOB_LOCK_GUARD();
    job_enter_cond_locked(job, NULL);
}

/*
 * Yield, and schedule a timer to reenter the coroutine after @ns nanoseconds.
 * Reentering the job coroutine with job_enter() before the timer has expired
 * is allowed and cancels the timer.
 *
 * If @ns is (uint64_t) -1, no timer is scheduled and job_enter() must be
 * called explicitly.
 *
 * Called with job_mutex held, but releases it temporarily.
 */
static void coroutine_fn job_do_yield_locked(Job *job, uint64_t ns)
{
    AioContext *next_aio_context;

    if (ns != -1) {
        timer_mod(&job->sleep_timer, ns);
    }
    job->busy = false;
    job_event_idle_locked(job);
    job_unlock();
    qemu_coroutine_yield();
    job_lock();

    next_aio_context = job->aio_context;
    /*
     * The coroutine has resumed, but in the meanwhile the job AioContext
     * might have changed via bdrv_try_change_aio_context(), so we need to
     * move the coroutine to the new AioContext as well.
     */
    while (qemu_get_current_aio_context() != next_aio_context) {
        job_unlock();
        aio_co_reschedule_self(next_aio_context);
        job_lock();
        next_aio_context = job->aio_context;
    }

    /* Set by job_enter_cond_locked() before re-entering the coroutine. */
    assert(job->busy);
}

/* Called with job_mutex held, but releases it temporarily. */
static void coroutine_fn job_pause_point_locked(Job *job)
{
    assert(job && job_started_locked(job));

    if (!job_should_pause_locked(job)) {
        return;
    }
    if (job_is_cancelled_locked(job)) {
        return;
    }

    if (job->driver->pause) {
        job_unlock();
        job->driver->pause(job);
        job_lock();
    }

    if (job_should_pause_locked(job) && !job_is_cancelled_locked(job)) {
        JobStatus status = job->status;
        job_state_transition_locked(job, status == JOB_STATUS_READY
                                         ? JOB_STATUS_STANDBY
                                         : JOB_STATUS_PAUSED);
        job->paused = true;
        job_do_yield_locked(job, -1);
        job->paused = false;
        job_state_transition_locked(job, status);
    }

    if (job->driver->resume) {
        job_unlock();
        job->driver->resume(job);
        job_lock();
    }
}

void coroutine_fn job_pause_point(Job *job)
{
    JOB_LOCK_GUARD();
    job_pause_point_locked(job);
}
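/*
 * Illustrative sketch of how a driver's .run loop cooperates with the
 * pause/cancel machinery above (hypothetical driver):
 *
 *     while (!job_is_cancelled(job)) {
 *         job_pause_point(job);        // honour job_pause() requests
 *         if (no_more_work) {
 *             break;
 *         }
 *         ... do one unit of work ...
 *     }
 */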
void coroutine_fn job_yield(Job *job)
{
    JOB_LOCK_GUARD();
    assert(job->busy);

    /* Check cancellation *before* setting busy = false, too! */
    if (job_is_cancelled_locked(job)) {
        return;
    }

    if (!job_should_pause_locked(job)) {
        job_do_yield_locked(job, -1);
    }

    job_pause_point_locked(job);
}

void coroutine_fn job_sleep_ns(Job *job, int64_t ns)
{
    JOB_LOCK_GUARD();
    assert(job->busy);

    /* Check cancellation *before* setting busy = false, too! */
    if (job_is_cancelled_locked(job)) {
        return;
    }

    if (!job_should_pause_locked(job)) {
        job_do_yield_locked(job, qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + ns);
    }

    job_pause_point_locked(job);
}

/* Assumes the job_mutex is held */
static bool job_timer_not_pending_locked(Job *job)
{
    return !timer_pending(&job->sleep_timer);
}

void job_pause_locked(Job *job)
{
    job->pause_count++;
    if (!job->paused) {
        job_enter_cond_locked(job, NULL);
    }
}

void job_pause(Job *job)
{
    JOB_LOCK_GUARD();
    job_pause_locked(job);
}

void job_resume_locked(Job *job)
{
    assert(job->pause_count > 0);
    job->pause_count--;
    if (job->pause_count) {
        return;
    }

    /* kick only if no timer is pending */
    job_enter_cond_locked(job, job_timer_not_pending_locked);
}

void job_resume(Job *job)
{
    JOB_LOCK_GUARD();
    job_resume_locked(job);
}

void job_user_pause_locked(Job *job, Error **errp)
{
    if (job_apply_verb_locked(job, JOB_VERB_PAUSE, errp)) {
        return;
    }
    if (job->user_paused) {
        error_setg(errp, "Job is already paused");
        return;
    }
    job->user_paused = true;
    job_pause_locked(job);
}

bool job_user_paused_locked(Job *job)
{
    return job->user_paused;
}

void job_user_resume_locked(Job *job, Error **errp)
{
    assert(job);
    GLOBAL_STATE_CODE();
    if (!job->user_paused || job->pause_count <= 0) {
        error_setg(errp, "Can't resume a job that was not paused");
        return;
    }
    if (job_apply_verb_locked(job, JOB_VERB_RESUME, errp)) {
        return;
    }
    if (job->driver->user_resume) {
        job_unlock();
        job->driver->user_resume(job);
        job_lock();
    }
    job->user_paused = false;
    job_resume_locked(job);
}

/* Called with job_mutex held, but releases it temporarily. */
static void job_do_dismiss_locked(Job *job)
{
    assert(job);
    job->busy = false;
    job->paused = false;
    job->deferred_to_main_loop = true;

    job_txn_del_job_locked(job);

    job_state_transition_locked(job, JOB_STATUS_NULL);
    job_unref_locked(job);
}

void job_dismiss_locked(Job **jobptr, Error **errp)
{
    Job *job = *jobptr;
    /* similarly to _complete, this is QMP-interface only. */
    assert(job->id);
    if (job_apply_verb_locked(job, JOB_VERB_DISMISS, errp)) {
        return;
    }

    job_do_dismiss_locked(job);
    *jobptr = NULL;
}

void job_early_fail(Job *job)
{
    JOB_LOCK_GUARD();
    assert(job->status == JOB_STATUS_CREATED);
    job_do_dismiss_locked(job);
}

/* Called with job_mutex held. */
static void job_conclude_locked(Job *job)
{
    job_state_transition_locked(job, JOB_STATUS_CONCLUDED);
    if (job->auto_dismiss || !job_started_locked(job)) {
        job_do_dismiss_locked(job);
    }
}
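/*
 * Illustrative sketch of manual dismissal via job_dismiss_locked() above
 * (hypothetical caller, roughly the shape of qmp_job_dismiss() in
 * job-qmp.c): with JOB_MANUAL_DISMISS set, a job stays in CONCLUDED until
 * the monitor reaps it:
 *
 *     JOB_LOCK_GUARD();
 *     Job *job = job_get_locked(id);
 *     ...
 *     job_dismiss_locked(&job, errp);   // sets the caller's pointer to NULL
 */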
/* Called with job_mutex held. */
static void job_update_rc_locked(Job *job)
{
    if (!job->ret && job_is_cancelled_locked(job)) {
        job->ret = -ECANCELED;
    }
    if (job->ret) {
        if (!job->err) {
            error_setg(&job->err, "%s", strerror(-job->ret));
        }
        job_state_transition_locked(job, JOB_STATUS_ABORTING);
    }
}

static void job_commit(Job *job)
{
    assert(!job->ret);
    GLOBAL_STATE_CODE();
    if (job->driver->commit) {
        job->driver->commit(job);
    }
}

static void job_abort(Job *job)
{
    assert(job->ret);
    GLOBAL_STATE_CODE();
    if (job->driver->abort) {
        job->driver->abort(job);
    }
}

static void job_clean(Job *job)
{
    GLOBAL_STATE_CODE();
    if (job->driver->clean) {
        job->driver->clean(job);
    }
}

/*
 * Called with job_mutex held, but releases it temporarily.
 * Takes AioContext lock internally to invoke a job->driver callback.
 */
static int job_finalize_single_locked(Job *job)
{
    int job_ret;
    AioContext *ctx = job->aio_context;

    assert(job_is_completed_locked(job));

    /* Ensure abort is called for late-transactional failures */
    job_update_rc_locked(job);

    job_ret = job->ret;
    job_unlock();
    aio_context_acquire(ctx);

    if (!job_ret) {
        job_commit(job);
    } else {
        job_abort(job);
    }
    job_clean(job);

    if (job->cb) {
        job->cb(job->opaque, job_ret);
    }

    aio_context_release(ctx);
    job_lock();

    /* Emit events only if we actually started */
    if (job_started_locked(job)) {
        if (job_is_cancelled_locked(job)) {
            job_event_cancelled_locked(job);
        } else {
            job_event_completed_locked(job);
        }
    }

    job_txn_del_job_locked(job);
    job_conclude_locked(job);
    return 0;
}

/*
 * Called with job_mutex held, but releases it temporarily.
 * Takes AioContext lock internally to invoke a job->driver callback.
 */
static void job_cancel_async_locked(Job *job, bool force)
{
    AioContext *ctx = job->aio_context;
    GLOBAL_STATE_CODE();
    if (job->driver->cancel) {
        job_unlock();
        aio_context_acquire(ctx);
        force = job->driver->cancel(job, force);
        aio_context_release(ctx);
        job_lock();
    } else {
        /* No .cancel() means the job will behave as if force-cancelled */
        force = true;
    }

    if (job->user_paused) {
        /* Do not call job_enter here, the caller will handle it. */
        if (job->driver->user_resume) {
            job_unlock();
            job->driver->user_resume(job);
            job_lock();
        }
        job->user_paused = false;
        assert(job->pause_count > 0);
        job->pause_count--;
    }

    /*
     * Ignore soft cancel requests after the job is already done
     * (We will still invoke job->driver->cancel() above, but if the
     * job driver supports soft cancelling and the job is done, that
     * should be a no-op, too. We still call it so it can override
     * @force.)
     */
    if (force || !job->deferred_to_main_loop) {
        job->cancelled = true;
        /* To prevent 'force == false' overriding a previous 'force == true' */
        job->force_cancel |= force;
    }
}
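/*
 * Illustrative sketch: force-cancelling one job synchronously (the same
 * pattern that job_cancel_sync_all() below applies to every job on
 * shutdown; hypothetical caller):
 *
 *     JOB_LOCK_GUARD();
 *     ret = job_cancel_sync_locked(job, true);   // waits for completion
 */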
944 */ 945 return; 946 } 947 txn->aborting = true; 948 job_txn_ref_locked(txn); 949 950 job_ref_locked(job); 951 952 /* Other jobs are effectively cancelled by us, set the status for 953 * them; this job, however, may or may not be cancelled, depending 954 * on the caller, so leave it. */ 955 QLIST_FOREACH(other_job, &txn->jobs, txn_list) { 956 if (other_job != job) { 957 /* 958 * This is a transaction: If one job failed, no result will matter. 959 * Therefore, pass force=true to terminate all other jobs as quickly 960 * as possible. 961 */ 962 job_cancel_async_locked(other_job, true); 963 } 964 } 965 while (!QLIST_EMPTY(&txn->jobs)) { 966 other_job = QLIST_FIRST(&txn->jobs); 967 if (!job_is_completed_locked(other_job)) { 968 assert(job_cancel_requested_locked(other_job)); 969 job_finish_sync_locked(other_job, NULL, NULL); 970 } 971 job_finalize_single_locked(other_job); 972 } 973 974 job_unref_locked(job); 975 job_txn_unref_locked(txn); 976 } 977 978 /* Called with job_mutex held, but releases it temporarily */ 979 static int job_prepare_locked(Job *job) 980 { 981 int ret; 982 AioContext *ctx = job->aio_context; 983 984 GLOBAL_STATE_CODE(); 985 986 if (job->ret == 0 && job->driver->prepare) { 987 job_unlock(); 988 aio_context_acquire(ctx); 989 ret = job->driver->prepare(job); 990 aio_context_release(ctx); 991 job_lock(); 992 job->ret = ret; 993 job_update_rc_locked(job); 994 } 995 996 return job->ret; 997 } 998 999 /* Called with job_mutex held */ 1000 static int job_needs_finalize_locked(Job *job) 1001 { 1002 return !job->auto_finalize; 1003 } 1004 1005 /* Called with job_mutex held */ 1006 static void job_do_finalize_locked(Job *job) 1007 { 1008 int rc; 1009 assert(job && job->txn); 1010 1011 /* prepare the transaction to complete */ 1012 rc = job_txn_apply_locked(job, job_prepare_locked); 1013 if (rc) { 1014 job_completed_txn_abort_locked(job); 1015 } else { 1016 job_txn_apply_locked(job, job_finalize_single_locked); 1017 } 1018 } 1019 1020 void job_finalize_locked(Job *job, Error **errp) 1021 { 1022 assert(job && job->id); 1023 if (job_apply_verb_locked(job, JOB_VERB_FINALIZE, errp)) { 1024 return; 1025 } 1026 job_do_finalize_locked(job); 1027 } 1028 1029 /* Called with job_mutex held. */ 1030 static int job_transition_to_pending_locked(Job *job) 1031 { 1032 job_state_transition_locked(job, JOB_STATUS_PENDING); 1033 if (!job->auto_finalize) { 1034 job_event_pending_locked(job); 1035 } 1036 return 0; 1037 } 1038 1039 void job_transition_to_ready(Job *job) 1040 { 1041 JOB_LOCK_GUARD(); 1042 job_state_transition_locked(job, JOB_STATUS_READY); 1043 job_event_ready_locked(job); 1044 } 1045 1046 /* Called with job_mutex held. */ 1047 static void job_completed_txn_success_locked(Job *job) 1048 { 1049 JobTxn *txn = job->txn; 1050 Job *other_job; 1051 1052 job_state_transition_locked(job, JOB_STATUS_WAITING); 1053 1054 /* 1055 * Successful completion, see if there are other running jobs in this 1056 * txn. 1057 */ 1058 QLIST_FOREACH(other_job, &txn->jobs, txn_list) { 1059 if (!job_is_completed_locked(other_job)) { 1060 return; 1061 } 1062 assert(other_job->ret == 0); 1063 } 1064 1065 job_txn_apply_locked(job, job_transition_to_pending_locked); 1066 1067 /* If no jobs need manual finalization, automatically do so */ 1068 if (job_txn_apply_locked(job, job_needs_finalize_locked) == 0) { 1069 job_do_finalize_locked(job); 1070 } 1071 } 1072 1073 /* Called with job_mutex held. 
/* Called with job_mutex held. */
static void job_completed_locked(Job *job)
{
    assert(job && job->txn && !job_is_completed_locked(job));

    job_update_rc_locked(job);
    trace_job_completed(job, job->ret);
    if (job->ret) {
        job_completed_txn_abort_locked(job);
    } else {
        job_completed_txn_success_locked(job);
    }
}

/**
 * Useful only as a type shim for aio_bh_schedule_oneshot.
 * Called with job_mutex *not* held.
 */
static void job_exit(void *opaque)
{
    Job *job = (Job *)opaque;
    JOB_LOCK_GUARD();
    job_ref_locked(job);

    /* This is a lie, we're not quiescent, but still doing the completion
     * callbacks. However, completion callbacks tend to involve operations that
     * drain block nodes, and if .drained_poll still returned true, we would
     * deadlock. */
    job->busy = false;
    job_event_idle_locked(job);

    job_completed_locked(job);
    job_unref_locked(job);
}

/**
 * All jobs must allow a pause point before entering their job proper. This
 * ensures that jobs can be paused prior to being started, then resumed later.
 */
static void coroutine_fn job_co_entry(void *opaque)
{
    Job *job = opaque;
    int ret;

    assert(job && job->driver && job->driver->run);
    WITH_JOB_LOCK_GUARD() {
        assert(job->aio_context == qemu_get_current_aio_context());
        job_pause_point_locked(job);
    }
    ret = job->driver->run(job, &job->err);
    WITH_JOB_LOCK_GUARD() {
        job->ret = ret;
        job->deferred_to_main_loop = true;
        job->busy = true;
    }
    aio_bh_schedule_oneshot(qemu_get_aio_context(), job_exit, job);
}

void job_start(Job *job)
{
    assert(qemu_in_main_thread());

    WITH_JOB_LOCK_GUARD() {
        assert(job && !job_started_locked(job) && job->paused &&
               job->driver && job->driver->run);
        job->co = qemu_coroutine_create(job_co_entry, job);
        job->pause_count--;
        job->busy = true;
        job->paused = false;
        job_state_transition_locked(job, JOB_STATUS_RUNNING);
    }
    aio_co_enter(job->aio_context, job->co);
}
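/*
 * Illustrative sketch of driving the API end to end (hypothetical driver;
 * the field values are assumptions for illustration, and .run must be a
 * coroutine_fn):
 *
 *     static int coroutine_fn example_run(Job *job, Error **errp)
 *     {
 *         ... main loop, see the sketches above ...
 *         return 0;
 *     }
 *
 *     static const JobDriver example_driver = {
 *         .instance_size = sizeof(Job),
 *         .job_type      = JOB_TYPE_CREATE,
 *         .run           = example_run,
 *     };
 *
 *     job = job_create("job0", &example_driver, NULL, ctx,
 *                      JOB_DEFAULT, NULL, NULL, errp);
 *     job_start(job);
 */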
void job_cancel_locked(Job *job, bool force)
{
    if (job->status == JOB_STATUS_CONCLUDED) {
        job_do_dismiss_locked(job);
        return;
    }
    job_cancel_async_locked(job, force);
    if (!job_started_locked(job)) {
        job_completed_locked(job);
    } else if (job->deferred_to_main_loop) {
        /*
         * job_cancel_async() ignores soft-cancel requests for jobs
         * that are already done (i.e. deferred to the main loop). We
         * have to check again whether the job is really cancelled.
         * (job_cancel_requested() and job_is_cancelled() are equivalent
         * here, because job_cancel_async() will make soft-cancel
         * requests no-ops when deferred_to_main_loop is true. We
         * choose to call job_is_cancelled() to show that we invoke
         * job_completed_txn_abort() only for force-cancelled jobs.)
         */
        if (job_is_cancelled_locked(job)) {
            job_completed_txn_abort_locked(job);
        }
    } else {
        job_enter_cond_locked(job, NULL);
    }
}

void job_user_cancel_locked(Job *job, bool force, Error **errp)
{
    if (job_apply_verb_locked(job, JOB_VERB_CANCEL, errp)) {
        return;
    }
    job_cancel_locked(job, force);
}

/* A wrapper around job_cancel_locked() taking an Error ** parameter so it may
 * be used with job_finish_sync_locked() without the need for (rather nasty)
 * function pointer casts there.
 *
 * Called with job_mutex held.
 */
static void job_cancel_err_locked(Job *job, Error **errp)
{
    job_cancel_locked(job, false);
}

/**
 * Same as job_cancel_err(), but force-cancel.
 * Called with job_mutex held.
 */
static void job_force_cancel_err_locked(Job *job, Error **errp)
{
    job_cancel_locked(job, true);
}

int job_cancel_sync_locked(Job *job, bool force)
{
    if (force) {
        return job_finish_sync_locked(job, &job_force_cancel_err_locked, NULL);
    } else {
        return job_finish_sync_locked(job, &job_cancel_err_locked, NULL);
    }
}

int job_cancel_sync(Job *job, bool force)
{
    JOB_LOCK_GUARD();
    return job_cancel_sync_locked(job, force);
}

void job_cancel_sync_all(void)
{
    Job *job;
    JOB_LOCK_GUARD();

    while ((job = job_next_locked(NULL))) {
        job_cancel_sync_locked(job, true);
    }
}

int job_complete_sync_locked(Job *job, Error **errp)
{
    return job_finish_sync_locked(job, job_complete_locked, errp);
}

void job_complete_locked(Job *job, Error **errp)
{
    /* Should not be reachable via external interface for internal jobs */
    assert(job->id);
    GLOBAL_STATE_CODE();
    if (job_apply_verb_locked(job, JOB_VERB_COMPLETE, errp)) {
        return;
    }
    if (job_cancel_requested_locked(job) || !job->driver->complete) {
        error_setg(errp, "The active block job '%s' cannot be completed",
                   job->id);
        return;
    }

    job_unlock();
    job->driver->complete(job, errp);
    job_lock();
}

int job_finish_sync_locked(Job *job,
                           void (*finish)(Job *, Error **errp),
                           Error **errp)
{
    Error *local_err = NULL;
    int ret;
    GLOBAL_STATE_CODE();

    job_ref_locked(job);

    if (finish) {
        finish(job, &local_err);
    }
    if (local_err) {
        error_propagate(errp, local_err);
        job_unref_locked(job);
        return -EBUSY;
    }

    job_unlock();
    AIO_WAIT_WHILE_UNLOCKED(job->aio_context,
                            (job_enter(job), !job_is_completed(job)));
    job_lock();

    ret = (job_is_cancelled_locked(job) && job->ret == 0)
          ? -ECANCELED : job->ret;
    job_unref_locked(job);
    return ret;
}
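/*
 * Illustrative sketch: synchronously completing a READY job via the helper
 * above (hypothetical caller, e.g. a tool waiting for a mirror job to
 * converge):
 *
 *     JOB_LOCK_GUARD();
 *     ret = job_complete_sync_locked(job, &local_err);
 */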