1 /* 2 * Block node draining tests 3 * 4 * Copyright (c) 2017 Kevin Wolf <kwolf@redhat.com> 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a copy 7 * of this software and associated documentation files (the "Software"), to deal 8 * in the Software without restriction, including without limitation the rights 9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 * copies of the Software, and to permit persons to whom the Software is 11 * furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in 14 * all copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 * THE SOFTWARE. 23 */ 24 25 #include "qemu/osdep.h" 26 #include "block/block.h" 27 #include "block/blockjob_int.h" 28 #include "sysemu/block-backend.h" 29 #include "qapi/error.h" 30 #include "qemu/main-loop.h" 31 #include "iothread.h" 32 33 static QemuEvent done_event; 34 35 typedef struct BDRVTestState { 36 int drain_count; 37 AioContext *bh_indirection_ctx; 38 bool sleep_in_drain_begin; 39 } BDRVTestState; 40 41 static void coroutine_fn bdrv_test_co_drain_begin(BlockDriverState *bs) 42 { 43 BDRVTestState *s = bs->opaque; 44 s->drain_count++; 45 if (s->sleep_in_drain_begin) { 46 qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, 100000); 47 } 48 } 49 50 static void coroutine_fn bdrv_test_co_drain_end(BlockDriverState *bs) 51 { 52 BDRVTestState *s = bs->opaque; 53 s->drain_count--; 54 } 55 56 static void bdrv_test_close(BlockDriverState *bs) 57 { 58 BDRVTestState *s = bs->opaque; 59 g_assert_cmpint(s->drain_count, >, 0); 60 } 61 62 static void co_reenter_bh(void *opaque) 63 { 64 aio_co_wake(opaque); 65 } 66 67 static int coroutine_fn bdrv_test_co_preadv(BlockDriverState *bs, 68 uint64_t offset, uint64_t bytes, 69 QEMUIOVector *qiov, int flags) 70 { 71 BDRVTestState *s = bs->opaque; 72 73 /* We want this request to stay until the polling loop in drain waits for 74 * it to complete. We need to sleep a while as bdrv_drain_invoke() comes 75 * first and polls its result, too, but it shouldn't accidentally complete 76 * this request yet. */ 77 qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, 100000); 78 79 if (s->bh_indirection_ctx) { 80 aio_bh_schedule_oneshot(s->bh_indirection_ctx, co_reenter_bh, 81 qemu_coroutine_self()); 82 qemu_coroutine_yield(); 83 } 84 85 return 0; 86 } 87 88 static int bdrv_test_change_backing_file(BlockDriverState *bs, 89 const char *backing_file, 90 const char *backing_fmt) 91 { 92 return 0; 93 } 94 95 static BlockDriver bdrv_test = { 96 .format_name = "test", 97 .instance_size = sizeof(BDRVTestState), 98 .supports_backing = true, 99 100 .bdrv_close = bdrv_test_close, 101 .bdrv_co_preadv = bdrv_test_co_preadv, 102 103 .bdrv_co_drain_begin = bdrv_test_co_drain_begin, 104 .bdrv_co_drain_end = bdrv_test_co_drain_end, 105 106 .bdrv_child_perm = bdrv_default_perms, 107 108 .bdrv_change_backing_file = bdrv_test_change_backing_file, 109 }; 110 111 static void aio_ret_cb(void *opaque, int ret) 112 { 113 int *aio_ret = opaque; 114 *aio_ret = ret; 115 } 116 117 typedef struct CallInCoroutineData { 118 void (*entry)(void); 119 bool done; 120 } CallInCoroutineData; 121 122 static coroutine_fn void call_in_coroutine_entry(void *opaque) 123 { 124 CallInCoroutineData *data = opaque; 125 126 data->entry(); 127 data->done = true; 128 } 129 130 static void call_in_coroutine(void (*entry)(void)) 131 { 132 Coroutine *co; 133 CallInCoroutineData data = { 134 .entry = entry, 135 .done = false, 136 }; 137 138 co = qemu_coroutine_create(call_in_coroutine_entry, &data); 139 qemu_coroutine_enter(co); 140 while (!data.done) { 141 aio_poll(qemu_get_aio_context(), true); 142 } 143 } 144 145 enum drain_type { 146 BDRV_DRAIN_ALL, 147 BDRV_DRAIN, 148 BDRV_SUBTREE_DRAIN, 149 DRAIN_TYPE_MAX, 150 }; 151 152 static void do_drain_begin(enum drain_type drain_type, BlockDriverState *bs) 153 { 154 switch (drain_type) { 155 case BDRV_DRAIN_ALL: bdrv_drain_all_begin(); break; 156 case BDRV_DRAIN: bdrv_drained_begin(bs); break; 157 case BDRV_SUBTREE_DRAIN: bdrv_subtree_drained_begin(bs); break; 158 default: g_assert_not_reached(); 159 } 160 } 161 162 static void do_drain_end(enum drain_type drain_type, BlockDriverState *bs) 163 { 164 switch (drain_type) { 165 case BDRV_DRAIN_ALL: bdrv_drain_all_end(); break; 166 case BDRV_DRAIN: bdrv_drained_end(bs); break; 167 case BDRV_SUBTREE_DRAIN: bdrv_subtree_drained_end(bs); break; 168 default: g_assert_not_reached(); 169 } 170 } 171 172 static void do_drain_begin_unlocked(enum drain_type drain_type, BlockDriverState *bs) 173 { 174 if (drain_type != BDRV_DRAIN_ALL) { 175 aio_context_acquire(bdrv_get_aio_context(bs)); 176 } 177 do_drain_begin(drain_type, bs); 178 if (drain_type != BDRV_DRAIN_ALL) { 179 aio_context_release(bdrv_get_aio_context(bs)); 180 } 181 } 182 183 static void do_drain_end_unlocked(enum drain_type drain_type, BlockDriverState *bs) 184 { 185 if (drain_type != BDRV_DRAIN_ALL) { 186 aio_context_acquire(bdrv_get_aio_context(bs)); 187 } 188 do_drain_end(drain_type, bs); 189 if (drain_type != BDRV_DRAIN_ALL) { 190 aio_context_release(bdrv_get_aio_context(bs)); 191 } 192 } 193 194 static void test_drv_cb_common(enum drain_type drain_type, bool recursive) 195 { 196 BlockBackend *blk; 197 BlockDriverState *bs, *backing; 198 BDRVTestState *s, *backing_s; 199 BlockAIOCB *acb; 200 int aio_ret; 201 202 QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, NULL, 0); 203 204 blk = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); 205 bs = bdrv_new_open_driver(&bdrv_test, "test-node", BDRV_O_RDWR, 206 &error_abort); 207 s = bs->opaque; 208 blk_insert_bs(blk, bs, &error_abort); 209 210 backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort); 211 backing_s = backing->opaque; 212 bdrv_set_backing_hd(bs, backing, &error_abort); 213 214 /* Simple bdrv_drain_all_begin/end pair, check that CBs are called */ 215 g_assert_cmpint(s->drain_count, ==, 0); 216 g_assert_cmpint(backing_s->drain_count, ==, 0); 217 218 do_drain_begin(drain_type, bs); 219 220 g_assert_cmpint(s->drain_count, ==, 1); 221 g_assert_cmpint(backing_s->drain_count, ==, !!recursive); 222 223 do_drain_end(drain_type, bs); 224 225 g_assert_cmpint(s->drain_count, ==, 0); 226 g_assert_cmpint(backing_s->drain_count, ==, 0); 227 228 /* Now do the same while a request is pending */ 229 aio_ret = -EINPROGRESS; 230 acb = blk_aio_preadv(blk, 0, &qiov, 0, aio_ret_cb, &aio_ret); 231 g_assert(acb != NULL); 232 g_assert_cmpint(aio_ret, ==, -EINPROGRESS); 233 234 g_assert_cmpint(s->drain_count, ==, 0); 235 g_assert_cmpint(backing_s->drain_count, ==, 0); 236 237 do_drain_begin(drain_type, bs); 238 239 g_assert_cmpint(aio_ret, ==, 0); 240 g_assert_cmpint(s->drain_count, ==, 1); 241 g_assert_cmpint(backing_s->drain_count, ==, !!recursive); 242 243 do_drain_end(drain_type, bs); 244 245 g_assert_cmpint(s->drain_count, ==, 0); 246 g_assert_cmpint(backing_s->drain_count, ==, 0); 247 248 bdrv_unref(backing); 249 bdrv_unref(bs); 250 blk_unref(blk); 251 } 252 253 static void test_drv_cb_drain_all(void) 254 { 255 test_drv_cb_common(BDRV_DRAIN_ALL, true); 256 } 257 258 static void test_drv_cb_drain(void) 259 { 260 test_drv_cb_common(BDRV_DRAIN, false); 261 } 262 263 static void test_drv_cb_drain_subtree(void) 264 { 265 test_drv_cb_common(BDRV_SUBTREE_DRAIN, true); 266 } 267 268 static void test_drv_cb_co_drain_all(void) 269 { 270 call_in_coroutine(test_drv_cb_drain_all); 271 } 272 273 static void test_drv_cb_co_drain(void) 274 { 275 call_in_coroutine(test_drv_cb_drain); 276 } 277 278 static void test_drv_cb_co_drain_subtree(void) 279 { 280 call_in_coroutine(test_drv_cb_drain_subtree); 281 } 282 283 static void test_quiesce_common(enum drain_type drain_type, bool recursive) 284 { 285 BlockBackend *blk; 286 BlockDriverState *bs, *backing; 287 288 blk = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); 289 bs = bdrv_new_open_driver(&bdrv_test, "test-node", BDRV_O_RDWR, 290 &error_abort); 291 blk_insert_bs(blk, bs, &error_abort); 292 293 backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort); 294 bdrv_set_backing_hd(bs, backing, &error_abort); 295 296 g_assert_cmpint(bs->quiesce_counter, ==, 0); 297 g_assert_cmpint(backing->quiesce_counter, ==, 0); 298 299 do_drain_begin(drain_type, bs); 300 301 g_assert_cmpint(bs->quiesce_counter, ==, 1); 302 g_assert_cmpint(backing->quiesce_counter, ==, !!recursive); 303 304 do_drain_end(drain_type, bs); 305 306 g_assert_cmpint(bs->quiesce_counter, ==, 0); 307 g_assert_cmpint(backing->quiesce_counter, ==, 0); 308 309 bdrv_unref(backing); 310 bdrv_unref(bs); 311 blk_unref(blk); 312 } 313 314 static void test_quiesce_drain_all(void) 315 { 316 test_quiesce_common(BDRV_DRAIN_ALL, true); 317 } 318 319 static void test_quiesce_drain(void) 320 { 321 test_quiesce_common(BDRV_DRAIN, false); 322 } 323 324 static void test_quiesce_drain_subtree(void) 325 { 326 test_quiesce_common(BDRV_SUBTREE_DRAIN, true); 327 } 328 329 static void test_quiesce_co_drain_all(void) 330 { 331 call_in_coroutine(test_quiesce_drain_all); 332 } 333 334 static void test_quiesce_co_drain(void) 335 { 336 call_in_coroutine(test_quiesce_drain); 337 } 338 339 static void test_quiesce_co_drain_subtree(void) 340 { 341 call_in_coroutine(test_quiesce_drain_subtree); 342 } 343 344 static void test_nested(void) 345 { 346 BlockBackend *blk; 347 BlockDriverState *bs, *backing; 348 BDRVTestState *s, *backing_s; 349 enum drain_type outer, inner; 350 351 blk = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); 352 bs = bdrv_new_open_driver(&bdrv_test, "test-node", BDRV_O_RDWR, 353 &error_abort); 354 s = bs->opaque; 355 blk_insert_bs(blk, bs, &error_abort); 356 357 backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort); 358 backing_s = backing->opaque; 359 bdrv_set_backing_hd(bs, backing, &error_abort); 360 361 for (outer = 0; outer < DRAIN_TYPE_MAX; outer++) { 362 for (inner = 0; inner < DRAIN_TYPE_MAX; inner++) { 363 int backing_quiesce = (outer != BDRV_DRAIN) + 364 (inner != BDRV_DRAIN); 365 366 g_assert_cmpint(bs->quiesce_counter, ==, 0); 367 g_assert_cmpint(backing->quiesce_counter, ==, 0); 368 g_assert_cmpint(s->drain_count, ==, 0); 369 g_assert_cmpint(backing_s->drain_count, ==, 0); 370 371 do_drain_begin(outer, bs); 372 do_drain_begin(inner, bs); 373 374 g_assert_cmpint(bs->quiesce_counter, ==, 2); 375 g_assert_cmpint(backing->quiesce_counter, ==, backing_quiesce); 376 g_assert_cmpint(s->drain_count, ==, 2); 377 g_assert_cmpint(backing_s->drain_count, ==, backing_quiesce); 378 379 do_drain_end(inner, bs); 380 do_drain_end(outer, bs); 381 382 g_assert_cmpint(bs->quiesce_counter, ==, 0); 383 g_assert_cmpint(backing->quiesce_counter, ==, 0); 384 g_assert_cmpint(s->drain_count, ==, 0); 385 g_assert_cmpint(backing_s->drain_count, ==, 0); 386 } 387 } 388 389 bdrv_unref(backing); 390 bdrv_unref(bs); 391 blk_unref(blk); 392 } 393 394 static void test_multiparent(void) 395 { 396 BlockBackend *blk_a, *blk_b; 397 BlockDriverState *bs_a, *bs_b, *backing; 398 BDRVTestState *a_s, *b_s, *backing_s; 399 400 blk_a = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); 401 bs_a = bdrv_new_open_driver(&bdrv_test, "test-node-a", BDRV_O_RDWR, 402 &error_abort); 403 a_s = bs_a->opaque; 404 blk_insert_bs(blk_a, bs_a, &error_abort); 405 406 blk_b = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); 407 bs_b = bdrv_new_open_driver(&bdrv_test, "test-node-b", BDRV_O_RDWR, 408 &error_abort); 409 b_s = bs_b->opaque; 410 blk_insert_bs(blk_b, bs_b, &error_abort); 411 412 backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort); 413 backing_s = backing->opaque; 414 bdrv_set_backing_hd(bs_a, backing, &error_abort); 415 bdrv_set_backing_hd(bs_b, backing, &error_abort); 416 417 g_assert_cmpint(bs_a->quiesce_counter, ==, 0); 418 g_assert_cmpint(bs_b->quiesce_counter, ==, 0); 419 g_assert_cmpint(backing->quiesce_counter, ==, 0); 420 g_assert_cmpint(a_s->drain_count, ==, 0); 421 g_assert_cmpint(b_s->drain_count, ==, 0); 422 g_assert_cmpint(backing_s->drain_count, ==, 0); 423 424 do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a); 425 426 g_assert_cmpint(bs_a->quiesce_counter, ==, 1); 427 g_assert_cmpint(bs_b->quiesce_counter, ==, 1); 428 g_assert_cmpint(backing->quiesce_counter, ==, 1); 429 g_assert_cmpint(a_s->drain_count, ==, 1); 430 g_assert_cmpint(b_s->drain_count, ==, 1); 431 g_assert_cmpint(backing_s->drain_count, ==, 1); 432 433 do_drain_begin(BDRV_SUBTREE_DRAIN, bs_b); 434 435 g_assert_cmpint(bs_a->quiesce_counter, ==, 2); 436 g_assert_cmpint(bs_b->quiesce_counter, ==, 2); 437 g_assert_cmpint(backing->quiesce_counter, ==, 2); 438 g_assert_cmpint(a_s->drain_count, ==, 2); 439 g_assert_cmpint(b_s->drain_count, ==, 2); 440 g_assert_cmpint(backing_s->drain_count, ==, 2); 441 442 do_drain_end(BDRV_SUBTREE_DRAIN, bs_b); 443 444 g_assert_cmpint(bs_a->quiesce_counter, ==, 1); 445 g_assert_cmpint(bs_b->quiesce_counter, ==, 1); 446 g_assert_cmpint(backing->quiesce_counter, ==, 1); 447 g_assert_cmpint(a_s->drain_count, ==, 1); 448 g_assert_cmpint(b_s->drain_count, ==, 1); 449 g_assert_cmpint(backing_s->drain_count, ==, 1); 450 451 do_drain_end(BDRV_SUBTREE_DRAIN, bs_a); 452 453 g_assert_cmpint(bs_a->quiesce_counter, ==, 0); 454 g_assert_cmpint(bs_b->quiesce_counter, ==, 0); 455 g_assert_cmpint(backing->quiesce_counter, ==, 0); 456 g_assert_cmpint(a_s->drain_count, ==, 0); 457 g_assert_cmpint(b_s->drain_count, ==, 0); 458 g_assert_cmpint(backing_s->drain_count, ==, 0); 459 460 bdrv_unref(backing); 461 bdrv_unref(bs_a); 462 bdrv_unref(bs_b); 463 blk_unref(blk_a); 464 blk_unref(blk_b); 465 } 466 467 static void test_graph_change_drain_subtree(void) 468 { 469 BlockBackend *blk_a, *blk_b; 470 BlockDriverState *bs_a, *bs_b, *backing; 471 BDRVTestState *a_s, *b_s, *backing_s; 472 473 blk_a = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); 474 bs_a = bdrv_new_open_driver(&bdrv_test, "test-node-a", BDRV_O_RDWR, 475 &error_abort); 476 a_s = bs_a->opaque; 477 blk_insert_bs(blk_a, bs_a, &error_abort); 478 479 blk_b = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); 480 bs_b = bdrv_new_open_driver(&bdrv_test, "test-node-b", BDRV_O_RDWR, 481 &error_abort); 482 b_s = bs_b->opaque; 483 blk_insert_bs(blk_b, bs_b, &error_abort); 484 485 backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort); 486 backing_s = backing->opaque; 487 bdrv_set_backing_hd(bs_a, backing, &error_abort); 488 489 g_assert_cmpint(bs_a->quiesce_counter, ==, 0); 490 g_assert_cmpint(bs_b->quiesce_counter, ==, 0); 491 g_assert_cmpint(backing->quiesce_counter, ==, 0); 492 g_assert_cmpint(a_s->drain_count, ==, 0); 493 g_assert_cmpint(b_s->drain_count, ==, 0); 494 g_assert_cmpint(backing_s->drain_count, ==, 0); 495 496 do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a); 497 do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a); 498 do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a); 499 do_drain_begin(BDRV_SUBTREE_DRAIN, bs_b); 500 do_drain_begin(BDRV_SUBTREE_DRAIN, bs_b); 501 502 bdrv_set_backing_hd(bs_b, backing, &error_abort); 503 g_assert_cmpint(bs_a->quiesce_counter, ==, 5); 504 g_assert_cmpint(bs_b->quiesce_counter, ==, 5); 505 g_assert_cmpint(backing->quiesce_counter, ==, 5); 506 g_assert_cmpint(a_s->drain_count, ==, 5); 507 g_assert_cmpint(b_s->drain_count, ==, 5); 508 g_assert_cmpint(backing_s->drain_count, ==, 5); 509 510 bdrv_set_backing_hd(bs_b, NULL, &error_abort); 511 g_assert_cmpint(bs_a->quiesce_counter, ==, 3); 512 g_assert_cmpint(bs_b->quiesce_counter, ==, 2); 513 g_assert_cmpint(backing->quiesce_counter, ==, 3); 514 g_assert_cmpint(a_s->drain_count, ==, 3); 515 g_assert_cmpint(b_s->drain_count, ==, 2); 516 g_assert_cmpint(backing_s->drain_count, ==, 3); 517 518 bdrv_set_backing_hd(bs_b, backing, &error_abort); 519 g_assert_cmpint(bs_a->quiesce_counter, ==, 5); 520 g_assert_cmpint(bs_b->quiesce_counter, ==, 5); 521 g_assert_cmpint(backing->quiesce_counter, ==, 5); 522 g_assert_cmpint(a_s->drain_count, ==, 5); 523 g_assert_cmpint(b_s->drain_count, ==, 5); 524 g_assert_cmpint(backing_s->drain_count, ==, 5); 525 526 do_drain_end(BDRV_SUBTREE_DRAIN, bs_b); 527 do_drain_end(BDRV_SUBTREE_DRAIN, bs_b); 528 do_drain_end(BDRV_SUBTREE_DRAIN, bs_a); 529 do_drain_end(BDRV_SUBTREE_DRAIN, bs_a); 530 do_drain_end(BDRV_SUBTREE_DRAIN, bs_a); 531 532 g_assert_cmpint(bs_a->quiesce_counter, ==, 0); 533 g_assert_cmpint(bs_b->quiesce_counter, ==, 0); 534 g_assert_cmpint(backing->quiesce_counter, ==, 0); 535 g_assert_cmpint(a_s->drain_count, ==, 0); 536 g_assert_cmpint(b_s->drain_count, ==, 0); 537 g_assert_cmpint(backing_s->drain_count, ==, 0); 538 539 bdrv_unref(backing); 540 bdrv_unref(bs_a); 541 bdrv_unref(bs_b); 542 blk_unref(blk_a); 543 blk_unref(blk_b); 544 } 545 546 static void test_graph_change_drain_all(void) 547 { 548 BlockBackend *blk_a, *blk_b; 549 BlockDriverState *bs_a, *bs_b; 550 BDRVTestState *a_s, *b_s; 551 552 /* Create node A with a BlockBackend */ 553 blk_a = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); 554 bs_a = bdrv_new_open_driver(&bdrv_test, "test-node-a", BDRV_O_RDWR, 555 &error_abort); 556 a_s = bs_a->opaque; 557 blk_insert_bs(blk_a, bs_a, &error_abort); 558 559 g_assert_cmpint(bs_a->quiesce_counter, ==, 0); 560 g_assert_cmpint(a_s->drain_count, ==, 0); 561 562 /* Call bdrv_drain_all_begin() */ 563 bdrv_drain_all_begin(); 564 565 g_assert_cmpint(bs_a->quiesce_counter, ==, 1); 566 g_assert_cmpint(a_s->drain_count, ==, 1); 567 568 /* Create node B with a BlockBackend */ 569 blk_b = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); 570 bs_b = bdrv_new_open_driver(&bdrv_test, "test-node-b", BDRV_O_RDWR, 571 &error_abort); 572 b_s = bs_b->opaque; 573 blk_insert_bs(blk_b, bs_b, &error_abort); 574 575 g_assert_cmpint(bs_a->quiesce_counter, ==, 1); 576 g_assert_cmpint(bs_b->quiesce_counter, ==, 1); 577 g_assert_cmpint(a_s->drain_count, ==, 1); 578 g_assert_cmpint(b_s->drain_count, ==, 1); 579 580 /* Unref and finally delete node A */ 581 blk_unref(blk_a); 582 583 g_assert_cmpint(bs_a->quiesce_counter, ==, 1); 584 g_assert_cmpint(bs_b->quiesce_counter, ==, 1); 585 g_assert_cmpint(a_s->drain_count, ==, 1); 586 g_assert_cmpint(b_s->drain_count, ==, 1); 587 588 bdrv_unref(bs_a); 589 590 g_assert_cmpint(bs_b->quiesce_counter, ==, 1); 591 g_assert_cmpint(b_s->drain_count, ==, 1); 592 593 /* End the drained section */ 594 bdrv_drain_all_end(); 595 596 g_assert_cmpint(bs_b->quiesce_counter, ==, 0); 597 g_assert_cmpint(b_s->drain_count, ==, 0); 598 g_assert_cmpint(qemu_get_aio_context()->external_disable_cnt, ==, 0); 599 600 bdrv_unref(bs_b); 601 blk_unref(blk_b); 602 } 603 604 struct test_iothread_data { 605 BlockDriverState *bs; 606 enum drain_type drain_type; 607 int *aio_ret; 608 }; 609 610 static void test_iothread_drain_entry(void *opaque) 611 { 612 struct test_iothread_data *data = opaque; 613 614 aio_context_acquire(bdrv_get_aio_context(data->bs)); 615 do_drain_begin(data->drain_type, data->bs); 616 g_assert_cmpint(*data->aio_ret, ==, 0); 617 do_drain_end(data->drain_type, data->bs); 618 aio_context_release(bdrv_get_aio_context(data->bs)); 619 620 qemu_event_set(&done_event); 621 } 622 623 static void test_iothread_aio_cb(void *opaque, int ret) 624 { 625 int *aio_ret = opaque; 626 *aio_ret = ret; 627 qemu_event_set(&done_event); 628 } 629 630 static void test_iothread_main_thread_bh(void *opaque) 631 { 632 struct test_iothread_data *data = opaque; 633 634 /* Test that the AioContext is not yet locked in a random BH that is 635 * executed during drain, otherwise this would deadlock. */ 636 aio_context_acquire(bdrv_get_aio_context(data->bs)); 637 bdrv_flush(data->bs); 638 aio_context_release(bdrv_get_aio_context(data->bs)); 639 } 640 641 /* 642 * Starts an AIO request on a BDS that runs in the AioContext of iothread 1. 643 * The request involves a BH on iothread 2 before it can complete. 644 * 645 * @drain_thread = 0 means that do_drain_begin/end are called from the main 646 * thread, @drain_thread = 1 means that they are called from iothread 1. Drain 647 * for this BDS cannot be called from iothread 2 because only the main thread 648 * may do cross-AioContext polling. 649 */ 650 static void test_iothread_common(enum drain_type drain_type, int drain_thread) 651 { 652 BlockBackend *blk; 653 BlockDriverState *bs; 654 BDRVTestState *s; 655 BlockAIOCB *acb; 656 int aio_ret; 657 struct test_iothread_data data; 658 659 IOThread *a = iothread_new(); 660 IOThread *b = iothread_new(); 661 AioContext *ctx_a = iothread_get_aio_context(a); 662 AioContext *ctx_b = iothread_get_aio_context(b); 663 664 QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, NULL, 0); 665 666 /* bdrv_drain_all() may only be called from the main loop thread */ 667 if (drain_type == BDRV_DRAIN_ALL && drain_thread != 0) { 668 goto out; 669 } 670 671 blk = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); 672 bs = bdrv_new_open_driver(&bdrv_test, "test-node", BDRV_O_RDWR, 673 &error_abort); 674 s = bs->opaque; 675 blk_insert_bs(blk, bs, &error_abort); 676 blk_set_disable_request_queuing(blk, true); 677 678 blk_set_aio_context(blk, ctx_a, &error_abort); 679 aio_context_acquire(ctx_a); 680 681 s->bh_indirection_ctx = ctx_b; 682 683 aio_ret = -EINPROGRESS; 684 qemu_event_reset(&done_event); 685 686 if (drain_thread == 0) { 687 acb = blk_aio_preadv(blk, 0, &qiov, 0, test_iothread_aio_cb, &aio_ret); 688 } else { 689 acb = blk_aio_preadv(blk, 0, &qiov, 0, aio_ret_cb, &aio_ret); 690 } 691 g_assert(acb != NULL); 692 g_assert_cmpint(aio_ret, ==, -EINPROGRESS); 693 694 aio_context_release(ctx_a); 695 696 data = (struct test_iothread_data) { 697 .bs = bs, 698 .drain_type = drain_type, 699 .aio_ret = &aio_ret, 700 }; 701 702 switch (drain_thread) { 703 case 0: 704 if (drain_type != BDRV_DRAIN_ALL) { 705 aio_context_acquire(ctx_a); 706 } 707 708 aio_bh_schedule_oneshot(ctx_a, test_iothread_main_thread_bh, &data); 709 710 /* The request is running on the IOThread a. Draining its block device 711 * will make sure that it has completed as far as the BDS is concerned, 712 * but the drain in this thread can continue immediately after 713 * bdrv_dec_in_flight() and aio_ret might be assigned only slightly 714 * later. */ 715 do_drain_begin(drain_type, bs); 716 g_assert_cmpint(bs->in_flight, ==, 0); 717 718 if (drain_type != BDRV_DRAIN_ALL) { 719 aio_context_release(ctx_a); 720 } 721 qemu_event_wait(&done_event); 722 if (drain_type != BDRV_DRAIN_ALL) { 723 aio_context_acquire(ctx_a); 724 } 725 726 g_assert_cmpint(aio_ret, ==, 0); 727 do_drain_end(drain_type, bs); 728 729 if (drain_type != BDRV_DRAIN_ALL) { 730 aio_context_release(ctx_a); 731 } 732 break; 733 case 1: 734 aio_bh_schedule_oneshot(ctx_a, test_iothread_drain_entry, &data); 735 qemu_event_wait(&done_event); 736 break; 737 default: 738 g_assert_not_reached(); 739 } 740 741 aio_context_acquire(ctx_a); 742 blk_set_aio_context(blk, qemu_get_aio_context(), &error_abort); 743 aio_context_release(ctx_a); 744 745 bdrv_unref(bs); 746 blk_unref(blk); 747 748 out: 749 iothread_join(a); 750 iothread_join(b); 751 } 752 753 static void test_iothread_drain_all(void) 754 { 755 test_iothread_common(BDRV_DRAIN_ALL, 0); 756 test_iothread_common(BDRV_DRAIN_ALL, 1); 757 } 758 759 static void test_iothread_drain(void) 760 { 761 test_iothread_common(BDRV_DRAIN, 0); 762 test_iothread_common(BDRV_DRAIN, 1); 763 } 764 765 static void test_iothread_drain_subtree(void) 766 { 767 test_iothread_common(BDRV_SUBTREE_DRAIN, 0); 768 test_iothread_common(BDRV_SUBTREE_DRAIN, 1); 769 } 770 771 772 typedef struct TestBlockJob { 773 BlockJob common; 774 int run_ret; 775 int prepare_ret; 776 bool running; 777 bool should_complete; 778 } TestBlockJob; 779 780 static int test_job_prepare(Job *job) 781 { 782 TestBlockJob *s = container_of(job, TestBlockJob, common.job); 783 784 /* Provoke an AIO_WAIT_WHILE() call to verify there is no deadlock */ 785 blk_flush(s->common.blk); 786 return s->prepare_ret; 787 } 788 789 static void test_job_commit(Job *job) 790 { 791 TestBlockJob *s = container_of(job, TestBlockJob, common.job); 792 793 /* Provoke an AIO_WAIT_WHILE() call to verify there is no deadlock */ 794 blk_flush(s->common.blk); 795 } 796 797 static void test_job_abort(Job *job) 798 { 799 TestBlockJob *s = container_of(job, TestBlockJob, common.job); 800 801 /* Provoke an AIO_WAIT_WHILE() call to verify there is no deadlock */ 802 blk_flush(s->common.blk); 803 } 804 805 static int coroutine_fn test_job_run(Job *job, Error **errp) 806 { 807 TestBlockJob *s = container_of(job, TestBlockJob, common.job); 808 809 /* We are running the actual job code past the pause point in 810 * job_co_entry(). */ 811 s->running = true; 812 813 job_transition_to_ready(&s->common.job); 814 while (!s->should_complete) { 815 /* Avoid job_sleep_ns() because it marks the job as !busy. We want to 816 * emulate some actual activity (probably some I/O) here so that drain 817 * has to wait for this activity to stop. */ 818 qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, 1000000); 819 820 job_pause_point(&s->common.job); 821 } 822 823 return s->run_ret; 824 } 825 826 static void test_job_complete(Job *job, Error **errp) 827 { 828 TestBlockJob *s = container_of(job, TestBlockJob, common.job); 829 s->should_complete = true; 830 } 831 832 BlockJobDriver test_job_driver = { 833 .job_driver = { 834 .instance_size = sizeof(TestBlockJob), 835 .free = block_job_free, 836 .user_resume = block_job_user_resume, 837 .run = test_job_run, 838 .complete = test_job_complete, 839 .prepare = test_job_prepare, 840 .commit = test_job_commit, 841 .abort = test_job_abort, 842 }, 843 }; 844 845 enum test_job_result { 846 TEST_JOB_SUCCESS, 847 TEST_JOB_FAIL_RUN, 848 TEST_JOB_FAIL_PREPARE, 849 }; 850 851 enum test_job_drain_node { 852 TEST_JOB_DRAIN_SRC, 853 TEST_JOB_DRAIN_SRC_CHILD, 854 TEST_JOB_DRAIN_SRC_PARENT, 855 }; 856 857 static void test_blockjob_common_drain_node(enum drain_type drain_type, 858 bool use_iothread, 859 enum test_job_result result, 860 enum test_job_drain_node drain_node) 861 { 862 BlockBackend *blk_src, *blk_target; 863 BlockDriverState *src, *src_backing, *src_overlay, *target, *drain_bs; 864 BlockJob *job; 865 TestBlockJob *tjob; 866 IOThread *iothread = NULL; 867 AioContext *ctx; 868 int ret; 869 870 src = bdrv_new_open_driver(&bdrv_test, "source", BDRV_O_RDWR, 871 &error_abort); 872 src_backing = bdrv_new_open_driver(&bdrv_test, "source-backing", 873 BDRV_O_RDWR, &error_abort); 874 src_overlay = bdrv_new_open_driver(&bdrv_test, "source-overlay", 875 BDRV_O_RDWR, &error_abort); 876 877 bdrv_set_backing_hd(src_overlay, src, &error_abort); 878 bdrv_unref(src); 879 bdrv_set_backing_hd(src, src_backing, &error_abort); 880 bdrv_unref(src_backing); 881 882 blk_src = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); 883 blk_insert_bs(blk_src, src_overlay, &error_abort); 884 885 switch (drain_node) { 886 case TEST_JOB_DRAIN_SRC: 887 drain_bs = src; 888 break; 889 case TEST_JOB_DRAIN_SRC_CHILD: 890 drain_bs = src_backing; 891 break; 892 case TEST_JOB_DRAIN_SRC_PARENT: 893 drain_bs = src_overlay; 894 break; 895 default: 896 g_assert_not_reached(); 897 } 898 899 if (use_iothread) { 900 iothread = iothread_new(); 901 ctx = iothread_get_aio_context(iothread); 902 blk_set_aio_context(blk_src, ctx, &error_abort); 903 } else { 904 ctx = qemu_get_aio_context(); 905 } 906 907 target = bdrv_new_open_driver(&bdrv_test, "target", BDRV_O_RDWR, 908 &error_abort); 909 blk_target = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); 910 blk_insert_bs(blk_target, target, &error_abort); 911 blk_set_allow_aio_context_change(blk_target, true); 912 913 aio_context_acquire(ctx); 914 tjob = block_job_create("job0", &test_job_driver, NULL, src, 915 0, BLK_PERM_ALL, 916 0, 0, NULL, NULL, &error_abort); 917 job = &tjob->common; 918 block_job_add_bdrv(job, "target", target, 0, BLK_PERM_ALL, &error_abort); 919 920 switch (result) { 921 case TEST_JOB_SUCCESS: 922 break; 923 case TEST_JOB_FAIL_RUN: 924 tjob->run_ret = -EIO; 925 break; 926 case TEST_JOB_FAIL_PREPARE: 927 tjob->prepare_ret = -EIO; 928 break; 929 } 930 931 job_start(&job->job); 932 aio_context_release(ctx); 933 934 if (use_iothread) { 935 /* job_co_entry() is run in the I/O thread, wait for the actual job 936 * code to start (we don't want to catch the job in the pause point in 937 * job_co_entry(). */ 938 while (!tjob->running) { 939 aio_poll(qemu_get_aio_context(), false); 940 } 941 } 942 943 g_assert_cmpint(job->job.pause_count, ==, 0); 944 g_assert_false(job->job.paused); 945 g_assert_true(tjob->running); 946 g_assert_true(job->job.busy); /* We're in qemu_co_sleep_ns() */ 947 948 do_drain_begin_unlocked(drain_type, drain_bs); 949 950 if (drain_type == BDRV_DRAIN_ALL) { 951 /* bdrv_drain_all() drains both src and target */ 952 g_assert_cmpint(job->job.pause_count, ==, 2); 953 } else { 954 g_assert_cmpint(job->job.pause_count, ==, 1); 955 } 956 g_assert_true(job->job.paused); 957 g_assert_false(job->job.busy); /* The job is paused */ 958 959 do_drain_end_unlocked(drain_type, drain_bs); 960 961 if (use_iothread) { 962 /* paused is reset in the I/O thread, wait for it */ 963 while (job->job.paused) { 964 aio_poll(qemu_get_aio_context(), false); 965 } 966 } 967 968 g_assert_cmpint(job->job.pause_count, ==, 0); 969 g_assert_false(job->job.paused); 970 g_assert_true(job->job.busy); /* We're in qemu_co_sleep_ns() */ 971 972 do_drain_begin_unlocked(drain_type, target); 973 974 if (drain_type == BDRV_DRAIN_ALL) { 975 /* bdrv_drain_all() drains both src and target */ 976 g_assert_cmpint(job->job.pause_count, ==, 2); 977 } else { 978 g_assert_cmpint(job->job.pause_count, ==, 1); 979 } 980 g_assert_true(job->job.paused); 981 g_assert_false(job->job.busy); /* The job is paused */ 982 983 do_drain_end_unlocked(drain_type, target); 984 985 if (use_iothread) { 986 /* paused is reset in the I/O thread, wait for it */ 987 while (job->job.paused) { 988 aio_poll(qemu_get_aio_context(), false); 989 } 990 } 991 992 g_assert_cmpint(job->job.pause_count, ==, 0); 993 g_assert_false(job->job.paused); 994 g_assert_true(job->job.busy); /* We're in qemu_co_sleep_ns() */ 995 996 aio_context_acquire(ctx); 997 ret = job_complete_sync(&job->job, &error_abort); 998 g_assert_cmpint(ret, ==, (result == TEST_JOB_SUCCESS ? 0 : -EIO)); 999 1000 if (use_iothread) { 1001 blk_set_aio_context(blk_src, qemu_get_aio_context(), &error_abort); 1002 assert(blk_get_aio_context(blk_target) == qemu_get_aio_context()); 1003 } 1004 aio_context_release(ctx); 1005 1006 blk_unref(blk_src); 1007 blk_unref(blk_target); 1008 bdrv_unref(src_overlay); 1009 bdrv_unref(target); 1010 1011 if (iothread) { 1012 iothread_join(iothread); 1013 } 1014 } 1015 1016 static void test_blockjob_common(enum drain_type drain_type, bool use_iothread, 1017 enum test_job_result result) 1018 { 1019 test_blockjob_common_drain_node(drain_type, use_iothread, result, 1020 TEST_JOB_DRAIN_SRC); 1021 test_blockjob_common_drain_node(drain_type, use_iothread, result, 1022 TEST_JOB_DRAIN_SRC_CHILD); 1023 if (drain_type == BDRV_SUBTREE_DRAIN) { 1024 test_blockjob_common_drain_node(drain_type, use_iothread, result, 1025 TEST_JOB_DRAIN_SRC_PARENT); 1026 } 1027 } 1028 1029 static void test_blockjob_drain_all(void) 1030 { 1031 test_blockjob_common(BDRV_DRAIN_ALL, false, TEST_JOB_SUCCESS); 1032 } 1033 1034 static void test_blockjob_drain(void) 1035 { 1036 test_blockjob_common(BDRV_DRAIN, false, TEST_JOB_SUCCESS); 1037 } 1038 1039 static void test_blockjob_drain_subtree(void) 1040 { 1041 test_blockjob_common(BDRV_SUBTREE_DRAIN, false, TEST_JOB_SUCCESS); 1042 } 1043 1044 static void test_blockjob_error_drain_all(void) 1045 { 1046 test_blockjob_common(BDRV_DRAIN_ALL, false, TEST_JOB_FAIL_RUN); 1047 test_blockjob_common(BDRV_DRAIN_ALL, false, TEST_JOB_FAIL_PREPARE); 1048 } 1049 1050 static void test_blockjob_error_drain(void) 1051 { 1052 test_blockjob_common(BDRV_DRAIN, false, TEST_JOB_FAIL_RUN); 1053 test_blockjob_common(BDRV_DRAIN, false, TEST_JOB_FAIL_PREPARE); 1054 } 1055 1056 static void test_blockjob_error_drain_subtree(void) 1057 { 1058 test_blockjob_common(BDRV_SUBTREE_DRAIN, false, TEST_JOB_FAIL_RUN); 1059 test_blockjob_common(BDRV_SUBTREE_DRAIN, false, TEST_JOB_FAIL_PREPARE); 1060 } 1061 1062 static void test_blockjob_iothread_drain_all(void) 1063 { 1064 test_blockjob_common(BDRV_DRAIN_ALL, true, TEST_JOB_SUCCESS); 1065 } 1066 1067 static void test_blockjob_iothread_drain(void) 1068 { 1069 test_blockjob_common(BDRV_DRAIN, true, TEST_JOB_SUCCESS); 1070 } 1071 1072 static void test_blockjob_iothread_drain_subtree(void) 1073 { 1074 test_blockjob_common(BDRV_SUBTREE_DRAIN, true, TEST_JOB_SUCCESS); 1075 } 1076 1077 static void test_blockjob_iothread_error_drain_all(void) 1078 { 1079 test_blockjob_common(BDRV_DRAIN_ALL, true, TEST_JOB_FAIL_RUN); 1080 test_blockjob_common(BDRV_DRAIN_ALL, true, TEST_JOB_FAIL_PREPARE); 1081 } 1082 1083 static void test_blockjob_iothread_error_drain(void) 1084 { 1085 test_blockjob_common(BDRV_DRAIN, true, TEST_JOB_FAIL_RUN); 1086 test_blockjob_common(BDRV_DRAIN, true, TEST_JOB_FAIL_PREPARE); 1087 } 1088 1089 static void test_blockjob_iothread_error_drain_subtree(void) 1090 { 1091 test_blockjob_common(BDRV_SUBTREE_DRAIN, true, TEST_JOB_FAIL_RUN); 1092 test_blockjob_common(BDRV_SUBTREE_DRAIN, true, TEST_JOB_FAIL_PREPARE); 1093 } 1094 1095 1096 typedef struct BDRVTestTopState { 1097 BdrvChild *wait_child; 1098 } BDRVTestTopState; 1099 1100 static void bdrv_test_top_close(BlockDriverState *bs) 1101 { 1102 BdrvChild *c, *next_c; 1103 QLIST_FOREACH_SAFE(c, &bs->children, next, next_c) { 1104 bdrv_unref_child(bs, c); 1105 } 1106 } 1107 1108 static int coroutine_fn bdrv_test_top_co_preadv(BlockDriverState *bs, 1109 uint64_t offset, uint64_t bytes, 1110 QEMUIOVector *qiov, int flags) 1111 { 1112 BDRVTestTopState *tts = bs->opaque; 1113 return bdrv_co_preadv(tts->wait_child, offset, bytes, qiov, flags); 1114 } 1115 1116 static BlockDriver bdrv_test_top_driver = { 1117 .format_name = "test_top_driver", 1118 .instance_size = sizeof(BDRVTestTopState), 1119 1120 .bdrv_close = bdrv_test_top_close, 1121 .bdrv_co_preadv = bdrv_test_top_co_preadv, 1122 1123 .bdrv_child_perm = bdrv_default_perms, 1124 }; 1125 1126 typedef struct TestCoDeleteByDrainData { 1127 BlockBackend *blk; 1128 bool detach_instead_of_delete; 1129 bool done; 1130 } TestCoDeleteByDrainData; 1131 1132 static void coroutine_fn test_co_delete_by_drain(void *opaque) 1133 { 1134 TestCoDeleteByDrainData *dbdd = opaque; 1135 BlockBackend *blk = dbdd->blk; 1136 BlockDriverState *bs = blk_bs(blk); 1137 BDRVTestTopState *tts = bs->opaque; 1138 void *buffer = g_malloc(65536); 1139 QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buffer, 65536); 1140 1141 /* Pretend some internal write operation from parent to child. 1142 * Important: We have to read from the child, not from the parent! 1143 * Draining works by first propagating it all up the tree to the 1144 * root and then waiting for drainage from root to the leaves 1145 * (protocol nodes). If we have a request waiting on the root, 1146 * everything will be drained before we go back down the tree, but 1147 * we do not want that. We want to be in the middle of draining 1148 * when this following requests returns. */ 1149 bdrv_co_preadv(tts->wait_child, 0, 65536, &qiov, 0); 1150 1151 g_assert_cmpint(bs->refcnt, ==, 1); 1152 1153 if (!dbdd->detach_instead_of_delete) { 1154 blk_unref(blk); 1155 } else { 1156 BdrvChild *c, *next_c; 1157 QLIST_FOREACH_SAFE(c, &bs->children, next, next_c) { 1158 bdrv_unref_child(bs, c); 1159 } 1160 } 1161 1162 dbdd->done = true; 1163 g_free(buffer); 1164 } 1165 1166 /** 1167 * Test what happens when some BDS has some children, you drain one of 1168 * them and this results in the BDS being deleted. 1169 * 1170 * If @detach_instead_of_delete is set, the BDS is not going to be 1171 * deleted but will only detach all of its children. 1172 */ 1173 static void do_test_delete_by_drain(bool detach_instead_of_delete, 1174 enum drain_type drain_type) 1175 { 1176 BlockBackend *blk; 1177 BlockDriverState *bs, *child_bs, *null_bs; 1178 BDRVTestTopState *tts; 1179 TestCoDeleteByDrainData dbdd; 1180 Coroutine *co; 1181 1182 bs = bdrv_new_open_driver(&bdrv_test_top_driver, "top", BDRV_O_RDWR, 1183 &error_abort); 1184 bs->total_sectors = 65536 >> BDRV_SECTOR_BITS; 1185 tts = bs->opaque; 1186 1187 null_bs = bdrv_open("null-co://", NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL, 1188 &error_abort); 1189 bdrv_attach_child(bs, null_bs, "null-child", &child_of_bds, 1190 BDRV_CHILD_DATA, &error_abort); 1191 1192 /* This child will be the one to pass to requests through to, and 1193 * it will stall until a drain occurs */ 1194 child_bs = bdrv_new_open_driver(&bdrv_test, "child", BDRV_O_RDWR, 1195 &error_abort); 1196 child_bs->total_sectors = 65536 >> BDRV_SECTOR_BITS; 1197 /* Takes our reference to child_bs */ 1198 tts->wait_child = bdrv_attach_child(bs, child_bs, "wait-child", 1199 &child_of_bds, 1200 BDRV_CHILD_DATA | BDRV_CHILD_PRIMARY, 1201 &error_abort); 1202 1203 /* This child is just there to be deleted 1204 * (for detach_instead_of_delete == true) */ 1205 null_bs = bdrv_open("null-co://", NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL, 1206 &error_abort); 1207 bdrv_attach_child(bs, null_bs, "null-child", &child_of_bds, BDRV_CHILD_DATA, 1208 &error_abort); 1209 1210 blk = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); 1211 blk_insert_bs(blk, bs, &error_abort); 1212 1213 /* Referenced by blk now */ 1214 bdrv_unref(bs); 1215 1216 g_assert_cmpint(bs->refcnt, ==, 1); 1217 g_assert_cmpint(child_bs->refcnt, ==, 1); 1218 g_assert_cmpint(null_bs->refcnt, ==, 1); 1219 1220 1221 dbdd = (TestCoDeleteByDrainData){ 1222 .blk = blk, 1223 .detach_instead_of_delete = detach_instead_of_delete, 1224 .done = false, 1225 }; 1226 co = qemu_coroutine_create(test_co_delete_by_drain, &dbdd); 1227 qemu_coroutine_enter(co); 1228 1229 /* Drain the child while the read operation is still pending. 1230 * This should result in the operation finishing and 1231 * test_co_delete_by_drain() resuming. Thus, @bs will be deleted 1232 * and the coroutine will exit while this drain operation is still 1233 * in progress. */ 1234 switch (drain_type) { 1235 case BDRV_DRAIN: 1236 bdrv_ref(child_bs); 1237 bdrv_drain(child_bs); 1238 bdrv_unref(child_bs); 1239 break; 1240 case BDRV_SUBTREE_DRAIN: 1241 /* Would have to ref/unref bs here for !detach_instead_of_delete, but 1242 * then the whole test becomes pointless because the graph changes 1243 * don't occur during the drain any more. */ 1244 assert(detach_instead_of_delete); 1245 bdrv_subtree_drained_begin(bs); 1246 bdrv_subtree_drained_end(bs); 1247 break; 1248 case BDRV_DRAIN_ALL: 1249 bdrv_drain_all_begin(); 1250 bdrv_drain_all_end(); 1251 break; 1252 default: 1253 g_assert_not_reached(); 1254 } 1255 1256 while (!dbdd.done) { 1257 aio_poll(qemu_get_aio_context(), true); 1258 } 1259 1260 if (detach_instead_of_delete) { 1261 /* Here, the reference has not passed over to the coroutine, 1262 * so we have to delete the BB ourselves */ 1263 blk_unref(blk); 1264 } 1265 } 1266 1267 static void test_delete_by_drain(void) 1268 { 1269 do_test_delete_by_drain(false, BDRV_DRAIN); 1270 } 1271 1272 static void test_detach_by_drain_all(void) 1273 { 1274 do_test_delete_by_drain(true, BDRV_DRAIN_ALL); 1275 } 1276 1277 static void test_detach_by_drain(void) 1278 { 1279 do_test_delete_by_drain(true, BDRV_DRAIN); 1280 } 1281 1282 static void test_detach_by_drain_subtree(void) 1283 { 1284 do_test_delete_by_drain(true, BDRV_SUBTREE_DRAIN); 1285 } 1286 1287 1288 struct detach_by_parent_data { 1289 BlockDriverState *parent_b; 1290 BdrvChild *child_b; 1291 BlockDriverState *c; 1292 BdrvChild *child_c; 1293 bool by_parent_cb; 1294 }; 1295 static struct detach_by_parent_data detach_by_parent_data; 1296 1297 static void detach_indirect_bh(void *opaque) 1298 { 1299 struct detach_by_parent_data *data = opaque; 1300 1301 bdrv_unref_child(data->parent_b, data->child_b); 1302 1303 bdrv_ref(data->c); 1304 data->child_c = bdrv_attach_child(data->parent_b, data->c, "PB-C", 1305 &child_of_bds, BDRV_CHILD_DATA, 1306 &error_abort); 1307 } 1308 1309 static void detach_by_parent_aio_cb(void *opaque, int ret) 1310 { 1311 struct detach_by_parent_data *data = &detach_by_parent_data; 1312 1313 g_assert_cmpint(ret, ==, 0); 1314 if (data->by_parent_cb) { 1315 detach_indirect_bh(data); 1316 } 1317 } 1318 1319 static void detach_by_driver_cb_drained_begin(BdrvChild *child) 1320 { 1321 aio_bh_schedule_oneshot(qemu_get_current_aio_context(), 1322 detach_indirect_bh, &detach_by_parent_data); 1323 child_of_bds.drained_begin(child); 1324 } 1325 1326 static BdrvChildClass detach_by_driver_cb_class; 1327 1328 /* 1329 * Initial graph: 1330 * 1331 * PA PB 1332 * \ / \ 1333 * A B C 1334 * 1335 * by_parent_cb == true: Test that parent callbacks don't poll 1336 * 1337 * PA has a pending write request whose callback changes the child nodes of 1338 * PB: It removes B and adds C instead. The subtree of PB is drained, which 1339 * will indirectly drain the write request, too. 1340 * 1341 * by_parent_cb == false: Test that bdrv_drain_invoke() doesn't poll 1342 * 1343 * PA's BdrvChildClass has a .drained_begin callback that schedules a BH 1344 * that does the same graph change. If bdrv_drain_invoke() calls it, the 1345 * state is messed up, but if it is only polled in the single 1346 * BDRV_POLL_WHILE() at the end of the drain, this should work fine. 1347 */ 1348 static void test_detach_indirect(bool by_parent_cb) 1349 { 1350 BlockBackend *blk; 1351 BlockDriverState *parent_a, *parent_b, *a, *b, *c; 1352 BdrvChild *child_a, *child_b; 1353 BlockAIOCB *acb; 1354 1355 QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, NULL, 0); 1356 1357 if (!by_parent_cb) { 1358 detach_by_driver_cb_class = child_of_bds; 1359 detach_by_driver_cb_class.drained_begin = 1360 detach_by_driver_cb_drained_begin; 1361 } 1362 1363 /* Create all involved nodes */ 1364 parent_a = bdrv_new_open_driver(&bdrv_test, "parent-a", BDRV_O_RDWR, 1365 &error_abort); 1366 parent_b = bdrv_new_open_driver(&bdrv_test, "parent-b", 0, 1367 &error_abort); 1368 1369 a = bdrv_new_open_driver(&bdrv_test, "a", BDRV_O_RDWR, &error_abort); 1370 b = bdrv_new_open_driver(&bdrv_test, "b", BDRV_O_RDWR, &error_abort); 1371 c = bdrv_new_open_driver(&bdrv_test, "c", BDRV_O_RDWR, &error_abort); 1372 1373 /* blk is a BB for parent-a */ 1374 blk = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); 1375 blk_insert_bs(blk, parent_a, &error_abort); 1376 bdrv_unref(parent_a); 1377 1378 /* If we want to get bdrv_drain_invoke() to call aio_poll(), the driver 1379 * callback must not return immediately. */ 1380 if (!by_parent_cb) { 1381 BDRVTestState *s = parent_a->opaque; 1382 s->sleep_in_drain_begin = true; 1383 } 1384 1385 /* Set child relationships */ 1386 bdrv_ref(b); 1387 bdrv_ref(a); 1388 child_b = bdrv_attach_child(parent_b, b, "PB-B", &child_of_bds, 1389 BDRV_CHILD_DATA, &error_abort); 1390 child_a = bdrv_attach_child(parent_b, a, "PB-A", &child_of_bds, 1391 BDRV_CHILD_COW, &error_abort); 1392 1393 bdrv_ref(a); 1394 bdrv_attach_child(parent_a, a, "PA-A", 1395 by_parent_cb ? &child_of_bds : &detach_by_driver_cb_class, 1396 BDRV_CHILD_DATA, &error_abort); 1397 1398 g_assert_cmpint(parent_a->refcnt, ==, 1); 1399 g_assert_cmpint(parent_b->refcnt, ==, 1); 1400 g_assert_cmpint(a->refcnt, ==, 3); 1401 g_assert_cmpint(b->refcnt, ==, 2); 1402 g_assert_cmpint(c->refcnt, ==, 1); 1403 1404 g_assert(QLIST_FIRST(&parent_b->children) == child_a); 1405 g_assert(QLIST_NEXT(child_a, next) == child_b); 1406 g_assert(QLIST_NEXT(child_b, next) == NULL); 1407 1408 /* Start the evil write request */ 1409 detach_by_parent_data = (struct detach_by_parent_data) { 1410 .parent_b = parent_b, 1411 .child_b = child_b, 1412 .c = c, 1413 .by_parent_cb = by_parent_cb, 1414 }; 1415 acb = blk_aio_preadv(blk, 0, &qiov, 0, detach_by_parent_aio_cb, NULL); 1416 g_assert(acb != NULL); 1417 1418 /* Drain and check the expected result */ 1419 bdrv_subtree_drained_begin(parent_b); 1420 1421 g_assert(detach_by_parent_data.child_c != NULL); 1422 1423 g_assert_cmpint(parent_a->refcnt, ==, 1); 1424 g_assert_cmpint(parent_b->refcnt, ==, 1); 1425 g_assert_cmpint(a->refcnt, ==, 3); 1426 g_assert_cmpint(b->refcnt, ==, 1); 1427 g_assert_cmpint(c->refcnt, ==, 2); 1428 1429 g_assert(QLIST_FIRST(&parent_b->children) == detach_by_parent_data.child_c); 1430 g_assert(QLIST_NEXT(detach_by_parent_data.child_c, next) == child_a); 1431 g_assert(QLIST_NEXT(child_a, next) == NULL); 1432 1433 g_assert_cmpint(parent_a->quiesce_counter, ==, 1); 1434 g_assert_cmpint(parent_b->quiesce_counter, ==, 1); 1435 g_assert_cmpint(a->quiesce_counter, ==, 1); 1436 g_assert_cmpint(b->quiesce_counter, ==, 0); 1437 g_assert_cmpint(c->quiesce_counter, ==, 1); 1438 1439 bdrv_subtree_drained_end(parent_b); 1440 1441 bdrv_unref(parent_b); 1442 blk_unref(blk); 1443 1444 g_assert_cmpint(a->refcnt, ==, 1); 1445 g_assert_cmpint(b->refcnt, ==, 1); 1446 g_assert_cmpint(c->refcnt, ==, 1); 1447 bdrv_unref(a); 1448 bdrv_unref(b); 1449 bdrv_unref(c); 1450 } 1451 1452 static void test_detach_by_parent_cb(void) 1453 { 1454 test_detach_indirect(true); 1455 } 1456 1457 static void test_detach_by_driver_cb(void) 1458 { 1459 test_detach_indirect(false); 1460 } 1461 1462 static void test_append_to_drained(void) 1463 { 1464 BlockBackend *blk; 1465 BlockDriverState *base, *overlay; 1466 BDRVTestState *base_s, *overlay_s; 1467 1468 blk = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); 1469 base = bdrv_new_open_driver(&bdrv_test, "base", BDRV_O_RDWR, &error_abort); 1470 base_s = base->opaque; 1471 blk_insert_bs(blk, base, &error_abort); 1472 1473 overlay = bdrv_new_open_driver(&bdrv_test, "overlay", BDRV_O_RDWR, 1474 &error_abort); 1475 overlay_s = overlay->opaque; 1476 1477 do_drain_begin(BDRV_DRAIN, base); 1478 g_assert_cmpint(base->quiesce_counter, ==, 1); 1479 g_assert_cmpint(base_s->drain_count, ==, 1); 1480 g_assert_cmpint(base->in_flight, ==, 0); 1481 1482 bdrv_append(overlay, base, &error_abort); 1483 g_assert_cmpint(base->in_flight, ==, 0); 1484 g_assert_cmpint(overlay->in_flight, ==, 0); 1485 1486 g_assert_cmpint(base->quiesce_counter, ==, 1); 1487 g_assert_cmpint(base_s->drain_count, ==, 1); 1488 g_assert_cmpint(overlay->quiesce_counter, ==, 1); 1489 g_assert_cmpint(overlay_s->drain_count, ==, 1); 1490 1491 do_drain_end(BDRV_DRAIN, base); 1492 1493 g_assert_cmpint(base->quiesce_counter, ==, 0); 1494 g_assert_cmpint(base_s->drain_count, ==, 0); 1495 g_assert_cmpint(overlay->quiesce_counter, ==, 0); 1496 g_assert_cmpint(overlay_s->drain_count, ==, 0); 1497 1498 bdrv_unref(overlay); 1499 bdrv_unref(base); 1500 blk_unref(blk); 1501 } 1502 1503 static void test_set_aio_context(void) 1504 { 1505 BlockDriverState *bs; 1506 IOThread *a = iothread_new(); 1507 IOThread *b = iothread_new(); 1508 AioContext *ctx_a = iothread_get_aio_context(a); 1509 AioContext *ctx_b = iothread_get_aio_context(b); 1510 1511 bs = bdrv_new_open_driver(&bdrv_test, "test-node", BDRV_O_RDWR, 1512 &error_abort); 1513 1514 bdrv_drained_begin(bs); 1515 bdrv_try_set_aio_context(bs, ctx_a, &error_abort); 1516 1517 aio_context_acquire(ctx_a); 1518 bdrv_drained_end(bs); 1519 1520 bdrv_drained_begin(bs); 1521 bdrv_try_set_aio_context(bs, ctx_b, &error_abort); 1522 aio_context_release(ctx_a); 1523 aio_context_acquire(ctx_b); 1524 bdrv_try_set_aio_context(bs, qemu_get_aio_context(), &error_abort); 1525 aio_context_release(ctx_b); 1526 bdrv_drained_end(bs); 1527 1528 bdrv_unref(bs); 1529 iothread_join(a); 1530 iothread_join(b); 1531 } 1532 1533 1534 typedef struct TestDropBackingBlockJob { 1535 BlockJob common; 1536 bool should_complete; 1537 bool *did_complete; 1538 BlockDriverState *detach_also; 1539 } TestDropBackingBlockJob; 1540 1541 static int coroutine_fn test_drop_backing_job_run(Job *job, Error **errp) 1542 { 1543 TestDropBackingBlockJob *s = 1544 container_of(job, TestDropBackingBlockJob, common.job); 1545 1546 while (!s->should_complete) { 1547 job_sleep_ns(job, 0); 1548 } 1549 1550 return 0; 1551 } 1552 1553 static void test_drop_backing_job_commit(Job *job) 1554 { 1555 TestDropBackingBlockJob *s = 1556 container_of(job, TestDropBackingBlockJob, common.job); 1557 1558 bdrv_set_backing_hd(blk_bs(s->common.blk), NULL, &error_abort); 1559 bdrv_set_backing_hd(s->detach_also, NULL, &error_abort); 1560 1561 *s->did_complete = true; 1562 } 1563 1564 static const BlockJobDriver test_drop_backing_job_driver = { 1565 .job_driver = { 1566 .instance_size = sizeof(TestDropBackingBlockJob), 1567 .free = block_job_free, 1568 .user_resume = block_job_user_resume, 1569 .run = test_drop_backing_job_run, 1570 .commit = test_drop_backing_job_commit, 1571 } 1572 }; 1573 1574 /** 1575 * Creates a child node with three parent nodes on it, and then runs a 1576 * block job on the final one, parent-node-2. 1577 * 1578 * The job is then asked to complete before a section where the child 1579 * is drained. 1580 * 1581 * Ending this section will undrain the child's parents, first 1582 * parent-node-2, then parent-node-1, then parent-node-0 -- the parent 1583 * list is in reverse order of how they were added. Ending the drain 1584 * on parent-node-2 will resume the job, thus completing it and 1585 * scheduling job_exit(). 1586 * 1587 * Ending the drain on parent-node-1 will poll the AioContext, which 1588 * lets job_exit() and thus test_drop_backing_job_commit() run. That 1589 * function first removes the child as parent-node-2's backing file. 1590 * 1591 * In old (and buggy) implementations, there are two problems with 1592 * that: 1593 * (A) bdrv_drain_invoke() polls for every node that leaves the 1594 * drained section. This means that job_exit() is scheduled 1595 * before the child has left the drained section. Its 1596 * quiesce_counter is therefore still 1 when it is removed from 1597 * parent-node-2. 1598 * 1599 * (B) bdrv_replace_child_noperm() calls drained_end() on the old 1600 * child's parents as many times as the child is quiesced. This 1601 * means it will call drained_end() on parent-node-2 once. 1602 * Because parent-node-2 is no longer quiesced at this point, this 1603 * will fail. 1604 * 1605 * bdrv_replace_child_noperm() therefore must call drained_end() on 1606 * the parent only if it really is still drained because the child is 1607 * drained. 1608 * 1609 * If removing child from parent-node-2 was successful (as it should 1610 * be), test_drop_backing_job_commit() will then also remove the child 1611 * from parent-node-0. 1612 * 1613 * With an old version of our drain infrastructure ((A) above), that 1614 * resulted in the following flow: 1615 * 1616 * 1. child attempts to leave its drained section. The call recurses 1617 * to its parents. 1618 * 1619 * 2. parent-node-2 leaves the drained section. Polling in 1620 * bdrv_drain_invoke() will schedule job_exit(). 1621 * 1622 * 3. parent-node-1 leaves the drained section. Polling in 1623 * bdrv_drain_invoke() will run job_exit(), thus disconnecting 1624 * parent-node-0 from the child node. 1625 * 1626 * 4. bdrv_parent_drained_end() uses a QLIST_FOREACH_SAFE() loop to 1627 * iterate over the parents. Thus, it now accesses the BdrvChild 1628 * object that used to connect parent-node-0 and the child node. 1629 * However, that object no longer exists, so it accesses a dangling 1630 * pointer. 1631 * 1632 * The solution is to only poll once when running a bdrv_drained_end() 1633 * operation, specifically at the end when all drained_end() 1634 * operations for all involved nodes have been scheduled. 1635 * Note that this also solves (A) above, thus hiding (B). 1636 */ 1637 static void test_blockjob_commit_by_drained_end(void) 1638 { 1639 BlockDriverState *bs_child, *bs_parents[3]; 1640 TestDropBackingBlockJob *job; 1641 bool job_has_completed = false; 1642 int i; 1643 1644 bs_child = bdrv_new_open_driver(&bdrv_test, "child-node", BDRV_O_RDWR, 1645 &error_abort); 1646 1647 for (i = 0; i < 3; i++) { 1648 char name[32]; 1649 snprintf(name, sizeof(name), "parent-node-%i", i); 1650 bs_parents[i] = bdrv_new_open_driver(&bdrv_test, name, BDRV_O_RDWR, 1651 &error_abort); 1652 bdrv_set_backing_hd(bs_parents[i], bs_child, &error_abort); 1653 } 1654 1655 job = block_job_create("job", &test_drop_backing_job_driver, NULL, 1656 bs_parents[2], 0, BLK_PERM_ALL, 0, 0, NULL, NULL, 1657 &error_abort); 1658 1659 job->detach_also = bs_parents[0]; 1660 job->did_complete = &job_has_completed; 1661 1662 job_start(&job->common.job); 1663 1664 job->should_complete = true; 1665 bdrv_drained_begin(bs_child); 1666 g_assert(!job_has_completed); 1667 bdrv_drained_end(bs_child); 1668 g_assert(job_has_completed); 1669 1670 bdrv_unref(bs_parents[0]); 1671 bdrv_unref(bs_parents[1]); 1672 bdrv_unref(bs_parents[2]); 1673 bdrv_unref(bs_child); 1674 } 1675 1676 1677 typedef struct TestSimpleBlockJob { 1678 BlockJob common; 1679 bool should_complete; 1680 bool *did_complete; 1681 } TestSimpleBlockJob; 1682 1683 static int coroutine_fn test_simple_job_run(Job *job, Error **errp) 1684 { 1685 TestSimpleBlockJob *s = container_of(job, TestSimpleBlockJob, common.job); 1686 1687 while (!s->should_complete) { 1688 job_sleep_ns(job, 0); 1689 } 1690 1691 return 0; 1692 } 1693 1694 static void test_simple_job_clean(Job *job) 1695 { 1696 TestSimpleBlockJob *s = container_of(job, TestSimpleBlockJob, common.job); 1697 *s->did_complete = true; 1698 } 1699 1700 static const BlockJobDriver test_simple_job_driver = { 1701 .job_driver = { 1702 .instance_size = sizeof(TestSimpleBlockJob), 1703 .free = block_job_free, 1704 .user_resume = block_job_user_resume, 1705 .run = test_simple_job_run, 1706 .clean = test_simple_job_clean, 1707 }, 1708 }; 1709 1710 static int drop_intermediate_poll_update_filename(BdrvChild *child, 1711 BlockDriverState *new_base, 1712 const char *filename, 1713 Error **errp) 1714 { 1715 /* 1716 * We are free to poll here, which may change the block graph, if 1717 * it is not drained. 1718 */ 1719 1720 /* If the job is not drained: Complete it, schedule job_exit() */ 1721 aio_poll(qemu_get_current_aio_context(), false); 1722 /* If the job is not drained: Run job_exit(), finish the job */ 1723 aio_poll(qemu_get_current_aio_context(), false); 1724 1725 return 0; 1726 } 1727 1728 /** 1729 * Test a poll in the midst of bdrv_drop_intermediate(). 1730 * 1731 * bdrv_drop_intermediate() calls BdrvChildClass.update_filename(), 1732 * which can yield or poll. This may lead to graph changes, unless 1733 * the whole subtree in question is drained. 1734 * 1735 * We test this on the following graph: 1736 * 1737 * Job 1738 * 1739 * | 1740 * job-node 1741 * | 1742 * v 1743 * 1744 * job-node 1745 * 1746 * | 1747 * backing 1748 * | 1749 * v 1750 * 1751 * node-2 --chain--> node-1 --chain--> node-0 1752 * 1753 * We drop node-1 with bdrv_drop_intermediate(top=node-1, base=node-0). 1754 * 1755 * This first updates node-2's backing filename by invoking 1756 * drop_intermediate_poll_update_filename(), which polls twice. This 1757 * causes the job to finish, which in turns causes the job-node to be 1758 * deleted. 1759 * 1760 * bdrv_drop_intermediate() uses a QLIST_FOREACH_SAFE() loop, so it 1761 * already has a pointer to the BdrvChild edge between job-node and 1762 * node-1. When it tries to handle that edge, we probably get a 1763 * segmentation fault because the object no longer exists. 1764 * 1765 * 1766 * The solution is for bdrv_drop_intermediate() to drain top's 1767 * subtree. This prevents graph changes from happening just because 1768 * BdrvChildClass.update_filename() yields or polls. Thus, the block 1769 * job is paused during that drained section and must finish before or 1770 * after. 1771 * 1772 * (In addition, bdrv_replace_child() must keep the job paused.) 1773 */ 1774 static void test_drop_intermediate_poll(void) 1775 { 1776 static BdrvChildClass chain_child_class; 1777 BlockDriverState *chain[3]; 1778 TestSimpleBlockJob *job; 1779 BlockDriverState *job_node; 1780 bool job_has_completed = false; 1781 int i; 1782 int ret; 1783 1784 chain_child_class = child_of_bds; 1785 chain_child_class.update_filename = drop_intermediate_poll_update_filename; 1786 1787 for (i = 0; i < 3; i++) { 1788 char name[32]; 1789 snprintf(name, 32, "node-%i", i); 1790 1791 chain[i] = bdrv_new_open_driver(&bdrv_test, name, 0, &error_abort); 1792 } 1793 1794 job_node = bdrv_new_open_driver(&bdrv_test, "job-node", BDRV_O_RDWR, 1795 &error_abort); 1796 bdrv_set_backing_hd(job_node, chain[1], &error_abort); 1797 1798 /* 1799 * Establish the chain last, so the chain links are the first 1800 * elements in the BDS.parents lists 1801 */ 1802 for (i = 0; i < 3; i++) { 1803 if (i) { 1804 /* Takes the reference to chain[i - 1] */ 1805 chain[i]->backing = bdrv_attach_child(chain[i], chain[i - 1], 1806 "chain", &chain_child_class, 1807 BDRV_CHILD_COW, &error_abort); 1808 } 1809 } 1810 1811 job = block_job_create("job", &test_simple_job_driver, NULL, job_node, 1812 0, BLK_PERM_ALL, 0, 0, NULL, NULL, &error_abort); 1813 1814 /* The job has a reference now */ 1815 bdrv_unref(job_node); 1816 1817 job->did_complete = &job_has_completed; 1818 1819 job_start(&job->common.job); 1820 job->should_complete = true; 1821 1822 g_assert(!job_has_completed); 1823 ret = bdrv_drop_intermediate(chain[1], chain[0], NULL); 1824 g_assert(ret == 0); 1825 g_assert(job_has_completed); 1826 1827 bdrv_unref(chain[2]); 1828 } 1829 1830 1831 typedef struct BDRVReplaceTestState { 1832 bool was_drained; 1833 bool was_undrained; 1834 bool has_read; 1835 1836 int drain_count; 1837 1838 bool yield_before_read; 1839 Coroutine *io_co; 1840 Coroutine *drain_co; 1841 } BDRVReplaceTestState; 1842 1843 static void bdrv_replace_test_close(BlockDriverState *bs) 1844 { 1845 } 1846 1847 /** 1848 * If @bs has a backing file: 1849 * Yield if .yield_before_read is true (and wait for drain_begin to 1850 * wake us up). 1851 * Forward the read to bs->backing. Set .has_read to true. 1852 * If drain_begin has woken us, wake it in turn. 1853 * 1854 * Otherwise: 1855 * Set .has_read to true and return success. 1856 */ 1857 static int coroutine_fn bdrv_replace_test_co_preadv(BlockDriverState *bs, 1858 uint64_t offset, 1859 uint64_t bytes, 1860 QEMUIOVector *qiov, 1861 int flags) 1862 { 1863 BDRVReplaceTestState *s = bs->opaque; 1864 1865 if (bs->backing) { 1866 int ret; 1867 1868 g_assert(!s->drain_count); 1869 1870 s->io_co = qemu_coroutine_self(); 1871 if (s->yield_before_read) { 1872 s->yield_before_read = false; 1873 qemu_coroutine_yield(); 1874 } 1875 s->io_co = NULL; 1876 1877 ret = bdrv_co_preadv(bs->backing, offset, bytes, qiov, 0); 1878 s->has_read = true; 1879 1880 /* Wake up drain_co if it runs */ 1881 if (s->drain_co) { 1882 aio_co_wake(s->drain_co); 1883 } 1884 1885 return ret; 1886 } 1887 1888 s->has_read = true; 1889 return 0; 1890 } 1891 1892 /** 1893 * If .drain_count is 0, wake up .io_co if there is one; and set 1894 * .was_drained. 1895 * Increment .drain_count. 1896 */ 1897 static void coroutine_fn bdrv_replace_test_co_drain_begin(BlockDriverState *bs) 1898 { 1899 BDRVReplaceTestState *s = bs->opaque; 1900 1901 if (!s->drain_count) { 1902 /* Keep waking io_co up until it is done */ 1903 s->drain_co = qemu_coroutine_self(); 1904 while (s->io_co) { 1905 aio_co_wake(s->io_co); 1906 s->io_co = NULL; 1907 qemu_coroutine_yield(); 1908 } 1909 s->drain_co = NULL; 1910 1911 s->was_drained = true; 1912 } 1913 s->drain_count++; 1914 } 1915 1916 /** 1917 * Reduce .drain_count, set .was_undrained once it reaches 0. 1918 * If .drain_count reaches 0 and the node has a backing file, issue a 1919 * read request. 1920 */ 1921 static void coroutine_fn bdrv_replace_test_co_drain_end(BlockDriverState *bs) 1922 { 1923 BDRVReplaceTestState *s = bs->opaque; 1924 1925 g_assert(s->drain_count > 0); 1926 if (!--s->drain_count) { 1927 int ret; 1928 1929 s->was_undrained = true; 1930 1931 if (bs->backing) { 1932 char data; 1933 QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, &data, 1); 1934 1935 /* Queue a read request post-drain */ 1936 ret = bdrv_replace_test_co_preadv(bs, 0, 1, &qiov, 0); 1937 g_assert(ret >= 0); 1938 } 1939 } 1940 } 1941 1942 static BlockDriver bdrv_replace_test = { 1943 .format_name = "replace_test", 1944 .instance_size = sizeof(BDRVReplaceTestState), 1945 1946 .bdrv_close = bdrv_replace_test_close, 1947 .bdrv_co_preadv = bdrv_replace_test_co_preadv, 1948 1949 .bdrv_co_drain_begin = bdrv_replace_test_co_drain_begin, 1950 .bdrv_co_drain_end = bdrv_replace_test_co_drain_end, 1951 1952 .bdrv_child_perm = bdrv_default_perms, 1953 }; 1954 1955 static void coroutine_fn test_replace_child_mid_drain_read_co(void *opaque) 1956 { 1957 int ret; 1958 char data; 1959 1960 ret = blk_co_pread(opaque, 0, 1, &data, 0); 1961 g_assert(ret >= 0); 1962 } 1963 1964 /** 1965 * We test two things: 1966 * (1) bdrv_replace_child_noperm() must not undrain the parent if both 1967 * children are drained. 1968 * (2) bdrv_replace_child_noperm() must never flush I/O requests to a 1969 * drained child. If the old child is drained, it must flush I/O 1970 * requests after the new one has been attached. If the new child 1971 * is drained, it must flush I/O requests before the old one is 1972 * detached. 1973 * 1974 * To do so, we create one parent node and two child nodes; then 1975 * attach one of the children (old_child_bs) to the parent, then 1976 * drain both old_child_bs and new_child_bs according to 1977 * old_drain_count and new_drain_count, respectively, and finally 1978 * we invoke bdrv_replace_node() to replace old_child_bs by 1979 * new_child_bs. 1980 * 1981 * The test block driver we use here (bdrv_replace_test) has a read 1982 * function that: 1983 * - For the parent node, can optionally yield, and then forwards the 1984 * read to bdrv_preadv(), 1985 * - For the child node, just returns immediately. 1986 * 1987 * If the read yields, the drain_begin function will wake it up. 1988 * 1989 * The drain_end function issues a read on the parent once it is fully 1990 * undrained (which simulates requests starting to come in again). 1991 */ 1992 static void do_test_replace_child_mid_drain(int old_drain_count, 1993 int new_drain_count) 1994 { 1995 BlockBackend *parent_blk; 1996 BlockDriverState *parent_bs; 1997 BlockDriverState *old_child_bs, *new_child_bs; 1998 BDRVReplaceTestState *parent_s; 1999 BDRVReplaceTestState *old_child_s, *new_child_s; 2000 Coroutine *io_co; 2001 int i; 2002 2003 parent_bs = bdrv_new_open_driver(&bdrv_replace_test, "parent", 0, 2004 &error_abort); 2005 parent_s = parent_bs->opaque; 2006 2007 parent_blk = blk_new(qemu_get_aio_context(), 2008 BLK_PERM_CONSISTENT_READ, BLK_PERM_ALL); 2009 blk_insert_bs(parent_blk, parent_bs, &error_abort); 2010 2011 old_child_bs = bdrv_new_open_driver(&bdrv_replace_test, "old-child", 0, 2012 &error_abort); 2013 new_child_bs = bdrv_new_open_driver(&bdrv_replace_test, "new-child", 0, 2014 &error_abort); 2015 old_child_s = old_child_bs->opaque; 2016 new_child_s = new_child_bs->opaque; 2017 2018 /* So that we can read something */ 2019 parent_bs->total_sectors = 1; 2020 old_child_bs->total_sectors = 1; 2021 new_child_bs->total_sectors = 1; 2022 2023 bdrv_ref(old_child_bs); 2024 parent_bs->backing = bdrv_attach_child(parent_bs, old_child_bs, "child", 2025 &child_of_bds, BDRV_CHILD_COW, 2026 &error_abort); 2027 2028 for (i = 0; i < old_drain_count; i++) { 2029 bdrv_drained_begin(old_child_bs); 2030 } 2031 for (i = 0; i < new_drain_count; i++) { 2032 bdrv_drained_begin(new_child_bs); 2033 } 2034 2035 if (!old_drain_count) { 2036 /* 2037 * Start a read operation that will yield, so it will not 2038 * complete before the node is drained. 2039 */ 2040 parent_s->yield_before_read = true; 2041 io_co = qemu_coroutine_create(test_replace_child_mid_drain_read_co, 2042 parent_blk); 2043 qemu_coroutine_enter(io_co); 2044 } 2045 2046 /* If we have started a read operation, it should have yielded */ 2047 g_assert(!parent_s->has_read); 2048 2049 /* Reset drained status so we can see what bdrv_replace_node() does */ 2050 parent_s->was_drained = false; 2051 parent_s->was_undrained = false; 2052 2053 g_assert(parent_bs->quiesce_counter == old_drain_count); 2054 bdrv_replace_node(old_child_bs, new_child_bs, &error_abort); 2055 g_assert(parent_bs->quiesce_counter == new_drain_count); 2056 2057 if (!old_drain_count && !new_drain_count) { 2058 /* 2059 * From undrained to undrained drains and undrains the parent, 2060 * because bdrv_replace_node() contains a drained section for 2061 * @old_child_bs. 2062 */ 2063 g_assert(parent_s->was_drained && parent_s->was_undrained); 2064 } else if (!old_drain_count && new_drain_count) { 2065 /* 2066 * From undrained to drained should drain the parent and keep 2067 * it that way. 2068 */ 2069 g_assert(parent_s->was_drained && !parent_s->was_undrained); 2070 } else if (old_drain_count && !new_drain_count) { 2071 /* 2072 * From drained to undrained should undrain the parent and 2073 * keep it that way. 2074 */ 2075 g_assert(!parent_s->was_drained && parent_s->was_undrained); 2076 } else /* if (old_drain_count && new_drain_count) */ { 2077 /* 2078 * From drained to drained must not undrain the parent at any 2079 * point 2080 */ 2081 g_assert(!parent_s->was_drained && !parent_s->was_undrained); 2082 } 2083 2084 if (!old_drain_count || !new_drain_count) { 2085 /* 2086 * If !old_drain_count, we have started a read request before 2087 * bdrv_replace_node(). If !new_drain_count, the parent must 2088 * have been undrained at some point, and 2089 * bdrv_replace_test_co_drain_end() starts a read request 2090 * then. 2091 */ 2092 g_assert(parent_s->has_read); 2093 } else { 2094 /* 2095 * If the parent was never undrained, there is no way to start 2096 * a read request. 2097 */ 2098 g_assert(!parent_s->has_read); 2099 } 2100 2101 /* A drained child must have not received any request */ 2102 g_assert(!(old_drain_count && old_child_s->has_read)); 2103 g_assert(!(new_drain_count && new_child_s->has_read)); 2104 2105 for (i = 0; i < new_drain_count; i++) { 2106 bdrv_drained_end(new_child_bs); 2107 } 2108 for (i = 0; i < old_drain_count; i++) { 2109 bdrv_drained_end(old_child_bs); 2110 } 2111 2112 /* 2113 * By now, bdrv_replace_test_co_drain_end() must have been called 2114 * at some point while the new child was attached to the parent. 2115 */ 2116 g_assert(parent_s->has_read); 2117 g_assert(new_child_s->has_read); 2118 2119 blk_unref(parent_blk); 2120 bdrv_unref(parent_bs); 2121 bdrv_unref(old_child_bs); 2122 bdrv_unref(new_child_bs); 2123 } 2124 2125 static void test_replace_child_mid_drain(void) 2126 { 2127 int old_drain_count, new_drain_count; 2128 2129 for (old_drain_count = 0; old_drain_count < 2; old_drain_count++) { 2130 for (new_drain_count = 0; new_drain_count < 2; new_drain_count++) { 2131 do_test_replace_child_mid_drain(old_drain_count, new_drain_count); 2132 } 2133 } 2134 } 2135 2136 int main(int argc, char **argv) 2137 { 2138 int ret; 2139 2140 bdrv_init(); 2141 qemu_init_main_loop(&error_abort); 2142 2143 g_test_init(&argc, &argv, NULL); 2144 qemu_event_init(&done_event, false); 2145 2146 g_test_add_func("/bdrv-drain/driver-cb/drain_all", test_drv_cb_drain_all); 2147 g_test_add_func("/bdrv-drain/driver-cb/drain", test_drv_cb_drain); 2148 g_test_add_func("/bdrv-drain/driver-cb/drain_subtree", 2149 test_drv_cb_drain_subtree); 2150 2151 g_test_add_func("/bdrv-drain/driver-cb/co/drain_all", 2152 test_drv_cb_co_drain_all); 2153 g_test_add_func("/bdrv-drain/driver-cb/co/drain", test_drv_cb_co_drain); 2154 g_test_add_func("/bdrv-drain/driver-cb/co/drain_subtree", 2155 test_drv_cb_co_drain_subtree); 2156 2157 2158 g_test_add_func("/bdrv-drain/quiesce/drain_all", test_quiesce_drain_all); 2159 g_test_add_func("/bdrv-drain/quiesce/drain", test_quiesce_drain); 2160 g_test_add_func("/bdrv-drain/quiesce/drain_subtree", 2161 test_quiesce_drain_subtree); 2162 2163 g_test_add_func("/bdrv-drain/quiesce/co/drain_all", 2164 test_quiesce_co_drain_all); 2165 g_test_add_func("/bdrv-drain/quiesce/co/drain", test_quiesce_co_drain); 2166 g_test_add_func("/bdrv-drain/quiesce/co/drain_subtree", 2167 test_quiesce_co_drain_subtree); 2168 2169 g_test_add_func("/bdrv-drain/nested", test_nested); 2170 g_test_add_func("/bdrv-drain/multiparent", test_multiparent); 2171 2172 g_test_add_func("/bdrv-drain/graph-change/drain_subtree", 2173 test_graph_change_drain_subtree); 2174 g_test_add_func("/bdrv-drain/graph-change/drain_all", 2175 test_graph_change_drain_all); 2176 2177 g_test_add_func("/bdrv-drain/iothread/drain_all", test_iothread_drain_all); 2178 g_test_add_func("/bdrv-drain/iothread/drain", test_iothread_drain); 2179 g_test_add_func("/bdrv-drain/iothread/drain_subtree", 2180 test_iothread_drain_subtree); 2181 2182 g_test_add_func("/bdrv-drain/blockjob/drain_all", test_blockjob_drain_all); 2183 g_test_add_func("/bdrv-drain/blockjob/drain", test_blockjob_drain); 2184 g_test_add_func("/bdrv-drain/blockjob/drain_subtree", 2185 test_blockjob_drain_subtree); 2186 2187 g_test_add_func("/bdrv-drain/blockjob/error/drain_all", 2188 test_blockjob_error_drain_all); 2189 g_test_add_func("/bdrv-drain/blockjob/error/drain", 2190 test_blockjob_error_drain); 2191 g_test_add_func("/bdrv-drain/blockjob/error/drain_subtree", 2192 test_blockjob_error_drain_subtree); 2193 2194 g_test_add_func("/bdrv-drain/blockjob/iothread/drain_all", 2195 test_blockjob_iothread_drain_all); 2196 g_test_add_func("/bdrv-drain/blockjob/iothread/drain", 2197 test_blockjob_iothread_drain); 2198 g_test_add_func("/bdrv-drain/blockjob/iothread/drain_subtree", 2199 test_blockjob_iothread_drain_subtree); 2200 2201 g_test_add_func("/bdrv-drain/blockjob/iothread/error/drain_all", 2202 test_blockjob_iothread_error_drain_all); 2203 g_test_add_func("/bdrv-drain/blockjob/iothread/error/drain", 2204 test_blockjob_iothread_error_drain); 2205 g_test_add_func("/bdrv-drain/blockjob/iothread/error/drain_subtree", 2206 test_blockjob_iothread_error_drain_subtree); 2207 2208 g_test_add_func("/bdrv-drain/deletion/drain", test_delete_by_drain); 2209 g_test_add_func("/bdrv-drain/detach/drain_all", test_detach_by_drain_all); 2210 g_test_add_func("/bdrv-drain/detach/drain", test_detach_by_drain); 2211 g_test_add_func("/bdrv-drain/detach/drain_subtree", test_detach_by_drain_subtree); 2212 g_test_add_func("/bdrv-drain/detach/parent_cb", test_detach_by_parent_cb); 2213 g_test_add_func("/bdrv-drain/detach/driver_cb", test_detach_by_driver_cb); 2214 2215 g_test_add_func("/bdrv-drain/attach/drain", test_append_to_drained); 2216 2217 g_test_add_func("/bdrv-drain/set_aio_context", test_set_aio_context); 2218 2219 g_test_add_func("/bdrv-drain/blockjob/commit_by_drained_end", 2220 test_blockjob_commit_by_drained_end); 2221 2222 g_test_add_func("/bdrv-drain/bdrv_drop_intermediate/poll", 2223 test_drop_intermediate_poll); 2224 2225 g_test_add_func("/bdrv-drain/replace_child/mid-drain", 2226 test_replace_child_mid_drain); 2227 2228 ret = g_test_run(); 2229 qemu_event_destroy(&done_event); 2230 return ret; 2231 } 2232