2 * Block node draining tests
4 * Copyright (c) 2017 Kevin Wolf <kwolf@redhat.com>
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25 #include "qemu/osdep.h"
26 #include "block/block_int.h"
27 #include "block/blockjob_int.h"
28 #include "sysemu/block-backend.h"
29 #include "qapi/error.h"
30 #include "qemu/main-loop.h"
33 static QemuEvent done_event
;
35 typedef struct BDRVTestState
{
37 AioContext
*bh_indirection_ctx
;
38 bool sleep_in_drain_begin
;
41 static void coroutine_fn
sleep_in_drain_begin(void *opaque
)
43 BlockDriverState
*bs
= opaque
;
45 qemu_co_sleep_ns(QEMU_CLOCK_REALTIME
, 100000);
46 bdrv_dec_in_flight(bs
);
49 static void bdrv_test_drain_begin(BlockDriverState
*bs
)
51 BDRVTestState
*s
= bs
->opaque
;
53 if (s
->sleep_in_drain_begin
) {
54 Coroutine
*co
= qemu_coroutine_create(sleep_in_drain_begin
, bs
);
55 bdrv_inc_in_flight(bs
);
56 aio_co_enter(bdrv_get_aio_context(bs
), co
);
60 static void bdrv_test_drain_end(BlockDriverState
*bs
)
62 BDRVTestState
*s
= bs
->opaque
;
66 static void bdrv_test_close(BlockDriverState
*bs
)
68 BDRVTestState
*s
= bs
->opaque
;
69 g_assert_cmpint(s
->drain_count
, >, 0);
72 static void co_reenter_bh(void *opaque
)
77 static int coroutine_fn
bdrv_test_co_preadv(BlockDriverState
*bs
,
78 int64_t offset
, int64_t bytes
,
80 BdrvRequestFlags flags
)
82 BDRVTestState
*s
= bs
->opaque
;
84 /* We want this request to stay until the polling loop in drain waits for
85 * it to complete. We need to sleep a while as bdrv_drain_invoke() comes
86 * first and polls its result, too, but it shouldn't accidentally complete
87 * this request yet. */
88 qemu_co_sleep_ns(QEMU_CLOCK_REALTIME
, 100000);
90 if (s
->bh_indirection_ctx
) {
91 aio_bh_schedule_oneshot(s
->bh_indirection_ctx
, co_reenter_bh
,
92 qemu_coroutine_self());
93 qemu_coroutine_yield();
99 static int bdrv_test_co_change_backing_file(BlockDriverState
*bs
,
100 const char *backing_file
,
101 const char *backing_fmt
)
106 static BlockDriver bdrv_test
= {
107 .format_name
= "test",
108 .instance_size
= sizeof(BDRVTestState
),
109 .supports_backing
= true,
111 .bdrv_close
= bdrv_test_close
,
112 .bdrv_co_preadv
= bdrv_test_co_preadv
,
114 .bdrv_drain_begin
= bdrv_test_drain_begin
,
115 .bdrv_drain_end
= bdrv_test_drain_end
,
117 .bdrv_child_perm
= bdrv_default_perms
,
119 .bdrv_co_change_backing_file
= bdrv_test_co_change_backing_file
,
122 static void aio_ret_cb(void *opaque
, int ret
)
124 int *aio_ret
= opaque
;
128 typedef struct CallInCoroutineData
{
131 } CallInCoroutineData
;
133 static coroutine_fn
void call_in_coroutine_entry(void *opaque
)
135 CallInCoroutineData
*data
= opaque
;
141 static void call_in_coroutine(void (*entry
)(void))
144 CallInCoroutineData data
= {
149 co
= qemu_coroutine_create(call_in_coroutine_entry
, &data
);
150 qemu_coroutine_enter(co
);
152 aio_poll(qemu_get_aio_context(), true);
162 static void do_drain_begin(enum drain_type drain_type
, BlockDriverState
*bs
)
164 switch (drain_type
) {
165 case BDRV_DRAIN_ALL
: bdrv_drain_all_begin(); break;
166 case BDRV_DRAIN
: bdrv_drained_begin(bs
); break;
167 default: g_assert_not_reached();
171 static void do_drain_end(enum drain_type drain_type
, BlockDriverState
*bs
)
173 switch (drain_type
) {
174 case BDRV_DRAIN_ALL
: bdrv_drain_all_end(); break;
175 case BDRV_DRAIN
: bdrv_drained_end(bs
); break;
176 default: g_assert_not_reached();
180 static void do_drain_begin_unlocked(enum drain_type drain_type
, BlockDriverState
*bs
)
182 do_drain_begin(drain_type
, bs
);
185 static BlockBackend
* no_coroutine_fn
test_setup(void)
188 BlockDriverState
*bs
, *backing
;
190 blk
= blk_new(qemu_get_aio_context(), BLK_PERM_ALL
, BLK_PERM_ALL
);
191 bs
= bdrv_new_open_driver(&bdrv_test
, "test-node", BDRV_O_RDWR
,
193 blk_insert_bs(blk
, bs
, &error_abort
);
195 backing
= bdrv_new_open_driver(&bdrv_test
, "backing", 0, &error_abort
);
196 bdrv_set_backing_hd(bs
, backing
, &error_abort
);
204 static void do_drain_end_unlocked(enum drain_type drain_type
, BlockDriverState
*bs
)
206 do_drain_end(drain_type
, bs
);
210 * Locking the block graph would be a bit cumbersome here because this function
211 * is called both in coroutine and non-coroutine context. We know this is a test
212 * and nothing else is running, so don't bother with TSA.
214 static void coroutine_mixed_fn TSA_NO_TSA
215 test_drv_cb_common(BlockBackend
*blk
, enum drain_type drain_type
,
218 BlockDriverState
*bs
= blk_bs(blk
);
219 BlockDriverState
*backing
= bs
->backing
->bs
;
220 BDRVTestState
*s
, *backing_s
;
224 QEMUIOVector qiov
= QEMU_IOVEC_INIT_BUF(qiov
, NULL
, 0);
227 backing_s
= backing
->opaque
;
229 /* Simple bdrv_drain_all_begin/end pair, check that CBs are called */
230 g_assert_cmpint(s
->drain_count
, ==, 0);
231 g_assert_cmpint(backing_s
->drain_count
, ==, 0);
233 do_drain_begin(drain_type
, bs
);
235 g_assert_cmpint(s
->drain_count
, ==, 1);
236 g_assert_cmpint(backing_s
->drain_count
, ==, !!recursive
);
238 do_drain_end(drain_type
, bs
);
240 g_assert_cmpint(s
->drain_count
, ==, 0);
241 g_assert_cmpint(backing_s
->drain_count
, ==, 0);
243 /* Now do the same while a request is pending */
244 aio_ret
= -EINPROGRESS
;
245 acb
= blk_aio_preadv(blk
, 0, &qiov
, 0, aio_ret_cb
, &aio_ret
);
246 g_assert(acb
!= NULL
);
247 g_assert_cmpint(aio_ret
, ==, -EINPROGRESS
);
249 g_assert_cmpint(s
->drain_count
, ==, 0);
250 g_assert_cmpint(backing_s
->drain_count
, ==, 0);
252 do_drain_begin(drain_type
, bs
);
254 g_assert_cmpint(aio_ret
, ==, 0);
255 g_assert_cmpint(s
->drain_count
, ==, 1);
256 g_assert_cmpint(backing_s
->drain_count
, ==, !!recursive
);
258 do_drain_end(drain_type
, bs
);
260 g_assert_cmpint(s
->drain_count
, ==, 0);
261 g_assert_cmpint(backing_s
->drain_count
, ==, 0);
264 static void test_drv_cb_drain_all(void)
266 BlockBackend
*blk
= test_setup();
267 test_drv_cb_common(blk
, BDRV_DRAIN_ALL
, true);
271 static void test_drv_cb_drain(void)
273 BlockBackend
*blk
= test_setup();
274 test_drv_cb_common(blk
, BDRV_DRAIN
, false);
278 static void coroutine_fn
test_drv_cb_co_drain_all_entry(void)
280 BlockBackend
*blk
= blk_all_next(NULL
);
281 test_drv_cb_common(blk
, BDRV_DRAIN_ALL
, true);
284 static void test_drv_cb_co_drain_all(void)
286 BlockBackend
*blk
= test_setup();
287 call_in_coroutine(test_drv_cb_co_drain_all_entry
);
291 static void coroutine_fn
test_drv_cb_co_drain_entry(void)
293 BlockBackend
*blk
= blk_all_next(NULL
);
294 test_drv_cb_common(blk
, BDRV_DRAIN
, false);
297 static void test_drv_cb_co_drain(void)
299 BlockBackend
*blk
= test_setup();
300 call_in_coroutine(test_drv_cb_co_drain_entry
);
305 * Locking the block graph would be a bit cumbersome here because this function
306 * is called both in coroutine and non-coroutine context. We know this is a test
307 * and nothing else is running, so don't bother with TSA.
309 static void coroutine_mixed_fn TSA_NO_TSA
310 test_quiesce_common(BlockBackend
*blk
, enum drain_type drain_type
,
313 BlockDriverState
*bs
= blk_bs(blk
);
314 BlockDriverState
*backing
= bs
->backing
->bs
;
316 g_assert_cmpint(bs
->quiesce_counter
, ==, 0);
317 g_assert_cmpint(backing
->quiesce_counter
, ==, 0);
319 do_drain_begin(drain_type
, bs
);
321 if (drain_type
== BDRV_DRAIN_ALL
) {
322 g_assert_cmpint(bs
->quiesce_counter
, ==, 2);
324 g_assert_cmpint(bs
->quiesce_counter
, ==, 1);
326 g_assert_cmpint(backing
->quiesce_counter
, ==, !!recursive
);
328 do_drain_end(drain_type
, bs
);
330 g_assert_cmpint(bs
->quiesce_counter
, ==, 0);
331 g_assert_cmpint(backing
->quiesce_counter
, ==, 0);
334 static void test_quiesce_drain_all(void)
336 BlockBackend
*blk
= test_setup();
337 test_quiesce_common(blk
, BDRV_DRAIN_ALL
, true);
341 static void test_quiesce_drain(void)
343 BlockBackend
*blk
= test_setup();
344 test_quiesce_common(blk
, BDRV_DRAIN
, false);
348 static void coroutine_fn
test_quiesce_co_drain_all_entry(void)
350 BlockBackend
*blk
= blk_all_next(NULL
);
351 test_quiesce_common(blk
, BDRV_DRAIN_ALL
, true);
354 static void test_quiesce_co_drain_all(void)
356 BlockBackend
*blk
= test_setup();
357 call_in_coroutine(test_quiesce_co_drain_all_entry
);
361 static void coroutine_fn
test_quiesce_co_drain_entry(void)
363 BlockBackend
*blk
= blk_all_next(NULL
);
364 test_quiesce_common(blk
, BDRV_DRAIN
, false);
367 static void test_quiesce_co_drain(void)
369 BlockBackend
*blk
= test_setup();
370 call_in_coroutine(test_quiesce_co_drain_entry
);
374 static void test_nested(void)
377 BlockDriverState
*bs
, *backing
;
378 BDRVTestState
*s
, *backing_s
;
379 enum drain_type outer
, inner
;
381 blk
= blk_new(qemu_get_aio_context(), BLK_PERM_ALL
, BLK_PERM_ALL
);
382 bs
= bdrv_new_open_driver(&bdrv_test
, "test-node", BDRV_O_RDWR
,
385 blk_insert_bs(blk
, bs
, &error_abort
);
387 backing
= bdrv_new_open_driver(&bdrv_test
, "backing", 0, &error_abort
);
388 backing_s
= backing
->opaque
;
389 bdrv_set_backing_hd(bs
, backing
, &error_abort
);
391 for (outer
= 0; outer
< DRAIN_TYPE_MAX
; outer
++) {
392 for (inner
= 0; inner
< DRAIN_TYPE_MAX
; inner
++) {
393 int backing_quiesce
= (outer
== BDRV_DRAIN_ALL
) +
394 (inner
== BDRV_DRAIN_ALL
);
396 g_assert_cmpint(bs
->quiesce_counter
, ==, 0);
397 g_assert_cmpint(backing
->quiesce_counter
, ==, 0);
398 g_assert_cmpint(s
->drain_count
, ==, 0);
399 g_assert_cmpint(backing_s
->drain_count
, ==, 0);
401 do_drain_begin(outer
, bs
);
402 do_drain_begin(inner
, bs
);
404 g_assert_cmpint(bs
->quiesce_counter
, ==, 2 + !!backing_quiesce
);
405 g_assert_cmpint(backing
->quiesce_counter
, ==, backing_quiesce
);
406 g_assert_cmpint(s
->drain_count
, ==, 1);
407 g_assert_cmpint(backing_s
->drain_count
, ==, !!backing_quiesce
);
409 do_drain_end(inner
, bs
);
410 do_drain_end(outer
, bs
);
412 g_assert_cmpint(bs
->quiesce_counter
, ==, 0);
413 g_assert_cmpint(backing
->quiesce_counter
, ==, 0);
414 g_assert_cmpint(s
->drain_count
, ==, 0);
415 g_assert_cmpint(backing_s
->drain_count
, ==, 0);
424 static void test_graph_change_drain_all(void)
426 BlockBackend
*blk_a
, *blk_b
;
427 BlockDriverState
*bs_a
, *bs_b
;
428 BDRVTestState
*a_s
, *b_s
;
430 /* Create node A with a BlockBackend */
431 blk_a
= blk_new(qemu_get_aio_context(), BLK_PERM_ALL
, BLK_PERM_ALL
);
432 bs_a
= bdrv_new_open_driver(&bdrv_test
, "test-node-a", BDRV_O_RDWR
,
435 blk_insert_bs(blk_a
, bs_a
, &error_abort
);
437 g_assert_cmpint(bs_a
->quiesce_counter
, ==, 0);
438 g_assert_cmpint(a_s
->drain_count
, ==, 0);
440 /* Call bdrv_drain_all_begin() */
441 bdrv_drain_all_begin();
443 g_assert_cmpint(bs_a
->quiesce_counter
, ==, 1);
444 g_assert_cmpint(a_s
->drain_count
, ==, 1);
446 /* Create node B with a BlockBackend */
447 blk_b
= blk_new(qemu_get_aio_context(), BLK_PERM_ALL
, BLK_PERM_ALL
);
448 bs_b
= bdrv_new_open_driver(&bdrv_test
, "test-node-b", BDRV_O_RDWR
,
451 blk_insert_bs(blk_b
, bs_b
, &error_abort
);
453 g_assert_cmpint(bs_a
->quiesce_counter
, ==, 1);
454 g_assert_cmpint(bs_b
->quiesce_counter
, ==, 1);
455 g_assert_cmpint(a_s
->drain_count
, ==, 1);
456 g_assert_cmpint(b_s
->drain_count
, ==, 1);
458 /* Unref and finally delete node A */
461 g_assert_cmpint(bs_a
->quiesce_counter
, ==, 1);
462 g_assert_cmpint(bs_b
->quiesce_counter
, ==, 1);
463 g_assert_cmpint(a_s
->drain_count
, ==, 1);
464 g_assert_cmpint(b_s
->drain_count
, ==, 1);
468 g_assert_cmpint(bs_b
->quiesce_counter
, ==, 1);
469 g_assert_cmpint(b_s
->drain_count
, ==, 1);
471 /* End the drained section */
472 bdrv_drain_all_end();
474 g_assert_cmpint(bs_b
->quiesce_counter
, ==, 0);
475 g_assert_cmpint(b_s
->drain_count
, ==, 0);
481 struct test_iothread_data
{
482 BlockDriverState
*bs
;
483 enum drain_type drain_type
;
488 static void coroutine_fn
test_iothread_drain_co_entry(void *opaque
)
490 struct test_iothread_data
*data
= opaque
;
492 do_drain_begin(data
->drain_type
, data
->bs
);
493 g_assert_cmpint(*data
->aio_ret
, ==, 0);
494 do_drain_end(data
->drain_type
, data
->bs
);
496 data
->co_done
= true;
500 static void test_iothread_aio_cb(void *opaque
, int ret
)
502 int *aio_ret
= opaque
;
504 qemu_event_set(&done_event
);
507 static void test_iothread_main_thread_bh(void *opaque
)
509 struct test_iothread_data
*data
= opaque
;
511 bdrv_flush(data
->bs
);
512 bdrv_dec_in_flight(data
->bs
); /* incremented by test_iothread_common() */
516 * Starts an AIO request on a BDS that runs in the AioContext of iothread 1.
517 * The request involves a BH on iothread 2 before it can complete.
519 * @drain_thread = 0 means that do_drain_begin/end are called from the main
520 * thread, @drain_thread = 1 means that they are called from iothread 1. Drain
521 * for this BDS cannot be called from iothread 2 because only the main thread
522 * may do cross-AioContext polling.
524 static void test_iothread_common(enum drain_type drain_type
, int drain_thread
)
527 BlockDriverState
*bs
;
532 struct test_iothread_data data
;
534 IOThread
*a
= iothread_new();
535 IOThread
*b
= iothread_new();
536 AioContext
*ctx_a
= iothread_get_aio_context(a
);
537 AioContext
*ctx_b
= iothread_get_aio_context(b
);
539 QEMUIOVector qiov
= QEMU_IOVEC_INIT_BUF(qiov
, NULL
, 0);
541 /* bdrv_drain_all() may only be called from the main loop thread */
542 if (drain_type
== BDRV_DRAIN_ALL
&& drain_thread
!= 0) {
546 blk
= blk_new(qemu_get_aio_context(), BLK_PERM_ALL
, BLK_PERM_ALL
);
547 bs
= bdrv_new_open_driver(&bdrv_test
, "test-node", BDRV_O_RDWR
,
550 blk_insert_bs(blk
, bs
, &error_abort
);
551 blk_set_disable_request_queuing(blk
, true);
553 blk_set_aio_context(blk
, ctx_a
, &error_abort
);
555 s
->bh_indirection_ctx
= ctx_b
;
557 aio_ret
= -EINPROGRESS
;
558 qemu_event_reset(&done_event
);
560 if (drain_thread
== 0) {
561 acb
= blk_aio_preadv(blk
, 0, &qiov
, 0, test_iothread_aio_cb
, &aio_ret
);
563 acb
= blk_aio_preadv(blk
, 0, &qiov
, 0, aio_ret_cb
, &aio_ret
);
565 g_assert(acb
!= NULL
);
566 g_assert_cmpint(aio_ret
, ==, -EINPROGRESS
);
568 data
= (struct test_iothread_data
) {
570 .drain_type
= drain_type
,
574 switch (drain_thread
) {
577 * Increment in_flight so that do_drain_begin() waits for
578 * test_iothread_main_thread_bh(). This prevents the race between
579 * test_iothread_main_thread_bh() in IOThread a and do_drain_begin() in
580 * this thread. test_iothread_main_thread_bh() decrements in_flight.
582 bdrv_inc_in_flight(bs
);
583 aio_bh_schedule_oneshot(ctx_a
, test_iothread_main_thread_bh
, &data
);
585 /* The request is running on the IOThread a. Draining its block device
586 * will make sure that it has completed as far as the BDS is concerned,
587 * but the drain in this thread can continue immediately after
588 * bdrv_dec_in_flight() and aio_ret might be assigned only slightly
590 do_drain_begin(drain_type
, bs
);
591 g_assert_cmpint(bs
->in_flight
, ==, 0);
593 qemu_event_wait(&done_event
);
595 g_assert_cmpint(aio_ret
, ==, 0);
596 do_drain_end(drain_type
, bs
);
599 co
= qemu_coroutine_create(test_iothread_drain_co_entry
, &data
);
600 aio_co_enter(ctx_a
, co
);
601 AIO_WAIT_WHILE_UNLOCKED(NULL
, !data
.co_done
);
604 g_assert_not_reached();
607 blk_set_aio_context(blk
, qemu_get_aio_context(), &error_abort
);
617 static void test_iothread_drain_all(void)
619 test_iothread_common(BDRV_DRAIN_ALL
, 0);
620 test_iothread_common(BDRV_DRAIN_ALL
, 1);
623 static void test_iothread_drain(void)
625 test_iothread_common(BDRV_DRAIN
, 0);
626 test_iothread_common(BDRV_DRAIN
, 1);
630 typedef struct TestBlockJob
{
632 BlockDriverState
*bs
;
636 bool should_complete
;
639 static int test_job_prepare(Job
*job
)
641 TestBlockJob
*s
= container_of(job
, TestBlockJob
, common
.job
);
643 /* Provoke an AIO_WAIT_WHILE() call to verify there is no deadlock */
645 return s
->prepare_ret
;
648 static void test_job_commit(Job
*job
)
650 TestBlockJob
*s
= container_of(job
, TestBlockJob
, common
.job
);
652 /* Provoke an AIO_WAIT_WHILE() call to verify there is no deadlock */
656 static void test_job_abort(Job
*job
)
658 TestBlockJob
*s
= container_of(job
, TestBlockJob
, common
.job
);
660 /* Provoke an AIO_WAIT_WHILE() call to verify there is no deadlock */
664 static int coroutine_fn
test_job_run(Job
*job
, Error
**errp
)
666 TestBlockJob
*s
= container_of(job
, TestBlockJob
, common
.job
);
668 /* We are running the actual job code past the pause point in
672 job_transition_to_ready(&s
->common
.job
);
673 while (!s
->should_complete
) {
674 /* Avoid job_sleep_ns() because it marks the job as !busy. We want to
675 * emulate some actual activity (probably some I/O) here so that drain
676 * has to wait for this activity to stop. */
677 qemu_co_sleep_ns(QEMU_CLOCK_REALTIME
, 1000000);
679 job_pause_point(&s
->common
.job
);
685 static void test_job_complete(Job
*job
, Error
**errp
)
687 TestBlockJob
*s
= container_of(job
, TestBlockJob
, common
.job
);
688 s
->should_complete
= true;
691 BlockJobDriver test_job_driver
= {
693 .instance_size
= sizeof(TestBlockJob
),
694 .free
= block_job_free
,
695 .user_resume
= block_job_user_resume
,
697 .complete
= test_job_complete
,
698 .prepare
= test_job_prepare
,
699 .commit
= test_job_commit
,
700 .abort
= test_job_abort
,
704 enum test_job_result
{
707 TEST_JOB_FAIL_PREPARE
,
710 enum test_job_drain_node
{
712 TEST_JOB_DRAIN_SRC_CHILD
,
715 static void test_blockjob_common_drain_node(enum drain_type drain_type
,
717 enum test_job_result result
,
718 enum test_job_drain_node drain_node
)
720 BlockBackend
*blk_src
, *blk_target
;
721 BlockDriverState
*src
, *src_backing
, *src_overlay
, *target
, *drain_bs
;
724 IOThread
*iothread
= NULL
;
727 src
= bdrv_new_open_driver(&bdrv_test
, "source", BDRV_O_RDWR
,
729 src_backing
= bdrv_new_open_driver(&bdrv_test
, "source-backing",
730 BDRV_O_RDWR
, &error_abort
);
731 src_overlay
= bdrv_new_open_driver(&bdrv_test
, "source-overlay",
732 BDRV_O_RDWR
, &error_abort
);
734 bdrv_set_backing_hd(src_overlay
, src
, &error_abort
);
736 bdrv_set_backing_hd(src
, src_backing
, &error_abort
);
737 bdrv_unref(src_backing
);
739 blk_src
= blk_new(qemu_get_aio_context(), BLK_PERM_ALL
, BLK_PERM_ALL
);
740 blk_insert_bs(blk_src
, src_overlay
, &error_abort
);
742 switch (drain_node
) {
743 case TEST_JOB_DRAIN_SRC
:
746 case TEST_JOB_DRAIN_SRC_CHILD
:
747 drain_bs
= src_backing
;
750 g_assert_not_reached();
756 iothread
= iothread_new();
757 ctx
= iothread_get_aio_context(iothread
);
758 blk_set_aio_context(blk_src
, ctx
, &error_abort
);
761 target
= bdrv_new_open_driver(&bdrv_test
, "target", BDRV_O_RDWR
,
763 blk_target
= blk_new(qemu_get_aio_context(), BLK_PERM_ALL
, BLK_PERM_ALL
);
764 blk_insert_bs(blk_target
, target
, &error_abort
);
765 blk_set_allow_aio_context_change(blk_target
, true);
767 tjob
= block_job_create("job0", &test_job_driver
, NULL
, src
,
769 0, 0, NULL
, NULL
, &error_abort
);
774 block_job_add_bdrv(job
, "target", target
, 0, BLK_PERM_ALL
, &error_abort
);
775 bdrv_graph_wrunlock();
778 case TEST_JOB_SUCCESS
:
780 case TEST_JOB_FAIL_RUN
:
781 tjob
->run_ret
= -EIO
;
783 case TEST_JOB_FAIL_PREPARE
:
784 tjob
->prepare_ret
= -EIO
;
788 job_start(&job
->job
);
791 /* job_co_entry() is run in the I/O thread, wait for the actual job
792 * code to start (we don't want to catch the job in the pause point in
794 while (!tjob
->running
) {
795 aio_poll(qemu_get_aio_context(), false);
799 WITH_JOB_LOCK_GUARD() {
800 g_assert_cmpint(job
->job
.pause_count
, ==, 0);
801 g_assert_false(job
->job
.paused
);
802 g_assert_true(tjob
->running
);
803 g_assert_true(job
->job
.busy
); /* We're in qemu_co_sleep_ns() */
806 do_drain_begin_unlocked(drain_type
, drain_bs
);
808 WITH_JOB_LOCK_GUARD() {
809 if (drain_type
== BDRV_DRAIN_ALL
) {
810 /* bdrv_drain_all() drains both src and target */
811 g_assert_cmpint(job
->job
.pause_count
, ==, 2);
813 g_assert_cmpint(job
->job
.pause_count
, ==, 1);
815 g_assert_true(job
->job
.paused
);
816 g_assert_false(job
->job
.busy
); /* The job is paused */
819 do_drain_end_unlocked(drain_type
, drain_bs
);
823 * Here we are waiting for the paused status to change,
824 * so don't bother protecting the read every time.
826 * paused is reset in the I/O thread, wait for it
828 while (job
->job
.paused
) {
829 aio_poll(qemu_get_aio_context(), false);
833 WITH_JOB_LOCK_GUARD() {
834 g_assert_cmpint(job
->job
.pause_count
, ==, 0);
835 g_assert_false(job
->job
.paused
);
836 g_assert_true(job
->job
.busy
); /* We're in qemu_co_sleep_ns() */
839 do_drain_begin_unlocked(drain_type
, target
);
841 WITH_JOB_LOCK_GUARD() {
842 if (drain_type
== BDRV_DRAIN_ALL
) {
843 /* bdrv_drain_all() drains both src and target */
844 g_assert_cmpint(job
->job
.pause_count
, ==, 2);
846 g_assert_cmpint(job
->job
.pause_count
, ==, 1);
848 g_assert_true(job
->job
.paused
);
849 g_assert_false(job
->job
.busy
); /* The job is paused */
852 do_drain_end_unlocked(drain_type
, target
);
856 * Here we are waiting for the paused status to change,
857 * so don't bother protecting the read every time.
859 * paused is reset in the I/O thread, wait for it
861 while (job
->job
.paused
) {
862 aio_poll(qemu_get_aio_context(), false);
866 WITH_JOB_LOCK_GUARD() {
867 g_assert_cmpint(job
->job
.pause_count
, ==, 0);
868 g_assert_false(job
->job
.paused
);
869 g_assert_true(job
->job
.busy
); /* We're in qemu_co_sleep_ns() */
872 WITH_JOB_LOCK_GUARD() {
873 ret
= job_complete_sync_locked(&job
->job
, &error_abort
);
875 g_assert_cmpint(ret
, ==, (result
== TEST_JOB_SUCCESS
? 0 : -EIO
));
878 blk_set_aio_context(blk_src
, qemu_get_aio_context(), &error_abort
);
879 assert(blk_get_aio_context(blk_target
) == qemu_get_aio_context());
883 blk_unref(blk_target
);
884 bdrv_unref(src_overlay
);
888 iothread_join(iothread
);
892 static void test_blockjob_common(enum drain_type drain_type
, bool use_iothread
,
893 enum test_job_result result
)
895 test_blockjob_common_drain_node(drain_type
, use_iothread
, result
,
897 test_blockjob_common_drain_node(drain_type
, use_iothread
, result
,
898 TEST_JOB_DRAIN_SRC_CHILD
);
901 static void test_blockjob_drain_all(void)
903 test_blockjob_common(BDRV_DRAIN_ALL
, false, TEST_JOB_SUCCESS
);
906 static void test_blockjob_drain(void)
908 test_blockjob_common(BDRV_DRAIN
, false, TEST_JOB_SUCCESS
);
911 static void test_blockjob_error_drain_all(void)
913 test_blockjob_common(BDRV_DRAIN_ALL
, false, TEST_JOB_FAIL_RUN
);
914 test_blockjob_common(BDRV_DRAIN_ALL
, false, TEST_JOB_FAIL_PREPARE
);
917 static void test_blockjob_error_drain(void)
919 test_blockjob_common(BDRV_DRAIN
, false, TEST_JOB_FAIL_RUN
);
920 test_blockjob_common(BDRV_DRAIN
, false, TEST_JOB_FAIL_PREPARE
);
923 static void test_blockjob_iothread_drain_all(void)
925 test_blockjob_common(BDRV_DRAIN_ALL
, true, TEST_JOB_SUCCESS
);
928 static void test_blockjob_iothread_drain(void)
930 test_blockjob_common(BDRV_DRAIN
, true, TEST_JOB_SUCCESS
);
933 static void test_blockjob_iothread_error_drain_all(void)
935 test_blockjob_common(BDRV_DRAIN_ALL
, true, TEST_JOB_FAIL_RUN
);
936 test_blockjob_common(BDRV_DRAIN_ALL
, true, TEST_JOB_FAIL_PREPARE
);
939 static void test_blockjob_iothread_error_drain(void)
941 test_blockjob_common(BDRV_DRAIN
, true, TEST_JOB_FAIL_RUN
);
942 test_blockjob_common(BDRV_DRAIN
, true, TEST_JOB_FAIL_PREPARE
);
946 typedef struct BDRVTestTopState
{
947 BdrvChild
*wait_child
;
950 static void bdrv_test_top_close(BlockDriverState
*bs
)
952 BdrvChild
*c
, *next_c
;
955 QLIST_FOREACH_SAFE(c
, &bs
->children
, next
, next_c
) {
956 bdrv_unref_child(bs
, c
);
958 bdrv_graph_wrunlock();
961 static int coroutine_fn GRAPH_RDLOCK
962 bdrv_test_top_co_preadv(BlockDriverState
*bs
, int64_t offset
, int64_t bytes
,
963 QEMUIOVector
*qiov
, BdrvRequestFlags flags
)
965 BDRVTestTopState
*tts
= bs
->opaque
;
966 return bdrv_co_preadv(tts
->wait_child
, offset
, bytes
, qiov
, flags
);
969 static BlockDriver bdrv_test_top_driver
= {
970 .format_name
= "test_top_driver",
971 .instance_size
= sizeof(BDRVTestTopState
),
973 .bdrv_close
= bdrv_test_top_close
,
974 .bdrv_co_preadv
= bdrv_test_top_co_preadv
,
976 .bdrv_child_perm
= bdrv_default_perms
,
979 typedef struct TestCoDeleteByDrainData
{
981 bool detach_instead_of_delete
;
983 } TestCoDeleteByDrainData
;
985 static void coroutine_fn
test_co_delete_by_drain(void *opaque
)
987 TestCoDeleteByDrainData
*dbdd
= opaque
;
988 BlockBackend
*blk
= dbdd
->blk
;
989 BlockDriverState
*bs
= blk_bs(blk
);
990 BDRVTestTopState
*tts
= bs
->opaque
;
991 void *buffer
= g_malloc(65536);
992 QEMUIOVector qiov
= QEMU_IOVEC_INIT_BUF(qiov
, buffer
, 65536);
994 /* Pretend some internal write operation from parent to child.
995 * Important: We have to read from the child, not from the parent!
996 * Draining works by first propagating it all up the tree to the
997 * root and then waiting for drainage from root to the leaves
998 * (protocol nodes). If we have a request waiting on the root,
999 * everything will be drained before we go back down the tree, but
1000 * we do not want that. We want to be in the middle of draining
1001 * when this following requests returns. */
1002 bdrv_graph_co_rdlock();
1003 bdrv_co_preadv(tts
->wait_child
, 0, 65536, &qiov
, 0);
1004 bdrv_graph_co_rdunlock();
1006 g_assert_cmpint(bs
->refcnt
, ==, 1);
1008 if (!dbdd
->detach_instead_of_delete
) {
1011 BdrvChild
*c
, *next_c
;
1012 bdrv_graph_co_rdlock();
1013 QLIST_FOREACH_SAFE(c
, &bs
->children
, next
, next_c
) {
1014 bdrv_graph_co_rdunlock();
1015 bdrv_co_unref_child(bs
, c
);
1016 bdrv_graph_co_rdlock();
1018 bdrv_graph_co_rdunlock();
1026 * Test what happens when some BDS has some children, you drain one of
1027 * them and this results in the BDS being deleted.
1029 * If @detach_instead_of_delete is set, the BDS is not going to be
1030 * deleted but will only detach all of its children.
1032 static void do_test_delete_by_drain(bool detach_instead_of_delete
,
1033 enum drain_type drain_type
)
1036 BlockDriverState
*bs
, *child_bs
, *null_bs
;
1037 BDRVTestTopState
*tts
;
1038 TestCoDeleteByDrainData dbdd
;
1041 bs
= bdrv_new_open_driver(&bdrv_test_top_driver
, "top", BDRV_O_RDWR
,
1043 bs
->total_sectors
= 65536 >> BDRV_SECTOR_BITS
;
1046 null_bs
= bdrv_open("null-co://", NULL
, NULL
, BDRV_O_RDWR
| BDRV_O_PROTOCOL
,
1048 bdrv_graph_wrlock();
1049 bdrv_attach_child(bs
, null_bs
, "null-child", &child_of_bds
,
1050 BDRV_CHILD_DATA
, &error_abort
);
1051 bdrv_graph_wrunlock();
1053 /* This child will be the one to pass to requests through to, and
1054 * it will stall until a drain occurs */
1055 child_bs
= bdrv_new_open_driver(&bdrv_test
, "child", BDRV_O_RDWR
,
1057 child_bs
->total_sectors
= 65536 >> BDRV_SECTOR_BITS
;
1058 /* Takes our reference to child_bs */
1059 bdrv_graph_wrlock();
1060 tts
->wait_child
= bdrv_attach_child(bs
, child_bs
, "wait-child",
1062 BDRV_CHILD_DATA
| BDRV_CHILD_PRIMARY
,
1064 bdrv_graph_wrunlock();
1066 /* This child is just there to be deleted
1067 * (for detach_instead_of_delete == true) */
1068 null_bs
= bdrv_open("null-co://", NULL
, NULL
, BDRV_O_RDWR
| BDRV_O_PROTOCOL
,
1070 bdrv_graph_wrlock();
1071 bdrv_attach_child(bs
, null_bs
, "null-child", &child_of_bds
, BDRV_CHILD_DATA
,
1073 bdrv_graph_wrunlock();
1075 blk
= blk_new(qemu_get_aio_context(), BLK_PERM_ALL
, BLK_PERM_ALL
);
1076 blk_insert_bs(blk
, bs
, &error_abort
);
1078 /* Referenced by blk now */
1081 g_assert_cmpint(bs
->refcnt
, ==, 1);
1082 g_assert_cmpint(child_bs
->refcnt
, ==, 1);
1083 g_assert_cmpint(null_bs
->refcnt
, ==, 1);
1086 dbdd
= (TestCoDeleteByDrainData
){
1088 .detach_instead_of_delete
= detach_instead_of_delete
,
1091 co
= qemu_coroutine_create(test_co_delete_by_drain
, &dbdd
);
1092 qemu_coroutine_enter(co
);
1094 /* Drain the child while the read operation is still pending.
1095 * This should result in the operation finishing and
1096 * test_co_delete_by_drain() resuming. Thus, @bs will be deleted
1097 * and the coroutine will exit while this drain operation is still
1099 switch (drain_type
) {
1102 bdrv_drain(child_bs
);
1103 bdrv_unref(child_bs
);
1105 case BDRV_DRAIN_ALL
:
1106 bdrv_drain_all_begin();
1107 bdrv_drain_all_end();
1110 g_assert_not_reached();
1113 while (!dbdd
.done
) {
1114 aio_poll(qemu_get_aio_context(), true);
1117 if (detach_instead_of_delete
) {
1118 /* Here, the reference has not passed over to the coroutine,
1119 * so we have to delete the BB ourselves */
1124 static void test_delete_by_drain(void)
1126 do_test_delete_by_drain(false, BDRV_DRAIN
);
1129 static void test_detach_by_drain_all(void)
1131 do_test_delete_by_drain(true, BDRV_DRAIN_ALL
);
1134 static void test_detach_by_drain(void)
1136 do_test_delete_by_drain(true, BDRV_DRAIN
);
1140 struct detach_by_parent_data
{
1141 BlockDriverState
*parent_b
;
1143 BlockDriverState
*c
;
1146 bool detach_on_drain
;
1148 static struct detach_by_parent_data detach_by_parent_data
;
1150 static void no_coroutine_fn
detach_indirect_bh(void *opaque
)
1152 struct detach_by_parent_data
*data
= opaque
;
1154 bdrv_dec_in_flight(data
->child_b
->bs
);
1156 bdrv_graph_wrlock();
1157 bdrv_unref_child(data
->parent_b
, data
->child_b
);
1160 data
->child_c
= bdrv_attach_child(data
->parent_b
, data
->c
, "PB-C",
1161 &child_of_bds
, BDRV_CHILD_DATA
,
1163 bdrv_graph_wrunlock();
1166 static void coroutine_mixed_fn
detach_by_parent_aio_cb(void *opaque
, int ret
)
1168 struct detach_by_parent_data
*data
= &detach_by_parent_data
;
1170 g_assert_cmpint(ret
, ==, 0);
1171 if (data
->by_parent_cb
) {
1172 bdrv_inc_in_flight(data
->child_b
->bs
);
1173 aio_bh_schedule_oneshot(qemu_get_current_aio_context(),
1174 detach_indirect_bh
, &detach_by_parent_data
);
1178 static void GRAPH_RDLOCK
detach_by_driver_cb_drained_begin(BdrvChild
*child
)
1180 struct detach_by_parent_data
*data
= &detach_by_parent_data
;
1182 if (!data
->detach_on_drain
) {
1185 data
->detach_on_drain
= false;
1187 bdrv_inc_in_flight(data
->child_b
->bs
);
1188 aio_bh_schedule_oneshot(qemu_get_current_aio_context(),
1189 detach_indirect_bh
, &detach_by_parent_data
);
1190 child_of_bds
.drained_begin(child
);
1193 static BdrvChildClass detach_by_driver_cb_class
;
1202 * by_parent_cb == true: Test that parent callbacks don't poll
1204 * PA has a pending write request whose callback changes the child nodes of
1205 * PB: It removes B and adds C instead. The subtree of PB is drained, which
1206 * will indirectly drain the write request, too.
1208 * by_parent_cb == false: Test that bdrv_drain_invoke() doesn't poll
1210 * PA's BdrvChildClass has a .drained_begin callback that schedules a BH
1211 * that does the same graph change. If bdrv_drain_invoke() calls it, the
1212 * state is messed up, but if it is only polled in the single
1213 * BDRV_POLL_WHILE() at the end of the drain, this should work fine.
1215 static void TSA_NO_TSA
test_detach_indirect(bool by_parent_cb
)
1218 BlockDriverState
*parent_a
, *parent_b
, *a
, *b
, *c
;
1219 BdrvChild
*child_a
, *child_b
;
1222 QEMUIOVector qiov
= QEMU_IOVEC_INIT_BUF(qiov
, NULL
, 0);
1224 if (!by_parent_cb
) {
1225 detach_by_driver_cb_class
= child_of_bds
;
1226 detach_by_driver_cb_class
.drained_begin
=
1227 detach_by_driver_cb_drained_begin
;
1228 detach_by_driver_cb_class
.drained_end
= NULL
;
1229 detach_by_driver_cb_class
.drained_poll
= NULL
;
1232 detach_by_parent_data
= (struct detach_by_parent_data
) {
1233 .detach_on_drain
= false,
1236 /* Create all involved nodes */
1237 parent_a
= bdrv_new_open_driver(&bdrv_test
, "parent-a", BDRV_O_RDWR
,
1239 parent_b
= bdrv_new_open_driver(&bdrv_test
, "parent-b", 0,
1242 a
= bdrv_new_open_driver(&bdrv_test
, "a", BDRV_O_RDWR
, &error_abort
);
1243 b
= bdrv_new_open_driver(&bdrv_test
, "b", BDRV_O_RDWR
, &error_abort
);
1244 c
= bdrv_new_open_driver(&bdrv_test
, "c", BDRV_O_RDWR
, &error_abort
);
1246 /* blk is a BB for parent-a */
1247 blk
= blk_new(qemu_get_aio_context(), BLK_PERM_ALL
, BLK_PERM_ALL
);
1248 blk_insert_bs(blk
, parent_a
, &error_abort
);
1249 bdrv_unref(parent_a
);
1251 /* If we want to get bdrv_drain_invoke() to call aio_poll(), the driver
1252 * callback must not return immediately. */
1253 if (!by_parent_cb
) {
1254 BDRVTestState
*s
= parent_a
->opaque
;
1255 s
->sleep_in_drain_begin
= true;
1258 /* Set child relationships */
1261 bdrv_graph_wrlock();
1262 child_b
= bdrv_attach_child(parent_b
, b
, "PB-B", &child_of_bds
,
1263 BDRV_CHILD_DATA
, &error_abort
);
1264 child_a
= bdrv_attach_child(parent_b
, a
, "PB-A", &child_of_bds
,
1265 BDRV_CHILD_COW
, &error_abort
);
1268 bdrv_attach_child(parent_a
, a
, "PA-A",
1269 by_parent_cb
? &child_of_bds
: &detach_by_driver_cb_class
,
1270 BDRV_CHILD_DATA
, &error_abort
);
1271 bdrv_graph_wrunlock();
1273 g_assert_cmpint(parent_a
->refcnt
, ==, 1);
1274 g_assert_cmpint(parent_b
->refcnt
, ==, 1);
1275 g_assert_cmpint(a
->refcnt
, ==, 3);
1276 g_assert_cmpint(b
->refcnt
, ==, 2);
1277 g_assert_cmpint(c
->refcnt
, ==, 1);
1279 g_assert(QLIST_FIRST(&parent_b
->children
) == child_a
);
1280 g_assert(QLIST_NEXT(child_a
, next
) == child_b
);
1281 g_assert(QLIST_NEXT(child_b
, next
) == NULL
);
1283 /* Start the evil read request */
1284 detach_by_parent_data
= (struct detach_by_parent_data
) {
1285 .parent_b
= parent_b
,
1288 .by_parent_cb
= by_parent_cb
,
1289 .detach_on_drain
= true,
1291 acb
= blk_aio_preadv(blk
, 0, &qiov
, 0, detach_by_parent_aio_cb
, NULL
);
1292 g_assert(acb
!= NULL
);
1294 /* Drain and check the expected result */
1295 bdrv_drained_begin(parent_b
);
1296 bdrv_drained_begin(a
);
1297 bdrv_drained_begin(b
);
1298 bdrv_drained_begin(c
);
1300 g_assert(detach_by_parent_data
.child_c
!= NULL
);
1302 g_assert_cmpint(parent_a
->refcnt
, ==, 1);
1303 g_assert_cmpint(parent_b
->refcnt
, ==, 1);
1304 g_assert_cmpint(a
->refcnt
, ==, 3);
1305 g_assert_cmpint(b
->refcnt
, ==, 1);
1306 g_assert_cmpint(c
->refcnt
, ==, 2);
1308 g_assert(QLIST_FIRST(&parent_b
->children
) == detach_by_parent_data
.child_c
);
1309 g_assert(QLIST_NEXT(detach_by_parent_data
.child_c
, next
) == child_a
);
1310 g_assert(QLIST_NEXT(child_a
, next
) == NULL
);
1312 g_assert_cmpint(parent_a
->quiesce_counter
, ==, 1);
1313 g_assert_cmpint(parent_b
->quiesce_counter
, ==, 3);
1314 g_assert_cmpint(a
->quiesce_counter
, ==, 1);
1315 g_assert_cmpint(b
->quiesce_counter
, ==, 1);
1316 g_assert_cmpint(c
->quiesce_counter
, ==, 1);
1318 bdrv_drained_end(parent_b
);
1319 bdrv_drained_end(a
);
1320 bdrv_drained_end(b
);
1321 bdrv_drained_end(c
);
1323 bdrv_unref(parent_b
);
1326 g_assert_cmpint(a
->refcnt
, ==, 1);
1327 g_assert_cmpint(b
->refcnt
, ==, 1);
1328 g_assert_cmpint(c
->refcnt
, ==, 1);
/* Test case: run test_detach_indirect() in its by_parent_cb == true variant */
static void test_detach_by_parent_cb(void)
{
    const bool by_parent_cb = true;

    test_detach_indirect(by_parent_cb);
}
/* Test case: run test_detach_indirect() in its by_parent_cb == false variant */
static void test_detach_by_driver_cb(void)
{
    const bool by_parent_cb = false;

    test_detach_indirect(by_parent_cb);
}
1344 static void test_append_to_drained(void)
1347 BlockDriverState
*base
, *overlay
;
1348 BDRVTestState
*base_s
, *overlay_s
;
1350 blk
= blk_new(qemu_get_aio_context(), BLK_PERM_ALL
, BLK_PERM_ALL
);
1351 base
= bdrv_new_open_driver(&bdrv_test
, "base", BDRV_O_RDWR
, &error_abort
);
1352 base_s
= base
->opaque
;
1353 blk_insert_bs(blk
, base
, &error_abort
);
1355 overlay
= bdrv_new_open_driver(&bdrv_test
, "overlay", BDRV_O_RDWR
,
1357 overlay_s
= overlay
->opaque
;
1359 do_drain_begin(BDRV_DRAIN
, base
);
1360 g_assert_cmpint(base
->quiesce_counter
, ==, 1);
1361 g_assert_cmpint(base_s
->drain_count
, ==, 1);
1362 g_assert_cmpint(base
->in_flight
, ==, 0);
1364 bdrv_append(overlay
, base
, &error_abort
);
1366 g_assert_cmpint(base
->in_flight
, ==, 0);
1367 g_assert_cmpint(overlay
->in_flight
, ==, 0);
1369 g_assert_cmpint(base
->quiesce_counter
, ==, 1);
1370 g_assert_cmpint(base_s
->drain_count
, ==, 1);
1371 g_assert_cmpint(overlay
->quiesce_counter
, ==, 1);
1372 g_assert_cmpint(overlay_s
->drain_count
, ==, 1);
1374 do_drain_end(BDRV_DRAIN
, base
);
1376 g_assert_cmpint(base
->quiesce_counter
, ==, 0);
1377 g_assert_cmpint(base_s
->drain_count
, ==, 0);
1378 g_assert_cmpint(overlay
->quiesce_counter
, ==, 0);
1379 g_assert_cmpint(overlay_s
->drain_count
, ==, 0);
1381 bdrv_unref(overlay
);
1386 static void test_set_aio_context(void)
1388 BlockDriverState
*bs
;
1389 IOThread
*a
= iothread_new();
1390 IOThread
*b
= iothread_new();
1391 AioContext
*ctx_a
= iothread_get_aio_context(a
);
1392 AioContext
*ctx_b
= iothread_get_aio_context(b
);
1394 bs
= bdrv_new_open_driver(&bdrv_test
, "test-node", BDRV_O_RDWR
,
1397 bdrv_drained_begin(bs
);
1398 bdrv_try_change_aio_context(bs
, ctx_a
, NULL
, &error_abort
);
1399 bdrv_drained_end(bs
);
1401 bdrv_drained_begin(bs
);
1402 bdrv_try_change_aio_context(bs
, ctx_b
, NULL
, &error_abort
);
1403 bdrv_try_change_aio_context(bs
, qemu_get_aio_context(), NULL
, &error_abort
);
1404 bdrv_drained_end(bs
);
1412 typedef struct TestDropBackingBlockJob
{
1414 bool should_complete
;
1416 BlockDriverState
*detach_also
;
1417 BlockDriverState
*bs
;
1418 } TestDropBackingBlockJob
;
1420 static int coroutine_fn
test_drop_backing_job_run(Job
*job
, Error
**errp
)
1422 TestDropBackingBlockJob
*s
=
1423 container_of(job
, TestDropBackingBlockJob
, common
.job
);
1425 while (!s
->should_complete
) {
1426 job_sleep_ns(job
, 0);
1432 static void test_drop_backing_job_commit(Job
*job
)
1434 TestDropBackingBlockJob
*s
=
1435 container_of(job
, TestDropBackingBlockJob
, common
.job
);
1437 bdrv_set_backing_hd(s
->bs
, NULL
, &error_abort
);
1438 bdrv_set_backing_hd(s
->detach_also
, NULL
, &error_abort
);
1440 *s
->did_complete
= true;
1443 static const BlockJobDriver test_drop_backing_job_driver
= {
1445 .instance_size
= sizeof(TestDropBackingBlockJob
),
1446 .free
= block_job_free
,
1447 .user_resume
= block_job_user_resume
,
1448 .run
= test_drop_backing_job_run
,
1449 .commit
= test_drop_backing_job_commit
,
1454 * Creates a child node with three parent nodes on it, and then runs a
1455 * block job on the final one, parent-node-2.
1457 * The job is then asked to complete before a section where the child
1460 * Ending this section will undrain the child's parents, first
1461 * parent-node-2, then parent-node-1, then parent-node-0 -- the parent
1462 * list is in reverse order of how they were added. Ending the drain
1463 * on parent-node-2 will resume the job, thus completing it and
1464 * scheduling job_exit().
1466 * Ending the drain on parent-node-1 will poll the AioContext, which
1467 * lets job_exit() and thus test_drop_backing_job_commit() run. That
1468 * function first removes the child as parent-node-2's backing file.
1470 * In old (and buggy) implementations, there are two problems with
1472 * (A) bdrv_drain_invoke() polls for every node that leaves the
1473 * drained section. This means that job_exit() is scheduled
1474 * before the child has left the drained section. Its
1475 * quiesce_counter is therefore still 1 when it is removed from
1478 * (B) bdrv_replace_child_noperm() calls drained_end() on the old
1479 * child's parents as many times as the child is quiesced. This
1480 * means it will call drained_end() on parent-node-2 once.
1481 * Because parent-node-2 is no longer quiesced at this point, this
1484 * bdrv_replace_child_noperm() therefore must call drained_end() on
1485 * the parent only if it really is still drained because the child is
1488 * If removing child from parent-node-2 was successful (as it should
1489 * be), test_drop_backing_job_commit() will then also remove the child
1490 * from parent-node-0.
1492 * With an old version of our drain infrastructure ((A) above), that
1493 * resulted in the following flow:
1495 * 1. child attempts to leave its drained section. The call recurses
1498 * 2. parent-node-2 leaves the drained section. Polling in
1499 * bdrv_drain_invoke() will schedule job_exit().
1501 * 3. parent-node-1 leaves the drained section. Polling in
1502 * bdrv_drain_invoke() will run job_exit(), thus disconnecting
1503 * parent-node-0 from the child node.
1505 * 4. bdrv_parent_drained_end() uses a QLIST_FOREACH_SAFE() loop to
1506 * iterate over the parents. Thus, it now accesses the BdrvChild
1507 * object that used to connect parent-node-0 and the child node.
1508 * However, that object no longer exists, so it accesses a dangling
1511 * The solution is to only poll once when running a bdrv_drained_end()
1512 * operation, specifically at the end when all drained_end()
1513 * operations for all involved nodes have been scheduled.
1514 * Note that this also solves (A) above, thus hiding (B).
1516 static void test_blockjob_commit_by_drained_end(void)
1518 BlockDriverState
*bs_child
, *bs_parents
[3];
1519 TestDropBackingBlockJob
*job
;
1520 bool job_has_completed
= false;
1523 bs_child
= bdrv_new_open_driver(&bdrv_test
, "child-node", BDRV_O_RDWR
,
1526 for (i
= 0; i
< 3; i
++) {
1528 snprintf(name
, sizeof(name
), "parent-node-%i", i
);
1529 bs_parents
[i
] = bdrv_new_open_driver(&bdrv_test
, name
, BDRV_O_RDWR
,
1531 bdrv_set_backing_hd(bs_parents
[i
], bs_child
, &error_abort
);
1534 job
= block_job_create("job", &test_drop_backing_job_driver
, NULL
,
1535 bs_parents
[2], 0, BLK_PERM_ALL
, 0, 0, NULL
, NULL
,
1537 job
->bs
= bs_parents
[2];
1539 job
->detach_also
= bs_parents
[0];
1540 job
->did_complete
= &job_has_completed
;
1542 job_start(&job
->common
.job
);
1544 job
->should_complete
= true;
1545 bdrv_drained_begin(bs_child
);
1546 g_assert(!job_has_completed
);
1547 bdrv_drained_end(bs_child
);
1548 aio_poll(qemu_get_aio_context(), false);
1549 g_assert(job_has_completed
);
1551 bdrv_unref(bs_parents
[0]);
1552 bdrv_unref(bs_parents
[1]);
1553 bdrv_unref(bs_parents
[2]);
1554 bdrv_unref(bs_child
);
1558 typedef struct TestSimpleBlockJob
{
1560 bool should_complete
;
1562 } TestSimpleBlockJob
;
1564 static int coroutine_fn
test_simple_job_run(Job
*job
, Error
**errp
)
1566 TestSimpleBlockJob
*s
= container_of(job
, TestSimpleBlockJob
, common
.job
);
1568 while (!s
->should_complete
) {
1569 job_sleep_ns(job
, 0);
1575 static void test_simple_job_clean(Job
*job
)
1577 TestSimpleBlockJob
*s
= container_of(job
, TestSimpleBlockJob
, common
.job
);
1578 *s
->did_complete
= true;
1581 static const BlockJobDriver test_simple_job_driver
= {
1583 .instance_size
= sizeof(TestSimpleBlockJob
),
1584 .free
= block_job_free
,
1585 .user_resume
= block_job_user_resume
,
1586 .run
= test_simple_job_run
,
1587 .clean
= test_simple_job_clean
,
1591 static int drop_intermediate_poll_update_filename(BdrvChild
*child
,
1592 BlockDriverState
*new_base
,
1593 const char *filename
,
1597 * We are free to poll here, which may change the block graph, if
1598 * it is not drained.
1601 /* If the job is not drained: Complete it, schedule job_exit() */
1602 aio_poll(qemu_get_current_aio_context(), false);
1603 /* If the job is not drained: Run job_exit(), finish the job */
1604 aio_poll(qemu_get_current_aio_context(), false);
1610 * Test a poll in the midst of bdrv_drop_intermediate().
1612 * bdrv_drop_intermediate() calls BdrvChildClass.update_filename(),
1613 * which can yield or poll. This may lead to graph changes, unless
1614 * the whole subtree in question is drained.
1616 * We test this on the following graph:
1632 * node-2 --chain--> node-1 --chain--> node-0
1634 * We drop node-1 with bdrv_drop_intermediate(top=node-1, base=node-0).
1636 * This first updates node-2's backing filename by invoking
1637 * drop_intermediate_poll_update_filename(), which polls twice. This
1638 * causes the job to finish, which in turn causes the job-node to be
1641 * bdrv_drop_intermediate() uses a QLIST_FOREACH_SAFE() loop, so it
1642 * already has a pointer to the BdrvChild edge between job-node and
1643 * node-1. When it tries to handle that edge, we probably get a
1644 * segmentation fault because the object no longer exists.
1647 * The solution is for bdrv_drop_intermediate() to drain top's
1648 * subtree. This prevents graph changes from happening just because
1649 * BdrvChildClass.update_filename() yields or polls. Thus, the block
1650 * job is paused during that drained section and must finish before or
1653 * (In addition, bdrv_replace_child() must keep the job paused.)
1655 static void test_drop_intermediate_poll(void)
1657 static BdrvChildClass chain_child_class
;
1658 BlockDriverState
*chain
[3];
1659 TestSimpleBlockJob
*job
;
1660 BlockDriverState
*job_node
;
1661 bool job_has_completed
= false;
1665 chain_child_class
= child_of_bds
;
1666 chain_child_class
.update_filename
= drop_intermediate_poll_update_filename
;
1668 for (i
= 0; i
< 3; i
++) {
1670 snprintf(name
, 32, "node-%i", i
);
1672 chain
[i
] = bdrv_new_open_driver(&bdrv_test
, name
, 0, &error_abort
);
1675 job_node
= bdrv_new_open_driver(&bdrv_test
, "job-node", BDRV_O_RDWR
,
1677 bdrv_set_backing_hd(job_node
, chain
[1], &error_abort
);
1680 * Establish the chain last, so the chain links are the first
1681 * elements in the BDS.parents lists
1683 bdrv_graph_wrlock();
1684 for (i
= 0; i
< 3; i
++) {
1686 /* Takes the reference to chain[i - 1] */
1687 bdrv_attach_child(chain
[i
], chain
[i
- 1], "chain",
1688 &chain_child_class
, BDRV_CHILD_COW
, &error_abort
);
1691 bdrv_graph_wrunlock();
1693 job
= block_job_create("job", &test_simple_job_driver
, NULL
, job_node
,
1694 0, BLK_PERM_ALL
, 0, 0, NULL
, NULL
, &error_abort
);
1696 /* The job has a reference now */
1697 bdrv_unref(job_node
);
1699 job
->did_complete
= &job_has_completed
;
1701 job_start(&job
->common
.job
);
1702 job
->should_complete
= true;
1704 g_assert(!job_has_completed
);
1705 ret
= bdrv_drop_intermediate(chain
[1], chain
[0], NULL
);
1706 aio_poll(qemu_get_aio_context(), false);
1708 g_assert(job_has_completed
);
1710 bdrv_unref(chain
[2]);
1714 typedef struct BDRVReplaceTestState
{
1715 bool setup_completed
;
1722 bool yield_before_read
;
1724 Coroutine
*drain_co
;
1725 } BDRVReplaceTestState
;
1727 static void bdrv_replace_test_close(BlockDriverState
*bs
)
1732 * If @bs has a backing file:
1733 * Yield if .yield_before_read is true (and wait for drain_begin to
1735 * Forward the read to bs->backing. Set .has_read to true.
1736 * If drain_begin has woken us, wake it in turn.
1739 * Set .has_read to true and return success.
1741 static int coroutine_fn GRAPH_RDLOCK
1742 bdrv_replace_test_co_preadv(BlockDriverState
*bs
, int64_t offset
, int64_t bytes
,
1743 QEMUIOVector
*qiov
, BdrvRequestFlags flags
)
1745 BDRVReplaceTestState
*s
= bs
->opaque
;
1750 g_assert(!s
->drain_count
);
1752 s
->io_co
= qemu_coroutine_self();
1753 if (s
->yield_before_read
) {
1754 s
->yield_before_read
= false;
1755 qemu_coroutine_yield();
1759 ret
= bdrv_co_preadv(bs
->backing
, offset
, bytes
, qiov
, 0);
1762 /* Wake up drain_co if it runs */
1764 aio_co_wake(s
->drain_co
);
1774 static void coroutine_fn
bdrv_replace_test_drain_co(void *opaque
)
1776 BlockDriverState
*bs
= opaque
;
1777 BDRVReplaceTestState
*s
= bs
->opaque
;
1779 /* Keep waking io_co up until it is done */
1781 aio_co_wake(s
->io_co
);
1783 qemu_coroutine_yield();
1786 bdrv_dec_in_flight(bs
);
1790 * If .drain_count is 0, wake up .io_co if there is one; and set
1792 * Increment .drain_count.
1794 static void bdrv_replace_test_drain_begin(BlockDriverState
*bs
)
1796 BDRVReplaceTestState
*s
= bs
->opaque
;
1798 if (!s
->setup_completed
) {
1802 if (!s
->drain_count
) {
1803 s
->drain_co
= qemu_coroutine_create(bdrv_replace_test_drain_co
, bs
);
1804 bdrv_inc_in_flight(bs
);
1805 aio_co_enter(bdrv_get_aio_context(bs
), s
->drain_co
);
1806 s
->was_drained
= true;
1811 static void coroutine_fn
bdrv_replace_test_read_entry(void *opaque
)
1813 BlockDriverState
*bs
= opaque
;
1815 QEMUIOVector qiov
= QEMU_IOVEC_INIT_BUF(qiov
, &data
, 1);
1818 /* Queue a read request post-drain */
1819 bdrv_graph_co_rdlock();
1820 ret
= bdrv_replace_test_co_preadv(bs
, 0, 1, &qiov
, 0);
1821 bdrv_graph_co_rdunlock();
1824 bdrv_dec_in_flight(bs
);
1828 * Reduce .drain_count, set .was_undrained once it reaches 0.
1829 * If .drain_count reaches 0 and the node has a backing file, issue a
1832 static void bdrv_replace_test_drain_end(BlockDriverState
*bs
)
1834 BDRVReplaceTestState
*s
= bs
->opaque
;
1836 GRAPH_RDLOCK_GUARD_MAINLOOP();
1838 if (!s
->setup_completed
) {
1842 g_assert(s
->drain_count
> 0);
1843 if (!--s
->drain_count
) {
1844 s
->was_undrained
= true;
1847 Coroutine
*co
= qemu_coroutine_create(bdrv_replace_test_read_entry
,
1849 bdrv_inc_in_flight(bs
);
1850 aio_co_enter(bdrv_get_aio_context(bs
), co
);
1855 static BlockDriver bdrv_replace_test
= {
1856 .format_name
= "replace_test",
1857 .instance_size
= sizeof(BDRVReplaceTestState
),
1858 .supports_backing
= true,
1860 .bdrv_close
= bdrv_replace_test_close
,
1861 .bdrv_co_preadv
= bdrv_replace_test_co_preadv
,
1863 .bdrv_drain_begin
= bdrv_replace_test_drain_begin
,
1864 .bdrv_drain_end
= bdrv_replace_test_drain_end
,
1866 .bdrv_child_perm
= bdrv_default_perms
,
1869 static void coroutine_fn
test_replace_child_mid_drain_read_co(void *opaque
)
1874 ret
= blk_co_pread(opaque
, 0, 1, &data
, 0);
1879 * We test two things:
1880 * (1) bdrv_replace_child_noperm() must not undrain the parent if both
1881 * children are drained.
1882 * (2) bdrv_replace_child_noperm() must never flush I/O requests to a
1883 * drained child. If the old child is drained, it must flush I/O
1884 * requests after the new one has been attached. If the new child
1885 * is drained, it must flush I/O requests before the old one is
1888 * To do so, we create one parent node and two child nodes; then
1889 * attach one of the children (old_child_bs) to the parent, then
1890 * drain both old_child_bs and new_child_bs according to
1891 * old_drain_count and new_drain_count, respectively, and finally
1892 * we invoke bdrv_replace_node() to replace old_child_bs by
1895 * The test block driver we use here (bdrv_replace_test) has a read
1897 * - For the parent node, can optionally yield, and then forwards the
1898 * read to bdrv_co_preadv(),
1899 * - For the child node, just returns immediately.
1901 * If the read yields, the drain_begin function will wake it up.
1903 * The drain_end function issues a read on the parent once it is fully
1904 * undrained (which simulates requests starting to come in again).
1906 static void do_test_replace_child_mid_drain(int old_drain_count
,
1907 int new_drain_count
)
1909 BlockBackend
*parent_blk
;
1910 BlockDriverState
*parent_bs
;
1911 BlockDriverState
*old_child_bs
, *new_child_bs
;
1912 BDRVReplaceTestState
*parent_s
;
1913 BDRVReplaceTestState
*old_child_s
, *new_child_s
;
1917 parent_bs
= bdrv_new_open_driver(&bdrv_replace_test
, "parent", 0,
1919 parent_s
= parent_bs
->opaque
;
1921 parent_blk
= blk_new(qemu_get_aio_context(),
1922 BLK_PERM_CONSISTENT_READ
, BLK_PERM_ALL
);
1923 blk_insert_bs(parent_blk
, parent_bs
, &error_abort
);
1925 old_child_bs
= bdrv_new_open_driver(&bdrv_replace_test
, "old-child", 0,
1927 new_child_bs
= bdrv_new_open_driver(&bdrv_replace_test
, "new-child", 0,
1929 old_child_s
= old_child_bs
->opaque
;
1930 new_child_s
= new_child_bs
->opaque
;
1932 /* So that we can read something */
1933 parent_bs
->total_sectors
= 1;
1934 old_child_bs
->total_sectors
= 1;
1935 new_child_bs
->total_sectors
= 1;
1937 bdrv_ref(old_child_bs
);
1938 bdrv_graph_wrlock();
1939 bdrv_attach_child(parent_bs
, old_child_bs
, "child", &child_of_bds
,
1940 BDRV_CHILD_COW
, &error_abort
);
1941 bdrv_graph_wrunlock();
1942 parent_s
->setup_completed
= true;
1944 for (i
= 0; i
< old_drain_count
; i
++) {
1945 bdrv_drained_begin(old_child_bs
);
1947 for (i
= 0; i
< new_drain_count
; i
++) {
1948 bdrv_drained_begin(new_child_bs
);
1951 if (!old_drain_count
) {
1953 * Start a read operation that will yield, so it will not
1954 * complete before the node is drained.
1956 parent_s
->yield_before_read
= true;
1957 io_co
= qemu_coroutine_create(test_replace_child_mid_drain_read_co
,
1959 qemu_coroutine_enter(io_co
);
1962 /* If we have started a read operation, it should have yielded */
1963 g_assert(!parent_s
->has_read
);
1965 /* Reset drained status so we can see what bdrv_replace_node() does */
1966 parent_s
->was_drained
= false;
1967 parent_s
->was_undrained
= false;
1969 g_assert(parent_bs
->quiesce_counter
== old_drain_count
);
1970 bdrv_drained_begin(old_child_bs
);
1971 bdrv_drained_begin(new_child_bs
);
1972 bdrv_graph_wrlock();
1973 bdrv_replace_node(old_child_bs
, new_child_bs
, &error_abort
);
1974 bdrv_graph_wrunlock();
1975 bdrv_drained_end(new_child_bs
);
1976 bdrv_drained_end(old_child_bs
);
1977 g_assert(parent_bs
->quiesce_counter
== new_drain_count
);
1979 if (!old_drain_count
&& !new_drain_count
) {
1981 * From undrained to undrained drains and undrains the parent,
1982 * because bdrv_replace_node() contains a drained section for
1985 g_assert(parent_s
->was_drained
&& parent_s
->was_undrained
);
1986 } else if (!old_drain_count
&& new_drain_count
) {
1988 * From undrained to drained should drain the parent and keep
1991 g_assert(parent_s
->was_drained
&& !parent_s
->was_undrained
);
1992 } else if (old_drain_count
&& !new_drain_count
) {
1994 * From drained to undrained should undrain the parent and
1997 g_assert(!parent_s
->was_drained
&& parent_s
->was_undrained
);
1998 } else /* if (old_drain_count && new_drain_count) */ {
2000 * From drained to drained must not undrain the parent at any
2003 g_assert(!parent_s
->was_drained
&& !parent_s
->was_undrained
);
2006 if (!old_drain_count
|| !new_drain_count
) {
2008 * If !old_drain_count, we have started a read request before
2009 * bdrv_replace_node(). If !new_drain_count, the parent must
2010 * have been undrained at some point, and
2011 * bdrv_replace_test_drain_end() starts a read request
2014 g_assert(parent_s
->has_read
);
2017 * If the parent was never undrained, there is no way to start
2020 g_assert(!parent_s
->has_read
);
2023 /* A drained child must not have received any request */
2024 g_assert(!(old_drain_count
&& old_child_s
->has_read
));
2025 g_assert(!(new_drain_count
&& new_child_s
->has_read
));
2027 for (i
= 0; i
< new_drain_count
; i
++) {
2028 bdrv_drained_end(new_child_bs
);
2030 for (i
= 0; i
< old_drain_count
; i
++) {
2031 bdrv_drained_end(old_child_bs
);
2035 * By now, bdrv_replace_test_drain_end() must have been called
2036 * at some point while the new child was attached to the parent.
2038 g_assert(parent_s
->has_read
);
2039 g_assert(new_child_s
->has_read
);
2041 blk_unref(parent_blk
);
2042 bdrv_unref(parent_bs
);
2043 bdrv_unref(old_child_bs
);
2044 bdrv_unref(new_child_bs
);
/*
 * Test case: run do_test_replace_child_mid_drain() for every combination of
 * old_drain_count and new_drain_count in {0, 1}, in the order
 * (0, 0), (0, 1), (1, 0), (1, 1).
 */
static void test_replace_child_mid_drain(void)
{
    int combo;

    /* High bit selects old_drain_count, low bit selects new_drain_count */
    for (combo = 0; combo < 4; combo++) {
        int old_drain_count = combo / 2;
        int new_drain_count = combo % 2;

        do_test_replace_child_mid_drain(old_drain_count, new_drain_count);
    }
}
2058 int main(int argc
, char **argv
)
2063 qemu_init_main_loop(&error_abort
);
2065 g_test_init(&argc
, &argv
, NULL
);
2066 qemu_event_init(&done_event
, false);
2068 g_test_add_func("/bdrv-drain/driver-cb/drain_all", test_drv_cb_drain_all
);
2069 g_test_add_func("/bdrv-drain/driver-cb/drain", test_drv_cb_drain
);
2071 g_test_add_func("/bdrv-drain/driver-cb/co/drain_all",
2072 test_drv_cb_co_drain_all
);
2073 g_test_add_func("/bdrv-drain/driver-cb/co/drain", test_drv_cb_co_drain
);
2075 g_test_add_func("/bdrv-drain/quiesce/drain_all", test_quiesce_drain_all
);
2076 g_test_add_func("/bdrv-drain/quiesce/drain", test_quiesce_drain
);
2078 g_test_add_func("/bdrv-drain/quiesce/co/drain_all",
2079 test_quiesce_co_drain_all
);
2080 g_test_add_func("/bdrv-drain/quiesce/co/drain", test_quiesce_co_drain
);
2082 g_test_add_func("/bdrv-drain/nested", test_nested
);
2084 g_test_add_func("/bdrv-drain/graph-change/drain_all",
2085 test_graph_change_drain_all
);
2087 g_test_add_func("/bdrv-drain/iothread/drain_all", test_iothread_drain_all
);
2088 g_test_add_func("/bdrv-drain/iothread/drain", test_iothread_drain
);
2090 g_test_add_func("/bdrv-drain/blockjob/drain_all", test_blockjob_drain_all
);
2091 g_test_add_func("/bdrv-drain/blockjob/drain", test_blockjob_drain
);
2093 g_test_add_func("/bdrv-drain/blockjob/error/drain_all",
2094 test_blockjob_error_drain_all
);
2095 g_test_add_func("/bdrv-drain/blockjob/error/drain",
2096 test_blockjob_error_drain
);
2098 g_test_add_func("/bdrv-drain/blockjob/iothread/drain_all",
2099 test_blockjob_iothread_drain_all
);
2100 g_test_add_func("/bdrv-drain/blockjob/iothread/drain",
2101 test_blockjob_iothread_drain
);
2103 g_test_add_func("/bdrv-drain/blockjob/iothread/error/drain_all",
2104 test_blockjob_iothread_error_drain_all
);
2105 g_test_add_func("/bdrv-drain/blockjob/iothread/error/drain",
2106 test_blockjob_iothread_error_drain
);
2108 g_test_add_func("/bdrv-drain/deletion/drain", test_delete_by_drain
);
2109 g_test_add_func("/bdrv-drain/detach/drain_all", test_detach_by_drain_all
);
2110 g_test_add_func("/bdrv-drain/detach/drain", test_detach_by_drain
);
2111 g_test_add_func("/bdrv-drain/detach/parent_cb", test_detach_by_parent_cb
);
2112 g_test_add_func("/bdrv-drain/detach/driver_cb", test_detach_by_driver_cb
);
2114 g_test_add_func("/bdrv-drain/attach/drain", test_append_to_drained
);
2116 g_test_add_func("/bdrv-drain/set_aio_context", test_set_aio_context
);
2118 g_test_add_func("/bdrv-drain/blockjob/commit_by_drained_end",
2119 test_blockjob_commit_by_drained_end
);
2121 g_test_add_func("/bdrv-drain/bdrv_drop_intermediate/poll",
2122 test_drop_intermediate_poll
);
2124 g_test_add_func("/bdrv-drain/replace_child/mid-drain",
2125 test_replace_child_mid_drain
);
2128 qemu_event_destroy(&done_event
);