/*
 * Copyright (C) 2013 Proxmox Server Solutions
 * Copyright (c) 2019 Virtuozzo International GmbH.
 *
 * Authors:
 *  Dietmar Maurer (dietmar@proxmox.com)
 *  Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */
#include "qemu/osdep.h"

#include "trace.h"
#include "qapi/error.h"
#include "block/block-copy.h"
#include "sysemu/block-backend.h"
#include "qemu/units.h"

#define BLOCK_COPY_MAX_COPY_RANGE (16 * MiB)
#define BLOCK_COPY_MAX_BUFFER (1 * MiB)
#define BLOCK_COPY_MAX_MEM (128 * MiB)
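
/* Block until no in-flight request intersects the byte range [start, end). */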
static void coroutine_fn block_copy_wait_inflight_reqs(BlockCopyState *s,
                                                       int64_t start,
                                                       int64_t end)
{
    BlockCopyInFlightReq *req;
    bool waited;

    do {
        waited = false;
        QLIST_FOREACH(req, &s->inflight_reqs, list) {
            if (end > req->start_byte && start < req->end_byte) {
                qemu_co_queue_wait(&req->wait_queue, NULL);
                waited = true;
                break;
            }
        }
    } while (waited);
}
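
/*
 * Register an in-flight request covering [start, end) so that overlapping
 * block_copy() calls wait for it to finish.
 */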
static void block_copy_inflight_req_begin(BlockCopyState *s,
                                          BlockCopyInFlightReq *req,
                                          int64_t start, int64_t end)
{
    req->start_byte = start;
    req->end_byte = end;
    qemu_co_queue_init(&req->wait_queue);
    QLIST_INSERT_HEAD(&s->inflight_reqs, req, list);
}
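
/* Remove the request from the list and wake every coroutine waiting on it. */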
static void coroutine_fn block_copy_inflight_req_end(BlockCopyInFlightReq *req)
{
    QLIST_REMOVE(req, list);
    qemu_co_queue_restart_all(&req->wait_queue);
}
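
/* Release the copy bitmap and the shared-resource accounting, then free @s. */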
void block_copy_state_free(BlockCopyState *s)
{
    if (!s) {
        return;
    }

    bdrv_release_dirty_bitmap(s->copy_bitmap);
    shres_destroy(s->mem);
    g_free(s);
}
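
/*
 * Allocate a BlockCopyState for copying from @source to @target in
 * @cluster_size chunks. Chooses between copy_range and bounce-buffer copying
 * based on the supported max_transfer and @write_flags; returns NULL (and
 * sets @errp) if the dirty bitmap cannot be created.
 */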
BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
                                     int64_t cluster_size,
                                     BdrvRequestFlags write_flags, Error **errp)
{
    BlockCopyState *s;
    BdrvDirtyBitmap *copy_bitmap;
    uint32_t max_transfer =
            MIN_NON_ZERO(INT_MAX,
                         MIN_NON_ZERO(source->bs->bl.max_transfer,
                                      target->bs->bl.max_transfer));

    copy_bitmap = bdrv_create_dirty_bitmap(source->bs, cluster_size, NULL,
                                           errp);
    if (!copy_bitmap) {
        return NULL;
    }
    bdrv_disable_dirty_bitmap(copy_bitmap);

    s = g_new(BlockCopyState, 1);
    *s = (BlockCopyState) {
        .source = source,
        .target = target,
        .copy_bitmap = copy_bitmap,
        .cluster_size = cluster_size,
        .len = bdrv_dirty_bitmap_size(copy_bitmap),
        .write_flags = write_flags,
        .mem = shres_create(BLOCK_COPY_MAX_MEM),
    };

    if (max_transfer < cluster_size) {
        /*
         * copy_range does not respect max_transfer. We don't want to bother
         * with requests smaller than block-copy cluster size, so fallback to
         * buffered copying (read and write respect max_transfer on their
         * behalf).
         */
        s->use_copy_range = false;
        s->copy_size = cluster_size;
    } else if (write_flags & BDRV_REQ_WRITE_COMPRESSED) {
        /* Compression supports only cluster-size writes and no copy-range. */
        s->use_copy_range = false;
        s->copy_size = cluster_size;
    } else {
        /*
         * copy_range does not respect max_transfer (it's a TODO), so we factor
         * that in here.
         */
        s->use_copy_range = true;
        s->copy_size = MIN(MAX(cluster_size, BLOCK_COPY_MAX_COPY_RANGE),
                           QEMU_ALIGN_DOWN(max_transfer, cluster_size));
    }

    QLIST_INIT(&s->inflight_reqs);

    return s;
}
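
/*
 * Set the progress callbacks: @progress_bytes_callback is called after each
 * successfully copied chunk, @progress_reset_callback whenever unallocated
 * clusters are dropped from the copy bitmap.
 */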
void block_copy_set_callbacks(
        BlockCopyState *s,
        ProgressBytesCallbackFunc progress_bytes_callback,
        ProgressResetCallbackFunc progress_reset_callback,
        void *progress_opaque)
{
    s->progress_bytes_callback = progress_bytes_callback;
    s->progress_reset_callback = progress_reset_callback;
    s->progress_opaque = progress_opaque;
}
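
/*
 * Typical usage (illustrative sketch only; the child nodes, cluster size,
 * write flags and callbacks shown here are placeholders chosen by the caller,
 * e.g. a backup job):
 *
 *     BlockCopyState *bcs = block_copy_state_new(source, target, cluster_size,
 *                                                 write_flags, errp);
 *     if (bcs) {
 *         block_copy_set_callbacks(bcs, my_bytes_cb, my_reset_cb, my_opaque);
 *         ret = block_copy(bcs, offset, bytes, &error_is_read);
 *         block_copy_state_free(bcs);
 *     }
 */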
/*
 * Copy a cluster-aligned chunk. @end is allowed to exceed s->len only to
 * cover the last cluster when s->len is not aligned to clusters.
 *
 * No sync here: neither bitmap nor intersecting request handling, only copy.
 *
 * Returns 0 on success.
 */
static int coroutine_fn block_copy_do_copy(BlockCopyState *s,
                                           int64_t start, int64_t end,
                                           bool *error_is_read)
{
    int ret;
    int nbytes = MIN(end, s->len) - start;
    void *bounce_buffer = NULL;

    assert(QEMU_IS_ALIGNED(start, s->cluster_size));
    assert(QEMU_IS_ALIGNED(end, s->cluster_size));
    assert(end < s->len || end == QEMU_ALIGN_UP(s->len, s->cluster_size));

    if (s->use_copy_range) {
        ret = bdrv_co_copy_range(s->source, start, s->target, start, nbytes,
                                 0, s->write_flags);
        if (ret < 0) {
            trace_block_copy_copy_range_fail(s, start, ret);
            s->use_copy_range = false;
            s->copy_size = MAX(s->cluster_size, BLOCK_COPY_MAX_BUFFER);
            /* Fallback to read+write with allocated buffer */
        } else {
            goto out;
        }
    }

    /*
     * In case of failed copy_range request above, we may proceed with buffered
     * request larger than BLOCK_COPY_MAX_BUFFER. Still, further requests will
     * be properly limited, so don't care too much.
     */

    bounce_buffer = qemu_blockalign(s->source->bs, nbytes);

    ret = bdrv_co_pread(s->source, start, nbytes, bounce_buffer, 0);
    if (ret < 0) {
        trace_block_copy_read_fail(s, start, ret);
        if (error_is_read) {
            *error_is_read = true;
        }
        goto out;
    }

    ret = bdrv_co_pwrite(s->target, start, nbytes, bounce_buffer,
                         s->write_flags);
    if (ret < 0) {
        trace_block_copy_write_fail(s, start, ret);
        if (error_is_read) {
            *error_is_read = false;
        }
        goto out;
    }

out:
    qemu_vfree(bounce_buffer);

    return ret;
}
/*
 * Check if the cluster starting at offset is allocated or not.
 * Return via @pnum the number of contiguous clusters sharing this allocation.
 */
static int block_copy_is_cluster_allocated(BlockCopyState *s, int64_t offset,
                                           int64_t *pnum)
{
    BlockDriverState *bs = s->source->bs;
    int64_t count, total_count = 0;
    int64_t bytes = s->len - offset;
    int ret;

    assert(QEMU_IS_ALIGNED(offset, s->cluster_size));

    while (true) {
        ret = bdrv_is_allocated(bs, offset, bytes, &count);
        if (ret < 0) {
            return ret;
        }

        total_count += count;

        if (ret || count == 0) {
            /*
             * ret: partial segment(s) are considered allocated.
             * otherwise: unallocated tail is treated as an entire segment.
             */
            *pnum = DIV_ROUND_UP(total_count, s->cluster_size);
            return ret;
        }

        /* Unallocated segment(s) with uncertain following segment(s) */
        if (total_count >= s->cluster_size) {
            *pnum = total_count / s->cluster_size;
            return 0;
        }

        offset += count;
        bytes -= count;
    }
}
/*
 * Reset bits in copy_bitmap starting at offset if they represent unallocated
 * data in the image. May reset subsequent contiguous bits.
 * @return 0 when the cluster at @offset was unallocated,
 *         1 otherwise, and -ret on error.
 */
int64_t block_copy_reset_unallocated(BlockCopyState *s,
                                     int64_t offset, int64_t *count)
{
    int ret;
    int64_t clusters, bytes;

    ret = block_copy_is_cluster_allocated(s, offset, &clusters);
    if (ret < 0) {
        return ret;
    }

    bytes = clusters * s->cluster_size;

    if (!ret) {
        bdrv_reset_dirty_bitmap(s->copy_bitmap, offset, bytes);
        s->progress_reset_callback(s->progress_opaque);
    }

    *count = bytes;
    return ret;
}
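
/*
 * Copy the dirty clusters in [start, start + bytes) from source to target,
 * serializing against intersecting in-flight requests. On failure the
 * affected clusters are marked dirty again; *error_is_read tells whether the
 * failing operation was the read or the write.
 */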
int coroutine_fn block_copy(BlockCopyState *s,
                            int64_t start, uint64_t bytes,
                            bool *error_is_read)
{
    int ret = 0;
    int64_t end = bytes + start; /* bytes */
    int64_t status_bytes;
    BlockCopyInFlightReq req;

    /*
     * block_copy() user is responsible for keeping source and target in the
     * same AioContext.
     */
    assert(bdrv_get_aio_context(s->source->bs) ==
           bdrv_get_aio_context(s->target->bs));

    assert(QEMU_IS_ALIGNED(start, s->cluster_size));
    assert(QEMU_IS_ALIGNED(end, s->cluster_size));

    block_copy_wait_inflight_reqs(s, start, bytes);
    block_copy_inflight_req_begin(s, &req, start, end);

    while (start < end) {
        int64_t next_zero, chunk_end;

        if (!bdrv_dirty_bitmap_get(s->copy_bitmap, start)) {
            trace_block_copy_skip(s, start);
            start += s->cluster_size;
            continue; /* already copied */
        }

        chunk_end = MIN(end, start + s->copy_size);

        next_zero = bdrv_dirty_bitmap_next_zero(s->copy_bitmap, start,
                                                chunk_end - start);
        if (next_zero >= 0) {
            assert(next_zero > start); /* start is dirty */
            assert(next_zero < chunk_end); /* no need to do MIN() */
            chunk_end = next_zero;
        }

        if (s->skip_unallocated) {
            ret = block_copy_reset_unallocated(s, start, &status_bytes);
            if (ret == 0) {
                trace_block_copy_skip_range(s, start, status_bytes);
                start += status_bytes;
                continue;
            }
            /* Clamp to known allocated region */
            chunk_end = MIN(chunk_end, start + status_bytes);
        }

        trace_block_copy_process(s, start);

        bdrv_reset_dirty_bitmap(s->copy_bitmap, start, chunk_end - start);

        co_get_from_shres(s->mem, chunk_end - start);
        ret = block_copy_do_copy(s, start, chunk_end, error_is_read);
        co_put_to_shres(s->mem, chunk_end - start);
        if (ret < 0) {
            bdrv_set_dirty_bitmap(s->copy_bitmap, start, chunk_end - start);
            break;
        }

        s->progress_bytes_callback(chunk_end - start, s->progress_opaque);
        start = chunk_end;
        ret = 0;
    }

    block_copy_inflight_req_end(&req);

    return ret;
}