/*
 * Copyright (c) 2010-2011 IBM
 *
 * Authors:
 *         Chunqiang Tang <ctang@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.
 * See the COPYING file in the top-level directory.
 */

/*=============================================================================
 *  A short description: this FVD module implements loading data from a
 *  compact image.
 *============================================================================*/
16 static void aio_wrapper_bh (void *opaque
);
17 static void finish_load_data_from_compact_image (void *opaque
, int ret
);
18 static inline FvdAIOCB
*init_load_acb (FvdAIOCB
* parent_acb
,
19 BlockDriverState
* bs
,
21 QEMUIOVector
* orig_qiov
, int nb_sectors
,
22 BlockDriverCompletionFunc
* cb
,
25 static inline BlockDriverAIOCB
*load_data (FvdAIOCB
* parent_acb
,
26 BlockDriverState
* bs
,
28 QEMUIOVector
* orig_qiov
,
30 BlockDriverCompletionFunc
* cb
,
33 BDRVFvdState
*s
= bs
->opaque
;
36 /* Load directly since it is not a compact image. */
37 return bdrv_aio_readv (s
->fvd_data
, s
->data_offset
+ sector_num
,
38 orig_qiov
, nb_sectors
, cb
, opaque
);
40 return load_data_from_compact_image (NULL
, parent_acb
, bs
, sector_num
,
41 orig_qiov
, nb_sectors
, cb
, opaque
);
45 static BlockDriverAIOCB
*
46 load_data_from_compact_image (FvdAIOCB
* acb
, FvdAIOCB
* parent_acb
,
47 BlockDriverState
* bs
, int64_t sector_num
,
48 QEMUIOVector
* orig_qiov
, int nb_sectors
,
49 BlockDriverCompletionFunc
* cb
, void *opaque
)
51 BDRVFvdState
*s
= bs
->opaque
;
52 const uint32_t first_chunk
= sector_num
/ s
->chunk_size
;
53 const uint32_t last_chunk
= (sector_num
+ nb_sectors
- 1) / s
->chunk_size
;
58 if (first_chunk
== last_chunk
) {
59 goto handle_one_continuous_region
;
62 /* Count the number of qiov and iov needed to cover the continuous regions
63 * of the compact image. */
65 size_t iov_left
= orig_qiov
->iov
[0].iov_len
;
66 uint8_t *iov_buf
= orig_qiov
->iov
[0].iov_base
;
68 int nziov
= 0; /* Number of empty regions. */
70 uint32_t prev
= READ_TABLE2 (s
->table
[first_chunk
]);
72 /* Amount of data in the first chunk. */
73 int nb
= s
->chunk_size
- (sector_num
% s
->chunk_size
);
75 for (chunk
= first_chunk
+ 1; chunk
<= last_chunk
; chunk
++) {
76 uint32_t current
= READ_TABLE2 (s
->table
[chunk
]);
78 if (chunk
< last_chunk
) {
79 data_size
= s
->chunk_size
;
81 data_size
= (sector_num
+ nb_sectors
) % s
->chunk_size
;
83 data_size
= s
->chunk_size
;
87 if ((IS_EMPTY (current
) && IS_EMPTY (prev
)) ||
88 (!IS_EMPTY (prev
) && !IS_EMPTY (current
) && current
== prev
+ 1)) {
89 nb
+= data_size
; /* Belong to the previous continuous region. */
91 /* Terminate the previous continuous region. */
92 if (IS_EMPTY (prev
)) {
93 /* Skip this empty region. */
94 count_iov (orig_qiov
->iov
, &iov_index
, &iov_buf
,
98 niov
+= count_iov (orig_qiov
->iov
, &iov_index
, &iov_buf
,
102 nb
= data_size
; /* Data in the new region. */
107 if (nqiov
== 0 && nziov
== 0) {
108 /* All data can be read in one qiov. Reuse orig_qiov. */
109 handle_one_continuous_region
:
110 if (IS_EMPTY (s
->table
[first_chunk
])) {
111 /* Fill qiov with zeros. */
112 for (i
= 0; i
< orig_qiov
->niov
; i
++) {
113 memset (orig_qiov
->iov
[i
].iov_base
,
114 0, orig_qiov
->iov
[i
].iov_len
);
117 /* Use a bh to invoke the callback. */
119 if (!(acb
= my_qemu_aio_get (&fvd_aio_pool
, bs
, cb
, opaque
))) {
122 COPY_UUID (acb
, parent_acb
);
125 QDEBUG ("LOAD: acb%llu-%p load_fill_all_with_zeros\n",
127 acb
->type
= OP_WRAPPER
;
128 acb
->wrapper
.bh
= qemu_bh_new (aio_wrapper_bh
, acb
);
129 qemu_bh_schedule (acb
->wrapper
.bh
);
133 /* A non-empty region. */
134 start_sec
= READ_TABLE (s
->table
[first_chunk
]) * s
->chunk_size
+
135 (sector_num
% s
->chunk_size
);
138 QDEBUG ("LOAD: acb%llu-%p "
139 "load_directly_as_one_continuous_region\n",
140 parent_acb
->uuid
, acb
);
142 return bdrv_aio_readv (s
->fvd_data
, s
->data_offset
+ start_sec
,
143 orig_qiov
, nb_sectors
, cb
, opaque
);
146 QDEBUG ("LOAD: acb%llu-%p load_directly_as_one_continuous_region\n",
148 acb
->load
.num_children
= 1;
149 acb
->load
.one_child
.hd_acb
=
150 bdrv_aio_readv (s
->fvd_data
, s
->data_offset
+ start_sec
, orig_qiov
,
151 nb_sectors
, finish_load_data_from_compact_image
,
152 &acb
->load
.one_child
);
153 if (acb
->load
.one_child
.hd_acb
) {
154 acb
->load
.one_child
.acb
= acb
;
157 my_qemu_aio_unref (acb
);
162 /* qiov for the last continuous region. */
163 if (!IS_EMPTY (prev
)) {
164 niov
+= count_iov (orig_qiov
->iov
, &iov_index
, &iov_buf
,
165 &iov_left
, nb
* 512);
167 ASSERT (iov_index
== orig_qiov
->niov
- 1 && iov_left
== 0);
170 /* Need to submit multiple requests to the lower layer. Initialize acb. */
171 if (!acb
&& !(acb
= init_load_acb (parent_acb
, bs
, sector_num
,
172 orig_qiov
, nb_sectors
, cb
, opaque
))) {
175 acb
->load
.num_children
= nqiov
;
177 /* Allocate memory and create multiple requests. */
178 acb
->load
.children
= my_qemu_malloc ((sizeof (CompactChildCB
) +
179 sizeof (QEMUIOVector
)) * nqiov
+
180 sizeof (struct iovec
) * niov
);
181 QEMUIOVector
*q
= (QEMUIOVector
*) (acb
->load
.children
+ nqiov
);
182 struct iovec
*v
= (struct iovec
*) (q
+ nqiov
);
184 /* Set up iov and qiov. */
187 iov_left
= orig_qiov
->iov
[0].iov_len
;
188 iov_buf
= orig_qiov
->iov
[0].iov_base
;
189 nb
= s
->chunk_size
- (sector_num
% s
->chunk_size
); /* Data in first chunk.*/
190 prev
= READ_TABLE2 (s
->table
[first_chunk
]);
192 /* if (IS_EMPTY(prev)), start_sec will not be used later, and hence safe. */
193 start_sec
= prev
* s
->chunk_size
+ (sector_num
% s
->chunk_size
);
195 for (chunk
= first_chunk
+ 1; chunk
<= last_chunk
; chunk
++) {
196 uint32_t current
= READ_TABLE2 (s
->table
[chunk
]);
198 if (chunk
< last_chunk
) {
199 data_size
= s
->chunk_size
;
201 data_size
= (sector_num
+ nb_sectors
) % s
->chunk_size
;
202 if (data_size
== 0) {
203 data_size
= s
->chunk_size
;
207 if ((IS_EMPTY (prev
) && IS_EMPTY (current
)) ||
208 (!IS_EMPTY (prev
) && !IS_EMPTY (current
) && current
== prev
+ 1)) {
209 nb
+= data_size
; /* Continue the previous region. */
211 /* Terminate the previous continuous region. */
212 if (IS_EMPTY (prev
)) {
213 zero_iov (orig_qiov
->iov
, &iov_index
, &iov_buf
, &iov_left
,
214 nb
* 512); /* Fill iov data with zeros. */
216 niov
= setup_iov (orig_qiov
->iov
, v
, &iov_index
, &iov_buf
,
217 &iov_left
, nb
* 512);
218 qemu_iovec_init_external (q
, v
, niov
);
219 QDEBUG ("LOAD: acb%llu-%p create_child %d sector_num=%" PRId64
220 " nb_sectors=%d niov=%d\n", acb
->uuid
, acb
, nqiov
,
221 start_sec
, nb
, niov
);
222 acb
->load
.children
[nqiov
].hd_acb
=
223 bdrv_aio_readv (s
->fvd_data
, s
->data_offset
+ start_sec
, q
,
224 nb
, finish_load_data_from_compact_image
,
225 &acb
->load
.children
[nqiov
]);
226 if (!acb
->load
.children
[nqiov
].hd_acb
) {
229 acb
->load
.children
[nqiov
].acb
= acb
;
237 /* if (IS_EMPTY(current)), start_sec will not be used later. */
238 start_sec
= current
* s
->chunk_size
;
243 /* The last continuous region. */
244 if (IS_EMPTY (prev
)) {
245 zero_iov (orig_qiov
->iov
, &iov_index
, &iov_buf
, &iov_left
, nb
* 512);
247 niov
= setup_iov (orig_qiov
->iov
, v
, &iov_index
, &iov_buf
,
248 &iov_left
, nb
* 512);
249 qemu_iovec_init_external (q
, v
, niov
);
250 QDEBUG ("LOAD: acb%llu-%p create_child %d sector_num=%" PRId64
251 " nb_sectors=%d niov=%d\n", acb
->uuid
, acb
, nqiov
, start_sec
,
253 acb
->load
.children
[nqiov
].hd_acb
=
254 bdrv_aio_readv (s
->fvd_data
, s
->data_offset
+ start_sec
, q
, nb
,
255 finish_load_data_from_compact_image
,
256 &acb
->load
.children
[nqiov
]);
257 if (!acb
->load
.children
[nqiov
].hd_acb
) {
260 acb
->load
.children
[nqiov
].acb
= acb
;
262 ASSERT (iov_index
== orig_qiov
->niov
- 1 && iov_left
== 0);
267 for (i
= 0; i
< nqiov
; i
++) {
268 bdrv_aio_cancel (acb
->load
.children
[i
].hd_acb
);
270 my_qemu_free (acb
->load
.children
);
271 my_qemu_aio_unref (acb
);
275 static void aio_wrapper_bh (void *opaque
)
277 FvdAIOCB
*acb
= opaque
;
278 acb
->common
.cb (acb
->common
.opaque
, 0);
279 qemu_bh_delete (acb
->wrapper
.bh
);
280 my_qemu_aio_unref (acb
);
283 static void finish_load_data_from_compact_image (void *opaque
, int ret
)
285 CompactChildCB
*child
= opaque
;
286 FvdAIOCB
*acb
= child
->acb
;
288 /* Now fvd_store_compact_cancel(), if invoked, won't cancel this child
290 child
->hd_acb
= NULL
;
292 if (acb
->load
.ret
== 0) {
295 QDEBUG ("LOAD: acb%llu-%p load_child=%d total_children=%d "
296 "error ret=%d\n", acb
->uuid
, acb
, acb
->load
.finished_children
,
297 acb
->load
.num_children
, ret
);
300 acb
->load
.finished_children
++;
301 if (acb
->load
.finished_children
< acb
->load
.num_children
) {
302 QDEBUG ("LOAD: acb%llu-%p load_finished_children=%d "
303 "total_children=%d\n", acb
->uuid
, acb
,
304 acb
->load
.finished_children
, acb
->load
.num_children
);
308 QDEBUG ("LOAD: acb%llu-%p load_last_child_finished ret=%d\n", acb
->uuid
,
310 acb
->common
.cb (acb
->common
.opaque
, acb
->load
.ret
);
311 if (acb
->load
.children
) {
312 my_qemu_free (acb
->load
.children
);
314 my_qemu_aio_unref (acb
);
317 static inline FvdAIOCB
*init_load_acb (FvdAIOCB
* parent_acb
,
318 BlockDriverState
* bs
,
320 QEMUIOVector
* orig_qiov
,
322 BlockDriverCompletionFunc
* cb
,
325 FvdAIOCB
*const acb
= my_qemu_aio_get (&fvd_aio_pool
, bs
, cb
, opaque
);
329 acb
->type
= OP_LOAD_COMPACT
;
330 acb
->sector_num
= sector_num
;
331 acb
->nb_sectors
= nb_sectors
;
332 acb
->load
.parent_acb
= parent_acb
;
333 acb
->load
.finished_children
= 0;
334 acb
->load
.children
= NULL
;
335 acb
->load
.one_child
.hd_acb
= NULL
;
336 acb
->load
.orig_qiov
= orig_qiov
;
338 COPY_UUID (acb
, parent_acb
);
343 static void fvd_wrapper_cancel (FvdAIOCB
* acb
)
345 qemu_bh_cancel (acb
->wrapper
.bh
);
346 qemu_bh_delete (acb
->wrapper
.bh
);
347 my_qemu_aio_unref (acb
);
350 static void fvd_load_compact_cancel (FvdAIOCB
* acb
)
352 if (acb
->load
.children
) {
354 for (i
= 0; i
< acb
->load
.num_children
; i
++) {
355 if (acb
->load
.children
[i
].hd_acb
) {
356 bdrv_aio_cancel (acb
->load
.children
[i
].hd_acb
);
359 my_qemu_free (acb
->load
.children
);
361 if (acb
->load
.one_child
.hd_acb
) {
362 bdrv_aio_cancel (acb
->load
.one_child
.hd_acb
);
364 my_qemu_aio_unref (acb
);