/*
 * Copyright (c) 2010-2011 IBM
 *
 * Authors:
 *         Chunqiang Tang <ctang@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.
 * See the COPYING file in the top-level directory.
 */

/*==============================================================================
 * A short description: this module implements basic utility functions for
 * the Fast Virtual Disk (FVD) format.
 *============================================================================*/
16 static inline int stale_bitmap_show_sector_in_base_img (int64_t sector_num
,
17 const BDRVFvdState
* s
)
19 if (sector_num
>= s
->nb_sectors_in_base_img
) {
23 int64_t block_num
= sector_num
/ s
->block_size
;
24 int64_t bitmap_byte_offset
= block_num
/ 8;
25 uint8_t bitmap_bit_offset
= block_num
% 8;
26 uint8_t b
= s
->stale_bitmap
[bitmap_byte_offset
];
27 return 0 == (int) ((b
>> bitmap_bit_offset
) & 0x01);
31 fresh_bitmap_show_sector_in_base_img (int64_t sector_num
,
32 const BDRVFvdState
* s
)
34 if (sector_num
>= s
->nb_sectors_in_base_img
) {
38 int64_t block_num
= sector_num
/ s
->block_size
;
39 int64_t bitmap_byte_offset
= block_num
/ 8;
40 uint8_t bitmap_bit_offset
= block_num
% 8;
41 uint8_t b
= s
->fresh_bitmap
[bitmap_byte_offset
];
42 return 0 == (int) ((b
>> bitmap_bit_offset
) & 0x01);
45 static inline void update_fresh_bitmap (int64_t sector_num
, int nb_sectors
,
46 const BDRVFvdState
* s
)
48 if (sector_num
>= s
->nb_sectors_in_base_img
) {
52 int64_t end
= sector_num
+ nb_sectors
;
53 if (end
> s
->nb_sectors_in_base_img
) {
54 end
= s
->nb_sectors_in_base_img
;
57 int64_t block_num
= sector_num
/ s
->block_size
;
58 int64_t block_end
= (end
- 1) / s
->block_size
;
60 for (; block_num
<= block_end
; block_num
++) {
61 int64_t bitmap_byte_offset
= block_num
/ 8;
62 uint8_t bitmap_bit_offset
= block_num
% 8;
63 uint8_t mask
= (uint8_t) (0x01 << bitmap_bit_offset
);
64 uint8_t b
= s
->fresh_bitmap
[bitmap_byte_offset
];
67 s
->fresh_bitmap
[bitmap_byte_offset
] = b
;
72 static void update_stale_bitmap (BDRVFvdState
* s
, int64_t sector_num
,
75 if (sector_num
>= s
->nb_sectors_in_base_img
) {
79 int64_t end
= sector_num
+ nb_sectors
;
80 if (end
> s
->nb_sectors_in_base_img
) {
81 end
= s
->nb_sectors_in_base_img
;
84 int64_t block_num
= sector_num
/ s
->block_size
;
85 const int64_t block_end
= (end
- 1) / s
->block_size
;
87 for (; block_num
<= block_end
; block_num
++) {
88 int64_t bitmap_byte_offset
= block_num
/ 8;
89 uint8_t bitmap_bit_offset
= block_num
% 8;
90 uint8_t mask
= (uint8_t) (0x01 << bitmap_bit_offset
);
91 uint8_t b
= s
->stale_bitmap
[bitmap_byte_offset
];
93 ASSERT (s
->stale_bitmap
== s
->fresh_bitmap
||
94 (s
->fresh_bitmap
[bitmap_byte_offset
] & mask
));
96 s
->stale_bitmap
[bitmap_byte_offset
] = b
;
101 static void update_both_bitmaps (BDRVFvdState
* s
, int64_t sector_num
,
104 if (sector_num
>= s
->nb_sectors_in_base_img
) {
108 int64_t end
= sector_num
+ nb_sectors
;
109 if (end
> s
->nb_sectors_in_base_img
) {
110 end
= s
->nb_sectors_in_base_img
;
113 int64_t block_num
= sector_num
/ s
->block_size
;
114 const int64_t block_end
= (end
- 1) / s
->block_size
;
116 for (; block_num
<= block_end
; block_num
++) {
117 int64_t bitmap_byte_offset
= block_num
/ 8;
118 uint8_t bitmap_bit_offset
= block_num
% 8;
119 uint8_t mask
= (uint8_t) (0x01 << bitmap_bit_offset
);
120 uint8_t b
= s
->fresh_bitmap
[bitmap_byte_offset
];
123 s
->fresh_bitmap
[bitmap_byte_offset
] =
124 s
->stale_bitmap
[bitmap_byte_offset
] = b
;
129 /* Return TRUE if a valid region is found. */
130 static int find_region_in_base_img (BDRVFvdState
* s
, int64_t * from
,
134 int64_t last_sec
= *to
;
136 if (last_sec
> s
->nb_sectors_in_base_img
) {
137 last_sec
= s
->nb_sectors_in_base_img
;
140 if (sec
>= last_sec
) {
144 if (!fresh_bitmap_show_sector_in_base_img (sec
, s
)) {
145 /* Find the first sector in the base image. */
147 sec
= ROUND_UP (sec
+ 1, s
->block_size
); /* Begin of next block. */
149 if (sec
>= last_sec
) {
152 if (fresh_bitmap_show_sector_in_base_img (sec
, s
)) {
155 sec
+= s
->block_size
; /* Begin of the next block. */
159 /* Find the end of the region in the base image. */
160 int64_t first_sec
= sec
;
161 sec
= ROUND_UP (sec
+ 1, s
->block_size
); /* Begin of next block. */
163 if (sec
>= last_sec
) {
167 if (!fresh_bitmap_show_sector_in_base_img (sec
, s
)) {
170 sec
+= s
->block_size
; /* Begin of the next block. */
174 /* Check conflicting copy-on-reads. */
176 QLIST_FOREACH (old
, &s
->copy_locks
, copy_lock
.next
) {
177 if (old
->copy_lock
.begin
<= first_sec
178 && first_sec
< old
->copy_lock
.end
) {
179 first_sec
= old
->copy_lock
.end
;
181 if (old
->copy_lock
.begin
< last_sec
&& last_sec
<= old
->copy_lock
.end
) {
182 last_sec
= old
->copy_lock
.begin
;
186 if (first_sec
>= last_sec
) {
187 return FALSE
; /* The entire region is already covered. */
190 /* This loop cannot be merged with the loop above. Otherwise, the logic
191 * would be incorrect. This loop covers the case that an old request
192 * spans over a subset of the region being checked. */
193 QLIST_FOREACH (old
, &s
->copy_locks
, copy_lock
.next
) {
194 if (first_sec
<= old
->copy_lock
.begin
195 && old
->copy_lock
.begin
< last_sec
) {
196 last_sec
= old
->copy_lock
.begin
;
200 /* Check conflicting writes. */
201 QLIST_FOREACH (old
, &s
->write_locks
, write
.next_write_lock
) {
202 int64_t old_end
= old
->sector_num
+ old
->nb_sectors
;
203 if (old
->sector_num
<= first_sec
&& first_sec
< old_end
) {
206 if (old
->sector_num
< last_sec
&& last_sec
<= old_end
) {
207 last_sec
= old
->sector_num
;
211 if (first_sec
>= last_sec
) {
212 return FALSE
; /* The entire region is already covered. */
215 /* This loop cannot be merged with the loop above. Otherwise, the logic
216 * would be incorrect. This loop covers the case that an old request
217 * spans over a subset of the region being checked. */
218 QLIST_FOREACH (old
, &s
->write_locks
, write
.next_write_lock
) {
219 if (first_sec
<= old
->sector_num
&& old
->sector_num
< last_sec
) {
220 last_sec
= old
->sector_num
;
224 ASSERT (first_sec
% s
->block_size
== 0 && (last_sec
% s
->block_size
== 0
225 || last_sec
== s
->nb_sectors_in_base_img
));
232 static inline int bitmap_show_sector_in_base_img (int64_t sector_num
,
233 const BDRVFvdState
* s
,
237 if (sector_num
>= s
->nb_sectors_in_base_img
) {
241 int64_t block_num
= sector_num
/ s
->block_size
;
242 int64_t bitmap_byte_offset
= block_num
/ 8 - bitmap_offset
;
243 uint8_t bitmap_bit_offset
= block_num
% 8;
244 uint8_t b
= bitmap
[bitmap_byte_offset
];
245 return 0 == (int) ((b
>> bitmap_bit_offset
) & 0x01);
/* Copy 'total' bytes from 'source' into the scatter list 'iov', resuming at
 * the cursor described by (*p_index, *p_buf, *p_left) and advancing the
 * cursor past the copied bytes. The caller guarantees the iov has room. */
static inline void copy_to_iov (struct iovec *iov, int *p_index,
                                uint8_t ** p_buf, int *p_left,
                                uint8_t * source, int total)
{
    int index = *p_index;
    uint8_t *buf = *p_buf;
    int left = *p_left;

    if (left <= 0) {
        /* Current entry exhausted: move to the next iov entry. */
        index++;
        buf = iov[index].iov_base;
        left = iov[index].iov_len;
    }

    while (1) {
        if (left >= total) {
            /* The rest fits in the current entry: copy and save cursor. */
            memcpy (buf, source, total);
            *p_buf = buf + total;
            *p_left = left - total;
            *p_index = index;
            return;
        }

        /* Fill the current entry completely and continue with the next. */
        memcpy (buf, source, left);
        total -= left;
        source += left;
        index++;
        buf = iov[index].iov_base;
        left = iov[index].iov_len;
    }
}
280 static inline void init_data_region (BDRVFvdState
* s
)
282 bdrv_truncate (s
->fvd_data
, s
->data_offset
* 512 + s
->virtual_disk_size
);
283 s
->data_region_prepared
= TRUE
;
286 static inline void update_clean_shutdown_flag (BDRVFvdState
* s
, int clean
)
289 if (!read_fvd_header (s
, &header
)) {
290 header
.clean_shutdown
= clean
;
292 if (!update_fvd_header (s
, &header
)) {
293 QDEBUG ("Set clean_shutdown to %s\n", BOOL (clean
));
298 static inline int stale_bitmap_need_update (FvdAIOCB
* acb
)
300 BlockDriverState
*bs
= acb
->common
.bs
;
301 BDRVFvdState
*s
= bs
->opaque
;
302 int64_t end
= acb
->sector_num
+ acb
->nb_sectors
;
304 if (end
> s
->nb_sectors_in_base_img
) {
305 end
= s
->nb_sectors_in_base_img
;
307 int64_t block_end
= (end
- 1) / s
->block_size
;
308 int64_t block_num
= acb
->sector_num
/ s
->block_size
;
310 for (; block_num
<= block_end
; block_num
++) {
311 int64_t bitmap_byte_offset
= block_num
/ 8;
312 uint8_t bitmap_bit_offset
= block_num
% 8;
313 uint8_t mask
= (uint8_t) (0x01 << bitmap_bit_offset
);
314 uint8_t b
= s
->stale_bitmap
[bitmap_byte_offset
];
323 static int update_fresh_bitmap_and_check_stale_bitmap (FvdAIOCB
* acb
)
325 BlockDriverState
*bs
= acb
->common
.bs
;
326 BDRVFvdState
*s
= bs
->opaque
;
328 if (acb
->sector_num
>= s
->nb_sectors_in_base_img
) {
332 int need_update
= FALSE
;
333 int64_t end
= acb
->sector_num
+ acb
->nb_sectors
;
335 if (end
> s
->nb_sectors_in_base_img
) {
336 end
= s
->nb_sectors_in_base_img
;
339 int64_t block_end
= (end
- 1) / s
->block_size
;
340 int64_t block_num
= acb
->sector_num
/ s
->block_size
;
342 for (; block_num
<= block_end
; block_num
++) {
343 int64_t bitmap_byte_offset
= block_num
/ 8;
344 uint8_t bitmap_bit_offset
= block_num
% 8;
345 uint8_t mask
= (uint8_t) (0x01 << bitmap_bit_offset
);
346 uint8_t b
= s
->stale_bitmap
[bitmap_byte_offset
];
348 /* If the bit in stale_bitmap is set, the corresponding bit in
349 * fresh_bitmap must be set already. */
354 b
= s
->fresh_bitmap
[bitmap_byte_offset
];
357 s
->fresh_bitmap
[bitmap_byte_offset
] = b
;
364 static void fvd_header_cpu_to_le (FvdHeader
* header
)
366 cpu_to_le32s (&header
->magic
);
367 cpu_to_le32s (&header
->version
);
368 cpu_to_le32s ((uint32_t *) & header
->all_data_in_fvd_img
);
369 cpu_to_le32s ((uint32_t *) & header
->generate_prefetch_profile
);
370 cpu_to_le64s ((uint64_t *) & header
->metadata_size
);
371 cpu_to_le64s ((uint64_t *) & header
->virtual_disk_size
);
372 cpu_to_le64s ((uint64_t *) & header
->base_img_size
);
373 cpu_to_le64s ((uint64_t *) & header
->max_outstanding_copy_on_read_data
);
374 cpu_to_le64s ((uint64_t *) & header
->bitmap_offset
);
375 cpu_to_le64s ((uint64_t *) & header
->prefetch_profile_offset
);
376 cpu_to_le64s ((uint64_t *) & header
->prefetch_profile_entries
);
377 cpu_to_le64s ((uint64_t *) & header
->bitmap_size
);
378 cpu_to_le32s ((uint32_t *) & header
->copy_on_read
);
379 cpu_to_le32s ((uint32_t *) & header
->need_zero_init
);
380 cpu_to_le32s ((uint32_t *) & header
->prefetch_start_delay
);
381 cpu_to_le32s ((uint32_t *) & header
->profile_directed_prefetch_start_delay
);
382 cpu_to_le32s ((uint32_t *) & header
->num_prefetch_slots
);
383 cpu_to_le32s ((uint32_t *) & header
->bytes_per_prefetch
);
384 cpu_to_le32s ((uint32_t *) & header
->prefetch_throttle_time
);
385 cpu_to_le32s ((uint32_t *) & header
->prefetch_read_throughput_measure_time
);
386 cpu_to_le32s ((uint32_t *) &header
->prefetch_write_throughput_measure_time
);
387 cpu_to_le32s ((uint32_t *) & header
->prefetch_perf_calc_alpha
);
388 cpu_to_le32s ((uint32_t *) & header
->prefetch_min_read_throughput
);
389 cpu_to_le32s ((uint32_t *) & header
->prefetch_min_write_throughput
);
390 cpu_to_le32s ((uint32_t *) & header
->prefetch_max_read_throughput
);
391 cpu_to_le32s ((uint32_t *) & header
->prefetch_max_write_throughput
);
392 cpu_to_le32s ((uint32_t *) & header
->block_size
);
393 cpu_to_le32s ((uint32_t *) & header
->unit_of_PrefetchProfileEntry_len
);
394 cpu_to_le32s ((uint32_t *) & header
->compact_image
);
395 cpu_to_le64s ((uint64_t *) & header
->chunk_size
);
396 cpu_to_le64s ((uint64_t *) & header
->storage_grow_unit
);
397 cpu_to_le64s ((uint64_t *) & header
->table_offset
);
398 cpu_to_le32s ((uint32_t *) & header
->clean_shutdown
);
399 cpu_to_le64s ((uint64_t *) & header
->journal_offset
);
400 cpu_to_le64s ((uint64_t *) & header
->journal_size
);
403 static void fvd_header_le_to_cpu (FvdHeader
* header
)
405 le32_to_cpus (&header
->magic
);
406 le32_to_cpus (&header
->version
);
407 le32_to_cpus ((uint32_t *) & header
->all_data_in_fvd_img
);
408 le32_to_cpus ((uint32_t *) & header
->generate_prefetch_profile
);
409 le64_to_cpus ((uint64_t *) & header
->metadata_size
);
410 le64_to_cpus ((uint64_t *) & header
->virtual_disk_size
);
411 le64_to_cpus ((uint64_t *) & header
->base_img_size
);
412 le64_to_cpus ((uint64_t *) & header
->max_outstanding_copy_on_read_data
);
413 le64_to_cpus ((uint64_t *) & header
->bitmap_offset
);
414 le64_to_cpus ((uint64_t *) & header
->prefetch_profile_offset
);
415 le64_to_cpus ((uint64_t *) & header
->prefetch_profile_entries
);
416 le64_to_cpus ((uint64_t *) & header
->bitmap_size
);
417 le32_to_cpus ((uint32_t *) & header
->copy_on_read
);
418 le32_to_cpus ((uint32_t *) & header
->need_zero_init
);
419 le32_to_cpus ((uint32_t *) & header
->prefetch_start_delay
);
420 le32_to_cpus ((uint32_t *) & header
->profile_directed_prefetch_start_delay
);
421 le32_to_cpus ((uint32_t *) & header
->num_prefetch_slots
);
422 le32_to_cpus ((uint32_t *) & header
->bytes_per_prefetch
);
423 le32_to_cpus ((uint32_t *) & header
->prefetch_throttle_time
);
424 le32_to_cpus ((uint32_t *) & header
->prefetch_read_throughput_measure_time
);
425 le32_to_cpus ((uint32_t *) &header
->prefetch_write_throughput_measure_time
);
426 le32_to_cpus ((uint32_t *) & header
->prefetch_perf_calc_alpha
);
427 le32_to_cpus ((uint32_t *) & header
->prefetch_min_read_throughput
);
428 le32_to_cpus ((uint32_t *) & header
->prefetch_min_write_throughput
);
429 le32_to_cpus ((uint32_t *) & header
->prefetch_max_read_throughput
);
430 le32_to_cpus ((uint32_t *) & header
->prefetch_max_write_throughput
);
431 le32_to_cpus ((uint32_t *) & header
->block_size
);
432 le32_to_cpus ((uint32_t *) & header
->unit_of_PrefetchProfileEntry_len
);
433 le32_to_cpus ((uint32_t *) & header
->compact_image
);
434 le64_to_cpus ((uint64_t *) & header
->chunk_size
);
435 le64_to_cpus ((uint64_t *) & header
->storage_grow_unit
);
436 le64_to_cpus ((uint64_t *) & header
->table_offset
);
437 le32_to_cpus ((uint32_t *) & header
->clean_shutdown
);
438 le64_to_cpus ((uint64_t *) & header
->journal_offset
);
439 le64_to_cpus ((uint64_t *) & header
->journal_size
);
442 static void flush_metadata_to_disk (BlockDriverState
* bs
)
444 BDRVFvdState
*s
= bs
->opaque
;
446 if (bs
->read_only
|| !s
->fvd_metadata
) {
450 if (s
->stale_bitmap
) {
451 /* Flush fresh_bitmap to disk. */
452 int nb
= (int) (s
->bitmap_size
/ 512);
453 QDEBUG ("Flush FVD bitmap (%d sectors) to disk\n", nb
);
454 bdrv_write (s
->fvd_metadata
, s
->bitmap_offset
, s
->fresh_bitmap
, nb
);
458 /* Flush table to disk. */
460 (int) (ROUND_UP (s
->virtual_disk_size
, s
->chunk_size
* 512) /
461 (s
->chunk_size
* 512));
463 /* Clean the DIRTY_TABLE bit. */
465 for (i
= 0; i
< table_entries
; i
++) {
466 CLEAN_DIRTY (s
->table
[i
]);
469 int64_t table_size
= sizeof (uint32_t) * table_entries
;
470 table_size
= ROUND_UP (table_size
, DEF_PAGE_SIZE
);
471 int nb
= (int) (table_size
/ 512);
472 QDEBUG ("Flush FVD table (%d sectors) to disk\n", nb
);
473 bdrv_write (s
->fvd_metadata
, s
->table_offset
, (uint8_t *) s
->table
, nb
);
477 static int read_fvd_header (BDRVFvdState
* s
, FvdHeader
* header
)
479 if (bdrv_pread (s
->fvd_metadata
, 0, header
, sizeof (FvdHeader
)) !=
480 sizeof (FvdHeader
)) {
481 fprintf (stderr
, "Failed to read the FVD header.\n");
485 fvd_header_le_to_cpu (header
);
487 if (header
->magic
!= FVD_MAGIC
|| header
->version
!= FVD_VERSION
) {
488 fprintf (stderr
, "Error: image does not have the correct FVD format "
489 "magic number in header\n");
496 static int update_fvd_header (BDRVFvdState
* s
, FvdHeader
* header
)
498 fvd_header_cpu_to_le (header
);
499 int ret
= bdrv_pwrite (s
->fvd_metadata
, 0, header
, sizeof (FvdHeader
));
501 if (ret
!= sizeof (FvdHeader
)) {
502 fprintf (stderr
, "Failed to update the FVD header.\n");
/* Sentinel completion callback. */
static void null_prefetch_cb (void *opaque, int ret)
{
    /* Nothing to do and will never be invoked. Only need it to distinguish
     * copy-on-read from prefetch. */
}
/* Count how many iov entries are needed to hold 'total' bytes starting at the
 * cursor (*p_index, *p_buf, *p_left), and advance the cursor past those
 * bytes. Returns the entry count. The caller guarantees capacity. */
static int count_iov (struct iovec *orig_iov, int *p_index, uint8_t ** p_buf,
                      size_t * p_left, size_t total)
{
    int index = *p_index;
    uint8_t *buf = *p_buf;
    size_t left = *p_left;
    int count = 0;

    if (left <= 0) {
        /* Current entry exhausted: move to the next iov entry. */
        index++;
        buf = orig_iov[index].iov_base;
        left = orig_iov[index].iov_len;
    }

    while (1) {
        if (left >= total) {
            /* The remainder fits in the current entry: save cursor. */
            *p_index = index;
            *p_buf = buf + total;
            *p_left = left - total;
            return count + 1;
        }

        /* Consume the whole current entry and continue with the next. */
        total -= left;
        count++;
        index++;
        buf = orig_iov[index].iov_base;
        left = orig_iov[index].iov_len;
    }
}
/* Build 'new_iov' covering 'total' bytes of 'orig_iov', starting at the
 * cursor (*p_index, *p_buf, *p_left). The cursor is advanced past the
 * covered bytes. Returns the number of entries written to new_iov. */
static int setup_iov (struct iovec *orig_iov, struct iovec *new_iov,
                      int *p_index, uint8_t ** p_buf, size_t * p_left,
                      size_t total)
{
    int index = *p_index;
    uint8_t *buf = *p_buf;
    size_t left = *p_left;
    int count = 0;

    if (left <= 0) {
        /* Current entry exhausted: move to the next iov entry. */
        index++;
        buf = orig_iov[index].iov_base;
        left = orig_iov[index].iov_len;
    }

    while (1) {
        if (left >= total) {
            /* The remainder fits in the current entry: emit it, save cursor. */
            new_iov[count].iov_base = buf;
            new_iov[count].iov_len = total;
            *p_index = index;
            *p_buf = buf + total;
            *p_left = left - total;
            return count + 1;
        }

        /* Emit the whole current entry and continue with the next. */
        new_iov[count].iov_base = buf;
        new_iov[count].iov_len = left;
        total -= left;
        count++;
        index++;
        buf = orig_iov[index].iov_base;
        left = orig_iov[index].iov_len;
    }
}
582 static int zero_iov (struct iovec
*orig_iov
, int *p_index
, uint8_t ** p_buf
,
583 size_t * p_left
, size_t total
)
585 int index
= *p_index
;
586 uint8_t *buf
= *p_buf
;
592 buf
= orig_iov
[index
].iov_base
;
593 left
= orig_iov
[index
].iov_len
;
598 memset (buf
, 0, total
);
599 *p_buf
= buf
+ total
;
600 *p_left
= left
- total
;
605 memset (buf
, 0, left
);
608 buf
= orig_iov
[index
].iov_base
;
609 left
= orig_iov
[index
].iov_len
;