2 * Block driver for Parallels disk image format
4 * Copyright (c) 2007 Alex Beregszaszi
5 * Copyright (c) 2015 Denis V. Lunev <den@openvz.org>
7 * This code was originally based on comparing different disk images created
8 * by Parallels. Currently it is based on opened OpenVZ sources
10 * http://git.openvz.org/?p=ploop;a=summary
12 * Permission is hereby granted, free of charge, to any person obtaining a copy
13 * of this software and associated documentation files (the "Software"), to deal
14 * in the Software without restriction, including without limitation the rights
15 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
16 * copies of the Software, and to permit persons to whom the Software is
17 * furnished to do so, subject to the following conditions:
19 * The above copyright notice and this permission notice shall be included in
20 * all copies or substantial portions of the Software.
22 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
23 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
24 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
25 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
26 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
27 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
30 #include "qemu/osdep.h"
31 #include "qapi/error.h"
32 #include "qemu-common.h"
33 #include "block/block_int.h"
34 #include "sysemu/block-backend.h"
35 #include "qemu/module.h"
36 #include "qemu/bswap.h"
37 #include "qemu/bitmap.h"
38 #include "qapi/util.h"
40 /**************************************************************/
42 #define HEADER_MAGIC "WithoutFreeSpace"
43 #define HEADER_MAGIC2 "WithouFreSpacExt"
44 #define HEADER_VERSION 2
45 #define HEADER_INUSE_MAGIC (0x746F6E59)
46 #define MAX_PARALLELS_IMAGE_FACTOR (1ull << 32)
48 #define DEFAULT_CLUSTER_SIZE 1048576 /* 1 MiB */
51 // always little-endian
52 typedef struct ParallelsHeader
{
53 char magic
[16]; // "WithoutFreeSpace"
63 } QEMU_PACKED ParallelsHeader
;
/*
 * Strategy used to grow the underlying image file when new clusters are
 * needed.  allocate_clusters() extends the file with bdrv_pwrite_zeroes()
 * when the mode is PRL_PREALLOC_MODE_FALLOCATE; bdrv_truncate() is the
 * alternative extension call.
 *
 * PRL_PREALLOC_MODE__MAX is not a mode: it is the number of modes and is
 * handed to qapi_enum_parse() as the upper bound when the "prealloc-mode"
 * option is parsed.
 */
typedef enum ParallelsPreallocMode {
    PRL_PREALLOC_MODE_FALLOCATE = 0,
    PRL_PREALLOC_MODE_TRUNCATE,         /* == 1 */
    PRL_PREALLOC_MODE__MAX,             /* == 2 */
} ParallelsPreallocMode;
72 static const char *prealloc_mode_lookup
[] = {
79 typedef struct BDRVParallelsState
{
80 /** Locking is conservative, the lock protects
81 * - image file extending (truncate, fallocate)
82 * - any access to block allocation table
86 ParallelsHeader
*header
;
90 unsigned long *bat_dirty_bmap
;
91 unsigned int bat_dirty_block
;
94 unsigned int bat_size
;
97 uint64_t prealloc_size
;
98 ParallelsPreallocMode prealloc_mode
;
102 unsigned int off_multiplier
;
103 } BDRVParallelsState
;
106 #define PARALLELS_OPT_PREALLOC_MODE "prealloc-mode"
107 #define PARALLELS_OPT_PREALLOC_SIZE "prealloc-size"
109 static QemuOptsList parallels_runtime_opts
= {
111 .head
= QTAILQ_HEAD_INITIALIZER(parallels_runtime_opts
.head
),
114 .name
= PARALLELS_OPT_PREALLOC_SIZE
,
115 .type
= QEMU_OPT_SIZE
,
116 .help
= "Preallocation size on image expansion",
117 .def_value_str
= "128MiB",
120 .name
= PARALLELS_OPT_PREALLOC_MODE
,
121 .type
= QEMU_OPT_STRING
,
122 .help
= "Preallocation mode on image expansion "
123 "(allowed values: falloc, truncate)",
124 .def_value_str
= "falloc",
126 { /* end of list */ },
131 static int64_t bat2sect(BDRVParallelsState
*s
, uint32_t idx
)
133 return (uint64_t)le32_to_cpu(s
->bat_bitmap
[idx
]) * s
->off_multiplier
;
136 static uint32_t bat_entry_off(uint32_t idx
)
138 return sizeof(ParallelsHeader
) + sizeof(uint32_t) * idx
;
141 static int64_t seek_to_sector(BDRVParallelsState
*s
, int64_t sector_num
)
143 uint32_t index
, offset
;
145 index
= sector_num
/ s
->tracks
;
146 offset
= sector_num
% s
->tracks
;
149 if ((index
>= s
->bat_size
) || (s
->bat_bitmap
[index
] == 0)) {
152 return bat2sect(s
, index
) + offset
;
155 static int cluster_remainder(BDRVParallelsState
*s
, int64_t sector_num
,
158 int ret
= s
->tracks
- sector_num
% s
->tracks
;
159 return MIN(nb_sectors
, ret
);
162 static int64_t block_status(BDRVParallelsState
*s
, int64_t sector_num
,
163 int nb_sectors
, int *pnum
)
165 int64_t start_off
= -2, prev_end_off
= -2;
168 while (nb_sectors
> 0 || start_off
== -2) {
169 int64_t offset
= seek_to_sector(s
, sector_num
);
172 if (start_off
== -2) {
174 prev_end_off
= offset
;
175 } else if (offset
!= prev_end_off
) {
179 to_end
= cluster_remainder(s
, sector_num
, nb_sectors
);
180 nb_sectors
-= to_end
;
181 sector_num
+= to_end
;
185 prev_end_off
+= to_end
;
191 static int64_t allocate_clusters(BlockDriverState
*bs
, int64_t sector_num
,
192 int nb_sectors
, int *pnum
)
194 BDRVParallelsState
*s
= bs
->opaque
;
195 uint32_t idx
, to_allocate
, i
;
198 pos
= block_status(s
, sector_num
, nb_sectors
, pnum
);
203 idx
= sector_num
/ s
->tracks
;
204 if (idx
>= s
->bat_size
) {
208 to_allocate
= DIV_ROUND_UP(sector_num
+ *pnum
, s
->tracks
) - idx
;
209 space
= to_allocate
* s
->tracks
;
210 if (s
->data_end
+ space
> bdrv_getlength(bs
->file
->bs
) >> BDRV_SECTOR_BITS
) {
212 space
+= s
->prealloc_size
;
213 if (s
->prealloc_mode
== PRL_PREALLOC_MODE_FALLOCATE
) {
214 ret
= bdrv_pwrite_zeroes(bs
->file
,
215 s
->data_end
<< BDRV_SECTOR_BITS
,
216 space
<< BDRV_SECTOR_BITS
, 0);
218 ret
= bdrv_truncate(bs
->file
,
219 (s
->data_end
+ space
) << BDRV_SECTOR_BITS
);
226 for (i
= 0; i
< to_allocate
; i
++) {
227 s
->bat_bitmap
[idx
+ i
] = cpu_to_le32(s
->data_end
/ s
->off_multiplier
);
228 s
->data_end
+= s
->tracks
;
229 bitmap_set(s
->bat_dirty_bmap
,
230 bat_entry_off(idx
+ i
) / s
->bat_dirty_block
, 1);
233 return bat2sect(s
, idx
) + sector_num
% s
->tracks
;
237 static coroutine_fn
int parallels_co_flush_to_os(BlockDriverState
*bs
)
239 BDRVParallelsState
*s
= bs
->opaque
;
240 unsigned long size
= DIV_ROUND_UP(s
->header_size
, s
->bat_dirty_block
);
243 qemu_co_mutex_lock(&s
->lock
);
245 bit
= find_first_bit(s
->bat_dirty_bmap
, size
);
247 uint32_t off
= bit
* s
->bat_dirty_block
;
248 uint32_t to_write
= s
->bat_dirty_block
;
251 if (off
+ to_write
> s
->header_size
) {
252 to_write
= s
->header_size
- off
;
254 ret
= bdrv_pwrite(bs
->file
, off
, (uint8_t *)s
->header
+ off
,
257 qemu_co_mutex_unlock(&s
->lock
);
260 bit
= find_next_bit(s
->bat_dirty_bmap
, size
, bit
+ 1);
262 bitmap_zero(s
->bat_dirty_bmap
, size
);
264 qemu_co_mutex_unlock(&s
->lock
);
269 static int64_t coroutine_fn
parallels_co_get_block_status(BlockDriverState
*bs
,
270 int64_t sector_num
, int nb_sectors
, int *pnum
, BlockDriverState
**file
)
272 BDRVParallelsState
*s
= bs
->opaque
;
275 qemu_co_mutex_lock(&s
->lock
);
276 offset
= block_status(s
, sector_num
, nb_sectors
, pnum
);
277 qemu_co_mutex_unlock(&s
->lock
);
283 *file
= bs
->file
->bs
;
284 return (offset
<< BDRV_SECTOR_BITS
) |
285 BDRV_BLOCK_DATA
| BDRV_BLOCK_OFFSET_VALID
;
288 static coroutine_fn
int parallels_co_writev(BlockDriverState
*bs
,
289 int64_t sector_num
, int nb_sectors
, QEMUIOVector
*qiov
)
291 BDRVParallelsState
*s
= bs
->opaque
;
292 uint64_t bytes_done
= 0;
293 QEMUIOVector hd_qiov
;
296 qemu_iovec_init(&hd_qiov
, qiov
->niov
);
298 while (nb_sectors
> 0) {
302 qemu_co_mutex_lock(&s
->lock
);
303 position
= allocate_clusters(bs
, sector_num
, nb_sectors
, &n
);
304 qemu_co_mutex_unlock(&s
->lock
);
310 nbytes
= n
<< BDRV_SECTOR_BITS
;
312 qemu_iovec_reset(&hd_qiov
);
313 qemu_iovec_concat(&hd_qiov
, qiov
, bytes_done
, nbytes
);
315 ret
= bdrv_co_writev(bs
->file
, position
, n
, &hd_qiov
);
322 bytes_done
+= nbytes
;
325 qemu_iovec_destroy(&hd_qiov
);
329 static coroutine_fn
int parallels_co_readv(BlockDriverState
*bs
,
330 int64_t sector_num
, int nb_sectors
, QEMUIOVector
*qiov
)
332 BDRVParallelsState
*s
= bs
->opaque
;
333 uint64_t bytes_done
= 0;
334 QEMUIOVector hd_qiov
;
337 qemu_iovec_init(&hd_qiov
, qiov
->niov
);
339 while (nb_sectors
> 0) {
343 qemu_co_mutex_lock(&s
->lock
);
344 position
= block_status(s
, sector_num
, nb_sectors
, &n
);
345 qemu_co_mutex_unlock(&s
->lock
);
347 nbytes
= n
<< BDRV_SECTOR_BITS
;
350 qemu_iovec_memset(qiov
, bytes_done
, 0, nbytes
);
352 qemu_iovec_reset(&hd_qiov
);
353 qemu_iovec_concat(&hd_qiov
, qiov
, bytes_done
, nbytes
);
355 ret
= bdrv_co_readv(bs
->file
, position
, n
, &hd_qiov
);
363 bytes_done
+= nbytes
;
366 qemu_iovec_destroy(&hd_qiov
);
371 static int parallels_check(BlockDriverState
*bs
, BdrvCheckResult
*res
,
374 BDRVParallelsState
*s
= bs
->opaque
;
375 int64_t size
, prev_off
, high_off
;
378 bool flush_bat
= false;
379 int cluster_size
= s
->tracks
<< BDRV_SECTOR_BITS
;
381 size
= bdrv_getlength(bs
->file
->bs
);
387 if (s
->header_unclean
) {
388 fprintf(stderr
, "%s image was not closed correctly\n",
389 fix
& BDRV_FIX_ERRORS
? "Repairing" : "ERROR");
391 if (fix
& BDRV_FIX_ERRORS
) {
392 /* parallels_close will do the job right */
393 res
->corruptions_fixed
++;
394 s
->header_unclean
= false;
398 res
->bfi
.total_clusters
= s
->bat_size
;
399 res
->bfi
.compressed_clusters
= 0; /* compression is not supported */
403 for (i
= 0; i
< s
->bat_size
; i
++) {
404 int64_t off
= bat2sect(s
, i
) << BDRV_SECTOR_BITS
;
410 /* cluster outside the image */
412 fprintf(stderr
, "%s cluster %u is outside image\n",
413 fix
& BDRV_FIX_ERRORS
? "Repairing" : "ERROR", i
);
415 if (fix
& BDRV_FIX_ERRORS
) {
417 s
->bat_bitmap
[i
] = 0;
418 res
->corruptions_fixed
++;
424 res
->bfi
.allocated_clusters
++;
425 if (off
> high_off
) {
429 if (prev_off
!= 0 && (prev_off
+ cluster_size
) != off
) {
430 res
->bfi
.fragmented_clusters
++;
436 ret
= bdrv_pwrite_sync(bs
->file
, 0, s
->header
, s
->header_size
);
443 res
->image_end_offset
= high_off
+ cluster_size
;
444 if (size
> res
->image_end_offset
) {
446 count
= DIV_ROUND_UP(size
- res
->image_end_offset
, cluster_size
);
447 fprintf(stderr
, "%s space leaked at the end of the image %" PRId64
"\n",
448 fix
& BDRV_FIX_LEAKS
? "Repairing" : "ERROR",
449 size
- res
->image_end_offset
);
451 if (fix
& BDRV_FIX_LEAKS
) {
452 ret
= bdrv_truncate(bs
->file
, res
->image_end_offset
);
457 res
->leaks_fixed
+= count
;
465 static int parallels_create(const char *filename
, QemuOpts
*opts
, Error
**errp
)
467 int64_t total_size
, cl_size
;
468 uint8_t tmp
[BDRV_SECTOR_SIZE
];
469 Error
*local_err
= NULL
;
471 uint32_t bat_entries
, bat_sectors
;
472 ParallelsHeader header
;
475 total_size
= ROUND_UP(qemu_opt_get_size_del(opts
, BLOCK_OPT_SIZE
, 0),
477 cl_size
= ROUND_UP(qemu_opt_get_size_del(opts
, BLOCK_OPT_CLUSTER_SIZE
,
478 DEFAULT_CLUSTER_SIZE
), BDRV_SECTOR_SIZE
);
479 if (total_size
>= MAX_PARALLELS_IMAGE_FACTOR
* cl_size
) {
480 error_propagate(errp
, local_err
);
484 ret
= bdrv_create_file(filename
, opts
, &local_err
);
486 error_propagate(errp
, local_err
);
490 file
= blk_new_open(filename
, NULL
, NULL
,
491 BDRV_O_RDWR
| BDRV_O_RESIZE
| BDRV_O_PROTOCOL
,
494 error_propagate(errp
, local_err
);
498 blk_set_allow_write_beyond_eof(file
, true);
500 ret
= blk_truncate(file
, 0);
505 bat_entries
= DIV_ROUND_UP(total_size
, cl_size
);
506 bat_sectors
= DIV_ROUND_UP(bat_entry_off(bat_entries
), cl_size
);
507 bat_sectors
= (bat_sectors
* cl_size
) >> BDRV_SECTOR_BITS
;
509 memset(&header
, 0, sizeof(header
));
510 memcpy(header
.magic
, HEADER_MAGIC2
, sizeof(header
.magic
));
511 header
.version
= cpu_to_le32(HEADER_VERSION
);
512 /* don't care much about geometry, it is not used on image level */
513 header
.heads
= cpu_to_le32(16);
514 header
.cylinders
= cpu_to_le32(total_size
/ BDRV_SECTOR_SIZE
/ 16 / 32);
515 header
.tracks
= cpu_to_le32(cl_size
>> BDRV_SECTOR_BITS
);
516 header
.bat_entries
= cpu_to_le32(bat_entries
);
517 header
.nb_sectors
= cpu_to_le64(DIV_ROUND_UP(total_size
, BDRV_SECTOR_SIZE
));
518 header
.data_off
= cpu_to_le32(bat_sectors
);
520 /* write all the data */
521 memset(tmp
, 0, sizeof(tmp
));
522 memcpy(tmp
, &header
, sizeof(header
));
524 ret
= blk_pwrite(file
, 0, tmp
, BDRV_SECTOR_SIZE
, 0);
528 ret
= blk_pwrite_zeroes(file
, BDRV_SECTOR_SIZE
,
529 (bat_sectors
- 1) << BDRV_SECTOR_BITS
, 0);
540 error_setg_errno(errp
, -ret
, "Failed to create Parallels image");
545 static int parallels_probe(const uint8_t *buf
, int buf_size
,
546 const char *filename
)
548 const ParallelsHeader
*ph
= (const void *)buf
;
550 if (buf_size
< sizeof(ParallelsHeader
)) {
554 if ((!memcmp(ph
->magic
, HEADER_MAGIC
, 16) ||
555 !memcmp(ph
->magic
, HEADER_MAGIC2
, 16)) &&
556 (le32_to_cpu(ph
->version
) == HEADER_VERSION
)) {
563 static int parallels_update_header(BlockDriverState
*bs
)
565 BDRVParallelsState
*s
= bs
->opaque
;
566 unsigned size
= MAX(bdrv_opt_mem_align(bs
->file
->bs
),
567 sizeof(ParallelsHeader
));
569 if (size
> s
->header_size
) {
570 size
= s
->header_size
;
572 return bdrv_pwrite_sync(bs
->file
, 0, s
->header
, size
);
575 static int parallels_open(BlockDriverState
*bs
, QDict
*options
, int flags
,
578 BDRVParallelsState
*s
= bs
->opaque
;
581 QemuOpts
*opts
= NULL
;
582 Error
*local_err
= NULL
;
585 bs
->file
= bdrv_open_child(NULL
, options
, "file", bs
, &child_file
,
591 ret
= bdrv_pread(bs
->file
, 0, &ph
, sizeof(ph
));
596 bs
->total_sectors
= le64_to_cpu(ph
.nb_sectors
);
598 if (le32_to_cpu(ph
.version
) != HEADER_VERSION
) {
601 if (!memcmp(ph
.magic
, HEADER_MAGIC
, 16)) {
602 s
->off_multiplier
= 1;
603 bs
->total_sectors
= 0xffffffff & bs
->total_sectors
;
604 } else if (!memcmp(ph
.magic
, HEADER_MAGIC2
, 16)) {
605 s
->off_multiplier
= le32_to_cpu(ph
.tracks
);
610 s
->tracks
= le32_to_cpu(ph
.tracks
);
611 if (s
->tracks
== 0) {
612 error_setg(errp
, "Invalid image: Zero sectors per track");
616 if (s
->tracks
> INT32_MAX
/513) {
617 error_setg(errp
, "Invalid image: Too big cluster");
622 s
->bat_size
= le32_to_cpu(ph
.bat_entries
);
623 if (s
->bat_size
> INT_MAX
/ sizeof(uint32_t)) {
624 error_setg(errp
, "Catalog too large");
629 size
= bat_entry_off(s
->bat_size
);
630 s
->header_size
= ROUND_UP(size
, bdrv_opt_mem_align(bs
->file
->bs
));
631 s
->header
= qemu_try_blockalign(bs
->file
->bs
, s
->header_size
);
632 if (s
->header
== NULL
) {
636 s
->data_end
= le32_to_cpu(ph
.data_off
);
637 if (s
->data_end
== 0) {
638 s
->data_end
= ROUND_UP(bat_entry_off(s
->bat_size
), BDRV_SECTOR_SIZE
);
640 if (s
->data_end
< s
->header_size
) {
641 /* there is not enough unused space to fit to block align between BAT
642 and actual data. We can't avoid read-modify-write... */
643 s
->header_size
= size
;
646 ret
= bdrv_pread(bs
->file
, 0, s
->header
, s
->header_size
);
650 s
->bat_bitmap
= (uint32_t *)(s
->header
+ 1);
652 for (i
= 0; i
< s
->bat_size
; i
++) {
653 int64_t off
= bat2sect(s
, i
);
654 if (off
>= s
->data_end
) {
655 s
->data_end
= off
+ s
->tracks
;
659 if (le32_to_cpu(ph
.inuse
) == HEADER_INUSE_MAGIC
) {
660 /* Image was not closed correctly. The check is mandatory */
661 s
->header_unclean
= true;
662 if ((flags
& BDRV_O_RDWR
) && !(flags
& BDRV_O_CHECK
)) {
663 error_setg(errp
, "parallels: Image was not closed correctly; "
664 "cannot be opened read/write");
670 opts
= qemu_opts_create(¶llels_runtime_opts
, NULL
, 0, &local_err
);
671 if (local_err
!= NULL
) {
675 qemu_opts_absorb_qdict(opts
, options
, &local_err
);
676 if (local_err
!= NULL
) {
681 qemu_opt_get_size_del(opts
, PARALLELS_OPT_PREALLOC_SIZE
, 0);
682 s
->prealloc_size
= MAX(s
->tracks
, s
->prealloc_size
>> BDRV_SECTOR_BITS
);
683 buf
= qemu_opt_get_del(opts
, PARALLELS_OPT_PREALLOC_MODE
);
684 s
->prealloc_mode
= qapi_enum_parse(prealloc_mode_lookup
, buf
,
685 PRL_PREALLOC_MODE__MAX
, PRL_PREALLOC_MODE_FALLOCATE
, &local_err
);
687 if (local_err
!= NULL
) {
690 if (!bdrv_has_zero_init(bs
->file
->bs
) ||
691 bdrv_truncate(bs
->file
, bdrv_getlength(bs
->file
->bs
)) != 0) {
692 s
->prealloc_mode
= PRL_PREALLOC_MODE_FALLOCATE
;
695 if (flags
& BDRV_O_RDWR
) {
696 s
->header
->inuse
= cpu_to_le32(HEADER_INUSE_MAGIC
);
697 ret
= parallels_update_header(bs
);
703 s
->bat_dirty_block
= 4 * getpagesize();
705 bitmap_new(DIV_ROUND_UP(s
->header_size
, s
->bat_dirty_block
));
707 qemu_co_mutex_init(&s
->lock
);
711 error_setg(errp
, "Image not in Parallels format");
714 qemu_vfree(s
->header
);
718 error_propagate(errp
, local_err
);
724 static void parallels_close(BlockDriverState
*bs
)
726 BDRVParallelsState
*s
= bs
->opaque
;
728 if (bs
->open_flags
& BDRV_O_RDWR
) {
729 s
->header
->inuse
= 0;
730 parallels_update_header(bs
);
733 if (bs
->open_flags
& BDRV_O_RDWR
) {
734 bdrv_truncate(bs
->file
, s
->data_end
<< BDRV_SECTOR_BITS
);
737 g_free(s
->bat_dirty_bmap
);
738 qemu_vfree(s
->header
);
741 static QemuOptsList parallels_create_opts
= {
742 .name
= "parallels-create-opts",
743 .head
= QTAILQ_HEAD_INITIALIZER(parallels_create_opts
.head
),
746 .name
= BLOCK_OPT_SIZE
,
747 .type
= QEMU_OPT_SIZE
,
748 .help
= "Virtual disk size",
751 .name
= BLOCK_OPT_CLUSTER_SIZE
,
752 .type
= QEMU_OPT_SIZE
,
753 .help
= "Parallels image cluster size",
754 .def_value_str
= stringify(DEFAULT_CLUSTER_SIZE
),
756 { /* end of list */ }
760 static BlockDriver bdrv_parallels
= {
761 .format_name
= "parallels",
762 .instance_size
= sizeof(BDRVParallelsState
),
763 .bdrv_probe
= parallels_probe
,
764 .bdrv_open
= parallels_open
,
765 .bdrv_close
= parallels_close
,
766 .bdrv_child_perm
= bdrv_format_default_perms
,
767 .bdrv_co_get_block_status
= parallels_co_get_block_status
,
768 .bdrv_has_zero_init
= bdrv_has_zero_init_1
,
769 .bdrv_co_flush_to_os
= parallels_co_flush_to_os
,
770 .bdrv_co_readv
= parallels_co_readv
,
771 .bdrv_co_writev
= parallels_co_writev
,
773 .bdrv_create
= parallels_create
,
774 .bdrv_check
= parallels_check
,
775 .create_opts
= ¶llels_create_opts
,
778 static void bdrv_parallels_init(void)
780 bdrv_register(&bdrv_parallels
);
783 block_init(bdrv_parallels_init
);