VMDK: probe for monolithicFlat images
[qemu.git] / block / vmdk.c
blobf8a815c749aecd0d5ee438840f2536ec12d6652c
1 /*
2 * Block driver for the VMDK format
4 * Copyright (c) 2004 Fabrice Bellard
5 * Copyright (c) 2005 Filip Navara
7 * Permission is hereby granted, free of charge, to any person obtaining a copy
8 * of this software and associated documentation files (the "Software"), to deal
9 * in the Software without restriction, including without limitation the rights
10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 * copies of the Software, and to permit persons to whom the Software is
12 * furnished to do so, subject to the following conditions:
14 * The above copyright notice and this permission notice shall be included in
15 * all copies or substantial portions of the Software.
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 * THE SOFTWARE.
26 #include "qemu-common.h"
27 #include "block_int.h"
28 #include "module.h"
/*
 * Magic numbers of the two sparse extent formats. vmdk_probe() and
 * vmdk_open() compare them against the first four bytes of the file
 * interpreted as a big-endian 32-bit value.
 */
#define VMDK3_MAGIC 0x434f5744 /* 'C' 'O' 'W' 'D' */
#define VMDK4_MAGIC 0x4b444d56 /* 'K' 'D' 'M' 'V' */
/*
 * Header of a VMDK3 extent. vmdk_open() reads it from the file position
 * directly after the 4-byte magic and converts the fields it uses with
 * le32_to_cpu(), i.e. the on-disk values are little-endian.
 */
typedef struct {
    uint32_t version;
    uint32_t flags;
    uint32_t disk_sectors;      /* extent size in sectors */
    uint32_t granularity;       /* sectors per cluster */
    uint32_t l1dir_offset;      /* L1 table position, in sectors */
    uint32_t l1dir_size;
    uint32_t file_sectors;
    uint32_t cylinders;
    uint32_t heads;
    uint32_t sectors_per_track;
} VMDK3Header;
/*
 * Header of a VMDK4 extent, laid out exactly as on disk (hence packed).
 * It follows the 4-byte magic; vmdk_create() stores every multi-byte
 * field little-endian and vmdk_open() converts them back. All offsets
 * and sizes are expressed in 512-byte sectors.
 */
typedef struct {
    uint32_t version;
    uint32_t flags;
    int64_t capacity;           /* virtual disk size */
    int64_t granularity;        /* sectors per grain (cluster) */
    int64_t desc_offset;        /* embedded descriptor position */
    int64_t desc_size;          /* embedded descriptor length */
    int32_t num_gtes_per_gte;   /* entries per grain table */
    int64_t rgd_offset;         /* first grain directory written by vmdk_create() */
    int64_t gd_offset;          /* second grain directory written by vmdk_create() */
    int64_t grain_offset;       /* file is truncated to this position on creation */
    char filler[1];
    char check_bytes[4];
} __attribute__((packed)) VMDK4Header;
61 #define L2_CACHE_SIZE 16
63 typedef struct VmdkExtent {
64 BlockDriverState *file;
65 bool flat;
66 int64_t sectors;
67 int64_t end_sector;
68 int64_t l1_table_offset;
69 int64_t l1_backup_table_offset;
70 uint32_t *l1_table;
71 uint32_t *l1_backup_table;
72 unsigned int l1_size;
73 uint32_t l1_entry_sectors;
75 unsigned int l2_size;
76 uint32_t *l2_cache;
77 uint32_t l2_cache_offsets[L2_CACHE_SIZE];
78 uint32_t l2_cache_counts[L2_CACHE_SIZE];
80 unsigned int cluster_sectors;
81 } VmdkExtent;
83 typedef struct BDRVVmdkState {
84 uint32_t parent_cid;
85 int num_extents;
86 /* Extent array with num_extents entries, ascend ordered by address */
87 VmdkExtent *extents;
88 } BDRVVmdkState;
/*
 * Describes an L2 entry that get_cluster_offset() changed in memory and
 * that vmdk_L2update() still has to write to disk.
 */
typedef struct VmdkMetaData {
    uint32_t offset;        /* new entry value, already in on-disk byte order */
    unsigned int l1_index;  /* index of the owning L1 entry */
    unsigned int l2_index;  /* index of the entry inside its L2 table */
    unsigned int l2_offset; /* position of the L2 table, in sectors */
    int valid;              /* non-zero when the other fields are filled in */
} VmdkMetaData;
98 static int vmdk_probe(const uint8_t *buf, int buf_size, const char *filename)
100 uint32_t magic;
102 if (buf_size < 4)
103 return 0;
104 magic = be32_to_cpu(*(uint32_t *)buf);
105 if (magic == VMDK3_MAGIC ||
106 magic == VMDK4_MAGIC) {
107 return 100;
108 } else {
109 const char *p = (const char *)buf;
110 const char *end = p + buf_size;
111 while (p < end) {
112 if (*p == '#') {
113 /* skip comment line */
114 while (p < end && *p != '\n') {
115 p++;
117 p++;
118 continue;
120 if (*p == ' ') {
121 while (p < end && *p == ' ') {
122 p++;
124 /* skip '\r' if windows line endings used. */
125 if (p < end && *p == '\r') {
126 p++;
128 /* only accept blank lines before 'version=' line */
129 if (p == end || *p != '\n') {
130 return 0;
132 p++;
133 continue;
135 if (end - p >= strlen("version=X\n")) {
136 if (strncmp("version=1\n", p, strlen("version=1\n")) == 0 ||
137 strncmp("version=2\n", p, strlen("version=2\n")) == 0) {
138 return 100;
141 if (end - p >= strlen("version=X\r\n")) {
142 if (strncmp("version=1\r\n", p, strlen("version=1\r\n")) == 0 ||
143 strncmp("version=2\r\n", p, strlen("version=2\r\n")) == 0) {
144 return 100;
147 return 0;
149 return 0;
/* When defined, vmdk_is_cid_valid() compares the stored parent CID with the
 * CID of the backing image. */
#define CHECK_CID 1

#define SECTOR_SIZE 512
/* Parenthesized so the macro stays one operand inside larger expressions
 * (e.g. "x % DESC_SIZE" or "DESC_SIZE - 1"). */
#define DESC_SIZE (20 * SECTOR_SIZE)    /* 20 sectors of 512 bytes each */
#define HEADER_SIZE 512                 /* first sector of 512 bytes */
159 static void vmdk_free_extents(BlockDriverState *bs)
161 int i;
162 BDRVVmdkState *s = bs->opaque;
164 for (i = 0; i < s->num_extents; i++) {
165 qemu_free(s->extents[i].l1_table);
166 qemu_free(s->extents[i].l2_cache);
167 qemu_free(s->extents[i].l1_backup_table);
169 qemu_free(s->extents);
172 static uint32_t vmdk_read_cid(BlockDriverState *bs, int parent)
174 char desc[DESC_SIZE];
175 uint32_t cid;
176 const char *p_name, *cid_str;
177 size_t cid_str_size;
179 /* the descriptor offset = 0x200 */
180 if (bdrv_pread(bs->file, 0x200, desc, DESC_SIZE) != DESC_SIZE)
181 return 0;
183 if (parent) {
184 cid_str = "parentCID";
185 cid_str_size = sizeof("parentCID");
186 } else {
187 cid_str = "CID";
188 cid_str_size = sizeof("CID");
191 if ((p_name = strstr(desc,cid_str)) != NULL) {
192 p_name += cid_str_size;
193 sscanf(p_name,"%x",&cid);
196 return cid;
199 static int vmdk_write_cid(BlockDriverState *bs, uint32_t cid)
201 char desc[DESC_SIZE], tmp_desc[DESC_SIZE];
202 char *p_name, *tmp_str;
204 /* the descriptor offset = 0x200 */
205 if (bdrv_pread(bs->file, 0x200, desc, DESC_SIZE) != DESC_SIZE)
206 return -1;
208 tmp_str = strstr(desc,"parentCID");
209 pstrcpy(tmp_desc, sizeof(tmp_desc), tmp_str);
210 if ((p_name = strstr(desc,"CID")) != NULL) {
211 p_name += sizeof("CID");
212 snprintf(p_name, sizeof(desc) - (p_name - desc), "%x\n", cid);
213 pstrcat(desc, sizeof(desc), tmp_desc);
216 if (bdrv_pwrite_sync(bs->file, 0x200, desc, DESC_SIZE) < 0)
217 return -1;
218 return 0;
221 static int vmdk_is_cid_valid(BlockDriverState *bs)
223 #ifdef CHECK_CID
224 BDRVVmdkState *s = bs->opaque;
225 BlockDriverState *p_bs = bs->backing_hd;
226 uint32_t cur_pcid;
228 if (p_bs) {
229 cur_pcid = vmdk_read_cid(p_bs,0);
230 if (s->parent_cid != cur_pcid)
231 // CID not valid
232 return 0;
234 #endif
235 // CID valid
236 return 1;
239 static int vmdk_snapshot_create(const char *filename, const char *backing_file)
241 int snp_fd, p_fd;
242 int ret;
243 uint32_t p_cid;
244 char *p_name, *gd_buf, *rgd_buf;
245 const char *real_filename, *temp_str;
246 VMDK4Header header;
247 uint32_t gde_entries, gd_size;
248 int64_t gd_offset, rgd_offset, capacity, gt_size;
249 char p_desc[DESC_SIZE], s_desc[DESC_SIZE], hdr[HEADER_SIZE];
250 static const char desc_template[] =
251 "# Disk DescriptorFile\n"
252 "version=1\n"
253 "CID=%x\n"
254 "parentCID=%x\n"
255 "createType=\"monolithicSparse\"\n"
256 "parentFileNameHint=\"%s\"\n"
257 "\n"
258 "# Extent description\n"
259 "RW %u SPARSE \"%s\"\n"
260 "\n"
261 "# The Disk Data Base \n"
262 "#DDB\n"
263 "\n";
265 snp_fd = open(filename, O_RDWR | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE, 0644);
266 if (snp_fd < 0)
267 return -errno;
268 p_fd = open(backing_file, O_RDONLY | O_BINARY | O_LARGEFILE);
269 if (p_fd < 0) {
270 close(snp_fd);
271 return -errno;
274 /* read the header */
275 if (lseek(p_fd, 0x0, SEEK_SET) == -1) {
276 ret = -errno;
277 goto fail;
279 if (read(p_fd, hdr, HEADER_SIZE) != HEADER_SIZE) {
280 ret = -errno;
281 goto fail;
284 /* write the header */
285 if (lseek(snp_fd, 0x0, SEEK_SET) == -1) {
286 ret = -errno;
287 goto fail;
289 if (write(snp_fd, hdr, HEADER_SIZE) == -1) {
290 ret = -errno;
291 goto fail;
294 memset(&header, 0, sizeof(header));
295 memcpy(&header,&hdr[4], sizeof(header)); // skip the VMDK4_MAGIC
297 if (ftruncate(snp_fd, header.grain_offset << 9)) {
298 ret = -errno;
299 goto fail;
301 /* the descriptor offset = 0x200 */
302 if (lseek(p_fd, 0x200, SEEK_SET) == -1) {
303 ret = -errno;
304 goto fail;
306 if (read(p_fd, p_desc, DESC_SIZE) != DESC_SIZE) {
307 ret = -errno;
308 goto fail;
311 if ((p_name = strstr(p_desc,"CID")) != NULL) {
312 p_name += sizeof("CID");
313 sscanf(p_name,"%x",&p_cid);
316 real_filename = filename;
317 if ((temp_str = strrchr(real_filename, '\\')) != NULL)
318 real_filename = temp_str + 1;
319 if ((temp_str = strrchr(real_filename, '/')) != NULL)
320 real_filename = temp_str + 1;
321 if ((temp_str = strrchr(real_filename, ':')) != NULL)
322 real_filename = temp_str + 1;
324 snprintf(s_desc, sizeof(s_desc), desc_template, p_cid, p_cid, backing_file,
325 (uint32_t)header.capacity, real_filename);
327 /* write the descriptor */
328 if (lseek(snp_fd, 0x200, SEEK_SET) == -1) {
329 ret = -errno;
330 goto fail;
332 if (write(snp_fd, s_desc, strlen(s_desc)) == -1) {
333 ret = -errno;
334 goto fail;
337 gd_offset = header.gd_offset * SECTOR_SIZE; // offset of GD table
338 rgd_offset = header.rgd_offset * SECTOR_SIZE; // offset of RGD table
339 capacity = header.capacity * SECTOR_SIZE; // Extent size
341 * Each GDE span 32M disk, means:
342 * 512 GTE per GT, each GTE points to grain
344 gt_size = (int64_t)header.num_gtes_per_gte * header.granularity * SECTOR_SIZE;
345 if (!gt_size) {
346 ret = -EINVAL;
347 goto fail;
349 gde_entries = (uint32_t)(capacity / gt_size); // number of gde/rgde
350 gd_size = gde_entries * sizeof(uint32_t);
352 /* write RGD */
353 rgd_buf = qemu_malloc(gd_size);
354 if (lseek(p_fd, rgd_offset, SEEK_SET) == -1) {
355 ret = -errno;
356 goto fail_rgd;
358 if (read(p_fd, rgd_buf, gd_size) != gd_size) {
359 ret = -errno;
360 goto fail_rgd;
362 if (lseek(snp_fd, rgd_offset, SEEK_SET) == -1) {
363 ret = -errno;
364 goto fail_rgd;
366 if (write(snp_fd, rgd_buf, gd_size) == -1) {
367 ret = -errno;
368 goto fail_rgd;
371 /* write GD */
372 gd_buf = qemu_malloc(gd_size);
373 if (lseek(p_fd, gd_offset, SEEK_SET) == -1) {
374 ret = -errno;
375 goto fail_gd;
377 if (read(p_fd, gd_buf, gd_size) != gd_size) {
378 ret = -errno;
379 goto fail_gd;
381 if (lseek(snp_fd, gd_offset, SEEK_SET) == -1) {
382 ret = -errno;
383 goto fail_gd;
385 if (write(snp_fd, gd_buf, gd_size) == -1) {
386 ret = -errno;
387 goto fail_gd;
389 ret = 0;
391 fail_gd:
392 qemu_free(gd_buf);
393 fail_rgd:
394 qemu_free(rgd_buf);
395 fail:
396 close(p_fd);
397 close(snp_fd);
398 return ret;
401 static int vmdk_parent_open(BlockDriverState *bs)
403 char *p_name;
404 char desc[DESC_SIZE];
406 /* the descriptor offset = 0x200 */
407 if (bdrv_pread(bs->file, 0x200, desc, DESC_SIZE) != DESC_SIZE)
408 return -1;
410 if ((p_name = strstr(desc,"parentFileNameHint")) != NULL) {
411 char *end_name;
413 p_name += sizeof("parentFileNameHint") + 1;
414 if ((end_name = strchr(p_name,'\"')) == NULL)
415 return -1;
416 if ((end_name - p_name) > sizeof (bs->backing_file) - 1)
417 return -1;
419 pstrcpy(bs->backing_file, end_name - p_name + 1, p_name);
422 return 0;
425 /* Create and append extent to the extent array. Return the added VmdkExtent
426 * address. return NULL if allocation failed. */
427 static VmdkExtent *vmdk_add_extent(BlockDriverState *bs,
428 BlockDriverState *file, bool flat, int64_t sectors,
429 int64_t l1_offset, int64_t l1_backup_offset,
430 uint32_t l1_size,
431 int l2_size, unsigned int cluster_sectors)
433 VmdkExtent *extent;
434 BDRVVmdkState *s = bs->opaque;
436 s->extents = qemu_realloc(s->extents,
437 (s->num_extents + 1) * sizeof(VmdkExtent));
438 extent = &s->extents[s->num_extents];
439 s->num_extents++;
441 memset(extent, 0, sizeof(VmdkExtent));
442 extent->file = file;
443 extent->flat = flat;
444 extent->sectors = sectors;
445 extent->l1_table_offset = l1_offset;
446 extent->l1_backup_table_offset = l1_backup_offset;
447 extent->l1_size = l1_size;
448 extent->l1_entry_sectors = l2_size * cluster_sectors;
449 extent->l2_size = l2_size;
450 extent->cluster_sectors = cluster_sectors;
452 if (s->num_extents > 1) {
453 extent->end_sector = (*(extent - 1)).end_sector + extent->sectors;
454 } else {
455 extent->end_sector = extent->sectors;
457 bs->total_sectors = extent->end_sector;
458 return extent;
462 static int vmdk_open(BlockDriverState *bs, int flags)
464 BDRVVmdkState *s = bs->opaque;
465 uint32_t magic;
466 int i;
467 uint32_t l1_size, l1_entry_sectors;
468 VmdkExtent *extent = NULL;
470 if (bdrv_pread(bs->file, 0, &magic, sizeof(magic)) != sizeof(magic))
471 goto fail;
473 magic = be32_to_cpu(magic);
474 if (magic == VMDK3_MAGIC) {
475 VMDK3Header header;
476 if (bdrv_pread(bs->file, sizeof(magic), &header, sizeof(header))
477 != sizeof(header)) {
478 goto fail;
480 extent = vmdk_add_extent(bs, bs->file, false,
481 le32_to_cpu(header.disk_sectors),
482 le32_to_cpu(header.l1dir_offset) << 9, 0,
483 1 << 6, 1 << 9, le32_to_cpu(header.granularity));
484 } else if (magic == VMDK4_MAGIC) {
485 VMDK4Header header;
486 if (bdrv_pread(bs->file, sizeof(magic), &header, sizeof(header))
487 != sizeof(header)) {
488 goto fail;
490 l1_entry_sectors = le32_to_cpu(header.num_gtes_per_gte)
491 * le64_to_cpu(header.granularity);
492 l1_size = (le64_to_cpu(header.capacity) + l1_entry_sectors - 1)
493 / l1_entry_sectors;
494 extent = vmdk_add_extent(bs, bs->file, false,
495 le64_to_cpu(header.capacity),
496 le64_to_cpu(header.gd_offset) << 9,
497 le64_to_cpu(header.rgd_offset) << 9,
498 l1_size,
499 le32_to_cpu(header.num_gtes_per_gte),
500 le64_to_cpu(header.granularity));
501 if (extent->l1_entry_sectors <= 0) {
502 goto fail;
504 // try to open parent images, if exist
505 if (vmdk_parent_open(bs) != 0)
506 goto fail;
507 // write the CID once after the image creation
508 s->parent_cid = vmdk_read_cid(bs,1);
509 } else {
510 goto fail;
513 /* read the L1 table */
514 l1_size = extent->l1_size * sizeof(uint32_t);
515 extent->l1_table = qemu_malloc(l1_size);
516 if (bdrv_pread(bs->file,
517 extent->l1_table_offset,
518 extent->l1_table,
519 l1_size)
520 != l1_size) {
521 goto fail;
523 for (i = 0; i < extent->l1_size; i++) {
524 le32_to_cpus(&extent->l1_table[i]);
527 if (extent->l1_backup_table_offset) {
528 extent->l1_backup_table = qemu_malloc(l1_size);
529 if (bdrv_pread(bs->file,
530 extent->l1_backup_table_offset,
531 extent->l1_backup_table,
532 l1_size)
533 != l1_size) {
534 goto fail;
536 for (i = 0; i < extent->l1_size; i++) {
537 le32_to_cpus(&extent->l1_backup_table[i]);
541 extent->l2_cache =
542 qemu_malloc(extent->l2_size * L2_CACHE_SIZE * sizeof(uint32_t));
543 return 0;
544 fail:
545 vmdk_free_extents(bs);
546 return -1;
549 static int get_whole_cluster(BlockDriverState *bs,
550 VmdkExtent *extent,
551 uint64_t cluster_offset,
552 uint64_t offset,
553 bool allocate)
555 /* 128 sectors * 512 bytes each = grain size 64KB */
556 uint8_t whole_grain[extent->cluster_sectors * 512];
558 /* we will be here if it's first write on non-exist grain(cluster).
559 * try to read from parent image, if exist */
560 if (bs->backing_hd) {
561 int ret;
563 if (!vmdk_is_cid_valid(bs))
564 return -1;
566 /* floor offset to cluster */
567 offset -= offset % (extent->cluster_sectors * 512);
568 ret = bdrv_read(bs->backing_hd, offset >> 9, whole_grain,
569 extent->cluster_sectors);
570 if (ret < 0) {
571 return -1;
574 /* Write grain only into the active image */
575 ret = bdrv_write(extent->file, cluster_offset, whole_grain,
576 extent->cluster_sectors);
577 if (ret < 0) {
578 return -1;
581 return 0;
584 static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data)
586 /* update L2 table */
587 if (bdrv_pwrite_sync(
588 extent->file,
589 ((int64_t)m_data->l2_offset * 512)
590 + (m_data->l2_index * sizeof(m_data->offset)),
591 &(m_data->offset),
592 sizeof(m_data->offset)
593 ) < 0) {
594 return -1;
596 /* update backup L2 table */
597 if (extent->l1_backup_table_offset != 0) {
598 m_data->l2_offset = extent->l1_backup_table[m_data->l1_index];
599 if (bdrv_pwrite_sync(
600 extent->file,
601 ((int64_t)m_data->l2_offset * 512)
602 + (m_data->l2_index * sizeof(m_data->offset)),
603 &(m_data->offset), sizeof(m_data->offset)
604 ) < 0) {
605 return -1;
609 return 0;
612 static uint64_t get_cluster_offset(BlockDriverState *bs,
613 VmdkExtent *extent,
614 VmdkMetaData *m_data,
615 uint64_t offset, int allocate)
617 unsigned int l1_index, l2_offset, l2_index;
618 int min_index, i, j;
619 uint32_t min_count, *l2_table, tmp = 0;
620 uint64_t cluster_offset;
622 if (m_data)
623 m_data->valid = 0;
625 l1_index = (offset >> 9) / extent->l1_entry_sectors;
626 if (l1_index >= extent->l1_size) {
627 return 0;
629 l2_offset = extent->l1_table[l1_index];
630 if (!l2_offset) {
631 return 0;
633 for(i = 0; i < L2_CACHE_SIZE; i++) {
634 if (l2_offset == extent->l2_cache_offsets[i]) {
635 /* increment the hit count */
636 if (++extent->l2_cache_counts[i] == 0xffffffff) {
637 for(j = 0; j < L2_CACHE_SIZE; j++) {
638 extent->l2_cache_counts[j] >>= 1;
641 l2_table = extent->l2_cache + (i * extent->l2_size);
642 goto found;
645 /* not found: load a new entry in the least used one */
646 min_index = 0;
647 min_count = 0xffffffff;
648 for(i = 0; i < L2_CACHE_SIZE; i++) {
649 if (extent->l2_cache_counts[i] < min_count) {
650 min_count = extent->l2_cache_counts[i];
651 min_index = i;
654 l2_table = extent->l2_cache + (min_index * extent->l2_size);
655 if (bdrv_pread(
656 extent->file,
657 (int64_t)l2_offset * 512,
658 l2_table,
659 extent->l2_size * sizeof(uint32_t)
660 ) != extent->l2_size * sizeof(uint32_t)) {
661 return 0;
664 extent->l2_cache_offsets[min_index] = l2_offset;
665 extent->l2_cache_counts[min_index] = 1;
666 found:
667 l2_index = ((offset >> 9) / extent->cluster_sectors) % extent->l2_size;
668 cluster_offset = le32_to_cpu(l2_table[l2_index]);
670 if (!cluster_offset) {
671 if (!allocate)
672 return 0;
674 // Avoid the L2 tables update for the images that have snapshots.
675 cluster_offset = bdrv_getlength(extent->file);
676 bdrv_truncate(
677 extent->file,
678 cluster_offset + (extent->cluster_sectors << 9)
681 cluster_offset >>= 9;
682 tmp = cpu_to_le32(cluster_offset);
683 l2_table[l2_index] = tmp;
685 /* First of all we write grain itself, to avoid race condition
686 * that may to corrupt the image.
687 * This problem may occur because of insufficient space on host disk
688 * or inappropriate VM shutdown.
690 if (get_whole_cluster(
691 bs, extent, cluster_offset, offset, allocate) == -1)
692 return 0;
694 if (m_data) {
695 m_data->offset = tmp;
696 m_data->l1_index = l1_index;
697 m_data->l2_index = l2_index;
698 m_data->l2_offset = l2_offset;
699 m_data->valid = 1;
702 cluster_offset <<= 9;
703 return cluster_offset;
706 static VmdkExtent *find_extent(BDRVVmdkState *s,
707 int64_t sector_num, VmdkExtent *start_hint)
709 VmdkExtent *extent = start_hint;
711 if (!extent) {
712 extent = &s->extents[0];
714 while (extent < &s->extents[s->num_extents]) {
715 if (sector_num < extent->end_sector) {
716 return extent;
718 extent++;
720 return NULL;
723 static int vmdk_is_allocated(BlockDriverState *bs, int64_t sector_num,
724 int nb_sectors, int *pnum)
726 BDRVVmdkState *s = bs->opaque;
728 int64_t index_in_cluster, n, ret;
729 uint64_t offset;
730 VmdkExtent *extent;
732 extent = find_extent(s, sector_num, NULL);
733 if (!extent) {
734 return 0;
736 if (extent->flat) {
737 n = extent->end_sector - sector_num;
738 ret = 1;
739 } else {
740 offset = get_cluster_offset(bs, extent, NULL, sector_num * 512, 0);
741 index_in_cluster = sector_num % extent->cluster_sectors;
742 n = extent->cluster_sectors - index_in_cluster;
743 ret = offset ? 1 : 0;
745 if (n > nb_sectors)
746 n = nb_sectors;
747 *pnum = n;
748 return ret;
751 static int vmdk_read(BlockDriverState *bs, int64_t sector_num,
752 uint8_t *buf, int nb_sectors)
754 BDRVVmdkState *s = bs->opaque;
755 int ret;
756 uint64_t n, index_in_cluster;
757 VmdkExtent *extent = NULL;
758 uint64_t cluster_offset;
760 while (nb_sectors > 0) {
761 extent = find_extent(s, sector_num, extent);
762 if (!extent) {
763 return -EIO;
765 cluster_offset = get_cluster_offset(
766 bs, extent, NULL, sector_num << 9, 0);
767 index_in_cluster = sector_num % extent->cluster_sectors;
768 n = extent->cluster_sectors - index_in_cluster;
769 if (n > nb_sectors)
770 n = nb_sectors;
771 if (!cluster_offset) {
772 // try to read from parent image, if exist
773 if (bs->backing_hd) {
774 if (!vmdk_is_cid_valid(bs))
775 return -1;
776 ret = bdrv_read(bs->backing_hd, sector_num, buf, n);
777 if (ret < 0)
778 return -1;
779 } else {
780 memset(buf, 0, 512 * n);
782 } else {
783 if(bdrv_pread(bs->file, cluster_offset + index_in_cluster * 512, buf, n * 512) != n * 512)
784 return -1;
786 nb_sectors -= n;
787 sector_num += n;
788 buf += n * 512;
790 return 0;
793 static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
794 const uint8_t *buf, int nb_sectors)
796 BDRVVmdkState *s = bs->opaque;
797 VmdkExtent *extent = NULL;
798 int n;
799 int64_t index_in_cluster;
800 uint64_t cluster_offset;
801 static int cid_update = 0;
802 VmdkMetaData m_data;
804 if (sector_num > bs->total_sectors) {
805 fprintf(stderr,
806 "(VMDK) Wrong offset: sector_num=0x%" PRIx64
807 " total_sectors=0x%" PRIx64 "\n",
808 sector_num, bs->total_sectors);
809 return -1;
812 while (nb_sectors > 0) {
813 extent = find_extent(s, sector_num, extent);
814 if (!extent) {
815 return -EIO;
817 cluster_offset = get_cluster_offset(
819 extent,
820 &m_data,
821 sector_num << 9, 1);
822 if (!cluster_offset) {
823 return -1;
825 index_in_cluster = sector_num % extent->cluster_sectors;
826 n = extent->cluster_sectors - index_in_cluster;
827 if (n > nb_sectors) {
828 n = nb_sectors;
831 if (bdrv_pwrite(bs->file,
832 cluster_offset + index_in_cluster * 512,
833 buf, n * 512)
834 != n * 512) {
835 return -1;
837 if (m_data.valid) {
838 /* update L2 tables */
839 if (vmdk_L2update(extent, &m_data) == -1) {
840 return -1;
843 nb_sectors -= n;
844 sector_num += n;
845 buf += n * 512;
847 // update CID on the first write every time the virtual disk is opened
848 if (!cid_update) {
849 vmdk_write_cid(bs, time(NULL));
850 cid_update++;
853 return 0;
856 static int vmdk_create(const char *filename, QEMUOptionParameter *options)
858 int fd, i;
859 VMDK4Header header;
860 uint32_t tmp, magic, grains, gd_size, gt_size, gt_count;
861 static const char desc_template[] =
862 "# Disk DescriptorFile\n"
863 "version=1\n"
864 "CID=%x\n"
865 "parentCID=ffffffff\n"
866 "createType=\"monolithicSparse\"\n"
867 "\n"
868 "# Extent description\n"
869 "RW %" PRId64 " SPARSE \"%s\"\n"
870 "\n"
871 "# The Disk Data Base \n"
872 "#DDB\n"
873 "\n"
874 "ddb.virtualHWVersion = \"%d\"\n"
875 "ddb.geometry.cylinders = \"%" PRId64 "\"\n"
876 "ddb.geometry.heads = \"16\"\n"
877 "ddb.geometry.sectors = \"63\"\n"
878 "ddb.adapterType = \"ide\"\n";
879 char desc[1024];
880 const char *real_filename, *temp_str;
881 int64_t total_size = 0;
882 const char *backing_file = NULL;
883 int flags = 0;
884 int ret;
886 // Read out options
887 while (options && options->name) {
888 if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
889 total_size = options->value.n / 512;
890 } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FILE)) {
891 backing_file = options->value.s;
892 } else if (!strcmp(options->name, BLOCK_OPT_COMPAT6)) {
893 flags |= options->value.n ? BLOCK_FLAG_COMPAT6: 0;
895 options++;
898 /* XXX: add support for backing file */
899 if (backing_file) {
900 return vmdk_snapshot_create(filename, backing_file);
903 fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE,
904 0644);
905 if (fd < 0)
906 return -errno;
907 magic = cpu_to_be32(VMDK4_MAGIC);
908 memset(&header, 0, sizeof(header));
909 header.version = 1;
910 header.flags = 3; /* ?? */
911 header.capacity = total_size;
912 header.granularity = 128;
913 header.num_gtes_per_gte = 512;
915 grains = (total_size + header.granularity - 1) / header.granularity;
916 gt_size = ((header.num_gtes_per_gte * sizeof(uint32_t)) + 511) >> 9;
917 gt_count = (grains + header.num_gtes_per_gte - 1) / header.num_gtes_per_gte;
918 gd_size = (gt_count * sizeof(uint32_t) + 511) >> 9;
920 header.desc_offset = 1;
921 header.desc_size = 20;
922 header.rgd_offset = header.desc_offset + header.desc_size;
923 header.gd_offset = header.rgd_offset + gd_size + (gt_size * gt_count);
924 header.grain_offset =
925 ((header.gd_offset + gd_size + (gt_size * gt_count) +
926 header.granularity - 1) / header.granularity) *
927 header.granularity;
929 /* swap endianness for all header fields */
930 header.version = cpu_to_le32(header.version);
931 header.flags = cpu_to_le32(header.flags);
932 header.capacity = cpu_to_le64(header.capacity);
933 header.granularity = cpu_to_le64(header.granularity);
934 header.num_gtes_per_gte = cpu_to_le32(header.num_gtes_per_gte);
935 header.desc_offset = cpu_to_le64(header.desc_offset);
936 header.desc_size = cpu_to_le64(header.desc_size);
937 header.rgd_offset = cpu_to_le64(header.rgd_offset);
938 header.gd_offset = cpu_to_le64(header.gd_offset);
939 header.grain_offset = cpu_to_le64(header.grain_offset);
941 header.check_bytes[0] = 0xa;
942 header.check_bytes[1] = 0x20;
943 header.check_bytes[2] = 0xd;
944 header.check_bytes[3] = 0xa;
946 /* write all the data */
947 ret = qemu_write_full(fd, &magic, sizeof(magic));
948 if (ret != sizeof(magic)) {
949 ret = -errno;
950 goto exit;
952 ret = qemu_write_full(fd, &header, sizeof(header));
953 if (ret != sizeof(header)) {
954 ret = -errno;
955 goto exit;
958 ret = ftruncate(fd, le64_to_cpu(header.grain_offset) << 9);
959 if (ret < 0) {
960 ret = -errno;
961 goto exit;
964 /* write grain directory */
965 lseek(fd, le64_to_cpu(header.rgd_offset) << 9, SEEK_SET);
966 for (i = 0, tmp = le64_to_cpu(header.rgd_offset) + gd_size;
967 i < gt_count; i++, tmp += gt_size) {
968 ret = qemu_write_full(fd, &tmp, sizeof(tmp));
969 if (ret != sizeof(tmp)) {
970 ret = -errno;
971 goto exit;
975 /* write backup grain directory */
976 lseek(fd, le64_to_cpu(header.gd_offset) << 9, SEEK_SET);
977 for (i = 0, tmp = le64_to_cpu(header.gd_offset) + gd_size;
978 i < gt_count; i++, tmp += gt_size) {
979 ret = qemu_write_full(fd, &tmp, sizeof(tmp));
980 if (ret != sizeof(tmp)) {
981 ret = -errno;
982 goto exit;
986 /* compose the descriptor */
987 real_filename = filename;
988 if ((temp_str = strrchr(real_filename, '\\')) != NULL)
989 real_filename = temp_str + 1;
990 if ((temp_str = strrchr(real_filename, '/')) != NULL)
991 real_filename = temp_str + 1;
992 if ((temp_str = strrchr(real_filename, ':')) != NULL)
993 real_filename = temp_str + 1;
994 snprintf(desc, sizeof(desc), desc_template, (unsigned int)time(NULL),
995 total_size, real_filename,
996 (flags & BLOCK_FLAG_COMPAT6 ? 6 : 4),
997 total_size / (int64_t)(63 * 16));
999 /* write the descriptor */
1000 lseek(fd, le64_to_cpu(header.desc_offset) << 9, SEEK_SET);
1001 ret = qemu_write_full(fd, desc, strlen(desc));
1002 if (ret != strlen(desc)) {
1003 ret = -errno;
1004 goto exit;
1007 ret = 0;
1008 exit:
1009 close(fd);
1010 return ret;
1013 static void vmdk_close(BlockDriverState *bs)
1015 vmdk_free_extents(bs);
1018 static int vmdk_flush(BlockDriverState *bs)
1020 return bdrv_flush(bs->file);
1024 static QEMUOptionParameter vmdk_create_options[] = {
1026 .name = BLOCK_OPT_SIZE,
1027 .type = OPT_SIZE,
1028 .help = "Virtual disk size"
1031 .name = BLOCK_OPT_BACKING_FILE,
1032 .type = OPT_STRING,
1033 .help = "File name of a base image"
1036 .name = BLOCK_OPT_COMPAT6,
1037 .type = OPT_FLAG,
1038 .help = "VMDK version 6 image"
1040 { NULL }
1043 static BlockDriver bdrv_vmdk = {
1044 .format_name = "vmdk",
1045 .instance_size = sizeof(BDRVVmdkState),
1046 .bdrv_probe = vmdk_probe,
1047 .bdrv_open = vmdk_open,
1048 .bdrv_read = vmdk_read,
1049 .bdrv_write = vmdk_write,
1050 .bdrv_close = vmdk_close,
1051 .bdrv_create = vmdk_create,
1052 .bdrv_flush = vmdk_flush,
1053 .bdrv_is_allocated = vmdk_is_allocated,
1055 .create_options = vmdk_create_options,
1058 static void bdrv_vmdk_init(void)
1060 bdrv_register(&bdrv_vmdk);
1063 block_init(bdrv_vmdk_init);