VMDK: bugfix, align offset to cluster in get_whole_cluster
block/vmdk.c (qemu.git)

/*
 * Block driver for the VMDK format
 *
 * Copyright (c) 2004 Fabrice Bellard
 * Copyright (c) 2005 Filip Navara
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu-common.h"
#include "block_int.h"
#include "module.h"

#define VMDK3_MAGIC (('C' << 24) | ('O' << 16) | ('W' << 8) | 'D')
#define VMDK4_MAGIC (('K' << 24) | ('D' << 16) | ('M' << 8) | 'V')

typedef struct {
    uint32_t version;
    uint32_t flags;
    uint32_t disk_sectors;
    uint32_t granularity;
    uint32_t l1dir_offset;
    uint32_t l1dir_size;
    uint32_t file_sectors;
    uint32_t cylinders;
    uint32_t heads;
    uint32_t sectors_per_track;
} VMDK3Header;

typedef struct {
    uint32_t version;
    uint32_t flags;
    int64_t capacity;
    int64_t granularity;
    int64_t desc_offset;
    int64_t desc_size;
    int32_t num_gtes_per_gte;
    int64_t rgd_offset;
    int64_t gd_offset;
    int64_t grain_offset;
    char filler[1];
    char check_bytes[4];
} __attribute__((packed)) VMDK4Header;

#define L2_CACHE_SIZE 16

typedef struct VmdkExtent {
    BlockDriverState *file;
    bool flat;
    int64_t sectors;
    int64_t end_sector;
    int64_t l1_table_offset;
    int64_t l1_backup_table_offset;
    uint32_t *l1_table;
    uint32_t *l1_backup_table;
    unsigned int l1_size;
    uint32_t l1_entry_sectors;

    unsigned int l2_size;
    uint32_t *l2_cache;
    uint32_t l2_cache_offsets[L2_CACHE_SIZE];
    uint32_t l2_cache_counts[L2_CACHE_SIZE];

    unsigned int cluster_sectors;
} VmdkExtent;

typedef struct BDRVVmdkState {
    uint32_t parent_cid;
    int num_extents;
    /* Extent array with num_extents entries, sorted by address in ascending order */
    VmdkExtent *extents;
} BDRVVmdkState;

typedef struct VmdkMetaData {
    uint32_t offset;
    unsigned int l1_index;
    unsigned int l2_index;
    unsigned int l2_offset;
    int valid;
} VmdkMetaData;

static int vmdk_probe(const uint8_t *buf, int buf_size, const char *filename)
{
    uint32_t magic;

    if (buf_size < 4)
        return 0;
    magic = be32_to_cpu(*(uint32_t *)buf);
    if (magic == VMDK3_MAGIC ||
        magic == VMDK4_MAGIC)
        return 100;
    else
        return 0;
}

#define CHECK_CID 1

#define SECTOR_SIZE 512
#define DESC_SIZE 20*SECTOR_SIZE    // 20 sectors of 512 bytes each
#define HEADER_SIZE 512             // first sector of 512 bytes

static void vmdk_free_extents(BlockDriverState *bs)
{
    int i;
    BDRVVmdkState *s = bs->opaque;

    for (i = 0; i < s->num_extents; i++) {
        qemu_free(s->extents[i].l1_table);
        qemu_free(s->extents[i].l2_cache);
        qemu_free(s->extents[i].l1_backup_table);
    }
    qemu_free(s->extents);
}

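/* Read the image CID (or, when parent != 0, the parentCID) from the embedded
 * text descriptor at offset 0x200; returns 0 if the descriptor cannot be
 * read. */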
static uint32_t vmdk_read_cid(BlockDriverState *bs, int parent)
{
    char desc[DESC_SIZE];
    uint32_t cid;
    const char *p_name, *cid_str;
    size_t cid_str_size;

    /* the descriptor offset = 0x200 */
    if (bdrv_pread(bs->file, 0x200, desc, DESC_SIZE) != DESC_SIZE)
        return 0;

    if (parent) {
        cid_str = "parentCID";
        cid_str_size = sizeof("parentCID");
    } else {
        cid_str = "CID";
        cid_str_size = sizeof("CID");
    }

    if ((p_name = strstr(desc,cid_str)) != NULL) {
        p_name += cid_str_size;
        sscanf(p_name,"%x",&cid);
    }

    return cid;
}

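/* Rewrite the "CID=" line of the embedded descriptor with the given value,
 * keeping everything from the "parentCID" line onward, and write the
 * descriptor back synchronously. */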
static int vmdk_write_cid(BlockDriverState *bs, uint32_t cid)
{
    char desc[DESC_SIZE], tmp_desc[DESC_SIZE];
    char *p_name, *tmp_str;

    /* the descriptor offset = 0x200 */
    if (bdrv_pread(bs->file, 0x200, desc, DESC_SIZE) != DESC_SIZE)
        return -1;

    tmp_str = strstr(desc,"parentCID");
    pstrcpy(tmp_desc, sizeof(tmp_desc), tmp_str);
    if ((p_name = strstr(desc,"CID")) != NULL) {
        p_name += sizeof("CID");
        snprintf(p_name, sizeof(desc) - (p_name - desc), "%x\n", cid);
        pstrcat(desc, sizeof(desc), tmp_desc);
    }

    if (bdrv_pwrite_sync(bs->file, 0x200, desc, DESC_SIZE) < 0)
        return -1;
    return 0;
}

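/* Check that the parent image's current CID still matches the parentCID
 * recorded in this image at open time; returns 1 when valid (or when CID
 * checking is compiled out), 0 otherwise. */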
static int vmdk_is_cid_valid(BlockDriverState *bs)
{
#ifdef CHECK_CID
    BDRVVmdkState *s = bs->opaque;
    BlockDriverState *p_bs = bs->backing_hd;
    uint32_t cur_pcid;

    if (p_bs) {
        cur_pcid = vmdk_read_cid(p_bs,0);
        if (s->parent_cid != cur_pcid)
            // CID not valid
            return 0;
    }
#endif
    // CID valid
    return 1;
}

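/* Create a snapshot image at 'filename' on top of 'backing_file' by copying
 * the backing file's header and its grain directories (primary and
 * redundant), then writing a fresh descriptor whose parentFileNameHint
 * points back at the backing file. */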
static int vmdk_snapshot_create(const char *filename, const char *backing_file)
{
    int snp_fd, p_fd;
    int ret;
    uint32_t p_cid;
    char *p_name, *gd_buf, *rgd_buf;
    const char *real_filename, *temp_str;
    VMDK4Header header;
    uint32_t gde_entries, gd_size;
    int64_t gd_offset, rgd_offset, capacity, gt_size;
    char p_desc[DESC_SIZE], s_desc[DESC_SIZE], hdr[HEADER_SIZE];
    static const char desc_template[] =
        "# Disk DescriptorFile\n"
        "version=1\n"
        "CID=%x\n"
        "parentCID=%x\n"
        "createType=\"monolithicSparse\"\n"
        "parentFileNameHint=\"%s\"\n"
        "\n"
        "# Extent description\n"
        "RW %u SPARSE \"%s\"\n"
        "\n"
        "# The Disk Data Base \n"
        "#DDB\n"
        "\n";

    snp_fd = open(filename, O_RDWR | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE, 0644);
    if (snp_fd < 0)
        return -errno;
    p_fd = open(backing_file, O_RDONLY | O_BINARY | O_LARGEFILE);
    if (p_fd < 0) {
        close(snp_fd);
        return -errno;
    }

    /* read the header */
    if (lseek(p_fd, 0x0, SEEK_SET) == -1) {
        ret = -errno;
        goto fail;
    }
    if (read(p_fd, hdr, HEADER_SIZE) != HEADER_SIZE) {
        ret = -errno;
        goto fail;
    }

    /* write the header */
    if (lseek(snp_fd, 0x0, SEEK_SET) == -1) {
        ret = -errno;
        goto fail;
    }
    if (write(snp_fd, hdr, HEADER_SIZE) == -1) {
        ret = -errno;
        goto fail;
    }

    memset(&header, 0, sizeof(header));
    memcpy(&header,&hdr[4], sizeof(header)); // skip the VMDK4_MAGIC

    if (ftruncate(snp_fd, header.grain_offset << 9)) {
        ret = -errno;
        goto fail;
    }
    /* the descriptor offset = 0x200 */
    if (lseek(p_fd, 0x200, SEEK_SET) == -1) {
        ret = -errno;
        goto fail;
    }
    if (read(p_fd, p_desc, DESC_SIZE) != DESC_SIZE) {
        ret = -errno;
        goto fail;
    }

    if ((p_name = strstr(p_desc,"CID")) != NULL) {
        p_name += sizeof("CID");
        sscanf(p_name,"%x",&p_cid);
    }

    real_filename = filename;
    if ((temp_str = strrchr(real_filename, '\\')) != NULL)
        real_filename = temp_str + 1;
    if ((temp_str = strrchr(real_filename, '/')) != NULL)
        real_filename = temp_str + 1;
    if ((temp_str = strrchr(real_filename, ':')) != NULL)
        real_filename = temp_str + 1;

    snprintf(s_desc, sizeof(s_desc), desc_template, p_cid, p_cid, backing_file,
             (uint32_t)header.capacity, real_filename);

    /* write the descriptor */
    if (lseek(snp_fd, 0x200, SEEK_SET) == -1) {
        ret = -errno;
        goto fail;
    }
    if (write(snp_fd, s_desc, strlen(s_desc)) == -1) {
        ret = -errno;
        goto fail;
    }

    gd_offset = header.gd_offset * SECTOR_SIZE;     // offset of GD table
    rgd_offset = header.rgd_offset * SECTOR_SIZE;   // offset of RGD table
    capacity = header.capacity * SECTOR_SIZE;       // Extent size
    /*
     * Each GDE spans 32M of disk: 512 GTEs per GT, each GTE points to a grain
     */
    gt_size = (int64_t)header.num_gtes_per_gte * header.granularity * SECTOR_SIZE;
    if (!gt_size) {
        ret = -EINVAL;
        goto fail;
    }
    gde_entries = (uint32_t)(capacity / gt_size);  // number of gde/rgde
    gd_size = gde_entries * sizeof(uint32_t);

    /* write RGD */
    rgd_buf = qemu_malloc(gd_size);
    if (lseek(p_fd, rgd_offset, SEEK_SET) == -1) {
        ret = -errno;
        goto fail_rgd;
    }
    if (read(p_fd, rgd_buf, gd_size) != gd_size) {
        ret = -errno;
        goto fail_rgd;
    }
    if (lseek(snp_fd, rgd_offset, SEEK_SET) == -1) {
        ret = -errno;
        goto fail_rgd;
    }
    if (write(snp_fd, rgd_buf, gd_size) == -1) {
        ret = -errno;
        goto fail_rgd;
    }

    /* write GD */
    gd_buf = qemu_malloc(gd_size);
    if (lseek(p_fd, gd_offset, SEEK_SET) == -1) {
        ret = -errno;
        goto fail_gd;
    }
    if (read(p_fd, gd_buf, gd_size) != gd_size) {
        ret = -errno;
        goto fail_gd;
    }
    if (lseek(snp_fd, gd_offset, SEEK_SET) == -1) {
        ret = -errno;
        goto fail_gd;
    }
    if (write(snp_fd, gd_buf, gd_size) == -1) {
        ret = -errno;
        goto fail_gd;
    }
    ret = 0;

fail_gd:
    qemu_free(gd_buf);
fail_rgd:
    qemu_free(rgd_buf);
fail:
    close(p_fd);
    close(snp_fd);
    return ret;
}

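/* Look for a parentFileNameHint entry in the descriptor and, if present,
 * copy the referenced file name into bs->backing_file. */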
static int vmdk_parent_open(BlockDriverState *bs)
{
    char *p_name;
    char desc[DESC_SIZE];

    /* the descriptor offset = 0x200 */
    if (bdrv_pread(bs->file, 0x200, desc, DESC_SIZE) != DESC_SIZE)
        return -1;

    if ((p_name = strstr(desc,"parentFileNameHint")) != NULL) {
        char *end_name;

        p_name += sizeof("parentFileNameHint") + 1;
        if ((end_name = strchr(p_name,'\"')) == NULL)
            return -1;
        if ((end_name - p_name) > sizeof (bs->backing_file) - 1)
            return -1;

        pstrcpy(bs->backing_file, end_name - p_name + 1, p_name);
    }

    return 0;
}

/* Create and append an extent to the extent array. Return the added
 * VmdkExtent address, or NULL if allocation failed. */
static VmdkExtent *vmdk_add_extent(BlockDriverState *bs,
            BlockDriverState *file, bool flat, int64_t sectors,
            int64_t l1_offset, int64_t l1_backup_offset,
            uint32_t l1_size,
            int l2_size, unsigned int cluster_sectors)
{
    VmdkExtent *extent;
    BDRVVmdkState *s = bs->opaque;

    s->extents = qemu_realloc(s->extents,
                              (s->num_extents + 1) * sizeof(VmdkExtent));
    extent = &s->extents[s->num_extents];
    s->num_extents++;

    memset(extent, 0, sizeof(VmdkExtent));
    extent->file = file;
    extent->flat = flat;
    extent->sectors = sectors;
    extent->l1_table_offset = l1_offset;
    extent->l1_backup_table_offset = l1_backup_offset;
    extent->l1_size = l1_size;
    extent->l1_entry_sectors = l2_size * cluster_sectors;
    extent->l2_size = l2_size;
    extent->cluster_sectors = cluster_sectors;

    if (s->num_extents > 1) {
        extent->end_sector = (*(extent - 1)).end_sector + extent->sectors;
    } else {
        extent->end_sector = extent->sectors;
    }
    bs->total_sectors = extent->end_sector;
    return extent;
}

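/* Probe the VMDK3/VMDK4 magic, register the sparse extent described by the
 * header (this version handles a single extent per image), load its L1
 * (grain directory) table and, for VMDK4 images, open the parent image named
 * in the descriptor. */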
static int vmdk_open(BlockDriverState *bs, int flags)
{
    BDRVVmdkState *s = bs->opaque;
    uint32_t magic;
    int i;
    uint32_t l1_size, l1_entry_sectors;
    VmdkExtent *extent = NULL;

    if (bdrv_pread(bs->file, 0, &magic, sizeof(magic)) != sizeof(magic))
        goto fail;

    magic = be32_to_cpu(magic);
    if (magic == VMDK3_MAGIC) {
        VMDK3Header header;
        if (bdrv_pread(bs->file, sizeof(magic), &header, sizeof(header))
                != sizeof(header)) {
            goto fail;
        }
        extent = vmdk_add_extent(bs, bs->file, false,
                              le32_to_cpu(header.disk_sectors),
                              le32_to_cpu(header.l1dir_offset) << 9, 0,
                              1 << 6, 1 << 9, le32_to_cpu(header.granularity));
    } else if (magic == VMDK4_MAGIC) {
        VMDK4Header header;
        if (bdrv_pread(bs->file, sizeof(magic), &header, sizeof(header))
                != sizeof(header)) {
            goto fail;
        }
        l1_entry_sectors = le32_to_cpu(header.num_gtes_per_gte)
                            * le64_to_cpu(header.granularity);
        l1_size = (le64_to_cpu(header.capacity) + l1_entry_sectors - 1)
                    / l1_entry_sectors;
        extent = vmdk_add_extent(bs, bs->file, false,
                              le64_to_cpu(header.capacity),
                              le64_to_cpu(header.gd_offset) << 9,
                              le64_to_cpu(header.rgd_offset) << 9,
                              l1_size,
                              le32_to_cpu(header.num_gtes_per_gte),
                              le64_to_cpu(header.granularity));
        if (extent->l1_entry_sectors <= 0) {
            goto fail;
        }
        // try to open the parent image, if one exists
        if (vmdk_parent_open(bs) != 0)
            goto fail;
        // remember the parentCID recorded in this image
        s->parent_cid = vmdk_read_cid(bs,1);
    } else {
        goto fail;
    }

    /* read the L1 table */
    l1_size = extent->l1_size * sizeof(uint32_t);
    extent->l1_table = qemu_malloc(l1_size);
    if (bdrv_pread(bs->file,
            extent->l1_table_offset,
            extent->l1_table,
            l1_size)
        != l1_size) {
        goto fail;
    }
    for (i = 0; i < extent->l1_size; i++) {
        le32_to_cpus(&extent->l1_table[i]);
    }

    if (extent->l1_backup_table_offset) {
        extent->l1_backup_table = qemu_malloc(l1_size);
        if (bdrv_pread(bs->file,
                extent->l1_backup_table_offset,
                extent->l1_backup_table,
                l1_size)
            != l1_size) {
            goto fail;
        }
        for (i = 0; i < extent->l1_size; i++) {
            le32_to_cpus(&extent->l1_backup_table[i]);
        }
    }

    extent->l2_cache =
        qemu_malloc(extent->l2_size * L2_CACHE_SIZE * sizeof(uint32_t));
    return 0;
fail:
    vmdk_free_extents(bs);
    return -1;
}

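/* Populate a freshly allocated grain: when a backing file is present, read
 * the whole cluster that contains 'offset' from the parent (the offset is
 * first aligned down to a cluster boundary) and write it into the new grain
 * of the active image, so a partial write cannot expose stale data. */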
static int get_whole_cluster(BlockDriverState *bs,
                VmdkExtent *extent,
                uint64_t cluster_offset,
                uint64_t offset,
                bool allocate)
{
    /* 128 sectors * 512 bytes each = grain size 64KB */
    uint8_t whole_grain[extent->cluster_sectors * 512];

    /* We get here on the first write to a grain (cluster) that does not yet
     * exist; try to read its contents from the parent image, if one exists. */
    if (bs->backing_hd) {
        int ret;

        if (!vmdk_is_cid_valid(bs))
            return -1;

        /* floor offset to cluster */
        offset -= offset % (extent->cluster_sectors * 512);
        ret = bdrv_read(bs->backing_hd, offset >> 9, whole_grain,
                extent->cluster_sectors);
        if (ret < 0) {
            return -1;
        }

        /* Write grain only into the active image */
        ret = bdrv_write(extent->file, cluster_offset, whole_grain,
                extent->cluster_sectors);
        if (ret < 0) {
            return -1;
        }
    }
    return 0;
}

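/* Write the updated L2 (grain table) entry described by m_data back to the
 * image, and mirror it into the backup grain table when one exists. */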
static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data)
{
    /* update L2 table */
    if (bdrv_pwrite_sync(
                extent->file,
                ((int64_t)m_data->l2_offset * 512)
                    + (m_data->l2_index * sizeof(m_data->offset)),
                &(m_data->offset),
                sizeof(m_data->offset)
            ) < 0) {
        return -1;
    }
    /* update backup L2 table */
    if (extent->l1_backup_table_offset != 0) {
        m_data->l2_offset = extent->l1_backup_table[m_data->l1_index];
        if (bdrv_pwrite_sync(
                    extent->file,
                    ((int64_t)m_data->l2_offset * 512)
                        + (m_data->l2_index * sizeof(m_data->offset)),
                    &(m_data->offset), sizeof(m_data->offset)
                ) < 0) {
            return -1;
        }
    }

    return 0;
}

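/* Translate a guest byte offset into the byte offset of its grain inside the
 * extent file, walking the L1/L2 tables through a small L2 cache. When
 * 'allocate' is set and the grain is not yet present, a new grain is
 * allocated at the end of the file, pre-filled from the parent via
 * get_whole_cluster(), and the pending L2 update is returned in m_data.
 * Returns 0 when the cluster is unallocated or on error. */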
static uint64_t get_cluster_offset(BlockDriverState *bs,
                                    VmdkExtent *extent,
                                    VmdkMetaData *m_data,
                                    uint64_t offset, int allocate)
{
    unsigned int l1_index, l2_offset, l2_index;
    int min_index, i, j;
    uint32_t min_count, *l2_table, tmp = 0;
    uint64_t cluster_offset;

    if (m_data)
        m_data->valid = 0;

    l1_index = (offset >> 9) / extent->l1_entry_sectors;
    if (l1_index >= extent->l1_size) {
        return 0;
    }
    l2_offset = extent->l1_table[l1_index];
    if (!l2_offset) {
        return 0;
    }
    for(i = 0; i < L2_CACHE_SIZE; i++) {
        if (l2_offset == extent->l2_cache_offsets[i]) {
            /* increment the hit count */
            if (++extent->l2_cache_counts[i] == 0xffffffff) {
                for(j = 0; j < L2_CACHE_SIZE; j++) {
                    extent->l2_cache_counts[j] >>= 1;
                }
            }
            l2_table = extent->l2_cache + (i * extent->l2_size);
            goto found;
        }
    }
    /* not found: load a new entry in the least used one */
    min_index = 0;
    min_count = 0xffffffff;
    for(i = 0; i < L2_CACHE_SIZE; i++) {
        if (extent->l2_cache_counts[i] < min_count) {
            min_count = extent->l2_cache_counts[i];
            min_index = i;
        }
    }
    l2_table = extent->l2_cache + (min_index * extent->l2_size);
    if (bdrv_pread(
                extent->file,
                (int64_t)l2_offset * 512,
                l2_table,
                extent->l2_size * sizeof(uint32_t)
            ) != extent->l2_size * sizeof(uint32_t)) {
        return 0;
    }

    extent->l2_cache_offsets[min_index] = l2_offset;
    extent->l2_cache_counts[min_index] = 1;
found:
    l2_index = ((offset >> 9) / extent->cluster_sectors) % extent->l2_size;
    cluster_offset = le32_to_cpu(l2_table[l2_index]);

    if (!cluster_offset) {
        if (!allocate)
            return 0;

        // Avoid the L2 tables update for the images that have snapshots.
        cluster_offset = bdrv_getlength(extent->file);
        bdrv_truncate(
            extent->file,
            cluster_offset + (extent->cluster_sectors << 9)
        );

        cluster_offset >>= 9;
        tmp = cpu_to_le32(cluster_offset);
        l2_table[l2_index] = tmp;

        /* First of all we write the grain itself, to avoid a race condition
         * that may corrupt the image.
         * This problem may occur because of insufficient space on the host
         * disk or an inappropriate VM shutdown.
         */
        if (get_whole_cluster(
                bs, extent, cluster_offset, offset, allocate) == -1)
            return 0;

        if (m_data) {
            m_data->offset = tmp;
            m_data->l1_index = l1_index;
            m_data->l2_index = l2_index;
            m_data->l2_offset = l2_offset;
            m_data->valid = 1;
        }
    }
    cluster_offset <<= 9;
    return cluster_offset;
}

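/* Return the extent that covers sector_num, starting the search from
 * start_hint when given (extents are sorted by end_sector), or NULL if the
 * sector lies beyond the last extent. */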
static VmdkExtent *find_extent(BDRVVmdkState *s,
                                int64_t sector_num, VmdkExtent *start_hint)
{
    VmdkExtent *extent = start_hint;

    if (!extent) {
        extent = &s->extents[0];
    }
    while (extent < &s->extents[s->num_extents]) {
        if (sector_num < extent->end_sector) {
            return extent;
        }
        extent++;
    }
    return NULL;
}

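/* Report whether sector_num is allocated in this image (as opposed to being
 * deferred to a backing file) and how many contiguous sectors starting there
 * share that state, capped at nb_sectors. */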
static int vmdk_is_allocated(BlockDriverState *bs, int64_t sector_num,
                             int nb_sectors, int *pnum)
{
    BDRVVmdkState *s = bs->opaque;

    int64_t index_in_cluster, n, ret;
    uint64_t offset;
    VmdkExtent *extent;

    extent = find_extent(s, sector_num, NULL);
    if (!extent) {
        return 0;
    }
    if (extent->flat) {
        n = extent->end_sector - sector_num;
        ret = 1;
    } else {
        offset = get_cluster_offset(bs, extent, NULL, sector_num * 512, 0);
        index_in_cluster = sector_num % extent->cluster_sectors;
        n = extent->cluster_sectors - index_in_cluster;
        ret = offset ? 1 : 0;
    }
    if (n > nb_sectors)
        n = nb_sectors;
    *pnum = n;
    return ret;
}

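/* Sector-based read: for every cluster touched, map the guest sectors
 * through get_cluster_offset(); unallocated clusters are satisfied from the
 * backing file when present, or zero-filled otherwise. */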
static int vmdk_read(BlockDriverState *bs, int64_t sector_num,
                    uint8_t *buf, int nb_sectors)
{
    BDRVVmdkState *s = bs->opaque;
    int ret;
    uint64_t n, index_in_cluster;
    VmdkExtent *extent = NULL;
    uint64_t cluster_offset;

    while (nb_sectors > 0) {
        extent = find_extent(s, sector_num, extent);
        if (!extent) {
            return -EIO;
        }
        cluster_offset = get_cluster_offset(
                            bs, extent, NULL, sector_num << 9, 0);
        index_in_cluster = sector_num % extent->cluster_sectors;
        n = extent->cluster_sectors - index_in_cluster;
        if (n > nb_sectors)
            n = nb_sectors;
        if (!cluster_offset) {
            // try to read from the parent image, if one exists
            if (bs->backing_hd) {
                if (!vmdk_is_cid_valid(bs))
                    return -1;
                ret = bdrv_read(bs->backing_hd, sector_num, buf, n);
                if (ret < 0)
                    return -1;
            } else {
                memset(buf, 0, 512 * n);
            }
        } else {
            if(bdrv_pread(bs->file, cluster_offset + index_in_cluster * 512, buf, n * 512) != n * 512)
                return -1;
        }
        nb_sectors -= n;
        sector_num += n;
        buf += n * 512;
    }
    return 0;
}

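/* Sector-based write: allocate (and parent-fill) any missing cluster via
 * get_cluster_offset(), write the data into the active image, commit the
 * corresponding L2 update, and refresh the image CID on the first write
 * (tracked by a static flag). */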
static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
                     const uint8_t *buf, int nb_sectors)
{
    BDRVVmdkState *s = bs->opaque;
    VmdkExtent *extent = NULL;
    int n;
    int64_t index_in_cluster;
    uint64_t cluster_offset;
    static int cid_update = 0;
    VmdkMetaData m_data;

    if (sector_num > bs->total_sectors) {
        fprintf(stderr,
                "(VMDK) Wrong offset: sector_num=0x%" PRIx64
                " total_sectors=0x%" PRIx64 "\n",
                sector_num, bs->total_sectors);
        return -1;
    }

    while (nb_sectors > 0) {
        extent = find_extent(s, sector_num, extent);
        if (!extent) {
            return -EIO;
        }
        cluster_offset = get_cluster_offset(
                                bs,
                                extent,
                                &m_data,
                                sector_num << 9, 1);
        if (!cluster_offset) {
            return -1;
        }
        index_in_cluster = sector_num % extent->cluster_sectors;
        n = extent->cluster_sectors - index_in_cluster;
        if (n > nb_sectors) {
            n = nb_sectors;
        }

        if (bdrv_pwrite(bs->file,
                        cluster_offset + index_in_cluster * 512,
                        buf, n * 512)
                != n * 512) {
            return -1;
        }
        if (m_data.valid) {
            /* update L2 tables */
            if (vmdk_L2update(extent, &m_data) == -1) {
                return -1;
            }
        }
        nb_sectors -= n;
        sector_num += n;
        buf += n * 512;

        // update CID on the first write every time the virtual disk is opened
        if (!cid_update) {
            vmdk_write_cid(bs, time(NULL));
            cid_update++;
        }
    }
    return 0;
}

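/* Image creation: without a backing file, lay out a fresh monolithicSparse
 * VMDK4 image (header, redundant and primary grain directories, descriptor);
 * with a backing file, delegate to vmdk_snapshot_create(). */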
static int vmdk_create(const char *filename, QEMUOptionParameter *options)
{
    int fd, i;
    VMDK4Header header;
    uint32_t tmp, magic, grains, gd_size, gt_size, gt_count;
    static const char desc_template[] =
        "# Disk DescriptorFile\n"
        "version=1\n"
        "CID=%x\n"
        "parentCID=ffffffff\n"
        "createType=\"monolithicSparse\"\n"
        "\n"
        "# Extent description\n"
        "RW %" PRId64 " SPARSE \"%s\"\n"
        "\n"
        "# The Disk Data Base \n"
        "#DDB\n"
        "\n"
        "ddb.virtualHWVersion = \"%d\"\n"
        "ddb.geometry.cylinders = \"%" PRId64 "\"\n"
        "ddb.geometry.heads = \"16\"\n"
        "ddb.geometry.sectors = \"63\"\n"
        "ddb.adapterType = \"ide\"\n";
    char desc[1024];
    const char *real_filename, *temp_str;
    int64_t total_size = 0;
    const char *backing_file = NULL;
    int flags = 0;
    int ret;

    // Read out options
    while (options && options->name) {
        if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
            total_size = options->value.n / 512;
        } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FILE)) {
            backing_file = options->value.s;
        } else if (!strcmp(options->name, BLOCK_OPT_COMPAT6)) {
            flags |= options->value.n ? BLOCK_FLAG_COMPAT6: 0;
        }
        options++;
    }

    /* XXX: add support for backing file */
    if (backing_file) {
        return vmdk_snapshot_create(filename, backing_file);
    }

    fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE,
              0644);
    if (fd < 0)
        return -errno;
    magic = cpu_to_be32(VMDK4_MAGIC);
    memset(&header, 0, sizeof(header));
    header.version = 1;
    header.flags = 3; /* ?? */
    header.capacity = total_size;
    header.granularity = 128;
    header.num_gtes_per_gte = 512;

    grains = (total_size + header.granularity - 1) / header.granularity;
    gt_size = ((header.num_gtes_per_gte * sizeof(uint32_t)) + 511) >> 9;
    gt_count = (grains + header.num_gtes_per_gte - 1) / header.num_gtes_per_gte;
    gd_size = (gt_count * sizeof(uint32_t) + 511) >> 9;

    header.desc_offset = 1;
    header.desc_size = 20;
    header.rgd_offset = header.desc_offset + header.desc_size;
    header.gd_offset = header.rgd_offset + gd_size + (gt_size * gt_count);
    header.grain_offset =
       ((header.gd_offset + gd_size + (gt_size * gt_count) +
         header.granularity - 1) / header.granularity) *
        header.granularity;

    /* swap endianness for all header fields */
    header.version = cpu_to_le32(header.version);
    header.flags = cpu_to_le32(header.flags);
    header.capacity = cpu_to_le64(header.capacity);
    header.granularity = cpu_to_le64(header.granularity);
    header.num_gtes_per_gte = cpu_to_le32(header.num_gtes_per_gte);
    header.desc_offset = cpu_to_le64(header.desc_offset);
    header.desc_size = cpu_to_le64(header.desc_size);
    header.rgd_offset = cpu_to_le64(header.rgd_offset);
    header.gd_offset = cpu_to_le64(header.gd_offset);
    header.grain_offset = cpu_to_le64(header.grain_offset);

    header.check_bytes[0] = 0xa;
    header.check_bytes[1] = 0x20;
    header.check_bytes[2] = 0xd;
    header.check_bytes[3] = 0xa;

    /* write all the data */
    ret = qemu_write_full(fd, &magic, sizeof(magic));
    if (ret != sizeof(magic)) {
        ret = -errno;
        goto exit;
    }
    ret = qemu_write_full(fd, &header, sizeof(header));
    if (ret != sizeof(header)) {
        ret = -errno;
        goto exit;
    }

    ret = ftruncate(fd, le64_to_cpu(header.grain_offset) << 9);
    if (ret < 0) {
        ret = -errno;
        goto exit;
    }

    /* write grain directory */
    lseek(fd, le64_to_cpu(header.rgd_offset) << 9, SEEK_SET);
    for (i = 0, tmp = le64_to_cpu(header.rgd_offset) + gd_size;
         i < gt_count; i++, tmp += gt_size) {
        ret = qemu_write_full(fd, &tmp, sizeof(tmp));
        if (ret != sizeof(tmp)) {
            ret = -errno;
            goto exit;
        }
    }

    /* write backup grain directory */
    lseek(fd, le64_to_cpu(header.gd_offset) << 9, SEEK_SET);
    for (i = 0, tmp = le64_to_cpu(header.gd_offset) + gd_size;
         i < gt_count; i++, tmp += gt_size) {
        ret = qemu_write_full(fd, &tmp, sizeof(tmp));
        if (ret != sizeof(tmp)) {
            ret = -errno;
            goto exit;
        }
    }

    /* compose the descriptor */
    real_filename = filename;
    if ((temp_str = strrchr(real_filename, '\\')) != NULL)
        real_filename = temp_str + 1;
    if ((temp_str = strrchr(real_filename, '/')) != NULL)
        real_filename = temp_str + 1;
    if ((temp_str = strrchr(real_filename, ':')) != NULL)
        real_filename = temp_str + 1;
    snprintf(desc, sizeof(desc), desc_template, (unsigned int)time(NULL),
             total_size, real_filename,
             (flags & BLOCK_FLAG_COMPAT6 ? 6 : 4),
             total_size / (int64_t)(63 * 16));

    /* write the descriptor */
    lseek(fd, le64_to_cpu(header.desc_offset) << 9, SEEK_SET);
    ret = qemu_write_full(fd, desc, strlen(desc));
    if (ret != strlen(desc)) {
        ret = -errno;
        goto exit;
    }

    ret = 0;
exit:
    close(fd);
    return ret;
}

static void vmdk_close(BlockDriverState *bs)
{
    vmdk_free_extents(bs);
}

static int vmdk_flush(BlockDriverState *bs)
{
    return bdrv_flush(bs->file);
}


static QEMUOptionParameter vmdk_create_options[] = {
    {
        .name = BLOCK_OPT_SIZE,
        .type = OPT_SIZE,
        .help = "Virtual disk size"
    },
    {
        .name = BLOCK_OPT_BACKING_FILE,
        .type = OPT_STRING,
        .help = "File name of a base image"
    },
    {
        .name = BLOCK_OPT_COMPAT6,
        .type = OPT_FLAG,
        .help = "VMDK version 6 image"
    },
    { NULL }
};

static BlockDriver bdrv_vmdk = {
    .format_name        = "vmdk",
    .instance_size      = sizeof(BDRVVmdkState),
    .bdrv_probe         = vmdk_probe,
    .bdrv_open          = vmdk_open,
    .bdrv_read          = vmdk_read,
    .bdrv_write         = vmdk_write,
    .bdrv_close         = vmdk_close,
    .bdrv_create        = vmdk_create,
    .bdrv_flush         = vmdk_flush,
    .bdrv_is_allocated  = vmdk_is_allocated,

    .create_options     = vmdk_create_options,
};

static void bdrv_vmdk_init(void)
{
    bdrv_register(&bdrv_vmdk);
}

block_init(bdrv_vmdk_init);