Revert "Add a DTrace tracing backend targetted for SystemTAP compatability"
[qemu/wangdongxu.git] / block / vmdk.c
blob872aebac9bc8179217c4f32d9bf5ed0787cdc409
1 /*
2 * Block driver for the VMDK format
4 * Copyright (c) 2004 Fabrice Bellard
5 * Copyright (c) 2005 Filip Navara
7 * Permission is hereby granted, free of charge, to any person obtaining a copy
8 * of this software and associated documentation files (the "Software"), to deal
9 * in the Software without restriction, including without limitation the rights
10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 * copies of the Software, and to permit persons to whom the Software is
12 * furnished to do so, subject to the following conditions:
14 * The above copyright notice and this permission notice shall be included in
15 * all copies or substantial portions of the Software.
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 * THE SOFTWARE.
26 #include "qemu-common.h"
27 #include "block_int.h"
28 #include "module.h"
/* Pack four characters into one 32-bit tag, first character most significant. */
#define VMDK_FOURCC(a, b, c, d) \
    (((a) << 24) | ((b) << 16) | ((c) << 8) | (d))

/* Image signatures, compared against the first four bytes read big-endian. */
#define VMDK3_MAGIC VMDK_FOURCC('C', 'O', 'W', 'D')
#define VMDK4_MAGIC VMDK_FOURCC('K', 'D', 'M', 'V')
/*
 * Header that follows the 4-byte magic in a VMDK3 image.
 * Fields are stored little-endian; the driver converts them with
 * le32_to_cpu() when opening the image.
 */
typedef struct {
    uint32_t version;
    uint32_t flags;
    uint32_t disk_sectors;      /* becomes bs->total_sectors */
    uint32_t granularity;       /* sectors per cluster */
    uint32_t l1dir_offset;      /* L1 table position, in sectors */
    uint32_t l1dir_size;
    uint32_t file_sectors;
    uint32_t cylinders;
    uint32_t heads;
    uint32_t sectors_per_track;
} VMDK3Header;
/*
 * Header that follows the 4-byte magic in a VMDK4 image.
 * The layout is packed and the fields are stored little-endian; offsets
 * and sizes are expressed in 512-byte sectors (the driver shifts them
 * left by 9 to obtain byte positions).
 */
typedef struct {
    uint32_t version;
    uint32_t flags;
    int64_t capacity;           /* virtual disk size, in sectors */
    int64_t granularity;        /* sectors per grain (cluster) */
    int64_t desc_offset;        /* embedded descriptor position */
    int64_t desc_size;          /* embedded descriptor length */
    int32_t num_gtes_per_gte;   /* entries per grain table */
    int64_t rgd_offset;         /* grain directory loaded as the L1 table */
    int64_t gd_offset;          /* grain directory loaded as the backup L1 */
    int64_t grain_offset;       /* file is pre-sized up to this sector */
    char filler[1];
    char check_bytes[4];
} __attribute__((packed)) VMDK4Header;
61 #define L2_CACHE_SIZE 16
63 typedef struct BDRVVmdkState {
64 BlockDriverState *hd;
65 int64_t l1_table_offset;
66 int64_t l1_backup_table_offset;
67 uint32_t *l1_table;
68 uint32_t *l1_backup_table;
69 unsigned int l1_size;
70 uint32_t l1_entry_sectors;
72 unsigned int l2_size;
73 uint32_t *l2_cache;
74 uint32_t l2_cache_offsets[L2_CACHE_SIZE];
75 uint32_t l2_cache_counts[L2_CACHE_SIZE];
77 unsigned int cluster_sectors;
78 uint32_t parent_cid;
79 } BDRVVmdkState;
/*
 * Describes a freshly allocated cluster so that the on-disk L2 tables
 * can be updated once the data has been written.
 */
typedef struct VmdkMetaData {
    uint32_t offset;        /* new L2 entry value, already little-endian */
    unsigned int l1_index;  /* index of the owning L1 entry */
    unsigned int l2_index;  /* index of the entry inside its L2 table */
    unsigned int l2_offset; /* sector offset of the L2 table */
    int valid;              /* non-zero when the other fields are filled in */
} VmdkMetaData;
89 static int vmdk_probe(const uint8_t *buf, int buf_size, const char *filename)
91 uint32_t magic;
93 if (buf_size < 4)
94 return 0;
95 magic = be32_to_cpu(*(uint32_t *)buf);
96 if (magic == VMDK3_MAGIC ||
97 magic == VMDK4_MAGIC)
98 return 100;
99 else
100 return 0;
/* When defined, vmdk_is_cid_valid() compares the parent CIDs. */
#define CHECK_CID 1

#define SECTOR_SIZE 512
/* Parenthesized so the macro expands safely inside larger expressions. */
#define DESC_SIZE (20 * SECTOR_SIZE)    /* 20 sectors of 512 bytes each */
#define HEADER_SIZE 512                 /* first sector of 512 bytes */
109 static uint32_t vmdk_read_cid(BlockDriverState *bs, int parent)
111 char desc[DESC_SIZE];
112 uint32_t cid;
113 const char *p_name, *cid_str;
114 size_t cid_str_size;
116 /* the descriptor offset = 0x200 */
117 if (bdrv_pread(bs->file, 0x200, desc, DESC_SIZE) != DESC_SIZE)
118 return 0;
120 if (parent) {
121 cid_str = "parentCID";
122 cid_str_size = sizeof("parentCID");
123 } else {
124 cid_str = "CID";
125 cid_str_size = sizeof("CID");
128 if ((p_name = strstr(desc,cid_str)) != NULL) {
129 p_name += cid_str_size;
130 sscanf(p_name,"%x",&cid);
133 return cid;
136 static int vmdk_write_cid(BlockDriverState *bs, uint32_t cid)
138 char desc[DESC_SIZE], tmp_desc[DESC_SIZE];
139 char *p_name, *tmp_str;
141 /* the descriptor offset = 0x200 */
142 if (bdrv_pread(bs->file, 0x200, desc, DESC_SIZE) != DESC_SIZE)
143 return -1;
145 tmp_str = strstr(desc,"parentCID");
146 pstrcpy(tmp_desc, sizeof(tmp_desc), tmp_str);
147 if ((p_name = strstr(desc,"CID")) != NULL) {
148 p_name += sizeof("CID");
149 snprintf(p_name, sizeof(desc) - (p_name - desc), "%x\n", cid);
150 pstrcat(desc, sizeof(desc), tmp_desc);
153 if (bdrv_pwrite_sync(bs->file, 0x200, desc, DESC_SIZE) < 0)
154 return -1;
155 return 0;
158 static int vmdk_is_cid_valid(BlockDriverState *bs)
160 #ifdef CHECK_CID
161 BDRVVmdkState *s = bs->opaque;
162 BlockDriverState *p_bs = bs->backing_hd;
163 uint32_t cur_pcid;
165 if (p_bs) {
166 cur_pcid = vmdk_read_cid(p_bs,0);
167 if (s->parent_cid != cur_pcid)
168 // CID not valid
169 return 0;
171 #endif
172 // CID valid
173 return 1;
176 static int vmdk_snapshot_create(const char *filename, const char *backing_file)
178 int snp_fd, p_fd;
179 int ret;
180 uint32_t p_cid;
181 char *p_name, *gd_buf, *rgd_buf;
182 const char *real_filename, *temp_str;
183 VMDK4Header header;
184 uint32_t gde_entries, gd_size;
185 int64_t gd_offset, rgd_offset, capacity, gt_size;
186 char p_desc[DESC_SIZE], s_desc[DESC_SIZE], hdr[HEADER_SIZE];
187 static const char desc_template[] =
188 "# Disk DescriptorFile\n"
189 "version=1\n"
190 "CID=%x\n"
191 "parentCID=%x\n"
192 "createType=\"monolithicSparse\"\n"
193 "parentFileNameHint=\"%s\"\n"
194 "\n"
195 "# Extent description\n"
196 "RW %u SPARSE \"%s\"\n"
197 "\n"
198 "# The Disk Data Base \n"
199 "#DDB\n"
200 "\n";
202 snp_fd = open(filename, O_RDWR | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE, 0644);
203 if (snp_fd < 0)
204 return -errno;
205 p_fd = open(backing_file, O_RDONLY | O_BINARY | O_LARGEFILE);
206 if (p_fd < 0) {
207 close(snp_fd);
208 return -errno;
211 /* read the header */
212 if (lseek(p_fd, 0x0, SEEK_SET) == -1) {
213 ret = -errno;
214 goto fail;
216 if (read(p_fd, hdr, HEADER_SIZE) != HEADER_SIZE) {
217 ret = -errno;
218 goto fail;
221 /* write the header */
222 if (lseek(snp_fd, 0x0, SEEK_SET) == -1) {
223 ret = -errno;
224 goto fail;
226 if (write(snp_fd, hdr, HEADER_SIZE) == -1) {
227 ret = -errno;
228 goto fail;
231 memset(&header, 0, sizeof(header));
232 memcpy(&header,&hdr[4], sizeof(header)); // skip the VMDK4_MAGIC
234 if (ftruncate(snp_fd, header.grain_offset << 9)) {
235 ret = -errno;
236 goto fail;
238 /* the descriptor offset = 0x200 */
239 if (lseek(p_fd, 0x200, SEEK_SET) == -1) {
240 ret = -errno;
241 goto fail;
243 if (read(p_fd, p_desc, DESC_SIZE) != DESC_SIZE) {
244 ret = -errno;
245 goto fail;
248 if ((p_name = strstr(p_desc,"CID")) != NULL) {
249 p_name += sizeof("CID");
250 sscanf(p_name,"%x",&p_cid);
253 real_filename = filename;
254 if ((temp_str = strrchr(real_filename, '\\')) != NULL)
255 real_filename = temp_str + 1;
256 if ((temp_str = strrchr(real_filename, '/')) != NULL)
257 real_filename = temp_str + 1;
258 if ((temp_str = strrchr(real_filename, ':')) != NULL)
259 real_filename = temp_str + 1;
261 snprintf(s_desc, sizeof(s_desc), desc_template, p_cid, p_cid, backing_file,
262 (uint32_t)header.capacity, real_filename);
264 /* write the descriptor */
265 if (lseek(snp_fd, 0x200, SEEK_SET) == -1) {
266 ret = -errno;
267 goto fail;
269 if (write(snp_fd, s_desc, strlen(s_desc)) == -1) {
270 ret = -errno;
271 goto fail;
274 gd_offset = header.gd_offset * SECTOR_SIZE; // offset of GD table
275 rgd_offset = header.rgd_offset * SECTOR_SIZE; // offset of RGD table
276 capacity = header.capacity * SECTOR_SIZE; // Extent size
278 * Each GDE span 32M disk, means:
279 * 512 GTE per GT, each GTE points to grain
281 gt_size = (int64_t)header.num_gtes_per_gte * header.granularity * SECTOR_SIZE;
282 if (!gt_size) {
283 ret = -EINVAL;
284 goto fail;
286 gde_entries = (uint32_t)(capacity / gt_size); // number of gde/rgde
287 gd_size = gde_entries * sizeof(uint32_t);
289 /* write RGD */
290 rgd_buf = qemu_malloc(gd_size);
291 if (lseek(p_fd, rgd_offset, SEEK_SET) == -1) {
292 ret = -errno;
293 goto fail_rgd;
295 if (read(p_fd, rgd_buf, gd_size) != gd_size) {
296 ret = -errno;
297 goto fail_rgd;
299 if (lseek(snp_fd, rgd_offset, SEEK_SET) == -1) {
300 ret = -errno;
301 goto fail_rgd;
303 if (write(snp_fd, rgd_buf, gd_size) == -1) {
304 ret = -errno;
305 goto fail_rgd;
308 /* write GD */
309 gd_buf = qemu_malloc(gd_size);
310 if (lseek(p_fd, gd_offset, SEEK_SET) == -1) {
311 ret = -errno;
312 goto fail_gd;
314 if (read(p_fd, gd_buf, gd_size) != gd_size) {
315 ret = -errno;
316 goto fail_gd;
318 if (lseek(snp_fd, gd_offset, SEEK_SET) == -1) {
319 ret = -errno;
320 goto fail_gd;
322 if (write(snp_fd, gd_buf, gd_size) == -1) {
323 ret = -errno;
324 goto fail_gd;
326 ret = 0;
328 fail_gd:
329 qemu_free(gd_buf);
330 fail_rgd:
331 qemu_free(rgd_buf);
332 fail:
333 close(p_fd);
334 close(snp_fd);
335 return ret;
338 static int vmdk_parent_open(BlockDriverState *bs)
340 char *p_name;
341 char desc[DESC_SIZE];
343 /* the descriptor offset = 0x200 */
344 if (bdrv_pread(bs->file, 0x200, desc, DESC_SIZE) != DESC_SIZE)
345 return -1;
347 if ((p_name = strstr(desc,"parentFileNameHint")) != NULL) {
348 char *end_name;
350 p_name += sizeof("parentFileNameHint") + 1;
351 if ((end_name = strchr(p_name,'\"')) == NULL)
352 return -1;
353 if ((end_name - p_name) > sizeof (bs->backing_file) - 1)
354 return -1;
356 pstrcpy(bs->backing_file, end_name - p_name + 1, p_name);
359 return 0;
362 static int vmdk_open(BlockDriverState *bs, int flags)
364 BDRVVmdkState *s = bs->opaque;
365 uint32_t magic;
366 int l1_size, i;
368 if (bdrv_pread(bs->file, 0, &magic, sizeof(magic)) != sizeof(magic))
369 goto fail;
371 magic = be32_to_cpu(magic);
372 if (magic == VMDK3_MAGIC) {
373 VMDK3Header header;
375 if (bdrv_pread(bs->file, sizeof(magic), &header, sizeof(header)) != sizeof(header))
376 goto fail;
377 s->cluster_sectors = le32_to_cpu(header.granularity);
378 s->l2_size = 1 << 9;
379 s->l1_size = 1 << 6;
380 bs->total_sectors = le32_to_cpu(header.disk_sectors);
381 s->l1_table_offset = le32_to_cpu(header.l1dir_offset) << 9;
382 s->l1_backup_table_offset = 0;
383 s->l1_entry_sectors = s->l2_size * s->cluster_sectors;
384 } else if (magic == VMDK4_MAGIC) {
385 VMDK4Header header;
387 if (bdrv_pread(bs->file, sizeof(magic), &header, sizeof(header)) != sizeof(header))
388 goto fail;
389 bs->total_sectors = le64_to_cpu(header.capacity);
390 s->cluster_sectors = le64_to_cpu(header.granularity);
391 s->l2_size = le32_to_cpu(header.num_gtes_per_gte);
392 s->l1_entry_sectors = s->l2_size * s->cluster_sectors;
393 if (s->l1_entry_sectors <= 0)
394 goto fail;
395 s->l1_size = (bs->total_sectors + s->l1_entry_sectors - 1)
396 / s->l1_entry_sectors;
397 s->l1_table_offset = le64_to_cpu(header.rgd_offset) << 9;
398 s->l1_backup_table_offset = le64_to_cpu(header.gd_offset) << 9;
400 // try to open parent images, if exist
401 if (vmdk_parent_open(bs) != 0)
402 goto fail;
403 // write the CID once after the image creation
404 s->parent_cid = vmdk_read_cid(bs,1);
405 } else {
406 goto fail;
409 /* read the L1 table */
410 l1_size = s->l1_size * sizeof(uint32_t);
411 s->l1_table = qemu_malloc(l1_size);
412 if (bdrv_pread(bs->file, s->l1_table_offset, s->l1_table, l1_size) != l1_size)
413 goto fail;
414 for(i = 0; i < s->l1_size; i++) {
415 le32_to_cpus(&s->l1_table[i]);
418 if (s->l1_backup_table_offset) {
419 s->l1_backup_table = qemu_malloc(l1_size);
420 if (bdrv_pread(bs->file, s->l1_backup_table_offset, s->l1_backup_table, l1_size) != l1_size)
421 goto fail;
422 for(i = 0; i < s->l1_size; i++) {
423 le32_to_cpus(&s->l1_backup_table[i]);
427 s->l2_cache = qemu_malloc(s->l2_size * L2_CACHE_SIZE * sizeof(uint32_t));
428 return 0;
429 fail:
430 qemu_free(s->l1_backup_table);
431 qemu_free(s->l1_table);
432 qemu_free(s->l2_cache);
433 return -1;
436 static uint64_t get_cluster_offset(BlockDriverState *bs, VmdkMetaData *m_data,
437 uint64_t offset, int allocate);
439 static int get_whole_cluster(BlockDriverState *bs, uint64_t cluster_offset,
440 uint64_t offset, int allocate)
442 BDRVVmdkState *s = bs->opaque;
443 uint8_t whole_grain[s->cluster_sectors*512]; // 128 sectors * 512 bytes each = grain size 64KB
445 // we will be here if it's first write on non-exist grain(cluster).
446 // try to read from parent image, if exist
447 if (bs->backing_hd) {
448 int ret;
450 if (!vmdk_is_cid_valid(bs))
451 return -1;
453 ret = bdrv_read(bs->backing_hd, offset >> 9, whole_grain,
454 s->cluster_sectors);
455 if (ret < 0) {
456 return -1;
459 //Write grain only into the active image
460 ret = bdrv_write(bs->file, cluster_offset, whole_grain,
461 s->cluster_sectors);
462 if (ret < 0) {
463 return -1;
466 return 0;
469 static int vmdk_L2update(BlockDriverState *bs, VmdkMetaData *m_data)
471 BDRVVmdkState *s = bs->opaque;
473 /* update L2 table */
474 if (bdrv_pwrite_sync(bs->file, ((int64_t)m_data->l2_offset * 512) + (m_data->l2_index * sizeof(m_data->offset)),
475 &(m_data->offset), sizeof(m_data->offset)) < 0)
476 return -1;
477 /* update backup L2 table */
478 if (s->l1_backup_table_offset != 0) {
479 m_data->l2_offset = s->l1_backup_table[m_data->l1_index];
480 if (bdrv_pwrite_sync(bs->file, ((int64_t)m_data->l2_offset * 512) + (m_data->l2_index * sizeof(m_data->offset)),
481 &(m_data->offset), sizeof(m_data->offset)) < 0)
482 return -1;
485 return 0;
488 static uint64_t get_cluster_offset(BlockDriverState *bs, VmdkMetaData *m_data,
489 uint64_t offset, int allocate)
491 BDRVVmdkState *s = bs->opaque;
492 unsigned int l1_index, l2_offset, l2_index;
493 int min_index, i, j;
494 uint32_t min_count, *l2_table, tmp = 0;
495 uint64_t cluster_offset;
497 if (m_data)
498 m_data->valid = 0;
500 l1_index = (offset >> 9) / s->l1_entry_sectors;
501 if (l1_index >= s->l1_size)
502 return 0;
503 l2_offset = s->l1_table[l1_index];
504 if (!l2_offset)
505 return 0;
506 for(i = 0; i < L2_CACHE_SIZE; i++) {
507 if (l2_offset == s->l2_cache_offsets[i]) {
508 /* increment the hit count */
509 if (++s->l2_cache_counts[i] == 0xffffffff) {
510 for(j = 0; j < L2_CACHE_SIZE; j++) {
511 s->l2_cache_counts[j] >>= 1;
514 l2_table = s->l2_cache + (i * s->l2_size);
515 goto found;
518 /* not found: load a new entry in the least used one */
519 min_index = 0;
520 min_count = 0xffffffff;
521 for(i = 0; i < L2_CACHE_SIZE; i++) {
522 if (s->l2_cache_counts[i] < min_count) {
523 min_count = s->l2_cache_counts[i];
524 min_index = i;
527 l2_table = s->l2_cache + (min_index * s->l2_size);
528 if (bdrv_pread(bs->file, (int64_t)l2_offset * 512, l2_table, s->l2_size * sizeof(uint32_t)) !=
529 s->l2_size * sizeof(uint32_t))
530 return 0;
532 s->l2_cache_offsets[min_index] = l2_offset;
533 s->l2_cache_counts[min_index] = 1;
534 found:
535 l2_index = ((offset >> 9) / s->cluster_sectors) % s->l2_size;
536 cluster_offset = le32_to_cpu(l2_table[l2_index]);
538 if (!cluster_offset) {
539 if (!allocate)
540 return 0;
542 // Avoid the L2 tables update for the images that have snapshots.
543 cluster_offset = bdrv_getlength(bs->file);
544 bdrv_truncate(bs->file, cluster_offset + (s->cluster_sectors << 9));
546 cluster_offset >>= 9;
547 tmp = cpu_to_le32(cluster_offset);
548 l2_table[l2_index] = tmp;
550 /* First of all we write grain itself, to avoid race condition
551 * that may to corrupt the image.
552 * This problem may occur because of insufficient space on host disk
553 * or inappropriate VM shutdown.
555 if (get_whole_cluster(bs, cluster_offset, offset, allocate) == -1)
556 return 0;
558 if (m_data) {
559 m_data->offset = tmp;
560 m_data->l1_index = l1_index;
561 m_data->l2_index = l2_index;
562 m_data->l2_offset = l2_offset;
563 m_data->valid = 1;
566 cluster_offset <<= 9;
567 return cluster_offset;
570 static int vmdk_is_allocated(BlockDriverState *bs, int64_t sector_num,
571 int nb_sectors, int *pnum)
573 BDRVVmdkState *s = bs->opaque;
574 int index_in_cluster, n;
575 uint64_t cluster_offset;
577 cluster_offset = get_cluster_offset(bs, NULL, sector_num << 9, 0);
578 index_in_cluster = sector_num % s->cluster_sectors;
579 n = s->cluster_sectors - index_in_cluster;
580 if (n > nb_sectors)
581 n = nb_sectors;
582 *pnum = n;
583 return (cluster_offset != 0);
586 static int vmdk_read(BlockDriverState *bs, int64_t sector_num,
587 uint8_t *buf, int nb_sectors)
589 BDRVVmdkState *s = bs->opaque;
590 int index_in_cluster, n, ret;
591 uint64_t cluster_offset;
593 while (nb_sectors > 0) {
594 cluster_offset = get_cluster_offset(bs, NULL, sector_num << 9, 0);
595 index_in_cluster = sector_num % s->cluster_sectors;
596 n = s->cluster_sectors - index_in_cluster;
597 if (n > nb_sectors)
598 n = nb_sectors;
599 if (!cluster_offset) {
600 // try to read from parent image, if exist
601 if (bs->backing_hd) {
602 if (!vmdk_is_cid_valid(bs))
603 return -1;
604 ret = bdrv_read(bs->backing_hd, sector_num, buf, n);
605 if (ret < 0)
606 return -1;
607 } else {
608 memset(buf, 0, 512 * n);
610 } else {
611 if(bdrv_pread(bs->file, cluster_offset + index_in_cluster * 512, buf, n * 512) != n * 512)
612 return -1;
614 nb_sectors -= n;
615 sector_num += n;
616 buf += n * 512;
618 return 0;
621 static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
622 const uint8_t *buf, int nb_sectors)
624 BDRVVmdkState *s = bs->opaque;
625 VmdkMetaData m_data;
626 int index_in_cluster, n;
627 uint64_t cluster_offset;
628 static int cid_update = 0;
630 if (sector_num > bs->total_sectors) {
631 fprintf(stderr,
632 "(VMDK) Wrong offset: sector_num=0x%" PRIx64
633 " total_sectors=0x%" PRIx64 "\n",
634 sector_num, bs->total_sectors);
635 return -1;
638 while (nb_sectors > 0) {
639 index_in_cluster = sector_num & (s->cluster_sectors - 1);
640 n = s->cluster_sectors - index_in_cluster;
641 if (n > nb_sectors)
642 n = nb_sectors;
643 cluster_offset = get_cluster_offset(bs, &m_data, sector_num << 9, 1);
644 if (!cluster_offset)
645 return -1;
647 if (bdrv_pwrite(bs->file, cluster_offset + index_in_cluster * 512, buf, n * 512) != n * 512)
648 return -1;
649 if (m_data.valid) {
650 /* update L2 tables */
651 if (vmdk_L2update(bs, &m_data) == -1)
652 return -1;
654 nb_sectors -= n;
655 sector_num += n;
656 buf += n * 512;
658 // update CID on the first write every time the virtual disk is opened
659 if (!cid_update) {
660 vmdk_write_cid(bs, time(NULL));
661 cid_update++;
664 return 0;
667 static int vmdk_create(const char *filename, QEMUOptionParameter *options)
669 int fd, i;
670 VMDK4Header header;
671 uint32_t tmp, magic, grains, gd_size, gt_size, gt_count;
672 static const char desc_template[] =
673 "# Disk DescriptorFile\n"
674 "version=1\n"
675 "CID=%x\n"
676 "parentCID=ffffffff\n"
677 "createType=\"monolithicSparse\"\n"
678 "\n"
679 "# Extent description\n"
680 "RW %" PRId64 " SPARSE \"%s\"\n"
681 "\n"
682 "# The Disk Data Base \n"
683 "#DDB\n"
684 "\n"
685 "ddb.virtualHWVersion = \"%d\"\n"
686 "ddb.geometry.cylinders = \"%" PRId64 "\"\n"
687 "ddb.geometry.heads = \"16\"\n"
688 "ddb.geometry.sectors = \"63\"\n"
689 "ddb.adapterType = \"ide\"\n";
690 char desc[1024];
691 const char *real_filename, *temp_str;
692 int64_t total_size = 0;
693 const char *backing_file = NULL;
694 int flags = 0;
695 int ret;
697 // Read out options
698 while (options && options->name) {
699 if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
700 total_size = options->value.n / 512;
701 } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FILE)) {
702 backing_file = options->value.s;
703 } else if (!strcmp(options->name, BLOCK_OPT_COMPAT6)) {
704 flags |= options->value.n ? BLOCK_FLAG_COMPAT6: 0;
706 options++;
709 /* XXX: add support for backing file */
710 if (backing_file) {
711 return vmdk_snapshot_create(filename, backing_file);
714 fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE,
715 0644);
716 if (fd < 0)
717 return -errno;
718 magic = cpu_to_be32(VMDK4_MAGIC);
719 memset(&header, 0, sizeof(header));
720 header.version = cpu_to_le32(1);
721 header.flags = cpu_to_le32(3); /* ?? */
722 header.capacity = cpu_to_le64(total_size);
723 header.granularity = cpu_to_le64(128);
724 header.num_gtes_per_gte = cpu_to_le32(512);
726 grains = (total_size + header.granularity - 1) / header.granularity;
727 gt_size = ((header.num_gtes_per_gte * sizeof(uint32_t)) + 511) >> 9;
728 gt_count = (grains + header.num_gtes_per_gte - 1) / header.num_gtes_per_gte;
729 gd_size = (gt_count * sizeof(uint32_t) + 511) >> 9;
731 header.desc_offset = 1;
732 header.desc_size = 20;
733 header.rgd_offset = header.desc_offset + header.desc_size;
734 header.gd_offset = header.rgd_offset + gd_size + (gt_size * gt_count);
735 header.grain_offset =
736 ((header.gd_offset + gd_size + (gt_size * gt_count) +
737 header.granularity - 1) / header.granularity) *
738 header.granularity;
740 header.desc_offset = cpu_to_le64(header.desc_offset);
741 header.desc_size = cpu_to_le64(header.desc_size);
742 header.rgd_offset = cpu_to_le64(header.rgd_offset);
743 header.gd_offset = cpu_to_le64(header.gd_offset);
744 header.grain_offset = cpu_to_le64(header.grain_offset);
746 header.check_bytes[0] = 0xa;
747 header.check_bytes[1] = 0x20;
748 header.check_bytes[2] = 0xd;
749 header.check_bytes[3] = 0xa;
751 /* write all the data */
752 ret = qemu_write_full(fd, &magic, sizeof(magic));
753 if (ret != sizeof(magic)) {
754 ret = -errno;
755 goto exit;
757 ret = qemu_write_full(fd, &header, sizeof(header));
758 if (ret != sizeof(header)) {
759 ret = -errno;
760 goto exit;
763 ret = ftruncate(fd, header.grain_offset << 9);
764 if (ret < 0) {
765 ret = -errno;
766 goto exit;
769 /* write grain directory */
770 lseek(fd, le64_to_cpu(header.rgd_offset) << 9, SEEK_SET);
771 for (i = 0, tmp = header.rgd_offset + gd_size;
772 i < gt_count; i++, tmp += gt_size) {
773 ret = qemu_write_full(fd, &tmp, sizeof(tmp));
774 if (ret != sizeof(tmp)) {
775 ret = -errno;
776 goto exit;
780 /* write backup grain directory */
781 lseek(fd, le64_to_cpu(header.gd_offset) << 9, SEEK_SET);
782 for (i = 0, tmp = header.gd_offset + gd_size;
783 i < gt_count; i++, tmp += gt_size) {
784 ret = qemu_write_full(fd, &tmp, sizeof(tmp));
785 if (ret != sizeof(tmp)) {
786 ret = -errno;
787 goto exit;
791 /* compose the descriptor */
792 real_filename = filename;
793 if ((temp_str = strrchr(real_filename, '\\')) != NULL)
794 real_filename = temp_str + 1;
795 if ((temp_str = strrchr(real_filename, '/')) != NULL)
796 real_filename = temp_str + 1;
797 if ((temp_str = strrchr(real_filename, ':')) != NULL)
798 real_filename = temp_str + 1;
799 snprintf(desc, sizeof(desc), desc_template, (unsigned int)time(NULL),
800 total_size, real_filename,
801 (flags & BLOCK_FLAG_COMPAT6 ? 6 : 4),
802 total_size / (int64_t)(63 * 16));
804 /* write the descriptor */
805 lseek(fd, le64_to_cpu(header.desc_offset) << 9, SEEK_SET);
806 ret = qemu_write_full(fd, desc, strlen(desc));
807 if (ret != strlen(desc)) {
808 ret = -errno;
809 goto exit;
812 ret = 0;
813 exit:
814 close(fd);
815 return ret;
818 static void vmdk_close(BlockDriverState *bs)
820 BDRVVmdkState *s = bs->opaque;
822 qemu_free(s->l1_table);
823 qemu_free(s->l2_cache);
826 static int vmdk_flush(BlockDriverState *bs)
828 return bdrv_flush(bs->file);
832 static QEMUOptionParameter vmdk_create_options[] = {
834 .name = BLOCK_OPT_SIZE,
835 .type = OPT_SIZE,
836 .help = "Virtual disk size"
839 .name = BLOCK_OPT_BACKING_FILE,
840 .type = OPT_STRING,
841 .help = "File name of a base image"
844 .name = BLOCK_OPT_COMPAT6,
845 .type = OPT_FLAG,
846 .help = "VMDK version 6 image"
848 { NULL }
851 static BlockDriver bdrv_vmdk = {
852 .format_name = "vmdk",
853 .instance_size = sizeof(BDRVVmdkState),
854 .bdrv_probe = vmdk_probe,
855 .bdrv_open = vmdk_open,
856 .bdrv_read = vmdk_read,
857 .bdrv_write = vmdk_write,
858 .bdrv_close = vmdk_close,
859 .bdrv_create = vmdk_create,
860 .bdrv_flush = vmdk_flush,
861 .bdrv_is_allocated = vmdk_is_allocated,
863 .create_options = vmdk_create_options,
866 static void bdrv_vmdk_init(void)
868 bdrv_register(&bdrv_vmdk);
871 block_init(bdrv_vmdk_init);