# Generator of fuzzed qcow2 images
#
# Copyright (C) 2014 Maria Kustova <maria.k@catit.be>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

import random
import struct
from . import fuzz
from math import ceil
from os import urandom
from itertools import chain

MAX_IMAGE_SIZE = 10 * (1 << 20)
# Standard sizes
UINT32_S = 4
UINT64_S = 8


class Field(object):

    """Atomic image element (field).

    The class represents an image field as a quadruple of the data format
    required to pack its value to binary form, an offset from the beginning
    of the image, a value and a name.

    The field can be iterated as a list [format, offset, value, name].
    """

    __slots__ = ('fmt', 'offset', 'value', 'name')

    def __init__(self, fmt, offset, val, name):
        self.fmt = fmt
        self.offset = offset
        self.value = val
        self.name = name

    def __iter__(self):
        return iter([self.fmt, self.offset, self.value, self.name])

    def __repr__(self):
        return "Field(fmt=%r, offset=%r, value=%r, name=%r)" % \
            (self.fmt, self.offset, self.value, self.name)


class FieldsList(object):

    """List of fields.

    The class allows access to a field in the list by its name.
    """

    def __init__(self, meta_data=None):
        if meta_data is None:
            self.data = []
        else:
            self.data = [Field(*f)
                         for f in meta_data]

    def __getitem__(self, name):
        return [x for x in self.data if x.name == name]

    def __iter__(self):
        return iter(self.data)

    def __len__(self):
        return len(self.data)


class Image(object):

    """ Qcow2 image object.

    This class allows creating qcow2 images with random valid structures and
    values, fuzzing them via the external qcow2.fuzz module and writing the
    result to a file.
    """

    def __init__(self, backing_file_name=None):
        """Create a random valid qcow2 image with the correct header and stored
        backing file name.
        """
        cluster_bits, self.image_size = self._size_params()
        self.cluster_size = 1 << cluster_bits
        self.header = FieldsList()
        self.backing_file_name = FieldsList()
        self.backing_file_format = FieldsList()
        self.feature_name_table = FieldsList()
        self.end_of_extension_area = FieldsList()
        self.l2_tables = FieldsList()
        self.l1_table = FieldsList()
        self.refcount_table = FieldsList()
        self.refcount_blocks = FieldsList()
        self.ext_offset = 0
        self.create_header(cluster_bits, backing_file_name)
        self.set_backing_file_name(backing_file_name)
        self.data_clusters = self._alloc_data(self.image_size,
                                              self.cluster_size)
        # Percentage of fields that will be fuzzed
        self.bias = random.uniform(0.2, 0.5)

    def __iter__(self):
        return chain(self.header, self.backing_file_format,
                     self.feature_name_table, self.end_of_extension_area,
                     self.backing_file_name, self.l1_table, self.l2_tables,
                     self.refcount_table, self.refcount_blocks)

    def create_header(self, cluster_bits, backing_file_name=None):
        """Generate a random valid header."""
        meta_header = [
            ['>4s', 0, b"QFI\xfb", 'magic'],
            ['>I', 4, random.randint(2, 3), 'version'],
            ['>Q', 8, 0, 'backing_file_offset'],
            ['>I', 16, 0, 'backing_file_size'],
            ['>I', 20, cluster_bits, 'cluster_bits'],
            ['>Q', 24, self.image_size, 'size'],
            ['>I', 32, 0, 'crypt_method'],
            ['>I', 36, 0, 'l1_size'],
            ['>Q', 40, 0, 'l1_table_offset'],
            ['>Q', 48, 0, 'refcount_table_offset'],
            ['>I', 56, 0, 'refcount_table_clusters'],
            ['>I', 60, 0, 'nb_snapshots'],
            ['>Q', 64, 0, 'snapshots_offset'],
            ['>Q', 72, 0, 'incompatible_features'],
            ['>Q', 80, 0, 'compatible_features'],
            ['>Q', 88, 0, 'autoclear_features'],
            # Only refcount_order = 4 is supported by the current (07.2014)
            # implementation of QEMU
            ['>I', 96, 4, 'refcount_order'],
            ['>I', 100, 0, 'header_length']
        ]
        self.header = FieldsList(meta_header)

        if self.header['version'][0].value == 2:
            self.header['header_length'][0].value = 72
        else:
            self.header['incompatible_features'][0].value = \
                random.getrandbits(2)
            self.header['compatible_features'][0].value = random.getrandbits(1)
            self.header['header_length'][0].value = 104
        # Extensions start right after the last header field, i.e. at its
        # offset plus its size
        self.ext_offset = struct.calcsize(
            self.header['header_length'][0].fmt) + \
            self.header['header_length'][0].offset
        end_of_extension_area_len = 2 * UINT32_S
        free_space = self.cluster_size - self.ext_offset - \
            end_of_extension_area_len
        # If the backing file name is specified and there is enough space for
        # it in the first cluster, it's placed at the very end of the first
        # cluster.
        if (backing_file_name is not None) and \
           (free_space >= len(backing_file_name)):
            self.header['backing_file_size'][0].value = len(backing_file_name)
            self.header['backing_file_offset'][0].value = \
                self.cluster_size - len(backing_file_name)

    def set_backing_file_name(self, backing_file_name=None):
        """Add the name of the backing file at the offset specified
        in the header.
        """
        if (backing_file_name is not None) and \
           (not self.header['backing_file_offset'][0].value == 0):
            data_len = len(backing_file_name)
            data_fmt = '>' + str(data_len) + 's'
            self.backing_file_name = FieldsList([
                [data_fmt, self.header['backing_file_offset'][0].value,
                 backing_file_name, 'bf_name']
            ])

    def set_backing_file_format(self, backing_file_fmt=None):
        """Generate the header extension for the backing file format."""
        if backing_file_fmt is not None:
            # Calculation of the free space available in the first cluster
            end_of_extension_area_len = 2 * UINT32_S
            high_border = (self.header['backing_file_offset'][0].value or
                           (self.cluster_size - 1)) - \
                end_of_extension_area_len
            free_space = high_border - self.ext_offset
            ext_size = 2 * UINT32_S + ((len(backing_file_fmt) + 7) & ~7)

            if free_space >= ext_size:
                ext_data_len = len(backing_file_fmt)
                ext_data_fmt = '>' + str(ext_data_len) + 's'
                ext_padding_len = 7 - (ext_data_len - 1) % 8
                self.backing_file_format = FieldsList([
                    ['>I', self.ext_offset, 0xE2792ACA, 'ext_magic'],
                    ['>I', self.ext_offset + UINT32_S, ext_data_len,
                     'ext_length'],
                    [ext_data_fmt, self.ext_offset + UINT32_S * 2,
                     backing_file_fmt, 'bf_format']
                ])
                self.ext_offset = \
                    struct.calcsize(
                        self.backing_file_format['bf_format'][0].fmt) + \
                    ext_padding_len + \
                    self.backing_file_format['bf_format'][0].offset
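        # Illustrative note (not part of the original file): the extension
        # body is padded up to an 8-byte boundary. For a hypothetical
        # backing_file_fmt of b'qcow2' (ext_data_len = 5) the padding is
        # 7 - (5 - 1) % 8 = 3 bytes, so the whole extension occupies
        # 2 * UINT32_S + 5 + 3 = 16 bytes, which is exactly the 'ext_size'
        # checked against 'free_space' above.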

    def create_feature_name_table(self):
        """Generate a random header extension for names of features used in
        the image.
        """
        def gen_feat_ids():
            """Return random feature type and feature bit."""
            return (random.randint(0, 2), random.randint(0, 63))

        end_of_extension_area_len = 2 * UINT32_S
        high_border = (self.header['backing_file_offset'][0].value or
                       (self.cluster_size - 1)) - \
            end_of_extension_area_len
        free_space = high_border - self.ext_offset
        # Sum of sizes of 'magic' and 'length' header extension fields
        ext_header_len = 2 * UINT32_S
        fnt_entry_size = 6 * UINT64_S
        num_fnt_entries = min(10, (free_space - ext_header_len) //
                              fnt_entry_size)
        if not num_fnt_entries == 0:
            feature_tables = []
            feature_ids = []
            inner_offset = self.ext_offset + ext_header_len
            feat_name = b'some cool feature'
            while len(feature_tables) < num_fnt_entries * 3:
                feat_type, feat_bit = gen_feat_ids()
                # Remove duplicates
                while (feat_type, feat_bit) in feature_ids:
                    feat_type, feat_bit = gen_feat_ids()
                feature_ids.append((feat_type, feat_bit))
                feat_fmt = '>' + str(len(feat_name)) + 's'
                feature_tables += [['B', inner_offset,
                                    feat_type, 'feature_type'],
                                   ['B', inner_offset + 1, feat_bit,
                                    'feature_bit_number'],
                                   [feat_fmt, inner_offset + 2,
                                    feat_name, 'feature_name']
                                   ]
                inner_offset += fnt_entry_size
            # No padding for the extension is necessary, because
            # the extension length is a multiple of 8
            self.feature_name_table = FieldsList([
                ['>I', self.ext_offset, 0x6803f857, 'ext_magic'],
                # One feature table entry contains 3 fields and takes 48 bytes
                ['>I', self.ext_offset + UINT32_S,
                 len(feature_tables) // 3 * 48, 'ext_length']
            ] + feature_tables)
            self.ext_offset = inner_offset

    def set_end_of_extension_area(self):
        """Generate a mandatory header extension marking end of header
        extensions.
        """
        self.end_of_extension_area = FieldsList([
            ['>I', self.ext_offset, 0, 'ext_magic'],
            ['>I', self.ext_offset + UINT32_S, 0, 'ext_length']
        ])

    def create_l_structures(self):
        """Generate random valid L1 and L2 tables."""
        def create_l2_entry(host, guest, l2_cluster):
            """Generate one L2 entry."""
            offset = l2_cluster * self.cluster_size
            l2_size = self.cluster_size // UINT64_S
            entry_offset = offset + UINT64_S * (guest % l2_size)
            cluster_descriptor = host * self.cluster_size
            if not self.header['version'][0].value == 2:
                cluster_descriptor += random.randint(0, 1)
            # While snapshots are not supported, bit #63 = 1
            # Compressed clusters are not supported => bit #62 = 0
            entry_val = (1 << 63) + cluster_descriptor
            return ['>Q', entry_offset, entry_val, 'l2_entry']

        def create_l1_entry(l2_cluster, l1_offset, guest):
            """Generate one L1 entry."""
            l2_size = self.cluster_size // UINT64_S
            entry_offset = l1_offset + UINT64_S * (guest // l2_size)
            # While snapshots are not supported, bit #63 = 1
            entry_val = (1 << 63) + l2_cluster * self.cluster_size
            return ['>Q', entry_offset, entry_val, 'l1_entry']

        if len(self.data_clusters) == 0:
            # All metadata for an empty guest image needs 4 clusters:
            # header, rfc table, rfc block, L1 table.
            # Header takes cluster #0, other clusters ##1-3 can be used
            l1_offset = random.randint(1, 3) * self.cluster_size
            l1 = [['>Q', l1_offset, 0, 'l1_entry']]
            l2 = []
        else:
            meta_data = self._get_metadata()
            guest_clusters = random.sample(range(self.image_size //
                                                 self.cluster_size),
                                           len(self.data_clusters))
            # Number of entries in an L1/L2 table
            l_size = self.cluster_size // UINT64_S
            # Number of clusters necessary for the L1 table
            l1_size = int(ceil((max(guest_clusters) + 1) / float(l_size**2)))
            l1_start = self._get_adjacent_clusters(self.data_clusters |
                                                   meta_data, l1_size)
            meta_data |= set(range(l1_start, l1_start + l1_size))
            l1_offset = l1_start * self.cluster_size
            # Indices of L2 tables
            l2_ids = []
            # Host clusters allocated for L2 tables
            l2_clusters = []
            # L1 entries
            l1 = []
            # L2 entries
            l2 = []
            for host, guest in zip(self.data_clusters, guest_clusters):
                l2_id = guest // l_size
                if l2_id not in l2_ids:
                    l2_ids.append(l2_id)
                    l2_clusters.append(self._get_adjacent_clusters(
                        self.data_clusters | meta_data | set(l2_clusters),
                        1))
                    l1.append(create_l1_entry(l2_clusters[-1], l1_offset,
                                              guest))
                l2.append(create_l2_entry(host, guest,
                                          l2_clusters[l2_ids.index(l2_id)]))
        self.l2_tables = FieldsList(l2)
        self.l1_table = FieldsList(l1)
        self.header['l1_size'][0].value = int(ceil(UINT64_S * self.image_size /
                                                   float(self.cluster_size**2)))
        self.header['l1_table_offset'][0].value = l1_offset
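        # Illustrative note (not part of the original file): for example,
        # with cluster_bits = 16 (cluster_size = 65536) an L1/L2 table holds
        # l_size = 65536 // UINT64_S = 8192 entries, so guest cluster g is
        # described by L1 entry g // 8192 and L2 entry g % 8192, which is
        # what create_l1_entry() and create_l2_entry() compute above.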

    def create_refcount_structures(self):
        """Generate random refcount blocks and refcount table."""
        def allocate_rfc_blocks(data, size):
            """Return indices of clusters allocated for refcount blocks."""
            cluster_ids = set()
            diff = block_ids = set([x // size for x in data])
            while len(diff) != 0:
                # Allocate all not yet allocated clusters
                new = self._get_available_clusters(data | cluster_ids,
                                                   len(diff))
                # Indices of new refcount blocks necessary to cover clusters
                # in 'new'
                diff = set([x // size for x in new]) - block_ids
                cluster_ids |= new
                block_ids |= diff
            return cluster_ids, block_ids

        def allocate_rfc_table(data, init_blocks, block_size):
            """Return indices of clusters allocated for the refcount table
            and updated indices of clusters allocated for blocks and indices
            of blocks.
            """
            blocks = set(init_blocks)
            clusters = set()
            # Number of entries in one cluster of the refcount table
            size = self.cluster_size // UINT64_S
            # Number of clusters necessary for the refcount table based on
            # the current number of refcount blocks
            table_size = int(ceil((max(blocks) + 1) / float(size)))
            # Index of the first cluster of the refcount table
            table_start = self._get_adjacent_clusters(data, table_size + 1)
            # Clusters allocated for the current length of the refcount table
            table_clusters = set(range(table_start, table_start + table_size))
            # Clusters allocated for the refcount table including
            # the last optional one for potential L1 growth
            table_clusters_allocated = set(range(table_start, table_start +
                                                 table_size + 1))
            # New refcount blocks necessary for clusters occupied by the
            # refcount table
            diff = set([c // block_size for c in table_clusters]) - blocks
            blocks |= diff
            while len(diff) != 0:
                # Allocate clusters for new refcount blocks
                new = self._get_available_clusters((data | clusters) |
                                                   table_clusters_allocated,
                                                   len(diff))
                # Indices of new refcount blocks necessary to cover
                # clusters in 'new'
                diff = set([x // block_size for x in new]) - blocks
                clusters |= new
                blocks |= diff
                # Check if the refcount table needs one more cluster
                if int(ceil((max(blocks) + 1) / float(size))) > table_size:
                    new_block_id = (table_start + table_size) // block_size
                    # Check if the additional table cluster needs
                    # one more refcount block
                    if new_block_id not in blocks:
                        diff.add(new_block_id)
                    table_clusters.add(table_start + table_size)
                    table_size += 1
            return table_clusters, blocks, clusters

        def create_table_entry(table_offset, block_cluster, block_size,
                               cluster):
            """Generate a refcount table entry."""
            offset = table_offset + UINT64_S * (cluster // block_size)
            return ['>Q', offset, block_cluster * self.cluster_size,
                    'refcount_table_entry']

        def create_block_entry(block_cluster, block_size, cluster):
            """Generate a single refcount block entry."""
            entry_size = self.cluster_size // block_size
            offset = block_cluster * self.cluster_size
            entry_offset = offset + entry_size * (cluster % block_size)
            # While snapshots are not supported all refcounts are set to 1
            return ['>H', entry_offset, 1, 'refcount_block_entry']

        # Size of a block entry in bits
        refcount_bits = 1 << self.header['refcount_order'][0].value
        # Number of refcount entries per refcount block
        # Convert self.cluster_size from bytes to bits to have the same
        # base for the numerator and denominator
        block_size = self.cluster_size * 8 // refcount_bits
        meta_data = self._get_metadata()
        if len(self.data_clusters) == 0:
            # All metadata for an empty guest image needs 4 clusters:
            # header, rfc table, rfc block, L1 table.
            # Header takes cluster #0, other clusters ##1-3 can be used
            block_clusters = set([random.choice(list(set(range(1, 4)) -
                                                     meta_data))])
            block_ids = set([0])
            table_clusters = set([random.choice(list(set(range(1, 4)) -
                                                     meta_data -
                                                     block_clusters))])
        else:
            block_clusters, block_ids = \
                allocate_rfc_blocks(self.data_clusters |
                                    meta_data, block_size)
            table_clusters, block_ids, new_clusters = \
                allocate_rfc_table(self.data_clusters |
                                   meta_data |
                                   block_clusters,
                                   block_ids,
                                   block_size)
            block_clusters |= new_clusters

        meta_data |= block_clusters | table_clusters
        table_offset = min(table_clusters) * self.cluster_size
        block_id = None
        # Clusters allocated for refcount blocks
        block_clusters = list(block_clusters)
        # Indices of refcount blocks
        block_ids = list(block_ids)
        # Refcount table entries
        rfc_table = []
        # Refcount entries
        rfc_blocks = []

        for cluster in sorted(self.data_clusters | meta_data):
            if cluster // block_size != block_id:
                block_id = cluster // block_size
                block_cluster = block_clusters[block_ids.index(block_id)]
                rfc_table.append(create_table_entry(table_offset,
                                                    block_cluster,
                                                    block_size, cluster))
            rfc_blocks.append(create_block_entry(block_cluster, block_size,
                                                 cluster))
        self.refcount_table = FieldsList(rfc_table)
        self.refcount_blocks = FieldsList(rfc_blocks)

        self.header['refcount_table_offset'][0].value = table_offset
        self.header['refcount_table_clusters'][0].value = len(table_clusters)
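        # Illustrative note (not part of the original file): with the only
        # supported refcount_order = 4 each refcount entry is 16 bits wide,
        # so for cluster_size = 65536 one refcount block holds
        # block_size = 65536 * 8 // 16 = 32768 entries; host cluster c is
        # then described by refcount block c // 32768, entry c % 32768.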

    def fuzz(self, fields_to_fuzz=None):
        """Fuzz an image by corrupting values of a random subset of its fields.

        Without parameters the method fuzzes the entire image.

        If 'fields_to_fuzz' is specified, then only fields in this list will be
        fuzzed. 'fields_to_fuzz' can contain both individual fields and more
        general image elements such as a header or tables.

        In the first case the field will always be fuzzed.
        In the second case a random subset of fields will be selected and
        fuzzed.
        """
        def coin():
            """Return True with probability equal to the portion of fields
            to be fuzzed.
            """
            return random.random() < self.bias

        if fields_to_fuzz is None:
            for field in self:
                if coin():
                    field.value = getattr(fuzz, field.name)(field.value)
        else:
            for item in fields_to_fuzz:
                if len(item) == 1:
                    for field in getattr(self, item[0]):
                        if coin():
                            field.value = getattr(fuzz,
                                                  field.name)(field.value)
                else:
                    # If fields with the requested name were not generated
                    # getattr(self, item[0])[item[1]] returns an empty list
                    for field in getattr(self, item[0])[item[1]]:
                        field.value = getattr(fuzz, field.name)(field.value)

    def write(self, filename):
        """Write an entire image to the file."""
        image_file = open(filename, 'wb')
        for field in self:
            image_file.seek(field.offset)
            image_file.write(struct.pack(field.fmt, field.value))

        for cluster in sorted(self.data_clusters):
            image_file.seek(cluster * self.cluster_size)
            image_file.write(urandom(self.cluster_size))

        # Align the real image size to the cluster size
        image_file.seek(0, 2)
        size = image_file.tell()
        rounded = (size + self.cluster_size - 1) & ~(self.cluster_size - 1)
        if rounded > size:
            image_file.seek(rounded - 1)
            image_file.write(b'\x00')
        image_file.close()

    @staticmethod
    def _size_params():
        """Generate a random image size aligned to a random correct
        cluster size.
        """
        cluster_bits = random.randrange(9, 21)
        cluster_size = 1 << cluster_bits
        img_size = random.randrange(0, MAX_IMAGE_SIZE + 1, cluster_size)
        return (cluster_bits, img_size)

    @staticmethod
    def _get_available_clusters(used, number):
        """Return a set of indices of not allocated clusters.

        'used' contains indices of currently allocated clusters.
        All clusters that cannot be allocated between 'used' clusters will have
        indices appended to the end of 'used'.
        """
        append_id = max(used) + 1
        free = set(range(1, append_id)) - used
        if len(free) >= number:
            return set(random.sample(list(free), number))
        else:
            return free | set(range(append_id, append_id + number - len(free)))

    @staticmethod
    def _get_adjacent_clusters(used, size):
        """Return an index of the first cluster in the sequence of free ones.

        'used' contains indices of currently allocated clusters. 'size' is the
        length of the sequence of free clusters.
        If a sequence of 'size' free clusters is not available between 'used'
        clusters, its first index will be appended to the end of 'used'.
        """
        def get_cluster_id(lst, length):
            """Return the first index of the sequence of the specified length
            or None if the sequence cannot be inserted in the list.
            """
            if len(lst) != 0:
                pairs = []
                pair = (lst[0], 1)
                for i in range(1, len(lst)):
                    if lst[i] == lst[i-1] + 1:
                        pair = (lst[i], pair[1] + 1)
                    else:
                        pairs.append(pair)
                        pair = (lst[i], 1)
                pairs.append(pair)
                random.shuffle(pairs)
                for x, s in pairs:
                    if s >= length:
                        return x - length + 1
            return None

        append_id = max(used) + 1
        free = list(set(range(1, append_id)) - used)
        idx = get_cluster_id(free, size)
        if idx is None:
            return append_id
        else:
            return idx

    @staticmethod
    def _alloc_data(img_size, cluster_size):
        """Return a set of random indices of clusters allocated for guest data.
        """
        num_of_cls = img_size // cluster_size
        return set(random.sample(range(1, num_of_cls + 1),
                                 random.randint(0, num_of_cls)))

    def _get_metadata(self):
        """Return indices of clusters allocated for image metadata."""
        ids = set()
        for x in self:
            ids.add(x.offset // self.cluster_size)
        return ids


def create_image(test_img_path, backing_file_name=None, backing_file_fmt=None,
                 fields_to_fuzz=None):
    """Create a fuzzed image and write it to the specified file."""
    image = Image(backing_file_name.encode()
                  if backing_file_name is not None else None)
    image.set_backing_file_format(backing_file_fmt.encode()
                                  if backing_file_fmt is not None else None)
    image.create_feature_name_table()
    image.set_end_of_extension_area()
    image.create_l_structures()
    image.create_refcount_structures()
    image.fuzz(fields_to_fuzz)
    image.write(test_img_path)
    return image.image_size
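
# Minimal usage sketch (illustrative assumption, not part of the original
# module): the import path and file names below are hypothetical.
#
#     from qcow2 import layout
#
#     # Fuzz each field of a random valid image with probability Image.bias
#     size = layout.create_image('/tmp/fuzzed.qcow2')
#
#     # Restrict fuzzing to a random subset of header fields and always
#     # fuzz the header 'version' field
#     size = layout.create_image('/tmp/fuzzed.qcow2',
#                                backing_file_name='base.img',
#                                backing_file_fmt='raw',
#                                fields_to_fuzz=[['header'],
#                                                ['header', 'version']])
#
#     # 'size' is the virtual size of the generated image in bytes.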