tests/image-fuzzer/qcow2/layout.py

# Generator of fuzzed qcow2 images
#
# Copyright (C) 2014 Maria Kustova <maria.k@catit.be>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#

from __future__ import absolute_import
import random
import struct
from . import fuzz
from math import ceil
from os import urandom
from itertools import chain

MAX_IMAGE_SIZE = 10 * (1 << 20)
# Standard sizes
UINT32_S = 4
UINT64_S = 8


class Field(object):

    """Atomic image element (field).

    The class represents an image field as a quadruple: the struct format
    needed to pack its value to binary form, an offset from the beginning
    of the image, a value and a name.

    The field can be iterated as a list [format, offset, value, name].
    """

    __slots__ = ('fmt', 'offset', 'value', 'name')

    def __init__(self, fmt, offset, val, name):
        self.fmt = fmt
        self.offset = offset
        self.value = val
        self.name = name

    def __iter__(self):
        return iter([self.fmt, self.offset, self.value, self.name])

    def __repr__(self):
        return "Field(fmt='%s', offset=%d, value=%s, name=%s)" % \
            (self.fmt, self.offset, str(self.value), self.name)
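
# Illustrative note (added here, not in the original source): a field such as
# Field('>I', 4, 3, 'version') describes a 4-byte big-endian integer with
# value 3 located at image offset 4; Image.write() serializes it with
# struct.pack('>I', 3), i.e. the bytes '\x00\x00\x00\x03'.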


class FieldsList(object):

    """List of fields.

    The class allows access to a field in the list by its name.
    """

    def __init__(self, meta_data=None):
        if meta_data is None:
            self.data = []
        else:
            self.data = [Field(*f)
                         for f in meta_data]

    def __getitem__(self, name):
        return [x for x in self.data if x.name == name]

    def __iter__(self):
        return iter(self.data)

    def __len__(self):
        return len(self.data)
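
# Illustrative note (added here, not in the original source): lookup is by
# field name, e.g. FieldsList([['>I', 4, 3, 'version']])['version'] returns a
# list with the single matching Field, while an unknown name yields an empty
# list.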


class Image(object):

    """ Qcow2 image object.

    This class allows creating qcow2 images with random valid structures and
    values, fuzzing them via the external qcow2.fuzz module and writing the
    result to a file.
    """

    def __init__(self, backing_file_name=None):
        """Create a random valid qcow2 image with the correct header and stored
        backing file name."""
        cluster_bits, self.image_size = self._size_params()
        self.cluster_size = 1 << cluster_bits
        self.header = FieldsList()
        self.backing_file_name = FieldsList()
        self.backing_file_format = FieldsList()
        self.feature_name_table = FieldsList()
        self.end_of_extension_area = FieldsList()
        self.l2_tables = FieldsList()
        self.l1_table = FieldsList()
        self.refcount_table = FieldsList()
        self.refcount_blocks = FieldsList()
        self.ext_offset = 0
        self.create_header(cluster_bits, backing_file_name)
        self.set_backing_file_name(backing_file_name)
        self.data_clusters = self._alloc_data(self.image_size,
                                              self.cluster_size)
        # Fraction of fields to be fuzzed
        self.bias = random.uniform(0.2, 0.5)

    def __iter__(self):
        return chain(self.header, self.backing_file_format,
                     self.feature_name_table, self.end_of_extension_area,
                     self.backing_file_name, self.l1_table, self.l2_tables,
                     self.refcount_table, self.refcount_blocks)

    def create_header(self, cluster_bits, backing_file_name=None):
        """Generate a random valid header."""
        meta_header = [
            ['>4s', 0, "QFI\xfb", 'magic'],
            ['>I', 4, random.randint(2, 3), 'version'],
            ['>Q', 8, 0, 'backing_file_offset'],
            ['>I', 16, 0, 'backing_file_size'],
            ['>I', 20, cluster_bits, 'cluster_bits'],
            ['>Q', 24, self.image_size, 'size'],
            ['>I', 32, 0, 'crypt_method'],
            ['>I', 36, 0, 'l1_size'],
            ['>Q', 40, 0, 'l1_table_offset'],
            ['>Q', 48, 0, 'refcount_table_offset'],
            ['>I', 56, 0, 'refcount_table_clusters'],
            ['>I', 60, 0, 'nb_snapshots'],
            ['>Q', 64, 0, 'snapshots_offset'],
            ['>Q', 72, 0, 'incompatible_features'],
            ['>Q', 80, 0, 'compatible_features'],
            ['>Q', 88, 0, 'autoclear_features'],
            # Only refcount_order = 4 is supported by the current (07.2014)
            # QEMU implementation
            ['>I', 96, 4, 'refcount_order'],
            ['>I', 100, 0, 'header_length']
        ]
        self.header = FieldsList(meta_header)

        if self.header['version'][0].value == 2:
            self.header['header_length'][0].value = 72
        else:
            self.header['incompatible_features'][0].value = \
                random.getrandbits(2)
            self.header['compatible_features'][0].value = random.getrandbits(1)
            self.header['header_length'][0].value = 104
        # Header extensions start right after the last header field,
        # i.e. at its offset plus its packed size
        self.ext_offset = struct.calcsize(
            self.header['header_length'][0].fmt) + \
            self.header['header_length'][0].offset
        end_of_extension_area_len = 2 * UINT32_S
        free_space = self.cluster_size - self.ext_offset - \
            end_of_extension_area_len
        # If the backing file name is specified and there is enough space for
        # it in the first cluster, then it's placed at the very end of the
        # first cluster.
        if (backing_file_name is not None) and \
           (free_space >= len(backing_file_name)):
            self.header['backing_file_size'][0].value = len(backing_file_name)
            self.header['backing_file_offset'][0].value = \
                self.cluster_size - len(backing_file_name)
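
    # Worked example (added for illustration, not in the original source):
    # 'header_length' is the last header field, packed as '>I' (4 bytes) at
    # offset 100, so header extensions start at self.ext_offset == 104.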

    def set_backing_file_name(self, backing_file_name=None):
        """Add the name of the backing file at the offset specified
        in the header.
        """
        if (backing_file_name is not None) and \
           (not self.header['backing_file_offset'][0].value == 0):
            data_len = len(backing_file_name)
            data_fmt = '>' + str(data_len) + 's'
            self.backing_file_name = FieldsList([
                [data_fmt, self.header['backing_file_offset'][0].value,
                 backing_file_name, 'bf_name']
            ])

    def set_backing_file_format(self, backing_file_fmt=None):
        """Generate the header extension for the backing file format."""
        if backing_file_fmt is not None:
            # Calculate the free space available in the first cluster
            end_of_extension_area_len = 2 * UINT32_S
            high_border = (self.header['backing_file_offset'][0].value or
                           (self.cluster_size - 1)) - \
                end_of_extension_area_len
            free_space = high_border - self.ext_offset
            ext_size = 2 * UINT32_S + ((len(backing_file_fmt) + 7) & ~7)

            if free_space >= ext_size:
                ext_data_len = len(backing_file_fmt)
                ext_data_fmt = '>' + str(ext_data_len) + 's'
                ext_padding_len = 7 - (ext_data_len - 1) % 8
                self.backing_file_format = FieldsList([
                    ['>I', self.ext_offset, 0xE2792ACA, 'ext_magic'],
                    ['>I', self.ext_offset + UINT32_S, ext_data_len,
                     'ext_length'],
                    [ext_data_fmt, self.ext_offset + UINT32_S * 2,
                     backing_file_fmt, 'bf_format']
                ])
                self.ext_offset = \
                    struct.calcsize(
                        self.backing_file_format['bf_format'][0].fmt) + \
                    ext_padding_len + \
                    self.backing_file_format['bf_format'][0].offset

    def create_feature_name_table(self):
        """Generate a random header extension for names of features used in
        the image.
        """
        def gen_feat_ids():
            """Return a random feature type and feature bit."""
            return (random.randint(0, 2), random.randint(0, 63))

        end_of_extension_area_len = 2 * UINT32_S
        high_border = (self.header['backing_file_offset'][0].value or
                       (self.cluster_size - 1)) - \
            end_of_extension_area_len
        free_space = high_border - self.ext_offset
        # Sum of sizes of 'magic' and 'length' header extension fields
        ext_header_len = 2 * UINT32_S
        fnt_entry_size = 6 * UINT64_S
        num_fnt_entries = min(10, (free_space - ext_header_len) /
                              fnt_entry_size)
        if not num_fnt_entries == 0:
            feature_tables = []
            feature_ids = []
            inner_offset = self.ext_offset + ext_header_len
            feat_name = 'some cool feature'
            while len(feature_tables) < num_fnt_entries * 3:
                feat_type, feat_bit = gen_feat_ids()
                # Remove duplicates
                while (feat_type, feat_bit) in feature_ids:
                    feat_type, feat_bit = gen_feat_ids()
                feature_ids.append((feat_type, feat_bit))
                feat_fmt = '>' + str(len(feat_name)) + 's'
                feature_tables += [['B', inner_offset,
                                    feat_type, 'feature_type'],
                                   ['B', inner_offset + 1, feat_bit,
                                    'feature_bit_number'],
                                   [feat_fmt, inner_offset + 2,
                                    feat_name, 'feature_name']
                                   ]
                inner_offset += fnt_entry_size
            # No padding for the extension is necessary, because
            # the extension length is a multiple of 8
            self.feature_name_table = FieldsList([
                ['>I', self.ext_offset, 0x6803f857, 'ext_magic'],
                # One feature table entry contains 3 fields and takes 48 bytes
                ['>I', self.ext_offset + UINT32_S,
                 len(feature_tables) / 3 * 48, 'ext_length']
            ] + feature_tables)
            self.ext_offset = inner_offset
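
    # Layout note (added for illustration, not in the original source): each
    # feature name table entry occupies fnt_entry_size = 48 bytes: a 1-byte
    # type, a 1-byte bit number and a 46-byte name field, of which only
    # len(feat_name) bytes are actually written above.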

    def set_end_of_extension_area(self):
        """Generate a mandatory header extension marking end of header
        extensions.
        """
        self.end_of_extension_area = FieldsList([
            ['>I', self.ext_offset, 0, 'ext_magic'],
            ['>I', self.ext_offset + UINT32_S, 0, 'ext_length']
        ])

    def create_l_structures(self):
        """Generate random valid L1 and L2 tables."""
        def create_l2_entry(host, guest, l2_cluster):
            """Generate one L2 entry."""
            offset = l2_cluster * self.cluster_size
            l2_size = self.cluster_size / UINT64_S
            entry_offset = offset + UINT64_S * (guest % l2_size)
            cluster_descriptor = host * self.cluster_size
            if not self.header['version'][0].value == 2:
                cluster_descriptor += random.randint(0, 1)
            # While snapshots are not supported, bit #63 = 1
            # Compressed clusters are not supported => bit #62 = 0
            entry_val = (1 << 63) + cluster_descriptor
            return ['>Q', entry_offset, entry_val, 'l2_entry']

        def create_l1_entry(l2_cluster, l1_offset, guest):
            """Generate one L1 entry."""
            l2_size = self.cluster_size / UINT64_S
            entry_offset = l1_offset + UINT64_S * (guest / l2_size)
            # While snapshots are not supported, bit #63 = 1
            entry_val = (1 << 63) + l2_cluster * self.cluster_size
            return ['>Q', entry_offset, entry_val, 'l1_entry']

        if len(self.data_clusters) == 0:
            # All metadata for an empty guest image needs 4 clusters:
            # header, rfc table, rfc block, L1 table.
            # Header takes cluster #0, other clusters ##1-3 can be used
            l1_offset = random.randint(1, 3) * self.cluster_size
            l1 = [['>Q', l1_offset, 0, 'l1_entry']]
            l2 = []
        else:
            meta_data = self._get_metadata()
            guest_clusters = random.sample(range(self.image_size /
                                                 self.cluster_size),
                                           len(self.data_clusters))
            # Number of entries in an L1/L2 table
            l_size = self.cluster_size / UINT64_S
            # Number of clusters necessary for the L1 table
            l1_size = int(ceil((max(guest_clusters) + 1) / float(l_size**2)))
            l1_start = self._get_adjacent_clusters(self.data_clusters |
                                                   meta_data, l1_size)
            meta_data |= set(range(l1_start, l1_start + l1_size))
            l1_offset = l1_start * self.cluster_size
            # Indices of L2 tables
            l2_ids = []
            # Host clusters allocated for L2 tables
            l2_clusters = []
            # L1 entries
            l1 = []
            # L2 entries
            l2 = []
            for host, guest in zip(self.data_clusters, guest_clusters):
                l2_id = guest / l_size
                if l2_id not in l2_ids:
                    l2_ids.append(l2_id)
                    l2_clusters.append(self._get_adjacent_clusters(
                        self.data_clusters | meta_data | set(l2_clusters),
                        1))
                    l1.append(create_l1_entry(l2_clusters[-1], l1_offset,
                                              guest))
                l2.append(create_l2_entry(host, guest,
                                          l2_clusters[l2_ids.index(l2_id)]))
        self.l2_tables = FieldsList(l2)
        self.l1_table = FieldsList(l1)
        self.header['l1_size'][0].value = int(ceil(UINT64_S * self.image_size /
                                                   float(self.cluster_size**2)))
        self.header['l1_table_offset'][0].value = l1_offset
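
    # Worked example (added for illustration, not in the original source):
    # the 'l1_size' header field above counts L1 entries, each of which maps
    # one L2 table of cluster_size / 8 entries. For a 10 MiB image with
    # 64 KiB clusters that is ceil(8 * 10485760 / 65536**2) = 1 entry.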

    def create_refcount_structures(self):
        """Generate random refcount blocks and refcount table."""
        def allocate_rfc_blocks(data, size):
            """Return indices of clusters allocated for refcount blocks."""
            cluster_ids = set()
            diff = block_ids = set([x / size for x in data])
            while len(diff) != 0:
                # Allocate all not yet allocated clusters
                new = self._get_available_clusters(data | cluster_ids,
                                                   len(diff))
                # Indices of new refcount blocks necessary to cover clusters
                # in 'new'
                diff = set([x / size for x in new]) - block_ids
                cluster_ids |= new
                block_ids |= diff
            return cluster_ids, block_ids

        def allocate_rfc_table(data, init_blocks, block_size):
            """Return indices of clusters allocated for the refcount table
            and updated indices of clusters allocated for blocks and indices
            of blocks.
            """
            blocks = set(init_blocks)
            clusters = set()
            # Number of entries in one cluster of the refcount table
            size = self.cluster_size / UINT64_S
            # Number of clusters necessary for the refcount table based on
            # the current number of refcount blocks
            table_size = int(ceil((max(blocks) + 1) / float(size)))
            # Index of the first cluster of the refcount table
            table_start = self._get_adjacent_clusters(data, table_size + 1)
            # Clusters allocated for the current length of the refcount table
            table_clusters = set(range(table_start, table_start + table_size))
            # Clusters allocated for the refcount table including
            # last optional one for potential l1 growth
            table_clusters_allocated = set(range(table_start, table_start +
                                                 table_size + 1))
            # New refcount blocks necessary for clusters occupied by the
            # refcount table
            diff = set([c / block_size for c in table_clusters]) - blocks
            blocks |= diff
            while len(diff) != 0:
                # Allocate clusters for new refcount blocks
                new = self._get_available_clusters((data | clusters) |
                                                   table_clusters_allocated,
                                                   len(diff))
                # Indices of new refcount blocks necessary to cover
                # clusters in 'new'
                diff = set([x / block_size for x in new]) - blocks
                clusters |= new
                blocks |= diff
                # Check if the refcount table needs one more cluster
                if int(ceil((max(blocks) + 1) / float(size))) > table_size:
                    new_block_id = (table_start + table_size) / block_size
                    # Check if the additional table cluster needs
                    # one more refcount block
                    if new_block_id not in blocks:
                        diff.add(new_block_id)
                    table_clusters.add(table_start + table_size)
                    table_size += 1
            return table_clusters, blocks, clusters

        def create_table_entry(table_offset, block_cluster, block_size,
                               cluster):
            """Generate a refcount table entry."""
            offset = table_offset + UINT64_S * (cluster / block_size)
            return ['>Q', offset, block_cluster * self.cluster_size,
                    'refcount_table_entry']

        def create_block_entry(block_cluster, block_size, cluster):
            """Generate a refcount block entry for the given cluster."""
            entry_size = self.cluster_size / block_size
            offset = block_cluster * self.cluster_size
            entry_offset = offset + entry_size * (cluster % block_size)
            # While snapshots are not supported, all refcounts are set to 1
            return ['>H', entry_offset, 1, 'refcount_block_entry']
        # Size of a block entry in bits
        refcount_bits = 1 << self.header['refcount_order'][0].value
        # Number of refcount entries per refcount block
        # Convert self.cluster_size from bytes to bits to have the same
        # base for the numerator and denominator
        block_size = self.cluster_size * 8 / refcount_bits
        meta_data = self._get_metadata()
        if len(self.data_clusters) == 0:
            # All metadata for an empty guest image needs 4 clusters:
            # header, rfc table, rfc block, L1 table.
            # Header takes cluster #0, other clusters ##1-3 can be used
            block_clusters = set([random.choice(list(set(range(1, 4)) -
                                                     meta_data))])
            block_ids = set([0])
            table_clusters = set([random.choice(list(set(range(1, 4)) -
                                                     meta_data -
                                                     block_clusters))])
        else:
            block_clusters, block_ids = \
                allocate_rfc_blocks(self.data_clusters |
                                    meta_data, block_size)
            table_clusters, block_ids, new_clusters = \
                allocate_rfc_table(self.data_clusters |
                                   meta_data |
                                   block_clusters,
                                   block_ids,
                                   block_size)
            block_clusters |= new_clusters

        meta_data |= block_clusters | table_clusters
        table_offset = min(table_clusters) * self.cluster_size
        block_id = None
        # Clusters allocated for refcount blocks
        block_clusters = list(block_clusters)
        # Indices of refcount blocks
        block_ids = list(block_ids)
        # Refcount table entries
        rfc_table = []
        # Refcount entries
        rfc_blocks = []

        for cluster in sorted(self.data_clusters | meta_data):
            if cluster / block_size != block_id:
                block_id = cluster / block_size
                block_cluster = block_clusters[block_ids.index(block_id)]
                rfc_table.append(create_table_entry(table_offset,
                                                    block_cluster,
                                                    block_size, cluster))
            rfc_blocks.append(create_block_entry(block_cluster, block_size,
                                                 cluster))
        self.refcount_table = FieldsList(rfc_table)
        self.refcount_blocks = FieldsList(rfc_blocks)

        self.header['refcount_table_offset'][0].value = table_offset
        self.header['refcount_table_clusters'][0].value = len(table_clusters)
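
    # Worked example (added for illustration, not in the original source):
    # with the default refcount_order = 4 each refcount entry is 16 bits, so
    # one refcount block covers block_size = cluster_size * 8 / 16 =
    # cluster_size / 2 clusters, and every entry is written as a 2-byte '>H'
    # value.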

    def fuzz(self, fields_to_fuzz=None):
        """Fuzz an image by corrupting values of a random subset of its fields.

        Without parameters the method fuzzes the entire image.

        If 'fields_to_fuzz' is specified, then only fields in this list will be
        fuzzed. 'fields_to_fuzz' can contain both individual fields and more
        general image elements such as the header or tables.

        In the first case the field will always be fuzzed.
        In the second case a random subset of fields will be selected and
        fuzzed.
        """
        def coin():
            """Return True with probability equal to the fraction of fields
            to be fuzzed.
            """
            return random.random() < self.bias

        if fields_to_fuzz is None:
            for field in self:
                if coin():
                    field.value = getattr(fuzz, field.name)(field.value)
        else:
            for item in fields_to_fuzz:
                if len(item) == 1:
                    for field in getattr(self, item[0]):
                        if coin():
                            field.value = getattr(fuzz,
                                                  field.name)(field.value)
                else:
                    # If fields with the requested name were not generated,
                    # getattr(self, item[0])[item[1]] returns an empty list
                    for field in getattr(self, item[0])[item[1]]:
                        field.value = getattr(fuzz, field.name)(field.value)

    def write(self, filename):
        """Write an entire image to the file."""
        image_file = open(filename, 'w')
        for field in self:
            image_file.seek(field.offset)
            image_file.write(struct.pack(field.fmt, field.value))

        for cluster in sorted(self.data_clusters):
            image_file.seek(cluster * self.cluster_size)
            image_file.write(urandom(self.cluster_size))

        # Align the real image size to the cluster size
        image_file.seek(0, 2)
        size = image_file.tell()
        rounded = (size + self.cluster_size - 1) & ~(self.cluster_size - 1)
        if rounded > size:
            image_file.seek(rounded - 1)
            image_file.write("\0")
        image_file.close()

    @staticmethod
    def _size_params():
        """Generate a random image size aligned to a random correct
        cluster size.
        """
        cluster_bits = random.randrange(9, 21)
        cluster_size = 1 << cluster_bits
        img_size = random.randrange(0, MAX_IMAGE_SIZE + 1, cluster_size)
        return (cluster_bits, img_size)
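
    # Range note (added for illustration, not in the original source):
    # cluster_bits is drawn from 9..20, i.e. cluster sizes from 512 bytes to
    # 1 MiB, and the virtual image size is a multiple of the cluster size of
    # at most MAX_IMAGE_SIZE (10 MiB).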

    @staticmethod
    def _get_available_clusters(used, number):
        """Return a set of indices of not allocated clusters.

        'used' contains indices of currently allocated clusters.
        All clusters that cannot be allocated between 'used' clusters will have
        indices appended to the end of 'used'.
        """
        append_id = max(used) + 1
        free = set(range(1, append_id)) - used
        if len(free) >= number:
            return set(random.sample(free, number))
        else:
            return free | set(range(append_id, append_id + number - len(free)))

    @staticmethod
    def _get_adjacent_clusters(used, size):
        """Return the index of the first cluster in a sequence of free ones.

        'used' contains indices of currently allocated clusters. 'size' is the
        length of the sequence of free clusters.
        If a sequence of 'size' free clusters is not available between 'used'
        clusters, its first index will be appended to the end of 'used'.
        """
        def get_cluster_id(lst, length):
            """Return the first index of a sequence of the specified length
            or None if the sequence cannot be inserted in the list.
            """
            if len(lst) != 0:
                pairs = []
                pair = (lst[0], 1)
                for i in range(1, len(lst)):
                    if lst[i] == lst[i-1] + 1:
                        pair = (lst[i], pair[1] + 1)
                    else:
                        pairs.append(pair)
                        pair = (lst[i], 1)
                pairs.append(pair)
                random.shuffle(pairs)
                for x, s in pairs:
                    if s >= length:
                        return x - length + 1
            return None

        append_id = max(used) + 1
        free = list(set(range(1, append_id)) - used)
        idx = get_cluster_id(free, size)
        if idx is None:
            return append_id
        else:
            return idx

    @staticmethod
    def _alloc_data(img_size, cluster_size):
        """Return a set of random indices of clusters allocated for guest data.
        """
        num_of_cls = img_size/cluster_size
        return set(random.sample(range(1, num_of_cls + 1),
                                 random.randint(0, num_of_cls)))

    def _get_metadata(self):
        """Return indices of clusters allocated for image metadata."""
        ids = set()
        for x in self:
            ids.add(x.offset/self.cluster_size)
        return ids


def create_image(test_img_path, backing_file_name=None, backing_file_fmt=None,
                 fields_to_fuzz=None):
    """Create a fuzzed image and write it to the specified file."""
    image = Image(backing_file_name)
    image.set_backing_file_format(backing_file_fmt)
    image.create_feature_name_table()
    image.set_end_of_extension_area()
    image.create_l_structures()
    image.create_refcount_structures()
    image.fuzz(fields_to_fuzz)
    image.write(test_img_path)
    return image.image_size
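

# Minimal usage sketch (added for illustration, not part of the original
# module). The output path, backing file name/format and field list below are
# hypothetical examples; because of the relative import of 'fuzz' above, this
# module has to be run as part of its package (e.g. 'python -m qcow2.layout')
# rather than as a standalone script.
if __name__ == '__main__':
    virtual_size = create_image('/tmp/fuzzed.qcow2',
                                backing_file_name='base.img',
                                backing_file_fmt='raw',
                                fields_to_fuzz=[['header', 'version'],
                                                ['l1_table']])
    print('Wrote fuzzed image with virtual size %d bytes' % virtual_size)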