1 # Generator of fuzzed qcow2 images
3 # Copyright (C) 2014 Maria Kustova <maria.k@catit.be>
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU General Public License as published by
7 # the Free Software Foundation, either version 2 of the License, or
8 # (at your option) any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with this program. If not, see <http://www.gnu.org/licenses/>.
23 from os
import urandom
24 from itertools
import chain
# Upper bound for the randomly generated image size (10 * 2**20); the size
# is drawn from 0..MAX_IMAGE_SIZE in steps of the chosen cluster size.
MAX_IMAGE_SIZE = 10 * (1 << 20)
34 """Atomic image element (field).
36 The class represents an image field as quadruple of a data format
37 of value necessary for its packing to binary form, an offset from
38 the beginning of the image, a value and a name.
40 The field can be iterated as a list [format, offset, value, name].
43 __slots__
= ('fmt', 'offset', 'value', 'name')
45 def __init__(self
, fmt
, offset
, val
, name
):
52 return iter([self
.fmt
, self
.offset
, self
.value
, self
.name
])
55 return "Field(fmt='%s', offset=%d, value=%s, name=%s)" % \
56 (self
.fmt
, self
.offset
, str(self
.value
), self
.name
)
59 class FieldsList(object):
63 The class allows access to a field in the list by its name.
66 def __init__(self
, meta_data
=None):
70 self
.data
= [Field(*f
)
def __getitem__(self, name):
    """Return the list of all stored fields whose name equals 'name'.

    The result is always a list: it is empty when no field matches and may
    hold several fields when the name is not unique.
    """
    matching = []
    for field in self.data:
        if field.name == name:
            matching.append(field)
    return matching
77 return iter(self
.data
)
85 """ Qcow2 image object.
87 This class allows to create qcow2 images with random valid structures and
88 values, fuzz them via external qcow2.fuzz module and write the result to
92 def __init__(self
, backing_file_name
=None):
93 """Create a random valid qcow2 image with the correct header and stored
96 cluster_bits
, self
.image_size
= self
._size
_params
()
97 self
.cluster_size
= 1 << cluster_bits
98 self
.header
= FieldsList()
99 self
.backing_file_name
= FieldsList()
100 self
.backing_file_format
= FieldsList()
101 self
.feature_name_table
= FieldsList()
102 self
.end_of_extension_area
= FieldsList()
103 self
.l2_tables
= FieldsList()
104 self
.l1_table
= FieldsList()
106 self
.create_header(cluster_bits
, backing_file_name
)
107 self
.set_backing_file_name(backing_file_name
)
108 self
.data_clusters
= self
._alloc
_data
(self
.image_size
,
110 # Fraction of fields that will be fuzzed (used as a per-field probability)
111 self
.bias
= random
.uniform(0.2, 0.5)
114 return chain(self
.header
, self
.backing_file_format
,
115 self
.feature_name_table
, self
.end_of_extension_area
,
116 self
.backing_file_name
, self
.l1_table
, self
.l2_tables
)
118 def create_header(self
, cluster_bits
, backing_file_name
=None):
119 """Generate a random valid header."""
121 ['>4s', 0, "QFI\xfb", 'magic'],
122 ['>I', 4, random
.randint(2, 3), 'version'],
123 ['>Q', 8, 0, 'backing_file_offset'],
124 ['>I', 16, 0, 'backing_file_size'],
125 ['>I', 20, cluster_bits
, 'cluster_bits'],
126 ['>Q', 24, self
.image_size
, 'size'],
127 ['>I', 32, 0, 'crypt_method'],
128 ['>I', 36, 0, 'l1_size'],
129 ['>Q', 40, 0, 'l1_table_offset'],
130 ['>Q', 48, 0, 'refcount_table_offset'],
131 ['>I', 56, 0, 'refcount_table_clusters'],
132 ['>I', 60, 0, 'nb_snapshots'],
133 ['>Q', 64, 0, 'snapshots_offset'],
134 ['>Q', 72, 0, 'incompatible_features'],
135 ['>Q', 80, 0, 'compatible_features'],
136 ['>Q', 88, 0, 'autoclear_features'],
137 # Only refcount_order = 4 is supported by current (07.2014)
138 # implementation of QEMU
139 ['>I', 96, 4, 'refcount_order'],
140 ['>I', 100, 0, 'header_length']
142 self
.header
= FieldsList(meta_header
)
144 if self
.header
['version'][0].value
== 2:
145 self
.header
['header_length'][0].value
= 72
147 self
.header
['incompatible_features'][0].value
= \
148 random
.getrandbits(2)
149 self
.header
['compatible_features'][0].value
= random
.getrandbits(1)
150 self
.header
['header_length'][0].value
= 104
151 # Extensions start at the header last field offset and the field size
152 self
.ext_offset
= struct
.calcsize(
153 self
.header
['header_length'][0].fmt
) + \
154 self
.header
['header_length'][0].offset
155 end_of_extension_area_len
= 2 * UINT32_S
156 free_space
= self
.cluster_size
- self
.ext_offset
- \
157 end_of_extension_area_len
158 # If the backing file name specified and there is enough space for it
159 # in the first cluster, then it's placed in the very end of the first
161 if (backing_file_name
is not None) and \
162 (free_space
>= len(backing_file_name
)):
163 self
.header
['backing_file_size'][0].value
= len(backing_file_name
)
164 self
.header
['backing_file_offset'][0].value
= \
165 self
.cluster_size
- len(backing_file_name
)
def set_backing_file_name(self, backing_file_name=None):
    """Add the backing file name to the image at the offset stored in the
    header.

    The 'bf_name' field is created only if 'backing_file_name' is given and
    the header field 'backing_file_offset' is not zero. The name is packed
    as a string of len(backing_file_name) bytes. In any other case the
    method leaves the image untouched.
    """
    if backing_file_name is None:
        return
    name_offset = self.header['backing_file_offset'][0].value
    if name_offset == 0:
        return
    name_fmt = '>%ds' % len(backing_file_name)
    self.backing_file_name = FieldsList([
        [name_fmt, name_offset, backing_file_name, 'bf_name']
    ])
180 def set_backing_file_format(self
, backing_file_fmt
=None):
181 """Generate the header extension for the backing file format."""
182 if backing_file_fmt
is not None:
183 # Calculation of the free space available in the first cluster
184 end_of_extension_area_len
= 2 * UINT32_S
185 high_border
= (self
.header
['backing_file_offset'][0].value
or
186 (self
.cluster_size
- 1)) - \
187 end_of_extension_area_len
188 free_space
= high_border
- self
.ext_offset
189 ext_size
= 2 * UINT32_S
+ ((len(backing_file_fmt
) + 7) & ~
7)
191 if free_space
>= ext_size
:
192 ext_data_len
= len(backing_file_fmt
)
193 ext_data_fmt
= '>' + str(ext_data_len
) + 's'
194 ext_padding_len
= 7 - (ext_data_len
- 1) % 8
195 self
.backing_file_format
= FieldsList([
196 ['>I', self
.ext_offset
, 0xE2792ACA, 'ext_magic'],
197 ['>I', self
.ext_offset
+ UINT32_S
, ext_data_len
,
199 [ext_data_fmt
, self
.ext_offset
+ UINT32_S
* 2,
200 backing_file_fmt
, 'bf_format']
204 self
.backing_file_format
['bf_format'][0].fmt
) + \
206 self
.backing_file_format
['bf_format'][0].offset
208 def create_feature_name_table(self
):
209 """Generate a random header extension for names of features used in
213 """Return random feature type and feature bit."""
214 return (random
.randint(0, 2), random
.randint(0, 63))
216 end_of_extension_area_len
= 2 * UINT32_S
217 high_border
= (self
.header
['backing_file_offset'][0].value
or
218 (self
.cluster_size
- 1)) - \
219 end_of_extension_area_len
220 free_space
= high_border
- self
.ext_offset
221 # Sum of sizes of 'magic' and 'length' header extension fields
222 ext_header_len
= 2 * UINT32_S
223 fnt_entry_size
= 6 * UINT64_S
224 num_fnt_entries
= min(10, (free_space
- ext_header_len
) /
226 if not num_fnt_entries
== 0:
229 inner_offset
= self
.ext_offset
+ ext_header_len
230 feat_name
= 'some cool feature'
231 while len(feature_tables
) < num_fnt_entries
* 3:
232 feat_type
, feat_bit
= gen_feat_ids()
234 while (feat_type
, feat_bit
) in feature_ids
:
235 feat_type
, feat_bit
= gen_feat_ids()
236 feature_ids
.append((feat_type
, feat_bit
))
237 feat_fmt
= '>' + str(len(feat_name
)) + 's'
238 feature_tables
+= [['B', inner_offset
,
239 feat_type
, 'feature_type'],
240 ['B', inner_offset
+ 1, feat_bit
,
241 'feature_bit_number'],
242 [feat_fmt
, inner_offset
+ 2,
243 feat_name
, 'feature_name']
245 inner_offset
+= fnt_entry_size
246 # No padding for the extension is necessary, because
247 # the extension length is multiple of 8
248 self
.feature_name_table
= FieldsList([
249 ['>I', self
.ext_offset
, 0x6803f857, 'ext_magic'],
250 # One feature table contains 3 fields and takes 48 bytes
251 ['>I', self
.ext_offset
+ UINT32_S
,
252 len(feature_tables
) / 3 * 48, 'ext_length']
254 self
.ext_offset
= inner_offset
def set_end_of_extension_area(self):
    """Generate the mandatory header extension that marks the end of the
    header extension area.

    Two '>I' fields with zero values, 'ext_magic' and 'ext_length', are
    placed one after another starting at the current extension offset.
    """
    magic_offset = self.ext_offset
    length_offset = magic_offset + UINT32_S
    self.end_of_extension_area = FieldsList([
        ['>I', magic_offset, 0, 'ext_magic'],
        ['>I', length_offset, 0, 'ext_length']
    ])
265 def create_l_structures(self
):
266 """Generate random valid L1 and L2 tables."""
def create_l2_entry(host, guest, l2_cluster):
    """Generate one L2 entry.

    'host' is the index of the host cluster the entry points to, 'guest'
    is the index of the guest cluster it describes and 'l2_cluster' is the
    index of the cluster that holds the L2 table.

    Return the entry as a field description [format, offset, value, name].
    """
    offset = l2_cluster * self.cluster_size
    # Floor division keeps the number of entries per table (and thus the
    # entry offset) an integer on Python 3 as well as on Python 2
    l2_size = self.cluster_size // UINT64_S
    entry_offset = offset + UINT64_S * (guest % l2_size)
    cluster_descriptor = host * self.cluster_size
    if not self.header['version'][0].value == 2:
        # Bit #0 is set at random for images of versions other than 2
        # NOTE(review): presumably the "reads as zeros" flag - confirm
        cluster_descriptor += random.randint(0, 1)
    # While snapshots are not supported, bit #63 = 1
    # Compressed clusters are not supported => bit #62 = 0
    entry_val = (1 << 63) + cluster_descriptor
    return ['>Q', entry_offset, entry_val, 'l2_entry']
def create_l1_entry(l2_cluster, l1_offset, guest):
    """Generate one L1 entry.

    'l2_cluster' is the index of the cluster holding the L2 table the
    entry refers to, 'l1_offset' is the offset of the L1 table in the image
    and 'guest' is the index of a guest cluster covered by that L2 table.

    Return the entry as a field description [format, offset, value, name].
    """
    # Floor division is required in both places: true division would turn
    # the entry offset into a float on Python 3
    l2_size = self.cluster_size // UINT64_S
    entry_offset = l1_offset + UINT64_S * (guest // l2_size)
    # While snapshots are not supported bit #63 = 1
    entry_val = (1 << 63) + l2_cluster * self.cluster_size
    return ['>Q', entry_offset, entry_val, 'l1_entry']
288 if len(self
.data_clusters
) == 0:
289 # All metadata for an empty guest image needs 4 clusters:
290 # header, rfc table, rfc block, L1 table.
291 # Header takes cluster #0, other clusters ##1-3 can be used
292 l1_offset
= random
.randint(1, 3) * self
.cluster_size
293 l1
= [['>Q', l1_offset
, 0, 'l1_entry']]
296 meta_data
= self
._get
_metadata
()
297 guest_clusters
= random
.sample(range(self
.image_size
/
299 len(self
.data_clusters
))
300 # Number of entries in a L1/L2 table
301 l_size
= self
.cluster_size
/ UINT64_S
302 # Number of clusters necessary for L1 table
303 l1_size
= int(ceil((max(guest_clusters
) + 1) / float(l_size
**2)))
304 l1_start
= self
._get
_adjacent
_clusters
(self
.data_clusters |
306 meta_data |
= set(range(l1_start
, l1_start
+ l1_size
))
307 l1_offset
= l1_start
* self
.cluster_size
308 # Indices of L2 tables
310 # Host clusters allocated for L2 tables
316 for host
, guest
in zip(self
.data_clusters
, guest_clusters
):
317 l2_id
= guest
/ l_size
318 if l2_id
not in l2_ids
:
320 l2_clusters
.append(self
._get
_adjacent
_clusters
(
321 self
.data_clusters | meta_data |
set(l2_clusters
),
323 l1
.append(create_l1_entry(l2_clusters
[-1], l1_offset
,
325 l2
.append(create_l2_entry(host
, guest
,
326 l2_clusters
[l2_ids
.index(l2_id
)]))
327 self
.l2_tables
= FieldsList(l2
)
328 self
.l1_table
= FieldsList(l1
)
329 self
.header
['l1_size'][0].value
= int(ceil(UINT64_S
* self
.image_size
/
330 float(self
.cluster_size
**2)))
331 self
.header
['l1_table_offset'][0].value
= l1_offset
333 def fuzz(self
, fields_to_fuzz
=None):
334 """Fuzz an image by corrupting values of a random subset of its fields.
336 Without parameters the method fuzzes an entire image.
338 If 'fields_to_fuzz' is specified then only fields in this list will be
339 fuzzed. 'fields_to_fuzz' can contain both individual fields and more
340 general image elements as a header or tables.
342 In the first case the field will be fuzzed always.
343 In the second a random subset of fields will be selected and fuzzed.
346 """Return boolean value proportional to a portion of fields to be
349 return random
.random() < self
.bias
351 if fields_to_fuzz
is None:
354 field
.value
= getattr(fuzz
, field
.name
)(field
.value
)
356 for item
in fields_to_fuzz
:
358 for field
in getattr(self
, item
[0]):
360 field
.value
= getattr(fuzz
,
361 field
.name
)(field
.value
)
363 # If fields with the requested name were not generated
364 # getattr(self, item[0])[item[1]] returns an empty list
365 for field
in getattr(self
, item
[0])[item
[1]]:
366 field
.value
= getattr(fuzz
, field
.name
)(field
.value
)
368 def write(self
, filename
):
369 """Write an entire image to the file."""
370 image_file
= open(filename
, 'w')
372 image_file
.seek(field
.offset
)
373 image_file
.write(struct
.pack(field
.fmt
, field
.value
))
375 for cluster
in sorted(self
.data_clusters
):
376 image_file
.seek(cluster
* self
.cluster_size
)
377 image_file
.write(urandom(self
.cluster_size
))
379 # Align the real image size to the cluster size
380 image_file
.seek(0, 2)
381 size
= image_file
.tell()
382 rounded
= (size
+ self
.cluster_size
- 1) & ~
(self
.cluster_size
- 1)
384 image_file
.seek(rounded
- 1)
385 image_file
.write("\0")
390 """Generate a random image size aligned to a random correct
393 cluster_bits
= random
.randrange(9, 21)
394 cluster_size
= 1 << cluster_bits
395 img_size
= random
.randrange(0, MAX_IMAGE_SIZE
+ 1, cluster_size
)
396 return (cluster_bits
, img_size
)
def _get_available_clusters(used, number):
    """Return a set of indices of not allocated clusters.

    'used' is a set with indices of currently allocated clusters and
    'number' is the amount of cluster indices requested.

    Free indices are looked for between 1 and max(used). If there are
    enough of them, a random subset of size 'number' is returned. Otherwise
    all of them are returned together with the missing amount of indices
    following the last used cluster.
    Index 0 is never returned. For an empty 'used' the indices
    1..'number' are returned.
    """
    # max() fails on an empty set, so start right after cluster #0 then
    append_id = max(used) + 1 if used else 1
    free = set(range(1, append_id)) - used
    if len(free) >= number:
        # random.sample() requires a sequence: sets are rejected by
        # Python 3.11 and newer
        return set(random.sample(sorted(free), number))
    return free | set(range(append_id, append_id + number - len(free)))
414 def _get_adjacent_clusters(used
, size
):
415 """Return an index of the first cluster in the sequence of free ones.
417 'used' contains indices of currently allocated clusters. 'size' is the
418 length of the sequence of free clusters.
419 If the sequence of 'size' is not available between 'used' clusters, its
420 first index will be appended to the end of 'used'.
422 def get_cluster_id(lst
, length
):
423 """Return the first index of the sequence of the specified length
424 or None if the sequence cannot be inserted in the list.
429 for i
in range(1, len(lst
)):
430 if lst
[i
] == lst
[i
-1] + 1:
431 pair
= (lst
[i
], pair
[1] + 1)
436 random
.shuffle(pairs
)
439 return x
- length
+ 1
442 append_id
= max(used
) + 1
443 free
= list(set(range(1, append_id
)) - used
)
444 idx
= get_cluster_id(free
, size
)
def _alloc_data(img_size, cluster_size):
    """Return a set of random indices of clusters allocated for guest data.

    The number of whole clusters fitting into 'img_size' is calculated
    first. Then a random amount of indices (from none to all of them) is
    sampled from the range 1..number of clusters.
    """
    # Floor division: the number of clusters has to stay an integer on
    # Python 3, otherwise range() and randint() reject it
    num_of_cls = img_size // cluster_size
    return set(random.sample(range(1, num_of_cls + 1),
                             random.randint(0, num_of_cls)))
458 def _get_metadata(self
):
459 """Return indices of clusters allocated for image metadata."""
462 ids
.add(x
.offset
/self
.cluster_size
)
def create_image(test_img_path, backing_file_name=None, backing_file_fmt=None,
                 fields_to_fuzz=None):
    """Generate a fuzzed image, write it to 'test_img_path' and return the
    image size.

    'backing_file_name' and 'backing_file_fmt' are passed to the image
    constructor and to the backing file format extension respectively.
    'fields_to_fuzz' is passed to Image.fuzz() as is.
    """
    image = Image(backing_file_name)
    image.set_backing_file_format(backing_file_fmt)
    # The remaining layout steps take no arguments and have to be run in
    # exactly this order
    for build_step in (image.create_feature_name_table,
                       image.set_end_of_extension_area,
                       image.create_l_structures):
        build_step()
    image.fuzz(fields_to_fuzz)
    image.write(test_img_path)
    return image.image_size