tests/image-fuzzer/qcow2/layout.py

   1 # Generator of fuzzed qcow2 images
   2 #
   3 # Copyright (C) 2014 Maria Kustova <maria.k@catit.be>
   4 #
   5 # This program is free software: you can redistribute it and/or modify
   6 # it under the terms of the GNU General Public License as published by
   7 # the Free Software Foundation, either version 2 of the License, or
   8 # (at your option) any later version.
   9 #
  10 # This program is distributed in the hope that it will be useful,
  11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13 # GNU General Public License for more details.
  14 #
  15 # You should have received a copy of the GNU General Public License
  16 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
  17 #
  18
  19 import random
  20 import struct
  21 import fuzz
  22 from math import ceil
  23 from os import urandom
  24 from itertools import chain
  25
# Upper bound (in bytes) for the randomly generated image size: 10 MiB
MAX_IMAGE_SIZE = 10 * (1 << 20)
# Standard sizes (in bytes) of 32-bit and 64-bit unsigned integers
UINT32_S = 4
UINT64_S = 8
  30
  31
  32 class Field(object):
  33
  34     """Atomic image element (field).
  35
  36     The class represents an image field as quadruple of a data format
  37     of value necessary for its packing to binary form, an offset from
  38     the beginning of the image, a value and a name.
  39
  40     The field can be iterated as a list [format, offset, value, name].
  41     """
  42
  43     __slots__ = ('fmt', 'offset', 'value', 'name')
  44
  45     def __init__(self, fmt, offset, val, name):
  46         self.fmt = fmt
  47         self.offset = offset
  48         self.value = val
  49         self.name = name
  50
  51     def __iter__(self):
  52         return iter([self.fmt, self.offset, self.value, self.name])
  53
  54     def __repr__(self):
  55         return "Field(fmt='%s', offset=%d, value=%s, name=%s)" % \
  56             (self.fmt, self.offset, str(self.value), self.name)
  57
  58
  59 class FieldsList(object):
  60
  61     """List of fields.
  62
  63     The class allows access to a field in the list by its name.
  64     """
  65
  66     def __init__(self, meta_data=None):
  67         if meta_data is None:
  68             self.data = []
  69         else:
  70             self.data = [Field(*f)
  71                          for f in meta_data]
  72
  73     def __getitem__(self, name):
  74         return [x for x in self.data if x.name == name]
  75
  76     def __iter__(self):
  77         return iter(self.data)
  78
  79     def __len__(self):
  80         return len(self.data)
  81
  82
  83 class Image(object):
  84
  85     """ Qcow2 image object.
  86
  87     This class allows to create qcow2 images with random valid structures and
  88     values, fuzz them via external qcow2.fuzz module and write the result to
  89     a file.
  90     """
  91
  92     def __init__(self, backing_file_name=None):
  93         """Create a random valid qcow2 image with the correct header and stored
  94         backing file name.
  95         """
  96         cluster_bits, self.image_size = self._size_params()
  97         self.cluster_size = 1 << cluster_bits
  98         self.header = FieldsList()
  99         self.backing_file_name = FieldsList()
 100         self.backing_file_format = FieldsList()
 101         self.feature_name_table = FieldsList()
 102         self.end_of_extension_area = FieldsList()
 103         self.l2_tables = FieldsList()
 104         self.l1_table = FieldsList()
 105         self.ext_offset = 0
 106         self.create_header(cluster_bits, backing_file_name)
 107         self.set_backing_file_name(backing_file_name)
 108         self.data_clusters = self._alloc_data(self.image_size,
 109                                               self.cluster_size)
 110         # Percentage of fields will be fuzzed
 111         self.bias = random.uniform(0.2, 0.5)
 112
 113     def __iter__(self):
 114         return chain(self.header, self.backing_file_format,
 115                      self.feature_name_table, self.end_of_extension_area,
 116                      self.backing_file_name, self.l1_table, self.l2_tables)
 117
 118     def create_header(self, cluster_bits, backing_file_name=None):
 119         """Generate a random valid header."""
 120         meta_header = [
 121             ['>4s', 0, "QFI\xfb", 'magic'],
 122             ['>I', 4, random.randint(2, 3), 'version'],
 123             ['>Q', 8, 0, 'backing_file_offset'],
 124             ['>I', 16, 0, 'backing_file_size'],
 125             ['>I', 20, cluster_bits, 'cluster_bits'],
 126             ['>Q', 24, self.image_size, 'size'],
 127             ['>I', 32, 0, 'crypt_method'],
 128             ['>I', 36, 0, 'l1_size'],
 129             ['>Q', 40, 0, 'l1_table_offset'],
 130             ['>Q', 48, 0, 'refcount_table_offset'],
 131             ['>I', 56, 0, 'refcount_table_clusters'],
 132             ['>I', 60, 0, 'nb_snapshots'],
 133             ['>Q', 64, 0, 'snapshots_offset'],
 134             ['>Q', 72, 0, 'incompatible_features'],
 135             ['>Q', 80, 0, 'compatible_features'],
 136             ['>Q', 88, 0, 'autoclear_features'],
 137             # Only refcount_order = 4 is supported by current (07.2014)
 138             # implementation of QEMU
 139             ['>I', 96, 4, 'refcount_order'],
 140             ['>I', 100, 0, 'header_length']
 141         ]
 142         self.header = FieldsList(meta_header)
 143
 144         if self.header['version'][0].value == 2:
 145             self.header['header_length'][0].value = 72
 146         else:
 147             self.header['incompatible_features'][0].value = \
 148                                                         random.getrandbits(2)
 149             self.header['compatible_features'][0].value = random.getrandbits(1)
 150             self.header['header_length'][0].value = 104
 151         # Extensions start at the header last field offset and the field size
 152         self.ext_offset = struct.calcsize(
 153             self.header['header_length'][0].fmt) + \
 154             self.header['header_length'][0].offset
 155         end_of_extension_area_len = 2 * UINT32_S
 156         free_space = self.cluster_size - self.ext_offset - \
 157                      end_of_extension_area_len
 158         # If the backing file name specified and there is enough space for it
 159         # in the first cluster, then it's placed in the very end of the first
 160         # cluster.
 161         if (backing_file_name is not None) and \
 162            (free_space >= len(backing_file_name)):
 163             self.header['backing_file_size'][0].value = len(backing_file_name)
 164             self.header['backing_file_offset'][0].value = \
 165                                     self.cluster_size - len(backing_file_name)
 166
 167     def set_backing_file_name(self, backing_file_name=None):
 168         """Add the name of the backing file at the offset specified
 169         in the header.
 170         """
 171         if (backing_file_name is not None) and \
 172            (not self.header['backing_file_offset'][0].value == 0):
 173             data_len = len(backing_file_name)
 174             data_fmt = '>' + str(data_len) + 's'
 175             self.backing_file_name = FieldsList([
 176                 [data_fmt, self.header['backing_file_offset'][0].value,
 177                  backing_file_name, 'bf_name']
 178             ])
 179
 180     def set_backing_file_format(self, backing_file_fmt=None):
 181         """Generate the header extension for the backing file format."""
 182         if backing_file_fmt is not None:
 183             # Calculation of the free space available in the first cluster
 184             end_of_extension_area_len = 2 * UINT32_S
 185             high_border = (self.header['backing_file_offset'][0].value or
 186                            (self.cluster_size - 1)) - \
 187                 end_of_extension_area_len
 188             free_space = high_border - self.ext_offset
 189             ext_size = 2 * UINT32_S + ((len(backing_file_fmt) + 7) & ~7)
 190
 191             if free_space >= ext_size:
 192                 ext_data_len = len(backing_file_fmt)
 193                 ext_data_fmt = '>' + str(ext_data_len) + 's'
 194                 ext_padding_len = 7 - (ext_data_len - 1) % 8
 195                 self.backing_file_format = FieldsList([
 196                     ['>I', self.ext_offset, 0xE2792ACA, 'ext_magic'],
 197                     ['>I', self.ext_offset + UINT32_S, ext_data_len,
 198                      'ext_length'],
 199                     [ext_data_fmt, self.ext_offset + UINT32_S * 2,
 200                      backing_file_fmt, 'bf_format']
 201                 ])
 202                 self.ext_offset = \
 203                         struct.calcsize(
 204                             self.backing_file_format['bf_format'][0].fmt) + \
 205                         ext_padding_len + \
 206                         self.backing_file_format['bf_format'][0].offset
 207
 208     def create_feature_name_table(self):
 209         """Generate a random header extension for names of features used in
 210         the image.
 211         """
 212         def gen_feat_ids():
 213             """Return random feature type and feature bit."""
 214             return (random.randint(0, 2), random.randint(0, 63))
 215
 216         end_of_extension_area_len = 2 * UINT32_S
 217         high_border = (self.header['backing_file_offset'][0].value or
 218                        (self.cluster_size - 1)) - \
 219             end_of_extension_area_len
 220         free_space = high_border - self.ext_offset
 221         # Sum of sizes of 'magic' and 'length' header extension fields
 222         ext_header_len = 2 * UINT32_S
 223         fnt_entry_size = 6 * UINT64_S
 224         num_fnt_entries = min(10, (free_space - ext_header_len) /
 225                               fnt_entry_size)
 226         if not num_fnt_entries == 0:
 227             feature_tables = []
 228             feature_ids = []
 229             inner_offset = self.ext_offset + ext_header_len
 230             feat_name = 'some cool feature'
 231             while len(feature_tables) < num_fnt_entries * 3:
 232                 feat_type, feat_bit = gen_feat_ids()
 233                 # Remove duplicates
 234                 while (feat_type, feat_bit) in feature_ids:
 235                     feat_type, feat_bit = gen_feat_ids()
 236                 feature_ids.append((feat_type, feat_bit))
 237                 feat_fmt = '>' + str(len(feat_name)) + 's'
 238                 feature_tables += [['B', inner_offset,
 239                                     feat_type, 'feature_type'],
 240                                    ['B', inner_offset + 1, feat_bit,
 241                                     'feature_bit_number'],
 242                                    [feat_fmt, inner_offset + 2,
 243                                     feat_name, 'feature_name']
 244                 ]
 245                 inner_offset += fnt_entry_size
 246             # No padding for the extension is necessary, because
 247             # the extension length is multiple of 8
 248             self.feature_name_table = FieldsList([
 249                 ['>I', self.ext_offset, 0x6803f857, 'ext_magic'],
 250                 # One feature table contains 3 fields and takes 48 bytes
 251                 ['>I', self.ext_offset + UINT32_S,
 252                  len(feature_tables) / 3 * 48, 'ext_length']
 253             ] + feature_tables)
 254             self.ext_offset = inner_offset
 255
 256     def set_end_of_extension_area(self):
 257         """Generate a mandatory header extension marking end of header
 258         extensions.
 259         """
 260         self.end_of_extension_area = FieldsList([
 261             ['>I', self.ext_offset, 0, 'ext_magic'],
 262             ['>I', self.ext_offset + UINT32_S, 0, 'ext_length']
 263         ])
 264
 265     def create_l_structures(self):
 266         """Generate random valid L1 and L2 tables."""
 267         def create_l2_entry(host, guest, l2_cluster):
 268             """Generate one L2 entry."""
 269             offset = l2_cluster * self.cluster_size
 270             l2_size = self.cluster_size / UINT64_S
 271             entry_offset = offset + UINT64_S * (guest % l2_size)
 272             cluster_descriptor = host * self.cluster_size
 273             if not self.header['version'][0].value == 2:
 274                 cluster_descriptor += random.randint(0, 1)
 275             # While snapshots are not supported, bit #63 = 1
 276             # Compressed clusters are not supported => bit #62 = 0
 277             entry_val = (1 << 63) + cluster_descriptor
 278             return ['>Q', entry_offset, entry_val, 'l2_entry']
 279
 280         def create_l1_entry(l2_cluster, l1_offset, guest):
 281             """Generate one L1 entry."""
 282             l2_size = self.cluster_size / UINT64_S
 283             entry_offset = l1_offset + UINT64_S * (guest / l2_size)
 284             # While snapshots are not supported bit #63 = 1
 285             entry_val = (1 << 63) + l2_cluster * self.cluster_size
 286             return ['>Q', entry_offset, entry_val, 'l1_entry']
 287
 288         if len(self.data_clusters) == 0:
 289             # All metadata for an empty guest image needs 4 clusters:
 290             # header, rfc table, rfc block, L1 table.
 291             # Header takes cluster #0, other clusters ##1-3 can be used
 292             l1_offset = random.randint(1, 3) * self.cluster_size
 293             l1 = [['>Q', l1_offset, 0, 'l1_entry']]
 294             l2 = []
 295         else:
 296             meta_data = self._get_metadata()
 297             guest_clusters = random.sample(range(self.image_size /
 298                                                  self.cluster_size),
 299                                            len(self.data_clusters))
 300             # Number of entries in a L1/L2 table
 301             l_size = self.cluster_size / UINT64_S
 302             # Number of clusters necessary for L1 table
 303             l1_size = int(ceil((max(guest_clusters) + 1) / float(l_size**2)))
 304             l1_start = self._get_adjacent_clusters(self.data_clusters |
 305                                                    meta_data, l1_size)
 306             meta_data |= set(range(l1_start, l1_start + l1_size))
 307             l1_offset = l1_start * self.cluster_size
 308             # Indices of L2 tables
 309             l2_ids = []
 310             # Host clusters allocated for L2 tables
 311             l2_clusters = []
 312             # L1 entries
 313             l1 = []
 314             # L2 entries
 315             l2 = []
 316             for host, guest in zip(self.data_clusters, guest_clusters):
 317                 l2_id = guest / l_size
 318                 if l2_id not in l2_ids:
 319                     l2_ids.append(l2_id)
 320                     l2_clusters.append(self._get_adjacent_clusters(
 321                         self.data_clusters | meta_data | set(l2_clusters),
 322                         1))
 323                     l1.append(create_l1_entry(l2_clusters[-1], l1_offset,
 324                                               guest))
 325                 l2.append(create_l2_entry(host, guest,
 326                                           l2_clusters[l2_ids.index(l2_id)]))
 327         self.l2_tables = FieldsList(l2)
 328         self.l1_table = FieldsList(l1)
 329         self.header['l1_size'][0].value = int(ceil(UINT64_S * self.image_size /
 330                                                 float(self.cluster_size**2)))
 331         self.header['l1_table_offset'][0].value = l1_offset
 332
 333     def fuzz(self, fields_to_fuzz=None):
 334         """Fuzz an image by corrupting values of a random subset of its fields.
 335
 336         Without parameters the method fuzzes an entire image.
 337
 338         If 'fields_to_fuzz' is specified then only fields in this list will be
 339         fuzzed. 'fields_to_fuzz' can contain both individual fields and more
 340         general image elements as a header or tables.
 341
 342         In the first case the field will be fuzzed always.
 343         In the second a random subset of fields will be selected and fuzzed.
 344         """
 345         def coin():
 346             """Return boolean value proportional to a portion of fields to be
 347             fuzzed.
 348             """
 349             return random.random() < self.bias
 350
 351         if fields_to_fuzz is None:
 352             for field in self:
 353                 if coin():
 354                     field.value = getattr(fuzz, field.name)(field.value)
 355         else:
 356             for item in fields_to_fuzz:
 357                 if len(item) == 1:
 358                     for field in getattr(self, item[0]):
 359                         if coin():
 360                             field.value = getattr(fuzz,
 361                                                   field.name)(field.value)
 362                 else:
 363                     # If fields with the requested name were not generated
 364                     # getattr(self, item[0])[item[1]] returns an empty list
 365                     for field in getattr(self, item[0])[item[1]]:
 366                         field.value = getattr(fuzz, field.name)(field.value)
 367
 368     def write(self, filename):
 369         """Write an entire image to the file."""
 370         image_file = open(filename, 'w')
 371         for field in self:
 372             image_file.seek(field.offset)
 373             image_file.write(struct.pack(field.fmt, field.value))
 374
 375         for cluster in sorted(self.data_clusters):
 376             image_file.seek(cluster * self.cluster_size)
 377             image_file.write(urandom(self.cluster_size))
 378
 379         # Align the real image size to the cluster size
 380         image_file.seek(0, 2)
 381         size = image_file.tell()
 382         rounded = (size + self.cluster_size - 1) & ~(self.cluster_size - 1)
 383         if rounded > size:
 384             image_file.seek(rounded - 1)
 385             image_file.write("\0")
 386         image_file.close()
 387
 388     @staticmethod
 389     def _size_params():
 390         """Generate a random image size aligned to a random correct
 391         cluster size.
 392         """
 393         cluster_bits = random.randrange(9, 21)
 394         cluster_size = 1 << cluster_bits
 395         img_size = random.randrange(0, MAX_IMAGE_SIZE + 1, cluster_size)
 396         return (cluster_bits, img_size)
 397
 398     @staticmethod
 399     def _get_available_clusters(used, number):
 400         """Return a set of indices of not allocated clusters.
 401
 402         'used' contains indices of currently allocated clusters.
 403         All clusters that cannot be allocated between 'used' clusters will have
 404         indices appended to the end of 'used'.
 405         """
 406         append_id = max(used) + 1
 407         free = set(range(1, append_id)) - used
 408         if len(free) >= number:
 409             return set(random.sample(free, number))
 410         else:
 411             return free | set(range(append_id, append_id + number - len(free)))
 412
 413     @staticmethod
 414     def _get_adjacent_clusters(used, size):
 415         """Return an index of the first cluster in the sequence of free ones.
 416
 417         'used' contains indices of currently allocated clusters. 'size' is the
 418         length of the sequence of free clusters.
 419         If the sequence of 'size' is not available between 'used' clusters, its
 420         first index will be append to the end of 'used'.
 421         """
 422         def get_cluster_id(lst, length):
 423             """Return the first index of the sequence of the specified length
 424             or None if the sequence cannot be inserted in the list.
 425             """
 426             if len(lst) != 0:
 427                 pairs = []
 428                 pair = (lst[0], 1)
 429                 for i in range(1, len(lst)):
 430                     if lst[i] == lst[i-1] + 1:
 431                         pair = (lst[i], pair[1] + 1)
 432                     else:
 433                         pairs.append(pair)
 434                         pair = (lst[i], 1)
 435                 pairs.append(pair)
 436                 random.shuffle(pairs)
 437                 for x, s in pairs:
 438                     if s >= length:
 439                         return x - length + 1
 440             return None
 441
 442         append_id = max(used) + 1
 443         free = list(set(range(1, append_id)) - used)
 444         idx = get_cluster_id(free, size)
 445         if idx is None:
 446             return append_id
 447         else:
 448             return idx
 449
 450     @staticmethod
 451     def _alloc_data(img_size, cluster_size):
 452         """Return a set of random indices of clusters allocated for guest data.
 453         """
 454         num_of_cls = img_size/cluster_size
 455         return set(random.sample(range(1, num_of_cls + 1),
 456                                  random.randint(0, num_of_cls)))
 457
 458     def _get_metadata(self):
 459         """Return indices of clusters allocated for image metadata."""
 460         ids = set()
 461         for x in self:
 462             ids.add(x.offset/self.cluster_size)
 463         return ids
 464
 465
 466 def create_image(test_img_path, backing_file_name=None, backing_file_fmt=None,
 467                  fields_to_fuzz=None):
 468     """Create a fuzzed image and write it to the specified file."""
 469     image = Image(backing_file_name)
 470     image.set_backing_file_format(backing_file_fmt)
 471     image.create_feature_name_table()
 472     image.set_end_of_extension_area()
 473     image.create_l_structures()
 474     image.fuzz(fields_to_fuzz)
 475     image.write(test_img_path)
 476     return image.image_size