1# Generator of fuzzed qcow2 images
2#
3# Copyright (C) 2014 Maria Kustova <maria.k@catit.be>
4#
5# This program is free software: you can redistribute it and/or modify
6# it under the terms of the GNU General Public License as published by
7# the Free Software Foundation, either version 2 of the License, or
8# (at your option) any later version.
9#
10# This program is distributed in the hope that it will be useful,
11# but WITHOUT ANY WARRANTY; without even the implied warranty of
12# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13# GNU General Public License for more details.
14#
15# You should have received a copy of the GNU General Public License
16# along with this program.  If not, see <http://www.gnu.org/licenses/>.
17#
18
19import random
20import struct
21import fuzz
22from math import ceil
23from os import urandom
24from itertools import chain
25
26MAX_IMAGE_SIZE = 10 * (1 << 20)
27# Standard sizes
28UINT32_S = 4
29UINT64_S = 8
30
31
32class Field(object):
33
34    """Atomic image element (field).
35
36    The class represents an image field as quadruple of a data format
37    of value necessary for its packing to binary form, an offset from
38    the beginning of the image, a value and a name.
39
40    The field can be iterated as a list [format, offset, value, name].
41    """
42
43    __slots__ = ('fmt', 'offset', 'value', 'name')
44
45    def __init__(self, fmt, offset, val, name):
46        self.fmt = fmt
47        self.offset = offset
48        self.value = val
49        self.name = name
50
51    def __iter__(self):
52        return iter([self.fmt, self.offset, self.value, self.name])
53
54    def __repr__(self):
55        return "Field(fmt='%s', offset=%d, value=%s, name=%s)" % \
56            (self.fmt, self.offset, str(self.value), self.name)
57
58
59class FieldsList(object):
60
61    """List of fields.
62
63    The class allows access to a field in the list by its name.
64    """
65
66    def __init__(self, meta_data=None):
67        if meta_data is None:
68            self.data = []
69        else:
70            self.data = [Field(*f)
71                         for f in meta_data]
72
73    def __getitem__(self, name):
74        return [x for x in self.data if x.name == name]
75
76    def __iter__(self):
77        return iter(self.data)
78
79    def __len__(self):
80        return len(self.data)
81
82
83class Image(object):
84
85    """ Qcow2 image object.
86
87    This class allows to create qcow2 images with random valid structures and
88    values, fuzz them via external qcow2.fuzz module and write the result to
89    a file.
90    """
91
92    def __init__(self, backing_file_name=None):
93        """Create a random valid qcow2 image with the correct header and stored
94        backing file name.
95        """
96        cluster_bits, self.image_size = self._size_params()
97        self.cluster_size = 1 << cluster_bits
98        self.header = FieldsList()
99        self.backing_file_name = FieldsList()
100        self.backing_file_format = FieldsList()
101        self.feature_name_table = FieldsList()
102        self.end_of_extension_area = FieldsList()
103        self.l2_tables = FieldsList()
104        self.l1_table = FieldsList()
105        self.ext_offset = 0
106        self.create_header(cluster_bits, backing_file_name)
107        self.set_backing_file_name(backing_file_name)
108        self.data_clusters = self._alloc_data(self.image_size,
109                                              self.cluster_size)
110        # Percentage of fields will be fuzzed
111        self.bias = random.uniform(0.2, 0.5)
112
113    def __iter__(self):
114        return chain(self.header, self.backing_file_format,
115                     self.feature_name_table, self.end_of_extension_area,
116                     self.backing_file_name, self.l1_table, self.l2_tables)
117
118    def create_header(self, cluster_bits, backing_file_name=None):
119        """Generate a random valid header."""
120        meta_header = [
121            ['>4s', 0, "QFI\xfb", 'magic'],
122            ['>I', 4, random.randint(2, 3), 'version'],
123            ['>Q', 8, 0, 'backing_file_offset'],
124            ['>I', 16, 0, 'backing_file_size'],
125            ['>I', 20, cluster_bits, 'cluster_bits'],
126            ['>Q', 24, self.image_size, 'size'],
127            ['>I', 32, 0, 'crypt_method'],
128            ['>I', 36, 0, 'l1_size'],
129            ['>Q', 40, 0, 'l1_table_offset'],
130            ['>Q', 48, 0, 'refcount_table_offset'],
131            ['>I', 56, 0, 'refcount_table_clusters'],
132            ['>I', 60, 0, 'nb_snapshots'],
133            ['>Q', 64, 0, 'snapshots_offset'],
134            ['>Q', 72, 0, 'incompatible_features'],
135            ['>Q', 80, 0, 'compatible_features'],
136            ['>Q', 88, 0, 'autoclear_features'],
137            # Only refcount_order = 4 is supported by current (07.2014)
138            # implementation of QEMU
139            ['>I', 96, 4, 'refcount_order'],
140            ['>I', 100, 0, 'header_length']
141        ]
142        self.header = FieldsList(meta_header)
143
144        if self.header['version'][0].value == 2:
145            self.header['header_length'][0].value = 72
146        else:
147            self.header['incompatible_features'][0].value = \
148                                                        random.getrandbits(2)
149            self.header['compatible_features'][0].value = random.getrandbits(1)
150            self.header['header_length'][0].value = 104
151        # Extensions start at the header last field offset and the field size
152        self.ext_offset = struct.calcsize(
153            self.header['header_length'][0].fmt) + \
154            self.header['header_length'][0].offset
155        end_of_extension_area_len = 2 * UINT32_S
156        free_space = self.cluster_size - self.ext_offset - \
157                     end_of_extension_area_len
158        # If the backing file name specified and there is enough space for it
159        # in the first cluster, then it's placed in the very end of the first
160        # cluster.
161        if (backing_file_name is not None) and \
162           (free_space >= len(backing_file_name)):
163            self.header['backing_file_size'][0].value = len(backing_file_name)
164            self.header['backing_file_offset'][0].value = \
165                                    self.cluster_size - len(backing_file_name)
166
167    def set_backing_file_name(self, backing_file_name=None):
168        """Add the name of the backing file at the offset specified
169        in the header.
170        """
171        if (backing_file_name is not None) and \
172           (not self.header['backing_file_offset'][0].value == 0):
173            data_len = len(backing_file_name)
174            data_fmt = '>' + str(data_len) + 's'
175            self.backing_file_name = FieldsList([
176                [data_fmt, self.header['backing_file_offset'][0].value,
177                 backing_file_name, 'bf_name']
178            ])
179
180    def set_backing_file_format(self, backing_file_fmt=None):
181        """Generate the header extension for the backing file format."""
182        if backing_file_fmt is not None:
183            # Calculation of the free space available in the first cluster
184            end_of_extension_area_len = 2 * UINT32_S
185            high_border = (self.header['backing_file_offset'][0].value or
186                           (self.cluster_size - 1)) - \
187                end_of_extension_area_len
188            free_space = high_border - self.ext_offset
189            ext_size = 2 * UINT32_S + ((len(backing_file_fmt) + 7) & ~7)
190
191            if free_space >= ext_size:
192                ext_data_len = len(backing_file_fmt)
193                ext_data_fmt = '>' + str(ext_data_len) + 's'
194                ext_padding_len = 7 - (ext_data_len - 1) % 8
195                self.backing_file_format = FieldsList([
196                    ['>I', self.ext_offset, 0xE2792ACA, 'ext_magic'],
197                    ['>I', self.ext_offset + UINT32_S, ext_data_len,
198                     'ext_length'],
199                    [ext_data_fmt, self.ext_offset + UINT32_S * 2,
200                     backing_file_fmt, 'bf_format']
201                ])
202                self.ext_offset = \
203                        struct.calcsize(
204                            self.backing_file_format['bf_format'][0].fmt) + \
205                        ext_padding_len + \
206                        self.backing_file_format['bf_format'][0].offset
207
208    def create_feature_name_table(self):
209        """Generate a random header extension for names of features used in
210        the image.
211        """
212        def gen_feat_ids():
213            """Return random feature type and feature bit."""
214            return (random.randint(0, 2), random.randint(0, 63))
215
216        end_of_extension_area_len = 2 * UINT32_S
217        high_border = (self.header['backing_file_offset'][0].value or
218                       (self.cluster_size - 1)) - \
219            end_of_extension_area_len
220        free_space = high_border - self.ext_offset
221        # Sum of sizes of 'magic' and 'length' header extension fields
222        ext_header_len = 2 * UINT32_S
223        fnt_entry_size = 6 * UINT64_S
224        num_fnt_entries = min(10, (free_space - ext_header_len) /
225                              fnt_entry_size)
226        if not num_fnt_entries == 0:
227            feature_tables = []
228            feature_ids = []
229            inner_offset = self.ext_offset + ext_header_len
230            feat_name = 'some cool feature'
231            while len(feature_tables) < num_fnt_entries * 3:
232                feat_type, feat_bit = gen_feat_ids()
233                # Remove duplicates
234                while (feat_type, feat_bit) in feature_ids:
235                    feat_type, feat_bit = gen_feat_ids()
236                feature_ids.append((feat_type, feat_bit))
237                feat_fmt = '>' + str(len(feat_name)) + 's'
238                feature_tables += [['B', inner_offset,
239                                    feat_type, 'feature_type'],
240                                   ['B', inner_offset + 1, feat_bit,
241                                    'feature_bit_number'],
242                                   [feat_fmt, inner_offset + 2,
243                                    feat_name, 'feature_name']
244                ]
245                inner_offset += fnt_entry_size
246            # No padding for the extension is necessary, because
247            # the extension length is multiple of 8
248            self.feature_name_table = FieldsList([
249                ['>I', self.ext_offset, 0x6803f857, 'ext_magic'],
250                # One feature table contains 3 fields and takes 48 bytes
251                ['>I', self.ext_offset + UINT32_S,
252                 len(feature_tables) / 3 * 48, 'ext_length']
253            ] + feature_tables)
254            self.ext_offset = inner_offset
255
256    def set_end_of_extension_area(self):
257        """Generate a mandatory header extension marking end of header
258        extensions.
259        """
260        self.end_of_extension_area = FieldsList([
261            ['>I', self.ext_offset, 0, 'ext_magic'],
262            ['>I', self.ext_offset + UINT32_S, 0, 'ext_length']
263        ])
264
265    def create_l_structures(self):
266        """Generate random valid L1 and L2 tables."""
267        def create_l2_entry(host, guest, l2_cluster):
268            """Generate one L2 entry."""
269            offset = l2_cluster * self.cluster_size
270            l2_size = self.cluster_size / UINT64_S
271            entry_offset = offset + UINT64_S * (guest % l2_size)
272            cluster_descriptor = host * self.cluster_size
273            if not self.header['version'][0].value == 2:
274                cluster_descriptor += random.randint(0, 1)
275            # While snapshots are not supported, bit #63 = 1
276            # Compressed clusters are not supported => bit #62 = 0
277            entry_val = (1 << 63) + cluster_descriptor
278            return ['>Q', entry_offset, entry_val, 'l2_entry']
279
280        def create_l1_entry(l2_cluster, l1_offset, guest):
281            """Generate one L1 entry."""
282            l2_size = self.cluster_size / UINT64_S
283            entry_offset = l1_offset + UINT64_S * (guest / l2_size)
284            # While snapshots are not supported bit #63 = 1
285            entry_val = (1 << 63) + l2_cluster * self.cluster_size
286            return ['>Q', entry_offset, entry_val, 'l1_entry']
287
288        if len(self.data_clusters) == 0:
289            # All metadata for an empty guest image needs 4 clusters:
290            # header, rfc table, rfc block, L1 table.
291            # Header takes cluster #0, other clusters ##1-3 can be used
292            l1_offset = random.randint(1, 3) * self.cluster_size
293            l1 = [['>Q', l1_offset, 0, 'l1_entry']]
294            l2 = []
295        else:
296            meta_data = self._get_metadata()
297            guest_clusters = random.sample(range(self.image_size /
298                                                 self.cluster_size),
299                                           len(self.data_clusters))
300            # Number of entries in a L1/L2 table
301            l_size = self.cluster_size / UINT64_S
302            # Number of clusters necessary for L1 table
303            l1_size = int(ceil((max(guest_clusters) + 1) / float(l_size**2)))
304            l1_start = self._get_adjacent_clusters(self.data_clusters |
305                                                   meta_data, l1_size)
306            meta_data |= set(range(l1_start, l1_start + l1_size))
307            l1_offset = l1_start * self.cluster_size
308            # Indices of L2 tables
309            l2_ids = []
310            # Host clusters allocated for L2 tables
311            l2_clusters = []
312            # L1 entries
313            l1 = []
314            # L2 entries
315            l2 = []
316            for host, guest in zip(self.data_clusters, guest_clusters):
317                l2_id = guest / l_size
318                if l2_id not in l2_ids:
319                    l2_ids.append(l2_id)
320                    l2_clusters.append(self._get_adjacent_clusters(
321                        self.data_clusters | meta_data | set(l2_clusters),
322                        1))
323                    l1.append(create_l1_entry(l2_clusters[-1], l1_offset,
324                                              guest))
325                l2.append(create_l2_entry(host, guest,
326                                          l2_clusters[l2_ids.index(l2_id)]))
327        self.l2_tables = FieldsList(l2)
328        self.l1_table = FieldsList(l1)
329        self.header['l1_size'][0].value = int(ceil(UINT64_S * self.image_size /
330                                                float(self.cluster_size**2)))
331        self.header['l1_table_offset'][0].value = l1_offset
332
333    def fuzz(self, fields_to_fuzz=None):
334        """Fuzz an image by corrupting values of a random subset of its fields.
335
336        Without parameters the method fuzzes an entire image.
337
338        If 'fields_to_fuzz' is specified then only fields in this list will be
339        fuzzed. 'fields_to_fuzz' can contain both individual fields and more
340        general image elements as a header or tables.
341
342        In the first case the field will be fuzzed always.
343        In the second a random subset of fields will be selected and fuzzed.
344        """
345        def coin():
346            """Return boolean value proportional to a portion of fields to be
347            fuzzed.
348            """
349            return random.random() < self.bias
350
351        if fields_to_fuzz is None:
352            for field in self:
353                if coin():
354                    field.value = getattr(fuzz, field.name)(field.value)
355        else:
356            for item in fields_to_fuzz:
357                if len(item) == 1:
358                    for field in getattr(self, item[0]):
359                        if coin():
360                            field.value = getattr(fuzz,
361                                                  field.name)(field.value)
362                else:
363                    # If fields with the requested name were not generated
364                    # getattr(self, item[0])[item[1]] returns an empty list
365                    for field in getattr(self, item[0])[item[1]]:
366                        field.value = getattr(fuzz, field.name)(field.value)
367
368    def write(self, filename):
369        """Write an entire image to the file."""
370        image_file = open(filename, 'w')
371        for field in self:
372            image_file.seek(field.offset)
373            image_file.write(struct.pack(field.fmt, field.value))
374
375        for cluster in sorted(self.data_clusters):
376            image_file.seek(cluster * self.cluster_size)
377            image_file.write(urandom(self.cluster_size))
378
379        # Align the real image size to the cluster size
380        image_file.seek(0, 2)
381        size = image_file.tell()
382        rounded = (size + self.cluster_size - 1) & ~(self.cluster_size - 1)
383        if rounded > size:
384            image_file.seek(rounded - 1)
385            image_file.write("\0")
386        image_file.close()
387
388    @staticmethod
389    def _size_params():
390        """Generate a random image size aligned to a random correct
391        cluster size.
392        """
393        cluster_bits = random.randrange(9, 21)
394        cluster_size = 1 << cluster_bits
395        img_size = random.randrange(0, MAX_IMAGE_SIZE + 1, cluster_size)
396        return (cluster_bits, img_size)
397
398    @staticmethod
399    def _get_available_clusters(used, number):
400        """Return a set of indices of not allocated clusters.
401
402        'used' contains indices of currently allocated clusters.
403        All clusters that cannot be allocated between 'used' clusters will have
404        indices appended to the end of 'used'.
405        """
406        append_id = max(used) + 1
407        free = set(range(1, append_id)) - used
408        if len(free) >= number:
409            return set(random.sample(free, number))
410        else:
411            return free | set(range(append_id, append_id + number - len(free)))
412
413    @staticmethod
414    def _get_adjacent_clusters(used, size):
415        """Return an index of the first cluster in the sequence of free ones.
416
417        'used' contains indices of currently allocated clusters. 'size' is the
418        length of the sequence of free clusters.
419        If the sequence of 'size' is not available between 'used' clusters, its
420        first index will be append to the end of 'used'.
421        """
422        def get_cluster_id(lst, length):
423            """Return the first index of the sequence of the specified length
424            or None if the sequence cannot be inserted in the list.
425            """
426            if len(lst) != 0:
427                pairs = []
428                pair = (lst[0], 1)
429                for i in range(1, len(lst)):
430                    if lst[i] == lst[i-1] + 1:
431                        pair = (lst[i], pair[1] + 1)
432                    else:
433                        pairs.append(pair)
434                        pair = (lst[i], 1)
435                pairs.append(pair)
436                random.shuffle(pairs)
437                for x, s in pairs:
438                    if s >= length:
439                        return x - length + 1
440            return None
441
442        append_id = max(used) + 1
443        free = list(set(range(1, append_id)) - used)
444        idx = get_cluster_id(free, size)
445        if idx is None:
446            return append_id
447        else:
448            return idx
449
450    @staticmethod
451    def _alloc_data(img_size, cluster_size):
452        """Return a set of random indices of clusters allocated for guest data.
453        """
454        num_of_cls = img_size/cluster_size
455        return set(random.sample(range(1, num_of_cls + 1),
456                                 random.randint(0, num_of_cls)))
457
458    def _get_metadata(self):
459        """Return indices of clusters allocated for image metadata."""
460        ids = set()
461        for x in self:
462            ids.add(x.offset/self.cluster_size)
463        return ids
464
465
466def create_image(test_img_path, backing_file_name=None, backing_file_fmt=None,
467                 fields_to_fuzz=None):
468    """Create a fuzzed image and write it to the specified file."""
469    image = Image(backing_file_name)
470    image.set_backing_file_format(backing_file_fmt)
471    image.create_feature_name_table()
472    image.set_end_of_extension_area()
473    image.create_l_structures()
474    image.fuzz(fields_to_fuzz)
475    image.write(test_img_path)
476    return image.image_size
477