1# Generator of fuzzed qcow2 images 2# 3# Copyright (C) 2014 Maria Kustova <maria.k@catit.be> 4# 5# This program is free software: you can redistribute it and/or modify 6# it under the terms of the GNU General Public License as published by 7# the Free Software Foundation, either version 2 of the License, or 8# (at your option) any later version. 9# 10# This program is distributed in the hope that it will be useful, 11# but WITHOUT ANY WARRANTY; without even the implied warranty of 12# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13# GNU General Public License for more details. 14# 15# You should have received a copy of the GNU General Public License 16# along with this program. If not, see <http://www.gnu.org/licenses/>. 17# 18 19import random 20import struct 21import fuzz 22from math import ceil 23from os import urandom 24from itertools import chain 25 26MAX_IMAGE_SIZE = 10 * (1 << 20) 27# Standard sizes 28UINT32_S = 4 29UINT64_S = 8 30 31 32class Field(object): 33 34 """Atomic image element (field). 35 36 The class represents an image field as quadruple of a data format 37 of value necessary for its packing to binary form, an offset from 38 the beginning of the image, a value and a name. 39 40 The field can be iterated as a list [format, offset, value, name]. 41 """ 42 43 __slots__ = ('fmt', 'offset', 'value', 'name') 44 45 def __init__(self, fmt, offset, val, name): 46 self.fmt = fmt 47 self.offset = offset 48 self.value = val 49 self.name = name 50 51 def __iter__(self): 52 return iter([self.fmt, self.offset, self.value, self.name]) 53 54 def __repr__(self): 55 return "Field(fmt='%s', offset=%d, value=%s, name=%s)" % \ 56 (self.fmt, self.offset, str(self.value), self.name) 57 58 59class FieldsList(object): 60 61 """List of fields. 62 63 The class allows access to a field in the list by its name. 64 """ 65 66 def __init__(self, meta_data=None): 67 if meta_data is None: 68 self.data = [] 69 else: 70 self.data = [Field(*f) 71 for f in meta_data] 72 73 def __getitem__(self, name): 74 return [x for x in self.data if x.name == name] 75 76 def __iter__(self): 77 return iter(self.data) 78 79 def __len__(self): 80 return len(self.data) 81 82 83class Image(object): 84 85 """ Qcow2 image object. 86 87 This class allows to create qcow2 images with random valid structures and 88 values, fuzz them via external qcow2.fuzz module and write the result to 89 a file. 90 """ 91 92 def __init__(self, backing_file_name=None): 93 """Create a random valid qcow2 image with the correct header and stored 94 backing file name. 95 """ 96 cluster_bits, self.image_size = self._size_params() 97 self.cluster_size = 1 << cluster_bits 98 self.header = FieldsList() 99 self.backing_file_name = FieldsList() 100 self.backing_file_format = FieldsList() 101 self.feature_name_table = FieldsList() 102 self.end_of_extension_area = FieldsList() 103 self.l2_tables = FieldsList() 104 self.l1_table = FieldsList() 105 self.ext_offset = 0 106 self.create_header(cluster_bits, backing_file_name) 107 self.set_backing_file_name(backing_file_name) 108 self.data_clusters = self._alloc_data(self.image_size, 109 self.cluster_size) 110 # Percentage of fields will be fuzzed 111 self.bias = random.uniform(0.2, 0.5) 112 113 def __iter__(self): 114 return chain(self.header, self.backing_file_format, 115 self.feature_name_table, self.end_of_extension_area, 116 self.backing_file_name, self.l1_table, self.l2_tables) 117 118 def create_header(self, cluster_bits, backing_file_name=None): 119 """Generate a random valid header.""" 120 meta_header = [ 121 ['>4s', 0, "QFI\xfb", 'magic'], 122 ['>I', 4, random.randint(2, 3), 'version'], 123 ['>Q', 8, 0, 'backing_file_offset'], 124 ['>I', 16, 0, 'backing_file_size'], 125 ['>I', 20, cluster_bits, 'cluster_bits'], 126 ['>Q', 24, self.image_size, 'size'], 127 ['>I', 32, 0, 'crypt_method'], 128 ['>I', 36, 0, 'l1_size'], 129 ['>Q', 40, 0, 'l1_table_offset'], 130 ['>Q', 48, 0, 'refcount_table_offset'], 131 ['>I', 56, 0, 'refcount_table_clusters'], 132 ['>I', 60, 0, 'nb_snapshots'], 133 ['>Q', 64, 0, 'snapshots_offset'], 134 ['>Q', 72, 0, 'incompatible_features'], 135 ['>Q', 80, 0, 'compatible_features'], 136 ['>Q', 88, 0, 'autoclear_features'], 137 # Only refcount_order = 4 is supported by current (07.2014) 138 # implementation of QEMU 139 ['>I', 96, 4, 'refcount_order'], 140 ['>I', 100, 0, 'header_length'] 141 ] 142 self.header = FieldsList(meta_header) 143 144 if self.header['version'][0].value == 2: 145 self.header['header_length'][0].value = 72 146 else: 147 self.header['incompatible_features'][0].value = \ 148 random.getrandbits(2) 149 self.header['compatible_features'][0].value = random.getrandbits(1) 150 self.header['header_length'][0].value = 104 151 # Extensions start at the header last field offset and the field size 152 self.ext_offset = struct.calcsize( 153 self.header['header_length'][0].fmt) + \ 154 self.header['header_length'][0].offset 155 end_of_extension_area_len = 2 * UINT32_S 156 free_space = self.cluster_size - self.ext_offset - \ 157 end_of_extension_area_len 158 # If the backing file name specified and there is enough space for it 159 # in the first cluster, then it's placed in the very end of the first 160 # cluster. 161 if (backing_file_name is not None) and \ 162 (free_space >= len(backing_file_name)): 163 self.header['backing_file_size'][0].value = len(backing_file_name) 164 self.header['backing_file_offset'][0].value = \ 165 self.cluster_size - len(backing_file_name) 166 167 def set_backing_file_name(self, backing_file_name=None): 168 """Add the name of the backing file at the offset specified 169 in the header. 170 """ 171 if (backing_file_name is not None) and \ 172 (not self.header['backing_file_offset'][0].value == 0): 173 data_len = len(backing_file_name) 174 data_fmt = '>' + str(data_len) + 's' 175 self.backing_file_name = FieldsList([ 176 [data_fmt, self.header['backing_file_offset'][0].value, 177 backing_file_name, 'bf_name'] 178 ]) 179 180 def set_backing_file_format(self, backing_file_fmt=None): 181 """Generate the header extension for the backing file format.""" 182 if backing_file_fmt is not None: 183 # Calculation of the free space available in the first cluster 184 end_of_extension_area_len = 2 * UINT32_S 185 high_border = (self.header['backing_file_offset'][0].value or 186 (self.cluster_size - 1)) - \ 187 end_of_extension_area_len 188 free_space = high_border - self.ext_offset 189 ext_size = 2 * UINT32_S + ((len(backing_file_fmt) + 7) & ~7) 190 191 if free_space >= ext_size: 192 ext_data_len = len(backing_file_fmt) 193 ext_data_fmt = '>' + str(ext_data_len) + 's' 194 ext_padding_len = 7 - (ext_data_len - 1) % 8 195 self.backing_file_format = FieldsList([ 196 ['>I', self.ext_offset, 0xE2792ACA, 'ext_magic'], 197 ['>I', self.ext_offset + UINT32_S, ext_data_len, 198 'ext_length'], 199 [ext_data_fmt, self.ext_offset + UINT32_S * 2, 200 backing_file_fmt, 'bf_format'] 201 ]) 202 self.ext_offset = \ 203 struct.calcsize( 204 self.backing_file_format['bf_format'][0].fmt) + \ 205 ext_padding_len + \ 206 self.backing_file_format['bf_format'][0].offset 207 208 def create_feature_name_table(self): 209 """Generate a random header extension for names of features used in 210 the image. 211 """ 212 def gen_feat_ids(): 213 """Return random feature type and feature bit.""" 214 return (random.randint(0, 2), random.randint(0, 63)) 215 216 end_of_extension_area_len = 2 * UINT32_S 217 high_border = (self.header['backing_file_offset'][0].value or 218 (self.cluster_size - 1)) - \ 219 end_of_extension_area_len 220 free_space = high_border - self.ext_offset 221 # Sum of sizes of 'magic' and 'length' header extension fields 222 ext_header_len = 2 * UINT32_S 223 fnt_entry_size = 6 * UINT64_S 224 num_fnt_entries = min(10, (free_space - ext_header_len) / 225 fnt_entry_size) 226 if not num_fnt_entries == 0: 227 feature_tables = [] 228 feature_ids = [] 229 inner_offset = self.ext_offset + ext_header_len 230 feat_name = 'some cool feature' 231 while len(feature_tables) < num_fnt_entries * 3: 232 feat_type, feat_bit = gen_feat_ids() 233 # Remove duplicates 234 while (feat_type, feat_bit) in feature_ids: 235 feat_type, feat_bit = gen_feat_ids() 236 feature_ids.append((feat_type, feat_bit)) 237 feat_fmt = '>' + str(len(feat_name)) + 's' 238 feature_tables += [['B', inner_offset, 239 feat_type, 'feature_type'], 240 ['B', inner_offset + 1, feat_bit, 241 'feature_bit_number'], 242 [feat_fmt, inner_offset + 2, 243 feat_name, 'feature_name'] 244 ] 245 inner_offset += fnt_entry_size 246 # No padding for the extension is necessary, because 247 # the extension length is multiple of 8 248 self.feature_name_table = FieldsList([ 249 ['>I', self.ext_offset, 0x6803f857, 'ext_magic'], 250 # One feature table contains 3 fields and takes 48 bytes 251 ['>I', self.ext_offset + UINT32_S, 252 len(feature_tables) / 3 * 48, 'ext_length'] 253 ] + feature_tables) 254 self.ext_offset = inner_offset 255 256 def set_end_of_extension_area(self): 257 """Generate a mandatory header extension marking end of header 258 extensions. 259 """ 260 self.end_of_extension_area = FieldsList([ 261 ['>I', self.ext_offset, 0, 'ext_magic'], 262 ['>I', self.ext_offset + UINT32_S, 0, 'ext_length'] 263 ]) 264 265 def create_l_structures(self): 266 """Generate random valid L1 and L2 tables.""" 267 def create_l2_entry(host, guest, l2_cluster): 268 """Generate one L2 entry.""" 269 offset = l2_cluster * self.cluster_size 270 l2_size = self.cluster_size / UINT64_S 271 entry_offset = offset + UINT64_S * (guest % l2_size) 272 cluster_descriptor = host * self.cluster_size 273 if not self.header['version'][0].value == 2: 274 cluster_descriptor += random.randint(0, 1) 275 # While snapshots are not supported, bit #63 = 1 276 # Compressed clusters are not supported => bit #62 = 0 277 entry_val = (1 << 63) + cluster_descriptor 278 return ['>Q', entry_offset, entry_val, 'l2_entry'] 279 280 def create_l1_entry(l2_cluster, l1_offset, guest): 281 """Generate one L1 entry.""" 282 l2_size = self.cluster_size / UINT64_S 283 entry_offset = l1_offset + UINT64_S * (guest / l2_size) 284 # While snapshots are not supported bit #63 = 1 285 entry_val = (1 << 63) + l2_cluster * self.cluster_size 286 return ['>Q', entry_offset, entry_val, 'l1_entry'] 287 288 if len(self.data_clusters) == 0: 289 # All metadata for an empty guest image needs 4 clusters: 290 # header, rfc table, rfc block, L1 table. 291 # Header takes cluster #0, other clusters ##1-3 can be used 292 l1_offset = random.randint(1, 3) * self.cluster_size 293 l1 = [['>Q', l1_offset, 0, 'l1_entry']] 294 l2 = [] 295 else: 296 meta_data = self._get_metadata() 297 guest_clusters = random.sample(range(self.image_size / 298 self.cluster_size), 299 len(self.data_clusters)) 300 # Number of entries in a L1/L2 table 301 l_size = self.cluster_size / UINT64_S 302 # Number of clusters necessary for L1 table 303 l1_size = int(ceil((max(guest_clusters) + 1) / float(l_size**2))) 304 l1_start = self._get_adjacent_clusters(self.data_clusters | 305 meta_data, l1_size) 306 meta_data |= set(range(l1_start, l1_start + l1_size)) 307 l1_offset = l1_start * self.cluster_size 308 # Indices of L2 tables 309 l2_ids = [] 310 # Host clusters allocated for L2 tables 311 l2_clusters = [] 312 # L1 entries 313 l1 = [] 314 # L2 entries 315 l2 = [] 316 for host, guest in zip(self.data_clusters, guest_clusters): 317 l2_id = guest / l_size 318 if l2_id not in l2_ids: 319 l2_ids.append(l2_id) 320 l2_clusters.append(self._get_adjacent_clusters( 321 self.data_clusters | meta_data | set(l2_clusters), 322 1)) 323 l1.append(create_l1_entry(l2_clusters[-1], l1_offset, 324 guest)) 325 l2.append(create_l2_entry(host, guest, 326 l2_clusters[l2_ids.index(l2_id)])) 327 self.l2_tables = FieldsList(l2) 328 self.l1_table = FieldsList(l1) 329 self.header['l1_size'][0].value = int(ceil(UINT64_S * self.image_size / 330 float(self.cluster_size**2))) 331 self.header['l1_table_offset'][0].value = l1_offset 332 333 def fuzz(self, fields_to_fuzz=None): 334 """Fuzz an image by corrupting values of a random subset of its fields. 335 336 Without parameters the method fuzzes an entire image. 337 338 If 'fields_to_fuzz' is specified then only fields in this list will be 339 fuzzed. 'fields_to_fuzz' can contain both individual fields and more 340 general image elements as a header or tables. 341 342 In the first case the field will be fuzzed always. 343 In the second a random subset of fields will be selected and fuzzed. 344 """ 345 def coin(): 346 """Return boolean value proportional to a portion of fields to be 347 fuzzed. 348 """ 349 return random.random() < self.bias 350 351 if fields_to_fuzz is None: 352 for field in self: 353 if coin(): 354 field.value = getattr(fuzz, field.name)(field.value) 355 else: 356 for item in fields_to_fuzz: 357 if len(item) == 1: 358 for field in getattr(self, item[0]): 359 if coin(): 360 field.value = getattr(fuzz, 361 field.name)(field.value) 362 else: 363 # If fields with the requested name were not generated 364 # getattr(self, item[0])[item[1]] returns an empty list 365 for field in getattr(self, item[0])[item[1]]: 366 field.value = getattr(fuzz, field.name)(field.value) 367 368 def write(self, filename): 369 """Write an entire image to the file.""" 370 image_file = open(filename, 'w') 371 for field in self: 372 image_file.seek(field.offset) 373 image_file.write(struct.pack(field.fmt, field.value)) 374 375 for cluster in sorted(self.data_clusters): 376 image_file.seek(cluster * self.cluster_size) 377 image_file.write(urandom(self.cluster_size)) 378 379 # Align the real image size to the cluster size 380 image_file.seek(0, 2) 381 size = image_file.tell() 382 rounded = (size + self.cluster_size - 1) & ~(self.cluster_size - 1) 383 if rounded > size: 384 image_file.seek(rounded - 1) 385 image_file.write("\0") 386 image_file.close() 387 388 @staticmethod 389 def _size_params(): 390 """Generate a random image size aligned to a random correct 391 cluster size. 392 """ 393 cluster_bits = random.randrange(9, 21) 394 cluster_size = 1 << cluster_bits 395 img_size = random.randrange(0, MAX_IMAGE_SIZE + 1, cluster_size) 396 return (cluster_bits, img_size) 397 398 @staticmethod 399 def _get_available_clusters(used, number): 400 """Return a set of indices of not allocated clusters. 401 402 'used' contains indices of currently allocated clusters. 403 All clusters that cannot be allocated between 'used' clusters will have 404 indices appended to the end of 'used'. 405 """ 406 append_id = max(used) + 1 407 free = set(range(1, append_id)) - used 408 if len(free) >= number: 409 return set(random.sample(free, number)) 410 else: 411 return free | set(range(append_id, append_id + number - len(free))) 412 413 @staticmethod 414 def _get_adjacent_clusters(used, size): 415 """Return an index of the first cluster in the sequence of free ones. 416 417 'used' contains indices of currently allocated clusters. 'size' is the 418 length of the sequence of free clusters. 419 If the sequence of 'size' is not available between 'used' clusters, its 420 first index will be append to the end of 'used'. 421 """ 422 def get_cluster_id(lst, length): 423 """Return the first index of the sequence of the specified length 424 or None if the sequence cannot be inserted in the list. 425 """ 426 if len(lst) != 0: 427 pairs = [] 428 pair = (lst[0], 1) 429 for i in range(1, len(lst)): 430 if lst[i] == lst[i-1] + 1: 431 pair = (lst[i], pair[1] + 1) 432 else: 433 pairs.append(pair) 434 pair = (lst[i], 1) 435 pairs.append(pair) 436 random.shuffle(pairs) 437 for x, s in pairs: 438 if s >= length: 439 return x - length + 1 440 return None 441 442 append_id = max(used) + 1 443 free = list(set(range(1, append_id)) - used) 444 idx = get_cluster_id(free, size) 445 if idx is None: 446 return append_id 447 else: 448 return idx 449 450 @staticmethod 451 def _alloc_data(img_size, cluster_size): 452 """Return a set of random indices of clusters allocated for guest data. 453 """ 454 num_of_cls = img_size/cluster_size 455 return set(random.sample(range(1, num_of_cls + 1), 456 random.randint(0, num_of_cls))) 457 458 def _get_metadata(self): 459 """Return indices of clusters allocated for image metadata.""" 460 ids = set() 461 for x in self: 462 ids.add(x.offset/self.cluster_size) 463 return ids 464 465 466def create_image(test_img_path, backing_file_name=None, backing_file_fmt=None, 467 fields_to_fuzz=None): 468 """Create a fuzzed image and write it to the specified file.""" 469 image = Image(backing_file_name) 470 image.set_backing_file_format(backing_file_fmt) 471 image.create_feature_name_table() 472 image.set_end_of_extension_area() 473 image.create_l_structures() 474 image.fuzz(fields_to_fuzz) 475 image.write(test_img_path) 476 return image.image_size 477