1# Generator of fuzzed qcow2 images 2# 3# Copyright (C) 2014 Maria Kustova <maria.k@catit.be> 4# 5# This program is free software: you can redistribute it and/or modify 6# it under the terms of the GNU General Public License as published by 7# the Free Software Foundation, either version 2 of the License, or 8# (at your option) any later version. 9# 10# This program is distributed in the hope that it will be useful, 11# but WITHOUT ANY WARRANTY; without even the implied warranty of 12# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13# GNU General Public License for more details. 14# 15# You should have received a copy of the GNU General Public License 16# along with this program. If not, see <http://www.gnu.org/licenses/>. 17# 18 19import random 20import struct 21from . import fuzz 22from math import ceil 23from os import urandom 24from itertools import chain 25 26MAX_IMAGE_SIZE = 10 * (1 << 20) 27# Standard sizes 28UINT32_S = 4 29UINT64_S = 8 30 31 32class Field(object): 33 34 """Atomic image element (field). 35 36 The class represents an image field as quadruple of a data format 37 of value necessary for its packing to binary form, an offset from 38 the beginning of the image, a value and a name. 39 40 The field can be iterated as a list [format, offset, value, name]. 41 """ 42 43 __slots__ = ('fmt', 'offset', 'value', 'name') 44 45 def __init__(self, fmt, offset, val, name): 46 self.fmt = fmt 47 self.offset = offset 48 self.value = val 49 self.name = name 50 51 def __iter__(self): 52 return iter([self.fmt, self.offset, self.value, self.name]) 53 54 def __repr__(self): 55 return "Field(fmt=%r, offset=%r, value=%r, name=%r)" % \ 56 (self.fmt, self.offset, self.value, self.name) 57 58 59class FieldsList(object): 60 61 """List of fields. 62 63 The class allows access to a field in the list by its name. 64 """ 65 66 def __init__(self, meta_data=None): 67 if meta_data is None: 68 self.data = [] 69 else: 70 self.data = [Field(*f) 71 for f in meta_data] 72 73 def __getitem__(self, name): 74 return [x for x in self.data if x.name == name] 75 76 def __iter__(self): 77 return iter(self.data) 78 79 def __len__(self): 80 return len(self.data) 81 82 83class Image(object): 84 85 """ Qcow2 image object. 86 87 This class allows to create qcow2 images with random valid structures and 88 values, fuzz them via external qcow2.fuzz module and write the result to 89 a file. 90 """ 91 92 def __init__(self, backing_file_name=None): 93 """Create a random valid qcow2 image with the correct header and stored 94 backing file name. 95 """ 96 cluster_bits, self.image_size = self._size_params() 97 self.cluster_size = 1 << cluster_bits 98 self.header = FieldsList() 99 self.backing_file_name = FieldsList() 100 self.backing_file_format = FieldsList() 101 self.feature_name_table = FieldsList() 102 self.end_of_extension_area = FieldsList() 103 self.l2_tables = FieldsList() 104 self.l1_table = FieldsList() 105 self.refcount_table = FieldsList() 106 self.refcount_blocks = FieldsList() 107 self.ext_offset = 0 108 self.create_header(cluster_bits, backing_file_name) 109 self.set_backing_file_name(backing_file_name) 110 self.data_clusters = self._alloc_data(self.image_size, 111 self.cluster_size) 112 # Percentage of fields will be fuzzed 113 self.bias = random.uniform(0.2, 0.5) 114 115 def __iter__(self): 116 return chain(self.header, self.backing_file_format, 117 self.feature_name_table, self.end_of_extension_area, 118 self.backing_file_name, self.l1_table, self.l2_tables, 119 self.refcount_table, self.refcount_blocks) 120 121 def create_header(self, cluster_bits, backing_file_name=None): 122 """Generate a random valid header.""" 123 meta_header = [ 124 ['>4s', 0, b"QFI\xfb", 'magic'], 125 ['>I', 4, random.randint(2, 3), 'version'], 126 ['>Q', 8, 0, 'backing_file_offset'], 127 ['>I', 16, 0, 'backing_file_size'], 128 ['>I', 20, cluster_bits, 'cluster_bits'], 129 ['>Q', 24, self.image_size, 'size'], 130 ['>I', 32, 0, 'crypt_method'], 131 ['>I', 36, 0, 'l1_size'], 132 ['>Q', 40, 0, 'l1_table_offset'], 133 ['>Q', 48, 0, 'refcount_table_offset'], 134 ['>I', 56, 0, 'refcount_table_clusters'], 135 ['>I', 60, 0, 'nb_snapshots'], 136 ['>Q', 64, 0, 'snapshots_offset'], 137 ['>Q', 72, 0, 'incompatible_features'], 138 ['>Q', 80, 0, 'compatible_features'], 139 ['>Q', 88, 0, 'autoclear_features'], 140 # Only refcount_order = 4 is supported by current (07.2014) 141 # implementation of QEMU 142 ['>I', 96, 4, 'refcount_order'], 143 ['>I', 100, 0, 'header_length'] 144 ] 145 self.header = FieldsList(meta_header) 146 147 if self.header['version'][0].value == 2: 148 self.header['header_length'][0].value = 72 149 else: 150 self.header['incompatible_features'][0].value = \ 151 random.getrandbits(2) 152 self.header['compatible_features'][0].value = random.getrandbits(1) 153 self.header['header_length'][0].value = 104 154 # Extensions start at the header last field offset and the field size 155 self.ext_offset = struct.calcsize( 156 self.header['header_length'][0].fmt) + \ 157 self.header['header_length'][0].offset 158 end_of_extension_area_len = 2 * UINT32_S 159 free_space = self.cluster_size - self.ext_offset - \ 160 end_of_extension_area_len 161 # If the backing file name specified and there is enough space for it 162 # in the first cluster, then it's placed in the very end of the first 163 # cluster. 164 if (backing_file_name is not None) and \ 165 (free_space >= len(backing_file_name)): 166 self.header['backing_file_size'][0].value = len(backing_file_name) 167 self.header['backing_file_offset'][0].value = \ 168 self.cluster_size - len(backing_file_name) 169 170 def set_backing_file_name(self, backing_file_name=None): 171 """Add the name of the backing file at the offset specified 172 in the header. 173 """ 174 if (backing_file_name is not None) and \ 175 (not self.header['backing_file_offset'][0].value == 0): 176 data_len = len(backing_file_name) 177 data_fmt = '>' + str(data_len) + 's' 178 self.backing_file_name = FieldsList([ 179 [data_fmt, self.header['backing_file_offset'][0].value, 180 backing_file_name, 'bf_name'] 181 ]) 182 183 def set_backing_file_format(self, backing_file_fmt=None): 184 """Generate the header extension for the backing file format.""" 185 if backing_file_fmt is not None: 186 # Calculation of the free space available in the first cluster 187 end_of_extension_area_len = 2 * UINT32_S 188 high_border = (self.header['backing_file_offset'][0].value or 189 (self.cluster_size - 1)) - \ 190 end_of_extension_area_len 191 free_space = high_border - self.ext_offset 192 ext_size = 2 * UINT32_S + ((len(backing_file_fmt) + 7) & ~7) 193 194 if free_space >= ext_size: 195 ext_data_len = len(backing_file_fmt) 196 ext_data_fmt = '>' + str(ext_data_len) + 's' 197 ext_padding_len = 7 - (ext_data_len - 1) % 8 198 self.backing_file_format = FieldsList([ 199 ['>I', self.ext_offset, 0xE2792ACA, 'ext_magic'], 200 ['>I', self.ext_offset + UINT32_S, ext_data_len, 201 'ext_length'], 202 [ext_data_fmt, self.ext_offset + UINT32_S * 2, 203 backing_file_fmt, 'bf_format'] 204 ]) 205 self.ext_offset = \ 206 struct.calcsize( 207 self.backing_file_format['bf_format'][0].fmt) + \ 208 ext_padding_len + \ 209 self.backing_file_format['bf_format'][0].offset 210 211 def create_feature_name_table(self): 212 """Generate a random header extension for names of features used in 213 the image. 214 """ 215 def gen_feat_ids(): 216 """Return random feature type and feature bit.""" 217 return (random.randint(0, 2), random.randint(0, 63)) 218 219 end_of_extension_area_len = 2 * UINT32_S 220 high_border = (self.header['backing_file_offset'][0].value or 221 (self.cluster_size - 1)) - \ 222 end_of_extension_area_len 223 free_space = high_border - self.ext_offset 224 # Sum of sizes of 'magic' and 'length' header extension fields 225 ext_header_len = 2 * UINT32_S 226 fnt_entry_size = 6 * UINT64_S 227 num_fnt_entries = min(10, (free_space - ext_header_len) / 228 fnt_entry_size) 229 if not num_fnt_entries == 0: 230 feature_tables = [] 231 feature_ids = [] 232 inner_offset = self.ext_offset + ext_header_len 233 feat_name = b'some cool feature' 234 while len(feature_tables) < num_fnt_entries * 3: 235 feat_type, feat_bit = gen_feat_ids() 236 # Remove duplicates 237 while (feat_type, feat_bit) in feature_ids: 238 feat_type, feat_bit = gen_feat_ids() 239 feature_ids.append((feat_type, feat_bit)) 240 feat_fmt = '>' + str(len(feat_name)) + 's' 241 feature_tables += [['B', inner_offset, 242 feat_type, 'feature_type'], 243 ['B', inner_offset + 1, feat_bit, 244 'feature_bit_number'], 245 [feat_fmt, inner_offset + 2, 246 feat_name, 'feature_name'] 247 ] 248 inner_offset += fnt_entry_size 249 # No padding for the extension is necessary, because 250 # the extension length is multiple of 8 251 self.feature_name_table = FieldsList([ 252 ['>I', self.ext_offset, 0x6803f857, 'ext_magic'], 253 # One feature table contains 3 fields and takes 48 bytes 254 ['>I', self.ext_offset + UINT32_S, 255 len(feature_tables) // 3 * 48, 'ext_length'] 256 ] + feature_tables) 257 self.ext_offset = inner_offset 258 259 def set_end_of_extension_area(self): 260 """Generate a mandatory header extension marking end of header 261 extensions. 262 """ 263 self.end_of_extension_area = FieldsList([ 264 ['>I', self.ext_offset, 0, 'ext_magic'], 265 ['>I', self.ext_offset + UINT32_S, 0, 'ext_length'] 266 ]) 267 268 def create_l_structures(self): 269 """Generate random valid L1 and L2 tables.""" 270 def create_l2_entry(host, guest, l2_cluster): 271 """Generate one L2 entry.""" 272 offset = l2_cluster * self.cluster_size 273 l2_size = self.cluster_size // UINT64_S 274 entry_offset = offset + UINT64_S * (guest % l2_size) 275 cluster_descriptor = host * self.cluster_size 276 if not self.header['version'][0].value == 2: 277 cluster_descriptor += random.randint(0, 1) 278 # While snapshots are not supported, bit #63 = 1 279 # Compressed clusters are not supported => bit #62 = 0 280 entry_val = (1 << 63) + cluster_descriptor 281 return ['>Q', entry_offset, entry_val, 'l2_entry'] 282 283 def create_l1_entry(l2_cluster, l1_offset, guest): 284 """Generate one L1 entry.""" 285 l2_size = self.cluster_size // UINT64_S 286 entry_offset = l1_offset + UINT64_S * (guest // l2_size) 287 # While snapshots are not supported bit #63 = 1 288 entry_val = (1 << 63) + l2_cluster * self.cluster_size 289 return ['>Q', entry_offset, entry_val, 'l1_entry'] 290 291 if len(self.data_clusters) == 0: 292 # All metadata for an empty guest image needs 4 clusters: 293 # header, rfc table, rfc block, L1 table. 294 # Header takes cluster #0, other clusters ##1-3 can be used 295 l1_offset = random.randint(1, 3) * self.cluster_size 296 l1 = [['>Q', l1_offset, 0, 'l1_entry']] 297 l2 = [] 298 else: 299 meta_data = self._get_metadata() 300 guest_clusters = random.sample(range(self.image_size // 301 self.cluster_size), 302 len(self.data_clusters)) 303 # Number of entries in a L1/L2 table 304 l_size = self.cluster_size // UINT64_S 305 # Number of clusters necessary for L1 table 306 l1_size = int(ceil((max(guest_clusters) + 1) / float(l_size**2))) 307 l1_start = self._get_adjacent_clusters(self.data_clusters | 308 meta_data, l1_size) 309 meta_data |= set(range(l1_start, l1_start + l1_size)) 310 l1_offset = l1_start * self.cluster_size 311 # Indices of L2 tables 312 l2_ids = [] 313 # Host clusters allocated for L2 tables 314 l2_clusters = [] 315 # L1 entries 316 l1 = [] 317 # L2 entries 318 l2 = [] 319 for host, guest in zip(self.data_clusters, guest_clusters): 320 l2_id = guest // l_size 321 if l2_id not in l2_ids: 322 l2_ids.append(l2_id) 323 l2_clusters.append(self._get_adjacent_clusters( 324 self.data_clusters | meta_data | set(l2_clusters), 325 1)) 326 l1.append(create_l1_entry(l2_clusters[-1], l1_offset, 327 guest)) 328 l2.append(create_l2_entry(host, guest, 329 l2_clusters[l2_ids.index(l2_id)])) 330 self.l2_tables = FieldsList(l2) 331 self.l1_table = FieldsList(l1) 332 self.header['l1_size'][0].value = int(ceil(UINT64_S * self.image_size / 333 float(self.cluster_size**2))) 334 self.header['l1_table_offset'][0].value = l1_offset 335 336 def create_refcount_structures(self): 337 """Generate random refcount blocks and refcount table.""" 338 def allocate_rfc_blocks(data, size): 339 """Return indices of clusters allocated for refcount blocks.""" 340 cluster_ids = set() 341 diff = block_ids = set([x // size for x in data]) 342 while len(diff) != 0: 343 # Allocate all yet not allocated clusters 344 new = self._get_available_clusters(data | cluster_ids, 345 len(diff)) 346 # Indices of new refcount blocks necessary to cover clusters 347 # in 'new' 348 diff = set([x // size for x in new]) - block_ids 349 cluster_ids |= new 350 block_ids |= diff 351 return cluster_ids, block_ids 352 353 def allocate_rfc_table(data, init_blocks, block_size): 354 """Return indices of clusters allocated for the refcount table 355 and updated indices of clusters allocated for blocks and indices 356 of blocks. 357 """ 358 blocks = set(init_blocks) 359 clusters = set() 360 # Number of entries in one cluster of the refcount table 361 size = self.cluster_size // UINT64_S 362 # Number of clusters necessary for the refcount table based on 363 # the current number of refcount blocks 364 table_size = int(ceil((max(blocks) + 1) / float(size))) 365 # Index of the first cluster of the refcount table 366 table_start = self._get_adjacent_clusters(data, table_size + 1) 367 # Clusters allocated for the current length of the refcount table 368 table_clusters = set(range(table_start, table_start + table_size)) 369 # Clusters allocated for the refcount table including 370 # last optional one for potential l1 growth 371 table_clusters_allocated = set(range(table_start, table_start + 372 table_size + 1)) 373 # New refcount blocks necessary for clusters occupied by the 374 # refcount table 375 diff = set([c // block_size for c in table_clusters]) - blocks 376 blocks |= diff 377 while len(diff) != 0: 378 # Allocate clusters for new refcount blocks 379 new = self._get_available_clusters((data | clusters) | 380 table_clusters_allocated, 381 len(diff)) 382 # Indices of new refcount blocks necessary to cover 383 # clusters in 'new' 384 diff = set([x // block_size for x in new]) - blocks 385 clusters |= new 386 blocks |= diff 387 # Check if the refcount table needs one more cluster 388 if int(ceil((max(blocks) + 1) / float(size))) > table_size: 389 new_block_id = (table_start + table_size) // block_size 390 # Check if the additional table cluster needs 391 # one more refcount block 392 if new_block_id not in blocks: 393 diff.add(new_block_id) 394 table_clusters.add(table_start + table_size) 395 table_size += 1 396 return table_clusters, blocks, clusters 397 398 def create_table_entry(table_offset, block_cluster, block_size, 399 cluster): 400 """Generate a refcount table entry.""" 401 offset = table_offset + UINT64_S * (cluster // block_size) 402 return ['>Q', offset, block_cluster * self.cluster_size, 403 'refcount_table_entry'] 404 405 def create_block_entry(block_cluster, block_size, cluster): 406 """Generate a list of entries for the current block.""" 407 entry_size = self.cluster_size // block_size 408 offset = block_cluster * self.cluster_size 409 entry_offset = offset + entry_size * (cluster % block_size) 410 # While snapshots are not supported all refcounts are set to 1 411 return ['>H', entry_offset, 1, 'refcount_block_entry'] 412 # Size of a block entry in bits 413 refcount_bits = 1 << self.header['refcount_order'][0].value 414 # Number of refcount entries per refcount block 415 # Convert self.cluster_size from bytes to bits to have the same 416 # base for the numerator and denominator 417 block_size = self.cluster_size * 8 // refcount_bits 418 meta_data = self._get_metadata() 419 if len(self.data_clusters) == 0: 420 # All metadata for an empty guest image needs 4 clusters: 421 # header, rfc table, rfc block, L1 table. 422 # Header takes cluster #0, other clusters ##1-3 can be used 423 block_clusters = set([random.choice(list(set(range(1, 4)) - 424 meta_data))]) 425 block_ids = set([0]) 426 table_clusters = set([random.choice(list(set(range(1, 4)) - 427 meta_data - 428 block_clusters))]) 429 else: 430 block_clusters, block_ids = \ 431 allocate_rfc_blocks(self.data_clusters | 432 meta_data, block_size) 433 table_clusters, block_ids, new_clusters = \ 434 allocate_rfc_table(self.data_clusters | 435 meta_data | 436 block_clusters, 437 block_ids, 438 block_size) 439 block_clusters |= new_clusters 440 441 meta_data |= block_clusters | table_clusters 442 table_offset = min(table_clusters) * self.cluster_size 443 block_id = None 444 # Clusters allocated for refcount blocks 445 block_clusters = list(block_clusters) 446 # Indices of refcount blocks 447 block_ids = list(block_ids) 448 # Refcount table entries 449 rfc_table = [] 450 # Refcount entries 451 rfc_blocks = [] 452 453 for cluster in sorted(self.data_clusters | meta_data): 454 if cluster // block_size != block_id: 455 block_id = cluster // block_size 456 block_cluster = block_clusters[block_ids.index(block_id)] 457 rfc_table.append(create_table_entry(table_offset, 458 block_cluster, 459 block_size, cluster)) 460 rfc_blocks.append(create_block_entry(block_cluster, block_size, 461 cluster)) 462 self.refcount_table = FieldsList(rfc_table) 463 self.refcount_blocks = FieldsList(rfc_blocks) 464 465 self.header['refcount_table_offset'][0].value = table_offset 466 self.header['refcount_table_clusters'][0].value = len(table_clusters) 467 468 def fuzz(self, fields_to_fuzz=None): 469 """Fuzz an image by corrupting values of a random subset of its fields. 470 471 Without parameters the method fuzzes an entire image. 472 473 If 'fields_to_fuzz' is specified then only fields in this list will be 474 fuzzed. 'fields_to_fuzz' can contain both individual fields and more 475 general image elements as a header or tables. 476 477 In the first case the field will be fuzzed always. 478 In the second a random subset of fields will be selected and fuzzed. 479 """ 480 def coin(): 481 """Return boolean value proportional to a portion of fields to be 482 fuzzed. 483 """ 484 return random.random() < self.bias 485 486 if fields_to_fuzz is None: 487 for field in self: 488 if coin(): 489 field.value = getattr(fuzz, field.name)(field.value) 490 else: 491 for item in fields_to_fuzz: 492 if len(item) == 1: 493 for field in getattr(self, item[0]): 494 if coin(): 495 field.value = getattr(fuzz, 496 field.name)(field.value) 497 else: 498 # If fields with the requested name were not generated 499 # getattr(self, item[0])[item[1]] returns an empty list 500 for field in getattr(self, item[0])[item[1]]: 501 field.value = getattr(fuzz, field.name)(field.value) 502 503 def write(self, filename): 504 """Write an entire image to the file.""" 505 image_file = open(filename, 'wb') 506 for field in self: 507 image_file.seek(field.offset) 508 image_file.write(struct.pack(field.fmt, field.value)) 509 510 for cluster in sorted(self.data_clusters): 511 image_file.seek(cluster * self.cluster_size) 512 image_file.write(urandom(self.cluster_size)) 513 514 # Align the real image size to the cluster size 515 image_file.seek(0, 2) 516 size = image_file.tell() 517 rounded = (size + self.cluster_size - 1) & ~(self.cluster_size - 1) 518 if rounded > size: 519 image_file.seek(rounded - 1) 520 image_file.write(b'\x00') 521 image_file.close() 522 523 @staticmethod 524 def _size_params(): 525 """Generate a random image size aligned to a random correct 526 cluster size. 527 """ 528 cluster_bits = random.randrange(9, 21) 529 cluster_size = 1 << cluster_bits 530 img_size = random.randrange(0, MAX_IMAGE_SIZE + 1, cluster_size) 531 return (cluster_bits, img_size) 532 533 @staticmethod 534 def _get_available_clusters(used, number): 535 """Return a set of indices of not allocated clusters. 536 537 'used' contains indices of currently allocated clusters. 538 All clusters that cannot be allocated between 'used' clusters will have 539 indices appended to the end of 'used'. 540 """ 541 append_id = max(used) + 1 542 free = set(range(1, append_id)) - used 543 if len(free) >= number: 544 return set(random.sample(free, number)) 545 else: 546 return free | set(range(append_id, append_id + number - len(free))) 547 548 @staticmethod 549 def _get_adjacent_clusters(used, size): 550 """Return an index of the first cluster in the sequence of free ones. 551 552 'used' contains indices of currently allocated clusters. 'size' is the 553 length of the sequence of free clusters. 554 If the sequence of 'size' is not available between 'used' clusters, its 555 first index will be append to the end of 'used'. 556 """ 557 def get_cluster_id(lst, length): 558 """Return the first index of the sequence of the specified length 559 or None if the sequence cannot be inserted in the list. 560 """ 561 if len(lst) != 0: 562 pairs = [] 563 pair = (lst[0], 1) 564 for i in range(1, len(lst)): 565 if lst[i] == lst[i-1] + 1: 566 pair = (lst[i], pair[1] + 1) 567 else: 568 pairs.append(pair) 569 pair = (lst[i], 1) 570 pairs.append(pair) 571 random.shuffle(pairs) 572 for x, s in pairs: 573 if s >= length: 574 return x - length + 1 575 return None 576 577 append_id = max(used) + 1 578 free = list(set(range(1, append_id)) - used) 579 idx = get_cluster_id(free, size) 580 if idx is None: 581 return append_id 582 else: 583 return idx 584 585 @staticmethod 586 def _alloc_data(img_size, cluster_size): 587 """Return a set of random indices of clusters allocated for guest data. 588 """ 589 num_of_cls = img_size // cluster_size 590 return set(random.sample(range(1, num_of_cls + 1), 591 random.randint(0, num_of_cls))) 592 593 def _get_metadata(self): 594 """Return indices of clusters allocated for image metadata.""" 595 ids = set() 596 for x in self: 597 ids.add(x.offset // self.cluster_size) 598 return ids 599 600 601def create_image(test_img_path, backing_file_name=None, backing_file_fmt=None, 602 fields_to_fuzz=None): 603 """Create a fuzzed image and write it to the specified file.""" 604 image = Image(backing_file_name.encode()) 605 image.set_backing_file_format(backing_file_fmt.encode()) 606 image.create_feature_name_table() 607 image.set_end_of_extension_area() 608 image.create_l_structures() 609 image.create_refcount_structures() 610 image.fuzz(fields_to_fuzz) 611 image.write(test_img_path) 612 return image.image_size 613