1# Generator of fuzzed qcow2 images 2# 3# Copyright (C) 2014 Maria Kustova <maria.k@catit.be> 4# 5# This program is free software: you can redistribute it and/or modify 6# it under the terms of the GNU General Public License as published by 7# the Free Software Foundation, either version 2 of the License, or 8# (at your option) any later version. 9# 10# This program is distributed in the hope that it will be useful, 11# but WITHOUT ANY WARRANTY; without even the implied warranty of 12# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13# GNU General Public License for more details. 14# 15# You should have received a copy of the GNU General Public License 16# along with this program. If not, see <http://www.gnu.org/licenses/>. 17# 18 19from __future__ import absolute_import 20import random 21import struct 22from . import fuzz 23from math import ceil 24from os import urandom 25from itertools import chain 26 27MAX_IMAGE_SIZE = 10 * (1 << 20) 28# Standard sizes 29UINT32_S = 4 30UINT64_S = 8 31 32 33class Field(object): 34 35 """Atomic image element (field). 36 37 The class represents an image field as quadruple of a data format 38 of value necessary for its packing to binary form, an offset from 39 the beginning of the image, a value and a name. 40 41 The field can be iterated as a list [format, offset, value, name]. 42 """ 43 44 __slots__ = ('fmt', 'offset', 'value', 'name') 45 46 def __init__(self, fmt, offset, val, name): 47 self.fmt = fmt 48 self.offset = offset 49 self.value = val 50 self.name = name 51 52 def __iter__(self): 53 return iter([self.fmt, self.offset, self.value, self.name]) 54 55 def __repr__(self): 56 return "Field(fmt='%s', offset=%d, value=%s, name=%s)" % \ 57 (self.fmt, self.offset, str(self.value), self.name) 58 59 60class FieldsList(object): 61 62 """List of fields. 63 64 The class allows access to a field in the list by its name. 65 """ 66 67 def __init__(self, meta_data=None): 68 if meta_data is None: 69 self.data = [] 70 else: 71 self.data = [Field(*f) 72 for f in meta_data] 73 74 def __getitem__(self, name): 75 return [x for x in self.data if x.name == name] 76 77 def __iter__(self): 78 return iter(self.data) 79 80 def __len__(self): 81 return len(self.data) 82 83 84class Image(object): 85 86 """ Qcow2 image object. 87 88 This class allows to create qcow2 images with random valid structures and 89 values, fuzz them via external qcow2.fuzz module and write the result to 90 a file. 91 """ 92 93 def __init__(self, backing_file_name=None): 94 """Create a random valid qcow2 image with the correct header and stored 95 backing file name. 96 """ 97 cluster_bits, self.image_size = self._size_params() 98 self.cluster_size = 1 << cluster_bits 99 self.header = FieldsList() 100 self.backing_file_name = FieldsList() 101 self.backing_file_format = FieldsList() 102 self.feature_name_table = FieldsList() 103 self.end_of_extension_area = FieldsList() 104 self.l2_tables = FieldsList() 105 self.l1_table = FieldsList() 106 self.refcount_table = FieldsList() 107 self.refcount_blocks = FieldsList() 108 self.ext_offset = 0 109 self.create_header(cluster_bits, backing_file_name) 110 self.set_backing_file_name(backing_file_name) 111 self.data_clusters = self._alloc_data(self.image_size, 112 self.cluster_size) 113 # Percentage of fields will be fuzzed 114 self.bias = random.uniform(0.2, 0.5) 115 116 def __iter__(self): 117 return chain(self.header, self.backing_file_format, 118 self.feature_name_table, self.end_of_extension_area, 119 self.backing_file_name, self.l1_table, self.l2_tables, 120 self.refcount_table, self.refcount_blocks) 121 122 def create_header(self, cluster_bits, backing_file_name=None): 123 """Generate a random valid header.""" 124 meta_header = [ 125 ['>4s', 0, "QFI\xfb", 'magic'], 126 ['>I', 4, random.randint(2, 3), 'version'], 127 ['>Q', 8, 0, 'backing_file_offset'], 128 ['>I', 16, 0, 'backing_file_size'], 129 ['>I', 20, cluster_bits, 'cluster_bits'], 130 ['>Q', 24, self.image_size, 'size'], 131 ['>I', 32, 0, 'crypt_method'], 132 ['>I', 36, 0, 'l1_size'], 133 ['>Q', 40, 0, 'l1_table_offset'], 134 ['>Q', 48, 0, 'refcount_table_offset'], 135 ['>I', 56, 0, 'refcount_table_clusters'], 136 ['>I', 60, 0, 'nb_snapshots'], 137 ['>Q', 64, 0, 'snapshots_offset'], 138 ['>Q', 72, 0, 'incompatible_features'], 139 ['>Q', 80, 0, 'compatible_features'], 140 ['>Q', 88, 0, 'autoclear_features'], 141 # Only refcount_order = 4 is supported by current (07.2014) 142 # implementation of QEMU 143 ['>I', 96, 4, 'refcount_order'], 144 ['>I', 100, 0, 'header_length'] 145 ] 146 self.header = FieldsList(meta_header) 147 148 if self.header['version'][0].value == 2: 149 self.header['header_length'][0].value = 72 150 else: 151 self.header['incompatible_features'][0].value = \ 152 random.getrandbits(2) 153 self.header['compatible_features'][0].value = random.getrandbits(1) 154 self.header['header_length'][0].value = 104 155 # Extensions start at the header last field offset and the field size 156 self.ext_offset = struct.calcsize( 157 self.header['header_length'][0].fmt) + \ 158 self.header['header_length'][0].offset 159 end_of_extension_area_len = 2 * UINT32_S 160 free_space = self.cluster_size - self.ext_offset - \ 161 end_of_extension_area_len 162 # If the backing file name specified and there is enough space for it 163 # in the first cluster, then it's placed in the very end of the first 164 # cluster. 165 if (backing_file_name is not None) and \ 166 (free_space >= len(backing_file_name)): 167 self.header['backing_file_size'][0].value = len(backing_file_name) 168 self.header['backing_file_offset'][0].value = \ 169 self.cluster_size - len(backing_file_name) 170 171 def set_backing_file_name(self, backing_file_name=None): 172 """Add the name of the backing file at the offset specified 173 in the header. 174 """ 175 if (backing_file_name is not None) and \ 176 (not self.header['backing_file_offset'][0].value == 0): 177 data_len = len(backing_file_name) 178 data_fmt = '>' + str(data_len) + 's' 179 self.backing_file_name = FieldsList([ 180 [data_fmt, self.header['backing_file_offset'][0].value, 181 backing_file_name, 'bf_name'] 182 ]) 183 184 def set_backing_file_format(self, backing_file_fmt=None): 185 """Generate the header extension for the backing file format.""" 186 if backing_file_fmt is not None: 187 # Calculation of the free space available in the first cluster 188 end_of_extension_area_len = 2 * UINT32_S 189 high_border = (self.header['backing_file_offset'][0].value or 190 (self.cluster_size - 1)) - \ 191 end_of_extension_area_len 192 free_space = high_border - self.ext_offset 193 ext_size = 2 * UINT32_S + ((len(backing_file_fmt) + 7) & ~7) 194 195 if free_space >= ext_size: 196 ext_data_len = len(backing_file_fmt) 197 ext_data_fmt = '>' + str(ext_data_len) + 's' 198 ext_padding_len = 7 - (ext_data_len - 1) % 8 199 self.backing_file_format = FieldsList([ 200 ['>I', self.ext_offset, 0xE2792ACA, 'ext_magic'], 201 ['>I', self.ext_offset + UINT32_S, ext_data_len, 202 'ext_length'], 203 [ext_data_fmt, self.ext_offset + UINT32_S * 2, 204 backing_file_fmt, 'bf_format'] 205 ]) 206 self.ext_offset = \ 207 struct.calcsize( 208 self.backing_file_format['bf_format'][0].fmt) + \ 209 ext_padding_len + \ 210 self.backing_file_format['bf_format'][0].offset 211 212 def create_feature_name_table(self): 213 """Generate a random header extension for names of features used in 214 the image. 215 """ 216 def gen_feat_ids(): 217 """Return random feature type and feature bit.""" 218 return (random.randint(0, 2), random.randint(0, 63)) 219 220 end_of_extension_area_len = 2 * UINT32_S 221 high_border = (self.header['backing_file_offset'][0].value or 222 (self.cluster_size - 1)) - \ 223 end_of_extension_area_len 224 free_space = high_border - self.ext_offset 225 # Sum of sizes of 'magic' and 'length' header extension fields 226 ext_header_len = 2 * UINT32_S 227 fnt_entry_size = 6 * UINT64_S 228 num_fnt_entries = min(10, (free_space - ext_header_len) / 229 fnt_entry_size) 230 if not num_fnt_entries == 0: 231 feature_tables = [] 232 feature_ids = [] 233 inner_offset = self.ext_offset + ext_header_len 234 feat_name = 'some cool feature' 235 while len(feature_tables) < num_fnt_entries * 3: 236 feat_type, feat_bit = gen_feat_ids() 237 # Remove duplicates 238 while (feat_type, feat_bit) in feature_ids: 239 feat_type, feat_bit = gen_feat_ids() 240 feature_ids.append((feat_type, feat_bit)) 241 feat_fmt = '>' + str(len(feat_name)) + 's' 242 feature_tables += [['B', inner_offset, 243 feat_type, 'feature_type'], 244 ['B', inner_offset + 1, feat_bit, 245 'feature_bit_number'], 246 [feat_fmt, inner_offset + 2, 247 feat_name, 'feature_name'] 248 ] 249 inner_offset += fnt_entry_size 250 # No padding for the extension is necessary, because 251 # the extension length is multiple of 8 252 self.feature_name_table = FieldsList([ 253 ['>I', self.ext_offset, 0x6803f857, 'ext_magic'], 254 # One feature table contains 3 fields and takes 48 bytes 255 ['>I', self.ext_offset + UINT32_S, 256 len(feature_tables) / 3 * 48, 'ext_length'] 257 ] + feature_tables) 258 self.ext_offset = inner_offset 259 260 def set_end_of_extension_area(self): 261 """Generate a mandatory header extension marking end of header 262 extensions. 263 """ 264 self.end_of_extension_area = FieldsList([ 265 ['>I', self.ext_offset, 0, 'ext_magic'], 266 ['>I', self.ext_offset + UINT32_S, 0, 'ext_length'] 267 ]) 268 269 def create_l_structures(self): 270 """Generate random valid L1 and L2 tables.""" 271 def create_l2_entry(host, guest, l2_cluster): 272 """Generate one L2 entry.""" 273 offset = l2_cluster * self.cluster_size 274 l2_size = self.cluster_size / UINT64_S 275 entry_offset = offset + UINT64_S * (guest % l2_size) 276 cluster_descriptor = host * self.cluster_size 277 if not self.header['version'][0].value == 2: 278 cluster_descriptor += random.randint(0, 1) 279 # While snapshots are not supported, bit #63 = 1 280 # Compressed clusters are not supported => bit #62 = 0 281 entry_val = (1 << 63) + cluster_descriptor 282 return ['>Q', entry_offset, entry_val, 'l2_entry'] 283 284 def create_l1_entry(l2_cluster, l1_offset, guest): 285 """Generate one L1 entry.""" 286 l2_size = self.cluster_size / UINT64_S 287 entry_offset = l1_offset + UINT64_S * (guest / l2_size) 288 # While snapshots are not supported bit #63 = 1 289 entry_val = (1 << 63) + l2_cluster * self.cluster_size 290 return ['>Q', entry_offset, entry_val, 'l1_entry'] 291 292 if len(self.data_clusters) == 0: 293 # All metadata for an empty guest image needs 4 clusters: 294 # header, rfc table, rfc block, L1 table. 295 # Header takes cluster #0, other clusters ##1-3 can be used 296 l1_offset = random.randint(1, 3) * self.cluster_size 297 l1 = [['>Q', l1_offset, 0, 'l1_entry']] 298 l2 = [] 299 else: 300 meta_data = self._get_metadata() 301 guest_clusters = random.sample(range(self.image_size / 302 self.cluster_size), 303 len(self.data_clusters)) 304 # Number of entries in a L1/L2 table 305 l_size = self.cluster_size / UINT64_S 306 # Number of clusters necessary for L1 table 307 l1_size = int(ceil((max(guest_clusters) + 1) / float(l_size**2))) 308 l1_start = self._get_adjacent_clusters(self.data_clusters | 309 meta_data, l1_size) 310 meta_data |= set(range(l1_start, l1_start + l1_size)) 311 l1_offset = l1_start * self.cluster_size 312 # Indices of L2 tables 313 l2_ids = [] 314 # Host clusters allocated for L2 tables 315 l2_clusters = [] 316 # L1 entries 317 l1 = [] 318 # L2 entries 319 l2 = [] 320 for host, guest in zip(self.data_clusters, guest_clusters): 321 l2_id = guest / l_size 322 if l2_id not in l2_ids: 323 l2_ids.append(l2_id) 324 l2_clusters.append(self._get_adjacent_clusters( 325 self.data_clusters | meta_data | set(l2_clusters), 326 1)) 327 l1.append(create_l1_entry(l2_clusters[-1], l1_offset, 328 guest)) 329 l2.append(create_l2_entry(host, guest, 330 l2_clusters[l2_ids.index(l2_id)])) 331 self.l2_tables = FieldsList(l2) 332 self.l1_table = FieldsList(l1) 333 self.header['l1_size'][0].value = int(ceil(UINT64_S * self.image_size / 334 float(self.cluster_size**2))) 335 self.header['l1_table_offset'][0].value = l1_offset 336 337 def create_refcount_structures(self): 338 """Generate random refcount blocks and refcount table.""" 339 def allocate_rfc_blocks(data, size): 340 """Return indices of clusters allocated for refcount blocks.""" 341 cluster_ids = set() 342 diff = block_ids = set([x / size for x in data]) 343 while len(diff) != 0: 344 # Allocate all yet not allocated clusters 345 new = self._get_available_clusters(data | cluster_ids, 346 len(diff)) 347 # Indices of new refcount blocks necessary to cover clusters 348 # in 'new' 349 diff = set([x / size for x in new]) - block_ids 350 cluster_ids |= new 351 block_ids |= diff 352 return cluster_ids, block_ids 353 354 def allocate_rfc_table(data, init_blocks, block_size): 355 """Return indices of clusters allocated for the refcount table 356 and updated indices of clusters allocated for blocks and indices 357 of blocks. 358 """ 359 blocks = set(init_blocks) 360 clusters = set() 361 # Number of entries in one cluster of the refcount table 362 size = self.cluster_size / UINT64_S 363 # Number of clusters necessary for the refcount table based on 364 # the current number of refcount blocks 365 table_size = int(ceil((max(blocks) + 1) / float(size))) 366 # Index of the first cluster of the refcount table 367 table_start = self._get_adjacent_clusters(data, table_size + 1) 368 # Clusters allocated for the current length of the refcount table 369 table_clusters = set(range(table_start, table_start + table_size)) 370 # Clusters allocated for the refcount table including 371 # last optional one for potential l1 growth 372 table_clusters_allocated = set(range(table_start, table_start + 373 table_size + 1)) 374 # New refcount blocks necessary for clusters occupied by the 375 # refcount table 376 diff = set([c / block_size for c in table_clusters]) - blocks 377 blocks |= diff 378 while len(diff) != 0: 379 # Allocate clusters for new refcount blocks 380 new = self._get_available_clusters((data | clusters) | 381 table_clusters_allocated, 382 len(diff)) 383 # Indices of new refcount blocks necessary to cover 384 # clusters in 'new' 385 diff = set([x / block_size for x in new]) - blocks 386 clusters |= new 387 blocks |= diff 388 # Check if the refcount table needs one more cluster 389 if int(ceil((max(blocks) + 1) / float(size))) > table_size: 390 new_block_id = (table_start + table_size) / block_size 391 # Check if the additional table cluster needs 392 # one more refcount block 393 if new_block_id not in blocks: 394 diff.add(new_block_id) 395 table_clusters.add(table_start + table_size) 396 table_size += 1 397 return table_clusters, blocks, clusters 398 399 def create_table_entry(table_offset, block_cluster, block_size, 400 cluster): 401 """Generate a refcount table entry.""" 402 offset = table_offset + UINT64_S * (cluster / block_size) 403 return ['>Q', offset, block_cluster * self.cluster_size, 404 'refcount_table_entry'] 405 406 def create_block_entry(block_cluster, block_size, cluster): 407 """Generate a list of entries for the current block.""" 408 entry_size = self.cluster_size / block_size 409 offset = block_cluster * self.cluster_size 410 entry_offset = offset + entry_size * (cluster % block_size) 411 # While snapshots are not supported all refcounts are set to 1 412 return ['>H', entry_offset, 1, 'refcount_block_entry'] 413 # Size of a block entry in bits 414 refcount_bits = 1 << self.header['refcount_order'][0].value 415 # Number of refcount entries per refcount block 416 # Convert self.cluster_size from bytes to bits to have the same 417 # base for the numerator and denominator 418 block_size = self.cluster_size * 8 / refcount_bits 419 meta_data = self._get_metadata() 420 if len(self.data_clusters) == 0: 421 # All metadata for an empty guest image needs 4 clusters: 422 # header, rfc table, rfc block, L1 table. 423 # Header takes cluster #0, other clusters ##1-3 can be used 424 block_clusters = set([random.choice(list(set(range(1, 4)) - 425 meta_data))]) 426 block_ids = set([0]) 427 table_clusters = set([random.choice(list(set(range(1, 4)) - 428 meta_data - 429 block_clusters))]) 430 else: 431 block_clusters, block_ids = \ 432 allocate_rfc_blocks(self.data_clusters | 433 meta_data, block_size) 434 table_clusters, block_ids, new_clusters = \ 435 allocate_rfc_table(self.data_clusters | 436 meta_data | 437 block_clusters, 438 block_ids, 439 block_size) 440 block_clusters |= new_clusters 441 442 meta_data |= block_clusters | table_clusters 443 table_offset = min(table_clusters) * self.cluster_size 444 block_id = None 445 # Clusters allocated for refcount blocks 446 block_clusters = list(block_clusters) 447 # Indices of refcount blocks 448 block_ids = list(block_ids) 449 # Refcount table entries 450 rfc_table = [] 451 # Refcount entries 452 rfc_blocks = [] 453 454 for cluster in sorted(self.data_clusters | meta_data): 455 if cluster / block_size != block_id: 456 block_id = cluster / block_size 457 block_cluster = block_clusters[block_ids.index(block_id)] 458 rfc_table.append(create_table_entry(table_offset, 459 block_cluster, 460 block_size, cluster)) 461 rfc_blocks.append(create_block_entry(block_cluster, block_size, 462 cluster)) 463 self.refcount_table = FieldsList(rfc_table) 464 self.refcount_blocks = FieldsList(rfc_blocks) 465 466 self.header['refcount_table_offset'][0].value = table_offset 467 self.header['refcount_table_clusters'][0].value = len(table_clusters) 468 469 def fuzz(self, fields_to_fuzz=None): 470 """Fuzz an image by corrupting values of a random subset of its fields. 471 472 Without parameters the method fuzzes an entire image. 473 474 If 'fields_to_fuzz' is specified then only fields in this list will be 475 fuzzed. 'fields_to_fuzz' can contain both individual fields and more 476 general image elements as a header or tables. 477 478 In the first case the field will be fuzzed always. 479 In the second a random subset of fields will be selected and fuzzed. 480 """ 481 def coin(): 482 """Return boolean value proportional to a portion of fields to be 483 fuzzed. 484 """ 485 return random.random() < self.bias 486 487 if fields_to_fuzz is None: 488 for field in self: 489 if coin(): 490 field.value = getattr(fuzz, field.name)(field.value) 491 else: 492 for item in fields_to_fuzz: 493 if len(item) == 1: 494 for field in getattr(self, item[0]): 495 if coin(): 496 field.value = getattr(fuzz, 497 field.name)(field.value) 498 else: 499 # If fields with the requested name were not generated 500 # getattr(self, item[0])[item[1]] returns an empty list 501 for field in getattr(self, item[0])[item[1]]: 502 field.value = getattr(fuzz, field.name)(field.value) 503 504 def write(self, filename): 505 """Write an entire image to the file.""" 506 image_file = open(filename, 'w') 507 for field in self: 508 image_file.seek(field.offset) 509 image_file.write(struct.pack(field.fmt, field.value)) 510 511 for cluster in sorted(self.data_clusters): 512 image_file.seek(cluster * self.cluster_size) 513 image_file.write(urandom(self.cluster_size)) 514 515 # Align the real image size to the cluster size 516 image_file.seek(0, 2) 517 size = image_file.tell() 518 rounded = (size + self.cluster_size - 1) & ~(self.cluster_size - 1) 519 if rounded > size: 520 image_file.seek(rounded - 1) 521 image_file.write("\0") 522 image_file.close() 523 524 @staticmethod 525 def _size_params(): 526 """Generate a random image size aligned to a random correct 527 cluster size. 528 """ 529 cluster_bits = random.randrange(9, 21) 530 cluster_size = 1 << cluster_bits 531 img_size = random.randrange(0, MAX_IMAGE_SIZE + 1, cluster_size) 532 return (cluster_bits, img_size) 533 534 @staticmethod 535 def _get_available_clusters(used, number): 536 """Return a set of indices of not allocated clusters. 537 538 'used' contains indices of currently allocated clusters. 539 All clusters that cannot be allocated between 'used' clusters will have 540 indices appended to the end of 'used'. 541 """ 542 append_id = max(used) + 1 543 free = set(range(1, append_id)) - used 544 if len(free) >= number: 545 return set(random.sample(free, number)) 546 else: 547 return free | set(range(append_id, append_id + number - len(free))) 548 549 @staticmethod 550 def _get_adjacent_clusters(used, size): 551 """Return an index of the first cluster in the sequence of free ones. 552 553 'used' contains indices of currently allocated clusters. 'size' is the 554 length of the sequence of free clusters. 555 If the sequence of 'size' is not available between 'used' clusters, its 556 first index will be append to the end of 'used'. 557 """ 558 def get_cluster_id(lst, length): 559 """Return the first index of the sequence of the specified length 560 or None if the sequence cannot be inserted in the list. 561 """ 562 if len(lst) != 0: 563 pairs = [] 564 pair = (lst[0], 1) 565 for i in range(1, len(lst)): 566 if lst[i] == lst[i-1] + 1: 567 pair = (lst[i], pair[1] + 1) 568 else: 569 pairs.append(pair) 570 pair = (lst[i], 1) 571 pairs.append(pair) 572 random.shuffle(pairs) 573 for x, s in pairs: 574 if s >= length: 575 return x - length + 1 576 return None 577 578 append_id = max(used) + 1 579 free = list(set(range(1, append_id)) - used) 580 idx = get_cluster_id(free, size) 581 if idx is None: 582 return append_id 583 else: 584 return idx 585 586 @staticmethod 587 def _alloc_data(img_size, cluster_size): 588 """Return a set of random indices of clusters allocated for guest data. 589 """ 590 num_of_cls = img_size/cluster_size 591 return set(random.sample(range(1, num_of_cls + 1), 592 random.randint(0, num_of_cls))) 593 594 def _get_metadata(self): 595 """Return indices of clusters allocated for image metadata.""" 596 ids = set() 597 for x in self: 598 ids.add(x.offset/self.cluster_size) 599 return ids 600 601 602def create_image(test_img_path, backing_file_name=None, backing_file_fmt=None, 603 fields_to_fuzz=None): 604 """Create a fuzzed image and write it to the specified file.""" 605 image = Image(backing_file_name) 606 image.set_backing_file_format(backing_file_fmt) 607 image.create_feature_name_table() 608 image.set_end_of_extension_area() 609 image.create_l_structures() 610 image.create_refcount_structures() 611 image.fuzz(fields_to_fuzz) 612 image.write(test_img_path) 613 return image.image_size 614