#!/usr/bin/env python3
#
#  Migration Stream Analyzer
#
#  Copyright (c) 2015 Alexander Graf <agraf@suse.de>
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, see <http://www.gnu.org/licenses/>.

import json
import os
import argparse
import collections
import struct
import sys


MIN_PYTHON = (3, 2)
if sys.version_info < MIN_PYTHON:
    sys.exit("Python %s.%s or later is required.\n" % MIN_PYTHON)


def mkdir_p(path):
    """Create directory *path* including missing parents.

    An already existing directory is not an error; any other failure
    (permissions, a regular file in the way, ...) is re-raised.
    """
    try:
        os.makedirs(path)
    except OSError:
        if not os.path.isdir(path):
            raise


class MigrationFile(object):
    """Big-endian reader on top of a migration stream file."""

    def __init__(self, filename):
        self.filename = filename
        self.file = open(self.filename, "rb")

    def read64(self):
        """Read 8 bytes as a signed big-endian integer."""
        return int.from_bytes(self.file.read(8), byteorder='big', signed=True)

    def read32(self):
        """Read 4 bytes as a signed big-endian integer."""
        return int.from_bytes(self.file.read(4), byteorder='big', signed=True)

    def read16(self):
        """Read 2 bytes as a signed big-endian integer."""
        return int.from_bytes(self.file.read(2), byteorder='big', signed=True)

    def read8(self):
        """Read 1 byte as a signed integer."""
        return int.from_bytes(self.file.read(1), byteorder='big', signed=True)

    def readstr(self, len = None):
        """Read a UTF-8 string; see readvar() for the meaning of *len*."""
        return self.readvar(len).decode('utf-8')

    def readvar(self, size = None):
        """Read *size* raw bytes and return them as ``bytes``.

        When *size* is None the length is taken from a one-byte prefix in
        the stream.  Raises Exception if the file ends prematurely.
        """
        if size is None:
            # read8() decodes as signed; a length is never negative, so
            # reinterpret the prefix as 0..255.
            size = self.read8() & 0xff
        if size == 0:
            # Must be bytes (not str) so that readstr() can decode it.
            return b""
        value = self.file.read(size)
        if len(value) != size:
            raise Exception("Unexpected end of %s at 0x%x" % (self.filename, self.file.tell()))
        return value

    def tell(self):
        """Return the current offset in the underlying file."""
        return self.file.tell()

    # The VMSD description is at the end of the file, after EOF. Look for
    # the last NULL byte, then for the beginning brace of JSON.
    def read_migration_debug_json(self):
        """Return the raw (bytes) JSON description stored at the file's tail.

        The file position is restored before returning.  Raises Exception
        if no description blob can be located.
        """
        QEMU_VM_VMDESCRIPTION = 0x06

        # Remember the offset in the file when we started
        entrypos = self.file.tell()

        try:
            # Read the last 10MB
            self.file.seek(0, os.SEEK_END)
            endpos = self.file.tell()
            self.file.seek(max(-endpos, -10 * 1024 * 1024), os.SEEK_END)
            datapos = self.file.tell()
            data = self.file.read()

            # Find the last NULL byte, then the first brace after that. This
            # should be the beginning of our JSON data.
            nulpos = data.rfind(b'\0')
            jsonpos = data.find(b'{', nulpos)

            # The brace must exist and leave room for the 5 byte header
            # (type byte + 32 bit length) in front of it.
            if jsonpos < 0 or datapos + jsonpos < 5:
                raise Exception("No Debug Migration device found")

            # Check backwards from there and see whether we guessed right
            self.file.seek(datapos + jsonpos - 5, 0)
            if self.read8() != QEMU_VM_VMDESCRIPTION:
                raise Exception("No Debug Migration device found")

            jsonlen = self.read32()
        finally:
            # Seek back to where we were at the beginning
            self.file.seek(entrypos, 0)

        return data[jsonpos:jsonpos + jsonlen]

    def close(self):
        """Close the underlying file."""
        self.file.close()


class RamSection(object):
    """Parser for the 'ram' section of the stream.

    *ramargs* is a dict with the keys 'page_size', 'dump_memory' (collect
    page contents in the result dict) and 'write_memory' (write each RAM
    block into a file named after the block).
    """
    RAM_SAVE_FLAG_COMPRESS = 0x02
    RAM_SAVE_FLAG_MEM_SIZE = 0x04
    RAM_SAVE_FLAG_PAGE     = 0x08
    RAM_SAVE_FLAG_EOS      = 0x10
    RAM_SAVE_FLAG_CONTINUE = 0x20
    RAM_SAVE_FLAG_XBZRLE   = 0x40
    RAM_SAVE_FLAG_HOOK     = 0x80

    def __init__(self, file, version_id, ramargs, section_key):
        if version_id != 4:
            raise Exception("Unknown RAM version %d" % version_id)

        self.file = file
        self.section_key = section_key
        self.TARGET_PAGE_SIZE = ramargs['page_size']
        self.dump_memory = ramargs['dump_memory']
        self.write_memory = ramargs['write_memory']
        self.sizeinfo = collections.OrderedDict()
        self.data = collections.OrderedDict()
        self.data['section sizes'] = self.sizeinfo
        self.name = ''
        if self.write_memory:
            self.files = { }
        if self.dump_memory:
            self.memory = collections.OrderedDict()
            self.data['memory'] = self.memory

    def __repr__(self):
        return self.data.__repr__()

    def __str__(self):
        return self.data.__str__()

    def getDict(self):
        """Return the collected section data (sizes and, optionally, memory)."""
        return self.data

    def read(self):
        """Consume RAM records until an end-of-section flag is seen.

        Raises Exception on unsupported (XBZRLE, HOOK) or unknown flags.
        """
        page_mask = self.TARGET_PAGE_SIZE - 1

        # Read all RAM sections
        while True:
            addr = self.file.read64()
            # The low bits of the address word carry the record flags.
            flags = addr & page_mask
            addr &= ~page_mask

            if flags & self.RAM_SAVE_FLAG_MEM_SIZE:
                while True:
                    namelen = self.file.read8() & 0xff
                    # We assume that no RAM chunk is big enough to ever
                    # hit the first byte of the address, so when we see
                    # a zero here we know it has to be an address, not the
                    # length of the next block.
                    if namelen == 0:
                        self.file.file.seek(-1, 1)
                        break
                    self.name = self.file.readstr(len = namelen)
                    block_size = self.file.read64()
                    self.sizeinfo[self.name] = '0x%016x' % block_size
                    if self.write_memory:
                        print(self.name)
                        # NOTE(review): the block name comes straight from
                        # the stream and is used as a relative path without
                        # sanitising - only run -x on trusted input.
                        mkdir_p('./' + os.path.dirname(self.name))
                        f = open('./' + self.name, "wb")
                        f.truncate(0)
                        f.truncate(block_size)
                        self.files[self.name] = f
                flags &= ~self.RAM_SAVE_FLAG_MEM_SIZE

            if flags & self.RAM_SAVE_FLAG_COMPRESS:
                if flags & self.RAM_SAVE_FLAG_CONTINUE:
                    flags &= ~self.RAM_SAVE_FLAG_CONTINUE
                else:
                    self.name = self.file.readstr()
                # read8() is signed; the fill value is a plain byte.
                fill_char = self.file.read8() & 0xff
                # The page in question is filled with fill_char now
                if self.write_memory and fill_char != 0:
                    self.files[self.name].seek(addr, os.SEEK_SET)
                    # The file is opened in binary mode, so write bytes.
                    self.files[self.name].write(bytes([fill_char]) * self.TARGET_PAGE_SIZE)
                if self.dump_memory:
                    self.memory['%s (0x%016x)' % (self.name, addr)] = 'Filled with 0x%02x' % fill_char
                flags &= ~self.RAM_SAVE_FLAG_COMPRESS
            elif flags & self.RAM_SAVE_FLAG_PAGE:
                if flags & self.RAM_SAVE_FLAG_CONTINUE:
                    flags &= ~self.RAM_SAVE_FLAG_CONTINUE
                else:
                    self.name = self.file.readstr()

                if self.write_memory or self.dump_memory:
                    data = self.file.readvar(size = self.TARGET_PAGE_SIZE)
                else: # Just skip RAM data
                    self.file.file.seek(self.TARGET_PAGE_SIZE, 1)

                if self.write_memory:
                    self.files[self.name].seek(addr, os.SEEK_SET)
                    self.files[self.name].write(data)
                if self.dump_memory:
                    # Iterating bytes already yields ints.
                    hexdata = " ".join("{0:02x}".format(c) for c in data)
                    self.memory['%s (0x%016x)' % (self.name, addr)] = hexdata

                flags &= ~self.RAM_SAVE_FLAG_PAGE
            elif flags & self.RAM_SAVE_FLAG_XBZRLE:
                raise Exception("XBZRLE RAM compression is not supported yet")
            elif flags & self.RAM_SAVE_FLAG_HOOK:
                raise Exception("RAM hooks don't make sense with files")

            # End of RAM section
            if flags & self.RAM_SAVE_FLAG_EOS:
                break

            if flags != 0:
                raise Exception("Unknown RAM flags: %x" % flags)

    def __del__(self):
        # __init__ may have raised before these attributes were assigned.
        if getattr(self, 'write_memory', False):
            for f in getattr(self, 'files', {}).values():
                f.close()


class HTABSection(object):
    """Parser that skips over the contents of a 'spapr/htab' section."""
    HASH_PTE_SIZE_64 = 16

    def __init__(self, file, version_id, device, section_key):
        if version_id != 1:
            raise Exception("Unknown HTAB version %d" % version_id)

        self.file = file
        self.section_key = section_key

    def read(self):
        """Consume one HTAB chunk; nothing is recorded."""
        header = self.file.read32()

        if header == -1:
            # "no HPT" encoding
            return

        if header > 0:
            # First section, just the hash shift
            return

        # Read until end marker
        while True:
            index = self.file.read32()
            # read16() decodes as signed; a negative count would turn into
            # a meaningless read size below, so reinterpret as unsigned.
            n_valid = self.file.read16() & 0xffff
            n_invalid = self.file.read16() & 0xffff

            if index == 0 and n_valid == 0 and n_invalid == 0:
                break

            self.file.readvar(n_valid * self.HASH_PTE_SIZE_64)

    def getDict(self):
        """HTAB contents are not decoded; always returns an empty string."""
        return ""


class ConfigurationSection(object):
    """Parser that skips over the configuration section."""

    def __init__(self, file):
        self.file = file

    def read(self):
        """Consume the length-prefixed name; the value is discarded."""
        name_len = self.file.read32()
        self.file.readstr(len = name_len)


class VMSDFieldGeneric(object):
    """A field of desc['size'] raw bytes, rendered as a hex dump."""

    def __init__(self, desc, file):
        self.file = file
        self.desc = desc
        self.data = b""

    def __repr__(self):
        return str(self.__str__())

    def __str__(self):
        return " ".join("{0:02x}".format(c) for c in self.data)

    def getDict(self):
        return self.__str__()

    def read(self):
        """Read desc['size'] bytes from the stream and return them."""
        size = int(self.desc['size'])
        self.data = self.file.readvar(size)
        return self.data


class VMSDFieldInt(VMSDFieldGeneric):
    """A big-endian integer field, presented as signed."""

    def __init__(self, desc, file):
        super(VMSDFieldInt, self).__init__(desc, file)
        self.size = int(desc['size'])
        self.format = '0x%%0%dx' % (self.size * 2)
        # Not used by the decoding below; kept for compatibility.
        self.sdtype = '>i%d' % self.size
        self.udtype = '>u%d' % self.size

    def __repr__(self):
        if self.data < 0:
            # Show the raw (unsigned) hex plus the signed interpretation.
            return ('%s (%d)' % ((self.format % self.udata), self.data))
        else:
            return self.format % self.data

    def __str__(self):
        return self.__repr__()

    def getDict(self):
        return self.__str__()

    def read(self):
        """Read the field; keeps both signed and unsigned interpretations."""
        super(VMSDFieldInt, self).read()
        self.sdata = int.from_bytes(self.data, byteorder='big', signed=True)
        self.udata = int.from_bytes(self.data, byteorder='big', signed=False)
        self.data = self.sdata
        return self.data


class VMSDFieldUInt(VMSDFieldInt):
    """A big-endian integer field, presented as unsigned."""

    def __init__(self, desc, file):
        super(VMSDFieldUInt, self).__init__(desc, file)

    def read(self):
        super(VMSDFieldUInt, self).read()
        self.data = self.udata
        return self.data


class VMSDFieldIntLE(VMSDFieldInt):
    """Reader registered for the "int32 le" field type.

    NOTE(review): the value is decoded big-endian exactly like
    VMSDFieldInt; 'le' presumably denotes a "less or equal" check rather
    than little-endian storage - confirm before changing the byte order.
    """

    def __init__(self, desc, file):
        super(VMSDFieldIntLE, self).__init__(desc, file)
        # Not used by the decoding; kept for compatibility.
        self.dtype = '<i%d' % self.size


class VMSDFieldBool(VMSDFieldGeneric):
    """A boolean field: False if the first byte is zero, True otherwise."""

    def __init__(self, desc, file):
        super(VMSDFieldBool, self).__init__(desc, file)

    def __repr__(self):
        return self.data.__repr__()

    def __str__(self):
        return self.data.__str__()

    def getDict(self):
        return self.data

    def read(self):
        super(VMSDFieldBool, self).read()
        self.data = self.data[0] != 0
        return self.data


class VMSDFieldStruct(VMSDFieldGeneric):
    """A structured field described by desc['struct'] (fields, subsections)."""
    QEMU_VM_SUBSECTION    = 0x05

    def __init__(self, desc, file):
        super(VMSDFieldStruct, self).__init__(desc, file)
        self.data = collections.OrderedDict()

        # When we see compressed array elements, unfold them here
        new_fields = []
        for field in self.desc['struct']['fields']:
            if 'array_len' not in field:
                new_fields.append(field)
                continue
            array_len = field.pop('array_len')
            field['index'] = 0
            new_fields.append(field)
            for i in range(1, array_len):
                c = field.copy()
                c['index'] = i
                new_fields.append(c)

        self.desc['struct']['fields'] = new_fields

    def __repr__(self):
        return self.data.__repr__()

    def __str__(self):
        return self.data.__str__()

    def read(self):
        """Read every described field, then any described subsections.

        Raises Exception if array indexes are out of sequence or an
        expected subsection marker is missing.
        """
        for field in self.desc['struct']['fields']:
            # Unknown (or missing/unhashable) types fall back to a raw dump.
            try:
                reader = vmsd_field_readers[field['type']]
            except (KeyError, TypeError):
                reader = VMSDFieldGeneric

            field['data'] = reader(field, self.file)
            field['data'].read()

            if 'index' in field:
                if field['name'] not in self.data:
                    self.data[field['name']] = []
                a = self.data[field['name']]
                if len(a) != int(field['index']):
                    raise Exception("internal index of data field unmatched (%d/%d)" % (len(a), int(field['index'])))
                a.append(field['data'])
            else:
                self.data[field['name']] = field['data']

        if 'subsections' in self.desc['struct']:
            for subsection in self.desc['struct']['subsections']:
                if self.file.read8() != self.QEMU_VM_SUBSECTION:
                    raise Exception("Subsection %s not found at offset %x" % ( subsection['vmsd_name'], self.file.tell()))
                name = self.file.readstr()
                version_id = self.file.read32()
                self.data[name] = VMSDSection(self.file, version_id, subsection, (name, 0))
                self.data[name].read()

    def getDictItem(self, value):
        """Convert *value* into plain containers/strings, recursively.

        Mappings become OrderedDicts, other iterables become lists,
        objects offering getDict() are asked for their dict form and
        anything else is returned unchanged.
        """
        # Strings would fall into the array category, treat
        # them specially
        if isinstance(value, str):
            return value

        if callable(getattr(value, 'items', None)):
            return self.getDictOrderedDict(value)

        try:
            iterator = iter(value)
        except TypeError:
            pass
        else:
            return [self.getDictItem(item) for item in iterator]

        getter = getattr(value, 'getDict', None)
        if callable(getter):
            return getter()

        return value

    def getDictArray(self, array):
        """Return a list with every element of *array* converted."""
        return [self.getDictItem(value) for value in array]

    def getDictOrderedDict(self, dict):
        """Return an OrderedDict with every value of *dict* converted."""
        r = collections.OrderedDict()
        for (key, value) in dict.items():
            r[key] = self.getDictItem(value)
        return r

    def getDict(self):
        return self.getDictOrderedDict(self.data)


# Maps the 'type' string of a field description to its reader class.
vmsd_field_readers = {
    "bool" : VMSDFieldBool,
    "int8" : VMSDFieldInt,
    "int16" : VMSDFieldInt,
    "int32" : VMSDFieldInt,
    "int32 equal" : VMSDFieldInt,
    "int32 le" : VMSDFieldIntLE,
    "int64" : VMSDFieldInt,
    "uint8" : VMSDFieldUInt,
    "uint16" : VMSDFieldUInt,
    "uint32" : VMSDFieldUInt,
    "uint32 equal" : VMSDFieldUInt,
    "uint64" : VMSDFieldUInt,
    "int64 equal" : VMSDFieldInt,
    # Unsigned like "uint32 equal" above, so values with the top bit set
    # are not rendered as negative numbers.
    "uint8 equal" : VMSDFieldUInt,
    "uint16 equal" : VMSDFieldUInt,
    "float64" : VMSDFieldGeneric,
    "timer" : VMSDFieldGeneric,
    "buffer" : VMSDFieldGeneric,
    "unused_buffer" : VMSDFieldGeneric,
    "bitmap" : VMSDFieldGeneric,
    "struct" : VMSDFieldStruct,
    "unknown" : VMSDFieldGeneric,
}


class VMSDSection(VMSDFieldStruct):
    """A device section (or subsection) described by a VMSD description."""

    def __init__(self, file, version_id, device, section_key):
        self.file = file
        self.data = ""
        self.vmsd_name = ""
        self.section_key = section_key
        desc = device
        if 'vmsd_name' in device:
            self.vmsd_name = device['vmsd_name']

        # A section really is nothing but a FieldStruct :)
        super(VMSDSection, self).__init__({ 'struct' : desc }, file)

###############################################################################

class MigrationDump(object):
    """Reads a migration stream file into a dict of decoded sections."""
    QEMU_VM_FILE_MAGIC    = 0x5145564d
    QEMU_VM_FILE_VERSION  = 0x00000003
    QEMU_VM_EOF           = 0x00
    QEMU_VM_SECTION_START = 0x01
    QEMU_VM_SECTION_PART  = 0x02
    QEMU_VM_SECTION_END   = 0x03
    QEMU_VM_SECTION_FULL  = 0x04
    QEMU_VM_SUBSECTION    = 0x05
    QEMU_VM_VMDESCRIPTION = 0x06
    QEMU_VM_CONFIGURATION = 0x07
    QEMU_VM_SECTION_FOOTER= 0x7e

    def __init__(self, filename):
        # (name, instance_id) -> (section class, third constructor argument).
        # The 'ram' entry is a list because read() fills in its arguments.
        self.section_classes = { ( 'ram', 0 ) : [ RamSection, None ],
                                 ( 'spapr/htab', 0) : ( HTABSection, None ) }
        self.filename = filename
        self.vmsd_desc = None

    def read(self, desc_only = False, dump_memory = False, write_memory = False):
        """Parse the file given at construction time.

        desc_only    -- only load the embedded description (self.vmsd_desc)
        dump_memory  -- include RAM page contents in the result
        write_memory -- write RAM blocks into files

        Raises Exception on a bad magic/version or a malformed stream.
        """
        file = MigrationFile(self.filename)
        try:
            self._read_stream(file, desc_only, dump_memory, write_memory)
        finally:
            # Close the file on every path, including desc_only and errors.
            file.close()

    def _read_stream(self, file, desc_only, dump_memory, write_memory):
        """Do the work of read() on an already opened MigrationFile."""
        # File magic
        data = file.read32()
        if data != self.QEMU_VM_FILE_MAGIC:
            raise Exception("Invalid file magic %x" % data)

        # Version (has to be v3)
        data = file.read32()
        if data != self.QEMU_VM_FILE_VERSION:
            raise Exception("Invalid version number %d" % data)

        self.load_vmsd_json(file)

        # Read sections
        self.sections = collections.OrderedDict()

        if desc_only:
            return

        ramargs = {}
        ramargs['page_size'] = self.vmsd_desc['page_size']
        ramargs['dump_memory'] = dump_memory
        ramargs['write_memory'] = write_memory
        self.section_classes[('ram',0)][1] = ramargs

        # Id of the most recently read section, checked against footers.
        section_id = None

        while True:
            section_type = file.read8()
            if section_type == self.QEMU_VM_EOF:
                break
            elif section_type == self.QEMU_VM_CONFIGURATION:
                section = ConfigurationSection(file)
                section.read()
            elif section_type == self.QEMU_VM_SECTION_START or section_type == self.QEMU_VM_SECTION_FULL:
                section_id = file.read32()
                name = file.readstr()
                instance_id = file.read32()
                version_id = file.read32()
                section_key = (name, instance_id)
                classdesc = self.section_classes[section_key]
                section = classdesc[0](file, version_id, classdesc[1], section_key)
                self.sections[section_id] = section
                section.read()
            elif section_type == self.QEMU_VM_SECTION_PART or section_type == self.QEMU_VM_SECTION_END:
                section_id = file.read32()
                self.sections[section_id].read()
            elif section_type == self.QEMU_VM_SECTION_FOOTER:
                read_section_id = file.read32()
                if section_id is None:
                    raise Exception("Section footer %x without a preceding section" % read_section_id)
                if read_section_id != section_id:
                    raise Exception("Mismatched section footer: %x vs %x" % (read_section_id, section_id))
            else:
                raise Exception("Unknown section type: %d" % section_type)

    def load_vmsd_json(self, file):
        """Load the embedded description and register its device sections."""
        vmsd_json = file.read_migration_debug_json()
        # json.loads() only accepts bytes from Python 3.6 on.
        if isinstance(vmsd_json, bytes):
            vmsd_json = vmsd_json.decode('utf-8')
        self.vmsd_desc = json.loads(vmsd_json, object_pairs_hook=collections.OrderedDict)
        for device in self.vmsd_desc['devices']:
            key = (device['name'], device['instance_id'])
            value = ( VMSDSection, device )
            self.section_classes[key] = value

    def getDict(self):
        """Return an OrderedDict "<name> (<section id>)" -> section dict."""
        r = collections.OrderedDict()
        for (key, value) in self.sections.items():
            key = "%s (%d)" % ( value.section_key[0], key )
            r[key] = value.getDict()
        return r

###############################################################################

class JSONEncoder(json.JSONEncoder):
    """JSON encoder that renders VMSD field objects via str()."""

    def default(self, o):
        if isinstance(o, VMSDFieldGeneric):
            return str(o)
        return json.JSONEncoder.default(self, o)


def main():
    """Command line entry point."""
    parser = argparse.ArgumentParser()
    parser.add_argument("-f", "--file", help='migration dump to read from', required=True)
    parser.add_argument("-m", "--memory", help='dump RAM contents as well', action='store_true')
    parser.add_argument("-d", "--dump", help='what to dump ("state" or "desc")', default='state')
    parser.add_argument("-x", "--extract", help='extract contents into individual files', action='store_true')
    args = parser.parse_args()

    jsonenc = JSONEncoder(indent=4, separators=(',', ': '))

    if args.extract:
        dump = MigrationDump(args.file)

        dump.read(desc_only = True)
        print("desc.json")
        # The encoder produces str, so the files are opened in text mode.
        with open("desc.json", "w") as f:
            f.write(jsonenc.encode(dump.vmsd_desc))

        dump.read(write_memory = True)
        state = dump.getDict()
        print("state.json")
        with open("state.json", "w") as f:
            f.write(jsonenc.encode(state))
    elif args.dump == "state":
        dump = MigrationDump(args.file)
        dump.read(dump_memory = args.memory)
        state = dump.getDict()
        print(jsonenc.encode(state))
    elif args.dump == "desc":
        dump = MigrationDump(args.file)
        dump.read(desc_only = True)
        print(jsonenc.encode(dump.vmsd_desc))
    else:
        raise Exception("Please specify either -x, -d state or -d desc")


if __name__ == "__main__":
    main()