"""
This python script adds a new gdb command, "dump-guest-memory". It
should be loaded with "source dump-guest-memory.py" at the (gdb)
prompt.

Copyright (C) 2013, Red Hat, Inc.

Authors:
   Laszlo Ersek <lersek@redhat.com>
   Janosch Frank <frankja@linux.vnet.ibm.com>

This work is licensed under the terms of the GNU GPL, version 2 or later. See
the COPYING file in the top-level directory.
"""

import ctypes
import struct

# The "gdb" module is deliberately not imported: this script is meant to
# be sourced from the (gdb) prompt, where gdb makes the module available.
UINTPTR_T = gdb.lookup_type("uintptr_t")

TARGET_PAGE_SIZE = 0x1000
TARGET_PAGE_MASK = 0xFFFFFFFFFFFFF000

# Special value for e_phnum. This indicates that the real number of
# program headers is too large to fit into e_phnum. Instead the real
# value is in the field sh_info of section 0.
PN_XNUM = 0xFFFF

EV_CURRENT = 1

ELFCLASS32 = 1
ELFCLASS64 = 2

ELFDATA2LSB = 1
ELFDATA2MSB = 2

ET_CORE = 4

PT_LOAD = 1
PT_NOTE = 4

EM_386 = 3
EM_PPC = 20
EM_PPC64 = 21
EM_S390 = 22
EM_AARCH = 183
EM_X86_64 = 62

VMCOREINFO_FORMAT_ELF = 1


def le16_to_cpu(val):
    """Reinterprets a natively read 16 bit value as little endian.

    The value is packed in native byte order and unpacked as little
    endian, so this is a no-op on a little endian machine and a byte
    swap on a big endian one.
    """
    return struct.unpack("<H", struct.pack("=H", val))[0]


def le32_to_cpu(val):
    """Reinterprets a natively read 32 bit value as little endian."""
    return struct.unpack("<I", struct.pack("=I", val))[0]


def le64_to_cpu(val):
    """Reinterprets a natively read 64 bit value as little endian."""
    return struct.unpack("<Q", struct.pack("=Q", val))[0]


class ELF(object):
    """Representation of a ELF file."""

    # Maps each supported ARCH argument to the tuple
    # (ei_data endianness, ei_class, e_machine) used for its ELF header.
    _ARCH_PARAMS = {
        'aarch64-le': (ELFDATA2LSB, ELFCLASS64, EM_AARCH),
        'aarch64-be': (ELFDATA2MSB, ELFCLASS64, EM_AARCH),
        'X86_64': (ELFDATA2LSB, ELFCLASS64, EM_X86_64),
        '386': (ELFDATA2LSB, ELFCLASS32, EM_386),
        's390': (ELFDATA2MSB, ELFCLASS64, EM_S390),
        'ppc64-le': (ELFDATA2LSB, ELFCLASS64, EM_PPC64),
        'ppc64-be': (ELFDATA2MSB, ELFCLASS64, EM_PPC64),
    }

    def __init__(self, arch):
        """Creates the ELF header for arch and an empty PT_NOTE segment.

        Raises gdb.GdbError if arch is not one of the supported names.
        """
        self.ehdr = None
        self.notes = []
        self.segments = []
        self.notes_size = 0
        self.endianness = None
        self.elfclass = ELFCLASS64

        params = self._ARCH_PARAMS.get(arch)
        if params is None:
            raise gdb.GdbError("No valid arch type specified.\n"
                               "Currently supported types:\n"
                               "aarch64-be, aarch64-le, X86_64, 386, s390, "
                               "ppc64-be, ppc64-le")

        self.endianness, self.elfclass, machine = params
        self.ehdr = get_arch_ehdr(self.endianness, self.elfclass)
        self.ehdr.e_machine = machine

        # Segment 0 is always the PT_NOTE segment; it grows as notes are
        # added.
        self.add_segment(PT_NOTE, 0, 0)

    def _append_note(self, note):
        """Stores note and grows the PT_NOTE segment by its size."""

        self.notes.append(note)
        self.segments[0].p_filesz += ctypes.sizeof(note)
        self.segments[0].p_memsz += ctypes.sizeof(note)

    def add_note(self, n_name, n_desc, n_type):
        """Adds a note to the ELF.

        n_name and n_desc are strings, n_type is the integer note type.
        """

        # Encode once and size everything by the encoded byte length, so
        # that the buffers and the copied data always agree.
        name = n_name.encode()
        desc = n_desc.encode()

        note = get_arch_note(self.endianness, len(name), len(desc))
        note.n_namesz = len(name) + 1
        note.n_descsz = len(desc)
        note.n_name = name
        note.n_type = n_type

        # Desc needs to be 4 byte aligned (although the 64bit spec
        # specifies 8 byte). When defining n_desc as uint32 it will be
        # automatically aligned but we need the memmove to copy the
        # string into it.
        ctypes.memmove(note.n_desc, desc, len(desc))

        self._append_note(note)

    def add_vmcoreinfo_note(self, vmcoreinfo):
        """Adds a vmcoreinfo note to the ELF dump.

        vmcoreinfo is a bytes object holding the raw note as read from
        guest memory. The data is guest controlled, so every size taken
        from it is checked against the number of bytes actually
        available; a malformed note is reported and skipped.
        """
        avail = len(vmcoreinfo)

        # Only the three fixed uint32 fields are needed to learn the
        # sizes of the variable parts; they end where n_name begins.
        header = get_arch_note(self.endianness, 0, 0)
        fixed_size = type(header).n_name.offset
        if avail < fixed_size:
            print('warning: invalid vmcoreinfo size')
            return
        ctypes.memmove(ctypes.pointer(header), vmcoreinfo, fixed_size)

        # Bound both sizes before allocating a structure based on them.
        if header.n_descsz > 1 << 20 or header.n_namesz > avail:
            print('warning: invalid vmcoreinfo size')
            return

        # now get the full note
        note = get_arch_note(self.endianness,
                             header.n_namesz - 1, header.n_descsz)
        if type(note).n_desc.offset + header.n_descsz > avail:
            print('warning: invalid vmcoreinfo size')
            return

        # Never read past the end of the source buffer; the trailing
        # alignment padding of the descriptor may be missing, in which
        # case it simply stays zeroed.
        ctypes.memmove(ctypes.pointer(note), vmcoreinfo,
                       min(ctypes.sizeof(note), avail))

        self._append_note(note)

    def add_segment(self, p_type, p_paddr, p_size):
        """Adds a segment to the elf.

        p_size is used for both the file size and the memory size of
        the segment.
        """

        phdr = get_arch_phdr(self.endianness, self.elfclass)
        phdr.p_type = p_type
        phdr.p_paddr = p_paddr
        phdr.p_filesz = p_size
        phdr.p_memsz = p_size
        self.segments.append(phdr)
        self.ehdr.e_phnum += 1

    def to_file(self, elf_file):
        """Writes all ELF structures to the passed file.

        Structure:
        Ehdr
        Segment 0:PT_NOTE
        Segment 1:PT_LOAD
        Segment N:PT_LOAD
        Note    0..N
        Dump contents
        """
        elf_file.write(self.ehdr)

        # File contents start right after the ELF header and the
        # program header table.
        off = ctypes.sizeof(self.ehdr) + \
            len(self.segments) * ctypes.sizeof(self.segments[0])

        for phdr in self.segments:
            phdr.p_offset = off
            elf_file.write(phdr)
            off += phdr.p_filesz

        for note in self.notes:
            elf_file.write(note)


def get_arch_note(endianness, len_name, len_desc):
    """Returns a Note instance with the specified endianness.

    len_name is the length of the name without its terminator (one byte
    is added for it), len_desc is the descriptor length in bytes, which
    is rounded up to a multiple of four.
    """

    if endianness == ELFDATA2LSB:
        superclass = ctypes.LittleEndianStructure
    else:
        superclass = ctypes.BigEndianStructure

    len_name = len_name + 1

    class Note(superclass):
        """Represents an ELF note, includes the content."""

        _fields_ = [("n_namesz", ctypes.c_uint32),
                    ("n_descsz", ctypes.c_uint32),
                    ("n_type", ctypes.c_uint32),
                    ("n_name", ctypes.c_char * len_name),
                    ("n_desc", ctypes.c_uint32 * ((len_desc + 3) // 4))]
    return Note()


class Ident(ctypes.Structure):
    """Represents the ELF ident array in the ehdr structure."""

    _fields_ = [('ei_mag0', ctypes.c_ubyte),
                ('ei_mag1', ctypes.c_ubyte),
                ('ei_mag2', ctypes.c_ubyte),
                ('ei_mag3', ctypes.c_ubyte),
                ('ei_class', ctypes.c_ubyte),
                ('ei_data', ctypes.c_ubyte),
                ('ei_version', ctypes.c_ubyte),
                ('ei_osabi', ctypes.c_ubyte),
                ('ei_abiversion', ctypes.c_ubyte),
                ('ei_pad', ctypes.c_ubyte * 7)]

    def __init__(self, endianness, elfclass):
        """Fills in the magic bytes, class, data encoding and version."""
        self.ei_mag0 = 0x7F
        self.ei_mag1 = ord('E')
        self.ei_mag2 = ord('L')
        self.ei_mag3 = ord('F')
        self.ei_class = elfclass
        self.ei_data = endianness
        self.ei_version = EV_CURRENT


def get_arch_ehdr(endianness, elfclass):
    """Returns a 32 or 64 bit EHDR instance with the specified endianness.

    The header is initialized as an ET_CORE file whose program header
    table directly follows the ELF header and is still empty.
    """

    if endianness == ELFDATA2LSB:
        superclass = ctypes.LittleEndianStructure
    else:
        superclass = ctypes.BigEndianStructure

    class EHDR64(superclass):
        """Represents the 64 bit ELF header struct."""

        _fields_ = [('e_ident', Ident),
                    ('e_type', ctypes.c_uint16),
                    ('e_machine', ctypes.c_uint16),
                    ('e_version', ctypes.c_uint32),
                    ('e_entry', ctypes.c_uint64),
                    ('e_phoff', ctypes.c_uint64),
                    ('e_shoff', ctypes.c_uint64),
                    ('e_flags', ctypes.c_uint32),
                    ('e_ehsize', ctypes.c_uint16),
                    ('e_phentsize', ctypes.c_uint16),
                    ('e_phnum', ctypes.c_uint16),
                    ('e_shentsize', ctypes.c_uint16),
                    ('e_shnum', ctypes.c_uint16),
                    ('e_shstrndx', ctypes.c_uint16)]

        def __init__(self):
            # super() must name the class being initialized, not its
            # base class.
            super(EHDR64, self).__init__()
            self.e_ident = Ident(endianness, elfclass)
            self.e_type = ET_CORE
            self.e_version = EV_CURRENT
            self.e_ehsize = ctypes.sizeof(self)
            self.e_phoff = ctypes.sizeof(self)
            self.e_phentsize = ctypes.sizeof(get_arch_phdr(endianness,
                                                           elfclass))
            self.e_phnum = 0

    class EHDR32(superclass):
        """Represents the 32 bit ELF header struct."""

        _fields_ = [('e_ident', Ident),
                    ('e_type', ctypes.c_uint16),
                    ('e_machine', ctypes.c_uint16),
                    ('e_version', ctypes.c_uint32),
                    ('e_entry', ctypes.c_uint32),
                    ('e_phoff', ctypes.c_uint32),
                    ('e_shoff', ctypes.c_uint32),
                    ('e_flags', ctypes.c_uint32),
                    ('e_ehsize', ctypes.c_uint16),
                    ('e_phentsize', ctypes.c_uint16),
                    ('e_phnum', ctypes.c_uint16),
                    ('e_shentsize', ctypes.c_uint16),
                    ('e_shnum', ctypes.c_uint16),
                    ('e_shstrndx', ctypes.c_uint16)]

        def __init__(self):
            # super() must name the class being initialized, not its
            # base class.
            super(EHDR32, self).__init__()
            self.e_ident = Ident(endianness, elfclass)
            self.e_type = ET_CORE
            self.e_version = EV_CURRENT
            self.e_ehsize = ctypes.sizeof(self)
            self.e_phoff = ctypes.sizeof(self)
            self.e_phentsize = ctypes.sizeof(get_arch_phdr(endianness,
                                                           elfclass))
            self.e_phnum = 0

    # End get_arch_ehdr
    if elfclass == ELFCLASS64:
        return EHDR64()
    else:
        return EHDR32()


def get_arch_phdr(endianness, elfclass):
    """Returns a 32 or 64 bit PHDR instance with the specified endianness."""

    if endianness == ELFDATA2LSB:
        superclass = ctypes.LittleEndianStructure
    else:
        superclass = ctypes.BigEndianStructure

    class PHDR64(superclass):
        """Represents the 64 bit ELF program header struct."""

        _fields_ = [('p_type', ctypes.c_uint32),
                    ('p_flags', ctypes.c_uint32),
                    ('p_offset', ctypes.c_uint64),
                    ('p_vaddr', ctypes.c_uint64),
                    ('p_paddr', ctypes.c_uint64),
                    ('p_filesz', ctypes.c_uint64),
                    ('p_memsz', ctypes.c_uint64),
                    ('p_align', ctypes.c_uint64)]

    class PHDR32(superclass):
        """Represents the 32 bit ELF program header struct."""

        # Note that p_flags sits at a different position than in the
        # 64 bit layout.
        _fields_ = [('p_type', ctypes.c_uint32),
                    ('p_offset', ctypes.c_uint32),
                    ('p_vaddr', ctypes.c_uint32),
                    ('p_paddr', ctypes.c_uint32),
                    ('p_filesz', ctypes.c_uint32),
                    ('p_memsz', ctypes.c_uint32),
                    ('p_flags', ctypes.c_uint32),
                    ('p_align', ctypes.c_uint32)]

    # End get_arch_phdr
    if elfclass == ELFCLASS64:
        return PHDR64()
    else:
        return PHDR32()


def int128_get64(val):
    """Returns low 64bit part of Int128 struct.

    Asserts that the high 64 bits are zero. If val has no "hi"/"lo"
    members (accessing them raises gdb.error), it is reinterpreted as
    an array of two uint64_t instead.
    """

    # Imported locally because the fallback path below is the only user
    # of "sys" and the module is not imported at file level.
    import sys

    try:
        assert val["hi"] == 0
        return val["lo"]
    except gdb.error:
        u64t = gdb.lookup_type('uint64_t').array(2)
        u64 = val.cast(u64t)
        # NOTE(review): sys.byteorder is the byte order of the machine
        # running gdb; this presumably assumes it matches the byte order
        # of the dumped qemu process -- confirm for cross debugging.
        if sys.byteorder == 'little':
            assert u64[1] == 0
            return u64[0]
        else:
            assert u64[0] == 0
            return u64[1]


def qlist_foreach(head, field_str):
    """Generator for qlists.

    Starts at head["lh_first"] and follows the "le_next" pointer inside
    the member named field_str, yielding each dereferenced element until
    a null pointer is reached.
    """

    var_p = head["lh_first"]
    while var_p != 0:
        var = var_p.dereference()
        # Fetch the successor before handing the element to the caller.
        var_p = var[field_str]["le_next"]
        yield var


def qemu_map_ram_ptr(block, offset):
    """Returns qemu vaddr for given guest physical address."""

    return block["host"] + offset


def memory_region_get_ram_ptr(memory_region):
    """Returns the qemu vaddr at which the RAM of memory_region starts.

    An alias region is resolved recursively through its "alias" target,
    adding "alias_offset" at every level.
    """

    if memory_region["alias"] != 0:
        return (memory_region_get_ram_ptr(memory_region["alias"].dereference())
                + memory_region["alias_offset"])

    return qemu_map_ram_ptr(memory_region["ram_block"], 0)


def get_guest_phys_blocks():
    """Returns a list of ram blocks.

    Each block entry contains:
    'target_start': guest block phys start address
    'target_end':   guest block phys end address
    'host_addr':    qemu vaddr of the block's start

    Ranges that are contiguous in both guest physical and host virtual
    memory are merged into a single entry. A table describing every
    visited RAM range is printed along the way.
    """

    guest_phys_blocks = []

    # The header columns are padded to line up with the 16 digit wide
    # values printed for every range below.
    print("guest RAM blocks:")
    print("target_start     target_end       host_addr        message "
          "count")
    print("---------------- ---------------- ---------------- ------- "
          "-----")

    current_map_p = gdb.parse_and_eval("address_space_memory.current_map")
    current_map = current_map_p.dereference()

    # Conversion to int is needed for python 3
    # compatibility. Otherwise range doesn't cast the value itself and
    # breaks.
    for cur in range(int(current_map["nr"])):
        flat_range = (current_map["ranges"] + cur).dereference()
        memory_region = flat_range["mr"].dereference()

        # we only care about RAM
        if not memory_region["ram"]:
            continue

        section_size = int128_get64(flat_range["addr"]["size"])
        target_start = int128_get64(flat_range["addr"]["start"])
        target_end = target_start + section_size
        host_addr = (memory_region_get_ram_ptr(memory_region)
                     + flat_range["offset_in_region"])
        predecessor = None

        # find continuity in guest physical address space
        if guest_phys_blocks:
            predecessor = guest_phys_blocks[-1]
            predecessor_size = (predecessor["target_end"] -
                                predecessor["target_start"])

            # the memory API guarantees monotonically increasing
            # traversal
            assert predecessor["target_end"] <= target_start

            # we want continuity in both guest-physical and
            # host-virtual memory
            if (predecessor["target_end"] < target_start or
                predecessor["host_addr"] + predecessor_size != host_addr):
                predecessor = None

        if predecessor is None:
            # isolated mapping, add it to the list
            guest_phys_blocks.append({"target_start": target_start,
                                      "target_end": target_end,
                                      "host_addr": host_addr})
            message = "added"
        else:
            # expand predecessor until @target_end; predecessor's
            # start doesn't change
            predecessor["target_end"] = target_end
            message = "joined"

        print("%016x %016x %016x %-7s %5u" %
              (target_start, target_end, host_addr.cast(UINTPTR_T),
               message, len(guest_phys_blocks)))

    return guest_phys_blocks


# The leading docstring doesn't have idiomatic Python formatting. It is
# printed by gdb's "help" command (the first line is printed in the
# "help data" summary), and it should match how other help texts look in
# gdb.
class DumpGuestMemory(gdb.Command):
    """Extract guest vmcore from qemu process coredump.

The two required arguments are FILE and ARCH:
FILE identifies the target file to write the guest vmcore to.
ARCH specifies the architecture for which the core will be generated.

This GDB command reimplements the dump-guest-memory QMP command in
python, using the representation of guest memory as captured in the qemu
coredump. The qemu process that has been dumped must have had the
command line option "-machine dump-guest-core=on" which is the default.

For simplicity, the "paging", "begin" and "end" parameters of the QMP
command are not supported -- no attempt is made to get the guest's
internal paging structures (ie. paging=false is hard-wired), and guest
memory is always fully dumped.

Currently aarch64-be, aarch64-le, X86_64, 386, s390, ppc64-be,
ppc64-le guests are supported.

The CORE/NT_PRSTATUS and QEMU notes (that is, the VCPUs' statuses) are
not written to the vmcore. Preparing these would require context that is
only present in the KVM host kernel module when the guest is alive. A
fake ELF note is written instead, only to keep the ELF parser of "crash"
happy.

Dependent on how busted the qemu process was at the time of the
coredump, this command might produce unpredictable results. If qemu
deliberately called abort(), or it was dumped in response to a signal at
a halfway fortunate point, then its coredump should be in reasonable
shape and this command should mostly work."""

    def __init__(self):
        super(DumpGuestMemory, self).__init__("dump-guest-memory",
                                              gdb.COMMAND_DATA,
                                              gdb.COMPLETE_FILENAME)
        # Both are (re)created on every invocation of the command.
        self.elf = None
        self.guest_phys_blocks = None

    def dump_init(self, vmcore):
        """Prepares and writes ELF structures to core file."""

        # Needed to make crash happy, data for more useful notes is
        # not available in a qemu core.
        self.elf.add_note("NONE", "EMPTY", 0)

        # We should never reach PN_XNUM for paging=false dumps,
        # there's just a handful of discontiguous ranges after
        # merging.
        # The constant is needed to account for the PT_NOTE segment.
        phdr_num = len(self.guest_phys_blocks) + 1
        assert phdr_num < PN_XNUM

        for block in self.guest_phys_blocks:
            block_size = block["target_end"] - block["target_start"]
            self.elf.add_segment(PT_LOAD, block["target_start"], block_size)

        self.elf.to_file(vmcore)

    def dump_iterate(self, vmcore):
        """Writes guest core to file."""

        qemu_core = gdb.inferiors()[0]
        for block in self.guest_phys_blocks:
            cur = block["host_addr"]
            left = block["target_end"] - block["target_start"]
            print("dumping range at %016x for length %016x" %
                  (cur.cast(UINTPTR_T), left))

            # Copy at most one target page per read.
            while left > 0:
                chunk_size = min(TARGET_PAGE_SIZE, left)
                chunk = qemu_core.read_memory(cur, chunk_size)
                vmcore.write(chunk)
                cur += chunk_size
                left -= chunk_size

    def phys_memory_read(self, addr, size):
        """Reads size bytes of guest memory at guest physical address addr.

        Returns None unless the whole range lies inside one of the
        collected guest RAM blocks.
        """

        qemu_core = gdb.inferiors()[0]
        for block in self.guest_phys_blocks:
            if block["target_start"] <= addr \
               and addr + size <= block["target_end"]:
                haddr = block["host_addr"] + (addr - block["target_start"])
                return qemu_core.read_memory(haddr, size)
        return None

    def add_vmcoreinfo(self):
        """Adds the guest's vmcoreinfo note to the ELF, if there is one.

        The note is optional: when the vmcoreinfo state cannot be
        evaluated, is absent, is not in ELF format, or does not lie in
        guest RAM, the dump simply continues without it.
        """

        try:
            if not gdb.parse_and_eval("vmcoreinfo_find()") \
               or not gdb.parse_and_eval("vmcoreinfo_find()->has_vmcoreinfo"):
                return

            fmt = gdb.parse_and_eval(
                "vmcoreinfo_find()->vmcoreinfo.guest_format")
            addr = gdb.parse_and_eval("vmcoreinfo_find()->vmcoreinfo.paddr")
            size = gdb.parse_and_eval("vmcoreinfo_find()->vmcoreinfo.size")
        except gdb.error as err:
            # Failing to evaluate the expressions must not prevent the
            # guest memory itself from being dumped.
            print("warning: unable to read vmcoreinfo state (%s); "
                  "skipping vmcoreinfo note" % err)
            return

        fmt = le16_to_cpu(fmt)
        addr = le64_to_cpu(addr)
        size = le32_to_cpu(size)

        if fmt != VMCOREINFO_FORMAT_ELF:
            return

        vmcoreinfo = self.phys_memory_read(addr, size)
        if vmcoreinfo:
            self.elf.add_vmcoreinfo_note(vmcoreinfo.tobytes())

    def invoke(self, args, from_tty):
        """Handles command invocation from gdb."""

        # Unwittingly pressing the Enter key after the command should
        # not dump the same multi-gig coredump to the same file.
        self.dont_repeat()

        argv = gdb.string_to_argv(args)
        if len(argv) != 2:
            raise gdb.GdbError("usage: dump-guest-memory FILE ARCH")

        self.elf = ELF(argv[1])
        self.guest_phys_blocks = get_guest_phys_blocks()
        self.add_vmcoreinfo()

        with open(argv[0], "wb") as vmcore:
            self.dump_init(vmcore)
            self.dump_iterate(vmcore)


# Instantiating the class registers the command with gdb.
DumpGuestMemory()