1 /* 2 * QEMU dump 3 * 4 * Copyright Fujitsu, Corp. 2011, 2012 5 * 6 * Authors: 7 * Wen Congyang <wency@cn.fujitsu.com> 8 * 9 * This work is licensed under the terms of the GNU GPL, version 2 or later. 10 * See the COPYING file in the top-level directory. 11 * 12 */ 13 14 #include "qemu/osdep.h" 15 #include "qemu/cutils.h" 16 #include "elf.h" 17 #include "exec/hwaddr.h" 18 #include "monitor/monitor.h" 19 #include "sysemu/kvm.h" 20 #include "sysemu/dump.h" 21 #include "sysemu/memory_mapping.h" 22 #include "sysemu/runstate.h" 23 #include "sysemu/cpus.h" 24 #include "qapi/error.h" 25 #include "qapi/qapi-commands-dump.h" 26 #include "qapi/qapi-events-dump.h" 27 #include "qapi/qmp/qerror.h" 28 #include "qemu/error-report.h" 29 #include "qemu/main-loop.h" 30 #include "hw/misc/vmcoreinfo.h" 31 #include "migration/blocker.h" 32 33 #ifdef TARGET_X86_64 34 #include "win_dump.h" 35 #endif 36 37 #include <zlib.h> 38 #ifdef CONFIG_LZO 39 #include <lzo/lzo1x.h> 40 #endif 41 #ifdef CONFIG_SNAPPY 42 #include <snappy-c.h> 43 #endif 44 #ifndef ELF_MACHINE_UNAME 45 #define ELF_MACHINE_UNAME "Unknown" 46 #endif 47 48 #define MAX_GUEST_NOTE_SIZE (1 << 20) /* 1MB should be enough */ 49 50 static Error *dump_migration_blocker; 51 52 #define ELF_NOTE_SIZE(hdr_size, name_size, desc_size) \ 53 ((DIV_ROUND_UP((hdr_size), 4) + \ 54 DIV_ROUND_UP((name_size), 4) + \ 55 DIV_ROUND_UP((desc_size), 4)) * 4) 56 57 static inline bool dump_is_64bit(DumpState *s) 58 { 59 return s->dump_info.d_class == ELFCLASS64; 60 } 61 62 static inline bool dump_has_filter(DumpState *s) 63 { 64 return s->filter_area_length > 0; 65 } 66 67 uint16_t cpu_to_dump16(DumpState *s, uint16_t val) 68 { 69 if (s->dump_info.d_endian == ELFDATA2LSB) { 70 val = cpu_to_le16(val); 71 } else { 72 val = cpu_to_be16(val); 73 } 74 75 return val; 76 } 77 78 uint32_t cpu_to_dump32(DumpState *s, uint32_t val) 79 { 80 if (s->dump_info.d_endian == ELFDATA2LSB) { 81 val = cpu_to_le32(val); 82 } else { 83 val = cpu_to_be32(val); 84 } 85 86 return val; 87 } 88 89 uint64_t cpu_to_dump64(DumpState *s, uint64_t val) 90 { 91 if (s->dump_info.d_endian == ELFDATA2LSB) { 92 val = cpu_to_le64(val); 93 } else { 94 val = cpu_to_be64(val); 95 } 96 97 return val; 98 } 99 100 static int dump_cleanup(DumpState *s) 101 { 102 guest_phys_blocks_free(&s->guest_phys_blocks); 103 memory_mapping_list_free(&s->list); 104 close(s->fd); 105 g_free(s->guest_note); 106 g_array_unref(s->string_table_buf); 107 s->guest_note = NULL; 108 if (s->resume) { 109 if (s->detached) { 110 qemu_mutex_lock_iothread(); 111 } 112 vm_start(); 113 if (s->detached) { 114 qemu_mutex_unlock_iothread(); 115 } 116 } 117 migrate_del_blocker(dump_migration_blocker); 118 119 return 0; 120 } 121 122 static int fd_write_vmcore(const void *buf, size_t size, void *opaque) 123 { 124 DumpState *s = opaque; 125 size_t written_size; 126 127 written_size = qemu_write_full(s->fd, buf, size); 128 if (written_size != size) { 129 return -errno; 130 } 131 132 return 0; 133 } 134 135 static void prepare_elf64_header(DumpState *s, Elf64_Ehdr *elf_header) 136 { 137 /* 138 * phnum in the elf header is 16 bit, if we have more segments we 139 * set phnum to PN_XNUM and write the real number of segments to a 140 * special section. 141 */ 142 uint16_t phnum = MIN(s->phdr_num, PN_XNUM); 143 144 memset(elf_header, 0, sizeof(Elf64_Ehdr)); 145 memcpy(elf_header, ELFMAG, SELFMAG); 146 elf_header->e_ident[EI_CLASS] = ELFCLASS64; 147 elf_header->e_ident[EI_DATA] = s->dump_info.d_endian; 148 elf_header->e_ident[EI_VERSION] = EV_CURRENT; 149 elf_header->e_type = cpu_to_dump16(s, ET_CORE); 150 elf_header->e_machine = cpu_to_dump16(s, s->dump_info.d_machine); 151 elf_header->e_version = cpu_to_dump32(s, EV_CURRENT); 152 elf_header->e_ehsize = cpu_to_dump16(s, sizeof(elf_header)); 153 elf_header->e_phoff = cpu_to_dump64(s, s->phdr_offset); 154 elf_header->e_phentsize = cpu_to_dump16(s, sizeof(Elf64_Phdr)); 155 elf_header->e_phnum = cpu_to_dump16(s, phnum); 156 elf_header->e_shoff = cpu_to_dump64(s, s->shdr_offset); 157 elf_header->e_shentsize = cpu_to_dump16(s, sizeof(Elf64_Shdr)); 158 elf_header->e_shnum = cpu_to_dump16(s, s->shdr_num); 159 elf_header->e_shstrndx = cpu_to_dump16(s, s->shdr_num - 1); 160 } 161 162 static void prepare_elf32_header(DumpState *s, Elf32_Ehdr *elf_header) 163 { 164 /* 165 * phnum in the elf header is 16 bit, if we have more segments we 166 * set phnum to PN_XNUM and write the real number of segments to a 167 * special section. 168 */ 169 uint16_t phnum = MIN(s->phdr_num, PN_XNUM); 170 171 memset(elf_header, 0, sizeof(Elf32_Ehdr)); 172 memcpy(elf_header, ELFMAG, SELFMAG); 173 elf_header->e_ident[EI_CLASS] = ELFCLASS32; 174 elf_header->e_ident[EI_DATA] = s->dump_info.d_endian; 175 elf_header->e_ident[EI_VERSION] = EV_CURRENT; 176 elf_header->e_type = cpu_to_dump16(s, ET_CORE); 177 elf_header->e_machine = cpu_to_dump16(s, s->dump_info.d_machine); 178 elf_header->e_version = cpu_to_dump32(s, EV_CURRENT); 179 elf_header->e_ehsize = cpu_to_dump16(s, sizeof(elf_header)); 180 elf_header->e_phoff = cpu_to_dump32(s, s->phdr_offset); 181 elf_header->e_phentsize = cpu_to_dump16(s, sizeof(Elf32_Phdr)); 182 elf_header->e_phnum = cpu_to_dump16(s, phnum); 183 elf_header->e_shoff = cpu_to_dump32(s, s->shdr_offset); 184 elf_header->e_shentsize = cpu_to_dump16(s, sizeof(Elf32_Shdr)); 185 elf_header->e_shnum = cpu_to_dump16(s, s->shdr_num); 186 elf_header->e_shstrndx = cpu_to_dump16(s, s->shdr_num - 1); 187 } 188 189 static void write_elf_header(DumpState *s, Error **errp) 190 { 191 Elf32_Ehdr elf32_header; 192 Elf64_Ehdr elf64_header; 193 size_t header_size; 194 void *header_ptr; 195 int ret; 196 197 /* The NULL header and the shstrtab are always defined */ 198 assert(s->shdr_num >= 2); 199 if (dump_is_64bit(s)) { 200 prepare_elf64_header(s, &elf64_header); 201 header_size = sizeof(elf64_header); 202 header_ptr = &elf64_header; 203 } else { 204 prepare_elf32_header(s, &elf32_header); 205 header_size = sizeof(elf32_header); 206 header_ptr = &elf32_header; 207 } 208 209 ret = fd_write_vmcore(header_ptr, header_size, s); 210 if (ret < 0) { 211 error_setg_errno(errp, -ret, "dump: failed to write elf header"); 212 } 213 } 214 215 static void write_elf64_load(DumpState *s, MemoryMapping *memory_mapping, 216 int phdr_index, hwaddr offset, 217 hwaddr filesz, Error **errp) 218 { 219 Elf64_Phdr phdr; 220 int ret; 221 222 memset(&phdr, 0, sizeof(Elf64_Phdr)); 223 phdr.p_type = cpu_to_dump32(s, PT_LOAD); 224 phdr.p_offset = cpu_to_dump64(s, offset); 225 phdr.p_paddr = cpu_to_dump64(s, memory_mapping->phys_addr); 226 phdr.p_filesz = cpu_to_dump64(s, filesz); 227 phdr.p_memsz = cpu_to_dump64(s, memory_mapping->length); 228 phdr.p_vaddr = cpu_to_dump64(s, memory_mapping->virt_addr) ?: phdr.p_paddr; 229 230 assert(memory_mapping->length >= filesz); 231 232 ret = fd_write_vmcore(&phdr, sizeof(Elf64_Phdr), s); 233 if (ret < 0) { 234 error_setg_errno(errp, -ret, 235 "dump: failed to write program header table"); 236 } 237 } 238 239 static void write_elf32_load(DumpState *s, MemoryMapping *memory_mapping, 240 int phdr_index, hwaddr offset, 241 hwaddr filesz, Error **errp) 242 { 243 Elf32_Phdr phdr; 244 int ret; 245 246 memset(&phdr, 0, sizeof(Elf32_Phdr)); 247 phdr.p_type = cpu_to_dump32(s, PT_LOAD); 248 phdr.p_offset = cpu_to_dump32(s, offset); 249 phdr.p_paddr = cpu_to_dump32(s, memory_mapping->phys_addr); 250 phdr.p_filesz = cpu_to_dump32(s, filesz); 251 phdr.p_memsz = cpu_to_dump32(s, memory_mapping->length); 252 phdr.p_vaddr = 253 cpu_to_dump32(s, memory_mapping->virt_addr) ?: phdr.p_paddr; 254 255 assert(memory_mapping->length >= filesz); 256 257 ret = fd_write_vmcore(&phdr, sizeof(Elf32_Phdr), s); 258 if (ret < 0) { 259 error_setg_errno(errp, -ret, 260 "dump: failed to write program header table"); 261 } 262 } 263 264 static void prepare_elf64_phdr_note(DumpState *s, Elf64_Phdr *phdr) 265 { 266 memset(phdr, 0, sizeof(*phdr)); 267 phdr->p_type = cpu_to_dump32(s, PT_NOTE); 268 phdr->p_offset = cpu_to_dump64(s, s->note_offset); 269 phdr->p_paddr = 0; 270 phdr->p_filesz = cpu_to_dump64(s, s->note_size); 271 phdr->p_memsz = cpu_to_dump64(s, s->note_size); 272 phdr->p_vaddr = 0; 273 } 274 275 static inline int cpu_index(CPUState *cpu) 276 { 277 return cpu->cpu_index + 1; 278 } 279 280 static void write_guest_note(WriteCoreDumpFunction f, DumpState *s, 281 Error **errp) 282 { 283 int ret; 284 285 if (s->guest_note) { 286 ret = f(s->guest_note, s->guest_note_size, s); 287 if (ret < 0) { 288 error_setg(errp, "dump: failed to write guest note"); 289 } 290 } 291 } 292 293 static void write_elf64_notes(WriteCoreDumpFunction f, DumpState *s, 294 Error **errp) 295 { 296 CPUState *cpu; 297 int ret; 298 int id; 299 300 CPU_FOREACH(cpu) { 301 id = cpu_index(cpu); 302 ret = cpu_write_elf64_note(f, cpu, id, s); 303 if (ret < 0) { 304 error_setg(errp, "dump: failed to write elf notes"); 305 return; 306 } 307 } 308 309 CPU_FOREACH(cpu) { 310 ret = cpu_write_elf64_qemunote(f, cpu, s); 311 if (ret < 0) { 312 error_setg(errp, "dump: failed to write CPU status"); 313 return; 314 } 315 } 316 317 write_guest_note(f, s, errp); 318 } 319 320 static void prepare_elf32_phdr_note(DumpState *s, Elf32_Phdr *phdr) 321 { 322 memset(phdr, 0, sizeof(*phdr)); 323 phdr->p_type = cpu_to_dump32(s, PT_NOTE); 324 phdr->p_offset = cpu_to_dump32(s, s->note_offset); 325 phdr->p_paddr = 0; 326 phdr->p_filesz = cpu_to_dump32(s, s->note_size); 327 phdr->p_memsz = cpu_to_dump32(s, s->note_size); 328 phdr->p_vaddr = 0; 329 } 330 331 static void write_elf32_notes(WriteCoreDumpFunction f, DumpState *s, 332 Error **errp) 333 { 334 CPUState *cpu; 335 int ret; 336 int id; 337 338 CPU_FOREACH(cpu) { 339 id = cpu_index(cpu); 340 ret = cpu_write_elf32_note(f, cpu, id, s); 341 if (ret < 0) { 342 error_setg(errp, "dump: failed to write elf notes"); 343 return; 344 } 345 } 346 347 CPU_FOREACH(cpu) { 348 ret = cpu_write_elf32_qemunote(f, cpu, s); 349 if (ret < 0) { 350 error_setg(errp, "dump: failed to write CPU status"); 351 return; 352 } 353 } 354 355 write_guest_note(f, s, errp); 356 } 357 358 static void write_elf_phdr_note(DumpState *s, Error **errp) 359 { 360 Elf32_Phdr phdr32; 361 Elf64_Phdr phdr64; 362 void *phdr; 363 size_t size; 364 int ret; 365 366 if (dump_is_64bit(s)) { 367 prepare_elf64_phdr_note(s, &phdr64); 368 size = sizeof(phdr64); 369 phdr = &phdr64; 370 } else { 371 prepare_elf32_phdr_note(s, &phdr32); 372 size = sizeof(phdr32); 373 phdr = &phdr32; 374 } 375 376 ret = fd_write_vmcore(phdr, size, s); 377 if (ret < 0) { 378 error_setg_errno(errp, -ret, 379 "dump: failed to write program header table"); 380 } 381 } 382 383 static void prepare_elf_section_hdr_zero(DumpState *s) 384 { 385 if (dump_is_64bit(s)) { 386 Elf64_Shdr *shdr64 = s->elf_section_hdrs; 387 388 shdr64->sh_info = cpu_to_dump32(s, s->phdr_num); 389 } else { 390 Elf32_Shdr *shdr32 = s->elf_section_hdrs; 391 392 shdr32->sh_info = cpu_to_dump32(s, s->phdr_num); 393 } 394 } 395 396 static void prepare_elf_section_hdr_string(DumpState *s, void *buff) 397 { 398 uint64_t index = s->string_table_buf->len; 399 const char strtab[] = ".shstrtab"; 400 Elf32_Shdr shdr32 = {}; 401 Elf64_Shdr shdr64 = {}; 402 int shdr_size; 403 void *shdr; 404 405 g_array_append_vals(s->string_table_buf, strtab, sizeof(strtab)); 406 if (dump_is_64bit(s)) { 407 shdr_size = sizeof(Elf64_Shdr); 408 shdr64.sh_type = SHT_STRTAB; 409 shdr64.sh_offset = s->section_offset + s->elf_section_data_size; 410 shdr64.sh_name = index; 411 shdr64.sh_size = s->string_table_buf->len; 412 shdr = &shdr64; 413 } else { 414 shdr_size = sizeof(Elf32_Shdr); 415 shdr32.sh_type = SHT_STRTAB; 416 shdr32.sh_offset = s->section_offset + s->elf_section_data_size; 417 shdr32.sh_name = index; 418 shdr32.sh_size = s->string_table_buf->len; 419 shdr = &shdr32; 420 } 421 memcpy(buff, shdr, shdr_size); 422 } 423 424 static bool prepare_elf_section_hdrs(DumpState *s, Error **errp) 425 { 426 size_t len, sizeof_shdr; 427 void *buff_hdr; 428 429 /* 430 * Section ordering: 431 * - HDR zero 432 * - Arch section hdrs 433 * - String table hdr 434 */ 435 sizeof_shdr = dump_is_64bit(s) ? sizeof(Elf64_Shdr) : sizeof(Elf32_Shdr); 436 len = sizeof_shdr * s->shdr_num; 437 s->elf_section_hdrs = g_malloc0(len); 438 buff_hdr = s->elf_section_hdrs; 439 440 /* 441 * The first section header is ALWAYS a special initial section 442 * header. 443 * 444 * The header should be 0 with one exception being that if 445 * phdr_num is PN_XNUM then the sh_info field contains the real 446 * number of segment entries. 447 * 448 * As we zero allocate the buffer we will only need to modify 449 * sh_info for the PN_XNUM case. 450 */ 451 if (s->phdr_num >= PN_XNUM) { 452 prepare_elf_section_hdr_zero(s); 453 } 454 buff_hdr += sizeof_shdr; 455 456 /* Add architecture defined section headers */ 457 if (s->dump_info.arch_sections_write_hdr_fn 458 && s->shdr_num > 2) { 459 buff_hdr += s->dump_info.arch_sections_write_hdr_fn(s, buff_hdr); 460 461 if (s->shdr_num >= SHN_LORESERVE) { 462 error_setg_errno(errp, EINVAL, 463 "dump: too many architecture defined sections"); 464 return false; 465 } 466 } 467 468 /* 469 * String table is the last section since strings are added via 470 * arch_sections_write_hdr(). 471 */ 472 prepare_elf_section_hdr_string(s, buff_hdr); 473 return true; 474 } 475 476 static void write_elf_section_headers(DumpState *s, Error **errp) 477 { 478 size_t sizeof_shdr = dump_is_64bit(s) ? sizeof(Elf64_Shdr) : sizeof(Elf32_Shdr); 479 int ret; 480 481 if (!prepare_elf_section_hdrs(s, errp)) { 482 return; 483 } 484 485 ret = fd_write_vmcore(s->elf_section_hdrs, s->shdr_num * sizeof_shdr, s); 486 if (ret < 0) { 487 error_setg_errno(errp, -ret, "dump: failed to write section headers"); 488 } 489 490 g_free(s->elf_section_hdrs); 491 } 492 493 static void write_elf_sections(DumpState *s, Error **errp) 494 { 495 int ret; 496 497 if (s->elf_section_data_size) { 498 /* Write architecture section data */ 499 ret = fd_write_vmcore(s->elf_section_data, 500 s->elf_section_data_size, s); 501 if (ret < 0) { 502 error_setg_errno(errp, -ret, 503 "dump: failed to write architecture section data"); 504 return; 505 } 506 } 507 508 /* Write string table */ 509 ret = fd_write_vmcore(s->string_table_buf->data, 510 s->string_table_buf->len, s); 511 if (ret < 0) { 512 error_setg_errno(errp, -ret, "dump: failed to write string table data"); 513 } 514 } 515 516 static void write_data(DumpState *s, void *buf, int length, Error **errp) 517 { 518 int ret; 519 520 ret = fd_write_vmcore(buf, length, s); 521 if (ret < 0) { 522 error_setg_errno(errp, -ret, "dump: failed to save memory"); 523 } else { 524 s->written_size += length; 525 } 526 } 527 528 /* write the memory to vmcore. 1 page per I/O. */ 529 static void write_memory(DumpState *s, GuestPhysBlock *block, ram_addr_t start, 530 int64_t size, Error **errp) 531 { 532 ERRP_GUARD(); 533 int64_t i; 534 535 for (i = 0; i < size / s->dump_info.page_size; i++) { 536 write_data(s, block->host_addr + start + i * s->dump_info.page_size, 537 s->dump_info.page_size, errp); 538 if (*errp) { 539 return; 540 } 541 } 542 543 if ((size % s->dump_info.page_size) != 0) { 544 write_data(s, block->host_addr + start + i * s->dump_info.page_size, 545 size % s->dump_info.page_size, errp); 546 if (*errp) { 547 return; 548 } 549 } 550 } 551 552 /* get the memory's offset and size in the vmcore */ 553 static void get_offset_range(hwaddr phys_addr, 554 ram_addr_t mapping_length, 555 DumpState *s, 556 hwaddr *p_offset, 557 hwaddr *p_filesz) 558 { 559 GuestPhysBlock *block; 560 hwaddr offset = s->memory_offset; 561 int64_t size_in_block, start; 562 563 /* When the memory is not stored into vmcore, offset will be -1 */ 564 *p_offset = -1; 565 *p_filesz = 0; 566 567 if (dump_has_filter(s)) { 568 if (phys_addr < s->filter_area_begin || 569 phys_addr >= s->filter_area_begin + s->filter_area_length) { 570 return; 571 } 572 } 573 574 QTAILQ_FOREACH(block, &s->guest_phys_blocks.head, next) { 575 if (dump_has_filter(s)) { 576 if (block->target_start >= s->filter_area_begin + s->filter_area_length || 577 block->target_end <= s->filter_area_begin) { 578 /* This block is out of the range */ 579 continue; 580 } 581 582 if (s->filter_area_begin <= block->target_start) { 583 start = block->target_start; 584 } else { 585 start = s->filter_area_begin; 586 } 587 588 size_in_block = block->target_end - start; 589 if (s->filter_area_begin + s->filter_area_length < block->target_end) { 590 size_in_block -= block->target_end - (s->filter_area_begin + s->filter_area_length); 591 } 592 } else { 593 start = block->target_start; 594 size_in_block = block->target_end - block->target_start; 595 } 596 597 if (phys_addr >= start && phys_addr < start + size_in_block) { 598 *p_offset = phys_addr - start + offset; 599 600 /* The offset range mapped from the vmcore file must not spill over 601 * the GuestPhysBlock, clamp it. The rest of the mapping will be 602 * zero-filled in memory at load time; see 603 * <http://refspecs.linuxbase.org/elf/gabi4+/ch5.pheader.html>. 604 */ 605 *p_filesz = phys_addr + mapping_length <= start + size_in_block ? 606 mapping_length : 607 size_in_block - (phys_addr - start); 608 return; 609 } 610 611 offset += size_in_block; 612 } 613 } 614 615 static void write_elf_phdr_loads(DumpState *s, Error **errp) 616 { 617 ERRP_GUARD(); 618 hwaddr offset, filesz; 619 MemoryMapping *memory_mapping; 620 uint32_t phdr_index = 1; 621 622 QTAILQ_FOREACH(memory_mapping, &s->list.head, next) { 623 get_offset_range(memory_mapping->phys_addr, 624 memory_mapping->length, 625 s, &offset, &filesz); 626 if (dump_is_64bit(s)) { 627 write_elf64_load(s, memory_mapping, phdr_index++, offset, 628 filesz, errp); 629 } else { 630 write_elf32_load(s, memory_mapping, phdr_index++, offset, 631 filesz, errp); 632 } 633 634 if (*errp) { 635 return; 636 } 637 638 if (phdr_index >= s->phdr_num) { 639 break; 640 } 641 } 642 } 643 644 static void write_elf_notes(DumpState *s, Error **errp) 645 { 646 if (dump_is_64bit(s)) { 647 write_elf64_notes(fd_write_vmcore, s, errp); 648 } else { 649 write_elf32_notes(fd_write_vmcore, s, errp); 650 } 651 } 652 653 /* write elf header, PT_NOTE and elf note to vmcore. */ 654 static void dump_begin(DumpState *s, Error **errp) 655 { 656 ERRP_GUARD(); 657 658 /* 659 * the vmcore's format is: 660 * -------------- 661 * | elf header | 662 * -------------- 663 * | sctn_hdr | 664 * -------------- 665 * | PT_NOTE | 666 * -------------- 667 * | PT_LOAD | 668 * -------------- 669 * | ...... | 670 * -------------- 671 * | PT_LOAD | 672 * -------------- 673 * | elf note | 674 * -------------- 675 * | memory | 676 * -------------- 677 * 678 * we only know where the memory is saved after we write elf note into 679 * vmcore. 680 */ 681 682 /* write elf header to vmcore */ 683 write_elf_header(s, errp); 684 if (*errp) { 685 return; 686 } 687 688 /* write section headers to vmcore */ 689 write_elf_section_headers(s, errp); 690 if (*errp) { 691 return; 692 } 693 694 /* write PT_NOTE to vmcore */ 695 write_elf_phdr_note(s, errp); 696 if (*errp) { 697 return; 698 } 699 700 /* write all PT_LOADs to vmcore */ 701 write_elf_phdr_loads(s, errp); 702 if (*errp) { 703 return; 704 } 705 706 /* write notes to vmcore */ 707 write_elf_notes(s, errp); 708 } 709 710 int64_t dump_filtered_memblock_size(GuestPhysBlock *block, 711 int64_t filter_area_start, 712 int64_t filter_area_length) 713 { 714 int64_t size, left, right; 715 716 /* No filter, return full size */ 717 if (!filter_area_length) { 718 return block->target_end - block->target_start; 719 } 720 721 /* calculate the overlapped region. */ 722 left = MAX(filter_area_start, block->target_start); 723 right = MIN(filter_area_start + filter_area_length, block->target_end); 724 size = right - left; 725 size = size > 0 ? size : 0; 726 727 return size; 728 } 729 730 int64_t dump_filtered_memblock_start(GuestPhysBlock *block, 731 int64_t filter_area_start, 732 int64_t filter_area_length) 733 { 734 if (filter_area_length) { 735 /* return -1 if the block is not within filter area */ 736 if (block->target_start >= filter_area_start + filter_area_length || 737 block->target_end <= filter_area_start) { 738 return -1; 739 } 740 741 if (filter_area_start > block->target_start) { 742 return filter_area_start - block->target_start; 743 } 744 } 745 746 return 0; 747 } 748 749 /* write all memory to vmcore */ 750 static void dump_iterate(DumpState *s, Error **errp) 751 { 752 ERRP_GUARD(); 753 GuestPhysBlock *block; 754 int64_t memblock_size, memblock_start; 755 756 QTAILQ_FOREACH(block, &s->guest_phys_blocks.head, next) { 757 memblock_start = dump_filtered_memblock_start(block, s->filter_area_begin, s->filter_area_length); 758 if (memblock_start == -1) { 759 continue; 760 } 761 762 memblock_size = dump_filtered_memblock_size(block, s->filter_area_begin, s->filter_area_length); 763 764 /* Write the memory to file */ 765 write_memory(s, block, memblock_start, memblock_size, errp); 766 if (*errp) { 767 return; 768 } 769 } 770 } 771 772 static void dump_end(DumpState *s, Error **errp) 773 { 774 int rc; 775 776 if (s->elf_section_data_size) { 777 s->elf_section_data = g_malloc0(s->elf_section_data_size); 778 } 779 780 /* Adds the architecture defined section data to s->elf_section_data */ 781 if (s->dump_info.arch_sections_write_fn && 782 s->elf_section_data_size) { 783 rc = s->dump_info.arch_sections_write_fn(s, s->elf_section_data); 784 if (rc) { 785 error_setg_errno(errp, rc, 786 "dump: failed to get arch section data"); 787 g_free(s->elf_section_data); 788 return; 789 } 790 } 791 792 /* write sections to vmcore */ 793 write_elf_sections(s, errp); 794 } 795 796 static void create_vmcore(DumpState *s, Error **errp) 797 { 798 ERRP_GUARD(); 799 800 dump_begin(s, errp); 801 if (*errp) { 802 return; 803 } 804 805 /* Iterate over memory and dump it to file */ 806 dump_iterate(s, errp); 807 if (*errp) { 808 return; 809 } 810 811 /* Write the section data */ 812 dump_end(s, errp); 813 } 814 815 static int write_start_flat_header(int fd) 816 { 817 MakedumpfileHeader *mh; 818 int ret = 0; 819 820 QEMU_BUILD_BUG_ON(sizeof *mh > MAX_SIZE_MDF_HEADER); 821 mh = g_malloc0(MAX_SIZE_MDF_HEADER); 822 823 memcpy(mh->signature, MAKEDUMPFILE_SIGNATURE, 824 MIN(sizeof mh->signature, sizeof MAKEDUMPFILE_SIGNATURE)); 825 826 mh->type = cpu_to_be64(TYPE_FLAT_HEADER); 827 mh->version = cpu_to_be64(VERSION_FLAT_HEADER); 828 829 size_t written_size; 830 written_size = qemu_write_full(fd, mh, MAX_SIZE_MDF_HEADER); 831 if (written_size != MAX_SIZE_MDF_HEADER) { 832 ret = -1; 833 } 834 835 g_free(mh); 836 return ret; 837 } 838 839 static int write_end_flat_header(int fd) 840 { 841 MakedumpfileDataHeader mdh; 842 843 mdh.offset = END_FLAG_FLAT_HEADER; 844 mdh.buf_size = END_FLAG_FLAT_HEADER; 845 846 size_t written_size; 847 written_size = qemu_write_full(fd, &mdh, sizeof(mdh)); 848 if (written_size != sizeof(mdh)) { 849 return -1; 850 } 851 852 return 0; 853 } 854 855 static int write_buffer(int fd, off_t offset, const void *buf, size_t size) 856 { 857 size_t written_size; 858 MakedumpfileDataHeader mdh; 859 860 mdh.offset = cpu_to_be64(offset); 861 mdh.buf_size = cpu_to_be64(size); 862 863 written_size = qemu_write_full(fd, &mdh, sizeof(mdh)); 864 if (written_size != sizeof(mdh)) { 865 return -1; 866 } 867 868 written_size = qemu_write_full(fd, buf, size); 869 if (written_size != size) { 870 return -1; 871 } 872 873 return 0; 874 } 875 876 static int buf_write_note(const void *buf, size_t size, void *opaque) 877 { 878 DumpState *s = opaque; 879 880 /* note_buf is not enough */ 881 if (s->note_buf_offset + size > s->note_size) { 882 return -1; 883 } 884 885 memcpy(s->note_buf + s->note_buf_offset, buf, size); 886 887 s->note_buf_offset += size; 888 889 return 0; 890 } 891 892 /* 893 * This function retrieves various sizes from an elf header. 894 * 895 * @note has to be a valid ELF note. The return sizes are unmodified 896 * (not padded or rounded up to be multiple of 4). 897 */ 898 static void get_note_sizes(DumpState *s, const void *note, 899 uint64_t *note_head_size, 900 uint64_t *name_size, 901 uint64_t *desc_size) 902 { 903 uint64_t note_head_sz; 904 uint64_t name_sz; 905 uint64_t desc_sz; 906 907 if (dump_is_64bit(s)) { 908 const Elf64_Nhdr *hdr = note; 909 note_head_sz = sizeof(Elf64_Nhdr); 910 name_sz = tswap64(hdr->n_namesz); 911 desc_sz = tswap64(hdr->n_descsz); 912 } else { 913 const Elf32_Nhdr *hdr = note; 914 note_head_sz = sizeof(Elf32_Nhdr); 915 name_sz = tswap32(hdr->n_namesz); 916 desc_sz = tswap32(hdr->n_descsz); 917 } 918 919 if (note_head_size) { 920 *note_head_size = note_head_sz; 921 } 922 if (name_size) { 923 *name_size = name_sz; 924 } 925 if (desc_size) { 926 *desc_size = desc_sz; 927 } 928 } 929 930 static bool note_name_equal(DumpState *s, 931 const uint8_t *note, const char *name) 932 { 933 int len = strlen(name) + 1; 934 uint64_t head_size, name_size; 935 936 get_note_sizes(s, note, &head_size, &name_size, NULL); 937 head_size = ROUND_UP(head_size, 4); 938 939 return name_size == len && memcmp(note + head_size, name, len) == 0; 940 } 941 942 /* write common header, sub header and elf note to vmcore */ 943 static void create_header32(DumpState *s, Error **errp) 944 { 945 ERRP_GUARD(); 946 DiskDumpHeader32 *dh = NULL; 947 KdumpSubHeader32 *kh = NULL; 948 size_t size; 949 uint32_t block_size; 950 uint32_t sub_hdr_size; 951 uint32_t bitmap_blocks; 952 uint32_t status = 0; 953 uint64_t offset_note; 954 955 /* write common header, the version of kdump-compressed format is 6th */ 956 size = sizeof(DiskDumpHeader32); 957 dh = g_malloc0(size); 958 959 memcpy(dh->signature, KDUMP_SIGNATURE, SIG_LEN); 960 dh->header_version = cpu_to_dump32(s, 6); 961 block_size = s->dump_info.page_size; 962 dh->block_size = cpu_to_dump32(s, block_size); 963 sub_hdr_size = sizeof(struct KdumpSubHeader32) + s->note_size; 964 sub_hdr_size = DIV_ROUND_UP(sub_hdr_size, block_size); 965 dh->sub_hdr_size = cpu_to_dump32(s, sub_hdr_size); 966 /* dh->max_mapnr may be truncated, full 64bit is in kh.max_mapnr_64 */ 967 dh->max_mapnr = cpu_to_dump32(s, MIN(s->max_mapnr, UINT_MAX)); 968 dh->nr_cpus = cpu_to_dump32(s, s->nr_cpus); 969 bitmap_blocks = DIV_ROUND_UP(s->len_dump_bitmap, block_size) * 2; 970 dh->bitmap_blocks = cpu_to_dump32(s, bitmap_blocks); 971 strncpy(dh->utsname.machine, ELF_MACHINE_UNAME, sizeof(dh->utsname.machine)); 972 973 if (s->flag_compress & DUMP_DH_COMPRESSED_ZLIB) { 974 status |= DUMP_DH_COMPRESSED_ZLIB; 975 } 976 #ifdef CONFIG_LZO 977 if (s->flag_compress & DUMP_DH_COMPRESSED_LZO) { 978 status |= DUMP_DH_COMPRESSED_LZO; 979 } 980 #endif 981 #ifdef CONFIG_SNAPPY 982 if (s->flag_compress & DUMP_DH_COMPRESSED_SNAPPY) { 983 status |= DUMP_DH_COMPRESSED_SNAPPY; 984 } 985 #endif 986 dh->status = cpu_to_dump32(s, status); 987 988 if (write_buffer(s->fd, 0, dh, size) < 0) { 989 error_setg(errp, "dump: failed to write disk dump header"); 990 goto out; 991 } 992 993 /* write sub header */ 994 size = sizeof(KdumpSubHeader32); 995 kh = g_malloc0(size); 996 997 /* 64bit max_mapnr_64 */ 998 kh->max_mapnr_64 = cpu_to_dump64(s, s->max_mapnr); 999 kh->phys_base = cpu_to_dump32(s, s->dump_info.phys_base); 1000 kh->dump_level = cpu_to_dump32(s, DUMP_LEVEL); 1001 1002 offset_note = DISKDUMP_HEADER_BLOCKS * block_size + size; 1003 if (s->guest_note && 1004 note_name_equal(s, s->guest_note, "VMCOREINFO")) { 1005 uint64_t hsize, name_size, size_vmcoreinfo_desc, offset_vmcoreinfo; 1006 1007 get_note_sizes(s, s->guest_note, 1008 &hsize, &name_size, &size_vmcoreinfo_desc); 1009 offset_vmcoreinfo = offset_note + s->note_size - s->guest_note_size + 1010 (DIV_ROUND_UP(hsize, 4) + DIV_ROUND_UP(name_size, 4)) * 4; 1011 kh->offset_vmcoreinfo = cpu_to_dump64(s, offset_vmcoreinfo); 1012 kh->size_vmcoreinfo = cpu_to_dump32(s, size_vmcoreinfo_desc); 1013 } 1014 1015 kh->offset_note = cpu_to_dump64(s, offset_note); 1016 kh->note_size = cpu_to_dump32(s, s->note_size); 1017 1018 if (write_buffer(s->fd, DISKDUMP_HEADER_BLOCKS * 1019 block_size, kh, size) < 0) { 1020 error_setg(errp, "dump: failed to write kdump sub header"); 1021 goto out; 1022 } 1023 1024 /* write note */ 1025 s->note_buf = g_malloc0(s->note_size); 1026 s->note_buf_offset = 0; 1027 1028 /* use s->note_buf to store notes temporarily */ 1029 write_elf32_notes(buf_write_note, s, errp); 1030 if (*errp) { 1031 goto out; 1032 } 1033 if (write_buffer(s->fd, offset_note, s->note_buf, 1034 s->note_size) < 0) { 1035 error_setg(errp, "dump: failed to write notes"); 1036 goto out; 1037 } 1038 1039 /* get offset of dump_bitmap */ 1040 s->offset_dump_bitmap = (DISKDUMP_HEADER_BLOCKS + sub_hdr_size) * 1041 block_size; 1042 1043 /* get offset of page */ 1044 s->offset_page = (DISKDUMP_HEADER_BLOCKS + sub_hdr_size + bitmap_blocks) * 1045 block_size; 1046 1047 out: 1048 g_free(dh); 1049 g_free(kh); 1050 g_free(s->note_buf); 1051 } 1052 1053 /* write common header, sub header and elf note to vmcore */ 1054 static void create_header64(DumpState *s, Error **errp) 1055 { 1056 ERRP_GUARD(); 1057 DiskDumpHeader64 *dh = NULL; 1058 KdumpSubHeader64 *kh = NULL; 1059 size_t size; 1060 uint32_t block_size; 1061 uint32_t sub_hdr_size; 1062 uint32_t bitmap_blocks; 1063 uint32_t status = 0; 1064 uint64_t offset_note; 1065 1066 /* write common header, the version of kdump-compressed format is 6th */ 1067 size = sizeof(DiskDumpHeader64); 1068 dh = g_malloc0(size); 1069 1070 memcpy(dh->signature, KDUMP_SIGNATURE, SIG_LEN); 1071 dh->header_version = cpu_to_dump32(s, 6); 1072 block_size = s->dump_info.page_size; 1073 dh->block_size = cpu_to_dump32(s, block_size); 1074 sub_hdr_size = sizeof(struct KdumpSubHeader64) + s->note_size; 1075 sub_hdr_size = DIV_ROUND_UP(sub_hdr_size, block_size); 1076 dh->sub_hdr_size = cpu_to_dump32(s, sub_hdr_size); 1077 /* dh->max_mapnr may be truncated, full 64bit is in kh.max_mapnr_64 */ 1078 dh->max_mapnr = cpu_to_dump32(s, MIN(s->max_mapnr, UINT_MAX)); 1079 dh->nr_cpus = cpu_to_dump32(s, s->nr_cpus); 1080 bitmap_blocks = DIV_ROUND_UP(s->len_dump_bitmap, block_size) * 2; 1081 dh->bitmap_blocks = cpu_to_dump32(s, bitmap_blocks); 1082 strncpy(dh->utsname.machine, ELF_MACHINE_UNAME, sizeof(dh->utsname.machine)); 1083 1084 if (s->flag_compress & DUMP_DH_COMPRESSED_ZLIB) { 1085 status |= DUMP_DH_COMPRESSED_ZLIB; 1086 } 1087 #ifdef CONFIG_LZO 1088 if (s->flag_compress & DUMP_DH_COMPRESSED_LZO) { 1089 status |= DUMP_DH_COMPRESSED_LZO; 1090 } 1091 #endif 1092 #ifdef CONFIG_SNAPPY 1093 if (s->flag_compress & DUMP_DH_COMPRESSED_SNAPPY) { 1094 status |= DUMP_DH_COMPRESSED_SNAPPY; 1095 } 1096 #endif 1097 dh->status = cpu_to_dump32(s, status); 1098 1099 if (write_buffer(s->fd, 0, dh, size) < 0) { 1100 error_setg(errp, "dump: failed to write disk dump header"); 1101 goto out; 1102 } 1103 1104 /* write sub header */ 1105 size = sizeof(KdumpSubHeader64); 1106 kh = g_malloc0(size); 1107 1108 /* 64bit max_mapnr_64 */ 1109 kh->max_mapnr_64 = cpu_to_dump64(s, s->max_mapnr); 1110 kh->phys_base = cpu_to_dump64(s, s->dump_info.phys_base); 1111 kh->dump_level = cpu_to_dump32(s, DUMP_LEVEL); 1112 1113 offset_note = DISKDUMP_HEADER_BLOCKS * block_size + size; 1114 if (s->guest_note && 1115 note_name_equal(s, s->guest_note, "VMCOREINFO")) { 1116 uint64_t hsize, name_size, size_vmcoreinfo_desc, offset_vmcoreinfo; 1117 1118 get_note_sizes(s, s->guest_note, 1119 &hsize, &name_size, &size_vmcoreinfo_desc); 1120 offset_vmcoreinfo = offset_note + s->note_size - s->guest_note_size + 1121 (DIV_ROUND_UP(hsize, 4) + DIV_ROUND_UP(name_size, 4)) * 4; 1122 kh->offset_vmcoreinfo = cpu_to_dump64(s, offset_vmcoreinfo); 1123 kh->size_vmcoreinfo = cpu_to_dump64(s, size_vmcoreinfo_desc); 1124 } 1125 1126 kh->offset_note = cpu_to_dump64(s, offset_note); 1127 kh->note_size = cpu_to_dump64(s, s->note_size); 1128 1129 if (write_buffer(s->fd, DISKDUMP_HEADER_BLOCKS * 1130 block_size, kh, size) < 0) { 1131 error_setg(errp, "dump: failed to write kdump sub header"); 1132 goto out; 1133 } 1134 1135 /* write note */ 1136 s->note_buf = g_malloc0(s->note_size); 1137 s->note_buf_offset = 0; 1138 1139 /* use s->note_buf to store notes temporarily */ 1140 write_elf64_notes(buf_write_note, s, errp); 1141 if (*errp) { 1142 goto out; 1143 } 1144 1145 if (write_buffer(s->fd, offset_note, s->note_buf, 1146 s->note_size) < 0) { 1147 error_setg(errp, "dump: failed to write notes"); 1148 goto out; 1149 } 1150 1151 /* get offset of dump_bitmap */ 1152 s->offset_dump_bitmap = (DISKDUMP_HEADER_BLOCKS + sub_hdr_size) * 1153 block_size; 1154 1155 /* get offset of page */ 1156 s->offset_page = (DISKDUMP_HEADER_BLOCKS + sub_hdr_size + bitmap_blocks) * 1157 block_size; 1158 1159 out: 1160 g_free(dh); 1161 g_free(kh); 1162 g_free(s->note_buf); 1163 } 1164 1165 static void write_dump_header(DumpState *s, Error **errp) 1166 { 1167 if (dump_is_64bit(s)) { 1168 create_header64(s, errp); 1169 } else { 1170 create_header32(s, errp); 1171 } 1172 } 1173 1174 static size_t dump_bitmap_get_bufsize(DumpState *s) 1175 { 1176 return s->dump_info.page_size; 1177 } 1178 1179 /* 1180 * set dump_bitmap sequencely. the bit before last_pfn is not allowed to be 1181 * rewritten, so if need to set the first bit, set last_pfn and pfn to 0. 1182 * set_dump_bitmap will always leave the recently set bit un-sync. And setting 1183 * (last bit + sizeof(buf) * 8) to 0 will do flushing the content in buf into 1184 * vmcore, ie. synchronizing un-sync bit into vmcore. 1185 */ 1186 static int set_dump_bitmap(uint64_t last_pfn, uint64_t pfn, bool value, 1187 uint8_t *buf, DumpState *s) 1188 { 1189 off_t old_offset, new_offset; 1190 off_t offset_bitmap1, offset_bitmap2; 1191 uint32_t byte, bit; 1192 size_t bitmap_bufsize = dump_bitmap_get_bufsize(s); 1193 size_t bits_per_buf = bitmap_bufsize * CHAR_BIT; 1194 1195 /* should not set the previous place */ 1196 assert(last_pfn <= pfn); 1197 1198 /* 1199 * if the bit needed to be set is not cached in buf, flush the data in buf 1200 * to vmcore firstly. 1201 * making new_offset be bigger than old_offset can also sync remained data 1202 * into vmcore. 1203 */ 1204 old_offset = bitmap_bufsize * (last_pfn / bits_per_buf); 1205 new_offset = bitmap_bufsize * (pfn / bits_per_buf); 1206 1207 while (old_offset < new_offset) { 1208 /* calculate the offset and write dump_bitmap */ 1209 offset_bitmap1 = s->offset_dump_bitmap + old_offset; 1210 if (write_buffer(s->fd, offset_bitmap1, buf, 1211 bitmap_bufsize) < 0) { 1212 return -1; 1213 } 1214 1215 /* dump level 1 is chosen, so 1st and 2nd bitmap are same */ 1216 offset_bitmap2 = s->offset_dump_bitmap + s->len_dump_bitmap + 1217 old_offset; 1218 if (write_buffer(s->fd, offset_bitmap2, buf, 1219 bitmap_bufsize) < 0) { 1220 return -1; 1221 } 1222 1223 memset(buf, 0, bitmap_bufsize); 1224 old_offset += bitmap_bufsize; 1225 } 1226 1227 /* get the exact place of the bit in the buf, and set it */ 1228 byte = (pfn % bits_per_buf) / CHAR_BIT; 1229 bit = (pfn % bits_per_buf) % CHAR_BIT; 1230 if (value) { 1231 buf[byte] |= 1u << bit; 1232 } else { 1233 buf[byte] &= ~(1u << bit); 1234 } 1235 1236 return 0; 1237 } 1238 1239 static uint64_t dump_paddr_to_pfn(DumpState *s, uint64_t addr) 1240 { 1241 int target_page_shift = ctz32(s->dump_info.page_size); 1242 1243 return (addr >> target_page_shift) - ARCH_PFN_OFFSET; 1244 } 1245 1246 static uint64_t dump_pfn_to_paddr(DumpState *s, uint64_t pfn) 1247 { 1248 int target_page_shift = ctz32(s->dump_info.page_size); 1249 1250 return (pfn + ARCH_PFN_OFFSET) << target_page_shift; 1251 } 1252 1253 /* 1254 * Return the page frame number and the page content in *bufptr. bufptr can be 1255 * NULL. If not NULL, *bufptr must contains a target page size of pre-allocated 1256 * memory. This is not necessarily the memory returned. 1257 */ 1258 static bool get_next_page(GuestPhysBlock **blockptr, uint64_t *pfnptr, 1259 uint8_t **bufptr, DumpState *s) 1260 { 1261 GuestPhysBlock *block = *blockptr; 1262 uint32_t page_size = s->dump_info.page_size; 1263 uint8_t *buf = NULL, *hbuf; 1264 hwaddr addr; 1265 1266 /* block == NULL means the start of the iteration */ 1267 if (!block) { 1268 block = QTAILQ_FIRST(&s->guest_phys_blocks.head); 1269 *blockptr = block; 1270 addr = block->target_start; 1271 *pfnptr = dump_paddr_to_pfn(s, addr); 1272 } else { 1273 *pfnptr += 1; 1274 addr = dump_pfn_to_paddr(s, *pfnptr); 1275 } 1276 assert(block != NULL); 1277 1278 while (1) { 1279 if (addr >= block->target_start && addr < block->target_end) { 1280 size_t n = MIN(block->target_end - addr, page_size - addr % page_size); 1281 hbuf = block->host_addr + (addr - block->target_start); 1282 if (!buf) { 1283 if (n == page_size) { 1284 /* this is a whole target page, go for it */ 1285 assert(addr % page_size == 0); 1286 buf = hbuf; 1287 break; 1288 } else if (bufptr) { 1289 assert(*bufptr); 1290 buf = *bufptr; 1291 memset(buf, 0, page_size); 1292 } else { 1293 return true; 1294 } 1295 } 1296 1297 memcpy(buf + addr % page_size, hbuf, n); 1298 addr += n; 1299 if (addr % page_size == 0) { 1300 /* we filled up the page */ 1301 break; 1302 } 1303 } else { 1304 /* the next page is in the next block */ 1305 *blockptr = block = QTAILQ_NEXT(block, next); 1306 if (!block) { 1307 break; 1308 } 1309 1310 addr = block->target_start; 1311 /* are we still in the same page? */ 1312 if (dump_paddr_to_pfn(s, addr) != *pfnptr) { 1313 if (buf) { 1314 /* no, but we already filled something earlier, return it */ 1315 break; 1316 } else { 1317 /* else continue from there */ 1318 *pfnptr = dump_paddr_to_pfn(s, addr); 1319 } 1320 } 1321 } 1322 } 1323 1324 if (bufptr) { 1325 *bufptr = buf; 1326 } 1327 1328 return buf != NULL; 1329 } 1330 1331 static void write_dump_bitmap(DumpState *s, Error **errp) 1332 { 1333 int ret = 0; 1334 uint64_t last_pfn, pfn; 1335 void *dump_bitmap_buf; 1336 size_t num_dumpable; 1337 GuestPhysBlock *block_iter = NULL; 1338 size_t bitmap_bufsize = dump_bitmap_get_bufsize(s); 1339 size_t bits_per_buf = bitmap_bufsize * CHAR_BIT; 1340 1341 /* dump_bitmap_buf is used to store dump_bitmap temporarily */ 1342 dump_bitmap_buf = g_malloc0(bitmap_bufsize); 1343 1344 num_dumpable = 0; 1345 last_pfn = 0; 1346 1347 /* 1348 * exam memory page by page, and set the bit in dump_bitmap corresponded 1349 * to the existing page. 1350 */ 1351 while (get_next_page(&block_iter, &pfn, NULL, s)) { 1352 ret = set_dump_bitmap(last_pfn, pfn, true, dump_bitmap_buf, s); 1353 if (ret < 0) { 1354 error_setg(errp, "dump: failed to set dump_bitmap"); 1355 goto out; 1356 } 1357 1358 last_pfn = pfn; 1359 num_dumpable++; 1360 } 1361 1362 /* 1363 * set_dump_bitmap will always leave the recently set bit un-sync. Here we 1364 * set the remaining bits from last_pfn to the end of the bitmap buffer to 1365 * 0. With those set, the un-sync bit will be synchronized into the vmcore. 1366 */ 1367 if (num_dumpable > 0) { 1368 ret = set_dump_bitmap(last_pfn, last_pfn + bits_per_buf, false, 1369 dump_bitmap_buf, s); 1370 if (ret < 0) { 1371 error_setg(errp, "dump: failed to sync dump_bitmap"); 1372 goto out; 1373 } 1374 } 1375 1376 /* number of dumpable pages that will be dumped later */ 1377 s->num_dumpable = num_dumpable; 1378 1379 out: 1380 g_free(dump_bitmap_buf); 1381 } 1382 1383 static void prepare_data_cache(DataCache *data_cache, DumpState *s, 1384 off_t offset) 1385 { 1386 data_cache->fd = s->fd; 1387 data_cache->data_size = 0; 1388 data_cache->buf_size = 4 * dump_bitmap_get_bufsize(s); 1389 data_cache->buf = g_malloc0(data_cache->buf_size); 1390 data_cache->offset = offset; 1391 } 1392 1393 static int write_cache(DataCache *dc, const void *buf, size_t size, 1394 bool flag_sync) 1395 { 1396 /* 1397 * dc->buf_size should not be less than size, otherwise dc will never be 1398 * enough 1399 */ 1400 assert(size <= dc->buf_size); 1401 1402 /* 1403 * if flag_sync is set, synchronize data in dc->buf into vmcore. 1404 * otherwise check if the space is enough for caching data in buf, if not, 1405 * write the data in dc->buf to dc->fd and reset dc->buf 1406 */ 1407 if ((!flag_sync && dc->data_size + size > dc->buf_size) || 1408 (flag_sync && dc->data_size > 0)) { 1409 if (write_buffer(dc->fd, dc->offset, dc->buf, dc->data_size) < 0) { 1410 return -1; 1411 } 1412 1413 dc->offset += dc->data_size; 1414 dc->data_size = 0; 1415 } 1416 1417 if (!flag_sync) { 1418 memcpy(dc->buf + dc->data_size, buf, size); 1419 dc->data_size += size; 1420 } 1421 1422 return 0; 1423 } 1424 1425 static void free_data_cache(DataCache *data_cache) 1426 { 1427 g_free(data_cache->buf); 1428 } 1429 1430 static size_t get_len_buf_out(size_t page_size, uint32_t flag_compress) 1431 { 1432 switch (flag_compress) { 1433 case DUMP_DH_COMPRESSED_ZLIB: 1434 return compressBound(page_size); 1435 1436 case DUMP_DH_COMPRESSED_LZO: 1437 /* 1438 * LZO will expand incompressible data by a little amount. Please check 1439 * the following URL to see the expansion calculation: 1440 * http://www.oberhumer.com/opensource/lzo/lzofaq.php 1441 */ 1442 return page_size + page_size / 16 + 64 + 3; 1443 1444 #ifdef CONFIG_SNAPPY 1445 case DUMP_DH_COMPRESSED_SNAPPY: 1446 return snappy_max_compressed_length(page_size); 1447 #endif 1448 } 1449 return 0; 1450 } 1451 1452 static void write_dump_pages(DumpState *s, Error **errp) 1453 { 1454 int ret = 0; 1455 DataCache page_desc, page_data; 1456 size_t len_buf_out, size_out; 1457 #ifdef CONFIG_LZO 1458 lzo_bytep wrkmem = NULL; 1459 #endif 1460 uint8_t *buf_out = NULL; 1461 off_t offset_desc, offset_data; 1462 PageDescriptor pd, pd_zero; 1463 uint8_t *buf; 1464 GuestPhysBlock *block_iter = NULL; 1465 uint64_t pfn_iter; 1466 g_autofree uint8_t *page = NULL; 1467 1468 /* get offset of page_desc and page_data in dump file */ 1469 offset_desc = s->offset_page; 1470 offset_data = offset_desc + sizeof(PageDescriptor) * s->num_dumpable; 1471 1472 prepare_data_cache(&page_desc, s, offset_desc); 1473 prepare_data_cache(&page_data, s, offset_data); 1474 1475 /* prepare buffer to store compressed data */ 1476 len_buf_out = get_len_buf_out(s->dump_info.page_size, s->flag_compress); 1477 assert(len_buf_out != 0); 1478 1479 #ifdef CONFIG_LZO 1480 wrkmem = g_malloc(LZO1X_1_MEM_COMPRESS); 1481 #endif 1482 1483 buf_out = g_malloc(len_buf_out); 1484 1485 /* 1486 * init zero page's page_desc and page_data, because every zero page 1487 * uses the same page_data 1488 */ 1489 pd_zero.size = cpu_to_dump32(s, s->dump_info.page_size); 1490 pd_zero.flags = cpu_to_dump32(s, 0); 1491 pd_zero.offset = cpu_to_dump64(s, offset_data); 1492 pd_zero.page_flags = cpu_to_dump64(s, 0); 1493 buf = g_malloc0(s->dump_info.page_size); 1494 ret = write_cache(&page_data, buf, s->dump_info.page_size, false); 1495 g_free(buf); 1496 if (ret < 0) { 1497 error_setg(errp, "dump: failed to write page data (zero page)"); 1498 goto out; 1499 } 1500 1501 offset_data += s->dump_info.page_size; 1502 page = g_malloc(s->dump_info.page_size); 1503 1504 /* 1505 * dump memory to vmcore page by page. zero page will all be resided in the 1506 * first page of page section 1507 */ 1508 for (buf = page; get_next_page(&block_iter, &pfn_iter, &buf, s); buf = page) { 1509 /* check zero page */ 1510 if (buffer_is_zero(buf, s->dump_info.page_size)) { 1511 ret = write_cache(&page_desc, &pd_zero, sizeof(PageDescriptor), 1512 false); 1513 if (ret < 0) { 1514 error_setg(errp, "dump: failed to write page desc"); 1515 goto out; 1516 } 1517 } else { 1518 /* 1519 * not zero page, then: 1520 * 1. compress the page 1521 * 2. write the compressed page into the cache of page_data 1522 * 3. get page desc of the compressed page and write it into the 1523 * cache of page_desc 1524 * 1525 * only one compression format will be used here, for 1526 * s->flag_compress is set. But when compression fails to work, 1527 * we fall back to save in plaintext. 1528 */ 1529 size_out = len_buf_out; 1530 if ((s->flag_compress & DUMP_DH_COMPRESSED_ZLIB) && 1531 (compress2(buf_out, (uLongf *)&size_out, buf, 1532 s->dump_info.page_size, Z_BEST_SPEED) == Z_OK) && 1533 (size_out < s->dump_info.page_size)) { 1534 pd.flags = cpu_to_dump32(s, DUMP_DH_COMPRESSED_ZLIB); 1535 pd.size = cpu_to_dump32(s, size_out); 1536 1537 ret = write_cache(&page_data, buf_out, size_out, false); 1538 if (ret < 0) { 1539 error_setg(errp, "dump: failed to write page data"); 1540 goto out; 1541 } 1542 #ifdef CONFIG_LZO 1543 } else if ((s->flag_compress & DUMP_DH_COMPRESSED_LZO) && 1544 (lzo1x_1_compress(buf, s->dump_info.page_size, buf_out, 1545 (lzo_uint *)&size_out, wrkmem) == LZO_E_OK) && 1546 (size_out < s->dump_info.page_size)) { 1547 pd.flags = cpu_to_dump32(s, DUMP_DH_COMPRESSED_LZO); 1548 pd.size = cpu_to_dump32(s, size_out); 1549 1550 ret = write_cache(&page_data, buf_out, size_out, false); 1551 if (ret < 0) { 1552 error_setg(errp, "dump: failed to write page data"); 1553 goto out; 1554 } 1555 #endif 1556 #ifdef CONFIG_SNAPPY 1557 } else if ((s->flag_compress & DUMP_DH_COMPRESSED_SNAPPY) && 1558 (snappy_compress((char *)buf, s->dump_info.page_size, 1559 (char *)buf_out, &size_out) == SNAPPY_OK) && 1560 (size_out < s->dump_info.page_size)) { 1561 pd.flags = cpu_to_dump32(s, DUMP_DH_COMPRESSED_SNAPPY); 1562 pd.size = cpu_to_dump32(s, size_out); 1563 1564 ret = write_cache(&page_data, buf_out, size_out, false); 1565 if (ret < 0) { 1566 error_setg(errp, "dump: failed to write page data"); 1567 goto out; 1568 } 1569 #endif 1570 } else { 1571 /* 1572 * fall back to save in plaintext, size_out should be 1573 * assigned the target's page size 1574 */ 1575 pd.flags = cpu_to_dump32(s, 0); 1576 size_out = s->dump_info.page_size; 1577 pd.size = cpu_to_dump32(s, size_out); 1578 1579 ret = write_cache(&page_data, buf, 1580 s->dump_info.page_size, false); 1581 if (ret < 0) { 1582 error_setg(errp, "dump: failed to write page data"); 1583 goto out; 1584 } 1585 } 1586 1587 /* get and write page desc here */ 1588 pd.page_flags = cpu_to_dump64(s, 0); 1589 pd.offset = cpu_to_dump64(s, offset_data); 1590 offset_data += size_out; 1591 1592 ret = write_cache(&page_desc, &pd, sizeof(PageDescriptor), false); 1593 if (ret < 0) { 1594 error_setg(errp, "dump: failed to write page desc"); 1595 goto out; 1596 } 1597 } 1598 s->written_size += s->dump_info.page_size; 1599 } 1600 1601 ret = write_cache(&page_desc, NULL, 0, true); 1602 if (ret < 0) { 1603 error_setg(errp, "dump: failed to sync cache for page_desc"); 1604 goto out; 1605 } 1606 ret = write_cache(&page_data, NULL, 0, true); 1607 if (ret < 0) { 1608 error_setg(errp, "dump: failed to sync cache for page_data"); 1609 goto out; 1610 } 1611 1612 out: 1613 free_data_cache(&page_desc); 1614 free_data_cache(&page_data); 1615 1616 #ifdef CONFIG_LZO 1617 g_free(wrkmem); 1618 #endif 1619 1620 g_free(buf_out); 1621 } 1622 1623 static void create_kdump_vmcore(DumpState *s, Error **errp) 1624 { 1625 ERRP_GUARD(); 1626 int ret; 1627 1628 /* 1629 * the kdump-compressed format is: 1630 * File offset 1631 * +------------------------------------------+ 0x0 1632 * | main header (struct disk_dump_header) | 1633 * |------------------------------------------+ block 1 1634 * | sub header (struct kdump_sub_header) | 1635 * |------------------------------------------+ block 2 1636 * | 1st-dump_bitmap | 1637 * |------------------------------------------+ block 2 + X blocks 1638 * | 2nd-dump_bitmap | (aligned by block) 1639 * |------------------------------------------+ block 2 + 2 * X blocks 1640 * | page desc for pfn 0 (struct page_desc) | (aligned by block) 1641 * | page desc for pfn 1 (struct page_desc) | 1642 * | : | 1643 * |------------------------------------------| (not aligned by block) 1644 * | page data (pfn 0) | 1645 * | page data (pfn 1) | 1646 * | : | 1647 * +------------------------------------------+ 1648 */ 1649 1650 ret = write_start_flat_header(s->fd); 1651 if (ret < 0) { 1652 error_setg(errp, "dump: failed to write start flat header"); 1653 return; 1654 } 1655 1656 write_dump_header(s, errp); 1657 if (*errp) { 1658 return; 1659 } 1660 1661 write_dump_bitmap(s, errp); 1662 if (*errp) { 1663 return; 1664 } 1665 1666 write_dump_pages(s, errp); 1667 if (*errp) { 1668 return; 1669 } 1670 1671 ret = write_end_flat_header(s->fd); 1672 if (ret < 0) { 1673 error_setg(errp, "dump: failed to write end flat header"); 1674 return; 1675 } 1676 } 1677 1678 static int validate_start_block(DumpState *s) 1679 { 1680 GuestPhysBlock *block; 1681 1682 if (!dump_has_filter(s)) { 1683 return 0; 1684 } 1685 1686 QTAILQ_FOREACH(block, &s->guest_phys_blocks.head, next) { 1687 /* This block is out of the range */ 1688 if (block->target_start >= s->filter_area_begin + s->filter_area_length || 1689 block->target_end <= s->filter_area_begin) { 1690 continue; 1691 } 1692 return 0; 1693 } 1694 1695 return -1; 1696 } 1697 1698 static void get_max_mapnr(DumpState *s) 1699 { 1700 GuestPhysBlock *last_block; 1701 1702 last_block = QTAILQ_LAST(&s->guest_phys_blocks.head); 1703 s->max_mapnr = dump_paddr_to_pfn(s, last_block->target_end); 1704 } 1705 1706 static DumpState dump_state_global = { .status = DUMP_STATUS_NONE }; 1707 1708 static void dump_state_prepare(DumpState *s) 1709 { 1710 /* zero the struct, setting status to active */ 1711 *s = (DumpState) { .status = DUMP_STATUS_ACTIVE }; 1712 } 1713 1714 bool qemu_system_dump_in_progress(void) 1715 { 1716 DumpState *state = &dump_state_global; 1717 return (qatomic_read(&state->status) == DUMP_STATUS_ACTIVE); 1718 } 1719 1720 /* 1721 * calculate total size of memory to be dumped (taking filter into 1722 * account.) 1723 */ 1724 static int64_t dump_calculate_size(DumpState *s) 1725 { 1726 GuestPhysBlock *block; 1727 int64_t total = 0; 1728 1729 QTAILQ_FOREACH(block, &s->guest_phys_blocks.head, next) { 1730 total += dump_filtered_memblock_size(block, 1731 s->filter_area_begin, 1732 s->filter_area_length); 1733 } 1734 1735 return total; 1736 } 1737 1738 static void vmcoreinfo_update_phys_base(DumpState *s) 1739 { 1740 uint64_t size, note_head_size, name_size, phys_base; 1741 char **lines; 1742 uint8_t *vmci; 1743 size_t i; 1744 1745 if (!note_name_equal(s, s->guest_note, "VMCOREINFO")) { 1746 return; 1747 } 1748 1749 get_note_sizes(s, s->guest_note, ¬e_head_size, &name_size, &size); 1750 note_head_size = ROUND_UP(note_head_size, 4); 1751 1752 vmci = s->guest_note + note_head_size + ROUND_UP(name_size, 4); 1753 *(vmci + size) = '\0'; 1754 1755 lines = g_strsplit((char *)vmci, "\n", -1); 1756 for (i = 0; lines[i]; i++) { 1757 const char *prefix = NULL; 1758 1759 if (s->dump_info.d_machine == EM_X86_64) { 1760 prefix = "NUMBER(phys_base)="; 1761 } else if (s->dump_info.d_machine == EM_AARCH64) { 1762 prefix = "NUMBER(PHYS_OFFSET)="; 1763 } 1764 1765 if (prefix && g_str_has_prefix(lines[i], prefix)) { 1766 if (qemu_strtou64(lines[i] + strlen(prefix), NULL, 16, 1767 &phys_base) < 0) { 1768 warn_report("Failed to read %s", prefix); 1769 } else { 1770 s->dump_info.phys_base = phys_base; 1771 } 1772 break; 1773 } 1774 } 1775 1776 g_strfreev(lines); 1777 } 1778 1779 static void dump_init(DumpState *s, int fd, bool has_format, 1780 DumpGuestMemoryFormat format, bool paging, bool has_filter, 1781 int64_t begin, int64_t length, Error **errp) 1782 { 1783 ERRP_GUARD(); 1784 VMCoreInfoState *vmci = vmcoreinfo_find(); 1785 CPUState *cpu; 1786 int nr_cpus; 1787 int ret; 1788 1789 s->has_format = has_format; 1790 s->format = format; 1791 s->written_size = 0; 1792 1793 /* kdump-compressed is conflict with paging and filter */ 1794 if (has_format && format != DUMP_GUEST_MEMORY_FORMAT_ELF) { 1795 assert(!paging && !has_filter); 1796 } 1797 1798 if (runstate_is_running()) { 1799 vm_stop(RUN_STATE_SAVE_VM); 1800 s->resume = true; 1801 } else { 1802 s->resume = false; 1803 } 1804 1805 /* If we use KVM, we should synchronize the registers before we get dump 1806 * info or physmap info. 1807 */ 1808 cpu_synchronize_all_states(); 1809 nr_cpus = 0; 1810 CPU_FOREACH(cpu) { 1811 nr_cpus++; 1812 } 1813 1814 s->fd = fd; 1815 if (has_filter && !length) { 1816 error_setg(errp, QERR_INVALID_PARAMETER, "length"); 1817 goto cleanup; 1818 } 1819 s->filter_area_begin = begin; 1820 s->filter_area_length = length; 1821 1822 /* First index is 0, it's the special null name */ 1823 s->string_table_buf = g_array_new(FALSE, TRUE, 1); 1824 /* 1825 * Allocate the null name, due to the clearing option set to true 1826 * it will be 0. 1827 */ 1828 g_array_set_size(s->string_table_buf, 1); 1829 1830 memory_mapping_list_init(&s->list); 1831 1832 guest_phys_blocks_init(&s->guest_phys_blocks); 1833 guest_phys_blocks_append(&s->guest_phys_blocks); 1834 s->total_size = dump_calculate_size(s); 1835 #ifdef DEBUG_DUMP_GUEST_MEMORY 1836 fprintf(stderr, "DUMP: total memory to dump: %lu\n", s->total_size); 1837 #endif 1838 1839 /* it does not make sense to dump non-existent memory */ 1840 if (!s->total_size) { 1841 error_setg(errp, "dump: no guest memory to dump"); 1842 goto cleanup; 1843 } 1844 1845 /* Is the filter filtering everything? */ 1846 if (validate_start_block(s) == -1) { 1847 error_setg(errp, QERR_INVALID_PARAMETER, "begin"); 1848 goto cleanup; 1849 } 1850 1851 /* get dump info: endian, class and architecture. 1852 * If the target architecture is not supported, cpu_get_dump_info() will 1853 * return -1. 1854 */ 1855 ret = cpu_get_dump_info(&s->dump_info, &s->guest_phys_blocks); 1856 if (ret < 0) { 1857 error_setg(errp, QERR_UNSUPPORTED); 1858 goto cleanup; 1859 } 1860 1861 if (!s->dump_info.page_size) { 1862 s->dump_info.page_size = TARGET_PAGE_SIZE; 1863 } 1864 1865 s->note_size = cpu_get_note_size(s->dump_info.d_class, 1866 s->dump_info.d_machine, nr_cpus); 1867 if (s->note_size < 0) { 1868 error_setg(errp, QERR_UNSUPPORTED); 1869 goto cleanup; 1870 } 1871 1872 /* 1873 * The goal of this block is to (a) update the previously guessed 1874 * phys_base, (b) copy the guest note out of the guest. 1875 * Failure to do so is not fatal for dumping. 1876 */ 1877 if (vmci) { 1878 uint64_t addr, note_head_size, name_size, desc_size; 1879 uint32_t size; 1880 uint16_t format; 1881 1882 note_head_size = dump_is_64bit(s) ? 1883 sizeof(Elf64_Nhdr) : sizeof(Elf32_Nhdr); 1884 1885 format = le16_to_cpu(vmci->vmcoreinfo.guest_format); 1886 size = le32_to_cpu(vmci->vmcoreinfo.size); 1887 addr = le64_to_cpu(vmci->vmcoreinfo.paddr); 1888 if (!vmci->has_vmcoreinfo) { 1889 warn_report("guest note is not present"); 1890 } else if (size < note_head_size || size > MAX_GUEST_NOTE_SIZE) { 1891 warn_report("guest note size is invalid: %" PRIu32, size); 1892 } else if (format != FW_CFG_VMCOREINFO_FORMAT_ELF) { 1893 warn_report("guest note format is unsupported: %" PRIu16, format); 1894 } else { 1895 s->guest_note = g_malloc(size + 1); /* +1 for adding \0 */ 1896 cpu_physical_memory_read(addr, s->guest_note, size); 1897 1898 get_note_sizes(s, s->guest_note, NULL, &name_size, &desc_size); 1899 s->guest_note_size = ELF_NOTE_SIZE(note_head_size, name_size, 1900 desc_size); 1901 if (name_size > MAX_GUEST_NOTE_SIZE || 1902 desc_size > MAX_GUEST_NOTE_SIZE || 1903 s->guest_note_size > size) { 1904 warn_report("Invalid guest note header"); 1905 g_free(s->guest_note); 1906 s->guest_note = NULL; 1907 } else { 1908 vmcoreinfo_update_phys_base(s); 1909 s->note_size += s->guest_note_size; 1910 } 1911 } 1912 } 1913 1914 /* get memory mapping */ 1915 if (paging) { 1916 qemu_get_guest_memory_mapping(&s->list, &s->guest_phys_blocks, errp); 1917 if (*errp) { 1918 goto cleanup; 1919 } 1920 } else { 1921 qemu_get_guest_simple_memory_mapping(&s->list, &s->guest_phys_blocks); 1922 } 1923 1924 s->nr_cpus = nr_cpus; 1925 1926 get_max_mapnr(s); 1927 1928 uint64_t tmp; 1929 tmp = DIV_ROUND_UP(DIV_ROUND_UP(s->max_mapnr, CHAR_BIT), 1930 s->dump_info.page_size); 1931 s->len_dump_bitmap = tmp * s->dump_info.page_size; 1932 1933 /* init for kdump-compressed format */ 1934 if (has_format && format != DUMP_GUEST_MEMORY_FORMAT_ELF) { 1935 switch (format) { 1936 case DUMP_GUEST_MEMORY_FORMAT_KDUMP_ZLIB: 1937 s->flag_compress = DUMP_DH_COMPRESSED_ZLIB; 1938 break; 1939 1940 case DUMP_GUEST_MEMORY_FORMAT_KDUMP_LZO: 1941 #ifdef CONFIG_LZO 1942 if (lzo_init() != LZO_E_OK) { 1943 error_setg(errp, "failed to initialize the LZO library"); 1944 goto cleanup; 1945 } 1946 #endif 1947 s->flag_compress = DUMP_DH_COMPRESSED_LZO; 1948 break; 1949 1950 case DUMP_GUEST_MEMORY_FORMAT_KDUMP_SNAPPY: 1951 s->flag_compress = DUMP_DH_COMPRESSED_SNAPPY; 1952 break; 1953 1954 default: 1955 s->flag_compress = 0; 1956 } 1957 1958 return; 1959 } 1960 1961 if (dump_has_filter(s)) { 1962 memory_mapping_filter(&s->list, s->filter_area_begin, s->filter_area_length); 1963 } 1964 1965 /* 1966 * The first section header is always a special one in which most 1967 * fields are 0. The section header string table is also always 1968 * set. 1969 */ 1970 s->shdr_num = 2; 1971 1972 /* 1973 * Adds the number of architecture sections to shdr_num and sets 1974 * elf_section_data_size so we know the offsets and sizes of all 1975 * parts. 1976 */ 1977 if (s->dump_info.arch_sections_add_fn) { 1978 s->dump_info.arch_sections_add_fn(s); 1979 } 1980 1981 /* 1982 * calculate shdr_num so we know the offsets and sizes of all 1983 * parts. 1984 * Calculate phdr_num 1985 * 1986 * The absolute maximum amount of phdrs is UINT32_MAX - 1 as 1987 * sh_info is 32 bit. There's special handling once we go over 1988 * UINT16_MAX - 1 but that is handled in the ehdr and section 1989 * code. 1990 */ 1991 s->phdr_num = 1; /* Reserve PT_NOTE */ 1992 if (s->list.num <= UINT32_MAX - 1) { 1993 s->phdr_num += s->list.num; 1994 } else { 1995 s->phdr_num = UINT32_MAX; 1996 } 1997 1998 /* 1999 * Now that the number of section and program headers is known we 2000 * can calculate the offsets of the headers and data. 2001 */ 2002 if (dump_is_64bit(s)) { 2003 s->shdr_offset = sizeof(Elf64_Ehdr); 2004 s->phdr_offset = s->shdr_offset + sizeof(Elf64_Shdr) * s->shdr_num; 2005 s->note_offset = s->phdr_offset + sizeof(Elf64_Phdr) * s->phdr_num; 2006 } else { 2007 s->shdr_offset = sizeof(Elf32_Ehdr); 2008 s->phdr_offset = s->shdr_offset + sizeof(Elf32_Shdr) * s->shdr_num; 2009 s->note_offset = s->phdr_offset + sizeof(Elf32_Phdr) * s->phdr_num; 2010 } 2011 s->memory_offset = s->note_offset + s->note_size; 2012 s->section_offset = s->memory_offset + s->total_size; 2013 2014 return; 2015 2016 cleanup: 2017 dump_cleanup(s); 2018 } 2019 2020 /* this operation might be time consuming. */ 2021 static void dump_process(DumpState *s, Error **errp) 2022 { 2023 ERRP_GUARD(); 2024 DumpQueryResult *result = NULL; 2025 2026 if (s->has_format && s->format == DUMP_GUEST_MEMORY_FORMAT_WIN_DMP) { 2027 #ifdef TARGET_X86_64 2028 create_win_dump(s, errp); 2029 #endif 2030 } else if (s->has_format && s->format != DUMP_GUEST_MEMORY_FORMAT_ELF) { 2031 create_kdump_vmcore(s, errp); 2032 } else { 2033 create_vmcore(s, errp); 2034 } 2035 2036 /* make sure status is written after written_size updates */ 2037 smp_wmb(); 2038 qatomic_set(&s->status, 2039 (*errp ? DUMP_STATUS_FAILED : DUMP_STATUS_COMPLETED)); 2040 2041 /* send DUMP_COMPLETED message (unconditionally) */ 2042 result = qmp_query_dump(NULL); 2043 /* should never fail */ 2044 assert(result); 2045 qapi_event_send_dump_completed(result, 2046 *errp ? error_get_pretty(*errp) : NULL); 2047 qapi_free_DumpQueryResult(result); 2048 2049 dump_cleanup(s); 2050 } 2051 2052 static void *dump_thread(void *data) 2053 { 2054 DumpState *s = (DumpState *)data; 2055 dump_process(s, NULL); 2056 return NULL; 2057 } 2058 2059 DumpQueryResult *qmp_query_dump(Error **errp) 2060 { 2061 DumpQueryResult *result = g_new(DumpQueryResult, 1); 2062 DumpState *state = &dump_state_global; 2063 result->status = qatomic_read(&state->status); 2064 /* make sure we are reading status and written_size in order */ 2065 smp_rmb(); 2066 result->completed = state->written_size; 2067 result->total = state->total_size; 2068 return result; 2069 } 2070 2071 void qmp_dump_guest_memory(bool paging, const char *file, 2072 bool has_detach, bool detach, 2073 bool has_begin, int64_t begin, bool has_length, 2074 int64_t length, bool has_format, 2075 DumpGuestMemoryFormat format, Error **errp) 2076 { 2077 ERRP_GUARD(); 2078 const char *p; 2079 int fd = -1; 2080 DumpState *s; 2081 bool detach_p = false; 2082 2083 if (runstate_check(RUN_STATE_INMIGRATE)) { 2084 error_setg(errp, "Dump not allowed during incoming migration."); 2085 return; 2086 } 2087 2088 /* if there is a dump in background, we should wait until the dump 2089 * finished */ 2090 if (qemu_system_dump_in_progress()) { 2091 error_setg(errp, "There is a dump in process, please wait."); 2092 return; 2093 } 2094 2095 /* 2096 * kdump-compressed format need the whole memory dumped, so paging or 2097 * filter is not supported here. 2098 */ 2099 if ((has_format && format != DUMP_GUEST_MEMORY_FORMAT_ELF) && 2100 (paging || has_begin || has_length)) { 2101 error_setg(errp, "kdump-compressed format doesn't support paging or " 2102 "filter"); 2103 return; 2104 } 2105 if (has_begin && !has_length) { 2106 error_setg(errp, QERR_MISSING_PARAMETER, "length"); 2107 return; 2108 } 2109 if (!has_begin && has_length) { 2110 error_setg(errp, QERR_MISSING_PARAMETER, "begin"); 2111 return; 2112 } 2113 if (has_detach) { 2114 detach_p = detach; 2115 } 2116 2117 /* check whether lzo/snappy is supported */ 2118 #ifndef CONFIG_LZO 2119 if (has_format && format == DUMP_GUEST_MEMORY_FORMAT_KDUMP_LZO) { 2120 error_setg(errp, "kdump-lzo is not available now"); 2121 return; 2122 } 2123 #endif 2124 2125 #ifndef CONFIG_SNAPPY 2126 if (has_format && format == DUMP_GUEST_MEMORY_FORMAT_KDUMP_SNAPPY) { 2127 error_setg(errp, "kdump-snappy is not available now"); 2128 return; 2129 } 2130 #endif 2131 2132 #ifndef TARGET_X86_64 2133 if (has_format && format == DUMP_GUEST_MEMORY_FORMAT_WIN_DMP) { 2134 error_setg(errp, "Windows dump is only available for x86-64"); 2135 return; 2136 } 2137 #endif 2138 2139 #if !defined(WIN32) 2140 if (strstart(file, "fd:", &p)) { 2141 fd = monitor_get_fd(monitor_cur(), p, errp); 2142 if (fd == -1) { 2143 return; 2144 } 2145 } 2146 #endif 2147 2148 if (strstart(file, "file:", &p)) { 2149 fd = qemu_open_old(p, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, S_IRUSR); 2150 if (fd < 0) { 2151 error_setg_file_open(errp, errno, p); 2152 return; 2153 } 2154 } 2155 2156 if (fd == -1) { 2157 error_setg(errp, QERR_INVALID_PARAMETER, "protocol"); 2158 return; 2159 } 2160 2161 if (!dump_migration_blocker) { 2162 error_setg(&dump_migration_blocker, 2163 "Live migration disabled: dump-guest-memory in progress"); 2164 } 2165 2166 /* 2167 * Allows even for -only-migratable, but forbid migration during the 2168 * process of dump guest memory. 2169 */ 2170 if (migrate_add_blocker_internal(dump_migration_blocker, errp)) { 2171 /* Remember to release the fd before passing it over to dump state */ 2172 close(fd); 2173 return; 2174 } 2175 2176 s = &dump_state_global; 2177 dump_state_prepare(s); 2178 2179 dump_init(s, fd, has_format, format, paging, has_begin, 2180 begin, length, errp); 2181 if (*errp) { 2182 qatomic_set(&s->status, DUMP_STATUS_FAILED); 2183 return; 2184 } 2185 2186 if (detach_p) { 2187 /* detached dump */ 2188 s->detached = true; 2189 qemu_thread_create(&s->dump_thread, "dump_thread", dump_thread, 2190 s, QEMU_THREAD_DETACHED); 2191 } else { 2192 /* sync dump */ 2193 dump_process(s, errp); 2194 } 2195 } 2196 2197 DumpGuestMemoryCapability *qmp_query_dump_guest_memory_capability(Error **errp) 2198 { 2199 DumpGuestMemoryCapability *cap = 2200 g_new0(DumpGuestMemoryCapability, 1); 2201 DumpGuestMemoryFormatList **tail = &cap->formats; 2202 2203 /* elf is always available */ 2204 QAPI_LIST_APPEND(tail, DUMP_GUEST_MEMORY_FORMAT_ELF); 2205 2206 /* kdump-zlib is always available */ 2207 QAPI_LIST_APPEND(tail, DUMP_GUEST_MEMORY_FORMAT_KDUMP_ZLIB); 2208 2209 /* add new item if kdump-lzo is available */ 2210 #ifdef CONFIG_LZO 2211 QAPI_LIST_APPEND(tail, DUMP_GUEST_MEMORY_FORMAT_KDUMP_LZO); 2212 #endif 2213 2214 /* add new item if kdump-snappy is available */ 2215 #ifdef CONFIG_SNAPPY 2216 QAPI_LIST_APPEND(tail, DUMP_GUEST_MEMORY_FORMAT_KDUMP_SNAPPY); 2217 #endif 2218 2219 /* Windows dump is available only if target is x86_64 */ 2220 #ifdef TARGET_X86_64 2221 QAPI_LIST_APPEND(tail, DUMP_GUEST_MEMORY_FORMAT_WIN_DMP); 2222 #endif 2223 2224 return cap; 2225 } 2226