xref: /openbmc/qemu/dump/dump.c (revision 6e0bc06e210cbd25006c3a39e9a8325784d0be78)
/*
 * QEMU dump
 *
 * Copyright Fujitsu, Corp. 2011, 2012
 *
 * Authors:
 *     Wen Congyang <wency@cn.fujitsu.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include "qemu-common.h"
#include "qemu/cutils.h"
#include "elf.h"
#include "exec/hwaddr.h"
#include "monitor/monitor.h"
#include "sysemu/kvm.h"
#include "sysemu/dump.h"
#include "sysemu/memory_mapping.h"
#include "sysemu/runstate.h"
#include "sysemu/cpus.h"
#include "qapi/error.h"
#include "qapi/qapi-commands-dump.h"
#include "qapi/qapi-events-dump.h"
#include "qapi/qmp/qerror.h"
#include "qemu/error-report.h"
#include "qemu/main-loop.h"
#include "hw/misc/vmcoreinfo.h"
#include "migration/blocker.h"

#ifdef TARGET_X86_64
#include "win_dump.h"
#endif

#include <zlib.h>
#ifdef CONFIG_LZO
#include <lzo/lzo1x.h>
#endif
#ifdef CONFIG_SNAPPY
#include <snappy-c.h>
#endif
#ifndef ELF_MACHINE_UNAME
#define ELF_MACHINE_UNAME "Unknown"
#endif

#define MAX_GUEST_NOTE_SIZE (1 << 20) /* 1MB should be enough */

static Error *dump_migration_blocker;

#define ELF_NOTE_SIZE(hdr_size, name_size, desc_size)   \
    ((DIV_ROUND_UP((hdr_size), 4) +                     \
      DIV_ROUND_UP((name_size), 4) +                    \
      DIV_ROUND_UP((desc_size), 4)) * 4)
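
/*
 * Example: a note with a 12-byte Elf64_Nhdr, the 5-byte name "CORE" (NUL
 * included) and a 9-byte descriptor occupies
 * ELF_NOTE_SIZE(12, 5, 9) = (3 + 2 + 3) * 4 = 32 bytes, since each field
 * is padded to a 4-byte boundary.
 */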

uint16_t cpu_to_dump16(DumpState *s, uint16_t val)
{
    if (s->dump_info.d_endian == ELFDATA2LSB) {
        val = cpu_to_le16(val);
    } else {
        val = cpu_to_be16(val);
    }

    return val;
}

uint32_t cpu_to_dump32(DumpState *s, uint32_t val)
{
    if (s->dump_info.d_endian == ELFDATA2LSB) {
        val = cpu_to_le32(val);
    } else {
        val = cpu_to_be32(val);
    }

    return val;
}

uint64_t cpu_to_dump64(DumpState *s, uint64_t val)
{
    if (s->dump_info.d_endian == ELFDATA2LSB) {
        val = cpu_to_le64(val);
    } else {
        val = cpu_to_be64(val);
    }

    return val;
}

static int dump_cleanup(DumpState *s)
{
    guest_phys_blocks_free(&s->guest_phys_blocks);
    memory_mapping_list_free(&s->list);
    close(s->fd);
    g_free(s->guest_note);
    s->guest_note = NULL;
    if (s->resume) {
        if (s->detached) {
            qemu_mutex_lock_iothread();
        }
        vm_start();
        if (s->detached) {
            qemu_mutex_unlock_iothread();
        }
    }
    migrate_del_blocker(dump_migration_blocker);

    return 0;
}

static int fd_write_vmcore(const void *buf, size_t size, void *opaque)
{
    DumpState *s = opaque;
    size_t written_size;

    written_size = qemu_write_full(s->fd, buf, size);
    if (written_size != size) {
        return -errno;
    }

    return 0;
}

static void write_elf64_header(DumpState *s, Error **errp)
{
    Elf64_Ehdr elf_header;
    int ret;

    memset(&elf_header, 0, sizeof(Elf64_Ehdr));
    memcpy(&elf_header, ELFMAG, SELFMAG);
    elf_header.e_ident[EI_CLASS] = ELFCLASS64;
    elf_header.e_ident[EI_DATA] = s->dump_info.d_endian;
    elf_header.e_ident[EI_VERSION] = EV_CURRENT;
    elf_header.e_type = cpu_to_dump16(s, ET_CORE);
    elf_header.e_machine = cpu_to_dump16(s, s->dump_info.d_machine);
    elf_header.e_version = cpu_to_dump32(s, EV_CURRENT);
    elf_header.e_ehsize = cpu_to_dump16(s, sizeof(elf_header));
    elf_header.e_phoff = cpu_to_dump64(s, sizeof(Elf64_Ehdr));
    elf_header.e_phentsize = cpu_to_dump16(s, sizeof(Elf64_Phdr));
    elf_header.e_phnum = cpu_to_dump16(s, s->phdr_num);
    if (s->have_section) {
        uint64_t shoff = sizeof(Elf64_Ehdr) + sizeof(Elf64_Phdr) * s->sh_info;

        elf_header.e_shoff = cpu_to_dump64(s, shoff);
        elf_header.e_shentsize = cpu_to_dump16(s, sizeof(Elf64_Shdr));
        elf_header.e_shnum = cpu_to_dump16(s, 1);
    }

    ret = fd_write_vmcore(&elf_header, sizeof(elf_header), s);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "dump: failed to write elf header");
    }
}

static void write_elf32_header(DumpState *s, Error **errp)
{
    Elf32_Ehdr elf_header;
    int ret;

    memset(&elf_header, 0, sizeof(Elf32_Ehdr));
    memcpy(&elf_header, ELFMAG, SELFMAG);
    elf_header.e_ident[EI_CLASS] = ELFCLASS32;
    elf_header.e_ident[EI_DATA] = s->dump_info.d_endian;
    elf_header.e_ident[EI_VERSION] = EV_CURRENT;
    elf_header.e_type = cpu_to_dump16(s, ET_CORE);
    elf_header.e_machine = cpu_to_dump16(s, s->dump_info.d_machine);
    elf_header.e_version = cpu_to_dump32(s, EV_CURRENT);
    elf_header.e_ehsize = cpu_to_dump16(s, sizeof(elf_header));
    elf_header.e_phoff = cpu_to_dump32(s, sizeof(Elf32_Ehdr));
    elf_header.e_phentsize = cpu_to_dump16(s, sizeof(Elf32_Phdr));
    elf_header.e_phnum = cpu_to_dump16(s, s->phdr_num);
    if (s->have_section) {
        uint32_t shoff = sizeof(Elf32_Ehdr) + sizeof(Elf32_Phdr) * s->sh_info;

        elf_header.e_shoff = cpu_to_dump32(s, shoff);
        elf_header.e_shentsize = cpu_to_dump16(s, sizeof(Elf32_Shdr));
        elf_header.e_shnum = cpu_to_dump16(s, 1);
    }

    ret = fd_write_vmcore(&elf_header, sizeof(elf_header), s);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "dump: failed to write elf header");
    }
}

static void write_elf64_load(DumpState *s, MemoryMapping *memory_mapping,
                             int phdr_index, hwaddr offset,
                             hwaddr filesz, Error **errp)
{
    Elf64_Phdr phdr;
    int ret;

    memset(&phdr, 0, sizeof(Elf64_Phdr));
    phdr.p_type = cpu_to_dump32(s, PT_LOAD);
    phdr.p_offset = cpu_to_dump64(s, offset);
    phdr.p_paddr = cpu_to_dump64(s, memory_mapping->phys_addr);
    phdr.p_filesz = cpu_to_dump64(s, filesz);
    phdr.p_memsz = cpu_to_dump64(s, memory_mapping->length);
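    /*
     * GNU "?:" extension: if the mapping has no virtual address (i.e. it
     * converts to 0), fall back to the physical address so the header
     * still carries a usable p_vaddr.
     */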
    phdr.p_vaddr = cpu_to_dump64(s, memory_mapping->virt_addr) ?: phdr.p_paddr;

    assert(memory_mapping->length >= filesz);

    ret = fd_write_vmcore(&phdr, sizeof(Elf64_Phdr), s);
    if (ret < 0) {
        error_setg_errno(errp, -ret,
                         "dump: failed to write program header table");
    }
}

static void write_elf32_load(DumpState *s, MemoryMapping *memory_mapping,
                             int phdr_index, hwaddr offset,
                             hwaddr filesz, Error **errp)
{
    Elf32_Phdr phdr;
    int ret;

    memset(&phdr, 0, sizeof(Elf32_Phdr));
    phdr.p_type = cpu_to_dump32(s, PT_LOAD);
    phdr.p_offset = cpu_to_dump32(s, offset);
    phdr.p_paddr = cpu_to_dump32(s, memory_mapping->phys_addr);
    phdr.p_filesz = cpu_to_dump32(s, filesz);
    phdr.p_memsz = cpu_to_dump32(s, memory_mapping->length);
    phdr.p_vaddr =
        cpu_to_dump32(s, memory_mapping->virt_addr) ?: phdr.p_paddr;

    assert(memory_mapping->length >= filesz);

    ret = fd_write_vmcore(&phdr, sizeof(Elf32_Phdr), s);
    if (ret < 0) {
        error_setg_errno(errp, -ret,
                         "dump: failed to write program header table");
    }
}

static void write_elf64_note(DumpState *s, Error **errp)
{
    Elf64_Phdr phdr;
    hwaddr begin = s->memory_offset - s->note_size;
    int ret;

    memset(&phdr, 0, sizeof(Elf64_Phdr));
    phdr.p_type = cpu_to_dump32(s, PT_NOTE);
    phdr.p_offset = cpu_to_dump64(s, begin);
    phdr.p_paddr = 0;
    phdr.p_filesz = cpu_to_dump64(s, s->note_size);
    phdr.p_memsz = cpu_to_dump64(s, s->note_size);
    phdr.p_vaddr = 0;

    ret = fd_write_vmcore(&phdr, sizeof(Elf64_Phdr), s);
    if (ret < 0) {
        error_setg_errno(errp, -ret,
                         "dump: failed to write program header table");
    }
}

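/*
 * Map QEMU's 0-based cpu_index to the 1-based CPU ids used in the ELF
 * notes (id 0 is conventionally not used).
 */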
static inline int cpu_index(CPUState *cpu)
{
    return cpu->cpu_index + 1;
}

static void write_guest_note(WriteCoreDumpFunction f, DumpState *s,
                             Error **errp)
{
    int ret;

    if (s->guest_note) {
        ret = f(s->guest_note, s->guest_note_size, s);
        if (ret < 0) {
            error_setg(errp, "dump: failed to write guest note");
        }
    }
}

static void write_elf64_notes(WriteCoreDumpFunction f, DumpState *s,
                              Error **errp)
{
    CPUState *cpu;
    int ret;
    int id;

    CPU_FOREACH(cpu) {
        id = cpu_index(cpu);
        ret = cpu_write_elf64_note(f, cpu, id, s);
        if (ret < 0) {
            error_setg(errp, "dump: failed to write elf notes");
            return;
        }
    }

    CPU_FOREACH(cpu) {
        ret = cpu_write_elf64_qemunote(f, cpu, s);
        if (ret < 0) {
            error_setg(errp, "dump: failed to write CPU status");
            return;
        }
    }

    write_guest_note(f, s, errp);
}

static void write_elf32_note(DumpState *s, Error **errp)
{
    hwaddr begin = s->memory_offset - s->note_size;
    Elf32_Phdr phdr;
    int ret;

    memset(&phdr, 0, sizeof(Elf32_Phdr));
    phdr.p_type = cpu_to_dump32(s, PT_NOTE);
    phdr.p_offset = cpu_to_dump32(s, begin);
    phdr.p_paddr = 0;
    phdr.p_filesz = cpu_to_dump32(s, s->note_size);
    phdr.p_memsz = cpu_to_dump32(s, s->note_size);
    phdr.p_vaddr = 0;

    ret = fd_write_vmcore(&phdr, sizeof(Elf32_Phdr), s);
    if (ret < 0) {
        error_setg_errno(errp, -ret,
                         "dump: failed to write program header table");
    }
}

static void write_elf32_notes(WriteCoreDumpFunction f, DumpState *s,
                              Error **errp)
{
    CPUState *cpu;
    int ret;
    int id;

    CPU_FOREACH(cpu) {
        id = cpu_index(cpu);
        ret = cpu_write_elf32_note(f, cpu, id, s);
        if (ret < 0) {
            error_setg(errp, "dump: failed to write elf notes");
            return;
        }
    }

    CPU_FOREACH(cpu) {
        ret = cpu_write_elf32_qemunote(f, cpu, s);
        if (ret < 0) {
            error_setg(errp, "dump: failed to write CPU status");
            return;
        }
    }

    write_guest_note(f, s, errp);
}

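/*
 * The single section header written here exists only to carry sh_info,
 * which holds the real program header count when it would not fit in the
 * 16-bit e_phnum field (the standard ELF PN_XNUM scheme).
 */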
static void write_elf_section(DumpState *s, int type, Error **errp)
{
    Elf32_Shdr shdr32;
    Elf64_Shdr shdr64;
    int shdr_size;
    void *shdr;
    int ret;

    if (type == 0) {
        shdr_size = sizeof(Elf32_Shdr);
        memset(&shdr32, 0, shdr_size);
        shdr32.sh_info = cpu_to_dump32(s, s->sh_info);
        shdr = &shdr32;
    } else {
        shdr_size = sizeof(Elf64_Shdr);
        memset(&shdr64, 0, shdr_size);
        shdr64.sh_info = cpu_to_dump32(s, s->sh_info);
        shdr = &shdr64;
    }

    ret = fd_write_vmcore(shdr, shdr_size, s);
    if (ret < 0) {
        error_setg_errno(errp, -ret,
                         "dump: failed to write section header table");
    }
}

static void write_data(DumpState *s, void *buf, int length, Error **errp)
{
    int ret;

    ret = fd_write_vmcore(buf, length, s);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "dump: failed to save memory");
    } else {
        s->written_size += length;
    }
}

/* write the memory to vmcore. 1 page per I/O. */
static void write_memory(DumpState *s, GuestPhysBlock *block, ram_addr_t start,
                         int64_t size, Error **errp)
{
    int64_t i;
    Error *local_err = NULL;

    for (i = 0; i < size / s->dump_info.page_size; i++) {
        write_data(s, block->host_addr + start + i * s->dump_info.page_size,
                   s->dump_info.page_size, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }
    }

    if ((size % s->dump_info.page_size) != 0) {
        write_data(s, block->host_addr + start + i * s->dump_info.page_size,
                   size % s->dump_info.page_size, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }
    }
}

/* get the memory's offset and size in the vmcore */
static void get_offset_range(hwaddr phys_addr,
                             ram_addr_t mapping_length,
                             DumpState *s,
                             hwaddr *p_offset,
                             hwaddr *p_filesz)
{
    GuestPhysBlock *block;
    hwaddr offset = s->memory_offset;
    int64_t size_in_block, start;

    /* When the memory is not stored into vmcore, offset will be -1 */
    *p_offset = -1;
    *p_filesz = 0;

    if (s->has_filter) {
        if (phys_addr < s->begin || phys_addr >= s->begin + s->length) {
            return;
        }
    }

    QTAILQ_FOREACH(block, &s->guest_phys_blocks.head, next) {
        if (s->has_filter) {
            if (block->target_start >= s->begin + s->length ||
                block->target_end <= s->begin) {
                /* This block is out of the range */
                continue;
            }

            if (s->begin <= block->target_start) {
                start = block->target_start;
            } else {
                start = s->begin;
            }

            size_in_block = block->target_end - start;
            if (s->begin + s->length < block->target_end) {
                size_in_block -= block->target_end - (s->begin + s->length);
            }
        } else {
            start = block->target_start;
            size_in_block = block->target_end - block->target_start;
        }

        if (phys_addr >= start && phys_addr < start + size_in_block) {
            *p_offset = phys_addr - start + offset;

            /* The offset range mapped from the vmcore file must not spill over
             * the GuestPhysBlock, clamp it. The rest of the mapping will be
             * zero-filled in memory at load time; see
             * <http://refspecs.linuxbase.org/elf/gabi4+/ch5.pheader.html>.
             */
            *p_filesz = phys_addr + mapping_length <= start + size_in_block ?
                        mapping_length :
                        size_in_block - (phys_addr - start);
            return;
        }

        offset += size_in_block;
    }
}
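
/*
 * Worked example (illustrative numbers): with a filter of begin = 1 MiB,
 * length = 2 MiB and a single block covering [0, 4 MiB), the dumped
 * window is [1 MiB, 3 MiB). A phys_addr of 2 MiB then maps to
 * memory_offset + 1 MiB, and a 2 MiB mapping starting there is clamped
 * to filesz = 1 MiB.
 */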

static void write_elf_loads(DumpState *s, Error **errp)
{
    hwaddr offset, filesz;
    MemoryMapping *memory_mapping;
    uint32_t phdr_index = 1;
    uint32_t max_index;
    Error *local_err = NULL;

    if (s->have_section) {
        max_index = s->sh_info;
    } else {
        max_index = s->phdr_num;
    }

    QTAILQ_FOREACH(memory_mapping, &s->list.head, next) {
        get_offset_range(memory_mapping->phys_addr,
                         memory_mapping->length,
                         s, &offset, &filesz);
        if (s->dump_info.d_class == ELFCLASS64) {
            write_elf64_load(s, memory_mapping, phdr_index++, offset,
                             filesz, &local_err);
        } else {
            write_elf32_load(s, memory_mapping, phdr_index++, offset,
                             filesz, &local_err);
        }

        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }

        if (phdr_index >= max_index) {
            break;
        }
    }
}

/* write elf header, PT_NOTE and elf note to vmcore. */
static void dump_begin(DumpState *s, Error **errp)
{
    Error *local_err = NULL;

    /*
     * the vmcore's format is:
     *   --------------
     *   |  elf header |
     *   --------------
     *   |  PT_NOTE    |
     *   --------------
     *   |  PT_LOAD    |
     *   --------------
     *   |  ......     |
     *   --------------
     *   |  PT_LOAD    |
     *   --------------
     *   |  sec_hdr    |
     *   --------------
     *   |  elf note   |
     *   --------------
     *   |  memory     |
     *   --------------
     *
     * we only know where the memory is saved after we write elf note into
     * vmcore.
     */

    /* write elf header to vmcore */
    if (s->dump_info.d_class == ELFCLASS64) {
        write_elf64_header(s, &local_err);
    } else {
        write_elf32_header(s, &local_err);
    }
    if (local_err) {
        error_propagate(errp, local_err);
        return;
    }

    if (s->dump_info.d_class == ELFCLASS64) {
        /* write PT_NOTE to vmcore */
        write_elf64_note(s, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }

        /* write all PT_LOAD to vmcore */
        write_elf_loads(s, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }

        /* write section to vmcore */
        if (s->have_section) {
            write_elf_section(s, 1, &local_err);
            if (local_err) {
                error_propagate(errp, local_err);
                return;
            }
        }

        /* write notes to vmcore */
        write_elf64_notes(fd_write_vmcore, s, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }
    } else {
        /* write PT_NOTE to vmcore */
        write_elf32_note(s, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }

        /* write all PT_LOAD to vmcore */
        write_elf_loads(s, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }

        /* write section to vmcore */
        if (s->have_section) {
            write_elf_section(s, 0, &local_err);
            if (local_err) {
                error_propagate(errp, local_err);
                return;
            }
        }

        /* write notes to vmcore */
        write_elf32_notes(fd_write_vmcore, s, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }
    }
}

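/*
 * Position s->next_block/s->start at the next block that intersects the
 * filter range. Returns 0 on success, 1 when no block is left.
 */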
static int get_next_block(DumpState *s, GuestPhysBlock *block)
{
    while (1) {
        block = QTAILQ_NEXT(block, next);
        if (!block) {
            /* no more block */
            return 1;
        }

        s->start = 0;
        s->next_block = block;
        if (s->has_filter) {
            if (block->target_start >= s->begin + s->length ||
                block->target_end <= s->begin) {
                /* This block is out of the range */
                continue;
            }

            if (s->begin > block->target_start) {
                s->start = s->begin - block->target_start;
            }
        }

        return 0;
    }
}

/* write all memory to vmcore */
static void dump_iterate(DumpState *s, Error **errp)
{
    GuestPhysBlock *block;
    int64_t size;
    Error *local_err = NULL;

    do {
        block = s->next_block;

        size = block->target_end - block->target_start;
        if (s->has_filter) {
            size -= s->start;
            if (s->begin + s->length < block->target_end) {
                size -= block->target_end - (s->begin + s->length);
            }
        }
        write_memory(s, block, s->start, size, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }

    } while (!get_next_block(s, block));
}

static void create_vmcore(DumpState *s, Error **errp)
{
    Error *local_err = NULL;

    dump_begin(s, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        return;
    }

    dump_iterate(s, errp);
}

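/*
 * The "flat" variant of the kdump-compressed format frames every write
 * as an (offset, size) header followed by the data, so the file can be
 * produced without seeking: a start header fills the first
 * MAX_SIZE_MDF_HEADER bytes and an end record carrying
 * END_FLAG_FLAT_HEADER terminates the stream. makedumpfile can
 * reassemble such a file into the seekable on-disk layout.
 */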
static int write_start_flat_header(int fd)
{
    MakedumpfileHeader *mh;
    int ret = 0;

    QEMU_BUILD_BUG_ON(sizeof *mh > MAX_SIZE_MDF_HEADER);
    mh = g_malloc0(MAX_SIZE_MDF_HEADER);

    memcpy(mh->signature, MAKEDUMPFILE_SIGNATURE,
           MIN(sizeof mh->signature, sizeof MAKEDUMPFILE_SIGNATURE));

    mh->type = cpu_to_be64(TYPE_FLAT_HEADER);
    mh->version = cpu_to_be64(VERSION_FLAT_HEADER);

    size_t written_size;
    written_size = qemu_write_full(fd, mh, MAX_SIZE_MDF_HEADER);
    if (written_size != MAX_SIZE_MDF_HEADER) {
        ret = -1;
    }

    g_free(mh);
    return ret;
}

static int write_end_flat_header(int fd)
{
    MakedumpfileDataHeader mdh;

    mdh.offset = END_FLAG_FLAT_HEADER;
    mdh.buf_size = END_FLAG_FLAT_HEADER;

    size_t written_size;
    written_size = qemu_write_full(fd, &mdh, sizeof(mdh));
    if (written_size != sizeof(mdh)) {
        return -1;
    }

    return 0;
}

static int write_buffer(int fd, off_t offset, const void *buf, size_t size)
{
    size_t written_size;
    MakedumpfileDataHeader mdh;

    mdh.offset = cpu_to_be64(offset);
    mdh.buf_size = cpu_to_be64(size);

    written_size = qemu_write_full(fd, &mdh, sizeof(mdh));
    if (written_size != sizeof(mdh)) {
        return -1;
    }

    written_size = qemu_write_full(fd, buf, size);
    if (written_size != size) {
        return -1;
    }

    return 0;
}

static int buf_write_note(const void *buf, size_t size, void *opaque)
{
    DumpState *s = opaque;

    /* note_buf is not large enough */
    if (s->note_buf_offset + size > s->note_size) {
        return -1;
    }

    memcpy(s->note_buf + s->note_buf_offset, buf, size);

    s->note_buf_offset += size;

    return 0;
}

/*
 * This function retrieves various sizes from an ELF note header.
 *
 * @note has to be a valid ELF note. The returned sizes are unmodified
 * (not padded or rounded up to a multiple of 4).
 */
static void get_note_sizes(DumpState *s, const void *note,
                           uint64_t *note_head_size,
                           uint64_t *name_size,
                           uint64_t *desc_size)
{
    uint64_t note_head_sz;
    uint64_t name_sz;
    uint64_t desc_sz;

    if (s->dump_info.d_class == ELFCLASS64) {
        const Elf64_Nhdr *hdr = note;
        note_head_sz = sizeof(Elf64_Nhdr);
        name_sz = tswap64(hdr->n_namesz);
        desc_sz = tswap64(hdr->n_descsz);
    } else {
        const Elf32_Nhdr *hdr = note;
        note_head_sz = sizeof(Elf32_Nhdr);
        name_sz = tswap32(hdr->n_namesz);
        desc_sz = tswap32(hdr->n_descsz);
    }

    if (note_head_size) {
        *note_head_size = note_head_sz;
    }
    if (name_size) {
        *name_size = name_sz;
    }
    if (desc_size) {
        *desc_size = desc_sz;
    }
}

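/*
 * Example: for name "VMCOREINFO", len is 11 (the NUL is included), and
 * the comparison covers the 11 name bytes that follow the 4-byte-aligned
 * note header.
 */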
static bool note_name_equal(DumpState *s,
                            const uint8_t *note, const char *name)
{
    int len = strlen(name) + 1;
    uint64_t head_size, name_size;

    get_note_sizes(s, note, &head_size, &name_size, NULL);
    head_size = ROUND_UP(head_size, 4);

    return name_size == len && memcmp(note + head_size, name, len) == 0;
}

/* write common header, sub header and elf note to vmcore */
static void create_header32(DumpState *s, Error **errp)
{
    DiskDumpHeader32 *dh = NULL;
    KdumpSubHeader32 *kh = NULL;
    size_t size;
    uint32_t block_size;
    uint32_t sub_hdr_size;
    uint32_t bitmap_blocks;
    uint32_t status = 0;
    uint64_t offset_note;
    Error *local_err = NULL;

    /* write the common header; version 6 of the kdump-compressed format */
    size = sizeof(DiskDumpHeader32);
    dh = g_malloc0(size);

    memcpy(dh->signature, KDUMP_SIGNATURE, SIG_LEN);
    dh->header_version = cpu_to_dump32(s, 6);
    block_size = s->dump_info.page_size;
    dh->block_size = cpu_to_dump32(s, block_size);
    sub_hdr_size = sizeof(struct KdumpSubHeader32) + s->note_size;
    sub_hdr_size = DIV_ROUND_UP(sub_hdr_size, block_size);
    dh->sub_hdr_size = cpu_to_dump32(s, sub_hdr_size);
    /* dh->max_mapnr may be truncated, full 64bit is in kh.max_mapnr_64 */
    dh->max_mapnr = cpu_to_dump32(s, MIN(s->max_mapnr, UINT_MAX));
    dh->nr_cpus = cpu_to_dump32(s, s->nr_cpus);
    bitmap_blocks = DIV_ROUND_UP(s->len_dump_bitmap, block_size) * 2;
    dh->bitmap_blocks = cpu_to_dump32(s, bitmap_blocks);
    strncpy(dh->utsname.machine, ELF_MACHINE_UNAME, sizeof(dh->utsname.machine));

    if (s->flag_compress & DUMP_DH_COMPRESSED_ZLIB) {
        status |= DUMP_DH_COMPRESSED_ZLIB;
    }
#ifdef CONFIG_LZO
    if (s->flag_compress & DUMP_DH_COMPRESSED_LZO) {
        status |= DUMP_DH_COMPRESSED_LZO;
    }
#endif
#ifdef CONFIG_SNAPPY
    if (s->flag_compress & DUMP_DH_COMPRESSED_SNAPPY) {
        status |= DUMP_DH_COMPRESSED_SNAPPY;
    }
#endif
    dh->status = cpu_to_dump32(s, status);

    if (write_buffer(s->fd, 0, dh, size) < 0) {
        error_setg(errp, "dump: failed to write disk dump header");
        goto out;
    }

    /* write sub header */
    size = sizeof(KdumpSubHeader32);
    kh = g_malloc0(size);

    /* 64bit max_mapnr_64 */
    kh->max_mapnr_64 = cpu_to_dump64(s, s->max_mapnr);
    kh->phys_base = cpu_to_dump32(s, s->dump_info.phys_base);
    kh->dump_level = cpu_to_dump32(s, DUMP_LEVEL);

    offset_note = DISKDUMP_HEADER_BLOCKS * block_size + size;
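    /*
     * If the guest-supplied note is a VMCOREINFO note, record where its
     * descriptor will land in the note section: skip the notes that
     * precede it, then the (4-byte aligned) header and name of the note
     * itself.
     */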
    if (s->guest_note &&
        note_name_equal(s, s->guest_note, "VMCOREINFO")) {
        uint64_t hsize, name_size, size_vmcoreinfo_desc, offset_vmcoreinfo;

        get_note_sizes(s, s->guest_note,
                       &hsize, &name_size, &size_vmcoreinfo_desc);
        offset_vmcoreinfo = offset_note + s->note_size - s->guest_note_size +
            (DIV_ROUND_UP(hsize, 4) + DIV_ROUND_UP(name_size, 4)) * 4;
        kh->offset_vmcoreinfo = cpu_to_dump64(s, offset_vmcoreinfo);
        kh->size_vmcoreinfo = cpu_to_dump32(s, size_vmcoreinfo_desc);
    }

    kh->offset_note = cpu_to_dump64(s, offset_note);
    kh->note_size = cpu_to_dump32(s, s->note_size);

    if (write_buffer(s->fd, DISKDUMP_HEADER_BLOCKS *
                     block_size, kh, size) < 0) {
        error_setg(errp, "dump: failed to write kdump sub header");
        goto out;
    }

    /* write note */
    s->note_buf = g_malloc0(s->note_size);
    s->note_buf_offset = 0;

    /* use s->note_buf to store notes temporarily */
    write_elf32_notes(buf_write_note, s, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        goto out;
    }
    if (write_buffer(s->fd, offset_note, s->note_buf,
                     s->note_size) < 0) {
        error_setg(errp, "dump: failed to write notes");
        goto out;
    }

    /* get offset of dump_bitmap */
    s->offset_dump_bitmap = (DISKDUMP_HEADER_BLOCKS + sub_hdr_size) *
                             block_size;

    /* get offset of page */
    s->offset_page = (DISKDUMP_HEADER_BLOCKS + sub_hdr_size + bitmap_blocks) *
                     block_size;

out:
    g_free(dh);
    g_free(kh);
    g_free(s->note_buf);
}

/* write common header, sub header and elf note to vmcore */
static void create_header64(DumpState *s, Error **errp)
{
    DiskDumpHeader64 *dh = NULL;
    KdumpSubHeader64 *kh = NULL;
    size_t size;
    uint32_t block_size;
    uint32_t sub_hdr_size;
    uint32_t bitmap_blocks;
    uint32_t status = 0;
    uint64_t offset_note;
    Error *local_err = NULL;

    /* write the common header; version 6 of the kdump-compressed format */
    size = sizeof(DiskDumpHeader64);
    dh = g_malloc0(size);

    memcpy(dh->signature, KDUMP_SIGNATURE, SIG_LEN);
    dh->header_version = cpu_to_dump32(s, 6);
    block_size = s->dump_info.page_size;
    dh->block_size = cpu_to_dump32(s, block_size);
    sub_hdr_size = sizeof(struct KdumpSubHeader64) + s->note_size;
    sub_hdr_size = DIV_ROUND_UP(sub_hdr_size, block_size);
    dh->sub_hdr_size = cpu_to_dump32(s, sub_hdr_size);
    /* dh->max_mapnr may be truncated, full 64bit is in kh.max_mapnr_64 */
    dh->max_mapnr = cpu_to_dump32(s, MIN(s->max_mapnr, UINT_MAX));
    dh->nr_cpus = cpu_to_dump32(s, s->nr_cpus);
    bitmap_blocks = DIV_ROUND_UP(s->len_dump_bitmap, block_size) * 2;
    dh->bitmap_blocks = cpu_to_dump32(s, bitmap_blocks);
    strncpy(dh->utsname.machine, ELF_MACHINE_UNAME, sizeof(dh->utsname.machine));

    if (s->flag_compress & DUMP_DH_COMPRESSED_ZLIB) {
        status |= DUMP_DH_COMPRESSED_ZLIB;
    }
#ifdef CONFIG_LZO
    if (s->flag_compress & DUMP_DH_COMPRESSED_LZO) {
        status |= DUMP_DH_COMPRESSED_LZO;
    }
#endif
#ifdef CONFIG_SNAPPY
    if (s->flag_compress & DUMP_DH_COMPRESSED_SNAPPY) {
        status |= DUMP_DH_COMPRESSED_SNAPPY;
    }
#endif
    dh->status = cpu_to_dump32(s, status);

    if (write_buffer(s->fd, 0, dh, size) < 0) {
        error_setg(errp, "dump: failed to write disk dump header");
        goto out;
    }

    /* write sub header */
    size = sizeof(KdumpSubHeader64);
    kh = g_malloc0(size);

    /* 64bit max_mapnr_64 */
    kh->max_mapnr_64 = cpu_to_dump64(s, s->max_mapnr);
    kh->phys_base = cpu_to_dump64(s, s->dump_info.phys_base);
    kh->dump_level = cpu_to_dump32(s, DUMP_LEVEL);

    offset_note = DISKDUMP_HEADER_BLOCKS * block_size + size;
    if (s->guest_note &&
        note_name_equal(s, s->guest_note, "VMCOREINFO")) {
        uint64_t hsize, name_size, size_vmcoreinfo_desc, offset_vmcoreinfo;

        get_note_sizes(s, s->guest_note,
                       &hsize, &name_size, &size_vmcoreinfo_desc);
        offset_vmcoreinfo = offset_note + s->note_size - s->guest_note_size +
            (DIV_ROUND_UP(hsize, 4) + DIV_ROUND_UP(name_size, 4)) * 4;
        kh->offset_vmcoreinfo = cpu_to_dump64(s, offset_vmcoreinfo);
        kh->size_vmcoreinfo = cpu_to_dump64(s, size_vmcoreinfo_desc);
    }

    kh->offset_note = cpu_to_dump64(s, offset_note);
    kh->note_size = cpu_to_dump64(s, s->note_size);

    if (write_buffer(s->fd, DISKDUMP_HEADER_BLOCKS *
                     block_size, kh, size) < 0) {
        error_setg(errp, "dump: failed to write kdump sub header");
        goto out;
    }

    /* write note */
    s->note_buf = g_malloc0(s->note_size);
    s->note_buf_offset = 0;

    /* use s->note_buf to store notes temporarily */
    write_elf64_notes(buf_write_note, s, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        goto out;
    }

    if (write_buffer(s->fd, offset_note, s->note_buf,
                     s->note_size) < 0) {
        error_setg(errp, "dump: failed to write notes");
        goto out;
    }

    /* get offset of dump_bitmap */
    s->offset_dump_bitmap = (DISKDUMP_HEADER_BLOCKS + sub_hdr_size) *
                             block_size;

    /* get offset of page */
    s->offset_page = (DISKDUMP_HEADER_BLOCKS + sub_hdr_size + bitmap_blocks) *
                     block_size;

out:
    g_free(dh);
    g_free(kh);
    g_free(s->note_buf);
}

static void write_dump_header(DumpState *s, Error **errp)
{
    if (s->dump_info.d_class == ELFCLASS32) {
        create_header32(s, errp);
    } else {
        create_header64(s, errp);
    }
}

static size_t dump_bitmap_get_bufsize(DumpState *s)
{
    return s->dump_info.page_size;
}

/*
 * Set bits in dump_bitmap sequentially: the bits before last_pfn must not
 * be rewritten, so to set the very first bit pass last_pfn = pfn = 0.
 * set_dump_bitmap always leaves the most recently set bit unsynced;
 * setting (last bit + sizeof(buf) * 8) to 0 flushes the buffered content,
 * i.e. the unsynced bits, into the vmcore.
 */
static int set_dump_bitmap(uint64_t last_pfn, uint64_t pfn, bool value,
                           uint8_t *buf, DumpState *s)
{
    off_t old_offset, new_offset;
    off_t offset_bitmap1, offset_bitmap2;
    uint32_t byte, bit;
    size_t bitmap_bufsize = dump_bitmap_get_bufsize(s);
    size_t bits_per_buf = bitmap_bufsize * CHAR_BIT;

    /* we must not set a bit before the previously set one */
    assert(last_pfn <= pfn);

    /*
     * If the bit to be set is not cached in buf, first flush the data in
     * buf to the vmcore. Passing a pfn that makes new_offset larger than
     * old_offset is also how any remaining data gets synced into the
     * vmcore.
     */
    old_offset = bitmap_bufsize * (last_pfn / bits_per_buf);
    new_offset = bitmap_bufsize * (pfn / bits_per_buf);

    while (old_offset < new_offset) {
        /* calculate the offset and write dump_bitmap */
        offset_bitmap1 = s->offset_dump_bitmap + old_offset;
        if (write_buffer(s->fd, offset_bitmap1, buf,
                         bitmap_bufsize) < 0) {
            return -1;
        }

        /* dump level 1 is chosen, so 1st and 2nd bitmap are same */
        offset_bitmap2 = s->offset_dump_bitmap + s->len_dump_bitmap +
                         old_offset;
        if (write_buffer(s->fd, offset_bitmap2, buf,
                         bitmap_bufsize) < 0) {
            return -1;
        }

        memset(buf, 0, bitmap_bufsize);
        old_offset += bitmap_bufsize;
    }

    /* get the exact place of the bit in the buf, and set it */
    byte = (pfn % bits_per_buf) / CHAR_BIT;
    bit = (pfn % bits_per_buf) % CHAR_BIT;
    if (value) {
        buf[byte] |= 1u << bit;
    } else {
        buf[byte] &= ~(1u << bit);
    }

    return 0;
}
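
/*
 * Example (assuming a 4 KiB bitmap buffer, i.e. 32768 bits per buffer):
 * going from last_pfn = 100 to pfn = 40000 first flushes the buffer
 * covering pfns [0, 32768), then sets bit 7232 of the fresh buffer
 * (byte 904, bit 0).
 */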

static uint64_t dump_paddr_to_pfn(DumpState *s, uint64_t addr)
{
    int target_page_shift = ctz32(s->dump_info.page_size);

    return (addr >> target_page_shift) - ARCH_PFN_OFFSET;
}

static uint64_t dump_pfn_to_paddr(DumpState *s, uint64_t pfn)
{
    int target_page_shift = ctz32(s->dump_info.page_size);

    return (pfn + ARCH_PFN_OFFSET) << target_page_shift;
}
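
/*
 * With 4 KiB pages, ctz32(4096) = 12, so the two helpers above are plain
 * shift-by-12 conversions (modulo ARCH_PFN_OFFSET).
 */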

/*
 * Examine every page and return the page frame number and the address of
 * the page. bufptr can be NULL. Note: the blocks here are supposed to
 * reflect guest-phys blocks, so block->target_start and block->target_end
 * should be integral multiples of the target page size.
 */
static bool get_next_page(GuestPhysBlock **blockptr, uint64_t *pfnptr,
                          uint8_t **bufptr, DumpState *s)
{
    GuestPhysBlock *block = *blockptr;
    hwaddr addr, target_page_mask = ~((hwaddr)s->dump_info.page_size - 1);
    uint8_t *buf;

    /* block == NULL means the start of the iteration */
    if (!block) {
        block = QTAILQ_FIRST(&s->guest_phys_blocks.head);
        *blockptr = block;
        assert((block->target_start & ~target_page_mask) == 0);
        assert((block->target_end & ~target_page_mask) == 0);
        *pfnptr = dump_paddr_to_pfn(s, block->target_start);
        if (bufptr) {
            *bufptr = block->host_addr;
        }
        return true;
    }

    *pfnptr = *pfnptr + 1;
    addr = dump_pfn_to_paddr(s, *pfnptr);

    if ((addr >= block->target_start) &&
        (addr + s->dump_info.page_size <= block->target_end)) {
        buf = block->host_addr + (addr - block->target_start);
    } else {
        /* the next page is in the next block */
        block = QTAILQ_NEXT(block, next);
        *blockptr = block;
        if (!block) {
            return false;
        }
        assert((block->target_start & ~target_page_mask) == 0);
        assert((block->target_end & ~target_page_mask) == 0);
        *pfnptr = dump_paddr_to_pfn(s, block->target_start);
        buf = block->host_addr;
    }

    if (bufptr) {
        *bufptr = buf;
    }

    return true;
}

static void write_dump_bitmap(DumpState *s, Error **errp)
{
    int ret = 0;
    uint64_t last_pfn, pfn;
    void *dump_bitmap_buf;
    size_t num_dumpable;
    GuestPhysBlock *block_iter = NULL;
    size_t bitmap_bufsize = dump_bitmap_get_bufsize(s);
    size_t bits_per_buf = bitmap_bufsize * CHAR_BIT;

    /* dump_bitmap_buf is used to store dump_bitmap temporarily */
    dump_bitmap_buf = g_malloc0(bitmap_bufsize);

    num_dumpable = 0;
    last_pfn = 0;

    /*
     * Examine memory page by page, and set the bit in dump_bitmap
     * corresponding to each existing page.
     */
    while (get_next_page(&block_iter, &pfn, NULL, s)) {
        ret = set_dump_bitmap(last_pfn, pfn, true, dump_bitmap_buf, s);
        if (ret < 0) {
            error_setg(errp, "dump: failed to set dump_bitmap");
            goto out;
        }

        last_pfn = pfn;
        num_dumpable++;
    }

    /*
     * set_dump_bitmap will always leave the recently set bit un-sync. Here we
     * set the remaining bits from last_pfn to the end of the bitmap buffer to
     * 0. With those set, the un-sync bit will be synchronized into the vmcore.
     */
    if (num_dumpable > 0) {
        ret = set_dump_bitmap(last_pfn, last_pfn + bits_per_buf, false,
                              dump_bitmap_buf, s);
        if (ret < 0) {
            error_setg(errp, "dump: failed to sync dump_bitmap");
            goto out;
        }
    }

    /* number of dumpable pages that will be dumped later */
    s->num_dumpable = num_dumpable;

out:
    g_free(dump_bitmap_buf);
}

static void prepare_data_cache(DataCache *data_cache, DumpState *s,
                               off_t offset)
{
    data_cache->fd = s->fd;
    data_cache->data_size = 0;
    data_cache->buf_size = 4 * dump_bitmap_get_bufsize(s);
    data_cache->buf = g_malloc0(data_cache->buf_size);
    data_cache->offset = offset;
}

static int write_cache(DataCache *dc, const void *buf, size_t size,
                       bool flag_sync)
{
    /*
     * dc->buf_size must not be less than size, otherwise the cache could
     * never hold the data
     */
    assert(size <= dc->buf_size);

    /*
     * If flag_sync is set, synchronize the data in dc->buf into the vmcore.
     * Otherwise, check whether there is enough room to cache the data in
     * buf; if not, write the data in dc->buf to dc->fd and reset dc->buf.
     */
    if ((!flag_sync && dc->data_size + size > dc->buf_size) ||
        (flag_sync && dc->data_size > 0)) {
        if (write_buffer(dc->fd, dc->offset, dc->buf, dc->data_size) < 0) {
            return -1;
        }

        dc->offset += dc->data_size;
        dc->data_size = 0;
    }

    if (!flag_sync) {
        memcpy(dc->buf + dc->data_size, buf, size);
        dc->data_size += size;
    }

    return 0;
}

static void free_data_cache(DataCache *data_cache)
{
    g_free(data_cache->buf);
}
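
/*
 * Typical usage of the cache above: repeated write_cache(dc, buf, size,
 * false) calls batch small writes, and a final write_cache(dc, NULL, 0,
 * true) flushes whatever is still buffered (see write_dump_pages below).
 */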

static size_t get_len_buf_out(size_t page_size, uint32_t flag_compress)
{
    switch (flag_compress) {
    case DUMP_DH_COMPRESSED_ZLIB:
        return compressBound(page_size);

    case DUMP_DH_COMPRESSED_LZO:
        /*
         * LZO will expand incompressible data by a little amount. Please check
         * the following URL to see the expansion calculation:
         * http://www.oberhumer.com/opensource/lzo/lzofaq.php
         */
        return page_size + page_size / 16 + 64 + 3;

#ifdef CONFIG_SNAPPY
    case DUMP_DH_COMPRESSED_SNAPPY:
        return snappy_max_compressed_length(page_size);
#endif
    }
    return 0;
}

static void write_dump_pages(DumpState *s, Error **errp)
{
    int ret = 0;
    DataCache page_desc, page_data;
    size_t len_buf_out, size_out;
#ifdef CONFIG_LZO
    lzo_bytep wrkmem = NULL;
#endif
    uint8_t *buf_out = NULL;
    off_t offset_desc, offset_data;
    PageDescriptor pd, pd_zero;
    uint8_t *buf;
    GuestPhysBlock *block_iter = NULL;
    uint64_t pfn_iter;

    /* get offset of page_desc and page_data in dump file */
    offset_desc = s->offset_page;
    offset_data = offset_desc + sizeof(PageDescriptor) * s->num_dumpable;

    prepare_data_cache(&page_desc, s, offset_desc);
    prepare_data_cache(&page_data, s, offset_data);

    /* prepare buffer to store compressed data */
    len_buf_out = get_len_buf_out(s->dump_info.page_size, s->flag_compress);
    assert(len_buf_out != 0);

#ifdef CONFIG_LZO
    wrkmem = g_malloc(LZO1X_1_MEM_COMPRESS);
#endif

    buf_out = g_malloc(len_buf_out);

    /*
     * init zero page's page_desc and page_data, because every zero page
     * uses the same page_data
     */
    pd_zero.size = cpu_to_dump32(s, s->dump_info.page_size);
    pd_zero.flags = cpu_to_dump32(s, 0);
    pd_zero.offset = cpu_to_dump64(s, offset_data);
    pd_zero.page_flags = cpu_to_dump64(s, 0);
    buf = g_malloc0(s->dump_info.page_size);
    ret = write_cache(&page_data, buf, s->dump_info.page_size, false);
    g_free(buf);
    if (ret < 0) {
        error_setg(errp, "dump: failed to write page data (zero page)");
        goto out;
    }

    offset_data += s->dump_info.page_size;

    /*
     * Dump memory to the vmcore page by page. All zero pages share the
     * single copy written above as the first page of the page section.
     */
    while (get_next_page(&block_iter, &pfn_iter, &buf, s)) {
        /* check zero page */
        if (buffer_is_zero(buf, s->dump_info.page_size)) {
            ret = write_cache(&page_desc, &pd_zero, sizeof(PageDescriptor),
                              false);
            if (ret < 0) {
                error_setg(errp, "dump: failed to write page desc");
                goto out;
            }
        } else {
            /*
             * Not a zero page, so:
             * 1. compress the page
             * 2. write the compressed page into the cache of page_data
             * 3. get the page desc of the compressed page and write it into
             *    the cache of page_desc
             *
             * Only one compression format is used here, since
             * s->flag_compress is fixed. If compression fails, we fall back
             * to storing the page in plaintext.
             */
             size_out = len_buf_out;
             if ((s->flag_compress & DUMP_DH_COMPRESSED_ZLIB) &&
                    (compress2(buf_out, (uLongf *)&size_out, buf,
                               s->dump_info.page_size, Z_BEST_SPEED) == Z_OK) &&
                    (size_out < s->dump_info.page_size)) {
                pd.flags = cpu_to_dump32(s, DUMP_DH_COMPRESSED_ZLIB);
                pd.size  = cpu_to_dump32(s, size_out);

                ret = write_cache(&page_data, buf_out, size_out, false);
                if (ret < 0) {
                    error_setg(errp, "dump: failed to write page data");
                    goto out;
                }
#ifdef CONFIG_LZO
            } else if ((s->flag_compress & DUMP_DH_COMPRESSED_LZO) &&
                    (lzo1x_1_compress(buf, s->dump_info.page_size, buf_out,
                    (lzo_uint *)&size_out, wrkmem) == LZO_E_OK) &&
                    (size_out < s->dump_info.page_size)) {
                pd.flags = cpu_to_dump32(s, DUMP_DH_COMPRESSED_LZO);
                pd.size  = cpu_to_dump32(s, size_out);

                ret = write_cache(&page_data, buf_out, size_out, false);
                if (ret < 0) {
                    error_setg(errp, "dump: failed to write page data");
                    goto out;
                }
#endif
#ifdef CONFIG_SNAPPY
            } else if ((s->flag_compress & DUMP_DH_COMPRESSED_SNAPPY) &&
                    (snappy_compress((char *)buf, s->dump_info.page_size,
                    (char *)buf_out, &size_out) == SNAPPY_OK) &&
                    (size_out < s->dump_info.page_size)) {
                pd.flags = cpu_to_dump32(s, DUMP_DH_COMPRESSED_SNAPPY);
                pd.size  = cpu_to_dump32(s, size_out);

                ret = write_cache(&page_data, buf_out, size_out, false);
                if (ret < 0) {
                    error_setg(errp, "dump: failed to write page data");
                    goto out;
                }
#endif
            } else {
                /*
                 * Fall back to saving in plaintext; size_out should be
                 * assigned the target's page size.
                 */
                pd.flags = cpu_to_dump32(s, 0);
                size_out = s->dump_info.page_size;
                pd.size = cpu_to_dump32(s, size_out);

                ret = write_cache(&page_data, buf,
                                  s->dump_info.page_size, false);
                if (ret < 0) {
                    error_setg(errp, "dump: failed to write page data");
                    goto out;
                }
            }

            /* get and write page desc here */
            pd.page_flags = cpu_to_dump64(s, 0);
            pd.offset = cpu_to_dump64(s, offset_data);
            offset_data += size_out;

            ret = write_cache(&page_desc, &pd, sizeof(PageDescriptor), false);
            if (ret < 0) {
                error_setg(errp, "dump: failed to write page desc");
                goto out;
            }
        }
        s->written_size += s->dump_info.page_size;
    }

    ret = write_cache(&page_desc, NULL, 0, true);
    if (ret < 0) {
        error_setg(errp, "dump: failed to sync cache for page_desc");
        goto out;
    }
    ret = write_cache(&page_data, NULL, 0, true);
    if (ret < 0) {
        error_setg(errp, "dump: failed to sync cache for page_data");
        goto out;
    }

out:
    free_data_cache(&page_desc);
    free_data_cache(&page_data);

#ifdef CONFIG_LZO
    g_free(wrkmem);
#endif

    g_free(buf_out);
}

static void create_kdump_vmcore(DumpState *s, Error **errp)
{
    int ret;
    Error *local_err = NULL;

    /*
     * the kdump-compressed format is:
     *                                               File offset
     *  +------------------------------------------+ 0x0
     *  |    main header (struct disk_dump_header) |
     *  |------------------------------------------+ block 1
     *  |    sub header (struct kdump_sub_header)  |
     *  |------------------------------------------+ block 2
     *  |            1st-dump_bitmap               |
     *  |------------------------------------------+ block 2 + X blocks
     *  |            2nd-dump_bitmap               | (aligned by block)
     *  |------------------------------------------+ block 2 + 2 * X blocks
     *  |  page desc for pfn 0 (struct page_desc)  | (aligned by block)
     *  |  page desc for pfn 1 (struct page_desc)  |
     *  |                    :                     |
     *  |------------------------------------------| (not aligned by block)
     *  |         page data (pfn 0)                |
     *  |         page data (pfn 1)                |
     *  |                    :                     |
     *  +------------------------------------------+
     */

    ret = write_start_flat_header(s->fd);
    if (ret < 0) {
        error_setg(errp, "dump: failed to write start flat header");
        return;
    }

    write_dump_header(s, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        return;
    }

    write_dump_bitmap(s, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        return;
    }

    write_dump_pages(s, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        return;
    }

    ret = write_end_flat_header(s->fd);
    if (ret < 0) {
        error_setg(errp, "dump: failed to write end flat header");
        return;
    }
}

static ram_addr_t get_start_block(DumpState *s)
{
    GuestPhysBlock *block;

    if (!s->has_filter) {
        s->next_block = QTAILQ_FIRST(&s->guest_phys_blocks.head);
        return 0;
    }

    QTAILQ_FOREACH(block, &s->guest_phys_blocks.head, next) {
        if (block->target_start >= s->begin + s->length ||
            block->target_end <= s->begin) {
            /* This block is out of the range */
            continue;
        }

        s->next_block = block;
        if (s->begin > block->target_start) {
            s->start = s->begin - block->target_start;
        } else {
            s->start = 0;
        }
        return s->start;
    }

    return -1;
}

static void get_max_mapnr(DumpState *s)
{
    GuestPhysBlock *last_block;

    last_block = QTAILQ_LAST(&s->guest_phys_blocks.head);
    s->max_mapnr = dump_paddr_to_pfn(s, last_block->target_end);
}

static DumpState dump_state_global = { .status = DUMP_STATUS_NONE };

static void dump_state_prepare(DumpState *s)
{
    /* zero the struct, setting status to active */
    *s = (DumpState) { .status = DUMP_STATUS_ACTIVE };
}

bool dump_in_progress(void)
{
    DumpState *state = &dump_state_global;
    return (qatomic_read(&state->status) == DUMP_STATUS_ACTIVE);
}

/* calculate the total size of the memory to be dumped (taking the filter
 * into account) */
static int64_t dump_calculate_size(DumpState *s)
{
    GuestPhysBlock *block;
    int64_t size = 0, total = 0, left = 0, right = 0;

    QTAILQ_FOREACH(block, &s->guest_phys_blocks.head, next) {
        if (s->has_filter) {
            /* calculate the overlapped region. */
            left = MAX(s->begin, block->target_start);
            right = MIN(s->begin + s->length, block->target_end);
            size = right - left;
            size = size > 0 ? size : 0;
        } else {
            /* count the whole region in */
            size = (block->target_end - block->target_start);
        }
        total += size;
    }

    return total;
}

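/*
 * VMCOREINFO is plain "KEY=value" text, one entry per line; scan it for
 * the line that carries the guest kernel's physical base (for example
 * "NUMBER(phys_base)=" on x86-64) and fold the value into dump_info.
 */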
static void vmcoreinfo_update_phys_base(DumpState *s)
{
    uint64_t size, note_head_size, name_size, phys_base;
    char **lines;
    uint8_t *vmci;
    size_t i;

    if (!note_name_equal(s, s->guest_note, "VMCOREINFO")) {
        return;
    }

    get_note_sizes(s, s->guest_note, &note_head_size, &name_size, &size);
    note_head_size = ROUND_UP(note_head_size, 4);

    vmci = s->guest_note + note_head_size + ROUND_UP(name_size, 4);
    *(vmci + size) = '\0';

    lines = g_strsplit((char *)vmci, "\n", -1);
    for (i = 0; lines[i]; i++) {
        const char *prefix = NULL;

        if (s->dump_info.d_machine == EM_X86_64) {
            prefix = "NUMBER(phys_base)=";
        } else if (s->dump_info.d_machine == EM_AARCH64) {
            prefix = "NUMBER(PHYS_OFFSET)=";
        }

        if (prefix && g_str_has_prefix(lines[i], prefix)) {
            if (qemu_strtou64(lines[i] + strlen(prefix), NULL, 16,
                              &phys_base) < 0) {
                warn_report("Failed to read %s", prefix);
            } else {
                s->dump_info.phys_base = phys_base;
            }
            break;
        }
    }

    g_strfreev(lines);
}
1637  
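/*
 * Prepare the global dump state: stop the VM if it is running, collect the
 * guest-physical block list and memory mappings, size the vmcore notes and
 * headers, and validate the filter range.  On failure, errp is set and any
 * partially initialized state is torn down via dump_cleanup().
 */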
static void dump_init(DumpState *s, int fd, bool has_format,
                      DumpGuestMemoryFormat format, bool paging, bool has_filter,
                      int64_t begin, int64_t length, Error **errp)
{
    VMCoreInfoState *vmci = vmcoreinfo_find();
    CPUState *cpu;
    int nr_cpus;
    Error *err = NULL;
    int ret;

    s->has_format = has_format;
    s->format = format;
    s->written_size = 0;

    /* kdump-compressed conflicts with paging and filter */
    if (has_format && format != DUMP_GUEST_MEMORY_FORMAT_ELF) {
        assert(!paging && !has_filter);
    }

    if (runstate_is_running()) {
        vm_stop(RUN_STATE_SAVE_VM);
        s->resume = true;
    } else {
        s->resume = false;
    }

    /* If we use KVM, we should synchronize the registers before we get dump
     * info or physmap info.
     */
    cpu_synchronize_all_states();
    nr_cpus = 0;
    CPU_FOREACH(cpu) {
        nr_cpus++;
    }

    s->fd = fd;
    s->has_filter = has_filter;
    s->begin = begin;
    s->length = length;

    memory_mapping_list_init(&s->list);

    guest_phys_blocks_init(&s->guest_phys_blocks);
    guest_phys_blocks_append(&s->guest_phys_blocks);
    s->total_size = dump_calculate_size(s);
#ifdef DEBUG_DUMP_GUEST_MEMORY
    fprintf(stderr, "DUMP: total memory to dump: %" PRIu64 "\n", s->total_size);
#endif

    /* it does not make sense to dump non-existent memory */
    if (!s->total_size) {
        error_setg(errp, "dump: no guest memory to dump");
        goto cleanup;
    }

    s->start = get_start_block(s);
    if (s->start == -1) {
        error_setg(errp, QERR_INVALID_PARAMETER, "begin");
        goto cleanup;
    }

    /* get dump info: endian, class and architecture.
     * If the target architecture is not supported, cpu_get_dump_info() will
     * return -1.
     */
    ret = cpu_get_dump_info(&s->dump_info, &s->guest_phys_blocks);
    if (ret < 0) {
        error_setg(errp, QERR_UNSUPPORTED);
        goto cleanup;
    }

    if (!s->dump_info.page_size) {
        s->dump_info.page_size = TARGET_PAGE_SIZE;
    }

    s->note_size = cpu_get_note_size(s->dump_info.d_class,
                                     s->dump_info.d_machine, nr_cpus);
    if (s->note_size < 0) {
        error_setg(errp, QERR_UNSUPPORTED);
        goto cleanup;
    }

    /*
     * The goal of this block is to (a) update the previously guessed
     * phys_base, (b) copy the guest note out of the guest.
     * Failure to do so is not fatal for dumping.
     */
    if (vmci) {
        uint64_t addr, note_head_size, name_size, desc_size;
        uint32_t size;
        uint16_t format;

        note_head_size = s->dump_info.d_class == ELFCLASS32 ?
            sizeof(Elf32_Nhdr) : sizeof(Elf64_Nhdr);

        format = le16_to_cpu(vmci->vmcoreinfo.guest_format);
        size = le32_to_cpu(vmci->vmcoreinfo.size);
        addr = le64_to_cpu(vmci->vmcoreinfo.paddr);
        if (!vmci->has_vmcoreinfo) {
            warn_report("guest note is not present");
        } else if (size < note_head_size || size > MAX_GUEST_NOTE_SIZE) {
            warn_report("guest note size is invalid: %" PRIu32, size);
        } else if (format != FW_CFG_VMCOREINFO_FORMAT_ELF) {
            warn_report("guest note format is unsupported: %" PRIu16, format);
        } else {
            s->guest_note = g_malloc(size + 1); /* +1 for adding \0 */
            cpu_physical_memory_read(addr, s->guest_note, size);

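            /* don't trust the sizes claimed by the guest's note header */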
            get_note_sizes(s, s->guest_note, NULL, &name_size, &desc_size);
            s->guest_note_size = ELF_NOTE_SIZE(note_head_size, name_size,
                                               desc_size);
            if (name_size > MAX_GUEST_NOTE_SIZE ||
                desc_size > MAX_GUEST_NOTE_SIZE ||
                s->guest_note_size > size) {
                warn_report("Invalid guest note header");
                g_free(s->guest_note);
                s->guest_note = NULL;
            } else {
                vmcoreinfo_update_phys_base(s);
                s->note_size += s->guest_note_size;
            }
        }
    }

    /* get memory mapping */
    if (paging) {
        qemu_get_guest_memory_mapping(&s->list, &s->guest_phys_blocks, &err);
        if (err != NULL) {
            error_propagate(errp, err);
            goto cleanup;
        }
    } else {
        qemu_get_guest_simple_memory_mapping(&s->list, &s->guest_phys_blocks);
    }

    s->nr_cpus = nr_cpus;

    get_max_mapnr(s);

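    /*
     * One bit per guest page: round the bitmap up to a whole number of
     * dump pages so it can be written out in page-sized chunks.
     */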
    uint64_t tmp;
    tmp = DIV_ROUND_UP(DIV_ROUND_UP(s->max_mapnr, CHAR_BIT),
                       s->dump_info.page_size);
    s->len_dump_bitmap = tmp * s->dump_info.page_size;

    /* init for kdump-compressed format */
    if (has_format && format != DUMP_GUEST_MEMORY_FORMAT_ELF) {
        switch (format) {
        case DUMP_GUEST_MEMORY_FORMAT_KDUMP_ZLIB:
            s->flag_compress = DUMP_DH_COMPRESSED_ZLIB;
            break;

        case DUMP_GUEST_MEMORY_FORMAT_KDUMP_LZO:
#ifdef CONFIG_LZO
            if (lzo_init() != LZO_E_OK) {
                error_setg(errp, "failed to initialize the LZO library");
                goto cleanup;
            }
#endif
            s->flag_compress = DUMP_DH_COMPRESSED_LZO;
            break;

        case DUMP_GUEST_MEMORY_FORMAT_KDUMP_SNAPPY:
            s->flag_compress = DUMP_DH_COMPRESSED_SNAPPY;
            break;

        default:
            s->flag_compress = 0;
        }

        return;
    }

    if (s->has_filter) {
        memory_mapping_filter(&s->list, s->begin, s->length);
    }

    /*
     * calculate phdr_num
     *
     * the type of ehdr->e_phnum is uint16_t, so we should avoid overflow
     */
    s->phdr_num = 1; /* PT_NOTE */
    if (s->list.num < UINT16_MAX - 2) {
        s->phdr_num += s->list.num;
        s->have_section = false;
    } else {
        s->have_section = true;
        s->phdr_num = PN_XNUM;
        s->sh_info = 1; /* PT_NOTE */

        /* the type of shdr->sh_info is uint32_t, so we should avoid overflow */
        if (s->list.num <= UINT32_MAX - 1) {
            s->sh_info += s->list.num;
        } else {
            s->sh_info = UINT32_MAX;
        }
    }

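    /*
     * Guest memory starts in the file right after the ELF header, the
     * program headers (plus one section header when PN_XNUM is in use)
     * and the note segment.
     */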
    if (s->dump_info.d_class == ELFCLASS64) {
        if (s->have_section) {
            s->memory_offset = sizeof(Elf64_Ehdr) +
                               sizeof(Elf64_Phdr) * s->sh_info +
                               sizeof(Elf64_Shdr) + s->note_size;
        } else {
            s->memory_offset = sizeof(Elf64_Ehdr) +
                               sizeof(Elf64_Phdr) * s->phdr_num + s->note_size;
        }
    } else {
        if (s->have_section) {
            s->memory_offset = sizeof(Elf32_Ehdr) +
                               sizeof(Elf32_Phdr) * s->sh_info +
                               sizeof(Elf32_Shdr) + s->note_size;
        } else {
            s->memory_offset = sizeof(Elf32_Ehdr) +
                               sizeof(Elf32_Phdr) * s->phdr_num + s->note_size;
        }
    }

    return;

cleanup:
    dump_cleanup(s);
}

/* this operation might be time-consuming. */
static void dump_process(DumpState *s, Error **errp)
{
    Error *local_err = NULL;
    DumpQueryResult *result = NULL;

    if (s->has_format && s->format == DUMP_GUEST_MEMORY_FORMAT_WIN_DMP) {
#ifdef TARGET_X86_64
        create_win_dump(s, &local_err);
#endif
    } else if (s->has_format && s->format != DUMP_GUEST_MEMORY_FORMAT_ELF) {
        create_kdump_vmcore(s, &local_err);
    } else {
        create_vmcore(s, &local_err);
    }

    /* make sure status is written after written_size updates */
    smp_wmb();
    qatomic_set(&s->status,
                (local_err ? DUMP_STATUS_FAILED : DUMP_STATUS_COMPLETED));

    /* send DUMP_COMPLETED message (unconditionally) */
    result = qmp_query_dump(NULL);
    /* should never fail */
    assert(result);
    qapi_event_send_dump_completed(result, !!local_err, (local_err ?
                                   error_get_pretty(local_err) : NULL));
    qapi_free_DumpQueryResult(result);

    error_propagate(errp, local_err);
    dump_cleanup(s);
}

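/*
 * Thread entry point for detached dumps: errors are not propagated here,
 * they reach the client through the DUMP_COMPLETED event instead.
 */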
static void *dump_thread(void *data)
{
    DumpState *s = (DumpState *)data;
    dump_process(s, NULL);
    return NULL;
}

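/* Progress query; the smp_rmb() below pairs with the smp_wmb() in
 * dump_process() */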
DumpQueryResult *qmp_query_dump(Error **errp)
{
    DumpQueryResult *result = g_new(DumpQueryResult, 1);
    DumpState *state = &dump_state_global;
    result->status = qatomic_read(&state->status);
    /* make sure we are reading status and written_size in order */
    smp_rmb();
    result->completed = state->written_size;
    result->total = state->total_size;
    return result;
}

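/*
 * QMP handler for dump-guest-memory: validate the arguments, open the
 * destination, block migration for the duration of the dump, then run the
 * dump synchronously or in a detached thread.
 */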
void qmp_dump_guest_memory(bool paging, const char *file,
                           bool has_detach, bool detach,
                           bool has_begin, int64_t begin, bool has_length,
                           int64_t length, bool has_format,
                           DumpGuestMemoryFormat format, Error **errp)
{
    const char *p;
    int fd = -1;
    DumpState *s;
    Error *local_err = NULL;
    bool detach_p = false;

    if (runstate_check(RUN_STATE_INMIGRATE)) {
        error_setg(errp, "Dump not allowed during incoming migration.");
        return;
    }

    /* if there is a dump running in the background, we should wait until
     * it has finished */
    if (dump_in_progress()) {
        error_setg(errp, "There is a dump in process, please wait.");
        return;
    }

    /*
     * kdump-compressed format needs the whole memory dumped, so neither
     * paging nor a filter is supported here.
     */
    if ((has_format && format != DUMP_GUEST_MEMORY_FORMAT_ELF) &&
        (paging || has_begin || has_length)) {
        error_setg(errp, "kdump-compressed format doesn't support paging or "
                         "filter");
        return;
    }
    if (has_begin && !has_length) {
        error_setg(errp, QERR_MISSING_PARAMETER, "length");
        return;
    }
    if (!has_begin && has_length) {
        error_setg(errp, QERR_MISSING_PARAMETER, "begin");
        return;
    }
    if (has_detach) {
        detach_p = detach;
    }

    /* check whether lzo/snappy is supported */
#ifndef CONFIG_LZO
    if (has_format && format == DUMP_GUEST_MEMORY_FORMAT_KDUMP_LZO) {
        error_setg(errp, "kdump-lzo is not available now");
        return;
    }
#endif

#ifndef CONFIG_SNAPPY
    if (has_format && format == DUMP_GUEST_MEMORY_FORMAT_KDUMP_SNAPPY) {
        error_setg(errp, "kdump-snappy is not available now");
        return;
    }
#endif

#ifndef TARGET_X86_64
    if (has_format && format == DUMP_GUEST_MEMORY_FORMAT_WIN_DMP) {
        error_setg(errp, "Windows dump is only available for x86-64");
        return;
    }
#endif

#if !defined(WIN32)
    if (strstart(file, "fd:", &p)) {
        fd = monitor_get_fd(monitor_cur(), p, errp);
        if (fd == -1) {
            return;
        }
    }
#endif

    if (strstart(file, "file:", &p)) {
        fd = qemu_open_old(p, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, S_IRUSR);
        if (fd < 0) {
            error_setg_file_open(errp, errno, p);
            return;
        }
    }

    if (fd == -1) {
        error_setg(errp, QERR_INVALID_PARAMETER, "protocol");
        return;
    }

    if (!dump_migration_blocker) {
        error_setg(&dump_migration_blocker,
                   "Live migration disabled: dump-guest-memory in progress");
    }

    /*
     * Allow the dump even with -only-migratable, but forbid migration
     * while the dump of guest memory is in progress.
     */
    if (migrate_add_blocker_internal(dump_migration_blocker, errp)) {
        /* the fd has not been handed over to the dump state yet, so close
         * it here on error */
        close(fd);
        return;
    }

    s = &dump_state_global;
    dump_state_prepare(s);

    dump_init(s, fd, has_format, format, paging, has_begin,
              begin, length, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        qatomic_set(&s->status, DUMP_STATUS_FAILED);
        return;
    }

    if (detach_p) {
        /* detached dump */
        s->detached = true;
        qemu_thread_create(&s->dump_thread, "dump_thread", dump_thread,
                           s, QEMU_THREAD_DETACHED);
    } else {
        /* sync dump */
        dump_process(s, errp);
    }
}

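/* Report which dump formats this QEMU binary was built to support */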
DumpGuestMemoryCapability *qmp_query_dump_guest_memory_capability(Error **errp)
{
    DumpGuestMemoryCapability *cap =
                                  g_malloc0(sizeof(DumpGuestMemoryCapability));
    DumpGuestMemoryFormatList **tail = &cap->formats;

    /* elf is always available */
    QAPI_LIST_APPEND(tail, DUMP_GUEST_MEMORY_FORMAT_ELF);

    /* kdump-zlib is always available */
    QAPI_LIST_APPEND(tail, DUMP_GUEST_MEMORY_FORMAT_KDUMP_ZLIB);

    /* add new item if kdump-lzo is available */
#ifdef CONFIG_LZO
    QAPI_LIST_APPEND(tail, DUMP_GUEST_MEMORY_FORMAT_KDUMP_LZO);
#endif

    /* add new item if kdump-snappy is available */
#ifdef CONFIG_SNAPPY
    QAPI_LIST_APPEND(tail, DUMP_GUEST_MEMORY_FORMAT_KDUMP_SNAPPY);
#endif

    /* Windows dump is available only if target is x86_64 */
#ifdef TARGET_X86_64
    QAPI_LIST_APPEND(tail, DUMP_GUEST_MEMORY_FORMAT_WIN_DMP);
#endif

    return cap;
}