iov: handle partial writes from sendmsg and recvmsg
[qemu.git] / dump.c
blobb34f143c4206f339ab159271c67e4f3767fe2a37
1 /*
2 * QEMU dump
4 * Copyright Fujitsu, Corp. 2011, 2012
6 * Authors:
7 * Wen Congyang <wency@cn.fujitsu.com>
9 * This work is licensed under the terms of the GNU GPL, version 2 or later.
10 * See the COPYING file in the top-level directory.
14 #include "qemu-common.h"
15 #include "elf.h"
16 #include "cpu.h"
17 #include "exec/cpu-all.h"
18 #include "exec/hwaddr.h"
19 #include "monitor/monitor.h"
20 #include "sysemu/kvm.h"
21 #include "sysemu/dump.h"
22 #include "sysemu/sysemu.h"
23 #include "sysemu/memory_mapping.h"
24 #include "qapi/error.h"
25 #include "qmp-commands.h"
27 static uint16_t cpu_convert_to_target16(uint16_t val, int endian)
29 if (endian == ELFDATA2LSB) {
30 val = cpu_to_le16(val);
31 } else {
32 val = cpu_to_be16(val);
35 return val;
38 static uint32_t cpu_convert_to_target32(uint32_t val, int endian)
40 if (endian == ELFDATA2LSB) {
41 val = cpu_to_le32(val);
42 } else {
43 val = cpu_to_be32(val);
46 return val;
49 static uint64_t cpu_convert_to_target64(uint64_t val, int endian)
51 if (endian == ELFDATA2LSB) {
52 val = cpu_to_le64(val);
53 } else {
54 val = cpu_to_be64(val);
57 return val;
60 typedef struct DumpState {
61 ArchDumpInfo dump_info;
62 MemoryMappingList list;
63 uint16_t phdr_num;
64 uint32_t sh_info;
65 bool have_section;
66 bool resume;
67 size_t note_size;
68 hwaddr memory_offset;
69 int fd;
71 RAMBlock *block;
72 ram_addr_t start;
73 bool has_filter;
74 int64_t begin;
75 int64_t length;
76 Error **errp;
77 } DumpState;
79 static int dump_cleanup(DumpState *s)
81 int ret = 0;
83 memory_mapping_list_free(&s->list);
84 if (s->fd != -1) {
85 close(s->fd);
87 if (s->resume) {
88 vm_start();
91 return ret;
94 static void dump_error(DumpState *s, const char *reason)
96 dump_cleanup(s);
99 static int fd_write_vmcore(void *buf, size_t size, void *opaque)
101 DumpState *s = opaque;
102 size_t written_size;
104 written_size = qemu_write_full(s->fd, buf, size);
105 if (written_size != size) {
106 return -1;
109 return 0;
112 static int write_elf64_header(DumpState *s)
114 Elf64_Ehdr elf_header;
115 int ret;
116 int endian = s->dump_info.d_endian;
118 memset(&elf_header, 0, sizeof(Elf64_Ehdr));
119 memcpy(&elf_header, ELFMAG, SELFMAG);
120 elf_header.e_ident[EI_CLASS] = ELFCLASS64;
121 elf_header.e_ident[EI_DATA] = s->dump_info.d_endian;
122 elf_header.e_ident[EI_VERSION] = EV_CURRENT;
123 elf_header.e_type = cpu_convert_to_target16(ET_CORE, endian);
124 elf_header.e_machine = cpu_convert_to_target16(s->dump_info.d_machine,
125 endian);
126 elf_header.e_version = cpu_convert_to_target32(EV_CURRENT, endian);
127 elf_header.e_ehsize = cpu_convert_to_target16(sizeof(elf_header), endian);
128 elf_header.e_phoff = cpu_convert_to_target64(sizeof(Elf64_Ehdr), endian);
129 elf_header.e_phentsize = cpu_convert_to_target16(sizeof(Elf64_Phdr),
130 endian);
131 elf_header.e_phnum = cpu_convert_to_target16(s->phdr_num, endian);
132 if (s->have_section) {
133 uint64_t shoff = sizeof(Elf64_Ehdr) + sizeof(Elf64_Phdr) * s->sh_info;
135 elf_header.e_shoff = cpu_convert_to_target64(shoff, endian);
136 elf_header.e_shentsize = cpu_convert_to_target16(sizeof(Elf64_Shdr),
137 endian);
138 elf_header.e_shnum = cpu_convert_to_target16(1, endian);
141 ret = fd_write_vmcore(&elf_header, sizeof(elf_header), s);
142 if (ret < 0) {
143 dump_error(s, "dump: failed to write elf header.\n");
144 return -1;
147 return 0;
150 static int write_elf32_header(DumpState *s)
152 Elf32_Ehdr elf_header;
153 int ret;
154 int endian = s->dump_info.d_endian;
156 memset(&elf_header, 0, sizeof(Elf32_Ehdr));
157 memcpy(&elf_header, ELFMAG, SELFMAG);
158 elf_header.e_ident[EI_CLASS] = ELFCLASS32;
159 elf_header.e_ident[EI_DATA] = endian;
160 elf_header.e_ident[EI_VERSION] = EV_CURRENT;
161 elf_header.e_type = cpu_convert_to_target16(ET_CORE, endian);
162 elf_header.e_machine = cpu_convert_to_target16(s->dump_info.d_machine,
163 endian);
164 elf_header.e_version = cpu_convert_to_target32(EV_CURRENT, endian);
165 elf_header.e_ehsize = cpu_convert_to_target16(sizeof(elf_header), endian);
166 elf_header.e_phoff = cpu_convert_to_target32(sizeof(Elf32_Ehdr), endian);
167 elf_header.e_phentsize = cpu_convert_to_target16(sizeof(Elf32_Phdr),
168 endian);
169 elf_header.e_phnum = cpu_convert_to_target16(s->phdr_num, endian);
170 if (s->have_section) {
171 uint32_t shoff = sizeof(Elf32_Ehdr) + sizeof(Elf32_Phdr) * s->sh_info;
173 elf_header.e_shoff = cpu_convert_to_target32(shoff, endian);
174 elf_header.e_shentsize = cpu_convert_to_target16(sizeof(Elf32_Shdr),
175 endian);
176 elf_header.e_shnum = cpu_convert_to_target16(1, endian);
179 ret = fd_write_vmcore(&elf_header, sizeof(elf_header), s);
180 if (ret < 0) {
181 dump_error(s, "dump: failed to write elf header.\n");
182 return -1;
185 return 0;
188 static int write_elf64_load(DumpState *s, MemoryMapping *memory_mapping,
189 int phdr_index, hwaddr offset)
191 Elf64_Phdr phdr;
192 int ret;
193 int endian = s->dump_info.d_endian;
195 memset(&phdr, 0, sizeof(Elf64_Phdr));
196 phdr.p_type = cpu_convert_to_target32(PT_LOAD, endian);
197 phdr.p_offset = cpu_convert_to_target64(offset, endian);
198 phdr.p_paddr = cpu_convert_to_target64(memory_mapping->phys_addr, endian);
199 if (offset == -1) {
200 /* When the memory is not stored into vmcore, offset will be -1 */
201 phdr.p_filesz = 0;
202 } else {
203 phdr.p_filesz = cpu_convert_to_target64(memory_mapping->length, endian);
205 phdr.p_memsz = cpu_convert_to_target64(memory_mapping->length, endian);
206 phdr.p_vaddr = cpu_convert_to_target64(memory_mapping->virt_addr, endian);
208 ret = fd_write_vmcore(&phdr, sizeof(Elf64_Phdr), s);
209 if (ret < 0) {
210 dump_error(s, "dump: failed to write program header table.\n");
211 return -1;
214 return 0;
217 static int write_elf32_load(DumpState *s, MemoryMapping *memory_mapping,
218 int phdr_index, hwaddr offset)
220 Elf32_Phdr phdr;
221 int ret;
222 int endian = s->dump_info.d_endian;
224 memset(&phdr, 0, sizeof(Elf32_Phdr));
225 phdr.p_type = cpu_convert_to_target32(PT_LOAD, endian);
226 phdr.p_offset = cpu_convert_to_target32(offset, endian);
227 phdr.p_paddr = cpu_convert_to_target32(memory_mapping->phys_addr, endian);
228 if (offset == -1) {
229 /* When the memory is not stored into vmcore, offset will be -1 */
230 phdr.p_filesz = 0;
231 } else {
232 phdr.p_filesz = cpu_convert_to_target32(memory_mapping->length, endian);
234 phdr.p_memsz = cpu_convert_to_target32(memory_mapping->length, endian);
235 phdr.p_vaddr = cpu_convert_to_target32(memory_mapping->virt_addr, endian);
237 ret = fd_write_vmcore(&phdr, sizeof(Elf32_Phdr), s);
238 if (ret < 0) {
239 dump_error(s, "dump: failed to write program header table.\n");
240 return -1;
243 return 0;
246 static int write_elf64_note(DumpState *s)
248 Elf64_Phdr phdr;
249 int endian = s->dump_info.d_endian;
250 hwaddr begin = s->memory_offset - s->note_size;
251 int ret;
253 memset(&phdr, 0, sizeof(Elf64_Phdr));
254 phdr.p_type = cpu_convert_to_target32(PT_NOTE, endian);
255 phdr.p_offset = cpu_convert_to_target64(begin, endian);
256 phdr.p_paddr = 0;
257 phdr.p_filesz = cpu_convert_to_target64(s->note_size, endian);
258 phdr.p_memsz = cpu_convert_to_target64(s->note_size, endian);
259 phdr.p_vaddr = 0;
261 ret = fd_write_vmcore(&phdr, sizeof(Elf64_Phdr), s);
262 if (ret < 0) {
263 dump_error(s, "dump: failed to write program header table.\n");
264 return -1;
267 return 0;
270 static inline int cpu_index(CPUState *cpu)
272 return cpu->cpu_index + 1;
275 static int write_elf64_notes(DumpState *s)
277 CPUArchState *env;
278 CPUState *cpu;
279 int ret;
280 int id;
282 for (env = first_cpu; env != NULL; env = env->next_cpu) {
283 cpu = ENV_GET_CPU(env);
284 id = cpu_index(cpu);
285 ret = cpu_write_elf64_note(fd_write_vmcore, env, id, s);
286 if (ret < 0) {
287 dump_error(s, "dump: failed to write elf notes.\n");
288 return -1;
292 for (env = first_cpu; env != NULL; env = env->next_cpu) {
293 ret = cpu_write_elf64_qemunote(fd_write_vmcore, env, s);
294 if (ret < 0) {
295 dump_error(s, "dump: failed to write CPU status.\n");
296 return -1;
300 return 0;
303 static int write_elf32_note(DumpState *s)
305 hwaddr begin = s->memory_offset - s->note_size;
306 Elf32_Phdr phdr;
307 int endian = s->dump_info.d_endian;
308 int ret;
310 memset(&phdr, 0, sizeof(Elf32_Phdr));
311 phdr.p_type = cpu_convert_to_target32(PT_NOTE, endian);
312 phdr.p_offset = cpu_convert_to_target32(begin, endian);
313 phdr.p_paddr = 0;
314 phdr.p_filesz = cpu_convert_to_target32(s->note_size, endian);
315 phdr.p_memsz = cpu_convert_to_target32(s->note_size, endian);
316 phdr.p_vaddr = 0;
318 ret = fd_write_vmcore(&phdr, sizeof(Elf32_Phdr), s);
319 if (ret < 0) {
320 dump_error(s, "dump: failed to write program header table.\n");
321 return -1;
324 return 0;
327 static int write_elf32_notes(DumpState *s)
329 CPUArchState *env;
330 CPUState *cpu;
331 int ret;
332 int id;
334 for (env = first_cpu; env != NULL; env = env->next_cpu) {
335 cpu = ENV_GET_CPU(env);
336 id = cpu_index(cpu);
337 ret = cpu_write_elf32_note(fd_write_vmcore, env, id, s);
338 if (ret < 0) {
339 dump_error(s, "dump: failed to write elf notes.\n");
340 return -1;
344 for (env = first_cpu; env != NULL; env = env->next_cpu) {
345 ret = cpu_write_elf32_qemunote(fd_write_vmcore, env, s);
346 if (ret < 0) {
347 dump_error(s, "dump: failed to write CPU status.\n");
348 return -1;
352 return 0;
355 static int write_elf_section(DumpState *s, int type)
357 Elf32_Shdr shdr32;
358 Elf64_Shdr shdr64;
359 int endian = s->dump_info.d_endian;
360 int shdr_size;
361 void *shdr;
362 int ret;
364 if (type == 0) {
365 shdr_size = sizeof(Elf32_Shdr);
366 memset(&shdr32, 0, shdr_size);
367 shdr32.sh_info = cpu_convert_to_target32(s->sh_info, endian);
368 shdr = &shdr32;
369 } else {
370 shdr_size = sizeof(Elf64_Shdr);
371 memset(&shdr64, 0, shdr_size);
372 shdr64.sh_info = cpu_convert_to_target32(s->sh_info, endian);
373 shdr = &shdr64;
376 ret = fd_write_vmcore(&shdr, shdr_size, s);
377 if (ret < 0) {
378 dump_error(s, "dump: failed to write section header table.\n");
379 return -1;
382 return 0;
385 static int write_data(DumpState *s, void *buf, int length)
387 int ret;
389 ret = fd_write_vmcore(buf, length, s);
390 if (ret < 0) {
391 dump_error(s, "dump: failed to save memory.\n");
392 return -1;
395 return 0;
398 /* write the memroy to vmcore. 1 page per I/O. */
399 static int write_memory(DumpState *s, RAMBlock *block, ram_addr_t start,
400 int64_t size)
402 int64_t i;
403 int ret;
405 for (i = 0; i < size / TARGET_PAGE_SIZE; i++) {
406 ret = write_data(s, block->host + start + i * TARGET_PAGE_SIZE,
407 TARGET_PAGE_SIZE);
408 if (ret < 0) {
409 return ret;
413 if ((size % TARGET_PAGE_SIZE) != 0) {
414 ret = write_data(s, block->host + start + i * TARGET_PAGE_SIZE,
415 size % TARGET_PAGE_SIZE);
416 if (ret < 0) {
417 return ret;
421 return 0;
424 /* get the memory's offset in the vmcore */
425 static hwaddr get_offset(hwaddr phys_addr,
426 DumpState *s)
428 RAMBlock *block;
429 hwaddr offset = s->memory_offset;
430 int64_t size_in_block, start;
432 if (s->has_filter) {
433 if (phys_addr < s->begin || phys_addr >= s->begin + s->length) {
434 return -1;
438 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
439 if (s->has_filter) {
440 if (block->offset >= s->begin + s->length ||
441 block->offset + block->length <= s->begin) {
442 /* This block is out of the range */
443 continue;
446 if (s->begin <= block->offset) {
447 start = block->offset;
448 } else {
449 start = s->begin;
452 size_in_block = block->length - (start - block->offset);
453 if (s->begin + s->length < block->offset + block->length) {
454 size_in_block -= block->offset + block->length -
455 (s->begin + s->length);
457 } else {
458 start = block->offset;
459 size_in_block = block->length;
462 if (phys_addr >= start && phys_addr < start + size_in_block) {
463 return phys_addr - start + offset;
466 offset += size_in_block;
469 return -1;
472 static int write_elf_loads(DumpState *s)
474 hwaddr offset;
475 MemoryMapping *memory_mapping;
476 uint32_t phdr_index = 1;
477 int ret;
478 uint32_t max_index;
480 if (s->have_section) {
481 max_index = s->sh_info;
482 } else {
483 max_index = s->phdr_num;
486 QTAILQ_FOREACH(memory_mapping, &s->list.head, next) {
487 offset = get_offset(memory_mapping->phys_addr, s);
488 if (s->dump_info.d_class == ELFCLASS64) {
489 ret = write_elf64_load(s, memory_mapping, phdr_index++, offset);
490 } else {
491 ret = write_elf32_load(s, memory_mapping, phdr_index++, offset);
494 if (ret < 0) {
495 return -1;
498 if (phdr_index >= max_index) {
499 break;
503 return 0;
506 /* write elf header, PT_NOTE and elf note to vmcore. */
507 static int dump_begin(DumpState *s)
509 int ret;
512 * the vmcore's format is:
513 * --------------
514 * | elf header |
515 * --------------
516 * | PT_NOTE |
517 * --------------
518 * | PT_LOAD |
519 * --------------
520 * | ...... |
521 * --------------
522 * | PT_LOAD |
523 * --------------
524 * | sec_hdr |
525 * --------------
526 * | elf note |
527 * --------------
528 * | memory |
529 * --------------
531 * we only know where the memory is saved after we write elf note into
532 * vmcore.
535 /* write elf header to vmcore */
536 if (s->dump_info.d_class == ELFCLASS64) {
537 ret = write_elf64_header(s);
538 } else {
539 ret = write_elf32_header(s);
541 if (ret < 0) {
542 return -1;
545 if (s->dump_info.d_class == ELFCLASS64) {
546 /* write PT_NOTE to vmcore */
547 if (write_elf64_note(s) < 0) {
548 return -1;
551 /* write all PT_LOAD to vmcore */
552 if (write_elf_loads(s) < 0) {
553 return -1;
556 /* write section to vmcore */
557 if (s->have_section) {
558 if (write_elf_section(s, 1) < 0) {
559 return -1;
563 /* write notes to vmcore */
564 if (write_elf64_notes(s) < 0) {
565 return -1;
568 } else {
569 /* write PT_NOTE to vmcore */
570 if (write_elf32_note(s) < 0) {
571 return -1;
574 /* write all PT_LOAD to vmcore */
575 if (write_elf_loads(s) < 0) {
576 return -1;
579 /* write section to vmcore */
580 if (s->have_section) {
581 if (write_elf_section(s, 0) < 0) {
582 return -1;
586 /* write notes to vmcore */
587 if (write_elf32_notes(s) < 0) {
588 return -1;
592 return 0;
595 /* write PT_LOAD to vmcore */
596 static int dump_completed(DumpState *s)
598 dump_cleanup(s);
599 return 0;
602 static int get_next_block(DumpState *s, RAMBlock *block)
604 while (1) {
605 block = QTAILQ_NEXT(block, next);
606 if (!block) {
607 /* no more block */
608 return 1;
611 s->start = 0;
612 s->block = block;
613 if (s->has_filter) {
614 if (block->offset >= s->begin + s->length ||
615 block->offset + block->length <= s->begin) {
616 /* This block is out of the range */
617 continue;
620 if (s->begin > block->offset) {
621 s->start = s->begin - block->offset;
625 return 0;
629 /* write all memory to vmcore */
630 static int dump_iterate(DumpState *s)
632 RAMBlock *block;
633 int64_t size;
634 int ret;
636 while (1) {
637 block = s->block;
639 size = block->length;
640 if (s->has_filter) {
641 size -= s->start;
642 if (s->begin + s->length < block->offset + block->length) {
643 size -= block->offset + block->length - (s->begin + s->length);
646 ret = write_memory(s, block, s->start, size);
647 if (ret == -1) {
648 return ret;
651 ret = get_next_block(s, block);
652 if (ret == 1) {
653 dump_completed(s);
654 return 0;
659 static int create_vmcore(DumpState *s)
661 int ret;
663 ret = dump_begin(s);
664 if (ret < 0) {
665 return -1;
668 ret = dump_iterate(s);
669 if (ret < 0) {
670 return -1;
673 return 0;
676 static ram_addr_t get_start_block(DumpState *s)
678 RAMBlock *block;
680 if (!s->has_filter) {
681 s->block = QTAILQ_FIRST(&ram_list.blocks);
682 return 0;
685 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
686 if (block->offset >= s->begin + s->length ||
687 block->offset + block->length <= s->begin) {
688 /* This block is out of the range */
689 continue;
692 s->block = block;
693 if (s->begin > block->offset) {
694 s->start = s->begin - block->offset;
695 } else {
696 s->start = 0;
698 return s->start;
701 return -1;
704 static int dump_init(DumpState *s, int fd, bool paging, bool has_filter,
705 int64_t begin, int64_t length, Error **errp)
707 CPUArchState *env;
708 int nr_cpus;
709 int ret;
711 if (runstate_is_running()) {
712 vm_stop(RUN_STATE_SAVE_VM);
713 s->resume = true;
714 } else {
715 s->resume = false;
718 s->errp = errp;
719 s->fd = fd;
720 s->has_filter = has_filter;
721 s->begin = begin;
722 s->length = length;
723 s->start = get_start_block(s);
724 if (s->start == -1) {
725 error_set(errp, QERR_INVALID_PARAMETER, "begin");
726 goto cleanup;
730 * get dump info: endian, class and architecture.
731 * If the target architecture is not supported, cpu_get_dump_info() will
732 * return -1.
734 * if we use kvm, we should synchronize the register before we get dump
735 * info.
737 nr_cpus = 0;
738 for (env = first_cpu; env != NULL; env = env->next_cpu) {
739 cpu_synchronize_state(env);
740 nr_cpus++;
743 ret = cpu_get_dump_info(&s->dump_info);
744 if (ret < 0) {
745 error_set(errp, QERR_UNSUPPORTED);
746 goto cleanup;
749 s->note_size = cpu_get_note_size(s->dump_info.d_class,
750 s->dump_info.d_machine, nr_cpus);
751 if (ret < 0) {
752 error_set(errp, QERR_UNSUPPORTED);
753 goto cleanup;
756 /* get memory mapping */
757 memory_mapping_list_init(&s->list);
758 if (paging) {
759 qemu_get_guest_memory_mapping(&s->list);
760 } else {
761 qemu_get_guest_simple_memory_mapping(&s->list);
764 if (s->has_filter) {
765 memory_mapping_filter(&s->list, s->begin, s->length);
769 * calculate phdr_num
771 * the type of ehdr->e_phnum is uint16_t, so we should avoid overflow
773 s->phdr_num = 1; /* PT_NOTE */
774 if (s->list.num < UINT16_MAX - 2) {
775 s->phdr_num += s->list.num;
776 s->have_section = false;
777 } else {
778 s->have_section = true;
779 s->phdr_num = PN_XNUM;
780 s->sh_info = 1; /* PT_NOTE */
782 /* the type of shdr->sh_info is uint32_t, so we should avoid overflow */
783 if (s->list.num <= UINT32_MAX - 1) {
784 s->sh_info += s->list.num;
785 } else {
786 s->sh_info = UINT32_MAX;
790 if (s->dump_info.d_class == ELFCLASS64) {
791 if (s->have_section) {
792 s->memory_offset = sizeof(Elf64_Ehdr) +
793 sizeof(Elf64_Phdr) * s->sh_info +
794 sizeof(Elf64_Shdr) + s->note_size;
795 } else {
796 s->memory_offset = sizeof(Elf64_Ehdr) +
797 sizeof(Elf64_Phdr) * s->phdr_num + s->note_size;
799 } else {
800 if (s->have_section) {
801 s->memory_offset = sizeof(Elf32_Ehdr) +
802 sizeof(Elf32_Phdr) * s->sh_info +
803 sizeof(Elf32_Shdr) + s->note_size;
804 } else {
805 s->memory_offset = sizeof(Elf32_Ehdr) +
806 sizeof(Elf32_Phdr) * s->phdr_num + s->note_size;
810 return 0;
812 cleanup:
813 if (s->resume) {
814 vm_start();
817 return -1;
820 void qmp_dump_guest_memory(bool paging, const char *file, bool has_begin,
821 int64_t begin, bool has_length, int64_t length,
822 Error **errp)
824 const char *p;
825 int fd = -1;
826 DumpState *s;
827 int ret;
829 if (has_begin && !has_length) {
830 error_set(errp, QERR_MISSING_PARAMETER, "length");
831 return;
833 if (!has_begin && has_length) {
834 error_set(errp, QERR_MISSING_PARAMETER, "begin");
835 return;
838 #if !defined(WIN32)
839 if (strstart(file, "fd:", &p)) {
840 fd = monitor_get_fd(cur_mon, p, errp);
841 if (fd == -1) {
842 return;
845 #endif
847 if (strstart(file, "file:", &p)) {
848 fd = qemu_open(p, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, S_IRUSR);
849 if (fd < 0) {
850 error_set(errp, QERR_OPEN_FILE_FAILED, p);
851 return;
855 if (fd == -1) {
856 error_set(errp, QERR_INVALID_PARAMETER, "protocol");
857 return;
860 s = g_malloc(sizeof(DumpState));
862 ret = dump_init(s, fd, paging, has_begin, begin, length, errp);
863 if (ret < 0) {
864 g_free(s);
865 return;
868 if (create_vmcore(s) < 0 && !error_is_set(s->errp)) {
869 error_set(errp, QERR_IO_ERROR);
872 g_free(s);