linux-user: Handle TARGET_MAP_STACK and TARGET_MAP_HUGETLB
[qemu/ar7.git] / scripts / dump-guest-memory.py
blob69dd5efadf9e3252ff14db3329d251fa4fcbe87c
1 """
2 This python script adds a new gdb command, "dump-guest-memory". It
3 should be loaded with "source dump-guest-memory.py" at the (gdb)
4 prompt.
6 Copyright (C) 2013, Red Hat, Inc.
8 Authors:
9 Laszlo Ersek <lersek@redhat.com>
10 Janosch Frank <frankja@linux.vnet.ibm.com>
12 This work is licensed under the terms of the GNU GPL, version 2 or later. See
13 the COPYING file in the top-level directory.
14 """
16 import ctypes
17 import struct
# gdb type that host pointers are cast to before being printed with "%016x".
UINTPTR_T = gdb.lookup_type("uintptr_t")

# Guest memory is copied out of the core in chunks of at most this many bytes.
TARGET_PAGE_SIZE = 0x1000
TARGET_PAGE_MASK = 0xFFFFFFFFFFFFF000

# Special value for e_phnum. This indicates that the real number of
# program headers is too large to fit into e_phnum. Instead the real
# value is in the field sh_info of section 0.
PN_XNUM = 0xFFFF

# Stored in both e_ident.ei_version and e_version.
EV_CURRENT = 1

# Values for e_ident.ei_class; they select the 32 or 64 bit header layouts.
ELFCLASS32 = 1
ELFCLASS64 = 2

# Values for e_ident.ei_data; they select little or big endian structures.
ELFDATA2LSB = 1
ELFDATA2MSB = 2

# Value stored in e_type of the generated file.
ET_CORE = 4

# Values for p_type of the program headers that are written.
PT_LOAD = 1
PT_NOTE = 4

# Values for e_machine, one per supported guest architecture family.
EM_386 = 3
EM_PPC = 20
EM_PPC64 = 21
EM_S390 = 22
EM_AARCH = 183
EM_X86_64 = 62

# The only vmcoreinfo guest_format value for which a note is added.
VMCOREINFO_FORMAT_ELF = 1
def le16_to_cpu(val):
    """Reinterpret a 16 bit value as little endian.

    The value is serialized in the byte order of the machine running
    this script and then decoded as a little-endian unsigned 16 bit
    integer.
    """
    native_bytes = struct.pack("=H", val)
    (converted,) = struct.unpack("<H", native_bytes)
    return converted
def le32_to_cpu(val):
    """Reinterpret a 32 bit value as little endian.

    The value is serialized in the byte order of the machine running
    this script and then decoded as a little-endian unsigned 32 bit
    integer.
    """
    native_bytes = struct.pack("=I", val)
    (converted,) = struct.unpack("<I", native_bytes)
    return converted
def le64_to_cpu(val):
    """Reinterpret a 64 bit value as little endian.

    The value is serialized in the byte order of the machine running
    this script and then decoded as a little-endian unsigned 64 bit
    integer.
    """
    native_bytes = struct.pack("=Q", val)
    (converted,) = struct.unpack("<Q", native_bytes)
    return converted
class ELF(object):
    """Representation of a ELF file.

    Collects the ELF header, the program headers ("segments") and the
    notes of the core file being generated. Segment 0 is always the
    PT_NOTE segment; its size grows as notes are added.
    """

    def __init__(self, arch):
        """Creates the header for ARCH and the initial PT_NOTE segment.

        Raises gdb.GdbError if ARCH is not one of the supported names.
        """

        self.ehdr = None
        self.notes = []
        self.segments = []
        self.notes_size = 0
        self.endianness = None
        self.elfclass = ELFCLASS64

        # ARCH argument -> (byte order, ELF class, e_machine). Kept local
        # so that the constants are only looked up when an ELF is built.
        arch_table = {
            'aarch64-le': (ELFDATA2LSB, ELFCLASS64, EM_AARCH),
            'aarch64-be': (ELFDATA2MSB, ELFCLASS64, EM_AARCH),
            'X86_64': (ELFDATA2LSB, ELFCLASS64, EM_X86_64),
            '386': (ELFDATA2LSB, ELFCLASS32, EM_386),
            's390': (ELFDATA2MSB, ELFCLASS64, EM_S390),
            'ppc64-le': (ELFDATA2LSB, ELFCLASS64, EM_PPC64),
            'ppc64-be': (ELFDATA2MSB, ELFCLASS64, EM_PPC64),
        }

        if arch not in arch_table:
            raise gdb.GdbError("No valid arch type specified.\n"
                               "Currently supported types:\n"
                               "aarch64-be, aarch64-le, X86_64, 386, s390, "
                               "ppc64-be, ppc64-le")

        self.endianness, self.elfclass, machine = arch_table[arch]
        self.ehdr = get_arch_ehdr(self.endianness, self.elfclass)
        self.ehdr.e_machine = machine

        self.add_segment(PT_NOTE, 0, 0)

    def add_note(self, n_name, n_desc, n_type):
        """Adds a note to the ELF.

        n_name and n_desc are text strings; they are encoded once and
        all sizes are derived from the encoded bytes, so that the note
        fields and the copied data always agree.
        """

        name = n_name.encode()
        desc = n_desc.encode()

        note = get_arch_note(self.endianness, len(name), len(desc))
        # n_namesz accounts for the terminating NUL byte.
        note.n_namesz = len(name) + 1
        note.n_descsz = len(desc)
        note.n_name = name
        note.n_type = n_type

        # Desc needs to be 4 byte aligned (although the 64bit spec
        # specifies 8 byte). When defining n_desc as uint32 it will be
        # automatically aligned but we need the memmove to copy the
        # string into it.
        ctypes.memmove(note.n_desc, desc, len(desc))

        self.notes.append(note)
        self.segments[0].p_filesz += ctypes.sizeof(note)
        self.segments[0].p_memsz += ctypes.sizeof(note)

    def add_vmcoreinfo_note(self, vmcoreinfo):
        """Adds a vmcoreinfo note to the ELF dump.

        vmcoreinfo is a bytes object holding the raw note. Its header
        fields are not trusted: a buffer that is too short for a note
        header, or whose sizes are implausible, is rejected with a
        warning, and no more bytes than the buffer holds are copied.
        """

        # The three uint32 fields n_namesz, n_descsz and n_type.
        min_size = 3 * ctypes.sizeof(ctypes.c_uint32)
        if len(vmcoreinfo) < min_size:
            print('warning: invalid vmcoreinfo size')
            return

        # compute the header size, and copy that many bytes from the note
        header = get_arch_note(self.endianness, 0, 0)
        ctypes.memmove(ctypes.pointer(header), vmcoreinfo,
                       min(len(vmcoreinfo), ctypes.sizeof(header)))
        if header.n_descsz > 1 << 20 or header.n_namesz > len(vmcoreinfo):
            print('warning: invalid vmcoreinfo size')
            return

        # now get the full note
        note = get_arch_note(self.endianness,
                             header.n_namesz - 1, header.n_descsz)
        # The structure starts out zeroed, so a short buffer simply leaves
        # the tail of the note as zero bytes.
        ctypes.memmove(ctypes.pointer(note), vmcoreinfo,
                       min(len(vmcoreinfo), ctypes.sizeof(note)))

        self.notes.append(note)
        self.segments[0].p_filesz += ctypes.sizeof(note)
        self.segments[0].p_memsz += ctypes.sizeof(note)

    def add_segment(self, p_type, p_paddr, p_size):
        """Adds a segment to the elf."""

        phdr = get_arch_phdr(self.endianness, self.elfclass)
        phdr.p_type = p_type
        phdr.p_paddr = p_paddr
        phdr.p_filesz = p_size
        phdr.p_memsz = p_size
        self.segments.append(phdr)
        self.ehdr.e_phnum += 1

    def to_file(self, elf_file):
        """Writes all ELF structures to the passed file.

        Structure:
        Ehdr
        Segment 0:PT_NOTE
        Segment 1:PT_LOAD
        Segment N:PT_LOAD
        Note    0..N
        Dump contents

        Only the headers and the notes are written here; the dump
        contents have to be appended by the caller.
        """
        elf_file.write(self.ehdr)

        # File offset of the first byte after all program headers.
        off = ctypes.sizeof(self.ehdr) + \
            len(self.segments) * ctypes.sizeof(self.segments[0])

        for phdr in self.segments:
            phdr.p_offset = off
            elf_file.write(phdr)
            off += phdr.p_filesz

        for note in self.notes:
            elf_file.write(note)
def get_arch_note(endianness, len_name, len_desc):
    """Returns a zeroed Note instance laid out for the given endianness.

    len_name is the length of the note name without its terminator and
    len_desc the length of the descriptor in bytes; the structure is
    sized to hold both.
    """

    base = (ctypes.LittleEndianStructure if endianness == ELFDATA2LSB
            else ctypes.BigEndianStructure)

    # One extra byte so the name is always NUL terminated.
    name_type = ctypes.c_char * (len_name + 1)
    # The descriptor is stored as whole 32 bit words, rounded up.
    desc_type = ctypes.c_uint32 * ((len_desc + 3) // 4)

    class Note(base):
        """Represents an ELF note, includes the content."""

        _fields_ = [
            ("n_namesz", ctypes.c_uint32),
            ("n_descsz", ctypes.c_uint32),
            ("n_type", ctypes.c_uint32),
            ("n_name", name_type),
            ("n_desc", desc_type),
        ]

    return Note()
class Ident(ctypes.Structure):
    """The e_ident byte array that opens an ELF header.

    Every member is a single unsigned byte; ei_pad fills the structure
    up with seven further bytes.
    """

    _fields_ = [
        ("ei_mag0", ctypes.c_ubyte),
        ("ei_mag1", ctypes.c_ubyte),
        ("ei_mag2", ctypes.c_ubyte),
        ("ei_mag3", ctypes.c_ubyte),
        ("ei_class", ctypes.c_ubyte),
        ("ei_data", ctypes.c_ubyte),
        ("ei_version", ctypes.c_ubyte),
        ("ei_osabi", ctypes.c_ubyte),
        ("ei_abiversion", ctypes.c_ubyte),
        ("ei_pad", ctypes.c_ubyte * 7),
    ]

    def __init__(self, endianness, elfclass):
        # The magic number: 0x7F followed by the letters "ELF".
        magic = bytearray(b"\x7fELF")
        self.ei_mag0, self.ei_mag1, self.ei_mag2, self.ei_mag3 = magic

        self.ei_class = elfclass
        self.ei_data = endianness
        self.ei_version = EV_CURRENT
def get_arch_ehdr(endianness, elfclass):
    """Returns a 32 or 64 bit EHDR instance with the specified endianness.

    The returned header is already filled in as a core file header with
    zero program headers; e_machine is left for the caller to set.
    """

    if endianness == ELFDATA2LSB:
        superclass = ctypes.LittleEndianStructure
    else:
        superclass = ctypes.BigEndianStructure

    def init_common(ehdr):
        """Fills in the fields that are set the same way for both classes."""
        ehdr.e_ident = Ident(endianness, elfclass)
        ehdr.e_type = ET_CORE
        ehdr.e_version = EV_CURRENT
        ehdr.e_ehsize = ctypes.sizeof(ehdr)
        # The program headers directly follow the ELF header.
        ehdr.e_phoff = ctypes.sizeof(ehdr)
        ehdr.e_phentsize = ctypes.sizeof(get_arch_phdr(endianness, elfclass))
        ehdr.e_phnum = 0

    class EHDR64(superclass):
        """Represents the 64 bit ELF header struct."""

        _fields_ = [('e_ident', Ident),
                    ('e_type', ctypes.c_uint16),
                    ('e_machine', ctypes.c_uint16),
                    ('e_version', ctypes.c_uint32),
                    ('e_entry', ctypes.c_uint64),
                    ('e_phoff', ctypes.c_uint64),
                    ('e_shoff', ctypes.c_uint64),
                    ('e_flags', ctypes.c_uint32),
                    ('e_ehsize', ctypes.c_uint16),
                    ('e_phentsize', ctypes.c_uint16),
                    ('e_phnum', ctypes.c_uint16),
                    ('e_shentsize', ctypes.c_uint16),
                    ('e_shnum', ctypes.c_uint16),
                    ('e_shstrndx', ctypes.c_uint16)]

        def __init__(self):
            # Name the defining class so the base initializer is not skipped.
            super(EHDR64, self).__init__()
            init_common(self)

    class EHDR32(superclass):
        """Represents the 32 bit ELF header struct."""

        _fields_ = [('e_ident', Ident),
                    ('e_type', ctypes.c_uint16),
                    ('e_machine', ctypes.c_uint16),
                    ('e_version', ctypes.c_uint32),
                    ('e_entry', ctypes.c_uint32),
                    ('e_phoff', ctypes.c_uint32),
                    ('e_shoff', ctypes.c_uint32),
                    ('e_flags', ctypes.c_uint32),
                    ('e_ehsize', ctypes.c_uint16),
                    ('e_phentsize', ctypes.c_uint16),
                    ('e_phnum', ctypes.c_uint16),
                    ('e_shentsize', ctypes.c_uint16),
                    ('e_shnum', ctypes.c_uint16),
                    ('e_shstrndx', ctypes.c_uint16)]

        def __init__(self):
            # Name the defining class so the base initializer is not skipped.
            super(EHDR32, self).__init__()
            init_common(self)

    # End get_arch_ehdr
    if elfclass == ELFCLASS64:
        return EHDR64()
    else:
        return EHDR32()
def get_arch_phdr(endianness, elfclass):
    """Returns a zeroed program header for the given endianness and class.

    The result is a PHDR64 instance when elfclass is ELFCLASS64 and a
    PHDR32 instance otherwise. The two layouts differ in field width and
    in the position of p_flags.
    """

    base = (ctypes.LittleEndianStructure if endianness == ELFDATA2LSB
            else ctypes.BigEndianStructure)

    if elfclass == ELFCLASS64:
        class PHDR64(base):
            """Represents the 64 bit ELF program header struct."""

            _fields_ = [
                ("p_type", ctypes.c_uint32),
                ("p_flags", ctypes.c_uint32),
                ("p_offset", ctypes.c_uint64),
                ("p_vaddr", ctypes.c_uint64),
                ("p_paddr", ctypes.c_uint64),
                ("p_filesz", ctypes.c_uint64),
                ("p_memsz", ctypes.c_uint64),
                ("p_align", ctypes.c_uint64),
            ]

        return PHDR64()

    class PHDR32(base):
        """Represents the 32 bit ELF program header struct."""

        _fields_ = [
            ("p_type", ctypes.c_uint32),
            ("p_offset", ctypes.c_uint32),
            ("p_vaddr", ctypes.c_uint32),
            ("p_paddr", ctypes.c_uint32),
            ("p_filesz", ctypes.c_uint32),
            ("p_memsz", ctypes.c_uint32),
            ("p_flags", ctypes.c_uint32),
            ("p_align", ctypes.c_uint32),
        ]

    return PHDR32()
def int128_get64(val):
    """Returns low 64bit part of Int128 struct.

    The value is first accessed through its "hi" and "lo" members. If
    gdb reports an error for that access, the value is reinterpreted as
    an array of two uint64_t and the low half is picked according to
    the byte order of the machine running this script. In both cases
    the high half is asserted to be zero.
    """

    # Imported here because the file does not import sys at the top and
    # must not rely on gdb having put it into the global namespace.
    import sys

    try:
        assert val["hi"] == 0
        return val["lo"]
    except gdb.error:
        # NOTE(review): presumably Int128 is a native 128 bit integer in
        # this build rather than a struct -- confirm.
        u64t = gdb.lookup_type('uint64_t').array(2)
        u64 = val.cast(u64t)
        if sys.byteorder == 'little':
            assert u64[1] == 0
            return u64[0]
        else:
            assert u64[0] == 0
            return u64[1]
def qlist_foreach(head, field_str):
    """Yields every element of a qlist, starting at head["lh_first"].

    field_str names the member of each element that holds the list
    linkage; the walk follows its "le_next" pointer until that pointer
    compares equal to 0.
    """

    node_ptr = head["lh_first"]
    while node_ptr != 0:
        node = node_ptr.dereference()
        # Fetch the successor before handing the element to the caller.
        node_ptr = node[field_str]["le_next"]
        yield node
def qemu_map_ram_ptr(block, offset):
    """Returns the block's "host" address advanced by offset."""

    host_base = block["host"]
    return host_base + offset
def memory_region_get_ram_ptr(memory_region):
    """Returns the host address backing a memory region.

    Alias regions are followed down to the region that is not an alias;
    the address of that region's ram block is then advanced by the
    "alias_offset" of every alias on the way back up.
    """

    # Walk down the alias chain, remembering each alias that was crossed.
    aliases = []
    region = memory_region
    while region["alias"] != 0:
        aliases.append(region)
        region = region["alias"].dereference()

    ram_ptr = qemu_map_ram_ptr(region["ram_block"], 0)

    # Apply the offsets innermost alias first.
    while aliases:
        ram_ptr = ram_ptr + aliases.pop()["alias_offset"]

    return ram_ptr
def get_guest_phys_blocks():
    """Returns a list of ram blocks.

    Each block entry contains:
    'target_start': guest block phys start address
    'target_end':   guest block phys end address
    'host_addr':    qemu vaddr of the block's start

    The flat ranges of address_space_memory.current_map are walked in
    order. Ranges that are not RAM are skipped, and a range that
    directly continues the previous block in both guest-physical and
    host-virtual memory is merged into it. A table of the ranges is
    printed along the way.
    """

    guest_phys_blocks = []

    # The header columns are padded to the width of the 16 digit
    # addresses printed below them.
    print("guest RAM blocks:")
    print("target_start     target_end       host_addr        message "
          "count")
    print("---------------- ---------------- ---------------- ------- "
          "-----")

    current_map_p = gdb.parse_and_eval("address_space_memory.current_map")
    current_map = current_map_p.dereference()

    # Conversion to int is needed for python 3
    # compatibility. Otherwise range doesn't cast the value itself and
    # breaks.
    for cur in range(int(current_map["nr"])):
        flat_range = (current_map["ranges"] + cur).dereference()
        memory_region = flat_range["mr"].dereference()

        # we only care about RAM
        if not memory_region["ram"]:
            continue

        section_size = int128_get64(flat_range["addr"]["size"])
        target_start = int128_get64(flat_range["addr"]["start"])
        target_end = target_start + section_size
        host_addr = (memory_region_get_ram_ptr(memory_region)
                     + flat_range["offset_in_region"])
        predecessor = None

        # find continuity in guest physical address space
        if guest_phys_blocks:
            predecessor = guest_phys_blocks[-1]
            predecessor_size = (predecessor["target_end"] -
                                predecessor["target_start"])

            # the memory API guarantees monotonically increasing
            # traversal
            assert predecessor["target_end"] <= target_start

            # we want continuity in both guest-physical and
            # host-virtual memory
            if (predecessor["target_end"] < target_start or
                    predecessor["host_addr"] + predecessor_size != host_addr):
                predecessor = None

        if predecessor is None:
            # isolated mapping, add it to the list
            guest_phys_blocks.append({"target_start": target_start,
                                      "target_end": target_end,
                                      "host_addr": host_addr})
            message = "added"
        else:
            # expand predecessor until @target_end; predecessor's
            # start doesn't change
            predecessor["target_end"] = target_end
            message = "joined"

        print("%016x %016x %016x %-7s %5u" %
              (target_start, target_end, host_addr.cast(UINTPTR_T),
               message, len(guest_phys_blocks)))

    return guest_phys_blocks
459 # The leading docstring doesn't have idiomatic Python formatting. It is
460 # printed by gdb's "help" command (the first line is printed in the
461 # "help data" summary), and it should match how other help texts look in
462 # gdb.
class DumpGuestMemory(gdb.Command):
    """Extract guest vmcore from qemu process coredump.

The two required arguments are FILE and ARCH:
FILE identifies the target file to write the guest vmcore to.
ARCH specifies the architecture for which the core will be generated.

This GDB command reimplements the dump-guest-memory QMP command in
python, using the representation of guest memory as captured in the qemu
coredump. The qemu process that has been dumped must have had the
command line option "-machine dump-guest-core=on" which is the default.

For simplicity, the "paging", "begin" and "end" parameters of the QMP
command are not supported -- no attempt is made to get the guest's
internal paging structures (ie. paging=false is hard-wired), and guest
memory is always fully dumped.

Currently aarch64-be, aarch64-le, X86_64, 386, s390, ppc64-be,
ppc64-le guests are supported.

The CORE/NT_PRSTATUS and QEMU notes (that is, the VCPUs' statuses) are
not written to the vmcore. Preparing these would require context that is
only present in the KVM host kernel module when the guest is alive. A
fake ELF note is written instead, only to keep the ELF parser of "crash"
happy.

Dependent on how busted the qemu process was at the time of the
coredump, this command might produce unpredictable results. If qemu
deliberately called abort(), or it was dumped in response to a signal at
a halfway fortunate point, then its coredump should be in reasonable
shape and this command should mostly work."""

    def __init__(self):
        command_spec = ("dump-guest-memory",
                        gdb.COMMAND_DATA,
                        gdb.COMPLETE_FILENAME)
        super(DumpGuestMemory, self).__init__(*command_spec)

        # Both are filled in by invoke().
        self.elf = None
        self.guest_phys_blocks = None

    def dump_init(self, vmcore):
        """Builds the ELF headers and notes and writes them to vmcore."""

        # "crash" wants to see at least one note; nothing more useful
        # can be recovered from a qemu core.
        self.elf.add_note("NONE", "EMPTY", 0)

        # One program header per RAM block plus the PT_NOTE segment.
        # After merging there is only a handful of discontiguous
        # ranges, so PN_XNUM should never be reached for paging=false
        # dumps.
        phdr_num = 1 + len(self.guest_phys_blocks)
        assert phdr_num < PN_XNUM

        for ram_block in self.guest_phys_blocks:
            start = ram_block["target_start"]
            length = ram_block["target_end"] - start
            self.elf.add_segment(PT_LOAD, start, length)

        self.elf.to_file(vmcore)

    def dump_iterate(self, vmcore):
        """Appends the contents of every guest RAM block to vmcore."""

        inferior = gdb.inferiors()[0]
        for ram_block in self.guest_phys_blocks:
            host_ptr = ram_block["host_addr"]
            remaining = ram_block["target_end"] - ram_block["target_start"]
            print("dumping range at %016x for length %016x" %
                  (host_ptr.cast(UINTPTR_T), remaining))

            # Copy at most one target page per read.
            while remaining > 0:
                step = min(TARGET_PAGE_SIZE, remaining)
                vmcore.write(inferior.read_memory(host_ptr, step))
                host_ptr += step
                remaining -= step

    def phys_memory_read(self, addr, size):
        """Reads size bytes at guest physical address addr.

        Returns None unless the whole range lies inside a single known
        guest RAM block.
        """

        inferior = gdb.inferiors()[0]
        for ram_block in self.guest_phys_blocks:
            start = ram_block["target_start"]
            inside = (start <= addr
                      and addr + size <= ram_block["target_end"])
            if not inside:
                continue

            host_ptr = ram_block["host_addr"] + (addr - start)
            return inferior.read_memory(host_ptr, size)

        return None

    def add_vmcoreinfo(self):
        """Adds the guest's vmcoreinfo note to the ELF, if one is found.

        Nothing is added when no vmcoreinfo is reported, when its
        format is not ELF, or when its memory cannot be read.
        """

        if not gdb.parse_and_eval("vmcoreinfo_find()"):
            return
        if not gdb.parse_and_eval("vmcoreinfo_find()->has_vmcoreinfo"):
            return

        guest_format = le16_to_cpu(
            gdb.parse_and_eval("vmcoreinfo_find()->vmcoreinfo.guest_format"))
        paddr = le64_to_cpu(
            gdb.parse_and_eval("vmcoreinfo_find()->vmcoreinfo.paddr"))
        length = le32_to_cpu(
            gdb.parse_and_eval("vmcoreinfo_find()->vmcoreinfo.size"))

        if guest_format != VMCOREINFO_FORMAT_ELF:
            return

        raw_note = self.phys_memory_read(paddr, length)
        if raw_note:
            self.elf.add_vmcoreinfo_note(raw_note.tobytes())

    def invoke(self, args, from_tty):
        """Runs the command: parses FILE and ARCH and writes the vmcore."""

        # Pressing Enter again by accident must not write the same
        # multi-gig coredump to the same file a second time.
        self.dont_repeat()

        argv = gdb.string_to_argv(args)
        if len(argv) != 2:
            raise gdb.GdbError("usage: dump-guest-memory FILE ARCH")

        target_file, arch = argv[0], argv[1]

        self.elf = ELF(arch)
        self.guest_phys_blocks = get_guest_phys_blocks()
        self.add_vmcoreinfo()

        with open(target_file, "wb") as vmcore:
            self.dump_init(vmcore)
            self.dump_iterate(vmcore)
# Instantiate the command once when the script is sourced; the constructor
# passes the "dump-guest-memory" name on to gdb.Command.
DumpGuestMemory()