scripts/dump-guest-memory.py: Improve python 3 compatibility
[qemu.git] / scripts / dump-guest-memory.py
blob bb4ca8e3d480e3b19b2cbc6f1a5cc9a5c2614c85
1 # This python script adds a new gdb command, "dump-guest-memory". It
2 # should be loaded with "source dump-guest-memory.py" at the (gdb)
3 # prompt.
5 # Copyright (C) 2013, Red Hat, Inc.
7 # Authors:
8 # Laszlo Ersek <lersek@redhat.com>
10 # This work is licensed under the terms of the GNU GPL, version 2 or later. See
11 # the COPYING file in the top-level directory.
13 # The leading docstring doesn't have idiomatic Python formatting. It is
14 # printed by gdb's "help" command (the first line is printed in the
15 # "help data" summary), and it should match how other help texts look in
16 # gdb.
18 import struct
# gdb type object for "uintptr_t", looked up once at load time.  The code
# below casts host pointers to this type before formatting them with "%x".
UINTPTR_T = gdb.lookup_type("uintptr_t")
# Page size (0x1000 bytes) and the matching 64-bit mask that clears the
# in-page offset bits of an address.  The mask is derived from the size so
# the two values cannot drift apart.
TARGET_PAGE_SIZE = 1 << 12
TARGET_PAGE_MASK = ~(TARGET_PAGE_SIZE - 1) & 0xFFFFFFFFFFFFFFFF
# Various ELF constants
EM_X86_64 = 62     # AMD x86-64 target machine
ELFDATA2LSB = 1    # little endian
ELFCLASS64 = 2
# The magic is packed through a struct "4s" field, which only accepts a
# bytes object on Python 3.  A b"" literal is an ordinary str on Python 2,
# so this spelling works on both.
ELFMAG = b"\x7FELF"
EV_CURRENT = 1
ET_CORE = 4
PT_LOAD = 1
PT_NOTE = 4

# Special value for e_phnum. This indicates that the real number of
# program headers is too large to fit into e_phnum. Instead the real
# value is in the field sh_info of section 0.
PN_XNUM = 0xFFFF
# Format strings for packing and header size calculation.  Every entry is
# one struct-module field code, listed in ELF field order; the pieces are
# joined into a single format string (byte order is prepended by the user).
ELF64_EHDR = "".join((
    "4s",  # e_ident/magic
    "B",   # e_ident/class
    "B",   # e_ident/data
    "B",   # e_ident/version
    "B",   # e_ident/osabi
    "8s",  # e_ident/pad
    "H",   # e_type
    "H",   # e_machine
    "I",   # e_version
    "Q",   # e_entry
    "Q",   # e_phoff
    "Q",   # e_shoff
    "I",   # e_flags
    "H",   # e_ehsize
    "H",   # e_phentsize
    "H",   # e_phnum
    "H",   # e_shentsize
    "H",   # e_shnum
    "H",   # e_shstrndx
))

ELF64_PHDR = "".join((
    "I",   # p_type
    "I",   # p_flags
    "Q",   # p_offset
    "Q",   # p_vaddr
    "Q",   # p_paddr
    "Q",   # p_filesz
    "Q",   # p_memsz
    "Q",   # p_align
))
def int128_get64(val):
    """Return the "lo" field of VAL after checking that "hi" is zero.

    VAL is indexed by field name.  The assertion fails when the "hi" field
    is non-zero, i.e. when the value does not fit into the low part alone.
    """
    high_part = val["hi"]
    assert high_part == 0
    return val["lo"]
def qlist_foreach(head, field_str):
    """Generate the elements of a linked list, front to back.

    HEAD is indexed with "lh_first" to obtain the pointer to the first
    element.  Each element is obtained by dereferencing the current
    pointer; the pointer to its successor is read from the element's
    FIELD_STR member, sub-field "le_next".  Iteration stops at the first
    pointer that compares equal to 0.
    """
    node_p = head["lh_first"]
    while node_p != 0:
        node = node_p.dereference()
        yield node
        link = node[field_str]
        node_p = link["le_next"]
def qemu_get_ram_block(ram_addr):
    """Return the first entry of "ram_list.blocks" that contains RAM_ADDR.

    A block matches when RAM_ADDR minus the block's "offset" is smaller
    than its "used_length".  Raises gdb.GdbError when no block matches.
    """
    block_list = gdb.parse_and_eval("ram_list.blocks")
    matching = (blk for blk in qlist_foreach(block_list, "next")
                if ram_addr - blk["offset"] < blk["used_length"])
    found = next(matching, None)
    if found is None:
        raise gdb.GdbError("Bad ram offset %x" % ram_addr)
    return found
def qemu_get_ram_ptr(ram_addr):
    """Translate RAM_ADDR into a pointer inside its RAM block.

    The owning block is looked up with qemu_get_ram_block(); the result is
    the block's "host" value advanced by RAM_ADDR's distance from the
    block's "offset".
    """
    owner = qemu_get_ram_block(ram_addr)
    offset_in_block = ram_addr - owner["offset"]
    return owner["host"] + offset_in_block
def memory_region_get_ram_ptr(mr):
    """Return the RAM pointer backing memory region MR.

    When MR has a non-zero "alias", the pointer of the aliased region is
    resolved recursively and advanced by MR's "alias_offset".  Otherwise
    MR's "ram_addr", with the in-page bits masked off, is translated with
    qemu_get_ram_ptr().
    """
    if mr["alias"] != 0:
        aliased_region = mr["alias"].dereference()
        aliased_ptr = memory_region_get_ram_ptr(aliased_region)
        return aliased_ptr + mr["alias_offset"]

    page_aligned_addr = mr["ram_addr"] & TARGET_PAGE_MASK
    return qemu_get_ram_ptr(page_aligned_addr)
def get_guest_phys_blocks():
    """Collect the guest RAM ranges from the flat view of system memory.

    Walks the ranges of "address_space_memory.current_map", skips every
    range whose memory region is not RAM, and merges a range into its
    predecessor when the two are contiguous in both guest-physical and
    host-virtual memory.  One table row is printed per RAM range.

    Returns a list of dicts with the keys "target_start", "target_end"
    and "host_addr", one per (merged) block, in traversal order.
    """
    guest_phys_blocks = []
    print("guest RAM blocks:")
    # Pad the column titles to the widths used by the data rows below
    # (three 16-digit hex columns, "%-7s", "%5u") so that titles, separator
    # and rows line up.
    print("%-16s %-16s %-16s %-7s %5s" %
          ("target_start", "target_end", "host_addr", "message", "count"))
    print("---------------- ---------------- ---------------- ------- "
          "-----")

    current_map_p = gdb.parse_and_eval("address_space_memory.current_map")
    current_map = current_map_p.dereference()

    # Conversion to int is needed for python 3
    # compatibility. Otherwise range doesn't cast the value itself and
    # breaks.
    for cur in range(int(current_map["nr"])):
        flat_range = (current_map["ranges"] + cur).dereference()
        mr = flat_range["mr"].dereference()

        # we only care about RAM
        if not mr["ram"]:
            continue

        section_size = int128_get64(flat_range["addr"]["size"])
        target_start = int128_get64(flat_range["addr"]["start"])
        target_end = target_start + section_size
        host_addr = (memory_region_get_ram_ptr(mr) +
                     flat_range["offset_in_region"])
        predecessor = None

        # find continuity in guest physical address space
        if guest_phys_blocks:
            predecessor = guest_phys_blocks[-1]
            predecessor_size = (predecessor["target_end"] -
                                predecessor["target_start"])

            # the memory API guarantees monotonically increasing
            # traversal
            assert predecessor["target_end"] <= target_start

            # we want continuity in both guest-physical and
            # host-virtual memory
            if (predecessor["target_end"] < target_start or
                    predecessor["host_addr"] + predecessor_size != host_addr):
                predecessor = None

        if predecessor is None:
            # isolated mapping, add it to the list
            guest_phys_blocks.append({"target_start": target_start,
                                      "target_end": target_end,
                                      "host_addr": host_addr})
            message = "added"
        else:
            # expand predecessor until @target_end; predecessor's
            # start doesn't change
            predecessor["target_end"] = target_end
            message = "joined"

        print("%016x %016x %016x %-7s %5u" %
              (target_start, target_end, host_addr.cast(UINTPTR_T),
               message, len(guest_phys_blocks)))

    return guest_phys_blocks
# gdb command object implementing "dump-guest-memory".  The class docstring
# is printed verbatim by gdb's "help" command, which is why it is laid out
# in gdb's help style (continuation lines at column 0) rather than in
# idiomatic Python docstring style.
class DumpGuestMemory(gdb.Command):
    """Extract guest vmcore from qemu process coredump.

The sole argument is FILE, identifying the target file to write the
guest vmcore to.

This GDB command reimplements the dump-guest-memory QMP command in
python, using the representation of guest memory as captured in the qemu
coredump. The qemu process that has been dumped must have had the
command line option "-machine dump-guest-core=on".

For simplicity, the "paging", "begin" and "end" parameters of the QMP
command are not supported -- no attempt is made to get the guest's
internal paging structures (ie. paging=false is hard-wired), and guest
memory is always fully dumped.

Only x86_64 guests are supported.

The CORE/NT_PRSTATUS and QEMU notes (that is, the VCPUs' statuses) are
not written to the vmcore. Preparing these would require context that is
only present in the KVM host kernel module when the guest is alive. A
fake ELF note is written instead, only to keep the ELF parser of "crash"
happy.

Dependent on how busted the qemu process was at the time of the
coredump, this command might produce unpredictable results. If qemu
deliberately called abort(), or it was dumped in response to a signal at
a halfway fortunate point, then its coredump should be in reasonable
shape and this command should mostly work."""

    def __init__(self):
        """Register the command with gdb and precompile the ELF structs."""
        super(DumpGuestMemory, self).__init__("dump-guest-memory",
                                              gdb.COMMAND_DATA,
                                              gdb.COMPLETE_FILENAME)
        self.elf64_ehdr_le = struct.Struct("<%s" % ELF64_EHDR)
        self.elf64_phdr_le = struct.Struct("<%s" % ELF64_PHDR)
        self.guest_phys_blocks = None

    @staticmethod
    def _as_bytes(value):
        """Return VALUE as a bytes object, encoding text as Latin-1.

        struct "s" fields require bytes on Python 3.  On Python 2 a plain
        str already is a bytes object and is returned unchanged.
        """
        if isinstance(value, bytes):
            return value
        return value.encode("latin-1")

    def cpu_get_dump_info(self):
        """Store the (hard-wired) machine, endianness and class values."""
        # We can't synchronize the registers with KVM post-mortem, and
        # the bits in (first_x86_cpu->env.hflags) seem to be stale; they
        # may not reflect long mode for example. Hence just assume the
        # most common values. This also means that instruction pointer
        # etc. will be bogus in the dump, but at least the RAM contents
        # should be valid.
        self.dump_info = {"d_machine": EM_X86_64,
                          "d_endian": ELFDATA2LSB,
                          "d_class": ELFCLASS64}

    def encode_elf64_ehdr_le(self):
        """Return the packed little-endian ELF64 file header.

        Reads self.dump_info and self.phdr_num, so cpu_get_dump_info() and
        dump_init() must have run first.
        """
        return self.elf64_ehdr_le.pack(
            self._as_bytes(ELFMAG),       # e_ident/magic
            self.dump_info["d_class"],    # e_ident/class
            self.dump_info["d_endian"],   # e_ident/data
            EV_CURRENT,                   # e_ident/version
            0,                            # e_ident/osabi
            b"",                          # e_ident/pad (NUL-filled by "8s")
            ET_CORE,                      # e_type
            self.dump_info["d_machine"],  # e_machine
            EV_CURRENT,                   # e_version
            0,                            # e_entry
            self.elf64_ehdr_le.size,      # e_phoff
            0,                            # e_shoff
            0,                            # e_flags
            self.elf64_ehdr_le.size,      # e_ehsize
            self.elf64_phdr_le.size,      # e_phentsize
            self.phdr_num,                # e_phnum
            0,                            # e_shentsize
            0,                            # e_shnum
            0                             # e_shstrndx
        )

    def encode_elf64_note_le(self):
        """Return the packed PT_NOTE program header.

        The note is placed immediately before the memory dump, hence its
        file offset is self.memory_offset minus the note's length.
        """
        note_size = len(self.note)
        return self.elf64_phdr_le.pack(
            PT_NOTE,                          # p_type
            0,                                # p_flags
            self.memory_offset - note_size,   # p_offset
            0,                                # p_vaddr
            0,                                # p_paddr
            note_size,                        # p_filesz
            note_size,                        # p_memsz
            0                                 # p_align
        )

    def encode_elf64_load_le(self, offset, start_hwaddr, range_size):
        """Return a packed PT_LOAD program header.

        OFFSET is the file offset of the range's data, START_HWADDR is
        stored as the physical address, and RANGE_SIZE is used for both
        the file size and the memory size.
        """
        return self.elf64_phdr_le.pack(
            PT_LOAD,        # p_type
            0,              # p_flags
            offset,         # p_offset
            0,              # p_vaddr
            start_hwaddr,   # p_paddr
            range_size,     # p_filesz
            range_size,     # p_memsz
            0               # p_align
        )

    def note_init(self, name, desc, type):
        """Build the ELF note and store it in self.note.

        NAME and DESC may be bytes or text (text is encoded as Latin-1);
        TYPE is the integer note type.  Both strings are NUL-padded to a
        multiple of four bytes, and NAME always gets at least one
        trailing NUL.
        """
        name = self._as_bytes(name)
        desc = self._as_bytes(desc)

        # name must include a trailing NUL.  Floor division keeps the sizes
        # integral and correctly rounded up to a multiple of 4 on Python 3,
        # where "/" would produce a float.
        namesz = (len(name) + 1 + 3) // 4 * 4
        descsz = (len(desc) + 3) // 4 * 4
        fmt = ("<"    # little endian
               "I"    # n_namesz
               "I"    # n_descsz
               "I"    # n_type
               "%us"  # name
               "%us"  # desc
               % (namesz, descsz))
        self.note = struct.pack(fmt,
                                len(name) + 1, len(desc), type, name, desc)

    def dump_init(self):
        """Gather the guest RAM blocks and compute the file layout.

        Sets self.guest_phys_blocks, self.dump_info, self.note,
        self.phdr_num and self.memory_offset.
        """
        self.guest_phys_blocks = get_guest_phys_blocks()
        self.cpu_get_dump_info()
        # we have no way to retrieve the VCPU status from KVM
        # post-mortem
        self.note_init("NONE", "EMPTY", 0)

        # Account for PT_NOTE.
        self.phdr_num = 1

        # We should never reach PN_XNUM for paging=false dumps: there's
        # just a handful of discontiguous ranges after merging.
        self.phdr_num += len(self.guest_phys_blocks)
        assert self.phdr_num < PN_XNUM

        # Calculate the ELF file offset where the memory dump commences:
        #
        #   ELF header
        #   PT_NOTE
        #   PT_LOAD: 1
        #   PT_LOAD: 2
        #   ...
        #   PT_LOAD: len(self.guest_phys_blocks)
        #   ELF note
        #   memory dump
        self.memory_offset = (self.elf64_ehdr_le.size +
                              self.elf64_phdr_le.size * self.phdr_num +
                              len(self.note))

    def dump_begin(self, vmcore):
        """Write the ELF header, all program headers and the note.

        VMCORE is a binary file object opened for writing.
        """
        vmcore.write(self.encode_elf64_ehdr_le())
        vmcore.write(self.encode_elf64_note_le())
        running = self.memory_offset
        for block in self.guest_phys_blocks:
            # Normalise to plain Python ints so that struct.pack() does not
            # depend on how the block values implement integer conversion.
            start_hwaddr = int(block["target_start"])
            range_size = int(block["target_end"] - block["target_start"])
            vmcore.write(self.encode_elf64_load_le(running,
                                                   start_hwaddr,
                                                   range_size))
            running += range_size
        vmcore.write(self.note)

    def dump_iterate(self, vmcore):
        """Copy every guest RAM block from the first inferior to VMCORE.

        Memory is read in chunks of at most TARGET_PAGE_SIZE bytes.
        """
        qemu_core = gdb.inferiors()[0]
        for block in self.guest_phys_blocks:
            cur = block["host_addr"]
            left = block["target_end"] - block["target_start"]
            print("dumping range at %016x for length %016x" %
                  (cur.cast(UINTPTR_T), left))
            while left > 0:
                chunk_size = min(TARGET_PAGE_SIZE, left)
                chunk = qemu_core.read_memory(cur, chunk_size)
                vmcore.write(chunk)
                cur += chunk_size
                left -= chunk_size

    def create_vmcore(self, filename):
        """Write the complete vmcore to FILENAME.

        The file is closed even when reading the inferior or writing the
        output raises part-way through.
        """
        with open(filename, "wb") as vmcore:
            self.dump_begin(vmcore)
            self.dump_iterate(vmcore)

    def invoke(self, args, from_tty):
        """gdb entry point: parse ARGS (exactly one FILE) and dump.

        Raises gdb.GdbError with a usage message when the argument count
        is not exactly one.
        """
        # Unwittingly pressing the Enter key after the command should
        # not dump the same multi-gig coredump to the same file.
        self.dont_repeat()

        argv = gdb.string_to_argv(args)
        if len(argv) != 1:
            raise gdb.GdbError("usage: dump-guest-memory FILE")

        self.dump_init()
        self.create_vmcore(argv[0])
# Instantiate the command once at load time; the constructor registers it
# with gdb under the name "dump-guest-memory".
DumpGuestMemory()