/*-
 * Copyright (c) 2006 Peter Wemm
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * $FreeBSD: src/sys/amd64/amd64/minidump_machdep.c,v 1.10 2009/05/29 21:27:12 jamie Exp $
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/device.h>
#include <sys/globaldata.h>
#include <sys/kernel.h>
#include <sys/kerneldump.h>
#include <sys/msgbuf.h>

#include <vm/vm_kern.h>

#include <machine/atomic.h>
#include <machine/elf.h>
#include <machine/globaldata.h>
#include <machine/md_var.h>
#include <machine/vmparam.h>
#include <machine/minidump.h>

CTASSERT(sizeof(struct kerneldumpheader) == 512);

/*
 * Don't touch the first SIZEOF_METADATA bytes on the dump device.  This
 * is to protect us from metadata and to protect metadata from us.
 */
#define SIZEOF_METADATA		(64*1024)
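
/*
 * Alignment helpers: MD_ALIGN rounds a byte count up to a page boundary
 * (the minidump is written in whole pages); DEV_ALIGN rounds up to
 * DEV_BSIZE, the granularity of the dump device.
 */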
#define MD_ALIGN(x)	(((off_t)(x) + PAGE_MASK) & ~PAGE_MASK)
#define DEV_ALIGN(x)	roundup2((off_t)(x), DEV_BSIZE)

extern uint64_t KPDPphys;

uint64_t *vm_page_dump;
int vm_page_dump_size;

static struct kerneldumpheader kdh;
static off_t dumplo;

/* Handle chunked writes. */
static size_t fragsz;
static void *dump_va;
static size_t counter, progress;

CTASSERT(sizeof(*vm_page_dump) == 8);
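
/*
 * vm_page_dump is a bitmap with one bit per 4KB physical page; each
 * 64-bit word covers 64 pages.  The CTASSERT above pins down the 8-byte
 * word size that the idx/bit arithmetic in dump_add_page() relies on.
 */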

static int
is_dumpable(vm_paddr_t pa)
{
	int i;

	for (i = 0; dump_avail[i].phys_beg || dump_avail[i].phys_end; ++i) {
		if (pa >= dump_avail[i].phys_beg &&
		    pa < dump_avail[i].phys_end) {
			return (1);
		}
	}
	return (0);
}

#define PG2MB(pgs) (((pgs) + (1 << 8) - 1) >> 8)
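
/*
 * PG2MB converts a page count to megabytes, rounding up: with 4KB pages
 * there are 2^8 = 256 pages per MB, so e.g. PG2MB(257) == 2.
 */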

static int
blk_flush(struct dumperinfo *di)
{
	int error;

	if (fragsz == 0)
		return (0);

	error = dev_ddump(di->priv, dump_va, 0, dumplo, fragsz);
	dumplo += fragsz;
	fragsz = 0;
	return (error);
}
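
/*
 * Write sz bytes to the dump device from either a kernel virtual address
 * (ptr) or a physical address (pa); exactly one of the two may be given.
 * Physical pages are staged through temporary mappings and accumulated
 * up to max_iosize before blk_flush() pushes them out in a single
 * dev_ddump() call.
 */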
static int
blk_write(struct dumperinfo *di, char *ptr, vm_paddr_t pa, size_t sz)
{
	size_t len;
	int error, i, c;
	int max_iosize;

	error = 0;
	if ((sz & PAGE_MASK)) {
		kprintf("size not page aligned\n");
		return (EINVAL);
	}
	if (ptr != NULL && pa != 0) {
		kprintf("can't have both va and pa!\n");
		return (EINVAL);
	}
	if (pa != 0 && (((uintptr_t)pa) & PAGE_MASK) != 0) {
		kprintf("address not page aligned\n");
		return (EINVAL);
	}
	if (ptr != NULL) {
		/*
		 * If we're doing a virtual dump, flush any
		 * pre-existing pa pages.
		 */
		error = blk_flush(di);
		if (error)
			return (error);
	}
	max_iosize = min(MAXPHYS, di->maxiosize);
	while (sz) {
		len = max_iosize - fragsz;
		if (len > sz)
			len = sz;
		counter += len;
		progress -= len;
		if (counter >> 24) {
			kprintf(" %ld", PG2MB(progress >> PAGE_SHIFT));
			counter &= (1<<24) - 1;
		}
		if (ptr) {
			error = dev_ddump(di->priv, ptr, 0, dumplo, len);
			if (error)
				return (error);
			dumplo += len;
			ptr += len;
			sz -= len;
		} else {
			for (i = 0; i < len; i += PAGE_SIZE) {
				dump_va = pmap_kenter_temporary(pa + i,
						(i + fragsz) >> PAGE_SHIFT);
			}
			fragsz += len;
			pa += len;
			sz -= len;
			if (fragsz == max_iosize) {
				error = blk_flush(di);
				if (error)
					return (error);
			}
		}

		/* Check for user abort. */
		c = cncheckc();
		if (c == 0x03)
			return (ECANCELED);
		if (c != -1 && c != NOKEY)
			kprintf(" (CTRL-C to abort) ");
	}

	return (0);
}

/* A fake page table page, to avoid having to handle both 4K and 2M pages */
static pt_entry_t fakept[NPTEPG];
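
/*
 * minidumpsys() runs in phases: walk the kernel page tables and mark each
 * mapped physical page in vm_page_dump; total up the dump size; then write,
 * in order, the leader header, the minidump header page, the msgbuf, the
 * page bitmap, one page-table page per 2MB of KVA, every marked physical
 * page, and finally a trailing copy of the header.
 */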
void
minidumpsys(struct dumperinfo *di)
{
	uint64_t dumpsize;
	uint64_t ptesize;
	vm_offset_t va;
	vm_offset_t kern_end;
	int error;
	uint64_t bits;
	uint64_t *pdp, *pd, *pt, pa;
	int i, j, k, bit;
	struct minidumphdr mdhdr;
	struct mdglobaldata *md;

	/*
	 * Walk page table pages, set bits in vm_page_dump.
	 *
	 * NOTE: kernel_vm_end can actually be below KERNBASE.
	 *	 Just use KvaEnd.  Also note that loops which go
	 *	 all the way to the end of the address space might
	 *	 overflow the loop variable.
	 */
	ptesize = 0;

	md = (struct mdglobaldata *)globaldata_find(0);

	kern_end = KvaEnd;
	if (kern_end < (vm_offset_t)&(md[ncpus]))
		kern_end = (vm_offset_t)&(md[ncpus]);

	pdp = (uint64_t *)PHYS_TO_DMAP(KPDPphys);
	for (va = VM_MIN_KERNEL_ADDRESS; va < kern_end; va += NBPDR) {
		/*
		 * The loop probably overflows a 64-bit int due to NBPDR.
		 */
		if (va < VM_MIN_KERNEL_ADDRESS)
			break;

		/*
		 * We always write a page, even if it is zero.  Each
		 * page written corresponds to 2MB of space.
		 */
		i = (va >> PDPSHIFT) & ((1ul << NPDPEPGSHIFT) - 1);
		ptesize += PAGE_SIZE;
		if ((pdp[i] & kernel_pmap.pmap_bits[PG_V_IDX]) == 0)
			continue;
		pd = (uint64_t *)PHYS_TO_DMAP(pdp[i] & PG_FRAME);
		j = ((va >> PDRSHIFT) & ((1ul << NPDEPGSHIFT) - 1));
		if ((pd[j] & (kernel_pmap.pmap_bits[PG_PS_IDX] |
			      kernel_pmap.pmap_bits[PG_V_IDX])) ==
		    (kernel_pmap.pmap_bits[PG_PS_IDX] |
		     kernel_pmap.pmap_bits[PG_V_IDX])) {
			/* This is an entire 2M page. */
			pa = pd[j] & PG_PS_FRAME;
			for (k = 0; k < NPTEPG; k++) {
				if (is_dumpable(pa))
					dump_add_page(pa);
				pa += PAGE_SIZE;
			}
			continue;
		}
		if ((pd[j] & kernel_pmap.pmap_bits[PG_V_IDX]) ==
		    kernel_pmap.pmap_bits[PG_V_IDX]) {
			/* set bit for each valid page in this 2MB block */
			pt = (uint64_t *)PHYS_TO_DMAP(pd[j] & PG_FRAME);
			for (k = 0; k < NPTEPG; k++) {
				if ((pt[k] & kernel_pmap.pmap_bits[PG_V_IDX]) ==
				    kernel_pmap.pmap_bits[PG_V_IDX]) {
					pa = pt[k] & PG_FRAME;
					if (is_dumpable(pa))
						dump_add_page(pa);
				}
			}
		} else {
			/* nothing, we're going to dump a null page */
		}
	}
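
	/*
	 * Note that ptesize was bumped once per 2MB step above whether or
	 * not the mapping existed, so it matches the PTE stream written
	 * later: one real or fake page-table page per 2MB of KVA.
	 */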

	/* Calculate dump size. */
	dumpsize = ptesize;
	dumpsize += round_page(msgbufp->msg_size);
	dumpsize += round_page(vm_page_dump_size);
	for (i = 0; i < vm_page_dump_size / sizeof(*vm_page_dump); i++) {
		bits = vm_page_dump[i];
		while (bits) {
			bit = bsfq(bits);	/* lowest set bit */
			pa = (((uint64_t)i * sizeof(*vm_page_dump) * NBBY) +
			      bit) * PAGE_SIZE;
			/* Clear out undumpable pages now if needed */
			if (is_dumpable(pa))
				dumpsize += PAGE_SIZE;
			else
				dump_drop_page(pa);
			bits &= ~(1ul << bit);
		}
	}
	dumpsize += PAGE_SIZE;	/* the minidump header page itself */
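
	/*
	 * Worked example of the bitmap math above: word i = 2, bit = 5
	 * names page number 2 * 64 + 5 = 133 (sizeof(*vm_page_dump) * NBBY
	 * is 64 bits per word), so pa = 133 * PAGE_SIZE = 0x85000.
	 */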

	/* Determine dump offset on device. */
	if (di->mediasize < SIZEOF_METADATA + dumpsize + sizeof(kdh) * 2) {
		error = ENOSPC;
		goto fail;
	}
	dumplo = di->mediaoffset + di->mediasize - dumpsize;
	dumplo -= sizeof(kdh) * 2;
	progress = dumpsize;
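
	/*
	 * The dump is laid out at the very end of the dump device, with
	 * room reserved for one kerneldumpheader before the data and one
	 * after it, which is how the dump is located after reboot.
	 */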

	/* Initialize mdhdr */
	bzero(&mdhdr, sizeof(mdhdr));
	strcpy(mdhdr.magic, MINIDUMP_MAGIC);
	mdhdr.version = MINIDUMP_VERSION;
	mdhdr.msgbufsize = msgbufp->msg_size;
	mdhdr.bitmapsize = vm_page_dump_size;
	mdhdr.ptesize = ptesize;
	mdhdr.kernbase = VM_MIN_KERNEL_ADDRESS;
	mdhdr.dmapbase = DMAP_MIN_ADDRESS;
	mdhdr.dmapend = DMAP_MAX_ADDRESS;

	mkdumpheader(&kdh, KERNELDUMPMAGIC, KERNELDUMP_AMD64_VERSION,
		     dumpsize, di->blocksize);

	kprintf("Physical memory: %jd MB\n",
		(intmax_t)ptoa(physmem) / 1048576);
	kprintf("Dumping %jd MB:", (intmax_t)dumpsize >> 20);

	/* Dump leader */
	error = dev_ddump(di->priv, &kdh, 0, dumplo, sizeof(kdh));
	if (error)
		goto fail;
	dumplo += sizeof(kdh);

	/* Dump my header */
	bzero(&fakept, sizeof(fakept));
	bcopy(&mdhdr, &fakept, sizeof(mdhdr));
	error = blk_write(di, (char *)&fakept, 0, PAGE_SIZE);
	if (error)
		goto fail;
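
	/*
	 * The minidump header rides in the first PAGE_SIZE block of the
	 * dump: fakept doubles as a page-sized, zeroed scratch buffer with
	 * mdhdr copied into its head.
	 */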

	/* Dump msgbuf up front */
	error = blk_write(di, (char *)msgbufp->msg_ptr, 0,
			  round_page(msgbufp->msg_size));
	if (error)
		goto fail;

	/* Dump bitmap */
	error = blk_write(di, (char *)vm_page_dump, 0,
			  round_page(vm_page_dump_size));
	if (error)
		goto fail;

	/* Dump kernel page table pages */
	pdp = (uint64_t *)PHYS_TO_DMAP(KPDPphys);
	for (va = VM_MIN_KERNEL_ADDRESS; va < kern_end; va += NBPDR) {
		/*
		 * The loop probably overflows a 64-bit int due to NBPDR.
		 */
		if (va < VM_MIN_KERNEL_ADDRESS)
			break;

		/*
		 * We always write a page, even if it is zero.
		 */
		i = (va >> PDPSHIFT) & ((1ul << NPDPEPGSHIFT) - 1);
		if ((pdp[i] & kernel_pmap.pmap_bits[PG_V_IDX]) == 0) {
			bzero(fakept, sizeof(fakept));
			error = blk_write(di, (char *)&fakept, 0, PAGE_SIZE);
			if (error)
				goto fail;
			/* flush, in case we reuse fakept in the same block */
			error = blk_flush(di);
			if (error)
				goto fail;
			continue;
		}
		pd = (uint64_t *)PHYS_TO_DMAP(pdp[i] & PG_FRAME);
		j = ((va >> PDRSHIFT) & ((1ul << NPDEPGSHIFT) - 1));
		if ((pd[j] & (kernel_pmap.pmap_bits[PG_PS_IDX] |
			      kernel_pmap.pmap_bits[PG_V_IDX])) ==
		    (kernel_pmap.pmap_bits[PG_PS_IDX] |
		     kernel_pmap.pmap_bits[PG_V_IDX])) {
			/* This is a single 2M block. Generate a fake PTP */
			pa = pd[j] & PG_PS_FRAME;
			for (k = 0; k < NPTEPG; k++) {
				fakept[k] = (pa + (k * PAGE_SIZE)) |
					    kernel_pmap.pmap_bits[PG_V_IDX] |
					    kernel_pmap.pmap_bits[PG_RW_IDX] |
					    kernel_pmap.pmap_bits[PG_A_IDX] |
					    kernel_pmap.pmap_bits[PG_M_IDX];
			}
			error = blk_write(di, (char *)&fakept, 0, PAGE_SIZE);
			if (error)
				goto fail;
			/* flush, in case we reuse fakept in the same block */
			error = blk_flush(di);
			if (error)
				goto fail;
			continue;
		}
		if ((pd[j] & kernel_pmap.pmap_bits[PG_V_IDX]) ==
		    kernel_pmap.pmap_bits[PG_V_IDX]) {
			/* Dump the real page table page */
			pt = (uint64_t *)PHYS_TO_DMAP(pd[j] & PG_FRAME);
			error = blk_write(di, (char *)pt, 0, PAGE_SIZE);
			if (error)
				goto fail;
		} else {
			bzero(fakept, sizeof(fakept));
			error = blk_write(di, (char *)&fakept, 0, PAGE_SIZE);
			if (error)
				goto fail;
			/* flush, in case we reuse fakept in the same block */
			error = blk_flush(di);
			if (error)
				goto fail;
		}
	}

	/* Dump memory chunks */
	/* XXX cluster it up and use blk_dump() */
	for (i = 0; i < vm_page_dump_size / sizeof(*vm_page_dump); i++) {
		bits = vm_page_dump[i];
		while (bits) {
			bit = bsfq(bits);	/* lowest set bit */
			pa = (((uint64_t)i * sizeof(*vm_page_dump) * NBBY) +
			      bit) * PAGE_SIZE;
			error = blk_write(di, 0, pa, PAGE_SIZE);
			if (error)
				goto fail;
			bits &= ~(1ul << bit);
		}
	}

	error = blk_flush(di);
	if (error)
		goto fail;

	/* Dump trailer */
	error = dev_ddump(di->priv, &kdh, 0, dumplo, sizeof(kdh));
	if (error)
		goto fail;
	dumplo += sizeof(kdh);
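
	/*
	 * The trailer is a byte-for-byte copy of the leader; a matching
	 * pair of headers bracketing the data marks the dump as complete.
	 */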

	/* Signal completion, signoff and exit stage left. */
	dev_ddump(di->priv, NULL, 0, 0, 0);
	kprintf("\nDump complete\n");
	return;

fail:
	if (error == ECANCELED)
		kprintf("\nDump aborted\n");
	else if (error == ENOSPC)
		kprintf("\nDump failed. Partition too small.\n");
	else
		kprintf("\n** DUMP FAILED (ERROR %d) **\n", error);
}

void
dump_add_page(vm_paddr_t pa)
{
	int idx, bit;

	pa >>= PAGE_SHIFT;
	idx = pa >> 6;		/* 2^6 = 64 */
	bit = pa & 63;
	atomic_set_long(&vm_page_dump[idx], 1ul << bit);
}

void
dump_drop_page(vm_paddr_t pa)
{
	int idx, bit;

	pa >>= PAGE_SHIFT;
	idx = pa >> 6;		/* 2^6 = 64 */
	bit = pa & 63;
	atomic_clear_long(&vm_page_dump[idx], 1ul << bit);
}
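
/*
 * Example of the index math: pa = 0x12345000 -> page number 0x12345,
 * so idx = 0x12345 >> 6 = 0x48d and bit = 0x12345 & 63 = 5.
 */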