2 * Copyright (c) 2006 Peter Wemm
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 * $FreeBSD: src/sys/amd64/amd64/minidump_machdep.c,v 1.10 2009/05/29 21:27:12 jamie Exp $
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/conf.h>
#include <sys/cons.h>
#include <sys/device.h>
#include <sys/globaldata.h>
#include <sys/kernel.h>
#include <sys/kerneldump.h>
#include <sys/msgbuf.h>

#include <vm/vm.h>
#include <vm/vm_kern.h>
#include <vm/pmap.h>

#include <machine/atomic.h>
#include <machine/elf.h>
#include <machine/globaldata.h>
#include <machine/md_var.h>
#include <machine/vmparam.h>
#include <machine/minidump.h>
48 CTASSERT(sizeof(struct kerneldumpheader
) == 512);
51 * Don't touch the first SIZEOF_METADATA bytes on the dump device. This
52 * is to protect us from metadata and to protect metadata from us.
54 #define SIZEOF_METADATA (64*1024)
56 #define MD_ALIGN(x) (((off_t)(x) + PAGE_MASK) & ~PAGE_MASK)
57 #define DEV_ALIGN(x) (((off_t)(x) + (DEV_BSIZE-1)) & ~(DEV_BSIZE-1))
59 extern uint64_t KPDPphys
;
61 uint64_t *vm_page_dump
;
62 int vm_page_dump_size
;
64 static struct kerneldumpheader kdh
;
67 /* Handle chunked writes. */
70 static size_t counter
, progress
;
72 CTASSERT(sizeof(*vm_page_dump
) == 8);
75 is_dumpable(vm_paddr_t pa
)
79 for (i
= 0; dump_avail
[i
] != 0 || dump_avail
[i
+ 1] != 0; i
+= 2) {
80 if (pa
>= dump_avail
[i
] && pa
< dump_avail
[i
+ 1])
86 #define PG2MB(pgs) (((pgs) + (1 << 8) - 1) >> 8)
89 blk_flush(struct dumperinfo
*di
)
96 error
= dev_ddump(di
->priv
, dump_va
, 0, dumplo
, fragsz
);
103 blk_write(struct dumperinfo
*di
, char *ptr
, vm_paddr_t pa
, size_t sz
)
109 if ((sz
% PAGE_SIZE
) != 0) {
110 kprintf("size not page aligned\n");
113 if (ptr
!= NULL
&& pa
!= 0) {
114 kprintf("cant have both va and pa!\n");
117 if (pa
!= 0 && (((uintptr_t)ptr
) % PAGE_SIZE
) != 0) {
118 kprintf("address not page aligned\n");
122 /* If we're doing a virtual dump, flush any pre-existing pa pages */
123 error
= blk_flush(di
);
128 len
= (MAXDUMPPGS
* PAGE_SIZE
) - fragsz
;
134 kprintf(" %ld", PG2MB(progress
>> PAGE_SHIFT
));
135 counter
&= (1<<24) - 1;
138 error
= dev_ddump(di
->priv
, ptr
, 0, dumplo
, len
);
145 for (i
= 0; i
< len
; i
+= PAGE_SIZE
)
146 dump_va
= pmap_kenter_temporary(pa
+ i
, (i
+ fragsz
) >> PAGE_SHIFT
);
150 if (fragsz
== (MAXDUMPPGS
* PAGE_SIZE
)) {
151 error
= blk_flush(di
);
157 /* Check for user abort. */
162 kprintf(" (CTRL-C to abort) ");
168 /* A fake page table page, to avoid having to handle both 4K and 2M pages */
169 static pt_entry_t fakept
[NPTEPG
];
172 minidumpsys(struct dumperinfo
*di
)
177 vm_offset_t kern_end
;
180 uint64_t *pdp
, *pd
, *pt
, pa
;
182 struct minidumphdr mdhdr
;
183 struct mdglobaldata
*md
;
186 /* Walk page table pages, set bits in vm_page_dump */
189 md
= (struct mdglobaldata
*)globaldata_find(0);
191 kern_end
= kernel_vm_end
;
192 if (kern_end
< (vm_offset_t
)&(md
[ncpus
]))
193 kern_end
= (vm_offset_t
)&(md
[ncpus
]);
195 pdp
= (uint64_t *)PHYS_TO_DMAP(KPDPphys
);
196 for (va
= KERNBASE
; va
< kern_end
; va
+= NBPDR
) {
197 i
= (va
>> PDPSHIFT
) & ((1ul << NPDPEPGSHIFT
) - 1);
199 * We always write a page, even if it is zero. Each
200 * page written corresponds to 2MB of space
202 ptesize
+= PAGE_SIZE
;
203 if ((pdp
[i
] & PG_V
) == 0)
205 pd
= (uint64_t *)PHYS_TO_DMAP(pdp
[i
] & PG_FRAME
);
206 j
= ((va
>> PDRSHIFT
) & ((1ul << NPDEPGSHIFT
) - 1));
207 if ((pd
[j
] & (PG_PS
| PG_V
)) == (PG_PS
| PG_V
)) {
208 /* This is an entire 2M page. */
209 pa
= pd
[j
] & PG_PS_FRAME
;
210 for (k
= 0; k
< NPTEPG
; k
++) {
217 if ((pd
[j
] & PG_V
) == PG_V
) {
218 /* set bit for each valid page in this 2MB block */
219 pt
= (uint64_t *)PHYS_TO_DMAP(pd
[j
] & PG_FRAME
);
220 for (k
= 0; k
< NPTEPG
; k
++) {
221 if ((pt
[k
] & PG_V
) == PG_V
) {
222 pa
= pt
[k
] & PG_FRAME
;
228 /* nothing, we're going to dump a null page */
232 /* Calculate dump size. */
234 dumpsize
+= round_page(msgbufp
->msg_size
);
235 dumpsize
+= round_page(vm_page_dump_size
);
236 for (i
= 0; i
< vm_page_dump_size
/ sizeof(*vm_page_dump
); i
++) {
237 bits
= vm_page_dump
[i
];
240 pa
= (((uint64_t)i
* sizeof(*vm_page_dump
) * NBBY
) + bit
) * PAGE_SIZE
;
241 /* Clear out undumpable pages now if needed */
242 if (is_dumpable(pa
)) {
243 dumpsize
+= PAGE_SIZE
;
247 bits
&= ~(1ul << bit
);
250 dumpsize
+= PAGE_SIZE
;
252 /* Determine dump offset on device. */
253 if (di
->mediasize
< SIZEOF_METADATA
+ dumpsize
+ sizeof(kdh
) * 2) {
257 dumplo
= di
->mediaoffset
+ di
->mediasize
- dumpsize
;
258 dumplo
-= sizeof(kdh
) * 2;
261 /* Initialize mdhdr */
262 bzero(&mdhdr
, sizeof(mdhdr
));
263 strcpy(mdhdr
.magic
, MINIDUMP_MAGIC
);
264 mdhdr
.version
= MINIDUMP_VERSION
;
265 mdhdr
.msgbufsize
= msgbufp
->msg_size
;
266 mdhdr
.bitmapsize
= vm_page_dump_size
;
267 mdhdr
.ptesize
= ptesize
;
268 mdhdr
.kernbase
= KERNBASE
;
269 mdhdr
.dmapbase
= DMAP_MIN_ADDRESS
;
270 mdhdr
.dmapend
= DMAP_MAX_ADDRESS
;
272 mkdumpheader(&kdh
, KERNELDUMPMAGIC
, KERNELDUMP_AMD64_VERSION
,
273 dumpsize
, di
->blocksize
);
275 kprintf("Physical memory: %ju MB\n", ptoa((uintmax_t)physmem
) / 1048576);
276 kprintf("Dumping %llu MB:", (long long)dumpsize
>> 20);
279 error
= dev_ddump(di
->priv
, &kdh
, 0, dumplo
, sizeof(kdh
));
282 dumplo
+= sizeof(kdh
);
285 bzero(&fakept
, sizeof(fakept
));
286 bcopy(&mdhdr
, &fakept
, sizeof(mdhdr
));
287 error
= blk_write(di
, (char *)&fakept
, 0, PAGE_SIZE
);
291 /* Dump msgbuf up front */
292 error
= blk_write(di
, (char *)msgbufp
->msg_ptr
, 0, round_page(msgbufp
->msg_size
));
297 error
= blk_write(di
, (char *)vm_page_dump
, 0, round_page(vm_page_dump_size
));
301 /* Dump kernel page table pages */
302 pdp
= (uint64_t *)PHYS_TO_DMAP(KPDPphys
);
303 for (va
= KERNBASE
; va
< kern_end
; va
+= NBPDR
) {
304 i
= (va
>> PDPSHIFT
) & ((1ul << NPDPEPGSHIFT
) - 1);
305 /* We always write a page, even if it is zero */
306 if ((pdp
[i
] & PG_V
) == 0) {
307 bzero(fakept
, sizeof(fakept
));
308 error
= blk_write(di
, (char *)&fakept
, 0, PAGE_SIZE
);
311 /* flush, in case we reuse fakept in the same block */
312 error
= blk_flush(di
);
317 pd
= (uint64_t *)PHYS_TO_DMAP(pdp
[i
] & PG_FRAME
);
318 j
= ((va
>> PDRSHIFT
) & ((1ul << NPDEPGSHIFT
) - 1));
319 if ((pd
[j
] & (PG_PS
| PG_V
)) == (PG_PS
| PG_V
)) {
320 /* This is a single 2M block. Generate a fake PTP */
321 pa
= pd
[j
] & PG_PS_FRAME
;
322 for (k
= 0; k
< NPTEPG
; k
++) {
323 fakept
[k
] = (pa
+ (k
* PAGE_SIZE
)) | PG_V
| PG_RW
| PG_A
| PG_M
;
325 error
= blk_write(di
, (char *)&fakept
, 0, PAGE_SIZE
);
328 /* flush, in case we reuse fakept in the same block */
329 error
= blk_flush(di
);
334 if ((pd
[j
] & PG_V
) == PG_V
) {
335 pt
= (uint64_t *)PHYS_TO_DMAP(pd
[j
] & PG_FRAME
);
336 error
= blk_write(di
, (char *)pt
, 0, PAGE_SIZE
);
340 bzero(fakept
, sizeof(fakept
));
341 error
= blk_write(di
, (char *)&fakept
, 0, PAGE_SIZE
);
344 /* flush, in case we reuse fakept in the same block */
345 error
= blk_flush(di
);
351 /* Dump memory chunks */
352 /* XXX cluster it up and use blk_dump() */
353 for (i
= 0; i
< vm_page_dump_size
/ sizeof(*vm_page_dump
); i
++) {
354 bits
= vm_page_dump
[i
];
357 pa
= (((uint64_t)i
* sizeof(*vm_page_dump
) * NBBY
) + bit
) * PAGE_SIZE
;
358 error
= blk_write(di
, 0, pa
, PAGE_SIZE
);
361 bits
&= ~(1ul << bit
);
365 error
= blk_flush(di
);
370 error
= dev_ddump(di
->priv
, &kdh
, 0, dumplo
, sizeof(kdh
));
373 dumplo
+= sizeof(kdh
);
375 /* Signal completion, signoff and exit stage left. */
376 dev_ddump(di
->priv
, NULL
, 0, 0, 0);
377 kprintf("\nDump complete\n");
384 if (error
== ECANCELED
)
385 kprintf("\nDump aborted\n");
386 else if (error
== ENOSPC
)
387 kprintf("\nDump failed. Partition too small.\n");
389 kprintf("\n** DUMP FAILED (ERROR %d) **\n", error
);
393 dump_add_page(vm_paddr_t pa
)
398 idx
= pa
>> 6; /* 2^6 = 64 */
400 atomic_set_long(&vm_page_dump
[idx
], 1ul << bit
);
404 dump_drop_page(vm_paddr_t pa
)
409 idx
= pa
>> 6; /* 2^6 = 64 */
411 atomic_clear_long(&vm_page_dump
[idx
], 1ul << bit
);