2 * Copyright (c) 1989, 1992, 1993
3 * The Regents of the University of California. All rights reserved.
5 * This code is derived from software developed by the Computer Systems
6 * Engineering group at Lawrence Berkeley Laboratory under DARPA contract
7 * BG 91-66 and contributed to Berkeley.
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 4. Neither the name of the University nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 #include <sys/cdefs.h>
35 __FBSDID("$FreeBSD$");
37 #if defined(LIBC_SCCS) && !defined(lint)
39 static char sccsid
[] = "@(#)kvm.c 8.2 (Berkeley) 2/13/94";
41 #endif /* LIBC_SCCS and not lint */
43 #include <sys/param.h>
44 #include <sys/fnv_hash.h>
49 #include <sys/linker.h>
65 #include "kvm_private.h"
67 SET_DECLARE(kvm_arch
, struct kvm_arch
);
69 /* from src/lib/libc/gen/nlist.c */
70 int __fdnlist(int, struct nlist
*);
/*
 * kvm_fdnlist(): resolve kernel symbols for the kvm_nlist array 'list'.
 * If no user-supplied resolver callback is set on the handle, fall back
 * to __fdnlist() on kd->nlfd: count the entries (terminated by a NULL or
 * empty n_name), build a temporary struct nlist array sharing the names,
 * look them up, and copy the resolved n_type/n_value back.  Otherwise,
 * resolve each entry through kd->resolve_symbol().
 * NOTE(review): this SOURCE is a garbled partial extraction — identifiers
 * are split across lines and many original lines (declarations, returns,
 * closing braces) are missing from this view.  Kept byte-identical.
 */
73 kvm_fdnlist(kvm_t
*kd
, struct kvm_nlist
*list
)
78 if (kd
->resolve_symbol
== NULL
) {
/* Count entries: the list is terminated by a NULL or empty name. */
82 for (count
= 0; list
[count
].n_name
!= NULL
&&
83 list
[count
].n_name
[0] != '\0'; count
++)
/* Build a scratch nlist (+1 for the terminating entry) sharing names. */
85 nl
= calloc(count
+ 1, sizeof(*nl
));
86 for (i
= 0; i
< count
; i
++)
87 nl
[i
].n_name
= list
[i
].n_name
;
88 nfail
= __fdnlist(kd
->nlfd
, nl
);
/* Copy resolved type/value back into the caller's list. */
89 for (i
= 0; i
< count
; i
++) {
90 list
[i
].n_type
= nl
[i
].n_type
;
91 list
[i
].n_value
= nl
[i
].n_value
;
/* Resolver-callback path: ask the callback for each symbol's address. */
98 while (list
->n_name
!= NULL
&& list
->n_name
[0] != '\0') {
99 error
= kd
->resolve_symbol(list
->n_name
, &addr
);
105 list
->n_value
= addr
;
/* Callback gives no type info; mark as external data symbol. */
106 list
->n_type
= N_DATA
| N_EXT
;
/*
 * kvm_geterr(): public accessor for the handle's error string.
 * NOTE(review): the function body is missing from this extraction;
 * presumably it returns kd->errbuf — confirm against the full source.
 */
114 kvm_geterr(kvm_t
*kd
)
122 * Report an error using printf style arguments. "program" is kd->program
123 * on hard errors, and 0 on soft errors, so that under sun error emulation,
124 * only hard errors are printed out (otherwise, programs like gdb will
125 * generate tons of error messages when trying to access bogus pointers).
/*
 * _kvm_err(): hard errors (program != NULL) go to stderr; soft errors
 * are formatted into kd->errbuf for later retrieval via kvm_geterr().
 * NOTE(review): partial extraction — va_start/va_end and surrounding
 * lines are not visible here.
 */
128 _kvm_err(kvm_t
*kd
, const char *program
, const char *fmt
, ...)
133 if (program
!= NULL
) {
134 (void)fprintf(stderr
, "%s: ", program
);
135 (void)vfprintf(stderr
, fmt
, ap
);
136 (void)fputc('\n', stderr
);
/* Soft error: capture the message in the per-handle buffer. */
138 (void)vsnprintf(kd
->errbuf
,
139 sizeof(kd
->errbuf
), fmt
, ap
);
/*
 * _kvm_syserr(): like _kvm_err() but appends ": <strerror(errno)>" to the
 * formatted message, both on the stderr (hard-error) path and in the
 * kd->errbuf (soft-error) path.
 * NOTE(review): partial extraction — the computation of 'n' (presumably
 * the formatted length, possibly clamped) is not visible here.
 */
145 _kvm_syserr(kvm_t
*kd
, const char *program
, const char *fmt
, ...)
151 if (program
!= NULL
) {
152 (void)fprintf(stderr
, "%s: ", program
);
153 (void)vfprintf(stderr
, fmt
, ap
);
154 (void)fprintf(stderr
, ": %s\n", strerror(errno
));
156 char *cp
= kd
->errbuf
;
158 (void)vsnprintf(cp
, sizeof(kd
->errbuf
), fmt
, ap
);
/* Append the errno text after the formatted prefix. */
160 (void)snprintf(&cp
[n
], sizeof(kd
->errbuf
) - n
, ": %s",
/*
 * _kvm_malloc(): allocate 'n' zeroed bytes, recording an error on the
 * handle via _kvm_err() if the allocation fails.  Uses calloc so the
 * returned memory is zero-initialized.
 * NOTE(review): partial extraction — the return statement is not visible.
 */
167 _kvm_malloc(kvm_t
*kd
, size_t n
)
171 if ((p
= calloc(n
, sizeof(char))) == NULL
)
172 _kvm_err(kd
, kd
->program
, "can't allocate %zu bytes: %s",
/*
 * _kvm_read_kernel_ehdr(): open the kernel image on kd->nlfd with libelf,
 * validate it is an ELF object, read its ELF header into kd->nlehdr, and
 * reject unsupported data encodings.  Each failure records an error on
 * the handle via _kvm_err().
 * NOTE(review): partial extraction — error-path returns, elf_end() calls,
 * and the accepted cases of the EI_DATA switch are not visible here.
 */
178 _kvm_read_kernel_ehdr(kvm_t
*kd
)
182 if (elf_version(EV_CURRENT
) == EV_NONE
) {
183 _kvm_err(kd
, kd
->program
, "Unsupported libelf");
186 elf
= elf_begin(kd
->nlfd
, ELF_C_READ
, NULL
);
188 _kvm_err(kd
, kd
->program
, "%s", elf_errmsg(0));
191 if (elf_kind(elf
) != ELF_K_ELF
) {
192 _kvm_err(kd
, kd
->program
, "kernel is not an ELF file");
195 if (gelf_getehdr(elf
, &kd
->nlehdr
) == NULL
) {
196 _kvm_err(kd
, kd
->program
, "%s", elf_errmsg(0));
/* Only known byte orders are accepted; others are an error. */
202 switch (kd
->nlehdr
.e_ident
[EI_DATA
]) {
207 _kvm_err(kd
, kd
->program
,
208 "unsupported ELF data encoding for kernel");
/*
 * _kvm_probe_elf_kernel(): return non-zero when the previously-read
 * kernel ELF header (kd->nlehdr) matches the given ELF class and machine
 * and is an executable (ET_EXEC).  Used by per-arch probe callbacks.
 */
214 _kvm_probe_elf_kernel(kvm_t
*kd
, int class, int machine
)
217 return (kd
->nlehdr
.e_ident
[EI_CLASS
] == class &&
218 kd
->nlehdr
.e_type
== ET_EXEC
&&
219 kd
->nlehdr
.e_machine
== machine
);
/*
 * _kvm_is_minidump(): detect a minidump core by reading the first 8 bytes
 * of the memory file (kd->pmfd) and comparing them to "minidump".
 * NOTE(review): partial extraction — the return statements are not
 * visible in this view.
 */
223 _kvm_is_minidump(kvm_t
*kd
)
229 if (pread(kd
->pmfd
, &minihdr
, 8, 0) == 8 &&
230 memcmp(&minihdr
, "minidump", 8) == 0)
236 * The powerpc backend has a hack to strip a leading kerneldump
237 * header from the core before treating it as an ELF header.
239 * We can add that here if we can get a change to libelf to support
240 * an initial offset into the file. Alternatively we could patch
241 * savecore to extract cores from a regular file instead.
/*
 * _kvm_read_core_phdrs(): open the core file (kd->pmfd) with libelf,
 * validate it against the kernel header (same ELF class and machine,
 * type ET_CORE), then allocate and copy out all program headers via
 * *phdrp / *phnump.  Every validation failure records "invalid core"
 * or the libelf error message on the handle.
 * NOTE(review): partial extraction — error-path cleanup/returns and the
 * final assignment of the out-parameters are not visible here.
 */
244 _kvm_read_core_phdrs(kvm_t
*kd
, size_t *phnump
, GElf_Phdr
**phdrp
)
251 elf
= elf_begin(kd
->pmfd
, ELF_C_READ
, NULL
);
253 _kvm_err(kd
, kd
->program
, "%s", elf_errmsg(0));
256 if (elf_kind(elf
) != ELF_K_ELF
) {
257 _kvm_err(kd
, kd
->program
, "invalid core");
/* Core class must match the kernel image's class. */
260 if (gelf_getclass(elf
) != kd
->nlehdr
.e_ident
[EI_CLASS
]) {
261 _kvm_err(kd
, kd
->program
, "invalid core");
264 if (gelf_getehdr(elf
, &ehdr
) == NULL
) {
265 _kvm_err(kd
, kd
->program
, "%s", elf_errmsg(0));
268 if (ehdr
.e_type
!= ET_CORE
) {
269 _kvm_err(kd
, kd
->program
, "invalid core");
/* Core machine must match the kernel image's machine. */
272 if (ehdr
.e_machine
!= kd
->nlehdr
.e_machine
) {
273 _kvm_err(kd
, kd
->program
, "invalid core");
277 if (elf_getphdrnum(elf
, &phnum
) == -1) {
278 _kvm_err(kd
, kd
->program
, "%s", elf_errmsg(0));
282 phdr
= calloc(phnum
, sizeof(*phdr
));
284 _kvm_err(kd
, kd
->program
, "failed to allocate phdrs");
/* Copy each program header into the caller-owned array. */
288 for (i
= 0; i
< phnum
; i
++) {
289 if (gelf_getphdr(elf
, i
, &phdr
[i
]) == NULL
) {
290 _kvm_err(kd
, kd
->program
, "%s", elf_errmsg(0));
/*
 * _kvm_hpt_insert(): add a (physical address -> file offset) mapping to
 * the hash table.  The bucket index is an FNV-1 32-bit hash of the PA
 * masked to HPT_SIZE (so HPT_SIZE must be a power of two); new entries
 * are pushed at the head of the bucket's singly-linked chain.
 * NOTE(review): partial extraction — the malloc NULL check and the lines
 * storing pa/off into the new entry are not visible here.
 */
305 _kvm_hpt_insert(struct hpt
*hpt
, uint64_t pa
, off_t off
)
308 uint32_t fnv
= FNV1_32_INIT
;
310 fnv
= fnv_32_buf(&pa
, sizeof(pa
), fnv
);
311 fnv
&= (HPT_SIZE
- 1);
312 hpte
= malloc(sizeof(*hpte
));
/* Push onto the head of the bucket chain. */
315 hpte
->next
= hpt
->hpt_head
[fnv
];
316 hpt
->hpt_head
[fnv
] = hpte
;
/*
 * _kvm_hpt_init(): populate the hash table from a page bitmap at 'base'.
 * The bitmap is scanned word by word (32- or 64-bit words per word_size,
 * byte-swapped to host order via _kvm32toh/_kvm64toh); each set bit
 * corresponds to one page, whose physical address is derived from the
 * bit's position, and is inserted with its offset into the core file.
 * NOTE(review): partial extraction — the base32/base64 declarations, the
 * bit test inside the inner loop, and the advancement of 'off' are not
 * visible here.
 */
320 _kvm_hpt_init(kvm_t
*kd
, struct hpt
*hpt
, void *base
, size_t len
, off_t off
,
321 int page_size
, int word_size
)
323 uint64_t bits
, idx
, pa
;
329 for (idx
= 0; idx
< len
/ word_size
; idx
++) {
/* Convert each bitmap word from dump byte order to host order. */
330 if (word_size
== sizeof(uint64_t))
331 bits
= _kvm64toh(kd
, base64
[idx
]);
333 bits
= _kvm32toh(kd
, base32
[idx
]);
/* First PA covered by this word: one page per bit. */
334 pa
= idx
* word_size
* NBBY
* page_size
;
335 for (; bits
!= 0; bits
>>= 1, pa
+= page_size
) {
338 _kvm_hpt_insert(hpt
, pa
, off
);
/*
 * _kvm_hpt_find(): look up the file offset recorded for physical address
 * 'pa'.  Hashes the PA the same way _kvm_hpt_insert() does (FNV-1 32-bit,
 * masked to HPT_SIZE) and walks the bucket chain.
 * NOTE(review): partial extraction — the in-chain PA comparison and the
 * return statements are not visible here.
 */
345 _kvm_hpt_find(struct hpt
*hpt
, uint64_t pa
)
348 uint32_t fnv
= FNV1_32_INIT
;
350 fnv
= fnv_32_buf(&pa
, sizeof(pa
), fnv
);
351 fnv
&= (HPT_SIZE
- 1);
352 for (hpte
= hpt
->hpt_head
[fnv
]; hpte
!= NULL
; hpte
= hpte
->next
) {
/*
 * _kvm_hpt_free(): release every entry in the hash table by walking all
 * HPT_SIZE buckets.  'next' is saved before freeing so the chain can be
 * traversed safely.
 * NOTE(review): partial extraction — the free() call inside the inner
 * loop is not visible here.
 */
360 _kvm_hpt_free(struct hpt
*hpt
)
362 struct hpte
*hpte
, *next
;
365 for (i
= 0; i
< HPT_SIZE
; i
++) {
366 for (hpte
= hpt
->hpt_head
[i
]; hpte
!= NULL
; hpte
= next
) {
/*
 * _kvm_open(): common open path shared by kvm_open()/kvm_open2()/
 * kvm_openfiles().  Validates the arguments, opens the memory file 'mf';
 * if it is a character device it is treated as a live kernel (/dev/null
 * or /dev/mem, opening kmem as needed), otherwise it is treated as a
 * crash dump: the namelist file 'uf' is opened, its ELF header read, and
 * the per-architecture backends in the kvm_arch linker set are probed to
 * select kd->arch, which then initializes virtual-address translation.
 * On failure the error is copied to 'errout' when non-NULL ("sane" error
 * semantics).
 * NOTE(review): heavily elided extraction — default-path handling,
 * cleanup/goto labels, and the success return are not visible here.
 */
374 _kvm_open(kvm_t
*kd
, const char *uf
, const char *mf
, int flag
, char *errout
)
376 struct kvm_arch
**parch
;
389 else if (strlen(uf
) >= MAXPATHLEN
) {
390 _kvm_err(kd
, kd
->program
, "exec file name too long");
/* Only O_RDONLY/O_RDWR open modes are permitted. */
393 if (flag
& ~O_RDWR
) {
394 _kvm_err(kd
, kd
->program
, "bad flags arg");
400 if ((kd
->pmfd
= open(mf
, flag
| O_CLOEXEC
, 0)) < 0) {
401 _kvm_syserr(kd
, kd
->program
, "%s", mf
);
404 if (fstat(kd
->pmfd
, &st
) < 0) {
405 _kvm_syserr(kd
, kd
->program
, "%s", mf
);
/* Reject zero-length regular files (empty/truncated dump). */
408 if (S_ISREG(st
.st_mode
) && st
.st_size
<= 0) {
410 _kvm_syserr(kd
, kd
->program
, "empty file");
413 if (S_ISCHR(st
.st_mode
)) {
415 * If this is a character special device, then check that
416 * it's /dev/mem. If so, open kmem too. (Maybe we should
417 * make it work for either /dev/mem or /dev/kmem -- in either
418 * case you're working with a live kernel.)
420 if (strcmp(mf
, _PATH_DEVNULL
) == 0) {
421 kd
->vmfd
= open(_PATH_DEVNULL
, O_RDONLY
| O_CLOEXEC
);
423 } else if (strcmp(mf
, _PATH_MEM
) == 0) {
424 if ((kd
->vmfd
= open(_PATH_KMEM
, flag
| O_CLOEXEC
)) <
426 _kvm_syserr(kd
, kd
->program
, "%s", _PATH_KMEM
);
433 * This is a crash dump.
434 * Open the namelist fd and determine the architecture.
436 if ((kd
->nlfd
= open(uf
, O_RDONLY
| O_CLOEXEC
, 0)) < 0) {
437 _kvm_syserr(kd
, kd
->program
, "%s", uf
);
440 if (_kvm_read_kernel_ehdr(kd
) < 0)
442 if (strncmp(mf
, _PATH_FWMEM
, strlen(_PATH_FWMEM
)) == 0)
/* Probe each registered arch backend until one claims the dump. */
444 SET_FOREACH(parch
, kvm_arch
) {
445 if ((*parch
)->ka_probe(kd
)) {
450 if (kd
->arch
== NULL
) {
451 _kvm_err(kd
, kd
->program
, "unsupported architecture");
456 * Non-native kernels require a symbol resolver.
458 if (!kd
->arch
->ka_native(kd
) && kd
->resolve_symbol
== NULL
) {
459 _kvm_err(kd
, kd
->program
,
460 "non-native kernel requires a symbol resolver");
465 * Initialize the virtual address translation machinery.
467 if (kd
->arch
->ka_initvtop(kd
) < 0)
472 * Copy out the error if doing sane error semantics.
475 strlcpy(errout
, kd
->errbuf
, _POSIX2_LINE_MAX
);
/*
 * kvm_openfiles(): public constructor with "sane" error semantics —
 * allocates a zeroed kvm_t and delegates to _kvm_open(), passing 'errout'
 * so failures are reported via the caller's buffer rather than stderr.
 * The symbol-file argument 'sf' is unused.
 * NOTE(review): partial extraction — the trailing parameters (errout)
 * and the allocation-failure return are not visible here.
 */
481 kvm_openfiles(const char *uf
, const char *mf
, const char *sf __unused
, int flag
,
486 if ((kd
= calloc(1, sizeof(*kd
))) == NULL
) {
488 (void)strlcpy(errout
, strerror(errno
),
492 return (_kvm_open(kd
, uf
, mf
, flag
, errout
));
/*
 * kvm_open(): legacy public constructor with "hard" error semantics —
 * allocates a zeroed kvm_t, records 'errstr' as kd->program so errors
 * are printed to stderr, and delegates to _kvm_open() with a NULL
 * errout.  The symbol-file argument 'sf' is unused.
 * NOTE(review): partial extraction — the errstr parameter declaration
 * and the allocation-failure return are not visible here.
 */
496 kvm_open(const char *uf
, const char *mf
, const char *sf __unused
, int flag
,
501 if ((kd
= calloc(1, sizeof(*kd
))) == NULL
) {
503 (void)fprintf(stderr
, "%s: %s\n",
504 errstr
, strerror(errno
));
507 kd
->program
= errstr
;
508 return (_kvm_open(kd
, uf
, mf
, flag
, NULL
));
/*
 * kvm_open2(): public constructor taking an explicit symbol-resolver
 * callback (required for cross-architecture / non-native dumps).
 * Allocates a zeroed kvm_t, stores the resolver on the handle, and
 * delegates to _kvm_open() with the caller's error buffer.
 * NOTE(review): partial extraction — the allocation-failure return is
 * not visible here.
 */
512 kvm_open2(const char *uf
, const char *mf
, int flag
, char *errout
,
513 int (*resolver
)(const char *, kvaddr_t
*))
517 if ((kd
= calloc(1, sizeof(*kd
))) == NULL
) {
519 (void)strlcpy(errout
, strerror(errno
),
523 kd
->resolve_symbol
= resolver
;
524 return (_kvm_open(kd
, uf
, mf
, flag
, errout
));
/*
 * NOTE(review): fragment — the enclosing function signature is missing
 * from this extraction; from the FreeBSD line numbers and body this
 * appears to be kvm_close(): tear down arch vtop state, close the
 * memory/kmem/namelist descriptors (accumulating failures into 'error'),
 * and free the per-handle buffers before, presumably, freeing kd itself.
 */
532 if (kd
->vmst
!= NULL
)
533 kd
->arch
->ka_freevtop(kd
);
535 error
|= close(kd
->pmfd
);
537 error
|= close(kd
->vmfd
);
539 error
|= close(kd
->nlfd
);
540 if (kd
->procbase
!= 0)
541 free((void *)kd
->procbase
);
543 free((void *) kd
->argbuf
);
545 free((void *) kd
->argspc
);
547 free((void *)kd
->argv
);
554 * Walk the list of unresolved symbols, generate a new list and prefix the
555 * symbol names, try again, and merge back what we could resolve.
/*
 * kvm_fdnlist_prefix(): retry lookup of the entries in 'nl' still marked
 * N_UNDF ('missing' of them) under a symbol-name prefix (e.g. the VNET or
 * DPCPU prefix).  A single allocation holds both the reduced nlist and
 * the name strings; each name is stored as "<prefix><name>\0<name>" so
 * the original name can be matched back after lookup.  Freshly resolved
 * values are optionally passed through 'validate_fn' before being merged
 * into the caller's list.
 * NOTE(review): heavily elided extraction — several declarations, the
 * malloc NULL check, loop-body continuations, the free of the scratch
 * buffer, and the return are not visible here.
 */
558 kvm_fdnlist_prefix(kvm_t
*kd
, struct kvm_nlist
*nl
, int missing
,
559 const char *prefix
, kvaddr_t (*validate_fn
)(kvm_t
*, kvaddr_t
))
561 struct kvm_nlist
*n
, *np
, *p
;
565 int slen
, unresolved
;
568 * Calculate the space we need to malloc for nlist and names.
569 * We are going to store the name twice for later lookups: once
570 * with the prefix and once the unmodified name delmited by \0.
574 for (p
= nl
; p
->n_name
&& p
->n_name
[0]; ++p
) {
575 if (p
->n_type
!= N_UNDF
)
577 len
+= sizeof(struct kvm_nlist
) + strlen(prefix
) +
578 2 * (strlen(p
->n_name
) + 1);
583 /* Add space for the terminating nlist entry. */
584 len
+= sizeof(struct kvm_nlist
);
587 /* Alloc one chunk for (nlist, [names]) and setup pointers. */
588 n
= np
= malloc(len
);
592 cp
= ce
= (char *)np
;
593 cp
+= unresolved
* sizeof(struct kvm_nlist
);
596 /* Generate shortened nlist with special prefix. */
598 for (p
= nl
; p
->n_name
&& p
->n_name
[0]; ++p
) {
599 if (p
->n_type
!= N_UNDF
)
602 /* Save the new\0orig. name so we can later match it again. */
603 slen
= snprintf(cp
, ce
- cp
, "%s%s%c%s", prefix
,
604 (prefix
[0] != '\0' && p
->n_name
[0] == '_') ?
605 (p
->n_name
+ 1) : p
->n_name
, '\0', p
->n_name
);
606 if (slen
< 0 || slen
>= ce
- cp
)
614 /* Do lookup on the reduced list. */
616 unresolved
= kvm_fdnlist(kd
, np
);
618 /* Check if we could resolve further symbols and update the list. */
619 if (unresolved
>= 0 && unresolved
< missing
) {
620 /* Find the first freshly resolved entry. */
621 for (; np
->n_name
&& np
->n_name
[0]; np
++)
622 if (np
->n_type
!= N_UNDF
)
625 * The lists are both in the same order,
626 * so we can walk them in parallel.
628 for (p
= nl
; np
->n_name
&& np
->n_name
[0] &&
629 p
->n_name
&& p
->n_name
[0]; ++p
) {
630 if (p
->n_type
!= N_UNDF
)
632 /* Skip expanded name and compare to orig. one. */
633 ccp
= np
->n_name
+ strlen(np
->n_name
) + 1;
634 if (strcmp(ccp
, p
->n_name
) != 0)
636 /* Update nlist with new, translated results. */
637 p
->n_type
= np
->n_type
;
639 p
->n_value
= (*validate_fn
)(kd
, np
->n_value
);
641 p
->n_value
= np
->n_value
;
643 /* Find next freshly resolved entry. */
644 for (np
++; np
->n_name
&& np
->n_name
[0]; np
++)
645 if (np
->n_type
!= N_UNDF
)
649 /* We could assert missing = unresolved here. */
/*
 * _kvm_nlist(): core symbol-lookup routine.  For dead kernels (or when
 * kldsym(2) is unavailable) it falls back to kvm_fdnlist(), retrying
 * leftover symbols with the VNET and DPCPU prefixes via
 * kvm_fdnlist_prefix().  For live kernels it resolves each N_UNDF entry
 * with the kldsym(2) syscall, validating VNET/DPCPU addresses through
 * the corresponding helpers, and loops back with each prefix in turn if
 * entries remain unresolved.  Returns the number of entries not found.
 * NOTE(review): heavily elided extraction — the live/dead dispatch, the
 * retry 'goto' targets, nvalid accounting, and several returns are not
 * visible here.
 */
656 _kvm_nlist(kvm_t
*kd
, struct kvm_nlist
*nl
, int initialize
)
660 struct kld_sym_lookup lookup
;
662 const char *prefix
= "";
663 char symname
[1024]; /* XXX-BZ symbol name length limit? */
664 int tried_vnet
, tried_dpcpu
;
667 * If we can't use the kld symbol lookup, revert to the
671 error
= kvm_fdnlist(kd
, nl
);
672 if (error
<= 0) /* Hard error or success. */
675 if (_kvm_vnet_initialized(kd
, initialize
))
676 error
= kvm_fdnlist_prefix(kd
, nl
, error
,
677 VNET_SYMPREFIX
, _kvm_vnet_validaddr
);
679 if (error
> 0 && _kvm_dpcpu_initialized(kd
, initialize
))
680 error
= kvm_fdnlist_prefix(kd
, nl
, error
,
681 DPCPU_SYMPREFIX
, _kvm_dpcpu_validaddr
);
687 * We can use the kld lookup syscall. Go through each nlist entry
688 * and look it up with a kldsym(2) syscall.
694 for (p
= nl
; p
->n_name
&& p
->n_name
[0]; ++p
) {
695 if (p
->n_type
!= N_UNDF
)
698 lookup
.version
= sizeof(lookup
);
/* Build the (possibly prefixed) symbol name, dropping a leading '_'. */
702 error
= snprintf(symname
, sizeof(symname
), "%s%s", prefix
,
703 (prefix
[0] != '\0' && p
->n_name
[0] == '_') ?
704 (p
->n_name
+ 1) : p
->n_name
);
705 if (error
< 0 || error
>= (int)sizeof(symname
))
707 lookup
.symname
= symname
;
708 if (lookup
.symname
[0] == '_')
711 if (kldsym(0, KLDSYM_LOOKUP
, &lookup
) != -1) {
/* Translate VNET/DPCPU addresses into their per-instance space. */
713 if (_kvm_vnet_initialized(kd
, initialize
) &&
714 strcmp(prefix
, VNET_SYMPREFIX
) == 0)
716 _kvm_vnet_validaddr(kd
, lookup
.symvalue
);
717 else if (_kvm_dpcpu_initialized(kd
, initialize
) &&
718 strcmp(prefix
, DPCPU_SYMPREFIX
) == 0)
720 _kvm_dpcpu_validaddr(kd
, lookup
.symvalue
);
722 p
->n_value
= lookup
.symvalue
;
729 * Check the number of entries that weren't found. If they exist,
730 * try again with a prefix for virtualized or DPCPU symbol names.
732 error
= ((p
- nl
) - nvalid
);
733 if (error
&& _kvm_vnet_initialized(kd
, initialize
) && !tried_vnet
) {
735 prefix
= VNET_SYMPREFIX
;
738 if (error
&& _kvm_dpcpu_initialized(kd
, initialize
) && !tried_dpcpu
) {
740 prefix
= DPCPU_SYMPREFIX
;
745 * Return the number of entries that weren't found. If they exist,
746 * also fill internal error buffer.
748 error
= ((p
- nl
) - nvalid
);
750 _kvm_syserr(kd
, kd
->program
, "kvm_nlist");
/*
 * kvm_nlist2(): public symbol-lookup entry point using the 64-bit-safe
 * struct kvm_nlist.  Delegates to _kvm_nlist() with initialize=1 so
 * VNET/DPCPU support may be set up on demand.
 */
755 kvm_nlist2(kvm_t
*kd
, struct kvm_nlist
*nl
)
759 * If called via the public interface, permit initialization of
760 * further virtualized modules on demand.
762 return (_kvm_nlist(kd
, nl
, 1));
/*
 * kvm_nlist(): legacy public lookup using struct nlist.  Refuses
 * non-native vmcores (whose addresses would be truncated to u_long),
 * then bridges to kvm_nlist2() through a temporary kvm_nlist array:
 * names are shared in, type/value are copied back out.
 * NOTE(review): partial extraction — declarations of count/i/nfail, the
 * free of the temporary array, and the return are not visible here.
 */
766 kvm_nlist(kvm_t
*kd
, struct nlist
*nl
)
768 struct kvm_nlist
*kl
;
772 * Avoid reporting truncated addresses by failing for non-native
775 if (!kvm_native(kd
)) {
776 _kvm_err(kd
, kd
->program
, "kvm_nlist of non-native vmcore");
780 for (count
= 0; nl
[count
].n_name
!= NULL
&& nl
[count
].n_name
[0] != '\0';
785 kl
= calloc(count
+ 1, sizeof(*kl
));
786 for (i
= 0; i
< count
; i
++)
787 kl
[i
].n_name
= nl
[i
].n_name
;
788 nfail
= kvm_nlist2(kd
, kl
);
789 for (i
= 0; i
< count
; i
++) {
790 nl
[i
].n_type
= kl
[i
].n_type
;
793 nl
[i
].n_value
= kl
[i
].n_value
;
/*
 * kvm_read(): legacy u_long-address wrapper; simply forwards to
 * kvm_read2() (the kvaddr_t-based implementation below).
 */
799 kvm_read(kvm_t
*kd
, u_long kva
, void *buf
, size_t len
)
802 return (kvm_read2(kd
, kva
, buf
, len
));
/*
 * kvm_read2(): read 'len' bytes of kernel virtual memory at 'kva' into
 * 'buf'.  Live kernel: seek+read on /dev/kmem (kd->vmfd) and let the
 * kernel translate addresses.  Dead kernel: repeatedly translate kva to
 * a physical/core offset via the arch backend's ka_kvatop, then
 * seek+read from the core (kd->pmfd), advancing through 'cp' until the
 * request is satisfied; returns the number of bytes actually copied.
 * NOTE(review): heavily elided extraction — the live/dead branch, the
 * ka_kvatop failure check, loop structure, and EOF handling are only
 * partially visible here.
 */
806 kvm_read2(kvm_t
*kd
, kvaddr_t kva
, void *buf
, size_t len
)
815 * We're using /dev/kmem. Just read straight from the
816 * device and let the active kernel do the address translation.
819 if (lseek(kd
->vmfd
, (off_t
)kva
, 0) == -1 && errno
!= 0) {
820 _kvm_err(kd
, 0, "invalid address (0x%jx)",
824 cr
= read(kd
->vmfd
, buf
, len
);
826 _kvm_syserr(kd
, 0, "kvm_read");
828 } else if (cr
< (ssize_t
)len
)
829 _kvm_err(kd
, kd
->program
, "short read");
/* Dead-kernel path: translate, then read from the core file. */
835 cc
= kd
->arch
->ka_kvatop(kd
, kva
, &pa
);
/* Clamp the translated run to the remaining request. */
838 if (cc
> (ssize_t
)len
)
841 if (lseek(kd
->pmfd
, pa
, 0) == -1 && errno
!= 0) {
842 _kvm_syserr(kd
, 0, _PATH_MEM
);
845 cr
= read(kd
->pmfd
, cp
, cc
);
847 _kvm_syserr(kd
, kd
->program
, "kvm_read");
851 * If ka_kvatop returns a bogus value or our core file is
852 * truncated, we might wind up seeking beyond the end of the
853 * core file in which case the read will return 0 (EOF).
862 return (cp
- (char *)buf
);
/*
 * kvm_write(): write 'len' bytes from 'buf' to kernel virtual address
 * 'kva'.  Supported only on a live kernel (seek+write on kd->vmfd);
 * for dead kernels an error is recorded and, presumably, -1 returned.
 * NOTE(review): partial extraction — the live/dead branch and the
 * return statements are not visible here.
 */
866 kvm_write(kvm_t
*kd
, u_long kva
, const void *buf
, size_t len
)
872 * Just like kvm_read, only we write.
875 if (lseek(kd
->vmfd
, (off_t
)kva
, 0) == -1 && errno
!= 0) {
876 _kvm_err(kd
, 0, "invalid address (%lx)", kva
);
879 cc
= write(kd
->vmfd
, buf
, len
);
881 _kvm_syserr(kd
, 0, "kvm_write");
883 } else if ((size_t)cc
< len
)
884 _kvm_err(kd
, kd
->program
, "short write");
887 _kvm_err(kd
, kd
->program
,
888 "kvm_write not implemented for dead kernels");
/*
 * kvm_native(): report whether the opened kernel/dump matches the host
 * architecture, by asking the selected arch backend's ka_native hook.
 * NOTE(review): partial extraction — the live-kernel short-circuit that
 * presumably precedes this return is not visible here.
 */
895 kvm_native(kvm_t
*kd
)
900 return (kd
->arch
->ka_native(kd
));