2 * Copyright (c) 2000 David O'Brien
3 * Copyright (c) 1995-1996 Søren Schmidt
4 * Copyright (c) 1996 Peter Wemm
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer
12 * in this position and unchanged.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. The name of the author may not be used to endorse or promote products
17 * derived from this software without specific prior written permission
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 #include <sys/cdefs.h>
32 __FBSDID("$FreeBSD$");
34 #include "opt_compat.h"
36 #include <sys/param.h>
38 #include <sys/fcntl.h>
39 #include <sys/imgact.h>
40 #include <sys/imgact_elf.h>
41 #include <sys/kernel.h>
43 #include <sys/malloc.h>
44 #include <sys/mount.h>
45 #include <sys/mutex.h>
47 #include <sys/namei.h>
48 #include <sys/pioctl.h>
50 #include <sys/procfs.h>
51 #include <sys/resourcevar.h>
52 #include <sys/sf_buf.h>
53 #include <sys/systm.h>
54 #include <sys/signalvar.h>
57 #include <sys/syscall.h>
58 #include <sys/sysctl.h>
59 #include <sys/sysent.h>
60 #include <sys/vnode.h>
63 #include <vm/vm_kern.h>
64 #include <vm/vm_param.h>
66 #include <vm/vm_map.h>
67 #include <vm/vm_object.h>
68 #include <vm/vm_extern.h>
70 #include <machine/elf.h>
71 #include <machine/md_var.h>
73 #if defined(COMPAT_IA32) && __ELF_WORD_SIZE == 32
74 #include <machine/fpu.h>
75 #include <compat/ia32/ia32_reg.h>
78 #define OLD_EI_BRAND 8
80 static int __elfN(check_header
)(const Elf_Ehdr
*hdr
);
81 static Elf_Brandinfo
*__elfN(get_brandinfo
)(const Elf_Ehdr
*hdr
,
83 static int __elfN(load_file
)(struct proc
*p
, const char *file
, u_long
*addr
,
84 u_long
*entry
, size_t pagesize
);
85 static int __elfN(load_section
)(struct vmspace
*vmspace
, vm_object_t object
,
86 vm_offset_t offset
, caddr_t vmaddr
, size_t memsz
, size_t filsz
,
87 vm_prot_t prot
, size_t pagesize
);
88 static int __CONCAT(exec_
, __elfN(imgact
))(struct image_params
*imgp
);
90 SYSCTL_NODE(_kern
, OID_AUTO
, __CONCAT(elf
, __ELF_WORD_SIZE
), CTLFLAG_RW
, 0,
93 int __elfN(fallback_brand
) = -1;
94 SYSCTL_INT(__CONCAT(_kern_elf
, __ELF_WORD_SIZE
), OID_AUTO
,
95 fallback_brand
, CTLFLAG_RW
, &__elfN(fallback_brand
), 0,
96 __XSTRING(__CONCAT(ELF
, __ELF_WORD_SIZE
)) " brand of last resort");
97 TUNABLE_INT("kern.elf" __XSTRING(__ELF_WORD_SIZE
) ".fallback_brand",
98 &__elfN(fallback_brand
));
100 static int elf_trace
= 0;
101 SYSCTL_INT(_debug
, OID_AUTO
, __elfN(trace
), CTLFLAG_RW
, &elf_trace
, 0, "");
103 static int elf_legacy_coredump
= 0;
104 SYSCTL_INT(_debug
, OID_AUTO
, __elfN(legacy_coredump
), CTLFLAG_RW
,
105 &elf_legacy_coredump
, 0, "");
107 static Elf_Brandinfo
*elf_brand_list
[MAX_BRANDS
];
/*
 * Rounding helpers parameterized by an explicit page size.
 *
 * trunc_page_ps(va, ps): round va down to a multiple of ps.
 * round_page_ps(va, ps): round va up to a multiple of ps.
 * aligned(a, t):         true when address a is a multiple of sizeof(t).
 *
 * The mask arithmetic only yields a rounding when ps (or sizeof(t)) is a
 * power of two.  Every macro argument is parenthesized so that compound
 * expressions such as `1UL << shift' expand with the intended precedence.
 * Note that round_page_ps() expands ps twice, so ps must be free of side
 * effects.
 */
#define	trunc_page_ps(va, ps)	((va) & ~((ps) - 1))
#define	round_page_ps(va, ps)	(((va) + ((ps) - 1)) & ~((ps) - 1))
#define	aligned(a, t)	(trunc_page_ps((u_long)(a), sizeof(t)) == (u_long)(a))
114 __elfN(insert_brand_entry
)(Elf_Brandinfo
*entry
)
118 for (i
= 0; i
< MAX_BRANDS
; i
++) {
119 if (elf_brand_list
[i
] == NULL
) {
120 elf_brand_list
[i
] = entry
;
130 __elfN(remove_brand_entry
)(Elf_Brandinfo
*entry
)
134 for (i
= 0; i
< MAX_BRANDS
; i
++) {
135 if (elf_brand_list
[i
] == entry
) {
136 elf_brand_list
[i
] = NULL
;
146 __elfN(brand_inuse
)(Elf_Brandinfo
*entry
)
151 sx_slock(&allproc_lock
);
152 FOREACH_PROC_IN_SYSTEM(p
) {
153 if (p
->p_sysent
== entry
->sysvec
) {
158 sx_sunlock(&allproc_lock
);
163 static Elf_Brandinfo
*
164 __elfN(get_brandinfo
)(const Elf_Ehdr
*hdr
, const char *interp
)
170 * We support three types of branding -- (1) the ELF EI_OSABI field
171 * that SCO added to the ELF spec, (2) FreeBSD 3.x's traditional string
172 * branding w/in the ELF header, and (3) path of the `interp_path'
173 * field. We should also look for an ".note.ABI-tag" ELF section now
174 * in all Linux ELF binaries, FreeBSD 4.1+, and some NetBSD ones.
177 /* If the executable has a brand, search for it in the brand list. */
178 for (i
= 0; i
< MAX_BRANDS
; i
++) {
179 bi
= elf_brand_list
[i
];
180 if (bi
!= NULL
&& hdr
->e_machine
== bi
->machine
&&
181 (hdr
->e_ident
[EI_OSABI
] == bi
->brand
||
182 strncmp((const char *)&hdr
->e_ident
[OLD_EI_BRAND
],
183 bi
->compat_3_brand
, strlen(bi
->compat_3_brand
)) == 0))
187 /* Lacking a known brand, search for a recognized interpreter. */
188 if (interp
!= NULL
) {
189 for (i
= 0; i
< MAX_BRANDS
; i
++) {
190 bi
= elf_brand_list
[i
];
191 if (bi
!= NULL
&& hdr
->e_machine
== bi
->machine
&&
192 strcmp(interp
, bi
->interp_path
) == 0)
197 /* Lacking a recognized interpreter, try the default brand */
198 for (i
= 0; i
< MAX_BRANDS
; i
++) {
199 bi
= elf_brand_list
[i
];
200 if (bi
!= NULL
&& hdr
->e_machine
== bi
->machine
&&
201 __elfN(fallback_brand
) == bi
->brand
)
208 __elfN(check_header
)(const Elf_Ehdr
*hdr
)
214 hdr
->e_ident
[EI_CLASS
] != ELF_TARG_CLASS
||
215 hdr
->e_ident
[EI_DATA
] != ELF_TARG_DATA
||
216 hdr
->e_ident
[EI_VERSION
] != EV_CURRENT
||
217 hdr
->e_phentsize
!= sizeof(Elf_Phdr
) ||
218 hdr
->e_version
!= ELF_TARG_VER
)
222 * Make sure we have at least one brand for this machine.
225 for (i
= 0; i
< MAX_BRANDS
; i
++) {
226 bi
= elf_brand_list
[i
];
227 if (bi
!= NULL
&& bi
->machine
== hdr
->e_machine
)
237 __elfN(map_partial
)(vm_map_t map
, vm_object_t object
, vm_ooffset_t offset
,
238 vm_offset_t start
, vm_offset_t end
, vm_prot_t prot
)
245 * Create the page if it doesn't exist yet. Ignore errors.
248 vm_map_insert(map
, NULL
, 0, trunc_page(start
), round_page(end
),
249 VM_PROT_ALL
, VM_PROT_ALL
, 0);
253 * Find the page from the underlying object.
256 sf
= vm_imgact_map_page(object
, offset
);
258 return (KERN_FAILURE
);
259 off
= offset
- trunc_page(offset
);
260 error
= copyout((caddr_t
)sf_buf_kva(sf
) + off
, (caddr_t
)start
,
262 vm_imgact_unmap_page(sf
);
264 return (KERN_FAILURE
);
268 return (KERN_SUCCESS
);
272 __elfN(map_insert
)(vm_map_t map
, vm_object_t object
, vm_ooffset_t offset
,
273 vm_offset_t start
, vm_offset_t end
, vm_prot_t prot
, int cow
)
280 if (start
!= trunc_page(start
)) {
281 rv
= __elfN(map_partial
)(map
, object
, offset
, start
,
282 round_page(start
), prot
);
285 offset
+= round_page(start
) - start
;
286 start
= round_page(start
);
288 if (end
!= round_page(end
)) {
289 rv
= __elfN(map_partial
)(map
, object
, offset
+
290 trunc_page(end
) - start
, trunc_page(end
), end
, prot
);
293 end
= trunc_page(end
);
296 if (offset
& PAGE_MASK
) {
298 * The mapping is not page aligned. This means we have
299 * to copy the data. Sigh.
301 rv
= vm_map_find(map
, NULL
, 0, &start
, end
- start
,
302 FALSE
, prot
| VM_PROT_WRITE
, VM_PROT_ALL
, 0);
306 return (KERN_SUCCESS
);
307 for (; start
< end
; start
+= sz
) {
308 sf
= vm_imgact_map_page(object
, offset
);
310 return (KERN_FAILURE
);
311 off
= offset
- trunc_page(offset
);
313 if (sz
> PAGE_SIZE
- off
)
314 sz
= PAGE_SIZE
- off
;
315 error
= copyout((caddr_t
)sf_buf_kva(sf
) + off
,
317 vm_imgact_unmap_page(sf
);
319 return (KERN_FAILURE
);
325 vm_object_reference(object
);
327 rv
= vm_map_insert(map
, object
, offset
, start
, end
,
328 prot
, VM_PROT_ALL
, cow
);
330 if (rv
!= KERN_SUCCESS
)
331 vm_object_deallocate(object
);
335 return (KERN_SUCCESS
);
340 __elfN(load_section
)(struct vmspace
*vmspace
,
341 vm_object_t object
, vm_offset_t offset
,
342 caddr_t vmaddr
, size_t memsz
, size_t filsz
, vm_prot_t prot
,
347 vm_offset_t map_addr
;
350 vm_offset_t file_addr
;
353 * It's necessary to fail if the filsz + offset taken from the
354 * header is greater than the actual file pager object's size.
355 * If we were to allow this, then the vm_map_find() below would
356 * walk right off the end of the file object and into the ether.
358 * While I'm here, might as well check for something else that
359 * is invalid: filsz cannot be greater than memsz.
361 if ((off_t
)filsz
+ offset
> object
->un_pager
.vnp
.vnp_size
||
363 uprintf("elf_load_section: truncated ELF file\n");
367 map_addr
= trunc_page_ps((vm_offset_t
)vmaddr
, pagesize
);
368 file_addr
= trunc_page_ps(offset
, pagesize
);
371 * We have two choices. We can either clear the data in the last page
372 * of an oversized mapping, or we can start the anon mapping a page
373 * early and copy the initialized data into that first page. We
374 * choose the second.
377 map_len
= trunc_page_ps(offset
+ filsz
, pagesize
) - file_addr
;
379 map_len
= round_page_ps(offset
+ filsz
, pagesize
) - file_addr
;
382 /* cow flags: don't dump readonly sections in core */
383 cow
= MAP_COPY_ON_WRITE
| MAP_PREFAULT
|
384 (prot
& VM_PROT_WRITE
? 0 : MAP_DISABLE_COREDUMP
);
386 rv
= __elfN(map_insert
)(&vmspace
->vm_map
,
388 file_addr
, /* file offset */
389 map_addr
, /* virtual start */
390 map_addr
+ map_len
,/* virtual end */
393 if (rv
!= KERN_SUCCESS
)
396 /* we can stop now if we've covered it all */
397 if (memsz
== filsz
) {
404 * We have to get the remaining bit of the file into the first part
405 * of the oversized map segment. This is normally because the .data
406 * segment in the file is extended to provide bss. It's a neat idea
407 * to try and save a page, but it's a pain in the behind to implement.
409 copy_len
= (offset
+ filsz
) - trunc_page_ps(offset
+ filsz
, pagesize
);
410 map_addr
= trunc_page_ps((vm_offset_t
)vmaddr
+ filsz
, pagesize
);
411 map_len
= round_page_ps((vm_offset_t
)vmaddr
+ memsz
, pagesize
) -
414 /* This had damn well better be true! */
416 rv
= __elfN(map_insert
)(&vmspace
->vm_map
, NULL
, 0, map_addr
,
417 map_addr
+ map_len
, VM_PROT_ALL
, 0);
418 if (rv
!= KERN_SUCCESS
) {
426 sf
= vm_imgact_map_page(object
, offset
+ filsz
);
430 /* send the page fragment to user space */
431 off
= trunc_page_ps(offset
+ filsz
, pagesize
) -
432 trunc_page(offset
+ filsz
);
433 error
= copyout((caddr_t
)sf_buf_kva(sf
) + off
,
434 (caddr_t
)map_addr
, copy_len
);
435 vm_imgact_unmap_page(sf
);
442 * set it to the specified protection.
443 * XXX had better undo the damage from pasting over the cracks here!
445 vm_map_protect(&vmspace
->vm_map
, trunc_page(map_addr
),
446 round_page(map_addr
+ map_len
), prot
, FALSE
);
452 * Load the file "file" into memory. It may be either a shared object
455 * The "addr" reference parameter is in/out. On entry, it specifies
456 * the address where a shared object should be loaded. If the file is
457 * an executable, this value is ignored. On exit, "addr" specifies
458 * where the file was actually loaded.
460 * The "entry" reference parameter is out only. On exit, it specifies
461 * the entry point for the loaded file.
464 __elfN(load_file
)(struct proc
*p
, const char *file
, u_long
*addr
,
465 u_long
*entry
, size_t pagesize
)
470 struct image_params image_params
;
472 const Elf_Ehdr
*hdr
= NULL
;
473 const Elf_Phdr
*phdr
= NULL
;
474 struct nameidata
*nd
;
475 struct vmspace
*vmspace
= p
->p_vmspace
;
477 struct image_params
*imgp
;
480 u_long base_addr
= 0;
481 int vfslocked
, error
, i
, numsegs
;
483 tempdata
= malloc(sizeof(*tempdata
), M_TEMP
, M_WAITOK
);
485 attr
= &tempdata
->attr
;
486 imgp
= &tempdata
->image_params
;
489 * Initialize part of the common data
493 imgp
->firstpage
= NULL
;
494 imgp
->image_header
= NULL
;
496 imgp
->execlabel
= NULL
;
498 NDINIT(nd
, LOOKUP
, MPSAFE
|LOCKLEAF
|FOLLOW
, UIO_SYSSPACE
, file
,
501 if ((error
= namei(nd
)) != 0) {
505 vfslocked
= NDHASGIANT(nd
);
506 NDFREE(nd
, NDF_ONLY_PNBUF
);
507 imgp
->vp
= nd
->ni_vp
;
510 * Check permissions, modes, uid, etc on the file, and "open" it.
512 error
= exec_check_permissions(imgp
);
516 error
= exec_map_first_page(imgp
);
521 * Also make certain that the interpreter stays the same, so set
522 * its VV_TEXT flag, too.
524 nd
->ni_vp
->v_vflag
|= VV_TEXT
;
526 imgp
->object
= nd
->ni_vp
->v_object
;
528 hdr
= (const Elf_Ehdr
*)imgp
->image_header
;
529 if ((error
= __elfN(check_header
)(hdr
)) != 0)
531 if (hdr
->e_type
== ET_DYN
)
533 else if (hdr
->e_type
== ET_EXEC
)
540 /* Only support headers that fit within first page for now */
541 /* (multiplication of two Elf_Half fields will not overflow) */
542 if ((hdr
->e_phoff
> PAGE_SIZE
) ||
543 (hdr
->e_phentsize
* hdr
->e_phnum
) > PAGE_SIZE
- hdr
->e_phoff
) {
548 phdr
= (const Elf_Phdr
*)(imgp
->image_header
+ hdr
->e_phoff
);
549 if (!aligned(phdr
, Elf_Addr
)) {
554 for (i
= 0, numsegs
= 0; i
< hdr
->e_phnum
; i
++) {
555 if (phdr
[i
].p_type
== PT_LOAD
) { /* Loadable segment */
557 if (phdr
[i
].p_flags
& PF_X
)
558 prot
|= VM_PROT_EXECUTE
;
559 if (phdr
[i
].p_flags
& PF_W
)
560 prot
|= VM_PROT_WRITE
;
561 if (phdr
[i
].p_flags
& PF_R
)
562 prot
|= VM_PROT_READ
;
564 if ((error
= __elfN(load_section
)(vmspace
,
565 imgp
->object
, phdr
[i
].p_offset
,
566 (caddr_t
)(uintptr_t)phdr
[i
].p_vaddr
+ rbase
,
567 phdr
[i
].p_memsz
, phdr
[i
].p_filesz
, prot
,
571 * Establish the base address if this is the
575 base_addr
= trunc_page(phdr
[i
].p_vaddr
+
581 *entry
= (unsigned long)hdr
->e_entry
+ rbase
;
585 exec_unmap_first_page(imgp
);
590 VFS_UNLOCK_GIANT(vfslocked
);
591 free(tempdata
, M_TEMP
);
596 static const char FREEBSD_ABI_VENDOR
[] = "FreeBSD";
599 __CONCAT(exec_
, __elfN(imgact
))(struct image_params
*imgp
)
601 const Elf_Ehdr
*hdr
= (const Elf_Ehdr
*)imgp
->image_header
;
602 const Elf_Phdr
*phdr
, *pnote
= NULL
;
603 Elf_Auxargs
*elf_auxargs
;
604 struct vmspace
*vmspace
;
606 u_long text_size
= 0, data_size
= 0, total_size
= 0;
607 u_long text_addr
= 0, data_addr
= 0;
608 u_long seg_size
, seg_addr
;
609 u_long addr
, entry
= 0, proghdr
= 0;
611 const char *interp
= NULL
, *newinterp
= NULL
;
612 Elf_Brandinfo
*brand_info
;
613 const Elf_Note
*note
, *note_end
;
615 const char *note_name
;
616 struct sysentvec
*sv
;
619 * Do we have a valid ELF header ?
621 * Only allow ET_EXEC & ET_DYN here, reject ET_DYN later
622 * if particular brand doesn't support it.
624 if (__elfN(check_header
)(hdr
) != 0 ||
625 (hdr
->e_type
!= ET_EXEC
&& hdr
->e_type
!= ET_DYN
))
629 * From here on down, we return an errno, not -1, as we've
630 * detected an ELF file.
633 if ((hdr
->e_phoff
> PAGE_SIZE
) ||
634 (hdr
->e_phoff
+ hdr
->e_phentsize
* hdr
->e_phnum
) > PAGE_SIZE
) {
635 /* Only support headers in first page for now */
638 phdr
= (const Elf_Phdr
*)(imgp
->image_header
+ hdr
->e_phoff
);
639 if (!aligned(phdr
, Elf_Addr
))
641 for (i
= 0; i
< hdr
->e_phnum
; i
++) {
642 if (phdr
[i
].p_type
== PT_INTERP
) {
643 /* Path to interpreter */
644 if (phdr
[i
].p_filesz
> MAXPATHLEN
||
645 phdr
[i
].p_offset
+ phdr
[i
].p_filesz
> PAGE_SIZE
)
647 interp
= imgp
->image_header
+ phdr
[i
].p_offset
;
652 brand_info
= __elfN(get_brandinfo
)(hdr
, interp
);
653 if (brand_info
== NULL
) {
654 uprintf("ELF binary type \"%u\" not known.\n",
655 hdr
->e_ident
[EI_OSABI
]);
658 if (hdr
->e_type
== ET_DYN
&&
659 (brand_info
->flags
& BI_CAN_EXEC_DYN
) == 0)
661 sv
= brand_info
->sysvec
;
662 if (interp
!= NULL
&& brand_info
->interp_newpath
!= NULL
)
663 newinterp
= brand_info
->interp_newpath
;
666 * Avoid a possible deadlock if the current address space is destroyed
667 * and that address space maps the locked vnode. In the common case,
668 * the locked vnode's v_usecount is decremented but remains greater
669 * than zero. Consequently, the vnode lock is not needed by vrele().
670 * However, in cases where the vnode lock is external, such as nullfs,
671 * v_usecount may become zero.
673 VOP_UNLOCK(imgp
->vp
, 0);
675 error
= exec_new_vmspace(imgp
, sv
);
676 imgp
->proc
->p_sysent
= sv
;
678 vn_lock(imgp
->vp
, LK_EXCLUSIVE
| LK_RETRY
);
682 vmspace
= imgp
->proc
->p_vmspace
;
684 for (i
= 0; i
< hdr
->e_phnum
; i
++) {
685 switch (phdr
[i
].p_type
) {
686 case PT_LOAD
: /* Loadable segment */
688 if (phdr
[i
].p_flags
& PF_X
)
689 prot
|= VM_PROT_EXECUTE
;
690 if (phdr
[i
].p_flags
& PF_W
)
691 prot
|= VM_PROT_WRITE
;
692 if (phdr
[i
].p_flags
& PF_R
)
693 prot
|= VM_PROT_READ
;
695 #if defined(__ia64__) && __ELF_WORD_SIZE == 32 && defined(IA32_ME_HARDER)
697 * Some x86 binaries assume read == executable,
698 * notably the M3 runtime and therefore cvsup
700 if (prot
& VM_PROT_READ
)
701 prot
|= VM_PROT_EXECUTE
;
704 if ((error
= __elfN(load_section
)(vmspace
,
705 imgp
->object
, phdr
[i
].p_offset
,
706 (caddr_t
)(uintptr_t)phdr
[i
].p_vaddr
,
707 phdr
[i
].p_memsz
, phdr
[i
].p_filesz
, prot
,
708 sv
->sv_pagesize
)) != 0)
712 * If this segment contains the program headers,
713 * remember their virtual address for the AT_PHDR
714 * aux entry. Static binaries don't usually include
717 if (phdr
[i
].p_offset
== 0 &&
718 hdr
->e_phoff
+ hdr
->e_phnum
* hdr
->e_phentsize
720 proghdr
= phdr
[i
].p_vaddr
+ hdr
->e_phoff
;
722 seg_addr
= trunc_page(phdr
[i
].p_vaddr
);
723 seg_size
= round_page(phdr
[i
].p_memsz
+
724 phdr
[i
].p_vaddr
- seg_addr
);
727 * Is this .text or .data? We can't use
728 * VM_PROT_WRITE or VM_PROT_EXEC, it breaks the
729 * alpha terribly and possibly does other bad
730 * things so we stick to the old way of figuring
731 * it out: If the segment contains the program
732 * entry point, it's a text segment, otherwise it
735 * Note that obreak() assumes that data_addr +
736 * data_size == end of data load area, and the ELF
737 * file format expects segments to be sorted by
738 * address. If multiple data segments exist, the
739 * last one will be used.
741 if (hdr
->e_entry
>= phdr
[i
].p_vaddr
&&
742 hdr
->e_entry
< (phdr
[i
].p_vaddr
+
744 text_size
= seg_size
;
745 text_addr
= seg_addr
;
746 entry
= (u_long
)hdr
->e_entry
;
748 data_size
= seg_size
;
749 data_addr
= seg_addr
;
751 total_size
+= seg_size
;
753 case PT_PHDR
: /* Program header table info */
754 proghdr
= phdr
[i
].p_vaddr
;
764 if (data_addr
== 0 && data_size
== 0) {
765 data_addr
= text_addr
;
766 data_size
= text_size
;
770 * Check limits. It should be safe to check the
771 * limits after loading the segments since we do
772 * not actually fault in all the segments pages.
774 PROC_LOCK(imgp
->proc
);
775 if (data_size
> lim_cur(imgp
->proc
, RLIMIT_DATA
) ||
776 text_size
> maxtsiz
||
777 total_size
> lim_cur(imgp
->proc
, RLIMIT_VMEM
)) {
778 PROC_UNLOCK(imgp
->proc
);
782 vmspace
->vm_tsize
= text_size
>> PAGE_SHIFT
;
783 vmspace
->vm_taddr
= (caddr_t
)(uintptr_t)text_addr
;
784 vmspace
->vm_dsize
= data_size
>> PAGE_SHIFT
;
785 vmspace
->vm_daddr
= (caddr_t
)(uintptr_t)data_addr
;
788 * We load the dynamic linker where a userland call
789 * to mmap(0, ...) would put it. The rationale behind this
790 * calculation is that it leaves room for the heap to grow to
791 * its maximum allowed size.
793 addr
= round_page((vm_offset_t
)imgp
->proc
->p_vmspace
->vm_daddr
+
794 lim_max(imgp
->proc
, RLIMIT_DATA
));
795 PROC_UNLOCK(imgp
->proc
);
797 imgp
->entry_addr
= entry
;
799 if (interp
!= NULL
) {
800 int have_interp
= FALSE
;
801 VOP_UNLOCK(imgp
->vp
, 0);
802 if (brand_info
->emul_path
!= NULL
&&
803 brand_info
->emul_path
[0] != '\0') {
804 path
= malloc(MAXPATHLEN
, M_TEMP
, M_WAITOK
);
805 snprintf(path
, MAXPATHLEN
, "%s%s",
806 brand_info
->emul_path
, interp
);
807 error
= __elfN(load_file
)(imgp
->proc
, path
, &addr
,
808 &imgp
->entry_addr
, sv
->sv_pagesize
);
813 if (!have_interp
&& newinterp
!= NULL
) {
814 error
= __elfN(load_file
)(imgp
->proc
, newinterp
, &addr
,
815 &imgp
->entry_addr
, sv
->sv_pagesize
);
819 error
= __elfN(load_file
)(imgp
->proc
, interp
, &addr
,
820 &imgp
->entry_addr
, sv
->sv_pagesize
);
822 vn_lock(imgp
->vp
, LK_EXCLUSIVE
| LK_RETRY
);
824 uprintf("ELF interpreter %s not found\n", interp
);
830 * Construct auxargs table (used by the fixup routine)
832 elf_auxargs
= malloc(sizeof(Elf_Auxargs
), M_TEMP
, M_WAITOK
);
833 elf_auxargs
->execfd
= -1;
834 elf_auxargs
->phdr
= proghdr
;
835 elf_auxargs
->phent
= hdr
->e_phentsize
;
836 elf_auxargs
->phnum
= hdr
->e_phnum
;
837 elf_auxargs
->pagesz
= PAGE_SIZE
;
838 elf_auxargs
->base
= addr
;
839 elf_auxargs
->flags
= 0;
840 elf_auxargs
->entry
= entry
;
841 elf_auxargs
->trace
= elf_trace
;
843 imgp
->auxargs
= elf_auxargs
;
844 imgp
->interpreted
= 0;
847 * Try to fetch the osreldate for FreeBSD binary from the ELF
848 * OSABI-note. Only the first page of the image is searched,
849 * the same as for headers.
851 if (pnote
!= NULL
&& pnote
->p_offset
< PAGE_SIZE
&&
852 pnote
->p_offset
+ pnote
->p_filesz
< PAGE_SIZE
) {
853 note
= (const Elf_Note
*)(imgp
->image_header
+ pnote
->p_offset
);
854 if (!aligned(note
, Elf32_Addr
)) {
855 free(imgp
->auxargs
, M_TEMP
);
856 imgp
->auxargs
= NULL
;
859 note_end
= (const Elf_Note
*)(imgp
->image_header
+ pnote
->p_offset
+
861 while (note
< note_end
) {
862 if (note
->n_namesz
== sizeof(FREEBSD_ABI_VENDOR
) &&
863 note
->n_descsz
== sizeof(int32_t) &&
864 note
->n_type
== 1 /* ABI_NOTETYPE */) {
865 note_name
= (const char *)(note
+ 1);
866 if (strncmp(FREEBSD_ABI_VENDOR
, note_name
,
867 sizeof(FREEBSD_ABI_VENDOR
)) == 0) {
868 imgp
->proc
->p_osrel
= *(const int32_t *)
870 round_page_ps(sizeof(FREEBSD_ABI_VENDOR
),
871 sizeof(Elf32_Addr
)));
875 note
= (const Elf_Note
*)((const char *)(note
+ 1) +
876 round_page_ps(note
->n_namesz
, sizeof(Elf32_Addr
)) +
877 round_page_ps(note
->n_descsz
, sizeof(Elf32_Addr
)));
884 #define suword __CONCAT(suword, __ELF_WORD_SIZE)
887 __elfN(freebsd_fixup
)(register_t
**stack_base
, struct image_params
*imgp
)
889 Elf_Auxargs
*args
= (Elf_Auxargs
*)imgp
->auxargs
;
893 base
= (Elf_Addr
*)*stack_base
;
894 pos
= base
+ (imgp
->args
->argc
+ imgp
->args
->envc
+ 2);
897 AUXARGS_ENTRY(pos
, AT_DEBUG
, 1);
899 if (args
->execfd
!= -1) {
900 AUXARGS_ENTRY(pos
, AT_EXECFD
, args
->execfd
);
902 AUXARGS_ENTRY(pos
, AT_PHDR
, args
->phdr
);
903 AUXARGS_ENTRY(pos
, AT_PHENT
, args
->phent
);
904 AUXARGS_ENTRY(pos
, AT_PHNUM
, args
->phnum
);
905 AUXARGS_ENTRY(pos
, AT_PAGESZ
, args
->pagesz
);
906 AUXARGS_ENTRY(pos
, AT_FLAGS
, args
->flags
);
907 AUXARGS_ENTRY(pos
, AT_ENTRY
, args
->entry
);
908 AUXARGS_ENTRY(pos
, AT_BASE
, args
->base
);
909 AUXARGS_ENTRY(pos
, AT_NULL
, 0);
911 free(imgp
->auxargs
, M_TEMP
);
912 imgp
->auxargs
= NULL
;
915 suword(base
, (long)imgp
->args
->argc
);
916 *stack_base
= (register_t
*)base
;
921 * Code for generating ELF core dumps.
924 typedef void (*segment_callback
)(vm_map_entry_t
, void *);
926 /* Closure for cb_put_phdr(). */
927 struct phdr_closure
{
928 Elf_Phdr
*phdr
; /* Program header to fill in */
929 Elf_Off offset
; /* Offset of segment in core file */
932 /* Closure for cb_size_segment(). */
933 struct sseg_closure
{
934 int count
; /* Count of writable segments. */
935 size_t size
; /* Total size of all writable segments. */
938 static void cb_put_phdr(vm_map_entry_t
, void *);
939 static void cb_size_segment(vm_map_entry_t
, void *);
940 static void each_writable_segment(struct thread
*, segment_callback
, void *);
941 static int __elfN(corehdr
)(struct thread
*, struct vnode
*, struct ucred
*,
942 int, void *, size_t);
943 static void __elfN(puthdr
)(struct thread
*, void *, size_t *, int);
944 static void __elfN(putnote
)(void *, size_t *, const char *, int,
945 const void *, size_t);
948 __elfN(coredump
)(td
, vp
, limit
)
953 struct ucred
*cred
= td
->td_ucred
;
955 struct sseg_closure seginfo
;
959 /* Size the program segments. */
962 each_writable_segment(td
, cb_size_segment
, &seginfo
);
965 * Calculate the size of the core file header area by making
966 * a dry run of generating it. Nothing is written, but the
967 * size is calculated.
970 __elfN(puthdr
)(td
, (void *)NULL
, &hdrsize
, seginfo
.count
);
972 if (hdrsize
+ seginfo
.size
>= limit
)
976 * Allocate memory for building the header, fill it up,
979 hdr
= malloc(hdrsize
, M_TEMP
, M_WAITOK
);
983 error
= __elfN(corehdr
)(td
, vp
, cred
, seginfo
.count
, hdr
, hdrsize
);
985 /* Write the contents of all of the writable segments. */
991 php
= (Elf_Phdr
*)((char *)hdr
+ sizeof(Elf_Ehdr
)) + 1;
993 for (i
= 0; i
< seginfo
.count
; i
++) {
994 error
= vn_rdwr_inchunks(UIO_WRITE
, vp
,
995 (caddr_t
)(uintptr_t)php
->p_vaddr
,
996 php
->p_filesz
, offset
, UIO_USERSPACE
,
997 IO_UNIT
| IO_DIRECT
, cred
, NOCRED
, NULL
,
1001 offset
+= php
->p_filesz
;
1011 * A callback for each_writable_segment() to write out the segment's
1012 * program header entry.
1015 cb_put_phdr(entry
, closure
)
1016 vm_map_entry_t entry
;
1019 struct phdr_closure
*phc
= (struct phdr_closure
*)closure
;
1020 Elf_Phdr
*phdr
= phc
->phdr
;
1022 phc
->offset
= round_page(phc
->offset
);
1024 phdr
->p_type
= PT_LOAD
;
1025 phdr
->p_offset
= phc
->offset
;
1026 phdr
->p_vaddr
= entry
->start
;
1028 phdr
->p_filesz
= phdr
->p_memsz
= entry
->end
- entry
->start
;
1029 phdr
->p_align
= PAGE_SIZE
;
1031 if (entry
->protection
& VM_PROT_READ
)
1032 phdr
->p_flags
|= PF_R
;
1033 if (entry
->protection
& VM_PROT_WRITE
)
1034 phdr
->p_flags
|= PF_W
;
1035 if (entry
->protection
& VM_PROT_EXECUTE
)
1036 phdr
->p_flags
|= PF_X
;
1038 phc
->offset
+= phdr
->p_filesz
;
1043 * A callback for each_writable_segment() to gather information about
1044 * the number of segments and their total size.
1047 cb_size_segment(entry
, closure
)
1048 vm_map_entry_t entry
;
1051 struct sseg_closure
*ssc
= (struct sseg_closure
*)closure
;
1054 ssc
->size
+= entry
->end
- entry
->start
;
1058 * For each writable segment in the process's memory map, call the given
1059 * function with a pointer to the map entry and some arbitrary
1060 * caller-supplied data.
1063 each_writable_segment(td
, func
, closure
)
1065 segment_callback func
;
1068 struct proc
*p
= td
->td_proc
;
1069 vm_map_t map
= &p
->p_vmspace
->vm_map
;
1070 vm_map_entry_t entry
;
1071 vm_object_t backing_object
, object
;
1072 boolean_t ignore_entry
;
1074 vm_map_lock_read(map
);
1075 for (entry
= map
->header
.next
; entry
!= &map
->header
;
1076 entry
= entry
->next
) {
1078 * Don't dump inaccessible mappings, deal with legacy
1081 * Note that read-only segments related to the elf binary
1082 * are marked MAP_ENTRY_NOCOREDUMP now so we no longer
1083 * need to arbitrarily ignore such segments.
1085 if (elf_legacy_coredump
) {
1086 if ((entry
->protection
& VM_PROT_RW
) != VM_PROT_RW
)
1089 if ((entry
->protection
& VM_PROT_ALL
) == 0)
1094 * Don't include memory segment in the coredump if
1095 * MAP_NOCORE is set in mmap(2) or MADV_NOCORE in
1096 * madvise(2). Do not dump submaps (i.e. parts of the
1099 if (entry
->eflags
& (MAP_ENTRY_NOCOREDUMP
|MAP_ENTRY_IS_SUB_MAP
))
1102 if ((object
= entry
->object
.vm_object
) == NULL
)
1105 /* Ignore memory-mapped devices and such things. */
1106 VM_OBJECT_LOCK(object
);
1107 while ((backing_object
= object
->backing_object
) != NULL
) {
1108 VM_OBJECT_LOCK(backing_object
);
1109 VM_OBJECT_UNLOCK(object
);
1110 object
= backing_object
;
1112 ignore_entry
= object
->type
!= OBJT_DEFAULT
&&
1113 object
->type
!= OBJT_SWAP
&& object
->type
!= OBJT_VNODE
;
1114 VM_OBJECT_UNLOCK(object
);
1118 (*func
)(entry
, closure
);
1120 vm_map_unlock_read(map
);
1124 * Write the core file header to the file, including padding up to
1125 * the page boundary.
1128 __elfN(corehdr
)(td
, vp
, cred
, numsegs
, hdr
, hdrsize
)
1138 /* Fill in the header. */
1139 bzero(hdr
, hdrsize
);
1141 __elfN(puthdr
)(td
, hdr
, &off
, numsegs
);
1143 /* Write it to the core file. */
1144 return (vn_rdwr_inchunks(UIO_WRITE
, vp
, hdr
, hdrsize
, (off_t
)0,
1145 UIO_SYSSPACE
, IO_UNIT
| IO_DIRECT
, cred
, NOCRED
, NULL
,
1149 #if defined(COMPAT_IA32) && __ELF_WORD_SIZE == 32
1150 typedef struct prstatus32 elf_prstatus_t
;
1151 typedef struct prpsinfo32 elf_prpsinfo_t
;
1152 typedef struct fpreg32 elf_prfpregset_t
;
1153 typedef struct fpreg32 elf_fpregset_t
;
1154 typedef struct reg32 elf_gregset_t
;
1156 typedef prstatus_t elf_prstatus_t
;
1157 typedef prpsinfo_t elf_prpsinfo_t
;
1158 typedef prfpregset_t elf_prfpregset_t
;
1159 typedef prfpregset_t elf_fpregset_t
;
1160 typedef gregset_t elf_gregset_t
;
1164 __elfN(puthdr
)(struct thread
*td
, void *dst
, size_t *off
, int numsegs
)
1167 elf_prstatus_t status
;
1168 elf_prfpregset_t fpregset
;
1169 elf_prpsinfo_t psinfo
;
1171 elf_prstatus_t
*status
;
1172 elf_prfpregset_t
*fpregset
;
1173 elf_prpsinfo_t
*psinfo
;
1176 size_t ehoff
, noteoff
, notesz
, phoff
;
1181 *off
+= sizeof(Elf_Ehdr
);
1184 *off
+= (numsegs
+ 1) * sizeof(Elf_Phdr
);
1188 * Don't allocate space for the notes if we're just calculating
1189 * the size of the header. We also don't collect the data.
1192 tempdata
= malloc(sizeof(*tempdata
), M_TEMP
, M_ZERO
|M_WAITOK
);
1193 status
= &tempdata
->status
;
1194 fpregset
= &tempdata
->fpregset
;
1195 psinfo
= &tempdata
->psinfo
;
1204 psinfo
->pr_version
= PRPSINFO_VERSION
;
1205 psinfo
->pr_psinfosz
= sizeof(elf_prpsinfo_t
);
1206 strlcpy(psinfo
->pr_fname
, p
->p_comm
, sizeof(psinfo
->pr_fname
));
1208 * XXX - We don't fill in the command line arguments properly
1211 strlcpy(psinfo
->pr_psargs
, p
->p_comm
,
1212 sizeof(psinfo
->pr_psargs
));
1214 __elfN(putnote
)(dst
, off
, "FreeBSD", NT_PRPSINFO
, psinfo
,
1218 * To have the debugger select the right thread (LWP) as the initial
1219 * thread, we dump the state of the thread passed to us in td first.
1220 * This is the thread that causes the core dump and thus likely to
1221 * be the right thread one wants to have selected in the debugger.
1224 while (thr
!= NULL
) {
1226 status
->pr_version
= PRSTATUS_VERSION
;
1227 status
->pr_statussz
= sizeof(elf_prstatus_t
);
1228 status
->pr_gregsetsz
= sizeof(elf_gregset_t
);
1229 status
->pr_fpregsetsz
= sizeof(elf_fpregset_t
);
1230 status
->pr_osreldate
= osreldate
;
1231 status
->pr_cursig
= p
->p_sig
;
1232 status
->pr_pid
= thr
->td_tid
;
1233 #if defined(COMPAT_IA32) && __ELF_WORD_SIZE == 32
1234 fill_regs32(thr
, &status
->pr_reg
);
1235 fill_fpregs32(thr
, fpregset
);
1237 fill_regs(thr
, &status
->pr_reg
);
1238 fill_fpregs(thr
, fpregset
);
1241 __elfN(putnote
)(dst
, off
, "FreeBSD", NT_PRSTATUS
, status
,
1243 __elfN(putnote
)(dst
, off
, "FreeBSD", NT_FPREGSET
, fpregset
,
1246 * Allow for MD specific notes, as well as any MD
1247 * specific preparations for writing MI notes.
1249 __elfN(dump_thread
)(thr
, dst
, off
);
1251 thr
= (thr
== td
) ? TAILQ_FIRST(&p
->p_threads
) :
1252 TAILQ_NEXT(thr
, td_plist
);
1254 thr
= TAILQ_NEXT(thr
, td_plist
);
1257 notesz
= *off
- noteoff
;
1260 free(tempdata
, M_TEMP
);
1262 /* Align up to a page boundary for the program segments. */
1263 *off
= round_page(*off
);
1268 struct phdr_closure phc
;
1271 * Fill in the ELF header.
1273 ehdr
= (Elf_Ehdr
*)((char *)dst
+ ehoff
);
1274 ehdr
->e_ident
[EI_MAG0
] = ELFMAG0
;
1275 ehdr
->e_ident
[EI_MAG1
] = ELFMAG1
;
1276 ehdr
->e_ident
[EI_MAG2
] = ELFMAG2
;
1277 ehdr
->e_ident
[EI_MAG3
] = ELFMAG3
;
1278 ehdr
->e_ident
[EI_CLASS
] = ELF_CLASS
;
1279 ehdr
->e_ident
[EI_DATA
] = ELF_DATA
;
1280 ehdr
->e_ident
[EI_VERSION
] = EV_CURRENT
;
1281 ehdr
->e_ident
[EI_OSABI
] = ELFOSABI_FREEBSD
;
1282 ehdr
->e_ident
[EI_ABIVERSION
] = 0;
1283 ehdr
->e_ident
[EI_PAD
] = 0;
1284 ehdr
->e_type
= ET_CORE
;
1285 #if defined(COMPAT_IA32) && __ELF_WORD_SIZE == 32
1286 ehdr
->e_machine
= EM_386
;
1288 ehdr
->e_machine
= ELF_ARCH
;
1290 ehdr
->e_version
= EV_CURRENT
;
1292 ehdr
->e_phoff
= phoff
;
1294 ehdr
->e_ehsize
= sizeof(Elf_Ehdr
);
1295 ehdr
->e_phentsize
= sizeof(Elf_Phdr
);
1296 ehdr
->e_phnum
= numsegs
+ 1;
1297 ehdr
->e_shentsize
= sizeof(Elf_Shdr
);
1299 ehdr
->e_shstrndx
= SHN_UNDEF
;
1302 * Fill in the program header entries.
1304 phdr
= (Elf_Phdr
*)((char *)dst
+ phoff
);
1306 /* The note segment. */
1307 phdr
->p_type
= PT_NOTE
;
1308 phdr
->p_offset
= noteoff
;
1311 phdr
->p_filesz
= notesz
;
1317 /* All the writable segments from the program. */
1320 each_writable_segment(td
, cb_put_phdr
, &phc
);
1325 __elfN(putnote
)(void *dst
, size_t *off
, const char *name
, int type
,
1326 const void *desc
, size_t descsz
)
1330 note
.n_namesz
= strlen(name
) + 1;
1331 note
.n_descsz
= descsz
;
1334 bcopy(¬e
, (char *)dst
+ *off
, sizeof note
);
1335 *off
+= sizeof note
;
1337 bcopy(name
, (char *)dst
+ *off
, note
.n_namesz
);
1338 *off
+= roundup2(note
.n_namesz
, sizeof(Elf_Size
));
1340 bcopy(desc
, (char *)dst
+ *off
, note
.n_descsz
);
1341 *off
+= roundup2(note
.n_descsz
, sizeof(Elf_Size
));
1345 * Tell kern_execve.c about it, with a little help from the linker.
1347 static struct execsw
__elfN(execsw
) = {
1348 __CONCAT(exec_
, __elfN(imgact
)),
1349 __XSTRING(__CONCAT(ELF
, __ELF_WORD_SIZE
))
1351 EXEC_SET(__CONCAT(elf
, __ELF_WORD_SIZE
), __elfN(execsw
));