4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
26 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
27 /* All Rights Reserved */
29 * Copyright 2019, Joyent, Inc.
30 * Copyright 2022 Oxide Computer Company
33 #include <sys/types.h>
34 #include <sys/param.h>
35 #include <sys/thread.h>
36 #include <sys/sysmacros.h>
37 #include <sys/signal.h>
40 #include <sys/errno.h>
41 #include <sys/vnode.h>
45 #include <sys/pathname.h>
46 #include <sys/policy.h>
47 #include <sys/cmn_err.h>
48 #include <sys/systm.h>
50 #include <sys/vmsystm.h>
51 #include <sys/debug.h>
54 #include <sys/prsystm.h>
58 #include <vm/seg_vn.h>
59 #include <sys/modctl.h>
60 #include <sys/systeminfo.h>
61 #include <sys/vmparam.h>
62 #include <sys/machelf.h>
63 #include <sys/shm_impl.h>
64 #include <sys/archsystm.h>
65 #include <sys/fasttrap.h>
66 #include <sys/brand.h>
69 #include <sys/siginfo.h>
70 #include <sys/random.h>
72 #include <core_shstrtab.h>
75 #include <sys/comm_page_util.h>
77 #endif /* defined(__x86) */
81 extern volatile size_t aslr_max_brk_skew
;
83 #define ORIGIN_STR "ORIGIN"
84 #define ORIGIN_STR_SIZE 6
86 static int getelfhead(vnode_t
*, cred_t
*, Ehdr
*, uint_t
*, uint_t
*,
88 static int getelfphdr(vnode_t
*, cred_t
*, const Ehdr
*, uint_t
, caddr_t
*,
90 static int getelfshdr(vnode_t
*, cred_t
*, const Ehdr
*, uint_t
, uint_t
,
91 caddr_t
*, size_t *, caddr_t
*, size_t *);
92 static size_t elfsize(const Ehdr
*, uint_t
, const caddr_t
, uintptr_t *);
93 static int mapelfexec(vnode_t
*, Ehdr
*, uint_t
, caddr_t
, Phdr
**, Phdr
**,
94 Phdr
**, Phdr
**, Phdr
*, caddr_t
*, caddr_t
*, intptr_t *, uintptr_t *,
95 size_t, size_t *, size_t *);
99 /* Link against the non-compat instances when compiling the 32-bit version. */
100 extern size_t elf_datasz_max
;
101 extern size_t elf_zeropg_sz
;
102 extern void elf_ctx_resize_scratch(elf_core_ctx_t
*, size_t);
103 extern uint_t elf_nphdr_max
;
104 extern uint_t elf_nshdr_max
;
105 extern size_t elf_shstrtab_max
;
107 size_t elf_datasz_max
= 1 * 1024 * 1024;
108 size_t elf_zeropg_sz
= 4 * 1024;
109 uint_t elf_nphdr_max
= 1000;
110 uint_t elf_nshdr_max
= 10000;
111 size_t elf_shstrtab_max
= 100 * 1024;
/*
 * dtrace_safe_phdr() sanity-checks a PT_SUNWDTRACE program header and
 * records the user address it describes.
 *
 *   phdrp - program header to check; asserted to be PT_SUNWDTRACE.
 *   args  - exec arguments; on the visible path args->thrptr is set to
 *           phdrp->p_vaddr + base.
 *   base  - offset added to the header's p_vaddr.
 *
 * NOTE(review): the return statements are not visible in this listing.
 * Callers in this file compare the result against 0 and report
 * "Bad DTrace phdr" otherwise, so presumably non-zero means failure.
 */
115 dtrace_safe_phdr(Phdr
*phdrp
, struct uarg
*args
, uintptr_t base
)
117 ASSERT(phdrp
->p_type
== PT_SUNWDTRACE
);
120 * See the comment in fasttrap.h for information on how to safely
121 * update this program header.
/*
 * The segment must span at least PT_SUNWDTRACE_SIZE bytes and must carry
 * all three of PF_R, PF_W and PF_X.
 */
123 if (phdrp
->p_memsz
< PT_SUNWDTRACE_SIZE
||
124 (phdrp
->p_flags
& (PF_R
| PF_W
| PF_X
)) != (PF_R
| PF_W
| PF_X
))
/* Record where the segment lands once base has been applied. */
127 args
->thrptr
= phdrp
->p_vaddr
+ base
;
/*
 * handle_secflag_dt() applies a security-flag request taken from a dynamic
 * section entry to process p by editing p->p_secflags.psf_effective.
 *
 *   p   - process whose security flags are adjusted.
 *   dt  - dynamic tag of the entry.
 *   val - value of the entry.
 *
 * The only flag assignment visible here is PROC_SEC_ASLR. The flag is
 * cleared or set in the effective set after consulting the lower/upper
 * bounds and, when secpolicy_psecflags() returns non-zero, the inherit set.
 *
 * NOTE(review): the dispatch on dt and val, the statements guarded by the
 * individual checks and the return statements are not visible in this
 * listing. The caller reports "error setting security-flag" when the result
 * is non-zero.
 */
133 handle_secflag_dt(proc_t
*p
, uint_t dt
, uint_t val
)
139 flag
= PROC_SEC_ASLR
;
/* Path that ends in clearing the flag: checks psf_lower, then psf_inherit. */
146 if (secflag_isset(p
->p_secflags
.psf_lower
, flag
))
148 if ((secpolicy_psecflags(CRED(), p
, p
) != 0) &&
149 secflag_isset(p
->p_secflags
.psf_inherit
, flag
))
152 secflag_clear(&p
->p_secflags
.psf_effective
, flag
);
/* Path that ends in setting the flag: checks psf_upper, then psf_inherit. */
154 if (!secflag_isset(p
->p_secflags
.psf_upper
, flag
))
157 if ((secpolicy_psecflags(CRED(), p
, p
) != 0) &&
158 !secflag_isset(p
->p_secflags
.psf_inherit
, flag
))
161 secflag_set(&p
->p_secflags
.psf_effective
, flag
);
167 #ifndef _ELF32_COMPAT
/*
 * elf_ctx_resize_scratch() makes sure the scratch buffer held in ctx is at
 * least MIN(sz, elf_datasz_max) bytes long.
 *
 *   ctx - core context owning ecc_buf / ecc_bufsz.
 *   sz  - requested size; silently capped at elf_datasz_max.
 *
 * When the capped size exceeds ctx->ecc_bufsz, any existing buffer is freed
 * and a new one of the capped size is allocated with KM_SLEEP; ecc_bufsz is
 * updated to match. Otherwise ctx is left untouched.
 */
169 elf_ctx_resize_scratch(elf_core_ctx_t
*ctx
, size_t sz
)
171 size_t target
= MIN(sz
, elf_datasz_max
);
/* Only ever grow; a buffer that is already large enough is left alone. */
173 if (target
> ctx
->ecc_bufsz
) {
174 if (ctx
->ecc_buf
!= NULL
) {
175 kmem_free(ctx
->ecc_buf
, ctx
->ecc_bufsz
);
/* The old buffer was freed above, so its contents are not carried over. */
177 ctx
->ecc_buf
= kmem_alloc(target
, KM_SLEEP
);
178 ctx
->ecc_bufsz
= target
;
181 #endif /* _ELF32_COMPAT */
184 * Map in the executable pointed to by vp. Returns 0 on success.
/*
 * mapexec_brand() checks execute permission on vp, reads its ELF header and
 * program headers, sizes the loadable image with elfsize() and maps it with
 * mapelfexec().
 *
 *   vp          - vnode of the executable.
 *   args        - exec arguments; args->pathname is used in messages.
 *   ehdr        - filled in by getelfhead().
 *   uphdr_vaddr - receives uphdr->p_vaddr, or (Addr)-1 on the alternative
 *                 path (the condition selecting between them is not visible).
 *   voffset, bssbase, brkbase, brksize - handed through to mapelfexec().
 *   exec_file   - name used as the prefix of uprintf() messages.
 *   interp      - set to 1 when mapelfexec() reported a header in the slot
 *                 captured by dynphdr, 0 otherwise.
 *   lddatap     - NOTE(review): its use is not visible in this listing.
 *
 * The program header buffer is released with kmem_free(phdrbase, phdrsize)
 * on the visible exit paths, and a uphdr whose p_flags is 0 is treated as
 * dynamically allocated and freed as well.
 *
 * NOTE(review): the "Nothing to load in %s" message below has no trailing
 * newline, unlike the other messages in this function - confirm whether
 * that is intended.
 */
187 mapexec_brand(vnode_t
*vp
, uarg_t
*args
, Ehdr
*ehdr
, Addr
*uphdr_vaddr
,
188 intptr_t *voffset
, caddr_t exec_file
, int *interp
, caddr_t
*bssbase
,
189 caddr_t
*brkbase
, size_t *brksize
, uintptr_t *lddatap
)
191 size_t len
, phdrsize
;
193 caddr_t phdrbase
= NULL
;
194 uint_t nshdrs
, shstrndx
, nphdrs
;
198 Phdr
*dynphdr
= NULL
;
199 Phdr
*dtrphdr
= NULL
;
200 uintptr_t lddata
, minaddr
;
/* Step 1: the caller must be allowed to execute the file. */
206 if (error
= execpermissions(vp
, &vat
, args
)) {
207 uprintf("%s: Cannot execute %s\n", exec_file
, args
->pathname
);
/* Step 2: pull in the ELF header and the program header table. */
211 if ((error
= getelfhead(vp
, CRED(), ehdr
, &nshdrs
, &shstrndx
,
213 (error
= getelfphdr(vp
, CRED(), ehdr
, nphdrs
, &phdrbase
,
215 uprintf("%s: Cannot read %s\n", exec_file
, args
->pathname
);
/* Step 3: a zero size means there is no loadable segment at all. */
219 if ((len
= elfsize(ehdr
, nphdrs
, phdrbase
, &lddata
)) == 0) {
220 uprintf("%s: Nothing to load in %s", exec_file
, args
->pathname
);
221 kmem_free(phdrbase
, phdrsize
);
/* Step 4: map the segments; dynphdr captures mapelfexec()'s sixth argument. */
227 if (error
= mapelfexec(vp
, ehdr
, nphdrs
, phdrbase
, &uphdr
, &dynphdr
,
228 &junk
, &dtrphdr
, NULL
, bssbase
, brkbase
, voffset
, &minaddr
,
229 len
, &execsz
, brksize
)) {
230 uprintf("%s: Cannot map %s\n", exec_file
, args
->pathname
);
/* A uphdr with p_flags == 0 was allocated on our behalf and must be freed. */
231 if (uphdr
!= NULL
&& uphdr
->p_flags
== 0)
232 kmem_free(uphdr
, sizeof (Phdr
));
233 kmem_free(phdrbase
, phdrsize
);
238 * Inform our caller if the executable needs an interpreter.
240 *interp
= (dynphdr
== NULL
) ? 0 : 1;
243 * If this is a statically linked executable, voffset should indicate
244 * the address of the executable itself (it normally holds the address
245 * of the interpreter).
247 if (ehdr
->e_type
== ET_EXEC
&& *interp
== 0)
251 *uphdr_vaddr
= uphdr
->p_vaddr
;
253 if (uphdr
->p_flags
== 0)
254 kmem_free(uphdr
, sizeof (Phdr
));
256 *uphdr_vaddr
= (Addr
)-1;
259 kmem_free(phdrbase
, phdrsize
);
264 elfexec(vnode_t
*vp
, execa_t
*uap
, uarg_t
*args
, intpdata_t
*idatap
,
265 int level
, size_t *execsz
, int setid
, caddr_t exec_file
, cred_t
*cred
,
268 caddr_t phdrbase
= NULL
;
278 Phdr
*intphdr
= NULL
;
279 Phdr
*dynamicphdr
= NULL
;
284 size_t postfixsize
= 0;
287 Phdr
*dataphdrp
= NULL
;
289 Phdr
*capphdr
= NULL
;
296 boolean_t dynuphdr
= B_FALSE
;
298 struct proc
*p
= ttoproc(curthread
);
299 struct user
*up
= PTOU(p
);
302 aux_entry_t elfargs
[__KERN_NAUXV_IMPL
];
303 char dl_name
[MAXPATHLEN
];
304 char pathbuf
[MAXPATHLEN
];
306 struct execenv exenv
;
307 } *bigwad
; /* kmem_alloc this behemoth so we don't blow stack */
309 uint_t nshdrs
, shstrndx
, nphdrs
;
316 ASSERT(p
->p_model
== DATAMODEL_ILP32
|| p
->p_model
== DATAMODEL_LP64
);
318 bigwad
= kmem_alloc(sizeof (struct bigwad
), KM_SLEEP
);
319 ehdrp
= &bigwad
->ehdr
;
320 dlnp
= bigwad
->dl_name
;
321 pathbufp
= bigwad
->pathbuf
;
324 * Obtain ELF and program header information.
326 if ((error
= getelfhead(vp
, CRED(), ehdrp
, &nshdrs
, &shstrndx
,
328 (error
= getelfphdr(vp
, CRED(), ehdrp
, nphdrs
, &phdrbase
,
333 * Prevent executing an ELF file that has no entry point.
335 if (ehdrp
->e_entry
== 0) {
336 uprintf("%s: Bad entry point\n", exec_file
);
341 * Put data model that we're exec-ing to into the args passed to
342 * exec_args(), so it will know what it is copying to on new stack.
343 * Now that we know whether we are exec-ing a 32-bit or 64-bit
344 * executable, we can set execsz with the appropriate NCARGS.
347 if (ehdrp
->e_ident
[EI_CLASS
] == ELFCLASS32
) {
348 args
->to_model
= DATAMODEL_ILP32
;
349 *execsz
= btopr(SINCR
) + btopr(SSIZE
) + btopr(NCARGS32
-1);
351 args
->to_model
= DATAMODEL_LP64
;
352 args
->stk_prot
&= ~PROT_EXEC
;
354 args
->dat_prot
&= ~PROT_EXEC
;
356 *execsz
= btopr(SINCR
) + btopr(SSIZE
) + btopr(NCARGS64
-1);
359 args
->to_model
= DATAMODEL_ILP32
;
360 *execsz
= btopr(SINCR
) + btopr(SSIZE
) + btopr(NCARGS
-1);
364 * We delay invoking the brand callback until we've figured out
365 * what kind of elf binary we're trying to run, 32-bit or 64-bit.
366 * We do this because now the brand library can just check
367 * args->to_model to see if the target is 32-bit or 64-bit without
368 * having to duplicate all the code above.
370 * The level checks associated with brand handling below are used to
371 * prevent a loop since the brand elfexec function typically comes back
372 * through this function. We must check <= here since the nested
373 * handling in the #! interpreter code will increment the level before
374 * calling gexec to run the final elfexec interpreter.
376 if ((level
<= INTP_MAXDEPTH
) &&
377 (brand_action
!= EBA_NATIVE
) && (PROC_IS_BRANDED(p
))) {
378 error
= BROP(p
)->b_elfexec(vp
, uap
, args
,
379 idatap
, level
+ 1, execsz
, setid
, exec_file
, cred
,
385 * Determine aux size now so that stack can be built
386 * in one shot (except actual copyout of aux image),
387 * determine any non-default stack protections,
388 * and still have this code be machine independent.
390 const uint_t hsize
= ehdrp
->e_phentsize
;
391 phdrp
= (Phdr
*)phdrbase
;
392 for (i
= nphdrs
; i
> 0; i
--) {
393 switch (phdrp
->p_type
) {
395 hasauxv
= hasintp
= 1;
401 args
->stk_prot
= PROT_USER
;
402 if (phdrp
->p_flags
& PF_R
)
403 args
->stk_prot
|= PROT_READ
;
404 if (phdrp
->p_flags
& PF_W
)
405 args
->stk_prot
|= PROT_WRITE
;
406 if (phdrp
->p_flags
& PF_X
)
407 args
->stk_prot
|= PROT_EXEC
;
419 phdrp
= (Phdr
*)((caddr_t
)phdrp
+ hsize
);
422 if (ehdrp
->e_type
!= ET_EXEC
) {
427 /* Copy BSS permissions to args->dat_prot */
428 if (dataphdrp
!= NULL
) {
429 args
->dat_prot
= PROT_USER
;
430 if (dataphdrp
->p_flags
& PF_R
)
431 args
->dat_prot
|= PROT_READ
;
432 if (dataphdrp
->p_flags
& PF_W
)
433 args
->dat_prot
|= PROT_WRITE
;
434 if (dataphdrp
->p_flags
& PF_X
)
435 args
->dat_prot
|= PROT_EXEC
;
439 * If an auxvector will be required - reserve the space for
440 * it now. This may be increased by exec_args if there are
441 * ISA-specific types (included in __KERN_NAUXV_IMPL).
445 * If an AUX vector is being built - the base AUX
455 * AT_SUN_PLATFORM (added in stk_copyout)
456 * AT_SUN_EXECNAME (added in stk_copyout)
461 if (hasintp
&& hasu
) {
463 * Has PT_INTERP & PT_PHDR - the auxvectors that
474 args
->auxsize
= (10 + 5) * sizeof (aux_entry_t
);
475 } else if (hasintp
) {
477 * Has PT_INTERP but no PT_PHDR
484 args
->auxsize
= (10 + 2) * sizeof (aux_entry_t
);
486 args
->auxsize
= 10 * sizeof (aux_entry_t
);
493 * If this binary is using an emulator, we need to add an
494 * AT_SUN_EMULATOR aux entry.
496 if (args
->emulator
!= NULL
)
497 args
->auxsize
+= sizeof (aux_entry_t
);
500 * On supported kernels (x86_64) make room in the auxv for the
501 * AT_SUN_COMMPAGE entry. This will go unpopulated on i86xpv systems
502 * which do not provide such functionality.
504 * Additionally cover the floating point information AT_SUN_FPSIZE and
508 args
->auxsize
+= 3 * sizeof (aux_entry_t
);
509 #endif /* defined(__amd64) */
511 if ((brand_action
!= EBA_NATIVE
) && (PROC_IS_BRANDED(p
))) {
514 * We will be adding 4 entries to the aux vectors. One for
515 * the brandname and 3 for the brand specific aux vectors.
517 args
->auxsize
+= 4 * sizeof (aux_entry_t
);
520 /* If the binary has an explicit ASLR flag, it must be honoured */
521 if ((dynamicphdr
!= NULL
) && (dynamicphdr
->p_filesz
> 0)) {
522 const size_t dynfilesz
= dynamicphdr
->p_filesz
;
523 const size_t dynoffset
= dynamicphdr
->p_offset
;
526 if (dynoffset
> MAXOFFSET_T
||
527 dynfilesz
> MAXOFFSET_T
||
528 dynoffset
+ dynfilesz
> MAXOFFSET_T
) {
529 uprintf("%s: cannot read full .dynamic section\n",
535 #define DYN_STRIDE 100
536 for (i
= 0; i
< dynfilesz
; i
+= sizeof (*dyn
) * DYN_STRIDE
) {
537 const size_t remdyns
= (dynfilesz
- i
) / sizeof (*dyn
);
538 const size_t ndyns
= MIN(DYN_STRIDE
, remdyns
);
539 const size_t dynsize
= ndyns
* sizeof (*dyn
);
541 dyn
= kmem_alloc(dynsize
, KM_SLEEP
);
543 if ((error
= vn_rdwr(UIO_READ
, vp
, (caddr_t
)dyn
,
544 (ssize_t
)dynsize
, (offset_t
)(dynoffset
+ i
),
545 UIO_SYSSPACE
, 0, (rlim64_t
)0,
546 CRED(), NULL
)) != 0) {
547 uprintf("%s: cannot read .dynamic section\n",
552 for (dp
= dyn
; dp
< (dyn
+ ndyns
); dp
++) {
553 if (dp
->d_tag
== DT_SUNW_ASLR
) {
554 if ((error
= handle_secflag_dt(p
,
556 dp
->d_un
.d_val
)) != 0) {
557 uprintf("%s: error setting "
558 "security-flag from "
559 "DT_SUNW_ASLR: %d\n",
566 kmem_free(dyn
, dynsize
);
570 /* Hardware/Software capabilities */
571 if (capphdr
!= NULL
&&
572 (capsize
= capphdr
->p_filesz
) > 0 &&
573 capsize
<= 16 * sizeof (*cap
)) {
574 const uint_t ncaps
= capsize
/ sizeof (*cap
);
577 cap
= kmem_alloc(capsize
, KM_SLEEP
);
578 if ((error
= vn_rdwr(UIO_READ
, vp
, (caddr_t
)cap
,
579 (ssize_t
)capsize
, (offset_t
)capphdr
->p_offset
,
580 UIO_SYSSPACE
, 0, (rlim64_t
)0, CRED(), NULL
)) != 0) {
581 uprintf("%s: Cannot read capabilities section\n",
585 for (cp
= cap
; cp
< cap
+ ncaps
; cp
++) {
586 if (cp
->c_tag
== CA_SUNW_SF_1
&&
587 (cp
->c_un
.c_val
& SF1_SUNW_ADDR32
)) {
588 if (args
->to_model
== DATAMODEL_LP64
)
595 aux
= bigwad
->elfargs
;
597 * Move args to the user's stack.
598 * This can fill in the AT_SUN_PLATFORM and AT_SUN_EXECNAME aux entries.
600 if ((error
= exec_args(uap
, args
, idatap
, (void **)&aux
)) != 0) {
607 /* we're single threaded after this point */
610 * If this is an ET_DYN executable (shared object),
611 * determine its memory size so that mapelfexec() can load it.
613 if (ehdrp
->e_type
== ET_DYN
)
614 len
= elfsize(ehdrp
, nphdrs
, phdrbase
, NULL
);
620 error
= mapelfexec(vp
, ehdrp
, nphdrs
, phdrbase
, &uphdr
, &intphdr
,
621 &stphdr
, &dtrphdr
, dataphdrp
, &bssbase
, &brkbase
, &voffset
, NULL
,
622 len
, execsz
, &brksize
);
625 * Our uphdr has been dynamically allocated if (and only if) its
626 * program header flags are clear. To avoid leaks, this must be
627 * checked regardless of whether mapelfexec() emitted an error.
629 dynuphdr
= (uphdr
!= NULL
&& uphdr
->p_flags
== 0);
634 if (uphdr
!= NULL
&& intphdr
== NULL
)
637 if (dtrphdr
!= NULL
&& dtrace_safe_phdr(dtrphdr
, args
, voffset
) != 0) {
638 uprintf("%s: Bad DTrace phdr in %s\n", exec_file
, exec_file
);
642 if (intphdr
!= NULL
) {
648 dlnsize
= intphdr
->p_filesz
;
651 * Make sure none of the component pieces of dlnsize result in
652 * an oversized or zeroed result.
654 if (intphdr
->p_filesz
> MAXPATHLEN
|| dlnsize
> MAXPATHLEN
||
655 dlnsize
== 0 || dlnsize
< intphdr
->p_filesz
) {
660 * Read in "interpreter" pathname.
662 if ((error
= vn_rdwr(UIO_READ
, vp
, dlnp
,
663 (ssize_t
)intphdr
->p_filesz
, (offset_t
)intphdr
->p_offset
,
664 UIO_SYSSPACE
, 0, (rlim64_t
)0, CRED(), &resid
)) != 0) {
665 uprintf("%s: Cannot obtain interpreter pathname\n",
670 if (resid
!= 0 || dlnp
[dlnsize
- 1] != '\0')
674 * Search for '$ORIGIN' token in interpreter path.
675 * If found, expand it.
677 for (p
= dlnp
; p
= strchr(p
, '$'); ) {
681 if (strncmp(++p
, ORIGIN_STR
, ORIGIN_STR_SIZE
))
685 * We don't support $ORIGIN on setid programs to close
686 * a potential attack vector.
688 if ((setid
& EXECSETID_SETID
) != 0) {
696 bcopy(dlnp
, pathbufp
, len
);
699 if (_ptr
= strrchr(args
->pathname
, '/')) {
700 len
= _ptr
- args
->pathname
;
701 if ((curlen
+ len
) > MAXPATHLEN
)
704 bcopy(args
->pathname
, &pathbufp
[curlen
], len
);
708 * executable is a basename found in the
709 * current directory. So - just substitute
712 pathbufp
[curlen
] = '.';
715 p
+= ORIGIN_STR_SIZE
;
718 if ((curlen
+ len
) > MAXPATHLEN
)
720 bcopy(p
, &pathbufp
[curlen
], len
);
722 pathbufp
[curlen
++] = '\0';
723 bcopy(pathbufp
, dlnp
, curlen
);
727 * /usr/lib/ld.so.1 is known to be a symlink to /lib/ld.so.1
728 * (and /usr/lib/64/ld.so.1 is a symlink to /lib/64/ld.so.1).
729 * Just in case /usr is not mounted, change it now.
731 if (strcmp(dlnp
, USR_LIB_RTLD
) == 0)
733 error
= lookupname(dlnp
, UIO_SYSSPACE
, FOLLOW
, NULLVPP
, &nvp
);
734 if (error
&& dlnp
!= bigwad
->dl_name
) {
735 /* new kernel, old user-level */
736 error
= lookupname(dlnp
-= 4, UIO_SYSSPACE
, FOLLOW
,
740 uprintf("%s: Cannot find %s\n", exec_file
, dlnp
);
745 * Setup the "aux" vector.
748 if (ehdrp
->e_type
== ET_DYN
) {
749 /* don't use the first page */
750 bigwad
->exenv
.ex_brkbase
= (caddr_t
)PAGESIZE
;
751 bigwad
->exenv
.ex_bssbase
= (caddr_t
)PAGESIZE
;
753 bigwad
->exenv
.ex_bssbase
= bssbase
;
754 bigwad
->exenv
.ex_brkbase
= brkbase
;
756 bigwad
->exenv
.ex_brksize
= brksize
;
757 bigwad
->exenv
.ex_magic
= elfmagic
;
758 bigwad
->exenv
.ex_vp
= vp
;
759 setexecenv(&bigwad
->exenv
);
761 ADDAUX(aux
, AT_PHDR
, uphdr
->p_vaddr
+ voffset
)
762 ADDAUX(aux
, AT_PHENT
, ehdrp
->e_phentsize
)
763 ADDAUX(aux
, AT_PHNUM
, nphdrs
)
764 ADDAUX(aux
, AT_ENTRY
, ehdrp
->e_entry
+ voffset
)
766 if ((error
= execopen(&vp
, &fd
)) != 0) {
771 ADDAUX(aux
, AT_EXECFD
, fd
)
774 if ((error
= execpermissions(nvp
, &bigwad
->vattr
, args
)) != 0) {
776 uprintf("%s: Cannot execute %s\n", exec_file
, dlnp
);
781 * Now obtain the ELF header along with the entire program
782 * header contained in "nvp".
784 kmem_free(phdrbase
, phdrsize
);
786 if ((error
= getelfhead(nvp
, CRED(), ehdrp
, &nshdrs
,
787 &shstrndx
, &nphdrs
)) != 0 ||
788 (error
= getelfphdr(nvp
, CRED(), ehdrp
, nphdrs
, &phdrbase
,
791 uprintf("%s: Cannot read %s\n", exec_file
, dlnp
);
796 * Determine memory size of the "interpreter's" loadable
797 * sections. This size is then used to obtain the virtual
798 * address of a hole, in the user's address space, large
799 * enough to map the "interpreter".
801 if ((len
= elfsize(ehdrp
, nphdrs
, phdrbase
, &lddata
)) == 0) {
803 uprintf("%s: Nothing to load in %s\n", exec_file
, dlnp
);
809 error
= mapelfexec(nvp
, ehdrp
, nphdrs
, phdrbase
, NULL
, &junk
,
810 &junk
, &dtrphdr
, NULL
, NULL
, NULL
, &voffset
, NULL
, len
,
813 if (error
|| junk
!= NULL
) {
815 uprintf("%s: Cannot map %s\n", exec_file
, dlnp
);
820 * We use the DTrace program header to initialize the
821 * architecture-specific user per-LWP location. The dtrace
822 * fasttrap provider requires ready access to per-LWP scratch
823 * space. We assume that there is only one such program header
824 * in the interpreter.
826 if (dtrphdr
!= NULL
&&
827 dtrace_safe_phdr(dtrphdr
, args
, voffset
) != 0) {
829 uprintf("%s: Bad DTrace phdr in %s\n", exec_file
, dlnp
);
834 ADDAUX(aux
, AT_SUN_LDDATA
, voffset
+ lddata
)
838 int auxf
= AF_SUN_HWCAPVERIFY
;
842 #endif /* defined(__amd64) */
845 * Note: AT_SUN_PLATFORM and AT_SUN_EXECNAME were filled in via
848 ADDAUX(aux
, AT_BASE
, voffset
)
849 ADDAUX(aux
, AT_FLAGS
, at_flags
)
850 ADDAUX(aux
, AT_PAGESZ
, PAGESIZE
)
852 * Linker flags. (security)
853 * p_flag not yet set at this time.
854 * We rely on gexec() to provide us with the information.
855 * If the application is set-uid but this is not reflected
856 * in a mismatch between real/effective uids/gids, then
857 * don't treat this as a set-uid exec. So we care about
858 * the EXECSETID_UGIDS flag but not the ...SETID flag.
860 if ((setid
&= ~EXECSETID_SETID
) != 0)
861 auxf
|= AF_SUN_SETUGID
;
864 * If we're running a native process from within a branded
865 * zone under pfexec then we clear the AF_SUN_SETUGID flag so
866 * that the native ld.so.1 is able to link with the native
867 * libraries instead of using the brand libraries that are
868 * installed in the zone. We only do this for processes
869 * which we trust because we see they are already running
870 * under pfexec (where uid != euid). This prevents a
871 * malicious user within the zone from crafting a wrapper to
872 * run native suid commands with insecure libraries interposed.
874 if ((brand_action
== EBA_NATIVE
) && (PROC_IS_BRANDED(p
) &&
875 (setid
&= ~EXECSETID_SETID
) != 0))
876 auxf
&= ~AF_SUN_SETUGID
;
879 * Record the user addr of the auxflags aux vector entry
880 * since brands may optionally want to manipulate this field.
882 args
->auxp_auxflags
=
883 (char *)((char *)args
->stackend
+
884 ((char *)&aux
->a_type
-
885 (char *)bigwad
->elfargs
));
886 ADDAUX(aux
, AT_SUN_AUXFLAGS
, auxf
);
889 * Hardware capability flag word (performance hints)
890 * Used for choosing faster library routines.
891 * (Potentially different between 32-bit and 64-bit ABIs)
893 if (args
->to_model
== DATAMODEL_NATIVE
) {
894 ADDAUX(aux
, AT_SUN_HWCAP
, auxv_hwcap
)
895 ADDAUX(aux
, AT_SUN_HWCAP2
, auxv_hwcap_2
)
896 ADDAUX(aux
, AT_SUN_HWCAP3
, auxv_hwcap_3
)
898 ADDAUX(aux
, AT_SUN_HWCAP
, auxv_hwcap32
)
899 ADDAUX(aux
, AT_SUN_HWCAP2
, auxv_hwcap32_2
)
900 ADDAUX(aux
, AT_SUN_HWCAP3
, auxv_hwcap32_3
)
905 * Reserve space for the brand-private aux vectors,
906 * and record the user addr of that space.
909 (char *)((char *)args
->stackend
+
910 ((char *)&aux
->a_type
-
911 (char *)bigwad
->elfargs
));
912 ADDAUX(aux
, AT_SUN_BRAND_AUX1
, 0)
913 ADDAUX(aux
, AT_SUN_BRAND_AUX2
, 0)
914 ADDAUX(aux
, AT_SUN_BRAND_AUX3
, 0)
918 * Add the comm page auxv entry, mapping it in if needed. Also
919 * take care of the FPU entries.
922 if (args
->commpage
!= (uintptr_t)NULL
||
923 (args
->commpage
= (uintptr_t)comm_page_mapin()) !=
925 ADDAUX(aux
, AT_SUN_COMMPAGE
, args
->commpage
)
928 * If the comm page cannot be mapped, pad out the auxv
929 * to satisfy later size checks.
931 ADDAUX(aux
, AT_NULL
, 0)
934 fptype
= AT_386_FPINFO_NONE
;
935 fpu_auxv_info(&fptype
, &fpsize
);
936 if (fptype
!= AT_386_FPINFO_NONE
) {
937 ADDAUX(aux
, AT_SUN_FPTYPE
, fptype
)
938 ADDAUX(aux
, AT_SUN_FPSIZE
, fpsize
)
940 ADDAUX(aux
, AT_NULL
, 0)
941 ADDAUX(aux
, AT_NULL
, 0)
943 #endif /* defined(__amd64) */
945 ADDAUX(aux
, AT_NULL
, 0)
946 postfixsize
= (uintptr_t)aux
- (uintptr_t)bigwad
->elfargs
;
949 * We make assumptions above when we determine how many aux
950 * vector entries we will be adding. However, if we have an
951 * invalid elf file, it is possible that mapelfexec might
952 * behave differently (but not return an error), in which case
953 * the number of aux entries we actually add will be different.
954 * We detect that now and error out.
956 if (postfixsize
!= args
->auxsize
) {
957 DTRACE_PROBE2(elfexec_badaux
, size_t, postfixsize
,
958 size_t, args
->auxsize
);
961 ASSERT(postfixsize
<= __KERN_NAUXV_IMPL
* sizeof (aux_entry_t
));
965 * For the 64-bit kernel, the limit is big enough that rounding it up
966 * to a page can overflow the 64-bit limit, so we check for btopr()
967 * overflowing here by comparing it with the unrounded limit in pages.
968 * If it hasn't overflowed, compare the exec size with the rounded up
969 * limit in pages. Otherwise, just compare with the unrounded limit.
971 limit
= btop(p
->p_vmem_ctl
);
972 roundlimit
= btopr(p
->p_vmem_ctl
);
973 if ((roundlimit
> limit
&& *execsz
> roundlimit
) ||
974 (roundlimit
< limit
&& *execsz
> limit
)) {
975 mutex_enter(&p
->p_lock
);
976 (void) rctl_action(rctlproc_legacy
[RLIMIT_VMEM
], p
->p_rctls
, p
,
978 mutex_exit(&p
->p_lock
);
983 bzero(up
->u_auxv
, sizeof (up
->u_auxv
));
984 up
->u_commpagep
= args
->commpage
;
989 * Copy the aux vector to the user stack.
991 error
= execpoststack(args
, bigwad
->elfargs
, postfixsize
);
996 * Copy auxv to the process's user structure for use by /proc.
997 * If this is a branded process, the brand's exec routine will
998 * copy its private entries to the user structure later. It
999 * relies on the fact that the blank entries are at the end.
1001 num_auxv
= postfixsize
/ sizeof (aux_entry_t
);
1002 ASSERT(num_auxv
<= sizeof (up
->u_auxv
) / sizeof (auxv_t
));
1003 aux
= bigwad
->elfargs
;
1004 for (i
= 0; i
< num_auxv
; i
++) {
1005 up
->u_auxv
[i
].a_type
= aux
[i
].a_type
;
1006 up
->u_auxv
[i
].a_un
.a_val
= (aux_val_t
)aux
[i
].a_un
.a_val
;
1011 * Pass back the starting address so we can set the program counter.
1013 args
->entry
= (uintptr_t)(ehdrp
->e_entry
+ voffset
);
1016 if (ehdrp
->e_type
== ET_DYN
) {
1018 * If we are executing a shared library which doesn't
1019 * have an interpreter (probably ld.so.1) then
1020 * we don't set the brkbase now. Instead we
1021 * delay its setting until the first call
1022 * via grow.c::brk(). This permits ld.so.1 to
1023 * initialize brkbase to the tail of the executable it
1024 * loads (which is where it needs to be).
1026 bigwad
->exenv
.ex_brkbase
= (caddr_t
)0;
1027 bigwad
->exenv
.ex_bssbase
= (caddr_t
)0;
1028 bigwad
->exenv
.ex_brksize
= 0;
1030 bigwad
->exenv
.ex_brkbase
= brkbase
;
1031 bigwad
->exenv
.ex_bssbase
= bssbase
;
1032 bigwad
->exenv
.ex_brksize
= brksize
;
1034 bigwad
->exenv
.ex_magic
= elfmagic
;
1035 bigwad
->exenv
.ex_vp
= vp
;
1036 setexecenv(&bigwad
->exenv
);
1043 if (fd
!= -1) /* did we open the a.out yet */
1044 (void) execclose(fd
);
1046 psignal(p
, SIGKILL
);
1052 kmem_free(uphdr
, sizeof (Phdr
));
1053 if (phdrbase
!= NULL
)
1054 kmem_free(phdrbase
, phdrsize
);
1056 kmem_free(cap
, capsize
);
1057 kmem_free(bigwad
, sizeof (struct bigwad
));
1062 * Compute the memory size requirement for the ELF file.
/*
 * elfsize() walks nphdrs program headers starting at phdrbase, stepping by
 * ehdrp->e_phentsize bytes, and measures the address range spanned by the
 * PT_LOAD entries.
 *
 *   ehdrp    - ELF header; only e_phentsize is read in the visible code.
 *   nphdrs   - number of program headers to examine.
 *   phdrbase - start of the in-memory program header table.
 *   A further parameter, tested below as `lddata`, is declared on a line
 *   that is not visible in this listing.
 *
 * Result: hiaddr - (loaddr & PAGEMASK) rounded up to PAGESIZE, where loaddr
 * is the lowest p_vaddr and hiaddr the highest p_vaddr + p_memsz seen.
 *
 * NOTE(review): the statement taken when hiaddr <= loaddr is not visible;
 * callers in this file treat a result of 0 as "Nothing to load".
 */
1065 elfsize(const Ehdr
*ehdrp
, uint_t nphdrs
, const caddr_t phdrbase
,
1068 const Phdr
*phdrp
= (Phdr
*)phdrbase
;
1069 const uint_t hsize
= ehdrp
->e_phentsize
;
1070 boolean_t dfirst
= B_TRUE
;
1071 uintptr_t loaddr
= UINTPTR_MAX
;
1072 uintptr_t hiaddr
= 0;
1075 for (i
= nphdrs
; i
> 0; i
--) {
1076 if (phdrp
->p_type
== PT_LOAD
) {
1077 const uintptr_t lo
= phdrp
->p_vaddr
;
1078 const uintptr_t hi
= lo
+ phdrp
->p_memsz
;
/* Widen the running [loaddr, hiaddr) range to include this segment. */
1080 loaddr
= MIN(lo
, loaddr
);
1081 hiaddr
= MAX(hi
, hiaddr
);
1084 * save the address of the first data segment
1085 * of a object - used for the AT_SUNW_LDDATA
/* Only a writable PT_LOAD qualifies, and only while dfirst is still set. */
1088 if ((lddata
!= NULL
) && dfirst
&&
1089 (phdrp
->p_flags
& PF_W
)) {
/* Step by the entry size the file declares rather than sizeof (Phdr). */
1094 phdrp
= (Phdr
*)((caddr_t
)phdrp
+ hsize
);
1097 if (hiaddr
<= loaddr
) {
1098 /* No non-zero PT_LOAD segment found */
1102 return (roundup(hiaddr
- (loaddr
& PAGEMASK
), PAGESIZE
));
1106 * Read in the ELF header and program header table.
1108 * ENOEXEC File format is not recognized
1109 * EINVAL Format recognized but execution not supported
/*
 * getelfhead() reads the ELF header of vp into *ehdr with vn_rdwr() and
 * reports the section header count, the section-name string table index
 * and the program header count through nshdrs, shstrndx and nphdrs.
 *
 * Checks visible here: the EI_MAG2 / EI_MAG3 identification bytes, e_type
 * being ET_EXEC or ET_DYN, EI_CLASS matching the ELF class this file is
 * compiled for, and elfheadcheck() on EI_DATA and e_machine.
 *
 * The three outputs start out as e_shnum, e_shstrndx and e_phnum. When one
 * of them holds its sentinel (e_shnum == 0 with a non-zero e_shoff,
 * SHN_XINDEX, PN_XNUM), the section header at e_shoff is read and its
 * sh_size, sh_link and sh_info fields supply the replacement values.
 *
 * NOTE(review): the return statements are not visible in this listing. The
 * text preceding this function names ENOEXEC and EINVAL, and callers test
 * the result against 0.
 */
1112 getelfhead(vnode_t
*vp
, cred_t
*credp
, Ehdr
*ehdr
, uint_t
*nshdrs
,
1113 uint_t
*shstrndx
, uint_t
*nphdrs
)
1119 * We got here by the first two bytes in ident,
1120 * now read the entire ELF header.
1122 if ((error
= vn_rdwr(UIO_READ
, vp
, (caddr_t
)ehdr
,
1123 sizeof (Ehdr
), (offset_t
)0, UIO_SYSSPACE
, 0,
1124 (rlim64_t
)0, credp
, &resid
)) != 0)
1128 * Since a separate version is compiled for handling 32-bit and
1129 * 64-bit ELF executables on a 64-bit kernel, the 64-bit version
1130 * doesn't need to be able to deal with 32-bit ELF files.
1133 ehdr
->e_ident
[EI_MAG2
] != ELFMAG2
||
1134 ehdr
->e_ident
[EI_MAG3
] != ELFMAG3
)
1137 if ((ehdr
->e_type
!= ET_EXEC
&& ehdr
->e_type
!= ET_DYN
) ||
1138 #if defined(_ILP32) || defined(_ELF32_COMPAT)
1139 ehdr
->e_ident
[EI_CLASS
] != ELFCLASS32
||
1141 ehdr
->e_ident
[EI_CLASS
] != ELFCLASS64
||
1143 !elfheadcheck(ehdr
->e_ident
[EI_DATA
], ehdr
->e_machine
,
/* Provisional values straight from the ELF header. */
1147 *nshdrs
= ehdr
->e_shnum
;
1148 *shstrndx
= ehdr
->e_shstrndx
;
1149 *nphdrs
= ehdr
->e_phnum
;
1152 * If e_shnum, e_shstrndx, or e_phnum is its sentinel value, we need
1153 * to read in the section header at index zero to access the true
1154 * values for those fields.
1156 if ((*nshdrs
== 0 && ehdr
->e_shoff
!= 0) ||
1157 *shstrndx
== SHN_XINDEX
|| *nphdrs
== PN_XNUM
) {
/* Without a section header table there is nowhere to read the values from. */
1160 if (ehdr
->e_shoff
== 0)
1163 if ((error
= vn_rdwr(UIO_READ
, vp
, (caddr_t
)&shdr
,
1164 sizeof (shdr
), (offset_t
)ehdr
->e_shoff
, UIO_SYSSPACE
, 0,
1165 (rlim64_t
)0, credp
, NULL
)) != 0) {
1170 *nshdrs
= shdr
.sh_size
;
1171 if (*shstrndx
== SHN_XINDEX
)
1172 *shstrndx
= shdr
.sh_link
;
/* A zero sh_info leaves the PN_XNUM value in place. */
1173 if (*nphdrs
== PN_XNUM
&& shdr
.sh_info
!= 0)
1174 *nphdrs
= shdr
.sh_info
;
1181 * We use members through p_flags on 32-bit files and p_memsz on 64-bit files,
1182 * so e_phentsize must be at least large enough to include those members.
1184 #if !defined(_LP64) || defined(_ELF32_COMPAT)
1185 #define MINPHENTSZ (offsetof(Phdr, p_flags) + \
1186 sizeof (((Phdr *)NULL)->p_flags))
1188 #define MINPHENTSZ (offsetof(Phdr, p_memsz) + \
1189 sizeof (((Phdr *)NULL)->p_memsz))
/*
 * getelfphdr() allocates a buffer for nphdrs program headers of
 * ehdr->e_phentsize bytes each and fills it from vp at offset e_phoff.
 *
 *   vp, credp - file to read and the credentials used for the read.
 *   ehdr      - ELF header supplying e_phentsize and e_phoff.
 *   nphdrs    - number of program headers to read.
 *   phbasep   - receives the kmem-allocated buffer.
 *   phsizep   - receives its size, nphdrs * e_phentsize; callers in this
 *               file hand both back to kmem_free().
 *
 * Tables larger than sizeof (Phdr) * elf_nphdr_max are allocated with
 * KM_NOSLEEP and the NULL result is checked; smaller ones use KM_SLEEP.
 * When the read fails the buffer is freed here.
 *
 * NOTE(review): the e_phentsize check masks with 3 (multiple of four) while
 * the accompanying text speaks of 8-byte alignment - confirm the intent.
 * NOTE(review): confirm that nphdrs * e_phentsize cannot wrap before it is
 * stored in the size_t; nphdrs is a uint_t that getelfhead() may take from
 * sh_info.
 * NOTE(review): the return statements are not visible in this listing.
 */
1193 getelfphdr(vnode_t
*vp
, cred_t
*credp
, const Ehdr
*ehdr
, uint_t nphdrs
,
1194 caddr_t
*phbasep
, size_t *phsizep
)
1199 * Ensure that e_phentsize is large enough for required fields to be
1200 * accessible and will maintain 8-byte alignment.
1202 if (ehdr
->e_phentsize
< MINPHENTSZ
|| (ehdr
->e_phentsize
& 3))
1205 *phsizep
= nphdrs
* ehdr
->e_phentsize
;
/* Oversized requests must not block: try KM_NOSLEEP and check for NULL. */
1207 if (*phsizep
> sizeof (Phdr
) * elf_nphdr_max
) {
1208 if ((*phbasep
= kmem_alloc(*phsizep
, KM_NOSLEEP
)) == NULL
)
1211 *phbasep
= kmem_alloc(*phsizep
, KM_SLEEP
);
1214 if ((err
= vn_rdwr(UIO_READ
, vp
, *phbasep
, (ssize_t
)*phsizep
,
1215 (offset_t
)ehdr
->e_phoff
, UIO_SYSSPACE
, 0, (rlim64_t
)0,
1216 credp
, NULL
)) != 0) {
/* Read failed: do not leak the buffer allocated above. */
1217 kmem_free(*phbasep
, *phsizep
);
1225 #define MINSHDRSZ (offsetof(Shdr, sh_entsize) + \
1226 sizeof (((Shdr *)NULL)->sh_entsize))
/*
 * getelfshdr() reads the section header table of vp and the section-name
 * string table it designates.
 *
 *   vp, credp  - file to read and the credentials used for the reads.
 *   ehdr       - ELF header supplying e_shentsize and e_shoff.
 *   nshdrs     - number of section headers; must be non-zero.
 *   shstrndx   - index of the string table section; must be < nshdrs.
 *   shbasep    - receives the kmem-allocated section header table.
 *   shsizep    - receives its size, nshdrs * e_shentsize.
 *   shstrbasep - receives the kmem-allocated string table.
 *   The size of the string table is stored through `shstrsizep`, which is
 *   declared on a line that is not visible in this listing.
 *
 * Each buffer is allocated with KM_NOSLEEP (NULL checked) when it exceeds
 * its tunable limit (sizeof (Shdr) * elf_nshdr_max, elf_shstrtab_max) and
 * with KM_SLEEP otherwise. On the visible failure paths everything
 * allocated so far is freed. The last byte of the string table is forced
 * to '\0'.
 *
 * NOTE(review): the e_shentsize check masks with 3 (multiple of four) while
 * the accompanying text speaks of 8-byte alignment - confirm the intent.
 * NOTE(review): confirm that nshdrs * e_shentsize cannot wrap before it is
 * stored in the size_t.
 * NOTE(review): the return statements are not visible in this listing.
 */
1229 getelfshdr(vnode_t
*vp
, cred_t
*credp
, const Ehdr
*ehdr
, uint_t nshdrs
,
1230 uint_t shstrndx
, caddr_t
*shbasep
, size_t *shsizep
, char **shstrbasep
,
1237 * Since we're going to be using e_shentsize to iterate down the
1238 * array of section headers, it must be 8-byte aligned or else
1239 * a we might cause a misaligned access. We use all members through
1240 * sh_entsize (on both 32- and 64-bit ELF files) so e_shentsize
1241 * must be at least large enough to include that member. The index
1242 * of the string table section must also be valid.
1244 if (ehdr
->e_shentsize
< MINSHDRSZ
|| (ehdr
->e_shentsize
& 3) ||
1245 nshdrs
== 0 || shstrndx
>= nshdrs
) {
1249 *shsizep
= nshdrs
* ehdr
->e_shentsize
;
/* Oversized tables must not block: try KM_NOSLEEP and check for NULL. */
1251 if (*shsizep
> sizeof (Shdr
) * elf_nshdr_max
) {
1252 if ((*shbasep
= kmem_alloc(*shsizep
, KM_NOSLEEP
)) == NULL
)
1255 *shbasep
= kmem_alloc(*shsizep
, KM_SLEEP
);
1258 if ((err
= vn_rdwr(UIO_READ
, vp
, *shbasep
, (ssize_t
)*shsizep
,
1259 (offset_t
)ehdr
->e_shoff
, UIO_SYSSPACE
, 0, (rlim64_t
)0,
1260 credp
, NULL
)) != 0) {
1261 kmem_free(*shbasep
, *shsizep
);
1266 * Grab the section string table. Walking through the shdrs is
1267 * pointless if their names cannot be interrogated.
/* Locate the string table's header using the file's declared entry size. */
1269 shdr
= (Shdr
*)(*shbasep
+ shstrndx
* ehdr
->e_shentsize
);
1270 if ((*shstrsizep
= shdr
->sh_size
) == 0) {
1271 kmem_free(*shbasep
, *shsizep
);
1275 if (*shstrsizep
> elf_shstrtab_max
) {
1276 if ((*shstrbasep
= kmem_alloc(*shstrsizep
,
1277 KM_NOSLEEP
)) == NULL
) {
1278 kmem_free(*shbasep
, *shsizep
);
1282 *shstrbasep
= kmem_alloc(*shstrsizep
, KM_SLEEP
);
1285 if ((err
= vn_rdwr(UIO_READ
, vp
, *shstrbasep
, (ssize_t
)*shstrsizep
,
1286 (offset_t
)shdr
->sh_offset
, UIO_SYSSPACE
, 0, (rlim64_t
)0,
1287 credp
, NULL
)) != 0) {
/* Second read failed: release both buffers. */
1288 kmem_free(*shbasep
, *shsizep
);
1289 kmem_free(*shstrbasep
, *shstrsizep
);
1294 * Make sure the strtab is null-terminated to make sure we
1295 * don't run off the end of the table.
1297 (*shstrbasep
)[*shstrsizep
- 1] = '\0';
/*
 * elfreadhdr() reads the ELF header of vp with getelfhead() and then the
 * program header table with getelfphdr(), using the program header count
 * that getelfhead() stored in *nphdrs.
 *
 *   vp, credp - file to read and the credentials used for the reads.
 *   ehdrp     - receives the ELF header.
 *   nphdrs    - receives the program header count.
 *   phbasep   - passed to getelfphdr() to receive the table.
 *   phsizep   - presumably receives the table size; the argument line that
 *               would pass it on is not visible in this listing.
 *
 * The section header count and string table index are captured in locals
 * (nshdrs, shstrndx) and are not handed back to the caller.
 *
 * NOTE(review): the return statements are not visible in this listing.
 */
1303 elfreadhdr(vnode_t
*vp
, cred_t
*credp
, Ehdr
*ehdrp
, uint_t
*nphdrs
,
1304 caddr_t
*phbasep
, size_t *phsizep
)
1307 uint_t nshdrs
, shstrndx
;
1309 if ((error
= getelfhead(vp
, credp
, ehdrp
, &nshdrs
, &shstrndx
,
1311 (error
= getelfphdr(vp
, credp
, ehdrp
, *nphdrs
, phbasep
,
1332 uintptr_t *minaddrp
,
1338 int error
, page
, prot
;
1339 caddr_t addr
= NULL
;
1340 caddr_t minaddr
= (caddr_t
)UINTPTR_MAX
;
1342 size_t zfodsz
, memsz
;
1343 boolean_t ptload
= B_FALSE
;
1345 const uint_t hsize
= ehdr
->e_phentsize
;
1346 extern int use_brk_lpg
;
1348 if (ehdr
->e_type
== ET_DYN
) {
1349 secflagset_t flags
= 0;
1351 * Obtain the virtual address of a hole in the
1352 * address space to map the "interpreter".
1354 if (secflag_enabled(curproc
, PROC_SEC_ASLR
))
1355 flags
|= _MAP_RANDOMIZE
;
1357 map_addr(&addr
, len
, (offset_t
)0, 1, flags
);
1360 *voffset
= (intptr_t)addr
;
1363 * Calculate the minimum vaddr so it can be subtracted out.
1364 * According to the ELF specification, since PT_LOAD sections
1365 * must be sorted by increasing p_vaddr values, this is
1366 * guaranteed to be the first PT_LOAD section.
1368 phdr
= (Phdr
*)phdrbase
;
1369 for (i
= nphdrs
; i
> 0; i
--) {
1370 if (phdr
->p_type
== PT_LOAD
) {
1371 *voffset
-= (uintptr_t)phdr
->p_vaddr
;
1374 phdr
= (Phdr
*)((caddr_t
)phdr
+ hsize
);
1381 phdr
= (Phdr
*)phdrbase
;
1382 for (i
= nphdrs
; i
> 0; i
--) {
1383 switch (phdr
->p_type
) {
1387 if (phdr
->p_flags
& PF_R
)
1389 if (phdr
->p_flags
& PF_W
)
1391 if (phdr
->p_flags
& PF_X
)
1394 addr
= (caddr_t
)((uintptr_t)phdr
->p_vaddr
+ *voffset
);
1396 if (*intphdr
!= NULL
&& uphdr
!= NULL
&&
1399 * The PT_PHDR program header is, strictly
1400 * speaking, optional. If we find that this
1401 * is missing, we will determine the location
1402 * of the program headers based on the address
1403 * of the lowest PT_LOAD segment (namely, this
1404 * one): we subtract the p_offset to get to
1405 * the ELF header and then add back the program
1406 * header offset to get to the program headers.
1407 * We then cons up a Phdr that corresponds to
1408 * the (missing) PT_PHDR, setting the flags
1409 * to 0 to denote that this is artificial and
1410 * should (must) be freed by the caller.
1414 cons
= kmem_zalloc(sizeof (Phdr
), KM_SLEEP
);
1417 cons
->p_type
= PT_PHDR
;
1418 cons
->p_vaddr
= ((uintptr_t)addr
-
1419 phdr
->p_offset
) + ehdr
->e_phoff
;
1425 * The ELF spec dictates that p_filesz may not be
1426 * larger than p_memsz in PT_LOAD segments.
1428 if (phdr
->p_filesz
> phdr
->p_memsz
) {
1434 * Keep track of the segment with the lowest starting
1440 zfodsz
= (size_t)phdr
->p_memsz
- phdr
->p_filesz
;
1442 offset
= phdr
->p_offset
;
1443 if (((uintptr_t)offset
& PAGEOFFSET
) ==
1444 ((uintptr_t)addr
& PAGEOFFSET
) &&
1445 (!(vp
->v_flag
& VNOMAP
))) {
1452 * Set the heap pagesize for OOB when the bss size
1453 * is known and use_brk_lpg is not 0.
1455 if (brksize
!= NULL
&& use_brk_lpg
&&
1456 zfodsz
!= 0 && phdr
== dataphdrp
&&
1457 (prot
& PROT_WRITE
)) {
1458 const size_t tlen
= P2NPHASE((uintptr_t)addr
+
1459 phdr
->p_filesz
, PAGESIZE
);
1461 if (zfodsz
> tlen
) {
1462 const caddr_t taddr
= addr
+
1463 phdr
->p_filesz
+ tlen
;
1466 * Since a hole in the AS large enough
1467 * for this object as calculated by
1468 * elfsize() is available, we do not
1469 * need to fear overflow for 'taddr'.
1471 curproc
->p_brkpageszc
=
1472 page_szc(map_pgsz(MAPPGSZ_HEAP
,
1473 curproc
, taddr
, zfodsz
- tlen
, 0));
1477 if (curproc
->p_brkpageszc
!= 0 && phdr
== dataphdrp
&&
1478 (prot
& PROT_WRITE
)) {
1479 uint_t szc
= curproc
->p_brkpageszc
;
1480 size_t pgsz
= page_get_pagesize(szc
);
1481 caddr_t ebss
= addr
+ phdr
->p_memsz
;
1483 * If we need extra space to keep the BSS an
1484 * integral number of pages in size, some of
1485 * that space may fall beyond p_brkbase, so we
1486 * need to set p_brksize to account for it
1487 * being (logically) part of the brk.
1489 size_t extra_zfodsz
;
1491 ASSERT(pgsz
> PAGESIZE
);
1493 extra_zfodsz
= P2NPHASE((uintptr_t)ebss
, pgsz
);
1495 if (error
= execmap(vp
, addr
, phdr
->p_filesz
,
1496 zfodsz
+ extra_zfodsz
, phdr
->p_offset
,
1499 if (brksize
!= NULL
)
1500 *brksize
= extra_zfodsz
;
1502 if (error
= execmap(vp
, addr
, phdr
->p_filesz
,
1503 zfodsz
, phdr
->p_offset
, prot
, page
, 0))
1507 if (bssbase
!= NULL
&& addr
>= *bssbase
&&
1508 phdr
== dataphdrp
) {
1509 *bssbase
= addr
+ phdr
->p_filesz
;
1511 if (brkbase
!= NULL
&& addr
>= *brkbase
) {
1512 *brkbase
= addr
+ phdr
->p_memsz
;
1515 memsz
= btopr(phdr
->p_memsz
);
1516 if ((*execsz
+ memsz
) < *execsz
) {
1534 if (ptload
|| phdr
->p_flags
== 0)
1555 phdr
= (Phdr
*)((caddr_t
)phdr
+ hsize
);
1558 if (minaddrp
!= NULL
) {
1559 ASSERT(minaddr
!= (caddr_t
)UINTPTR_MAX
);
1560 *minaddrp
= (uintptr_t)minaddr
;
1563 if (brkbase
!= NULL
&& secflag_enabled(curproc
, PROC_SEC_ASLR
)) {
1565 uintptr_t base
= (uintptr_t)*brkbase
;
1566 uintptr_t oend
= base
+ *brksize
;
1568 ASSERT(ISP2(aslr_max_brk_skew
));
1570 (void) random_get_pseudo_bytes((uint8_t *)&off
, sizeof (off
));
1571 base
+= P2PHASE(off
, aslr_max_brk_skew
);
1572 base
= P2ROUNDUP(base
, PAGESIZE
);
1573 *brkbase
= (caddr_t
)base
;
1575 * Above, we set *brksize to account for the possibility we
1576 * had to grow the 'brk' in padding out the BSS to a page
1579 * We now need to adjust that based on where we now are
1580 * actually putting the brk.
1583 *brksize
= oend
- base
;
1596 elfnote(vnode_t
*vp
, offset_t
*offsetp
, int type
, int descsz
, void *desc
,
1597 rlim64_t rlimit
, cred_t
*credp
)
1602 bzero(¬e
, sizeof (note
));
1603 bcopy("CORE", note
.name
, 4);
1604 note
.nhdr
.n_type
= type
;
1606 * The System V ABI states that n_namesz must be the length of the
1607 * string that follows the Nhdr structure including the terminating
1608 * null. The ABI also specifies that sufficient padding should be
1609 * included so that the description that follows the name string
1610 * begins on a 4- or 8-byte boundary for 32- and 64-bit binaries
1611 * respectively. However, since this change was not made correctly
1612 * at the time of the 64-bit port, both 32- and 64-bit binaries
1613 * descriptions are only guaranteed to begin on a 4-byte boundary.
1615 note
.nhdr
.n_namesz
= 5;
1616 note
.nhdr
.n_descsz
= roundup(descsz
, sizeof (Word
));
1618 if (error
= core_write(vp
, UIO_SYSSPACE
, *offsetp
, ¬e
,
1619 sizeof (note
), rlimit
, credp
))
1622 *offsetp
+= sizeof (note
);
1624 if (error
= core_write(vp
, UIO_SYSSPACE
, *offsetp
, desc
,
1625 note
.nhdr
.n_descsz
, rlimit
, credp
))
1628 *offsetp
+= note
.nhdr
.n_descsz
;
1633 * Copy the section data from one vnode to the section of another vnode.
1636 elf_copy_scn(elf_core_ctx_t
*ctx
, const Shdr
*src
, vnode_t
*src_vp
, Shdr
*dst
)
1638 size_t n
= src
->sh_size
;
1640 const u_offset_t soff
= src
->sh_offset
;
1641 const u_offset_t doff
= ctx
->ecc_doffset
;
1642 void *buf
= ctx
->ecc_buf
;
1643 vnode_t
*dst_vp
= ctx
->ecc_vp
;
1644 cred_t
*credp
= ctx
->ecc_credp
;
1646 /* Protect the copy loop below from overflow on the offsets */
1647 if (n
> OFF_MAX
|| (n
+ soff
) > OFF_MAX
|| (n
+ doff
) > OFF_MAX
||
1648 (n
+ soff
) < n
|| (n
+ doff
) < n
) {
1655 const size_t len
= MIN(ctx
->ecc_bufsz
, n
);
1658 if (vn_rdwr(UIO_READ
, src_vp
, buf
, (ssize_t
)len
,
1659 (offset_t
)(soff
+ off
),
1660 UIO_SYSSPACE
, 0, (rlim64_t
)0, credp
, &resid
) != 0 ||
1661 resid
>= len
|| resid
< 0 ||
1662 core_write(dst_vp
, UIO_SYSSPACE
, (offset_t
)(doff
+ off
),
1663 buf
, len
- resid
, ctx
->ecc_rlimit
, credp
) != 0) {
1669 ASSERT(n
>= len
- resid
);
1675 ctx
->ecc_doffset
+= src
->sh_size
;
1679 * Walk sections for a given ELF object, counting (or copying) those of
1680 * interest (CTF, symtab, strtab, .debug_*).
1683 elf_process_obj_scns(elf_core_ctx_t
*ctx
, vnode_t
*mvp
, caddr_t saddr
,
1684 Shdr
*v
, uint_t idx
, uint_t remain
, shstrtab_t
*shstrtab
, uint_t
*countp
)
1687 const core_content_t content
= ctx
->ecc_content
;
1688 cred_t
*credp
= ctx
->ecc_credp
;
1689 Shdr
*ctf
= NULL
, *symtab
= NULL
, *strtab
= NULL
;
1691 uint_t nshdrs
, shstrndx
, nphdrs
, count
= 0;
1692 u_offset_t
*doffp
= &ctx
->ecc_doffset
;
1693 boolean_t ctf_link
= B_FALSE
;
1695 size_t shsize
, shstrsize
;
1698 const boolean_t justcounting
= v
== NULL
;
1703 (CC_CONTENT_CTF
| CC_CONTENT_SYMTAB
| CC_CONTENT_DEBUG
)) == 0) {
1707 if (getelfhead(mvp
, credp
, &ehdr
, &nshdrs
, &shstrndx
, &nphdrs
) != 0 ||
1708 getelfshdr(mvp
, credp
, &ehdr
, nshdrs
, shstrndx
, &shbase
, &shsize
,
1709 &shstrbase
, &shstrsize
) != 0) {
1713 /* Starting at index 1 skips SHT_NULL which is expected at index 0 */
1714 off
= ehdr
.e_shentsize
;
1715 for (uint_t i
= 1; i
< nshdrs
; i
++, off
+= ehdr
.e_shentsize
) {
1716 Shdr
*shdr
, *symchk
= NULL
, *strchk
;
1719 shdr
= (Shdr
*)(shbase
+ off
);
1720 if (shdr
->sh_name
>= shstrsize
|| shdr
->sh_type
== SHT_NULL
)
1723 name
= shstrbase
+ shdr
->sh_name
;
1726 (content
& CC_CONTENT_CTF
) != 0 &&
1727 strcmp(name
, shstrtab_data
[STR_CTF
]) == 0) {
1729 if (ctf
->sh_link
!= 0 && ctf
->sh_link
< nshdrs
) {
1730 /* check linked symtab below */
1731 symchk
= (Shdr
*)(shbase
+
1732 shdr
->sh_link
* ehdr
.e_shentsize
);
1737 } else if (symtab
== NULL
&&
1738 (content
& CC_CONTENT_SYMTAB
) != 0 &&
1739 strcmp(name
, shstrtab_data
[STR_SYMTAB
]) == 0) {
1741 } else if ((content
& CC_CONTENT_DEBUG
) != 0 &&
1742 strncmp(name
, ".debug_", strlen(".debug_")) == 0) {
1744 * The design of the above check is intentional. In
1745 * particular, we want to capture any sections that
1746 * begin with '.debug_' for a few reasons:
1748 * 1) Various revisions to the DWARF spec end up
1749 * changing the set of section headers that exist. This
1750 * ensures that we don't need to change the kernel to
1751 * get a new version.
1753 * 2) Other software uses .debug_ sections for things
1754 * which aren't DWARF. This allows them to be captured
1759 if (!justcounting
) {
1760 if (count
> remain
) {
1765 elf_ctx_resize_scratch(ctx
, shdr
->sh_size
);
1767 if (!shstrtab_ndx(shstrtab
,
1768 name
, &v
[idx
].sh_name
)) {
1773 v
[idx
].sh_addr
= (Addr
)(uintptr_t)saddr
;
1774 v
[idx
].sh_type
= shdr
->sh_type
;
1775 v
[idx
].sh_addralign
= shdr
->sh_addralign
;
1776 *doffp
= roundup(*doffp
, v
[idx
].sh_addralign
);
1777 v
[idx
].sh_offset
= *doffp
;
1778 v
[idx
].sh_size
= shdr
->sh_size
;
1780 v
[idx
].sh_entsize
= shdr
->sh_entsize
;
1781 v
[idx
].sh_info
= shdr
->sh_info
;
1783 elf_copy_scn(ctx
, shdr
, mvp
, &v
[idx
]);
1792 ASSERT(symchk
!= NULL
);
1793 if ((symchk
->sh_type
!= SHT_DYNSYM
&&
1794 symchk
->sh_type
!= SHT_SYMTAB
) ||
1795 symchk
->sh_link
== 0 || symchk
->sh_link
>= nshdrs
) {
1799 strchk
= (Shdr
*)(shbase
+ symchk
->sh_link
* ehdr
.e_shentsize
);
1800 if (strchk
->sh_type
!= SHT_STRTAB
) {
1807 if (symtab
!= NULL
&& ctf
!= NULL
&&
1808 (content
& CC_CONTENT_DEBUG
) == 0) {
1809 /* No other shdrs are of interest at this point */
1819 if (count
> remain
) {
1829 /* output CTF section */
1831 elf_ctx_resize_scratch(ctx
, ctf
->sh_size
);
1833 if (!shstrtab_ndx(shstrtab
,
1834 shstrtab_data
[STR_CTF
], &v
[idx
].sh_name
)) {
1838 v
[idx
].sh_addr
= (Addr
)(uintptr_t)saddr
;
1839 v
[idx
].sh_type
= SHT_PROGBITS
;
1840 v
[idx
].sh_addralign
= 4;
1841 *doffp
= roundup(*doffp
, v
[idx
].sh_addralign
);
1842 v
[idx
].sh_offset
= *doffp
;
1843 v
[idx
].sh_size
= ctf
->sh_size
;
1847 * The linked symtab (and strtab) will be output
1848 * immediately after this CTF section. Its shdr index
1849 * directly follows this one.
1851 v
[idx
].sh_link
= idx
+ 1;
1852 ASSERT(symtab
!= NULL
);
1856 elf_copy_scn(ctx
, ctf
, mvp
, &v
[idx
]);
1860 /* output SYMTAB/STRTAB sections */
1861 if (symtab
!= NULL
) {
1862 shstrtype_t symtab_type
, strtab_type
;
1863 uint_t symtab_name
, strtab_name
;
1865 elf_ctx_resize_scratch(ctx
,
1866 MAX(symtab
->sh_size
, strtab
->sh_size
));
1868 if (symtab
->sh_type
== SHT_DYNSYM
) {
1869 symtab_type
= STR_DYNSYM
;
1870 strtab_type
= STR_DYNSTR
;
1872 symtab_type
= STR_SYMTAB
;
1873 strtab_type
= STR_STRTAB
;
1876 if (!shstrtab_ndx(shstrtab
,
1877 shstrtab_data
[symtab_type
], &symtab_name
)) {
1881 if (!shstrtab_ndx(shstrtab
,
1882 shstrtab_data
[strtab_type
], &strtab_name
)) {
1887 v
[idx
].sh_name
= symtab_name
;
1888 v
[idx
].sh_type
= symtab
->sh_type
;
1889 v
[idx
].sh_addr
= symtab
->sh_addr
;
1890 if (ehdr
.e_type
== ET_DYN
|| v
[idx
].sh_addr
== 0)
1891 v
[idx
].sh_addr
+= (Addr
)(uintptr_t)saddr
;
1892 v
[idx
].sh_addralign
= symtab
->sh_addralign
;
1893 *doffp
= roundup(*doffp
, v
[idx
].sh_addralign
);
1894 v
[idx
].sh_offset
= *doffp
;
1895 v
[idx
].sh_size
= symtab
->sh_size
;
1896 v
[idx
].sh_link
= idx
+ 1;
1897 v
[idx
].sh_entsize
= symtab
->sh_entsize
;
1898 v
[idx
].sh_info
= symtab
->sh_info
;
1900 elf_copy_scn(ctx
, symtab
, mvp
, &v
[idx
]);
1903 v
[idx
].sh_name
= strtab_name
;
1904 v
[idx
].sh_type
= SHT_STRTAB
;
1905 v
[idx
].sh_flags
= SHF_STRINGS
;
1906 v
[idx
].sh_addr
= strtab
->sh_addr
;
1907 if (ehdr
.e_type
== ET_DYN
|| v
[idx
].sh_addr
== 0)
1908 v
[idx
].sh_addr
+= (Addr
)(uintptr_t)saddr
;
1909 v
[idx
].sh_addralign
= strtab
->sh_addralign
;
1910 *doffp
= roundup(*doffp
, v
[idx
].sh_addralign
);
1911 v
[idx
].sh_offset
= *doffp
;
1912 v
[idx
].sh_size
= strtab
->sh_size
;
1914 elf_copy_scn(ctx
, strtab
, mvp
, &v
[idx
]);
1919 kmem_free(shstrbase
, shstrsize
);
1920 kmem_free(shbase
, shsize
);
1929 * Walk mappings in process address space, examining those which correspond to
1930 * loaded objects. It is called twice from elfcore: Once to simply count
1931 * relevant sections, and again later to copy those sections once an adequate
1932 * buffer has been allocated for the shdr details.
1935 elf_process_scns(elf_core_ctx_t
*ctx
, Shdr
*v
, uint_t nv
, uint_t
*nshdrsp
)
1937 vnode_t
*lastvp
= NULL
;
1939 uint_t idx
= 0, remain
;
1940 shstrtab_t shstrtab
;
1941 struct as
*as
= ctx
->ecc_p
->p_as
;
1944 ASSERT(AS_WRITE_HELD(as
));
1949 if (!shstrtab_init(&shstrtab
))
1956 * The shdrs are being counted, rather than outputting them
1957 * into a buffer. Leave room for two entries: the SHT_NULL at
1958 * index 0 and the shstrtab at the end.
1960 remain
= UINT_MAX
- 2;
1963 /* Per the ELF spec, shdr index 0 is reserved. */
1965 for (seg
= AS_SEGFIRST(as
); seg
!= NULL
; seg
= AS_SEGNEXT(as
, seg
)) {
1968 caddr_t saddr
= seg
->s_base
, naddr
, eaddr
;
1973 * Since we're just looking for text segments of load
1974 * objects, we only care about the protection bits; we don't
1975 * care about the actual size of the segment so we use the
1976 * reserved size. If the segment's size is zero, there's
1977 * something fishy going on so we ignore this segment.
1979 if (seg
->s_ops
!= &segvn_ops
||
1980 SEGOP_GETVP(seg
, seg
->s_base
, &mvp
) != 0 ||
1981 mvp
== lastvp
|| mvp
== NULL
|| mvp
->v_type
!= VREG
||
1982 (segsize
= pr_getsegsize(seg
, 1)) == 0)
1985 eaddr
= saddr
+ segsize
;
1986 prot
= pr_getprot(seg
, 1, &tmp
, &saddr
, &naddr
, eaddr
);
1987 pr_getprot_done(&tmp
);
1990 * Skip this segment unless the protection bits look like
1991 * what we'd expect for a text segment.
1993 if ((prot
& (PROT_WRITE
| PROT_EXEC
)) != PROT_EXEC
)
1996 error
= elf_process_obj_scns(ctx
, mvp
, saddr
, v
, idx
, remain
,
2001 ASSERT(count
<= remain
);
2002 ASSERT(v
== NULL
|| (idx
+ count
) < nv
);
2013 /* Include room for the shstrtab at the end */
2019 if (idx
!= nv
- 1) {
2020 cmn_err(CE_WARN
, "elfcore: core dump failed for "
2021 "process %d; address space is changing",
2027 if (!shstrtab_ndx(&shstrtab
, shstrtab_data
[STR_SHSTRTAB
],
2032 v
[idx
].sh_size
= shstrtab_size(&shstrtab
);
2033 v
[idx
].sh_addralign
= 1;
2034 v
[idx
].sh_offset
= ctx
->ecc_doffset
;
2035 v
[idx
].sh_flags
= SHF_STRINGS
;
2036 v
[idx
].sh_type
= SHT_STRTAB
;
2038 elf_ctx_resize_scratch(ctx
, v
[idx
].sh_size
);
2039 VERIFY3U(ctx
->ecc_bufsz
, >=, v
[idx
].sh_size
);
2040 shstrtab_dump(&shstrtab
, ctx
->ecc_buf
);
2042 error
= core_write(ctx
->ecc_vp
, UIO_SYSSPACE
, ctx
->ecc_doffset
,
2043 ctx
->ecc_buf
, v
[idx
].sh_size
, ctx
->ecc_rlimit
, ctx
->ecc_credp
);
2045 ctx
->ecc_doffset
+= v
[idx
].sh_size
;
2050 shstrtab_fini(&shstrtab
);
2056 elfcore(vnode_t
*vp
, proc_t
*p
, cred_t
*credp
, rlim64_t rlimit
, int sig
,
2057 core_content_t content
)
2059 u_offset_t poffset
, soffset
, doffset
;
2061 uint_t i
, nphdrs
, nshdrs
;
2063 struct as
*as
= p
->p_as
;
2064 void *bigwad
, *zeropg
= NULL
;
2065 size_t bigsize
, phdrsz
, shdrsz
;
2069 caddr_t brkbase
, stkbase
;
2070 size_t brksize
, stksize
;
2071 boolean_t overflowed
= B_FALSE
, retried
= B_FALSE
;
2072 klwp_t
*lwp
= ttolwp(curthread
);
2073 elf_core_ctx_t ctx
= {
2077 .ecc_rlimit
= rlimit
,
2078 .ecc_content
= content
,
2086 * Make sure we have everything we need (registers, etc.).
2087 * All other lwps have already stopped and are in an orderly state.
2089 ASSERT(p
== ttoproc(curthread
));
2092 AS_LOCK_ENTER(as
, RW_WRITER
);
2093 nphdrs
= prnsegs(as
, 0) + 2; /* two CORE note sections */
2096 * Count the number of section headers we're going to need.
2099 if (content
& (CC_CONTENT_CTF
| CC_CONTENT_SYMTAB
| CC_CONTENT_DEBUG
)) {
2100 VERIFY0(elf_process_scns(&ctx
, NULL
, 0, &nshdrs
));
2105 * The core file contents may require zero section headers, but if
2106 * we overflow the 16 bits allotted to the program header count in
2107 * the ELF header, we'll need that program header at index zero.
2109 if (nshdrs
== 0 && nphdrs
>= PN_XNUM
)
2113 * Allocate a buffer which is sized adequately to hold the ehdr, phdrs
2114 * or shdrs needed to produce the core file. It is used for the three
2115 * tasks sequentially, not simultaneously, so it does not need space
2116 * for all three data at once, only the largest one.
2118 VERIFY(nphdrs
>= 2);
2119 phdrsz
= nphdrs
* sizeof (Phdr
);
2120 shdrsz
= nshdrs
* sizeof (Shdr
);
2121 bigsize
= MAX(sizeof (Ehdr
), MAX(phdrsz
, shdrsz
));
2122 bigwad
= kmem_alloc(bigsize
, KM_SLEEP
);
2124 ehdr
= (Ehdr
*)bigwad
;
2125 bzero(ehdr
, sizeof (*ehdr
));
2127 ehdr
->e_ident
[EI_MAG0
] = ELFMAG0
;
2128 ehdr
->e_ident
[EI_MAG1
] = ELFMAG1
;
2129 ehdr
->e_ident
[EI_MAG2
] = ELFMAG2
;
2130 ehdr
->e_ident
[EI_MAG3
] = ELFMAG3
;
2131 ehdr
->e_ident
[EI_CLASS
] = ELFCLASS
;
2132 ehdr
->e_type
= ET_CORE
;
2134 #if !defined(_LP64) || defined(_ELF32_COMPAT)
2136 #if defined(__sparc)
2137 ehdr
->e_ident
[EI_DATA
] = ELFDATA2MSB
;
2138 ehdr
->e_machine
= EM_SPARC
;
2139 #elif defined(__i386_COMPAT)
2140 ehdr
->e_ident
[EI_DATA
] = ELFDATA2LSB
;
2141 ehdr
->e_machine
= EM_386
;
2143 #error "no recognized machine type is defined"
2146 #else /* !defined(_LP64) || defined(_ELF32_COMPAT) */
2148 #if defined(__sparc)
2149 ehdr
->e_ident
[EI_DATA
] = ELFDATA2MSB
;
2150 ehdr
->e_machine
= EM_SPARCV9
;
2151 #elif defined(__amd64)
2152 ehdr
->e_ident
[EI_DATA
] = ELFDATA2LSB
;
2153 ehdr
->e_machine
= EM_AMD64
;
2155 #error "no recognized 64-bit machine type is defined"
2158 #endif /* !defined(_LP64) || defined(_ELF32_COMPAT) */
2160 poffset
= sizeof (Ehdr
);
2161 soffset
= sizeof (Ehdr
) + phdrsz
;
2162 doffset
= sizeof (Ehdr
) + phdrsz
+ shdrsz
;
2163 bzero(&shdr0
, sizeof (shdr0
));
2166 * If the count of program headers or section headers or the index
2167 * of the section string table can't fit in the mere 16 bits
2168 * shortsightedly allotted to them in the ELF header, we use the
2169 * extended formats and put the real values in the section header
2172 if (nphdrs
>= PN_XNUM
) {
2173 ehdr
->e_phnum
= PN_XNUM
;
2174 shdr0
.sh_info
= nphdrs
;
2176 ehdr
->e_phnum
= (unsigned short)nphdrs
;
2180 if (nshdrs
>= SHN_LORESERVE
) {
2182 shdr0
.sh_size
= nshdrs
;
2184 ehdr
->e_shnum
= (unsigned short)nshdrs
;
2187 if (nshdrs
- 1 >= SHN_LORESERVE
) {
2188 ehdr
->e_shstrndx
= SHN_XINDEX
;
2189 shdr0
.sh_link
= nshdrs
- 1;
2191 ehdr
->e_shstrndx
= (unsigned short)(nshdrs
- 1);
2194 ehdr
->e_shoff
= soffset
;
2195 ehdr
->e_shentsize
= sizeof (Shdr
);
2198 ehdr
->e_ident
[EI_VERSION
] = EV_CURRENT
;
2199 ehdr
->e_version
= EV_CURRENT
;
2200 ehdr
->e_ehsize
= sizeof (Ehdr
);
2201 ehdr
->e_phoff
= poffset
;
2202 ehdr
->e_phentsize
= sizeof (Phdr
);
2204 if (error
= core_write(vp
, UIO_SYSSPACE
, (offset_t
)0, ehdr
,
2205 sizeof (Ehdr
), rlimit
, credp
)) {
2209 phdr
= (Phdr
*)bigwad
;
2210 bzero(phdr
, phdrsz
);
2212 setup_old_note_header(&phdr
[0], p
);
2213 phdr
[0].p_offset
= doffset
= roundup(doffset
, sizeof (Word
));
2214 doffset
+= phdr
[0].p_filesz
;
2216 setup_note_header(&phdr
[1], p
);
2217 phdr
[1].p_offset
= doffset
= roundup(doffset
, sizeof (Word
));
2218 doffset
+= phdr
[1].p_filesz
;
2220 mutex_enter(&p
->p_lock
);
2222 brkbase
= p
->p_brkbase
;
2223 brksize
= p
->p_brksize
;
2225 stkbase
= p
->p_usrstack
- p
->p_stksize
;
2226 stksize
= p
->p_stksize
;
2228 mutex_exit(&p
->p_lock
);
2230 AS_LOCK_ENTER(as
, RW_WRITER
);
2232 for (seg
= AS_SEGFIRST(as
); seg
!= NULL
; seg
= AS_SEGNEXT(as
, seg
)) {
2233 caddr_t eaddr
= seg
->s_base
+ pr_getsegsize(seg
, 0);
2234 caddr_t saddr
, naddr
;
2236 extern struct seg_ops segspt_shmops
;
2238 if ((seg
->s_flags
& S_HOLE
) != 0) {
2242 for (saddr
= seg
->s_base
; saddr
< eaddr
; saddr
= naddr
) {
2248 prot
= pr_getprot(seg
, 0, &tmp
, &saddr
, &naddr
, eaddr
);
2249 prot
&= PROT_READ
| PROT_WRITE
| PROT_EXEC
;
2250 if ((size
= (size_t)(naddr
- saddr
)) == 0) {
2251 ASSERT(tmp
== NULL
);
2253 } else if (i
== nphdrs
) {
2254 pr_getprot_done(&tmp
);
2255 overflowed
= B_TRUE
;
2258 phdr
[i
].p_type
= PT_LOAD
;
2259 phdr
[i
].p_vaddr
= (Addr
)(uintptr_t)saddr
;
2260 phdr
[i
].p_memsz
= size
;
2261 if (prot
& PROT_READ
)
2262 phdr
[i
].p_flags
|= PF_R
;
2263 if (prot
& PROT_WRITE
)
2264 phdr
[i
].p_flags
|= PF_W
;
2265 if (prot
& PROT_EXEC
)
2266 phdr
[i
].p_flags
|= PF_X
;
2269 * Figure out which mappings to include in the core.
2271 type
= SEGOP_GETTYPE(seg
, saddr
);
2273 if (saddr
== stkbase
&& size
== stksize
) {
2274 if (!(content
& CC_CONTENT_STACK
))
2277 } else if (saddr
== brkbase
&& size
== brksize
) {
2278 if (!(content
& CC_CONTENT_HEAP
))
2281 } else if (seg
->s_ops
== &segspt_shmops
) {
2282 if (type
& MAP_NORESERVE
) {
2283 if (!(content
& CC_CONTENT_DISM
))
2286 if (!(content
& CC_CONTENT_ISM
))
2290 } else if (seg
->s_ops
!= &segvn_ops
) {
2293 } else if (type
& MAP_SHARED
) {
2294 if (shmgetid(p
, saddr
) != SHMID_NONE
) {
2295 if (!(content
& CC_CONTENT_SHM
))
2298 } else if (SEGOP_GETVP(seg
, seg
->s_base
,
2299 &mvp
) != 0 || mvp
== NULL
||
2300 mvp
->v_type
!= VREG
) {
2301 if (!(content
& CC_CONTENT_SHANON
))
2305 if (!(content
& CC_CONTENT_SHFILE
))
2309 } else if (SEGOP_GETVP(seg
, seg
->s_base
, &mvp
) != 0 ||
2310 mvp
== NULL
|| mvp
->v_type
!= VREG
) {
2311 if (!(content
& CC_CONTENT_ANON
))
2314 } else if (prot
== (PROT_READ
| PROT_EXEC
)) {
2315 if (!(content
& CC_CONTENT_TEXT
))
2318 } else if (prot
== PROT_READ
) {
2319 if (!(content
& CC_CONTENT_RODATA
))
2323 if (!(content
& CC_CONTENT_DATA
))
2327 doffset
= roundup(doffset
, sizeof (Word
));
2328 phdr
[i
].p_offset
= doffset
;
2329 phdr
[i
].p_filesz
= size
;
2334 VERIFY(tmp
== NULL
);
2340 if (overflowed
|| i
!= nphdrs
) {
2343 overflowed
= B_FALSE
;
2344 kmem_free(bigwad
, bigsize
);
2347 cmn_err(CE_WARN
, "elfcore: core dump failed for "
2348 "process %d; address space is changing", p
->p_pid
);
2353 if ((error
= core_write(vp
, UIO_SYSSPACE
, poffset
,
2354 phdr
, phdrsz
, rlimit
, credp
)) != 0) {
2358 if ((error
= write_old_elfnotes(p
, sig
, vp
, phdr
[0].p_offset
, rlimit
,
2362 if ((error
= write_elfnotes(p
, sig
, vp
, phdr
[1].p_offset
, rlimit
,
2363 credp
, content
)) != 0) {
2367 for (i
= 2; i
< nphdrs
; i
++) {
2368 prkillinfo_t killinfo
;
2372 if (phdr
[i
].p_filesz
== 0)
2376 * If we hit a region that was mapped PROT_NONE then we cannot
2377 * continue dumping this normally as the kernel would be unable
2378 * to read from the page and that would result in us failing to
2379 * dump the page. As such, any region mapped PROT_NONE, we dump
2380 * as a zero-filled page such that this is still represented in
2383 * If dumping out this segment fails, rather than failing
2384 * the core dump entirely, we reset the size of the mapping
2385 * to zero to indicate that the data is absent from the core
2386 * file and OR in the PF_SUNW_FAILURE flag to differentiate
2387 * this from mappings that were excluded due to the core file
2390 if ((phdr
[i
].p_flags
& (PF_R
| PF_W
| PF_X
)) == 0) {
2391 size_t towrite
= phdr
[i
].p_filesz
;
2394 if (zeropg
== NULL
) {
2395 zeropg
= kmem_zalloc(elf_zeropg_sz
, KM_SLEEP
);
2399 while (towrite
!= 0) {
2400 size_t len
= MIN(towrite
, elf_zeropg_sz
);
2402 error
= core_write(vp
, UIO_SYSSPACE
,
2403 phdr
[i
].p_offset
+ curoff
, zeropg
, len
,
2412 error
= core_seg(p
, vp
, phdr
[i
].p_offset
,
2413 (caddr_t
)(uintptr_t)phdr
[i
].p_vaddr
,
2414 phdr
[i
].p_filesz
, rlimit
, credp
);
2419 if ((sig
= lwp
->lwp_cursig
) == 0) {
2421 * We failed due to something other than a signal.
2422 * Since the space reserved for the segment is now
2423 * unused, we stash the errno in the first four
2424 * bytes. This undocumented interface will let us
2425 * understand the nature of the failure.
2427 (void) core_write(vp
, UIO_SYSSPACE
, phdr
[i
].p_offset
,
2428 &error
, sizeof (error
), rlimit
, credp
);
2430 phdr
[i
].p_filesz
= 0;
2431 phdr
[i
].p_flags
|= PF_SUNW_FAILURE
;
2432 if ((error
= core_write(vp
, UIO_SYSSPACE
,
2433 poffset
+ sizeof (Phdr
) * i
, &phdr
[i
],
2434 sizeof (Phdr
), rlimit
, credp
)) != 0)
2441 * We took a signal. We want to abort the dump entirely, but
2442 * we also want to indicate what failed and why. We therefore
2443 * use the space reserved for the first failing segment to
2444 * write our error (which, for purposes of compatibility with
2445 * older core dump readers, we set to EINTR) followed by any
2446 * siginfo associated with the signal.
2448 bzero(&killinfo
, sizeof (killinfo
));
2449 killinfo
.prk_error
= EINTR
;
2451 sq
= sig
== SIGKILL
? curproc
->p_killsqp
: lwp
->lwp_curinfo
;
2454 bcopy(&sq
->sq_info
, &killinfo
.prk_info
,
2455 sizeof (sq
->sq_info
));
2457 killinfo
.prk_info
.si_signo
= lwp
->lwp_cursig
;
2458 killinfo
.prk_info
.si_code
= SI_NOINFO
;
2461 #if (defined(_SYSCALL32_IMPL) || defined(_LP64))
2463 * If this is a 32-bit process, we need to translate from the
2464 * native siginfo to the 32-bit variant. (Core readers must
2465 * always have the same data model as their target or must
2466 * be aware of -- and compensate for -- data model differences.)
2468 if (curproc
->p_model
== DATAMODEL_ILP32
) {
2471 siginfo_kto32((k_siginfo_t
*)&killinfo
.prk_info
, &si32
);
2472 bcopy(&si32
, &killinfo
.prk_info
, sizeof (si32
));
2476 (void) core_write(vp
, UIO_SYSSPACE
, phdr
[i
].p_offset
,
2477 &killinfo
, sizeof (killinfo
), rlimit
, credp
);
2480 * For the segment on which we took the signal, indicate that
2481 * its data now refers to a siginfo.
2483 phdr
[i
].p_filesz
= 0;
2484 phdr
[i
].p_flags
|= PF_SUNW_FAILURE
| PF_SUNW_KILLED
|
2488 * And for every other segment, indicate that its absence
2489 * is due to a signal.
2491 for (j
= i
+ 1; j
< nphdrs
; j
++) {
2492 phdr
[j
].p_filesz
= 0;
2493 phdr
[j
].p_flags
|= PF_SUNW_FAILURE
| PF_SUNW_KILLED
;
2497 * Finally, write out our modified program headers.
2499 if ((error
= core_write(vp
, UIO_SYSSPACE
,
2500 poffset
+ sizeof (Phdr
) * i
, &phdr
[i
],
2501 sizeof (Phdr
) * (nphdrs
- i
), rlimit
, credp
)) != 0) {
2509 Shdr
*shdr
= (Shdr
*)bigwad
;
2511 bzero(shdr
, shdrsz
);
2513 ctx
.ecc_doffset
= doffset
;
2514 AS_LOCK_ENTER(as
, RW_WRITER
);
2515 error
= elf_process_scns(&ctx
, shdr
, nshdrs
, NULL
);
2521 /* Copy any extended format data destined for the first shdr */
2522 bcopy(&shdr0
, shdr
, sizeof (shdr0
));
2524 error
= core_write(vp
, UIO_SYSSPACE
, soffset
, shdr
, shdrsz
,
2530 kmem_free(zeropg
, elf_zeropg_sz
);
2531 if (ctx
.ecc_bufsz
!= 0)
2532 kmem_free(ctx
.ecc_buf
, ctx
.ecc_bufsz
);
2533 kmem_free(bigwad
, bigsize
);
2537 #ifndef _ELF32_COMPAT
2539 static struct execsw esw
= {
2551 static struct modlexec modlexec
= {
2552 &mod_execops
, "exec module for elf", &esw
2556 extern int elf32exec(vnode_t
*vp
, execa_t
*uap
, uarg_t
*args
,
2557 intpdata_t
*idatap
, int level
, size_t *execsz
,
2558 int setid
, caddr_t exec_file
, cred_t
*cred
,
2560 extern int elf32core(vnode_t
*vp
, proc_t
*p
, cred_t
*credp
,
2561 rlim64_t rlimit
, int sig
, core_content_t content
);
2563 static struct execsw esw32
= {
2571 static struct modlexec modlexec32
= {
2572 &mod_execops
, "32-bit exec module for elf", &esw32
2576 static struct modlinkage modlinkage
= {
2580 (void *)&modlexec32
,
2588 return (mod_install(&modlinkage
));
2594 return (mod_remove(&modlinkage
));
2598 _info(struct modinfo
*modinfop
)
2600 return (mod_info(&modlinkage
, modinfop
));
2603 #endif /* !_ELF32_COMPAT */