4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
26 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
27 /* All Rights Reserved */
29 * Copyright (c) 2018, Joyent, Inc.
32 #include <sys/types.h>
33 #include <sys/param.h>
34 #include <sys/thread.h>
35 #include <sys/sysmacros.h>
36 #include <sys/signal.h>
39 #include <sys/errno.h>
40 #include <sys/vnode.h>
44 #include <sys/pathname.h>
45 #include <sys/policy.h>
46 #include <sys/cmn_err.h>
47 #include <sys/systm.h>
49 #include <sys/vmsystm.h>
50 #include <sys/debug.h>
53 #include <sys/prsystm.h>
57 #include <vm/seg_vn.h>
58 #include <sys/modctl.h>
59 #include <sys/systeminfo.h>
60 #include <sys/vmparam.h>
61 #include <sys/machelf.h>
62 #include <sys/shm_impl.h>
63 #include <sys/archsystm.h>
64 #include <sys/fasttrap.h>
65 #include <sys/brand.h>
68 #include <sys/siginfo.h>
69 #include <sys/random.h>
72 #include <sys/comm_page_util.h>
74 #endif /* defined(__x86) */
78 extern volatile size_t aslr_max_brk_skew
;
80 #define ORIGIN_STR "ORIGIN"
81 #define ORIGIN_STR_SIZE 6
83 static int getelfhead(vnode_t
*, cred_t
*, Ehdr
*, int *, int *, int *);
84 static int getelfphdr(vnode_t
*, cred_t
*, const Ehdr
*, int, caddr_t
*,
86 static int getelfshdr(vnode_t
*, cred_t
*, const Ehdr
*, int, int, caddr_t
*,
87 ssize_t
*, caddr_t
*, ssize_t
*);
88 static size_t elfsize(Ehdr
*, int, caddr_t
, uintptr_t *);
89 static int mapelfexec(vnode_t
*, Ehdr
*, int, caddr_t
,
90 Phdr
**, Phdr
**, Phdr
**, Phdr
**, Phdr
*,
91 caddr_t
*, caddr_t
*, intptr_t *, intptr_t *, size_t, long *, size_t *);
103 static const char *shstrtab_data
[] = {
112 typedef struct shstrtab
{
113 int sst_ndx
[STR_NUM
];
118 shstrtab_init(shstrtab_t
*s
)
120 bzero(&s
->sst_ndx
, sizeof (s
->sst_ndx
));
125 shstrtab_ndx(shstrtab_t
*s
, shstrtype_t type
)
129 if ((ret
= s
->sst_ndx
[type
]) != 0)
132 ret
= s
->sst_ndx
[type
] = s
->sst_cur
;
133 s
->sst_cur
+= strlen(shstrtab_data
[type
]) + 1;
139 shstrtab_size(const shstrtab_t
*s
)
145 shstrtab_dump(const shstrtab_t
*s
, char *buf
)
150 for (i
= 0; i
< STR_NUM
; i
++) {
151 if ((ndx
= s
->sst_ndx
[i
]) != 0)
152 (void) strcpy(buf
+ ndx
, shstrtab_data
[i
]);
157 dtrace_safe_phdr(Phdr
*phdrp
, struct uarg
*args
, uintptr_t base
)
159 ASSERT(phdrp
->p_type
== PT_SUNWDTRACE
);
162 * See the comment in fasttrap.h for information on how to safely
163 * update this program header.
165 if (phdrp
->p_memsz
< PT_SUNWDTRACE_SIZE
||
166 (phdrp
->p_flags
& (PF_R
| PF_W
| PF_X
)) != (PF_R
| PF_W
| PF_X
))
169 args
->thrptr
= phdrp
->p_vaddr
+ base
;
175 handle_secflag_dt(proc_t
*p
, uint_t dt
, uint_t val
)
181 flag
= PROC_SEC_ASLR
;
188 if (secflag_isset(p
->p_secflags
.psf_lower
, flag
))
190 if ((secpolicy_psecflags(CRED(), p
, p
) != 0) &&
191 secflag_isset(p
->p_secflags
.psf_inherit
, flag
))
194 secflag_clear(&p
->p_secflags
.psf_effective
, flag
);
196 if (!secflag_isset(p
->p_secflags
.psf_upper
, flag
))
199 if ((secpolicy_psecflags(CRED(), p
, p
) != 0) &&
200 !secflag_isset(p
->p_secflags
.psf_inherit
, flag
))
203 secflag_set(&p
->p_secflags
.psf_effective
, flag
);
210 * Map in the executable pointed to by vp. Returns 0 on success.
213 mapexec_brand(vnode_t
*vp
, uarg_t
*args
, Ehdr
*ehdr
, Addr
*uphdr_vaddr
,
214 intptr_t *voffset
, caddr_t exec_file
, int *interp
, caddr_t
*bssbase
,
215 caddr_t
*brkbase
, size_t *brksize
, uintptr_t *lddatap
)
219 caddr_t phdrbase
= NULL
;
221 int nshdrs
, shstrndx
, nphdrs
;
225 Phdr
*dynphdr
= NULL
;
226 Phdr
*dtrphdr
= NULL
;
234 if (error
= execpermissions(vp
, &vat
, args
)) {
235 uprintf("%s: Cannot execute %s\n", exec_file
, args
->pathname
);
239 if ((error
= getelfhead(vp
, CRED(), ehdr
, &nshdrs
, &shstrndx
,
241 (error
= getelfphdr(vp
, CRED(), ehdr
, nphdrs
, &phdrbase
,
243 uprintf("%s: Cannot read %s\n", exec_file
, args
->pathname
);
247 if ((len
= elfsize(ehdr
, nphdrs
, phdrbase
, &lddata
)) == 0) {
248 uprintf("%s: Nothing to load in %s", exec_file
, args
->pathname
);
249 kmem_free(phdrbase
, phdrsize
);
255 if (error
= mapelfexec(vp
, ehdr
, nphdrs
, phdrbase
, &uphdr
, &dynphdr
,
256 &junk
, &dtrphdr
, NULL
, bssbase
, brkbase
, voffset
, &minaddr
,
257 len
, &execsz
, brksize
)) {
258 uprintf("%s: Cannot map %s\n", exec_file
, args
->pathname
);
259 kmem_free(phdrbase
, phdrsize
);
264 * Inform our caller if the executable needs an interpreter.
266 *interp
= (dynphdr
== NULL
) ? 0 : 1;
269 * If this is a statically linked executable, voffset should indicate
270 * the address of the executable itself (it normally holds the address
271 * of the interpreter).
273 if (ehdr
->e_type
== ET_EXEC
&& *interp
== 0)
277 *uphdr_vaddr
= uphdr
->p_vaddr
;
279 *uphdr_vaddr
= (Addr
)-1;
282 kmem_free(phdrbase
, phdrsize
);
288 elfexec(vnode_t
*vp
, execa_t
*uap
, uarg_t
*args
, intpdata_t
*idatap
,
289 int level
, long *execsz
, int setid
, caddr_t exec_file
, cred_t
*cred
,
292 caddr_t phdrbase
= NULL
;
302 Phdr
*intphdr
= NULL
;
303 Phdr
*dynamicphdr
= NULL
;
312 Phdr
*dataphdrp
= NULL
;
314 Phdr
*capphdr
= NULL
;
323 struct proc
*p
= ttoproc(curthread
);
324 struct user
*up
= PTOU(p
);
327 aux_entry_t elfargs
[__KERN_NAUXV_IMPL
];
328 char dl_name
[MAXPATHLEN
];
329 char pathbuf
[MAXPATHLEN
];
331 struct execenv exenv
;
332 } *bigwad
; /* kmem_alloc this behemoth so we don't blow stack */
334 int nshdrs
, shstrndx
, nphdrs
;
340 ASSERT(p
->p_model
== DATAMODEL_ILP32
|| p
->p_model
== DATAMODEL_LP64
);
342 bigwad
= kmem_alloc(sizeof (struct bigwad
), KM_SLEEP
);
343 ehdrp
= &bigwad
->ehdr
;
344 dlnp
= bigwad
->dl_name
;
345 pathbufp
= bigwad
->pathbuf
;
348 * Obtain ELF and program header information.
350 if ((error
= getelfhead(vp
, CRED(), ehdrp
, &nshdrs
, &shstrndx
,
352 (error
= getelfphdr(vp
, CRED(), ehdrp
, nphdrs
, &phdrbase
,
357 * Prevent executing an ELF file that has no entry point.
359 if (ehdrp
->e_entry
== 0) {
360 uprintf("%s: Bad entry point\n", exec_file
);
365 * Put data model that we're exec-ing to into the args passed to
366 * exec_args(), so it will know what it is copying to on new stack.
367 * Now that we know whether we are exec-ing a 32-bit or 64-bit
368 * executable, we can set execsz with the appropriate NCARGS.
371 if (ehdrp
->e_ident
[EI_CLASS
] == ELFCLASS32
) {
372 args
->to_model
= DATAMODEL_ILP32
;
373 *execsz
= btopr(SINCR
) + btopr(SSIZE
) + btopr(NCARGS32
-1);
375 args
->to_model
= DATAMODEL_LP64
;
376 args
->stk_prot
&= ~PROT_EXEC
;
377 #if defined(__i386) || defined(__amd64)
378 args
->dat_prot
&= ~PROT_EXEC
;
380 *execsz
= btopr(SINCR
) + btopr(SSIZE
) + btopr(NCARGS64
-1);
383 args
->to_model
= DATAMODEL_ILP32
;
384 *execsz
= btopr(SINCR
) + btopr(SSIZE
) + btopr(NCARGS
-1);
388 * We delay invoking the brand callback until we've figured out
389 * what kind of elf binary we're trying to run, 32-bit or 64-bit.
390 * We do this because now the brand library can just check
391 * args->to_model to see if the target is 32-bit or 64-bit without
392 * having to duplicate all the code above.
394 * The level checks associated with brand handling below are used to
395 * prevent a loop since the brand elfexec function typically comes back
396 * through this function. We must check <= here since the nested
397 * handling in the #! interpreter code will increment the level before
398 * calling gexec to run the final elfexec interpreter.
400 if ((level
<= INTP_MAXDEPTH
) &&
401 (brand_action
!= EBA_NATIVE
) && (PROC_IS_BRANDED(p
))) {
402 error
= BROP(p
)->b_elfexec(vp
, uap
, args
,
403 idatap
, level
+ 1, execsz
, setid
, exec_file
, cred
,
409 * Determine aux size now so that stack can be built
410 * in one shot (except actual copyout of aux image),
411 * determine any non-default stack protections,
412 * and still have this code be machine independent.
414 hsize
= ehdrp
->e_phentsize
;
415 phdrp
= (Phdr
*)phdrbase
;
416 for (i
= nphdrs
; i
> 0; i
--) {
417 switch (phdrp
->p_type
) {
419 hasauxv
= hasintp
= 1;
425 args
->stk_prot
= PROT_USER
;
426 if (phdrp
->p_flags
& PF_R
)
427 args
->stk_prot
|= PROT_READ
;
428 if (phdrp
->p_flags
& PF_W
)
429 args
->stk_prot
|= PROT_WRITE
;
430 if (phdrp
->p_flags
& PF_X
)
431 args
->stk_prot
|= PROT_EXEC
;
443 phdrp
= (Phdr
*)((caddr_t
)phdrp
+ hsize
);
446 if (ehdrp
->e_type
!= ET_EXEC
) {
451 /* Copy BSS permissions to args->dat_prot */
452 if (dataphdrp
!= NULL
) {
453 args
->dat_prot
= PROT_USER
;
454 if (dataphdrp
->p_flags
& PF_R
)
455 args
->dat_prot
|= PROT_READ
;
456 if (dataphdrp
->p_flags
& PF_W
)
457 args
->dat_prot
|= PROT_WRITE
;
458 if (dataphdrp
->p_flags
& PF_X
)
459 args
->dat_prot
|= PROT_EXEC
;
463 * If an auxvector will be required - reserve the space for
464 * it now. This may be increased by exec_args if there are
465 * ISA-specific types (included in __KERN_NAUXV_IMPL).
469 * If an AUX vector is being built - the base AUX
478 * AT_SUN_PLATFORM (added in stk_copyout)
479 * AT_SUN_EXECNAME (added in stk_copyout)
484 if (hasintp
&& hasu
) {
486 * Has PT_INTERP & PT_PHDR - the auxvectors that
497 args
->auxsize
= (9 + 5) * sizeof (aux_entry_t
);
498 } else if (hasintp
) {
500 * Has PT_INTERP but no PT_PHDR
507 args
->auxsize
= (9 + 2) * sizeof (aux_entry_t
);
509 args
->auxsize
= 9 * sizeof (aux_entry_t
);
516 * If this binary is using an emulator, we need to add an
517 * AT_SUN_EMULATOR aux entry.
519 if (args
->emulator
!= NULL
)
520 args
->auxsize
+= sizeof (aux_entry_t
);
523 * On supported kernels (x86_64) make room in the auxv for the
524 * AT_SUN_COMMPAGE entry. This will go unpopulated on i86xpv systems
525 * which do not provide such functionality.
527 * Additionally cover the floating point information AT_SUN_FPSIZE and
531 args
->auxsize
+= 3 * sizeof (aux_entry_t
);
532 #endif /* defined(__amd64) */
534 if ((brand_action
!= EBA_NATIVE
) && (PROC_IS_BRANDED(p
))) {
537 * We will be adding 4 entries to the aux vectors. One for
538 * the brandname and 3 for the brand specific aux vectors.
540 args
->auxsize
+= 4 * sizeof (aux_entry_t
);
543 /* If the binary has an explicit ASLR flag, it must be honoured */
544 if ((dynamicphdr
!= NULL
) &&
545 (dynamicphdr
->p_filesz
> 0)) {
549 #define DYN_STRIDE 100
550 for (i
= 0; i
< dynamicphdr
->p_filesz
;
551 i
+= sizeof (*dyn
) * DYN_STRIDE
) {
552 int ndyns
= (dynamicphdr
->p_filesz
- i
) / sizeof (*dyn
);
555 ndyns
= MIN(DYN_STRIDE
, ndyns
);
556 dynsize
= ndyns
* sizeof (*dyn
);
558 dyn
= kmem_alloc(dynsize
, KM_SLEEP
);
560 if ((error
= vn_rdwr(UIO_READ
, vp
, (caddr_t
)dyn
,
561 dynsize
, (offset_t
)(dynamicphdr
->p_offset
+ i
),
562 UIO_SYSSPACE
, 0, (rlim64_t
)0,
563 CRED(), &resid
)) != 0) {
564 uprintf("%s: cannot read .dynamic section\n",
569 for (dp
= dyn
; dp
< (dyn
+ ndyns
); dp
++) {
570 if (dp
->d_tag
== DT_SUNW_ASLR
) {
571 if ((error
= handle_secflag_dt(p
,
573 dp
->d_un
.d_val
)) != 0) {
574 uprintf("%s: error setting "
575 "security-flag from "
576 "DT_SUNW_ASLR: %d\n",
583 kmem_free(dyn
, dynsize
);
587 /* Hardware/Software capabilities */
588 if (capphdr
!= NULL
&&
589 (capsize
= capphdr
->p_filesz
) > 0 &&
590 capsize
<= 16 * sizeof (*cap
)) {
591 int ncaps
= capsize
/ sizeof (*cap
);
594 cap
= kmem_alloc(capsize
, KM_SLEEP
);
595 if ((error
= vn_rdwr(UIO_READ
, vp
, (caddr_t
)cap
,
596 capsize
, (offset_t
)capphdr
->p_offset
,
597 UIO_SYSSPACE
, 0, (rlim64_t
)0, CRED(), &resid
)) != 0) {
598 uprintf("%s: Cannot read capabilities section\n",
602 for (cp
= cap
; cp
< cap
+ ncaps
; cp
++) {
603 if (cp
->c_tag
== CA_SUNW_SF_1
&&
604 (cp
->c_un
.c_val
& SF1_SUNW_ADDR32
)) {
605 if (args
->to_model
== DATAMODEL_LP64
)
612 aux
= bigwad
->elfargs
;
614 * Move args to the user's stack.
615 * This can fill in the AT_SUN_PLATFORM and AT_SUN_EXECNAME aux entries.
617 if ((error
= exec_args(uap
, args
, idatap
, (void **)&aux
)) != 0) {
624 /* we're single threaded after this point */
627 * If this is an ET_DYN executable (shared object),
628 * determine its memory size so that mapelfexec() can load it.
630 if (ehdrp
->e_type
== ET_DYN
)
631 len
= elfsize(ehdrp
, nphdrs
, phdrbase
, NULL
);
637 if ((error
= mapelfexec(vp
, ehdrp
, nphdrs
, phdrbase
, &uphdr
, &intphdr
,
638 &stphdr
, &dtrphdr
, dataphdrp
, &bssbase
, &brkbase
, &voffset
, NULL
,
639 len
, execsz
, &brksize
)) != 0)
642 if (uphdr
!= NULL
&& intphdr
== NULL
)
645 if (dtrphdr
!= NULL
&& dtrace_safe_phdr(dtrphdr
, args
, voffset
) != 0) {
646 uprintf("%s: Bad DTrace phdr in %s\n", exec_file
, exec_file
);
650 if (intphdr
!= NULL
) {
656 dlnsize
= intphdr
->p_filesz
;
658 if (dlnsize
> MAXPATHLEN
|| dlnsize
<= 0)
662 * Read in "interpreter" pathname.
664 if ((error
= vn_rdwr(UIO_READ
, vp
, dlnp
, intphdr
->p_filesz
,
665 (offset_t
)intphdr
->p_offset
, UIO_SYSSPACE
, 0, (rlim64_t
)0,
666 CRED(), &resid
)) != 0) {
667 uprintf("%s: Cannot obtain interpreter pathname\n",
672 if (resid
!= 0 || dlnp
[dlnsize
- 1] != '\0')
676 * Search for '$ORIGIN' token in interpreter path.
677 * If found, expand it.
679 for (p
= dlnp
; p
= strchr(p
, '$'); ) {
683 if (strncmp(++p
, ORIGIN_STR
, ORIGIN_STR_SIZE
))
687 * We don't support $ORIGIN on setid programs to close
688 * a potential attack vector.
690 if ((setid
& EXECSETID_SETID
) != 0) {
698 bcopy(dlnp
, pathbufp
, len
);
701 if (_ptr
= strrchr(args
->pathname
, '/')) {
702 len
= _ptr
- args
->pathname
;
703 if ((curlen
+ len
) > MAXPATHLEN
)
706 bcopy(args
->pathname
, &pathbufp
[curlen
], len
);
710 * executable is a basename found in the
711 * current directory. So - just substitute
714 pathbufp
[curlen
] = '.';
717 p
+= ORIGIN_STR_SIZE
;
720 if ((curlen
+ len
) > MAXPATHLEN
)
722 bcopy(p
, &pathbufp
[curlen
], len
);
724 pathbufp
[curlen
++] = '\0';
725 bcopy(pathbufp
, dlnp
, curlen
);
729 * /usr/lib/ld.so.1 is known to be a symlink to /lib/ld.so.1
730 * (and /usr/lib/64/ld.so.1 is a symlink to /lib/64/ld.so.1).
731 * Just in case /usr is not mounted, change it now.
733 if (strcmp(dlnp
, USR_LIB_RTLD
) == 0)
735 error
= lookupname(dlnp
, UIO_SYSSPACE
, FOLLOW
, NULLVPP
, &nvp
);
736 if (error
&& dlnp
!= bigwad
->dl_name
) {
737 /* new kernel, old user-level */
738 error
= lookupname(dlnp
-= 4, UIO_SYSSPACE
, FOLLOW
,
742 uprintf("%s: Cannot find %s\n", exec_file
, dlnp
);
747 * Setup the "aux" vector.
750 if (ehdrp
->e_type
== ET_DYN
) {
751 /* don't use the first page */
752 bigwad
->exenv
.ex_brkbase
= (caddr_t
)PAGESIZE
;
753 bigwad
->exenv
.ex_bssbase
= (caddr_t
)PAGESIZE
;
755 bigwad
->exenv
.ex_bssbase
= bssbase
;
756 bigwad
->exenv
.ex_brkbase
= brkbase
;
758 bigwad
->exenv
.ex_brksize
= brksize
;
759 bigwad
->exenv
.ex_magic
= elfmagic
;
760 bigwad
->exenv
.ex_vp
= vp
;
761 setexecenv(&bigwad
->exenv
);
763 ADDAUX(aux
, AT_PHDR
, uphdr
->p_vaddr
+ voffset
)
764 ADDAUX(aux
, AT_PHENT
, ehdrp
->e_phentsize
)
765 ADDAUX(aux
, AT_PHNUM
, nphdrs
)
766 ADDAUX(aux
, AT_ENTRY
, ehdrp
->e_entry
+ voffset
)
768 if ((error
= execopen(&vp
, &fd
)) != 0) {
773 ADDAUX(aux
, AT_EXECFD
, fd
)
776 if ((error
= execpermissions(nvp
, &bigwad
->vattr
, args
)) != 0) {
778 uprintf("%s: Cannot execute %s\n", exec_file
, dlnp
);
783 * Now obtain the ELF header along with the entire program
784 * header contained in "nvp".
786 kmem_free(phdrbase
, phdrsize
);
788 if ((error
= getelfhead(nvp
, CRED(), ehdrp
, &nshdrs
,
789 &shstrndx
, &nphdrs
)) != 0 ||
790 (error
= getelfphdr(nvp
, CRED(), ehdrp
, nphdrs
, &phdrbase
,
793 uprintf("%s: Cannot read %s\n", exec_file
, dlnp
);
798 * Determine memory size of the "interpreter's" loadable
799 * sections. This size is then used to obtain the virtual
800 * address of a hole, in the user's address space, large
801 * enough to map the "interpreter".
803 if ((len
= elfsize(ehdrp
, nphdrs
, phdrbase
, &lddata
)) == 0) {
805 uprintf("%s: Nothing to load in %s\n", exec_file
, dlnp
);
811 error
= mapelfexec(nvp
, ehdrp
, nphdrs
, phdrbase
, &junk
, &junk
,
812 &junk
, &dtrphdr
, NULL
, NULL
, NULL
, &voffset
, NULL
, len
,
814 if (error
|| junk
!= NULL
) {
816 uprintf("%s: Cannot map %s\n", exec_file
, dlnp
);
821 * We use the DTrace program header to initialize the
822 * architecture-specific user per-LWP location. The dtrace
823 * fasttrap provider requires ready access to per-LWP scratch
824 * space. We assume that there is only one such program header
825 * in the interpreter.
827 if (dtrphdr
!= NULL
&&
828 dtrace_safe_phdr(dtrphdr
, args
, voffset
) != 0) {
830 uprintf("%s: Bad DTrace phdr in %s\n", exec_file
, dlnp
);
835 ADDAUX(aux
, AT_SUN_LDDATA
, voffset
+ lddata
)
839 int auxf
= AF_SUN_HWCAPVERIFY
;
844 * Note: AT_SUN_PLATFORM and AT_SUN_EXECNAME were filled in via
847 ADDAUX(aux
, AT_BASE
, voffset
)
848 ADDAUX(aux
, AT_FLAGS
, at_flags
)
849 ADDAUX(aux
, AT_PAGESZ
, PAGESIZE
)
851 * Linker flags. (security)
852 * p_flag not yet set at this time.
853 * We rely on gexec() to provide us with the information.
854 * If the application is set-uid but this is not reflected
855 * in a mismatch between real/effective uids/gids, then
856 * don't treat this as a set-uid exec. So we care about
857 * the EXECSETID_UGIDS flag but not the ...SETID flag.
859 if ((setid
&= ~EXECSETID_SETID
) != 0)
860 auxf
|= AF_SUN_SETUGID
;
863 * If we're running a native process from within a branded
864 * zone under pfexec then we clear the AF_SUN_SETUGID flag so
865 * that the native ld.so.1 is able to link with the native
866 * libraries instead of using the brand libraries that are
867 * installed in the zone. We only do this for processes
868 * which we trust because we see they are already running
869 * under pfexec (where uid != euid). This prevents a
870 * malicious user within the zone from crafting a wrapper to
871 * run native suid commands with unsecure libraries interposed.
873 if ((brand_action
== EBA_NATIVE
) && (PROC_IS_BRANDED(p
) &&
874 (setid
&= ~EXECSETID_SETID
) != 0))
875 auxf
&= ~AF_SUN_SETUGID
;
878 * Record the user addr of the auxflags aux vector entry
879 * since brands may optionally want to manipulate this field.
881 args
->auxp_auxflags
=
882 (char *)((char *)args
->stackend
+
883 ((char *)&aux
->a_type
-
884 (char *)bigwad
->elfargs
));
885 ADDAUX(aux
, AT_SUN_AUXFLAGS
, auxf
);
888 * Hardware capability flag word (performance hints)
889 * Used for choosing faster library routines.
890 * (Potentially different between 32-bit and 64-bit ABIs)
893 if (args
->to_model
== DATAMODEL_NATIVE
) {
894 ADDAUX(aux
, AT_SUN_HWCAP
, auxv_hwcap
)
895 ADDAUX(aux
, AT_SUN_HWCAP2
, auxv_hwcap_2
)
897 ADDAUX(aux
, AT_SUN_HWCAP
, auxv_hwcap32
)
898 ADDAUX(aux
, AT_SUN_HWCAP2
, auxv_hwcap32_2
)
901 ADDAUX(aux
, AT_SUN_HWCAP
, auxv_hwcap
)
902 ADDAUX(aux
, AT_SUN_HWCAP2
, auxv_hwcap_2
)
906 * Reserve space for the brand-private aux vectors,
907 * and record the user addr of that space.
910 (char *)((char *)args
->stackend
+
911 ((char *)&aux
->a_type
-
912 (char *)bigwad
->elfargs
));
913 ADDAUX(aux
, AT_SUN_BRAND_AUX1
, 0)
914 ADDAUX(aux
, AT_SUN_BRAND_AUX2
, 0)
915 ADDAUX(aux
, AT_SUN_BRAND_AUX3
, 0)
919 * Add the comm page auxv entry, mapping it in if needed. Also
920 * take care of the FPU entries.
923 if (args
->commpage
!= NULL
||
924 (args
->commpage
= (uintptr_t)comm_page_mapin()) != NULL
) {
925 ADDAUX(aux
, AT_SUN_COMMPAGE
, args
->commpage
)
928 * If the comm page cannot be mapped, pad out the auxv
929 * to satisfy later size checks.
931 ADDAUX(aux
, AT_NULL
, 0)
934 fptype
= AT_386_FPINFO_NONE
;
935 fpu_auxv_info(&fptype
, &fpsize
);
936 if (fptype
!= AT_386_FPINFO_NONE
) {
937 ADDAUX(aux
, AT_SUN_FPTYPE
, fptype
)
938 ADDAUX(aux
, AT_SUN_FPSIZE
, fpsize
)
940 ADDAUX(aux
, AT_NULL
, 0)
941 ADDAUX(aux
, AT_NULL
, 0)
943 #endif /* defined(__amd64) */
945 ADDAUX(aux
, AT_NULL
, 0)
946 postfixsize
= (char *)aux
- (char *)bigwad
->elfargs
;
949 * We make assumptions above when we determine how many aux
950 * vector entries we will be adding. However, if we have an
951 * invalid elf file, it is possible that mapelfexec might
952 * behave differently (but not return an error), in which case
953 * the number of aux entries we actually add will be different.
954 * We detect that now and error out.
956 if (postfixsize
!= args
->auxsize
) {
957 DTRACE_PROBE2(elfexec_badaux
, int, postfixsize
,
961 ASSERT(postfixsize
<= __KERN_NAUXV_IMPL
* sizeof (aux_entry_t
));
965 * For the 64-bit kernel, the limit is big enough that rounding it up
966 * to a page can overflow the 64-bit limit, so we check for btopr()
967 * overflowing here by comparing it with the unrounded limit in pages.
968 * If it hasn't overflowed, compare the exec size with the rounded up
969 * limit in pages. Otherwise, just compare with the unrounded limit.
971 limit
= btop(p
->p_vmem_ctl
);
972 roundlimit
= btopr(p
->p_vmem_ctl
);
973 if ((roundlimit
> limit
&& *execsz
> roundlimit
) ||
974 (roundlimit
< limit
&& *execsz
> limit
)) {
975 mutex_enter(&p
->p_lock
);
976 (void) rctl_action(rctlproc_legacy
[RLIMIT_VMEM
], p
->p_rctls
, p
,
978 mutex_exit(&p
->p_lock
);
983 bzero(up
->u_auxv
, sizeof (up
->u_auxv
));
984 up
->u_commpagep
= args
->commpage
;
989 * Copy the aux vector to the user stack.
991 error
= execpoststack(args
, bigwad
->elfargs
, postfixsize
);
996 * Copy auxv to the process's user structure for use by /proc.
997 * If this is a branded process, the brand's exec routine will
998 * copy its private entries to the user structure later. It
999 * relies on the fact that the blank entries are at the end.
1001 num_auxv
= postfixsize
/ sizeof (aux_entry_t
);
1002 ASSERT(num_auxv
<= sizeof (up
->u_auxv
) / sizeof (auxv_t
));
1003 aux
= bigwad
->elfargs
;
1004 for (i
= 0; i
< num_auxv
; i
++) {
1005 up
->u_auxv
[i
].a_type
= aux
[i
].a_type
;
1006 up
->u_auxv
[i
].a_un
.a_val
= (aux_val_t
)aux
[i
].a_un
.a_val
;
1011 * Pass back the starting address so we can set the program counter.
1013 args
->entry
= (uintptr_t)(ehdrp
->e_entry
+ voffset
);
1016 if (ehdrp
->e_type
== ET_DYN
) {
1018 * If we are executing a shared library which doesn't
1019 * have an interpreter (probably ld.so.1) then
1020 * we don't set the brkbase now. Instead we
1021 * delay its setting until the first call
1022 * via grow.c::brk(). This permits ld.so.1 to
1023 * initialize brkbase to the tail of the executable it
1024 * loads (which is where it needs to be).
1026 bigwad
->exenv
.ex_brkbase
= (caddr_t
)0;
1027 bigwad
->exenv
.ex_bssbase
= (caddr_t
)0;
1028 bigwad
->exenv
.ex_brksize
= 0;
1030 bigwad
->exenv
.ex_brkbase
= brkbase
;
1031 bigwad
->exenv
.ex_bssbase
= bssbase
;
1032 bigwad
->exenv
.ex_brksize
= brksize
;
1034 bigwad
->exenv
.ex_magic
= elfmagic
;
1035 bigwad
->exenv
.ex_vp
= vp
;
1036 setexecenv(&bigwad
->exenv
);
1043 if (fd
!= -1) /* did we open the a.out yet */
1044 (void) execclose(fd
);
1046 psignal(p
, SIGKILL
);
1051 if (phdrbase
!= NULL
)
1052 kmem_free(phdrbase
, phdrsize
);
1054 kmem_free(cap
, capsize
);
1055 kmem_free(bigwad
, sizeof (struct bigwad
));
1060 * Compute the memory size requirement for the ELF file.
1063 elfsize(Ehdr
*ehdrp
, int nphdrs
, caddr_t phdrbase
, uintptr_t *lddata
)
1066 Phdr
*phdrp
= (Phdr
*)phdrbase
;
1067 int hsize
= ehdrp
->e_phentsize
;
1069 int dfirst
= 1; /* first data segment */
1070 uintptr_t loaddr
= 0;
1071 uintptr_t hiaddr
= 0;
1075 for (i
= nphdrs
; i
> 0; i
--) {
1076 if (phdrp
->p_type
== PT_LOAD
) {
1077 lo
= phdrp
->p_vaddr
;
1078 hi
= lo
+ phdrp
->p_memsz
;
1091 * save the address of the first data segment
1092 * of an object - used for the AT_SUN_LDDATA
1095 if ((lddata
!= NULL
) && dfirst
&&
1096 (phdrp
->p_flags
& PF_W
)) {
1101 phdrp
= (Phdr
*)((caddr_t
)phdrp
+ hsize
);
1104 len
= hiaddr
- (loaddr
& PAGEMASK
);
1105 len
= roundup(len
, PAGESIZE
);
1111 * Read in the ELF header and program header table.
1113 * ENOEXEC File format is not recognized
1114 * EINVAL Format recognized but execution not supported
1117 getelfhead(vnode_t
*vp
, cred_t
*credp
, Ehdr
*ehdr
, int *nshdrs
, int *shstrndx
,
1124 * We got here by the first two bytes in ident,
1125 * now read the entire ELF header.
1127 if ((error
= vn_rdwr(UIO_READ
, vp
, (caddr_t
)ehdr
,
1128 sizeof (Ehdr
), (offset_t
)0, UIO_SYSSPACE
, 0,
1129 (rlim64_t
)0, credp
, &resid
)) != 0)
1133 * Since a separate version is compiled for handling 32-bit and
1134 * 64-bit ELF executables on a 64-bit kernel, the 64-bit version
1135 * doesn't need to be able to deal with 32-bit ELF files.
1138 ehdr
->e_ident
[EI_MAG2
] != ELFMAG2
||
1139 ehdr
->e_ident
[EI_MAG3
] != ELFMAG3
)
1142 if ((ehdr
->e_type
!= ET_EXEC
&& ehdr
->e_type
!= ET_DYN
) ||
1143 #if defined(_ILP32) || defined(_ELF32_COMPAT)
1144 ehdr
->e_ident
[EI_CLASS
] != ELFCLASS32
||
1146 ehdr
->e_ident
[EI_CLASS
] != ELFCLASS64
||
1148 !elfheadcheck(ehdr
->e_ident
[EI_DATA
], ehdr
->e_machine
,
1152 *nshdrs
= ehdr
->e_shnum
;
1153 *shstrndx
= ehdr
->e_shstrndx
;
1154 *nphdrs
= ehdr
->e_phnum
;
1157 * If e_shnum, e_shstrndx, or e_phnum is its sentinel value, we need
1158 * to read in the section header at index zero to access the true
1159 * values for those fields.
1161 if ((*nshdrs
== 0 && ehdr
->e_shoff
!= 0) ||
1162 *shstrndx
== SHN_XINDEX
|| *nphdrs
== PN_XNUM
) {
1165 if (ehdr
->e_shoff
== 0)
1168 if ((error
= vn_rdwr(UIO_READ
, vp
, (caddr_t
)&shdr
,
1169 sizeof (shdr
), (offset_t
)ehdr
->e_shoff
, UIO_SYSSPACE
, 0,
1170 (rlim64_t
)0, credp
, &resid
)) != 0)
1174 *nshdrs
= shdr
.sh_size
;
1175 if (*shstrndx
== SHN_XINDEX
)
1176 *shstrndx
= shdr
.sh_link
;
1177 if (*nphdrs
== PN_XNUM
&& shdr
.sh_info
!= 0)
1178 *nphdrs
= shdr
.sh_info
;
1184 #ifdef _ELF32_COMPAT
1185 extern size_t elf_nphdr_max
;
1187 size_t elf_nphdr_max
= 1000;
1191 getelfphdr(vnode_t
*vp
, cred_t
*credp
, const Ehdr
*ehdr
, int nphdrs
,
1192 caddr_t
*phbasep
, ssize_t
*phsizep
)
1194 ssize_t resid
, minsize
;
1198 * Since we're going to be using e_phentsize to iterate down the
1199 * array of program headers, it must be 8-byte aligned or else
1200 * we might cause a misaligned access. We use all members through
1201 * p_flags on 32-bit ELF files and p_memsz on 64-bit ELF files so
1202 * e_phentsize must be at least large enough to include those
1205 #if !defined(_LP64) || defined(_ELF32_COMPAT)
1206 minsize
= offsetof(Phdr
, p_flags
) + sizeof (((Phdr
*)NULL
)->p_flags
);
1208 minsize
= offsetof(Phdr
, p_memsz
) + sizeof (((Phdr
*)NULL
)->p_memsz
);
1210 if (ehdr
->e_phentsize
< minsize
|| (ehdr
->e_phentsize
& 3))
1213 *phsizep
= nphdrs
* ehdr
->e_phentsize
;
1215 if (*phsizep
> sizeof (Phdr
) * elf_nphdr_max
) {
1216 if ((*phbasep
= kmem_alloc(*phsizep
, KM_NOSLEEP
)) == NULL
)
1219 *phbasep
= kmem_alloc(*phsizep
, KM_SLEEP
);
1222 if ((err
= vn_rdwr(UIO_READ
, vp
, *phbasep
, *phsizep
,
1223 (offset_t
)ehdr
->e_phoff
, UIO_SYSSPACE
, 0, (rlim64_t
)0,
1224 credp
, &resid
)) != 0) {
1225 kmem_free(*phbasep
, *phsizep
);
1233 #ifdef _ELF32_COMPAT
1234 extern size_t elf_nshdr_max
;
1235 extern size_t elf_shstrtab_max
;
1237 size_t elf_nshdr_max
= 10000;
1238 size_t elf_shstrtab_max
= 100 * 1024;
1243 getelfshdr(vnode_t
*vp
, cred_t
*credp
, const Ehdr
*ehdr
,
1244 int nshdrs
, int shstrndx
, caddr_t
*shbasep
, ssize_t
*shsizep
,
1245 char **shstrbasep
, ssize_t
*shstrsizep
)
1247 ssize_t resid
, minsize
;
1252 * Since we're going to be using e_shentsize to iterate down the
1253 * array of section headers, it must be 8-byte aligned or else
1254 * we might cause a misaligned access. We use all members through
1255 * sh_entsize (on both 32- and 64-bit ELF files) so e_shentsize
1256 * must be at least large enough to include that member. The index
1257 * of the string table section must also be valid.
1259 minsize
= offsetof(Shdr
, sh_entsize
) + sizeof (shdr
->sh_entsize
);
1260 if (ehdr
->e_shentsize
< minsize
|| (ehdr
->e_shentsize
& 3) ||
1264 *shsizep
= nshdrs
* ehdr
->e_shentsize
;
1266 if (*shsizep
> sizeof (Shdr
) * elf_nshdr_max
) {
1267 if ((*shbasep
= kmem_alloc(*shsizep
, KM_NOSLEEP
)) == NULL
)
1270 *shbasep
= kmem_alloc(*shsizep
, KM_SLEEP
);
1273 if ((err
= vn_rdwr(UIO_READ
, vp
, *shbasep
, *shsizep
,
1274 (offset_t
)ehdr
->e_shoff
, UIO_SYSSPACE
, 0, (rlim64_t
)0,
1275 credp
, &resid
)) != 0) {
1276 kmem_free(*shbasep
, *shsizep
);
1281 * Pull the section string table out of the vnode; fail if the size
1284 shdr
= (Shdr
*)(*shbasep
+ shstrndx
* ehdr
->e_shentsize
);
1285 if ((*shstrsizep
= shdr
->sh_size
) == 0) {
1286 kmem_free(*shbasep
, *shsizep
);
1290 if (*shstrsizep
> elf_shstrtab_max
) {
1291 if ((*shstrbasep
= kmem_alloc(*shstrsizep
,
1292 KM_NOSLEEP
)) == NULL
) {
1293 kmem_free(*shbasep
, *shsizep
);
1297 *shstrbasep
= kmem_alloc(*shstrsizep
, KM_SLEEP
);
1300 if ((err
= vn_rdwr(UIO_READ
, vp
, *shstrbasep
, *shstrsizep
,
1301 (offset_t
)shdr
->sh_offset
, UIO_SYSSPACE
, 0, (rlim64_t
)0,
1302 credp
, &resid
)) != 0) {
1303 kmem_free(*shbasep
, *shsizep
);
1304 kmem_free(*shstrbasep
, *shstrsizep
);
1309 * Make sure the strtab is null-terminated to make sure we
1310 * don't run off the end of the table.
1312 (*shstrbasep
)[*shstrsizep
- 1] = '\0';
1338 caddr_t addr
= NULL
;
1343 int hsize
= ehdr
->e_phentsize
;
1344 caddr_t mintmp
= (caddr_t
)-1;
1345 extern int use_brk_lpg
;
1347 if (ehdr
->e_type
== ET_DYN
) {
1348 secflagset_t flags
= 0;
1350 * Obtain the virtual address of a hole in the
1351 * address space to map the "interpreter".
1353 if (secflag_enabled(curproc
, PROC_SEC_ASLR
))
1354 flags
|= _MAP_RANDOMIZE
;
1356 map_addr(&addr
, len
, (offset_t
)0, 1, flags
);
1359 *voffset
= (intptr_t)addr
;
1362 * Calculate the minimum vaddr so it can be subtracted out.
1363 * According to the ELF specification, since PT_LOAD sections
1364 * must be sorted by increasing p_vaddr values, this is
1365 * guaranteed to be the first PT_LOAD section.
1367 phdr
= (Phdr
*)phdrbase
;
1368 for (i
= nphdrs
; i
> 0; i
--) {
1369 if (phdr
->p_type
== PT_LOAD
) {
1370 *voffset
-= (uintptr_t)phdr
->p_vaddr
;
1373 phdr
= (Phdr
*)((caddr_t
)phdr
+ hsize
);
1379 phdr
= (Phdr
*)phdrbase
;
1380 for (i
= nphdrs
; i
> 0; i
--) {
1381 switch (phdr
->p_type
) {
1383 if ((*intphdr
!= NULL
) && (*uphdr
== NULL
))
1388 if (phdr
->p_flags
& PF_R
)
1390 if (phdr
->p_flags
& PF_W
)
1392 if (phdr
->p_flags
& PF_X
)
1395 addr
= (caddr_t
)((uintptr_t)phdr
->p_vaddr
+ *voffset
);
1398 * Keep track of the segment with the lowest starting
1404 zfodsz
= (size_t)phdr
->p_memsz
- phdr
->p_filesz
;
1406 offset
= phdr
->p_offset
;
1407 if (((uintptr_t)offset
& PAGEOFFSET
) ==
1408 ((uintptr_t)addr
& PAGEOFFSET
) &&
1409 (!(vp
->v_flag
& VNOMAP
))) {
1416 * Set the heap pagesize for OOB when the bss size
1417 * is known and use_brk_lpg is not 0.
1419 if (brksize
!= NULL
&& use_brk_lpg
&&
1420 zfodsz
!= 0 && phdr
== dataphdrp
&&
1421 (prot
& PROT_WRITE
)) {
1422 size_t tlen
= P2NPHASE((uintptr_t)addr
+
1423 phdr
->p_filesz
, PAGESIZE
);
1425 if (zfodsz
> tlen
) {
1426 curproc
->p_brkpageszc
=
1427 page_szc(map_pgsz(MAPPGSZ_HEAP
,
1428 curproc
, addr
+ phdr
->p_filesz
+
1429 tlen
, zfodsz
- tlen
, 0));
1433 if (curproc
->p_brkpageszc
!= 0 && phdr
== dataphdrp
&&
1434 (prot
& PROT_WRITE
)) {
1435 uint_t szc
= curproc
->p_brkpageszc
;
1436 size_t pgsz
= page_get_pagesize(szc
);
1437 caddr_t ebss
= addr
+ phdr
->p_memsz
;
1439 * If we need extra space to keep the BSS an
1440 * integral number of pages in size, some of
1441 * that space may fall beyond p_brkbase, so we
1442 * need to set p_brksize to account for it
1443 * being (logically) part of the brk.
1445 size_t extra_zfodsz
;
1447 ASSERT(pgsz
> PAGESIZE
);
1449 extra_zfodsz
= P2NPHASE((uintptr_t)ebss
, pgsz
);
1451 if (error
= execmap(vp
, addr
, phdr
->p_filesz
,
1452 zfodsz
+ extra_zfodsz
, phdr
->p_offset
,
1455 if (brksize
!= NULL
)
1456 *brksize
= extra_zfodsz
;
1458 if (error
= execmap(vp
, addr
, phdr
->p_filesz
,
1459 zfodsz
, phdr
->p_offset
, prot
, page
, 0))
1463 if (bssbase
!= NULL
&& addr
>= *bssbase
&&
1464 phdr
== dataphdrp
) {
1465 *bssbase
= addr
+ phdr
->p_filesz
;
1467 if (brkbase
!= NULL
&& addr
>= *brkbase
) {
1468 *brkbase
= addr
+ phdr
->p_memsz
;
1471 *execsz
+= btopr(phdr
->p_memsz
);
1503 phdr
= (Phdr
*)((caddr_t
)phdr
+ hsize
);
1506 if (minaddr
!= NULL
) {
1507 ASSERT(mintmp
!= (caddr_t
)-1);
1508 *minaddr
= (intptr_t)mintmp
;
1511 if (brkbase
!= NULL
&& secflag_enabled(curproc
, PROC_SEC_ASLR
)) {
1513 uintptr_t base
= (uintptr_t)*brkbase
;
1514 uintptr_t oend
= base
+ *brksize
;
1516 ASSERT(ISP2(aslr_max_brk_skew
));
1518 (void) random_get_pseudo_bytes((uint8_t *)&off
, sizeof (off
));
1519 base
+= P2PHASE(off
, aslr_max_brk_skew
);
1520 base
= P2ROUNDUP(base
, PAGESIZE
);
1521 *brkbase
= (caddr_t
)base
;
1523 * Above, we set *brksize to account for the possibility we
1524 * had to grow the 'brk' in padding out the BSS to a page
1527 * We now need to adjust that based on where we now are
1528 * actually putting the brk.
1531 *brksize
= oend
- base
;
1544 elfnote(vnode_t
*vp
, offset_t
*offsetp
, int type
, int descsz
, void *desc
,
1545 rlim64_t rlimit
, cred_t
*credp
)
1550 bzero(&note
, sizeof (note
));
1551 bcopy("CORE", note
.name
, 4);
1552 note
.nhdr
.n_type
= type
;
1554 * The System V ABI states that n_namesz must be the length of the
1555 * string that follows the Nhdr structure including the terminating
1556 * null. The ABI also specifies that sufficient padding should be
1557 * included so that the description that follows the name string
1558 * begins on a 4- or 8-byte boundary for 32- and 64-bit binaries
1559 * respectively. However, since this change was not made correctly
1560 * at the time of the 64-bit port, both 32- and 64-bit binaries
1561 * descriptions are only guaranteed to begin on a 4-byte boundary.
1563 note
.nhdr
.n_namesz
= 5;
1564 note
.nhdr
.n_descsz
= roundup(descsz
, sizeof (Word
));
1566 if (error
= core_write(vp
, UIO_SYSSPACE
, *offsetp
, &note
,
1567 sizeof (note
), rlimit
, credp
))
1570 *offsetp
+= sizeof (note
);
1572 if (error
= core_write(vp
, UIO_SYSSPACE
, *offsetp
, desc
,
1573 note
.nhdr
.n_descsz
, rlimit
, credp
))
1576 *offsetp
+= note
.nhdr
.n_descsz
;
1581 * Copy the section data from one vnode to the section of another vnode.
1584 copy_scn(Shdr
*src
, vnode_t
*src_vp
, Shdr
*dst
, vnode_t
*dst_vp
, Off
*doffset
,
1585 void *buf
, size_t size
, cred_t
*credp
, rlim64_t rlimit
)
1588 size_t len
, n
= src
->sh_size
;
1593 if (vn_rdwr(UIO_READ
, src_vp
, buf
, len
, src
->sh_offset
+ off
,
1594 UIO_SYSSPACE
, 0, (rlim64_t
)0, credp
, &resid
) != 0 ||
1596 core_write(dst_vp
, UIO_SYSSPACE
, *doffset
+ off
,
1597 buf
, len
- resid
, rlimit
, credp
) != 0) {
1603 ASSERT(n
>= len
- resid
);
1609 *doffset
+= src
->sh_size
;
1612 #ifdef _ELF32_COMPAT
1613 extern size_t elf_datasz_max
;
1615 size_t elf_datasz_max
= 1 * 1024 * 1024;
1619 * This function processes mappings that correspond to load objects to
1620 * examine their respective sections for elfcore(). It's called once with
1621 * v set to NULL to count the number of sections that we're going to need
1622 * and then again with v set to some allocated buffer that we fill in with
1623 * all the section data.
1626 process_scns(core_content_t content
, proc_t
*p
, cred_t
*credp
, vnode_t
*vp
,
1627 Shdr
*v
, int nv
, rlim64_t rlimit
, Off
*doffsetp
, int *nshdrsp
)
1629 vnode_t
*lastvp
= NULL
;
1634 shstrtab_t shstrtab
;
1635 struct as
*as
= p
->p_as
;
1639 shstrtab_init(&shstrtab
);
1642 for (seg
= AS_SEGFIRST(as
); seg
!= NULL
; seg
= AS_SEGNEXT(as
, seg
)) {
1646 caddr_t saddr
= seg
->s_base
;
1652 int nshdrs
, shstrndx
, nphdrs
;
1667 * Since we're just looking for text segments of load
1668 * objects, we only care about the protection bits; we don't
1669 * care about the actual size of the segment so we use the
1670 * reserved size. If the segment's size is zero, there's
1671 * something fishy going on so we ignore this segment.
1673 if (seg
->s_ops
!= &segvn_ops
||
1674 SEGOP_GETVP(seg
, seg
->s_base
, &mvp
) != 0 ||
1675 mvp
== lastvp
|| mvp
== NULL
|| mvp
->v_type
!= VREG
||
1676 (segsize
= pr_getsegsize(seg
, 1)) == 0)
1679 eaddr
= saddr
+ segsize
;
1680 prot
= pr_getprot(seg
, 1, &tmp
, &saddr
, &naddr
, eaddr
);
1681 pr_getprot_done(&tmp
);
1684 * Skip this segment unless the protection bits look like
1685 * what we'd expect for a text segment.
1687 if ((prot
& (PROT_WRITE
| PROT_EXEC
)) != PROT_EXEC
)
1690 if (getelfhead(mvp
, credp
, &ehdr
, &nshdrs
, &shstrndx
,
1692 getelfshdr(mvp
, credp
, &ehdr
, nshdrs
, shstrndx
,
1693 &shbase
, &shsize
, &shstrbase
, &shstrsize
) != 0)
1696 off
= ehdr
.e_shentsize
;
1697 for (j
= 1; j
< nshdrs
; j
++, off
+= ehdr
.e_shentsize
) {
1698 Shdr
*symtab
= NULL
, *strtab
;
1700 shdr
= (Shdr
*)(shbase
+ off
);
1702 if (shdr
->sh_name
>= shstrsize
)
1705 name
= shstrbase
+ shdr
->sh_name
;
1707 if (strcmp(name
, shstrtab_data
[STR_CTF
]) == 0) {
1708 if ((content
& CC_CONTENT_CTF
) == 0 ||
1712 if (shdr
->sh_link
> 0 &&
1713 shdr
->sh_link
< nshdrs
) {
1714 symtab
= (Shdr
*)(shbase
+
1715 shdr
->sh_link
* ehdr
.e_shentsize
);
1718 if (v
!= NULL
&& i
< nv
- 1) {
1719 if (shdr
->sh_size
> datasz
&&
1720 shdr
->sh_size
<= elf_datasz_max
) {
1722 kmem_free(data
, datasz
);
1724 datasz
= shdr
->sh_size
;
1725 data
= kmem_alloc(datasz
,
1729 v
[i
].sh_name
= shstrtab_ndx(&shstrtab
,
1731 v
[i
].sh_addr
= (Addr
)(uintptr_t)saddr
;
1732 v
[i
].sh_type
= SHT_PROGBITS
;
1733 v
[i
].sh_addralign
= 4;
1734 *doffsetp
= roundup(*doffsetp
,
1736 v
[i
].sh_offset
= *doffsetp
;
1737 v
[i
].sh_size
= shdr
->sh_size
;
1738 if (symtab
== NULL
) {
1740 } else if (symtab
->sh_type
==
1746 v
[i
].sh_link
= i
+ 1;
1749 copy_scn(shdr
, mvp
, &v
[i
], vp
,
1750 doffsetp
, data
, datasz
, credp
,
1757 * We've already dumped the symtab.
1759 if (symtab
!= NULL
&&
1760 symtab
->sh_type
== SHT_SYMTAB
&&
1764 } else if (strcmp(name
,
1765 shstrtab_data
[STR_SYMTAB
]) == 0) {
1766 if ((content
& CC_CONTENT_SYMTAB
) == 0 ||
1773 if (symtab
!= NULL
) {
1774 if ((symtab
->sh_type
!= SHT_DYNSYM
&&
1775 symtab
->sh_type
!= SHT_SYMTAB
) ||
1776 symtab
->sh_link
== 0 ||
1777 symtab
->sh_link
>= nshdrs
)
1780 strtab
= (Shdr
*)(shbase
+
1781 symtab
->sh_link
* ehdr
.e_shentsize
);
1783 if (strtab
->sh_type
!= SHT_STRTAB
)
1786 if (v
!= NULL
&& i
< nv
- 2) {
1787 sz
= MAX(symtab
->sh_size
,
1790 sz
<= elf_datasz_max
) {
1792 kmem_free(data
, datasz
);
1795 data
= kmem_alloc(datasz
,
1799 if (symtab
->sh_type
== SHT_DYNSYM
) {
1800 v
[i
].sh_name
= shstrtab_ndx(
1801 &shstrtab
, STR_DYNSYM
);
1802 v
[i
+ 1].sh_name
= shstrtab_ndx(
1803 &shstrtab
, STR_DYNSTR
);
1805 v
[i
].sh_name
= shstrtab_ndx(
1806 &shstrtab
, STR_SYMTAB
);
1807 v
[i
+ 1].sh_name
= shstrtab_ndx(
1808 &shstrtab
, STR_STRTAB
);
1811 v
[i
].sh_type
= symtab
->sh_type
;
1812 v
[i
].sh_addr
= symtab
->sh_addr
;
1813 if (ehdr
.e_type
== ET_DYN
||
1816 (Addr
)(uintptr_t)saddr
;
1818 symtab
->sh_addralign
;
1819 *doffsetp
= roundup(*doffsetp
,
1821 v
[i
].sh_offset
= *doffsetp
;
1822 v
[i
].sh_size
= symtab
->sh_size
;
1823 v
[i
].sh_link
= i
+ 1;
1824 v
[i
].sh_entsize
= symtab
->sh_entsize
;
1825 v
[i
].sh_info
= symtab
->sh_info
;
1827 copy_scn(symtab
, mvp
, &v
[i
], vp
,
1828 doffsetp
, data
, datasz
, credp
,
1831 v
[i
+ 1].sh_type
= SHT_STRTAB
;
1832 v
[i
+ 1].sh_flags
= SHF_STRINGS
;
1833 v
[i
+ 1].sh_addr
= symtab
->sh_addr
;
1834 if (ehdr
.e_type
== ET_DYN
||
1835 v
[i
+ 1].sh_addr
== 0)
1837 (Addr
)(uintptr_t)saddr
;
1838 v
[i
+ 1].sh_addralign
=
1839 strtab
->sh_addralign
;
1840 *doffsetp
= roundup(*doffsetp
,
1841 v
[i
+ 1].sh_addralign
);
1842 v
[i
+ 1].sh_offset
= *doffsetp
;
1843 v
[i
+ 1].sh_size
= strtab
->sh_size
;
1845 copy_scn(strtab
, mvp
, &v
[i
+ 1], vp
,
1846 doffsetp
, data
, datasz
, credp
,
1850 if (symtab
->sh_type
== SHT_SYMTAB
)
1856 kmem_free(shstrbase
, shstrsize
);
1857 kmem_free(shbase
, shsize
);
1871 cmn_err(CE_WARN
, "elfcore: core dump failed for "
1872 "process %d; address space is changing", p
->p_pid
);
1877 v
[i
].sh_name
= shstrtab_ndx(&shstrtab
, STR_SHSTRTAB
);
1878 v
[i
].sh_size
= shstrtab_size(&shstrtab
);
1879 v
[i
].sh_addralign
= 1;
1880 *doffsetp
= roundup(*doffsetp
, v
[i
].sh_addralign
);
1881 v
[i
].sh_offset
= *doffsetp
;
1882 v
[i
].sh_flags
= SHF_STRINGS
;
1883 v
[i
].sh_type
= SHT_STRTAB
;
1885 if (v
[i
].sh_size
> datasz
) {
1887 kmem_free(data
, datasz
);
1889 datasz
= v
[i
].sh_size
;
1890 data
= kmem_alloc(datasz
,
1894 shstrtab_dump(&shstrtab
, data
);
1896 if ((error
= core_write(vp
, UIO_SYSSPACE
, *doffsetp
,
1897 data
, v
[i
].sh_size
, rlimit
, credp
)) != 0)
1900 *doffsetp
+= v
[i
].sh_size
;
1904 kmem_free(data
, datasz
);
1910 elfcore(vnode_t
*vp
, proc_t
*p
, cred_t
*credp
, rlim64_t rlimit
, int sig
,
1911 core_content_t content
)
1913 offset_t poffset
, soffset
;
1915 int error
, i
, nphdrs
, nshdrs
;
1918 struct as
*as
= p
->p_as
;
1925 size_t phdrsz
, shdrsz
;
1933 klwp_t
*lwp
= ttolwp(curthread
);
1937 * Make sure we have everything we need (registers, etc.).
1938 * All other lwps have already stopped and are in an orderly state.
1940 ASSERT(p
== ttoproc(curthread
));
1943 AS_LOCK_ENTER(as
, RW_WRITER
);
1944 nphdrs
= prnsegs(as
, 0) + 2; /* two CORE note sections */
1947 * Count the number of section headers we're going to need.
1950 if (content
& (CC_CONTENT_CTF
| CC_CONTENT_SYMTAB
)) {
1951 (void) process_scns(content
, p
, credp
, NULL
, NULL
, NULL
, 0,
1956 ASSERT(nshdrs
== 0 || nshdrs
> 1);
1959 * The core file contents may require zero section headers, but if
1960 * we overflow the 16 bits allotted to the program header count in
1961 * the ELF header, we'll need that program header at index zero.
1963 if (nshdrs
== 0 && nphdrs
>= PN_XNUM
)
1966 phdrsz
= nphdrs
* sizeof (Phdr
);
1967 shdrsz
= nshdrs
* sizeof (Shdr
);
1969 bigsize
= MAX(sizeof (*bigwad
), MAX(phdrsz
, shdrsz
));
1970 bigwad
= kmem_alloc(bigsize
, KM_SLEEP
);
1972 ehdr
= &bigwad
->ehdr
;
1973 bzero(ehdr
, sizeof (*ehdr
));
1975 ehdr
->e_ident
[EI_MAG0
] = ELFMAG0
;
1976 ehdr
->e_ident
[EI_MAG1
] = ELFMAG1
;
1977 ehdr
->e_ident
[EI_MAG2
] = ELFMAG2
;
1978 ehdr
->e_ident
[EI_MAG3
] = ELFMAG3
;
1979 ehdr
->e_ident
[EI_CLASS
] = ELFCLASS
;
1980 ehdr
->e_type
= ET_CORE
;
1982 #if !defined(_LP64) || defined(_ELF32_COMPAT)
1984 #if defined(__sparc)
1985 ehdr
->e_ident
[EI_DATA
] = ELFDATA2MSB
;
1986 ehdr
->e_machine
= EM_SPARC
;
1987 #elif defined(__i386) || defined(__i386_COMPAT)
1988 ehdr
->e_ident
[EI_DATA
] = ELFDATA2LSB
;
1989 ehdr
->e_machine
= EM_386
;
1991 #error "no recognized machine type is defined"
1994 #else /* !defined(_LP64) || defined(_ELF32_COMPAT) */
1996 #if defined(__sparc)
1997 ehdr
->e_ident
[EI_DATA
] = ELFDATA2MSB
;
1998 ehdr
->e_machine
= EM_SPARCV9
;
1999 #elif defined(__amd64)
2000 ehdr
->e_ident
[EI_DATA
] = ELFDATA2LSB
;
2001 ehdr
->e_machine
= EM_AMD64
;
2003 #error "no recognized 64-bit machine type is defined"
2006 #endif /* !defined(_LP64) || defined(_ELF32_COMPAT) */
2009 * If the count of program headers or section headers or the index
2010 * of the section string table can't fit in the mere 16 bits
2011 * shortsightedly allotted to them in the ELF header, we use the
2012 * extended formats and put the real values in the section header
2015 ehdr
->e_version
= EV_CURRENT
;
2016 ehdr
->e_ehsize
= sizeof (Ehdr
);
2018 if (nphdrs
>= PN_XNUM
)
2019 ehdr
->e_phnum
= PN_XNUM
;
2021 ehdr
->e_phnum
= (unsigned short)nphdrs
;
2023 ehdr
->e_phoff
= sizeof (Ehdr
);
2024 ehdr
->e_phentsize
= sizeof (Phdr
);
2027 if (nshdrs
>= SHN_LORESERVE
)
2030 ehdr
->e_shnum
= (unsigned short)nshdrs
;
2032 if (nshdrs
- 1 >= SHN_LORESERVE
)
2033 ehdr
->e_shstrndx
= SHN_XINDEX
;
2035 ehdr
->e_shstrndx
= (unsigned short)(nshdrs
- 1);
2037 ehdr
->e_shoff
= ehdr
->e_phoff
+ ehdr
->e_phentsize
* nphdrs
;
2038 ehdr
->e_shentsize
= sizeof (Shdr
);
2041 if (error
= core_write(vp
, UIO_SYSSPACE
, (offset_t
)0, ehdr
,
2042 sizeof (Ehdr
), rlimit
, credp
))
2045 poffset
= sizeof (Ehdr
);
2046 soffset
= sizeof (Ehdr
) + phdrsz
;
2047 doffset
= sizeof (Ehdr
) + phdrsz
+ shdrsz
;
2049 v
= &bigwad
->phdr
[0];
2052 setup_old_note_header(&v
[0], p
);
2053 v
[0].p_offset
= doffset
= roundup(doffset
, sizeof (Word
));
2054 doffset
+= v
[0].p_filesz
;
2056 setup_note_header(&v
[1], p
);
2057 v
[1].p_offset
= doffset
= roundup(doffset
, sizeof (Word
));
2058 doffset
+= v
[1].p_filesz
;
2060 mutex_enter(&p
->p_lock
);
2062 brkbase
= p
->p_brkbase
;
2063 brksize
= p
->p_brksize
;
2065 stkbase
= p
->p_usrstack
- p
->p_stksize
;
2066 stksize
= p
->p_stksize
;
2068 mutex_exit(&p
->p_lock
);
2070 AS_LOCK_ENTER(as
, RW_WRITER
);
2072 for (seg
= AS_SEGFIRST(as
); seg
!= NULL
; seg
= AS_SEGNEXT(as
, seg
)) {
2073 caddr_t eaddr
= seg
->s_base
+ pr_getsegsize(seg
, 0);
2074 caddr_t saddr
, naddr
;
2076 extern struct seg_ops segspt_shmops
;
2078 for (saddr
= seg
->s_base
; saddr
< eaddr
; saddr
= naddr
) {
2084 prot
= pr_getprot(seg
, 0, &tmp
, &saddr
, &naddr
, eaddr
);
2085 prot
&= PROT_READ
| PROT_WRITE
| PROT_EXEC
;
2086 if ((size
= (size_t)(naddr
- saddr
)) == 0)
2092 v
[i
].p_type
= PT_LOAD
;
2093 v
[i
].p_vaddr
= (Addr
)(uintptr_t)saddr
;
2094 v
[i
].p_memsz
= size
;
2095 if (prot
& PROT_READ
)
2096 v
[i
].p_flags
|= PF_R
;
2097 if (prot
& PROT_WRITE
)
2098 v
[i
].p_flags
|= PF_W
;
2099 if (prot
& PROT_EXEC
)
2100 v
[i
].p_flags
|= PF_X
;
2103 * Figure out which mappings to include in the core.
2105 type
= SEGOP_GETTYPE(seg
, saddr
);
2107 if (saddr
== stkbase
&& size
== stksize
) {
2108 if (!(content
& CC_CONTENT_STACK
))
2111 } else if (saddr
== brkbase
&& size
== brksize
) {
2112 if (!(content
& CC_CONTENT_HEAP
))
2115 } else if (seg
->s_ops
== &segspt_shmops
) {
2116 if (type
& MAP_NORESERVE
) {
2117 if (!(content
& CC_CONTENT_DISM
))
2120 if (!(content
& CC_CONTENT_ISM
))
2124 } else if (seg
->s_ops
!= &segvn_ops
) {
2127 } else if (type
& MAP_SHARED
) {
2128 if (shmgetid(p
, saddr
) != SHMID_NONE
) {
2129 if (!(content
& CC_CONTENT_SHM
))
2132 } else if (SEGOP_GETVP(seg
, seg
->s_base
,
2133 &mvp
) != 0 || mvp
== NULL
||
2134 mvp
->v_type
!= VREG
) {
2135 if (!(content
& CC_CONTENT_SHANON
))
2139 if (!(content
& CC_CONTENT_SHFILE
))
2143 } else if (SEGOP_GETVP(seg
, seg
->s_base
, &mvp
) != 0 ||
2144 mvp
== NULL
|| mvp
->v_type
!= VREG
) {
2145 if (!(content
& CC_CONTENT_ANON
))
2148 } else if (prot
== (PROT_READ
| PROT_EXEC
)) {
2149 if (!(content
& CC_CONTENT_TEXT
))
2152 } else if (prot
== PROT_READ
) {
2153 if (!(content
& CC_CONTENT_RODATA
))
2157 if (!(content
& CC_CONTENT_DATA
))
2161 doffset
= roundup(doffset
, sizeof (Word
));
2162 v
[i
].p_offset
= doffset
;
2163 v
[i
].p_filesz
= size
;
2168 ASSERT(tmp
== NULL
);
2172 if (overflow
|| i
!= nphdrs
) {
2173 if (ntries
++ == 0) {
2174 kmem_free(bigwad
, bigsize
);
2178 cmn_err(CE_WARN
, "elfcore: core dump failed for "
2179 "process %d; address space is changing", p
->p_pid
);
2184 if ((error
= core_write(vp
, UIO_SYSSPACE
, poffset
,
2185 v
, phdrsz
, rlimit
, credp
)) != 0)
2188 if ((error
= write_old_elfnotes(p
, sig
, vp
, v
[0].p_offset
, rlimit
,
2192 if ((error
= write_elfnotes(p
, sig
, vp
, v
[1].p_offset
, rlimit
,
2193 credp
, content
)) != 0)
2196 for (i
= 2; i
< nphdrs
; i
++) {
2197 prkillinfo_t killinfo
;
2201 if (v
[i
].p_filesz
== 0)
2205 * If dumping out this segment fails, rather than failing
2206 * the core dump entirely, we reset the size of the mapping
2207 * to zero to indicate that the data is absent from the core
2208 * file and OR in the PF_SUNW_FAILURE flag to differentiate
2209 * this from mappings that were excluded due to the core file
2212 if ((error
= core_seg(p
, vp
, v
[i
].p_offset
,
2213 (caddr_t
)(uintptr_t)v
[i
].p_vaddr
, v
[i
].p_filesz
,
2214 rlimit
, credp
)) == 0) {
2218 if ((sig
= lwp
->lwp_cursig
) == 0) {
2220 * We failed due to something other than a signal.
2221 * Since the space reserved for the segment is now
2222 * unused, we stash the errno in the first four
2223 * bytes. This undocumented interface will let us
2224 * understand the nature of the failure.
2226 (void) core_write(vp
, UIO_SYSSPACE
, v
[i
].p_offset
,
2227 &error
, sizeof (error
), rlimit
, credp
);
2230 v
[i
].p_flags
|= PF_SUNW_FAILURE
;
2231 if ((error
= core_write(vp
, UIO_SYSSPACE
,
2232 poffset
+ sizeof (v
[i
]) * i
, &v
[i
], sizeof (v
[i
]),
2233 rlimit
, credp
)) != 0)
2240 * We took a signal. We want to abort the dump entirely, but
2241 * we also want to indicate what failed and why. We therefore
2242 * use the space reserved for the first failing segment to
2243 * write our error (which, for purposes of compatibility with
2244 * older core dump readers, we set to EINTR) followed by any
2245 * siginfo associated with the signal.
2247 bzero(&killinfo
, sizeof (killinfo
));
2248 killinfo
.prk_error
= EINTR
;
2250 sq
= sig
== SIGKILL
? curproc
->p_killsqp
: lwp
->lwp_curinfo
;
2253 bcopy(&sq
->sq_info
, &killinfo
.prk_info
,
2254 sizeof (sq
->sq_info
));
2256 killinfo
.prk_info
.si_signo
= lwp
->lwp_cursig
;
2257 killinfo
.prk_info
.si_code
= SI_NOINFO
;
2260 #if (defined(_SYSCALL32_IMPL) || defined(_LP64))
2262 * If this is a 32-bit process, we need to translate from the
2263 * native siginfo to the 32-bit variant. (Core readers must
2264 * always have the same data model as their target or must
2265 * be aware of -- and compensate for -- data model differences.)
2267 if (curproc
->p_model
== DATAMODEL_ILP32
) {
2270 siginfo_kto32((k_siginfo_t
*)&killinfo
.prk_info
, &si32
);
2271 bcopy(&si32
, &killinfo
.prk_info
, sizeof (si32
));
2275 (void) core_write(vp
, UIO_SYSSPACE
, v
[i
].p_offset
,
2276 &killinfo
, sizeof (killinfo
), rlimit
, credp
);
2279 * For the segment on which we took the signal, indicate that
2280 * its data now refers to a siginfo.
2283 v
[i
].p_flags
|= PF_SUNW_FAILURE
| PF_SUNW_KILLED
|
2287 * And for every other segment, indicate that its absence
2288 * is due to a signal.
2290 for (j
= i
+ 1; j
< nphdrs
; j
++) {
2292 v
[j
].p_flags
|= PF_SUNW_FAILURE
| PF_SUNW_KILLED
;
2296 * Finally, write out our modified program headers.
2298 if ((error
= core_write(vp
, UIO_SYSSPACE
,
2299 poffset
+ sizeof (v
[i
]) * i
, &v
[i
],
2300 sizeof (v
[i
]) * (nphdrs
- i
), rlimit
, credp
)) != 0)
2307 bzero(&bigwad
->shdr
[0], shdrsz
);
2309 if (nshdrs
>= SHN_LORESERVE
)
2310 bigwad
->shdr
[0].sh_size
= nshdrs
;
2312 if (nshdrs
- 1 >= SHN_LORESERVE
)
2313 bigwad
->shdr
[0].sh_link
= nshdrs
- 1;
2315 if (nphdrs
>= PN_XNUM
)
2316 bigwad
->shdr
[0].sh_info
= nphdrs
;
2319 AS_LOCK_ENTER(as
, RW_WRITER
);
2320 if ((error
= process_scns(content
, p
, credp
, vp
,
2321 &bigwad
->shdr
[0], nshdrs
, rlimit
, &doffset
,
2329 if ((error
= core_write(vp
, UIO_SYSSPACE
, soffset
,
2330 &bigwad
->shdr
[0], shdrsz
, rlimit
, credp
)) != 0)
2335 kmem_free(bigwad
, bigsize
);
2339 #ifndef _ELF32_COMPAT
2341 static struct execsw esw
= {
2353 static struct modlexec modlexec
= {
2354 &mod_execops
, "exec module for elf", &esw
2358 extern int elf32exec(vnode_t
*vp
, execa_t
*uap
, uarg_t
*args
,
2359 intpdata_t
*idatap
, int level
, long *execsz
,
2360 int setid
, caddr_t exec_file
, cred_t
*cred
,
2362 extern int elf32core(vnode_t
*vp
, proc_t
*p
, cred_t
*credp
,
2363 rlim64_t rlimit
, int sig
, core_content_t content
);
2365 static struct execsw esw32
= {
2373 static struct modlexec modlexec32
= {
2374 &mod_execops
, "32-bit exec module for elf", &esw32
2378 static struct modlinkage modlinkage
= {
2382 (void *)&modlexec32
,
2390 return (mod_install(&modlinkage
));
2396 return (mod_remove(&modlinkage
));
2400 _info(struct modinfo
*modinfop
)
2402 return (mod_info(&modlinkage
, modinfop
));
2405 #endif /* !_ELF32_COMPAT */