x86/nmi: Save regs in crash dump on external NMI
[linux-2.6/btrfs-unstable.git] / fs / binfmt_elf.c
blob3a93755e880fee23fa6d8370916caefb21f2c9eb
1 /*
2 * linux/fs/binfmt_elf.c
4 * These are the functions used to load ELF format executables as used
5 * on SVr4 machines. Information on the format may be found in the book
6 * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
7 * Tools".
9 * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
12 #include <linux/module.h>
13 #include <linux/kernel.h>
14 #include <linux/fs.h>
15 #include <linux/mm.h>
16 #include <linux/mman.h>
17 #include <linux/errno.h>
18 #include <linux/signal.h>
19 #include <linux/binfmts.h>
20 #include <linux/string.h>
21 #include <linux/file.h>
22 #include <linux/slab.h>
23 #include <linux/personality.h>
24 #include <linux/elfcore.h>
25 #include <linux/init.h>
26 #include <linux/highuid.h>
27 #include <linux/compiler.h>
28 #include <linux/highmem.h>
29 #include <linux/pagemap.h>
30 #include <linux/vmalloc.h>
31 #include <linux/security.h>
32 #include <linux/random.h>
33 #include <linux/elf.h>
34 #include <linux/elf-randomize.h>
35 #include <linux/utsname.h>
36 #include <linux/coredump.h>
37 #include <linux/sched.h>
38 #include <linux/dax.h>
39 #include <asm/uaccess.h>
40 #include <asm/param.h>
41 #include <asm/page.h>
43 #ifndef user_long_t
44 #define user_long_t long
45 #endif
46 #ifndef user_siginfo_t
47 #define user_siginfo_t siginfo_t
48 #endif
50 static int load_elf_binary(struct linux_binprm *bprm);
51 static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
52 int, int, unsigned long);
54 #ifdef CONFIG_USELIB
55 static int load_elf_library(struct file *);
56 #else
57 #define load_elf_library NULL
58 #endif
61 * If we don't support core dumping, then supply a NULL so we
62 * don't even try.
64 #ifdef CONFIG_ELF_CORE
65 static int elf_core_dump(struct coredump_params *cprm);
66 #else
67 #define elf_core_dump NULL
68 #endif
70 #if ELF_EXEC_PAGESIZE > PAGE_SIZE
71 #define ELF_MIN_ALIGN ELF_EXEC_PAGESIZE
72 #else
73 #define ELF_MIN_ALIGN PAGE_SIZE
74 #endif
76 #ifndef ELF_CORE_EFLAGS
77 #define ELF_CORE_EFLAGS 0
78 #endif
80 #define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
81 #define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
82 #define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
84 static struct linux_binfmt elf_format = {
85 .module = THIS_MODULE,
86 .load_binary = load_elf_binary,
87 .load_shlib = load_elf_library,
88 .core_dump = elf_core_dump,
89 .min_coredump = ELF_EXEC_PAGESIZE,
92 #define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)
94 static int set_brk(unsigned long start, unsigned long end)
96 start = ELF_PAGEALIGN(start);
97 end = ELF_PAGEALIGN(end);
98 if (end > start) {
99 unsigned long addr;
100 addr = vm_brk(start, end - start);
101 if (BAD_ADDR(addr))
102 return addr;
104 current->mm->start_brk = current->mm->brk = end;
105 return 0;
108 /* We need to explicitly zero any fractional pages
109 after the data section (i.e. bss). This would
110 contain the junk from the file that should not
111 be in memory
113 static int padzero(unsigned long elf_bss)
115 unsigned long nbyte;
117 nbyte = ELF_PAGEOFFSET(elf_bss);
118 if (nbyte) {
119 nbyte = ELF_MIN_ALIGN - nbyte;
120 if (clear_user((void __user *) elf_bss, nbyte))
121 return -EFAULT;
123 return 0;
126 /* Let's use some macros to make this stack manipulation a little clearer */
/*
 * Two variants are provided, selected by CONFIG_STACK_GROWSUP:
 *  - STACK_ADD(sp, items): treat sp as an elf_addr_t pointer and move it by
 *    items elements in the direction the stack grows.
 *  - STACK_ROUND(sp, items): move sp by items and align the result to a
 *    16-byte boundary (rounded up when the stack grows up, down otherwise);
 *    yields an unsigned long.
 *  - STACK_ALLOC(sp, len): reserve len units on the stack by adjusting sp in
 *    place; yields the start of the reserved area (the old sp when growing
 *    up, the new sp otherwise).
 */
127 #ifdef CONFIG_STACK_GROWSUP
128 #define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
129 #define STACK_ROUND(sp, items) \
130 ((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
131 #define STACK_ALLOC(sp, len) ({ \
132 elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
133 old_sp; })
134 #else
135 #define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
136 #define STACK_ROUND(sp, items) \
137 (((unsigned long) (sp - items)) &~ 15UL)
138 #define STACK_ALLOC(sp, len) ({ sp -= len ; sp; })
139 #endif
141 #ifndef ELF_BASE_PLATFORM
143 * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
144 * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
145 * will be copied to the user stack in the same manner as AT_PLATFORM.
147 #define ELF_BASE_PLATFORM NULL
148 #endif
/*
 * create_elf_tables() - lay out the initial user stack for a new ELF image.
 * @bprm:             exec state; argc, envc, p, exec, interp_flags and
 *                    interp_data are read, and p is updated to the final
 *                    16-byte aligned stack position.
 * @exec:             ELF header of the program (e_phoff, e_phnum, e_entry
 *                    are published through the auxiliary vector).
 * @load_addr:        added to e_phoff to form AT_PHDR.
 * @interp_load_addr: published as AT_BASE.
 *
 * Steps, in order: copy the platform strings (when defined) and 16 random
 * bytes onto the user stack; fill current->mm->saved_auxv with the auxiliary
 * vector; reserve stack space; write argc, the argv[] and envp[] pointer
 * arrays (each NULL terminated) and finally the auxiliary vector.  Also
 * records arg_end, env_start and env_end in current->mm.
 *
 * Return: 0 on success, -EFAULT when a user-space write or the stack
 * extension fails, -EINVAL when an argument or environment string has a
 * length of zero or above MAX_ARG_STRLEN.
 */
150 static int
151 create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
152 unsigned long load_addr, unsigned long interp_load_addr)
154 unsigned long p = bprm->p;
155 int argc = bprm->argc;
156 int envc = bprm->envc;
157 elf_addr_t __user *argv;
158 elf_addr_t __user *envp;
159 elf_addr_t __user *sp;
160 elf_addr_t __user *u_platform;
161 elf_addr_t __user *u_base_platform;
162 elf_addr_t __user *u_rand_bytes;
163 const char *k_platform = ELF_PLATFORM;
164 const char *k_base_platform = ELF_BASE_PLATFORM;
165 unsigned char k_rand_bytes[16];
166 int items;
167 elf_addr_t *elf_info;
168 int ei_index = 0;
169 const struct cred *cred = current_cred();
170 struct vm_area_struct *vma;
173 * In some cases (e.g. Hyper-Threading), we want to avoid L1
174 * evictions by the processes running on the same package. One
175 * thing we can do is to shuffle the initial stack for them.
178 p = arch_align_stack(p);
181 * If this architecture has a platform capability string, copy it
182 * to userspace. In some cases (Sparc), this info is impossible
183 * for userspace to get any other way, in others (i386) it is
184 * merely difficult.
186 u_platform = NULL;
187 if (k_platform) {
188 size_t len = strlen(k_platform) + 1;
190 u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
191 if (__copy_to_user(u_platform, k_platform, len))
192 return -EFAULT;
196 * If this architecture has a "base" platform capability
197 * string, copy it to userspace.
199 u_base_platform = NULL;
200 if (k_base_platform) {
201 size_t len = strlen(k_base_platform) + 1;
203 u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
204 if (__copy_to_user(u_base_platform, k_base_platform, len))
205 return -EFAULT;
209 * Generate 16 random bytes for userspace PRNG seeding.
211 get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
212 u_rand_bytes = (elf_addr_t __user *)
213 STACK_ALLOC(p, sizeof(k_rand_bytes));
214 if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
215 return -EFAULT;
217 /* Create the ELF interpreter info */
218 elf_info = (elf_addr_t *)current->mm->saved_auxv;
219 /* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
/* Each auxiliary vector entry occupies two consecutive slots: id, value. */
220 #define NEW_AUX_ENT(id, val) \
221 do { \
222 elf_info[ei_index++] = id; \
223 elf_info[ei_index++] = val; \
224 } while (0)
226 #ifdef ARCH_DLINFO
228 * ARCH_DLINFO must come first so PPC can do its special alignment of
229 * AUXV.
230 * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
231 * ARCH_DLINFO changes
233 ARCH_DLINFO;
234 #endif
235 NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
236 NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
237 NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
238 NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
239 NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
240 NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
241 NEW_AUX_ENT(AT_BASE, interp_load_addr);
242 NEW_AUX_ENT(AT_FLAGS, 0);
243 NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
244 NEW_AUX_ENT(AT_UID, from_kuid_munged(cred->user_ns, cred->uid));
245 NEW_AUX_ENT(AT_EUID, from_kuid_munged(cred->user_ns, cred->euid));
246 NEW_AUX_ENT(AT_GID, from_kgid_munged(cred->user_ns, cred->gid));
247 NEW_AUX_ENT(AT_EGID, from_kgid_munged(cred->user_ns, cred->egid));
248 NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
249 NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
250 #ifdef ELF_HWCAP2
251 NEW_AUX_ENT(AT_HWCAP2, ELF_HWCAP2);
252 #endif
253 NEW_AUX_ENT(AT_EXECFN, bprm->exec);
254 if (k_platform) {
255 NEW_AUX_ENT(AT_PLATFORM,
256 (elf_addr_t)(unsigned long)u_platform);
258 if (k_base_platform) {
259 NEW_AUX_ENT(AT_BASE_PLATFORM,
260 (elf_addr_t)(unsigned long)u_base_platform);
262 if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
263 NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
265 #undef NEW_AUX_ENT
266 /* AT_NULL is zero; clear the rest too */
267 memset(&elf_info[ei_index], 0,
268 sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);
270 /* And advance past the AT_NULL entry. */
271 ei_index += 2;
/*
 * Reserve room for the auxiliary vector (ei_index slots), then for
 * argc, argv[] plus its NULL, and envp[] plus its NULL (items slots).
 */
273 sp = STACK_ADD(p, ei_index);
275 items = (argc + 1) + (envc + 1) + 1;
276 bprm->p = STACK_ROUND(sp, items);
278 /* Point sp at the lowest address on the stack */
279 #ifdef CONFIG_STACK_GROWSUP
280 sp = (elf_addr_t __user *)bprm->p - items - ei_index;
281 bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
282 #else
283 sp = (elf_addr_t __user *)bprm->p;
284 #endif
288 * Grow the stack manually; some architectures have a limit on how
289 * far ahead a user-space access may be in order to grow the stack.
291 vma = find_extend_vma(current->mm, bprm->p);
292 if (!vma)
293 return -EFAULT;
295 /* Now, let's put argc (and argv, envp if appropriate) on the stack */
296 if (__put_user(argc, sp++))
297 return -EFAULT;
298 argv = sp;
299 envp = argv + argc + 1;
301 /* Populate argv and envp */
/*
 * The strings themselves are already in user memory starting at
 * mm->arg_start; walk them with strnlen_user() and store the address of
 * each one in the pointer arrays.
 */
302 p = current->mm->arg_end = current->mm->arg_start;
303 while (argc-- > 0) {
304 size_t len;
305 if (__put_user((elf_addr_t)p, argv++))
306 return -EFAULT;
307 len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
308 if (!len || len > MAX_ARG_STRLEN)
309 return -EINVAL;
310 p += len;
312 if (__put_user(0, argv))
313 return -EFAULT;
314 current->mm->arg_end = current->mm->env_start = p;
315 while (envc-- > 0) {
316 size_t len;
317 if (__put_user((elf_addr_t)p, envp++))
318 return -EFAULT;
319 len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
320 if (!len || len > MAX_ARG_STRLEN)
321 return -EINVAL;
322 p += len;
324 if (__put_user(0, envp))
325 return -EFAULT;
326 current->mm->env_end = p;
328 /* Put the elf_info on the stack in the right place. */
329 sp = (elf_addr_t __user *)envp + 1;
330 if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
331 return -EFAULT;
332 return 0;
335 #ifndef elf_map
337 static unsigned long elf_map(struct file *filep, unsigned long addr,
338 struct elf_phdr *eppnt, int prot, int type,
339 unsigned long total_size)
341 unsigned long map_addr;
342 unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
343 unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
344 addr = ELF_PAGESTART(addr);
345 size = ELF_PAGEALIGN(size);
347 /* mmap() will return -EINVAL if given a zero size, but a
348 * segment with zero filesize is perfectly valid */
349 if (!size)
350 return addr;
353 * total_size is the size of the ELF (interpreter) image.
354 * The _first_ mmap needs to know the full size, otherwise
355 * randomization might put this image into an overlapping
356 * position with the ELF binary image. (since size < total_size)
357 * So we first map the 'big' image - and unmap the remainder at
358 * the end. (which unmap is needed for ELF images with holes.)
360 if (total_size) {
361 total_size = ELF_PAGEALIGN(total_size);
362 map_addr = vm_mmap(filep, addr, total_size, prot, type, off);
363 if (!BAD_ADDR(map_addr))
364 vm_munmap(map_addr+size, total_size-size);
365 } else
366 map_addr = vm_mmap(filep, addr, size, prot, type, off);
368 return(map_addr);
371 #endif /* !elf_map */
373 static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
375 int i, first_idx = -1, last_idx = -1;
377 for (i = 0; i < nr; i++) {
378 if (cmds[i].p_type == PT_LOAD) {
379 last_idx = i;
380 if (first_idx == -1)
381 first_idx = i;
384 if (first_idx == -1)
385 return 0;
387 return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
388 ELF_PAGESTART(cmds[first_idx].p_vaddr);
392 * load_elf_phdrs() - load ELF program headers
393 * @elf_ex: ELF header of the binary whose program headers should be loaded
394 * @elf_file: the opened ELF binary file
396 * Loads ELF program headers from the binary file elf_file, which has the ELF
397 * header pointed to by elf_ex, into a newly allocated array. The caller is
398 * responsible for freeing the allocated data. Returns an ERR_PTR upon failure.
400 static struct elf_phdr *load_elf_phdrs(struct elfhdr *elf_ex,
401 struct file *elf_file)
403 struct elf_phdr *elf_phdata = NULL;
404 int retval, size, err = -1;
407 * If the size of this structure has changed, then punt, since
408 * we will be doing the wrong thing.
410 if (elf_ex->e_phentsize != sizeof(struct elf_phdr))
411 goto out;
413 /* Sanity check the number of program headers... */
414 if (elf_ex->e_phnum < 1 ||
415 elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
416 goto out;
418 /* ...and their total size. */
419 size = sizeof(struct elf_phdr) * elf_ex->e_phnum;
420 if (size > ELF_MIN_ALIGN)
421 goto out;
423 elf_phdata = kmalloc(size, GFP_KERNEL);
424 if (!elf_phdata)
425 goto out;
427 /* Read in the program headers */
428 retval = kernel_read(elf_file, elf_ex->e_phoff,
429 (char *)elf_phdata, size);
430 if (retval != size) {
431 err = (retval < 0) ? retval : -EIO;
432 goto out;
435 /* Success! */
436 err = 0;
437 out:
438 if (err) {
439 kfree(elf_phdata);
440 elf_phdata = NULL;
442 return elf_phdata;
445 #ifndef CONFIG_ARCH_BINFMT_ELF_STATE
/**
 * struct arch_elf_state - arch-specific ELF loading state
 *
 * Carries architecture specific data across the loading of an ELF file:
 * from the checking of architecture specific ELF headers up to the point
 * where the load is known to be proceeding (ie. SET_PERSONALITY).
 *
 * This is the placeholder used by architectures that need no such state,
 * hence it has no members.
 */
struct arch_elf_state {
};

/* Initialiser matching the empty placeholder structure. */
#define INIT_ARCH_ELF_STATE {}
/**
 * arch_elf_pt_proc() - check a PT_LOPROC..PT_HIPROC ELF program header
 * @ehdr:      The main ELF header
 * @phdr:      The program header to check
 * @elf:       The open ELF file
 * @is_interp: True if the phdr is from the interpreter of the ELF being
 *             loaded, else false.
 * @state:     Architecture-specific state preserved throughout the process
 *             of loading the ELF.
 *
 * Hook that lets architecture code validate a processor-specific program
 * header and decide whether it suits the system. It is invoked once for
 * every header in the PT_LOPROC to PT_HIPROC range, for the ELF being
 * loaded as well as for its interpreter.
 *
 * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
 *         with that return code.
 */
static inline int arch_elf_pt_proc(struct elfhdr *ehdr,
				   struct elf_phdr *phdr,
				   struct file *elf, bool is_interp,
				   struct arch_elf_state *state)
{
	/* Generic fallback: nothing to verify, so accept every header. */
	return 0;
}
/**
 * arch_check_elf() - check an ELF executable
 * @ehdr:       The main ELF header
 * @has_interp: True if the ELF has an interpreter, else false.
 * @state:      Architecture-specific state preserved throughout the process
 *              of loading the ELF.
 *
 * Last chance for architecture code to refuse an ELF so that the exec
 * syscall returns an error. It runs once every program header that
 * arch_elf_pt_proc needs to see has been checked.
 *
 * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
 *         with that return code.
 */
static inline int arch_check_elf(struct elfhdr *ehdr, bool has_interp,
				 struct arch_elf_state *state)
{
	/* Generic fallback: no architecture constraints, always accept. */
	return 0;
}
511 #endif /* !CONFIG_ARCH_BINFMT_ELF_STATE */
513 /* This is much more generalized than the library routine read function,
514 so we keep this separate. Technically the library read function
515 is only provided so that we can read a.out libraries that have
516 an ELF header */
/*
 * load_elf_interp() - map the ELF interpreter into the current process.
 * @interp_elf_ex:     ELF header of the interpreter.
 * @interpreter:       open file of the interpreter.
 * @interp_map_addr:   in/out; if it holds zero it receives the value
 *                     returned by elf_map() for a PT_LOAD segment.
 * @no_base:           when non-zero and the interpreter is ET_DYN, the first
 *                     segment is requested at address zero (load_addr is set
 *                     to -vaddr) instead of at its p_vaddr.
 * @interp_elf_phdata: program header table of the interpreter.
 *
 * Maps every PT_LOAD segment, checks each against TASK_SIZE, then zeroes
 * the partial page after the file-backed data and maps the remaining bss
 * with vm_brk().
 *
 * Return: the load address (offset to add to the interpreter addresses) on
 * success.  On failure an error value: ~0UL when the initial consistency
 * checks fail, otherwise a negative errno or the bad address from the
 * failing mapping call, as an unsigned long.
 */
518 static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
519 struct file *interpreter, unsigned long *interp_map_addr,
520 unsigned long no_base, struct elf_phdr *interp_elf_phdata)
522 struct elf_phdr *eppnt;
523 unsigned long load_addr = 0;
524 int load_addr_set = 0;
525 unsigned long last_bss = 0, elf_bss = 0;
526 unsigned long error = ~0UL;
527 unsigned long total_size;
528 int i;
530 /* First of all, some simple consistency checks */
531 if (interp_elf_ex->e_type != ET_EXEC &&
532 interp_elf_ex->e_type != ET_DYN)
533 goto out;
534 if (!elf_check_arch(interp_elf_ex))
535 goto out;
536 if (!interpreter->f_op->mmap)
537 goto out;
539 total_size = total_mapping_size(interp_elf_phdata,
540 interp_elf_ex->e_phnum);
541 if (!total_size) {
542 error = -EINVAL;
543 goto out;
/*
 * Map each PT_LOAD segment.  total_size is handed to elf_map() only for
 * the first mapping; it is reset to zero right after that call.
 */
546 eppnt = interp_elf_phdata;
547 for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
548 if (eppnt->p_type == PT_LOAD) {
549 int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
550 int elf_prot = 0;
551 unsigned long vaddr = 0;
552 unsigned long k, map_addr;
554 if (eppnt->p_flags & PF_R)
555 elf_prot = PROT_READ;
556 if (eppnt->p_flags & PF_W)
557 elf_prot |= PROT_WRITE;
558 if (eppnt->p_flags & PF_X)
559 elf_prot |= PROT_EXEC;
560 vaddr = eppnt->p_vaddr;
561 if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
562 elf_type |= MAP_FIXED;
563 else if (no_base && interp_elf_ex->e_type == ET_DYN)
564 load_addr = -vaddr;
566 map_addr = elf_map(interpreter, load_addr + vaddr,
567 eppnt, elf_prot, elf_type, total_size);
568 total_size = 0;
569 if (!*interp_map_addr)
570 *interp_map_addr = map_addr;
571 error = map_addr;
572 if (BAD_ADDR(map_addr))
573 goto out;
/*
 * For ET_DYN the first successful mapping fixes the load address; all
 * later segments are then placed with MAP_FIXED relative to it.
 */
575 if (!load_addr_set &&
576 interp_elf_ex->e_type == ET_DYN) {
577 load_addr = map_addr - ELF_PAGESTART(vaddr);
578 load_addr_set = 1;
582 * Check to see if the section's size will overflow the
583 * allowed task size. Note that p_filesz must always be
584 * <= p_memsize so it's only necessary to check p_memsz.
586 k = load_addr + eppnt->p_vaddr;
587 if (BAD_ADDR(k) ||
588 eppnt->p_filesz > eppnt->p_memsz ||
589 eppnt->p_memsz > TASK_SIZE ||
590 TASK_SIZE - eppnt->p_memsz < k) {
591 error = -ENOMEM;
592 goto out;
596 * Find the end of the file mapping for this phdr, and
597 * keep track of the largest address we see for this.
599 k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
600 if (k > elf_bss)
601 elf_bss = k;
604 * Do the same thing for the memory mapping - between
605 * elf_bss and last_bss is the bss section.
607 k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
608 if (k > last_bss)
609 last_bss = k;
613 if (last_bss > elf_bss) {
615 * Now fill out the bss section. First pad the last page up
616 * to the page boundary, and then perform a mmap to make sure
617 * that there are zero-mapped pages up to and including the
618 * last bss page.
620 if (padzero(elf_bss)) {
621 error = -EFAULT;
622 goto out;
625 /* What we have mapped so far */
626 elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);
628 /* Map the last of the bss segment */
629 error = vm_brk(elf_bss, last_bss - elf_bss);
630 if (BAD_ADDR(error))
631 goto out;
/* Success: report the load address to the caller. */
634 error = load_addr;
635 out:
636 return error;
640 * These are the functions used to load ELF style executables and shared
641 * libraries. There is no binary dependent code anywhere else.
644 #ifndef STACK_RND_MASK
645 #define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12)) /* 8MB of VA */
646 #endif
648 static unsigned long randomize_stack_top(unsigned long stack_top)
650 unsigned long random_variable = 0;
652 if ((current->flags & PF_RANDOMIZE) &&
653 !(current->personality & ADDR_NO_RANDOMIZE)) {
654 random_variable = (unsigned long) get_random_int();
655 random_variable &= STACK_RND_MASK;
656 random_variable <<= PAGE_SHIFT;
658 #ifdef CONFIG_STACK_GROWSUP
659 return PAGE_ALIGN(stack_top) + random_variable;
660 #else
661 return PAGE_ALIGN(stack_top) - random_variable;
662 #endif
/*
 * load_elf_binary() - exec handler for ELF images.
 * @bprm: exec state; bprm->buf holds the start of the file and bprm->file
 *        the opened binary.
 *
 * Outline of the work done here, in order:
 *  1. validate the ELF header and read the program headers;
 *  2. if a PT_INTERP header exists, read the interpreter path, open the
 *     interpreter and read its ELF header;
 *  3. note PT_GNU_STACK and pass processor-specific headers to arch code;
 *  4. flush the old executable, set the personality and set up the stack;
 *  5. map every PT_LOAD segment and establish bss/brk;
 *  6. map the interpreter, if any, and pick the entry point;
 *  7. build the ELF tables on the user stack, record the code/data/stack
 *     bounds in current->mm and start the thread.
 *
 * Error handling uses stacked labels: out_free_dentry releases the
 * interpreter file and its program headers, out_free_interp the interpreter
 * path, out_free_ph the program headers, and out the scratch header copy.
 *
 * Return: 0 on success, a negative errno otherwise.
 */
665 static int load_elf_binary(struct linux_binprm *bprm)
667 struct file *interpreter = NULL; /* to shut gcc up */
668 unsigned long load_addr = 0, load_bias = 0;
669 int load_addr_set = 0;
670 char * elf_interpreter = NULL;
671 unsigned long error;
672 struct elf_phdr *elf_ppnt, *elf_phdata, *interp_elf_phdata = NULL;
673 unsigned long elf_bss, elf_brk;
674 int retval, i;
675 unsigned long elf_entry;
676 unsigned long interp_load_addr = 0;
677 unsigned long start_code, end_code, start_data, end_data;
678 unsigned long reloc_func_desc __maybe_unused = 0;
679 int executable_stack = EXSTACK_DEFAULT;
680 struct pt_regs *regs = current_pt_regs();
681 struct {
682 struct elfhdr elf_ex;
683 struct elfhdr interp_elf_ex;
684 } *loc;
685 struct arch_elf_state arch_state = INIT_ARCH_ELF_STATE;
687 loc = kmalloc(sizeof(*loc), GFP_KERNEL);
688 if (!loc) {
689 retval = -ENOMEM;
690 goto out_ret;
693 /* Get the exec-header */
694 loc->elf_ex = *((struct elfhdr *)bprm->buf);
696 retval = -ENOEXEC;
697 /* First of all, some simple consistency checks */
698 if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
699 goto out;
701 if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
702 goto out;
703 if (!elf_check_arch(&loc->elf_ex))
704 goto out;
705 if (!bprm->file->f_op->mmap)
706 goto out;
708 elf_phdata = load_elf_phdrs(&loc->elf_ex, bprm->file);
709 if (!elf_phdata)
710 goto out;
712 elf_ppnt = elf_phdata;
713 elf_bss = 0;
714 elf_brk = 0;
716 start_code = ~0UL;
717 end_code = 0;
718 start_data = 0;
719 end_data = 0;
/*
 * First pass over the program headers: stop at the first PT_INTERP entry,
 * read the interpreter path from the file, open it and fetch its header.
 */
721 for (i = 0; i < loc->elf_ex.e_phnum; i++) {
722 if (elf_ppnt->p_type == PT_INTERP) {
723 /* This is the program interpreter used for
724 * shared libraries - for now assume that this
725 * is an a.out format binary
727 retval = -ENOEXEC;
728 if (elf_ppnt->p_filesz > PATH_MAX ||
729 elf_ppnt->p_filesz < 2)
730 goto out_free_ph;
732 retval = -ENOMEM;
733 elf_interpreter = kmalloc(elf_ppnt->p_filesz,
734 GFP_KERNEL);
735 if (!elf_interpreter)
736 goto out_free_ph;
738 retval = kernel_read(bprm->file, elf_ppnt->p_offset,
739 elf_interpreter,
740 elf_ppnt->p_filesz);
741 if (retval != elf_ppnt->p_filesz) {
742 if (retval >= 0)
743 retval = -EIO;
744 goto out_free_interp;
746 /* make sure path is NULL terminated */
747 retval = -ENOEXEC;
748 if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
749 goto out_free_interp;
751 interpreter = open_exec(elf_interpreter);
752 retval = PTR_ERR(interpreter);
753 if (IS_ERR(interpreter))
754 goto out_free_interp;
757 * If the binary is not readable then enforce
758 * mm->dumpable = 0 regardless of the interpreter's
759 * permissions.
761 would_dump(bprm, interpreter);
763 /* Get the exec headers */
764 retval = kernel_read(interpreter, 0,
765 (void *)&loc->interp_elf_ex,
766 sizeof(loc->interp_elf_ex));
767 if (retval != sizeof(loc->interp_elf_ex)) {
768 if (retval >= 0)
769 retval = -EIO;
770 goto out_free_dentry;
773 break;
775 elf_ppnt++;
778 elf_ppnt = elf_phdata;
779 for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
780 switch (elf_ppnt->p_type) {
781 case PT_GNU_STACK:
782 if (elf_ppnt->p_flags & PF_X)
783 executable_stack = EXSTACK_ENABLE_X;
784 else
785 executable_stack = EXSTACK_DISABLE_X;
786 break;
788 case PT_LOPROC ... PT_HIPROC:
789 retval = arch_elf_pt_proc(&loc->elf_ex, elf_ppnt,
790 bprm->file, false,
791 &arch_state);
792 if (retval)
793 goto out_free_dentry;
794 break;
797 /* Some simple consistency checks for the interpreter */
798 if (elf_interpreter) {
799 retval = -ELIBBAD;
800 /* Not an ELF interpreter */
801 if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
802 goto out_free_dentry;
803 /* Verify the interpreter has a valid arch */
804 if (!elf_check_arch(&loc->interp_elf_ex))
805 goto out_free_dentry;
807 /* Load the interpreter program headers */
808 interp_elf_phdata = load_elf_phdrs(&loc->interp_elf_ex,
809 interpreter);
810 if (!interp_elf_phdata)
811 goto out_free_dentry;
813 /* Pass PT_LOPROC..PT_HIPROC headers to arch code */
814 elf_ppnt = interp_elf_phdata;
815 for (i = 0; i < loc->interp_elf_ex.e_phnum; i++, elf_ppnt++)
816 switch (elf_ppnt->p_type) {
817 case PT_LOPROC ... PT_HIPROC:
818 retval = arch_elf_pt_proc(&loc->interp_elf_ex,
819 elf_ppnt, interpreter,
820 true, &arch_state);
821 if (retval)
822 goto out_free_dentry;
823 break;
828 * Allow arch code to reject the ELF at this point, whilst it's
829 * still possible to return an error to the code that invoked
830 * the exec syscall.
832 retval = arch_check_elf(&loc->elf_ex, !!interpreter, &arch_state);
833 if (retval)
834 goto out_free_dentry;
836 /* Flush all traces of the currently running executable */
837 retval = flush_old_exec(bprm);
838 if (retval)
839 goto out_free_dentry;
841 /* Do this immediately, since STACK_TOP as used in setup_arg_pages
842 may depend on the personality. */
843 SET_PERSONALITY2(loc->elf_ex, &arch_state);
844 if (elf_read_implies_exec(loc->elf_ex, executable_stack))
845 current->personality |= READ_IMPLIES_EXEC;
847 if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
848 current->flags |= PF_RANDOMIZE;
850 setup_new_exec(bprm);
852 /* Do this so that we can load the interpreter, if need be. We will
853 change some of these later */
854 retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
855 executable_stack);
856 if (retval < 0)
857 goto out_free_dentry;
859 current->mm->start_stack = bprm->p;
861 /* Now we do a little grungy work by mmapping the ELF image into
862 the correct location in memory. */
863 for(i = 0, elf_ppnt = elf_phdata;
864 i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
865 int elf_prot = 0, elf_flags;
866 unsigned long k, vaddr;
867 unsigned long total_size = 0;
869 if (elf_ppnt->p_type != PT_LOAD)
870 continue;
872 if (unlikely (elf_brk > elf_bss)) {
873 unsigned long nbyte;
875 /* There was a PT_LOAD segment with p_memsz > p_filesz
876 before this one. Map anonymous pages, if needed,
877 and clear the area. */
878 retval = set_brk(elf_bss + load_bias,
879 elf_brk + load_bias);
880 if (retval)
881 goto out_free_dentry;
882 nbyte = ELF_PAGEOFFSET(elf_bss);
883 if (nbyte) {
884 nbyte = ELF_MIN_ALIGN - nbyte;
885 if (nbyte > elf_brk - elf_bss)
886 nbyte = elf_brk - elf_bss;
887 if (clear_user((void __user *)elf_bss +
888 load_bias, nbyte)) {
890 * This bss-zeroing can fail if the ELF
891 * file specifies odd protections. So
892 * we don't check the return value
898 if (elf_ppnt->p_flags & PF_R)
899 elf_prot |= PROT_READ;
900 if (elf_ppnt->p_flags & PF_W)
901 elf_prot |= PROT_WRITE;
902 if (elf_ppnt->p_flags & PF_X)
903 elf_prot |= PROT_EXEC;
905 elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;
907 vaddr = elf_ppnt->p_vaddr;
908 if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
909 elf_flags |= MAP_FIXED;
910 } else if (loc->elf_ex.e_type == ET_DYN) {
911 /* Try and get dynamic programs out of the way of the
912 * default mmap base, as well as whatever program they
913 * might try to exec. This is because the brk will
914 * follow the loader, and is not movable. */
915 load_bias = ELF_ET_DYN_BASE - vaddr;
916 if (current->flags & PF_RANDOMIZE)
917 load_bias += arch_mmap_rnd();
918 load_bias = ELF_PAGESTART(load_bias);
919 total_size = total_mapping_size(elf_phdata,
920 loc->elf_ex.e_phnum);
921 if (!total_size) {
922 retval = -EINVAL;
923 goto out_free_dentry;
927 error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
928 elf_prot, elf_flags, total_size);
929 if (BAD_ADDR(error)) {
930 retval = IS_ERR((void *)error) ?
931 PTR_ERR((void*)error) : -EINVAL;
932 goto out_free_dentry;
935 if (!load_addr_set) {
936 load_addr_set = 1;
937 load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
938 if (loc->elf_ex.e_type == ET_DYN) {
939 load_bias += error -
940 ELF_PAGESTART(load_bias + vaddr);
941 load_addr += load_bias;
942 reloc_func_desc = load_bias;
945 k = elf_ppnt->p_vaddr;
946 if (k < start_code)
947 start_code = k;
948 if (start_data < k)
949 start_data = k;
952 * Check to see if the section's size will overflow the
953 * allowed task size. Note that p_filesz must always be
954 * <= p_memsz so it is only necessary to check p_memsz.
956 if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
957 elf_ppnt->p_memsz > TASK_SIZE ||
958 TASK_SIZE - elf_ppnt->p_memsz < k) {
959 /* set_brk can never work. Avoid overflows. */
960 retval = -EINVAL;
961 goto out_free_dentry;
964 k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;
966 if (k > elf_bss)
967 elf_bss = k;
968 if ((elf_ppnt->p_flags & PF_X) && end_code < k)
969 end_code = k;
970 if (end_data < k)
971 end_data = k;
972 k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
973 if (k > elf_brk)
974 elf_brk = k;
/*
 * The bounds gathered above are link-time addresses from the program
 * headers; shift them all by the load bias chosen for this image.
 */
977 loc->elf_ex.e_entry += load_bias;
978 elf_bss += load_bias;
979 elf_brk += load_bias;
980 start_code += load_bias;
981 end_code += load_bias;
982 start_data += load_bias;
983 end_data += load_bias;
985 /* Calling set_brk effectively mmaps the pages that we need
986 * for the bss and break sections. We must do this before
987 * mapping in the interpreter, to make sure it doesn't wind
988 * up getting placed where the bss needs to go.
990 retval = set_brk(elf_bss, elf_brk);
991 if (retval)
992 goto out_free_dentry;
993 if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
994 retval = -EFAULT; /* Nobody gets to see this, but.. */
995 goto out_free_dentry;
998 if (elf_interpreter) {
999 unsigned long interp_map_addr = 0;
1001 elf_entry = load_elf_interp(&loc->interp_elf_ex,
1002 interpreter,
1003 &interp_map_addr,
1004 load_bias, interp_elf_phdata);
1005 if (!IS_ERR((void *)elf_entry)) {
1007 * load_elf_interp() returns relocation
1008 * adjustment
1010 interp_load_addr = elf_entry;
1011 elf_entry += loc->interp_elf_ex.e_entry;
1013 if (BAD_ADDR(elf_entry)) {
1014 retval = IS_ERR((void *)elf_entry) ?
1015 (int)elf_entry : -EINVAL;
1016 goto out_free_dentry;
1018 reloc_func_desc = interp_load_addr;
1020 allow_write_access(interpreter);
1021 fput(interpreter);
1022 kfree(elf_interpreter);
1023 } else {
1024 elf_entry = loc->elf_ex.e_entry;
1025 if (BAD_ADDR(elf_entry)) {
1026 retval = -EINVAL;
1027 goto out_free_dentry;
/*
 * Both program header tables are released here; failures from this point
 * on jump to the out label, which frees only the scratch header copy.
 */
1031 kfree(interp_elf_phdata);
1032 kfree(elf_phdata);
1034 set_binfmt(&elf_format);
1036 #ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
1037 retval = arch_setup_additional_pages(bprm, !!elf_interpreter);
1038 if (retval < 0)
1039 goto out;
1040 #endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
1042 install_exec_creds(bprm);
1043 retval = create_elf_tables(bprm, &loc->elf_ex,
1044 load_addr, interp_load_addr);
1045 if (retval < 0)
1046 goto out;
1047 /* N.B. passed_fileno might not be initialized? */
1048 current->mm->end_code = end_code;
1049 current->mm->start_code = start_code;
1050 current->mm->start_data = start_data;
1051 current->mm->end_data = end_data;
1052 current->mm->start_stack = bprm->p;
1054 if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
1055 current->mm->brk = current->mm->start_brk =
1056 arch_randomize_brk(current->mm);
1057 #ifdef compat_brk_randomized
1058 current->brk_randomized = 1;
1059 #endif
1062 if (current->personality & MMAP_PAGE_ZERO) {
1063 /* Why this, you ask??? Well SVr4 maps page 0 as read-only,
1064 and some applications "depend" upon this behavior.
1065 Since we do not have the power to recompile these, we
1066 emulate the SVr4 behavior. Sigh. */
1067 error = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
1068 MAP_FIXED | MAP_PRIVATE, 0);
1071 #ifdef ELF_PLAT_INIT
1073 * The ABI may specify that certain registers be set up in special
1074 * ways (on i386 %edx is the address of a DT_FINI function, for
1075 * example. In addition, it may also specify (eg, PowerPC64 ELF)
1076 * that the e_entry field is the address of the function descriptor
1077 * for the startup routine, rather than the address of the startup
1078 * routine itself. This macro performs whatever initialization to
1079 * the regs structure is required as well as any relocations to the
1080 * function descriptor entries when executing dynamically links apps.
1082 ELF_PLAT_INIT(regs, reloc_func_desc);
1083 #endif
1085 start_thread(regs, elf_entry, bprm->p);
1086 retval = 0;
1087 out:
1088 kfree(loc);
1089 out_ret:
1090 return retval;
1092 /* error cleanup */
1093 out_free_dentry:
1094 kfree(interp_elf_phdata);
1095 allow_write_access(interpreter);
1096 if (interpreter)
1097 fput(interpreter);
1098 out_free_interp:
1099 kfree(elf_interpreter);
1100 out_free_ph:
1101 kfree(elf_phdata);
1102 goto out;
#ifdef CONFIG_USELIB
/*
 * This is really simpleminded and specialized - we are loading an
 * a.out library that is given an ELF header.
 *
 * The file must be an ET_EXEC image for this architecture with at most two
 * program headers, exactly one of which is PT_LOAD.  That segment is mapped
 * at its fixed p_vaddr, the tail of its last file page is zeroed, and any
 * bss beyond the file-backed part is set up with vm_brk().
 *
 * Returns 0 on success or a negative errno.
 */
static int load_elf_library(struct file *file)
{
	struct elf_phdr *elf_phdata;
	struct elf_phdr *eppnt;
	unsigned long elf_bss, bss, len;
	unsigned long map_addr;
	int retval, error, i, j;
	struct elfhdr elf_ex;

	error = -ENOEXEC;
	retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
	if (retval != sizeof(elf_ex))
		goto out;

	if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
		goto out;

	/* First of all, some simple consistency checks */
	if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
	    !elf_check_arch(&elf_ex) || !file->f_op->mmap)
		goto out;

	/* Now read in all of the header information */

	j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
	/* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */

	error = -ENOMEM;
	elf_phdata = kmalloc(j, GFP_KERNEL);
	if (!elf_phdata)
		goto out;

	eppnt = elf_phdata;
	error = -ENOEXEC;
	retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
	if (retval != j)
		goto out_free_ph;

	/* Exactly one PT_LOAD segment is supported. */
	for (j = 0, i = 0; i < elf_ex.e_phnum; i++)
		if ((eppnt + i)->p_type == PT_LOAD)
			j++;
	if (j != 1)
		goto out_free_ph;

	while (eppnt->p_type != PT_LOAD)
		eppnt++;

	/*
	 * Now use mmap to map the library into memory.
	 *
	 * Keep the result in an unsigned long: storing it in an int would
	 * truncate the address on 64-bit and make the comparison below fail
	 * for any mapping that does not fit in 31 bits.
	 */
	map_addr = vm_mmap(file,
			ELF_PAGESTART(eppnt->p_vaddr),
			(eppnt->p_filesz +
			 ELF_PAGEOFFSET(eppnt->p_vaddr)),
			PROT_READ | PROT_WRITE | PROT_EXEC,
			MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
			(eppnt->p_offset -
			 ELF_PAGEOFFSET(eppnt->p_vaddr)));
	if (map_addr != ELF_PAGESTART(eppnt->p_vaddr)) {
		/* Hand back vm_mmap()'s errno when it reported one. */
		error = IS_ERR_VALUE(map_addr) ? (int)map_addr : -ENOEXEC;
		goto out_free_ph;
	}

	elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
	if (padzero(elf_bss)) {
		error = -EFAULT;
		goto out_free_ph;
	}

	len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
			    ELF_MIN_ALIGN - 1);
	bss = eppnt->p_memsz + eppnt->p_vaddr;
	if (bss > len) {
		unsigned long brk_addr = vm_brk(len, bss - len);

		/* Do not report success if the bss could not be set up. */
		if (IS_ERR_VALUE(brk_addr)) {
			error = (int)brk_addr;
			goto out_free_ph;
		}
	}
	error = 0;

out_free_ph:
	kfree(elf_phdata);
out:
	return error;
}
#endif /* #ifdef CONFIG_USELIB */
1186 #ifdef CONFIG_ELF_CORE
1188 * ELF core dumper
1190 * Modelled on fs/exec.c:aout_core_dump()
1191 * Jeremy Fitzhardinge <jeremy@sw.oz.au>
1195 * The purpose of always_dump_vma() is to make sure that special kernel mappings
1196 * that are useful for post-mortem analysis are included in every core dump.
1197 * In that way we ensure that the core dump is fully interpretable later
1198 * without matching up the same kernel and hardware config to see what PC values
1199 * meant. These special mappings include - vDSO, vsyscall, and other
1200 * architecture specific mappings
1202 static bool always_dump_vma(struct vm_area_struct *vma)
1204 /* Any vsyscall mappings? */
1205 if (vma == get_gate_vma(vma->vm_mm))
1206 return true;
1209 * Assume that all vmas with a .name op should always be dumped.
1210 * If this changes, a new vm_ops field can easily be added.
1212 if (vma->vm_ops && vma->vm_ops->name && vma->vm_ops->name(vma))
1213 return true;
1216 * arch_vma_name() returns non-NULL for special architecture mappings,
1217 * such as vDSO sections.
1219 if (arch_vma_name(vma))
1220 return true;
1222 return false;
1226 * Decide what to dump of a segment, part, all or none.
1228 static unsigned long vma_dump_size(struct vm_area_struct *vma,
1229 unsigned long mm_flags)
1231 #define FILTER(type) (mm_flags & (1UL << MMF_DUMP_##type))
1233 /* always dump the vdso and vsyscall sections */
1234 if (always_dump_vma(vma))
1235 goto whole;
1237 if (vma->vm_flags & VM_DONTDUMP)
1238 return 0;
1240 /* support for DAX */
1241 if (vma_is_dax(vma)) {
1242 if ((vma->vm_flags & VM_SHARED) && FILTER(DAX_SHARED))
1243 goto whole;
1244 if (!(vma->vm_flags & VM_SHARED) && FILTER(DAX_PRIVATE))
1245 goto whole;
1246 return 0;
1249 /* Hugetlb memory check */
1250 if (vma->vm_flags & VM_HUGETLB) {
1251 if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
1252 goto whole;
1253 if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
1254 goto whole;
1255 return 0;
1258 /* Do not dump I/O mapped devices or special mappings */
1259 if (vma->vm_flags & VM_IO)
1260 return 0;
1262 /* By default, dump shared memory if mapped from an anonymous file. */
1263 if (vma->vm_flags & VM_SHARED) {
1264 if (file_inode(vma->vm_file)->i_nlink == 0 ?
1265 FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
1266 goto whole;
1267 return 0;
1270 /* Dump segments that have been written to. */
1271 if (vma->anon_vma && FILTER(ANON_PRIVATE))
1272 goto whole;
1273 if (vma->vm_file == NULL)
1274 return 0;
1276 if (FILTER(MAPPED_PRIVATE))
1277 goto whole;
1280 * If this looks like the beginning of a DSO or executable mapping,
1281 * check for an ELF header. If we find one, dump the first page to
1282 * aid in determining what was mapped here.
1284 if (FILTER(ELF_HEADERS) &&
1285 vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) {
1286 u32 __user *header = (u32 __user *) vma->vm_start;
1287 u32 word;
1288 mm_segment_t fs = get_fs();
1290 * Doing it this way gets the constant folded by GCC.
1292 union {
1293 u32 cmp;
1294 char elfmag[SELFMAG];
1295 } magic;
1296 BUILD_BUG_ON(SELFMAG != sizeof word);
1297 magic.elfmag[EI_MAG0] = ELFMAG0;
1298 magic.elfmag[EI_MAG1] = ELFMAG1;
1299 magic.elfmag[EI_MAG2] = ELFMAG2;
1300 magic.elfmag[EI_MAG3] = ELFMAG3;
1302 * Switch to the user "segment" for get_user(),
1303 * then put back what elf_core_dump() had in place.
1305 set_fs(USER_DS);
1306 if (unlikely(get_user(word, header)))
1307 word = 0;
1308 set_fs(fs);
1309 if (word == magic.cmp)
1310 return PAGE_SIZE;
1313 #undef FILTER
1315 return 0;
1317 whole:
1318 return vma->vm_end - vma->vm_start;
/* An ELF note in memory */
struct memelfnote
{
	const char *name;	/* NUL-terminated note name, e.g. "CORE" */
	int type;		/* note type, written to n_type */
	unsigned int datasz;	/* size of the payload in bytes */
	void *data;		/* payload; not owned by this struct */
};
1330 static int notesize(struct memelfnote *en)
1332 int sz;
1334 sz = sizeof(struct elf_note);
1335 sz += roundup(strlen(en->name) + 1, 4);
1336 sz += roundup(en->datasz, 4);
1338 return sz;
1341 static int writenote(struct memelfnote *men, struct coredump_params *cprm)
1343 struct elf_note en;
1344 en.n_namesz = strlen(men->name) + 1;
1345 en.n_descsz = men->datasz;
1346 en.n_type = men->type;
1348 return dump_emit(cprm, &en, sizeof(en)) &&
1349 dump_emit(cprm, men->name, en.n_namesz) && dump_align(cprm, 4) &&
1350 dump_emit(cprm, men->data, men->datasz) && dump_align(cprm, 4);
1353 static void fill_elf_header(struct elfhdr *elf, int segs,
1354 u16 machine, u32 flags)
1356 memset(elf, 0, sizeof(*elf));
1358 memcpy(elf->e_ident, ELFMAG, SELFMAG);
1359 elf->e_ident[EI_CLASS] = ELF_CLASS;
1360 elf->e_ident[EI_DATA] = ELF_DATA;
1361 elf->e_ident[EI_VERSION] = EV_CURRENT;
1362 elf->e_ident[EI_OSABI] = ELF_OSABI;
1364 elf->e_type = ET_CORE;
1365 elf->e_machine = machine;
1366 elf->e_version = EV_CURRENT;
1367 elf->e_phoff = sizeof(struct elfhdr);
1368 elf->e_flags = flags;
1369 elf->e_ehsize = sizeof(struct elfhdr);
1370 elf->e_phentsize = sizeof(struct elf_phdr);
1371 elf->e_phnum = segs;
1373 return;
1376 static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
1378 phdr->p_type = PT_NOTE;
1379 phdr->p_offset = offset;
1380 phdr->p_vaddr = 0;
1381 phdr->p_paddr = 0;
1382 phdr->p_filesz = sz;
1383 phdr->p_memsz = 0;
1384 phdr->p_flags = 0;
1385 phdr->p_align = 0;
1386 return;
1389 static void fill_note(struct memelfnote *note, const char *name, int type,
1390 unsigned int sz, void *data)
1392 note->name = name;
1393 note->type = type;
1394 note->datasz = sz;
1395 note->data = data;
1396 return;
1400 * fill up all the fields in prstatus from the given task struct, except
1401 * registers which need to be filled up separately.
1403 static void fill_prstatus(struct elf_prstatus *prstatus,
1404 struct task_struct *p, long signr)
1406 prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
1407 prstatus->pr_sigpend = p->pending.signal.sig[0];
1408 prstatus->pr_sighold = p->blocked.sig[0];
1409 rcu_read_lock();
1410 prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1411 rcu_read_unlock();
1412 prstatus->pr_pid = task_pid_vnr(p);
1413 prstatus->pr_pgrp = task_pgrp_vnr(p);
1414 prstatus->pr_sid = task_session_vnr(p);
1415 if (thread_group_leader(p)) {
1416 struct task_cputime cputime;
1419 * This is the record for the group leader. It shows the
1420 * group-wide total, not its individual thread total.
1422 thread_group_cputime(p, &cputime);
1423 cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
1424 cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
1425 } else {
1426 cputime_t utime, stime;
1428 task_cputime(p, &utime, &stime);
1429 cputime_to_timeval(utime, &prstatus->pr_utime);
1430 cputime_to_timeval(stime, &prstatus->pr_stime);
1432 cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
1433 cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
1436 static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1437 struct mm_struct *mm)
1439 const struct cred *cred;
1440 unsigned int i, len;
1442 /* first copy the parameters from user space */
1443 memset(psinfo, 0, sizeof(struct elf_prpsinfo));
1445 len = mm->arg_end - mm->arg_start;
1446 if (len >= ELF_PRARGSZ)
1447 len = ELF_PRARGSZ-1;
1448 if (copy_from_user(&psinfo->pr_psargs,
1449 (const char __user *)mm->arg_start, len))
1450 return -EFAULT;
1451 for(i = 0; i < len; i++)
1452 if (psinfo->pr_psargs[i] == 0)
1453 psinfo->pr_psargs[i] = ' ';
1454 psinfo->pr_psargs[len] = 0;
1456 rcu_read_lock();
1457 psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1458 rcu_read_unlock();
1459 psinfo->pr_pid = task_pid_vnr(p);
1460 psinfo->pr_pgrp = task_pgrp_vnr(p);
1461 psinfo->pr_sid = task_session_vnr(p);
1463 i = p->state ? ffz(~p->state) + 1 : 0;
1464 psinfo->pr_state = i;
1465 psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
1466 psinfo->pr_zomb = psinfo->pr_sname == 'Z';
1467 psinfo->pr_nice = task_nice(p);
1468 psinfo->pr_flag = p->flags;
1469 rcu_read_lock();
1470 cred = __task_cred(p);
1471 SET_UID(psinfo->pr_uid, from_kuid_munged(cred->user_ns, cred->uid));
1472 SET_GID(psinfo->pr_gid, from_kgid_munged(cred->user_ns, cred->gid));
1473 rcu_read_unlock();
1474 strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
1476 return 0;
1479 static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
1481 elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
1482 int i = 0;
1484 i += 2;
1485 while (auxv[i - 2] != AT_NULL);
1486 fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
1489 static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
1490 const siginfo_t *siginfo)
1492 mm_segment_t old_fs = get_fs();
1493 set_fs(KERNEL_DS);
1494 copy_siginfo_to_user((user_siginfo_t __user *) csigdata, siginfo);
1495 set_fs(old_fs);
1496 fill_note(note, "CORE", NT_SIGINFO, sizeof(*csigdata), csigdata);
1499 #define MAX_FILE_NOTE_SIZE (4*1024*1024)
1501 * Format of NT_FILE note:
1503 * long count -- how many files are mapped
1504 * long page_size -- units for file_ofs
1505 * array of [COUNT] elements of
1506 * long start
1507 * long end
1508 * long file_ofs
1509 * followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL...
1511 static int fill_files_note(struct memelfnote *note)
1513 struct vm_area_struct *vma;
1514 unsigned count, size, names_ofs, remaining, n;
1515 user_long_t *data;
1516 user_long_t *start_end_ofs;
1517 char *name_base, *name_curpos;
1519 /* *Estimated* file count and total data size needed */
1520 count = current->mm->map_count;
1521 size = count * 64;
1523 names_ofs = (2 + 3 * count) * sizeof(data[0]);
1524 alloc:
1525 if (size >= MAX_FILE_NOTE_SIZE) /* paranoia check */
1526 return -EINVAL;
1527 size = round_up(size, PAGE_SIZE);
1528 data = vmalloc(size);
1529 if (!data)
1530 return -ENOMEM;
1532 start_end_ofs = data + 2;
1533 name_base = name_curpos = ((char *)data) + names_ofs;
1534 remaining = size - names_ofs;
1535 count = 0;
1536 for (vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) {
1537 struct file *file;
1538 const char *filename;
1540 file = vma->vm_file;
1541 if (!file)
1542 continue;
1543 filename = file_path(file, name_curpos, remaining);
1544 if (IS_ERR(filename)) {
1545 if (PTR_ERR(filename) == -ENAMETOOLONG) {
1546 vfree(data);
1547 size = size * 5 / 4;
1548 goto alloc;
1550 continue;
1553 /* file_path() fills at the end, move name down */
1554 /* n = strlen(filename) + 1: */
1555 n = (name_curpos + remaining) - filename;
1556 remaining = filename - name_curpos;
1557 memmove(name_curpos, filename, n);
1558 name_curpos += n;
1560 *start_end_ofs++ = vma->vm_start;
1561 *start_end_ofs++ = vma->vm_end;
1562 *start_end_ofs++ = vma->vm_pgoff;
1563 count++;
1566 /* Now we know exact count of files, can store it */
1567 data[0] = count;
1568 data[1] = PAGE_SIZE;
1570 * Count usually is less than current->mm->map_count,
1571 * we need to move filenames down.
1573 n = current->mm->map_count - count;
1574 if (n != 0) {
1575 unsigned shift_bytes = n * 3 * sizeof(data[0]);
1576 memmove(name_base - shift_bytes, name_base,
1577 name_curpos - name_base);
1578 name_curpos -= shift_bytes;
1581 size = name_curpos - (char *)data;
1582 fill_note(note, "CORE", NT_FILE, size, data);
1583 return 0;
1586 #ifdef CORE_DUMP_USE_REGSET
1587 #include <linux/regset.h>
1589 struct elf_thread_core_info {
1590 struct elf_thread_core_info *next;
1591 struct task_struct *task;
1592 struct elf_prstatus prstatus;
1593 struct memelfnote notes[0];
1596 struct elf_note_info {
1597 struct elf_thread_core_info *thread;
1598 struct memelfnote psinfo;
1599 struct memelfnote signote;
1600 struct memelfnote auxv;
1601 struct memelfnote files;
1602 user_siginfo_t csigdata;
1603 size_t size;
1604 int thread_notes;
1608 * When a regset has a writeback hook, we call it on each thread before
1609 * dumping user memory. On register window machines, this makes sure the
1610 * user memory backing the register data is up to date before we read it.
1612 static void do_thread_regset_writeback(struct task_struct *task,
1613 const struct user_regset *regset)
1615 if (regset->writeback)
1616 regset->writeback(task, regset, 1);
/*
 * Fallback accessors for struct elf_prstatus; each is used only when
 * nothing earlier in the build has already defined it.
 */
#ifndef PR_REG_SIZE
#define PR_REG_SIZE(S) sizeof(S)
#endif

#ifndef PRSTATUS_SIZE
#define PRSTATUS_SIZE(S) sizeof(S)
#endif

#ifndef PR_REG_PTR
#define PR_REG_PTR(S) (&((S)->pr_reg))
#endif

#ifndef SET_PR_FPVALID
#define SET_PR_FPVALID(S, V) ((S)->pr_fpvalid = (V))
#endif
1635 static int fill_thread_core_info(struct elf_thread_core_info *t,
1636 const struct user_regset_view *view,
1637 long signr, size_t *total)
1639 unsigned int i;
1642 * NT_PRSTATUS is the one special case, because the regset data
1643 * goes into the pr_reg field inside the note contents, rather
1644 * than being the whole note contents. We fill the reset in here.
1645 * We assume that regset 0 is NT_PRSTATUS.
1647 fill_prstatus(&t->prstatus, t->task, signr);
1648 (void) view->regsets[0].get(t->task, &view->regsets[0],
1649 0, PR_REG_SIZE(t->prstatus.pr_reg),
1650 PR_REG_PTR(&t->prstatus), NULL);
1652 fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
1653 PRSTATUS_SIZE(t->prstatus), &t->prstatus);
1654 *total += notesize(&t->notes[0]);
1656 do_thread_regset_writeback(t->task, &view->regsets[0]);
1659 * Each other regset might generate a note too. For each regset
1660 * that has no core_note_type or is inactive, we leave t->notes[i]
1661 * all zero and we'll know to skip writing it later.
1663 for (i = 1; i < view->n; ++i) {
1664 const struct user_regset *regset = &view->regsets[i];
1665 do_thread_regset_writeback(t->task, regset);
1666 if (regset->core_note_type && regset->get &&
1667 (!regset->active || regset->active(t->task, regset))) {
1668 int ret;
1669 size_t size = regset->n * regset->size;
1670 void *data = kmalloc(size, GFP_KERNEL);
1671 if (unlikely(!data))
1672 return 0;
1673 ret = regset->get(t->task, regset,
1674 0, size, data, NULL);
1675 if (unlikely(ret))
1676 kfree(data);
1677 else {
1678 if (regset->core_note_type != NT_PRFPREG)
1679 fill_note(&t->notes[i], "LINUX",
1680 regset->core_note_type,
1681 size, data);
1682 else {
1683 SET_PR_FPVALID(&t->prstatus, 1);
1684 fill_note(&t->notes[i], "CORE",
1685 NT_PRFPREG, size, data);
1687 *total += notesize(&t->notes[i]);
1692 return 1;
1695 static int fill_note_info(struct elfhdr *elf, int phdrs,
1696 struct elf_note_info *info,
1697 const siginfo_t *siginfo, struct pt_regs *regs)
1699 struct task_struct *dump_task = current;
1700 const struct user_regset_view *view = task_user_regset_view(dump_task);
1701 struct elf_thread_core_info *t;
1702 struct elf_prpsinfo *psinfo;
1703 struct core_thread *ct;
1704 unsigned int i;
1706 info->size = 0;
1707 info->thread = NULL;
1709 psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1710 if (psinfo == NULL) {
1711 info->psinfo.data = NULL; /* So we don't free this wrongly */
1712 return 0;
1715 fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1718 * Figure out how many notes we're going to need for each thread.
1720 info->thread_notes = 0;
1721 for (i = 0; i < view->n; ++i)
1722 if (view->regsets[i].core_note_type != 0)
1723 ++info->thread_notes;
1726 * Sanity check. We rely on regset 0 being in NT_PRSTATUS,
1727 * since it is our one special case.
1729 if (unlikely(info->thread_notes == 0) ||
1730 unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
1731 WARN_ON(1);
1732 return 0;
1736 * Initialize the ELF file header.
1738 fill_elf_header(elf, phdrs,
1739 view->e_machine, view->e_flags);
1742 * Allocate a structure for each thread.
1744 for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
1745 t = kzalloc(offsetof(struct elf_thread_core_info,
1746 notes[info->thread_notes]),
1747 GFP_KERNEL);
1748 if (unlikely(!t))
1749 return 0;
1751 t->task = ct->task;
1752 if (ct->task == dump_task || !info->thread) {
1753 t->next = info->thread;
1754 info->thread = t;
1755 } else {
1757 * Make sure to keep the original task at
1758 * the head of the list.
1760 t->next = info->thread->next;
1761 info->thread->next = t;
1766 * Now fill in each thread's information.
1768 for (t = info->thread; t != NULL; t = t->next)
1769 if (!fill_thread_core_info(t, view, siginfo->si_signo, &info->size))
1770 return 0;
1773 * Fill in the two process-wide notes.
1775 fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
1776 info->size += notesize(&info->psinfo);
1778 fill_siginfo_note(&info->signote, &info->csigdata, siginfo);
1779 info->size += notesize(&info->signote);
1781 fill_auxv_note(&info->auxv, current->mm);
1782 info->size += notesize(&info->auxv);
1784 if (fill_files_note(&info->files) == 0)
1785 info->size += notesize(&info->files);
1787 return 1;
1790 static size_t get_note_info_size(struct elf_note_info *info)
1792 return info->size;
1796 * Write all the notes for each thread. When writing the first thread, the
1797 * process-wide notes are interleaved after the first thread-specific note.
1799 static int write_note_info(struct elf_note_info *info,
1800 struct coredump_params *cprm)
1802 bool first = true;
1803 struct elf_thread_core_info *t = info->thread;
1805 do {
1806 int i;
1808 if (!writenote(&t->notes[0], cprm))
1809 return 0;
1811 if (first && !writenote(&info->psinfo, cprm))
1812 return 0;
1813 if (first && !writenote(&info->signote, cprm))
1814 return 0;
1815 if (first && !writenote(&info->auxv, cprm))
1816 return 0;
1817 if (first && info->files.data &&
1818 !writenote(&info->files, cprm))
1819 return 0;
1821 for (i = 1; i < info->thread_notes; ++i)
1822 if (t->notes[i].data &&
1823 !writenote(&t->notes[i], cprm))
1824 return 0;
1826 first = false;
1827 t = t->next;
1828 } while (t);
1830 return 1;
1833 static void free_note_info(struct elf_note_info *info)
1835 struct elf_thread_core_info *threads = info->thread;
1836 while (threads) {
1837 unsigned int i;
1838 struct elf_thread_core_info *t = threads;
1839 threads = t->next;
1840 WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
1841 for (i = 1; i < info->thread_notes; ++i)
1842 kfree(t->notes[i].data);
1843 kfree(t);
1845 kfree(info->psinfo.data);
1846 vfree(info->files.data);
1849 #else
1851 /* Here is the structure in which status of each thread is captured. */
1852 struct elf_thread_status
1854 struct list_head list;
1855 struct elf_prstatus prstatus; /* NT_PRSTATUS */
1856 elf_fpregset_t fpu; /* NT_PRFPREG */
1857 struct task_struct *thread;
1858 #ifdef ELF_CORE_COPY_XFPREGS
1859 elf_fpxregset_t xfpu; /* ELF_CORE_XFPREG_TYPE */
1860 #endif
1861 struct memelfnote notes[3];
1862 int num_notes;
1866 * In order to add the specific thread information for the elf file format,
1867 * we need to keep a linked list of every threads pr_status and then create
1868 * a single section for them in the final core file.
1870 static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1872 int sz = 0;
1873 struct task_struct *p = t->thread;
1874 t->num_notes = 0;
1876 fill_prstatus(&t->prstatus, p, signr);
1877 elf_core_copy_task_regs(p, &t->prstatus.pr_reg);
1879 fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1880 &(t->prstatus));
1881 t->num_notes++;
1882 sz += notesize(&t->notes[0]);
1884 if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1885 &t->fpu))) {
1886 fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1887 &(t->fpu));
1888 t->num_notes++;
1889 sz += notesize(&t->notes[1]);
1892 #ifdef ELF_CORE_COPY_XFPREGS
1893 if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1894 fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
1895 sizeof(t->xfpu), &t->xfpu);
1896 t->num_notes++;
1897 sz += notesize(&t->notes[2]);
1899 #endif
1900 return sz;
1903 struct elf_note_info {
1904 struct memelfnote *notes;
1905 struct memelfnote *notes_files;
1906 struct elf_prstatus *prstatus; /* NT_PRSTATUS */
1907 struct elf_prpsinfo *psinfo; /* NT_PRPSINFO */
1908 struct list_head thread_list;
1909 elf_fpregset_t *fpu;
1910 #ifdef ELF_CORE_COPY_XFPREGS
1911 elf_fpxregset_t *xfpu;
1912 #endif
1913 user_siginfo_t csigdata;
1914 int thread_status_size;
1915 int numnote;
1918 static int elf_note_info_init(struct elf_note_info *info)
1920 memset(info, 0, sizeof(*info));
1921 INIT_LIST_HEAD(&info->thread_list);
1923 /* Allocate space for ELF notes */
1924 info->notes = kmalloc(8 * sizeof(struct memelfnote), GFP_KERNEL);
1925 if (!info->notes)
1926 return 0;
1927 info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
1928 if (!info->psinfo)
1929 return 0;
1930 info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
1931 if (!info->prstatus)
1932 return 0;
1933 info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
1934 if (!info->fpu)
1935 return 0;
1936 #ifdef ELF_CORE_COPY_XFPREGS
1937 info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
1938 if (!info->xfpu)
1939 return 0;
1940 #endif
1941 return 1;
1944 static int fill_note_info(struct elfhdr *elf, int phdrs,
1945 struct elf_note_info *info,
1946 const siginfo_t *siginfo, struct pt_regs *regs)
1948 struct list_head *t;
1949 struct core_thread *ct;
1950 struct elf_thread_status *ets;
1952 if (!elf_note_info_init(info))
1953 return 0;
1955 for (ct = current->mm->core_state->dumper.next;
1956 ct; ct = ct->next) {
1957 ets = kzalloc(sizeof(*ets), GFP_KERNEL);
1958 if (!ets)
1959 return 0;
1961 ets->thread = ct->task;
1962 list_add(&ets->list, &info->thread_list);
1965 list_for_each(t, &info->thread_list) {
1966 int sz;
1968 ets = list_entry(t, struct elf_thread_status, list);
1969 sz = elf_dump_thread_status(siginfo->si_signo, ets);
1970 info->thread_status_size += sz;
1972 /* now collect the dump for the current */
1973 memset(info->prstatus, 0, sizeof(*info->prstatus));
1974 fill_prstatus(info->prstatus, current, siginfo->si_signo);
1975 elf_core_copy_regs(&info->prstatus->pr_reg, regs);
1977 /* Set up header */
1978 fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS);
1981 * Set up the notes in similar form to SVR4 core dumps made
1982 * with info from their /proc.
1985 fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
1986 sizeof(*info->prstatus), info->prstatus);
1987 fill_psinfo(info->psinfo, current->group_leader, current->mm);
1988 fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
1989 sizeof(*info->psinfo), info->psinfo);
1991 fill_siginfo_note(info->notes + 2, &info->csigdata, siginfo);
1992 fill_auxv_note(info->notes + 3, current->mm);
1993 info->numnote = 4;
1995 if (fill_files_note(info->notes + info->numnote) == 0) {
1996 info->notes_files = info->notes + info->numnote;
1997 info->numnote++;
2000 /* Try to dump the FPU. */
2001 info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
2002 info->fpu);
2003 if (info->prstatus->pr_fpvalid)
2004 fill_note(info->notes + info->numnote++,
2005 "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
2006 #ifdef ELF_CORE_COPY_XFPREGS
2007 if (elf_core_copy_task_xfpregs(current, info->xfpu))
2008 fill_note(info->notes + info->numnote++,
2009 "LINUX", ELF_CORE_XFPREG_TYPE,
2010 sizeof(*info->xfpu), info->xfpu);
2011 #endif
2013 return 1;
2016 static size_t get_note_info_size(struct elf_note_info *info)
2018 int sz = 0;
2019 int i;
2021 for (i = 0; i < info->numnote; i++)
2022 sz += notesize(info->notes + i);
2024 sz += info->thread_status_size;
2026 return sz;
2029 static int write_note_info(struct elf_note_info *info,
2030 struct coredump_params *cprm)
2032 int i;
2033 struct list_head *t;
2035 for (i = 0; i < info->numnote; i++)
2036 if (!writenote(info->notes + i, cprm))
2037 return 0;
2039 /* write out the thread status notes section */
2040 list_for_each(t, &info->thread_list) {
2041 struct elf_thread_status *tmp =
2042 list_entry(t, struct elf_thread_status, list);
2044 for (i = 0; i < tmp->num_notes; i++)
2045 if (!writenote(&tmp->notes[i], cprm))
2046 return 0;
2049 return 1;
2052 static void free_note_info(struct elf_note_info *info)
2054 while (!list_empty(&info->thread_list)) {
2055 struct list_head *tmp = info->thread_list.next;
2056 list_del(tmp);
2057 kfree(list_entry(tmp, struct elf_thread_status, list));
2060 /* Free data possibly allocated by fill_files_note(): */
2061 if (info->notes_files)
2062 vfree(info->notes_files->data);
2064 kfree(info->prstatus);
2065 kfree(info->psinfo);
2066 kfree(info->notes);
2067 kfree(info->fpu);
2068 #ifdef ELF_CORE_COPY_XFPREGS
2069 kfree(info->xfpu);
2070 #endif
2073 #endif
2075 static struct vm_area_struct *first_vma(struct task_struct *tsk,
2076 struct vm_area_struct *gate_vma)
2078 struct vm_area_struct *ret = tsk->mm->mmap;
2080 if (ret)
2081 return ret;
2082 return gate_vma;
2085 * Helper function for iterating across a vma list. It ensures that the caller
2086 * will visit `gate_vma' prior to terminating the search.
2088 static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
2089 struct vm_area_struct *gate_vma)
2091 struct vm_area_struct *ret;
2093 ret = this_vma->vm_next;
2094 if (ret)
2095 return ret;
2096 if (this_vma == gate_vma)
2097 return NULL;
2098 return gate_vma;
2101 static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
2102 elf_addr_t e_shoff, int segs)
2104 elf->e_shoff = e_shoff;
2105 elf->e_shentsize = sizeof(*shdr4extnum);
2106 elf->e_shnum = 1;
2107 elf->e_shstrndx = SHN_UNDEF;
2109 memset(shdr4extnum, 0, sizeof(*shdr4extnum));
2111 shdr4extnum->sh_type = SHT_NULL;
2112 shdr4extnum->sh_size = elf->e_shnum;
2113 shdr4extnum->sh_link = elf->e_shstrndx;
2114 shdr4extnum->sh_info = segs;
2118 * Actual dumper
2120 * This is a two-pass process; first we find the offsets of the bits,
2121 * and then they are actually written out. If we run out of core limit
2122 * we just truncate.
2124 static int elf_core_dump(struct coredump_params *cprm)
2126 int has_dumped = 0;
2127 mm_segment_t fs;
2128 int segs, i;
2129 size_t vma_data_size = 0;
2130 struct vm_area_struct *vma, *gate_vma;
2131 struct elfhdr *elf = NULL;
2132 loff_t offset = 0, dataoff;
2133 struct elf_note_info info = { };
2134 struct elf_phdr *phdr4note = NULL;
2135 struct elf_shdr *shdr4extnum = NULL;
2136 Elf_Half e_phnum;
2137 elf_addr_t e_shoff;
2138 elf_addr_t *vma_filesz = NULL;
2141 * We no longer stop all VM operations.
2143 * This is because those proceses that could possibly change map_count
2144 * or the mmap / vma pages are now blocked in do_exit on current
2145 * finishing this core dump.
2147 * Only ptrace can touch these memory addresses, but it doesn't change
2148 * the map_count or the pages allocated. So no possibility of crashing
2149 * exists while dumping the mm->vm_next areas to the core file.
2152 /* alloc memory for large data structures: too large to be on stack */
2153 elf = kmalloc(sizeof(*elf), GFP_KERNEL);
2154 if (!elf)
2155 goto out;
2157 * The number of segs are recored into ELF header as 16bit value.
2158 * Please check DEFAULT_MAX_MAP_COUNT definition when you modify here.
2160 segs = current->mm->map_count;
2161 segs += elf_core_extra_phdrs();
2163 gate_vma = get_gate_vma(current->mm);
2164 if (gate_vma != NULL)
2165 segs++;
2167 /* for notes section */
2168 segs++;
2170 /* If segs > PN_XNUM(0xffff), then e_phnum overflows. To avoid
2171 * this, kernel supports extended numbering. Have a look at
2172 * include/linux/elf.h for further information. */
2173 e_phnum = segs > PN_XNUM ? PN_XNUM : segs;
2176 * Collect all the non-memory information about the process for the
2177 * notes. This also sets up the file header.
2179 if (!fill_note_info(elf, e_phnum, &info, cprm->siginfo, cprm->regs))
2180 goto cleanup;
2182 has_dumped = 1;
2184 fs = get_fs();
2185 set_fs(KERNEL_DS);
2187 offset += sizeof(*elf); /* Elf header */
2188 offset += segs * sizeof(struct elf_phdr); /* Program headers */
2190 /* Write notes phdr entry */
2192 size_t sz = get_note_info_size(&info);
2194 sz += elf_coredump_extra_notes_size();
2196 phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL);
2197 if (!phdr4note)
2198 goto end_coredump;
2200 fill_elf_note_phdr(phdr4note, sz, offset);
2201 offset += sz;
2204 dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
2206 vma_filesz = kmalloc_array(segs - 1, sizeof(*vma_filesz), GFP_KERNEL);
2207 if (!vma_filesz)
2208 goto end_coredump;
2210 for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
2211 vma = next_vma(vma, gate_vma)) {
2212 unsigned long dump_size;
2214 dump_size = vma_dump_size(vma, cprm->mm_flags);
2215 vma_filesz[i++] = dump_size;
2216 vma_data_size += dump_size;
2219 offset += vma_data_size;
2220 offset += elf_core_extra_data_size();
2221 e_shoff = offset;
2223 if (e_phnum == PN_XNUM) {
2224 shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL);
2225 if (!shdr4extnum)
2226 goto end_coredump;
2227 fill_extnum_info(elf, shdr4extnum, e_shoff, segs);
2230 offset = dataoff;
2232 if (!dump_emit(cprm, elf, sizeof(*elf)))
2233 goto end_coredump;
2235 if (!dump_emit(cprm, phdr4note, sizeof(*phdr4note)))
2236 goto end_coredump;
2238 /* Write program headers for segments dump */
2239 for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
2240 vma = next_vma(vma, gate_vma)) {
2241 struct elf_phdr phdr;
2243 phdr.p_type = PT_LOAD;
2244 phdr.p_offset = offset;
2245 phdr.p_vaddr = vma->vm_start;
2246 phdr.p_paddr = 0;
2247 phdr.p_filesz = vma_filesz[i++];
2248 phdr.p_memsz = vma->vm_end - vma->vm_start;
2249 offset += phdr.p_filesz;
2250 phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
2251 if (vma->vm_flags & VM_WRITE)
2252 phdr.p_flags |= PF_W;
2253 if (vma->vm_flags & VM_EXEC)
2254 phdr.p_flags |= PF_X;
2255 phdr.p_align = ELF_EXEC_PAGESIZE;
2257 if (!dump_emit(cprm, &phdr, sizeof(phdr)))
2258 goto end_coredump;
2261 if (!elf_core_write_extra_phdrs(cprm, offset))
2262 goto end_coredump;
2264 /* write out the notes section */
2265 if (!write_note_info(&info, cprm))
2266 goto end_coredump;
2268 if (elf_coredump_extra_notes_write(cprm))
2269 goto end_coredump;
2271 /* Align to page */
2272 if (!dump_skip(cprm, dataoff - cprm->written))
2273 goto end_coredump;
2275 for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
2276 vma = next_vma(vma, gate_vma)) {
2277 unsigned long addr;
2278 unsigned long end;
2280 end = vma->vm_start + vma_filesz[i++];
2282 for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
2283 struct page *page;
2284 int stop;
2286 page = get_dump_page(addr);
2287 if (page) {
2288 void *kaddr = kmap(page);
2289 stop = !dump_emit(cprm, kaddr, PAGE_SIZE);
2290 kunmap(page);
2291 page_cache_release(page);
2292 } else
2293 stop = !dump_skip(cprm, PAGE_SIZE);
2294 if (stop)
2295 goto end_coredump;
2299 if (!elf_core_write_extra_data(cprm))
2300 goto end_coredump;
2302 if (e_phnum == PN_XNUM) {
2303 if (!dump_emit(cprm, shdr4extnum, sizeof(*shdr4extnum)))
2304 goto end_coredump;
2307 end_coredump:
2308 set_fs(fs);
2310 cleanup:
2311 free_note_info(&info);
2312 kfree(shdr4extnum);
2313 kfree(vma_filesz);
2314 kfree(phdr4note);
2315 kfree(elf);
2316 out:
2317 return has_dumped;
2320 #endif /* CONFIG_ELF_CORE */
2322 static int __init init_elf_binfmt(void)
2324 register_binfmt(&elf_format);
2325 return 0;
2328 static void __exit exit_elf_binfmt(void)
2330 /* Remove the COFF and ELF loaders. */
2331 unregister_binfmt(&elf_format);
2334 core_initcall(init_elf_binfmt);
2335 module_exit(exit_elf_binfmt);
2336 MODULE_LICENSE("GPL");