coredump masking: documentation for /proc/pid/coredump_filter
[usb.git] / fs / binfmt_elf.c
blobba24cb2ff6ceb83859f782677372ded1c69b20c2
1 /*
2 * linux/fs/binfmt_elf.c
4 * These are the functions used to load ELF format executables as used
5 * on SVr4 machines. Information on the format may be found in the book
6 * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
7 * Tools".
9 * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
12 #include <linux/module.h>
13 #include <linux/kernel.h>
14 #include <linux/fs.h>
15 #include <linux/stat.h>
16 #include <linux/time.h>
17 #include <linux/mm.h>
18 #include <linux/mman.h>
19 #include <linux/a.out.h>
20 #include <linux/errno.h>
21 #include <linux/signal.h>
22 #include <linux/binfmts.h>
23 #include <linux/string.h>
24 #include <linux/file.h>
25 #include <linux/fcntl.h>
26 #include <linux/ptrace.h>
27 #include <linux/slab.h>
28 #include <linux/shm.h>
29 #include <linux/personality.h>
30 #include <linux/elfcore.h>
31 #include <linux/init.h>
32 #include <linux/highuid.h>
33 #include <linux/smp.h>
34 #include <linux/compiler.h>
35 #include <linux/highmem.h>
36 #include <linux/pagemap.h>
37 #include <linux/security.h>
38 #include <linux/syscalls.h>
39 #include <linux/random.h>
40 #include <linux/elf.h>
41 #include <linux/utsname.h>
42 #include <asm/uaccess.h>
43 #include <asm/param.h>
44 #include <asm/page.h>
/* Forward declarations for the handlers referenced by elf_format. */
static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs);
static int load_elf_library(struct file *);
static unsigned long elf_map (struct file *, unsigned long, struct elf_phdr *,
				int, int, unsigned long);

/*
 * If we don't support core dumping, then supply a NULL so we
 * don't even try.
 */
#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file);
#else
#define elf_core_dump	NULL
#endif

/* Mapping granularity: the larger of the ELF page size and the MMU page size. */
#if ELF_EXEC_PAGESIZE > PAGE_SIZE
#define ELF_MIN_ALIGN	ELF_EXEC_PAGESIZE
#else
#define ELF_MIN_ALIGN	PAGE_SIZE
#endif

#ifndef ELF_CORE_EFLAGS
#define ELF_CORE_EFLAGS	0
#endif

/*
 * Round down to, take the offset within, and round up to an ELF_MIN_ALIGN
 * boundary.  The masks in ELF_PAGESTART and ELF_PAGEALIGN are widened to
 * unsigned long before being inverted so that a narrower unsigned
 * ELF_MIN_ALIGN cannot clear the upper bits of a full-width address.
 */
#define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
#define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
#define ELF_PAGEALIGN(_v) \
	(((_v) + ELF_MIN_ALIGN - 1) & ~(unsigned long)(ELF_MIN_ALIGN - 1))
74 static struct linux_binfmt elf_format = {
75 .module = THIS_MODULE,
76 .load_binary = load_elf_binary,
77 .load_shlib = load_elf_library,
78 .core_dump = elf_core_dump,
79 .min_coredump = ELF_EXEC_PAGESIZE,
80 .hasvdso = 1
83 #define BAD_ADDR(x) IS_ERR_VALUE(x)
85 static int set_brk(unsigned long start, unsigned long end)
87 start = ELF_PAGEALIGN(start);
88 end = ELF_PAGEALIGN(end);
89 if (end > start) {
90 unsigned long addr;
91 down_write(&current->mm->mmap_sem);
92 addr = do_brk(start, end - start);
93 up_write(&current->mm->mmap_sem);
94 if (BAD_ADDR(addr))
95 return addr;
97 current->mm->start_brk = current->mm->brk = end;
98 return 0;
101 /* We need to explicitly zero any fractional pages
102 after the data section (i.e. bss). This would
103 contain the junk from the file that should not
104 be in memory
106 static int padzero(unsigned long elf_bss)
108 unsigned long nbyte;
110 nbyte = ELF_PAGEOFFSET(elf_bss);
111 if (nbyte) {
112 nbyte = ELF_MIN_ALIGN - nbyte;
113 if (clear_user((void __user *) elf_bss, nbyte))
114 return -EFAULT;
116 return 0;
/*
 * Let's use some macros to make this stack manipulation a little clearer.
 *
 * STACK_ADD   - address 'items' elf_addr_t slots further along the stack
 * STACK_ROUND - 16-byte-aligned stack address after 'items' more slots
 * STACK_ALLOC - reserve 'len' bytes by adjusting sp; yields the start of
 *               the reserved area
 *
 * Every macro argument is parenthesized so that expression arguments
 * expand with the intended precedence.
 */
#ifdef CONFIG_STACK_GROWSUP
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
#define STACK_ROUND(sp, items) \
	((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ \
	elf_addr_t __user *old_sp = (elf_addr_t __user *)(sp); (sp) += (len); \
	old_sp; })
#else
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
#define STACK_ROUND(sp, items) \
	(((unsigned long) ((sp) - (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ (sp) -= (len); (sp); })
#endif
134 static int
135 create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
136 int interp_aout, unsigned long load_addr,
137 unsigned long interp_load_addr)
139 unsigned long p = bprm->p;
140 int argc = bprm->argc;
141 int envc = bprm->envc;
142 elf_addr_t __user *argv;
143 elf_addr_t __user *envp;
144 elf_addr_t __user *sp;
145 elf_addr_t __user *u_platform;
146 const char *k_platform = ELF_PLATFORM;
147 int items;
148 elf_addr_t *elf_info;
149 int ei_index = 0;
150 struct task_struct *tsk = current;
151 struct vm_area_struct *vma;
154 * If this architecture has a platform capability string, copy it
155 * to userspace. In some cases (Sparc), this info is impossible
156 * for userspace to get any other way, in others (i386) it is
157 * merely difficult.
159 u_platform = NULL;
160 if (k_platform) {
161 size_t len = strlen(k_platform) + 1;
164 * In some cases (e.g. Hyper-Threading), we want to avoid L1
165 * evictions by the processes running on the same package. One
166 * thing we can do is to shuffle the initial stack for them.
169 p = arch_align_stack(p);
171 u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
172 if (__copy_to_user(u_platform, k_platform, len))
173 return -EFAULT;
176 /* Create the ELF interpreter info */
177 elf_info = (elf_addr_t *)current->mm->saved_auxv;
178 #define NEW_AUX_ENT(id, val) \
179 do { \
180 elf_info[ei_index++] = id; \
181 elf_info[ei_index++] = val; \
182 } while (0)
184 #ifdef ARCH_DLINFO
186 * ARCH_DLINFO must come first so PPC can do its special alignment of
187 * AUXV.
189 ARCH_DLINFO;
190 #endif
191 NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
192 NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
193 NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
194 NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
195 NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
196 NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
197 NEW_AUX_ENT(AT_BASE, interp_load_addr);
198 NEW_AUX_ENT(AT_FLAGS, 0);
199 NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
200 NEW_AUX_ENT(AT_UID, tsk->uid);
201 NEW_AUX_ENT(AT_EUID, tsk->euid);
202 NEW_AUX_ENT(AT_GID, tsk->gid);
203 NEW_AUX_ENT(AT_EGID, tsk->egid);
204 NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
205 if (k_platform) {
206 NEW_AUX_ENT(AT_PLATFORM,
207 (elf_addr_t)(unsigned long)u_platform);
209 if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
210 NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
212 #undef NEW_AUX_ENT
213 /* AT_NULL is zero; clear the rest too */
214 memset(&elf_info[ei_index], 0,
215 sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);
217 /* And advance past the AT_NULL entry. */
218 ei_index += 2;
220 sp = STACK_ADD(p, ei_index);
222 items = (argc + 1) + (envc + 1);
223 if (interp_aout) {
224 items += 3; /* a.out interpreters require argv & envp too */
225 } else {
226 items += 1; /* ELF interpreters only put argc on the stack */
228 bprm->p = STACK_ROUND(sp, items);
230 /* Point sp at the lowest address on the stack */
231 #ifdef CONFIG_STACK_GROWSUP
232 sp = (elf_addr_t __user *)bprm->p - items - ei_index;
233 bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
234 #else
235 sp = (elf_addr_t __user *)bprm->p;
236 #endif
240 * Grow the stack manually; some architectures have a limit on how
241 * far ahead a user-space access may be in order to grow the stack.
243 vma = find_extend_vma(current->mm, bprm->p);
244 if (!vma)
245 return -EFAULT;
247 /* Now, let's put argc (and argv, envp if appropriate) on the stack */
248 if (__put_user(argc, sp++))
249 return -EFAULT;
250 if (interp_aout) {
251 argv = sp + 2;
252 envp = argv + argc + 1;
253 if (__put_user((elf_addr_t)(unsigned long)argv, sp++) ||
254 __put_user((elf_addr_t)(unsigned long)envp, sp++))
255 return -EFAULT;
256 } else {
257 argv = sp;
258 envp = argv + argc + 1;
261 /* Populate argv and envp */
262 p = current->mm->arg_end = current->mm->arg_start;
263 while (argc-- > 0) {
264 size_t len;
265 if (__put_user((elf_addr_t)p, argv++))
266 return -EFAULT;
267 len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
268 if (!len || len > MAX_ARG_STRLEN)
269 return 0;
270 p += len;
272 if (__put_user(0, argv))
273 return -EFAULT;
274 current->mm->arg_end = current->mm->env_start = p;
275 while (envc-- > 0) {
276 size_t len;
277 if (__put_user((elf_addr_t)p, envp++))
278 return -EFAULT;
279 len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
280 if (!len || len > MAX_ARG_STRLEN)
281 return 0;
282 p += len;
284 if (__put_user(0, envp))
285 return -EFAULT;
286 current->mm->env_end = p;
288 /* Put the elf_info on the stack in the right place. */
289 sp = (elf_addr_t __user *)envp + 1;
290 if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
291 return -EFAULT;
292 return 0;
295 #ifndef elf_map
297 static unsigned long elf_map(struct file *filep, unsigned long addr,
298 struct elf_phdr *eppnt, int prot, int type,
299 unsigned long total_size)
301 unsigned long map_addr;
302 unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
303 unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
304 addr = ELF_PAGESTART(addr);
305 size = ELF_PAGEALIGN(size);
307 /* mmap() will return -EINVAL if given a zero size, but a
308 * segment with zero filesize is perfectly valid */
309 if (!size)
310 return addr;
312 down_write(&current->mm->mmap_sem);
314 * total_size is the size of the ELF (interpreter) image.
315 * The _first_ mmap needs to know the full size, otherwise
316 * randomization might put this image into an overlapping
317 * position with the ELF binary image. (since size < total_size)
318 * So we first map the 'big' image - and unmap the remainder at
319 * the end. (which unmap is needed for ELF images with holes.)
321 if (total_size) {
322 total_size = ELF_PAGEALIGN(total_size);
323 map_addr = do_mmap(filep, addr, total_size, prot, type, off);
324 if (!BAD_ADDR(map_addr))
325 do_munmap(current->mm, map_addr+size, total_size-size);
326 } else
327 map_addr = do_mmap(filep, addr, size, prot, type, off);
329 up_write(&current->mm->mmap_sem);
330 return(map_addr);
333 #endif /* !elf_map */
335 static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
337 int i, first_idx = -1, last_idx = -1;
339 for (i = 0; i < nr; i++) {
340 if (cmds[i].p_type == PT_LOAD) {
341 last_idx = i;
342 if (first_idx == -1)
343 first_idx = i;
346 if (first_idx == -1)
347 return 0;
349 return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
350 ELF_PAGESTART(cmds[first_idx].p_vaddr);
354 /* This is much more generalized than the library routine read function,
355 so we keep this separate. Technically the library read function
356 is only provided so that we can read a.out libraries that have
357 an ELF header */
359 static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
360 struct file *interpreter, unsigned long *interp_map_addr,
361 unsigned long no_base)
363 struct elf_phdr *elf_phdata;
364 struct elf_phdr *eppnt;
365 unsigned long load_addr = 0;
366 int load_addr_set = 0;
367 unsigned long last_bss = 0, elf_bss = 0;
368 unsigned long error = ~0UL;
369 unsigned long total_size;
370 int retval, i, size;
372 /* First of all, some simple consistency checks */
373 if (interp_elf_ex->e_type != ET_EXEC &&
374 interp_elf_ex->e_type != ET_DYN)
375 goto out;
376 if (!elf_check_arch(interp_elf_ex))
377 goto out;
378 if (!interpreter->f_op || !interpreter->f_op->mmap)
379 goto out;
382 * If the size of this structure has changed, then punt, since
383 * we will be doing the wrong thing.
385 if (interp_elf_ex->e_phentsize != sizeof(struct elf_phdr))
386 goto out;
387 if (interp_elf_ex->e_phnum < 1 ||
388 interp_elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
389 goto out;
391 /* Now read in all of the header information */
392 size = sizeof(struct elf_phdr) * interp_elf_ex->e_phnum;
393 if (size > ELF_MIN_ALIGN)
394 goto out;
395 elf_phdata = kmalloc(size, GFP_KERNEL);
396 if (!elf_phdata)
397 goto out;
399 retval = kernel_read(interpreter, interp_elf_ex->e_phoff,
400 (char *)elf_phdata,size);
401 error = -EIO;
402 if (retval != size) {
403 if (retval < 0)
404 error = retval;
405 goto out_close;
408 total_size = total_mapping_size(elf_phdata, interp_elf_ex->e_phnum);
409 if (!total_size) {
410 error = -EINVAL;
411 goto out_close;
414 eppnt = elf_phdata;
415 for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
416 if (eppnt->p_type == PT_LOAD) {
417 int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
418 int elf_prot = 0;
419 unsigned long vaddr = 0;
420 unsigned long k, map_addr;
422 if (eppnt->p_flags & PF_R)
423 elf_prot = PROT_READ;
424 if (eppnt->p_flags & PF_W)
425 elf_prot |= PROT_WRITE;
426 if (eppnt->p_flags & PF_X)
427 elf_prot |= PROT_EXEC;
428 vaddr = eppnt->p_vaddr;
429 if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
430 elf_type |= MAP_FIXED;
431 else if (no_base && interp_elf_ex->e_type == ET_DYN)
432 load_addr = -vaddr;
434 map_addr = elf_map(interpreter, load_addr + vaddr,
435 eppnt, elf_prot, elf_type, total_size);
436 total_size = 0;
437 if (!*interp_map_addr)
438 *interp_map_addr = map_addr;
439 error = map_addr;
440 if (BAD_ADDR(map_addr))
441 goto out_close;
443 if (!load_addr_set &&
444 interp_elf_ex->e_type == ET_DYN) {
445 load_addr = map_addr - ELF_PAGESTART(vaddr);
446 load_addr_set = 1;
450 * Check to see if the section's size will overflow the
451 * allowed task size. Note that p_filesz must always be
452 * <= p_memsize so it's only necessary to check p_memsz.
454 k = load_addr + eppnt->p_vaddr;
455 if (BAD_ADDR(k) ||
456 eppnt->p_filesz > eppnt->p_memsz ||
457 eppnt->p_memsz > TASK_SIZE ||
458 TASK_SIZE - eppnt->p_memsz < k) {
459 error = -ENOMEM;
460 goto out_close;
464 * Find the end of the file mapping for this phdr, and
465 * keep track of the largest address we see for this.
467 k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
468 if (k > elf_bss)
469 elf_bss = k;
472 * Do the same thing for the memory mapping - between
473 * elf_bss and last_bss is the bss section.
475 k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
476 if (k > last_bss)
477 last_bss = k;
482 * Now fill out the bss section. First pad the last page up
483 * to the page boundary, and then perform a mmap to make sure
484 * that there are zero-mapped pages up to and including the
485 * last bss page.
487 if (padzero(elf_bss)) {
488 error = -EFAULT;
489 goto out_close;
492 /* What we have mapped so far */
493 elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);
495 /* Map the last of the bss segment */
496 if (last_bss > elf_bss) {
497 down_write(&current->mm->mmap_sem);
498 error = do_brk(elf_bss, last_bss - elf_bss);
499 up_write(&current->mm->mmap_sem);
500 if (BAD_ADDR(error))
501 goto out_close;
504 error = load_addr;
506 out_close:
507 kfree(elf_phdata);
508 out:
509 return error;
512 static unsigned long load_aout_interp(struct exec *interp_ex,
513 struct file *interpreter)
515 unsigned long text_data, elf_entry = ~0UL;
516 char __user * addr;
517 loff_t offset;
519 current->mm->end_code = interp_ex->a_text;
520 text_data = interp_ex->a_text + interp_ex->a_data;
521 current->mm->end_data = text_data;
522 current->mm->brk = interp_ex->a_bss + text_data;
524 switch (N_MAGIC(*interp_ex)) {
525 case OMAGIC:
526 offset = 32;
527 addr = (char __user *)0;
528 break;
529 case ZMAGIC:
530 case QMAGIC:
531 offset = N_TXTOFF(*interp_ex);
532 addr = (char __user *)N_TXTADDR(*interp_ex);
533 break;
534 default:
535 goto out;
538 down_write(&current->mm->mmap_sem);
539 do_brk(0, text_data);
540 up_write(&current->mm->mmap_sem);
541 if (!interpreter->f_op || !interpreter->f_op->read)
542 goto out;
543 if (interpreter->f_op->read(interpreter, addr, text_data, &offset) < 0)
544 goto out;
545 flush_icache_range((unsigned long)addr,
546 (unsigned long)addr + text_data);
548 down_write(&current->mm->mmap_sem);
549 do_brk(ELF_PAGESTART(text_data + ELF_MIN_ALIGN - 1),
550 interp_ex->a_bss);
551 up_write(&current->mm->mmap_sem);
552 elf_entry = interp_ex->a_entry;
554 out:
555 return elf_entry;
559 * These are the functions used to load ELF style executables and shared
560 * libraries. There is no binary dependent code anywhere else.
563 #define INTERPRETER_NONE 0
564 #define INTERPRETER_AOUT 1
565 #define INTERPRETER_ELF 2
567 #ifndef STACK_RND_MASK
568 #define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12)) /* 8MB of VA */
569 #endif
571 static unsigned long randomize_stack_top(unsigned long stack_top)
573 unsigned int random_variable = 0;
575 if ((current->flags & PF_RANDOMIZE) &&
576 !(current->personality & ADDR_NO_RANDOMIZE)) {
577 random_variable = get_random_int() & STACK_RND_MASK;
578 random_variable <<= PAGE_SHIFT;
580 #ifdef CONFIG_STACK_GROWSUP
581 return PAGE_ALIGN(stack_top) + random_variable;
582 #else
583 return PAGE_ALIGN(stack_top) - random_variable;
584 #endif
587 static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
589 struct file *interpreter = NULL; /* to shut gcc up */
590 unsigned long load_addr = 0, load_bias = 0;
591 int load_addr_set = 0;
592 char * elf_interpreter = NULL;
593 unsigned int interpreter_type = INTERPRETER_NONE;
594 unsigned char ibcs2_interpreter = 0;
595 unsigned long error;
596 struct elf_phdr *elf_ppnt, *elf_phdata;
597 unsigned long elf_bss, elf_brk;
598 int elf_exec_fileno;
599 int retval, i;
600 unsigned int size;
601 unsigned long elf_entry;
602 unsigned long interp_load_addr = 0;
603 unsigned long start_code, end_code, start_data, end_data;
604 unsigned long reloc_func_desc = 0;
605 char passed_fileno[6];
606 struct files_struct *files;
607 int executable_stack = EXSTACK_DEFAULT;
608 unsigned long def_flags = 0;
609 struct {
610 struct elfhdr elf_ex;
611 struct elfhdr interp_elf_ex;
612 struct exec interp_ex;
613 } *loc;
615 loc = kmalloc(sizeof(*loc), GFP_KERNEL);
616 if (!loc) {
617 retval = -ENOMEM;
618 goto out_ret;
621 /* Get the exec-header */
622 loc->elf_ex = *((struct elfhdr *)bprm->buf);
624 retval = -ENOEXEC;
625 /* First of all, some simple consistency checks */
626 if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
627 goto out;
629 if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
630 goto out;
631 if (!elf_check_arch(&loc->elf_ex))
632 goto out;
633 if (!bprm->file->f_op||!bprm->file->f_op->mmap)
634 goto out;
636 /* Now read in all of the header information */
637 if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr))
638 goto out;
639 if (loc->elf_ex.e_phnum < 1 ||
640 loc->elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr))
641 goto out;
642 size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr);
643 retval = -ENOMEM;
644 elf_phdata = kmalloc(size, GFP_KERNEL);
645 if (!elf_phdata)
646 goto out;
648 retval = kernel_read(bprm->file, loc->elf_ex.e_phoff,
649 (char *)elf_phdata, size);
650 if (retval != size) {
651 if (retval >= 0)
652 retval = -EIO;
653 goto out_free_ph;
656 files = current->files; /* Refcounted so ok */
657 retval = unshare_files();
658 if (retval < 0)
659 goto out_free_ph;
660 if (files == current->files) {
661 put_files_struct(files);
662 files = NULL;
665 /* exec will make our files private anyway, but for the a.out
666 loader stuff we need to do it earlier */
667 retval = get_unused_fd();
668 if (retval < 0)
669 goto out_free_fh;
670 get_file(bprm->file);
671 fd_install(elf_exec_fileno = retval, bprm->file);
673 elf_ppnt = elf_phdata;
674 elf_bss = 0;
675 elf_brk = 0;
677 start_code = ~0UL;
678 end_code = 0;
679 start_data = 0;
680 end_data = 0;
682 for (i = 0; i < loc->elf_ex.e_phnum; i++) {
683 if (elf_ppnt->p_type == PT_INTERP) {
684 /* This is the program interpreter used for
685 * shared libraries - for now assume that this
686 * is an a.out format binary
688 retval = -ENOEXEC;
689 if (elf_ppnt->p_filesz > PATH_MAX ||
690 elf_ppnt->p_filesz < 2)
691 goto out_free_file;
693 retval = -ENOMEM;
694 elf_interpreter = kmalloc(elf_ppnt->p_filesz,
695 GFP_KERNEL);
696 if (!elf_interpreter)
697 goto out_free_file;
699 retval = kernel_read(bprm->file, elf_ppnt->p_offset,
700 elf_interpreter,
701 elf_ppnt->p_filesz);
702 if (retval != elf_ppnt->p_filesz) {
703 if (retval >= 0)
704 retval = -EIO;
705 goto out_free_interp;
707 /* make sure path is NULL terminated */
708 retval = -ENOEXEC;
709 if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
710 goto out_free_interp;
712 /* If the program interpreter is one of these two,
713 * then assume an iBCS2 image. Otherwise assume
714 * a native linux image.
716 if (strcmp(elf_interpreter,"/usr/lib/libc.so.1") == 0 ||
717 strcmp(elf_interpreter,"/usr/lib/ld.so.1") == 0)
718 ibcs2_interpreter = 1;
721 * The early SET_PERSONALITY here is so that the lookup
722 * for the interpreter happens in the namespace of the
723 * to-be-execed image. SET_PERSONALITY can select an
724 * alternate root.
726 * However, SET_PERSONALITY is NOT allowed to switch
727 * this task into the new images's memory mapping
728 * policy - that is, TASK_SIZE must still evaluate to
729 * that which is appropriate to the execing application.
730 * This is because exit_mmap() needs to have TASK_SIZE
731 * evaluate to the size of the old image.
733 * So if (say) a 64-bit application is execing a 32-bit
734 * application it is the architecture's responsibility
735 * to defer changing the value of TASK_SIZE until the
736 * switch really is going to happen - do this in
737 * flush_thread(). - akpm
739 SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter);
741 interpreter = open_exec(elf_interpreter);
742 retval = PTR_ERR(interpreter);
743 if (IS_ERR(interpreter))
744 goto out_free_interp;
747 * If the binary is not readable then enforce
748 * mm->dumpable = 0 regardless of the interpreter's
749 * permissions.
751 if (file_permission(interpreter, MAY_READ) < 0)
752 bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP;
754 retval = kernel_read(interpreter, 0, bprm->buf,
755 BINPRM_BUF_SIZE);
756 if (retval != BINPRM_BUF_SIZE) {
757 if (retval >= 0)
758 retval = -EIO;
759 goto out_free_dentry;
762 /* Get the exec headers */
763 loc->interp_ex = *((struct exec *)bprm->buf);
764 loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);
765 break;
767 elf_ppnt++;
770 elf_ppnt = elf_phdata;
771 for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
772 if (elf_ppnt->p_type == PT_GNU_STACK) {
773 if (elf_ppnt->p_flags & PF_X)
774 executable_stack = EXSTACK_ENABLE_X;
775 else
776 executable_stack = EXSTACK_DISABLE_X;
777 break;
780 /* Some simple consistency checks for the interpreter */
781 if (elf_interpreter) {
782 interpreter_type = INTERPRETER_ELF | INTERPRETER_AOUT;
784 /* Now figure out which format our binary is */
785 if ((N_MAGIC(loc->interp_ex) != OMAGIC) &&
786 (N_MAGIC(loc->interp_ex) != ZMAGIC) &&
787 (N_MAGIC(loc->interp_ex) != QMAGIC))
788 interpreter_type = INTERPRETER_ELF;
790 if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
791 interpreter_type &= ~INTERPRETER_ELF;
793 retval = -ELIBBAD;
794 if (!interpreter_type)
795 goto out_free_dentry;
797 /* Make sure only one type was selected */
798 if ((interpreter_type & INTERPRETER_ELF) &&
799 interpreter_type != INTERPRETER_ELF) {
800 // FIXME - ratelimit this before re-enabling
801 // printk(KERN_WARNING "ELF: Ambiguous type, using ELF\n");
802 interpreter_type = INTERPRETER_ELF;
804 /* Verify the interpreter has a valid arch */
805 if ((interpreter_type == INTERPRETER_ELF) &&
806 !elf_check_arch(&loc->interp_elf_ex))
807 goto out_free_dentry;
808 } else {
809 /* Executables without an interpreter also need a personality */
810 SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter);
813 /* OK, we are done with that, now set up the arg stuff,
814 and then start this sucker up */
815 if ((!bprm->sh_bang) && (interpreter_type == INTERPRETER_AOUT)) {
816 char *passed_p = passed_fileno;
817 sprintf(passed_fileno, "%d", elf_exec_fileno);
819 if (elf_interpreter) {
820 retval = copy_strings_kernel(1, &passed_p, bprm);
821 if (retval)
822 goto out_free_dentry;
823 bprm->argc++;
827 /* Flush all traces of the currently running executable */
828 retval = flush_old_exec(bprm);
829 if (retval)
830 goto out_free_dentry;
832 /* Discard our unneeded old files struct */
833 if (files) {
834 put_files_struct(files);
835 files = NULL;
838 /* OK, This is the point of no return */
839 current->flags &= ~PF_FORKNOEXEC;
840 current->mm->def_flags = def_flags;
842 /* Do this immediately, since STACK_TOP as used in setup_arg_pages
843 may depend on the personality. */
844 SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter);
845 if (elf_read_implies_exec(loc->elf_ex, executable_stack))
846 current->personality |= READ_IMPLIES_EXEC;
848 if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
849 current->flags |= PF_RANDOMIZE;
850 arch_pick_mmap_layout(current->mm);
852 /* Do this so that we can load the interpreter, if need be. We will
853 change some of these later */
854 current->mm->free_area_cache = current->mm->mmap_base;
855 current->mm->cached_hole_size = 0;
856 retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
857 executable_stack);
858 if (retval < 0) {
859 send_sig(SIGKILL, current, 0);
860 goto out_free_dentry;
863 current->mm->start_stack = bprm->p;
865 /* Now we do a little grungy work by mmaping the ELF image into
866 the correct location in memory. */
867 for(i = 0, elf_ppnt = elf_phdata;
868 i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
869 int elf_prot = 0, elf_flags;
870 unsigned long k, vaddr;
872 if (elf_ppnt->p_type != PT_LOAD)
873 continue;
875 if (unlikely (elf_brk > elf_bss)) {
876 unsigned long nbyte;
878 /* There was a PT_LOAD segment with p_memsz > p_filesz
879 before this one. Map anonymous pages, if needed,
880 and clear the area. */
881 retval = set_brk (elf_bss + load_bias,
882 elf_brk + load_bias);
883 if (retval) {
884 send_sig(SIGKILL, current, 0);
885 goto out_free_dentry;
887 nbyte = ELF_PAGEOFFSET(elf_bss);
888 if (nbyte) {
889 nbyte = ELF_MIN_ALIGN - nbyte;
890 if (nbyte > elf_brk - elf_bss)
891 nbyte = elf_brk - elf_bss;
892 if (clear_user((void __user *)elf_bss +
893 load_bias, nbyte)) {
895 * This bss-zeroing can fail if the ELF
896 * file specifies odd protections. So
897 * we don't check the return value
903 if (elf_ppnt->p_flags & PF_R)
904 elf_prot |= PROT_READ;
905 if (elf_ppnt->p_flags & PF_W)
906 elf_prot |= PROT_WRITE;
907 if (elf_ppnt->p_flags & PF_X)
908 elf_prot |= PROT_EXEC;
910 elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;
912 vaddr = elf_ppnt->p_vaddr;
913 if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
914 elf_flags |= MAP_FIXED;
915 } else if (loc->elf_ex.e_type == ET_DYN) {
916 /* Try and get dynamic programs out of the way of the
917 * default mmap base, as well as whatever program they
918 * might try to exec. This is because the brk will
919 * follow the loader, and is not movable. */
920 #ifdef CONFIG_X86
921 load_bias = 0;
922 #else
923 load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
924 #endif
927 error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
928 elf_prot, elf_flags,0);
929 if (BAD_ADDR(error)) {
930 send_sig(SIGKILL, current, 0);
931 retval = IS_ERR((void *)error) ?
932 PTR_ERR((void*)error) : -EINVAL;
933 goto out_free_dentry;
936 if (!load_addr_set) {
937 load_addr_set = 1;
938 load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
939 if (loc->elf_ex.e_type == ET_DYN) {
940 load_bias += error -
941 ELF_PAGESTART(load_bias + vaddr);
942 load_addr += load_bias;
943 reloc_func_desc = load_bias;
946 k = elf_ppnt->p_vaddr;
947 if (k < start_code)
948 start_code = k;
949 if (start_data < k)
950 start_data = k;
953 * Check to see if the section's size will overflow the
954 * allowed task size. Note that p_filesz must always be
955 * <= p_memsz so it is only necessary to check p_memsz.
957 if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
958 elf_ppnt->p_memsz > TASK_SIZE ||
959 TASK_SIZE - elf_ppnt->p_memsz < k) {
960 /* set_brk can never work. Avoid overflows. */
961 send_sig(SIGKILL, current, 0);
962 retval = -EINVAL;
963 goto out_free_dentry;
966 k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;
968 if (k > elf_bss)
969 elf_bss = k;
970 if ((elf_ppnt->p_flags & PF_X) && end_code < k)
971 end_code = k;
972 if (end_data < k)
973 end_data = k;
974 k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
975 if (k > elf_brk)
976 elf_brk = k;
979 loc->elf_ex.e_entry += load_bias;
980 elf_bss += load_bias;
981 elf_brk += load_bias;
982 start_code += load_bias;
983 end_code += load_bias;
984 start_data += load_bias;
985 end_data += load_bias;
987 /* Calling set_brk effectively mmaps the pages that we need
988 * for the bss and break sections. We must do this before
989 * mapping in the interpreter, to make sure it doesn't wind
990 * up getting placed where the bss needs to go.
992 retval = set_brk(elf_bss, elf_brk);
993 if (retval) {
994 send_sig(SIGKILL, current, 0);
995 goto out_free_dentry;
997 if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
998 send_sig(SIGSEGV, current, 0);
999 retval = -EFAULT; /* Nobody gets to see this, but.. */
1000 goto out_free_dentry;
1003 if (elf_interpreter) {
1004 if (interpreter_type == INTERPRETER_AOUT) {
1005 elf_entry = load_aout_interp(&loc->interp_ex,
1006 interpreter);
1007 } else {
1008 unsigned long uninitialized_var(interp_map_addr);
1010 elf_entry = load_elf_interp(&loc->interp_elf_ex,
1011 interpreter,
1012 &interp_map_addr,
1013 load_bias);
1014 if (!BAD_ADDR(elf_entry)) {
1016 * load_elf_interp() returns relocation
1017 * adjustment
1019 interp_load_addr = elf_entry;
1020 elf_entry += loc->interp_elf_ex.e_entry;
1023 if (BAD_ADDR(elf_entry)) {
1024 force_sig(SIGSEGV, current);
1025 retval = IS_ERR((void *)elf_entry) ?
1026 (int)elf_entry : -EINVAL;
1027 goto out_free_dentry;
1029 reloc_func_desc = interp_load_addr;
1031 allow_write_access(interpreter);
1032 fput(interpreter);
1033 kfree(elf_interpreter);
1034 } else {
1035 elf_entry = loc->elf_ex.e_entry;
1036 if (BAD_ADDR(elf_entry)) {
1037 force_sig(SIGSEGV, current);
1038 retval = -EINVAL;
1039 goto out_free_dentry;
1043 kfree(elf_phdata);
1045 if (interpreter_type != INTERPRETER_AOUT)
1046 sys_close(elf_exec_fileno);
1048 set_binfmt(&elf_format);
1050 #ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
1051 retval = arch_setup_additional_pages(bprm, executable_stack);
1052 if (retval < 0) {
1053 send_sig(SIGKILL, current, 0);
1054 goto out;
1056 #endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
1058 compute_creds(bprm);
1059 current->flags &= ~PF_FORKNOEXEC;
1060 retval = create_elf_tables(bprm, &loc->elf_ex,
1061 (interpreter_type == INTERPRETER_AOUT),
1062 load_addr, interp_load_addr);
1063 if (retval < 0) {
1064 send_sig(SIGKILL, current, 0);
1065 goto out;
1067 /* N.B. passed_fileno might not be initialized? */
1068 if (interpreter_type == INTERPRETER_AOUT)
1069 current->mm->arg_start += strlen(passed_fileno) + 1;
1070 current->mm->end_code = end_code;
1071 current->mm->start_code = start_code;
1072 current->mm->start_data = start_data;
1073 current->mm->end_data = end_data;
1074 current->mm->start_stack = bprm->p;
1076 if (current->personality & MMAP_PAGE_ZERO) {
1077 /* Why this, you ask??? Well SVr4 maps page 0 as read-only,
1078 and some applications "depend" upon this behavior.
1079 Since we do not have the power to recompile these, we
1080 emulate the SVr4 behavior. Sigh. */
1081 down_write(&current->mm->mmap_sem);
1082 error = do_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
1083 MAP_FIXED | MAP_PRIVATE, 0);
1084 up_write(&current->mm->mmap_sem);
1087 #ifdef ELF_PLAT_INIT
1089 * The ABI may specify that certain registers be set up in special
1090 * ways (on i386 %edx is the address of a DT_FINI function, for
1091 * example. In addition, it may also specify (eg, PowerPC64 ELF)
1092 * that the e_entry field is the address of the function descriptor
1093 * for the startup routine, rather than the address of the startup
1094 * routine itself. This macro performs whatever initialization to
1095 * the regs structure is required as well as any relocations to the
1096 * function descriptor entries when executing dynamically links apps.
1098 ELF_PLAT_INIT(regs, reloc_func_desc);
1099 #endif
1101 start_thread(regs, elf_entry, bprm->p);
1102 if (unlikely(current->ptrace & PT_PTRACED)) {
1103 if (current->ptrace & PT_TRACE_EXEC)
1104 ptrace_notify ((PTRACE_EVENT_EXEC << 8) | SIGTRAP);
1105 else
1106 send_sig(SIGTRAP, current, 0);
1108 retval = 0;
1109 out:
1110 kfree(loc);
1111 out_ret:
1112 return retval;
1114 /* error cleanup */
1115 out_free_dentry:
1116 allow_write_access(interpreter);
1117 if (interpreter)
1118 fput(interpreter);
1119 out_free_interp:
1120 kfree(elf_interpreter);
1121 out_free_file:
1122 sys_close(elf_exec_fileno);
1123 out_free_fh:
1124 if (files)
1125 reset_files_struct(current, files);
1126 out_free_ph:
1127 kfree(elf_phdata);
1128 goto out;
1131 /* This is really simpleminded and specialized - we are loading an
1132 a.out library that is given an ELF header. */
1133 static int load_elf_library(struct file *file)
1135 struct elf_phdr *elf_phdata;
1136 struct elf_phdr *eppnt;
1137 unsigned long elf_bss, bss, len;
1138 int retval, error, i, j;
1139 struct elfhdr elf_ex;
1141 error = -ENOEXEC;
1142 retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
1143 if (retval != sizeof(elf_ex))
1144 goto out;
1146 if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
1147 goto out;
1149 /* First of all, some simple consistency checks */
1150 if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
1151 !elf_check_arch(&elf_ex) || !file->f_op || !file->f_op->mmap)
1152 goto out;
1154 /* Now read in all of the header information */
1156 j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
1157 /* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */
1159 error = -ENOMEM;
1160 elf_phdata = kmalloc(j, GFP_KERNEL);
1161 if (!elf_phdata)
1162 goto out;
1164 eppnt = elf_phdata;
1165 error = -ENOEXEC;
1166 retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
1167 if (retval != j)
1168 goto out_free_ph;
1170 for (j = 0, i = 0; i<elf_ex.e_phnum; i++)
1171 if ((eppnt + i)->p_type == PT_LOAD)
1172 j++;
1173 if (j != 1)
1174 goto out_free_ph;
1176 while (eppnt->p_type != PT_LOAD)
1177 eppnt++;
1179 /* Now use mmap to map the library into memory. */
1180 down_write(&current->mm->mmap_sem);
1181 error = do_mmap(file,
1182 ELF_PAGESTART(eppnt->p_vaddr),
1183 (eppnt->p_filesz +
1184 ELF_PAGEOFFSET(eppnt->p_vaddr)),
1185 PROT_READ | PROT_WRITE | PROT_EXEC,
1186 MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
1187 (eppnt->p_offset -
1188 ELF_PAGEOFFSET(eppnt->p_vaddr)));
1189 up_write(&current->mm->mmap_sem);
1190 if (error != ELF_PAGESTART(eppnt->p_vaddr))
1191 goto out_free_ph;
1193 elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
1194 if (padzero(elf_bss)) {
1195 error = -EFAULT;
1196 goto out_free_ph;
1199 len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
1200 ELF_MIN_ALIGN - 1);
1201 bss = eppnt->p_memsz + eppnt->p_vaddr;
1202 if (bss > len) {
1203 down_write(&current->mm->mmap_sem);
1204 do_brk(len, bss - len);
1205 up_write(&current->mm->mmap_sem);
1207 error = 0;
1209 out_free_ph:
1210 kfree(elf_phdata);
1211 out:
1212 return error;
/*
 * Note that some platforms still use traditional core dumps and not
 * the ELF core dump.  Each platform can select it as appropriate.
 */
1219 #if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
/*
 * ELF core dumper
 *
 * Modelled on fs/exec.c:aout_core_dump()
 * Jeremy Fitzhardinge <jeremy@sw.oz.au>
 */
1228 * These are the only things you should do on a core-file: use only these
1229 * functions to write out all the necessary info.
1231 static int dump_write(struct file *file, const void *addr, int nr)
1233 return file->f_op->write(file, addr, nr, &file->f_pos) == nr;
1236 static int dump_seek(struct file *file, loff_t off)
1238 if (file->f_op->llseek && file->f_op->llseek != no_llseek) {
1239 if (file->f_op->llseek(file, off, SEEK_CUR) < 0)
1240 return 0;
1241 } else {
1242 char *buf = (char *)get_zeroed_page(GFP_KERNEL);
1243 if (!buf)
1244 return 0;
1245 while (off > 0) {
1246 unsigned long n = off;
1247 if (n > PAGE_SIZE)
1248 n = PAGE_SIZE;
1249 if (!dump_write(file, buf, n))
1250 return 0;
1251 off -= n;
1253 free_page((unsigned long)buf);
1255 return 1;
1259 * Decide whether a segment is worth dumping; default is yes to be
1260 * sure (missing info is worse than too much; etc).
1261 * Personally I'd include everything, and use the coredump limit...
1263 * I think we should skip something. But I am not sure how. H.J.
1265 static int maydump(struct vm_area_struct *vma, unsigned long mm_flags)
1267 /* The vma can be set up to tell us the answer directly. */
1268 if (vma->vm_flags & VM_ALWAYSDUMP)
1269 return 1;
1271 /* Do not dump I/O mapped devices or special mappings */
1272 if (vma->vm_flags & (VM_IO | VM_RESERVED))
1273 return 0;
1275 /* By default, dump shared memory if mapped from an anonymous file. */
1276 if (vma->vm_flags & VM_SHARED) {
1277 if (vma->vm_file->f_path.dentry->d_inode->i_nlink == 0)
1278 return test_bit(MMF_DUMP_ANON_SHARED, &mm_flags);
1279 else
1280 return test_bit(MMF_DUMP_MAPPED_SHARED, &mm_flags);
1283 /* By default, if it hasn't been written to, don't write it out. */
1284 if (!vma->anon_vma)
1285 return test_bit(MMF_DUMP_MAPPED_PRIVATE, &mm_flags);
1287 return test_bit(MMF_DUMP_ANON_PRIVATE, &mm_flags);
/* An ELF note in memory */
struct memelfnote {
	const char *name;	/* NUL-terminated note name */
	int type;		/* stored as n_type when written */
	unsigned int datasz;	/* size of the payload, in bytes */
	void *data;		/* note payload */
};
1299 static int notesize(struct memelfnote *en)
1301 int sz;
1303 sz = sizeof(struct elf_note);
1304 sz += roundup(strlen(en->name) + 1, 4);
1305 sz += roundup(en->datasz, 4);
1307 return sz;
/*
 * Note-writing variant: write nr bytes and advance *foffset by the same
 * amount.  Expects a variable named `file' in scope and makes the enclosing
 * function return 0 if the write fails.
 */
#define DUMP_WRITE(addr, nr, foffset)			\
	do {						\
		if (!dump_write(file, (addr), (nr)))	\
			return 0;			\
		*foffset += (nr);			\
	} while(0)
1313 static int alignfile(struct file *file, loff_t *foffset)
1315 static const char buf[4] = { 0, };
1316 DUMP_WRITE(buf, roundup(*foffset, 4) - *foffset, foffset);
1317 return 1;
1320 static int writenote(struct memelfnote *men, struct file *file,
1321 loff_t *foffset)
1323 struct elf_note en;
1324 en.n_namesz = strlen(men->name) + 1;
1325 en.n_descsz = men->datasz;
1326 en.n_type = men->type;
1328 DUMP_WRITE(&en, sizeof(en), foffset);
1329 DUMP_WRITE(men->name, en.n_namesz, foffset);
1330 if (!alignfile(file, foffset))
1331 return 0;
1332 DUMP_WRITE(men->data, men->datasz, foffset);
1333 if (!alignfile(file, foffset))
1334 return 0;
1336 return 1;
1338 #undef DUMP_WRITE
/*
 * Output helpers for the core dumper.  Both expect `file' and an
 * `end_coredump' label in the expanding function; DUMP_WRITE additionally
 * accounts the bytes in `size' and bails out once `size' exceeds `limit'.
 *
 * They are wrapped in do { } while (0) so that each use behaves as a single
 * statement, e.g. as the body of an if/else without braces.
 */
#define DUMP_WRITE(addr, nr)					\
	do {							\
		if ((size += (nr)) > limit ||			\
		    !dump_write(file, (addr), (nr)))		\
			goto end_coredump;			\
	} while (0)
#define DUMP_SEEK(off)						\
	do {							\
		if (!dump_seek(file, (off)))			\
			goto end_coredump;			\
	} while (0)
1347 static void fill_elf_header(struct elfhdr *elf, int segs)
1349 memcpy(elf->e_ident, ELFMAG, SELFMAG);
1350 elf->e_ident[EI_CLASS] = ELF_CLASS;
1351 elf->e_ident[EI_DATA] = ELF_DATA;
1352 elf->e_ident[EI_VERSION] = EV_CURRENT;
1353 elf->e_ident[EI_OSABI] = ELF_OSABI;
1354 memset(elf->e_ident+EI_PAD, 0, EI_NIDENT-EI_PAD);
1356 elf->e_type = ET_CORE;
1357 elf->e_machine = ELF_ARCH;
1358 elf->e_version = EV_CURRENT;
1359 elf->e_entry = 0;
1360 elf->e_phoff = sizeof(struct elfhdr);
1361 elf->e_shoff = 0;
1362 elf->e_flags = ELF_CORE_EFLAGS;
1363 elf->e_ehsize = sizeof(struct elfhdr);
1364 elf->e_phentsize = sizeof(struct elf_phdr);
1365 elf->e_phnum = segs;
1366 elf->e_shentsize = 0;
1367 elf->e_shnum = 0;
1368 elf->e_shstrndx = 0;
1369 return;
1372 static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
1374 phdr->p_type = PT_NOTE;
1375 phdr->p_offset = offset;
1376 phdr->p_vaddr = 0;
1377 phdr->p_paddr = 0;
1378 phdr->p_filesz = sz;
1379 phdr->p_memsz = 0;
1380 phdr->p_flags = 0;
1381 phdr->p_align = 0;
1382 return;
1385 static void fill_note(struct memelfnote *note, const char *name, int type,
1386 unsigned int sz, void *data)
1388 note->name = name;
1389 note->type = type;
1390 note->datasz = sz;
1391 note->data = data;
1392 return;
1396 * fill up all the fields in prstatus from the given task struct, except
1397 * registers which need to be filled up separately.
1399 static void fill_prstatus(struct elf_prstatus *prstatus,
1400 struct task_struct *p, long signr)
1402 prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
1403 prstatus->pr_sigpend = p->pending.signal.sig[0];
1404 prstatus->pr_sighold = p->blocked.sig[0];
1405 prstatus->pr_pid = p->pid;
1406 prstatus->pr_ppid = p->parent->pid;
1407 prstatus->pr_pgrp = process_group(p);
1408 prstatus->pr_sid = process_session(p);
1409 if (thread_group_leader(p)) {
1411 * This is the record for the group leader. Add in the
1412 * cumulative times of previous dead threads. This total
1413 * won't include the time of each live thread whose state
1414 * is included in the core dump. The final total reported
1415 * to our parent process when it calls wait4 will include
1416 * those sums as well as the little bit more time it takes
1417 * this and each other thread to finish dying after the
1418 * core dump synchronization phase.
1420 cputime_to_timeval(cputime_add(p->utime, p->signal->utime),
1421 &prstatus->pr_utime);
1422 cputime_to_timeval(cputime_add(p->stime, p->signal->stime),
1423 &prstatus->pr_stime);
1424 } else {
1425 cputime_to_timeval(p->utime, &prstatus->pr_utime);
1426 cputime_to_timeval(p->stime, &prstatus->pr_stime);
1428 cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
1429 cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
1432 static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1433 struct mm_struct *mm)
1435 unsigned int i, len;
1437 /* first copy the parameters from user space */
1438 memset(psinfo, 0, sizeof(struct elf_prpsinfo));
1440 len = mm->arg_end - mm->arg_start;
1441 if (len >= ELF_PRARGSZ)
1442 len = ELF_PRARGSZ-1;
1443 if (copy_from_user(&psinfo->pr_psargs,
1444 (const char __user *)mm->arg_start, len))
1445 return -EFAULT;
1446 for(i = 0; i < len; i++)
1447 if (psinfo->pr_psargs[i] == 0)
1448 psinfo->pr_psargs[i] = ' ';
1449 psinfo->pr_psargs[len] = 0;
1451 psinfo->pr_pid = p->pid;
1452 psinfo->pr_ppid = p->parent->pid;
1453 psinfo->pr_pgrp = process_group(p);
1454 psinfo->pr_sid = process_session(p);
1456 i = p->state ? ffz(~p->state) + 1 : 0;
1457 psinfo->pr_state = i;
1458 psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
1459 psinfo->pr_zomb = psinfo->pr_sname == 'Z';
1460 psinfo->pr_nice = task_nice(p);
1461 psinfo->pr_flag = p->flags;
1462 SET_UID(psinfo->pr_uid, p->uid);
1463 SET_GID(psinfo->pr_gid, p->gid);
1464 strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
1466 return 0;
1469 /* Here is the structure in which status of each thread is captured. */
1470 struct elf_thread_status
1472 struct list_head list;
1473 struct elf_prstatus prstatus; /* NT_PRSTATUS */
1474 elf_fpregset_t fpu; /* NT_PRFPREG */
1475 struct task_struct *thread;
1476 #ifdef ELF_CORE_COPY_XFPREGS
1477 elf_fpxregset_t xfpu; /* NT_PRXFPREG */
1478 #endif
1479 struct memelfnote notes[3];
1480 int num_notes;
1484 * In order to add the specific thread information for the elf file format,
1485 * we need to keep a linked list of every threads pr_status and then create
1486 * a single section for them in the final core file.
1488 static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1490 int sz = 0;
1491 struct task_struct *p = t->thread;
1492 t->num_notes = 0;
1494 fill_prstatus(&t->prstatus, p, signr);
1495 elf_core_copy_task_regs(p, &t->prstatus.pr_reg);
1497 fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1498 &(t->prstatus));
1499 t->num_notes++;
1500 sz += notesize(&t->notes[0]);
1502 if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1503 &t->fpu))) {
1504 fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1505 &(t->fpu));
1506 t->num_notes++;
1507 sz += notesize(&t->notes[1]);
1510 #ifdef ELF_CORE_COPY_XFPREGS
1511 if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1512 fill_note(&t->notes[2], "LINUX", NT_PRXFPREG, sizeof(t->xfpu),
1513 &t->xfpu);
1514 t->num_notes++;
1515 sz += notesize(&t->notes[2]);
1517 #endif
1518 return sz;
1521 static struct vm_area_struct *first_vma(struct task_struct *tsk,
1522 struct vm_area_struct *gate_vma)
1524 struct vm_area_struct *ret = tsk->mm->mmap;
1526 if (ret)
1527 return ret;
1528 return gate_vma;
1531 * Helper function for iterating across a vma list. It ensures that the caller
1532 * will visit `gate_vma' prior to terminating the search.
1534 static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
1535 struct vm_area_struct *gate_vma)
1537 struct vm_area_struct *ret;
1539 ret = this_vma->vm_next;
1540 if (ret)
1541 return ret;
1542 if (this_vma == gate_vma)
1543 return NULL;
1544 return gate_vma;
1548 * Actual dumper
1550 * This is a two-pass process; first we find the offsets of the bits,
1551 * and then they are actually written out. If we run out of core limit
1552 * we just truncate.
1554 static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file)
1556 #define NUM_NOTES 6
1557 int has_dumped = 0;
1558 mm_segment_t fs;
1559 int segs;
1560 size_t size = 0;
1561 int i;
1562 struct vm_area_struct *vma, *gate_vma;
1563 struct elfhdr *elf = NULL;
1564 loff_t offset = 0, dataoff, foffset;
1565 unsigned long limit = current->signal->rlim[RLIMIT_CORE].rlim_cur;
1566 int numnote;
1567 struct memelfnote *notes = NULL;
1568 struct elf_prstatus *prstatus = NULL; /* NT_PRSTATUS */
1569 struct elf_prpsinfo *psinfo = NULL; /* NT_PRPSINFO */
1570 struct task_struct *g, *p;
1571 LIST_HEAD(thread_list);
1572 struct list_head *t;
1573 elf_fpregset_t *fpu = NULL;
1574 #ifdef ELF_CORE_COPY_XFPREGS
1575 elf_fpxregset_t *xfpu = NULL;
1576 #endif
1577 int thread_status_size = 0;
1578 elf_addr_t *auxv;
1579 unsigned long mm_flags;
1580 #ifdef ELF_CORE_WRITE_EXTRA_NOTES
1581 int extra_notes_size;
1582 #endif
1585 * We no longer stop all VM operations.
1587 * This is because those proceses that could possibly change map_count
1588 * or the mmap / vma pages are now blocked in do_exit on current
1589 * finishing this core dump.
1591 * Only ptrace can touch these memory addresses, but it doesn't change
1592 * the map_count or the pages allocated. So no possibility of crashing
1593 * exists while dumping the mm->vm_next areas to the core file.
1596 /* alloc memory for large data structures: too large to be on stack */
1597 elf = kmalloc(sizeof(*elf), GFP_KERNEL);
1598 if (!elf)
1599 goto cleanup;
1600 prstatus = kmalloc(sizeof(*prstatus), GFP_KERNEL);
1601 if (!prstatus)
1602 goto cleanup;
1603 psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1604 if (!psinfo)
1605 goto cleanup;
1606 notes = kmalloc(NUM_NOTES * sizeof(struct memelfnote), GFP_KERNEL);
1607 if (!notes)
1608 goto cleanup;
1609 fpu = kmalloc(sizeof(*fpu), GFP_KERNEL);
1610 if (!fpu)
1611 goto cleanup;
1612 #ifdef ELF_CORE_COPY_XFPREGS
1613 xfpu = kmalloc(sizeof(*xfpu), GFP_KERNEL);
1614 if (!xfpu)
1615 goto cleanup;
1616 #endif
1618 if (signr) {
1619 struct elf_thread_status *tmp;
1620 rcu_read_lock();
1621 do_each_thread(g,p)
1622 if (current->mm == p->mm && current != p) {
1623 tmp = kzalloc(sizeof(*tmp), GFP_ATOMIC);
1624 if (!tmp) {
1625 rcu_read_unlock();
1626 goto cleanup;
1628 tmp->thread = p;
1629 list_add(&tmp->list, &thread_list);
1631 while_each_thread(g,p);
1632 rcu_read_unlock();
1633 list_for_each(t, &thread_list) {
1634 struct elf_thread_status *tmp;
1635 int sz;
1637 tmp = list_entry(t, struct elf_thread_status, list);
1638 sz = elf_dump_thread_status(signr, tmp);
1639 thread_status_size += sz;
1642 /* now collect the dump for the current */
1643 memset(prstatus, 0, sizeof(*prstatus));
1644 fill_prstatus(prstatus, current, signr);
1645 elf_core_copy_regs(&prstatus->pr_reg, regs);
1647 segs = current->mm->map_count;
1648 #ifdef ELF_CORE_EXTRA_PHDRS
1649 segs += ELF_CORE_EXTRA_PHDRS;
1650 #endif
1652 gate_vma = get_gate_vma(current);
1653 if (gate_vma != NULL)
1654 segs++;
1656 /* Set up header */
1657 fill_elf_header(elf, segs + 1); /* including notes section */
1659 has_dumped = 1;
1660 current->flags |= PF_DUMPCORE;
1663 * Set up the notes in similar form to SVR4 core dumps made
1664 * with info from their /proc.
1667 fill_note(notes + 0, "CORE", NT_PRSTATUS, sizeof(*prstatus), prstatus);
1668 fill_psinfo(psinfo, current->group_leader, current->mm);
1669 fill_note(notes + 1, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1671 numnote = 2;
1673 auxv = (elf_addr_t *)current->mm->saved_auxv;
1675 i = 0;
1677 i += 2;
1678 while (auxv[i - 2] != AT_NULL);
1679 fill_note(&notes[numnote++], "CORE", NT_AUXV,
1680 i * sizeof(elf_addr_t), auxv);
1682 /* Try to dump the FPU. */
1683 if ((prstatus->pr_fpvalid =
1684 elf_core_copy_task_fpregs(current, regs, fpu)))
1685 fill_note(notes + numnote++,
1686 "CORE", NT_PRFPREG, sizeof(*fpu), fpu);
1687 #ifdef ELF_CORE_COPY_XFPREGS
1688 if (elf_core_copy_task_xfpregs(current, xfpu))
1689 fill_note(notes + numnote++,
1690 "LINUX", NT_PRXFPREG, sizeof(*xfpu), xfpu);
1691 #endif
1693 fs = get_fs();
1694 set_fs(KERNEL_DS);
1696 DUMP_WRITE(elf, sizeof(*elf));
1697 offset += sizeof(*elf); /* Elf header */
1698 offset += (segs + 1) * sizeof(struct elf_phdr); /* Program headers */
1699 foffset = offset;
1701 /* Write notes phdr entry */
1703 struct elf_phdr phdr;
1704 int sz = 0;
1706 for (i = 0; i < numnote; i++)
1707 sz += notesize(notes + i);
1709 sz += thread_status_size;
1711 #ifdef ELF_CORE_WRITE_EXTRA_NOTES
1712 extra_notes_size = ELF_CORE_EXTRA_NOTES_SIZE;
1713 sz += extra_notes_size;
1714 #endif
1716 fill_elf_note_phdr(&phdr, sz, offset);
1717 offset += sz;
1718 DUMP_WRITE(&phdr, sizeof(phdr));
1721 dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
1724 * We must use the same mm->flags while dumping core to avoid
1725 * inconsistency between the program headers and bodies, otherwise an
1726 * unusable core file can be generated.
1728 mm_flags = current->mm->flags;
1730 /* Write program headers for segments dump */
1731 for (vma = first_vma(current, gate_vma); vma != NULL;
1732 vma = next_vma(vma, gate_vma)) {
1733 struct elf_phdr phdr;
1734 size_t sz;
1736 sz = vma->vm_end - vma->vm_start;
1738 phdr.p_type = PT_LOAD;
1739 phdr.p_offset = offset;
1740 phdr.p_vaddr = vma->vm_start;
1741 phdr.p_paddr = 0;
1742 phdr.p_filesz = maydump(vma, mm_flags) ? sz : 0;
1743 phdr.p_memsz = sz;
1744 offset += phdr.p_filesz;
1745 phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
1746 if (vma->vm_flags & VM_WRITE)
1747 phdr.p_flags |= PF_W;
1748 if (vma->vm_flags & VM_EXEC)
1749 phdr.p_flags |= PF_X;
1750 phdr.p_align = ELF_EXEC_PAGESIZE;
1752 DUMP_WRITE(&phdr, sizeof(phdr));
1755 #ifdef ELF_CORE_WRITE_EXTRA_PHDRS
1756 ELF_CORE_WRITE_EXTRA_PHDRS;
1757 #endif
1759 /* write out the notes section */
1760 for (i = 0; i < numnote; i++)
1761 if (!writenote(notes + i, file, &foffset))
1762 goto end_coredump;
1764 #ifdef ELF_CORE_WRITE_EXTRA_NOTES
1765 ELF_CORE_WRITE_EXTRA_NOTES;
1766 foffset += extra_notes_size;
1767 #endif
1769 /* write out the thread status notes section */
1770 list_for_each(t, &thread_list) {
1771 struct elf_thread_status *tmp =
1772 list_entry(t, struct elf_thread_status, list);
1774 for (i = 0; i < tmp->num_notes; i++)
1775 if (!writenote(&tmp->notes[i], file, &foffset))
1776 goto end_coredump;
1779 /* Align to page */
1780 DUMP_SEEK(dataoff - foffset);
1782 for (vma = first_vma(current, gate_vma); vma != NULL;
1783 vma = next_vma(vma, gate_vma)) {
1784 unsigned long addr;
1786 if (!maydump(vma, mm_flags))
1787 continue;
1789 for (addr = vma->vm_start;
1790 addr < vma->vm_end;
1791 addr += PAGE_SIZE) {
1792 struct page *page;
1793 struct vm_area_struct *vma;
1795 if (get_user_pages(current, current->mm, addr, 1, 0, 1,
1796 &page, &vma) <= 0) {
1797 DUMP_SEEK(PAGE_SIZE);
1798 } else {
1799 if (page == ZERO_PAGE(addr)) {
1800 if (!dump_seek(file, PAGE_SIZE)) {
1801 page_cache_release(page);
1802 goto end_coredump;
1804 } else {
1805 void *kaddr;
1806 flush_cache_page(vma, addr,
1807 page_to_pfn(page));
1808 kaddr = kmap(page);
1809 if ((size += PAGE_SIZE) > limit ||
1810 !dump_write(file, kaddr,
1811 PAGE_SIZE)) {
1812 kunmap(page);
1813 page_cache_release(page);
1814 goto end_coredump;
1816 kunmap(page);
1818 page_cache_release(page);
1823 #ifdef ELF_CORE_WRITE_EXTRA_DATA
1824 ELF_CORE_WRITE_EXTRA_DATA;
1825 #endif
1827 end_coredump:
1828 set_fs(fs);
1830 cleanup:
1831 while (!list_empty(&thread_list)) {
1832 struct list_head *tmp = thread_list.next;
1833 list_del(tmp);
1834 kfree(list_entry(tmp, struct elf_thread_status, list));
1837 kfree(elf);
1838 kfree(prstatus);
1839 kfree(psinfo);
1840 kfree(notes);
1841 kfree(fpu);
1842 #ifdef ELF_CORE_COPY_XFPREGS
1843 kfree(xfpu);
1844 #endif
1845 return has_dumped;
1846 #undef NUM_NOTES
1849 #endif /* USE_ELF_CORE_DUMP */
1851 static int __init init_elf_binfmt(void)
1853 return register_binfmt(&elf_format);
1856 static void __exit exit_elf_binfmt(void)
1858 /* Remove the COFF and ELF loaders. */
1859 unregister_binfmt(&elf_format);
1862 core_initcall(init_elf_binfmt);
1863 module_exit(exit_elf_binfmt);
1864 MODULE_LICENSE("GPL");