[SCSI] Add SATA support to libsas
[linux-2.6/linux-acpi-2.6/ibm-acpi-2.6.git] / fs / binfmt_elf.c
bloba27e42bf340030402e5905b4830e959906e875ab
1 /*
2 * linux/fs/binfmt_elf.c
4 * These are the functions used to load ELF format executables as used
5 * on SVr4 machines. Information on the format may be found in the book
6 * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
7 * Tools".
9 * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
12 #include <linux/module.h>
13 #include <linux/kernel.h>
14 #include <linux/fs.h>
15 #include <linux/stat.h>
16 #include <linux/time.h>
17 #include <linux/mm.h>
18 #include <linux/mman.h>
19 #include <linux/a.out.h>
20 #include <linux/errno.h>
21 #include <linux/signal.h>
22 #include <linux/binfmts.h>
23 #include <linux/string.h>
24 #include <linux/file.h>
25 #include <linux/fcntl.h>
26 #include <linux/ptrace.h>
27 #include <linux/slab.h>
28 #include <linux/shm.h>
29 #include <linux/personality.h>
30 #include <linux/elfcore.h>
31 #include <linux/init.h>
32 #include <linux/highuid.h>
33 #include <linux/smp.h>
34 #include <linux/compiler.h>
35 #include <linux/highmem.h>
36 #include <linux/pagemap.h>
37 #include <linux/security.h>
38 #include <linux/syscalls.h>
39 #include <linux/random.h>
40 #include <linux/elf.h>
41 #include <linux/utsname.h>
42 #include <asm/uaccess.h>
43 #include <asm/param.h>
44 #include <asm/page.h>
/* Forward declarations of the loader entry points defined later in this file. */
static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs);
static int load_elf_library(struct file *file);
static unsigned long elf_map(struct file *filep, unsigned long addr,
			     struct elf_phdr *eppnt, int prot, int type,
			     unsigned long total_size);

/*
 * If we don't support core dumping, then supply a NULL so we
 * don't even try.
 */
#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file);
#else
#define elf_core_dump	NULL
#endif
/* Mapping granularity: the larger of the ELF page size and the kernel page size. */
#if ELF_EXEC_PAGESIZE > PAGE_SIZE
#define ELF_MIN_ALIGN	ELF_EXEC_PAGESIZE
#else
#define ELF_MIN_ALIGN	PAGE_SIZE
#endif

/* Fallback when the architecture headers do not provide a value. */
#ifndef ELF_CORE_EFLAGS
#define ELF_CORE_EFLAGS	0
#endif

/* Round an address down to an ELF_MIN_ALIGN boundary. */
#define ELF_PAGESTART(addr)	((addr) & ~(unsigned long)(ELF_MIN_ALIGN-1))
/* Offset of an address within its ELF_MIN_ALIGN-sized page. */
#define ELF_PAGEOFFSET(addr)	((addr) & (ELF_MIN_ALIGN-1))
/* Round an address up to the next ELF_MIN_ALIGN boundary. */
#define ELF_PAGEALIGN(addr)	(((addr) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
74 static struct linux_binfmt elf_format = {
75 .module = THIS_MODULE,
76 .load_binary = load_elf_binary,
77 .load_shlib = load_elf_library,
78 .core_dump = elf_core_dump,
79 .min_coredump = ELF_EXEC_PAGESIZE,
80 .hasvdso = 1
83 #define BAD_ADDR(x) IS_ERR_VALUE(x)
85 static int set_brk(unsigned long start, unsigned long end)
87 start = ELF_PAGEALIGN(start);
88 end = ELF_PAGEALIGN(end);
89 if (end > start) {
90 unsigned long addr;
91 down_write(&current->mm->mmap_sem);
92 addr = do_brk(start, end - start);
93 up_write(&current->mm->mmap_sem);
94 if (BAD_ADDR(addr))
95 return addr;
97 current->mm->start_brk = current->mm->brk = end;
98 return 0;
101 /* We need to explicitly zero any fractional pages
102 after the data section (i.e. bss). This would
103 contain the junk from the file that should not
104 be in memory
106 static int padzero(unsigned long elf_bss)
108 unsigned long nbyte;
110 nbyte = ELF_PAGEOFFSET(elf_bss);
111 if (nbyte) {
112 nbyte = ELF_MIN_ALIGN - nbyte;
113 if (clear_user((void __user *) elf_bss, nbyte))
114 return -EFAULT;
116 return 0;
/*
 * Let's use some macros to make this stack manipulation a little clearer.
 *
 * STACK_ADD(sp, items)   - address "items" slots further along the stack
 *                          from sp, in the direction the stack grows.
 * STACK_ROUND(sp, items) - address of "items" slots beyond sp, rounded to a
 *                          16-byte boundary in the growth direction.
 * STACK_ALLOC(sp, len)   - move sp by len in the growth direction and yield
 *                          the start of the region just reserved.
 *
 * Every macro argument is parenthesized so that expression arguments
 * (e.g. "a + b") expand with the intended precedence.
 */
#ifdef CONFIG_STACK_GROWSUP
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
#define STACK_ROUND(sp, items) \
	((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ \
	elf_addr_t __user *old_sp = (elf_addr_t __user *)(sp); (sp) += (len); \
	old_sp; })
#else
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
#define STACK_ROUND(sp, items) \
	(((unsigned long) ((sp) - (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ (sp) -= (len); (sp); })
#endif
134 static int
135 create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
136 int interp_aout, unsigned long load_addr,
137 unsigned long interp_load_addr)
139 unsigned long p = bprm->p;
140 int argc = bprm->argc;
141 int envc = bprm->envc;
142 elf_addr_t __user *argv;
143 elf_addr_t __user *envp;
144 elf_addr_t __user *sp;
145 elf_addr_t __user *u_platform;
146 const char *k_platform = ELF_PLATFORM;
147 int items;
148 elf_addr_t *elf_info;
149 int ei_index = 0;
150 struct task_struct *tsk = current;
153 * If this architecture has a platform capability string, copy it
154 * to userspace. In some cases (Sparc), this info is impossible
155 * for userspace to get any other way, in others (i386) it is
156 * merely difficult.
158 u_platform = NULL;
159 if (k_platform) {
160 size_t len = strlen(k_platform) + 1;
163 * In some cases (e.g. Hyper-Threading), we want to avoid L1
164 * evictions by the processes running on the same package. One
165 * thing we can do is to shuffle the initial stack for them.
168 p = arch_align_stack(p);
170 u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
171 if (__copy_to_user(u_platform, k_platform, len))
172 return -EFAULT;
175 /* Create the ELF interpreter info */
176 elf_info = (elf_addr_t *)current->mm->saved_auxv;
177 #define NEW_AUX_ENT(id, val) \
178 do { \
179 elf_info[ei_index++] = id; \
180 elf_info[ei_index++] = val; \
181 } while (0)
183 #ifdef ARCH_DLINFO
185 * ARCH_DLINFO must come first so PPC can do its special alignment of
186 * AUXV.
188 ARCH_DLINFO;
189 #endif
190 NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
191 NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
192 NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
193 NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
194 NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
195 NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
196 NEW_AUX_ENT(AT_BASE, interp_load_addr);
197 NEW_AUX_ENT(AT_FLAGS, 0);
198 NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
199 NEW_AUX_ENT(AT_UID, tsk->uid);
200 NEW_AUX_ENT(AT_EUID, tsk->euid);
201 NEW_AUX_ENT(AT_GID, tsk->gid);
202 NEW_AUX_ENT(AT_EGID, tsk->egid);
203 NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
204 if (k_platform) {
205 NEW_AUX_ENT(AT_PLATFORM,
206 (elf_addr_t)(unsigned long)u_platform);
208 if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
209 NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
211 #undef NEW_AUX_ENT
212 /* AT_NULL is zero; clear the rest too */
213 memset(&elf_info[ei_index], 0,
214 sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);
216 /* And advance past the AT_NULL entry. */
217 ei_index += 2;
219 sp = STACK_ADD(p, ei_index);
221 items = (argc + 1) + (envc + 1);
222 if (interp_aout) {
223 items += 3; /* a.out interpreters require argv & envp too */
224 } else {
225 items += 1; /* ELF interpreters only put argc on the stack */
227 bprm->p = STACK_ROUND(sp, items);
229 /* Point sp at the lowest address on the stack */
230 #ifdef CONFIG_STACK_GROWSUP
231 sp = (elf_addr_t __user *)bprm->p - items - ei_index;
232 bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
233 #else
234 sp = (elf_addr_t __user *)bprm->p;
235 #endif
237 /* Now, let's put argc (and argv, envp if appropriate) on the stack */
238 if (__put_user(argc, sp++))
239 return -EFAULT;
240 if (interp_aout) {
241 argv = sp + 2;
242 envp = argv + argc + 1;
243 if (__put_user((elf_addr_t)(unsigned long)argv, sp++) ||
244 __put_user((elf_addr_t)(unsigned long)envp, sp++))
245 return -EFAULT;
246 } else {
247 argv = sp;
248 envp = argv + argc + 1;
251 /* Populate argv and envp */
252 p = current->mm->arg_end = current->mm->arg_start;
253 while (argc-- > 0) {
254 size_t len;
255 if (__put_user((elf_addr_t)p, argv++))
256 return -EFAULT;
257 len = strnlen_user((void __user *)p, PAGE_SIZE*MAX_ARG_PAGES);
258 if (!len || len > PAGE_SIZE*MAX_ARG_PAGES)
259 return 0;
260 p += len;
262 if (__put_user(0, argv))
263 return -EFAULT;
264 current->mm->arg_end = current->mm->env_start = p;
265 while (envc-- > 0) {
266 size_t len;
267 if (__put_user((elf_addr_t)p, envp++))
268 return -EFAULT;
269 len = strnlen_user((void __user *)p, PAGE_SIZE*MAX_ARG_PAGES);
270 if (!len || len > PAGE_SIZE*MAX_ARG_PAGES)
271 return 0;
272 p += len;
274 if (__put_user(0, envp))
275 return -EFAULT;
276 current->mm->env_end = p;
278 /* Put the elf_info on the stack in the right place. */
279 sp = (elf_addr_t __user *)envp + 1;
280 if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
281 return -EFAULT;
282 return 0;
285 #ifndef elf_map
287 static unsigned long elf_map(struct file *filep, unsigned long addr,
288 struct elf_phdr *eppnt, int prot, int type,
289 unsigned long total_size)
291 unsigned long map_addr;
292 unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
293 unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
294 addr = ELF_PAGESTART(addr);
295 size = ELF_PAGEALIGN(size);
297 /* mmap() will return -EINVAL if given a zero size, but a
298 * segment with zero filesize is perfectly valid */
299 if (!size)
300 return addr;
302 down_write(&current->mm->mmap_sem);
304 * total_size is the size of the ELF (interpreter) image.
305 * The _first_ mmap needs to know the full size, otherwise
306 * randomization might put this image into an overlapping
307 * position with the ELF binary image. (since size < total_size)
308 * So we first map the 'big' image - and unmap the remainder at
309 * the end. (which unmap is needed for ELF images with holes.)
311 if (total_size) {
312 total_size = ELF_PAGEALIGN(total_size);
313 map_addr = do_mmap(filep, addr, total_size, prot, type, off);
314 if (!BAD_ADDR(map_addr))
315 do_munmap(current->mm, map_addr+size, total_size-size);
316 } else
317 map_addr = do_mmap(filep, addr, size, prot, type, off);
319 up_write(&current->mm->mmap_sem);
320 return(map_addr);
323 #endif /* !elf_map */
325 static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
327 int i, first_idx = -1, last_idx = -1;
329 for (i = 0; i < nr; i++) {
330 if (cmds[i].p_type == PT_LOAD) {
331 last_idx = i;
332 if (first_idx == -1)
333 first_idx = i;
336 if (first_idx == -1)
337 return 0;
339 return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
340 ELF_PAGESTART(cmds[first_idx].p_vaddr);
344 /* This is much more generalized than the library routine read function,
345 so we keep this separate. Technically the library read function
346 is only provided so that we can read a.out libraries that have
347 an ELF header */
349 static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
350 struct file *interpreter, unsigned long *interp_map_addr,
351 unsigned long no_base)
353 struct elf_phdr *elf_phdata;
354 struct elf_phdr *eppnt;
355 unsigned long load_addr = 0;
356 int load_addr_set = 0;
357 unsigned long last_bss = 0, elf_bss = 0;
358 unsigned long error = ~0UL;
359 unsigned long total_size;
360 int retval, i, size;
362 /* First of all, some simple consistency checks */
363 if (interp_elf_ex->e_type != ET_EXEC &&
364 interp_elf_ex->e_type != ET_DYN)
365 goto out;
366 if (!elf_check_arch(interp_elf_ex))
367 goto out;
368 if (!interpreter->f_op || !interpreter->f_op->mmap)
369 goto out;
372 * If the size of this structure has changed, then punt, since
373 * we will be doing the wrong thing.
375 if (interp_elf_ex->e_phentsize != sizeof(struct elf_phdr))
376 goto out;
377 if (interp_elf_ex->e_phnum < 1 ||
378 interp_elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
379 goto out;
381 /* Now read in all of the header information */
382 size = sizeof(struct elf_phdr) * interp_elf_ex->e_phnum;
383 if (size > ELF_MIN_ALIGN)
384 goto out;
385 elf_phdata = kmalloc(size, GFP_KERNEL);
386 if (!elf_phdata)
387 goto out;
389 retval = kernel_read(interpreter, interp_elf_ex->e_phoff,
390 (char *)elf_phdata,size);
391 error = -EIO;
392 if (retval != size) {
393 if (retval < 0)
394 error = retval;
395 goto out_close;
398 total_size = total_mapping_size(elf_phdata, interp_elf_ex->e_phnum);
399 if (!total_size) {
400 error = -EINVAL;
401 goto out_close;
404 eppnt = elf_phdata;
405 for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
406 if (eppnt->p_type == PT_LOAD) {
407 int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
408 int elf_prot = 0;
409 unsigned long vaddr = 0;
410 unsigned long k, map_addr;
412 if (eppnt->p_flags & PF_R)
413 elf_prot = PROT_READ;
414 if (eppnt->p_flags & PF_W)
415 elf_prot |= PROT_WRITE;
416 if (eppnt->p_flags & PF_X)
417 elf_prot |= PROT_EXEC;
418 vaddr = eppnt->p_vaddr;
419 if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
420 elf_type |= MAP_FIXED;
421 else if (no_base && interp_elf_ex->e_type == ET_DYN)
422 load_addr = -vaddr;
424 map_addr = elf_map(interpreter, load_addr + vaddr,
425 eppnt, elf_prot, elf_type, total_size);
426 total_size = 0;
427 if (!*interp_map_addr)
428 *interp_map_addr = map_addr;
429 error = map_addr;
430 if (BAD_ADDR(map_addr))
431 goto out_close;
433 if (!load_addr_set &&
434 interp_elf_ex->e_type == ET_DYN) {
435 load_addr = map_addr - ELF_PAGESTART(vaddr);
436 load_addr_set = 1;
440 * Check to see if the section's size will overflow the
441 * allowed task size. Note that p_filesz must always be
442 * <= p_memsize so it's only necessary to check p_memsz.
444 k = load_addr + eppnt->p_vaddr;
445 if (BAD_ADDR(k) ||
446 eppnt->p_filesz > eppnt->p_memsz ||
447 eppnt->p_memsz > TASK_SIZE ||
448 TASK_SIZE - eppnt->p_memsz < k) {
449 error = -ENOMEM;
450 goto out_close;
454 * Find the end of the file mapping for this phdr, and
455 * keep track of the largest address we see for this.
457 k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
458 if (k > elf_bss)
459 elf_bss = k;
462 * Do the same thing for the memory mapping - between
463 * elf_bss and last_bss is the bss section.
465 k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
466 if (k > last_bss)
467 last_bss = k;
472 * Now fill out the bss section. First pad the last page up
473 * to the page boundary, and then perform a mmap to make sure
474 * that there are zero-mapped pages up to and including the
475 * last bss page.
477 if (padzero(elf_bss)) {
478 error = -EFAULT;
479 goto out_close;
482 /* What we have mapped so far */
483 elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);
485 /* Map the last of the bss segment */
486 if (last_bss > elf_bss) {
487 down_write(&current->mm->mmap_sem);
488 error = do_brk(elf_bss, last_bss - elf_bss);
489 up_write(&current->mm->mmap_sem);
490 if (BAD_ADDR(error))
491 goto out_close;
494 error = load_addr;
496 out_close:
497 kfree(elf_phdata);
498 out:
499 return error;
502 static unsigned long load_aout_interp(struct exec *interp_ex,
503 struct file *interpreter)
505 unsigned long text_data, elf_entry = ~0UL;
506 char __user * addr;
507 loff_t offset;
509 current->mm->end_code = interp_ex->a_text;
510 text_data = interp_ex->a_text + interp_ex->a_data;
511 current->mm->end_data = text_data;
512 current->mm->brk = interp_ex->a_bss + text_data;
514 switch (N_MAGIC(*interp_ex)) {
515 case OMAGIC:
516 offset = 32;
517 addr = (char __user *)0;
518 break;
519 case ZMAGIC:
520 case QMAGIC:
521 offset = N_TXTOFF(*interp_ex);
522 addr = (char __user *)N_TXTADDR(*interp_ex);
523 break;
524 default:
525 goto out;
528 down_write(&current->mm->mmap_sem);
529 do_brk(0, text_data);
530 up_write(&current->mm->mmap_sem);
531 if (!interpreter->f_op || !interpreter->f_op->read)
532 goto out;
533 if (interpreter->f_op->read(interpreter, addr, text_data, &offset) < 0)
534 goto out;
535 flush_icache_range((unsigned long)addr,
536 (unsigned long)addr + text_data);
538 down_write(&current->mm->mmap_sem);
539 do_brk(ELF_PAGESTART(text_data + ELF_MIN_ALIGN - 1),
540 interp_ex->a_bss);
541 up_write(&current->mm->mmap_sem);
542 elf_entry = interp_ex->a_entry;
544 out:
545 return elf_entry;
549 * These are the functions used to load ELF style executables and shared
550 * libraries. There is no binary dependent code anywhere else.
553 #define INTERPRETER_NONE 0
554 #define INTERPRETER_AOUT 1
555 #define INTERPRETER_ELF 2
557 #ifndef STACK_RND_MASK
558 #define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12)) /* 8MB of VA */
559 #endif
561 static unsigned long randomize_stack_top(unsigned long stack_top)
563 unsigned int random_variable = 0;
565 if ((current->flags & PF_RANDOMIZE) &&
566 !(current->personality & ADDR_NO_RANDOMIZE)) {
567 random_variable = get_random_int() & STACK_RND_MASK;
568 random_variable <<= PAGE_SHIFT;
570 #ifdef CONFIG_STACK_GROWSUP
571 return PAGE_ALIGN(stack_top) + random_variable;
572 #else
573 return PAGE_ALIGN(stack_top) - random_variable;
574 #endif
577 static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
579 struct file *interpreter = NULL; /* to shut gcc up */
580 unsigned long load_addr = 0, load_bias = 0;
581 int load_addr_set = 0;
582 char * elf_interpreter = NULL;
583 unsigned int interpreter_type = INTERPRETER_NONE;
584 unsigned char ibcs2_interpreter = 0;
585 unsigned long error;
586 struct elf_phdr *elf_ppnt, *elf_phdata;
587 unsigned long elf_bss, elf_brk;
588 int elf_exec_fileno;
589 int retval, i;
590 unsigned int size;
591 unsigned long elf_entry;
592 unsigned long interp_load_addr = 0;
593 unsigned long start_code, end_code, start_data, end_data;
594 unsigned long reloc_func_desc = 0;
595 char passed_fileno[6];
596 struct files_struct *files;
597 int executable_stack = EXSTACK_DEFAULT;
598 unsigned long def_flags = 0;
599 struct {
600 struct elfhdr elf_ex;
601 struct elfhdr interp_elf_ex;
602 struct exec interp_ex;
603 } *loc;
605 loc = kmalloc(sizeof(*loc), GFP_KERNEL);
606 if (!loc) {
607 retval = -ENOMEM;
608 goto out_ret;
611 /* Get the exec-header */
612 loc->elf_ex = *((struct elfhdr *)bprm->buf);
614 retval = -ENOEXEC;
615 /* First of all, some simple consistency checks */
616 if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
617 goto out;
619 if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
620 goto out;
621 if (!elf_check_arch(&loc->elf_ex))
622 goto out;
623 if (!bprm->file->f_op||!bprm->file->f_op->mmap)
624 goto out;
626 /* Now read in all of the header information */
627 if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr))
628 goto out;
629 if (loc->elf_ex.e_phnum < 1 ||
630 loc->elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr))
631 goto out;
632 size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr);
633 retval = -ENOMEM;
634 elf_phdata = kmalloc(size, GFP_KERNEL);
635 if (!elf_phdata)
636 goto out;
638 retval = kernel_read(bprm->file, loc->elf_ex.e_phoff,
639 (char *)elf_phdata, size);
640 if (retval != size) {
641 if (retval >= 0)
642 retval = -EIO;
643 goto out_free_ph;
646 files = current->files; /* Refcounted so ok */
647 retval = unshare_files();
648 if (retval < 0)
649 goto out_free_ph;
650 if (files == current->files) {
651 put_files_struct(files);
652 files = NULL;
655 /* exec will make our files private anyway, but for the a.out
656 loader stuff we need to do it earlier */
657 retval = get_unused_fd();
658 if (retval < 0)
659 goto out_free_fh;
660 get_file(bprm->file);
661 fd_install(elf_exec_fileno = retval, bprm->file);
663 elf_ppnt = elf_phdata;
664 elf_bss = 0;
665 elf_brk = 0;
667 start_code = ~0UL;
668 end_code = 0;
669 start_data = 0;
670 end_data = 0;
672 for (i = 0; i < loc->elf_ex.e_phnum; i++) {
673 if (elf_ppnt->p_type == PT_INTERP) {
674 /* This is the program interpreter used for
675 * shared libraries - for now assume that this
676 * is an a.out format binary
678 retval = -ENOEXEC;
679 if (elf_ppnt->p_filesz > PATH_MAX ||
680 elf_ppnt->p_filesz < 2)
681 goto out_free_file;
683 retval = -ENOMEM;
684 elf_interpreter = kmalloc(elf_ppnt->p_filesz,
685 GFP_KERNEL);
686 if (!elf_interpreter)
687 goto out_free_file;
689 retval = kernel_read(bprm->file, elf_ppnt->p_offset,
690 elf_interpreter,
691 elf_ppnt->p_filesz);
692 if (retval != elf_ppnt->p_filesz) {
693 if (retval >= 0)
694 retval = -EIO;
695 goto out_free_interp;
697 /* make sure path is NULL terminated */
698 retval = -ENOEXEC;
699 if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
700 goto out_free_interp;
702 /* If the program interpreter is one of these two,
703 * then assume an iBCS2 image. Otherwise assume
704 * a native linux image.
706 if (strcmp(elf_interpreter,"/usr/lib/libc.so.1") == 0 ||
707 strcmp(elf_interpreter,"/usr/lib/ld.so.1") == 0)
708 ibcs2_interpreter = 1;
711 * The early SET_PERSONALITY here is so that the lookup
712 * for the interpreter happens in the namespace of the
713 * to-be-execed image. SET_PERSONALITY can select an
714 * alternate root.
716 * However, SET_PERSONALITY is NOT allowed to switch
717 * this task into the new images's memory mapping
718 * policy - that is, TASK_SIZE must still evaluate to
719 * that which is appropriate to the execing application.
720 * This is because exit_mmap() needs to have TASK_SIZE
721 * evaluate to the size of the old image.
723 * So if (say) a 64-bit application is execing a 32-bit
724 * application it is the architecture's responsibility
725 * to defer changing the value of TASK_SIZE until the
726 * switch really is going to happen - do this in
727 * flush_thread(). - akpm
729 SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter);
731 interpreter = open_exec(elf_interpreter);
732 retval = PTR_ERR(interpreter);
733 if (IS_ERR(interpreter))
734 goto out_free_interp;
737 * If the binary is not readable then enforce
738 * mm->dumpable = 0 regardless of the interpreter's
739 * permissions.
741 if (file_permission(interpreter, MAY_READ) < 0)
742 bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP;
744 retval = kernel_read(interpreter, 0, bprm->buf,
745 BINPRM_BUF_SIZE);
746 if (retval != BINPRM_BUF_SIZE) {
747 if (retval >= 0)
748 retval = -EIO;
749 goto out_free_dentry;
752 /* Get the exec headers */
753 loc->interp_ex = *((struct exec *)bprm->buf);
754 loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);
755 break;
757 elf_ppnt++;
760 elf_ppnt = elf_phdata;
761 for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
762 if (elf_ppnt->p_type == PT_GNU_STACK) {
763 if (elf_ppnt->p_flags & PF_X)
764 executable_stack = EXSTACK_ENABLE_X;
765 else
766 executable_stack = EXSTACK_DISABLE_X;
767 break;
770 /* Some simple consistency checks for the interpreter */
771 if (elf_interpreter) {
772 interpreter_type = INTERPRETER_ELF | INTERPRETER_AOUT;
774 /* Now figure out which format our binary is */
775 if ((N_MAGIC(loc->interp_ex) != OMAGIC) &&
776 (N_MAGIC(loc->interp_ex) != ZMAGIC) &&
777 (N_MAGIC(loc->interp_ex) != QMAGIC))
778 interpreter_type = INTERPRETER_ELF;
780 if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
781 interpreter_type &= ~INTERPRETER_ELF;
783 retval = -ELIBBAD;
784 if (!interpreter_type)
785 goto out_free_dentry;
787 /* Make sure only one type was selected */
788 if ((interpreter_type & INTERPRETER_ELF) &&
789 interpreter_type != INTERPRETER_ELF) {
790 // FIXME - ratelimit this before re-enabling
791 // printk(KERN_WARNING "ELF: Ambiguous type, using ELF\n");
792 interpreter_type = INTERPRETER_ELF;
794 /* Verify the interpreter has a valid arch */
795 if ((interpreter_type == INTERPRETER_ELF) &&
796 !elf_check_arch(&loc->interp_elf_ex))
797 goto out_free_dentry;
798 } else {
799 /* Executables without an interpreter also need a personality */
800 SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter);
803 /* OK, we are done with that, now set up the arg stuff,
804 and then start this sucker up */
805 if ((!bprm->sh_bang) && (interpreter_type == INTERPRETER_AOUT)) {
806 char *passed_p = passed_fileno;
807 sprintf(passed_fileno, "%d", elf_exec_fileno);
809 if (elf_interpreter) {
810 retval = copy_strings_kernel(1, &passed_p, bprm);
811 if (retval)
812 goto out_free_dentry;
813 bprm->argc++;
817 /* Flush all traces of the currently running executable */
818 retval = flush_old_exec(bprm);
819 if (retval)
820 goto out_free_dentry;
822 /* Discard our unneeded old files struct */
823 if (files) {
824 put_files_struct(files);
825 files = NULL;
828 /* OK, This is the point of no return */
829 current->mm->start_data = 0;
830 current->mm->end_data = 0;
831 current->mm->end_code = 0;
832 current->mm->mmap = NULL;
833 current->flags &= ~PF_FORKNOEXEC;
834 current->mm->def_flags = def_flags;
836 /* Do this immediately, since STACK_TOP as used in setup_arg_pages
837 may depend on the personality. */
838 SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter);
839 if (elf_read_implies_exec(loc->elf_ex, executable_stack))
840 current->personality |= READ_IMPLIES_EXEC;
842 if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
843 current->flags |= PF_RANDOMIZE;
844 arch_pick_mmap_layout(current->mm);
846 /* Do this so that we can load the interpreter, if need be. We will
847 change some of these later */
848 current->mm->free_area_cache = current->mm->mmap_base;
849 current->mm->cached_hole_size = 0;
850 retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
851 executable_stack);
852 if (retval < 0) {
853 send_sig(SIGKILL, current, 0);
854 goto out_free_dentry;
857 current->mm->start_stack = bprm->p;
859 /* Now we do a little grungy work by mmaping the ELF image into
860 the correct location in memory. */
861 for(i = 0, elf_ppnt = elf_phdata;
862 i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
863 int elf_prot = 0, elf_flags;
864 unsigned long k, vaddr;
866 if (elf_ppnt->p_type != PT_LOAD)
867 continue;
869 if (unlikely (elf_brk > elf_bss)) {
870 unsigned long nbyte;
872 /* There was a PT_LOAD segment with p_memsz > p_filesz
873 before this one. Map anonymous pages, if needed,
874 and clear the area. */
875 retval = set_brk (elf_bss + load_bias,
876 elf_brk + load_bias);
877 if (retval) {
878 send_sig(SIGKILL, current, 0);
879 goto out_free_dentry;
881 nbyte = ELF_PAGEOFFSET(elf_bss);
882 if (nbyte) {
883 nbyte = ELF_MIN_ALIGN - nbyte;
884 if (nbyte > elf_brk - elf_bss)
885 nbyte = elf_brk - elf_bss;
886 if (clear_user((void __user *)elf_bss +
887 load_bias, nbyte)) {
889 * This bss-zeroing can fail if the ELF
890 * file specifies odd protections. So
891 * we don't check the return value
897 if (elf_ppnt->p_flags & PF_R)
898 elf_prot |= PROT_READ;
899 if (elf_ppnt->p_flags & PF_W)
900 elf_prot |= PROT_WRITE;
901 if (elf_ppnt->p_flags & PF_X)
902 elf_prot |= PROT_EXEC;
904 elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;
906 vaddr = elf_ppnt->p_vaddr;
907 if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
908 elf_flags |= MAP_FIXED;
909 } else if (loc->elf_ex.e_type == ET_DYN) {
910 /* Try and get dynamic programs out of the way of the
911 * default mmap base, as well as whatever program they
912 * might try to exec. This is because the brk will
913 * follow the loader, and is not movable. */
914 #ifdef CONFIG_X86
915 load_bias = 0;
916 #else
917 load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
918 #endif
921 error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
922 elf_prot, elf_flags,0);
923 if (BAD_ADDR(error)) {
924 send_sig(SIGKILL, current, 0);
925 retval = IS_ERR((void *)error) ?
926 PTR_ERR((void*)error) : -EINVAL;
927 goto out_free_dentry;
930 if (!load_addr_set) {
931 load_addr_set = 1;
932 load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
933 if (loc->elf_ex.e_type == ET_DYN) {
934 load_bias += error -
935 ELF_PAGESTART(load_bias + vaddr);
936 load_addr += load_bias;
937 reloc_func_desc = load_bias;
940 k = elf_ppnt->p_vaddr;
941 if (k < start_code)
942 start_code = k;
943 if (start_data < k)
944 start_data = k;
947 * Check to see if the section's size will overflow the
948 * allowed task size. Note that p_filesz must always be
949 * <= p_memsz so it is only necessary to check p_memsz.
951 if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
952 elf_ppnt->p_memsz > TASK_SIZE ||
953 TASK_SIZE - elf_ppnt->p_memsz < k) {
954 /* set_brk can never work. Avoid overflows. */
955 send_sig(SIGKILL, current, 0);
956 retval = -EINVAL;
957 goto out_free_dentry;
960 k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;
962 if (k > elf_bss)
963 elf_bss = k;
964 if ((elf_ppnt->p_flags & PF_X) && end_code < k)
965 end_code = k;
966 if (end_data < k)
967 end_data = k;
968 k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
969 if (k > elf_brk)
970 elf_brk = k;
973 loc->elf_ex.e_entry += load_bias;
974 elf_bss += load_bias;
975 elf_brk += load_bias;
976 start_code += load_bias;
977 end_code += load_bias;
978 start_data += load_bias;
979 end_data += load_bias;
981 /* Calling set_brk effectively mmaps the pages that we need
982 * for the bss and break sections. We must do this before
983 * mapping in the interpreter, to make sure it doesn't wind
984 * up getting placed where the bss needs to go.
986 retval = set_brk(elf_bss, elf_brk);
987 if (retval) {
988 send_sig(SIGKILL, current, 0);
989 goto out_free_dentry;
991 if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
992 send_sig(SIGSEGV, current, 0);
993 retval = -EFAULT; /* Nobody gets to see this, but.. */
994 goto out_free_dentry;
997 if (elf_interpreter) {
998 if (interpreter_type == INTERPRETER_AOUT) {
999 elf_entry = load_aout_interp(&loc->interp_ex,
1000 interpreter);
1001 } else {
1002 unsigned long uninitialized_var(interp_map_addr);
1004 elf_entry = load_elf_interp(&loc->interp_elf_ex,
1005 interpreter,
1006 &interp_map_addr,
1007 load_bias);
1008 if (!BAD_ADDR(elf_entry)) {
1010 * load_elf_interp() returns relocation
1011 * adjustment
1013 interp_load_addr = elf_entry;
1014 elf_entry += loc->interp_elf_ex.e_entry;
1017 if (BAD_ADDR(elf_entry)) {
1018 force_sig(SIGSEGV, current);
1019 retval = IS_ERR((void *)elf_entry) ?
1020 (int)elf_entry : -EINVAL;
1021 goto out_free_dentry;
1023 reloc_func_desc = interp_load_addr;
1025 allow_write_access(interpreter);
1026 fput(interpreter);
1027 kfree(elf_interpreter);
1028 } else {
1029 elf_entry = loc->elf_ex.e_entry;
1030 if (BAD_ADDR(elf_entry)) {
1031 force_sig(SIGSEGV, current);
1032 retval = -EINVAL;
1033 goto out_free_dentry;
1037 kfree(elf_phdata);
1039 if (interpreter_type != INTERPRETER_AOUT)
1040 sys_close(elf_exec_fileno);
1042 set_binfmt(&elf_format);
1044 #ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
1045 retval = arch_setup_additional_pages(bprm, executable_stack);
1046 if (retval < 0) {
1047 send_sig(SIGKILL, current, 0);
1048 goto out;
1050 #endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
1052 compute_creds(bprm);
1053 current->flags &= ~PF_FORKNOEXEC;
1054 create_elf_tables(bprm, &loc->elf_ex,
1055 (interpreter_type == INTERPRETER_AOUT),
1056 load_addr, interp_load_addr);
1057 /* N.B. passed_fileno might not be initialized? */
1058 if (interpreter_type == INTERPRETER_AOUT)
1059 current->mm->arg_start += strlen(passed_fileno) + 1;
1060 current->mm->end_code = end_code;
1061 current->mm->start_code = start_code;
1062 current->mm->start_data = start_data;
1063 current->mm->end_data = end_data;
1064 current->mm->start_stack = bprm->p;
1066 if (current->personality & MMAP_PAGE_ZERO) {
1067 /* Why this, you ask??? Well SVr4 maps page 0 as read-only,
1068 and some applications "depend" upon this behavior.
1069 Since we do not have the power to recompile these, we
1070 emulate the SVr4 behavior. Sigh. */
1071 down_write(&current->mm->mmap_sem);
1072 error = do_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
1073 MAP_FIXED | MAP_PRIVATE, 0);
1074 up_write(&current->mm->mmap_sem);
1077 #ifdef ELF_PLAT_INIT
1079 * The ABI may specify that certain registers be set up in special
1080 * ways (on i386 %edx is the address of a DT_FINI function, for
1081 * example. In addition, it may also specify (eg, PowerPC64 ELF)
1082 * that the e_entry field is the address of the function descriptor
1083 * for the startup routine, rather than the address of the startup
1084 * routine itself. This macro performs whatever initialization to
1085 * the regs structure is required as well as any relocations to the
1086 * function descriptor entries when executing dynamically links apps.
1088 ELF_PLAT_INIT(regs, reloc_func_desc);
1089 #endif
1091 start_thread(regs, elf_entry, bprm->p);
1092 if (unlikely(current->ptrace & PT_PTRACED)) {
1093 if (current->ptrace & PT_TRACE_EXEC)
1094 ptrace_notify ((PTRACE_EVENT_EXEC << 8) | SIGTRAP);
1095 else
1096 send_sig(SIGTRAP, current, 0);
1098 retval = 0;
1099 out:
1100 kfree(loc);
1101 out_ret:
1102 return retval;
1104 /* error cleanup */
1105 out_free_dentry:
1106 allow_write_access(interpreter);
1107 if (interpreter)
1108 fput(interpreter);
1109 out_free_interp:
1110 kfree(elf_interpreter);
1111 out_free_file:
1112 sys_close(elf_exec_fileno);
1113 out_free_fh:
1114 if (files)
1115 reset_files_struct(current, files);
1116 out_free_ph:
1117 kfree(elf_phdata);
1118 goto out;
1121 /* This is really simpleminded and specialized - we are loading an
1122 a.out library that is given an ELF header. */
1123 static int load_elf_library(struct file *file)
1125 struct elf_phdr *elf_phdata;
1126 struct elf_phdr *eppnt;
1127 unsigned long elf_bss, bss, len;
1128 int retval, error, i, j;
1129 struct elfhdr elf_ex;
1131 error = -ENOEXEC;
1132 retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
1133 if (retval != sizeof(elf_ex))
1134 goto out;
1136 if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
1137 goto out;
1139 /* First of all, some simple consistency checks */
1140 if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
1141 !elf_check_arch(&elf_ex) || !file->f_op || !file->f_op->mmap)
1142 goto out;
1144 /* Now read in all of the header information */
1146 j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
1147 /* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */
1149 error = -ENOMEM;
1150 elf_phdata = kmalloc(j, GFP_KERNEL);
1151 if (!elf_phdata)
1152 goto out;
1154 eppnt = elf_phdata;
1155 error = -ENOEXEC;
1156 retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
1157 if (retval != j)
1158 goto out_free_ph;
1160 for (j = 0, i = 0; i<elf_ex.e_phnum; i++)
1161 if ((eppnt + i)->p_type == PT_LOAD)
1162 j++;
1163 if (j != 1)
1164 goto out_free_ph;
1166 while (eppnt->p_type != PT_LOAD)
1167 eppnt++;
1169 /* Now use mmap to map the library into memory. */
1170 down_write(&current->mm->mmap_sem);
1171 error = do_mmap(file,
1172 ELF_PAGESTART(eppnt->p_vaddr),
1173 (eppnt->p_filesz +
1174 ELF_PAGEOFFSET(eppnt->p_vaddr)),
1175 PROT_READ | PROT_WRITE | PROT_EXEC,
1176 MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
1177 (eppnt->p_offset -
1178 ELF_PAGEOFFSET(eppnt->p_vaddr)));
1179 up_write(&current->mm->mmap_sem);
1180 if (error != ELF_PAGESTART(eppnt->p_vaddr))
1181 goto out_free_ph;
1183 elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
1184 if (padzero(elf_bss)) {
1185 error = -EFAULT;
1186 goto out_free_ph;
1189 len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
1190 ELF_MIN_ALIGN - 1);
1191 bss = eppnt->p_memsz + eppnt->p_vaddr;
1192 if (bss > len) {
1193 down_write(&current->mm->mmap_sem);
1194 do_brk(len, bss - len);
1195 up_write(&current->mm->mmap_sem);
1197 error = 0;
1199 out_free_ph:
1200 kfree(elf_phdata);
1201 out:
1202 return error;
1206 * Note that some platforms still use traditional core dumps and not
1207 * the ELF core dump. Each platform can select it as appropriate.
1209 #if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
1212 * ELF core dumper
1214 * Modelled on fs/exec.c:aout_core_dump()
1215 * Jeremy Fitzhardinge <jeremy@sw.oz.au>
1218 * These are the only things you should do on a core-file: use only these
1219 * functions to write out all the necessary info.
1221 static int dump_write(struct file *file, const void *addr, int nr)
1223 return file->f_op->write(file, addr, nr, &file->f_pos) == nr;
1226 static int dump_seek(struct file *file, loff_t off)
1228 if (file->f_op->llseek && file->f_op->llseek != no_llseek) {
1229 if (file->f_op->llseek(file, off, SEEK_CUR) < 0)
1230 return 0;
1231 } else {
1232 char *buf = (char *)get_zeroed_page(GFP_KERNEL);
1233 if (!buf)
1234 return 0;
1235 while (off > 0) {
1236 unsigned long n = off;
1237 if (n > PAGE_SIZE)
1238 n = PAGE_SIZE;
1239 if (!dump_write(file, buf, n))
1240 return 0;
1241 off -= n;
1243 free_page((unsigned long)buf);
1245 return 1;
1249 * Decide whether a segment is worth dumping; default is yes to be
1250 * sure (missing info is worse than too much; etc).
1251 * Personally I'd include everything, and use the coredump limit...
1253 * I think we should skip something. But I am not sure how. H.J.
1255 static int maydump(struct vm_area_struct *vma)
1257 /* The vma can be set up to tell us the answer directly. */
1258 if (vma->vm_flags & VM_ALWAYSDUMP)
1259 return 1;
1261 /* Do not dump I/O mapped devices or special mappings */
1262 if (vma->vm_flags & (VM_IO | VM_RESERVED))
1263 return 0;
1265 /* Dump shared memory only if mapped from an anonymous file. */
1266 if (vma->vm_flags & VM_SHARED)
1267 return vma->vm_file->f_path.dentry->d_inode->i_nlink == 0;
1269 /* If it hasn't been written to, don't write it out */
1270 if (!vma->anon_vma)
1271 return 0;
1273 return 1;
/* An ELF note in memory */
struct memelfnote
{
	const char *name;	/* NUL-terminated note name, e.g. "CORE" */
	int type;		/* note type, stored into n_type on output */
	unsigned int datasz;	/* number of bytes at @data */
	void *data;		/* note payload (descriptor) */
};
1285 static int notesize(struct memelfnote *en)
1287 int sz;
1289 sz = sizeof(struct elf_note);
1290 sz += roundup(strlen(en->name) + 1, 4);
1291 sz += roundup(en->datasz, 4);
1293 return sz;
1296 #define DUMP_WRITE(addr, nr, foffset) \
1297 do { if (!dump_write(file, (addr), (nr))) return 0; *foffset += (nr); } while(0)
1299 static int alignfile(struct file *file, loff_t *foffset)
1301 static const char buf[4] = { 0, };
1302 DUMP_WRITE(buf, roundup(*foffset, 4) - *foffset, foffset);
1303 return 1;
1306 static int writenote(struct memelfnote *men, struct file *file,
1307 loff_t *foffset)
1309 struct elf_note en;
1310 en.n_namesz = strlen(men->name) + 1;
1311 en.n_descsz = men->datasz;
1312 en.n_type = men->type;
1314 DUMP_WRITE(&en, sizeof(en), foffset);
1315 DUMP_WRITE(men->name, en.n_namesz, foffset);
1316 if (!alignfile(file, foffset))
1317 return 0;
1318 DUMP_WRITE(men->data, men->datasz, foffset);
1319 if (!alignfile(file, foffset))
1320 return 0;
1322 return 1;
1324 #undef DUMP_WRITE
/*
 * Output helpers for elf_core_dump().  Both expect `file', `size',
 * `limit' and an `end_coredump' label in the calling function.
 *
 * DUMP_WRITE charges @nr bytes against the core size limit and writes
 * them; DUMP_SEEK skips @off bytes.  Either one jumps to end_coredump on
 * failure (or, for DUMP_WRITE, when the limit would be exceeded).
 *
 * They are wrapped in do { } while (0) so that each behaves as a single
 * statement and cannot capture a following `else'.
 */
#define DUMP_WRITE(addr, nr)	\
	do {	\
		if ((size += (nr)) > limit || !dump_write(file, (addr), (nr))) \
			goto end_coredump;	\
	} while (0)
#define DUMP_SEEK(off)	\
	do {	\
		if (!dump_seek(file, (off)))	\
			goto end_coredump;	\
	} while (0)
1333 static void fill_elf_header(struct elfhdr *elf, int segs)
1335 memcpy(elf->e_ident, ELFMAG, SELFMAG);
1336 elf->e_ident[EI_CLASS] = ELF_CLASS;
1337 elf->e_ident[EI_DATA] = ELF_DATA;
1338 elf->e_ident[EI_VERSION] = EV_CURRENT;
1339 elf->e_ident[EI_OSABI] = ELF_OSABI;
1340 memset(elf->e_ident+EI_PAD, 0, EI_NIDENT-EI_PAD);
1342 elf->e_type = ET_CORE;
1343 elf->e_machine = ELF_ARCH;
1344 elf->e_version = EV_CURRENT;
1345 elf->e_entry = 0;
1346 elf->e_phoff = sizeof(struct elfhdr);
1347 elf->e_shoff = 0;
1348 elf->e_flags = ELF_CORE_EFLAGS;
1349 elf->e_ehsize = sizeof(struct elfhdr);
1350 elf->e_phentsize = sizeof(struct elf_phdr);
1351 elf->e_phnum = segs;
1352 elf->e_shentsize = 0;
1353 elf->e_shnum = 0;
1354 elf->e_shstrndx = 0;
1355 return;
1358 static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
1360 phdr->p_type = PT_NOTE;
1361 phdr->p_offset = offset;
1362 phdr->p_vaddr = 0;
1363 phdr->p_paddr = 0;
1364 phdr->p_filesz = sz;
1365 phdr->p_memsz = 0;
1366 phdr->p_flags = 0;
1367 phdr->p_align = 0;
1368 return;
1371 static void fill_note(struct memelfnote *note, const char *name, int type,
1372 unsigned int sz, void *data)
1374 note->name = name;
1375 note->type = type;
1376 note->datasz = sz;
1377 note->data = data;
1378 return;
1382 * fill up all the fields in prstatus from the given task struct, except
1383 * registers which need to be filled up separately.
1385 static void fill_prstatus(struct elf_prstatus *prstatus,
1386 struct task_struct *p, long signr)
1388 prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
1389 prstatus->pr_sigpend = p->pending.signal.sig[0];
1390 prstatus->pr_sighold = p->blocked.sig[0];
1391 prstatus->pr_pid = p->pid;
1392 prstatus->pr_ppid = p->parent->pid;
1393 prstatus->pr_pgrp = process_group(p);
1394 prstatus->pr_sid = process_session(p);
1395 if (thread_group_leader(p)) {
1397 * This is the record for the group leader. Add in the
1398 * cumulative times of previous dead threads. This total
1399 * won't include the time of each live thread whose state
1400 * is included in the core dump. The final total reported
1401 * to our parent process when it calls wait4 will include
1402 * those sums as well as the little bit more time it takes
1403 * this and each other thread to finish dying after the
1404 * core dump synchronization phase.
1406 cputime_to_timeval(cputime_add(p->utime, p->signal->utime),
1407 &prstatus->pr_utime);
1408 cputime_to_timeval(cputime_add(p->stime, p->signal->stime),
1409 &prstatus->pr_stime);
1410 } else {
1411 cputime_to_timeval(p->utime, &prstatus->pr_utime);
1412 cputime_to_timeval(p->stime, &prstatus->pr_stime);
1414 cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
1415 cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
1418 static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1419 struct mm_struct *mm)
1421 unsigned int i, len;
1423 /* first copy the parameters from user space */
1424 memset(psinfo, 0, sizeof(struct elf_prpsinfo));
1426 len = mm->arg_end - mm->arg_start;
1427 if (len >= ELF_PRARGSZ)
1428 len = ELF_PRARGSZ-1;
1429 if (copy_from_user(&psinfo->pr_psargs,
1430 (const char __user *)mm->arg_start, len))
1431 return -EFAULT;
1432 for(i = 0; i < len; i++)
1433 if (psinfo->pr_psargs[i] == 0)
1434 psinfo->pr_psargs[i] = ' ';
1435 psinfo->pr_psargs[len] = 0;
1437 psinfo->pr_pid = p->pid;
1438 psinfo->pr_ppid = p->parent->pid;
1439 psinfo->pr_pgrp = process_group(p);
1440 psinfo->pr_sid = process_session(p);
1442 i = p->state ? ffz(~p->state) + 1 : 0;
1443 psinfo->pr_state = i;
1444 psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
1445 psinfo->pr_zomb = psinfo->pr_sname == 'Z';
1446 psinfo->pr_nice = task_nice(p);
1447 psinfo->pr_flag = p->flags;
1448 SET_UID(psinfo->pr_uid, p->uid);
1449 SET_GID(psinfo->pr_gid, p->gid);
1450 strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
1452 return 0;
1455 /* Here is the structure in which status of each thread is captured. */
1456 struct elf_thread_status
1458 struct list_head list;
1459 struct elf_prstatus prstatus; /* NT_PRSTATUS */
1460 elf_fpregset_t fpu; /* NT_PRFPREG */
1461 struct task_struct *thread;
1462 #ifdef ELF_CORE_COPY_XFPREGS
1463 elf_fpxregset_t xfpu; /* NT_PRXFPREG */
1464 #endif
1465 struct memelfnote notes[3];
1466 int num_notes;
1470 * In order to add the specific thread information for the elf file format,
1471 * we need to keep a linked list of every threads pr_status and then create
1472 * a single section for them in the final core file.
1474 static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1476 int sz = 0;
1477 struct task_struct *p = t->thread;
1478 t->num_notes = 0;
1480 fill_prstatus(&t->prstatus, p, signr);
1481 elf_core_copy_task_regs(p, &t->prstatus.pr_reg);
1483 fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1484 &(t->prstatus));
1485 t->num_notes++;
1486 sz += notesize(&t->notes[0]);
1488 if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1489 &t->fpu))) {
1490 fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1491 &(t->fpu));
1492 t->num_notes++;
1493 sz += notesize(&t->notes[1]);
1496 #ifdef ELF_CORE_COPY_XFPREGS
1497 if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1498 fill_note(&t->notes[2], "LINUX", NT_PRXFPREG, sizeof(t->xfpu),
1499 &t->xfpu);
1500 t->num_notes++;
1501 sz += notesize(&t->notes[2]);
1503 #endif
1504 return sz;
1507 static struct vm_area_struct *first_vma(struct task_struct *tsk,
1508 struct vm_area_struct *gate_vma)
1510 struct vm_area_struct *ret = tsk->mm->mmap;
1512 if (ret)
1513 return ret;
1514 return gate_vma;
1517 * Helper function for iterating across a vma list. It ensures that the caller
1518 * will visit `gate_vma' prior to terminating the search.
1520 static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
1521 struct vm_area_struct *gate_vma)
1523 struct vm_area_struct *ret;
1525 ret = this_vma->vm_next;
1526 if (ret)
1527 return ret;
1528 if (this_vma == gate_vma)
1529 return NULL;
1530 return gate_vma;
1534 * Actual dumper
1536 * This is a two-pass process; first we find the offsets of the bits,
1537 * and then they are actually written out. If we run out of core limit
1538 * we just truncate.
/* elf_core_dump - write an ELF core image of the current task to `file'. */
/* `signr' is recorded in the prstatus notes; when it is non-zero, every other task sharing current->mm is captured as well. */
/* `regs' supplies the register state stored for the current task. */
/* Returns has_dumped: 0 if an allocation failed before the header was prepared, 1 afterwards - even when output is cut short by a write error or the RLIMIT_CORE limit. */
1540 static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file)
1542 #define NUM_NOTES 6
1543 int has_dumped = 0;
1544 mm_segment_t fs;
1545 int segs;
1546 size_t size = 0;
1547 int i;
1548 struct vm_area_struct *vma, *gate_vma;
1549 struct elfhdr *elf = NULL;
1550 loff_t offset = 0, dataoff, foffset;
1551 unsigned long limit = current->signal->rlim[RLIMIT_CORE].rlim_cur;
1552 int numnote;
1553 struct memelfnote *notes = NULL;
1554 struct elf_prstatus *prstatus = NULL; /* NT_PRSTATUS */
1555 struct elf_prpsinfo *psinfo = NULL; /* NT_PRPSINFO */
1556 struct task_struct *g, *p;
1557 LIST_HEAD(thread_list);
1558 struct list_head *t;
1559 elf_fpregset_t *fpu = NULL;
1560 #ifdef ELF_CORE_COPY_XFPREGS
1561 elf_fpxregset_t *xfpu = NULL;
1562 #endif
1563 int thread_status_size = 0;
1564 elf_addr_t *auxv;
1565 #ifdef ELF_CORE_WRITE_EXTRA_NOTES
1566 int extra_notes_size;
1567 #endif
1570 * We no longer stop all VM operations.
1572 * This is because those proceses that could possibly change map_count
1573 * or the mmap / vma pages are now blocked in do_exit on current
1574 * finishing this core dump.
1576 * Only ptrace can touch these memory addresses, but it doesn't change
1577 * the map_count or the pages allocated. So no possibility of crashing
1578 * exists while dumping the mm->vm_next areas to the core file.
/* Every pointer below starts out NULL, so the cleanup path can kfree() them unconditionally after a partial failure. */
1581 /* alloc memory for large data structures: too large to be on stack */
1582 elf = kmalloc(sizeof(*elf), GFP_KERNEL);
1583 if (!elf)
1584 goto cleanup;
1585 prstatus = kmalloc(sizeof(*prstatus), GFP_KERNEL);
1586 if (!prstatus)
1587 goto cleanup;
1588 psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1589 if (!psinfo)
1590 goto cleanup;
1591 notes = kmalloc(NUM_NOTES * sizeof(struct memelfnote), GFP_KERNEL);
1592 if (!notes)
1593 goto cleanup;
1594 fpu = kmalloc(sizeof(*fpu), GFP_KERNEL);
1595 if (!fpu)
1596 goto cleanup;
1597 #ifdef ELF_CORE_COPY_XFPREGS
1598 xfpu = kmalloc(sizeof(*xfpu), GFP_KERNEL);
1599 if (!xfpu)
1600 goto cleanup;
1601 #endif
/* Pass 0: queue one elf_thread_status per other task that shares current->mm. The allocation uses GFP_ATOMIC while inside the rcu_read_lock() section. */
1603 if (signr) {
1604 struct elf_thread_status *tmp;
1605 rcu_read_lock();
1606 do_each_thread(g,p)
1607 if (current->mm == p->mm && current != p) {
1608 tmp = kzalloc(sizeof(*tmp), GFP_ATOMIC);
1609 if (!tmp) {
1610 rcu_read_unlock();
1611 goto cleanup;
1613 tmp->thread = p;
1614 list_add(&tmp->list, &thread_list);
1616 while_each_thread(g,p);
1617 rcu_read_unlock();
/* With the list built, capture each queued thread's status and total up the size of its notes. */
1618 list_for_each(t, &thread_list) {
1619 struct elf_thread_status *tmp;
1620 int sz;
1622 tmp = list_entry(t, struct elf_thread_status, list);
1623 sz = elf_dump_thread_status(signr, tmp);
1624 thread_status_size += sz;
1627 /* now collect the dump for the current */
1628 memset(prstatus, 0, sizeof(*prstatus));
1629 fill_prstatus(prstatus, current, signr);
1630 elf_core_copy_regs(&prstatus->pr_reg, regs);
/* segs = number of PT_LOAD entries: one per vma, plus arch extras and the gate vma when present. The PT_NOTE entry is added separately as "segs + 1". */
1632 segs = current->mm->map_count;
1633 #ifdef ELF_CORE_EXTRA_PHDRS
1634 segs += ELF_CORE_EXTRA_PHDRS;
1635 #endif
1637 gate_vma = get_gate_vma(current);
1638 if (gate_vma != NULL)
1639 segs++;
1641 /* Set up header */
1642 fill_elf_header(elf, segs + 1); /* including notes section */
/* From here on the function reports 1, whatever happens to the actual writes. */
1644 has_dumped = 1;
1645 current->flags |= PF_DUMPCORE;
1648 * Set up the notes in similar form to SVR4 core dumps made
1649 * with info from their /proc.
/* The return value of fill_psinfo() is not checked here. */
1652 fill_note(notes + 0, "CORE", NT_PRSTATUS, sizeof(*prstatus), prstatus);
1653 fill_psinfo(psinfo, current->group_leader, current->mm);
1654 fill_note(notes + 1, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1656 numnote = 2;
/* NT_AUXV: i advances in steps of two until the entry just passed is AT_NULL, so the terminating pair is included in the note. */
1658 auxv = (elf_addr_t *)current->mm->saved_auxv;
1660 i = 0;
1662 i += 2;
1663 while (auxv[i - 2] != AT_NULL);
1664 fill_note(&notes[numnote++], "CORE", NT_AUXV,
1665 i * sizeof(elf_addr_t), auxv);
1667 /* Try to dump the FPU. */
1668 if ((prstatus->pr_fpvalid =
1669 elf_core_copy_task_fpregs(current, regs, fpu)))
1670 fill_note(notes + numnote++,
1671 "CORE", NT_PRFPREG, sizeof(*fpu), fpu);
1672 #ifdef ELF_CORE_COPY_XFPREGS
1673 if (elf_core_copy_task_xfpregs(current, xfpu))
1674 fill_note(notes + numnote++,
1675 "LINUX", NT_PRXFPREG, sizeof(*xfpu), xfpu);
1676 #endif
/* Switch to KERNEL_DS for the duration of the writes; the saved value is restored at end_coredump. */
1678 fs = get_fs();
1679 set_fs(KERNEL_DS);
/* `offset' tracks the planned file layout; `foffset' starts at the end of the header area and is advanced by writenote() as note bytes go out. */
1681 DUMP_WRITE(elf, sizeof(*elf));
1682 offset += sizeof(*elf); /* Elf header */
1683 offset += (segs + 1) * sizeof(struct elf_phdr); /* Program headers */
1684 foffset = offset;
1686 /* Write notes phdr entry */
1688 struct elf_phdr phdr;
1689 int sz = 0;
1691 for (i = 0; i < numnote; i++)
1692 sz += notesize(notes + i);
1694 sz += thread_status_size;
1696 #ifdef ELF_CORE_WRITE_EXTRA_NOTES
1697 extra_notes_size = ELF_CORE_EXTRA_NOTES_SIZE;
1698 sz += extra_notes_size;
1699 #endif
1701 fill_elf_note_phdr(&phdr, sz, offset);
1702 offset += sz;
1703 DUMP_WRITE(&phdr, sizeof(phdr));
/* Segment data begins at the next ELF_EXEC_PAGESIZE boundary after the notes. */
1706 dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
1708 /* Write program headers for segments dump */
1709 for (vma = first_vma(current, gate_vma); vma != NULL;
1710 vma = next_vma(vma, gate_vma)) {
1711 struct elf_phdr phdr;
1712 size_t sz;
1714 sz = vma->vm_end - vma->vm_start;
1716 phdr.p_type = PT_LOAD;
1717 phdr.p_offset = offset;
1718 phdr.p_vaddr = vma->vm_start;
1719 phdr.p_paddr = 0;
/* A vma rejected by maydump() still gets a header, but with p_filesz 0 so it takes no space in the file. */
1720 phdr.p_filesz = maydump(vma) ? sz : 0;
1721 phdr.p_memsz = sz;
1722 offset += phdr.p_filesz;
1723 phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
1724 if (vma->vm_flags & VM_WRITE)
1725 phdr.p_flags |= PF_W;
1726 if (vma->vm_flags & VM_EXEC)
1727 phdr.p_flags |= PF_X;
1728 phdr.p_align = ELF_EXEC_PAGESIZE;
1730 DUMP_WRITE(&phdr, sizeof(phdr));
1733 #ifdef ELF_CORE_WRITE_EXTRA_PHDRS
1734 ELF_CORE_WRITE_EXTRA_PHDRS;
1735 #endif
1737 /* write out the notes section */
1738 for (i = 0; i < numnote; i++)
1739 if (!writenote(notes + i, file, &foffset))
1740 goto end_coredump;
1742 #ifdef ELF_CORE_WRITE_EXTRA_NOTES
1743 ELF_CORE_WRITE_EXTRA_NOTES;
1744 foffset += extra_notes_size;
1745 #endif
1747 /* write out the thread status notes section */
1748 list_for_each(t, &thread_list) {
1749 struct elf_thread_status *tmp =
1750 list_entry(t, struct elf_thread_status, list);
1752 for (i = 0; i < tmp->num_notes; i++)
1753 if (!writenote(&tmp->notes[i], file, &foffset))
1754 goto end_coredump;
1757 /* Align to page */
1758 DUMP_SEEK(dataoff - foffset);
/* Second walk over the vmas, in the same order as the program headers: write the pages of every vma accepted by maydump(). */
1760 for (vma = first_vma(current, gate_vma); vma != NULL;
1761 vma = next_vma(vma, gate_vma)) {
1762 unsigned long addr;
1764 if (!maydump(vma))
1765 continue;
1767 for (addr = vma->vm_start;
1768 addr < vma->vm_end;
1769 addr += PAGE_SIZE) {
1770 struct page *page;
/* NOTE: this inner `vma' shadows the loop variable above; it only receives the vma reported by get_user_pages() for flush_cache_page(). */
1771 struct vm_area_struct *vma;
/* A page that cannot be obtained, or that is the zero page, is skipped with a seek instead of being written. */
1773 if (get_user_pages(current, current->mm, addr, 1, 0, 1,
1774 &page, &vma) <= 0) {
1775 DUMP_SEEK(PAGE_SIZE);
1776 } else {
1777 if (page == ZERO_PAGE(addr)) {
1778 if (!dump_seek(file, PAGE_SIZE)) {
1779 page_cache_release(page);
1780 goto end_coredump;
1782 } else {
1783 void *kaddr;
1784 flush_cache_page(vma, addr,
1785 page_to_pfn(page));
1786 kaddr = kmap(page);
/* Open-coded instead of DUMP_WRITE so the page can be unmapped and released before bailing out. */
1787 if ((size += PAGE_SIZE) > limit ||
1788 !dump_write(file, kaddr,
1789 PAGE_SIZE)) {
1790 kunmap(page);
1791 page_cache_release(page);
1792 goto end_coredump;
1794 kunmap(page);
1796 page_cache_release(page);
1801 #ifdef ELF_CORE_WRITE_EXTRA_DATA
1802 ELF_CORE_WRITE_EXTRA_DATA;
1803 #endif
/* end_coredump: reached on success and on any write/seek/limit failure once KERNEL_DS has been set. */
1805 end_coredump:
1806 set_fs(fs);
/* cleanup: also the target of the early allocation failures; frees the per-thread records and all temporary buffers. */
1808 cleanup:
1809 while (!list_empty(&thread_list)) {
1810 struct list_head *tmp = thread_list.next;
1811 list_del(tmp);
1812 kfree(list_entry(tmp, struct elf_thread_status, list));
1815 kfree(elf);
1816 kfree(prstatus);
1817 kfree(psinfo);
1818 kfree(notes);
1819 kfree(fpu);
1820 #ifdef ELF_CORE_COPY_XFPREGS
1821 kfree(xfpu);
1822 #endif
1823 return has_dumped;
1824 #undef NUM_NOTES
1827 #endif /* USE_ELF_CORE_DUMP */
1829 static int __init init_elf_binfmt(void)
1831 return register_binfmt(&elf_format);
1834 static void __exit exit_elf_binfmt(void)
1836 /* Remove the COFF and ELF loaders. */
1837 unregister_binfmt(&elf_format);
/* Hook the handler in: init_elf_binfmt via core_initcall(), exit_elf_binfmt via module_exit(); the code is licensed under the GPL. */
1840 core_initcall(init_elf_binfmt);
1841 module_exit(exit_elf_binfmt);
1842 MODULE_LICENSE("GPL");