fast initial bufsize increase
[cor.git] / fs / binfmt_elf.c
blobecd8d26985154b15b4597c07cc85f3a636163f2d
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * linux/fs/binfmt_elf.c
5 * These are the functions used to load ELF format executables as used
6 * on SVr4 machines. Information on the format may be found in the book
7 * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
8 * Tools".
10 * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
13 #include <linux/module.h>
14 #include <linux/kernel.h>
15 #include <linux/fs.h>
16 #include <linux/mm.h>
17 #include <linux/mman.h>
18 #include <linux/errno.h>
19 #include <linux/signal.h>
20 #include <linux/binfmts.h>
21 #include <linux/string.h>
22 #include <linux/file.h>
23 #include <linux/slab.h>
24 #include <linux/personality.h>
25 #include <linux/elfcore.h>
26 #include <linux/init.h>
27 #include <linux/highuid.h>
28 #include <linux/compiler.h>
29 #include <linux/highmem.h>
30 #include <linux/pagemap.h>
31 #include <linux/vmalloc.h>
32 #include <linux/security.h>
33 #include <linux/random.h>
34 #include <linux/elf.h>
35 #include <linux/elf-randomize.h>
36 #include <linux/utsname.h>
37 #include <linux/coredump.h>
38 #include <linux/sched.h>
39 #include <linux/sched/coredump.h>
40 #include <linux/sched/task_stack.h>
41 #include <linux/sched/cputime.h>
42 #include <linux/cred.h>
43 #include <linux/dax.h>
44 #include <linux/uaccess.h>
45 #include <asm/param.h>
46 #include <asm/page.h>
/* Default user_long_t to long unless it has already been defined. */
#ifndef user_long_t
#define user_long_t long
#endif
/* Likewise default user_siginfo_t to siginfo_t. */
#ifndef user_siginfo_t
#define user_siginfo_t siginfo_t
#endif

/* That's for binfmt_elf_fdpic to deal with */
#ifndef elf_check_fdpic
#define elf_check_fdpic(ex) false
#endif

static int load_elf_binary(struct linux_binprm *bprm);

/* Without CONFIG_USELIB the name load_elf_library simply expands to NULL. */
#ifdef CONFIG_USELIB
static int load_elf_library(struct file *);
#else
#define load_elf_library NULL
#endif

/*
 * If we don't support core dumping, then supply a NULL so we
 * don't even try.
 */
#ifdef CONFIG_ELF_CORE
static int elf_core_dump(struct coredump_params *cprm);
#else
#define elf_core_dump NULL
#endif

/* ELF_MIN_ALIGN is the larger of ELF_EXEC_PAGESIZE and PAGE_SIZE. */
#if ELF_EXEC_PAGESIZE > PAGE_SIZE
#define ELF_MIN_ALIGN	ELF_EXEC_PAGESIZE
#else
#define ELF_MIN_ALIGN	PAGE_SIZE
#endif

/* Default e_flags value when ELF_CORE_EFLAGS has not been defined already. */
#ifndef ELF_CORE_EFLAGS
#define ELF_CORE_EFLAGS	0
#endif

/*
 * Mask helpers relative to ELF_MIN_ALIGN: round down, offset within the
 * unit, and round up. The mask arithmetic is only meaningful when
 * ELF_MIN_ALIGN is a power of two (assumed here, not checked).
 */
#define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
#define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
#define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
/*
 * Binary-format descriptor for ELF. It points at the handlers declared
 * earlier in this file; load_shlib and core_dump are NULL when
 * CONFIG_USELIB / CONFIG_ELF_CORE are not set (see the #defines above the
 * forward declarations).
 */
static struct linux_binfmt elf_format = {
	.module		= THIS_MODULE,
	.load_binary	= load_elf_binary,
	.load_shlib	= load_elf_library,
	.core_dump	= elf_core_dump,
	.min_coredump	= ELF_EXEC_PAGESIZE,
};
/*
 * True when x, converted to unsigned long, is at or above TASK_SIZE.
 * Callers in this file also apply it to values that may hold a negative
 * errno stored in an unsigned long; that relies on such values comparing
 * >= TASK_SIZE (presumably true on every configuration - confirm).
 */
#define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)
102 static int set_brk(unsigned long start, unsigned long end, int prot)
104 start = ELF_PAGEALIGN(start);
105 end = ELF_PAGEALIGN(end);
106 if (end > start) {
108 * Map the last of the bss segment.
109 * If the header is requesting these pages to be
110 * executable, honour that (ppc32 needs this).
112 int error = vm_brk_flags(start, end - start,
113 prot & PROT_EXEC ? VM_EXEC : 0);
114 if (error)
115 return error;
117 current->mm->start_brk = current->mm->brk = end;
118 return 0;
121 /* We need to explicitly zero any fractional pages
122 after the data section (i.e. bss). This would
123 contain the junk from the file that should not
124 be in memory
126 static int padzero(unsigned long elf_bss)
128 unsigned long nbyte;
130 nbyte = ELF_PAGEOFFSET(elf_bss);
131 if (nbyte) {
132 nbyte = ELF_MIN_ALIGN - nbyte;
133 if (clear_user((void __user *) elf_bss, nbyte))
134 return -EFAULT;
136 return 0;
/*
 * Let's use some macros to make this stack manipulation a little clearer.
 *
 * STACK_ADD(sp, items)   - sp moved by @items elf_addr_t slots in the
 *                          direction the stack grows.
 * STACK_ROUND(sp, items) - sp moved by @items in the growth direction and
 *                          rounded to a 16-byte boundary, as unsigned long.
 * STACK_ALLOC(sp, len)   - move sp by @len (modifying it) and yield the
 *                          start of the reserved area.
 *
 * Every macro argument is parenthesised in the expansion so that compound
 * expressions such as STACK_ROUND(sp, a + b) are evaluated as written.
 */
#ifdef CONFIG_STACK_GROWSUP
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
#define STACK_ROUND(sp, items) \
	((15 + (unsigned long) ((sp) + (items))) & ~15UL)
#define STACK_ALLOC(sp, len) ({ \
	elf_addr_t __user *old_sp = (elf_addr_t __user *)(sp); (sp) += (len); \
	old_sp; })
#else
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
#define STACK_ROUND(sp, items) \
	(((unsigned long) ((sp) - (items))) & ~15UL)
#define STACK_ALLOC(sp, len) ({ (sp) -= (len); (sp); })
#endif
#ifndef ELF_BASE_PLATFORM
/*
 * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
 * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
 * will be copied to the user stack in the same manner as AT_PLATFORM.
 * Otherwise it defaults to NULL here and no AT_BASE_PLATFORM entry is
 * emitted by create_elf_tables().
 */
#define ELF_BASE_PLATFORM NULL
#endif
163 static int
164 create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
165 unsigned long load_addr, unsigned long interp_load_addr)
167 unsigned long p = bprm->p;
168 int argc = bprm->argc;
169 int envc = bprm->envc;
170 elf_addr_t __user *sp;
171 elf_addr_t __user *u_platform;
172 elf_addr_t __user *u_base_platform;
173 elf_addr_t __user *u_rand_bytes;
174 const char *k_platform = ELF_PLATFORM;
175 const char *k_base_platform = ELF_BASE_PLATFORM;
176 unsigned char k_rand_bytes[16];
177 int items;
178 elf_addr_t *elf_info;
179 int ei_index = 0;
180 const struct cred *cred = current_cred();
181 struct vm_area_struct *vma;
184 * In some cases (e.g. Hyper-Threading), we want to avoid L1
185 * evictions by the processes running on the same package. One
186 * thing we can do is to shuffle the initial stack for them.
189 p = arch_align_stack(p);
192 * If this architecture has a platform capability string, copy it
193 * to userspace. In some cases (Sparc), this info is impossible
194 * for userspace to get any other way, in others (i386) it is
195 * merely difficult.
197 u_platform = NULL;
198 if (k_platform) {
199 size_t len = strlen(k_platform) + 1;
201 u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
202 if (__copy_to_user(u_platform, k_platform, len))
203 return -EFAULT;
207 * If this architecture has a "base" platform capability
208 * string, copy it to userspace.
210 u_base_platform = NULL;
211 if (k_base_platform) {
212 size_t len = strlen(k_base_platform) + 1;
214 u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
215 if (__copy_to_user(u_base_platform, k_base_platform, len))
216 return -EFAULT;
220 * Generate 16 random bytes for userspace PRNG seeding.
222 get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
223 u_rand_bytes = (elf_addr_t __user *)
224 STACK_ALLOC(p, sizeof(k_rand_bytes));
225 if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
226 return -EFAULT;
228 /* Create the ELF interpreter info */
229 elf_info = (elf_addr_t *)current->mm->saved_auxv;
230 /* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
231 #define NEW_AUX_ENT(id, val) \
232 do { \
233 elf_info[ei_index++] = id; \
234 elf_info[ei_index++] = val; \
235 } while (0)
237 #ifdef ARCH_DLINFO
239 * ARCH_DLINFO must come first so PPC can do its special alignment of
240 * AUXV.
241 * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
242 * ARCH_DLINFO changes
244 ARCH_DLINFO;
245 #endif
246 NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
247 NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
248 NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
249 NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
250 NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
251 NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
252 NEW_AUX_ENT(AT_BASE, interp_load_addr);
253 NEW_AUX_ENT(AT_FLAGS, 0);
254 NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
255 NEW_AUX_ENT(AT_UID, from_kuid_munged(cred->user_ns, cred->uid));
256 NEW_AUX_ENT(AT_EUID, from_kuid_munged(cred->user_ns, cred->euid));
257 NEW_AUX_ENT(AT_GID, from_kgid_munged(cred->user_ns, cred->gid));
258 NEW_AUX_ENT(AT_EGID, from_kgid_munged(cred->user_ns, cred->egid));
259 NEW_AUX_ENT(AT_SECURE, bprm->secureexec);
260 NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
261 #ifdef ELF_HWCAP2
262 NEW_AUX_ENT(AT_HWCAP2, ELF_HWCAP2);
263 #endif
264 NEW_AUX_ENT(AT_EXECFN, bprm->exec);
265 if (k_platform) {
266 NEW_AUX_ENT(AT_PLATFORM,
267 (elf_addr_t)(unsigned long)u_platform);
269 if (k_base_platform) {
270 NEW_AUX_ENT(AT_BASE_PLATFORM,
271 (elf_addr_t)(unsigned long)u_base_platform);
273 if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
274 NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
276 #undef NEW_AUX_ENT
277 /* AT_NULL is zero; clear the rest too */
278 memset(&elf_info[ei_index], 0,
279 sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);
281 /* And advance past the AT_NULL entry. */
282 ei_index += 2;
284 sp = STACK_ADD(p, ei_index);
286 items = (argc + 1) + (envc + 1) + 1;
287 bprm->p = STACK_ROUND(sp, items);
289 /* Point sp at the lowest address on the stack */
290 #ifdef CONFIG_STACK_GROWSUP
291 sp = (elf_addr_t __user *)bprm->p - items - ei_index;
292 bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
293 #else
294 sp = (elf_addr_t __user *)bprm->p;
295 #endif
299 * Grow the stack manually; some architectures have a limit on how
300 * far ahead a user-space access may be in order to grow the stack.
302 vma = find_extend_vma(current->mm, bprm->p);
303 if (!vma)
304 return -EFAULT;
306 /* Now, let's put argc (and argv, envp if appropriate) on the stack */
307 if (__put_user(argc, sp++))
308 return -EFAULT;
310 /* Populate list of argv pointers back to argv strings. */
311 p = current->mm->arg_end = current->mm->arg_start;
312 while (argc-- > 0) {
313 size_t len;
314 if (__put_user((elf_addr_t)p, sp++))
315 return -EFAULT;
316 len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
317 if (!len || len > MAX_ARG_STRLEN)
318 return -EINVAL;
319 p += len;
321 if (__put_user(0, sp++))
322 return -EFAULT;
323 current->mm->arg_end = p;
325 /* Populate list of envp pointers back to envp strings. */
326 current->mm->env_end = current->mm->env_start = p;
327 while (envc-- > 0) {
328 size_t len;
329 if (__put_user((elf_addr_t)p, sp++))
330 return -EFAULT;
331 len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
332 if (!len || len > MAX_ARG_STRLEN)
333 return -EINVAL;
334 p += len;
336 if (__put_user(0, sp++))
337 return -EFAULT;
338 current->mm->env_end = p;
340 /* Put the elf_info on the stack in the right place. */
341 if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
342 return -EFAULT;
343 return 0;
346 #ifndef elf_map
348 static unsigned long elf_map(struct file *filep, unsigned long addr,
349 const struct elf_phdr *eppnt, int prot, int type,
350 unsigned long total_size)
352 unsigned long map_addr;
353 unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
354 unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
355 addr = ELF_PAGESTART(addr);
356 size = ELF_PAGEALIGN(size);
358 /* mmap() will return -EINVAL if given a zero size, but a
359 * segment with zero filesize is perfectly valid */
360 if (!size)
361 return addr;
364 * total_size is the size of the ELF (interpreter) image.
365 * The _first_ mmap needs to know the full size, otherwise
366 * randomization might put this image into an overlapping
367 * position with the ELF binary image. (since size < total_size)
368 * So we first map the 'big' image - and unmap the remainder at
369 * the end. (which unmap is needed for ELF images with holes.)
371 if (total_size) {
372 total_size = ELF_PAGEALIGN(total_size);
373 map_addr = vm_mmap(filep, addr, total_size, prot, type, off);
374 if (!BAD_ADDR(map_addr))
375 vm_munmap(map_addr+size, total_size-size);
376 } else
377 map_addr = vm_mmap(filep, addr, size, prot, type, off);
379 if ((type & MAP_FIXED_NOREPLACE) &&
380 PTR_ERR((void *)map_addr) == -EEXIST)
381 pr_info("%d (%s): Uhuuh, elf segment at %px requested but the memory is mapped already\n",
382 task_pid_nr(current), current->comm, (void *)addr);
384 return(map_addr);
387 #endif /* !elf_map */
389 static unsigned long total_mapping_size(const struct elf_phdr *cmds, int nr)
391 int i, first_idx = -1, last_idx = -1;
393 for (i = 0; i < nr; i++) {
394 if (cmds[i].p_type == PT_LOAD) {
395 last_idx = i;
396 if (first_idx == -1)
397 first_idx = i;
400 if (first_idx == -1)
401 return 0;
403 return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
404 ELF_PAGESTART(cmds[first_idx].p_vaddr);
407 static int elf_read(struct file *file, void *buf, size_t len, loff_t pos)
409 ssize_t rv;
411 rv = kernel_read(file, buf, len, &pos);
412 if (unlikely(rv != len)) {
413 return (rv < 0) ? rv : -EIO;
415 return 0;
419 * load_elf_phdrs() - load ELF program headers
420 * @elf_ex: ELF header of the binary whose program headers should be loaded
421 * @elf_file: the opened ELF binary file
423 * Loads ELF program headers from the binary file elf_file, which has the ELF
424 * header pointed to by elf_ex, into a newly allocated array. The caller is
425 * responsible for freeing the allocated data. Returns an ERR_PTR upon failure.
427 static struct elf_phdr *load_elf_phdrs(const struct elfhdr *elf_ex,
428 struct file *elf_file)
430 struct elf_phdr *elf_phdata = NULL;
431 int retval, err = -1;
432 unsigned int size;
435 * If the size of this structure has changed, then punt, since
436 * we will be doing the wrong thing.
438 if (elf_ex->e_phentsize != sizeof(struct elf_phdr))
439 goto out;
441 /* Sanity check the number of program headers... */
442 /* ...and their total size. */
443 size = sizeof(struct elf_phdr) * elf_ex->e_phnum;
444 if (size == 0 || size > 65536 || size > ELF_MIN_ALIGN)
445 goto out;
447 elf_phdata = kmalloc(size, GFP_KERNEL);
448 if (!elf_phdata)
449 goto out;
451 /* Read in the program headers */
452 retval = elf_read(elf_file, elf_phdata, size, elf_ex->e_phoff);
453 if (retval < 0) {
454 err = retval;
455 goto out;
458 /* Success! */
459 err = 0;
460 out:
461 if (err) {
462 kfree(elf_phdata);
463 elf_phdata = NULL;
465 return elf_phdata;
468 #ifndef CONFIG_ARCH_BINFMT_ELF_STATE
/**
 * struct arch_elf_state - arch-specific ELF loading state
 *
 * This structure is used to preserve architecture specific data during
 * the loading of an ELF file, throughout the checking of architecture
 * specific ELF headers & through to the point where the ELF load is
 * known to be proceeding (ie. SET_PERSONALITY).
 *
 * This implementation is a dummy for architectures which require no
 * specific state.
 */
struct arch_elf_state {
};

/* Initializer for the dummy (empty) state. */
#define INIT_ARCH_ELF_STATE {}
/**
 * arch_elf_pt_proc() - check a PT_LOPROC..PT_HIPROC ELF program header
 * @ehdr:	The main ELF header
 * @phdr:	The program header to check
 * @elf:	The open ELF file
 * @is_interp:	True if the phdr is from the interpreter of the ELF being
 *		loaded, else false.
 * @state:	Architecture-specific state preserved throughout the process
 *		of loading the ELF.
 *
 * Inspects the program header phdr to validate its correctness and/or
 * suitability for the system. Called once per ELF program header in the
 * range PT_LOPROC to PT_HIPROC, for both the ELF being loaded and its
 * interpreter.
 *
 * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
 *         with that return code.
 */
static inline int arch_elf_pt_proc(struct elfhdr *ehdr,
				   struct elf_phdr *phdr,
				   struct file *elf, bool is_interp,
				   struct arch_elf_state *state)
{
	/* Dummy implementation, always proceed */
	return 0;
}
/**
 * arch_check_elf() - check an ELF executable
 * @ehdr:	The main ELF header
 * @has_interp:	True if the ELF has an interpreter, else false.
 * @interp_ehdr: The interpreter's ELF header
 * @state:	Architecture-specific state preserved throughout the process
 *		of loading the ELF.
 *
 * Provides a final opportunity for architecture code to reject the loading
 * of the ELF & cause an exec syscall to return an error. This is called after
 * all program headers to be checked by arch_elf_pt_proc have been.
 *
 * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
 *         with that return code.
 */
static inline int arch_check_elf(struct elfhdr *ehdr, bool has_interp,
				 struct elfhdr *interp_ehdr,
				 struct arch_elf_state *state)
{
	/* Dummy implementation, always proceed */
	return 0;
}
536 #endif /* !CONFIG_ARCH_BINFMT_ELF_STATE */
538 static inline int make_prot(u32 p_flags)
540 int prot = 0;
542 if (p_flags & PF_R)
543 prot |= PROT_READ;
544 if (p_flags & PF_W)
545 prot |= PROT_WRITE;
546 if (p_flags & PF_X)
547 prot |= PROT_EXEC;
548 return prot;
551 /* This is much more generalized than the library routine read function,
552 so we keep this separate. Technically the library read function
553 is only provided so that we can read a.out libraries that have
554 an ELF header */
556 static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
557 struct file *interpreter,
558 unsigned long no_base, struct elf_phdr *interp_elf_phdata)
560 struct elf_phdr *eppnt;
561 unsigned long load_addr = 0;
562 int load_addr_set = 0;
563 unsigned long last_bss = 0, elf_bss = 0;
564 int bss_prot = 0;
565 unsigned long error = ~0UL;
566 unsigned long total_size;
567 int i;
569 /* First of all, some simple consistency checks */
570 if (interp_elf_ex->e_type != ET_EXEC &&
571 interp_elf_ex->e_type != ET_DYN)
572 goto out;
573 if (!elf_check_arch(interp_elf_ex) ||
574 elf_check_fdpic(interp_elf_ex))
575 goto out;
576 if (!interpreter->f_op->mmap)
577 goto out;
579 total_size = total_mapping_size(interp_elf_phdata,
580 interp_elf_ex->e_phnum);
581 if (!total_size) {
582 error = -EINVAL;
583 goto out;
586 eppnt = interp_elf_phdata;
587 for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
588 if (eppnt->p_type == PT_LOAD) {
589 int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
590 int elf_prot = make_prot(eppnt->p_flags);
591 unsigned long vaddr = 0;
592 unsigned long k, map_addr;
594 vaddr = eppnt->p_vaddr;
595 if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
596 elf_type |= MAP_FIXED_NOREPLACE;
597 else if (no_base && interp_elf_ex->e_type == ET_DYN)
598 load_addr = -vaddr;
600 map_addr = elf_map(interpreter, load_addr + vaddr,
601 eppnt, elf_prot, elf_type, total_size);
602 total_size = 0;
603 error = map_addr;
604 if (BAD_ADDR(map_addr))
605 goto out;
607 if (!load_addr_set &&
608 interp_elf_ex->e_type == ET_DYN) {
609 load_addr = map_addr - ELF_PAGESTART(vaddr);
610 load_addr_set = 1;
614 * Check to see if the section's size will overflow the
615 * allowed task size. Note that p_filesz must always be
616 * <= p_memsize so it's only necessary to check p_memsz.
618 k = load_addr + eppnt->p_vaddr;
619 if (BAD_ADDR(k) ||
620 eppnt->p_filesz > eppnt->p_memsz ||
621 eppnt->p_memsz > TASK_SIZE ||
622 TASK_SIZE - eppnt->p_memsz < k) {
623 error = -ENOMEM;
624 goto out;
628 * Find the end of the file mapping for this phdr, and
629 * keep track of the largest address we see for this.
631 k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
632 if (k > elf_bss)
633 elf_bss = k;
636 * Do the same thing for the memory mapping - between
637 * elf_bss and last_bss is the bss section.
639 k = load_addr + eppnt->p_vaddr + eppnt->p_memsz;
640 if (k > last_bss) {
641 last_bss = k;
642 bss_prot = elf_prot;
648 * Now fill out the bss section: first pad the last page from
649 * the file up to the page boundary, and zero it from elf_bss
650 * up to the end of the page.
652 if (padzero(elf_bss)) {
653 error = -EFAULT;
654 goto out;
657 * Next, align both the file and mem bss up to the page size,
658 * since this is where elf_bss was just zeroed up to, and where
659 * last_bss will end after the vm_brk_flags() below.
661 elf_bss = ELF_PAGEALIGN(elf_bss);
662 last_bss = ELF_PAGEALIGN(last_bss);
663 /* Finally, if there is still more bss to allocate, do it. */
664 if (last_bss > elf_bss) {
665 error = vm_brk_flags(elf_bss, last_bss - elf_bss,
666 bss_prot & PROT_EXEC ? VM_EXEC : 0);
667 if (error)
668 goto out;
671 error = load_addr;
672 out:
673 return error;
677 * These are the functions used to load ELF style executables and shared
678 * libraries. There is no binary dependent code anywhere else.
681 static int load_elf_binary(struct linux_binprm *bprm)
683 struct file *interpreter = NULL; /* to shut gcc up */
684 unsigned long load_addr = 0, load_bias = 0;
685 int load_addr_set = 0;
686 unsigned long error;
687 struct elf_phdr *elf_ppnt, *elf_phdata, *interp_elf_phdata = NULL;
688 unsigned long elf_bss, elf_brk;
689 int bss_prot = 0;
690 int retval, i;
691 unsigned long elf_entry;
692 unsigned long interp_load_addr = 0;
693 unsigned long start_code, end_code, start_data, end_data;
694 unsigned long reloc_func_desc __maybe_unused = 0;
695 int executable_stack = EXSTACK_DEFAULT;
696 struct {
697 struct elfhdr elf_ex;
698 struct elfhdr interp_elf_ex;
699 } *loc;
700 struct arch_elf_state arch_state = INIT_ARCH_ELF_STATE;
701 struct pt_regs *regs;
703 loc = kmalloc(sizeof(*loc), GFP_KERNEL);
704 if (!loc) {
705 retval = -ENOMEM;
706 goto out_ret;
709 /* Get the exec-header */
710 loc->elf_ex = *((struct elfhdr *)bprm->buf);
712 retval = -ENOEXEC;
713 /* First of all, some simple consistency checks */
714 if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
715 goto out;
717 if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
718 goto out;
719 if (!elf_check_arch(&loc->elf_ex))
720 goto out;
721 if (elf_check_fdpic(&loc->elf_ex))
722 goto out;
723 if (!bprm->file->f_op->mmap)
724 goto out;
726 elf_phdata = load_elf_phdrs(&loc->elf_ex, bprm->file);
727 if (!elf_phdata)
728 goto out;
730 elf_ppnt = elf_phdata;
731 for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
732 char *elf_interpreter;
734 if (elf_ppnt->p_type != PT_INTERP)
735 continue;
738 * This is the program interpreter used for shared libraries -
739 * for now assume that this is an a.out format binary.
741 retval = -ENOEXEC;
742 if (elf_ppnt->p_filesz > PATH_MAX || elf_ppnt->p_filesz < 2)
743 goto out_free_ph;
745 retval = -ENOMEM;
746 elf_interpreter = kmalloc(elf_ppnt->p_filesz, GFP_KERNEL);
747 if (!elf_interpreter)
748 goto out_free_ph;
750 retval = elf_read(bprm->file, elf_interpreter, elf_ppnt->p_filesz,
751 elf_ppnt->p_offset);
752 if (retval < 0)
753 goto out_free_interp;
754 /* make sure path is NULL terminated */
755 retval = -ENOEXEC;
756 if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
757 goto out_free_interp;
759 interpreter = open_exec(elf_interpreter);
760 kfree(elf_interpreter);
761 retval = PTR_ERR(interpreter);
762 if (IS_ERR(interpreter))
763 goto out_free_ph;
766 * If the binary is not readable then enforce mm->dumpable = 0
767 * regardless of the interpreter's permissions.
769 would_dump(bprm, interpreter);
771 /* Get the exec headers */
772 retval = elf_read(interpreter, &loc->interp_elf_ex,
773 sizeof(loc->interp_elf_ex), 0);
774 if (retval < 0)
775 goto out_free_dentry;
777 break;
779 out_free_interp:
780 kfree(elf_interpreter);
781 goto out_free_ph;
784 elf_ppnt = elf_phdata;
785 for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
786 switch (elf_ppnt->p_type) {
787 case PT_GNU_STACK:
788 if (elf_ppnt->p_flags & PF_X)
789 executable_stack = EXSTACK_ENABLE_X;
790 else
791 executable_stack = EXSTACK_DISABLE_X;
792 break;
794 case PT_LOPROC ... PT_HIPROC:
795 retval = arch_elf_pt_proc(&loc->elf_ex, elf_ppnt,
796 bprm->file, false,
797 &arch_state);
798 if (retval)
799 goto out_free_dentry;
800 break;
803 /* Some simple consistency checks for the interpreter */
804 if (interpreter) {
805 retval = -ELIBBAD;
806 /* Not an ELF interpreter */
807 if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
808 goto out_free_dentry;
809 /* Verify the interpreter has a valid arch */
810 if (!elf_check_arch(&loc->interp_elf_ex) ||
811 elf_check_fdpic(&loc->interp_elf_ex))
812 goto out_free_dentry;
814 /* Load the interpreter program headers */
815 interp_elf_phdata = load_elf_phdrs(&loc->interp_elf_ex,
816 interpreter);
817 if (!interp_elf_phdata)
818 goto out_free_dentry;
820 /* Pass PT_LOPROC..PT_HIPROC headers to arch code */
821 elf_ppnt = interp_elf_phdata;
822 for (i = 0; i < loc->interp_elf_ex.e_phnum; i++, elf_ppnt++)
823 switch (elf_ppnt->p_type) {
824 case PT_LOPROC ... PT_HIPROC:
825 retval = arch_elf_pt_proc(&loc->interp_elf_ex,
826 elf_ppnt, interpreter,
827 true, &arch_state);
828 if (retval)
829 goto out_free_dentry;
830 break;
835 * Allow arch code to reject the ELF at this point, whilst it's
836 * still possible to return an error to the code that invoked
837 * the exec syscall.
839 retval = arch_check_elf(&loc->elf_ex,
840 !!interpreter, &loc->interp_elf_ex,
841 &arch_state);
842 if (retval)
843 goto out_free_dentry;
845 /* Flush all traces of the currently running executable */
846 retval = flush_old_exec(bprm);
847 if (retval)
848 goto out_free_dentry;
850 /* Do this immediately, since STACK_TOP as used in setup_arg_pages
851 may depend on the personality. */
852 SET_PERSONALITY2(loc->elf_ex, &arch_state);
853 if (elf_read_implies_exec(loc->elf_ex, executable_stack))
854 current->personality |= READ_IMPLIES_EXEC;
856 if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
857 current->flags |= PF_RANDOMIZE;
859 setup_new_exec(bprm);
860 install_exec_creds(bprm);
862 /* Do this so that we can load the interpreter, if need be. We will
863 change some of these later */
864 retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
865 executable_stack);
866 if (retval < 0)
867 goto out_free_dentry;
869 elf_bss = 0;
870 elf_brk = 0;
872 start_code = ~0UL;
873 end_code = 0;
874 start_data = 0;
875 end_data = 0;
877 /* Now we do a little grungy work by mmapping the ELF image into
878 the correct location in memory. */
879 for(i = 0, elf_ppnt = elf_phdata;
880 i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
881 int elf_prot, elf_flags;
882 unsigned long k, vaddr;
883 unsigned long total_size = 0;
885 if (elf_ppnt->p_type != PT_LOAD)
886 continue;
888 if (unlikely (elf_brk > elf_bss)) {
889 unsigned long nbyte;
891 /* There was a PT_LOAD segment with p_memsz > p_filesz
892 before this one. Map anonymous pages, if needed,
893 and clear the area. */
894 retval = set_brk(elf_bss + load_bias,
895 elf_brk + load_bias,
896 bss_prot);
897 if (retval)
898 goto out_free_dentry;
899 nbyte = ELF_PAGEOFFSET(elf_bss);
900 if (nbyte) {
901 nbyte = ELF_MIN_ALIGN - nbyte;
902 if (nbyte > elf_brk - elf_bss)
903 nbyte = elf_brk - elf_bss;
904 if (clear_user((void __user *)elf_bss +
905 load_bias, nbyte)) {
907 * This bss-zeroing can fail if the ELF
908 * file specifies odd protections. So
909 * we don't check the return value
915 elf_prot = make_prot(elf_ppnt->p_flags);
917 elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;
919 vaddr = elf_ppnt->p_vaddr;
921 * If we are loading ET_EXEC or we have already performed
922 * the ET_DYN load_addr calculations, proceed normally.
924 if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
925 elf_flags |= MAP_FIXED;
926 } else if (loc->elf_ex.e_type == ET_DYN) {
928 * This logic is run once for the first LOAD Program
929 * Header for ET_DYN binaries to calculate the
930 * randomization (load_bias) for all the LOAD
931 * Program Headers, and to calculate the entire
932 * size of the ELF mapping (total_size). (Note that
933 * load_addr_set is set to true later once the
934 * initial mapping is performed.)
936 * There are effectively two types of ET_DYN
937 * binaries: programs (i.e. PIE: ET_DYN with INTERP)
938 * and loaders (ET_DYN without INTERP, since they
939 * _are_ the ELF interpreter). The loaders must
940 * be loaded away from programs since the program
941 * may otherwise collide with the loader (especially
942 * for ET_EXEC which does not have a randomized
943 * position). For example to handle invocations of
944 * "./ld.so someprog" to test out a new version of
945 * the loader, the subsequent program that the
946 * loader loads must avoid the loader itself, so
947 * they cannot share the same load range. Sufficient
948 * room for the brk must be allocated with the
949 * loader as well, since brk must be available with
950 * the loader.
952 * Therefore, programs are loaded offset from
953 * ELF_ET_DYN_BASE and loaders are loaded into the
954 * independently randomized mmap region (0 load_bias
955 * without MAP_FIXED).
957 if (interpreter) {
958 load_bias = ELF_ET_DYN_BASE;
959 if (current->flags & PF_RANDOMIZE)
960 load_bias += arch_mmap_rnd();
961 elf_flags |= MAP_FIXED;
962 } else
963 load_bias = 0;
966 * Since load_bias is used for all subsequent loading
967 * calculations, we must lower it by the first vaddr
968 * so that the remaining calculations based on the
969 * ELF vaddrs will be correctly offset. The result
970 * is then page aligned.
972 load_bias = ELF_PAGESTART(load_bias - vaddr);
974 total_size = total_mapping_size(elf_phdata,
975 loc->elf_ex.e_phnum);
976 if (!total_size) {
977 retval = -EINVAL;
978 goto out_free_dentry;
982 error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
983 elf_prot, elf_flags, total_size);
984 if (BAD_ADDR(error)) {
985 retval = IS_ERR((void *)error) ?
986 PTR_ERR((void*)error) : -EINVAL;
987 goto out_free_dentry;
990 if (!load_addr_set) {
991 load_addr_set = 1;
992 load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
993 if (loc->elf_ex.e_type == ET_DYN) {
994 load_bias += error -
995 ELF_PAGESTART(load_bias + vaddr);
996 load_addr += load_bias;
997 reloc_func_desc = load_bias;
1000 k = elf_ppnt->p_vaddr;
1001 if (k < start_code)
1002 start_code = k;
1003 if (start_data < k)
1004 start_data = k;
1007 * Check to see if the section's size will overflow the
1008 * allowed task size. Note that p_filesz must always be
1009 * <= p_memsz so it is only necessary to check p_memsz.
1011 if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
1012 elf_ppnt->p_memsz > TASK_SIZE ||
1013 TASK_SIZE - elf_ppnt->p_memsz < k) {
1014 /* set_brk can never work. Avoid overflows. */
1015 retval = -EINVAL;
1016 goto out_free_dentry;
1019 k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;
1021 if (k > elf_bss)
1022 elf_bss = k;
1023 if ((elf_ppnt->p_flags & PF_X) && end_code < k)
1024 end_code = k;
1025 if (end_data < k)
1026 end_data = k;
1027 k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
1028 if (k > elf_brk) {
1029 bss_prot = elf_prot;
1030 elf_brk = k;
1034 loc->elf_ex.e_entry += load_bias;
1035 elf_bss += load_bias;
1036 elf_brk += load_bias;
1037 start_code += load_bias;
1038 end_code += load_bias;
1039 start_data += load_bias;
1040 end_data += load_bias;
1042 /* Calling set_brk effectively mmaps the pages that we need
1043 * for the bss and break sections. We must do this before
1044 * mapping in the interpreter, to make sure it doesn't wind
1045 * up getting placed where the bss needs to go.
1047 retval = set_brk(elf_bss, elf_brk, bss_prot);
1048 if (retval)
1049 goto out_free_dentry;
1050 if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
1051 retval = -EFAULT; /* Nobody gets to see this, but.. */
1052 goto out_free_dentry;
1055 if (interpreter) {
1056 elf_entry = load_elf_interp(&loc->interp_elf_ex,
1057 interpreter,
1058 load_bias, interp_elf_phdata);
1059 if (!IS_ERR((void *)elf_entry)) {
1061 * load_elf_interp() returns relocation
1062 * adjustment
1064 interp_load_addr = elf_entry;
1065 elf_entry += loc->interp_elf_ex.e_entry;
1067 if (BAD_ADDR(elf_entry)) {
1068 retval = IS_ERR((void *)elf_entry) ?
1069 (int)elf_entry : -EINVAL;
1070 goto out_free_dentry;
1072 reloc_func_desc = interp_load_addr;
1074 allow_write_access(interpreter);
1075 fput(interpreter);
1076 } else {
1077 elf_entry = loc->elf_ex.e_entry;
1078 if (BAD_ADDR(elf_entry)) {
1079 retval = -EINVAL;
1080 goto out_free_dentry;
1084 kfree(interp_elf_phdata);
1085 kfree(elf_phdata);
1087 set_binfmt(&elf_format);
1089 #ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
1090 retval = arch_setup_additional_pages(bprm, !!interpreter);
1091 if (retval < 0)
1092 goto out;
1093 #endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
1095 retval = create_elf_tables(bprm, &loc->elf_ex,
1096 load_addr, interp_load_addr);
1097 if (retval < 0)
1098 goto out;
1099 current->mm->end_code = end_code;
1100 current->mm->start_code = start_code;
1101 current->mm->start_data = start_data;
1102 current->mm->end_data = end_data;
1103 current->mm->start_stack = bprm->p;
1105 if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
1107 * For architectures with ELF randomization, when executing
1108 * a loader directly (i.e. no interpreter listed in ELF
1109 * headers), move the brk area out of the mmap region
1110 * (since it grows up, and may collide early with the stack
1111 * growing down), and into the unused ELF_ET_DYN_BASE region.
1113 if (IS_ENABLED(CONFIG_ARCH_HAS_ELF_RANDOMIZE) &&
1114 loc->elf_ex.e_type == ET_DYN && !interpreter)
1115 current->mm->brk = current->mm->start_brk =
1116 ELF_ET_DYN_BASE;
1118 current->mm->brk = current->mm->start_brk =
1119 arch_randomize_brk(current->mm);
1120 #ifdef compat_brk_randomized
1121 current->brk_randomized = 1;
1122 #endif
1125 if (current->personality & MMAP_PAGE_ZERO) {
1126 /* Why this, you ask??? Well SVr4 maps page 0 as read-only,
1127 and some applications "depend" upon this behavior.
1128 Since we do not have the power to recompile these, we
1129 emulate the SVr4 behavior. Sigh. */
1130 error = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
1131 MAP_FIXED | MAP_PRIVATE, 0);
1134 regs = current_pt_regs();
1135 #ifdef ELF_PLAT_INIT
1137 * The ABI may specify that certain registers be set up in special
1138 * ways (on i386 %edx is the address of a DT_FINI function, for
1139 * example. In addition, it may also specify (eg, PowerPC64 ELF)
1140 * that the e_entry field is the address of the function descriptor
1141 * for the startup routine, rather than the address of the startup
1142 * routine itself. This macro performs whatever initialization to
1143 * the regs structure is required as well as any relocations to the
1144 * function descriptor entries when executing dynamically linked apps.
1146 ELF_PLAT_INIT(regs, reloc_func_desc);
1147 #endif
1149 finalize_exec(bprm);
1150 start_thread(regs, elf_entry, bprm->p);
1151 retval = 0;
1152 out:
1153 kfree(loc);
1154 out_ret:
1155 return retval;
1157 /* error cleanup */
1158 out_free_dentry:
1159 kfree(interp_elf_phdata);
1160 allow_write_access(interpreter);
1161 if (interpreter)
1162 fput(interpreter);
1163 out_free_ph:
1164 kfree(elf_phdata);
1165 goto out;
#ifdef CONFIG_USELIB
/*
 * Map a library image that is an a.out-style library wrapped in an ELF
 * header.  Deliberately simple-minded: the image must be ET_EXEC, carry at
 * most two program headers, and exactly one of them must be PT_LOAD.
 *
 * Returns 0 on success.  Header/validation failures give -ENOEXEC, a failed
 * program-header allocation gives -ENOMEM, a mapping that does not land at
 * the requested address gives the vm_mmap() result narrowed to int, a
 * padzero() failure gives -EFAULT and a vm_brk() failure gives its result.
 */
static int load_elf_library(struct file *file)
{
	struct elfhdr hdr;
	struct elf_phdr *phdrs;
	struct elf_phdr *seg = NULL;
	unsigned long file_end, file_pg_end, mem_pg_end;
	int error = -ENOEXEC;
	int phdr_bytes, idx, nload = 0;

	if (elf_read(file, &hdr, sizeof(hdr), 0) < 0)
		goto out;

	if (memcmp(hdr.e_ident, ELFMAG, SELFMAG) != 0)
		goto out;

	/* Basic consistency checks, in the order they have always run. */
	if (hdr.e_type != ET_EXEC)
		goto out;
	if (hdr.e_phnum > 2)
		goto out;
	if (!elf_check_arch(&hdr))
		goto out;
	if (!file->f_op->mmap)
		goto out;
	if (elf_check_fdpic(&hdr))
		goto out;

	/* Read the whole program header table; e_phnum <= 2 keeps it tiny. */
	phdr_bytes = sizeof(struct elf_phdr) * hdr.e_phnum;

	phdrs = kmalloc(phdr_bytes, GFP_KERNEL);
	if (!phdrs) {
		error = -ENOMEM;
		goto out;
	}

	if (elf_read(file, phdrs, phdr_bytes, hdr.e_phoff) < 0)
		goto out_free_ph;

	/* Count the PT_LOAD entries and remember the first one. */
	for (idx = 0; idx < hdr.e_phnum; idx++) {
		if (phdrs[idx].p_type != PT_LOAD)
			continue;
		if (!seg)
			seg = &phdrs[idx];
		nload++;
	}
	if (nload != 1)
		goto out_free_ph;

	/* Now use mmap to map the library into memory. */
	error = vm_mmap(file,
			ELF_PAGESTART(seg->p_vaddr),
			(seg->p_filesz +
			 ELF_PAGEOFFSET(seg->p_vaddr)),
			PROT_READ | PROT_WRITE | PROT_EXEC,
			MAP_FIXED_NOREPLACE | MAP_PRIVATE | MAP_DENYWRITE,
			(seg->p_offset -
			 ELF_PAGEOFFSET(seg->p_vaddr)));
	if (error != ELF_PAGESTART(seg->p_vaddr))
		goto out_free_ph;

	/* Zero the remainder of the last file-backed page. */
	file_end = seg->p_vaddr + seg->p_filesz;
	if (padzero(file_end)) {
		error = -EFAULT;
		goto out_free_ph;
	}

	/* Anything between the file-backed end and p_memsz is bss. */
	file_pg_end = ELF_PAGEALIGN(seg->p_filesz + seg->p_vaddr);
	mem_pg_end = ELF_PAGEALIGN(seg->p_memsz + seg->p_vaddr);
	if (mem_pg_end > file_pg_end) {
		error = vm_brk(file_pg_end, mem_pg_end - file_pg_end);
		if (error)
			goto out_free_ph;
	}
	error = 0;

out_free_ph:
	kfree(phdrs);
out:
	return error;
}
#endif /* #ifdef CONFIG_USELIB */
1253 #ifdef CONFIG_ELF_CORE
1255 * ELF core dumper
1257 * Modelled on fs/exec.c:aout_core_dump()
1258 * Jeremy Fitzhardinge <jeremy@sw.oz.au>
1262 * The purpose of always_dump_vma() is to make sure that special kernel mappings
1263 * that are useful for post-mortem analysis are included in every core dump.
1264 * In that way we ensure that the core dump is fully interpretable later
1265 * without matching up the same kernel and hardware config to see what PC values
1266 * meant. These special mappings include - vDSO, vsyscall, and other
1267 * architecture specific mappings
1269 static bool always_dump_vma(struct vm_area_struct *vma)
1271 /* Any vsyscall mappings? */
1272 if (vma == get_gate_vma(vma->vm_mm))
1273 return true;
1276 * Assume that all vmas with a .name op should always be dumped.
1277 * If this changes, a new vm_ops field can easily be added.
1279 if (vma->vm_ops && vma->vm_ops->name && vma->vm_ops->name(vma))
1280 return true;
1283 * arch_vma_name() returns non-NULL for special architecture mappings,
1284 * such as vDSO sections.
1286 if (arch_vma_name(vma))
1287 return true;
1289 return false;
1293 * Decide what to dump of a segment, part, all or none.
1295 static unsigned long vma_dump_size(struct vm_area_struct *vma,
1296 unsigned long mm_flags)
1298 #define FILTER(type) (mm_flags & (1UL << MMF_DUMP_##type))
1300 /* always dump the vdso and vsyscall sections */
1301 if (always_dump_vma(vma))
1302 goto whole;
1304 if (vma->vm_flags & VM_DONTDUMP)
1305 return 0;
1307 /* support for DAX */
1308 if (vma_is_dax(vma)) {
1309 if ((vma->vm_flags & VM_SHARED) && FILTER(DAX_SHARED))
1310 goto whole;
1311 if (!(vma->vm_flags & VM_SHARED) && FILTER(DAX_PRIVATE))
1312 goto whole;
1313 return 0;
1316 /* Hugetlb memory check */
1317 if (vma->vm_flags & VM_HUGETLB) {
1318 if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
1319 goto whole;
1320 if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
1321 goto whole;
1322 return 0;
1325 /* Do not dump I/O mapped devices or special mappings */
1326 if (vma->vm_flags & VM_IO)
1327 return 0;
1329 /* By default, dump shared memory if mapped from an anonymous file. */
1330 if (vma->vm_flags & VM_SHARED) {
1331 if (file_inode(vma->vm_file)->i_nlink == 0 ?
1332 FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
1333 goto whole;
1334 return 0;
1337 /* Dump segments that have been written to. */
1338 if (vma->anon_vma && FILTER(ANON_PRIVATE))
1339 goto whole;
1340 if (vma->vm_file == NULL)
1341 return 0;
1343 if (FILTER(MAPPED_PRIVATE))
1344 goto whole;
1347 * If this looks like the beginning of a DSO or executable mapping,
1348 * check for an ELF header. If we find one, dump the first page to
1349 * aid in determining what was mapped here.
1351 if (FILTER(ELF_HEADERS) &&
1352 vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) {
1353 u32 __user *header = (u32 __user *) vma->vm_start;
1354 u32 word;
1355 mm_segment_t fs = get_fs();
1357 * Doing it this way gets the constant folded by GCC.
1359 union {
1360 u32 cmp;
1361 char elfmag[SELFMAG];
1362 } magic;
1363 BUILD_BUG_ON(SELFMAG != sizeof word);
1364 magic.elfmag[EI_MAG0] = ELFMAG0;
1365 magic.elfmag[EI_MAG1] = ELFMAG1;
1366 magic.elfmag[EI_MAG2] = ELFMAG2;
1367 magic.elfmag[EI_MAG3] = ELFMAG3;
1369 * Switch to the user "segment" for get_user(),
1370 * then put back what elf_core_dump() had in place.
1372 set_fs(USER_DS);
1373 if (unlikely(get_user(word, header)))
1374 word = 0;
1375 set_fs(fs);
1376 if (word == magic.cmp)
1377 return PAGE_SIZE;
1380 #undef FILTER
1382 return 0;
1384 whole:
1385 return vma->vm_end - vma->vm_start;
/*
 * In-memory description of one ELF note: the pieces needed to emit a note
 * header followed by its name and payload.
 */
struct memelfnote {
	const char *name;	/* NUL-terminated note name, e.g. "CORE" */
	int type;		/* note type (NT_*) */
	unsigned int datasz;	/* payload size in bytes */
	void *data;		/* payload */
};
1397 static int notesize(struct memelfnote *en)
1399 int sz;
1401 sz = sizeof(struct elf_note);
1402 sz += roundup(strlen(en->name) + 1, 4);
1403 sz += roundup(en->datasz, 4);
1405 return sz;
1408 static int writenote(struct memelfnote *men, struct coredump_params *cprm)
1410 struct elf_note en;
1411 en.n_namesz = strlen(men->name) + 1;
1412 en.n_descsz = men->datasz;
1413 en.n_type = men->type;
1415 return dump_emit(cprm, &en, sizeof(en)) &&
1416 dump_emit(cprm, men->name, en.n_namesz) && dump_align(cprm, 4) &&
1417 dump_emit(cprm, men->data, men->datasz) && dump_align(cprm, 4);
1420 static void fill_elf_header(struct elfhdr *elf, int segs,
1421 u16 machine, u32 flags)
1423 memset(elf, 0, sizeof(*elf));
1425 memcpy(elf->e_ident, ELFMAG, SELFMAG);
1426 elf->e_ident[EI_CLASS] = ELF_CLASS;
1427 elf->e_ident[EI_DATA] = ELF_DATA;
1428 elf->e_ident[EI_VERSION] = EV_CURRENT;
1429 elf->e_ident[EI_OSABI] = ELF_OSABI;
1431 elf->e_type = ET_CORE;
1432 elf->e_machine = machine;
1433 elf->e_version = EV_CURRENT;
1434 elf->e_phoff = sizeof(struct elfhdr);
1435 elf->e_flags = flags;
1436 elf->e_ehsize = sizeof(struct elfhdr);
1437 elf->e_phentsize = sizeof(struct elf_phdr);
1438 elf->e_phnum = segs;
1441 static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
1443 phdr->p_type = PT_NOTE;
1444 phdr->p_offset = offset;
1445 phdr->p_vaddr = 0;
1446 phdr->p_paddr = 0;
1447 phdr->p_filesz = sz;
1448 phdr->p_memsz = 0;
1449 phdr->p_flags = 0;
1450 phdr->p_align = 0;
1453 static void fill_note(struct memelfnote *note, const char *name, int type,
1454 unsigned int sz, void *data)
1456 note->name = name;
1457 note->type = type;
1458 note->datasz = sz;
1459 note->data = data;
1463 * fill up all the fields in prstatus from the given task struct, except
1464 * registers which need to be filled up separately.
1466 static void fill_prstatus(struct elf_prstatus *prstatus,
1467 struct task_struct *p, long signr)
1469 prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
1470 prstatus->pr_sigpend = p->pending.signal.sig[0];
1471 prstatus->pr_sighold = p->blocked.sig[0];
1472 rcu_read_lock();
1473 prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1474 rcu_read_unlock();
1475 prstatus->pr_pid = task_pid_vnr(p);
1476 prstatus->pr_pgrp = task_pgrp_vnr(p);
1477 prstatus->pr_sid = task_session_vnr(p);
1478 if (thread_group_leader(p)) {
1479 struct task_cputime cputime;
1482 * This is the record for the group leader. It shows the
1483 * group-wide total, not its individual thread total.
1485 thread_group_cputime(p, &cputime);
1486 prstatus->pr_utime = ns_to_kernel_old_timeval(cputime.utime);
1487 prstatus->pr_stime = ns_to_kernel_old_timeval(cputime.stime);
1488 } else {
1489 u64 utime, stime;
1491 task_cputime(p, &utime, &stime);
1492 prstatus->pr_utime = ns_to_kernel_old_timeval(utime);
1493 prstatus->pr_stime = ns_to_kernel_old_timeval(stime);
1496 prstatus->pr_cutime = ns_to_kernel_old_timeval(p->signal->cutime);
1497 prstatus->pr_cstime = ns_to_kernel_old_timeval(p->signal->cstime);
1500 static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1501 struct mm_struct *mm)
1503 const struct cred *cred;
1504 unsigned int i, len;
1506 /* first copy the parameters from user space */
1507 memset(psinfo, 0, sizeof(struct elf_prpsinfo));
1509 len = mm->arg_end - mm->arg_start;
1510 if (len >= ELF_PRARGSZ)
1511 len = ELF_PRARGSZ-1;
1512 if (copy_from_user(&psinfo->pr_psargs,
1513 (const char __user *)mm->arg_start, len))
1514 return -EFAULT;
1515 for(i = 0; i < len; i++)
1516 if (psinfo->pr_psargs[i] == 0)
1517 psinfo->pr_psargs[i] = ' ';
1518 psinfo->pr_psargs[len] = 0;
1520 rcu_read_lock();
1521 psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1522 rcu_read_unlock();
1523 psinfo->pr_pid = task_pid_vnr(p);
1524 psinfo->pr_pgrp = task_pgrp_vnr(p);
1525 psinfo->pr_sid = task_session_vnr(p);
1527 i = p->state ? ffz(~p->state) + 1 : 0;
1528 psinfo->pr_state = i;
1529 psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
1530 psinfo->pr_zomb = psinfo->pr_sname == 'Z';
1531 psinfo->pr_nice = task_nice(p);
1532 psinfo->pr_flag = p->flags;
1533 rcu_read_lock();
1534 cred = __task_cred(p);
1535 SET_UID(psinfo->pr_uid, from_kuid_munged(cred->user_ns, cred->uid));
1536 SET_GID(psinfo->pr_gid, from_kgid_munged(cred->user_ns, cred->gid));
1537 rcu_read_unlock();
1538 strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
1540 return 0;
1543 static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
1545 elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
1546 int i = 0;
1548 i += 2;
1549 while (auxv[i - 2] != AT_NULL);
1550 fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
1553 static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
1554 const kernel_siginfo_t *siginfo)
1556 mm_segment_t old_fs = get_fs();
1557 set_fs(KERNEL_DS);
1558 copy_siginfo_to_user((user_siginfo_t __user *) csigdata, siginfo);
1559 set_fs(old_fs);
1560 fill_note(note, "CORE", NT_SIGINFO, sizeof(*csigdata), csigdata);
1563 #define MAX_FILE_NOTE_SIZE (4*1024*1024)
1565 * Format of NT_FILE note:
1567 * long count -- how many files are mapped
1568 * long page_size -- units for file_ofs
1569 * array of [COUNT] elements of
1570 * long start
1571 * long end
1572 * long file_ofs
1573 * followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL...
1575 static int fill_files_note(struct memelfnote *note)
1577 struct vm_area_struct *vma;
1578 unsigned count, size, names_ofs, remaining, n;
1579 user_long_t *data;
1580 user_long_t *start_end_ofs;
1581 char *name_base, *name_curpos;
1583 /* *Estimated* file count and total data size needed */
1584 count = current->mm->map_count;
1585 if (count > UINT_MAX / 64)
1586 return -EINVAL;
1587 size = count * 64;
1589 names_ofs = (2 + 3 * count) * sizeof(data[0]);
1590 alloc:
1591 if (size >= MAX_FILE_NOTE_SIZE) /* paranoia check */
1592 return -EINVAL;
1593 size = round_up(size, PAGE_SIZE);
1594 data = kvmalloc(size, GFP_KERNEL);
1595 if (ZERO_OR_NULL_PTR(data))
1596 return -ENOMEM;
1598 start_end_ofs = data + 2;
1599 name_base = name_curpos = ((char *)data) + names_ofs;
1600 remaining = size - names_ofs;
1601 count = 0;
1602 for (vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) {
1603 struct file *file;
1604 const char *filename;
1606 file = vma->vm_file;
1607 if (!file)
1608 continue;
1609 filename = file_path(file, name_curpos, remaining);
1610 if (IS_ERR(filename)) {
1611 if (PTR_ERR(filename) == -ENAMETOOLONG) {
1612 kvfree(data);
1613 size = size * 5 / 4;
1614 goto alloc;
1616 continue;
1619 /* file_path() fills at the end, move name down */
1620 /* n = strlen(filename) + 1: */
1621 n = (name_curpos + remaining) - filename;
1622 remaining = filename - name_curpos;
1623 memmove(name_curpos, filename, n);
1624 name_curpos += n;
1626 *start_end_ofs++ = vma->vm_start;
1627 *start_end_ofs++ = vma->vm_end;
1628 *start_end_ofs++ = vma->vm_pgoff;
1629 count++;
1632 /* Now we know exact count of files, can store it */
1633 data[0] = count;
1634 data[1] = PAGE_SIZE;
1636 * Count usually is less than current->mm->map_count,
1637 * we need to move filenames down.
1639 n = current->mm->map_count - count;
1640 if (n != 0) {
1641 unsigned shift_bytes = n * 3 * sizeof(data[0]);
1642 memmove(name_base - shift_bytes, name_base,
1643 name_curpos - name_base);
1644 name_curpos -= shift_bytes;
1647 size = name_curpos - (char *)data;
1648 fill_note(note, "CORE", NT_FILE, size, data);
1649 return 0;
1652 #ifdef CORE_DUMP_USE_REGSET
1653 #include <linux/regset.h>
1655 struct elf_thread_core_info {
1656 struct elf_thread_core_info *next;
1657 struct task_struct *task;
1658 struct elf_prstatus prstatus;
1659 struct memelfnote notes[0];
1662 struct elf_note_info {
1663 struct elf_thread_core_info *thread;
1664 struct memelfnote psinfo;
1665 struct memelfnote signote;
1666 struct memelfnote auxv;
1667 struct memelfnote files;
1668 user_siginfo_t csigdata;
1669 size_t size;
1670 int thread_notes;
1674 * When a regset has a writeback hook, we call it on each thread before
1675 * dumping user memory. On register window machines, this makes sure the
1676 * user memory backing the register data is up to date before we read it.
1678 static void do_thread_regset_writeback(struct task_struct *task,
1679 const struct user_regset *regset)
1681 if (regset->writeback)
1682 regset->writeback(task, regset, 1);
/*
 * Defaults for hooks that may already be defined before this point.
 * PRSTATUS_SIZE(S, R) is the NT_PRSTATUS payload size; the default ignores
 * R and uses the size of S.  SET_PR_FPVALID(S, V, R) stores V in the
 * pr_fpvalid field of the structure S points to; the default ignores R.
 */
#ifndef PRSTATUS_SIZE
#define PRSTATUS_SIZE(S, R)	(sizeof(S))
#endif

#ifndef SET_PR_FPVALID
#define SET_PR_FPVALID(S, V, R)	((S)->pr_fpvalid = (V))
#endif
1693 static int fill_thread_core_info(struct elf_thread_core_info *t,
1694 const struct user_regset_view *view,
1695 long signr, size_t *total)
1697 unsigned int i;
1698 unsigned int regset0_size = regset_size(t->task, &view->regsets[0]);
1701 * NT_PRSTATUS is the one special case, because the regset data
1702 * goes into the pr_reg field inside the note contents, rather
1703 * than being the whole note contents. We fill the reset in here.
1704 * We assume that regset 0 is NT_PRSTATUS.
1706 fill_prstatus(&t->prstatus, t->task, signr);
1707 (void) view->regsets[0].get(t->task, &view->regsets[0], 0, regset0_size,
1708 &t->prstatus.pr_reg, NULL);
1710 fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
1711 PRSTATUS_SIZE(t->prstatus, regset0_size), &t->prstatus);
1712 *total += notesize(&t->notes[0]);
1714 do_thread_regset_writeback(t->task, &view->regsets[0]);
1717 * Each other regset might generate a note too. For each regset
1718 * that has no core_note_type or is inactive, we leave t->notes[i]
1719 * all zero and we'll know to skip writing it later.
1721 for (i = 1; i < view->n; ++i) {
1722 const struct user_regset *regset = &view->regsets[i];
1723 do_thread_regset_writeback(t->task, regset);
1724 if (regset->core_note_type && regset->get &&
1725 (!regset->active || regset->active(t->task, regset) > 0)) {
1726 int ret;
1727 size_t size = regset_size(t->task, regset);
1728 void *data = kmalloc(size, GFP_KERNEL);
1729 if (unlikely(!data))
1730 return 0;
1731 ret = regset->get(t->task, regset,
1732 0, size, data, NULL);
1733 if (unlikely(ret))
1734 kfree(data);
1735 else {
1736 if (regset->core_note_type != NT_PRFPREG)
1737 fill_note(&t->notes[i], "LINUX",
1738 regset->core_note_type,
1739 size, data);
1740 else {
1741 SET_PR_FPVALID(&t->prstatus,
1742 1, regset0_size);
1743 fill_note(&t->notes[i], "CORE",
1744 NT_PRFPREG, size, data);
1746 *total += notesize(&t->notes[i]);
1751 return 1;
1754 static int fill_note_info(struct elfhdr *elf, int phdrs,
1755 struct elf_note_info *info,
1756 const kernel_siginfo_t *siginfo, struct pt_regs *regs)
1758 struct task_struct *dump_task = current;
1759 const struct user_regset_view *view = task_user_regset_view(dump_task);
1760 struct elf_thread_core_info *t;
1761 struct elf_prpsinfo *psinfo;
1762 struct core_thread *ct;
1763 unsigned int i;
1765 info->size = 0;
1766 info->thread = NULL;
1768 psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1769 if (psinfo == NULL) {
1770 info->psinfo.data = NULL; /* So we don't free this wrongly */
1771 return 0;
1774 fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1777 * Figure out how many notes we're going to need for each thread.
1779 info->thread_notes = 0;
1780 for (i = 0; i < view->n; ++i)
1781 if (view->regsets[i].core_note_type != 0)
1782 ++info->thread_notes;
1785 * Sanity check. We rely on regset 0 being in NT_PRSTATUS,
1786 * since it is our one special case.
1788 if (unlikely(info->thread_notes == 0) ||
1789 unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
1790 WARN_ON(1);
1791 return 0;
1795 * Initialize the ELF file header.
1797 fill_elf_header(elf, phdrs,
1798 view->e_machine, view->e_flags);
1801 * Allocate a structure for each thread.
1803 for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
1804 t = kzalloc(offsetof(struct elf_thread_core_info,
1805 notes[info->thread_notes]),
1806 GFP_KERNEL);
1807 if (unlikely(!t))
1808 return 0;
1810 t->task = ct->task;
1811 if (ct->task == dump_task || !info->thread) {
1812 t->next = info->thread;
1813 info->thread = t;
1814 } else {
1816 * Make sure to keep the original task at
1817 * the head of the list.
1819 t->next = info->thread->next;
1820 info->thread->next = t;
1825 * Now fill in each thread's information.
1827 for (t = info->thread; t != NULL; t = t->next)
1828 if (!fill_thread_core_info(t, view, siginfo->si_signo, &info->size))
1829 return 0;
1832 * Fill in the two process-wide notes.
1834 fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
1835 info->size += notesize(&info->psinfo);
1837 fill_siginfo_note(&info->signote, &info->csigdata, siginfo);
1838 info->size += notesize(&info->signote);
1840 fill_auxv_note(&info->auxv, current->mm);
1841 info->size += notesize(&info->auxv);
1843 if (fill_files_note(&info->files) == 0)
1844 info->size += notesize(&info->files);
1846 return 1;
1849 static size_t get_note_info_size(struct elf_note_info *info)
1851 return info->size;
1855 * Write all the notes for each thread. When writing the first thread, the
1856 * process-wide notes are interleaved after the first thread-specific note.
1858 static int write_note_info(struct elf_note_info *info,
1859 struct coredump_params *cprm)
1861 bool first = true;
1862 struct elf_thread_core_info *t = info->thread;
1864 do {
1865 int i;
1867 if (!writenote(&t->notes[0], cprm))
1868 return 0;
1870 if (first && !writenote(&info->psinfo, cprm))
1871 return 0;
1872 if (first && !writenote(&info->signote, cprm))
1873 return 0;
1874 if (first && !writenote(&info->auxv, cprm))
1875 return 0;
1876 if (first && info->files.data &&
1877 !writenote(&info->files, cprm))
1878 return 0;
1880 for (i = 1; i < info->thread_notes; ++i)
1881 if (t->notes[i].data &&
1882 !writenote(&t->notes[i], cprm))
1883 return 0;
1885 first = false;
1886 t = t->next;
1887 } while (t);
1889 return 1;
1892 static void free_note_info(struct elf_note_info *info)
1894 struct elf_thread_core_info *threads = info->thread;
1895 while (threads) {
1896 unsigned int i;
1897 struct elf_thread_core_info *t = threads;
1898 threads = t->next;
1899 WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
1900 for (i = 1; i < info->thread_notes; ++i)
1901 kfree(t->notes[i].data);
1902 kfree(t);
1904 kfree(info->psinfo.data);
1905 kvfree(info->files.data);
1908 #else
1910 /* Here is the structure in which status of each thread is captured. */
1911 struct elf_thread_status
1913 struct list_head list;
1914 struct elf_prstatus prstatus; /* NT_PRSTATUS */
1915 elf_fpregset_t fpu; /* NT_PRFPREG */
1916 struct task_struct *thread;
1917 #ifdef ELF_CORE_COPY_XFPREGS
1918 elf_fpxregset_t xfpu; /* ELF_CORE_XFPREG_TYPE */
1919 #endif
1920 struct memelfnote notes[3];
1921 int num_notes;
1925 * In order to add the specific thread information for the elf file format,
1926 * we need to keep a linked list of every threads pr_status and then create
1927 * a single section for them in the final core file.
1929 static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1931 int sz = 0;
1932 struct task_struct *p = t->thread;
1933 t->num_notes = 0;
1935 fill_prstatus(&t->prstatus, p, signr);
1936 elf_core_copy_task_regs(p, &t->prstatus.pr_reg);
1938 fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1939 &(t->prstatus));
1940 t->num_notes++;
1941 sz += notesize(&t->notes[0]);
1943 if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1944 &t->fpu))) {
1945 fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1946 &(t->fpu));
1947 t->num_notes++;
1948 sz += notesize(&t->notes[1]);
1951 #ifdef ELF_CORE_COPY_XFPREGS
1952 if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1953 fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
1954 sizeof(t->xfpu), &t->xfpu);
1955 t->num_notes++;
1956 sz += notesize(&t->notes[2]);
1958 #endif
1959 return sz;
1962 struct elf_note_info {
1963 struct memelfnote *notes;
1964 struct memelfnote *notes_files;
1965 struct elf_prstatus *prstatus; /* NT_PRSTATUS */
1966 struct elf_prpsinfo *psinfo; /* NT_PRPSINFO */
1967 struct list_head thread_list;
1968 elf_fpregset_t *fpu;
1969 #ifdef ELF_CORE_COPY_XFPREGS
1970 elf_fpxregset_t *xfpu;
1971 #endif
1972 user_siginfo_t csigdata;
1973 int thread_status_size;
1974 int numnote;
1977 static int elf_note_info_init(struct elf_note_info *info)
1979 memset(info, 0, sizeof(*info));
1980 INIT_LIST_HEAD(&info->thread_list);
1982 /* Allocate space for ELF notes */
1983 info->notes = kmalloc_array(8, sizeof(struct memelfnote), GFP_KERNEL);
1984 if (!info->notes)
1985 return 0;
1986 info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
1987 if (!info->psinfo)
1988 return 0;
1989 info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
1990 if (!info->prstatus)
1991 return 0;
1992 info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
1993 if (!info->fpu)
1994 return 0;
1995 #ifdef ELF_CORE_COPY_XFPREGS
1996 info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
1997 if (!info->xfpu)
1998 return 0;
1999 #endif
2000 return 1;
2003 static int fill_note_info(struct elfhdr *elf, int phdrs,
2004 struct elf_note_info *info,
2005 const kernel_siginfo_t *siginfo, struct pt_regs *regs)
2007 struct core_thread *ct;
2008 struct elf_thread_status *ets;
2010 if (!elf_note_info_init(info))
2011 return 0;
2013 for (ct = current->mm->core_state->dumper.next;
2014 ct; ct = ct->next) {
2015 ets = kzalloc(sizeof(*ets), GFP_KERNEL);
2016 if (!ets)
2017 return 0;
2019 ets->thread = ct->task;
2020 list_add(&ets->list, &info->thread_list);
2023 list_for_each_entry(ets, &info->thread_list, list) {
2024 int sz;
2026 sz = elf_dump_thread_status(siginfo->si_signo, ets);
2027 info->thread_status_size += sz;
2029 /* now collect the dump for the current */
2030 memset(info->prstatus, 0, sizeof(*info->prstatus));
2031 fill_prstatus(info->prstatus, current, siginfo->si_signo);
2032 elf_core_copy_regs(&info->prstatus->pr_reg, regs);
2034 /* Set up header */
2035 fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS);
2038 * Set up the notes in similar form to SVR4 core dumps made
2039 * with info from their /proc.
2042 fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
2043 sizeof(*info->prstatus), info->prstatus);
2044 fill_psinfo(info->psinfo, current->group_leader, current->mm);
2045 fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
2046 sizeof(*info->psinfo), info->psinfo);
2048 fill_siginfo_note(info->notes + 2, &info->csigdata, siginfo);
2049 fill_auxv_note(info->notes + 3, current->mm);
2050 info->numnote = 4;
2052 if (fill_files_note(info->notes + info->numnote) == 0) {
2053 info->notes_files = info->notes + info->numnote;
2054 info->numnote++;
2057 /* Try to dump the FPU. */
2058 info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
2059 info->fpu);
2060 if (info->prstatus->pr_fpvalid)
2061 fill_note(info->notes + info->numnote++,
2062 "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
2063 #ifdef ELF_CORE_COPY_XFPREGS
2064 if (elf_core_copy_task_xfpregs(current, info->xfpu))
2065 fill_note(info->notes + info->numnote++,
2066 "LINUX", ELF_CORE_XFPREG_TYPE,
2067 sizeof(*info->xfpu), info->xfpu);
2068 #endif
2070 return 1;
2073 static size_t get_note_info_size(struct elf_note_info *info)
2075 int sz = 0;
2076 int i;
2078 for (i = 0; i < info->numnote; i++)
2079 sz += notesize(info->notes + i);
2081 sz += info->thread_status_size;
2083 return sz;
2086 static int write_note_info(struct elf_note_info *info,
2087 struct coredump_params *cprm)
2089 struct elf_thread_status *ets;
2090 int i;
2092 for (i = 0; i < info->numnote; i++)
2093 if (!writenote(info->notes + i, cprm))
2094 return 0;
2096 /* write out the thread status notes section */
2097 list_for_each_entry(ets, &info->thread_list, list) {
2098 for (i = 0; i < ets->num_notes; i++)
2099 if (!writenote(&ets->notes[i], cprm))
2100 return 0;
2103 return 1;
2106 static void free_note_info(struct elf_note_info *info)
2108 while (!list_empty(&info->thread_list)) {
2109 struct list_head *tmp = info->thread_list.next;
2110 list_del(tmp);
2111 kfree(list_entry(tmp, struct elf_thread_status, list));
2114 /* Free data possibly allocated by fill_files_note(): */
2115 if (info->notes_files)
2116 kvfree(info->notes_files->data);
2118 kfree(info->prstatus);
2119 kfree(info->psinfo);
2120 kfree(info->notes);
2121 kfree(info->fpu);
2122 #ifdef ELF_CORE_COPY_XFPREGS
2123 kfree(info->xfpu);
2124 #endif
2127 #endif
2129 static struct vm_area_struct *first_vma(struct task_struct *tsk,
2130 struct vm_area_struct *gate_vma)
2132 struct vm_area_struct *ret = tsk->mm->mmap;
2134 if (ret)
2135 return ret;
2136 return gate_vma;
2139 * Helper function for iterating across a vma list. It ensures that the caller
2140 * will visit `gate_vma' prior to terminating the search.
2142 static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
2143 struct vm_area_struct *gate_vma)
2145 struct vm_area_struct *ret;
2147 ret = this_vma->vm_next;
2148 if (ret)
2149 return ret;
2150 if (this_vma == gate_vma)
2151 return NULL;
2152 return gate_vma;
2155 static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
2156 elf_addr_t e_shoff, int segs)
2158 elf->e_shoff = e_shoff;
2159 elf->e_shentsize = sizeof(*shdr4extnum);
2160 elf->e_shnum = 1;
2161 elf->e_shstrndx = SHN_UNDEF;
2163 memset(shdr4extnum, 0, sizeof(*shdr4extnum));
2165 shdr4extnum->sh_type = SHT_NULL;
2166 shdr4extnum->sh_size = elf->e_shnum;
2167 shdr4extnum->sh_link = elf->e_shstrndx;
2168 shdr4extnum->sh_info = segs;
2172 * Actual dumper
2174 * This is a two-pass process; first we find the offsets of the bits,
2175 * and then they are actually written out. If we run out of core limit
2176 * we just truncate.
2178 static int elf_core_dump(struct coredump_params *cprm)
2180 int has_dumped = 0;
2181 mm_segment_t fs;
2182 int segs, i;
2183 size_t vma_data_size = 0;
2184 struct vm_area_struct *vma, *gate_vma;
2185 struct elfhdr *elf = NULL;
2186 loff_t offset = 0, dataoff;
2187 struct elf_note_info info = { };
2188 struct elf_phdr *phdr4note = NULL;
2189 struct elf_shdr *shdr4extnum = NULL;
2190 Elf_Half e_phnum;
2191 elf_addr_t e_shoff;
2192 elf_addr_t *vma_filesz = NULL;
2195 * We no longer stop all VM operations.
2197 * This is because those processes that could possibly change map_count
2198 * or the mmap / vma pages are now blocked in do_exit on current
2199 * finishing this core dump.
2201 * Only ptrace can touch these memory addresses, but it doesn't change
2202 * the map_count or the pages allocated. So no possibility of crashing
2203 * exists while dumping the mm->vm_next areas to the core file.
2206 /* alloc memory for large data structures: too large to be on stack */
2207 elf = kmalloc(sizeof(*elf), GFP_KERNEL);
2208 if (!elf)
2209 goto out;
2211 * The number of segs are recorded into ELF header as 16bit value.
2212 * Please check DEFAULT_MAX_MAP_COUNT definition when you modify here.
2214 segs = current->mm->map_count;
2215 segs += elf_core_extra_phdrs();
2217 gate_vma = get_gate_vma(current->mm);
2218 if (gate_vma != NULL)
2219 segs++;
2221 /* for notes section */
2222 segs++;
2224 /* If segs > PN_XNUM(0xffff), then e_phnum overflows. To avoid
2225 * this, kernel supports extended numbering. Have a look at
2226 * include/linux/elf.h for further information. */
2227 e_phnum = segs > PN_XNUM ? PN_XNUM : segs;
2230 * Collect all the non-memory information about the process for the
2231 * notes. This also sets up the file header.
2233 if (!fill_note_info(elf, e_phnum, &info, cprm->siginfo, cprm->regs))
2234 goto cleanup;
2236 has_dumped = 1;
2238 fs = get_fs();
2239 set_fs(KERNEL_DS);
2241 offset += sizeof(*elf); /* Elf header */
2242 offset += segs * sizeof(struct elf_phdr); /* Program headers */
2244 /* Write notes phdr entry */
2246 size_t sz = get_note_info_size(&info);
2248 sz += elf_coredump_extra_notes_size();
2250 phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL);
2251 if (!phdr4note)
2252 goto end_coredump;
2254 fill_elf_note_phdr(phdr4note, sz, offset);
2255 offset += sz;
2258 dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
2260 if (segs - 1 > ULONG_MAX / sizeof(*vma_filesz))
2261 goto end_coredump;
2262 vma_filesz = kvmalloc(array_size(sizeof(*vma_filesz), (segs - 1)),
2263 GFP_KERNEL);
2264 if (ZERO_OR_NULL_PTR(vma_filesz))
2265 goto end_coredump;
2267 for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
2268 vma = next_vma(vma, gate_vma)) {
2269 unsigned long dump_size;
2271 dump_size = vma_dump_size(vma, cprm->mm_flags);
2272 vma_filesz[i++] = dump_size;
2273 vma_data_size += dump_size;
2276 offset += vma_data_size;
2277 offset += elf_core_extra_data_size();
2278 e_shoff = offset;
2280 if (e_phnum == PN_XNUM) {
2281 shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL);
2282 if (!shdr4extnum)
2283 goto end_coredump;
2284 fill_extnum_info(elf, shdr4extnum, e_shoff, segs);
2287 offset = dataoff;
2289 if (!dump_emit(cprm, elf, sizeof(*elf)))
2290 goto end_coredump;
2292 if (!dump_emit(cprm, phdr4note, sizeof(*phdr4note)))
2293 goto end_coredump;
2295 /* Write program headers for segments dump */
2296 for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
2297 vma = next_vma(vma, gate_vma)) {
2298 struct elf_phdr phdr;
2300 phdr.p_type = PT_LOAD;
2301 phdr.p_offset = offset;
2302 phdr.p_vaddr = vma->vm_start;
2303 phdr.p_paddr = 0;
2304 phdr.p_filesz = vma_filesz[i++];
2305 phdr.p_memsz = vma->vm_end - vma->vm_start;
2306 offset += phdr.p_filesz;
2307 phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
2308 if (vma->vm_flags & VM_WRITE)
2309 phdr.p_flags |= PF_W;
2310 if (vma->vm_flags & VM_EXEC)
2311 phdr.p_flags |= PF_X;
2312 phdr.p_align = ELF_EXEC_PAGESIZE;
2314 if (!dump_emit(cprm, &phdr, sizeof(phdr)))
2315 goto end_coredump;
2318 if (!elf_core_write_extra_phdrs(cprm, offset))
2319 goto end_coredump;
2321 /* write out the notes section */
2322 if (!write_note_info(&info, cprm))
2323 goto end_coredump;
2325 if (elf_coredump_extra_notes_write(cprm))
2326 goto end_coredump;
2328 /* Align to page */
2329 if (!dump_skip(cprm, dataoff - cprm->pos))
2330 goto end_coredump;
2332 for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
2333 vma = next_vma(vma, gate_vma)) {
2334 unsigned long addr;
2335 unsigned long end;
2337 end = vma->vm_start + vma_filesz[i++];
2339 for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
2340 struct page *page;
2341 int stop;
2343 page = get_dump_page(addr);
2344 if (page) {
2345 void *kaddr = kmap(page);
2346 stop = !dump_emit(cprm, kaddr, PAGE_SIZE);
2347 kunmap(page);
2348 put_page(page);
2349 } else
2350 stop = !dump_skip(cprm, PAGE_SIZE);
2351 if (stop)
2352 goto end_coredump;
2355 dump_truncate(cprm);
2357 if (!elf_core_write_extra_data(cprm))
2358 goto end_coredump;
2360 if (e_phnum == PN_XNUM) {
2361 if (!dump_emit(cprm, shdr4extnum, sizeof(*shdr4extnum)))
2362 goto end_coredump;
2365 end_coredump:
2366 set_fs(fs);
2368 cleanup:
2369 free_note_info(&info);
2370 kfree(shdr4extnum);
2371 kvfree(vma_filesz);
2372 kfree(phdr4note);
2373 kfree(elf);
2374 out:
2375 return has_dumped;
2378 #endif /* CONFIG_ELF_CORE */
2380 static int __init init_elf_binfmt(void)
2382 register_binfmt(&elf_format);
2383 return 0;
2386 static void __exit exit_elf_binfmt(void)
2388 /* Remove the COFF and ELF loaders. */
2389 unregister_binfmt(&elf_format);
/*
 * Hook the ELF format's init function in as a core initcall and its exit
 * function as the module exit handler, and declare the module licence.
 */
core_initcall(init_elf_binfmt);
module_exit(exit_elf_binfmt);
MODULE_LICENSE("GPL");