Import 2.3.11pre8
[davej-history.git] / fs / exec.c
blob131a15ab538b87bba7388e7aad2496890c700b5b
1 /*
2 * linux/fs/exec.c
4 * Copyright (C) 1991, 1992 Linus Torvalds
5 */
7 /*
8 * #!-checking implemented by tytso.
9 */
11 * Demand-loading implemented 01.12.91 - no need to read anything but
12 * the header into memory. The inode of the executable is put into
13 * "current->executable", and page faults do the actual loading. Clean.
15 * Once more I can proudly say that linux stood up to being changed: it
16 * was less than 2 hours work to get demand-loading completely implemented.
18 * Demand loading changed July 1993 by Eric Youngdale. Use mmap instead,
19 * current->executable is only used by the procfs. This allows a dispatch
20 * table to check for several different types of binary formats. We keep
21 * trying until we recognize the file or we run out of supported binary
22 * formats.
25 #include <linux/config.h>
26 #include <linux/slab.h>
27 #include <linux/file.h>
28 #include <linux/mman.h>
29 #include <linux/a.out.h>
30 #include <linux/stat.h>
31 #include <linux/fcntl.h>
32 #include <linux/smp_lock.h>
33 #include <linux/init.h>
35 #include <asm/uaccess.h>
36 #include <asm/pgtable.h>
37 #include <asm/mmu_context.h>
39 #ifdef CONFIG_KMOD
40 #include <linux/kmod.h>
41 #endif
/*
 * Head of the list of binary format handlers that will be accepted.
 * Add more with register_binfmt() when using modules; the handlers
 * compiled into the kernel are registered from binfmt_setup().
 */
static struct linux_binfmt *formats = NULL;
53 void __init binfmt_setup(void)
55 #ifdef CONFIG_BINFMT_MISC
56 init_misc_binfmt();
57 #endif
59 #ifdef CONFIG_BINFMT_ELF
60 init_elf_binfmt();
61 #endif
63 #ifdef CONFIG_BINFMT_ELF32
64 init_elf32_binfmt();
65 #endif
67 #ifdef CONFIG_BINFMT_AOUT
68 init_aout_binfmt();
69 #endif
71 #ifdef CONFIG_BINFMT_AOUT32
72 init_aout32_binfmt();
73 #endif
75 #ifdef CONFIG_BINFMT_EM86
76 init_em86_binfmt();
77 #endif
79 /* This cannot be configured out of the kernel */
80 init_script_binfmt();
83 int register_binfmt(struct linux_binfmt * fmt)
85 struct linux_binfmt ** tmp = &formats;
87 if (!fmt)
88 return -EINVAL;
89 if (fmt->next)
90 return -EBUSY;
91 while (*tmp) {
92 if (fmt == *tmp)
93 return -EBUSY;
94 tmp = &(*tmp)->next;
96 fmt->next = formats;
97 formats = fmt;
98 return 0;
#ifdef CONFIG_MODULES
/*
 * unregister_binfmt() - unlink a binary format handler from the global
 * handler list.
 *
 * Returns 0 if @fmt was found and removed, -EINVAL if it is not in the
 * list.
 */
int unregister_binfmt(struct linux_binfmt * fmt)
{
	struct linux_binfmt ** tmp = &formats;

	while (*tmp) {
		if (fmt == *tmp) {
			*tmp = fmt->next;
			/*
			 * Clear the stale link: register_binfmt() rejects any
			 * descriptor whose ->next is non-NULL, so leaving it
			 * set would make a later re-registration of this
			 * descriptor fail with -EBUSY.
			 */
			fmt->next = NULL;
			return 0;
		}
		tmp = &(*tmp)->next;
	}
	return -EINVAL;
}
#endif	/* CONFIG_MODULES */
117 /* N.B. Error returns must be < 0 */
118 int open_dentry(struct dentry * dentry, int mode)
120 struct inode * inode = dentry->d_inode;
121 struct file * f;
122 struct list_head * l = NULL;
123 int fd, error;
125 if (inode->i_sb)
126 l = &inode->i_sb->s_files;
128 error = -EINVAL;
129 if (!inode->i_op || !inode->i_op->default_file_ops)
130 goto out;
131 fd = get_unused_fd();
132 if (fd >= 0) {
133 error = -ENFILE;
134 f = get_empty_filp();
135 if (!f)
136 goto out_fd;
137 f->f_flags = mode;
138 f->f_mode = (mode+1) & O_ACCMODE;
139 f->f_dentry = dentry;
140 f->f_pos = 0;
141 f->f_reada = 0;
142 f->f_op = inode->i_op->default_file_ops;
143 if (f->f_op->open) {
144 error = f->f_op->open(inode,f);
145 if (error)
146 goto out_filp;
148 file_move(f, l);
149 fd_install(fd, f);
150 dget(dentry);
152 return fd;
154 out_filp:
155 if (error > 0)
156 error = -EIO;
157 put_filp(f);
158 out_fd:
159 put_unused_fd(fd);
160 out:
161 return error;
165 * Note that a shared library must be both readable and executable due to
166 * security reasons.
168 * Also note that we take the address to load from from the file itself.
170 asmlinkage int sys_uselib(const char * library)
172 int fd, retval;
173 struct file * file;
174 struct linux_binfmt * fmt;
176 lock_kernel();
177 fd = sys_open(library, 0, 0);
178 retval = fd;
179 if (fd < 0)
180 goto out;
181 file = fget(fd);
182 retval = -ENOEXEC;
183 if (file && file->f_dentry && file->f_op && file->f_op->read) {
184 for (fmt = formats ; fmt ; fmt = fmt->next) {
185 int (*fn)(int) = fmt->load_shlib;
186 if (!fn)
187 continue;
188 /* N.B. Should use file instead of fd */
189 retval = fn(fd);
190 if (retval != -ENOEXEC)
191 break;
194 fput(file);
195 sys_close(fd);
196 out:
197 unlock_kernel();
198 return retval;
/*
 * count() - count the entries of a NULL-terminated pointer vector
 * (arguments or environment strings) fetched with get_user().
 *
 * Returns the number of entries before the terminating NULL, 0 when
 * @argv itself is NULL, or the error reported by get_user().
 */
static int count(char ** argv)
{
	int n;

	if (argv == NULL)
		return 0;

	for (n = 0; ; n++, argv++) {
		char *entry;
		int err = get_user(entry, argv);

		if (err)
			return err;
		if (entry == NULL)
			break;
	}
	return n;
}
226 * 'copy_strings()' copies argument/envelope strings from user
227 * memory to free pages in kernel mem. These are in a format ready
228 * to be put directly into the top of new user memory.
230 int copy_strings(int argc,char ** argv, struct linux_binprm *bprm)
232 while (argc-- > 0) {
233 char *str;
234 int len;
235 unsigned long pos;
237 if (get_user(str, argv+argc) || !str || !(len = strlen_user(str)))
238 return -EFAULT;
239 if (bprm->p < len)
240 return -E2BIG;
242 bprm->p -= len;
243 /* XXX: add architecture specific overflow check here. */
245 pos = bprm->p;
246 while (len) {
247 char *pag;
248 int offset, bytes_to_copy;
250 offset = pos % PAGE_SIZE;
251 if (!(pag = (char *) bprm->page[pos/PAGE_SIZE]) &&
252 !(pag = (char *) bprm->page[pos/PAGE_SIZE] =
253 (unsigned long *) get_free_page(GFP_USER)))
254 return -ENOMEM;
256 bytes_to_copy = PAGE_SIZE - offset;
257 if (bytes_to_copy > len)
258 bytes_to_copy = len;
259 if (copy_from_user(pag + offset, str, bytes_to_copy))
260 return -EFAULT;
262 pos += bytes_to_copy;
263 str += bytes_to_copy;
264 len -= bytes_to_copy;
267 return 0;
271 * Like copy_strings, but get argv and its values from kernel memory.
273 int copy_strings_kernel(int argc,char ** argv, struct linux_binprm *bprm)
275 int r;
276 mm_segment_t oldfs = get_fs();
277 set_fs(KERNEL_DS);
278 r = copy_strings(argc, argv, bprm);
279 set_fs(oldfs);
280 return r;
283 int setup_arg_pages(struct linux_binprm *bprm)
285 unsigned long stack_base;
286 struct vm_area_struct *mpnt;
287 int i;
289 stack_base = STACK_TOP - MAX_ARG_PAGES*PAGE_SIZE;
291 bprm->p += stack_base;
292 if (bprm->loader)
293 bprm->loader += stack_base;
294 bprm->exec += stack_base;
296 mpnt = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
297 if (!mpnt)
298 return -ENOMEM;
301 mpnt->vm_mm = current->mm;
302 mpnt->vm_start = PAGE_MASK & (unsigned long) bprm->p;
303 mpnt->vm_end = STACK_TOP;
304 mpnt->vm_page_prot = PAGE_COPY;
305 mpnt->vm_flags = VM_STACK_FLAGS;
306 mpnt->vm_ops = NULL;
307 mpnt->vm_offset = 0;
308 mpnt->vm_file = NULL;
309 mpnt->vm_pte = 0;
310 insert_vm_struct(current->mm, mpnt);
311 current->mm->total_vm = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT;
314 for (i = 0 ; i < MAX_ARG_PAGES ; i++) {
315 if (bprm->page[i]) {
316 current->mm->rss++;
317 put_dirty_page(current,bprm->page[i],stack_base);
319 stack_base += PAGE_SIZE;
322 return 0;
326 * Read in the complete executable. This is used for "-N" files
327 * that aren't on a block boundary, and for files on filesystems
328 * without get_block support.
330 int read_exec(struct dentry *dentry, unsigned long offset,
331 char * addr, unsigned long count, int to_kmem)
333 struct file file;
334 struct inode * inode = dentry->d_inode;
335 int result = -ENOEXEC;
337 if (!inode->i_op || !inode->i_op->default_file_ops)
338 goto end_readexec;
339 if (init_private_file(&file, dentry, 1))
340 goto end_readexec;
341 if (!file.f_op->read)
342 goto close_readexec;
343 if (file.f_op->llseek) {
344 if (file.f_op->llseek(&file,offset,0) != offset)
345 goto close_readexec;
346 } else
347 file.f_pos = offset;
348 if (to_kmem) {
349 mm_segment_t old_fs = get_fs();
350 set_fs(get_ds());
351 result = file.f_op->read(&file, addr, count, &file.f_pos);
352 set_fs(old_fs);
353 } else {
354 result = verify_area(VERIFY_WRITE, addr, count);
355 if (result)
356 goto close_readexec;
357 result = file.f_op->read(&file, addr, count, &file.f_pos);
359 close_readexec:
360 if (file.f_op->release)
361 file.f_op->release(inode,&file);
362 end_readexec:
363 return result;
366 static int exec_mmap(void)
368 struct mm_struct * mm, * old_mm, * active_mm;
370 old_mm = current->mm;
371 if (old_mm && atomic_read(&old_mm->mm_users) == 1) {
372 flush_cache_mm(old_mm);
373 mm_release();
374 release_segments(old_mm);
375 exit_mmap(old_mm);
376 flush_tlb_mm(old_mm);
377 return 0;
380 mm = mm_alloc();
381 if (mm) {
382 mm->cpu_vm_mask = (1UL << smp_processor_id());
383 mm->total_vm = 0;
384 mm->rss = 0;
385 mm->pgd = pgd_alloc();
386 if (mm->pgd) {
387 struct mm_struct *active_mm = current->active_mm;
389 current->mm = mm;
390 current->active_mm = mm;
391 SET_PAGE_DIR(current, mm->pgd);
392 activate_context(current);
393 mm_release();
394 if (old_mm) {
395 mmput(old_mm);
396 return 0;
398 mmdrop(active_mm);
399 return 0;
401 kmem_cache_free(mm_cachep, mm);
403 return -ENOMEM;
407 * This function makes sure the current process has its own signal table,
408 * so that flush_signal_handlers can later reset the handlers without
409 * disturbing other processes. (Other processes might share the signal
410 * table via the CLONE_SIGHAND option to clone().)
413 static inline int make_private_signals(void)
415 struct signal_struct * newsig;
417 if (atomic_read(&current->sig->count) <= 1)
418 return 0;
419 newsig = kmalloc(sizeof(*newsig), GFP_KERNEL);
420 if (newsig == NULL)
421 return -ENOMEM;
422 spin_lock_init(&newsig->siglock);
423 atomic_set(&newsig->count, 1);
424 memcpy(newsig->action, current->sig->action, sizeof(newsig->action));
425 current->sig = newsig;
426 return 0;
430 * If make_private_signals() made a copy of the signal table, decrement the
431 * refcount of the original table, and free it if necessary.
432 * We don't do that in make_private_signals() so that we can back off
433 * in flush_old_exec() if an error occurs after calling make_private_signals().
436 static inline void release_old_signals(struct signal_struct * oldsig)
438 if (current->sig == oldsig)
439 return;
440 if (atomic_dec_and_test(&oldsig->count))
441 kfree(oldsig);
445 * These functions flushes out all traces of the currently running executable
446 * so that a new one can be started
449 static inline void flush_old_files(struct files_struct * files)
451 unsigned long j;
453 j = 0;
454 for (;;) {
455 unsigned long set, i;
457 i = j * __NFDBITS;
458 if (i >= files->max_fds)
459 break;
460 set = xchg(&files->close_on_exec.fds_bits[j], 0);
461 j++;
462 for ( ; set ; i++,set >>= 1) {
463 if (set & 1)
464 sys_close(i);
469 int flush_old_exec(struct linux_binprm * bprm)
471 char * name;
472 int i, ch, retval;
473 struct signal_struct * oldsig;
476 * Make sure we have a private signal table
478 oldsig = current->sig;
479 retval = make_private_signals();
480 if (retval) goto flush_failed;
483 * Release all of the old mmap stuff
485 retval = exec_mmap();
486 if (retval) goto mmap_failed;
488 /* This is the point of no return */
489 release_old_signals(oldsig);
491 if (current->euid == current->uid && current->egid == current->gid)
492 current->dumpable = 1;
493 name = bprm->filename;
494 for (i=0; (ch = *(name++)) != '\0';) {
495 if (ch == '/')
496 i = 0;
497 else
498 if (i < 15)
499 current->comm[i++] = ch;
501 current->comm[i] = '\0';
503 flush_thread();
505 if (bprm->e_uid != current->euid || bprm->e_gid != current->egid ||
506 permission(bprm->dentry->d_inode,MAY_READ))
507 current->dumpable = 0;
509 flush_signal_handlers(current);
510 flush_old_files(current->files);
512 return 0;
514 mmap_failed:
515 if (current->sig != oldsig)
516 kfree(current->sig);
517 flush_failed:
518 current->sig = oldsig;
519 return retval;
523 * We mustn't allow tracing of suid binaries, unless
524 * the tracer has the capability to trace anything..
526 static inline int must_not_trace_exec(struct task_struct * p)
528 return (p->flags & PF_PTRACED) && !cap_raised(p->p_pptr->cap_effective, CAP_SYS_PTRACE);
532 * Fill the binprm structure from the inode.
533 * Check permissions, then read the first 128 bytes
/*
 * prepare_binprm() - validate the executable behind bprm->dentry and fill
 * in the credential fields of bprm.
 *
 * Sets bprm->e_uid / bprm->e_gid (honouring set-uid and set-gid bits),
 * initialises the three bprm capability sets, and finally zeroes
 * bprm->buf and reads the first 128 bytes of the file into it.
 *
 * Returns the result of read_exec() on success, -EACCES when the file is
 * not an executable regular file on an exec-capable filesystem, the error
 * from permission(), -ETXTBSY when the file is open for writing, or
 * -EPERM when an id or capability change is not allowed.
 */
535 int prepare_binprm(struct linux_binprm *bprm)
537 int mode;
538 int retval,id_change,cap_raised;
539 struct inode * inode = bprm->dentry->d_inode;
/* Basic eligibility checks on the inode. */
541 mode = inode->i_mode;
542 if (!S_ISREG(mode)) /* must be regular file */
543 return -EACCES;
544 if (!(mode & 0111)) /* with at least _one_ execute bit set */
545 return -EACCES;
546 if (IS_NOEXEC(inode)) /* FS mustn't be mounted noexec */
547 return -EACCES;
548 if (!inode->i_sb)
549 return -EACCES;
550 if ((retval = permission(inode, MAY_EXEC)) != 0)
551 return retval;
552 /* better not execute files which are being written to */
553 if (atomic_read(&inode->i_writecount) > 0)
554 return -ETXTBSY;
/* Start from the current effective ids; the set-id bits may override them. */
556 bprm->e_uid = current->euid;
557 bprm->e_gid = current->egid;
558 id_change = cap_raised = 0;
560 /* Set-uid? */
561 if (mode & S_ISUID) {
562 bprm->e_uid = inode->i_uid;
563 if (bprm->e_uid != current->euid)
564 id_change = 1;
567 /* Set-gid? */
569 * If setgid is set but no group execute bit then this
570 * is a candidate for mandatory locking, not a setgid
571 * executable.
573 if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) {
574 bprm->e_gid = inode->i_gid;
575 if (!in_group_p(bprm->e_gid))
576 id_change = 1;
579 /* We don't have VFS support for capabilities yet */
580 cap_clear(bprm->cap_inheritable);
581 cap_clear(bprm->cap_permitted);
582 cap_clear(bprm->cap_effective);
584 /* To support inheritance of root-permissions and suid-root
585 * executables under compatibility mode, we raise the
586 * effective and inherited bitmasks of the executable file
587 * (translation: we set the executable "capability dumb" and
588 * set the allowed set to maximum). We don't set any forced
589 * bits.
591 * If only the real uid is 0, we only raise the inheritable
592 * bitmask of the executable file (translation: we set the
593 * allowed set to maximum and the application to "capability
594 * smart").
597 if (!issecure(SECURE_NOROOT)) {
598 if (bprm->e_uid == 0 || current->uid == 0)
599 cap_set_full(bprm->cap_inheritable);
600 if (bprm->e_uid == 0)
601 cap_set_full(bprm->cap_effective);
604 /* Only if pP' is _not_ a subset of pP, do we consider there
605 * has been a capability related "change of capability". In
606 * such cases, we need to check that the elevation of
607 * privilege does not go against other system constraints.
608 * The new Permitted set is defined below -- see (***). */
610 kernel_cap_t working =
611 cap_combine(bprm->cap_permitted,
612 cap_intersect(bprm->cap_inheritable,
613 current->cap_inheritable));
614 if (!cap_issubset(working, current->cap_permitted)) {
615 cap_raised = 1;
/*
 * An id or capability change under the restricted conditions tested below
 * is only allowed when the caller holds CAP_SETUID (for an id change) or
 * CAP_SETPCAP (for a capability change).
 */
619 if (id_change || cap_raised) {
620 /* We can't suid-execute if we're sharing parts of the executable */
621 /* or if we're being traced (or if suid execs are not allowed) */
622 /* (current->mm->mm_users > 1 is ok, as we'll get a new mm anyway) */
623 if (IS_NOSUID(inode)
624 || must_not_trace_exec(current)
625 || (atomic_read(&current->fs->count) > 1)
626 || (atomic_read(&current->sig->count) > 1)
627 || (atomic_read(&current->files->count) > 1)) {
628 if (id_change && !capable(CAP_SETUID))
629 return -EPERM;
630 if (cap_raised && !capable(CAP_SETPCAP))
631 return -EPERM;
/* Load the start of the file into bprm->buf for the format handlers. */
635 memset(bprm->buf,0,sizeof(bprm->buf));
636 return read_exec(bprm->dentry,0,bprm->buf,128,1);
640 * This function is used to produce the new IDs and capabilities
641 * from the old ones and the file's capabilities.
643 * The formula used for evolving capabilities is:
645 * pI' = pI
646 * (***) pP' = fP | (fI & pI)
647 * pE' = pP' & fE [NB. fE is 0 or ~0]
649 * I=Inheritable, P=Permitted, E=Effective // p=process, f=file
650 * ' indicates post-exec().
653 void compute_creds(struct linux_binprm *bprm)
655 int new_permitted = cap_t(bprm->cap_permitted) |
656 (cap_t(bprm->cap_inheritable) &
657 cap_t(current->cap_inheritable));
659 /* For init, we want to retain the capabilities set
660 * in the init_task struct. Thus we skip the usual
661 * capability rules */
662 if (current->pid != 1) {
663 cap_t(current->cap_permitted) = new_permitted;
664 cap_t(current->cap_effective) = new_permitted &
665 cap_t(bprm->cap_effective);
668 /* AUD: Audit candidate if current->cap_effective is set */
670 current->suid = current->euid = current->fsuid = bprm->e_uid;
671 current->sgid = current->egid = current->fsgid = bprm->e_gid;
672 if (current->euid != current->uid || current->egid != current->gid ||
673 !cap_issubset(new_permitted, current->cap_permitted))
674 current->dumpable = 0;
678 void remove_arg_zero(struct linux_binprm *bprm)
680 if (bprm->argc) {
681 unsigned long offset;
682 char * page;
683 offset = bprm->p % PAGE_SIZE;
684 page = (char*)bprm->page[bprm->p/PAGE_SIZE];
685 while(bprm->p++,*(page+offset++))
686 if(offset==PAGE_SIZE){
687 offset=0;
688 page = (char*)bprm->page[bprm->p/PAGE_SIZE];
690 bprm->argc--;
695 * Cycle through the list of binary format handlers until one recognizes the image.
/*
 * search_binary_handler() - offer the image described by bprm to each
 * registered binary format until one of them loads it.
 *
 * Returns the non-negative result of the handler that accepted the image,
 * or a negative error. A handler result of -ENOEXEC means the format was
 * not recognized and the next handler is tried; any other negative result
 * stops the search.
 */
697 int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs)
699 int try,retval=0;
700 struct linux_binfmt *fmt;
/*
 * Alpha only: when no loader has been set up yet and the header in
 * bprm->buf has magic 0x183 with both 0x3000 flag bits set, the image
 * dentry is dropped and replaced by the one for /sbin/loader, and
 * prepare_binprm() is run again on it.
 * NOTE(review): presumably this identifies dynamically linked ECOFF
 * images - confirm against the Alpha headers.
 */
701 #ifdef __alpha__
702 /* handle /sbin/loader.. */
704 struct exec * eh = (struct exec *) bprm->buf;
705 struct linux_binprm bprm_loader;
707 if (!bprm->loader && eh->fh.f_magic == 0x183 &&
708 (eh->fh.f_flags & 0x3000) == 0x3000)
710 int i;
711 char * dynloader[] = { "/sbin/loader" };
712 struct dentry * dentry;
714 dput(bprm->dentry);
715 bprm->dentry = NULL;
717 bprm_loader.p = PAGE_SIZE*MAX_ARG_PAGES-sizeof(void *);
718 for (i=0 ; i<MAX_ARG_PAGES ; i++) /* clear page-table */
719 bprm_loader.page[i] = 0;
721 dentry = open_namei(dynloader[0], 0, 0);
722 retval = PTR_ERR(dentry);
723 if (IS_ERR(dentry))
724 return retval;
725 bprm->dentry = dentry;
726 bprm->loader = bprm_loader.p;
727 retval = prepare_binprm(bprm);
728 if (retval<0)
729 return retval;
730 /* should call search_binary_handler recursively here,
731 but it does not matter */
734 #endif
/*
 * At most two passes over the handler list. The second pass only
 * differs from the first when CONFIG_KMOD is set and a module was
 * requested in between.
 */
735 for (try=0; try<2; try++) {
736 for (fmt = formats ; fmt ; fmt = fmt->next) {
737 int (*fn)(struct linux_binprm *, struct pt_regs *) = fmt->load_binary;
738 if (!fn)
739 continue;
740 retval = fn(bprm, regs);
/* Success: release the dentry reference and record that an exec happened. */
741 if (retval >= 0) {
742 if (bprm->dentry)
743 dput(bprm->dentry);
744 bprm->dentry = NULL;
745 current->did_exec = 1;
746 return retval;
748 if (retval != -ENOEXEC)
749 break;
750 if (!bprm->dentry) /* We don't have the dentry anymore */
751 return retval;
753 if (retval != -ENOEXEC) {
754 break;
/*
 * With CONFIG_KMOD: if the first four header bytes are all printable the
 * search gives up with -ENOEXEC; otherwise a module named binfmt-XXXX is
 * requested, where XXXX is the 16-bit value read from bprm->buf[2], in
 * hex, and the handler list is walked once more.
 */
755 #ifdef CONFIG_KMOD
756 }else{
757 #define printable(c) (((c)=='\t') || ((c)=='\n') || (0x20<=(c) && (c)<=0x7e))
758 char modname[20];
759 if (printable(bprm->buf[0]) &&
760 printable(bprm->buf[1]) &&
761 printable(bprm->buf[2]) &&
762 printable(bprm->buf[3]))
763 break; /* -ENOEXEC */
764 sprintf(modname, "binfmt-%04x", *(unsigned short *)(&bprm->buf[2]));
765 request_module(modname);
766 #endif
769 return retval;
774 * sys_execve() executes a new program.
776 int do_execve(char * filename, char ** argv, char ** envp, struct pt_regs * regs)
778 struct linux_binprm bprm;
779 struct dentry * dentry;
780 int retval;
781 int i;
783 bprm.p = PAGE_SIZE*MAX_ARG_PAGES-sizeof(void *);
784 memset(bprm.page, 0, MAX_ARG_PAGES*sizeof(bprm.page[0]));
786 dentry = open_namei(filename, 0, 0);
787 retval = PTR_ERR(dentry);
788 if (IS_ERR(dentry))
789 return retval;
791 bprm.dentry = dentry;
792 bprm.filename = filename;
793 bprm.sh_bang = 0;
794 bprm.loader = 0;
795 bprm.exec = 0;
796 if ((bprm.argc = count(argv)) < 0) {
797 dput(dentry);
798 return bprm.argc;
801 if ((bprm.envc = count(envp)) < 0) {
802 dput(dentry);
803 return bprm.envc;
806 retval = prepare_binprm(&bprm);
807 if (retval < 0)
808 goto out;
810 retval = copy_strings_kernel(1, &bprm.filename, &bprm);
811 if (retval < 0)
812 goto out;
814 bprm.exec = bprm.p;
815 retval = copy_strings(bprm.envc, envp, &bprm);
816 if (retval < 0)
817 goto out;
819 retval = copy_strings(bprm.argc, argv, &bprm);
820 if (retval < 0)
821 goto out;
823 retval = search_binary_handler(&bprm,regs);
824 if (retval >= 0)
825 /* execve success */
826 return retval;
828 out:
829 /* Something went wrong, return the inode and free the argument pages*/
830 if (bprm.dentry)
831 dput(bprm.dentry);
833 /* Assumes that free_page() can take a NULL argument. */
834 /* I hope this is ok for all architectures */
835 for (i=0 ; i<MAX_ARG_PAGES ; i++)
836 free_page(bprm.page[i]);
838 return retval;