Import 2.3.9pre5
[davej-history.git] / fs / exec.c
blob83b1834de5db67523ee726bd6a581a37030d6cb8
1 /*
2 * linux/fs/exec.c
4 * Copyright (C) 1991, 1992 Linus Torvalds
5 */
7 /*
8 * #!-checking implemented by tytso.
9 */
11 * Demand-loading implemented 01.12.91 - no need to read anything but
12 * the header into memory. The inode of the executable is put into
13 * "current->executable", and page faults do the actual loading. Clean.
15 * Once more I can proudly say that linux stood up to being changed: it
16 * was less than 2 hours work to get demand-loading completely implemented.
18 * Demand loading changed July 1993 by Eric Youngdale. Use mmap instead,
19 * current->executable is only used by the procfs. This allows a dispatch
20 * table to check for several different types of binary formats. We keep
21 * trying until we recognize the file or we run out of supported binary
22 * formats.
25 #include <linux/config.h>
26 #include <linux/slab.h>
27 #include <linux/file.h>
28 #include <linux/mman.h>
29 #include <linux/a.out.h>
30 #include <linux/stat.h>
31 #include <linux/fcntl.h>
32 #include <linux/smp_lock.h>
33 #include <linux/init.h>
35 #include <asm/uaccess.h>
36 #include <asm/pgtable.h>
37 #include <asm/mmu_context.h>
39 #ifdef CONFIG_KMOD
40 #include <linux/kmod.h>
41 #endif
/*
 * List of the binary formats that will be accepted.  Further formats
 * are added with register_binfmt(), e.g. when they are built as modules.
 *
 * The handlers are defined again by the 'real' modules if a module
 * definition is used for these routines.
 *
 * The list starts out empty: a static pointer without an initializer
 * is a null pointer.
 */
static struct linux_binfmt *formats;
53 void __init binfmt_setup(void)
55 #ifdef CONFIG_BINFMT_MISC
56 init_misc_binfmt();
57 #endif
59 #ifdef CONFIG_BINFMT_ELF
60 init_elf_binfmt();
61 #endif
63 #ifdef CONFIG_BINFMT_ELF32
64 init_elf32_binfmt();
65 #endif
67 #ifdef CONFIG_BINFMT_AOUT
68 init_aout_binfmt();
69 #endif
71 #ifdef CONFIG_BINFMT_AOUT32
72 init_aout32_binfmt();
73 #endif
75 #ifdef CONFIG_BINFMT_EM86
76 init_em86_binfmt();
77 #endif
79 /* This cannot be configured out of the kernel */
80 init_script_binfmt();
83 int register_binfmt(struct linux_binfmt * fmt)
85 struct linux_binfmt ** tmp = &formats;
87 if (!fmt)
88 return -EINVAL;
89 if (fmt->next)
90 return -EBUSY;
91 while (*tmp) {
92 if (fmt == *tmp)
93 return -EBUSY;
94 tmp = &(*tmp)->next;
96 fmt->next = formats;
97 formats = fmt;
98 return 0;
#ifdef CONFIG_MODULES
/*
 * unregister_binfmt() - remove a binary format handler from the list.
 * @fmt: handler to remove.
 *
 * Returns 0 if @fmt was found and unlinked, -EINVAL if it is not on
 * the list.
 *
 * The ->next link of the removed handler is cleared.  register_binfmt()
 * rejects any handler whose ->next is non-NULL with -EBUSY, so leaving
 * the stale link in place would make it impossible to register the
 * same handler again after it has been unregistered (unless it happened
 * to be the last list entry).
 */
int unregister_binfmt(struct linux_binfmt * fmt)
{
	struct linux_binfmt ** tmp = &formats;

	while (*tmp) {
		if (fmt == *tmp) {
			*tmp = fmt->next;
			fmt->next = NULL;	/* allow a later re-registration */
			return 0;
		}
		tmp = &(*tmp)->next;
	}
	return -EINVAL;
}
#endif	/* CONFIG_MODULES */
117 /* N.B. Error returns must be < 0 */
118 int open_dentry(struct dentry * dentry, int mode)
120 struct inode * inode = dentry->d_inode;
121 struct file * f;
122 int fd, error;
124 error = -EINVAL;
125 if (!inode->i_op || !inode->i_op->default_file_ops)
126 goto out;
127 fd = get_unused_fd();
128 if (fd >= 0) {
129 error = -ENFILE;
130 f = get_empty_filp();
131 if (!f)
132 goto out_fd;
133 f->f_flags = mode;
134 f->f_mode = (mode+1) & O_ACCMODE;
135 f->f_dentry = dentry;
136 f->f_pos = 0;
137 f->f_reada = 0;
138 f->f_op = inode->i_op->default_file_ops;
139 if (f->f_op->open) {
140 error = f->f_op->open(inode,f);
141 if (error)
142 goto out_filp;
144 fd_install(fd, f);
145 dget(dentry);
147 return fd;
149 out_filp:
150 if (error > 0)
151 error = -EIO;
152 put_filp(f);
153 out_fd:
154 put_unused_fd(fd);
155 out:
156 return error;
160 * Note that a shared library must be both readable and executable due to
161 * security reasons.
163 * Also note that we take the address to load from from the file itself.
165 asmlinkage int sys_uselib(const char * library)
167 int fd, retval;
168 struct file * file;
169 struct linux_binfmt * fmt;
171 lock_kernel();
172 fd = sys_open(library, 0, 0);
173 retval = fd;
174 if (fd < 0)
175 goto out;
176 file = fget(fd);
177 retval = -ENOEXEC;
178 if (file && file->f_dentry && file->f_op && file->f_op->read) {
179 for (fmt = formats ; fmt ; fmt = fmt->next) {
180 int (*fn)(int) = fmt->load_shlib;
181 if (!fn)
182 continue;
183 /* N.B. Should use file instead of fd */
184 retval = fn(fd);
185 if (retval != -ENOEXEC)
186 break;
189 fput(file);
190 sys_close(fd);
191 out:
192 unlock_kernel();
193 return retval;
/*
 * count() counts the number of arguments/envelopes
 *
 * @argv: user-space array of string pointers terminated by a NULL
 *        entry; may itself be NULL, in which case 0 is returned.
 *
 * Returns the number of entries before the terminating NULL, or the
 * error reported by get_user() when a slot cannot be read.
 */
static int count(char ** argv)
{
	int n;

	if (argv == NULL)
		return 0;

	for (n = 0; ; n++, argv++) {
		char *entry;
		int err;

		err = get_user(entry, argv);
		if (err)
			return err;
		if (!entry)
			break;
	}
	return n;
}
221 * 'copy_strings()' copies argument/envelope strings from user
222 * memory to free pages in kernel mem. These are in a format ready
223 * to be put directly into the top of new user memory.
225 int copy_strings(int argc,char ** argv, struct linux_binprm *bprm)
227 while (argc-- > 0) {
228 char *str;
229 int len;
230 unsigned long pos;
232 if (get_user(str, argv+argc) || !str || !(len = strlen_user(str)))
233 return -EFAULT;
234 if (bprm->p < len)
235 return -E2BIG;
237 bprm->p -= len;
238 /* XXX: add architecture specific overflow check here. */
240 pos = bprm->p;
241 while (len) {
242 char *pag;
243 int offset, bytes_to_copy;
245 offset = pos % PAGE_SIZE;
246 if (!(pag = (char *) bprm->page[pos/PAGE_SIZE]) &&
247 !(pag = (char *) bprm->page[pos/PAGE_SIZE] =
248 (unsigned long *) get_free_page(GFP_USER)))
249 return -ENOMEM;
251 bytes_to_copy = PAGE_SIZE - offset;
252 if (bytes_to_copy > len)
253 bytes_to_copy = len;
254 if (copy_from_user(pag + offset, str, bytes_to_copy))
255 return -EFAULT;
257 pos += bytes_to_copy;
258 str += bytes_to_copy;
259 len -= bytes_to_copy;
262 return 0;
266 * Like copy_strings, but get argv and its values from kernel memory.
268 int copy_strings_kernel(int argc,char ** argv, struct linux_binprm *bprm)
270 int r;
271 mm_segment_t oldfs = get_fs();
272 set_fs(KERNEL_DS);
273 r = copy_strings(argc, argv, bprm);
274 set_fs(oldfs);
275 return r;
278 int setup_arg_pages(struct linux_binprm *bprm)
280 unsigned long stack_base;
281 struct vm_area_struct *mpnt;
282 int i;
284 stack_base = STACK_TOP - MAX_ARG_PAGES*PAGE_SIZE;
286 bprm->p += stack_base;
287 if (bprm->loader)
288 bprm->loader += stack_base;
289 bprm->exec += stack_base;
291 mpnt = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
292 if (!mpnt)
293 return -ENOMEM;
296 mpnt->vm_mm = current->mm;
297 mpnt->vm_start = PAGE_MASK & (unsigned long) bprm->p;
298 mpnt->vm_end = STACK_TOP;
299 mpnt->vm_page_prot = PAGE_COPY;
300 mpnt->vm_flags = VM_STACK_FLAGS;
301 mpnt->vm_ops = NULL;
302 mpnt->vm_offset = 0;
303 mpnt->vm_file = NULL;
304 mpnt->vm_pte = 0;
305 insert_vm_struct(current->mm, mpnt);
306 current->mm->total_vm = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT;
309 for (i = 0 ; i < MAX_ARG_PAGES ; i++) {
310 if (bprm->page[i]) {
311 current->mm->rss++;
312 put_dirty_page(current,bprm->page[i],stack_base);
314 stack_base += PAGE_SIZE;
317 return 0;
321 * Read in the complete executable. This is used for "-N" files
322 * that aren't on a block boundary, and for files on filesystems
323 * without get_block support.
325 int read_exec(struct dentry *dentry, unsigned long offset,
326 char * addr, unsigned long count, int to_kmem)
328 struct file file;
329 struct inode * inode = dentry->d_inode;
330 int result = -ENOEXEC;
332 if (!inode->i_op || !inode->i_op->default_file_ops)
333 goto end_readexec;
334 if (init_private_file(&file, dentry, 1))
335 goto end_readexec;
336 if (!file.f_op->read)
337 goto close_readexec;
338 if (file.f_op->llseek) {
339 if (file.f_op->llseek(&file,offset,0) != offset)
340 goto close_readexec;
341 } else
342 file.f_pos = offset;
343 if (to_kmem) {
344 mm_segment_t old_fs = get_fs();
345 set_fs(get_ds());
346 result = file.f_op->read(&file, addr, count, &file.f_pos);
347 set_fs(old_fs);
348 } else {
349 result = verify_area(VERIFY_WRITE, addr, count);
350 if (result)
351 goto close_readexec;
352 result = file.f_op->read(&file, addr, count, &file.f_pos);
354 close_readexec:
355 if (file.f_op->release)
356 file.f_op->release(inode,&file);
357 end_readexec:
358 return result;
361 static int exec_mmap(void)
363 struct mm_struct * mm, * old_mm;
364 int retval, nr;
366 if (atomic_read(&current->mm->count) == 1) {
367 flush_cache_mm(current->mm);
368 mm_release();
369 release_segments(current->mm);
370 exit_mmap(current->mm);
371 flush_tlb_mm(current->mm);
372 return 0;
375 retval = -ENOMEM;
376 mm = mm_alloc();
377 if (!mm)
378 goto fail_nomem;
380 mm->cpu_vm_mask = (1UL << smp_processor_id());
381 mm->total_vm = 0;
382 mm->rss = 0;
384 * Make sure we have a private ldt if needed ...
386 nr = current->tarray_ptr - &task[0];
387 copy_segments(nr, current, mm);
389 old_mm = current->mm;
390 current->mm = mm;
391 retval = new_page_tables(current);
392 if (retval)
393 goto fail_restore;
394 activate_context(current);
395 up(&mm->mmap_sem);
396 mm_release();
397 mmput(old_mm);
398 return 0;
401 * Failure ... restore the prior mm_struct.
403 fail_restore:
404 current->mm = old_mm;
405 /* restore the ldt for this task */
406 copy_segments(nr, current, NULL);
407 release_segments(mm);
408 kmem_cache_free(mm_cachep, mm);
410 fail_nomem:
411 return retval;
415 * This function makes sure the current process has its own signal table,
416 * so that flush_signal_handlers can later reset the handlers without
417 * disturbing other processes. (Other processes might share the signal
418 * table via the CLONE_SIGHAND option to clone().)
421 static inline int make_private_signals(void)
423 struct signal_struct * newsig;
425 if (atomic_read(&current->sig->count) <= 1)
426 return 0;
427 newsig = kmalloc(sizeof(*newsig), GFP_KERNEL);
428 if (newsig == NULL)
429 return -ENOMEM;
430 spin_lock_init(&newsig->siglock);
431 atomic_set(&newsig->count, 1);
432 memcpy(newsig->action, current->sig->action, sizeof(newsig->action));
433 current->sig = newsig;
434 return 0;
438 * If make_private_signals() made a copy of the signal table, decrement the
439 * refcount of the original table, and free it if necessary.
440 * We don't do that in make_private_signals() so that we can back off
441 * in flush_old_exec() if an error occurs after calling make_private_signals().
444 static inline void release_old_signals(struct signal_struct * oldsig)
446 if (current->sig == oldsig)
447 return;
448 if (atomic_dec_and_test(&oldsig->count))
449 kfree(oldsig);
453 * These functions flushes out all traces of the currently running executable
454 * so that a new one can be started
457 static inline void flush_old_files(struct files_struct * files)
459 unsigned long j;
461 j = 0;
462 for (;;) {
463 unsigned long set, i;
465 i = j * __NFDBITS;
466 if (i >= files->max_fds)
467 break;
468 set = files->close_on_exec.fds_bits[j];
469 files->close_on_exec.fds_bits[j] = 0;
470 j++;
471 for ( ; set ; i++,set >>= 1) {
472 if (set & 1)
473 sys_close(i);
478 int flush_old_exec(struct linux_binprm * bprm)
480 char * name;
481 int i, ch, retval;
482 struct signal_struct * oldsig;
485 * Make sure we have a private signal table
487 oldsig = current->sig;
488 retval = make_private_signals();
489 if (retval) goto flush_failed;
492 * Release all of the old mmap stuff
494 retval = exec_mmap();
495 if (retval) goto mmap_failed;
497 /* This is the point of no return */
498 release_old_signals(oldsig);
500 if (current->euid == current->uid && current->egid == current->gid)
501 current->dumpable = 1;
502 name = bprm->filename;
503 for (i=0; (ch = *(name++)) != '\0';) {
504 if (ch == '/')
505 i = 0;
506 else
507 if (i < 15)
508 current->comm[i++] = ch;
510 current->comm[i] = '\0';
512 flush_thread();
514 if (bprm->e_uid != current->euid || bprm->e_gid != current->egid ||
515 permission(bprm->dentry->d_inode,MAY_READ))
516 current->dumpable = 0;
518 flush_signal_handlers(current);
519 flush_old_files(current->files);
521 return 0;
523 mmap_failed:
524 if (current->sig != oldsig)
525 kfree(current->sig);
526 flush_failed:
527 current->sig = oldsig;
528 return retval;
532 * We mustn't allow tracing of suid binaries, unless
533 * the tracer has the capability to trace anything..
535 static inline int must_not_trace_exec(struct task_struct * p)
537 return (p->flags & PF_PTRACED) && !cap_raised(p->p_pptr->cap_effective, CAP_SYS_PTRACE);
541 * Fill the binprm structure from the inode.
542 * Check permissions, then read the first 512 bytes
544 int prepare_binprm(struct linux_binprm *bprm)
546 int mode;
547 int retval,id_change,cap_raised;
548 struct inode * inode = bprm->dentry->d_inode;
550 mode = inode->i_mode;
551 if (!S_ISREG(mode)) /* must be regular file */
552 return -EACCES;
553 if (!(mode & 0111)) /* with at least _one_ execute bit set */
554 return -EACCES;
555 if (IS_NOEXEC(inode)) /* FS mustn't be mounted noexec */
556 return -EACCES;
557 if (!inode->i_sb)
558 return -EACCES;
559 if ((retval = permission(inode, MAY_EXEC)) != 0)
560 return retval;
561 /* better not execute files which are being written to */
562 if (inode->i_writecount > 0)
563 return -ETXTBSY;
565 bprm->e_uid = current->euid;
566 bprm->e_gid = current->egid;
567 id_change = cap_raised = 0;
569 /* Set-uid? */
570 if (mode & S_ISUID) {
571 bprm->e_uid = inode->i_uid;
572 if (bprm->e_uid != current->euid)
573 id_change = 1;
576 /* Set-gid? */
578 * If setgid is set but no group execute bit then this
579 * is a candidate for mandatory locking, not a setgid
580 * executable.
582 if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) {
583 bprm->e_gid = inode->i_gid;
584 if (!in_group_p(bprm->e_gid))
585 id_change = 1;
588 /* We don't have VFS support for capabilities yet */
589 cap_clear(bprm->cap_inheritable);
590 cap_clear(bprm->cap_permitted);
591 cap_clear(bprm->cap_effective);
593 /* To support inheritance of root-permissions and suid-root
594 * executables under compatibility mode, we raise the
595 * effective and inherited bitmasks of the executable file
596 * (translation: we set the executable "capability dumb" and
597 * set the allowed set to maximum). We don't set any forced
598 * bits.
600 * If only the real uid is 0, we only raise the inheritable
601 * bitmask of the executable file (translation: we set the
602 * allowed set to maximum and the application to "capability
603 * smart").
606 if (!issecure(SECURE_NOROOT)) {
607 if (bprm->e_uid == 0 || current->uid == 0)
608 cap_set_full(bprm->cap_inheritable);
609 if (bprm->e_uid == 0)
610 cap_set_full(bprm->cap_effective);
613 /* Only if pP' is _not_ a subset of pP, do we consider there
614 * has been a capability related "change of capability". In
615 * such cases, we need to check that the elevation of
616 * privilege does not go against other system constraints.
617 * The new Permitted set is defined below -- see (***). */
619 kernel_cap_t working =
620 cap_combine(bprm->cap_permitted,
621 cap_intersect(bprm->cap_inheritable,
622 current->cap_inheritable));
623 if (!cap_issubset(working, current->cap_permitted)) {
624 cap_raised = 1;
628 if (id_change || cap_raised) {
629 /* We can't suid-execute if we're sharing parts of the executable */
630 /* or if we're being traced (or if suid execs are not allowed) */
631 /* (current->mm->count > 1 is ok, as we'll get a new mm anyway) */
632 if (IS_NOSUID(inode)
633 || must_not_trace_exec(current)
634 || (atomic_read(&current->fs->count) > 1)
635 || (atomic_read(&current->sig->count) > 1)
636 || (atomic_read(&current->files->count) > 1)) {
637 if (id_change && !capable(CAP_SETUID))
638 return -EPERM;
639 if (cap_raised && !capable(CAP_SETPCAP))
640 return -EPERM;
644 memset(bprm->buf,0,sizeof(bprm->buf));
645 return read_exec(bprm->dentry,0,bprm->buf,128,1);
649 * This function is used to produce the new IDs and capabilities
650 * from the old ones and the file's capabilities.
652 * The formula used for evolving capabilities is:
654 * pI' = pI
655 * (***) pP' = fP | (fI & pI)
656 * pE' = pP' & fE [NB. fE is 0 or ~0]
658 * I=Inheritable, P=Permitted, E=Effective // p=process, f=file
659 * ' indicates post-exec().
662 void compute_creds(struct linux_binprm *bprm)
664 int new_permitted = cap_t(bprm->cap_permitted) |
665 (cap_t(bprm->cap_inheritable) &
666 cap_t(current->cap_inheritable));
668 /* For init, we want to retain the capabilities set
669 * in the init_task struct. Thus we skip the usual
670 * capability rules */
671 if (current->pid != 1) {
672 cap_t(current->cap_permitted) = new_permitted;
673 cap_t(current->cap_effective) = new_permitted &
674 cap_t(bprm->cap_effective);
677 /* AUD: Audit candidate if current->cap_effective is set */
679 current->suid = current->euid = current->fsuid = bprm->e_uid;
680 current->sgid = current->egid = current->fsgid = bprm->e_gid;
681 if (current->euid != current->uid || current->egid != current->gid ||
682 !cap_issubset(new_permitted, current->cap_permitted))
683 current->dumpable = 0;
687 void remove_arg_zero(struct linux_binprm *bprm)
689 if (bprm->argc) {
690 unsigned long offset;
691 char * page;
692 offset = bprm->p % PAGE_SIZE;
693 page = (char*)bprm->page[bprm->p/PAGE_SIZE];
694 while(bprm->p++,*(page+offset++))
695 if(offset==PAGE_SIZE){
696 offset=0;
697 page = (char*)bprm->page[bprm->p/PAGE_SIZE];
699 bprm->argc--;
704 * cycle the list of binary formats handler, until one recognizes the image
706 int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs)
708 int try,retval=0;
709 struct linux_binfmt *fmt;
710 #ifdef __alpha__
711 /* handle /sbin/loader.. */
713 struct exec * eh = (struct exec *) bprm->buf;
714 struct linux_binprm bprm_loader;
716 if (!bprm->loader && eh->fh.f_magic == 0x183 &&
717 (eh->fh.f_flags & 0x3000) == 0x3000)
719 int i;
720 char * dynloader[] = { "/sbin/loader" };
721 struct dentry * dentry;
723 dput(bprm->dentry);
724 bprm->dentry = NULL;
726 bprm_loader.p = PAGE_SIZE*MAX_ARG_PAGES-sizeof(void *);
727 for (i=0 ; i<MAX_ARG_PAGES ; i++) /* clear page-table */
728 bprm_loader.page[i] = 0;
730 dentry = open_namei(dynloader[0], 0, 0);
731 retval = PTR_ERR(dentry);
732 if (IS_ERR(dentry))
733 return retval;
734 bprm->dentry = dentry;
735 bprm->loader = bprm_loader.p;
736 retval = prepare_binprm(bprm);
737 if (retval<0)
738 return retval;
739 /* should call search_binary_handler recursively here,
740 but it does not matter */
743 #endif
744 for (try=0; try<2; try++) {
745 for (fmt = formats ; fmt ; fmt = fmt->next) {
746 int (*fn)(struct linux_binprm *, struct pt_regs *) = fmt->load_binary;
747 if (!fn)
748 continue;
749 retval = fn(bprm, regs);
750 if (retval >= 0) {
751 if (bprm->dentry)
752 dput(bprm->dentry);
753 bprm->dentry = NULL;
754 current->did_exec = 1;
755 return retval;
757 if (retval != -ENOEXEC)
758 break;
759 if (!bprm->dentry) /* We don't have the dentry anymore */
760 return retval;
762 if (retval != -ENOEXEC) {
763 break;
764 #ifdef CONFIG_KMOD
765 }else{
766 #define printable(c) (((c)=='\t') || ((c)=='\n') || (0x20<=(c) && (c)<=0x7e))
767 char modname[20];
768 if (printable(bprm->buf[0]) &&
769 printable(bprm->buf[1]) &&
770 printable(bprm->buf[2]) &&
771 printable(bprm->buf[3]))
772 break; /* -ENOEXEC */
773 sprintf(modname, "binfmt-%04x", *(unsigned short *)(&bprm->buf[2]));
774 request_module(modname);
775 #endif
778 return retval;
783 * sys_execve() executes a new program.
785 int do_execve(char * filename, char ** argv, char ** envp, struct pt_regs * regs)
787 struct linux_binprm bprm;
788 struct dentry * dentry;
789 int retval;
790 int i;
792 bprm.p = PAGE_SIZE*MAX_ARG_PAGES-sizeof(void *);
793 memset(bprm.page, 0, MAX_ARG_PAGES*sizeof(bprm.page[0]));
795 dentry = open_namei(filename, 0, 0);
796 retval = PTR_ERR(dentry);
797 if (IS_ERR(dentry))
798 return retval;
800 bprm.dentry = dentry;
801 bprm.filename = filename;
802 bprm.sh_bang = 0;
803 bprm.loader = 0;
804 bprm.exec = 0;
805 if ((bprm.argc = count(argv)) < 0) {
806 dput(dentry);
807 return bprm.argc;
810 if ((bprm.envc = count(envp)) < 0) {
811 dput(dentry);
812 return bprm.envc;
815 retval = prepare_binprm(&bprm);
816 if (retval < 0)
817 goto out;
819 retval = copy_strings_kernel(1, &bprm.filename, &bprm);
820 if (retval < 0)
821 goto out;
823 bprm.exec = bprm.p;
824 retval = copy_strings(bprm.envc, envp, &bprm);
825 if (retval < 0)
826 goto out;
828 retval = copy_strings(bprm.argc, argv, &bprm);
829 if (retval < 0)
830 goto out;
832 retval = search_binary_handler(&bprm,regs);
833 if (retval >= 0)
834 /* execve success */
835 return retval;
837 out:
838 /* Something went wrong, return the inode and free the argument pages*/
839 if (bprm.dentry)
840 dput(bprm.dentry);
842 /* Assumes that free_page() can take a NULL argument. */
843 /* I hope this is ok for all architectures */
844 for (i=0 ; i<MAX_ARG_PAGES ; i++)
845 free_page(bprm.page[i]);
847 return retval;