/*
 *  linux/kernel/fork.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 */

/*
 *  'fork.c' contains the help-routines for the 'fork' system call
 * (see also entry.S and others).
 * Fork is rather simple, once you get the hang of it, but the memory
 * management can be a bitch. See 'mm/memory.c': 'copy_page_tables()'
 */

#include <linux/config.h>
#include <linux/malloc.h>
#include <linux/init.h>
#include <linux/unistd.h>
#include <linux/smp_lock.h>
#include <linux/module.h>
#include <linux/vmalloc.h>

#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/uaccess.h>
#include <asm/mmu_context.h>
/* The idle threads do not count.. */
int nr_threads;
int nr_running;

int max_threads;
unsigned long total_forks;      /* Handle normal Linux uptimes. */
int last_pid;

/* SLAB cache for mm_struct's. */
kmem_cache_t *mm_cachep;

/* SLAB cache for files structs */
kmem_cache_t *files_cachep;

struct task_struct *pidhash[PIDHASH_SZ];

/* UID task count cache, to prevent walking entire process list every
 * single fork() operation.
 */
#define UIDHASH_SZ      (PIDHASH_SZ >> 2)

static struct user_struct {
        atomic_t count;
        struct user_struct *next, **pprev;
        unsigned int uid;
} *uidhash[UIDHASH_SZ];

spinlock_t uidhash_lock = SPIN_LOCK_UNLOCKED;

kmem_cache_t *uid_cachep;

#define uidhashfn(uid)  (((uid >> 8) ^ uid) & (UIDHASH_SZ - 1))
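/*
 * Worked example: for uid 1000 the macro folds the high byte into the low
 * bits, (1000 >> 8) ^ 1000 = 3 ^ 1000 = 1003, and then masks the result
 * with (UIDHASH_SZ - 1) to get a bucket index, so uids that differ only
 * in their high byte still spread across the hash table.
 */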
/*
 * These routines must be called with the uidhash spinlock held!
 */
static inline void uid_hash_insert(struct user_struct *up, unsigned int hashent)
{
        if((up->next = uidhash[hashent]) != NULL)
                uidhash[hashent]->pprev = &up->next;
        up->pprev = &uidhash[hashent];
        uidhash[hashent] = up;
}

static inline void uid_hash_remove(struct user_struct *up)
{
        if(up->next)
                up->next->pprev = up->pprev;
        *up->pprev = up->next;
}
static inline struct user_struct *uid_hash_find(unsigned short uid, unsigned int hashent)
{
        struct user_struct *up, *next;

        next = uidhash[hashent];
        for (;;) {
                up = next;
                if (next) {
                        next = up->next;
                        if (up->uid != uid)
                                continue;
                        atomic_inc(&up->count);
                }
                break;
        }
        return up;
}
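/*
 * Note: uid_hash_find() returns NULL if no entry in the bucket matches;
 * on a match it bumps the entry's reference count before returning, so
 * the caller already owns a reference to the user_struct it gets back.
 */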
/*
 * For SMP, we need to re-test the user struct counter
 * after having acquired the spinlock. This allows us to do
 * the common case (not freeing anything) without having
 * any locking.
 */
#ifdef CONFIG_SMP
#define uid_hash_free(up)       (!atomic_read(&(up)->count))
#else
#define uid_hash_free(up)       (1)
#endif
void free_uid(struct task_struct *p)
{
        struct user_struct *up = p->user;

        if (up) {
                p->user = NULL;
                if (atomic_dec_and_test(&up->count)) {
                        spin_lock(&uidhash_lock);
                        if (uid_hash_free(up)) {
                                uid_hash_remove(up);
                                kmem_cache_free(uid_cachep, up);
                        }
                        spin_unlock(&uidhash_lock);
                }
        }
}
int alloc_uid(struct task_struct *p)
{
        unsigned int hashent = uidhashfn(p->uid);
        struct user_struct *up;

        spin_lock(&uidhash_lock);
        up = uid_hash_find(p->uid, hashent);
        spin_unlock(&uidhash_lock);

        if (!up) {
                struct user_struct *new;

                new = kmem_cache_alloc(uid_cachep, SLAB_KERNEL);
                if (!new)
                        return -EAGAIN;
                new->uid = p->uid;
                atomic_set(&new->count, 1);

                /*
                 * Before adding this, check whether we raced
                 * on adding the same user already..
                 */
                spin_lock(&uidhash_lock);
                up = uid_hash_find(p->uid, hashent);
                if (up) {
                        kmem_cache_free(uid_cachep, new);
                } else {
                        uid_hash_insert(new, hashent);
                        up = new;
                }
                spin_unlock(&uidhash_lock);
        }
        p->user = up;
        return 0;
}
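/*
 * Note: the per-user counter maintained by alloc_uid()/free_uid() is the
 * value do_fork() below compares against RLIMIT_NPROC before letting a
 * user create another process.
 */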
void __init fork_init(unsigned long mempages)
{
        int i;

        uid_cachep = kmem_cache_create("uid_cache", sizeof(struct user_struct),
                                       0,
                                       SLAB_HWCACHE_ALIGN, NULL, NULL);
        if(!uid_cachep)
                panic("Cannot create uid taskcount SLAB cache\n");

        for(i = 0; i < UIDHASH_SZ; i++)
                uidhash[i] = 0;

        /*
         * The default maximum number of threads is set to a safe
         * value: the thread structures can take up at most half
         * of memory.
         */
        max_threads = mempages / (THREAD_SIZE/PAGE_SIZE) / 2;

        init_task.rlim[RLIMIT_NPROC].rlim_cur = max_threads/2;
        init_task.rlim[RLIMIT_NPROC].rlim_max = max_threads/2;
}
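/*
 * Worked example (assuming THREAD_SIZE is two 4 KB pages, as on i386):
 * a machine with 128 MB of RAM has mempages = 32768, so max_threads
 * becomes 32768 / 2 / 2 = 8192, and the RLIMIT_NPROC defaults set above
 * allow each user at most 4096 processes.
 */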
/* Protects next_safe and last_pid. */
spinlock_t lastpid_lock = SPIN_LOCK_UNLOCKED;

static int get_pid(unsigned long flags)
{
        static int next_safe = PID_MAX;
        struct task_struct *p;

        if (flags & CLONE_PID)
                return current->pid;

        spin_lock(&lastpid_lock);
        if((++last_pid) & 0xffff8000) {
                last_pid = 300;         /* Skip daemons etc. */
                goto inside;
        }
        if(last_pid >= next_safe) {
inside:
                next_safe = PID_MAX;
                read_lock(&tasklist_lock);
        repeat:
                for_each_task(p) {
                        if(p->pid == last_pid ||
                           p->pgrp == last_pid ||
                           p->session == last_pid) {
                                if(++last_pid >= next_safe) {
                                        if(last_pid & 0xffff8000)
                                                last_pid = 300;
                                        next_safe = PID_MAX;
                                }
                                goto repeat;
                        }
                        if(p->pid > last_pid && next_safe > p->pid)
                                next_safe = p->pid;
                        if(p->pgrp > last_pid && next_safe > p->pgrp)
                                next_safe = p->pgrp;
                        if(p->session > last_pid && next_safe > p->session)
                                next_safe = p->session;
                }
                read_unlock(&tasklist_lock);
        }
        spin_unlock(&lastpid_lock);

        return last_pid;
}
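/*
 * Note: the 0xffff8000 test keeps pids below 0x8000 and wraps the counter
 * back to 300 so that the low, well-known pids are skipped.  next_safe
 * caches the smallest pid/pgrp/session currently in use that lies above
 * last_pid; until last_pid catches up with it, get_pid() can hand out new
 * pids without rescanning the task list.
 */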
static inline int dup_mmap(struct mm_struct * mm)
{
        struct vm_area_struct * mpnt, *tmp, **pprev;
        int retval;

        /* Kill me slowly. UGLY! FIXME! */
        memcpy(&mm->start_code, &current->mm->start_code, 15*sizeof(unsigned long));

        flush_cache_mm(current->mm);
        pprev = &mm->mmap;
        for (mpnt = current->mm->mmap ; mpnt ; mpnt = mpnt->vm_next) {
                struct file *file;

                retval = -ENOMEM;
                if(mpnt->vm_flags & VM_DONTCOPY)
                        continue;
                tmp = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
                if (!tmp)
                        goto fail_nomem;
                *tmp = *mpnt;
                tmp->vm_flags &= ~VM_LOCKED;
                tmp->vm_mm = mm;
                mm->map_count++;
                tmp->vm_next = NULL;
                file = tmp->vm_file;
                if (file) {
                        struct inode *inode = file->f_dentry->d_inode;
                        get_file(file);
                        if (tmp->vm_flags & VM_DENYWRITE)
                                atomic_dec(&inode->i_writecount);

                        /* insert tmp into the share list, just after mpnt */
                        spin_lock(&inode->i_mapping->i_shared_lock);
                        if((tmp->vm_next_share = mpnt->vm_next_share) != NULL)
                                mpnt->vm_next_share->vm_pprev_share =
                                        &tmp->vm_next_share;
                        mpnt->vm_next_share = tmp;
                        tmp->vm_pprev_share = &mpnt->vm_next_share;
                        spin_unlock(&inode->i_mapping->i_shared_lock);
                }

                /* Copy the pages, but defer checking for errors */
                retval = copy_page_range(mm, current->mm, tmp);
                if (!retval && tmp->vm_ops && tmp->vm_ops->open)
                        tmp->vm_ops->open(tmp);

                /*
                 * Link in the new vma even if an error occurred,
                 * so that exit_mmap() can clean up the mess.
                 */
                tmp->vm_next = *pprev;
                *pprev = tmp;

                pprev = &tmp->vm_next;
                if (retval)
                        goto fail_nomem;
        }
        retval = 0;
        if (mm->map_count >= AVL_MIN_MAP_COUNT)
                build_mmap_avl(mm);

fail_nomem:
        flush_tlb_mm(current->mm);
        return retval;
}
/*
 * Allocate and initialize an mm_struct.
 */
struct mm_struct * mm_alloc(void)
{
        struct mm_struct * mm;

        mm = kmem_cache_alloc(mm_cachep, SLAB_KERNEL);
        if (mm) {
                memset(mm, 0, sizeof(*mm));
                atomic_set(&mm->mm_users, 1);
                atomic_set(&mm->mm_count, 1);
                init_MUTEX(&mm->mmap_sem);
                mm->page_table_lock = SPIN_LOCK_UNLOCKED;
                mm->pgd = pgd_alloc();
                if (mm->pgd)
                        return mm;
                kmem_cache_free(mm_cachep, mm);
        }
        return NULL;
}
/*
 * Called when the last reference to the mm
 * is dropped: either by a lazy thread or by
 * mmput. Free the page directory and the mm.
 */
inline void __mmdrop(struct mm_struct *mm)
{
        if (mm == &init_mm) BUG();
        pgd_free(mm->pgd);
        destroy_context(mm);
        kmem_cache_free(mm_cachep, mm);
}
/*
 * Decrement the use count and release all resources for an mm.
 */
void mmput(struct mm_struct *mm)
{
        if (atomic_dec_and_test(&mm->mm_users)) {
                exit_mmap(mm);
                mmdrop(mm);
        }
}
/* Please note the differences between mmput and mm_release.
 * mmput is called whenever we stop holding onto a mm_struct,
 * whether that happens on an error path or on success.
 *
 * mm_release is called after a mm_struct has been removed
 * from the current process.
 *
 * This difference is important for error handling, when we
 * only half set up a mm_struct for a new process and need to restore
 * the old one.  Because we mmput the new mm_struct before
 * restoring the old one. . .
 * Eric Biederman 10 January 1998
 */
void mm_release(void)
{
        struct task_struct *tsk = current;

        /* notify parent sleeping on vfork() */
        if (tsk->flags & PF_VFORK) {
                tsk->flags &= ~PF_VFORK;
                up(tsk->p_opptr->vfork_sem);
        }
}
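/*
 * Note: this up() is the other half of the vfork() handshake.  do_fork()
 * below points current->vfork_sem at an on-stack semaphore and, for
 * CLONE_VFORK, sleeps in down(&sem) after starting the child; the parent
 * is released here once the child gives up its mm and mm_release() runs.
 */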
static inline int copy_mm(unsigned long clone_flags, struct task_struct * tsk)
{
        struct mm_struct * mm;
        int retval;

        tsk->min_flt = tsk->maj_flt = 0;
        tsk->cmin_flt = tsk->cmaj_flt = 0;
        tsk->nswap = tsk->cnswap = 0;

        tsk->mm = NULL;
        tsk->active_mm = NULL;

        /*
         * Are we cloning a kernel thread?
         *
         * We need to steal an active VM for that..
         */
        mm = current->mm;
        if (!mm)
                return 0;

        if (clone_flags & CLONE_VM) {
                atomic_inc(&mm->mm_users);
                goto good_mm;
        }

        retval = -ENOMEM;
        mm = mm_alloc();
        if (!mm)
                goto fail_nomem;

        tsk->mm = mm;
        tsk->active_mm = mm;

        /*
         * child gets a private LDT (if there was an LDT in the parent)
         */
        copy_segments(tsk, mm);

        down(&current->mm->mmap_sem);
        retval = dup_mmap(mm);
        up(&current->mm->mmap_sem);
        if (retval)
                goto free_pt;

good_mm:
        tsk->mm = mm;
        tsk->active_mm = mm;
        init_new_context(tsk,mm);
        return 0;

free_pt:
        mmput(mm);
fail_nomem:
        return retval;
}
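/*
 * Note: a kernel thread has no mm of its own (current->mm == NULL), so
 * the early "return 0" above leaves tsk->mm NULL as well; such a child
 * runs on a borrowed active_mm rather than an address space of its own.
 */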
static inline struct fs_struct *__copy_fs_struct(struct fs_struct *old)
{
        struct fs_struct *fs = kmalloc(sizeof(*old), GFP_KERNEL);
        /* We don't need to lock fs - think why ;-) */
        if (fs) {
                atomic_set(&fs->count, 1);
                fs->lock = RW_LOCK_UNLOCKED;
                fs->umask = old->umask;
                read_lock(&old->lock);
                fs->rootmnt = mntget(old->rootmnt);
                fs->root = dget(old->root);
                fs->pwdmnt = mntget(old->pwdmnt);
                fs->pwd = dget(old->pwd);
                if (old->altroot) {
                        fs->altrootmnt = mntget(old->altrootmnt);
                        fs->altroot = dget(old->altroot);
                } else {
                        fs->altrootmnt = NULL;
                        fs->altroot = NULL;
                }
                read_unlock(&old->lock);
        }
        return fs;
}

struct fs_struct *copy_fs_struct(struct fs_struct *old)
{
        return __copy_fs_struct(old);
}
static inline int copy_fs(unsigned long clone_flags, struct task_struct * tsk)
{
        if (clone_flags & CLONE_FS) {
                atomic_inc(&current->fs->count);
                return 0;
        }
        tsk->fs = __copy_fs_struct(current->fs);
        if (!tsk->fs)
                return -1;
        return 0;
}
static int count_open_files(struct files_struct *files, int size)
{
        int i;

        /* Find the last open fd */
        for (i = size/(8*sizeof(long)); i > 0; ) {
                if (files->open_fds->fds_bits[--i])
                        break;
        }
        i = (i+1) * 8 * sizeof(long);
        return i;
}
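/*
 * Worked example (assuming 32-bit longs): with size == 256 the loop above
 * starts at word index 8 and scans downwards; if fds_bits[2] is the
 * highest non-zero word, it returns (2 + 1) * 32 == 96, i.e. an upper
 * bound on the number of fds to copy, rounded up to a word boundary.
 */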
static int copy_files(unsigned long clone_flags, struct task_struct * tsk)
{
        struct files_struct *oldf, *newf;
        struct file **old_fds, **new_fds;
        int open_files, nfds, size, i, error = 0;

        /*
         * A background process may not have any files ...
         */
        oldf = current->files;
        if (!oldf)
                goto out;

        if (clone_flags & CLONE_FILES) {
                atomic_inc(&oldf->count);
                goto out;
        }

        tsk->files = NULL;
        error = -ENOMEM;
        newf = kmem_cache_alloc(files_cachep, SLAB_KERNEL);
        if (!newf)
                goto out;

        atomic_set(&newf->count, 1);

        newf->file_lock     = RW_LOCK_UNLOCKED;
        newf->next_fd       = 0;
        newf->max_fds       = NR_OPEN_DEFAULT;
        newf->max_fdset     = __FD_SETSIZE;
        newf->close_on_exec = &newf->close_on_exec_init;
        newf->open_fds      = &newf->open_fds_init;
        newf->fd            = &newf->fd_array[0];

        /* We don't yet have the oldf readlock, but even if the old
           fdset gets grown now, we'll only copy up to "size" fds */
        size = oldf->max_fdset;
        if (size > __FD_SETSIZE) {
                newf->max_fdset = 0;
                write_lock(&newf->file_lock);
                error = expand_fdset(newf, size);
                write_unlock(&newf->file_lock);
                if (error)
                        goto out_release;
        }
        read_lock(&oldf->file_lock);

        open_files = count_open_files(oldf, size);

        /*
         * Check whether we need to allocate a larger fd array.
         * Note: we're not a clone task, so the open count won't
         * change.
         */
        nfds = NR_OPEN_DEFAULT;
        if (open_files > nfds) {
                read_unlock(&oldf->file_lock);
                newf->max_fds = 0;
                write_lock(&newf->file_lock);
                error = expand_fd_array(newf, open_files);
                write_unlock(&newf->file_lock);
                if (error)
                        goto out_release;
                nfds = newf->max_fds;
                read_lock(&oldf->file_lock);
        }

        old_fds = oldf->fd;
        new_fds = newf->fd;

        memcpy(newf->open_fds->fds_bits, oldf->open_fds->fds_bits, open_files/8);
        memcpy(newf->close_on_exec->fds_bits, oldf->close_on_exec->fds_bits, open_files/8);

        for (i = open_files; i != 0; i--) {
                struct file *f = *old_fds++;
                if (f)
                        get_file(f);
                *new_fds++ = f;
        }
        read_unlock(&oldf->file_lock);

        /* compute the remainder to be cleared */
        size = (newf->max_fds - open_files) * sizeof(struct file *);

        /* This is long word aligned thus could use an optimized version */
        memset(new_fds, 0, size);

        if (newf->max_fdset > open_files) {
                int left = (newf->max_fdset-open_files)/8;
                int start = open_files / (8 * sizeof(unsigned long));

                memset(&newf->open_fds->fds_bits[start], 0, left);
                memset(&newf->close_on_exec->fds_bits[start], 0, left);
        }

        tsk->files = newf;
        error = 0;
out:
        return error;

out_release:
        free_fdset (newf->close_on_exec, newf->max_fdset);
        free_fdset (newf->open_fds, newf->max_fdset);
        kmem_cache_free(files_cachep, newf);
        goto out;
}
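/*
 * Note: count_open_files() rounds open_files up to a multiple of
 * 8*sizeof(long), so the open_files/8 byte counts used by the bitmap
 * memcpy() calls in copy_files() above always cover whole words.
 */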
static inline int copy_sighand(unsigned long clone_flags, struct task_struct * tsk)
{
        if (clone_flags & CLONE_SIGHAND) {
                atomic_inc(&current->sig->count);
                return 0;
        }
        tsk->sig = kmalloc(sizeof(*tsk->sig), GFP_KERNEL);
        if (!tsk->sig)
                return -1;
        spin_lock_init(&tsk->sig->siglock);
        atomic_set(&tsk->sig->count, 1);
        memcpy(tsk->sig->action, current->sig->action, sizeof(tsk->sig->action));
        return 0;
}
static inline void copy_flags(unsigned long clone_flags, struct task_struct *p)
{
        unsigned long new_flags = p->flags;

        new_flags &= ~(PF_SUPERPRIV | PF_USEDFPU | PF_VFORK);
        new_flags |= PF_FORKNOEXEC;
        if (!(clone_flags & CLONE_PTRACE))
                p->ptrace = 0;
        if (clone_flags & CLONE_VFORK)
                new_flags |= PF_VFORK;
        p->flags = new_flags;
}
/*
 *  Ok, this is the main fork-routine. It copies the system process
 * information (task[nr]) and sets up the necessary registers. It
 * also copies the data segment in its entirety.
 */
int do_fork(unsigned long clone_flags, unsigned long usp, struct pt_regs *regs)
{
        int retval = -ENOMEM;
        struct task_struct *p;
        DECLARE_MUTEX_LOCKED(sem);

        if (clone_flags & CLONE_PID) {
                /* This is only allowed from the boot up thread */
                if (current->pid)
                        return -EPERM;
        }

        current->vfork_sem = &sem;

        p = alloc_task_struct();
        if (!p)
                goto fork_out;

        *p = *current;

        lock_kernel();

        retval = -EAGAIN;
        if (p->user) {
                if (atomic_read(&p->user->count) >= p->rlim[RLIMIT_NPROC].rlim_cur)
                        goto bad_fork_free;
                atomic_inc(&p->user->count);
        }

        /*
         * Counter increases are protected by
         * the kernel lock so nr_threads can't
         * increase under us (but it may decrease).
         */
        if (nr_threads >= max_threads)
                goto bad_fork_cleanup_count;

        if (p->exec_domain && p->exec_domain->module)
                __MOD_INC_USE_COUNT(p->exec_domain->module);
        if (p->binfmt && p->binfmt->module)
                __MOD_INC_USE_COUNT(p->binfmt->module);

        p->did_exec = 0;
        p->swappable = 0;
        p->state = TASK_UNINTERRUPTIBLE;

        copy_flags(clone_flags, p);
        p->pid = get_pid(clone_flags);

        /*
         * This is a "shadow run" state. The process
         * is marked runnable, but isn't actually on
         * any run queue yet.. (that happens at the
         * very end).
         */
        p->state = TASK_RUNNING;
        p->run_list.next = NULL;
        p->run_list.prev = NULL;

        if ((clone_flags & CLONE_VFORK) || !(clone_flags & CLONE_PARENT)) {
                p->p_opptr = current;
                if (!(p->ptrace & PT_PTRACED))
                        p->p_pptr = current;
        }
        p->p_cptr = NULL;
        init_waitqueue_head(&p->wait_chldexit);
        p->vfork_sem = NULL;
        spin_lock_init(&p->alloc_lock);

        p->sigpending = 0;
        sigemptyset(&p->signal);
        p->sigqueue = NULL;
        p->sigqueue_tail = &p->sigqueue;

        p->it_real_value = p->it_virt_value = p->it_prof_value = 0;
        p->it_real_incr = p->it_virt_incr = p->it_prof_incr = 0;
        init_timer(&p->real_timer);
        p->real_timer.data = (unsigned long) p;

        p->leader = 0;          /* session leadership doesn't inherit */
        p->tty_old_pgrp = 0;
        p->times.tms_utime = p->times.tms_stime = 0;
        p->times.tms_cutime = p->times.tms_cstime = 0;
#ifdef CONFIG_SMP
        {
                int i;
                p->has_cpu = 0;
                p->processor = current->processor;
                /* ?? should we just memset this ?? */
                for(i = 0; i < smp_num_cpus; i++)
                        p->per_cpu_utime[i] = p->per_cpu_stime[i] = 0;
                spin_lock_init(&p->sigmask_lock);
        }
#endif
        p->lock_depth = -1;             /* -1 = no lock */
        p->start_time = jiffies;
        retval = -ENOMEM;
        /* copy all the process information */
        if (copy_files(clone_flags, p))
                goto bad_fork_cleanup;
        if (copy_fs(clone_flags, p))
                goto bad_fork_cleanup_files;
        if (copy_sighand(clone_flags, p))
                goto bad_fork_cleanup_fs;
        if (copy_mm(clone_flags, p))
                goto bad_fork_cleanup_sighand;
        retval = copy_thread(0, clone_flags, usp, p, regs);
        if (retval)
                goto bad_fork_cleanup_sighand;
        p->semundo = NULL;

        /* Our parent execution domain becomes current domain
           These must match for thread signalling to apply */

        p->parent_exec_id = p->self_exec_id;

        /* ok, now we should be set up.. */
        p->swappable = 1;
        p->exit_signal = clone_flags & CSIGNAL;
        p->pdeath_signal = 0;

        /*
         * "share" dynamic priority between parent and child, thus the
         * total amount of dynamic priorities in the system doesn't change,
         * more scheduling fairness. This is only important in the first
         * timeslice, on the long run the scheduling behaviour is unchanged.
         */
        p->counter = (current->counter + 1) >> 1;
        current->counter >>= 1;
        if (!current->counter)
                current->need_resched = 1;
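        /*
         * Worked example: if the parent had 11 ticks left, the child gets
         * (11 + 1) >> 1 = 6 and the parent keeps 11 >> 1 = 5, so together
         * they hold no more timeslice than the parent started with.
         */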
        /*
         * Ok, add it to the run-queues and make it
         * visible to the rest of the system.
         *
         * Let it rip!
         */
        retval = p->pid;
        write_lock_irq(&tasklist_lock);
        SET_LINKS(p);
        hash_pid(p);
        nr_threads++;
        write_unlock_irq(&tasklist_lock);

        wake_up_process(p);             /* do this last */
        ++total_forks;

bad_fork:
        unlock_kernel();
fork_out:
        if ((clone_flags & CLONE_VFORK) && (retval > 0))
                down(&sem);
        return retval;

bad_fork_cleanup_sighand:
        exit_sighand(p);
bad_fork_cleanup_fs:
        exit_fs(p); /* blocking */
bad_fork_cleanup_files:
        exit_files(p); /* blocking */
bad_fork_cleanup:
        put_exec_domain(p->exec_domain);
        if (p->binfmt && p->binfmt->module)
                __MOD_DEC_USE_COUNT(p->binfmt->module);
bad_fork_cleanup_count:
        if (p->user)
                free_uid(p);
bad_fork_free:
        free_task_struct(p);
        goto bad_fork;
}
void __init filescache_init(void)
{
        files_cachep = kmem_cache_create("files_cache",
                         sizeof(struct files_struct),
                         0,
                         SLAB_HWCACHE_ALIGN,
                         NULL, NULL);
        if (!files_cachep)
                panic("Cannot create files cache");
}