/*
 *  Copyright (C) 1991, 1992  Linus Torvalds
 */

/*
 *  'fork.c' contains the help-routines for the 'fork' system call
 *  (see also entry.S and others).
 *  Fork is rather simple, once you get the hang of it, but the memory
 *  management can be a bitch. See 'mm/memory.c': 'copy_page_tables()'
 */
#include <linux/config.h>
#include <linux/malloc.h>
#include <linux/init.h>
#include <linux/unistd.h>
#include <linux/smp_lock.h>
#include <linux/module.h>
#include <linux/vmalloc.h>

#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/uaccess.h>
#include <asm/mmu_context.h>
/* The idle threads do not count.. */
int nr_threads;

int max_threads;
unsigned long total_forks;	/* Handle normal Linux uptimes. */
int last_pid;
/* SLAB cache for mm_struct's. */
kmem_cache_t *mm_cachep;

/* SLAB cache for files structs */
kmem_cache_t *files_cachep;

struct task_struct *pidhash[PIDHASH_SZ];
/* UID task count cache, to prevent walking entire process list every
 * single fork() operation.
 */
#define UIDHASH_SZ	(PIDHASH_SZ >> 2)

static struct user_struct {
	atomic_t count;
	struct user_struct *next, **pprev;
	unsigned int uid;
} *uidhash[UIDHASH_SZ];

spinlock_t uidhash_lock = SPIN_LOCK_UNLOCKED;

kmem_cache_t *uid_cachep;

#define uidhashfn(uid)	(((uid >> 8) ^ uid) & (UIDHASH_SZ - 1))
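/*
 * Example (illustration only, not part of the original source): if
 * UIDHASH_SZ were 256, uid 1000 (0x3e8) would hash to
 * ((0x3e8 >> 8) ^ 0x3e8) & 0xff = (0x3 ^ 0x3e8) & 0xff = 0xeb = 235.
 * Folding the high byte in keeps uids that share a low byte from all
 * landing in the same bucket.
 */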
/*
 * These routines must be called with the uidhash spinlock held!
 */
static inline void uid_hash_insert(struct user_struct *up, unsigned int hashent)
{
	if((up->next = uidhash[hashent]) != NULL)
		uidhash[hashent]->pprev = &up->next;
	up->pprev = &uidhash[hashent];
	uidhash[hashent] = up;
}
static inline void uid_hash_remove(struct user_struct *up)
{
	if(up->next)
		up->next->pprev = up->pprev;
	*up->pprev = up->next;
}
static inline struct user_struct *uid_hash_find(unsigned short uid, unsigned int hashent)
{
	struct user_struct *up, *next;

	next = uidhash[hashent];
	/* ... (walk of the hash chain elided in this excerpt; an entry with a
	 *	matching uid is pinned with atomic_inc(&up->count) before it
	 *	is returned) ... */
}
/*
 * For SMP, we need to re-test the user struct counter
 * after having acquired the spinlock. This allows us to do
 * the common case (not freeing anything) without having
 * any locking overhead.
 */
#ifdef CONFIG_SMP
#define uid_hash_free(up)	(!atomic_read(&(up)->count))
#else
#define uid_hash_free(up)	(1)
#endif
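/*
 * Note on the pattern above (added for illustration, not in the original
 * file): free_uid() below drops the reference with atomic_dec_and_test()
 * before taking uidhash_lock.  On SMP another CPU can re-find and re-pin
 * the entry in that window, so the count must be re-tested under the
 * lock; on UP (no kernel preemption in this era) that window cannot be
 * entered, so uid_hash_free() can simply be true.
 */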
void free_uid(struct task_struct *p)
{
	struct user_struct *up = p->user;

	if (up) {
		p->user = NULL;
		if (atomic_dec_and_test(&up->count)) {
			spin_lock(&uidhash_lock);
			if (uid_hash_free(up)) {
				uid_hash_remove(up);
				kmem_cache_free(uid_cachep, up);
			}
			spin_unlock(&uidhash_lock);
		}
	}
}
int alloc_uid(struct task_struct *p)
{
	unsigned int hashent = uidhashfn(p->uid);
	struct user_struct *up;

	spin_lock(&uidhash_lock);
	up = uid_hash_find(p->uid, hashent);
	spin_unlock(&uidhash_lock);

	if (!up) {
		struct user_struct *new;

		new = kmem_cache_alloc(uid_cachep, SLAB_KERNEL);
		if (!new)
			return -EAGAIN;
		new->uid = p->uid;
		atomic_set(&new->count, 1);

		/*
		 * Before adding this, check whether we raced
		 * on adding the same user already..
		 */
		spin_lock(&uidhash_lock);
		up = uid_hash_find(p->uid, hashent);
		if (up) {
			kmem_cache_free(uid_cachep, new);
		} else {
			uid_hash_insert(new, hashent);
			up = new;
		}
		spin_unlock(&uidhash_lock);
	}
	p->user = up;
	return 0;
}
void __init fork_init(unsigned long mempages)
{
	int i;

	uid_cachep = kmem_cache_create("uid_cache", sizeof(struct user_struct),
				       0,
				       SLAB_HWCACHE_ALIGN, NULL, NULL);
	if(!uid_cachep)
		panic("Cannot create uid taskcount SLAB cache\n");

	for(i = 0; i < UIDHASH_SZ; i++)
		uidhash[i] = 0;

	/*
	 * The default maximum number of threads is set to a safe
	 * value: the thread structures can take up at most half
	 * of memory.
	 */
	max_threads = mempages / (THREAD_SIZE/PAGE_SIZE) / 2;

	init_task.rlim[RLIMIT_NPROC].rlim_cur = max_threads/2;
	init_task.rlim[RLIMIT_NPROC].rlim_max = max_threads/2;
}
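/*
 * Worked example (illustration only): on i386 with 4 KB pages and an
 * 8 KB THREAD_SIZE, a 128 MB machine has mempages = 32768, so
 * max_threads = 32768 / 2 / 2 = 8192 and each user is limited to
 * 4096 processes by the RLIMIT_NPROC default set above.
 */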
/* Protects next_safe and last_pid. */
spinlock_t lastpid_lock = SPIN_LOCK_UNLOCKED;
static int get_pid(unsigned long flags)
{
	static int next_safe = PID_MAX;
	struct task_struct *p;

	if (flags & CLONE_PID)
		return current->pid;

	spin_lock(&lastpid_lock);
	if((++last_pid) & 0xffff8000) {
		last_pid = 300;		/* Skip daemons etc. */
		goto inside;
	}
	if(last_pid >= next_safe) {
inside:
		next_safe = PID_MAX;
		read_lock(&tasklist_lock);
	repeat:
		for_each_task(p) {
			if(p->pid == last_pid	||
			   p->pgrp == last_pid	||
			   p->session == last_pid) {
				if(++last_pid >= next_safe) {
					if(last_pid & 0xffff8000)
						last_pid = 300;
					next_safe = PID_MAX;
				}
				goto repeat;
			}
			if(p->pid > last_pid && next_safe > p->pid)
				next_safe = p->pid;
			if(p->pgrp > last_pid && next_safe > p->pgrp)
				next_safe = p->pgrp;
			if(p->session > last_pid && next_safe > p->session)
				next_safe = p->session;
		}
		read_unlock(&tasklist_lock);
	}
	spin_unlock(&lastpid_lock);

	return last_pid;
}
static inline int dup_mmap(struct mm_struct * mm)
{
	struct vm_area_struct * mpnt, *tmp, **pprev;
	int retval;

	/* Kill me slowly. UGLY! FIXME! */
	memcpy(&mm->start_code, &current->mm->start_code, 15*sizeof(unsigned long));

	flush_cache_mm(current->mm);
	pprev = &mm->mmap;
	for (mpnt = current->mm->mmap ; mpnt ; mpnt = mpnt->vm_next) {
		struct file *file;

		retval = -ENOMEM;
		if(mpnt->vm_flags & VM_DONTCOPY)
			continue;
		tmp = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
		if (!tmp)
			goto fail_nomem;
		*tmp = *mpnt;
		tmp->vm_flags &= ~VM_LOCKED;
		tmp->vm_mm = mm;
		mm->map_count++;
		file = tmp->vm_file;
		if (file) {
			struct inode *inode = file->f_dentry->d_inode;

			get_file(file);
			if (tmp->vm_flags & VM_DENYWRITE)
				atomic_dec(&inode->i_writecount);

			/* insert tmp into the share list, just after mpnt */
			spin_lock(&inode->i_mapping->i_shared_lock);
			if((tmp->vm_next_share = mpnt->vm_next_share) != NULL)
				mpnt->vm_next_share->vm_pprev_share =
					&tmp->vm_next_share;
			mpnt->vm_next_share = tmp;
			tmp->vm_pprev_share = &mpnt->vm_next_share;
			spin_unlock(&inode->i_mapping->i_shared_lock);
		}

		/* Copy the pages, but defer checking for errors */
		retval = copy_page_range(mm, current->mm, tmp);
		if (!retval && tmp->vm_ops && tmp->vm_ops->open)
			tmp->vm_ops->open(tmp);

		/*
		 * Link in the new vma even if an error occurred,
		 * so that exit_mmap() can clean up the mess.
		 */
		tmp->vm_next = *pprev;
		*pprev = tmp;

		pprev = &tmp->vm_next;
		if (retval)
			goto fail_nomem;
	}
	retval = 0;
	if (mm->map_count >= AVL_MIN_MAP_COUNT)
		build_mmap_avl(mm);

fail_nomem:
	flush_tlb_mm(current->mm);
	return retval;
}
/*
 * Allocate and initialize an mm_struct.
 */
struct mm_struct * mm_alloc(void)
{
	struct mm_struct * mm;

	mm = kmem_cache_alloc(mm_cachep, SLAB_KERNEL);
	if (mm) {
		memset(mm, 0, sizeof(*mm));
		atomic_set(&mm->mm_users, 1);
		atomic_set(&mm->mm_count, 1);
		init_MUTEX(&mm->mmap_sem);
		mm->page_table_lock = SPIN_LOCK_UNLOCKED;
		mm->pgd = pgd_alloc();
		if (mm->pgd)
			return mm;
		kmem_cache_free(mm_cachep, mm);
		mm = NULL;
	}
	return mm;
}
/*
 * Called when the last reference to the mm
 * is dropped: either by a lazy thread or by
 * mmput. Free the page directory and the mm.
 */
inline void __mmdrop(struct mm_struct *mm)
{
	if (mm == &init_mm) BUG();
	pgd_free(mm->pgd);
	destroy_context(mm);
	kmem_cache_free(mm_cachep, mm);
}
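/*
 * For reference (defined in <linux/sched.h>, not in this file): callers
 * normally go through the mmdrop() wrapper, which is roughly
 *
 *	if (atomic_dec_and_test(&mm->mm_count))
 *		__mmdrop(mm);
 *
 * so __mmdrop() only runs once the last mm_count reference is gone.
 */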
/*
 * Decrement the use count and release all resources for an mm.
 */
void mmput(struct mm_struct *mm)
{
	if (atomic_dec_and_test(&mm->mm_users)) {
		exit_mmap(mm);
		mmdrop(mm);
	}
}
/* Please note the differences between mmput and mm_release.
 * mmput is called whenever we stop holding onto a mm_struct,
 * error success whatever.
 *
 * mm_release is called after a mm_struct has been removed
 * from the current process.
 *
 * This difference is important for error handling, when we
 * only half set up a mm_struct for a new process and need to restore
 * the old one. Because we mmput the new mm_struct before
 * restoring the old one. . .
 * Eric Biederman 10 January 1998
 */
void mm_release(void)
{
	struct task_struct *tsk = current;

	/* notify parent sleeping on vfork() */
	if (tsk->flags & PF_VFORK) {
		tsk->flags &= ~PF_VFORK;
		up(tsk->p_opptr->vfork_sem);
	}
}
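/*
 * Illustration of the distinction above (not in the original source): an
 * execve() that fails after building a new mm simply does mmput(new_mm)
 * and keeps running on the old mm; mm_release() is only called once an
 * mm is really detached from current (exec or exit), which is the point
 * at which a vfork()ing parent can safely be woken.
 */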
static inline int copy_mm(unsigned long clone_flags, struct task_struct * tsk)
{
	struct mm_struct * mm;
	int retval;

	tsk->min_flt = tsk->maj_flt = 0;
	tsk->cmin_flt = tsk->cmaj_flt = 0;
	tsk->nswap = tsk->cnswap = 0;

	tsk->mm = NULL;
	tsk->active_mm = NULL;

	/*
	 * Are we cloning a kernel thread?
	 *
	 * We need to steal an active VM for that..
	 */
	mm = current->mm;
	if (!mm)
		return 0;

	if (clone_flags & CLONE_VM) {
		atomic_inc(&mm->mm_users);
		goto good_mm;
	}

	/* ... (allocation of a fresh mm with mm_alloc() elided in this
	 *	excerpt; it bails out with -ENOMEM on failure) ... */

	/*
	 * child gets a private LDT (if there was an LDT in the parent)
	 */
	copy_segments(tsk, mm);

	down(&current->mm->mmap_sem);
	retval = dup_mmap(mm);
	up(&current->mm->mmap_sem);
	/* ... (on error, mmput() the new mm and propagate retval) ... */

good_mm:
	tsk->mm = mm;
	tsk->active_mm = mm;
	init_new_context(tsk,mm);
	return 0;
}
static inline struct fs_struct *__copy_fs_struct(struct fs_struct *old)
{
	struct fs_struct *fs = kmalloc(sizeof(*old), GFP_KERNEL);

	if (fs) {
		atomic_set(&fs->count, 1);
		fs->umask = old->umask;
		fs->rootmnt = mntget(old->rootmnt);
		fs->root = dget(old->root);
		fs->pwdmnt = mntget(old->pwdmnt);
		fs->pwd = dget(old->pwd);
		if (old->altroot) {
			fs->altrootmnt = mntget(old->altrootmnt);
			fs->altroot = dget(old->altroot);
		} else {
			fs->altrootmnt = NULL;
			fs->altroot = NULL;
		}
	}
	return fs;
}

struct fs_struct *copy_fs_struct(struct fs_struct *old)
{
	return __copy_fs_struct(old);
}
static inline int copy_fs(unsigned long clone_flags, struct task_struct * tsk)
{
	if (clone_flags & CLONE_FS) {
		atomic_inc(&current->fs->count);
		return 0;
	}
	tsk->fs = __copy_fs_struct(current->fs);
	if (!tsk->fs)
		return -1;
	return 0;
}
static int count_open_files(struct files_struct *files, int size)
{
	int i;

	/* Find the last open fd */
	for (i = size/(8*sizeof(long)); i > 0; ) {
		if (files->open_fds->fds_bits[--i])
			break;
	}
	i = (i+1) * 8 * sizeof(long);
	return i;
}
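/*
 * Worked example (illustration only): on a 32-bit box with size = 256,
 * the loop starts at i = 256/32 = 8 and scans the fds_bits words
 * downwards; if word 2 is the highest word with an open fd, the loop
 * breaks with i = 2 and the function returns (2+1)*32 = 96, i.e. a
 * word-granular upper bound on the fds worth copying.
 */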
static int copy_files(unsigned long clone_flags, struct task_struct * tsk)
{
	struct files_struct *oldf, *newf;
	struct file **old_fds, **new_fds;
	int open_files, nfds, size, i, error = 0;
	/*
	 * A background process may not have any files ...
	 */
	oldf = current->files;
	if (!oldf)
		goto out;

	if (clone_flags & CLONE_FILES) {
		atomic_inc(&oldf->count);
		goto out;
	}
	error = -ENOMEM;
	newf = kmem_cache_alloc(files_cachep, SLAB_KERNEL);
	if (!newf)
		goto out;

	atomic_set(&newf->count, 1);

	newf->file_lock	    = RW_LOCK_UNLOCKED;
	newf->next_fd	    = 0;
	newf->max_fds	    = NR_OPEN_DEFAULT;
	newf->max_fdset	    = __FD_SETSIZE;
	newf->close_on_exec = &newf->close_on_exec_init;
	newf->open_fds	    = &newf->open_fds_init;
	newf->fd	    = &newf->fd_array[0];
	/* We don't yet have the oldf readlock, but even if the old
	   fdset gets grown now, we'll only copy up to "size" fds */
	size = oldf->max_fdset;
	if (size > __FD_SETSIZE) {
		newf->max_fdset = 0;
		write_lock(&newf->file_lock);
		error = expand_fdset(newf, size);
		write_unlock(&newf->file_lock);
		if (error)
			goto out_release;
	}
	read_lock(&oldf->file_lock);
	open_files = count_open_files(oldf, size);

	/*
	 * Check whether we need to allocate a larger fd array.
	 * Note: we're not a clone task, so the open count won't
	 * change.
	 */
	nfds = NR_OPEN_DEFAULT;
	if (open_files > nfds) {
		read_unlock(&oldf->file_lock);
		newf->max_fds = 0;
		write_lock(&newf->file_lock);
		error = expand_fd_array(newf, open_files);
		write_unlock(&newf->file_lock);
		if (error)
			goto out_release;
		nfds = newf->max_fds;
		read_lock(&oldf->file_lock);
	}
	old_fds = oldf->fd;
	new_fds = newf->fd;

	memcpy(newf->open_fds->fds_bits, oldf->open_fds->fds_bits, open_files/8);
	memcpy(newf->close_on_exec->fds_bits, oldf->close_on_exec->fds_bits, open_files/8);

	for (i = open_files; i != 0; i--) {
		struct file *f = *old_fds++;
		if (f)
			get_file(f);
		*new_fds++ = f;
	}
	read_unlock(&oldf->file_lock);
	/* compute the remainder to be cleared */
	size = (newf->max_fds - open_files) * sizeof(struct file *);

	/* This is long word aligned thus could use an optimized version */
	memset(new_fds, 0, size);

	if (newf->max_fdset > open_files) {
		int left = (newf->max_fdset-open_files)/8;
		int start = open_files / (8 * sizeof(unsigned long));

		memset(&newf->open_fds->fds_bits[start], 0, left);
		memset(&newf->close_on_exec->fds_bits[start], 0, left);
	}
	tsk->files = newf;
	error = 0;
out:
	return error;

out_release:
	free_fdset (newf->close_on_exec, newf->max_fdset);
	free_fdset (newf->open_fds, newf->max_fdset);
	kmem_cache_free(files_cachep, newf);
	goto out;
}
static inline int copy_sighand(unsigned long clone_flags, struct task_struct * tsk)
{
	if (clone_flags & CLONE_SIGHAND) {
		atomic_inc(&current->sig->count);
		return 0;
	}
	tsk->sig = kmalloc(sizeof(*tsk->sig), GFP_KERNEL);
	if (!tsk->sig)
		return -1;
	spin_lock_init(&tsk->sig->siglock);
	atomic_set(&tsk->sig->count, 1);
	memcpy(tsk->sig->action, current->sig->action, sizeof(tsk->sig->action));
	return 0;
}
static inline void copy_flags(unsigned long clone_flags, struct task_struct *p)
{
	unsigned long new_flags = p->flags;

	new_flags &= ~(PF_SUPERPRIV | PF_USEDFPU | PF_VFORK);
	new_flags |= PF_FORKNOEXEC;
	if (!(clone_flags & CLONE_PTRACE))
		new_flags &= ~(PF_PTRACED|PF_TRACESYS);
	if (clone_flags & CLONE_VFORK)
		new_flags |= PF_VFORK;
	p->flags = new_flags;
}
/*
 * Ok, this is the main fork-routine. It copies the system process
 * information (task[nr]) and sets up the necessary registers. It
 * also copies the data segment in its entirety.
 */
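/*
 * For context (architecture code, not part of this file): on i386 the
 * fork-family system calls end up here roughly as
 *
 *	sys_fork:  do_fork(SIGCHLD, regs.esp, &regs);
 *	sys_clone: do_fork(clone_flags, newsp, &regs);
 *	sys_vfork: do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs.esp, &regs);
 *
 * i.e. clone_flags selects the sharing semantics and usp seeds the
 * child's user stack pointer.
 */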
int do_fork(unsigned long clone_flags, unsigned long usp, struct pt_regs *regs)
{
	int retval = -ENOMEM;
	struct task_struct *p;
	DECLARE_MUTEX_LOCKED(sem);

	if (clone_flags & CLONE_PID) {
		/* This is only allowed from the boot up thread */
		if (current->pid)
			return -EPERM;
	}

	current->vfork_sem = &sem;

	p = alloc_task_struct();
	if (!p)
		goto fork_out;
	*p = *current;

	retval = -EAGAIN;
	if (atomic_read(&p->user->count) >= p->rlim[RLIMIT_NPROC].rlim_cur)
		goto bad_fork_free;
	atomic_inc(&p->user->count);

	/*
	 * Counter increases are protected by
	 * the kernel lock so nr_threads can't
	 * increase under us (but it may decrease).
	 */
	if (nr_threads >= max_threads)
		goto bad_fork_cleanup_count;

	if (p->exec_domain && p->exec_domain->module)
		__MOD_INC_USE_COUNT(p->exec_domain->module);
	if (p->binfmt && p->binfmt->module)
		__MOD_INC_USE_COUNT(p->binfmt->module);
	p->state = TASK_UNINTERRUPTIBLE;

	copy_flags(clone_flags, p);
	p->pid = get_pid(clone_flags);

	/*
	 * This is a "shadow run" state. The process
	 * is marked runnable, but isn't actually on
	 * any run queue yet.. (that happens at the
	 * very end).
	 */
	p->state = TASK_RUNNING;
	p->run_list.next = NULL;
	p->run_list.prev = NULL;
	if ((clone_flags & CLONE_VFORK) || !(clone_flags & CLONE_PARENT)) {
		p->p_opptr = current;
		if (!(current->flags & PF_PTRACED))
			p->p_pptr = current;
	}

	init_waitqueue_head(&p->wait_chldexit);
	spin_lock_init(&p->alloc_lock);

	sigemptyset(&p->signal);
	p->sigqueue_tail = &p->sigqueue;

	p->it_real_value = p->it_virt_value = p->it_prof_value = 0;
	p->it_real_incr = p->it_virt_incr = p->it_prof_incr = 0;
	init_timer(&p->real_timer);
	p->real_timer.data = (unsigned long) p;

	p->leader = 0;		/* session leadership doesn't inherit */
	p->times.tms_utime = p->times.tms_stime = 0;
	p->times.tms_cutime = p->times.tms_cstime = 0;
#ifdef CONFIG_SMP
	{
		int i;

		p->processor = current->processor;
		/* ?? should we just memset this ?? */
		for(i = 0; i < smp_num_cpus; i++)
			p->per_cpu_utime[i] = p->per_cpu_stime[i] = 0;
		spin_lock_init(&p->sigmask_lock);
	}
#endif
	p->lock_depth = -1;		/* -1 = no lock */
	p->start_time = jiffies;
	/* copy all the process information */
	if (copy_files(clone_flags, p))
		goto bad_fork_cleanup;
	if (copy_fs(clone_flags, p))
		goto bad_fork_cleanup_files;
	if (copy_sighand(clone_flags, p))
		goto bad_fork_cleanup_fs;
	if (copy_mm(clone_flags, p))
		goto bad_fork_cleanup_sighand;
	retval = copy_thread(0, clone_flags, usp, p, regs);
	if (retval)
		goto bad_fork_cleanup_sighand;
	/* Our parent execution domain becomes current domain
	   These must match for thread signalling to apply */
	p->parent_exec_id = p->self_exec_id;

	/* ok, now we should be set up.. */
	p->exit_signal = clone_flags & CSIGNAL;
	p->pdeath_signal = 0;
	/*
	 * "share" dynamic priority between parent and child, thus the
	 * total amount of dynamic priorities in the system doesn't change,
	 * more scheduling fairness. This is only important in the first
	 * timeslice, on the long run the scheduling behaviour is unchanged.
	 */
	p->counter = (current->counter + 1) >> 1;
	current->counter >>= 1;
	if (!current->counter)
		current->need_resched = 1;
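	/*
	 * Worked example (illustration only): if the parent has 11 ticks
	 * left, the child gets (11 + 1) >> 1 = 6 and the parent keeps
	 * 11 >> 1 = 5, so the total changes by at most one tick and the
	 * parent's remaining timeslice is effectively split with the child.
	 */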
	/*
	 * Ok, add it to the run-queues and make it
	 * visible to the rest of the system.
	 *
	 * Let it rip!
	 */
	retval = p->pid;
	write_lock_irq(&tasklist_lock);
	SET_LINKS(p);
	hash_pid(p);
	nr_threads++;
	write_unlock_irq(&tasklist_lock);

	wake_up_process(p);		/* do this last */
	++total_forks;
fork_out:
	if ((clone_flags & CLONE_VFORK) && (retval > 0))
		down(&sem);
	return retval;

bad_fork_cleanup_sighand:
	exit_sighand(p);
bad_fork_cleanup_fs:
	exit_fs(p);	/* blocking */
bad_fork_cleanup_files:
	exit_files(p);	/* blocking */
bad_fork_cleanup:
	put_exec_domain(p->exec_domain);
	if (p->binfmt && p->binfmt->module)
		__MOD_DEC_USE_COUNT(p->binfmt->module);
bad_fork_cleanup_count:
	atomic_dec(&p->user->count);
bad_fork_free:
	free_task_struct(p);
	goto fork_out;
}
void __init filescache_init(void)
{
	files_cachep = kmem_cache_create("files_cache",
			 sizeof(struct files_struct),
			 0,
			 SLAB_HWCACHE_ALIGN, NULL, NULL);
	if (!files_cachep)
		panic("Cannot create files cache");
}