kernel/fork.c

   1 /*
   2  *  linux/kernel/fork.c
   3  *
   4  *  Copyright (C) 1991, 1992  Linus Torvalds
   5  */
   6
   7 /*
   8  *  'fork.c' contains the help-routines for the 'fork' system call
   9  * (see also entry.S and others).
  10  * Fork is rather simple, once you get the hang of it, but the memory
  11  * management can be a bitch. See 'mm/memory.c': 'copy_page_tables()'
  12  */
  13
  14 #include <linux/config.h>
  15 #include <linux/malloc.h>
  16 #include <linux/init.h>
  17 #include <linux/unistd.h>
  18 #include <linux/smp_lock.h>
  19 #include <linux/module.h>
  20 #include <linux/vmalloc.h>
  21
  22 #include <asm/pgtable.h>
  23 #include <asm/pgalloc.h>
  24 #include <asm/uaccess.h>
  25 #include <asm/mmu_context.h>
  26
  27 /* The idle threads do not count.. */
  28 int nr_threads;
  29 int nr_running;
  30
  31 int max_threads;
  32 unsigned long total_forks;      /* Handle normal Linux uptimes. */
  33 int last_pid;
  34
  35 /* SLAB cache for mm_struct's. */
  36 kmem_cache_t *mm_cachep;
  37
  38 /* SLAB cache for files structs */
  39 kmem_cache_t *files_cachep;
  40
  41 struct task_struct *pidhash[PIDHASH_SZ];
  42
  43 /* UID task count cache, to prevent walking entire process list every
  44  * single fork() operation.
  45  */
  46 #define UIDHASH_SZ      (PIDHASH_SZ >> 2)
  47
  48 static struct user_struct {
  49         atomic_t count;
  50         struct user_struct *next, **pprev;
  51         unsigned int uid;
  52 } *uidhash[UIDHASH_SZ];
  53
  54 spinlock_t uidhash_lock = SPIN_LOCK_UNLOCKED;
  55
  56 kmem_cache_t *uid_cachep;
  57
  58 #define uidhashfn(uid)  (((uid >> 8) ^ uid) & (UIDHASH_SZ - 1))
  59
  60 /*
  61  * These routines must be called with the uidhash spinlock held!
  62  */
  63 static inline void uid_hash_insert(struct user_struct *up, unsigned int hashent)
  64 {
  65         if((up->next = uidhash[hashent]) != NULL)
  66                 uidhash[hashent]->pprev = &up->next;
  67         up->pprev = &uidhash[hashent];
  68         uidhash[hashent] = up;
  69 }
  70
  71 static inline void uid_hash_remove(struct user_struct *up)
  72 {
  73         if(up->next)
  74                 up->next->pprev = up->pprev;
  75         *up->pprev = up->next;
  76 }
  77
  78 static inline struct user_struct *uid_hash_find(unsigned short uid, unsigned int hashent)
  79 {
  80         struct user_struct *up, *next;
  81
  82         next = uidhash[hashent];
  83         for (;;) {
  84                 up = next;
  85                 if (next) {
  86                         next = up->next;
  87                         if (up->uid != uid)
  88                                 continue;
  89                         atomic_inc(&up->count);
  90                 }
  91                 break;
  92         }
  93         return up;
  94 }
  95
  96 /*
  97  * For SMP, we need to re-test the user struct counter
  98  * after having aquired the spinlock. This allows us to do
  99  * the common case (not freeing anything) without having
 100  * any locking.
 101  */
 102 #ifdef CONFIG_SMP
 103   #define uid_hash_free(up)     (!atomic_read(&(up)->count))
 104 #else
 105   #define uid_hash_free(up)     (1)
 106 #endif
 107
 108 void free_uid(struct task_struct *p)
 109 {
 110         struct user_struct *up = p->user;
 111
 112         if (up) {
 113                 p->user = NULL;
 114                 if (atomic_dec_and_test(&up->count)) {
 115                         spin_lock(&uidhash_lock);
 116                         if (uid_hash_free(up)) {
 117                                 uid_hash_remove(up);
 118                                 kmem_cache_free(uid_cachep, up);
 119                         }
 120                         spin_unlock(&uidhash_lock);
 121                 }
 122         }
 123 }
 124
 125 int alloc_uid(struct task_struct *p)
 126 {
 127         unsigned int hashent = uidhashfn(p->uid);
 128         struct user_struct *up;
 129
 130         spin_lock(&uidhash_lock);
 131         up = uid_hash_find(p->uid, hashent);
 132         spin_unlock(&uidhash_lock);
 133
 134         if (!up) {
 135                 struct user_struct *new;
 136
 137                 new = kmem_cache_alloc(uid_cachep, SLAB_KERNEL);
 138                 if (!new)
 139                         return -EAGAIN;
 140                 new->uid = p->uid;
 141                 atomic_set(&new->count, 1);
 142
 143                 /*
 144                  * Before adding this, check whether we raced
 145                  * on adding the same user already..
 146                  */
 147                 spin_lock(&uidhash_lock);
 148                 up = uid_hash_find(p->uid, hashent);
 149                 if (up) {
 150                         kmem_cache_free(uid_cachep, new);
 151                 } else {
 152                         uid_hash_insert(new, hashent);
 153                         up = new;
 154                 }
 155                 spin_unlock(&uidhash_lock);
 156
 157         }
 158         p->user = up;
 159         return 0;
 160 }
 161
 162 void add_wait_queue(wait_queue_head_t *q, wait_queue_t * wait)
 163 {
 164         unsigned long flags;
 165
 166         wq_write_lock_irqsave(&q->lock, flags);
 167         __add_wait_queue(q, wait);
 168         wq_write_unlock_irqrestore(&q->lock, flags);
 169 }
 170
 171 void add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t * wait)
 172 {
 173         unsigned long flags;
 174
 175         wq_write_lock_irqsave(&q->lock, flags);
 176         __add_wait_queue_tail(q, wait);
 177         wq_write_unlock_irqrestore(&q->lock, flags);
 178 }
 179
 180 void remove_wait_queue(wait_queue_head_t *q, wait_queue_t * wait)
 181 {
 182         unsigned long flags;
 183
 184         wq_write_lock_irqsave(&q->lock, flags);
 185         __remove_wait_queue(q, wait);
 186         wq_write_unlock_irqrestore(&q->lock, flags);
 187 }
 188
 189 void __init fork_init(unsigned long mempages)
 190 {
 191         int i;
 192
 193         uid_cachep = kmem_cache_create("uid_cache", sizeof(struct user_struct),
 194                                        0,
 195                                        SLAB_HWCACHE_ALIGN, NULL, NULL);
 196         if(!uid_cachep)
 197                 panic("Cannot create uid taskcount SLAB cache\n");
 198
 199         for(i = 0; i < UIDHASH_SZ; i++)
 200                 uidhash[i] = 0;
 201
 202         /*
 203          * The default maximum number of threads is set to a safe
 204          * value: the thread structures can take up at most half
 205          * of memory.
 206          */
 207         max_threads = mempages / (THREAD_SIZE/PAGE_SIZE) / 2;
 208
 209         init_task.rlim[RLIMIT_NPROC].rlim_cur = max_threads/2;
 210         init_task.rlim[RLIMIT_NPROC].rlim_max = max_threads/2;
 211 }
 212
 213 /* Protects next_safe and last_pid. */
 214 spinlock_t lastpid_lock = SPIN_LOCK_UNLOCKED;
 215
 216 static int get_pid(unsigned long flags)
 217 {
 218         static int next_safe = PID_MAX;
 219         struct task_struct *p;
 220
 221         if (flags & CLONE_PID)
 222                 return current->pid;
 223
 224         spin_lock(&lastpid_lock);
 225         if((++last_pid) & 0xffff8000) {
 226                 last_pid = 300;         /* Skip daemons etc. */
 227                 goto inside;
 228         }
 229         if(last_pid >= next_safe) {
 230 inside:
 231                 next_safe = PID_MAX;
 232                 read_lock(&tasklist_lock);
 233         repeat:
 234                 for_each_task(p) {
 235                         if(p->pid == last_pid   ||
 236                            p->pgrp == last_pid  ||
 237                            p->session == last_pid) {
 238                                 if(++last_pid >= next_safe) {
 239                                         if(last_pid & 0xffff8000)
 240                                                 last_pid = 300;
 241                                         next_safe = PID_MAX;
 242                                 }
 243                                 goto repeat;
 244                         }
 245                         if(p->pid > last_pid && next_safe > p->pid)
 246                                 next_safe = p->pid;
 247                         if(p->pgrp > last_pid && next_safe > p->pgrp)
 248                                 next_safe = p->pgrp;
 249                         if(p->session > last_pid && next_safe > p->session)
 250                                 next_safe = p->session;
 251                 }
 252                 read_unlock(&tasklist_lock);
 253         }
 254         spin_unlock(&lastpid_lock);
 255
 256         return last_pid;
 257 }
 258
 259 static inline int dup_mmap(struct mm_struct * mm)
 260 {
 261         struct vm_area_struct * mpnt, *tmp, **pprev;
 262         int retval;
 263
 264         /* Kill me slowly. UGLY! FIXME! */
 265         memcpy(&mm->start_code, &current->mm->start_code, 15*sizeof(unsigned long));
 266
 267         flush_cache_mm(current->mm);
 268         pprev = &mm->mmap;
 269         for (mpnt = current->mm->mmap ; mpnt ; mpnt = mpnt->vm_next) {
 270                 struct file *file;
 271
 272                 retval = -ENOMEM;
 273                 if(mpnt->vm_flags & VM_DONTCOPY)
 274                         continue;
 275                 tmp = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
 276                 if (!tmp)
 277                         goto fail_nomem;
 278                 *tmp = *mpnt;
 279                 tmp->vm_flags &= ~VM_LOCKED;
 280                 tmp->vm_mm = mm;
 281                 mm->map_count++;
 282                 tmp->vm_next = NULL;
 283                 file = tmp->vm_file;
 284                 if (file) {
 285                         struct inode *inode = file->f_dentry->d_inode;
 286                         get_file(file);
 287                         if (tmp->vm_flags & VM_DENYWRITE)
 288                                 atomic_dec(&inode->i_writecount);
 289
 290                         /* insert tmp into the share list, just after mpnt */
 291                         spin_lock(&inode->i_mapping->i_shared_lock);
 292                         if((tmp->vm_next_share = mpnt->vm_next_share) != NULL)
 293                                 mpnt->vm_next_share->vm_pprev_share =
 294                                         &tmp->vm_next_share;
 295                         mpnt->vm_next_share = tmp;
 296                         tmp->vm_pprev_share = &mpnt->vm_next_share;
 297                         spin_unlock(&inode->i_mapping->i_shared_lock);
 298                 }
 299
 300                 /* Copy the pages, but defer checking for errors */
 301                 retval = copy_page_range(mm, current->mm, tmp);
 302                 if (!retval && tmp->vm_ops && tmp->vm_ops->open)
 303                         tmp->vm_ops->open(tmp);
 304
 305                 /*
 306                  * Link in the new vma even if an error occurred,
 307                  * so that exit_mmap() can clean up the mess.
 308                  */
 309                 tmp->vm_next = *pprev;
 310                 *pprev = tmp;
 311
 312                 pprev = &tmp->vm_next;
 313                 if (retval)
 314                         goto fail_nomem;
 315         }
 316         retval = 0;
 317         if (mm->map_count >= AVL_MIN_MAP_COUNT)
 318                 build_mmap_avl(mm);
 319
 320 fail_nomem:
 321         flush_tlb_mm(current->mm);
 322         return retval;
 323 }
 324
 325 /*
 326  * Allocate and initialize an mm_struct.
 327  */
 328 struct mm_struct * mm_alloc(void)
 329 {
 330         struct mm_struct * mm;
 331
 332         mm = kmem_cache_alloc(mm_cachep, SLAB_KERNEL);
 333         if (mm) {
 334                 memset(mm, 0, sizeof(*mm));
 335                 atomic_set(&mm->mm_users, 1);
 336                 atomic_set(&mm->mm_count, 1);
 337                 init_MUTEX(&mm->mmap_sem);
 338                 mm->page_table_lock = SPIN_LOCK_UNLOCKED;
 339                 mm->pgd = pgd_alloc();
 340                 if (mm->pgd)
 341                         return mm;
 342                 kmem_cache_free(mm_cachep, mm);
 343         }
 344         return NULL;
 345 }
 346
 347 /*
 348  * Called when the last reference to the mm
 349  * is dropped: either by a lazy thread or by
 350  * mmput. Free the page directory and the mm.
 351  */
 352 inline void __mmdrop(struct mm_struct *mm)
 353 {
 354         if (mm == &init_mm) BUG();
 355         pgd_free(mm->pgd);
 356         destroy_context(mm);
 357         kmem_cache_free(mm_cachep, mm);
 358 }
 359
 360 /*
 361  * Decrement the use count and release all resources for an mm.
 362  */
 363 void mmput(struct mm_struct *mm)
 364 {
 365         if (atomic_dec_and_test(&mm->mm_users)) {
 366                 exit_mmap(mm);
 367                 mmdrop(mm);
 368         }
 369 }
 370
 371 /* Please note the differences between mmput and mm_release.
 372  * mmput is called whenever we stop holding onto a mm_struct,
 373  * error success whatever.
 374  *
 375  * mm_release is called after a mm_struct has been removed
 376  * from the current process.
 377  *
 378  * This difference is important for error handling, when we
 379  * only half set up a mm_struct for a new process and need to restore
 380  * the old one.  Because we mmput the new mm_struct before
 381  * restoring the old one. . .
 382  * Eric Biederman 10 January 1998
 383  */
 384 void mm_release(void)
 385 {
 386         struct task_struct *tsk = current;
 387
 388         /* notify parent sleeping on vfork() */
 389         if (tsk->flags & PF_VFORK) {
 390                 tsk->flags &= ~PF_VFORK;
 391                 up(tsk->p_opptr->vfork_sem);
 392         }
 393 }
 394
 395 static inline int copy_mm(unsigned long clone_flags, struct task_struct * tsk)
 396 {
 397         struct mm_struct * mm;
 398         int retval;
 399
 400         tsk->min_flt = tsk->maj_flt = 0;
 401         tsk->cmin_flt = tsk->cmaj_flt = 0;
 402         tsk->nswap = tsk->cnswap = 0;
 403
 404         tsk->mm = NULL;
 405         tsk->active_mm = NULL;
 406
 407         /*
 408          * Are we cloning a kernel thread?
 409          *
 410          * We need to steal a active VM for that..
 411          */
 412         mm = current->mm;
 413         if (!mm)
 414                 return 0;
 415
 416         if (clone_flags & CLONE_VM) {
 417                 atomic_inc(&mm->mm_users);
 418                 goto good_mm;
 419         }
 420
 421         retval = -ENOMEM;
 422         mm = mm_alloc();
 423         if (!mm)
 424                 goto fail_nomem;
 425
 426         tsk->mm = mm;
 427         tsk->active_mm = mm;
 428
 429         /*
 430          * child gets a private LDT (if there was an LDT in the parent)
 431          */
 432         copy_segments(tsk, mm);
 433
 434         down(&current->mm->mmap_sem);
 435         retval = dup_mmap(mm);
 436         up(&current->mm->mmap_sem);
 437         if (retval)
 438                 goto free_pt;
 439
 440 good_mm:
 441         tsk->mm = mm;
 442         tsk->active_mm = mm;
 443         init_new_context(tsk,mm);
 444         return 0;
 445
 446 free_pt:
 447         mmput(mm);
 448 fail_nomem:
 449         return retval;
 450 }
 451
 452 static inline struct fs_struct *__copy_fs_struct(struct fs_struct *old)
 453 {
 454         struct fs_struct *fs = kmalloc(sizeof(*old), GFP_KERNEL);
 455         /* We don't need to lock fs - think why ;-) */
 456         if (fs) {
 457                 atomic_set(&fs->count, 1);
 458                 fs->lock = RW_LOCK_UNLOCKED;
 459                 fs->umask = old->umask;
 460                 read_lock(&old->lock);
 461                 fs->rootmnt = mntget(old->rootmnt);
 462                 fs->root = dget(old->root);
 463                 fs->pwdmnt = mntget(old->pwdmnt);
 464                 fs->pwd = dget(old->pwd);
 465                 if (old->altroot) {
 466                         fs->altrootmnt = mntget(old->altrootmnt);
 467                         fs->altroot = dget(old->altroot);
 468                 } else {
 469                         fs->altrootmnt = NULL;
 470                         fs->altroot = NULL;
 471                 }
 472                 read_unlock(&old->lock);
 473         }
 474         return fs;
 475 }
 476
 477 struct fs_struct *copy_fs_struct(struct fs_struct *old)
 478 {
 479         return __copy_fs_struct(old);
 480 }
 481
 482 static inline int copy_fs(unsigned long clone_flags, struct task_struct * tsk)
 483 {
 484         if (clone_flags & CLONE_FS) {
 485                 atomic_inc(&current->fs->count);
 486                 return 0;
 487         }
 488         tsk->fs = __copy_fs_struct(current->fs);
 489         if (!tsk->fs)
 490                 return -1;
 491         return 0;
 492 }
 493
 494 static int count_open_files(struct files_struct *files, int size)
 495 {
 496         int i;
 497
 498         /* Find the last open fd */
 499         for (i = size/(8*sizeof(long)); i > 0; ) {
 500                 if (files->open_fds->fds_bits[--i])
 501                         break;
 502         }
 503         i = (i+1) * 8 * sizeof(long);
 504         return i;
 505 }
 506
 507 static int copy_files(unsigned long clone_flags, struct task_struct * tsk)
 508 {
 509         struct files_struct *oldf, *newf;
 510         struct file **old_fds, **new_fds;
 511         int open_files, nfds, size, i, error = 0;
 512
 513         /*
 514          * A background process may not have any files ...
 515          */
 516         oldf = current->files;
 517         if (!oldf)
 518                 goto out;
 519
 520         if (clone_flags & CLONE_FILES) {
 521                 atomic_inc(&oldf->count);
 522                 goto out;
 523         }
 524
 525         tsk->files = NULL;
 526         error = -ENOMEM;
 527         newf = kmem_cache_alloc(files_cachep, SLAB_KERNEL);
 528         if (!newf)
 529                 goto out;
 530
 531         atomic_set(&newf->count, 1);
 532
 533         newf->file_lock     = RW_LOCK_UNLOCKED;
 534         newf->next_fd       = 0;
 535         newf->max_fds       = NR_OPEN_DEFAULT;
 536         newf->max_fdset     = __FD_SETSIZE;
 537         newf->close_on_exec = &newf->close_on_exec_init;
 538         newf->open_fds      = &newf->open_fds_init;
 539         newf->fd            = &newf->fd_array[0];
 540
 541         /* We don't yet have the oldf readlock, but even if the old
 542            fdset gets grown now, we'll only copy up to "size" fds */
 543         size = oldf->max_fdset;
 544         if (size > __FD_SETSIZE) {
 545                 newf->max_fdset = 0;
 546                 write_lock(&newf->file_lock);
 547                 error = expand_fdset(newf, size);
 548                 write_unlock(&newf->file_lock);
 549                 if (error)
 550                         goto out_release;
 551         }
 552         read_lock(&oldf->file_lock);
 553
 554         open_files = count_open_files(oldf, size);
 555
 556         /*
 557          * Check whether we need to allocate a larger fd array.
 558          * Note: we're not a clone task, so the open count won't
 559          * change.
 560          */
 561         nfds = NR_OPEN_DEFAULT;
 562         if (open_files > nfds) {
 563                 read_unlock(&oldf->file_lock);
 564                 newf->max_fds = 0;
 565                 write_lock(&newf->file_lock);
 566                 error = expand_fd_array(newf, open_files);
 567                 write_unlock(&newf->file_lock);
 568                 if (error)
 569                         goto out_release;
 570                 nfds = newf->max_fds;
 571                 read_lock(&oldf->file_lock);
 572         }
 573
 574         old_fds = oldf->fd;
 575         new_fds = newf->fd;
 576
 577         memcpy(newf->open_fds->fds_bits, oldf->open_fds->fds_bits, open_files/8);
 578         memcpy(newf->close_on_exec->fds_bits, oldf->close_on_exec->fds_bits, open_files/8);
 579
 580         for (i = open_files; i != 0; i--) {
 581                 struct file *f = *old_fds++;
 582                 if (f)
 583                         get_file(f);
 584                 *new_fds++ = f;
 585         }
 586         read_unlock(&oldf->file_lock);
 587
 588         /* compute the remainder to be cleared */
 589         size = (newf->max_fds - open_files) * sizeof(struct file *);
 590
 591         /* This is long word aligned thus could use a optimized version */
 592         memset(new_fds, 0, size);
 593
 594         if (newf->max_fdset > open_files) {
 595                 int left = (newf->max_fdset-open_files)/8;
 596                 int start = open_files / (8 * sizeof(unsigned long));
 597
 598                 memset(&newf->open_fds->fds_bits[start], 0, left);
 599                 memset(&newf->close_on_exec->fds_bits[start], 0, left);
 600         }
 601
 602         tsk->files = newf;
 603         error = 0;
 604 out:
 605         return error;
 606
 607 out_release:
 608         free_fdset (newf->close_on_exec, newf->max_fdset);
 609         free_fdset (newf->open_fds, newf->max_fdset);
 610         kmem_cache_free(files_cachep, newf);
 611         goto out;
 612 }
 613
 614 static inline int copy_sighand(unsigned long clone_flags, struct task_struct * tsk)
 615 {
 616         if (clone_flags & CLONE_SIGHAND) {
 617                 atomic_inc(&current->sig->count);
 618                 return 0;
 619         }
 620         tsk->sig = kmalloc(sizeof(*tsk->sig), GFP_KERNEL);
 621         if (!tsk->sig)
 622                 return -1;
 623         spin_lock_init(&tsk->sig->siglock);
 624         atomic_set(&tsk->sig->count, 1);
 625         memcpy(tsk->sig->action, current->sig->action, sizeof(tsk->sig->action));
 626         return 0;
 627 }
 628
 629 static inline void copy_flags(unsigned long clone_flags, struct task_struct *p)
 630 {
 631         unsigned long new_flags = p->flags;
 632
 633         new_flags &= ~(PF_SUPERPRIV | PF_USEDFPU | PF_VFORK);
 634         new_flags |= PF_FORKNOEXEC;
 635         if (!(clone_flags & CLONE_PTRACE))
 636                 p->ptrace = 0;
 637         if (clone_flags & CLONE_VFORK)
 638                 new_flags |= PF_VFORK;
 639         p->flags = new_flags;
 640 }
 641
 642 /*
 643  *  Ok, this is the main fork-routine. It copies the system process
 644  * information (task[nr]) and sets up the necessary registers. It
 645  * also copies the data segment in its entirety.
 646  */
 647 int do_fork(unsigned long clone_flags, unsigned long usp, struct pt_regs *regs)
 648 {
 649         int retval = -ENOMEM;
 650         struct task_struct *p;
 651         DECLARE_MUTEX_LOCKED(sem);
 652
 653         if (clone_flags & CLONE_PID) {
 654                 /* This is only allowed from the boot up thread */
 655                 if (current->pid)
 656                         return -EPERM;
 657         }
 658
 659         current->vfork_sem = &sem;
 660
 661         p = alloc_task_struct();
 662         if (!p)
 663                 goto fork_out;
 664
 665         *p = *current;
 666
 667         lock_kernel();
 668
 669         retval = -EAGAIN;
 670         if (p->user) {
 671                 if (atomic_read(&p->user->count) >= p->rlim[RLIMIT_NPROC].rlim_cur)
 672                         goto bad_fork_free;
 673                 atomic_inc(&p->user->count);
 674         }
 675
 676         /*
 677          * Counter increases are protected by
 678          * the kernel lock so nr_threads can't
 679          * increase under us (but it may decrease).
 680          */
 681         if (nr_threads >= max_threads)
 682                 goto bad_fork_cleanup_count;
 683
 684         if (p->exec_domain && p->exec_domain->module)
 685                 __MOD_INC_USE_COUNT(p->exec_domain->module);
 686         if (p->binfmt && p->binfmt->module)
 687                 __MOD_INC_USE_COUNT(p->binfmt->module);
 688
 689         p->did_exec = 0;
 690         p->swappable = 0;
 691         p->state = TASK_UNINTERRUPTIBLE;
 692
 693         copy_flags(clone_flags, p);
 694         p->pid = get_pid(clone_flags);
 695
 696         p->run_list.next = NULL;
 697         p->run_list.prev = NULL;
 698
 699         if ((clone_flags & CLONE_VFORK) || !(clone_flags & CLONE_PARENT)) {
 700                 p->p_opptr = current;
 701                 if (!(p->ptrace & PT_PTRACED))
 702                         p->p_pptr = current;
 703         }
 704         p->p_cptr = NULL;
 705         init_waitqueue_head(&p->wait_chldexit);
 706         p->vfork_sem = NULL;
 707         spin_lock_init(&p->alloc_lock);
 708
 709         p->sigpending = 0;
 710         sigemptyset(&p->signal);
 711         p->sigqueue = NULL;
 712         p->sigqueue_tail = &p->sigqueue;
 713
 714         p->it_real_value = p->it_virt_value = p->it_prof_value = 0;
 715         p->it_real_incr = p->it_virt_incr = p->it_prof_incr = 0;
 716         init_timer(&p->real_timer);
 717         p->real_timer.data = (unsigned long) p;
 718
 719         p->leader = 0;          /* session leadership doesn't inherit */
 720         p->tty_old_pgrp = 0;
 721         p->times.tms_utime = p->times.tms_stime = 0;
 722         p->times.tms_cutime = p->times.tms_cstime = 0;
 723 #ifdef CONFIG_SMP
 724         {
 725                 int i;
 726                 p->has_cpu = 0;
 727                 p->processor = current->processor;
 728                 /* ?? should we just memset this ?? */
 729                 for(i = 0; i < smp_num_cpus; i++)
 730                         p->per_cpu_utime[i] = p->per_cpu_stime[i] = 0;
 731                 spin_lock_init(&p->sigmask_lock);
 732         }
 733 #endif
 734         p->lock_depth = -1;             /* -1 = no lock */
 735         p->start_time = jiffies;
 736
 737         retval = -ENOMEM;
 738         /* copy all the process information */
 739         if (copy_files(clone_flags, p))
 740                 goto bad_fork_cleanup;
 741         if (copy_fs(clone_flags, p))
 742                 goto bad_fork_cleanup_files;
 743         if (copy_sighand(clone_flags, p))
 744                 goto bad_fork_cleanup_fs;
 745         if (copy_mm(clone_flags, p))
 746                 goto bad_fork_cleanup_sighand;
 747         retval = copy_thread(0, clone_flags, usp, p, regs);
 748         if (retval)
 749                 goto bad_fork_cleanup_sighand;
 750         p->semundo = NULL;
 751
 752         /* Our parent execution domain becomes current domain
 753            These must match for thread signalling to apply */
 754
 755         p->parent_exec_id = p->self_exec_id;
 756
 757         /* ok, now we should be set up.. */
 758         p->swappable = 1;
 759         p->exit_signal = clone_flags & CSIGNAL;
 760         p->pdeath_signal = 0;
 761
 762         /*
 763          * "share" dynamic priority between parent and child, thus the
 764          * total amount of dynamic priorities in the system doesnt change,
 765          * more scheduling fairness. This is only important in the first
 766          * timeslice, on the long run the scheduling behaviour is unchanged.
 767          */
 768         p->counter = (current->counter + 1) >> 1;
 769         current->counter >>= 1;
 770         if (!current->counter)
 771                 current->need_resched = 1;
 772
 773         /*
 774          * Ok, add it to the run-queues and make it
 775          * visible to the rest of the system.
 776          *
 777          * Let it rip!
 778          */
 779         retval = p->pid;
 780         write_lock_irq(&tasklist_lock);
 781         SET_LINKS(p);
 782         hash_pid(p);
 783         nr_threads++;
 784         write_unlock_irq(&tasklist_lock);
 785
 786         wake_up_process(p);             /* do this last */
 787         ++total_forks;
 788
 789 bad_fork:
 790         unlock_kernel();
 791 fork_out:
 792         if ((clone_flags & CLONE_VFORK) && (retval > 0))
 793                 down(&sem);
 794         return retval;
 795
 796 bad_fork_cleanup_sighand:
 797         exit_sighand(p);
 798 bad_fork_cleanup_fs:
 799         exit_fs(p); /* blocking */
 800 bad_fork_cleanup_files:
 801         exit_files(p); /* blocking */
 802 bad_fork_cleanup:
 803         put_exec_domain(p->exec_domain);
 804         if (p->binfmt && p->binfmt->module)
 805                 __MOD_DEC_USE_COUNT(p->binfmt->module);
 806 bad_fork_cleanup_count:
 807         if (p->user)
 808                 free_uid(p);
 809 bad_fork_free:
 810         free_task_struct(p);
 811         goto bad_fork;
 812 }
 813
 814 void __init filescache_init(void)
 815 {
 816         files_cachep = kmem_cache_create("files_cache",
 817                                          sizeof(struct files_struct),
 818                                          0,
 819                                          SLAB_HWCACHE_ALIGN,
 820                                          NULL, NULL);
 821         if (!files_cachep)
 822                 panic("Cannot create files cache");
 823 }