/*
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *
 *  'fork.c' contains the help-routines for the 'fork' system call
 *  (see also system_call.s).
 *  Fork is rather simple, once you get the hang of it, but the memory
 *  management can be a bitch. See 'mm/mm.c': 'copy_page_tables()'
 */

#include <linux/malloc.h>
#include <linux/init.h>
#include <linux/unistd.h>
#include <linux/smp_lock.h>
#include <linux/module.h>
#include <linux/vmalloc.h>

#include <asm/pgtable.h>
#include <asm/mmu_context.h>
#include <asm/uaccess.h>
/* The idle threads do not count.. */
int nr_threads;

int max_threads;
unsigned long total_forks = 0;	/* Handle normal Linux uptimes. */
int last_pid;
/* SLAB cache for mm_struct's. */
kmem_cache_t *mm_cachep;

/* SLAB cache for files structs */
kmem_cache_t *files_cachep;

struct task_struct *pidhash[PIDHASH_SZ];
/* UID task count cache, to prevent walking entire process list every
 * single fork() operation.
 */
#define UIDHASH_SZ	(PIDHASH_SZ >> 2)

static struct user_struct {
	atomic_t count;			/* How many processes got a reference */
	struct user_struct *next, **pprev;
	unsigned int uid;
} *uidhash[UIDHASH_SZ];

spinlock_t uidhash_lock = SPIN_LOCK_UNLOCKED;

kmem_cache_t *uid_cachep;

#define uidhashfn(uid)	(((uid >> 8) ^ uid) & (UIDHASH_SZ - 1))
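/*
 * The hash folds the second byte of the uid back into the low bits so
 * that uids clustered into ranges still spread over the table.  For
 * illustration, assuming PIDHASH_SZ is 1024 (its usual value in this
 * era), UIDHASH_SZ is 256 and:
 *
 *	uidhashfn(1000) == ((1000 >> 8) ^ 1000) & 255
 *	                == (3 ^ 1000) & 255
 *	                == 1003 & 255 == 235
 */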
/*
 * These routines must be called with the uidhash spinlock held!
 */
static inline void uid_hash_insert(struct user_struct *up, unsigned int hashent)
{
	if((up->next = uidhash[hashent]) != NULL)
		uidhash[hashent]->pprev = &up->next;
	up->pprev = &uidhash[hashent];
	uidhash[hashent] = up;
}
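/*
 * Each hash chain is a singly linked list with a back pointer: 'next'
 * points at the following entry, while 'pprev' points back at whatever
 * pointer refers to this entry (either the hash table slot or the
 * previous entry's 'next').  That is what lets uid_hash_remove() below
 * unlink an entry in O(1) without rescanning the chain for its
 * predecessor.
 */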
static inline void uid_hash_remove(struct user_struct *up)
{
	if(up->next)
		up->next->pprev = up->pprev;
	*up->pprev = up->next;
}
static inline struct user_struct *uid_hash_find(unsigned short uid, unsigned int hashent)
{
	struct user_struct *up, *next;

	next = uidhash[hashent];
	for (;;) {
		up = next;
		if (next) {
			next = up->next;
			if (up->uid != uid)
				continue;
			atomic_inc(&up->count);
		}
		break;
	}
	return up;
}

/*
 * For SMP, we need to re-test the user struct counter
 * after having acquired the spinlock. This allows us to do
 * the common case (not freeing anything) without having
 * to take the spinlock at all.
 */
#ifdef __SMP__
  #define uid_hash_free(up)	(!atomic_read(&(up)->count))
#else
  #define uid_hash_free(up)	(1)
#endif
void free_uid(struct task_struct *p)
{
	struct user_struct *up = p->user;

	if (up) {
		p->user = NULL;
		if (atomic_dec_and_test(&up->count)) {
			spin_lock(&uidhash_lock);
			if (uid_hash_free(up)) {
				uid_hash_remove(up);
				kmem_cache_free(uid_cachep, up);
			}
			spin_unlock(&uidhash_lock);
		}
	}
}
int alloc_uid(struct task_struct *p)
{
	unsigned int hashent = uidhashfn(p->uid);
	struct user_struct *up;

	spin_lock(&uidhash_lock);
	up = uid_hash_find(p->uid, hashent);
	spin_unlock(&uidhash_lock);

	if (!up) {
		struct user_struct *new;

		new = kmem_cache_alloc(uid_cachep, SLAB_KERNEL);
		if (!new)
			return -EAGAIN;
		new->uid = p->uid;
		atomic_set(&new->count, 1);

		/*
		 * Before adding this, check whether we raced
		 * on adding the same user already..
		 */
		spin_lock(&uidhash_lock);
		up = uid_hash_find(p->uid, hashent);
		if (up) {
			kmem_cache_free(uid_cachep, new);
		} else {
			uid_hash_insert(new, hashent);
			up = new;
		}
		spin_unlock(&uidhash_lock);
	}
	p->user = up;
	return 0;
}
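/*
 * The lock is dropped around the allocation because kmem_cache_alloc()
 * with SLAB_KERNEL may sleep, and uidhash_lock is a spinlock.  Hence
 * the second uid_hash_find(): another task may have inserted the same
 * uid while we slept, in which case the freshly allocated entry is
 * simply thrown away again.
 */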
void __init fork_init(unsigned long memsize)
{
	int i;

	uid_cachep = kmem_cache_create("uid_cache", sizeof(struct user_struct),
				       0,
				       SLAB_HWCACHE_ALIGN, NULL, NULL);
	if (!uid_cachep)
		panic("Cannot create uid taskcount SLAB cache\n");

	for(i = 0; i < UIDHASH_SZ; i++)
		uidhash[i] = NULL;

	/*
	 * The default maximum number of threads is set to a safe
	 * value: the thread structures can take up at most half
	 * of memory.
	 */
	max_threads = memsize / THREAD_SIZE / 2;

	init_task.rlim[RLIMIT_NPROC].rlim_cur = max_threads/2;
	init_task.rlim[RLIMIT_NPROC].rlim_max = max_threads/2;
}
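/*
 * Rough example of the sizing above, assuming memsize is the memory
 * size in bytes and THREAD_SIZE is 8 KB (two pages on i386): with
 * 64 MB of RAM, max_threads = 64M / 8K / 2 = 4096, so task structures
 * and kernel stacks can never eat more than half of memory, and a
 * single user is limited to half of that again (2048 processes) via
 * RLIMIT_NPROC.
 */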
/* Protects next_safe and last_pid. */
spinlock_t lastpid_lock = SPIN_LOCK_UNLOCKED;
static int get_pid(unsigned long flags)
{
	static int next_safe = PID_MAX;
	struct task_struct *p;

	if (flags & CLONE_PID)
		return current->pid;

	spin_lock(&lastpid_lock);
	if((++last_pid) & 0xffff8000) {
		last_pid = 300;		/* Skip daemons etc. */
		goto inside;
	}
	if(last_pid >= next_safe) {
inside:
		next_safe = PID_MAX;
		read_lock(&tasklist_lock);
	repeat:
		for_each_task(p) {
			if(p->pid == last_pid ||
			   p->pgrp == last_pid ||
			   p->session == last_pid) {
				if(++last_pid >= next_safe) {
					if(last_pid & 0xffff8000)
						last_pid = 300;
					next_safe = PID_MAX;
				}
				goto repeat;
			}
			if(p->pid > last_pid && next_safe > p->pid)
				next_safe = p->pid;
			if(p->pgrp > last_pid && next_safe > p->pgrp)
				next_safe = p->pgrp;
			if(p->session > last_pid && next_safe > p->session)
				next_safe = p->session;
		}
		read_unlock(&tasklist_lock);
	}
	spin_unlock(&lastpid_lock);

	return last_pid;
}
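/*
 * next_safe is what makes this cheap in the common case: the tasklist
 * walk records the smallest pid/pgrp/session above last_pid that is
 * still in use, so every later get_pid() that stays below next_safe can
 * just hand out ++last_pid without scanning the task list at all.  The
 * 0xffff8000 test wraps last_pid back to 300 (skipping the low pids
 * used by boot-time daemons) once it crosses the 15-bit PID_MAX.
 */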
static inline int dup_mmap(struct mm_struct * mm)
{
	struct vm_area_struct * mpnt, *tmp, **pprev;
	int retval;

	/* Kill me slowly. UGLY! FIXME! */
	memcpy(&mm->start_code, &current->mm->start_code, 15*sizeof(unsigned long));
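	/*
	 * The memcpy above bulk-copies the block of scalar fields that
	 * starts at mm->start_code (roughly the code/data/brk/stack/arg/env
	 * boundaries and the vm counters); the "15" relies on those fields
	 * being laid out contiguously in mm_struct, which is why the
	 * comment above begs for a cleaner replacement.
	 */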
	flush_cache_mm(current->mm);
	pprev = &mm->mmap;
	for (mpnt = current->mm->mmap ; mpnt ; mpnt = mpnt->vm_next) {
		struct file *file;

		retval = -ENOMEM;
		tmp = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
		if (!tmp)
			goto fail_nomem;
		*tmp = *mpnt;
		tmp->vm_flags &= ~VM_LOCKED;
		tmp->vm_mm = mm;
		mm->map_count++;
		file = tmp->vm_file;
		if (file) {
			get_file(file);
			if (tmp->vm_flags & VM_DENYWRITE)
				atomic_dec(&file->f_dentry->d_inode->i_writecount);

			/* insert tmp into the share list, just after mpnt */
			spin_lock(&file->f_dentry->d_inode->i_shared_lock);
			if((tmp->vm_next_share = mpnt->vm_next_share) != NULL)
				mpnt->vm_next_share->vm_pprev_share =
							&tmp->vm_next_share;
			mpnt->vm_next_share = tmp;
			tmp->vm_pprev_share = &mpnt->vm_next_share;
			spin_unlock(&file->f_dentry->d_inode->i_shared_lock);
		}

		/* Copy the pages, but defer checking for errors */
		retval = copy_page_range(mm, current->mm, tmp);
		if (!retval && tmp->vm_ops && tmp->vm_ops->open)
			tmp->vm_ops->open(tmp);

		/*
		 * Link in the new vma even if an error occurred,
		 * so that exit_mmap() can clean up the mess.
		 */
		tmp->vm_next = *pprev;
		*pprev = tmp;
		pprev = &tmp->vm_next;

		if (retval)
			goto fail_nomem;
	}
	retval = 0;
	if (mm->map_count >= AVL_MIN_MAP_COUNT)
		build_mmap_avl(mm);

fail_nomem:
	flush_tlb_mm(current->mm);
	return retval;
}
/*
 * Allocate and initialize an mm_struct.
 */
struct mm_struct * mm_alloc(void)
{
	struct mm_struct * mm;

	mm = kmem_cache_alloc(mm_cachep, SLAB_KERNEL);
	if (mm) {
		memset(mm, 0, sizeof(*mm));
		atomic_set(&mm->mm_users, 1);
		atomic_set(&mm->mm_count, 1);
		init_MUTEX(&mm->mmap_sem);
		mm->page_table_lock = SPIN_LOCK_UNLOCKED;
		mm->pgd = pgd_alloc();
		if (mm->pgd)
			return mm;
		kmem_cache_free(mm_cachep, mm);
		mm = NULL;
	}
	return mm;
}
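/*
 * Note the two reference counts set up above: mm_users counts tasks
 * that actually use this address space (it is what mmput() drops),
 * while mm_count also covers "anonymous" references such as lazy-TLB
 * kernel threads that merely borrow the mm; the structure itself is
 * only freed by __mmdrop() once mm_count reaches zero.
 */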
/*
 * Called when the last reference to the mm
 * is dropped: either by a lazy thread or by
 * mmput. Free the page directory and the mm.
 */
inline void __mmdrop(struct mm_struct *mm)
{
	if (mm == &init_mm) BUG();
	pgd_free(mm->pgd);
	kmem_cache_free(mm_cachep, mm);
}
/*
 * Decrement the use count and release all resources for an mm.
 */
void mmput(struct mm_struct *mm)
{
	if (atomic_dec_and_test(&mm->mm_users)) {
		exit_mmap(mm);
		mmdrop(mm);
	}
}
/* Please note the differences between mmput and mm_release.
 * mmput is called whenever we stop holding onto a mm_struct,
 * error or success, whatever.
 *
 * mm_release is called after a mm_struct has been removed
 * from the current process.
 *
 * This difference is important for error handling, when we
 * only half set up a mm_struct for a new process and need to restore
 * the old one. Because we mmput the new mm_struct before
 * restoring the old one. . .
 * Eric Biederman 10 January 1998
 */
void mm_release(void)
{
	struct task_struct *tsk = current;

	/* notify parent sleeping on vfork() */
	if (tsk->flags & PF_VFORK) {
		tsk->flags &= ~PF_VFORK;
		up(tsk->p_opptr->vfork_sem);
	}
}
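/*
 * This is the child's half of the vfork() handshake: do_fork() leaves
 * the parent sleeping in down(&sem) on the semaphore it published
 * through current->vfork_sem, and the up() above (run when the child
 * gives back the borrowed mm on exec or exit) is what lets the parent
 * continue.
 */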
static inline int copy_mm(unsigned long clone_flags, struct task_struct * tsk)
{
	struct mm_struct * mm;
	int retval;

	tsk->min_flt = tsk->maj_flt = 0;
	tsk->cmin_flt = tsk->cmaj_flt = 0;
	tsk->nswap = tsk->cnswap = 0;

	tsk->mm = NULL;
	tsk->active_mm = NULL;

	/*
	 * Are we cloning a kernel thread?
	 *
	 * We need to steal an active VM for that..
	 */
	mm = current->mm;
	if (!mm)
		return 0;

	if (clone_flags & CLONE_VM) {
		atomic_inc(&mm->mm_users);
		goto good_mm;
	}

	retval = -ENOMEM;
	mm = mm_alloc();
	if (!mm)
		goto fail_nomem;

	tsk->mm = mm;
	tsk->active_mm = mm;

	/*
	 * child gets a private LDT (if there was an LDT in the parent)
	 */
	copy_segments(tsk, mm);

	down(&current->mm->mmap_sem);
	retval = dup_mmap(mm);
	up(&current->mm->mmap_sem);
	if (retval)
		goto free_pt;

good_mm:
	tsk->mm = mm;
	tsk->active_mm = mm;
	init_new_context(tsk, mm);
	return 0;

free_pt:
	mmput(mm);
fail_nomem:
	return retval;
}
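/*
 * In short: a CLONE_VM child (a thread) just bumps mm_users and shares
 * the parent's page tables, while a plain fork() pays for mm_alloc()
 * plus a full dup_mmap() walk of every vma.  Kernel threads, which have
 * no mm of their own, skip all of this and keep borrowing whatever
 * active_mm they are given.
 */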
static inline int copy_fs(unsigned long clone_flags, struct task_struct * tsk)
{
	if (clone_flags & CLONE_FS) {
		atomic_inc(&current->fs->count);
		return 0;
	}
	tsk->fs = kmalloc(sizeof(*tsk->fs), GFP_KERNEL);
	if (!tsk->fs)
		return -1;
	atomic_set(&tsk->fs->count, 1);
	tsk->fs->umask = current->fs->umask;
	tsk->fs->root = dget(current->fs->root);
	tsk->fs->pwd = dget(current->fs->pwd);
	return 0;
}
static int count_open_files(struct files_struct *files, int size)
{
	int i;

	/* Find the last open fd */
	for (i = size/(8*sizeof(long)); i > 0; ) {
		if (files->open_fds->fds_bits[--i])
			break;
	}
	i = (i+1) * 8 * sizeof(long);
	return i;
}
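/*
 * The result is rounded up to a whole word of the open-fd bitmap, not
 * the exact fd count.  For example, with 32-bit longs and the highest
 * open descriptor being fd 40, the scan stops at bitmap word 1 and
 * (1 + 1) * 8 * sizeof(long) == 64 descriptors are reported, which is
 * exactly the multiple-of-8 figure the memcpy()s in copy_files() need.
 */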
static int copy_files(unsigned long clone_flags, struct task_struct * tsk)
{
	struct files_struct *oldf, *newf;
	struct file **old_fds, **new_fds;
	int open_files, nfds, size, i, error = 0;

	/*
	 * A background process may not have any files ...
	 */
	oldf = current->files;
	if (!oldf)
		goto out;

	if (clone_flags & CLONE_FILES) {
		atomic_inc(&oldf->count);
		goto out;
	}

	tsk->files = NULL;
	error = -ENOMEM;
	newf = kmem_cache_alloc(files_cachep, SLAB_KERNEL);
	if (!newf)
		goto out;

	atomic_set(&newf->count, 1);

	newf->file_lock = RW_LOCK_UNLOCKED;
	newf->max_fds = NR_OPEN_DEFAULT;
	newf->max_fdset = __FD_SETSIZE;
	newf->close_on_exec = &newf->close_on_exec_init;
	newf->open_fds = &newf->open_fds_init;
	newf->fd = &newf->fd_array[0];

	/* We don't yet have the oldf readlock, but even if the old
	   fdset gets grown now, we'll only copy up to "size" fds */
	size = oldf->max_fdset;
	if (size > __FD_SETSIZE) {
		write_lock(&newf->file_lock);
		error = expand_fdset(newf, size);
		write_unlock(&newf->file_lock);
		if (error)
			goto out_release;
	}
	read_lock(&oldf->file_lock);

	open_files = count_open_files(oldf, size);

	/*
	 * Check whether we need to allocate a larger fd array.
	 * Note: we're not a clone task, so the open count won't
	 * change.
	 */
	nfds = NR_OPEN_DEFAULT;
	if (open_files > nfds) {
		read_unlock(&oldf->file_lock);
		write_lock(&newf->file_lock);
		error = expand_fd_array(newf, open_files);
		write_unlock(&newf->file_lock);
		if (error)
			goto out_release;
		nfds = newf->max_fds;
		read_lock(&oldf->file_lock);
	}

	old_fds = oldf->fd;
	new_fds = newf->fd;

	memcpy(newf->open_fds->fds_bits, oldf->open_fds->fds_bits, open_files/8);
	memcpy(newf->close_on_exec->fds_bits, oldf->close_on_exec->fds_bits, open_files/8);

	for (i = open_files; i != 0; i--) {
		struct file *f = *old_fds++;
		if (f)
			get_file(f);
		*new_fds++ = f;
	}
	read_unlock(&oldf->file_lock);

	/* compute the remainder to be cleared */
	size = (newf->max_fds - open_files) * sizeof(struct file *);

	/* This is long word aligned thus could use an optimized version */
	memset(new_fds, 0, size);

	if (newf->max_fdset > open_files) {
		int left = (newf->max_fdset-open_files)/8;
		int start = open_files / (8 * sizeof(unsigned long));

		memset(&newf->open_fds->fds_bits[start], 0, left);
		memset(&newf->close_on_exec->fds_bits[start], 0, left);
	}

	tsk->files = newf;
	error = 0;
out:
	return error;

out_release:
	free_fdset(newf->close_on_exec, newf->max_fdset);
	free_fdset(newf->open_fds, newf->max_fdset);
	kmem_cache_free(files_cachep, newf);
	goto out;
}
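/*
 * The copy is two-layered: the open_fds/close_on_exec bitmaps are
 * memcpy()'d wholesale (open_files is always a multiple of 8, so
 * open_files/8 is an exact byte count), while the struct file pointers
 * are copied one by one so that each shared file can have its reference
 * count bumped with get_file().  Everything past the last open fd is
 * then zeroed so the child starts with a clean tail.
 */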
static inline int copy_sighand(unsigned long clone_flags, struct task_struct * tsk)
{
	if (clone_flags & CLONE_SIGHAND) {
		atomic_inc(&current->sig->count);
		return 0;
	}
	tsk->sig = kmalloc(sizeof(*tsk->sig), GFP_KERNEL);
	if (!tsk->sig)
		return -1;
	spin_lock_init(&tsk->sig->siglock);
	atomic_set(&tsk->sig->count, 1);
	memcpy(tsk->sig->action, current->sig->action, sizeof(tsk->sig->action));
	return 0;
}
static inline void copy_flags(unsigned long clone_flags, struct task_struct *p)
{
	unsigned long new_flags = p->flags;

	new_flags &= ~(PF_SUPERPRIV | PF_USEDFPU | PF_VFORK);
	new_flags |= PF_FORKNOEXEC;
	if (!(clone_flags & CLONE_PTRACE))
		new_flags &= ~(PF_PTRACED|PF_TRACESYS);
	if (clone_flags & CLONE_VFORK)
		new_flags |= PF_VFORK;
	p->flags = new_flags;
}
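/*
 * The child starts with a scrubbed copy of the parent's flags:
 * PF_SUPERPRIV (has used root privileges) and PF_USEDFPU (FPU state
 * live on this CPU) describe things the parent did, not the child;
 * PF_FORKNOEXEC marks a process that has forked but not yet exec'd;
 * and the ptrace flags are dropped unless CLONE_PTRACE explicitly asks
 * for the child to be traced as well.
 */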
/*
 * Ok, this is the main fork-routine. It copies the system process
 * information (task[nr]) and sets up the necessary registers. It
 * also copies the data segment in its entirety.
 */
int do_fork(unsigned long clone_flags, unsigned long usp, struct pt_regs *regs)
{
	int retval = -ENOMEM;
	struct task_struct *p;
	DECLARE_MUTEX_LOCKED(sem);

	current->vfork_sem = &sem;

	p = alloc_task_struct();
	if (!p)
		goto fork_out;

	*p = *current;

	retval = -EAGAIN;
	if (p->user) {
		if (atomic_read(&p->user->count) >= p->rlim[RLIMIT_NPROC].rlim_cur)
			goto bad_fork_free;
		atomic_inc(&p->user->count);
	}

	/*
	 * Counter atomicity is protected by
	 * the tasklist_lock.
	 */
	if (nr_threads >= max_threads)
		goto bad_fork_cleanup_count;

	if (p->exec_domain && p->exec_domain->module)
		__MOD_INC_USE_COUNT(p->exec_domain->module);
	if (p->binfmt && p->binfmt->module)
		__MOD_INC_USE_COUNT(p->binfmt->module);

	p->did_exec = 0;
	p->swappable = 0;
	p->state = TASK_UNINTERRUPTIBLE;

	copy_flags(clone_flags, p);
	p->pid = get_pid(clone_flags);

	/*
	 * This is a "shadow run" state. The process
	 * is marked runnable, but isn't actually on
	 * any run queue yet.. (that happens at the
	 * very end).
	 */
	p->state = TASK_RUNNING;
	p->run_list.next = NULL;
	p->run_list.prev = NULL;

	p->p_pptr = p->p_opptr = current;
	p->p_cptr = NULL;
	init_waitqueue_head(&p->wait_chldexit);
	p->vfork_sem = NULL;

	p->sigpending = 0;
	sigemptyset(&p->signal);
	p->sigqueue = NULL;
	p->sigqueue_tail = &p->sigqueue;

	p->it_real_value = p->it_virt_value = p->it_prof_value = 0;
	p->it_real_incr = p->it_virt_incr = p->it_prof_incr = 0;
	init_timer(&p->real_timer);
	p->real_timer.data = (unsigned long) p;

	p->leader = 0;		/* session leadership doesn't inherit */
	p->tty_old_pgrp = 0;
	p->times.tms_utime = p->times.tms_stime = 0;
	p->times.tms_cutime = p->times.tms_cstime = 0;
#ifdef __SMP__
	{
		int i;
		p->has_cpu = 0;
		p->processor = current->processor;
		/* ?? should we just memset this ?? */
		for(i = 0; i < smp_num_cpus; i++)
			p->per_cpu_utime[i] = p->per_cpu_stime[i] = 0;
		spin_lock_init(&p->sigmask_lock);
	}
#endif
	p->lock_depth = -1;		/* -1 = no lock */
	p->start_time = jiffies;

	retval = -ENOMEM;
	/* copy all the process information */
	if (copy_files(clone_flags, p))
		goto bad_fork_cleanup;
	if (copy_fs(clone_flags, p))
		goto bad_fork_cleanup_files;
	if (copy_sighand(clone_flags, p))
		goto bad_fork_cleanup_fs;
	if (copy_mm(clone_flags, p))
		goto bad_fork_cleanup_sighand;
	retval = copy_thread(0, clone_flags, usp, p, regs);
	if (retval)
		goto bad_fork_cleanup_sighand;
	p->semundo = NULL;

	/* ok, now we should be set up.. */
	p->swappable = 1;
	p->exit_signal = clone_flags & CSIGNAL;
	p->pdeath_signal = 0;

	/*
	 * "share" dynamic priority between parent and child, thus the
	 * total amount of dynamic priorities in the system doesn't change,
	 * more scheduling fairness. This is only important in the first
	 * timeslice; in the long run the scheduling behaviour is unchanged.
	 */
	current->counter >>= 1;
	p->counter = current->counter;

	/*
	 * Ok, add it to the run-queues and make it
	 * visible to the rest of the system.
	 *
	 * Let it rip!
	 */
	retval = p->pid;
	write_lock_irq(&tasklist_lock);
	SET_LINKS(p);
	hash_pid(p);
	nr_threads++;
	write_unlock_irq(&tasklist_lock);

	wake_up_process(p);		/* do this last */
	++total_forks;

fork_out:
	if ((clone_flags & CLONE_VFORK) && (retval > 0))
		down(&sem);
	return retval;

bad_fork_cleanup_sighand:
	exit_sighand(p);
bad_fork_cleanup_fs:
	exit_fs(p); /* blocking */
bad_fork_cleanup_files:
	exit_files(p); /* blocking */
bad_fork_cleanup:
	if (p->exec_domain && p->exec_domain->module)
		__MOD_DEC_USE_COUNT(p->exec_domain->module);
	if (p->binfmt && p->binfmt->module)
		__MOD_DEC_USE_COUNT(p->binfmt->module);
bad_fork_cleanup_count:
	free_uid(p);
bad_fork_free:
	free_task_struct(p);
	goto fork_out;
}
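/*
 * For reference, the architecture entry points are thin wrappers around
 * do_fork(); on i386 they look roughly like this sketch:
 *
 *	asmlinkage int sys_fork(struct pt_regs regs)
 *	{
 *		return do_fork(SIGCHLD, regs.esp, &regs);
 *	}
 *
 *	asmlinkage int sys_vfork(struct pt_regs regs)
 *	{
 *		return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs.esp, &regs);
 *	}
 *
 * i.e. the low byte of clone_flags carries the child's exit signal
 * (CSIGNAL) and the child initially runs on the same user stack pointer.
 */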
void __init filescache_init(void)
{
	files_cachep = kmem_cache_create("files_cache",
					 sizeof(struct files_struct),
					 0,
					 SLAB_HWCACHE_ALIGN,
					 NULL, NULL);
	if (!files_cachep)
		panic("Cannot create files cache");
}