/*
 *  Kernel internal timers, kernel timekeeping, basic process system calls
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *
 *  1997-01-28  Modified by Finn Arne Gangstad to make timers scale better.
 *
 *  1997-09-10  Updated NTP code according to technical memorandum Jan '96
 *              "A Kernel Model for Precision Timekeeping" by Dave Mills
 *  1998-12-24  Fixed an xtime SMP race (we need the xtime_lock rw spinlock to
 *              serialize accesses to xtime/lost_ticks).
 *                              Copyright (C) 1998  Andrea Arcangeli
 *  1999-03-10  Improved NTP compatibility by Ulrich Windl
 */
#include <linux/config.h>
#include <linux/timex.h>
#include <linux/delay.h>
#include <linux/smp_lock.h>
#include <linux/interrupt.h>
#include <linux/kernel_stat.h>
#include <linux/slab.h>

#include <asm/uaccess.h>
/*
 * Timekeeping variables
 */

long tick = (1000000 + HZ/2) / HZ;	/* timer interrupt period */

/* The current time */
volatile struct timeval xtime __attribute__ ((aligned (16)));

/* Don't completely fail for HZ > 500.  */
int tickadj = 500/HZ ? : 1;		/* microsecs */

DECLARE_TASK_QUEUE(tq_timer);
DECLARE_TASK_QUEUE(tq_immediate);
DECLARE_TASK_QUEUE(tq_scheduler);
/*
 * phase-lock loop variables
 */
/* TIME_ERROR prevents overwriting the CMOS clock */
int time_state = TIME_OK;		/* clock synchronization status	*/
int time_status = STA_UNSYNC;		/* clock status bits		*/
long time_offset;			/* time adjustment (us)		*/
long time_constant = 2;			/* pll time constant		*/
long time_tolerance = MAXFREQ;		/* frequency tolerance (ppm)	*/
long time_precision = 1;		/* clock precision (us)		*/
long time_maxerror = NTP_PHASE_LIMIT;	/* maximum error (us)		*/
long time_esterror = NTP_PHASE_LIMIT;	/* estimated error (us)		*/
long time_phase;			/* phase offset (scaled us)	*/
long time_freq = ((1000000 + HZ/2) % HZ - HZ/2) << SHIFT_USEC;
					/* frequency offset (scaled ppm)*/
long time_adj;				/* tick adjust (scaled 1 / HZ)	*/
long time_reftime;			/* time at last adjustment (s)	*/

long time_adjust;
long time_adjust_step;
extern int do_setitimer(int, struct itimerval *, struct itimerval *);

unsigned long volatile jiffies;

unsigned int * prof_buffer;
unsigned long prof_len;
unsigned long prof_shift;
#define TVN_BITS 6
#define TVR_BITS 8
#define TVN_SIZE (1 << TVN_BITS)
#define TVR_SIZE (1 << TVR_BITS)
#define TVN_MASK (TVN_SIZE - 1)
#define TVR_MASK (TVR_SIZE - 1)

struct timer_vec {
	int index;
	struct list_head vec[TVN_SIZE];
};

struct timer_vec_root {
	int index;
	struct list_head vec[TVR_SIZE];
};

static struct timer_vec tv5;
static struct timer_vec tv4;
static struct timer_vec tv3;
static struct timer_vec tv2;
static struct timer_vec_root tv1;

static struct timer_vec * const tvecs[] = {
	(struct timer_vec *)&tv1, &tv2, &tv3, &tv4, &tv5
};

#define NOOF_TVECS (sizeof(tvecs) / sizeof(tvecs[0]))
void init_timervecs (void)
{
	int i;

	for (i = 0; i < TVN_SIZE; i++) {
		INIT_LIST_HEAD(tv5.vec + i);
		INIT_LIST_HEAD(tv4.vec + i);
		INIT_LIST_HEAD(tv3.vec + i);
		INIT_LIST_HEAD(tv2.vec + i);
	}
	for (i = 0; i < TVR_SIZE; i++)
		INIT_LIST_HEAD(tv1.vec + i);
}
static unsigned long timer_jiffies;
static inline void internal_add_timer(struct timer_list *timer)
{
	/*
	 * must be cli-ed when calling this
	 */
	unsigned long expires = timer->expires;
	unsigned long idx = expires - timer_jiffies;
	struct list_head * vec;

	if (idx < TVR_SIZE) {
		int i = expires & TVR_MASK;
		vec = tv1.vec + i;
	} else if (idx < 1 << (TVR_BITS + TVN_BITS)) {
		int i = (expires >> TVR_BITS) & TVN_MASK;
		vec = tv2.vec + i;
	} else if (idx < 1 << (TVR_BITS + 2 * TVN_BITS)) {
		int i = (expires >> (TVR_BITS + TVN_BITS)) & TVN_MASK;
		vec = tv3.vec + i;
	} else if (idx < 1 << (TVR_BITS + 3 * TVN_BITS)) {
		int i = (expires >> (TVR_BITS + 2 * TVN_BITS)) & TVN_MASK;
		vec = tv4.vec + i;
	} else if ((signed long) idx < 0) {
		/* can happen if you add a timer with expires == jiffies,
		 * or you set a timer to go off in the past
		 */
		vec = tv1.vec + tv1.index;
	} else if (idx <= 0xffffffffUL) {
		int i = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK;
		vec = tv5.vec + i;
	} else {
		/* Can only get here on architectures with 64-bit jiffies */
		INIT_LIST_HEAD(&timer->list);
		return;
	}
	list_add(&timer->list, vec->prev);
}
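
/*
 * Illustrative note (not part of the original source): with the usual
 * TVR_BITS == 8 and TVN_BITS == 6, tv1 covers deltas of 0..255 jiffies
 * at one jiffy per slot, tv2 covers deltas up to 256*64 - 1, tv3 up to
 * 256*64*64 - 1, and so on.  For example, a timer due in 5000 jiffies
 * satisfies 256 <= 5000 < 1 << (8 + 6), so it is hashed into tv2 at
 * index (expires >> 8) & 63 and only reaches tv1 when that tv2 bucket
 * is cascaded.
 */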
/* Initialize both explicitly - let's try to have them in the same cache line */
spinlock_t timerlist_lock = SPIN_LOCK_UNLOCKED;

#ifdef CONFIG_SMP
volatile struct timer_list * volatile running_timer = NULL;
#define timer_enter(t) do { running_timer = t; mb(); } while (0)
#define timer_exit() do { running_timer = NULL; } while (0)
#define timer_is_running(t) (running_timer == t)
#define timer_synchronize(t) while (timer_is_running(t)) barrier()
#else
#define timer_enter(t)		do { } while (0)
#define timer_exit()		do { } while (0)
#endif
void add_timer(struct timer_list *timer)
{
	unsigned long flags;

	spin_lock_irqsave(&timerlist_lock, flags);
	if (timer_pending(timer))
		goto bug;
	internal_add_timer(timer);
	spin_unlock_irqrestore(&timerlist_lock, flags);
	return;
bug:
	spin_unlock_irqrestore(&timerlist_lock, flags);
	printk("bug: kernel timer added twice at %p.\n",
			__builtin_return_address(0));
}
static inline int detach_timer (struct timer_list *timer)
{
	if (!timer_pending(timer))
		return 0;
	list_del(&timer->list);
	return 1;
}
int mod_timer(struct timer_list *timer, unsigned long expires)
{
	int ret;
	unsigned long flags;

	spin_lock_irqsave(&timerlist_lock, flags);
	timer->expires = expires;
	ret = detach_timer(timer);
	internal_add_timer(timer);
	spin_unlock_irqrestore(&timerlist_lock, flags);
	return ret;
}
int del_timer(struct timer_list * timer)
{
	int ret;
	unsigned long flags;

	spin_lock_irqsave(&timerlist_lock, flags);
	ret = detach_timer(timer);
	timer->list.next = timer->list.prev = NULL;
	spin_unlock_irqrestore(&timerlist_lock, flags);
	return ret;
}
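
/*
 * Usage sketch (illustrative only, not part of this file): a driver
 * might arm one of these timers roughly as follows.  my_timeout and
 * my_example are hypothetical names used only for this example.
 */
#if 0
static struct timer_list my_timer;

static void my_timeout(unsigned long data)
{
	/* called from the timer bottom half, after the timer was detached */
}

static void my_example(void)
{
	init_timer(&my_timer);
	my_timer.function = my_timeout;
	my_timer.data = 0;
	my_timer.expires = jiffies + HZ;	/* about one second from now */
	add_timer(&my_timer);

	mod_timer(&my_timer, jiffies + 2 * HZ);	/* re-arm / move it */
	del_timer(&my_timer);			/* cancel if still pending */
}
#endif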
#ifdef CONFIG_SMP
void sync_timers(void)
{
	spin_unlock_wait(&global_bh_lock);
}
/*
 * SMP specific function to delete a periodic timer.
 * The caller must prevent the timer from being restarted; upon exit
 * the timer is not queued and the handler is not running on any CPU.
 * It returns the number of times the timer was deleted (for reference
 * counting).
 */
int del_timer_sync(struct timer_list * timer)
{
	int ret = 0;

	for (;;) {
		unsigned long flags;
		int running;

		spin_lock_irqsave(&timerlist_lock, flags);
		ret += detach_timer(timer);
		timer->list.next = timer->list.prev = 0;
		running = timer_is_running(timer);
		spin_unlock_irqrestore(&timerlist_lock, flags);

		if (!running)
			break;
		timer_synchronize(timer);
	}

	return ret;
}
#endif
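
/*
 * Illustrative note (not part of the original source): del_timer() only
 * guarantees that the timer is no longer queued; on SMP the handler may
 * still be running on another CPU when it returns.  Code that is about
 * to free the timer or the data it uses should prefer del_timer_sync(),
 * e.g. (my_timer and my_dev are hypothetical, as above):
 *
 *	del_timer_sync(&my_timer);	waits for a running handler too
 *	kfree(my_dev);
 */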
static inline void cascade_timers(struct timer_vec *tv)
{
	/* cascade all the timers from tv up one level */
	struct list_head *head, *curr, *next;

	head = tv->vec + tv->index;
	curr = head->next;
	/*
	 * We are removing _all_ timers from the list, so we don't have to
	 * detach them individually, just clear the list afterwards.
	 */
	while (curr != head) {
		struct timer_list *tmp;

		tmp = list_entry(curr, struct timer_list, list);
		next = curr->next;
		list_del(curr); // not needed
		internal_add_timer(tmp);
		curr = next;
	}
	INIT_LIST_HEAD(head);
	tv->index = (tv->index + 1) & TVN_MASK;
}
static inline void run_timer_list(void)
{
	spin_lock_irq(&timerlist_lock);
	while ((long)(jiffies - timer_jiffies) >= 0) {
		struct list_head *head, *curr;
		if (!tv1.index) {
			int n = 1;
			do {
				cascade_timers(tvecs[n]);
			} while (tvecs[n]->index == 1 && ++n < NOOF_TVECS);
		}
repeat:
		head = tv1.vec + tv1.index;
		curr = head->next;
		if (curr != head) {
			struct timer_list *timer;
			void (*fn)(unsigned long);
			unsigned long data;

			timer = list_entry(curr, struct timer_list, list);
			fn = timer->function;
			data = timer->data;

			detach_timer(timer);
			timer->list.next = timer->list.prev = NULL;
			timer_enter(timer);
			spin_unlock_irq(&timerlist_lock);
			fn(data);
			spin_lock_irq(&timerlist_lock);
			timer_exit();
			goto repeat;
		}
		++timer_jiffies;
		tv1.index = (tv1.index + 1) & TVR_MASK;
	}
	spin_unlock_irq(&timerlist_lock);
}
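
/*
 * Illustrative note (not part of the original source): cascading only
 * happens when tv1.index wraps back to 0, i.e. once every TVR_SIZE
 * (256) jiffies.  At that point one tv2 bucket is emptied and its
 * timers are re-hashed by internal_add_timer(); since their remaining
 * delta is now below 256 jiffies they land in tv1.  tv3, tv4 and tv5
 * are cascaded in turn only when the level below them wraps around.
 */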
spinlock_t tqueue_lock = SPIN_LOCK_UNLOCKED;

void tqueue_bh(void)
{
	run_task_queue(&tq_timer);
}

void immediate_bh(void)
{
	run_task_queue(&tq_immediate);
}
/*
 * this routine handles the overflow of the microsecond field
 *
 * The tricky bits of code to handle the accurate clock support
 * were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame.
 * They were originally developed for SUN and DEC kernels.
 * All the kudos should go to Dave for this stuff.
 */
static void second_overflow(void)
{
    long ltemp;

    /* Bump the maxerror field */
    time_maxerror += time_tolerance >> SHIFT_USEC;
    if ( time_maxerror > NTP_PHASE_LIMIT ) {
	time_maxerror = NTP_PHASE_LIMIT;
	time_status |= STA_UNSYNC;
    }

    /*
     * Leap second processing. If in leap-insert state at
     * the end of the day, the system clock is set back one
     * second; if in leap-delete state, the system clock is
     * set ahead one second. The microtime() routine or
     * external clock driver will insure that reported time
     * is always monotonic. The ugly divides should be
     * replaced.
     */
    switch (time_state) {

    case TIME_OK:
	if (time_status & STA_INS)
	    time_state = TIME_INS;
	else if (time_status & STA_DEL)
	    time_state = TIME_DEL;
	break;

    case TIME_INS:
	if (xtime.tv_sec % 86400 == 0) {
	    xtime.tv_sec--;
	    time_state = TIME_OOP;
	    printk(KERN_NOTICE "Clock: inserting leap second 23:59:60 UTC\n");
	}
	break;

    case TIME_DEL:
	if ((xtime.tv_sec + 1) % 86400 == 0) {
	    xtime.tv_sec++;
	    time_state = TIME_WAIT;
	    printk(KERN_NOTICE "Clock: deleting leap second 23:59:59 UTC\n");
	}
	break;

    case TIME_OOP:
	time_state = TIME_WAIT;
	break;

    case TIME_WAIT:
	if (!(time_status & (STA_INS | STA_DEL)))
	    time_state = TIME_OK;
    }
    /*
     * Compute the phase adjustment for the next second. In
     * PLL mode, the offset is reduced by a fixed factor
     * times the time constant. In FLL mode the offset is
     * used directly. In either mode, the maximum phase
     * adjustment for each second is clamped so as to spread
     * the adjustment over not more than the number of
     * seconds between updates.
     */
    if (time_offset < 0) {
	ltemp = -time_offset;
	if (!(time_status & STA_FLL))
	    ltemp >>= SHIFT_KG + time_constant;
	if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE)
	    ltemp = (MAXPHASE / MINSEC) << SHIFT_UPDATE;
	time_offset += ltemp;
	time_adj = -ltemp << (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE);
    } else {
	ltemp = time_offset;
	if (!(time_status & STA_FLL))
	    ltemp >>= SHIFT_KG + time_constant;
	if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE)
	    ltemp = (MAXPHASE / MINSEC) << SHIFT_UPDATE;
	time_offset -= ltemp;
	time_adj = ltemp << (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE);
    }
    /*
     * Compute the frequency estimate and additional phase
     * adjustment due to frequency error for the next
     * second. When the PPS signal is engaged, gnaw on the
     * watchdog counter and update the frequency computed by
     * the pll and the PPS signal.
     */
    pps_valid++;
    if (pps_valid == PPS_VALID) {	/* PPS signal lost */
	pps_jitter = MAXTIME;
	pps_stabil = MAXFREQ;
	time_status &= ~(STA_PPSSIGNAL | STA_PPSJITTER |
			 STA_PPSWANDER | STA_PPSERROR);
    }
    ltemp = time_freq + pps_freq;
    if (ltemp < 0)
	time_adj -= -ltemp >>
	    (SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE);
    else
	time_adj += ltemp >>
	    (SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE);

#if HZ == 100
    /* Compensate for (HZ==100) != (1 << SHIFT_HZ).
     * Add 25% and 3.125% to get 128.125; => only 0.125% error (p. 14)
     */
    if (time_adj < 0)
	time_adj -= (-time_adj >> 2) + (-time_adj >> 5);
    else
	time_adj += (time_adj >> 2) + (time_adj >> 5);
#endif
}
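
/*
 * Worked example (illustrative, assuming the usual <linux/timex.h>
 * values MAXPHASE == 512000 and MINSEC == 16): the clamp above limits
 * the phase correction applied per second to MAXPHASE / MINSEC ==
 * 32000 microseconds (in scaled form), so even a maximal 512 ms offset
 * is slewed out over at least 16 seconds rather than stepped at once.
 */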
/* in the NTP reference this is called "hardclock()" */
static void update_wall_time_one_tick(void)
{
	if ( (time_adjust_step = time_adjust) != 0 ) {
	    /* We are doing an adjtime thing.
	     *
	     * Prepare time_adjust_step to be within bounds.
	     * Note that a positive time_adjust means we want the clock
	     * to run faster.
	     *
	     * Limit the amount of the step to be in the range
	     * -tickadj .. +tickadj
	     */
	    if (time_adjust > tickadj)
		time_adjust_step = tickadj;
	    else if (time_adjust < -tickadj)
		time_adjust_step = -tickadj;

	    /* Reduce by this step the amount of time left */
	    time_adjust -= time_adjust_step;
	}
	xtime.tv_usec += tick + time_adjust_step;
	/*
	 * Advance the phase, once it gets to one microsecond, then
	 * advance the tick more.
	 */
	time_phase += time_adj;
	if (time_phase <= -FINEUSEC) {
		long ltemp = -time_phase >> SHIFT_SCALE;
		time_phase += ltemp << SHIFT_SCALE;
		xtime.tv_usec -= ltemp;
	}
	else if (time_phase >= FINEUSEC) {
		long ltemp = time_phase >> SHIFT_SCALE;
		time_phase -= ltemp << SHIFT_SCALE;
		xtime.tv_usec += ltemp;
	}
}
/*
 * Using a loop looks inefficient, but "ticks" is
 * usually just one (we shouldn't be losing ticks,
 * we're doing it this way mainly for interrupt
 * latency reasons, not because we think we'll
 * have lots of lost timer ticks).
 */
static void update_wall_time(unsigned long ticks)
{
	do {
		ticks--;
		update_wall_time_one_tick();
	} while (ticks);

	if (xtime.tv_usec >= 1000000) {
	    xtime.tv_usec -= 1000000;
	    xtime.tv_sec++;
	    second_overflow();
	}
}
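
/*
 * Worked example (illustrative, not part of the original source): with
 * HZ == 100 the base tick is 10000 us and tickadj is 500/HZ == 5 us, so
 * during an adjtime() adjustment each tick advances xtime by at most
 * 10000 +/- 5 us.  A 1 ms (1000 us) adjustment therefore takes about
 * 200 ticks, i.e. roughly two seconds, to be slewed out.
 */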
static inline void do_process_times(struct task_struct *p,
	unsigned long user, unsigned long system)
{
	unsigned long psecs;

	psecs = (p->times.tms_utime += user);
	psecs += (p->times.tms_stime += system);
	if (psecs / HZ > p->rlim[RLIMIT_CPU].rlim_cur) {
		/* Send SIGXCPU every second.. */
		if (!(psecs % HZ))
			send_sig(SIGXCPU, p, 1);
		/* and SIGKILL when we go over max.. */
		if (psecs / HZ > p->rlim[RLIMIT_CPU].rlim_max)
			send_sig(SIGKILL, p, 1);
	}
}
static inline void do_it_virt(struct task_struct * p, unsigned long ticks)
{
	unsigned long it_virt = p->it_virt_value;

	if (it_virt) {
		it_virt -= ticks;
		if (!it_virt) {
			it_virt = p->it_virt_incr;
			send_sig(SIGVTALRM, p, 1);
		}
		p->it_virt_value = it_virt;
	}
}
static inline void do_it_prof(struct task_struct *p)
{
	unsigned long it_prof = p->it_prof_value;

	if (it_prof) {
		if (--it_prof == 0) {
			it_prof = p->it_prof_incr;
			send_sig(SIGPROF, p, 1);
		}
		p->it_prof_value = it_prof;
	}
}
void update_one_process(struct task_struct *p, unsigned long user,
			unsigned long system, int cpu)
{
	p->per_cpu_utime[cpu] += user;
	p->per_cpu_stime[cpu] += system;
	do_process_times(p, user, system);
	do_it_virt(p, user);
	do_it_prof(p);
}
/*
 * Called from the timer interrupt handler to charge one tick to the current
 * process.  user_tick is 1 if the tick is user time, 0 for system.
 */
void update_process_times(int user_tick)
{
	struct task_struct *p = current;
	int cpu = smp_processor_id(), system = user_tick ^ 1;

	update_one_process(p, user_tick, system, cpu);
	if (p->pid) {
		if (--p->counter <= 0) {
			p->counter = 0;
			p->need_resched = 1;
		}
		if (p->nice > 0)
			kstat.per_cpu_nice[cpu] += user_tick;
		else
			kstat.per_cpu_user[cpu] += user_tick;
		kstat.per_cpu_system[cpu] += system;
	} else if (local_bh_count(cpu) || local_irq_count(cpu) > 1)
		kstat.per_cpu_system[cpu] += system;

	if (slab_cache_drain_mask & (1UL << cpu))
		slab_drain_local_cache();
}
/*
 * Nr of active tasks - counted in fixed-point numbers
 */
static unsigned long count_active_tasks(void)
{
	struct task_struct *p;
	unsigned long nr = 0;

	read_lock(&tasklist_lock);
	for_each_task(p) {
		if ((p->state == TASK_RUNNING ||
		     (p->state & TASK_UNINTERRUPTIBLE)))
			nr += FIXED_1;
	}
	read_unlock(&tasklist_lock);
	return nr;
}
/*
 * Hmm.. Changed this, as the GNU make sources (load.c) seem to
 * imply that avenrun[] is the standard name for this kind of thing.
 * Nothing else seems to be standardized: the fractional size etc
 * all seem to differ on different machines.
 */
unsigned long avenrun[3];
static inline void calc_load(unsigned long ticks)
{
	unsigned long active_tasks; /* fixed-point */
	static int count = LOAD_FREQ;

	count -= ticks;
	if (count < 0) {
		count += LOAD_FREQ;
		active_tasks = count_active_tasks();
		CALC_LOAD(avenrun[0], EXP_1, active_tasks);
		CALC_LOAD(avenrun[1], EXP_5, active_tasks);
		CALC_LOAD(avenrun[2], EXP_15, active_tasks);
	}
}
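
/*
 * Worked example (illustrative, assuming the usual <linux/sched.h>
 * definitions FSHIFT == 11, FIXED_1 == 1 << 11 and LOAD_FREQ == 5*HZ):
 * CALC_LOAD(load, exp, n) computes
 *
 *	load = (load * exp + n * (FIXED_1 - exp)) >> FSHIFT
 *
 * roughly every five seconds, i.e. an exponentially decaying average of
 * the active-task count in 11-bit fixed point; the avenrun[] values are
 * scaled back down by FIXED_1 when they are presented to userspace.
 */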
/* jiffies at the most recent update of wall time */
unsigned long wall_jiffies;

/*
 * This spinlock protects us from races in SMP while playing with xtime. -arca
 */
rwlock_t xtime_lock = RW_LOCK_UNLOCKED;
static inline void update_times(void)
{
	unsigned long ticks;

	/*
	 * update_times() is run from the raw timer_bh handler so we
	 * just know that the irqs are locally enabled and so we don't
	 * need to save/restore the flags of the local CPU here. -arca
	 */
	write_lock_irq(&xtime_lock);

	ticks = jiffies - wall_jiffies;
	if (ticks) {
		wall_jiffies += ticks;
		update_wall_time(ticks);
	}
	write_unlock_irq(&xtime_lock);
	calc_load(ticks);
}
void do_timer(struct pt_regs *regs)
{
	(*(unsigned long *)&jiffies)++;
#ifndef CONFIG_SMP
	/* SMP process accounting uses the local APIC timer */

	update_process_times(user_mode(regs));
#endif
	mark_bh(TIMER_BH);
	if (TQ_ACTIVE(tq_timer))
		mark_bh(TQUEUE_BH);
}
#if !defined(__alpha__) && !defined(__ia64__)

/*
 * For backwards compatibility?  This can be done in libc so Alpha
 * and all newer ports shouldn't need it.
 */
asmlinkage unsigned long sys_alarm(unsigned int seconds)
{
	struct itimerval it_new, it_old;
	unsigned int oldalarm;

	it_new.it_interval.tv_sec = it_new.it_interval.tv_usec = 0;
	it_new.it_value.tv_sec = seconds;
	it_new.it_value.tv_usec = 0;
	do_setitimer(ITIMER_REAL, &it_new, &it_old);
	oldalarm = it_old.it_value.tv_sec;
	/* ehhh.. We can't return 0 if we have an alarm pending.. */
	/* And we'd better return too much than too little anyway */
	if (it_old.it_value.tv_usec)
		oldalarm++;
	return oldalarm;
}

#endif
/*
 * The Alpha uses getxpid, getxuid, and getxgid instead.  Maybe this
 * should be moved into arch/i386 instead?
 */

asmlinkage long sys_getpid(void)
{
	/* This is SMP safe - current->pid doesn't change */
	return current->pid;
}
/*
 * This is not strictly SMP safe: p_opptr could change
 * from under us. However, rather than getting any lock
 * we can use an optimistic algorithm: get the parent
 * pid, and go back and check that the parent is still
 * the same. If it has changed (which is extremely unlikely
 * indeed), we just try again..
 *
 * NOTE! This depends on the fact that even if we _do_
 * get an old value of "parent", we can happily dereference
 * the pointer: we just can't necessarily trust the result
 * until we know that the parent pointer is valid.
 *
 * The "mb()" macro is a memory barrier - a synchronizing
 * event. It also makes sure that gcc doesn't optimize
 * away the necessary memory references.. The barrier doesn't
 * have to have all that strong semantics: on x86 we don't
 * really require a synchronizing instruction, for example.
 * The barrier is more important for code generation than
 * for any real memory ordering semantics (even if there is
 * a small window for a race, using the old pointer is
 * harmless for a while).
 */
asmlinkage long sys_getppid(void)
{
	int pid;
	struct task_struct * me = current;
	struct task_struct * parent;

	parent = me->p_opptr;
	for (;;) {
		pid = parent->pid;
#ifdef CONFIG_SMP
		{
			struct task_struct *old = parent;
			mb();
			parent = me->p_opptr;
			if (old != parent)
				continue;
		}
#endif
		break;
	}
	return pid;
}
asmlinkage long sys_getuid(void)
{
	/* Only we change this so SMP safe */
	return current->uid;
}

asmlinkage long sys_geteuid(void)
{
	/* Only we change this so SMP safe */
	return current->euid;
}

asmlinkage long sys_getgid(void)
{
	/* Only we change this so SMP safe */
	return current->gid;
}

asmlinkage long sys_getegid(void)
{
	/* Only we change this so SMP safe */
	return current->egid;
}
asmlinkage long sys_nanosleep(struct timespec *rqtp, struct timespec *rmtp)
{
	struct timespec t;
	unsigned long expire;

	if (copy_from_user(&t, rqtp, sizeof(struct timespec)))
		return -EFAULT;

	if (t.tv_nsec >= 1000000000L || t.tv_nsec < 0 || t.tv_sec < 0)
		return -EINVAL;

	if (t.tv_sec == 0 && t.tv_nsec <= 2000000L &&
	    current->policy != SCHED_OTHER)
	{
		/*
		 * Short delay requests up to 2 ms will be handled with
		 * high precision by a busy wait for all real-time processes.
		 *
		 * It's important on SMP not to do this while holding locks.
		 */
		udelay((t.tv_nsec + 999) / 1000);
		return 0;
	}

	expire = timespec_to_jiffies(&t) + (t.tv_sec || t.tv_nsec);

	current->state = TASK_INTERRUPTIBLE;
	expire = schedule_timeout(expire);

	if (expire) {
		if (rmtp) {
			jiffies_to_timespec(expire, &t);
			if (copy_to_user(rmtp, &t, sizeof(struct timespec)))
				return -EFAULT;
		}
		return -EINTR;
	}
	return 0;
}
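
/*
 * Userspace view (illustrative sketch, not part of this file): when the
 * sleep is interrupted by a signal, the unslept time is written back
 * through rmtp so the caller can resume where it left off, e.g.
 *
 *	struct timespec req = { 1, 500000000 };		1.5 seconds
 *	struct timespec rem;
 *	while (nanosleep(&req, &rem) == -1 && errno == EINTR)
 *		req = rem;
 */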