/*
 *  Kernel internal timers, kernel timekeeping, basic process system calls
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *
 *  1997-01-28  Modified by Finn Arne Gangstad to make timers scale better.
 *
 *  1997-09-10  Updated NTP code according to technical memorandum Jan '96
 *              "A Kernel Model for Precision Timekeeping" by Dave Mills
 *  1998-12-24  Fixed a xtime SMP race (we need the xtime_lock rw spinlock to
 *              serialize accesses to xtime/lost_ticks).
 *                              Copyright (C) 1998  Andrea Arcangeli
 *  1999-03-10  Improved NTP compatibility by Ulrich Windl
 */
#include <linux/config.h>
#include <linux/timex.h>
#include <linux/delay.h>
#include <linux/smp_lock.h>
#include <linux/interrupt.h>
#include <linux/kernel_stat.h>

#include <asm/uaccess.h>
/*
 * Timekeeping variables
 */

long tick = (1000000 + HZ/2) / HZ;	/* timer interrupt period */

/* The current time */
volatile struct timeval xtime __attribute__ ((aligned (16)));

/* Don't completely fail for HZ > 500. */
int tickadj = 500/HZ ? : 1;		/* microsecs */

DECLARE_TASK_QUEUE(tq_timer);
DECLARE_TASK_QUEUE(tq_immediate);
DECLARE_TASK_QUEUE(tq_scheduler);
/*
 * phase-lock loop variables
 */
/* TIME_ERROR prevents overwriting the CMOS clock */
int time_state = TIME_OK;		/* clock synchronization status	*/
int time_status = STA_UNSYNC;		/* clock status bits		*/
long time_offset;			/* time adjustment (us)		*/
long time_constant = 2;			/* pll time constant		*/
long time_tolerance = MAXFREQ;		/* frequency tolerance (ppm)	*/
long time_precision = 1;		/* clock precision (us)		*/
long time_maxerror = NTP_PHASE_LIMIT;	/* maximum error (us)		*/
long time_esterror = NTP_PHASE_LIMIT;	/* estimated error (us)		*/
long time_phase;			/* phase offset (scaled us)	*/
long time_freq = ((1000000 + HZ/2) % HZ - HZ/2) << SHIFT_USEC;
					/* frequency offset (scaled ppm)*/
long time_adj;				/* tick adjust (scaled 1 / HZ)	*/
long time_reftime;			/* time at last adjustment (s)	*/

long time_adjust;			/* remaining adjtime() adjustment (us) */
long time_adjust_step;			/* per-tick slice of that adjustment   */
extern int do_setitimer(int, struct itimerval *, struct itimerval *);

unsigned long volatile jiffies;

unsigned int * prof_buffer;
unsigned long prof_len;
unsigned long prof_shift;
#define TVN_BITS 6
#define TVR_BITS 8
#define TVN_SIZE (1 << TVN_BITS)
#define TVR_SIZE (1 << TVR_BITS)
#define TVN_MASK (TVN_SIZE - 1)
#define TVR_MASK (TVR_SIZE - 1)

struct timer_vec {
	int index;
	struct list_head vec[TVN_SIZE];
};

struct timer_vec_root {
	int index;
	struct list_head vec[TVR_SIZE];
};

static struct timer_vec tv5;
static struct timer_vec tv4;
static struct timer_vec tv3;
static struct timer_vec tv2;
static struct timer_vec_root tv1;

static struct timer_vec * const tvecs[] = {
	(struct timer_vec *)&tv1, &tv2, &tv3, &tv4, &tv5
};

#define NOOF_TVECS (sizeof(tvecs) / sizeof(tvecs[0]))
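
/*
 * Rough sketch of the resulting timer-wheel geometry, assuming the
 * TVR_BITS == 8 / TVN_BITS == 6 split defined above:
 *
 *	tv1: 256 slots of 1 jiffy each,      expiries        0 .. 2^8  - 1
 *	tv2:  64 slots of 2^8  jiffies each, expiries     2^8  .. 2^14 - 1
 *	tv3:  64 slots of 2^14 jiffies each, expiries     2^14 .. 2^20 - 1
 *	tv4:  64 slots of 2^20 jiffies each, expiries     2^20 .. 2^26 - 1
 *	tv5:  64 slots of 2^26 jiffies each, expiries     2^26 .. 2^32 - 1
 *
 * Insertion is O(1): the distance to expiry selects the vector and the low
 * bits of the absolute expiry select the slot within it.
 */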
void init_timervecs (void)
{
	int i;

	for (i = 0; i < TVN_SIZE; i++) {
		INIT_LIST_HEAD(tv5.vec + i);
		INIT_LIST_HEAD(tv4.vec + i);
		INIT_LIST_HEAD(tv3.vec + i);
		INIT_LIST_HEAD(tv2.vec + i);
	}
	for (i = 0; i < TVR_SIZE; i++)
		INIT_LIST_HEAD(tv1.vec + i);
}

static unsigned long timer_jiffies;
static inline void internal_add_timer(struct timer_list *timer)
{
	/*
	 * must be cli-ed when calling this
	 */
	unsigned long expires = timer->expires;
	unsigned long idx = expires - timer_jiffies;
	struct list_head * vec;

	if (idx < TVR_SIZE) {
		int i = expires & TVR_MASK;
		vec = tv1.vec + i;
	} else if (idx < 1 << (TVR_BITS + TVN_BITS)) {
		int i = (expires >> TVR_BITS) & TVN_MASK;
		vec = tv2.vec + i;
	} else if (idx < 1 << (TVR_BITS + 2 * TVN_BITS)) {
		int i = (expires >> (TVR_BITS + TVN_BITS)) & TVN_MASK;
		vec = tv3.vec + i;
	} else if (idx < 1 << (TVR_BITS + 3 * TVN_BITS)) {
		int i = (expires >> (TVR_BITS + 2 * TVN_BITS)) & TVN_MASK;
		vec = tv4.vec + i;
	} else if ((signed long) idx < 0) {
		/* can happen if you add a timer with expires == jiffies,
		 * or you set a timer to go off in the past
		 */
		vec = tv1.vec + tv1.index;
	} else if (idx <= 0xffffffffUL) {
		int i = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK;
		vec = tv5.vec + i;
	} else {
		/* Can only get here on architectures with 64-bit jiffies */
		INIT_LIST_HEAD(&timer->list);
		return;
	}
	/* timers are FIFO: add at the tail of the chosen slot */
	list_add(&timer->list, vec->prev);
}
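
/*
 * Illustration of the slot arithmetic above, using the TVR_BITS/TVN_BITS
 * values defined earlier: with timer_jiffies == 1000 and
 * timer->expires == 1300, idx is 300.  That is >= TVR_SIZE (256) but below
 * 1 << (TVR_BITS + TVN_BITS) (16384), so the timer goes into tv2 at slot
 * (1300 >> TVR_BITS) & TVN_MASK == 5 and is cascaded back into tv1 when
 * tv2.index reaches that slot.
 */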
/* Initialize both explicitly - let's try to have them in the same cache line */
spinlock_t timerlist_lock = SPIN_LOCK_UNLOCKED;

#ifdef CONFIG_SMP
volatile struct timer_list * volatile running_timer;
#define timer_enter(t) do { running_timer = t; mb(); } while (0)
#define timer_exit() do { running_timer = NULL; } while (0)
#define timer_is_running(t) (running_timer == t)
#define timer_synchronize(t) while (timer_is_running(t)) barrier()
#else
#define timer_enter(t)		do { } while (0)
#define timer_exit()		do { } while (0)
#endif
void add_timer(struct timer_list *timer)
{
	unsigned long flags;

	spin_lock_irqsave(&timerlist_lock, flags);
	if (timer_pending(timer))
		goto bug;
	internal_add_timer(timer);
	spin_unlock_irqrestore(&timerlist_lock, flags);
	return;
bug:
	spin_unlock_irqrestore(&timerlist_lock, flags);
	printk("bug: kernel timer added twice at %p.\n",
	       __builtin_return_address(0));
}
static inline int detach_timer (struct timer_list *timer)
{
	if (!timer_pending(timer))
		return 0;
	list_del(&timer->list);
	return 1;
}
int mod_timer(struct timer_list *timer, unsigned long expires)
{
	int ret;
	unsigned long flags;

	spin_lock_irqsave(&timerlist_lock, flags);
	timer->expires = expires;
	ret = detach_timer(timer);
	internal_add_timer(timer);
	spin_unlock_irqrestore(&timerlist_lock, flags);
	return ret;
}
int del_timer(struct timer_list * timer)
{
	int ret;
	unsigned long flags;

	spin_lock_irqsave(&timerlist_lock, flags);
	ret = detach_timer(timer);
	timer->list.next = timer->list.prev = NULL;
	spin_unlock_irqrestore(&timerlist_lock, flags);
	return ret;
}
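
/*
 * Typical use of the interface above, as a minimal sketch (the handler name
 * and the one-second interval are made-up examples):
 *
 *	static void my_timeout(unsigned long data);
 *	static struct timer_list my_timer;
 *
 *	init_timer(&my_timer);
 *	my_timer.function = my_timeout;
 *	my_timer.data = 0;
 *	my_timer.expires = jiffies + HZ;	(fire in roughly one second)
 *	add_timer(&my_timer);
 *
 * mod_timer(&my_timer, jiffies + 2*HZ) re-arms a pending or expired timer;
 * del_timer(&my_timer) removes it if it is still queued.
 */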
#ifdef CONFIG_SMP
void sync_timers(void)
{
	spin_unlock_wait(&global_bh_lock);
}

/*
 * SMP-specific function to delete a periodic timer.  The caller must, by
 * some means, prevent the timer from being restarted.  On return the timer
 * is no longer queued and its handler is not running on any CPU.  The
 * return value is the number of times the timer was deleted (for reference
 * counting).
 */
int del_timer_sync(struct timer_list * timer)
{
	int ret = 0;

	for (;;) {
		unsigned long flags;
		int running;

		spin_lock_irqsave(&timerlist_lock, flags);
		ret += detach_timer(timer);
		timer->list.next = timer->list.prev = 0;
		running = timer_is_running(timer);
		spin_unlock_irqrestore(&timerlist_lock, flags);

		if (!running)
			break;

		timer_synchronize(timer);
	}

	return ret;
}
#endif
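
/*
 * Sketch of the intended calling pattern for del_timer_sync(): the caller
 * first makes sure the handler can no longer re-arm the timer (for example
 * by setting a "shutting down" flag that the handler checks before calling
 * mod_timer()), and only then calls del_timer_sync().  On return the timer
 * is neither queued nor running on any CPU, so its data may be freed safely.
 */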
static inline void cascade_timers(struct timer_vec *tv)
{
	/* cascade all the timers from tv up one level */
	struct list_head *head, *curr, *next;

	head = tv->vec + tv->index;
	curr = head->next;
	/*
	 * We are removing _all_ timers from the list, so we don't have to
	 * detach them individually, just clear the list afterwards.
	 */
	while (curr != head) {
		struct timer_list *tmp;

		tmp = list_entry(curr, struct timer_list, list);
		next = curr->next;
		list_del(curr); /* not needed */
		internal_add_timer(tmp);
		curr = next;
	}
	INIT_LIST_HEAD(head);
	tv->index = (tv->index + 1) & TVN_MASK;
}
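
/*
 * For illustration: when tv1.index wraps back to 0, run_timer_list() below
 * cascades tv2.  Every timer sitting in tv2.vec[tv2.index] is re-hashed with
 * internal_add_timer(), and since each of them now expires within the next
 * TVR_SIZE jiffies they all land in an exact tv1 slot.  When tv2 itself has
 * completed a full cycle, tv3 is cascaded in turn, and so on up to tv5.
 */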
static inline void run_timer_list(void)
{
	spin_lock_irq(&timerlist_lock);
	while ((long)(jiffies - timer_jiffies) >= 0) {
		struct list_head *head, *curr;
		if (!tv1.index) {
			int n = 1;
			do {
				cascade_timers(tvecs[n]);
			} while (tvecs[n]->index == 1 && ++n < NOOF_TVECS);
		}
repeat:
		head = tv1.vec + tv1.index;
		curr = head->next;
		if (curr != head) {
			struct timer_list *timer;
			void (*fn)(unsigned long);
			unsigned long data;

			timer = list_entry(curr, struct timer_list, list);
			fn = timer->function;
			data = timer->data;

			detach_timer(timer);
			timer->list.next = timer->list.prev = NULL;
			timer_enter(timer);
			spin_unlock_irq(&timerlist_lock);
			fn(data);
			spin_lock_irq(&timerlist_lock);
			timer_exit();
			goto repeat;
		}
		++timer_jiffies;
		tv1.index = (tv1.index + 1) & TVR_MASK;
	}
	spin_unlock_irq(&timerlist_lock);
}
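
/*
 * Note that the lock is dropped around fn(data) above, so a handler may
 * legitimately re-arm its own timer with add_timer()/mod_timer().  Plain
 * del_timer() on another CPU does not wait for a handler that is already
 * running; that is what del_timer_sync() above is for.
 */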
spinlock_t tqueue_lock = SPIN_LOCK_UNLOCKED;

void tqueue_bh(void)
{
	run_task_queue(&tq_timer);
}

void immediate_bh(void)
{
	run_task_queue(&tq_immediate);
}
/*
 * this routine handles the overflow of the microsecond field
 *
 * The tricky bits of code to handle the accurate clock support
 * were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame.
 * They were originally developed for SUN and DEC kernels.
 * All the kudos should go to Dave for this stuff.
 */
static void second_overflow(void)
{
	long ltemp;

	/* Bump the maxerror field */
	time_maxerror += time_tolerance >> SHIFT_USEC;
	if ( time_maxerror > NTP_PHASE_LIMIT ) {
		time_maxerror = NTP_PHASE_LIMIT;
		time_status |= STA_UNSYNC;
	}
	/*
	 * Leap second processing. If in leap-insert state at
	 * the end of the day, the system clock is set back one
	 * second; if in leap-delete state, the system clock is
	 * set ahead one second. The microtime() routine or
	 * external clock driver will insure that reported time
	 * is always monotonic. The ugly divides should be
	 * replaced.
	 */
	switch (time_state) {

	case TIME_OK:
		if (time_status & STA_INS)
			time_state = TIME_INS;
		else if (time_status & STA_DEL)
			time_state = TIME_DEL;
		break;

	case TIME_INS:
		if (xtime.tv_sec % 86400 == 0) {
			xtime.tv_sec--;
			time_state = TIME_OOP;
			printk(KERN_NOTICE "Clock: inserting leap second 23:59:60 UTC\n");
		}
		break;

	case TIME_DEL:
		if ((xtime.tv_sec + 1) % 86400 == 0) {
			xtime.tv_sec++;
			time_state = TIME_WAIT;
			printk(KERN_NOTICE "Clock: deleting leap second 23:59:59 UTC\n");
		}
		break;

	case TIME_OOP:
		time_state = TIME_WAIT;
		break;

	case TIME_WAIT:
		if (!(time_status & (STA_INS | STA_DEL)))
			time_state = TIME_OK;
	}
	/*
	 * Compute the phase adjustment for the next second. In
	 * PLL mode, the offset is reduced by a fixed factor
	 * times the time constant. In FLL mode the offset is
	 * used directly. In either mode, the maximum phase
	 * adjustment for each second is clamped so as to spread
	 * the adjustment over not more than the number of
	 * seconds between updates.
	 */
	if (time_offset < 0) {
		ltemp = -time_offset;
		if (!(time_status & STA_FLL))
			ltemp >>= SHIFT_KG + time_constant;
		if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE)
			ltemp = (MAXPHASE / MINSEC) << SHIFT_UPDATE;
		time_offset += ltemp;
		time_adj = -ltemp << (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE);
	} else {
		ltemp = time_offset;
		if (!(time_status & STA_FLL))
			ltemp >>= SHIFT_KG + time_constant;
		if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE)
			ltemp = (MAXPHASE / MINSEC) << SHIFT_UPDATE;
		time_offset -= ltemp;
		time_adj = ltemp << (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE);
	}
	/*
	 * Compute the frequency estimate and additional phase
	 * adjustment due to frequency error for the next
	 * second. When the PPS signal is engaged, gnaw on the
	 * watchdog counter and update the frequency computed by
	 * the pll and the PPS signal.
	 */
	pps_valid++;
	if (pps_valid == PPS_VALID) {	/* PPS signal lost */
		pps_jitter = MAXTIME;
		pps_stabil = MAXFREQ;
		time_status &= ~(STA_PPSSIGNAL | STA_PPSJITTER |
				 STA_PPSWANDER | STA_PPSERROR);
	}
	ltemp = time_freq + pps_freq;
	if (ltemp < 0)
		time_adj -= -ltemp >>
			(SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE);
	else
		time_adj += ltemp >>
			(SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE);
#if HZ == 100
	/* Compensate for (HZ==100) != (1 << SHIFT_HZ).
	 * Add 25% and 3.125% to get 128.125; => only 0.125% error (p. 14)
	 */
	if (time_adj < 0)
		time_adj -= (-time_adj >> 2) + (-time_adj >> 5);
	else
		time_adj += (time_adj >> 2) + (time_adj >> 5);
#endif
}
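
/*
 * Worked example for the HZ == 100 compensation above: time_adj is scaled
 * as if a second contained 1 << SHIFT_HZ = 128 ticks, but only 100 ticks
 * actually occur, so each per-tick adjustment has to be multiplied by
 * roughly 128/100 = 1.28.  Adding time_adj/4 and time_adj/32 multiplies it
 * by 1 + 0.25 + 0.03125 = 1.28125, leaving an error of about 0.1%.
 */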
/* in the NTP reference this is called "hardclock()" */
static void update_wall_time_one_tick(void)
{
	if ( (time_adjust_step = time_adjust) != 0 ) {
		/* We are doing an adjtime thing.
		 *
		 * Prepare time_adjust_step to be within bounds.
		 * Note that a positive time_adjust means we want the clock
		 * to run faster.
		 *
		 * Limit the amount of the step to be in the range
		 * -tickadj .. +tickadj
		 */
		if (time_adjust > tickadj)
			time_adjust_step = tickadj;
		else if (time_adjust < -tickadj)
			time_adjust_step = -tickadj;

		/* Reduce by this step the amount of time left */
		time_adjust -= time_adjust_step;
	}
	xtime.tv_usec += tick + time_adjust_step;
	/*
	 * Advance the phase, once it gets to one microsecond, then
	 * advance the tick more.
	 */
	time_phase += time_adj;
	if (time_phase <= -FINEUSEC) {
		long ltemp = -time_phase >> SHIFT_SCALE;
		time_phase += ltemp << SHIFT_SCALE;
		xtime.tv_usec -= ltemp;
	}
	else if (time_phase >= FINEUSEC) {
		long ltemp = time_phase >> SHIFT_SCALE;
		time_phase -= ltemp << SHIFT_SCALE;
		xtime.tv_usec += ltemp;
	}
}
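
/*
 * For illustration: time_adj is the per-tick adjustment scaled by
 * 2^SHIFT_SCALE, so a value of FINEUSEC (1 << SHIFT_SCALE) stands for one
 * microsecond per tick.  The fractional remainder accumulates in time_phase,
 * and whenever it reaches a whole microsecond in either direction that
 * microsecond is moved into xtime.tv_usec and taken back out of the phase,
 * so none of the adjustment is lost to rounding.
 */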
/*
 * Using a loop looks inefficient, but "ticks" is
 * usually just one (we shouldn't be losing ticks;
 * we're doing it this way mainly for interrupt
 * latency reasons, not because we expect to have
 * lots of lost timer ticks).
 */
static void update_wall_time(unsigned long ticks)
{
	do {
		ticks--;
		update_wall_time_one_tick();
	} while (ticks);

	if (xtime.tv_usec >= 1000000) {
		xtime.tv_usec -= 1000000;
		xtime.tv_sec++;
		second_overflow();
	}
}
static inline void do_process_times(struct task_struct *p,
	unsigned long user, unsigned long system)
{
	unsigned long psecs;

	psecs = (p->times.tms_utime += user);
	psecs += (p->times.tms_stime += system);
	if (psecs / HZ > p->rlim[RLIMIT_CPU].rlim_cur) {
		/* Send SIGXCPU every second.. */
		if (!(psecs % HZ))
			send_sig(SIGXCPU, p, 1);
		/* and SIGKILL when we go over max.. */
		if (psecs / HZ > p->rlim[RLIMIT_CPU].rlim_max)
			send_sig(SIGKILL, p, 1);
	}
}
static inline void do_it_virt(struct task_struct * p, unsigned long ticks)
{
	unsigned long it_virt = p->it_virt_value;

	if (it_virt) {
		it_virt -= ticks;
		if (!it_virt) {
			it_virt = p->it_virt_incr;
			send_sig(SIGVTALRM, p, 1);
		}
		p->it_virt_value = it_virt;
	}
}
static inline void do_it_prof(struct task_struct *p)
{
	unsigned long it_prof = p->it_prof_value;

	if (it_prof) {
		if (--it_prof == 0) {
			it_prof = p->it_prof_incr;
			send_sig(SIGPROF, p, 1);
		}
		p->it_prof_value = it_prof;
	}
}
void update_one_process(struct task_struct *p, unsigned long user,
			unsigned long system, int cpu)
{
	p->per_cpu_utime[cpu] += user;
	p->per_cpu_stime[cpu] += system;
	do_process_times(p, user, system);
	do_it_virt(p, user);
	do_it_prof(p);
}
/*
 * Called from the timer interrupt handler to charge one tick to the current
 * process.  user_tick is 1 if the tick is user time, 0 for system.
 */
void update_process_times(int user_tick)
{
	struct task_struct *p = current;
	int cpu = smp_processor_id(), system = user_tick ^ 1;

	update_one_process(p, user_tick, system, cpu);
	if (p->pid) {
		if (--p->counter <= 0) {
			p->counter = 0;
			p->need_resched = 1;
		}
		if (p->nice > 0)
			kstat.per_cpu_nice[cpu] += user_tick;
		else
			kstat.per_cpu_user[cpu] += user_tick;
		kstat.per_cpu_system[cpu] += system;
	} else if (local_bh_count(cpu) || local_irq_count(cpu) > 1)
		kstat.per_cpu_system[cpu] += system;
}
/*
 * Nr of active tasks - counted in fixed-point numbers
 */
static unsigned long count_active_tasks(void)
{
	struct task_struct *p;
	unsigned long nr = 0;

	read_lock(&tasklist_lock);
	for_each_task(p) {
		if ((p->state == TASK_RUNNING ||
		     (p->state & TASK_UNINTERRUPTIBLE)))
			nr += FIXED_1;
	}
	read_unlock(&tasklist_lock);
	return nr;
}
/*
 * Hmm.. Changed this, as the GNU make sources (load.c) seem to
 * imply that avenrun[] is the standard name for this kind of thing.
 * Nothing else seems to be standardized: the fractional size etc
 * all seem to differ on different machines.
 */
unsigned long avenrun[3];

static inline void calc_load(unsigned long ticks)
{
	unsigned long active_tasks; /* fixed-point */
	static int count = LOAD_FREQ;

	count -= ticks;
	if (count < 0) {
		count += LOAD_FREQ;
		active_tasks = count_active_tasks();
		CALC_LOAD(avenrun[0], EXP_1, active_tasks);
		CALC_LOAD(avenrun[1], EXP_5, active_tasks);
		CALC_LOAD(avenrun[2], EXP_15, active_tasks);
	}
}
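
/*
 * Rough sketch of what CALC_LOAD() (from <linux/sched.h>) does with the
 * usual fixed-point constants (FSHIFT == 11, FIXED_1 == 1 << FSHIFT):
 *
 *	load = (load * exp + active_tasks * (FIXED_1 - exp)) >> FSHIFT;
 *
 * i.e. an exponentially decaying average recomputed every LOAD_FREQ ticks
 * (about five seconds); EXP_1, EXP_5 and EXP_15 are e^(-5s/1min),
 * e^(-5s/5min) and e^(-5s/15min) in the same fixed-point format.
 */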
/* jiffies at the most recent update of wall time */
unsigned long wall_jiffies;

/*
 * This spinlock protects us from races in SMP while playing with xtime. -arca
 */
rwlock_t xtime_lock = RW_LOCK_UNLOCKED;

static inline void update_times(void)
{
	unsigned long ticks;

	/*
	 * update_times() is run from the raw timer_bh handler so we
	 * just know that the irqs are locally enabled and so we don't
	 * need to save/restore the flags of the local CPU here. -arca
	 */
	write_lock_irq(&xtime_lock);

	ticks = jiffies - wall_jiffies;
	if (ticks) {
		wall_jiffies += ticks;
		update_wall_time(ticks);
	}
	write_unlock_irq(&xtime_lock);
	calc_load(ticks);
}
void timer_bh(void)
{
	update_times();
	run_timer_list();
}

void do_timer(struct pt_regs *regs)
{
	(*(unsigned long *)&jiffies)++;
#ifndef CONFIG_SMP
	/* SMP process accounting uses the local APIC timer */

	update_process_times(user_mode(regs));
#endif
	mark_bh(TIMER_BH);
	if (TQ_ACTIVE(tq_timer))
		mark_bh(TQUEUE_BH);
}
#if !defined(__alpha__) && !defined(__ia64__)

/*
 * For backwards compatibility?  This can be done in libc so Alpha
 * and all newer ports shouldn't need it.
 */
asmlinkage unsigned long sys_alarm(unsigned int seconds)
{
	struct itimerval it_new, it_old;
	unsigned int oldalarm;

	it_new.it_interval.tv_sec = it_new.it_interval.tv_usec = 0;
	it_new.it_value.tv_sec = seconds;
	it_new.it_value.tv_usec = 0;
	do_setitimer(ITIMER_REAL, &it_new, &it_old);
	oldalarm = it_old.it_value.tv_sec;
	/* ehhh.. We can't return 0 if we have an alarm pending.. */
	/* And we'd better return too much than too little anyway */
	if (it_old.it_value.tv_usec)
		oldalarm++;
	return oldalarm;
}

#endif
/*
 * The Alpha uses getxpid, getxuid, and getxgid instead.  Maybe this
 * should be moved into arch/i386 instead?
 */
asmlinkage long sys_getpid(void)
{
	/* This is SMP safe - current->pid doesn't change */
	return current->tgid;
}
/*
 * This is not strictly SMP safe: p_opptr could change
 * from under us. However, rather than getting any lock
 * we can use an optimistic algorithm: get the parent
 * pid, and go back and check that the parent is still
 * the same. If it has changed (which is extremely unlikely
 * indeed), we just try again..
 *
 * NOTE! This depends on the fact that even if we _do_
 * get an old value of "parent", we can happily dereference
 * the pointer: we just can't necessarily trust the result
 * until we know that the parent pointer is valid.
 *
 * The "mb()" macro is a memory barrier - a synchronizing
 * event. It also makes sure that gcc doesn't optimize
 * away the necessary memory references.. The barrier doesn't
 * have to have all that strong semantics: on x86 we don't
 * really require a synchronizing instruction, for example.
 * The barrier is more important for code generation than
 * for any real memory ordering semantics (even if there is
 * a small window for a race, using the old pointer is
 * harmless for a while).
 */
asmlinkage long sys_getppid(void)
{
	int pid;
	struct task_struct * me = current;
	struct task_struct * parent;

	parent = me->p_opptr;
	for (;;) {
		pid = parent->pid;
#ifdef CONFIG_SMP
		{
			struct task_struct *old = parent;
			mb();
			parent = me->p_opptr;
			if (old != parent)
				continue;
		}
#endif
		break;
	}
	return pid;
}
asmlinkage long sys_getuid(void)
{
	/* Only we change this so SMP safe */
	return current->uid;
}

asmlinkage long sys_geteuid(void)
{
	/* Only we change this so SMP safe */
	return current->euid;
}

asmlinkage long sys_getgid(void)
{
	/* Only we change this so SMP safe */
	return current->gid;
}

asmlinkage long sys_getegid(void)
{
	/* Only we change this so SMP safe */
	return current->egid;
}
asmlinkage long sys_nanosleep(struct timespec *rqtp, struct timespec *rmtp)
{
	struct timespec t;
	unsigned long expire;

	if (copy_from_user(&t, rqtp, sizeof(struct timespec)))
		return -EFAULT;

	if (t.tv_nsec >= 1000000000L || t.tv_nsec < 0 || t.tv_sec < 0)
		return -EINVAL;

	if (t.tv_sec == 0 && t.tv_nsec <= 2000000L &&
	    current->policy != SCHED_OTHER)
	{
		/*
		 * Short delay requests up to 2 ms will be handled with
		 * high precision by a busy wait for all real-time processes.
		 *
		 * It's important on SMP not to do this holding locks.
		 */
		udelay((t.tv_nsec + 999) / 1000);
		return 0;
	}

	/* the extra jiffy makes the sleep last at least the requested time */
	expire = timespec_to_jiffies(&t) + (t.tv_sec || t.tv_nsec);

	current->state = TASK_INTERRUPTIBLE;
	expire = schedule_timeout(expire);

	if (expire) {
		if (rmtp) {
			jiffies_to_timespec(expire, &t);
			if (copy_to_user(rmtp, &t, sizeof(struct timespec)))
				return -EFAULT;
		}
		return -EINTR;
	}
	return 0;
}