kernel/timer.c

   1 /*
   2  *  linux/kernel/timer.c
   3  *
   4  *  Kernel internal timers, kernel timekeeping, basic process system calls
   5  *
   6  *  Copyright (C) 1991, 1992  Linus Torvalds
   7  *
   8  *  1997-01-28  Modified by Finn Arne Gangstad to make timers scale better.
   9  *
  10  *  1997-09-10  Updated NTP code according to technical memorandum Jan '96
  11  *              "A Kernel Model for Precision Timekeeping" by Dave Mills
  12  *  1998-12-24  Fixed a xtime SMP race (we need the xtime_lock rw spinlock to
  13  *              serialize accesses to xtime/lost_ticks).
  14  *                              Copyright (C) 1998  Andrea Arcangeli
  15  *  1999-03-10  Improved NTP compatibility by Ulrich Windl
  16  */
  17
  18 #include <linux/config.h>
  19 #include <linux/mm.h>
  20 #include <linux/timex.h>
  21 #include <linux/delay.h>
  22 #include <linux/smp_lock.h>
  23 #include <linux/interrupt.h>
  24 #include <linux/kernel_stat.h>
  25
  26 #include <asm/uaccess.h>
  27
  28 /*
  29  * Timekeeping variables
  30  */
  31
  32 long tick = (1000000 + HZ/2) / HZ;      /* timer interrupt period */
  33
  34 /* The current time */
  35 volatile struct timeval xtime __attribute__ ((aligned (16)));
  36
  37 /* Don't completely fail for HZ > 500.  */
  38 int tickadj = 500/HZ ? : 1;             /* microsecs */
  39
  40 DECLARE_TASK_QUEUE(tq_timer);
  41 DECLARE_TASK_QUEUE(tq_immediate);
  42 DECLARE_TASK_QUEUE(tq_scheduler);
  43
  44 /*
  45  * phase-lock loop variables
  46  */
  47 /* TIME_ERROR prevents overwriting the CMOS clock */
  48 int time_state = TIME_OK;               /* clock synchronization status */
  49 int time_status = STA_UNSYNC;           /* clock status bits            */
  50 long time_offset;                       /* time adjustment (us)         */
  51 long time_constant = 2;                 /* pll time constant            */
  52 long time_tolerance = MAXFREQ;          /* frequency tolerance (ppm)    */
  53 long time_precision = 1;                /* clock precision (us)         */
  54 long time_maxerror = NTP_PHASE_LIMIT;   /* maximum error (us)           */
  55 long time_esterror = NTP_PHASE_LIMIT;   /* estimated error (us)         */
  56 long time_phase;                        /* phase offset (scaled us)     */
  57 long time_freq = ((1000000 + HZ/2) % HZ - HZ/2) << SHIFT_USEC;
  58                                         /* frequency offset (scaled ppm)*/
  59 long time_adj;                          /* tick adjust (scaled 1 / HZ)  */
  60 long time_reftime;                      /* time at last adjustment (s)  */
  61
  62 long time_adjust;
  63 long time_adjust_step;
  64
  65 unsigned long event;
  66
  67 extern int do_setitimer(int, struct itimerval *, struct itimerval *);
  68
  69 unsigned long volatile jiffies;
  70
  71 unsigned int * prof_buffer;
  72 unsigned long prof_len;
  73 unsigned long prof_shift;
  74
  75 /*
  76  * Event timer code
  77  */
  78 #define TVN_BITS 6
  79 #define TVR_BITS 8
  80 #define TVN_SIZE (1 << TVN_BITS)
  81 #define TVR_SIZE (1 << TVR_BITS)
  82 #define TVN_MASK (TVN_SIZE - 1)
  83 #define TVR_MASK (TVR_SIZE - 1)
  84
  85 struct timer_vec {
  86         int index;
  87         struct list_head vec[TVN_SIZE];
  88 };
  89
  90 struct timer_vec_root {
  91         int index;
  92         struct list_head vec[TVR_SIZE];
  93 };
  94
  95 static struct timer_vec tv5;
  96 static struct timer_vec tv4;
  97 static struct timer_vec tv3;
  98 static struct timer_vec tv2;
  99 static struct timer_vec_root tv1;
 100
 101 static struct timer_vec * const tvecs[] = {
 102         (struct timer_vec *)&tv1, &tv2, &tv3, &tv4, &tv5
 103 };
 104
 105 #define NOOF_TVECS (sizeof(tvecs) / sizeof(tvecs[0]))
 106
 107 void init_timervecs (void)
 108 {
 109         int i;
 110
 111         for (i = 0; i < TVN_SIZE; i++) {
 112                 INIT_LIST_HEAD(tv5.vec + i);
 113                 INIT_LIST_HEAD(tv4.vec + i);
 114                 INIT_LIST_HEAD(tv3.vec + i);
 115                 INIT_LIST_HEAD(tv2.vec + i);
 116         }
 117         for (i = 0; i < TVR_SIZE; i++)
 118                 INIT_LIST_HEAD(tv1.vec + i);
 119 }
 120
 121 static unsigned long timer_jiffies;
 122
 123 static inline void internal_add_timer(struct timer_list *timer)
 124 {
 125         /*
 126          * must be cli-ed when calling this
 127          */
 128         unsigned long expires = timer->expires;
 129         unsigned long idx = expires - timer_jiffies;
 130         struct list_head * vec;
 131
 132         if (idx < TVR_SIZE) {
 133                 int i = expires & TVR_MASK;
 134                 vec = tv1.vec + i;
 135         } else if (idx < 1 << (TVR_BITS + TVN_BITS)) {
 136                 int i = (expires >> TVR_BITS) & TVN_MASK;
 137                 vec = tv2.vec + i;
 138         } else if (idx < 1 << (TVR_BITS + 2 * TVN_BITS)) {
 139                 int i = (expires >> (TVR_BITS + TVN_BITS)) & TVN_MASK;
 140                 vec =  tv3.vec + i;
 141         } else if (idx < 1 << (TVR_BITS + 3 * TVN_BITS)) {
 142                 int i = (expires >> (TVR_BITS + 2 * TVN_BITS)) & TVN_MASK;
 143                 vec = tv4.vec + i;
 144         } else if ((signed long) idx < 0) {
 145                 /* can happen if you add a timer with expires == jiffies,
 146                  * or you set a timer to go off in the past
 147                  */
 148                 vec = tv1.vec + tv1.index;
 149         } else if (idx <= 0xffffffffUL) {
 150                 int i = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK;
 151                 vec = tv5.vec + i;
 152         } else {
 153                 /* Can only get here on architectures with 64-bit jiffies */
 154                 INIT_LIST_HEAD(&timer->list);
 155                 return;
 156         }
 157         /*
 158          * Timers are FIFO!
 159          */
 160         list_add(&timer->list, vec->prev);
 161 }
 162
 163 /* Initialize both explicitly - let's try to have them in the same cache line */
 164 spinlock_t timerlist_lock = SPIN_LOCK_UNLOCKED;
 165 volatile struct timer_list * volatile running_timer = NULL;
 166
 167 #ifdef CONFIG_SMP
 168 #define timer_enter(t) do { running_timer = t; mb(); } while (0)
 169 #define timer_exit() do { running_timer = NULL; } while (0)
 170 #define timer_is_running(t) (running_timer == t)
 171 #define timer_synchronize(t) while (timer_is_running(t)) barrier()
 172 #else
 173 #define timer_enter(t)          do { } while (0)
 174 #define timer_exit()            do { } while (0)
 175 #define timer_is_running(t)     (0)
 176 #define timer_synchronize(t)    do { (void)(t); barrier(); } while(0)
 177 #endif
 178
 179 void add_timer(struct timer_list *timer)
 180 {
 181         unsigned long flags;
 182
 183         spin_lock_irqsave(&timerlist_lock, flags);
 184         if (timer->list.next)
 185                 goto bug;
 186         internal_add_timer(timer);
 187 out:
 188         spin_unlock_irqrestore(&timerlist_lock, flags);
 189         return;
 190
 191 bug:
 192         printk("bug: kernel timer added twice at %p.\n",
 193                         __builtin_return_address(0));
 194         goto out;
 195 }
 196
 197 static inline int detach_timer (struct timer_list *timer)
 198 {
 199         if (!timer_pending(timer))
 200                 return 0;
 201         list_del(&timer->list);
 202         return 1;
 203 }
 204
 205 int mod_timer(struct timer_list *timer, unsigned long expires)
 206 {
 207         int ret;
 208         unsigned long flags;
 209
 210         spin_lock_irqsave(&timerlist_lock, flags);
 211         timer->expires = expires;
 212         ret = detach_timer(timer);
 213         internal_add_timer(timer);
 214         spin_unlock_irqrestore(&timerlist_lock, flags);
 215         return ret;
 216 }
 217
 218 int del_timer(struct timer_list * timer)
 219 {
 220         int ret;
 221         unsigned long flags;
 222
 223         spin_lock_irqsave(&timerlist_lock, flags);
 224         ret = detach_timer(timer);
 225         timer->list.next = timer->list.prev = NULL;
 226         spin_unlock_irqrestore(&timerlist_lock, flags);
 227         return ret;
 228 }
 229
 230 #ifdef CONFIG_SMP
 231 void sync_timers(void)
 232 {
 233         spin_unlock_wait(&global_bh_lock);
 234 }
 235
 236 /*
 237  * SMP specific function to delete periodic timer.
 238  * Caller must disable by some means restarting the timer
 239  * for new. Upon exit the timer is not queued and handler is not running
 240  * on any CPU. It returns number of times, which timer was deleted
 241  * (for reference counting).
 242  */
 243
 244 int del_timer_sync(struct timer_list * timer)
 245 {
 246         int ret = 0;
 247
 248         for (;;) {
 249                 unsigned long flags;
 250                 int running;
 251
 252                 spin_lock_irqsave(&timerlist_lock, flags);
 253                 ret += detach_timer(timer);
 254                 timer->list.next = timer->list.prev = 0;
 255                 running = timer_is_running(timer);
 256                 spin_unlock_irqrestore(&timerlist_lock, flags);
 257
 258                 if (!running)
 259                         break;
 260
 261                 timer_synchronize(timer);
 262         }
 263
 264         return ret;
 265 }
 266 #endif
 267
 268
 269 static inline void cascade_timers(struct timer_vec *tv)
 270 {
 271         /* cascade all the timers from tv up one level */
 272         struct list_head *head, *curr, *next;
 273
 274         head = tv->vec + tv->index;
 275         curr = head->next;
 276         /*
 277          * We are removing _all_ timers from the list, so we don't  have to
 278          * detach them individually, just clear the list afterwards.
 279          */
 280         while (curr != head) {
 281                 struct timer_list *tmp;
 282
 283                 tmp = list_entry(curr, struct timer_list, list);
 284                 next = curr->next;
 285                 list_del(curr); // not needed
 286                 internal_add_timer(tmp);
 287                 curr = next;
 288         }
 289         INIT_LIST_HEAD(head);
 290         tv->index = (tv->index + 1) & TVN_MASK;
 291 }
 292
 293 static inline void run_timer_list(void)
 294 {
 295         spin_lock_irq(&timerlist_lock);
 296         while ((long)(jiffies - timer_jiffies) >= 0) {
 297                 struct list_head *head, *curr;
 298                 if (!tv1.index) {
 299                         int n = 1;
 300                         do {
 301                                 cascade_timers(tvecs[n]);
 302                         } while (tvecs[n]->index == 1 && ++n < NOOF_TVECS);
 303                 }
 304 repeat:
 305                 head = tv1.vec + tv1.index;
 306                 curr = head->next;
 307                 if (curr != head) {
 308                         struct timer_list *timer;
 309                         void (*fn)(unsigned long);
 310                         unsigned long data;
 311
 312                         timer = list_entry(curr, struct timer_list, list);
 313                         fn = timer->function;
 314                         data= timer->data;
 315
 316                         detach_timer(timer);
 317                         timer->list.next = timer->list.prev = NULL;
 318                         timer_enter(timer);
 319                         spin_unlock_irq(&timerlist_lock);
 320                         fn(data);
 321                         spin_lock_irq(&timerlist_lock);
 322                         timer_exit();
 323                         goto repeat;
 324                 }
 325                 ++timer_jiffies;
 326                 tv1.index = (tv1.index + 1) & TVR_MASK;
 327         }
 328         spin_unlock_irq(&timerlist_lock);
 329 }
 330
 331 spinlock_t tqueue_lock = SPIN_LOCK_UNLOCKED;
 332
 333 void tqueue_bh(void)
 334 {
 335         run_task_queue(&tq_timer);
 336 }
 337
 338 void immediate_bh(void)
 339 {
 340         run_task_queue(&tq_immediate);
 341 }
 342
 343 /*
 344  * this routine handles the overflow of the microsecond field
 345  *
 346  * The tricky bits of code to handle the accurate clock support
 347  * were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame.
 348  * They were originally developed for SUN and DEC kernels.
 349  * All the kudos should go to Dave for this stuff.
 350  *
 351  */
 352 static void second_overflow(void)
 353 {
 354     long ltemp;
 355
 356     /* Bump the maxerror field */
 357     time_maxerror += time_tolerance >> SHIFT_USEC;
 358     if ( time_maxerror > NTP_PHASE_LIMIT ) {
 359         time_maxerror = NTP_PHASE_LIMIT;
 360         time_status |= STA_UNSYNC;
 361     }
 362
 363     /*
 364      * Leap second processing. If in leap-insert state at
 365      * the end of the day, the system clock is set back one
 366      * second; if in leap-delete state, the system clock is
 367      * set ahead one second. The microtime() routine or
 368      * external clock driver will insure that reported time
 369      * is always monotonic. The ugly divides should be
 370      * replaced.
 371      */
 372     switch (time_state) {
 373
 374     case TIME_OK:
 375         if (time_status & STA_INS)
 376             time_state = TIME_INS;
 377         else if (time_status & STA_DEL)
 378             time_state = TIME_DEL;
 379         break;
 380
 381     case TIME_INS:
 382         if (xtime.tv_sec % 86400 == 0) {
 383             xtime.tv_sec--;
 384             time_state = TIME_OOP;
 385             printk(KERN_NOTICE "Clock: inserting leap second 23:59:60 UTC\n");
 386         }
 387         break;
 388
 389     case TIME_DEL:
 390         if ((xtime.tv_sec + 1) % 86400 == 0) {
 391             xtime.tv_sec++;
 392             time_state = TIME_WAIT;
 393             printk(KERN_NOTICE "Clock: deleting leap second 23:59:59 UTC\n");
 394         }
 395         break;
 396
 397     case TIME_OOP:
 398         time_state = TIME_WAIT;
 399         break;
 400
 401     case TIME_WAIT:
 402         if (!(time_status & (STA_INS | STA_DEL)))
 403             time_state = TIME_OK;
 404     }
 405
 406     /*
 407      * Compute the phase adjustment for the next second. In
 408      * PLL mode, the offset is reduced by a fixed factor
 409      * times the time constant. In FLL mode the offset is
 410      * used directly. In either mode, the maximum phase
 411      * adjustment for each second is clamped so as to spread
 412      * the adjustment over not more than the number of
 413      * seconds between updates.
 414      */
 415     if (time_offset < 0) {
 416         ltemp = -time_offset;
 417         if (!(time_status & STA_FLL))
 418             ltemp >>= SHIFT_KG + time_constant;
 419         if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE)
 420             ltemp = (MAXPHASE / MINSEC) << SHIFT_UPDATE;
 421         time_offset += ltemp;
 422         time_adj = -ltemp << (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE);
 423     } else {
 424         ltemp = time_offset;
 425         if (!(time_status & STA_FLL))
 426             ltemp >>= SHIFT_KG + time_constant;
 427         if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE)
 428             ltemp = (MAXPHASE / MINSEC) << SHIFT_UPDATE;
 429         time_offset -= ltemp;
 430         time_adj = ltemp << (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE);
 431     }
 432
 433     /*
 434      * Compute the frequency estimate and additional phase
 435      * adjustment due to frequency error for the next
 436      * second. When the PPS signal is engaged, gnaw on the
 437      * watchdog counter and update the frequency computed by
 438      * the pll and the PPS signal.
 439      */
 440     pps_valid++;
 441     if (pps_valid == PPS_VALID) {       /* PPS signal lost */
 442         pps_jitter = MAXTIME;
 443         pps_stabil = MAXFREQ;
 444         time_status &= ~(STA_PPSSIGNAL | STA_PPSJITTER |
 445                          STA_PPSWANDER | STA_PPSERROR);
 446     }
 447     ltemp = time_freq + pps_freq;
 448     if (ltemp < 0)
 449         time_adj -= -ltemp >>
 450             (SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE);
 451     else
 452         time_adj += ltemp >>
 453             (SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE);
 454
 455 #if HZ == 100
 456     /* Compensate for (HZ==100) != (1 << SHIFT_HZ).
 457      * Add 25% and 3.125% to get 128.125; => only 0.125% error (p. 14)
 458      */
 459     if (time_adj < 0)
 460         time_adj -= (-time_adj >> 2) + (-time_adj >> 5);
 461     else
 462         time_adj += (time_adj >> 2) + (time_adj >> 5);
 463 #endif
 464 }
 465
 466 /* in the NTP reference this is called "hardclock()" */
 467 static void update_wall_time_one_tick(void)
 468 {
 469         if ( (time_adjust_step = time_adjust) != 0 ) {
 470             /* We are doing an adjtime thing.
 471              *
 472              * Prepare time_adjust_step to be within bounds.
 473              * Note that a positive time_adjust means we want the clock
 474              * to run faster.
 475              *
 476              * Limit the amount of the step to be in the range
 477              * -tickadj .. +tickadj
 478              */
 479              if (time_adjust > tickadj)
 480                 time_adjust_step = tickadj;
 481              else if (time_adjust < -tickadj)
 482                 time_adjust_step = -tickadj;
 483
 484             /* Reduce by this step the amount of time left  */
 485             time_adjust -= time_adjust_step;
 486         }
 487         xtime.tv_usec += tick + time_adjust_step;
 488         /*
 489          * Advance the phase, once it gets to one microsecond, then
 490          * advance the tick more.
 491          */
 492         time_phase += time_adj;
 493         if (time_phase <= -FINEUSEC) {
 494                 long ltemp = -time_phase >> SHIFT_SCALE;
 495                 time_phase += ltemp << SHIFT_SCALE;
 496                 xtime.tv_usec -= ltemp;
 497         }
 498         else if (time_phase >= FINEUSEC) {
 499                 long ltemp = time_phase >> SHIFT_SCALE;
 500                 time_phase -= ltemp << SHIFT_SCALE;
 501                 xtime.tv_usec += ltemp;
 502         }
 503 }
 504
 505 /*
 506  * Using a loop looks inefficient, but "ticks" is
 507  * usually just one (we shouldn't be losing ticks,
 508  * we're doing this this way mainly for interrupt
 509  * latency reasons, not because we think we'll
 510  * have lots of lost timer ticks
 511  */
 512 static void update_wall_time(unsigned long ticks)
 513 {
 514         do {
 515                 ticks--;
 516                 update_wall_time_one_tick();
 517         } while (ticks);
 518
 519         if (xtime.tv_usec >= 1000000) {
 520             xtime.tv_usec -= 1000000;
 521             xtime.tv_sec++;
 522             second_overflow();
 523         }
 524 }
 525
 526 static inline void do_process_times(struct task_struct *p,
 527         unsigned long user, unsigned long system)
 528 {
 529         unsigned long psecs;
 530
 531         psecs = (p->times.tms_utime += user);
 532         psecs += (p->times.tms_stime += system);
 533         if (psecs / HZ > p->rlim[RLIMIT_CPU].rlim_cur) {
 534                 /* Send SIGXCPU every second.. */
 535                 if (!(psecs % HZ))
 536                         send_sig(SIGXCPU, p, 1);
 537                 /* and SIGKILL when we go over max.. */
 538                 if (psecs / HZ > p->rlim[RLIMIT_CPU].rlim_max)
 539                         send_sig(SIGKILL, p, 1);
 540         }
 541 }
 542
 543 static inline void do_it_virt(struct task_struct * p, unsigned long ticks)
 544 {
 545         unsigned long it_virt = p->it_virt_value;
 546
 547         if (it_virt) {
 548                 it_virt -= ticks;
 549                 if (!it_virt) {
 550                         it_virt = p->it_virt_incr;
 551                         send_sig(SIGVTALRM, p, 1);
 552                 }
 553                 p->it_virt_value = it_virt;
 554         }
 555 }
 556
 557 static inline void do_it_prof(struct task_struct *p)
 558 {
 559         unsigned long it_prof = p->it_prof_value;
 560
 561         if (it_prof) {
 562                 if (--it_prof == 0) {
 563                         it_prof = p->it_prof_incr;
 564                         send_sig(SIGPROF, p, 1);
 565                 }
 566                 p->it_prof_value = it_prof;
 567         }
 568 }
 569
 570 void update_one_process(struct task_struct *p, unsigned long user,
 571                         unsigned long system, int cpu)
 572 {
 573         p->per_cpu_utime[cpu] += user;
 574         p->per_cpu_stime[cpu] += system;
 575         do_process_times(p, user, system);
 576         do_it_virt(p, user);
 577         do_it_prof(p);
 578 }
 579
 580 /*
 581  * Called from the timer interrupt handler to charge one tick to the current
 582  * process.  user_tick is 1 if the tick is user time, 0 for system.
 583  */
 584 void update_process_times(int user_tick)
 585 {
 586         struct task_struct *p = current;
 587         int cpu = smp_processor_id(), system = user_tick ^ 1;
 588
 589         update_one_process(p, user_tick, system, cpu);
 590         if (p->pid) {
 591                 if (--p->counter <= 0) {
 592                         p->counter = 0;
 593                         p->need_resched = 1;
 594                 }
 595                 if (p->nice > 0)
 596                         kstat.per_cpu_nice[cpu] += user_tick;
 597                 else
 598                         kstat.per_cpu_user[cpu] += user_tick;
 599                 kstat.per_cpu_system[cpu] += system;
 600         } else if (local_bh_count(cpu) || local_irq_count(cpu) > 1)
 601                 kstat.per_cpu_system[cpu] += system;
 602 }
 603
 604 /*
 605  * Nr of active tasks - counted in fixed-point numbers
 606  */
 607 static unsigned long count_active_tasks(void)
 608 {
 609         struct task_struct *p;
 610         unsigned long nr = 0;
 611
 612         read_lock(&tasklist_lock);
 613         for_each_task(p) {
 614                 if ((p->state == TASK_RUNNING ||
 615                      (p->state & TASK_UNINTERRUPTIBLE)))
 616                         nr += FIXED_1;
 617         }
 618         read_unlock(&tasklist_lock);
 619         return nr;
 620 }
 621
 622 /*
 623  * Hmm.. Changed this, as the GNU make sources (load.c) seems to
 624  * imply that avenrun[] is the standard name for this kind of thing.
 625  * Nothing else seems to be standardized: the fractional size etc
 626  * all seem to differ on different machines.
 627  */
 628 unsigned long avenrun[3];
 629
 630 static inline void calc_load(unsigned long ticks)
 631 {
 632         unsigned long active_tasks; /* fixed-point */
 633         static int count = LOAD_FREQ;
 634
 635         count -= ticks;
 636         if (count < 0) {
 637                 count += LOAD_FREQ;
 638                 active_tasks = count_active_tasks();
 639                 CALC_LOAD(avenrun[0], EXP_1, active_tasks);
 640                 CALC_LOAD(avenrun[1], EXP_5, active_tasks);
 641                 CALC_LOAD(avenrun[2], EXP_15, active_tasks);
 642         }
 643 }
 644
 645 /* jiffies at the most recent update of wall time */
 646 unsigned long wall_jiffies;
 647
 648 /*
 649  * This spinlock protect us from races in SMP while playing with xtime. -arca
 650  */
 651 rwlock_t xtime_lock = RW_LOCK_UNLOCKED;
 652
 653 static inline void update_times(void)
 654 {
 655         unsigned long ticks;
 656
 657         /*
 658          * update_times() is run from the raw timer_bh handler so we
 659          * just know that the irqs are locally enabled and so we don't
 660          * need to save/restore the flags of the local CPU here. -arca
 661          */
 662         write_lock_irq(&xtime_lock);
 663
 664         ticks = jiffies - wall_jiffies;
 665         if (ticks) {
 666                 wall_jiffies += ticks;
 667                 update_wall_time(ticks);
 668         }
 669         write_unlock_irq(&xtime_lock);
 670         calc_load(ticks);
 671 }
 672
 673 void timer_bh(void)
 674 {
 675         update_times();
 676         run_timer_list();
 677 }
 678
 679 void do_timer(struct pt_regs *regs)
 680 {
 681         (*(unsigned long *)&jiffies)++;
 682 #ifndef CONFIG_SMP
 683         /* SMP process accounting uses the local APIC timer */
 684
 685         update_process_times(user_mode(regs));
 686 #endif
 687         mark_bh(TIMER_BH);
 688         if (tq_timer)
 689                 mark_bh(TQUEUE_BH);
 690 }
 691
 692 #if !defined(__alpha__) && !defined(__ia64__)
 693
 694 /*
 695  * For backwards compatibility?  This can be done in libc so Alpha
 696  * and all newer ports shouldn't need it.
 697  */
 698 asmlinkage unsigned long sys_alarm(unsigned int seconds)
 699 {
 700         struct itimerval it_new, it_old;
 701         unsigned int oldalarm;
 702
 703         it_new.it_interval.tv_sec = it_new.it_interval.tv_usec = 0;
 704         it_new.it_value.tv_sec = seconds;
 705         it_new.it_value.tv_usec = 0;
 706         do_setitimer(ITIMER_REAL, &it_new, &it_old);
 707         oldalarm = it_old.it_value.tv_sec;
 708         /* ehhh.. We can't return 0 if we have an alarm pending.. */
 709         /* And we'd better return too much than too little anyway */
 710         if (it_old.it_value.tv_usec)
 711                 oldalarm++;
 712         return oldalarm;
 713 }
 714
 715 #endif
 716
 717 #ifndef __alpha__
 718
 719 /*
 720  * The Alpha uses getxpid, getxuid, and getxgid instead.  Maybe this
 721  * should be moved into arch/i386 instead?
 722  */
 723
 724 asmlinkage long sys_getpid(void)
 725 {
 726         /* This is SMP safe - current->pid doesn't change */
 727         return current->pid;
 728 }
 729
 730 /*
 731  * This is not strictly SMP safe: p_opptr could change
 732  * from under us. However, rather than getting any lock
 733  * we can use an optimistic algorithm: get the parent
 734  * pid, and go back and check that the parent is still
 735  * the same. If it has changed (which is extremely unlikely
 736  * indeed), we just try again..
 737  *
 738  * NOTE! This depends on the fact that even if we _do_
 739  * get an old value of "parent", we can happily dereference
 740  * the pointer: we just can't necessarily trust the result
 741  * until we know that the parent pointer is valid.
 742  *
 743  * The "mb()" macro is a memory barrier - a synchronizing
 744  * event. It also makes sure that gcc doesn't optimize
 745  * away the necessary memory references.. The barrier doesn't
 746  * have to have all that strong semantics: on x86 we don't
 747  * really require a synchronizing instruction, for example.
 748  * The barrier is more important for code generation than
 749  * for any real memory ordering semantics (even if there is
 750  * a small window for a race, using the old pointer is
 751  * harmless for a while).
 752  */
 753 asmlinkage long sys_getppid(void)
 754 {
 755         int pid;
 756         struct task_struct * me = current;
 757         struct task_struct * parent;
 758
 759         parent = me->p_opptr;
 760         for (;;) {
 761                 pid = parent->pid;
 762 #if CONFIG_SMP
 763 {
 764                 struct task_struct *old = parent;
 765                 mb();
 766                 parent = me->p_opptr;
 767                 if (old != parent)
 768                         continue;
 769 }
 770 #endif
 771                 break;
 772         }
 773         return pid;
 774 }
 775
 776 asmlinkage long sys_getuid(void)
 777 {
 778         /* Only we change this so SMP safe */
 779         return current->uid;
 780 }
 781
 782 asmlinkage long sys_geteuid(void)
 783 {
 784         /* Only we change this so SMP safe */
 785         return current->euid;
 786 }
 787
 788 asmlinkage long sys_getgid(void)
 789 {
 790         /* Only we change this so SMP safe */
 791         return current->gid;
 792 }
 793
 794 asmlinkage long sys_getegid(void)
 795 {
 796         /* Only we change this so SMP safe */
 797         return  current->egid;
 798 }
 799
 800 #endif
 801
 802 asmlinkage long sys_nanosleep(struct timespec *rqtp, struct timespec *rmtp)
 803 {
 804         struct timespec t;
 805         unsigned long expire;
 806
 807         if(copy_from_user(&t, rqtp, sizeof(struct timespec)))
 808                 return -EFAULT;
 809
 810         if (t.tv_nsec >= 1000000000L || t.tv_nsec < 0 || t.tv_sec < 0)
 811                 return -EINVAL;
 812
 813
 814         if (t.tv_sec == 0 && t.tv_nsec <= 2000000L &&
 815             current->policy != SCHED_OTHER)
 816         {
 817                 /*
 818                  * Short delay requests up to 2 ms will be handled with
 819                  * high precision by a busy wait for all real-time processes.
 820                  *
 821                  * Its important on SMP not to do this holding locks.
 822                  */
 823                 udelay((t.tv_nsec + 999) / 1000);
 824                 return 0;
 825         }
 826
 827         expire = timespec_to_jiffies(&t) + (t.tv_sec || t.tv_nsec);
 828
 829         current->state = TASK_INTERRUPTIBLE;
 830         expire = schedule_timeout(expire);
 831
 832         if (expire) {
 833                 if (rmtp) {
 834                         jiffies_to_timespec(expire, &t);
 835                         if (copy_to_user(rmtp, &t, sizeof(struct timespec)))
 836                                 return -EFAULT;
 837                 }
 838                 return -EINTR;
 839         }
 840         return 0;
 841 }
 842