kernel/sys.c

   1 /*
   2  *  linux/kernel/sys.c
   3  *
   4  *  Copyright (C) 1991, 1992  Linus Torvalds
   5  */
   6
   7 #include <linux/module.h>
   8 #include <linux/mm.h>
   9 #include <linux/utsname.h>
  10 #include <linux/mman.h>
  11 #include <linux/smp_lock.h>
  12 #include <linux/notifier.h>
  13 #include <linux/reboot.h>
  14 #include <linux/prctl.h>
  15 #include <linux/init.h>
  16 #include <linux/highuid.h>
  17
  18 #include <asm/uaccess.h>
  19 #include <asm/io.h>
  20
/*
 * System-wide overflow UID and GID, for architectures that now have
 * 32-bit UID/GID but did not in the past. Both start out at their
 * DEFAULT_OVERFLOW* values.
 */

int overflowuid = DEFAULT_OVERFLOWUID;
int overflowgid = DEFAULT_OVERFLOWGID;
  28
  29 /*
  30  * the same as above, but for filesystems which can only store a 16-bit
  31  * UID and GID. as such, this is needed on all architectures
  32  */
  33
  34 int fs_overflowuid = DEFAULT_FS_OVERFLOWUID;
  35 int fs_overflowgid = DEFAULT_FS_OVERFLOWUID;
  36
/*
 * Indicates whether ctrl-alt-del reboots the machine; the default is yes.
 * When non-zero, ctrl_alt_del() restarts the machine; when zero it sends
 * SIGINT to pid 1 instead. sys_reboot() flips this flag through
 * LINUX_REBOOT_CMD_CAD_ON / LINUX_REBOOT_CMD_CAD_OFF.
 */

int C_A_D = 1;
  42
  43
/*
 *      Notifier list for kernel code which wants to be called
 *      at shutdown. This is used to stop any idling DMA operations
 *      and the like.
 *
 *      notifier_lock is taken for writing by notifier_chain_register()
 *      and notifier_chain_unregister(); notifier_call_chain() walks a
 *      chain without taking it.
 */

static struct notifier_block *reboot_notifier_list;
rwlock_t notifier_lock = RW_LOCK_UNLOCKED;
  52
  53 /**
  54  *      notifier_chain_register - Add notifier to a notifier chain
  55  *      @list: Pointer to root list pointer
  56  *      @n: New entry in notifier chain
  57  *
  58  *      Adds a notifier to a notifier chain.
  59  *
  60  *      Currently always returns zero.
  61  */
  62
  63 int notifier_chain_register(struct notifier_block **list, struct notifier_block *n)
  64 {
  65         write_lock(&notifier_lock);
  66         while(*list)
  67         {
  68                 if(n->priority > (*list)->priority)
  69                         break;
  70                 list= &((*list)->next);
  71         }
  72         n->next = *list;
  73         *list=n;
  74         write_unlock(&notifier_lock);
  75         return 0;
  76 }
  77
  78 /**
  79  *      notifier_chain_unregister - Remove notifier from a notifier chain
  80  *      @nl: Pointer to root list pointer
  81  *      @n: New entry in notifier chain
  82  *
  83  *      Removes a notifier from a notifier chain.
  84  *
  85  *      Returns zero on success, or %-ENOENT on failure.
  86  */
  87
  88 int notifier_chain_unregister(struct notifier_block **nl, struct notifier_block *n)
  89 {
  90         write_lock(&notifier_lock);
  91         while((*nl)!=NULL)
  92         {
  93                 if((*nl)==n)
  94                 {
  95                         *nl=n->next;
  96                         write_unlock(&notifier_lock);
  97                         return 0;
  98                 }
  99                 nl=&((*nl)->next);
 100         }
 101         write_unlock(&notifier_lock);
 102         return -ENOENT;
 103 }
 104
 105 /**
 106  *      notifier_call_chain - Call functions in a notifier chain
 107  *      @n: Pointer to root pointer of notifier chain
 108  *      @val: Value passed unmodified to notifier function
 109  *      @v: Pointer passed unmodified to notifier function
 110  *
 111  *      Calls each function in a notifier chain in turn.
 112  *
 113  *      If the return value of the notifier can be and'd
 114  *      with %NOTIFY_STOP_MASK, then notifier_call_chain
 115  *      will return immediately, with the return value of
 116  *      the notifier function which halted execution.
 117  *      Otherwise, the return value is the return value
 118  *      of the last notifier function called.
 119  */
 120
 121 int notifier_call_chain(struct notifier_block **n, unsigned long val, void *v)
 122 {
 123         int ret=NOTIFY_DONE;
 124         struct notifier_block *nb = *n;
 125
 126         while(nb)
 127         {
 128                 ret=nb->notifier_call(nb,val,v);
 129                 if(ret&NOTIFY_STOP_MASK)
 130                 {
 131                         return ret;
 132                 }
 133                 nb=nb->next;
 134         }
 135         return ret;
 136 }
 137
 138 /**
 139  *      register_reboot_notifier - Register function to be called at reboot time
 140  *      @nb: Info about notifier function to be called
 141  *
 142  *      Registers a function with the list of functions
 143  *      to be called at reboot time.
 144  *
 145  *      Currently always returns zero, as notifier_chain_register
 146  *      always returns zero.
 147  */
 148
 149 int register_reboot_notifier(struct notifier_block * nb)
 150 {
 151         return notifier_chain_register(&reboot_notifier_list, nb);
 152 }
 153
 154 /**
 155  *      unregister_reboot_notifier - Unregister previously registered reboot notifier
 156  *      @nb: Hook to be unregistered
 157  *
 158  *      Unregisters a previously registered reboot
 159  *      notifier function.
 160  *
 161  *      Returns zero on success, or %-ENOENT on failure.
 162  */
 163
 164 int unregister_reboot_notifier(struct notifier_block * nb)
 165 {
 166         return notifier_chain_unregister(&reboot_notifier_list, nb);
 167 }
 168
 169 asmlinkage long sys_ni_syscall(void)
 170 {
 171         return -ENOSYS;
 172 }
 173
 174 static int proc_sel(struct task_struct *p, int which, int who)
 175 {
 176         if(p->pid)
 177         {
 178                 switch (which) {
 179                         case PRIO_PROCESS:
 180                                 if (!who && p == current)
 181                                         return 1;
 182                                 return(p->pid == who);
 183                         case PRIO_PGRP:
 184                                 if (!who)
 185                                         who = current->pgrp;
 186                                 return(p->pgrp == who);
 187                         case PRIO_USER:
 188                                 if (!who)
 189                                         who = current->uid;
 190                                 return(p->uid == who);
 191                 }
 192         }
 193         return 0;
 194 }
 195
 196 asmlinkage long sys_setpriority(int which, int who, int niceval)
 197 {
 198         struct task_struct *p;
 199         int error;
 200
 201         if (which > 2 || which < 0)
 202                 return -EINVAL;
 203
 204         /* normalize: avoid signed division (rounding problems) */
 205         error = -ESRCH;
 206         if (niceval < -20)
 207                 niceval = -20;
 208         if (niceval > 19)
 209                 niceval = 19;
 210
 211         read_lock(&tasklist_lock);
 212         for_each_task(p) {
 213                 if (!proc_sel(p, which, who))
 214                         continue;
 215                 if (p->uid != current->euid &&
 216                         p->uid != current->uid && !capable(CAP_SYS_NICE)) {
 217                         error = -EPERM;
 218                         continue;
 219                 }
 220                 if (error == -ESRCH)
 221                         error = 0;
 222                 if (niceval < p->nice && !capable(CAP_SYS_NICE))
 223                         error = -EACCES;
 224                 else
 225                         p->nice = niceval;
 226         }
 227         read_unlock(&tasklist_lock);
 228
 229         return error;
 230 }
 231
 232 /*
 233  * Ugh. To avoid negative return values, "getpriority()" will
 234  * not return the normal nice-value, but a negated value that
 235  * has been offset by 20 (ie it returns 40..1 instead of -20..19)
 236  * to stay compatible.
 237  */
 238 asmlinkage long sys_getpriority(int which, int who)
 239 {
 240         struct task_struct *p;
 241         long retval = -ESRCH;
 242
 243         if (which > 2 || which < 0)
 244                 return -EINVAL;
 245
 246         read_lock(&tasklist_lock);
 247         for_each_task (p) {
 248                 long niceval;
 249                 if (!proc_sel(p, which, who))
 250                         continue;
 251                 niceval = 20 - p->nice;
 252                 if (niceval > retval)
 253                         retval = niceval;
 254         }
 255         read_unlock(&tasklist_lock);
 256
 257         return retval;
 258 }
 259
 260
 261 /*
 262  * Reboot system call: for obvious reasons only root may call it,
 263  * and even root needs to set up some magic numbers in the registers
 264  * so that some mistake won't make this reboot the whole machine.
 265  * You can also set the meaning of the ctrl-alt-del-key here.
 266  *
 267  * reboot doesn't sync: do that yourself before calling this.
 268  */
 269 asmlinkage long sys_reboot(int magic1, int magic2, unsigned int cmd, void * arg)
 270 {
 271         char buffer[256];
 272
 273         /* We only trust the superuser with rebooting the system. */
 274         if (!capable(CAP_SYS_BOOT))
 275                 return -EPERM;
 276
 277         /* For safety, we require "magic" arguments. */
 278         if (magic1 != LINUX_REBOOT_MAGIC1 ||
 279             (magic2 != LINUX_REBOOT_MAGIC2 && magic2 != LINUX_REBOOT_MAGIC2A &&
 280                         magic2 != LINUX_REBOOT_MAGIC2B))
 281                 return -EINVAL;
 282
 283         lock_kernel();
 284         switch (cmd) {
 285         case LINUX_REBOOT_CMD_RESTART:
 286                 notifier_call_chain(&reboot_notifier_list, SYS_RESTART, NULL);
 287                 printk(KERN_EMERG "Restarting system.\n");
 288                 machine_restart(NULL);
 289                 break;
 290
 291         case LINUX_REBOOT_CMD_CAD_ON:
 292                 C_A_D = 1;
 293                 break;
 294
 295         case LINUX_REBOOT_CMD_CAD_OFF:
 296                 C_A_D = 0;
 297                 break;
 298
 299         case LINUX_REBOOT_CMD_HALT:
 300                 notifier_call_chain(&reboot_notifier_list, SYS_HALT, NULL);
 301                 printk(KERN_EMERG "System halted.\n");
 302                 machine_halt();
 303                 do_exit(0);
 304                 break;
 305
 306         case LINUX_REBOOT_CMD_POWER_OFF:
 307                 notifier_call_chain(&reboot_notifier_list, SYS_POWER_OFF, NULL);
 308                 printk(KERN_EMERG "Power down.\n");
 309                 machine_power_off();
 310                 do_exit(0);
 311                 break;
 312
 313         case LINUX_REBOOT_CMD_RESTART2:
 314                 if (strncpy_from_user(&buffer[0], (char *)arg, sizeof(buffer) - 1) < 0) {
 315                         unlock_kernel();
 316                         return -EFAULT;
 317                 }
 318                 buffer[sizeof(buffer) - 1] = '\0';
 319
 320                 notifier_call_chain(&reboot_notifier_list, SYS_RESTART, buffer);
 321                 printk(KERN_EMERG "Restarting system with command '%s'.\n", buffer);
 322                 machine_restart(buffer);
 323                 break;
 324
 325         default:
 326                 unlock_kernel();
 327                 return -EINVAL;
 328         }
 329         unlock_kernel();
 330         return 0;
 331 }
 332
 333 /*
 334  * This function gets called by ctrl-alt-del - ie the keyboard interrupt.
 335  * As it's called within an interrupt, it may NOT sync: the only choice
 336  * is whether to reboot at once, or just ignore the ctrl-alt-del.
 337  */
 338 void ctrl_alt_del(void)
 339 {
 340         if (C_A_D) {
 341                 notifier_call_chain(&reboot_notifier_list, SYS_RESTART, NULL);
 342                 machine_restart(NULL);
 343         } else
 344                 kill_proc(1, SIGINT, 1);
 345 }
 346
 347
 348 /*
 349  * Unprivileged users may change the real gid to the effective gid
 350  * or vice versa.  (BSD-style)
 351  *
 352  * If you set the real gid at all, or set the effective gid to a value not
 353  * equal to the real gid, then the saved gid is set to the new effective gid.
 354  *
 355  * This makes it possible for a setgid program to completely drop its
 356  * privileges, which is often a useful assertion to make when you are doing
 357  * a security audit over a program.
 358  *
 359  * The general idea is that a program which uses just setregid() will be
 360  * 100% compatible with BSD.  A program which uses just setgid() will be
 361  * 100% compatible with POSIX with saved IDs.
 362  *
 * SMP: There are no races; the GIDs are checked only by filesystem
 364  *      operations (as far as semantic preservation is concerned).
 365  */
 366 asmlinkage long sys_setregid(gid_t rgid, gid_t egid)
 367 {
 368         int old_rgid = current->gid;
 369         int old_egid = current->egid;
 370
 371         if (rgid != (gid_t) -1) {
 372                 if ((old_rgid == rgid) ||
 373                     (current->egid==rgid) ||
 374                     capable(CAP_SETGID))
 375                         current->gid = rgid;
 376                 else
 377                         return -EPERM;
 378         }
 379         if (egid != (gid_t) -1) {
 380                 if ((old_rgid == egid) ||
 381                     (current->egid == egid) ||
 382                     (current->sgid == egid) ||
 383                     capable(CAP_SETGID))
 384                         current->fsgid = current->egid = egid;
 385                 else {
 386                         current->gid = old_rgid;
 387                         return -EPERM;
 388                 }
 389         }
 390         if (rgid != (gid_t) -1 ||
 391             (egid != (gid_t) -1 && egid != old_rgid))
 392                 current->sgid = current->egid;
 393         current->fsgid = current->egid;
 394         if (current->egid != old_egid)
 395                 current->dumpable = 0;
 396         return 0;
 397 }
 398
 399 /*
 400  * setgid() is implemented like SysV w/ SAVED_IDS
 401  *
 402  * SMP: Same implicit races as above.
 403  */
 404 asmlinkage long sys_setgid(gid_t gid)
 405 {
 406         int old_egid = current->egid;
 407
 408         if (capable(CAP_SETGID))
 409                 current->gid = current->egid = current->sgid = current->fsgid = gid;
 410         else if ((gid == current->gid) || (gid == current->sgid))
 411                 current->egid = current->fsgid = gid;
 412         else
 413                 return -EPERM;
 414
 415         if (current->egid != old_egid)
 416                 current->dumpable = 0;
 417         return 0;
 418 }
 419
 420 /*
 421  * cap_emulate_setxuid() fixes the effective / permitted capabilities of
 422  * a process after a call to setuid, setreuid, or setresuid.
 423  *
 424  *  1) When set*uiding _from_ one of {r,e,s}uid == 0 _to_ all of
 425  *  {r,e,s}uid != 0, the permitted and effective capabilities are
 426  *  cleared.
 427  *
 428  *  2) When set*uiding _from_ euid == 0 _to_ euid != 0, the effective
 429  *  capabilities of the process are cleared.
 430  *
 431  *  3) When set*uiding _from_ euid != 0 _to_ euid == 0, the effective
 432  *  capabilities are set to the permitted capabilities.
 433  *
 434  *  fsuid is handled elsewhere. fsuid == 0 and {r,e,s}uid!= 0 should
 435  *  never happen.
 436  *
 437  *  -astor
 438  *
 439  * cevans - New behaviour, Oct '99
 440  * A process may, via prctl(), elect to keep its capabilities when it
 441  * calls setuid() and switches away from uid==0. Both permitted and
 442  * effective sets will be retained.
 443  * Without this change, it was impossible for a daemon to drop only some
 444  * of its privilege. The call to setuid(!=0) would drop all privileges!
 445  * Keeping uid 0 is not an option because uid 0 owns too many vital
 446  * files..
 447  * Thanks to Olaf Kirch and Peter Benie for spotting this.
 448  */
 449 extern inline void cap_emulate_setxuid(int old_ruid, int old_euid,
 450                                        int old_suid)
 451 {
 452         if ((old_ruid == 0 || old_euid == 0 || old_suid == 0) &&
 453             (current->uid != 0 && current->euid != 0 && current->suid != 0) &&
 454             !current->keep_capabilities) {
 455                 cap_clear(current->cap_permitted);
 456                 cap_clear(current->cap_effective);
 457         }
 458         if (old_euid == 0 && current->euid != 0) {
 459                 cap_clear(current->cap_effective);
 460         }
 461         if (old_euid != 0 && current->euid == 0) {
 462                 current->cap_effective = current->cap_permitted;
 463         }
 464 }
 465
 466 static int set_user(uid_t new_ruid)
 467 {
 468         struct user_struct *new_user, *old_user;
 469
 470         /* What if a process setreuid()'s and this brings the
 471          * new uid over his NPROC rlimit?  We can check this now
 472          * cheaply with the new uid cache, so if it matters
 473          * we should be checking for it.  -DaveM
 474          */
 475         new_user = alloc_uid(new_ruid);
 476         if (!new_user)
 477                 return -EAGAIN;
 478         old_user = current->user;
 479         atomic_dec(&old_user->processes);
 480         atomic_inc(&new_user->processes);
 481
 482         current->uid = new_ruid;
 483         current->user = new_user;
 484         free_uid(old_user);
 485         return 0;
 486 }
 487
 488 /*
 489  * Unprivileged users may change the real uid to the effective uid
 490  * or vice versa.  (BSD-style)
 491  *
 492  * If you set the real uid at all, or set the effective uid to a value not
 493  * equal to the real uid, then the saved uid is set to the new effective uid.
 494  *
 495  * This makes it possible for a setuid program to completely drop its
 496  * privileges, which is often a useful assertion to make when you are doing
 497  * a security audit over a program.
 498  *
 499  * The general idea is that a program which uses just setreuid() will be
 500  * 100% compatible with BSD.  A program which uses just setuid() will be
 501  * 100% compatible with POSIX with saved IDs.
 502  */
 503 asmlinkage long sys_setreuid(uid_t ruid, uid_t euid)
 504 {
 505         int old_ruid, old_euid, old_suid, new_ruid, new_euid;
 506
 507         new_ruid = old_ruid = current->uid;
 508         new_euid = old_euid = current->euid;
 509         old_suid = current->suid;
 510
 511         if (ruid != (uid_t) -1) {
 512                 new_ruid = ruid;
 513                 if ((old_ruid != ruid) &&
 514                     (current->euid != ruid) &&
 515                     !capable(CAP_SETUID))
 516                         return -EPERM;
 517         }
 518
 519         if (euid != (uid_t) -1) {
 520                 new_euid = euid;
 521                 if ((old_ruid != euid) &&
 522                     (current->euid != euid) &&
 523                     (current->suid != euid) &&
 524                     !capable(CAP_SETUID))
 525                         return -EPERM;
 526         }
 527
 528         if (new_ruid != old_ruid && set_user(new_ruid) < 0)
 529                 return -EAGAIN;
 530
 531         current->fsuid = current->euid = new_euid;
 532         if (ruid != (uid_t) -1 ||
 533             (euid != (uid_t) -1 && euid != old_ruid))
 534                 current->suid = current->euid;
 535         current->fsuid = current->euid;
 536         if (current->euid != old_euid)
 537                 current->dumpable = 0;
 538
 539         if (!issecure(SECURE_NO_SETUID_FIXUP)) {
 540                 cap_emulate_setxuid(old_ruid, old_euid, old_suid);
 541         }
 542
 543         return 0;
 544 }
 545
 546
 547
 548 /*
 549  * setuid() is implemented like SysV with SAVED_IDS
 550  *
 551  * Note that SAVED_ID's is deficient in that a setuid root program
 552  * like sendmail, for example, cannot set its uid to be a normal
 553  * user and then switch back, because if you're root, setuid() sets
 554  * the saved uid too.  If you don't like this, blame the bright people
 555  * in the POSIX committee and/or USG.  Note that the BSD-style setreuid()
 556  * will allow a root program to temporarily drop privileges and be able to
 557  * regain them by swapping the real and effective uid.
 558  */
 559 asmlinkage long sys_setuid(uid_t uid)
 560 {
 561         int old_euid = current->euid;
 562         int old_ruid, old_suid, new_ruid;
 563
 564         old_ruid = new_ruid = current->uid;
 565         old_suid = current->suid;
 566         if (capable(CAP_SETUID)) {
 567                 if (uid != old_ruid && set_user(uid) < 0)
 568                         return -EAGAIN;
 569                 current->suid = uid;
 570         } else if ((uid != current->uid) && (uid != current->suid))
 571                 return -EPERM;
 572
 573         current->fsuid = current->euid = uid;
 574
 575         if (old_euid != uid)
 576                 current->dumpable = 0;
 577
 578         if (!issecure(SECURE_NO_SETUID_FIXUP)) {
 579                 cap_emulate_setxuid(old_ruid, old_euid, old_suid);
 580         }
 581
 582         return 0;
 583 }
 584
 585
 586 /*
 587  * This function implements a generic ability to update ruid, euid,
 588  * and suid.  This allows you to implement the 4.4 compatible seteuid().
 589  */
 590 asmlinkage long sys_setresuid(uid_t ruid, uid_t euid, uid_t suid)
 591 {
 592         int old_ruid = current->uid;
 593         int old_euid = current->euid;
 594         int old_suid = current->suid;
 595
 596         if (!capable(CAP_SETUID)) {
 597                 if ((ruid != (uid_t) -1) && (ruid != current->uid) &&
 598                     (ruid != current->euid) && (ruid != current->suid))
 599                         return -EPERM;
 600                 if ((euid != (uid_t) -1) && (euid != current->uid) &&
 601                     (euid != current->euid) && (euid != current->suid))
 602                         return -EPERM;
 603                 if ((suid != (uid_t) -1) && (suid != current->uid) &&
 604                     (suid != current->euid) && (suid != current->suid))
 605                         return -EPERM;
 606         }
 607         if (ruid != (uid_t) -1) {
 608                 if (ruid != current->uid && set_user(ruid) < 0)
 609                         return -EAGAIN;
 610         }
 611         if (euid != (uid_t) -1) {
 612                 if (euid != current->euid)
 613                         current->dumpable = 0;
 614                 current->euid = euid;
 615                 current->fsuid = euid;
 616         }
 617         if (suid != (uid_t) -1)
 618                 current->suid = suid;
 619
 620         if (!issecure(SECURE_NO_SETUID_FIXUP)) {
 621                 cap_emulate_setxuid(old_ruid, old_euid, old_suid);
 622         }
 623
 624         return 0;
 625 }
 626
 627 asmlinkage long sys_getresuid(uid_t *ruid, uid_t *euid, uid_t *suid)
 628 {
 629         int retval;
 630
 631         if (!(retval = put_user(current->uid, ruid)) &&
 632             !(retval = put_user(current->euid, euid)))
 633                 retval = put_user(current->suid, suid);
 634
 635         return retval;
 636 }
 637
 638 /*
 639  * Same as above, but for rgid, egid, sgid.
 640  */
 641 asmlinkage long sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid)
 642 {
 643        if (!capable(CAP_SETGID)) {
 644                 if ((rgid != (gid_t) -1) && (rgid != current->gid) &&
 645                     (rgid != current->egid) && (rgid != current->sgid))
 646                         return -EPERM;
 647                 if ((egid != (gid_t) -1) && (egid != current->gid) &&
 648                     (egid != current->egid) && (egid != current->sgid))
 649                         return -EPERM;
 650                 if ((sgid != (gid_t) -1) && (sgid != current->gid) &&
 651                     (sgid != current->egid) && (sgid != current->sgid))
 652                         return -EPERM;
 653         }
 654         if (rgid != (gid_t) -1)
 655                 current->gid = rgid;
 656         if (egid != (gid_t) -1) {
 657                 if (egid != current->egid)
 658                         current->dumpable = 0;
 659                 current->egid = egid;
 660                 current->fsgid = egid;
 661         }
 662         if (sgid != (gid_t) -1)
 663                 current->sgid = sgid;
 664         return 0;
 665 }
 666
 667 asmlinkage long sys_getresgid(gid_t *rgid, gid_t *egid, gid_t *sgid)
 668 {
 669         int retval;
 670
 671         if (!(retval = put_user(current->gid, rgid)) &&
 672             !(retval = put_user(current->egid, egid)))
 673                 retval = put_user(current->sgid, sgid);
 674
 675         return retval;
 676 }
 677
 678
 679 /*
 680  * "setfsuid()" sets the fsuid - the uid used for filesystem checks. This
 681  * is used for "access()" and for the NFS daemon (letting nfsd stay at
 682  * whatever uid it wants to). It normally shadows "euid", except when
 683  * explicitly set by setfsuid() or for access..
 684  */
 685 asmlinkage long sys_setfsuid(uid_t uid)
 686 {
 687         int old_fsuid;
 688
 689         old_fsuid = current->fsuid;
 690         if (uid == current->uid || uid == current->euid ||
 691             uid == current->suid || uid == current->fsuid ||
 692             capable(CAP_SETUID))
 693                 current->fsuid = uid;
 694         if (current->fsuid != old_fsuid)
 695                 current->dumpable = 0;
 696
 697         /* We emulate fsuid by essentially doing a scaled-down version
 698          * of what we did in setresuid and friends. However, we only
 699          * operate on the fs-specific bits of the process' effective
 700          * capabilities
 701          *
 702          * FIXME - is fsuser used for all CAP_FS_MASK capabilities?
 703          *          if not, we might be a bit too harsh here.
 704          */
 705
 706         if (!issecure(SECURE_NO_SETUID_FIXUP)) {
 707                 if (old_fsuid == 0 && current->fsuid != 0) {
 708                         cap_t(current->cap_effective) &= ~CAP_FS_MASK;
 709                 }
 710                 if (old_fsuid != 0 && current->fsuid == 0) {
 711                         cap_t(current->cap_effective) |=
 712                                 (cap_t(current->cap_permitted) & CAP_FS_MASK);
 713                 }
 714         }
 715
 716         return old_fsuid;
 717 }
 718
 719 /*
 720  * Samma på svenska..
 721  */
 722 asmlinkage long sys_setfsgid(gid_t gid)
 723 {
 724         int old_fsgid;
 725
 726         old_fsgid = current->fsgid;
 727         if (gid == current->gid || gid == current->egid ||
 728             gid == current->sgid || gid == current->fsgid ||
 729             capable(CAP_SETGID))
 730                 current->fsgid = gid;
 731         if (current->fsgid != old_fsgid)
 732                 current->dumpable = 0;
 733
 734         return old_fsgid;
 735 }
 736
 737 asmlinkage long sys_times(struct tms * tbuf)
 738 {
 739         /*
 740          *      In the SMP world we might just be unlucky and have one of
 741          *      the times increment as we use it. Since the value is an
 742          *      atomically safe type this is just fine. Conceptually its
 743          *      as if the syscall took an instant longer to occur.
 744          */
 745         if (tbuf)
 746                 if (copy_to_user(tbuf, &current->times, sizeof(struct tms)))
 747                         return -EFAULT;
 748         return jiffies;
 749 }
 750
 751 /*
 752  * This needs some heavy checking ...
 753  * I just haven't the stomach for it. I also don't fully
 754  * understand sessions/pgrp etc. Let somebody who does explain it.
 755  *
 756  * OK, I think I have the protection semantics right.... this is really
 757  * only important on a multi-user system anyway, to make sure one user
 758  * can't send a signal to a process owned by another.  -TYT, 12/12/91
 759  *
 760  * Auch. Had to add the 'did_exec' flag to conform completely to POSIX.
 761  * LBT 04.03.94
 762  */
 763
 764 asmlinkage long sys_setpgid(pid_t pid, pid_t pgid)
 765 {
 766         struct task_struct * p;
 767         int err = -EINVAL;
 768
 769         if (!pid)
 770                 pid = current->pid;
 771         if (!pgid)
 772                 pgid = pid;
 773         if (pgid < 0)
 774                 return -EINVAL;
 775
 776         /* From this point forward we keep holding onto the tasklist lock
 777          * so that our parent does not change from under us. -DaveM
 778          */
 779         read_lock(&tasklist_lock);
 780
 781         err = -ESRCH;
 782         p = find_task_by_pid(pid);
 783         if (!p)
 784                 goto out;
 785
 786         if (p->p_pptr == current || p->p_opptr == current) {
 787                 err = -EPERM;
 788                 if (p->session != current->session)
 789                         goto out;
 790                 err = -EACCES;
 791                 if (p->did_exec)
 792                         goto out;
 793         } else if (p != current)
 794                 goto out;
 795         err = -EPERM;
 796         if (p->leader)
 797                 goto out;
 798         if (pgid != pid) {
 799                 struct task_struct * tmp;
 800                 for_each_task (tmp) {
 801                         if (tmp->pgrp == pgid &&
 802                             tmp->session == current->session)
 803                                 goto ok_pgid;
 804                 }
 805                 goto out;
 806         }
 807
 808 ok_pgid:
 809         p->pgrp = pgid;
 810         err = 0;
 811 out:
 812         /* All paths lead to here, thus we are safe. -DaveM */
 813         read_unlock(&tasklist_lock);
 814         return err;
 815 }
 816
 817 asmlinkage long sys_getpgid(pid_t pid)
 818 {
 819         if (!pid) {
 820                 return current->pgrp;
 821         } else {
 822                 int retval;
 823                 struct task_struct *p;
 824
 825                 read_lock(&tasklist_lock);
 826                 p = find_task_by_pid(pid);
 827
 828                 retval = -ESRCH;
 829                 if (p)
 830                         retval = p->pgrp;
 831                 read_unlock(&tasklist_lock);
 832                 return retval;
 833         }
 834 }
 835
 836 asmlinkage long sys_getpgrp(void)
 837 {
 838         /* SMP - assuming writes are word atomic this is fine */
 839         return current->pgrp;
 840 }
 841
 842 asmlinkage long sys_getsid(pid_t pid)
 843 {
 844         if (!pid) {
 845                 return current->session;
 846         } else {
 847                 int retval;
 848                 struct task_struct *p;
 849
 850                 read_lock(&tasklist_lock);
 851                 p = find_task_by_pid(pid);
 852
 853                 retval = -ESRCH;
 854                 if(p)
 855                         retval = p->session;
 856                 read_unlock(&tasklist_lock);
 857                 return retval;
 858         }
 859 }
 860
 861 asmlinkage long sys_setsid(void)
 862 {
 863         struct task_struct * p;
 864         int err = -EPERM;
 865
 866         read_lock(&tasklist_lock);
 867         for_each_task(p) {
 868                 if (p->pgrp == current->pid)
 869                         goto out;
 870         }
 871
 872         current->leader = 1;
 873         current->session = current->pgrp = current->pid;
 874         current->tty = NULL;
 875         current->tty_old_pgrp = 0;
 876         err = current->pgrp;
 877 out:
 878         read_unlock(&tasklist_lock);
 879         return err;
 880 }
 881
 882 /*
 883  * Supplementary group IDs
 884  */
 885 asmlinkage long sys_getgroups(int gidsetsize, gid_t *grouplist)
 886 {
 887         int i;
 888
 889         /*
 890          *      SMP: Nobody else can change our grouplist. Thus we are
 891          *      safe.
 892          */
 893
 894         if (gidsetsize < 0)
 895                 return -EINVAL;
 896         i = current->ngroups;
 897         if (gidsetsize) {
 898                 if (i > gidsetsize)
 899                         return -EINVAL;
 900                 if (copy_to_user(grouplist, current->groups, sizeof(gid_t)*i))
 901                         return -EFAULT;
 902         }
 903         return i;
 904 }
 905
 906 /*
 907  *      SMP: Our groups are not shared. We can copy to/from them safely
 908  *      without another task interfering.
 909  */
 910
 911 asmlinkage long sys_setgroups(int gidsetsize, gid_t *grouplist)
 912 {
 913         if (!capable(CAP_SETGID))
 914                 return -EPERM;
 915         if ((unsigned) gidsetsize > NGROUPS)
 916                 return -EINVAL;
 917         if(copy_from_user(current->groups, grouplist, gidsetsize * sizeof(gid_t)))
 918                 return -EFAULT;
 919         current->ngroups = gidsetsize;
 920         return 0;
 921 }
 922
 923 static int supplemental_group_member(gid_t grp)
 924 {
 925         int i = current->ngroups;
 926
 927         if (i) {
 928                 gid_t *groups = current->groups;
 929                 do {
 930                         if (*groups == grp)
 931                                 return 1;
 932                         groups++;
 933                         i--;
 934                 } while (i);
 935         }
 936         return 0;
 937 }
 938
 939 /*
 940  * Check whether we're fsgid/egid or in the supplemental group..
 941  */
 942 int in_group_p(gid_t grp)
 943 {
 944         int retval = 1;
 945         if (grp != current->fsgid)
 946                 retval = supplemental_group_member(grp);
 947         return retval;
 948 }
 949
 950 int in_egroup_p(gid_t grp)
 951 {
 952         int retval = 1;
 953         if (grp != current->egid)
 954                 retval = supplemental_group_member(grp);
 955         return retval;
 956 }
 957
/*
 * Read/write semaphore taken by the uname, hostname and domainname
 * system calls in this file around their accesses to system_utsname:
 * readers use down_read(), the setters use down_write().
 */
DECLARE_RWSEM(uts_sem);
 959
 960 asmlinkage long sys_newuname(struct new_utsname * name)
 961 {
 962         int errno = 0;
 963
 964         down_read(&uts_sem);
 965         if (copy_to_user(name,&system_utsname,sizeof *name))
 966                 errno = -EFAULT;
 967         up_read(&uts_sem);
 968         return errno;
 969 }
 970
 971 asmlinkage long sys_sethostname(char *name, int len)
 972 {
 973         int errno;
 974
 975         if (!capable(CAP_SYS_ADMIN))
 976                 return -EPERM;
 977         if (len < 0 || len > __NEW_UTS_LEN)
 978                 return -EINVAL;
 979         down_write(&uts_sem);
 980         errno = -EFAULT;
 981         if (!copy_from_user(system_utsname.nodename, name, len)) {
 982                 system_utsname.nodename[len] = 0;
 983                 errno = 0;
 984         }
 985         up_write(&uts_sem);
 986         return errno;
 987 }
 988
 989 asmlinkage long sys_gethostname(char *name, int len)
 990 {
 991         int i, errno;
 992
 993         if (len < 0)
 994                 return -EINVAL;
 995         down_read(&uts_sem);
 996         i = 1 + strlen(system_utsname.nodename);
 997         if (i > len)
 998                 i = len;
 999         errno = 0;
1000         if (copy_to_user(name, system_utsname.nodename, i))
1001                 errno = -EFAULT;
1002         up_read(&uts_sem);
1003         return errno;
1004 }
1005
1006 /*
1007  * Only setdomainname; getdomainname can be implemented by calling
1008  * uname()
1009  */
1010 asmlinkage long sys_setdomainname(char *name, int len)
1011 {
1012         int errno;
1013
1014         if (!capable(CAP_SYS_ADMIN))
1015                 return -EPERM;
1016         if (len < 0 || len > __NEW_UTS_LEN)
1017                 return -EINVAL;
1018
1019         down_write(&uts_sem);
1020         errno = -EFAULT;
1021         if (!copy_from_user(system_utsname.domainname, name, len)) {
1022                 errno = 0;
1023                 system_utsname.domainname[len] = 0;
1024         }
1025         up_write(&uts_sem);
1026         return errno;
1027 }
1028
1029 asmlinkage long sys_getrlimit(unsigned int resource, struct rlimit *rlim)
1030 {
1031         if (resource >= RLIM_NLIMITS)
1032                 return -EINVAL;
1033         else
1034                 return copy_to_user(rlim, current->rlim + resource, sizeof(*rlim))
1035                         ? -EFAULT : 0;
1036 }
1037
1038 #if !defined(__ia64__) && !defined(__s390__)
1039
1040 /*
1041  *      Back compatibility for getrlimit. Needed for some apps.
1042  */
1043
1044 asmlinkage long sys_old_getrlimit(unsigned int resource, struct rlimit *rlim)
1045 {
1046         struct rlimit x;
1047         if (resource >= RLIM_NLIMITS)
1048                 return -EINVAL;
1049
1050         memcpy(&x, current->rlim + resource, sizeof(*rlim));
1051         if(x.rlim_cur > 0x7FFFFFFF)
1052                 x.rlim_cur = 0x7FFFFFFF;
1053         if(x.rlim_max > 0x7FFFFFFF)
1054                 x.rlim_max = 0x7FFFFFFF;
1055         return copy_to_user(rlim, &x, sizeof(x))?-EFAULT:0;
1056 }
1057
1058 #endif
1059
1060 asmlinkage long sys_setrlimit(unsigned int resource, struct rlimit *rlim)
1061 {
1062         struct rlimit new_rlim, *old_rlim;
1063
1064         if (resource >= RLIM_NLIMITS)
1065                 return -EINVAL;
1066         if(copy_from_user(&new_rlim, rlim, sizeof(*rlim)))
1067                 return -EFAULT;
1068         if (new_rlim.rlim_cur < 0 || new_rlim.rlim_max < 0)
1069                 return -EINVAL;
1070         old_rlim = current->rlim + resource;
1071         if (((new_rlim.rlim_cur > old_rlim->rlim_max) ||
1072              (new_rlim.rlim_max > old_rlim->rlim_max)) &&
1073             !capable(CAP_SYS_RESOURCE))
1074                 return -EPERM;
1075         if (resource == RLIMIT_NOFILE) {
1076                 if (new_rlim.rlim_cur > NR_OPEN || new_rlim.rlim_max > NR_OPEN)
1077                         return -EPERM;
1078         }
1079         *old_rlim = new_rlim;
1080         return 0;
1081 }
1082
1083 /*
1084  * It would make sense to put struct rusage in the task_struct,
1085  * except that would make the task_struct be *really big*.  After
1086  * task_struct gets moved into malloc'ed memory, it would
1087  * make sense to do this.  It will make moving the rest of the information
1088  * a lot simpler!  (Which we're not doing right now because we're not
1089  * measuring them yet).
1090  *
1091  * This is SMP safe.  Either we are called from sys_getrusage on ourselves
1092  * below (we know we aren't going to exit/disappear and only we change our
1093  * rusage counters), or we are called from wait4() on a process which is
1094  * either stopped or zombied.  In the zombied case the task won't get
1095  * reaped till shortly after the call to getrusage(), in both cases the
1096  * task being examined is in a frozen state so the counters won't change.
1097  *
1098  * FIXME! Get the fault counts properly!
1099  */
1100 int getrusage(struct task_struct *p, int who, struct rusage *ru)
1101 {
1102         struct rusage r;
1103
1104         memset((char *) &r, 0, sizeof(r));
1105         switch (who) {
1106                 case RUSAGE_SELF:
1107                         r.ru_utime.tv_sec = CT_TO_SECS(p->times.tms_utime);
1108                         r.ru_utime.tv_usec = CT_TO_USECS(p->times.tms_utime);
1109                         r.ru_stime.tv_sec = CT_TO_SECS(p->times.tms_stime);
1110                         r.ru_stime.tv_usec = CT_TO_USECS(p->times.tms_stime);
1111                         r.ru_minflt = p->min_flt;
1112                         r.ru_majflt = p->maj_flt;
1113                         r.ru_nswap = p->nswap;
1114                         break;
1115                 case RUSAGE_CHILDREN:
1116                         r.ru_utime.tv_sec = CT_TO_SECS(p->times.tms_cutime);
1117                         r.ru_utime.tv_usec = CT_TO_USECS(p->times.tms_cutime);
1118                         r.ru_stime.tv_sec = CT_TO_SECS(p->times.tms_cstime);
1119                         r.ru_stime.tv_usec = CT_TO_USECS(p->times.tms_cstime);
1120                         r.ru_minflt = p->cmin_flt;
1121                         r.ru_majflt = p->cmaj_flt;
1122                         r.ru_nswap = p->cnswap;
1123                         break;
1124                 default:
1125                         r.ru_utime.tv_sec = CT_TO_SECS(p->times.tms_utime + p->times.tms_cutime);
1126                         r.ru_utime.tv_usec = CT_TO_USECS(p->times.tms_utime + p->times.tms_cutime);
1127                         r.ru_stime.tv_sec = CT_TO_SECS(p->times.tms_stime + p->times.tms_cstime);
1128                         r.ru_stime.tv_usec = CT_TO_USECS(p->times.tms_stime + p->times.tms_cstime);
1129                         r.ru_minflt = p->min_flt + p->cmin_flt;
1130                         r.ru_majflt = p->maj_flt + p->cmaj_flt;
1131                         r.ru_nswap = p->nswap + p->cnswap;
1132                         break;
1133         }
1134         return copy_to_user(ru, &r, sizeof(r)) ? -EFAULT : 0;
1135 }
1136
1137 asmlinkage long sys_getrusage(int who, struct rusage *ru)
1138 {
1139         if (who != RUSAGE_SELF && who != RUSAGE_CHILDREN)
1140                 return -EINVAL;
1141         return getrusage(current, who, ru);
1142 }
1143
1144 asmlinkage long sys_umask(int mask)
1145 {
1146         mask = xchg(&current->fs->umask, mask & S_IRWXUGO);
1147         return mask;
1148 }
1149
1150 asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3,
1151                           unsigned long arg4, unsigned long arg5)
1152 {
1153         int error = 0;
1154         int sig;
1155
1156         switch (option) {
1157                 case PR_SET_PDEATHSIG:
1158                         sig = arg2;
1159                         if (sig > _NSIG) {
1160                                 error = -EINVAL;
1161                                 break;
1162                         }
1163                         current->pdeath_signal = sig;
1164                         break;
1165                 case PR_GET_PDEATHSIG:
1166                         error = put_user(current->pdeath_signal, (int *)arg2);
1167                         break;
1168                 case PR_GET_DUMPABLE:
1169                         if (current->dumpable)
1170                                 error = 1;
1171                         break;
1172                 case PR_SET_DUMPABLE:
1173                         if (arg2 != 0 && arg2 != 1) {
1174                                 error = -EINVAL;
1175                                 break;
1176                         }
1177                         current->dumpable = arg2;
1178                         break;
1179                 case PR_SET_UNALIGN:
1180 #ifdef SET_UNALIGN_CTL
1181                         error = SET_UNALIGN_CTL(current, arg2);
1182 #else
1183                         error = -EINVAL;
1184 #endif
1185                         break;
1186
1187                 case PR_GET_UNALIGN:
1188 #ifdef GET_UNALIGN_CTL
1189                         error = GET_UNALIGN_CTL(current, arg2);
1190 #else
1191                         error = -EINVAL;
1192 #endif
1193                         break;
1194
1195                 case PR_GET_KEEPCAPS:
1196                         if (current->keep_capabilities)
1197                                 error = 1;
1198                         break;
1199                 case PR_SET_KEEPCAPS:
1200                         if (arg2 != 0 && arg2 != 1) {
1201                                 error = -EINVAL;
1202                                 break;
1203                         }
1204                         current->keep_capabilities = arg2;
1205                         break;
1206                 default:
1207                         error = -EINVAL;
1208                         break;
1209         }
1210         return error;
1211 }
1212
/*
 * Symbols exported from this file: the notifier chain helpers, the
 * reboot notifier registration pair, and the group membership tests.
 */
EXPORT_SYMBOL(notifier_chain_register);
EXPORT_SYMBOL(notifier_chain_unregister);
EXPORT_SYMBOL(notifier_call_chain);
EXPORT_SYMBOL(register_reboot_notifier);
EXPORT_SYMBOL(unregister_reboot_notifier);
EXPORT_SYMBOL(in_group_p);
EXPORT_SYMBOL(in_egroup_p);