kernel/sys.c

   1 /*
   2  *  linux/kernel/sys.c
   3  *
   4  *  Copyright (C) 1991, 1992  Linus Torvalds
   5  */
   6
   7 #include <linux/module.h>
   8 #include <linux/mm.h>
   9 #include <linux/utsname.h>
  10 #include <linux/mman.h>
  11 #include <linux/smp_lock.h>
  12 #include <linux/notifier.h>
  13 #include <linux/reboot.h>
  14 #include <linux/prctl.h>
  15 #include <linux/init.h>
  16 #include <linux/highuid.h>
  17
  18 #include <asm/uaccess.h>
  19 #include <asm/io.h>
  20
/*
 * System-wide overflow UID and GID, for architectures that now have
 * 32-bit UID/GID but didn't in the past.
 *
 * Both start at the DEFAULT_OVERFLOW* values and are plain, non-static
 * ints, so other kernel code can read or change them.
 * NOTE(review): presumably these are the IDs reported when a real ID does
 * not fit in the legacy 16-bit interfaces -- confirm in <linux/highuid.h>.
 */

int overflowuid = DEFAULT_OVERFLOWUID;
int overflowgid = DEFAULT_OVERFLOWGID;
  28
  29 /*
  30  * the same as above, but for filesystems which can only store a 16-bit
  31  * UID and GID. as such, this is needed on all architectures
  32  */
  33
  34 int fs_overflowuid = DEFAULT_FS_OVERFLOWUID;
  35 int fs_overflowgid = DEFAULT_FS_OVERFLOWUID;
  36
/*
 * Whether ctrl-alt-del reboots the machine; the default is yes.
 *
 * Non-zero: ctrl_alt_del() runs the reboot notifiers and restarts at once.
 * Zero:     ctrl_alt_del() sends SIGINT to pid 1 instead.
 * sys_reboot() flips it via LINUX_REBOOT_CMD_CAD_ON / LINUX_REBOOT_CMD_CAD_OFF.
 */

int C_A_D = 1;
  42
  43
/*
 *      Notifier list for kernel code which wants to be called
 *      at shutdown. This is used to stop any idling DMA operations
 *      and the like.
 *
 *      notifier_lock is the single rwlock taken by the generic notifier
 *      helpers in this file: register/unregister take it for writing,
 *      notifier_call_chain takes it for reading.  It therefore covers
 *      every chain handled by those helpers, not just the reboot list.
 */

static struct notifier_block *reboot_notifier_list = NULL;
rwlock_t notifier_lock = RW_LOCK_UNLOCKED;
  52
  53 /**
  54  *      notifier_chain_register - Add notifier to a notifier chain
  55  *      @list: Pointer to root list pointer
  56  *      @n: New entry in notifier chain
  57  *
  58  *      Adds a notifier to a notifier chain.
  59  *
  60  *      Currently always returns zero.
  61  */
  62
  63 int notifier_chain_register(struct notifier_block **list, struct notifier_block *n)
  64 {
  65         write_lock(&notifier_lock);
  66         while(*list)
  67         {
  68                 if(n->priority > (*list)->priority)
  69                         break;
  70                 list= &((*list)->next);
  71         }
  72         n->next = *list;
  73         *list=n;
  74         write_unlock(&notifier_lock);
  75         return 0;
  76 }
  77
  78 /**
  79  *      notifier_chain_unregister - Remove notifier from a notifier chain
  80  *      @nl: Pointer to root list pointer
  81  *      @n: New entry in notifier chain
  82  *
  83  *      Removes a notifier from a notifier chain.
  84  *
  85  *      Returns zero on success, or %-ENOENT on failure.
  86  */
  87
  88 int notifier_chain_unregister(struct notifier_block **nl, struct notifier_block *n)
  89 {
  90         write_lock(&notifier_lock);
  91         while((*nl)!=NULL)
  92         {
  93                 if((*nl)==n)
  94                 {
  95                         *nl=n->next;
  96                         write_unlock(&notifier_lock);
  97                         return 0;
  98                 }
  99                 nl=&((*nl)->next);
 100         }
 101         write_unlock(&notifier_lock);
 102         return -ENOENT;
 103 }
 104
 105 /**
 106  *      notifier_call_chain - Call functions in a notifier chain
 107  *      @n: Pointer to root pointer of notifier chain
 108  *      @val: Value passed unmodified to notifier function
 109  *      @v: Pointer passed unmodified to notifier function
 110  *
 111  *      Calls each function in a notifier chain in turn.
 112  *
 113  *      If the return value of the notifier can be and'd
 114  *      with %NOTIFY_STOP_MASK, then notifier_call_chain
 115  *      will return immediately, with the return value of
 116  *      the notifier function which halted execution.
 117  *      Otherwise, the return value is the return value
 118  *      of the last notifier function called.
 119  */
 120
 121 int notifier_call_chain(struct notifier_block **n, unsigned long val, void *v)
 122 {
 123         int ret=NOTIFY_DONE;
 124         struct notifier_block *nb = *n;
 125
 126         read_lock(&notifier_lock);
 127         while(nb)
 128         {
 129                 ret=nb->notifier_call(nb,val,v);
 130                 if(ret&NOTIFY_STOP_MASK)
 131                 {
 132                         read_unlock(&notifier_lock);
 133                         return ret;
 134                 }
 135                 nb=nb->next;
 136         }
 137         read_unlock(&notifier_lock);
 138         return ret;
 139 }
 140
 141 /**
 142  *      register_reboot_notifier - Register function to be called at reboot time
 143  *      @nb: Info about notifier function to be called
 144  *
 145  *      Registers a function with the list of functions
 146  *      to be called at reboot time.
 147  *
 148  *      Currently always returns zero, as notifier_chain_register
 149  *      always returns zero.
 150  */
 151
 152 int register_reboot_notifier(struct notifier_block * nb)
 153 {
 154         return notifier_chain_register(&reboot_notifier_list, nb);
 155 }
 156
 157 /**
 158  *      unregister_reboot_notifier - Unregister previously registered reboot notifier
 159  *      @nb: Hook to be unregistered
 160  *
 161  *      Unregisters a previously registered reboot
 162  *      notifier function.
 163  *
 164  *      Returns zero on success, or %-ENOENT on failure.
 165  */
 166
 167 int unregister_reboot_notifier(struct notifier_block * nb)
 168 {
 169         return notifier_chain_unregister(&reboot_notifier_list, nb);
 170 }
 171
 172 asmlinkage long sys_ni_syscall(void)
 173 {
 174         return -ENOSYS;
 175 }
 176
 177 static int proc_sel(struct task_struct *p, int which, int who)
 178 {
 179         if(p->pid)
 180         {
 181                 switch (which) {
 182                         case PRIO_PROCESS:
 183                                 if (!who && p == current)
 184                                         return 1;
 185                                 return(p->pid == who);
 186                         case PRIO_PGRP:
 187                                 if (!who)
 188                                         who = current->pgrp;
 189                                 return(p->pgrp == who);
 190                         case PRIO_USER:
 191                                 if (!who)
 192                                         who = current->uid;
 193                                 return(p->uid == who);
 194                 }
 195         }
 196         return 0;
 197 }
 198
 199 asmlinkage long sys_setpriority(int which, int who, int niceval)
 200 {
 201         struct task_struct *p;
 202         int error;
 203
 204         if (which > 2 || which < 0)
 205                 return -EINVAL;
 206
 207         /* normalize: avoid signed division (rounding problems) */
 208         error = -ESRCH;
 209         if (niceval < -20)
 210                 niceval = -20;
 211         if (niceval > 19)
 212                 niceval = 19;
 213
 214         read_lock(&tasklist_lock);
 215         for_each_task(p) {
 216                 if (!proc_sel(p, which, who))
 217                         continue;
 218                 if (p->uid != current->euid &&
 219                         p->uid != current->uid && !capable(CAP_SYS_NICE)) {
 220                         error = -EPERM;
 221                         continue;
 222                 }
 223                 if (error == -ESRCH)
 224                         error = 0;
 225                 if (niceval < p->nice && !capable(CAP_SYS_NICE))
 226                         error = -EACCES;
 227                 else
 228                         p->nice = niceval;
 229         }
 230         read_unlock(&tasklist_lock);
 231
 232         return error;
 233 }
 234
 235 /*
 236  * Ugh. To avoid negative return values, "getpriority()" will
 237  * not return the normal nice-value, but a value that has been
 238  * offset by 20 (ie it returns 0..39 instead of -20..19)
 239  */
 240 asmlinkage long sys_getpriority(int which, int who)
 241 {
 242         struct task_struct *p;
 243         long retval = -ESRCH;
 244
 245         if (which > 2 || which < 0)
 246                 return -EINVAL;
 247
 248         read_lock(&tasklist_lock);
 249         for_each_task (p) {
 250                 unsigned niceval;
 251                 if (!proc_sel(p, which, who))
 252                         continue;
 253                 niceval = 20 - p->nice;
 254                 if (niceval < (unsigned)retval)
 255                         retval = niceval;
 256         }
 257         read_unlock(&tasklist_lock);
 258
 259         return retval;
 260 }
 261
 262
 263 /*
 264  * Reboot system call: for obvious reasons only root may call it,
 265  * and even root needs to set up some magic numbers in the registers
 266  * so that some mistake won't make this reboot the whole machine.
 267  * You can also set the meaning of the ctrl-alt-del-key here.
 268  *
 269  * reboot doesn't sync: do that yourself before calling this.
 270  */
 271 asmlinkage long sys_reboot(int magic1, int magic2, unsigned int cmd, void * arg)
 272 {
 273         char buffer[256];
 274
 275         /* We only trust the superuser with rebooting the system. */
 276         if (!capable(CAP_SYS_BOOT))
 277                 return -EPERM;
 278
 279         /* For safety, we require "magic" arguments. */
 280         if (magic1 != LINUX_REBOOT_MAGIC1 ||
 281             (magic2 != LINUX_REBOOT_MAGIC2 && magic2 != LINUX_REBOOT_MAGIC2A &&
 282                         magic2 != LINUX_REBOOT_MAGIC2B))
 283                 return -EINVAL;
 284
 285         lock_kernel();
 286         switch (cmd) {
 287         case LINUX_REBOOT_CMD_RESTART:
 288                 notifier_call_chain(&reboot_notifier_list, SYS_RESTART, NULL);
 289                 printk(KERN_EMERG "Restarting system.\n");
 290                 machine_restart(NULL);
 291                 break;
 292
 293         case LINUX_REBOOT_CMD_CAD_ON:
 294                 C_A_D = 1;
 295                 break;
 296
 297         case LINUX_REBOOT_CMD_CAD_OFF:
 298                 C_A_D = 0;
 299                 break;
 300
 301         case LINUX_REBOOT_CMD_HALT:
 302                 notifier_call_chain(&reboot_notifier_list, SYS_HALT, NULL);
 303                 printk(KERN_EMERG "System halted.\n");
 304                 machine_halt();
 305                 do_exit(0);
 306                 break;
 307
 308         case LINUX_REBOOT_CMD_POWER_OFF:
 309                 notifier_call_chain(&reboot_notifier_list, SYS_POWER_OFF, NULL);
 310                 printk(KERN_EMERG "Power down.\n");
 311                 machine_power_off();
 312                 do_exit(0);
 313                 break;
 314
 315         case LINUX_REBOOT_CMD_RESTART2:
 316                 if (strncpy_from_user(&buffer[0], (char *)arg, sizeof(buffer) - 1) < 0) {
 317                         unlock_kernel();
 318                         return -EFAULT;
 319                 }
 320                 buffer[sizeof(buffer) - 1] = '\0';
 321
 322                 notifier_call_chain(&reboot_notifier_list, SYS_RESTART, buffer);
 323                 printk(KERN_EMERG "Restarting system with command '%s'.\n", buffer);
 324                 machine_restart(buffer);
 325                 break;
 326
 327         default:
 328                 unlock_kernel();
 329                 return -EINVAL;
 330         }
 331         unlock_kernel();
 332         return 0;
 333 }
 334
 335 /*
 336  * This function gets called by ctrl-alt-del - ie the keyboard interrupt.
 337  * As it's called within an interrupt, it may NOT sync: the only choice
 338  * is whether to reboot at once, or just ignore the ctrl-alt-del.
 339  */
 340 void ctrl_alt_del(void)
 341 {
 342         if (C_A_D) {
 343                 notifier_call_chain(&reboot_notifier_list, SYS_RESTART, NULL);
 344                 machine_restart(NULL);
 345         } else
 346                 kill_proc(1, SIGINT, 1);
 347 }
 348
 349
 350 /*
 351  * Unprivileged users may change the real gid to the effective gid
 352  * or vice versa.  (BSD-style)
 353  *
 354  * If you set the real gid at all, or set the effective gid to a value not
 355  * equal to the real gid, then the saved gid is set to the new effective gid.
 356  *
 357  * This makes it possible for a setgid program to completely drop its
 358  * privileges, which is often a useful assertion to make when you are doing
 359  * a security audit over a program.
 360  *
 361  * The general idea is that a program which uses just setregid() will be
 362  * 100% compatible with BSD.  A program which uses just setgid() will be
 363  * 100% compatible with POSIX with saved IDs.
 364  *
 365  * SMP: There are no races; the GIDs are checked only by filesystem
 366  *      operations (as far as semantic preservation is concerned).
 367  */
 368 asmlinkage long sys_setregid(gid_t rgid, gid_t egid)
 369 {
 370         int old_rgid = current->gid;
 371         int old_egid = current->egid;
 372
 373         if (rgid != (gid_t) -1) {
 374                 if ((old_rgid == rgid) ||
 375                     (current->egid==rgid) ||
 376                     capable(CAP_SETGID))
 377                         current->gid = rgid;
 378                 else
 379                         return -EPERM;
 380         }
 381         if (egid != (gid_t) -1) {
 382                 if ((old_rgid == egid) ||
 383                     (current->egid == egid) ||
 384                     (current->sgid == egid) ||
 385                     capable(CAP_SETGID))
 386                         current->fsgid = current->egid = egid;
 387                 else {
 388                         current->gid = old_rgid;
 389                         return -EPERM;
 390                 }
 391         }
 392         if (rgid != (gid_t) -1 ||
 393             (egid != (gid_t) -1 && egid != old_rgid))
 394                 current->sgid = current->egid;
 395         current->fsgid = current->egid;
 396         if (current->egid != old_egid)
 397                 current->dumpable = 0;
 398         return 0;
 399 }
 400
 401 /*
 402  * setgid() is implemented like SysV w/ SAVED_IDS
 403  *
 404  * SMP: Same implicit races as above.
 405  */
 406 asmlinkage long sys_setgid(gid_t gid)
 407 {
 408         int old_egid = current->egid;
 409
 410         if (capable(CAP_SETGID))
 411                 current->gid = current->egid = current->sgid = current->fsgid = gid;
 412         else if ((gid == current->gid) || (gid == current->sgid))
 413                 current->egid = current->fsgid = gid;
 414         else
 415                 return -EPERM;
 416
 417         if (current->egid != old_egid)
 418                 current->dumpable = 0;
 419         return 0;
 420 }
 421
 422 /*
 423  * cap_emulate_setxuid() fixes the effective / permitted capabilities of
 424  * a process after a call to setuid, setreuid, or setresuid.
 425  *
 426  *  1) When set*uiding _from_ one of {r,e,s}uid == 0 _to_ all of
 427  *  {r,e,s}uid != 0, the permitted and effective capabilities are
 428  *  cleared.
 429  *
 430  *  2) When set*uiding _from_ euid == 0 _to_ euid != 0, the effective
 431  *  capabilities of the process are cleared.
 432  *
 433  *  3) When set*uiding _from_ euid != 0 _to_ euid == 0, the effective
 434  *  capabilities are set to the permitted capabilities.
 435  *
 436  *  fsuid is handled elsewhere. fsuid == 0 and {r,e,s}uid!= 0 should
 437  *  never happen.
 438  *
 439  *  -astor
 440  *
 441  * cevans - New behaviour, Oct '99
 442  * A process may, via prctl(), elect to keep its capabilities when it
 443  * calls setuid() and switches away from uid==0. Both permitted and
 444  * effective sets will be retained.
 445  * Without this change, it was impossible for a daemon to drop only some
 446  * of its privilege. The call to setuid(!=0) would drop all privileges!
 447  * Keeping uid 0 is not an option because uid 0 owns too many vital
 448  * files..
 449  * Thanks to Olaf Kirch and Peter Benie for spotting this.
 450  */
 451 extern inline void cap_emulate_setxuid(int old_ruid, int old_euid,
 452                                        int old_suid)
 453 {
 454         if ((old_ruid == 0 || old_euid == 0 || old_suid == 0) &&
 455             (current->uid != 0 && current->euid != 0 && current->suid != 0) &&
 456             !current->keep_capabilities) {
 457                 cap_clear(current->cap_permitted);
 458                 cap_clear(current->cap_effective);
 459         }
 460         if (old_euid == 0 && current->euid != 0) {
 461                 cap_clear(current->cap_effective);
 462         }
 463         if (old_euid != 0 && current->euid == 0) {
 464                 current->cap_effective = current->cap_permitted;
 465         }
 466 }
 467
 468 /*
 469  * Unprivileged users may change the real uid to the effective uid
 470  * or vice versa.  (BSD-style)
 471  *
 472  * If you set the real uid at all, or set the effective uid to a value not
 473  * equal to the real uid, then the saved uid is set to the new effective uid.
 474  *
 475  * This makes it possible for a setuid program to completely drop its
 476  * privileges, which is often a useful assertion to make when you are doing
 477  * a security audit over a program.
 478  *
 479  * The general idea is that a program which uses just setreuid() will be
 480  * 100% compatible with BSD.  A program which uses just setuid() will be
 481  * 100% compatible with POSIX with saved IDs.
 482  */
 483 asmlinkage long sys_setreuid(uid_t ruid, uid_t euid)
 484 {
 485         int old_ruid, old_euid, old_suid, new_ruid;
 486
 487         new_ruid = old_ruid = current->uid;
 488         old_euid = current->euid;
 489         old_suid = current->suid;
 490         if (ruid != (uid_t) -1) {
 491                 if ((old_ruid == ruid) ||
 492                     (current->euid==ruid) ||
 493                     capable(CAP_SETUID))
 494                         new_ruid = ruid;
 495                 else
 496                         return -EPERM;
 497         }
 498         if (euid != (uid_t) -1) {
 499                 if ((old_ruid == euid) ||
 500                     (current->euid == euid) ||
 501                     (current->suid == euid) ||
 502                     capable(CAP_SETUID))
 503                         current->fsuid = current->euid = euid;
 504                 else
 505                         return -EPERM;
 506         }
 507         if (ruid != (uid_t) -1 ||
 508             (euid != (uid_t) -1 && euid != old_ruid))
 509                 current->suid = current->euid;
 510         current->fsuid = current->euid;
 511         if (current->euid != old_euid)
 512                 current->dumpable = 0;
 513
 514         if(new_ruid != old_ruid) {
 515                 /* What if a process setreuid()'s and this brings the
 516                  * new uid over his NPROC rlimit?  We can check this now
 517                  * cheaply with the new uid cache, so if it matters
 518                  * we should be checking for it.  -DaveM
 519                  */
 520                 free_uid(current);
 521                 current->uid = new_ruid;
 522                 alloc_uid(current);
 523         }
 524
 525         if (!issecure(SECURE_NO_SETUID_FIXUP)) {
 526                 cap_emulate_setxuid(old_ruid, old_euid, old_suid);
 527         }
 528
 529         return 0;
 530 }
 531
 532
 533
 534 /*
 535  * setuid() is implemented like SysV with SAVED_IDS
 536  *
 537  * Note that SAVED_ID's is deficient in that a setuid root program
 538  * like sendmail, for example, cannot set its uid to be a normal
 539  * user and then switch back, because if you're root, setuid() sets
 540  * the saved uid too.  If you don't like this, blame the bright people
 541  * in the POSIX committee and/or USG.  Note that the BSD-style setreuid()
 542  * will allow a root program to temporarily drop privileges and be able to
 543  * regain them by swapping the real and effective uid.
 544  */
 545 asmlinkage long sys_setuid(uid_t uid)
 546 {
 547         int old_euid = current->euid;
 548         int old_ruid, old_suid, new_ruid;
 549
 550         old_ruid = new_ruid = current->uid;
 551         old_suid = current->suid;
 552         if (capable(CAP_SETUID))
 553                 new_ruid = current->euid = current->suid = current->fsuid = uid;
 554         else if ((uid == current->uid) || (uid == current->suid))
 555                 current->fsuid = current->euid = uid;
 556         else
 557                 return -EPERM;
 558
 559         if (current->euid != old_euid)
 560                 current->dumpable = 0;
 561
 562        if (new_ruid != old_ruid) {
 563                 /* See comment above about NPROC rlimit issues... */
 564                 free_uid(current);
 565                 current->uid = new_ruid;
 566                 alloc_uid(current);
 567         }
 568
 569         if (!issecure(SECURE_NO_SETUID_FIXUP)) {
 570                 cap_emulate_setxuid(old_ruid, old_euid, old_suid);
 571         }
 572
 573         return 0;
 574 }
 575
 576
 577 /*
 578  * This function implements a generic ability to update ruid, euid,
 579  * and suid.  This allows you to implement the 4.4 compatible seteuid().
 580  */
 581 asmlinkage long sys_setresuid(uid_t ruid, uid_t euid, uid_t suid)
 582 {
 583         int old_ruid = current->uid;
 584         int old_euid = current->euid;
 585         int old_suid = current->suid;
 586
 587         if (!capable(CAP_SETUID)) {
 588                 if ((ruid != (uid_t) -1) && (ruid != current->uid) &&
 589                     (ruid != current->euid) && (ruid != current->suid))
 590                         return -EPERM;
 591                 if ((euid != (uid_t) -1) && (euid != current->uid) &&
 592                     (euid != current->euid) && (euid != current->suid))
 593                         return -EPERM;
 594                 if ((suid != (uid_t) -1) && (suid != current->uid) &&
 595                     (suid != current->euid) && (suid != current->suid))
 596                         return -EPERM;
 597         }
 598         if (ruid != (uid_t) -1) {
 599                 /* See above commentary about NPROC rlimit issues here. */
 600                 free_uid(current);
 601                 current->uid = ruid;
 602                 alloc_uid(current);
 603         }
 604         if (euid != (uid_t) -1) {
 605                 if (euid != current->euid)
 606                         current->dumpable = 0;
 607                 current->euid = euid;
 608                 current->fsuid = euid;
 609         }
 610         if (suid != (uid_t) -1)
 611                 current->suid = suid;
 612
 613         if (!issecure(SECURE_NO_SETUID_FIXUP)) {
 614                 cap_emulate_setxuid(old_ruid, old_euid, old_suid);
 615         }
 616
 617         return 0;
 618 }
 619
 620 asmlinkage long sys_getresuid(uid_t *ruid, uid_t *euid, uid_t *suid)
 621 {
 622         int retval;
 623
 624         if (!(retval = put_user(current->uid, ruid)) &&
 625             !(retval = put_user(current->euid, euid)))
 626                 retval = put_user(current->suid, suid);
 627
 628         return retval;
 629 }
 630
 631 /*
 632  * Same as above, but for rgid, egid, sgid.
 633  */
 634 asmlinkage long sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid)
 635 {
 636        if (!capable(CAP_SETGID)) {
 637                 if ((rgid != (gid_t) -1) && (rgid != current->gid) &&
 638                     (rgid != current->egid) && (rgid != current->sgid))
 639                         return -EPERM;
 640                 if ((egid != (gid_t) -1) && (egid != current->gid) &&
 641                     (egid != current->egid) && (egid != current->sgid))
 642                         return -EPERM;
 643                 if ((sgid != (gid_t) -1) && (sgid != current->gid) &&
 644                     (sgid != current->egid) && (sgid != current->sgid))
 645                         return -EPERM;
 646         }
 647         if (rgid != (gid_t) -1)
 648                 current->gid = rgid;
 649         if (egid != (gid_t) -1) {
 650                 if (egid != current->egid)
 651                         current->dumpable = 0;
 652                 current->egid = egid;
 653                 current->fsgid = egid;
 654         }
 655         if (sgid != (gid_t) -1)
 656                 current->sgid = sgid;
 657         return 0;
 658 }
 659
 660 asmlinkage long sys_getresgid(gid_t *rgid, gid_t *egid, gid_t *sgid)
 661 {
 662         int retval;
 663
 664         if (!(retval = put_user(current->gid, rgid)) &&
 665             !(retval = put_user(current->egid, egid)))
 666                 retval = put_user(current->sgid, sgid);
 667
 668         return retval;
 669 }
 670
 671
 672 /*
 673  * "setfsuid()" sets the fsuid - the uid used for filesystem checks. This
 674  * is used for "access()" and for the NFS daemon (letting nfsd stay at
 675  * whatever uid it wants to). It normally shadows "euid", except when
 676  * explicitly set by setfsuid() or for access..
 677  */
 678 asmlinkage long sys_setfsuid(uid_t uid)
 679 {
 680         int old_fsuid;
 681
 682         old_fsuid = current->fsuid;
 683         if (uid == current->uid || uid == current->euid ||
 684             uid == current->suid || uid == current->fsuid ||
 685             capable(CAP_SETUID))
 686                 current->fsuid = uid;
 687         if (current->fsuid != old_fsuid)
 688                 current->dumpable = 0;
 689
 690         /* We emulate fsuid by essentially doing a scaled-down version
 691          * of what we did in setresuid and friends. However, we only
 692          * operate on the fs-specific bits of the process' effective
 693          * capabilities
 694          *
 695          * FIXME - is fsuser used for all CAP_FS_MASK capabilities?
 696          *          if not, we might be a bit too harsh here.
 697          */
 698
 699         if (!issecure(SECURE_NO_SETUID_FIXUP)) {
 700                 if (old_fsuid == 0 && current->fsuid != 0) {
 701                         cap_t(current->cap_effective) &= ~CAP_FS_MASK;
 702                 }
 703                 if (old_fsuid != 0 && current->fsuid == 0) {
 704                         cap_t(current->cap_effective) |=
 705                                 (cap_t(current->cap_permitted) & CAP_FS_MASK);
 706                 }
 707         }
 708
 709         return old_fsuid;
 710 }
 711
 712 /*
 713  * Samma på svenska..
 714  */
 715 asmlinkage long sys_setfsgid(gid_t gid)
 716 {
 717         int old_fsgid;
 718
 719         old_fsgid = current->fsgid;
 720         if (gid == current->gid || gid == current->egid ||
 721             gid == current->sgid || gid == current->fsgid ||
 722             capable(CAP_SETGID))
 723                 current->fsgid = gid;
 724         if (current->fsgid != old_fsgid)
 725                 current->dumpable = 0;
 726
 727         return old_fsgid;
 728 }
 729
 730 asmlinkage long sys_times(struct tms * tbuf)
 731 {
 732         struct tms temp;
 733
 734         /*
 735          *      In the SMP world we might just be unlucky and have one of
 736          *      the times increment as we use it. Since the value is an
 737          *      atomically safe type this is just fine. Conceptually its
 738          *      as if the syscall took an instant longer to occur.
 739          */
 740         if (tbuf) {
 741                 temp.tms_utime = hz_to_std(current->times.tms_utime);
 742                 temp.tms_stime = hz_to_std(current->times.tms_stime);
 743                 temp.tms_cutime = hz_to_std(current->times.tms_cutime);
 744                 temp.tms_cstime = hz_to_std(current->times.tms_cstime);
 745                 if (copy_to_user(tbuf, &temp, sizeof(struct tms)))
 746                         return -EFAULT;
 747         }
 748         return hz_to_std(jiffies);
 749 }
 750
 751 /*
 752  * This needs some heavy checking ...
 753  * I just haven't the stomach for it. I also don't fully
 754  * understand sessions/pgrp etc. Let somebody who does explain it.
 755  *
 756  * OK, I think I have the protection semantics right.... this is really
 757  * only important on a multi-user system anyway, to make sure one user
 758  * can't send a signal to a process owned by another.  -TYT, 12/12/91
 759  *
 760  * Auch. Had to add the 'did_exec' flag to conform completely to POSIX.
 761  * LBT 04.03.94
 762  */
 763
 764 asmlinkage long sys_setpgid(pid_t pid, pid_t pgid)
 765 {
 766         struct task_struct * p;
 767         int err = -EINVAL;
 768
 769         if (!pid)
 770                 pid = current->pid;
 771         if (!pgid)
 772                 pgid = pid;
 773         if (pgid < 0)
 774                 return -EINVAL;
 775
 776         /* From this point forward we keep holding onto the tasklist lock
 777          * so that our parent does not change from under us. -DaveM
 778          */
 779         read_lock(&tasklist_lock);
 780
 781         err = -ESRCH;
 782         p = find_task_by_pid(pid);
 783         if (!p)
 784                 goto out;
 785
 786         if (p->p_pptr == current || p->p_opptr == current) {
 787                 err = -EPERM;
 788                 if (p->session != current->session)
 789                         goto out;
 790                 err = -EACCES;
 791                 if (p->did_exec)
 792                         goto out;
 793         } else if (p != current)
 794                 goto out;
 795         err = -EPERM;
 796         if (p->leader)
 797                 goto out;
 798         if (pgid != pid) {
 799                 struct task_struct * tmp;
 800                 for_each_task (tmp) {
 801                         if (tmp->pgrp == pgid &&
 802                             tmp->session == current->session)
 803                                 goto ok_pgid;
 804                 }
 805                 goto out;
 806         }
 807
 808 ok_pgid:
 809         p->pgrp = pgid;
 810         err = 0;
 811 out:
 812         /* All paths lead to here, thus we are safe. -DaveM */
 813         read_unlock(&tasklist_lock);
 814         return err;
 815 }
 816
 817 asmlinkage long sys_getpgid(pid_t pid)
 818 {
 819         if (!pid) {
 820                 return current->pgrp;
 821         } else {
 822                 int retval;
 823                 struct task_struct *p;
 824
 825                 read_lock(&tasklist_lock);
 826                 p = find_task_by_pid(pid);
 827
 828                 retval = -ESRCH;
 829                 if (p)
 830                         retval = p->pgrp;
 831                 read_unlock(&tasklist_lock);
 832                 return retval;
 833         }
 834 }
 835
 836 asmlinkage long sys_getpgrp(void)
 837 {
 838         /* SMP - assuming writes are word atomic this is fine */
 839         return current->pgrp;
 840 }
 841
 842 asmlinkage long sys_getsid(pid_t pid)
 843 {
 844         if (!pid) {
 845                 return current->session;
 846         } else {
 847                 int retval;
 848                 struct task_struct *p;
 849
 850                 read_lock(&tasklist_lock);
 851                 p = find_task_by_pid(pid);
 852
 853                 retval = -ESRCH;
 854                 if(p)
 855                         retval = p->session;
 856                 read_unlock(&tasklist_lock);
 857                 return retval;
 858         }
 859 }
 860
 861 asmlinkage long sys_setsid(void)
 862 {
 863         struct task_struct * p;
 864         int err = -EPERM;
 865
 866         read_lock(&tasklist_lock);
 867         for_each_task(p) {
 868                 if (p->pgrp == current->pid)
 869                         goto out;
 870         }
 871
 872         current->leader = 1;
 873         current->session = current->pgrp = current->pid;
 874         current->tty = NULL;
 875         current->tty_old_pgrp = 0;
 876         err = current->pgrp;
 877 out:
 878         read_unlock(&tasklist_lock);
 879         return err;
 880 }
 881
 882 /*
 883  * Supplementary group IDs
 884  */
 885 asmlinkage long sys_getgroups(int gidsetsize, gid_t *grouplist)
 886 {
 887         int i;
 888
 889         /*
 890          *      SMP: Nobody else can change our grouplist. Thus we are
 891          *      safe.
 892          */
 893
 894         if (gidsetsize < 0)
 895                 return -EINVAL;
 896         i = current->ngroups;
 897         if (gidsetsize) {
 898                 if (i > gidsetsize)
 899                         return -EINVAL;
 900                 if (copy_to_user(grouplist, current->groups, sizeof(gid_t)*i))
 901                         return -EFAULT;
 902         }
 903         return i;
 904 }
 905
 906 /*
 907  *      SMP: Our groups are not shared. We can copy to/from them safely
 908  *      without another task interfering.
 909  */
 910
 911 asmlinkage long sys_setgroups(int gidsetsize, gid_t *grouplist)
 912 {
 913         if (!capable(CAP_SETGID))
 914                 return -EPERM;
 915         if ((unsigned) gidsetsize > NGROUPS)
 916                 return -EINVAL;
 917         if(copy_from_user(current->groups, grouplist, gidsetsize * sizeof(gid_t)))
 918                 return -EFAULT;
 919         current->ngroups = gidsetsize;
 920         return 0;
 921 }
 922
 923 static int supplemental_group_member(gid_t grp)
 924 {
 925         int i = current->ngroups;
 926
 927         if (i) {
 928                 gid_t *groups = current->groups;
 929                 do {
 930                         if (*groups == grp)
 931                                 return 1;
 932                         groups++;
 933                         i--;
 934                 } while (i);
 935         }
 936         return 0;
 937 }
 938
 939 /*
 940  * Check whether we're fsgid/egid or in the supplemental group..
 941  */
 942 int in_group_p(gid_t grp)
 943 {
 944         int retval = 1;
 945         if (grp != current->fsgid)
 946                 retval = supplemental_group_member(grp);
 947         return retval;
 948 }
 949
 950 int in_egroup_p(gid_t grp)
 951 {
 952         int retval = 1;
 953         if (grp != current->egid)
 954                 retval = supplemental_group_member(grp);
 955         return retval;
 956 }
 957
     /*
      * Read/write semaphore around system_utsname.  In this file it is
      * taken for reading by sys_newuname() and sys_gethostname(), and for
      * writing by sys_sethostname() and sys_setdomainname().
      */
 958 DECLARE_RWSEM(uts_sem);
 959
 960 asmlinkage long sys_newuname(struct new_utsname * name)
 961 {
 962         int errno = 0;
 963
 964         down_read(&uts_sem);
 965         if (copy_to_user(name,&system_utsname,sizeof *name))
 966                 errno = -EFAULT;
 967         up_read(&uts_sem);
 968         return errno;
 969 }
 970
 971 asmlinkage long sys_sethostname(char *name, int len)
 972 {
 973         int errno;
 974
 975         if (!capable(CAP_SYS_ADMIN))
 976                 return -EPERM;
 977         if (len < 0 || len > __NEW_UTS_LEN)
 978                 return -EINVAL;
 979         down_write(&uts_sem);
 980         errno = -EFAULT;
 981         if (!copy_from_user(system_utsname.nodename, name, len)) {
 982                 system_utsname.nodename[len] = 0;
 983                 errno = 0;
 984         }
 985         up_write(&uts_sem);
 986         return errno;
 987 }
 988
 989 asmlinkage long sys_gethostname(char *name, int len)
 990 {
 991         int i, errno;
 992
 993         if (len < 0)
 994                 return -EINVAL;
 995         down_read(&uts_sem);
 996         i = 1 + strlen(system_utsname.nodename);
 997         if (i > len)
 998                 i = len;
 999         errno = 0;
1000         if (copy_to_user(name, system_utsname.nodename, i))
1001                 errno = -EFAULT;
1002         up_read(&uts_sem);
1003         return errno;
1004 }
1005
1006 /*
1007  * Only setdomainname; getdomainname can be implemented by calling
1008  * uname()
1009  */
1010 asmlinkage long sys_setdomainname(char *name, int len)
1011 {
1012         int errno;
1013
1014         if (!capable(CAP_SYS_ADMIN))
1015                 return -EPERM;
1016         if (len < 0 || len > __NEW_UTS_LEN)
1017                 return -EINVAL;
1018
1019         down_write(&uts_sem);
1020         errno = -EFAULT;
1021         if (!copy_from_user(system_utsname.domainname, name, len)) {
1022                 errno = 0;
1023                 system_utsname.domainname[len] = 0;
1024         }
1025         up_write(&uts_sem);
1026         return errno;
1027 }
1028
1029 asmlinkage long sys_getrlimit(unsigned int resource, struct rlimit *rlim)
1030 {
1031         if (resource >= RLIM_NLIMITS)
1032                 return -EINVAL;
1033         else
1034                 return copy_to_user(rlim, current->rlim + resource, sizeof(*rlim))
1035                         ? -EFAULT : 0;
1036 }
1037
1038 #if !defined(__ia64__) && !defined(__s390__)
1039
1040 /*
1041  *      Back compatibility for getrlimit. Needed for some apps.
1042  */
1043
1044 asmlinkage long sys_old_getrlimit(unsigned int resource, struct rlimit *rlim)
1045 {
1046         struct rlimit x;
1047         if (resource >= RLIM_NLIMITS)
1048                 return -EINVAL;
1049
1050         memcpy(&x, current->rlim + resource, sizeof(*rlim));
1051         if(x.rlim_cur > 0x7FFFFFFF)
1052                 x.rlim_cur = 0x7FFFFFFF;
1053         if(x.rlim_max > 0x7FFFFFFF)
1054                 x.rlim_max = 0x7FFFFFFF;
1055         return copy_to_user(rlim, &x, sizeof(x))?-EFAULT:0;
1056 }
1057
1058 #endif
1059
1060 asmlinkage long sys_setrlimit(unsigned int resource, struct rlimit *rlim)
1061 {
1062         struct rlimit new_rlim, *old_rlim;
1063
1064         if (resource >= RLIM_NLIMITS)
1065                 return -EINVAL;
1066         if(copy_from_user(&new_rlim, rlim, sizeof(*rlim)))
1067                 return -EFAULT;
1068         if (new_rlim.rlim_cur < 0 || new_rlim.rlim_max < 0)
1069                 return -EINVAL;
1070         old_rlim = current->rlim + resource;
1071         if (((new_rlim.rlim_cur > old_rlim->rlim_max) ||
1072              (new_rlim.rlim_max > old_rlim->rlim_max)) &&
1073             !capable(CAP_SYS_RESOURCE))
1074                 return -EPERM;
1075         if (resource == RLIMIT_NOFILE) {
1076                 if (new_rlim.rlim_cur > NR_OPEN || new_rlim.rlim_max > NR_OPEN)
1077                         return -EPERM;
1078         }
1079         *old_rlim = new_rlim;
1080         return 0;
1081 }
1082
1083 /*
1084  * It would make sense to put struct rusage in the task_struct,
1085  * except that would make the task_struct be *really big*.  After
1086  * task_struct gets moved into malloc'ed memory, it would
1087  * make sense to do this.  It will make moving the rest of the information
1088  * a lot simpler!  (Which we're not doing right now because we're not
1089  * measuring them yet).
1090  *
1091  * This is SMP safe.  Either we are called from sys_getrusage on ourselves
1092  * below (we know we aren't going to exit/disappear and only we change our
1093  * rusage counters), or we are called from wait4() on a process which is
1094  * either stopped or zombied.  In the zombied case the task won't get
1095  * reaped till shortly after the call to getrusage(), in both cases the
1096  * task being examined is in a frozen state so the counters won't change.
1097  *
1098  * FIXME! Get the fault counts properly!
1099  */
1100 int getrusage(struct task_struct *p, int who, struct rusage *ru)
1101 {
1102         struct rusage r;
1103
1104         memset((char *) &r, 0, sizeof(r));
1105         switch (who) {
1106                 case RUSAGE_SELF:
1107                         r.ru_utime.tv_sec = CT_TO_SECS(p->times.tms_utime);
1108                         r.ru_utime.tv_usec = CT_TO_USECS(p->times.tms_utime);
1109                         r.ru_stime.tv_sec = CT_TO_SECS(p->times.tms_stime);
1110                         r.ru_stime.tv_usec = CT_TO_USECS(p->times.tms_stime);
1111                         r.ru_minflt = p->min_flt;
1112                         r.ru_majflt = p->maj_flt;
1113                         r.ru_nswap = p->nswap;
1114                         break;
1115                 case RUSAGE_CHILDREN:
1116                         r.ru_utime.tv_sec = CT_TO_SECS(p->times.tms_cutime);
1117                         r.ru_utime.tv_usec = CT_TO_USECS(p->times.tms_cutime);
1118                         r.ru_stime.tv_sec = CT_TO_SECS(p->times.tms_cstime);
1119                         r.ru_stime.tv_usec = CT_TO_USECS(p->times.tms_cstime);
1120                         r.ru_minflt = p->cmin_flt;
1121                         r.ru_majflt = p->cmaj_flt;
1122                         r.ru_nswap = p->cnswap;
1123                         break;
1124                 default:
1125                         r.ru_utime.tv_sec = CT_TO_SECS(p->times.tms_utime + p->times.tms_cutime);
1126                         r.ru_utime.tv_usec = CT_TO_USECS(p->times.tms_utime + p->times.tms_cutime);
1127                         r.ru_stime.tv_sec = CT_TO_SECS(p->times.tms_stime + p->times.tms_cstime);
1128                         r.ru_stime.tv_usec = CT_TO_USECS(p->times.tms_stime + p->times.tms_cstime);
1129                         r.ru_minflt = p->min_flt + p->cmin_flt;
1130                         r.ru_majflt = p->maj_flt + p->cmaj_flt;
1131                         r.ru_nswap = p->nswap + p->cnswap;
1132                         break;
1133         }
1134         return copy_to_user(ru, &r, sizeof(r)) ? -EFAULT : 0;
1135 }
1136
1137 asmlinkage long sys_getrusage(int who, struct rusage *ru)
1138 {
1139         if (who != RUSAGE_SELF && who != RUSAGE_CHILDREN)
1140                 return -EINVAL;
1141         return getrusage(current, who, ru);
1142 }
1143
1144 asmlinkage long sys_umask(int mask)
1145 {
1146         mask = xchg(&current->fs->umask, mask & S_IRWXUGO);
1147         return mask;
1148 }
1149
1150 asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3,
1151                           unsigned long arg4, unsigned long arg5)
1152 {
1153         int error = 0;
1154         int sig;
1155
1156         switch (option) {
1157                 case PR_SET_PDEATHSIG:
1158                         sig = arg2;
1159                         if (sig > _NSIG) {
1160                                 error = -EINVAL;
1161                                 break;
1162                         }
1163                         current->pdeath_signal = sig;
1164                         break;
1165                 case PR_GET_PDEATHSIG:
1166                         error = put_user(current->pdeath_signal, (int *)arg2);
1167                         break;
1168                 case PR_GET_DUMPABLE:
1169                         if (current->dumpable)
1170                                 error = 1;
1171                         break;
1172                 case PR_SET_DUMPABLE:
1173                         if (arg2 != 0 && arg2 != 1) {
1174                                 error = -EINVAL;
1175                                 break;
1176                         }
1177                         current->dumpable = arg2;
1178                         break;
1179                 case PR_SET_UNALIGN:
1180 #ifdef SET_UNALIGN_CTL
1181                         error = SET_UNALIGN_CTL(current, arg2);
1182 #else
1183                         error = -EINVAL;
1184 #endif
1185                         break;
1186
1187                 case PR_GET_UNALIGN:
1188 #ifdef GET_UNALIGN_CTL
1189                         error = GET_UNALIGN_CTL(current, arg2);
1190 #else
1191                         error = -EINVAL;
1192 #endif
1193                         break;
1194
1195                 case PR_GET_KEEPCAPS:
1196                         if (current->keep_capabilities)
1197                                 error = 1;
1198                         break;
1199                 case PR_SET_KEEPCAPS:
1200                         if (arg2 != 0 && arg2 != 1) {
1201                                 error = -EINVAL;
1202                                 break;
1203                         }
1204                         current->keep_capabilities = arg2;
1205                         break;
1206                 default:
1207                         error = -EINVAL;
1208                         break;
1209         }
1210         return error;
1211 }
1212
     /*
      * Symbols from this file published through EXPORT_SYMBOL (presumably
      * for use by loadable modules): the notifier-chain helpers, the
      * reboot-notifier registration calls, and the group-membership tests.
      */
1213 EXPORT_SYMBOL(notifier_chain_register);
1214 EXPORT_SYMBOL(notifier_chain_unregister);
1215 EXPORT_SYMBOL(notifier_call_chain);
1216 EXPORT_SYMBOL(register_reboot_notifier);
1217 EXPORT_SYMBOL(unregister_reboot_notifier);
1218 EXPORT_SYMBOL(in_group_p);
1219 EXPORT_SYMBOL(in_egroup_p);