kernel/sys.c

   1 /*
   2  *  linux/kernel/sys.c
   3  *
   4  *  Copyright (C) 1991, 1992  Linus Torvalds
   5  */
   6
   7 #include <linux/module.h>
   8 #include <linux/mm.h>
   9 #include <linux/utsname.h>
  10 #include <linux/mman.h>
  11 #include <linux/smp_lock.h>
  12 #include <linux/notifier.h>
  13 #include <linux/reboot.h>
  14 #include <linux/prctl.h>
  15 #include <linux/init.h>
  16 #include <linux/highuid.h>
  17
  18 #include <asm/uaccess.h>
  19 #include <asm/io.h>
  20
  21 /*
  22  * this is where the system-wide overflow UID and GID are defined, for
  23  * architectures that now have 32-bit UID/GID but didn't in the past
  24  */
  25
  26 int overflowuid = DEFAULT_OVERFLOWUID;
  27 int overflowgid = DEFAULT_OVERFLOWGID;
  28
  29 /*
  30  * the same as above, but for filesystems which can only store a 16-bit
  31  * UID and GID. as such, this is needed on all architectures
  32  */
  33
  34 int fs_overflowuid = DEFAULT_FS_OVERFLOWUID;
  35 int fs_overflowgid = DEFAULT_FS_OVERFLOWUID;
  36
  37 /*
  38  * this indicates whether you can reboot with ctrl-alt-del: the default is yes
  39  */
  40
  41 int C_A_D = 1;
  42
  43
  44 /*
  45  *      Notifier list for kernel code which wants to be called
  46  *      at shutdown. This is used to stop any idling DMA operations
  47  *      and the like.
  48  */
  49
  50 static struct notifier_block *reboot_notifier_list = NULL;
  51 rwlock_t notifier_lock = RW_LOCK_UNLOCKED;
  52
  53 /**
  54  *      notifier_chain_register - Add notifier to a notifier chain
  55  *      @list: Pointer to root list pointer
  56  *      @n: New entry in notifier chain
  57  *
  58  *      Adds a notifier to a notifier chain.
  59  *
  60  *      Currently always returns zero.
  61  */
  62
  63 int notifier_chain_register(struct notifier_block **list, struct notifier_block *n)
  64 {
  65         write_lock(&notifier_lock);
  66         while(*list)
  67         {
  68                 if(n->priority > (*list)->priority)
  69                         break;
  70                 list= &((*list)->next);
  71         }
  72         n->next = *list;
  73         *list=n;
  74         write_unlock(&notifier_lock);
  75         return 0;
  76 }
  77
  78 /**
  79  *      notifier_chain_unregister - Remove notifier from a notifier chain
  80  *      @nl: Pointer to root list pointer
  81  *      @n: New entry in notifier chain
  82  *
  83  *      Removes a notifier from a notifier chain.
  84  *
  85  *      Returns zero on success, or %-ENOENT on failure.
  86  */
  87
  88 int notifier_chain_unregister(struct notifier_block **nl, struct notifier_block *n)
  89 {
  90         write_lock(&notifier_lock);
  91         while((*nl)!=NULL)
  92         {
  93                 if((*nl)==n)
  94                 {
  95                         *nl=n->next;
  96                         write_unlock(&notifier_lock);
  97                         return 0;
  98                 }
  99                 nl=&((*nl)->next);
 100         }
 101         write_unlock(&notifier_lock);
 102         return -ENOENT;
 103 }
 104
 105 /**
 106  *      notifier_call_chain - Call functions in a notifier chain
 107  *      @n: Pointer to root pointer of notifier chain
 108  *      @val: Value passed unmodified to notifier function
 109  *      @v: Pointer passed unmodified to notifier function
 110  *
 111  *      Calls each function in a notifier chain in turn.
 112  *
 113  *      If the return value of the notifier can be and'd
 114  *      with %NOTIFY_STOP_MASK, then notifier_call_chain
 115  *      will return immediately, with the return value of
 116  *      the notifier function which halted execution.
 117  *      Otherwise, the return value is the return value
 118  *      of the last notifier function called.
 119  */
 120
 121 int notifier_call_chain(struct notifier_block **n, unsigned long val, void *v)
 122 {
 123         int ret=NOTIFY_DONE;
 124         struct notifier_block *nb = *n;
 125
 126         read_lock(&notifier_lock);
 127         while(nb)
 128         {
 129                 ret=nb->notifier_call(nb,val,v);
 130                 if(ret&NOTIFY_STOP_MASK)
 131                 {
 132                         read_unlock(&notifier_lock);
 133                         return ret;
 134                 }
 135                 nb=nb->next;
 136         }
 137         read_unlock(&notifier_lock);
 138         return ret;
 139 }
 140
 141 /**
 142  *      register_reboot_notifier - Register function to be called at reboot time
 143  *      @nb: Info about notifier function to be called
 144  *
 145  *      Registers a function with the list of functions
 146  *      to be called at reboot time.
 147  *
 148  *      Currently always returns zero, as notifier_chain_register
 149  *      always returns zero.
 150  */
 151
 152 int register_reboot_notifier(struct notifier_block * nb)
 153 {
 154         return notifier_chain_register(&reboot_notifier_list, nb);
 155 }
 156
 157 /**
 158  *      unregister_reboot_notifier - Unregister previously registered reboot notifier
 159  *      @nb: Hook to be unregistered
 160  *
 161  *      Unregisters a previously registered reboot
 162  *      notifier function.
 163  *
 164  *      Returns zero on success, or %-ENOENT on failure.
 165  */
 166
 167 int unregister_reboot_notifier(struct notifier_block * nb)
 168 {
 169         return notifier_chain_unregister(&reboot_notifier_list, nb);
 170 }
 171
 172 asmlinkage long sys_ni_syscall(void)
 173 {
 174         return -ENOSYS;
 175 }
 176
 177 static int proc_sel(struct task_struct *p, int which, int who)
 178 {
 179         if(p->pid)
 180         {
 181                 switch (which) {
 182                         case PRIO_PROCESS:
 183                                 if (!who && p == current)
 184                                         return 1;
 185                                 return(p->pid == who);
 186                         case PRIO_PGRP:
 187                                 if (!who)
 188                                         who = current->pgrp;
 189                                 return(p->pgrp == who);
 190                         case PRIO_USER:
 191                                 if (!who)
 192                                         who = current->uid;
 193                                 return(p->uid == who);
 194                 }
 195         }
 196         return 0;
 197 }
 198
 199 asmlinkage long sys_setpriority(int which, int who, int niceval)
 200 {
 201         struct task_struct *p;
 202         int error;
 203
 204         if (which > 2 || which < 0)
 205                 return -EINVAL;
 206
 207         /* normalize: avoid signed division (rounding problems) */
 208         error = -ESRCH;
 209         if (niceval < -20)
 210                 niceval = -20;
 211         if (niceval > 19)
 212                 niceval = 19;
 213
 214         read_lock(&tasklist_lock);
 215         for_each_task(p) {
 216                 if (!proc_sel(p, which, who))
 217                         continue;
 218                 if (p->uid != current->euid &&
 219                         p->uid != current->uid && !capable(CAP_SYS_NICE)) {
 220                         error = -EPERM;
 221                         continue;
 222                 }
 223                 if (error == -ESRCH)
 224                         error = 0;
 225                 if (niceval < p->nice && !capable(CAP_SYS_NICE))
 226                         error = -EACCES;
 227                 else
 228                         p->nice = niceval;
 229         }
 230         read_unlock(&tasklist_lock);
 231
 232         return error;
 233 }
 234
 235 /*
 236  * Ugh. To avoid negative return values, "getpriority()" will
 237  * not return the normal nice-value, but a value that has been
 238  * offset by 20 (ie it returns 0..39 instead of -20..19)
 239  */
 240 asmlinkage long sys_getpriority(int which, int who)
 241 {
 242         struct task_struct *p;
 243         long retval = -ESRCH;
 244
 245         if (which > 2 || which < 0)
 246                 return -EINVAL;
 247
 248         read_lock(&tasklist_lock);
 249         for_each_task (p) {
 250                 unsigned niceval;
 251                 if (!proc_sel(p, which, who))
 252                         continue;
 253                 niceval = p->nice + 20;
 254                 if (niceval < (unsigned)retval)
 255                         retval = niceval;
 256         }
 257         read_unlock(&tasklist_lock);
 258
 259         return retval;
 260 }
 261
 262
 263 /*
 264  * Reboot system call: for obvious reasons only root may call it,
 265  * and even root needs to set up some magic numbers in the registers
 266  * so that some mistake won't make this reboot the whole machine.
 267  * You can also set the meaning of the ctrl-alt-del-key here.
 268  *
 269  * reboot doesn't sync: do that yourself before calling this.
 270  */
 271 asmlinkage long sys_reboot(int magic1, int magic2, unsigned int cmd, void * arg)
 272 {
 273         char buffer[256];
 274
 275         /* We only trust the superuser with rebooting the system. */
 276         if (!capable(CAP_SYS_BOOT))
 277                 return -EPERM;
 278
 279         /* For safety, we require "magic" arguments. */
 280         if (magic1 != LINUX_REBOOT_MAGIC1 ||
 281             (magic2 != LINUX_REBOOT_MAGIC2 && magic2 != LINUX_REBOOT_MAGIC2A &&
 282                         magic2 != LINUX_REBOOT_MAGIC2B))
 283                 return -EINVAL;
 284
 285         lock_kernel();
 286         switch (cmd) {
 287         case LINUX_REBOOT_CMD_RESTART:
 288                 notifier_call_chain(&reboot_notifier_list, SYS_RESTART, NULL);
 289                 printk(KERN_EMERG "Restarting system.\n");
 290                 machine_restart(NULL);
 291                 break;
 292
 293         case LINUX_REBOOT_CMD_CAD_ON:
 294                 C_A_D = 1;
 295                 break;
 296
 297         case LINUX_REBOOT_CMD_CAD_OFF:
 298                 C_A_D = 0;
 299                 break;
 300
 301         case LINUX_REBOOT_CMD_HALT:
 302                 notifier_call_chain(&reboot_notifier_list, SYS_HALT, NULL);
 303                 printk(KERN_EMERG "System halted.\n");
 304                 machine_halt();
 305                 do_exit(0);
 306                 break;
 307
 308         case LINUX_REBOOT_CMD_POWER_OFF:
 309                 notifier_call_chain(&reboot_notifier_list, SYS_POWER_OFF, NULL);
 310                 printk(KERN_EMERG "Power down.\n");
 311                 machine_power_off();
 312                 do_exit(0);
 313                 break;
 314
 315         case LINUX_REBOOT_CMD_RESTART2:
 316                 if (strncpy_from_user(&buffer[0], (char *)arg, sizeof(buffer) - 1) < 0) {
 317                         unlock_kernel();
 318                         return -EFAULT;
 319                 }
 320                 buffer[sizeof(buffer) - 1] = '\0';
 321
 322                 notifier_call_chain(&reboot_notifier_list, SYS_RESTART, buffer);
 323                 printk(KERN_EMERG "Restarting system with command '%s'.\n", buffer);
 324                 machine_restart(buffer);
 325                 break;
 326
 327         default:
 328                 unlock_kernel();
 329                 return -EINVAL;
 330         }
 331         unlock_kernel();
 332         return 0;
 333 }
 334
 335 /*
 336  * This function gets called by ctrl-alt-del - ie the keyboard interrupt.
 337  * As it's called within an interrupt, it may NOT sync: the only choice
 338  * is whether to reboot at once, or just ignore the ctrl-alt-del.
 339  */
 340 void ctrl_alt_del(void)
 341 {
 342         if (C_A_D) {
 343                 notifier_call_chain(&reboot_notifier_list, SYS_RESTART, NULL);
 344                 machine_restart(NULL);
 345         } else
 346                 kill_proc(1, SIGINT, 1);
 347 }
 348
 349
 350 /*
 351  * Unprivileged users may change the real gid to the effective gid
 352  * or vice versa.  (BSD-style)
 353  *
 354  * If you set the real gid at all, or set the effective gid to a value not
 355  * equal to the real gid, then the saved gid is set to the new effective gid.
 356  *
 357  * This makes it possible for a setgid program to completely drop its
 358  * privileges, which is often a useful assertion to make when you are doing
 359  * a security audit over a program.
 360  *
 361  * The general idea is that a program which uses just setregid() will be
 362  * 100% compatible with BSD.  A program which uses just setgid() will be
 363  * 100% compatible with POSIX with saved IDs.
 364  *
  365  * SMP: There are no races; the GIDs are checked only by filesystem
  366  *      operations (as far as semantic preservation is concerned).
 367  */
 368 asmlinkage long sys_setregid(gid_t rgid, gid_t egid)
 369 {
 370         int old_rgid = current->gid;
 371         int old_egid = current->egid;
 372
 373         if (rgid != (gid_t) -1) {
 374                 if ((old_rgid == rgid) ||
 375                     (current->egid==rgid) ||
 376                     capable(CAP_SETGID))
 377                         current->gid = rgid;
 378                 else
 379                         return -EPERM;
 380         }
 381         if (egid != (gid_t) -1) {
 382                 if ((old_rgid == egid) ||
 383                     (current->egid == egid) ||
 384                     (current->sgid == egid) ||
 385                     capable(CAP_SETGID))
 386                         current->fsgid = current->egid = egid;
 387                 else {
 388                         current->gid = old_rgid;
 389                         return -EPERM;
 390                 }
 391         }
 392         if (rgid != (gid_t) -1 ||
 393             (egid != (gid_t) -1 && egid != old_rgid))
 394                 current->sgid = current->egid;
 395         current->fsgid = current->egid;
 396         if (current->egid != old_egid)
 397                 current->dumpable = 0;
 398         return 0;
 399 }
 400
 401 /*
 402  * setgid() is implemented like SysV w/ SAVED_IDS
 403  *
 404  * SMP: Same implicit races as above.
 405  */
 406 asmlinkage long sys_setgid(gid_t gid)
 407 {
 408         int old_egid = current->egid;
 409
 410         if (capable(CAP_SETGID))
 411                 current->gid = current->egid = current->sgid = current->fsgid = gid;
 412         else if ((gid == current->gid) || (gid == current->sgid))
 413                 current->egid = current->fsgid = gid;
 414         else
 415                 return -EPERM;
 416
 417         if (current->egid != old_egid)
 418                 current->dumpable = 0;
 419         return 0;
 420 }
 421
 422 /*
 423  * cap_emulate_setxuid() fixes the effective / permitted capabilities of
 424  * a process after a call to setuid, setreuid, or setresuid.
 425  *
 426  *  1) When set*uiding _from_ one of {r,e,s}uid == 0 _to_ all of
 427  *  {r,e,s}uid != 0, the permitted and effective capabilities are
 428  *  cleared.
 429  *
 430  *  2) When set*uiding _from_ euid == 0 _to_ euid != 0, the effective
 431  *  capabilities of the process are cleared.
 432  *
 433  *  3) When set*uiding _from_ euid != 0 _to_ euid == 0, the effective
 434  *  capabilities are set to the permitted capabilities.
 435  *
 436  *  fsuid is handled elsewhere. fsuid == 0 and {r,e,s}uid!= 0 should
 437  *  never happen.
 438  *
 439  *  -astor
 440  *
 441  * cevans - New behaviour, Oct '99
 442  * A process may, via prctl(), elect to keep its capabilities when it
 443  * calls setuid() and switches away from uid==0. Both permitted and
 444  * effective sets will be retained.
 445  * Without this change, it was impossible for a daemon to drop only some
 446  * of its privilege. The call to setuid(!=0) would drop all privileges!
 447  * Keeping uid 0 is not an option because uid 0 owns too many vital
 448  * files..
 449  * Thanks to Olaf Kirch and Peter Benie for spotting this.
 450  */
 451 extern inline void cap_emulate_setxuid(int old_ruid, int old_euid,
 452                                        int old_suid)
 453 {
 454         if ((old_ruid == 0 || old_euid == 0 || old_suid == 0) &&
 455             (current->uid != 0 && current->euid != 0 && current->suid != 0) &&
 456             !current->keep_capabilities) {
 457                 cap_clear(current->cap_permitted);
 458                 cap_clear(current->cap_effective);
 459         }
 460         if (old_euid == 0 && current->euid != 0) {
 461                 cap_clear(current->cap_effective);
 462         }
 463         if (old_euid != 0 && current->euid == 0) {
 464                 current->cap_effective = current->cap_permitted;
 465         }
 466 }
 467
 468 /*
 469  * Unprivileged users may change the real uid to the effective uid
 470  * or vice versa.  (BSD-style)
 471  *
 472  * If you set the real uid at all, or set the effective uid to a value not
 473  * equal to the real uid, then the saved uid is set to the new effective uid.
 474  *
 475  * This makes it possible for a setuid program to completely drop its
 476  * privileges, which is often a useful assertion to make when you are doing
 477  * a security audit over a program.
 478  *
 479  * The general idea is that a program which uses just setreuid() will be
 480  * 100% compatible with BSD.  A program which uses just setuid() will be
 481  * 100% compatible with POSIX with saved IDs.
 482  */
 483 asmlinkage long sys_setreuid(uid_t ruid, uid_t euid)
 484 {
 485         int old_ruid, old_euid, old_suid, new_ruid;
 486
 487         new_ruid = old_ruid = current->uid;
 488         old_euid = current->euid;
 489         old_suid = current->suid;
 490         if (ruid != (uid_t) -1) {
 491                 if ((old_ruid == ruid) ||
 492                     (current->euid==ruid) ||
 493                     capable(CAP_SETUID))
 494                         new_ruid = ruid;
 495                 else
 496                         return -EPERM;
 497         }
 498         if (euid != (uid_t) -1) {
 499                 if ((old_ruid == euid) ||
 500                     (current->euid == euid) ||
 501                     (current->suid == euid) ||
 502                     capable(CAP_SETUID))
 503                         current->fsuid = current->euid = euid;
 504                 else
 505                         return -EPERM;
 506         }
 507         if (ruid != (uid_t) -1 ||
 508             (euid != (uid_t) -1 && euid != old_ruid))
 509                 current->suid = current->euid;
 510         current->fsuid = current->euid;
 511         if (current->euid != old_euid)
 512                 current->dumpable = 0;
 513
 514         if(new_ruid != old_ruid) {
 515                 /* What if a process setreuid()'s and this brings the
 516                  * new uid over his NPROC rlimit?  We can check this now
 517                  * cheaply with the new uid cache, so if it matters
 518                  * we should be checking for it.  -DaveM
 519                  */
 520                 free_uid(current);
 521                 current->uid = new_ruid;
 522                 alloc_uid(current);
 523         }
 524
 525         if (!issecure(SECURE_NO_SETUID_FIXUP)) {
 526                 cap_emulate_setxuid(old_ruid, old_euid, old_suid);
 527         }
 528
 529         return 0;
 530 }
 531
 532
 533
 534 /*
 535  * setuid() is implemented like SysV with SAVED_IDS
 536  *
 537  * Note that SAVED_ID's is deficient in that a setuid root program
 538  * like sendmail, for example, cannot set its uid to be a normal
 539  * user and then switch back, because if you're root, setuid() sets
 540  * the saved uid too.  If you don't like this, blame the bright people
 541  * in the POSIX committee and/or USG.  Note that the BSD-style setreuid()
 542  * will allow a root program to temporarily drop privileges and be able to
 543  * regain them by swapping the real and effective uid.
 544  */
 545 asmlinkage long sys_setuid(uid_t uid)
 546 {
 547         int old_euid = current->euid;
 548         int old_ruid, old_suid, new_ruid;
 549
 550         old_ruid = new_ruid = current->uid;
 551         old_suid = current->suid;
 552         if (capable(CAP_SETUID))
 553                 new_ruid = current->euid = current->suid = current->fsuid = uid;
 554         else if ((uid == current->uid) || (uid == current->suid))
 555                 current->fsuid = current->euid = uid;
 556         else
 557                 return -EPERM;
 558
 559         if (current->euid != old_euid)
 560                 current->dumpable = 0;
 561
 562        if (new_ruid != old_ruid) {
 563                 /* See comment above about NPROC rlimit issues... */
 564                 free_uid(current);
 565                 current->uid = new_ruid;
 566                 alloc_uid(current);
 567         }
 568
 569         if (!issecure(SECURE_NO_SETUID_FIXUP)) {
 570                 cap_emulate_setxuid(old_ruid, old_euid, old_suid);
 571         }
 572
 573         return 0;
 574 }
 575
 576
 577 /*
 578  * This function implements a generic ability to update ruid, euid,
 579  * and suid.  This allows you to implement the 4.4 compatible seteuid().
 580  */
 581 asmlinkage long sys_setresuid(uid_t ruid, uid_t euid, uid_t suid)
 582 {
 583         int old_ruid = current->uid;
 584         int old_euid = current->euid;
 585         int old_suid = current->suid;
 586
 587         if (!capable(CAP_SETUID)) {
 588                 if ((ruid != (uid_t) -1) && (ruid != current->uid) &&
 589                     (ruid != current->euid) && (ruid != current->suid))
 590                         return -EPERM;
 591                 if ((euid != (uid_t) -1) && (euid != current->uid) &&
 592                     (euid != current->euid) && (euid != current->suid))
 593                         return -EPERM;
 594                 if ((suid != (uid_t) -1) && (suid != current->uid) &&
 595                     (suid != current->euid) && (suid != current->suid))
 596                         return -EPERM;
 597         }
 598         if (ruid != (uid_t) -1) {
 599                 /* See above commentary about NPROC rlimit issues here. */
 600                 free_uid(current);
 601                 current->uid = ruid;
 602                 alloc_uid(current);
 603         }
 604         if (euid != (uid_t) -1) {
 605                 if (euid != current->euid)
 606                         current->dumpable = 0;
 607                 current->euid = euid;
 608                 current->fsuid = euid;
 609         }
 610         if (suid != (uid_t) -1)
 611                 current->suid = suid;
 612
 613         if (!issecure(SECURE_NO_SETUID_FIXUP)) {
 614                 cap_emulate_setxuid(old_ruid, old_euid, old_suid);
 615         }
 616
 617         return 0;
 618 }
 619
 620 asmlinkage long sys_getresuid(uid_t *ruid, uid_t *euid, uid_t *suid)
 621 {
 622         int retval;
 623
 624         if (!(retval = put_user(current->uid, ruid)) &&
 625             !(retval = put_user(current->euid, euid)))
 626                 retval = put_user(current->suid, suid);
 627
 628         return retval;
 629 }
 630
 631 /*
 632  * Same as above, but for rgid, egid, sgid.
 633  */
 634 asmlinkage long sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid)
 635 {
 636        if (!capable(CAP_SETGID)) {
 637                 if ((rgid != (gid_t) -1) && (rgid != current->gid) &&
 638                     (rgid != current->egid) && (rgid != current->sgid))
 639                         return -EPERM;
 640                 if ((egid != (gid_t) -1) && (egid != current->gid) &&
 641                     (egid != current->egid) && (egid != current->sgid))
 642                         return -EPERM;
 643                 if ((sgid != (gid_t) -1) && (sgid != current->gid) &&
 644                     (sgid != current->egid) && (sgid != current->sgid))
 645                         return -EPERM;
 646         }
 647         if (rgid != (gid_t) -1)
 648                 current->gid = rgid;
 649         if (egid != (gid_t) -1) {
 650                 if (egid != current->egid)
 651                         current->dumpable = 0;
 652                 current->egid = egid;
 653                 current->fsgid = egid;
 654         }
 655         if (sgid != (gid_t) -1)
 656                 current->sgid = sgid;
 657         return 0;
 658 }
 659
 660 asmlinkage long sys_getresgid(gid_t *rgid, gid_t *egid, gid_t *sgid)
 661 {
 662         int retval;
 663
 664         if (!(retval = put_user(current->gid, rgid)) &&
 665             !(retval = put_user(current->egid, egid)))
 666                 retval = put_user(current->sgid, sgid);
 667
 668         return retval;
 669 }
 670
 671
 672 /*
 673  * "setfsuid()" sets the fsuid - the uid used for filesystem checks. This
 674  * is used for "access()" and for the NFS daemon (letting nfsd stay at
 675  * whatever uid it wants to). It normally shadows "euid", except when
 676  * explicitly set by setfsuid() or for access..
 677  */
 678 asmlinkage long sys_setfsuid(uid_t uid)
 679 {
 680         int old_fsuid;
 681
 682         old_fsuid = current->fsuid;
 683         if (uid == current->uid || uid == current->euid ||
 684             uid == current->suid || uid == current->fsuid ||
 685             capable(CAP_SETUID))
 686                 current->fsuid = uid;
 687         if (current->fsuid != old_fsuid)
 688                 current->dumpable = 0;
 689
 690         /* We emulate fsuid by essentially doing a scaled-down version
 691          * of what we did in setresuid and friends. However, we only
 692          * operate on the fs-specific bits of the process' effective
 693          * capabilities
 694          *
 695          * FIXME - is fsuser used for all CAP_FS_MASK capabilities?
 696          *          if not, we might be a bit too harsh here.
 697          */
 698
 699         if (!issecure(SECURE_NO_SETUID_FIXUP)) {
 700                 if (old_fsuid == 0 && current->fsuid != 0) {
 701                         cap_t(current->cap_effective) &= ~CAP_FS_MASK;
 702                 }
 703                 if (old_fsuid != 0 && current->fsuid == 0) {
 704                         cap_t(current->cap_effective) |=
 705                                 (cap_t(current->cap_permitted) & CAP_FS_MASK);
 706                 }
 707         }
 708
 709         return old_fsuid;
 710 }
 711
 712 /*
 713  * Samma på svenska..
 714  */
 715 asmlinkage long sys_setfsgid(gid_t gid)
 716 {
 717         int old_fsgid;
 718
 719         old_fsgid = current->fsgid;
 720         if (gid == current->gid || gid == current->egid ||
 721             gid == current->sgid || gid == current->fsgid ||
 722             capable(CAP_SETGID))
 723                 current->fsgid = gid;
 724         if (current->fsgid != old_fsgid)
 725                 current->dumpable = 0;
 726
 727         return old_fsgid;
 728 }
 729
 730 asmlinkage long sys_times(struct tms * tbuf)
 731 {
 732         /*
 733          *      In the SMP world we might just be unlucky and have one of
 734          *      the times increment as we use it. Since the value is an
 735          *      atomically safe type this is just fine. Conceptually its
 736          *      as if the syscall took an instant longer to occur.
 737          */
 738         if (tbuf)
 739                 if (copy_to_user(tbuf, &current->times, sizeof(struct tms)))
 740                         return -EFAULT;
 741         return jiffies;
 742 }
 743
 744 /*
 745  * This needs some heavy checking ...
 746  * I just haven't the stomach for it. I also don't fully
 747  * understand sessions/pgrp etc. Let somebody who does explain it.
 748  *
 749  * OK, I think I have the protection semantics right.... this is really
 750  * only important on a multi-user system anyway, to make sure one user
 751  * can't send a signal to a process owned by another.  -TYT, 12/12/91
 752  *
 753  * Auch. Had to add the 'did_exec' flag to conform completely to POSIX.
 754  * LBT 04.03.94
 755  */
 756
 757 asmlinkage long sys_setpgid(pid_t pid, pid_t pgid)
 758 {
 759         struct task_struct * p;
 760         int err = -EINVAL;
 761
 762         if (!pid)
 763                 pid = current->pid;
 764         if (!pgid)
 765                 pgid = pid;
 766         if (pgid < 0)
 767                 return -EINVAL;
 768
 769         /* From this point forward we keep holding onto the tasklist lock
 770          * so that our parent does not change from under us. -DaveM
 771          */
 772         read_lock(&tasklist_lock);
 773
 774         err = -ESRCH;
 775         p = find_task_by_pid(pid);
 776         if (!p)
 777                 goto out;
 778
 779         if (p->p_pptr == current || p->p_opptr == current) {
 780                 err = -EPERM;
 781                 if (p->session != current->session)
 782                         goto out;
 783                 err = -EACCES;
 784                 if (p->did_exec)
 785                         goto out;
 786         } else if (p != current)
 787                 goto out;
 788         err = -EPERM;
 789         if (p->leader)
 790                 goto out;
 791         if (pgid != pid) {
 792                 struct task_struct * tmp;
 793                 for_each_task (tmp) {
 794                         if (tmp->pgrp == pgid &&
 795                             tmp->session == current->session)
 796                                 goto ok_pgid;
 797                 }
 798                 goto out;
 799         }
 800
 801 ok_pgid:
 802         p->pgrp = pgid;
 803         err = 0;
 804 out:
 805         /* All paths lead to here, thus we are safe. -DaveM */
 806         read_unlock(&tasklist_lock);
 807         return err;
 808 }
 809
 810 asmlinkage long sys_getpgid(pid_t pid)
 811 {
 812         if (!pid) {
 813                 return current->pgrp;
 814         } else {
 815                 int retval;
 816                 struct task_struct *p;
 817
 818                 read_lock(&tasklist_lock);
 819                 p = find_task_by_pid(pid);
 820
 821                 retval = -ESRCH;
 822                 if (p)
 823                         retval = p->pgrp;
 824                 read_unlock(&tasklist_lock);
 825                 return retval;
 826         }
 827 }
 828
 829 asmlinkage long sys_getpgrp(void)
 830 {
 831         /* SMP - assuming writes are word atomic this is fine */
 832         return current->pgrp;
 833 }
 834
 835 asmlinkage long sys_getsid(pid_t pid)
 836 {
 837         if (!pid) {
 838                 return current->session;
 839         } else {
 840                 int retval;
 841                 struct task_struct *p;
 842
 843                 read_lock(&tasklist_lock);
 844                 p = find_task_by_pid(pid);
 845
 846                 retval = -ESRCH;
 847                 if(p)
 848                         retval = p->session;
 849                 read_unlock(&tasklist_lock);
 850                 return retval;
 851         }
 852 }
 853
 854 asmlinkage long sys_setsid(void)
 855 {
 856         struct task_struct * p;
 857         int err = -EPERM;
 858
 859         read_lock(&tasklist_lock);
 860         for_each_task(p) {
 861                 if (p->pgrp == current->pid)
 862                         goto out;
 863         }
 864
 865         current->leader = 1;
 866         current->session = current->pgrp = current->pid;
 867         current->tty = NULL;
 868         current->tty_old_pgrp = 0;
 869         err = current->pgrp;
 870 out:
 871         read_unlock(&tasklist_lock);
 872         return err;
 873 }
 874
 875 /*
 876  * Supplementary group IDs
 877  */
 878 asmlinkage long sys_getgroups(int gidsetsize, gid_t *grouplist)
 879 {
 880         int i;
 881
 882         /*
 883          *      SMP: Nobody else can change our grouplist. Thus we are
 884          *      safe.
 885          */
 886
 887         if (gidsetsize < 0)
 888                 return -EINVAL;
 889         i = current->ngroups;
 890         if (gidsetsize) {
 891                 if (i > gidsetsize)
 892                         return -EINVAL;
 893                 if (copy_to_user(grouplist, current->groups, sizeof(gid_t)*i))
 894                         return -EFAULT;
 895         }
 896         return i;
 897 }
 898
 899 /*
 900  *      SMP: Our groups are not shared. We can copy to/from them safely
 901  *      without another task interfering.
 902  */
 903
 904 asmlinkage long sys_setgroups(int gidsetsize, gid_t *grouplist)
 905 {
 906         if (!capable(CAP_SETGID))
 907                 return -EPERM;
 908         if ((unsigned) gidsetsize > NGROUPS)
 909                 return -EINVAL;
 910         if(copy_from_user(current->groups, grouplist, gidsetsize * sizeof(gid_t)))
 911                 return -EFAULT;
 912         current->ngroups = gidsetsize;
 913         return 0;
 914 }
 915
 916 static int supplemental_group_member(gid_t grp)
 917 {
 918         int i = current->ngroups;
 919
 920         if (i) {
 921                 gid_t *groups = current->groups;
 922                 do {
 923                         if (*groups == grp)
 924                                 return 1;
 925                         groups++;
 926                         i--;
 927                 } while (i);
 928         }
 929         return 0;
 930 }
 931
 932 /*
 933  * Check whether we're fsgid/egid or in the supplemental group..
 934  */
 935 int in_group_p(gid_t grp)
 936 {
 937         int retval = 1;
 938         if (grp != current->fsgid)
 939                 retval = supplemental_group_member(grp);
 940         return retval;
 941 }
 942
 943 int in_egroup_p(gid_t grp)
 944 {
 945         int retval = 1;
 946         if (grp != current->egid)
 947                 retval = supplemental_group_member(grp);
 948         return retval;
 949 }
 950
 951 DECLARE_RWSEM(uts_sem);   /* guards system_utsname: taken for reading by
                                * sys_newuname()/sys_gethostname() and for writing
                                * by sys_sethostname()/sys_setdomainname() */
 952
 953 asmlinkage long sys_newuname(struct new_utsname * name)
 954 {
 955         int errno = 0;
 956
 957         down_read(&uts_sem);
 958         if (copy_to_user(name,&system_utsname,sizeof *name))
 959                 errno = -EFAULT;
 960         up_read(&uts_sem);
 961         return errno;
 962 }
 963
 964 asmlinkage long sys_sethostname(char *name, int len)
 965 {
 966         int errno;
 967
 968         if (!capable(CAP_SYS_ADMIN))
 969                 return -EPERM;
 970         if (len < 0 || len > __NEW_UTS_LEN)
 971                 return -EINVAL;
 972         down_write(&uts_sem);
 973         errno = -EFAULT;
 974         if (!copy_from_user(system_utsname.nodename, name, len)) {
 975                 system_utsname.nodename[len] = 0;
 976                 errno = 0;
 977         }
 978         up_write(&uts_sem);
 979         return errno;
 980 }
 981
 982 asmlinkage long sys_gethostname(char *name, int len)
 983 {
 984         int i, errno;
 985
 986         if (len < 0)
 987                 return -EINVAL;
 988         down_read(&uts_sem);
 989         i = 1 + strlen(system_utsname.nodename);
 990         if (i > len)
 991                 i = len;
 992         errno = 0;
 993         if (copy_to_user(name, system_utsname.nodename, i))
 994                 errno = -EFAULT;
 995         up_read(&uts_sem);
 996         return errno;
 997 }
 998
 999 /*
1000  * Only setdomainname; getdomainname can be implemented by calling
1001  * uname()
1002  */
1003 asmlinkage long sys_setdomainname(char *name, int len)
1004 {
1005         int errno;
1006
1007         if (!capable(CAP_SYS_ADMIN))
1008                 return -EPERM;
1009         if (len < 0 || len > __NEW_UTS_LEN)
1010                 return -EINVAL;
1011
1012         down_write(&uts_sem);
1013         errno = -EFAULT;
1014         if (!copy_from_user(system_utsname.domainname, name, len)) {
1015                 errno = 0;
1016                 system_utsname.domainname[len] = 0;
1017         }
1018         up_write(&uts_sem);
1019         return errno;
1020 }
1021
1022 asmlinkage long sys_getrlimit(unsigned int resource, struct rlimit *rlim)
1023 {
1024         if (resource >= RLIM_NLIMITS)
1025                 return -EINVAL;
1026         else
1027                 return copy_to_user(rlim, current->rlim + resource, sizeof(*rlim))
1028                         ? -EFAULT : 0;
1029 }
1030
1031 #if !defined(__ia64__) && !defined(__s390__)
1032
1033 /*
1034  *      Back compatibility for getrlimit. Needed for some apps.
1035  */
1036
1037 asmlinkage long sys_old_getrlimit(unsigned int resource, struct rlimit *rlim)
1038 {
1039         struct rlimit x;
1040         if (resource >= RLIM_NLIMITS)
1041                 return -EINVAL;
1042
1043         memcpy(&x, current->rlim + resource, sizeof(*rlim));
1044         if(x.rlim_cur > 0x7FFFFFFF)
1045                 x.rlim_cur = 0x7FFFFFFF;
1046         if(x.rlim_max > 0x7FFFFFFF)
1047                 x.rlim_max = 0x7FFFFFFF;
1048         return copy_to_user(rlim, &x, sizeof(x))?-EFAULT:0;
1049 }
1050
1051 #endif
1052
1053 asmlinkage long sys_setrlimit(unsigned int resource, struct rlimit *rlim)
1054 {
1055         struct rlimit new_rlim, *old_rlim;
1056
1057         if (resource >= RLIM_NLIMITS)
1058                 return -EINVAL;
1059         if(copy_from_user(&new_rlim, rlim, sizeof(*rlim)))
1060                 return -EFAULT;
1061         if (new_rlim.rlim_cur < 0 || new_rlim.rlim_max < 0)
1062                 return -EINVAL;
1063         old_rlim = current->rlim + resource;
1064         if (((new_rlim.rlim_cur > old_rlim->rlim_max) ||
1065              (new_rlim.rlim_max > old_rlim->rlim_max)) &&
1066             !capable(CAP_SYS_RESOURCE))
1067                 return -EPERM;
1068         if (resource == RLIMIT_NOFILE) {
1069                 if (new_rlim.rlim_cur > NR_OPEN || new_rlim.rlim_max > NR_OPEN)
1070                         return -EPERM;
1071         }
1072         *old_rlim = new_rlim;
1073         return 0;
1074 }
1075
1076 /*
1077  * It would make sense to put struct rusage in the task_struct,
1078  * except that would make the task_struct be *really big*.  After
1079  * task_struct gets moved into malloc'ed memory, it would
1080  * make sense to do this.  It will make moving the rest of the information
1081  * a lot simpler!  (Which we're not doing right now because we're not
1082  * measuring them yet).
1083  *
1084  * This is SMP safe.  Either we are called from sys_getrusage on ourselves
1085  * below (we know we aren't going to exit/disappear and only we change our
1086  * rusage counters), or we are called from wait4() on a process which is
1087  * either stopped or zombied.  In the zombied case the task won't get
1088  * reaped till shortly after the call to getrusage(), in both cases the
1089  * task being examined is in a frozen state so the counters won't change.
1090  *
1091  * FIXME! Get the fault counts properly!
1092  */
1093 int getrusage(struct task_struct *p, int who, struct rusage *ru)
1094 {
1095         struct rusage r;
1096
1097         memset((char *) &r, 0, sizeof(r));
1098         switch (who) {
1099                 case RUSAGE_SELF:
1100                         r.ru_utime.tv_sec = CT_TO_SECS(p->times.tms_utime);
1101                         r.ru_utime.tv_usec = CT_TO_USECS(p->times.tms_utime);
1102                         r.ru_stime.tv_sec = CT_TO_SECS(p->times.tms_stime);
1103                         r.ru_stime.tv_usec = CT_TO_USECS(p->times.tms_stime);
1104                         r.ru_minflt = p->min_flt;
1105                         r.ru_majflt = p->maj_flt;
1106                         r.ru_nswap = p->nswap;
1107                         break;
1108                 case RUSAGE_CHILDREN:
1109                         r.ru_utime.tv_sec = CT_TO_SECS(p->times.tms_cutime);
1110                         r.ru_utime.tv_usec = CT_TO_USECS(p->times.tms_cutime);
1111                         r.ru_stime.tv_sec = CT_TO_SECS(p->times.tms_cstime);
1112                         r.ru_stime.tv_usec = CT_TO_USECS(p->times.tms_cstime);
1113                         r.ru_minflt = p->cmin_flt;
1114                         r.ru_majflt = p->cmaj_flt;
1115                         r.ru_nswap = p->cnswap;
1116                         break;
1117                 default:
1118                         r.ru_utime.tv_sec = CT_TO_SECS(p->times.tms_utime + p->times.tms_cutime);
1119                         r.ru_utime.tv_usec = CT_TO_USECS(p->times.tms_utime + p->times.tms_cutime);
1120                         r.ru_stime.tv_sec = CT_TO_SECS(p->times.tms_stime + p->times.tms_cstime);
1121                         r.ru_stime.tv_usec = CT_TO_USECS(p->times.tms_stime + p->times.tms_cstime);
1122                         r.ru_minflt = p->min_flt + p->cmin_flt;
1123                         r.ru_majflt = p->maj_flt + p->cmaj_flt;
1124                         r.ru_nswap = p->nswap + p->cnswap;
1125                         break;
1126         }
1127         return copy_to_user(ru, &r, sizeof(r)) ? -EFAULT : 0;
1128 }
1129
1130 asmlinkage long sys_getrusage(int who, struct rusage *ru)
1131 {
1132         if (who != RUSAGE_SELF && who != RUSAGE_CHILDREN)
1133                 return -EINVAL;
1134         return getrusage(current, who, ru);
1135 }
1136
1137 asmlinkage long sys_umask(int mask)
1138 {
1139         mask = xchg(&current->fs->umask, mask & S_IRWXUGO);
1140         return mask;
1141 }
1142
1143 asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3,
1144                           unsigned long arg4, unsigned long arg5)
1145 {
1146         int error = 0;
1147         int sig;
1148
1149         switch (option) {
1150                 case PR_SET_PDEATHSIG:
1151                         sig = arg2;
1152                         if (sig > _NSIG) {
1153                                 error = -EINVAL;
1154                                 break;
1155                         }
1156                         current->pdeath_signal = sig;
1157                         break;
1158                 case PR_GET_PDEATHSIG:
1159                         error = put_user(current->pdeath_signal, (int *)arg2);
1160                         break;
1161                 case PR_GET_DUMPABLE:
1162                         if (current->dumpable)
1163                                 error = 1;
1164                         break;
1165                 case PR_SET_DUMPABLE:
1166                         if (arg2 != 0 && arg2 != 1) {
1167                                 error = -EINVAL;
1168                                 break;
1169                         }
1170                         current->dumpable = arg2;
1171                         break;
1172                 case PR_SET_UNALIGN:
1173 #ifdef SET_UNALIGN_CTL
1174                         error = SET_UNALIGN_CTL(current, arg2);
1175 #else
1176                         error = -EINVAL;
1177 #endif
1178                         break;
1179
1180                 case PR_GET_UNALIGN:
1181 #ifdef GET_UNALIGN_CTL
1182                         error = GET_UNALIGN_CTL(current, arg2);
1183 #else
1184                         error = -EINVAL;
1185 #endif
1186                         break;
1187
1188                 case PR_GET_KEEPCAPS:
1189                         if (current->keep_capabilities)
1190                                 error = 1;
1191                         break;
1192                 case PR_SET_KEEPCAPS:
1193                         if (arg2 != 0 && arg2 != 1) {
1194                                 error = -EINVAL;
1195                                 break;
1196                         }
1197                         current->keep_capabilities = arg2;
1198                         break;
1199                 default:
1200                         error = -EINVAL;
1201                         break;
1202         }
1203         return error;
1204 }
1205
1206 EXPORT_SYMBOL(notifier_chain_register);    /* notifier-chain helpers */
1207 EXPORT_SYMBOL(notifier_chain_unregister);
1208 EXPORT_SYMBOL(notifier_call_chain);
1209 EXPORT_SYMBOL(register_reboot_notifier);   /* reboot notifier registration */
1210 EXPORT_SYMBOL(unregister_reboot_notifier);
1211 EXPORT_SYMBOL(in_group_p);                 /* group-membership checks defined in this file */
1212 EXPORT_SYMBOL(in_egroup_p);