security/commoncap.c

   1 /* Common capabilities, needed by capability.o and root_plug.o
   2  *
   3  *      This program is free software; you can redistribute it and/or modify
   4  *      it under the terms of the GNU General Public License as published by
   5  *      the Free Software Foundation; either version 2 of the License, or
   6  *      (at your option) any later version.
   7  *
   8  */
   9
  10 #include <linux/capability.h>
  11 #include <linux/module.h>
  12 #include <linux/init.h>
  13 #include <linux/kernel.h>
  14 #include <linux/security.h>
  15 #include <linux/file.h>
  16 #include <linux/mm.h>
  17 #include <linux/mman.h>
  18 #include <linux/pagemap.h>
  19 #include <linux/swap.h>
  20 #include <linux/skbuff.h>
  21 #include <linux/netlink.h>
  22 #include <linux/ptrace.h>
  23 #include <linux/xattr.h>
  24 #include <linux/hugetlb.h>
  25 #include <linux/mount.h>
  26 #include <linux/sched.h>
  27
  28 /* Global security state */
  29
  30 unsigned securebits = SECUREBITS_DEFAULT; /* systemwide security settings */
  31 EXPORT_SYMBOL(securebits);
  32
  33 int cap_netlink_send(struct sock *sk, struct sk_buff *skb)
  34 {
  35         NETLINK_CB(skb).eff_cap = current->cap_effective;
  36         return 0;
  37 }
  38
  39 int cap_netlink_recv(struct sk_buff *skb, int cap)
  40 {
  41         if (!cap_raised(NETLINK_CB(skb).eff_cap, cap))
  42                 return -EPERM;
  43         return 0;
  44 }
  45
  46 EXPORT_SYMBOL(cap_netlink_recv);
  47
  48 /*
  49  * NOTE WELL: cap_capable() cannot be used like the kernel's capable()
  50  * function.  That is, it has the reverse semantics: cap_capable()
  51  * returns 0 when a task has a capability, but the kernel's capable()
  52  * returns 1 for this case.
  53  */
  54 int cap_capable (struct task_struct *tsk, int cap)
  55 {
  56         /* Derived from include/linux/sched.h:capable. */
  57         if (cap_raised(tsk->cap_effective, cap))
  58                 return 0;
  59         return -EPERM;
  60 }
  61
  62 int cap_settime(struct timespec *ts, struct timezone *tz)
  63 {
  64         if (!capable(CAP_SYS_TIME))
  65                 return -EPERM;
  66         return 0;
  67 }
  68
  69 int cap_ptrace (struct task_struct *parent, struct task_struct *child)
  70 {
  71         /* Derived from arch/i386/kernel/ptrace.c:sys_ptrace. */
  72         if (!cap_issubset(child->cap_permitted, parent->cap_permitted) &&
  73             !__capable(parent, CAP_SYS_PTRACE))
  74                 return -EPERM;
  75         return 0;
  76 }
  77
  78 int cap_capget (struct task_struct *target, kernel_cap_t *effective,
  79                 kernel_cap_t *inheritable, kernel_cap_t *permitted)
  80 {
  81         /* Derived from kernel/capability.c:sys_capget. */
  82         *effective = target->cap_effective;
  83         *inheritable = target->cap_inheritable;
  84         *permitted = target->cap_permitted;
  85         return 0;
  86 }
  87
  88 #ifdef CONFIG_SECURITY_FILE_CAPABILITIES
  89
  90 static inline int cap_block_setpcap(struct task_struct *target)
  91 {
  92         /*
  93          * No support for remote process capability manipulation with
  94          * filesystem capability support.
  95          */
  96         return (target != current);
  97 }
  98
  99 static inline int cap_inh_is_capped(void)
 100 {
 101         /*
 102          * Return 1 if changes to the inheritable set are limited
 103          * to the old permitted set. That is, if the current task
 104          * does *not* possess the CAP_SETPCAP capability.
 105          */
 106         return (cap_capable(current, CAP_SETPCAP) != 0);
 107 }
 108
 109 #else /* ie., ndef CONFIG_SECURITY_FILE_CAPABILITIES */
 110
 111 static inline int cap_block_setpcap(struct task_struct *t) { return 0; }
 112 static inline int cap_inh_is_capped(void) { return 1; }
 113
 114 #endif /* def CONFIG_SECURITY_FILE_CAPABILITIES */
 115
 116 int cap_capset_check (struct task_struct *target, kernel_cap_t *effective,
 117                       kernel_cap_t *inheritable, kernel_cap_t *permitted)
 118 {
 119         if (cap_block_setpcap(target)) {
 120                 return -EPERM;
 121         }
 122         if (cap_inh_is_capped()
 123             && !cap_issubset(*inheritable,
 124                              cap_combine(target->cap_inheritable,
 125                                          current->cap_permitted))) {
 126                 /* incapable of using this inheritable set */
 127                 return -EPERM;
 128         }
 129         if (!cap_issubset(*inheritable,
 130                            cap_combine(target->cap_inheritable,
 131                                        current->cap_bset))) {
 132                 /* no new pI capabilities outside bounding set */
 133                 return -EPERM;
 134         }
 135
 136         /* verify restrictions on target's new Permitted set */
 137         if (!cap_issubset (*permitted,
 138                            cap_combine (target->cap_permitted,
 139                                         current->cap_permitted))) {
 140                 return -EPERM;
 141         }
 142
 143         /* verify the _new_Effective_ is a subset of the _new_Permitted_ */
 144         if (!cap_issubset (*effective, *permitted)) {
 145                 return -EPERM;
 146         }
 147
 148         return 0;
 149 }
 150
 151 void cap_capset_set (struct task_struct *target, kernel_cap_t *effective,
 152                      kernel_cap_t *inheritable, kernel_cap_t *permitted)
 153 {
 154         target->cap_effective = *effective;
 155         target->cap_inheritable = *inheritable;
 156         target->cap_permitted = *permitted;
 157 }
 158
 159 static inline void bprm_clear_caps(struct linux_binprm *bprm)
 160 {
 161         cap_clear(bprm->cap_inheritable);
 162         cap_clear(bprm->cap_permitted);
 163         bprm->cap_effective = false;
 164 }
 165
 166 #ifdef CONFIG_SECURITY_FILE_CAPABILITIES
 167
 168 int cap_inode_need_killpriv(struct dentry *dentry)
 169 {
 170         struct inode *inode = dentry->d_inode;
 171         int error;
 172
 173         if (!inode->i_op || !inode->i_op->getxattr)
 174                return 0;
 175
 176         error = inode->i_op->getxattr(dentry, XATTR_NAME_CAPS, NULL, 0);
 177         if (error <= 0)
 178                 return 0;
 179         return 1;
 180 }
 181
 182 int cap_inode_killpriv(struct dentry *dentry)
 183 {
 184         struct inode *inode = dentry->d_inode;
 185
 186         if (!inode->i_op || !inode->i_op->removexattr)
 187                return 0;
 188
 189         return inode->i_op->removexattr(dentry, XATTR_NAME_CAPS);
 190 }
 191
 192 static inline int cap_from_disk(struct vfs_cap_data *caps,
 193                                 struct linux_binprm *bprm, unsigned size)
 194 {
 195         __u32 magic_etc;
 196         unsigned tocopy, i;
 197
 198         if (size < sizeof(magic_etc))
 199                 return -EINVAL;
 200
 201         magic_etc = le32_to_cpu(caps->magic_etc);
 202
 203         switch ((magic_etc & VFS_CAP_REVISION_MASK)) {
 204         case VFS_CAP_REVISION_1:
 205                 if (size != XATTR_CAPS_SZ_1)
 206                         return -EINVAL;
 207                 tocopy = VFS_CAP_U32_1;
 208                 break;
 209         case VFS_CAP_REVISION_2:
 210                 if (size != XATTR_CAPS_SZ_2)
 211                         return -EINVAL;
 212                 tocopy = VFS_CAP_U32_2;
 213                 break;
 214         default:
 215                 return -EINVAL;
 216         }
 217
 218         if (magic_etc & VFS_CAP_FLAGS_EFFECTIVE) {
 219                 bprm->cap_effective = true;
 220         } else {
 221                 bprm->cap_effective = false;
 222         }
 223
 224         for (i = 0; i < tocopy; ++i) {
 225                 bprm->cap_permitted.cap[i] =
 226                         le32_to_cpu(caps->data[i].permitted);
 227                 bprm->cap_inheritable.cap[i] =
 228                         le32_to_cpu(caps->data[i].inheritable);
 229         }
 230         while (i < VFS_CAP_U32) {
 231                 bprm->cap_permitted.cap[i] = 0;
 232                 bprm->cap_inheritable.cap[i] = 0;
 233                 i++;
 234         }
 235
 236         return 0;
 237 }
 238
 239 /* Locate any VFS capabilities: */
 240 static int get_file_caps(struct linux_binprm *bprm)
 241 {
 242         struct dentry *dentry;
 243         int rc = 0;
 244         struct vfs_cap_data vcaps;
 245         struct inode *inode;
 246
 247         if (bprm->file->f_vfsmnt->mnt_flags & MNT_NOSUID) {
 248                 bprm_clear_caps(bprm);
 249                 return 0;
 250         }
 251
 252         dentry = dget(bprm->file->f_dentry);
 253         inode = dentry->d_inode;
 254         if (!inode->i_op || !inode->i_op->getxattr)
 255                 goto out;
 256
 257         rc = inode->i_op->getxattr(dentry, XATTR_NAME_CAPS, &vcaps,
 258                                    XATTR_CAPS_SZ);
 259         if (rc == -ENODATA || rc == -EOPNOTSUPP) {
 260                 /* no data, that's ok */
 261                 rc = 0;
 262                 goto out;
 263         }
 264         if (rc < 0)
 265                 goto out;
 266
 267         rc = cap_from_disk(&vcaps, bprm, rc);
 268         if (rc)
 269                 printk(KERN_NOTICE "%s: cap_from_disk returned %d for %s\n",
 270                         __FUNCTION__, rc, bprm->filename);
 271
 272 out:
 273         dput(dentry);
 274         if (rc)
 275                 bprm_clear_caps(bprm);
 276
 277         return rc;
 278 }
 279
 280 #else
 281 int cap_inode_need_killpriv(struct dentry *dentry)
 282 {
 283         return 0;
 284 }
 285
 286 int cap_inode_killpriv(struct dentry *dentry)
 287 {
 288         return 0;
 289 }
 290
 291 static inline int get_file_caps(struct linux_binprm *bprm)
 292 {
 293         bprm_clear_caps(bprm);
 294         return 0;
 295 }
 296 #endif
 297
 298 int cap_bprm_set_security (struct linux_binprm *bprm)
 299 {
 300         int ret;
 301
 302         ret = get_file_caps(bprm);
 303         if (ret)
 304                 printk(KERN_NOTICE "%s: get_file_caps returned %d for %s\n",
 305                         __FUNCTION__, ret, bprm->filename);
 306
 307         /*  To support inheritance of root-permissions and suid-root
 308          *  executables under compatibility mode, we raise all three
 309          *  capability sets for the file.
 310          *
 311          *  If only the real uid is 0, we only raise the inheritable
 312          *  and permitted sets of the executable file.
 313          */
 314
 315         if (!issecure (SECURE_NOROOT)) {
 316                 if (bprm->e_uid == 0 || current->uid == 0) {
 317                         cap_set_full (bprm->cap_inheritable);
 318                         cap_set_full (bprm->cap_permitted);
 319                 }
 320                 if (bprm->e_uid == 0)
 321                         bprm->cap_effective = true;
 322         }
 323
 324         return ret;
 325 }
 326
 327 void cap_bprm_apply_creds (struct linux_binprm *bprm, int unsafe)
 328 {
 329         /* Derived from fs/exec.c:compute_creds. */
 330         kernel_cap_t new_permitted, working;
 331
 332         new_permitted = cap_intersect(bprm->cap_permitted,
 333                                  current->cap_bset);
 334         working = cap_intersect(bprm->cap_inheritable,
 335                                  current->cap_inheritable);
 336         new_permitted = cap_combine(new_permitted, working);
 337
 338         if (bprm->e_uid != current->uid || bprm->e_gid != current->gid ||
 339             !cap_issubset (new_permitted, current->cap_permitted)) {
 340                 set_dumpable(current->mm, suid_dumpable);
 341                 current->pdeath_signal = 0;
 342
 343                 if (unsafe & ~LSM_UNSAFE_PTRACE_CAP) {
 344                         if (!capable(CAP_SETUID)) {
 345                                 bprm->e_uid = current->uid;
 346                                 bprm->e_gid = current->gid;
 347                         }
 348                         if (!capable (CAP_SETPCAP)) {
 349                                 new_permitted = cap_intersect (new_permitted,
 350                                                         current->cap_permitted);
 351                         }
 352                 }
 353         }
 354
 355         current->suid = current->euid = current->fsuid = bprm->e_uid;
 356         current->sgid = current->egid = current->fsgid = bprm->e_gid;
 357
 358         /* For init, we want to retain the capabilities set
 359          * in the init_task struct. Thus we skip the usual
 360          * capability rules */
 361         if (!is_global_init(current)) {
 362                 current->cap_permitted = new_permitted;
 363                 if (bprm->cap_effective)
 364                         current->cap_effective = new_permitted;
 365                 else
 366                         cap_clear(current->cap_effective);
 367         }
 368
 369         /* AUD: Audit candidate if current->cap_effective is set */
 370
 371         current->keep_capabilities = 0;
 372 }
 373
 374 int cap_bprm_secureexec (struct linux_binprm *bprm)
 375 {
 376         if (current->uid != 0) {
 377                 if (bprm->cap_effective)
 378                         return 1;
 379                 if (!cap_isclear(bprm->cap_permitted))
 380                         return 1;
 381                 if (!cap_isclear(bprm->cap_inheritable))
 382                         return 1;
 383         }
 384
 385         return (current->euid != current->uid ||
 386                 current->egid != current->gid);
 387 }
 388
 389 int cap_inode_setxattr(struct dentry *dentry, char *name, void *value,
 390                        size_t size, int flags)
 391 {
 392         if (!strcmp(name, XATTR_NAME_CAPS)) {
 393                 if (!capable(CAP_SETFCAP))
 394                         return -EPERM;
 395                 return 0;
 396         } else if (!strncmp(name, XATTR_SECURITY_PREFIX,
 397                      sizeof(XATTR_SECURITY_PREFIX) - 1)  &&
 398             !capable(CAP_SYS_ADMIN))
 399                 return -EPERM;
 400         return 0;
 401 }
 402
 403 int cap_inode_removexattr(struct dentry *dentry, char *name)
 404 {
 405         if (!strcmp(name, XATTR_NAME_CAPS)) {
 406                 if (!capable(CAP_SETFCAP))
 407                         return -EPERM;
 408                 return 0;
 409         } else if (!strncmp(name, XATTR_SECURITY_PREFIX,
 410                      sizeof(XATTR_SECURITY_PREFIX) - 1)  &&
 411             !capable(CAP_SYS_ADMIN))
 412                 return -EPERM;
 413         return 0;
 414 }
 415
 416 /* moved from kernel/sys.c. */
 417 /*
 418  * cap_emulate_setxuid() fixes the effective / permitted capabilities of
 419  * a process after a call to setuid, setreuid, or setresuid.
 420  *
 421  *  1) When set*uiding _from_ one of {r,e,s}uid == 0 _to_ all of
 422  *  {r,e,s}uid != 0, the permitted and effective capabilities are
 423  *  cleared.
 424  *
 425  *  2) When set*uiding _from_ euid == 0 _to_ euid != 0, the effective
 426  *  capabilities of the process are cleared.
 427  *
 428  *  3) When set*uiding _from_ euid != 0 _to_ euid == 0, the effective
 429  *  capabilities are set to the permitted capabilities.
 430  *
 431  *  fsuid is handled elsewhere. fsuid == 0 and {r,e,s}uid!= 0 should
 432  *  never happen.
 433  *
 434  *  -astor
 435  *
 436  * cevans - New behaviour, Oct '99
 437  * A process may, via prctl(), elect to keep its capabilities when it
 438  * calls setuid() and switches away from uid==0. Both permitted and
 439  * effective sets will be retained.
 440  * Without this change, it was impossible for a daemon to drop only some
 441  * of its privilege. The call to setuid(!=0) would drop all privileges!
 442  * Keeping uid 0 is not an option because uid 0 owns too many vital
 443  * files..
 444  * Thanks to Olaf Kirch and Peter Benie for spotting this.
 445  */
 446 static inline void cap_emulate_setxuid (int old_ruid, int old_euid,
 447                                         int old_suid)
 448 {
 449         if ((old_ruid == 0 || old_euid == 0 || old_suid == 0) &&
 450             (current->uid != 0 && current->euid != 0 && current->suid != 0) &&
 451             !current->keep_capabilities) {
 452                 cap_clear (current->cap_permitted);
 453                 cap_clear (current->cap_effective);
 454         }
 455         if (old_euid == 0 && current->euid != 0) {
 456                 cap_clear (current->cap_effective);
 457         }
 458         if (old_euid != 0 && current->euid == 0) {
 459                 current->cap_effective = current->cap_permitted;
 460         }
 461 }
 462
 463 int cap_task_post_setuid (uid_t old_ruid, uid_t old_euid, uid_t old_suid,
 464                           int flags)
 465 {
 466         switch (flags) {
 467         case LSM_SETID_RE:
 468         case LSM_SETID_ID:
 469         case LSM_SETID_RES:
 470                 /* Copied from kernel/sys.c:setreuid/setuid/setresuid. */
 471                 if (!issecure (SECURE_NO_SETUID_FIXUP)) {
 472                         cap_emulate_setxuid (old_ruid, old_euid, old_suid);
 473                 }
 474                 break;
 475         case LSM_SETID_FS:
 476                 {
 477                         uid_t old_fsuid = old_ruid;
 478
 479                         /* Copied from kernel/sys.c:setfsuid. */
 480
 481                         /*
 482                          * FIXME - is fsuser used for all CAP_FS_MASK capabilities?
 483                          *          if not, we might be a bit too harsh here.
 484                          */
 485
 486                         if (!issecure (SECURE_NO_SETUID_FIXUP)) {
 487                                 if (old_fsuid == 0 && current->fsuid != 0) {
 488                                         current->cap_effective =
 489                                                 cap_drop_fs_set(
 490                                                     current->cap_effective);
 491                                 }
 492                                 if (old_fsuid != 0 && current->fsuid == 0) {
 493                                         current->cap_effective =
 494                                                 cap_raise_fs_set(
 495                                                     current->cap_effective,
 496                                                     current->cap_permitted);
 497                                 }
 498                         }
 499                         break;
 500                 }
 501         default:
 502                 return -EINVAL;
 503         }
 504
 505         return 0;
 506 }
 507
 508 #ifdef CONFIG_SECURITY_FILE_CAPABILITIES
 509 /*
 510  * Rationale: code calling task_setscheduler, task_setioprio, and
 511  * task_setnice, assumes that
 512  *   . if capable(cap_sys_nice), then those actions should be allowed
 513  *   . if not capable(cap_sys_nice), but acting on your own processes,
 514  *      then those actions should be allowed
 515  * This is insufficient now since you can call code without suid, but
 516  * yet with increased caps.
 517  * So we check for increased caps on the target process.
 518  */
 519 static inline int cap_safe_nice(struct task_struct *p)
 520 {
 521         if (!cap_issubset(p->cap_permitted, current->cap_permitted) &&
 522             !__capable(current, CAP_SYS_NICE))
 523                 return -EPERM;
 524         return 0;
 525 }
 526
 527 int cap_task_setscheduler (struct task_struct *p, int policy,
 528                            struct sched_param *lp)
 529 {
 530         return cap_safe_nice(p);
 531 }
 532
 533 int cap_task_setioprio (struct task_struct *p, int ioprio)
 534 {
 535         return cap_safe_nice(p);
 536 }
 537
 538 int cap_task_setnice (struct task_struct *p, int nice)
 539 {
 540         return cap_safe_nice(p);
 541 }
 542
 543 int cap_task_kill(struct task_struct *p, struct siginfo *info,
 544                                 int sig, u32 secid)
 545 {
 546         if (info != SEND_SIG_NOINFO && (is_si_special(info) || SI_FROMKERNEL(info)))
 547                 return 0;
 548
 549         /*
 550          * Running a setuid root program raises your capabilities.
 551          * Killing your own setuid root processes was previously
 552          * allowed.
 553          * We must preserve legacy signal behavior in this case.
 554          */
 555         if (p->euid == 0 && p->uid == current->uid)
 556                 return 0;
 557
 558         /* sigcont is permitted within same session */
 559         if (sig == SIGCONT && (task_session_nr(current) == task_session_nr(p)))
 560                 return 0;
 561
 562         if (secid)
 563                 /*
 564                  * Signal sent as a particular user.
 565                  * Capabilities are ignored.  May be wrong, but it's the
 566                  * only thing we can do at the moment.
 567                  * Used only by usb drivers?
 568                  */
 569                 return 0;
 570         if (cap_issubset(p->cap_permitted, current->cap_permitted))
 571                 return 0;
 572         if (capable(CAP_KILL))
 573                 return 0;
 574
 575         return -EPERM;
 576 }
 577
 578 /*
 579  * called from kernel/sys.c for prctl(PR_CABSET_DROP)
 580  * done without task_capability_lock() because it introduces
 581  * no new races - i.e. only another task doing capget() on
 582  * this task could get inconsistent info.  There can be no
 583  * racing writer bc a task can only change its own caps.
 584  */
 585 long cap_prctl_drop(unsigned long cap)
 586 {
 587         if (!capable(CAP_SETPCAP))
 588                 return -EPERM;
 589         if (!cap_valid(cap))
 590                 return -EINVAL;
 591         cap_lower(current->cap_bset, cap);
 592         return 0;
 593 }
 594 #else
 595 int cap_task_setscheduler (struct task_struct *p, int policy,
 596                            struct sched_param *lp)
 597 {
 598         return 0;
 599 }
 600 int cap_task_setioprio (struct task_struct *p, int ioprio)
 601 {
 602         return 0;
 603 }
 604 int cap_task_setnice (struct task_struct *p, int nice)
 605 {
 606         return 0;
 607 }
 608 int cap_task_kill(struct task_struct *p, struct siginfo *info,
 609                                 int sig, u32 secid)
 610 {
 611         return 0;
 612 }
 613 #endif
 614
 615 void cap_task_reparent_to_init (struct task_struct *p)
 616 {
 617         cap_set_init_eff(p->cap_effective);
 618         cap_clear(p->cap_inheritable);
 619         cap_set_full(p->cap_permitted);
 620         p->keep_capabilities = 0;
 621         return;
 622 }
 623
 624 int cap_syslog (int type)
 625 {
 626         if ((type != 3 && type != 10) && !capable(CAP_SYS_ADMIN))
 627                 return -EPERM;
 628         return 0;
 629 }
 630
 631 int cap_vm_enough_memory(struct mm_struct *mm, long pages)
 632 {
 633         int cap_sys_admin = 0;
 634
 635         if (cap_capable(current, CAP_SYS_ADMIN) == 0)
 636                 cap_sys_admin = 1;
 637         return __vm_enough_memory(mm, pages, cap_sys_admin);
 638 }
 639