sys/kern/kern_plimit.c

   1 /*
   2  * Copyright (c) 2006,2017,2018 The DragonFly Project.  All rights reserved.
   3  *
   4  * This code is derived from software contributed to The DragonFly Project
   5  * by Matthew Dillon <dillon@backplane.com>
   6  *
   7  * Redistribution and use in source and binary forms, with or without
   8  * modification, are permitted provided that the following conditions
   9  * are met:
  10  *
  11  * 1. Redistributions of source code must retain the above copyright
  12  *    notice, this list of conditions and the following disclaimer.
  13  * 2. Redistributions in binary form must reproduce the above copyright
  14  *    notice, this list of conditions and the following disclaimer in
  15  *    the documentation and/or other materials provided with the
  16  *    distribution.
  17  * 3. Neither the name of The DragonFly Project nor the names of its
  18  *    contributors may be used to endorse or promote products derived
  19  *    from this software without specific, prior written permission.
  20  *
  21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
  24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
  25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
  27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
  29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  32  * SUCH DAMAGE.
  33  */
  34 /*
  35  * Copyright (c) 1982, 1986, 1991, 1993
  36  *      The Regents of the University of California.  All rights reserved.
  37  * (c) UNIX System Laboratories, Inc.
  38  * All or some portions of this file are derived from material licensed
  39  * to the University of California by American Telephone and Telegraph
  40  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
  41  * the permission of UNIX System Laboratories, Inc.
  42  *
  43  * Redistribution and use in source and binary forms, with or without
  44  * modification, are permitted provided that the following conditions
  45  * are met:
  46  * 1. Redistributions of source code must retain the above copyright
  47  *    notice, this list of conditions and the following disclaimer.
  48  * 2. Redistributions in binary form must reproduce the above copyright
  49  *    notice, this list of conditions and the following disclaimer in the
  50  *    documentation and/or other materials provided with the distribution.
  51  * 3. Neither the name of the University nor the names of its contributors
  52  *    may be used to endorse or promote products derived from this software
  53  *    without specific prior written permission.
  54  *
  55  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  56  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  57  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  58  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  59  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  60  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  61  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  62  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  63  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  64  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  65  * SUCH DAMAGE.
  66  *
  67  *      @(#)kern_resource.c     8.5 (Berkeley) 1/21/94
  68  */
  69 #include <sys/resource.h>
  70 #include <sys/spinlock.h>
  71 #include <sys/proc.h>
  72 #include <sys/caps.h>
  73 #include <sys/file.h>
  74 #include <sys/lockf.h>
  75 #include <sys/kern_syscall.h>
  76 #include <sys/malloc.h>
  77 #include <sys/sysmsg.h>
  78
  79 #include <vm/vm_param.h>
  80 #include <vm/vm.h>
  81 #include <vm/vm_map.h>
  82
  83 #include <machine/pmap.h>
  84
  85 #include <sys/spinlock2.h>
  86
  87 static MALLOC_DEFINE(M_PLIMIT, "plimit", "resource limits");
  88
  89 static void plimit_copy(struct plimit *olimit, struct plimit *nlimit);
  90
  91 static __inline
  92 struct plimit *
  93 readplimits(struct proc *p)
  94 {
  95         thread_t td = curthread;
  96         struct plimit *limit;
  97
  98         limit = td->td_limit;
  99         if (limit != p->p_limit) {
 100                 spin_lock_shared(&p->p_spin);
 101                 limit = p->p_limit;
 102                 atomic_add_int(&limit->p_refcnt, 1);
 103                 spin_unlock_shared(&p->p_spin);
 104                 if (td->td_limit)
 105                         plimit_free(td->td_limit);
 106                 td->td_limit = limit;
 107         }
 108         return limit;
 109 }
 110
 111 /*
 112  * Initialize proc0's plimit structure.  All later plimit structures
 113  * are inherited through fork.
 114  */
 115 void
 116 plimit_init0(struct plimit *limit)
 117 {
 118         int i;
 119         rlim_t lim;
 120
 121         for (i = 0; i < RLIM_NLIMITS; ++i) {
 122                 limit->pl_rlimit[i].rlim_cur = RLIM_INFINITY;
 123                 limit->pl_rlimit[i].rlim_max = RLIM_INFINITY;
 124         }
 125         limit->pl_rlimit[RLIMIT_NOFILE].rlim_cur = maxfiles;
 126         limit->pl_rlimit[RLIMIT_NOFILE].rlim_max = maxfiles;
 127         limit->pl_rlimit[RLIMIT_NPROC].rlim_cur = maxproc;
 128         limit->pl_rlimit[RLIMIT_NPROC].rlim_max = maxproc;
 129         lim = ptoa((rlim_t)vmstats.v_free_count);
 130         limit->pl_rlimit[RLIMIT_RSS].rlim_max = lim;
 131         limit->pl_rlimit[RLIMIT_MEMLOCK].rlim_max = lim;
 132         limit->pl_rlimit[RLIMIT_MEMLOCK].rlim_cur = lim / 3;
 133         limit->p_cpulimit = RLIM_INFINITY;
 134         limit->p_refcnt = 1;
 135         spin_init(&limit->p_spin, "plimitinit");
 136 }
 137
 138 /*
 139  * Return a plimit for use by a new forked process given the one
 140  * contained in the parent process.
 141  */
 142 struct plimit *
 143 plimit_fork(struct proc *p1)
 144 {
 145         struct plimit *olimit = p1->p_limit;
 146         struct plimit *nlimit;
 147         uint32_t count;
 148
 149         /*
 150          * Try to share the parent's plimit structure.  If we cannot, make
 151          * a copy.
 152          *
 153          * NOTE: (count) value is field prior to increment.
 154          */
 155         count = atomic_fetchadd_int(&olimit->p_refcnt, 1);
 156         cpu_ccfence();
 157         if (count & PLIMITF_EXCLUSIVE) {
 158                 if ((count & PLIMITF_MASK) == 1 && p1->p_nthreads == 1) {
 159                         atomic_clear_int(&olimit->p_refcnt, PLIMITF_EXCLUSIVE);
 160                 } else {
 161                         nlimit = kmalloc(sizeof(*nlimit), M_PLIMIT, M_WAITOK);
 162                         plimit_copy(olimit, nlimit);
 163                         plimit_free(olimit);
 164                         olimit = nlimit;
 165                 }
 166         }
 167         return olimit;
 168 }
 169
 170 /*
 171  * This routine is called when a new LWP is created for a process.  We
 172  * must force exclusivity to ensure that p->p_limit remains stable.
 173  *
 174  * LWPs share the same process structure so this does not bump refcnt.
 175  */
 176 void
 177 plimit_lwp_fork(struct proc *p)
 178 {
 179         struct plimit *olimit = p->p_limit;
 180         struct plimit *nlimit;
 181         uint32_t count;
 182
 183         count = olimit->p_refcnt;
 184         cpu_ccfence();
 185         if ((count & PLIMITF_EXCLUSIVE) == 0) {
 186                 if (count != 1) {
 187                         nlimit = kmalloc(sizeof(*nlimit), M_PLIMIT, M_WAITOK);
 188                         plimit_copy(olimit, nlimit);
 189                         p->p_limit = nlimit;
 190                         plimit_free(olimit);
 191                         olimit = nlimit;
 192                 }
 193                 atomic_set_int(&olimit->p_refcnt, PLIMITF_EXCLUSIVE);
 194         }
 195 }
 196
 197 /*
 198  * This routine is called to fixup a process's p_limit structure prior
 199  * to it being modified.  If index >= 0 the specified modification is also
 200  * made.
 201  *
 202  * This routine must make the limit structure exclusive.  If we are threaded,
 203  * the structure will already be exclusive.  A later fork will convert it
 204  * back to copy-on-write if possible.
 205  *
 206  * We can count on p->p_limit being stable since if we had created any
 207  * threads it will have already been made exclusive.
 208  */
 209 void
 210 plimit_modify(struct proc *p, int index, struct rlimit *rlim)
 211 {
 212         struct plimit *olimit;
 213         struct plimit *nlimit;
 214         uint32_t count;
 215
 216         /*
 217          * Make exclusive
 218          */
 219         olimit = p->p_limit;
 220         count = olimit->p_refcnt;
 221         cpu_ccfence();
 222         if ((count & PLIMITF_EXCLUSIVE) == 0) {
 223                 if (count != 1) {
 224                         nlimit = kmalloc(sizeof(*nlimit), M_PLIMIT, M_WAITOK);
 225                         plimit_copy(olimit, nlimit);
 226                         p->p_limit = nlimit;
 227                         plimit_free(olimit);
 228                         olimit = nlimit;
 229                 }
 230                 atomic_set_int(&olimit->p_refcnt, PLIMITF_EXCLUSIVE);
 231         }
 232
 233         /*
 234          * Make modification
 235          */
 236         if (index >= 0) {
 237                 if (p->p_nthreads == 1) {
 238                         p->p_limit->pl_rlimit[index] = *rlim;
 239                 } else {
 240                         spin_lock(&olimit->p_spin);
 241                         p->p_limit->pl_rlimit[index].rlim_cur = rlim->rlim_cur;
 242                         p->p_limit->pl_rlimit[index].rlim_max = rlim->rlim_max;
 243                         spin_unlock(&olimit->p_spin);
 244                 }
 245         }
 246 }
 247
 248 /*
 249  * Destroy a process's plimit structure.
 250  */
 251 void
 252 plimit_free(struct plimit *limit)
 253 {
 254         uint32_t count;
 255
 256         count = atomic_fetchadd_int(&limit->p_refcnt, -1);
 257
 258         if ((count & ~PLIMITF_EXCLUSIVE) == 1) {
 259                 limit->p_refcnt = -999;
 260                 kfree(limit, M_PLIMIT);
 261         }
 262 }
 263
 264 /*
 265  * Modify a resource limit (from system call)
 266  */
 267 int
 268 kern_setrlimit(u_int which, struct rlimit *limp)
 269 {
 270         struct proc *p = curproc;
 271         struct plimit *limit;
 272         struct rlimit *alimp;
 273         int error;
 274
 275         if (which >= RLIM_NLIMITS)
 276                 return (EINVAL);
 277
 278         /*
 279          * We will be modifying a resource, make a copy if necessary.
 280          */
 281         plimit_modify(p, -1, NULL);
 282         limit = p->p_limit;
 283         alimp = &limit->pl_rlimit[which];
 284
 285         /*
 286          * Preserve historical bugs by treating negative limits as unsigned.
 287          */
 288         if (limp->rlim_cur < 0)
 289                 limp->rlim_cur = RLIM_INFINITY;
 290         if (limp->rlim_max < 0)
 291                 limp->rlim_max = RLIM_INFINITY;
 292
 293         spin_lock(&limit->p_spin);
 294         if (limp->rlim_cur > alimp->rlim_max ||
 295             limp->rlim_max > alimp->rlim_max)
 296         {
 297                 spin_unlock(&limit->p_spin);
 298                 error = caps_priv_check(p->p_ucred, SYSCAP_NOPROC_SETRLIMIT);
 299                 if (error)
 300                         return (error);
 301         } else {
 302                 spin_unlock(&limit->p_spin);
 303         }
 304         if (limp->rlim_cur > limp->rlim_max)
 305                 limp->rlim_cur = limp->rlim_max;
 306
 307         switch (which) {
 308         case RLIMIT_CPU:
 309                 spin_lock(&limit->p_spin);
 310                 if (limp->rlim_cur > RLIM_INFINITY / (rlim_t)1000000)
 311                         limit->p_cpulimit = RLIM_INFINITY;
 312                 else
 313                         limit->p_cpulimit = (rlim_t)1000000 * limp->rlim_cur;
 314                 spin_unlock(&limit->p_spin);
 315                 break;
 316         case RLIMIT_DATA:
 317                 if (limp->rlim_cur > maxdsiz)
 318                         limp->rlim_cur = maxdsiz;
 319                 if (limp->rlim_max > maxdsiz)
 320                         limp->rlim_max = maxdsiz;
 321                 break;
 322
 323         case RLIMIT_STACK:
 324                 if (limp->rlim_cur > maxssiz)
 325                         limp->rlim_cur = maxssiz;
 326                 if (limp->rlim_max > maxssiz)
 327                         limp->rlim_max = maxssiz;
 328                 /*
 329                  * Stack is allocated to the max at exec time with only
 330                  * "rlim_cur" bytes accessible.  If stack limit is going
 331                  * up make more accessible, if going down make inaccessible.
 332                  */
 333                 spin_lock(&limit->p_spin);
 334                 if (limp->rlim_cur != alimp->rlim_cur) {
 335                         vm_offset_t addr;
 336                         vm_size_t size;
 337                         vm_prot_t prot;
 338
 339                         if (limp->rlim_cur > alimp->rlim_cur) {
 340                                 prot = VM_PROT_ALL;
 341                                 size = limp->rlim_cur - alimp->rlim_cur;
 342                                 addr = USRSTACK - limp->rlim_cur;
 343                         } else {
 344                                 prot = VM_PROT_NONE;
 345                                 size = alimp->rlim_cur - limp->rlim_cur;
 346                                 addr = USRSTACK - alimp->rlim_cur;
 347                         }
 348                         spin_unlock(&limit->p_spin);
 349                         addr = trunc_page(addr);
 350                         size = round_page(size);
 351                         vm_map_protect(&p->p_vmspace->vm_map,
 352                                        addr, addr+size, prot, FALSE);
 353                 } else {
 354                         spin_unlock(&limit->p_spin);
 355                 }
 356                 break;
 357
 358         case RLIMIT_NOFILE:
 359                 if (limp->rlim_cur > maxfilesperproc)
 360                         limp->rlim_cur = maxfilesperproc;
 361                 if (limp->rlim_max > maxfilesperproc)
 362                         limp->rlim_max = maxfilesperproc;
 363                 break;
 364
 365         case RLIMIT_NPROC:
 366                 if (limp->rlim_cur > maxprocperuid)
 367                         limp->rlim_cur = maxprocperuid;
 368                 if (limp->rlim_max > maxprocperuid)
 369                         limp->rlim_max = maxprocperuid;
 370                 if (limp->rlim_cur < 1)
 371                         limp->rlim_cur = 1;
 372                 if (limp->rlim_max < 1)
 373                         limp->rlim_max = 1;
 374                 break;
 375         case RLIMIT_POSIXLOCKS:
 376                 if (limp->rlim_cur > maxposixlocksperuid)
 377                         limp->rlim_cur = maxposixlocksperuid;
 378                 if (limp->rlim_max > maxposixlocksperuid)
 379                         limp->rlim_max = maxposixlocksperuid;
 380                 break;
 381         }
 382         spin_lock(&limit->p_spin);
 383         *alimp = *limp;
 384         spin_unlock(&limit->p_spin);
 385         return (0);
 386 }
 387
 388 int
 389 sys_setrlimit(struct sysmsg *sysmsg, const struct __setrlimit_args *uap)
 390 {
 391         struct rlimit alim;
 392         int error;
 393
 394         error = copyin(uap->rlp, &alim, sizeof(alim));
 395         if (error)
 396                 return (error);
 397
 398         error = kern_setrlimit(uap->which, &alim);
 399
 400         return (error);
 401 }
 402
 403 /*
 404  * The rlimit indexed by which is returned in the second argument.
 405  */
 406 int
 407 kern_getrlimit(u_int which, struct rlimit *limp)
 408 {
 409         struct proc *p = curproc;
 410         struct plimit *limit;
 411
 412         /*
 413          * p is NULL when kern_getrlimit is called from a
 414          * kernel thread. In this case as the calling proc
 415          * isn't available we just skip the limit check.
 416          */
 417         if (p == NULL)
 418                 return 0;
 419
 420         if (which >= RLIM_NLIMITS)
 421                 return (EINVAL);
 422
 423         limit = readplimits(p);
 424         *limp = limit->pl_rlimit[which];
 425
 426         return (0);
 427 }
 428
 429 int
 430 sys_getrlimit(struct sysmsg *sysmsg, const struct __getrlimit_args *uap)
 431 {
 432         struct rlimit lim;
 433         int error;
 434
 435         error = kern_getrlimit(uap->which, &lim);
 436
 437         if (error == 0)
 438                 error = copyout(&lim, uap->rlp, sizeof(*uap->rlp));
 439         return error;
 440 }
 441
 442 /*
 443  * Determine if the cpu limit has been reached and return an operations
 444  * code for the caller to perform.
 445  */
 446 int
 447 plimit_testcpulimit(struct proc *p, u_int64_t ttime)
 448 {
 449         struct plimit *limit;
 450         struct rlimit *rlim;
 451         int mode;
 452
 453         limit = readplimits(p);
 454
 455         /*
 456          * Initial tests without the spinlock.  This is the fast path.
 457          * Any 32/64 bit glitches will fall through and retest with
 458          * the spinlock.
 459          */
 460         if (limit->p_cpulimit == RLIM_INFINITY)
 461                 return(PLIMIT_TESTCPU_OK);
 462         if (ttime <= limit->p_cpulimit)
 463                 return(PLIMIT_TESTCPU_OK);
 464
 465         if (ttime > limit->p_cpulimit) {
 466                 rlim = &limit->pl_rlimit[RLIMIT_CPU];
 467                 if (ttime / (rlim_t)1000000 >= rlim->rlim_max + 5)
 468                         mode = PLIMIT_TESTCPU_KILL;
 469                 else
 470                         mode = PLIMIT_TESTCPU_XCPU;
 471         } else {
 472                 mode = PLIMIT_TESTCPU_OK;
 473         }
 474
 475         return(mode);
 476 }
 477
 478 /*
 479  * Helper routine to copy olimit to nlimit and initialize nlimit for
 480  * use.  nlimit's reference count will be set to 1 and its exclusive bit
 481  * will be cleared.
 482  */
 483 static
 484 void
 485 plimit_copy(struct plimit *olimit, struct plimit *nlimit)
 486 {
 487         *nlimit = *olimit;
 488
 489         spin_init(&nlimit->p_spin, "plimitcopy");
 490         nlimit->p_refcnt = 1;
 491 }
 492
 493 /*
 494  * This routine returns the value of a resource, downscaled based on
 495  * the processes fork depth and chroot depth (up to 50%).  This mechanism
 496  * is designed to prevent run-aways from blowing up unrelated processes
 497  * running under the same UID.
 498  *
 499  * NOTE: Currently only applicable to RLIMIT_NPROC.  We could also limit
 500  *       file descriptors but we shouldn't have to as these are allocated
 501  *       dynamically.
 502  */
 503 u_int64_t
 504 plimit_getadjvalue(int i)
 505 {
 506         struct proc *p = curproc;
 507         struct plimit *limit;
 508         uint64_t v;
 509         uint32_t depth;
 510
 511         limit = p->p_limit;
 512         v = limit->pl_rlimit[i].rlim_cur;
 513         if (i == RLIMIT_NPROC) {
 514                 /*
 515                  * 10% per chroot (around 1/3% per fork depth), with a
 516                  * maximum of 50% downscaling of the resource limit.
 517                  */
 518                 depth = p->p_depth;
 519                 if (depth > 32 * 5)
 520                         depth = 32 * 5;
 521                 v -= v * depth / 320;
 522         }
 523         return v;
 524 }