2 * Copyright (c) 1982, 1986, 1991, 1993
3 * The Regents of the University of California. All rights reserved.
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the University of
21 * California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 * may be used to endorse or promote products derived from this software
24 * without specific prior written permission.
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38 * @(#)kern_resource.c 8.5 (Berkeley) 1/21/94
39 * $FreeBSD: src/sys/kern/kern_resource.c,v 1.55.2.5 2001/11/03 01:41:08 ps Exp $
40 * $DragonFly: src/sys/kern/kern_resource.c,v 1.35 2008/05/27 05:25:34 dillon Exp $
43 #include "opt_compat.h"
45 #include <sys/param.h>
46 #include <sys/systm.h>
47 #include <sys/sysproto.h>
49 #include <sys/kern_syscall.h>
50 #include <sys/kernel.h>
51 #include <sys/resourcevar.h>
52 #include <sys/malloc.h>
56 #include <sys/lockf.h>
59 #include <vm/vm_param.h>
62 #include <vm/vm_map.h>
64 #include <sys/thread2.h>
66 static int donice (struct proc
*chgp
, int n
);
68 static MALLOC_DEFINE(M_UIDINFO
, "uidinfo", "uidinfo structures");
69 #define UIHASH(uid) (&uihashtbl[(uid) & uihash])
70 static LIST_HEAD(uihashhead
, uidinfo
) *uihashtbl
;
71 static u_long uihash
; /* size of hash table - 1 */
73 static struct uidinfo
*uicreate (uid_t uid
);
74 static struct uidinfo
*uilookup (uid_t uid
);
77 * Resource controls and accounting.
80 struct getpriority_info
{
85 static int getpriority_callback(struct proc
*p
, void *data
);
88 sys_getpriority(struct getpriority_args
*uap
)
90 struct getpriority_info info
;
91 struct proc
*curp
= curproc
;
93 int low
= PRIO_MAX
+ 1;
103 if (!PRISON_CHECK(curp
->p_ucred
, p
->p_ucred
))
114 else if ((pg
= pgfind(uap
->who
)) == NULL
)
116 LIST_FOREACH(p
, &pg
->pg_members
, p_pglist
) {
117 if ((PRISON_CHECK(curp
->p_ucred
, p
->p_ucred
) && p
->p_nice
< low
))
124 uap
->who
= curp
->p_ucred
->cr_uid
;
127 allproc_scan(getpriority_callback
, &info
);
134 if (low
== PRIO_MAX
+ 1)
136 uap
->sysmsg_result
= low
;
141 * Figure out the current lowest nice priority for processes owned
142 * by the specified user.
146 getpriority_callback(struct proc
*p
, void *data
)
148 struct getpriority_info
*info
= data
;
150 if (PRISON_CHECK(curproc
->p_ucred
, p
->p_ucred
) &&
151 p
->p_ucred
->cr_uid
== info
->who
&&
152 p
->p_nice
< info
->low
) {
153 info
->low
= p
->p_nice
;
158 struct setpriority_info
{
165 static int setpriority_callback(struct proc
*p
, void *data
);
168 sys_setpriority(struct setpriority_args
*uap
)
170 struct setpriority_info info
;
171 struct proc
*curp
= curproc
;
173 int found
= 0, error
= 0;
175 switch (uap
->which
) {
183 if (!PRISON_CHECK(curp
->p_ucred
, p
->p_ucred
))
185 error
= donice(p
, uap
->prio
);
195 else if ((pg
= pgfind(uap
->who
)) == NULL
)
197 LIST_FOREACH(p
, &pg
->pg_members
, p_pglist
) {
198 if (PRISON_CHECK(curp
->p_ucred
, p
->p_ucred
)) {
199 error
= donice(p
, uap
->prio
);
207 uap
->who
= curp
->p_ucred
->cr_uid
;
208 info
.prio
= uap
->prio
;
212 allproc_scan(setpriority_callback
, &info
);
227 setpriority_callback(struct proc
*p
, void *data
)
229 struct setpriority_info
*info
= data
;
232 if (p
->p_ucred
->cr_uid
== info
->who
&&
233 PRISON_CHECK(curproc
->p_ucred
, p
->p_ucred
)) {
234 error
= donice(p
, info
->prio
);
243 donice(struct proc
*chgp
, int n
)
245 struct proc
*curp
= curproc
;
246 struct ucred
*cr
= curp
->p_ucred
;
249 if (cr
->cr_uid
&& cr
->cr_ruid
&&
250 cr
->cr_uid
!= chgp
->p_ucred
->cr_uid
&&
251 cr
->cr_ruid
!= chgp
->p_ucred
->cr_uid
)
257 if (n
< chgp
->p_nice
&& priv_check_cred(cr
, PRIV_ROOT
, 0))
260 FOREACH_LWP_IN_PROC(lp
, chgp
)
261 chgp
->p_usched
->resetpriority(lp
);
266 sys_lwp_rtprio(struct lwp_rtprio_args
*uap
)
268 struct proc
*p
= curproc
;
271 struct ucred
*cr
= p
->p_ucred
;
274 error
= copyin(uap
->rtp
, &rtp
, sizeof(struct rtprio
));
280 } else if (uap
->pid
== 0) {
281 /* curproc already loaded on p */
292 } else if (uap
->tid
== -1) {
294 * sadly, tid can be 0 so we can't use 0 here
297 lp
= curthread
->td_lwp
;
299 lp
= lwp_rb_tree_RB_LOOKUP(&p
->p_lwp_tree
, uap
->tid
);
304 switch (uap
->function
) {
306 return (copyout(&lp
->lwp_rtprio
, uap
->rtp
,
307 sizeof(struct rtprio
)));
309 if (cr
->cr_uid
&& cr
->cr_ruid
&&
310 cr
->cr_uid
!= p
->p_ucred
->cr_uid
&&
311 cr
->cr_ruid
!= p
->p_ucred
->cr_uid
) {
314 /* disallow setting rtprio in most cases if not superuser */
315 if (priv_check_cred(cr
, PRIV_ROOT
, 0)) {
316 /* can't set someone else's */
317 if (uap
->pid
) { /* XXX */
320 /* can't set realtime priority */
322 * Realtime priority has to be restricted for reasons which should be
323 * obvious. However, for idle priority, there is a potential for
324 * system deadlock if an idleprio process gains a lock on a resource
325 * that other processes need (and the idleprio process can't run
326 * due to a CPU-bound normal process). Fix me! XXX
328 if (RTP_PRIO_IS_REALTIME(rtp
.type
)) {
336 case RTP_PRIO_REALTIME
:
337 case RTP_PRIO_NORMAL
:
339 if (rtp
.prio
> RTP_PRIO_MAX
)
341 lp
->lwp_rtprio
= rtp
;
349 panic("can't get here");
353 * Set realtime priority
357 sys_rtprio(struct rtprio_args
*uap
)
359 struct proc
*curp
= curproc
;
362 struct ucred
*cr
= curp
->p_ucred
;
366 error
= copyin(uap
->rtp
, &rtp
, sizeof(struct rtprio
));
379 lp
= FIRST_LWP_IN_PROC(p
);
380 switch (uap
->function
) {
382 return (copyout(&lp
->lwp_rtprio
, uap
->rtp
, sizeof(struct rtprio
)));
384 if (cr
->cr_uid
&& cr
->cr_ruid
&&
385 cr
->cr_uid
!= p
->p_ucred
->cr_uid
&&
386 cr
->cr_ruid
!= p
->p_ucred
->cr_uid
)
388 /* disallow setting rtprio in most cases if not superuser */
389 if (priv_check_cred(cr
, PRIV_ROOT
, 0)) {
390 /* can't set someone else's */
393 /* can't set realtime priority */
395 * Realtime priority has to be restricted for reasons which should be
396 * obvious. However, for idle priority, there is a potential for
397 * system deadlock if an idleprio process gains a lock on a resource
398 * that other processes need (and the idleprio process can't run
399 * due to a CPU-bound normal process). Fix me! XXX
401 if (RTP_PRIO_IS_REALTIME(rtp
.type
))
408 case RTP_PRIO_REALTIME
:
409 case RTP_PRIO_NORMAL
:
411 if (rtp
.prio
> RTP_PRIO_MAX
)
413 lp
->lwp_rtprio
= rtp
;
425 sys_setrlimit(struct __setrlimit_args
*uap
)
430 error
= copyin(uap
->rlp
, &alim
, sizeof(alim
));
434 error
= kern_setrlimit(uap
->which
, &alim
);
440 sys_getrlimit(struct __getrlimit_args
*uap
)
445 error
= kern_getrlimit(uap
->which
, &lim
);
448 error
= copyout(&lim
, uap
->rlp
, sizeof(*uap
->rlp
));
453 * Transform the running time and tick information in lwp lp's thread into user,
454 * system, and interrupt time usage.
456 * Since we are limited to statclock tick granularity this is a statistical
457 * calculation which will be correct over the long haul, but should not be
458 * expected to measure fine grained deltas.
460 * It is possible to catch an lwp in the midst of being created, so
461 * check whether lwp_thread is NULL or not.
464 calcru(struct lwp
*lp
, struct timeval
*up
, struct timeval
*sp
)
469 * Calculate at the statclock level. YYY if the thread is owned by
470 * another cpu we need to forward the request to the other cpu, or
471 * have a token to interlock the information in order to avoid racing
472 * thread destruction.
474 if ((td
= lp
->lwp_thread
) != NULL
) {
476 up
->tv_sec
= td
->td_uticks
/ 1000000;
477 up
->tv_usec
= td
->td_uticks
% 1000000;
478 sp
->tv_sec
= td
->td_sticks
/ 1000000;
479 sp
->tv_usec
= td
->td_sticks
% 1000000;
485 * Aggregate resource statistics of all lwps of a process.
487 * proc.p_ru keeps track of all statistics directly related to a proc. This
488 * consists of RSS usage and nswap information and aggregate numbers for all
489 * former lwps of this proc.
491 * proc.p_cru is the sum of all stats of reaped children.
493 * lwp.lwp_ru contains the stats directly related to one specific lwp, meaning
494 * packet, scheduler switch or page fault counts, etc. This information gets
495 * added to lwp.lwp_proc.p_ru when the lwp exits.
498 calcru_proc(struct proc
*p
, struct rusage
*ru
)
500 struct timeval upt
, spt
;
506 FOREACH_LWP_IN_PROC(lp
, p
) {
507 calcru(lp
, &upt
, &spt
);
508 timevaladd(&ru
->ru_utime
, &upt
);
509 timevaladd(&ru
->ru_stime
, &spt
);
510 for (rip1
= &ru
->ru_first
, rip2
= &lp
->lwp_ru
.ru_first
;
511 rip1
<= &ru
->ru_last
;
520 sys_getrusage(struct getrusage_args
*uap
)
529 calcru_proc(curproc
, rup
);
532 case RUSAGE_CHILDREN
:
533 rup
= &curproc
->p_cru
;
539 return (copyout((caddr_t
)rup
, (caddr_t
)uap
->rusage
,
540 sizeof (struct rusage
)));
544 ruadd(struct rusage
*ru
, struct rusage
*ru2
)
549 timevaladd(&ru
->ru_utime
, &ru2
->ru_utime
);
550 timevaladd(&ru
->ru_stime
, &ru2
->ru_stime
);
551 if (ru
->ru_maxrss
< ru2
->ru_maxrss
)
552 ru
->ru_maxrss
= ru2
->ru_maxrss
;
553 ip
= &ru
->ru_first
; ip2
= &ru2
->ru_first
;
554 for (i
= &ru
->ru_last
- &ru
->ru_first
; i
>= 0; i
--)
559 * Find the uidinfo structure for a uid. This structure is used to
560 * track the total resource consumption (process count, socket buffer
561 * size, etc.) for the uid and impose limits.
566 uihashtbl
= hashinit(maxproc
/ 16, M_UIDINFO
, &uihash
);
569 static struct uidinfo
*
572 struct uihashhead
*uipp
;
576 LIST_FOREACH(uip
, uipp
, ui_hash
) {
577 if (uip
->ui_uid
== uid
)
583 static struct uidinfo
*
586 struct uidinfo
*uip
, *norace
;
589 * Allocate space and check for a race
591 MALLOC(uip
, struct uidinfo
*, sizeof(*uip
), M_UIDINFO
, M_WAITOK
);
592 norace
= uilookup(uid
);
593 if (norace
!= NULL
) {
594 FREE(uip
, M_UIDINFO
);
599 * Initialize structure and enter it into the hash table
601 LIST_INSERT_HEAD(UIHASH(uid
), uip
, ui_hash
);
606 uip
->ui_posixlocks
= 0;
607 varsymset_init(&uip
->ui_varsymset
, NULL
);
624 uifree(struct uidinfo
*uip
)
626 if (uip
->ui_sbsize
!= 0)
627 /* XXX no %qd in kernel. Truncate. */
628 kprintf("freeing uidinfo: uid = %d, sbsize = %ld\n",
629 uip
->ui_uid
, (long)uip
->ui_sbsize
);
630 if (uip
->ui_proccnt
!= 0)
631 kprintf("freeing uidinfo: uid = %d, proccnt = %ld\n",
632 uip
->ui_uid
, uip
->ui_proccnt
);
633 LIST_REMOVE(uip
, ui_hash
);
634 varsymset_clean(&uip
->ui_varsymset
);
635 FREE(uip
, M_UIDINFO
);
639 uihold(struct uidinfo
*uip
)
642 KKASSERT(uip
->ui_ref
> 0);
646 uidrop(struct uidinfo
*uip
)
648 KKASSERT(uip
->ui_ref
> 0);
649 if (--uip
->ui_ref
== 0)
654 uireplace(struct uidinfo
**puip
, struct uidinfo
*nuip
)
661 * Change the count associated with the number of processes
662 * a given user is using. When 'max' is 0, don't enforce a limit.
665 chgproccnt(struct uidinfo
*uip
, int diff
, int max
)
667 /* don't allow them to exceed max, but allow subtraction */
668 if (diff
> 0 && uip
->ui_proccnt
+ diff
> max
&& max
!= 0)
670 uip
->ui_proccnt
+= diff
;
671 if (uip
->ui_proccnt
< 0)
672 kprintf("negative proccnt for uid = %d\n", uip
->ui_uid
);
677 * Change the total socket buffer size a user has used.
680 chgsbsize(struct uidinfo
*uip
, u_long
*hiwat
, u_long to
, rlim_t max
)
685 new = uip
->ui_sbsize
+ to
- *hiwat
;
688 * If we are trying to increase the socket buffer size,
689 * scale down the hi water mark when we exceed the user's
690 * allowed socket buffer space.
692 * We can't scale down too much or we will blow up atomic packet
695 if (to
> *hiwat
&& to
> MCLBYTES
&& new > max
) {
700 uip
->ui_sbsize
= new;
702 if (uip
->ui_sbsize
< 0)
703 kprintf("negative sbsize for uid = %d\n", uip
->ui_uid
);