/*-
 * Copyright (c) 1982, 1986, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_resource.c	8.5 (Berkeley) 1/21/94
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_compat.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/file.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/refcount.h>
#include <sys/racct.h>
#include <sys/resourcevar.h>
#include <sys/rwlock.h>
#include <sys/sched.h>
#include <sys/sx.h>
#include <sys/syscallsubr.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
#include <sys/time.h>
#include <sys/umtx.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>

static MALLOC_DEFINE(M_PLIMIT, "plimit", "plimit structures");
static MALLOC_DEFINE(M_UIDINFO, "uidinfo", "uidinfo structures");
#define	UIHASH(uid)	(&uihashtbl[(uid) & uihash])
static struct rwlock uihashtbl_lock;
static LIST_HEAD(uihashhead, uidinfo) *uihashtbl;
static u_long uihash;		/* size of hash table - 1 */

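/*
 * hashinit() sizes uihashtbl to a power of two and stores the bucket
 * count minus one in uihash, so UIHASH() is a pure mask: with 64
 * buckets (uihash == 63), for example, uid 1000 lands in bucket
 * 1000 & 63 == 40.
 */
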
static void	calcru1(struct proc *p, struct rusage_ext *ruxp,
		    struct timeval *up, struct timeval *sp);
static int	donice(struct thread *td, struct proc *chgp, int n);
static struct uidinfo *uilookup(uid_t uid);
static void	ruxagg_locked(struct rusage_ext *rux, struct thread *td);

/*
 * Resource controls and accounting.
 */
#ifndef _SYS_SYSPROTO_H_
struct getpriority_args {
	int	which;
	int	who;
};
#endif
int
sys_getpriority(struct thread *td, struct getpriority_args *uap)
{
	struct proc *p;
	struct pgrp *pg;
	int error, low;

	error = 0;
	low = PRIO_MAX + 1;
	switch (uap->which) {

	case PRIO_PROCESS:
		if (uap->who == 0)
			low = td->td_proc->p_nice;
		else {
			p = pfind(uap->who);
			if (p == NULL)
				break;
			if (p_cansee(td, p) == 0)
				low = p->p_nice;
			PROC_UNLOCK(p);
		}
		break;

	case PRIO_PGRP:
		sx_slock(&proctree_lock);
		if (uap->who == 0) {
			pg = td->td_proc->p_pgrp;
			PGRP_LOCK(pg);
		} else {
			pg = pgfind(uap->who);
			if (pg == NULL) {
				sx_sunlock(&proctree_lock);
				break;
			}
		}
		sx_sunlock(&proctree_lock);
		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
			PROC_LOCK(p);
			if (p->p_state == PRS_NORMAL &&
			    p_cansee(td, p) == 0) {
				if (p->p_nice < low)
					low = p->p_nice;
			}
			PROC_UNLOCK(p);
		}
		PGRP_UNLOCK(pg);
		break;

	case PRIO_USER:
		if (uap->who == 0)
			uap->who = td->td_ucred->cr_uid;
		sx_slock(&allproc_lock);
		FOREACH_PROC_IN_SYSTEM(p) {
			PROC_LOCK(p);
			if (p->p_state == PRS_NORMAL &&
			    p_cansee(td, p) == 0 &&
			    p->p_ucred->cr_uid == uap->who) {
				if (p->p_nice < low)
					low = p->p_nice;
			}
			PROC_UNLOCK(p);
		}
		sx_sunlock(&allproc_lock);
		break;

	default:
		error = EINVAL;
		break;
	}
	if (low == PRIO_MAX + 1 && error == 0)
		error = ESRCH;
	td->td_retval[0] = low;
	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct setpriority_args {
	int	which;
	int	who;
	int	prio;
};
#endif
int
sys_setpriority(struct thread *td, struct setpriority_args *uap)
{
	struct proc *curp, *p;
	struct pgrp *pg;
	int found = 0, error = 0;

	curp = td->td_proc;
	switch (uap->which) {
	case PRIO_PROCESS:
		if (uap->who == 0) {
			PROC_LOCK(curp);
			error = donice(td, curp, uap->prio);
			PROC_UNLOCK(curp);
		} else {
			p = pfind(uap->who);
			if (p == NULL)
				break;
			error = p_cansee(td, p);
			if (error == 0)
				error = donice(td, p, uap->prio);
			PROC_UNLOCK(p);
		}
		found++;
		break;

	case PRIO_PGRP:
		sx_slock(&proctree_lock);
		if (uap->who == 0) {
			pg = curp->p_pgrp;
			PGRP_LOCK(pg);
		} else {
			pg = pgfind(uap->who);
			if (pg == NULL) {
				sx_sunlock(&proctree_lock);
				break;
			}
		}
		sx_sunlock(&proctree_lock);
		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
			PROC_LOCK(p);
			if (p->p_state == PRS_NORMAL &&
			    p_cansee(td, p) == 0) {
				error = donice(td, p, uap->prio);
				found++;
			}
			PROC_UNLOCK(p);
		}
		PGRP_UNLOCK(pg);
		break;

	case PRIO_USER:
		if (uap->who == 0)
			uap->who = td->td_ucred->cr_uid;
		sx_slock(&allproc_lock);
		FOREACH_PROC_IN_SYSTEM(p) {
			PROC_LOCK(p);
			if (p->p_state == PRS_NORMAL &&
			    p->p_ucred->cr_uid == uap->who &&
			    p_cansee(td, p) == 0) {
				error = donice(td, p, uap->prio);
				found++;
			}
			PROC_UNLOCK(p);
		}
		sx_sunlock(&allproc_lock);
		break;

	default:
		error = EINVAL;
		break;
	}
	if (found == 0 && error == 0)
		error = ESRCH;
	return (error);
}

/*
 * Set "nice" for a (whole) process.
 */
static int
donice(struct thread *td, struct proc *p, int n)
{
	int error;

	PROC_LOCK_ASSERT(p, MA_OWNED);
	if ((error = p_cansched(td, p)))
		return (error);
	if (n > PRIO_MAX)
		n = PRIO_MAX;
	if (n < PRIO_MIN)
		n = PRIO_MIN;
	if (n < p->p_nice && priv_check(td, PRIV_SCHED_SETPRIORITY) != 0)
		return (EACCES);
	sched_nice(p, n);
	return (0);
}

static int unprivileged_idprio;
SYSCTL_INT(_security_bsd, OID_AUTO, unprivileged_idprio, CTLFLAG_RW,
    &unprivileged_idprio, 0, "Allow non-root users to set an idle priority");

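/*
 * The knob above surfaces as security.bsd.unprivileged_idprio; setting
 * it non-zero (e.g. "sysctl security.bsd.unprivileged_idprio=1") lets
 * unprivileged users move threads into the idle class, subject to the
 * deadlock caveat spelled out in sys_rtprio_thread() below.
 */
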
/*
 * Set realtime priority for LWP.
 */
#ifndef _SYS_SYSPROTO_H_
struct rtprio_thread_args {
	lwpid_t lwpid;
	int	function;
	struct rtprio *rtp;
};
#endif
int
sys_rtprio_thread(struct thread *td, struct rtprio_thread_args *uap)
{
	struct proc *p;
	struct rtprio rtp;
	struct thread *td1;
	int cierror, error;

	/* Perform copyin before acquiring locks if needed. */
	if (uap->function == RTP_SET)
		cierror = copyin(uap->rtp, &rtp, sizeof(struct rtprio));
	else
		cierror = 0;

	if (uap->lwpid == 0 || uap->lwpid == td->td_tid) {
		p = td->td_proc;
		td1 = td;
		PROC_LOCK(p);
	} else {
		/* Only look up thread in current process */
		td1 = tdfind(uap->lwpid, curproc->p_pid);
		if (td1 == NULL)
			return (ESRCH);
		p = td1->td_proc;
	}

	switch (uap->function) {
	case RTP_LOOKUP:
		if ((error = p_cansee(td, p)))
			break;
		pri_to_rtp(td1, &rtp);
		PROC_UNLOCK(p);
		return (copyout(&rtp, uap->rtp, sizeof(struct rtprio)));
	case RTP_SET:
		if ((error = p_cansched(td, p)) || (error = cierror))
			break;

		/* Disallow setting rtprio in most cases if not superuser. */

		/*
		 * Realtime priority has to be restricted for reasons which
		 * should be obvious.  However, for idleprio processes, there
		 * is a potential for system deadlock if an idleprio process
		 * gains a lock on a resource that other processes need (and
		 * the idleprio process can't run due to a CPU-bound normal
		 * process).  Fix me!  XXX
		 *
		 * This problem is not only related to idleprio processes.
		 * A user level program can obtain a file lock and hold it
		 * indefinitely.  Additionally, without idleprio processes it
		 * is still conceivable that a program with low priority will
		 * never get to run.  In short, allowing this feature might
		 * make it easier to lock a resource indefinitely, but it is
		 * not the only thing that makes it possible.
		 */
		if (RTP_PRIO_BASE(rtp.type) == RTP_PRIO_REALTIME ||
		    (RTP_PRIO_BASE(rtp.type) == RTP_PRIO_IDLE &&
		    unprivileged_idprio == 0)) {
			error = priv_check(td, PRIV_SCHED_RTPRIO);
			if (error)
				break;
		}
		error = rtp_to_pri(&rtp, td1);
		break;
	default:
		error = EINVAL;
		break;
	}
	PROC_UNLOCK(p);
	return (error);
}

/*
 * Set realtime priority.
 */
#ifndef _SYS_SYSPROTO_H_
struct rtprio_args {
	int	pid;
	int	function;
	struct rtprio *rtp;
};
#endif
int
sys_rtprio(struct thread *td, struct rtprio_args *uap)
{
	struct proc *p;
	struct thread *tdp;
	struct rtprio rtp;
	int cierror, error;

	/* Perform copyin before acquiring locks if needed. */
	if (uap->function == RTP_SET)
		cierror = copyin(uap->rtp, &rtp, sizeof(struct rtprio));
	else
		cierror = 0;

	if (uap->pid == 0) {
		p = td->td_proc;
		PROC_LOCK(p);
	} else {
		p = pfind(uap->pid);
		if (p == NULL)
			return (ESRCH);
	}

	switch (uap->function) {
	case RTP_LOOKUP:
		if ((error = p_cansee(td, p)))
			break;
		/*
		 * Return OUR priority if no pid specified,
		 * or if one is, report the highest priority
		 * in the process.  There isn't much more you can do as
		 * there is only room to return a single priority.
		 * Note: specifying our own pid is not the same
		 * as leaving it zero.
		 */
		if (uap->pid == 0) {
			pri_to_rtp(td, &rtp);
		} else {
			struct rtprio rtp2;

			rtp.type = RTP_PRIO_IDLE;
			rtp.prio = RTP_PRIO_MAX;
			FOREACH_THREAD_IN_PROC(p, tdp) {
				pri_to_rtp(tdp, &rtp2);
				if (rtp2.type < rtp.type ||
				    (rtp2.type == rtp.type &&
				    rtp2.prio < rtp.prio)) {
					rtp.type = rtp2.type;
					rtp.prio = rtp2.prio;
				}
			}
		}
		PROC_UNLOCK(p);
		return (copyout(&rtp, uap->rtp, sizeof(struct rtprio)));
	case RTP_SET:
		if ((error = p_cansched(td, p)) || (error = cierror))
			break;

		/*
		 * Disallow setting rtprio in most cases if not superuser.
		 * See the comment in sys_rtprio_thread about idprio
		 * threads holding a lock.
		 */
		if (RTP_PRIO_BASE(rtp.type) == RTP_PRIO_REALTIME ||
		    (RTP_PRIO_BASE(rtp.type) == RTP_PRIO_IDLE &&
		    !unprivileged_idprio)) {
			error = priv_check(td, PRIV_SCHED_RTPRIO);
			if (error)
				break;
		}

		/*
		 * If we are setting our own priority, set just our
		 * thread but if we are doing another process,
		 * do all the threads on that process.  If we
		 * specify our own pid we do the latter.
		 */
		if (uap->pid == 0) {
			error = rtp_to_pri(&rtp, td);
		} else {
			FOREACH_THREAD_IN_PROC(p, td) {
				if ((error = rtp_to_pri(&rtp, td)) != 0)
					break;
			}
		}
		break;
	default:
		error = EINVAL;
		break;
	}
	PROC_UNLOCK(p);
	return (error);
}

int
rtp_to_pri(struct rtprio *rtp, struct thread *td)
{
	u_char	newpri, oldclass, oldpri;

	switch (RTP_PRIO_BASE(rtp->type)) {
	case RTP_PRIO_REALTIME:
		if (rtp->prio > RTP_PRIO_MAX)
			return (EINVAL);
		newpri = PRI_MIN_REALTIME + rtp->prio;
		break;
	case RTP_PRIO_NORMAL:
		if (rtp->prio > (PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE))
			return (EINVAL);
		newpri = PRI_MIN_TIMESHARE + rtp->prio;
		break;
	case RTP_PRIO_IDLE:
		if (rtp->prio > RTP_PRIO_MAX)
			return (EINVAL);
		newpri = PRI_MIN_IDLE + rtp->prio;
		break;
	default:
		return (EINVAL);
	}

	thread_lock(td);
	oldclass = td->td_pri_class;
	sched_class(td, rtp->type);	/* XXX fix */
	oldpri = td->td_user_pri;
	sched_user_prio(td, newpri);
	if (td->td_user_pri != oldpri && (oldclass != RTP_PRIO_NORMAL ||
	    td->td_pri_class != RTP_PRIO_NORMAL))
		sched_prio(td, td->td_user_pri);
	if (TD_ON_UPILOCK(td) && oldpri != newpri) {
		critical_enter();
		thread_unlock(td);
		umtx_pi_adjust(td, oldpri);
		critical_exit();
	} else
		thread_unlock(td);
	return (0);
}

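/*
 * A user-supplied rtprio is thus an offset into its class's band of
 * kernel priorities: { RTP_PRIO_REALTIME, 0 } maps to PRI_MIN_REALTIME,
 * and larger rtp->prio values map to numerically higher, i.e. weaker,
 * priorities within the class.
 */
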
void
pri_to_rtp(struct thread *td, struct rtprio *rtp)
{

	thread_lock(td);
	switch (PRI_BASE(td->td_pri_class)) {
	case PRI_REALTIME:
		rtp->prio = td->td_base_user_pri - PRI_MIN_REALTIME;
		break;
	case PRI_TIMESHARE:
		rtp->prio = td->td_base_user_pri - PRI_MIN_TIMESHARE;
		break;
	case PRI_IDLE:
		rtp->prio = td->td_base_user_pri - PRI_MIN_IDLE;
		break;
	default:
		break;
	}
	rtp->type = td->td_pri_class;
	thread_unlock(td);
}

#if defined(COMPAT_43)
#ifndef _SYS_SYSPROTO_H_
struct osetrlimit_args {
	u_int	which;
	struct	orlimit *rlp;
};
#endif
int
osetrlimit(struct thread *td, struct osetrlimit_args *uap)
{
	struct orlimit olim;
	struct rlimit lim;
	int error;

	if ((error = copyin(uap->rlp, &olim, sizeof(struct orlimit))))
		return (error);
	lim.rlim_cur = olim.rlim_cur;
	lim.rlim_max = olim.rlim_max;
	error = kern_setrlimit(td, uap->which, &lim);
	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct ogetrlimit_args {
	u_int	which;
	struct	orlimit *rlp;
};
#endif
int
ogetrlimit(struct thread *td, struct ogetrlimit_args *uap)
{
	struct orlimit olim;
	struct rlimit rl;
	int error;

	if (uap->which >= RLIM_NLIMITS)
		return (EINVAL);
	lim_rlimit(td, uap->which, &rl);

	/*
	 * XXX would be more correct to convert only RLIM_INFINITY to the
	 * old RLIM_INFINITY and fail with EOVERFLOW for other larger
	 * values.  Most 64->32 and 32->16 conversions, including not
	 * unimportant ones of uids are even more broken than what we
	 * do here (they blindly truncate).  We don't do this correctly
	 * here since we have little experience with EOVERFLOW yet.
	 * Elsewhere, getuid() can't fail...
	 */
	olim.rlim_cur = rl.rlim_cur > 0x7fffffff ? 0x7fffffff : rl.rlim_cur;
	olim.rlim_max = rl.rlim_max > 0x7fffffff ? 0x7fffffff : rl.rlim_max;
	error = copyout(&olim, uap->rlp, sizeof(olim));
	return (error);
}
#endif /* COMPAT_43 */

#ifndef _SYS_SYSPROTO_H_
struct __setrlimit_args {
	u_int	which;
	struct	rlimit *rlp;
};
#endif
int
sys_setrlimit(struct thread *td, struct __setrlimit_args *uap)
{
	struct rlimit alim;
	int error;

	if ((error = copyin(uap->rlp, &alim, sizeof(struct rlimit))))
		return (error);
	error = kern_setrlimit(td, uap->which, &alim);
	return (error);
}

static void
lim_cb(void *arg)
{
	struct rlimit rlim;
	struct thread *td;
	struct proc *p;

	p = arg;
	PROC_LOCK_ASSERT(p, MA_OWNED);
	/*
	 * Check if the process exceeds its cpu resource allocation.  If
	 * it reaches the max, arrange to kill the process in ast().
	 */
	if (p->p_cpulimit == RLIM_INFINITY)
		return;
	PROC_STATLOCK(p);
	FOREACH_THREAD_IN_PROC(p, td) {
		ruxagg(p, td);
	}
	PROC_STATLOCK_UNLOCK(p);
	if (p->p_rux.rux_runtime > p->p_cpulimit * cpu_tickrate()) {
		lim_rlimit_proc(p, RLIMIT_CPU, &rlim);
		if (p->p_rux.rux_runtime >= rlim.rlim_max * cpu_tickrate()) {
			killproc(p, "exceeded maximum CPU limit");
		} else {
			if (p->p_cpulimit < rlim.rlim_max)
				p->p_cpulimit += 5;
			kern_psignal(p, SIGXCPU);
		}
	}
	if ((p->p_flag & P_WEXIT) == 0)
		callout_reset_sbt(&p->p_limco, SBT_1S, 0,
		    lim_cb, p, C_PREL(1));
}

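/*
 * lim_cb() therefore re-arms itself once per second for as long as a
 * CPU limit is set: past the soft limit it delivers SIGXCPU and moves
 * the trigger five seconds out, and past the hard limit it kills the
 * process outright.
 */
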
int
kern_setrlimit(struct thread *td, u_int which, struct rlimit *limp)
{

	return (kern_proc_setrlimit(td, td->td_proc, which, limp));
}

int
kern_proc_setrlimit(struct thread *td, struct proc *p, u_int which,
    struct rlimit *limp)
{
	struct plimit *newlim, *oldlim;
	struct rlimit *alimp;
	struct rlimit oldssiz;
	int error;

	if (which >= RLIM_NLIMITS)
		return (EINVAL);

	/*
	 * Preserve historical bugs by treating negative limits as unsigned.
	 */
	if (limp->rlim_cur < 0)
		limp->rlim_cur = RLIM_INFINITY;
	if (limp->rlim_max < 0)
		limp->rlim_max = RLIM_INFINITY;

	oldssiz.rlim_cur = 0;
	newlim = lim_alloc();
	PROC_LOCK(p);
	oldlim = p->p_limit;
	alimp = &oldlim->pl_rlimit[which];
	if (limp->rlim_cur > alimp->rlim_max ||
	    limp->rlim_max > alimp->rlim_max)
		if ((error = priv_check(td, PRIV_PROC_SETRLIMIT))) {
			PROC_UNLOCK(p);
			lim_free(newlim);
			return (error);
		}
	if (limp->rlim_cur > limp->rlim_max)
		limp->rlim_cur = limp->rlim_max;
	lim_copy(newlim, oldlim);
	alimp = &newlim->pl_rlimit[which];

	switch (which) {

	case RLIMIT_CPU:
		if (limp->rlim_cur != RLIM_INFINITY &&
		    p->p_cpulimit == RLIM_INFINITY)
			callout_reset_sbt(&p->p_limco, SBT_1S, 0,
			    lim_cb, p, C_PREL(1));
		p->p_cpulimit = limp->rlim_cur;
		break;

	case RLIMIT_DATA:
		if (limp->rlim_cur > maxdsiz)
			limp->rlim_cur = maxdsiz;
		if (limp->rlim_max > maxdsiz)
			limp->rlim_max = maxdsiz;
		break;

	case RLIMIT_STACK:
		if (limp->rlim_cur > maxssiz)
			limp->rlim_cur = maxssiz;
		if (limp->rlim_max > maxssiz)
			limp->rlim_max = maxssiz;
		oldssiz = *alimp;
		if (p->p_sysent->sv_fixlimit != NULL)
			p->p_sysent->sv_fixlimit(&oldssiz,
			    RLIMIT_STACK);
		break;

	case RLIMIT_NOFILE:
		if (limp->rlim_cur > maxfilesperproc)
			limp->rlim_cur = maxfilesperproc;
		if (limp->rlim_max > maxfilesperproc)
			limp->rlim_max = maxfilesperproc;
		break;

	case RLIMIT_NPROC:
		if (limp->rlim_cur > maxprocperuid)
			limp->rlim_cur = maxprocperuid;
		if (limp->rlim_max > maxprocperuid)
			limp->rlim_max = maxprocperuid;
		if (limp->rlim_cur < 1)
			limp->rlim_cur = 1;
		if (limp->rlim_max < 1)
			limp->rlim_max = 1;
		break;
	}
	if (p->p_sysent->sv_fixlimit != NULL)
		p->p_sysent->sv_fixlimit(limp, which);
	*alimp = *limp;
	p->p_limit = newlim;
	PROC_UPDATE_COW(p);
	PROC_UNLOCK(p);
	lim_free(oldlim);

	if (which == RLIMIT_STACK &&
	    /*
	     * Skip calls from exec_new_vmspace(), done when stack is
	     * not mapped yet.
	     */
	    (td != curthread || (p->p_flag & P_INEXEC) == 0)) {
		/*
		 * Stack is allocated to the max at exec time with only
		 * "rlim_cur" bytes accessible.  If stack limit is going
		 * up make more accessible, if going down make inaccessible.
		 */
		if (limp->rlim_cur != oldssiz.rlim_cur) {
			vm_offset_t addr;
			vm_size_t size;
			vm_prot_t prot;

			if (limp->rlim_cur > oldssiz.rlim_cur) {
				prot = p->p_sysent->sv_stackprot;
				size = limp->rlim_cur - oldssiz.rlim_cur;
				addr = p->p_sysent->sv_usrstack -
				    limp->rlim_cur;
			} else {
				prot = VM_PROT_NONE;
				size = oldssiz.rlim_cur - limp->rlim_cur;
				addr = p->p_sysent->sv_usrstack -
				    oldssiz.rlim_cur;
			}
			addr = trunc_page(addr);
			size = round_page(size);
			(void)vm_map_protect(&p->p_vmspace->vm_map,
			    addr, addr + size, prot, FALSE);
		}
	}

	return (0);
}

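/*
 * Worked example of the RLIMIT_STACK fixup above: raising the soft
 * limit from 8 MB to 16 MB re-applies sv_stackprot to the 8 MB range
 * [sv_usrstack - 16M, sv_usrstack - 8M), while lowering it maps the
 * difference VM_PROT_NONE instead; the stack itself stays reserved at
 * its maximum size from exec time.
 */
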
#ifndef _SYS_SYSPROTO_H_
struct __getrlimit_args {
	u_int	which;
	struct	rlimit *rlp;
};
#endif
int
sys_getrlimit(struct thread *td, struct __getrlimit_args *uap)
{
	struct rlimit rlim;
	int error;

	if (uap->which >= RLIM_NLIMITS)
		return (EINVAL);
	lim_rlimit(td, uap->which, &rlim);
	error = copyout(&rlim, uap->rlp, sizeof(struct rlimit));
	return (error);
}

/*
 * Transform the running time and tick information for children of proc p
 * into user and system time usage.
 */
void
calccru(struct proc *p, struct timeval *up, struct timeval *sp)
{

	PROC_LOCK_ASSERT(p, MA_OWNED);
	calcru1(p, &p->p_crux, up, sp);
}

/*
 * Transform the running time and tick information in proc p into user
 * and system time usage.  If appropriate, include the current time slice
 * on running tasks.
 */
void
calcru(struct proc *p, struct timeval *up, struct timeval *sp)
{
	struct thread *td;
	uint64_t runtime, u;

	PROC_LOCK_ASSERT(p, MA_OWNED);
	PROC_STATLOCK_ASSERT(p, MA_OWNED);
	/*
	 * If we are getting stats for the current process, then add in the
	 * stats that this thread has accumulated in its current time slice.
	 * We reset the thread and CPU state as if we had performed a context
	 * switch right here.
	 */
	td = curthread;
	if (td->td_proc == p) {
		u = cpu_ticks();
		runtime = u - PCPU_GET(switchtime);
		td->td_runtime += runtime;
		td->td_incruntime += runtime;
		PCPU_SET(switchtime, u);
	}
	/* Make sure the per-thread stats are current. */
	FOREACH_THREAD_IN_PROC(p, td) {
		if (td->td_incruntime == 0)
			continue;
		ruxagg(p, td);
	}
	calcru1(p, &p->p_rux, up, sp);
}

/* Collect resource usage for a single thread. */
void
rufetchtd(struct thread *td, struct rusage *ru)
{
	struct proc *p;
	uint64_t runtime, u;

	p = td->td_proc;
	PROC_STATLOCK_ASSERT(p, MA_OWNED);
	THREAD_LOCK_ASSERT(td, MA_OWNED);
	/*
	 * If we are getting stats for the current thread, then add in the
	 * stats that this thread has accumulated in its current time slice.
	 * We reset the thread and CPU state as if we had performed a context
	 * switch right here.
	 */
	if (td == curthread) {
		u = cpu_ticks();
		runtime = u - PCPU_GET(switchtime);
		td->td_runtime += runtime;
		td->td_incruntime += runtime;
		PCPU_SET(switchtime, u);
	}
	ruxagg(p, td);
	*ru = td->td_ru;
	calcru1(p, &td->td_rux, &ru->ru_utime, &ru->ru_stime);
}

static void
calcru1(struct proc *p, struct rusage_ext *ruxp, struct timeval *up,
    struct timeval *sp)
{
	/* {user, system, interrupt, total} {ticks, usec}: */
	uint64_t ut, uu, st, su, it, tt, tu;

	ut = ruxp->rux_uticks;
	st = ruxp->rux_sticks;
	it = ruxp->rux_iticks;
	tt = ut + st + it;
	if (tt == 0) {
		/* Avoid divide by zero */
		st = 1;
		tt = 1;
	}
	tu = cputick2usec(ruxp->rux_runtime);
	if ((int64_t)tu < 0) {
		/* XXX: this should be an assert /phk */
		printf("calcru: negative runtime of %jd usec for pid %d (%s)\n",
		    (intmax_t)tu, p->p_pid, p->p_comm);
		tu = ruxp->rux_tu;
	}

	if (tu >= ruxp->rux_tu) {
		/*
		 * The normal case, time increased.
		 * Enforce monotonicity of bucketed numbers.
		 */
		uu = (tu * ut) / tt;
		if (uu < ruxp->rux_uu)
			uu = ruxp->rux_uu;
		su = (tu * st) / tt;
		if (su < ruxp->rux_su)
			su = ruxp->rux_su;
	} else if (tu + 3 > ruxp->rux_tu || 101 * tu > 100 * ruxp->rux_tu) {
		/*
		 * When we calibrate the cputicker, it is not uncommon to
		 * see the presumably fixed frequency increase slightly over
		 * time as a result of thermal stabilization and NTP
		 * discipline (of the reference clock).  We therefore ignore
		 * a bit of backwards slop because we expect to catch up
		 * shortly.  We use a 3 microsecond limit to catch low
		 * counts and a 1% limit for high counts.
		 */
		uu = ruxp->rux_uu;
		su = ruxp->rux_su;
		tu = ruxp->rux_tu;
	} else { /* tu < ruxp->rux_tu */
		/*
		 * What happened here was likely that a laptop, which ran at
		 * a reduced clock frequency at boot, kicked into high gear.
		 * The wisdom of spamming this message in that case is
		 * dubious, but it might also be indicative of something
		 * serious, so lets keep it and hope laptops can be made
		 * more truthful about their CPU speed via ACPI.
		 */
		printf("calcru: runtime went backwards from %ju usec "
		    "to %ju usec for pid %d (%s)\n",
		    (uintmax_t)ruxp->rux_tu, (uintmax_t)tu,
		    p->p_pid, p->p_comm);
		uu = (tu * ut) / tt;
		su = (tu * st) / tt;
	}

	ruxp->rux_uu = uu;
	ruxp->rux_su = su;
	ruxp->rux_tu = tu;

	up->tv_sec = uu / 1000000;
	up->tv_usec = uu % 1000000;
	sp->tv_sec = su / 1000000;
	sp->tv_usec = su % 1000000;
}

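/*
 * The split above is purely proportional: with tu the total runtime in
 * microseconds and ut/st/tt the user/system/total statclock ticks,
 * user time is tu * ut / tt and system time is tu * st / tt.  For
 * example, 30 user ticks out of 100 over 2,000,000 usec of runtime
 * yields 600,000 usec (0.6 s) of user time.
 */
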
#ifndef _SYS_SYSPROTO_H_
struct getrusage_args {
	int	who;
	struct	rusage *rusage;
};
#endif
int
sys_getrusage(struct thread *td, struct getrusage_args *uap)
{
	struct rusage ru;
	int error;

	error = kern_getrusage(td, uap->who, &ru);
	if (error == 0)
		error = copyout(&ru, uap->rusage, sizeof(struct rusage));
	return (error);
}

int
kern_getrusage(struct thread *td, int who, struct rusage *rup)
{
	struct proc *p;
	int error;

	error = 0;
	p = td->td_proc;
	PROC_LOCK(p);
	switch (who) {
	case RUSAGE_SELF:
		rufetchcalc(p, rup, &rup->ru_utime,
		    &rup->ru_stime);
		break;

	case RUSAGE_CHILDREN:
		*rup = p->p_stats->p_cru;
		calccru(p, &rup->ru_utime, &rup->ru_stime);
		break;

	case RUSAGE_THREAD:
		PROC_STATLOCK(p);
		thread_lock(td);
		rufetchtd(td, rup);
		thread_unlock(td);
		PROC_STATLOCK_UNLOCK(p);
		break;

	default:
		error = EINVAL;
	}
	PROC_UNLOCK(p);
	return (error);
}

void
rucollect(struct rusage *ru, struct rusage *ru2)
{
	long *ip, *ip2;
	int i;

	if (ru->ru_maxrss < ru2->ru_maxrss)
		ru->ru_maxrss = ru2->ru_maxrss;
	ip = &ru->ru_first;
	ip2 = &ru2->ru_first;
	for (i = &ru->ru_last - &ru->ru_first; i >= 0; i--)
		*ip++ += *ip2++;
}

void
ruadd(struct rusage *ru, struct rusage_ext *rux, struct rusage *ru2,
    struct rusage_ext *rux2)
{

	rux->rux_runtime += rux2->rux_runtime;
	rux->rux_uticks += rux2->rux_uticks;
	rux->rux_sticks += rux2->rux_sticks;
	rux->rux_iticks += rux2->rux_iticks;
	rux->rux_uu += rux2->rux_uu;
	rux->rux_su += rux2->rux_su;
	rux->rux_tu += rux2->rux_tu;
	rucollect(ru, ru2);
}

/*
 * Aggregate tick counts into the proc's rusage_ext.
 */
static void
ruxagg_locked(struct rusage_ext *rux, struct thread *td)
{

	THREAD_LOCK_ASSERT(td, MA_OWNED);
	PROC_STATLOCK_ASSERT(td->td_proc, MA_OWNED);
	rux->rux_runtime += td->td_incruntime;
	rux->rux_uticks += td->td_uticks;
	rux->rux_sticks += td->td_sticks;
	rux->rux_iticks += td->td_iticks;
}

void
ruxagg(struct proc *p, struct thread *td)
{

	thread_lock(td);
	ruxagg_locked(&p->p_rux, td);
	ruxagg_locked(&td->td_rux, td);
	td->td_incruntime = 0;
	td->td_uticks = 0;
	td->td_iticks = 0;
	td->td_sticks = 0;
	thread_unlock(td);
}

/*
 * Update the rusage_ext structure and fetch a valid aggregate rusage
 * for proc p if storage for one is supplied.
 */
void
rufetch(struct proc *p, struct rusage *ru)
{
	struct thread *td;

	PROC_STATLOCK_ASSERT(p, MA_OWNED);

	*ru = p->p_ru;
	if (p->p_numthreads > 0) {
		FOREACH_THREAD_IN_PROC(p, td) {
			ruxagg(p, td);
			rucollect(ru, &td->td_ru);
		}
	}
}

/*
 * Atomically perform a rufetch and a calcru together.
 * Consumers can safely assume the calcru is executed only once
 * rufetch is completed.
 */
void
rufetchcalc(struct proc *p, struct rusage *ru, struct timeval *up,
    struct timeval *sp)
{

	PROC_STATLOCK(p);
	rufetch(p, ru);
	calcru(p, up, sp);
	PROC_STATLOCK_UNLOCK(p);
}

/*
 * Allocate a new resource limits structure and initialize its
 * reference count and mutex pointer.
 */
struct plimit *
lim_alloc(void)
{
	struct plimit *limp;

	limp = malloc(sizeof(struct plimit), M_PLIMIT, M_WAITOK);
	refcount_init(&limp->pl_refcnt, 1);
	return (limp);
}

struct plimit *
lim_hold(struct plimit *limp)
{

	refcount_acquire(&limp->pl_refcnt);
	return (limp);
}

void
lim_fork(struct proc *p1, struct proc *p2)
{

	PROC_LOCK_ASSERT(p1, MA_OWNED);
	PROC_LOCK_ASSERT(p2, MA_OWNED);

	p2->p_limit = lim_hold(p1->p_limit);
	callout_init_mtx(&p2->p_limco, &p2->p_mtx, 0);
	if (p1->p_cpulimit != RLIM_INFINITY)
		callout_reset_sbt(&p2->p_limco, SBT_1S, 0,
		    lim_cb, p2, C_PREL(1));
}

void
lim_free(struct plimit *limp)
{

	if (refcount_release(&limp->pl_refcnt))
		free((void *)limp, M_PLIMIT);
}

/*
 * Make a copy of the plimit structure.
 * We share these structures copy-on-write after fork.
 */
void
lim_copy(struct plimit *dst, struct plimit *src)
{

	KASSERT(dst->pl_refcnt <= 1, ("lim_copy to shared limit"));
	bcopy(src->pl_rlimit, dst->pl_rlimit, sizeof(src->pl_rlimit));
}

/*
 * Return the hard limit for a particular system resource.  The
 * which parameter specifies the index into the rlimit array.
 */
rlim_t
lim_max(struct thread *td, int which)
{
	struct rlimit rl;

	lim_rlimit(td, which, &rl);
	return (rl.rlim_max);
}

rlim_t
lim_max_proc(struct proc *p, int which)
{
	struct rlimit rl;

	lim_rlimit_proc(p, which, &rl);
	return (rl.rlim_max);
}

/*
 * Return the current (soft) limit for a particular system resource.
 * The which parameter specifies the index into the rlimit array.
 */
rlim_t
lim_cur(struct thread *td, int which)
{
	struct rlimit rl;

	lim_rlimit(td, which, &rl);
	return (rl.rlim_cur);
}

rlim_t
lim_cur_proc(struct proc *p, int which)
{
	struct rlimit rl;

	lim_rlimit_proc(p, which, &rl);
	return (rl.rlim_cur);
}

/*
 * Return a copy of the entire rlimit structure for the system limit
 * specified by 'which' in the rlimit structure pointed to by 'rlp'.
 */
void
lim_rlimit(struct thread *td, int which, struct rlimit *rlp)
{
	struct proc *p = td->td_proc;

	MPASS(td == curthread);
	KASSERT(which >= 0 && which < RLIM_NLIMITS,
	    ("request for invalid resource limit"));
	*rlp = td->td_limit->pl_rlimit[which];
	if (p->p_sysent->sv_fixlimit != NULL)
		p->p_sysent->sv_fixlimit(rlp, which);
}

void
lim_rlimit_proc(struct proc *p, int which, struct rlimit *rlp)
{

	PROC_LOCK_ASSERT(p, MA_OWNED);
	KASSERT(which >= 0 && which < RLIM_NLIMITS,
	    ("request for invalid resource limit"));
	*rlp = p->p_limit->pl_rlimit[which];
	if (p->p_sysent->sv_fixlimit != NULL)
		p->p_sysent->sv_fixlimit(rlp, which);
}

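/*
 * Of the two accessors above, lim_rlimit() reads the limit snapshot
 * cached on the current thread (td_limit) and so needs no lock, while
 * lim_rlimit_proc() reads p->p_limit and therefore requires the proc
 * lock, as the assertions enforce.
 */
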
void
uihashinit(void)
{

	uihashtbl = hashinit(maxproc / 16, M_UIDINFO, &uihash);
	rw_init(&uihashtbl_lock, "uidinfo hash");
}

/*
 * Look up a uidinfo struct for the parameter uid.
 * uihashtbl_lock must be locked.
 * Increase refcount on uidinfo struct returned.
 */
static struct uidinfo *
uilookup(uid_t uid)
{
	struct uihashhead *uipp;
	struct uidinfo *uip;

	rw_assert(&uihashtbl_lock, RA_LOCKED);
	uipp = UIHASH(uid);
	LIST_FOREACH(uip, uipp, ui_hash)
		if (uip->ui_uid == uid) {
			uihold(uip);
			break;
		}

	return (uip);
}

/*
 * Find or allocate a struct uidinfo for a particular uid.
 * Returns with uidinfo struct referenced.
 * uifree() should be called on a struct uidinfo when released.
 */
struct uidinfo *
uifind(uid_t uid)
{
	struct uidinfo *new_uip, *uip;

	rw_rlock(&uihashtbl_lock);
	uip = uilookup(uid);
	rw_runlock(&uihashtbl_lock);
	if (uip != NULL)
		return (uip);

	new_uip = malloc(sizeof(*new_uip), M_UIDINFO, M_WAITOK | M_ZERO);
	racct_create(&new_uip->ui_racct);
	refcount_init(&new_uip->ui_ref, 1);
	new_uip->ui_uid = uid;
	mtx_init(&new_uip->ui_vmsize_mtx, "ui_vmsize", NULL, MTX_DEF);

	rw_wlock(&uihashtbl_lock);
	/*
	 * There's a chance someone created our uidinfo while we
	 * were in malloc and not holding the lock, so we have to
	 * make sure we don't insert a duplicate uidinfo.
	 */
	if ((uip = uilookup(uid)) == NULL) {
		LIST_INSERT_HEAD(UIHASH(uid), new_uip, ui_hash);
		rw_wunlock(&uihashtbl_lock);
		uip = new_uip;
	} else {
		rw_wunlock(&uihashtbl_lock);
		racct_destroy(&new_uip->ui_racct);
		mtx_destroy(&new_uip->ui_vmsize_mtx);
		free(new_uip, M_UIDINFO);
	}
	return (uip);
}

/*
 * Place another refcount on a uidinfo struct.
 */
void
uihold(struct uidinfo *uip)
{

	refcount_acquire(&uip->ui_ref);
}

/*-
 * Since uidinfo structs have a long lifetime, we use an
 * opportunistic refcounting scheme to avoid locking the lookup hash
 * for each release.
 *
 * If the refcount hits 0, we need to free the structure,
 * which means we need to lock the hash.
 * Optimal case:
 *   After locking the struct and lowering the refcount, if we find
 *   that we don't need to free, simply unlock and return.
 * Suboptimal case:
 *   If refcount lowering results in need to free, bump the count
 *   back up, lose the lock and acquire the locks in the proper
 *   order to try again.
 */
void
uifree(struct uidinfo *uip)
{
	int old;

	/* Prepare for optimal case. */
	old = uip->ui_ref;
	if (old > 1 && atomic_cmpset_int(&uip->ui_ref, old, old - 1))
		return;

	/* Prepare for suboptimal case. */
	rw_wlock(&uihashtbl_lock);
	if (refcount_release(&uip->ui_ref) == 0) {
		rw_wunlock(&uihashtbl_lock);
		return;
	}

	racct_destroy(&uip->ui_racct);
	LIST_REMOVE(uip, ui_hash);
	rw_wunlock(&uihashtbl_lock);

	if (uip->ui_sbsize != 0)
		printf("freeing uidinfo: uid = %d, sbsize = %ld\n",
		    uip->ui_uid, uip->ui_sbsize);
	if (uip->ui_proccnt != 0)
		printf("freeing uidinfo: uid = %d, proccnt = %ld\n",
		    uip->ui_uid, uip->ui_proccnt);
	if (uip->ui_vmsize != 0)
		printf("freeing uidinfo: uid = %d, swapuse = %lld\n",
		    uip->ui_uid, (unsigned long long)uip->ui_vmsize);
	mtx_destroy(&uip->ui_vmsize_mtx);
	free(uip, M_UIDINFO);
}

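/*
 * The fast path in uifree() relies on atomic_cmpset_int() failing if
 * another thread changed ui_ref between the read and the update; on
 * failure, or when the count would drop to zero, it falls through to
 * the locked path, where holding uihashtbl_lock makes the final
 * release and hash removal race-free.
 */
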
void
ui_racct_foreach(void (*callback)(struct racct *racct,
    void *arg2, void *arg3), void (*pre)(void), void (*post)(void),
    void *arg2, void *arg3)
{
	struct uidinfo *uip;
	struct uihashhead *uih;

	rw_rlock(&uihashtbl_lock);
	if (pre != NULL)
		(pre)();
	for (uih = &uihashtbl[uihash]; uih >= uihashtbl; uih--) {
		LIST_FOREACH(uip, uih, ui_hash) {
			(callback)(uip->ui_racct, arg2, arg3);
		}
	}
	if (post != NULL)
		(post)();
	rw_runlock(&uihashtbl_lock);
}

static int
chglimit(struct uidinfo *uip, long *limit, int diff, rlim_t max, const char *name)
{

	/* Don't allow them to exceed max, but allow subtraction. */
	if (diff > 0 && max != 0) {
		if (atomic_fetchadd_long(limit, (long)diff) + diff > max) {
			atomic_subtract_long(limit, (long)diff);
			return (0);
		}
	} else {
		atomic_add_long(limit, (long)diff);
		if (*limit < 0)
			printf("negative %s for uid = %d\n", name, uip->ui_uid);
	}
	return (1);
}

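/*
 * Note that atomic_fetchadd_long() returns the value the limit held
 * *before* the addition, so the test adds diff back in to get the
 * post-add total: if *limit was 95, diff is 10 and max is 100, then
 * 95 + 10 > 100 and the increment is rolled back with
 * atomic_subtract_long().
 */
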
/*
 * Change the count associated with number of processes
 * a given user is using.  When 'max' is 0, don't enforce a limit.
 */
int
chgproccnt(struct uidinfo *uip, int diff, rlim_t max)
{

	return (chglimit(uip, &uip->ui_proccnt, diff, max, "proccnt"));
}

/*
 * Change the total socket buffer size a user has used.
 */
int
chgsbsize(struct uidinfo *uip, u_int *hiwat, u_int to, rlim_t max)
{
	int diff, rv;

	diff = to - *hiwat;
	if (diff > 0 && max == 0) {
		rv = 0;
	} else {
		rv = chglimit(uip, &uip->ui_sbsize, diff, max, "sbsize");
		if (rv != 0)
			*hiwat = to;
	}
	return (rv);
}

/*
 * Change the count associated with number of pseudo-terminals
 * a given user is using.  When 'max' is 0, don't enforce a limit.
 */
int
chgptscnt(struct uidinfo *uip, int diff, rlim_t max)
{

	return (chglimit(uip, &uip->ui_ptscnt, diff, max, "ptscnt"));
}

1430 chgkqcnt(struct uidinfo
*uip
, int diff
, rlim_t max
)
1433 return (chglimit(uip
, &uip
->ui_kqcnt
, diff
, max
, "kqcnt"));