/*-
 * Copyright (c) 1982, 1986, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_resource.c	8.5 (Berkeley) 1/21/94
 */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_compat.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/refcount.h>
#include <sys/resourcevar.h>
#include <sys/rwlock.h>
#include <sys/sched.h>
#include <sys/sx.h>
#include <sys/syscallsubr.h>
#include <sys/sysent.h>
#include <sys/time.h>
#include <sys/umtx.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
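
/*
 * M_PLIMIT and M_UIDINFO below are the malloc(9) types used for resource
 * limit (struct plimit) and per-uid accounting (struct uidinfo)
 * allocations.  UIHASH() simply masks the uid with uihash (the hash table
 * size minus one, as filled in by the hashinit() call further down), and
 * uihashtbl_lock protects the resulting hash chains.
 */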
static MALLOC_DEFINE(M_PLIMIT, "plimit", "plimit structures");
static MALLOC_DEFINE(M_UIDINFO, "uidinfo", "uidinfo structures");
#define	UIHASH(uid)	(&uihashtbl[(uid) & uihash])
static struct rwlock uihashtbl_lock;
static LIST_HEAD(uihashhead, uidinfo) *uihashtbl;
static u_long uihash;		/* size of hash table - 1 */
static void	calcru1(struct proc *p, struct rusage_ext *ruxp,
		    struct timeval *up, struct timeval *sp);
static int	donice(struct thread *td, struct proc *chgp, int n);
static struct uidinfo *uilookup(uid_t uid);
/*
 * Resource controls and accounting.
 */

#ifndef _SYS_SYSPROTO_H_
struct getpriority_args {

	register struct getpriority_args *uap;

	switch (uap->which) {
			low = td->td_proc->p_nice;
			if (p_cansee(td, p) == 0)
		sx_slock(&proctree_lock);
			pg = td->td_proc->p_pgrp;
			pg = pgfind(uap->who);
				sx_sunlock(&proctree_lock);
		sx_sunlock(&proctree_lock);
		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
			if (p_cansee(td, p) == 0) {
			uap->who = td->td_ucred->cr_uid;
		sx_slock(&allproc_lock);
		FOREACH_PROC_IN_SYSTEM(p) {
			/* Do not bother to check PRS_NEW processes */
			if (p->p_state == PRS_NEW)
			if (p_cansee(td, p) == 0 &&
			    p->p_ucred->cr_uid == uap->who) {
		sx_sunlock(&allproc_lock);
	if (low == PRIO_MAX + 1 && error == 0)
	td->td_retval[0] = low;
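
/*
 * Note on the getpriority() fragment above: for each target selected by
 * uap->which (a single process, a process group, or every process owned
 * by uap->who), processes the caller may not observe are filtered out with
 * p_cansee(); the elided comparisons keep the lowest (most favourable)
 * p_nice value in "low", with PRIO_MAX + 1 serving as the "nothing
 * matched" sentinel checked just before the result is stored in
 * td->td_retval[0].
 */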
#ifndef _SYS_SYSPROTO_H_
struct setpriority_args {

	struct setpriority_args *uap;

	struct proc *curp, *p;
	int found = 0, error = 0;

	switch (uap->which) {
			error = donice(td, curp, uap->prio);
			error = p_cansee(td, p);
				error = donice(td, p, uap->prio);
		sx_slock(&proctree_lock);
			pg = pgfind(uap->who);
				sx_sunlock(&proctree_lock);
		sx_sunlock(&proctree_lock);
		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
			if (p_cansee(td, p) == 0) {
				error = donice(td, p, uap->prio);
			uap->who = td->td_ucred->cr_uid;
		sx_slock(&allproc_lock);
		FOREACH_PROC_IN_SYSTEM(p) {
			if (p->p_ucred->cr_uid == uap->who &&
			    p_cansee(td, p) == 0) {
				error = donice(td, p, uap->prio);
		sx_sunlock(&allproc_lock);
	if (found == 0 && error == 0)
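
/*
 * setpriority() mirrors the selection logic of getpriority() above, but
 * applies donice() to every matching process that passes p_cansee().  The
 * "found" counter lets the (elided) tail of the function distinguish
 * "no process matched" from "matched but donice() failed".
 */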
/*
 * Set "nice" for a (whole) process.
 */
donice(struct thread *td, struct proc *p, int n)

	PROC_LOCK_ASSERT(p, MA_OWNED);
	if ((error = p_cansched(td, p)))
	if (n < p->p_nice && priv_check(td, PRIV_SCHED_SETPRIORITY) != 0)
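
/*
 * donice() is the common worker for the setpriority() paths above: the
 * caller must hold the process lock, p_cansched() decides whether this
 * thread may reschedule the target at all, and lowering the nice value
 * (i.e. raising priority) additionally requires the
 * PRIV_SCHED_SETPRIORITY privilege.
 */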
/*
 * Set realtime priority for LWP.
 */
#ifndef _SYS_SYSPROTO_H_
struct rtprio_thread_args {

rtprio_thread(struct thread *td, struct rtprio_thread_args *uap)

	/* Perform copyin before acquiring locks if needed. */
	if (uap->function == RTP_SET)
		cierror = copyin(uap->rtp, &rtp, sizeof(struct rtprio));

	/*
	 * Though lwpid is unique, only current process is supported
	 * since there is no efficient way to look up a LWP yet.
	 */

	switch (uap->function) {
		if ((error = p_cansee(td, p)))
		if (uap->lwpid == 0 || uap->lwpid == td->td_tid)
			td1 = thread_find(p, uap->lwpid);
			pri_to_rtp(td1, &rtp);
		return (copyout(&rtp, uap->rtp, sizeof(struct rtprio)));
		if ((error = p_cansched(td, p)) || (error = cierror))

		/* Disallow setting rtprio in most cases if not superuser. */
		/*
		 * Realtime priority has to be restricted for reasons which should be
		 * obvious. However, for idle priority, there is a potential for
		 * system deadlock if an idleprio process gains a lock on a resource
		 * that other processes need (and the idleprio process can't run
		 * due to a CPU-bound normal process). Fix me! XXX
		 */
		if (RTP_PRIO_IS_REALTIME(rtp.type)) {
		if (rtp.type != RTP_PRIO_NORMAL) {
			error = priv_check(td, PRIV_SCHED_RTPRIO);
		if (uap->lwpid == 0 || uap->lwpid == td->td_tid)
			td1 = thread_find(p, uap->lwpid);
			error = rtp_to_pri(&rtp, td1);
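
/*
 * In rtprio_thread() above the copyin() is deliberately issued before any
 * process lock is taken: copying from user space can fault and sleep,
 * which is not allowed while holding a mutex, so the result is parked in
 * cierror and only folded into "error" once the RTP_SET path holds the
 * lock and has passed p_cansched().
 */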
/*
 * Set realtime priority.
 */
#ifndef _SYS_SYSPROTO_H_

	struct thread *td;		/* curthread */
	register struct rtprio_args *uap;

	/* Perform copyin before acquiring locks if needed. */
	if (uap->function == RTP_SET)
		cierror = copyin(uap->rtp, &rtp, sizeof(struct rtprio));

	switch (uap->function) {
		if ((error = p_cansee(td, p)))
		/*
		 * Return OUR priority if no pid specified,
		 * or if one is, report the highest priority
		 * in the process. There isn't much more you can do as
		 * there is only room to return a single priority.
		 * Note: specifying our own pid is not the same
		 * as leaving it zero.
		 */
			pri_to_rtp(td, &rtp);
			rtp.type = RTP_PRIO_IDLE;
			rtp.prio = RTP_PRIO_MAX;
			FOREACH_THREAD_IN_PROC(p, tdp) {
				pri_to_rtp(tdp, &rtp2);
				if (rtp2.type < rtp.type ||
				    (rtp2.type == rtp.type &&
				    rtp2.prio < rtp.prio)) {
					rtp.type = rtp2.type;
					rtp.prio = rtp2.prio;
		return (copyout(&rtp, uap->rtp, sizeof(struct rtprio)));
		if ((error = p_cansched(td, p)) || (error = cierror))

		/* Disallow setting rtprio in most cases if not superuser. */
		/*
		 * Realtime priority has to be restricted for reasons which should be
		 * obvious. However, for idle priority, there is a potential for
		 * system deadlock if an idleprio process gains a lock on a resource
		 * that other processes need (and the idleprio process can't run
		 * due to a CPU-bound normal process). Fix me! XXX
		 */
		if (RTP_PRIO_IS_REALTIME(rtp.type)) {
		if (rtp.type != RTP_PRIO_NORMAL) {
			error = priv_check(td, PRIV_SCHED_RTPRIO);
		/*
		 * If we are setting our own priority, set just our
		 * thread but if we are doing another process,
		 * do all the threads on that process. If we
		 * specify our own pid we do the latter.
		 */
			error = rtp_to_pri(&rtp, td);
			FOREACH_THREAD_IN_PROC(p, td) {
				if ((error = rtp_to_pri(&rtp, td)) != 0)
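
/*
 * rtprio() is the process-wide counterpart of rtprio_thread(): a lookup
 * reports a single rtprio for the whole process (the best priority found
 * across its threads, per the comment above), and RTP_SET applies the new
 * priority either to the calling thread only (when no pid is given) or to
 * every thread of the target process via FOREACH_THREAD_IN_PROC().
 */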
rtp_to_pri(struct rtprio *rtp, struct thread *td)

	if (rtp->prio > RTP_PRIO_MAX)
	switch (RTP_PRIO_BASE(rtp->type)) {
	case RTP_PRIO_REALTIME:
		newpri = PRI_MIN_REALTIME + rtp->prio;
	case RTP_PRIO_NORMAL:
		newpri = PRI_MIN_TIMESHARE + rtp->prio;
		newpri = PRI_MIN_IDLE + rtp->prio;
	sched_class(td, rtp->type);	/* XXX fix */
	oldpri = td->td_user_pri;
	sched_user_prio(td, newpri);
		sched_prio(curthread, td->td_user_pri); /* XXX dubious */
	if (TD_ON_UPILOCK(td) && oldpri != newpri) {
		umtx_pi_adjust(td, oldpri);
pri_to_rtp(struct thread *td, struct rtprio *rtp)

	switch (PRI_BASE(td->td_pri_class)) {
		rtp->prio = td->td_base_user_pri - PRI_MIN_REALTIME;
		rtp->prio = td->td_base_user_pri - PRI_MIN_TIMESHARE;
		rtp->prio = td->td_base_user_pri - PRI_MIN_IDLE;
	rtp->type = td->td_pri_class;
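
/*
 * rtp_to_pri() and pri_to_rtp() convert between the user-visible,
 * class-relative rtprio representation and kernel priority numbers.  For
 * example, an rtprio of { RTP_PRIO_REALTIME, 0 } becomes PRI_MIN_REALTIME,
 * and pri_to_rtp() recovers the same pair by subtracting the class base
 * from td_base_user_pri; larger rtp->prio values therefore map to
 * numerically larger (less favourable) kernel priorities within the class.
 */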
#if defined(COMPAT_43)
#ifndef _SYS_SYSPROTO_H_
struct osetrlimit_args {

	register struct osetrlimit_args *uap;

	if ((error = copyin(uap->rlp, &olim, sizeof(struct orlimit))))
	lim.rlim_cur = olim.rlim_cur;
	lim.rlim_max = olim.rlim_max;
	error = kern_setrlimit(td, uap->which, &lim);

#ifndef _SYS_SYSPROTO_H_
struct ogetrlimit_args {

	register struct ogetrlimit_args *uap;

	if (uap->which >= RLIM_NLIMITS)
	lim_rlimit(p, uap->which, &rl);
	/*
	 * XXX would be more correct to convert only RLIM_INFINITY to the
	 * old RLIM_INFINITY and fail with EOVERFLOW for other larger
	 * values. Most 64->32 and 32->16 conversions, including not
	 * unimportant ones of uids are even more broken than what we
	 * do here (they blindly truncate). We don't do this correctly
	 * here since we have little experience with EOVERFLOW yet.
	 * Elsewhere, getuid() can't fail...
	 */
	olim.rlim_cur = rl.rlim_cur > 0x7fffffff ? 0x7fffffff : rl.rlim_cur;
	olim.rlim_max = rl.rlim_max > 0x7fffffff ? 0x7fffffff : rl.rlim_max;
	error = copyout(&olim, uap->rlp, sizeof(olim));
#endif /* COMPAT_43 */
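
/*
 * The COMPAT_43 pair above bridges the old 4.3BSD interface: osetrlimit()
 * widens the 32-bit struct orlimit into a struct rlimit and hands it to
 * kern_setrlimit(), while ogetrlimit() clamps 64-bit rlimit values down to
 * 0x7fffffff so they fit the old structure (see the XXX comment about
 * EOVERFLOW above).
 */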
#ifndef _SYS_SYSPROTO_H_
struct __setrlimit_args {

	register struct __setrlimit_args *uap;

	if ((error = copyin(uap->rlp, &alim, sizeof(struct rlimit))))
	error = kern_setrlimit(td, uap->which, &alim);
	PROC_LOCK_ASSERT(p, MA_OWNED);
	/*
	 * Check if the process exceeds its cpu resource allocation. If
	 * it reaches the max, arrange to kill the process in ast().
	 */
	if (p->p_cpulimit == RLIM_INFINITY)
	FOREACH_THREAD_IN_PROC(p, td) {
		ruxagg(&p->p_rux, td);
	if (p->p_rux.rux_runtime > p->p_cpulimit * cpu_tickrate()) {
		lim_rlimit(p, RLIMIT_CPU, &rlim);
		if (p->p_rux.rux_runtime >= rlim.rlim_max * cpu_tickrate()) {
			killproc(p, "exceeded maximum CPU limit");
			if (p->p_cpulimit < rlim.rlim_max)
	callout_reset(&p->p_limco, hz, lim_cb, p);
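
/*
 * lim_cb() is the callout behind RLIMIT_CPU enforcement: it re-arms itself
 * every hz ticks (see the callout_reset() above and in kern_setrlimit()
 * and lim_fork() below), folds each thread's accumulated runtime into
 * p_rux via ruxagg(), and compares the total against the limit scaled by
 * cpu_tickrate().  Exceeding the hard limit (rlim_max) kills the process
 * outright; otherwise the elided branch, guarded by p_cpulimit <
 * rlim_max, presumably advances the soft limit so the condition is
 * re-evaluated on later firings.
 */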
kern_setrlimit(td, which, limp)

	struct plimit *newlim, *oldlim;
	register struct rlimit *alimp;
	struct rlimit oldssiz;

	if (which >= RLIM_NLIMITS)
	/*
	 * Preserve historical bugs by treating negative limits as unsigned.
	 */
	if (limp->rlim_cur < 0)
		limp->rlim_cur = RLIM_INFINITY;
	if (limp->rlim_max < 0)
		limp->rlim_max = RLIM_INFINITY;

	oldssiz.rlim_cur = 0;
	newlim = lim_alloc();
	alimp = &oldlim->pl_rlimit[which];
	if (limp->rlim_cur > alimp->rlim_max ||
	    limp->rlim_max > alimp->rlim_max)
		if ((error = priv_check(td, PRIV_PROC_SETRLIMIT))) {
	if (limp->rlim_cur > limp->rlim_max)
		limp->rlim_cur = limp->rlim_max;
	lim_copy(newlim, oldlim);
	alimp = &newlim->pl_rlimit[which];

		if (limp->rlim_cur != RLIM_INFINITY &&
		    p->p_cpulimit == RLIM_INFINITY)
			callout_reset(&p->p_limco, hz, lim_cb, p);
		p->p_cpulimit = limp->rlim_cur;

		if (limp->rlim_cur > maxdsiz)
			limp->rlim_cur = maxdsiz;
		if (limp->rlim_max > maxdsiz)
			limp->rlim_max = maxdsiz;

		if (limp->rlim_cur > maxssiz)
			limp->rlim_cur = maxssiz;
		if (limp->rlim_max > maxssiz)
			limp->rlim_max = maxssiz;
		if (td->td_proc->p_sysent->sv_fixlimit != NULL)
			td->td_proc->p_sysent->sv_fixlimit(&oldssiz,

		if (limp->rlim_cur > maxfilesperproc)
			limp->rlim_cur = maxfilesperproc;
		if (limp->rlim_max > maxfilesperproc)
			limp->rlim_max = maxfilesperproc;

		if (limp->rlim_cur > maxprocperuid)
			limp->rlim_cur = maxprocperuid;
		if (limp->rlim_max > maxprocperuid)
			limp->rlim_max = maxprocperuid;
		if (limp->rlim_cur < 1)
		if (limp->rlim_max < 1)

	if (td->td_proc->p_sysent->sv_fixlimit != NULL)
		td->td_proc->p_sysent->sv_fixlimit(limp, which);

	if (which == RLIMIT_STACK) {
		/*
		 * Stack is allocated to the max at exec time with only
		 * "rlim_cur" bytes accessible. If stack limit is going
		 * up make more accessible, if going down make inaccessible.
		 */
		if (limp->rlim_cur != oldssiz.rlim_cur) {
			if (limp->rlim_cur > oldssiz.rlim_cur) {
				prot = p->p_sysent->sv_stackprot;
				size = limp->rlim_cur - oldssiz.rlim_cur;
				addr = p->p_sysent->sv_usrstack -
				size = oldssiz.rlim_cur - limp->rlim_cur;
				addr = p->p_sysent->sv_usrstack -
			addr = trunc_page(addr);
			size = round_page(size);
			(void)vm_map_protect(&p->p_vmspace->vm_map,
			    addr, addr + size, prot, FALSE);
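
/*
 * The RLIMIT_STACK handling above relies on the stack having been mapped
 * to its maximum size at exec time, with only rlim_cur bytes left
 * accessible (see the comment in the code).  The accessible region runs
 * downward from sv_usrstack, so raising the soft limit re-protects the
 * strip between the old and new limits with sv_stackprot, while lowering
 * it makes the corresponding strip inaccessible; trunc_page() and
 * round_page() align the range before vm_map_protect() is applied to the
 * process map.
 */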
#ifndef _SYS_SYSPROTO_H_
struct __getrlimit_args {

	register struct __getrlimit_args *uap;

	if (uap->which >= RLIM_NLIMITS)
	lim_rlimit(p, uap->which, &rlim);
	error = copyout(&rlim, uap->rlp, sizeof(struct rlimit));
/*
 * Transform the running time and tick information for children of proc p
 * into user and system time usage.
 */

	PROC_LOCK_ASSERT(p, MA_OWNED);
	calcru1(p, &p->p_crux, up, sp);

/*
 * Transform the running time and tick information in proc p into user
 * and system time usage. If appropriate, include the current time slice
 * on this CPU.
 */
calcru(struct proc *p, struct timeval *up, struct timeval *sp)

	PROC_LOCK_ASSERT(p, MA_OWNED);
	PROC_SLOCK_ASSERT(p, MA_OWNED);
	/*
	 * If we are getting stats for the current process, then add in the
	 * stats that this thread has accumulated in its current time slice.
	 * We reset the thread and CPU state as if we had performed a context
	 * switch right here.
	 */
	if (td->td_proc == p) {
		p->p_rux.rux_runtime += u - PCPU_GET(switchtime);
		PCPU_SET(switchtime, u);

	/* Make sure the per-thread stats are current. */
	FOREACH_THREAD_IN_PROC(p, td) {
		if (td->td_incruntime == 0)
		ruxagg(&p->p_rux, td);

	calcru1(p, &p->p_rux, up, sp);
calcru1(struct proc *p, struct rusage_ext *ruxp, struct timeval *up,

	/* {user, system, interrupt, total} {ticks, usec}: */
	u_int64_t ut, uu, st, su, it, tt, tu;

	ut = ruxp->rux_uticks;
	st = ruxp->rux_sticks;
	it = ruxp->rux_iticks;
		/* Avoid divide by zero */
	tu = cputick2usec(ruxp->rux_runtime);
	if ((int64_t)tu < 0) {
		/* XXX: this should be an assert /phk */
		printf("calcru: negative runtime of %jd usec for pid %d (%s)\n",
		    (intmax_t)tu, p->p_pid, p->p_comm);

	if (tu >= ruxp->rux_tu) {
		/*
		 * The normal case, time increased.
		 * Enforce monotonicity of bucketed numbers.
		 */
		if (uu < ruxp->rux_uu)
		if (su < ruxp->rux_su)
	} else if (tu + 3 > ruxp->rux_tu || 101 * tu > 100 * ruxp->rux_tu) {
		/*
		 * When we calibrate the cputicker, it is not uncommon to
		 * see the presumably fixed frequency increase slightly over
		 * time as a result of thermal stabilization and NTP
		 * discipline (of the reference clock). We therefore ignore
		 * a bit of backwards slop because we expect to catch up
		 * shortly. We use a 3 microsecond limit to catch low
		 * counts and a 1% limit for high counts.
		 */
	} else { /* tu < ruxp->rux_tu */
		/*
		 * What happened here was likely that a laptop, which ran at
		 * a reduced clock frequency at boot, kicked into high gear.
		 * The wisdom of spamming this message in that case is
		 * dubious, but it might also be indicative of something
		 * serious, so let's keep it and hope laptops can be made
		 * more truthful about their CPU speed via ACPI.
		 */
		printf("calcru: runtime went backwards from %ju usec "
		    "to %ju usec for pid %d (%s)\n",
		    (uintmax_t)ruxp->rux_tu, (uintmax_t)tu,
		    p->p_pid, p->p_comm);

	up->tv_sec = uu / 1000000;
	up->tv_usec = uu % 1000000;
	sp->tv_sec = su / 1000000;
	sp->tv_usec = su % 1000000;
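
/*
 * Only the sanity and monotonicity checks of calcru1() survive in this
 * excerpt.  The elided arithmetic apportions the total runtime tu (in
 * microseconds, from cputick2usec()) among the user, system and interrupt
 * buckets in proportion to the tick counts ut, st and it (tt being their
 * total, hence the divide-by-zero guard above), and uu/su are clamped so
 * they never run backwards between successive calls, rux_uu and rux_su
 * remembering the values handed out last time.
 */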
#ifndef _SYS_SYSPROTO_H_
struct getrusage_args {
	struct rusage *rusage;

	register struct thread *td;
	register struct getrusage_args *uap;

	error = kern_getrusage(td, uap->who, &ru);
		error = copyout(&ru, uap->rusage, sizeof(struct rusage));
kern_getrusage(td, who, rup)

		rufetchcalc(p, rup, &rup->ru_utime,
	case RUSAGE_CHILDREN:
		*rup = p->p_stats->p_cru;
		calccru(p, &rup->ru_utime, &rup->ru_stime);
rucollect(struct rusage *ru, struct rusage *ru2)

	if (ru->ru_maxrss < ru2->ru_maxrss)
		ru->ru_maxrss = ru2->ru_maxrss;
	ip2 = &ru2->ru_first;
	for (i = &ru->ru_last - &ru->ru_first; i >= 0; i--)
ruadd(struct rusage *ru, struct rusage_ext *rux, struct rusage *ru2,
    struct rusage_ext *rux2)

	rux->rux_runtime += rux2->rux_runtime;
	rux->rux_uticks += rux2->rux_uticks;
	rux->rux_sticks += rux2->rux_sticks;
	rux->rux_iticks += rux2->rux_iticks;
	rux->rux_uu += rux2->rux_uu;
	rux->rux_su += rux2->rux_su;
	rux->rux_tu += rux2->rux_tu;
/*
 * Aggregate tick counts into the proc's rusage_ext.
 */
ruxagg(struct rusage_ext *rux, struct thread *td)

	THREAD_LOCK_ASSERT(td, MA_OWNED);
	PROC_SLOCK_ASSERT(td->td_proc, MA_OWNED);
	rux->rux_runtime += td->td_incruntime;
	rux->rux_uticks += td->td_uticks;
	rux->rux_sticks += td->td_sticks;
	rux->rux_iticks += td->td_iticks;
	td->td_incruntime = 0;
/*
 * Update the rusage_ext structure and fetch a valid aggregate rusage
 * for proc p if storage for one is supplied.
 */
rufetch(struct proc *p, struct rusage *ru)

	PROC_SLOCK_ASSERT(p, MA_OWNED);

	if (p->p_numthreads > 0) {
		FOREACH_THREAD_IN_PROC(p, td) {
			ruxagg(&p->p_rux, td);
			rucollect(ru, &td->td_ru);
/*
 * Atomically perform a rufetch and a calcru together.
 * Consumers can safely assume the calcru is executed only once
 * rufetch is completed.
 */
rufetchcalc(struct proc *p, struct rusage *ru, struct timeval *up,
/*
 * Allocate a new resource limits structure and initialize its
 * reference count and mutex pointer.
 */

	struct plimit *limp;

	limp = malloc(sizeof(struct plimit), M_PLIMIT, M_WAITOK);
	refcount_init(&limp->pl_refcnt, 1);

	struct plimit *limp;

	refcount_acquire(&limp->pl_refcnt);
lim_fork(struct proc *p1, struct proc *p2)

	p2->p_limit = lim_hold(p1->p_limit);
	callout_init_mtx(&p2->p_limco, &p2->p_mtx, 0);
	if (p1->p_cpulimit != RLIM_INFINITY)
		callout_reset(&p2->p_limco, hz, lim_cb, p2);
	struct plimit *limp;

	KASSERT(limp->pl_refcnt > 0, ("plimit refcnt underflow"));
	if (refcount_release(&limp->pl_refcnt))
		free((void *)limp, M_PLIMIT);
/*
 * Make a copy of the plimit structure.
 * We share these structures copy-on-write after fork.
 */
	struct plimit *dst, *src;

	KASSERT(dst->pl_refcnt == 1, ("lim_copy to shared limit"));
	bcopy(src->pl_rlimit, dst->pl_rlimit, sizeof(src->pl_rlimit));
/*
 * Return the hard limit for a particular system resource. The
 * which parameter specifies the index into the rlimit array.
 */
lim_max(struct proc *p, int which)

	lim_rlimit(p, which, &rl);
	return (rl.rlim_max);

/*
 * Return the current (soft) limit for a particular system resource.
 * The which parameter specifies the index into the rlimit array.
 */
lim_cur(struct proc *p, int which)

	lim_rlimit(p, which, &rl);
	return (rl.rlim_cur);
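
/*
 * Hypothetical caller sketch (not part of this file): both helpers expect
 * the process to be locked, since lim_rlimit() below asserts PROC_LOCK.
 *
 *	PROC_LOCK(p);
 *	if ((rlim_t)newsize > lim_cur(p, RLIMIT_DATA)) {
 *		PROC_UNLOCK(p);
 *		return (ENOMEM);
 *	}
 *	PROC_UNLOCK(p);
 */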
/*
 * Return a copy of the entire rlimit structure for the system limit
 * specified by 'which' in the rlimit structure pointed to by 'rlp'.
 */
lim_rlimit(struct proc *p, int which, struct rlimit *rlp)

	PROC_LOCK_ASSERT(p, MA_OWNED);
	KASSERT(which >= 0 && which < RLIM_NLIMITS,
	    ("request for invalid resource limit"));
	*rlp = p->p_limit->pl_rlimit[which];
	if (p->p_sysent->sv_fixlimit != NULL)
		p->p_sysent->sv_fixlimit(rlp, which);
/*
 * Find the uidinfo structure for a uid. This structure is used to
 * track the total resource consumption (process count, socket buffer
 * size, etc.) for the uid and impose limits.
 */

	uihashtbl = hashinit(maxproc / 16, M_UIDINFO, &uihash);
	rw_init(&uihashtbl_lock, "uidinfo hash");
/*
 * Look up a uidinfo struct for the parameter uid.
 * uihashtbl_lock must be locked.
 */
static struct uidinfo *

	struct uihashhead *uipp;
	struct uidinfo *uip;

	rw_assert(&uihashtbl_lock, RA_LOCKED);
	LIST_FOREACH(uip, uipp, ui_hash)
		if (uip->ui_uid == uid)
/*
 * Find or allocate a struct uidinfo for a particular uid.
 * Increase refcount on uidinfo struct returned.
 * uifree() should be called on a struct uidinfo when released.
 */

	struct uidinfo *old_uip, *uip;

	rw_rlock(&uihashtbl_lock);
	uip = uilookup(uid);
		rw_runlock(&uihashtbl_lock);
		uip = malloc(sizeof(*uip), M_UIDINFO, M_WAITOK | M_ZERO);
		rw_wlock(&uihashtbl_lock);
		/*
		 * There's a chance someone created our uidinfo while we
		 * were in malloc and not holding the lock, so we have to
		 * make sure we don't insert a duplicate uidinfo.
		 */
		if ((old_uip = uilookup(uid)) != NULL) {
			/* Someone else beat us to it. */
			free(uip, M_UIDINFO);
			refcount_init(&uip->ui_ref, 0);
			LIST_INSERT_HEAD(UIHASH(uid), uip, ui_hash);
	rw_unlock(&uihashtbl_lock);
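
/*
 * uifind() above handles the classic lock-upgrade race: the hash is
 * searched under the read lock first; on a miss the lock is dropped so
 * the M_WAITOK allocation can sleep, the write lock is taken, and the
 * lookup is repeated in case another thread inserted the same uid in the
 * meantime (in which case the fresh allocation is freed and the existing
 * entry is used instead).  New entries start with ui_ref at 0; the
 * caller's reference is presumably added by an elided reference-taking
 * call before the hash lock is released.
 */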
/*
 * Place another refcount on a uidinfo struct.
 */

	struct uidinfo *uip;

	refcount_acquire(&uip->ui_ref);
/*
 * Since uidinfo structs have a long lifetime, we use an
 * opportunistic refcounting scheme to avoid locking the lookup hash.
 *
 * If the refcount hits 0, we need to free the structure,
 * which means we need to lock the hash.
 *
 * After locking the struct and lowering the refcount, if we find
 * that we don't need to free, simply unlock and return.
 *
 * If refcount lowering results in need to free, bump the count
 * back up, lose the lock and acquire the locks in the proper
 * order to try again.
 */

	struct uidinfo *uip;

	/* Prepare for optimal case. */
	if (old > 1 && atomic_cmpset_int(&uip->ui_ref, old, old - 1))

	/* Prepare for suboptimal case. */
	rw_wlock(&uihashtbl_lock);
	if (refcount_release(&uip->ui_ref)) {
		LIST_REMOVE(uip, ui_hash);
		rw_wunlock(&uihashtbl_lock);
		if (uip->ui_sbsize != 0)
			printf("freeing uidinfo: uid = %d, sbsize = %ld\n",
			    uip->ui_uid, uip->ui_sbsize);
		if (uip->ui_proccnt != 0)
			printf("freeing uidinfo: uid = %d, proccnt = %ld\n",
			    uip->ui_uid, uip->ui_proccnt);
		FREE(uip, M_UIDINFO);

	/*
	 * Someone added a reference between atomic_cmpset_int() and
	 * rw_wlock(&uihashtbl_lock).
	 */
	rw_wunlock(&uihashtbl_lock);
/*
 * Change the count associated with number of processes
 * a given user is using. When 'max' is 0, don't enforce a limit
 */
chgproccnt(uip, diff, max)
	struct uidinfo *uip;

	/* Don't allow them to exceed max, but allow subtraction. */
	if (diff > 0 && max != 0) {
		if (atomic_fetchadd_long(&uip->ui_proccnt, (long)diff) + diff > max) {
			atomic_subtract_long(&uip->ui_proccnt, (long)diff);
		atomic_add_long(&uip->ui_proccnt, (long)diff);
		if (uip->ui_proccnt < 0)
			printf("negative proccnt for uid = %d\n", uip->ui_uid);
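
/*
 * chgproccnt() above implements lock-free admission control:
 * atomic_fetchadd_long() returns the previous count, so "old + diff" is
 * the value the counter now holds; if that overshoots max the addition is
 * immediately backed out with atomic_subtract_long() and the request is
 * refused.  Decrements (and the max == 0 case) bypass the check entirely.
 * chgsbsize() and chgptscnt() below apply the same pattern to per-uid
 * socket buffer space and pseudo-terminal counts.
 */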
/*
 * Change the total socket buffer size a user has used.
 */
chgsbsize(uip, hiwat, to, max)
	struct uidinfo *uip;

		if (atomic_fetchadd_long(&uip->ui_sbsize, (long)diff) + diff > max) {
			atomic_subtract_long(&uip->ui_sbsize, (long)diff);
		atomic_add_long(&uip->ui_sbsize, (long)diff);
		if (uip->ui_sbsize < 0)
			printf("negative sbsize for uid = %d\n", uip->ui_uid);
/*
 * Change the count associated with number of pseudo-terminals
 * a given user is using. When 'max' is 0, don't enforce a limit
 */
chgptscnt(uip, diff, max)
	struct uidinfo *uip;

	/* Don't allow them to exceed max, but allow subtraction. */
	if (diff > 0 && max != 0) {
		if (atomic_fetchadd_long(&uip->ui_ptscnt, (long)diff) + diff > max) {
			atomic_subtract_long(&uip->ui_ptscnt, (long)diff);
		atomic_add_long(&uip->ui_ptscnt, (long)diff);
		if (uip->ui_ptscnt < 0)
			printf("negative ptscnt for uid = %d\n", uip->ui_uid);