2 * Copyright (c) 1991, 1993
3 * The Regents of the University of California. All rights reserved.
5 * This code is derived from software contributed to Berkeley by
6 * The Mach Operating System project at Carnegie-Mellon University.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 * must display the following acknowledgement:
18 * This product includes software developed by the University of
19 * California, Berkeley and its contributors.
20 * 4. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * from: @(#)vm_glue.c 8.6 (Berkeley) 1/5/94
39 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
40 * All rights reserved.
42 * Permission to use, copy, modify and distribute this software and
43 * its documentation is hereby granted, provided that both the copyright
44 * notice and this permission notice appear in all copies of the
45 * software, derivative works or modified versions, and any portions
46 * thereof, and that both notices appear in supporting documentation.
48 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
49 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
50 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
52 * Carnegie Mellon requests users of this software to return to
54 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
55 * School of Computer Science
56 * Carnegie Mellon University
57 * Pittsburgh PA 15213-3890
59 * any improvements or extensions that they make and grant Carnegie the
60 * rights to redistribute these changes.
62 * $FreeBSD: src/sys/vm/vm_glue.c,v 1.94.2.4 2003/01/13 22:51:17 dillon Exp $
63 * $DragonFly: src/sys/vm/vm_glue.c,v 1.29 2005/02/07 20:39:01 dillon Exp $
68 #include <sys/param.h>
69 #include <sys/systm.h>
71 #include <sys/resourcevar.h>
74 #include <sys/vmmeter.h>
75 #include <sys/sysctl.h>
77 #include <sys/kernel.h>
78 #include <sys/unistd.h>
80 #include <machine/limits.h>
83 #include <vm/vm_param.h>
86 #include <vm/vm_map.h>
87 #include <vm/vm_page.h>
88 #include <vm/vm_pageout.h>
89 #include <vm/vm_kern.h>
90 #include <vm/vm_extern.h>
93 #include <vm/vm_page2.h>
96 * System initialization
98 * Note: proc0 from proc.h
/*
 * Forward declaration of vm_init_limits() and its SYSINIT registration:
 * subsystem SI_SUB_VM_CONF, order SI_ORDER_FIRST, with &proc0 handed to the
 * function as its argument.
 */
101 static void vm_init_limits (void *);
102 SYSINIT(vm_limits
, SI_SUB_VM_CONF
, SI_ORDER_FIRST
, vm_init_limits
, &proc0
)
105 * THIS MUST BE THE LAST INITIALIZATION ITEM!!!
107 * Note: run scheduling should be divorced from the vm system.
/*
 * Forward declaration of scheduler() and its SYSINIT registration: subsystem
 * SI_SUB_RUN_SCHEDULER, order SI_ORDER_FIRST, with a NULL argument.
 */
109 static void scheduler (void *);
110 SYSINIT(scheduler
, SI_SUB_RUN_SCHEDULER
, SI_ORDER_FIRST
, scheduler
, NULL
)
/* Forward declaration of the file-local swapout() helper. */
113 static void swapout (struct proc
*);
/*
 * kernacc: query kernel_map for access to the range starting at addr and
 * spanning len bytes.
 *
 * Visible steps:
 *  - assert that rw carries no bits outside VM_PROT_ALL;
 *  - widen the range to page boundaries (trunc_page on the start,
 *    round_page on addr + len);
 *  - call vm_map_check_protection() on kernel_map between
 *    vm_map_lock_read() and vm_map_unlock_read();
 *  - when that result is FALSE, additionally test
 *    is_globaldata_space(saddr, eaddr).
 *
 * NOTE(review): this extract is missing the return type, the braces, the
 * declarations of rv and prot, the assignment that gives prot its value,
 * the statement controlled by the final if, and the return statement.
 * Confirm all of them against the complete file.
 */
116 kernacc(c_caddr_t addr
, int len
, int rw
)
119 vm_offset_t saddr
, eaddr
;
/* rw may only contain bits that are part of VM_PROT_ALL. */
122 KASSERT((rw
& (~VM_PROT_ALL
)) == 0,
123 ("illegal ``rw'' argument to kernacc (%x)\n", rw
));
/* Page-align both ends of the range before asking the map. */
125 saddr
= trunc_page((vm_offset_t
)addr
);
126 eaddr
= round_page((vm_offset_t
)addr
+ len
);
127 vm_map_lock_read(kernel_map
);
128 rv
= vm_map_check_protection(kernel_map
, saddr
, eaddr
, prot
);
129 vm_map_unlock_read(kernel_map
);
/* Second chance for ranges the map rejected: the globaldata test. */
130 if (rv
== FALSE
&& is_globaldata_space(saddr
, eaddr
))
/*
 * useracc: query the current process map (curproc->p_vmspace->vm_map) for
 * access to the range starting at addr and spanning len bytes.
 *
 * Visible steps:
 *  - assert that rw carries no bits outside VM_PROT_ALL;
 *  - test whether addr + len lies beyond VM_MAXUSER_ADDRESS or has wrapped
 *    around to a value below addr;
 *  - under the map read lock, call vm_map_check_protection() on the
 *    page-rounded range, saving map->hint before the call and restoring it
 *    afterwards.
 *
 * NOTE(review): this extract is missing the return type, the braces, the
 * declarations of map, rv and prot, the assignment that gives prot its
 * value, the body of the range-check branch, and the return statement.
 * Presumably the range-check branch rejects the request; confirm against
 * the complete file.
 */
136 useracc(c_caddr_t addr
, int len
, int rw
)
141 vm_map_entry_t save_hint
;
143 KASSERT((rw
& (~VM_PROT_ALL
)) == 0,
144 ("illegal ``rw'' argument to useracc (%x)\n", rw
));
147 * XXX - check separately to disallow access to user area and user
148 * page tables - they are in the map.
150 * XXX - VM_MAXUSER_ADDRESS is an end address, not a max. It was once
151 * only used (as an end address) in trap.c. Use it as an end address
152 * here too. This bogusness has spread. I just fixed where it was
153 * used as a max in vm_mmap.c.
/* First clause: end past the user limit. Second clause: address wrap. */
155 if ((vm_offset_t
) addr
+ len
> /* XXX */ VM_MAXUSER_ADDRESS
156 || (vm_offset_t
) addr
+ len
< (vm_offset_t
) addr
) {
159 map
= &curproc
->p_vmspace
->vm_map
;
160 vm_map_lock_read(map
);
162 * We save the map hint, and restore it. Useracc appears to distort
163 * the map hint unnecessarily.
165 save_hint
= map
->hint
;
166 rv
= vm_map_check_protection(map
,
167 trunc_page((vm_offset_t
)addr
), round_page((vm_offset_t
)addr
+ len
), prot
);
168 map
->hint
= save_hint
;
169 vm_map_unlock_read(map
);
/*
 * vslock: call vm_map_wire() on the current process map for the
 * page-rounded range covering addr through addr + len, passing 0 as the
 * last argument.
 *
 * NOTE(review): vsunlock() makes the same call with KM_PAGEABLE instead of
 * 0, so presumably 0 requests wiring and KM_PAGEABLE undoes it; confirm
 * against vm_map_wire(). The return type and braces are not visible in this
 * extract, and any value returned by vm_map_wire() is not used by the
 * visible statement.
 */
175 vslock(caddr_t addr
, u_int len
)
177 vm_map_wire(&curproc
->p_vmspace
->vm_map
, trunc_page((vm_offset_t
)addr
),
178 round_page((vm_offset_t
)addr
+ len
), 0);
/*
 * vsunlock: call vm_map_wire() on the current process map for the
 * page-rounded range covering addr through addr + len, passing KM_PAGEABLE
 * as the last argument.
 *
 * NOTE(review): vslock() makes the same call with 0 instead of KM_PAGEABLE,
 * so presumably this is the inverse operation; confirm against
 * vm_map_wire(). The return type and braces are not visible in this
 * extract.
 */
182 vsunlock(caddr_t addr
, u_int len
)
184 vm_map_wire(&curproc
->p_vmspace
->vm_map
, trunc_page((vm_offset_t
)addr
),
185 round_page((vm_offset_t
)addr
+ len
), KM_PAGEABLE
);
189 * Implement fork's actions on an address space.
190 * Here we arrange for the address space to be copied or referenced,
191 * allocate a user struct (pcb and kernel stack), then call the
192 * machine-dependent layer to fill those in and make the new process
193 * ready to run. The new process is set up so that it returns directly
194 * to user mode to avoid stack copying and relocation problems.
/*
 * vm_fork: address-space handling for a fork from p1 (parent) to p2 (child),
 * steered by the RFPROC and RFMEM bits in flags.
 *
 * What this extract shows, in source order:
 *  - RFPROC clear: if RFMEM is also clear and the parent vmspace has
 *    vm_refcnt > 1, some action is taken (not visible), after which
 *    cpu_fork(p1, p2, flags) is called.
 *  - p2->p_vmspace is pointed at the parent vmspace and that vmspace has
 *    its vm_refcnt incremented.
 *  - A loop runs for as long as vm_page_count_severe() reports true.
 *  - RFMEM clear: p2 receives vmspace_fork(p1->p_vmspace), pmap_pinit2() is
 *    applied to the pmap of the new vmspace, and p1->p_vmspace->vm_shm is
 *    tested.
 *  - A thread is obtained from lwkt_alloc_thread(), bound to p2 with
 *    pmap_init_proc(), given priority TDPRI_KERN_USER and named after
 *    p1->p_comm.
 *  - p2->p_stats is pointed at up->u_stats; when p2->p_sigacts is NULL it
 *    is pointed at up->u_sigacts, which is loaded with a copy of
 *    *p1->p_sigacts (a message is printed first if ps_refcnt is not 1).
 *  - up->u_stats is zeroed, then p_prof is copied from the parent stats and
 *    td_start is copied from the parent thread to the child thread.
 *  - cpu_fork(p1, p2, flags) is called.
 *
 * NOTE(review): the return type, braces, the declarations and
 * initialisation of td2 and up, the bodies of several branches and of the
 * wait loop, and the condition guarding the vmspace-sharing assignment are
 * all missing from this extract. Presumably the sharing path runs only when
 * RFMEM is set and the RFPROC-clear path returns early; confirm against the
 * complete file.
 */
197 vm_fork(struct proc
*p1
, struct proc
*p2
, int flags
)
/* No new process requested. */
202 if ((flags
& RFPROC
) == 0) {
204 * Divorce the memory, if it is shared, essentially
205 * this changes shared memory amongst threads, into
208 if ((flags
& RFMEM
) == 0) {
209 if (p1
->p_vmspace
->vm_refcnt
> 1) {
213 cpu_fork(p1
, p2
, flags
);
/* Share the parent vmspace with the child and account for the new user. */
218 p2
->p_vmspace
= p1
->p_vmspace
;
219 p1
->p_vmspace
->vm_refcnt
++;
222 while (vm_page_count_severe()) {
/* Child gets its own vmspace derived from the parent one. */
226 if ((flags
& RFMEM
) == 0) {
227 p2
->p_vmspace
= vmspace_fork(p1
->p_vmspace
);
229 pmap_pinit2(vmspace_pmap(p2
->p_vmspace
));
231 if (p1
->p_vmspace
->vm_shm
)
/* Create and set up the LWKT thread that backs the child. */
235 td2
= lwkt_alloc_thread(NULL
, LWKT_THREAD_STACK
, -1);
236 pmap_init_proc(p2
, td2
);
237 lwkt_setpri(td2
, TDPRI_KERN_USER
);
238 lwkt_set_comm(td2
, "%s", p1
->p_comm
);
243 * p_stats currently points at fields in the user struct
244 * but not at &u, instead at p_addr. Copy parts of
245 * p_stats; zero the rest of p_stats (statistics).
247 * If procsig->ps_refcnt is 1 and p2->p_sigacts is NULL we dont' need
248 * to share sigacts, so we use the up->u_sigacts.
250 p2
->p_stats
= &up
->u_stats
;
251 if (p2
->p_sigacts
== NULL
) {
252 if (p2
->p_procsig
->ps_refcnt
!= 1)
253 printf ("PID:%d NULL sigacts with refcnt not 1!\n",p2
->p_pid
);
254 p2
->p_sigacts
= &up
->u_sigacts
;
255 up
->u_sigacts
= *p1
->p_sigacts
;
258 bzero(&up
->u_stats
, sizeof(struct pstats
));
259 bcopy(&p1
->p_stats
->p_prof
, &up
->u_stats
.p_prof
,
260 sizeof(struct uprof
));
261 bcopy(&p1
->p_thread
->td_start
, &p2
->p_thread
->td_start
,
262 sizeof(struct timeval
));
266 * cpu_fork will copy and update the pcb, set up the kernel stack,
267 * and make the child ready to run.
269 cpu_fork(p1
, p2
, flags
);
273 * Called after process has been wait(2)'ed upon and is being reaped.
274 * The idea is to reclaim resources that we could not reclaim while
275 * the process was still executing.
/*
 * vm_waitproc: the one statement visible in this extract hands p to
 * vmspace_exitfree().
 *
 * NOTE(review): the return type, the braces and the original lines between
 * the signature and the vmspace_exitfree() call are missing from this
 * extract; confirm the full body against the complete file.
 */
278 vm_waitproc(struct proc
*p
)
282 vmspace_exitfree(p
); /* and clean-out the vmspace */
286 * Set default limits for VM system.
287 * Called for proc 0, and then inherited by all others.
289 * XXX should probably act directly on proc0.
/*
 * vm_init_limits: fill in the VM-related resource limits of the process
 * passed through udata (registered with &proc0 by the SYSINIT entry for
 * vm_limits).
 *
 * Limits written:
 *  - RLIMIT_STACK: rlim_cur = dflssiz, rlim_max = maxssiz
 *  - RLIMIT_DATA:  rlim_cur = dfldsiz, rlim_max = maxdsiz
 *  - RLIMIT_RSS:   rlim_cur = ptoa(max(vmstats.v_free_count, 512)),
 *                  rlim_max = RLIM_INFINITY
 *
 * NOTE(review): the block comment below speaks of "half of (reasonably)
 * available memory", but the visible code uses vmstats.v_free_count
 * unscaled. Either the comment or the code is stale; confirm which.
 * NOTE(review): the return type, the braces and the declaration of
 * rss_limit are missing from this extract.
 */
292 vm_init_limits(void *udata
)
294 struct proc
*p
= udata
;
298 * Set up the initial limits on process VM. Set the maximum resident
299 * set size to be half of (reasonably) available memory. Since this
300 * is a soft limit, it comes into effect only when the system is out
301 * of memory - half of main memory helps to favor smaller processes,
302 * and reduces thrashing of the object cache.
304 p
->p_rlimit
[RLIMIT_STACK
].rlim_cur
= dflssiz
;
305 p
->p_rlimit
[RLIMIT_STACK
].rlim_max
= maxssiz
;
306 p
->p_rlimit
[RLIMIT_DATA
].rlim_cur
= dfldsiz
;
307 p
->p_rlimit
[RLIMIT_DATA
].rlim_max
= maxdsiz
;
308 /* floor the RSS soft limit at 512 pages (2MB if pages are 4KB) */
309 rss_limit
= max(vmstats
.v_free_count
, 512);
310 p
->p_rlimit
[RLIMIT_RSS
].rlim_cur
= ptoa(rss_limit
);
311 p
->p_rlimit
[RLIMIT_RSS
].rlim_max
= RLIM_INFINITY
;
/*
 * faultin: act on a process whose P_INMEM flag is clear.
 *
 * Visible steps inside the P_INMEM-clear branch: if p->p_stat is SRUN the
 * process thread is passed to lwkt_schedule(), and P_INMEM is then set in
 * p->p_flag.
 *
 * NOTE(review): the return type, the braces and roughly a dozen original
 * lines of this function are missing from this extract, including whatever
 * the trailing "SLOCK" comment refers to. Confirm the full sequence against
 * the complete file.
 */
315 faultin(struct proc
*p
)
319 if ((p
->p_flag
& P_INMEM
) == 0) {
328 * The process is in the kernel and controlled by LWKT,
329 * so we just schedule it rather then call setrunqueue().
331 if (p
->p_stat
== SRUN
)
332 lwkt_schedule(p
->p_thread
);
334 p
->p_flag
|= P_INMEM
;
336 /* undo the effect of setting SLOCK above */
344 * Kernel initialization eventually falls through to this function,
345 * which is process 0.
347 * This swapin algorithm attempts to swap-in processes only if there
348 * is enough space for them. Of course, if a process waits for a long
349 * time, it will be swapped in anyway.
/*
 * scheduler: swap-in selection routine (registered through the SYSINIT
 * entry for scheduler with a NULL argument).
 *
 * Visible steps:
 *  - assert that the current thread is not in a critical section;
 *  - test vm_page_count_min() (branch body not visible);
 *  - walk allproc; for each process in state SRUN with neither P_INMEM nor
 *    P_SWAPPING set, compute pri = p_swtime + p_slptime and, when
 *    P_SWAPINREQ is clear, subtract p_nice * 8;
 *  - take the selected candidate from pp; if it is NULL, sleep on &proc0
 *    with the wait message "sched";
 *  - clear P_SWAPINREQ on the candidate.
 *
 * NOTE(review): the return type, the braces, the declarations of p, pp and
 * pri, the code that records the best candidate in pp, the code after the
 * final comment, and any enclosing loop are missing from this extract.
 * Confirm against the complete file.
 */
353 scheduler(void *dummy
)
360 KKASSERT(!IN_CRITICAL_SECT(curthread
));
362 if (vm_page_count_min()) {
/* Scan every process for runnable ones that are not resident. */
369 for (p
= allproc
.lh_first
; p
!= 0; p
= p
->p_list
.le_next
) {
370 if (p
->p_stat
== SRUN
&&
371 (p
->p_flag
& (P_INMEM
| P_SWAPPING
)) == 0) {
373 pri
= p
->p_swtime
+ p
->p_slptime
;
/* The nice penalty is applied only when no swap-in was requested. */
374 if ((p
->p_flag
& P_SWAPINREQ
) == 0) {
375 pri
-= p
->p_nice
* 8;
379 * if this process is higher priority and there is
380 * enough space, then select this process instead of
381 * the previous selection.
391 * Nothing to do, back to sleep.
393 if ((p
= pp
) == NULL
) {
394 tsleep(&proc0
, 0, "sched", 0);
397 p
->p_flag
&= ~P_SWAPINREQ
;
400 * We would like to bring someone in. (only if there is space).
/*
 * swappable(p): true when p->p_lock is zero and, among the flags P_TRACED,
 * P_SYSTEM, P_INMEM, P_WEXIT and P_SWAPPING, P_INMEM is the only one set.
 * The argument is evaluated twice, so it must be free of side effects.
 */
409 #define swappable(p) \
410 (((p)->p_lock == 0) && \
411 ((p)->p_flag & (P_TRACED|P_SYSTEM|P_INMEM|P_WEXIT|P_SWAPPING)) == P_INMEM)
415 * Swap_idle_threshold1 is the guaranteed swapped-in time for a process
/*
 * Tunable with default 2, exported via SYSCTL_INT under the _vm node with
 * CTLFLAG_RW and an empty description string. swapout_procs() compares
 * p->p_slptime against it.
 */
417 static int swap_idle_threshold1
= 2;
418 SYSCTL_INT(_vm
, OID_AUTO
, swap_idle_threshold1
,
419 CTLFLAG_RW
, &swap_idle_threshold1
, 0, "");
422 * Swap_idle_threshold2 is the time that a process can be idle before
423 * it will be swapped out, if idle swapping is enabled.
/*
 * Tunable with default 10, exported via SYSCTL_INT under the _vm node with
 * CTLFLAG_RW and an empty description string. swapout_procs() compares
 * p->p_slptime against it when VM_SWAP_IDLE is requested.
 */
425 static int swap_idle_threshold2
= 10;
426 SYSCTL_INT(_vm
, OID_AUTO
, swap_idle_threshold2
,
427 CTLFLAG_RW
, &swap_idle_threshold2
, 0, "");
430 * Swapout is driven by the pageout daemon. Very simple, we find eligible
431 * procs and unwire their u-areas. We try to always "swap" at least one
432 * process in case we need the room for a swapin.
433 * If any procs have been sleeping/stopped for at least maxslp seconds,
434 * they are swapped. Else, we swap the longest-sleeping or stopped process,
435 * if any, otherwise the longest-resident process.
/*
 * swapout_procs: walk allproc and pick processes to swap out according to
 * the VM_SWAP_NORMAL and VM_SWAP_IDLE bits in action.
 *
 * Visible per-process tests, in source order:
 *  - RTP_PRIO_IS_REALTIME(p->p_rtprio.type);
 *  - p_slptime < swap_idle_threshold1;
 *  - VM_SWAP_NORMAL clear and either VM_SWAP_IDLE clear or
 *    p_slptime < swap_idle_threshold2;
 *  - a non-blocking exclusive lockmgr() attempt on vm->vm_map.lock,
 *    followed later by vm_map_unlock();
 *  - VM_SWAP_NORMAL set, or VM_SWAP_IDLE set with
 *    p_slptime > swap_idle_threshold2.
 *
 * NOTE(review): the idle filter uses "<" while the final swap test uses
 * ">", so with only VM_SWAP_IDLE set a process whose p_slptime equals
 * swap_idle_threshold2 passes the filter but fails the swap test. Confirm
 * whether that is intended.
 * NOTE(review): the return type, the braces, the declarations of p, vm,
 * outpri and outpri2, the statements controlled by each test (presumably
 * "continue" for the filters), the remaining lockmgr() arguments, and the
 * tail of the function are missing from this extract.
 */
438 swapout_procs(int action
)
441 struct proc
*outp
, *outp2
;
446 outpri
= outpri2
= INT_MIN
;
448 for (p
= allproc
.lh_first
; p
!= 0; p
= p
->p_list
.le_next
) {
462 * do not swapout a realtime process
464 if (RTP_PRIO_IS_REALTIME(p
->p_rtprio
.type
))
468 * YYY do not swapout a proc waiting on a critical
471 * Guarentee swap_idle_threshold time in memory
473 if (p
->p_slptime
< swap_idle_threshold1
)
477 * If the system is under memory stress, or if we
478 * are swapping idle processes >= swap_idle_threshold2,
479 * then swap the process out.
481 if (((action
& VM_SWAP_NORMAL
) == 0) &&
482 (((action
& VM_SWAP_IDLE
) == 0) ||
483 (p
->p_slptime
< swap_idle_threshold2
)))
488 * do not swapout a process that is waiting for VM
489 * data structures there is a possible deadlock.
/* Try-lock only: LK_NOWAIT keeps this from blocking on a busy map. */
491 if (lockmgr(&vm
->vm_map
.lock
,
492 LK_EXCLUSIVE
| LK_NOWAIT
,
497 vm_map_unlock(&vm
->vm_map
);
499 * If the process has been asleep for awhile and had
500 * most of its pages taken away already, swap it out.
502 if ((action
& VM_SWAP_NORMAL
) ||
503 ((action
& VM_SWAP_IDLE
) &&
504 (p
->p_slptime
> swap_idle_threshold2
))) {
512 * cleanup our reference
518 * If we swapped something out, and another process needed memory,
519 * then wakeup the sched process.
/*
 * swapout: mark process p as swapped out.
 *
 * Visible steps:
 *  - with SWAP_DEBUG defined, print the pid being swapped out;
 *  - increment p->p_stats->p_ru.ru_nswap;
 *  - store vmspace_resident_count(p->p_vmspace) in p->p_vmspace->vm_swrss;
 *  - clear P_INMEM and set P_SWAPPING in p->p_flag;
 *  - test P_ONRUNQ (the controlled statement is not visible);
 *  - call pmap_swapout_proc(p);
 *  - clear P_SWAPPING.
 *
 * NOTE(review): the return type, the braces, the #endif that closes the
 * SWAP_DEBUG block, and the statement controlled by the P_ONRUNQ test are
 * missing from this extract; confirm against the complete file.
 */
526 swapout(struct proc
*p
)
529 #if defined(SWAP_DEBUG)
530 printf("swapping out %d\n", p
->p_pid
);
532 ++p
->p_stats
->p_ru
.ru_nswap
;
534 * remember the process resident count
536 p
->p_vmspace
->vm_swrss
= vmspace_resident_count(p
->p_vmspace
);
/* P_SWAPPING brackets the pmap_swapout_proc() call. */
539 p
->p_flag
&= ~P_INMEM
;
540 p
->p_flag
|= P_SWAPPING
;
541 if (p
->p_flag
& P_ONRUNQ
)
545 pmap_swapout_proc(p
);
547 p
->p_flag
&= ~P_SWAPPING
;
550 #endif /* !NO_SWAPPING */