/*
 * Copyright (c) 1994 John Dyson
 * Copyright (c) 2001,2016 Matt Dillon
 * Copyright (c) 2010,2016 The DragonFly Project
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Venkatesh Srinivas <me@endeavour.zapto.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
 * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/vmmeter.h>
#include <sys/sched.h>
#include <sys/sysctl.h>
#include <sys/thread.h>
#include <sys/kthread.h>
#include <sys/unistd.h>

#include <vm/vm_page.h>
#include <cpu/lwbuf.h>

#include <sys/thread2.h>
#include <vm/vm_page2.h>
/*
 * Remove this file in 2017.
 *
 * REMOVED - Basically does not provide any performance benefit and instead
 *           appears to cause a performance detriment.  I surmise the issue
 *           is simply that it takes such an enormous amount of time to read
 *           data from dynamic ram that what really matters for a page-fault
 *           is not that the page is zeroed but that its cache is hot.
 *
 *           Zeroing the page during idle periods means the page is likely
 *           to be cold in the cache when it actually gets used.  Zeroing the
 *           page in-line with the VM fault, on the other hand, not only
 *           ensures that the memory will be hot in the cache, the zeroing
 *           operation itself does not actually have to read the dynamic ram:
 *           it really only writes into the cache (for a 4K page), so the
 *           page is already hot when the user program then accesses it.
 */
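
/*
 * For reference, "zeroing in-line with the VM fault" amounts to zeroing a
 * page at allocation/fault time when it is not already PG_ZERO, roughly
 * (a sketch, not the literal fault-path code):
 *
 *	if ((m->flags & PG_ZERO) == 0)
 *		pmap_zero_page(VM_PAGE_TO_PHYS(m));
 *
 * so the stores land in caches the faulting thread is about to touch anyway.
 */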
/*
 * Implement the pre-zeroed page mechanism.
 */
/* Number of bytes to zero between reschedule checks */
#define IDLEZERO_RUN	(64)

/* Maximum number of pages per second to zero */
#define NPAGES_RUN	(20000)
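
/*
 * The rate cap works out as follows: the pagezero thread sleeps for
 * DEFAULT_SLEEP_TIME (hz / 10, roughly a tenth of a second) and, when
 * zeroing is warranted, queues at most idlezero_rate / 10 pages per
 * wakeup, i.e. about idlezero_rate (default NPAGES_RUN) pages per second.
 */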
static int idlezero_enable = 1;
TUNABLE_INT("vm.idlezero_enable", &idlezero_enable);
SYSCTL_INT(_vm, OID_AUTO, idlezero_enable, CTLFLAG_RW, &idlezero_enable, 0,
	   "Allow the kernel to use idle CPU cycles to zero pages");
static int idlezero_rate = NPAGES_RUN;
SYSCTL_INT(_vm, OID_AUTO, idlezero_rate, CTLFLAG_RW, &idlezero_rate, 0,
	   "Maximum pages per second to zero");
static int idlezero_nocache = -1;
SYSCTL_INT(_vm, OID_AUTO, idlezero_nocache, CTLFLAG_RW, &idlezero_nocache, 0,
	   "Use non-temporal (cache-bypassing) stores when zeroing pages");
static ulong idlezero_count = 0;
SYSCTL_ULONG(_vm, OID_AUTO, idlezero_count, CTLFLAG_RD, &idlezero_count, 0,
	     "The number of physical pages prezeroed at idle time");
#define DEFAULT_SLEEP_TIME	(hz / 10)
#define LONG_SLEEP_TIME		(hz * 10)
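
/*
 * States for the pagezero kthread state machine implemented in
 * vm_pagezero() below.
 */
enum zeroidle_state {
	STATE_IDLE,
	STATE_GET_PAGE,
	STATE_ZERO_PAGE,
	STATE_RELEASE_PAGE
};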
/*
 * Attempt to maintain approximately 1/2 of our free pages in a
 * PG_ZERO'd state.  Add some hysteresis to (attempt to) avoid
 * generally zeroing a page when the system is near steady-state.
 * Otherwise we might get 'flutter' during disk I/O / IPC or
 * fast sleeps.  We also do not want to be continuously zeroing
 * pages because doing so may flush our L1 and L2 caches too much.
 *
 * Returns non-zero if pages should be zeroed.
 */
static int
vm_page_zero_check(int *zero_countp, int *zero_statep)
{
	int base;
	int count;
	int nz;
	int nt;
	int i;

	*zero_countp = 0;
	if (idlezero_enable == 0)
		return (0);

	/*
	 * Scan the portion of the PQ_FREE queues handled by this cpu and
	 * tally how many of those free pages are already marked PG_ZERO.
	 */
	base = vm_get_pg_color(mycpu, NULL, 0) & PQ_L2_MASK;
	count = 16;
	while (count < PQ_L2_SIZE / ncpus)
		count <<= 1;
	if (base + count > PQ_L2_SIZE)
		count = PQ_L2_SIZE - base;

	for (i = nt = nz = 0; i < count; ++i) {
		struct vpgqueues *vpq = &vm_page_queues[PQ_FREE + base + i];
		nz += vpq->zero_count;
		nt += vpq->lcnt;
	}

	/*
	 * Express the zeroed pages as a percentage of the scanned free
	 * pages, avoiding a division by zero when the queues are nearly
	 * empty.
	 */
	if (nt > 10)
		*zero_countp = nz * 100 / nt;
	else
		*zero_countp = 100;

	if (*zero_statep == 0) {
		/*
		 * Wait for the count to fall to LO before starting
		 * to zero pages.
		 */
		if (*zero_countp <= 50)
			*zero_statep = 1;
	} else {
		/*
		 * Once we are zeroing pages wait for the count to
		 * increase to HI before we stop zeroing pages.
		 */
		if (*zero_countp >= 90)
			*zero_statep = 0;
	}
	return (*zero_statep);
}
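
/*
 * Example: if this cpu's slice of the PQ_FREE queues holds 1000 pages of
 * which 400 are already PG_ZERO, zero_count comes out to 40.  That is at
 * or below the LO threshold (50), so zeroing starts, and it continues until
 * the ratio reaches the HI threshold (90), providing the hysteresis
 * described in the comment above vm_page_zero_check().
 */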
/*
 * vm_pagezero should sleep for a longer time when idlezero is disabled or
 * when there is an excess of zeroed pages.
 */
static int
vm_page_zero_time(int zero_count)
{
	if (idlezero_enable == 0)
		return (LONG_SLEEP_TIME);
	if (zero_count >= 90)
		return (LONG_SLEEP_TIME);
	return (DEFAULT_SLEEP_TIME);
}
static void
vm_pagezero(void *arg)
{
	vm_page_t m = NULL;
	struct lwbuf *lwb = NULL;
	struct lwbuf lwb_cache;
	enum zeroidle_state state = STATE_IDLE;
	char *pg = NULL;
	int npages = 0;
	int sleep_time;
	int i = 0;
	int cpu = (int)(intptr_t)arg;
	int zero_state = 0;

	/*
	 * Adjust thread parameters before entering our loop.  The thread
	 * is started with the MP lock held and with normal kernel thread
	 * priority.
	 *
	 * Also put us on the last cpu for now.
	 *
	 * For now leave the MP lock held, the VM routines cannot be called
	 * with it released until tokenization is finished.
	 */
	lwkt_setpri_self(TDPRI_IDLE_WORK);
	lwkt_setcpu_self(globaldata_find(cpu));
	sleep_time = DEFAULT_SLEEP_TIME;

	/*
	 * Loop forever
	 */
	for (;;) {
		int zero_count;

		switch (state) {
		case STATE_IDLE:
			/*
			 * Wait for work.
			 */
			tsleep(&zero_state, 0, "pgzero", sleep_time);
			if (vm_page_zero_check(&zero_count, &zero_state))
				npages = idlezero_rate / 10;
			sleep_time = vm_page_zero_time(zero_count);
			if (npages)
				state = STATE_GET_PAGE;	/* Fallthrough */
			break;
		case STATE_GET_PAGE:
			/*
			 * Acquire page to zero
			 */
			if (--npages == 0) {
				state = STATE_IDLE;
			} else {
				m = vm_page_free_fromq_fast();
				if (m == NULL) {
					state = STATE_IDLE;
				} else {
					state = STATE_ZERO_PAGE;
					lwb = lwbuf_alloc(m, &lwb_cache);
					pg = (char *)lwbuf_kva(lwb);
					i = 0;
				}
			}
			break;
		case STATE_ZERO_PAGE:
			/*
			 * Zero-out the page, one IDLEZERO_RUN at a time,
			 * yielding between runs so we do not hog the cpu.
			 */
			while (i < PAGE_SIZE) {
				if (idlezero_nocache == 1)
					bzeront(&pg[i], IDLEZERO_RUN);
				else
					bzero(&pg[i], IDLEZERO_RUN);
				i += IDLEZERO_RUN;
				lwkt_yield();
			}
			state = STATE_RELEASE_PAGE;
			break;
		case STATE_RELEASE_PAGE:
			/*
			 * Return the now-zeroed page to the free queue
			 * with PG_ZERO set.
			 */
			lwbuf_free(lwb);
			vm_page_flag_set(m, PG_ZERO);
			vm_page_free_toq(m);
			state = STATE_GET_PAGE;
			++idlezero_count;	/* non-locked, SMP race ok */
			break;
		}
		lwkt_yield();
	}
}
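
/*
 * Boot-time setup: start one pagezero kthread per cpu and, when the cpu
 * supports non-temporal stores (CPU_MI_BZERONT) and the administrator has
 * not set vm.idlezero_nocache explicitly, default to the cache-bypassing
 * bzeront() path.
 */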
static void
pagezero_start(void __unused *arg)
{
	struct thread *td;
	int i;

	if (idlezero_nocache < 0 && (cpu_mi_feature & CPU_MI_BZERONT))
		idlezero_nocache = 1;

	for (i = 0; i < ncpus; ++i) {
		kthread_create(vm_pagezero, (void *)(intptr_t)i,
			       &td, "pagezero %d", i);
	}
}
SYSINIT(pagezero, SI_SUB_KTHREAD_VM, SI_ORDER_ANY, pagezero_start, NULL);