4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
26 #include <sys/types.h>
27 #include <sys/param.h>
28 #include <sys/systm.h>
29 #include <sys/errno.h>
31 #include <sys/vnode.h>
34 #include <sys/sysmacros.h>
36 #include <sys/callb.h>
37 #include <sys/debug.h>
39 #include <sys/fs/swapnode.h>
40 #include <sys/fs_subr.h>
41 #include <sys/cmn_err.h>
42 #include <sys/mem_config.h>
43 #include <sys/atomic.h>
46 * swapfs_minfree is the amount of physical memory (actually remaining
47 * availrmem) that we want to keep free for the rest of the system. This
48 * means that swapfs can only grow to availrmem - swapfs_minfree. It
49 * can be set either to a constant value or to a percentage of installed
50 * physical memory. It is set in swapinit().
52 * Users who want to change the amount of memory that can be used as swap
53 * space should do so by setting swapfs_desfree at boot time, not swapfs_minfree.
/*
 * Swap-space tunables, kept as page counts (pgcnt_t). A value of 0 means
 * that no explicit setting was made: swapfs_recalc() replaces a zero with
 * a computed default before storing the result back here.
 */
57 pgcnt_t swapfs_desfree
= 0;
58 pgcnt_t swapfs_minfree
= 0;
59 pgcnt_t swapfs_reserve
= 0;
63 #endif /* SWAPFS_DEBUG */
/*
 * File-private state. swapfs_lock is the mutex held by the vnode-table
 * and request-list functions in this file while they update
 * swap_vnodes[], sw_pendlist and sw_freelist.
 */
66 static int swapfs_vpcount
;
67 static kmutex_t swapfs_lock
;
/*
 * sw_ar is the array of request structures allocated in swapinit();
 * sw_pendlist and sw_freelist are list heads linked through a_next.
 */
68 static struct async_reqs
*sw_ar
, *sw_pendlist
, *sw_freelist
;
70 static struct vnode
**swap_vnodes
; /* ptr's to swap vnodes */
72 static void swap_init_mem_config(void);
/*
 * Copies of the three tunables taken by swapfs_recalc_save_initial(), so
 * that every later swapfs_recalc() starts from the values that were in
 * place at swapinit() time rather than from previously computed defaults.
 */
74 static pgcnt_t initial_swapfs_desfree
;
75 static pgcnt_t initial_swapfs_minfree
;
76 static pgcnt_t initial_swapfs_reserve
;
/* Forward declaration; swap_sync is installed as .vfs_sync in swap_vfsops. */
78 static int swap_sync(struct vfs
*vfsp
, short flag
, struct cred
*cr
);
/*
 * Copy the current swapfs_desfree, swapfs_minfree and swapfs_reserve into
 * the matching initial_* variables. swapinit() calls this immediately
 * before its swapfs_recalc(physmem) call. Takes no arguments.
 */
81 swapfs_recalc_save_initial(void)
83 initial_swapfs_desfree
= swapfs_desfree
;
84 initial_swapfs_minfree
= swapfs_minfree
;
85 initial_swapfs_reserve
= swapfs_reserve
;
/*
 * Recompute the three swapfs tunables for a memory size of pgs pages.
 *
 * Working copies start from the initial_* snapshots. Any copy that is
 * still zero is replaced by a default:
 *   desfree: btopr(7 * 512 * 1024)
 *   minfree: MAX(btopr(2 * 1024 * 1024), pgs >> 3)
 *   reserve: MIN(btopr(128 * 1024 * 1024),
 *            MAX(btopr(4 * 1024 * 1024), pgs >> 7))
 * The candidate minfree is then compared with pgs and with availrmem;
 * when availrmem is below it and segspt_minfree is non-zero and below
 * availrmem, segspt_minfree is used instead. The working copies are
 * finally stored into swapfs_desfree, swapfs_minfree and swapfs_reserve.
 *
 * Callers in this file treat a zero (false) result as failure.
 * NOTE(review): the return statements and the bodies of the failing
 * branches are not visible in this view - confirm which paths leave the
 * globals untouched.
 */
89 swapfs_recalc(pgcnt_t pgs
)
91 pgcnt_t new_swapfs_desfree
;
92 pgcnt_t new_swapfs_minfree
;
93 pgcnt_t new_swapfs_reserve
;
95 new_swapfs_desfree
= initial_swapfs_desfree
;
96 new_swapfs_minfree
= initial_swapfs_minfree
;
97 new_swapfs_reserve
= initial_swapfs_reserve
;
99 if (new_swapfs_desfree
== 0)
100 new_swapfs_desfree
= btopr(7 * 512 * 1024); /* 3-1/2Mb */;
/* NOTE(review): the semicolon after the comment above is an extra empty statement. */
102 if (new_swapfs_minfree
== 0) {
104 * We set this lower than we'd like here, 2Mb, because we
105 * always boot on swapfs. It's up to a safer value,
106 * swapfs_desfree, when/if we add physical swap devices
107 * in swapadd(). Users who want to change the amount of
108 * memory that can be used as swap space should do so by
109 * setting swapfs_desfree at boot time, not swapfs_minfree.
110 * However, swapfs_minfree is tunable by install as a
111 * workaround for bugid 1147463.
113 new_swapfs_minfree
= MAX(btopr(2 * 1024 * 1024), pgs
>> 3);
117 * priv processes can reserve memory as swap as long as availrmem
118 * remains greater than swapfs_minfree; in the case of non-priv
119 * processes, memory can be reserved as swap only if availrmem
120 * doesn't fall below (swapfs_minfree + swapfs_reserve). Thus,
121 * swapfs_reserve amount of memswap is not available to non-priv
122 * processes. This protects daemons such as automounter dying
123 * as a result of application processes eating away almost entire
124 * membased swap. This safeguard becomes useless if apps are run
127 * set swapfs_reserve to a minimum of 4Mb or 1/128 of physmem whichever
128 * is greater up to the limit of 128 MB.
130 if (new_swapfs_reserve
== 0)
131 new_swapfs_reserve
= MIN(btopr(128 * 1024 * 1024),
132 MAX(btopr(4 * 1024 * 1024), pgs
>> 7));
134 /* Test basic numeric viability. */
135 if (new_swapfs_minfree
> pgs
)
138 /* Equivalent test to anon_resvmem() check. */
139 if (availrmem
< new_swapfs_minfree
) {
141 * If ism pages are being used, then there must be agreement
142 * between these two policies.
144 if ((availrmem
> segspt_minfree
) && (segspt_minfree
> 0)) {
145 new_swapfs_minfree
= segspt_minfree
;
151 swapfs_desfree
= new_swapfs_desfree
;
152 swapfs_minfree
= new_swapfs_minfree
;
153 swapfs_reserve
= new_swapfs_reserve
;
/*
 * VFS operations vector for swapfs. The only member set in the visible
 * lines is vfs_sync, which points at swap_sync(). swapinit() passes the
 * address of this structure to vfs_setfsops().
 */
158 static const struct vfsops swap_vfsops
= {
159 .vfs_sync
= swap_sync
,
/*
 * Set up the swapfs state kept in this file.
 *
 * fstype is passed to vfs_setfsops() together with swap_vfsops.
 * name is not referenced in the lines visible here.
 *
 * Visible steps, in order:
 *  - initialize swapfs_lock;
 *  - allocate a zeroed table of MAX_SWAP_VNODES vnode pointers;
 *  - snapshot the tunables and run swapfs_recalc(physmem), raising a
 *    CE_PANIC through cmn_err() when it reports failure;
 *  - register the memory-configuration callbacks;
 *  - allocate sw_ar with sw_freelist_size (klustsize / PAGESIZE * 2)
 *    zeroed entries;
 *  - install the vfs operations, with a CE_WARN message on the error path;
 *  - chain sw_ar[0] .. sw_ar[sw_freelist_size - 2] to their successors
 *    through a_next; the last entry is not written by the loop.
 *
 * NOTE(review): the declarations of i and error, the test on error, the
 * assignment of sw_freelist and the return statements are not visible in
 * this view.
 */
164 swapinit(int fstype
, char *name
)
165 { /* reserve for mp */
166 ssize_t sw_freelist_size
= klustsize
/ PAGESIZE
* 2;
169 SWAPFS_PRINT(SWAP_SUBR
, "swapinit\n", 0, 0, 0, 0, 0);
170 mutex_init(&swapfs_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
172 swap_vnodes
= kmem_zalloc(MAX_SWAP_VNODES
* sizeof (struct vnode
*),
175 swapfs_recalc_save_initial();
176 if (!swapfs_recalc(physmem
))
177 cmn_err(CE_PANIC
, "swapfs_minfree(%lu) > physmem(%lu)",
178 swapfs_minfree
, physmem
);
181 * Arrange for a callback on memory size change.
183 swap_init_mem_config();
185 sw_ar
= (struct async_reqs
*)
186 kmem_zalloc(sw_freelist_size
*sizeof (struct async_reqs
), KM_SLEEP
);
188 error
= vfs_setfsops(fstype
, &swap_vfsops
);
190 cmn_err(CE_WARN
, "swapinit: bad vfs ops template");
195 for (i
= 0; i
< sw_freelist_size
- 1; i
++)
196 sw_ar
[i
].a_next
= &sw_ar
[i
+ 1];
202 * Get a swapfs vnode corresponding to the specified identifier.
/*
 * Look up the swapfs vnode for slot vidx of swap_vnodes[].
 *
 * The slot is read once without the lock and read again after taking
 * swapfs_lock. Inside the locked region a vnode is obtained from
 * vn_alloc(KM_SLEEP), given swap_vnodeops, flagged VISSWAP | VISSWAPFS
 * and stored into the slot.
 *
 * vidx is used directly as an array index; no bounds check is visible.
 *
 * NOTE(review): the conditions guarding the early exit and the allocation
 * are not visible in this view; presumably the allocation happens only
 * when the slot is still empty after the second read - confirm.
 */
205 swapfs_getvp(ulong_t vidx
)
209 vp
= swap_vnodes
[vidx
];
214 mutex_enter(&swapfs_lock
);
215 vp
= swap_vnodes
[vidx
];
217 vp
= vn_alloc(KM_SLEEP
);
218 vn_setops(vp
, &swap_vnodeops
);
220 vp
->v_flag
|= (VISSWAP
|VISSWAPFS
);
221 swap_vnodes
[vidx
] = vp
;
224 mutex_exit(&swapfs_lock
);
/*
 * vfs_sync operation for swapfs (installed through swap_vfsops).
 *
 * flag is tested against SYNC_ALL. The loop then walks i over
 * 0 .. MAX_SWAP_VNODES - 1 and calls
 * fop_putpage(vp, 0, 0, B_ASYNC | B_FREE, kcred, NULL), discarding the
 * result. swapfs_lock is not taken here.
 *
 * vfsp and cr are not referenced in the lines visible here.
 *
 * NOTE(review): the action taken when SYNC_ALL is clear, the way vp is
 * selected on each iteration and the return value are on lines that are
 * not visible in this view.
 */
232 swap_sync(struct vfs
*vfsp
, short flag
, struct cred
*cr
)
237 if (!(flag
& SYNC_ALL
))
241 * assumes that we are the only one left to access this so that
242 * no need to use swapfs_lock (since it's staticly defined)
244 for (i
= 0; i
< MAX_SWAP_VNODES
; i
++) {
248 (void) fop_putpage(vp
, 0, 0,
249 (B_ASYNC
| B_FREE
), kcred
, NULL
);
/*
 * Running total for the pending request list, declared extern here. The
 * queue functions below adjust it by PAGESIZE for every request added or
 * removed, while holding swapfs_lock.
 */
256 extern int sw_pending_size
;
259 * Take an async request off the pending queue
/*
 * Body of the routine that takes a request off the pending list.
 *
 * Under swapfs_lock it advances sw_pendlist to arg->a_next and lowers
 * sw_pending_size by PAGESIZE, then asserts that sw_pending_size has not
 * gone negative before dropping the lock.
 *
 * NOTE(review): the function header, the assignment that loads arg, any
 * empty-list check and the return statement are not visible in this view.
 * The name sw_getreq is an assumption based on the sibling sw_putreq.
 */
264 struct async_reqs
*arg
;
266 mutex_enter(&swapfs_lock
);
269 sw_pendlist
= arg
->a_next
;
271 sw_pending_size
-= PAGESIZE
;
273 ASSERT(sw_pending_size
>= 0);
274 mutex_exit(&swapfs_lock
);
279 * Put an async request on the pending queue
/*
 * Add the request arg to the pending list.
 *
 * Under swapfs_lock, arg->a_next is pointed at the current sw_pendlist
 * and sw_pending_size grows by PAGESIZE.
 *
 * NOTE(review): the statement that makes arg the new list head, and any
 * work done before the lock is taken, are not visible in this view.
 */
282 sw_putreq(struct async_reqs
*arg
)
287 mutex_enter(&swapfs_lock
);
288 arg
->a_next
= sw_pendlist
;
290 sw_pending_size
+= PAGESIZE
;
291 mutex_exit(&swapfs_lock
);
295 * Put an async request back on the pending queue
/*
 * Return the request arg to the pending list.
 *
 * The visible statements match sw_putreq(): under swapfs_lock,
 * arg->a_next is pointed at the current sw_pendlist and sw_pending_size
 * grows by PAGESIZE.
 *
 * NOTE(review): the statement that makes arg the new list head is not
 * visible in this view.
 */
298 sw_putbackreq(struct async_reqs
*arg
)
300 mutex_enter(&swapfs_lock
);
301 arg
->a_next
= sw_pendlist
;
303 sw_pending_size
+= PAGESIZE
;
304 mutex_exit(&swapfs_lock
);
308 * Take an async request structure off the free list
/*
 * Body of the routine that takes a request structure off the free list.
 *
 * Under swapfs_lock it advances sw_freelist to arg->a_next.
 *
 * NOTE(review): the function header, the assignment that loads arg, any
 * empty-list check and the return statement are not visible in this view.
 * The name sw_getfree is an assumption based on the sibling sw_putfree.
 */
313 struct async_reqs
*arg
;
315 mutex_enter(&swapfs_lock
);
318 sw_freelist
= arg
->a_next
;
321 mutex_exit(&swapfs_lock
);
326 * Put an async request structure on the free list
/*
 * Put the request structure arg back on the free list.
 *
 * Under swapfs_lock, arg->a_next is pointed at the current sw_freelist.
 *
 * NOTE(review): the release step announced by the inner comment and the
 * statement that makes arg the new list head are not visible in this view.
 */
329 sw_putfree(struct async_reqs
*arg
)
331 /* Release our hold - should have locked the page by now */
334 mutex_enter(&swapfs_lock
);
335 arg
->a_next
= sw_freelist
;
337 mutex_exit(&swapfs_lock
);
/*
 * Page count that the memory-delete callbacks add to (pre_del) and
 * subtract from (post_del) with atomic_add_long_nv(). It is subtracted
 * from physmem whenever the callbacks call swapfs_recalc().
 */
340 static pgcnt_t swapfs_pending_delete
;
/*
 * Memory-configuration callback listed second in swap_mem_config_vec.
 * Recomputes the swapfs tunables for physmem - swapfs_pending_delete
 * pages and ignores the result of swapfs_recalc().
 * NOTE(review): the parameter list is not visible in this view.
 */
344 swap_mem_config_post_add(
348 (void) swapfs_recalc(physmem
- swapfs_pending_delete
);
/*
 * Memory-configuration callback listed third in swap_mem_config_vec.
 *
 * Atomically adds delta_swaps to swapfs_pending_delete and keeps the new
 * total in nv, then runs swapfs_recalc(physmem - nv). When that reports
 * failure a CE_NOTE message is logged saying the memory operation was
 * refused.
 *
 * NOTE(review): the parameter list and the value returned on each path
 * are not visible in this view. The visible comment fragment says that
 * tidy-up is left to the post_del callback.
 */
353 swap_mem_config_pre_del(
359 nv
= atomic_add_long_nv(&swapfs_pending_delete
, (spgcnt_t
)delta_swaps
);
360 if (!swapfs_recalc(physmem
- nv
)) {
362 * Tidy-up is done by the call to post_del which
365 cmn_err(CE_NOTE
, "Memory operation refused to ensure system "
366 "doesn't deadlock due to excessive consumption by swapfs.");
/*
 * Memory-configuration callback listed last in swap_mem_config_vec.
 *
 * Atomically subtracts delta_swaps from swapfs_pending_delete, undoing
 * the addition made by swap_mem_config_pre_del(), and reruns
 * swapfs_recalc() with physmem minus the new pending total. The result
 * of swapfs_recalc() is ignored.
 *
 * NOTE(review): the parameter list is not visible in this view.
 */
374 swap_mem_config_post_del(
381 nv
= atomic_add_long_nv(&swapfs_pending_delete
, -(spgcnt_t
)delta_swaps
);
382 (void) swapfs_recalc(physmem
- nv
);
/*
 * Callback vector handed to kphysm_setup_func_register() by
 * swap_init_mem_config(). Positional initializer: interface version
 * first, then the post_add, pre_del and post_del callbacks defined above.
 */
385 static kphysm_setup_vector_t swap_mem_config_vec
= {
386 KPHYSM_SETUP_VECTOR_VERSION
,
387 swap_mem_config_post_add
,
388 swap_mem_config_pre_del
,
389 swap_mem_config_post_del
,
/*
 * Register swap_mem_config_vec with kphysm_setup_func_register(), passing
 * NULL as the second argument, and keep the result in ret. Called from
 * swapinit().
 * NOTE(review): how ret is checked is not visible in this view.
 */
393 swap_init_mem_config(void)
397 ret
= kphysm_setup_func_register(&swap_mem_config_vec
, NULL
);