4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright 2015 Joyent, Inc.
27 * Copyright (c) 1987, 2010, Oracle and/or its affiliates. All rights reserved.
30 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
31 /* All Rights Reserved */
34 * University Copyright- Copyright (c) 1982, 1986, 1988
35 * The Regents of the University of California
38 * University Acknowledgment- Portions of this document are derived from
39 * software developed by the University of California, Berkeley, and its
44 * Each physical swap area has an associated bitmap representing
45 * its physical storage. The bitmap records which swap slots are
46 * currently allocated or freed. Allocation is done by searching
47 * through the bitmap for the first free slot. Thus, there's
48 * no linear relation between offset within the swap device and the
49 * address (within its segment(s)) of the page that the slot backs;
50 * instead, it's an arbitrary one-to-one mapping.
52 * Associated with each swap area is a swapinfo structure. These
53 * structures are linked into a linear list that determines the
54 * ordering of swap areas in the logical swap device. Each contains a
55 * pointer to the corresponding bitmap, the area's size, and its
59 #include <sys/types.h>
60 #include <sys/inttypes.h>
61 #include <sys/param.h>
62 #include <sys/t_lock.h>
63 #include <sys/sysmacros.h>
64 #include <sys/systm.h>
65 #include <sys/errno.h>
68 #include <sys/vnode.h>
69 #include <sys/pathname.h>
70 #include <sys/cmn_err.h>
71 #include <sys/vtrace.h>
73 #include <sys/dumphdr.h>
74 #include <sys/debug.h>
75 #include <sys/fs/snode.h>
76 #include <sys/fs/swapnode.h>
77 #include <sys/policy.h>
83 #include <vm/seg_vn.h>
86 #include <vm/seg_map.h>
89 * To balance the load among multiple swap areas, we don't allow
90 * more than swap_maxcontig allocations to be satisfied from a
91 * single swap area before moving on to the next swap area. This
92 * effectively "interleaves" allocations among the many swap areas.
94 int swap_maxcontig
; /* set by anon_init() to 1 Mb */
96 #define MINIROOTSIZE 12000 /* ~6 Meg XXX */
99 * XXX - this lock is a kludge. It serializes some aspects of swapadd() and
100 * swapdel() (namely VOP_OPEN, VOP_CLOSE, VN_RELE). It protects against
101 * somebody swapadd'ing and getting swap slots from a vnode, while someone
102 * else is in the process of closing or rele'ing it.
104 static kmutex_t swap_lock
;
106 kmutex_t swapinfo_lock
;
109 * protected by the swapinfo_lock
111 struct swapinfo
*swapinfo
;
113 static struct swapinfo
*silast
;
114 static int nswapfiles
;
116 static u_offset_t
swap_getoff(struct swapinfo
*);
117 static int swapadd(struct vnode
*, ulong_t
, ulong_t
, char *);
118 static int swapdel(struct vnode
*, ulong_t
);
119 static int swapslot_free(struct vnode
*, u_offset_t
, struct swapinfo
*);
122 * swap device bitmap allocation macros
/*
 * Swap slot bitmap helpers.
 *
 * "map" is an array of bitmap words and "i" is a slot (page) number.
 * (i) >> MAPSHIFT selects the word and (i) % NBBW selects the bit inside it.
 * NOTE(review): MAPSHIFT, NBPW and NBBY are defined outside this file; the
 * macros assume (1 << MAPSHIFT) == NBBW -- confirm against their definitions.
 *
 * The mask is built from an unsigned constant: with a signed "1", selecting
 * the top bit of a 32-bit word (1 << 31) shifts into the sign bit, which is
 * undefined behaviour in C.
 *
 * TESTBIT yields non-zero when the slot's bit is set; SETBIT and CLEARBIT
 * modify the word in place and yield the updated word.
 */
#define	NBBW		(NBPW * NBBY)	/* number of bits per word */
#define	TESTBIT(map, i)		(((map)[(i) >> MAPSHIFT] & (1U << ((i) % NBBW))))
#define	SETBIT(map, i)		(((map)[(i) >> MAPSHIFT] |= (1U << ((i) % NBBW))))
#define	CLEARBIT(map, i)	(((map)[(i) >> MAPSHIFT] &= ~(1U << ((i) % NBBW))))
130 int swap_debug
= 0; /* set for debug printf's */
131 int swap_verify
= 0; /* set to verify slots when freeing and allocating */
133 uint_t swapalloc_maxcontig
;
136 * Allocate a range of up to *lenp contiguous slots (pages) from a physical
137 * swap device. Flags are one of:
138 * SA_NOT Must have a slot from a physical swap device other than the
139 * one containing input (*vpp, *offp).
140 * Fewer slots than requested may be returned. *lenp allocated slots are
141 * returned starting at *offp on *vpp.
142 * Returns 1 for a successful allocation, 0 for couldn't allocate any slots.
151 struct swapinfo
*sip
;
155 mutex_enter(&swapinfo_lock
);
158 /* Find a desirable physical device and allocate from it. */
162 if (!(sip
->si_flags
& ST_INDEL
) &&
163 (spgcnt_t
)sip
->si_nfpgs
> 0) {
164 /* Caller wants other than specified swap device */
165 if (flags
& SA_NOT
) {
166 if (*vpp
!= sip
->si_vp
||
167 *offp
< sip
->si_soff
||
168 *offp
>= sip
->si_eoff
)
170 /* Caller is loose, will take anything */
173 } else if (sip
->si_nfpgs
== 0)
175 if ((sip
= sip
->si_next
) == NULL
)
177 } while (sip
!= silast
);
178 mutex_exit(&swapinfo_lock
);
181 soff
= swap_getoff(sip
);
184 panic("swap_alloc: swap_getoff failed!");
186 for (len
= PAGESIZE
; len
< *lenp
; len
+= PAGESIZE
) {
187 if (sip
->si_nfpgs
== 0)
189 if (swapalloc_maxcontig
&& len
>= swapalloc_maxcontig
)
191 noff
= swap_getoff(sip
);
194 } else if (noff
!= soff
+ len
) {
195 CLEARBIT(sip
->si_swapslots
, btop(noff
- sip
->si_soff
));
203 ASSERT((spgcnt_t
)sip
->si_nfpgs
>= 0);
204 sip
->si_allocs
+= btop(len
);
205 if (sip
->si_allocs
>= swap_maxcontig
) {
207 if ((silast
= sip
->si_next
) == NULL
)
210 TRACE_2(TR_FAC_VM
, TR_SWAP_ALLOC
,
211 "swap_alloc:sip %p offset %lx", sip
, soff
);
212 mutex_exit(&swapinfo_lock
);
216 int swap_backsearch
= 0;
219 * Get a free offset on swap device sip.
220 * Return >=0 offset if succeeded, -1 for failure.
223 swap_getoff(struct swapinfo
*sip
)
226 size_t aoff
, boff
, poff
, slotnumber
;
228 ASSERT(MUTEX_HELD(&swapinfo_lock
));
231 for (sp
= &sip
->si_swapslots
[sip
->si_hint
>> MAPSHIFT
],
232 ep
= &sip
->si_swapslots
[sip
->si_mapsize
/ NBPW
]; sp
< ep
; sp
++) {
233 if (*sp
!= (uint_t
)0xffffffff)
239 "swap_getoff: couldn't find slot from hint %ld to end\n",
240 sip
->si_hint
, 0, 0, 0, 0);
242 * Go backwards? Check for faster method XXX
244 if (swap_backsearch
) {
245 for (sp
= &sip
->si_swapslots
[sip
->si_hint
>> MAPSHIFT
],
246 ep
= sip
->si_swapslots
; sp
> ep
; sp
--) {
247 if (*sp
!= (uint_t
)0xffffffff)
253 for (sp
= sip
->si_swapslots
,
254 ep
= &sip
->si_swapslots
[sip
->si_hint
>> MAPSHIFT
];
256 if (*sp
!= (uint_t
)0xffffffff)
262 if (*sp
== 0xffffffff) {
263 cmn_err(CE_WARN
, "No free swap slots!");
264 return ((u_offset_t
)-1);
269 * aoff is the page number offset (in bytes) of the si_swapslots
270 * array element containing a free page
272 * boff is the page number offset of the free page
273 * (i.e. cleared bit) in si_swapslots[aoff].
275 aoff
= ((char *)sp
- (char *)sip
->si_swapslots
) * NBBY
;
277 for (boff
= (sip
->si_hint
% NBBW
); boff
< NBBW
; boff
++) {
278 if (!TESTBIT(sip
->si_swapslots
, aoff
+ boff
))
283 for (boff
= 0; boff
< (sip
->si_hint
% NBBW
); boff
++) {
284 if (!TESTBIT(sip
->si_swapslots
, aoff
+ boff
))
289 panic("swap_getoff: didn't find slot in word hint %ld", sip
->si_hint
);
293 * Return the offset of the free page in swap device.
294 * Convert page number to byte offset and add starting
295 * offset of swap device.
297 slotnumber
= aoff
+ boff
;
298 SWAP_PRINT(SW_ALLOC
, "swap_getoff: allocating slot %ld\n",
299 slotnumber
, 0, 0, 0, 0);
300 poff
= ptob(slotnumber
);
301 if (poff
+ sip
->si_soff
>= sip
->si_eoff
)
302 printf("ptob(aoff(%ld) + boff(%ld))(%ld) >= eoff(%ld)\n",
303 aoff
, boff
, ptob(slotnumber
), (long)sip
->si_eoff
);
304 ASSERT(poff
< sip
->si_eoff
);
306 * We could verify here that the slot isn't already allocated
307 * by looking through all the anon slots.
309 SETBIT(sip
->si_swapslots
, slotnumber
);
310 sip
->si_hint
= slotnumber
+ 1; /* hint = next slot */
311 return (poff
+ sip
->si_soff
);
318 swap_phys_free(struct vnode
*vp
, u_offset_t off
, size_t len
)
320 struct swapinfo
*sip
;
321 ssize_t pagenumber
, npage
;
323 mutex_enter(&swapinfo_lock
);
327 if (sip
->si_vp
== vp
&&
328 sip
->si_soff
<= off
&& off
< sip
->si_eoff
) {
329 for (pagenumber
= btop(off
- sip
->si_soff
),
330 npage
= btop(len
) + pagenumber
;
331 pagenumber
< npage
; pagenumber
++) {
333 "swap_phys_free: freeing slot %ld on "
335 pagenumber
, sip
, 0, 0, 0);
336 if (!TESTBIT(sip
->si_swapslots
, pagenumber
)) {
338 "swap_phys_free: freeing free slot "
339 "%p,%lx\n", (void *)vp
,
340 ptob(pagenumber
) + sip
->si_soff
);
342 CLEARBIT(sip
->si_swapslots
, pagenumber
);
345 ASSERT(sip
->si_nfpgs
<= sip
->si_npgs
);
346 mutex_exit(&swapinfo_lock
);
349 } while ((sip
= sip
->si_next
) != NULL
);
350 panic("swap_phys_free");
355 * Return the anon struct corresponding to the given
356 * <vnode, off> if it is part of the virtual swap device.
357 * Return the anon struct if found, otherwise NULL.
360 swap_anon(struct vnode
*vp
, u_offset_t off
)
364 ASSERT(MUTEX_HELD(AH_MUTEX(vp
, off
)));
366 for (ap
= anon_hash
[ANON_HASH(vp
, off
)]; ap
!= NULL
; ap
= ap
->an_hash
) {
367 if (ap
->an_vp
== vp
&& ap
->an_off
== off
)
375 * Determine if the vp offset range overlaps a swap device.
378 swap_in_range(struct vnode
*vp
, u_offset_t offset
, size_t len
)
380 struct swapinfo
*sip
;
384 ASSERT(eoff
> offset
);
386 mutex_enter(&swapinfo_lock
);
390 if (vp
!= sip
->si_vp
|| eoff
<= sip
->si_soff
||
391 offset
>= sip
->si_eoff
)
393 mutex_exit(&swapinfo_lock
);
395 } while ((sip
= sip
->si_next
) != NULL
);
397 mutex_exit(&swapinfo_lock
);
402 * See if name is one of our swap files
403 * even though lookupname failed.
404 * This can be used by swapdel to delete
405 * swap resources on remote machines
406 * where the link has gone down.
408 static struct vnode
*
410 char *name
, /* pathname to delete */
411 ulong_t lowblk
) /* Low block number of area to delete */
413 struct swapinfo
**sipp
, *osip
;
417 * Find the swap file entry for the file to
418 * be deleted. Skip any entries that are in
422 soff
= ptob(btopr(lowblk
<< SCTRSHFT
)); /* must be page aligned */
424 mutex_enter(&swapinfo_lock
);
425 for (sipp
= &swapinfo
; (osip
= *sipp
) != NULL
; sipp
= &osip
->si_next
) {
426 if ((strcmp(osip
->si_pname
, name
) == 0) &&
427 (osip
->si_soff
== soff
) && (osip
->si_flags
== 0)) {
428 struct vnode
*vp
= osip
->si_vp
;
431 mutex_exit(&swapinfo_lock
);
435 mutex_exit(&swapinfo_lock
);
441 * New system call to manipulate swap files.
444 swapctl(int sc_cmd
, void *sc_arg
, int *rv
)
446 struct swapinfo
*sip
, *csip
, *tsip
;
448 struct swapent st
, *ust
;
461 int global
= INGLOBALZONE(curproc
);
462 struct zone
*zp
= curproc
->p_zone
;
465 * When running in a zone we want to hide the details of the swap
466 * devices: we report there only being one swap device named "swap"
467 * having a size equal to the sum of the sizes of all real swap devices
480 * Return anoninfo information with these changes:
481 * ani_max = maximum amount of swap space
482 * (including potentially available physical memory)
483 * ani_free = amount of unallocated anonymous memory
484 * (some of which might be reserved and including
485 * potentially available physical memory)
486 * ani_resv = amount of claimed (reserved) anonymous memory
488 avail
= MAX((spgcnt_t
)(availrmem
- swapfs_minfree
), 0);
489 ai
.ani_max
= (k_anoninfo
.ani_max
+
490 k_anoninfo
.ani_mem_resv
) + avail
;
492 /* Update ani_free */
494 ai
.ani_free
= k_anoninfo
.ani_free
+ avail
;
496 ai
.ani_resv
= k_anoninfo
.ani_phys_resv
+
497 k_anoninfo
.ani_mem_resv
;
499 if (!global
&& zp
->zone_max_swap_ctl
!= UINT64_MAX
) {
501 * We're in a non-global zone with a swap cap. We
502 * always report the system-wide values for the global
503 * zone, even though it too can have a swap cap.
507 * For a swap-capped zone, the numbers are contrived
508 * since we don't have a correct value of 'reserved'
511 * The ani_max value is always the zone's swap cap.
513 * The ani_free value is always the difference between
514 * the cap and the amount of swap in use by the zone.
516 * The ani_resv value is typically set to be the amount
517 * of swap in use by the zone, but can be adjusted
518 * upwards to indicate how much swap is currently
519 * unavailable to that zone due to usage by entities
522 * This works as follows.
524 * In the 'swap -s' output, the data is displayed
526 * allocated = ani_max - ani_free
527 * reserved = ani_resv - allocated
528 * available = ani_max - ani_resv
530 * Taking a contrived example, if the swap cap is 100
531 * and the amount of swap used by the zone is 75, this
533 * allocated = ani_max - ani_free = 100 - 25 = 75
534 * reserved = ani_resv - allocated = 75 - 75 = 0
535 * available = ani_max - ani_resv = 100 - 75 = 25
537 * In this typical case, you can see that the 'swap -s'
538 * 'reserved' will always be 0 inside a swap capped
541 * However, if the system as a whole has less free
542 * swap than the zone limits allow, then we adjust
543 * the ani_resv value up so that it is the difference
544 * between the zone cap and the amount of free system
545 * swap. Taking the above example, but when the
546 * system as a whole only has 20 of swap available, we
547 * get an ani_resv of 100 - 20 = 80. This gives:
548 * allocated = ani_max - ani_free = 100 - 25 = 75
549 * reserved = ani_resv - allocated = 80 - 75 = 5
550 * available = ani_max - ani_resv = 100 - 80 = 20
552 * In this case, you can see how the ani_resv value is
553 * tweaked up to make the 'swap -s' numbers work inside
556 rctl_qty_t cap
, used
;
557 pgcnt_t pgcap
, sys_avail
;
559 mutex_enter(&zp
->zone_mem_lock
);
560 cap
= zp
->zone_max_swap_ctl
;
561 used
= zp
->zone_max_swap
;
562 mutex_exit(&zp
->zone_mem_lock
);
564 pgcap
= MIN(btop(cap
), ai
.ani_max
);
565 ai
.ani_free
= pgcap
- btop(used
);
567 /* Get the system-wide swap currently available. */
568 sys_avail
= ai
.ani_max
- ai
.ani_resv
;
569 if (sys_avail
< ai
.ani_free
)
570 ai
.ani_resv
= pgcap
- sys_avail
;
572 ai
.ani_resv
= btop(used
);
577 if (copyout(&ai
, sc_arg
, sizeof (struct anoninfo
)) != 0)
582 if (copyin(sc_arg
, &length
, sizeof (int)) != 0)
586 char *swappath
= "swap";
590 ust
= (swapent_t
*)((swaptbl_t
*)sc_arg
)->swt_ent
;
591 if (copyin(ust
, &st
, sizeof (swapent_t
)) != 0)
593 st
.ste_start
= PAGESIZE
>> SCTRSHFT
;
594 st
.ste_length
= (off_t
)0;
599 mutex_enter(&swapinfo_lock
);
600 for (sip
= swapinfo
, nswap
= 0;
601 sip
!= NULL
&& nswap
< nswapfiles
;
602 sip
= sip
->si_next
, nswap
++) {
604 (sip
->si_eoff
- sip
->si_soff
) >> SCTRSHFT
;
605 st
.ste_pages
+= sip
->si_npgs
;
606 st
.ste_free
+= sip
->si_nfpgs
;
608 mutex_exit(&swapinfo_lock
);
610 if (zp
->zone_max_swap_ctl
!= UINT64_MAX
) {
611 rctl_qty_t cap
, used
;
613 mutex_enter(&zp
->zone_mem_lock
);
614 cap
= zp
->zone_max_swap_ctl
;
615 used
= zp
->zone_max_swap
;
616 mutex_exit(&zp
->zone_mem_lock
);
618 st
.ste_length
= MIN(cap
, st
.ste_length
);
619 st
.ste_pages
= MIN(btop(cap
), st
.ste_pages
);
620 st
.ste_free
= MIN(st
.ste_pages
- btop(used
),
624 if (copyout(&st
, ust
, sizeof (swapent_t
)) != 0 ||
625 copyout(swappath
, st
.ste_path
,
626 strlen(swappath
) + 1) != 0) {
633 mutex_enter(&swapinfo_lock
);
634 tmp_nswapfiles
= nswapfiles
;
635 mutex_exit(&swapinfo_lock
);
638 * Return early if there are no swap entries to report:
640 if (tmp_nswapfiles
< 1) {
645 /* Return an error if not enough space for the whole table. */
646 if (length
< tmp_nswapfiles
)
649 * Get memory to hold the swap entries and their names. We'll
650 * copy the real entries into these and then copy these out.
651 * Allocating the pathname memory is only a guess so we may
652 * find that we need more and have to do it again.
653 * All this is because we have to hold swapinfo_lock while
654 * traversing the swapinfo list, and we can't be doing copyouts
655 * and/or kmem_alloc()s during this.
657 csip
= kmem_zalloc(tmp_nswapfiles
* sizeof (struct swapinfo
),
660 nlen
= tmp_nswapfiles
* (gplen
+= 100);
661 pname
= kmem_zalloc(nlen
, KM_SLEEP
);
663 mutex_enter(&swapinfo_lock
);
665 if (tmp_nswapfiles
!= nswapfiles
) {
666 mutex_exit(&swapinfo_lock
);
667 kmem_free(pname
, nlen
);
669 tmp_nswapfiles
* sizeof (struct swapinfo
));
673 for (sip
= swapinfo
, tsip
= csip
, tpname
= pname
, nswap
= 0;
674 sip
&& nswap
< tmp_nswapfiles
;
675 sip
= sip
->si_next
, tsip
++, tpname
+= plen
, nswap
++) {
676 plen
= sip
->si_pnamelen
;
677 if (tpname
+ plen
- pname
> nlen
) {
678 mutex_exit(&swapinfo_lock
);
679 kmem_free(pname
, nlen
);
683 tsip
->si_pname
= tpname
;
684 (void) strcpy(tsip
->si_pname
, sip
->si_pname
);
686 mutex_exit(&swapinfo_lock
);
692 ust
= (swapent_t
*)((swaptbl_t
*)sc_arg
)->swt_ent
;
693 for (tsip
= csip
, cnt
= 0; cnt
< nswap
; tsip
++, ust
++, cnt
++) {
694 if (copyin(ust
, &st
, sizeof (swapent_t
)) != 0) {
698 st
.ste_flags
= tsip
->si_flags
;
700 (tsip
->si_eoff
- tsip
->si_soff
) >> SCTRSHFT
;
701 st
.ste_start
= tsip
->si_soff
>> SCTRSHFT
;
702 st
.ste_pages
= tsip
->si_npgs
;
703 st
.ste_free
= tsip
->si_nfpgs
;
704 if (copyout(&st
, ust
, sizeof (swapent_t
)) != 0) {
708 if (!tsip
->si_pnamelen
)
710 if (copyout(tsip
->si_pname
, st
.ste_path
,
711 tsip
->si_pnamelen
) != 0) {
718 kmem_free(csip
, tmp_nswapfiles
* sizeof (struct swapinfo
));
719 kmem_free(pname
, nlen
);
728 if ((error
= secpolicy_swapctl(CRED())) != 0)
731 if (copyin(sc_arg
, &sr
, sizeof (swapres_t
)))
734 /* Allocate the space to read in pathname */
735 if ((swapname
= kmem_alloc(MAXPATHLEN
, KM_NOSLEEP
)) == NULL
)
738 error
= copyinstr(sr
.sr_name
, swapname
, MAXPATHLEN
, 0);
742 error
= lookupname(swapname
, UIO_SYSSPACE
, FOLLOW
, NULLVPP
, &vp
);
744 if (sc_cmd
== SC_ADD
)
746 /* see if we match by name */
747 vp
= swapdel_byname(swapname
, (size_t)sr
.sr_start
);
752 if (vp
->v_flag
& (VNOMAP
| VNOSWAP
)) {
757 switch (vp
->v_type
) {
762 if (vp
->v_vfsp
&& vn_is_readonly(vp
))
765 error
= VOP_ACCESS(vp
, VREAD
|VWRITE
, 0, CRED(), NULL
);
776 if (sc_cmd
== SC_REMOVE
)
777 error
= swapdel(vp
, sr
.sr_start
);
779 error
= swapadd(vp
, sr
.sr_start
,
780 sr
.sr_length
, swapname
);
784 kmem_free(swapname
, MAXPATHLEN
);
788 #if defined(_LP64) && defined(_SYSCALL32)
791 swapctl32(int sc_cmd
, void *sc_arg
, int *rv
)
793 struct swapinfo
*sip
, *csip
, *tsip
;
795 struct swapent32 st
, *ust
;
806 struct anoninfo32 ai
;
809 int global
= INGLOBALZONE(curproc
);
810 struct zone
*zp
= curproc
->p_zone
;
813 * When running in a zone we want to hide the details of the swap
814 * devices: we report there only being one swap device named "swap"
815 * having a size equal to the sum of the sizes of all real swap devices
828 * Return anoninfo information with these changes:
829 * ani_max = maximum amount of swap space
830 * (including potentially available physical memory)
831 * ani_free = amount of unallocated anonymous memory
832 * (some of which might be reserved and including
833 * potentially available physical memory)
834 * ani_resv = amount of claimed (reserved) anonymous memory
836 avail
= MAX((spgcnt_t
)(availrmem
- swapfs_minfree
), 0);
837 s
= (k_anoninfo
.ani_max
+ k_anoninfo
.ani_mem_resv
) + avail
;
842 /* Update ani_free */
844 s
= k_anoninfo
.ani_free
+ avail
;
849 s
= k_anoninfo
.ani_phys_resv
+ k_anoninfo
.ani_mem_resv
;
854 if (!global
&& zp
->zone_max_swap_ctl
!= UINT64_MAX
) {
856 * We're in a non-global zone with a swap cap. We
857 * always report the system-wide values for the global
858 * zone, even though it too can have a swap cap.
859 * See the comment for the SC_AINFO case in swapctl()
860 * which explains the following logic.
862 rctl_qty_t cap
, used
;
863 pgcnt_t pgcap
, sys_avail
;
865 mutex_enter(&zp
->zone_mem_lock
);
866 cap
= zp
->zone_max_swap_ctl
;
867 used
= zp
->zone_max_swap
;
868 mutex_exit(&zp
->zone_mem_lock
);
870 pgcap
= MIN(btop(cap
), ai
.ani_max
);
871 ai
.ani_free
= pgcap
- btop(used
);
873 /* Get the system-wide swap currently available. */
874 sys_avail
= ai
.ani_max
- ai
.ani_resv
;
875 if (sys_avail
< ai
.ani_free
)
876 ai
.ani_resv
= pgcap
- sys_avail
;
878 ai
.ani_resv
= btop(used
);
883 if (copyout(&ai
, sc_arg
, sizeof (ai
)) != 0)
888 if (copyin(sc_arg
, &length
, sizeof (int32_t)) != 0)
892 char *swappath
= "swap";
896 ust
= (swapent32_t
*)((swaptbl32_t
*)sc_arg
)->swt_ent
;
897 if (copyin(ust
, &st
, sizeof (swapent32_t
)) != 0)
899 st
.ste_start
= PAGESIZE
>> SCTRSHFT
;
900 st
.ste_length
= (off_t
)0;
905 mutex_enter(&swapinfo_lock
);
906 for (sip
= swapinfo
, nswap
= 0;
907 sip
!= NULL
&& nswap
< nswapfiles
;
908 sip
= sip
->si_next
, nswap
++) {
910 (sip
->si_eoff
- sip
->si_soff
) >> SCTRSHFT
;
911 st
.ste_pages
+= sip
->si_npgs
;
912 st
.ste_free
+= sip
->si_nfpgs
;
914 mutex_exit(&swapinfo_lock
);
916 if (zp
->zone_max_swap_ctl
!= UINT64_MAX
) {
917 rctl_qty_t cap
, used
;
919 mutex_enter(&zp
->zone_mem_lock
);
920 cap
= zp
->zone_max_swap_ctl
;
921 used
= zp
->zone_max_swap
;
922 mutex_exit(&zp
->zone_mem_lock
);
924 st
.ste_length
= MIN(cap
, st
.ste_length
);
925 st
.ste_pages
= MIN(btop(cap
), st
.ste_pages
);
926 st
.ste_free
= MIN(st
.ste_pages
- btop(used
),
930 if (copyout(&st
, ust
, sizeof (swapent32_t
)) != 0 ||
931 copyout(swappath
, (caddr_t
)(uintptr_t)st
.ste_path
,
932 strlen(swappath
) + 1) != 0) {
939 mutex_enter(&swapinfo_lock
);
940 tmp_nswapfiles
= nswapfiles
;
941 mutex_exit(&swapinfo_lock
);
944 * Return early if there are no swap entries to report:
946 if (tmp_nswapfiles
< 1) {
951 /* Return an error if not enough space for the whole table. */
952 if (length
< tmp_nswapfiles
)
955 * Get memory to hold the swap entries and their names. We'll
956 * copy the real entries into these and then copy these out.
957 * Allocating the pathname memory is only a guess so we may
958 * find that we need more and have to do it again.
959 * All this is because we have to hold swapinfo_lock while
960 * traversing the swapinfo list, and we can't be doing copyouts
961 * and/or kmem_alloc()s during this.
963 csip
= kmem_zalloc(tmp_nswapfiles
* sizeof (*csip
), KM_SLEEP
);
965 nlen
= tmp_nswapfiles
* (gplen
+= 100);
966 pname
= kmem_zalloc(nlen
, KM_SLEEP
);
968 mutex_enter(&swapinfo_lock
);
970 if (tmp_nswapfiles
!= nswapfiles
) {
971 mutex_exit(&swapinfo_lock
);
972 kmem_free(pname
, nlen
);
973 kmem_free(csip
, tmp_nswapfiles
* sizeof (*csip
));
977 for (sip
= swapinfo
, tsip
= csip
, tpname
= pname
, nswap
= 0;
978 (sip
!= NULL
) && (nswap
< tmp_nswapfiles
);
979 sip
= sip
->si_next
, tsip
++, tpname
+= plen
, nswap
++) {
980 plen
= sip
->si_pnamelen
;
981 if (tpname
+ plen
- pname
> nlen
) {
982 mutex_exit(&swapinfo_lock
);
983 kmem_free(pname
, nlen
);
987 tsip
->si_pname
= tpname
;
988 (void) strcpy(tsip
->si_pname
, sip
->si_pname
);
990 mutex_exit(&swapinfo_lock
);
996 ust
= (swapent32_t
*)((swaptbl32_t
*)sc_arg
)->swt_ent
;
997 for (tsip
= csip
, cnt
= 0; cnt
< nswap
; tsip
++, ust
++, cnt
++) {
998 if (copyin(ust
, &st
, sizeof (*ust
)) != 0) {
1002 st
.ste_flags
= tsip
->si_flags
;
1004 (tsip
->si_eoff
- tsip
->si_soff
) >> SCTRSHFT
;
1005 st
.ste_start
= tsip
->si_soff
>> SCTRSHFT
;
1006 st
.ste_pages
= tsip
->si_npgs
;
1007 st
.ste_free
= tsip
->si_nfpgs
;
1008 if (copyout(&st
, ust
, sizeof (st
)) != 0) {
1012 if (!tsip
->si_pnamelen
)
1014 if (copyout(tsip
->si_pname
,
1015 (caddr_t
)(uintptr_t)st
.ste_path
,
1016 tsip
->si_pnamelen
) != 0) {
1023 kmem_free(csip
, tmp_nswapfiles
* sizeof (*csip
));
1024 kmem_free(pname
, nlen
);
1033 if ((error
= secpolicy_swapctl(CRED())) != 0)
1036 if (copyin(sc_arg
, &sr
, sizeof (sr
)))
1039 /* Allocate the space to read in pathname */
1040 if ((swapname
= kmem_alloc(MAXPATHLEN
, KM_NOSLEEP
)) == NULL
)
1043 error
= copyinstr((caddr_t
)(uintptr_t)sr
.sr_name
,
1044 swapname
, MAXPATHLEN
, NULL
);
1048 error
= lookupname(swapname
, UIO_SYSSPACE
, FOLLOW
, NULLVPP
, &vp
);
1050 if (sc_cmd
== SC_ADD
)
1052 /* see if we match by name */
1053 vp
= swapdel_byname(swapname
, (uint_t
)sr
.sr_start
);
1058 if (vp
->v_flag
& (VNOMAP
| VNOSWAP
)) {
1063 switch (vp
->v_type
) {
1068 if (vp
->v_vfsp
&& vn_is_readonly(vp
))
1071 error
= VOP_ACCESS(vp
, VREAD
|VWRITE
, 0, CRED(), NULL
);
1082 if (sc_cmd
== SC_REMOVE
)
1083 error
= swapdel(vp
, sr
.sr_start
);
1085 error
= swapadd(vp
, sr
.sr_start
, sr
.sr_length
,
1090 kmem_free(swapname
, MAXPATHLEN
);
1094 #endif /* _LP64 && _SYSCALL32 */
1097 * Add a new swap file.
1100 swapadd(struct vnode
*vp
, ulong_t lowblk
, ulong_t nblks
, char *swapname
)
1102 struct swapinfo
**sipp
, *nsip
= NULL
, *esip
= NULL
;
1106 u_offset_t soff
, eoff
;
1108 ssize_t i
, start
, end
;
1111 size_t returned_mem
;
1113 SWAP_PRINT(SW_CTL
, "swapadd: vp %p lowblk %ld nblks %ld swapname %s\n",
1114 vp
, lowblk
, nblks
, swapname
, 0);
1116 * Get the real vnode. (If vp is not a specnode it just returns vp, so
1117 * it does the right thing, but having this code know about specnodes
1118 * violates the spirit of having it be independent of vnode type.)
1120 cvp
= common_specvp(vp
);
1123 * Or in VISSWAP so file system has chance to deny swap-ons during open.
1125 mutex_enter(&cvp
->v_lock
);
1126 wasswap
= cvp
->v_flag
& VISSWAP
;
1127 cvp
->v_flag
|= VISSWAP
;
1128 mutex_exit(&cvp
->v_lock
);
1130 mutex_enter(&swap_lock
);
1131 if (error
= VOP_OPEN(&cvp
, FREAD
|FWRITE
, CRED(), NULL
)) {
1132 mutex_exit(&swap_lock
);
1133 /* restore state of v_flag */
1135 mutex_enter(&cvp
->v_lock
);
1136 cvp
->v_flag
&= ~VISSWAP
;
1137 mutex_exit(&cvp
->v_lock
);
1141 mutex_exit(&swap_lock
);
1144 * Get partition size. Return error if empty partition,
1145 * or if request does not fit within the partition.
1146 * If this is the first swap device, we can reduce
1147 * the size of the swap area to match what is
1148 * available. This can happen if the system was built
1149 * on a machine with a different size swap partition.
1151 vattr
.va_mask
= AT_SIZE
;
1152 if (error
= VOP_GETATTR(cvp
, &vattr
, ATTR_COMM
, CRED(), NULL
))
1156 * Specfs returns a va_size of MAXOFFSET_T (UNKNOWN_SIZE) when the
1157 * size of the device can't be determined.
1159 if ((vattr
.va_size
== 0) || (vattr
.va_size
== MAXOFFSET_T
)) {
1166 * No support for large swap in 32-bit OS, if the size of the swap is
1167 * bigger than MAXOFF32_T then the size used by swapfs must be limited.
1168 * This limitation is imposed by the swap subsystem itself, a D_64BIT
1169 * driver as the target of swap operation should be able to field
1172 if (vattr
.va_size
> MAXOFF32_T
) {
1174 "!swap device %s truncated from 0x%llx to 0x%x bytes",
1175 swapname
, vattr
.va_size
, MAXOFF32_T
);
1176 vattr
.va_size
= MAXOFF32_T
;
1180 /* Fail if file not writeable (try to set size to current size) */
1181 vattr
.va_mask
= AT_SIZE
;
1182 if (error
= VOP_SETATTR(cvp
, &vattr
, 0, CRED(), NULL
))
1185 /* Fail if fs does not support VOP_PAGEIO */
1186 error
= VOP_PAGEIO(cvp
, (page_t
*)NULL
, (u_offset_t
)0, 0, 0, CRED(),
1189 if (error
== ENOSYS
)
1194 * If swapping on the root filesystem don't put swap blocks that
1195 * correspond to the miniroot filesystem on the swap free list.
1198 startblk
= roundup(MINIROOTSIZE
<<SCTRSHFT
, klustsize
)>>SCTRSHFT
;
1199 else /* Skip 1st page (disk label) */
1200 startblk
= (ulong_t
)(lowblk
? lowblk
: 1);
1202 soff
= startblk
<< SCTRSHFT
;
1203 if (soff
>= vattr
.va_size
) {
1209 * If user specified 0 blks, use the size of the device
1211 eoff
= nblks
? soff
+ (nblks
- (startblk
- lowblk
) << SCTRSHFT
) :
1214 SWAP_PRINT(SW_CTL
, "swapadd: va_size %ld soff %ld eoff %ld\n",
1215 vattr
.va_size
, soff
, eoff
, 0, 0);
1217 if (eoff
> vattr
.va_size
) {
1223 * The starting and ending offsets must be page aligned.
1224 * Round soff up to next page boundary, round eoff
1225 * down to previous page boundary.
1227 soff
= ptob(btopr(soff
));
1228 eoff
= ptob(btop(eoff
));
1230 SWAP_PRINT(SW_CTL
, "swapadd: soff %ld >= eoff %ld\n",
1231 soff
, eoff
, 0, 0, 0);
1236 pages
= btop(eoff
- soff
);
1238 /* Allocate and partially set up the new swapinfo */
1239 nsip
= kmem_zalloc(sizeof (struct swapinfo
), KM_SLEEP
);
1242 nsip
->si_soff
= soff
;
1243 nsip
->si_eoff
= eoff
;
1245 nsip
->si_checkcnt
= nsip
->si_alloccnt
= 0;
1247 nsip
->si_pnamelen
= (int)strlen(swapname
) + 1;
1248 nsip
->si_pname
= (char *)kmem_zalloc(nsip
->si_pnamelen
, KM_SLEEP
);
1249 bcopy(swapname
, nsip
->si_pname
, nsip
->si_pnamelen
- 1);
1250 SWAP_PRINT(SW_CTL
, "swapadd: allocating swapinfo for %s, %ld pages\n",
1251 swapname
, pages
, 0, 0, 0);
1253 * Size of swapslots map in bytes
1255 nsip
->si_mapsize
= P2ROUNDUP(pages
, NBBW
) / NBBY
;
1256 nsip
->si_swapslots
= kmem_zalloc(nsip
->si_mapsize
, KM_SLEEP
);
1259 * Permanently set the bits that can't ever be allocated,
1260 * i.e. those from the ending offset to the round up slot for the
1261 * swapslots bit map.
1264 end
= P2ROUNDUP(pages
, NBBW
);
1265 for (i
= start
; i
< end
; i
++) {
1266 SWAP_PRINT(SW_CTL
, "swapadd: set bit for page %ld\n", i
,
1268 SETBIT(nsip
->si_swapslots
, i
);
1270 nsip
->si_npgs
= nsip
->si_nfpgs
= pages
;
1272 * Now check to see if we can add it. We wait until now to check because
1273 * we need the swapinfo_lock and we don't want to sleep while holding it
1274 * (e.g., during kmem_alloc()) while we're setting up the swapinfo.
1276 mutex_enter(&swapinfo_lock
);
1277 for (sipp
= &swapinfo
; (esip
= *sipp
) != NULL
; sipp
= &esip
->si_next
) {
1278 if (esip
->si_vp
== cvp
) {
1279 if (esip
->si_soff
== soff
&& esip
->si_npgs
== pages
&&
1280 (esip
->si_flags
& ST_DOINGDEL
)) {
1282 * We are adding a device that we are in the
1283 * middle of deleting. Just clear the
1284 * ST_DOINGDEL flag to signal this and
1285 * the deletion routine will eventually notice
1286 * it and add it back.
1288 esip
->si_flags
&= ~ST_DOINGDEL
;
1289 mutex_exit(&swapinfo_lock
);
1292 /* disallow overlapping swap files */
1293 if ((soff
< esip
->si_eoff
) && (eoff
> esip
->si_soff
)) {
1295 mutex_exit(&swapinfo_lock
);
1304 * add new swap device to list and shift allocations to it
1305 * before updating the anoninfo counters
1311 * Update the total amount of reservable swap space
1312 * accounting properly for swap space from physical memory
1314 /* New swap device soaks up currently reserved memory swap */
1315 mutex_enter(&anoninfo_lock
);
1317 ASSERT(k_anoninfo
.ani_mem_resv
>= k_anoninfo
.ani_locked_swap
);
1318 ASSERT(k_anoninfo
.ani_max
>= k_anoninfo
.ani_phys_resv
);
1320 k_anoninfo
.ani_max
+= pages
;
1322 if (k_anoninfo
.ani_mem_resv
> k_anoninfo
.ani_locked_swap
) {
1323 returned_mem
= MIN(k_anoninfo
.ani_mem_resv
-
1324 k_anoninfo
.ani_locked_swap
,
1325 k_anoninfo
.ani_max
- k_anoninfo
.ani_phys_resv
);
1327 ANI_ADD(-returned_mem
);
1328 k_anoninfo
.ani_free
-= returned_mem
;
1329 k_anoninfo
.ani_mem_resv
-= returned_mem
;
1330 k_anoninfo
.ani_phys_resv
+= returned_mem
;
1332 mutex_enter(&freemem_lock
);
1333 availrmem
+= returned_mem
;
1334 mutex_exit(&freemem_lock
);
1337 * At boot time, to permit booting small memory machines using
1338 * only physical memory as swap space, we allowed a dangerously
1339 * large amount of memory to be used as swap space; now that
1340 * more physical backing store is available bump down the amount
1341 * we can get from memory to a safer size.
1343 if (swapfs_minfree
< swapfs_desfree
) {
1344 mutex_enter(&freemem_lock
);
1345 if (availrmem
> swapfs_desfree
|| !k_anoninfo
.ani_mem_resv
)
1346 swapfs_minfree
= swapfs_desfree
;
1347 mutex_exit(&freemem_lock
);
1350 SWAP_PRINT(SW_CTL
, "swapadd: ani_max %ld ani_free %ld\n",
1351 k_anoninfo
.ani_free
, k_anoninfo
.ani_free
, 0, 0, 0);
1353 mutex_exit(&anoninfo_lock
);
1355 mutex_exit(&swapinfo_lock
);
1357 /* Initialize the dump device */
1358 mutex_enter(&dump_lock
);
1360 (void) dumpinit(vp
, swapname
, 0);
1361 mutex_exit(&dump_lock
);
1365 if (error
|| esip
) {
1366 SWAP_PRINT(SW_CTL
, "swapadd: error (%d)\n", error
, 0, 0, 0, 0);
1369 mutex_enter(&cvp
->v_lock
);
1370 cvp
->v_flag
&= ~VISSWAP
;
1371 mutex_exit(&cvp
->v_lock
);
1374 kmem_free(nsip
->si_swapslots
, (size_t)nsip
->si_mapsize
);
1375 kmem_free(nsip
->si_pname
, nsip
->si_pnamelen
);
1376 kmem_free(nsip
, sizeof (*nsip
));
1378 mutex_enter(&swap_lock
);
1379 (void) VOP_CLOSE(cvp
, FREAD
|FWRITE
, 1, (offset_t
)0, CRED(),
1381 mutex_exit(&swap_lock
);
1387 * Delete a swap file.
1392 ulong_t lowblk
) /* Low block number of area to delete. */
1394 struct swapinfo
**sipp
, *osip
= NULL
;
1398 u_offset_t toff
= 0;
1399 struct vnode
*tvp
= NULL
;
1401 struct anon
**app
, *ap
;
1403 pgcnt_t adjust_swap
= 0;
1405 /* Find the swap file entry for the file to be deleted */
1406 cvp
= common_specvp(vp
);
1409 lowblk
= lowblk
? lowblk
: 1; /* Skip first page (disk label) */
1410 soff
= ptob(btopr(lowblk
<< SCTRSHFT
)); /* must be page aligned */
1412 mutex_enter(&swapinfo_lock
);
1413 for (sipp
= &swapinfo
; (osip
= *sipp
) != NULL
; sipp
= &osip
->si_next
) {
1414 if ((osip
->si_vp
== cvp
) &&
1415 (osip
->si_soff
== soff
) && (osip
->si_flags
== 0))
1419 /* If the file was not found, error. */
1422 mutex_exit(&swapinfo_lock
);
1426 pages
= osip
->si_npgs
;
1429 * Do not delete if we will be low on swap pages.
1431 mutex_enter(&anoninfo_lock
);
1433 ASSERT(k_anoninfo
.ani_mem_resv
>= k_anoninfo
.ani_locked_swap
);
1434 ASSERT(k_anoninfo
.ani_max
>= k_anoninfo
.ani_phys_resv
);
1436 mutex_enter(&freemem_lock
);
1437 if (((k_anoninfo
.ani_max
- k_anoninfo
.ani_phys_resv
) +
1438 MAX((spgcnt_t
)(availrmem
- swapfs_minfree
), 0)) < pages
) {
1439 mutex_exit(&freemem_lock
);
1440 mutex_exit(&anoninfo_lock
);
1442 cmn_err(CE_WARN
, "swapdel - too few free pages");
1443 mutex_exit(&swapinfo_lock
);
1446 mutex_exit(&freemem_lock
);
1448 k_anoninfo
.ani_max
-= pages
;
1450 /* If needed, reserve memory swap to replace old device */
1451 if (k_anoninfo
.ani_phys_resv
> k_anoninfo
.ani_max
) {
1452 adjust_swap
= k_anoninfo
.ani_phys_resv
- k_anoninfo
.ani_max
;
1453 k_anoninfo
.ani_phys_resv
-= adjust_swap
;
1454 k_anoninfo
.ani_mem_resv
+= adjust_swap
;
1455 mutex_enter(&freemem_lock
);
1456 availrmem
-= adjust_swap
;
1457 mutex_exit(&freemem_lock
);
1458 ANI_ADD(adjust_swap
);
1460 ASSERT(k_anoninfo
.ani_mem_resv
>= k_anoninfo
.ani_locked_swap
);
1461 ASSERT(k_anoninfo
.ani_max
>= k_anoninfo
.ani_phys_resv
);
1462 mutex_exit(&anoninfo_lock
);
1467 * Set the delete flag. This prevents anyone from allocating more
1468 * pages from this file. Also set ST_DOINGDEL. Someone who wants to
1469 * add the file back while we're deleting it will signify by clearing
1472 osip
->si_flags
|= ST_INDEL
|ST_DOINGDEL
;
1473 mutex_exit(&swapinfo_lock
);
1476 * Free all the allocated physical slots for this file. We do this
1477 * by walking through the entire anon hash array, because we need
1478 * to update all the anon slots that have physical swap slots on
1479 * this file, and this is the only way to find them all. We go back
1480 * to the beginning of a bucket after each slot is freed because the
1481 * anonhash_lock is not held during the free and thus the hash table
1482 * may change under us.
1484 for (app
= anon_hash
; app
< &anon_hash
[ANON_HASH_SIZE
]; app
++) {
1485 ahm
= &anonhash_lock
[(app
- anon_hash
) &
1486 (AH_LOCK_SIZE
- 1)].pad_mutex
;
1489 for (ap
= *app
; ap
!= NULL
; ap
= ap
->an_hash
) {
1490 if (ap
->an_pvp
== cvp
&&
1491 ap
->an_poff
>= osip
->si_soff
&&
1492 ap
->an_poff
< osip
->si_eoff
) {
1493 ASSERT(TESTBIT(osip
->si_swapslots
,
1494 btop((size_t)(ap
->an_poff
-
1501 error
= swapslot_free(tvp
, toff
, osip
);
1505 if (!error
&& (osip
->si_flags
& ST_DOINGDEL
)) {
1510 "swapslot_free failed %d",
1515 * Add device back before making it
1518 mutex_enter(&swapinfo_lock
);
1520 ~(ST_INDEL
| ST_DOINGDEL
);
1521 mutex_exit(&swapinfo_lock
);
1524 * Update the anon space available
1526 mutex_enter(&anoninfo_lock
);
1528 k_anoninfo
.ani_phys_resv
+= adjust_swap
;
1529 k_anoninfo
.ani_mem_resv
-= adjust_swap
;
1530 k_anoninfo
.ani_max
+= pages
;
1532 mutex_enter(&freemem_lock
);
1533 availrmem
+= adjust_swap
;
1534 mutex_exit(&freemem_lock
);
1536 mutex_exit(&anoninfo_lock
);
1548 /* All done, they'd better all be free! */
1549 mutex_enter(&swapinfo_lock
);
1550 ASSERT(osip
->si_nfpgs
== osip
->si_npgs
);
1552 /* Now remove it from the swapinfo list */
1553 for (sipp
= &swapinfo
; *sipp
!= NULL
; sipp
= &(*sipp
)->si_next
) {
1558 *sipp
= osip
->si_next
;
1560 if ((silast
= osip
->si_next
) == NULL
)
1563 mutex_exit(&swapinfo_lock
);
1565 kmem_free(osip
->si_swapslots
, osip
->si_mapsize
);
1566 kmem_free(osip
->si_pname
, osip
->si_pnamelen
);
1567 kmem_free(osip
, sizeof (*osip
));
1569 mutex_enter(&dump_lock
);
1572 mutex_exit(&dump_lock
);
1574 /* Release the vnode */
1576 mutex_enter(&swap_lock
);
1577 (void) VOP_CLOSE(cvp
, FREAD
|FWRITE
, 1, (offset_t
)0, CRED(), NULL
);
1578 mutex_enter(&cvp
->v_lock
);
1579 cvp
->v_flag
&= ~VISSWAP
;
1580 mutex_exit(&cvp
->v_lock
);
1582 mutex_exit(&swap_lock
);
1588 * Free up a physical swap slot on swapinfo sip, currently in use by the
1589 * anonymous page whose name is (vp, off).
1595 struct swapinfo
*sip
)
1597 struct page
*pp
= NULL
;
1598 struct anon
*ap
= NULL
;
1601 struct vnode
*pvp
= NULL
;
1605 ASSERT(sip
->si_vp
!= NULL
);
1607 * Get the page for the old swap slot if it exists, or create a new one.
1610 if ((pp
= page_lookup(vp
, off
, SE_SHARED
)) == NULL
) {
1611 pp
= page_create_va(vp
, off
, PAGESIZE
, PG_WAIT
| PG_EXCL
,
1617 error
= swap_getphysname(vp
, off
, &pvp
, &poff
);
1618 if (error
|| pvp
!= sip
->si_vp
|| poff
< sip
->si_soff
||
1619 poff
>= sip
->si_eoff
) {
1621 /*LINTED: constant in conditional context*/
1622 VN_DISPOSE(pp
, B_INVAL
, 0, kcred
);
1626 error
= VOP_PAGEIO(pvp
, pp
, poff
, PAGESIZE
, B_READ
,
1630 if (error
== EFAULT
)
1632 /*LINTED: constant in conditional context*/
1633 VN_DISPOSE(pp
, B_INVAL
, 0, kcred
);
1639 * The anon could have been removed by anon_decref* and/or reallocated
1640 * by anon layer (an_pvp == NULL) with the same vp, off.
1641 * In this case the page which has been allocated needs to
1646 ahm
= AH_MUTEX(vp
, off
);
1648 ap
= swap_anon(vp
, off
);
1649 if ((ap
== NULL
|| ap
->an_pvp
== NULL
) && alloc_pg
) {
1652 /*LINTED: constant in conditional context*/
1653 VN_DISPOSE(pp
, B_INVAL
, 0, kcred
);
1658 * Free the physical slot. It may have been freed up and replaced with
1659 * another one while we were getting the page so we have to re-verify
1660 * that this is really the one we want. If we do free the slot we have
1661 * to mark the page modified, as its backing store is now gone.
1663 if ((ap
!= NULL
) && (ap
->an_pvp
== sip
->si_vp
&& ap
->an_poff
>=
1664 sip
->si_soff
&& ap
->an_poff
< sip
->si_eoff
)) {
1665 swap_phys_free(ap
->an_pvp
, ap
->an_poff
, PAGESIZE
);
1680 * Get contig physical backing store for vp, in the range
1681 * [*offp, *offp + *lenp). May back a subrange of this, but must
1682 * always include the requested offset or fail. Returns the offsets
1683 * backed as [*offp, *offp + *lenp) and the physical offsets used to
1684 * back them from *pvpp in the range [*pstartp, *pstartp + *lenp).
1685 * Returns 0 for success
1686 * SE_NOANON -- no anon slot for requested page
1687 * SE_NOSWAP -- no physical swap space available
1695 struct vnode
**pvpp
,
1698 struct anon
*ap
= NULL
; /* anon slot for vp, off */
1701 u_offset_t poff
, pstart
, prem
;
1703 u_offset_t off
, start
;
1706 ASSERT(*offp
<= offset
&& offset
< *offp
+ *lenp
);
1708 /* Get new physical swap slots. */
1710 if (!swap_phys_alloc(&pvp
, &pstart
, &plen
, 0)) {
1712 * No swap available so return error unless requested
1713 * offset is already backed in which case return that.
1715 ahm
= AH_MUTEX(vp
, offset
);
1717 if ((ap
= swap_anon(vp
, offset
)) == NULL
) {
1722 error
= (ap
->an_pvp
? 0 : SE_NOSWAP
);
1726 *poffp
= ap
->an_poff
;
1732 * We got plen (<= *lenp) contig slots. Use these to back a
1733 * subrange of [*offp, *offp + *lenp) which includes offset.
1734 * For now we just put offset at the end of the kluster.
1735 * Clearly there are other possible choices - which is best?
1738 (offset
+ PAGESIZE
> plen
) ? (offset
+ PAGESIZE
- plen
) : 0);
1739 ASSERT(start
+ plen
<= *offp
+ *lenp
);
1741 for (off
= start
, poff
= pstart
; poff
< pstart
+ plen
;
1742 off
+= PAGESIZE
, poff
+= PAGESIZE
) {
1743 ahm
= AH_MUTEX(vp
, off
);
1745 if ((ap
= swap_anon(vp
, off
)) != NULL
) {
1746 /* Free old slot if any, and assign new one */
1748 swap_phys_free(ap
->an_pvp
, ap
->an_poff
,
1752 } else { /* No anon slot for a klustered page, quit. */
1753 prem
= (pstart
+ plen
) - poff
;
1754 /* Already did requested page, do partial kluster */
1756 plen
= poff
- pstart
;
1758 /* Fail on requested page, error */
1759 } else if (off
== offset
) {
1761 /* Fail on prior page, fail on requested page, error */
1762 } else if ((ap
= swap_anon(vp
, offset
)) == NULL
) {
1764 /* Fail on prior page, got requested page, do only it */
1766 /* Free old slot if any, and assign new one */
1768 swap_phys_free(ap
->an_pvp
, ap
->an_poff
,
1772 /* One page kluster */
1779 /* Free unassigned slots */
1780 swap_phys_free(pvp
, poff
, prem
);
1786 ASSERT(*offp
<= start
&& start
+ plen
<= *offp
+ *lenp
);
1787 ASSERT(start
<= offset
&& offset
< start
+ plen
);
1797 * Get the physical swap backing store location for a given anonymous page
1798 * named (vp, off). The backing store name is returned in (*pvpp, *poffp).
1800 * EIDRM -- no anon slot (page is not allocated)
1806 struct vnode
**pvpp
,
1813 ahm
= AH_MUTEX(vp
, off
);
1816 /* Get anon slot for vp, off */
1817 ap
= swap_anon(vp
, off
);
1823 *poffp
= ap
->an_poff
;