autofs: disable by default
[unleashed.git] / include / vm / anon.h
blob7ad2d6fb5b29b1fe0bc2c4862e9863be6ec4ee2b
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
22 * Copyright (c) 1986, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright (c) 2015, Joyent, Inc. All rights reserved.
26 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
27 /* All Rights Reserved */
30 * University Copyright- Copyright (c) 1982, 1986, 1988
31 * The Regents of the University of California
32 * All Rights Reserved
34 * University Acknowledgment- Portions of this document are derived from
35 * software developed by the University of California, Berkeley, and its
36 * contributors.
39 #ifndef _VM_ANON_H
40 #define _VM_ANON_H
42 #include <sys/cred.h>
43 #include <sys/zone.h>
44 #include <vm/seg.h>
45 #include <vm/vpage.h>
47 #ifdef __cplusplus
48 extern "C" {
49 #endif
52 * VM - Anonymous pages.
55 typedef unsigned long anoff_t; /* anon offsets */
58 * Each anonymous page, either in memory or in swap, has an anon structure.
59 * The structure (slot) provides a level of indirection between anonymous pages
60 * and their backing store.
62 * (an_vp, an_off) names the vnode of the anonymous page for this slot.
64 * (an_pvp, an_poff) names the location of the physical backing store
65 * for the page this slot represents. If the name is null there is no
66 * associated physical store. The physical backing store location can
67 * change while the slot is in use.
69 * an_hash is a hash list of anon slots. The list is hashed by
70 * (an_vp, an_off) of the associated anonymous page and provides a
71 * method of going from the name of an anonymous page to its
72 * associated anon slot.
74 * an_refcnt holds a reference count which is the number of separate
75 * copies that will need to be created in case of copy-on-write.
76 * A refcnt > 0 protects the existence of the slot. The refcnt is
77 * initialized to 1 when the anon slot is created in anon_alloc().
78 * If a client obtains an anon slot and allows multiple threads to
79 * share it, then it is the client's responsibility to ensure that
80 * it does not allow one thread to try to reference the slot at the
81 * same time as another is trying to decrement the last count and
82 * destroy the anon slot. E.g., the seg_vn segment type protects
83 * against this with higher level locks.
86 struct anon {
87 struct vnode *an_vp; /* vnode of anon page */
88 struct vnode *an_pvp; /* vnode of physical backing store */
89 anoff_t an_off; /* offset of anon page */
90 anoff_t an_poff; /* offset in vnode */
91 struct anon *an_hash; /* hash table of anon slots */
92 int an_refcnt; /* # of people sharing slot */
95 #define AN_CACHE_ALIGN_LOG2 4 /* log2(AN_CACHE_ALIGN) */
96 #define AN_CACHE_ALIGN (1U << AN_CACHE_ALIGN_LOG2) /* anon address aligned */
97 /* 16 bytes */
100 #ifdef _KERNEL
102 * The swapinfo_lock protects:
103 * swapinfo list
104 * individual swapinfo structures
106 * The anoninfo_lock protects:
107 * anoninfo counters
109 * The anonhash_lock protects:
110 * anon hash lists
111 * anon slot fields
113 * Fields in the anon slot which are read-only for the life of the slot
114 * (an_vp, an_off) do not require the anonhash_lock be held to access them.
115 * If you access a field without the anonhash_lock held you must be holding
116 * the slot with an_refcnt to make sure it isn't destroyed.
117 * To write (an_pvp, an_poff) in a given slot you must also hold the
118 * p_iolock of the anonymous page for slot.
120 extern kmutex_t anoninfo_lock;
121 extern kmutex_t swapinfo_lock;
122 extern pad_mutex_t *anonhash_lock;
123 extern pad_mutex_t anon_array_lock[];
124 extern kcondvar_t anon_array_cv[];
127 * Global hash table to provide a function from (vp, off) -> ap
129 extern size_t anon_hash_size;
130 extern unsigned int anon_hash_shift;
131 extern struct anon **anon_hash;
132 #define ANON_HASH_SIZE anon_hash_size
133 #define ANON_HASHAVELEN 4
135 * Try to use as many bits of randomness from both vp and off as we can.
136 * This should help spreading evenly for a variety of workloads. See comments
137 * for PAGE_HASH_FUNC for more explanation.
/*
 * ANON_HASH(vp, off): index into the global anon hash for the slot named
 * by (vp, off).  The offset is folded in twice (shifted right by PAGESHIFT
 * and by PAGESHIFT + anon_hash_shift), the vnode pointer four times (three
 * right shifts and one left shift); everything is XORed together and then
 * masked with (anon_hash_size - 1).
 *
 * Both macro arguments are evaluated more than once, so they must be free
 * of side effects.
 *
 * NOTE(review): masking with (anon_hash_size - 1) only produces a uniform,
 * in-range index if anon_hash_size is a power of two, and the left-shift
 * count (anon_hash_shift - AN_VPSHIFT - VNODE_ALIGN_LOG2) must be
 * non-negative and smaller than the width of uintptr_t.  Neither is
 * established in this header -- confirm where these values are initialized.
 */
139 #define ANON_HASH(vp, off) \
140 (((((uintptr_t)(off) >> PAGESHIFT) ^ \
141 ((uintptr_t)(off) >> (PAGESHIFT + anon_hash_shift))) ^ \
142 (((uintptr_t)(vp) >> 3) ^ \
143 ((uintptr_t)(vp) >> (3 + anon_hash_shift)) ^ \
144 ((uintptr_t)(vp) >> (3 + 2 * anon_hash_shift)) ^ \
145 ((uintptr_t)(vp) << \
146 (anon_hash_shift - AN_VPSHIFT - VNODE_ALIGN_LOG2)))) & \
147 (anon_hash_size - 1))
149 #define AH_LOCK_SIZE (2 << NCPU_LOG2)
151 #define AH_MUTEX(vp, off) \
152 (&anonhash_lock[(ANON_HASH((vp), (off)) & \
153 (AH_LOCK_SIZE - 1))].pad_mutex)
155 #endif /* _KERNEL */
158 * Declaration for the Global counters to accurately
159 * track the kernel foot print in memory.
161 extern pgcnt_t pages_locked;
162 extern pgcnt_t pages_claimed;
163 extern pgcnt_t pages_useclaim;
164 extern pgcnt_t obp_pages;
167 * Anonymous backing store accounting structure for swapctl.
169 * ani_max = maximum amount of swap space
170 * (including potentially available physical memory)
171 * ani_free = amount of unallocated anonymous memory
172 * (some of which might be reserved and including
173 * potentially available physical memory)
174 * ani_resv = amount of claimed (reserved) anonymous memory
176 * The swap data can be acquired more efficiently through the
177 * kstats interface.
178 * Total slots currently available for reservation =
179 * MAX(ani_max - ani_resv, 0) + (availrmem - swapfs_minfree)
181 struct anoninfo {
182 pgcnt_t ani_max;
183 pgcnt_t ani_free;
184 pgcnt_t ani_resv;
187 #ifdef _SYSCALL32
188 struct anoninfo32 {
189 size32_t ani_max;
190 size32_t ani_free;
191 size32_t ani_resv;
193 #endif /* _SYSCALL32 */
196 * Define the NCPU pool of the ani_free counters. Update the counter
197 * of the cpu on which the thread is running and in every clock intr
198 * sync anoninfo.ani_free with the current total of all the NCPU entries.
/*
 * One entry of the ani_free counter pool: a single pgcnt_t counter followed
 * by filler bytes so that the counter plus padding add up to 64 bytes.
 * NOTE(review): 64 is a hard-coded cache line size (see the XXX below);
 * presumably the padding keeps neighbouring entries on separate cache
 * lines -- confirm for targets with a different line size.
 */
201 typedef struct ani_free {
202 pgcnt_t ani_count;
203 uchar_t pad[64 - sizeof (pgcnt_t)];
204 /* XXX 64 = cacheline size */
205 } ani_free_t;
207 #define ANI_MAX_POOL (NCPU_P2)
208 extern ani_free_t *ani_free_pool;
211 * Since each CPU has its own bucket in ani_free_pool, there should be no
212 * contention here.
214 #define ANI_ADD(inc) { \
215 pgcnt_t *ani_countp; \
216 int index; \
217 index = (CPU->cpu_seqid & (ANI_MAX_POOL - 1)); \
218 ani_countp = &ani_free_pool[index].ani_count; \
219 atomic_add_long(ani_countp, inc); \
222 extern void set_anoninfo(void);
225 * Anon array pointers are allocated in chunks. Each chunk
226 * has PAGESIZE/sizeof(u_long *) of anon pointers.
227 * There are two levels of arrays for anon array pointers larger
228 * than a chunk. The first level points to anon array chunks.
229 * The second level consists of chunks of anon pointers.
231 * If anon array is smaller than a chunk then the whole anon array
232 * is created (memory is allocated for whole anon array).
233 * If anon array is larger than a chunk only first level array is
234 * allocated. Then other arrays (chunks) are allocated only when
235 * they are initialized with anon pointers.
237 struct anon_hdr {
238 kmutex_t serial_lock; /* serialize array chunk allocation */
239 pgcnt_t size; /* number of pointers to (anon) pages */
240 void **array_chunk; /* pointers to anon pointers or chunks of */
241 /* anon pointers */
242 int flags; /* ANON_ALLOC_FORCE force preallocation of */
243 /* whole anon array */
/*
 * Constants describing one chunk of anon pointers.
 *
 * ANON_PTRSHIFT is 3 when _LP64 is defined and 2 otherwise, i.e. log2 of an
 * 8-byte or 4-byte pointer respectively.  ANON_PTRMASK is the complement of
 * the low ANON_PTRSHIFT bits (~7 or ~3).
 *
 * ANON_CHUNK_SIZE  = PAGESIZE >> ANON_PTRSHIFT: pointer-sized entries that
 *                    fit in one page.
 * ANON_CHUNK_SHIFT = PAGESHIFT - ANON_PTRSHIFT: log2 of ANON_CHUNK_SIZE.
 * ANON_CHUNK_OFF   = ANON_CHUNK_SIZE - 1: presumably used as the mask that
 *                    extracts an index within a chunk -- confirm at callers.
 */
246 #ifdef _LP64
247 #define ANON_PTRSHIFT 3
248 #define ANON_PTRMASK ~7
249 #else
250 #define ANON_PTRSHIFT 2
251 #define ANON_PTRMASK ~3
252 #endif
254 #define ANON_CHUNK_SIZE (PAGESIZE >> ANON_PTRSHIFT)
255 #define ANON_CHUNK_SHIFT (PAGESHIFT - ANON_PTRSHIFT)
256 #define ANON_CHUNK_OFF (ANON_CHUNK_SIZE - 1)
259 * Anon flags.
261 #define ANON_SLEEP 0x0 /* ok to block */
262 #define ANON_NOSLEEP 0x1 /* non-blocking call */
263 #define ANON_ALLOC_FORCE 0x2 /* force single level anon array */
264 #define ANON_GROWDOWN 0x4 /* anon array should grow downward */
266 struct kshmid;
269 * The anon_map structure is used by various clients of the anon layer to
270 * manage anonymous memory. When anonymous memory is shared,
271 * then the different clients sharing it will point to the
272 * same anon_map structure. Also, if a segment is unmapped
273 * in the middle where an anon_map structure exists, the
274 * newly created segment will also share the anon_map structure,
275 * although the two segments will use different ranges of the
276 * anon array. When mappings are private (or shared with
277 * a reference count of 1), an unmap operation will free up
278 * a range of anon slots in the array given by the anon_map
279 * structure. Because of fragmentation due to this unmapping,
280 * we have to store the size of the anon array in the anon_map
281 * structure so that we can free everything when the reference
282 * count goes to zero.
284 * A new rangelock scheme is introduced to make the anon layer scale.
285 * A reader/writer lock per anon_amp and an array of system-wide hash
286 * locks, anon_array_lock[] are introduced to replace serial_lock and
287 * anonmap lock. The writer lock is held when we want to single-thread
288 * the reference to the anon array pointers or when referencing
289 * anon_map's members, whereas reader lock and anon_array_lock are
290 * held to allow multiple threads to reference different parts of the
291 * anon array. A global set of condition variables, anon_array_cv,
292 * are used with anon_array_lock[] to make the hold time of the locks
293 * short.
295 * szc is used to calculate the index of hash locks and cv's. We
296 * could've just used seg->s_szc if not for the possible sharing of
297 * anon_amp between SYSV shared memory and ISM, so now we introduce
298 * szc in the anon_map structure. For MAP_SHARED, the amp->szc is either
299 * 0 (base page size) or page_num_pagesizes() - 1, while for MAP_PRIVATE
300 * the amp->szc could be anything in [0, page_num_pagesizes() - 1].
302 typedef struct anon_map {
303 krwlock_t a_rwlock; /* protect anon_map and anon array */
304 size_t size; /* size in bytes mapped by the anon array */
305 struct anon_hdr *ahp; /* anon array header pointer, containing */
306 /* anon pointer array(s) */
307 size_t swresv; /* swap space reserved for this anon_map */
308 ulong_t refcnt; /* reference count on this structure */
309 ushort_t a_szc; /* max szc among shared processes */
310 void *locality; /* lgroup locality info */
311 struct kshmid *a_sp; /* kshmid if amp backs sysV, or NULL */
312 int a_purgewait; /* somebody waits for slocks to go away */
313 kcondvar_t a_purgecv; /* cv for waiting for slocks to go away */
314 kmutex_t a_purgemtx; /* mutex for anonmap_purge() */
315 spgcnt_t a_softlockcnt; /* number of pages locked in pcache */
316 kmutex_t a_pmtx; /* protects amp's pcache list */
317 pcache_link_t a_phead; /* head of amp's pcache list */
318 } amp_t;
320 #ifdef _KERNEL
322 #define ANON_BUSY 0x1
323 #define ANON_ISBUSY(slot) (*(slot) & ANON_BUSY)
324 #define ANON_SETBUSY(slot) (*(slot) |= ANON_BUSY)
325 #define ANON_CLRBUSY(slot) (*(slot) &= ~ANON_BUSY)
327 #define ANON_MAP_SHIFT 6 /* log2(sizeof (struct anon_map)) */
328 #define ANON_ARRAY_SHIFT 7 /* log2(ANON_LOCKSIZE) */
329 #define ANON_LOCKSIZE 128
331 #define ANON_LOCK_ENTER(lock, type) rw_enter((lock), (type))
332 #define ANON_LOCK_EXIT(lock) rw_exit((lock))
333 #define ANON_LOCK_HELD(lock) RW_LOCK_HELD((lock))
334 #define ANON_READ_HELD(lock) RW_READ_HELD((lock))
335 #define ANON_WRITE_HELD(lock) RW_WRITE_HELD((lock))
337 #define ANON_ARRAY_HASH(amp, idx)\
338 ((((idx) + ((idx) >> ANON_ARRAY_SHIFT) +\
339 ((idx) >> (ANON_ARRAY_SHIFT << 1)) +\
340 ((idx) >> (ANON_ARRAY_SHIFT + (ANON_ARRAY_SHIFT << 1)))) ^\
341 ((uintptr_t)(amp) >> ANON_MAP_SHIFT)) & (ANON_LOCKSIZE - 1))
343 typedef struct anon_sync_obj {
344 kmutex_t *sync_mutex;
345 kcondvar_t *sync_cv;
346 ulong_t *sync_data;
347 } anon_sync_obj_t;
350 * Anonymous backing store accounting structure for kernel.
351 * ani_max = total reservable slots on physical (disk-backed) swap
352 * ani_phys_resv = total phys slots reserved for use by clients
353 * ani_mem_resv = total mem slots reserved for use by clients
354 * ani_free = # unallocated physical slots + # of reserved unallocated
355 * memory slots
359 * Initial total swap slots available for reservation
/*
 * k_anoninfo.ani_max plus the memory term (availrmem - swapfs_minfree).
 * The memory term is cast to spgcnt_t before MAX(..., 0), so a negative
 * difference contributes 0 instead of a huge unsigned value (assumes
 * spgcnt_t is a signed type -- its definition is not in this header).
 */
361 #define TOTAL_AVAILABLE_SWAP \
362 (k_anoninfo.ani_max + MAX((spgcnt_t)(availrmem - swapfs_minfree), 0))
365 * Swap slots currently available for reservation
/*
 * (ani_max - ani_phys_resv) plus the memory term
 * (availrmem - swapfs_minfree), the latter clamped at 0 via the spgcnt_t
 * cast and MAX().
 * NOTE(review): the first term subtracts two pgcnt_t fields without any
 * clamp; this relies on ani_phys_resv never exceeding ani_max -- confirm
 * that invariant where the counters are updated.
 */
367 #define CURRENT_TOTAL_AVAILABLE_SWAP \
368 ((k_anoninfo.ani_max - k_anoninfo.ani_phys_resv) + \
369 MAX((spgcnt_t)(availrmem - swapfs_minfree), 0))
371 struct k_anoninfo {
372 pgcnt_t ani_max; /* total reservable slots on phys */
373 /* (disk) swap */
374 pgcnt_t ani_free; /* # of unallocated phys and mem slots */
375 pgcnt_t ani_phys_resv; /* # of reserved phys (disk) slots */
376 pgcnt_t ani_mem_resv; /* # of reserved mem slots */
377 pgcnt_t ani_locked_swap; /* # of swap slots locked in reserved */
378 /* mem swap */
381 extern struct k_anoninfo k_anoninfo;
383 extern void anon_init(void);
384 extern struct anon *anon_alloc(struct vnode *, anoff_t);
385 extern void anon_dup(struct anon_hdr *, ulong_t,
386 struct anon_hdr *, ulong_t, size_t);
387 extern void anon_dup_fill_holes(struct anon_hdr *, ulong_t,
388 struct anon_hdr *, ulong_t, size_t, uint_t, int);
389 extern int anon_fill_cow_holes(struct seg *, caddr_t, struct anon_hdr *,
390 ulong_t, struct vnode *, uoff_t, size_t, uint_t,
391 uint_t, struct vpage [], struct cred *);
392 extern void anon_free(struct anon_hdr *, ulong_t, size_t);
393 extern void anon_free_pages(struct anon_hdr *, ulong_t, size_t, uint_t);
394 extern int anon_disclaim(struct anon_map *,
395 ulong_t, size_t, uint_t, pgcnt_t *);
396 extern int anon_getpage(struct anon **, uint_t *, struct page **,
397 size_t, struct seg *, caddr_t, enum seg_rw, struct cred *);
398 extern int swap_getconpage(struct vnode *, uoff_t, size_t,
399 uint_t *, page_t *[], size_t, page_t *, uint_t *,
400 spgcnt_t *, struct seg *, caddr_t,
401 enum seg_rw, struct cred *);
402 extern int anon_map_getpages(struct anon_map *, ulong_t,
403 uint_t, struct seg *, caddr_t, uint_t,
404 uint_t *, page_t *[], uint_t *,
405 struct vpage [], enum seg_rw, int, int, int, struct cred *);
406 extern int anon_map_privatepages(struct anon_map *, ulong_t,
407 uint_t, struct seg *, caddr_t, uint_t,
408 page_t *[], struct vpage [], int, int, struct cred *);
409 extern struct page *anon_private(struct anon **, struct seg *,
410 caddr_t, uint_t, struct page *,
411 int, struct cred *);
412 extern struct page *anon_zero(struct seg *, caddr_t,
413 struct anon **, struct cred *);
414 extern int anon_map_createpages(struct anon_map *, ulong_t,
415 size_t, struct page **,
416 struct seg *, caddr_t,
417 enum seg_rw, struct cred *);
418 extern int anon_map_demotepages(struct anon_map *, ulong_t,
419 struct seg *, caddr_t, uint_t,
420 struct vpage [], struct cred *);
421 extern void anon_shmap_free_pages(struct anon_map *, ulong_t, size_t);
422 extern int anon_resvmem(size_t, boolean_t, zone_t *, int);
423 extern void anon_unresvmem(size_t, zone_t *);
424 extern struct anon_map *anonmap_alloc(size_t, size_t, int);
425 extern void anonmap_free(struct anon_map *);
426 extern void anonmap_purge(struct anon_map *);
427 extern void anon_swap_free(struct anon *, struct page *);
428 extern void anon_decref(struct anon *);
429 extern int non_anon(struct anon_hdr *, ulong_t, uoff_t *, size_t *);
430 extern pgcnt_t anon_pages(struct anon_hdr *, ulong_t, pgcnt_t);
431 extern int anon_swap_adjust(pgcnt_t);
432 extern void anon_swap_restore(pgcnt_t);
433 extern struct anon_hdr *anon_create(pgcnt_t, int);
434 extern void anon_release(struct anon_hdr *, pgcnt_t);
435 extern struct anon *anon_get_ptr(struct anon_hdr *, ulong_t);
436 extern ulong_t *anon_get_slot(struct anon_hdr *, ulong_t);
437 extern struct anon *anon_get_next_ptr(struct anon_hdr *, ulong_t *);
438 extern int anon_set_ptr(struct anon_hdr *, ulong_t, struct anon *, int);
439 extern int anon_copy_ptr(struct anon_hdr *, ulong_t,
440 struct anon_hdr *, ulong_t, pgcnt_t, int);
441 extern pgcnt_t anon_grow(struct anon_hdr *, ulong_t *, pgcnt_t, pgcnt_t, int);
442 extern void anon_array_enter(struct anon_map *, ulong_t,
443 anon_sync_obj_t *);
444 extern void anon_array_exit(anon_sync_obj_t *);
447 * anon_resv checks to see if there is enough swap space to fulfill a
448 * request and if so, reserves the appropriate anonymous memory resources.
449 * anon_checkspace just checks to see if there is space to fulfill the request,
450 * without taking any resources. Both return 1 if successful and 0 if not.
452 * Macros are provided as anon reservation is usually charged to the zone of
453 * the current process. In some cases (such as anon reserved by tmpfs), a
454 * zone pointer is needed to charge the appropriate zone.
/*
 * Convenience wrappers around anon_resvmem() and anon_unresvmem().
 *
 * The forms without a zone argument charge curproc->p_zone; the *_zone
 * forms (and anon_checkspace) take the zone to charge explicitly.
 *
 *	anon_unresv(size)		anon_unresvmem(size, curproc->p_zone)
 *	anon_unresv_zone(size, zone)	anon_unresvmem(size, zone)
 *	anon_resv(size)			anon_resvmem(size, 1, curproc->p_zone, 1)
 *	anon_resv_zone(size, zone)	anon_resvmem(size, 1, zone, 1)
 *	anon_checkspace(size, zone)	anon_resvmem(size, 0, zone, 0)
 *	anon_try_resv_zone(size, zone)	anon_resvmem(size, 1, zone, 0)
 *
 * NOTE(review): the meaning of the boolean_t (2nd) and int (4th) arguments
 * is defined by anon_resvmem(), which is not visible in this header.
 *
 * Every macro parameter is parenthesized in the expansion so that all six
 * wrappers treat expression arguments the same way.
 */
#define	anon_unresv(size)		anon_unresvmem((size), curproc->p_zone)
#define	anon_unresv_zone(size, zone)	anon_unresvmem((size), (zone))
#define	anon_resv(size)	\
	anon_resvmem((size), 1, curproc->p_zone, 1)
#define	anon_resv_zone(size, zone)	anon_resvmem((size), 1, (zone), 1)
#define	anon_checkspace(size, zone)	anon_resvmem((size), 0, (zone), 0)
#define	anon_try_resv_zone(size, zone)	anon_resvmem((size), 1, (zone), 0)
465 * Flags to anon_private
467 #define STEAL_PAGE 0x1 /* page can be stolen */
468 #define LOCK_PAGE 0x2 /* page must be ``logically'' locked */
471 * SEGKP ANON pages that are locked are assumed to be LWP stack pages
472 * and thus count towards the user pages locked count.
473 * This value is protected by the same lock as availrmem.
475 extern pgcnt_t anon_segkp_pages_locked;
477 extern int anon_debug;
479 #ifdef ANON_DEBUG
481 #define A_ANON 0x01
482 #define A_RESV 0x02
483 #define A_MRESV 0x04
/*
 * vararg-like debugging macro (ANON_DEBUG builds only).
 *
 * Calls printf with `printf_args' when any bit of the mask `f' is set in
 * anon_debug.  `printf_args' must be a complete, parenthesized printf
 * argument list, e.g.
 *
 *	ANON_PRINT(A_RESV, ("resv %lu\n", n));
 *
 * The body is wrapped in do { } while (0) so the macro expands to exactly
 * one statement: used as the body of an unbraced `if', it can no longer
 * capture the caller's following `else'.  `f' is parenthesized so that an
 * expression such as A_ANON | A_RESV is tested as a whole.
 */
#define	ANON_PRINT(f, printf_args)		\
	do {					\
		if (anon_debug & (f))		\
			printf printf_args;	\
	} while (0)
490 #else /* ANON_DEBUG */
492 #define ANON_PRINT(f, printf_args)
494 #endif /* ANON_DEBUG */
496 #endif /* _KERNEL */
498 #ifdef __cplusplus
500 #endif
502 #endif /* _VM_ANON_H */