linprocfs - Introduce /proc/mounts
[dragonfly.git] / sys / sys / ccms.h
blobe2c0a6c3b0a8aadd1cd78c68445fca081145c23b
1 /*
2 * Copyright (c) 2006 The DragonFly Project. All rights reserved.
3 *
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@backplane.com>
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
34 * $DragonFly: src/sys/sys/ccms.h,v 1.1 2006/08/23 06:45:40 dillon Exp $
37 * CCMS - Cache Coherency Management System. These structures are used
38 * to manage cache coherency and locking for an object. Cache Coherency is
39 * managed at byte granularity with 64 bit offset ranges.
41 * Management is broken into two distinct pieces: (1) Local shared/exclusive
42 * locks which essentially replace traditional vnode locks and (2) local
43 * cache state which interacts with other hosts and follows a MESI-like model.
45 * The core to the entire module is the 'CST' (Cache State Tree) structure
46 * which stores both pieces of information in a red-black tree styled data
47 * structure. CSTs are non-overlapping offset-ranged entities. Other
48  * higher level structures govern how CSTs in the red-black tree are cut up
49  * or merged.
52 #ifndef _SYS_CCMS_H_
53 #define _SYS_CCMS_H_
55 #ifndef _SYS_TYPES_H_
56 #include <sys/types.h>
57 #endif
58 #ifndef _SYS_PARAM_H_
59 #include <sys/param.h>
60 #endif
61 #ifndef _SYS_SERIALIZE_H_
62 #include <sys/serialize.h>
63 #endif
64 #ifndef _SYS_SPINLOCK_H_
65 #include <sys/spinlock.h>
66 #endif
67 #ifndef _SYS_TREE_H_
68 #include <sys/tree.h>
69 #endif
/*
 * CCMS uses a red-black tree to sort CSTs (keyed by byte offset, per the
 * off_t comparator declared below).
 */
RB_HEAD(ccms_rb_tree, ccms_cst);
RB_PROTOTYPE3(ccms_rb_tree, ccms_cst, rbnode, ccms_cst_cmp, off_t);

struct ccms_lock;
struct ccms_cst;
/*
 * ccms_state_t - CCMS cache states
 *
 * CCMS uses an extended MESI caching model.  There are two extension
 * states, MASTER and SLAVE, which represent dirty data which has not been
 * synchronized to backing store but which nevertheless is being shared
 * between distinct caches.  These states are designed to allow data
 * to be shared between nodes in a cluster without having to wait for it
 * to be synchronized with its backing store.
 *
 * SLAVE    - A shared state where the master copy of the data is being
 *	      held by a foreign cache rather than by backing store.
 *	      This state implies that the backing store may contain stale
 *	      data.
 *
 * MASTER   - A shared state where the master copy of the data is being
 *	      held locally.  Zero or more foreign caches may be holding
 *	      a copy of our data, so we cannot modify it without
 *	      invalidating those caches.  This state implies that the
 *	      backing store may contain stale data.
 *
 *	      MASTER differs from MODIFIED in that the data is read-only
 *	      due to the existence of foreign copies.  However, even though
 *	      the data is read-only, it is ALSO DIRTY because the backing
 *	      store has not been synchronized.
 *
 * NOTE! The cache state represents the worst case cache state for caching
 * elements such as the buffer cache or VM page cache or the vnode attribute
 * cache (or other things) within the specified range.  It does NOT mean
 * that the local machine actually has all of the requested data in-hand.
 */
typedef enum ccms_state {
	CCMS_STATE_INVALID = 0,
	CCMS_STATE_SHARED,	/* clean, read-only, from backing store */
	CCMS_STATE_SLAVE,	/* clean, read-only, from master */
	CCMS_STATE_MASTER,	/* dirty, read-only, shared master copy */
	CCMS_STATE_EXCLUSIVE,	/* clean, read-only, exclusive */
	CCMS_STATE_MODIFIED	/* clean or dirty, read-write, exclusive */
} ccms_state_t;
/*
 * ccms_ltype_t - local access control lock state
 *
 * Note: A MODIFYING lock is an exclusive lock where the caller intends to
 * make a modification, such as issuing a WRITE.  The difference between the
 * two is in how the cache state is affected by the lock.  The distinction
 * exists because there are many situations where the governing structure
 * on the local machine needs to be locked exclusively, but the underlying
 * data cache does not.
 *
 *	lock type	cache state
 *	---------	---------
 *	SHARED		>= shared
 *	EXCLUSIVE	>= shared
 *	MODIFYING	>= exclusive
 */
typedef enum ccms_ltype {
	CCMS_LTYPE_SHARED = 0,	/* shared lock on the range */
	CCMS_LTYPE_EXCLUSIVE,	/* exclusive lock on the range */
	CCMS_LTYPE_MODIFYING	/* modifying lock on the range */
} ccms_ltype_t;
143 * The CCMS ABI information structure. This structure contains ABI
144 * calls to resolve incompatible cache states.
146 struct ccms_info {
147 int (*ccms_set_cache)(struct ccms_info *, struct ccms_lock *, ccms_state_t);
148 void *data;
149 /* XXX */
153 * A CCMS dataspace, typically stored in a vnode or VM object. The primary
154 * reference is to the ccms_dataspace representing the local machine. The
155 * chain field is used to link ccms_dataspace's representing other machines.
156 * These foreign representations typically only contain summary 'worst-case'
157 * CSTs. The chain only needs to be followed if a CST has a cache state
158 * that is incompatible with the request.
160 struct ccms_dataspace {
161 struct ccms_rb_tree tree;
162 struct ccms_info *info;
163 struct ccms_dataspace *chain;
164 ccms_state_t defstate;
165 struct spinlock spin;
166 struct ccms_cst *delayed_free; /* delayed frees */
170 * The CCMS locking element - represents a high level locking request,
171 * such as used by read, write, and truncate operations. These requests
172 * are not organized into any tree but instead are shadowed by items in
173 * the actual cache state tree (ccms_cst). There are no direct links
174 * between a ccms_lock and the underlying CST items, only reference count
175 * fields in the CST item.
177 * When a CCMS lock is established the cache state of the underlying elements
178 * is adjusted to meet the requirements of the lock. The cache state
179 * requirements are infered by the lock type:
181 * NOTE: Ranges may include negative offsets. These are typically used to
182 * represent meta-data.
184 * local lock cache state
185 * ----------------- --------------------
186 * SHARED - SHARED must not be invalid
187 * EXCLUSIVE - EXCLUSIVE must not be invalid
188 * MODIFYING - EXCLUSIVE must be EXCLUSIVE or MODIFIED
190 struct ccms_lock {
191 struct ccms_dataspace *ds;
192 off_t beg_offset;
193 off_t end_offset;
194 ccms_ltype_t ltype;
198 * CCMS cache state tree element (CST) - represents the actual cache
199 * management state for a data space. The cache state tree is a
200 * non-overlaping red-black tree containing ranged ccms_cst structures
201 * which reflect the resolved state for all current high level locking
202 * requests. For example, two overlapping ccms_lock requests for shared
203 * access would typically be represented by three non-overlapping ccms_cst
204 * items in the CST. The CST item representing the overlapped portion of
205 * the ccms_lock requests would have ref count of 2 while the other CST
206 * items would have a ref count of 1.
208 * [lock request #01]
209 * [lock request #02]
210 * [--cst--][--cst--][--cst--]
212 * CSTs are partitioned so their edges line up to all current and pending
213 * ccms_lock requests. CSTs are re-merged whenever possible. A freshly
214 * initialized database typically has a single CST representing the default
215 * cache state for the host.
217 * A CST represents *TWO* different things. First, it represents local
218 * locks held on data ranges. Second, it represents the best-case cache
219 * state for data cached on the local machine for local<->remote host
220 * interactions.
222 * Any arbitrary data range within a dataspace can be locked shared or
223 * exclusive. Obtaining a lock has the side effect of potentially modifying
224 * the cache state. A positive sharecount in a CST indicates that a
225 * shared access lock is being held. A negative sharecount indicates an
226 * exclusive access lock is being held on the range. A MODIFYING lock
227 * type is just an exclusive lock but one which effects the cache state
228 * differently.
230 * The end offset is byte-inclusive, allowing the entire 64 bit data space
231 * to be represented without overflowing the edge case. For example, a
232 * 64 byte area might be represented as (0,63). The offsets are SIGNED
233 * entities. Negative offsets are often used to represent meta-data
234 * such as ownership and permissions. The file size is typically cached as a
235 * side effect of file operations occuring at the file EOF rather then
236 * combined with ownership and permissions.
238 struct ccms_cst {
239 RB_ENTRY(ccms_cst) rbnode; /* stored in a red-black tree */
240 struct ccms_cst *delayed_next; /* linked list to free */
241 off_t beg_offset;
242 off_t end_offset;
243 ccms_state_t state; /* local cache state */
244 int sharecount; /* shared or exclusive lock count */
245 int modifycount; /* number of modifying exclusive lks */
246 int blocked; /* indicates a blocked lock request */
247 int xrefs; /* lock overlap references */
248 int lrefs; /* left edge refs */
249 int rrefs; /* right edge refs */
/* Pointer shorthands for the major CCMS structures. */
typedef struct ccms_info	*ccms_info_t;
typedef struct ccms_dataspace	*ccms_dataspace_t;
typedef struct ccms_lock	*ccms_lock_t;
typedef struct ccms_cst		*ccms_cst_t;
258 * Kernel API
260 #ifdef _KERNEL
262 static __inline
263 void
264 ccms_lock_init(ccms_lock_t lock, off_t beg_offset, off_t end_offset,
265 ccms_ltype_t ltype)
267 lock->beg_offset = beg_offset;
268 lock->end_offset = end_offset;
269 lock->ltype = ltype;
272 void ccms_dataspace_init(ccms_dataspace_t);
273 void ccms_dataspace_destroy(ccms_dataspace_t);
274 int ccms_lock_get(ccms_dataspace_t, ccms_lock_t);
275 int ccms_lock_get_uio(ccms_dataspace_t, ccms_lock_t, struct uio *);
276 int ccms_lock_put(ccms_dataspace_t, ccms_lock_t);
278 #endif
280 #endif