HAMMER: MFC to 2.0
[dragonfly.git] / sys / vfs / hammer / hammer_pfs.c
blobe5ba329d778c2301cb90031be3e0eed8c1cfd271
1 /*
2 * Copyright (c) 2008 The DragonFly Project. All rights reserved.
3 *
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@backplane.com>
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
34 * $DragonFly: src/sys/vfs/hammer/hammer_pfs.c,v 1.1.2.4 2008/08/02 21:24:28 dillon Exp $
37 * HAMMER PFS ioctls - Manage pseudo-fs configurations
40 #include "hammer.h"
42 static int hammer_pfs_autodetect(struct hammer_ioc_pseudofs_rw *pfs,
43 hammer_inode_t ip);
44 static int hammer_pfs_rollback(hammer_transaction_t trans,
45 hammer_pseudofs_inmem_t pfsm,
46 hammer_tid_t trunc_tid);
47 static int hammer_pfs_delete_at_cursor(hammer_cursor_t cursor,
48 hammer_tid_t trunc_tid);
51 * Get mirroring/pseudo-fs information
53 * NOTE: The ip used for ioctl is not necessarily related to the PFS
55 int
56 hammer_ioc_get_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
57 struct hammer_ioc_pseudofs_rw *pfs)
59 hammer_pseudofs_inmem_t pfsm;
60 u_int32_t localization;
61 int error;
63 if ((error = hammer_pfs_autodetect(pfs, ip)) != 0)
64 return(error);
65 localization = (u_int32_t)pfs->pfs_id << 16;
66 pfs->bytes = sizeof(struct hammer_pseudofs_data);
67 pfs->version = HAMMER_IOC_PSEUDOFS_VERSION;
69 pfsm = hammer_load_pseudofs(trans, localization, &error);
70 if (error) {
71 hammer_rel_pseudofs(trans->hmp, pfsm);
72 return(error);
76 * If the PFS is a master the sync tid is set by normal operation
77 * rather then the mirroring code, and will always track the
78 * real HAMMER filesystem.
80 * We use flush_tid1, which is the highest fully committed TID.
81 * flush_tid2 is the TID most recently flushed, but the UNDO hasn't
82 * caught up to it yet so a crash will roll us back to flush_tid1.
84 if ((pfsm->pfsd.mirror_flags & HAMMER_PFSD_SLAVE) == 0)
85 pfsm->pfsd.sync_end_tid = trans->hmp->flush_tid1;
88 * Copy out to userland.
90 error = 0;
91 if (pfs->ondisk && error == 0)
92 error = copyout(&pfsm->pfsd, pfs->ondisk, sizeof(pfsm->pfsd));
93 hammer_rel_pseudofs(trans->hmp, pfsm);
94 return(error);
98 * Set mirroring/pseudo-fs information
100 * NOTE: The ip used for ioctl is not necessarily related to the PFS
103 hammer_ioc_set_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
104 struct ucred *cred, struct hammer_ioc_pseudofs_rw *pfs)
106 hammer_pseudofs_inmem_t pfsm;
107 u_int32_t localization;
108 int error;
110 if ((error = hammer_pfs_autodetect(pfs, ip)) != 0)
111 return(error);
112 localization = (u_int32_t)pfs->pfs_id << 16;
113 if (pfs->version != HAMMER_IOC_PSEUDOFS_VERSION)
114 error = EINVAL;
115 localization = (u_int32_t)pfs->pfs_id << 16;
117 if (error == 0 && pfs->ondisk) {
119 * Load the PFS so we can modify our in-core copy. Ignore
120 * ENOENT errors.
122 pfsm = hammer_load_pseudofs(trans, localization, &error);
123 error = copyin(pfs->ondisk, &pfsm->pfsd, sizeof(pfsm->pfsd));
126 * Save it back, create a root inode if we are in master
127 * mode and no root exists.
129 if (error == 0)
130 error = hammer_mkroot_pseudofs(trans, cred, pfsm);
131 if (error == 0)
132 error = hammer_save_pseudofs(trans, pfsm);
135 * Wakeup anyone waiting for a TID update for this PFS
137 wakeup(&pfsm->pfsd.sync_end_tid);
138 hammer_rel_pseudofs(trans->hmp, pfsm);
140 return(error);
144 * Upgrade a slave to a master
146 * This is fairly easy to do, but we must physically undo any partial syncs
147 * for transaction ids > sync_end_tid. Effective, we must do a partial
148 * rollback.
150 * NOTE: The ip used for ioctl is not necessarily related to the PFS
153 hammer_ioc_upgrade_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
154 struct hammer_ioc_pseudofs_rw *pfs)
156 hammer_pseudofs_inmem_t pfsm;
157 u_int32_t localization;
158 int error;
160 if ((error = hammer_pfs_autodetect(pfs, ip)) != 0)
161 return(error);
162 localization = (u_int32_t)pfs->pfs_id << 16;
163 if ((error = hammer_unload_pseudofs(trans, localization)) != 0)
164 return(error);
167 * A master id must be set when upgrading
169 pfsm = hammer_load_pseudofs(trans, localization, &error);
170 if (error == 0) {
171 if ((pfsm->pfsd.mirror_flags & HAMMER_PFSD_SLAVE) != 0) {
172 error = hammer_pfs_rollback(trans, pfsm,
173 pfsm->pfsd.sync_end_tid + 1);
174 if (error == 0) {
175 pfsm->pfsd.mirror_flags &= ~HAMMER_PFSD_SLAVE;
176 error = hammer_save_pseudofs(trans, pfsm);
180 hammer_rel_pseudofs(trans->hmp, pfsm);
181 if (error == EINTR) {
182 pfs->head.flags |= HAMMER_IOC_HEAD_INTR;
183 error = 0;
185 return (error);
189 * Downgrade a master to a slave
191 * This is really easy to do, just set the SLAVE flag.
193 * We also leave sync_end_tid intact... the field is not used in master
194 * mode (vol0_next_tid overrides it), but if someone switches to master
195 * mode accidently and then back to slave mode we don't want it to change.
196 * Eventually it will be used as the cross-synchronization TID in
197 * multi-master mode, and we don't want to mess with it for that feature
198 * either.
200 * NOTE: The ip used for ioctl is not necessarily related to the PFS
203 hammer_ioc_downgrade_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
204 struct hammer_ioc_pseudofs_rw *pfs)
206 hammer_pseudofs_inmem_t pfsm;
207 u_int32_t localization;
208 int error;
210 if ((error = hammer_pfs_autodetect(pfs, ip)) != 0)
211 return(error);
212 localization = (u_int32_t)pfs->pfs_id << 16;
213 if ((error = hammer_unload_pseudofs(trans, localization)) != 0)
214 return(error);
216 pfsm = hammer_load_pseudofs(trans, localization, &error);
217 if (error == 0) {
218 if ((pfsm->pfsd.mirror_flags & HAMMER_PFSD_SLAVE) == 0) {
219 pfsm->pfsd.mirror_flags |= HAMMER_PFSD_SLAVE;
220 error = hammer_save_pseudofs(trans, pfsm);
223 hammer_rel_pseudofs(trans->hmp, pfsm);
224 return (error);
228 * Destroy a PFS
230 * We can destroy a PFS by scanning and deleting all of its records in the
231 * B-Tree. The hammer utility will delete the softlink in the primary
232 * filesystem.
234 * NOTE: The ip used for ioctl is not necessarily related to the PFS
237 hammer_ioc_destroy_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
238 struct hammer_ioc_pseudofs_rw *pfs)
240 hammer_pseudofs_inmem_t pfsm;
241 u_int32_t localization;
242 int error;
244 if ((error = hammer_pfs_autodetect(pfs, ip)) != 0)
245 return(error);
246 localization = (u_int32_t)pfs->pfs_id << 16;
248 if ((error = hammer_unload_pseudofs(trans, localization)) != 0)
249 return(error);
251 pfsm = hammer_load_pseudofs(trans, localization, &error);
252 if (error == 0) {
253 error = hammer_pfs_rollback(trans, pfsm, 0);
254 if (error == 0) {
255 pfsm->pfsd.mirror_flags |= HAMMER_PFSD_DELETED;
256 error = hammer_save_pseudofs(trans, pfsm);
259 hammer_rel_pseudofs(trans->hmp, pfsm);
260 if (error == EINTR) {
261 pfs->head.flags |= HAMMER_IOC_HEAD_INTR;
262 error = 0;
264 return(error);
268 * Wait for the PFS to sync past the specified TID
271 hammer_ioc_wait_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
272 struct hammer_ioc_pseudofs_rw *pfs)
274 hammer_pseudofs_inmem_t pfsm;
275 struct hammer_pseudofs_data pfsd;
276 u_int32_t localization;
277 hammer_tid_t tid;
278 void *waitp;
279 int error;
281 if ((error = hammer_pfs_autodetect(pfs, ip)) != 0)
282 return(error);
283 localization = (u_int32_t)pfs->pfs_id << 16;
285 if ((error = copyin(pfs->ondisk, &pfsd, sizeof(pfsd))) != 0)
286 return(error);
288 pfsm = hammer_load_pseudofs(trans, localization, &error);
289 if (error == 0) {
290 if (pfsm->pfsd.mirror_flags & HAMMER_PFSD_SLAVE) {
291 tid = pfsm->pfsd.sync_end_tid;
292 waitp = &pfsm->pfsd.sync_end_tid;
293 } else {
294 tid = trans->hmp->flush_tid1;
295 waitp = &trans->hmp->flush_tid1;
297 if (tid <= pfsd.sync_end_tid)
298 tsleep(waitp, PCATCH, "hmrmwt", 0);
300 hammer_rel_pseudofs(trans->hmp, pfsm);
301 if (error == EINTR) {
302 pfs->head.flags |= HAMMER_IOC_HEAD_INTR;
303 error = 0;
305 return(error);
310 * Auto-detect the pseudofs and do basic bounds checking.
312 static
314 hammer_pfs_autodetect(struct hammer_ioc_pseudofs_rw *pfs, hammer_inode_t ip)
316 int error = 0;
318 if (pfs->pfs_id == -1)
319 pfs->pfs_id = (int)(ip->obj_localization >> 16);
320 if (pfs->pfs_id < 0 || pfs->pfs_id >= HAMMER_MAX_PFS)
321 error = EINVAL;
322 if (pfs->bytes < sizeof(struct hammer_pseudofs_data))
323 error = EINVAL;
324 return(error);
328 * Rollback the specified PFS to (trunc_tid - 1), removing everything
329 * greater or equal to trunc_tid. The PFS must not have been in no-mirror
330 * mode or the MIRROR_FILTERED scan will not work properly.
332 * This is typically used to remove any partial syncs when upgrading a
333 * slave to a master. It can theoretically also be used to rollback
334 * any PFS, including PFS#0, BUT ONLY TO POINTS THAT HAVE NOT YET BEEN
335 * PRUNED, and to points that are older only if they are on a retained
336 * (pruning softlink) boundary.
338 * Rollbacks destroy information. If you don't mind inode numbers changing
339 * a better way would be to cpdup a snapshot back onto the master.
341 static
343 hammer_pfs_rollback(hammer_transaction_t trans,
344 hammer_pseudofs_inmem_t pfsm,
345 hammer_tid_t trunc_tid)
347 struct hammer_cmirror cmirror;
348 struct hammer_cursor cursor;
349 struct hammer_base_elm key_cur;
350 int error;
351 int seq;
353 bzero(&cmirror, sizeof(cmirror));
354 bzero(&key_cur, sizeof(key_cur));
355 key_cur.localization = HAMMER_MIN_LOCALIZATION + pfsm->localization;
356 key_cur.obj_id = HAMMER_MIN_OBJID;
357 key_cur.key = HAMMER_MIN_KEY;
358 key_cur.create_tid = 1;
359 key_cur.rec_type = HAMMER_MIN_RECTYPE;
361 seq = trans->hmp->flusher.act;
363 retry:
364 error = hammer_init_cursor(trans, &cursor, NULL, NULL);
365 if (error) {
366 hammer_done_cursor(&cursor);
367 goto failed;
369 cursor.key_beg = key_cur;
370 cursor.key_end.localization = HAMMER_MAX_LOCALIZATION +
371 pfsm->localization;
372 cursor.key_end.obj_id = HAMMER_MAX_OBJID;
373 cursor.key_end.key = HAMMER_MAX_KEY;
374 cursor.key_end.create_tid = HAMMER_MAX_TID;
375 cursor.key_end.rec_type = HAMMER_MAX_RECTYPE;
377 cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE;
378 cursor.flags |= HAMMER_CURSOR_BACKEND;
381 * Do an optimized scan of only records created or modified
382 * >= trunc_tid, so we can fix up those records. We must
383 * still check the TIDs but this greatly reduces the size of
384 * the scan.
386 cursor.flags |= HAMMER_CURSOR_MIRROR_FILTERED;
387 cursor.cmirror = &cmirror;
388 cmirror.mirror_tid = trunc_tid;
390 error = hammer_btree_first(&cursor);
391 while (error == 0) {
393 * Abort the rollback.
395 if (error == 0) {
396 error = hammer_signal_check(trans->hmp);
397 if (error)
398 break;
402 * We only care about leafs. Internal nodes can be returned
403 * in mirror-filtered mode (they are used to generate SKIP
404 * mrecords), but we don't need them for this code.
406 cursor.flags |= HAMMER_CURSOR_ATEDISK;
407 if (cursor.node->ondisk->type == HAMMER_BTREE_TYPE_LEAF) {
408 key_cur = cursor.node->ondisk->elms[cursor.index].base;
409 error = hammer_pfs_delete_at_cursor(&cursor, trunc_tid);
412 while (hammer_flusher_meta_halflimit(trans->hmp) ||
413 hammer_flusher_undo_exhausted(trans, 2)) {
414 hammer_unlock_cursor(&cursor, 0);
415 hammer_flusher_wait(trans->hmp, seq);
416 hammer_lock_cursor(&cursor, 0);
417 seq = hammer_flusher_async_one(trans->hmp);
420 if (error == 0)
421 error = hammer_btree_iterate(&cursor);
423 if (error == ENOENT)
424 error = 0;
425 hammer_done_cursor(&cursor);
426 if (error == EDEADLK)
427 goto retry;
428 failed:
429 return(error);
433 * Helper function - perform rollback on a B-Tree element given trunc_tid.
435 * If create_tid >= trunc_tid the record is physically destroyed.
436 * If delete_tid >= trunc_tid it will be set to 0, undeleting the record.
438 static
440 hammer_pfs_delete_at_cursor(hammer_cursor_t cursor, hammer_tid_t trunc_tid)
442 hammer_btree_leaf_elm_t elm;
443 hammer_transaction_t trans;
444 int error;
446 elm = &cursor->node->ondisk->elms[cursor->index].leaf;
447 if (elm->base.create_tid < trunc_tid &&
448 elm->base.delete_tid < trunc_tid) {
449 return(0);
451 trans = cursor->trans;
453 if (elm->base.create_tid >= trunc_tid) {
454 error = hammer_delete_at_cursor(
455 cursor, HAMMER_DELETE_DESTROY,
456 cursor->trans->tid, cursor->trans->time32,
457 1, NULL);
458 } else if (elm->base.delete_tid >= trunc_tid) {
459 error = hammer_delete_at_cursor(
460 cursor, HAMMER_DELETE_ADJUST,
461 0, 0,
462 1, NULL);
463 } else {
464 error = 0;
466 return(error);