Pre-2.0 release: Sync with HAMMER 64 - simplify PFS operations, fix pfs-upgrade
[dragonfly.git] / sys / vfs / hammer / hammer_pfs.c
blob920c507d67e12a746cdfe095bd36c8e6b708a7a1
/*
 * Copyright (c) 2008 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/vfs/hammer/hammer_pfs.c,v 1.1.2.3 2008/07/19 18:46:20 dillon Exp $
 */
/*
 * HAMMER PFS ioctls - Manage pseudo-fs configurations
 */
40 #include "hammer.h"
42 static int hammer_pfs_autodetect(struct hammer_ioc_pseudofs_rw *pfs,
43 hammer_inode_t ip);
44 static int hammer_pfs_rollback(hammer_transaction_t trans,
45 hammer_pseudofs_inmem_t pfsm,
46 hammer_tid_t trunc_tid);
47 static int hammer_pfs_delete_at_cursor(hammer_cursor_t cursor,
48 hammer_tid_t trunc_tid);
51 * Get mirroring/pseudo-fs information
53 * NOTE: The ip used for ioctl is not necessarily related to the PFS
55 int
56 hammer_ioc_get_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
57 struct hammer_ioc_pseudofs_rw *pfs)
59 hammer_pseudofs_inmem_t pfsm;
60 u_int32_t localization;
61 int error;
63 if ((error = hammer_pfs_autodetect(pfs, ip)) != 0)
64 return(error);
65 localization = (u_int32_t)pfs->pfs_id << 16;
66 pfs->bytes = sizeof(struct hammer_pseudofs_data);
67 pfs->version = HAMMER_IOC_PSEUDOFS_VERSION;
69 pfsm = hammer_load_pseudofs(trans, localization, &error);
70 if (error) {
71 hammer_rel_pseudofs(trans->hmp, pfsm);
72 return(error);
76 * If the PFS is a master the sync tid is set by normal operation
77 * rather then the mirroring code, and will always track the
78 * real HAMMER filesystem.
80 if ((pfsm->pfsd.mirror_flags & HAMMER_PFSD_SLAVE) == 0)
81 pfsm->pfsd.sync_end_tid = trans->rootvol->ondisk->vol0_next_tid;
84 * Copy out to userland.
86 error = 0;
87 if (pfs->ondisk && error == 0)
88 error = copyout(&pfsm->pfsd, pfs->ondisk, sizeof(pfsm->pfsd));
89 hammer_rel_pseudofs(trans->hmp, pfsm);
90 return(error);
94 * Set mirroring/pseudo-fs information
96 * NOTE: The ip used for ioctl is not necessarily related to the PFS
98 int
99 hammer_ioc_set_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
100 struct ucred *cred, struct hammer_ioc_pseudofs_rw *pfs)
102 hammer_pseudofs_inmem_t pfsm;
103 u_int32_t localization;
104 int error;
106 if ((error = hammer_pfs_autodetect(pfs, ip)) != 0)
107 return(error);
108 localization = (u_int32_t)pfs->pfs_id << 16;
109 if (pfs->version != HAMMER_IOC_PSEUDOFS_VERSION)
110 error = EINVAL;
111 localization = (u_int32_t)pfs->pfs_id << 16;
113 if (error == 0 && pfs->ondisk) {
115 * Load the PFS so we can modify our in-core copy. Ignore
116 * ENOENT errors.
118 pfsm = hammer_load_pseudofs(trans, localization, &error);
119 error = copyin(pfs->ondisk, &pfsm->pfsd, sizeof(pfsm->pfsd));
122 * Save it back, create a root inode if we are in master
123 * mode and no root exists.
125 if (error == 0)
126 error = hammer_mkroot_pseudofs(trans, cred, pfsm);
127 if (error == 0)
128 error = hammer_save_pseudofs(trans, pfsm);
129 hammer_rel_pseudofs(trans->hmp, pfsm);
131 return(error);
135 * Upgrade a slave to a master
137 * This is fairly easy to do, but we must physically undo any partial syncs
138 * for transaction ids > sync_end_tid. Effective, we must do a partial
139 * rollback.
141 * NOTE: The ip used for ioctl is not necessarily related to the PFS
144 hammer_ioc_upgrade_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
145 struct hammer_ioc_pseudofs_rw *pfs)
147 hammer_pseudofs_inmem_t pfsm;
148 u_int32_t localization;
149 int error;
151 if ((error = hammer_pfs_autodetect(pfs, ip)) != 0)
152 return(error);
153 localization = (u_int32_t)pfs->pfs_id << 16;
154 if ((error = hammer_unload_pseudofs(trans, localization)) != 0)
155 return(error);
158 * A master id must be set when upgrading
160 pfsm = hammer_load_pseudofs(trans, localization, &error);
161 if (error == 0) {
162 if ((pfsm->pfsd.mirror_flags & HAMMER_PFSD_SLAVE) != 0) {
163 error = hammer_pfs_rollback(trans, pfsm,
164 pfsm->pfsd.sync_end_tid + 1);
165 if (error == 0) {
166 pfsm->pfsd.mirror_flags &= ~HAMMER_PFSD_SLAVE;
167 error = hammer_save_pseudofs(trans, pfsm);
171 hammer_rel_pseudofs(trans->hmp, pfsm);
172 if (error == EINTR) {
173 pfs->head.flags |= HAMMER_IOC_HEAD_INTR;
174 error = 0;
176 return (error);
180 * Downgrade a master to a slave
182 * This is really easy to do, just set the SLAVE flag.
184 * We also leave sync_end_tid intact... the field is not used in master
185 * mode (vol0_next_tid overrides it), but if someone switches to master
186 * mode accidently and then back to slave mode we don't want it to change.
187 * Eventually it will be used as the cross-synchronization TID in
188 * multi-master mode, and we don't want to mess with it for that feature
189 * either.
191 * NOTE: The ip used for ioctl is not necessarily related to the PFS
194 hammer_ioc_downgrade_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
195 struct hammer_ioc_pseudofs_rw *pfs)
197 hammer_pseudofs_inmem_t pfsm;
198 u_int32_t localization;
199 int error;
201 if ((error = hammer_pfs_autodetect(pfs, ip)) != 0)
202 return(error);
203 localization = (u_int32_t)pfs->pfs_id << 16;
204 if ((error = hammer_unload_pseudofs(trans, localization)) != 0)
205 return(error);
207 pfsm = hammer_load_pseudofs(trans, localization, &error);
208 if (error == 0) {
209 if ((pfsm->pfsd.mirror_flags & HAMMER_PFSD_SLAVE) == 0) {
210 pfsm->pfsd.mirror_flags |= HAMMER_PFSD_SLAVE;
211 error = hammer_save_pseudofs(trans, pfsm);
214 hammer_rel_pseudofs(trans->hmp, pfsm);
215 return (error);
219 * Destroy a PFS
221 * We can destroy a PFS by scanning and deleting all of its records in the
222 * B-Tree. The hammer utility will delete the softlink in the primary
223 * filesystem.
225 * NOTE: The ip used for ioctl is not necessarily related to the PFS
228 hammer_ioc_destroy_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
229 struct hammer_ioc_pseudofs_rw *pfs)
231 hammer_pseudofs_inmem_t pfsm;
232 u_int32_t localization;
233 int error;
235 if ((error = hammer_pfs_autodetect(pfs, ip)) != 0)
236 return(error);
237 localization = (u_int32_t)pfs->pfs_id << 16;
239 if ((error = hammer_unload_pseudofs(trans, localization)) != 0)
240 return(error);
242 pfsm = hammer_load_pseudofs(trans, localization, &error);
243 if (error == 0) {
244 error = hammer_pfs_rollback(trans, pfsm, 0);
245 if (error == 0) {
246 pfsm->pfsd.mirror_flags |= HAMMER_PFSD_DELETED;
247 error = hammer_save_pseudofs(trans, pfsm);
250 hammer_rel_pseudofs(trans->hmp, pfsm);
251 if (error == EINTR) {
252 pfs->head.flags |= HAMMER_IOC_HEAD_INTR;
253 error = 0;
255 return(error);
259 * Auto-detect the pseudofs and do basic bounds checking.
261 static
263 hammer_pfs_autodetect(struct hammer_ioc_pseudofs_rw *pfs, hammer_inode_t ip)
265 int error = 0;
267 if (pfs->pfs_id == -1)
268 pfs->pfs_id = (int)(ip->obj_localization >> 16);
269 if (pfs->pfs_id < 0 || pfs->pfs_id >= HAMMER_MAX_PFS)
270 error = EINVAL;
271 if (pfs->bytes < sizeof(struct hammer_pseudofs_data))
272 error = EINVAL;
273 return(error);
277 * Rollback the specified PFS to (trunc_tid - 1), removing everything
278 * greater or equal to trunc_tid. The PFS must not have been in no-mirror
279 * mode or the MIRROR_FILTERED scan will not work properly.
281 * This is typically used to remove any partial syncs when upgrading a
282 * slave to a master. It can theoretically also be used to rollback
283 * any PFS, including PFS#0, BUT ONLY TO POINTS THAT HAVE NOT YET BEEN
284 * PRUNED, and to points that are older only if they are on a retained
285 * (pruning softlink) boundary.
287 * Rollbacks destroy information. If you don't mind inode numbers changing
288 * a better way would be to cpdup a snapshot back onto the master.
290 static
292 hammer_pfs_rollback(hammer_transaction_t trans,
293 hammer_pseudofs_inmem_t pfsm,
294 hammer_tid_t trunc_tid)
296 struct hammer_cmirror cmirror;
297 struct hammer_cursor cursor;
298 struct hammer_base_elm key_cur;
299 int error;
300 int seq;
302 bzero(&cmirror, sizeof(cmirror));
303 bzero(&key_cur, sizeof(key_cur));
304 key_cur.localization = HAMMER_MIN_LOCALIZATION + pfsm->localization;
305 key_cur.obj_id = HAMMER_MIN_OBJID;
306 key_cur.key = HAMMER_MIN_KEY;
307 key_cur.create_tid = 1;
308 key_cur.rec_type = HAMMER_MIN_RECTYPE;
310 seq = trans->hmp->flusher.act;
312 retry:
313 error = hammer_init_cursor(trans, &cursor, NULL, NULL);
314 if (error) {
315 hammer_done_cursor(&cursor);
316 goto failed;
318 cursor.key_beg = key_cur;
319 cursor.key_end.localization = HAMMER_MAX_LOCALIZATION +
320 pfsm->localization;
321 cursor.key_end.obj_id = HAMMER_MAX_OBJID;
322 cursor.key_end.key = HAMMER_MAX_KEY;
323 cursor.key_end.create_tid = HAMMER_MAX_TID;
324 cursor.key_end.rec_type = HAMMER_MAX_RECTYPE;
326 cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE;
327 cursor.flags |= HAMMER_CURSOR_BACKEND;
330 * Do an optimized scan of only records created or modified
331 * >= trunc_tid, so we can fix up those records. We must
332 * still check the TIDs but this greatly reduces the size of
333 * the scan.
335 cursor.flags |= HAMMER_CURSOR_MIRROR_FILTERED;
336 cursor.cmirror = &cmirror;
337 cmirror.mirror_tid = trunc_tid;
339 error = hammer_btree_first(&cursor);
340 while (error == 0) {
342 * Abort the rollback.
344 if (error == 0) {
345 error = hammer_signal_check(trans->hmp);
346 if (error)
347 break;
351 * We only care about leafs. Internal nodes can be returned
352 * in mirror-filtered mode (they are used to generate SKIP
353 * mrecords), but we don't need them for this code.
355 cursor.flags |= HAMMER_CURSOR_ATEDISK;
356 if (cursor.node->ondisk->type == HAMMER_BTREE_TYPE_LEAF) {
357 key_cur = cursor.node->ondisk->elms[cursor.index].base;
358 error = hammer_pfs_delete_at_cursor(&cursor, trunc_tid);
361 while (hammer_flusher_meta_halflimit(trans->hmp) ||
362 hammer_flusher_undo_exhausted(trans, 2)) {
363 hammer_unlock_cursor(&cursor, 0);
364 hammer_flusher_wait(trans->hmp, seq);
365 hammer_lock_cursor(&cursor, 0);
366 seq = hammer_flusher_async_one(trans->hmp);
369 if (error == 0)
370 error = hammer_btree_iterate(&cursor);
372 if (error == ENOENT)
373 error = 0;
374 hammer_done_cursor(&cursor);
375 if (error == EDEADLK)
376 goto retry;
377 failed:
378 return(error);
382 * Helper function - perform rollback on a B-Tree element given trunc_tid.
384 * If create_tid >= trunc_tid the record is physically destroyed.
385 * If delete_tid >= trunc_tid it will be set to 0, undeleting the record.
387 static
389 hammer_pfs_delete_at_cursor(hammer_cursor_t cursor, hammer_tid_t trunc_tid)
391 hammer_btree_leaf_elm_t elm;
392 hammer_transaction_t trans;
393 int error;
395 elm = &cursor->node->ondisk->elms[cursor->index].leaf;
396 if (elm->base.create_tid < trunc_tid &&
397 elm->base.delete_tid < trunc_tid) {
398 return(0);
400 trans = cursor->trans;
402 if (elm->base.create_tid >= trunc_tid) {
403 error = hammer_delete_at_cursor(
404 cursor, HAMMER_DELETE_DESTROY,
405 cursor->trans->tid, cursor->trans->time32,
406 1, NULL);
407 } else if (elm->base.delete_tid >= trunc_tid) {
408 error = hammer_delete_at_cursor(
409 cursor, HAMMER_DELETE_ADJUST,
410 0, 0,
411 1, NULL);
412 } else {
413 error = 0;
415 return(error);