0715aea5fa13812d78f99f200513602b6e00dcae
[dragonfly.git] / sys / vfs / hammer / hammer_pfs.c
blob0715aea5fa13812d78f99f200513602b6e00dcae
1 /*
2 * Copyright (c) 2008 The DragonFly Project. All rights reserved.
3 *
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@backplane.com>
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
34 * $DragonFly: src/sys/vfs/hammer/hammer_pfs.c,v 1.1.2.1 2008/07/15 18:04:54 dillon Exp $
37 * HAMMER PFS ioctls - Manage pseudo-fs configurations
40 #include "hammer.h"
42 static int hammer_pfs_autodetect(struct hammer_ioc_pseudofs_rw *pfs,
43 hammer_inode_t ip);
44 static int hammer_pfs_rollback(hammer_transaction_t trans,
45 hammer_pseudofs_inmem_t pfsm,
46 hammer_tid_t trunc_tid);
47 static int hammer_pfs_delete_at_cursor(hammer_cursor_t cursor,
48 hammer_tid_t trunc_tid);
51 * Get mirroring/pseudo-fs information
53 * NOTE: The ip used for ioctl is not necessarily related to the PFS
55 int
56 hammer_ioc_get_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
57 struct hammer_ioc_pseudofs_rw *pfs)
59 hammer_pseudofs_inmem_t pfsm;
60 u_int32_t localization;
61 int error;
63 if ((error = hammer_pfs_autodetect(pfs, ip)) != 0)
64 return(error);
65 localization = (u_int32_t)pfs->pfs_id << 16;
66 pfs->bytes = sizeof(struct hammer_pseudofs_data);
67 pfs->version = HAMMER_IOC_PSEUDOFS_VERSION;
69 pfsm = hammer_load_pseudofs(trans, localization, &error);
70 if (error) {
71 hammer_rel_pseudofs(trans->hmp, pfsm);
72 return(error);
76 * If the PFS is a master the sync tid is set by normal operation
77 * rather then the mirroring code, and will always track the
78 * real HAMMER filesystem.
80 if (pfsm->pfsd.master_id >= 0)
81 pfsm->pfsd.sync_end_tid = trans->rootvol->ondisk->vol0_next_tid;
84 * Copy out to userland.
86 error = 0;
87 if (pfs->ondisk && error == 0)
88 error = copyout(&pfsm->pfsd, pfs->ondisk, sizeof(pfsm->pfsd));
89 hammer_rel_pseudofs(trans->hmp, pfsm);
90 return(error);
94 * Set mirroring/pseudo-fs information
96 * NOTE: The ip used for ioctl is not necessarily related to the PFS
98 int
99 hammer_ioc_set_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
100 struct ucred *cred, struct hammer_ioc_pseudofs_rw *pfs)
102 hammer_pseudofs_inmem_t pfsm;
103 u_int32_t localization;
104 int error;
106 if ((error = hammer_pfs_autodetect(pfs, ip)) != 0)
107 return(error);
108 localization = (u_int32_t)pfs->pfs_id << 16;
109 if (pfs->version != HAMMER_IOC_PSEUDOFS_VERSION)
110 error = EINVAL;
111 localization = (u_int32_t)pfs->pfs_id << 16;
113 if (error == 0 && pfs->ondisk) {
115 * Load the PFS so we can modify our in-core copy. Ignore
116 * ENOENT errors.
118 pfsm = hammer_load_pseudofs(trans, localization, &error);
119 error = copyin(pfs->ondisk, &pfsm->pfsd, sizeof(pfsm->pfsd));
122 * Save it back, create a root inode if we are in master
123 * mode and no root exists.
125 if (error == 0)
126 error = hammer_mkroot_pseudofs(trans, cred, pfsm);
127 if (error == 0)
128 error = hammer_save_pseudofs(trans, pfsm);
129 hammer_rel_pseudofs(trans->hmp, pfsm);
131 return(error);
135 * Upgrade a slave to a master
137 * This is fairly easy to do, but we must physically undo any partial syncs
138 * for transaction ids > sync_end_tid. Effective, we must do a partial
139 * rollback.
141 * NOTE: The ip used for ioctl is not necessarily related to the PFS
144 hammer_ioc_upgrade_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
145 struct hammer_ioc_pseudofs_rw *pfs)
147 hammer_pseudofs_inmem_t pfsm;
148 u_int32_t localization;
149 int error;
151 if ((error = hammer_pfs_autodetect(pfs, ip)) != 0)
152 return(error);
153 localization = (u_int32_t)pfs->pfs_id << 16;
154 if ((error = hammer_unload_pseudofs(trans, localization)) != 0)
155 return(error);
158 * A master id must be set when upgrading
160 pfsm = hammer_load_pseudofs(trans, localization, &error);
161 if ((pfsm->pfsd.mirror_flags & HAMMER_PFSD_SLAVE) == 0 &&
162 pfsm->pfsd.master_id < 0) {
163 error = EINVAL;
165 if (error == 0) {
166 if ((pfsm->pfsd.mirror_flags & HAMMER_PFSD_SLAVE) != 0) {
167 error = hammer_pfs_rollback(trans, pfsm,
168 pfsm->pfsd.sync_end_tid + 1);
169 if (error == 0) {
170 pfsm->pfsd.mirror_flags &= ~HAMMER_PFSD_SLAVE;
171 error = hammer_save_pseudofs(trans, pfsm);
175 hammer_rel_pseudofs(trans->hmp, pfsm);
176 if (error == EINTR) {
177 pfs->head.flags |= HAMMER_IOC_HEAD_INTR;
178 error = 0;
180 return (error);
184 * Downgrade a master to a slave
186 * This is really easy to do, just set the SLAVE flag. The master_id is
187 * left intact.
189 * We also leave sync_end_tid intact... the field is not used in master
190 * mode (vol0_next_tid overrides it), but if someone switches to master
191 * mode accidently and then back to slave mode we don't want it to change.
192 * Eventually it will be used as the cross-synchronization TID in
193 * multi-master mode, and we don't want to mess with it for that feature
194 * either.
196 * NOTE: The ip used for ioctl is not necessarily related to the PFS
199 hammer_ioc_downgrade_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
200 struct hammer_ioc_pseudofs_rw *pfs)
202 hammer_pseudofs_inmem_t pfsm;
203 u_int32_t localization;
204 int error;
206 if ((error = hammer_pfs_autodetect(pfs, ip)) != 0)
207 return(error);
208 localization = (u_int32_t)pfs->pfs_id << 16;
209 if ((error = hammer_unload_pseudofs(trans, localization)) != 0)
210 return(error);
212 pfsm = hammer_load_pseudofs(trans, localization, &error);
213 if (error == 0) {
214 if ((pfsm->pfsd.mirror_flags & HAMMER_PFSD_SLAVE) == 0) {
215 pfsm->pfsd.mirror_flags |= HAMMER_PFSD_SLAVE;
216 error = hammer_save_pseudofs(trans, pfsm);
219 hammer_rel_pseudofs(trans->hmp, pfsm);
220 return (error);
224 * Destroy a PFS
226 * We can destroy a PFS by scanning and deleting all of its records in the
227 * B-Tree. The hammer utility will delete the softlink in the primary
228 * filesystem.
230 * NOTE: The ip used for ioctl is not necessarily related to the PFS
233 hammer_ioc_destroy_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
234 struct hammer_ioc_pseudofs_rw *pfs)
236 hammer_pseudofs_inmem_t pfsm;
237 u_int32_t localization;
238 int error;
240 if ((error = hammer_pfs_autodetect(pfs, ip)) != 0)
241 return(error);
242 localization = (u_int32_t)pfs->pfs_id << 16;
244 if ((error = hammer_unload_pseudofs(trans, localization)) != 0)
245 return(error);
247 pfsm = hammer_load_pseudofs(trans, localization, &error);
248 if (error == 0) {
249 error = hammer_pfs_rollback(trans, pfsm, 0);
250 if (error == 0) {
251 pfsm->pfsd.mirror_flags |= HAMMER_PFSD_DELETED;
252 error = hammer_save_pseudofs(trans, pfsm);
255 hammer_rel_pseudofs(trans->hmp, pfsm);
256 if (error == EINTR) {
257 pfs->head.flags |= HAMMER_IOC_HEAD_INTR;
258 error = 0;
260 return(error);
264 * Auto-detect the pseudofs and do basic bounds checking.
266 static
268 hammer_pfs_autodetect(struct hammer_ioc_pseudofs_rw *pfs, hammer_inode_t ip)
270 int error = 0;
272 if (pfs->pfs_id == -1)
273 pfs->pfs_id = (int)(ip->obj_localization >> 16);
274 if (pfs->pfs_id < 0 || pfs->pfs_id >= HAMMER_MAX_PFS)
275 error = EINVAL;
276 if (pfs->bytes < sizeof(struct hammer_pseudofs_data))
277 error = EINVAL;
278 return(error);
282 * Rollback the specified PFS to (trunc_tid - 1), removing everything
283 * greater or equal to trunc_tid. The PFS must not have been in no-mirror
284 * mode or the MIRROR_FILTERED scan will not work properly.
286 * This is typically used to remove any partial syncs when upgrading a
287 * slave to a master. It can theoretically also be used to rollback
288 * any PFS, including PFS#0, BUT ONLY TO POINTS THAT HAVE NOT YET BEEN
289 * PRUNED, and to points that are older only if they are on a retained
290 * (pruning softlink) boundary.
292 * Rollbacks destroy information. If you don't mind inode numbers changing
293 * a better way would be to cpdup a snapshot back onto the master.
295 static
297 hammer_pfs_rollback(hammer_transaction_t trans,
298 hammer_pseudofs_inmem_t pfsm,
299 hammer_tid_t trunc_tid)
301 struct hammer_cmirror cmirror;
302 struct hammer_cursor cursor;
303 struct hammer_base_elm key_cur;
304 int error;
305 int seq;
307 bzero(&cmirror, sizeof(cmirror));
308 bzero(&key_cur, sizeof(key_cur));
309 key_cur.localization = HAMMER_MIN_LOCALIZATION + pfsm->localization;
310 key_cur.obj_id = HAMMER_MIN_OBJID;
311 key_cur.key = HAMMER_MIN_KEY;
312 key_cur.create_tid = 1;
313 key_cur.rec_type = HAMMER_MIN_RECTYPE;
315 seq = trans->hmp->flusher.act;
317 retry:
318 error = hammer_init_cursor(trans, &cursor, NULL, NULL);
319 if (error) {
320 hammer_done_cursor(&cursor);
321 goto failed;
323 cursor.key_beg = key_cur;
324 cursor.key_end.localization = HAMMER_MAX_LOCALIZATION +
325 pfsm->localization;
326 cursor.key_end.obj_id = HAMMER_MAX_OBJID;
327 cursor.key_end.key = HAMMER_MAX_KEY;
328 cursor.key_end.create_tid = HAMMER_MAX_TID;
329 cursor.key_end.rec_type = HAMMER_MAX_RECTYPE;
331 cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE;
332 cursor.flags |= HAMMER_CURSOR_BACKEND;
335 * Do an optimized scan of only records created or modified
336 * >= trunc_tid, so we can fix up those records. We must
337 * still check the TIDs but this greatly reduces the size of
338 * the scan.
340 cursor.flags |= HAMMER_CURSOR_MIRROR_FILTERED;
341 cursor.cmirror = &cmirror;
342 cmirror.mirror_tid = trunc_tid;
344 error = hammer_btree_first(&cursor);
345 while (error == 0) {
347 * Abort the rollback.
349 if (error == 0) {
350 error = hammer_signal_check(trans->hmp);
351 if (error)
352 break;
356 * We only care about leafs. Internal nodes can be returned
357 * in mirror-filtered mode (they are used to generate SKIP
358 * mrecords), but we don't need them for this code.
360 if (cursor.node->ondisk->type == HAMMER_BTREE_TYPE_LEAF) {
361 key_cur = cursor.node->ondisk->elms[cursor.index].base;
362 error = hammer_pfs_delete_at_cursor(&cursor, trunc_tid);
365 while (hammer_flusher_meta_halflimit(trans->hmp) ||
366 hammer_flusher_undo_exhausted(trans, 2)) {
367 hammer_unlock_cursor(&cursor, 0);
368 hammer_flusher_wait(trans->hmp, seq);
369 hammer_lock_cursor(&cursor, 0);
370 seq = hammer_flusher_async_one(trans->hmp);
373 if (error == 0)
374 error = hammer_btree_iterate(&cursor);
376 if (error == ENOENT)
377 error = 0;
378 hammer_done_cursor(&cursor);
379 if (error == EDEADLK)
380 goto retry;
381 failed:
382 return(error);
386 * Helper function - perform rollback on a B-Tree element given trunc_tid.
388 * If create_tid >= trunc_tid the record is physically destroyed.
389 * If delete_tid >= trunc_tid it will be set to 0, undeleting the record.
391 static
393 hammer_pfs_delete_at_cursor(hammer_cursor_t cursor, hammer_tid_t trunc_tid)
395 hammer_btree_leaf_elm_t elm;
396 hammer_transaction_t trans;
397 int error;
399 elm = &cursor->node->ondisk->elms[cursor->index].leaf;
400 if (elm->base.create_tid < trunc_tid &&
401 elm->base.delete_tid < trunc_tid) {
402 return(0);
404 trans = cursor->trans;
406 if (elm->base.create_tid >= trunc_tid) {
407 error = hammer_delete_at_cursor(
408 cursor, HAMMER_DELETE_DESTROY,
409 cursor->trans->tid, cursor->trans->time32,
410 1, NULL);
411 } else if (elm->base.delete_tid >= trunc_tid) {
412 error = hammer_delete_at_cursor(
413 cursor, HAMMER_DELETE_ADJUST,
414 0, 0,
415 1, NULL);
416 } else {
417 error = 0;
419 return(error);