2 * Copyright (c) 2008 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@backplane.com>
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * $DragonFly: src/sys/vfs/hammer/hammer_pfs.c,v 1.5 2008/07/31 04:42:04 dillon Exp $
37 * HAMMER PFS ioctls - Manage pseudo-fs configurations
/*
 * Forward declarations for the three file-local (static) helpers that
 * are defined further down in this file: hammer_pfs_autodetect(),
 * hammer_pfs_rollback() and hammer_pfs_delete_at_cursor().
 *
 * NOTE(review): the first prototype is visibly cut off after its first
 * parameter in this listing; the definition below also takes a
 * hammer_inode_t ip argument.
 */
42 static int hammer_pfs_autodetect(struct hammer_ioc_pseudofs_rw
*pfs
,
44 static int hammer_pfs_rollback(hammer_transaction_t trans
,
45 hammer_pseudofs_inmem_t pfsm
,
46 hammer_tid_t trunc_tid
);
47 static int hammer_pfs_delete_at_cursor(hammer_cursor_t cursor
,
48 hammer_tid_t trunc_tid
);
51 * Get mirroring/pseudo-fs information
53 * NOTE: The ip used for ioctl is not necessarily related to the PFS
/*
 * hammer_ioc_get_pseudofs: ioctl backend that reports the configuration
 * of one pseudo-filesystem (PFS) to the caller.
 *
 * Parameters:
 *   trans - active HAMMER transaction; trans->hmp is the mount.
 *   ip    - inode the ioctl was issued on, used only to auto-detect
 *           the PFS id.
 *   pfs   - request structure; pfs_id selects the PFS, bytes and
 *           version are filled in here, ondisk is the optional
 *           userland buffer that receives the PFS data.
 *
 * Sequence visible in this block:
 *   1. hammer_pfs_autodetect() resolves and range-checks pfs->pfs_id.
 *   2. localization is built by shifting pfs_id left by 16 bits.
 *   3. The in-memory PFS is loaded with hammer_load_pseudofs().
 *   4. For a PFS without HAMMER_PFSD_SLAVE set, sync_end_tid is
 *      overwritten with the mount flush_tid1 value.
 *   5. pfsd is copied out when pfs->ondisk is non-NULL and error is 0.
 *   6. The PFS is released with hammer_rel_pseudofs().
 *
 * NOTE(review): this listing is missing lines (the declaration of
 * error, the return statements and the braces), so the exact control
 * flow around the two hammer_rel_pseudofs() calls is not visible here.
 */
56 hammer_ioc_get_pseudofs(hammer_transaction_t trans
, hammer_inode_t ip
,
57 struct hammer_ioc_pseudofs_rw
*pfs
)
59 hammer_pseudofs_inmem_t pfsm
;
60 u_int32_t localization
;
/* Validate the request and resolve pfs_id before it is used below. */
63 if ((error
= hammer_pfs_autodetect(pfs
, ip
)) != 0)
/* The PFS id is placed in bits 16 and up of the localization word. */
65 localization
= (u_int32_t
)pfs
->pfs_id
<< 16;
/* Report the structure size and ioctl version back to the caller. */
66 pfs
->bytes
= sizeof(struct hammer_pseudofs_data
);
67 pfs
->version
= HAMMER_IOC_PSEUDOFS_VERSION
;
69 pfsm
= hammer_load_pseudofs(trans
, localization
, &error
);
/*
 * NOTE(review): presumably the release on the failed-load path; the
 * guarding condition is not present in this listing.
 */
71 hammer_rel_pseudofs(trans
->hmp
, pfsm
);
76 * If the PFS is a master the sync tid is set by normal operation
77 * rather then the mirroring code, and will always track the
78 * real HAMMER filesystem.
80 * We use flush_tid1, which is the highest fully committed TID.
81 * flush_tid2 is the TID most recently flushed, but the UNDO hasn't
82 * caught up to it yet so a crash will roll us back to flush_tid1.
84 if ((pfsm
->pfsd
.mirror_flags
& HAMMER_PFSD_SLAVE
) == 0)
85 pfsm
->pfsd
.sync_end_tid
= trans
->hmp
->flush_tid1
;
88 * Copy out to userland.
/* The copy is skipped when the caller supplied no ondisk buffer. */
91 if (pfs
->ondisk
&& error
== 0)
92 error
= copyout(&pfsm
->pfsd
, pfs
->ondisk
, sizeof(pfsm
->pfsd
));
/* Final release of the PFS obtained from hammer_load_pseudofs(). */
93 hammer_rel_pseudofs(trans
->hmp
, pfsm
);
98 * Set mirroring/pseudo-fs information
100 * NOTE: The ip used for ioctl is not necessarily related to the PFS
/*
 * hammer_ioc_set_pseudofs: ioctl backend that installs new PFS
 * configuration data supplied by userland.
 *
 * Parameters:
 *   trans - active HAMMER transaction; trans->hmp is the mount.
 *   ip    - inode the ioctl was issued on, used only to auto-detect
 *           the PFS id.
 *   cred  - credentials handed to hammer_mkroot_pseudofs().
 *   pfs   - request structure; pfs_id selects the PFS, version must
 *           equal HAMMER_IOC_PSEUDOFS_VERSION, ondisk points at the
 *           userland copy of the PFS data.
 *
 * Sequence visible in this block:
 *   1. hammer_pfs_autodetect() resolves and range-checks pfs->pfs_id.
 *   2. The request version is compared against
 *      HAMMER_IOC_PSEUDOFS_VERSION.
 *   3. When error is 0 and pfs->ondisk is set, the PFS is loaded,
 *      overwritten from userland with copyin(), given a root via
 *      hammer_mkroot_pseudofs(), saved with hammer_save_pseudofs(),
 *      sleepers on sync_end_tid are woken, and the PFS is released.
 *
 * NOTE(review): this listing is missing lines (declaration of error,
 * return statements, braces and the conditions between the mkroot and
 * save calls), so error propagation cannot be fully confirmed here.
 */
103 hammer_ioc_set_pseudofs(hammer_transaction_t trans
, hammer_inode_t ip
,
104 struct ucred
*cred
, struct hammer_ioc_pseudofs_rw
*pfs
)
106 hammer_pseudofs_inmem_t pfsm
;
107 u_int32_t localization
;
/* Validate the request and resolve pfs_id before it is used below. */
110 if ((error
= hammer_pfs_autodetect(pfs
, ip
)) != 0)
112 localization
= (u_int32_t
)pfs
->pfs_id
<< 16;
/* Reject requests built against a different ioctl structure version. */
113 if (pfs
->version
!= HAMMER_IOC_PSEUDOFS_VERSION
)
/*
 * NOTE(review): this repeats the assignment made a few lines above
 * with the same operands; it looks redundant.
 */
115 localization
= (u_int32_t
)pfs
->pfs_id
<< 16;
117 if (error
== 0 && pfs
->ondisk
) {
119 * Load the PFS so we can modify our in-core copy. Ignore
122 pfsm
= hammer_load_pseudofs(trans
, localization
, &error
);
/*
 * The copyin() result replaces whatever error the load reported, and
 * the in-core pfsd is overwritten wholesale from the user buffer.
 */
123 error
= copyin(pfs
->ondisk
, &pfsm
->pfsd
, sizeof(pfsm
->pfsd
));
126 * Save it back, create a root inode if we are in master
127 * mode and no root exists.
130 error
= hammer_mkroot_pseudofs(trans
, cred
, pfsm
);
132 error
= hammer_save_pseudofs(trans
, pfsm
);
135 * Wakeup anyone waiting for a TID update for this PFS
/* Same address that hammer_ioc_wait_pseudofs() sleeps on for slaves. */
137 wakeup(&pfsm
->pfsd
.sync_end_tid
);
138 hammer_rel_pseudofs(trans
->hmp
, pfsm
);
144 * Upgrade a slave to a master
146 * This is fairly easy to do, but we must physically undo any partial syncs
147 * for transaction ids > sync_end_tid. Effectively, we must do a partial rollback.
150 * NOTE: The ip used for ioctl is not necessarily related to the PFS
/*
 * hammer_ioc_upgrade_pseudofs: ioctl backend that turns a slave PFS
 * into a master.
 *
 * Parameters:
 *   trans - active HAMMER transaction; trans->hmp is the mount.
 *   ip    - inode the ioctl was issued on, used only to auto-detect
 *           the PFS id.
 *   pfs   - request structure; pfs_id selects the PFS and head.flags
 *           receives HAMMER_IOC_HEAD_INTR when the result is EINTR.
 *
 * Sequence visible in this block:
 *   1. hammer_pfs_autodetect() resolves and range-checks pfs->pfs_id.
 *   2. hammer_unload_pseudofs() is called for the localization, then
 *      the PFS is loaded again with hammer_load_pseudofs().
 *   3. If HAMMER_PFSD_SLAVE is set, everything at or above
 *      sync_end_tid + 1 is rolled back, the SLAVE flag is cleared and
 *      the PFS is saved.
 *   4. The PFS is released, and an EINTR result is additionally
 *      reported through pfs->head.flags.
 *
 * NOTE(review): this listing is missing lines (declaration of error,
 * return statements, braces and intermediate error checks).
 */
153 hammer_ioc_upgrade_pseudofs(hammer_transaction_t trans
, hammer_inode_t ip
,
154 struct hammer_ioc_pseudofs_rw
*pfs
)
156 hammer_pseudofs_inmem_t pfsm
;
157 u_int32_t localization
;
/* Validate the request and resolve pfs_id before it is used below. */
160 if ((error
= hammer_pfs_autodetect(pfs
, ip
)) != 0)
162 localization
= (u_int32_t
)pfs
->pfs_id
<< 16;
/* Unload first; a non-zero result is treated as an error here. */
163 if ((error
= hammer_unload_pseudofs(trans
, localization
)) != 0)
167 * A master id must be set when upgrading
169 pfsm
= hammer_load_pseudofs(trans
, localization
, &error
);
/* Only a PFS that is currently a slave is rolled back and converted. */
171 if ((pfsm
->pfsd
.mirror_flags
& HAMMER_PFSD_SLAVE
) != 0) {
/* trunc_tid is sync_end_tid + 1, so sync_end_tid itself is retained. */
172 error
= hammer_pfs_rollback(trans
, pfsm
,
173 pfsm
->pfsd
.sync_end_tid
+ 1);
175 pfsm
->pfsd
.mirror_flags
&= ~HAMMER_PFSD_SLAVE
;
176 error
= hammer_save_pseudofs(trans
, pfsm
);
180 hammer_rel_pseudofs(trans
->hmp
, pfsm
);
/* Surface an interrupted operation to userland via the ioctl header. */
181 if (error
== EINTR
) {
182 pfs
->head
.flags
|= HAMMER_IOC_HEAD_INTR
;
189 * Downgrade a master to a slave
191 * This is really easy to do, just set the SLAVE flag.
193 * We also leave sync_end_tid intact... the field is not used in master
194 * mode (vol0_next_tid overrides it), but if someone switches to master
195 * mode accidentally and then back to slave mode we don't want it to change.
196 * Eventually it will be used as the cross-synchronization TID in
197 * multi-master mode, and we don't want to mess with it for that feature
200 * NOTE: The ip used for ioctl is not necessarily related to the PFS
/*
 * hammer_ioc_downgrade_pseudofs: ioctl backend that turns a master PFS
 * into a slave by setting HAMMER_PFSD_SLAVE.
 *
 * Parameters:
 *   trans - active HAMMER transaction; trans->hmp is the mount.
 *   ip    - inode the ioctl was issued on, used only to auto-detect
 *           the PFS id.
 *   pfs   - request structure; pfs_id selects the PFS.
 *
 * Sequence visible in this block:
 *   1. hammer_pfs_autodetect() resolves and range-checks pfs->pfs_id.
 *   2. hammer_unload_pseudofs() is called for the localization, then
 *      the PFS is loaded again with hammer_load_pseudofs().
 *   3. If HAMMER_PFSD_SLAVE is not yet set, it is set and the PFS is
 *      saved; sync_end_tid is not touched by this block.
 *   4. The PFS is released.
 *
 * NOTE(review): this listing is missing lines (declaration of error,
 * return statements, braces and intermediate error checks).
 */
203 hammer_ioc_downgrade_pseudofs(hammer_transaction_t trans
, hammer_inode_t ip
,
204 struct hammer_ioc_pseudofs_rw
*pfs
)
206 hammer_pseudofs_inmem_t pfsm
;
207 u_int32_t localization
;
/* Validate the request and resolve pfs_id before it is used below. */
210 if ((error
= hammer_pfs_autodetect(pfs
, ip
)) != 0)
212 localization
= (u_int32_t
)pfs
->pfs_id
<< 16;
/* Unload first; a non-zero result is treated as an error here. */
213 if ((error
= hammer_unload_pseudofs(trans
, localization
)) != 0)
216 pfsm
= hammer_load_pseudofs(trans
, localization
, &error
);
/* Nothing is written when the PFS already carries the SLAVE flag. */
218 if ((pfsm
->pfsd
.mirror_flags
& HAMMER_PFSD_SLAVE
) == 0) {
219 pfsm
->pfsd
.mirror_flags
|= HAMMER_PFSD_SLAVE
;
220 error
= hammer_save_pseudofs(trans
, pfsm
);
223 hammer_rel_pseudofs(trans
->hmp
, pfsm
);
230 * We can destroy a PFS by scanning and deleting all of its records in the
231 * B-Tree. The hammer utility will delete the softlink in the primary
234 * NOTE: The ip used for ioctl is not necessarily related to the PFS
/*
 * hammer_ioc_destroy_pseudofs: ioctl backend that destroys a PFS.
 *
 * Parameters:
 *   trans - active HAMMER transaction; trans->hmp is the mount.
 *   ip    - inode the ioctl was issued on, used only to auto-detect
 *           the PFS id.
 *   pfs   - request structure; pfs_id selects the PFS and head.flags
 *           receives HAMMER_IOC_HEAD_INTR when the result is EINTR.
 *
 * Sequence visible in this block:
 *   1. hammer_pfs_autodetect() resolves and range-checks pfs->pfs_id.
 *   2. hammer_unload_pseudofs() is called for the localization, then
 *      the PFS is loaded again with hammer_load_pseudofs().
 *   3. hammer_pfs_rollback() is run with trunc_tid 0.
 *   4. HAMMER_PFSD_DELETED is set in mirror_flags and the PFS is saved.
 *   5. The PFS is released, and an EINTR result is additionally
 *      reported through pfs->head.flags.
 *
 * NOTE(review): this listing is missing lines (declaration of error,
 * return statements, braces and intermediate error checks), so it is
 * not visible here whether the DELETED flag is set only after a
 * successful rollback.
 */
237 hammer_ioc_destroy_pseudofs(hammer_transaction_t trans
, hammer_inode_t ip
,
238 struct hammer_ioc_pseudofs_rw
*pfs
)
240 hammer_pseudofs_inmem_t pfsm
;
241 u_int32_t localization
;
/* Validate the request and resolve pfs_id before it is used below. */
244 if ((error
= hammer_pfs_autodetect(pfs
, ip
)) != 0)
246 localization
= (u_int32_t
)pfs
->pfs_id
<< 16;
/* Unload first; a non-zero result is treated as an error here. */
248 if ((error
= hammer_unload_pseudofs(trans
, localization
)) != 0)
251 pfsm
= hammer_load_pseudofs(trans
, localization
, &error
);
/* trunc_tid 0: every record has create_tid >= 0, so all are in scope. */
253 error
= hammer_pfs_rollback(trans
, pfsm
, 0);
255 pfsm
->pfsd
.mirror_flags
|= HAMMER_PFSD_DELETED
;
256 error
= hammer_save_pseudofs(trans
, pfsm
);
259 hammer_rel_pseudofs(trans
->hmp
, pfsm
);
/* Surface an interrupted operation to userland via the ioctl header. */
260 if (error
== EINTR
) {
261 pfs
->head
.flags
|= HAMMER_IOC_HEAD_INTR
;
268 * Wait for the PFS to sync past the specified TID
/*
 * hammer_ioc_wait_pseudofs: ioctl backend that blocks until the PFS
 * has progressed past a caller-supplied TID.
 *
 * Parameters:
 *   trans - active HAMMER transaction; trans->hmp is the mount.
 *   ip    - inode the ioctl was issued on, used only to auto-detect
 *           the PFS id.
 *   pfs   - request structure; ondisk points at a userland
 *           hammer_pseudofs_data whose sync_end_tid is the TID to
 *           wait past, head.flags receives HAMMER_IOC_HEAD_INTR when
 *           the result is EINTR.
 *
 * Sequence visible in this block:
 *   1. hammer_pfs_autodetect() resolves and range-checks pfs->pfs_id.
 *   2. The caller copy of the PFS data is fetched with copyin().
 *   3. The in-memory PFS is loaded.
 *   4. For a slave the current TID and the sleep address are taken
 *      from pfsd.sync_end_tid; the other assignment pair uses the
 *      mount flush_tid1 field instead.
 *   5. If the current TID is not yet beyond the requested one, the
 *      thread sleeps once on that address with PCATCH.
 *   6. The PFS is released and EINTR is reported through head.flags.
 *
 * NOTE(review): on the visible line the tsleep() return value is not
 * assigned to error, so an interrupted sleep may never reach the
 * EINTR test below; confirm against the complete source.
 * NOTE(review): this listing is missing lines (declarations of error,
 * tid and waitp, the else keyword, return statements and braces).
 */
271 hammer_ioc_wait_pseudofs(hammer_transaction_t trans
, hammer_inode_t ip
,
272 struct hammer_ioc_pseudofs_rw
*pfs
)
274 hammer_pseudofs_inmem_t pfsm
;
275 struct hammer_pseudofs_data pfsd
;
276 u_int32_t localization
;
/* Validate the request and resolve pfs_id before it is used below. */
281 if ((error
= hammer_pfs_autodetect(pfs
, ip
)) != 0)
283 localization
= (u_int32_t
)pfs
->pfs_id
<< 16;
/* Fetch the caller copy; its sync_end_tid is the TID to wait past. */
285 if ((error
= copyin(pfs
->ondisk
, &pfsd
, sizeof(pfsd
))) != 0)
288 pfsm
= hammer_load_pseudofs(trans
, localization
, &error
);
/* Slave: progress is tracked by the sync_end_tid of the PFS itself. */
290 if (pfsm
->pfsd
.mirror_flags
& HAMMER_PFSD_SLAVE
) {
291 tid
= pfsm
->pfsd
.sync_end_tid
;
292 waitp
= &pfsm
->pfsd
.sync_end_tid
;
/* Otherwise (presumably the else branch): use the mount flush_tid1. */
294 tid
= trans
->hmp
->flush_tid1
;
295 waitp
= &trans
->hmp
->flush_tid1
;
/* Sleep only when the PFS has not already passed the requested TID. */
297 if (tid
<= pfsd
.sync_end_tid
)
298 tsleep(waitp
, PCATCH
, "hmrmwt", 0);
300 hammer_rel_pseudofs(trans
->hmp
, pfsm
);
/* Surface an interrupted operation to userland via the ioctl header. */
301 if (error
== EINTR
) {
302 pfs
->head
.flags
|= HAMMER_IOC_HEAD_INTR
;
310 * Auto-detect the pseudofs and do basic bounds checking.
/*
 * hammer_pfs_autodetect: resolve and sanity-check the PFS id of an
 * ioctl request.
 *
 * Parameters:
 *   pfs - request structure; pfs_id may be -1 to ask for detection,
 *         bytes is the size of the caller buffer.
 *   ip  - inode the ioctl was issued on; its obj_localization supplies
 *         the PFS id when detection is requested.
 *
 * Checks visible in this block:
 *   - pfs_id == -1 is replaced by ip->obj_localization >> 16.
 *   - pfs_id must lie in the range 0 .. HAMMER_MAX_PFS - 1.
 *   - pfs->bytes must be at least sizeof(struct hammer_pseudofs_data).
 *
 * NOTE(review): the error codes and return statements are not present
 * in this listing; callers in this file treat a non-zero return as
 * failure.
 */
314 hammer_pfs_autodetect(struct hammer_ioc_pseudofs_rw
*pfs
, hammer_inode_t ip
)
/* -1 means: use the PFS that the ioctl inode itself belongs to. */
318 if (pfs
->pfs_id
== -1)
319 pfs
->pfs_id
= (int)(ip
->obj_localization
>> 16);
/* Bounds check on the PFS id. */
320 if (pfs
->pfs_id
< 0 || pfs
->pfs_id
>= HAMMER_MAX_PFS
)
/* The caller buffer must be able to hold a full PFS data structure. */
322 if (pfs
->bytes
< sizeof(struct hammer_pseudofs_data
))
328 * Rollback the specified PFS to (trunc_tid - 1), removing everything
329 * greater or equal to trunc_tid. The PFS must not have been in no-mirror
330 * mode or the MIRROR_FILTERED scan will not work properly.
332 * This is typically used to remove any partial syncs when upgrading a
333 * slave to a master. It can theoretically also be used to rollback
334 * any PFS, including PFS#0, BUT ONLY TO POINTS THAT HAVE NOT YET BEEN
335 * PRUNED, and to points that are older only if they are on a retained
336 * (pruning softlink) boundary.
338 * Rollbacks destroy information. If you don't mind inode numbers changing
339 * a better way would be to cpdup a snapshot back onto the master.
/*
 * hammer_pfs_rollback: scan the B-Tree records of one PFS and undo
 * everything at or above trunc_tid.
 *
 * Parameters:
 *   trans     - active HAMMER transaction; trans->hmp is the mount.
 *   pfsm      - in-memory PFS; pfsm->localization selects the key range.
 *   trunc_tid - first transaction id to be removed.
 *
 * Sequence visible in this block:
 *   1. cmirror and key_cur are zeroed, and key_cur is set to the lowest
 *      key of the PFS (minimum obj_id, key and rec_type, create_tid 1).
 *   2. The current flusher sequence (flusher.act) is saved in seq.
 *   3. A cursor is initialised over key_cur .. the maximum key, with
 *      END_INCLUSIVE, BACKEND and MIRROR_FILTERED set and
 *      cmirror.mirror_tid equal to trunc_tid.
 *   4. The scan starts with hammer_btree_first() and advances with
 *      hammer_btree_iterate(). Each pass checks for a pending signal,
 *      and leaf elements are handed to hammer_pfs_delete_at_cursor().
 *   5. While the flusher reports meta-data or UNDO pressure the cursor
 *      is unlocked, the flusher is waited on, the cursor is relocked
 *      and another asynchronous flush is started.
 *   6. The cursor is torn down and the result is tested for EDEADLK.
 *
 * NOTE(review): this listing is missing lines (declarations of error
 * and seq, loop and retry constructs, braces, return statements and
 * the second operand of the key_end.localization sum), so the loop
 * structure and the EDEADLK handling cannot be confirmed from here.
 */
343 hammer_pfs_rollback(hammer_transaction_t trans
,
344 hammer_pseudofs_inmem_t pfsm
,
345 hammer_tid_t trunc_tid
)
347 struct hammer_cmirror cmirror
;
348 struct hammer_cursor cursor
;
349 struct hammer_base_elm key_cur
;
/* Start key: the lowest possible element inside this PFS. */
353 bzero(&cmirror
, sizeof(cmirror
));
354 bzero(&key_cur
, sizeof(key_cur
));
355 key_cur
.localization
= HAMMER_MIN_LOCALIZATION
+ pfsm
->localization
;
356 key_cur
.obj_id
= HAMMER_MIN_OBJID
;
357 key_cur
.key
= HAMMER_MIN_KEY
;
358 key_cur
.create_tid
= 1;
359 key_cur
.rec_type
= HAMMER_MIN_RECTYPE
;
/* Flusher sequence later passed to hammer_flusher_wait(). */
361 seq
= trans
->hmp
->flusher
.act
;
364 error
= hammer_init_cursor(trans
, &cursor
, NULL
, NULL
);
/* NOTE(review): presumably the teardown on a failed cursor init. */
366 hammer_done_cursor(&cursor
);
/* key_cur is updated during the scan, so this is the current position. */
369 cursor
.key_beg
= key_cur
;
370 cursor
.key_end
.localization
= HAMMER_MAX_LOCALIZATION
+
372 cursor
.key_end
.obj_id
= HAMMER_MAX_OBJID
;
373 cursor
.key_end
.key
= HAMMER_MAX_KEY
;
374 cursor
.key_end
.create_tid
= HAMMER_MAX_TID
;
375 cursor
.key_end
.rec_type
= HAMMER_MAX_RECTYPE
;
377 cursor
.flags
|= HAMMER_CURSOR_END_INCLUSIVE
;
378 cursor
.flags
|= HAMMER_CURSOR_BACKEND
;
381 * Do an optimized scan of only records created or modified
382 * >= trunc_tid, so we can fix up those records. We must
383 * still check the TIDs but this greatly reduces the size of
386 cursor
.flags
|= HAMMER_CURSOR_MIRROR_FILTERED
;
387 cursor
.cmirror
= &cmirror
;
388 cmirror
.mirror_tid
= trunc_tid
;
390 error
= hammer_btree_first(&cursor
);
393 * Abort the rollback.
/* A pending signal is reported through error and ends the scan. */
396 error
= hammer_signal_check(trans
->hmp
);
402 * We only care about leafs. Internal nodes can be returned
403 * in mirror-filtered mode (they are used to generate SKIP
404 * mrecords), but we don't need them for this code.
406 cursor
.flags
|= HAMMER_CURSOR_ATEDISK
;
407 if (cursor
.node
->ondisk
->type
== HAMMER_BTREE_TYPE_LEAF
) {
/* Remember the element key before it is rolled back. */
408 key_cur
= cursor
.node
->ondisk
->elms
[cursor
.index
].base
;
409 error
= hammer_pfs_delete_at_cursor(&cursor
, trunc_tid
);
/* Throttle: let the flusher catch up while it reports pressure. */
412 while (hammer_flusher_meta_halflimit(trans
->hmp
) ||
413 hammer_flusher_undo_exhausted(trans
, 2)) {
/* The cursor is unlocked for the duration of the flusher wait. */
414 hammer_unlock_cursor(&cursor
);
415 hammer_flusher_wait(trans
->hmp
, seq
);
416 hammer_lock_cursor(&cursor
);
417 seq
= hammer_flusher_async_one(trans
->hmp
);
421 error
= hammer_btree_iterate(&cursor
);
425 hammer_done_cursor(&cursor
);
/* NOTE(review): presumably a retry on deadlock; the action is not shown. */
426 if (error
== EDEADLK
)
433 * Helper function - perform rollback on a B-Tree element given trunc_tid.
435 * If create_tid >= trunc_tid the record is physically destroyed.
436 * If delete_tid >= trunc_tid it will be set to 0, undeleting the record.
440 hammer_pfs_delete_at_cursor(hammer_cursor_t cursor
, hammer_tid_t trunc_tid
)
442 hammer_btree_leaf_elm_t elm
;
443 hammer_transaction_t trans
;
446 elm
= &cursor
->node
->ondisk
->elms
[cursor
->index
].leaf
;
447 if (elm
->base
.create_tid
< trunc_tid
&&
448 elm
->base
.delete_tid
< trunc_tid
) {
451 trans
= cursor
->trans
;
453 if (elm
->base
.create_tid
>= trunc_tid
) {
454 error
= hammer_delete_at_cursor(
455 cursor
, HAMMER_DELETE_DESTROY
,
456 cursor
->trans
->tid
, cursor
->trans
->time32
,
458 } else if (elm
->base
.delete_tid
>= trunc_tid
) {
459 error
= hammer_delete_at_cursor(
460 cursor
, HAMMER_DELETE_ADJUST
,