2 * Copyright (c) 2008 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@backplane.com>
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * $DragonFly: src/sys/vfs/hammer/hammer_pfs.c,v 1.5 2008/07/31 04:42:04 dillon Exp $
37 * HAMMER PFS ioctls - Manage pseudo-fs configurations
42 static int hammer_pfs_autodetect(struct hammer_ioc_pseudofs_rw
*pfs
,
44 static int hammer_pfs_rollback(hammer_transaction_t trans
,
45 hammer_pseudofs_inmem_t pfsm
,
46 hammer_tid_t trunc_tid
);
47 static int hammer_pfs_delete_at_cursor(hammer_cursor_t cursor
,
48 hammer_tid_t trunc_tid
);
51 * Get mirroring/pseudo-fs information
53 * NOTE: The ip used for ioctl is not necessarily related to the PFS
56 hammer_ioc_get_pseudofs(hammer_transaction_t trans
, hammer_inode_t ip
,
57 struct hammer_ioc_pseudofs_rw
*pfs
)
59 hammer_pseudofs_inmem_t pfsm
;
60 u_int32_t localization
;
63 if ((error
= hammer_pfs_autodetect(pfs
, ip
)) != 0)
65 localization
= (u_int32_t
)pfs
->pfs_id
<< 16;
66 pfs
->bytes
= sizeof(struct hammer_pseudofs_data
);
67 pfs
->version
= HAMMER_IOC_PSEUDOFS_VERSION
;
69 pfsm
= hammer_load_pseudofs(trans
, localization
, &error
);
71 hammer_rel_pseudofs(trans
->hmp
, pfsm
);
76 * If the PFS is a master the sync tid is set by normal operation
77 * rather than the mirroring code, and will always track the
78 * real HAMMER filesystem.
80 * We use flush_tid1, which is the highest fully committed TID.
81 * flush_tid2 is the TID most recently flushed, but the UNDO hasn't
82 * caught up to it yet so a crash will roll us back to flush_tid1.
84 if ((pfsm
->pfsd
.mirror_flags
& HAMMER_PFSD_SLAVE
) == 0)
85 pfsm
->pfsd
.sync_end_tid
= trans
->hmp
->flush_tid1
;
88 * Copy out to userland.
91 if (pfs
->ondisk
&& error
== 0)
92 error
= copyout(&pfsm
->pfsd
, pfs
->ondisk
, sizeof(pfsm
->pfsd
));
93 hammer_rel_pseudofs(trans
->hmp
, pfsm
);
98 * Set mirroring/pseudo-fs information
100 * NOTE: The ip used for ioctl is not necessarily related to the PFS
103 hammer_ioc_set_pseudofs(hammer_transaction_t trans
, hammer_inode_t ip
,
104 struct ucred
*cred
, struct hammer_ioc_pseudofs_rw
*pfs
)
106 hammer_pseudofs_inmem_t pfsm
;
107 u_int32_t localization
;
110 if ((error
= hammer_pfs_autodetect(pfs
, ip
)) != 0)
112 localization
= (u_int32_t
)pfs
->pfs_id
<< 16;
113 if (pfs
->version
!= HAMMER_IOC_PSEUDOFS_VERSION
)
115 localization
= (u_int32_t
)pfs
->pfs_id
<< 16;
117 if (error
== 0 && pfs
->ondisk
) {
119 * Load the PFS so we can modify our in-core copy. Ignore
122 pfsm
= hammer_load_pseudofs(trans
, localization
, &error
);
123 error
= copyin(pfs
->ondisk
, &pfsm
->pfsd
, sizeof(pfsm
->pfsd
));
126 * Save it back, create a root inode if we are in master
127 * mode and no root exists.
129 * We do not create root inodes for slaves, the root inode
130 * must be mirrored from the master.
133 (pfsm
->pfsd
.mirror_flags
& HAMMER_PFSD_SLAVE
) == 0) {
134 error
= hammer_mkroot_pseudofs(trans
, cred
, pfsm
);
137 error
= hammer_save_pseudofs(trans
, pfsm
);
140 * Wakeup anyone waiting for a TID update for this PFS
142 wakeup(&pfsm
->pfsd
.sync_end_tid
);
143 hammer_rel_pseudofs(trans
->hmp
, pfsm
);
149 * Upgrade a slave to a master
151 * This is fairly easy to do, but we must physically undo any partial syncs
152 * for transaction ids > sync_end_tid. Effective, we must do a partial
155 * NOTE: The ip used for ioctl is not necessarily related to the PFS
158 hammer_ioc_upgrade_pseudofs(hammer_transaction_t trans
, hammer_inode_t ip
,
159 struct hammer_ioc_pseudofs_rw
*pfs
)
161 hammer_pseudofs_inmem_t pfsm
;
162 u_int32_t localization
;
165 if ((error
= hammer_pfs_autodetect(pfs
, ip
)) != 0)
167 localization
= (u_int32_t
)pfs
->pfs_id
<< 16;
168 if ((error
= hammer_unload_pseudofs(trans
, localization
)) != 0)
172 * A master id must be set when upgrading
174 pfsm
= hammer_load_pseudofs(trans
, localization
, &error
);
176 if ((pfsm
->pfsd
.mirror_flags
& HAMMER_PFSD_SLAVE
) != 0) {
177 error
= hammer_pfs_rollback(trans
, pfsm
,
178 pfsm
->pfsd
.sync_end_tid
+ 1);
180 pfsm
->pfsd
.mirror_flags
&= ~HAMMER_PFSD_SLAVE
;
181 error
= hammer_save_pseudofs(trans
, pfsm
);
185 hammer_rel_pseudofs(trans
->hmp
, pfsm
);
186 if (error
== EINTR
) {
187 pfs
->head
.flags
|= HAMMER_IOC_HEAD_INTR
;
194 * Downgrade a master to a slave
196 * This is really easy to do, just set the SLAVE flag and update sync_end_tid.
198 * We previously did not update sync_end_tid in consideration for a slave
199 * upgraded to a master and then downgraded again, but this completely breaks
200 * the case where one starts with a master and then downgrades to a slave,
201 * then upgrades again.
203 * NOTE: The ip used for ioctl is not necessarily related to the PFS
206 hammer_ioc_downgrade_pseudofs(hammer_transaction_t trans
, hammer_inode_t ip
,
207 struct hammer_ioc_pseudofs_rw
*pfs
)
209 hammer_mount_t hmp
= trans
->hmp
;
210 hammer_pseudofs_inmem_t pfsm
;
211 u_int32_t localization
;
214 if ((error
= hammer_pfs_autodetect(pfs
, ip
)) != 0)
216 localization
= (u_int32_t
)pfs
->pfs_id
<< 16;
217 if ((error
= hammer_unload_pseudofs(trans
, localization
)) != 0)
220 pfsm
= hammer_load_pseudofs(trans
, localization
, &error
);
222 if ((pfsm
->pfsd
.mirror_flags
& HAMMER_PFSD_SLAVE
) == 0) {
223 pfsm
->pfsd
.mirror_flags
|= HAMMER_PFSD_SLAVE
;
224 if (pfsm
->pfsd
.sync_end_tid
< hmp
->flush_tid1
)
225 pfsm
->pfsd
.sync_end_tid
= hmp
->flush_tid1
;
226 error
= hammer_save_pseudofs(trans
, pfsm
);
229 hammer_rel_pseudofs(trans
->hmp
, pfsm
);
236 * We can destroy a PFS by scanning and deleting all of its records in the
237 * B-Tree. The hammer utility will delete the softlink in the primary
240 * NOTE: The ip used for ioctl is not necessarily related to the PFS
243 hammer_ioc_destroy_pseudofs(hammer_transaction_t trans
, hammer_inode_t ip
,
244 struct hammer_ioc_pseudofs_rw
*pfs
)
246 hammer_pseudofs_inmem_t pfsm
;
247 u_int32_t localization
;
250 if ((error
= hammer_pfs_autodetect(pfs
, ip
)) != 0)
252 localization
= (u_int32_t
)pfs
->pfs_id
<< 16;
254 if ((error
= hammer_unload_pseudofs(trans
, localization
)) != 0)
257 pfsm
= hammer_load_pseudofs(trans
, localization
, &error
);
259 error
= hammer_pfs_rollback(trans
, pfsm
, 0);
261 pfsm
->pfsd
.mirror_flags
|= HAMMER_PFSD_DELETED
;
262 error
= hammer_save_pseudofs(trans
, pfsm
);
265 hammer_rel_pseudofs(trans
->hmp
, pfsm
);
266 if (error
== EINTR
) {
267 pfs
->head
.flags
|= HAMMER_IOC_HEAD_INTR
;
274 * Wait for the PFS to sync past the specified TID
277 hammer_ioc_wait_pseudofs(hammer_transaction_t trans
, hammer_inode_t ip
,
278 struct hammer_ioc_pseudofs_rw
*pfs
)
280 hammer_pseudofs_inmem_t pfsm
;
281 struct hammer_pseudofs_data pfsd
;
282 u_int32_t localization
;
287 if ((error
= hammer_pfs_autodetect(pfs
, ip
)) != 0)
289 localization
= (u_int32_t
)pfs
->pfs_id
<< 16;
291 if ((error
= copyin(pfs
->ondisk
, &pfsd
, sizeof(pfsd
))) != 0)
294 pfsm
= hammer_load_pseudofs(trans
, localization
, &error
);
296 if (pfsm
->pfsd
.mirror_flags
& HAMMER_PFSD_SLAVE
) {
297 tid
= pfsm
->pfsd
.sync_end_tid
;
298 waitp
= &pfsm
->pfsd
.sync_end_tid
;
300 tid
= trans
->hmp
->flush_tid1
;
301 waitp
= &trans
->hmp
->flush_tid1
;
303 if (tid
<= pfsd
.sync_end_tid
)
304 tsleep(waitp
, PCATCH
, "hmrmwt", 0);
306 hammer_rel_pseudofs(trans
->hmp
, pfsm
);
307 if (error
== EINTR
) {
308 pfs
->head
.flags
|= HAMMER_IOC_HEAD_INTR
;
316 * Auto-detect the pseudofs and do basic bounds checking.
320 hammer_pfs_autodetect(struct hammer_ioc_pseudofs_rw
*pfs
, hammer_inode_t ip
)
324 if (pfs
->pfs_id
== -1)
325 pfs
->pfs_id
= (int)(ip
->obj_localization
>> 16);
326 if (pfs
->pfs_id
< 0 || pfs
->pfs_id
>= HAMMER_MAX_PFS
)
328 if (pfs
->bytes
< sizeof(struct hammer_pseudofs_data
))
334 * Rollback the specified PFS to (trunc_tid - 1), removing everything
335 * greater or equal to trunc_tid. The PFS must not have been in no-mirror
336 * mode or the MIRROR_FILTERED scan will not work properly.
338 * This is typically used to remove any partial syncs when upgrading a
339 * slave to a master. It can theoretically also be used to rollback
340 * any PFS, including PFS#0, BUT ONLY TO POINTS THAT HAVE NOT YET BEEN
341 * PRUNED, and to points that are older only if they are on a retained
342 * (pruning softlink) boundary.
344 * Rollbacks destroy information. If you don't mind inode numbers changing
345 * a better way would be to cpdup a snapshot back onto the master.
349 hammer_pfs_rollback(hammer_transaction_t trans
,
350 hammer_pseudofs_inmem_t pfsm
,
351 hammer_tid_t trunc_tid
)
353 struct hammer_cmirror cmirror
;
354 struct hammer_cursor cursor
;
355 struct hammer_base_elm key_cur
;
359 bzero(&cmirror
, sizeof(cmirror
));
360 bzero(&key_cur
, sizeof(key_cur
));
361 key_cur
.localization
= HAMMER_MIN_LOCALIZATION
+ pfsm
->localization
;
362 key_cur
.obj_id
= HAMMER_MIN_OBJID
;
363 key_cur
.key
= HAMMER_MIN_KEY
;
364 key_cur
.create_tid
= 1;
365 key_cur
.rec_type
= HAMMER_MIN_RECTYPE
;
367 seq
= trans
->hmp
->flusher
.act
;
370 error
= hammer_init_cursor(trans
, &cursor
, NULL
, NULL
);
372 hammer_done_cursor(&cursor
);
375 cursor
.key_beg
= key_cur
;
376 cursor
.key_end
.localization
= HAMMER_MAX_LOCALIZATION
+
378 cursor
.key_end
.obj_id
= HAMMER_MAX_OBJID
;
379 cursor
.key_end
.key
= HAMMER_MAX_KEY
;
380 cursor
.key_end
.create_tid
= HAMMER_MAX_TID
;
381 cursor
.key_end
.rec_type
= HAMMER_MAX_RECTYPE
;
383 cursor
.flags
|= HAMMER_CURSOR_END_INCLUSIVE
;
384 cursor
.flags
|= HAMMER_CURSOR_BACKEND
;
387 * Do an optimized scan of only records created or modified
388 * >= trunc_tid, so we can fix up those records. We must
389 * still check the TIDs but this greatly reduces the size of
392 cursor
.flags
|= HAMMER_CURSOR_MIRROR_FILTERED
;
393 cursor
.cmirror
= &cmirror
;
394 cmirror
.mirror_tid
= trunc_tid
;
396 error
= hammer_btree_first(&cursor
);
399 * Abort the rollback.
402 error
= hammer_signal_check(trans
->hmp
);
408 * We only care about leafs. Internal nodes can be returned
409 * in mirror-filtered mode (they are used to generate SKIP
410 * mrecords), but we don't need them for this code.
412 * WARNING: See warnings in hammer_unlock_cursor() function.
414 cursor
.flags
|= HAMMER_CURSOR_ATEDISK
;
415 if (cursor
.node
->ondisk
->type
== HAMMER_BTREE_TYPE_LEAF
) {
416 key_cur
= cursor
.node
->ondisk
->elms
[cursor
.index
].base
;
417 error
= hammer_pfs_delete_at_cursor(&cursor
, trunc_tid
);
420 while (hammer_flusher_meta_halflimit(trans
->hmp
) ||
421 hammer_flusher_undo_exhausted(trans
, 2)) {
422 hammer_unlock_cursor(&cursor
);
423 hammer_flusher_wait(trans
->hmp
, seq
);
424 hammer_lock_cursor(&cursor
);
425 seq
= hammer_flusher_async_one(trans
->hmp
);
429 error
= hammer_btree_iterate(&cursor
);
433 hammer_done_cursor(&cursor
);
434 if (error
== EDEADLK
)
441 * Helper function - perform rollback on a B-Tree element given trunc_tid.
443 * If create_tid >= trunc_tid the record is physically destroyed.
444 * If delete_tid >= trunc_tid it will be set to 0, undeleting the record.
448 hammer_pfs_delete_at_cursor(hammer_cursor_t cursor
, hammer_tid_t trunc_tid
)
450 hammer_btree_leaf_elm_t elm
;
451 hammer_transaction_t trans
;
454 elm
= &cursor
->node
->ondisk
->elms
[cursor
->index
].leaf
;
455 if (elm
->base
.create_tid
< trunc_tid
&&
456 elm
->base
.delete_tid
< trunc_tid
) {
459 trans
= cursor
->trans
;
461 if (elm
->base
.create_tid
>= trunc_tid
) {
462 error
= hammer_delete_at_cursor(
463 cursor
, HAMMER_DELETE_DESTROY
,
464 cursor
->trans
->tid
, cursor
->trans
->time32
,
466 } else if (elm
->base
.delete_tid
>= trunc_tid
) {
467 error
= hammer_delete_at_cursor(
468 cursor
, HAMMER_DELETE_ADJUST
,