2 * Copyright (c) 2008 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@backplane.com>
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 * HAMMER PFS ioctls - Manage pseudo-fs configurations
40 static int hammer_pfs_autodetect(struct hammer_ioc_pseudofs_rw
*pfs
,
42 static int hammer_pfs_rollback(hammer_transaction_t trans
,
43 hammer_pseudofs_inmem_t pfsm
,
44 hammer_tid_t trunc_tid
);
45 static int hammer_pfs_delete_at_cursor(hammer_cursor_t cursor
,
46 hammer_tid_t trunc_tid
);
49 * Get mirroring/pseudo-fs information
51 * NOTE: The ip used for ioctl is not necessarily related to the PFS
52 * since this ioctl only requires PFS id (or upper 16 bits of ip localization).
55 hammer_ioc_get_pseudofs(hammer_transaction_t trans
, hammer_inode_t ip
,
56 struct hammer_ioc_pseudofs_rw
*pfs
)
58 hammer_pseudofs_inmem_t pfsm
;
59 uint32_t localization
;
62 if ((error
= hammer_pfs_autodetect(pfs
, ip
)) != 0)
64 localization
= pfs_to_lo(pfs
->pfs_id
);
65 pfs
->bytes
= sizeof(struct hammer_pseudofs_data
);
66 pfs
->version
= HAMMER_IOC_PSEUDOFS_VERSION
;
68 pfsm
= hammer_load_pseudofs(trans
, localization
, &error
);
70 hammer_rel_pseudofs(trans
->hmp
, pfsm
);
75 * If the PFS is a master the sync tid is set by normal operation
76 * rather than the mirroring code, and will always track the
77 * real HAMMER filesystem.
79 * We use flush_tid1, which is the highest fully committed TID.
80 * flush_tid2 is the TID most recently flushed, but the UNDO hasn't
81 * caught up to it yet so a crash will roll us back to flush_tid1.
83 if (hammer_is_pfs_master(&pfsm
->pfsd
))
84 pfsm
->pfsd
.sync_end_tid
= trans
->hmp
->flush_tid1
;
87 * Copy out to userland.
90 error
= copyout(&pfsm
->pfsd
, pfs
->ondisk
, sizeof(pfsm
->pfsd
));
91 hammer_rel_pseudofs(trans
->hmp
, pfsm
);
96 * Set mirroring/pseudo-fs information
99 hammer_ioc_set_pseudofs(hammer_transaction_t trans
, hammer_inode_t ip
,
100 struct ucred
*cred
, struct hammer_ioc_pseudofs_rw
*pfs
)
102 hammer_pseudofs_inmem_t pfsm
;
103 uint32_t localization
;
106 if ((error
= hammer_pfs_autodetect(pfs
, ip
)) != 0)
108 localization
= pfs_to_lo(pfs
->pfs_id
);
109 if (pfs
->version
!= HAMMER_IOC_PSEUDOFS_VERSION
)
112 if (error
== 0 && pfs
->ondisk
) {
114 * Load the PFS so we can modify our in-core copy. Ignore
117 pfsm
= hammer_load_pseudofs(trans
, localization
, &error
);
118 error
= copyin(pfs
->ondisk
, &pfsm
->pfsd
, sizeof(pfsm
->pfsd
));
121 * Save it back, create a root inode if we are in master
122 * mode and no root exists.
124 * We do not create root inodes for slaves, the root inode
125 * must be mirrored from the master.
127 if (error
== 0 && hammer_is_pfs_master(&pfsm
->pfsd
)) {
128 error
= hammer_mkroot_pseudofs(trans
, cred
, pfsm
, ip
);
131 error
= hammer_save_pseudofs(trans
, pfsm
);
134 * Wakeup anyone waiting for a TID update for this PFS
136 wakeup(&pfsm
->pfsd
.sync_end_tid
);
137 hammer_rel_pseudofs(trans
->hmp
, pfsm
);
143 * Upgrade a slave to a master
145 * This is fairly easy to do, but we must physically undo any partial syncs
146 * for transaction ids > sync_end_tid. Effectively, we must do a partial
149 * NOTE: The ip used for ioctl is not necessarily related to the PFS
150 * since this ioctl only requires PFS id (or upper 16 bits of ip localization).
153 hammer_ioc_upgrade_pseudofs(hammer_transaction_t trans
, hammer_inode_t ip
,
154 struct hammer_ioc_pseudofs_rw
*pfs
)
156 hammer_pseudofs_inmem_t pfsm
;
157 uint32_t localization
;
160 if ((error
= hammer_pfs_autodetect(pfs
, ip
)) != 0)
162 localization
= pfs_to_lo(pfs
->pfs_id
);
163 if ((error
= hammer_unload_pseudofs(trans
, localization
)) != 0)
167 * A master id must be set when upgrading
169 pfsm
= hammer_load_pseudofs(trans
, localization
, &error
);
171 if (hammer_is_pfs_slave(&pfsm
->pfsd
)) {
172 error
= hammer_pfs_rollback(trans
, pfsm
,
173 pfsm
->pfsd
.sync_end_tid
+ 1);
175 pfsm
->pfsd
.mirror_flags
&= ~HAMMER_PFSD_SLAVE
;
176 error
= hammer_save_pseudofs(trans
, pfsm
);
180 hammer_rel_pseudofs(trans
->hmp
, pfsm
);
181 if (error
== EINTR
) {
182 pfs
->head
.flags
|= HAMMER_IOC_HEAD_INTR
;
189 * Downgrade a master to a slave
191 * This is really easy to do, just set the SLAVE flag and update sync_end_tid.
193 * We previously did not update sync_end_tid in consideration for a slave
194 * upgraded to a master and then downgraded again, but this completely breaks
195 * the case where one starts with a master and then downgrades to a slave,
196 * then upgrades again.
198 * NOTE: The ip used for ioctl is not necessarily related to the PFS
199 * since this ioctl only requires PFS id (or upper 16 bits of ip localization).
202 hammer_ioc_downgrade_pseudofs(hammer_transaction_t trans
, hammer_inode_t ip
,
203 struct hammer_ioc_pseudofs_rw
*pfs
)
205 hammer_mount_t hmp
= trans
->hmp
;
206 hammer_pseudofs_inmem_t pfsm
;
207 uint32_t localization
;
210 if ((error
= hammer_pfs_autodetect(pfs
, ip
)) != 0)
212 localization
= pfs_to_lo(pfs
->pfs_id
);
213 if ((error
= hammer_unload_pseudofs(trans
, localization
)) != 0)
216 pfsm
= hammer_load_pseudofs(trans
, localization
, &error
);
218 if (hammer_is_pfs_master(&pfsm
->pfsd
)) {
219 pfsm
->pfsd
.mirror_flags
|= HAMMER_PFSD_SLAVE
;
220 if (pfsm
->pfsd
.sync_end_tid
< hmp
->flush_tid1
)
221 pfsm
->pfsd
.sync_end_tid
= hmp
->flush_tid1
;
222 error
= hammer_save_pseudofs(trans
, pfsm
);
225 hammer_rel_pseudofs(trans
->hmp
, pfsm
);
232 * We can destroy a PFS by scanning and deleting all of its records in the
233 * B-Tree. The hammer utility will delete the softlink in the primary
236 * NOTE: The ip used for ioctl is not necessarily related to the PFS
237 * since this ioctl only requires PFS id (or upper 16 bits of ip localization).
240 hammer_ioc_destroy_pseudofs(hammer_transaction_t trans
, hammer_inode_t ip
,
241 struct hammer_ioc_pseudofs_rw
*pfs
)
243 hammer_pseudofs_inmem_t pfsm
;
244 uint32_t localization
;
247 if ((error
= hammer_pfs_autodetect(pfs
, ip
)) != 0)
249 localization
= pfs_to_lo(pfs
->pfs_id
);
251 if ((error
= hammer_unload_pseudofs(trans
, localization
)) != 0)
254 pfsm
= hammer_load_pseudofs(trans
, localization
, &error
);
256 error
= hammer_pfs_rollback(trans
, pfsm
, 0);
258 pfsm
->pfsd
.mirror_flags
|= HAMMER_PFSD_DELETED
;
259 error
= hammer_save_pseudofs(trans
, pfsm
);
262 hammer_rel_pseudofs(trans
->hmp
, pfsm
);
263 if (error
== EINTR
) {
264 pfs
->head
.flags
|= HAMMER_IOC_HEAD_INTR
;
271 * Wait for the PFS to sync past the specified TID
274 hammer_ioc_wait_pseudofs(hammer_transaction_t trans
, hammer_inode_t ip
,
275 struct hammer_ioc_pseudofs_rw
*pfs
)
277 hammer_pseudofs_inmem_t pfsm
;
278 struct hammer_pseudofs_data pfsd
;
279 uint32_t localization
;
284 if ((error
= hammer_pfs_autodetect(pfs
, ip
)) != 0)
286 localization
= pfs_to_lo(pfs
->pfs_id
);
288 if ((error
= copyin(pfs
->ondisk
, &pfsd
, sizeof(pfsd
))) != 0)
291 pfsm
= hammer_load_pseudofs(trans
, localization
, &error
);
293 if (hammer_is_pfs_slave(&pfsm
->pfsd
)) {
294 tid
= pfsm
->pfsd
.sync_end_tid
;
295 waitp
= &pfsm
->pfsd
.sync_end_tid
;
297 tid
= trans
->hmp
->flush_tid1
;
298 waitp
= &trans
->hmp
->flush_tid1
;
300 if (tid
<= pfsd
.sync_end_tid
)
301 tsleep(waitp
, PCATCH
, "hmrmwt", 0);
303 hammer_rel_pseudofs(trans
->hmp
, pfsm
);
304 if (error
== EINTR
) {
305 pfs
->head
.flags
|= HAMMER_IOC_HEAD_INTR
;
312 * Iterate PFS ondisk data.
313 * This function essentially does the same as hammer_load_pseudofs()
314 * except that this function only retrieves PFS data without touching
315 * hammer_pfs_rb_tree at all.
317 * NOTE: The ip used for ioctl is not necessarily related to the PFS
318 * since this ioctl only requires PFS id (or upper 16 bits of ip localization).
320 * NOTE: The API was changed in DragonFly 4.7, due to design issues
321 * this ioctl and libhammer (which is the only caller of this ioctl
322 * within DragonFly source, but no longer maintained by anyone) had.
325 hammer_ioc_scan_pseudofs(hammer_transaction_t trans
, hammer_inode_t ip
,
326 struct hammer_ioc_pseudofs_rw
*pfs
)
328 struct hammer_cursor cursor
;
330 uint32_t localization
;
333 if ((error
= hammer_pfs_autodetect(pfs
, ip
)) != 0)
335 localization
= pfs_to_lo(pfs
->pfs_id
);
336 pfs
->bytes
= sizeof(struct hammer_pseudofs_data
);
337 pfs
->version
= HAMMER_IOC_PSEUDOFS_VERSION
;
339 dip
= hammer_get_inode(trans
, NULL
, HAMMER_OBJID_ROOT
, HAMMER_MAX_TID
,
340 HAMMER_DEF_LOCALIZATION
, 0, &error
);
342 error
= hammer_init_cursor(trans
, &cursor
,
343 (dip
? &dip
->cache
[1] : NULL
), dip
);
347 cursor
.key_beg
.localization
= HAMMER_DEF_LOCALIZATION
|
348 HAMMER_LOCALIZE_MISC
;
349 cursor
.key_beg
.obj_id
= HAMMER_OBJID_ROOT
;
350 cursor
.key_beg
.create_tid
= 0;
351 cursor
.key_beg
.delete_tid
= 0;
352 cursor
.key_beg
.rec_type
= HAMMER_RECTYPE_PFS
;
353 cursor
.key_beg
.obj_type
= 0;
354 cursor
.key_beg
.key
= localization
;
355 cursor
.asof
= HAMMER_MAX_TID
;
356 cursor
.flags
|= HAMMER_CURSOR_ASOF
;
358 error
= hammer_ip_lookup(&cursor
);
360 error
= hammer_ip_resolve_data(&cursor
);
363 copyout(cursor
.data
, pfs
->ondisk
, cursor
.leaf
->data_len
);
364 localization
= cursor
.leaf
->base
.key
;
365 pfs
->pfs_id
= lo_to_pfs(localization
);
368 hammer_done_cursor(&cursor
);
371 hammer_rel_inode(dip
, 0);
376 * Auto-detect the pseudofs and do basic bounds checking.
380 hammer_pfs_autodetect(struct hammer_ioc_pseudofs_rw
*pfs
, hammer_inode_t ip
)
384 if (pfs
->pfs_id
== -1)
385 pfs
->pfs_id
= lo_to_pfs(ip
->obj_localization
);
386 if (pfs
->pfs_id
< 0 || pfs
->pfs_id
>= HAMMER_MAX_PFS
)
388 if (pfs
->bytes
< sizeof(struct hammer_pseudofs_data
))
394 * Roll back the specified PFS to (trunc_tid - 1), removing everything
395 * greater than or equal to trunc_tid. The PFS must not have been in no-mirror
396 * mode or the MIRROR_FILTERED scan will not work properly.
398 * This is typically used to remove any partial syncs when upgrading a
399 * slave to a master. It can theoretically also be used to roll back
400 * any PFS, including root PFS, BUT ONLY TO POINTS THAT HAVE NOT YET BEEN
401 * PRUNED, and to points that are older only if they are on a retained
402 * (pruning softlink) boundary.
404 * Rollbacks destroy information. If you don't mind inode numbers changing
405 * a better way would be to cpdup a snapshot back onto the master.
409 hammer_pfs_rollback(hammer_transaction_t trans
,
410 hammer_pseudofs_inmem_t pfsm
,
411 hammer_tid_t trunc_tid
)
413 struct hammer_cmirror cmirror
;
414 struct hammer_cursor cursor
;
415 struct hammer_base_elm key_cur
;
419 bzero(&cmirror
, sizeof(cmirror
));
420 bzero(&key_cur
, sizeof(key_cur
));
421 key_cur
.localization
= HAMMER_MIN_LOCALIZATION
| pfsm
->localization
;
422 key_cur
.obj_id
= HAMMER_MIN_OBJID
;
423 key_cur
.key
= HAMMER_MIN_KEY
;
424 key_cur
.create_tid
= 1;
425 key_cur
.rec_type
= HAMMER_MIN_RECTYPE
;
427 seq
= trans
->hmp
->flusher
.done
;
430 error
= hammer_init_cursor(trans
, &cursor
, NULL
, NULL
);
432 hammer_done_cursor(&cursor
);
435 cursor
.key_beg
= key_cur
;
436 cursor
.key_end
.localization
= HAMMER_MAX_LOCALIZATION
|
438 cursor
.key_end
.obj_id
= HAMMER_MAX_OBJID
;
439 cursor
.key_end
.key
= HAMMER_MAX_KEY
;
440 cursor
.key_end
.create_tid
= HAMMER_MAX_TID
;
441 cursor
.key_end
.rec_type
= HAMMER_MAX_RECTYPE
;
443 cursor
.flags
|= HAMMER_CURSOR_END_INCLUSIVE
;
444 cursor
.flags
|= HAMMER_CURSOR_BACKEND
;
447 * Do an optimized scan of only records created or modified
448 * >= trunc_tid, so we can fix up those records. We must
449 * still check the TIDs but this greatly reduces the size of
452 cursor
.flags
|= HAMMER_CURSOR_MIRROR_FILTERED
;
453 cursor
.cmirror
= &cmirror
;
454 cmirror
.mirror_tid
= trunc_tid
;
456 error
= hammer_btree_first(&cursor
);
459 * Abort the rollback.
462 error
= hammer_signal_check(trans
->hmp
);
468 * We only care about leafs. Internal nodes can be returned
469 * in mirror-filtered mode (they are used to generate SKIP
470 * mrecords), but we don't need them for this code.
472 * WARNING: See warnings in hammer_unlock_cursor() function.
474 cursor
.flags
|= HAMMER_CURSOR_ATEDISK
;
475 if (cursor
.node
->ondisk
->type
== HAMMER_BTREE_TYPE_LEAF
) {
476 key_cur
= cursor
.node
->ondisk
->elms
[cursor
.index
].base
;
477 error
= hammer_pfs_delete_at_cursor(&cursor
, trunc_tid
);
480 while (hammer_flusher_meta_halflimit(trans
->hmp
) ||
481 hammer_flusher_undo_exhausted(trans
, 2)) {
482 hammer_unlock_cursor(&cursor
);
483 hammer_flusher_wait(trans
->hmp
, seq
);
484 hammer_lock_cursor(&cursor
);
485 seq
= hammer_flusher_async_one(trans
->hmp
);
489 error
= hammer_btree_iterate(&cursor
);
493 hammer_done_cursor(&cursor
);
494 if (error
== EDEADLK
)
501 * Helper function - perform rollback on a B-Tree element given trunc_tid.
503 * If create_tid >= trunc_tid the record is physically destroyed.
504 * If delete_tid >= trunc_tid it will be set to 0, undeleting the record.
508 hammer_pfs_delete_at_cursor(hammer_cursor_t cursor
, hammer_tid_t trunc_tid
)
510 hammer_btree_leaf_elm_t elm
;
513 elm
= &cursor
->node
->ondisk
->elms
[cursor
->index
].leaf
;
514 if (elm
->base
.create_tid
< trunc_tid
&&
515 elm
->base
.delete_tid
< trunc_tid
) {
519 if (elm
->base
.create_tid
>= trunc_tid
) {
520 error
= hammer_delete_at_cursor(
521 cursor
, HAMMER_DELETE_DESTROY
,
522 cursor
->trans
->tid
, cursor
->trans
->time32
,
524 } else if (elm
->base
.delete_tid
>= trunc_tid
) {
525 error
= hammer_delete_at_cursor(
526 cursor
, HAMMER_DELETE_ADJUST
,