dsynth - Make dummy /usr/packages directory for pkg compatibility
[dragonfly.git] / sys / vfs / hammer / hammer_pfs.c
blob7d52644f8dade1fbd7dbaf56b2913b5a950c11a9
1 /*
2 * Copyright (c) 2008 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@backplane.com>
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
35 * HAMMER PFS ioctls - Manage pseudo-fs configurations
38 #include "hammer.h"
40 static int hammer_pfs_autodetect(struct hammer_ioc_pseudofs_rw *pfs,
41 hammer_inode_t ip);
42 static int hammer_pfs_rollback(hammer_transaction_t trans,
43 hammer_pseudofs_inmem_t pfsm,
44 hammer_tid_t trunc_tid);
45 static int hammer_pfs_delete_at_cursor(hammer_cursor_t cursor,
46 hammer_tid_t trunc_tid);
49 * Get mirroring/pseudo-fs information
51 * NOTE: The ip used for ioctl is not necessarily related to the PFS
52 * since this ioctl only requires PFS id (or upper 16 bits of ip localization).
54 int
55 hammer_ioc_get_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
56 struct hammer_ioc_pseudofs_rw *pfs)
58 hammer_pseudofs_inmem_t pfsm;
59 uint32_t localization;
60 int error;
62 if ((error = hammer_pfs_autodetect(pfs, ip)) != 0)
63 return(error);
64 localization = pfs_to_lo(pfs->pfs_id);
65 pfs->bytes = sizeof(struct hammer_pseudofs_data);
66 pfs->version = HAMMER_IOC_PSEUDOFS_VERSION;
68 pfsm = hammer_load_pseudofs(trans, localization, &error);
69 if (error) {
70 hammer_rel_pseudofs(trans->hmp, pfsm);
71 return(error);
75 * If the PFS is a master the sync tid is set by normal operation
76 * rather than the mirroring code, and will always track the
77 * real HAMMER filesystem.
79 * We use flush_tid1, which is the highest fully committed TID.
80 * flush_tid2 is the TID most recently flushed, but the UNDO hasn't
81 * caught up to it yet so a crash will roll us back to flush_tid1.
83 if (hammer_is_pfs_master(&pfsm->pfsd))
84 pfsm->pfsd.sync_end_tid = trans->hmp->flush_tid1;
87 * Copy out to userland.
89 if (pfs->ondisk)
90 error = copyout(&pfsm->pfsd, pfs->ondisk, sizeof(pfsm->pfsd));
91 hammer_rel_pseudofs(trans->hmp, pfsm);
92 return(error);
96 * Set mirroring/pseudo-fs information
98 int
99 hammer_ioc_set_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
100 struct ucred *cred, struct hammer_ioc_pseudofs_rw *pfs)
102 hammer_pseudofs_inmem_t pfsm;
103 uint32_t localization;
104 int error;
106 if ((error = hammer_pfs_autodetect(pfs, ip)) != 0)
107 return(error);
108 localization = pfs_to_lo(pfs->pfs_id);
109 if (pfs->version != HAMMER_IOC_PSEUDOFS_VERSION)
110 error = EINVAL;
112 if (error == 0 && pfs->ondisk) {
114 * Load the PFS so we can modify our in-core copy. Ignore
115 * ENOENT errors.
117 pfsm = hammer_load_pseudofs(trans, localization, &error);
118 error = copyin(pfs->ondisk, &pfsm->pfsd, sizeof(pfsm->pfsd));
121 * Save it back, create a root inode if we are in master
122 * mode and no root exists.
124 * We do not create root inodes for slaves, the root inode
125 * must be mirrored from the master.
127 if (error == 0 && hammer_is_pfs_master(&pfsm->pfsd)) {
128 error = hammer_mkroot_pseudofs(trans, cred, pfsm, ip);
130 if (error == 0)
131 error = hammer_save_pseudofs(trans, pfsm);
134 * Wakeup anyone waiting for a TID update for this PFS
136 wakeup(&pfsm->pfsd.sync_end_tid);
137 hammer_rel_pseudofs(trans->hmp, pfsm);
139 return(error);
143 * Upgrade a slave to a master
145 * This is fairly easy to do, but we must physically undo any partial syncs
146 * for transaction ids > sync_end_tid. Effective, we must do a partial
147 * rollback.
149 * NOTE: The ip used for ioctl is not necessarily related to the PFS
150 * since this ioctl only requires PFS id (or upper 16 bits of ip localization).
153 hammer_ioc_upgrade_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
154 struct hammer_ioc_pseudofs_rw *pfs)
156 hammer_pseudofs_inmem_t pfsm;
157 uint32_t localization;
158 int error;
160 if ((error = hammer_pfs_autodetect(pfs, ip)) != 0)
161 return(error);
162 localization = pfs_to_lo(pfs->pfs_id);
163 if ((error = hammer_unload_pseudofs(trans, localization)) != 0)
164 return(error);
167 * A master id must be set when upgrading
169 pfsm = hammer_load_pseudofs(trans, localization, &error);
170 if (error == 0) {
171 if (hammer_is_pfs_slave(&pfsm->pfsd)) {
172 error = hammer_pfs_rollback(trans, pfsm,
173 pfsm->pfsd.sync_end_tid + 1);
174 if (error == 0) {
175 pfsm->pfsd.mirror_flags &= ~HAMMER_PFSD_SLAVE;
176 error = hammer_save_pseudofs(trans, pfsm);
180 hammer_rel_pseudofs(trans->hmp, pfsm);
181 if (error == EINTR) {
182 pfs->head.flags |= HAMMER_IOC_HEAD_INTR;
183 error = 0;
185 return (error);
189 * Downgrade a master to a slave
191 * This is really easy to do, just set the SLAVE flag and update sync_end_tid.
193 * We previously did not update sync_end_tid in consideration for a slave
194 * upgraded to a master and then downgraded again, but this completely breaks
195 * the case where one starts with a master and then downgrades to a slave,
196 * then upgrades again.
198 * NOTE: The ip used for ioctl is not necessarily related to the PFS
199 * since this ioctl only requires PFS id (or upper 16 bits of ip localization).
202 hammer_ioc_downgrade_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
203 struct hammer_ioc_pseudofs_rw *pfs)
205 hammer_mount_t hmp = trans->hmp;
206 hammer_pseudofs_inmem_t pfsm;
207 uint32_t localization;
208 int error;
210 if ((error = hammer_pfs_autodetect(pfs, ip)) != 0)
211 return(error);
212 localization = pfs_to_lo(pfs->pfs_id);
213 if ((error = hammer_unload_pseudofs(trans, localization)) != 0)
214 return(error);
216 pfsm = hammer_load_pseudofs(trans, localization, &error);
217 if (error == 0) {
218 if (hammer_is_pfs_master(&pfsm->pfsd)) {
219 pfsm->pfsd.mirror_flags |= HAMMER_PFSD_SLAVE;
220 if (pfsm->pfsd.sync_end_tid < hmp->flush_tid1)
221 pfsm->pfsd.sync_end_tid = hmp->flush_tid1;
222 error = hammer_save_pseudofs(trans, pfsm);
225 hammer_rel_pseudofs(trans->hmp, pfsm);
226 return (error);
230 * Destroy a PFS
232 * We can destroy a PFS by scanning and deleting all of its records in the
233 * B-Tree. The hammer utility will delete the softlink in the primary
234 * filesystem.
236 * NOTE: The ip used for ioctl is not necessarily related to the PFS
237 * since this ioctl only requires PFS id (or upper 16 bits of ip localization).
240 hammer_ioc_destroy_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
241 struct hammer_ioc_pseudofs_rw *pfs)
243 hammer_pseudofs_inmem_t pfsm;
244 uint32_t localization;
245 int error;
247 if ((error = hammer_pfs_autodetect(pfs, ip)) != 0)
248 return(error);
249 localization = pfs_to_lo(pfs->pfs_id);
251 if ((error = hammer_unload_pseudofs(trans, localization)) != 0)
252 return(error);
254 pfsm = hammer_load_pseudofs(trans, localization, &error);
255 if (error == 0) {
256 error = hammer_pfs_rollback(trans, pfsm, 0);
257 if (error == 0) {
258 pfsm->pfsd.mirror_flags |= HAMMER_PFSD_DELETED;
259 error = hammer_save_pseudofs(trans, pfsm);
262 hammer_rel_pseudofs(trans->hmp, pfsm);
263 if (error == EINTR) {
264 pfs->head.flags |= HAMMER_IOC_HEAD_INTR;
265 error = 0;
267 return(error);
271 * Wait for the PFS to sync past the specified TID
274 hammer_ioc_wait_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
275 struct hammer_ioc_pseudofs_rw *pfs)
277 hammer_pseudofs_inmem_t pfsm;
278 struct hammer_pseudofs_data pfsd;
279 uint32_t localization;
280 hammer_tid_t tid;
281 void *waitp;
282 int error;
284 if ((error = hammer_pfs_autodetect(pfs, ip)) != 0)
285 return(error);
286 localization = pfs_to_lo(pfs->pfs_id);
288 if ((error = copyin(pfs->ondisk, &pfsd, sizeof(pfsd))) != 0)
289 return(error);
291 pfsm = hammer_load_pseudofs(trans, localization, &error);
292 if (error == 0) {
293 if (hammer_is_pfs_slave(&pfsm->pfsd)) {
294 tid = pfsm->pfsd.sync_end_tid;
295 waitp = &pfsm->pfsd.sync_end_tid;
296 } else {
297 tid = trans->hmp->flush_tid1;
298 waitp = &trans->hmp->flush_tid1;
300 if (tid <= pfsd.sync_end_tid)
301 tsleep(waitp, PCATCH, "hmrmwt", 0);
303 hammer_rel_pseudofs(trans->hmp, pfsm);
304 if (error == EINTR) {
305 pfs->head.flags |= HAMMER_IOC_HEAD_INTR;
306 error = 0;
308 return(error);
312 * Iterate PFS ondisk data.
313 * This function essentially does the same as hammer_load_pseudofs()
314 * except that this function only retrieves PFS data without touching
315 * hammer_pfs_rb_tree at all.
317 * NOTE: The ip used for ioctl is not necessarily related to the PFS
318 * since this ioctl only requires PFS id (or upper 16 bits of ip localization).
320 * NOTE: The API was changed in DragonFly 4.7, due to design issues
321 * this ioctl and libhammer (which is the only caller of this ioctl
322 * within DragonFly source, but no longer maintained by anyone) had.
325 hammer_ioc_scan_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
326 struct hammer_ioc_pseudofs_rw *pfs)
328 struct hammer_cursor cursor;
329 hammer_inode_t dip;
330 uint32_t localization;
331 int error;
333 if ((error = hammer_pfs_autodetect(pfs, ip)) != 0)
334 return(error);
335 localization = pfs_to_lo(pfs->pfs_id);
336 pfs->bytes = sizeof(struct hammer_pseudofs_data);
337 pfs->version = HAMMER_IOC_PSEUDOFS_VERSION;
339 dip = hammer_get_inode(trans, NULL, HAMMER_OBJID_ROOT, HAMMER_MAX_TID,
340 HAMMER_DEF_LOCALIZATION, 0, &error);
342 error = hammer_init_cursor(trans, &cursor,
343 (dip ? &dip->cache[1] : NULL), dip);
344 if (error)
345 goto fail;
347 cursor.key_beg.localization = HAMMER_DEF_LOCALIZATION |
348 HAMMER_LOCALIZE_MISC;
349 cursor.key_beg.obj_id = HAMMER_OBJID_ROOT;
350 cursor.key_beg.create_tid = 0;
351 cursor.key_beg.delete_tid = 0;
352 cursor.key_beg.rec_type = HAMMER_RECTYPE_PFS;
353 cursor.key_beg.obj_type = 0;
354 cursor.key_beg.key = localization;
355 cursor.asof = HAMMER_MAX_TID;
356 cursor.flags |= HAMMER_CURSOR_ASOF;
358 error = hammer_ip_lookup(&cursor);
359 if (error == 0) {
360 error = hammer_ip_resolve_data(&cursor);
361 if (error == 0) {
362 if (pfs->ondisk)
363 copyout(cursor.data, pfs->ondisk, cursor.leaf->data_len);
364 localization = cursor.leaf->base.key;
365 pfs->pfs_id = lo_to_pfs(localization);
368 hammer_done_cursor(&cursor);
369 fail:
370 if (dip)
371 hammer_rel_inode(dip, 0);
372 return(error);
376 * Auto-detect the pseudofs and do basic bounds checking.
378 static
380 hammer_pfs_autodetect(struct hammer_ioc_pseudofs_rw *pfs, hammer_inode_t ip)
382 int error = 0;
384 if (pfs->pfs_id == -1)
385 pfs->pfs_id = lo_to_pfs(ip->obj_localization);
386 if (pfs->pfs_id < 0 || pfs->pfs_id >= HAMMER_MAX_PFS)
387 error = EINVAL;
388 if (pfs->bytes < sizeof(struct hammer_pseudofs_data))
389 error = EINVAL;
390 return(error);
394 * Rollback the specified PFS to (trunc_tid - 1), removing everything
395 * greater or equal to trunc_tid. The PFS must not have been in no-mirror
396 * mode or the MIRROR_FILTERED scan will not work properly.
398 * This is typically used to remove any partial syncs when upgrading a
399 * slave to a master. It can theoretically also be used to rollback
400 * any PFS, including root PFS, BUT ONLY TO POINTS THAT HAVE NOT YET BEEN
401 * PRUNED, and to points that are older only if they are on a retained
402 * (pruning softlink) boundary.
404 * Rollbacks destroy information. If you don't mind inode numbers changing
405 * a better way would be to cpdup a snapshot back onto the master.
407 static
409 hammer_pfs_rollback(hammer_transaction_t trans,
410 hammer_pseudofs_inmem_t pfsm,
411 hammer_tid_t trunc_tid)
413 struct hammer_cmirror cmirror;
414 struct hammer_cursor cursor;
415 struct hammer_base_elm key_cur;
416 int error;
417 int seq;
419 bzero(&cmirror, sizeof(cmirror));
420 bzero(&key_cur, sizeof(key_cur));
421 key_cur.localization = HAMMER_MIN_LOCALIZATION | pfsm->localization;
422 key_cur.obj_id = HAMMER_MIN_OBJID;
423 key_cur.key = HAMMER_MIN_KEY;
424 key_cur.create_tid = 1;
425 key_cur.rec_type = HAMMER_MIN_RECTYPE;
427 seq = trans->hmp->flusher.done;
429 retry:
430 error = hammer_init_cursor(trans, &cursor, NULL, NULL);
431 if (error) {
432 hammer_done_cursor(&cursor);
433 goto failed;
435 cursor.key_beg = key_cur;
436 cursor.key_end.localization = HAMMER_MAX_LOCALIZATION |
437 pfsm->localization;
438 cursor.key_end.obj_id = HAMMER_MAX_OBJID;
439 cursor.key_end.key = HAMMER_MAX_KEY;
440 cursor.key_end.create_tid = HAMMER_MAX_TID;
441 cursor.key_end.rec_type = HAMMER_MAX_RECTYPE;
443 cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE;
444 cursor.flags |= HAMMER_CURSOR_BACKEND;
447 * Do an optimized scan of only records created or modified
448 * >= trunc_tid, so we can fix up those records. We must
449 * still check the TIDs but this greatly reduces the size of
450 * the scan.
452 cursor.flags |= HAMMER_CURSOR_MIRROR_FILTERED;
453 cursor.cmirror = &cmirror;
454 cmirror.mirror_tid = trunc_tid;
456 error = hammer_btree_first(&cursor);
457 while (error == 0) {
459 * Abort the rollback.
461 if (error == 0) {
462 error = hammer_signal_check(trans->hmp);
463 if (error)
464 break;
468 * We only care about leafs. Internal nodes can be returned
469 * in mirror-filtered mode (they are used to generate SKIP
470 * mrecords), but we don't need them for this code.
472 * WARNING: See warnings in hammer_unlock_cursor() function.
474 cursor.flags |= HAMMER_CURSOR_ATEDISK;
475 if (cursor.node->ondisk->type == HAMMER_BTREE_TYPE_LEAF) {
476 key_cur = cursor.node->ondisk->elms[cursor.index].base;
477 error = hammer_pfs_delete_at_cursor(&cursor, trunc_tid);
480 while (hammer_flusher_meta_halflimit(trans->hmp) ||
481 hammer_flusher_undo_exhausted(trans, 2)) {
482 hammer_unlock_cursor(&cursor);
483 hammer_flusher_wait(trans->hmp, seq);
484 hammer_lock_cursor(&cursor);
485 seq = hammer_flusher_async_one(trans->hmp);
488 if (error == 0)
489 error = hammer_btree_iterate(&cursor);
491 if (error == ENOENT)
492 error = 0;
493 hammer_done_cursor(&cursor);
494 if (error == EDEADLK)
495 goto retry;
496 failed:
497 return(error);
501 * Helper function - perform rollback on a B-Tree element given trunc_tid.
503 * If create_tid >= trunc_tid the record is physically destroyed.
504 * If delete_tid >= trunc_tid it will be set to 0, undeleting the record.
506 static
508 hammer_pfs_delete_at_cursor(hammer_cursor_t cursor, hammer_tid_t trunc_tid)
510 hammer_btree_leaf_elm_t elm;
511 int error;
513 elm = &cursor->node->ondisk->elms[cursor->index].leaf;
514 if (elm->base.create_tid < trunc_tid &&
515 elm->base.delete_tid < trunc_tid) {
516 return(0);
519 if (elm->base.create_tid >= trunc_tid) {
520 error = hammer_delete_at_cursor(
521 cursor, HAMMER_DELETE_DESTROY,
522 cursor->trans->tid, cursor->trans->time32,
523 1, NULL);
524 } else if (elm->base.delete_tid >= trunc_tid) {
525 error = hammer_delete_at_cursor(
526 cursor, HAMMER_DELETE_ADJUST,
527 0, 0,
528 1, NULL);
529 } else {
530 error = 0;
532 return(error);