hammer2 - More involved refactoring of chain_repparent, cleanup
[dragonfly.git] / sys / vfs / hammer2 / hammer2_synchro.c
blob81b0b0f42fe700057a9c2b21ccf3e11584b18bb5
1 /*
2 * Copyright (c) 2015-2018 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@dragonflybsd.org>
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
35 * This module implements the cluster synchronizer. Basically the way
36 * it works is that a thread is created for each cluster node in a PFS.
37 * This thread is responsible for synchronizing the current node using
38 * data from other nodes.
40 * Any out of sync master or slave can get back into synchronization as
41 * long as a quorum of masters agree on the update_tid. If a quorum is
42 * not available it may still be possible to synchronize to the highest
43 * available update_tid as a way of trying to catch up as much as possible
44 * until a quorum is available.
46 * If no quorum is possible (which can happen even if all masters are
47 * available, if the update_tid does not match), then manual intervention
48 * may be required to resolve discrepancies.
50 #include "hammer2.h"
52 typedef struct hammer2_deferred_ip {
53 struct hammer2_deferred_ip *next;
54 hammer2_inode_t *ip;
55 } hammer2_deferred_ip_t;
57 typedef struct hammer2_deferred_list {
58 hammer2_deferred_ip_t *base;
59 int count;
60 } hammer2_deferred_list_t;
63 #define HAMMER2_SYNCHRO_DEBUG 1
65 static int hammer2_sync_slaves(hammer2_thread_t *thr, hammer2_inode_t *ip,
66 hammer2_deferred_list_t *list, int isroot);
67 #if 0
68 static void hammer2_update_pfs_status(hammer2_thread_t *thr, uint32_t flags);
69 nerror = hammer2_sync_insert(
70 thr, &parent, &chain,
71 focus->bref.modify_tid,
72 idx, focus);
73 #endif
74 static int hammer2_sync_insert(hammer2_thread_t *thr,
75 hammer2_chain_t **parentp, hammer2_chain_t **chainp,
76 hammer2_tid_t modify_tid, int idx,
77 hammer2_chain_t *focus);
78 static int hammer2_sync_destroy(hammer2_thread_t *thr,
79 hammer2_chain_t **parentp, hammer2_chain_t **chainp,
80 hammer2_tid_t mtid, int idx);
81 static int hammer2_sync_replace(hammer2_thread_t *thr,
82 hammer2_chain_t *parent, hammer2_chain_t *chain,
83 hammer2_tid_t mtid, int idx,
84 hammer2_chain_t *focus, int isroot);
86 /****************************************************************************
87 * HAMMER2 SYNC THREADS *
88 ****************************************************************************/
90 * Primary management thread for an element of a node. A thread will exist
91 * for each element requiring management.
93 * No management threads are needed for the SPMP or for any PMP with only
94 * a single MASTER.
96 * On the SPMP - handles bulkfree and dedup operations
97 * On a PFS - handles remastering and synchronization
99 void
100 hammer2_primary_sync_thread(void *arg)
102 hammer2_thread_t *thr = arg;
103 hammer2_pfs_t *pmp;
104 hammer2_deferred_list_t list;
105 hammer2_deferred_ip_t *defer;
106 int error;
107 uint32_t flags;
108 uint32_t nflags;
110 pmp = thr->pmp;
111 bzero(&list, sizeof(list));
113 for (;;) {
114 flags = thr->flags;
115 cpu_ccfence();
118 * Handle stop request
120 if (flags & HAMMER2_THREAD_STOP)
121 break;
124 * Handle freeze request
126 if (flags & HAMMER2_THREAD_FREEZE) {
127 nflags = (flags & ~(HAMMER2_THREAD_FREEZE |
128 HAMMER2_THREAD_WAITING)) |
129 HAMMER2_THREAD_FROZEN;
130 if (!atomic_cmpset_int(&thr->flags, flags, nflags))
131 continue;
132 if (flags & HAMMER2_THREAD_WAITING)
133 wakeup(&thr->flags);
134 continue;
137 if (flags & HAMMER2_THREAD_UNFREEZE) {
138 nflags = flags & ~(HAMMER2_THREAD_UNFREEZE |
139 HAMMER2_THREAD_FROZEN |
140 HAMMER2_THREAD_WAITING);
141 if (!atomic_cmpset_int(&thr->flags, flags, nflags))
142 continue;
143 if (flags & HAMMER2_THREAD_WAITING)
144 wakeup(&thr->flags);
145 continue;
149 * Force idle if frozen until unfrozen or stopped.
151 if (flags & HAMMER2_THREAD_FROZEN) {
152 nflags = flags | HAMMER2_THREAD_WAITING;
154 tsleep_interlock(&thr->flags, 0);
155 if (atomic_cmpset_int(&thr->flags, flags, nflags))
156 tsleep(&thr->flags, PINTERLOCKED, "frozen", 0);
157 continue;
161 * Reset state on REMASTER request
163 if (thr->flags & HAMMER2_THREAD_REMASTER) {
164 nflags = flags & ~HAMMER2_THREAD_REMASTER;
165 if (atomic_cmpset_int(&thr->flags, flags, nflags)) {
166 /* reset state here */
168 continue;
172 * Synchronization scan.
174 if (hammer2_debug & 0x8000)
175 kprintf("sync_slaves pfs %s clindex %d\n",
176 pmp->pfs_names[thr->clindex], thr->clindex);
177 hammer2_trans_init(pmp, 0);
179 hammer2_inode_ref(pmp->iroot);
181 for (;;) {
182 int didbreak = 0;
183 /* XXX lock synchronize pmp->modify_tid */
184 error = hammer2_sync_slaves(thr, pmp->iroot, &list, 1);
185 if (hammer2_debug & 0x8000) {
186 kprintf("sync_slaves error %d defer %p\n",
187 error, list.base);
189 if (error != HAMMER2_ERROR_EAGAIN)
190 break;
191 while ((defer = list.base) != NULL) {
192 hammer2_inode_t *nip;
194 nip = defer->ip;
195 error = hammer2_sync_slaves(thr, nip, &list,
196 (nip == pmp->iroot));
197 if (error &&
198 error != HAMMER2_ERROR_EAGAIN &&
199 error != HAMMER2_ERROR_ENOENT) {
200 break;
202 if (hammer2_thr_break(thr)) {
203 didbreak = 1;
204 break;
208 * If no additional defers occurred we can
209 * remove this one, otherwise keep it on
210 * the list and retry once the additional
211 * defers have completed.
213 if (defer == list.base) {
214 --list.count;
215 list.base = defer->next;
216 kfree(defer, M_HAMMER2);
217 defer = NULL; /* safety */
218 hammer2_inode_drop(nip);
223 * If the thread is being remastered, frozen, or
224 * stopped, clean up any left-over deferals.
226 if (didbreak ||
227 (error && error != HAMMER2_ERROR_EAGAIN)) {
228 kprintf("didbreak\n");
229 while ((defer = list.base) != NULL) {
230 --list.count;
231 hammer2_inode_drop(defer->ip);
232 list.base = defer->next;
233 kfree(defer, M_HAMMER2);
235 if (error == 0 || error == HAMMER2_ERROR_EAGAIN)
236 error = HAMMER2_ERROR_EINPROGRESS;
237 break;
241 hammer2_inode_drop(pmp->iroot);
242 hammer2_trans_done(pmp);
244 if (error && error != HAMMER2_ERROR_EINPROGRESS)
245 kprintf("hammer2_sync_slaves: error %d\n", error);
248 * Wait for event, or 5-second poll.
250 nflags = flags | HAMMER2_THREAD_WAITING;
251 tsleep_interlock(&thr->flags, 0);
252 if (atomic_cmpset_int(&thr->flags, flags, nflags)) {
253 tsleep(&thr->flags, 0, "h2idle", hz * 5);
256 thr->td = NULL;
257 hammer2_thr_signal(thr, HAMMER2_THREAD_STOPPED);
258 /* thr structure can go invalid after this point */
#if 0
/*
 * Given a locked cluster created from pmp->iroot, update the PFS's
 * reporting status.
 *
 * Only the HAMMER2_CLUSTER_ZFLAGS bits of flags are considered.  Nothing
 * is recorded or printed unless they differ from pmp->cluster_flags.
 */
static
void
hammer2_update_pfs_status(hammer2_thread_t *thr, uint32_t flags)
{
	hammer2_pfs_t *pmp;

	pmp = thr->pmp;
	flags &= HAMMER2_CLUSTER_ZFLAGS;
	if (flags == pmp->cluster_flags)
		return;
	pmp->cluster_flags = flags;

	/* One console line summarizing the new state */
	kprintf("pfs %p", pmp);
	if ((flags & HAMMER2_CLUSTER_MSYNCED) != 0)
		kprintf(" masters-all-good");
	if ((flags & HAMMER2_CLUSTER_SSYNCED) != 0)
		kprintf(" slaves-all-good");

	/* hard (quorum) accessibility */
	if ((flags & HAMMER2_CLUSTER_WRHARD) != 0) {
		kprintf(" quorum/rw");
	} else if ((flags & HAMMER2_CLUSTER_RDHARD) != 0) {
		kprintf(" quorum/ro");
	}

	if ((flags & HAMMER2_CLUSTER_UNHARD) != 0) {
		kprintf(" out-of-sync-masters");
	} else if ((flags & HAMMER2_CLUSTER_NOHARD) != 0) {
		kprintf(" no-masters-visible");
	}

	/* soft accessibility */
	if ((flags & HAMMER2_CLUSTER_WRSOFT) != 0) {
		kprintf(" soft/rw");
	} else if ((flags & HAMMER2_CLUSTER_RDSOFT) != 0) {
		kprintf(" soft/ro");
	}

	if ((flags & HAMMER2_CLUSTER_UNSOFT) != 0) {
		kprintf(" out-of-sync-slaves");
	} else if ((flags & HAMMER2_CLUSTER_NOSOFT) != 0) {
		kprintf(" no-slaves-visible");
	}
	kprintf("\n");
}
#endif
#if 0
/*
 * Print the key of element i of cl followed by an "(I)" marker when the
 * element is flagged HAMMER2_CITEM_INVALID, or a NULL placeholder when
 * the element has no chain.
 */
static
void
dumpcluster_item(hammer2_cluster_t *cl, int i)
{
	hammer2_chain_t *chain;

	chain = cl->array[i].chain;
	if (chain == NULL) {
		kprintf(" NULL %s ", " ");
		return;
	}
	kprintf("%016jx%s ",
		chain->bref.key,
		((cl->array[i].flags &
		  HAMMER2_CITEM_INVALID) ? "(I)" : " ")
	);
}

/*
 * Debugging aid, only active when bit 0 of hammer2_debug is set.  Prints
 * label and then one line per cluster index showing the cparent element
 * followed by the cluster element.  Both clusters must have the same
 * number of chains.
 */
static
void
dumpcluster(const char *label,
	    hammer2_cluster_t *cparent, hammer2_cluster_t *cluster)
{
	int i;

	if ((hammer2_debug & 1) == 0)
		return;

	kprintf("%s\t", label);
	KKASSERT(cparent->nchains == cluster->nchains);
	for (i = 0; i < cparent->nchains; ++i) {
		if (i)
			kprintf("\t");
		kprintf("%d ", i);
		dumpcluster_item(cparent, i);
		dumpcluster_item(cluster, i);
		kprintf("\n");
	}
}
#endif
348 * Each out of sync node sync-thread must issue an all-nodes XOP scan of
349 * the inode. This creates a multiplication effect since the XOP scan itself
350 * issues to all nodes. However, this is the only way we can safely
351 * synchronize nodes which might have disparate I/O bandwidths and the only
352 * way we can safely deal with stalled nodes.
354 * XXX serror / merror rollup and handling.
356 static
358 hammer2_sync_slaves(hammer2_thread_t *thr, hammer2_inode_t *ip,
359 hammer2_deferred_list_t *list, int isroot)
361 hammer2_xop_scanall_t *xop;
362 hammer2_chain_t *parent;
363 hammer2_chain_t *chain;
364 hammer2_pfs_t *pmp;
365 hammer2_key_t key_next;
366 hammer2_tid_t sync_tid;
367 int needrescan;
368 int want_update;
369 int serror; /* slave error */
370 int merror; /* master error (from xop_collect) */
371 int nerror; /* temporary error */
372 int idx;
373 int n;
375 pmp = ip->pmp;
376 idx = thr->clindex; /* cluster node we are responsible for */
377 needrescan = 0;
378 want_update = 0;
379 sync_tid = 0;
380 chain = NULL;
381 parent = NULL;
383 #if 0
385 * Nothing to do if all slaves are synchronized.
386 * Nothing to do if cluster not authoritatively readable.
388 if (pmp->cluster_flags & HAMMER2_CLUSTER_SSYNCED)
389 return(0);
390 if ((pmp->cluster_flags & HAMMER2_CLUSTER_RDHARD) == 0)
391 return(HAMMER2_ERROR_INCOMPLETE);
392 #endif
394 merror = 0;
397 * Resolve the root inode of the PFS and determine if synchronization
398 * is needed by checking modify_tid.
400 * Retain the synchronization TID from the focus inode and use it
401 * later to synchronize the focus inode if/when the recursion
402 * succeeds.
405 hammer2_xop_ipcluster_t *xop2;
406 hammer2_chain_t *focus;
408 hammer2_inode_lock(ip, HAMMER2_RESOLVE_SHARED);
409 xop2 = hammer2_xop_alloc(ip, HAMMER2_XOP_MODIFYING);
410 hammer2_xop_start_except(&xop2->head, hammer2_xop_ipcluster,
411 idx);
412 hammer2_inode_unlock(ip);
413 merror = hammer2_xop_collect(&xop2->head, 0);
414 if (merror == 0 && (focus = xop2->head.cluster.focus) != NULL) {
415 sync_tid = focus->bref.modify_tid;
416 chain = hammer2_inode_chain_and_parent(ip, idx,
417 &parent,
418 HAMMER2_RESOLVE_ALWAYS |
419 HAMMER2_RESOLVE_SHARED);
420 want_update = (chain->bref.modify_tid != sync_tid);
421 if (chain) {
422 hammer2_chain_unlock(chain);
423 hammer2_chain_drop(chain);
424 chain = NULL;
426 if (parent) {
427 hammer2_chain_unlock(parent);
428 hammer2_chain_drop(parent);
429 parent = NULL;
432 hammer2_xop_retire(&xop2->head, HAMMER2_XOPMASK_VOP);
435 if (want_update == 0)
436 return(0);
439 * The inode is left unlocked during the scan. Issue a XOP
440 * that does *not* include our cluster index to iterate
441 * properly synchronized elements and resolve our cluster index
442 * against it.
444 hammer2_inode_lock(ip, HAMMER2_RESOLVE_SHARED);
445 xop = hammer2_xop_alloc(ip, HAMMER2_XOP_MODIFYING);
446 xop->key_beg = HAMMER2_KEY_MIN;
447 xop->key_end = HAMMER2_KEY_MAX;
448 xop->resolve_flags = HAMMER2_RESOLVE_SHARED |
449 HAMMER2_RESOLVE_ALWAYS;
450 xop->lookup_flags = HAMMER2_LOOKUP_SHARED |
451 HAMMER2_LOOKUP_NODIRECT |
452 HAMMER2_LOOKUP_ALWAYS;
453 hammer2_xop_start_except(&xop->head, hammer2_xop_scanall, idx);
454 parent = hammer2_inode_chain(ip, idx,
455 HAMMER2_RESOLVE_ALWAYS |
456 HAMMER2_RESOLVE_SHARED);
457 hammer2_inode_unlock(ip);
459 chain = hammer2_chain_lookup(&parent, &key_next,
460 HAMMER2_KEY_MIN, HAMMER2_KEY_MAX,
461 &serror,
462 HAMMER2_LOOKUP_SHARED |
463 HAMMER2_LOOKUP_NODIRECT |
464 HAMMER2_LOOKUP_NODATA);
465 merror = hammer2_xop_collect(&xop->head, 0);
466 if (hammer2_debug & 0x8000) {
467 kprintf("START_SCAN IP=%016jx chain=%p (%016jx)\n",
468 ip->meta.name_key, chain,
469 (chain ? chain->bref.key : -1));
472 for (;;) {
474 * We are done if our scan is done and the XOP scan is done.
475 * We are done if the XOP scan failed (that is, we don't
476 * have authoritative data to synchronize with).
478 int advance_local = 0;
479 int advance_xop = 0;
480 int dodefer = 0;
481 hammer2_chain_t *focus;
483 if (chain == NULL && merror == HAMMER2_ERROR_ENOENT)
484 break;
485 if (merror && merror != HAMMER2_ERROR_ENOENT)
486 break;
489 * Compare
491 if (chain && merror == HAMMER2_ERROR_ENOENT) {
493 * If we have local chains but the XOP scan is done,
494 * the chains need to be deleted.
496 n = -1;
497 focus = NULL;
498 } else if (chain == NULL) {
500 * If our local scan is done but the XOP scan is not,
501 * we need to create the missing chain(s).
503 n = 1;
504 focus = xop->head.cluster.focus;
505 } else {
507 * Otherwise compare to determine the action
508 * needed.
510 focus = xop->head.cluster.focus;
511 n = hammer2_chain_cmp(chain, focus);
515 * Take action based on comparison results.
517 if (n < 0) {
519 * Delete extranious local data. This will
520 * automatically advance the chain.
522 nerror = hammer2_sync_destroy(thr, &parent, &chain,
523 0, idx);
524 } else if (n == 0 && chain->bref.modify_tid !=
525 focus->bref.modify_tid) {
527 * Matching key but local data or meta-data requires
528 * updating. If we will recurse, we still need to
529 * update to compatible content first but we do not
530 * synchronize modify_tid until the entire recursion
531 * has completed successfully.
533 if (focus->bref.type == HAMMER2_BREF_TYPE_INODE) {
534 nerror = hammer2_sync_replace(
535 thr, parent, chain,
537 idx, focus, 0);
538 dodefer = 1;
539 } else {
540 nerror = hammer2_sync_replace(
541 thr, parent, chain,
542 focus->bref.modify_tid,
543 idx, focus, 0);
545 advance_local = 1;
546 advance_xop = 1;
547 } else if (n == 0) {
549 * 100% match, advance both
551 advance_local = 1;
552 advance_xop = 1;
553 nerror = 0;
554 } else if (n > 0) {
556 * Insert missing local data.
558 * If we will recurse, we still need to update to
559 * compatible content first but we do not synchronize
560 * modify_tid until the entire recursion has
561 * completed successfully.
563 if (focus->bref.type == HAMMER2_BREF_TYPE_INODE) {
564 nerror = hammer2_sync_insert(
565 thr, &parent, &chain,
567 idx, focus);
568 dodefer = 2;
569 } else {
570 nerror = hammer2_sync_insert(
571 thr, &parent, &chain,
572 focus->bref.modify_tid,
573 idx, focus);
575 advance_local = 1;
576 advance_xop = 1;
580 * We cannot recurse depth-first because the XOP is still
581 * running in node threads for this scan. Create a placemarker
582 * by obtaining and record the hammer2_inode.
584 * We excluded our node from the XOP so we must temporarily
585 * add it to xop->head.cluster so it is properly incorporated
586 * into the inode.
588 * The deferral is pushed onto a LIFO list for bottom-up
589 * synchronization.
591 if (merror == 0 && dodefer) {
592 hammer2_inode_t *nip;
593 hammer2_deferred_ip_t *defer;
595 KKASSERT(focus->bref.type == HAMMER2_BREF_TYPE_INODE);
597 defer = kmalloc(sizeof(*defer), M_HAMMER2,
598 M_WAITOK | M_ZERO);
599 KKASSERT(xop->head.cluster.array[idx].chain == NULL);
600 xop->head.cluster.array[idx].flags =
601 HAMMER2_CITEM_INVALID;
602 xop->head.cluster.array[idx].chain = chain;
603 nip = hammer2_inode_get(pmp, ip,
604 &xop->head.cluster, idx);
605 xop->head.cluster.array[idx].chain = NULL;
607 hammer2_inode_ref(nip);
608 hammer2_inode_unlock(nip);
610 defer->next = list->base;
611 defer->ip = nip;
612 list->base = defer;
613 ++list->count;
614 needrescan = 1;
618 * If at least one deferral was added and the deferral
619 * list has grown too large, stop adding more. This
620 * will trigger an HAMMER2_ERROR_EAGAIN return.
622 if (needrescan && list->count > 1000)
623 break;
626 * Advancements for iteration.
628 if (advance_xop) {
629 merror = hammer2_xop_collect(&xop->head, 0);
631 if (advance_local) {
632 chain = hammer2_chain_next(&parent, chain, &key_next,
633 key_next, HAMMER2_KEY_MAX,
634 &serror,
635 HAMMER2_LOOKUP_SHARED |
636 HAMMER2_LOOKUP_NODIRECT |
637 HAMMER2_LOOKUP_NODATA);
640 hammer2_xop_retire(&xop->head, HAMMER2_XOPMASK_VOP);
641 if (chain) {
642 hammer2_chain_unlock(chain);
643 hammer2_chain_drop(chain);
645 if (parent) {
646 hammer2_chain_unlock(parent);
647 hammer2_chain_drop(parent);
651 * If we added deferrals we want the caller to synchronize them
652 * and then call us again.
654 * NOTE: In this situation we do not yet want to synchronize our
655 * inode, setting the error code also has that effect.
657 if ((merror == 0 || merror == HAMMER2_ERROR_ENOENT) && needrescan)
658 merror = HAMMER2_ERROR_EAGAIN;
661 * If no error occurred we can synchronize the inode meta-data
662 * and modify_tid. Only limited changes are made to PFSROOTs.
664 * XXX inode lock was lost
666 if (merror == 0 || merror == HAMMER2_ERROR_ENOENT) {
667 hammer2_xop_ipcluster_t *xop2;
668 hammer2_chain_t *focus;
670 hammer2_inode_lock(ip, HAMMER2_RESOLVE_SHARED);
671 xop2 = hammer2_xop_alloc(ip, HAMMER2_XOP_MODIFYING);
672 hammer2_xop_start_except(&xop2->head, hammer2_xop_ipcluster,
673 idx);
674 hammer2_inode_unlock(ip);
675 merror = hammer2_xop_collect(&xop2->head, 0);
676 if (merror == 0) {
677 focus = xop2->head.cluster.focus;
678 if (hammer2_debug & 0x8000) {
679 kprintf("syncthr: update inode %p (%s)\n",
680 focus,
681 (focus ? (char *)focus->data->
682 ipdata.filename :
683 "?"));
685 chain = hammer2_inode_chain_and_parent(ip, idx,
686 &parent,
687 HAMMER2_RESOLVE_ALWAYS |
688 HAMMER2_RESOLVE_SHARED);
690 KKASSERT(parent != NULL);
691 nerror = hammer2_sync_replace(
692 thr, parent, chain,
693 sync_tid,
694 idx, focus, isroot);
695 hammer2_chain_unlock(chain);
696 hammer2_chain_drop(chain);
697 hammer2_chain_unlock(parent);
698 hammer2_chain_drop(parent);
699 /* XXX */
701 hammer2_xop_retire(&xop2->head, HAMMER2_XOPMASK_VOP);
704 return merror;
708 * Create a missing chain by copying the focus from another device.
710 * On entry *parentp and focus are both locked shared. The chain will be
711 * created and returned in *chainp also locked shared.
713 static
715 hammer2_sync_insert(hammer2_thread_t *thr,
716 hammer2_chain_t **parentp, hammer2_chain_t **chainp,
717 hammer2_tid_t mtid, int idx, hammer2_chain_t *focus)
719 hammer2_chain_t *chain;
720 hammer2_key_t dummy;
721 int error;
723 #if HAMMER2_SYNCHRO_DEBUG
724 if (hammer2_debug & 1)
725 kprintf("insert rec par=%p/%d.%016jx slave %d %d.%016jx mod=%016jx\n",
726 *parentp,
727 (*parentp)->bref.type,
728 (*parentp)->bref.key,
729 idx,
730 focus->bref.type, focus->bref.key, mtid);
731 #endif
734 * Parent requires an exclusive lock for the insertion.
735 * We must unlock the child to avoid deadlocks while
736 * relocking the parent.
738 if (*chainp) {
739 hammer2_chain_unlock(*chainp);
740 hammer2_chain_drop(*chainp);
741 *chainp = NULL;
743 hammer2_chain_unlock(*parentp);
744 hammer2_chain_lock(*parentp, HAMMER2_RESOLVE_ALWAYS);
747 * We must reissue the lookup to properly position (*parentp)
748 * for the insertion.
750 chain = hammer2_chain_lookup(parentp, &dummy,
751 focus->bref.key, focus->bref.key,
752 &error,
753 HAMMER2_LOOKUP_NODIRECT |
754 HAMMER2_LOOKUP_ALWAYS);
755 KKASSERT(chain == NULL);
757 chain = NULL;
758 error = hammer2_chain_create(parentp, &chain,
759 thr->pmp, focus->bref.methods,
760 focus->bref.key, focus->bref.keybits,
761 focus->bref.type, focus->bytes,
762 mtid, 0, 0);
763 if (error == 0) {
764 error = hammer2_chain_modify(chain, mtid, 0, 0);
765 if (error)
766 goto failed;
769 * Copy focus to new chain
772 /* type already set */
773 chain->bref.methods = focus->bref.methods;
774 /* keybits already set */
775 chain->bref.vradix = focus->bref.vradix;
776 /* mirror_tid set by flush */
777 KKASSERT(chain->bref.modify_tid == mtid);
778 chain->bref.flags = focus->bref.flags;
779 /* key already present */
780 /* check code will be recalculated */
783 * Copy data body.
785 switch(chain->bref.type) {
786 case HAMMER2_BREF_TYPE_INODE:
787 if ((focus->data->ipdata.meta.op_flags &
788 HAMMER2_OPFLAG_DIRECTDATA) == 0) {
789 /* do not copy block table */
790 bcopy(focus->data, chain->data,
791 offsetof(hammer2_inode_data_t, u));
792 break;
794 /* fall through copy whole thing */
795 case HAMMER2_BREF_TYPE_DATA:
796 bcopy(focus->data, chain->data, chain->bytes);
797 hammer2_chain_setcheck(chain, chain->data);
798 break;
799 case HAMMER2_BREF_TYPE_DIRENT:
801 * Directory entries embed data in the blockref.
803 if (chain->bytes) {
804 bcopy(focus->data, chain->data, chain->bytes);
805 hammer2_chain_setcheck(chain, chain->data);
806 } else {
807 chain->bref.check = focus->bref.check;
809 chain->bref.embed = focus->bref.embed;
810 break;
811 default:
812 KKASSERT(0);
813 break;
817 failed:
818 if (chain)
819 hammer2_chain_unlock(chain); /* unlock, leave ref */
820 *chainp = chain; /* will be returned locked */
823 * Avoid an ordering deadlock when relocking shared.
825 hammer2_chain_unlock(*parentp);
826 hammer2_chain_lock(*parentp, HAMMER2_RESOLVE_SHARED |
827 HAMMER2_RESOLVE_ALWAYS);
828 if (chain) {
829 hammer2_chain_lock(chain, HAMMER2_RESOLVE_SHARED |
830 HAMMER2_RESOLVE_ALWAYS);
831 error = chain->error;
834 return error;
838 * Destroy an extranious chain.
840 * Both *parentp and *chainp are locked shared.
842 * On return, *chainp will be adjusted to point to the next element in the
843 * iteration and locked shared.
845 static
847 hammer2_sync_destroy(hammer2_thread_t *thr,
848 hammer2_chain_t **parentp, hammer2_chain_t **chainp,
849 hammer2_tid_t mtid, int idx)
851 hammer2_chain_t *chain;
852 hammer2_key_t key_next;
853 hammer2_key_t save_key;
854 int error;
856 chain = *chainp;
858 #if HAMMER2_SYNCHRO_DEBUG
859 if (hammer2_debug & 1)
860 kprintf("destroy rec %p/%p slave %d %d.%016jx\n",
861 *parentp, chain,
862 idx, chain->bref.type, chain->bref.key);
863 #endif
865 save_key = chain->bref.key;
866 if (save_key != HAMMER2_KEY_MAX)
867 ++save_key;
870 * Try to avoid unnecessary I/O.
872 * XXX accounting not propagated up properly. We might have to do
873 * a RESOLVE_MAYBE here and pass 0 for the flags.
875 hammer2_chain_unlock(chain); /* relock exclusive */
876 hammer2_chain_unlock(*parentp);
877 hammer2_chain_lock(*parentp, HAMMER2_RESOLVE_ALWAYS);
878 hammer2_chain_lock(chain, HAMMER2_RESOLVE_NEVER);
880 hammer2_chain_delete(*parentp, chain, mtid, HAMMER2_DELETE_PERMANENT);
881 hammer2_chain_unlock(chain);
882 hammer2_chain_drop(chain);
883 chain = NULL; /* safety */
885 hammer2_chain_unlock(*parentp); /* relock shared */
886 hammer2_chain_lock(*parentp, HAMMER2_RESOLVE_SHARED |
887 HAMMER2_RESOLVE_ALWAYS);
888 *chainp = hammer2_chain_lookup(parentp, &key_next,
889 save_key, HAMMER2_KEY_MAX,
890 &error,
891 HAMMER2_LOOKUP_SHARED |
892 HAMMER2_LOOKUP_NODIRECT |
893 HAMMER2_LOOKUP_NODATA);
894 return error;
898 * cparent is locked exclusively, with an extra ref, cluster is not locked.
899 * Replace element [i] in the cluster.
901 static
903 hammer2_sync_replace(hammer2_thread_t *thr,
904 hammer2_chain_t *parent, hammer2_chain_t *chain,
905 hammer2_tid_t mtid, int idx,
906 hammer2_chain_t *focus, int isroot)
908 uint8_t otype;
909 int nradix;
910 int error;
912 #if HAMMER2_SYNCHRO_DEBUG
913 if (hammer2_debug & 1)
914 kprintf("replace rec %p slave %d %d.%016jx mod=%016jx\n",
915 chain,
916 idx,
917 focus->bref.type, focus->bref.key, mtid);
918 #endif
919 hammer2_chain_unlock(chain);
920 hammer2_chain_lock(chain, HAMMER2_RESOLVE_ALWAYS);
921 error = chain->error;
922 if (error == 0) {
923 if (chain->bytes != focus->bytes) {
924 /* XXX what if compressed? */
925 nradix = hammer2_getradix(chain->bytes);
926 error = hammer2_chain_resize(chain, mtid, 0, nradix, 0);
927 if (error)
928 goto failed;
930 error = hammer2_chain_modify(chain, mtid, 0, 0);
931 if (error)
932 goto failed;
933 otype = chain->bref.type;
934 chain->bref.type = focus->bref.type;
935 chain->bref.methods = focus->bref.methods;
936 chain->bref.keybits = focus->bref.keybits;
937 chain->bref.vradix = focus->bref.vradix;
938 /* mirror_tid updated by flush */
939 KKASSERT(mtid == 0 || chain->bref.modify_tid == mtid);
940 chain->bref.flags = focus->bref.flags;
941 /* key already present */
942 /* check code will be recalculated */
945 * Copy data body.
947 switch(chain->bref.type) {
948 case HAMMER2_BREF_TYPE_INODE:
950 * Special case PFSROOTs, only limited changes can
951 * be made since the meta-data contains miscellanious
952 * distinguishing fields.
954 if (isroot) {
955 chain->data->ipdata.meta.uflags =
956 focus->data->ipdata.meta.uflags;
957 chain->data->ipdata.meta.rmajor =
958 focus->data->ipdata.meta.rmajor;
959 chain->data->ipdata.meta.rminor =
960 focus->data->ipdata.meta.rminor;
961 chain->data->ipdata.meta.ctime =
962 focus->data->ipdata.meta.ctime;
963 chain->data->ipdata.meta.mtime =
964 focus->data->ipdata.meta.mtime;
965 chain->data->ipdata.meta.atime =
966 focus->data->ipdata.meta.atime;
967 /* not btime */
968 chain->data->ipdata.meta.uid =
969 focus->data->ipdata.meta.uid;
970 chain->data->ipdata.meta.gid =
971 focus->data->ipdata.meta.gid;
972 chain->data->ipdata.meta.mode =
973 focus->data->ipdata.meta.mode;
974 chain->data->ipdata.meta.ncopies =
975 focus->data->ipdata.meta.ncopies;
976 chain->data->ipdata.meta.comp_algo =
977 focus->data->ipdata.meta.comp_algo;
978 chain->data->ipdata.meta.check_algo =
979 focus->data->ipdata.meta.check_algo;
980 chain->data->ipdata.meta.data_quota =
981 focus->data->ipdata.meta.data_quota;
982 chain->data->ipdata.meta.inode_quota =
983 focus->data->ipdata.meta.inode_quota;
986 * last snapshot tid controls overwrite
988 if (chain->data->ipdata.meta.pfs_lsnap_tid <
989 focus->data->ipdata.meta.pfs_lsnap_tid) {
990 chain->data->ipdata.meta.pfs_lsnap_tid =
991 focus->data->ipdata.meta.pfs_lsnap_tid;
994 hammer2_chain_setcheck(chain, chain->data);
995 break;
999 * Normal replacement.
1001 if ((focus->data->ipdata.meta.op_flags &
1002 HAMMER2_OPFLAG_DIRECTDATA) == 0) {
1004 * If DIRECTDATA is transitioning to 0 or the
1005 * old chain is not an inode we have to
1006 * initialize the block table.
1008 if (otype != HAMMER2_BREF_TYPE_INODE ||
1009 (chain->data->ipdata.meta.op_flags &
1010 HAMMER2_OPFLAG_DIRECTDATA)) {
1011 kprintf("chain inode trans "
1012 "away from dd\n");
1013 bzero(&chain->data->ipdata.u,
1014 sizeof(chain->data->ipdata.u));
1016 bcopy(focus->data, chain->data,
1017 offsetof(hammer2_inode_data_t, u));
1018 /* XXX setcheck on inode should not be needed */
1019 hammer2_chain_setcheck(chain, chain->data);
1020 break;
1022 /* fall through */
1023 case HAMMER2_BREF_TYPE_DATA:
1024 bcopy(focus->data, chain->data, chain->bytes);
1025 hammer2_chain_setcheck(chain, chain->data);
1026 break;
1027 case HAMMER2_BREF_TYPE_DIRENT:
1029 * Directory entries embed data in the blockref.
1031 if (chain->bytes) {
1032 bcopy(focus->data, chain->data, chain->bytes);
1033 hammer2_chain_setcheck(chain, chain->data);
1034 } else {
1035 chain->bref.check = focus->bref.check;
1037 chain->bref.embed = focus->bref.embed;
1038 break;
1039 default:
1040 KKASSERT(0);
1041 break;
1045 failed:
1046 hammer2_chain_unlock(chain);
1047 hammer2_chain_lock(chain, HAMMER2_RESOLVE_SHARED |
1048 HAMMER2_RESOLVE_MAYBE);
1050 return error;