2 * Copyright (c) 2015-2018 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@dragonflybsd.org>
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 * This module implements the cluster synchronizer. Basically the way
36 * it works is that a thread is created for each cluster node in a PFS.
37 * This thread is responsible for synchronizing the current node using
38 * data from other nodes.
40 * Any out of sync master or slave can get back into synchronization as
41 * long as a quorum of masters agree on the update_tid. If a quorum is
42 * not available it may still be possible to synchronize to the highest
43 * available update_tid as a way of trying to catch up as much as possible
44 * until a quorum is available.
46 * If no quorum is possible (which can happen even if all masters are
47 * available, if the update_tid does not match), then manual intervention
48 * may be required to resolve discrepancies.
52 typedef struct hammer2_deferred_ip
{
53 struct hammer2_deferred_ip
*next
;
55 } hammer2_deferred_ip_t
;
57 typedef struct hammer2_deferred_list
{
58 hammer2_deferred_ip_t
*base
;
60 } hammer2_deferred_list_t
;
/*
 * Compile-time switch for the synchronizer's trace output.  When non-zero,
 * the "#if HAMMER2_SYNCHRO_DEBUG" sections in hammer2_sync_insert(),
 * hammer2_sync_destroy() and hammer2_sync_replace() are compiled in; they
 * kprintf() a one-line record of each operation when (hammer2_debug & 1)
 * is set at run time.
 */
63 #define HAMMER2_SYNCHRO_DEBUG 1
/*
 * Forward declaration: synchronize one inode of the cluster node this
 * thread is responsible for (thr->clindex) against the other nodes.
 *
 * thr    - sync thread; supplies the cluster index being synchronized.
 * ip     - inode to scan.
 * list   - deferral list; sub-inodes that need their own scan are pushed
 *          onto it rather than being recursed into directly.
 * isroot - passed through to hammer2_sync_replace() for the final inode
 *          update; the thread passes 1 for pmp->iroot.
 *
 * Returns a HAMMER2_ERROR_* code.  HAMMER2_ERROR_EAGAIN asks the caller to
 * process the deferrals and call again; HAMMER2_ERROR_INCOMPLETE is
 * returned when the cluster is not authoritatively readable.
 */
65 static int hammer2_sync_slaves(hammer2_thread_t
*thr
, hammer2_inode_t
*ip
,
66 hammer2_deferred_list_t
*list
, int isroot
);
/*
 * Forward declaration: record the HAMMER2_CLUSTER_ZFLAGS subset of `flags`
 * in thr->pmp->cluster_flags and kprintf() a summary of the resulting
 * cluster state (masters/slaves in sync, quorum, soft read/write status).
 */
68 static void hammer2_update_pfs_status(hammer2_thread_t
*thr
, uint32_t flags
);
69 nerror
= hammer2_sync_insert(
71 focus
->bref
.modify_tid
,
74 static int hammer2_sync_insert(hammer2_thread_t
*thr
,
75 hammer2_chain_t
**parentp
, hammer2_chain_t
**chainp
,
76 hammer2_tid_t modify_tid
, int idx
,
77 hammer2_xop_head_t
*xop
, hammer2_chain_t
*focus
);
/*
 * Forward declaration: destroy an extraneous local chain.
 *
 * thr     - sync thread.
 * parentp - parent chain, locked shared on entry and on return.
 * chainp  - chain to delete, locked shared on entry; on return it is
 *           replaced by the next element of the iteration, locked shared.
 * mtid    - transaction id handed to hammer2_chain_delete().
 * idx     - cluster index of the node being synchronized (used for the
 *           debug trace).
 *
 * Returns a HAMMER2_ERROR_* code (0 on success).
 */
78 static int hammer2_sync_destroy(hammer2_thread_t
*thr
,
79 hammer2_chain_t
**parentp
, hammer2_chain_t
**chainp
,
80 hammer2_tid_t mtid
, int idx
);
81 static int hammer2_sync_replace(hammer2_thread_t
*thr
,
82 hammer2_chain_t
*parent
, hammer2_chain_t
*chain
,
83 hammer2_tid_t mtid
, int idx
,
84 hammer2_xop_head_t
*xop
, hammer2_chain_t
*focus
,
87 /****************************************************************************
88 * HAMMER2 SYNC THREADS *
89 ****************************************************************************/
91 * Primary management thread for an element of a node. A thread will exist
92 * for each element requiring management.
94 * No management threads are needed for the SPMP or for any PMP with only
97 * On the SPMP - handles bulkfree and dedup operations
98 * On a PFS - handles remastering and synchronization
101 hammer2_primary_sync_thread(void *arg
)
103 hammer2_thread_t
*thr
= arg
;
105 hammer2_deferred_list_t list
;
106 hammer2_deferred_ip_t
*defer
;
112 bzero(&list
, sizeof(list
));
119 * Handle stop request
121 if (flags
& HAMMER2_THREAD_STOP
)
125 * Handle freeze request
127 if (flags
& HAMMER2_THREAD_FREEZE
) {
128 nflags
= (flags
& ~(HAMMER2_THREAD_FREEZE
|
129 HAMMER2_THREAD_WAITING
)) |
130 HAMMER2_THREAD_FROZEN
;
131 if (!atomic_cmpset_int(&thr
->flags
, flags
, nflags
))
133 if (flags
& HAMMER2_THREAD_WAITING
)
138 if (flags
& HAMMER2_THREAD_UNFREEZE
) {
139 nflags
= flags
& ~(HAMMER2_THREAD_UNFREEZE
|
140 HAMMER2_THREAD_FROZEN
|
141 HAMMER2_THREAD_WAITING
);
142 if (!atomic_cmpset_int(&thr
->flags
, flags
, nflags
))
144 if (flags
& HAMMER2_THREAD_WAITING
)
150 * Force idle if frozen until unfrozen or stopped.
152 if (flags
& HAMMER2_THREAD_FROZEN
) {
153 nflags
= flags
| HAMMER2_THREAD_WAITING
;
155 tsleep_interlock(&thr
->flags
, 0);
156 if (atomic_cmpset_int(&thr
->flags
, flags
, nflags
))
157 tsleep(&thr
->flags
, PINTERLOCKED
, "frozen", 0);
162 * Reset state on REMASTER request
164 if (thr
->flags
& HAMMER2_THREAD_REMASTER
) {
165 nflags
= flags
& ~HAMMER2_THREAD_REMASTER
;
166 if (atomic_cmpset_int(&thr
->flags
, flags
, nflags
)) {
167 /* reset state here */
173 * Synchronization scan.
175 if (hammer2_debug
& 0x8000)
176 kprintf("sync_slaves pfs %s clindex %d\n",
177 pmp
->pfs_names
[thr
->clindex
], thr
->clindex
);
178 hammer2_trans_init(pmp
, 0);
180 hammer2_inode_ref(pmp
->iroot
);
184 /* XXX lock synchronize pmp->modify_tid */
185 error
= hammer2_sync_slaves(thr
, pmp
->iroot
, &list
, 1);
186 if (hammer2_debug
& 0x8000) {
187 kprintf("sync_slaves error %d defer %p\n",
190 if (error
!= HAMMER2_ERROR_EAGAIN
)
192 while ((defer
= list
.base
) != NULL
) {
193 hammer2_inode_t
*nip
;
196 error
= hammer2_sync_slaves(thr
, nip
, &list
,
197 (nip
== pmp
->iroot
));
199 error
!= HAMMER2_ERROR_EAGAIN
&&
200 error
!= HAMMER2_ERROR_ENOENT
) {
203 if (hammer2_thr_break(thr
)) {
209 * If no additional defers occurred we can
210 * remove this one, otherwise keep it on
211 * the list and retry once the additional
212 * defers have completed.
214 if (defer
== list
.base
) {
216 list
.base
= defer
->next
;
217 kfree(defer
, M_HAMMER2
);
218 defer
= NULL
; /* safety */
219 hammer2_inode_drop(nip
);
224 * If the thread is being remastered, frozen, or
225 * stopped, clean up any left-over deferrals.
228 (error
&& error
!= HAMMER2_ERROR_EAGAIN
)) {
229 kprintf("didbreak\n");
230 while ((defer
= list
.base
) != NULL
) {
232 hammer2_inode_drop(defer
->ip
);
233 list
.base
= defer
->next
;
234 kfree(defer
, M_HAMMER2
);
236 if (error
== 0 || error
== HAMMER2_ERROR_EAGAIN
)
237 error
= HAMMER2_ERROR_EINPROGRESS
;
242 hammer2_inode_drop(pmp
->iroot
);
243 hammer2_trans_done(pmp
, 0);
245 if (error
&& error
!= HAMMER2_ERROR_EINPROGRESS
)
246 kprintf("hammer2_sync_slaves: error %d\n", error
);
249 * Wait for event, or 5-second poll.
251 nflags
= flags
| HAMMER2_THREAD_WAITING
;
252 tsleep_interlock(&thr
->flags
, 0);
253 if (atomic_cmpset_int(&thr
->flags
, flags
, nflags
)) {
254 tsleep(&thr
->flags
, 0, "h2idle", hz
* 5);
258 hammer2_thr_signal(thr
, HAMMER2_THREAD_STOPPED
);
259 /* thr structure can go invalid after this point */
264 * Given a locked cluster created from pmp->iroot, update the PFS's
269 hammer2_update_pfs_status(hammer2_thread_t
*thr
, uint32_t flags
)
271 hammer2_pfs_t
*pmp
= thr
->pmp
;
273 flags
&= HAMMER2_CLUSTER_ZFLAGS
;
274 if (pmp
->cluster_flags
== flags
)
276 pmp
->cluster_flags
= flags
;
278 kprintf("pfs %p", pmp
);
279 if (flags
& HAMMER2_CLUSTER_MSYNCED
)
280 kprintf(" masters-all-good");
281 if (flags
& HAMMER2_CLUSTER_SSYNCED
)
282 kprintf(" slaves-all-good");
284 if (flags
& HAMMER2_CLUSTER_WRHARD
)
285 kprintf(" quorum/rw");
286 else if (flags
& HAMMER2_CLUSTER_RDHARD
)
287 kprintf(" quorum/ro");
289 if (flags
& HAMMER2_CLUSTER_UNHARD
)
290 kprintf(" out-of-sync-masters");
291 else if (flags
& HAMMER2_CLUSTER_NOHARD
)
292 kprintf(" no-masters-visible");
294 if (flags
& HAMMER2_CLUSTER_WRSOFT
)
296 else if (flags
& HAMMER2_CLUSTER_RDSOFT
)
299 if (flags
& HAMMER2_CLUSTER_UNSOFT
)
300 kprintf(" out-of-sync-slaves");
301 else if (flags
& HAMMER2_CLUSTER_NOSOFT
)
302 kprintf(" no-slaves-visible");
310 dumpcluster(const char *label
,
311 hammer2_cluster_t
*cparent
, hammer2_cluster_t
*cluster
)
313 hammer2_chain_t
*chain
;
316 if ((hammer2_debug
& 1) == 0)
319 kprintf("%s\t", label
);
320 KKASSERT(cparent
->nchains
== cluster
->nchains
);
321 for (i
= 0; i
< cparent
->nchains
; ++i
) {
325 if ((chain
= cparent
->array
[i
].chain
) != NULL
) {
328 ((cparent
->array
[i
].flags
&
329 HAMMER2_CITEM_INVALID
) ? "(I)" : " ")
332 kprintf(" NULL %s ", " ");
334 if ((chain
= cluster
->array
[i
].chain
) != NULL
) {
337 ((cluster
->array
[i
].flags
&
338 HAMMER2_CITEM_INVALID
) ? "(I)" : " ")
341 kprintf(" NULL %s ", " ");
349 * Each out of sync node sync-thread must issue an all-nodes XOP scan of
350 * the inode. This creates a multiplication effect since the XOP scan itself
351 * issues to all nodes. However, this is the only way we can safely
352 * synchronize nodes which might have disparate I/O bandwidths and the only
353 * way we can safely deal with stalled nodes.
355 * XXX serror / merror rollup and handling.
359 hammer2_sync_slaves(hammer2_thread_t
*thr
, hammer2_inode_t
*ip
,
360 hammer2_deferred_list_t
*list
, int isroot
)
362 hammer2_xop_scanall_t
*xop
;
363 hammer2_chain_t
*parent
;
364 hammer2_chain_t
*chain
;
366 hammer2_key_t key_next
;
367 hammer2_tid_t sync_tid
;
370 int serror
; /* slave error */
371 int merror
; /* master error (from xop_collect) */
372 int nerror
; /* temporary error */
377 idx
= thr
->clindex
; /* cluster node we are responsible for */
386 * Nothing to do if all slaves are synchronized.
387 * Nothing to do if cluster not authoritatively readable.
389 if (pmp
->cluster_flags
& HAMMER2_CLUSTER_SSYNCED
)
391 if ((pmp
->cluster_flags
& HAMMER2_CLUSTER_RDHARD
) == 0)
392 return(HAMMER2_ERROR_INCOMPLETE
);
398 * Resolve the root inode of the PFS and determine if synchronization
399 * is needed by checking modify_tid.
401 * Retain the synchronization TID from the focus inode and use it
402 * later to synchronize the focus inode if/when the recursion
406 hammer2_xop_ipcluster_t
*xop2
;
407 hammer2_chain_t
*focus
;
409 hammer2_inode_lock(ip
, HAMMER2_RESOLVE_SHARED
);
410 xop2
= hammer2_xop_alloc(ip
, HAMMER2_XOP_MODIFYING
);
411 hammer2_xop_start_except(&xop2
->head
, &hammer2_ipcluster_desc
,
413 hammer2_inode_unlock(ip
);
414 merror
= hammer2_xop_collect(&xop2
->head
, 0);
415 if (merror
== 0 && (focus
= xop2
->head
.cluster
.focus
) != NULL
) {
416 sync_tid
= focus
->bref
.modify_tid
;
417 chain
= hammer2_inode_chain_and_parent(ip
, idx
,
419 HAMMER2_RESOLVE_ALWAYS
|
420 HAMMER2_RESOLVE_SHARED
);
421 want_update
= (chain
->bref
.modify_tid
!= sync_tid
);
423 hammer2_chain_unlock(chain
);
424 hammer2_chain_drop(chain
);
428 hammer2_chain_unlock(parent
);
429 hammer2_chain_drop(parent
);
433 hammer2_xop_retire(&xop2
->head
, HAMMER2_XOPMASK_VOP
);
436 if (want_update
== 0)
440 * The inode is left unlocked during the scan. Issue an XOP
441 * that does *not* include our cluster index to iterate
442 * properly synchronized elements and resolve our cluster index
445 hammer2_inode_lock(ip
, HAMMER2_RESOLVE_SHARED
);
446 xop
= hammer2_xop_alloc(ip
, HAMMER2_XOP_MODIFYING
);
447 xop
->key_beg
= HAMMER2_KEY_MIN
;
448 xop
->key_end
= HAMMER2_KEY_MAX
;
449 xop
->resolve_flags
= HAMMER2_RESOLVE_SHARED
|
450 HAMMER2_RESOLVE_ALWAYS
;
451 xop
->lookup_flags
= HAMMER2_LOOKUP_SHARED
|
452 HAMMER2_LOOKUP_NODIRECT
|
453 HAMMER2_LOOKUP_ALWAYS
;
454 hammer2_xop_start_except(&xop
->head
, &hammer2_scanall_desc
, idx
);
455 parent
= hammer2_inode_chain(ip
, idx
,
456 HAMMER2_RESOLVE_ALWAYS
|
457 HAMMER2_RESOLVE_SHARED
);
458 hammer2_inode_unlock(ip
);
460 chain
= hammer2_chain_lookup(&parent
, &key_next
,
461 HAMMER2_KEY_MIN
, HAMMER2_KEY_MAX
,
463 HAMMER2_LOOKUP_SHARED
|
464 HAMMER2_LOOKUP_NODIRECT
|
465 HAMMER2_LOOKUP_NODATA
);
466 merror
= hammer2_xop_collect(&xop
->head
, 0);
467 if (hammer2_debug
& 0x8000) {
468 kprintf("START_SCAN IP=%016jx chain=%p (%016jx)\n",
469 ip
->meta
.name_key
, chain
,
470 (chain
? chain
->bref
.key
: -1));
475 * We are done if our scan is done and the XOP scan is done.
476 * We are done if the XOP scan failed (that is, we don't
477 * have authoritative data to synchronize with).
479 int advance_local
= 0;
482 hammer2_chain_t
*focus
;
484 if (chain
== NULL
&& merror
== HAMMER2_ERROR_ENOENT
)
486 if (merror
&& merror
!= HAMMER2_ERROR_ENOENT
)
492 if (chain
&& merror
== HAMMER2_ERROR_ENOENT
) {
494 * If we have local chains but the XOP scan is done,
495 * the chains need to be deleted.
499 } else if (chain
== NULL
) {
501 * If our local scan is done but the XOP scan is not,
502 * we need to create the missing chain(s).
505 focus
= xop
->head
.cluster
.focus
;
508 * Otherwise compare to determine the action
511 focus
= xop
->head
.cluster
.focus
;
512 n
= hammer2_chain_cmp(chain
, focus
);
516 * Take action based on comparison results.
520 * Delete extraneous local data. This will
521 * automatically advance the chain.
523 nerror
= hammer2_sync_destroy(thr
, &parent
, &chain
,
525 } else if (n
== 0 && chain
->bref
.modify_tid
!=
526 focus
->bref
.modify_tid
) {
528 * Matching key but local data or meta-data requires
529 * updating. If we will recurse, we still need to
530 * update to compatible content first but we do not
531 * synchronize modify_tid until the entire recursion
532 * has completed successfully.
534 if (focus
->bref
.type
== HAMMER2_BREF_TYPE_INODE
) {
535 nerror
= hammer2_sync_replace(
538 idx
, &xop
->head
, focus
, 0);
541 nerror
= hammer2_sync_replace(
543 focus
->bref
.modify_tid
,
544 idx
, &xop
->head
, focus
, 0);
550 * 100% match, advance both
557 * Insert missing local data.
559 * If we will recurse, we still need to update to
560 * compatible content first but we do not synchronize
561 * modify_tid until the entire recursion has
562 * completed successfully.
564 if (focus
->bref
.type
== HAMMER2_BREF_TYPE_INODE
) {
565 nerror
= hammer2_sync_insert(
566 thr
, &parent
, &chain
,
568 idx
, &xop
->head
, focus
);
571 nerror
= hammer2_sync_insert(
572 thr
, &parent
, &chain
,
573 focus
->bref
.modify_tid
,
574 idx
, &xop
->head
, focus
);
581 * We cannot recurse depth-first because the XOP is still
582 * running in node threads for this scan. Create a placemarker
583 * by obtaining and recording the hammer2_inode.
585 * We excluded our node from the XOP so we must temporarily
586 * add it to xop->head.cluster so it is properly incorporated
589 * The deferral is pushed onto a LIFO list for bottom-up
592 if (merror
== 0 && dodefer
) {
593 hammer2_inode_t
*nip
;
594 hammer2_deferred_ip_t
*defer
;
596 KKASSERT(focus
->bref
.type
== HAMMER2_BREF_TYPE_INODE
);
598 defer
= kmalloc(sizeof(*defer
), M_HAMMER2
,
600 KKASSERT(xop
->head
.cluster
.array
[idx
].chain
== NULL
);
601 xop
->head
.cluster
.array
[idx
].flags
=
602 HAMMER2_CITEM_INVALID
;
603 xop
->head
.cluster
.array
[idx
].chain
= chain
;
604 nip
= hammer2_inode_get(pmp
, ip
, &xop
->head
, idx
);
605 xop
->head
.cluster
.array
[idx
].chain
= NULL
;
607 hammer2_inode_ref(nip
);
608 hammer2_inode_unlock(nip
);
610 defer
->next
= list
->base
;
618 * If at least one deferral was added and the deferral
619 * list has grown too large, stop adding more. This
620 * will trigger an HAMMER2_ERROR_EAGAIN return.
622 if (needrescan
&& list
->count
> 1000)
626 * Advancements for iteration.
629 merror
= hammer2_xop_collect(&xop
->head
, 0);
632 chain
= hammer2_chain_next(&parent
, chain
, &key_next
,
633 key_next
, HAMMER2_KEY_MAX
,
635 HAMMER2_LOOKUP_SHARED
|
636 HAMMER2_LOOKUP_NODIRECT
|
637 HAMMER2_LOOKUP_NODATA
);
640 hammer2_xop_retire(&xop
->head
, HAMMER2_XOPMASK_VOP
);
642 hammer2_chain_unlock(chain
);
643 hammer2_chain_drop(chain
);
646 hammer2_chain_unlock(parent
);
647 hammer2_chain_drop(parent
);
651 * If we added deferrals we want the caller to synchronize them
652 * and then call us again.
654 * NOTE: In this situation we do not yet want to synchronize our
655 * inode, setting the error code also has that effect.
657 if ((merror
== 0 || merror
== HAMMER2_ERROR_ENOENT
) && needrescan
)
658 merror
= HAMMER2_ERROR_EAGAIN
;
661 * If no error occurred we can synchronize the inode meta-data
662 * and modify_tid. Only limited changes are made to PFSROOTs.
664 * XXX inode lock was lost
666 if (merror
== 0 || merror
== HAMMER2_ERROR_ENOENT
) {
667 hammer2_xop_ipcluster_t
*xop2
;
668 hammer2_chain_t
*focus
;
670 hammer2_inode_lock(ip
, HAMMER2_RESOLVE_SHARED
);
671 xop2
= hammer2_xop_alloc(ip
, HAMMER2_XOP_MODIFYING
);
672 hammer2_xop_start_except(&xop2
->head
, &hammer2_ipcluster_desc
,
674 hammer2_inode_unlock(ip
);
675 merror
= hammer2_xop_collect(&xop2
->head
, 0);
677 focus
= xop2
->head
.cluster
.focus
;
678 if ((hammer2_debug
& 0x8000) && focus
) {
679 const char *filename
;
681 filename
= hammer2_xop_gdata(&xop2
->head
)->
683 kprintf("syncthr: update inode %p (%s)\n",
685 hammer2_xop_pdata(&xop2
->head
);
687 chain
= hammer2_inode_chain_and_parent(ip
, idx
,
689 HAMMER2_RESOLVE_ALWAYS
|
690 HAMMER2_RESOLVE_SHARED
);
692 KKASSERT(parent
!= NULL
);
693 nerror
= hammer2_sync_replace(
696 idx
, &xop2
->head
, focus
, isroot
);
697 hammer2_chain_unlock(chain
);
698 hammer2_chain_drop(chain
);
699 hammer2_chain_unlock(parent
);
700 hammer2_chain_drop(parent
);
703 hammer2_xop_retire(&xop2
->head
, HAMMER2_XOPMASK_VOP
);
710 * Create a missing chain by copying the focus from another device.
712 * On entry *parentp and focus are both locked shared. The chain will be
713 * created and returned in *chainp also locked shared.
717 hammer2_sync_insert(hammer2_thread_t
*thr
,
718 hammer2_chain_t
**parentp
, hammer2_chain_t
**chainp
,
719 hammer2_tid_t mtid
, int idx
, hammer2_xop_head_t
*xop
,
720 hammer2_chain_t
*focus
)
722 hammer2_chain_t
*chain
;
726 #if HAMMER2_SYNCHRO_DEBUG
727 if (hammer2_debug
& 1)
728 kprintf("insert rec par=%p/%d.%016jx slave %d %d.%016jx mod=%016jx\n",
730 (*parentp
)->bref
.type
,
731 (*parentp
)->bref
.key
,
733 focus
->bref
.type
, focus
->bref
.key
, mtid
);
737 * Parent requires an exclusive lock for the insertion.
738 * We must unlock the child to avoid deadlocks while
739 * relocking the parent.
742 hammer2_chain_unlock(*chainp
);
743 hammer2_chain_drop(*chainp
);
746 hammer2_chain_unlock(*parentp
);
747 hammer2_chain_lock(*parentp
, HAMMER2_RESOLVE_ALWAYS
);
750 * We must reissue the lookup to properly position (*parentp)
753 chain
= hammer2_chain_lookup(parentp
, &dummy
,
754 focus
->bref
.key
, focus
->bref
.key
,
756 HAMMER2_LOOKUP_NODIRECT
|
757 HAMMER2_LOOKUP_ALWAYS
);
758 KKASSERT(chain
== NULL
);
761 error
= hammer2_chain_create(parentp
, &chain
,
762 thr
->pmp
, focus
->bref
.methods
,
763 focus
->bref
.key
, focus
->bref
.keybits
,
764 focus
->bref
.type
, focus
->bytes
,
767 const hammer2_media_data_t
*data
;
769 error
= hammer2_chain_modify(chain
, mtid
, 0, 0);
774 * Copy focus to new chain
777 /* type already set */
778 chain
->bref
.methods
= focus
->bref
.methods
;
779 /* keybits already set */
780 chain
->bref
.vradix
= focus
->bref
.vradix
;
781 /* mirror_tid set by flush */
782 KKASSERT(chain
->bref
.modify_tid
== mtid
);
783 chain
->bref
.flags
= focus
->bref
.flags
;
784 /* key already present */
785 /* check code will be recalculated */
790 switch(chain
->bref
.type
) {
791 case HAMMER2_BREF_TYPE_INODE
:
792 data
= hammer2_xop_gdata(xop
);
794 if ((data
->ipdata
.meta
.op_flags
&
795 HAMMER2_OPFLAG_DIRECTDATA
) == 0) {
796 /* do not copy block table */
797 bcopy(data
, chain
->data
,
798 offsetof(hammer2_inode_data_t
, u
));
799 hammer2_xop_pdata(xop
);
802 hammer2_xop_pdata(xop
);
803 /* fall through copy whole thing */
804 case HAMMER2_BREF_TYPE_DATA
:
805 data
= hammer2_xop_gdata(xop
);
806 bcopy(data
, chain
->data
, chain
->bytes
);
807 hammer2_chain_setcheck(chain
, chain
->data
);
808 hammer2_xop_pdata(xop
);
810 case HAMMER2_BREF_TYPE_DIRENT
:
812 * Directory entries embed data in the blockref.
815 data
= hammer2_xop_gdata(xop
);
816 bcopy(data
, chain
->data
, chain
->bytes
);
817 hammer2_chain_setcheck(chain
, chain
->data
);
818 hammer2_xop_pdata(xop
);
820 chain
->bref
.check
= focus
->bref
.check
;
822 chain
->bref
.embed
= focus
->bref
.embed
;
832 hammer2_chain_unlock(chain
); /* unlock, leave ref */
833 *chainp
= chain
; /* will be returned locked */
836 * Avoid an ordering deadlock when relocking shared.
838 hammer2_chain_unlock(*parentp
);
839 hammer2_chain_lock(*parentp
, HAMMER2_RESOLVE_SHARED
|
840 HAMMER2_RESOLVE_ALWAYS
);
842 hammer2_chain_lock(chain
, HAMMER2_RESOLVE_SHARED
|
843 HAMMER2_RESOLVE_ALWAYS
);
844 error
= chain
->error
;
851 * Destroy an extraneous chain.
853 * Both *parentp and *chainp are locked shared.
855 * On return, *chainp will be adjusted to point to the next element in the
856 * iteration and locked shared.
860 hammer2_sync_destroy(hammer2_thread_t
*thr
,
861 hammer2_chain_t
**parentp
, hammer2_chain_t
**chainp
,
862 hammer2_tid_t mtid
, int idx
)
864 hammer2_chain_t
*chain
;
865 hammer2_key_t key_next
;
866 hammer2_key_t save_key
;
871 #if HAMMER2_SYNCHRO_DEBUG
872 if (hammer2_debug
& 1)
873 kprintf("destroy rec %p/%p slave %d %d.%016jx\n",
875 idx
, chain
->bref
.type
, chain
->bref
.key
);
878 save_key
= chain
->bref
.key
;
879 if (save_key
!= HAMMER2_KEY_MAX
)
883 * Try to avoid unnecessary I/O.
885 * XXX accounting not propagated up properly. We might have to do
886 * a RESOLVE_MAYBE here and pass 0 for the flags.
888 hammer2_chain_unlock(chain
); /* relock exclusive */
889 hammer2_chain_unlock(*parentp
);
890 hammer2_chain_lock(*parentp
, HAMMER2_RESOLVE_ALWAYS
);
891 hammer2_chain_lock(chain
, HAMMER2_RESOLVE_NEVER
);
893 hammer2_chain_delete(*parentp
, chain
, mtid
, HAMMER2_DELETE_PERMANENT
);
894 hammer2_chain_unlock(chain
);
895 hammer2_chain_drop(chain
);
896 chain
= NULL
; /* safety */
898 hammer2_chain_unlock(*parentp
); /* relock shared */
899 hammer2_chain_lock(*parentp
, HAMMER2_RESOLVE_SHARED
|
900 HAMMER2_RESOLVE_ALWAYS
);
901 *chainp
= hammer2_chain_lookup(parentp
, &key_next
,
902 save_key
, HAMMER2_KEY_MAX
,
904 HAMMER2_LOOKUP_SHARED
|
905 HAMMER2_LOOKUP_NODIRECT
|
906 HAMMER2_LOOKUP_NODATA
);
911 * cparent is locked exclusively, with an extra ref, cluster is not locked.
912 * Replace element [i] in the cluster.
916 hammer2_sync_replace(hammer2_thread_t
*thr
,
917 hammer2_chain_t
*parent
, hammer2_chain_t
*chain
,
918 hammer2_tid_t mtid
, int idx
,
919 hammer2_xop_head_t
*xop
, hammer2_chain_t
*focus
,
926 #if HAMMER2_SYNCHRO_DEBUG
927 if (hammer2_debug
& 1)
928 kprintf("replace rec %p slave %d %d.%016jx mod=%016jx\n",
931 focus
->bref
.type
, focus
->bref
.key
, mtid
);
933 hammer2_chain_unlock(chain
);
934 hammer2_chain_lock(chain
, HAMMER2_RESOLVE_ALWAYS
);
935 error
= chain
->error
;
937 const hammer2_media_data_t
*data
;
939 if (chain
->bytes
!= focus
->bytes
) {
940 /* XXX what if compressed? */
941 nradix
= hammer2_getradix(chain
->bytes
);
942 error
= hammer2_chain_resize(chain
, mtid
, 0, nradix
, 0);
946 error
= hammer2_chain_modify(chain
, mtid
, 0, 0);
949 otype
= chain
->bref
.type
;
950 data
= hammer2_xop_gdata(xop
);
951 chain
->bref
.type
= focus
->bref
.type
;
952 chain
->bref
.methods
= focus
->bref
.methods
;
953 chain
->bref
.keybits
= focus
->bref
.keybits
;
954 chain
->bref
.vradix
= focus
->bref
.vradix
;
955 /* mirror_tid updated by flush */
956 KKASSERT(mtid
== 0 || chain
->bref
.modify_tid
== mtid
);
957 chain
->bref
.flags
= focus
->bref
.flags
;
958 /* key already present */
959 /* check code will be recalculated */
964 switch(chain
->bref
.type
) {
965 case HAMMER2_BREF_TYPE_INODE
:
967 * Special case PFSROOTs, only limited changes can
968 * be made since the meta-data contains miscellaneous
969 * distinguishing fields.
972 chain
->data
->ipdata
.meta
.uflags
=
973 data
->ipdata
.meta
.uflags
;
974 chain
->data
->ipdata
.meta
.rmajor
=
975 data
->ipdata
.meta
.rmajor
;
976 chain
->data
->ipdata
.meta
.rminor
=
977 data
->ipdata
.meta
.rminor
;
978 chain
->data
->ipdata
.meta
.ctime
=
979 data
->ipdata
.meta
.ctime
;
980 chain
->data
->ipdata
.meta
.mtime
=
981 data
->ipdata
.meta
.mtime
;
982 chain
->data
->ipdata
.meta
.atime
=
983 data
->ipdata
.meta
.atime
;
985 chain
->data
->ipdata
.meta
.uid
=
986 data
->ipdata
.meta
.uid
;
987 chain
->data
->ipdata
.meta
.gid
=
988 data
->ipdata
.meta
.gid
;
989 chain
->data
->ipdata
.meta
.mode
=
990 data
->ipdata
.meta
.mode
;
991 chain
->data
->ipdata
.meta
.ncopies
=
992 data
->ipdata
.meta
.ncopies
;
993 chain
->data
->ipdata
.meta
.comp_algo
=
994 data
->ipdata
.meta
.comp_algo
;
995 chain
->data
->ipdata
.meta
.check_algo
=
996 data
->ipdata
.meta
.check_algo
;
997 chain
->data
->ipdata
.meta
.data_quota
=
998 data
->ipdata
.meta
.data_quota
;
999 chain
->data
->ipdata
.meta
.inode_quota
=
1000 data
->ipdata
.meta
.inode_quota
;
1003 * last snapshot tid controls overwrite
1005 if (chain
->data
->ipdata
.meta
.pfs_lsnap_tid
<
1006 data
->ipdata
.meta
.pfs_lsnap_tid
) {
1007 chain
->data
->ipdata
.meta
.pfs_lsnap_tid
=
1008 data
->ipdata
.meta
.pfs_lsnap_tid
;
1011 hammer2_chain_setcheck(chain
, chain
->data
);
1016 * Normal replacement.
1018 if ((data
->ipdata
.meta
.op_flags
&
1019 HAMMER2_OPFLAG_DIRECTDATA
) == 0) {
1021 * If DIRECTDATA is transitioning to 0 or the
1022 * old chain is not an inode we have to
1023 * initialize the block table.
1025 if (otype
!= HAMMER2_BREF_TYPE_INODE
||
1026 (chain
->data
->ipdata
.meta
.op_flags
&
1027 HAMMER2_OPFLAG_DIRECTDATA
)) {
1028 kprintf("chain inode trans "
1030 bzero(&chain
->data
->ipdata
.u
,
1031 sizeof(chain
->data
->ipdata
.u
));
1033 bcopy(data
, chain
->data
,
1034 offsetof(hammer2_inode_data_t
, u
));
1035 /* XXX setcheck on inode should not be needed */
1036 hammer2_chain_setcheck(chain
, chain
->data
);
1040 case HAMMER2_BREF_TYPE_DATA
:
1041 bcopy(data
, chain
->data
, chain
->bytes
);
1042 hammer2_chain_setcheck(chain
, chain
->data
);
1044 case HAMMER2_BREF_TYPE_DIRENT
:
1046 * Directory entries embed data in the blockref.
1049 bcopy(data
, chain
->data
, chain
->bytes
);
1050 hammer2_chain_setcheck(chain
, chain
->data
);
1052 chain
->bref
.check
= focus
->bref
.check
;
1054 chain
->bref
.embed
= focus
->bref
.embed
;
1060 hammer2_xop_pdata(xop
);
1064 hammer2_chain_unlock(chain
);
1065 hammer2_chain_lock(chain
, HAMMER2_RESOLVE_SHARED
|
1066 HAMMER2_RESOLVE_MAYBE
);