2 * Copyright (c) 2008 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@backplane.com>
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * $DragonFly: src/sys/vfs/hammer/hammer_mirror.c,v 1.8 2008/07/07 03:49:51 dillon Exp $
37 * HAMMER mirroring ioctls - serialize and deserialize modifications made to a filesystem.
43 static int hammer_mirror_check(hammer_cursor_t cursor
,
44 struct hammer_ioc_mrecord
*mrec
);
45 static int hammer_mirror_update(hammer_cursor_t cursor
,
46 struct hammer_ioc_mrecord
*mrec
);
47 static int hammer_mirror_write(hammer_cursor_t cursor
,
48 struct hammer_ioc_mrecord
*mrec
,
49 hammer_inode_t ip
, char *udata
);
50 static int hammer_mirror_localize_data(hammer_data_ondisk_t data
,
51 hammer_btree_leaf_elm_t leaf
);
54 * All B-Tree records within the specified key range which also conform
55 * to the transaction id range are returned. Mirroring code keeps track
56 * of the last transaction id fully scanned and can efficiently pick up
57 * where it left off if interrupted.
60 hammer_ioc_mirror_read(hammer_transaction_t trans
, hammer_inode_t ip
,
61 struct hammer_ioc_mirror_rw
*mirror
)
63 struct hammer_cursor cursor
;
64 struct hammer_ioc_mrecord mrec
;
65 hammer_btree_leaf_elm_t elm
;
66 const int head_size
= HAMMER_MREC_HEADSIZE
;
67 const int crc_start
= HAMMER_MREC_CRCOFF
;
73 if ((mirror
->key_beg
.localization
| mirror
->key_end
.localization
) &
74 HAMMER_LOCALIZE_PSEUDOFS_MASK
) {
77 if (hammer_btree_cmp(&mirror
->key_beg
, &mirror
->key_end
) > 0)
80 mirror
->key_cur
= mirror
->key_beg
;
81 mirror
->key_cur
.localization
+= ip
->obj_localization
;
82 bzero(&mrec
, sizeof(mrec
));
85 error
= hammer_init_cursor(trans
, &cursor
, NULL
, NULL
);
87 hammer_done_cursor(&cursor
);
90 cursor
.key_beg
= mirror
->key_cur
;
91 cursor
.key_end
= mirror
->key_end
;
92 cursor
.key_end
.localization
+= ip
->obj_localization
;
94 cursor
.flags
|= HAMMER_CURSOR_END_INCLUSIVE
;
95 cursor
.flags
|= HAMMER_CURSOR_BACKEND
;
98 * This flag filters the search to only return elements whose create
99 * or delete TID is >= mirror_tid. The B-Tree uses the mirror_tid
100 * field stored with internal and leaf nodes to shortcut the scan.
102 cursor
.flags
|= HAMMER_CURSOR_MIRROR_FILTERED
;
103 cursor
.mirror_tid
= mirror
->tid_beg
;
105 error
= hammer_btree_first(&cursor
);
108 * Leaf node. Only return elements modified in the range
109 * requested by userland.
111 KKASSERT(cursor
.node
->ondisk
->type
== HAMMER_BTREE_TYPE_LEAF
);
112 elm
= &cursor
.node
->ondisk
->elms
[cursor
.index
].leaf
;
114 if (elm
->base
.create_tid
< mirror
->tid_beg
||
115 elm
->base
.create_tid
>= mirror
->tid_end
) {
116 if (elm
->base
.delete_tid
< mirror
->tid_beg
||
117 elm
->base
.delete_tid
>= mirror
->tid_end
) {
122 mirror
->key_cur
= elm
->base
;
125 * Yield to more important tasks
127 if ((error
= hammer_signal_check(trans
->hmp
)) != 0)
129 if (trans
->hmp
->sync_lock
.wanted
) {
130 tsleep(trans
, 0, "hmrslo", hz
/ 10);
132 if (trans
->hmp
->locked_dirty_space
+
133 trans
->hmp
->io_running_space
> hammer_limit_dirtybufspace
) {
134 hammer_flusher_async(trans
->hmp
);
135 tsleep(trans
, 0, "hmrslo", hz
/ 10);
139 * The core code exports the data to userland.
141 data_len
= (elm
->data_offset
) ? elm
->data_len
: 0;
143 error
= hammer_btree_extract(&cursor
,
144 HAMMER_CURSOR_GET_DATA
);
148 bytes
= sizeof(struct hammer_ioc_mrecord
) + data_len
;
149 bytes
= (bytes
+ HAMMER_HEAD_ALIGN_MASK
) &
150 ~HAMMER_HEAD_ALIGN_MASK
;
151 if (mirror
->count
+ bytes
> mirror
->size
)
155 * Construct the record for userland and copyout.
157 * The user is asking for a snapshot, if the record was
158 * deleted beyond the user-requested ending tid, the record
159 * is not considered deleted from the point of view of
160 * userland and delete_tid is cleared.
162 mrec
.signature
= HAMMER_IOC_MIRROR_SIGNATURE
;
163 mrec
.type
= HAMMER_MREC_TYPE_REC
;
164 mrec
.rec_size
= bytes
;
166 if (elm
->base
.delete_tid
>= mirror
->tid_end
)
167 mrec
.leaf
.base
.delete_tid
= 0;
168 mrec
.rec_crc
= crc32(&mrec
.rec_size
, head_size
- crc_start
);
169 uptr
= (char *)mirror
->ubuf
+ mirror
->count
;
170 error
= copyout(&mrec
, uptr
, head_size
);
171 if (data_len
&& error
== 0) {
172 error
= copyout(cursor
.data
, uptr
+ head_size
,
176 mirror
->count
+= bytes
;
179 cursor
.flags
|= HAMMER_CURSOR_ATEDISK
;
180 error
= hammer_btree_iterate(&cursor
);
183 if (error
== ENOENT
) {
184 mirror
->key_cur
= mirror
->key_end
;
187 hammer_done_cursor(&cursor
);
188 if (error
== EDEADLK
)
190 if (error
== EINTR
) {
191 mirror
->head
.flags
|= HAMMER_IOC_HEAD_INTR
;
195 mirror
->key_cur
.localization
&= HAMMER_LOCALIZE_MASK
;
200 * Copy records from userland to the target mirror. Records which already
201 * exist may only have their delete_tid updated.
203 * The passed ip is the root ip of the pseudofs
206 hammer_ioc_mirror_write(hammer_transaction_t trans
, hammer_inode_t ip
,
207 struct hammer_ioc_mirror_rw
*mirror
)
209 struct hammer_cursor cursor
;
210 struct hammer_ioc_mrecord mrec
;
211 const int head_size
= HAMMER_MREC_HEADSIZE
;
212 const int crc_start
= HAMMER_MREC_CRCOFF
;
217 if (mirror
->size
< 0 || mirror
->size
> 0x70000000)
220 error
= hammer_init_cursor(trans
, &cursor
, NULL
, NULL
);
222 hammer_normalize_cursor(&cursor
);
224 while (error
== 0 && mirror
->count
+ head_size
<= mirror
->size
) {
226 * Acquire and validate header
228 uptr
= (char *)mirror
->ubuf
+ mirror
->count
;
229 error
= copyin(uptr
, &mrec
, head_size
);
232 rec_crc
= crc32(&mrec
.rec_size
, head_size
- crc_start
);
233 if (mrec
.signature
!= HAMMER_IOC_MIRROR_SIGNATURE
) {
237 if (mrec
.type
!= HAMMER_MREC_TYPE_REC
) {
241 if (rec_crc
!= mrec
.rec_crc
) {
245 if (mrec
.rec_size
< head_size
||
246 mrec
.rec_size
> head_size
+ HAMMER_XBUFSIZE
+ 16 ||
247 mirror
->count
+ mrec
.rec_size
> mirror
->size
) {
251 if (mrec
.leaf
.data_len
< 0 ||
252 mrec
.leaf
.data_len
> HAMMER_XBUFSIZE
||
253 sizeof(struct hammer_ioc_mrecord
) + mrec
.leaf
.data_len
> mrec
.rec_size
) {
258 * Re-localize for target. relocalization of data is handled
259 * by hammer_mirror_write().
261 mrec
.leaf
.base
.localization
&= HAMMER_LOCALIZE_MASK
;
262 mrec
.leaf
.base
.localization
+= ip
->obj_localization
;
267 * If the record exists only the delete_tid may be updated.
269 * If the record does not exist we create it. For now we
270 * ignore records with a non-zero delete_tid. Note that
271 * mirror operations are effectively an as-of operation and
272 * delete_tid can be 0 for mirroring purposes even if it is
273 * not actually 0 at the originator.
275 hammer_normalize_cursor(&cursor
);
276 cursor
.key_beg
= mrec
.leaf
.base
;
277 cursor
.flags
|= HAMMER_CURSOR_BACKEND
;
278 cursor
.flags
&= ~HAMMER_CURSOR_INSERT
;
279 error
= hammer_btree_lookup(&cursor
);
281 if (error
== 0 && hammer_mirror_check(&cursor
, &mrec
)) {
282 hammer_sync_lock_sh(trans
);
283 error
= hammer_mirror_update(&cursor
, &mrec
);
284 hammer_sync_unlock(trans
);
285 } else if (error
== ENOENT
&& mrec
.leaf
.base
.delete_tid
== 0) {
286 hammer_sync_lock_sh(trans
);
287 error
= hammer_mirror_write(&cursor
, &mrec
, ip
,
289 hammer_sync_unlock(trans
);
290 } else if (error
== ENOENT
) {
295 * Clean for loop. It is ok if the record already exists
298 if (error
== EDEADLK
) {
299 hammer_done_cursor(&cursor
);
300 error
= hammer_init_cursor(trans
, &cursor
, NULL
, NULL
);
304 if (error
== EALREADY
)
307 mirror
->count
+= mrec
.rec_size
;
309 hammer_done_cursor(&cursor
);
314 * Check whether an update is needed in the case where a match already
315 * exists on the target. The only type of update allowed in this case
316 * is an update of the delete_tid.
318 * Return non-zero if the update should proceed.
322 hammer_mirror_check(hammer_cursor_t cursor
, struct hammer_ioc_mrecord
*mrec
)
324 hammer_btree_leaf_elm_t leaf
= cursor
->leaf
;
326 if (leaf
->base
.delete_tid
!= mrec
->leaf
.base
.delete_tid
) {
327 if (leaf
->base
.delete_tid
!= 0)
334 * Update a record in-place. Only the delete_tid can change.
338 hammer_mirror_update(hammer_cursor_t cursor
, struct hammer_ioc_mrecord
*mrec
)
340 hammer_transaction_t trans
;
341 hammer_btree_leaf_elm_t elm
;
344 trans
= cursor
->trans
;
346 if (mrec
->leaf
.base
.delete_tid
== 0) {
347 kprintf("mirror_write: object %016llx:%016llx deleted on "
348 "target, not deleted on source\n",
349 elm
->base
.obj_id
, elm
->base
.key
);
353 KKASSERT(elm
->base
.create_tid
< mrec
->leaf
.base
.delete_tid
);
354 hammer_modify_node(trans
, cursor
->node
, elm
, sizeof(*elm
));
355 elm
->base
.delete_tid
= mrec
->leaf
.base
.delete_tid
;
356 elm
->delete_ts
= mrec
->leaf
.delete_ts
;
357 hammer_modify_node_done(cursor
->node
);
360 * Track a count of active inodes.
362 if (elm
->base
.obj_type
== HAMMER_RECTYPE_INODE
) {
363 hammer_modify_volume_field(trans
,
366 --trans
->hmp
->rootvol
->ondisk
->vol0_stat_inodes
;
367 hammer_modify_volume_done(trans
->rootvol
);
374 * Write out a new record.
378 hammer_mirror_write(hammer_cursor_t cursor
, struct hammer_ioc_mrecord
*mrec
,
379 hammer_inode_t ip
, char *udata
)
381 hammer_transaction_t trans
;
382 hammer_buffer_t data_buffer
;
383 hammer_off_t ndata_offset
;
384 hammer_tid_t high_tid
;
390 * Skip records related to the root inode other than
393 if (mrec
->leaf
.base
.obj_id
== HAMMER_OBJID_ROOT
) {
394 if (mrec
->leaf
.base
.rec_type
== HAMMER_RECTYPE_INODE
||
395 mrec
->leaf
.base
.rec_type
== HAMMER_RECTYPE_FIX
) {
400 trans
= cursor
->trans
;
404 * Allocate and adjust data
406 if (mrec
->leaf
.data_len
&& mrec
->leaf
.data_offset
) {
407 ndata
= hammer_alloc_data(trans
, mrec
->leaf
.data_len
,
408 mrec
->leaf
.base
.rec_type
,
409 &ndata_offset
, &data_buffer
, &error
);
412 mrec
->leaf
.data_offset
= ndata_offset
;
413 hammer_modify_buffer(trans
, data_buffer
, NULL
, 0);
414 error
= copyin(udata
, ndata
, mrec
->leaf
.data_len
);
416 if (hammer_crc_test_leaf(ndata
, &mrec
->leaf
) == 0) {
417 kprintf("data crc mismatch on pipe\n");
420 error
= hammer_mirror_localize_data(
424 hammer_modify_buffer_done(data_buffer
);
426 mrec
->leaf
.data_offset
= 0;
436 cursor
->flags
|= HAMMER_CURSOR_INSERT
;
437 error
= hammer_btree_lookup(cursor
);
438 if (error
!= ENOENT
) {
445 error
= hammer_btree_insert(cursor
, &mrec
->leaf
, &doprop
);
448 * Track a count of active inodes.
450 if (error
== 0 && mrec
->leaf
.base
.delete_tid
== 0 &&
451 mrec
->leaf
.base
.obj_type
== HAMMER_RECTYPE_INODE
) {
452 hammer_modify_volume_field(trans
,
455 ++trans
->hmp
->rootvol
->ondisk
->vol0_stat_inodes
;
456 hammer_modify_volume_done(trans
->rootvol
);
460 * vol0_next_tid must track the highest TID stored in the filesystem.
461 * We do not need to generate undo for this update.
463 high_tid
= mrec
->leaf
.base
.create_tid
;
464 if (high_tid
< mrec
->leaf
.base
.delete_tid
)
465 high_tid
= mrec
->leaf
.base
.delete_tid
;
466 if (trans
->rootvol
->ondisk
->vol0_next_tid
< high_tid
) {
467 hammer_modify_volume(trans
, trans
->rootvol
, NULL
, 0);
468 trans
->rootvol
->ondisk
->vol0_next_tid
= high_tid
;
469 hammer_modify_volume_done(trans
->rootvol
);
472 if (error
== 0 && doprop
)
473 hammer_btree_do_propagation(cursor
, ip
, &mrec
->leaf
);
479 if (error
&& mrec
->leaf
.data_offset
) {
480 hammer_blockmap_free(cursor
->trans
,
481 mrec
->leaf
.data_offset
,
482 mrec
->leaf
.data_len
);
485 hammer_rel_buffer(data_buffer
, 0);
490 * Localize the data payload. Directory entries may need their
491 * localization adjusted.
493 * PFS directory entries must be skipped entirely (return EALREADY).
497 hammer_mirror_localize_data(hammer_data_ondisk_t data
,
498 hammer_btree_leaf_elm_t leaf
)
500 u_int32_t localization
;
502 if (leaf
->base
.rec_type
== HAMMER_RECTYPE_DIRENTRY
) {
503 if (data
->entry
.obj_id
== HAMMER_OBJID_ROOT
)
505 localization
= leaf
->base
.localization
&
506 HAMMER_LOCALIZE_PSEUDOFS_MASK
;
507 if (data
->entry
.localization
!= localization
) {
508 data
->entry
.localization
= localization
;
509 hammer_crc_set_leaf(data
, leaf
);
516 * Set mirroring/pseudo-fs information
519 hammer_ioc_set_pseudofs(hammer_transaction_t trans
, hammer_inode_t ip
,
520 struct hammer_ioc_pseudofs_rw
*pfs
)
522 hammer_pseudofs_inmem_t pfsm
;
528 if (pfs
->pseudoid
!= ip
->obj_localization
)
530 if (pfs
->bytes
!= sizeof(pfsm
->pfsd
))
532 if (pfs
->version
!= HAMMER_IOC_PSEUDOFS_VERSION
)
534 if (error
== 0 && pfs
->ondisk
) {
535 if (ip
->obj_id
!= HAMMER_OBJID_ROOT
)
538 error
= copyin(pfs
->ondisk
, &ip
->pfsm
->pfsd
,
539 sizeof(ip
->pfsm
->pfsd
));
542 error
= hammer_save_pseudofs(trans
, ip
);
548 * Get mirroring/pseudo-fs information
551 hammer_ioc_get_pseudofs(hammer_transaction_t trans
, hammer_inode_t ip
,
552 struct hammer_ioc_pseudofs_rw
*pfs
)
554 hammer_pseudofs_inmem_t pfsm
;
557 pfs
->pseudoid
= ip
->obj_localization
;
558 pfs
->bytes
= sizeof(struct hammer_pseudofs_data
);
559 pfs
->version
= HAMMER_IOC_PSEUDOFS_VERSION
;
562 * Update pfsm->sync_end_tid if a master
565 if (pfsm
->pfsd
.master_id
>= 0)
566 pfsm
->pfsd
.sync_end_tid
= trans
->rootvol
->ondisk
->vol0_next_tid
;
569 * Return PFS information for root inodes only.
573 if (ip
->obj_id
!= HAMMER_OBJID_ROOT
)
576 error
= copyout(&ip
->pfsm
->pfsd
, pfs
->ondisk
,
577 sizeof(ip
->pfsm
->pfsd
));