2 * Copyright (c) 2007 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@backplane.com>
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * $DragonFly: src/sys/vfs/hammer/hammer_object.c,v 1.5 2007/11/26 05:03:11 dillon Exp $
39 static int hammer_mem_add(hammer_transaction_t trans
,
40 hammer_record_t record
);
41 static int hammer_mem_lookup(hammer_cursor_t cursor
, hammer_inode_t ip
);
42 static int hammer_mem_search(hammer_cursor_t cursor
, hammer_inode_t ip
);
45 * Red-black tree support.
48 hammer_rec_rb_compare(hammer_record_t rec1
, hammer_record_t rec2
)
50 if (rec1
->rec
.base
.base
.rec_type
< rec2
->rec
.base
.base
.rec_type
)
52 if (rec1
->rec
.base
.base
.rec_type
> rec2
->rec
.base
.base
.rec_type
)
55 if (rec1
->rec
.base
.base
.key
< rec2
->rec
.base
.base
.key
)
57 if (rec1
->rec
.base
.base
.key
> rec2
->rec
.base
.base
.key
)
60 if (rec1
->rec
.base
.base
.create_tid
< rec2
->rec
.base
.base
.create_tid
)
62 if (rec1
->rec
.base
.base
.create_tid
> rec2
->rec
.base
.base
.create_tid
)
68 hammer_rec_compare(hammer_base_elm_t info
, hammer_record_t rec
)
71 * A key1->rec_type of 0 matches any record type.
74 if (info
->rec_type
< rec
->rec
.base
.base
.rec_type
)
76 if (info
->rec_type
> rec
->rec
.base
.base
.rec_type
)
81 * There is no special case for key. 0 means 0.
83 if (info
->key
< rec
->rec
.base
.base
.key
)
85 if (info
->key
> rec
->rec
.base
.base
.key
)
89 * This test has a number of special cases. create_tid in key1 is
90 * the as-of transaction id, and delete_tid in key1 is NOT USED.
92 * A key1->create_tid of 0 matches any record regardless of when
93 * it was created or destroyed. 0xFFFFFFFFFFFFFFFFULL should be
94 * used to search for the most current state of the object.
96 * key2->create_tid is a HAMMER record and will never be
97 * 0. key2->delete_tid is the deletion transaction id or 0 if
98 * the record has not yet been deleted.
100 if (info
->create_tid
) {
101 if (info
->create_tid
< rec
->rec
.base
.base
.create_tid
)
103 if (rec
->rec
.base
.base
.delete_tid
&&
104 info
->create_tid
>= rec
->rec
.base
.base
.delete_tid
) {
112 * RB_SCAN comparison code for hammer_mem_search(). The argument order
113 * is reversed so the comparison result has to be negated. key_beg and
114 * key_end are both inclusive boundaries.
118 hammer_rec_scan_cmp(hammer_record_t rec
, void *data
)
120 hammer_cursor_t cursor
= data
;
123 r
= hammer_rec_compare(&cursor
->key_beg
, rec
);
128 r
= hammer_rec_compare(&cursor
->key_end
, rec
);
134 RB_GENERATE(hammer_rec_rb_tree
, hammer_record
, rb_node
, hammer_rec_rb_compare
);
135 RB_GENERATE_XLOOKUP(hammer_rec_rb_tree
, INFO
, hammer_record
, rb_node
,
136 hammer_rec_compare
, hammer_base_elm_t
);
139 * Allocate a record for the caller to finish filling in
142 hammer_alloc_mem_record(struct hammer_transaction
*trans
, hammer_inode_t ip
)
144 hammer_record_t record
;
146 record
= kmalloc(sizeof(*record
), M_HAMMER
, M_WAITOK
|M_ZERO
);
152 * Release a memory record. If the record is marked for deferred deletion,
153 * destroy the record when the last reference goes away.
156 hammer_rel_mem_record(struct hammer_record
**recordp
)
160 if ((rec
= *recordp
) != NULL
) {
161 if (hammer_islastref(&rec
->lock
)) {
162 hammer_unref(&rec
->lock
);
163 if (rec
->flags
& HAMMER_RECF_DELETED
)
164 hammer_free_mem_record(rec
);
166 hammer_unref(&rec
->lock
);
173 * Free a record. Clean the structure up even though we are throwing it
174 * away as a sanity check. The actual free operation is delayed while
175 * the record is referenced. However, the record is removed from the RB
179 hammer_free_mem_record(hammer_record_t record
)
181 if (record
->flags
& HAMMER_RECF_ONRBTREE
) {
182 RB_REMOVE(hammer_rec_rb_tree
, &record
->ip
->rec_tree
, record
);
183 record
->flags
&= ~HAMMER_RECF_ONRBTREE
;
185 if (record
->lock
.refs
) {
186 record
->flags
|= HAMMER_RECF_DELETED
;
189 if (record
->flags
& HAMMER_RECF_ALLOCDATA
) {
190 kfree(record
->data
, M_HAMMER
);
191 record
->flags
&= ~HAMMER_RECF_ALLOCDATA
;
194 kfree(record
, M_HAMMER
);
198 * Lookup an in-memory record given the key specified in the cursor. Works
199 * just like hammer_btree_lookup() but operates on an inode's in-memory
202 * The lookup must fail if the record is marked for deferred deletion.
206 hammer_mem_lookup(hammer_cursor_t cursor
, hammer_inode_t ip
)
211 hammer_rel_mem_record(&cursor
->iprec
);
213 hammer_rec_rb_tree_scan_info_done(&cursor
->scan
,
214 &cursor
->ip
->rec_tree
);
217 hammer_rec_rb_tree_scan_info_link(&cursor
->scan
, &ip
->rec_tree
);
218 cursor
->scan
.node
= NULL
;
219 cursor
->iprec
= hammer_rec_rb_tree_RB_LOOKUP_INFO(
220 &ip
->rec_tree
, &cursor
->key_beg
);
221 if (cursor
->iprec
== NULL
) {
224 hammer_ref(&cursor
->iprec
->lock
);
231 * hammer_mem_search() - locate the first in-memory record matching the
234 * The RB_SCAN function we use is designed as a callback. We terminate it
235 * (return -1) as soon as we get a match.
239 hammer_rec_scan_callback(hammer_record_t rec
, void *data
)
241 hammer_cursor_t cursor
= data
;
243 if (cursor
->iprec
== NULL
) {
245 hammer_ref(&rec
->lock
);
253 hammer_mem_search(hammer_cursor_t cursor
, hammer_inode_t ip
)
256 hammer_rel_mem_record(&cursor
->iprec
);
258 hammer_rec_rb_tree_scan_info_done(&cursor
->scan
,
259 &cursor
->ip
->rec_tree
);
262 hammer_rec_rb_tree_scan_info_link(&cursor
->scan
, &ip
->rec_tree
);
263 cursor
->scan
.node
= NULL
;
264 hammer_rec_rb_tree_RB_SCAN(&ip
->rec_tree
, hammer_rec_scan_cmp
,
265 hammer_rec_scan_callback
, cursor
);
267 cursor
->scan
.node
= hammer_rec_rb_tree_RB_NEXT(cursor
->iprec
);
274 hammer_mem_done(hammer_cursor_t cursor
)
277 hammer_rec_rb_tree_scan_info_done(&cursor
->scan
,
278 &cursor
->ip
->rec_tree
);
282 hammer_rel_mem_record(&cursor
->iprec
);
285 /************************************************************************
286 * HAMMER IN-MEMORY RECORD FUNCTIONS *
287 ************************************************************************
289 * These functions manipulate in-memory records. Such records typically
290 * exist prior to being committed to disk or indexed via the on-disk B-Tree.
294 * Add a directory entry (dip,ncp) which references inode (ip).
296 * Note that the low 32 bits of the namekey are set temporarily to create
297 * a unique in-memory record, and may be modified a second time when the
298 * record is synchronized to disk. In particular, the low 32 bits cannot be
299 * all 0's when synching to disk, which is not handled here.
302 hammer_ip_add_directory(struct hammer_transaction
*trans
,
303 struct hammer_inode
*dip
, struct namecache
*ncp
,
304 struct hammer_inode
*ip
)
306 hammer_record_t record
;
310 record
= hammer_alloc_mem_record(trans
, dip
);
312 kprintf("add to directory dip %p\n", dip
);
313 bytes
= ncp
->nc_nlen
; /* NOTE: terminating \0 is NOT included */
314 if (++trans
->hmp
->namekey_iterator
== 0)
315 ++trans
->hmp
->namekey_iterator
;
317 record
->rec
.entry
.base
.base
.obj_id
= dip
->obj_id
;
318 record
->rec
.entry
.base
.base
.key
=
319 hammer_directory_namekey(ncp
->nc_name
, bytes
);
320 record
->rec
.entry
.base
.base
.key
+= trans
->hmp
->namekey_iterator
;
321 record
->rec
.entry
.base
.base
.create_tid
= trans
->tid
;
322 record
->rec
.entry
.base
.base
.rec_type
= HAMMER_RECTYPE_DIRENTRY
;
323 record
->rec
.entry
.base
.base
.obj_type
= ip
->ino_rec
.base
.base
.obj_type
;
324 record
->rec
.entry
.obj_id
= ip
->obj_id
;
325 if (bytes
<= sizeof(record
->rec
.entry
.den_name
)) {
326 record
->data
= (void *)record
->rec
.entry
.den_name
;
327 record
->flags
|= HAMMER_RECF_EMBEDDED_DATA
;
329 record
->data
= kmalloc(bytes
, M_HAMMER
, M_WAITOK
);
330 record
->flags
|= HAMMER_RECF_ALLOCDATA
;
332 bcopy(ncp
->nc_name
, record
->data
, bytes
);
333 record
->rec
.entry
.base
.data_len
= bytes
;
334 ++ip
->ino_rec
.ino_nlinks
;
335 hammer_modify_inode(trans
, ip
,
336 HAMMER_INODE_RDIRTY
| HAMMER_INODE_TID
);
337 error
= hammer_mem_add(trans
, record
);
342 * Delete the directory entry and update the inode link count. The
343 * cursor must be seeked to the directory entry record being deleted.
345 * NOTE: HAMMER_CURSOR_DELETE may not have been set. XXX remove flag.
348 hammer_ip_del_directory(struct hammer_transaction
*trans
,
349 hammer_cursor_t cursor
, struct hammer_inode
*dip
,
350 struct hammer_inode
*ip
)
354 if (cursor
->record
== &cursor
->iprec
->rec
) {
356 * The directory entry was in-memory, just scrap the
359 hammer_free_mem_record(cursor
->iprec
);
363 * The directory entry was on-disk, mark the record and
364 * B-Tree entry as deleted. The B-Tree entry does not
365 * have to be reindexed because a 'current' delete transid
366 * will wind up in the same position as the live record.
368 KKASSERT(ip
->flags
& HAMMER_INODE_ONDISK
);
369 error
= hammer_btree_extract(cursor
, HAMMER_CURSOR_GET_RECORD
);
371 cursor
->node
->ondisk
->elms
[cursor
->index
].base
.delete_tid
= trans
->tid
;
372 cursor
->record
->base
.base
.delete_tid
= trans
->tid
;
373 hammer_modify_node(cursor
->node
);
374 hammer_modify_buffer(cursor
->record_buffer
);
379 * One less link. The file may still be open in the OS even after
380 * all links have gone away so we don't destroy the inode's data
384 --ip
->ino_rec
.ino_nlinks
;
385 hammer_modify_inode(trans
, ip
,
386 HAMMER_INODE_RDIRTY
| HAMMER_INODE_TID
);
392 * Sync data from a buffer cache buffer (typically) to the filesystem. This
393 * is called via the strategy called from a cached data source. This code
394 * is responsible for actually writing a data record out to the disk.
397 hammer_ip_sync_data(hammer_transaction_t trans
, hammer_inode_t ip
,
398 int64_t offset
, void *data
, int bytes
)
400 struct hammer_cursor cursor
;
401 hammer_record_ondisk_t rec
;
402 union hammer_btree_elm elm
;
406 error
= hammer_init_cursor_ip(&cursor
, ip
);
409 cursor
.key_beg
.obj_id
= ip
->obj_id
;
410 cursor
.key_beg
.key
= offset
+ bytes
;
411 cursor
.key_beg
.create_tid
= trans
->tid
;
412 cursor
.key_beg
.delete_tid
= 0;
413 cursor
.key_beg
.rec_type
= HAMMER_RECTYPE_DATA
;
414 cursor
.flags
= HAMMER_CURSOR_INSERT
;
417 * Issue a lookup to position the cursor and locate the cluster
419 error
= hammer_btree_lookup(&cursor
);
421 kprintf("hammer_ip_sync_data: duplicate data at (%lld,%d)\n",
429 * Allocate record and data space now that we know which cluster
430 * the B-Tree node ended up in.
432 bdata
= hammer_alloc_data(cursor
.node
->cluster
, bytes
, &error
,
433 &cursor
.data_buffer
);
436 rec
= hammer_alloc_record(cursor
.node
->cluster
, &error
,
437 &cursor
.record_buffer
);
442 * Fill everything in and insert our B-Tree node.
444 rec
->base
.base
= cursor
.key_beg
;
445 rec
->base
.data_crc
= crc32(data
, bytes
);
446 rec
->base
.rec_id
= 0; /* XXX */
447 rec
->base
.data_offset
= hammer_bclu_offset(cursor
.data_buffer
, bdata
);
448 rec
->base
.data_len
= bytes
;
449 hammer_modify_buffer(cursor
.record_buffer
);
451 bcopy(data
, bdata
, bytes
);
452 hammer_modify_buffer(cursor
.data_buffer
);
454 elm
.leaf
.base
= cursor
.key_beg
;
455 elm
.leaf
.rec_offset
= hammer_bclu_offset(cursor
.record_buffer
, rec
);
456 elm
.leaf
.data_offset
= rec
->base
.data_offset
;
457 elm
.leaf
.data_len
= bytes
;
458 elm
.leaf
.data_crc
= rec
->base
.data_crc
;
460 error
= hammer_btree_insert(&cursor
, &elm
);
464 hammer_free_record_ptr(cursor
.record_buffer
, rec
);
466 hammer_free_data_ptr(cursor
.data_buffer
, bdata
, bytes
);
468 hammer_done_cursor(&cursor
);
473 * Sync an in-memory record to the disk. This is typically called via fsync
474 * from a cached record source. This code is responsible for actually
475 * writing a record out to the disk.
478 hammer_ip_sync_record(hammer_record_t record
)
480 struct hammer_cursor cursor
;
481 hammer_record_ondisk_t rec
;
482 union hammer_btree_elm elm
;
486 error
= hammer_init_cursor_ip(&cursor
, record
->ip
);
489 cursor
.key_beg
= record
->rec
.base
.base
;
490 cursor
.flags
= HAMMER_CURSOR_INSERT
;
493 * Issue a lookup to position the cursor and locate the cluster
495 error
= hammer_btree_lookup(&cursor
);
497 kprintf("hammer_ip_sync_record: duplicate rec at (%016llx)\n",
498 record
->rec
.base
.base
.key
);
505 * Allocate record and data space now that we know which cluster
506 * the B-Tree node ended up in.
508 if (record
->data
== NULL
||
509 (record
->flags
& HAMMER_RECF_EMBEDDED_DATA
)) {
510 bdata
= record
->data
;
512 bdata
= hammer_alloc_data(cursor
.node
->cluster
,
513 record
->rec
.base
.data_len
, &error
,
514 &cursor
.data_buffer
);
518 rec
= hammer_alloc_record(cursor
.node
->cluster
, &error
,
519 &cursor
.record_buffer
);
524 * Fill everything in and insert our B-Tree node.
526 * XXX assign rec_id here
529 kprintf("record->rec %p data %p\n", &record
->rec
, record
->data
);
531 rec
->base
.data_crc
= crc32(record
->data
,
532 record
->rec
.base
.data_len
);
533 if (record
->flags
& HAMMER_RECF_EMBEDDED_DATA
) {
535 * Data embedded in record
537 rec
->base
.data_offset
= ((char *)bdata
-
538 (char *)&record
->rec
);
539 KKASSERT(rec
->base
.data_offset
>= 0 &&
540 rec
->base
.data_offset
+ rec
->base
.data_len
<
542 rec
->base
.data_offset
+= hammer_bclu_offset(cursor
.record_buffer
, rec
);
545 * Data separate from record
547 rec
->base
.data_offset
= hammer_bclu_offset(cursor
.data_buffer
,bdata
);
548 bcopy(record
->data
, bdata
, rec
->base
.data_len
);
549 hammer_modify_buffer(cursor
.data_buffer
);
552 rec
->base
.rec_id
= 0; /* XXX */
554 hammer_modify_buffer(cursor
.record_buffer
);
556 elm
.leaf
.base
= cursor
.key_beg
;
557 elm
.leaf
.rec_offset
= hammer_bclu_offset(cursor
.record_buffer
, rec
);
558 elm
.leaf
.data_offset
= rec
->base
.data_offset
;
559 elm
.leaf
.data_len
= rec
->base
.data_len
;
560 elm
.leaf
.data_crc
= rec
->base
.data_crc
;
562 error
= hammer_btree_insert(&cursor
, &elm
);
566 hammer_free_record_ptr(cursor
.record_buffer
, rec
);
568 if (record
->data
&& (record
->flags
& HAMMER_RECF_EMBEDDED_DATA
) == 0) {
569 hammer_free_data_ptr(cursor
.data_buffer
, bdata
,
573 hammer_done_cursor(&cursor
);
574 kprintf("hammer_ip_sync_record_done %d\n", error
);
580 * Add the record to the inode's rec_tree. The low 32 bits of a directory
581 * entry's key is used to deal with hash collisions in the upper 32 bits.
582 * A unique 64 bit key is generated in-memory and may be regenerated a
583 * second time when the directory record is flushed to the on-disk B-Tree.
587 hammer_mem_add(struct hammer_transaction
*trans
, hammer_record_t record
)
589 while (RB_INSERT(hammer_rec_rb_tree
, &record
->ip
->rec_tree
, record
)) {
590 if (record
->rec
.base
.base
.rec_type
!= HAMMER_RECTYPE_DIRENTRY
){
591 hammer_free_mem_record(record
);
594 if (++trans
->hmp
->namekey_iterator
== 0)
595 ++trans
->hmp
->namekey_iterator
;
596 record
->rec
.base
.base
.key
&= ~(0xFFFFFFFFLL
);
597 record
->rec
.base
.base
.key
|= trans
->hmp
->namekey_iterator
;
599 record
->flags
|= HAMMER_RECF_ONRBTREE
;
603 /************************************************************************
604 * HAMMER INODE MERGED-RECORD FUNCTIONS *
605 ************************************************************************
607 * These functions augment the B-Tree scanning functions in hammer_btree.c
608 * by merging in-memory records with on-disk records.
612 * Locate a particular record either in-memory or on-disk.
614 * NOTE: This is basically a standalone routine, hammer_ip_next() may
615 * NOT be called to iterate results.
618 hammer_ip_lookup(hammer_cursor_t cursor
, struct hammer_inode
*ip
)
623 * If the element is in-memory return it without searching the
626 error
= hammer_mem_lookup(cursor
, ip
);
628 cursor
->record
= &cursor
->iprec
->rec
;
635 * If the inode has on-disk components search the on-disk B-Tree.
637 if ((ip
->flags
& HAMMER_INODE_ONDISK
) == 0)
639 error
= hammer_btree_lookup(cursor
);
641 error
= hammer_btree_extract(cursor
, HAMMER_CURSOR_GET_RECORD
);
646 * Locate the first record within the cursor's key_beg/key_end range,
647 * restricted to a particular inode. 0 is returned on success, ENOENT
648 * if no records matched the requested range, or some other error.
650 * When 0 is returned hammer_ip_next() may be used to iterate additional
651 * records within the requested range.
654 hammer_ip_first(hammer_cursor_t cursor
, struct hammer_inode
*ip
)
659 * Clean up fields and setup for merged scan
661 cursor
->flags
|= HAMMER_CURSOR_ATEDISK
| HAMMER_CURSOR_ATEMEM
;
662 cursor
->flags
|= HAMMER_CURSOR_DISKEOF
| HAMMER_CURSOR_MEMEOF
;
664 hammer_rel_mem_record(&cursor
->iprec
);
667 * Search the on-disk B-Tree. hammer_btree_lookup() only does an
668 * exact lookup so if we get ENOENT we have to call the iterate
669 * function to validate the first record after the begin key.
671 * The ATEDISK flag is used by hammer_btree_iterate to determine
672 * whether it must index forwards or not.
674 if (ip
->flags
& HAMMER_INODE_ONDISK
) {
675 error
= hammer_btree_lookup(cursor
);
676 if (error
== ENOENT
) {
677 cursor
->flags
&= ~HAMMER_CURSOR_ATEDISK
;
678 error
= hammer_btree_iterate(cursor
);
680 if (error
&& error
!= ENOENT
)
683 cursor
->flags
&= ~HAMMER_CURSOR_DISKEOF
;
684 cursor
->flags
&= ~HAMMER_CURSOR_ATEDISK
;
686 cursor
->flags
|= HAMMER_CURSOR_ATEDISK
;
691 * Search the in-memory record list (Red-Black tree). Unlike the
692 * B-Tree search, mem_search checks for records in the range.
694 error
= hammer_mem_search(cursor
, ip
);
695 if (error
&& error
!= ENOENT
)
698 cursor
->flags
&= ~HAMMER_CURSOR_MEMEOF
;
699 cursor
->flags
&= ~HAMMER_CURSOR_ATEMEM
;
703 * This will return the first matching record.
705 return(hammer_ip_next(cursor
));
709 * Retrieve the next record in a merged iteration within the bounds of the
710 * cursor. This call may be made multiple times after the cursor has been
711 * initially searched with hammer_ip_first().
713 * 0 is returned on success, ENOENT if no further records match the
714 * requested range, or some other error code is returned.
717 hammer_ip_next(hammer_cursor_t cursor
)
719 hammer_btree_elm_t elm
;
725 * Load the current on-disk and in-memory record. If we ate any
726 * records we have to get the next one.
728 * Get the next on-disk record
730 if (cursor
->flags
& HAMMER_CURSOR_ATEDISK
) {
731 if ((cursor
->flags
& HAMMER_CURSOR_DISKEOF
) == 0) {
732 error
= hammer_btree_iterate(cursor
);
734 cursor
->flags
&= ~HAMMER_CURSOR_ATEDISK
;
736 cursor
->flags
|= HAMMER_CURSOR_DISKEOF
;
741 * Get the next in-memory record. The record can be ripped out
742 * of the RB tree so we maintain a scan_info structure to track
745 if (cursor
->flags
& HAMMER_CURSOR_ATEMEM
) {
746 if ((cursor
->flags
& HAMMER_CURSOR_MEMEOF
) == 0) {
747 rec
= cursor
->scan
.node
; /* next node */
749 cursor
->flags
&= ~HAMMER_CURSOR_ATEMEM
;
750 hammer_ref(&rec
->lock
);
752 hammer_rec_rb_tree_RB_NEXT(rec
);
754 cursor
->flags
|= HAMMER_CURSOR_MEMEOF
;
756 hammer_rel_mem_record(&cursor
->iprec
);
762 * Extract either the disk or memory record depending on their
766 switch(cursor
->flags
& (HAMMER_CURSOR_ATEDISK
| HAMMER_CURSOR_ATEMEM
)) {
771 elm
= &cursor
->node
->ondisk
->elms
[cursor
->index
];
772 r
= hammer_btree_cmp(&elm
->base
,
773 &cursor
->iprec
->rec
.base
.base
);
775 error
= hammer_btree_extract(cursor
,
776 HAMMER_CURSOR_GET_RECORD
);
777 cursor
->flags
|= HAMMER_CURSOR_ATEDISK
;
780 /* fall through to the memory entry */
781 case HAMMER_CURSOR_ATEDISK
:
783 * Only the memory entry is valid
785 cursor
->record
= &cursor
->iprec
->rec
;
786 cursor
->flags
|= HAMMER_CURSOR_ATEMEM
;
788 case HAMMER_CURSOR_ATEMEM
:
790 * Only the disk entry is valid
792 error
= hammer_btree_extract(cursor
, HAMMER_CURSOR_GET_RECORD
);
793 cursor
->flags
|= HAMMER_CURSOR_ATEDISK
;
797 * Neither entry is valid
799 * XXX error not set properly
801 cursor
->record
= NULL
;
809 * Resolve the cursor->data pointer for the current cursor position in
810 * a merged iteration.
813 hammer_ip_resolve_data(hammer_cursor_t cursor
)
817 if (cursor
->iprec
&& cursor
->record
== &cursor
->iprec
->rec
) {
818 cursor
->data
= cursor
->iprec
->data
;
821 error
= hammer_btree_extract(cursor
, HAMMER_CURSOR_GET_DATA
);
827 * Delete all records within the specified range for inode ip.
829 * NOTE: An unaligned range will cause new records to be added to cover
832 * NOTE: ran_end is inclusive (e.g. 0,1023 instead of 0,1024).
835 hammer_ip_delete_range(hammer_transaction_t trans
, hammer_inode_t ip
,
836 int64_t ran_beg
, int64_t ran_end
)
838 struct hammer_cursor cursor
;
839 hammer_record_ondisk_t rec
;
840 hammer_base_elm_t base
;
844 hammer_init_cursor_ip(&cursor
, ip
);
846 cursor
.key_beg
.obj_id
= ip
->obj_id
;
847 cursor
.key_beg
.create_tid
= ip
->obj_asof
;
848 cursor
.key_beg
.delete_tid
= 0;
849 cursor
.key_beg
.obj_type
= 0;
850 cursor
.key_beg
.key
= ran_beg
;
851 cursor
.key_end
= cursor
.key_beg
;
852 if (ip
->ino_rec
.base
.base
.obj_type
== HAMMER_OBJTYPE_DBFILE
) {
853 cursor
.key_beg
.rec_type
= HAMMER_RECTYPE_DB
;
854 cursor
.key_end
.rec_type
= HAMMER_RECTYPE_DB
;
855 cursor
.key_end
.key
= ran_end
;
857 cursor
.key_beg
.rec_type
= HAMMER_RECTYPE_DATA
;
858 cursor
.key_end
.rec_type
= HAMMER_RECTYPE_DATA
;
859 if (ran_end
+ MAXPHYS
< ran_end
)
860 cursor
.key_end
.key
= 0x7FFFFFFFFFFFFFFFLL
;
862 cursor
.key_end
.key
= ran_end
+ MAXPHYS
;
865 error
= hammer_ip_first(&cursor
, ip
);
868 * Iterate through matching records and mark them as deleted.
872 base
= &rec
->base
.base
;
874 KKASSERT(base
->delete_tid
== 0);
877 * There may be overlap cases for regular file data. Also
878 * remember the key for a regular file record is the offset
879 * of the last byte of the record (base + len - 1), NOT the
882 if (base
->rec_type
== HAMMER_RECTYPE_DATA
) {
883 off
= base
->key
- rec
->base
.data_len
+ 1;
885 * Check the left edge case
888 panic("hammer left edge case\n");
892 * Check the right edge case. Note that the
893 * record can be completely out of bounds, which
894 * terminates the search.
896 * base->key is (base_offset + bytes - 1), ran_end
897 * works the same way.
899 if (base
->key
> ran_end
) {
900 if (base
->key
- rec
->base
.data_len
+ 1 > ran_end
) {
901 kprintf("right edge OOB\n");
904 panic("hammer right edge case\n");
909 * Mark the record and B-Tree entry as deleted
911 if (cursor
.record
== &cursor
.iprec
->rec
) {
912 hammer_free_mem_record(cursor
.iprec
);
915 cursor
.node
->ondisk
->
916 elms
[cursor
.index
].base
.delete_tid
= trans
->tid
;
917 cursor
.record
->base
.base
.delete_tid
= trans
->tid
;
918 hammer_modify_node(cursor
.node
);
919 hammer_modify_buffer(cursor
.record_buffer
);
921 error
= hammer_ip_next(&cursor
);
923 hammer_done_cursor(&cursor
);