2 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
20 #include "xfs_types.h"
24 #include "xfs_trans.h"
27 #include "xfs_mount.h"
28 #include "xfs_bmap_btree.h"
29 #include "xfs_alloc_btree.h"
30 #include "xfs_ialloc_btree.h"
31 #include "xfs_dinode.h"
32 #include "xfs_inode.h"
33 #include "xfs_inode_item.h"
34 #include "xfs_btree.h"
35 #include "xfs_btree_trace.h"
36 #include "xfs_error.h"
37 #include "xfs_trace.h"
40 * Cursor allocation zone.
42 kmem_zone_t
*xfs_btree_cur_zone
;
45 * Btree magic numbers.
47 const __uint32_t xfs_magics
[XFS_BTNUM_MAX
] = {
48 XFS_ABTB_MAGIC
, XFS_ABTC_MAGIC
, XFS_BMAP_MAGIC
, XFS_IBT_MAGIC
52 STATIC
int /* error (0 or EFSCORRUPTED) */
53 xfs_btree_check_lblock(
54 struct xfs_btree_cur
*cur
, /* btree cursor */
55 struct xfs_btree_block
*block
, /* btree long form block pointer */
56 int level
, /* level of the btree block */
57 struct xfs_buf
*bp
) /* buffer for block, if any */
59 int lblock_ok
; /* block passes checks */
60 struct xfs_mount
*mp
; /* file system mount point */
64 be32_to_cpu(block
->bb_magic
) == xfs_magics
[cur
->bc_btnum
] &&
65 be16_to_cpu(block
->bb_level
) == level
&&
66 be16_to_cpu(block
->bb_numrecs
) <=
67 cur
->bc_ops
->get_maxrecs(cur
, level
) &&
68 block
->bb_u
.l
.bb_leftsib
&&
69 (be64_to_cpu(block
->bb_u
.l
.bb_leftsib
) == NULLDFSBNO
||
70 XFS_FSB_SANITY_CHECK(mp
,
71 be64_to_cpu(block
->bb_u
.l
.bb_leftsib
))) &&
72 block
->bb_u
.l
.bb_rightsib
&&
73 (be64_to_cpu(block
->bb_u
.l
.bb_rightsib
) == NULLDFSBNO
||
74 XFS_FSB_SANITY_CHECK(mp
,
75 be64_to_cpu(block
->bb_u
.l
.bb_rightsib
)));
76 if (unlikely(XFS_TEST_ERROR(!lblock_ok
, mp
,
77 XFS_ERRTAG_BTREE_CHECK_LBLOCK
,
78 XFS_RANDOM_BTREE_CHECK_LBLOCK
))) {
80 trace_xfs_btree_corrupt(bp
, _RET_IP_
);
81 XFS_ERROR_REPORT("xfs_btree_check_lblock", XFS_ERRLEVEL_LOW
,
83 return XFS_ERROR(EFSCORRUPTED
);
88 STATIC
int /* error (0 or EFSCORRUPTED) */
89 xfs_btree_check_sblock(
90 struct xfs_btree_cur
*cur
, /* btree cursor */
91 struct xfs_btree_block
*block
, /* btree short form block pointer */
92 int level
, /* level of the btree block */
93 struct xfs_buf
*bp
) /* buffer containing block */
95 struct xfs_buf
*agbp
; /* buffer for ag. freespace struct */
96 struct xfs_agf
*agf
; /* ag. freespace structure */
97 xfs_agblock_t agflen
; /* native ag. freespace length */
98 int sblock_ok
; /* block passes checks */
100 agbp
= cur
->bc_private
.a
.agbp
;
101 agf
= XFS_BUF_TO_AGF(agbp
);
102 agflen
= be32_to_cpu(agf
->agf_length
);
104 be32_to_cpu(block
->bb_magic
) == xfs_magics
[cur
->bc_btnum
] &&
105 be16_to_cpu(block
->bb_level
) == level
&&
106 be16_to_cpu(block
->bb_numrecs
) <=
107 cur
->bc_ops
->get_maxrecs(cur
, level
) &&
108 (be32_to_cpu(block
->bb_u
.s
.bb_leftsib
) == NULLAGBLOCK
||
109 be32_to_cpu(block
->bb_u
.s
.bb_leftsib
) < agflen
) &&
110 block
->bb_u
.s
.bb_leftsib
&&
111 (be32_to_cpu(block
->bb_u
.s
.bb_rightsib
) == NULLAGBLOCK
||
112 be32_to_cpu(block
->bb_u
.s
.bb_rightsib
) < agflen
) &&
113 block
->bb_u
.s
.bb_rightsib
;
114 if (unlikely(XFS_TEST_ERROR(!sblock_ok
, cur
->bc_mp
,
115 XFS_ERRTAG_BTREE_CHECK_SBLOCK
,
116 XFS_RANDOM_BTREE_CHECK_SBLOCK
))) {
118 trace_xfs_btree_corrupt(bp
, _RET_IP_
);
119 XFS_CORRUPTION_ERROR("xfs_btree_check_sblock",
120 XFS_ERRLEVEL_LOW
, cur
->bc_mp
, block
);
121 return XFS_ERROR(EFSCORRUPTED
);
127 * Debug routine: check that block header is ok.
130 xfs_btree_check_block(
131 struct xfs_btree_cur
*cur
, /* btree cursor */
132 struct xfs_btree_block
*block
, /* generic btree block pointer */
133 int level
, /* level of the btree block */
134 struct xfs_buf
*bp
) /* buffer containing block, if any */
136 if (cur
->bc_flags
& XFS_BTREE_LONG_PTRS
)
137 return xfs_btree_check_lblock(cur
, block
, level
, bp
);
139 return xfs_btree_check_sblock(cur
, block
, level
, bp
);
143 * Check that (long) pointer is ok.
145 int /* error (0 or EFSCORRUPTED) */
146 xfs_btree_check_lptr(
147 struct xfs_btree_cur
*cur
, /* btree cursor */
148 xfs_dfsbno_t bno
, /* btree block disk address */
149 int level
) /* btree block level */
151 XFS_WANT_CORRUPTED_RETURN(
154 XFS_FSB_SANITY_CHECK(cur
->bc_mp
, bno
));
160 * Check that (short) pointer is ok.
162 STATIC
int /* error (0 or EFSCORRUPTED) */
163 xfs_btree_check_sptr(
164 struct xfs_btree_cur
*cur
, /* btree cursor */
165 xfs_agblock_t bno
, /* btree block disk address */
166 int level
) /* btree block level */
168 xfs_agblock_t agblocks
= cur
->bc_mp
->m_sb
.sb_agblocks
;
170 XFS_WANT_CORRUPTED_RETURN(
172 bno
!= NULLAGBLOCK
&&
179 * Check that block ptr is ok.
181 STATIC
int /* error (0 or EFSCORRUPTED) */
183 struct xfs_btree_cur
*cur
, /* btree cursor */
184 union xfs_btree_ptr
*ptr
, /* btree block disk address */
185 int index
, /* offset from ptr to check */
186 int level
) /* btree block level */
188 if (cur
->bc_flags
& XFS_BTREE_LONG_PTRS
) {
189 return xfs_btree_check_lptr(cur
,
190 be64_to_cpu((&ptr
->l
)[index
]), level
);
192 return xfs_btree_check_sptr(cur
,
193 be32_to_cpu((&ptr
->s
)[index
]), level
);
199 * Delete the btree cursor.
202 xfs_btree_del_cursor(
203 xfs_btree_cur_t
*cur
, /* btree cursor */
204 int error
) /* del because of error */
206 int i
; /* btree level */
209 * Clear the buffer pointers, and release the buffers.
210 * If we're doing this in the face of an error, we
211 * need to make sure to inspect all of the entries
212 * in the bc_bufs array for buffers to be unlocked.
213 * This is because some of the btree code works from
214 * level n down to 0, and if we get an error along
215 * the way we won't have initialized all the entries
218 for (i
= 0; i
< cur
->bc_nlevels
; i
++) {
220 xfs_btree_setbuf(cur
, i
, NULL
);
225 * Can't free a bmap cursor without having dealt with the
226 * allocated indirect blocks' accounting.
228 ASSERT(cur
->bc_btnum
!= XFS_BTNUM_BMAP
||
229 cur
->bc_private
.b
.allocated
== 0);
233 kmem_zone_free(xfs_btree_cur_zone
, cur
);
237 * Duplicate the btree cursor.
238 * Allocate a new one, copy the record, re-get the buffers.
241 xfs_btree_dup_cursor(
242 xfs_btree_cur_t
*cur
, /* input cursor */
243 xfs_btree_cur_t
**ncur
) /* output cursor */
245 xfs_buf_t
*bp
; /* btree block's buffer pointer */
246 int error
; /* error return value */
247 int i
; /* level number of btree block */
248 xfs_mount_t
*mp
; /* mount structure for filesystem */
249 xfs_btree_cur_t
*new; /* new cursor value */
250 xfs_trans_t
*tp
; /* transaction pointer, can be NULL */
256 * Allocate a new cursor like the old one.
258 new = cur
->bc_ops
->dup_cursor(cur
);
261 * Copy the record currently in the cursor.
263 new->bc_rec
= cur
->bc_rec
;
266 * For each level current, re-get the buffer and copy the ptr value.
268 for (i
= 0; i
< new->bc_nlevels
; i
++) {
269 new->bc_ptrs
[i
] = cur
->bc_ptrs
[i
];
270 new->bc_ra
[i
] = cur
->bc_ra
[i
];
271 if ((bp
= cur
->bc_bufs
[i
])) {
272 if ((error
= xfs_trans_read_buf(mp
, tp
, mp
->m_ddev_targp
,
273 XFS_BUF_ADDR(bp
), mp
->m_bsize
, 0, &bp
))) {
274 xfs_btree_del_cursor(new, error
);
278 new->bc_bufs
[i
] = bp
;
280 ASSERT(!XFS_BUF_GETERROR(bp
));
282 new->bc_bufs
[i
] = NULL
;
289 * XFS btree block layout and addressing:
291 * There are two types of blocks in the btree: leaf and non-leaf blocks.
293 * The leaf record start with a header then followed by records containing
294 * the values. A non-leaf block also starts with the same header, and
295 * then first contains lookup keys followed by an equal number of pointers
296 * to the btree blocks at the previous level.
298 * +--------+-------+-------+-------+-------+-------+-------+
299 * Leaf: | header | rec 1 | rec 2 | rec 3 | rec 4 | rec 5 | rec N |
300 * +--------+-------+-------+-------+-------+-------+-------+
302 * +--------+-------+-------+-------+-------+-------+-------+
303 * Non-Leaf: | header | key 1 | key 2 | key N | ptr 1 | ptr 2 | ptr N |
304 * +--------+-------+-------+-------+-------+-------+-------+
306 * The header is called struct xfs_btree_block for reasons better left unknown
307 * and comes in different versions for short (32bit) and long (64bit) block
308 * pointers. The record and key structures are defined by the btree instances
309 * and opaque to the btree core. The block pointers are simple disk endian
310 * integers, available in a short (32bit) and long (64bit) variant.
312 * The helpers below calculate the offset of a given record, key or pointer
313 * into a btree block (xfs_btree_*_offset) or return a pointer to the given
314 * record, key or pointer (xfs_btree_*_addr). Note that all addressing
315 * inside the btree block is done using indices starting at one, not zero!
319 * Return size of the btree block header for this btree instance.
321 static inline size_t xfs_btree_block_len(struct xfs_btree_cur
*cur
)
323 return (cur
->bc_flags
& XFS_BTREE_LONG_PTRS
) ?
324 XFS_BTREE_LBLOCK_LEN
:
325 XFS_BTREE_SBLOCK_LEN
;
329 * Return size of btree block pointers for this btree instance.
331 static inline size_t xfs_btree_ptr_len(struct xfs_btree_cur
*cur
)
333 return (cur
->bc_flags
& XFS_BTREE_LONG_PTRS
) ?
334 sizeof(__be64
) : sizeof(__be32
);
338 * Calculate offset of the n-th record in a btree block.
341 xfs_btree_rec_offset(
342 struct xfs_btree_cur
*cur
,
345 return xfs_btree_block_len(cur
) +
346 (n
- 1) * cur
->bc_ops
->rec_len
;
350 * Calculate offset of the n-th key in a btree block.
353 xfs_btree_key_offset(
354 struct xfs_btree_cur
*cur
,
357 return xfs_btree_block_len(cur
) +
358 (n
- 1) * cur
->bc_ops
->key_len
;
362 * Calculate offset of the n-th block pointer in a btree block.
365 xfs_btree_ptr_offset(
366 struct xfs_btree_cur
*cur
,
370 return xfs_btree_block_len(cur
) +
371 cur
->bc_ops
->get_maxrecs(cur
, level
) * cur
->bc_ops
->key_len
+
372 (n
- 1) * xfs_btree_ptr_len(cur
);
376 * Return a pointer to the n-th record in the btree block.
378 STATIC
union xfs_btree_rec
*
380 struct xfs_btree_cur
*cur
,
382 struct xfs_btree_block
*block
)
384 return (union xfs_btree_rec
*)
385 ((char *)block
+ xfs_btree_rec_offset(cur
, n
));
389 * Return a pointer to the n-th key in the btree block.
391 STATIC
union xfs_btree_key
*
393 struct xfs_btree_cur
*cur
,
395 struct xfs_btree_block
*block
)
397 return (union xfs_btree_key
*)
398 ((char *)block
+ xfs_btree_key_offset(cur
, n
));
402 * Return a pointer to the n-th block pointer in the btree block.
404 STATIC
union xfs_btree_ptr
*
406 struct xfs_btree_cur
*cur
,
408 struct xfs_btree_block
*block
)
410 int level
= xfs_btree_get_level(block
);
412 ASSERT(block
->bb_level
!= 0);
414 return (union xfs_btree_ptr
*)
415 ((char *)block
+ xfs_btree_ptr_offset(cur
, n
, level
));
419 * Get a the root block which is stored in the inode.
421 * For now this btree implementation assumes the btree root is always
422 * stored in the if_broot field of an inode fork.
424 STATIC
struct xfs_btree_block
*
426 struct xfs_btree_cur
*cur
)
428 struct xfs_ifork
*ifp
;
430 ifp
= XFS_IFORK_PTR(cur
->bc_private
.b
.ip
, cur
->bc_private
.b
.whichfork
);
431 return (struct xfs_btree_block
*)ifp
->if_broot
;
435 * Retrieve the block pointer from the cursor at the given level.
436 * This may be an inode btree root or from a buffer.
438 STATIC
struct xfs_btree_block
* /* generic btree block pointer */
440 struct xfs_btree_cur
*cur
, /* btree cursor */
441 int level
, /* level in btree */
442 struct xfs_buf
**bpp
) /* buffer containing the block */
444 if ((cur
->bc_flags
& XFS_BTREE_ROOT_IN_INODE
) &&
445 (level
== cur
->bc_nlevels
- 1)) {
447 return xfs_btree_get_iroot(cur
);
450 *bpp
= cur
->bc_bufs
[level
];
451 return XFS_BUF_TO_BLOCK(*bpp
);
455 * Get a buffer for the block, return it with no data read.
456 * Long-form addressing.
458 xfs_buf_t
* /* buffer for fsbno */
460 xfs_mount_t
*mp
, /* file system mount point */
461 xfs_trans_t
*tp
, /* transaction pointer */
462 xfs_fsblock_t fsbno
, /* file system block number */
463 uint lock
) /* lock flags for get_buf */
465 xfs_buf_t
*bp
; /* buffer pointer (return value) */
466 xfs_daddr_t d
; /* real disk block address */
468 ASSERT(fsbno
!= NULLFSBLOCK
);
469 d
= XFS_FSB_TO_DADDR(mp
, fsbno
);
470 bp
= xfs_trans_get_buf(tp
, mp
->m_ddev_targp
, d
, mp
->m_bsize
, lock
);
472 ASSERT(!XFS_BUF_GETERROR(bp
));
477 * Get a buffer for the block, return it with no data read.
478 * Short-form addressing.
480 xfs_buf_t
* /* buffer for agno/agbno */
482 xfs_mount_t
*mp
, /* file system mount point */
483 xfs_trans_t
*tp
, /* transaction pointer */
484 xfs_agnumber_t agno
, /* allocation group number */
485 xfs_agblock_t agbno
, /* allocation group block number */
486 uint lock
) /* lock flags for get_buf */
488 xfs_buf_t
*bp
; /* buffer pointer (return value) */
489 xfs_daddr_t d
; /* real disk block address */
491 ASSERT(agno
!= NULLAGNUMBER
);
492 ASSERT(agbno
!= NULLAGBLOCK
);
493 d
= XFS_AGB_TO_DADDR(mp
, agno
, agbno
);
494 bp
= xfs_trans_get_buf(tp
, mp
->m_ddev_targp
, d
, mp
->m_bsize
, lock
);
496 ASSERT(!XFS_BUF_GETERROR(bp
));
501 * Check for the cursor referring to the last block at the given level.
503 int /* 1=is last block, 0=not last block */
504 xfs_btree_islastblock(
505 xfs_btree_cur_t
*cur
, /* btree cursor */
506 int level
) /* level to check */
508 struct xfs_btree_block
*block
; /* generic btree block pointer */
509 xfs_buf_t
*bp
; /* buffer containing block */
511 block
= xfs_btree_get_block(cur
, level
, &bp
);
512 xfs_btree_check_block(cur
, block
, level
, bp
);
513 if (cur
->bc_flags
& XFS_BTREE_LONG_PTRS
)
514 return be64_to_cpu(block
->bb_u
.l
.bb_rightsib
) == NULLDFSBNO
;
516 return be32_to_cpu(block
->bb_u
.s
.bb_rightsib
) == NULLAGBLOCK
;
520 * Change the cursor to point to the first record at the given level.
521 * Other levels are unaffected.
523 STATIC
int /* success=1, failure=0 */
525 xfs_btree_cur_t
*cur
, /* btree cursor */
526 int level
) /* level to change */
528 struct xfs_btree_block
*block
; /* generic btree block pointer */
529 xfs_buf_t
*bp
; /* buffer containing block */
532 * Get the block pointer for this level.
534 block
= xfs_btree_get_block(cur
, level
, &bp
);
535 xfs_btree_check_block(cur
, block
, level
, bp
);
537 * It's empty, there is no such record.
539 if (!block
->bb_numrecs
)
542 * Set the ptr value to 1, that's the first record/key.
544 cur
->bc_ptrs
[level
] = 1;
549 * Change the cursor to point to the last record in the current block
550 * at the given level. Other levels are unaffected.
552 STATIC
int /* success=1, failure=0 */
554 xfs_btree_cur_t
*cur
, /* btree cursor */
555 int level
) /* level to change */
557 struct xfs_btree_block
*block
; /* generic btree block pointer */
558 xfs_buf_t
*bp
; /* buffer containing block */
561 * Get the block pointer for this level.
563 block
= xfs_btree_get_block(cur
, level
, &bp
);
564 xfs_btree_check_block(cur
, block
, level
, bp
);
566 * It's empty, there is no such record.
568 if (!block
->bb_numrecs
)
571 * Set the ptr value to numrecs, that's the last record/key.
573 cur
->bc_ptrs
[level
] = be16_to_cpu(block
->bb_numrecs
);
578 * Compute first and last byte offsets for the fields given.
579 * Interprets the offsets table, which contains struct field offsets.
583 __int64_t fields
, /* bitmask of fields */
584 const short *offsets
, /* table of field offsets */
585 int nbits
, /* number of bits to inspect */
586 int *first
, /* output: first byte offset */
587 int *last
) /* output: last byte offset */
589 int i
; /* current bit number */
590 __int64_t imask
; /* mask for current bit number */
594 * Find the lowest bit, so the first byte offset.
596 for (i
= 0, imask
= 1LL; ; i
++, imask
<<= 1) {
597 if (imask
& fields
) {
603 * Find the highest bit, so the last byte offset.
605 for (i
= nbits
- 1, imask
= 1LL << i
; ; i
--, imask
>>= 1) {
606 if (imask
& fields
) {
607 *last
= offsets
[i
+ 1] - 1;
614 * Get a buffer for the block, return it read in.
615 * Long-form addressing.
619 xfs_mount_t
*mp
, /* file system mount point */
620 xfs_trans_t
*tp
, /* transaction pointer */
621 xfs_fsblock_t fsbno
, /* file system block number */
622 uint lock
, /* lock flags for read_buf */
623 xfs_buf_t
**bpp
, /* buffer for fsbno */
624 int refval
) /* ref count value for buffer */
626 xfs_buf_t
*bp
; /* return value */
627 xfs_daddr_t d
; /* real disk block address */
630 ASSERT(fsbno
!= NULLFSBLOCK
);
631 d
= XFS_FSB_TO_DADDR(mp
, fsbno
);
632 if ((error
= xfs_trans_read_buf(mp
, tp
, mp
->m_ddev_targp
, d
,
633 mp
->m_bsize
, lock
, &bp
))) {
636 ASSERT(!bp
|| !XFS_BUF_GETERROR(bp
));
638 XFS_BUF_SET_VTYPE_REF(bp
, B_FS_MAP
, refval
);
645 * Read-ahead the block, don't wait for it, don't return a buffer.
646 * Long-form addressing.
650 xfs_btree_reada_bufl(
651 xfs_mount_t
*mp
, /* file system mount point */
652 xfs_fsblock_t fsbno
, /* file system block number */
653 xfs_extlen_t count
) /* count of filesystem blocks */
657 ASSERT(fsbno
!= NULLFSBLOCK
);
658 d
= XFS_FSB_TO_DADDR(mp
, fsbno
);
659 xfs_baread(mp
->m_ddev_targp
, d
, mp
->m_bsize
* count
);
663 * Read-ahead the block, don't wait for it, don't return a buffer.
664 * Short-form addressing.
668 xfs_btree_reada_bufs(
669 xfs_mount_t
*mp
, /* file system mount point */
670 xfs_agnumber_t agno
, /* allocation group number */
671 xfs_agblock_t agbno
, /* allocation group block number */
672 xfs_extlen_t count
) /* count of filesystem blocks */
676 ASSERT(agno
!= NULLAGNUMBER
);
677 ASSERT(agbno
!= NULLAGBLOCK
);
678 d
= XFS_AGB_TO_DADDR(mp
, agno
, agbno
);
679 xfs_baread(mp
->m_ddev_targp
, d
, mp
->m_bsize
* count
);
683 xfs_btree_readahead_lblock(
684 struct xfs_btree_cur
*cur
,
686 struct xfs_btree_block
*block
)
689 xfs_dfsbno_t left
= be64_to_cpu(block
->bb_u
.l
.bb_leftsib
);
690 xfs_dfsbno_t right
= be64_to_cpu(block
->bb_u
.l
.bb_rightsib
);
692 if ((lr
& XFS_BTCUR_LEFTRA
) && left
!= NULLDFSBNO
) {
693 xfs_btree_reada_bufl(cur
->bc_mp
, left
, 1);
697 if ((lr
& XFS_BTCUR_RIGHTRA
) && right
!= NULLDFSBNO
) {
698 xfs_btree_reada_bufl(cur
->bc_mp
, right
, 1);
706 xfs_btree_readahead_sblock(
707 struct xfs_btree_cur
*cur
,
709 struct xfs_btree_block
*block
)
712 xfs_agblock_t left
= be32_to_cpu(block
->bb_u
.s
.bb_leftsib
);
713 xfs_agblock_t right
= be32_to_cpu(block
->bb_u
.s
.bb_rightsib
);
716 if ((lr
& XFS_BTCUR_LEFTRA
) && left
!= NULLAGBLOCK
) {
717 xfs_btree_reada_bufs(cur
->bc_mp
, cur
->bc_private
.a
.agno
,
722 if ((lr
& XFS_BTCUR_RIGHTRA
) && right
!= NULLAGBLOCK
) {
723 xfs_btree_reada_bufs(cur
->bc_mp
, cur
->bc_private
.a
.agno
,
732 * Read-ahead btree blocks, at the given level.
733 * Bits in lr are set from XFS_BTCUR_{LEFT,RIGHT}RA.
737 struct xfs_btree_cur
*cur
, /* btree cursor */
738 int lev
, /* level in btree */
739 int lr
) /* left/right bits */
741 struct xfs_btree_block
*block
;
744 * No readahead needed if we are at the root level and the
745 * btree root is stored in the inode.
747 if ((cur
->bc_flags
& XFS_BTREE_ROOT_IN_INODE
) &&
748 (lev
== cur
->bc_nlevels
- 1))
751 if ((cur
->bc_ra
[lev
] | lr
) == cur
->bc_ra
[lev
])
754 cur
->bc_ra
[lev
] |= lr
;
755 block
= XFS_BUF_TO_BLOCK(cur
->bc_bufs
[lev
]);
757 if (cur
->bc_flags
& XFS_BTREE_LONG_PTRS
)
758 return xfs_btree_readahead_lblock(cur
, lr
, block
);
759 return xfs_btree_readahead_sblock(cur
, lr
, block
);
763 * Set the buffer for level "lev" in the cursor to bp, releasing
764 * any previous buffer.
768 xfs_btree_cur_t
*cur
, /* btree cursor */
769 int lev
, /* level in btree */
770 xfs_buf_t
*bp
) /* new buffer to set */
772 struct xfs_btree_block
*b
; /* btree block */
773 xfs_buf_t
*obp
; /* old buffer pointer */
775 obp
= cur
->bc_bufs
[lev
];
777 xfs_trans_brelse(cur
->bc_tp
, obp
);
778 cur
->bc_bufs
[lev
] = bp
;
782 b
= XFS_BUF_TO_BLOCK(bp
);
783 if (cur
->bc_flags
& XFS_BTREE_LONG_PTRS
) {
784 if (be64_to_cpu(b
->bb_u
.l
.bb_leftsib
) == NULLDFSBNO
)
785 cur
->bc_ra
[lev
] |= XFS_BTCUR_LEFTRA
;
786 if (be64_to_cpu(b
->bb_u
.l
.bb_rightsib
) == NULLDFSBNO
)
787 cur
->bc_ra
[lev
] |= XFS_BTCUR_RIGHTRA
;
789 if (be32_to_cpu(b
->bb_u
.s
.bb_leftsib
) == NULLAGBLOCK
)
790 cur
->bc_ra
[lev
] |= XFS_BTCUR_LEFTRA
;
791 if (be32_to_cpu(b
->bb_u
.s
.bb_rightsib
) == NULLAGBLOCK
)
792 cur
->bc_ra
[lev
] |= XFS_BTCUR_RIGHTRA
;
797 xfs_btree_ptr_is_null(
798 struct xfs_btree_cur
*cur
,
799 union xfs_btree_ptr
*ptr
)
801 if (cur
->bc_flags
& XFS_BTREE_LONG_PTRS
)
802 return be64_to_cpu(ptr
->l
) == NULLDFSBNO
;
804 return be32_to_cpu(ptr
->s
) == NULLAGBLOCK
;
808 xfs_btree_set_ptr_null(
809 struct xfs_btree_cur
*cur
,
810 union xfs_btree_ptr
*ptr
)
812 if (cur
->bc_flags
& XFS_BTREE_LONG_PTRS
)
813 ptr
->l
= cpu_to_be64(NULLDFSBNO
);
815 ptr
->s
= cpu_to_be32(NULLAGBLOCK
);
819 * Get/set/init sibling pointers
822 xfs_btree_get_sibling(
823 struct xfs_btree_cur
*cur
,
824 struct xfs_btree_block
*block
,
825 union xfs_btree_ptr
*ptr
,
828 ASSERT(lr
== XFS_BB_LEFTSIB
|| lr
== XFS_BB_RIGHTSIB
);
830 if (cur
->bc_flags
& XFS_BTREE_LONG_PTRS
) {
831 if (lr
== XFS_BB_RIGHTSIB
)
832 ptr
->l
= block
->bb_u
.l
.bb_rightsib
;
834 ptr
->l
= block
->bb_u
.l
.bb_leftsib
;
836 if (lr
== XFS_BB_RIGHTSIB
)
837 ptr
->s
= block
->bb_u
.s
.bb_rightsib
;
839 ptr
->s
= block
->bb_u
.s
.bb_leftsib
;
844 xfs_btree_set_sibling(
845 struct xfs_btree_cur
*cur
,
846 struct xfs_btree_block
*block
,
847 union xfs_btree_ptr
*ptr
,
850 ASSERT(lr
== XFS_BB_LEFTSIB
|| lr
== XFS_BB_RIGHTSIB
);
852 if (cur
->bc_flags
& XFS_BTREE_LONG_PTRS
) {
853 if (lr
== XFS_BB_RIGHTSIB
)
854 block
->bb_u
.l
.bb_rightsib
= ptr
->l
;
856 block
->bb_u
.l
.bb_leftsib
= ptr
->l
;
858 if (lr
== XFS_BB_RIGHTSIB
)
859 block
->bb_u
.s
.bb_rightsib
= ptr
->s
;
861 block
->bb_u
.s
.bb_leftsib
= ptr
->s
;
866 xfs_btree_init_block(
867 struct xfs_btree_cur
*cur
,
870 struct xfs_btree_block
*new) /* new block */
872 new->bb_magic
= cpu_to_be32(xfs_magics
[cur
->bc_btnum
]);
873 new->bb_level
= cpu_to_be16(level
);
874 new->bb_numrecs
= cpu_to_be16(numrecs
);
876 if (cur
->bc_flags
& XFS_BTREE_LONG_PTRS
) {
877 new->bb_u
.l
.bb_leftsib
= cpu_to_be64(NULLDFSBNO
);
878 new->bb_u
.l
.bb_rightsib
= cpu_to_be64(NULLDFSBNO
);
880 new->bb_u
.s
.bb_leftsib
= cpu_to_be32(NULLAGBLOCK
);
881 new->bb_u
.s
.bb_rightsib
= cpu_to_be32(NULLAGBLOCK
);
886 * Return true if ptr is the last record in the btree and
887 * we need to track updateѕ to this record. The decision
888 * will be further refined in the update_lastrec method.
891 xfs_btree_is_lastrec(
892 struct xfs_btree_cur
*cur
,
893 struct xfs_btree_block
*block
,
896 union xfs_btree_ptr ptr
;
900 if (!(cur
->bc_flags
& XFS_BTREE_LASTREC_UPDATE
))
903 xfs_btree_get_sibling(cur
, block
, &ptr
, XFS_BB_RIGHTSIB
);
904 if (!xfs_btree_ptr_is_null(cur
, &ptr
))
910 xfs_btree_buf_to_ptr(
911 struct xfs_btree_cur
*cur
,
913 union xfs_btree_ptr
*ptr
)
915 if (cur
->bc_flags
& XFS_BTREE_LONG_PTRS
)
916 ptr
->l
= cpu_to_be64(XFS_DADDR_TO_FSB(cur
->bc_mp
,
919 ptr
->s
= cpu_to_be32(xfs_daddr_to_agbno(cur
->bc_mp
,
925 xfs_btree_ptr_to_daddr(
926 struct xfs_btree_cur
*cur
,
927 union xfs_btree_ptr
*ptr
)
929 if (cur
->bc_flags
& XFS_BTREE_LONG_PTRS
) {
930 ASSERT(be64_to_cpu(ptr
->l
) != NULLDFSBNO
);
932 return XFS_FSB_TO_DADDR(cur
->bc_mp
, be64_to_cpu(ptr
->l
));
934 ASSERT(cur
->bc_private
.a
.agno
!= NULLAGNUMBER
);
935 ASSERT(be32_to_cpu(ptr
->s
) != NULLAGBLOCK
);
937 return XFS_AGB_TO_DADDR(cur
->bc_mp
, cur
->bc_private
.a
.agno
,
938 be32_to_cpu(ptr
->s
));
944 struct xfs_btree_cur
*cur
,
947 switch (cur
->bc_btnum
) {
950 XFS_BUF_SET_VTYPE_REF(*bpp
, B_FS_MAP
, XFS_ALLOC_BTREE_REF
);
953 XFS_BUF_SET_VTYPE_REF(*bpp
, B_FS_INOMAP
, XFS_INO_BTREE_REF
);
956 XFS_BUF_SET_VTYPE_REF(*bpp
, B_FS_MAP
, XFS_BMAP_BTREE_REF
);
964 xfs_btree_get_buf_block(
965 struct xfs_btree_cur
*cur
,
966 union xfs_btree_ptr
*ptr
,
968 struct xfs_btree_block
**block
,
969 struct xfs_buf
**bpp
)
971 struct xfs_mount
*mp
= cur
->bc_mp
;
974 /* need to sort out how callers deal with failures first */
975 ASSERT(!(flags
& XBF_TRYLOCK
));
977 d
= xfs_btree_ptr_to_daddr(cur
, ptr
);
978 *bpp
= xfs_trans_get_buf(cur
->bc_tp
, mp
->m_ddev_targp
, d
,
982 ASSERT(!XFS_BUF_GETERROR(*bpp
));
984 *block
= XFS_BUF_TO_BLOCK(*bpp
);
989 * Read in the buffer at the given ptr and return the buffer and
990 * the block pointer within the buffer.
993 xfs_btree_read_buf_block(
994 struct xfs_btree_cur
*cur
,
995 union xfs_btree_ptr
*ptr
,
998 struct xfs_btree_block
**block
,
999 struct xfs_buf
**bpp
)
1001 struct xfs_mount
*mp
= cur
->bc_mp
;
1005 /* need to sort out how callers deal with failures first */
1006 ASSERT(!(flags
& XBF_TRYLOCK
));
1008 d
= xfs_btree_ptr_to_daddr(cur
, ptr
);
1009 error
= xfs_trans_read_buf(mp
, cur
->bc_tp
, mp
->m_ddev_targp
, d
,
1010 mp
->m_bsize
, flags
, bpp
);
1014 ASSERT(*bpp
!= NULL
);
1015 ASSERT(!XFS_BUF_GETERROR(*bpp
));
1017 xfs_btree_set_refs(cur
, *bpp
);
1018 *block
= XFS_BUF_TO_BLOCK(*bpp
);
1020 error
= xfs_btree_check_block(cur
, *block
, level
, *bpp
);
1022 xfs_trans_brelse(cur
->bc_tp
, *bpp
);
1027 * Copy keys from one btree block to another.
1030 xfs_btree_copy_keys(
1031 struct xfs_btree_cur
*cur
,
1032 union xfs_btree_key
*dst_key
,
1033 union xfs_btree_key
*src_key
,
1036 ASSERT(numkeys
>= 0);
1037 memcpy(dst_key
, src_key
, numkeys
* cur
->bc_ops
->key_len
);
1041 * Copy records from one btree block to another.
1044 xfs_btree_copy_recs(
1045 struct xfs_btree_cur
*cur
,
1046 union xfs_btree_rec
*dst_rec
,
1047 union xfs_btree_rec
*src_rec
,
1050 ASSERT(numrecs
>= 0);
1051 memcpy(dst_rec
, src_rec
, numrecs
* cur
->bc_ops
->rec_len
);
1055 * Copy block pointers from one btree block to another.
1058 xfs_btree_copy_ptrs(
1059 struct xfs_btree_cur
*cur
,
1060 union xfs_btree_ptr
*dst_ptr
,
1061 union xfs_btree_ptr
*src_ptr
,
1064 ASSERT(numptrs
>= 0);
1065 memcpy(dst_ptr
, src_ptr
, numptrs
* xfs_btree_ptr_len(cur
));
1069 * Shift keys one index left/right inside a single btree block.
1072 xfs_btree_shift_keys(
1073 struct xfs_btree_cur
*cur
,
1074 union xfs_btree_key
*key
,
1080 ASSERT(numkeys
>= 0);
1081 ASSERT(dir
== 1 || dir
== -1);
1083 dst_key
= (char *)key
+ (dir
* cur
->bc_ops
->key_len
);
1084 memmove(dst_key
, key
, numkeys
* cur
->bc_ops
->key_len
);
1088 * Shift records one index left/right inside a single btree block.
1091 xfs_btree_shift_recs(
1092 struct xfs_btree_cur
*cur
,
1093 union xfs_btree_rec
*rec
,
1099 ASSERT(numrecs
>= 0);
1100 ASSERT(dir
== 1 || dir
== -1);
1102 dst_rec
= (char *)rec
+ (dir
* cur
->bc_ops
->rec_len
);
1103 memmove(dst_rec
, rec
, numrecs
* cur
->bc_ops
->rec_len
);
1107 * Shift block pointers one index left/right inside a single btree block.
1110 xfs_btree_shift_ptrs(
1111 struct xfs_btree_cur
*cur
,
1112 union xfs_btree_ptr
*ptr
,
1118 ASSERT(numptrs
>= 0);
1119 ASSERT(dir
== 1 || dir
== -1);
1121 dst_ptr
= (char *)ptr
+ (dir
* xfs_btree_ptr_len(cur
));
1122 memmove(dst_ptr
, ptr
, numptrs
* xfs_btree_ptr_len(cur
));
1126 * Log key values from the btree block.
1130 struct xfs_btree_cur
*cur
,
1135 XFS_BTREE_TRACE_CURSOR(cur
, XBT_ENTRY
);
1136 XFS_BTREE_TRACE_ARGBII(cur
, bp
, first
, last
);
1139 xfs_trans_log_buf(cur
->bc_tp
, bp
,
1140 xfs_btree_key_offset(cur
, first
),
1141 xfs_btree_key_offset(cur
, last
+ 1) - 1);
1143 xfs_trans_log_inode(cur
->bc_tp
, cur
->bc_private
.b
.ip
,
1144 xfs_ilog_fbroot(cur
->bc_private
.b
.whichfork
));
1147 XFS_BTREE_TRACE_CURSOR(cur
, XBT_EXIT
);
1151 * Log record values from the btree block.
1155 struct xfs_btree_cur
*cur
,
1160 XFS_BTREE_TRACE_CURSOR(cur
, XBT_ENTRY
);
1161 XFS_BTREE_TRACE_ARGBII(cur
, bp
, first
, last
);
1163 xfs_trans_log_buf(cur
->bc_tp
, bp
,
1164 xfs_btree_rec_offset(cur
, first
),
1165 xfs_btree_rec_offset(cur
, last
+ 1) - 1);
1167 XFS_BTREE_TRACE_CURSOR(cur
, XBT_EXIT
);
1171 * Log block pointer fields from a btree block (nonleaf).
1175 struct xfs_btree_cur
*cur
, /* btree cursor */
1176 struct xfs_buf
*bp
, /* buffer containing btree block */
1177 int first
, /* index of first pointer to log */
1178 int last
) /* index of last pointer to log */
1180 XFS_BTREE_TRACE_CURSOR(cur
, XBT_ENTRY
);
1181 XFS_BTREE_TRACE_ARGBII(cur
, bp
, first
, last
);
1184 struct xfs_btree_block
*block
= XFS_BUF_TO_BLOCK(bp
);
1185 int level
= xfs_btree_get_level(block
);
1187 xfs_trans_log_buf(cur
->bc_tp
, bp
,
1188 xfs_btree_ptr_offset(cur
, first
, level
),
1189 xfs_btree_ptr_offset(cur
, last
+ 1, level
) - 1);
1191 xfs_trans_log_inode(cur
->bc_tp
, cur
->bc_private
.b
.ip
,
1192 xfs_ilog_fbroot(cur
->bc_private
.b
.whichfork
));
1195 XFS_BTREE_TRACE_CURSOR(cur
, XBT_EXIT
);
1199 * Log fields from a btree block header.
1202 xfs_btree_log_block(
1203 struct xfs_btree_cur
*cur
, /* btree cursor */
1204 struct xfs_buf
*bp
, /* buffer containing btree block */
1205 int fields
) /* mask of fields: XFS_BB_... */
1207 int first
; /* first byte offset logged */
1208 int last
; /* last byte offset logged */
1209 static const short soffsets
[] = { /* table of offsets (short) */
1210 offsetof(struct xfs_btree_block
, bb_magic
),
1211 offsetof(struct xfs_btree_block
, bb_level
),
1212 offsetof(struct xfs_btree_block
, bb_numrecs
),
1213 offsetof(struct xfs_btree_block
, bb_u
.s
.bb_leftsib
),
1214 offsetof(struct xfs_btree_block
, bb_u
.s
.bb_rightsib
),
1215 XFS_BTREE_SBLOCK_LEN
1217 static const short loffsets
[] = { /* table of offsets (long) */
1218 offsetof(struct xfs_btree_block
, bb_magic
),
1219 offsetof(struct xfs_btree_block
, bb_level
),
1220 offsetof(struct xfs_btree_block
, bb_numrecs
),
1221 offsetof(struct xfs_btree_block
, bb_u
.l
.bb_leftsib
),
1222 offsetof(struct xfs_btree_block
, bb_u
.l
.bb_rightsib
),
1223 XFS_BTREE_LBLOCK_LEN
1226 XFS_BTREE_TRACE_CURSOR(cur
, XBT_ENTRY
);
1227 XFS_BTREE_TRACE_ARGBI(cur
, bp
, fields
);
1230 xfs_btree_offsets(fields
,
1231 (cur
->bc_flags
& XFS_BTREE_LONG_PTRS
) ?
1232 loffsets
: soffsets
,
1233 XFS_BB_NUM_BITS
, &first
, &last
);
1234 xfs_trans_log_buf(cur
->bc_tp
, bp
, first
, last
);
1236 xfs_trans_log_inode(cur
->bc_tp
, cur
->bc_private
.b
.ip
,
1237 xfs_ilog_fbroot(cur
->bc_private
.b
.whichfork
));
1240 XFS_BTREE_TRACE_CURSOR(cur
, XBT_EXIT
);
1244 * Increment cursor by one record at the level.
1245 * For nonzero levels the leaf-ward information is untouched.
1248 xfs_btree_increment(
1249 struct xfs_btree_cur
*cur
,
1251 int *stat
) /* success/failure */
1253 struct xfs_btree_block
*block
;
1254 union xfs_btree_ptr ptr
;
1256 int error
; /* error return value */
1259 XFS_BTREE_TRACE_CURSOR(cur
, XBT_ENTRY
);
1260 XFS_BTREE_TRACE_ARGI(cur
, level
);
1262 ASSERT(level
< cur
->bc_nlevels
);
1264 /* Read-ahead to the right at this level. */
1265 xfs_btree_readahead(cur
, level
, XFS_BTCUR_RIGHTRA
);
1267 /* Get a pointer to the btree block. */
1268 block
= xfs_btree_get_block(cur
, level
, &bp
);
1271 error
= xfs_btree_check_block(cur
, block
, level
, bp
);
1276 /* We're done if we remain in the block after the increment. */
1277 if (++cur
->bc_ptrs
[level
] <= xfs_btree_get_numrecs(block
))
1280 /* Fail if we just went off the right edge of the tree. */
1281 xfs_btree_get_sibling(cur
, block
, &ptr
, XFS_BB_RIGHTSIB
);
1282 if (xfs_btree_ptr_is_null(cur
, &ptr
))
1285 XFS_BTREE_STATS_INC(cur
, increment
);
1288 * March up the tree incrementing pointers.
1289 * Stop when we don't go off the right edge of a block.
1291 for (lev
= level
+ 1; lev
< cur
->bc_nlevels
; lev
++) {
1292 block
= xfs_btree_get_block(cur
, lev
, &bp
);
1295 error
= xfs_btree_check_block(cur
, block
, lev
, bp
);
1300 if (++cur
->bc_ptrs
[lev
] <= xfs_btree_get_numrecs(block
))
1303 /* Read-ahead the right block for the next loop. */
1304 xfs_btree_readahead(cur
, lev
, XFS_BTCUR_RIGHTRA
);
1308 * If we went off the root then we are either seriously
1309 * confused or have the tree root in an inode.
1311 if (lev
== cur
->bc_nlevels
) {
1312 if (cur
->bc_flags
& XFS_BTREE_ROOT_IN_INODE
)
1315 error
= EFSCORRUPTED
;
1318 ASSERT(lev
< cur
->bc_nlevels
);
1321 * Now walk back down the tree, fixing up the cursor's buffer
1322 * pointers and key numbers.
1324 for (block
= xfs_btree_get_block(cur
, lev
, &bp
); lev
> level
; ) {
1325 union xfs_btree_ptr
*ptrp
;
1327 ptrp
= xfs_btree_ptr_addr(cur
, cur
->bc_ptrs
[lev
], block
);
1328 error
= xfs_btree_read_buf_block(cur
, ptrp
, --lev
,
1333 xfs_btree_setbuf(cur
, lev
, bp
);
1334 cur
->bc_ptrs
[lev
] = 1;
1337 XFS_BTREE_TRACE_CURSOR(cur
, XBT_EXIT
);
1342 XFS_BTREE_TRACE_CURSOR(cur
, XBT_EXIT
);
1347 XFS_BTREE_TRACE_CURSOR(cur
, XBT_ERROR
);
1352 * Decrement cursor by one record at the level.
1353 * For nonzero levels the leaf-ward information is untouched.
1356 xfs_btree_decrement(
1357 struct xfs_btree_cur
*cur
,
1359 int *stat
) /* success/failure */
1361 struct xfs_btree_block
*block
;
1363 int error
; /* error return value */
1365 union xfs_btree_ptr ptr
;
1367 XFS_BTREE_TRACE_CURSOR(cur
, XBT_ENTRY
);
1368 XFS_BTREE_TRACE_ARGI(cur
, level
);
1370 ASSERT(level
< cur
->bc_nlevels
);
1372 /* Read-ahead to the left at this level. */
1373 xfs_btree_readahead(cur
, level
, XFS_BTCUR_LEFTRA
);
1375 /* We're done if we remain in the block after the decrement. */
1376 if (--cur
->bc_ptrs
[level
] > 0)
1379 /* Get a pointer to the btree block. */
1380 block
= xfs_btree_get_block(cur
, level
, &bp
);
1383 error
= xfs_btree_check_block(cur
, block
, level
, bp
);
1388 /* Fail if we just went off the left edge of the tree. */
1389 xfs_btree_get_sibling(cur
, block
, &ptr
, XFS_BB_LEFTSIB
);
1390 if (xfs_btree_ptr_is_null(cur
, &ptr
))
1393 XFS_BTREE_STATS_INC(cur
, decrement
);
1396 * March up the tree decrementing pointers.
1397 * Stop when we don't go off the left edge of a block.
1399 for (lev
= level
+ 1; lev
< cur
->bc_nlevels
; lev
++) {
1400 if (--cur
->bc_ptrs
[lev
] > 0)
1402 /* Read-ahead the left block for the next loop. */
1403 xfs_btree_readahead(cur
, lev
, XFS_BTCUR_LEFTRA
);
1407 * If we went off the root then we are seriously confused.
1408 * or the root of the tree is in an inode.
1410 if (lev
== cur
->bc_nlevels
) {
1411 if (cur
->bc_flags
& XFS_BTREE_ROOT_IN_INODE
)
1414 error
= EFSCORRUPTED
;
1417 ASSERT(lev
< cur
->bc_nlevels
);
1420 * Now walk back down the tree, fixing up the cursor's buffer
1421 * pointers and key numbers.
1423 for (block
= xfs_btree_get_block(cur
, lev
, &bp
); lev
> level
; ) {
1424 union xfs_btree_ptr
*ptrp
;
1426 ptrp
= xfs_btree_ptr_addr(cur
, cur
->bc_ptrs
[lev
], block
);
1427 error
= xfs_btree_read_buf_block(cur
, ptrp
, --lev
,
1431 xfs_btree_setbuf(cur
, lev
, bp
);
1432 cur
->bc_ptrs
[lev
] = xfs_btree_get_numrecs(block
);
1435 XFS_BTREE_TRACE_CURSOR(cur
, XBT_EXIT
);
1440 XFS_BTREE_TRACE_CURSOR(cur
, XBT_EXIT
);
1445 XFS_BTREE_TRACE_CURSOR(cur
, XBT_ERROR
);
1450 xfs_btree_lookup_get_block(
1451 struct xfs_btree_cur
*cur
, /* btree cursor */
1452 int level
, /* level in the btree */
1453 union xfs_btree_ptr
*pp
, /* ptr to btree block */
1454 struct xfs_btree_block
**blkp
) /* return btree block */
1456 struct xfs_buf
*bp
; /* buffer pointer for btree block */
1459 /* special case the root block if in an inode */
1460 if ((cur
->bc_flags
& XFS_BTREE_ROOT_IN_INODE
) &&
1461 (level
== cur
->bc_nlevels
- 1)) {
1462 *blkp
= xfs_btree_get_iroot(cur
);
1467 * If the old buffer at this level for the disk address we are
1468 * looking for re-use it.
1470 * Otherwise throw it away and get a new one.
1472 bp
= cur
->bc_bufs
[level
];
1473 if (bp
&& XFS_BUF_ADDR(bp
) == xfs_btree_ptr_to_daddr(cur
, pp
)) {
1474 *blkp
= XFS_BUF_TO_BLOCK(bp
);
1478 error
= xfs_btree_read_buf_block(cur
, pp
, level
, 0, blkp
, &bp
);
1482 xfs_btree_setbuf(cur
, level
, bp
);
1487 * Get current search key. For level 0 we don't actually have a key
1488 * structure so we make one up from the record. For all other levels
1489 * we just return the right key.
1491 STATIC
union xfs_btree_key
*
1492 xfs_lookup_get_search_key(
1493 struct xfs_btree_cur
*cur
,
1496 struct xfs_btree_block
*block
,
1497 union xfs_btree_key
*kp
)
1500 cur
->bc_ops
->init_key_from_rec(kp
,
1501 xfs_btree_rec_addr(cur
, keyno
, block
));
1505 return xfs_btree_key_addr(cur
, keyno
, block
);
1509 * Lookup the record. The cursor is made to point to it, based on dir.
1510 * Return 0 if can't find any such record, 1 for success.
1514 struct xfs_btree_cur
*cur
, /* btree cursor */
1515 xfs_lookup_t dir
, /* <=, ==, or >= */
1516 int *stat
) /* success/failure */
1518 struct xfs_btree_block
*block
; /* current btree block */
1519 __int64_t diff
; /* difference for the current key */
1520 int error
; /* error return value */
1521 int keyno
; /* current key number */
1522 int level
; /* level in the btree */
1523 union xfs_btree_ptr
*pp
; /* ptr to btree block */
1524 union xfs_btree_ptr ptr
; /* ptr to btree block */
1526 XFS_BTREE_TRACE_CURSOR(cur
, XBT_ENTRY
);
1527 XFS_BTREE_TRACE_ARGI(cur
, dir
);
1529 XFS_BTREE_STATS_INC(cur
, lookup
);
1534 /* initialise start pointer from cursor */
1535 cur
->bc_ops
->init_ptr_from_cur(cur
, &ptr
);
1539 * Iterate over each level in the btree, starting at the root.
1540 * For each level above the leaves, find the key we need, based
1541 * on the lookup record, then follow the corresponding block
1542 * pointer down to the next level.
1544 for (level
= cur
->bc_nlevels
- 1, diff
= 1; level
>= 0; level
--) {
1545 /* Get the block we need to do the lookup on. */
1546 error
= xfs_btree_lookup_get_block(cur
, level
, pp
, &block
);
1552 * If we already had a key match at a higher level, we
1553 * know we need to use the first entry in this block.
1557 /* Otherwise search this block. Do a binary search. */
1559 int high
; /* high entry number */
1560 int low
; /* low entry number */
1562 /* Set low and high entry numbers, 1-based. */
1564 high
= xfs_btree_get_numrecs(block
);
1566 /* Block is empty, must be an empty leaf. */
1567 ASSERT(level
== 0 && cur
->bc_nlevels
== 1);
1569 cur
->bc_ptrs
[0] = dir
!= XFS_LOOKUP_LE
;
1570 XFS_BTREE_TRACE_CURSOR(cur
, XBT_EXIT
);
1575 /* Binary search the block. */
1576 while (low
<= high
) {
1577 union xfs_btree_key key
;
1578 union xfs_btree_key
*kp
;
1580 XFS_BTREE_STATS_INC(cur
, compare
);
1582 /* keyno is average of low and high. */
1583 keyno
= (low
+ high
) >> 1;
1585 /* Get current search key */
1586 kp
= xfs_lookup_get_search_key(cur
, level
,
1587 keyno
, block
, &key
);
1590 * Compute difference to get next direction:
1591 * - less than, move right
1592 * - greater than, move left
1593 * - equal, we're done
1595 diff
= cur
->bc_ops
->key_diff(cur
, kp
);
1606 * If there are more levels, set up for the next level
1607 * by getting the block number and filling in the cursor.
1611 * If we moved left, need the previous key number,
1612 * unless there isn't one.
1614 if (diff
> 0 && --keyno
< 1)
1616 pp
= xfs_btree_ptr_addr(cur
, keyno
, block
);
1619 error
= xfs_btree_check_ptr(cur
, pp
, 0, level
);
1623 cur
->bc_ptrs
[level
] = keyno
;
1627 /* Done with the search. See if we need to adjust the results. */
1628 if (dir
!= XFS_LOOKUP_LE
&& diff
< 0) {
1631 * If ge search and we went off the end of the block, but it's
1632 * not the last block, we're in the wrong block.
1634 xfs_btree_get_sibling(cur
, block
, &ptr
, XFS_BB_RIGHTSIB
);
1635 if (dir
== XFS_LOOKUP_GE
&&
1636 keyno
> xfs_btree_get_numrecs(block
) &&
1637 !xfs_btree_ptr_is_null(cur
, &ptr
)) {
1640 cur
->bc_ptrs
[0] = keyno
;
1641 error
= xfs_btree_increment(cur
, 0, &i
);
1644 XFS_WANT_CORRUPTED_RETURN(i
== 1);
1645 XFS_BTREE_TRACE_CURSOR(cur
, XBT_EXIT
);
1649 } else if (dir
== XFS_LOOKUP_LE
&& diff
> 0)
1651 cur
->bc_ptrs
[0] = keyno
;
1653 /* Return if we succeeded or not. */
1654 if (keyno
== 0 || keyno
> xfs_btree_get_numrecs(block
))
1656 else if (dir
!= XFS_LOOKUP_EQ
|| diff
== 0)
1660 XFS_BTREE_TRACE_CURSOR(cur
, XBT_EXIT
);
1664 XFS_BTREE_TRACE_CURSOR(cur
, XBT_ERROR
);
1669 * Update keys at all levels from here to the root along the cursor's path.
1673 struct xfs_btree_cur
*cur
,
1674 union xfs_btree_key
*keyp
,
1677 struct xfs_btree_block
*block
;
1679 union xfs_btree_key
*kp
;
1682 XFS_BTREE_TRACE_CURSOR(cur
, XBT_ENTRY
);
1683 XFS_BTREE_TRACE_ARGIK(cur
, level
, keyp
);
1685 ASSERT(!(cur
->bc_flags
& XFS_BTREE_ROOT_IN_INODE
) || level
>= 1);
1688 * Go up the tree from this level toward the root.
1689 * At each level, update the key value to the value input.
1690 * Stop when we reach a level where the cursor isn't pointing
1691 * at the first entry in the block.
1693 for (ptr
= 1; ptr
== 1 && level
< cur
->bc_nlevels
; level
++) {
1697 block
= xfs_btree_get_block(cur
, level
, &bp
);
1699 error
= xfs_btree_check_block(cur
, block
, level
, bp
);
1701 XFS_BTREE_TRACE_CURSOR(cur
, XBT_ERROR
);
1705 ptr
= cur
->bc_ptrs
[level
];
1706 kp
= xfs_btree_key_addr(cur
, ptr
, block
);
1707 xfs_btree_copy_keys(cur
, kp
, keyp
, 1);
1708 xfs_btree_log_keys(cur
, bp
, ptr
, ptr
);
1711 XFS_BTREE_TRACE_CURSOR(cur
, XBT_EXIT
);
1716 * Update the record referred to by cur to the value in the
1717 * given record. This either works (return 0) or gets an
1718 * EFSCORRUPTED error.
1722 struct xfs_btree_cur
*cur
,
1723 union xfs_btree_rec
*rec
)
1725 struct xfs_btree_block
*block
;
1729 union xfs_btree_rec
*rp
;
1731 XFS_BTREE_TRACE_CURSOR(cur
, XBT_ENTRY
);
1732 XFS_BTREE_TRACE_ARGR(cur
, rec
);
1734 /* Pick up the current block. */
1735 block
= xfs_btree_get_block(cur
, 0, &bp
);
1738 error
= xfs_btree_check_block(cur
, block
, 0, bp
);
1742 /* Get the address of the rec to be updated. */
1743 ptr
= cur
->bc_ptrs
[0];
1744 rp
= xfs_btree_rec_addr(cur
, ptr
, block
);
1746 /* Fill in the new contents and log them. */
1747 xfs_btree_copy_recs(cur
, rp
, rec
, 1);
1748 xfs_btree_log_recs(cur
, bp
, ptr
, ptr
);
1751 * If we are tracking the last record in the tree and
1752 * we are at the far right edge of the tree, update it.
1754 if (xfs_btree_is_lastrec(cur
, block
, 0)) {
1755 cur
->bc_ops
->update_lastrec(cur
, block
, rec
,
1756 ptr
, LASTREC_UPDATE
);
1759 /* Updating first rec in leaf. Pass new key value up to our parent. */
1761 union xfs_btree_key key
;
1763 cur
->bc_ops
->init_key_from_rec(&key
, rec
);
1764 error
= xfs_btree_updkey(cur
, &key
, 1);
1769 XFS_BTREE_TRACE_CURSOR(cur
, XBT_EXIT
);
1773 XFS_BTREE_TRACE_CURSOR(cur
, XBT_ERROR
);
1778 * Move 1 record left from cur/level if possible.
1779 * Update cur to reflect the new path.
1781 STATIC
int /* error */
1783 struct xfs_btree_cur
*cur
,
1785 int *stat
) /* success/failure */
1787 union xfs_btree_key key
; /* btree key */
1788 struct xfs_buf
*lbp
; /* left buffer pointer */
1789 struct xfs_btree_block
*left
; /* left btree block */
1790 int lrecs
; /* left record count */
1791 struct xfs_buf
*rbp
; /* right buffer pointer */
1792 struct xfs_btree_block
*right
; /* right btree block */
1793 int rrecs
; /* right record count */
1794 union xfs_btree_ptr lptr
; /* left btree pointer */
1795 union xfs_btree_key
*rkp
= NULL
; /* right btree key */
1796 union xfs_btree_ptr
*rpp
= NULL
; /* right address pointer */
1797 union xfs_btree_rec
*rrp
= NULL
; /* right record pointer */
1798 int error
; /* error return value */
1800 XFS_BTREE_TRACE_CURSOR(cur
, XBT_ENTRY
);
1801 XFS_BTREE_TRACE_ARGI(cur
, level
);
1803 if ((cur
->bc_flags
& XFS_BTREE_ROOT_IN_INODE
) &&
1804 level
== cur
->bc_nlevels
- 1)
1807 /* Set up variables for this block as "right". */
1808 right
= xfs_btree_get_block(cur
, level
, &rbp
);
1811 error
= xfs_btree_check_block(cur
, right
, level
, rbp
);
1816 /* If we've got no left sibling then we can't shift an entry left. */
1817 xfs_btree_get_sibling(cur
, right
, &lptr
, XFS_BB_LEFTSIB
);
1818 if (xfs_btree_ptr_is_null(cur
, &lptr
))
1822 * If the cursor entry is the one that would be moved, don't
1823 * do it... it's too complicated.
1825 if (cur
->bc_ptrs
[level
] <= 1)
1828 /* Set up the left neighbor as "left". */
1829 error
= xfs_btree_read_buf_block(cur
, &lptr
, level
, 0, &left
, &lbp
);
1833 /* If it's full, it can't take another entry. */
1834 lrecs
= xfs_btree_get_numrecs(left
);
1835 if (lrecs
== cur
->bc_ops
->get_maxrecs(cur
, level
))
1838 rrecs
= xfs_btree_get_numrecs(right
);
1841 * We add one entry to the left side and remove one for the right side.
1842 * Account for it here, the changes will be updated on disk and logged
1848 XFS_BTREE_STATS_INC(cur
, lshift
);
1849 XFS_BTREE_STATS_ADD(cur
, moves
, 1);
1852 * If non-leaf, copy a key and a ptr to the left block.
1853 * Log the changes to the left block.
1856 /* It's a non-leaf. Move keys and pointers. */
1857 union xfs_btree_key
*lkp
; /* left btree key */
1858 union xfs_btree_ptr
*lpp
; /* left address pointer */
1860 lkp
= xfs_btree_key_addr(cur
, lrecs
, left
);
1861 rkp
= xfs_btree_key_addr(cur
, 1, right
);
1863 lpp
= xfs_btree_ptr_addr(cur
, lrecs
, left
);
1864 rpp
= xfs_btree_ptr_addr(cur
, 1, right
);
1866 error
= xfs_btree_check_ptr(cur
, rpp
, 0, level
);
1870 xfs_btree_copy_keys(cur
, lkp
, rkp
, 1);
1871 xfs_btree_copy_ptrs(cur
, lpp
, rpp
, 1);
1873 xfs_btree_log_keys(cur
, lbp
, lrecs
, lrecs
);
1874 xfs_btree_log_ptrs(cur
, lbp
, lrecs
, lrecs
);
1876 ASSERT(cur
->bc_ops
->keys_inorder(cur
,
1877 xfs_btree_key_addr(cur
, lrecs
- 1, left
), lkp
));
1879 /* It's a leaf. Move records. */
1880 union xfs_btree_rec
*lrp
; /* left record pointer */
1882 lrp
= xfs_btree_rec_addr(cur
, lrecs
, left
);
1883 rrp
= xfs_btree_rec_addr(cur
, 1, right
);
1885 xfs_btree_copy_recs(cur
, lrp
, rrp
, 1);
1886 xfs_btree_log_recs(cur
, lbp
, lrecs
, lrecs
);
1888 ASSERT(cur
->bc_ops
->recs_inorder(cur
,
1889 xfs_btree_rec_addr(cur
, lrecs
- 1, left
), lrp
));
1892 xfs_btree_set_numrecs(left
, lrecs
);
1893 xfs_btree_log_block(cur
, lbp
, XFS_BB_NUMRECS
);
1895 xfs_btree_set_numrecs(right
, rrecs
);
1896 xfs_btree_log_block(cur
, rbp
, XFS_BB_NUMRECS
);
1899 * Slide the contents of right down one entry.
1901 XFS_BTREE_STATS_ADD(cur
, moves
, rrecs
- 1);
1903 /* It's a nonleaf. operate on keys and ptrs */
1905 int i
; /* loop index */
1907 for (i
= 0; i
< rrecs
; i
++) {
1908 error
= xfs_btree_check_ptr(cur
, rpp
, i
+ 1, level
);
1913 xfs_btree_shift_keys(cur
,
1914 xfs_btree_key_addr(cur
, 2, right
),
1916 xfs_btree_shift_ptrs(cur
,
1917 xfs_btree_ptr_addr(cur
, 2, right
),
1920 xfs_btree_log_keys(cur
, rbp
, 1, rrecs
);
1921 xfs_btree_log_ptrs(cur
, rbp
, 1, rrecs
);
1923 /* It's a leaf. operate on records */
1924 xfs_btree_shift_recs(cur
,
1925 xfs_btree_rec_addr(cur
, 2, right
),
1927 xfs_btree_log_recs(cur
, rbp
, 1, rrecs
);
1930 * If it's the first record in the block, we'll need a key
1931 * structure to pass up to the next level (updkey).
1933 cur
->bc_ops
->init_key_from_rec(&key
,
1934 xfs_btree_rec_addr(cur
, 1, right
));
1938 /* Update the parent key values of right. */
1939 error
= xfs_btree_updkey(cur
, rkp
, level
+ 1);
1943 /* Slide the cursor value left one. */
1944 cur
->bc_ptrs
[level
]--;
1946 XFS_BTREE_TRACE_CURSOR(cur
, XBT_EXIT
);
1951 XFS_BTREE_TRACE_CURSOR(cur
, XBT_EXIT
);
1956 XFS_BTREE_TRACE_CURSOR(cur
, XBT_ERROR
);
1961 * Move 1 record right from cur/level if possible.
1962 * Update cur to reflect the new path.
1964 STATIC
int /* error */
1966 struct xfs_btree_cur
*cur
,
1968 int *stat
) /* success/failure */
1970 union xfs_btree_key key
; /* btree key */
1971 struct xfs_buf
*lbp
; /* left buffer pointer */
1972 struct xfs_btree_block
*left
; /* left btree block */
1973 struct xfs_buf
*rbp
; /* right buffer pointer */
1974 struct xfs_btree_block
*right
; /* right btree block */
1975 struct xfs_btree_cur
*tcur
; /* temporary btree cursor */
1976 union xfs_btree_ptr rptr
; /* right block pointer */
1977 union xfs_btree_key
*rkp
; /* right btree key */
1978 int rrecs
; /* right record count */
1979 int lrecs
; /* left record count */
1980 int error
; /* error return value */
1981 int i
; /* loop counter */
1983 XFS_BTREE_TRACE_CURSOR(cur
, XBT_ENTRY
);
1984 XFS_BTREE_TRACE_ARGI(cur
, level
);
1986 if ((cur
->bc_flags
& XFS_BTREE_ROOT_IN_INODE
) &&
1987 (level
== cur
->bc_nlevels
- 1))
1990 /* Set up variables for this block as "left". */
1991 left
= xfs_btree_get_block(cur
, level
, &lbp
);
1994 error
= xfs_btree_check_block(cur
, left
, level
, lbp
);
1999 /* If we've got no right sibling then we can't shift an entry right. */
2000 xfs_btree_get_sibling(cur
, left
, &rptr
, XFS_BB_RIGHTSIB
);
2001 if (xfs_btree_ptr_is_null(cur
, &rptr
))
2005 * If the cursor entry is the one that would be moved, don't
2006 * do it... it's too complicated.
2008 lrecs
= xfs_btree_get_numrecs(left
);
2009 if (cur
->bc_ptrs
[level
] >= lrecs
)
2012 /* Set up the right neighbor as "right". */
2013 error
= xfs_btree_read_buf_block(cur
, &rptr
, level
, 0, &right
, &rbp
);
2017 /* If it's full, it can't take another entry. */
2018 rrecs
= xfs_btree_get_numrecs(right
);
2019 if (rrecs
== cur
->bc_ops
->get_maxrecs(cur
, level
))
2022 XFS_BTREE_STATS_INC(cur
, rshift
);
2023 XFS_BTREE_STATS_ADD(cur
, moves
, rrecs
);
2026 * Make a hole at the start of the right neighbor block, then
2027 * copy the last left block entry to the hole.
2030 /* It's a nonleaf. make a hole in the keys and ptrs */
2031 union xfs_btree_key
*lkp
;
2032 union xfs_btree_ptr
*lpp
;
2033 union xfs_btree_ptr
*rpp
;
2035 lkp
= xfs_btree_key_addr(cur
, lrecs
, left
);
2036 lpp
= xfs_btree_ptr_addr(cur
, lrecs
, left
);
2037 rkp
= xfs_btree_key_addr(cur
, 1, right
);
2038 rpp
= xfs_btree_ptr_addr(cur
, 1, right
);
2041 for (i
= rrecs
- 1; i
>= 0; i
--) {
2042 error
= xfs_btree_check_ptr(cur
, rpp
, i
, level
);
2048 xfs_btree_shift_keys(cur
, rkp
, 1, rrecs
);
2049 xfs_btree_shift_ptrs(cur
, rpp
, 1, rrecs
);
2052 error
= xfs_btree_check_ptr(cur
, lpp
, 0, level
);
2057 /* Now put the new data in, and log it. */
2058 xfs_btree_copy_keys(cur
, rkp
, lkp
, 1);
2059 xfs_btree_copy_ptrs(cur
, rpp
, lpp
, 1);
2061 xfs_btree_log_keys(cur
, rbp
, 1, rrecs
+ 1);
2062 xfs_btree_log_ptrs(cur
, rbp
, 1, rrecs
+ 1);
2064 ASSERT(cur
->bc_ops
->keys_inorder(cur
, rkp
,
2065 xfs_btree_key_addr(cur
, 2, right
)));
2067 /* It's a leaf. make a hole in the records */
2068 union xfs_btree_rec
*lrp
;
2069 union xfs_btree_rec
*rrp
;
2071 lrp
= xfs_btree_rec_addr(cur
, lrecs
, left
);
2072 rrp
= xfs_btree_rec_addr(cur
, 1, right
);
2074 xfs_btree_shift_recs(cur
, rrp
, 1, rrecs
);
2076 /* Now put the new data in, and log it. */
2077 xfs_btree_copy_recs(cur
, rrp
, lrp
, 1);
2078 xfs_btree_log_recs(cur
, rbp
, 1, rrecs
+ 1);
2080 cur
->bc_ops
->init_key_from_rec(&key
, rrp
);
2083 ASSERT(cur
->bc_ops
->recs_inorder(cur
, rrp
,
2084 xfs_btree_rec_addr(cur
, 2, right
)));
2088 * Decrement and log left's numrecs, bump and log right's numrecs.
2090 xfs_btree_set_numrecs(left
, --lrecs
);
2091 xfs_btree_log_block(cur
, lbp
, XFS_BB_NUMRECS
);
2093 xfs_btree_set_numrecs(right
, ++rrecs
);
2094 xfs_btree_log_block(cur
, rbp
, XFS_BB_NUMRECS
);
2097 * Using a temporary cursor, update the parent key values of the
2098 * block on the right.
2100 error
= xfs_btree_dup_cursor(cur
, &tcur
);
2103 i
= xfs_btree_lastrec(tcur
, level
);
2104 XFS_WANT_CORRUPTED_GOTO(i
== 1, error0
);
2106 error
= xfs_btree_increment(tcur
, level
, &i
);
2110 error
= xfs_btree_updkey(tcur
, rkp
, level
+ 1);
2114 xfs_btree_del_cursor(tcur
, XFS_BTREE_NOERROR
);
2116 XFS_BTREE_TRACE_CURSOR(cur
, XBT_EXIT
);
2121 XFS_BTREE_TRACE_CURSOR(cur
, XBT_EXIT
);
2126 XFS_BTREE_TRACE_CURSOR(cur
, XBT_ERROR
);
2130 XFS_BTREE_TRACE_CURSOR(tcur
, XBT_ERROR
);
2131 xfs_btree_del_cursor(tcur
, XFS_BTREE_ERROR
);
2136 * Split cur/level block in half.
2137 * Return new block number and the key to its first
2138 * record (to be inserted into parent).
2140 STATIC
int /* error */
2142 struct xfs_btree_cur
*cur
,
2144 union xfs_btree_ptr
*ptrp
,
2145 union xfs_btree_key
*key
,
2146 struct xfs_btree_cur
**curp
,
2147 int *stat
) /* success/failure */
2149 union xfs_btree_ptr lptr
; /* left sibling block ptr */
2150 struct xfs_buf
*lbp
; /* left buffer pointer */
2151 struct xfs_btree_block
*left
; /* left btree block */
2152 union xfs_btree_ptr rptr
; /* right sibling block ptr */
2153 struct xfs_buf
*rbp
; /* right buffer pointer */
2154 struct xfs_btree_block
*right
; /* right btree block */
2155 union xfs_btree_ptr rrptr
; /* right-right sibling ptr */
2156 struct xfs_buf
*rrbp
; /* right-right buffer pointer */
2157 struct xfs_btree_block
*rrblock
; /* right-right btree block */
2161 int error
; /* error return value */
2166 XFS_BTREE_TRACE_CURSOR(cur
, XBT_ENTRY
);
2167 XFS_BTREE_TRACE_ARGIPK(cur
, level
, *ptrp
, key
);
2169 XFS_BTREE_STATS_INC(cur
, split
);
2171 /* Set up left block (current one). */
2172 left
= xfs_btree_get_block(cur
, level
, &lbp
);
2175 error
= xfs_btree_check_block(cur
, left
, level
, lbp
);
2180 xfs_btree_buf_to_ptr(cur
, lbp
, &lptr
);
2182 /* Allocate the new block. If we can't do it, we're toast. Give up. */
2183 error
= cur
->bc_ops
->alloc_block(cur
, &lptr
, &rptr
, 1, stat
);
2188 XFS_BTREE_STATS_INC(cur
, alloc
);
2190 /* Set up the new block as "right". */
2191 error
= xfs_btree_get_buf_block(cur
, &rptr
, 0, &right
, &rbp
);
2195 /* Fill in the btree header for the new right block. */
2196 xfs_btree_init_block(cur
, xfs_btree_get_level(left
), 0, right
);
2199 * Split the entries between the old and the new block evenly.
2200 * Make sure that if there's an odd number of entries now, that
2201 * each new block will have the same number of entries.
2203 lrecs
= xfs_btree_get_numrecs(left
);
2205 if ((lrecs
& 1) && cur
->bc_ptrs
[level
] <= rrecs
+ 1)
2207 src_index
= (lrecs
- rrecs
+ 1);
2209 XFS_BTREE_STATS_ADD(cur
, moves
, rrecs
);
2212 * Copy btree block entries from the left block over to the
2213 * new block, the right. Update the right block and log the
2217 /* It's a non-leaf. Move keys and pointers. */
2218 union xfs_btree_key
*lkp
; /* left btree key */
2219 union xfs_btree_ptr
*lpp
; /* left address pointer */
2220 union xfs_btree_key
*rkp
; /* right btree key */
2221 union xfs_btree_ptr
*rpp
; /* right address pointer */
2223 lkp
= xfs_btree_key_addr(cur
, src_index
, left
);
2224 lpp
= xfs_btree_ptr_addr(cur
, src_index
, left
);
2225 rkp
= xfs_btree_key_addr(cur
, 1, right
);
2226 rpp
= xfs_btree_ptr_addr(cur
, 1, right
);
2229 for (i
= src_index
; i
< rrecs
; i
++) {
2230 error
= xfs_btree_check_ptr(cur
, lpp
, i
, level
);
2236 xfs_btree_copy_keys(cur
, rkp
, lkp
, rrecs
);
2237 xfs_btree_copy_ptrs(cur
, rpp
, lpp
, rrecs
);
2239 xfs_btree_log_keys(cur
, rbp
, 1, rrecs
);
2240 xfs_btree_log_ptrs(cur
, rbp
, 1, rrecs
);
2242 /* Grab the keys to the entries moved to the right block */
2243 xfs_btree_copy_keys(cur
, key
, rkp
, 1);
2245 /* It's a leaf. Move records. */
2246 union xfs_btree_rec
*lrp
; /* left record pointer */
2247 union xfs_btree_rec
*rrp
; /* right record pointer */
2249 lrp
= xfs_btree_rec_addr(cur
, src_index
, left
);
2250 rrp
= xfs_btree_rec_addr(cur
, 1, right
);
2252 xfs_btree_copy_recs(cur
, rrp
, lrp
, rrecs
);
2253 xfs_btree_log_recs(cur
, rbp
, 1, rrecs
);
2255 cur
->bc_ops
->init_key_from_rec(key
,
2256 xfs_btree_rec_addr(cur
, 1, right
));
2261 * Find the left block number by looking in the buffer.
2262 * Adjust numrecs, sibling pointers.
2264 xfs_btree_get_sibling(cur
, left
, &rrptr
, XFS_BB_RIGHTSIB
);
2265 xfs_btree_set_sibling(cur
, right
, &rrptr
, XFS_BB_RIGHTSIB
);
2266 xfs_btree_set_sibling(cur
, right
, &lptr
, XFS_BB_LEFTSIB
);
2267 xfs_btree_set_sibling(cur
, left
, &rptr
, XFS_BB_RIGHTSIB
);
2270 xfs_btree_set_numrecs(left
, lrecs
);
2271 xfs_btree_set_numrecs(right
, xfs_btree_get_numrecs(right
) + rrecs
);
2273 xfs_btree_log_block(cur
, rbp
, XFS_BB_ALL_BITS
);
2274 xfs_btree_log_block(cur
, lbp
, XFS_BB_NUMRECS
| XFS_BB_RIGHTSIB
);
2277 * If there's a block to the new block's right, make that block
2278 * point back to right instead of to left.
2280 if (!xfs_btree_ptr_is_null(cur
, &rrptr
)) {
2281 error
= xfs_btree_read_buf_block(cur
, &rrptr
, level
,
2282 0, &rrblock
, &rrbp
);
2285 xfs_btree_set_sibling(cur
, rrblock
, &rptr
, XFS_BB_LEFTSIB
);
2286 xfs_btree_log_block(cur
, rrbp
, XFS_BB_LEFTSIB
);
2289 * If the cursor is really in the right block, move it there.
2290 * If it's just pointing past the last entry in left, then we'll
2291 * insert there, so don't change anything in that case.
2293 if (cur
->bc_ptrs
[level
] > lrecs
+ 1) {
2294 xfs_btree_setbuf(cur
, level
, rbp
);
2295 cur
->bc_ptrs
[level
] -= lrecs
;
2298 * If there are more levels, we'll need another cursor which refers
2299 * the right block, no matter where this cursor was.
2301 if (level
+ 1 < cur
->bc_nlevels
) {
2302 error
= xfs_btree_dup_cursor(cur
, curp
);
2305 (*curp
)->bc_ptrs
[level
+ 1]++;
2308 XFS_BTREE_TRACE_CURSOR(cur
, XBT_EXIT
);
2312 XFS_BTREE_TRACE_CURSOR(cur
, XBT_EXIT
);
2317 XFS_BTREE_TRACE_CURSOR(cur
, XBT_ERROR
);
2322 * Copy the old inode root contents into a real block and make the
2323 * broot point to it.
2326 xfs_btree_new_iroot(
2327 struct xfs_btree_cur
*cur
, /* btree cursor */
2328 int *logflags
, /* logging flags for inode */
2329 int *stat
) /* return status - 0 fail */
2331 struct xfs_buf
*cbp
; /* buffer for cblock */
2332 struct xfs_btree_block
*block
; /* btree block */
2333 struct xfs_btree_block
*cblock
; /* child btree block */
2334 union xfs_btree_key
*ckp
; /* child key pointer */
2335 union xfs_btree_ptr
*cpp
; /* child ptr pointer */
2336 union xfs_btree_key
*kp
; /* pointer to btree key */
2337 union xfs_btree_ptr
*pp
; /* pointer to block addr */
2338 union xfs_btree_ptr nptr
; /* new block addr */
2339 int level
; /* btree level */
2340 int error
; /* error return code */
2342 int i
; /* loop counter */
2345 XFS_BTREE_TRACE_CURSOR(cur
, XBT_ENTRY
);
2346 XFS_BTREE_STATS_INC(cur
, newroot
);
2348 ASSERT(cur
->bc_flags
& XFS_BTREE_ROOT_IN_INODE
);
2350 level
= cur
->bc_nlevels
- 1;
2352 block
= xfs_btree_get_iroot(cur
);
2353 pp
= xfs_btree_ptr_addr(cur
, 1, block
);
2355 /* Allocate the new block. If we can't do it, we're toast. Give up. */
2356 error
= cur
->bc_ops
->alloc_block(cur
, pp
, &nptr
, 1, stat
);
2360 XFS_BTREE_TRACE_CURSOR(cur
, XBT_EXIT
);
2363 XFS_BTREE_STATS_INC(cur
, alloc
);
2365 /* Copy the root into a real block. */
2366 error
= xfs_btree_get_buf_block(cur
, &nptr
, 0, &cblock
, &cbp
);
2370 memcpy(cblock
, block
, xfs_btree_block_len(cur
));
2372 be16_add_cpu(&block
->bb_level
, 1);
2373 xfs_btree_set_numrecs(block
, 1);
2375 cur
->bc_ptrs
[level
+ 1] = 1;
2377 kp
= xfs_btree_key_addr(cur
, 1, block
);
2378 ckp
= xfs_btree_key_addr(cur
, 1, cblock
);
2379 xfs_btree_copy_keys(cur
, ckp
, kp
, xfs_btree_get_numrecs(cblock
));
2381 cpp
= xfs_btree_ptr_addr(cur
, 1, cblock
);
2383 for (i
= 0; i
< be16_to_cpu(cblock
->bb_numrecs
); i
++) {
2384 error
= xfs_btree_check_ptr(cur
, pp
, i
, level
);
2389 xfs_btree_copy_ptrs(cur
, cpp
, pp
, xfs_btree_get_numrecs(cblock
));
2392 error
= xfs_btree_check_ptr(cur
, &nptr
, 0, level
);
2396 xfs_btree_copy_ptrs(cur
, pp
, &nptr
, 1);
2398 xfs_iroot_realloc(cur
->bc_private
.b
.ip
,
2399 1 - xfs_btree_get_numrecs(cblock
),
2400 cur
->bc_private
.b
.whichfork
);
2402 xfs_btree_setbuf(cur
, level
, cbp
);
2405 * Do all this logging at the end so that
2406 * the root is at the right level.
2408 xfs_btree_log_block(cur
, cbp
, XFS_BB_ALL_BITS
);
2409 xfs_btree_log_keys(cur
, cbp
, 1, be16_to_cpu(cblock
->bb_numrecs
));
2410 xfs_btree_log_ptrs(cur
, cbp
, 1, be16_to_cpu(cblock
->bb_numrecs
));
2413 XFS_ILOG_CORE
| xfs_ilog_fbroot(cur
->bc_private
.b
.whichfork
);
2415 XFS_BTREE_TRACE_CURSOR(cur
, XBT_EXIT
);
2418 XFS_BTREE_TRACE_CURSOR(cur
, XBT_ERROR
);
2423 * Allocate a new root block, fill it in.
2425 STATIC
int /* error */
2427 struct xfs_btree_cur
*cur
, /* btree cursor */
2428 int *stat
) /* success/failure */
2430 struct xfs_btree_block
*block
; /* one half of the old root block */
2431 struct xfs_buf
*bp
; /* buffer containing block */
2432 int error
; /* error return value */
2433 struct xfs_buf
*lbp
; /* left buffer pointer */
2434 struct xfs_btree_block
*left
; /* left btree block */
2435 struct xfs_buf
*nbp
; /* new (root) buffer */
2436 struct xfs_btree_block
*new; /* new (root) btree block */
2437 int nptr
; /* new value for key index, 1 or 2 */
2438 struct xfs_buf
*rbp
; /* right buffer pointer */
2439 struct xfs_btree_block
*right
; /* right btree block */
2440 union xfs_btree_ptr rptr
;
2441 union xfs_btree_ptr lptr
;
2443 XFS_BTREE_TRACE_CURSOR(cur
, XBT_ENTRY
);
2444 XFS_BTREE_STATS_INC(cur
, newroot
);
2446 /* initialise our start point from the cursor */
2447 cur
->bc_ops
->init_ptr_from_cur(cur
, &rptr
);
2449 /* Allocate the new block. If we can't do it, we're toast. Give up. */
2450 error
= cur
->bc_ops
->alloc_block(cur
, &rptr
, &lptr
, 1, stat
);
2455 XFS_BTREE_STATS_INC(cur
, alloc
);
2457 /* Set up the new block. */
2458 error
= xfs_btree_get_buf_block(cur
, &lptr
, 0, &new, &nbp
);
2462 /* Set the root in the holding structure increasing the level by 1. */
2463 cur
->bc_ops
->set_root(cur
, &lptr
, 1);
2466 * At the previous root level there are now two blocks: the old root,
2467 * and the new block generated when it was split. We don't know which
2468 * one the cursor is pointing at, so we set up variables "left" and
2469 * "right" for each case.
2471 block
= xfs_btree_get_block(cur
, cur
->bc_nlevels
- 1, &bp
);
2474 error
= xfs_btree_check_block(cur
, block
, cur
->bc_nlevels
- 1, bp
);
2479 xfs_btree_get_sibling(cur
, block
, &rptr
, XFS_BB_RIGHTSIB
);
2480 if (!xfs_btree_ptr_is_null(cur
, &rptr
)) {
2481 /* Our block is left, pick up the right block. */
2483 xfs_btree_buf_to_ptr(cur
, lbp
, &lptr
);
2485 error
= xfs_btree_read_buf_block(cur
, &rptr
,
2486 cur
->bc_nlevels
- 1, 0, &right
, &rbp
);
2492 /* Our block is right, pick up the left block. */
2494 xfs_btree_buf_to_ptr(cur
, rbp
, &rptr
);
2496 xfs_btree_get_sibling(cur
, right
, &lptr
, XFS_BB_LEFTSIB
);
2497 error
= xfs_btree_read_buf_block(cur
, &lptr
,
2498 cur
->bc_nlevels
- 1, 0, &left
, &lbp
);
2504 /* Fill in the new block's btree header and log it. */
2505 xfs_btree_init_block(cur
, cur
->bc_nlevels
, 2, new);
2506 xfs_btree_log_block(cur
, nbp
, XFS_BB_ALL_BITS
);
2507 ASSERT(!xfs_btree_ptr_is_null(cur
, &lptr
) &&
2508 !xfs_btree_ptr_is_null(cur
, &rptr
));
2510 /* Fill in the key data in the new root. */
2511 if (xfs_btree_get_level(left
) > 0) {
2512 xfs_btree_copy_keys(cur
,
2513 xfs_btree_key_addr(cur
, 1, new),
2514 xfs_btree_key_addr(cur
, 1, left
), 1);
2515 xfs_btree_copy_keys(cur
,
2516 xfs_btree_key_addr(cur
, 2, new),
2517 xfs_btree_key_addr(cur
, 1, right
), 1);
2519 cur
->bc_ops
->init_key_from_rec(
2520 xfs_btree_key_addr(cur
, 1, new),
2521 xfs_btree_rec_addr(cur
, 1, left
));
2522 cur
->bc_ops
->init_key_from_rec(
2523 xfs_btree_key_addr(cur
, 2, new),
2524 xfs_btree_rec_addr(cur
, 1, right
));
2526 xfs_btree_log_keys(cur
, nbp
, 1, 2);
2528 /* Fill in the pointer data in the new root. */
2529 xfs_btree_copy_ptrs(cur
,
2530 xfs_btree_ptr_addr(cur
, 1, new), &lptr
, 1);
2531 xfs_btree_copy_ptrs(cur
,
2532 xfs_btree_ptr_addr(cur
, 2, new), &rptr
, 1);
2533 xfs_btree_log_ptrs(cur
, nbp
, 1, 2);
2535 /* Fix up the cursor. */
2536 xfs_btree_setbuf(cur
, cur
->bc_nlevels
, nbp
);
2537 cur
->bc_ptrs
[cur
->bc_nlevels
] = nptr
;
2539 XFS_BTREE_TRACE_CURSOR(cur
, XBT_EXIT
);
2543 XFS_BTREE_TRACE_CURSOR(cur
, XBT_ERROR
);
2546 XFS_BTREE_TRACE_CURSOR(cur
, XBT_EXIT
);
2552 xfs_btree_make_block_unfull(
2553 struct xfs_btree_cur
*cur
, /* btree cursor */
2554 int level
, /* btree level */
2555 int numrecs
,/* # of recs in block */
2556 int *oindex
,/* old tree index */
2557 int *index
, /* new tree index */
2558 union xfs_btree_ptr
*nptr
, /* new btree ptr */
2559 struct xfs_btree_cur
**ncur
, /* new btree cursor */
2560 union xfs_btree_rec
*nrec
, /* new record */
2563 union xfs_btree_key key
; /* new btree key value */
2566 if ((cur
->bc_flags
& XFS_BTREE_ROOT_IN_INODE
) &&
2567 level
== cur
->bc_nlevels
- 1) {
2568 struct xfs_inode
*ip
= cur
->bc_private
.b
.ip
;
2570 if (numrecs
< cur
->bc_ops
->get_dmaxrecs(cur
, level
)) {
2571 /* A root block that can be made bigger. */
2573 xfs_iroot_realloc(ip
, 1, cur
->bc_private
.b
.whichfork
);
2575 /* A root block that needs replacing */
2578 error
= xfs_btree_new_iroot(cur
, &logflags
, stat
);
2579 if (error
|| *stat
== 0)
2582 xfs_trans_log_inode(cur
->bc_tp
, ip
, logflags
);
2588 /* First, try shifting an entry to the right neighbor. */
2589 error
= xfs_btree_rshift(cur
, level
, stat
);
2593 /* Next, try shifting an entry to the left neighbor. */
2594 error
= xfs_btree_lshift(cur
, level
, stat
);
2599 *oindex
= *index
= cur
->bc_ptrs
[level
];
2604 * Next, try splitting the current block in half.
2606 * If this works we have to re-set our variables because we
2607 * could be in a different block now.
2609 error
= xfs_btree_split(cur
, level
, nptr
, &key
, ncur
, stat
);
2610 if (error
|| *stat
== 0)
2614 *index
= cur
->bc_ptrs
[level
];
2615 cur
->bc_ops
->init_rec_from_key(&key
, nrec
);
2620 * Insert one record/level. Return information to the caller
2621 * allowing the next level up to proceed if necessary.
2625 struct xfs_btree_cur
*cur
, /* btree cursor */
2626 int level
, /* level to insert record at */
2627 union xfs_btree_ptr
*ptrp
, /* i/o: block number inserted */
2628 union xfs_btree_rec
*recp
, /* i/o: record data inserted */
2629 struct xfs_btree_cur
**curp
, /* output: new cursor replacing cur */
2630 int *stat
) /* success/failure */
2632 struct xfs_btree_block
*block
; /* btree block */
2633 struct xfs_buf
*bp
; /* buffer for block */
2634 union xfs_btree_key key
; /* btree key */
2635 union xfs_btree_ptr nptr
; /* new block ptr */
2636 struct xfs_btree_cur
*ncur
; /* new btree cursor */
2637 union xfs_btree_rec nrec
; /* new record count */
2638 int optr
; /* old key/record index */
2639 int ptr
; /* key/record index */
2640 int numrecs
;/* number of records */
2641 int error
; /* error return value */
2646 XFS_BTREE_TRACE_CURSOR(cur
, XBT_ENTRY
);
2647 XFS_BTREE_TRACE_ARGIPR(cur
, level
, *ptrp
, recp
);
2652 * If we have an external root pointer, and we've made it to the
2653 * root level, allocate a new root block and we're done.
2655 if (!(cur
->bc_flags
& XFS_BTREE_ROOT_IN_INODE
) &&
2656 (level
>= cur
->bc_nlevels
)) {
2657 error
= xfs_btree_new_root(cur
, stat
);
2658 xfs_btree_set_ptr_null(cur
, ptrp
);
2660 XFS_BTREE_TRACE_CURSOR(cur
, XBT_EXIT
);
2664 /* If we're off the left edge, return failure. */
2665 ptr
= cur
->bc_ptrs
[level
];
2667 XFS_BTREE_TRACE_CURSOR(cur
, XBT_EXIT
);
2672 /* Make a key out of the record data to be inserted, and save it. */
2673 cur
->bc_ops
->init_key_from_rec(&key
, recp
);
2677 XFS_BTREE_STATS_INC(cur
, insrec
);
2679 /* Get pointers to the btree buffer and block. */
2680 block
= xfs_btree_get_block(cur
, level
, &bp
);
2681 numrecs
= xfs_btree_get_numrecs(block
);
2684 error
= xfs_btree_check_block(cur
, block
, level
, bp
);
2688 /* Check that the new entry is being inserted in the right place. */
2689 if (ptr
<= numrecs
) {
2691 ASSERT(cur
->bc_ops
->recs_inorder(cur
, recp
,
2692 xfs_btree_rec_addr(cur
, ptr
, block
)));
2694 ASSERT(cur
->bc_ops
->keys_inorder(cur
, &key
,
2695 xfs_btree_key_addr(cur
, ptr
, block
)));
2701 * If the block is full, we can't insert the new entry until we
2702 * make the block un-full.
2704 xfs_btree_set_ptr_null(cur
, &nptr
);
2705 if (numrecs
== cur
->bc_ops
->get_maxrecs(cur
, level
)) {
2706 error
= xfs_btree_make_block_unfull(cur
, level
, numrecs
,
2707 &optr
, &ptr
, &nptr
, &ncur
, &nrec
, stat
);
2708 if (error
|| *stat
== 0)
2713 * The current block may have changed if the block was
2714 * previously full and we have just made space in it.
2716 block
= xfs_btree_get_block(cur
, level
, &bp
);
2717 numrecs
= xfs_btree_get_numrecs(block
);
2720 error
= xfs_btree_check_block(cur
, block
, level
, bp
);
2726 * At this point we know there's room for our new entry in the block
2727 * we're pointing at.
2729 XFS_BTREE_STATS_ADD(cur
, moves
, numrecs
- ptr
+ 1);
2732 /* It's a nonleaf. make a hole in the keys and ptrs */
2733 union xfs_btree_key
*kp
;
2734 union xfs_btree_ptr
*pp
;
2736 kp
= xfs_btree_key_addr(cur
, ptr
, block
);
2737 pp
= xfs_btree_ptr_addr(cur
, ptr
, block
);
2740 for (i
= numrecs
- ptr
; i
>= 0; i
--) {
2741 error
= xfs_btree_check_ptr(cur
, pp
, i
, level
);
2747 xfs_btree_shift_keys(cur
, kp
, 1, numrecs
- ptr
+ 1);
2748 xfs_btree_shift_ptrs(cur
, pp
, 1, numrecs
- ptr
+ 1);
2751 error
= xfs_btree_check_ptr(cur
, ptrp
, 0, level
);
2756 /* Now put the new data in, bump numrecs and log it. */
2757 xfs_btree_copy_keys(cur
, kp
, &key
, 1);
2758 xfs_btree_copy_ptrs(cur
, pp
, ptrp
, 1);
2760 xfs_btree_set_numrecs(block
, numrecs
);
2761 xfs_btree_log_ptrs(cur
, bp
, ptr
, numrecs
);
2762 xfs_btree_log_keys(cur
, bp
, ptr
, numrecs
);
2764 if (ptr
< numrecs
) {
2765 ASSERT(cur
->bc_ops
->keys_inorder(cur
, kp
,
2766 xfs_btree_key_addr(cur
, ptr
+ 1, block
)));
2770 /* It's a leaf. make a hole in the records */
2771 union xfs_btree_rec
*rp
;
2773 rp
= xfs_btree_rec_addr(cur
, ptr
, block
);
2775 xfs_btree_shift_recs(cur
, rp
, 1, numrecs
- ptr
+ 1);
2777 /* Now put the new data in, bump numrecs and log it. */
2778 xfs_btree_copy_recs(cur
, rp
, recp
, 1);
2779 xfs_btree_set_numrecs(block
, ++numrecs
);
2780 xfs_btree_log_recs(cur
, bp
, ptr
, numrecs
);
2782 if (ptr
< numrecs
) {
2783 ASSERT(cur
->bc_ops
->recs_inorder(cur
, rp
,
2784 xfs_btree_rec_addr(cur
, ptr
+ 1, block
)));
2789 /* Log the new number of records in the btree header. */
2790 xfs_btree_log_block(cur
, bp
, XFS_BB_NUMRECS
);
2792 /* If we inserted at the start of a block, update the parents' keys. */
2794 error
= xfs_btree_updkey(cur
, &key
, level
+ 1);
2800 * If we are tracking the last record in the tree and
2801 * we are at the far right edge of the tree, update it.
2803 if (xfs_btree_is_lastrec(cur
, block
, level
)) {
2804 cur
->bc_ops
->update_lastrec(cur
, block
, recp
,
2805 ptr
, LASTREC_INSREC
);
2809 * Return the new block number, if any.
2810 * If there is one, give back a record value and a cursor too.
2813 if (!xfs_btree_ptr_is_null(cur
, &nptr
)) {
2818 XFS_BTREE_TRACE_CURSOR(cur
, XBT_EXIT
);
2823 XFS_BTREE_TRACE_CURSOR(cur
, XBT_ERROR
);
2828 * Insert the record at the point referenced by cur.
2830 * A multi-level split of the tree on insert will invalidate the original
2831 * cursor. All callers of this function should assume that the cursor is
2832 * no longer valid and revalidate it.
2836 struct xfs_btree_cur
*cur
,
2839 int error
; /* error return value */
2840 int i
; /* result value, 0 for failure */
2841 int level
; /* current level number in btree */
2842 union xfs_btree_ptr nptr
; /* new block number (split result) */
2843 struct xfs_btree_cur
*ncur
; /* new cursor (split result) */
2844 struct xfs_btree_cur
*pcur
; /* previous level's cursor */
2845 union xfs_btree_rec rec
; /* record to insert */
2851 xfs_btree_set_ptr_null(cur
, &nptr
);
2852 cur
->bc_ops
->init_rec_from_cur(cur
, &rec
);
2855 * Loop going up the tree, starting at the leaf level.
2856 * Stop when we don't get a split block, that must mean that
2857 * the insert is finished with this level.
2861 * Insert nrec/nptr into this level of the tree.
2862 * Note if we fail, nptr will be null.
2864 error
= xfs_btree_insrec(pcur
, level
, &nptr
, &rec
, &ncur
, &i
);
2867 xfs_btree_del_cursor(pcur
, XFS_BTREE_ERROR
);
2871 XFS_WANT_CORRUPTED_GOTO(i
== 1, error0
);
2875 * See if the cursor we just used is trash.
2876 * Can't trash the caller's cursor, but otherwise we should
2877 * if ncur is a new cursor or we're about to be done.
2880 (ncur
|| xfs_btree_ptr_is_null(cur
, &nptr
))) {
2881 /* Save the state from the cursor before we trash it */
2882 if (cur
->bc_ops
->update_cursor
)
2883 cur
->bc_ops
->update_cursor(pcur
, cur
);
2884 cur
->bc_nlevels
= pcur
->bc_nlevels
;
2885 xfs_btree_del_cursor(pcur
, XFS_BTREE_NOERROR
);
2887 /* If we got a new cursor, switch to it. */
2892 } while (!xfs_btree_ptr_is_null(cur
, &nptr
));
2894 XFS_BTREE_TRACE_CURSOR(cur
, XBT_EXIT
);
2898 XFS_BTREE_TRACE_CURSOR(cur
, XBT_ERROR
);
2903 * Try to merge a non-leaf block back into the inode root.
2905 * Note: the killroot names comes from the fact that we're effectively
2906 * killing the old root block. But because we can't just delete the
2907 * inode we have to copy the single block it was pointing to into the
2911 xfs_btree_kill_iroot(
2912 struct xfs_btree_cur
*cur
)
2914 int whichfork
= cur
->bc_private
.b
.whichfork
;
2915 struct xfs_inode
*ip
= cur
->bc_private
.b
.ip
;
2916 struct xfs_ifork
*ifp
= XFS_IFORK_PTR(ip
, whichfork
);
2917 struct xfs_btree_block
*block
;
2918 struct xfs_btree_block
*cblock
;
2919 union xfs_btree_key
*kp
;
2920 union xfs_btree_key
*ckp
;
2921 union xfs_btree_ptr
*pp
;
2922 union xfs_btree_ptr
*cpp
;
2923 struct xfs_buf
*cbp
;
2928 union xfs_btree_ptr ptr
;
2932 XFS_BTREE_TRACE_CURSOR(cur
, XBT_ENTRY
);
2934 ASSERT(cur
->bc_flags
& XFS_BTREE_ROOT_IN_INODE
);
2935 ASSERT(cur
->bc_nlevels
> 1);
2938 * Don't deal with the root block needs to be a leaf case.
2939 * We're just going to turn the thing back into extents anyway.
2941 level
= cur
->bc_nlevels
- 1;
2946 * Give up if the root has multiple children.
2948 block
= xfs_btree_get_iroot(cur
);
2949 if (xfs_btree_get_numrecs(block
) != 1)
2952 cblock
= xfs_btree_get_block(cur
, level
- 1, &cbp
);
2953 numrecs
= xfs_btree_get_numrecs(cblock
);
2956 * Only do this if the next level will fit.
2957 * Then the data must be copied up to the inode,
2958 * instead of freeing the root you free the next level.
2960 if (numrecs
> cur
->bc_ops
->get_dmaxrecs(cur
, level
))
2963 XFS_BTREE_STATS_INC(cur
, killroot
);
2966 xfs_btree_get_sibling(cur
, block
, &ptr
, XFS_BB_LEFTSIB
);
2967 ASSERT(xfs_btree_ptr_is_null(cur
, &ptr
));
2968 xfs_btree_get_sibling(cur
, block
, &ptr
, XFS_BB_RIGHTSIB
);
2969 ASSERT(xfs_btree_ptr_is_null(cur
, &ptr
));
2972 index
= numrecs
- cur
->bc_ops
->get_maxrecs(cur
, level
);
2974 xfs_iroot_realloc(cur
->bc_private
.b
.ip
, index
,
2975 cur
->bc_private
.b
.whichfork
);
2976 block
= ifp
->if_broot
;
2979 be16_add_cpu(&block
->bb_numrecs
, index
);
2980 ASSERT(block
->bb_numrecs
== cblock
->bb_numrecs
);
2982 kp
= xfs_btree_key_addr(cur
, 1, block
);
2983 ckp
= xfs_btree_key_addr(cur
, 1, cblock
);
2984 xfs_btree_copy_keys(cur
, kp
, ckp
, numrecs
);
2986 pp
= xfs_btree_ptr_addr(cur
, 1, block
);
2987 cpp
= xfs_btree_ptr_addr(cur
, 1, cblock
);
2989 for (i
= 0; i
< numrecs
; i
++) {
2992 error
= xfs_btree_check_ptr(cur
, cpp
, i
, level
- 1);
2994 XFS_BTREE_TRACE_CURSOR(cur
, XBT_ERROR
);
2999 xfs_btree_copy_ptrs(cur
, pp
, cpp
, numrecs
);
3001 cur
->bc_ops
->free_block(cur
, cbp
);
3002 XFS_BTREE_STATS_INC(cur
, free
);
3004 cur
->bc_bufs
[level
- 1] = NULL
;
3005 be16_add_cpu(&block
->bb_level
, -1);
3006 xfs_trans_log_inode(cur
->bc_tp
, ip
,
3007 XFS_ILOG_CORE
| xfs_ilog_fbroot(cur
->bc_private
.b
.whichfork
));
3010 XFS_BTREE_TRACE_CURSOR(cur
, XBT_EXIT
);
3015 xfs_btree_dec_cursor(
3016 struct xfs_btree_cur
*cur
,
3024 error
= xfs_btree_decrement(cur
, level
, &i
);
3029 XFS_BTREE_TRACE_CURSOR(cur
, XBT_EXIT
);
3035 * Single level of the btree record deletion routine.
3036 * Delete record pointed to by cur/level.
3037 * Remove the record from its block then rebalance the tree.
3038 * Return 0 for error, 1 for done, 2 to go on to the next level.
3040 STATIC
int /* error */
3042 struct xfs_btree_cur
*cur
, /* btree cursor */
3043 int level
, /* level removing record from */
3044 int *stat
) /* fail/done/go-on */
3046 struct xfs_btree_block
*block
; /* btree block */
3047 union xfs_btree_ptr cptr
; /* current block ptr */
3048 struct xfs_buf
*bp
; /* buffer for block */
3049 int error
; /* error return value */
3050 int i
; /* loop counter */
3051 union xfs_btree_key key
; /* storage for keyp */
3052 union xfs_btree_key
*keyp
= &key
; /* passed to the next level */
3053 union xfs_btree_ptr lptr
; /* left sibling block ptr */
3054 struct xfs_buf
*lbp
; /* left buffer pointer */
3055 struct xfs_btree_block
*left
; /* left btree block */
3056 int lrecs
= 0; /* left record count */
3057 int ptr
; /* key/record index */
3058 union xfs_btree_ptr rptr
; /* right sibling block ptr */
3059 struct xfs_buf
*rbp
; /* right buffer pointer */
3060 struct xfs_btree_block
*right
; /* right btree block */
3061 struct xfs_btree_block
*rrblock
; /* right-right btree block */
3062 struct xfs_buf
*rrbp
; /* right-right buffer pointer */
3063 int rrecs
= 0; /* right record count */
3064 struct xfs_btree_cur
*tcur
; /* temporary btree cursor */
3065 int numrecs
; /* temporary numrec count */
3067 XFS_BTREE_TRACE_CURSOR(cur
, XBT_ENTRY
);
3068 XFS_BTREE_TRACE_ARGI(cur
, level
);
3072 /* Get the index of the entry being deleted, check for nothing there. */
3073 ptr
= cur
->bc_ptrs
[level
];
3075 XFS_BTREE_TRACE_CURSOR(cur
, XBT_EXIT
);
3080 /* Get the buffer & block containing the record or key/ptr. */
3081 block
= xfs_btree_get_block(cur
, level
, &bp
);
3082 numrecs
= xfs_btree_get_numrecs(block
);
3085 error
= xfs_btree_check_block(cur
, block
, level
, bp
);
3090 /* Fail if we're off the end of the block. */
3091 if (ptr
> numrecs
) {
3092 XFS_BTREE_TRACE_CURSOR(cur
, XBT_EXIT
);
3097 XFS_BTREE_STATS_INC(cur
, delrec
);
3098 XFS_BTREE_STATS_ADD(cur
, moves
, numrecs
- ptr
);
3100 /* Excise the entries being deleted. */
3102 /* It's a nonleaf. operate on keys and ptrs */
3103 union xfs_btree_key
*lkp
;
3104 union xfs_btree_ptr
*lpp
;
3106 lkp
= xfs_btree_key_addr(cur
, ptr
+ 1, block
);
3107 lpp
= xfs_btree_ptr_addr(cur
, ptr
+ 1, block
);
3110 for (i
= 0; i
< numrecs
- ptr
; i
++) {
3111 error
= xfs_btree_check_ptr(cur
, lpp
, i
, level
);
3117 if (ptr
< numrecs
) {
3118 xfs_btree_shift_keys(cur
, lkp
, -1, numrecs
- ptr
);
3119 xfs_btree_shift_ptrs(cur
, lpp
, -1, numrecs
- ptr
);
3120 xfs_btree_log_keys(cur
, bp
, ptr
, numrecs
- 1);
3121 xfs_btree_log_ptrs(cur
, bp
, ptr
, numrecs
- 1);
3125 * If it's the first record in the block, we'll need to pass a
3126 * key up to the next level (updkey).
3129 keyp
= xfs_btree_key_addr(cur
, 1, block
);
3131 /* It's a leaf. operate on records */
3132 if (ptr
< numrecs
) {
3133 xfs_btree_shift_recs(cur
,
3134 xfs_btree_rec_addr(cur
, ptr
+ 1, block
),
3136 xfs_btree_log_recs(cur
, bp
, ptr
, numrecs
- 1);
3140 * If it's the first record in the block, we'll need a key
3141 * structure to pass up to the next level (updkey).
3144 cur
->bc_ops
->init_key_from_rec(&key
,
3145 xfs_btree_rec_addr(cur
, 1, block
));
3151 * Decrement and log the number of entries in the block.
3153 xfs_btree_set_numrecs(block
, --numrecs
);
3154 xfs_btree_log_block(cur
, bp
, XFS_BB_NUMRECS
);
3157 * If we are tracking the last record in the tree and
3158 * we are at the far right edge of the tree, update it.
3160 if (xfs_btree_is_lastrec(cur
, block
, level
)) {
3161 cur
->bc_ops
->update_lastrec(cur
, block
, NULL
,
3162 ptr
, LASTREC_DELREC
);
3166 * We're at the root level. First, shrink the root block in-memory.
3167 * Try to get rid of the next level down. If we can't then there's
3168 * nothing left to do.
3170 if (level
== cur
->bc_nlevels
- 1) {
3171 if (cur
->bc_flags
& XFS_BTREE_ROOT_IN_INODE
) {
3172 xfs_iroot_realloc(cur
->bc_private
.b
.ip
, -1,
3173 cur
->bc_private
.b
.whichfork
);
3175 error
= xfs_btree_kill_iroot(cur
);
3179 error
= xfs_btree_dec_cursor(cur
, level
, stat
);
3187 * If this is the root level, and there's only one entry left,
3188 * and it's NOT the leaf level, then we can get rid of this
3191 if (numrecs
== 1 && level
> 0) {
3192 union xfs_btree_ptr
*pp
;
3194 * pp is still set to the first pointer in the block.
3195 * Make it the new root of the btree.
3197 pp
= xfs_btree_ptr_addr(cur
, 1, block
);
3198 error
= cur
->bc_ops
->kill_root(cur
, bp
, level
, pp
);
3201 } else if (level
> 0) {
3202 error
= xfs_btree_dec_cursor(cur
, level
, stat
);
3211 * If we deleted the leftmost entry in the block, update the
3212 * key values above us in the tree.
3215 error
= xfs_btree_updkey(cur
, keyp
, level
+ 1);
3221 * If the number of records remaining in the block is at least
3222 * the minimum, we're done.
3224 if (numrecs
>= cur
->bc_ops
->get_minrecs(cur
, level
)) {
3225 error
= xfs_btree_dec_cursor(cur
, level
, stat
);
3232 * Otherwise, we have to move some records around to keep the
3233 * tree balanced. Look at the left and right sibling blocks to
3234 * see if we can re-balance by moving only one record.
3236 xfs_btree_get_sibling(cur
, block
, &rptr
, XFS_BB_RIGHTSIB
);
3237 xfs_btree_get_sibling(cur
, block
, &lptr
, XFS_BB_LEFTSIB
);
3239 if (cur
->bc_flags
& XFS_BTREE_ROOT_IN_INODE
) {
3241 * One child of root, need to get a chance to copy its contents
3242 * into the root and delete it. Can't go up to next level,
3243 * there's nothing to delete there.
3245 if (xfs_btree_ptr_is_null(cur
, &rptr
) &&
3246 xfs_btree_ptr_is_null(cur
, &lptr
) &&
3247 level
== cur
->bc_nlevels
- 2) {
3248 error
= xfs_btree_kill_iroot(cur
);
3250 error
= xfs_btree_dec_cursor(cur
, level
, stat
);
3257 ASSERT(!xfs_btree_ptr_is_null(cur
, &rptr
) ||
3258 !xfs_btree_ptr_is_null(cur
, &lptr
));
3261 * Duplicate the cursor so our btree manipulations here won't
3262 * disrupt the next level up.
3264 error
= xfs_btree_dup_cursor(cur
, &tcur
);
3269 * If there's a right sibling, see if it's ok to shift an entry
3272 if (!xfs_btree_ptr_is_null(cur
, &rptr
)) {
3274 * Move the temp cursor to the last entry in the next block.
3275 * Actually any entry but the first would suffice.
3277 i
= xfs_btree_lastrec(tcur
, level
);
3278 XFS_WANT_CORRUPTED_GOTO(i
== 1, error0
);
3280 error
= xfs_btree_increment(tcur
, level
, &i
);
3283 XFS_WANT_CORRUPTED_GOTO(i
== 1, error0
);
3285 i
= xfs_btree_lastrec(tcur
, level
);
3286 XFS_WANT_CORRUPTED_GOTO(i
== 1, error0
);
3288 /* Grab a pointer to the block. */
3289 right
= xfs_btree_get_block(tcur
, level
, &rbp
);
3291 error
= xfs_btree_check_block(tcur
, right
, level
, rbp
);
3295 /* Grab the current block number, for future use. */
3296 xfs_btree_get_sibling(tcur
, right
, &cptr
, XFS_BB_LEFTSIB
);
3299 * If right block is full enough so that removing one entry
3300 * won't make it too empty, and left-shifting an entry out
3301 * of right to us works, we're done.
3303 if (xfs_btree_get_numrecs(right
) - 1 >=
3304 cur
->bc_ops
->get_minrecs(tcur
, level
)) {
3305 error
= xfs_btree_lshift(tcur
, level
, &i
);
3309 ASSERT(xfs_btree_get_numrecs(block
) >=
3310 cur
->bc_ops
->get_minrecs(tcur
, level
));
3312 xfs_btree_del_cursor(tcur
, XFS_BTREE_NOERROR
);
3315 error
= xfs_btree_dec_cursor(cur
, level
, stat
);
3323 * Otherwise, grab the number of records in right for
3324 * future reference, and fix up the temp cursor to point
3325 * to our block again (last record).
3327 rrecs
= xfs_btree_get_numrecs(right
);
3328 if (!xfs_btree_ptr_is_null(cur
, &lptr
)) {
3329 i
= xfs_btree_firstrec(tcur
, level
);
3330 XFS_WANT_CORRUPTED_GOTO(i
== 1, error0
);
3332 error
= xfs_btree_decrement(tcur
, level
, &i
);
3335 XFS_WANT_CORRUPTED_GOTO(i
== 1, error0
);
3340 * If there's a left sibling, see if it's ok to shift an entry
3343 if (!xfs_btree_ptr_is_null(cur
, &lptr
)) {
3345 * Move the temp cursor to the first entry in the
3348 i
= xfs_btree_firstrec(tcur
, level
);
3349 XFS_WANT_CORRUPTED_GOTO(i
== 1, error0
);
3351 error
= xfs_btree_decrement(tcur
, level
, &i
);
3354 i
= xfs_btree_firstrec(tcur
, level
);
3355 XFS_WANT_CORRUPTED_GOTO(i
== 1, error0
);
3357 /* Grab a pointer to the block. */
3358 left
= xfs_btree_get_block(tcur
, level
, &lbp
);
3360 error
= xfs_btree_check_block(cur
, left
, level
, lbp
);
3364 /* Grab the current block number, for future use. */
3365 xfs_btree_get_sibling(tcur
, left
, &cptr
, XFS_BB_RIGHTSIB
);
3368 * If left block is full enough so that removing one entry
3369 * won't make it too empty, and right-shifting an entry out
3370 * of left to us works, we're done.
3372 if (xfs_btree_get_numrecs(left
) - 1 >=
3373 cur
->bc_ops
->get_minrecs(tcur
, level
)) {
3374 error
= xfs_btree_rshift(tcur
, level
, &i
);
3378 ASSERT(xfs_btree_get_numrecs(block
) >=
3379 cur
->bc_ops
->get_minrecs(tcur
, level
));
3380 xfs_btree_del_cursor(tcur
, XFS_BTREE_NOERROR
);
3384 XFS_BTREE_TRACE_CURSOR(cur
, XBT_EXIT
);
3391 * Otherwise, grab the number of records in right for
3394 lrecs
= xfs_btree_get_numrecs(left
);
3397 /* Delete the temp cursor, we're done with it. */
3398 xfs_btree_del_cursor(tcur
, XFS_BTREE_NOERROR
);
3401 /* If here, we need to do a join to keep the tree balanced. */
3402 ASSERT(!xfs_btree_ptr_is_null(cur
, &cptr
));
3404 if (!xfs_btree_ptr_is_null(cur
, &lptr
) &&
3405 lrecs
+ xfs_btree_get_numrecs(block
) <=
3406 cur
->bc_ops
->get_maxrecs(cur
, level
)) {
3408 * Set "right" to be the starting block,
3409 * "left" to be the left neighbor.
3414 error
= xfs_btree_read_buf_block(cur
, &lptr
, level
,
3420 * If that won't work, see if we can join with the right neighbor block.
3422 } else if (!xfs_btree_ptr_is_null(cur
, &rptr
) &&
3423 rrecs
+ xfs_btree_get_numrecs(block
) <=
3424 cur
->bc_ops
->get_maxrecs(cur
, level
)) {
3426 * Set "left" to be the starting block,
3427 * "right" to be the right neighbor.
3432 error
= xfs_btree_read_buf_block(cur
, &rptr
, level
,
3438 * Otherwise, we can't fix the imbalance.
3439 * Just return. This is probably a logic error, but it's not fatal.
3442 error
= xfs_btree_dec_cursor(cur
, level
, stat
);
3448 rrecs
= xfs_btree_get_numrecs(right
);
3449 lrecs
= xfs_btree_get_numrecs(left
);
3452 * We're now going to join "left" and "right" by moving all the stuff
3453 * in "right" to "left" and deleting "right".
3455 XFS_BTREE_STATS_ADD(cur
, moves
, rrecs
);
3457 /* It's a non-leaf. Move keys and pointers. */
3458 union xfs_btree_key
*lkp
; /* left btree key */
3459 union xfs_btree_ptr
*lpp
; /* left address pointer */
3460 union xfs_btree_key
*rkp
; /* right btree key */
3461 union xfs_btree_ptr
*rpp
; /* right address pointer */
3463 lkp
= xfs_btree_key_addr(cur
, lrecs
+ 1, left
);
3464 lpp
= xfs_btree_ptr_addr(cur
, lrecs
+ 1, left
);
3465 rkp
= xfs_btree_key_addr(cur
, 1, right
);
3466 rpp
= xfs_btree_ptr_addr(cur
, 1, right
);
3468 for (i
= 1; i
< rrecs
; i
++) {
3469 error
= xfs_btree_check_ptr(cur
, rpp
, i
, level
);
3474 xfs_btree_copy_keys(cur
, lkp
, rkp
, rrecs
);
3475 xfs_btree_copy_ptrs(cur
, lpp
, rpp
, rrecs
);
3477 xfs_btree_log_keys(cur
, lbp
, lrecs
+ 1, lrecs
+ rrecs
);
3478 xfs_btree_log_ptrs(cur
, lbp
, lrecs
+ 1, lrecs
+ rrecs
);
3480 /* It's a leaf. Move records. */
3481 union xfs_btree_rec
*lrp
; /* left record pointer */
3482 union xfs_btree_rec
*rrp
; /* right record pointer */
3484 lrp
= xfs_btree_rec_addr(cur
, lrecs
+ 1, left
);
3485 rrp
= xfs_btree_rec_addr(cur
, 1, right
);
3487 xfs_btree_copy_recs(cur
, lrp
, rrp
, rrecs
);
3488 xfs_btree_log_recs(cur
, lbp
, lrecs
+ 1, lrecs
+ rrecs
);
3491 XFS_BTREE_STATS_INC(cur
, join
);
3494 * Fix up the number of records and right block pointer in the
3495 * surviving block, and log it.
3497 xfs_btree_set_numrecs(left
, lrecs
+ rrecs
);
3498 xfs_btree_get_sibling(cur
, right
, &cptr
, XFS_BB_RIGHTSIB
),
3499 xfs_btree_set_sibling(cur
, left
, &cptr
, XFS_BB_RIGHTSIB
);
3500 xfs_btree_log_block(cur
, lbp
, XFS_BB_NUMRECS
| XFS_BB_RIGHTSIB
);
3502 /* If there is a right sibling, point it to the remaining block. */
3503 xfs_btree_get_sibling(cur
, left
, &cptr
, XFS_BB_RIGHTSIB
);
3504 if (!xfs_btree_ptr_is_null(cur
, &cptr
)) {
3505 error
= xfs_btree_read_buf_block(cur
, &cptr
, level
,
3506 0, &rrblock
, &rrbp
);
3509 xfs_btree_set_sibling(cur
, rrblock
, &lptr
, XFS_BB_LEFTSIB
);
3510 xfs_btree_log_block(cur
, rrbp
, XFS_BB_LEFTSIB
);
3513 /* Free the deleted block. */
3514 error
= cur
->bc_ops
->free_block(cur
, rbp
);
3517 XFS_BTREE_STATS_INC(cur
, free
);
3520 * If we joined with the left neighbor, set the buffer in the
3521 * cursor to the left block, and fix up the index.
3524 cur
->bc_bufs
[level
] = lbp
;
3525 cur
->bc_ptrs
[level
] += lrecs
;
3526 cur
->bc_ra
[level
] = 0;
3529 * If we joined with the right neighbor and there's a level above
3530 * us, increment the cursor at that level.
3532 else if ((cur
->bc_flags
& XFS_BTREE_ROOT_IN_INODE
) ||
3533 (level
+ 1 < cur
->bc_nlevels
)) {
3534 error
= xfs_btree_increment(cur
, level
+ 1, &i
);
3540 * Readjust the ptr at this level if it's not a leaf, since it's
3541 * still pointing at the deletion point, which makes the cursor
3542 * inconsistent. If this makes the ptr 0, the caller fixes it up.
3543 * We can't use decrement because it would change the next level up.
3546 cur
->bc_ptrs
[level
]--;
3548 XFS_BTREE_TRACE_CURSOR(cur
, XBT_EXIT
);
3549 /* Return value means the next level up has something to do. */
3554 XFS_BTREE_TRACE_CURSOR(cur
, XBT_ERROR
);
3556 xfs_btree_del_cursor(tcur
, XFS_BTREE_ERROR
);
3561 * Delete the record pointed to by cur.
3562 * The cursor refers to the place where the record was (could be inserted)
3563 * when the operation returns.
3567 struct xfs_btree_cur
*cur
,
3568 int *stat
) /* success/failure */
3570 int error
; /* error return value */
3574 XFS_BTREE_TRACE_CURSOR(cur
, XBT_ENTRY
);
3577 * Go up the tree, starting at leaf level.
3579 * If 2 is returned then a join was done; go to the next level.
3580 * Otherwise we are done.
3582 for (level
= 0, i
= 2; i
== 2; level
++) {
3583 error
= xfs_btree_delrec(cur
, level
, &i
);
3589 for (level
= 1; level
< cur
->bc_nlevels
; level
++) {
3590 if (cur
->bc_ptrs
[level
] == 0) {
3591 error
= xfs_btree_decrement(cur
, level
, &i
);
3599 XFS_BTREE_TRACE_CURSOR(cur
, XBT_EXIT
);
3603 XFS_BTREE_TRACE_CURSOR(cur
, XBT_ERROR
);
3608 * Get the data from the pointed-to record.
3612 struct xfs_btree_cur
*cur
, /* btree cursor */
3613 union xfs_btree_rec
**recp
, /* output: btree record */
3614 int *stat
) /* output: success/failure */
3616 struct xfs_btree_block
*block
; /* btree block */
3617 struct xfs_buf
*bp
; /* buffer pointer */
3618 int ptr
; /* record number */
3620 int error
; /* error return value */
3623 ptr
= cur
->bc_ptrs
[0];
3624 block
= xfs_btree_get_block(cur
, 0, &bp
);
3627 error
= xfs_btree_check_block(cur
, block
, 0, bp
);
3633 * Off the right end or left end, return failure.
3635 if (ptr
> xfs_btree_get_numrecs(block
) || ptr
<= 0) {
3641 * Point to the record and extract its data.
3643 *recp
= xfs_btree_rec_addr(cur
, ptr
, block
);