2 * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
20 #include "xfs_types.h"
24 #include "xfs_trans.h"
28 #include "xfs_dmapi.h"
29 #include "xfs_mount.h"
30 #include "xfs_bmap_btree.h"
31 #include "xfs_alloc_btree.h"
32 #include "xfs_ialloc_btree.h"
33 #include "xfs_dir2_sf.h"
34 #include "xfs_attr_sf.h"
35 #include "xfs_dinode.h"
36 #include "xfs_inode.h"
37 #include "xfs_btree.h"
38 #include "xfs_btree_trace.h"
39 #include "xfs_ialloc.h"
40 #include "xfs_alloc.h"
41 #include "xfs_error.h"
43 STATIC
void xfs_inobt_log_block(xfs_trans_t
*, xfs_buf_t
*, int);
44 STATIC
void xfs_inobt_log_keys(xfs_btree_cur_t
*, xfs_buf_t
*, int, int);
45 STATIC
void xfs_inobt_log_ptrs(xfs_btree_cur_t
*, xfs_buf_t
*, int, int);
46 STATIC
void xfs_inobt_log_recs(xfs_btree_cur_t
*, xfs_buf_t
*, int, int);
47 STATIC
int xfs_inobt_newroot(xfs_btree_cur_t
*, int *);
50 * Single level of the xfs_inobt_delete record deletion routine.
51 * Delete record pointed to by cur/level.
52 * Remove the record from its block then rebalance the tree.
53 * Set *stat to 0 for failure, 1 for done, 2 to go on to the next level.
55 STATIC
int /* error */
57 xfs_btree_cur_t
*cur
, /* btree cursor */
58 int level
, /* level removing record from */
59 int *stat
) /* fail/done/go-on */
61 xfs_buf_t
*agbp
; /* buffer for a.g. inode header */
62 xfs_mount_t
*mp
; /* mount structure */
63 xfs_agi_t
*agi
; /* allocation group inode header */
64 xfs_inobt_block_t
*block
; /* btree block record/key lives in */
65 xfs_agblock_t bno
; /* btree block number */
66 xfs_buf_t
*bp
; /* buffer for block */
67 int error
; /* error return value */
68 int i
; /* loop index */
69 xfs_inobt_key_t key
; /* kp points here if block is level 0 */
70 xfs_inobt_key_t
*kp
= NULL
; /* pointer to btree keys */
71 xfs_agblock_t lbno
; /* left block's block number */
72 xfs_buf_t
*lbp
; /* left block's buffer pointer */
73 xfs_inobt_block_t
*left
; /* left btree block */
74 xfs_inobt_key_t
*lkp
; /* left block key pointer */
75 xfs_inobt_ptr_t
*lpp
; /* left block address pointer */
76 int lrecs
= 0; /* number of records in left block */
77 xfs_inobt_rec_t
*lrp
; /* left block record pointer */
78 xfs_inobt_ptr_t
*pp
= NULL
; /* pointer to btree addresses */
79 int ptr
; /* index in btree block for this rec */
80 xfs_agblock_t rbno
; /* right block's block number */
81 xfs_buf_t
*rbp
; /* right block's buffer pointer */
82 xfs_inobt_block_t
*right
; /* right btree block */
83 xfs_inobt_key_t
*rkp
; /* right block key pointer */
84 xfs_inobt_rec_t
*rp
; /* pointer to btree records */
85 xfs_inobt_ptr_t
*rpp
; /* right block address pointer */
86 int rrecs
= 0; /* number of records in right block */
88 xfs_inobt_rec_t
*rrp
; /* right block record pointer */
89 xfs_btree_cur_t
*tcur
; /* temporary btree cursor */
94 * Get the index of the entry being deleted, check for nothing there.
96 ptr
= cur
->bc_ptrs
[level
];
103 * Get the buffer & block containing the record or key/ptr.
105 bp
= cur
->bc_bufs
[level
];
106 block
= XFS_BUF_TO_INOBT_BLOCK(bp
);
108 if ((error
= xfs_btree_check_sblock(cur
, block
, level
, bp
)))
112 * Fail if we're off the end of the block.
115 numrecs
= be16_to_cpu(block
->bb_numrecs
);
121 * It's a nonleaf. Excise the key and ptr being deleted, by
122 * sliding the entries past them down one.
123 * Log the changed areas of the block.
126 kp
= XFS_INOBT_KEY_ADDR(block
, 1, cur
);
127 pp
= XFS_INOBT_PTR_ADDR(block
, 1, cur
);
129 for (i
= ptr
; i
< numrecs
; i
++) {
130 if ((error
= xfs_btree_check_sptr(cur
, be32_to_cpu(pp
[i
]), level
)))
135 memmove(&kp
[ptr
- 1], &kp
[ptr
],
136 (numrecs
- ptr
) * sizeof(*kp
));
137 memmove(&pp
[ptr
- 1], &pp
[ptr
],
138 (numrecs
- ptr
) * sizeof(*kp
));
139 xfs_inobt_log_keys(cur
, bp
, ptr
, numrecs
- 1);
140 xfs_inobt_log_ptrs(cur
, bp
, ptr
, numrecs
- 1);
144 * It's a leaf. Excise the record being deleted, by sliding the
145 * entries past it down one. Log the changed areas of the block.
148 rp
= XFS_INOBT_REC_ADDR(block
, 1, cur
);
150 memmove(&rp
[ptr
- 1], &rp
[ptr
],
151 (numrecs
- ptr
) * sizeof(*rp
));
152 xfs_inobt_log_recs(cur
, bp
, ptr
, numrecs
- 1);
155 * If it's the first record in the block, we'll need a key
156 * structure to pass up to the next level (updkey).
159 key
.ir_startino
= rp
->ir_startino
;
164 * Decrement and log the number of entries in the block.
167 block
->bb_numrecs
= cpu_to_be16(numrecs
);
168 xfs_inobt_log_block(cur
->bc_tp
, bp
, XFS_BB_NUMRECS
);
170 * Is this the root level? If so, we're almost done.
172 if (level
== cur
->bc_nlevels
- 1) {
174 * If this is the root level,
175 * and there's only one entry left,
176 * and it's NOT the leaf level,
177 * then we can get rid of this level.
179 if (numrecs
== 1 && level
> 0) {
180 agbp
= cur
->bc_private
.a
.agbp
;
181 agi
= XFS_BUF_TO_AGI(agbp
);
183 * pp is still set to the first pointer in the block.
184 * Make it the new root of the btree.
186 bno
= be32_to_cpu(agi
->agi_root
);
188 be32_add_cpu(&agi
->agi_level
, -1);
192 if ((error
= xfs_free_extent(cur
->bc_tp
,
193 XFS_AGB_TO_FSB(mp
, cur
->bc_private
.a
.agno
, bno
), 1)))
195 xfs_trans_binval(cur
->bc_tp
, bp
);
196 xfs_ialloc_log_agi(cur
->bc_tp
, agbp
,
197 XFS_AGI_ROOT
| XFS_AGI_LEVEL
);
199 * Update the cursor so there's one fewer level.
201 cur
->bc_bufs
[level
] = NULL
;
203 } else if (level
> 0 &&
204 (error
= xfs_btree_decrement(cur
, level
, &i
)))
210 * If we deleted the leftmost entry in the block, update the
211 * key values above us in the tree.
213 if (ptr
== 1 && (error
= xfs_btree_updkey(cur
, (union xfs_btree_key
*)kp
, level
+ 1)))
216 * If the number of records remaining in the block is at least
217 * the minimum, we're done.
219 if (numrecs
>= XFS_INOBT_BLOCK_MINRECS(level
, cur
)) {
221 (error
= xfs_btree_decrement(cur
, level
, &i
)))
227 * Otherwise, we have to move some records around to keep the
228 * tree balanced. Look at the left and right sibling blocks to
229 * see if we can re-balance by moving only one record.
231 rbno
= be32_to_cpu(block
->bb_rightsib
);
232 lbno
= be32_to_cpu(block
->bb_leftsib
);
234 ASSERT(rbno
!= NULLAGBLOCK
|| lbno
!= NULLAGBLOCK
);
236 * Duplicate the cursor so our btree manipulations here won't
237 * disrupt the next level up.
239 if ((error
= xfs_btree_dup_cursor(cur
, &tcur
)))
242 * If there's a right sibling, see if it's ok to shift an entry
245 if (rbno
!= NULLAGBLOCK
) {
247 * Move the temp cursor to the last entry in the next block.
248 * Actually any entry but the first would suffice.
250 i
= xfs_btree_lastrec(tcur
, level
);
251 XFS_WANT_CORRUPTED_GOTO(i
== 1, error0
);
252 if ((error
= xfs_btree_increment(tcur
, level
, &i
)))
254 XFS_WANT_CORRUPTED_GOTO(i
== 1, error0
);
255 i
= xfs_btree_lastrec(tcur
, level
);
256 XFS_WANT_CORRUPTED_GOTO(i
== 1, error0
);
258 * Grab a pointer to the block.
260 rbp
= tcur
->bc_bufs
[level
];
261 right
= XFS_BUF_TO_INOBT_BLOCK(rbp
);
263 if ((error
= xfs_btree_check_sblock(cur
, right
, level
, rbp
)))
267 * Grab the current block number, for future use.
269 bno
= be32_to_cpu(right
->bb_leftsib
);
271 * If right block is full enough so that removing one entry
272 * won't make it too empty, and left-shifting an entry out
273 * of right to us works, we're done.
275 if (be16_to_cpu(right
->bb_numrecs
) - 1 >=
276 XFS_INOBT_BLOCK_MINRECS(level
, cur
)) {
277 if ((error
= xfs_btree_lshift(tcur
, level
, &i
)))
280 ASSERT(be16_to_cpu(block
->bb_numrecs
) >=
281 XFS_INOBT_BLOCK_MINRECS(level
, cur
));
282 xfs_btree_del_cursor(tcur
,
285 (error
= xfs_btree_decrement(cur
, level
,
293 * Otherwise, grab the number of records in right for
294 * future reference, and fix up the temp cursor to point
295 * to our block again (last record).
297 rrecs
= be16_to_cpu(right
->bb_numrecs
);
298 if (lbno
!= NULLAGBLOCK
) {
299 xfs_btree_firstrec(tcur
, level
);
300 if ((error
= xfs_btree_decrement(tcur
, level
, &i
)))
305 * If there's a left sibling, see if it's ok to shift an entry
308 if (lbno
!= NULLAGBLOCK
) {
310 * Move the temp cursor to the first entry in the
313 xfs_btree_firstrec(tcur
, level
);
314 if ((error
= xfs_btree_decrement(tcur
, level
, &i
)))
316 xfs_btree_firstrec(tcur
, level
);
318 * Grab a pointer to the block.
320 lbp
= tcur
->bc_bufs
[level
];
321 left
= XFS_BUF_TO_INOBT_BLOCK(lbp
);
323 if ((error
= xfs_btree_check_sblock(cur
, left
, level
, lbp
)))
327 * Grab the current block number, for future use.
329 bno
= be32_to_cpu(left
->bb_rightsib
);
331 * If left block is full enough so that removing one entry
332 * won't make it too empty, and right-shifting an entry out
333 * of left to us works, we're done.
335 if (be16_to_cpu(left
->bb_numrecs
) - 1 >=
336 XFS_INOBT_BLOCK_MINRECS(level
, cur
)) {
337 if ((error
= xfs_btree_rshift(tcur
, level
, &i
)))
340 ASSERT(be16_to_cpu(block
->bb_numrecs
) >=
341 XFS_INOBT_BLOCK_MINRECS(level
, cur
));
342 xfs_btree_del_cursor(tcur
,
351 * Otherwise, grab the number of records in left for
354 lrecs
= be16_to_cpu(left
->bb_numrecs
);
357 * Delete the temp cursor, we're done with it.
359 xfs_btree_del_cursor(tcur
, XFS_BTREE_NOERROR
);
361 * If here, we need to do a join to keep the tree balanced.
363 ASSERT(bno
!= NULLAGBLOCK
);
365 * See if we can join with the left neighbor block.
367 if (lbno
!= NULLAGBLOCK
&&
368 lrecs
+ numrecs
<= XFS_INOBT_BLOCK_MAXRECS(level
, cur
)) {
370 * Set "right" to be the starting block,
371 * "left" to be the left neighbor.
375 rrecs
= be16_to_cpu(right
->bb_numrecs
);
377 if ((error
= xfs_btree_read_bufs(mp
, cur
->bc_tp
,
378 cur
->bc_private
.a
.agno
, lbno
, 0, &lbp
,
381 left
= XFS_BUF_TO_INOBT_BLOCK(lbp
);
382 lrecs
= be16_to_cpu(left
->bb_numrecs
);
383 if ((error
= xfs_btree_check_sblock(cur
, left
, level
, lbp
)))
387 * If that won't work, see if we can join with the right neighbor block.
389 else if (rbno
!= NULLAGBLOCK
&&
390 rrecs
+ numrecs
<= XFS_INOBT_BLOCK_MAXRECS(level
, cur
)) {
392 * Set "left" to be the starting block,
393 * "right" to be the right neighbor.
397 lrecs
= be16_to_cpu(left
->bb_numrecs
);
399 if ((error
= xfs_btree_read_bufs(mp
, cur
->bc_tp
,
400 cur
->bc_private
.a
.agno
, rbno
, 0, &rbp
,
403 right
= XFS_BUF_TO_INOBT_BLOCK(rbp
);
404 rrecs
= be16_to_cpu(right
->bb_numrecs
);
405 if ((error
= xfs_btree_check_sblock(cur
, right
, level
, rbp
)))
409 * Otherwise, we can't fix the imbalance.
410 * Just return. This is probably a logic error, but it's not fatal.
413 if (level
> 0 && (error
= xfs_btree_decrement(cur
, level
, &i
)))
419 * We're now going to join "left" and "right" by moving all the stuff
420 * in "right" to "left" and deleting "right".
424 * It's a non-leaf. Move keys and pointers.
426 lkp
= XFS_INOBT_KEY_ADDR(left
, lrecs
+ 1, cur
);
427 lpp
= XFS_INOBT_PTR_ADDR(left
, lrecs
+ 1, cur
);
428 rkp
= XFS_INOBT_KEY_ADDR(right
, 1, cur
);
429 rpp
= XFS_INOBT_PTR_ADDR(right
, 1, cur
);
431 for (i
= 0; i
< rrecs
; i
++) {
432 if ((error
= xfs_btree_check_sptr(cur
, be32_to_cpu(rpp
[i
]), level
)))
436 memcpy(lkp
, rkp
, rrecs
* sizeof(*lkp
));
437 memcpy(lpp
, rpp
, rrecs
* sizeof(*lpp
));
438 xfs_inobt_log_keys(cur
, lbp
, lrecs
+ 1, lrecs
+ rrecs
);
439 xfs_inobt_log_ptrs(cur
, lbp
, lrecs
+ 1, lrecs
+ rrecs
);
442 * It's a leaf. Move records.
444 lrp
= XFS_INOBT_REC_ADDR(left
, lrecs
+ 1, cur
);
445 rrp
= XFS_INOBT_REC_ADDR(right
, 1, cur
);
446 memcpy(lrp
, rrp
, rrecs
* sizeof(*lrp
));
447 xfs_inobt_log_recs(cur
, lbp
, lrecs
+ 1, lrecs
+ rrecs
);
450 * If we joined with the left neighbor, set the buffer in the
451 * cursor to the left block, and fix up the index.
454 xfs_btree_setbuf(cur
, level
, lbp
);
455 cur
->bc_ptrs
[level
] += lrecs
;
458 * If we joined with the right neighbor and there's a level above
459 * us, increment the cursor at that level.
461 else if (level
+ 1 < cur
->bc_nlevels
&&
462 (error
= xfs_btree_increment(cur
, level
+ 1, &i
)))
465 * Fix up the number of records in the surviving block.
468 left
->bb_numrecs
= cpu_to_be16(lrecs
);
470 * Fix up the right block pointer in the surviving block, and log it.
472 left
->bb_rightsib
= right
->bb_rightsib
;
473 xfs_inobt_log_block(cur
->bc_tp
, lbp
, XFS_BB_NUMRECS
| XFS_BB_RIGHTSIB
);
475 * If there is a right sibling now, make it point to the
478 if (be32_to_cpu(left
->bb_rightsib
) != NULLAGBLOCK
) {
479 xfs_inobt_block_t
*rrblock
;
482 if ((error
= xfs_btree_read_bufs(mp
, cur
->bc_tp
,
483 cur
->bc_private
.a
.agno
, be32_to_cpu(left
->bb_rightsib
), 0,
484 &rrbp
, XFS_INO_BTREE_REF
)))
486 rrblock
= XFS_BUF_TO_INOBT_BLOCK(rrbp
);
487 if ((error
= xfs_btree_check_sblock(cur
, rrblock
, level
, rrbp
)))
489 rrblock
->bb_leftsib
= cpu_to_be32(lbno
);
490 xfs_inobt_log_block(cur
->bc_tp
, rrbp
, XFS_BB_LEFTSIB
);
493 * Free the deleting block.
495 if ((error
= xfs_free_extent(cur
->bc_tp
, XFS_AGB_TO_FSB(mp
,
496 cur
->bc_private
.a
.agno
, rbno
), 1)))
498 xfs_trans_binval(cur
->bc_tp
, rbp
);
500 * Readjust the ptr at this level if it's not a leaf, since it's
501 * still pointing at the deletion point, which makes the cursor
502 * inconsistent. If this makes the ptr 0, the caller fixes it up.
503 * We can't use decrement because it would change the next level up.
506 cur
->bc_ptrs
[level
]--;
508 * Return value means the next level up has something to do.
514 xfs_btree_del_cursor(tcur
, XFS_BTREE_ERROR
);
519 * Insert one record/level. Return information to the caller
520 * allowing the next level up to proceed if necessary.
522 STATIC
int /* error */
524 xfs_btree_cur_t
*cur
, /* btree cursor */
525 int level
, /* level to insert record at */
526 xfs_agblock_t
*bnop
, /* i/o: block number inserted */
527 xfs_inobt_rec_t
*recp
, /* i/o: record data inserted */
528 xfs_btree_cur_t
**curp
, /* output: new cursor replacing cur */
529 int *stat
) /* success/failure */
531 xfs_inobt_block_t
*block
; /* btree block record/key lives in */
532 xfs_buf_t
*bp
; /* buffer for block */
533 int error
; /* error return value */
534 int i
; /* loop index */
535 xfs_inobt_key_t key
; /* key value being inserted */
536 xfs_inobt_key_t
*kp
=NULL
; /* pointer to btree keys */
537 xfs_agblock_t nbno
; /* block number of allocated block */
538 xfs_btree_cur_t
*ncur
; /* new cursor to be used at next lvl */
539 xfs_inobt_key_t nkey
; /* new key value, from split */
540 xfs_inobt_rec_t nrec
; /* new record value, for caller */
542 int optr
; /* old ptr value */
543 xfs_inobt_ptr_t
*pp
; /* pointer to btree addresses */
544 int ptr
; /* index in btree block for this rec */
545 xfs_inobt_rec_t
*rp
=NULL
; /* pointer to btree records */
548 * GCC doesn't understand the (arguably complex) control flow in
549 * this function and complains about uninitialized structure fields
552 memset(&nrec
, 0, sizeof(nrec
));
555 * If we made it to the root level, allocate a new root block
558 if (level
>= cur
->bc_nlevels
) {
559 error
= xfs_inobt_newroot(cur
, &i
);
565 * Make a key out of the record data to be inserted, and save it.
567 key
.ir_startino
= recp
->ir_startino
;
568 optr
= ptr
= cur
->bc_ptrs
[level
];
570 * If we're off the left edge, return failure.
577 * Get pointers to the btree buffer and block.
579 bp
= cur
->bc_bufs
[level
];
580 block
= XFS_BUF_TO_INOBT_BLOCK(bp
);
581 numrecs
= be16_to_cpu(block
->bb_numrecs
);
583 if ((error
= xfs_btree_check_sblock(cur
, block
, level
, bp
)))
586 * Check that the new entry is being inserted in the right place.
588 if (ptr
<= numrecs
) {
590 rp
= XFS_INOBT_REC_ADDR(block
, ptr
, cur
);
591 xfs_btree_check_rec(cur
->bc_btnum
, recp
, rp
);
593 kp
= XFS_INOBT_KEY_ADDR(block
, ptr
, cur
);
594 xfs_btree_check_key(cur
->bc_btnum
, &key
, kp
);
601 * If the block is full, we can't insert the new entry until we
602 * make the block un-full.
604 if (numrecs
== XFS_INOBT_BLOCK_MAXRECS(level
, cur
)) {
606 * First, try shifting an entry to the right neighbor.
608 if ((error
= xfs_btree_rshift(cur
, level
, &i
)))
614 * Next, try shifting an entry to the left neighbor.
617 if ((error
= xfs_btree_lshift(cur
, level
, &i
)))
620 optr
= ptr
= cur
->bc_ptrs
[level
];
622 union xfs_btree_ptr bno
= { .s
= cpu_to_be32(nbno
) };
624 * Next, try splitting the current block
625 * in half. If this works we have to
626 * re-set our variables because
627 * we could be in a different block now.
629 if ((error
= xfs_btree_split(cur
, level
, &bno
,
630 (union xfs_btree_key
*)&nkey
,
633 nbno
= be32_to_cpu(bno
.s
);
635 bp
= cur
->bc_bufs
[level
];
636 block
= XFS_BUF_TO_INOBT_BLOCK(bp
);
638 if ((error
= xfs_btree_check_sblock(cur
,
642 ptr
= cur
->bc_ptrs
[level
];
643 nrec
.ir_startino
= nkey
.ir_startino
;
646 * Otherwise the insert fails.
655 * At this point we know there's room for our new entry in the block
658 numrecs
= be16_to_cpu(block
->bb_numrecs
);
661 * It's a non-leaf entry. Make a hole for the new data
662 * in the key and ptr regions of the block.
664 kp
= XFS_INOBT_KEY_ADDR(block
, 1, cur
);
665 pp
= XFS_INOBT_PTR_ADDR(block
, 1, cur
);
667 for (i
= numrecs
; i
>= ptr
; i
--) {
668 if ((error
= xfs_btree_check_sptr(cur
, be32_to_cpu(pp
[i
- 1]), level
)))
672 memmove(&kp
[ptr
], &kp
[ptr
- 1],
673 (numrecs
- ptr
+ 1) * sizeof(*kp
));
674 memmove(&pp
[ptr
], &pp
[ptr
- 1],
675 (numrecs
- ptr
+ 1) * sizeof(*pp
));
677 * Now stuff the new data in, bump numrecs and log the new data.
680 if ((error
= xfs_btree_check_sptr(cur
, *bnop
, level
)))
684 pp
[ptr
- 1] = cpu_to_be32(*bnop
);
686 block
->bb_numrecs
= cpu_to_be16(numrecs
);
687 xfs_inobt_log_keys(cur
, bp
, ptr
, numrecs
);
688 xfs_inobt_log_ptrs(cur
, bp
, ptr
, numrecs
);
691 * It's a leaf entry. Make a hole for the new record.
693 rp
= XFS_INOBT_REC_ADDR(block
, 1, cur
);
694 memmove(&rp
[ptr
], &rp
[ptr
- 1],
695 (numrecs
- ptr
+ 1) * sizeof(*rp
));
697 * Now stuff the new record in, bump numrecs
698 * and log the new data.
702 block
->bb_numrecs
= cpu_to_be16(numrecs
);
703 xfs_inobt_log_recs(cur
, bp
, ptr
, numrecs
);
706 * Log the new number of records in the btree header.
708 xfs_inobt_log_block(cur
->bc_tp
, bp
, XFS_BB_NUMRECS
);
711 * Check that the key/record is in the right place, now.
715 xfs_btree_check_rec(cur
->bc_btnum
, rp
+ ptr
- 1,
718 xfs_btree_check_key(cur
->bc_btnum
, kp
+ ptr
- 1,
723 * If we inserted at the start of a block, update the parents' keys.
725 if (optr
== 1 && (error
= xfs_btree_updkey(cur
, (union xfs_btree_key
*)&key
, level
+ 1)))
728 * Return the new block number, if any.
729 * If there is one, give back a record value and a cursor too.
732 if (nbno
!= NULLAGBLOCK
) {
741 * Log header fields from a btree block.
745 xfs_trans_t
*tp
, /* transaction pointer */
746 xfs_buf_t
*bp
, /* buffer containing btree block */
747 int fields
) /* mask of fields: XFS_BB_... */
749 int first
; /* first byte offset logged */
750 int last
; /* last byte offset logged */
751 static const short offsets
[] = { /* table of offsets */
752 offsetof(xfs_inobt_block_t
, bb_magic
),
753 offsetof(xfs_inobt_block_t
, bb_level
),
754 offsetof(xfs_inobt_block_t
, bb_numrecs
),
755 offsetof(xfs_inobt_block_t
, bb_leftsib
),
756 offsetof(xfs_inobt_block_t
, bb_rightsib
),
757 sizeof(xfs_inobt_block_t
)
760 xfs_btree_offsets(fields
, offsets
, XFS_BB_NUM_BITS
, &first
, &last
);
761 xfs_trans_log_buf(tp
, bp
, first
, last
);
765 * Log keys from a btree block (nonleaf).
769 xfs_btree_cur_t
*cur
, /* btree cursor */
770 xfs_buf_t
*bp
, /* buffer containing btree block */
771 int kfirst
, /* index of first key to log */
772 int klast
) /* index of last key to log */
774 xfs_inobt_block_t
*block
; /* btree block to log from */
775 int first
; /* first byte offset logged */
776 xfs_inobt_key_t
*kp
; /* key pointer in btree block */
777 int last
; /* last byte offset logged */
779 block
= XFS_BUF_TO_INOBT_BLOCK(bp
);
780 kp
= XFS_INOBT_KEY_ADDR(block
, 1, cur
);
781 first
= (int)((xfs_caddr_t
)&kp
[kfirst
- 1] - (xfs_caddr_t
)block
);
782 last
= (int)(((xfs_caddr_t
)&kp
[klast
] - 1) - (xfs_caddr_t
)block
);
783 xfs_trans_log_buf(cur
->bc_tp
, bp
, first
, last
);
787 * Log block pointer fields from a btree block (nonleaf).
791 xfs_btree_cur_t
*cur
, /* btree cursor */
792 xfs_buf_t
*bp
, /* buffer containing btree block */
793 int pfirst
, /* index of first pointer to log */
794 int plast
) /* index of last pointer to log */
796 xfs_inobt_block_t
*block
; /* btree block to log from */
797 int first
; /* first byte offset logged */
798 int last
; /* last byte offset logged */
799 xfs_inobt_ptr_t
*pp
; /* block-pointer pointer in btree blk */
801 block
= XFS_BUF_TO_INOBT_BLOCK(bp
);
802 pp
= XFS_INOBT_PTR_ADDR(block
, 1, cur
);
803 first
= (int)((xfs_caddr_t
)&pp
[pfirst
- 1] - (xfs_caddr_t
)block
);
804 last
= (int)(((xfs_caddr_t
)&pp
[plast
] - 1) - (xfs_caddr_t
)block
);
805 xfs_trans_log_buf(cur
->bc_tp
, bp
, first
, last
);
809 * Log records from a btree block (leaf).
813 xfs_btree_cur_t
*cur
, /* btree cursor */
814 xfs_buf_t
*bp
, /* buffer containing btree block */
815 int rfirst
, /* index of first record to log */
816 int rlast
) /* index of last record to log */
818 xfs_inobt_block_t
*block
; /* btree block to log from */
819 int first
; /* first byte offset logged */
820 int last
; /* last byte offset logged */
821 xfs_inobt_rec_t
*rp
; /* record pointer for btree block */
823 block
= XFS_BUF_TO_INOBT_BLOCK(bp
);
824 rp
= XFS_INOBT_REC_ADDR(block
, 1, cur
);
825 first
= (int)((xfs_caddr_t
)&rp
[rfirst
- 1] - (xfs_caddr_t
)block
);
826 last
= (int)(((xfs_caddr_t
)&rp
[rlast
] - 1) - (xfs_caddr_t
)block
);
827 xfs_trans_log_buf(cur
->bc_tp
, bp
, first
, last
);
831 * Allocate a new root block, fill it in.
833 STATIC
int /* error */
835 xfs_btree_cur_t
*cur
, /* btree cursor */
836 int *stat
) /* success/failure */
838 xfs_agi_t
*agi
; /* a.g. inode header */
839 xfs_alloc_arg_t args
; /* allocation argument structure */
840 xfs_inobt_block_t
*block
; /* one half of the old root block */
841 xfs_buf_t
*bp
; /* buffer containing block */
842 int error
; /* error return value */
843 xfs_inobt_key_t
*kp
; /* btree key pointer */
844 xfs_agblock_t lbno
; /* left block number */
845 xfs_buf_t
*lbp
; /* left buffer pointer */
846 xfs_inobt_block_t
*left
; /* left btree block */
847 xfs_buf_t
*nbp
; /* new (root) buffer */
848 xfs_inobt_block_t
*new; /* new (root) btree block */
849 int nptr
; /* new value for key index, 1 or 2 */
850 xfs_inobt_ptr_t
*pp
; /* btree address pointer */
851 xfs_agblock_t rbno
; /* right block number */
852 xfs_buf_t
*rbp
; /* right buffer pointer */
853 xfs_inobt_block_t
*right
; /* right btree block */
854 xfs_inobt_rec_t
*rp
; /* btree record pointer */
856 ASSERT(cur
->bc_nlevels
< XFS_IN_MAXLEVELS(cur
->bc_mp
));
859 * Get a block & a buffer.
861 agi
= XFS_BUF_TO_AGI(cur
->bc_private
.a
.agbp
);
862 args
.tp
= cur
->bc_tp
;
863 args
.mp
= cur
->bc_mp
;
864 args
.fsbno
= XFS_AGB_TO_FSB(args
.mp
, cur
->bc_private
.a
.agno
,
865 be32_to_cpu(agi
->agi_root
));
866 args
.mod
= args
.minleft
= args
.alignment
= args
.total
= args
.wasdel
=
867 args
.isfl
= args
.userdata
= args
.minalignslop
= 0;
868 args
.minlen
= args
.maxlen
= args
.prod
= 1;
869 args
.type
= XFS_ALLOCTYPE_NEAR_BNO
;
870 if ((error
= xfs_alloc_vextent(&args
)))
873 * None available, we fail.
875 if (args
.fsbno
== NULLFSBLOCK
) {
879 ASSERT(args
.len
== 1);
880 nbp
= xfs_btree_get_bufs(args
.mp
, args
.tp
, args
.agno
, args
.agbno
, 0);
881 new = XFS_BUF_TO_INOBT_BLOCK(nbp
);
883 * Set the root data in the a.g. inode structure.
885 agi
->agi_root
= cpu_to_be32(args
.agbno
);
886 be32_add_cpu(&agi
->agi_level
, 1);
887 xfs_ialloc_log_agi(args
.tp
, cur
->bc_private
.a
.agbp
,
888 XFS_AGI_ROOT
| XFS_AGI_LEVEL
);
890 * At the previous root level there are now two blocks: the old
891 * root, and the new block generated when it was split.
892 * We don't know which one the cursor is pointing at, so we
893 * set up variables "left" and "right" for each case.
895 bp
= cur
->bc_bufs
[cur
->bc_nlevels
- 1];
896 block
= XFS_BUF_TO_INOBT_BLOCK(bp
);
898 if ((error
= xfs_btree_check_sblock(cur
, block
, cur
->bc_nlevels
- 1, bp
)))
901 if (be32_to_cpu(block
->bb_rightsib
) != NULLAGBLOCK
) {
903 * Our block is left, pick up the right block.
906 lbno
= XFS_DADDR_TO_AGBNO(args
.mp
, XFS_BUF_ADDR(lbp
));
908 rbno
= be32_to_cpu(left
->bb_rightsib
);
909 if ((error
= xfs_btree_read_bufs(args
.mp
, args
.tp
, args
.agno
,
910 rbno
, 0, &rbp
, XFS_INO_BTREE_REF
)))
913 right
= XFS_BUF_TO_INOBT_BLOCK(rbp
);
914 if ((error
= xfs_btree_check_sblock(cur
, right
,
915 cur
->bc_nlevels
- 1, rbp
)))
920 * Our block is right, pick up the left block.
923 rbno
= XFS_DADDR_TO_AGBNO(args
.mp
, XFS_BUF_ADDR(rbp
));
925 lbno
= be32_to_cpu(right
->bb_leftsib
);
926 if ((error
= xfs_btree_read_bufs(args
.mp
, args
.tp
, args
.agno
,
927 lbno
, 0, &lbp
, XFS_INO_BTREE_REF
)))
930 left
= XFS_BUF_TO_INOBT_BLOCK(lbp
);
931 if ((error
= xfs_btree_check_sblock(cur
, left
,
932 cur
->bc_nlevels
- 1, lbp
)))
937 * Fill in the new block's btree header and log it.
939 new->bb_magic
= cpu_to_be32(xfs_magics
[cur
->bc_btnum
]);
940 new->bb_level
= cpu_to_be16(cur
->bc_nlevels
);
941 new->bb_numrecs
= cpu_to_be16(2);
942 new->bb_leftsib
= cpu_to_be32(NULLAGBLOCK
);
943 new->bb_rightsib
= cpu_to_be32(NULLAGBLOCK
);
944 xfs_inobt_log_block(args
.tp
, nbp
, XFS_BB_ALL_BITS
);
945 ASSERT(lbno
!= NULLAGBLOCK
&& rbno
!= NULLAGBLOCK
);
947 * Fill in the key data in the new root.
949 kp
= XFS_INOBT_KEY_ADDR(new, 1, cur
);
950 if (be16_to_cpu(left
->bb_level
) > 0) {
951 kp
[0] = *XFS_INOBT_KEY_ADDR(left
, 1, cur
);
952 kp
[1] = *XFS_INOBT_KEY_ADDR(right
, 1, cur
);
954 rp
= XFS_INOBT_REC_ADDR(left
, 1, cur
);
955 kp
[0].ir_startino
= rp
->ir_startino
;
956 rp
= XFS_INOBT_REC_ADDR(right
, 1, cur
);
957 kp
[1].ir_startino
= rp
->ir_startino
;
959 xfs_inobt_log_keys(cur
, nbp
, 1, 2);
961 * Fill in the pointer data in the new root.
963 pp
= XFS_INOBT_PTR_ADDR(new, 1, cur
);
964 pp
[0] = cpu_to_be32(lbno
);
965 pp
[1] = cpu_to_be32(rbno
);
966 xfs_inobt_log_ptrs(cur
, nbp
, 1, 2);
970 xfs_btree_setbuf(cur
, cur
->bc_nlevels
, nbp
);
971 cur
->bc_ptrs
[cur
->bc_nlevels
] = nptr
;
978 * Externally visible routines.
982 * Delete the record pointed to by cur.
983 * The cursor refers to the place where the record was (could be inserted)
984 * when the operation returns.
988 xfs_btree_cur_t
*cur
, /* btree cursor */
989 int *stat
) /* success/failure */
992 int i
; /* result code */
993 int level
; /* btree level */
996 * Go up the tree, starting at leaf level.
997 * If 2 is returned then a join was done; go to the next level.
998 * Otherwise we are done.
1000 for (level
= 0, i
= 2; i
== 2; level
++) {
1001 if ((error
= xfs_inobt_delrec(cur
, level
, &i
)))
1005 for (level
= 1; level
< cur
->bc_nlevels
; level
++) {
1006 if (cur
->bc_ptrs
[level
] == 0) {
1007 if ((error
= xfs_btree_decrement(cur
, level
, &i
)))
1019 * Get the data from the pointed-to record.
1023 xfs_btree_cur_t
*cur
, /* btree cursor */
1024 xfs_agino_t
*ino
, /* output: starting inode of chunk */
1025 __int32_t
*fcnt
, /* output: number of free inodes */
1026 xfs_inofree_t
*free
, /* output: free inode mask */
1027 int *stat
) /* output: success/failure */
1029 xfs_inobt_block_t
*block
; /* btree block */
1030 xfs_buf_t
*bp
; /* buffer containing btree block */
1032 int error
; /* error return value */
1034 int ptr
; /* record number */
1035 xfs_inobt_rec_t
*rec
; /* record data */
1037 bp
= cur
->bc_bufs
[0];
1038 ptr
= cur
->bc_ptrs
[0];
1039 block
= XFS_BUF_TO_INOBT_BLOCK(bp
);
1041 if ((error
= xfs_btree_check_sblock(cur
, block
, 0, bp
)))
1045 * Off the right end or left end, return failure.
1047 if (ptr
> be16_to_cpu(block
->bb_numrecs
) || ptr
<= 0) {
1052 * Point to the record and extract its data.
1054 rec
= XFS_INOBT_REC_ADDR(block
, ptr
, cur
);
1055 *ino
= be32_to_cpu(rec
->ir_startino
);
1056 *fcnt
= be32_to_cpu(rec
->ir_freecount
);
1057 *free
= be64_to_cpu(rec
->ir_free
);
1063 * Insert the current record at the point referenced by cur.
1064 * The cursor may be inconsistent on return if splits have been done.
1068 xfs_btree_cur_t
*cur
, /* btree cursor */
1069 int *stat
) /* success/failure */
1071 int error
; /* error return value */
1072 int i
; /* result value, 0 for failure */
1073 int level
; /* current level number in btree */
1074 xfs_agblock_t nbno
; /* new block number (split result) */
1075 xfs_btree_cur_t
*ncur
; /* new cursor (split result) */
1076 xfs_inobt_rec_t nrec
; /* record being inserted this level */
1077 xfs_btree_cur_t
*pcur
; /* previous level's cursor */
1081 nrec
.ir_startino
= cpu_to_be32(cur
->bc_rec
.i
.ir_startino
);
1082 nrec
.ir_freecount
= cpu_to_be32(cur
->bc_rec
.i
.ir_freecount
);
1083 nrec
.ir_free
= cpu_to_be64(cur
->bc_rec
.i
.ir_free
);
1087 * Loop going up the tree, starting at the leaf level.
1088 * Stop when we don't get a split block, that must mean that
1089 * the insert is finished with this level.
1093 * Insert nrec/nbno into this level of the tree.
1094 * Note if we fail, nbno will be null.
1096 if ((error
= xfs_inobt_insrec(pcur
, level
++, &nbno
, &nrec
, &ncur
,
1099 xfs_btree_del_cursor(pcur
, XFS_BTREE_ERROR
);
1103 * See if the cursor we just used is trash.
1104 * Can't trash the caller's cursor, but otherwise we should
1105 * if ncur is a new cursor or we're about to be done.
1107 if (pcur
!= cur
&& (ncur
|| nbno
== NULLAGBLOCK
)) {
1108 cur
->bc_nlevels
= pcur
->bc_nlevels
;
1109 xfs_btree_del_cursor(pcur
, XFS_BTREE_NOERROR
);
1112 * If we got a new cursor, switch to it.
1118 } while (nbno
!= NULLAGBLOCK
);
1123 STATIC
struct xfs_btree_cur
*
1124 xfs_inobt_dup_cursor(
1125 struct xfs_btree_cur
*cur
)
1127 return xfs_inobt_init_cursor(cur
->bc_mp
, cur
->bc_tp
,
1128 cur
->bc_private
.a
.agbp
, cur
->bc_private
.a
.agno
);
1132 xfs_inobt_alloc_block(
1133 struct xfs_btree_cur
*cur
,
1134 union xfs_btree_ptr
*start
,
1135 union xfs_btree_ptr
*new,
1139 xfs_alloc_arg_t args
; /* block allocation args */
1140 int error
; /* error return value */
1141 xfs_agblock_t sbno
= be32_to_cpu(start
->s
);
1143 XFS_BTREE_TRACE_CURSOR(cur
, XBT_ENTRY
);
1145 memset(&args
, 0, sizeof(args
));
1146 args
.tp
= cur
->bc_tp
;
1147 args
.mp
= cur
->bc_mp
;
1148 args
.fsbno
= XFS_AGB_TO_FSB(args
.mp
, cur
->bc_private
.a
.agno
, sbno
);
1152 args
.type
= XFS_ALLOCTYPE_NEAR_BNO
;
1154 error
= xfs_alloc_vextent(&args
);
1156 XFS_BTREE_TRACE_CURSOR(cur
, XBT_ERROR
);
1159 if (args
.fsbno
== NULLFSBLOCK
) {
1160 XFS_BTREE_TRACE_CURSOR(cur
, XBT_EXIT
);
1164 ASSERT(args
.len
== 1);
1165 XFS_BTREE_TRACE_CURSOR(cur
, XBT_EXIT
);
1167 new->s
= cpu_to_be32(XFS_FSB_TO_AGBNO(args
.mp
, args
.fsbno
));
1174 xfs_inobt_get_maxrecs(
1175 struct xfs_btree_cur
*cur
,
1178 return cur
->bc_mp
->m_inobt_mxr
[level
!= 0];
1182 xfs_inobt_init_key_from_rec(
1183 union xfs_btree_key
*key
,
1184 union xfs_btree_rec
*rec
)
1186 key
->inobt
.ir_startino
= rec
->inobt
.ir_startino
;
1190 * intial value of ptr for lookup
1193 xfs_inobt_init_ptr_from_cur(
1194 struct xfs_btree_cur
*cur
,
1195 union xfs_btree_ptr
*ptr
)
1197 struct xfs_agi
*agi
= XFS_BUF_TO_AGI(cur
->bc_private
.a
.agbp
);
1199 ASSERT(cur
->bc_private
.a
.agno
== be32_to_cpu(agi
->agi_seqno
));
1201 ptr
->s
= agi
->agi_root
;
1206 struct xfs_btree_cur
*cur
,
1207 union xfs_btree_key
*key
)
1209 return (__int64_t
)be32_to_cpu(key
->inobt
.ir_startino
) -
1210 cur
->bc_rec
.i
.ir_startino
;
1213 #ifdef XFS_BTREE_TRACE
1214 ktrace_t
*xfs_inobt_trace_buf
;
1217 xfs_inobt_trace_enter(
1218 struct xfs_btree_cur
*cur
,
1235 ktrace_enter(xfs_inobt_trace_buf
, (void *)(__psint_t
)type
,
1236 (void *)func
, (void *)s
, NULL
, (void *)cur
,
1237 (void *)a0
, (void *)a1
, (void *)a2
, (void *)a3
,
1238 (void *)a4
, (void *)a5
, (void *)a6
, (void *)a7
,
1239 (void *)a8
, (void *)a9
, (void *)a10
);
1243 xfs_inobt_trace_cursor(
1244 struct xfs_btree_cur
*cur
,
1249 *s0
= cur
->bc_private
.a
.agno
;
1250 *l0
= cur
->bc_rec
.i
.ir_startino
;
1251 *l1
= cur
->bc_rec
.i
.ir_free
;
1255 xfs_inobt_trace_key(
1256 struct xfs_btree_cur
*cur
,
1257 union xfs_btree_key
*key
,
1261 *l0
= be32_to_cpu(key
->inobt
.ir_startino
);
1266 xfs_inobt_trace_record(
1267 struct xfs_btree_cur
*cur
,
1268 union xfs_btree_rec
*rec
,
1273 *l0
= be32_to_cpu(rec
->inobt
.ir_startino
);
1274 *l1
= be32_to_cpu(rec
->inobt
.ir_freecount
);
1275 *l2
= be64_to_cpu(rec
->inobt
.ir_free
);
1277 #endif /* XFS_BTREE_TRACE */
1279 static const struct xfs_btree_ops xfs_inobt_ops
= {
1280 .rec_len
= sizeof(xfs_inobt_rec_t
),
1281 .key_len
= sizeof(xfs_inobt_key_t
),
1283 .dup_cursor
= xfs_inobt_dup_cursor
,
1284 .alloc_block
= xfs_inobt_alloc_block
,
1285 .get_maxrecs
= xfs_inobt_get_maxrecs
,
1286 .init_key_from_rec
= xfs_inobt_init_key_from_rec
,
1287 .init_ptr_from_cur
= xfs_inobt_init_ptr_from_cur
,
1288 .key_diff
= xfs_inobt_key_diff
,
1290 #ifdef XFS_BTREE_TRACE
1291 .trace_enter
= xfs_inobt_trace_enter
,
1292 .trace_cursor
= xfs_inobt_trace_cursor
,
1293 .trace_key
= xfs_inobt_trace_key
,
1294 .trace_record
= xfs_inobt_trace_record
,
1299 * Allocate a new inode btree cursor.
1301 struct xfs_btree_cur
* /* new inode btree cursor */
1302 xfs_inobt_init_cursor(
1303 struct xfs_mount
*mp
, /* file system mount point */
1304 struct xfs_trans
*tp
, /* transaction pointer */
1305 struct xfs_buf
*agbp
, /* buffer for agi structure */
1306 xfs_agnumber_t agno
) /* allocation group number */
1308 struct xfs_agi
*agi
= XFS_BUF_TO_AGI(agbp
);
1309 struct xfs_btree_cur
*cur
;
1311 cur
= kmem_zone_zalloc(xfs_btree_cur_zone
, KM_SLEEP
);
1315 cur
->bc_nlevels
= be32_to_cpu(agi
->agi_level
);
1316 cur
->bc_btnum
= XFS_BTNUM_INO
;
1317 cur
->bc_blocklog
= mp
->m_sb
.sb_blocklog
;
1319 cur
->bc_ops
= &xfs_inobt_ops
;
1321 cur
->bc_private
.a
.agbp
= agbp
;
1322 cur
->bc_private
.a
.agno
= agno
;