Linux-2.6.12-rc2
[linux-2.6/linux-acpi-2.6/ibm-acpi-2.6.git] / fs / reiserfs / do_balan.c
blob2118db2896c7c2491777f10ec15d0842e87a2ce2
1 /*
2 * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
3 */
5 /* Now we have all buffers that must be used in balancing of the tree */
6 /* Further calculations can not cause schedule(), and thus the buffer */
7 /* tree will be stable until the balancing will be finished */
8 /* balance the tree according to the analysis made before, */
9 /* and using buffers obtained after all above. */
12 /**
13 ** balance_leaf_when_delete
14 ** balance_leaf
15 ** do_balance
17 **/
19 #include <linux/config.h>
20 #include <asm/uaccess.h>
21 #include <linux/time.h>
22 #include <linux/reiserfs_fs.h>
23 #include <linux/buffer_head.h>
25 #ifdef CONFIG_REISERFS_CHECK
27 struct tree_balance * cur_tb = NULL; /* detects whether more than one
28 copy of tb exists as a means
29 of checking whether schedule
30 is interrupting do_balance */
31 #endif
33 inline void do_balance_mark_leaf_dirty (struct tree_balance * tb,
34 struct buffer_head * bh, int flag)
36 journal_mark_dirty(tb->transaction_handle,
37 tb->transaction_handle->t_super, bh) ;
40 #define do_balance_mark_internal_dirty do_balance_mark_leaf_dirty
41 #define do_balance_mark_sb_dirty do_balance_mark_leaf_dirty
44 /* summary:
45 if deleting something ( tb->insert_size[0] < 0 )
46 return(balance_leaf_when_delete()); (flag d handled here)
47 else
48 if lnum is larger than 0 we put items into the left node
49 if rnum is larger than 0 we put items into the right node
50 if snum1 is larger than 0 we put items into the new node s1
51 if snum2 is larger than 0 we put items into the new node s2
52 Note that all *num* count new items being created.
54 It would be easier to read balance_leaf() if each of these summary
55 lines was a separate procedure rather than being inlined. I think
56 that there are many passages here and in balance_leaf_when_delete() in
57 which two calls to one procedure can replace two passages, and it
58 might save cache space and improve software maintenance costs to do so.
60 Vladimir made the perceptive comment that we should offload most of
61 the decision making in this function into fix_nodes/check_balance, and
62 then create some sort of structure in tb that says what actions should
63 be performed by do_balance.
65 -Hans */
69 /* Balance leaf node in case of delete or cut: insert_size[0] < 0
71 * lnum, rnum can have values >= -1
72 * -1 means that the neighbor must be joined with S
73 * 0 means that nothing should be done with the neighbor
74 * >0 means to shift entirely or partly the specified number of items to the neighbor
76 static int balance_leaf_when_delete (struct tree_balance * tb, int flag)
78 struct buffer_head * tbS0 = PATH_PLAST_BUFFER (tb->tb_path);
79 int item_pos = PATH_LAST_POSITION (tb->tb_path);
80 int pos_in_item = tb->tb_path->pos_in_item;
81 struct buffer_info bi;
82 int n;
83 struct item_head * ih;
85 RFALSE( tb->FR[0] && B_LEVEL (tb->FR[0]) != DISK_LEAF_NODE_LEVEL + 1,
86 "vs- 12000: level: wrong FR %z", tb->FR[0]);
87 RFALSE( tb->blknum[0] > 1,
88 "PAP-12005: tb->blknum == %d, can not be > 1", tb->blknum[0]);
89 RFALSE( ! tb->blknum[0] && ! PATH_H_PPARENT(tb->tb_path, 0),
90 "PAP-12010: tree can not be empty");
92 ih = B_N_PITEM_HEAD (tbS0, item_pos);
94 /* Delete or truncate the item */
96 switch (flag) {
97 case M_DELETE: /* delete item in S[0] */
99 RFALSE( ih_item_len(ih) + IH_SIZE != -tb->insert_size[0],
100 "vs-12013: mode Delete, insert size %d, ih to be deleted %h",
101 -tb->insert_size [0], ih);
103 bi.tb = tb;
104 bi.bi_bh = tbS0;
105 bi.bi_parent = PATH_H_PPARENT (tb->tb_path, 0);
106 bi.bi_position = PATH_H_POSITION (tb->tb_path, 1);
107 leaf_delete_items (&bi, 0, item_pos, 1, -1);
109 if ( ! item_pos && tb->CFL[0] ) {
110 if ( B_NR_ITEMS(tbS0) ) {
111 replace_key(tb, tb->CFL[0],tb->lkey[0],tbS0,0);
113 else {
114 if ( ! PATH_H_POSITION (tb->tb_path, 1) )
115 replace_key(tb, tb->CFL[0],tb->lkey[0],PATH_H_PPARENT(tb->tb_path, 0),0);
119 RFALSE( ! item_pos && !tb->CFL[0],
120 "PAP-12020: tb->CFL[0]==%p, tb->L[0]==%p", tb->CFL[0], tb->L[0]);
122 break;
124 case M_CUT: { /* cut item in S[0] */
125 bi.tb = tb;
126 bi.bi_bh = tbS0;
127 bi.bi_parent = PATH_H_PPARENT (tb->tb_path, 0);
128 bi.bi_position = PATH_H_POSITION (tb->tb_path, 1);
129 if (is_direntry_le_ih (ih)) {
131 /* UFS unlink semantics are such that you can only delete one directory entry at a time. */
132 /* when we cut a directory tb->insert_size[0] means number of entries to be cut (always 1) */
133 tb->insert_size[0] = -1;
134 leaf_cut_from_buffer (&bi, item_pos, pos_in_item, -tb->insert_size[0]);
136 RFALSE( ! item_pos && ! pos_in_item && ! tb->CFL[0],
137 "PAP-12030: can not change delimiting key. CFL[0]=%p",
138 tb->CFL[0]);
140 if ( ! item_pos && ! pos_in_item && tb->CFL[0] ) {
141 replace_key(tb, tb->CFL[0],tb->lkey[0],tbS0,0);
143 } else {
144 leaf_cut_from_buffer (&bi, item_pos, pos_in_item, -tb->insert_size[0]);
146 RFALSE( ! ih_item_len(ih),
147 "PAP-12035: cut must leave non-zero dynamic length of item");
149 break;
152 default:
153 print_cur_tb ("12040");
154 reiserfs_panic (tb->tb_sb, "PAP-12040: balance_leaf_when_delete: unexpectable mode: %s(%d)",
155 (flag == M_PASTE) ? "PASTE" : ((flag == M_INSERT) ? "INSERT" : "UNKNOWN"), flag);
158 /* the rule is that no shifting occurs unless by shifting a node can be freed */
159 n = B_NR_ITEMS(tbS0);
160 if ( tb->lnum[0] ) /* L[0] takes part in balancing */
162 if ( tb->lnum[0] == -1 ) /* L[0] must be joined with S[0] */
164 if ( tb->rnum[0] == -1 ) /* R[0] must be also joined with S[0] */
166 if ( tb->FR[0] == PATH_H_PPARENT(tb->tb_path, 0) )
168 /* all contents of all the 3 buffers will be in L[0] */
169 if ( PATH_H_POSITION (tb->tb_path, 1) == 0 && 1 < B_NR_ITEMS(tb->FR[0]) )
170 replace_key(tb, tb->CFL[0],tb->lkey[0],tb->FR[0],1);
172 leaf_move_items (LEAF_FROM_S_TO_L, tb, n, -1, NULL);
173 leaf_move_items (LEAF_FROM_R_TO_L, tb, B_NR_ITEMS(tb->R[0]), -1, NULL);
175 reiserfs_invalidate_buffer (tb, tbS0);
176 reiserfs_invalidate_buffer (tb, tb->R[0]);
178 return 0;
180 /* all contents of all the 3 buffers will be in R[0] */
181 leaf_move_items (LEAF_FROM_S_TO_R, tb, n, -1, NULL);
182 leaf_move_items (LEAF_FROM_L_TO_R, tb, B_NR_ITEMS(tb->L[0]), -1, NULL);
184 /* right_delimiting_key is correct in R[0] */
185 replace_key(tb, tb->CFR[0],tb->rkey[0],tb->R[0],0);
187 reiserfs_invalidate_buffer (tb, tbS0);
188 reiserfs_invalidate_buffer (tb, tb->L[0]);
190 return -1;
193 RFALSE( tb->rnum[0] != 0,
194 "PAP-12045: rnum must be 0 (%d)", tb->rnum[0]);
195 /* all contents of L[0] and S[0] will be in L[0] */
196 leaf_shift_left(tb, n, -1);
198 reiserfs_invalidate_buffer (tb, tbS0);
200 return 0;
202 /* a part of contents of S[0] will be in L[0] and the rest part of S[0] will be in R[0] */
204 RFALSE( ( tb->lnum[0] + tb->rnum[0] < n ) ||
205 ( tb->lnum[0] + tb->rnum[0] > n+1 ),
206 "PAP-12050: rnum(%d) and lnum(%d) and item number(%d) in S[0] are not consistent",
207 tb->rnum[0], tb->lnum[0], n);
208 RFALSE( ( tb->lnum[0] + tb->rnum[0] == n ) &&
209 (tb->lbytes != -1 || tb->rbytes != -1),
210 "PAP-12055: bad rbytes (%d)/lbytes (%d) parameters when items are not split",
211 tb->rbytes, tb->lbytes);
212 RFALSE( ( tb->lnum[0] + tb->rnum[0] == n + 1 ) &&
213 (tb->lbytes < 1 || tb->rbytes != -1),
214 "PAP-12060: bad rbytes (%d)/lbytes (%d) parameters when items are split",
215 tb->rbytes, tb->lbytes);
217 leaf_shift_left (tb, tb->lnum[0], tb->lbytes);
218 leaf_shift_right(tb, tb->rnum[0], tb->rbytes);
220 reiserfs_invalidate_buffer (tb, tbS0);
222 return 0;
225 if ( tb->rnum[0] == -1 ) {
226 /* all contents of R[0] and S[0] will be in R[0] */
227 leaf_shift_right(tb, n, -1);
228 reiserfs_invalidate_buffer (tb, tbS0);
229 return 0;
232 RFALSE( tb->rnum[0],
233 "PAP-12065: bad rnum parameter must be 0 (%d)", tb->rnum[0]);
234 return 0;
238 static int balance_leaf (struct tree_balance * tb,
239 struct item_head * ih, /* item header of inserted item (this is on little endian) */
240 const char * body, /* body of inserted item or bytes to paste */
241 int flag, /* i - insert, d - delete, c - cut, p - paste
242 (see comment to do_balance) */
243 struct item_head * insert_key, /* in our processing of one level we sometimes determine what
244 must be inserted into the next higher level. This insertion
245 consists of a key or two keys and their corresponding
246 pointers */
247 struct buffer_head ** insert_ptr /* inserted node-ptrs for the next level */
250 struct buffer_head * tbS0 = PATH_PLAST_BUFFER (tb->tb_path);
251 int item_pos = PATH_LAST_POSITION (tb->tb_path); /* index into the array of item headers in S[0]
252 of the affected item */
253 struct buffer_info bi;
254 struct buffer_head *S_new[2]; /* new nodes allocated to hold what could not fit into S */
255 int snum[2]; /* number of items that will be placed
256 into S_new (includes partially shifted
257 items) */
258 int sbytes[2]; /* if an item is partially shifted into S_new then
259 if it is a directory item
260 it is the number of entries from the item that are shifted into S_new
261 else
262 it is the number of bytes from the item that are shifted into S_new
264 int n, i;
265 int ret_val;
266 int pos_in_item;
267 int zeros_num;
269 PROC_INFO_INC( tb -> tb_sb, balance_at[ 0 ] );
271 /* Make balance in case insert_size[0] < 0 */
272 if ( tb->insert_size[0] < 0 )
273 return balance_leaf_when_delete (tb, flag);
275 zeros_num = 0;
276 if (flag == M_INSERT && body == 0)
277 zeros_num = ih_item_len( ih );
279 pos_in_item = tb->tb_path->pos_in_item;
280 /* for indirect item pos_in_item is measured in unformatted node
281 pointers. Recalculate to bytes */
282 if (flag != M_INSERT && is_indirect_le_ih (B_N_PITEM_HEAD (tbS0, item_pos)))
283 pos_in_item *= UNFM_P_SIZE;
285 if ( tb->lnum[0] > 0 ) {
286 /* Shift lnum[0] items from S[0] to the left neighbor L[0] */
287 if ( item_pos < tb->lnum[0] ) {
288 /* new item or it part falls to L[0], shift it too */
289 n = B_NR_ITEMS(tb->L[0]);
291 switch (flag) {
292 case M_INSERT: /* insert item into L[0] */
294 if ( item_pos == tb->lnum[0] - 1 && tb->lbytes != -1 ) {
295 /* part of new item falls into L[0] */
296 int new_item_len;
297 int version;
299 ret_val = leaf_shift_left (tb, tb->lnum[0]-1, -1);
301 /* Calculate item length to insert to S[0] */
302 new_item_len = ih_item_len(ih) - tb->lbytes;
303 /* Calculate and check item length to insert to L[0] */
304 put_ih_item_len(ih, ih_item_len(ih) - new_item_len );
306 RFALSE( ih_item_len(ih) <= 0,
307 "PAP-12080: there is nothing to insert into L[0]: ih_item_len=%d",
308 ih_item_len(ih));
310 /* Insert new item into L[0] */
311 bi.tb = tb;
312 bi.bi_bh = tb->L[0];
313 bi.bi_parent = tb->FL[0];
314 bi.bi_position = get_left_neighbor_position (tb, 0);
315 leaf_insert_into_buf (&bi, n + item_pos - ret_val, ih, body,
316 zeros_num > ih_item_len(ih) ? ih_item_len(ih) : zeros_num);
318 version = ih_version (ih);
320 /* Calculate key component, item length and body to insert into S[0] */
321 set_le_ih_k_offset( ih, le_ih_k_offset( ih ) + (tb->lbytes << (is_indirect_le_ih(ih)?tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT:0)) );
323 put_ih_item_len( ih, new_item_len );
324 if ( tb->lbytes > zeros_num ) {
325 body += (tb->lbytes - zeros_num);
326 zeros_num = 0;
328 else
329 zeros_num -= tb->lbytes;
331 RFALSE( ih_item_len(ih) <= 0,
332 "PAP-12085: there is nothing to insert into S[0]: ih_item_len=%d",
333 ih_item_len(ih));
334 } else {
335 /* new item in whole falls into L[0] */
336 /* Shift lnum[0]-1 items to L[0] */
337 ret_val = leaf_shift_left(tb, tb->lnum[0]-1, tb->lbytes);
338 /* Insert new item into L[0] */
339 bi.tb = tb;
340 bi.bi_bh = tb->L[0];
341 bi.bi_parent = tb->FL[0];
342 bi.bi_position = get_left_neighbor_position (tb, 0);
343 leaf_insert_into_buf (&bi, n + item_pos - ret_val, ih, body, zeros_num);
344 tb->insert_size[0] = 0;
345 zeros_num = 0;
347 break;
349 case M_PASTE: /* append item in L[0] */
351 if ( item_pos == tb->lnum[0] - 1 && tb->lbytes != -1 ) {
352 /* we must shift the part of the appended item */
353 if ( is_direntry_le_ih (B_N_PITEM_HEAD (tbS0, item_pos))) {
355 RFALSE( zeros_num,
356 "PAP-12090: invalid parameter in case of a directory");
357 /* directory item */
358 if ( tb->lbytes > pos_in_item ) {
359 /* new directory entry falls into L[0] */
360 struct item_head * pasted;
361 int l_pos_in_item = pos_in_item;
363 /* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 entries from given directory item */
364 ret_val = leaf_shift_left(tb, tb->lnum[0], tb->lbytes - 1);
365 if ( ret_val && ! item_pos ) {
366 pasted = B_N_PITEM_HEAD(tb->L[0],B_NR_ITEMS(tb->L[0])-1);
367 l_pos_in_item += I_ENTRY_COUNT(pasted) - (tb->lbytes-1);
370 /* Append given directory entry to directory item */
371 bi.tb = tb;
372 bi.bi_bh = tb->L[0];
373 bi.bi_parent = tb->FL[0];
374 bi.bi_position = get_left_neighbor_position (tb, 0);
375 leaf_paste_in_buffer (&bi, n + item_pos - ret_val, l_pos_in_item,
376 tb->insert_size[0], body, zeros_num);
378 /* previous string prepared space for pasting new entry, following string pastes this entry */
380 /* when we have merge directory item, pos_in_item has been changed too */
382 /* paste new directory entry. 1 is entry number */
383 leaf_paste_entries (bi.bi_bh, n + item_pos - ret_val, l_pos_in_item, 1,
384 (struct reiserfs_de_head *)body,
385 body + DEH_SIZE, tb->insert_size[0]
387 tb->insert_size[0] = 0;
388 } else {
389 /* new directory item doesn't fall into L[0] */
390 /* Shift lnum[0]-1 items in whole. Shift lbytes directory entries from directory item number lnum[0] */
391 leaf_shift_left (tb, tb->lnum[0], tb->lbytes);
393 /* Calculate new position to append in item body */
394 pos_in_item -= tb->lbytes;
396 else {
397 /* regular object */
398 RFALSE( tb->lbytes <= 0,
399 "PAP-12095: there is nothing to shift to L[0]. lbytes=%d",
400 tb->lbytes);
401 RFALSE( pos_in_item != ih_item_len(B_N_PITEM_HEAD(tbS0, item_pos)),
402 "PAP-12100: incorrect position to paste: item_len=%d, pos_in_item=%d",
403 ih_item_len(B_N_PITEM_HEAD(tbS0,item_pos)), pos_in_item);
405 if ( tb->lbytes >= pos_in_item ) {
406 /* appended item will be in L[0] in whole */
407 int l_n;
409 /* this bytes number must be appended to the last item of L[h] */
410 l_n = tb->lbytes - pos_in_item;
412 /* Calculate new insert_size[0] */
413 tb->insert_size[0] -= l_n;
415 RFALSE( tb->insert_size[0] <= 0,
416 "PAP-12105: there is nothing to paste into L[0]. insert_size=%d",
417 tb->insert_size[0]);
418 ret_val = leaf_shift_left(tb,tb->lnum[0],
419 ih_item_len(B_N_PITEM_HEAD(tbS0,item_pos)));
420 /* Append to body of item in L[0] */
421 bi.tb = tb;
422 bi.bi_bh = tb->L[0];
423 bi.bi_parent = tb->FL[0];
424 bi.bi_position = get_left_neighbor_position (tb, 0);
425 leaf_paste_in_buffer(
426 &bi,n + item_pos - ret_val,
427 ih_item_len( B_N_PITEM_HEAD(tb->L[0],n+item_pos-ret_val)),
428 l_n,body, zeros_num > l_n ? l_n : zeros_num
430 /* 0-th item in S0 can be only of DIRECT type when l_n != 0*/
432 int version;
433 int temp_l = l_n;
435 RFALSE (ih_item_len (B_N_PITEM_HEAD (tbS0, 0)),
436 "PAP-12106: item length must be 0");
437 RFALSE (comp_short_le_keys (B_N_PKEY (tbS0, 0),
438 B_N_PKEY (tb->L[0],
439 n + item_pos - ret_val)),
440 "PAP-12107: items must be of the same file");
441 if (is_indirect_le_ih(B_N_PITEM_HEAD (tb->L[0],
442 n + item_pos - ret_val))) {
443 temp_l = l_n << (tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT);
445 /* update key of first item in S0 */
446 version = ih_version (B_N_PITEM_HEAD (tbS0, 0));
447 set_le_key_k_offset (version, B_N_PKEY (tbS0, 0),
448 le_key_k_offset (version, B_N_PKEY (tbS0, 0)) + temp_l);
449 /* update left delimiting key */
450 set_le_key_k_offset (version, B_N_PDELIM_KEY(tb->CFL[0],tb->lkey[0]),
451 le_key_k_offset (version, B_N_PDELIM_KEY(tb->CFL[0],tb->lkey[0])) + temp_l);
454 /* Calculate new body, position in item and insert_size[0] */
455 if ( l_n > zeros_num ) {
456 body += (l_n - zeros_num);
457 zeros_num = 0;
459 else
460 zeros_num -= l_n;
461 pos_in_item = 0;
463 RFALSE( comp_short_le_keys
464 (B_N_PKEY(tbS0,0),
465 B_N_PKEY(tb->L[0],B_NR_ITEMS(tb->L[0])-1)) ||
467 !op_is_left_mergeable
468 (B_N_PKEY (tbS0, 0), tbS0->b_size) ||
469 !op_is_left_mergeable
470 (B_N_PDELIM_KEY(tb->CFL[0],tb->lkey[0]),
471 tbS0->b_size),
472 "PAP-12120: item must be merge-able with left neighboring item");
474 else /* only part of the appended item will be in L[0] */
476 /* Calculate position in item for append in S[0] */
477 pos_in_item -= tb->lbytes;
479 RFALSE( pos_in_item <= 0,
480 "PAP-12125: no place for paste. pos_in_item=%d", pos_in_item);
482 /* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 byte from item number lnum[0] */
483 leaf_shift_left(tb,tb->lnum[0],tb->lbytes);
487 else /* appended item will be in L[0] in whole */
489 struct item_head * pasted;
491 if ( ! item_pos && op_is_left_mergeable (B_N_PKEY (tbS0, 0), tbS0->b_size) )
492 { /* if we paste into first item of S[0] and it is left mergable */
493 /* then increment pos_in_item by the size of the last item in L[0] */
494 pasted = B_N_PITEM_HEAD(tb->L[0],n-1);
495 if ( is_direntry_le_ih (pasted) )
496 pos_in_item += ih_entry_count(pasted);
497 else
498 pos_in_item += ih_item_len(pasted);
501 /* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 byte from item number lnum[0] */
502 ret_val = leaf_shift_left(tb,tb->lnum[0],tb->lbytes);
503 /* Append to body of item in L[0] */
504 bi.tb = tb;
505 bi.bi_bh = tb->L[0];
506 bi.bi_parent = tb->FL[0];
507 bi.bi_position = get_left_neighbor_position (tb, 0);
508 leaf_paste_in_buffer (&bi, n + item_pos - ret_val, pos_in_item, tb->insert_size[0],
509 body, zeros_num);
511 /* if appended item is directory, paste entry */
512 pasted = B_N_PITEM_HEAD (tb->L[0], n + item_pos - ret_val);
513 if (is_direntry_le_ih (pasted))
514 leaf_paste_entries (
515 bi.bi_bh, n + item_pos - ret_val, pos_in_item, 1,
516 (struct reiserfs_de_head *)body, body + DEH_SIZE, tb->insert_size[0]
518 /* if appended item is indirect item, put unformatted node into un list */
519 if (is_indirect_le_ih (pasted))
520 set_ih_free_space (pasted, 0);
521 tb->insert_size[0] = 0;
522 zeros_num = 0;
524 break;
525 default: /* cases d and t */
526 reiserfs_panic (tb->tb_sb, "PAP-12130: balance_leaf: lnum > 0: unexpectable mode: %s(%d)",
527 (flag == M_DELETE) ? "DELETE" : ((flag == M_CUT) ? "CUT" : "UNKNOWN"), flag);
529 } else {
530 /* new item doesn't fall into L[0] */
531 leaf_shift_left(tb,tb->lnum[0],tb->lbytes);
533 } /* tb->lnum[0] > 0 */
535 /* Calculate new item position */
536 item_pos -= ( tb->lnum[0] - (( tb->lbytes != -1 ) ? 1 : 0));
538 if ( tb->rnum[0] > 0 ) {
539 /* shift rnum[0] items from S[0] to the right neighbor R[0] */
540 n = B_NR_ITEMS(tbS0);
541 switch ( flag ) {
543 case M_INSERT: /* insert item */
544 if ( n - tb->rnum[0] < item_pos )
545 { /* new item or its part falls to R[0] */
546 if ( item_pos == n - tb->rnum[0] + 1 && tb->rbytes != -1 )
547 { /* part of new item falls into R[0] */
548 loff_t old_key_comp, old_len, r_zeros_number;
549 const char * r_body;
550 int version;
551 loff_t offset;
553 leaf_shift_right(tb,tb->rnum[0]-1,-1);
555 version = ih_version(ih);
556 /* Remember key component and item length */
557 old_key_comp = le_ih_k_offset( ih );
558 old_len = ih_item_len(ih);
560 /* Calculate key component and item length to insert into R[0] */
561 offset = le_ih_k_offset( ih ) + ((old_len - tb->rbytes )<<(is_indirect_le_ih(ih)?tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT:0));
562 set_le_ih_k_offset( ih, offset );
563 put_ih_item_len( ih, tb->rbytes);
564 /* Insert part of the item into R[0] */
565 bi.tb = tb;
566 bi.bi_bh = tb->R[0];
567 bi.bi_parent = tb->FR[0];
568 bi.bi_position = get_right_neighbor_position (tb, 0);
569 if ( (old_len - tb->rbytes) > zeros_num ) {
570 r_zeros_number = 0;
571 r_body = body + (old_len - tb->rbytes) - zeros_num;
573 else {
574 r_body = body;
575 r_zeros_number = zeros_num - (old_len - tb->rbytes);
576 zeros_num -= r_zeros_number;
579 leaf_insert_into_buf (&bi, 0, ih, r_body, r_zeros_number);
581 /* Replace right delimiting key by first key in R[0] */
582 replace_key(tb, tb->CFR[0],tb->rkey[0],tb->R[0],0);
584 /* Calculate key component and item length to insert into S[0] */
585 set_le_ih_k_offset( ih, old_key_comp );
586 put_ih_item_len( ih, old_len - tb->rbytes );
588 tb->insert_size[0] -= tb->rbytes;
591 else /* whole new item falls into R[0] */
593 /* Shift rnum[0]-1 items to R[0] */
594 ret_val = leaf_shift_right(tb,tb->rnum[0]-1,tb->rbytes);
595 /* Insert new item into R[0] */
596 bi.tb = tb;
597 bi.bi_bh = tb->R[0];
598 bi.bi_parent = tb->FR[0];
599 bi.bi_position = get_right_neighbor_position (tb, 0);
600 leaf_insert_into_buf (&bi, item_pos - n + tb->rnum[0] - 1, ih, body, zeros_num);
602 if ( item_pos - n + tb->rnum[0] - 1 == 0 ) {
603 replace_key(tb, tb->CFR[0],tb->rkey[0],tb->R[0],0);
606 zeros_num = tb->insert_size[0] = 0;
609 else /* new item or part of it doesn't fall into R[0] */
611 leaf_shift_right(tb,tb->rnum[0],tb->rbytes);
613 break;
615 case M_PASTE: /* append item */
617 if ( n - tb->rnum[0] <= item_pos ) /* pasted item or part of it falls to R[0] */
619 if ( item_pos == n - tb->rnum[0] && tb->rbytes != -1 )
620 { /* we must shift the part of the appended item */
621 if ( is_direntry_le_ih (B_N_PITEM_HEAD(tbS0, item_pos)))
622 { /* we append to directory item */
623 int entry_count;
625 RFALSE( zeros_num,
626 "PAP-12145: invalid parameter in case of a directory");
627 entry_count = I_ENTRY_COUNT(B_N_PITEM_HEAD(tbS0, item_pos));
628 if ( entry_count - tb->rbytes < pos_in_item )
629 /* new directory entry falls into R[0] */
631 int paste_entry_position;
633 RFALSE( tb->rbytes - 1 >= entry_count ||
634 ! tb->insert_size[0],
635 "PAP-12150: no enough of entries to shift to R[0]: rbytes=%d, entry_count=%d",
636 tb->rbytes, entry_count);
637 /* Shift rnum[0]-1 items in whole. Shift rbytes-1 directory entries from directory item number rnum[0] */
638 leaf_shift_right(tb,tb->rnum[0],tb->rbytes - 1);
639 /* Paste given directory entry to directory item */
640 paste_entry_position = pos_in_item - entry_count + tb->rbytes - 1;
641 bi.tb = tb;
642 bi.bi_bh = tb->R[0];
643 bi.bi_parent = tb->FR[0];
644 bi.bi_position = get_right_neighbor_position (tb, 0);
645 leaf_paste_in_buffer (&bi, 0, paste_entry_position,
646 tb->insert_size[0],body,zeros_num);
647 /* paste entry */
648 leaf_paste_entries (
649 bi.bi_bh, 0, paste_entry_position, 1, (struct reiserfs_de_head *)body,
650 body + DEH_SIZE, tb->insert_size[0]
653 if ( paste_entry_position == 0 ) {
654 /* change delimiting keys */
655 replace_key(tb, tb->CFR[0],tb->rkey[0],tb->R[0],0);
658 tb->insert_size[0] = 0;
659 pos_in_item++;
661 else /* new directory entry doesn't fall into R[0] */
663 leaf_shift_right(tb,tb->rnum[0],tb->rbytes);
666 else /* regular object */
668 int n_shift, n_rem, r_zeros_number;
669 const char * r_body;
671 /* Calculate number of bytes which must be shifted from appended item */
672 if ( (n_shift = tb->rbytes - tb->insert_size[0]) < 0 )
673 n_shift = 0;
675 RFALSE(pos_in_item != ih_item_len(B_N_PITEM_HEAD (tbS0, item_pos)),
676 "PAP-12155: invalid position to paste. ih_item_len=%d, pos_in_item=%d",
677 pos_in_item, ih_item_len( B_N_PITEM_HEAD(tbS0,item_pos)));
679 leaf_shift_right(tb,tb->rnum[0],n_shift);
680 /* Calculate number of bytes which must remain in body after appending to R[0] */
681 if ( (n_rem = tb->insert_size[0] - tb->rbytes) < 0 )
682 n_rem = 0;
685 int version;
686 unsigned long temp_rem = n_rem;
688 version = ih_version (B_N_PITEM_HEAD (tb->R[0],0));
689 if (is_indirect_le_key(version,B_N_PKEY(tb->R[0],0))){
690 temp_rem = n_rem << (tb->tb_sb->s_blocksize_bits -
691 UNFM_P_SHIFT);
693 set_le_key_k_offset (version, B_N_PKEY(tb->R[0],0),
694 le_key_k_offset (version, B_N_PKEY(tb->R[0],0)) + temp_rem);
695 set_le_key_k_offset (version, B_N_PDELIM_KEY(tb->CFR[0],tb->rkey[0]),
696 le_key_k_offset (version, B_N_PDELIM_KEY(tb->CFR[0],tb->rkey[0])) + temp_rem);
698 /* k_offset (B_N_PKEY(tb->R[0],0)) += n_rem;
699 k_offset (B_N_PDELIM_KEY(tb->CFR[0],tb->rkey[0])) += n_rem;*/
700 do_balance_mark_internal_dirty (tb, tb->CFR[0], 0);
702 /* Append part of body into R[0] */
703 bi.tb = tb;
704 bi.bi_bh = tb->R[0];
705 bi.bi_parent = tb->FR[0];
706 bi.bi_position = get_right_neighbor_position (tb, 0);
707 if ( n_rem > zeros_num ) {
708 r_zeros_number = 0;
709 r_body = body + n_rem - zeros_num;
711 else {
712 r_body = body;
713 r_zeros_number = zeros_num - n_rem;
714 zeros_num -= r_zeros_number;
717 leaf_paste_in_buffer(&bi, 0, n_shift, tb->insert_size[0] - n_rem, r_body, r_zeros_number);
719 if (is_indirect_le_ih (B_N_PITEM_HEAD(tb->R[0],0))) {
720 #if 0
721 RFALSE( n_rem,
722 "PAP-12160: paste more than one unformatted node pointer");
723 #endif
724 set_ih_free_space (B_N_PITEM_HEAD(tb->R[0],0), 0);
726 tb->insert_size[0] = n_rem;
727 if ( ! n_rem )
728 pos_in_item ++;
731 else /* pasted item in whole falls into R[0] */
733 struct item_head * pasted;
735 ret_val = leaf_shift_right(tb,tb->rnum[0],tb->rbytes);
736 /* append item in R[0] */
737 if ( pos_in_item >= 0 ) {
738 bi.tb = tb;
739 bi.bi_bh = tb->R[0];
740 bi.bi_parent = tb->FR[0];
741 bi.bi_position = get_right_neighbor_position (tb, 0);
742 leaf_paste_in_buffer(&bi,item_pos - n + tb->rnum[0], pos_in_item,
743 tb->insert_size[0],body, zeros_num);
746 /* paste new entry, if item is directory item */
747 pasted = B_N_PITEM_HEAD(tb->R[0], item_pos - n + tb->rnum[0]);
748 if (is_direntry_le_ih (pasted) && pos_in_item >= 0 ) {
749 leaf_paste_entries (
750 bi.bi_bh, item_pos - n + tb->rnum[0], pos_in_item, 1,
751 (struct reiserfs_de_head *)body, body + DEH_SIZE, tb->insert_size[0]
753 if ( ! pos_in_item ) {
755 RFALSE( item_pos - n + tb->rnum[0],
756 "PAP-12165: directory item must be first item of node when pasting is in 0th position");
758 /* update delimiting keys */
759 replace_key(tb, tb->CFR[0],tb->rkey[0],tb->R[0],0);
763 if (is_indirect_le_ih (pasted))
764 set_ih_free_space (pasted, 0);
765 zeros_num = tb->insert_size[0] = 0;
768 else /* new item doesn't fall into R[0] */
770 leaf_shift_right(tb,tb->rnum[0],tb->rbytes);
772 break;
773 default: /* cases d and t */
774 reiserfs_panic (tb->tb_sb, "PAP-12175: balance_leaf: rnum > 0: unexpectable mode: %s(%d)",
775 (flag == M_DELETE) ? "DELETE" : ((flag == M_CUT) ? "CUT" : "UNKNOWN"), flag);
778 } /* tb->rnum[0] > 0 */
781 RFALSE( tb->blknum[0] > 3,
782 "PAP-12180: blknum can not be %d. It must be <= 3", tb->blknum[0]);
783 RFALSE( tb->blknum[0] < 0,
784 "PAP-12185: blknum can not be %d. It must be >= 0", tb->blknum[0]);
786 /* if while adding to a node we discover that it is possible to split
787 it in two, and merge the left part into the left neighbor and the
788 right part into the right neighbor, eliminating the node */
789 if ( tb->blknum[0] == 0 ) { /* node S[0] is empty now */
791 RFALSE( ! tb->lnum[0] || ! tb->rnum[0],
792 "PAP-12190: lnum and rnum must not be zero");
793 /* if insertion was done before 0-th position in R[0], right
794 delimiting key of the tb->L[0]'s and left delimiting key are
795 not set correctly */
796 if (tb->CFL[0]) {
797 if (!tb->CFR[0])
798 reiserfs_panic (tb->tb_sb, "vs-12195: balance_leaf: CFR not initialized");
799 copy_key (B_N_PDELIM_KEY (tb->CFL[0], tb->lkey[0]), B_N_PDELIM_KEY (tb->CFR[0], tb->rkey[0]));
800 do_balance_mark_internal_dirty (tb, tb->CFL[0], 0);
803 reiserfs_invalidate_buffer(tb,tbS0);
804 return 0;
808 /* Fill new nodes that appear in place of S[0] */
810 /* I am told that this copying is because we need an array to enable
811 the looping code. -Hans */
812 snum[0] = tb->s1num,
813 snum[1] = tb->s2num;
814 sbytes[0] = tb->s1bytes;
815 sbytes[1] = tb->s2bytes;
816 for( i = tb->blknum[0] - 2; i >= 0; i-- ) {
818 RFALSE( !snum[i], "PAP-12200: snum[%d] == %d. Must be > 0", i, snum[i]);
820 /* here we shift from S to S_new nodes */
822 S_new[i] = get_FEB(tb);
824 /* initialized block type and tree level */
825 set_blkh_level( B_BLK_HEAD(S_new[i]), DISK_LEAF_NODE_LEVEL );
828 n = B_NR_ITEMS(tbS0);
830 switch (flag) {
831 case M_INSERT: /* insert item */
833 if ( n - snum[i] < item_pos )
834 { /* new item or it's part falls to first new node S_new[i]*/
835 if ( item_pos == n - snum[i] + 1 && sbytes[i] != -1 )
836 { /* part of new item falls into S_new[i] */
837 int old_key_comp, old_len, r_zeros_number;
838 const char * r_body;
839 int version;
841 /* Move snum[i]-1 items from S[0] to S_new[i] */
842 leaf_move_items (LEAF_FROM_S_TO_SNEW, tb, snum[i] - 1, -1, S_new[i]);
843 /* Remember key component and item length */
844 version = ih_version (ih);
845 old_key_comp = le_ih_k_offset( ih );
846 old_len = ih_item_len(ih);
848 /* Calculate key component and item length to insert into S_new[i] */
849 set_le_ih_k_offset( ih,
850 le_ih_k_offset(ih) + ((old_len - sbytes[i] )<<(is_indirect_le_ih(ih)?tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT:0)) );
852 put_ih_item_len( ih, sbytes[i] );
854 /* Insert part of the item into S_new[i] before 0-th item */
855 bi.tb = tb;
856 bi.bi_bh = S_new[i];
857 bi.bi_parent = NULL;
858 bi.bi_position = 0;
860 if ( (old_len - sbytes[i]) > zeros_num ) {
861 r_zeros_number = 0;
862 r_body = body + (old_len - sbytes[i]) - zeros_num;
864 else {
865 r_body = body;
866 r_zeros_number = zeros_num - (old_len - sbytes[i]);
867 zeros_num -= r_zeros_number;
870 leaf_insert_into_buf (&bi, 0, ih, r_body, r_zeros_number);
872 /* Calculate key component and item length to insert into S[i] */
873 set_le_ih_k_offset( ih, old_key_comp );
874 put_ih_item_len( ih, old_len - sbytes[i] );
875 tb->insert_size[0] -= sbytes[i];
877 else /* whole new item falls into S_new[i] */
879 /* Shift snum[0] - 1 items to S_new[i] (sbytes[i] of split item) */
880 leaf_move_items (LEAF_FROM_S_TO_SNEW, tb, snum[i] - 1, sbytes[i], S_new[i]);
882 /* Insert new item into S_new[i] */
883 bi.tb = tb;
884 bi.bi_bh = S_new[i];
885 bi.bi_parent = NULL;
886 bi.bi_position = 0;
887 leaf_insert_into_buf (&bi, item_pos - n + snum[i] - 1, ih, body, zeros_num);
889 zeros_num = tb->insert_size[0] = 0;
893 else /* new item or it part don't falls into S_new[i] */
895 leaf_move_items (LEAF_FROM_S_TO_SNEW, tb, snum[i], sbytes[i], S_new[i]);
897 break;
899 case M_PASTE: /* append item */
901 if ( n - snum[i] <= item_pos ) /* pasted item or part if it falls to S_new[i] */
903 if ( item_pos == n - snum[i] && sbytes[i] != -1 )
904 { /* we must shift part of the appended item */
905 struct item_head * aux_ih;
907 RFALSE( ih, "PAP-12210: ih must be 0");
909 if ( is_direntry_le_ih (aux_ih = B_N_PITEM_HEAD(tbS0,item_pos))) {
910 /* we append to directory item */
912 int entry_count;
914 entry_count = ih_entry_count(aux_ih);
916 if ( entry_count - sbytes[i] < pos_in_item && pos_in_item <= entry_count ) {
917 /* new directory entry falls into S_new[i] */
919 RFALSE( ! tb->insert_size[0],
920 "PAP-12215: insert_size is already 0");
921 RFALSE( sbytes[i] - 1 >= entry_count,
922 "PAP-12220: there are no so much entries (%d), only %d",
923 sbytes[i] - 1, entry_count);
925 /* Shift snum[i]-1 items in whole. Shift sbytes[i] directory entries from directory item number snum[i] */
926 leaf_move_items (LEAF_FROM_S_TO_SNEW, tb, snum[i], sbytes[i]-1, S_new[i]);
927 /* Paste given directory entry to directory item */
928 bi.tb = tb;
929 bi.bi_bh = S_new[i];
930 bi.bi_parent = NULL;
931 bi.bi_position = 0;
932 leaf_paste_in_buffer (&bi, 0, pos_in_item - entry_count + sbytes[i] - 1,
933 tb->insert_size[0], body,zeros_num);
934 /* paste new directory entry */
935 leaf_paste_entries (
936 bi.bi_bh, 0, pos_in_item - entry_count + sbytes[i] - 1,
937 1, (struct reiserfs_de_head *)body, body + DEH_SIZE,
938 tb->insert_size[0]
940 tb->insert_size[0] = 0;
941 pos_in_item++;
942 } else { /* new directory entry doesn't fall into S_new[i] */
943 leaf_move_items (LEAF_FROM_S_TO_SNEW, tb, snum[i], sbytes[i], S_new[i]);
946 else /* regular object */
948 int n_shift, n_rem, r_zeros_number;
949 const char * r_body;
951 RFALSE( pos_in_item != ih_item_len(B_N_PITEM_HEAD(tbS0,item_pos)) ||
952 tb->insert_size[0] <= 0,
953 "PAP-12225: item too short or insert_size <= 0");
955 /* Calculate number of bytes which must be shifted from appended item */
956 n_shift = sbytes[i] - tb->insert_size[0];
957 if ( n_shift < 0 )
958 n_shift = 0;
959 leaf_move_items (LEAF_FROM_S_TO_SNEW, tb, snum[i], n_shift, S_new[i]);
961 /* Calculate number of bytes which must remain in body after append to S_new[i] */
962 n_rem = tb->insert_size[0] - sbytes[i];
963 if ( n_rem < 0 )
964 n_rem = 0;
965 /* Append part of body into S_new[0] */
966 bi.tb = tb;
967 bi.bi_bh = S_new[i];
968 bi.bi_parent = NULL;
969 bi.bi_position = 0;
971 if ( n_rem > zeros_num ) {
972 r_zeros_number = 0;
973 r_body = body + n_rem - zeros_num;
975 else {
976 r_body = body;
977 r_zeros_number = zeros_num - n_rem;
978 zeros_num -= r_zeros_number;
981 leaf_paste_in_buffer(&bi, 0, n_shift, tb->insert_size[0]-n_rem, r_body,r_zeros_number);
983 struct item_head * tmp;
985 tmp = B_N_PITEM_HEAD(S_new[i],0);
986 if (is_indirect_le_ih (tmp)) {
987 set_ih_free_space (tmp, 0);
988 set_le_ih_k_offset( tmp, le_ih_k_offset(tmp) +
989 (n_rem << (tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT)));
990 } else {
991 set_le_ih_k_offset( tmp, le_ih_k_offset(tmp) +
992 n_rem );
996 tb->insert_size[0] = n_rem;
997 if ( ! n_rem )
998 pos_in_item++;
1001 else
1002 /* item falls wholly into S_new[i] */
1004 int ret_val;
1005 struct item_head * pasted;
1007 #ifdef CONFIG_REISERFS_CHECK
1008 struct item_head * ih = B_N_PITEM_HEAD(tbS0,item_pos);
1010 if ( ! is_direntry_le_ih(ih) && (pos_in_item != ih_item_len(ih) ||
1011 tb->insert_size[0] <= 0) )
1012 reiserfs_panic (tb->tb_sb, "PAP-12235: balance_leaf: pos_in_item must be equal to ih_item_len");
1013 #endif /* CONFIG_REISERFS_CHECK */
1015 ret_val = leaf_move_items (LEAF_FROM_S_TO_SNEW, tb, snum[i], sbytes[i], S_new[i]);
1017 RFALSE( ret_val,
1018 "PAP-12240: unexpected value returned by leaf_move_items (%d)",
1019 ret_val);
1021 /* paste into item */
1022 bi.tb = tb;
1023 bi.bi_bh = S_new[i];
1024 bi.bi_parent = NULL;
1025 bi.bi_position = 0;
1026 leaf_paste_in_buffer(&bi, item_pos - n + snum[i], pos_in_item, tb->insert_size[0], body, zeros_num);
1028 pasted = B_N_PITEM_HEAD(S_new[i], item_pos - n + snum[i]);
1029 if (is_direntry_le_ih (pasted))
1031 leaf_paste_entries (
1032 bi.bi_bh, item_pos - n + snum[i], pos_in_item, 1,
1033 (struct reiserfs_de_head *)body, body + DEH_SIZE, tb->insert_size[0]
1037 /* if we paste to indirect item update ih_free_space */
1038 if (is_indirect_le_ih (pasted))
1039 set_ih_free_space (pasted, 0);
1040 zeros_num = tb->insert_size[0] = 0;
1044 else /* pasted item doesn't fall into S_new[i] */
1046 leaf_move_items (LEAF_FROM_S_TO_SNEW, tb, snum[i], sbytes[i], S_new[i]);
1048 break;
1049 default: /* cases d and t */
1050 reiserfs_panic (tb->tb_sb, "PAP-12245: balance_leaf: blknum > 2: unexpectable mode: %s(%d)",
1051 (flag == M_DELETE) ? "DELETE" : ((flag == M_CUT) ? "CUT" : "UNKNOWN"), flag);
1054 memcpy (insert_key + i,B_N_PKEY(S_new[i],0),KEY_SIZE);
1055 insert_ptr[i] = S_new[i];
1057 RFALSE (!buffer_journaled (S_new [i]) || buffer_journal_dirty (S_new [i]) ||
1058 buffer_dirty (S_new [i]),
1059 "PAP-12247: S_new[%d] : (%b)", i, S_new[i]);
1062 /* if the affected item was not wholly shifted then we perform all necessary operations on that part or whole of the
1063 affected item which remains in S */
1064 if ( 0 <= item_pos && item_pos < tb->s0num )
1065 { /* if we must insert or append into buffer S[0] */
1067 switch (flag)
1069 case M_INSERT: /* insert item into S[0] */
1070 bi.tb = tb;
1071 bi.bi_bh = tbS0;
1072 bi.bi_parent = PATH_H_PPARENT (tb->tb_path, 0);
1073 bi.bi_position = PATH_H_POSITION (tb->tb_path, 1);
1074 leaf_insert_into_buf (&bi, item_pos, ih, body, zeros_num);
1076 /* If we insert the first key change the delimiting key */
1077 if( item_pos == 0 ) {
1078 if (tb->CFL[0]) /* can be 0 in reiserfsck */
1079 replace_key(tb, tb->CFL[0], tb->lkey[0],tbS0,0);
1082 break;
1084 case M_PASTE: { /* append item in S[0] */
1085 struct item_head * pasted;
1087 pasted = B_N_PITEM_HEAD (tbS0, item_pos);
1088 /* when directory, may be new entry already pasted */
1089 if (is_direntry_le_ih (pasted)) {
1090 if ( pos_in_item >= 0 &&
1091 pos_in_item <= ih_entry_count(pasted) ) {
1093 RFALSE( ! tb->insert_size[0],
1094 "PAP-12260: insert_size is 0 already");
1096 /* prepare space */
1097 bi.tb = tb;
1098 bi.bi_bh = tbS0;
1099 bi.bi_parent = PATH_H_PPARENT (tb->tb_path, 0);
1100 bi.bi_position = PATH_H_POSITION (tb->tb_path, 1);
1101 leaf_paste_in_buffer(&bi, item_pos, pos_in_item, tb->insert_size[0], body, zeros_num);
1103 /* paste entry */
1104 leaf_paste_entries (
1105 bi.bi_bh, item_pos, pos_in_item, 1, (struct reiserfs_de_head *)body,
1106 body + DEH_SIZE, tb->insert_size[0]
1108 if ( ! item_pos && ! pos_in_item ) {
1109 RFALSE( !tb->CFL[0] || !tb->L[0],
1110 "PAP-12270: CFL[0]/L[0] must be specified");
1111 if (tb->CFL[0]) {
1112 replace_key(tb, tb->CFL[0], tb->lkey[0],tbS0,0);
1116 tb->insert_size[0] = 0;
1118 } else { /* regular object */
1119 if ( pos_in_item == ih_item_len(pasted) ) {
1121 RFALSE( tb->insert_size[0] <= 0,
1122 "PAP-12275: insert size must not be %d",
1123 tb->insert_size[0]);
1124 bi.tb = tb;
1125 bi.bi_bh = tbS0;
1126 bi.bi_parent = PATH_H_PPARENT (tb->tb_path, 0);
1127 bi.bi_position = PATH_H_POSITION (tb->tb_path, 1);
1128 leaf_paste_in_buffer (&bi, item_pos, pos_in_item, tb->insert_size[0], body, zeros_num);
1130 if (is_indirect_le_ih (pasted)) {
1131 #if 0
1132 RFALSE( tb->insert_size[0] != UNFM_P_SIZE,
1133 "PAP-12280: insert_size for indirect item must be %d, not %d",
1134 UNFM_P_SIZE, tb->insert_size[0]);
1135 #endif
1136 set_ih_free_space (pasted, 0);
1138 tb->insert_size[0] = 0;
1141 #ifdef CONFIG_REISERFS_CHECK
1142 else {
1143 if ( tb->insert_size[0] ) {
1144 print_cur_tb ("12285");
1145 reiserfs_panic (tb->tb_sb, "PAP-12285: balance_leaf: insert_size must be 0 (%d)", tb->insert_size[0]);
1148 #endif /* CONFIG_REISERFS_CHECK */
1151 } /* case M_PASTE: */
1155 #ifdef CONFIG_REISERFS_CHECK
1156 if ( flag == M_PASTE && tb->insert_size[0] ) {
1157 print_cur_tb ("12290");
1158 reiserfs_panic (tb->tb_sb, "PAP-12290: balance_leaf: insert_size is still not 0 (%d)", tb->insert_size[0]);
1160 #endif /* CONFIG_REISERFS_CHECK */
1162 return 0;
1163 } /* Leaf level of the tree is balanced (end of balance_leaf) */
1167 /* Make empty node */
1168 void make_empty_node (struct buffer_info * bi)
1170 struct block_head * blkh;
1172 RFALSE( bi->bi_bh == NULL, "PAP-12295: pointer to the buffer is NULL");
1174 blkh = B_BLK_HEAD(bi->bi_bh);
1175 set_blkh_nr_item( blkh, 0 );
1176 set_blkh_free_space( blkh, MAX_CHILD_SIZE(bi->bi_bh) );
1178 if (bi->bi_parent)
1179 B_N_CHILD (bi->bi_parent, bi->bi_position)->dc_size = 0; /* Endian safe if 0 */
1183 /* Get first empty buffer */
1184 struct buffer_head * get_FEB (struct tree_balance * tb)
1186 int i;
1187 struct buffer_head * first_b;
1188 struct buffer_info bi;
1190 for (i = 0; i < MAX_FEB_SIZE; i ++)
1191 if (tb->FEB[i] != 0)
1192 break;
1194 if (i == MAX_FEB_SIZE)
1195 reiserfs_panic(tb->tb_sb, "vs-12300: get_FEB: FEB list is empty");
1197 bi.tb = tb;
1198 bi.bi_bh = first_b = tb->FEB[i];
1199 bi.bi_parent = NULL;
1200 bi.bi_position = 0;
1201 make_empty_node (&bi);
1202 set_buffer_uptodate(first_b);
1203 tb->FEB[i] = NULL;
1204 tb->used[i] = first_b;
1206 return(first_b);
1210 /* This is now used because reiserfs_free_block has to be able to
1211 ** schedule.
1213 static void store_thrown (struct tree_balance * tb, struct buffer_head * bh)
1215 int i;
1217 if (buffer_dirty (bh))
1218 reiserfs_warning (tb->tb_sb, "store_thrown deals with dirty buffer");
1219 for (i = 0; i < sizeof (tb->thrown)/sizeof (tb->thrown[0]); i ++)
1220 if (!tb->thrown[i]) {
1221 tb->thrown[i] = bh;
1222 get_bh(bh) ; /* free_thrown puts this */
1223 return;
1225 reiserfs_warning (tb->tb_sb, "store_thrown: too many thrown buffers");
1228 static void free_thrown(struct tree_balance *tb) {
1229 int i ;
1230 b_blocknr_t blocknr ;
1231 for (i = 0; i < sizeof (tb->thrown)/sizeof (tb->thrown[0]); i++) {
1232 if (tb->thrown[i]) {
1233 blocknr = tb->thrown[i]->b_blocknr ;
1234 if (buffer_dirty (tb->thrown[i]))
1235 reiserfs_warning (tb->tb_sb,
1236 "free_thrown deals with dirty buffer %d",
1237 blocknr);
1238 brelse(tb->thrown[i]) ; /* incremented in store_thrown */
1239 reiserfs_free_block (tb->transaction_handle, NULL, blocknr, 0);
1244 void reiserfs_invalidate_buffer (struct tree_balance * tb, struct buffer_head * bh)
1246 struct block_head *blkh;
1247 blkh = B_BLK_HEAD(bh);
1248 set_blkh_level( blkh, FREE_LEVEL );
1249 set_blkh_nr_item( blkh, 0 );
1251 clear_buffer_dirty(bh);
1252 store_thrown (tb, bh);
1255 /* Replace n_dest'th key in buffer dest by n_src'th key of buffer src.*/
1256 void replace_key (struct tree_balance * tb, struct buffer_head * dest, int n_dest,
1257 struct buffer_head * src, int n_src)
1260 RFALSE( dest == NULL || src == NULL,
1261 "vs-12305: source or destination buffer is 0 (src=%p, dest=%p)",
1262 src, dest);
1263 RFALSE( ! B_IS_KEYS_LEVEL (dest),
1264 "vs-12310: invalid level (%z) for destination buffer. dest must be leaf",
1265 dest);
1266 RFALSE( n_dest < 0 || n_src < 0,
1267 "vs-12315: src(%d) or dest(%d) key number < 0", n_src, n_dest);
1268 RFALSE( n_dest >= B_NR_ITEMS(dest) || n_src >= B_NR_ITEMS(src),
1269 "vs-12320: src(%d(%d)) or dest(%d(%d)) key number is too big",
1270 n_src, B_NR_ITEMS(src), n_dest, B_NR_ITEMS(dest));
1272 if (B_IS_ITEMS_LEVEL (src))
1273 /* source buffer contains leaf node */
1274 memcpy (B_N_PDELIM_KEY(dest,n_dest), B_N_PITEM_HEAD(src,n_src), KEY_SIZE);
1275 else
1276 memcpy (B_N_PDELIM_KEY(dest,n_dest), B_N_PDELIM_KEY(src,n_src), KEY_SIZE);
1278 do_balance_mark_internal_dirty (tb, dest, 0);
1282 int get_left_neighbor_position (
1283 struct tree_balance * tb,
1284 int h
1287 int Sh_position = PATH_H_POSITION (tb->tb_path, h + 1);
1289 RFALSE( PATH_H_PPARENT (tb->tb_path, h) == 0 || tb->FL[h] == 0,
1290 "vs-12325: FL[%d](%p) or F[%d](%p) does not exist",
1291 h, tb->FL[h], h, PATH_H_PPARENT (tb->tb_path, h));
1293 if (Sh_position == 0)
1294 return B_NR_ITEMS (tb->FL[h]);
1295 else
1296 return Sh_position - 1;
1300 int get_right_neighbor_position (struct tree_balance * tb, int h)
1302 int Sh_position = PATH_H_POSITION (tb->tb_path, h + 1);
1304 RFALSE( PATH_H_PPARENT (tb->tb_path, h) == 0 || tb->FR[h] == 0,
1305 "vs-12330: F[%d](%p) or FR[%d](%p) does not exist",
1306 h, PATH_H_PPARENT (tb->tb_path, h), h, tb->FR[h]);
1308 if (Sh_position == B_NR_ITEMS (PATH_H_PPARENT (tb->tb_path, h)))
1309 return 0;
1310 else
1311 return Sh_position + 1;
1315 #ifdef CONFIG_REISERFS_CHECK
1317 int is_reusable (struct super_block * s, b_blocknr_t block, int bit_value);
1318 static void check_internal_node (struct super_block * s, struct buffer_head * bh, char * mes)
1320 struct disk_child * dc;
1321 int i;
1323 RFALSE( !bh, "PAP-12336: bh == 0");
1325 if (!bh || !B_IS_IN_TREE (bh))
1326 return;
1328 RFALSE( !buffer_dirty (bh) &&
1329 !(buffer_journaled(bh) || buffer_journal_dirty(bh)),
1330 "PAP-12337: buffer (%b) must be dirty", bh);
1331 dc = B_N_CHILD (bh, 0);
1333 for (i = 0; i <= B_NR_ITEMS (bh); i ++, dc ++) {
1334 if (!is_reusable (s, dc_block_number(dc), 1) ) {
1335 print_cur_tb (mes);
1336 reiserfs_panic (s, "PAP-12338: check_internal_node: invalid child pointer %y in %b", dc, bh);
1342 static int locked_or_not_in_tree (struct buffer_head * bh, char * which)
1344 if ( (!buffer_journal_prepared (bh) && buffer_locked (bh)) ||
1345 !B_IS_IN_TREE (bh) ) {
1346 reiserfs_warning (NULL, "vs-12339: locked_or_not_in_tree: %s (%b)",
1347 which, bh);
1348 return 1;
1350 return 0;
1354 static int check_before_balancing (struct tree_balance * tb)
1356 int retval = 0;
1358 if ( cur_tb ) {
1359 reiserfs_panic (tb->tb_sb, "vs-12335: check_before_balancing: "
1360 "suspect that schedule occurred based on cur_tb not being null at this point in code. "
1361 "do_balance cannot properly handle schedule occurring while it runs.");
1364 /* double check that buffers that we will modify are unlocked. (fix_nodes should already have
1365 prepped all of these for us). */
1366 if ( tb->lnum[0] ) {
1367 retval |= locked_or_not_in_tree (tb->L[0], "L[0]");
1368 retval |= locked_or_not_in_tree (tb->FL[0], "FL[0]");
1369 retval |= locked_or_not_in_tree (tb->CFL[0], "CFL[0]");
1370 check_leaf (tb->L[0]);
1372 if ( tb->rnum[0] ) {
1373 retval |= locked_or_not_in_tree (tb->R[0], "R[0]");
1374 retval |= locked_or_not_in_tree (tb->FR[0], "FR[0]");
1375 retval |= locked_or_not_in_tree (tb->CFR[0], "CFR[0]");
1376 check_leaf (tb->R[0]);
1378 retval |= locked_or_not_in_tree (PATH_PLAST_BUFFER (tb->tb_path), "S[0]");
1379 check_leaf (PATH_PLAST_BUFFER (tb->tb_path));
1381 return retval;
1385 static void check_after_balance_leaf (struct tree_balance * tb)
1387 if (tb->lnum[0]) {
1388 if (B_FREE_SPACE (tb->L[0]) !=
1389 MAX_CHILD_SIZE (tb->L[0]) - dc_size(B_N_CHILD (tb->FL[0], get_left_neighbor_position (tb, 0)))) {
1390 print_cur_tb ("12221");
1391 reiserfs_panic (tb->tb_sb, "PAP-12355: check_after_balance_leaf: shift to left was incorrect");
1394 if (tb->rnum[0]) {
1395 if (B_FREE_SPACE (tb->R[0]) !=
1396 MAX_CHILD_SIZE (tb->R[0]) - dc_size(B_N_CHILD (tb->FR[0], get_right_neighbor_position (tb, 0)))) {
1397 print_cur_tb ("12222");
1398 reiserfs_panic (tb->tb_sb, "PAP-12360: check_after_balance_leaf: shift to right was incorrect");
1401 if (PATH_H_PBUFFER(tb->tb_path,1) &&
1402 (B_FREE_SPACE (PATH_H_PBUFFER(tb->tb_path,0)) !=
1403 (MAX_CHILD_SIZE (PATH_H_PBUFFER(tb->tb_path,0)) -
1404 dc_size(B_N_CHILD (PATH_H_PBUFFER(tb->tb_path,1),
1405 PATH_H_POSITION (tb->tb_path, 1)))) )) {
1406 int left = B_FREE_SPACE (PATH_H_PBUFFER(tb->tb_path,0));
1407 int right = (MAX_CHILD_SIZE (PATH_H_PBUFFER(tb->tb_path,0)) -
1408 dc_size(B_N_CHILD (PATH_H_PBUFFER(tb->tb_path,1),
1409 PATH_H_POSITION (tb->tb_path, 1))));
1410 print_cur_tb ("12223");
1411 reiserfs_warning (tb->tb_sb,
1412 "B_FREE_SPACE (PATH_H_PBUFFER(tb->tb_path,0)) = %d; "
1413 "MAX_CHILD_SIZE (%d) - dc_size( %y, %d ) [%d] = %d",
1414 left,
1415 MAX_CHILD_SIZE (PATH_H_PBUFFER(tb->tb_path,0)),
1416 PATH_H_PBUFFER(tb->tb_path,1),
1417 PATH_H_POSITION (tb->tb_path, 1),
1418 dc_size(B_N_CHILD (PATH_H_PBUFFER(tb->tb_path,1), PATH_H_POSITION (tb->tb_path, 1 )) ),
1419 right );
1420 reiserfs_panic (tb->tb_sb, "PAP-12365: check_after_balance_leaf: S is incorrect");
1425 static void check_leaf_level (struct tree_balance * tb)
1427 check_leaf (tb->L[0]);
1428 check_leaf (tb->R[0]);
1429 check_leaf (PATH_PLAST_BUFFER (tb->tb_path));
1432 static void check_internal_levels (struct tree_balance * tb)
1434 int h;
1436 /* check all internal nodes */
1437 for (h = 1; tb->insert_size[h]; h ++) {
1438 check_internal_node (tb->tb_sb, PATH_H_PBUFFER (tb->tb_path, h), "BAD BUFFER ON PATH");
1439 if (tb->lnum[h])
1440 check_internal_node (tb->tb_sb, tb->L[h], "BAD L");
1441 if (tb->rnum[h])
1442 check_internal_node (tb->tb_sb, tb->R[h], "BAD R");
1447 #endif
1454 /* Now we have all of the buffers that must be used in balancing of
1455 the tree. We rely on the assumption that schedule() will not occur
1456 while do_balance works. ( Only interrupt handlers are acceptable.)
1457 We balance the tree according to the analysis made before this,
1458 using buffers already obtained. For SMP support it will someday be
1459 necessary to add ordered locking of tb. */
1461 /* Some interesting rules of balancing:
1463 we delete a maximum of two nodes per level per balancing: we never
1464 delete R, when we delete two of three nodes L, S, R then we move
1465 them into R.
1467 we only delete L if we are deleting two nodes, if we delete only
1468 one node we delete S
1470 if we shift leaves then we shift as much as we can: this is a
1471 deliberate policy of extremism in node packing which results in
1472 higher average utilization after repeated random balance operations
1473 at the cost of more memory copies and more balancing as a result of
1474 small insertions to full nodes.
1476 if we shift internal nodes we try to evenly balance the node
1477 utilization, with consequent less balancing at the cost of lower
1478 utilization.
1480 one could argue that the policy for directories in leaves should be
1481 that of internal nodes, but we will wait until another day to
1482 evaluate this.... It would be nice to someday measure and prove
1483 these assumptions as to what is optimal....
1487 static inline void do_balance_starts (struct tree_balance *tb)
1489 /* use print_cur_tb() to see initial state of struct
1490 tree_balance */
1492 /* store_print_tb (tb); */
1494 /* do not delete, just comment it out */
1495 /* print_tb(flag, PATH_LAST_POSITION(tb->tb_path), tb->tb_path->pos_in_item, tb,
1496 "check");*/
1497 RFALSE( check_before_balancing (tb), "PAP-12340: locked buffers in TB");
1498 #ifdef CONFIG_REISERFS_CHECK
1499 cur_tb = tb;
1500 #endif
1504 static inline void do_balance_completed (struct tree_balance * tb)
1507 #ifdef CONFIG_REISERFS_CHECK
1508 check_leaf_level (tb);
1509 check_internal_levels (tb);
1510 cur_tb = NULL;
1511 #endif
1513 /* reiserfs_free_block is no longer schedule safe. So, we need to
1514 ** put the buffers we want freed on the thrown list during do_balance,
1515 ** and then free them now
1518 REISERFS_SB(tb->tb_sb)->s_do_balance ++;
1521 /* release all nodes hold to perform the balancing */
1522 unfix_nodes(tb);
1524 free_thrown(tb) ;
1531 void do_balance (struct tree_balance * tb, /* tree_balance structure */
1532 struct item_head * ih, /* item header of inserted item */
1533 const char * body, /* body of inserted item or bytes to paste */
1534 int flag) /* i - insert, d - delete
1535 c - cut, p - paste
1537 Cut means delete part of an item
1538 (includes removing an entry from a
1539 directory).
1541 Delete means delete whole item.
1543 Insert means add a new item into the
1544 tree.
1546 Paste means to append to the end of an
1547 existing file or to insert a directory
1548 entry. */
1550 int child_pos, /* position of a child node in its parent */
1551 h; /* level of the tree being processed */
1552 struct item_head insert_key[2]; /* in our processing of one level
1553 we sometimes determine what
1554 must be inserted into the next
1555 higher level. This insertion
1556 consists of a key or two keys
1557 and their corresponding
1558 pointers */
1559 struct buffer_head *insert_ptr[2]; /* inserted node-ptrs for the next
1560 level */
1562 tb->tb_mode = flag;
1563 tb->need_balance_dirty = 0;
1565 if (FILESYSTEM_CHANGED_TB(tb)) {
1566 reiserfs_panic(tb->tb_sb, "clm-6000: do_balance, fs generation has changed\n") ;
1568 /* if we have no real work to do */
1569 if ( ! tb->insert_size[0] ) {
1570 reiserfs_warning (tb->tb_sb,
1571 "PAP-12350: do_balance: insert_size == 0, mode == %c",
1572 flag);
1573 unfix_nodes(tb);
1574 return;
1577 atomic_inc (&(fs_generation (tb->tb_sb)));
1578 do_balance_starts (tb);
1580 /* balance leaf returns 0 except if combining L R and S into
1581 one node. see balance_internal() for explanation of this
1582 line of code.*/
1583 child_pos = PATH_H_B_ITEM_ORDER (tb->tb_path, 0) +
1584 balance_leaf (tb, ih, body, flag, insert_key, insert_ptr);
1586 #ifdef CONFIG_REISERFS_CHECK
1587 check_after_balance_leaf (tb);
1588 #endif
1590 /* Balance internal level of the tree. */
1591 for ( h = 1; h < MAX_HEIGHT && tb->insert_size[h]; h++ )
1592 child_pos = balance_internal (tb, h, child_pos, insert_key, insert_ptr);
1595 do_balance_completed (tb);