/* fs/reiserfs/do_balan.c — Linux kernel source (staging merge, v3.15-rc1) */
1 /*
2 * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
3 */
4
5 /* Now we have all buffers that must be used in balancing of the tree */
6 /* Further calculations can not cause schedule(), and thus the buffer */
7 /* tree will be stable until the balancing will be finished */
8 /* balance the tree according to the analysis made before, */
9 /* and using buffers obtained after all above. */
10
11 /**
12 ** balance_leaf_when_delete
13 ** balance_leaf
14 ** do_balance
15 **
16 **/
17
18 #include <asm/uaccess.h>
19 #include <linux/time.h>
20 #include "reiserfs.h"
21 #include <linux/buffer_head.h>
22 #include <linux/kernel.h>
23
24 static inline void buffer_info_init_left(struct tree_balance *tb,
25 struct buffer_info *bi)
26 {
27 bi->tb = tb;
28 bi->bi_bh = tb->L[0];
29 bi->bi_parent = tb->FL[0];
30 bi->bi_position = get_left_neighbor_position(tb, 0);
31 }
32
33 static inline void buffer_info_init_right(struct tree_balance *tb,
34 struct buffer_info *bi)
35 {
36 bi->tb = tb;
37 bi->bi_bh = tb->R[0];
38 bi->bi_parent = tb->FR[0];
39 bi->bi_position = get_right_neighbor_position(tb, 0);
40 }
41
42 static inline void buffer_info_init_tbS0(struct tree_balance *tb,
43 struct buffer_info *bi)
44 {
45 bi->tb = tb;
46 bi->bi_bh = PATH_PLAST_BUFFER(tb->tb_path);
47 bi->bi_parent = PATH_H_PPARENT(tb->tb_path, 0);
48 bi->bi_position = PATH_H_POSITION(tb->tb_path, 1);
49 }
50
51 static inline void buffer_info_init_bh(struct tree_balance *tb,
52 struct buffer_info *bi,
53 struct buffer_head *bh)
54 {
55 bi->tb = tb;
56 bi->bi_bh = bh;
57 bi->bi_parent = NULL;
58 bi->bi_position = 0;
59 }
60
61 inline void do_balance_mark_leaf_dirty(struct tree_balance *tb,
62 struct buffer_head *bh, int flag)
63 {
64 journal_mark_dirty(tb->transaction_handle,
65 tb->transaction_handle->t_super, bh);
66 }
67
68 #define do_balance_mark_internal_dirty do_balance_mark_leaf_dirty
69 #define do_balance_mark_sb_dirty do_balance_mark_leaf_dirty
70
71 /* summary:
72 if deleting something ( tb->insert_size[0] < 0 )
73 return(balance_leaf_when_delete()); (flag d handled here)
74 else
75 if lnum is larger than 0 we put items into the left node
76 if rnum is larger than 0 we put items into the right node
77 if snum1 is larger than 0 we put items into the new node s1
78 if snum2 is larger than 0 we put items into the new node s2
79 Note that all *num* count new items being created.
80
81 It would be easier to read balance_leaf() if each of these summary
82 lines was a separate procedure rather than being inlined. I think
83 that there are many passages here and in balance_leaf_when_delete() in
84 which two calls to one procedure can replace two passages, and it
85 might save cache space and improve software maintenance costs to do so.
86
87 Vladimir made the perceptive comment that we should offload most of
88 the decision making in this function into fix_nodes/check_balance, and
89 then create some sort of structure in tb that says what actions should
90 be performed by do_balance.
91
92 -Hans */
93
94 /* Balance leaf node in case of delete or cut: insert_size[0] < 0
95 *
96 * lnum, rnum can have values >= -1
97 * -1 means that the neighbor must be joined with S
98 * 0 means that nothing should be done with the neighbor
99 * >0 means to shift entirely or partly the specified number of items to the neighbor
100 */
/*
 * balance_leaf_when_delete - rebalance leaf S[0] after a delete (M_DELETE)
 * or cut (M_CUT), i.e. when tb->insert_size[0] < 0.
 *
 * Step 1: remove or truncate the item at the path position in S[0],
 * fixing up the left delimiting key in CFL[0] when the 0-th item or
 * 0-th directory entry was affected.
 *
 * Step 2: carry out the shift/merge plan that fix_nodes() recorded in
 * tb->lnum[0] / tb->rnum[0] (see the comment above this function):
 *   -1  join the whole of S[0] with that neighbor
 *    0  leave that neighbor untouched
 *   >0  shift that many items (lbytes/rbytes describe a split item)
 *
 * Returns 0 on every path except the three-way merge into R[0], which
 * returns -1.  NOTE(review): the meaning of that -1 to the caller is
 * not visible here — confirm against do_balance()/balance_leaf().
 */
101 static int balance_leaf_when_delete(struct tree_balance *tb, int flag)
102 {
103 struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
104 int item_pos = PATH_LAST_POSITION(tb->tb_path);
105 int pos_in_item = tb->tb_path->pos_in_item;
106 struct buffer_info bi;
107 int n;
108 struct item_head *ih;
109
/* sanity: FR[0] must sit one level above the leaves, only one block may
   be planned, and an empty plan requires S[0] to still have a parent */
110 RFALSE(tb->FR[0] && B_LEVEL(tb->FR[0]) != DISK_LEAF_NODE_LEVEL + 1,
111 "vs- 12000: level: wrong FR %z", tb->FR[0]);
112 RFALSE(tb->blknum[0] > 1,
113 "PAP-12005: tb->blknum == %d, can not be > 1", tb->blknum[0]);
114 RFALSE(!tb->blknum[0] && !PATH_H_PPARENT(tb->tb_path, 0),
115 "PAP-12010: tree can not be empty");
116
117 ih = B_N_PITEM_HEAD(tbS0, item_pos);
118 buffer_info_init_tbS0(tb, &bi);
119
120 /* Delete or truncate the item */
121
122 switch (flag) {
123 case M_DELETE: /* delete item in S[0] */
124
/* a whole-item delete must account for exactly the item body plus its
   header */
125 RFALSE(ih_item_len(ih) + IH_SIZE != -tb->insert_size[0],
126 "vs-12013: mode Delete, insert size %d, ih to be deleted %h",
127 -tb->insert_size[0], ih);
128
129 leaf_delete_items(&bi, 0, item_pos, 1, -1);
130
/* the 0-th item was removed: the left delimiting key held in CFL[0]
   must be refreshed — from the new 0-th item if one remains, otherwise
   from the parent (only when S[0] is its parent's 0-th child) */
131 if (!item_pos && tb->CFL[0]) {
132 if (B_NR_ITEMS(tbS0)) {
133 replace_key(tb, tb->CFL[0], tb->lkey[0], tbS0,
134 0);
135 } else {
136 if (!PATH_H_POSITION(tb->tb_path, 1))
137 replace_key(tb, tb->CFL[0], tb->lkey[0],
138 PATH_H_PPARENT(tb->tb_path,
139 0), 0);
140 }
141 }
142
143 RFALSE(!item_pos && !tb->CFL[0],
144 "PAP-12020: tb->CFL[0]==%p, tb->L[0]==%p", tb->CFL[0],
145 tb->L[0]);
146
147 break;
148
149 case M_CUT:{ /* cut item in S[0] */
150 if (is_direntry_le_ih(ih)) {
151
152 /* UFS unlink semantics are such that you can only delete one directory entry at a time. */
153 /* when we cut a directory tb->insert_size[0] means number of entries to be cut (always 1) */
154 tb->insert_size[0] = -1;
155 leaf_cut_from_buffer(&bi, item_pos, pos_in_item,
156 -tb->insert_size[0]);
157
158 RFALSE(!item_pos && !pos_in_item && !tb->CFL[0],
159 "PAP-12030: can not change delimiting key. CFL[0]=%p",
160 tb->CFL[0]);
161
/* cutting the very first entry of the very first item changes the key
   that delimits S[0] from its left neighbor */
162 if (!item_pos && !pos_in_item && tb->CFL[0]) {
163 replace_key(tb, tb->CFL[0], tb->lkey[0],
164 tbS0, 0);
165 }
166 } else {
/* non-directory item: cut -insert_size[0] bytes off its tail */
167 leaf_cut_from_buffer(&bi, item_pos, pos_in_item,
168 -tb->insert_size[0]);
169
170 RFALSE(!ih_item_len(ih),
171 "PAP-12035: cut must leave non-zero dynamic length of item");
172 }
173 break;
174 }
175
176 default:
177 print_cur_tb("12040");
178 reiserfs_panic(tb->tb_sb, "PAP-12040",
179 "unexpected mode: %s(%d)",
180 (flag ==
181 M_PASTE) ? "PASTE" : ((flag ==
182 M_INSERT) ? "INSERT" :
183 "UNKNOWN"), flag);
184 }
185
186 /* the rule is that no shifting occurs unless by shifting a node can be freed */
187 n = B_NR_ITEMS(tbS0);
188 if (tb->lnum[0]) { /* L[0] takes part in balancing */
189 if (tb->lnum[0] == -1) { /* L[0] must be joined with S[0] */
190 if (tb->rnum[0] == -1) { /* R[0] must be also joined with S[0] */
/* three-way merge; the direction is chosen by whether S[0] and R[0]
   share the same parent node */
191 if (tb->FR[0] == PATH_H_PPARENT(tb->tb_path, 0)) {
192 /* all contents of all the 3 buffers will be in L[0] */
193 if (PATH_H_POSITION(tb->tb_path, 1) == 0
194 && 1 < B_NR_ITEMS(tb->FR[0]))
195 replace_key(tb, tb->CFL[0],
196 tb->lkey[0],
197 tb->FR[0], 1);
198
199 leaf_move_items(LEAF_FROM_S_TO_L, tb, n,
200 -1, NULL);
201 leaf_move_items(LEAF_FROM_R_TO_L, tb,
202 B_NR_ITEMS(tb->R[0]),
203 -1, NULL);
204
/* S[0] and R[0] are now empty: drop both buffers */
205 reiserfs_invalidate_buffer(tb, tbS0);
206 reiserfs_invalidate_buffer(tb,
207 tb->R[0]);
208
209 return 0;
210 }
211 /* all contents of all the 3 buffers will be in R[0] */
212 leaf_move_items(LEAF_FROM_S_TO_R, tb, n, -1,
213 NULL);
214 leaf_move_items(LEAF_FROM_L_TO_R, tb,
215 B_NR_ITEMS(tb->L[0]), -1, NULL);
216
217 /* right_delimiting_key is correct in R[0] */
218 replace_key(tb, tb->CFR[0], tb->rkey[0],
219 tb->R[0], 0);
220
/* S[0] and L[0] are now empty: drop both buffers */
221 reiserfs_invalidate_buffer(tb, tbS0);
222 reiserfs_invalidate_buffer(tb, tb->L[0]);
223
/* the only non-zero return of this function */
224 return -1;
225 }
226
227 RFALSE(tb->rnum[0] != 0,
228 "PAP-12045: rnum must be 0 (%d)", tb->rnum[0]);
229 /* all contents of L[0] and S[0] will be in L[0] */
230 leaf_shift_left(tb, n, -1);
231
232 reiserfs_invalidate_buffer(tb, tbS0);
233
234 return 0;
235 }
236 /* a part of contents of S[0] will be in L[0] and the rest part of S[0] will be in R[0] */
237
/* lnum + rnum covers either exactly n items (no split) or n + 1
   (one item split between the neighbors) */
238 RFALSE((tb->lnum[0] + tb->rnum[0] < n) ||
239 (tb->lnum[0] + tb->rnum[0] > n + 1),
240 "PAP-12050: rnum(%d) and lnum(%d) and item number(%d) in S[0] are not consistent",
241 tb->rnum[0], tb->lnum[0], n);
242 RFALSE((tb->lnum[0] + tb->rnum[0] == n) &&
243 (tb->lbytes != -1 || tb->rbytes != -1),
244 "PAP-12055: bad rbytes (%d)/lbytes (%d) parameters when items are not split",
245 tb->rbytes, tb->lbytes);
246 RFALSE((tb->lnum[0] + tb->rnum[0] == n + 1) &&
247 (tb->lbytes < 1 || tb->rbytes != -1),
248 "PAP-12060: bad rbytes (%d)/lbytes (%d) parameters when items are split",
249 tb->rbytes, tb->lbytes);
250
251 leaf_shift_left(tb, tb->lnum[0], tb->lbytes);
252 leaf_shift_right(tb, tb->rnum[0], tb->rbytes);
253
254 reiserfs_invalidate_buffer(tb, tbS0);
255
256 return 0;
257 }
258
259 if (tb->rnum[0] == -1) {
260 /* all contents of R[0] and S[0] will be in R[0] */
261 leaf_shift_right(tb, n, -1);
262 reiserfs_invalidate_buffer(tb, tbS0);
263 return 0;
264 }
265
/* no neighbor involved: nothing more to do after the delete/cut */
266 RFALSE(tb->rnum[0],
267 "PAP-12065: bad rnum parameter must be 0 (%d)", tb->rnum[0]);
268 return 0;
269 }
270
271 static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item header of inserted item (this is on little endian) */
272 const char *body, /* body of inserted item or bytes to paste */
273 int flag, /* i - insert, d - delete, c - cut, p - paste
274 (see comment to do_balance) */
275 struct item_head *insert_key, /* in our processing of one level we sometimes determine what
276 must be inserted into the next higher level. This insertion
277 consists of a key or two keys and their corresponding
278 pointers */
279 struct buffer_head **insert_ptr /* inserted node-ptrs for the next level */
280 )
281 {
282 struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
283 int item_pos = PATH_LAST_POSITION(tb->tb_path); /* index into the array of item headers in S[0]
284 of the affected item */
285 struct buffer_info bi;
286 struct buffer_head *S_new[2]; /* new nodes allocated to hold what could not fit into S */
287 int snum[2]; /* number of items that will be placed
288 into S_new (includes partially shifted
289 items) */
290 int sbytes[2]; /* if an item is partially shifted into S_new then
291 if it is a directory item
292 it is the number of entries from the item that are shifted into S_new
293 else
294 it is the number of bytes from the item that are shifted into S_new
295 */
296 int n, i;
297 int ret_val;
298 int pos_in_item;
299 int zeros_num;
300
301 PROC_INFO_INC(tb->tb_sb, balance_at[0]);
302
303 /* Make balance in case insert_size[0] < 0 */
304 if (tb->insert_size[0] < 0)
305 return balance_leaf_when_delete(tb, flag);
306
307 zeros_num = 0;
308 if (flag == M_INSERT && !body)
309 zeros_num = ih_item_len(ih);
310
311 pos_in_item = tb->tb_path->pos_in_item;
312 /* for indirect item pos_in_item is measured in unformatted node
313 pointers. Recalculate to bytes */
314 if (flag != M_INSERT
315 && is_indirect_le_ih(B_N_PITEM_HEAD(tbS0, item_pos)))
316 pos_in_item *= UNFM_P_SIZE;
317
318 if (tb->lnum[0] > 0) {
319 /* Shift lnum[0] items from S[0] to the left neighbor L[0] */
320 if (item_pos < tb->lnum[0]) {
321 /* new item or it part falls to L[0], shift it too */
322 n = B_NR_ITEMS(tb->L[0]);
323
324 switch (flag) {
325 case M_INSERT: /* insert item into L[0] */
326
327 if (item_pos == tb->lnum[0] - 1 && tb->lbytes != -1) {
328 /* part of new item falls into L[0] */
329 int new_item_len;
330 int version;
331
332 ret_val = leaf_shift_left(tb, tb->lnum[0] - 1, -1);
333
334 /* Calculate item length to insert to S[0] */
335 new_item_len = ih_item_len(ih) - tb->lbytes;
336 /* Calculate and check item length to insert to L[0] */
337 put_ih_item_len(ih, ih_item_len(ih) - new_item_len);
338
339 RFALSE(ih_item_len(ih) <= 0,
340 "PAP-12080: there is nothing to insert into L[0]: ih_item_len=%d",
341 ih_item_len(ih));
342
343 /* Insert new item into L[0] */
344 buffer_info_init_left(tb, &bi);
345 leaf_insert_into_buf(&bi,
346 n + item_pos - ret_val, ih, body,
347 zeros_num > ih_item_len(ih) ? ih_item_len(ih) : zeros_num);
348
349 version = ih_version(ih);
350
351 /* Calculate key component, item length and body to insert into S[0] */
352 set_le_ih_k_offset(ih, le_ih_k_offset(ih) +
353 (tb-> lbytes << (is_indirect_le_ih(ih) ? tb->tb_sb-> s_blocksize_bits - UNFM_P_SHIFT : 0)));
354
355 put_ih_item_len(ih, new_item_len);
356 if (tb->lbytes > zeros_num) {
357 body += (tb->lbytes - zeros_num);
358 zeros_num = 0;
359 } else
360 zeros_num -= tb->lbytes;
361
362 RFALSE(ih_item_len(ih) <= 0,
363 "PAP-12085: there is nothing to insert into S[0]: ih_item_len=%d",
364 ih_item_len(ih));
365 } else {
366 /* new item in whole falls into L[0] */
367 /* Shift lnum[0]-1 items to L[0] */
368 ret_val = leaf_shift_left(tb, tb->lnum[0] - 1, tb->lbytes);
369 /* Insert new item into L[0] */
370 buffer_info_init_left(tb, &bi);
371 leaf_insert_into_buf(&bi, n + item_pos - ret_val, ih, body, zeros_num);
372 tb->insert_size[0] = 0;
373 zeros_num = 0;
374 }
375 break;
376
377 case M_PASTE: /* append item in L[0] */
378
379 if (item_pos == tb->lnum[0] - 1 && tb->lbytes != -1) {
380 /* we must shift the part of the appended item */
381 if (is_direntry_le_ih(B_N_PITEM_HEAD(tbS0, item_pos))) {
382
383 RFALSE(zeros_num,
384 "PAP-12090: invalid parameter in case of a directory");
385 /* directory item */
386 if (tb->lbytes > pos_in_item) {
387 /* new directory entry falls into L[0] */
388 struct item_head *pasted;
389 int l_pos_in_item = pos_in_item;
390
391 /* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 entries from given directory item */
392 ret_val = leaf_shift_left(tb, tb->lnum[0], tb->lbytes-1);
393 if (ret_val && !item_pos) {
394 pasted = B_N_PITEM_HEAD(tb->L[0], B_NR_ITEMS(tb->L[0]) - 1);
395 l_pos_in_item += I_ENTRY_COUNT(pasted) - (tb->lbytes -1);
396 }
397
398 /* Append given directory entry to directory item */
399 buffer_info_init_left(tb, &bi);
400 leaf_paste_in_buffer(&bi, n + item_pos - ret_val, l_pos_in_item, tb->insert_size[0], body, zeros_num);
401
402 /* previous string prepared space for pasting new entry, following string pastes this entry */
403
404 /* when we have merge directory item, pos_in_item has been changed too */
405
406 /* paste new directory entry. 1 is entry number */
407 leaf_paste_entries(&bi, n + item_pos - ret_val, l_pos_in_item,
408 1, (struct reiserfs_de_head *) body,
409 body + DEH_SIZE, tb->insert_size[0]);
410 tb->insert_size[0] = 0;
411 } else {
412 /* new directory item doesn't fall into L[0] */
413 /* Shift lnum[0]-1 items in whole. Shift lbytes directory entries from directory item number lnum[0] */
414 leaf_shift_left(tb, tb->lnum[0], tb->lbytes);
415 }
416 /* Calculate new position to append in item body */
417 pos_in_item -= tb->lbytes;
418 } else {
419 /* regular object */
420 RFALSE(tb->lbytes <= 0, "PAP-12095: there is nothing to shift to L[0]. lbytes=%d", tb->lbytes);
421 RFALSE(pos_in_item != ih_item_len(B_N_PITEM_HEAD(tbS0, item_pos)),
422 "PAP-12100: incorrect position to paste: item_len=%d, pos_in_item=%d",
423 ih_item_len(B_N_PITEM_HEAD(tbS0, item_pos)),pos_in_item);
424
425 if (tb->lbytes >= pos_in_item) {
426 /* appended item will be in L[0] in whole */
427 int l_n;
428
429 /* this bytes number must be appended to the last item of L[h] */
430 l_n = tb->lbytes - pos_in_item;
431
432 /* Calculate new insert_size[0] */
433 tb->insert_size[0] -= l_n;
434
435 RFALSE(tb->insert_size[0] <= 0,
436 "PAP-12105: there is nothing to paste into L[0]. insert_size=%d",
437 tb->insert_size[0]);
438 ret_val = leaf_shift_left(tb, tb->lnum[0], ih_item_len
439 (B_N_PITEM_HEAD(tbS0, item_pos)));
440 /* Append to body of item in L[0] */
441 buffer_info_init_left(tb, &bi);
442 leaf_paste_in_buffer
443 (&bi, n + item_pos - ret_val, ih_item_len
444 (B_N_PITEM_HEAD(tb->L[0], n + item_pos - ret_val)),
445 l_n, body,
446 zeros_num > l_n ? l_n : zeros_num);
447 /* 0-th item in S0 can be only of DIRECT type when l_n != 0 */
448 {
449 int version;
450 int temp_l = l_n;
451
452 RFALSE(ih_item_len(B_N_PITEM_HEAD(tbS0, 0)),
453 "PAP-12106: item length must be 0");
454 RFALSE(comp_short_le_keys(B_N_PKEY(tbS0, 0), B_N_PKEY
455 (tb->L[0], n + item_pos - ret_val)),
456 "PAP-12107: items must be of the same file");
457 if (is_indirect_le_ih(B_N_PITEM_HEAD(tb->L[0], n + item_pos - ret_val))) {
458 temp_l = l_n << (tb->tb_sb-> s_blocksize_bits - UNFM_P_SHIFT);
459 }
460 /* update key of first item in S0 */
461 version = ih_version(B_N_PITEM_HEAD(tbS0, 0));
462 set_le_key_k_offset(version, B_N_PKEY(tbS0, 0),
463 le_key_k_offset(version,B_N_PKEY(tbS0, 0)) + temp_l);
464 /* update left delimiting key */
465 set_le_key_k_offset(version, B_N_PDELIM_KEY(tb->CFL[0], tb->lkey[0]),
466 le_key_k_offset(version, B_N_PDELIM_KEY(tb->CFL[0], tb->lkey[0])) + temp_l);
467 }
468
469 /* Calculate new body, position in item and insert_size[0] */
470 if (l_n > zeros_num) {
471 body += (l_n - zeros_num);
472 zeros_num = 0;
473 } else
474 zeros_num -= l_n;
475 pos_in_item = 0;
476
477 RFALSE(comp_short_le_keys(B_N_PKEY(tbS0, 0), B_N_PKEY(tb->L[0], B_NR_ITEMS(tb->L[0]) - 1))
478 || !op_is_left_mergeable(B_N_PKEY(tbS0, 0), tbS0->b_size)
479 || !op_is_left_mergeable(B_N_PDELIM_KEY(tb->CFL[0], tb->lkey[0]), tbS0->b_size),
480 "PAP-12120: item must be merge-able with left neighboring item");
481 } else { /* only part of the appended item will be in L[0] */
482
483 /* Calculate position in item for append in S[0] */
484 pos_in_item -= tb->lbytes;
485
486 RFALSE(pos_in_item <= 0, "PAP-12125: no place for paste. pos_in_item=%d", pos_in_item);
487
488 /* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 byte from item number lnum[0] */
489 leaf_shift_left(tb, tb->lnum[0], tb->lbytes);
490 }
491 }
492 } else { /* appended item will be in L[0] in whole */
493
494 struct item_head *pasted;
495
496 if (!item_pos && op_is_left_mergeable(B_N_PKEY(tbS0, 0), tbS0->b_size)) { /* if we paste into first item of S[0] and it is left mergable */
497 /* then increment pos_in_item by the size of the last item in L[0] */
498 pasted = B_N_PITEM_HEAD(tb->L[0], n - 1);
499 if (is_direntry_le_ih(pasted))
500 pos_in_item += ih_entry_count(pasted);
501 else
502 pos_in_item += ih_item_len(pasted);
503 }
504
505 /* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 byte from item number lnum[0] */
506 ret_val = leaf_shift_left(tb, tb->lnum[0], tb->lbytes);
507 /* Append to body of item in L[0] */
508 buffer_info_init_left(tb, &bi);
509 leaf_paste_in_buffer(&bi, n + item_pos - ret_val,
510 pos_in_item,
511 tb->insert_size[0],
512 body, zeros_num);
513
514 /* if appended item is directory, paste entry */
515 pasted = B_N_PITEM_HEAD(tb->L[0], n + item_pos - ret_val);
516 if (is_direntry_le_ih(pasted))
517 leaf_paste_entries(&bi, n + item_pos - ret_val,
518 pos_in_item, 1,
519 (struct reiserfs_de_head *) body,
520 body + DEH_SIZE,
521 tb->insert_size[0]);
522 /* if appended item is indirect item, put unformatted node into un list */
523 if (is_indirect_le_ih(pasted))
524 set_ih_free_space(pasted, 0);
525 tb->insert_size[0] = 0;
526 zeros_num = 0;
527 }
528 break;
529 default: /* cases d and t */
530 reiserfs_panic(tb->tb_sb, "PAP-12130",
531 "lnum > 0: unexpected mode: "
532 " %s(%d)",
533 (flag == M_DELETE) ? "DELETE" : ((flag == M_CUT) ? "CUT" : "UNKNOWN"), flag);
534 }
535 } else {
536 /* new item doesn't fall into L[0] */
537 leaf_shift_left(tb, tb->lnum[0], tb->lbytes);
538 }
539 }
540
541 /* tb->lnum[0] > 0 */
542 /* Calculate new item position */
543 item_pos -= (tb->lnum[0] - ((tb->lbytes != -1) ? 1 : 0));
544
545 if (tb->rnum[0] > 0) {
546 /* shift rnum[0] items from S[0] to the right neighbor R[0] */
547 n = B_NR_ITEMS(tbS0);
548 switch (flag) {
549
550 case M_INSERT: /* insert item */
551 if (n - tb->rnum[0] < item_pos) { /* new item or its part falls to R[0] */
552 if (item_pos == n - tb->rnum[0] + 1 && tb->rbytes != -1) { /* part of new item falls into R[0] */
553 loff_t old_key_comp, old_len, r_zeros_number;
554 const char *r_body;
555 int version;
556 loff_t offset;
557
558 leaf_shift_right(tb, tb->rnum[0] - 1, -1);
559
560 version = ih_version(ih);
561 /* Remember key component and item length */
562 old_key_comp = le_ih_k_offset(ih);
563 old_len = ih_item_len(ih);
564
565 /* Calculate key component and item length to insert into R[0] */
566 offset = le_ih_k_offset(ih) + ((old_len - tb->rbytes) << (is_indirect_le_ih(ih) ? tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT : 0));
567 set_le_ih_k_offset(ih, offset);
568 put_ih_item_len(ih, tb->rbytes);
569 /* Insert part of the item into R[0] */
570 buffer_info_init_right(tb, &bi);
571 if ((old_len - tb->rbytes) > zeros_num) {
572 r_zeros_number = 0;
573 r_body = body + (old_len - tb->rbytes) - zeros_num;
574 } else {
575 r_body = body;
576 r_zeros_number = zeros_num - (old_len - tb->rbytes);
577 zeros_num -= r_zeros_number;
578 }
579
580 leaf_insert_into_buf(&bi, 0, ih, r_body,
581 r_zeros_number);
582
583 /* Replace right delimiting key by first key in R[0] */
584 replace_key(tb, tb->CFR[0], tb->rkey[0],
585 tb->R[0], 0);
586
587 /* Calculate key component and item length to insert into S[0] */
588 set_le_ih_k_offset(ih, old_key_comp);
589 put_ih_item_len(ih, old_len - tb->rbytes);
590
591 tb->insert_size[0] -= tb->rbytes;
592
593 } else { /* whole new item falls into R[0] */
594
595 /* Shift rnum[0]-1 items to R[0] */
596 ret_val = leaf_shift_right(tb, tb->rnum[0] - 1, tb->rbytes);
597 /* Insert new item into R[0] */
598 buffer_info_init_right(tb, &bi);
599 leaf_insert_into_buf(&bi, item_pos - n + tb->rnum[0] - 1,
600 ih, body, zeros_num);
601
602 if (item_pos - n + tb->rnum[0] - 1 == 0) {
603 replace_key(tb, tb->CFR[0],
604 tb->rkey[0],
605 tb->R[0], 0);
606
607 }
608 zeros_num = tb->insert_size[0] = 0;
609 }
610 } else { /* new item or part of it doesn't fall into R[0] */
611
612 leaf_shift_right(tb, tb->rnum[0], tb->rbytes);
613 }
614 break;
615
616 case M_PASTE: /* append item */
617
618 if (n - tb->rnum[0] <= item_pos) { /* pasted item or part of it falls to R[0] */
619 if (item_pos == n - tb->rnum[0] && tb->rbytes != -1) { /* we must shift the part of the appended item */
620 if (is_direntry_le_ih(B_N_PITEM_HEAD(tbS0, item_pos))) { /* we append to directory item */
621 int entry_count;
622
623 RFALSE(zeros_num,
624 "PAP-12145: invalid parameter in case of a directory");
625 entry_count = I_ENTRY_COUNT(B_N_PITEM_HEAD
626 (tbS0, item_pos));
627 if (entry_count - tb->rbytes <
628 pos_in_item)
629 /* new directory entry falls into R[0] */
630 {
631 int paste_entry_position;
632
633 RFALSE(tb->rbytes - 1 >= entry_count || !tb-> insert_size[0],
634 "PAP-12150: no enough of entries to shift to R[0]: rbytes=%d, entry_count=%d",
635 tb->rbytes, entry_count);
636 /* Shift rnum[0]-1 items in whole. Shift rbytes-1 directory entries from directory item number rnum[0] */
637 leaf_shift_right(tb, tb->rnum[0], tb->rbytes - 1);
638 /* Paste given directory entry to directory item */
639 paste_entry_position = pos_in_item - entry_count + tb->rbytes - 1;
640 buffer_info_init_right(tb, &bi);
641 leaf_paste_in_buffer(&bi, 0, paste_entry_position, tb->insert_size[0], body, zeros_num);
642 /* paste entry */
643 leaf_paste_entries(&bi, 0, paste_entry_position, 1,
644 (struct reiserfs_de_head *) body,
645 body + DEH_SIZE, tb->insert_size[0]);
646
647 if (paste_entry_position == 0) {
648 /* change delimiting keys */
649 replace_key(tb, tb->CFR[0], tb->rkey[0], tb->R[0],0);
650 }
651
652 tb->insert_size[0] = 0;
653 pos_in_item++;
654 } else { /* new directory entry doesn't fall into R[0] */
655
656 leaf_shift_right(tb, tb->rnum[0], tb->rbytes);
657 }
658 } else { /* regular object */
659
660 int n_shift, n_rem, r_zeros_number;
661 const char *r_body;
662
663 /* Calculate number of bytes which must be shifted from appended item */
664 if ((n_shift = tb->rbytes - tb->insert_size[0]) < 0)
665 n_shift = 0;
666
667 RFALSE(pos_in_item != ih_item_len
668 (B_N_PITEM_HEAD(tbS0, item_pos)),
669 "PAP-12155: invalid position to paste. ih_item_len=%d, pos_in_item=%d",
670 pos_in_item, ih_item_len
671 (B_N_PITEM_HEAD(tbS0, item_pos)));
672
673 leaf_shift_right(tb, tb->rnum[0], n_shift);
674 /* Calculate number of bytes which must remain in body after appending to R[0] */
675 if ((n_rem = tb->insert_size[0] - tb->rbytes) < 0)
676 n_rem = 0;
677
678 {
679 int version;
680 unsigned long temp_rem = n_rem;
681
682 version = ih_version(B_N_PITEM_HEAD(tb->R[0], 0));
683 if (is_indirect_le_key(version, B_N_PKEY(tb->R[0], 0))) {
684 temp_rem = n_rem << (tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT);
685 }
686 set_le_key_k_offset(version, B_N_PKEY(tb->R[0], 0),
687 le_key_k_offset(version, B_N_PKEY(tb->R[0], 0)) + temp_rem);
688 set_le_key_k_offset(version, B_N_PDELIM_KEY(tb->CFR[0], tb->rkey[0]),
689 le_key_k_offset(version, B_N_PDELIM_KEY(tb->CFR[0], tb->rkey[0])) + temp_rem);
690 }
691 /* k_offset (B_N_PKEY(tb->R[0],0)) += n_rem;
692 k_offset (B_N_PDELIM_KEY(tb->CFR[0],tb->rkey[0])) += n_rem;*/
693 do_balance_mark_internal_dirty(tb, tb->CFR[0], 0);
694
695 /* Append part of body into R[0] */
696 buffer_info_init_right(tb, &bi);
697 if (n_rem > zeros_num) {
698 r_zeros_number = 0;
699 r_body = body + n_rem - zeros_num;
700 } else {
701 r_body = body;
702 r_zeros_number = zeros_num - n_rem;
703 zeros_num -= r_zeros_number;
704 }
705
706 leaf_paste_in_buffer(&bi, 0, n_shift,
707 tb->insert_size[0] - n_rem,
708 r_body, r_zeros_number);
709
710 if (is_indirect_le_ih(B_N_PITEM_HEAD(tb->R[0], 0))) {
711 #if 0
712 RFALSE(n_rem,
713 "PAP-12160: paste more than one unformatted node pointer");
714 #endif
715 set_ih_free_space(B_N_PITEM_HEAD(tb->R[0], 0), 0);
716 }
717 tb->insert_size[0] = n_rem;
718 if (!n_rem)
719 pos_in_item++;
720 }
721 } else { /* pasted item in whole falls into R[0] */
722
723 struct item_head *pasted;
724
725 ret_val = leaf_shift_right(tb, tb->rnum[0], tb->rbytes);
726 /* append item in R[0] */
727 if (pos_in_item >= 0) {
728 buffer_info_init_right(tb, &bi);
729 leaf_paste_in_buffer(&bi, item_pos - n + tb->rnum[0], pos_in_item,
730 tb->insert_size[0], body, zeros_num);
731 }
732
733 /* paste new entry, if item is directory item */
734 pasted = B_N_PITEM_HEAD(tb->R[0], item_pos - n + tb->rnum[0]);
735 if (is_direntry_le_ih(pasted) && pos_in_item >= 0) {
736 leaf_paste_entries(&bi, item_pos - n + tb->rnum[0],
737 pos_in_item, 1,
738 (struct reiserfs_de_head *) body,
739 body + DEH_SIZE, tb->insert_size[0]);
740 if (!pos_in_item) {
741
742 RFALSE(item_pos - n + tb->rnum[0],
743 "PAP-12165: directory item must be first item of node when pasting is in 0th position");
744
745 /* update delimiting keys */
746 replace_key(tb, tb->CFR[0], tb->rkey[0], tb->R[0], 0);
747 }
748 }
749
750 if (is_indirect_le_ih(pasted))
751 set_ih_free_space(pasted, 0);
752 zeros_num = tb->insert_size[0] = 0;
753 }
754 } else { /* new item doesn't fall into R[0] */
755
756 leaf_shift_right(tb, tb->rnum[0], tb->rbytes);
757 }
758 break;
759 default: /* cases d and t */
760 reiserfs_panic(tb->tb_sb, "PAP-12175",
761 "rnum > 0: unexpected mode: %s(%d)",
762 (flag == M_DELETE) ? "DELETE" : ((flag == M_CUT) ? "CUT" : "UNKNOWN"), flag);
763 }
764
765 }
766
767 /* tb->rnum[0] > 0 */
768 RFALSE(tb->blknum[0] > 3,
769 "PAP-12180: blknum can not be %d. It must be <= 3", tb->blknum[0]);
770 RFALSE(tb->blknum[0] < 0,
771 "PAP-12185: blknum can not be %d. It must be >= 0", tb->blknum[0]);
772
773 /* if while adding to a node we discover that it is possible to split
774 it in two, and merge the left part into the left neighbor and the
775 right part into the right neighbor, eliminating the node */
776 if (tb->blknum[0] == 0) { /* node S[0] is empty now */
777
778 RFALSE(!tb->lnum[0] || !tb->rnum[0],
779 "PAP-12190: lnum and rnum must not be zero");
780 /* if insertion was done before 0-th position in R[0], right
781 delimiting key of the tb->L[0]'s and left delimiting key are
782 not set correctly */
783 if (tb->CFL[0]) {
784 if (!tb->CFR[0])
785 reiserfs_panic(tb->tb_sb, "vs-12195",
786 "CFR not initialized");
787 copy_key(B_N_PDELIM_KEY(tb->CFL[0], tb->lkey[0]),
788 B_N_PDELIM_KEY(tb->CFR[0], tb->rkey[0]));
789 do_balance_mark_internal_dirty(tb, tb->CFL[0], 0);
790 }
791
792 reiserfs_invalidate_buffer(tb, tbS0);
793 return 0;
794 }
795
796 /* Fill new nodes that appear in place of S[0] */
797
798 /* I am told that this copying is because we need an array to enable
799 the looping code. -Hans */
800 snum[0] = tb->s1num, snum[1] = tb->s2num;
801 sbytes[0] = tb->s1bytes;
802 sbytes[1] = tb->s2bytes;
803 for (i = tb->blknum[0] - 2; i >= 0; i--) {
804
805 RFALSE(!snum[i], "PAP-12200: snum[%d] == %d. Must be > 0", i,
806 snum[i]);
807
808 /* here we shift from S to S_new nodes */
809
810 S_new[i] = get_FEB(tb);
811
812 /* initialized block type and tree level */
813 set_blkh_level(B_BLK_HEAD(S_new[i]), DISK_LEAF_NODE_LEVEL);
814
815 n = B_NR_ITEMS(tbS0);
816
817 switch (flag) {
818 case M_INSERT: /* insert item */
819
820 if (n - snum[i] < item_pos) { /* new item or it's part falls to first new node S_new[i] */
821 if (item_pos == n - snum[i] + 1 && sbytes[i] != -1) { /* part of new item falls into S_new[i] */
822 int old_key_comp, old_len, r_zeros_number;
823 const char *r_body;
824 int version;
825
826 /* Move snum[i]-1 items from S[0] to S_new[i] */
827 leaf_move_items(LEAF_FROM_S_TO_SNEW, tb,
828 snum[i] - 1, -1,
829 S_new[i]);
830 /* Remember key component and item length */
831 version = ih_version(ih);
832 old_key_comp = le_ih_k_offset(ih);
833 old_len = ih_item_len(ih);
834
835 /* Calculate key component and item length to insert into S_new[i] */
836 set_le_ih_k_offset(ih, le_ih_k_offset(ih) +
837 ((old_len - sbytes[i]) << (is_indirect_le_ih(ih) ? tb->tb_sb-> s_blocksize_bits - UNFM_P_SHIFT : 0)));
838
839 put_ih_item_len(ih, sbytes[i]);
840
841 /* Insert part of the item into S_new[i] before 0-th item */
842 buffer_info_init_bh(tb, &bi, S_new[i]);
843
844 if ((old_len - sbytes[i]) > zeros_num) {
845 r_zeros_number = 0;
846 r_body = body + (old_len - sbytes[i]) - zeros_num;
847 } else {
848 r_body = body;
849 r_zeros_number = zeros_num - (old_len - sbytes[i]);
850 zeros_num -= r_zeros_number;
851 }
852
853 leaf_insert_into_buf(&bi, 0, ih, r_body, r_zeros_number);
854
855 /* Calculate key component and item length to insert into S[i] */
856 set_le_ih_k_offset(ih, old_key_comp);
857 put_ih_item_len(ih, old_len - sbytes[i]);
858 tb->insert_size[0] -= sbytes[i];
859 } else { /* whole new item falls into S_new[i] */
860
861 /* Shift snum[0] - 1 items to S_new[i] (sbytes[i] of split item) */
862 leaf_move_items(LEAF_FROM_S_TO_SNEW, tb,
863 snum[i] - 1, sbytes[i], S_new[i]);
864
865 /* Insert new item into S_new[i] */
866 buffer_info_init_bh(tb, &bi, S_new[i]);
867 leaf_insert_into_buf(&bi, item_pos - n + snum[i] - 1,
868 ih, body, zeros_num);
869
870 zeros_num = tb->insert_size[0] = 0;
871 }
872 }
873
874 else { /* new item or it part don't falls into S_new[i] */
875
876 leaf_move_items(LEAF_FROM_S_TO_SNEW, tb,
877 snum[i], sbytes[i], S_new[i]);
878 }
879 break;
880
881 case M_PASTE: /* append item */
882
883 if (n - snum[i] <= item_pos) { /* pasted item or part if it falls to S_new[i] */
884 if (item_pos == n - snum[i] && sbytes[i] != -1) { /* we must shift part of the appended item */
885 struct item_head *aux_ih;
886
887 RFALSE(ih, "PAP-12210: ih must be 0");
888
889 aux_ih = B_N_PITEM_HEAD(tbS0, item_pos);
890 if (is_direntry_le_ih(aux_ih)) {
891 /* we append to directory item */
892
893 int entry_count;
894
895 entry_count = ih_entry_count(aux_ih);
896
897 if (entry_count - sbytes[i] < pos_in_item && pos_in_item <= entry_count) {
898 /* new directory entry falls into S_new[i] */
899
900 RFALSE(!tb->insert_size[0], "PAP-12215: insert_size is already 0");
901 RFALSE(sbytes[i] - 1 >= entry_count,
902 "PAP-12220: there are no so much entries (%d), only %d",
903 sbytes[i] - 1, entry_count);
904
905 /* Shift snum[i]-1 items in whole. Shift sbytes[i] directory entries from directory item number snum[i] */
906 leaf_move_items(LEAF_FROM_S_TO_SNEW, tb, snum[i], sbytes[i] - 1, S_new[i]);
907 /* Paste given directory entry to directory item */
908 buffer_info_init_bh(tb, &bi, S_new[i]);
909 leaf_paste_in_buffer(&bi, 0, pos_in_item - entry_count + sbytes[i] - 1,
910 tb->insert_size[0], body, zeros_num);
911 /* paste new directory entry */
912 leaf_paste_entries(&bi, 0, pos_in_item - entry_count + sbytes[i] - 1, 1,
913 (struct reiserfs_de_head *) body,
914 body + DEH_SIZE, tb->insert_size[0]);
915 tb->insert_size[0] = 0;
916 pos_in_item++;
917 } else { /* new directory entry doesn't fall into S_new[i] */
918 leaf_move_items(LEAF_FROM_S_TO_SNEW,tb, snum[i], sbytes[i], S_new[i]);
919 }
920 } else { /* regular object */
921
922 int n_shift, n_rem, r_zeros_number;
923 const char *r_body;
924
925 RFALSE(pos_in_item != ih_item_len(B_N_PITEM_HEAD(tbS0, item_pos)) || tb->insert_size[0] <= 0,
926 "PAP-12225: item too short or insert_size <= 0");
927
928 /* Calculate number of bytes which must be shifted from appended item */
929 n_shift = sbytes[i] - tb->insert_size[0];
930 if (n_shift < 0)
931 n_shift = 0;
932 leaf_move_items(LEAF_FROM_S_TO_SNEW, tb, snum[i], n_shift, S_new[i]);
933
934 /* Calculate number of bytes which must remain in body after append to S_new[i] */
935 n_rem = tb->insert_size[0] - sbytes[i];
936 if (n_rem < 0)
937 n_rem = 0;
938 /* Append part of body into S_new[0] */
939 buffer_info_init_bh(tb, &bi, S_new[i]);
940 if (n_rem > zeros_num) {
941 r_zeros_number = 0;
942 r_body = body + n_rem - zeros_num;
943 } else {
944 r_body = body;
945 r_zeros_number = zeros_num - n_rem;
946 zeros_num -= r_zeros_number;
947 }
948
949 leaf_paste_in_buffer(&bi, 0, n_shift,
950 tb->insert_size[0] - n_rem,
951 r_body, r_zeros_number);
952 {
953 struct item_head *tmp;
954
955 tmp = B_N_PITEM_HEAD(S_new[i], 0);
956 if (is_indirect_le_ih
957 (tmp)) {
958 set_ih_free_space(tmp, 0);
959 set_le_ih_k_offset(tmp, le_ih_k_offset(tmp) + (n_rem << (tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT)));
960 } else {
961 set_le_ih_k_offset(tmp, le_ih_k_offset(tmp) + n_rem);
962 }
963 }
964
965 tb->insert_size[0] = n_rem;
966 if (!n_rem)
967 pos_in_item++;
968 }
969 } else
970 /* item falls wholly into S_new[i] */
971 {
972 int leaf_mi;
973 struct item_head *pasted;
974
975 #ifdef CONFIG_REISERFS_CHECK
976 struct item_head *ih_check = B_N_PITEM_HEAD(tbS0, item_pos);
977
978 if (!is_direntry_le_ih(ih_check)
979 && (pos_in_item != ih_item_len(ih_check)
980 || tb->insert_size[0] <= 0))
981 reiserfs_panic(tb->tb_sb,
982 "PAP-12235",
983 "pos_in_item "
984 "must be equal "
985 "to ih_item_len");
986 #endif /* CONFIG_REISERFS_CHECK */
987
988 leaf_mi = leaf_move_items(LEAF_FROM_S_TO_SNEW,
989 tb, snum[i],
990 sbytes[i],
991 S_new[i]);
992
993 RFALSE(leaf_mi,
994 "PAP-12240: unexpected value returned by leaf_move_items (%d)",
995 leaf_mi);
996
997 /* paste into item */
998 buffer_info_init_bh(tb, &bi, S_new[i]);
999 leaf_paste_in_buffer(&bi,
1000 item_pos - n + snum[i],
1001 pos_in_item,
1002 tb->insert_size[0],
1003 body, zeros_num);
1004
1005 pasted = B_N_PITEM_HEAD(S_new[i], item_pos - n + snum[i]);
1006 if (is_direntry_le_ih(pasted)) {
1007 leaf_paste_entries(&bi,
1008 item_pos - n + snum[i],
1009 pos_in_item, 1,
1010 (struct reiserfs_de_head *)body,
1011 body + DEH_SIZE,
1012 tb->insert_size[0]
1013 );
1014 }
1015
1016 /* if we paste to indirect item update ih_free_space */
1017 if (is_indirect_le_ih(pasted))
1018 set_ih_free_space(pasted, 0);
1019 zeros_num = tb->insert_size[0] = 0;
1020 }
1021 }
1022
1023 else { /* pasted item doesn't fall into S_new[i] */
1024
1025 leaf_move_items(LEAF_FROM_S_TO_SNEW, tb,
1026 snum[i], sbytes[i], S_new[i]);
1027 }
1028 break;
1029 default: /* cases d and t */
1030 reiserfs_panic(tb->tb_sb, "PAP-12245",
1031 "blknum > 2: unexpected mode: %s(%d)",
1032 (flag == M_DELETE) ? "DELETE" : ((flag == M_CUT) ? "CUT" : "UNKNOWN"), flag);
1033 }
1034
1035 memcpy(insert_key + i, B_N_PKEY(S_new[i], 0), KEY_SIZE);
1036 insert_ptr[i] = S_new[i];
1037
1038 RFALSE(!buffer_journaled(S_new[i])
1039 || buffer_journal_dirty(S_new[i])
1040 || buffer_dirty(S_new[i]), "PAP-12247: S_new[%d] : (%b)",
1041 i, S_new[i]);
1042 }
1043
1044 /* if the affected item was not wholly shifted then we perform all necessary operations on that part or whole of the
1045 affected item which remains in S */
1046 if (0 <= item_pos && item_pos < tb->s0num) { /* if we must insert or append into buffer S[0] */
1047
1048 switch (flag) {
1049 case M_INSERT: /* insert item into S[0] */
1050 buffer_info_init_tbS0(tb, &bi);
1051 leaf_insert_into_buf(&bi, item_pos, ih, body,
1052 zeros_num);
1053
1054 /* If we insert the first key change the delimiting key */
1055 if (item_pos == 0) {
1056 if (tb->CFL[0]) /* can be 0 in reiserfsck */
1057 replace_key(tb, tb->CFL[0], tb->lkey[0], tbS0, 0);
1058 }
1059 break;
1060
1061 case M_PASTE:{ /* append item in S[0] */
1062 struct item_head *pasted;
1063
1064 pasted = B_N_PITEM_HEAD(tbS0, item_pos);
1065 /* when directory, may be new entry already pasted */
1066 if (is_direntry_le_ih(pasted)) {
1067 if (pos_in_item >= 0 && pos_in_item <= ih_entry_count(pasted)) {
1068
1069 RFALSE(!tb->insert_size[0],
1070 "PAP-12260: insert_size is 0 already");
1071
1072 /* prepare space */
1073 buffer_info_init_tbS0(tb, &bi);
1074 leaf_paste_in_buffer(&bi, item_pos, pos_in_item,
1075 tb->insert_size[0], body,
1076 zeros_num);
1077
1078 /* paste entry */
1079 leaf_paste_entries(&bi, item_pos, pos_in_item, 1,
1080 (struct reiserfs_de_head *)body,
1081 body + DEH_SIZE,
1082 tb->insert_size[0]);
1083 if (!item_pos && !pos_in_item) {
1084 RFALSE(!tb->CFL[0] || !tb->L[0],
1085 "PAP-12270: CFL[0]/L[0] must be specified");
1086 if (tb->CFL[0])
1087 replace_key(tb, tb->CFL[0], tb->lkey[0], tbS0, 0);
1088 }
1089 tb->insert_size[0] = 0;
1090 }
1091 } else { /* regular object */
1092 if (pos_in_item == ih_item_len(pasted)) {
1093
1094 RFALSE(tb->insert_size[0] <= 0,
1095 "PAP-12275: insert size must not be %d",
1096 tb->insert_size[0]);
1097 buffer_info_init_tbS0(tb, &bi);
1098 leaf_paste_in_buffer(&bi, item_pos, pos_in_item,
1099 tb->insert_size[0], body, zeros_num);
1100
1101 if (is_indirect_le_ih(pasted)) {
1102 #if 0
1103 RFALSE(tb->
1104 insert_size[0] !=
1105 UNFM_P_SIZE,
1106 "PAP-12280: insert_size for indirect item must be %d, not %d",
1107 UNFM_P_SIZE,
1108 tb->
1109 insert_size[0]);
1110 #endif
1111 set_ih_free_space(pasted, 0);
1112 }
1113 tb->insert_size[0] = 0;
1114 }
1115 #ifdef CONFIG_REISERFS_CHECK
1116 else {
1117 if (tb->insert_size[0]) {
1118 print_cur_tb("12285");
1119 reiserfs_panic(tb->tb_sb,
1120 "PAP-12285",
1121 "insert_size "
1122 "must be 0 "
1123 "(%d)",
1124 tb->insert_size[0]);
1125 }
1126 }
1127 #endif /* CONFIG_REISERFS_CHECK */
1128
1129 }
1130 } /* case M_PASTE: */
1131 }
1132 }
1133 #ifdef CONFIG_REISERFS_CHECK
1134 if (flag == M_PASTE && tb->insert_size[0]) {
1135 print_cur_tb("12290");
1136 reiserfs_panic(tb->tb_sb,
1137 "PAP-12290", "insert_size is still not 0 (%d)",
1138 tb->insert_size[0]);
1139 }
1140 #endif /* CONFIG_REISERFS_CHECK */
1141 return 0;
1142 } /* Leaf level of the tree is balanced (end of balance_leaf) */
1143
1144 /* Make empty node */
1145 void make_empty_node(struct buffer_info *bi)
1146 {
1147 struct block_head *blkh;
1148
1149 RFALSE(bi->bi_bh == NULL, "PAP-12295: pointer to the buffer is NULL");
1150
1151 blkh = B_BLK_HEAD(bi->bi_bh);
1152 set_blkh_nr_item(blkh, 0);
1153 set_blkh_free_space(blkh, MAX_CHILD_SIZE(bi->bi_bh));
1154
1155 if (bi->bi_parent)
1156 B_N_CHILD(bi->bi_parent, bi->bi_position)->dc_size = 0; /* Endian safe if 0 */
1157 }
1158
1159 /* Get first empty buffer */
1160 struct buffer_head *get_FEB(struct tree_balance *tb)
1161 {
1162 int i;
1163 struct buffer_info bi;
1164
1165 for (i = 0; i < MAX_FEB_SIZE; i++)
1166 if (tb->FEB[i] != NULL)
1167 break;
1168
1169 if (i == MAX_FEB_SIZE)
1170 reiserfs_panic(tb->tb_sb, "vs-12300", "FEB list is empty");
1171
1172 buffer_info_init_bh(tb, &bi, tb->FEB[i]);
1173 make_empty_node(&bi);
1174 set_buffer_uptodate(tb->FEB[i]);
1175 tb->used[i] = tb->FEB[i];
1176 tb->FEB[i] = NULL;
1177
1178 return tb->used[i];
1179 }
1180
1181 /* This is now used because reiserfs_free_block has to be able to
1182 ** schedule.
1183 */
1184 static void store_thrown(struct tree_balance *tb, struct buffer_head *bh)
1185 {
1186 int i;
1187
1188 if (buffer_dirty(bh))
1189 reiserfs_warning(tb->tb_sb, "reiserfs-12320",
1190 "called with dirty buffer");
1191 for (i = 0; i < ARRAY_SIZE(tb->thrown); i++)
1192 if (!tb->thrown[i]) {
1193 tb->thrown[i] = bh;
1194 get_bh(bh); /* free_thrown puts this */
1195 return;
1196 }
1197 reiserfs_warning(tb->tb_sb, "reiserfs-12321",
1198 "too many thrown buffers");
1199 }
1200
1201 static void free_thrown(struct tree_balance *tb)
1202 {
1203 int i;
1204 b_blocknr_t blocknr;
1205 for (i = 0; i < ARRAY_SIZE(tb->thrown); i++) {
1206 if (tb->thrown[i]) {
1207 blocknr = tb->thrown[i]->b_blocknr;
1208 if (buffer_dirty(tb->thrown[i]))
1209 reiserfs_warning(tb->tb_sb, "reiserfs-12322",
1210 "called with dirty buffer %d",
1211 blocknr);
1212 brelse(tb->thrown[i]); /* incremented in store_thrown */
1213 reiserfs_free_block(tb->transaction_handle, NULL,
1214 blocknr, 0);
1215 }
1216 }
1217 }
1218
1219 void reiserfs_invalidate_buffer(struct tree_balance *tb, struct buffer_head *bh)
1220 {
1221 struct block_head *blkh;
1222 blkh = B_BLK_HEAD(bh);
1223 set_blkh_level(blkh, FREE_LEVEL);
1224 set_blkh_nr_item(blkh, 0);
1225
1226 clear_buffer_dirty(bh);
1227 store_thrown(tb, bh);
1228 }
1229
1230 /* Replace n_dest'th key in buffer dest by n_src'th key of buffer src.*/
1231 void replace_key(struct tree_balance *tb, struct buffer_head *dest, int n_dest,
1232 struct buffer_head *src, int n_src)
1233 {
1234
1235 RFALSE(dest == NULL || src == NULL,
1236 "vs-12305: source or destination buffer is 0 (src=%p, dest=%p)",
1237 src, dest);
1238 RFALSE(!B_IS_KEYS_LEVEL(dest),
1239 "vs-12310: invalid level (%z) for destination buffer. dest must be leaf",
1240 dest);
1241 RFALSE(n_dest < 0 || n_src < 0,
1242 "vs-12315: src(%d) or dest(%d) key number < 0", n_src, n_dest);
1243 RFALSE(n_dest >= B_NR_ITEMS(dest) || n_src >= B_NR_ITEMS(src),
1244 "vs-12320: src(%d(%d)) or dest(%d(%d)) key number is too big",
1245 n_src, B_NR_ITEMS(src), n_dest, B_NR_ITEMS(dest));
1246
1247 if (B_IS_ITEMS_LEVEL(src))
1248 /* source buffer contains leaf node */
1249 memcpy(B_N_PDELIM_KEY(dest, n_dest), B_N_PITEM_HEAD(src, n_src),
1250 KEY_SIZE);
1251 else
1252 memcpy(B_N_PDELIM_KEY(dest, n_dest), B_N_PDELIM_KEY(src, n_src),
1253 KEY_SIZE);
1254
1255 do_balance_mark_internal_dirty(tb, dest, 0);
1256 }
1257
1258 int get_left_neighbor_position(struct tree_balance *tb, int h)
1259 {
1260 int Sh_position = PATH_H_POSITION(tb->tb_path, h + 1);
1261
1262 RFALSE(PATH_H_PPARENT(tb->tb_path, h) == NULL || tb->FL[h] == NULL,
1263 "vs-12325: FL[%d](%p) or F[%d](%p) does not exist",
1264 h, tb->FL[h], h, PATH_H_PPARENT(tb->tb_path, h));
1265
1266 if (Sh_position == 0)
1267 return B_NR_ITEMS(tb->FL[h]);
1268 else
1269 return Sh_position - 1;
1270 }
1271
1272 int get_right_neighbor_position(struct tree_balance *tb, int h)
1273 {
1274 int Sh_position = PATH_H_POSITION(tb->tb_path, h + 1);
1275
1276 RFALSE(PATH_H_PPARENT(tb->tb_path, h) == NULL || tb->FR[h] == NULL,
1277 "vs-12330: F[%d](%p) or FR[%d](%p) does not exist",
1278 h, PATH_H_PPARENT(tb->tb_path, h), h, tb->FR[h]);
1279
1280 if (Sh_position == B_NR_ITEMS(PATH_H_PPARENT(tb->tb_path, h)))
1281 return 0;
1282 else
1283 return Sh_position + 1;
1284 }
1285
1286 #ifdef CONFIG_REISERFS_CHECK
1287
1288 int is_reusable(struct super_block *s, b_blocknr_t block, int bit_value);
/*
 * Debug-only sanity check of an internal (non-leaf) node after
 * balancing: the buffer must be dirty or journaled, and every child
 * pointer must reference a block that is_reusable() accepts.  Panics
 * (after dumping the tree_balance state) on a bad child pointer.
 */
static void check_internal_node(struct super_block *s, struct buffer_head *bh,
char *mes)
{
struct disk_child *dc;
int i;

RFALSE(!bh, "PAP-12336: bh == 0");

/* a buffer may legitimately no longer be part of the tree */
if (!bh || !B_IS_IN_TREE(bh))
return;

RFALSE(!buffer_dirty(bh) &&
!(buffer_journaled(bh) || buffer_journal_dirty(bh)),
"PAP-12337: buffer (%b) must be dirty", bh);
dc = B_N_CHILD(bh, 0);

/* <= is deliberate: an internal node with n keys has n + 1 children */
for (i = 0; i <= B_NR_ITEMS(bh); i++, dc++) {
if (!is_reusable(s, dc_block_number(dc), 1)) {
print_cur_tb(mes);
reiserfs_panic(s, "PAP-12338",
"invalid child pointer %y in %b",
dc, bh);
}
}
}
1314
1315 static int locked_or_not_in_tree(struct tree_balance *tb,
1316 struct buffer_head *bh, char *which)
1317 {
1318 if ((!buffer_journal_prepared(bh) && buffer_locked(bh)) ||
1319 !B_IS_IN_TREE(bh)) {
1320 reiserfs_warning(tb->tb_sb, "vs-12339", "%s (%b)", which, bh);
1321 return 1;
1322 }
1323 return 0;
1324 }
1325
/*
 * Debug-only pre-balance verification.  Panics if another balance
 * appears to be running on this mount (cur_tb non-NULL), then confirms
 * that every buffer the balance will modify — left/right neighbors,
 * their parents and common parents, and S[0] itself — is unlocked (or
 * journal-prepared) and still in the tree.  Returns nonzero if any
 * buffer fails the check.
 */
static int check_before_balancing(struct tree_balance *tb)
{
int retval = 0;

if (REISERFS_SB(tb->tb_sb)->cur_tb) {
reiserfs_panic(tb->tb_sb, "vs-12335", "suspect that schedule "
"occurred based on cur_tb not being null at "
"this point in code. do_balance cannot properly "
"handle concurrent tree accesses on a same "
"mount point.");
}

/* double check that buffers that we will modify are unlocked
   (fix_nodes should already have prepped all of these for us) */
if (tb->lnum[0]) {
retval |= locked_or_not_in_tree(tb, tb->L[0], "L[0]");
retval |= locked_or_not_in_tree(tb, tb->FL[0], "FL[0]");
retval |= locked_or_not_in_tree(tb, tb->CFL[0], "CFL[0]");
check_leaf(tb->L[0]);
}
if (tb->rnum[0]) {
retval |= locked_or_not_in_tree(tb, tb->R[0], "R[0]");
retval |= locked_or_not_in_tree(tb, tb->FR[0], "FR[0]");
retval |= locked_or_not_in_tree(tb, tb->CFR[0], "CFR[0]");
check_leaf(tb->R[0]);
}
retval |= locked_or_not_in_tree(tb, PATH_PLAST_BUFFER(tb->tb_path),
"S[0]");
check_leaf(PATH_PLAST_BUFFER(tb->tb_path));

return retval;
}
1358
/*
 * Debug-only post-balance accounting check for the leaf level: for each
 * leaf that was modified (L[0], R[0], S[0]) the free space recorded in
 * the leaf itself must equal the node capacity minus the child size its
 * parent records for it.  Any mismatch means a shift was mis-accounted;
 * we dump state and panic.
 */
static void check_after_balance_leaf(struct tree_balance *tb)
{
/* items were shifted left: re-check L[0] against its parent's dc_size */
if (tb->lnum[0]) {
if (B_FREE_SPACE(tb->L[0]) !=
MAX_CHILD_SIZE(tb->L[0]) -
dc_size(B_N_CHILD
(tb->FL[0], get_left_neighbor_position(tb, 0)))) {
print_cur_tb("12221");
reiserfs_panic(tb->tb_sb, "PAP-12355",
"shift to left was incorrect");
}
}
/* items were shifted right: re-check R[0] the same way */
if (tb->rnum[0]) {
if (B_FREE_SPACE(tb->R[0]) !=
MAX_CHILD_SIZE(tb->R[0]) -
dc_size(B_N_CHILD
(tb->FR[0], get_right_neighbor_position(tb, 0)))) {
print_cur_tb("12222");
reiserfs_panic(tb->tb_sb, "PAP-12360",
"shift to right was incorrect");
}
}
/* finally S[0] itself, when it has a parent on the path */
if (PATH_H_PBUFFER(tb->tb_path, 1) &&
(B_FREE_SPACE(PATH_H_PBUFFER(tb->tb_path, 0)) !=
(MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, 0)) -
dc_size(B_N_CHILD(PATH_H_PBUFFER(tb->tb_path, 1),
PATH_H_POSITION(tb->tb_path, 1)))))) {
int left = B_FREE_SPACE(PATH_H_PBUFFER(tb->tb_path, 0));
int right = (MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, 0)) -
dc_size(B_N_CHILD(PATH_H_PBUFFER(tb->tb_path, 1),
PATH_H_POSITION(tb->tb_path,
1))));
print_cur_tb("12223");
reiserfs_warning(tb->tb_sb, "reiserfs-12363",
"B_FREE_SPACE (PATH_H_PBUFFER(tb->tb_path,0)) = %d; "
"MAX_CHILD_SIZE (%d) - dc_size( %y, %d ) [%d] = %d",
left,
MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, 0)),
PATH_H_PBUFFER(tb->tb_path, 1),
PATH_H_POSITION(tb->tb_path, 1),
dc_size(B_N_CHILD
(PATH_H_PBUFFER(tb->tb_path, 1),
PATH_H_POSITION(tb->tb_path, 1))),
right);
reiserfs_panic(tb->tb_sb, "PAP-12365", "S is incorrect");
}
}
1406
/*
 * Debug-only: run leaf consistency checks on the three leaves a balance
 * can touch — the left neighbor, the right neighbor, and S[0] itself.
 */
static void check_leaf_level(struct tree_balance *tb)
{
check_leaf(tb->L[0]);
check_leaf(tb->R[0]);
check_leaf(PATH_PLAST_BUFFER(tb->tb_path));
}
1413
1414 static void check_internal_levels(struct tree_balance *tb)
1415 {
1416 int h;
1417
1418 /* check all internal nodes */
1419 for (h = 1; tb->insert_size[h]; h++) {
1420 check_internal_node(tb->tb_sb, PATH_H_PBUFFER(tb->tb_path, h),
1421 "BAD BUFFER ON PATH");
1422 if (tb->lnum[h])
1423 check_internal_node(tb->tb_sb, tb->L[h], "BAD L");
1424 if (tb->rnum[h])
1425 check_internal_node(tb->tb_sb, tb->R[h], "BAD R");
1426 }
1427
1428 }
1429
1430 #endif
1431
1432 /* Now we have all of the buffers that must be used in balancing of
1433 the tree. We rely on the assumption that schedule() will not occur
1434 while do_balance works. ( Only interrupt handlers are acceptable.)
1435 We balance the tree according to the analysis made before this,
1436 using buffers already obtained. For SMP support it will someday be
1437 necessary to add ordered locking of tb. */
1438
1439 /* Some interesting rules of balancing:
1440
1441 we delete a maximum of two nodes per level per balancing: we never
1442 delete R, when we delete two of three nodes L, S, R then we move
1443 them into R.
1444
1445 we only delete L if we are deleting two nodes, if we delete only
1446 one node we delete S
1447
1448 if we shift leaves then we shift as much as we can: this is a
1449 deliberate policy of extremism in node packing which results in
1450 higher average utilization after repeated random balance operations
1451 at the cost of more memory copies and more balancing as a result of
1452 small insertions to full nodes.
1453
1454 if we shift internal nodes we try to evenly balance the node
1455 utilization, with consequent less balancing at the cost of lower
1456 utilization.
1457
1458 one could argue that the policy for directories in leaves should be
1459 that of internal nodes, but we will wait until another day to
1460 evaluate this.... It would be nice to someday measure and prove
1461 these assumptions as to what is optimal....
1462
1463 */
1464
/*
 * Pre-balance bookkeeping: sanity-check the buffers collected in tb
 * and, under CONFIG_REISERFS_CHECK, publish tb as the balance currently
 * running on this mount so concurrent balances can be detected.
 */
static inline void do_balance_starts(struct tree_balance *tb)
{
/* use print_cur_tb() to see initial state of struct
   tree_balance */

/* store_print_tb (tb); */

/* do not delete, just comment it out */
/* print_tb(flag, PATH_LAST_POSITION(tb->tb_path), tb->tb_path->pos_in_item, tb,
   "check");*/
RFALSE(check_before_balancing(tb), "PAP-12340: locked buffers in TB");
#ifdef CONFIG_REISERFS_CHECK
REISERFS_SB(tb->tb_sb)->cur_tb = tb;
#endif
}
1480
/*
 * Post-balance bookkeeping: run the debug consistency checks, clear the
 * cur_tb marker, bump the per-sb balance counter, release every node
 * held for the balance, and finally free the blocks that were queued on
 * tb->thrown during the balance.
 */
static inline void do_balance_completed(struct tree_balance *tb)
{

#ifdef CONFIG_REISERFS_CHECK
check_leaf_level(tb);
check_internal_levels(tb);
REISERFS_SB(tb->tb_sb)->cur_tb = NULL;
#endif

/* reiserfs_free_block is no longer schedule safe, so the buffers we
 * want freed were parked on the thrown list during do_balance and are
 * released only now, after all nodes are unfixed */

REISERFS_SB(tb->tb_sb)->s_do_balance++;

/* release all nodes hold to perform the balancing */
unfix_nodes(tb);

free_thrown(tb);
}
1502
/*
 * do_balance - balance the tree according to the analysis made earlier
 * by fix_nodes(), using the buffers fix_nodes() already obtained.  The
 * code relies on schedule() not occurring between fix_nodes() and here
 * (the fs generation is checked to catch violations).
 *
 * @tb:   tree_balance structure describing the whole operation
 * @ih:   item header of the item being inserted (insert mode only)
 * @body: body of the inserted item, or the bytes being pasted
 * @flag: mode character — see below
 */
void do_balance(struct tree_balance *tb, /* tree_balance structure */
struct item_head *ih, /* item header of inserted item */
const char *body, /* body of inserted item or bytes to paste */
int flag)
{ /* modes: i - insert a new item into the tree;
     d - delete a whole item;
     c - cut, i.e. delete part of an item (includes
         removing an entry from a directory);
     p - paste, i.e. append to the end of an existing
         file or insert a directory entry. */
int child_pos, /* position of a child node in its parent */
h; /* level of the tree being processed */
struct item_head insert_key[2]; /* in our processing of one level
   we sometimes determine what
   must be inserted into the next
   higher level. This insertion
   consists of a key or two keys
   and their corresponding
   pointers */
struct buffer_head *insert_ptr[2]; /* inserted node-ptrs for the next
   level */

tb->tb_mode = flag;
tb->need_balance_dirty = 0;

/* fix_nodes() must have run in this same fs generation */
if (FILESYSTEM_CHANGED_TB(tb)) {
reiserfs_panic(tb->tb_sb, "clm-6000", "fs generation has "
"changed");
}
/* if we have no real work to do */
if (!tb->insert_size[0]) {
reiserfs_warning(tb->tb_sb, "PAP-12350",
"insert_size == 0, mode == %c", flag);
unfix_nodes(tb);
return;
}

atomic_inc(&(fs_generation(tb->tb_sb)));
do_balance_starts(tb);

/* balance_leaf returns 0 except if combining L R and S into
   one node. see balance_internal() for explanation of this
   line of code. */
child_pos = PATH_H_B_ITEM_ORDER(tb->tb_path, 0) +
balance_leaf(tb, ih, body, flag, insert_key, insert_ptr);

#ifdef CONFIG_REISERFS_CHECK
check_after_balance_leaf(tb);
#endif

/* Balance internal levels of the tree, bottom-up, as long as a
   level still has pending work (nonzero insert_size). */
for (h = 1; h < MAX_HEIGHT && tb->insert_size[h]; h++)
child_pos =
balance_internal(tb, h, child_pos, insert_key, insert_ptr);

do_balance_completed(tb);

}
/* This page took 0.096547 seconds and 5 git commands to generate. */