1 /* 2 * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README 3 */ 4 5 /* 6 * Written by Anatoly P. Pinchuk pap@namesys.botik.ru 7 * Programm System Institute 8 * Pereslavl-Zalessky Russia 9 */ 10 11 #include <linux/time.h> 12 #include <linux/string.h> 13 #include <linux/pagemap.h> 14 #include "reiserfs.h" 15 #include <linux/buffer_head.h> 16 #include <linux/quotaops.h> 17 18 /* Does the buffer contain a disk block which is in the tree. */ 19 inline int B_IS_IN_TREE(const struct buffer_head *bh) 20 { 21 22 RFALSE(B_LEVEL(bh) > MAX_HEIGHT, 23 "PAP-1010: block (%b) has too big level (%z)", bh, bh); 24 25 return (B_LEVEL(bh) != FREE_LEVEL); 26 } 27 28 /* to get item head in le form */ 29 inline void copy_item_head(struct item_head *to, 30 const struct item_head *from) 31 { 32 memcpy(to, from, IH_SIZE); 33 } 34 35 /* 36 * k1 is pointer to on-disk structure which is stored in little-endian 37 * form. k2 is pointer to cpu variable. For key of items of the same 38 * object this returns 0. 39 * Returns: -1 if key1 < key2 40 * 0 if key1 == key2 41 * 1 if key1 > key2 42 */ 43 inline int comp_short_keys(const struct reiserfs_key *le_key, 44 const struct cpu_key *cpu_key) 45 { 46 __u32 n; 47 n = le32_to_cpu(le_key->k_dir_id); 48 if (n < cpu_key->on_disk_key.k_dir_id) 49 return -1; 50 if (n > cpu_key->on_disk_key.k_dir_id) 51 return 1; 52 n = le32_to_cpu(le_key->k_objectid); 53 if (n < cpu_key->on_disk_key.k_objectid) 54 return -1; 55 if (n > cpu_key->on_disk_key.k_objectid) 56 return 1; 57 return 0; 58 } 59 60 /* 61 * k1 is pointer to on-disk structure which is stored in little-endian 62 * form. k2 is pointer to cpu variable. 63 * Compare keys using all 4 key fields. 64 * Returns: -1 if key1 < key2 0 65 * if key1 = key2 1 if key1 > key2 66 */ 67 static inline int comp_keys(const struct reiserfs_key *le_key, 68 const struct cpu_key *cpu_key) 69 { 70 int retval; 71 72 retval = comp_short_keys(le_key, cpu_key); 73 if (retval) 74 return retval; 75 if (le_key_k_offset(le_key_version(le_key), le_key) < 76 cpu_key_k_offset(cpu_key)) 77 return -1; 78 if (le_key_k_offset(le_key_version(le_key), le_key) > 79 cpu_key_k_offset(cpu_key)) 80 return 1; 81 82 if (cpu_key->key_length == 3) 83 return 0; 84 85 /* this part is needed only when tail conversion is in progress */ 86 if (le_key_k_type(le_key_version(le_key), le_key) < 87 cpu_key_k_type(cpu_key)) 88 return -1; 89 90 if (le_key_k_type(le_key_version(le_key), le_key) > 91 cpu_key_k_type(cpu_key)) 92 return 1; 93 94 return 0; 95 } 96 97 inline int comp_short_le_keys(const struct reiserfs_key *key1, 98 const struct reiserfs_key *key2) 99 { 100 __u32 *k1_u32, *k2_u32; 101 int key_length = REISERFS_SHORT_KEY_LEN; 102 103 k1_u32 = (__u32 *) key1; 104 k2_u32 = (__u32 *) key2; 105 for (; key_length--; ++k1_u32, ++k2_u32) { 106 if (le32_to_cpu(*k1_u32) < le32_to_cpu(*k2_u32)) 107 return -1; 108 if (le32_to_cpu(*k1_u32) > le32_to_cpu(*k2_u32)) 109 return 1; 110 } 111 return 0; 112 } 113 114 inline void le_key2cpu_key(struct cpu_key *to, const struct reiserfs_key *from) 115 { 116 int version; 117 to->on_disk_key.k_dir_id = le32_to_cpu(from->k_dir_id); 118 to->on_disk_key.k_objectid = le32_to_cpu(from->k_objectid); 119 120 /* find out version of the key */ 121 version = le_key_version(from); 122 to->version = version; 123 to->on_disk_key.k_offset = le_key_k_offset(version, from); 124 to->on_disk_key.k_type = le_key_k_type(version, from); 125 } 126 127 /* 128 * this does not say which one is bigger, it only returns 1 if keys 129 * are not equal, 0 otherwise 130 */ 131 inline int comp_le_keys(const struct reiserfs_key *k1, 132 const struct reiserfs_key *k2) 133 { 134 return memcmp(k1, k2, sizeof(struct reiserfs_key)); 135 } 136 137 /************************************************************************** 138 * Binary search toolkit function * 139 * Search for an item in the array by the item key * 140 * Returns: 1 if found, 0 if not found; * 141 * *pos = number of the searched element if found, else the * 142 * number of the first element that is larger than key. * 143 **************************************************************************/ 144 /* 145 * For those not familiar with binary search: lbound is the leftmost item 146 * that it could be, rbound the rightmost item that it could be. We examine 147 * the item halfway between lbound and rbound, and that tells us either 148 * that we can increase lbound, or decrease rbound, or that we have found it, 149 * or if lbound <= rbound that there are no possible items, and we have not 150 * found it. With each examination we cut the number of possible items it 151 * could be by one more than half rounded down, or we find it. 152 */ 153 static inline int bin_search(const void *key, /* Key to search for. */ 154 const void *base, /* First item in the array. */ 155 int num, /* Number of items in the array. */ 156 /* 157 * Item size in the array. searched. Lest the 158 * reader be confused, note that this is crafted 159 * as a general function, and when it is applied 160 * specifically to the array of item headers in a 161 * node, width is actually the item header size 162 * not the item size. 163 */ 164 int width, 165 int *pos /* Number of the searched for element. */ 166 ) 167 { 168 int rbound, lbound, j; 169 170 for (j = ((rbound = num - 1) + (lbound = 0)) / 2; 171 lbound <= rbound; j = (rbound + lbound) / 2) 172 switch (comp_keys 173 ((struct reiserfs_key *)((char *)base + j * width), 174 (struct cpu_key *)key)) { 175 case -1: 176 lbound = j + 1; 177 continue; 178 case 1: 179 rbound = j - 1; 180 continue; 181 case 0: 182 *pos = j; 183 return ITEM_FOUND; /* Key found in the array. */ 184 } 185 186 /* 187 * bin_search did not find given key, it returns position of key, 188 * that is minimal and greater than the given one. 189 */ 190 *pos = lbound; 191 return ITEM_NOT_FOUND; 192 } 193 194 195 /* Minimal possible key. It is never in the tree. */ 196 const struct reiserfs_key MIN_KEY = { 0, 0, {{0, 0},} }; 197 198 /* Maximal possible key. It is never in the tree. */ 199 static const struct reiserfs_key MAX_KEY = { 200 cpu_to_le32(0xffffffff), 201 cpu_to_le32(0xffffffff), 202 {{cpu_to_le32(0xffffffff), 203 cpu_to_le32(0xffffffff)},} 204 }; 205 206 /* 207 * Get delimiting key of the buffer by looking for it in the buffers in the 208 * path, starting from the bottom of the path, and going upwards. We must 209 * check the path's validity at each step. If the key is not in the path, 210 * there is no delimiting key in the tree (buffer is first or last buffer 211 * in tree), and in this case we return a special key, either MIN_KEY or 212 * MAX_KEY. 213 */ 214 static inline const struct reiserfs_key *get_lkey(const struct treepath *chk_path, 215 const struct super_block *sb) 216 { 217 int position, path_offset = chk_path->path_length; 218 struct buffer_head *parent; 219 220 RFALSE(path_offset < FIRST_PATH_ELEMENT_OFFSET, 221 "PAP-5010: invalid offset in the path"); 222 223 /* While not higher in path than first element. */ 224 while (path_offset-- > FIRST_PATH_ELEMENT_OFFSET) { 225 226 RFALSE(!buffer_uptodate 227 (PATH_OFFSET_PBUFFER(chk_path, path_offset)), 228 "PAP-5020: parent is not uptodate"); 229 230 /* Parent at the path is not in the tree now. */ 231 if (!B_IS_IN_TREE 232 (parent = 233 PATH_OFFSET_PBUFFER(chk_path, path_offset))) 234 return &MAX_KEY; 235 /* Check whether position in the parent is correct. */ 236 if ((position = 237 PATH_OFFSET_POSITION(chk_path, 238 path_offset)) > 239 B_NR_ITEMS(parent)) 240 return &MAX_KEY; 241 /* Check whether parent at the path really points to the child. */ 242 if (B_N_CHILD_NUM(parent, position) != 243 PATH_OFFSET_PBUFFER(chk_path, 244 path_offset + 1)->b_blocknr) 245 return &MAX_KEY; 246 /* 247 * Return delimiting key if position in the parent 248 * is not equal to zero. 249 */ 250 if (position) 251 return internal_key(parent, position - 1); 252 } 253 /* Return MIN_KEY if we are in the root of the buffer tree. */ 254 if (PATH_OFFSET_PBUFFER(chk_path, FIRST_PATH_ELEMENT_OFFSET)-> 255 b_blocknr == SB_ROOT_BLOCK(sb)) 256 return &MIN_KEY; 257 return &MAX_KEY; 258 } 259 260 /* Get delimiting key of the buffer at the path and its right neighbor. */ 261 inline const struct reiserfs_key *get_rkey(const struct treepath *chk_path, 262 const struct super_block *sb) 263 { 264 int position, path_offset = chk_path->path_length; 265 struct buffer_head *parent; 266 267 RFALSE(path_offset < FIRST_PATH_ELEMENT_OFFSET, 268 "PAP-5030: invalid offset in the path"); 269 270 while (path_offset-- > FIRST_PATH_ELEMENT_OFFSET) { 271 272 RFALSE(!buffer_uptodate 273 (PATH_OFFSET_PBUFFER(chk_path, path_offset)), 274 "PAP-5040: parent is not uptodate"); 275 276 /* Parent at the path is not in the tree now. */ 277 if (!B_IS_IN_TREE 278 (parent = 279 PATH_OFFSET_PBUFFER(chk_path, path_offset))) 280 return &MIN_KEY; 281 /* Check whether position in the parent is correct. */ 282 if ((position = 283 PATH_OFFSET_POSITION(chk_path, 284 path_offset)) > 285 B_NR_ITEMS(parent)) 286 return &MIN_KEY; 287 /* 288 * Check whether parent at the path really points 289 * to the child. 290 */ 291 if (B_N_CHILD_NUM(parent, position) != 292 PATH_OFFSET_PBUFFER(chk_path, 293 path_offset + 1)->b_blocknr) 294 return &MIN_KEY; 295 296 /* 297 * Return delimiting key if position in the parent 298 * is not the last one. 299 */ 300 if (position != B_NR_ITEMS(parent)) 301 return internal_key(parent, position); 302 } 303 304 /* Return MAX_KEY if we are in the root of the buffer tree. */ 305 if (PATH_OFFSET_PBUFFER(chk_path, FIRST_PATH_ELEMENT_OFFSET)-> 306 b_blocknr == SB_ROOT_BLOCK(sb)) 307 return &MAX_KEY; 308 return &MIN_KEY; 309 } 310 311 /* 312 * Check whether a key is contained in the tree rooted from a buffer at a path. 313 * This works by looking at the left and right delimiting keys for the buffer 314 * in the last path_element in the path. These delimiting keys are stored 315 * at least one level above that buffer in the tree. If the buffer is the 316 * first or last node in the tree order then one of the delimiting keys may 317 * be absent, and in this case get_lkey and get_rkey return a special key 318 * which is MIN_KEY or MAX_KEY. 319 */ 320 static inline int key_in_buffer( 321 /* Path which should be checked. */ 322 struct treepath *chk_path, 323 /* Key which should be checked. */ 324 const struct cpu_key *key, 325 struct super_block *sb 326 ) 327 { 328 329 RFALSE(!key || chk_path->path_length < FIRST_PATH_ELEMENT_OFFSET 330 || chk_path->path_length > MAX_HEIGHT, 331 "PAP-5050: pointer to the key(%p) is NULL or invalid path length(%d)", 332 key, chk_path->path_length); 333 RFALSE(!PATH_PLAST_BUFFER(chk_path)->b_bdev, 334 "PAP-5060: device must not be NODEV"); 335 336 if (comp_keys(get_lkey(chk_path, sb), key) == 1) 337 /* left delimiting key is bigger, that the key we look for */ 338 return 0; 339 /* if ( comp_keys(key, get_rkey(chk_path, sb)) != -1 ) */ 340 if (comp_keys(get_rkey(chk_path, sb), key) != 1) 341 /* key must be less than right delimitiing key */ 342 return 0; 343 return 1; 344 } 345 346 int reiserfs_check_path(struct treepath *p) 347 { 348 RFALSE(p->path_length != ILLEGAL_PATH_ELEMENT_OFFSET, 349 "path not properly relsed"); 350 return 0; 351 } 352 353 /* 354 * Drop the reference to each buffer in a path and restore 355 * dirty bits clean when preparing the buffer for the log. 356 * This version should only be called from fix_nodes() 357 */ 358 void pathrelse_and_restore(struct super_block *sb, 359 struct treepath *search_path) 360 { 361 int path_offset = search_path->path_length; 362 363 RFALSE(path_offset < ILLEGAL_PATH_ELEMENT_OFFSET, 364 "clm-4000: invalid path offset"); 365 366 while (path_offset > ILLEGAL_PATH_ELEMENT_OFFSET) { 367 struct buffer_head *bh; 368 bh = PATH_OFFSET_PBUFFER(search_path, path_offset--); 369 reiserfs_restore_prepared_buffer(sb, bh); 370 brelse(bh); 371 } 372 search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET; 373 } 374 375 /* Drop the reference to each buffer in a path */ 376 void pathrelse(struct treepath *search_path) 377 { 378 int path_offset = search_path->path_length; 379 380 RFALSE(path_offset < ILLEGAL_PATH_ELEMENT_OFFSET, 381 "PAP-5090: invalid path offset"); 382 383 while (path_offset > ILLEGAL_PATH_ELEMENT_OFFSET) 384 brelse(PATH_OFFSET_PBUFFER(search_path, path_offset--)); 385 386 search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET; 387 } 388 389 static int is_leaf(char *buf, int blocksize, struct buffer_head *bh) 390 { 391 struct block_head *blkh; 392 struct item_head *ih; 393 int used_space; 394 int prev_location; 395 int i; 396 int nr; 397 398 blkh = (struct block_head *)buf; 399 if (blkh_level(blkh) != DISK_LEAF_NODE_LEVEL) { 400 reiserfs_warning(NULL, "reiserfs-5080", 401 "this should be caught earlier"); 402 return 0; 403 } 404 405 nr = blkh_nr_item(blkh); 406 if (nr < 1 || nr > ((blocksize - BLKH_SIZE) / (IH_SIZE + MIN_ITEM_LEN))) { 407 /* item number is too big or too small */ 408 reiserfs_warning(NULL, "reiserfs-5081", 409 "nr_item seems wrong: %z", bh); 410 return 0; 411 } 412 ih = (struct item_head *)(buf + BLKH_SIZE) + nr - 1; 413 used_space = BLKH_SIZE + IH_SIZE * nr + (blocksize - ih_location(ih)); 414 415 /* free space does not match to calculated amount of use space */ 416 if (used_space != blocksize - blkh_free_space(blkh)) { 417 reiserfs_warning(NULL, "reiserfs-5082", 418 "free space seems wrong: %z", bh); 419 return 0; 420 } 421 /* 422 * FIXME: it is_leaf will hit performance too much - we may have 423 * return 1 here 424 */ 425 426 /* check tables of item heads */ 427 ih = (struct item_head *)(buf + BLKH_SIZE); 428 prev_location = blocksize; 429 for (i = 0; i < nr; i++, ih++) { 430 if (le_ih_k_type(ih) == TYPE_ANY) { 431 reiserfs_warning(NULL, "reiserfs-5083", 432 "wrong item type for item %h", 433 ih); 434 return 0; 435 } 436 if (ih_location(ih) >= blocksize 437 || ih_location(ih) < IH_SIZE * nr) { 438 reiserfs_warning(NULL, "reiserfs-5084", 439 "item location seems wrong: %h", 440 ih); 441 return 0; 442 } 443 if (ih_item_len(ih) < 1 444 || ih_item_len(ih) > MAX_ITEM_LEN(blocksize)) { 445 reiserfs_warning(NULL, "reiserfs-5085", 446 "item length seems wrong: %h", 447 ih); 448 return 0; 449 } 450 if (prev_location - ih_location(ih) != ih_item_len(ih)) { 451 reiserfs_warning(NULL, "reiserfs-5086", 452 "item location seems wrong " 453 "(second one): %h", ih); 454 return 0; 455 } 456 prev_location = ih_location(ih); 457 } 458 459 /* one may imagine many more checks */ 460 return 1; 461 } 462 463 /* returns 1 if buf looks like an internal node, 0 otherwise */ 464 static int is_internal(char *buf, int blocksize, struct buffer_head *bh) 465 { 466 struct block_head *blkh; 467 int nr; 468 int used_space; 469 470 blkh = (struct block_head *)buf; 471 nr = blkh_level(blkh); 472 if (nr <= DISK_LEAF_NODE_LEVEL || nr > MAX_HEIGHT) { 473 /* this level is not possible for internal nodes */ 474 reiserfs_warning(NULL, "reiserfs-5087", 475 "this should be caught earlier"); 476 return 0; 477 } 478 479 nr = blkh_nr_item(blkh); 480 /* for internal which is not root we might check min number of keys */ 481 if (nr > (blocksize - BLKH_SIZE - DC_SIZE) / (KEY_SIZE + DC_SIZE)) { 482 reiserfs_warning(NULL, "reiserfs-5088", 483 "number of key seems wrong: %z", bh); 484 return 0; 485 } 486 487 used_space = BLKH_SIZE + KEY_SIZE * nr + DC_SIZE * (nr + 1); 488 if (used_space != blocksize - blkh_free_space(blkh)) { 489 reiserfs_warning(NULL, "reiserfs-5089", 490 "free space seems wrong: %z", bh); 491 return 0; 492 } 493 494 /* one may imagine many more checks */ 495 return 1; 496 } 497 498 /* 499 * make sure that bh contains formatted node of reiserfs tree of 500 * 'level'-th level 501 */ 502 static int is_tree_node(struct buffer_head *bh, int level) 503 { 504 if (B_LEVEL(bh) != level) { 505 reiserfs_warning(NULL, "reiserfs-5090", "node level %d does " 506 "not match to the expected one %d", 507 B_LEVEL(bh), level); 508 return 0; 509 } 510 if (level == DISK_LEAF_NODE_LEVEL) 511 return is_leaf(bh->b_data, bh->b_size, bh); 512 513 return is_internal(bh->b_data, bh->b_size, bh); 514 } 515 516 #define SEARCH_BY_KEY_READA 16 517 518 /* 519 * The function is NOT SCHEDULE-SAFE! 520 * It might unlock the write lock if we needed to wait for a block 521 * to be read. Note that in this case it won't recover the lock to avoid 522 * high contention resulting from too much lock requests, especially 523 * the caller (search_by_key) will perform other schedule-unsafe 524 * operations just after calling this function. 525 * 526 * @return depth of lock to be restored after read completes 527 */ 528 static int search_by_key_reada(struct super_block *s, 529 struct buffer_head **bh, 530 b_blocknr_t *b, int num) 531 { 532 int i, j; 533 int depth = -1; 534 535 for (i = 0; i < num; i++) { 536 bh[i] = sb_getblk(s, b[i]); 537 } 538 /* 539 * We are going to read some blocks on which we 540 * have a reference. It's safe, though we might be 541 * reading blocks concurrently changed if we release 542 * the lock. But it's still fine because we check later 543 * if the tree changed 544 */ 545 for (j = 0; j < i; j++) { 546 /* 547 * note, this needs attention if we are getting rid of the BKL 548 * you have to make sure the prepared bit isn't set on this 549 * buffer 550 */ 551 if (!buffer_uptodate(bh[j])) { 552 if (depth == -1) 553 depth = reiserfs_write_unlock_nested(s); 554 ll_rw_block(READA, 1, bh + j); 555 } 556 brelse(bh[j]); 557 } 558 return depth; 559 } 560 561 /* 562 * This function fills up the path from the root to the leaf as it 563 * descends the tree looking for the key. It uses reiserfs_bread to 564 * try to find buffers in the cache given their block number. If it 565 * does not find them in the cache it reads them from disk. For each 566 * node search_by_key finds using reiserfs_bread it then uses 567 * bin_search to look through that node. bin_search will find the 568 * position of the block_number of the next node if it is looking 569 * through an internal node. If it is looking through a leaf node 570 * bin_search will find the position of the item which has key either 571 * equal to given key, or which is the maximal key less than the given 572 * key. search_by_key returns a path that must be checked for the 573 * correctness of the top of the path but need not be checked for the 574 * correctness of the bottom of the path 575 */ 576 /* 577 * search_by_key - search for key (and item) in stree 578 * @sb: superblock 579 * @key: pointer to key to search for 580 * @search_path: Allocated and initialized struct treepath; Returned filled 581 * on success. 582 * @stop_level: How far down the tree to search, Use DISK_LEAF_NODE_LEVEL to 583 * stop at leaf level. 584 * 585 * The function is NOT SCHEDULE-SAFE! 586 */ 587 int search_by_key(struct super_block *sb, const struct cpu_key *key, 588 struct treepath *search_path, int stop_level) 589 { 590 b_blocknr_t block_number; 591 int expected_level; 592 struct buffer_head *bh; 593 struct path_element *last_element; 594 int node_level, retval; 595 int right_neighbor_of_leaf_node; 596 int fs_gen; 597 struct buffer_head *reada_bh[SEARCH_BY_KEY_READA]; 598 b_blocknr_t reada_blocks[SEARCH_BY_KEY_READA]; 599 int reada_count = 0; 600 601 #ifdef CONFIG_REISERFS_CHECK 602 int repeat_counter = 0; 603 #endif 604 605 PROC_INFO_INC(sb, search_by_key); 606 607 /* 608 * As we add each node to a path we increase its count. This means 609 * that we must be careful to release all nodes in a path before we 610 * either discard the path struct or re-use the path struct, as we 611 * do here. 612 */ 613 614 pathrelse(search_path); 615 616 right_neighbor_of_leaf_node = 0; 617 618 /* 619 * With each iteration of this loop we search through the items in the 620 * current node, and calculate the next current node(next path element) 621 * for the next iteration of this loop.. 622 */ 623 block_number = SB_ROOT_BLOCK(sb); 624 expected_level = -1; 625 while (1) { 626 627 #ifdef CONFIG_REISERFS_CHECK 628 if (!(++repeat_counter % 50000)) 629 reiserfs_warning(sb, "PAP-5100", 630 "%s: there were %d iterations of " 631 "while loop looking for key %K", 632 current->comm, repeat_counter, 633 key); 634 #endif 635 636 /* prep path to have another element added to it. */ 637 last_element = 638 PATH_OFFSET_PELEMENT(search_path, 639 ++search_path->path_length); 640 fs_gen = get_generation(sb); 641 642 /* 643 * Read the next tree node, and set the last element 644 * in the path to have a pointer to it. 645 */ 646 if ((bh = last_element->pe_buffer = 647 sb_getblk(sb, block_number))) { 648 649 /* 650 * We'll need to drop the lock if we encounter any 651 * buffers that need to be read. If all of them are 652 * already up to date, we don't need to drop the lock. 653 */ 654 int depth = -1; 655 656 if (!buffer_uptodate(bh) && reada_count > 1) 657 depth = search_by_key_reada(sb, reada_bh, 658 reada_blocks, reada_count); 659 660 if (!buffer_uptodate(bh) && depth == -1) 661 depth = reiserfs_write_unlock_nested(sb); 662 663 ll_rw_block(READ, 1, &bh); 664 wait_on_buffer(bh); 665 666 if (depth != -1) 667 reiserfs_write_lock_nested(sb, depth); 668 if (!buffer_uptodate(bh)) 669 goto io_error; 670 } else { 671 io_error: 672 search_path->path_length--; 673 pathrelse(search_path); 674 return IO_ERROR; 675 } 676 reada_count = 0; 677 if (expected_level == -1) 678 expected_level = SB_TREE_HEIGHT(sb); 679 expected_level--; 680 681 /* 682 * It is possible that schedule occurred. We must check 683 * whether the key to search is still in the tree rooted 684 * from the current buffer. If not then repeat search 685 * from the root. 686 */ 687 if (fs_changed(fs_gen, sb) && 688 (!B_IS_IN_TREE(bh) || 689 B_LEVEL(bh) != expected_level || 690 !key_in_buffer(search_path, key, sb))) { 691 PROC_INFO_INC(sb, search_by_key_fs_changed); 692 PROC_INFO_INC(sb, search_by_key_restarted); 693 PROC_INFO_INC(sb, 694 sbk_restarted[expected_level - 1]); 695 pathrelse(search_path); 696 697 /* 698 * Get the root block number so that we can 699 * repeat the search starting from the root. 700 */ 701 block_number = SB_ROOT_BLOCK(sb); 702 expected_level = -1; 703 right_neighbor_of_leaf_node = 0; 704 705 /* repeat search from the root */ 706 continue; 707 } 708 709 /* 710 * only check that the key is in the buffer if key is not 711 * equal to the MAX_KEY. Latter case is only possible in 712 * "finish_unfinished()" processing during mount. 713 */ 714 RFALSE(comp_keys(&MAX_KEY, key) && 715 !key_in_buffer(search_path, key, sb), 716 "PAP-5130: key is not in the buffer"); 717 #ifdef CONFIG_REISERFS_CHECK 718 if (REISERFS_SB(sb)->cur_tb) { 719 print_cur_tb("5140"); 720 reiserfs_panic(sb, "PAP-5140", 721 "schedule occurred in do_balance!"); 722 } 723 #endif 724 725 /* 726 * make sure, that the node contents look like a node of 727 * certain level 728 */ 729 if (!is_tree_node(bh, expected_level)) { 730 reiserfs_error(sb, "vs-5150", 731 "invalid format found in block %ld. " 732 "Fsck?", bh->b_blocknr); 733 pathrelse(search_path); 734 return IO_ERROR; 735 } 736 737 /* ok, we have acquired next formatted node in the tree */ 738 node_level = B_LEVEL(bh); 739 740 PROC_INFO_BH_STAT(sb, bh, node_level - 1); 741 742 RFALSE(node_level < stop_level, 743 "vs-5152: tree level (%d) is less than stop level (%d)", 744 node_level, stop_level); 745 746 retval = bin_search(key, item_head(bh, 0), 747 B_NR_ITEMS(bh), 748 (node_level == 749 DISK_LEAF_NODE_LEVEL) ? IH_SIZE : 750 KEY_SIZE, 751 &last_element->pe_position); 752 if (node_level == stop_level) { 753 return retval; 754 } 755 756 /* we are not in the stop level */ 757 /* 758 * item has been found, so we choose the pointer which 759 * is to the right of the found one 760 */ 761 if (retval == ITEM_FOUND) 762 last_element->pe_position++; 763 764 /* 765 * if item was not found we choose the position which is to 766 * the left of the found item. This requires no code, 767 * bin_search did it already. 768 */ 769 770 /* 771 * So we have chosen a position in the current node which is 772 * an internal node. Now we calculate child block number by 773 * position in the node. 774 */ 775 block_number = 776 B_N_CHILD_NUM(bh, last_element->pe_position); 777 778 /* 779 * if we are going to read leaf nodes, try for read 780 * ahead as well 781 */ 782 if ((search_path->reada & PATH_READA) && 783 node_level == DISK_LEAF_NODE_LEVEL + 1) { 784 int pos = last_element->pe_position; 785 int limit = B_NR_ITEMS(bh); 786 struct reiserfs_key *le_key; 787 788 if (search_path->reada & PATH_READA_BACK) 789 limit = 0; 790 while (reada_count < SEARCH_BY_KEY_READA) { 791 if (pos == limit) 792 break; 793 reada_blocks[reada_count++] = 794 B_N_CHILD_NUM(bh, pos); 795 if (search_path->reada & PATH_READA_BACK) 796 pos--; 797 else 798 pos++; 799 800 /* 801 * check to make sure we're in the same object 802 */ 803 le_key = internal_key(bh, pos); 804 if (le32_to_cpu(le_key->k_objectid) != 805 key->on_disk_key.k_objectid) { 806 break; 807 } 808 } 809 } 810 } 811 } 812 813 /* 814 * Form the path to an item and position in this item which contains 815 * file byte defined by key. If there is no such item 816 * corresponding to the key, we point the path to the item with 817 * maximal key less than key, and *pos_in_item is set to one 818 * past the last entry/byte in the item. If searching for entry in a 819 * directory item, and it is not found, *pos_in_item is set to one 820 * entry more than the entry with maximal key which is less than the 821 * sought key. 822 * 823 * Note that if there is no entry in this same node which is one more, 824 * then we point to an imaginary entry. for direct items, the 825 * position is in units of bytes, for indirect items the position is 826 * in units of blocknr entries, for directory items the position is in 827 * units of directory entries. 828 */ 829 /* The function is NOT SCHEDULE-SAFE! */ 830 int search_for_position_by_key(struct super_block *sb, 831 /* Key to search (cpu variable) */ 832 const struct cpu_key *p_cpu_key, 833 /* Filled up by this function. */ 834 struct treepath *search_path) 835 { 836 struct item_head *p_le_ih; /* pointer to on-disk structure */ 837 int blk_size; 838 loff_t item_offset, offset; 839 struct reiserfs_dir_entry de; 840 int retval; 841 842 /* If searching for directory entry. */ 843 if (is_direntry_cpu_key(p_cpu_key)) 844 return search_by_entry_key(sb, p_cpu_key, search_path, 845 &de); 846 847 /* If not searching for directory entry. */ 848 849 /* If item is found. */ 850 retval = search_item(sb, p_cpu_key, search_path); 851 if (retval == IO_ERROR) 852 return retval; 853 if (retval == ITEM_FOUND) { 854 855 RFALSE(!ih_item_len 856 (item_head 857 (PATH_PLAST_BUFFER(search_path), 858 PATH_LAST_POSITION(search_path))), 859 "PAP-5165: item length equals zero"); 860 861 pos_in_item(search_path) = 0; 862 return POSITION_FOUND; 863 } 864 865 RFALSE(!PATH_LAST_POSITION(search_path), 866 "PAP-5170: position equals zero"); 867 868 /* Item is not found. Set path to the previous item. */ 869 p_le_ih = 870 item_head(PATH_PLAST_BUFFER(search_path), 871 --PATH_LAST_POSITION(search_path)); 872 blk_size = sb->s_blocksize; 873 874 if (comp_short_keys(&p_le_ih->ih_key, p_cpu_key)) 875 return FILE_NOT_FOUND; 876 877 /* FIXME: quite ugly this far */ 878 879 item_offset = le_ih_k_offset(p_le_ih); 880 offset = cpu_key_k_offset(p_cpu_key); 881 882 /* Needed byte is contained in the item pointed to by the path. */ 883 if (item_offset <= offset && 884 item_offset + op_bytes_number(p_le_ih, blk_size) > offset) { 885 pos_in_item(search_path) = offset - item_offset; 886 if (is_indirect_le_ih(p_le_ih)) { 887 pos_in_item(search_path) /= blk_size; 888 } 889 return POSITION_FOUND; 890 } 891 892 /* 893 * Needed byte is not contained in the item pointed to by the 894 * path. Set pos_in_item out of the item. 895 */ 896 if (is_indirect_le_ih(p_le_ih)) 897 pos_in_item(search_path) = 898 ih_item_len(p_le_ih) / UNFM_P_SIZE; 899 else 900 pos_in_item(search_path) = ih_item_len(p_le_ih); 901 902 return POSITION_NOT_FOUND; 903 } 904 905 /* Compare given item and item pointed to by the path. */ 906 int comp_items(const struct item_head *stored_ih, const struct treepath *path) 907 { 908 struct buffer_head *bh = PATH_PLAST_BUFFER(path); 909 struct item_head *ih; 910 911 /* Last buffer at the path is not in the tree. */ 912 if (!B_IS_IN_TREE(bh)) 913 return 1; 914 915 /* Last path position is invalid. */ 916 if (PATH_LAST_POSITION(path) >= B_NR_ITEMS(bh)) 917 return 1; 918 919 /* we need only to know, whether it is the same item */ 920 ih = tp_item_head(path); 921 return memcmp(stored_ih, ih, IH_SIZE); 922 } 923 924 /* unformatted nodes are not logged anymore, ever. This is safe now */ 925 #define held_by_others(bh) (atomic_read(&(bh)->b_count) > 1) 926 927 /* block can not be forgotten as it is in I/O or held by someone */ 928 #define block_in_use(bh) (buffer_locked(bh) || (held_by_others(bh))) 929 930 /* prepare for delete or cut of direct item */ 931 static inline int prepare_for_direct_item(struct treepath *path, 932 struct item_head *le_ih, 933 struct inode *inode, 934 loff_t new_file_length, int *cut_size) 935 { 936 loff_t round_len; 937 938 if (new_file_length == max_reiserfs_offset(inode)) { 939 /* item has to be deleted */ 940 *cut_size = -(IH_SIZE + ih_item_len(le_ih)); 941 return M_DELETE; 942 } 943 /* new file gets truncated */ 944 if (get_inode_item_key_version(inode) == KEY_FORMAT_3_6) { 945 round_len = ROUND_UP(new_file_length); 946 /* this was new_file_length < le_ih ... */ 947 if (round_len < le_ih_k_offset(le_ih)) { 948 *cut_size = -(IH_SIZE + ih_item_len(le_ih)); 949 return M_DELETE; /* Delete this item. */ 950 } 951 /* Calculate first position and size for cutting from item. */ 952 pos_in_item(path) = round_len - (le_ih_k_offset(le_ih) - 1); 953 *cut_size = -(ih_item_len(le_ih) - pos_in_item(path)); 954 955 return M_CUT; /* Cut from this item. */ 956 } 957 958 /* old file: items may have any length */ 959 960 if (new_file_length < le_ih_k_offset(le_ih)) { 961 *cut_size = -(IH_SIZE + ih_item_len(le_ih)); 962 return M_DELETE; /* Delete this item. */ 963 } 964 965 /* Calculate first position and size for cutting from item. */ 966 *cut_size = -(ih_item_len(le_ih) - 967 (pos_in_item(path) = 968 new_file_length + 1 - le_ih_k_offset(le_ih))); 969 return M_CUT; /* Cut from this item. */ 970 } 971 972 static inline int prepare_for_direntry_item(struct treepath *path, 973 struct item_head *le_ih, 974 struct inode *inode, 975 loff_t new_file_length, 976 int *cut_size) 977 { 978 if (le_ih_k_offset(le_ih) == DOT_OFFSET && 979 new_file_length == max_reiserfs_offset(inode)) { 980 RFALSE(ih_entry_count(le_ih) != 2, 981 "PAP-5220: incorrect empty directory item (%h)", le_ih); 982 *cut_size = -(IH_SIZE + ih_item_len(le_ih)); 983 /* Delete the directory item containing "." and ".." entry. */ 984 return M_DELETE; 985 } 986 987 if (ih_entry_count(le_ih) == 1) { 988 /* 989 * Delete the directory item such as there is one record only 990 * in this item 991 */ 992 *cut_size = -(IH_SIZE + ih_item_len(le_ih)); 993 return M_DELETE; 994 } 995 996 /* Cut one record from the directory item. */ 997 *cut_size = 998 -(DEH_SIZE + 999 entry_length(get_last_bh(path), le_ih, pos_in_item(path))); 1000 return M_CUT; 1001 } 1002 1003 #define JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD (2 * JOURNAL_PER_BALANCE_CNT + 1) 1004 1005 /* 1006 * If the path points to a directory or direct item, calculate mode 1007 * and the size cut, for balance. 1008 * If the path points to an indirect item, remove some number of its 1009 * unformatted nodes. 1010 * In case of file truncate calculate whether this item must be 1011 * deleted/truncated or last unformatted node of this item will be 1012 * converted to a direct item. 1013 * This function returns a determination of what balance mode the 1014 * calling function should employ. 1015 */ 1016 static char prepare_for_delete_or_cut(struct reiserfs_transaction_handle *th, 1017 struct inode *inode, 1018 struct treepath *path, 1019 const struct cpu_key *item_key, 1020 /* 1021 * Number of unformatted nodes 1022 * which were removed from end 1023 * of the file. 1024 */ 1025 int *removed, 1026 int *cut_size, 1027 /* MAX_KEY_OFFSET in case of delete. */ 1028 unsigned long long new_file_length 1029 ) 1030 { 1031 struct super_block *sb = inode->i_sb; 1032 struct item_head *p_le_ih = tp_item_head(path); 1033 struct buffer_head *bh = PATH_PLAST_BUFFER(path); 1034 1035 BUG_ON(!th->t_trans_id); 1036 1037 /* Stat_data item. */ 1038 if (is_statdata_le_ih(p_le_ih)) { 1039 1040 RFALSE(new_file_length != max_reiserfs_offset(inode), 1041 "PAP-5210: mode must be M_DELETE"); 1042 1043 *cut_size = -(IH_SIZE + ih_item_len(p_le_ih)); 1044 return M_DELETE; 1045 } 1046 1047 /* Directory item. */ 1048 if (is_direntry_le_ih(p_le_ih)) 1049 return prepare_for_direntry_item(path, p_le_ih, inode, 1050 new_file_length, 1051 cut_size); 1052 1053 /* Direct item. */ 1054 if (is_direct_le_ih(p_le_ih)) 1055 return prepare_for_direct_item(path, p_le_ih, inode, 1056 new_file_length, cut_size); 1057 1058 /* Case of an indirect item. */ 1059 { 1060 int blk_size = sb->s_blocksize; 1061 struct item_head s_ih; 1062 int need_re_search; 1063 int delete = 0; 1064 int result = M_CUT; 1065 int pos = 0; 1066 1067 if ( new_file_length == max_reiserfs_offset (inode) ) { 1068 /* 1069 * prepare_for_delete_or_cut() is called by 1070 * reiserfs_delete_item() 1071 */ 1072 new_file_length = 0; 1073 delete = 1; 1074 } 1075 1076 do { 1077 need_re_search = 0; 1078 *cut_size = 0; 1079 bh = PATH_PLAST_BUFFER(path); 1080 copy_item_head(&s_ih, tp_item_head(path)); 1081 pos = I_UNFM_NUM(&s_ih); 1082 1083 while (le_ih_k_offset (&s_ih) + (pos - 1) * blk_size > new_file_length) { 1084 __le32 *unfm; 1085 __u32 block; 1086 1087 /* 1088 * Each unformatted block deletion may involve 1089 * one additional bitmap block into the transaction, 1090 * thereby the initial journal space reservation 1091 * might not be enough. 1092 */ 1093 if (!delete && (*cut_size) != 0 && 1094 reiserfs_transaction_free_space(th) < JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD) 1095 break; 1096 1097 unfm = (__le32 *)ih_item_body(bh, &s_ih) + pos - 1; 1098 block = get_block_num(unfm, 0); 1099 1100 if (block != 0) { 1101 reiserfs_prepare_for_journal(sb, bh, 1); 1102 put_block_num(unfm, 0, 0); 1103 journal_mark_dirty(th, bh); 1104 reiserfs_free_block(th, inode, block, 1); 1105 } 1106 1107 reiserfs_cond_resched(sb); 1108 1109 if (item_moved (&s_ih, path)) { 1110 need_re_search = 1; 1111 break; 1112 } 1113 1114 pos --; 1115 (*removed)++; 1116 (*cut_size) -= UNFM_P_SIZE; 1117 1118 if (pos == 0) { 1119 (*cut_size) -= IH_SIZE; 1120 result = M_DELETE; 1121 break; 1122 } 1123 } 1124 /* 1125 * a trick. If the buffer has been logged, this will 1126 * do nothing. If we've broken the loop without logging 1127 * it, it will restore the buffer 1128 */ 1129 reiserfs_restore_prepared_buffer(sb, bh); 1130 } while (need_re_search && 1131 search_for_position_by_key(sb, item_key, path) == POSITION_FOUND); 1132 pos_in_item(path) = pos * UNFM_P_SIZE; 1133 1134 if (*cut_size == 0) { 1135 /* 1136 * Nothing was cut. maybe convert last unformatted node to the 1137 * direct item? 1138 */ 1139 result = M_CONVERT; 1140 } 1141 return result; 1142 } 1143 } 1144 1145 /* Calculate number of bytes which will be deleted or cut during balance */ 1146 static int calc_deleted_bytes_number(struct tree_balance *tb, char mode) 1147 { 1148 int del_size; 1149 struct item_head *p_le_ih = tp_item_head(tb->tb_path); 1150 1151 if (is_statdata_le_ih(p_le_ih)) 1152 return 0; 1153 1154 del_size = 1155 (mode == 1156 M_DELETE) ? ih_item_len(p_le_ih) : -tb->insert_size[0]; 1157 if (is_direntry_le_ih(p_le_ih)) { 1158 /* 1159 * return EMPTY_DIR_SIZE; We delete emty directories only. 1160 * we can't use EMPTY_DIR_SIZE, as old format dirs have a 1161 * different empty size. ick. FIXME, is this right? 1162 */ 1163 return del_size; 1164 } 1165 1166 if (is_indirect_le_ih(p_le_ih)) 1167 del_size = (del_size / UNFM_P_SIZE) * 1168 (PATH_PLAST_BUFFER(tb->tb_path)->b_size); 1169 return del_size; 1170 } 1171 1172 static void init_tb_struct(struct reiserfs_transaction_handle *th, 1173 struct tree_balance *tb, 1174 struct super_block *sb, 1175 struct treepath *path, int size) 1176 { 1177 1178 BUG_ON(!th->t_trans_id); 1179 1180 memset(tb, '\0', sizeof(struct tree_balance)); 1181 tb->transaction_handle = th; 1182 tb->tb_sb = sb; 1183 tb->tb_path = path; 1184 PATH_OFFSET_PBUFFER(path, ILLEGAL_PATH_ELEMENT_OFFSET) = NULL; 1185 PATH_OFFSET_POSITION(path, ILLEGAL_PATH_ELEMENT_OFFSET) = 0; 1186 tb->insert_size[0] = size; 1187 } 1188 1189 void padd_item(char *item, int total_length, int length) 1190 { 1191 int i; 1192 1193 for (i = total_length; i > length;) 1194 item[--i] = 0; 1195 } 1196 1197 #ifdef REISERQUOTA_DEBUG 1198 char key2type(struct reiserfs_key *ih) 1199 { 1200 if (is_direntry_le_key(2, ih)) 1201 return 'd'; 1202 if (is_direct_le_key(2, ih)) 1203 return 'D'; 1204 if (is_indirect_le_key(2, ih)) 1205 return 'i'; 1206 if (is_statdata_le_key(2, ih)) 1207 return 's'; 1208 return 'u'; 1209 } 1210 1211 char head2type(struct item_head *ih) 1212 { 1213 if (is_direntry_le_ih(ih)) 1214 return 'd'; 1215 if (is_direct_le_ih(ih)) 1216 return 'D'; 1217 if (is_indirect_le_ih(ih)) 1218 return 'i'; 1219 if (is_statdata_le_ih(ih)) 1220 return 's'; 1221 return 'u'; 1222 } 1223 #endif 1224 1225 /* 1226 * Delete object item. 1227 * th - active transaction handle 1228 * path - path to the deleted item 1229 * item_key - key to search for the deleted item 1230 * indode - used for updating i_blocks and quotas 1231 * un_bh - NULL or unformatted node pointer 1232 */ 1233 int reiserfs_delete_item(struct reiserfs_transaction_handle *th, 1234 struct treepath *path, const struct cpu_key *item_key, 1235 struct inode *inode, struct buffer_head *un_bh) 1236 { 1237 struct super_block *sb = inode->i_sb; 1238 struct tree_balance s_del_balance; 1239 struct item_head s_ih; 1240 struct item_head *q_ih; 1241 int quota_cut_bytes; 1242 int ret_value, del_size, removed; 1243 int depth; 1244 1245 #ifdef CONFIG_REISERFS_CHECK 1246 char mode; 1247 int iter = 0; 1248 #endif 1249 1250 BUG_ON(!th->t_trans_id); 1251 1252 init_tb_struct(th, &s_del_balance, sb, path, 1253 0 /*size is unknown */ ); 1254 1255 while (1) { 1256 removed = 0; 1257 1258 #ifdef CONFIG_REISERFS_CHECK 1259 iter++; 1260 mode = 1261 #endif 1262 prepare_for_delete_or_cut(th, inode, path, 1263 item_key, &removed, 1264 &del_size, 1265 max_reiserfs_offset(inode)); 1266 1267 RFALSE(mode != M_DELETE, "PAP-5320: mode must be M_DELETE"); 1268 1269 copy_item_head(&s_ih, tp_item_head(path)); 1270 s_del_balance.insert_size[0] = del_size; 1271 1272 ret_value = fix_nodes(M_DELETE, &s_del_balance, NULL, NULL); 1273 if (ret_value != REPEAT_SEARCH) 1274 break; 1275 1276 PROC_INFO_INC(sb, delete_item_restarted); 1277 1278 /* file system changed, repeat search */ 1279 ret_value = 1280 search_for_position_by_key(sb, item_key, path); 1281 if (ret_value == IO_ERROR) 1282 break; 1283 if (ret_value == FILE_NOT_FOUND) { 1284 reiserfs_warning(sb, "vs-5340", 1285 "no items of the file %K found", 1286 item_key); 1287 break; 1288 } 1289 } /* while (1) */ 1290 1291 if (ret_value != CARRY_ON) { 1292 unfix_nodes(&s_del_balance); 1293 return 0; 1294 } 1295 1296 /* reiserfs_delete_item returns item length when success */ 1297 ret_value = calc_deleted_bytes_number(&s_del_balance, M_DELETE); 1298 q_ih = tp_item_head(path); 1299 quota_cut_bytes = ih_item_len(q_ih); 1300 1301 /* 1302 * hack so the quota code doesn't have to guess if the file has a 1303 * tail. On tail insert, we allocate quota for 1 unformatted node. 1304 * We test the offset because the tail might have been 1305 * split into multiple items, and we only want to decrement for 1306 * the unfm node once 1307 */ 1308 if (!S_ISLNK(inode->i_mode) && is_direct_le_ih(q_ih)) { 1309 if ((le_ih_k_offset(q_ih) & (sb->s_blocksize - 1)) == 1) { 1310 quota_cut_bytes = sb->s_blocksize + UNFM_P_SIZE; 1311 } else { 1312 quota_cut_bytes = 0; 1313 } 1314 } 1315 1316 if (un_bh) { 1317 int off; 1318 char *data; 1319 1320 /* 1321 * We are in direct2indirect conversion, so move tail contents 1322 * to the unformatted node 1323 */ 1324 /* 1325 * note, we do the copy before preparing the buffer because we 1326 * don't care about the contents of the unformatted node yet. 1327 * the only thing we really care about is the direct item's 1328 * data is in the unformatted node. 1329 * 1330 * Otherwise, we would have to call 1331 * reiserfs_prepare_for_journal on the unformatted node, 1332 * which might schedule, meaning we'd have to loop all the 1333 * way back up to the start of the while loop. 1334 * 1335 * The unformatted node must be dirtied later on. We can't be 1336 * sure here if the entire tail has been deleted yet. 1337 * 1338 * un_bh is from the page cache (all unformatted nodes are 1339 * from the page cache) and might be a highmem page. So, we 1340 * can't use un_bh->b_data. 1341 * -clm 1342 */ 1343 1344 data = kmap_atomic(un_bh->b_page); 1345 off = ((le_ih_k_offset(&s_ih) - 1) & (PAGE_SIZE - 1)); 1346 memcpy(data + off, 1347 ih_item_body(PATH_PLAST_BUFFER(path), &s_ih), 1348 ret_value); 1349 kunmap_atomic(data); 1350 } 1351 1352 /* Perform balancing after all resources have been collected at once. */ 1353 do_balance(&s_del_balance, NULL, NULL, M_DELETE); 1354 1355 #ifdef REISERQUOTA_DEBUG 1356 reiserfs_debug(sb, REISERFS_DEBUG_CODE, 1357 "reiserquota delete_item(): freeing %u, id=%u type=%c", 1358 quota_cut_bytes, inode->i_uid, head2type(&s_ih)); 1359 #endif 1360 depth = reiserfs_write_unlock_nested(inode->i_sb); 1361 dquot_free_space_nodirty(inode, quota_cut_bytes); 1362 reiserfs_write_lock_nested(inode->i_sb, depth); 1363 1364 /* Return deleted body length */ 1365 return ret_value; 1366 } 1367 1368 /* 1369 * Summary Of Mechanisms For Handling Collisions Between Processes: 1370 * 1371 * deletion of the body of the object is performed by iput(), with the 1372 * result that if multiple processes are operating on a file, the 1373 * deletion of the body of the file is deferred until the last process 1374 * that has an open inode performs its iput(). 1375 * 1376 * writes and truncates are protected from collisions by use of 1377 * semaphores. 1378 * 1379 * creates, linking, and mknod are protected from collisions with other 1380 * processes by making the reiserfs_add_entry() the last step in the 1381 * creation, and then rolling back all changes if there was a collision. 1382 * - Hans 1383 */ 1384 1385 /* this deletes item which never gets split */ 1386 void reiserfs_delete_solid_item(struct reiserfs_transaction_handle *th, 1387 struct inode *inode, struct reiserfs_key *key) 1388 { 1389 struct super_block *sb = th->t_super; 1390 struct tree_balance tb; 1391 INITIALIZE_PATH(path); 1392 int item_len = 0; 1393 int tb_init = 0; 1394 struct cpu_key cpu_key; 1395 int retval; 1396 int quota_cut_bytes = 0; 1397 1398 BUG_ON(!th->t_trans_id); 1399 1400 le_key2cpu_key(&cpu_key, key); 1401 1402 while (1) { 1403 retval = search_item(th->t_super, &cpu_key, &path); 1404 if (retval == IO_ERROR) { 1405 reiserfs_error(th->t_super, "vs-5350", 1406 "i/o failure occurred trying " 1407 "to delete %K", &cpu_key); 1408 break; 1409 } 1410 if (retval != ITEM_FOUND) { 1411 pathrelse(&path); 1412 /* 1413 * No need for a warning, if there is just no free 1414 * space to insert '..' item into the 1415 * newly-created subdir 1416 */ 1417 if (! 1418 ((unsigned long long) 1419 GET_HASH_VALUE(le_key_k_offset 1420 (le_key_version(key), key)) == 0 1421 && (unsigned long long) 1422 GET_GENERATION_NUMBER(le_key_k_offset 1423 (le_key_version(key), 1424 key)) == 1)) 1425 reiserfs_warning(th->t_super, "vs-5355", 1426 "%k not found", key); 1427 break; 1428 } 1429 if (!tb_init) { 1430 tb_init = 1; 1431 item_len = ih_item_len(tp_item_head(&path)); 1432 init_tb_struct(th, &tb, th->t_super, &path, 1433 -(IH_SIZE + item_len)); 1434 } 1435 quota_cut_bytes = ih_item_len(tp_item_head(&path)); 1436 1437 retval = fix_nodes(M_DELETE, &tb, NULL, NULL); 1438 if (retval == REPEAT_SEARCH) { 1439 PROC_INFO_INC(th->t_super, delete_solid_item_restarted); 1440 continue; 1441 } 1442 1443 if (retval == CARRY_ON) { 1444 do_balance(&tb, NULL, NULL, M_DELETE); 1445 /* 1446 * Should we count quota for item? (we don't 1447 * count quotas for save-links) 1448 */ 1449 if (inode) { 1450 int depth; 1451 #ifdef REISERQUOTA_DEBUG 1452 reiserfs_debug(th->t_super, REISERFS_DEBUG_CODE, 1453 "reiserquota delete_solid_item(): freeing %u id=%u type=%c", 1454 quota_cut_bytes, inode->i_uid, 1455 key2type(key)); 1456 #endif 1457 depth = reiserfs_write_unlock_nested(sb); 1458 dquot_free_space_nodirty(inode, 1459 quota_cut_bytes); 1460 reiserfs_write_lock_nested(sb, depth); 1461 } 1462 break; 1463 } 1464 1465 /* IO_ERROR, NO_DISK_SPACE, etc */ 1466 reiserfs_warning(th->t_super, "vs-5360", 1467 "could not delete %K due to fix_nodes failure", 1468 &cpu_key); 1469 unfix_nodes(&tb); 1470 break; 1471 } 1472 1473 reiserfs_check_path(&path); 1474 } 1475 1476 int reiserfs_delete_object(struct reiserfs_transaction_handle *th, 1477 struct inode *inode) 1478 { 1479 int err; 1480 inode->i_size = 0; 1481 BUG_ON(!th->t_trans_id); 1482 1483 /* for directory this deletes item containing "." and ".." */ 1484 err = 1485 reiserfs_do_truncate(th, inode, NULL, 0 /*no timestamp updates */ ); 1486 if (err) 1487 return err; 1488 1489 #if defined( USE_INODE_GENERATION_COUNTER ) 1490 if (!old_format_only(th->t_super)) { 1491 __le32 *inode_generation; 1492 1493 inode_generation = 1494 &REISERFS_SB(th->t_super)->s_rs->s_inode_generation; 1495 le32_add_cpu(inode_generation, 1); 1496 } 1497 /* USE_INODE_GENERATION_COUNTER */ 1498 #endif 1499 reiserfs_delete_solid_item(th, inode, INODE_PKEY(inode)); 1500 1501 return err; 1502 } 1503 1504 static void unmap_buffers(struct page *page, loff_t pos) 1505 { 1506 struct buffer_head *bh; 1507 struct buffer_head *head; 1508 struct buffer_head *next; 1509 unsigned long tail_index; 1510 unsigned long cur_index; 1511 1512 if (page) { 1513 if (page_has_buffers(page)) { 1514 tail_index = pos & (PAGE_SIZE - 1); 1515 cur_index = 0; 1516 head = page_buffers(page); 1517 bh = head; 1518 do { 1519 next = bh->b_this_page; 1520 1521 /* 1522 * we want to unmap the buffers that contain 1523 * the tail, and all the buffers after it 1524 * (since the tail must be at the end of the 1525 * file). We don't want to unmap file data 1526 * before the tail, since it might be dirty 1527 * and waiting to reach disk 1528 */ 1529 cur_index += bh->b_size; 1530 if (cur_index > tail_index) { 1531 reiserfs_unmap_buffer(bh); 1532 } 1533 bh = next; 1534 } while (bh != head); 1535 } 1536 } 1537 } 1538 1539 static int maybe_indirect_to_direct(struct reiserfs_transaction_handle *th, 1540 struct inode *inode, 1541 struct page *page, 1542 struct treepath *path, 1543 const struct cpu_key *item_key, 1544 loff_t new_file_size, char *mode) 1545 { 1546 struct super_block *sb = inode->i_sb; 1547 int block_size = sb->s_blocksize; 1548 int cut_bytes; 1549 BUG_ON(!th->t_trans_id); 1550 BUG_ON(new_file_size != inode->i_size); 1551 1552 /* 1553 * the page being sent in could be NULL if there was an i/o error 1554 * reading in the last block. The user will hit problems trying to 1555 * read the file, but for now we just skip the indirect2direct 1556 */ 1557 if (atomic_read(&inode->i_count) > 1 || 1558 !tail_has_to_be_packed(inode) || 1559 !page || (REISERFS_I(inode)->i_flags & i_nopack_mask)) { 1560 /* leave tail in an unformatted node */ 1561 *mode = M_SKIP_BALANCING; 1562 cut_bytes = 1563 block_size - (new_file_size & (block_size - 1)); 1564 pathrelse(path); 1565 return cut_bytes; 1566 } 1567 1568 /* Perform the conversion to a direct_item. */ 1569 return indirect2direct(th, inode, page, path, item_key, 1570 new_file_size, mode); 1571 } 1572 1573 /* 1574 * we did indirect_to_direct conversion. And we have inserted direct 1575 * item successesfully, but there were no disk space to cut unfm 1576 * pointer being converted. Therefore we have to delete inserted 1577 * direct item(s) 1578 */ 1579 static void indirect_to_direct_roll_back(struct reiserfs_transaction_handle *th, 1580 struct inode *inode, struct treepath *path) 1581 { 1582 struct cpu_key tail_key; 1583 int tail_len; 1584 int removed; 1585 BUG_ON(!th->t_trans_id); 1586 1587 make_cpu_key(&tail_key, inode, inode->i_size + 1, TYPE_DIRECT, 4); 1588 tail_key.key_length = 4; 1589 1590 tail_len = 1591 (cpu_key_k_offset(&tail_key) & (inode->i_sb->s_blocksize - 1)) - 1; 1592 while (tail_len) { 1593 /* look for the last byte of the tail */ 1594 if (search_for_position_by_key(inode->i_sb, &tail_key, path) == 1595 POSITION_NOT_FOUND) 1596 reiserfs_panic(inode->i_sb, "vs-5615", 1597 "found invalid item"); 1598 RFALSE(path->pos_in_item != 1599 ih_item_len(tp_item_head(path)) - 1, 1600 "vs-5616: appended bytes found"); 1601 PATH_LAST_POSITION(path)--; 1602 1603 removed = 1604 reiserfs_delete_item(th, path, &tail_key, inode, 1605 NULL /*unbh not needed */ ); 1606 RFALSE(removed <= 0 1607 || removed > tail_len, 1608 "vs-5617: there was tail %d bytes, removed item length %d bytes", 1609 tail_len, removed); 1610 tail_len -= removed; 1611 set_cpu_key_k_offset(&tail_key, 1612 cpu_key_k_offset(&tail_key) - removed); 1613 } 1614 reiserfs_warning(inode->i_sb, "reiserfs-5091", "indirect_to_direct " 1615 "conversion has been rolled back due to " 1616 "lack of disk space"); 1617 mark_inode_dirty(inode); 1618 } 1619 1620 /* (Truncate or cut entry) or delete object item. Returns < 0 on failure */ 1621 int reiserfs_cut_from_item(struct reiserfs_transaction_handle *th, 1622 struct treepath *path, 1623 struct cpu_key *item_key, 1624 struct inode *inode, 1625 struct page *page, loff_t new_file_size) 1626 { 1627 struct super_block *sb = inode->i_sb; 1628 /* 1629 * Every function which is going to call do_balance must first 1630 * create a tree_balance structure. Then it must fill up this 1631 * structure by using the init_tb_struct and fix_nodes functions. 1632 * After that we can make tree balancing. 1633 */ 1634 struct tree_balance s_cut_balance; 1635 struct item_head *p_le_ih; 1636 int cut_size = 0; /* Amount to be cut. */ 1637 int ret_value = CARRY_ON; 1638 int removed = 0; /* Number of the removed unformatted nodes. */ 1639 int is_inode_locked = 0; 1640 char mode; /* Mode of the balance. */ 1641 int retval2 = -1; 1642 int quota_cut_bytes; 1643 loff_t tail_pos = 0; 1644 int depth; 1645 1646 BUG_ON(!th->t_trans_id); 1647 1648 init_tb_struct(th, &s_cut_balance, inode->i_sb, path, 1649 cut_size); 1650 1651 /* 1652 * Repeat this loop until we either cut the item without needing 1653 * to balance, or we fix_nodes without schedule occurring 1654 */ 1655 while (1) { 1656 /* 1657 * Determine the balance mode, position of the first byte to 1658 * be cut, and size to be cut. In case of the indirect item 1659 * free unformatted nodes which are pointed to by the cut 1660 * pointers. 1661 */ 1662 1663 mode = 1664 prepare_for_delete_or_cut(th, inode, path, 1665 item_key, &removed, 1666 &cut_size, new_file_size); 1667 if (mode == M_CONVERT) { 1668 /* 1669 * convert last unformatted node to direct item or 1670 * leave tail in the unformatted node 1671 */ 1672 RFALSE(ret_value != CARRY_ON, 1673 "PAP-5570: can not convert twice"); 1674 1675 ret_value = 1676 maybe_indirect_to_direct(th, inode, page, 1677 path, item_key, 1678 new_file_size, &mode); 1679 if (mode == M_SKIP_BALANCING) 1680 /* tail has been left in the unformatted node */ 1681 return ret_value; 1682 1683 is_inode_locked = 1; 1684 1685 /* 1686 * removing of last unformatted node will 1687 * change value we have to return to truncate. 1688 * Save it 1689 */ 1690 retval2 = ret_value; 1691 1692 /* 1693 * So, we have performed the first part of the 1694 * conversion: 1695 * inserting the new direct item. Now we are 1696 * removing the last unformatted node pointer. 1697 * Set key to search for it. 1698 */ 1699 set_cpu_key_k_type(item_key, TYPE_INDIRECT); 1700 item_key->key_length = 4; 1701 new_file_size -= 1702 (new_file_size & (sb->s_blocksize - 1)); 1703 tail_pos = new_file_size; 1704 set_cpu_key_k_offset(item_key, new_file_size + 1); 1705 if (search_for_position_by_key 1706 (sb, item_key, 1707 path) == POSITION_NOT_FOUND) { 1708 print_block(PATH_PLAST_BUFFER(path), 3, 1709 PATH_LAST_POSITION(path) - 1, 1710 PATH_LAST_POSITION(path) + 1); 1711 reiserfs_panic(sb, "PAP-5580", "item to " 1712 "convert does not exist (%K)", 1713 item_key); 1714 } 1715 continue; 1716 } 1717 if (cut_size == 0) { 1718 pathrelse(path); 1719 return 0; 1720 } 1721 1722 s_cut_balance.insert_size[0] = cut_size; 1723 1724 ret_value = fix_nodes(mode, &s_cut_balance, NULL, NULL); 1725 if (ret_value != REPEAT_SEARCH) 1726 break; 1727 1728 PROC_INFO_INC(sb, cut_from_item_restarted); 1729 1730 ret_value = 1731 search_for_position_by_key(sb, item_key, path); 1732 if (ret_value == POSITION_FOUND) 1733 continue; 1734 1735 reiserfs_warning(sb, "PAP-5610", "item %K not found", 1736 item_key); 1737 unfix_nodes(&s_cut_balance); 1738 return (ret_value == IO_ERROR) ? -EIO : -ENOENT; 1739 } /* while */ 1740 1741 /* check fix_nodes results (IO_ERROR or NO_DISK_SPACE) */ 1742 if (ret_value != CARRY_ON) { 1743 if (is_inode_locked) { 1744 /* 1745 * FIXME: this seems to be not needed: we are always 1746 * able to cut item 1747 */ 1748 indirect_to_direct_roll_back(th, inode, path); 1749 } 1750 if (ret_value == NO_DISK_SPACE) 1751 reiserfs_warning(sb, "reiserfs-5092", 1752 "NO_DISK_SPACE"); 1753 unfix_nodes(&s_cut_balance); 1754 return -EIO; 1755 } 1756 1757 /* go ahead and perform balancing */ 1758 1759 RFALSE(mode == M_PASTE || mode == M_INSERT, "invalid mode"); 1760 1761 /* Calculate number of bytes that need to be cut from the item. */ 1762 quota_cut_bytes = 1763 (mode == 1764 M_DELETE) ? ih_item_len(tp_item_head(path)) : -s_cut_balance. 1765 insert_size[0]; 1766 if (retval2 == -1) 1767 ret_value = calc_deleted_bytes_number(&s_cut_balance, mode); 1768 else 1769 ret_value = retval2; 1770 1771 /* 1772 * For direct items, we only change the quota when deleting the last 1773 * item. 1774 */ 1775 p_le_ih = tp_item_head(s_cut_balance.tb_path); 1776 if (!S_ISLNK(inode->i_mode) && is_direct_le_ih(p_le_ih)) { 1777 if (mode == M_DELETE && 1778 (le_ih_k_offset(p_le_ih) & (sb->s_blocksize - 1)) == 1779 1) { 1780 /* FIXME: this is to keep 3.5 happy */ 1781 REISERFS_I(inode)->i_first_direct_byte = U32_MAX; 1782 quota_cut_bytes = sb->s_blocksize + UNFM_P_SIZE; 1783 } else { 1784 quota_cut_bytes = 0; 1785 } 1786 } 1787 #ifdef CONFIG_REISERFS_CHECK 1788 if (is_inode_locked) { 1789 struct item_head *le_ih = 1790 tp_item_head(s_cut_balance.tb_path); 1791 /* 1792 * we are going to complete indirect2direct conversion. Make 1793 * sure, that we exactly remove last unformatted node pointer 1794 * of the item 1795 */ 1796 if (!is_indirect_le_ih(le_ih)) 1797 reiserfs_panic(sb, "vs-5652", 1798 "item must be indirect %h", le_ih); 1799 1800 if (mode == M_DELETE && ih_item_len(le_ih) != UNFM_P_SIZE) 1801 reiserfs_panic(sb, "vs-5653", "completing " 1802 "indirect2direct conversion indirect " 1803 "item %h being deleted must be of " 1804 "4 byte long", le_ih); 1805 1806 if (mode == M_CUT 1807 && s_cut_balance.insert_size[0] != -UNFM_P_SIZE) { 1808 reiserfs_panic(sb, "vs-5654", "can not complete " 1809 "indirect2direct conversion of %h " 1810 "(CUT, insert_size==%d)", 1811 le_ih, s_cut_balance.insert_size[0]); 1812 } 1813 /* 1814 * it would be useful to make sure, that right neighboring 1815 * item is direct item of this file 1816 */ 1817 } 1818 #endif 1819 1820 do_balance(&s_cut_balance, NULL, NULL, mode); 1821 if (is_inode_locked) { 1822 /* 1823 * we've done an indirect->direct conversion. when the 1824 * data block was freed, it was removed from the list of 1825 * blocks that must be flushed before the transaction 1826 * commits, make sure to unmap and invalidate it 1827 */ 1828 unmap_buffers(page, tail_pos); 1829 REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask; 1830 } 1831 #ifdef REISERQUOTA_DEBUG 1832 reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE, 1833 "reiserquota cut_from_item(): freeing %u id=%u type=%c", 1834 quota_cut_bytes, inode->i_uid, '?'); 1835 #endif 1836 depth = reiserfs_write_unlock_nested(sb); 1837 dquot_free_space_nodirty(inode, quota_cut_bytes); 1838 reiserfs_write_lock_nested(sb, depth); 1839 return ret_value; 1840 } 1841 1842 static void truncate_directory(struct reiserfs_transaction_handle *th, 1843 struct inode *inode) 1844 { 1845 BUG_ON(!th->t_trans_id); 1846 if (inode->i_nlink) 1847 reiserfs_error(inode->i_sb, "vs-5655", "link count != 0"); 1848 1849 set_le_key_k_offset(KEY_FORMAT_3_5, INODE_PKEY(inode), DOT_OFFSET); 1850 set_le_key_k_type(KEY_FORMAT_3_5, INODE_PKEY(inode), TYPE_DIRENTRY); 1851 reiserfs_delete_solid_item(th, inode, INODE_PKEY(inode)); 1852 reiserfs_update_sd(th, inode); 1853 set_le_key_k_offset(KEY_FORMAT_3_5, INODE_PKEY(inode), SD_OFFSET); 1854 set_le_key_k_type(KEY_FORMAT_3_5, INODE_PKEY(inode), TYPE_STAT_DATA); 1855 } 1856 1857 /* 1858 * Truncate file to the new size. Note, this must be called with a 1859 * transaction already started 1860 */ 1861 int reiserfs_do_truncate(struct reiserfs_transaction_handle *th, 1862 struct inode *inode, /* ->i_size contains new size */ 1863 struct page *page, /* up to date for last block */ 1864 /* 1865 * when it is called by file_release to convert 1866 * the tail - no timestamps should be updated 1867 */ 1868 int update_timestamps 1869 ) 1870 { 1871 INITIALIZE_PATH(s_search_path); /* Path to the current object item. */ 1872 struct item_head *p_le_ih; /* Pointer to an item header. */ 1873 1874 /* Key to search for a previous file item. */ 1875 struct cpu_key s_item_key; 1876 loff_t file_size, /* Old file size. */ 1877 new_file_size; /* New file size. */ 1878 int deleted; /* Number of deleted or truncated bytes. */ 1879 int retval; 1880 int err = 0; 1881 1882 BUG_ON(!th->t_trans_id); 1883 if (! 1884 (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) 1885 || S_ISLNK(inode->i_mode))) 1886 return 0; 1887 1888 /* deletion of directory - no need to update timestamps */ 1889 if (S_ISDIR(inode->i_mode)) { 1890 truncate_directory(th, inode); 1891 return 0; 1892 } 1893 1894 /* Get new file size. */ 1895 new_file_size = inode->i_size; 1896 1897 /* FIXME: note, that key type is unimportant here */ 1898 make_cpu_key(&s_item_key, inode, max_reiserfs_offset(inode), 1899 TYPE_DIRECT, 3); 1900 1901 retval = 1902 search_for_position_by_key(inode->i_sb, &s_item_key, 1903 &s_search_path); 1904 if (retval == IO_ERROR) { 1905 reiserfs_error(inode->i_sb, "vs-5657", 1906 "i/o failure occurred trying to truncate %K", 1907 &s_item_key); 1908 err = -EIO; 1909 goto out; 1910 } 1911 if (retval == POSITION_FOUND || retval == FILE_NOT_FOUND) { 1912 reiserfs_error(inode->i_sb, "PAP-5660", 1913 "wrong result %d of search for %K", retval, 1914 &s_item_key); 1915 1916 err = -EIO; 1917 goto out; 1918 } 1919 1920 s_search_path.pos_in_item--; 1921 1922 /* Get real file size (total length of all file items) */ 1923 p_le_ih = tp_item_head(&s_search_path); 1924 if (is_statdata_le_ih(p_le_ih)) 1925 file_size = 0; 1926 else { 1927 loff_t offset = le_ih_k_offset(p_le_ih); 1928 int bytes = 1929 op_bytes_number(p_le_ih, inode->i_sb->s_blocksize); 1930 1931 /* 1932 * this may mismatch with real file size: if last direct item 1933 * had no padding zeros and last unformatted node had no free 1934 * space, this file would have this file size 1935 */ 1936 file_size = offset + bytes - 1; 1937 } 1938 /* 1939 * are we doing a full truncate or delete, if so 1940 * kick in the reada code 1941 */ 1942 if (new_file_size == 0) 1943 s_search_path.reada = PATH_READA | PATH_READA_BACK; 1944 1945 if (file_size == 0 || file_size < new_file_size) { 1946 goto update_and_out; 1947 } 1948 1949 /* Update key to search for the last file item. */ 1950 set_cpu_key_k_offset(&s_item_key, file_size); 1951 1952 do { 1953 /* Cut or delete file item. */ 1954 deleted = 1955 reiserfs_cut_from_item(th, &s_search_path, &s_item_key, 1956 inode, page, new_file_size); 1957 if (deleted < 0) { 1958 reiserfs_warning(inode->i_sb, "vs-5665", 1959 "reiserfs_cut_from_item failed"); 1960 reiserfs_check_path(&s_search_path); 1961 return 0; 1962 } 1963 1964 RFALSE(deleted > file_size, 1965 "PAP-5670: reiserfs_cut_from_item: too many bytes deleted: deleted %d, file_size %lu, item_key %K", 1966 deleted, file_size, &s_item_key); 1967 1968 /* Change key to search the last file item. */ 1969 file_size -= deleted; 1970 1971 set_cpu_key_k_offset(&s_item_key, file_size); 1972 1973 /* 1974 * While there are bytes to truncate and previous 1975 * file item is presented in the tree. 1976 */ 1977 1978 /* 1979 * This loop could take a really long time, and could log 1980 * many more blocks than a transaction can hold. So, we do 1981 * a polite journal end here, and if the transaction needs 1982 * ending, we make sure the file is consistent before ending 1983 * the current trans and starting a new one 1984 */ 1985 if (journal_transaction_should_end(th, 0) || 1986 reiserfs_transaction_free_space(th) <= JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD) { 1987 pathrelse(&s_search_path); 1988 1989 if (update_timestamps) { 1990 inode->i_mtime = CURRENT_TIME_SEC; 1991 inode->i_ctime = CURRENT_TIME_SEC; 1992 } 1993 reiserfs_update_sd(th, inode); 1994 1995 err = journal_end(th); 1996 if (err) 1997 goto out; 1998 err = journal_begin(th, inode->i_sb, 1999 JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD + JOURNAL_PER_BALANCE_CNT * 4) ; 2000 if (err) 2001 goto out; 2002 reiserfs_update_inode_transaction(inode); 2003 } 2004 } while (file_size > ROUND_UP(new_file_size) && 2005 search_for_position_by_key(inode->i_sb, &s_item_key, 2006 &s_search_path) == POSITION_FOUND); 2007 2008 RFALSE(file_size > ROUND_UP(new_file_size), 2009 "PAP-5680: truncate did not finish: new_file_size %lld, current %lld, oid %d", 2010 new_file_size, file_size, s_item_key.on_disk_key.k_objectid); 2011 2012 update_and_out: 2013 if (update_timestamps) { 2014 /* this is truncate, not file closing */ 2015 inode->i_mtime = CURRENT_TIME_SEC; 2016 inode->i_ctime = CURRENT_TIME_SEC; 2017 } 2018 reiserfs_update_sd(th, inode); 2019 2020 out: 2021 pathrelse(&s_search_path); 2022 return err; 2023 } 2024 2025 #ifdef CONFIG_REISERFS_CHECK 2026 /* this makes sure, that we __append__, not overwrite or add holes */ 2027 static void check_research_for_paste(struct treepath *path, 2028 const struct cpu_key *key) 2029 { 2030 struct item_head *found_ih = tp_item_head(path); 2031 2032 if (is_direct_le_ih(found_ih)) { 2033 if (le_ih_k_offset(found_ih) + 2034 op_bytes_number(found_ih, 2035 get_last_bh(path)->b_size) != 2036 cpu_key_k_offset(key) 2037 || op_bytes_number(found_ih, 2038 get_last_bh(path)->b_size) != 2039 pos_in_item(path)) 2040 reiserfs_panic(NULL, "PAP-5720", "found direct item " 2041 "%h or position (%d) does not match " 2042 "to key %K", found_ih, 2043 pos_in_item(path), key); 2044 } 2045 if (is_indirect_le_ih(found_ih)) { 2046 if (le_ih_k_offset(found_ih) + 2047 op_bytes_number(found_ih, 2048 get_last_bh(path)->b_size) != 2049 cpu_key_k_offset(key) 2050 || I_UNFM_NUM(found_ih) != pos_in_item(path) 2051 || get_ih_free_space(found_ih) != 0) 2052 reiserfs_panic(NULL, "PAP-5730", "found indirect " 2053 "item (%h) or position (%d) does not " 2054 "match to key (%K)", 2055 found_ih, pos_in_item(path), key); 2056 } 2057 } 2058 #endif /* config reiserfs check */ 2059 2060 /* 2061 * Paste bytes to the existing item. 2062 * Returns bytes number pasted into the item. 2063 */ 2064 int reiserfs_paste_into_item(struct reiserfs_transaction_handle *th, 2065 /* Path to the pasted item. */ 2066 struct treepath *search_path, 2067 /* Key to search for the needed item. */ 2068 const struct cpu_key *key, 2069 /* Inode item belongs to */ 2070 struct inode *inode, 2071 /* Pointer to the bytes to paste. */ 2072 const char *body, 2073 /* Size of pasted bytes. */ 2074 int pasted_size) 2075 { 2076 struct super_block *sb = inode->i_sb; 2077 struct tree_balance s_paste_balance; 2078 int retval; 2079 int fs_gen; 2080 int depth; 2081 2082 BUG_ON(!th->t_trans_id); 2083 2084 fs_gen = get_generation(inode->i_sb); 2085 2086 #ifdef REISERQUOTA_DEBUG 2087 reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE, 2088 "reiserquota paste_into_item(): allocating %u id=%u type=%c", 2089 pasted_size, inode->i_uid, 2090 key2type(&key->on_disk_key)); 2091 #endif 2092 2093 depth = reiserfs_write_unlock_nested(sb); 2094 retval = dquot_alloc_space_nodirty(inode, pasted_size); 2095 reiserfs_write_lock_nested(sb, depth); 2096 if (retval) { 2097 pathrelse(search_path); 2098 return retval; 2099 } 2100 init_tb_struct(th, &s_paste_balance, th->t_super, search_path, 2101 pasted_size); 2102 #ifdef DISPLACE_NEW_PACKING_LOCALITIES 2103 s_paste_balance.key = key->on_disk_key; 2104 #endif 2105 2106 /* DQUOT_* can schedule, must check before the fix_nodes */ 2107 if (fs_changed(fs_gen, inode->i_sb)) { 2108 goto search_again; 2109 } 2110 2111 while ((retval = 2112 fix_nodes(M_PASTE, &s_paste_balance, NULL, 2113 body)) == REPEAT_SEARCH) { 2114 search_again: 2115 /* file system changed while we were in the fix_nodes */ 2116 PROC_INFO_INC(th->t_super, paste_into_item_restarted); 2117 retval = 2118 search_for_position_by_key(th->t_super, key, 2119 search_path); 2120 if (retval == IO_ERROR) { 2121 retval = -EIO; 2122 goto error_out; 2123 } 2124 if (retval == POSITION_FOUND) { 2125 reiserfs_warning(inode->i_sb, "PAP-5710", 2126 "entry or pasted byte (%K) exists", 2127 key); 2128 retval = -EEXIST; 2129 goto error_out; 2130 } 2131 #ifdef CONFIG_REISERFS_CHECK 2132 check_research_for_paste(search_path, key); 2133 #endif 2134 } 2135 2136 /* 2137 * Perform balancing after all resources are collected by fix_nodes, 2138 * and accessing them will not risk triggering schedule. 2139 */ 2140 if (retval == CARRY_ON) { 2141 do_balance(&s_paste_balance, NULL /*ih */ , body, M_PASTE); 2142 return 0; 2143 } 2144 retval = (retval == NO_DISK_SPACE) ? -ENOSPC : -EIO; 2145 error_out: 2146 /* this also releases the path */ 2147 unfix_nodes(&s_paste_balance); 2148 #ifdef REISERQUOTA_DEBUG 2149 reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE, 2150 "reiserquota paste_into_item(): freeing %u id=%u type=%c", 2151 pasted_size, inode->i_uid, 2152 key2type(&key->on_disk_key)); 2153 #endif 2154 depth = reiserfs_write_unlock_nested(sb); 2155 dquot_free_space_nodirty(inode, pasted_size); 2156 reiserfs_write_lock_nested(sb, depth); 2157 return retval; 2158 } 2159 2160 /* 2161 * Insert new item into the buffer at the path. 2162 * th - active transaction handle 2163 * path - path to the inserted item 2164 * ih - pointer to the item header to insert 2165 * body - pointer to the bytes to insert 2166 */ 2167 int reiserfs_insert_item(struct reiserfs_transaction_handle *th, 2168 struct treepath *path, const struct cpu_key *key, 2169 struct item_head *ih, struct inode *inode, 2170 const char *body) 2171 { 2172 struct tree_balance s_ins_balance; 2173 int retval; 2174 int fs_gen = 0; 2175 int quota_bytes = 0; 2176 2177 BUG_ON(!th->t_trans_id); 2178 2179 if (inode) { /* Do we count quotas for item? */ 2180 int depth; 2181 fs_gen = get_generation(inode->i_sb); 2182 quota_bytes = ih_item_len(ih); 2183 2184 /* 2185 * hack so the quota code doesn't have to guess 2186 * if the file has a tail, links are always tails, 2187 * so there's no guessing needed 2188 */ 2189 if (!S_ISLNK(inode->i_mode) && is_direct_le_ih(ih)) 2190 quota_bytes = inode->i_sb->s_blocksize + UNFM_P_SIZE; 2191 #ifdef REISERQUOTA_DEBUG 2192 reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE, 2193 "reiserquota insert_item(): allocating %u id=%u type=%c", 2194 quota_bytes, inode->i_uid, head2type(ih)); 2195 #endif 2196 /* 2197 * We can't dirty inode here. It would be immediately 2198 * written but appropriate stat item isn't inserted yet... 2199 */ 2200 depth = reiserfs_write_unlock_nested(inode->i_sb); 2201 retval = dquot_alloc_space_nodirty(inode, quota_bytes); 2202 reiserfs_write_lock_nested(inode->i_sb, depth); 2203 if (retval) { 2204 pathrelse(path); 2205 return retval; 2206 } 2207 } 2208 init_tb_struct(th, &s_ins_balance, th->t_super, path, 2209 IH_SIZE + ih_item_len(ih)); 2210 #ifdef DISPLACE_NEW_PACKING_LOCALITIES 2211 s_ins_balance.key = key->on_disk_key; 2212 #endif 2213 /* 2214 * DQUOT_* can schedule, must check to be sure calling 2215 * fix_nodes is safe 2216 */ 2217 if (inode && fs_changed(fs_gen, inode->i_sb)) { 2218 goto search_again; 2219 } 2220 2221 while ((retval = 2222 fix_nodes(M_INSERT, &s_ins_balance, ih, 2223 body)) == REPEAT_SEARCH) { 2224 search_again: 2225 /* file system changed while we were in the fix_nodes */ 2226 PROC_INFO_INC(th->t_super, insert_item_restarted); 2227 retval = search_item(th->t_super, key, path); 2228 if (retval == IO_ERROR) { 2229 retval = -EIO; 2230 goto error_out; 2231 } 2232 if (retval == ITEM_FOUND) { 2233 reiserfs_warning(th->t_super, "PAP-5760", 2234 "key %K already exists in the tree", 2235 key); 2236 retval = -EEXIST; 2237 goto error_out; 2238 } 2239 } 2240 2241 /* make balancing after all resources will be collected at a time */ 2242 if (retval == CARRY_ON) { 2243 do_balance(&s_ins_balance, ih, body, M_INSERT); 2244 return 0; 2245 } 2246 2247 retval = (retval == NO_DISK_SPACE) ? -ENOSPC : -EIO; 2248 error_out: 2249 /* also releases the path */ 2250 unfix_nodes(&s_ins_balance); 2251 #ifdef REISERQUOTA_DEBUG 2252 reiserfs_debug(th->t_super, REISERFS_DEBUG_CODE, 2253 "reiserquota insert_item(): freeing %u id=%u type=%c", 2254 quota_bytes, inode->i_uid, head2type(ih)); 2255 #endif 2256 if (inode) { 2257 int depth = reiserfs_write_unlock_nested(inode->i_sb); 2258 dquot_free_space_nodirty(inode, quota_bytes); 2259 reiserfs_write_lock_nested(inode->i_sb, depth); 2260 } 2261 return retval; 2262 } 2263