1 /* -*- mode: c; c-basic-offset: 8; -*- 2 * vim: noexpandtab sw=8 ts=8 sts=0: 3 * 4 * alloc.c 5 * 6 * Extent allocs and frees 7 * 8 * Copyright (C) 2002, 2004 Oracle. All rights reserved. 9 * 10 * This program is free software; you can redistribute it and/or 11 * modify it under the terms of the GNU General Public 12 * License as published by the Free Software Foundation; either 13 * version 2 of the License, or (at your option) any later version. 14 * 15 * This program is distributed in the hope that it will be useful, 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 18 * General Public License for more details. 19 * 20 * You should have received a copy of the GNU General Public 21 * License along with this program; if not, write to the 22 * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 23 * Boston, MA 021110-1307, USA. 24 */ 25 26 #include <linux/fs.h> 27 #include <linux/types.h> 28 #include <linux/slab.h> 29 #include <linux/highmem.h> 30 #include <linux/swap.h> 31 32 #define MLOG_MASK_PREFIX ML_DISK_ALLOC 33 #include <cluster/masklog.h> 34 35 #include "ocfs2.h" 36 37 #include "alloc.h" 38 #include "aops.h" 39 #include "dlmglue.h" 40 #include "extent_map.h" 41 #include "inode.h" 42 #include "journal.h" 43 #include "localalloc.h" 44 #include "suballoc.h" 45 #include "sysfile.h" 46 #include "file.h" 47 #include "super.h" 48 #include "uptodate.h" 49 50 #include "buffer_head_io.h" 51 52 /* 53 * ocfs2_extent_tree and ocfs2_extent_tree_operations are used to abstract 54 * the b-tree operations in ocfs2. Now all the b-tree operations are not 55 * limited to ocfs2_dinode only. Any data which need to allocate clusters 56 * to store can use b-tree. And it only needs to implement its ocfs2_extent_tree 57 * and operation. 58 * 59 * ocfs2_extent_tree contains info for the root of the b-tree, it must have a 60 * root ocfs2_extent_list and a root_bh so that they can be used in the b-tree 61 * functions. 62 * ocfs2_extent_tree_operations abstract the normal operations we do for 63 * the root of extent b-tree. 64 */ 65 struct ocfs2_extent_tree; 66 67 struct ocfs2_extent_tree_operations { 68 void (*set_last_eb_blk) (struct ocfs2_extent_tree *et, u64 blkno); 69 u64 (*get_last_eb_blk) (struct ocfs2_extent_tree *et); 70 void (*update_clusters) (struct inode *inode, 71 struct ocfs2_extent_tree *et, 72 u32 new_clusters); 73 int (*sanity_check) (struct inode *inode, struct ocfs2_extent_tree *et); 74 }; 75 76 struct ocfs2_extent_tree { 77 enum ocfs2_extent_tree_type type; 78 struct ocfs2_extent_tree_operations *eops; 79 struct buffer_head *root_bh; 80 struct ocfs2_extent_list *root_el; 81 }; 82 83 static void ocfs2_dinode_set_last_eb_blk(struct ocfs2_extent_tree *et, 84 u64 blkno) 85 { 86 struct ocfs2_dinode *di = (struct ocfs2_dinode *)et->root_bh->b_data; 87 88 BUG_ON(et->type != OCFS2_DINODE_EXTENT); 89 di->i_last_eb_blk = cpu_to_le64(blkno); 90 } 91 92 static u64 ocfs2_dinode_get_last_eb_blk(struct ocfs2_extent_tree *et) 93 { 94 struct ocfs2_dinode *di = (struct ocfs2_dinode *)et->root_bh->b_data; 95 96 BUG_ON(et->type != OCFS2_DINODE_EXTENT); 97 return le64_to_cpu(di->i_last_eb_blk); 98 } 99 100 static void ocfs2_dinode_update_clusters(struct inode *inode, 101 struct ocfs2_extent_tree *et, 102 u32 clusters) 103 { 104 struct ocfs2_dinode *di = 105 (struct ocfs2_dinode *)et->root_bh->b_data; 106 107 le32_add_cpu(&di->i_clusters, clusters); 108 spin_lock(&OCFS2_I(inode)->ip_lock); 109 OCFS2_I(inode)->ip_clusters = le32_to_cpu(di->i_clusters); 110 spin_unlock(&OCFS2_I(inode)->ip_lock); 111 } 112 113 static int ocfs2_dinode_sanity_check(struct inode *inode, 114 struct ocfs2_extent_tree *et) 115 { 116 int ret = 0; 117 struct ocfs2_dinode *di; 118 119 BUG_ON(et->type != OCFS2_DINODE_EXTENT); 120 121 di = (struct ocfs2_dinode *)et->root_bh->b_data; 122 if (!OCFS2_IS_VALID_DINODE(di)) { 123 ret = -EIO; 124 ocfs2_error(inode->i_sb, 125 "Inode %llu has invalid path root", 126 (unsigned long long)OCFS2_I(inode)->ip_blkno); 127 } 128 129 return ret; 130 } 131 132 static struct ocfs2_extent_tree_operations ocfs2_dinode_et_ops = { 133 .set_last_eb_blk = ocfs2_dinode_set_last_eb_blk, 134 .get_last_eb_blk = ocfs2_dinode_get_last_eb_blk, 135 .update_clusters = ocfs2_dinode_update_clusters, 136 .sanity_check = ocfs2_dinode_sanity_check, 137 }; 138 139 static struct ocfs2_extent_tree* 140 ocfs2_new_extent_tree(struct buffer_head *bh, 141 enum ocfs2_extent_tree_type et_type) 142 { 143 struct ocfs2_extent_tree *et; 144 145 et = kzalloc(sizeof(*et), GFP_NOFS); 146 if (!et) 147 return NULL; 148 149 et->type = et_type; 150 get_bh(bh); 151 et->root_bh = bh; 152 153 /* current we only support dinode extent. */ 154 BUG_ON(et->type != OCFS2_DINODE_EXTENT); 155 if (et_type == OCFS2_DINODE_EXTENT) { 156 et->root_el = &((struct ocfs2_dinode *)bh->b_data)->id2.i_list; 157 et->eops = &ocfs2_dinode_et_ops; 158 } 159 160 return et; 161 } 162 163 static void ocfs2_free_extent_tree(struct ocfs2_extent_tree *et) 164 { 165 if (et) { 166 brelse(et->root_bh); 167 kfree(et); 168 } 169 } 170 171 static inline void ocfs2_set_last_eb_blk(struct ocfs2_extent_tree *et, 172 u64 new_last_eb_blk) 173 { 174 et->eops->set_last_eb_blk(et, new_last_eb_blk); 175 } 176 177 static inline u64 ocfs2_get_last_eb_blk(struct ocfs2_extent_tree *et) 178 { 179 return et->eops->get_last_eb_blk(et); 180 } 181 182 static inline void ocfs2_update_clusters(struct inode *inode, 183 struct ocfs2_extent_tree *et, 184 u32 clusters) 185 { 186 et->eops->update_clusters(inode, et, clusters); 187 } 188 189 static void ocfs2_free_truncate_context(struct ocfs2_truncate_context *tc); 190 static int ocfs2_cache_extent_block_free(struct ocfs2_cached_dealloc_ctxt *ctxt, 191 struct ocfs2_extent_block *eb); 192 193 /* 194 * Structures which describe a path through a btree, and functions to 195 * manipulate them. 196 * 197 * The idea here is to be as generic as possible with the tree 198 * manipulation code. 199 */ 200 struct ocfs2_path_item { 201 struct buffer_head *bh; 202 struct ocfs2_extent_list *el; 203 }; 204 205 #define OCFS2_MAX_PATH_DEPTH 5 206 207 struct ocfs2_path { 208 int p_tree_depth; 209 struct ocfs2_path_item p_node[OCFS2_MAX_PATH_DEPTH]; 210 }; 211 212 #define path_root_bh(_path) ((_path)->p_node[0].bh) 213 #define path_root_el(_path) ((_path)->p_node[0].el) 214 #define path_leaf_bh(_path) ((_path)->p_node[(_path)->p_tree_depth].bh) 215 #define path_leaf_el(_path) ((_path)->p_node[(_path)->p_tree_depth].el) 216 #define path_num_items(_path) ((_path)->p_tree_depth + 1) 217 218 /* 219 * Reset the actual path elements so that we can re-use the structure 220 * to build another path. Generally, this involves freeing the buffer 221 * heads. 222 */ 223 static void ocfs2_reinit_path(struct ocfs2_path *path, int keep_root) 224 { 225 int i, start = 0, depth = 0; 226 struct ocfs2_path_item *node; 227 228 if (keep_root) 229 start = 1; 230 231 for(i = start; i < path_num_items(path); i++) { 232 node = &path->p_node[i]; 233 234 brelse(node->bh); 235 node->bh = NULL; 236 node->el = NULL; 237 } 238 239 /* 240 * Tree depth may change during truncate, or insert. If we're 241 * keeping the root extent list, then make sure that our path 242 * structure reflects the proper depth. 243 */ 244 if (keep_root) 245 depth = le16_to_cpu(path_root_el(path)->l_tree_depth); 246 247 path->p_tree_depth = depth; 248 } 249 250 static void ocfs2_free_path(struct ocfs2_path *path) 251 { 252 if (path) { 253 ocfs2_reinit_path(path, 0); 254 kfree(path); 255 } 256 } 257 258 /* 259 * All the elements of src into dest. After this call, src could be freed 260 * without affecting dest. 261 * 262 * Both paths should have the same root. Any non-root elements of dest 263 * will be freed. 264 */ 265 static void ocfs2_cp_path(struct ocfs2_path *dest, struct ocfs2_path *src) 266 { 267 int i; 268 269 BUG_ON(path_root_bh(dest) != path_root_bh(src)); 270 BUG_ON(path_root_el(dest) != path_root_el(src)); 271 272 ocfs2_reinit_path(dest, 1); 273 274 for(i = 1; i < OCFS2_MAX_PATH_DEPTH; i++) { 275 dest->p_node[i].bh = src->p_node[i].bh; 276 dest->p_node[i].el = src->p_node[i].el; 277 278 if (dest->p_node[i].bh) 279 get_bh(dest->p_node[i].bh); 280 } 281 } 282 283 /* 284 * Make the *dest path the same as src and re-initialize src path to 285 * have a root only. 286 */ 287 static void ocfs2_mv_path(struct ocfs2_path *dest, struct ocfs2_path *src) 288 { 289 int i; 290 291 BUG_ON(path_root_bh(dest) != path_root_bh(src)); 292 293 for(i = 1; i < OCFS2_MAX_PATH_DEPTH; i++) { 294 brelse(dest->p_node[i].bh); 295 296 dest->p_node[i].bh = src->p_node[i].bh; 297 dest->p_node[i].el = src->p_node[i].el; 298 299 src->p_node[i].bh = NULL; 300 src->p_node[i].el = NULL; 301 } 302 } 303 304 /* 305 * Insert an extent block at given index. 306 * 307 * This will not take an additional reference on eb_bh. 308 */ 309 static inline void ocfs2_path_insert_eb(struct ocfs2_path *path, int index, 310 struct buffer_head *eb_bh) 311 { 312 struct ocfs2_extent_block *eb = (struct ocfs2_extent_block *)eb_bh->b_data; 313 314 /* 315 * Right now, no root bh is an extent block, so this helps 316 * catch code errors with dinode trees. The assertion can be 317 * safely removed if we ever need to insert extent block 318 * structures at the root. 319 */ 320 BUG_ON(index == 0); 321 322 path->p_node[index].bh = eb_bh; 323 path->p_node[index].el = &eb->h_list; 324 } 325 326 static struct ocfs2_path *ocfs2_new_path(struct buffer_head *root_bh, 327 struct ocfs2_extent_list *root_el) 328 { 329 struct ocfs2_path *path; 330 331 BUG_ON(le16_to_cpu(root_el->l_tree_depth) >= OCFS2_MAX_PATH_DEPTH); 332 333 path = kzalloc(sizeof(*path), GFP_NOFS); 334 if (path) { 335 path->p_tree_depth = le16_to_cpu(root_el->l_tree_depth); 336 get_bh(root_bh); 337 path_root_bh(path) = root_bh; 338 path_root_el(path) = root_el; 339 } 340 341 return path; 342 } 343 344 /* 345 * Convenience function to journal all components in a path. 346 */ 347 static int ocfs2_journal_access_path(struct inode *inode, handle_t *handle, 348 struct ocfs2_path *path) 349 { 350 int i, ret = 0; 351 352 if (!path) 353 goto out; 354 355 for(i = 0; i < path_num_items(path); i++) { 356 ret = ocfs2_journal_access(handle, inode, path->p_node[i].bh, 357 OCFS2_JOURNAL_ACCESS_WRITE); 358 if (ret < 0) { 359 mlog_errno(ret); 360 goto out; 361 } 362 } 363 364 out: 365 return ret; 366 } 367 368 /* 369 * Return the index of the extent record which contains cluster #v_cluster. 370 * -1 is returned if it was not found. 371 * 372 * Should work fine on interior and exterior nodes. 373 */ 374 int ocfs2_search_extent_list(struct ocfs2_extent_list *el, u32 v_cluster) 375 { 376 int ret = -1; 377 int i; 378 struct ocfs2_extent_rec *rec; 379 u32 rec_end, rec_start, clusters; 380 381 for(i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) { 382 rec = &el->l_recs[i]; 383 384 rec_start = le32_to_cpu(rec->e_cpos); 385 clusters = ocfs2_rec_clusters(el, rec); 386 387 rec_end = rec_start + clusters; 388 389 if (v_cluster >= rec_start && v_cluster < rec_end) { 390 ret = i; 391 break; 392 } 393 } 394 395 return ret; 396 } 397 398 enum ocfs2_contig_type { 399 CONTIG_NONE = 0, 400 CONTIG_LEFT, 401 CONTIG_RIGHT, 402 CONTIG_LEFTRIGHT, 403 }; 404 405 406 /* 407 * NOTE: ocfs2_block_extent_contig(), ocfs2_extents_adjacent() and 408 * ocfs2_extent_contig only work properly against leaf nodes! 409 */ 410 static int ocfs2_block_extent_contig(struct super_block *sb, 411 struct ocfs2_extent_rec *ext, 412 u64 blkno) 413 { 414 u64 blk_end = le64_to_cpu(ext->e_blkno); 415 416 blk_end += ocfs2_clusters_to_blocks(sb, 417 le16_to_cpu(ext->e_leaf_clusters)); 418 419 return blkno == blk_end; 420 } 421 422 static int ocfs2_extents_adjacent(struct ocfs2_extent_rec *left, 423 struct ocfs2_extent_rec *right) 424 { 425 u32 left_range; 426 427 left_range = le32_to_cpu(left->e_cpos) + 428 le16_to_cpu(left->e_leaf_clusters); 429 430 return (left_range == le32_to_cpu(right->e_cpos)); 431 } 432 433 static enum ocfs2_contig_type 434 ocfs2_extent_contig(struct inode *inode, 435 struct ocfs2_extent_rec *ext, 436 struct ocfs2_extent_rec *insert_rec) 437 { 438 u64 blkno = le64_to_cpu(insert_rec->e_blkno); 439 440 /* 441 * Refuse to coalesce extent records with different flag 442 * fields - we don't want to mix unwritten extents with user 443 * data. 444 */ 445 if (ext->e_flags != insert_rec->e_flags) 446 return CONTIG_NONE; 447 448 if (ocfs2_extents_adjacent(ext, insert_rec) && 449 ocfs2_block_extent_contig(inode->i_sb, ext, blkno)) 450 return CONTIG_RIGHT; 451 452 blkno = le64_to_cpu(ext->e_blkno); 453 if (ocfs2_extents_adjacent(insert_rec, ext) && 454 ocfs2_block_extent_contig(inode->i_sb, insert_rec, blkno)) 455 return CONTIG_LEFT; 456 457 return CONTIG_NONE; 458 } 459 460 /* 461 * NOTE: We can have pretty much any combination of contiguousness and 462 * appending. 463 * 464 * The usefulness of APPEND_TAIL is more in that it lets us know that 465 * we'll have to update the path to that leaf. 466 */ 467 enum ocfs2_append_type { 468 APPEND_NONE = 0, 469 APPEND_TAIL, 470 }; 471 472 enum ocfs2_split_type { 473 SPLIT_NONE = 0, 474 SPLIT_LEFT, 475 SPLIT_RIGHT, 476 }; 477 478 struct ocfs2_insert_type { 479 enum ocfs2_split_type ins_split; 480 enum ocfs2_append_type ins_appending; 481 enum ocfs2_contig_type ins_contig; 482 int ins_contig_index; 483 int ins_tree_depth; 484 }; 485 486 struct ocfs2_merge_ctxt { 487 enum ocfs2_contig_type c_contig_type; 488 int c_has_empty_extent; 489 int c_split_covers_rec; 490 }; 491 492 /* 493 * How many free extents have we got before we need more meta data? 494 */ 495 int ocfs2_num_free_extents(struct ocfs2_super *osb, 496 struct inode *inode, 497 struct buffer_head *root_bh, 498 enum ocfs2_extent_tree_type type) 499 { 500 int retval; 501 struct ocfs2_extent_list *el = NULL; 502 struct ocfs2_extent_block *eb; 503 struct buffer_head *eb_bh = NULL; 504 u64 last_eb_blk = 0; 505 506 mlog_entry_void(); 507 508 if (type == OCFS2_DINODE_EXTENT) { 509 struct ocfs2_dinode *fe = 510 (struct ocfs2_dinode *)root_bh->b_data; 511 if (!OCFS2_IS_VALID_DINODE(fe)) { 512 OCFS2_RO_ON_INVALID_DINODE(inode->i_sb, fe); 513 retval = -EIO; 514 goto bail; 515 } 516 517 if (fe->i_last_eb_blk) 518 last_eb_blk = le64_to_cpu(fe->i_last_eb_blk); 519 el = &fe->id2.i_list; 520 } 521 522 if (last_eb_blk) { 523 retval = ocfs2_read_block(osb, last_eb_blk, 524 &eb_bh, OCFS2_BH_CACHED, inode); 525 if (retval < 0) { 526 mlog_errno(retval); 527 goto bail; 528 } 529 eb = (struct ocfs2_extent_block *) eb_bh->b_data; 530 el = &eb->h_list; 531 } 532 533 BUG_ON(el->l_tree_depth != 0); 534 535 retval = le16_to_cpu(el->l_count) - le16_to_cpu(el->l_next_free_rec); 536 bail: 537 if (eb_bh) 538 brelse(eb_bh); 539 540 mlog_exit(retval); 541 return retval; 542 } 543 544 /* expects array to already be allocated 545 * 546 * sets h_signature, h_blkno, h_suballoc_bit, h_suballoc_slot, and 547 * l_count for you 548 */ 549 static int ocfs2_create_new_meta_bhs(struct ocfs2_super *osb, 550 handle_t *handle, 551 struct inode *inode, 552 int wanted, 553 struct ocfs2_alloc_context *meta_ac, 554 struct buffer_head *bhs[]) 555 { 556 int count, status, i; 557 u16 suballoc_bit_start; 558 u32 num_got; 559 u64 first_blkno; 560 struct ocfs2_extent_block *eb; 561 562 mlog_entry_void(); 563 564 count = 0; 565 while (count < wanted) { 566 status = ocfs2_claim_metadata(osb, 567 handle, 568 meta_ac, 569 wanted - count, 570 &suballoc_bit_start, 571 &num_got, 572 &first_blkno); 573 if (status < 0) { 574 mlog_errno(status); 575 goto bail; 576 } 577 578 for(i = count; i < (num_got + count); i++) { 579 bhs[i] = sb_getblk(osb->sb, first_blkno); 580 if (bhs[i] == NULL) { 581 status = -EIO; 582 mlog_errno(status); 583 goto bail; 584 } 585 ocfs2_set_new_buffer_uptodate(inode, bhs[i]); 586 587 status = ocfs2_journal_access(handle, inode, bhs[i], 588 OCFS2_JOURNAL_ACCESS_CREATE); 589 if (status < 0) { 590 mlog_errno(status); 591 goto bail; 592 } 593 594 memset(bhs[i]->b_data, 0, osb->sb->s_blocksize); 595 eb = (struct ocfs2_extent_block *) bhs[i]->b_data; 596 /* Ok, setup the minimal stuff here. */ 597 strcpy(eb->h_signature, OCFS2_EXTENT_BLOCK_SIGNATURE); 598 eb->h_blkno = cpu_to_le64(first_blkno); 599 eb->h_fs_generation = cpu_to_le32(osb->fs_generation); 600 eb->h_suballoc_slot = cpu_to_le16(osb->slot_num); 601 eb->h_suballoc_bit = cpu_to_le16(suballoc_bit_start); 602 eb->h_list.l_count = 603 cpu_to_le16(ocfs2_extent_recs_per_eb(osb->sb)); 604 605 suballoc_bit_start++; 606 first_blkno++; 607 608 /* We'll also be dirtied by the caller, so 609 * this isn't absolutely necessary. */ 610 status = ocfs2_journal_dirty(handle, bhs[i]); 611 if (status < 0) { 612 mlog_errno(status); 613 goto bail; 614 } 615 } 616 617 count += num_got; 618 } 619 620 status = 0; 621 bail: 622 if (status < 0) { 623 for(i = 0; i < wanted; i++) { 624 if (bhs[i]) 625 brelse(bhs[i]); 626 bhs[i] = NULL; 627 } 628 } 629 mlog_exit(status); 630 return status; 631 } 632 633 /* 634 * Helper function for ocfs2_add_branch() and ocfs2_shift_tree_depth(). 635 * 636 * Returns the sum of the rightmost extent rec logical offset and 637 * cluster count. 638 * 639 * ocfs2_add_branch() uses this to determine what logical cluster 640 * value should be populated into the leftmost new branch records. 641 * 642 * ocfs2_shift_tree_depth() uses this to determine the # clusters 643 * value for the new topmost tree record. 644 */ 645 static inline u32 ocfs2_sum_rightmost_rec(struct ocfs2_extent_list *el) 646 { 647 int i; 648 649 i = le16_to_cpu(el->l_next_free_rec) - 1; 650 651 return le32_to_cpu(el->l_recs[i].e_cpos) + 652 ocfs2_rec_clusters(el, &el->l_recs[i]); 653 } 654 655 /* 656 * Add an entire tree branch to our inode. eb_bh is the extent block 657 * to start at, if we don't want to start the branch at the dinode 658 * structure. 659 * 660 * last_eb_bh is required as we have to update it's next_leaf pointer 661 * for the new last extent block. 662 * 663 * the new branch will be 'empty' in the sense that every block will 664 * contain a single record with cluster count == 0. 665 */ 666 static int ocfs2_add_branch(struct ocfs2_super *osb, 667 handle_t *handle, 668 struct inode *inode, 669 struct ocfs2_extent_tree *et, 670 struct buffer_head *eb_bh, 671 struct buffer_head **last_eb_bh, 672 struct ocfs2_alloc_context *meta_ac) 673 { 674 int status, new_blocks, i; 675 u64 next_blkno, new_last_eb_blk; 676 struct buffer_head *bh; 677 struct buffer_head **new_eb_bhs = NULL; 678 struct ocfs2_extent_block *eb; 679 struct ocfs2_extent_list *eb_el; 680 struct ocfs2_extent_list *el; 681 u32 new_cpos; 682 683 mlog_entry_void(); 684 685 BUG_ON(!last_eb_bh || !*last_eb_bh); 686 687 if (eb_bh) { 688 eb = (struct ocfs2_extent_block *) eb_bh->b_data; 689 el = &eb->h_list; 690 } else 691 el = et->root_el; 692 693 /* we never add a branch to a leaf. */ 694 BUG_ON(!el->l_tree_depth); 695 696 new_blocks = le16_to_cpu(el->l_tree_depth); 697 698 /* allocate the number of new eb blocks we need */ 699 new_eb_bhs = kcalloc(new_blocks, sizeof(struct buffer_head *), 700 GFP_KERNEL); 701 if (!new_eb_bhs) { 702 status = -ENOMEM; 703 mlog_errno(status); 704 goto bail; 705 } 706 707 status = ocfs2_create_new_meta_bhs(osb, handle, inode, new_blocks, 708 meta_ac, new_eb_bhs); 709 if (status < 0) { 710 mlog_errno(status); 711 goto bail; 712 } 713 714 eb = (struct ocfs2_extent_block *)(*last_eb_bh)->b_data; 715 new_cpos = ocfs2_sum_rightmost_rec(&eb->h_list); 716 717 /* Note: new_eb_bhs[new_blocks - 1] is the guy which will be 718 * linked with the rest of the tree. 719 * conversly, new_eb_bhs[0] is the new bottommost leaf. 720 * 721 * when we leave the loop, new_last_eb_blk will point to the 722 * newest leaf, and next_blkno will point to the topmost extent 723 * block. */ 724 next_blkno = new_last_eb_blk = 0; 725 for(i = 0; i < new_blocks; i++) { 726 bh = new_eb_bhs[i]; 727 eb = (struct ocfs2_extent_block *) bh->b_data; 728 if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) { 729 OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb); 730 status = -EIO; 731 goto bail; 732 } 733 eb_el = &eb->h_list; 734 735 status = ocfs2_journal_access(handle, inode, bh, 736 OCFS2_JOURNAL_ACCESS_CREATE); 737 if (status < 0) { 738 mlog_errno(status); 739 goto bail; 740 } 741 742 eb->h_next_leaf_blk = 0; 743 eb_el->l_tree_depth = cpu_to_le16(i); 744 eb_el->l_next_free_rec = cpu_to_le16(1); 745 /* 746 * This actually counts as an empty extent as 747 * c_clusters == 0 748 */ 749 eb_el->l_recs[0].e_cpos = cpu_to_le32(new_cpos); 750 eb_el->l_recs[0].e_blkno = cpu_to_le64(next_blkno); 751 /* 752 * eb_el isn't always an interior node, but even leaf 753 * nodes want a zero'd flags and reserved field so 754 * this gets the whole 32 bits regardless of use. 755 */ 756 eb_el->l_recs[0].e_int_clusters = cpu_to_le32(0); 757 if (!eb_el->l_tree_depth) 758 new_last_eb_blk = le64_to_cpu(eb->h_blkno); 759 760 status = ocfs2_journal_dirty(handle, bh); 761 if (status < 0) { 762 mlog_errno(status); 763 goto bail; 764 } 765 766 next_blkno = le64_to_cpu(eb->h_blkno); 767 } 768 769 /* This is a bit hairy. We want to update up to three blocks 770 * here without leaving any of them in an inconsistent state 771 * in case of error. We don't have to worry about 772 * journal_dirty erroring as it won't unless we've aborted the 773 * handle (in which case we would never be here) so reserving 774 * the write with journal_access is all we need to do. */ 775 status = ocfs2_journal_access(handle, inode, *last_eb_bh, 776 OCFS2_JOURNAL_ACCESS_WRITE); 777 if (status < 0) { 778 mlog_errno(status); 779 goto bail; 780 } 781 status = ocfs2_journal_access(handle, inode, et->root_bh, 782 OCFS2_JOURNAL_ACCESS_WRITE); 783 if (status < 0) { 784 mlog_errno(status); 785 goto bail; 786 } 787 if (eb_bh) { 788 status = ocfs2_journal_access(handle, inode, eb_bh, 789 OCFS2_JOURNAL_ACCESS_WRITE); 790 if (status < 0) { 791 mlog_errno(status); 792 goto bail; 793 } 794 } 795 796 /* Link the new branch into the rest of the tree (el will 797 * either be on the root_bh, or the extent block passed in. */ 798 i = le16_to_cpu(el->l_next_free_rec); 799 el->l_recs[i].e_blkno = cpu_to_le64(next_blkno); 800 el->l_recs[i].e_cpos = cpu_to_le32(new_cpos); 801 el->l_recs[i].e_int_clusters = 0; 802 le16_add_cpu(&el->l_next_free_rec, 1); 803 804 /* fe needs a new last extent block pointer, as does the 805 * next_leaf on the previously last-extent-block. */ 806 ocfs2_set_last_eb_blk(et, new_last_eb_blk); 807 808 eb = (struct ocfs2_extent_block *) (*last_eb_bh)->b_data; 809 eb->h_next_leaf_blk = cpu_to_le64(new_last_eb_blk); 810 811 status = ocfs2_journal_dirty(handle, *last_eb_bh); 812 if (status < 0) 813 mlog_errno(status); 814 status = ocfs2_journal_dirty(handle, et->root_bh); 815 if (status < 0) 816 mlog_errno(status); 817 if (eb_bh) { 818 status = ocfs2_journal_dirty(handle, eb_bh); 819 if (status < 0) 820 mlog_errno(status); 821 } 822 823 /* 824 * Some callers want to track the rightmost leaf so pass it 825 * back here. 826 */ 827 brelse(*last_eb_bh); 828 get_bh(new_eb_bhs[0]); 829 *last_eb_bh = new_eb_bhs[0]; 830 831 status = 0; 832 bail: 833 if (new_eb_bhs) { 834 for (i = 0; i < new_blocks; i++) 835 if (new_eb_bhs[i]) 836 brelse(new_eb_bhs[i]); 837 kfree(new_eb_bhs); 838 } 839 840 mlog_exit(status); 841 return status; 842 } 843 844 /* 845 * adds another level to the allocation tree. 846 * returns back the new extent block so you can add a branch to it 847 * after this call. 848 */ 849 static int ocfs2_shift_tree_depth(struct ocfs2_super *osb, 850 handle_t *handle, 851 struct inode *inode, 852 struct ocfs2_extent_tree *et, 853 struct ocfs2_alloc_context *meta_ac, 854 struct buffer_head **ret_new_eb_bh) 855 { 856 int status, i; 857 u32 new_clusters; 858 struct buffer_head *new_eb_bh = NULL; 859 struct ocfs2_extent_block *eb; 860 struct ocfs2_extent_list *root_el; 861 struct ocfs2_extent_list *eb_el; 862 863 mlog_entry_void(); 864 865 status = ocfs2_create_new_meta_bhs(osb, handle, inode, 1, meta_ac, 866 &new_eb_bh); 867 if (status < 0) { 868 mlog_errno(status); 869 goto bail; 870 } 871 872 eb = (struct ocfs2_extent_block *) new_eb_bh->b_data; 873 if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) { 874 OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb); 875 status = -EIO; 876 goto bail; 877 } 878 879 eb_el = &eb->h_list; 880 root_el = et->root_el; 881 882 status = ocfs2_journal_access(handle, inode, new_eb_bh, 883 OCFS2_JOURNAL_ACCESS_CREATE); 884 if (status < 0) { 885 mlog_errno(status); 886 goto bail; 887 } 888 889 /* copy the root extent list data into the new extent block */ 890 eb_el->l_tree_depth = root_el->l_tree_depth; 891 eb_el->l_next_free_rec = root_el->l_next_free_rec; 892 for (i = 0; i < le16_to_cpu(root_el->l_next_free_rec); i++) 893 eb_el->l_recs[i] = root_el->l_recs[i]; 894 895 status = ocfs2_journal_dirty(handle, new_eb_bh); 896 if (status < 0) { 897 mlog_errno(status); 898 goto bail; 899 } 900 901 status = ocfs2_journal_access(handle, inode, et->root_bh, 902 OCFS2_JOURNAL_ACCESS_WRITE); 903 if (status < 0) { 904 mlog_errno(status); 905 goto bail; 906 } 907 908 new_clusters = ocfs2_sum_rightmost_rec(eb_el); 909 910 /* update root_bh now */ 911 le16_add_cpu(&root_el->l_tree_depth, 1); 912 root_el->l_recs[0].e_cpos = 0; 913 root_el->l_recs[0].e_blkno = eb->h_blkno; 914 root_el->l_recs[0].e_int_clusters = cpu_to_le32(new_clusters); 915 for (i = 1; i < le16_to_cpu(root_el->l_next_free_rec); i++) 916 memset(&root_el->l_recs[i], 0, sizeof(struct ocfs2_extent_rec)); 917 root_el->l_next_free_rec = cpu_to_le16(1); 918 919 /* If this is our 1st tree depth shift, then last_eb_blk 920 * becomes the allocated extent block */ 921 if (root_el->l_tree_depth == cpu_to_le16(1)) 922 ocfs2_set_last_eb_blk(et, le64_to_cpu(eb->h_blkno)); 923 924 status = ocfs2_journal_dirty(handle, et->root_bh); 925 if (status < 0) { 926 mlog_errno(status); 927 goto bail; 928 } 929 930 *ret_new_eb_bh = new_eb_bh; 931 new_eb_bh = NULL; 932 status = 0; 933 bail: 934 if (new_eb_bh) 935 brelse(new_eb_bh); 936 937 mlog_exit(status); 938 return status; 939 } 940 941 /* 942 * Should only be called when there is no space left in any of the 943 * leaf nodes. What we want to do is find the lowest tree depth 944 * non-leaf extent block with room for new records. There are three 945 * valid results of this search: 946 * 947 * 1) a lowest extent block is found, then we pass it back in 948 * *lowest_eb_bh and return '0' 949 * 950 * 2) the search fails to find anything, but the root_el has room. We 951 * pass NULL back in *lowest_eb_bh, but still return '0' 952 * 953 * 3) the search fails to find anything AND the root_el is full, in 954 * which case we return > 0 955 * 956 * return status < 0 indicates an error. 957 */ 958 static int ocfs2_find_branch_target(struct ocfs2_super *osb, 959 struct inode *inode, 960 struct ocfs2_extent_tree *et, 961 struct buffer_head **target_bh) 962 { 963 int status = 0, i; 964 u64 blkno; 965 struct ocfs2_extent_block *eb; 966 struct ocfs2_extent_list *el; 967 struct buffer_head *bh = NULL; 968 struct buffer_head *lowest_bh = NULL; 969 970 mlog_entry_void(); 971 972 *target_bh = NULL; 973 974 el = et->root_el; 975 976 while(le16_to_cpu(el->l_tree_depth) > 1) { 977 if (le16_to_cpu(el->l_next_free_rec) == 0) { 978 ocfs2_error(inode->i_sb, "Dinode %llu has empty " 979 "extent list (next_free_rec == 0)", 980 (unsigned long long)OCFS2_I(inode)->ip_blkno); 981 status = -EIO; 982 goto bail; 983 } 984 i = le16_to_cpu(el->l_next_free_rec) - 1; 985 blkno = le64_to_cpu(el->l_recs[i].e_blkno); 986 if (!blkno) { 987 ocfs2_error(inode->i_sb, "Dinode %llu has extent " 988 "list where extent # %d has no physical " 989 "block start", 990 (unsigned long long)OCFS2_I(inode)->ip_blkno, i); 991 status = -EIO; 992 goto bail; 993 } 994 995 if (bh) { 996 brelse(bh); 997 bh = NULL; 998 } 999 1000 status = ocfs2_read_block(osb, blkno, &bh, OCFS2_BH_CACHED, 1001 inode); 1002 if (status < 0) { 1003 mlog_errno(status); 1004 goto bail; 1005 } 1006 1007 eb = (struct ocfs2_extent_block *) bh->b_data; 1008 if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) { 1009 OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb); 1010 status = -EIO; 1011 goto bail; 1012 } 1013 el = &eb->h_list; 1014 1015 if (le16_to_cpu(el->l_next_free_rec) < 1016 le16_to_cpu(el->l_count)) { 1017 if (lowest_bh) 1018 brelse(lowest_bh); 1019 lowest_bh = bh; 1020 get_bh(lowest_bh); 1021 } 1022 } 1023 1024 /* If we didn't find one and the fe doesn't have any room, 1025 * then return '1' */ 1026 el = et->root_el; 1027 if (!lowest_bh && (el->l_next_free_rec == el->l_count)) 1028 status = 1; 1029 1030 *target_bh = lowest_bh; 1031 bail: 1032 if (bh) 1033 brelse(bh); 1034 1035 mlog_exit(status); 1036 return status; 1037 } 1038 1039 /* 1040 * Grow a b-tree so that it has more records. 1041 * 1042 * We might shift the tree depth in which case existing paths should 1043 * be considered invalid. 1044 * 1045 * Tree depth after the grow is returned via *final_depth. 1046 * 1047 * *last_eb_bh will be updated by ocfs2_add_branch(). 1048 */ 1049 static int ocfs2_grow_tree(struct inode *inode, handle_t *handle, 1050 struct ocfs2_extent_tree *et, int *final_depth, 1051 struct buffer_head **last_eb_bh, 1052 struct ocfs2_alloc_context *meta_ac) 1053 { 1054 int ret, shift; 1055 struct ocfs2_extent_list *el = et->root_el; 1056 int depth = le16_to_cpu(el->l_tree_depth); 1057 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1058 struct buffer_head *bh = NULL; 1059 1060 BUG_ON(meta_ac == NULL); 1061 1062 shift = ocfs2_find_branch_target(osb, inode, et, &bh); 1063 if (shift < 0) { 1064 ret = shift; 1065 mlog_errno(ret); 1066 goto out; 1067 } 1068 1069 /* We traveled all the way to the bottom of the allocation tree 1070 * and didn't find room for any more extents - we need to add 1071 * another tree level */ 1072 if (shift) { 1073 BUG_ON(bh); 1074 mlog(0, "need to shift tree depth (current = %d)\n", depth); 1075 1076 /* ocfs2_shift_tree_depth will return us a buffer with 1077 * the new extent block (so we can pass that to 1078 * ocfs2_add_branch). */ 1079 ret = ocfs2_shift_tree_depth(osb, handle, inode, et, 1080 meta_ac, &bh); 1081 if (ret < 0) { 1082 mlog_errno(ret); 1083 goto out; 1084 } 1085 depth++; 1086 if (depth == 1) { 1087 /* 1088 * Special case: we have room now if we shifted from 1089 * tree_depth 0, so no more work needs to be done. 1090 * 1091 * We won't be calling add_branch, so pass 1092 * back *last_eb_bh as the new leaf. At depth 1093 * zero, it should always be null so there's 1094 * no reason to brelse. 1095 */ 1096 BUG_ON(*last_eb_bh); 1097 get_bh(bh); 1098 *last_eb_bh = bh; 1099 goto out; 1100 } 1101 } 1102 1103 /* call ocfs2_add_branch to add the final part of the tree with 1104 * the new data. */ 1105 mlog(0, "add branch. bh = %p\n", bh); 1106 ret = ocfs2_add_branch(osb, handle, inode, et, bh, last_eb_bh, 1107 meta_ac); 1108 if (ret < 0) { 1109 mlog_errno(ret); 1110 goto out; 1111 } 1112 1113 out: 1114 if (final_depth) 1115 *final_depth = depth; 1116 brelse(bh); 1117 return ret; 1118 } 1119 1120 /* 1121 * This function will discard the rightmost extent record. 1122 */ 1123 static void ocfs2_shift_records_right(struct ocfs2_extent_list *el) 1124 { 1125 int next_free = le16_to_cpu(el->l_next_free_rec); 1126 int count = le16_to_cpu(el->l_count); 1127 unsigned int num_bytes; 1128 1129 BUG_ON(!next_free); 1130 /* This will cause us to go off the end of our extent list. */ 1131 BUG_ON(next_free >= count); 1132 1133 num_bytes = sizeof(struct ocfs2_extent_rec) * next_free; 1134 1135 memmove(&el->l_recs[1], &el->l_recs[0], num_bytes); 1136 } 1137 1138 static void ocfs2_rotate_leaf(struct ocfs2_extent_list *el, 1139 struct ocfs2_extent_rec *insert_rec) 1140 { 1141 int i, insert_index, next_free, has_empty, num_bytes; 1142 u32 insert_cpos = le32_to_cpu(insert_rec->e_cpos); 1143 struct ocfs2_extent_rec *rec; 1144 1145 next_free = le16_to_cpu(el->l_next_free_rec); 1146 has_empty = ocfs2_is_empty_extent(&el->l_recs[0]); 1147 1148 BUG_ON(!next_free); 1149 1150 /* The tree code before us didn't allow enough room in the leaf. */ 1151 BUG_ON(el->l_next_free_rec == el->l_count && !has_empty); 1152 1153 /* 1154 * The easiest way to approach this is to just remove the 1155 * empty extent and temporarily decrement next_free. 1156 */ 1157 if (has_empty) { 1158 /* 1159 * If next_free was 1 (only an empty extent), this 1160 * loop won't execute, which is fine. We still want 1161 * the decrement above to happen. 1162 */ 1163 for(i = 0; i < (next_free - 1); i++) 1164 el->l_recs[i] = el->l_recs[i+1]; 1165 1166 next_free--; 1167 } 1168 1169 /* 1170 * Figure out what the new record index should be. 1171 */ 1172 for(i = 0; i < next_free; i++) { 1173 rec = &el->l_recs[i]; 1174 1175 if (insert_cpos < le32_to_cpu(rec->e_cpos)) 1176 break; 1177 } 1178 insert_index = i; 1179 1180 mlog(0, "ins %u: index %d, has_empty %d, next_free %d, count %d\n", 1181 insert_cpos, insert_index, has_empty, next_free, le16_to_cpu(el->l_count)); 1182 1183 BUG_ON(insert_index < 0); 1184 BUG_ON(insert_index >= le16_to_cpu(el->l_count)); 1185 BUG_ON(insert_index > next_free); 1186 1187 /* 1188 * No need to memmove if we're just adding to the tail. 1189 */ 1190 if (insert_index != next_free) { 1191 BUG_ON(next_free >= le16_to_cpu(el->l_count)); 1192 1193 num_bytes = next_free - insert_index; 1194 num_bytes *= sizeof(struct ocfs2_extent_rec); 1195 memmove(&el->l_recs[insert_index + 1], 1196 &el->l_recs[insert_index], 1197 num_bytes); 1198 } 1199 1200 /* 1201 * Either we had an empty extent, and need to re-increment or 1202 * there was no empty extent on a non full rightmost leaf node, 1203 * in which case we still need to increment. 1204 */ 1205 next_free++; 1206 el->l_next_free_rec = cpu_to_le16(next_free); 1207 /* 1208 * Make sure none of the math above just messed up our tree. 1209 */ 1210 BUG_ON(le16_to_cpu(el->l_next_free_rec) > le16_to_cpu(el->l_count)); 1211 1212 el->l_recs[insert_index] = *insert_rec; 1213 1214 } 1215 1216 static void ocfs2_remove_empty_extent(struct ocfs2_extent_list *el) 1217 { 1218 int size, num_recs = le16_to_cpu(el->l_next_free_rec); 1219 1220 BUG_ON(num_recs == 0); 1221 1222 if (ocfs2_is_empty_extent(&el->l_recs[0])) { 1223 num_recs--; 1224 size = num_recs * sizeof(struct ocfs2_extent_rec); 1225 memmove(&el->l_recs[0], &el->l_recs[1], size); 1226 memset(&el->l_recs[num_recs], 0, 1227 sizeof(struct ocfs2_extent_rec)); 1228 el->l_next_free_rec = cpu_to_le16(num_recs); 1229 } 1230 } 1231 1232 /* 1233 * Create an empty extent record . 1234 * 1235 * l_next_free_rec may be updated. 1236 * 1237 * If an empty extent already exists do nothing. 1238 */ 1239 static void ocfs2_create_empty_extent(struct ocfs2_extent_list *el) 1240 { 1241 int next_free = le16_to_cpu(el->l_next_free_rec); 1242 1243 BUG_ON(le16_to_cpu(el->l_tree_depth) != 0); 1244 1245 if (next_free == 0) 1246 goto set_and_inc; 1247 1248 if (ocfs2_is_empty_extent(&el->l_recs[0])) 1249 return; 1250 1251 mlog_bug_on_msg(el->l_count == el->l_next_free_rec, 1252 "Asked to create an empty extent in a full list:\n" 1253 "count = %u, tree depth = %u", 1254 le16_to_cpu(el->l_count), 1255 le16_to_cpu(el->l_tree_depth)); 1256 1257 ocfs2_shift_records_right(el); 1258 1259 set_and_inc: 1260 le16_add_cpu(&el->l_next_free_rec, 1); 1261 memset(&el->l_recs[0], 0, sizeof(struct ocfs2_extent_rec)); 1262 } 1263 1264 /* 1265 * For a rotation which involves two leaf nodes, the "root node" is 1266 * the lowest level tree node which contains a path to both leafs. This 1267 * resulting set of information can be used to form a complete "subtree" 1268 * 1269 * This function is passed two full paths from the dinode down to a 1270 * pair of adjacent leaves. It's task is to figure out which path 1271 * index contains the subtree root - this can be the root index itself 1272 * in a worst-case rotation. 1273 * 1274 * The array index of the subtree root is passed back. 1275 */ 1276 static int ocfs2_find_subtree_root(struct inode *inode, 1277 struct ocfs2_path *left, 1278 struct ocfs2_path *right) 1279 { 1280 int i = 0; 1281 1282 /* 1283 * Check that the caller passed in two paths from the same tree. 1284 */ 1285 BUG_ON(path_root_bh(left) != path_root_bh(right)); 1286 1287 do { 1288 i++; 1289 1290 /* 1291 * The caller didn't pass two adjacent paths. 1292 */ 1293 mlog_bug_on_msg(i > left->p_tree_depth, 1294 "Inode %lu, left depth %u, right depth %u\n" 1295 "left leaf blk %llu, right leaf blk %llu\n", 1296 inode->i_ino, left->p_tree_depth, 1297 right->p_tree_depth, 1298 (unsigned long long)path_leaf_bh(left)->b_blocknr, 1299 (unsigned long long)path_leaf_bh(right)->b_blocknr); 1300 } while (left->p_node[i].bh->b_blocknr == 1301 right->p_node[i].bh->b_blocknr); 1302 1303 return i - 1; 1304 } 1305 1306 typedef void (path_insert_t)(void *, struct buffer_head *); 1307 1308 /* 1309 * Traverse a btree path in search of cpos, starting at root_el. 1310 * 1311 * This code can be called with a cpos larger than the tree, in which 1312 * case it will return the rightmost path. 1313 */ 1314 static int __ocfs2_find_path(struct inode *inode, 1315 struct ocfs2_extent_list *root_el, u32 cpos, 1316 path_insert_t *func, void *data) 1317 { 1318 int i, ret = 0; 1319 u32 range; 1320 u64 blkno; 1321 struct buffer_head *bh = NULL; 1322 struct ocfs2_extent_block *eb; 1323 struct ocfs2_extent_list *el; 1324 struct ocfs2_extent_rec *rec; 1325 struct ocfs2_inode_info *oi = OCFS2_I(inode); 1326 1327 el = root_el; 1328 while (el->l_tree_depth) { 1329 if (le16_to_cpu(el->l_next_free_rec) == 0) { 1330 ocfs2_error(inode->i_sb, 1331 "Inode %llu has empty extent list at " 1332 "depth %u\n", 1333 (unsigned long long)oi->ip_blkno, 1334 le16_to_cpu(el->l_tree_depth)); 1335 ret = -EROFS; 1336 goto out; 1337 1338 } 1339 1340 for(i = 0; i < le16_to_cpu(el->l_next_free_rec) - 1; i++) { 1341 rec = &el->l_recs[i]; 1342 1343 /* 1344 * In the case that cpos is off the allocation 1345 * tree, this should just wind up returning the 1346 * rightmost record. 1347 */ 1348 range = le32_to_cpu(rec->e_cpos) + 1349 ocfs2_rec_clusters(el, rec); 1350 if (cpos >= le32_to_cpu(rec->e_cpos) && cpos < range) 1351 break; 1352 } 1353 1354 blkno = le64_to_cpu(el->l_recs[i].e_blkno); 1355 if (blkno == 0) { 1356 ocfs2_error(inode->i_sb, 1357 "Inode %llu has bad blkno in extent list " 1358 "at depth %u (index %d)\n", 1359 (unsigned long long)oi->ip_blkno, 1360 le16_to_cpu(el->l_tree_depth), i); 1361 ret = -EROFS; 1362 goto out; 1363 } 1364 1365 brelse(bh); 1366 bh = NULL; 1367 ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), blkno, 1368 &bh, OCFS2_BH_CACHED, inode); 1369 if (ret) { 1370 mlog_errno(ret); 1371 goto out; 1372 } 1373 1374 eb = (struct ocfs2_extent_block *) bh->b_data; 1375 el = &eb->h_list; 1376 if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) { 1377 OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb); 1378 ret = -EIO; 1379 goto out; 1380 } 1381 1382 if (le16_to_cpu(el->l_next_free_rec) > 1383 le16_to_cpu(el->l_count)) { 1384 ocfs2_error(inode->i_sb, 1385 "Inode %llu has bad count in extent list " 1386 "at block %llu (next free=%u, count=%u)\n", 1387 (unsigned long long)oi->ip_blkno, 1388 (unsigned long long)bh->b_blocknr, 1389 le16_to_cpu(el->l_next_free_rec), 1390 le16_to_cpu(el->l_count)); 1391 ret = -EROFS; 1392 goto out; 1393 } 1394 1395 if (func) 1396 func(data, bh); 1397 } 1398 1399 out: 1400 /* 1401 * Catch any trailing bh that the loop didn't handle. 1402 */ 1403 brelse(bh); 1404 1405 return ret; 1406 } 1407 1408 /* 1409 * Given an initialized path (that is, it has a valid root extent 1410 * list), this function will traverse the btree in search of the path 1411 * which would contain cpos. 1412 * 1413 * The path traveled is recorded in the path structure. 1414 * 1415 * Note that this will not do any comparisons on leaf node extent 1416 * records, so it will work fine in the case that we just added a tree 1417 * branch. 1418 */ 1419 struct find_path_data { 1420 int index; 1421 struct ocfs2_path *path; 1422 }; 1423 static void find_path_ins(void *data, struct buffer_head *bh) 1424 { 1425 struct find_path_data *fp = data; 1426 1427 get_bh(bh); 1428 ocfs2_path_insert_eb(fp->path, fp->index, bh); 1429 fp->index++; 1430 } 1431 static int ocfs2_find_path(struct inode *inode, struct ocfs2_path *path, 1432 u32 cpos) 1433 { 1434 struct find_path_data data; 1435 1436 data.index = 1; 1437 data.path = path; 1438 return __ocfs2_find_path(inode, path_root_el(path), cpos, 1439 find_path_ins, &data); 1440 } 1441 1442 static void find_leaf_ins(void *data, struct buffer_head *bh) 1443 { 1444 struct ocfs2_extent_block *eb =(struct ocfs2_extent_block *)bh->b_data; 1445 struct ocfs2_extent_list *el = &eb->h_list; 1446 struct buffer_head **ret = data; 1447 1448 /* We want to retain only the leaf block. */ 1449 if (le16_to_cpu(el->l_tree_depth) == 0) { 1450 get_bh(bh); 1451 *ret = bh; 1452 } 1453 } 1454 /* 1455 * Find the leaf block in the tree which would contain cpos. No 1456 * checking of the actual leaf is done. 1457 * 1458 * Some paths want to call this instead of allocating a path structure 1459 * and calling ocfs2_find_path(). 1460 * 1461 * This function doesn't handle non btree extent lists. 1462 */ 1463 int ocfs2_find_leaf(struct inode *inode, struct ocfs2_extent_list *root_el, 1464 u32 cpos, struct buffer_head **leaf_bh) 1465 { 1466 int ret; 1467 struct buffer_head *bh = NULL; 1468 1469 ret = __ocfs2_find_path(inode, root_el, cpos, find_leaf_ins, &bh); 1470 if (ret) { 1471 mlog_errno(ret); 1472 goto out; 1473 } 1474 1475 *leaf_bh = bh; 1476 out: 1477 return ret; 1478 } 1479 1480 /* 1481 * Adjust the adjacent records (left_rec, right_rec) involved in a rotation. 1482 * 1483 * Basically, we've moved stuff around at the bottom of the tree and 1484 * we need to fix up the extent records above the changes to reflect 1485 * the new changes. 1486 * 1487 * left_rec: the record on the left. 1488 * left_child_el: is the child list pointed to by left_rec 1489 * right_rec: the record to the right of left_rec 1490 * right_child_el: is the child list pointed to by right_rec 1491 * 1492 * By definition, this only works on interior nodes. 1493 */ 1494 static void ocfs2_adjust_adjacent_records(struct ocfs2_extent_rec *left_rec, 1495 struct ocfs2_extent_list *left_child_el, 1496 struct ocfs2_extent_rec *right_rec, 1497 struct ocfs2_extent_list *right_child_el) 1498 { 1499 u32 left_clusters, right_end; 1500 1501 /* 1502 * Interior nodes never have holes. Their cpos is the cpos of 1503 * the leftmost record in their child list. Their cluster 1504 * count covers the full theoretical range of their child list 1505 * - the range between their cpos and the cpos of the record 1506 * immediately to their right. 1507 */ 1508 left_clusters = le32_to_cpu(right_child_el->l_recs[0].e_cpos); 1509 if (ocfs2_is_empty_extent(&right_child_el->l_recs[0])) { 1510 BUG_ON(le16_to_cpu(right_child_el->l_next_free_rec) <= 1); 1511 left_clusters = le32_to_cpu(right_child_el->l_recs[1].e_cpos); 1512 } 1513 left_clusters -= le32_to_cpu(left_rec->e_cpos); 1514 left_rec->e_int_clusters = cpu_to_le32(left_clusters); 1515 1516 /* 1517 * Calculate the rightmost cluster count boundary before 1518 * moving cpos - we will need to adjust clusters after 1519 * updating e_cpos to keep the same highest cluster count. 1520 */ 1521 right_end = le32_to_cpu(right_rec->e_cpos); 1522 right_end += le32_to_cpu(right_rec->e_int_clusters); 1523 1524 right_rec->e_cpos = left_rec->e_cpos; 1525 le32_add_cpu(&right_rec->e_cpos, left_clusters); 1526 1527 right_end -= le32_to_cpu(right_rec->e_cpos); 1528 right_rec->e_int_clusters = cpu_to_le32(right_end); 1529 } 1530 1531 /* 1532 * Adjust the adjacent root node records involved in a 1533 * rotation. left_el_blkno is passed in as a key so that we can easily 1534 * find it's index in the root list. 1535 */ 1536 static void ocfs2_adjust_root_records(struct ocfs2_extent_list *root_el, 1537 struct ocfs2_extent_list *left_el, 1538 struct ocfs2_extent_list *right_el, 1539 u64 left_el_blkno) 1540 { 1541 int i; 1542 1543 BUG_ON(le16_to_cpu(root_el->l_tree_depth) <= 1544 le16_to_cpu(left_el->l_tree_depth)); 1545 1546 for(i = 0; i < le16_to_cpu(root_el->l_next_free_rec) - 1; i++) { 1547 if (le64_to_cpu(root_el->l_recs[i].e_blkno) == left_el_blkno) 1548 break; 1549 } 1550 1551 /* 1552 * The path walking code should have never returned a root and 1553 * two paths which are not adjacent. 1554 */ 1555 BUG_ON(i >= (le16_to_cpu(root_el->l_next_free_rec) - 1)); 1556 1557 ocfs2_adjust_adjacent_records(&root_el->l_recs[i], left_el, 1558 &root_el->l_recs[i + 1], right_el); 1559 } 1560 1561 /* 1562 * We've changed a leaf block (in right_path) and need to reflect that 1563 * change back up the subtree. 1564 * 1565 * This happens in multiple places: 1566 * - When we've moved an extent record from the left path leaf to the right 1567 * path leaf to make room for an empty extent in the left path leaf. 1568 * - When our insert into the right path leaf is at the leftmost edge 1569 * and requires an update of the path immediately to it's left. This 1570 * can occur at the end of some types of rotation and appending inserts. 1571 * - When we've adjusted the last extent record in the left path leaf and the 1572 * 1st extent record in the right path leaf during cross extent block merge. 1573 */ 1574 static void ocfs2_complete_edge_insert(struct inode *inode, handle_t *handle, 1575 struct ocfs2_path *left_path, 1576 struct ocfs2_path *right_path, 1577 int subtree_index) 1578 { 1579 int ret, i, idx; 1580 struct ocfs2_extent_list *el, *left_el, *right_el; 1581 struct ocfs2_extent_rec *left_rec, *right_rec; 1582 struct buffer_head *root_bh = left_path->p_node[subtree_index].bh; 1583 1584 /* 1585 * Update the counts and position values within all the 1586 * interior nodes to reflect the leaf rotation we just did. 1587 * 1588 * The root node is handled below the loop. 1589 * 1590 * We begin the loop with right_el and left_el pointing to the 1591 * leaf lists and work our way up. 1592 * 1593 * NOTE: within this loop, left_el and right_el always refer 1594 * to the *child* lists. 1595 */ 1596 left_el = path_leaf_el(left_path); 1597 right_el = path_leaf_el(right_path); 1598 for(i = left_path->p_tree_depth - 1; i > subtree_index; i--) { 1599 mlog(0, "Adjust records at index %u\n", i); 1600 1601 /* 1602 * One nice property of knowing that all of these 1603 * nodes are below the root is that we only deal with 1604 * the leftmost right node record and the rightmost 1605 * left node record. 1606 */ 1607 el = left_path->p_node[i].el; 1608 idx = le16_to_cpu(left_el->l_next_free_rec) - 1; 1609 left_rec = &el->l_recs[idx]; 1610 1611 el = right_path->p_node[i].el; 1612 right_rec = &el->l_recs[0]; 1613 1614 ocfs2_adjust_adjacent_records(left_rec, left_el, right_rec, 1615 right_el); 1616 1617 ret = ocfs2_journal_dirty(handle, left_path->p_node[i].bh); 1618 if (ret) 1619 mlog_errno(ret); 1620 1621 ret = ocfs2_journal_dirty(handle, right_path->p_node[i].bh); 1622 if (ret) 1623 mlog_errno(ret); 1624 1625 /* 1626 * Setup our list pointers now so that the current 1627 * parents become children in the next iteration. 1628 */ 1629 left_el = left_path->p_node[i].el; 1630 right_el = right_path->p_node[i].el; 1631 } 1632 1633 /* 1634 * At the root node, adjust the two adjacent records which 1635 * begin our path to the leaves. 1636 */ 1637 1638 el = left_path->p_node[subtree_index].el; 1639 left_el = left_path->p_node[subtree_index + 1].el; 1640 right_el = right_path->p_node[subtree_index + 1].el; 1641 1642 ocfs2_adjust_root_records(el, left_el, right_el, 1643 left_path->p_node[subtree_index + 1].bh->b_blocknr); 1644 1645 root_bh = left_path->p_node[subtree_index].bh; 1646 1647 ret = ocfs2_journal_dirty(handle, root_bh); 1648 if (ret) 1649 mlog_errno(ret); 1650 } 1651 1652 static int ocfs2_rotate_subtree_right(struct inode *inode, 1653 handle_t *handle, 1654 struct ocfs2_path *left_path, 1655 struct ocfs2_path *right_path, 1656 int subtree_index) 1657 { 1658 int ret, i; 1659 struct buffer_head *right_leaf_bh; 1660 struct buffer_head *left_leaf_bh = NULL; 1661 struct buffer_head *root_bh; 1662 struct ocfs2_extent_list *right_el, *left_el; 1663 struct ocfs2_extent_rec move_rec; 1664 1665 left_leaf_bh = path_leaf_bh(left_path); 1666 left_el = path_leaf_el(left_path); 1667 1668 if (left_el->l_next_free_rec != left_el->l_count) { 1669 ocfs2_error(inode->i_sb, 1670 "Inode %llu has non-full interior leaf node %llu" 1671 "(next free = %u)", 1672 (unsigned long long)OCFS2_I(inode)->ip_blkno, 1673 (unsigned long long)left_leaf_bh->b_blocknr, 1674 le16_to_cpu(left_el->l_next_free_rec)); 1675 return -EROFS; 1676 } 1677 1678 /* 1679 * This extent block may already have an empty record, so we 1680 * return early if so. 1681 */ 1682 if (ocfs2_is_empty_extent(&left_el->l_recs[0])) 1683 return 0; 1684 1685 root_bh = left_path->p_node[subtree_index].bh; 1686 BUG_ON(root_bh != right_path->p_node[subtree_index].bh); 1687 1688 ret = ocfs2_journal_access(handle, inode, root_bh, 1689 OCFS2_JOURNAL_ACCESS_WRITE); 1690 if (ret) { 1691 mlog_errno(ret); 1692 goto out; 1693 } 1694 1695 for(i = subtree_index + 1; i < path_num_items(right_path); i++) { 1696 ret = ocfs2_journal_access(handle, inode, 1697 right_path->p_node[i].bh, 1698 OCFS2_JOURNAL_ACCESS_WRITE); 1699 if (ret) { 1700 mlog_errno(ret); 1701 goto out; 1702 } 1703 1704 ret = ocfs2_journal_access(handle, inode, 1705 left_path->p_node[i].bh, 1706 OCFS2_JOURNAL_ACCESS_WRITE); 1707 if (ret) { 1708 mlog_errno(ret); 1709 goto out; 1710 } 1711 } 1712 1713 right_leaf_bh = path_leaf_bh(right_path); 1714 right_el = path_leaf_el(right_path); 1715 1716 /* This is a code error, not a disk corruption. */ 1717 mlog_bug_on_msg(!right_el->l_next_free_rec, "Inode %llu: Rotate fails " 1718 "because rightmost leaf block %llu is empty\n", 1719 (unsigned long long)OCFS2_I(inode)->ip_blkno, 1720 (unsigned long long)right_leaf_bh->b_blocknr); 1721 1722 ocfs2_create_empty_extent(right_el); 1723 1724 ret = ocfs2_journal_dirty(handle, right_leaf_bh); 1725 if (ret) { 1726 mlog_errno(ret); 1727 goto out; 1728 } 1729 1730 /* Do the copy now. */ 1731 i = le16_to_cpu(left_el->l_next_free_rec) - 1; 1732 move_rec = left_el->l_recs[i]; 1733 right_el->l_recs[0] = move_rec; 1734 1735 /* 1736 * Clear out the record we just copied and shift everything 1737 * over, leaving an empty extent in the left leaf. 1738 * 1739 * We temporarily subtract from next_free_rec so that the 1740 * shift will lose the tail record (which is now defunct). 1741 */ 1742 le16_add_cpu(&left_el->l_next_free_rec, -1); 1743 ocfs2_shift_records_right(left_el); 1744 memset(&left_el->l_recs[0], 0, sizeof(struct ocfs2_extent_rec)); 1745 le16_add_cpu(&left_el->l_next_free_rec, 1); 1746 1747 ret = ocfs2_journal_dirty(handle, left_leaf_bh); 1748 if (ret) { 1749 mlog_errno(ret); 1750 goto out; 1751 } 1752 1753 ocfs2_complete_edge_insert(inode, handle, left_path, right_path, 1754 subtree_index); 1755 1756 out: 1757 return ret; 1758 } 1759 1760 /* 1761 * Given a full path, determine what cpos value would return us a path 1762 * containing the leaf immediately to the left of the current one. 1763 * 1764 * Will return zero if the path passed in is already the leftmost path. 1765 */ 1766 static int ocfs2_find_cpos_for_left_leaf(struct super_block *sb, 1767 struct ocfs2_path *path, u32 *cpos) 1768 { 1769 int i, j, ret = 0; 1770 u64 blkno; 1771 struct ocfs2_extent_list *el; 1772 1773 BUG_ON(path->p_tree_depth == 0); 1774 1775 *cpos = 0; 1776 1777 blkno = path_leaf_bh(path)->b_blocknr; 1778 1779 /* Start at the tree node just above the leaf and work our way up. */ 1780 i = path->p_tree_depth - 1; 1781 while (i >= 0) { 1782 el = path->p_node[i].el; 1783 1784 /* 1785 * Find the extent record just before the one in our 1786 * path. 1787 */ 1788 for(j = 0; j < le16_to_cpu(el->l_next_free_rec); j++) { 1789 if (le64_to_cpu(el->l_recs[j].e_blkno) == blkno) { 1790 if (j == 0) { 1791 if (i == 0) { 1792 /* 1793 * We've determined that the 1794 * path specified is already 1795 * the leftmost one - return a 1796 * cpos of zero. 1797 */ 1798 goto out; 1799 } 1800 /* 1801 * The leftmost record points to our 1802 * leaf - we need to travel up the 1803 * tree one level. 1804 */ 1805 goto next_node; 1806 } 1807 1808 *cpos = le32_to_cpu(el->l_recs[j - 1].e_cpos); 1809 *cpos = *cpos + ocfs2_rec_clusters(el, 1810 &el->l_recs[j - 1]); 1811 *cpos = *cpos - 1; 1812 goto out; 1813 } 1814 } 1815 1816 /* 1817 * If we got here, we never found a valid node where 1818 * the tree indicated one should be. 1819 */ 1820 ocfs2_error(sb, 1821 "Invalid extent tree at extent block %llu\n", 1822 (unsigned long long)blkno); 1823 ret = -EROFS; 1824 goto out; 1825 1826 next_node: 1827 blkno = path->p_node[i].bh->b_blocknr; 1828 i--; 1829 } 1830 1831 out: 1832 return ret; 1833 } 1834 1835 /* 1836 * Extend the transaction by enough credits to complete the rotation, 1837 * and still leave at least the original number of credits allocated 1838 * to this transaction. 1839 */ 1840 static int ocfs2_extend_rotate_transaction(handle_t *handle, int subtree_depth, 1841 int op_credits, 1842 struct ocfs2_path *path) 1843 { 1844 int credits = (path->p_tree_depth - subtree_depth) * 2 + 1 + op_credits; 1845 1846 if (handle->h_buffer_credits < credits) 1847 return ocfs2_extend_trans(handle, credits); 1848 1849 return 0; 1850 } 1851 1852 /* 1853 * Trap the case where we're inserting into the theoretical range past 1854 * the _actual_ left leaf range. Otherwise, we'll rotate a record 1855 * whose cpos is less than ours into the right leaf. 1856 * 1857 * It's only necessary to look at the rightmost record of the left 1858 * leaf because the logic that calls us should ensure that the 1859 * theoretical ranges in the path components above the leaves are 1860 * correct. 1861 */ 1862 static int ocfs2_rotate_requires_path_adjustment(struct ocfs2_path *left_path, 1863 u32 insert_cpos) 1864 { 1865 struct ocfs2_extent_list *left_el; 1866 struct ocfs2_extent_rec *rec; 1867 int next_free; 1868 1869 left_el = path_leaf_el(left_path); 1870 next_free = le16_to_cpu(left_el->l_next_free_rec); 1871 rec = &left_el->l_recs[next_free - 1]; 1872 1873 if (insert_cpos > le32_to_cpu(rec->e_cpos)) 1874 return 1; 1875 return 0; 1876 } 1877 1878 static int ocfs2_leftmost_rec_contains(struct ocfs2_extent_list *el, u32 cpos) 1879 { 1880 int next_free = le16_to_cpu(el->l_next_free_rec); 1881 unsigned int range; 1882 struct ocfs2_extent_rec *rec; 1883 1884 if (next_free == 0) 1885 return 0; 1886 1887 rec = &el->l_recs[0]; 1888 if (ocfs2_is_empty_extent(rec)) { 1889 /* Empty list. */ 1890 if (next_free == 1) 1891 return 0; 1892 rec = &el->l_recs[1]; 1893 } 1894 1895 range = le32_to_cpu(rec->e_cpos) + ocfs2_rec_clusters(el, rec); 1896 if (cpos >= le32_to_cpu(rec->e_cpos) && cpos < range) 1897 return 1; 1898 return 0; 1899 } 1900 1901 /* 1902 * Rotate all the records in a btree right one record, starting at insert_cpos. 1903 * 1904 * The path to the rightmost leaf should be passed in. 1905 * 1906 * The array is assumed to be large enough to hold an entire path (tree depth). 1907 * 1908 * Upon succesful return from this function: 1909 * 1910 * - The 'right_path' array will contain a path to the leaf block 1911 * whose range contains e_cpos. 1912 * - That leaf block will have a single empty extent in list index 0. 1913 * - In the case that the rotation requires a post-insert update, 1914 * *ret_left_path will contain a valid path which can be passed to 1915 * ocfs2_insert_path(). 1916 */ 1917 static int ocfs2_rotate_tree_right(struct inode *inode, 1918 handle_t *handle, 1919 enum ocfs2_split_type split, 1920 u32 insert_cpos, 1921 struct ocfs2_path *right_path, 1922 struct ocfs2_path **ret_left_path) 1923 { 1924 int ret, start, orig_credits = handle->h_buffer_credits; 1925 u32 cpos; 1926 struct ocfs2_path *left_path = NULL; 1927 1928 *ret_left_path = NULL; 1929 1930 left_path = ocfs2_new_path(path_root_bh(right_path), 1931 path_root_el(right_path)); 1932 if (!left_path) { 1933 ret = -ENOMEM; 1934 mlog_errno(ret); 1935 goto out; 1936 } 1937 1938 ret = ocfs2_find_cpos_for_left_leaf(inode->i_sb, right_path, &cpos); 1939 if (ret) { 1940 mlog_errno(ret); 1941 goto out; 1942 } 1943 1944 mlog(0, "Insert: %u, first left path cpos: %u\n", insert_cpos, cpos); 1945 1946 /* 1947 * What we want to do here is: 1948 * 1949 * 1) Start with the rightmost path. 1950 * 1951 * 2) Determine a path to the leaf block directly to the left 1952 * of that leaf. 1953 * 1954 * 3) Determine the 'subtree root' - the lowest level tree node 1955 * which contains a path to both leaves. 1956 * 1957 * 4) Rotate the subtree. 1958 * 1959 * 5) Find the next subtree by considering the left path to be 1960 * the new right path. 1961 * 1962 * The check at the top of this while loop also accepts 1963 * insert_cpos == cpos because cpos is only a _theoretical_ 1964 * value to get us the left path - insert_cpos might very well 1965 * be filling that hole. 1966 * 1967 * Stop at a cpos of '0' because we either started at the 1968 * leftmost branch (i.e., a tree with one branch and a 1969 * rotation inside of it), or we've gone as far as we can in 1970 * rotating subtrees. 1971 */ 1972 while (cpos && insert_cpos <= cpos) { 1973 mlog(0, "Rotating a tree: ins. cpos: %u, left path cpos: %u\n", 1974 insert_cpos, cpos); 1975 1976 ret = ocfs2_find_path(inode, left_path, cpos); 1977 if (ret) { 1978 mlog_errno(ret); 1979 goto out; 1980 } 1981 1982 mlog_bug_on_msg(path_leaf_bh(left_path) == 1983 path_leaf_bh(right_path), 1984 "Inode %lu: error during insert of %u " 1985 "(left path cpos %u) results in two identical " 1986 "paths ending at %llu\n", 1987 inode->i_ino, insert_cpos, cpos, 1988 (unsigned long long) 1989 path_leaf_bh(left_path)->b_blocknr); 1990 1991 if (split == SPLIT_NONE && 1992 ocfs2_rotate_requires_path_adjustment(left_path, 1993 insert_cpos)) { 1994 1995 /* 1996 * We've rotated the tree as much as we 1997 * should. The rest is up to 1998 * ocfs2_insert_path() to complete, after the 1999 * record insertion. We indicate this 2000 * situation by returning the left path. 2001 * 2002 * The reason we don't adjust the records here 2003 * before the record insert is that an error 2004 * later might break the rule where a parent 2005 * record e_cpos will reflect the actual 2006 * e_cpos of the 1st nonempty record of the 2007 * child list. 2008 */ 2009 *ret_left_path = left_path; 2010 goto out_ret_path; 2011 } 2012 2013 start = ocfs2_find_subtree_root(inode, left_path, right_path); 2014 2015 mlog(0, "Subtree root at index %d (blk %llu, depth %d)\n", 2016 start, 2017 (unsigned long long) right_path->p_node[start].bh->b_blocknr, 2018 right_path->p_tree_depth); 2019 2020 ret = ocfs2_extend_rotate_transaction(handle, start, 2021 orig_credits, right_path); 2022 if (ret) { 2023 mlog_errno(ret); 2024 goto out; 2025 } 2026 2027 ret = ocfs2_rotate_subtree_right(inode, handle, left_path, 2028 right_path, start); 2029 if (ret) { 2030 mlog_errno(ret); 2031 goto out; 2032 } 2033 2034 if (split != SPLIT_NONE && 2035 ocfs2_leftmost_rec_contains(path_leaf_el(right_path), 2036 insert_cpos)) { 2037 /* 2038 * A rotate moves the rightmost left leaf 2039 * record over to the leftmost right leaf 2040 * slot. If we're doing an extent split 2041 * instead of a real insert, then we have to 2042 * check that the extent to be split wasn't 2043 * just moved over. If it was, then we can 2044 * exit here, passing left_path back - 2045 * ocfs2_split_extent() is smart enough to 2046 * search both leaves. 2047 */ 2048 *ret_left_path = left_path; 2049 goto out_ret_path; 2050 } 2051 2052 /* 2053 * There is no need to re-read the next right path 2054 * as we know that it'll be our current left 2055 * path. Optimize by copying values instead. 2056 */ 2057 ocfs2_mv_path(right_path, left_path); 2058 2059 ret = ocfs2_find_cpos_for_left_leaf(inode->i_sb, right_path, 2060 &cpos); 2061 if (ret) { 2062 mlog_errno(ret); 2063 goto out; 2064 } 2065 } 2066 2067 out: 2068 ocfs2_free_path(left_path); 2069 2070 out_ret_path: 2071 return ret; 2072 } 2073 2074 static void ocfs2_update_edge_lengths(struct inode *inode, handle_t *handle, 2075 struct ocfs2_path *path) 2076 { 2077 int i, idx; 2078 struct ocfs2_extent_rec *rec; 2079 struct ocfs2_extent_list *el; 2080 struct ocfs2_extent_block *eb; 2081 u32 range; 2082 2083 /* Path should always be rightmost. */ 2084 eb = (struct ocfs2_extent_block *)path_leaf_bh(path)->b_data; 2085 BUG_ON(eb->h_next_leaf_blk != 0ULL); 2086 2087 el = &eb->h_list; 2088 BUG_ON(le16_to_cpu(el->l_next_free_rec) == 0); 2089 idx = le16_to_cpu(el->l_next_free_rec) - 1; 2090 rec = &el->l_recs[idx]; 2091 range = le32_to_cpu(rec->e_cpos) + ocfs2_rec_clusters(el, rec); 2092 2093 for (i = 0; i < path->p_tree_depth; i++) { 2094 el = path->p_node[i].el; 2095 idx = le16_to_cpu(el->l_next_free_rec) - 1; 2096 rec = &el->l_recs[idx]; 2097 2098 rec->e_int_clusters = cpu_to_le32(range); 2099 le32_add_cpu(&rec->e_int_clusters, -le32_to_cpu(rec->e_cpos)); 2100 2101 ocfs2_journal_dirty(handle, path->p_node[i].bh); 2102 } 2103 } 2104 2105 static void ocfs2_unlink_path(struct inode *inode, handle_t *handle, 2106 struct ocfs2_cached_dealloc_ctxt *dealloc, 2107 struct ocfs2_path *path, int unlink_start) 2108 { 2109 int ret, i; 2110 struct ocfs2_extent_block *eb; 2111 struct ocfs2_extent_list *el; 2112 struct buffer_head *bh; 2113 2114 for(i = unlink_start; i < path_num_items(path); i++) { 2115 bh = path->p_node[i].bh; 2116 2117 eb = (struct ocfs2_extent_block *)bh->b_data; 2118 /* 2119 * Not all nodes might have had their final count 2120 * decremented by the caller - handle this here. 2121 */ 2122 el = &eb->h_list; 2123 if (le16_to_cpu(el->l_next_free_rec) > 1) { 2124 mlog(ML_ERROR, 2125 "Inode %llu, attempted to remove extent block " 2126 "%llu with %u records\n", 2127 (unsigned long long)OCFS2_I(inode)->ip_blkno, 2128 (unsigned long long)le64_to_cpu(eb->h_blkno), 2129 le16_to_cpu(el->l_next_free_rec)); 2130 2131 ocfs2_journal_dirty(handle, bh); 2132 ocfs2_remove_from_cache(inode, bh); 2133 continue; 2134 } 2135 2136 el->l_next_free_rec = 0; 2137 memset(&el->l_recs[0], 0, sizeof(struct ocfs2_extent_rec)); 2138 2139 ocfs2_journal_dirty(handle, bh); 2140 2141 ret = ocfs2_cache_extent_block_free(dealloc, eb); 2142 if (ret) 2143 mlog_errno(ret); 2144 2145 ocfs2_remove_from_cache(inode, bh); 2146 } 2147 } 2148 2149 static void ocfs2_unlink_subtree(struct inode *inode, handle_t *handle, 2150 struct ocfs2_path *left_path, 2151 struct ocfs2_path *right_path, 2152 int subtree_index, 2153 struct ocfs2_cached_dealloc_ctxt *dealloc) 2154 { 2155 int i; 2156 struct buffer_head *root_bh = left_path->p_node[subtree_index].bh; 2157 struct ocfs2_extent_list *root_el = left_path->p_node[subtree_index].el; 2158 struct ocfs2_extent_list *el; 2159 struct ocfs2_extent_block *eb; 2160 2161 el = path_leaf_el(left_path); 2162 2163 eb = (struct ocfs2_extent_block *)right_path->p_node[subtree_index + 1].bh->b_data; 2164 2165 for(i = 1; i < le16_to_cpu(root_el->l_next_free_rec); i++) 2166 if (root_el->l_recs[i].e_blkno == eb->h_blkno) 2167 break; 2168 2169 BUG_ON(i >= le16_to_cpu(root_el->l_next_free_rec)); 2170 2171 memset(&root_el->l_recs[i], 0, sizeof(struct ocfs2_extent_rec)); 2172 le16_add_cpu(&root_el->l_next_free_rec, -1); 2173 2174 eb = (struct ocfs2_extent_block *)path_leaf_bh(left_path)->b_data; 2175 eb->h_next_leaf_blk = 0; 2176 2177 ocfs2_journal_dirty(handle, root_bh); 2178 ocfs2_journal_dirty(handle, path_leaf_bh(left_path)); 2179 2180 ocfs2_unlink_path(inode, handle, dealloc, right_path, 2181 subtree_index + 1); 2182 } 2183 2184 static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle, 2185 struct ocfs2_path *left_path, 2186 struct ocfs2_path *right_path, 2187 int subtree_index, 2188 struct ocfs2_cached_dealloc_ctxt *dealloc, 2189 int *deleted, 2190 struct ocfs2_extent_tree *et) 2191 { 2192 int ret, i, del_right_subtree = 0, right_has_empty = 0; 2193 struct buffer_head *root_bh, *et_root_bh = path_root_bh(right_path); 2194 struct ocfs2_extent_list *right_leaf_el, *left_leaf_el; 2195 struct ocfs2_extent_block *eb; 2196 2197 *deleted = 0; 2198 2199 right_leaf_el = path_leaf_el(right_path); 2200 left_leaf_el = path_leaf_el(left_path); 2201 root_bh = left_path->p_node[subtree_index].bh; 2202 BUG_ON(root_bh != right_path->p_node[subtree_index].bh); 2203 2204 if (!ocfs2_is_empty_extent(&left_leaf_el->l_recs[0])) 2205 return 0; 2206 2207 eb = (struct ocfs2_extent_block *)path_leaf_bh(right_path)->b_data; 2208 if (ocfs2_is_empty_extent(&right_leaf_el->l_recs[0])) { 2209 /* 2210 * It's legal for us to proceed if the right leaf is 2211 * the rightmost one and it has an empty extent. There 2212 * are two cases to handle - whether the leaf will be 2213 * empty after removal or not. If the leaf isn't empty 2214 * then just remove the empty extent up front. The 2215 * next block will handle empty leaves by flagging 2216 * them for unlink. 2217 * 2218 * Non rightmost leaves will throw -EAGAIN and the 2219 * caller can manually move the subtree and retry. 2220 */ 2221 2222 if (eb->h_next_leaf_blk != 0ULL) 2223 return -EAGAIN; 2224 2225 if (le16_to_cpu(right_leaf_el->l_next_free_rec) > 1) { 2226 ret = ocfs2_journal_access(handle, inode, 2227 path_leaf_bh(right_path), 2228 OCFS2_JOURNAL_ACCESS_WRITE); 2229 if (ret) { 2230 mlog_errno(ret); 2231 goto out; 2232 } 2233 2234 ocfs2_remove_empty_extent(right_leaf_el); 2235 } else 2236 right_has_empty = 1; 2237 } 2238 2239 if (eb->h_next_leaf_blk == 0ULL && 2240 le16_to_cpu(right_leaf_el->l_next_free_rec) == 1) { 2241 /* 2242 * We have to update i_last_eb_blk during the meta 2243 * data delete. 2244 */ 2245 ret = ocfs2_journal_access(handle, inode, et_root_bh, 2246 OCFS2_JOURNAL_ACCESS_WRITE); 2247 if (ret) { 2248 mlog_errno(ret); 2249 goto out; 2250 } 2251 2252 del_right_subtree = 1; 2253 } 2254 2255 /* 2256 * Getting here with an empty extent in the right path implies 2257 * that it's the rightmost path and will be deleted. 2258 */ 2259 BUG_ON(right_has_empty && !del_right_subtree); 2260 2261 ret = ocfs2_journal_access(handle, inode, root_bh, 2262 OCFS2_JOURNAL_ACCESS_WRITE); 2263 if (ret) { 2264 mlog_errno(ret); 2265 goto out; 2266 } 2267 2268 for(i = subtree_index + 1; i < path_num_items(right_path); i++) { 2269 ret = ocfs2_journal_access(handle, inode, 2270 right_path->p_node[i].bh, 2271 OCFS2_JOURNAL_ACCESS_WRITE); 2272 if (ret) { 2273 mlog_errno(ret); 2274 goto out; 2275 } 2276 2277 ret = ocfs2_journal_access(handle, inode, 2278 left_path->p_node[i].bh, 2279 OCFS2_JOURNAL_ACCESS_WRITE); 2280 if (ret) { 2281 mlog_errno(ret); 2282 goto out; 2283 } 2284 } 2285 2286 if (!right_has_empty) { 2287 /* 2288 * Only do this if we're moving a real 2289 * record. Otherwise, the action is delayed until 2290 * after removal of the right path in which case we 2291 * can do a simple shift to remove the empty extent. 2292 */ 2293 ocfs2_rotate_leaf(left_leaf_el, &right_leaf_el->l_recs[0]); 2294 memset(&right_leaf_el->l_recs[0], 0, 2295 sizeof(struct ocfs2_extent_rec)); 2296 } 2297 if (eb->h_next_leaf_blk == 0ULL) { 2298 /* 2299 * Move recs over to get rid of empty extent, decrease 2300 * next_free. This is allowed to remove the last 2301 * extent in our leaf (setting l_next_free_rec to 2302 * zero) - the delete code below won't care. 2303 */ 2304 ocfs2_remove_empty_extent(right_leaf_el); 2305 } 2306 2307 ret = ocfs2_journal_dirty(handle, path_leaf_bh(left_path)); 2308 if (ret) 2309 mlog_errno(ret); 2310 ret = ocfs2_journal_dirty(handle, path_leaf_bh(right_path)); 2311 if (ret) 2312 mlog_errno(ret); 2313 2314 if (del_right_subtree) { 2315 ocfs2_unlink_subtree(inode, handle, left_path, right_path, 2316 subtree_index, dealloc); 2317 ocfs2_update_edge_lengths(inode, handle, left_path); 2318 2319 eb = (struct ocfs2_extent_block *)path_leaf_bh(left_path)->b_data; 2320 ocfs2_set_last_eb_blk(et, le64_to_cpu(eb->h_blkno)); 2321 2322 /* 2323 * Removal of the extent in the left leaf was skipped 2324 * above so we could delete the right path 2325 * 1st. 2326 */ 2327 if (right_has_empty) 2328 ocfs2_remove_empty_extent(left_leaf_el); 2329 2330 ret = ocfs2_journal_dirty(handle, et_root_bh); 2331 if (ret) 2332 mlog_errno(ret); 2333 2334 *deleted = 1; 2335 } else 2336 ocfs2_complete_edge_insert(inode, handle, left_path, right_path, 2337 subtree_index); 2338 2339 out: 2340 return ret; 2341 } 2342 2343 /* 2344 * Given a full path, determine what cpos value would return us a path 2345 * containing the leaf immediately to the right of the current one. 2346 * 2347 * Will return zero if the path passed in is already the rightmost path. 2348 * 2349 * This looks similar, but is subtly different to 2350 * ocfs2_find_cpos_for_left_leaf(). 2351 */ 2352 static int ocfs2_find_cpos_for_right_leaf(struct super_block *sb, 2353 struct ocfs2_path *path, u32 *cpos) 2354 { 2355 int i, j, ret = 0; 2356 u64 blkno; 2357 struct ocfs2_extent_list *el; 2358 2359 *cpos = 0; 2360 2361 if (path->p_tree_depth == 0) 2362 return 0; 2363 2364 blkno = path_leaf_bh(path)->b_blocknr; 2365 2366 /* Start at the tree node just above the leaf and work our way up. */ 2367 i = path->p_tree_depth - 1; 2368 while (i >= 0) { 2369 int next_free; 2370 2371 el = path->p_node[i].el; 2372 2373 /* 2374 * Find the extent record just after the one in our 2375 * path. 2376 */ 2377 next_free = le16_to_cpu(el->l_next_free_rec); 2378 for(j = 0; j < le16_to_cpu(el->l_next_free_rec); j++) { 2379 if (le64_to_cpu(el->l_recs[j].e_blkno) == blkno) { 2380 if (j == (next_free - 1)) { 2381 if (i == 0) { 2382 /* 2383 * We've determined that the 2384 * path specified is already 2385 * the rightmost one - return a 2386 * cpos of zero. 2387 */ 2388 goto out; 2389 } 2390 /* 2391 * The rightmost record points to our 2392 * leaf - we need to travel up the 2393 * tree one level. 2394 */ 2395 goto next_node; 2396 } 2397 2398 *cpos = le32_to_cpu(el->l_recs[j + 1].e_cpos); 2399 goto out; 2400 } 2401 } 2402 2403 /* 2404 * If we got here, we never found a valid node where 2405 * the tree indicated one should be. 2406 */ 2407 ocfs2_error(sb, 2408 "Invalid extent tree at extent block %llu\n", 2409 (unsigned long long)blkno); 2410 ret = -EROFS; 2411 goto out; 2412 2413 next_node: 2414 blkno = path->p_node[i].bh->b_blocknr; 2415 i--; 2416 } 2417 2418 out: 2419 return ret; 2420 } 2421 2422 static int ocfs2_rotate_rightmost_leaf_left(struct inode *inode, 2423 handle_t *handle, 2424 struct buffer_head *bh, 2425 struct ocfs2_extent_list *el) 2426 { 2427 int ret; 2428 2429 if (!ocfs2_is_empty_extent(&el->l_recs[0])) 2430 return 0; 2431 2432 ret = ocfs2_journal_access(handle, inode, bh, 2433 OCFS2_JOURNAL_ACCESS_WRITE); 2434 if (ret) { 2435 mlog_errno(ret); 2436 goto out; 2437 } 2438 2439 ocfs2_remove_empty_extent(el); 2440 2441 ret = ocfs2_journal_dirty(handle, bh); 2442 if (ret) 2443 mlog_errno(ret); 2444 2445 out: 2446 return ret; 2447 } 2448 2449 static int __ocfs2_rotate_tree_left(struct inode *inode, 2450 handle_t *handle, int orig_credits, 2451 struct ocfs2_path *path, 2452 struct ocfs2_cached_dealloc_ctxt *dealloc, 2453 struct ocfs2_path **empty_extent_path, 2454 struct ocfs2_extent_tree *et) 2455 { 2456 int ret, subtree_root, deleted; 2457 u32 right_cpos; 2458 struct ocfs2_path *left_path = NULL; 2459 struct ocfs2_path *right_path = NULL; 2460 2461 BUG_ON(!ocfs2_is_empty_extent(&(path_leaf_el(path)->l_recs[0]))); 2462 2463 *empty_extent_path = NULL; 2464 2465 ret = ocfs2_find_cpos_for_right_leaf(inode->i_sb, path, 2466 &right_cpos); 2467 if (ret) { 2468 mlog_errno(ret); 2469 goto out; 2470 } 2471 2472 left_path = ocfs2_new_path(path_root_bh(path), 2473 path_root_el(path)); 2474 if (!left_path) { 2475 ret = -ENOMEM; 2476 mlog_errno(ret); 2477 goto out; 2478 } 2479 2480 ocfs2_cp_path(left_path, path); 2481 2482 right_path = ocfs2_new_path(path_root_bh(path), 2483 path_root_el(path)); 2484 if (!right_path) { 2485 ret = -ENOMEM; 2486 mlog_errno(ret); 2487 goto out; 2488 } 2489 2490 while (right_cpos) { 2491 ret = ocfs2_find_path(inode, right_path, right_cpos); 2492 if (ret) { 2493 mlog_errno(ret); 2494 goto out; 2495 } 2496 2497 subtree_root = ocfs2_find_subtree_root(inode, left_path, 2498 right_path); 2499 2500 mlog(0, "Subtree root at index %d (blk %llu, depth %d)\n", 2501 subtree_root, 2502 (unsigned long long) 2503 right_path->p_node[subtree_root].bh->b_blocknr, 2504 right_path->p_tree_depth); 2505 2506 ret = ocfs2_extend_rotate_transaction(handle, subtree_root, 2507 orig_credits, left_path); 2508 if (ret) { 2509 mlog_errno(ret); 2510 goto out; 2511 } 2512 2513 /* 2514 * Caller might still want to make changes to the 2515 * tree root, so re-add it to the journal here. 2516 */ 2517 ret = ocfs2_journal_access(handle, inode, 2518 path_root_bh(left_path), 2519 OCFS2_JOURNAL_ACCESS_WRITE); 2520 if (ret) { 2521 mlog_errno(ret); 2522 goto out; 2523 } 2524 2525 ret = ocfs2_rotate_subtree_left(inode, handle, left_path, 2526 right_path, subtree_root, 2527 dealloc, &deleted, et); 2528 if (ret == -EAGAIN) { 2529 /* 2530 * The rotation has to temporarily stop due to 2531 * the right subtree having an empty 2532 * extent. Pass it back to the caller for a 2533 * fixup. 2534 */ 2535 *empty_extent_path = right_path; 2536 right_path = NULL; 2537 goto out; 2538 } 2539 if (ret) { 2540 mlog_errno(ret); 2541 goto out; 2542 } 2543 2544 /* 2545 * The subtree rotate might have removed records on 2546 * the rightmost edge. If so, then rotation is 2547 * complete. 2548 */ 2549 if (deleted) 2550 break; 2551 2552 ocfs2_mv_path(left_path, right_path); 2553 2554 ret = ocfs2_find_cpos_for_right_leaf(inode->i_sb, left_path, 2555 &right_cpos); 2556 if (ret) { 2557 mlog_errno(ret); 2558 goto out; 2559 } 2560 } 2561 2562 out: 2563 ocfs2_free_path(right_path); 2564 ocfs2_free_path(left_path); 2565 2566 return ret; 2567 } 2568 2569 static int ocfs2_remove_rightmost_path(struct inode *inode, handle_t *handle, 2570 struct ocfs2_path *path, 2571 struct ocfs2_cached_dealloc_ctxt *dealloc, 2572 struct ocfs2_extent_tree *et) 2573 { 2574 int ret, subtree_index; 2575 u32 cpos; 2576 struct ocfs2_path *left_path = NULL; 2577 struct ocfs2_extent_block *eb; 2578 struct ocfs2_extent_list *el; 2579 2580 2581 ret = et->eops->sanity_check(inode, et); 2582 if (ret) 2583 goto out; 2584 /* 2585 * There's two ways we handle this depending on 2586 * whether path is the only existing one. 2587 */ 2588 ret = ocfs2_extend_rotate_transaction(handle, 0, 2589 handle->h_buffer_credits, 2590 path); 2591 if (ret) { 2592 mlog_errno(ret); 2593 goto out; 2594 } 2595 2596 ret = ocfs2_journal_access_path(inode, handle, path); 2597 if (ret) { 2598 mlog_errno(ret); 2599 goto out; 2600 } 2601 2602 ret = ocfs2_find_cpos_for_left_leaf(inode->i_sb, path, &cpos); 2603 if (ret) { 2604 mlog_errno(ret); 2605 goto out; 2606 } 2607 2608 if (cpos) { 2609 /* 2610 * We have a path to the left of this one - it needs 2611 * an update too. 2612 */ 2613 left_path = ocfs2_new_path(path_root_bh(path), 2614 path_root_el(path)); 2615 if (!left_path) { 2616 ret = -ENOMEM; 2617 mlog_errno(ret); 2618 goto out; 2619 } 2620 2621 ret = ocfs2_find_path(inode, left_path, cpos); 2622 if (ret) { 2623 mlog_errno(ret); 2624 goto out; 2625 } 2626 2627 ret = ocfs2_journal_access_path(inode, handle, left_path); 2628 if (ret) { 2629 mlog_errno(ret); 2630 goto out; 2631 } 2632 2633 subtree_index = ocfs2_find_subtree_root(inode, left_path, path); 2634 2635 ocfs2_unlink_subtree(inode, handle, left_path, path, 2636 subtree_index, dealloc); 2637 ocfs2_update_edge_lengths(inode, handle, left_path); 2638 2639 eb = (struct ocfs2_extent_block *)path_leaf_bh(left_path)->b_data; 2640 ocfs2_set_last_eb_blk(et, le64_to_cpu(eb->h_blkno)); 2641 } else { 2642 /* 2643 * 'path' is also the leftmost path which 2644 * means it must be the only one. This gets 2645 * handled differently because we want to 2646 * revert the inode back to having extents 2647 * in-line. 2648 */ 2649 ocfs2_unlink_path(inode, handle, dealloc, path, 1); 2650 2651 el = et->root_el; 2652 el->l_tree_depth = 0; 2653 el->l_next_free_rec = 0; 2654 memset(&el->l_recs[0], 0, sizeof(struct ocfs2_extent_rec)); 2655 2656 ocfs2_set_last_eb_blk(et, 0); 2657 } 2658 2659 ocfs2_journal_dirty(handle, path_root_bh(path)); 2660 2661 out: 2662 ocfs2_free_path(left_path); 2663 return ret; 2664 } 2665 2666 /* 2667 * Left rotation of btree records. 2668 * 2669 * In many ways, this is (unsurprisingly) the opposite of right 2670 * rotation. We start at some non-rightmost path containing an empty 2671 * extent in the leaf block. The code works its way to the rightmost 2672 * path by rotating records to the left in every subtree. 2673 * 2674 * This is used by any code which reduces the number of extent records 2675 * in a leaf. After removal, an empty record should be placed in the 2676 * leftmost list position. 2677 * 2678 * This won't handle a length update of the rightmost path records if 2679 * the rightmost tree leaf record is removed so the caller is 2680 * responsible for detecting and correcting that. 2681 */ 2682 static int ocfs2_rotate_tree_left(struct inode *inode, handle_t *handle, 2683 struct ocfs2_path *path, 2684 struct ocfs2_cached_dealloc_ctxt *dealloc, 2685 struct ocfs2_extent_tree *et) 2686 { 2687 int ret, orig_credits = handle->h_buffer_credits; 2688 struct ocfs2_path *tmp_path = NULL, *restart_path = NULL; 2689 struct ocfs2_extent_block *eb; 2690 struct ocfs2_extent_list *el; 2691 2692 el = path_leaf_el(path); 2693 if (!ocfs2_is_empty_extent(&el->l_recs[0])) 2694 return 0; 2695 2696 if (path->p_tree_depth == 0) { 2697 rightmost_no_delete: 2698 /* 2699 * Inline extents. This is trivially handled, so do 2700 * it up front. 2701 */ 2702 ret = ocfs2_rotate_rightmost_leaf_left(inode, handle, 2703 path_leaf_bh(path), 2704 path_leaf_el(path)); 2705 if (ret) 2706 mlog_errno(ret); 2707 goto out; 2708 } 2709 2710 /* 2711 * Handle rightmost branch now. There's several cases: 2712 * 1) simple rotation leaving records in there. That's trivial. 2713 * 2) rotation requiring a branch delete - there's no more 2714 * records left. Two cases of this: 2715 * a) There are branches to the left. 2716 * b) This is also the leftmost (the only) branch. 2717 * 2718 * 1) is handled via ocfs2_rotate_rightmost_leaf_left() 2719 * 2a) we need the left branch so that we can update it with the unlink 2720 * 2b) we need to bring the inode back to inline extents. 2721 */ 2722 2723 eb = (struct ocfs2_extent_block *)path_leaf_bh(path)->b_data; 2724 el = &eb->h_list; 2725 if (eb->h_next_leaf_blk == 0) { 2726 /* 2727 * This gets a bit tricky if we're going to delete the 2728 * rightmost path. Get the other cases out of the way 2729 * 1st. 2730 */ 2731 if (le16_to_cpu(el->l_next_free_rec) > 1) 2732 goto rightmost_no_delete; 2733 2734 if (le16_to_cpu(el->l_next_free_rec) == 0) { 2735 ret = -EIO; 2736 ocfs2_error(inode->i_sb, 2737 "Inode %llu has empty extent block at %llu", 2738 (unsigned long long)OCFS2_I(inode)->ip_blkno, 2739 (unsigned long long)le64_to_cpu(eb->h_blkno)); 2740 goto out; 2741 } 2742 2743 /* 2744 * XXX: The caller can not trust "path" any more after 2745 * this as it will have been deleted. What do we do? 2746 * 2747 * In theory the rotate-for-merge code will never get 2748 * here because it'll always ask for a rotate in a 2749 * nonempty list. 2750 */ 2751 2752 ret = ocfs2_remove_rightmost_path(inode, handle, path, 2753 dealloc, et); 2754 if (ret) 2755 mlog_errno(ret); 2756 goto out; 2757 } 2758 2759 /* 2760 * Now we can loop, remembering the path we get from -EAGAIN 2761 * and restarting from there. 2762 */ 2763 try_rotate: 2764 ret = __ocfs2_rotate_tree_left(inode, handle, orig_credits, path, 2765 dealloc, &restart_path, et); 2766 if (ret && ret != -EAGAIN) { 2767 mlog_errno(ret); 2768 goto out; 2769 } 2770 2771 while (ret == -EAGAIN) { 2772 tmp_path = restart_path; 2773 restart_path = NULL; 2774 2775 ret = __ocfs2_rotate_tree_left(inode, handle, orig_credits, 2776 tmp_path, dealloc, 2777 &restart_path, et); 2778 if (ret && ret != -EAGAIN) { 2779 mlog_errno(ret); 2780 goto out; 2781 } 2782 2783 ocfs2_free_path(tmp_path); 2784 tmp_path = NULL; 2785 2786 if (ret == 0) 2787 goto try_rotate; 2788 } 2789 2790 out: 2791 ocfs2_free_path(tmp_path); 2792 ocfs2_free_path(restart_path); 2793 return ret; 2794 } 2795 2796 static void ocfs2_cleanup_merge(struct ocfs2_extent_list *el, 2797 int index) 2798 { 2799 struct ocfs2_extent_rec *rec = &el->l_recs[index]; 2800 unsigned int size; 2801 2802 if (rec->e_leaf_clusters == 0) { 2803 /* 2804 * We consumed all of the merged-from record. An empty 2805 * extent cannot exist anywhere but the 1st array 2806 * position, so move things over if the merged-from 2807 * record doesn't occupy that position. 2808 * 2809 * This creates a new empty extent so the caller 2810 * should be smart enough to have removed any existing 2811 * ones. 2812 */ 2813 if (index > 0) { 2814 BUG_ON(ocfs2_is_empty_extent(&el->l_recs[0])); 2815 size = index * sizeof(struct ocfs2_extent_rec); 2816 memmove(&el->l_recs[1], &el->l_recs[0], size); 2817 } 2818 2819 /* 2820 * Always memset - the caller doesn't check whether it 2821 * created an empty extent, so there could be junk in 2822 * the other fields. 2823 */ 2824 memset(&el->l_recs[0], 0, sizeof(struct ocfs2_extent_rec)); 2825 } 2826 } 2827 2828 static int ocfs2_get_right_path(struct inode *inode, 2829 struct ocfs2_path *left_path, 2830 struct ocfs2_path **ret_right_path) 2831 { 2832 int ret; 2833 u32 right_cpos; 2834 struct ocfs2_path *right_path = NULL; 2835 struct ocfs2_extent_list *left_el; 2836 2837 *ret_right_path = NULL; 2838 2839 /* This function shouldn't be called for non-trees. */ 2840 BUG_ON(left_path->p_tree_depth == 0); 2841 2842 left_el = path_leaf_el(left_path); 2843 BUG_ON(left_el->l_next_free_rec != left_el->l_count); 2844 2845 ret = ocfs2_find_cpos_for_right_leaf(inode->i_sb, left_path, 2846 &right_cpos); 2847 if (ret) { 2848 mlog_errno(ret); 2849 goto out; 2850 } 2851 2852 /* This function shouldn't be called for the rightmost leaf. */ 2853 BUG_ON(right_cpos == 0); 2854 2855 right_path = ocfs2_new_path(path_root_bh(left_path), 2856 path_root_el(left_path)); 2857 if (!right_path) { 2858 ret = -ENOMEM; 2859 mlog_errno(ret); 2860 goto out; 2861 } 2862 2863 ret = ocfs2_find_path(inode, right_path, right_cpos); 2864 if (ret) { 2865 mlog_errno(ret); 2866 goto out; 2867 } 2868 2869 *ret_right_path = right_path; 2870 out: 2871 if (ret) 2872 ocfs2_free_path(right_path); 2873 return ret; 2874 } 2875 2876 /* 2877 * Remove split_rec clusters from the record at index and merge them 2878 * onto the beginning of the record "next" to it. 2879 * For index < l_count - 1, the next means the extent rec at index + 1. 2880 * For index == l_count - 1, the "next" means the 1st extent rec of the 2881 * next extent block. 2882 */ 2883 static int ocfs2_merge_rec_right(struct inode *inode, 2884 struct ocfs2_path *left_path, 2885 handle_t *handle, 2886 struct ocfs2_extent_rec *split_rec, 2887 int index) 2888 { 2889 int ret, next_free, i; 2890 unsigned int split_clusters = le16_to_cpu(split_rec->e_leaf_clusters); 2891 struct ocfs2_extent_rec *left_rec; 2892 struct ocfs2_extent_rec *right_rec; 2893 struct ocfs2_extent_list *right_el; 2894 struct ocfs2_path *right_path = NULL; 2895 int subtree_index = 0; 2896 struct ocfs2_extent_list *el = path_leaf_el(left_path); 2897 struct buffer_head *bh = path_leaf_bh(left_path); 2898 struct buffer_head *root_bh = NULL; 2899 2900 BUG_ON(index >= le16_to_cpu(el->l_next_free_rec)); 2901 left_rec = &el->l_recs[index]; 2902 2903 if (index == le16_to_cpu(el->l_next_free_rec) - 1 && 2904 le16_to_cpu(el->l_next_free_rec) == le16_to_cpu(el->l_count)) { 2905 /* we meet with a cross extent block merge. */ 2906 ret = ocfs2_get_right_path(inode, left_path, &right_path); 2907 if (ret) { 2908 mlog_errno(ret); 2909 goto out; 2910 } 2911 2912 right_el = path_leaf_el(right_path); 2913 next_free = le16_to_cpu(right_el->l_next_free_rec); 2914 BUG_ON(next_free <= 0); 2915 right_rec = &right_el->l_recs[0]; 2916 if (ocfs2_is_empty_extent(right_rec)) { 2917 BUG_ON(next_free <= 1); 2918 right_rec = &right_el->l_recs[1]; 2919 } 2920 2921 BUG_ON(le32_to_cpu(left_rec->e_cpos) + 2922 le16_to_cpu(left_rec->e_leaf_clusters) != 2923 le32_to_cpu(right_rec->e_cpos)); 2924 2925 subtree_index = ocfs2_find_subtree_root(inode, 2926 left_path, right_path); 2927 2928 ret = ocfs2_extend_rotate_transaction(handle, subtree_index, 2929 handle->h_buffer_credits, 2930 right_path); 2931 if (ret) { 2932 mlog_errno(ret); 2933 goto out; 2934 } 2935 2936 root_bh = left_path->p_node[subtree_index].bh; 2937 BUG_ON(root_bh != right_path->p_node[subtree_index].bh); 2938 2939 ret = ocfs2_journal_access(handle, inode, root_bh, 2940 OCFS2_JOURNAL_ACCESS_WRITE); 2941 if (ret) { 2942 mlog_errno(ret); 2943 goto out; 2944 } 2945 2946 for (i = subtree_index + 1; 2947 i < path_num_items(right_path); i++) { 2948 ret = ocfs2_journal_access(handle, inode, 2949 right_path->p_node[i].bh, 2950 OCFS2_JOURNAL_ACCESS_WRITE); 2951 if (ret) { 2952 mlog_errno(ret); 2953 goto out; 2954 } 2955 2956 ret = ocfs2_journal_access(handle, inode, 2957 left_path->p_node[i].bh, 2958 OCFS2_JOURNAL_ACCESS_WRITE); 2959 if (ret) { 2960 mlog_errno(ret); 2961 goto out; 2962 } 2963 } 2964 2965 } else { 2966 BUG_ON(index == le16_to_cpu(el->l_next_free_rec) - 1); 2967 right_rec = &el->l_recs[index + 1]; 2968 } 2969 2970 ret = ocfs2_journal_access(handle, inode, bh, 2971 OCFS2_JOURNAL_ACCESS_WRITE); 2972 if (ret) { 2973 mlog_errno(ret); 2974 goto out; 2975 } 2976 2977 le16_add_cpu(&left_rec->e_leaf_clusters, -split_clusters); 2978 2979 le32_add_cpu(&right_rec->e_cpos, -split_clusters); 2980 le64_add_cpu(&right_rec->e_blkno, 2981 -ocfs2_clusters_to_blocks(inode->i_sb, split_clusters)); 2982 le16_add_cpu(&right_rec->e_leaf_clusters, split_clusters); 2983 2984 ocfs2_cleanup_merge(el, index); 2985 2986 ret = ocfs2_journal_dirty(handle, bh); 2987 if (ret) 2988 mlog_errno(ret); 2989 2990 if (right_path) { 2991 ret = ocfs2_journal_dirty(handle, path_leaf_bh(right_path)); 2992 if (ret) 2993 mlog_errno(ret); 2994 2995 ocfs2_complete_edge_insert(inode, handle, left_path, 2996 right_path, subtree_index); 2997 } 2998 out: 2999 if (right_path) 3000 ocfs2_free_path(right_path); 3001 return ret; 3002 } 3003 3004 static int ocfs2_get_left_path(struct inode *inode, 3005 struct ocfs2_path *right_path, 3006 struct ocfs2_path **ret_left_path) 3007 { 3008 int ret; 3009 u32 left_cpos; 3010 struct ocfs2_path *left_path = NULL; 3011 3012 *ret_left_path = NULL; 3013 3014 /* This function shouldn't be called for non-trees. */ 3015 BUG_ON(right_path->p_tree_depth == 0); 3016 3017 ret = ocfs2_find_cpos_for_left_leaf(inode->i_sb, 3018 right_path, &left_cpos); 3019 if (ret) { 3020 mlog_errno(ret); 3021 goto out; 3022 } 3023 3024 /* This function shouldn't be called for the leftmost leaf. */ 3025 BUG_ON(left_cpos == 0); 3026 3027 left_path = ocfs2_new_path(path_root_bh(right_path), 3028 path_root_el(right_path)); 3029 if (!left_path) { 3030 ret = -ENOMEM; 3031 mlog_errno(ret); 3032 goto out; 3033 } 3034 3035 ret = ocfs2_find_path(inode, left_path, left_cpos); 3036 if (ret) { 3037 mlog_errno(ret); 3038 goto out; 3039 } 3040 3041 *ret_left_path = left_path; 3042 out: 3043 if (ret) 3044 ocfs2_free_path(left_path); 3045 return ret; 3046 } 3047 3048 /* 3049 * Remove split_rec clusters from the record at index and merge them 3050 * onto the tail of the record "before" it. 3051 * For index > 0, the "before" means the extent rec at index - 1. 3052 * 3053 * For index == 0, the "before" means the last record of the previous 3054 * extent block. And there is also a situation that we may need to 3055 * remove the rightmost leaf extent block in the right_path and change 3056 * the right path to indicate the new rightmost path. 3057 */ 3058 static int ocfs2_merge_rec_left(struct inode *inode, 3059 struct ocfs2_path *right_path, 3060 handle_t *handle, 3061 struct ocfs2_extent_rec *split_rec, 3062 struct ocfs2_cached_dealloc_ctxt *dealloc, 3063 struct ocfs2_extent_tree *et, 3064 int index) 3065 { 3066 int ret, i, subtree_index = 0, has_empty_extent = 0; 3067 unsigned int split_clusters = le16_to_cpu(split_rec->e_leaf_clusters); 3068 struct ocfs2_extent_rec *left_rec; 3069 struct ocfs2_extent_rec *right_rec; 3070 struct ocfs2_extent_list *el = path_leaf_el(right_path); 3071 struct buffer_head *bh = path_leaf_bh(right_path); 3072 struct buffer_head *root_bh = NULL; 3073 struct ocfs2_path *left_path = NULL; 3074 struct ocfs2_extent_list *left_el; 3075 3076 BUG_ON(index < 0); 3077 3078 right_rec = &el->l_recs[index]; 3079 if (index == 0) { 3080 /* we meet with a cross extent block merge. */ 3081 ret = ocfs2_get_left_path(inode, right_path, &left_path); 3082 if (ret) { 3083 mlog_errno(ret); 3084 goto out; 3085 } 3086 3087 left_el = path_leaf_el(left_path); 3088 BUG_ON(le16_to_cpu(left_el->l_next_free_rec) != 3089 le16_to_cpu(left_el->l_count)); 3090 3091 left_rec = &left_el->l_recs[ 3092 le16_to_cpu(left_el->l_next_free_rec) - 1]; 3093 BUG_ON(le32_to_cpu(left_rec->e_cpos) + 3094 le16_to_cpu(left_rec->e_leaf_clusters) != 3095 le32_to_cpu(split_rec->e_cpos)); 3096 3097 subtree_index = ocfs2_find_subtree_root(inode, 3098 left_path, right_path); 3099 3100 ret = ocfs2_extend_rotate_transaction(handle, subtree_index, 3101 handle->h_buffer_credits, 3102 left_path); 3103 if (ret) { 3104 mlog_errno(ret); 3105 goto out; 3106 } 3107 3108 root_bh = left_path->p_node[subtree_index].bh; 3109 BUG_ON(root_bh != right_path->p_node[subtree_index].bh); 3110 3111 ret = ocfs2_journal_access(handle, inode, root_bh, 3112 OCFS2_JOURNAL_ACCESS_WRITE); 3113 if (ret) { 3114 mlog_errno(ret); 3115 goto out; 3116 } 3117 3118 for (i = subtree_index + 1; 3119 i < path_num_items(right_path); i++) { 3120 ret = ocfs2_journal_access(handle, inode, 3121 right_path->p_node[i].bh, 3122 OCFS2_JOURNAL_ACCESS_WRITE); 3123 if (ret) { 3124 mlog_errno(ret); 3125 goto out; 3126 } 3127 3128 ret = ocfs2_journal_access(handle, inode, 3129 left_path->p_node[i].bh, 3130 OCFS2_JOURNAL_ACCESS_WRITE); 3131 if (ret) { 3132 mlog_errno(ret); 3133 goto out; 3134 } 3135 } 3136 } else { 3137 left_rec = &el->l_recs[index - 1]; 3138 if (ocfs2_is_empty_extent(&el->l_recs[0])) 3139 has_empty_extent = 1; 3140 } 3141 3142 ret = ocfs2_journal_access(handle, inode, bh, 3143 OCFS2_JOURNAL_ACCESS_WRITE); 3144 if (ret) { 3145 mlog_errno(ret); 3146 goto out; 3147 } 3148 3149 if (has_empty_extent && index == 1) { 3150 /* 3151 * The easy case - we can just plop the record right in. 3152 */ 3153 *left_rec = *split_rec; 3154 3155 has_empty_extent = 0; 3156 } else 3157 le16_add_cpu(&left_rec->e_leaf_clusters, split_clusters); 3158 3159 le32_add_cpu(&right_rec->e_cpos, split_clusters); 3160 le64_add_cpu(&right_rec->e_blkno, 3161 ocfs2_clusters_to_blocks(inode->i_sb, split_clusters)); 3162 le16_add_cpu(&right_rec->e_leaf_clusters, -split_clusters); 3163 3164 ocfs2_cleanup_merge(el, index); 3165 3166 ret = ocfs2_journal_dirty(handle, bh); 3167 if (ret) 3168 mlog_errno(ret); 3169 3170 if (left_path) { 3171 ret = ocfs2_journal_dirty(handle, path_leaf_bh(left_path)); 3172 if (ret) 3173 mlog_errno(ret); 3174 3175 /* 3176 * In the situation that the right_rec is empty and the extent 3177 * block is empty also, ocfs2_complete_edge_insert can't handle 3178 * it and we need to delete the right extent block. 3179 */ 3180 if (le16_to_cpu(right_rec->e_leaf_clusters) == 0 && 3181 le16_to_cpu(el->l_next_free_rec) == 1) { 3182 3183 ret = ocfs2_remove_rightmost_path(inode, handle, 3184 right_path, 3185 dealloc, et); 3186 if (ret) { 3187 mlog_errno(ret); 3188 goto out; 3189 } 3190 3191 /* Now the rightmost extent block has been deleted. 3192 * So we use the new rightmost path. 3193 */ 3194 ocfs2_mv_path(right_path, left_path); 3195 left_path = NULL; 3196 } else 3197 ocfs2_complete_edge_insert(inode, handle, left_path, 3198 right_path, subtree_index); 3199 } 3200 out: 3201 if (left_path) 3202 ocfs2_free_path(left_path); 3203 return ret; 3204 } 3205 3206 static int ocfs2_try_to_merge_extent(struct inode *inode, 3207 handle_t *handle, 3208 struct ocfs2_path *path, 3209 int split_index, 3210 struct ocfs2_extent_rec *split_rec, 3211 struct ocfs2_cached_dealloc_ctxt *dealloc, 3212 struct ocfs2_merge_ctxt *ctxt, 3213 struct ocfs2_extent_tree *et) 3214 3215 { 3216 int ret = 0; 3217 struct ocfs2_extent_list *el = path_leaf_el(path); 3218 struct ocfs2_extent_rec *rec = &el->l_recs[split_index]; 3219 3220 BUG_ON(ctxt->c_contig_type == CONTIG_NONE); 3221 3222 if (ctxt->c_split_covers_rec && ctxt->c_has_empty_extent) { 3223 /* 3224 * The merge code will need to create an empty 3225 * extent to take the place of the newly 3226 * emptied slot. Remove any pre-existing empty 3227 * extents - having more than one in a leaf is 3228 * illegal. 3229 */ 3230 ret = ocfs2_rotate_tree_left(inode, handle, path, 3231 dealloc, et); 3232 if (ret) { 3233 mlog_errno(ret); 3234 goto out; 3235 } 3236 split_index--; 3237 rec = &el->l_recs[split_index]; 3238 } 3239 3240 if (ctxt->c_contig_type == CONTIG_LEFTRIGHT) { 3241 /* 3242 * Left-right contig implies this. 3243 */ 3244 BUG_ON(!ctxt->c_split_covers_rec); 3245 3246 /* 3247 * Since the leftright insert always covers the entire 3248 * extent, this call will delete the insert record 3249 * entirely, resulting in an empty extent record added to 3250 * the extent block. 3251 * 3252 * Since the adding of an empty extent shifts 3253 * everything back to the right, there's no need to 3254 * update split_index here. 3255 * 3256 * When the split_index is zero, we need to merge it to the 3257 * prevoius extent block. It is more efficient and easier 3258 * if we do merge_right first and merge_left later. 3259 */ 3260 ret = ocfs2_merge_rec_right(inode, path, 3261 handle, split_rec, 3262 split_index); 3263 if (ret) { 3264 mlog_errno(ret); 3265 goto out; 3266 } 3267 3268 /* 3269 * We can only get this from logic error above. 3270 */ 3271 BUG_ON(!ocfs2_is_empty_extent(&el->l_recs[0])); 3272 3273 /* The merge left us with an empty extent, remove it. */ 3274 ret = ocfs2_rotate_tree_left(inode, handle, path, 3275 dealloc, et); 3276 if (ret) { 3277 mlog_errno(ret); 3278 goto out; 3279 } 3280 3281 rec = &el->l_recs[split_index]; 3282 3283 /* 3284 * Note that we don't pass split_rec here on purpose - 3285 * we've merged it into the rec already. 3286 */ 3287 ret = ocfs2_merge_rec_left(inode, path, 3288 handle, rec, 3289 dealloc, et, 3290 split_index); 3291 3292 if (ret) { 3293 mlog_errno(ret); 3294 goto out; 3295 } 3296 3297 ret = ocfs2_rotate_tree_left(inode, handle, path, 3298 dealloc, et); 3299 /* 3300 * Error from this last rotate is not critical, so 3301 * print but don't bubble it up. 3302 */ 3303 if (ret) 3304 mlog_errno(ret); 3305 ret = 0; 3306 } else { 3307 /* 3308 * Merge a record to the left or right. 3309 * 3310 * 'contig_type' is relative to the existing record, 3311 * so for example, if we're "right contig", it's to 3312 * the record on the left (hence the left merge). 3313 */ 3314 if (ctxt->c_contig_type == CONTIG_RIGHT) { 3315 ret = ocfs2_merge_rec_left(inode, 3316 path, 3317 handle, split_rec, 3318 dealloc, et, 3319 split_index); 3320 if (ret) { 3321 mlog_errno(ret); 3322 goto out; 3323 } 3324 } else { 3325 ret = ocfs2_merge_rec_right(inode, 3326 path, 3327 handle, split_rec, 3328 split_index); 3329 if (ret) { 3330 mlog_errno(ret); 3331 goto out; 3332 } 3333 } 3334 3335 if (ctxt->c_split_covers_rec) { 3336 /* 3337 * The merge may have left an empty extent in 3338 * our leaf. Try to rotate it away. 3339 */ 3340 ret = ocfs2_rotate_tree_left(inode, handle, path, 3341 dealloc, et); 3342 if (ret) 3343 mlog_errno(ret); 3344 ret = 0; 3345 } 3346 } 3347 3348 out: 3349 return ret; 3350 } 3351 3352 static void ocfs2_subtract_from_rec(struct super_block *sb, 3353 enum ocfs2_split_type split, 3354 struct ocfs2_extent_rec *rec, 3355 struct ocfs2_extent_rec *split_rec) 3356 { 3357 u64 len_blocks; 3358 3359 len_blocks = ocfs2_clusters_to_blocks(sb, 3360 le16_to_cpu(split_rec->e_leaf_clusters)); 3361 3362 if (split == SPLIT_LEFT) { 3363 /* 3364 * Region is on the left edge of the existing 3365 * record. 3366 */ 3367 le32_add_cpu(&rec->e_cpos, 3368 le16_to_cpu(split_rec->e_leaf_clusters)); 3369 le64_add_cpu(&rec->e_blkno, len_blocks); 3370 le16_add_cpu(&rec->e_leaf_clusters, 3371 -le16_to_cpu(split_rec->e_leaf_clusters)); 3372 } else { 3373 /* 3374 * Region is on the right edge of the existing 3375 * record. 3376 */ 3377 le16_add_cpu(&rec->e_leaf_clusters, 3378 -le16_to_cpu(split_rec->e_leaf_clusters)); 3379 } 3380 } 3381 3382 /* 3383 * Do the final bits of extent record insertion at the target leaf 3384 * list. If this leaf is part of an allocation tree, it is assumed 3385 * that the tree above has been prepared. 3386 */ 3387 static void ocfs2_insert_at_leaf(struct ocfs2_extent_rec *insert_rec, 3388 struct ocfs2_extent_list *el, 3389 struct ocfs2_insert_type *insert, 3390 struct inode *inode) 3391 { 3392 int i = insert->ins_contig_index; 3393 unsigned int range; 3394 struct ocfs2_extent_rec *rec; 3395 3396 BUG_ON(le16_to_cpu(el->l_tree_depth) != 0); 3397 3398 if (insert->ins_split != SPLIT_NONE) { 3399 i = ocfs2_search_extent_list(el, le32_to_cpu(insert_rec->e_cpos)); 3400 BUG_ON(i == -1); 3401 rec = &el->l_recs[i]; 3402 ocfs2_subtract_from_rec(inode->i_sb, insert->ins_split, rec, 3403 insert_rec); 3404 goto rotate; 3405 } 3406 3407 /* 3408 * Contiguous insert - either left or right. 3409 */ 3410 if (insert->ins_contig != CONTIG_NONE) { 3411 rec = &el->l_recs[i]; 3412 if (insert->ins_contig == CONTIG_LEFT) { 3413 rec->e_blkno = insert_rec->e_blkno; 3414 rec->e_cpos = insert_rec->e_cpos; 3415 } 3416 le16_add_cpu(&rec->e_leaf_clusters, 3417 le16_to_cpu(insert_rec->e_leaf_clusters)); 3418 return; 3419 } 3420 3421 /* 3422 * Handle insert into an empty leaf. 3423 */ 3424 if (le16_to_cpu(el->l_next_free_rec) == 0 || 3425 ((le16_to_cpu(el->l_next_free_rec) == 1) && 3426 ocfs2_is_empty_extent(&el->l_recs[0]))) { 3427 el->l_recs[0] = *insert_rec; 3428 el->l_next_free_rec = cpu_to_le16(1); 3429 return; 3430 } 3431 3432 /* 3433 * Appending insert. 3434 */ 3435 if (insert->ins_appending == APPEND_TAIL) { 3436 i = le16_to_cpu(el->l_next_free_rec) - 1; 3437 rec = &el->l_recs[i]; 3438 range = le32_to_cpu(rec->e_cpos) 3439 + le16_to_cpu(rec->e_leaf_clusters); 3440 BUG_ON(le32_to_cpu(insert_rec->e_cpos) < range); 3441 3442 mlog_bug_on_msg(le16_to_cpu(el->l_next_free_rec) >= 3443 le16_to_cpu(el->l_count), 3444 "inode %lu, depth %u, count %u, next free %u, " 3445 "rec.cpos %u, rec.clusters %u, " 3446 "insert.cpos %u, insert.clusters %u\n", 3447 inode->i_ino, 3448 le16_to_cpu(el->l_tree_depth), 3449 le16_to_cpu(el->l_count), 3450 le16_to_cpu(el->l_next_free_rec), 3451 le32_to_cpu(el->l_recs[i].e_cpos), 3452 le16_to_cpu(el->l_recs[i].e_leaf_clusters), 3453 le32_to_cpu(insert_rec->e_cpos), 3454 le16_to_cpu(insert_rec->e_leaf_clusters)); 3455 i++; 3456 el->l_recs[i] = *insert_rec; 3457 le16_add_cpu(&el->l_next_free_rec, 1); 3458 return; 3459 } 3460 3461 rotate: 3462 /* 3463 * Ok, we have to rotate. 3464 * 3465 * At this point, it is safe to assume that inserting into an 3466 * empty leaf and appending to a leaf have both been handled 3467 * above. 3468 * 3469 * This leaf needs to have space, either by the empty 1st 3470 * extent record, or by virtue of an l_next_rec < l_count. 3471 */ 3472 ocfs2_rotate_leaf(el, insert_rec); 3473 } 3474 3475 static void ocfs2_adjust_rightmost_records(struct inode *inode, 3476 handle_t *handle, 3477 struct ocfs2_path *path, 3478 struct ocfs2_extent_rec *insert_rec) 3479 { 3480 int ret, i, next_free; 3481 struct buffer_head *bh; 3482 struct ocfs2_extent_list *el; 3483 struct ocfs2_extent_rec *rec; 3484 3485 /* 3486 * Update everything except the leaf block. 3487 */ 3488 for (i = 0; i < path->p_tree_depth; i++) { 3489 bh = path->p_node[i].bh; 3490 el = path->p_node[i].el; 3491 3492 next_free = le16_to_cpu(el->l_next_free_rec); 3493 if (next_free == 0) { 3494 ocfs2_error(inode->i_sb, 3495 "Dinode %llu has a bad extent list", 3496 (unsigned long long)OCFS2_I(inode)->ip_blkno); 3497 ret = -EIO; 3498 return; 3499 } 3500 3501 rec = &el->l_recs[next_free - 1]; 3502 3503 rec->e_int_clusters = insert_rec->e_cpos; 3504 le32_add_cpu(&rec->e_int_clusters, 3505 le16_to_cpu(insert_rec->e_leaf_clusters)); 3506 le32_add_cpu(&rec->e_int_clusters, 3507 -le32_to_cpu(rec->e_cpos)); 3508 3509 ret = ocfs2_journal_dirty(handle, bh); 3510 if (ret) 3511 mlog_errno(ret); 3512 3513 } 3514 } 3515 3516 static int ocfs2_append_rec_to_path(struct inode *inode, handle_t *handle, 3517 struct ocfs2_extent_rec *insert_rec, 3518 struct ocfs2_path *right_path, 3519 struct ocfs2_path **ret_left_path) 3520 { 3521 int ret, next_free; 3522 struct ocfs2_extent_list *el; 3523 struct ocfs2_path *left_path = NULL; 3524 3525 *ret_left_path = NULL; 3526 3527 /* 3528 * This shouldn't happen for non-trees. The extent rec cluster 3529 * count manipulation below only works for interior nodes. 3530 */ 3531 BUG_ON(right_path->p_tree_depth == 0); 3532 3533 /* 3534 * If our appending insert is at the leftmost edge of a leaf, 3535 * then we might need to update the rightmost records of the 3536 * neighboring path. 3537 */ 3538 el = path_leaf_el(right_path); 3539 next_free = le16_to_cpu(el->l_next_free_rec); 3540 if (next_free == 0 || 3541 (next_free == 1 && ocfs2_is_empty_extent(&el->l_recs[0]))) { 3542 u32 left_cpos; 3543 3544 ret = ocfs2_find_cpos_for_left_leaf(inode->i_sb, right_path, 3545 &left_cpos); 3546 if (ret) { 3547 mlog_errno(ret); 3548 goto out; 3549 } 3550 3551 mlog(0, "Append may need a left path update. cpos: %u, " 3552 "left_cpos: %u\n", le32_to_cpu(insert_rec->e_cpos), 3553 left_cpos); 3554 3555 /* 3556 * No need to worry if the append is already in the 3557 * leftmost leaf. 3558 */ 3559 if (left_cpos) { 3560 left_path = ocfs2_new_path(path_root_bh(right_path), 3561 path_root_el(right_path)); 3562 if (!left_path) { 3563 ret = -ENOMEM; 3564 mlog_errno(ret); 3565 goto out; 3566 } 3567 3568 ret = ocfs2_find_path(inode, left_path, left_cpos); 3569 if (ret) { 3570 mlog_errno(ret); 3571 goto out; 3572 } 3573 3574 /* 3575 * ocfs2_insert_path() will pass the left_path to the 3576 * journal for us. 3577 */ 3578 } 3579 } 3580 3581 ret = ocfs2_journal_access_path(inode, handle, right_path); 3582 if (ret) { 3583 mlog_errno(ret); 3584 goto out; 3585 } 3586 3587 ocfs2_adjust_rightmost_records(inode, handle, right_path, insert_rec); 3588 3589 *ret_left_path = left_path; 3590 ret = 0; 3591 out: 3592 if (ret != 0) 3593 ocfs2_free_path(left_path); 3594 3595 return ret; 3596 } 3597 3598 static void ocfs2_split_record(struct inode *inode, 3599 struct ocfs2_path *left_path, 3600 struct ocfs2_path *right_path, 3601 struct ocfs2_extent_rec *split_rec, 3602 enum ocfs2_split_type split) 3603 { 3604 int index; 3605 u32 cpos = le32_to_cpu(split_rec->e_cpos); 3606 struct ocfs2_extent_list *left_el = NULL, *right_el, *insert_el, *el; 3607 struct ocfs2_extent_rec *rec, *tmprec; 3608 3609 right_el = path_leaf_el(right_path);; 3610 if (left_path) 3611 left_el = path_leaf_el(left_path); 3612 3613 el = right_el; 3614 insert_el = right_el; 3615 index = ocfs2_search_extent_list(el, cpos); 3616 if (index != -1) { 3617 if (index == 0 && left_path) { 3618 BUG_ON(ocfs2_is_empty_extent(&el->l_recs[0])); 3619 3620 /* 3621 * This typically means that the record 3622 * started in the left path but moved to the 3623 * right as a result of rotation. We either 3624 * move the existing record to the left, or we 3625 * do the later insert there. 3626 * 3627 * In this case, the left path should always 3628 * exist as the rotate code will have passed 3629 * it back for a post-insert update. 3630 */ 3631 3632 if (split == SPLIT_LEFT) { 3633 /* 3634 * It's a left split. Since we know 3635 * that the rotate code gave us an 3636 * empty extent in the left path, we 3637 * can just do the insert there. 3638 */ 3639 insert_el = left_el; 3640 } else { 3641 /* 3642 * Right split - we have to move the 3643 * existing record over to the left 3644 * leaf. The insert will be into the 3645 * newly created empty extent in the 3646 * right leaf. 3647 */ 3648 tmprec = &right_el->l_recs[index]; 3649 ocfs2_rotate_leaf(left_el, tmprec); 3650 el = left_el; 3651 3652 memset(tmprec, 0, sizeof(*tmprec)); 3653 index = ocfs2_search_extent_list(left_el, cpos); 3654 BUG_ON(index == -1); 3655 } 3656 } 3657 } else { 3658 BUG_ON(!left_path); 3659 BUG_ON(!ocfs2_is_empty_extent(&left_el->l_recs[0])); 3660 /* 3661 * Left path is easy - we can just allow the insert to 3662 * happen. 3663 */ 3664 el = left_el; 3665 insert_el = left_el; 3666 index = ocfs2_search_extent_list(el, cpos); 3667 BUG_ON(index == -1); 3668 } 3669 3670 rec = &el->l_recs[index]; 3671 ocfs2_subtract_from_rec(inode->i_sb, split, rec, split_rec); 3672 ocfs2_rotate_leaf(insert_el, split_rec); 3673 } 3674 3675 /* 3676 * This function only does inserts on an allocation b-tree. For tree 3677 * depth = 0, ocfs2_insert_at_leaf() is called directly. 3678 * 3679 * right_path is the path we want to do the actual insert 3680 * in. left_path should only be passed in if we need to update that 3681 * portion of the tree after an edge insert. 3682 */ 3683 static int ocfs2_insert_path(struct inode *inode, 3684 handle_t *handle, 3685 struct ocfs2_path *left_path, 3686 struct ocfs2_path *right_path, 3687 struct ocfs2_extent_rec *insert_rec, 3688 struct ocfs2_insert_type *insert) 3689 { 3690 int ret, subtree_index; 3691 struct buffer_head *leaf_bh = path_leaf_bh(right_path); 3692 3693 if (left_path) { 3694 int credits = handle->h_buffer_credits; 3695 3696 /* 3697 * There's a chance that left_path got passed back to 3698 * us without being accounted for in the 3699 * journal. Extend our transaction here to be sure we 3700 * can change those blocks. 3701 */ 3702 credits += left_path->p_tree_depth; 3703 3704 ret = ocfs2_extend_trans(handle, credits); 3705 if (ret < 0) { 3706 mlog_errno(ret); 3707 goto out; 3708 } 3709 3710 ret = ocfs2_journal_access_path(inode, handle, left_path); 3711 if (ret < 0) { 3712 mlog_errno(ret); 3713 goto out; 3714 } 3715 } 3716 3717 /* 3718 * Pass both paths to the journal. The majority of inserts 3719 * will be touching all components anyway. 3720 */ 3721 ret = ocfs2_journal_access_path(inode, handle, right_path); 3722 if (ret < 0) { 3723 mlog_errno(ret); 3724 goto out; 3725 } 3726 3727 if (insert->ins_split != SPLIT_NONE) { 3728 /* 3729 * We could call ocfs2_insert_at_leaf() for some types 3730 * of splits, but it's easier to just let one separate 3731 * function sort it all out. 3732 */ 3733 ocfs2_split_record(inode, left_path, right_path, 3734 insert_rec, insert->ins_split); 3735 3736 /* 3737 * Split might have modified either leaf and we don't 3738 * have a guarantee that the later edge insert will 3739 * dirty this for us. 3740 */ 3741 if (left_path) 3742 ret = ocfs2_journal_dirty(handle, 3743 path_leaf_bh(left_path)); 3744 if (ret) 3745 mlog_errno(ret); 3746 } else 3747 ocfs2_insert_at_leaf(insert_rec, path_leaf_el(right_path), 3748 insert, inode); 3749 3750 ret = ocfs2_journal_dirty(handle, leaf_bh); 3751 if (ret) 3752 mlog_errno(ret); 3753 3754 if (left_path) { 3755 /* 3756 * The rotate code has indicated that we need to fix 3757 * up portions of the tree after the insert. 3758 * 3759 * XXX: Should we extend the transaction here? 3760 */ 3761 subtree_index = ocfs2_find_subtree_root(inode, left_path, 3762 right_path); 3763 ocfs2_complete_edge_insert(inode, handle, left_path, 3764 right_path, subtree_index); 3765 } 3766 3767 ret = 0; 3768 out: 3769 return ret; 3770 } 3771 3772 static int ocfs2_do_insert_extent(struct inode *inode, 3773 handle_t *handle, 3774 struct ocfs2_extent_tree *et, 3775 struct ocfs2_extent_rec *insert_rec, 3776 struct ocfs2_insert_type *type) 3777 { 3778 int ret, rotate = 0; 3779 u32 cpos; 3780 struct ocfs2_path *right_path = NULL; 3781 struct ocfs2_path *left_path = NULL; 3782 struct ocfs2_extent_list *el; 3783 3784 el = et->root_el; 3785 3786 ret = ocfs2_journal_access(handle, inode, et->root_bh, 3787 OCFS2_JOURNAL_ACCESS_WRITE); 3788 if (ret) { 3789 mlog_errno(ret); 3790 goto out; 3791 } 3792 3793 if (le16_to_cpu(el->l_tree_depth) == 0) { 3794 ocfs2_insert_at_leaf(insert_rec, el, type, inode); 3795 goto out_update_clusters; 3796 } 3797 3798 right_path = ocfs2_new_path(et->root_bh, et->root_el); 3799 if (!right_path) { 3800 ret = -ENOMEM; 3801 mlog_errno(ret); 3802 goto out; 3803 } 3804 3805 /* 3806 * Determine the path to start with. Rotations need the 3807 * rightmost path, everything else can go directly to the 3808 * target leaf. 3809 */ 3810 cpos = le32_to_cpu(insert_rec->e_cpos); 3811 if (type->ins_appending == APPEND_NONE && 3812 type->ins_contig == CONTIG_NONE) { 3813 rotate = 1; 3814 cpos = UINT_MAX; 3815 } 3816 3817 ret = ocfs2_find_path(inode, right_path, cpos); 3818 if (ret) { 3819 mlog_errno(ret); 3820 goto out; 3821 } 3822 3823 /* 3824 * Rotations and appends need special treatment - they modify 3825 * parts of the tree's above them. 3826 * 3827 * Both might pass back a path immediate to the left of the 3828 * one being inserted to. This will be cause 3829 * ocfs2_insert_path() to modify the rightmost records of 3830 * left_path to account for an edge insert. 3831 * 3832 * XXX: When modifying this code, keep in mind that an insert 3833 * can wind up skipping both of these two special cases... 3834 */ 3835 if (rotate) { 3836 ret = ocfs2_rotate_tree_right(inode, handle, type->ins_split, 3837 le32_to_cpu(insert_rec->e_cpos), 3838 right_path, &left_path); 3839 if (ret) { 3840 mlog_errno(ret); 3841 goto out; 3842 } 3843 3844 /* 3845 * ocfs2_rotate_tree_right() might have extended the 3846 * transaction without re-journaling our tree root. 3847 */ 3848 ret = ocfs2_journal_access(handle, inode, et->root_bh, 3849 OCFS2_JOURNAL_ACCESS_WRITE); 3850 if (ret) { 3851 mlog_errno(ret); 3852 goto out; 3853 } 3854 } else if (type->ins_appending == APPEND_TAIL 3855 && type->ins_contig != CONTIG_LEFT) { 3856 ret = ocfs2_append_rec_to_path(inode, handle, insert_rec, 3857 right_path, &left_path); 3858 if (ret) { 3859 mlog_errno(ret); 3860 goto out; 3861 } 3862 } 3863 3864 ret = ocfs2_insert_path(inode, handle, left_path, right_path, 3865 insert_rec, type); 3866 if (ret) { 3867 mlog_errno(ret); 3868 goto out; 3869 } 3870 3871 out_update_clusters: 3872 if (type->ins_split == SPLIT_NONE) 3873 ocfs2_update_clusters(inode, et, 3874 le16_to_cpu(insert_rec->e_leaf_clusters)); 3875 3876 ret = ocfs2_journal_dirty(handle, et->root_bh); 3877 if (ret) 3878 mlog_errno(ret); 3879 3880 out: 3881 ocfs2_free_path(left_path); 3882 ocfs2_free_path(right_path); 3883 3884 return ret; 3885 } 3886 3887 static enum ocfs2_contig_type 3888 ocfs2_figure_merge_contig_type(struct inode *inode, struct ocfs2_path *path, 3889 struct ocfs2_extent_list *el, int index, 3890 struct ocfs2_extent_rec *split_rec) 3891 { 3892 int status; 3893 enum ocfs2_contig_type ret = CONTIG_NONE; 3894 u32 left_cpos, right_cpos; 3895 struct ocfs2_extent_rec *rec = NULL; 3896 struct ocfs2_extent_list *new_el; 3897 struct ocfs2_path *left_path = NULL, *right_path = NULL; 3898 struct buffer_head *bh; 3899 struct ocfs2_extent_block *eb; 3900 3901 if (index > 0) { 3902 rec = &el->l_recs[index - 1]; 3903 } else if (path->p_tree_depth > 0) { 3904 status = ocfs2_find_cpos_for_left_leaf(inode->i_sb, 3905 path, &left_cpos); 3906 if (status) 3907 goto out; 3908 3909 if (left_cpos != 0) { 3910 left_path = ocfs2_new_path(path_root_bh(path), 3911 path_root_el(path)); 3912 if (!left_path) 3913 goto out; 3914 3915 status = ocfs2_find_path(inode, left_path, left_cpos); 3916 if (status) 3917 goto out; 3918 3919 new_el = path_leaf_el(left_path); 3920 3921 if (le16_to_cpu(new_el->l_next_free_rec) != 3922 le16_to_cpu(new_el->l_count)) { 3923 bh = path_leaf_bh(left_path); 3924 eb = (struct ocfs2_extent_block *)bh->b_data; 3925 OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, 3926 eb); 3927 goto out; 3928 } 3929 rec = &new_el->l_recs[ 3930 le16_to_cpu(new_el->l_next_free_rec) - 1]; 3931 } 3932 } 3933 3934 /* 3935 * We're careful to check for an empty extent record here - 3936 * the merge code will know what to do if it sees one. 3937 */ 3938 if (rec) { 3939 if (index == 1 && ocfs2_is_empty_extent(rec)) { 3940 if (split_rec->e_cpos == el->l_recs[index].e_cpos) 3941 ret = CONTIG_RIGHT; 3942 } else { 3943 ret = ocfs2_extent_contig(inode, rec, split_rec); 3944 } 3945 } 3946 3947 rec = NULL; 3948 if (index < (le16_to_cpu(el->l_next_free_rec) - 1)) 3949 rec = &el->l_recs[index + 1]; 3950 else if (le16_to_cpu(el->l_next_free_rec) == le16_to_cpu(el->l_count) && 3951 path->p_tree_depth > 0) { 3952 status = ocfs2_find_cpos_for_right_leaf(inode->i_sb, 3953 path, &right_cpos); 3954 if (status) 3955 goto out; 3956 3957 if (right_cpos == 0) 3958 goto out; 3959 3960 right_path = ocfs2_new_path(path_root_bh(path), 3961 path_root_el(path)); 3962 if (!right_path) 3963 goto out; 3964 3965 status = ocfs2_find_path(inode, right_path, right_cpos); 3966 if (status) 3967 goto out; 3968 3969 new_el = path_leaf_el(right_path); 3970 rec = &new_el->l_recs[0]; 3971 if (ocfs2_is_empty_extent(rec)) { 3972 if (le16_to_cpu(new_el->l_next_free_rec) <= 1) { 3973 bh = path_leaf_bh(right_path); 3974 eb = (struct ocfs2_extent_block *)bh->b_data; 3975 OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, 3976 eb); 3977 goto out; 3978 } 3979 rec = &new_el->l_recs[1]; 3980 } 3981 } 3982 3983 if (rec) { 3984 enum ocfs2_contig_type contig_type; 3985 3986 contig_type = ocfs2_extent_contig(inode, rec, split_rec); 3987 3988 if (contig_type == CONTIG_LEFT && ret == CONTIG_RIGHT) 3989 ret = CONTIG_LEFTRIGHT; 3990 else if (ret == CONTIG_NONE) 3991 ret = contig_type; 3992 } 3993 3994 out: 3995 if (left_path) 3996 ocfs2_free_path(left_path); 3997 if (right_path) 3998 ocfs2_free_path(right_path); 3999 4000 return ret; 4001 } 4002 4003 static void ocfs2_figure_contig_type(struct inode *inode, 4004 struct ocfs2_insert_type *insert, 4005 struct ocfs2_extent_list *el, 4006 struct ocfs2_extent_rec *insert_rec) 4007 { 4008 int i; 4009 enum ocfs2_contig_type contig_type = CONTIG_NONE; 4010 4011 BUG_ON(le16_to_cpu(el->l_tree_depth) != 0); 4012 4013 for(i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) { 4014 contig_type = ocfs2_extent_contig(inode, &el->l_recs[i], 4015 insert_rec); 4016 if (contig_type != CONTIG_NONE) { 4017 insert->ins_contig_index = i; 4018 break; 4019 } 4020 } 4021 insert->ins_contig = contig_type; 4022 } 4023 4024 /* 4025 * This should only be called against the righmost leaf extent list. 4026 * 4027 * ocfs2_figure_appending_type() will figure out whether we'll have to 4028 * insert at the tail of the rightmost leaf. 4029 * 4030 * This should also work against the root extent list for tree's with 0 4031 * depth. If we consider the root extent list to be the rightmost leaf node 4032 * then the logic here makes sense. 4033 */ 4034 static void ocfs2_figure_appending_type(struct ocfs2_insert_type *insert, 4035 struct ocfs2_extent_list *el, 4036 struct ocfs2_extent_rec *insert_rec) 4037 { 4038 int i; 4039 u32 cpos = le32_to_cpu(insert_rec->e_cpos); 4040 struct ocfs2_extent_rec *rec; 4041 4042 insert->ins_appending = APPEND_NONE; 4043 4044 BUG_ON(le16_to_cpu(el->l_tree_depth) != 0); 4045 4046 if (!el->l_next_free_rec) 4047 goto set_tail_append; 4048 4049 if (ocfs2_is_empty_extent(&el->l_recs[0])) { 4050 /* Were all records empty? */ 4051 if (le16_to_cpu(el->l_next_free_rec) == 1) 4052 goto set_tail_append; 4053 } 4054 4055 i = le16_to_cpu(el->l_next_free_rec) - 1; 4056 rec = &el->l_recs[i]; 4057 4058 if (cpos >= 4059 (le32_to_cpu(rec->e_cpos) + le16_to_cpu(rec->e_leaf_clusters))) 4060 goto set_tail_append; 4061 4062 return; 4063 4064 set_tail_append: 4065 insert->ins_appending = APPEND_TAIL; 4066 } 4067 4068 /* 4069 * Helper function called at the begining of an insert. 4070 * 4071 * This computes a few things that are commonly used in the process of 4072 * inserting into the btree: 4073 * - Whether the new extent is contiguous with an existing one. 4074 * - The current tree depth. 4075 * - Whether the insert is an appending one. 4076 * - The total # of free records in the tree. 4077 * 4078 * All of the information is stored on the ocfs2_insert_type 4079 * structure. 4080 */ 4081 static int ocfs2_figure_insert_type(struct inode *inode, 4082 struct ocfs2_extent_tree *et, 4083 struct buffer_head **last_eb_bh, 4084 struct ocfs2_extent_rec *insert_rec, 4085 int *free_records, 4086 struct ocfs2_insert_type *insert) 4087 { 4088 int ret; 4089 struct ocfs2_extent_block *eb; 4090 struct ocfs2_extent_list *el; 4091 struct ocfs2_path *path = NULL; 4092 struct buffer_head *bh = NULL; 4093 4094 insert->ins_split = SPLIT_NONE; 4095 4096 el = et->root_el; 4097 insert->ins_tree_depth = le16_to_cpu(el->l_tree_depth); 4098 4099 if (el->l_tree_depth) { 4100 /* 4101 * If we have tree depth, we read in the 4102 * rightmost extent block ahead of time as 4103 * ocfs2_figure_insert_type() and ocfs2_add_branch() 4104 * may want it later. 4105 */ 4106 ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), 4107 ocfs2_get_last_eb_blk(et), &bh, 4108 OCFS2_BH_CACHED, inode); 4109 if (ret) { 4110 mlog_exit(ret); 4111 goto out; 4112 } 4113 eb = (struct ocfs2_extent_block *) bh->b_data; 4114 el = &eb->h_list; 4115 } 4116 4117 /* 4118 * Unless we have a contiguous insert, we'll need to know if 4119 * there is room left in our allocation tree for another 4120 * extent record. 4121 * 4122 * XXX: This test is simplistic, we can search for empty 4123 * extent records too. 4124 */ 4125 *free_records = le16_to_cpu(el->l_count) - 4126 le16_to_cpu(el->l_next_free_rec); 4127 4128 if (!insert->ins_tree_depth) { 4129 ocfs2_figure_contig_type(inode, insert, el, insert_rec); 4130 ocfs2_figure_appending_type(insert, el, insert_rec); 4131 return 0; 4132 } 4133 4134 path = ocfs2_new_path(et->root_bh, et->root_el); 4135 if (!path) { 4136 ret = -ENOMEM; 4137 mlog_errno(ret); 4138 goto out; 4139 } 4140 4141 /* 4142 * In the case that we're inserting past what the tree 4143 * currently accounts for, ocfs2_find_path() will return for 4144 * us the rightmost tree path. This is accounted for below in 4145 * the appending code. 4146 */ 4147 ret = ocfs2_find_path(inode, path, le32_to_cpu(insert_rec->e_cpos)); 4148 if (ret) { 4149 mlog_errno(ret); 4150 goto out; 4151 } 4152 4153 el = path_leaf_el(path); 4154 4155 /* 4156 * Now that we have the path, there's two things we want to determine: 4157 * 1) Contiguousness (also set contig_index if this is so) 4158 * 4159 * 2) Are we doing an append? We can trivially break this up 4160 * into two types of appends: simple record append, or a 4161 * rotate inside the tail leaf. 4162 */ 4163 ocfs2_figure_contig_type(inode, insert, el, insert_rec); 4164 4165 /* 4166 * The insert code isn't quite ready to deal with all cases of 4167 * left contiguousness. Specifically, if it's an insert into 4168 * the 1st record in a leaf, it will require the adjustment of 4169 * cluster count on the last record of the path directly to it's 4170 * left. For now, just catch that case and fool the layers 4171 * above us. This works just fine for tree_depth == 0, which 4172 * is why we allow that above. 4173 */ 4174 if (insert->ins_contig == CONTIG_LEFT && 4175 insert->ins_contig_index == 0) 4176 insert->ins_contig = CONTIG_NONE; 4177 4178 /* 4179 * Ok, so we can simply compare against last_eb to figure out 4180 * whether the path doesn't exist. This will only happen in 4181 * the case that we're doing a tail append, so maybe we can 4182 * take advantage of that information somehow. 4183 */ 4184 if (ocfs2_get_last_eb_blk(et) == 4185 path_leaf_bh(path)->b_blocknr) { 4186 /* 4187 * Ok, ocfs2_find_path() returned us the rightmost 4188 * tree path. This might be an appending insert. There are 4189 * two cases: 4190 * 1) We're doing a true append at the tail: 4191 * -This might even be off the end of the leaf 4192 * 2) We're "appending" by rotating in the tail 4193 */ 4194 ocfs2_figure_appending_type(insert, el, insert_rec); 4195 } 4196 4197 out: 4198 ocfs2_free_path(path); 4199 4200 if (ret == 0) 4201 *last_eb_bh = bh; 4202 else 4203 brelse(bh); 4204 return ret; 4205 } 4206 4207 /* 4208 * Insert an extent into an inode btree. 4209 * 4210 * The caller needs to update fe->i_clusters 4211 */ 4212 int ocfs2_insert_extent(struct ocfs2_super *osb, 4213 handle_t *handle, 4214 struct inode *inode, 4215 struct buffer_head *root_bh, 4216 u32 cpos, 4217 u64 start_blk, 4218 u32 new_clusters, 4219 u8 flags, 4220 struct ocfs2_alloc_context *meta_ac, 4221 enum ocfs2_extent_tree_type et_type) 4222 { 4223 int status; 4224 int uninitialized_var(free_records); 4225 struct buffer_head *last_eb_bh = NULL; 4226 struct ocfs2_insert_type insert = {0, }; 4227 struct ocfs2_extent_rec rec; 4228 struct ocfs2_extent_tree *et = NULL; 4229 4230 BUG_ON(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL); 4231 4232 et = ocfs2_new_extent_tree(root_bh, et_type); 4233 if (!et) { 4234 status = -ENOMEM; 4235 mlog_errno(status); 4236 goto bail; 4237 } 4238 4239 mlog(0, "add %u clusters at position %u to inode %llu\n", 4240 new_clusters, cpos, (unsigned long long)OCFS2_I(inode)->ip_blkno); 4241 4242 mlog_bug_on_msg(!ocfs2_sparse_alloc(osb) && 4243 (OCFS2_I(inode)->ip_clusters != cpos), 4244 "Device %s, asking for sparse allocation: inode %llu, " 4245 "cpos %u, clusters %u\n", 4246 osb->dev_str, 4247 (unsigned long long)OCFS2_I(inode)->ip_blkno, cpos, 4248 OCFS2_I(inode)->ip_clusters); 4249 4250 memset(&rec, 0, sizeof(rec)); 4251 rec.e_cpos = cpu_to_le32(cpos); 4252 rec.e_blkno = cpu_to_le64(start_blk); 4253 rec.e_leaf_clusters = cpu_to_le16(new_clusters); 4254 rec.e_flags = flags; 4255 4256 status = ocfs2_figure_insert_type(inode, et, &last_eb_bh, &rec, 4257 &free_records, &insert); 4258 if (status < 0) { 4259 mlog_errno(status); 4260 goto bail; 4261 } 4262 4263 mlog(0, "Insert.appending: %u, Insert.Contig: %u, " 4264 "Insert.contig_index: %d, Insert.free_records: %d, " 4265 "Insert.tree_depth: %d\n", 4266 insert.ins_appending, insert.ins_contig, insert.ins_contig_index, 4267 free_records, insert.ins_tree_depth); 4268 4269 if (insert.ins_contig == CONTIG_NONE && free_records == 0) { 4270 status = ocfs2_grow_tree(inode, handle, et, 4271 &insert.ins_tree_depth, &last_eb_bh, 4272 meta_ac); 4273 if (status) { 4274 mlog_errno(status); 4275 goto bail; 4276 } 4277 } 4278 4279 /* Finally, we can add clusters. This might rotate the tree for us. */ 4280 status = ocfs2_do_insert_extent(inode, handle, et, &rec, &insert); 4281 if (status < 0) 4282 mlog_errno(status); 4283 else if (et->type == OCFS2_DINODE_EXTENT) 4284 ocfs2_extent_map_insert_rec(inode, &rec); 4285 4286 bail: 4287 if (last_eb_bh) 4288 brelse(last_eb_bh); 4289 4290 if (et) 4291 ocfs2_free_extent_tree(et); 4292 mlog_exit(status); 4293 return status; 4294 } 4295 4296 static void ocfs2_make_right_split_rec(struct super_block *sb, 4297 struct ocfs2_extent_rec *split_rec, 4298 u32 cpos, 4299 struct ocfs2_extent_rec *rec) 4300 { 4301 u32 rec_cpos = le32_to_cpu(rec->e_cpos); 4302 u32 rec_range = rec_cpos + le16_to_cpu(rec->e_leaf_clusters); 4303 4304 memset(split_rec, 0, sizeof(struct ocfs2_extent_rec)); 4305 4306 split_rec->e_cpos = cpu_to_le32(cpos); 4307 split_rec->e_leaf_clusters = cpu_to_le16(rec_range - cpos); 4308 4309 split_rec->e_blkno = rec->e_blkno; 4310 le64_add_cpu(&split_rec->e_blkno, 4311 ocfs2_clusters_to_blocks(sb, cpos - rec_cpos)); 4312 4313 split_rec->e_flags = rec->e_flags; 4314 } 4315 4316 static int ocfs2_split_and_insert(struct inode *inode, 4317 handle_t *handle, 4318 struct ocfs2_path *path, 4319 struct ocfs2_extent_tree *et, 4320 struct buffer_head **last_eb_bh, 4321 int split_index, 4322 struct ocfs2_extent_rec *orig_split_rec, 4323 struct ocfs2_alloc_context *meta_ac) 4324 { 4325 int ret = 0, depth; 4326 unsigned int insert_range, rec_range, do_leftright = 0; 4327 struct ocfs2_extent_rec tmprec; 4328 struct ocfs2_extent_list *rightmost_el; 4329 struct ocfs2_extent_rec rec; 4330 struct ocfs2_extent_rec split_rec = *orig_split_rec; 4331 struct ocfs2_insert_type insert; 4332 struct ocfs2_extent_block *eb; 4333 4334 leftright: 4335 /* 4336 * Store a copy of the record on the stack - it might move 4337 * around as the tree is manipulated below. 4338 */ 4339 rec = path_leaf_el(path)->l_recs[split_index]; 4340 4341 rightmost_el = et->root_el; 4342 4343 depth = le16_to_cpu(rightmost_el->l_tree_depth); 4344 if (depth) { 4345 BUG_ON(!(*last_eb_bh)); 4346 eb = (struct ocfs2_extent_block *) (*last_eb_bh)->b_data; 4347 rightmost_el = &eb->h_list; 4348 } 4349 4350 if (le16_to_cpu(rightmost_el->l_next_free_rec) == 4351 le16_to_cpu(rightmost_el->l_count)) { 4352 ret = ocfs2_grow_tree(inode, handle, et, 4353 &depth, last_eb_bh, meta_ac); 4354 if (ret) { 4355 mlog_errno(ret); 4356 goto out; 4357 } 4358 } 4359 4360 memset(&insert, 0, sizeof(struct ocfs2_insert_type)); 4361 insert.ins_appending = APPEND_NONE; 4362 insert.ins_contig = CONTIG_NONE; 4363 insert.ins_tree_depth = depth; 4364 4365 insert_range = le32_to_cpu(split_rec.e_cpos) + 4366 le16_to_cpu(split_rec.e_leaf_clusters); 4367 rec_range = le32_to_cpu(rec.e_cpos) + 4368 le16_to_cpu(rec.e_leaf_clusters); 4369 4370 if (split_rec.e_cpos == rec.e_cpos) { 4371 insert.ins_split = SPLIT_LEFT; 4372 } else if (insert_range == rec_range) { 4373 insert.ins_split = SPLIT_RIGHT; 4374 } else { 4375 /* 4376 * Left/right split. We fake this as a right split 4377 * first and then make a second pass as a left split. 4378 */ 4379 insert.ins_split = SPLIT_RIGHT; 4380 4381 ocfs2_make_right_split_rec(inode->i_sb, &tmprec, insert_range, 4382 &rec); 4383 4384 split_rec = tmprec; 4385 4386 BUG_ON(do_leftright); 4387 do_leftright = 1; 4388 } 4389 4390 ret = ocfs2_do_insert_extent(inode, handle, et, &split_rec, &insert); 4391 if (ret) { 4392 mlog_errno(ret); 4393 goto out; 4394 } 4395 4396 if (do_leftright == 1) { 4397 u32 cpos; 4398 struct ocfs2_extent_list *el; 4399 4400 do_leftright++; 4401 split_rec = *orig_split_rec; 4402 4403 ocfs2_reinit_path(path, 1); 4404 4405 cpos = le32_to_cpu(split_rec.e_cpos); 4406 ret = ocfs2_find_path(inode, path, cpos); 4407 if (ret) { 4408 mlog_errno(ret); 4409 goto out; 4410 } 4411 4412 el = path_leaf_el(path); 4413 split_index = ocfs2_search_extent_list(el, cpos); 4414 goto leftright; 4415 } 4416 out: 4417 4418 return ret; 4419 } 4420 4421 /* 4422 * Mark part or all of the extent record at split_index in the leaf 4423 * pointed to by path as written. This removes the unwritten 4424 * extent flag. 4425 * 4426 * Care is taken to handle contiguousness so as to not grow the tree. 4427 * 4428 * meta_ac is not strictly necessary - we only truly need it if growth 4429 * of the tree is required. All other cases will degrade into a less 4430 * optimal tree layout. 4431 * 4432 * last_eb_bh should be the rightmost leaf block for any extent 4433 * btree. Since a split may grow the tree or a merge might shrink it, 4434 * the caller cannot trust the contents of that buffer after this call. 4435 * 4436 * This code is optimized for readability - several passes might be 4437 * made over certain portions of the tree. All of those blocks will 4438 * have been brought into cache (and pinned via the journal), so the 4439 * extra overhead is not expressed in terms of disk reads. 4440 */ 4441 static int __ocfs2_mark_extent_written(struct inode *inode, 4442 struct ocfs2_extent_tree *et, 4443 handle_t *handle, 4444 struct ocfs2_path *path, 4445 int split_index, 4446 struct ocfs2_extent_rec *split_rec, 4447 struct ocfs2_alloc_context *meta_ac, 4448 struct ocfs2_cached_dealloc_ctxt *dealloc) 4449 { 4450 int ret = 0; 4451 struct ocfs2_extent_list *el = path_leaf_el(path); 4452 struct buffer_head *last_eb_bh = NULL; 4453 struct ocfs2_extent_rec *rec = &el->l_recs[split_index]; 4454 struct ocfs2_merge_ctxt ctxt; 4455 struct ocfs2_extent_list *rightmost_el; 4456 4457 if (!(rec->e_flags & OCFS2_EXT_UNWRITTEN)) { 4458 ret = -EIO; 4459 mlog_errno(ret); 4460 goto out; 4461 } 4462 4463 if (le32_to_cpu(rec->e_cpos) > le32_to_cpu(split_rec->e_cpos) || 4464 ((le32_to_cpu(rec->e_cpos) + le16_to_cpu(rec->e_leaf_clusters)) < 4465 (le32_to_cpu(split_rec->e_cpos) + le16_to_cpu(split_rec->e_leaf_clusters)))) { 4466 ret = -EIO; 4467 mlog_errno(ret); 4468 goto out; 4469 } 4470 4471 ctxt.c_contig_type = ocfs2_figure_merge_contig_type(inode, path, el, 4472 split_index, 4473 split_rec); 4474 4475 /* 4476 * The core merge / split code wants to know how much room is 4477 * left in this inodes allocation tree, so we pass the 4478 * rightmost extent list. 4479 */ 4480 if (path->p_tree_depth) { 4481 struct ocfs2_extent_block *eb; 4482 4483 ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), 4484 ocfs2_get_last_eb_blk(et), 4485 &last_eb_bh, OCFS2_BH_CACHED, inode); 4486 if (ret) { 4487 mlog_exit(ret); 4488 goto out; 4489 } 4490 4491 eb = (struct ocfs2_extent_block *) last_eb_bh->b_data; 4492 if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) { 4493 OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb); 4494 ret = -EROFS; 4495 goto out; 4496 } 4497 4498 rightmost_el = &eb->h_list; 4499 } else 4500 rightmost_el = path_root_el(path); 4501 4502 if (rec->e_cpos == split_rec->e_cpos && 4503 rec->e_leaf_clusters == split_rec->e_leaf_clusters) 4504 ctxt.c_split_covers_rec = 1; 4505 else 4506 ctxt.c_split_covers_rec = 0; 4507 4508 ctxt.c_has_empty_extent = ocfs2_is_empty_extent(&el->l_recs[0]); 4509 4510 mlog(0, "index: %d, contig: %u, has_empty: %u, split_covers: %u\n", 4511 split_index, ctxt.c_contig_type, ctxt.c_has_empty_extent, 4512 ctxt.c_split_covers_rec); 4513 4514 if (ctxt.c_contig_type == CONTIG_NONE) { 4515 if (ctxt.c_split_covers_rec) 4516 el->l_recs[split_index] = *split_rec; 4517 else 4518 ret = ocfs2_split_and_insert(inode, handle, path, et, 4519 &last_eb_bh, split_index, 4520 split_rec, meta_ac); 4521 if (ret) 4522 mlog_errno(ret); 4523 } else { 4524 ret = ocfs2_try_to_merge_extent(inode, handle, path, 4525 split_index, split_rec, 4526 dealloc, &ctxt, et); 4527 if (ret) 4528 mlog_errno(ret); 4529 } 4530 4531 out: 4532 brelse(last_eb_bh); 4533 return ret; 4534 } 4535 4536 /* 4537 * Mark the already-existing extent at cpos as written for len clusters. 4538 * 4539 * If the existing extent is larger than the request, initiate a 4540 * split. An attempt will be made at merging with adjacent extents. 4541 * 4542 * The caller is responsible for passing down meta_ac if we'll need it. 4543 */ 4544 int ocfs2_mark_extent_written(struct inode *inode, struct buffer_head *root_bh, 4545 handle_t *handle, u32 cpos, u32 len, u32 phys, 4546 struct ocfs2_alloc_context *meta_ac, 4547 struct ocfs2_cached_dealloc_ctxt *dealloc, 4548 enum ocfs2_extent_tree_type et_type) 4549 { 4550 int ret, index; 4551 u64 start_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys); 4552 struct ocfs2_extent_rec split_rec; 4553 struct ocfs2_path *left_path = NULL; 4554 struct ocfs2_extent_list *el; 4555 struct ocfs2_extent_tree *et = NULL; 4556 4557 mlog(0, "Inode %lu cpos %u, len %u, phys %u (%llu)\n", 4558 inode->i_ino, cpos, len, phys, (unsigned long long)start_blkno); 4559 4560 if (!ocfs2_writes_unwritten_extents(OCFS2_SB(inode->i_sb))) { 4561 ocfs2_error(inode->i_sb, "Inode %llu has unwritten extents " 4562 "that are being written to, but the feature bit " 4563 "is not set in the super block.", 4564 (unsigned long long)OCFS2_I(inode)->ip_blkno); 4565 ret = -EROFS; 4566 goto out; 4567 } 4568 4569 et = ocfs2_new_extent_tree(root_bh, et_type); 4570 if (!et) { 4571 ret = -ENOMEM; 4572 mlog_errno(ret); 4573 goto out; 4574 } 4575 4576 /* 4577 * XXX: This should be fixed up so that we just re-insert the 4578 * next extent records. 4579 */ 4580 if (et_type == OCFS2_DINODE_EXTENT) 4581 ocfs2_extent_map_trunc(inode, 0); 4582 4583 left_path = ocfs2_new_path(et->root_bh, et->root_el); 4584 if (!left_path) { 4585 ret = -ENOMEM; 4586 mlog_errno(ret); 4587 goto out; 4588 } 4589 4590 ret = ocfs2_find_path(inode, left_path, cpos); 4591 if (ret) { 4592 mlog_errno(ret); 4593 goto out; 4594 } 4595 el = path_leaf_el(left_path); 4596 4597 index = ocfs2_search_extent_list(el, cpos); 4598 if (index == -1 || index >= le16_to_cpu(el->l_next_free_rec)) { 4599 ocfs2_error(inode->i_sb, 4600 "Inode %llu has an extent at cpos %u which can no " 4601 "longer be found.\n", 4602 (unsigned long long)OCFS2_I(inode)->ip_blkno, cpos); 4603 ret = -EROFS; 4604 goto out; 4605 } 4606 4607 memset(&split_rec, 0, sizeof(struct ocfs2_extent_rec)); 4608 split_rec.e_cpos = cpu_to_le32(cpos); 4609 split_rec.e_leaf_clusters = cpu_to_le16(len); 4610 split_rec.e_blkno = cpu_to_le64(start_blkno); 4611 split_rec.e_flags = path_leaf_el(left_path)->l_recs[index].e_flags; 4612 split_rec.e_flags &= ~OCFS2_EXT_UNWRITTEN; 4613 4614 ret = __ocfs2_mark_extent_written(inode, et, handle, left_path, 4615 index, &split_rec, meta_ac, 4616 dealloc); 4617 if (ret) 4618 mlog_errno(ret); 4619 4620 out: 4621 ocfs2_free_path(left_path); 4622 if (et) 4623 ocfs2_free_extent_tree(et); 4624 return ret; 4625 } 4626 4627 static int ocfs2_split_tree(struct inode *inode, struct ocfs2_extent_tree *et, 4628 handle_t *handle, struct ocfs2_path *path, 4629 int index, u32 new_range, 4630 struct ocfs2_alloc_context *meta_ac) 4631 { 4632 int ret, depth, credits = handle->h_buffer_credits; 4633 struct buffer_head *last_eb_bh = NULL; 4634 struct ocfs2_extent_block *eb; 4635 struct ocfs2_extent_list *rightmost_el, *el; 4636 struct ocfs2_extent_rec split_rec; 4637 struct ocfs2_extent_rec *rec; 4638 struct ocfs2_insert_type insert; 4639 4640 /* 4641 * Setup the record to split before we grow the tree. 4642 */ 4643 el = path_leaf_el(path); 4644 rec = &el->l_recs[index]; 4645 ocfs2_make_right_split_rec(inode->i_sb, &split_rec, new_range, rec); 4646 4647 depth = path->p_tree_depth; 4648 if (depth > 0) { 4649 ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), 4650 ocfs2_get_last_eb_blk(et), 4651 &last_eb_bh, OCFS2_BH_CACHED, inode); 4652 if (ret < 0) { 4653 mlog_errno(ret); 4654 goto out; 4655 } 4656 4657 eb = (struct ocfs2_extent_block *) last_eb_bh->b_data; 4658 rightmost_el = &eb->h_list; 4659 } else 4660 rightmost_el = path_leaf_el(path); 4661 4662 credits += path->p_tree_depth + 4663 ocfs2_extend_meta_needed(et->root_el); 4664 ret = ocfs2_extend_trans(handle, credits); 4665 if (ret) { 4666 mlog_errno(ret); 4667 goto out; 4668 } 4669 4670 if (le16_to_cpu(rightmost_el->l_next_free_rec) == 4671 le16_to_cpu(rightmost_el->l_count)) { 4672 ret = ocfs2_grow_tree(inode, handle, et, &depth, &last_eb_bh, 4673 meta_ac); 4674 if (ret) { 4675 mlog_errno(ret); 4676 goto out; 4677 } 4678 } 4679 4680 memset(&insert, 0, sizeof(struct ocfs2_insert_type)); 4681 insert.ins_appending = APPEND_NONE; 4682 insert.ins_contig = CONTIG_NONE; 4683 insert.ins_split = SPLIT_RIGHT; 4684 insert.ins_tree_depth = depth; 4685 4686 ret = ocfs2_do_insert_extent(inode, handle, et, &split_rec, &insert); 4687 if (ret) 4688 mlog_errno(ret); 4689 4690 out: 4691 brelse(last_eb_bh); 4692 return ret; 4693 } 4694 4695 static int ocfs2_truncate_rec(struct inode *inode, handle_t *handle, 4696 struct ocfs2_path *path, int index, 4697 struct ocfs2_cached_dealloc_ctxt *dealloc, 4698 u32 cpos, u32 len, 4699 struct ocfs2_extent_tree *et) 4700 { 4701 int ret; 4702 u32 left_cpos, rec_range, trunc_range; 4703 int wants_rotate = 0, is_rightmost_tree_rec = 0; 4704 struct super_block *sb = inode->i_sb; 4705 struct ocfs2_path *left_path = NULL; 4706 struct ocfs2_extent_list *el = path_leaf_el(path); 4707 struct ocfs2_extent_rec *rec; 4708 struct ocfs2_extent_block *eb; 4709 4710 if (ocfs2_is_empty_extent(&el->l_recs[0]) && index > 0) { 4711 ret = ocfs2_rotate_tree_left(inode, handle, path, dealloc, et); 4712 if (ret) { 4713 mlog_errno(ret); 4714 goto out; 4715 } 4716 4717 index--; 4718 } 4719 4720 if (index == (le16_to_cpu(el->l_next_free_rec) - 1) && 4721 path->p_tree_depth) { 4722 /* 4723 * Check whether this is the rightmost tree record. If 4724 * we remove all of this record or part of its right 4725 * edge then an update of the record lengths above it 4726 * will be required. 4727 */ 4728 eb = (struct ocfs2_extent_block *)path_leaf_bh(path)->b_data; 4729 if (eb->h_next_leaf_blk == 0) 4730 is_rightmost_tree_rec = 1; 4731 } 4732 4733 rec = &el->l_recs[index]; 4734 if (index == 0 && path->p_tree_depth && 4735 le32_to_cpu(rec->e_cpos) == cpos) { 4736 /* 4737 * Changing the leftmost offset (via partial or whole 4738 * record truncate) of an interior (or rightmost) path 4739 * means we have to update the subtree that is formed 4740 * by this leaf and the one to it's left. 4741 * 4742 * There are two cases we can skip: 4743 * 1) Path is the leftmost one in our inode tree. 4744 * 2) The leaf is rightmost and will be empty after 4745 * we remove the extent record - the rotate code 4746 * knows how to update the newly formed edge. 4747 */ 4748 4749 ret = ocfs2_find_cpos_for_left_leaf(inode->i_sb, path, 4750 &left_cpos); 4751 if (ret) { 4752 mlog_errno(ret); 4753 goto out; 4754 } 4755 4756 if (left_cpos && le16_to_cpu(el->l_next_free_rec) > 1) { 4757 left_path = ocfs2_new_path(path_root_bh(path), 4758 path_root_el(path)); 4759 if (!left_path) { 4760 ret = -ENOMEM; 4761 mlog_errno(ret); 4762 goto out; 4763 } 4764 4765 ret = ocfs2_find_path(inode, left_path, left_cpos); 4766 if (ret) { 4767 mlog_errno(ret); 4768 goto out; 4769 } 4770 } 4771 } 4772 4773 ret = ocfs2_extend_rotate_transaction(handle, 0, 4774 handle->h_buffer_credits, 4775 path); 4776 if (ret) { 4777 mlog_errno(ret); 4778 goto out; 4779 } 4780 4781 ret = ocfs2_journal_access_path(inode, handle, path); 4782 if (ret) { 4783 mlog_errno(ret); 4784 goto out; 4785 } 4786 4787 ret = ocfs2_journal_access_path(inode, handle, left_path); 4788 if (ret) { 4789 mlog_errno(ret); 4790 goto out; 4791 } 4792 4793 rec_range = le32_to_cpu(rec->e_cpos) + ocfs2_rec_clusters(el, rec); 4794 trunc_range = cpos + len; 4795 4796 if (le32_to_cpu(rec->e_cpos) == cpos && rec_range == trunc_range) { 4797 int next_free; 4798 4799 memset(rec, 0, sizeof(*rec)); 4800 ocfs2_cleanup_merge(el, index); 4801 wants_rotate = 1; 4802 4803 next_free = le16_to_cpu(el->l_next_free_rec); 4804 if (is_rightmost_tree_rec && next_free > 1) { 4805 /* 4806 * We skip the edge update if this path will 4807 * be deleted by the rotate code. 4808 */ 4809 rec = &el->l_recs[next_free - 1]; 4810 ocfs2_adjust_rightmost_records(inode, handle, path, 4811 rec); 4812 } 4813 } else if (le32_to_cpu(rec->e_cpos) == cpos) { 4814 /* Remove leftmost portion of the record. */ 4815 le32_add_cpu(&rec->e_cpos, len); 4816 le64_add_cpu(&rec->e_blkno, ocfs2_clusters_to_blocks(sb, len)); 4817 le16_add_cpu(&rec->e_leaf_clusters, -len); 4818 } else if (rec_range == trunc_range) { 4819 /* Remove rightmost portion of the record */ 4820 le16_add_cpu(&rec->e_leaf_clusters, -len); 4821 if (is_rightmost_tree_rec) 4822 ocfs2_adjust_rightmost_records(inode, handle, path, rec); 4823 } else { 4824 /* Caller should have trapped this. */ 4825 mlog(ML_ERROR, "Inode %llu: Invalid record truncate: (%u, %u) " 4826 "(%u, %u)\n", (unsigned long long)OCFS2_I(inode)->ip_blkno, 4827 le32_to_cpu(rec->e_cpos), 4828 le16_to_cpu(rec->e_leaf_clusters), cpos, len); 4829 BUG(); 4830 } 4831 4832 if (left_path) { 4833 int subtree_index; 4834 4835 subtree_index = ocfs2_find_subtree_root(inode, left_path, path); 4836 ocfs2_complete_edge_insert(inode, handle, left_path, path, 4837 subtree_index); 4838 } 4839 4840 ocfs2_journal_dirty(handle, path_leaf_bh(path)); 4841 4842 ret = ocfs2_rotate_tree_left(inode, handle, path, dealloc, et); 4843 if (ret) { 4844 mlog_errno(ret); 4845 goto out; 4846 } 4847 4848 out: 4849 ocfs2_free_path(left_path); 4850 return ret; 4851 } 4852 4853 int ocfs2_remove_extent(struct inode *inode, struct buffer_head *root_bh, 4854 u32 cpos, u32 len, handle_t *handle, 4855 struct ocfs2_alloc_context *meta_ac, 4856 struct ocfs2_cached_dealloc_ctxt *dealloc, 4857 enum ocfs2_extent_tree_type et_type) 4858 { 4859 int ret, index; 4860 u32 rec_range, trunc_range; 4861 struct ocfs2_extent_rec *rec; 4862 struct ocfs2_extent_list *el; 4863 struct ocfs2_path *path = NULL; 4864 struct ocfs2_extent_tree *et = NULL; 4865 4866 et = ocfs2_new_extent_tree(root_bh, et_type); 4867 if (!et) { 4868 ret = -ENOMEM; 4869 mlog_errno(ret); 4870 goto out; 4871 } 4872 4873 ocfs2_extent_map_trunc(inode, 0); 4874 4875 path = ocfs2_new_path(et->root_bh, et->root_el); 4876 if (!path) { 4877 ret = -ENOMEM; 4878 mlog_errno(ret); 4879 goto out; 4880 } 4881 4882 ret = ocfs2_find_path(inode, path, cpos); 4883 if (ret) { 4884 mlog_errno(ret); 4885 goto out; 4886 } 4887 4888 el = path_leaf_el(path); 4889 index = ocfs2_search_extent_list(el, cpos); 4890 if (index == -1 || index >= le16_to_cpu(el->l_next_free_rec)) { 4891 ocfs2_error(inode->i_sb, 4892 "Inode %llu has an extent at cpos %u which can no " 4893 "longer be found.\n", 4894 (unsigned long long)OCFS2_I(inode)->ip_blkno, cpos); 4895 ret = -EROFS; 4896 goto out; 4897 } 4898 4899 /* 4900 * We have 3 cases of extent removal: 4901 * 1) Range covers the entire extent rec 4902 * 2) Range begins or ends on one edge of the extent rec 4903 * 3) Range is in the middle of the extent rec (no shared edges) 4904 * 4905 * For case 1 we remove the extent rec and left rotate to 4906 * fill the hole. 4907 * 4908 * For case 2 we just shrink the existing extent rec, with a 4909 * tree update if the shrinking edge is also the edge of an 4910 * extent block. 4911 * 4912 * For case 3 we do a right split to turn the extent rec into 4913 * something case 2 can handle. 4914 */ 4915 rec = &el->l_recs[index]; 4916 rec_range = le32_to_cpu(rec->e_cpos) + ocfs2_rec_clusters(el, rec); 4917 trunc_range = cpos + len; 4918 4919 BUG_ON(cpos < le32_to_cpu(rec->e_cpos) || trunc_range > rec_range); 4920 4921 mlog(0, "Inode %llu, remove (cpos %u, len %u). Existing index %d " 4922 "(cpos %u, len %u)\n", 4923 (unsigned long long)OCFS2_I(inode)->ip_blkno, cpos, len, index, 4924 le32_to_cpu(rec->e_cpos), ocfs2_rec_clusters(el, rec)); 4925 4926 if (le32_to_cpu(rec->e_cpos) == cpos || rec_range == trunc_range) { 4927 ret = ocfs2_truncate_rec(inode, handle, path, index, dealloc, 4928 cpos, len, et); 4929 if (ret) { 4930 mlog_errno(ret); 4931 goto out; 4932 } 4933 } else { 4934 ret = ocfs2_split_tree(inode, et, handle, path, index, 4935 trunc_range, meta_ac); 4936 if (ret) { 4937 mlog_errno(ret); 4938 goto out; 4939 } 4940 4941 /* 4942 * The split could have manipulated the tree enough to 4943 * move the record location, so we have to look for it again. 4944 */ 4945 ocfs2_reinit_path(path, 1); 4946 4947 ret = ocfs2_find_path(inode, path, cpos); 4948 if (ret) { 4949 mlog_errno(ret); 4950 goto out; 4951 } 4952 4953 el = path_leaf_el(path); 4954 index = ocfs2_search_extent_list(el, cpos); 4955 if (index == -1 || index >= le16_to_cpu(el->l_next_free_rec)) { 4956 ocfs2_error(inode->i_sb, 4957 "Inode %llu: split at cpos %u lost record.", 4958 (unsigned long long)OCFS2_I(inode)->ip_blkno, 4959 cpos); 4960 ret = -EROFS; 4961 goto out; 4962 } 4963 4964 /* 4965 * Double check our values here. If anything is fishy, 4966 * it's easier to catch it at the top level. 4967 */ 4968 rec = &el->l_recs[index]; 4969 rec_range = le32_to_cpu(rec->e_cpos) + 4970 ocfs2_rec_clusters(el, rec); 4971 if (rec_range != trunc_range) { 4972 ocfs2_error(inode->i_sb, 4973 "Inode %llu: error after split at cpos %u" 4974 "trunc len %u, existing record is (%u,%u)", 4975 (unsigned long long)OCFS2_I(inode)->ip_blkno, 4976 cpos, len, le32_to_cpu(rec->e_cpos), 4977 ocfs2_rec_clusters(el, rec)); 4978 ret = -EROFS; 4979 goto out; 4980 } 4981 4982 ret = ocfs2_truncate_rec(inode, handle, path, index, dealloc, 4983 cpos, len, et); 4984 if (ret) { 4985 mlog_errno(ret); 4986 goto out; 4987 } 4988 } 4989 4990 out: 4991 ocfs2_free_path(path); 4992 if (et) 4993 ocfs2_free_extent_tree(et); 4994 return ret; 4995 } 4996 4997 int ocfs2_truncate_log_needs_flush(struct ocfs2_super *osb) 4998 { 4999 struct buffer_head *tl_bh = osb->osb_tl_bh; 5000 struct ocfs2_dinode *di; 5001 struct ocfs2_truncate_log *tl; 5002 5003 di = (struct ocfs2_dinode *) tl_bh->b_data; 5004 tl = &di->id2.i_dealloc; 5005 5006 mlog_bug_on_msg(le16_to_cpu(tl->tl_used) > le16_to_cpu(tl->tl_count), 5007 "slot %d, invalid truncate log parameters: used = " 5008 "%u, count = %u\n", osb->slot_num, 5009 le16_to_cpu(tl->tl_used), le16_to_cpu(tl->tl_count)); 5010 return le16_to_cpu(tl->tl_used) == le16_to_cpu(tl->tl_count); 5011 } 5012 5013 static int ocfs2_truncate_log_can_coalesce(struct ocfs2_truncate_log *tl, 5014 unsigned int new_start) 5015 { 5016 unsigned int tail_index; 5017 unsigned int current_tail; 5018 5019 /* No records, nothing to coalesce */ 5020 if (!le16_to_cpu(tl->tl_used)) 5021 return 0; 5022 5023 tail_index = le16_to_cpu(tl->tl_used) - 1; 5024 current_tail = le32_to_cpu(tl->tl_recs[tail_index].t_start); 5025 current_tail += le32_to_cpu(tl->tl_recs[tail_index].t_clusters); 5026 5027 return current_tail == new_start; 5028 } 5029 5030 int ocfs2_truncate_log_append(struct ocfs2_super *osb, 5031 handle_t *handle, 5032 u64 start_blk, 5033 unsigned int num_clusters) 5034 { 5035 int status, index; 5036 unsigned int start_cluster, tl_count; 5037 struct inode *tl_inode = osb->osb_tl_inode; 5038 struct buffer_head *tl_bh = osb->osb_tl_bh; 5039 struct ocfs2_dinode *di; 5040 struct ocfs2_truncate_log *tl; 5041 5042 mlog_entry("start_blk = %llu, num_clusters = %u\n", 5043 (unsigned long long)start_blk, num_clusters); 5044 5045 BUG_ON(mutex_trylock(&tl_inode->i_mutex)); 5046 5047 start_cluster = ocfs2_blocks_to_clusters(osb->sb, start_blk); 5048 5049 di = (struct ocfs2_dinode *) tl_bh->b_data; 5050 tl = &di->id2.i_dealloc; 5051 if (!OCFS2_IS_VALID_DINODE(di)) { 5052 OCFS2_RO_ON_INVALID_DINODE(osb->sb, di); 5053 status = -EIO; 5054 goto bail; 5055 } 5056 5057 tl_count = le16_to_cpu(tl->tl_count); 5058 mlog_bug_on_msg(tl_count > ocfs2_truncate_recs_per_inode(osb->sb) || 5059 tl_count == 0, 5060 "Truncate record count on #%llu invalid " 5061 "wanted %u, actual %u\n", 5062 (unsigned long long)OCFS2_I(tl_inode)->ip_blkno, 5063 ocfs2_truncate_recs_per_inode(osb->sb), 5064 le16_to_cpu(tl->tl_count)); 5065 5066 /* Caller should have known to flush before calling us. */ 5067 index = le16_to_cpu(tl->tl_used); 5068 if (index >= tl_count) { 5069 status = -ENOSPC; 5070 mlog_errno(status); 5071 goto bail; 5072 } 5073 5074 status = ocfs2_journal_access(handle, tl_inode, tl_bh, 5075 OCFS2_JOURNAL_ACCESS_WRITE); 5076 if (status < 0) { 5077 mlog_errno(status); 5078 goto bail; 5079 } 5080 5081 mlog(0, "Log truncate of %u clusters starting at cluster %u to " 5082 "%llu (index = %d)\n", num_clusters, start_cluster, 5083 (unsigned long long)OCFS2_I(tl_inode)->ip_blkno, index); 5084 5085 if (ocfs2_truncate_log_can_coalesce(tl, start_cluster)) { 5086 /* 5087 * Move index back to the record we are coalescing with. 5088 * ocfs2_truncate_log_can_coalesce() guarantees nonzero 5089 */ 5090 index--; 5091 5092 num_clusters += le32_to_cpu(tl->tl_recs[index].t_clusters); 5093 mlog(0, "Coalesce with index %u (start = %u, clusters = %u)\n", 5094 index, le32_to_cpu(tl->tl_recs[index].t_start), 5095 num_clusters); 5096 } else { 5097 tl->tl_recs[index].t_start = cpu_to_le32(start_cluster); 5098 tl->tl_used = cpu_to_le16(index + 1); 5099 } 5100 tl->tl_recs[index].t_clusters = cpu_to_le32(num_clusters); 5101 5102 status = ocfs2_journal_dirty(handle, tl_bh); 5103 if (status < 0) { 5104 mlog_errno(status); 5105 goto bail; 5106 } 5107 5108 bail: 5109 mlog_exit(status); 5110 return status; 5111 } 5112 5113 static int ocfs2_replay_truncate_records(struct ocfs2_super *osb, 5114 handle_t *handle, 5115 struct inode *data_alloc_inode, 5116 struct buffer_head *data_alloc_bh) 5117 { 5118 int status = 0; 5119 int i; 5120 unsigned int num_clusters; 5121 u64 start_blk; 5122 struct ocfs2_truncate_rec rec; 5123 struct ocfs2_dinode *di; 5124 struct ocfs2_truncate_log *tl; 5125 struct inode *tl_inode = osb->osb_tl_inode; 5126 struct buffer_head *tl_bh = osb->osb_tl_bh; 5127 5128 mlog_entry_void(); 5129 5130 di = (struct ocfs2_dinode *) tl_bh->b_data; 5131 tl = &di->id2.i_dealloc; 5132 i = le16_to_cpu(tl->tl_used) - 1; 5133 while (i >= 0) { 5134 /* Caller has given us at least enough credits to 5135 * update the truncate log dinode */ 5136 status = ocfs2_journal_access(handle, tl_inode, tl_bh, 5137 OCFS2_JOURNAL_ACCESS_WRITE); 5138 if (status < 0) { 5139 mlog_errno(status); 5140 goto bail; 5141 } 5142 5143 tl->tl_used = cpu_to_le16(i); 5144 5145 status = ocfs2_journal_dirty(handle, tl_bh); 5146 if (status < 0) { 5147 mlog_errno(status); 5148 goto bail; 5149 } 5150 5151 /* TODO: Perhaps we can calculate the bulk of the 5152 * credits up front rather than extending like 5153 * this. */ 5154 status = ocfs2_extend_trans(handle, 5155 OCFS2_TRUNCATE_LOG_FLUSH_ONE_REC); 5156 if (status < 0) { 5157 mlog_errno(status); 5158 goto bail; 5159 } 5160 5161 rec = tl->tl_recs[i]; 5162 start_blk = ocfs2_clusters_to_blocks(data_alloc_inode->i_sb, 5163 le32_to_cpu(rec.t_start)); 5164 num_clusters = le32_to_cpu(rec.t_clusters); 5165 5166 /* if start_blk is not set, we ignore the record as 5167 * invalid. */ 5168 if (start_blk) { 5169 mlog(0, "free record %d, start = %u, clusters = %u\n", 5170 i, le32_to_cpu(rec.t_start), num_clusters); 5171 5172 status = ocfs2_free_clusters(handle, data_alloc_inode, 5173 data_alloc_bh, start_blk, 5174 num_clusters); 5175 if (status < 0) { 5176 mlog_errno(status); 5177 goto bail; 5178 } 5179 } 5180 i--; 5181 } 5182 5183 bail: 5184 mlog_exit(status); 5185 return status; 5186 } 5187 5188 /* Expects you to already be holding tl_inode->i_mutex */ 5189 int __ocfs2_flush_truncate_log(struct ocfs2_super *osb) 5190 { 5191 int status; 5192 unsigned int num_to_flush; 5193 handle_t *handle; 5194 struct inode *tl_inode = osb->osb_tl_inode; 5195 struct inode *data_alloc_inode = NULL; 5196 struct buffer_head *tl_bh = osb->osb_tl_bh; 5197 struct buffer_head *data_alloc_bh = NULL; 5198 struct ocfs2_dinode *di; 5199 struct ocfs2_truncate_log *tl; 5200 5201 mlog_entry_void(); 5202 5203 BUG_ON(mutex_trylock(&tl_inode->i_mutex)); 5204 5205 di = (struct ocfs2_dinode *) tl_bh->b_data; 5206 tl = &di->id2.i_dealloc; 5207 if (!OCFS2_IS_VALID_DINODE(di)) { 5208 OCFS2_RO_ON_INVALID_DINODE(osb->sb, di); 5209 status = -EIO; 5210 goto out; 5211 } 5212 5213 num_to_flush = le16_to_cpu(tl->tl_used); 5214 mlog(0, "Flush %u records from truncate log #%llu\n", 5215 num_to_flush, (unsigned long long)OCFS2_I(tl_inode)->ip_blkno); 5216 if (!num_to_flush) { 5217 status = 0; 5218 goto out; 5219 } 5220 5221 data_alloc_inode = ocfs2_get_system_file_inode(osb, 5222 GLOBAL_BITMAP_SYSTEM_INODE, 5223 OCFS2_INVALID_SLOT); 5224 if (!data_alloc_inode) { 5225 status = -EINVAL; 5226 mlog(ML_ERROR, "Could not get bitmap inode!\n"); 5227 goto out; 5228 } 5229 5230 mutex_lock(&data_alloc_inode->i_mutex); 5231 5232 status = ocfs2_inode_lock(data_alloc_inode, &data_alloc_bh, 1); 5233 if (status < 0) { 5234 mlog_errno(status); 5235 goto out_mutex; 5236 } 5237 5238 handle = ocfs2_start_trans(osb, OCFS2_TRUNCATE_LOG_UPDATE); 5239 if (IS_ERR(handle)) { 5240 status = PTR_ERR(handle); 5241 mlog_errno(status); 5242 goto out_unlock; 5243 } 5244 5245 status = ocfs2_replay_truncate_records(osb, handle, data_alloc_inode, 5246 data_alloc_bh); 5247 if (status < 0) 5248 mlog_errno(status); 5249 5250 ocfs2_commit_trans(osb, handle); 5251 5252 out_unlock: 5253 brelse(data_alloc_bh); 5254 ocfs2_inode_unlock(data_alloc_inode, 1); 5255 5256 out_mutex: 5257 mutex_unlock(&data_alloc_inode->i_mutex); 5258 iput(data_alloc_inode); 5259 5260 out: 5261 mlog_exit(status); 5262 return status; 5263 } 5264 5265 int ocfs2_flush_truncate_log(struct ocfs2_super *osb) 5266 { 5267 int status; 5268 struct inode *tl_inode = osb->osb_tl_inode; 5269 5270 mutex_lock(&tl_inode->i_mutex); 5271 status = __ocfs2_flush_truncate_log(osb); 5272 mutex_unlock(&tl_inode->i_mutex); 5273 5274 return status; 5275 } 5276 5277 static void ocfs2_truncate_log_worker(struct work_struct *work) 5278 { 5279 int status; 5280 struct ocfs2_super *osb = 5281 container_of(work, struct ocfs2_super, 5282 osb_truncate_log_wq.work); 5283 5284 mlog_entry_void(); 5285 5286 status = ocfs2_flush_truncate_log(osb); 5287 if (status < 0) 5288 mlog_errno(status); 5289 else 5290 ocfs2_init_inode_steal_slot(osb); 5291 5292 mlog_exit(status); 5293 } 5294 5295 #define OCFS2_TRUNCATE_LOG_FLUSH_INTERVAL (2 * HZ) 5296 void ocfs2_schedule_truncate_log_flush(struct ocfs2_super *osb, 5297 int cancel) 5298 { 5299 if (osb->osb_tl_inode) { 5300 /* We want to push off log flushes while truncates are 5301 * still running. */ 5302 if (cancel) 5303 cancel_delayed_work(&osb->osb_truncate_log_wq); 5304 5305 queue_delayed_work(ocfs2_wq, &osb->osb_truncate_log_wq, 5306 OCFS2_TRUNCATE_LOG_FLUSH_INTERVAL); 5307 } 5308 } 5309 5310 static int ocfs2_get_truncate_log_info(struct ocfs2_super *osb, 5311 int slot_num, 5312 struct inode **tl_inode, 5313 struct buffer_head **tl_bh) 5314 { 5315 int status; 5316 struct inode *inode = NULL; 5317 struct buffer_head *bh = NULL; 5318 5319 inode = ocfs2_get_system_file_inode(osb, 5320 TRUNCATE_LOG_SYSTEM_INODE, 5321 slot_num); 5322 if (!inode) { 5323 status = -EINVAL; 5324 mlog(ML_ERROR, "Could not get load truncate log inode!\n"); 5325 goto bail; 5326 } 5327 5328 status = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno, &bh, 5329 OCFS2_BH_CACHED, inode); 5330 if (status < 0) { 5331 iput(inode); 5332 mlog_errno(status); 5333 goto bail; 5334 } 5335 5336 *tl_inode = inode; 5337 *tl_bh = bh; 5338 bail: 5339 mlog_exit(status); 5340 return status; 5341 } 5342 5343 /* called during the 1st stage of node recovery. we stamp a clean 5344 * truncate log and pass back a copy for processing later. if the 5345 * truncate log does not require processing, a *tl_copy is set to 5346 * NULL. */ 5347 int ocfs2_begin_truncate_log_recovery(struct ocfs2_super *osb, 5348 int slot_num, 5349 struct ocfs2_dinode **tl_copy) 5350 { 5351 int status; 5352 struct inode *tl_inode = NULL; 5353 struct buffer_head *tl_bh = NULL; 5354 struct ocfs2_dinode *di; 5355 struct ocfs2_truncate_log *tl; 5356 5357 *tl_copy = NULL; 5358 5359 mlog(0, "recover truncate log from slot %d\n", slot_num); 5360 5361 status = ocfs2_get_truncate_log_info(osb, slot_num, &tl_inode, &tl_bh); 5362 if (status < 0) { 5363 mlog_errno(status); 5364 goto bail; 5365 } 5366 5367 di = (struct ocfs2_dinode *) tl_bh->b_data; 5368 tl = &di->id2.i_dealloc; 5369 if (!OCFS2_IS_VALID_DINODE(di)) { 5370 OCFS2_RO_ON_INVALID_DINODE(tl_inode->i_sb, di); 5371 status = -EIO; 5372 goto bail; 5373 } 5374 5375 if (le16_to_cpu(tl->tl_used)) { 5376 mlog(0, "We'll have %u logs to recover\n", 5377 le16_to_cpu(tl->tl_used)); 5378 5379 *tl_copy = kmalloc(tl_bh->b_size, GFP_KERNEL); 5380 if (!(*tl_copy)) { 5381 status = -ENOMEM; 5382 mlog_errno(status); 5383 goto bail; 5384 } 5385 5386 /* Assuming the write-out below goes well, this copy 5387 * will be passed back to recovery for processing. */ 5388 memcpy(*tl_copy, tl_bh->b_data, tl_bh->b_size); 5389 5390 /* All we need to do to clear the truncate log is set 5391 * tl_used. */ 5392 tl->tl_used = 0; 5393 5394 status = ocfs2_write_block(osb, tl_bh, tl_inode); 5395 if (status < 0) { 5396 mlog_errno(status); 5397 goto bail; 5398 } 5399 } 5400 5401 bail: 5402 if (tl_inode) 5403 iput(tl_inode); 5404 if (tl_bh) 5405 brelse(tl_bh); 5406 5407 if (status < 0 && (*tl_copy)) { 5408 kfree(*tl_copy); 5409 *tl_copy = NULL; 5410 } 5411 5412 mlog_exit(status); 5413 return status; 5414 } 5415 5416 int ocfs2_complete_truncate_log_recovery(struct ocfs2_super *osb, 5417 struct ocfs2_dinode *tl_copy) 5418 { 5419 int status = 0; 5420 int i; 5421 unsigned int clusters, num_recs, start_cluster; 5422 u64 start_blk; 5423 handle_t *handle; 5424 struct inode *tl_inode = osb->osb_tl_inode; 5425 struct ocfs2_truncate_log *tl; 5426 5427 mlog_entry_void(); 5428 5429 if (OCFS2_I(tl_inode)->ip_blkno == le64_to_cpu(tl_copy->i_blkno)) { 5430 mlog(ML_ERROR, "Asked to recover my own truncate log!\n"); 5431 return -EINVAL; 5432 } 5433 5434 tl = &tl_copy->id2.i_dealloc; 5435 num_recs = le16_to_cpu(tl->tl_used); 5436 mlog(0, "cleanup %u records from %llu\n", num_recs, 5437 (unsigned long long)le64_to_cpu(tl_copy->i_blkno)); 5438 5439 mutex_lock(&tl_inode->i_mutex); 5440 for(i = 0; i < num_recs; i++) { 5441 if (ocfs2_truncate_log_needs_flush(osb)) { 5442 status = __ocfs2_flush_truncate_log(osb); 5443 if (status < 0) { 5444 mlog_errno(status); 5445 goto bail_up; 5446 } 5447 } 5448 5449 handle = ocfs2_start_trans(osb, OCFS2_TRUNCATE_LOG_UPDATE); 5450 if (IS_ERR(handle)) { 5451 status = PTR_ERR(handle); 5452 mlog_errno(status); 5453 goto bail_up; 5454 } 5455 5456 clusters = le32_to_cpu(tl->tl_recs[i].t_clusters); 5457 start_cluster = le32_to_cpu(tl->tl_recs[i].t_start); 5458 start_blk = ocfs2_clusters_to_blocks(osb->sb, start_cluster); 5459 5460 status = ocfs2_truncate_log_append(osb, handle, 5461 start_blk, clusters); 5462 ocfs2_commit_trans(osb, handle); 5463 if (status < 0) { 5464 mlog_errno(status); 5465 goto bail_up; 5466 } 5467 } 5468 5469 bail_up: 5470 mutex_unlock(&tl_inode->i_mutex); 5471 5472 mlog_exit(status); 5473 return status; 5474 } 5475 5476 void ocfs2_truncate_log_shutdown(struct ocfs2_super *osb) 5477 { 5478 int status; 5479 struct inode *tl_inode = osb->osb_tl_inode; 5480 5481 mlog_entry_void(); 5482 5483 if (tl_inode) { 5484 cancel_delayed_work(&osb->osb_truncate_log_wq); 5485 flush_workqueue(ocfs2_wq); 5486 5487 status = ocfs2_flush_truncate_log(osb); 5488 if (status < 0) 5489 mlog_errno(status); 5490 5491 brelse(osb->osb_tl_bh); 5492 iput(osb->osb_tl_inode); 5493 } 5494 5495 mlog_exit_void(); 5496 } 5497 5498 int ocfs2_truncate_log_init(struct ocfs2_super *osb) 5499 { 5500 int status; 5501 struct inode *tl_inode = NULL; 5502 struct buffer_head *tl_bh = NULL; 5503 5504 mlog_entry_void(); 5505 5506 status = ocfs2_get_truncate_log_info(osb, 5507 osb->slot_num, 5508 &tl_inode, 5509 &tl_bh); 5510 if (status < 0) 5511 mlog_errno(status); 5512 5513 /* ocfs2_truncate_log_shutdown keys on the existence of 5514 * osb->osb_tl_inode so we don't set any of the osb variables 5515 * until we're sure all is well. */ 5516 INIT_DELAYED_WORK(&osb->osb_truncate_log_wq, 5517 ocfs2_truncate_log_worker); 5518 osb->osb_tl_bh = tl_bh; 5519 osb->osb_tl_inode = tl_inode; 5520 5521 mlog_exit(status); 5522 return status; 5523 } 5524 5525 /* 5526 * Delayed de-allocation of suballocator blocks. 5527 * 5528 * Some sets of block de-allocations might involve multiple suballocator inodes. 5529 * 5530 * The locking for this can get extremely complicated, especially when 5531 * the suballocator inodes to delete from aren't known until deep 5532 * within an unrelated codepath. 5533 * 5534 * ocfs2_extent_block structures are a good example of this - an inode 5535 * btree could have been grown by any number of nodes each allocating 5536 * out of their own suballoc inode. 5537 * 5538 * These structures allow the delay of block de-allocation until a 5539 * later time, when locking of multiple cluster inodes won't cause 5540 * deadlock. 5541 */ 5542 5543 /* 5544 * Describes a single block free from a suballocator 5545 */ 5546 struct ocfs2_cached_block_free { 5547 struct ocfs2_cached_block_free *free_next; 5548 u64 free_blk; 5549 unsigned int free_bit; 5550 }; 5551 5552 struct ocfs2_per_slot_free_list { 5553 struct ocfs2_per_slot_free_list *f_next_suballocator; 5554 int f_inode_type; 5555 int f_slot; 5556 struct ocfs2_cached_block_free *f_first; 5557 }; 5558 5559 static int ocfs2_free_cached_items(struct ocfs2_super *osb, 5560 int sysfile_type, 5561 int slot, 5562 struct ocfs2_cached_block_free *head) 5563 { 5564 int ret; 5565 u64 bg_blkno; 5566 handle_t *handle; 5567 struct inode *inode; 5568 struct buffer_head *di_bh = NULL; 5569 struct ocfs2_cached_block_free *tmp; 5570 5571 inode = ocfs2_get_system_file_inode(osb, sysfile_type, slot); 5572 if (!inode) { 5573 ret = -EINVAL; 5574 mlog_errno(ret); 5575 goto out; 5576 } 5577 5578 mutex_lock(&inode->i_mutex); 5579 5580 ret = ocfs2_inode_lock(inode, &di_bh, 1); 5581 if (ret) { 5582 mlog_errno(ret); 5583 goto out_mutex; 5584 } 5585 5586 handle = ocfs2_start_trans(osb, OCFS2_SUBALLOC_FREE); 5587 if (IS_ERR(handle)) { 5588 ret = PTR_ERR(handle); 5589 mlog_errno(ret); 5590 goto out_unlock; 5591 } 5592 5593 while (head) { 5594 bg_blkno = ocfs2_which_suballoc_group(head->free_blk, 5595 head->free_bit); 5596 mlog(0, "Free bit: (bit %u, blkno %llu)\n", 5597 head->free_bit, (unsigned long long)head->free_blk); 5598 5599 ret = ocfs2_free_suballoc_bits(handle, inode, di_bh, 5600 head->free_bit, bg_blkno, 1); 5601 if (ret) { 5602 mlog_errno(ret); 5603 goto out_journal; 5604 } 5605 5606 ret = ocfs2_extend_trans(handle, OCFS2_SUBALLOC_FREE); 5607 if (ret) { 5608 mlog_errno(ret); 5609 goto out_journal; 5610 } 5611 5612 tmp = head; 5613 head = head->free_next; 5614 kfree(tmp); 5615 } 5616 5617 out_journal: 5618 ocfs2_commit_trans(osb, handle); 5619 5620 out_unlock: 5621 ocfs2_inode_unlock(inode, 1); 5622 brelse(di_bh); 5623 out_mutex: 5624 mutex_unlock(&inode->i_mutex); 5625 iput(inode); 5626 out: 5627 while(head) { 5628 /* Premature exit may have left some dangling items. */ 5629 tmp = head; 5630 head = head->free_next; 5631 kfree(tmp); 5632 } 5633 5634 return ret; 5635 } 5636 5637 int ocfs2_run_deallocs(struct ocfs2_super *osb, 5638 struct ocfs2_cached_dealloc_ctxt *ctxt) 5639 { 5640 int ret = 0, ret2; 5641 struct ocfs2_per_slot_free_list *fl; 5642 5643 if (!ctxt) 5644 return 0; 5645 5646 while (ctxt->c_first_suballocator) { 5647 fl = ctxt->c_first_suballocator; 5648 5649 if (fl->f_first) { 5650 mlog(0, "Free items: (type %u, slot %d)\n", 5651 fl->f_inode_type, fl->f_slot); 5652 ret2 = ocfs2_free_cached_items(osb, fl->f_inode_type, 5653 fl->f_slot, fl->f_first); 5654 if (ret2) 5655 mlog_errno(ret2); 5656 if (!ret) 5657 ret = ret2; 5658 } 5659 5660 ctxt->c_first_suballocator = fl->f_next_suballocator; 5661 kfree(fl); 5662 } 5663 5664 return ret; 5665 } 5666 5667 static struct ocfs2_per_slot_free_list * 5668 ocfs2_find_per_slot_free_list(int type, 5669 int slot, 5670 struct ocfs2_cached_dealloc_ctxt *ctxt) 5671 { 5672 struct ocfs2_per_slot_free_list *fl = ctxt->c_first_suballocator; 5673 5674 while (fl) { 5675 if (fl->f_inode_type == type && fl->f_slot == slot) 5676 return fl; 5677 5678 fl = fl->f_next_suballocator; 5679 } 5680 5681 fl = kmalloc(sizeof(*fl), GFP_NOFS); 5682 if (fl) { 5683 fl->f_inode_type = type; 5684 fl->f_slot = slot; 5685 fl->f_first = NULL; 5686 fl->f_next_suballocator = ctxt->c_first_suballocator; 5687 5688 ctxt->c_first_suballocator = fl; 5689 } 5690 return fl; 5691 } 5692 5693 static int ocfs2_cache_block_dealloc(struct ocfs2_cached_dealloc_ctxt *ctxt, 5694 int type, int slot, u64 blkno, 5695 unsigned int bit) 5696 { 5697 int ret; 5698 struct ocfs2_per_slot_free_list *fl; 5699 struct ocfs2_cached_block_free *item; 5700 5701 fl = ocfs2_find_per_slot_free_list(type, slot, ctxt); 5702 if (fl == NULL) { 5703 ret = -ENOMEM; 5704 mlog_errno(ret); 5705 goto out; 5706 } 5707 5708 item = kmalloc(sizeof(*item), GFP_NOFS); 5709 if (item == NULL) { 5710 ret = -ENOMEM; 5711 mlog_errno(ret); 5712 goto out; 5713 } 5714 5715 mlog(0, "Insert: (type %d, slot %u, bit %u, blk %llu)\n", 5716 type, slot, bit, (unsigned long long)blkno); 5717 5718 item->free_blk = blkno; 5719 item->free_bit = bit; 5720 item->free_next = fl->f_first; 5721 5722 fl->f_first = item; 5723 5724 ret = 0; 5725 out: 5726 return ret; 5727 } 5728 5729 static int ocfs2_cache_extent_block_free(struct ocfs2_cached_dealloc_ctxt *ctxt, 5730 struct ocfs2_extent_block *eb) 5731 { 5732 return ocfs2_cache_block_dealloc(ctxt, EXTENT_ALLOC_SYSTEM_INODE, 5733 le16_to_cpu(eb->h_suballoc_slot), 5734 le64_to_cpu(eb->h_blkno), 5735 le16_to_cpu(eb->h_suballoc_bit)); 5736 } 5737 5738 /* This function will figure out whether the currently last extent 5739 * block will be deleted, and if it will, what the new last extent 5740 * block will be so we can update his h_next_leaf_blk field, as well 5741 * as the dinodes i_last_eb_blk */ 5742 static int ocfs2_find_new_last_ext_blk(struct inode *inode, 5743 unsigned int clusters_to_del, 5744 struct ocfs2_path *path, 5745 struct buffer_head **new_last_eb) 5746 { 5747 int next_free, ret = 0; 5748 u32 cpos; 5749 struct ocfs2_extent_rec *rec; 5750 struct ocfs2_extent_block *eb; 5751 struct ocfs2_extent_list *el; 5752 struct buffer_head *bh = NULL; 5753 5754 *new_last_eb = NULL; 5755 5756 /* we have no tree, so of course, no last_eb. */ 5757 if (!path->p_tree_depth) 5758 goto out; 5759 5760 /* trunc to zero special case - this makes tree_depth = 0 5761 * regardless of what it is. */ 5762 if (OCFS2_I(inode)->ip_clusters == clusters_to_del) 5763 goto out; 5764 5765 el = path_leaf_el(path); 5766 BUG_ON(!el->l_next_free_rec); 5767 5768 /* 5769 * Make sure that this extent list will actually be empty 5770 * after we clear away the data. We can shortcut out if 5771 * there's more than one non-empty extent in the 5772 * list. Otherwise, a check of the remaining extent is 5773 * necessary. 5774 */ 5775 next_free = le16_to_cpu(el->l_next_free_rec); 5776 rec = NULL; 5777 if (ocfs2_is_empty_extent(&el->l_recs[0])) { 5778 if (next_free > 2) 5779 goto out; 5780 5781 /* We may have a valid extent in index 1, check it. */ 5782 if (next_free == 2) 5783 rec = &el->l_recs[1]; 5784 5785 /* 5786 * Fall through - no more nonempty extents, so we want 5787 * to delete this leaf. 5788 */ 5789 } else { 5790 if (next_free > 1) 5791 goto out; 5792 5793 rec = &el->l_recs[0]; 5794 } 5795 5796 if (rec) { 5797 /* 5798 * Check it we'll only be trimming off the end of this 5799 * cluster. 5800 */ 5801 if (le16_to_cpu(rec->e_leaf_clusters) > clusters_to_del) 5802 goto out; 5803 } 5804 5805 ret = ocfs2_find_cpos_for_left_leaf(inode->i_sb, path, &cpos); 5806 if (ret) { 5807 mlog_errno(ret); 5808 goto out; 5809 } 5810 5811 ret = ocfs2_find_leaf(inode, path_root_el(path), cpos, &bh); 5812 if (ret) { 5813 mlog_errno(ret); 5814 goto out; 5815 } 5816 5817 eb = (struct ocfs2_extent_block *) bh->b_data; 5818 el = &eb->h_list; 5819 if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) { 5820 OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb); 5821 ret = -EROFS; 5822 goto out; 5823 } 5824 5825 *new_last_eb = bh; 5826 get_bh(*new_last_eb); 5827 mlog(0, "returning block %llu, (cpos: %u)\n", 5828 (unsigned long long)le64_to_cpu(eb->h_blkno), cpos); 5829 out: 5830 brelse(bh); 5831 5832 return ret; 5833 } 5834 5835 /* 5836 * Trim some clusters off the rightmost edge of a tree. Only called 5837 * during truncate. 5838 * 5839 * The caller needs to: 5840 * - start journaling of each path component. 5841 * - compute and fully set up any new last ext block 5842 */ 5843 static int ocfs2_trim_tree(struct inode *inode, struct ocfs2_path *path, 5844 handle_t *handle, struct ocfs2_truncate_context *tc, 5845 u32 clusters_to_del, u64 *delete_start) 5846 { 5847 int ret, i, index = path->p_tree_depth; 5848 u32 new_edge = 0; 5849 u64 deleted_eb = 0; 5850 struct buffer_head *bh; 5851 struct ocfs2_extent_list *el; 5852 struct ocfs2_extent_rec *rec; 5853 5854 *delete_start = 0; 5855 5856 while (index >= 0) { 5857 bh = path->p_node[index].bh; 5858 el = path->p_node[index].el; 5859 5860 mlog(0, "traveling tree (index = %d, block = %llu)\n", 5861 index, (unsigned long long)bh->b_blocknr); 5862 5863 BUG_ON(le16_to_cpu(el->l_next_free_rec) == 0); 5864 5865 if (index != 5866 (path->p_tree_depth - le16_to_cpu(el->l_tree_depth))) { 5867 ocfs2_error(inode->i_sb, 5868 "Inode %lu has invalid ext. block %llu", 5869 inode->i_ino, 5870 (unsigned long long)bh->b_blocknr); 5871 ret = -EROFS; 5872 goto out; 5873 } 5874 5875 find_tail_record: 5876 i = le16_to_cpu(el->l_next_free_rec) - 1; 5877 rec = &el->l_recs[i]; 5878 5879 mlog(0, "Extent list before: record %d: (%u, %u, %llu), " 5880 "next = %u\n", i, le32_to_cpu(rec->e_cpos), 5881 ocfs2_rec_clusters(el, rec), 5882 (unsigned long long)le64_to_cpu(rec->e_blkno), 5883 le16_to_cpu(el->l_next_free_rec)); 5884 5885 BUG_ON(ocfs2_rec_clusters(el, rec) < clusters_to_del); 5886 5887 if (le16_to_cpu(el->l_tree_depth) == 0) { 5888 /* 5889 * If the leaf block contains a single empty 5890 * extent and no records, we can just remove 5891 * the block. 5892 */ 5893 if (i == 0 && ocfs2_is_empty_extent(rec)) { 5894 memset(rec, 0, 5895 sizeof(struct ocfs2_extent_rec)); 5896 el->l_next_free_rec = cpu_to_le16(0); 5897 5898 goto delete; 5899 } 5900 5901 /* 5902 * Remove any empty extents by shifting things 5903 * left. That should make life much easier on 5904 * the code below. This condition is rare 5905 * enough that we shouldn't see a performance 5906 * hit. 5907 */ 5908 if (ocfs2_is_empty_extent(&el->l_recs[0])) { 5909 le16_add_cpu(&el->l_next_free_rec, -1); 5910 5911 for(i = 0; 5912 i < le16_to_cpu(el->l_next_free_rec); i++) 5913 el->l_recs[i] = el->l_recs[i + 1]; 5914 5915 memset(&el->l_recs[i], 0, 5916 sizeof(struct ocfs2_extent_rec)); 5917 5918 /* 5919 * We've modified our extent list. The 5920 * simplest way to handle this change 5921 * is to being the search from the 5922 * start again. 5923 */ 5924 goto find_tail_record; 5925 } 5926 5927 le16_add_cpu(&rec->e_leaf_clusters, -clusters_to_del); 5928 5929 /* 5930 * We'll use "new_edge" on our way back up the 5931 * tree to know what our rightmost cpos is. 5932 */ 5933 new_edge = le16_to_cpu(rec->e_leaf_clusters); 5934 new_edge += le32_to_cpu(rec->e_cpos); 5935 5936 /* 5937 * The caller will use this to delete data blocks. 5938 */ 5939 *delete_start = le64_to_cpu(rec->e_blkno) 5940 + ocfs2_clusters_to_blocks(inode->i_sb, 5941 le16_to_cpu(rec->e_leaf_clusters)); 5942 5943 /* 5944 * If it's now empty, remove this record. 5945 */ 5946 if (le16_to_cpu(rec->e_leaf_clusters) == 0) { 5947 memset(rec, 0, 5948 sizeof(struct ocfs2_extent_rec)); 5949 le16_add_cpu(&el->l_next_free_rec, -1); 5950 } 5951 } else { 5952 if (le64_to_cpu(rec->e_blkno) == deleted_eb) { 5953 memset(rec, 0, 5954 sizeof(struct ocfs2_extent_rec)); 5955 le16_add_cpu(&el->l_next_free_rec, -1); 5956 5957 goto delete; 5958 } 5959 5960 /* Can this actually happen? */ 5961 if (le16_to_cpu(el->l_next_free_rec) == 0) 5962 goto delete; 5963 5964 /* 5965 * We never actually deleted any clusters 5966 * because our leaf was empty. There's no 5967 * reason to adjust the rightmost edge then. 5968 */ 5969 if (new_edge == 0) 5970 goto delete; 5971 5972 rec->e_int_clusters = cpu_to_le32(new_edge); 5973 le32_add_cpu(&rec->e_int_clusters, 5974 -le32_to_cpu(rec->e_cpos)); 5975 5976 /* 5977 * A deleted child record should have been 5978 * caught above. 5979 */ 5980 BUG_ON(le32_to_cpu(rec->e_int_clusters) == 0); 5981 } 5982 5983 delete: 5984 ret = ocfs2_journal_dirty(handle, bh); 5985 if (ret) { 5986 mlog_errno(ret); 5987 goto out; 5988 } 5989 5990 mlog(0, "extent list container %llu, after: record %d: " 5991 "(%u, %u, %llu), next = %u.\n", 5992 (unsigned long long)bh->b_blocknr, i, 5993 le32_to_cpu(rec->e_cpos), ocfs2_rec_clusters(el, rec), 5994 (unsigned long long)le64_to_cpu(rec->e_blkno), 5995 le16_to_cpu(el->l_next_free_rec)); 5996 5997 /* 5998 * We must be careful to only attempt delete of an 5999 * extent block (and not the root inode block). 6000 */ 6001 if (index > 0 && le16_to_cpu(el->l_next_free_rec) == 0) { 6002 struct ocfs2_extent_block *eb = 6003 (struct ocfs2_extent_block *)bh->b_data; 6004 6005 /* 6006 * Save this for use when processing the 6007 * parent block. 6008 */ 6009 deleted_eb = le64_to_cpu(eb->h_blkno); 6010 6011 mlog(0, "deleting this extent block.\n"); 6012 6013 ocfs2_remove_from_cache(inode, bh); 6014 6015 BUG_ON(ocfs2_rec_clusters(el, &el->l_recs[0])); 6016 BUG_ON(le32_to_cpu(el->l_recs[0].e_cpos)); 6017 BUG_ON(le64_to_cpu(el->l_recs[0].e_blkno)); 6018 6019 ret = ocfs2_cache_extent_block_free(&tc->tc_dealloc, eb); 6020 /* An error here is not fatal. */ 6021 if (ret < 0) 6022 mlog_errno(ret); 6023 } else { 6024 deleted_eb = 0; 6025 } 6026 6027 index--; 6028 } 6029 6030 ret = 0; 6031 out: 6032 return ret; 6033 } 6034 6035 static int ocfs2_do_truncate(struct ocfs2_super *osb, 6036 unsigned int clusters_to_del, 6037 struct inode *inode, 6038 struct buffer_head *fe_bh, 6039 handle_t *handle, 6040 struct ocfs2_truncate_context *tc, 6041 struct ocfs2_path *path) 6042 { 6043 int status; 6044 struct ocfs2_dinode *fe; 6045 struct ocfs2_extent_block *last_eb = NULL; 6046 struct ocfs2_extent_list *el; 6047 struct buffer_head *last_eb_bh = NULL; 6048 u64 delete_blk = 0; 6049 6050 fe = (struct ocfs2_dinode *) fe_bh->b_data; 6051 6052 status = ocfs2_find_new_last_ext_blk(inode, clusters_to_del, 6053 path, &last_eb_bh); 6054 if (status < 0) { 6055 mlog_errno(status); 6056 goto bail; 6057 } 6058 6059 /* 6060 * Each component will be touched, so we might as well journal 6061 * here to avoid having to handle errors later. 6062 */ 6063 status = ocfs2_journal_access_path(inode, handle, path); 6064 if (status < 0) { 6065 mlog_errno(status); 6066 goto bail; 6067 } 6068 6069 if (last_eb_bh) { 6070 status = ocfs2_journal_access(handle, inode, last_eb_bh, 6071 OCFS2_JOURNAL_ACCESS_WRITE); 6072 if (status < 0) { 6073 mlog_errno(status); 6074 goto bail; 6075 } 6076 6077 last_eb = (struct ocfs2_extent_block *) last_eb_bh->b_data; 6078 } 6079 6080 el = &(fe->id2.i_list); 6081 6082 /* 6083 * Lower levels depend on this never happening, but it's best 6084 * to check it up here before changing the tree. 6085 */ 6086 if (el->l_tree_depth && el->l_recs[0].e_int_clusters == 0) { 6087 ocfs2_error(inode->i_sb, 6088 "Inode %lu has an empty extent record, depth %u\n", 6089 inode->i_ino, le16_to_cpu(el->l_tree_depth)); 6090 status = -EROFS; 6091 goto bail; 6092 } 6093 6094 spin_lock(&OCFS2_I(inode)->ip_lock); 6095 OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters) - 6096 clusters_to_del; 6097 spin_unlock(&OCFS2_I(inode)->ip_lock); 6098 le32_add_cpu(&fe->i_clusters, -clusters_to_del); 6099 inode->i_blocks = ocfs2_inode_sector_count(inode); 6100 6101 status = ocfs2_trim_tree(inode, path, handle, tc, 6102 clusters_to_del, &delete_blk); 6103 if (status) { 6104 mlog_errno(status); 6105 goto bail; 6106 } 6107 6108 if (le32_to_cpu(fe->i_clusters) == 0) { 6109 /* trunc to zero is a special case. */ 6110 el->l_tree_depth = 0; 6111 fe->i_last_eb_blk = 0; 6112 } else if (last_eb) 6113 fe->i_last_eb_blk = last_eb->h_blkno; 6114 6115 status = ocfs2_journal_dirty(handle, fe_bh); 6116 if (status < 0) { 6117 mlog_errno(status); 6118 goto bail; 6119 } 6120 6121 if (last_eb) { 6122 /* If there will be a new last extent block, then by 6123 * definition, there cannot be any leaves to the right of 6124 * him. */ 6125 last_eb->h_next_leaf_blk = 0; 6126 status = ocfs2_journal_dirty(handle, last_eb_bh); 6127 if (status < 0) { 6128 mlog_errno(status); 6129 goto bail; 6130 } 6131 } 6132 6133 if (delete_blk) { 6134 status = ocfs2_truncate_log_append(osb, handle, delete_blk, 6135 clusters_to_del); 6136 if (status < 0) { 6137 mlog_errno(status); 6138 goto bail; 6139 } 6140 } 6141 status = 0; 6142 bail: 6143 6144 mlog_exit(status); 6145 return status; 6146 } 6147 6148 static int ocfs2_writeback_zero_func(handle_t *handle, struct buffer_head *bh) 6149 { 6150 set_buffer_uptodate(bh); 6151 mark_buffer_dirty(bh); 6152 return 0; 6153 } 6154 6155 static int ocfs2_ordered_zero_func(handle_t *handle, struct buffer_head *bh) 6156 { 6157 set_buffer_uptodate(bh); 6158 mark_buffer_dirty(bh); 6159 return ocfs2_journal_dirty_data(handle, bh); 6160 } 6161 6162 static void ocfs2_map_and_dirty_page(struct inode *inode, handle_t *handle, 6163 unsigned int from, unsigned int to, 6164 struct page *page, int zero, u64 *phys) 6165 { 6166 int ret, partial = 0; 6167 6168 ret = ocfs2_map_page_blocks(page, phys, inode, from, to, 0); 6169 if (ret) 6170 mlog_errno(ret); 6171 6172 if (zero) 6173 zero_user_segment(page, from, to); 6174 6175 /* 6176 * Need to set the buffers we zero'd into uptodate 6177 * here if they aren't - ocfs2_map_page_blocks() 6178 * might've skipped some 6179 */ 6180 if (ocfs2_should_order_data(inode)) { 6181 ret = walk_page_buffers(handle, 6182 page_buffers(page), 6183 from, to, &partial, 6184 ocfs2_ordered_zero_func); 6185 if (ret < 0) 6186 mlog_errno(ret); 6187 } else { 6188 ret = walk_page_buffers(handle, page_buffers(page), 6189 from, to, &partial, 6190 ocfs2_writeback_zero_func); 6191 if (ret < 0) 6192 mlog_errno(ret); 6193 } 6194 6195 if (!partial) 6196 SetPageUptodate(page); 6197 6198 flush_dcache_page(page); 6199 } 6200 6201 static void ocfs2_zero_cluster_pages(struct inode *inode, loff_t start, 6202 loff_t end, struct page **pages, 6203 int numpages, u64 phys, handle_t *handle) 6204 { 6205 int i; 6206 struct page *page; 6207 unsigned int from, to = PAGE_CACHE_SIZE; 6208 struct super_block *sb = inode->i_sb; 6209 6210 BUG_ON(!ocfs2_sparse_alloc(OCFS2_SB(sb))); 6211 6212 if (numpages == 0) 6213 goto out; 6214 6215 to = PAGE_CACHE_SIZE; 6216 for(i = 0; i < numpages; i++) { 6217 page = pages[i]; 6218 6219 from = start & (PAGE_CACHE_SIZE - 1); 6220 if ((end >> PAGE_CACHE_SHIFT) == page->index) 6221 to = end & (PAGE_CACHE_SIZE - 1); 6222 6223 BUG_ON(from > PAGE_CACHE_SIZE); 6224 BUG_ON(to > PAGE_CACHE_SIZE); 6225 6226 ocfs2_map_and_dirty_page(inode, handle, from, to, page, 1, 6227 &phys); 6228 6229 start = (page->index + 1) << PAGE_CACHE_SHIFT; 6230 } 6231 out: 6232 if (pages) 6233 ocfs2_unlock_and_free_pages(pages, numpages); 6234 } 6235 6236 static int ocfs2_grab_eof_pages(struct inode *inode, loff_t start, loff_t end, 6237 struct page **pages, int *num) 6238 { 6239 int numpages, ret = 0; 6240 struct super_block *sb = inode->i_sb; 6241 struct address_space *mapping = inode->i_mapping; 6242 unsigned long index; 6243 loff_t last_page_bytes; 6244 6245 BUG_ON(start > end); 6246 6247 BUG_ON(start >> OCFS2_SB(sb)->s_clustersize_bits != 6248 (end - 1) >> OCFS2_SB(sb)->s_clustersize_bits); 6249 6250 numpages = 0; 6251 last_page_bytes = PAGE_ALIGN(end); 6252 index = start >> PAGE_CACHE_SHIFT; 6253 do { 6254 pages[numpages] = grab_cache_page(mapping, index); 6255 if (!pages[numpages]) { 6256 ret = -ENOMEM; 6257 mlog_errno(ret); 6258 goto out; 6259 } 6260 6261 numpages++; 6262 index++; 6263 } while (index < (last_page_bytes >> PAGE_CACHE_SHIFT)); 6264 6265 out: 6266 if (ret != 0) { 6267 if (pages) 6268 ocfs2_unlock_and_free_pages(pages, numpages); 6269 numpages = 0; 6270 } 6271 6272 *num = numpages; 6273 6274 return ret; 6275 } 6276 6277 /* 6278 * Zero the area past i_size but still within an allocated 6279 * cluster. This avoids exposing nonzero data on subsequent file 6280 * extends. 6281 * 6282 * We need to call this before i_size is updated on the inode because 6283 * otherwise block_write_full_page() will skip writeout of pages past 6284 * i_size. The new_i_size parameter is passed for this reason. 6285 */ 6286 int ocfs2_zero_range_for_truncate(struct inode *inode, handle_t *handle, 6287 u64 range_start, u64 range_end) 6288 { 6289 int ret = 0, numpages; 6290 struct page **pages = NULL; 6291 u64 phys; 6292 unsigned int ext_flags; 6293 struct super_block *sb = inode->i_sb; 6294 6295 /* 6296 * File systems which don't support sparse files zero on every 6297 * extend. 6298 */ 6299 if (!ocfs2_sparse_alloc(OCFS2_SB(sb))) 6300 return 0; 6301 6302 pages = kcalloc(ocfs2_pages_per_cluster(sb), 6303 sizeof(struct page *), GFP_NOFS); 6304 if (pages == NULL) { 6305 ret = -ENOMEM; 6306 mlog_errno(ret); 6307 goto out; 6308 } 6309 6310 if (range_start == range_end) 6311 goto out; 6312 6313 ret = ocfs2_extent_map_get_blocks(inode, 6314 range_start >> sb->s_blocksize_bits, 6315 &phys, NULL, &ext_flags); 6316 if (ret) { 6317 mlog_errno(ret); 6318 goto out; 6319 } 6320 6321 /* 6322 * Tail is a hole, or is marked unwritten. In either case, we 6323 * can count on read and write to return/push zero's. 6324 */ 6325 if (phys == 0 || ext_flags & OCFS2_EXT_UNWRITTEN) 6326 goto out; 6327 6328 ret = ocfs2_grab_eof_pages(inode, range_start, range_end, pages, 6329 &numpages); 6330 if (ret) { 6331 mlog_errno(ret); 6332 goto out; 6333 } 6334 6335 ocfs2_zero_cluster_pages(inode, range_start, range_end, pages, 6336 numpages, phys, handle); 6337 6338 /* 6339 * Initiate writeout of the pages we zero'd here. We don't 6340 * wait on them - the truncate_inode_pages() call later will 6341 * do that for us. 6342 */ 6343 ret = do_sync_mapping_range(inode->i_mapping, range_start, 6344 range_end - 1, SYNC_FILE_RANGE_WRITE); 6345 if (ret) 6346 mlog_errno(ret); 6347 6348 out: 6349 if (pages) 6350 kfree(pages); 6351 6352 return ret; 6353 } 6354 6355 static void ocfs2_zero_dinode_id2(struct inode *inode, struct ocfs2_dinode *di) 6356 { 6357 unsigned int blocksize = 1 << inode->i_sb->s_blocksize_bits; 6358 6359 memset(&di->id2, 0, blocksize - offsetof(struct ocfs2_dinode, id2)); 6360 } 6361 6362 void ocfs2_dinode_new_extent_list(struct inode *inode, 6363 struct ocfs2_dinode *di) 6364 { 6365 ocfs2_zero_dinode_id2(inode, di); 6366 di->id2.i_list.l_tree_depth = 0; 6367 di->id2.i_list.l_next_free_rec = 0; 6368 di->id2.i_list.l_count = cpu_to_le16(ocfs2_extent_recs_per_inode(inode->i_sb)); 6369 } 6370 6371 void ocfs2_set_inode_data_inline(struct inode *inode, struct ocfs2_dinode *di) 6372 { 6373 struct ocfs2_inode_info *oi = OCFS2_I(inode); 6374 struct ocfs2_inline_data *idata = &di->id2.i_data; 6375 6376 spin_lock(&oi->ip_lock); 6377 oi->ip_dyn_features |= OCFS2_INLINE_DATA_FL; 6378 di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features); 6379 spin_unlock(&oi->ip_lock); 6380 6381 /* 6382 * We clear the entire i_data structure here so that all 6383 * fields can be properly initialized. 6384 */ 6385 ocfs2_zero_dinode_id2(inode, di); 6386 6387 idata->id_count = cpu_to_le16(ocfs2_max_inline_data(inode->i_sb)); 6388 } 6389 6390 int ocfs2_convert_inline_data_to_extents(struct inode *inode, 6391 struct buffer_head *di_bh) 6392 { 6393 int ret, i, has_data, num_pages = 0; 6394 handle_t *handle; 6395 u64 uninitialized_var(block); 6396 struct ocfs2_inode_info *oi = OCFS2_I(inode); 6397 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 6398 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; 6399 struct ocfs2_alloc_context *data_ac = NULL; 6400 struct page **pages = NULL; 6401 loff_t end = osb->s_clustersize; 6402 6403 has_data = i_size_read(inode) ? 1 : 0; 6404 6405 if (has_data) { 6406 pages = kcalloc(ocfs2_pages_per_cluster(osb->sb), 6407 sizeof(struct page *), GFP_NOFS); 6408 if (pages == NULL) { 6409 ret = -ENOMEM; 6410 mlog_errno(ret); 6411 goto out; 6412 } 6413 6414 ret = ocfs2_reserve_clusters(osb, 1, &data_ac); 6415 if (ret) { 6416 mlog_errno(ret); 6417 goto out; 6418 } 6419 } 6420 6421 handle = ocfs2_start_trans(osb, OCFS2_INLINE_TO_EXTENTS_CREDITS); 6422 if (IS_ERR(handle)) { 6423 ret = PTR_ERR(handle); 6424 mlog_errno(ret); 6425 goto out_unlock; 6426 } 6427 6428 ret = ocfs2_journal_access(handle, inode, di_bh, 6429 OCFS2_JOURNAL_ACCESS_WRITE); 6430 if (ret) { 6431 mlog_errno(ret); 6432 goto out_commit; 6433 } 6434 6435 if (has_data) { 6436 u32 bit_off, num; 6437 unsigned int page_end; 6438 u64 phys; 6439 6440 ret = ocfs2_claim_clusters(osb, handle, data_ac, 1, &bit_off, 6441 &num); 6442 if (ret) { 6443 mlog_errno(ret); 6444 goto out_commit; 6445 } 6446 6447 /* 6448 * Save two copies, one for insert, and one that can 6449 * be changed by ocfs2_map_and_dirty_page() below. 6450 */ 6451 block = phys = ocfs2_clusters_to_blocks(inode->i_sb, bit_off); 6452 6453 /* 6454 * Non sparse file systems zero on extend, so no need 6455 * to do that now. 6456 */ 6457 if (!ocfs2_sparse_alloc(osb) && 6458 PAGE_CACHE_SIZE < osb->s_clustersize) 6459 end = PAGE_CACHE_SIZE; 6460 6461 ret = ocfs2_grab_eof_pages(inode, 0, end, pages, &num_pages); 6462 if (ret) { 6463 mlog_errno(ret); 6464 goto out_commit; 6465 } 6466 6467 /* 6468 * This should populate the 1st page for us and mark 6469 * it up to date. 6470 */ 6471 ret = ocfs2_read_inline_data(inode, pages[0], di_bh); 6472 if (ret) { 6473 mlog_errno(ret); 6474 goto out_commit; 6475 } 6476 6477 page_end = PAGE_CACHE_SIZE; 6478 if (PAGE_CACHE_SIZE > osb->s_clustersize) 6479 page_end = osb->s_clustersize; 6480 6481 for (i = 0; i < num_pages; i++) 6482 ocfs2_map_and_dirty_page(inode, handle, 0, page_end, 6483 pages[i], i > 0, &phys); 6484 } 6485 6486 spin_lock(&oi->ip_lock); 6487 oi->ip_dyn_features &= ~OCFS2_INLINE_DATA_FL; 6488 di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features); 6489 spin_unlock(&oi->ip_lock); 6490 6491 ocfs2_dinode_new_extent_list(inode, di); 6492 6493 ocfs2_journal_dirty(handle, di_bh); 6494 6495 if (has_data) { 6496 /* 6497 * An error at this point should be extremely rare. If 6498 * this proves to be false, we could always re-build 6499 * the in-inode data from our pages. 6500 */ 6501 ret = ocfs2_insert_extent(osb, handle, inode, di_bh, 6502 0, block, 1, 0, 6503 NULL, OCFS2_DINODE_EXTENT); 6504 if (ret) { 6505 mlog_errno(ret); 6506 goto out_commit; 6507 } 6508 6509 inode->i_blocks = ocfs2_inode_sector_count(inode); 6510 } 6511 6512 out_commit: 6513 ocfs2_commit_trans(osb, handle); 6514 6515 out_unlock: 6516 if (data_ac) 6517 ocfs2_free_alloc_context(data_ac); 6518 6519 out: 6520 if (pages) { 6521 ocfs2_unlock_and_free_pages(pages, num_pages); 6522 kfree(pages); 6523 } 6524 6525 return ret; 6526 } 6527 6528 /* 6529 * It is expected, that by the time you call this function, 6530 * inode->i_size and fe->i_size have been adjusted. 6531 * 6532 * WARNING: This will kfree the truncate context 6533 */ 6534 int ocfs2_commit_truncate(struct ocfs2_super *osb, 6535 struct inode *inode, 6536 struct buffer_head *fe_bh, 6537 struct ocfs2_truncate_context *tc) 6538 { 6539 int status, i, credits, tl_sem = 0; 6540 u32 clusters_to_del, new_highest_cpos, range; 6541 struct ocfs2_extent_list *el; 6542 handle_t *handle = NULL; 6543 struct inode *tl_inode = osb->osb_tl_inode; 6544 struct ocfs2_path *path = NULL; 6545 struct ocfs2_dinode *di = (struct ocfs2_dinode *)fe_bh->b_data; 6546 6547 mlog_entry_void(); 6548 6549 new_highest_cpos = ocfs2_clusters_for_bytes(osb->sb, 6550 i_size_read(inode)); 6551 6552 path = ocfs2_new_path(fe_bh, &di->id2.i_list); 6553 if (!path) { 6554 status = -ENOMEM; 6555 mlog_errno(status); 6556 goto bail; 6557 } 6558 6559 ocfs2_extent_map_trunc(inode, new_highest_cpos); 6560 6561 start: 6562 /* 6563 * Check that we still have allocation to delete. 6564 */ 6565 if (OCFS2_I(inode)->ip_clusters == 0) { 6566 status = 0; 6567 goto bail; 6568 } 6569 6570 /* 6571 * Truncate always works against the rightmost tree branch. 6572 */ 6573 status = ocfs2_find_path(inode, path, UINT_MAX); 6574 if (status) { 6575 mlog_errno(status); 6576 goto bail; 6577 } 6578 6579 mlog(0, "inode->ip_clusters = %u, tree_depth = %u\n", 6580 OCFS2_I(inode)->ip_clusters, path->p_tree_depth); 6581 6582 /* 6583 * By now, el will point to the extent list on the bottom most 6584 * portion of this tree. Only the tail record is considered in 6585 * each pass. 6586 * 6587 * We handle the following cases, in order: 6588 * - empty extent: delete the remaining branch 6589 * - remove the entire record 6590 * - remove a partial record 6591 * - no record needs to be removed (truncate has completed) 6592 */ 6593 el = path_leaf_el(path); 6594 if (le16_to_cpu(el->l_next_free_rec) == 0) { 6595 ocfs2_error(inode->i_sb, 6596 "Inode %llu has empty extent block at %llu\n", 6597 (unsigned long long)OCFS2_I(inode)->ip_blkno, 6598 (unsigned long long)path_leaf_bh(path)->b_blocknr); 6599 status = -EROFS; 6600 goto bail; 6601 } 6602 6603 i = le16_to_cpu(el->l_next_free_rec) - 1; 6604 range = le32_to_cpu(el->l_recs[i].e_cpos) + 6605 ocfs2_rec_clusters(el, &el->l_recs[i]); 6606 if (i == 0 && ocfs2_is_empty_extent(&el->l_recs[i])) { 6607 clusters_to_del = 0; 6608 } else if (le32_to_cpu(el->l_recs[i].e_cpos) >= new_highest_cpos) { 6609 clusters_to_del = ocfs2_rec_clusters(el, &el->l_recs[i]); 6610 } else if (range > new_highest_cpos) { 6611 clusters_to_del = (ocfs2_rec_clusters(el, &el->l_recs[i]) + 6612 le32_to_cpu(el->l_recs[i].e_cpos)) - 6613 new_highest_cpos; 6614 } else { 6615 status = 0; 6616 goto bail; 6617 } 6618 6619 mlog(0, "clusters_to_del = %u in this pass, tail blk=%llu\n", 6620 clusters_to_del, (unsigned long long)path_leaf_bh(path)->b_blocknr); 6621 6622 mutex_lock(&tl_inode->i_mutex); 6623 tl_sem = 1; 6624 /* ocfs2_truncate_log_needs_flush guarantees us at least one 6625 * record is free for use. If there isn't any, we flush to get 6626 * an empty truncate log. */ 6627 if (ocfs2_truncate_log_needs_flush(osb)) { 6628 status = __ocfs2_flush_truncate_log(osb); 6629 if (status < 0) { 6630 mlog_errno(status); 6631 goto bail; 6632 } 6633 } 6634 6635 credits = ocfs2_calc_tree_trunc_credits(osb->sb, clusters_to_del, 6636 (struct ocfs2_dinode *)fe_bh->b_data, 6637 el); 6638 handle = ocfs2_start_trans(osb, credits); 6639 if (IS_ERR(handle)) { 6640 status = PTR_ERR(handle); 6641 handle = NULL; 6642 mlog_errno(status); 6643 goto bail; 6644 } 6645 6646 status = ocfs2_do_truncate(osb, clusters_to_del, inode, fe_bh, handle, 6647 tc, path); 6648 if (status < 0) { 6649 mlog_errno(status); 6650 goto bail; 6651 } 6652 6653 mutex_unlock(&tl_inode->i_mutex); 6654 tl_sem = 0; 6655 6656 ocfs2_commit_trans(osb, handle); 6657 handle = NULL; 6658 6659 ocfs2_reinit_path(path, 1); 6660 6661 /* 6662 * The check above will catch the case where we've truncated 6663 * away all allocation. 6664 */ 6665 goto start; 6666 6667 bail: 6668 6669 ocfs2_schedule_truncate_log_flush(osb, 1); 6670 6671 if (tl_sem) 6672 mutex_unlock(&tl_inode->i_mutex); 6673 6674 if (handle) 6675 ocfs2_commit_trans(osb, handle); 6676 6677 ocfs2_run_deallocs(osb, &tc->tc_dealloc); 6678 6679 ocfs2_free_path(path); 6680 6681 /* This will drop the ext_alloc cluster lock for us */ 6682 ocfs2_free_truncate_context(tc); 6683 6684 mlog_exit(status); 6685 return status; 6686 } 6687 6688 /* 6689 * Expects the inode to already be locked. 6690 */ 6691 int ocfs2_prepare_truncate(struct ocfs2_super *osb, 6692 struct inode *inode, 6693 struct buffer_head *fe_bh, 6694 struct ocfs2_truncate_context **tc) 6695 { 6696 int status; 6697 unsigned int new_i_clusters; 6698 struct ocfs2_dinode *fe; 6699 struct ocfs2_extent_block *eb; 6700 struct buffer_head *last_eb_bh = NULL; 6701 6702 mlog_entry_void(); 6703 6704 *tc = NULL; 6705 6706 new_i_clusters = ocfs2_clusters_for_bytes(osb->sb, 6707 i_size_read(inode)); 6708 fe = (struct ocfs2_dinode *) fe_bh->b_data; 6709 6710 mlog(0, "fe->i_clusters = %u, new_i_clusters = %u, fe->i_size =" 6711 "%llu\n", le32_to_cpu(fe->i_clusters), new_i_clusters, 6712 (unsigned long long)le64_to_cpu(fe->i_size)); 6713 6714 *tc = kzalloc(sizeof(struct ocfs2_truncate_context), GFP_KERNEL); 6715 if (!(*tc)) { 6716 status = -ENOMEM; 6717 mlog_errno(status); 6718 goto bail; 6719 } 6720 ocfs2_init_dealloc_ctxt(&(*tc)->tc_dealloc); 6721 6722 if (fe->id2.i_list.l_tree_depth) { 6723 status = ocfs2_read_block(osb, le64_to_cpu(fe->i_last_eb_blk), 6724 &last_eb_bh, OCFS2_BH_CACHED, inode); 6725 if (status < 0) { 6726 mlog_errno(status); 6727 goto bail; 6728 } 6729 eb = (struct ocfs2_extent_block *) last_eb_bh->b_data; 6730 if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) { 6731 OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb); 6732 6733 brelse(last_eb_bh); 6734 status = -EIO; 6735 goto bail; 6736 } 6737 } 6738 6739 (*tc)->tc_last_eb_bh = last_eb_bh; 6740 6741 status = 0; 6742 bail: 6743 if (status < 0) { 6744 if (*tc) 6745 ocfs2_free_truncate_context(*tc); 6746 *tc = NULL; 6747 } 6748 mlog_exit_void(); 6749 return status; 6750 } 6751 6752 /* 6753 * 'start' is inclusive, 'end' is not. 6754 */ 6755 int ocfs2_truncate_inline(struct inode *inode, struct buffer_head *di_bh, 6756 unsigned int start, unsigned int end, int trunc) 6757 { 6758 int ret; 6759 unsigned int numbytes; 6760 handle_t *handle; 6761 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 6762 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; 6763 struct ocfs2_inline_data *idata = &di->id2.i_data; 6764 6765 if (end > i_size_read(inode)) 6766 end = i_size_read(inode); 6767 6768 BUG_ON(start >= end); 6769 6770 if (!(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) || 6771 !(le16_to_cpu(di->i_dyn_features) & OCFS2_INLINE_DATA_FL) || 6772 !ocfs2_supports_inline_data(osb)) { 6773 ocfs2_error(inode->i_sb, 6774 "Inline data flags for inode %llu don't agree! " 6775 "Disk: 0x%x, Memory: 0x%x, Superblock: 0x%x\n", 6776 (unsigned long long)OCFS2_I(inode)->ip_blkno, 6777 le16_to_cpu(di->i_dyn_features), 6778 OCFS2_I(inode)->ip_dyn_features, 6779 osb->s_feature_incompat); 6780 ret = -EROFS; 6781 goto out; 6782 } 6783 6784 handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); 6785 if (IS_ERR(handle)) { 6786 ret = PTR_ERR(handle); 6787 mlog_errno(ret); 6788 goto out; 6789 } 6790 6791 ret = ocfs2_journal_access(handle, inode, di_bh, 6792 OCFS2_JOURNAL_ACCESS_WRITE); 6793 if (ret) { 6794 mlog_errno(ret); 6795 goto out_commit; 6796 } 6797 6798 numbytes = end - start; 6799 memset(idata->id_data + start, 0, numbytes); 6800 6801 /* 6802 * No need to worry about the data page here - it's been 6803 * truncated already and inline data doesn't need it for 6804 * pushing zero's to disk, so we'll let readpage pick it up 6805 * later. 6806 */ 6807 if (trunc) { 6808 i_size_write(inode, start); 6809 di->i_size = cpu_to_le64(start); 6810 } 6811 6812 inode->i_blocks = ocfs2_inode_sector_count(inode); 6813 inode->i_ctime = inode->i_mtime = CURRENT_TIME; 6814 6815 di->i_ctime = di->i_mtime = cpu_to_le64(inode->i_ctime.tv_sec); 6816 di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec); 6817 6818 ocfs2_journal_dirty(handle, di_bh); 6819 6820 out_commit: 6821 ocfs2_commit_trans(osb, handle); 6822 6823 out: 6824 return ret; 6825 } 6826 6827 static void ocfs2_free_truncate_context(struct ocfs2_truncate_context *tc) 6828 { 6829 /* 6830 * The caller is responsible for completing deallocation 6831 * before freeing the context. 6832 */ 6833 if (tc->tc_dealloc.c_first_suballocator != NULL) 6834 mlog(ML_NOTICE, 6835 "Truncate completion has non-empty dealloc context\n"); 6836 6837 if (tc->tc_last_eb_bh) 6838 brelse(tc->tc_last_eb_bh); 6839 6840 kfree(tc); 6841 } 6842