/*
 * Copyright IBM Corporation, 2007
 * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of version 2.1 of the GNU Lesser General Public License
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it would be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 *
 */

#include <linux/module.h>
#include <linux/slab.h>
#include "ext4_jbd2.h"
#include "ext4_extents.h"

/*
 * Details of a run of contiguous blocks that can be
 * represented by a single extent.
 */
struct list_blocks_struct {
	ext4_lblk_t first_block, last_block;
	ext4_fsblk_t first_pblock, last_pblock;
};

static int finish_range(handle_t *handle, struct inode *inode,
				struct list_blocks_struct *lb)
{
	int retval = 0, needed;
	struct ext4_extent newext;
	struct ext4_ext_path *path;

	if (lb->first_pblock == 0)
		return 0;

	/* Add the extent to the temp inode */
	newext.ee_block = cpu_to_le32(lb->first_block);
	newext.ee_len = cpu_to_le16(lb->last_block - lb->first_block + 1);
	ext4_ext_store_pblock(&newext, lb->first_pblock);
	path = ext4_ext_find_extent(inode, lb->first_block, NULL);

	if (IS_ERR(path)) {
		retval = PTR_ERR(path);
		path = NULL;
		goto err_out;
	}

	/*
	 * Calculate the credits needed to insert this extent. Since
	 * we are doing this in a loop we may accumulate extra credits,
	 * but below we try not to accumulate too many of them by
	 * restarting the journal.
	 */
	needed = ext4_ext_calc_credits_for_single_extent(inode,
		    lb->last_block - lb->first_block + 1, path);

	/*
	 * Make sure the credits we have accumulated are not too high.
	 */
	if (needed && ext4_handle_has_enough_credits(handle,
						EXT4_RESERVE_TRANS_BLOCKS)) {
		retval = ext4_journal_restart(handle, needed);
		if (retval)
			goto err_out;
	} else if (needed) {
		retval = ext4_journal_extend(handle, needed);
		if (retval) {
			/*
			 * If we cannot extend the journal, restart it.
			 */
			retval = ext4_journal_restart(handle, needed);
			if (retval)
				goto err_out;
		}
	}
	retval = ext4_ext_insert_extent(handle, inode, path, &newext, 0);
err_out:
	if (path) {
		ext4_ext_drop_refs(path);
		kfree(path);
	}
	lb->first_pblock = 0;
	return retval;
}
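
/*
 * Example of how the range accumulation works (hypothetical block
 * numbers): a file whose logical blocks 0..2 map to physical blocks
 * 100..102 grows a single range across three calls to
 * update_extent_range() below, so finish_range() emits one extent
 * covering all three blocks instead of three single-block extents:
 *
 *	update_extent_range(handle, inode, 100, 0, &lb);  starts a range
 *	update_extent_range(handle, inode, 101, 1, &lb);  extends it
 *	update_extent_range(handle, inode, 102, 2, &lb);  extends it
 *	finish_range(handle, inode, &lb);  one extent: lblk 0, len 3, pblk 100
 */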

static int update_extent_range(handle_t *handle, struct inode *inode,
				ext4_fsblk_t pblock, ext4_lblk_t blk_num,
				struct list_blocks_struct *lb)
{
	int retval;
	/*
	 * See if we can add on to the existing range (if it exists)
	 */
	if (lb->first_pblock &&
		(lb->last_pblock+1 == pblock) &&
		(lb->last_block+1 == blk_num)) {
		lb->last_pblock = pblock;
		lb->last_block = blk_num;
		return 0;
	}
	/*
	 * Start a new range.
	 */
	retval = finish_range(handle, inode, lb);
	lb->first_pblock = lb->last_pblock = pblock;
	lb->first_block = lb->last_block = blk_num;

	return retval;
}

static int update_ind_extent_range(handle_t *handle, struct inode *inode,
				   ext4_fsblk_t pblock, ext4_lblk_t *blk_nump,
				   struct list_blocks_struct *lb)
{
	struct buffer_head *bh;
	__le32 *i_data;
	int i, retval = 0;
	ext4_lblk_t blk_count = *blk_nump;
	unsigned long max_entries = inode->i_sb->s_blocksize >> 2;

	if (!pblock) {
		/* Only update the file block number */
		*blk_nump += max_entries;
		return 0;
	}

	bh = sb_bread(inode->i_sb, pblock);
	if (!bh)
		return -EIO;

	i_data = (__le32 *)bh->b_data;
	for (i = 0; i < max_entries; i++, blk_count++) {
		if (i_data[i]) {
			retval = update_extent_range(handle, inode,
						le32_to_cpu(i_data[i]),
						blk_count, lb);
			if (retval)
				break;
		}
	}

	/* Update the file block number */
	*blk_nump = blk_count;
	put_bh(bh);
	return retval;
}

static int update_dind_extent_range(handle_t *handle, struct inode *inode,
				    ext4_fsblk_t pblock, ext4_lblk_t *blk_nump,
				    struct list_blocks_struct *lb)
{
	struct buffer_head *bh;
	__le32 *i_data;
	int i, retval = 0;
	ext4_lblk_t blk_count = *blk_nump;
	unsigned long max_entries = inode->i_sb->s_blocksize >> 2;

	if (!pblock) {
		/* Only update the file block number */
		*blk_nump += max_entries * max_entries;
		return 0;
	}
	bh = sb_bread(inode->i_sb, pblock);
	if (!bh)
		return -EIO;

	i_data = (__le32 *)bh->b_data;
	for (i = 0; i < max_entries; i++) {
		if (i_data[i]) {
			retval = update_ind_extent_range(handle, inode,
						le32_to_cpu(i_data[i]),
						&blk_count, lb);
			if (retval)
				break;
		} else {
			/* Only update the file block number */
			blk_count += max_entries;
		}
	}

	/* Update the file block number */
	*blk_nump = blk_count;
	put_bh(bh);
	return retval;
}

static int update_tind_extent_range(handle_t *handle, struct inode *inode,
				    ext4_fsblk_t pblock, ext4_lblk_t *blk_nump,
				    struct list_blocks_struct *lb)
{
	struct buffer_head *bh;
	__le32 *i_data;
	int i, retval = 0;
	ext4_lblk_t blk_count = *blk_nump;
	unsigned long max_entries = inode->i_sb->s_blocksize >> 2;

	if (!pblock) {
		/* Only update the file block number */
		*blk_nump += max_entries * max_entries * max_entries;
		return 0;
	}
	bh = sb_bread(inode->i_sb, pblock);
	if (!bh)
		return -EIO;

	i_data = (__le32 *)bh->b_data;
	for (i = 0; i < max_entries; i++) {
		if (i_data[i]) {
			retval = update_dind_extent_range(handle, inode,
						le32_to_cpu(i_data[i]),
						&blk_count, lb);
			if (retval)
				break;
		} else {
			/* Only update the file block number */
			blk_count += max_entries * max_entries;
		}
	}
	/* Update the file block number */
	*blk_nump = blk_count;
	put_bh(bh);
	return retval;
}
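
/*
 * Worked example for the walkers above (hypothetical numbers): each
 * indirect block holds max_entries = blocksize / 4 block pointers,
 * so a 4K filesystem has max_entries = 1024 and a missing subtree
 * advances the logical block counter by the capacity of that level:
 *
 *	direct blocks:		EXT4_NDIR_BLOCKS (12)
 *	indirect block:		1024 blocks
 *	double indirect:	1024 * 1024 blocks
 *	triple indirect:	1024 * 1024 * 1024 blocks
 *
 * which is why the "Only update the file block number" paths bump
 * *blk_nump by the matching power of max_entries.
 */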

static int extend_credit_for_blkdel(handle_t *handle, struct inode *inode)
{
	int retval = 0, needed;

	if (ext4_handle_has_enough_credits(handle, EXT4_RESERVE_TRANS_BLOCKS+1))
		return 0;
	/*
	 * We are freeing blocks. While doing so we touch the
	 * superblock, the group descriptor and the block bitmap,
	 * so reserve three credits. We may also update the
	 * quota (user and group).
	 */
	needed = 3 + EXT4_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb);

	if (ext4_journal_extend(handle, needed) != 0)
		retval = ext4_journal_restart(handle, needed);

	return retval;
}

static int free_dind_blocks(handle_t *handle,
				struct inode *inode, __le32 i_data)
{
	int i;
	__le32 *tmp_idata;
	struct buffer_head *bh;
	unsigned long max_entries = inode->i_sb->s_blocksize >> 2;

	bh = sb_bread(inode->i_sb, le32_to_cpu(i_data));
	if (!bh)
		return -EIO;

	tmp_idata = (__le32 *)bh->b_data;
	for (i = 0; i < max_entries; i++) {
		if (tmp_idata[i]) {
			extend_credit_for_blkdel(handle, inode);
			ext4_free_blocks(handle, inode, NULL,
					 le32_to_cpu(tmp_idata[i]), 1,
					 EXT4_FREE_BLOCKS_METADATA |
					 EXT4_FREE_BLOCKS_FORGET);
		}
	}
	put_bh(bh);
	extend_credit_for_blkdel(handle, inode);
	ext4_free_blocks(handle, inode, NULL, le32_to_cpu(i_data), 1,
			 EXT4_FREE_BLOCKS_METADATA |
			 EXT4_FREE_BLOCKS_FORGET);
	return 0;
}

static int free_tind_blocks(handle_t *handle,
				struct inode *inode, __le32 i_data)
{
	int i, retval = 0;
	__le32 *tmp_idata;
	struct buffer_head *bh;
	unsigned long max_entries = inode->i_sb->s_blocksize >> 2;

	bh = sb_bread(inode->i_sb, le32_to_cpu(i_data));
	if (!bh)
		return -EIO;

	tmp_idata = (__le32 *)bh->b_data;
	for (i = 0; i < max_entries; i++) {
		if (tmp_idata[i]) {
			retval = free_dind_blocks(handle,
					inode, tmp_idata[i]);
			if (retval) {
				put_bh(bh);
				return retval;
			}
		}
	}
	put_bh(bh);
	extend_credit_for_blkdel(handle, inode);
	ext4_free_blocks(handle, inode, NULL, le32_to_cpu(i_data), 1,
			 EXT4_FREE_BLOCKS_METADATA |
			 EXT4_FREE_BLOCKS_FORGET);
	return 0;
}

static int free_ind_block(handle_t *handle, struct inode *inode, __le32 *i_data)
{
	int retval;

	/* ei->i_data[EXT4_IND_BLOCK] */
	if (i_data[0]) {
		extend_credit_for_blkdel(handle, inode);
		ext4_free_blocks(handle, inode, NULL,
				 le32_to_cpu(i_data[0]), 1,
				 EXT4_FREE_BLOCKS_METADATA |
				 EXT4_FREE_BLOCKS_FORGET);
	}

	/* ei->i_data[EXT4_DIND_BLOCK] */
	if (i_data[1]) {
		retval = free_dind_blocks(handle, inode, i_data[1]);
		if (retval)
			return retval;
	}

	/* ei->i_data[EXT4_TIND_BLOCK] */
	if (i_data[2]) {
		retval = free_tind_blocks(handle, inode, i_data[2]);
		if (retval)
			return retval;
	}
	return 0;
}
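
/*
 * Swap in the extent tree built in the tmp inode: with i_data_sem
 * held, verify that EXT4_STATE_EXT_MIGRATE is still set (a racing
 * block allocation clears it, in which case we fail with -EAGAIN),
 * copy the tmp inode's i_data into the original inode, fold the tmp
 * inode's i_blocks into it, and free the old indirect meta-data
 * blocks saved in i_data[].
 */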
static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode,
						struct inode *tmp_inode)
{
	int retval;
	__le32 i_data[3];
	struct ext4_inode_info *ei = EXT4_I(inode);
	struct ext4_inode_info *tmp_ei = EXT4_I(tmp_inode);

	/*
	 * One credit accounted for writing the
	 * i_data field of the original inode
	 */
	retval = ext4_journal_extend(handle, 1);
	if (retval) {
		retval = ext4_journal_restart(handle, 1);
		if (retval)
			goto err_out;
	}

	i_data[0] = ei->i_data[EXT4_IND_BLOCK];
	i_data[1] = ei->i_data[EXT4_DIND_BLOCK];
	i_data[2] = ei->i_data[EXT4_TIND_BLOCK];

	down_write(&EXT4_I(inode)->i_data_sem);
	/*
	 * If EXT4_STATE_EXT_MIGRATE is cleared, a block allocation
	 * happened after we started the migration. We need to
	 * fail the migration.
	 */
	if (!ext4_test_inode_state(inode, EXT4_STATE_EXT_MIGRATE)) {
		retval = -EAGAIN;
		up_write(&EXT4_I(inode)->i_data_sem);
		goto err_out;
	} else
		ext4_clear_inode_state(inode, EXT4_STATE_EXT_MIGRATE);
	/*
	 * We have the extent map built with the tmp inode.
	 * Now copy the i_data across.
	 */
	ext4_set_inode_flag(inode, EXT4_INODE_EXTENTS);
	memcpy(ei->i_data, tmp_ei->i_data, sizeof(ei->i_data));

	/*
	 * Update i_blocks with the new blocks that got
	 * allocated while adding extents for extent index
	 * blocks.
	 *
	 * While converting to extents we need not update the
	 * original inode's i_blocks for the extent blocks
	 * via the quota APIs; the quota update already
	 * happened via tmp_inode.
	 */
	spin_lock(&inode->i_lock);
	inode->i_blocks += tmp_inode->i_blocks;
	spin_unlock(&inode->i_lock);
	up_write(&EXT4_I(inode)->i_data_sem);

	/*
	 * We mark the inode dirty afterwards, because we decrement
	 * i_blocks when freeing the indirect meta-data blocks.
	 */
	retval = free_ind_block(handle, inode, i_data);
	ext4_mark_inode_dirty(handle, inode);

err_out:
	return retval;
}

static int free_ext_idx(handle_t *handle, struct inode *inode,
					struct ext4_extent_idx *ix)
{
	int i, retval = 0;
	ext4_fsblk_t block;
	struct buffer_head *bh;
	struct ext4_extent_header *eh;

	block = ext4_idx_pblock(ix);
	bh = sb_bread(inode->i_sb, block);
	if (!bh)
		return -EIO;

	eh = (struct ext4_extent_header *)bh->b_data;
	if (eh->eh_depth != 0) {
		ix = EXT_FIRST_INDEX(eh);
		for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ix++) {
			retval = free_ext_idx(handle, inode, ix);
			if (retval)
				break;
		}
	}
	put_bh(bh);
	extend_credit_for_blkdel(handle, inode);
	ext4_free_blocks(handle, inode, NULL, block, 1,
			 EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
	return retval;
}

/*
 * Free the extent meta-data blocks only
 */
static int free_ext_block(handle_t *handle, struct inode *inode)
{
	int i, retval = 0;
	struct ext4_inode_info *ei = EXT4_I(inode);
	struct ext4_extent_header *eh = (struct ext4_extent_header *)ei->i_data;
	struct ext4_extent_idx *ix;

	if (eh->eh_depth == 0)
		/*
		 * No extra blocks allocated for extent meta-data
		 */
		return 0;
	ix = EXT_FIRST_INDEX(eh);
	for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ix++) {
		retval = free_ext_idx(handle, inode, ix);
		if (retval)
			return retval;
	}
	return retval;
}
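
/*
 * Convert an indirect-mapped inode to the extent format: create a
 * temporary inode, walk the direct, indirect, double-indirect and
 * triple-indirect blocks building an equivalent extent tree in the
 * tmp inode, swap the tmp inode's i_data into the original inode,
 * and free the now-unused indirect meta-data blocks.
 */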
int ext4_ext_migrate(struct inode *inode)
{
	handle_t *handle;
	int retval = 0, i;
	__le32 *i_data;
	ext4_lblk_t blk_count = 0;
	struct ext4_inode_info *ei;
	struct inode *tmp_inode = NULL;
	struct list_blocks_struct lb;
	unsigned long max_entries;
	__u32 goal;

	/*
	 * If the filesystem does not support extents, or the inode
	 * already is extent-based, error out.
	 */
	if (!EXT4_HAS_INCOMPAT_FEATURE(inode->i_sb,
				       EXT4_FEATURE_INCOMPAT_EXTENTS) ||
	    (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
		return -EINVAL;

	if (S_ISLNK(inode->i_mode) && inode->i_blocks == 0)
		/*
		 * don't migrate fast symlinks
		 */
		return retval;

	handle = ext4_journal_start(inode,
					EXT4_DATA_TRANS_BLOCKS(inode->i_sb) +
					EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
					EXT4_MAXQUOTAS_INIT_BLOCKS(inode->i_sb)
					+ 1);
	if (IS_ERR(handle)) {
		retval = PTR_ERR(handle);
		return retval;
	}
	goal = (((inode->i_ino - 1) / EXT4_INODES_PER_GROUP(inode->i_sb)) *
		EXT4_INODES_PER_GROUP(inode->i_sb)) + 1;
	tmp_inode = ext4_new_inode(handle, inode->i_sb->s_root->d_inode,
				   S_IFREG, NULL, goal);
	if (IS_ERR(tmp_inode)) {
		retval = PTR_ERR(tmp_inode);
		ext4_journal_stop(handle);
		return retval;
	}
	i_size_write(tmp_inode, i_size_read(inode));
	/*
	 * Set i_nlink to zero so the inode will be deleted later
	 * when we drop our inode reference.
	 */
	tmp_inode->i_nlink = 0;

	ext4_ext_tree_init(handle, tmp_inode);
	ext4_orphan_add(handle, tmp_inode);
	ext4_journal_stop(handle);

	/*
	 * Start with one credit accounted for the
	 * superblock modification.
	 *
	 * For the tmp_inode we have already committed the
	 * transaction that created the inode. Later, as and
	 * when we add extents, we extend the journal.
	 */
	/*
	 * Even though we take i_mutex we can still cause block
	 * allocation via mmap writes to holes. If we have allocated
	 * new blocks we fail the migration. New block allocation will
	 * clear the EXT4_STATE_EXT_MIGRATE flag. The flag is updated
	 * with i_data_sem held to prevent racing with block
	 * allocation.
	 */
	down_read(&EXT4_I(inode)->i_data_sem);
	ext4_set_inode_state(inode, EXT4_STATE_EXT_MIGRATE);
	up_read(&EXT4_I(inode)->i_data_sem);

	handle = ext4_journal_start(inode, 1);
	if (IS_ERR(handle)) {
		/*
		 * It is impossible to update on-disk structures without
		 * a handle, so just roll back the in-core changes and
		 * leave the rest of the work to orphan_list_cleanup().
		 */
		ext4_orphan_del(NULL, tmp_inode);
		retval = PTR_ERR(handle);
		goto out;
	}

	ei = EXT4_I(inode);
	i_data = ei->i_data;
	memset(&lb, 0, sizeof(lb));

	/* 32-bit block addresses, 4 bytes each */
	max_entries = inode->i_sb->s_blocksize >> 2;
	for (i = 0; i < EXT4_NDIR_BLOCKS; i++, blk_count++) {
		if (i_data[i]) {
			retval = update_extent_range(handle, tmp_inode,
						le32_to_cpu(i_data[i]),
						blk_count, &lb);
			if (retval)
				goto err_out;
		}
	}
	if (i_data[EXT4_IND_BLOCK]) {
		retval = update_ind_extent_range(handle, tmp_inode,
					le32_to_cpu(i_data[EXT4_IND_BLOCK]),
					&blk_count, &lb);
		if (retval)
			goto err_out;
	} else
		blk_count += max_entries;
	if (i_data[EXT4_DIND_BLOCK]) {
		retval = update_dind_extent_range(handle, tmp_inode,
					le32_to_cpu(i_data[EXT4_DIND_BLOCK]),
					&blk_count, &lb);
		if (retval)
			goto err_out;
	} else
		blk_count += max_entries * max_entries;
	if (i_data[EXT4_TIND_BLOCK]) {
		retval = update_tind_extent_range(handle, tmp_inode,
					le32_to_cpu(i_data[EXT4_TIND_BLOCK]),
					&blk_count, &lb);
		if (retval)
			goto err_out;
	}
	/*
	 * Build the last extent.
	 */
	retval = finish_range(handle, tmp_inode, &lb);
err_out:
	if (retval) {
		/*
		 * Failure case: delete the extent information built
		 * in the tmp_inode.
		 */
		free_ext_block(handle, tmp_inode);
	} else {
		retval = ext4_ext_swap_inode_data(handle, inode, tmp_inode);
		if (retval)
			/*
			 * If we failed to swap the inode data, free the
			 * extent details of the tmp inode.
			 */
			free_ext_block(handle, tmp_inode);
	}

	/* We mark the tmp_inode dirty via ext4_ext_tree_init. */
	if (ext4_journal_extend(handle, 1) != 0)
		ext4_journal_restart(handle, 1);

	/*
	 * Mark the tmp_inode as having size zero.
	 */
	i_size_write(tmp_inode, 0);

	/*
	 * Set the i_blocks count to zero
	 * so that ext4_delete_inode() does the
	 * right job.
	 *
	 * We don't need to take i_lock because
	 * the inode is not visible to user space.
	 */
	tmp_inode->i_blocks = 0;

	/* Reset the extent details */
	ext4_ext_tree_init(handle, tmp_inode);
	ext4_journal_stop(handle);
out:
	unlock_new_inode(tmp_inode);
	iput(tmp_inode);

	return retval;
}
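
/*
 * Example: triggering the migration from userspace.  ext4_ext_migrate()
 * is reached from ext4_ioctl() via EXT4_IOC_MIGRATE (and via
 * EXT4_IOC_SETFLAGS when the extents flag is being turned on, e.g.
 * "chattr +e").  The helper below is hypothetical; EXT4_IOC_MIGRATE
 * is restated locally because the define lives in fs/ext4/ext4.h
 * rather than in an exported UAPI header:
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *	#include <sys/ioctl.h>
 *	#include <linux/ioctl.h>
 *
 *	#define EXT4_IOC_MIGRATE	_IO('f', 9)	// from fs/ext4/ext4.h
 *
 *	// Hypothetical helper: migrate one file to the extent format.
 *	int migrate_to_extents(const char *path)
 *	{
 *		int fd = open(path, O_RDWR);
 *		int ret;
 *
 *		if (fd < 0)
 *			return -1;
 *		ret = ioctl(fd, EXT4_IOC_MIGRATE);
 *		close(fd);
 *		return ret;
 *	}
 */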