1 /* 2 * Copyright IBM Corporation, 2007 3 * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> 4 * 5 * This program is free software; you can redistribute it and/or modify it 6 * under the terms of version 2.1 of the GNU Lesser General Public License 7 * as published by the Free Software Foundation. 8 * 9 * This program is distributed in the hope that it would be useful, but 10 * WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 12 * 13 */ 14 15 #include <linux/module.h> 16 #include "ext4_jbd2.h" 17 #include "ext4_extents.h" 18 19 /* 20 * The contiguous blocks details which can be 21 * represented by a single extent 22 */ 23 struct list_blocks_struct { 24 ext4_lblk_t first_block, last_block; 25 ext4_fsblk_t first_pblock, last_pblock; 26 }; 27 28 static int finish_range(handle_t *handle, struct inode *inode, 29 struct list_blocks_struct *lb) 30 31 { 32 int retval = 0, needed; 33 struct ext4_extent newext; 34 struct ext4_ext_path *path; 35 if (lb->first_pblock == 0) 36 return 0; 37 38 /* Add the extent to temp inode*/ 39 newext.ee_block = cpu_to_le32(lb->first_block); 40 newext.ee_len = cpu_to_le16(lb->last_block - lb->first_block + 1); 41 ext4_ext_store_pblock(&newext, lb->first_pblock); 42 path = ext4_ext_find_extent(inode, lb->first_block, NULL); 43 44 if (IS_ERR(path)) { 45 retval = PTR_ERR(path); 46 path = NULL; 47 goto err_out; 48 } 49 50 /* 51 * Calculate the credit needed to inserting this extent 52 * Since we are doing this in loop we may accumalate extra 53 * credit. But below we try to not accumalate too much 54 * of them by restarting the journal. 55 */ 56 needed = ext4_ext_calc_credits_for_single_extent(inode, 57 lb->last_block - lb->first_block + 1, path); 58 59 /* 60 * Make sure the credit we accumalated is not really high 61 */ 62 if (needed && handle->h_buffer_credits >= EXT4_RESERVE_TRANS_BLOCKS) { 63 retval = ext4_journal_restart(handle, needed); 64 if (retval) 65 goto err_out; 66 } else if (needed) { 67 retval = ext4_journal_extend(handle, needed); 68 if (retval) { 69 /* 70 * IF not able to extend the journal restart the journal 71 */ 72 retval = ext4_journal_restart(handle, needed); 73 if (retval) 74 goto err_out; 75 } 76 } 77 retval = ext4_ext_insert_extent(handle, inode, path, &newext); 78 err_out: 79 if (path) { 80 ext4_ext_drop_refs(path); 81 kfree(path); 82 } 83 lb->first_pblock = 0; 84 return retval; 85 } 86 87 static int update_extent_range(handle_t *handle, struct inode *inode, 88 ext4_fsblk_t pblock, ext4_lblk_t blk_num, 89 struct list_blocks_struct *lb) 90 { 91 int retval; 92 /* 93 * See if we can add on to the existing range (if it exists) 94 */ 95 if (lb->first_pblock && 96 (lb->last_pblock+1 == pblock) && 97 (lb->last_block+1 == blk_num)) { 98 lb->last_pblock = pblock; 99 lb->last_block = blk_num; 100 return 0; 101 } 102 /* 103 * Start a new range. 104 */ 105 retval = finish_range(handle, inode, lb); 106 lb->first_pblock = lb->last_pblock = pblock; 107 lb->first_block = lb->last_block = blk_num; 108 109 return retval; 110 } 111 112 static int update_ind_extent_range(handle_t *handle, struct inode *inode, 113 ext4_fsblk_t pblock, ext4_lblk_t *blk_nump, 114 struct list_blocks_struct *lb) 115 { 116 struct buffer_head *bh; 117 __le32 *i_data; 118 int i, retval = 0; 119 ext4_lblk_t blk_count = *blk_nump; 120 unsigned long max_entries = inode->i_sb->s_blocksize >> 2; 121 122 if (!pblock) { 123 /* Only update the file block number */ 124 *blk_nump += max_entries; 125 return 0; 126 } 127 128 bh = sb_bread(inode->i_sb, pblock); 129 if (!bh) 130 return -EIO; 131 132 i_data = (__le32 *)bh->b_data; 133 for (i = 0; i < max_entries; i++, blk_count++) { 134 if (i_data[i]) { 135 retval = update_extent_range(handle, inode, 136 le32_to_cpu(i_data[i]), 137 blk_count, lb); 138 if (retval) 139 break; 140 } 141 } 142 143 /* Update the file block number */ 144 *blk_nump = blk_count; 145 put_bh(bh); 146 return retval; 147 148 } 149 150 static int update_dind_extent_range(handle_t *handle, struct inode *inode, 151 ext4_fsblk_t pblock, ext4_lblk_t *blk_nump, 152 struct list_blocks_struct *lb) 153 { 154 struct buffer_head *bh; 155 __le32 *i_data; 156 int i, retval = 0; 157 ext4_lblk_t blk_count = *blk_nump; 158 unsigned long max_entries = inode->i_sb->s_blocksize >> 2; 159 160 if (!pblock) { 161 /* Only update the file block number */ 162 *blk_nump += max_entries * max_entries; 163 return 0; 164 } 165 bh = sb_bread(inode->i_sb, pblock); 166 if (!bh) 167 return -EIO; 168 169 i_data = (__le32 *)bh->b_data; 170 for (i = 0; i < max_entries; i++) { 171 if (i_data[i]) { 172 retval = update_ind_extent_range(handle, inode, 173 le32_to_cpu(i_data[i]), 174 &blk_count, lb); 175 if (retval) 176 break; 177 } else { 178 /* Only update the file block number */ 179 blk_count += max_entries; 180 } 181 } 182 183 /* Update the file block number */ 184 *blk_nump = blk_count; 185 put_bh(bh); 186 return retval; 187 188 } 189 190 static int update_tind_extent_range(handle_t *handle, struct inode *inode, 191 ext4_fsblk_t pblock, ext4_lblk_t *blk_nump, 192 struct list_blocks_struct *lb) 193 { 194 struct buffer_head *bh; 195 __le32 *i_data; 196 int i, retval = 0; 197 ext4_lblk_t blk_count = *blk_nump; 198 unsigned long max_entries = inode->i_sb->s_blocksize >> 2; 199 200 if (!pblock) { 201 /* Only update the file block number */ 202 *blk_nump += max_entries * max_entries * max_entries; 203 return 0; 204 } 205 bh = sb_bread(inode->i_sb, pblock); 206 if (!bh) 207 return -EIO; 208 209 i_data = (__le32 *)bh->b_data; 210 for (i = 0; i < max_entries; i++) { 211 if (i_data[i]) { 212 retval = update_dind_extent_range(handle, inode, 213 le32_to_cpu(i_data[i]), 214 &blk_count, lb); 215 if (retval) 216 break; 217 } else 218 /* Only update the file block number */ 219 blk_count += max_entries * max_entries; 220 } 221 /* Update the file block number */ 222 *blk_nump = blk_count; 223 put_bh(bh); 224 return retval; 225 226 } 227 228 static int extend_credit_for_blkdel(handle_t *handle, struct inode *inode) 229 { 230 int retval = 0, needed; 231 232 if (handle->h_buffer_credits > EXT4_RESERVE_TRANS_BLOCKS) 233 return 0; 234 /* 235 * We are freeing a blocks. During this we touch 236 * superblock, group descriptor and block bitmap. 237 * So allocate a credit of 3. We may update 238 * quota (user and group). 239 */ 240 needed = 3 + 2*EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb); 241 242 if (ext4_journal_extend(handle, needed) != 0) 243 retval = ext4_journal_restart(handle, needed); 244 245 return retval; 246 } 247 248 static int free_dind_blocks(handle_t *handle, 249 struct inode *inode, __le32 i_data) 250 { 251 int i; 252 __le32 *tmp_idata; 253 struct buffer_head *bh; 254 unsigned long max_entries = inode->i_sb->s_blocksize >> 2; 255 256 bh = sb_bread(inode->i_sb, le32_to_cpu(i_data)); 257 if (!bh) 258 return -EIO; 259 260 tmp_idata = (__le32 *)bh->b_data; 261 for (i = 0; i < max_entries; i++) { 262 if (tmp_idata[i]) { 263 extend_credit_for_blkdel(handle, inode); 264 ext4_free_blocks(handle, inode, 265 le32_to_cpu(tmp_idata[i]), 1, 1); 266 } 267 } 268 put_bh(bh); 269 extend_credit_for_blkdel(handle, inode); 270 ext4_free_blocks(handle, inode, le32_to_cpu(i_data), 1, 1); 271 return 0; 272 } 273 274 static int free_tind_blocks(handle_t *handle, 275 struct inode *inode, __le32 i_data) 276 { 277 int i, retval = 0; 278 __le32 *tmp_idata; 279 struct buffer_head *bh; 280 unsigned long max_entries = inode->i_sb->s_blocksize >> 2; 281 282 bh = sb_bread(inode->i_sb, le32_to_cpu(i_data)); 283 if (!bh) 284 return -EIO; 285 286 tmp_idata = (__le32 *)bh->b_data; 287 for (i = 0; i < max_entries; i++) { 288 if (tmp_idata[i]) { 289 retval = free_dind_blocks(handle, 290 inode, tmp_idata[i]); 291 if (retval) { 292 put_bh(bh); 293 return retval; 294 } 295 } 296 } 297 put_bh(bh); 298 extend_credit_for_blkdel(handle, inode); 299 ext4_free_blocks(handle, inode, le32_to_cpu(i_data), 1, 1); 300 return 0; 301 } 302 303 static int free_ind_block(handle_t *handle, struct inode *inode, __le32 *i_data) 304 { 305 int retval; 306 307 /* ei->i_data[EXT4_IND_BLOCK] */ 308 if (i_data[0]) { 309 extend_credit_for_blkdel(handle, inode); 310 ext4_free_blocks(handle, inode, 311 le32_to_cpu(i_data[0]), 1, 1); 312 } 313 314 /* ei->i_data[EXT4_DIND_BLOCK] */ 315 if (i_data[1]) { 316 retval = free_dind_blocks(handle, inode, i_data[1]); 317 if (retval) 318 return retval; 319 } 320 321 /* ei->i_data[EXT4_TIND_BLOCK] */ 322 if (i_data[2]) { 323 retval = free_tind_blocks(handle, inode, i_data[2]); 324 if (retval) 325 return retval; 326 } 327 return 0; 328 } 329 330 static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode, 331 struct inode *tmp_inode) 332 { 333 int retval; 334 __le32 i_data[3]; 335 struct ext4_inode_info *ei = EXT4_I(inode); 336 struct ext4_inode_info *tmp_ei = EXT4_I(tmp_inode); 337 338 /* 339 * One credit accounted for writing the 340 * i_data field of the original inode 341 */ 342 retval = ext4_journal_extend(handle, 1); 343 if (retval) { 344 retval = ext4_journal_restart(handle, 1); 345 if (retval) 346 goto err_out; 347 } 348 349 i_data[0] = ei->i_data[EXT4_IND_BLOCK]; 350 i_data[1] = ei->i_data[EXT4_DIND_BLOCK]; 351 i_data[2] = ei->i_data[EXT4_TIND_BLOCK]; 352 353 down_write(&EXT4_I(inode)->i_data_sem); 354 /* 355 * if EXT4_EXT_MIGRATE is cleared a block allocation 356 * happened after we started the migrate. We need to 357 * fail the migrate 358 */ 359 if (!(EXT4_I(inode)->i_flags & EXT4_EXT_MIGRATE)) { 360 retval = -EAGAIN; 361 up_write(&EXT4_I(inode)->i_data_sem); 362 goto err_out; 363 } else 364 EXT4_I(inode)->i_flags = EXT4_I(inode)->i_flags & 365 ~EXT4_EXT_MIGRATE; 366 /* 367 * We have the extent map build with the tmp inode. 368 * Now copy the i_data across 369 */ 370 ei->i_flags |= EXT4_EXTENTS_FL; 371 memcpy(ei->i_data, tmp_ei->i_data, sizeof(ei->i_data)); 372 373 /* 374 * Update i_blocks with the new blocks that got 375 * allocated while adding extents for extent index 376 * blocks. 377 * 378 * While converting to extents we need not 379 * update the orignal inode i_blocks for extent blocks 380 * via quota APIs. The quota update happened via tmp_inode already. 381 */ 382 spin_lock(&inode->i_lock); 383 inode->i_blocks += tmp_inode->i_blocks; 384 spin_unlock(&inode->i_lock); 385 up_write(&EXT4_I(inode)->i_data_sem); 386 387 /* 388 * We mark the inode dirty after, because we decrement the 389 * i_blocks when freeing the indirect meta-data blocks 390 */ 391 retval = free_ind_block(handle, inode, i_data); 392 ext4_mark_inode_dirty(handle, inode); 393 394 err_out: 395 return retval; 396 } 397 398 static int free_ext_idx(handle_t *handle, struct inode *inode, 399 struct ext4_extent_idx *ix) 400 { 401 int i, retval = 0; 402 ext4_fsblk_t block; 403 struct buffer_head *bh; 404 struct ext4_extent_header *eh; 405 406 block = idx_pblock(ix); 407 bh = sb_bread(inode->i_sb, block); 408 if (!bh) 409 return -EIO; 410 411 eh = (struct ext4_extent_header *)bh->b_data; 412 if (eh->eh_depth != 0) { 413 ix = EXT_FIRST_INDEX(eh); 414 for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ix++) { 415 retval = free_ext_idx(handle, inode, ix); 416 if (retval) 417 break; 418 } 419 } 420 put_bh(bh); 421 extend_credit_for_blkdel(handle, inode); 422 ext4_free_blocks(handle, inode, block, 1, 1); 423 return retval; 424 } 425 426 /* 427 * Free the extent meta data blocks only 428 */ 429 static int free_ext_block(handle_t *handle, struct inode *inode) 430 { 431 int i, retval = 0; 432 struct ext4_inode_info *ei = EXT4_I(inode); 433 struct ext4_extent_header *eh = (struct ext4_extent_header *)ei->i_data; 434 struct ext4_extent_idx *ix; 435 if (eh->eh_depth == 0) 436 /* 437 * No extra blocks allocated for extent meta data 438 */ 439 return 0; 440 ix = EXT_FIRST_INDEX(eh); 441 for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ix++) { 442 retval = free_ext_idx(handle, inode, ix); 443 if (retval) 444 return retval; 445 } 446 return retval; 447 448 } 449 450 int ext4_ext_migrate(struct inode *inode) 451 { 452 handle_t *handle; 453 int retval = 0, i; 454 __le32 *i_data; 455 ext4_lblk_t blk_count = 0; 456 struct ext4_inode_info *ei; 457 struct inode *tmp_inode = NULL; 458 struct list_blocks_struct lb; 459 unsigned long max_entries; 460 461 if (!test_opt(inode->i_sb, EXTENTS)) 462 /* 463 * if mounted with noextents we don't allow the migrate 464 */ 465 return -EINVAL; 466 467 if ((EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)) 468 return -EINVAL; 469 470 if (S_ISLNK(inode->i_mode) && inode->i_blocks == 0) 471 /* 472 * don't migrate fast symlink 473 */ 474 return retval; 475 476 handle = ext4_journal_start(inode, 477 EXT4_DATA_TRANS_BLOCKS(inode->i_sb) + 478 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 + 479 2 * EXT4_QUOTA_INIT_BLOCKS(inode->i_sb) 480 + 1); 481 if (IS_ERR(handle)) { 482 retval = PTR_ERR(handle); 483 goto err_out; 484 } 485 tmp_inode = ext4_new_inode(handle, 486 inode->i_sb->s_root->d_inode, 487 S_IFREG); 488 if (IS_ERR(tmp_inode)) { 489 retval = -ENOMEM; 490 ext4_journal_stop(handle); 491 tmp_inode = NULL; 492 goto err_out; 493 } 494 i_size_write(tmp_inode, i_size_read(inode)); 495 /* 496 * We don't want the inode to be reclaimed 497 * if we got interrupted in between. We have 498 * this tmp inode carrying reference to the 499 * data blocks of the original file. We set 500 * the i_nlink to zero at the last stage after 501 * switching the original file to extent format 502 */ 503 tmp_inode->i_nlink = 1; 504 505 ext4_ext_tree_init(handle, tmp_inode); 506 ext4_orphan_add(handle, tmp_inode); 507 ext4_journal_stop(handle); 508 509 /* 510 * start with one credit accounted for 511 * superblock modification. 512 * 513 * For the tmp_inode we already have commited the 514 * trascation that created the inode. Later as and 515 * when we add extents we extent the journal 516 */ 517 /* 518 * Even though we take i_mutex we can still cause block allocation 519 * via mmap write to holes. If we have allocated new blocks we fail 520 * migrate. New block allocation will clear EXT4_EXT_MIGRATE flag. 521 * The flag is updated with i_data_sem held to prevent racing with 522 * block allocation. 523 */ 524 down_read((&EXT4_I(inode)->i_data_sem)); 525 EXT4_I(inode)->i_flags = EXT4_I(inode)->i_flags | EXT4_EXT_MIGRATE; 526 up_read((&EXT4_I(inode)->i_data_sem)); 527 528 handle = ext4_journal_start(inode, 1); 529 530 ei = EXT4_I(inode); 531 i_data = ei->i_data; 532 memset(&lb, 0, sizeof(lb)); 533 534 /* 32 bit block address 4 bytes */ 535 max_entries = inode->i_sb->s_blocksize >> 2; 536 for (i = 0; i < EXT4_NDIR_BLOCKS; i++, blk_count++) { 537 if (i_data[i]) { 538 retval = update_extent_range(handle, tmp_inode, 539 le32_to_cpu(i_data[i]), 540 blk_count, &lb); 541 if (retval) 542 goto err_out; 543 } 544 } 545 if (i_data[EXT4_IND_BLOCK]) { 546 retval = update_ind_extent_range(handle, tmp_inode, 547 le32_to_cpu(i_data[EXT4_IND_BLOCK]), 548 &blk_count, &lb); 549 if (retval) 550 goto err_out; 551 } else 552 blk_count += max_entries; 553 if (i_data[EXT4_DIND_BLOCK]) { 554 retval = update_dind_extent_range(handle, tmp_inode, 555 le32_to_cpu(i_data[EXT4_DIND_BLOCK]), 556 &blk_count, &lb); 557 if (retval) 558 goto err_out; 559 } else 560 blk_count += max_entries * max_entries; 561 if (i_data[EXT4_TIND_BLOCK]) { 562 retval = update_tind_extent_range(handle, tmp_inode, 563 le32_to_cpu(i_data[EXT4_TIND_BLOCK]), 564 &blk_count, &lb); 565 if (retval) 566 goto err_out; 567 } 568 /* 569 * Build the last extent 570 */ 571 retval = finish_range(handle, tmp_inode, &lb); 572 err_out: 573 if (retval) 574 /* 575 * Failure case delete the extent information with the 576 * tmp_inode 577 */ 578 free_ext_block(handle, tmp_inode); 579 else { 580 retval = ext4_ext_swap_inode_data(handle, inode, tmp_inode); 581 if (retval) 582 /* 583 * if we fail to swap inode data free the extent 584 * details of the tmp inode 585 */ 586 free_ext_block(handle, tmp_inode); 587 } 588 589 /* We mark the tmp_inode dirty via ext4_ext_tree_init. */ 590 if (ext4_journal_extend(handle, 1) != 0) 591 ext4_journal_restart(handle, 1); 592 593 /* 594 * Mark the tmp_inode as of size zero 595 */ 596 i_size_write(tmp_inode, 0); 597 598 /* 599 * set the i_blocks count to zero 600 * so that the ext4_delete_inode does the 601 * right job 602 * 603 * We don't need to take the i_lock because 604 * the inode is not visible to user space. 605 */ 606 tmp_inode->i_blocks = 0; 607 608 /* Reset the extent details */ 609 ext4_ext_tree_init(handle, tmp_inode); 610 611 /* 612 * Set the i_nlink to zero so that 613 * generic_drop_inode really deletes the 614 * inode 615 */ 616 tmp_inode->i_nlink = 0; 617 618 ext4_journal_stop(handle); 619 620 if (tmp_inode) 621 iput(tmp_inode); 622 623 return retval; 624 } 625