1 /* 2 * Copyright IBM Corporation, 2007 3 * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> 4 * 5 * This program is free software; you can redistribute it and/or modify it 6 * under the terms of version 2.1 of the GNU Lesser General Public License 7 * as published by the Free Software Foundation. 8 * 9 * This program is distributed in the hope that it would be useful, but 10 * WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 12 * 13 */ 14 15 #include <linux/slab.h> 16 #include "ext4_jbd2.h" 17 18 /* 19 * The contiguous blocks details which can be 20 * represented by a single extent 21 */ 22 struct migrate_struct { 23 ext4_lblk_t first_block, last_block, curr_block; 24 ext4_fsblk_t first_pblock, last_pblock; 25 }; 26 27 static int finish_range(handle_t *handle, struct inode *inode, 28 struct migrate_struct *lb) 29 30 { 31 int retval = 0, needed; 32 struct ext4_extent newext; 33 struct ext4_ext_path *path; 34 if (lb->first_pblock == 0) 35 return 0; 36 37 /* Add the extent to temp inode*/ 38 newext.ee_block = cpu_to_le32(lb->first_block); 39 newext.ee_len = cpu_to_le16(lb->last_block - lb->first_block + 1); 40 ext4_ext_store_pblock(&newext, lb->first_pblock); 41 path = ext4_ext_find_extent(inode, lb->first_block, NULL); 42 43 if (IS_ERR(path)) { 44 retval = PTR_ERR(path); 45 path = NULL; 46 goto err_out; 47 } 48 49 /* 50 * Calculate the credit needed to inserting this extent 51 * Since we are doing this in loop we may accumalate extra 52 * credit. But below we try to not accumalate too much 53 * of them by restarting the journal. 54 */ 55 needed = ext4_ext_calc_credits_for_single_extent(inode, 56 lb->last_block - lb->first_block + 1, path); 57 58 /* 59 * Make sure the credit we accumalated is not really high 60 */ 61 if (needed && ext4_handle_has_enough_credits(handle, 62 EXT4_RESERVE_TRANS_BLOCKS)) { 63 retval = ext4_journal_restart(handle, needed); 64 if (retval) 65 goto err_out; 66 } else if (needed) { 67 retval = ext4_journal_extend(handle, needed); 68 if (retval) { 69 /* 70 * IF not able to extend the journal restart the journal 71 */ 72 retval = ext4_journal_restart(handle, needed); 73 if (retval) 74 goto err_out; 75 } 76 } 77 retval = ext4_ext_insert_extent(handle, inode, path, &newext, 0); 78 err_out: 79 if (path) { 80 ext4_ext_drop_refs(path); 81 kfree(path); 82 } 83 lb->first_pblock = 0; 84 return retval; 85 } 86 87 static int update_extent_range(handle_t *handle, struct inode *inode, 88 ext4_fsblk_t pblock, struct migrate_struct *lb) 89 { 90 int retval; 91 /* 92 * See if we can add on to the existing range (if it exists) 93 */ 94 if (lb->first_pblock && 95 (lb->last_pblock+1 == pblock) && 96 (lb->last_block+1 == lb->curr_block)) { 97 lb->last_pblock = pblock; 98 lb->last_block = lb->curr_block; 99 lb->curr_block++; 100 return 0; 101 } 102 /* 103 * Start a new range. 104 */ 105 retval = finish_range(handle, inode, lb); 106 lb->first_pblock = lb->last_pblock = pblock; 107 lb->first_block = lb->last_block = lb->curr_block; 108 lb->curr_block++; 109 return retval; 110 } 111 112 static int update_ind_extent_range(handle_t *handle, struct inode *inode, 113 ext4_fsblk_t pblock, 114 struct migrate_struct *lb) 115 { 116 struct buffer_head *bh; 117 __le32 *i_data; 118 int i, retval = 0; 119 unsigned long max_entries = inode->i_sb->s_blocksize >> 2; 120 121 bh = sb_bread(inode->i_sb, pblock); 122 if (!bh) 123 return -EIO; 124 125 i_data = (__le32 *)bh->b_data; 126 for (i = 0; i < max_entries; i++) { 127 if (i_data[i]) { 128 retval = update_extent_range(handle, inode, 129 le32_to_cpu(i_data[i]), lb); 130 if (retval) 131 break; 132 } else { 133 lb->curr_block++; 134 } 135 } 136 put_bh(bh); 137 return retval; 138 139 } 140 141 static int update_dind_extent_range(handle_t *handle, struct inode *inode, 142 ext4_fsblk_t pblock, 143 struct migrate_struct *lb) 144 { 145 struct buffer_head *bh; 146 __le32 *i_data; 147 int i, retval = 0; 148 unsigned long max_entries = inode->i_sb->s_blocksize >> 2; 149 150 bh = sb_bread(inode->i_sb, pblock); 151 if (!bh) 152 return -EIO; 153 154 i_data = (__le32 *)bh->b_data; 155 for (i = 0; i < max_entries; i++) { 156 if (i_data[i]) { 157 retval = update_ind_extent_range(handle, inode, 158 le32_to_cpu(i_data[i]), lb); 159 if (retval) 160 break; 161 } else { 162 /* Only update the file block number */ 163 lb->curr_block += max_entries; 164 } 165 } 166 put_bh(bh); 167 return retval; 168 169 } 170 171 static int update_tind_extent_range(handle_t *handle, struct inode *inode, 172 ext4_fsblk_t pblock, 173 struct migrate_struct *lb) 174 { 175 struct buffer_head *bh; 176 __le32 *i_data; 177 int i, retval = 0; 178 unsigned long max_entries = inode->i_sb->s_blocksize >> 2; 179 180 bh = sb_bread(inode->i_sb, pblock); 181 if (!bh) 182 return -EIO; 183 184 i_data = (__le32 *)bh->b_data; 185 for (i = 0; i < max_entries; i++) { 186 if (i_data[i]) { 187 retval = update_dind_extent_range(handle, inode, 188 le32_to_cpu(i_data[i]), lb); 189 if (retval) 190 break; 191 } else { 192 /* Only update the file block number */ 193 lb->curr_block += max_entries * max_entries; 194 } 195 } 196 put_bh(bh); 197 return retval; 198 199 } 200 201 static int extend_credit_for_blkdel(handle_t *handle, struct inode *inode) 202 { 203 int retval = 0, needed; 204 205 if (ext4_handle_has_enough_credits(handle, EXT4_RESERVE_TRANS_BLOCKS+1)) 206 return 0; 207 /* 208 * We are freeing a blocks. During this we touch 209 * superblock, group descriptor and block bitmap. 210 * So allocate a credit of 3. We may update 211 * quota (user and group). 212 */ 213 needed = 3 + EXT4_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb); 214 215 if (ext4_journal_extend(handle, needed) != 0) 216 retval = ext4_journal_restart(handle, needed); 217 218 return retval; 219 } 220 221 static int free_dind_blocks(handle_t *handle, 222 struct inode *inode, __le32 i_data) 223 { 224 int i; 225 __le32 *tmp_idata; 226 struct buffer_head *bh; 227 unsigned long max_entries = inode->i_sb->s_blocksize >> 2; 228 229 bh = sb_bread(inode->i_sb, le32_to_cpu(i_data)); 230 if (!bh) 231 return -EIO; 232 233 tmp_idata = (__le32 *)bh->b_data; 234 for (i = 0; i < max_entries; i++) { 235 if (tmp_idata[i]) { 236 extend_credit_for_blkdel(handle, inode); 237 ext4_free_blocks(handle, inode, NULL, 238 le32_to_cpu(tmp_idata[i]), 1, 239 EXT4_FREE_BLOCKS_METADATA | 240 EXT4_FREE_BLOCKS_FORGET); 241 } 242 } 243 put_bh(bh); 244 extend_credit_for_blkdel(handle, inode); 245 ext4_free_blocks(handle, inode, NULL, le32_to_cpu(i_data), 1, 246 EXT4_FREE_BLOCKS_METADATA | 247 EXT4_FREE_BLOCKS_FORGET); 248 return 0; 249 } 250 251 static int free_tind_blocks(handle_t *handle, 252 struct inode *inode, __le32 i_data) 253 { 254 int i, retval = 0; 255 __le32 *tmp_idata; 256 struct buffer_head *bh; 257 unsigned long max_entries = inode->i_sb->s_blocksize >> 2; 258 259 bh = sb_bread(inode->i_sb, le32_to_cpu(i_data)); 260 if (!bh) 261 return -EIO; 262 263 tmp_idata = (__le32 *)bh->b_data; 264 for (i = 0; i < max_entries; i++) { 265 if (tmp_idata[i]) { 266 retval = free_dind_blocks(handle, 267 inode, tmp_idata[i]); 268 if (retval) { 269 put_bh(bh); 270 return retval; 271 } 272 } 273 } 274 put_bh(bh); 275 extend_credit_for_blkdel(handle, inode); 276 ext4_free_blocks(handle, inode, NULL, le32_to_cpu(i_data), 1, 277 EXT4_FREE_BLOCKS_METADATA | 278 EXT4_FREE_BLOCKS_FORGET); 279 return 0; 280 } 281 282 static int free_ind_block(handle_t *handle, struct inode *inode, __le32 *i_data) 283 { 284 int retval; 285 286 /* ei->i_data[EXT4_IND_BLOCK] */ 287 if (i_data[0]) { 288 extend_credit_for_blkdel(handle, inode); 289 ext4_free_blocks(handle, inode, NULL, 290 le32_to_cpu(i_data[0]), 1, 291 EXT4_FREE_BLOCKS_METADATA | 292 EXT4_FREE_BLOCKS_FORGET); 293 } 294 295 /* ei->i_data[EXT4_DIND_BLOCK] */ 296 if (i_data[1]) { 297 retval = free_dind_blocks(handle, inode, i_data[1]); 298 if (retval) 299 return retval; 300 } 301 302 /* ei->i_data[EXT4_TIND_BLOCK] */ 303 if (i_data[2]) { 304 retval = free_tind_blocks(handle, inode, i_data[2]); 305 if (retval) 306 return retval; 307 } 308 return 0; 309 } 310 311 static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode, 312 struct inode *tmp_inode) 313 { 314 int retval; 315 __le32 i_data[3]; 316 struct ext4_inode_info *ei = EXT4_I(inode); 317 struct ext4_inode_info *tmp_ei = EXT4_I(tmp_inode); 318 319 /* 320 * One credit accounted for writing the 321 * i_data field of the original inode 322 */ 323 retval = ext4_journal_extend(handle, 1); 324 if (retval) { 325 retval = ext4_journal_restart(handle, 1); 326 if (retval) 327 goto err_out; 328 } 329 330 i_data[0] = ei->i_data[EXT4_IND_BLOCK]; 331 i_data[1] = ei->i_data[EXT4_DIND_BLOCK]; 332 i_data[2] = ei->i_data[EXT4_TIND_BLOCK]; 333 334 down_write(&EXT4_I(inode)->i_data_sem); 335 /* 336 * if EXT4_STATE_EXT_MIGRATE is cleared a block allocation 337 * happened after we started the migrate. We need to 338 * fail the migrate 339 */ 340 if (!ext4_test_inode_state(inode, EXT4_STATE_EXT_MIGRATE)) { 341 retval = -EAGAIN; 342 up_write(&EXT4_I(inode)->i_data_sem); 343 goto err_out; 344 } else 345 ext4_clear_inode_state(inode, EXT4_STATE_EXT_MIGRATE); 346 /* 347 * We have the extent map build with the tmp inode. 348 * Now copy the i_data across 349 */ 350 ext4_set_inode_flag(inode, EXT4_INODE_EXTENTS); 351 memcpy(ei->i_data, tmp_ei->i_data, sizeof(ei->i_data)); 352 353 /* 354 * Update i_blocks with the new blocks that got 355 * allocated while adding extents for extent index 356 * blocks. 357 * 358 * While converting to extents we need not 359 * update the orignal inode i_blocks for extent blocks 360 * via quota APIs. The quota update happened via tmp_inode already. 361 */ 362 spin_lock(&inode->i_lock); 363 inode->i_blocks += tmp_inode->i_blocks; 364 spin_unlock(&inode->i_lock); 365 up_write(&EXT4_I(inode)->i_data_sem); 366 367 /* 368 * We mark the inode dirty after, because we decrement the 369 * i_blocks when freeing the indirect meta-data blocks 370 */ 371 retval = free_ind_block(handle, inode, i_data); 372 ext4_mark_inode_dirty(handle, inode); 373 374 err_out: 375 return retval; 376 } 377 378 static int free_ext_idx(handle_t *handle, struct inode *inode, 379 struct ext4_extent_idx *ix) 380 { 381 int i, retval = 0; 382 ext4_fsblk_t block; 383 struct buffer_head *bh; 384 struct ext4_extent_header *eh; 385 386 block = ext4_idx_pblock(ix); 387 bh = sb_bread(inode->i_sb, block); 388 if (!bh) 389 return -EIO; 390 391 eh = (struct ext4_extent_header *)bh->b_data; 392 if (eh->eh_depth != 0) { 393 ix = EXT_FIRST_INDEX(eh); 394 for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ix++) { 395 retval = free_ext_idx(handle, inode, ix); 396 if (retval) 397 break; 398 } 399 } 400 put_bh(bh); 401 extend_credit_for_blkdel(handle, inode); 402 ext4_free_blocks(handle, inode, NULL, block, 1, 403 EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET); 404 return retval; 405 } 406 407 /* 408 * Free the extent meta data blocks only 409 */ 410 static int free_ext_block(handle_t *handle, struct inode *inode) 411 { 412 int i, retval = 0; 413 struct ext4_inode_info *ei = EXT4_I(inode); 414 struct ext4_extent_header *eh = (struct ext4_extent_header *)ei->i_data; 415 struct ext4_extent_idx *ix; 416 if (eh->eh_depth == 0) 417 /* 418 * No extra blocks allocated for extent meta data 419 */ 420 return 0; 421 ix = EXT_FIRST_INDEX(eh); 422 for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ix++) { 423 retval = free_ext_idx(handle, inode, ix); 424 if (retval) 425 return retval; 426 } 427 return retval; 428 429 } 430 431 int ext4_ext_migrate(struct inode *inode) 432 { 433 handle_t *handle; 434 int retval = 0, i; 435 __le32 *i_data; 436 struct ext4_inode_info *ei; 437 struct inode *tmp_inode = NULL; 438 struct migrate_struct lb; 439 unsigned long max_entries; 440 __u32 goal; 441 uid_t owner[2]; 442 443 /* 444 * If the filesystem does not support extents, or the inode 445 * already is extent-based, error out. 446 */ 447 if (!EXT4_HAS_INCOMPAT_FEATURE(inode->i_sb, 448 EXT4_FEATURE_INCOMPAT_EXTENTS) || 449 (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) 450 return -EINVAL; 451 452 if (S_ISLNK(inode->i_mode) && inode->i_blocks == 0) 453 /* 454 * don't migrate fast symlink 455 */ 456 return retval; 457 458 handle = ext4_journal_start(inode, 459 EXT4_DATA_TRANS_BLOCKS(inode->i_sb) + 460 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 + 461 EXT4_MAXQUOTAS_INIT_BLOCKS(inode->i_sb) 462 + 1); 463 if (IS_ERR(handle)) { 464 retval = PTR_ERR(handle); 465 return retval; 466 } 467 goal = (((inode->i_ino - 1) / EXT4_INODES_PER_GROUP(inode->i_sb)) * 468 EXT4_INODES_PER_GROUP(inode->i_sb)) + 1; 469 owner[0] = i_uid_read(inode); 470 owner[1] = i_gid_read(inode); 471 tmp_inode = ext4_new_inode(handle, inode->i_sb->s_root->d_inode, 472 S_IFREG, NULL, goal, owner); 473 if (IS_ERR(tmp_inode)) { 474 retval = PTR_ERR(tmp_inode); 475 ext4_journal_stop(handle); 476 return retval; 477 } 478 i_size_write(tmp_inode, i_size_read(inode)); 479 /* 480 * Set the i_nlink to zero so it will be deleted later 481 * when we drop inode reference. 482 */ 483 clear_nlink(tmp_inode); 484 485 ext4_ext_tree_init(handle, tmp_inode); 486 ext4_orphan_add(handle, tmp_inode); 487 ext4_journal_stop(handle); 488 489 /* 490 * start with one credit accounted for 491 * superblock modification. 492 * 493 * For the tmp_inode we already have committed the 494 * trascation that created the inode. Later as and 495 * when we add extents we extent the journal 496 */ 497 /* 498 * Even though we take i_mutex we can still cause block 499 * allocation via mmap write to holes. If we have allocated 500 * new blocks we fail migrate. New block allocation will 501 * clear EXT4_STATE_EXT_MIGRATE flag. The flag is updated 502 * with i_data_sem held to prevent racing with block 503 * allocation. 504 */ 505 down_read((&EXT4_I(inode)->i_data_sem)); 506 ext4_set_inode_state(inode, EXT4_STATE_EXT_MIGRATE); 507 up_read((&EXT4_I(inode)->i_data_sem)); 508 509 handle = ext4_journal_start(inode, 1); 510 if (IS_ERR(handle)) { 511 /* 512 * It is impossible to update on-disk structures without 513 * a handle, so just rollback in-core changes and live other 514 * work to orphan_list_cleanup() 515 */ 516 ext4_orphan_del(NULL, tmp_inode); 517 retval = PTR_ERR(handle); 518 goto out; 519 } 520 521 ei = EXT4_I(inode); 522 i_data = ei->i_data; 523 memset(&lb, 0, sizeof(lb)); 524 525 /* 32 bit block address 4 bytes */ 526 max_entries = inode->i_sb->s_blocksize >> 2; 527 for (i = 0; i < EXT4_NDIR_BLOCKS; i++) { 528 if (i_data[i]) { 529 retval = update_extent_range(handle, tmp_inode, 530 le32_to_cpu(i_data[i]), &lb); 531 if (retval) 532 goto err_out; 533 } else 534 lb.curr_block++; 535 } 536 if (i_data[EXT4_IND_BLOCK]) { 537 retval = update_ind_extent_range(handle, tmp_inode, 538 le32_to_cpu(i_data[EXT4_IND_BLOCK]), &lb); 539 if (retval) 540 goto err_out; 541 } else 542 lb.curr_block += max_entries; 543 if (i_data[EXT4_DIND_BLOCK]) { 544 retval = update_dind_extent_range(handle, tmp_inode, 545 le32_to_cpu(i_data[EXT4_DIND_BLOCK]), &lb); 546 if (retval) 547 goto err_out; 548 } else 549 lb.curr_block += max_entries * max_entries; 550 if (i_data[EXT4_TIND_BLOCK]) { 551 retval = update_tind_extent_range(handle, tmp_inode, 552 le32_to_cpu(i_data[EXT4_TIND_BLOCK]), &lb); 553 if (retval) 554 goto err_out; 555 } 556 /* 557 * Build the last extent 558 */ 559 retval = finish_range(handle, tmp_inode, &lb); 560 err_out: 561 if (retval) 562 /* 563 * Failure case delete the extent information with the 564 * tmp_inode 565 */ 566 free_ext_block(handle, tmp_inode); 567 else { 568 retval = ext4_ext_swap_inode_data(handle, inode, tmp_inode); 569 if (retval) 570 /* 571 * if we fail to swap inode data free the extent 572 * details of the tmp inode 573 */ 574 free_ext_block(handle, tmp_inode); 575 } 576 577 /* We mark the tmp_inode dirty via ext4_ext_tree_init. */ 578 if (ext4_journal_extend(handle, 1) != 0) 579 ext4_journal_restart(handle, 1); 580 581 /* 582 * Mark the tmp_inode as of size zero 583 */ 584 i_size_write(tmp_inode, 0); 585 586 /* 587 * set the i_blocks count to zero 588 * so that the ext4_delete_inode does the 589 * right job 590 * 591 * We don't need to take the i_lock because 592 * the inode is not visible to user space. 593 */ 594 tmp_inode->i_blocks = 0; 595 596 /* Reset the extent details */ 597 ext4_ext_tree_init(handle, tmp_inode); 598 ext4_journal_stop(handle); 599 out: 600 unlock_new_inode(tmp_inode); 601 iput(tmp_inode); 602 603 return retval; 604 } 605