1748de673SAkira Fujita /* 2748de673SAkira Fujita * Copyright (c) 2008,2009 NEC Software Tohoku, Ltd. 3748de673SAkira Fujita * Written by Takashi Sato <t-sato@yk.jp.nec.com> 4748de673SAkira Fujita * Akira Fujita <a-fujita@rs.jp.nec.com> 5748de673SAkira Fujita * 6748de673SAkira Fujita * This program is free software; you can redistribute it and/or modify it 7748de673SAkira Fujita * under the terms of version 2.1 of the GNU Lesser General Public License 8748de673SAkira Fujita * as published by the Free Software Foundation. 9748de673SAkira Fujita * 10748de673SAkira Fujita * This program is distributed in the hope that it will be useful, 11748de673SAkira Fujita * but WITHOUT ANY WARRANTY; without even the implied warranty of 12748de673SAkira Fujita * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13748de673SAkira Fujita * GNU General Public License for more details. 14748de673SAkira Fujita */ 15748de673SAkira Fujita 16748de673SAkira Fujita #include <linux/fs.h> 17748de673SAkira Fujita #include <linux/quotaops.h> 185a0e3ad6STejun Heo #include <linux/slab.h> 19748de673SAkira Fujita #include "ext4_jbd2.h" 20748de673SAkira Fujita #include "ext4.h" 214a092d73STheodore Ts'o #include "ext4_extents.h" 22748de673SAkira Fujita 23e8505970SAkira Fujita /** 24e8505970SAkira Fujita * get_ext_path - Find an extent path for designated logical block number. 25e8505970SAkira Fujita * 26e8505970SAkira Fujita * @inode: an inode which is searched 27e8505970SAkira Fujita * @lblock: logical block number to find an extent path 28e8505970SAkira Fujita * @path: pointer to an extent path pointer (for output) 29e8505970SAkira Fujita * 30e8505970SAkira Fujita * ext4_ext_find_extent wrapper. Return 0 on success, or a negative error value 31e8505970SAkira Fujita * on failure. 
32e8505970SAkira Fujita */ 33e8505970SAkira Fujita static inline int 34e8505970SAkira Fujita get_ext_path(struct inode *inode, ext4_lblk_t lblock, 350e401101SDmitry Monakhov struct ext4_ext_path **orig_path) 36e8505970SAkira Fujita { 37e8505970SAkira Fujita int ret = 0; 380e401101SDmitry Monakhov struct ext4_ext_path *path; 39e8505970SAkira Fujita 40107a7bd3STheodore Ts'o path = ext4_ext_find_extent(inode, lblock, *orig_path, EXT4_EX_NOCACHE); 410e401101SDmitry Monakhov if (IS_ERR(path)) 420e401101SDmitry Monakhov ret = PTR_ERR(path); 430e401101SDmitry Monakhov else if (path[ext_depth(inode)].p_ext == NULL) 44347fa6f1SAkira Fujita ret = -ENODATA; 450e401101SDmitry Monakhov else 460e401101SDmitry Monakhov *orig_path = path; 47347fa6f1SAkira Fujita 48e8505970SAkira Fujita return ret; 49e8505970SAkira Fujita } 50748de673SAkira Fujita 51748de673SAkira Fujita /** 52748de673SAkira Fujita * copy_extent_status - Copy the extent's initialization status 53748de673SAkira Fujita * 54748de673SAkira Fujita * @src: an extent for getting initialize status 55748de673SAkira Fujita * @dest: an extent to be set the status 56748de673SAkira Fujita */ 57748de673SAkira Fujita static void 58748de673SAkira Fujita copy_extent_status(struct ext4_extent *src, struct ext4_extent *dest) 59748de673SAkira Fujita { 60748de673SAkira Fujita if (ext4_ext_is_uninitialized(src)) 61748de673SAkira Fujita ext4_ext_mark_uninitialized(dest); 62748de673SAkira Fujita else 63748de673SAkira Fujita dest->ee_len = cpu_to_le16(ext4_ext_get_actual_len(dest)); 64748de673SAkira Fujita } 65748de673SAkira Fujita 66748de673SAkira Fujita /** 67748de673SAkira Fujita * mext_next_extent - Search for the next extent and set it to "extent" 68748de673SAkira Fujita * 69748de673SAkira Fujita * @inode: inode which is searched 70748de673SAkira Fujita * @path: this will obtain data for the next extent 71748de673SAkira Fujita * @extent: pointer to the next extent we have just gotten 72748de673SAkira Fujita * 73748de673SAkira Fujita * 
Search the next extent in the array of ext4_ext_path structure (@path) 74748de673SAkira Fujita * and set it to ext4_extent structure (@extent). In addition, the member of 75748de673SAkira Fujita * @path (->p_ext) also points the next extent. Return 0 on success, 1 if 76748de673SAkira Fujita * ext4_ext_path structure refers to the last extent, or a negative error 77748de673SAkira Fujita * value on failure. 78748de673SAkira Fujita */ 79748de673SAkira Fujita static int 80748de673SAkira Fujita mext_next_extent(struct inode *inode, struct ext4_ext_path *path, 81748de673SAkira Fujita struct ext4_extent **extent) 82748de673SAkira Fujita { 83fc04cb49SAkira Fujita struct ext4_extent_header *eh; 84748de673SAkira Fujita int ppos, leaf_ppos = path->p_depth; 85748de673SAkira Fujita 86748de673SAkira Fujita ppos = leaf_ppos; 87748de673SAkira Fujita if (EXT_LAST_EXTENT(path[ppos].p_hdr) > path[ppos].p_ext) { 88748de673SAkira Fujita /* leaf block */ 89748de673SAkira Fujita *extent = ++path[ppos].p_ext; 90bf89d16fSTheodore Ts'o path[ppos].p_block = ext4_ext_pblock(path[ppos].p_ext); 91748de673SAkira Fujita return 0; 92748de673SAkira Fujita } 93748de673SAkira Fujita 94748de673SAkira Fujita while (--ppos >= 0) { 95748de673SAkira Fujita if (EXT_LAST_INDEX(path[ppos].p_hdr) > 96748de673SAkira Fujita path[ppos].p_idx) { 97748de673SAkira Fujita int cur_ppos = ppos; 98748de673SAkira Fujita 99748de673SAkira Fujita /* index block */ 100748de673SAkira Fujita path[ppos].p_idx++; 101bf89d16fSTheodore Ts'o path[ppos].p_block = ext4_idx_pblock(path[ppos].p_idx); 102748de673SAkira Fujita if (path[ppos+1].p_bh) 103748de673SAkira Fujita brelse(path[ppos+1].p_bh); 104748de673SAkira Fujita path[ppos+1].p_bh = 105748de673SAkira Fujita sb_bread(inode->i_sb, path[ppos].p_block); 106748de673SAkira Fujita if (!path[ppos+1].p_bh) 107748de673SAkira Fujita return -EIO; 108748de673SAkira Fujita path[ppos+1].p_hdr = 109748de673SAkira Fujita ext_block_hdr(path[ppos+1].p_bh); 110748de673SAkira Fujita 
111748de673SAkira Fujita /* Halfway index block */ 112748de673SAkira Fujita while (++cur_ppos < leaf_ppos) { 113748de673SAkira Fujita path[cur_ppos].p_idx = 114748de673SAkira Fujita EXT_FIRST_INDEX(path[cur_ppos].p_hdr); 115748de673SAkira Fujita path[cur_ppos].p_block = 116bf89d16fSTheodore Ts'o ext4_idx_pblock(path[cur_ppos].p_idx); 117748de673SAkira Fujita if (path[cur_ppos+1].p_bh) 118748de673SAkira Fujita brelse(path[cur_ppos+1].p_bh); 119748de673SAkira Fujita path[cur_ppos+1].p_bh = sb_bread(inode->i_sb, 120748de673SAkira Fujita path[cur_ppos].p_block); 121748de673SAkira Fujita if (!path[cur_ppos+1].p_bh) 122748de673SAkira Fujita return -EIO; 123748de673SAkira Fujita path[cur_ppos+1].p_hdr = 124748de673SAkira Fujita ext_block_hdr(path[cur_ppos+1].p_bh); 125748de673SAkira Fujita } 126748de673SAkira Fujita 127fc04cb49SAkira Fujita path[leaf_ppos].p_ext = *extent = NULL; 128fc04cb49SAkira Fujita 129fc04cb49SAkira Fujita eh = path[leaf_ppos].p_hdr; 130fc04cb49SAkira Fujita if (le16_to_cpu(eh->eh_entries) == 0) 131fc04cb49SAkira Fujita /* empty leaf is found */ 132fc04cb49SAkira Fujita return -ENODATA; 133fc04cb49SAkira Fujita 134748de673SAkira Fujita /* leaf block */ 135748de673SAkira Fujita path[leaf_ppos].p_ext = *extent = 136748de673SAkira Fujita EXT_FIRST_EXTENT(path[leaf_ppos].p_hdr); 137fc04cb49SAkira Fujita path[leaf_ppos].p_block = 138bf89d16fSTheodore Ts'o ext4_ext_pblock(path[leaf_ppos].p_ext); 139748de673SAkira Fujita return 0; 140748de673SAkira Fujita } 141748de673SAkira Fujita } 142748de673SAkira Fujita /* We found the last extent */ 143748de673SAkira Fujita return 1; 144748de673SAkira Fujita } 145748de673SAkira Fujita 146748de673SAkira Fujita /** 147393d1d1dSDr. Tilmann Bubeck * ext4_double_down_write_data_sem - Acquire two inodes' write lock 148393d1d1dSDr. Tilmann Bubeck * of i_data_sem 149748de673SAkira Fujita * 15003bd8b9bSDmitry Monakhov * Acquire write lock of i_data_sem of the two inodes 151748de673SAkira Fujita */ 152393d1d1dSDr. 
Tilmann Bubeck void 153393d1d1dSDr. Tilmann Bubeck ext4_double_down_write_data_sem(struct inode *first, struct inode *second) 154748de673SAkira Fujita { 15503bd8b9bSDmitry Monakhov if (first < second) { 156748de673SAkira Fujita down_write(&EXT4_I(first)->i_data_sem); 15749bd22bcSAkira Fujita down_write_nested(&EXT4_I(second)->i_data_sem, SINGLE_DEPTH_NESTING); 15803bd8b9bSDmitry Monakhov } else { 15903bd8b9bSDmitry Monakhov down_write(&EXT4_I(second)->i_data_sem); 16003bd8b9bSDmitry Monakhov down_write_nested(&EXT4_I(first)->i_data_sem, SINGLE_DEPTH_NESTING); 16103bd8b9bSDmitry Monakhov 16203bd8b9bSDmitry Monakhov } 163748de673SAkira Fujita } 164748de673SAkira Fujita 165748de673SAkira Fujita /** 166393d1d1dSDr. Tilmann Bubeck * ext4_double_up_write_data_sem - Release two inodes' write lock of i_data_sem 167748de673SAkira Fujita * 168748de673SAkira Fujita * @orig_inode: original inode structure to be released its lock first 169748de673SAkira Fujita * @donor_inode: donor inode structure to be released its lock second 170fc04cb49SAkira Fujita * Release write lock of i_data_sem of two inodes (orig and donor). 171748de673SAkira Fujita */ 172393d1d1dSDr. Tilmann Bubeck void 173393d1d1dSDr. Tilmann Bubeck ext4_double_up_write_data_sem(struct inode *orig_inode, 174393d1d1dSDr. 
Tilmann Bubeck struct inode *donor_inode) 175748de673SAkira Fujita { 176748de673SAkira Fujita up_write(&EXT4_I(orig_inode)->i_data_sem); 177748de673SAkira Fujita up_write(&EXT4_I(donor_inode)->i_data_sem); 178748de673SAkira Fujita } 179748de673SAkira Fujita 180748de673SAkira Fujita /** 181748de673SAkira Fujita * mext_insert_across_blocks - Insert extents across leaf block 182748de673SAkira Fujita * 183748de673SAkira Fujita * @handle: journal handle 184748de673SAkira Fujita * @orig_inode: original inode 185748de673SAkira Fujita * @o_start: first original extent to be changed 186748de673SAkira Fujita * @o_end: last original extent to be changed 187748de673SAkira Fujita * @start_ext: first new extent to be inserted 188748de673SAkira Fujita * @new_ext: middle of new extent to be inserted 189748de673SAkira Fujita * @end_ext: last new extent to be inserted 190748de673SAkira Fujita * 191748de673SAkira Fujita * Allocate a new leaf block and insert extents into it. Return 0 on success, 192748de673SAkira Fujita * or a negative error value on failure. 
193748de673SAkira Fujita */ 194748de673SAkira Fujita static int 195748de673SAkira Fujita mext_insert_across_blocks(handle_t *handle, struct inode *orig_inode, 196748de673SAkira Fujita struct ext4_extent *o_start, struct ext4_extent *o_end, 197748de673SAkira Fujita struct ext4_extent *start_ext, struct ext4_extent *new_ext, 198748de673SAkira Fujita struct ext4_extent *end_ext) 199748de673SAkira Fujita { 200748de673SAkira Fujita struct ext4_ext_path *orig_path = NULL; 201748de673SAkira Fujita ext4_lblk_t eblock = 0; 202748de673SAkira Fujita int new_flag = 0; 203748de673SAkira Fujita int end_flag = 0; 204748de673SAkira Fujita int err = 0; 205748de673SAkira Fujita 206748de673SAkira Fujita if (start_ext->ee_len && new_ext->ee_len && end_ext->ee_len) { 207748de673SAkira Fujita if (o_start == o_end) { 208748de673SAkira Fujita 209748de673SAkira Fujita /* start_ext new_ext end_ext 210748de673SAkira Fujita * donor |---------|-----------|--------| 211748de673SAkira Fujita * orig |------------------------------| 212748de673SAkira Fujita */ 213748de673SAkira Fujita end_flag = 1; 214748de673SAkira Fujita } else { 215748de673SAkira Fujita 216748de673SAkira Fujita /* start_ext new_ext end_ext 217748de673SAkira Fujita * donor |---------|----------|---------| 218748de673SAkira Fujita * orig |---------------|--------------| 219748de673SAkira Fujita */ 220748de673SAkira Fujita o_end->ee_block = end_ext->ee_block; 221748de673SAkira Fujita o_end->ee_len = end_ext->ee_len; 222bf89d16fSTheodore Ts'o ext4_ext_store_pblock(o_end, ext4_ext_pblock(end_ext)); 223748de673SAkira Fujita } 224748de673SAkira Fujita 225748de673SAkira Fujita o_start->ee_len = start_ext->ee_len; 2265fd5249aSAkira Fujita eblock = le32_to_cpu(start_ext->ee_block); 227748de673SAkira Fujita new_flag = 1; 228748de673SAkira Fujita 229748de673SAkira Fujita } else if (start_ext->ee_len && new_ext->ee_len && 230748de673SAkira Fujita !end_ext->ee_len && o_start == o_end) { 231748de673SAkira Fujita 232748de673SAkira Fujita /* 
start_ext new_ext 233748de673SAkira Fujita * donor |--------------|---------------| 234748de673SAkira Fujita * orig |------------------------------| 235748de673SAkira Fujita */ 236748de673SAkira Fujita o_start->ee_len = start_ext->ee_len; 2375fd5249aSAkira Fujita eblock = le32_to_cpu(start_ext->ee_block); 238748de673SAkira Fujita new_flag = 1; 239748de673SAkira Fujita 240748de673SAkira Fujita } else if (!start_ext->ee_len && new_ext->ee_len && 241748de673SAkira Fujita end_ext->ee_len && o_start == o_end) { 242748de673SAkira Fujita 243748de673SAkira Fujita /* new_ext end_ext 244748de673SAkira Fujita * donor |--------------|---------------| 245748de673SAkira Fujita * orig |------------------------------| 246748de673SAkira Fujita */ 247748de673SAkira Fujita o_end->ee_block = end_ext->ee_block; 248748de673SAkira Fujita o_end->ee_len = end_ext->ee_len; 249bf89d16fSTheodore Ts'o ext4_ext_store_pblock(o_end, ext4_ext_pblock(end_ext)); 250748de673SAkira Fujita 251748de673SAkira Fujita /* 252748de673SAkira Fujita * Set 0 to the extent block if new_ext was 253748de673SAkira Fujita * the first block. 
254748de673SAkira Fujita */ 255748de673SAkira Fujita if (new_ext->ee_block) 256748de673SAkira Fujita eblock = le32_to_cpu(new_ext->ee_block); 257748de673SAkira Fujita 258748de673SAkira Fujita new_flag = 1; 259748de673SAkira Fujita } else { 260748de673SAkira Fujita ext4_debug("ext4 move extent: Unexpected insert case\n"); 261748de673SAkira Fujita return -EIO; 262748de673SAkira Fujita } 263748de673SAkira Fujita 264748de673SAkira Fujita if (new_flag) { 265e8505970SAkira Fujita err = get_ext_path(orig_inode, eblock, &orig_path); 266347fa6f1SAkira Fujita if (err) 267748de673SAkira Fujita goto out; 268748de673SAkira Fujita 269748de673SAkira Fujita if (ext4_ext_insert_extent(handle, orig_inode, 2700031462bSMingming Cao orig_path, new_ext, 0)) 271748de673SAkira Fujita goto out; 272748de673SAkira Fujita } 273748de673SAkira Fujita 274748de673SAkira Fujita if (end_flag) { 275e8505970SAkira Fujita err = get_ext_path(orig_inode, 276e8505970SAkira Fujita le32_to_cpu(end_ext->ee_block) - 1, &orig_path); 277347fa6f1SAkira Fujita if (err) 278748de673SAkira Fujita goto out; 279748de673SAkira Fujita 280748de673SAkira Fujita if (ext4_ext_insert_extent(handle, orig_inode, 2810031462bSMingming Cao orig_path, end_ext, 0)) 282748de673SAkira Fujita goto out; 283748de673SAkira Fujita } 284748de673SAkira Fujita out: 285748de673SAkira Fujita if (orig_path) { 286748de673SAkira Fujita ext4_ext_drop_refs(orig_path); 287748de673SAkira Fujita kfree(orig_path); 288748de673SAkira Fujita } 289748de673SAkira Fujita 290748de673SAkira Fujita return err; 291748de673SAkira Fujita 292748de673SAkira Fujita } 293748de673SAkira Fujita 294748de673SAkira Fujita /** 295748de673SAkira Fujita * mext_insert_inside_block - Insert new extent to the extent block 296748de673SAkira Fujita * 297748de673SAkira Fujita * @o_start: first original extent to be moved 298748de673SAkira Fujita * @o_end: last original extent to be moved 299748de673SAkira Fujita * @start_ext: first new extent to be inserted 300748de673SAkira 
Fujita * @new_ext: middle of new extent to be inserted 301748de673SAkira Fujita * @end_ext: last new extent to be inserted 302748de673SAkira Fujita * @eh: extent header of target leaf block 303748de673SAkira Fujita * @range_to_move: used to decide how to insert extent 304748de673SAkira Fujita * 305748de673SAkira Fujita * Insert extents into the leaf block. The extent (@o_start) is overwritten 306748de673SAkira Fujita * by inserted extents. 307748de673SAkira Fujita */ 308748de673SAkira Fujita static void 309748de673SAkira Fujita mext_insert_inside_block(struct ext4_extent *o_start, 310748de673SAkira Fujita struct ext4_extent *o_end, 311748de673SAkira Fujita struct ext4_extent *start_ext, 312748de673SAkira Fujita struct ext4_extent *new_ext, 313748de673SAkira Fujita struct ext4_extent *end_ext, 314748de673SAkira Fujita struct ext4_extent_header *eh, 315748de673SAkira Fujita int range_to_move) 316748de673SAkira Fujita { 317748de673SAkira Fujita int i = 0; 318748de673SAkira Fujita unsigned long len; 319748de673SAkira Fujita 320748de673SAkira Fujita /* Move the existing extents */ 321748de673SAkira Fujita if (range_to_move && o_end < EXT_LAST_EXTENT(eh)) { 322748de673SAkira Fujita len = (unsigned long)(EXT_LAST_EXTENT(eh) + 1) - 323748de673SAkira Fujita (unsigned long)(o_end + 1); 324748de673SAkira Fujita memmove(o_end + 1 + range_to_move, o_end + 1, len); 325748de673SAkira Fujita } 326748de673SAkira Fujita 327748de673SAkira Fujita /* Insert start entry */ 328748de673SAkira Fujita if (start_ext->ee_len) 329748de673SAkira Fujita o_start[i++].ee_len = start_ext->ee_len; 330748de673SAkira Fujita 331748de673SAkira Fujita /* Insert new entry */ 332748de673SAkira Fujita if (new_ext->ee_len) { 333748de673SAkira Fujita o_start[i] = *new_ext; 334bf89d16fSTheodore Ts'o ext4_ext_store_pblock(&o_start[i++], ext4_ext_pblock(new_ext)); 335748de673SAkira Fujita } 336748de673SAkira Fujita 337748de673SAkira Fujita /* Insert end entry */ 338748de673SAkira Fujita if (end_ext->ee_len) 
339748de673SAkira Fujita o_start[i] = *end_ext; 340748de673SAkira Fujita 341748de673SAkira Fujita /* Increment the total entries counter on the extent block */ 342748de673SAkira Fujita le16_add_cpu(&eh->eh_entries, range_to_move); 343748de673SAkira Fujita } 344748de673SAkira Fujita 345748de673SAkira Fujita /** 346748de673SAkira Fujita * mext_insert_extents - Insert new extent 347748de673SAkira Fujita * 348748de673SAkira Fujita * @handle: journal handle 349748de673SAkira Fujita * @orig_inode: original inode 350748de673SAkira Fujita * @orig_path: path indicates first extent to be changed 351748de673SAkira Fujita * @o_start: first original extent to be changed 352748de673SAkira Fujita * @o_end: last original extent to be changed 353748de673SAkira Fujita * @start_ext: first new extent to be inserted 354748de673SAkira Fujita * @new_ext: middle of new extent to be inserted 355748de673SAkira Fujita * @end_ext: last new extent to be inserted 356748de673SAkira Fujita * 357748de673SAkira Fujita * Call the function to insert extents. If we cannot add more extents into 358748de673SAkira Fujita * the leaf block, we call mext_insert_across_blocks() to create a 359748de673SAkira Fujita * new leaf block. Otherwise call mext_insert_inside_block(). Return 0 360748de673SAkira Fujita * on success, or a negative error value on failure. 
361748de673SAkira Fujita */ 362748de673SAkira Fujita static int 363748de673SAkira Fujita mext_insert_extents(handle_t *handle, struct inode *orig_inode, 364748de673SAkira Fujita struct ext4_ext_path *orig_path, 365748de673SAkira Fujita struct ext4_extent *o_start, 366748de673SAkira Fujita struct ext4_extent *o_end, 367748de673SAkira Fujita struct ext4_extent *start_ext, 368748de673SAkira Fujita struct ext4_extent *new_ext, 369748de673SAkira Fujita struct ext4_extent *end_ext) 370748de673SAkira Fujita { 371748de673SAkira Fujita struct ext4_extent_header *eh; 372748de673SAkira Fujita unsigned long need_slots, slots_range; 373748de673SAkira Fujita int range_to_move, depth, ret; 374748de673SAkira Fujita 375748de673SAkira Fujita /* 376748de673SAkira Fujita * The extents need to be inserted 377748de673SAkira Fujita * start_extent + new_extent + end_extent. 378748de673SAkira Fujita */ 379748de673SAkira Fujita need_slots = (start_ext->ee_len ? 1 : 0) + (end_ext->ee_len ? 1 : 0) + 380748de673SAkira Fujita (new_ext->ee_len ? 
1 : 0); 381748de673SAkira Fujita 382748de673SAkira Fujita /* The number of slots between start and end */ 383748de673SAkira Fujita slots_range = ((unsigned long)(o_end + 1) - (unsigned long)o_start + 1) 384748de673SAkira Fujita / sizeof(struct ext4_extent); 385748de673SAkira Fujita 386748de673SAkira Fujita /* Range to move the end of extent */ 387748de673SAkira Fujita range_to_move = need_slots - slots_range; 388748de673SAkira Fujita depth = orig_path->p_depth; 389748de673SAkira Fujita orig_path += depth; 390748de673SAkira Fujita eh = orig_path->p_hdr; 391748de673SAkira Fujita 392748de673SAkira Fujita if (depth) { 393748de673SAkira Fujita /* Register to journal */ 394748de673SAkira Fujita ret = ext4_journal_get_write_access(handle, orig_path->p_bh); 395748de673SAkira Fujita if (ret) 396748de673SAkira Fujita return ret; 397748de673SAkira Fujita } 398748de673SAkira Fujita 399748de673SAkira Fujita /* Expansion */ 400748de673SAkira Fujita if (range_to_move > 0 && 401748de673SAkira Fujita (range_to_move > le16_to_cpu(eh->eh_max) 402748de673SAkira Fujita - le16_to_cpu(eh->eh_entries))) { 403748de673SAkira Fujita 404748de673SAkira Fujita ret = mext_insert_across_blocks(handle, orig_inode, o_start, 405748de673SAkira Fujita o_end, start_ext, new_ext, end_ext); 406748de673SAkira Fujita if (ret < 0) 407748de673SAkira Fujita return ret; 408748de673SAkira Fujita } else 409748de673SAkira Fujita mext_insert_inside_block(o_start, o_end, start_ext, new_ext, 410748de673SAkira Fujita end_ext, eh, range_to_move); 411748de673SAkira Fujita 4122656497bSDarrick J. Wong return ext4_ext_dirty(handle, orig_inode, orig_path); 413748de673SAkira Fujita } 414748de673SAkira Fujita 415748de673SAkira Fujita /** 416748de673SAkira Fujita * mext_leaf_block - Move one leaf extent block into the inode. 
417748de673SAkira Fujita * 418748de673SAkira Fujita * @handle: journal handle 419748de673SAkira Fujita * @orig_inode: original inode 420748de673SAkira Fujita * @orig_path: path indicates first extent to be changed 421748de673SAkira Fujita * @dext: donor extent 422748de673SAkira Fujita * @from: start offset on the target file 423748de673SAkira Fujita * 424748de673SAkira Fujita * In order to insert extents into the leaf block, we must divide the extent 425748de673SAkira Fujita * in the leaf block into three extents. The one is located to be inserted 426748de673SAkira Fujita * extents, and the others are located around it. 427748de673SAkira Fujita * 428748de673SAkira Fujita * Therefore, this function creates structures to save extents of the leaf 429748de673SAkira Fujita * block, and inserts extents by calling mext_insert_extents() with 430748de673SAkira Fujita * created extents. Return 0 on success, or a negative error value on failure. 431748de673SAkira Fujita */ 432748de673SAkira Fujita static int 433748de673SAkira Fujita mext_leaf_block(handle_t *handle, struct inode *orig_inode, 434748de673SAkira Fujita struct ext4_ext_path *orig_path, struct ext4_extent *dext, 435748de673SAkira Fujita ext4_lblk_t *from) 436748de673SAkira Fujita { 437748de673SAkira Fujita struct ext4_extent *oext, *o_start, *o_end, *prev_ext; 438748de673SAkira Fujita struct ext4_extent new_ext, start_ext, end_ext; 439748de673SAkira Fujita ext4_lblk_t new_ext_end; 440748de673SAkira Fujita int oext_alen, new_ext_alen, end_ext_alen; 441748de673SAkira Fujita int depth = ext_depth(orig_inode); 442748de673SAkira Fujita int ret; 443748de673SAkira Fujita 444c26d0badSSteven Liu start_ext.ee_block = end_ext.ee_block = 0; 445748de673SAkira Fujita o_start = o_end = oext = orig_path[depth].p_ext; 446748de673SAkira Fujita oext_alen = ext4_ext_get_actual_len(oext); 447748de673SAkira Fujita start_ext.ee_len = end_ext.ee_len = 0; 448748de673SAkira Fujita 449748de673SAkira Fujita new_ext.ee_block = 
cpu_to_le32(*from); 450bf89d16fSTheodore Ts'o ext4_ext_store_pblock(&new_ext, ext4_ext_pblock(dext)); 451748de673SAkira Fujita new_ext.ee_len = dext->ee_len; 452748de673SAkira Fujita new_ext_alen = ext4_ext_get_actual_len(&new_ext); 453748de673SAkira Fujita new_ext_end = le32_to_cpu(new_ext.ee_block) + new_ext_alen - 1; 454748de673SAkira Fujita 455748de673SAkira Fujita /* 456748de673SAkira Fujita * Case: original extent is first 457748de673SAkira Fujita * oext |--------| 458748de673SAkira Fujita * new_ext |--| 459748de673SAkira Fujita * start_ext |--| 460748de673SAkira Fujita */ 461748de673SAkira Fujita if (le32_to_cpu(oext->ee_block) < le32_to_cpu(new_ext.ee_block) && 462748de673SAkira Fujita le32_to_cpu(new_ext.ee_block) < 463748de673SAkira Fujita le32_to_cpu(oext->ee_block) + oext_alen) { 464748de673SAkira Fujita start_ext.ee_len = cpu_to_le16(le32_to_cpu(new_ext.ee_block) - 465748de673SAkira Fujita le32_to_cpu(oext->ee_block)); 4665fd5249aSAkira Fujita start_ext.ee_block = oext->ee_block; 467748de673SAkira Fujita copy_extent_status(oext, &start_ext); 468748de673SAkira Fujita } else if (oext > EXT_FIRST_EXTENT(orig_path[depth].p_hdr)) { 469748de673SAkira Fujita prev_ext = oext - 1; 470748de673SAkira Fujita /* 471748de673SAkira Fujita * We can merge new_ext into previous extent, 472748de673SAkira Fujita * if these are contiguous and same extent type. 
473748de673SAkira Fujita */ 474748de673SAkira Fujita if (ext4_can_extents_be_merged(orig_inode, prev_ext, 475748de673SAkira Fujita &new_ext)) { 476748de673SAkira Fujita o_start = prev_ext; 477748de673SAkira Fujita start_ext.ee_len = cpu_to_le16( 478748de673SAkira Fujita ext4_ext_get_actual_len(prev_ext) + 479748de673SAkira Fujita new_ext_alen); 4805fd5249aSAkira Fujita start_ext.ee_block = oext->ee_block; 481748de673SAkira Fujita copy_extent_status(prev_ext, &start_ext); 482748de673SAkira Fujita new_ext.ee_len = 0; 483748de673SAkira Fujita } 484748de673SAkira Fujita } 485748de673SAkira Fujita 486748de673SAkira Fujita /* 487748de673SAkira Fujita * Case: new_ext_end must be less than oext 488748de673SAkira Fujita * oext |-----------| 489748de673SAkira Fujita * new_ext |-------| 490748de673SAkira Fujita */ 4912147b1a6SAkira Fujita if (le32_to_cpu(oext->ee_block) + oext_alen - 1 < new_ext_end) { 49224676da4STheodore Ts'o EXT4_ERROR_INODE(orig_inode, 4932147b1a6SAkira Fujita "new_ext_end(%u) should be less than or equal to " 4942147b1a6SAkira Fujita "oext->ee_block(%u) + oext_alen(%d) - 1", 4952147b1a6SAkira Fujita new_ext_end, le32_to_cpu(oext->ee_block), 4962147b1a6SAkira Fujita oext_alen); 4972147b1a6SAkira Fujita ret = -EIO; 4982147b1a6SAkira Fujita goto out; 4992147b1a6SAkira Fujita } 500748de673SAkira Fujita 501748de673SAkira Fujita /* 502748de673SAkira Fujita * Case: new_ext is smaller than original extent 503748de673SAkira Fujita * oext |---------------| 504748de673SAkira Fujita * new_ext |-----------| 505748de673SAkira Fujita * end_ext |---| 506748de673SAkira Fujita */ 507748de673SAkira Fujita if (le32_to_cpu(oext->ee_block) <= new_ext_end && 508748de673SAkira Fujita new_ext_end < le32_to_cpu(oext->ee_block) + oext_alen - 1) { 509748de673SAkira Fujita end_ext.ee_len = 510748de673SAkira Fujita cpu_to_le16(le32_to_cpu(oext->ee_block) + 511748de673SAkira Fujita oext_alen - 1 - new_ext_end); 512748de673SAkira Fujita copy_extent_status(oext, &end_ext); 
513748de673SAkira Fujita end_ext_alen = ext4_ext_get_actual_len(&end_ext); 514748de673SAkira Fujita ext4_ext_store_pblock(&end_ext, 515bf89d16fSTheodore Ts'o (ext4_ext_pblock(o_end) + oext_alen - end_ext_alen)); 516748de673SAkira Fujita end_ext.ee_block = 517748de673SAkira Fujita cpu_to_le32(le32_to_cpu(o_end->ee_block) + 518748de673SAkira Fujita oext_alen - end_ext_alen); 519748de673SAkira Fujita } 520748de673SAkira Fujita 521748de673SAkira Fujita ret = mext_insert_extents(handle, orig_inode, orig_path, o_start, 522748de673SAkira Fujita o_end, &start_ext, &new_ext, &end_ext); 5232147b1a6SAkira Fujita out: 524748de673SAkira Fujita return ret; 525748de673SAkira Fujita } 526748de673SAkira Fujita 527748de673SAkira Fujita /** 528748de673SAkira Fujita * mext_calc_swap_extents - Calculate extents for extent swapping. 529748de673SAkira Fujita * 530748de673SAkira Fujita * @tmp_dext: the extent that will belong to the original inode 531748de673SAkira Fujita * @tmp_oext: the extent that will belong to the donor inode 532748de673SAkira Fujita * @orig_off: block offset of original inode 533748de673SAkira Fujita * @donor_off: block offset of donor inode 53492c28159SAkira Fujita * @max_count: the maximum length of extents 535c40ce3c9SAkira Fujita * 536c40ce3c9SAkira Fujita * Return 0 on success, or a negative error value on failure. 
537748de673SAkira Fujita */ 538c40ce3c9SAkira Fujita static int 539748de673SAkira Fujita mext_calc_swap_extents(struct ext4_extent *tmp_dext, 540748de673SAkira Fujita struct ext4_extent *tmp_oext, 541748de673SAkira Fujita ext4_lblk_t orig_off, ext4_lblk_t donor_off, 542748de673SAkira Fujita ext4_lblk_t max_count) 543748de673SAkira Fujita { 544748de673SAkira Fujita ext4_lblk_t diff, orig_diff; 545748de673SAkira Fujita struct ext4_extent dext_old, oext_old; 546748de673SAkira Fujita 547c40ce3c9SAkira Fujita BUG_ON(orig_off != donor_off); 548c40ce3c9SAkira Fujita 549c40ce3c9SAkira Fujita /* original and donor extents have to cover the same block offset */ 550c40ce3c9SAkira Fujita if (orig_off < le32_to_cpu(tmp_oext->ee_block) || 551c40ce3c9SAkira Fujita le32_to_cpu(tmp_oext->ee_block) + 552c40ce3c9SAkira Fujita ext4_ext_get_actual_len(tmp_oext) - 1 < orig_off) 553c40ce3c9SAkira Fujita return -ENODATA; 554c40ce3c9SAkira Fujita 555c40ce3c9SAkira Fujita if (orig_off < le32_to_cpu(tmp_dext->ee_block) || 556c40ce3c9SAkira Fujita le32_to_cpu(tmp_dext->ee_block) + 557c40ce3c9SAkira Fujita ext4_ext_get_actual_len(tmp_dext) - 1 < orig_off) 558c40ce3c9SAkira Fujita return -ENODATA; 559c40ce3c9SAkira Fujita 560748de673SAkira Fujita dext_old = *tmp_dext; 561748de673SAkira Fujita oext_old = *tmp_oext; 562748de673SAkira Fujita 563748de673SAkira Fujita /* When tmp_dext is too large, pick up the target range. 
*/ 564748de673SAkira Fujita diff = donor_off - le32_to_cpu(tmp_dext->ee_block); 565748de673SAkira Fujita 566bf89d16fSTheodore Ts'o ext4_ext_store_pblock(tmp_dext, ext4_ext_pblock(tmp_dext) + diff); 567ba39ebb6SWei Yongjun le32_add_cpu(&tmp_dext->ee_block, diff); 568ba39ebb6SWei Yongjun le16_add_cpu(&tmp_dext->ee_len, -diff); 569748de673SAkira Fujita 570748de673SAkira Fujita if (max_count < ext4_ext_get_actual_len(tmp_dext)) 571748de673SAkira Fujita tmp_dext->ee_len = cpu_to_le16(max_count); 572748de673SAkira Fujita 573748de673SAkira Fujita orig_diff = orig_off - le32_to_cpu(tmp_oext->ee_block); 574bf89d16fSTheodore Ts'o ext4_ext_store_pblock(tmp_oext, ext4_ext_pblock(tmp_oext) + orig_diff); 575748de673SAkira Fujita 576748de673SAkira Fujita /* Adjust extent length if donor extent is larger than orig */ 577748de673SAkira Fujita if (ext4_ext_get_actual_len(tmp_dext) > 578748de673SAkira Fujita ext4_ext_get_actual_len(tmp_oext) - orig_diff) 579748de673SAkira Fujita tmp_dext->ee_len = cpu_to_le16(le16_to_cpu(tmp_oext->ee_len) - 580748de673SAkira Fujita orig_diff); 581748de673SAkira Fujita 582748de673SAkira Fujita tmp_oext->ee_len = cpu_to_le16(ext4_ext_get_actual_len(tmp_dext)); 583748de673SAkira Fujita 584748de673SAkira Fujita copy_extent_status(&oext_old, tmp_dext); 585748de673SAkira Fujita copy_extent_status(&dext_old, tmp_oext); 586c40ce3c9SAkira Fujita 587c40ce3c9SAkira Fujita return 0; 588748de673SAkira Fujita } 589748de673SAkira Fujita 590748de673SAkira Fujita /** 5918c854473SDmitry Monakhov * mext_check_coverage - Check that all extents in range has the same type 5928c854473SDmitry Monakhov * 5938c854473SDmitry Monakhov * @inode: inode in question 5948c854473SDmitry Monakhov * @from: block offset of inode 5958c854473SDmitry Monakhov * @count: block count to be checked 5968c854473SDmitry Monakhov * @uninit: extents expected to be uninitialized 5978c854473SDmitry Monakhov * @err: pointer to save error value 5988c854473SDmitry Monakhov * 5998c854473SDmitry Monakhov 
* Return 1 if all extents in range has expected type, and zero otherwise. 6008c854473SDmitry Monakhov */ 6018c854473SDmitry Monakhov static int 6028c854473SDmitry Monakhov mext_check_coverage(struct inode *inode, ext4_lblk_t from, ext4_lblk_t count, 6038c854473SDmitry Monakhov int uninit, int *err) 6048c854473SDmitry Monakhov { 6058c854473SDmitry Monakhov struct ext4_ext_path *path = NULL; 6068c854473SDmitry Monakhov struct ext4_extent *ext; 6070e401101SDmitry Monakhov int ret = 0; 6088c854473SDmitry Monakhov ext4_lblk_t last = from + count; 6098c854473SDmitry Monakhov while (from < last) { 6108c854473SDmitry Monakhov *err = get_ext_path(inode, from, &path); 6118c854473SDmitry Monakhov if (*err) 6120e401101SDmitry Monakhov goto out; 6138c854473SDmitry Monakhov ext = path[ext_depth(inode)].p_ext; 6140e401101SDmitry Monakhov if (uninit != ext4_ext_is_uninitialized(ext)) 6150e401101SDmitry Monakhov goto out; 6168c854473SDmitry Monakhov from += ext4_ext_get_actual_len(ext); 6178c854473SDmitry Monakhov ext4_ext_drop_refs(path); 6188c854473SDmitry Monakhov } 6190e401101SDmitry Monakhov ret = 1; 6200e401101SDmitry Monakhov out: 6210e401101SDmitry Monakhov if (path) { 6220e401101SDmitry Monakhov ext4_ext_drop_refs(path); 6230e401101SDmitry Monakhov kfree(path); 6240e401101SDmitry Monakhov } 6250e401101SDmitry Monakhov return ret; 6268c854473SDmitry Monakhov } 6278c854473SDmitry Monakhov 6288c854473SDmitry Monakhov /** 629748de673SAkira Fujita * mext_replace_branches - Replace original extents with new extents 630748de673SAkira Fujita * 631748de673SAkira Fujita * @handle: journal handle 632748de673SAkira Fujita * @orig_inode: original inode 633748de673SAkira Fujita * @donor_inode: donor inode 634748de673SAkira Fujita * @from: block offset of orig_inode 635748de673SAkira Fujita * @count: block count to be replaced 636f868a48dSAkira Fujita * @err: pointer to save return value 637748de673SAkira Fujita * 638748de673SAkira Fujita * Replace original inode extents and donor inode 
* extents page by page.
 * We implement this replacement in the following three steps:
 * 1. Save the block information of original and donor inodes into
 *    dummy extents.
 * 2. Change the block information of original inode to point at the
 *    donor inode blocks.
 * 3. Change the block information of donor inode to point at the saved
 *    original inode blocks in the dummy extents.
 *
 * Both inodes are addressed with the same logical offset: orig_off and
 * donor_off both start at @from and advance in lock step.
 *
 * Return replaced block count.  The count may be non-zero even when *@err
 * is set, because an error in a later loop iteration does not undo the
 * extents that were already swapped.
 */
static int
mext_replace_branches(handle_t *handle, struct inode *orig_inode,
			   struct inode *donor_inode, ext4_lblk_t from,
			   ext4_lblk_t count, int *err)
{
	struct ext4_ext_path *orig_path = NULL;
	struct ext4_ext_path *donor_path = NULL;
	struct ext4_extent *oext, *dext;
	struct ext4_extent tmp_dext, tmp_oext;
	ext4_lblk_t orig_off = from, donor_off = from;
	int depth;
	int replaced_count = 0;
	int dext_alen;

	/* Drop the extent-status entries for the range on both inodes first */
	*err = ext4_es_remove_extent(orig_inode, from, count);
	if (*err)
		goto out;

	*err = ext4_es_remove_extent(donor_inode, from, count);
	if (*err)
		goto out;

	/* Get the original extent for the block "orig_off" */
	*err = get_ext_path(orig_inode, orig_off, &orig_path);
	if (*err)
		goto out;

	/* Get the donor extent for the head */
	*err = get_ext_path(donor_inode, donor_off, &donor_path);
	if (*err)
		goto out;
	/*
	 * get_ext_path() fails with -ENODATA when the leaf has no extent,
	 * so oext is non-NULL here.
	 */
	depth = ext_depth(orig_inode);
	oext = orig_path[depth].p_ext;
	tmp_oext = *oext;

	depth = ext_depth(donor_inode);
	dext = donor_path[depth].p_ext;
	/* Jumps to the error report inside the loop below */
	if (unlikely(!dext))
		goto missing_donor_extent;
	tmp_dext = *dext;

	/* Work on the local copies tmp_dext/tmp_oext, not the on-disk extents */
	*err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off,
				      donor_off, count);
	if (*err)
		goto out;

	/* Loop for the donor extents */
	while (1) {
		/* The extent for donor must be found. */
		if (unlikely(!dext)) {
		missing_donor_extent:
			EXT4_ERROR_INODE(donor_inode,
				   "The extent for donor must be found");
			*err = -EIO;
			goto out;
		} else if (donor_off != le32_to_cpu(tmp_dext.ee_block)) {
			EXT4_ERROR_INODE(donor_inode,
				"Donor offset(%u) and the first block of donor "
				"extent(%u) should be equal",
				donor_off,
				le32_to_cpu(tmp_dext.ee_block));
			*err = -EIO;
			goto out;
		}

		/* Set donor extent to orig extent */
		*err = mext_leaf_block(handle, orig_inode,
					   orig_path, &tmp_dext, &orig_off);
		if (*err)
			goto out;

		/* Set orig extent to donor extent */
		*err = mext_leaf_block(handle, donor_inode,
					   donor_path, &tmp_oext, &donor_off);
		if (*err)
			goto out;

		/* Advance both offsets by the length of the donor piece */
		dext_alen = ext4_ext_get_actual_len(&tmp_dext);
		replaced_count += dext_alen;
		donor_off += dext_alen;
		orig_off += dext_alen;

		BUG_ON(replaced_count > count);
		/* Already moved the expected blocks */
		if (replaced_count >= count)
			break;

		/* Look up the next pair of extents, reusing both path buffers */
		if (orig_path)
			ext4_ext_drop_refs(orig_path);
		*err = get_ext_path(orig_inode, orig_off, &orig_path);
		if (*err)
			goto out;
		depth = ext_depth(orig_inode);
		oext = orig_path[depth].p_ext;
		tmp_oext = *oext;

		if (donor_path)
			ext4_ext_drop_refs(donor_path);
		*err = get_ext_path(donor_inode, donor_off, &donor_path);
		if (*err)
			goto out;
		depth = ext_depth(donor_inode);
		dext = donor_path[depth].p_ext;
		tmp_dext = *dext;

		*err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off,
					   donor_off, count - replaced_count);
		if (*err)
			goto out;
	}

out:
	/* Common exit: release and free whichever paths were obtained */
	if (orig_path) {
		ext4_ext_drop_refs(orig_path);
		kfree(orig_path);
	}
	if (donor_path) {
		ext4_ext_drop_refs(donor_path);
		kfree(donor_path);
	}

	return replaced_count;
}

/**
 * mext_page_double_lock - Grab and lock pages on both @inode1 and @inode2
 *
 * @inode1:	the inode structure
 * @inode2:	the inode structure
 * @index:	page index
 * @page:	result page vector
 *
 * Grab two locked pages for inode's by inode order
 */
783bb557488SDmitry Monakhov static int 784bb557488SDmitry Monakhov mext_page_double_lock(struct inode *inode1, struct inode *inode2, 785bb557488SDmitry Monakhov pgoff_t index, struct page *page[2]) 786bb557488SDmitry Monakhov { 787bb557488SDmitry Monakhov struct address_space *mapping[2]; 788bb557488SDmitry Monakhov unsigned fl = AOP_FLAG_NOFS; 789bb557488SDmitry Monakhov 790bb557488SDmitry Monakhov BUG_ON(!inode1 || !inode2); 791bb557488SDmitry Monakhov if (inode1 < inode2) { 792bb557488SDmitry Monakhov mapping[0] = inode1->i_mapping; 793bb557488SDmitry Monakhov mapping[1] = inode2->i_mapping; 794bb557488SDmitry Monakhov } else { 795bb557488SDmitry Monakhov mapping[0] = inode2->i_mapping; 796bb557488SDmitry Monakhov mapping[1] = inode1->i_mapping; 797bb557488SDmitry Monakhov } 798bb557488SDmitry Monakhov 799bb557488SDmitry Monakhov page[0] = grab_cache_page_write_begin(mapping[0], index, fl); 800bb557488SDmitry Monakhov if (!page[0]) 801bb557488SDmitry Monakhov return -ENOMEM; 802bb557488SDmitry Monakhov 803bb557488SDmitry Monakhov page[1] = grab_cache_page_write_begin(mapping[1], index, fl); 804bb557488SDmitry Monakhov if (!page[1]) { 805bb557488SDmitry Monakhov unlock_page(page[0]); 806bb557488SDmitry Monakhov page_cache_release(page[0]); 807bb557488SDmitry Monakhov return -ENOMEM; 808bb557488SDmitry Monakhov } 8097e8b12c6SDmitry Monakhov /* 8107e8b12c6SDmitry Monakhov * grab_cache_page_write_begin() may not wait on page's writeback if 8117e8b12c6SDmitry Monakhov * BDI not demand that. 
But it is reasonable to be very conservative 8127e8b12c6SDmitry Monakhov * here and explicitly wait on page's writeback 8137e8b12c6SDmitry Monakhov */ 8147e8b12c6SDmitry Monakhov wait_on_page_writeback(page[0]); 8157e8b12c6SDmitry Monakhov wait_on_page_writeback(page[1]); 816bb557488SDmitry Monakhov if (inode1 > inode2) { 817bb557488SDmitry Monakhov struct page *tmp; 818bb557488SDmitry Monakhov tmp = page[0]; 819bb557488SDmitry Monakhov page[0] = page[1]; 820bb557488SDmitry Monakhov page[1] = tmp; 821bb557488SDmitry Monakhov } 822bb557488SDmitry Monakhov return 0; 823bb557488SDmitry Monakhov } 824bb557488SDmitry Monakhov 825bb557488SDmitry Monakhov /* Force page buffers uptodate w/o dropping page's lock */ 826bb557488SDmitry Monakhov static int 827bb557488SDmitry Monakhov mext_page_mkuptodate(struct page *page, unsigned from, unsigned to) 828bb557488SDmitry Monakhov { 829bb557488SDmitry Monakhov struct inode *inode = page->mapping->host; 830bb557488SDmitry Monakhov sector_t block; 831bb557488SDmitry Monakhov struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE]; 832bb557488SDmitry Monakhov unsigned int blocksize, block_start, block_end; 833bb557488SDmitry Monakhov int i, err, nr = 0, partial = 0; 834bb557488SDmitry Monakhov BUG_ON(!PageLocked(page)); 835bb557488SDmitry Monakhov BUG_ON(PageWriteback(page)); 836bb557488SDmitry Monakhov 837bb557488SDmitry Monakhov if (PageUptodate(page)) 838bb557488SDmitry Monakhov return 0; 839bb557488SDmitry Monakhov 840bb557488SDmitry Monakhov blocksize = 1 << inode->i_blkbits; 841bb557488SDmitry Monakhov if (!page_has_buffers(page)) 842bb557488SDmitry Monakhov create_empty_buffers(page, blocksize, 0); 843bb557488SDmitry Monakhov 844bb557488SDmitry Monakhov head = page_buffers(page); 845bb557488SDmitry Monakhov block = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits); 846bb557488SDmitry Monakhov for (bh = head, block_start = 0; bh != head || !block_start; 847bb557488SDmitry Monakhov block++, block_start = block_end, 
bh = bh->b_this_page) { 848bb557488SDmitry Monakhov block_end = block_start + blocksize; 849bb557488SDmitry Monakhov if (block_end <= from || block_start >= to) { 850bb557488SDmitry Monakhov if (!buffer_uptodate(bh)) 851bb557488SDmitry Monakhov partial = 1; 852bb557488SDmitry Monakhov continue; 853bb557488SDmitry Monakhov } 854bb557488SDmitry Monakhov if (buffer_uptodate(bh)) 855bb557488SDmitry Monakhov continue; 856bb557488SDmitry Monakhov if (!buffer_mapped(bh)) { 857bb557488SDmitry Monakhov err = ext4_get_block(inode, block, bh, 0); 858bb557488SDmitry Monakhov if (err) { 859bb557488SDmitry Monakhov SetPageError(page); 860bb557488SDmitry Monakhov return err; 861bb557488SDmitry Monakhov } 862bb557488SDmitry Monakhov if (!buffer_mapped(bh)) { 863bb557488SDmitry Monakhov zero_user(page, block_start, blocksize); 864bb557488SDmitry Monakhov if (!err) 865bb557488SDmitry Monakhov set_buffer_uptodate(bh); 866bb557488SDmitry Monakhov continue; 867bb557488SDmitry Monakhov } 868bb557488SDmitry Monakhov } 869bb557488SDmitry Monakhov BUG_ON(nr >= MAX_BUF_PER_PAGE); 870bb557488SDmitry Monakhov arr[nr++] = bh; 871bb557488SDmitry Monakhov } 872bb557488SDmitry Monakhov /* No io required */ 873bb557488SDmitry Monakhov if (!nr) 874bb557488SDmitry Monakhov goto out; 875bb557488SDmitry Monakhov 876bb557488SDmitry Monakhov for (i = 0; i < nr; i++) { 877bb557488SDmitry Monakhov bh = arr[i]; 878bb557488SDmitry Monakhov if (!bh_uptodate_or_lock(bh)) { 879bb557488SDmitry Monakhov err = bh_submit_read(bh); 880bb557488SDmitry Monakhov if (err) 881bb557488SDmitry Monakhov return err; 882bb557488SDmitry Monakhov } 883bb557488SDmitry Monakhov } 884bb557488SDmitry Monakhov out: 885bb557488SDmitry Monakhov if (!partial) 886bb557488SDmitry Monakhov SetPageUptodate(page); 887bb557488SDmitry Monakhov return 0; 888bb557488SDmitry Monakhov } 889bb557488SDmitry Monakhov 890bb557488SDmitry Monakhov /** 891748de673SAkira Fujita * move_extent_per_page - Move extent data per page 892748de673SAkira Fujita 
* 893748de673SAkira Fujita * @o_filp: file structure of original file 894748de673SAkira Fujita * @donor_inode: donor inode 895748de673SAkira Fujita * @orig_page_offset: page index on original file 896748de673SAkira Fujita * @data_offset_in_page: block index where data swapping starts 897748de673SAkira Fujita * @block_len_in_page: the number of blocks to be swapped 898748de673SAkira Fujita * @uninit: orig extent is uninitialized or not 899f868a48dSAkira Fujita * @err: pointer to save return value 900748de673SAkira Fujita * 901748de673SAkira Fujita * Save the data in original inode blocks and replace original inode extents 902748de673SAkira Fujita * with donor inode extents by calling mext_replace_branches(). 903f868a48dSAkira Fujita * Finally, write out the saved data in new original inode blocks. Return 904f868a48dSAkira Fujita * replaced block count. 905748de673SAkira Fujita */ 906748de673SAkira Fujita static int 90744fc48f7SAkira Fujita move_extent_per_page(struct file *o_filp, struct inode *donor_inode, 908748de673SAkira Fujita pgoff_t orig_page_offset, int data_offset_in_page, 909f868a48dSAkira Fujita int block_len_in_page, int uninit, int *err) 910748de673SAkira Fujita { 911496ad9aaSAl Viro struct inode *orig_inode = file_inode(o_filp); 912bb557488SDmitry Monakhov struct page *pagep[2] = {NULL, NULL}; 913748de673SAkira Fujita handle_t *handle; 914748de673SAkira Fujita ext4_lblk_t orig_blk_offset; 915748de673SAkira Fujita unsigned long blocksize = orig_inode->i_sb->s_blocksize; 916748de673SAkira Fujita unsigned int w_flags = 0; 917f868a48dSAkira Fujita unsigned int tmp_data_size, data_size, replaced_size; 918bb557488SDmitry Monakhov int err2, jblocks, retries = 0; 919f868a48dSAkira Fujita int replaced_count = 0; 920bb557488SDmitry Monakhov int from = data_offset_in_page << orig_inode->i_blkbits; 921748de673SAkira Fujita int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits; 922748de673SAkira Fujita 923748de673SAkira Fujita /* 924748de673SAkira Fujita * 
It needs twice the amount of ordinary journal buffers because 925748de673SAkira Fujita * inode and donor_inode may change each different metadata blocks. 926748de673SAkira Fujita */ 927bb557488SDmitry Monakhov again: 928bb557488SDmitry Monakhov *err = 0; 929748de673SAkira Fujita jblocks = ext4_writepage_trans_blocks(orig_inode) * 2; 9309924a92aSTheodore Ts'o handle = ext4_journal_start(orig_inode, EXT4_HT_MOVE_EXTENTS, jblocks); 931748de673SAkira Fujita if (IS_ERR(handle)) { 932f868a48dSAkira Fujita *err = PTR_ERR(handle); 933f868a48dSAkira Fujita return 0; 934748de673SAkira Fujita } 935748de673SAkira Fujita 936748de673SAkira Fujita if (segment_eq(get_fs(), KERNEL_DS)) 937748de673SAkira Fujita w_flags |= AOP_FLAG_UNINTERRUPTIBLE; 938748de673SAkira Fujita 939748de673SAkira Fujita orig_blk_offset = orig_page_offset * blocks_per_page + 940748de673SAkira Fujita data_offset_in_page; 941748de673SAkira Fujita 942f868a48dSAkira Fujita /* Calculate data_size */ 943748de673SAkira Fujita if ((orig_blk_offset + block_len_in_page - 1) == 944748de673SAkira Fujita ((orig_inode->i_size - 1) >> orig_inode->i_blkbits)) { 945748de673SAkira Fujita /* Replace the last block */ 946f868a48dSAkira Fujita tmp_data_size = orig_inode->i_size & (blocksize - 1); 947748de673SAkira Fujita /* 948f868a48dSAkira Fujita * If data_size equal zero, it shows data_size is multiples of 949748de673SAkira Fujita * blocksize. So we set appropriate value. 
950748de673SAkira Fujita */ 951f868a48dSAkira Fujita if (tmp_data_size == 0) 952f868a48dSAkira Fujita tmp_data_size = blocksize; 953748de673SAkira Fujita 954f868a48dSAkira Fujita data_size = tmp_data_size + 955748de673SAkira Fujita ((block_len_in_page - 1) << orig_inode->i_blkbits); 956f868a48dSAkira Fujita } else 957f868a48dSAkira Fujita data_size = block_len_in_page << orig_inode->i_blkbits; 958748de673SAkira Fujita 959f868a48dSAkira Fujita replaced_size = data_size; 960f868a48dSAkira Fujita 961bb557488SDmitry Monakhov *err = mext_page_double_lock(orig_inode, donor_inode, orig_page_offset, 962bb557488SDmitry Monakhov pagep); 963f868a48dSAkira Fujita if (unlikely(*err < 0)) 964bb557488SDmitry Monakhov goto stop_journal; 9658c854473SDmitry Monakhov /* 9668c854473SDmitry Monakhov * If orig extent was uninitialized it can become initialized 9678c854473SDmitry Monakhov * at any time after i_data_sem was dropped, in order to 9688c854473SDmitry Monakhov * serialize with delalloc we have recheck extent while we 9698c854473SDmitry Monakhov * hold page's lock, if it is still the case data copy is not 9708c854473SDmitry Monakhov * necessary, just swap data blocks between orig and donor. 9718c854473SDmitry Monakhov */ 9728c854473SDmitry Monakhov if (uninit) { 973393d1d1dSDr. 
Tilmann Bubeck ext4_double_down_write_data_sem(orig_inode, donor_inode); 9748c854473SDmitry Monakhov /* If any of extents in range became initialized we have to 9758c854473SDmitry Monakhov * fallback to data copying */ 9768c854473SDmitry Monakhov uninit = mext_check_coverage(orig_inode, orig_blk_offset, 9778c854473SDmitry Monakhov block_len_in_page, 1, err); 9788c854473SDmitry Monakhov if (*err) 9798c854473SDmitry Monakhov goto drop_data_sem; 980748de673SAkira Fujita 9818c854473SDmitry Monakhov uninit &= mext_check_coverage(donor_inode, orig_blk_offset, 9828c854473SDmitry Monakhov block_len_in_page, 1, err); 9838c854473SDmitry Monakhov if (*err) 9848c854473SDmitry Monakhov goto drop_data_sem; 9858c854473SDmitry Monakhov 9868c854473SDmitry Monakhov if (!uninit) { 987393d1d1dSDr. Tilmann Bubeck ext4_double_up_write_data_sem(orig_inode, donor_inode); 9888c854473SDmitry Monakhov goto data_copy; 9898c854473SDmitry Monakhov } 9908c854473SDmitry Monakhov if ((page_has_private(pagep[0]) && 9918c854473SDmitry Monakhov !try_to_release_page(pagep[0], 0)) || 9928c854473SDmitry Monakhov (page_has_private(pagep[1]) && 9938c854473SDmitry Monakhov !try_to_release_page(pagep[1], 0))) { 9948c854473SDmitry Monakhov *err = -EBUSY; 9958c854473SDmitry Monakhov goto drop_data_sem; 9968c854473SDmitry Monakhov } 9978c854473SDmitry Monakhov replaced_count = mext_replace_branches(handle, orig_inode, 9988c854473SDmitry Monakhov donor_inode, orig_blk_offset, 9998c854473SDmitry Monakhov block_len_in_page, err); 10008c854473SDmitry Monakhov drop_data_sem: 1001393d1d1dSDr. 
Tilmann Bubeck ext4_double_up_write_data_sem(orig_inode, donor_inode); 10028c854473SDmitry Monakhov goto unlock_pages; 10038c854473SDmitry Monakhov } 10048c854473SDmitry Monakhov data_copy: 1005bb557488SDmitry Monakhov *err = mext_page_mkuptodate(pagep[0], from, from + replaced_size); 1006bb557488SDmitry Monakhov if (*err) 1007bb557488SDmitry Monakhov goto unlock_pages; 1008bb557488SDmitry Monakhov 1009bb557488SDmitry Monakhov /* At this point all buffers in range are uptodate, old mapping layout 1010bb557488SDmitry Monakhov * is no longer required, try to drop it now. */ 1011bb557488SDmitry Monakhov if ((page_has_private(pagep[0]) && !try_to_release_page(pagep[0], 0)) || 1012bb557488SDmitry Monakhov (page_has_private(pagep[1]) && !try_to_release_page(pagep[1], 0))) { 1013bb557488SDmitry Monakhov *err = -EBUSY; 1014bb557488SDmitry Monakhov goto unlock_pages; 1015748de673SAkira Fujita } 1016748de673SAkira Fujita 1017f868a48dSAkira Fujita replaced_count = mext_replace_branches(handle, orig_inode, donor_inode, 1018bb557488SDmitry Monakhov orig_blk_offset, 1019bb557488SDmitry Monakhov block_len_in_page, err); 1020bb557488SDmitry Monakhov if (*err) { 1021f868a48dSAkira Fujita if (replaced_count) { 1022f868a48dSAkira Fujita block_len_in_page = replaced_count; 1023f868a48dSAkira Fujita replaced_size = 1024f868a48dSAkira Fujita block_len_in_page << orig_inode->i_blkbits; 1025ac48b0a1SAkira Fujita } else 1026bb557488SDmitry Monakhov goto unlock_pages; 1027f868a48dSAkira Fujita } 1028bb557488SDmitry Monakhov /* Perform all necessary steps similar write_begin()/write_end() 1029bb557488SDmitry Monakhov * but keeping in mind that i_size will not change */ 10307e8b12c6SDmitry Monakhov *err = __block_write_begin(pagep[0], from, replaced_size, 1031bb557488SDmitry Monakhov ext4_get_block); 1032bb557488SDmitry Monakhov if (!*err) 1033bb557488SDmitry Monakhov *err = block_commit_write(pagep[0], from, from + replaced_size); 1034fc04cb49SAkira Fujita 1035bb557488SDmitry Monakhov if 
(unlikely(*err < 0)) 1036bb557488SDmitry Monakhov goto repair_branches; 1037748de673SAkira Fujita 1038bb557488SDmitry Monakhov /* Even in case of data=writeback it is reasonable to pin 1039bb557488SDmitry Monakhov * inode to transaction, to prevent unexpected data loss */ 1040bb557488SDmitry Monakhov *err = ext4_jbd2_file_inode(handle, orig_inode); 1041748de673SAkira Fujita 1042bb557488SDmitry Monakhov unlock_pages: 1043bb557488SDmitry Monakhov unlock_page(pagep[0]); 1044bb557488SDmitry Monakhov page_cache_release(pagep[0]); 1045bb557488SDmitry Monakhov unlock_page(pagep[1]); 1046bb557488SDmitry Monakhov page_cache_release(pagep[1]); 1047bb557488SDmitry Monakhov stop_journal: 104891cc219aSPeng Tao ext4_journal_stop(handle); 1049bb557488SDmitry Monakhov /* Buffer was busy because probably is pinned to journal transaction, 1050bb557488SDmitry Monakhov * force transaction commit may help to free it. */ 1051bb557488SDmitry Monakhov if (*err == -EBUSY && ext4_should_retry_alloc(orig_inode->i_sb, 1052bb557488SDmitry Monakhov &retries)) 1053bb557488SDmitry Monakhov goto again; 1054f868a48dSAkira Fujita return replaced_count; 1055bb557488SDmitry Monakhov 1056bb557488SDmitry Monakhov repair_branches: 1057bb557488SDmitry Monakhov /* 1058bb557488SDmitry Monakhov * This should never ever happen! 1059bb557488SDmitry Monakhov * Extents are swapped already, but we are not able to copy data. 1060bb557488SDmitry Monakhov * Try to swap extents to it's original places 1061bb557488SDmitry Monakhov */ 1062393d1d1dSDr. Tilmann Bubeck ext4_double_down_write_data_sem(orig_inode, donor_inode); 1063bb557488SDmitry Monakhov replaced_count = mext_replace_branches(handle, donor_inode, orig_inode, 1064bb557488SDmitry Monakhov orig_blk_offset, 1065bb557488SDmitry Monakhov block_len_in_page, &err2); 1066393d1d1dSDr. 
Tilmann Bubeck ext4_double_up_write_data_sem(orig_inode, donor_inode); 1067bb557488SDmitry Monakhov if (replaced_count != block_len_in_page) { 1068bb557488SDmitry Monakhov EXT4_ERROR_INODE_BLOCK(orig_inode, (sector_t)(orig_blk_offset), 1069bb557488SDmitry Monakhov "Unable to copy data block," 1070bb557488SDmitry Monakhov " data will be lost."); 1071bb557488SDmitry Monakhov *err = -EIO; 1072bb557488SDmitry Monakhov } 1073bb557488SDmitry Monakhov replaced_count = 0; 1074bb557488SDmitry Monakhov goto unlock_pages; 1075748de673SAkira Fujita } 1076748de673SAkira Fujita 1077748de673SAkira Fujita /** 1078c437b273SAkira Fujita * mext_check_arguments - Check whether move extent can be done 1079748de673SAkira Fujita * 1080748de673SAkira Fujita * @orig_inode: original inode 1081748de673SAkira Fujita * @donor_inode: donor inode 1082748de673SAkira Fujita * @orig_start: logical start offset in block for orig 1083748de673SAkira Fujita * @donor_start: logical start offset in block for donor 1084748de673SAkira Fujita * @len: the number of blocks to be moved 1085748de673SAkira Fujita * 1086748de673SAkira Fujita * Check the arguments of ext4_move_extents() whether the files can be 1087748de673SAkira Fujita * exchanged with each other. 1088748de673SAkira Fujita * Return 0 on success, or a negative error value on failure. 
1089748de673SAkira Fujita */ 1090748de673SAkira Fujita static int 1091748de673SAkira Fujita mext_check_arguments(struct inode *orig_inode, 1092748de673SAkira Fujita struct inode *donor_inode, __u64 orig_start, 1093446aaa6eSKazuya Mio __u64 donor_start, __u64 *len) 1094748de673SAkira Fujita { 109570d5d3dcSAkira Fujita ext4_lblk_t orig_blocks, donor_blocks; 109670d5d3dcSAkira Fujita unsigned int blkbits = orig_inode->i_blkbits; 109770d5d3dcSAkira Fujita unsigned int blocksize = 1 << blkbits; 109870d5d3dcSAkira Fujita 10994a58579bSAkira Fujita if (donor_inode->i_mode & (S_ISUID|S_ISGID)) { 11004a58579bSAkira Fujita ext4_debug("ext4 move extent: suid or sgid is set" 11014a58579bSAkira Fujita " to donor file [ino:orig %lu, donor %lu]\n", 11024a58579bSAkira Fujita orig_inode->i_ino, donor_inode->i_ino); 11034a58579bSAkira Fujita return -EINVAL; 11044a58579bSAkira Fujita } 11054a58579bSAkira Fujita 11061f5a81e4STheodore Ts'o if (IS_IMMUTABLE(donor_inode) || IS_APPEND(donor_inode)) 11071f5a81e4STheodore Ts'o return -EPERM; 11081f5a81e4STheodore Ts'o 1109748de673SAkira Fujita /* Ext4 move extent does not support swapfile */ 1110748de673SAkira Fujita if (IS_SWAPFILE(orig_inode) || IS_SWAPFILE(donor_inode)) { 1111748de673SAkira Fujita ext4_debug("ext4 move extent: The argument files should " 1112748de673SAkira Fujita "not be swapfile [ino:orig %lu, donor %lu]\n", 1113748de673SAkira Fujita orig_inode->i_ino, donor_inode->i_ino); 1114748de673SAkira Fujita return -EINVAL; 1115748de673SAkira Fujita } 1116748de673SAkira Fujita 1117748de673SAkira Fujita /* Ext4 move extent supports only extent based file */ 111812e9b892SDmitry Monakhov if (!(ext4_test_inode_flag(orig_inode, EXT4_INODE_EXTENTS))) { 1119748de673SAkira Fujita ext4_debug("ext4 move extent: orig file is not extents " 1120748de673SAkira Fujita "based file [ino:orig %lu]\n", orig_inode->i_ino); 1121748de673SAkira Fujita return -EOPNOTSUPP; 112212e9b892SDmitry Monakhov } else if (!(ext4_test_inode_flag(donor_inode, 
EXT4_INODE_EXTENTS))) { 1123748de673SAkira Fujita ext4_debug("ext4 move extent: donor file is not extents " 1124748de673SAkira Fujita "based file [ino:donor %lu]\n", donor_inode->i_ino); 1125748de673SAkira Fujita return -EOPNOTSUPP; 1126748de673SAkira Fujita } 1127748de673SAkira Fujita 1128748de673SAkira Fujita if ((!orig_inode->i_size) || (!donor_inode->i_size)) { 1129748de673SAkira Fujita ext4_debug("ext4 move extent: File size is 0 byte\n"); 1130748de673SAkira Fujita return -EINVAL; 1131748de673SAkira Fujita } 1132748de673SAkira Fujita 1133748de673SAkira Fujita /* Start offset should be same */ 1134748de673SAkira Fujita if (orig_start != donor_start) { 1135748de673SAkira Fujita ext4_debug("ext4 move extent: orig and donor's start " 1136748de673SAkira Fujita "offset are not same [ino:orig %lu, donor %lu]\n", 1137748de673SAkira Fujita orig_inode->i_ino, donor_inode->i_ino); 1138748de673SAkira Fujita return -EINVAL; 1139748de673SAkira Fujita } 1140748de673SAkira Fujita 1141f17722f9SLukas Czerner if ((orig_start >= EXT_MAX_BLOCKS) || 1142f17722f9SLukas Czerner (*len > EXT_MAX_BLOCKS) || 1143f17722f9SLukas Czerner (orig_start + *len >= EXT_MAX_BLOCKS)) { 11440a80e986SEric Sandeen ext4_debug("ext4 move extent: Can't handle over [%u] blocks " 1145f17722f9SLukas Czerner "[ino:orig %lu, donor %lu]\n", EXT_MAX_BLOCKS, 1146748de673SAkira Fujita orig_inode->i_ino, donor_inode->i_ino); 1147748de673SAkira Fujita return -EINVAL; 1148748de673SAkira Fujita } 1149748de673SAkira Fujita 1150748de673SAkira Fujita if (orig_inode->i_size > donor_inode->i_size) { 115170d5d3dcSAkira Fujita donor_blocks = (donor_inode->i_size + blocksize - 1) >> blkbits; 115270d5d3dcSAkira Fujita /* TODO: eliminate this artificial restriction */ 115370d5d3dcSAkira Fujita if (orig_start >= donor_blocks) { 1154748de673SAkira Fujita ext4_debug("ext4 move extent: orig start offset " 115570d5d3dcSAkira Fujita "[%llu] should be less than donor file blocks " 115670d5d3dcSAkira Fujita "[%u] [ino:orig %lu, donor 
%lu]\n", 115770d5d3dcSAkira Fujita orig_start, donor_blocks, 1158748de673SAkira Fujita orig_inode->i_ino, donor_inode->i_ino); 1159748de673SAkira Fujita return -EINVAL; 1160748de673SAkira Fujita } 1161748de673SAkira Fujita 116270d5d3dcSAkira Fujita /* TODO: eliminate this artificial restriction */ 116370d5d3dcSAkira Fujita if (orig_start + *len > donor_blocks) { 1164748de673SAkira Fujita ext4_debug("ext4 move extent: End offset [%llu] should " 116570d5d3dcSAkira Fujita "be less than donor file blocks [%u]." 116670d5d3dcSAkira Fujita "So adjust length from %llu to %llu " 1167748de673SAkira Fujita "[ino:orig %lu, donor %lu]\n", 116870d5d3dcSAkira Fujita orig_start + *len, donor_blocks, 116970d5d3dcSAkira Fujita *len, donor_blocks - orig_start, 1170748de673SAkira Fujita orig_inode->i_ino, donor_inode->i_ino); 117170d5d3dcSAkira Fujita *len = donor_blocks - orig_start; 1172748de673SAkira Fujita } 1173748de673SAkira Fujita } else { 117470d5d3dcSAkira Fujita orig_blocks = (orig_inode->i_size + blocksize - 1) >> blkbits; 117570d5d3dcSAkira Fujita if (orig_start >= orig_blocks) { 1176748de673SAkira Fujita ext4_debug("ext4 move extent: start offset [%llu] " 117770d5d3dcSAkira Fujita "should be less than original file blocks " 117870d5d3dcSAkira Fujita "[%u] [ino:orig %lu, donor %lu]\n", 117970d5d3dcSAkira Fujita orig_start, orig_blocks, 1180748de673SAkira Fujita orig_inode->i_ino, donor_inode->i_ino); 1181748de673SAkira Fujita return -EINVAL; 1182748de673SAkira Fujita } 1183748de673SAkira Fujita 118470d5d3dcSAkira Fujita if (orig_start + *len > orig_blocks) { 1185748de673SAkira Fujita ext4_debug("ext4 move extent: Adjust length " 118670d5d3dcSAkira Fujita "from %llu to %llu. 
Because it should be " 118770d5d3dcSAkira Fujita "less than original file blocks " 1188748de673SAkira Fujita "[ino:orig %lu, donor %lu]\n", 118970d5d3dcSAkira Fujita *len, orig_blocks - orig_start, 1190748de673SAkira Fujita orig_inode->i_ino, donor_inode->i_ino); 119170d5d3dcSAkira Fujita *len = orig_blocks - orig_start; 1192748de673SAkira Fujita } 1193748de673SAkira Fujita } 1194748de673SAkira Fujita 1195748de673SAkira Fujita if (!*len) { 119692c28159SAkira Fujita ext4_debug("ext4 move extent: len should not be 0 " 1197748de673SAkira Fujita "[ino:orig %lu, donor %lu]\n", orig_inode->i_ino, 1198748de673SAkira Fujita donor_inode->i_ino); 1199748de673SAkira Fujita return -EINVAL; 1200748de673SAkira Fujita } 1201748de673SAkira Fujita 1202748de673SAkira Fujita return 0; 1203748de673SAkira Fujita } 1204748de673SAkira Fujita 1205748de673SAkira Fujita /** 1206748de673SAkira Fujita * ext4_move_extents - Exchange the specified range of a file 1207748de673SAkira Fujita * 1208748de673SAkira Fujita * @o_filp: file structure of the original file 1209748de673SAkira Fujita * @d_filp: file structure of the donor file 1210748de673SAkira Fujita * @orig_start: start offset in block for orig 1211748de673SAkira Fujita * @donor_start: start offset in block for donor 1212748de673SAkira Fujita * @len: the number of blocks to be moved 1213748de673SAkira Fujita * @moved_len: moved block length 1214748de673SAkira Fujita * 1215748de673SAkira Fujita * This function returns 0 and moved block length is set in moved_len 1216748de673SAkira Fujita * if succeed, otherwise returns error value. 1217748de673SAkira Fujita * 1218748de673SAkira Fujita * Note: ext4_move_extents() proceeds the following order. 1219748de673SAkira Fujita * 1:ext4_move_extents() calculates the last block number of moving extent 1220748de673SAkira Fujita * function by the start block number (orig_start) and the number of blocks 1221748de673SAkira Fujita * to be moved (len) specified as arguments. 
1222748de673SAkira Fujita * If the {orig, donor}_start points a hole, the extent's start offset 1223748de673SAkira Fujita * pointed by ext_cur (current extent), holecheck_path, orig_path are set 1224748de673SAkira Fujita * after hole behind. 1225748de673SAkira Fujita * 2:Continue step 3 to step 5, until the holecheck_path points to last_extent 1226748de673SAkira Fujita * or the ext_cur exceeds the block_end which is last logical block number. 1227748de673SAkira Fujita * 3:To get the length of continues area, call mext_next_extent() 1228748de673SAkira Fujita * specified with the ext_cur (initial value is holecheck_path) re-cursive, 1229748de673SAkira Fujita * until find un-continuous extent, the start logical block number exceeds 1230748de673SAkira Fujita * the block_end or the extent points to the last extent. 1231748de673SAkira Fujita * 4:Exchange the original inode data with donor inode data 1232748de673SAkira Fujita * from orig_page_offset to seq_end_page. 1233748de673SAkira Fujita * The start indexes of data are specified as arguments. 1234748de673SAkira Fujita * That of the original inode is orig_page_offset, 1235748de673SAkira Fujita * and the donor inode is also orig_page_offset 1236748de673SAkira Fujita * (To easily handle blocksize != pagesize case, the offset for the 1237748de673SAkira Fujita * donor inode is block unit). 1238748de673SAkira Fujita * 5:Update holecheck_path and orig_path to points a next proceeding extent, 1239748de673SAkira Fujita * then returns to step 2. 1240748de673SAkira Fujita * 6:Release holecheck_path, orig_path and set the len to moved_len 1241748de673SAkira Fujita * which shows the number of moved blocks. 1242748de673SAkira Fujita * The moved_len is useful for the command to calculate the file offset 1243748de673SAkira Fujita * for starting next move extent ioctl. 1244748de673SAkira Fujita * 7:Return 0 on success, or a negative error value on failure. 
1245748de673SAkira Fujita */ 1246748de673SAkira Fujita int 1247748de673SAkira Fujita ext4_move_extents(struct file *o_filp, struct file *d_filp, 1248748de673SAkira Fujita __u64 orig_start, __u64 donor_start, __u64 len, 1249748de673SAkira Fujita __u64 *moved_len) 1250748de673SAkira Fujita { 1251496ad9aaSAl Viro struct inode *orig_inode = file_inode(o_filp); 1252496ad9aaSAl Viro struct inode *donor_inode = file_inode(d_filp); 1253748de673SAkira Fujita struct ext4_ext_path *orig_path = NULL, *holecheck_path = NULL; 1254748de673SAkira Fujita struct ext4_extent *ext_prev, *ext_cur, *ext_dummy; 1255748de673SAkira Fujita ext4_lblk_t block_start = orig_start; 1256748de673SAkira Fujita ext4_lblk_t block_end, seq_start, add_blocks, file_end, seq_blocks = 0; 1257748de673SAkira Fujita ext4_lblk_t rest_blocks; 1258748de673SAkira Fujita pgoff_t orig_page_offset = 0, seq_end_page; 125903bd8b9bSDmitry Monakhov int ret, depth, last_extent = 0; 1260748de673SAkira Fujita int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits; 1261748de673SAkira Fujita int data_offset_in_page; 1262748de673SAkira Fujita int block_len_in_page; 1263748de673SAkira Fujita int uninit; 1264748de673SAkira Fujita 126503bd8b9bSDmitry Monakhov if (orig_inode->i_sb != donor_inode->i_sb) { 126603bd8b9bSDmitry Monakhov ext4_debug("ext4 move extent: The argument files " 126703bd8b9bSDmitry Monakhov "should be in same FS [ino:orig %lu, donor %lu]\n", 126803bd8b9bSDmitry Monakhov orig_inode->i_ino, donor_inode->i_ino); 126903bd8b9bSDmitry Monakhov return -EINVAL; 127003bd8b9bSDmitry Monakhov } 127103bd8b9bSDmitry Monakhov 127203bd8b9bSDmitry Monakhov /* orig and donor should be different inodes */ 127303bd8b9bSDmitry Monakhov if (orig_inode == donor_inode) { 1274f3ce8064STheodore Ts'o ext4_debug("ext4 move extent: The argument files should not " 127503bd8b9bSDmitry Monakhov "be same inode [ino:orig %lu, donor %lu]\n", 1276f3ce8064STheodore Ts'o orig_inode->i_ino, donor_inode->i_ino); 1277f3ce8064STheodore Ts'o 
return -EINVAL; 1278f3ce8064STheodore Ts'o } 1279f3ce8064STheodore Ts'o 12807247c0caSAkira Fujita /* Regular file check */ 12817247c0caSAkira Fujita if (!S_ISREG(orig_inode->i_mode) || !S_ISREG(donor_inode->i_mode)) { 12827247c0caSAkira Fujita ext4_debug("ext4 move extent: The argument files should be " 12837247c0caSAkira Fujita "regular file [ino:orig %lu, donor %lu]\n", 12847247c0caSAkira Fujita orig_inode->i_ino, donor_inode->i_ino); 12857247c0caSAkira Fujita return -EINVAL; 12867247c0caSAkira Fujita } 1287f066055aSDmitry Monakhov /* TODO: This is non obvious task to swap blocks for inodes with full 1288f066055aSDmitry Monakhov jornaling enabled */ 1289f066055aSDmitry Monakhov if (ext4_should_journal_data(orig_inode) || 1290f066055aSDmitry Monakhov ext4_should_journal_data(donor_inode)) { 1291f066055aSDmitry Monakhov return -EINVAL; 1292f066055aSDmitry Monakhov } 1293fc04cb49SAkira Fujita /* Protect orig and donor inodes against a truncate */ 1294375e289eSJ. Bruce Fields lock_two_nondirectories(orig_inode, donor_inode); 1295748de673SAkira Fujita 129617335dccSDmitry Monakhov /* Wait for all existing dio workers */ 129717335dccSDmitry Monakhov ext4_inode_block_unlocked_dio(orig_inode); 129817335dccSDmitry Monakhov ext4_inode_block_unlocked_dio(donor_inode); 129917335dccSDmitry Monakhov inode_dio_wait(orig_inode); 130017335dccSDmitry Monakhov inode_dio_wait(donor_inode); 130117335dccSDmitry Monakhov 1302fc04cb49SAkira Fujita /* Protect extent tree against block allocations via delalloc */ 1303393d1d1dSDr. 
Tilmann Bubeck ext4_double_down_write_data_sem(orig_inode, donor_inode); 1304748de673SAkira Fujita /* Check the filesystem environment whether move_extent can be done */ 130503bd8b9bSDmitry Monakhov ret = mext_check_arguments(orig_inode, donor_inode, orig_start, 1306446aaa6eSKazuya Mio donor_start, &len); 130703bd8b9bSDmitry Monakhov if (ret) 1308347fa6f1SAkira Fujita goto out; 1309748de673SAkira Fujita 1310748de673SAkira Fujita file_end = (i_size_read(orig_inode) - 1) >> orig_inode->i_blkbits; 1311748de673SAkira Fujita block_end = block_start + len - 1; 1312748de673SAkira Fujita if (file_end < block_end) 1313748de673SAkira Fujita len -= block_end - file_end; 1314748de673SAkira Fujita 131503bd8b9bSDmitry Monakhov ret = get_ext_path(orig_inode, block_start, &orig_path); 131603bd8b9bSDmitry Monakhov if (ret) 1317347fa6f1SAkira Fujita goto out; 1318748de673SAkira Fujita 1319748de673SAkira Fujita /* Get path structure to check the hole */ 132003bd8b9bSDmitry Monakhov ret = get_ext_path(orig_inode, block_start, &holecheck_path); 132103bd8b9bSDmitry Monakhov if (ret) 1322748de673SAkira Fujita goto out; 1323748de673SAkira Fujita 1324748de673SAkira Fujita depth = ext_depth(orig_inode); 1325748de673SAkira Fujita ext_cur = holecheck_path[depth].p_ext; 1326748de673SAkira Fujita 1327748de673SAkira Fujita /* 1328c40ce3c9SAkira Fujita * Get proper starting location of block replacement if block_start was 1329c40ce3c9SAkira Fujita * within the hole. 1330748de673SAkira Fujita */ 1331748de673SAkira Fujita if (le32_to_cpu(ext_cur->ee_block) + 1332748de673SAkira Fujita ext4_ext_get_actual_len(ext_cur) - 1 < block_start) { 1333c40ce3c9SAkira Fujita /* 1334c40ce3c9SAkira Fujita * The hole exists between extents or the tail of 1335c40ce3c9SAkira Fujita * original file. 
1336c40ce3c9SAkira Fujita */ 1337748de673SAkira Fujita last_extent = mext_next_extent(orig_inode, 1338748de673SAkira Fujita holecheck_path, &ext_cur); 1339748de673SAkira Fujita if (last_extent < 0) { 134003bd8b9bSDmitry Monakhov ret = last_extent; 1341748de673SAkira Fujita goto out; 1342748de673SAkira Fujita } 1343748de673SAkira Fujita last_extent = mext_next_extent(orig_inode, orig_path, 1344748de673SAkira Fujita &ext_dummy); 1345748de673SAkira Fujita if (last_extent < 0) { 134603bd8b9bSDmitry Monakhov ret = last_extent; 1347748de673SAkira Fujita goto out; 1348748de673SAkira Fujita } 1349c40ce3c9SAkira Fujita seq_start = le32_to_cpu(ext_cur->ee_block); 1350c40ce3c9SAkira Fujita } else if (le32_to_cpu(ext_cur->ee_block) > block_start) 1351c40ce3c9SAkira Fujita /* The hole exists at the beginning of original file. */ 1352c40ce3c9SAkira Fujita seq_start = le32_to_cpu(ext_cur->ee_block); 1353c40ce3c9SAkira Fujita else 1354748de673SAkira Fujita seq_start = block_start; 1355748de673SAkira Fujita 1356748de673SAkira Fujita /* No blocks within the specified range. 
*/ 1357748de673SAkira Fujita if (le32_to_cpu(ext_cur->ee_block) > block_end) { 1358748de673SAkira Fujita ext4_debug("ext4 move extent: The specified range of file " 1359748de673SAkira Fujita "may be the hole\n"); 136003bd8b9bSDmitry Monakhov ret = -EINVAL; 1361748de673SAkira Fujita goto out; 1362748de673SAkira Fujita } 1363748de673SAkira Fujita 1364748de673SAkira Fujita /* Adjust start blocks */ 1365748de673SAkira Fujita add_blocks = min(le32_to_cpu(ext_cur->ee_block) + 1366748de673SAkira Fujita ext4_ext_get_actual_len(ext_cur), block_end + 1) - 1367748de673SAkira Fujita max(le32_to_cpu(ext_cur->ee_block), block_start); 1368748de673SAkira Fujita 1369748de673SAkira Fujita while (!last_extent && le32_to_cpu(ext_cur->ee_block) <= block_end) { 1370748de673SAkira Fujita seq_blocks += add_blocks; 1371748de673SAkira Fujita 1372748de673SAkira Fujita /* Adjust tail blocks */ 1373748de673SAkira Fujita if (seq_start + seq_blocks - 1 > block_end) 1374748de673SAkira Fujita seq_blocks = block_end - seq_start + 1; 1375748de673SAkira Fujita 1376748de673SAkira Fujita ext_prev = ext_cur; 1377748de673SAkira Fujita last_extent = mext_next_extent(orig_inode, holecheck_path, 1378748de673SAkira Fujita &ext_cur); 1379748de673SAkira Fujita if (last_extent < 0) { 138003bd8b9bSDmitry Monakhov ret = last_extent; 1381748de673SAkira Fujita break; 1382748de673SAkira Fujita } 1383748de673SAkira Fujita add_blocks = ext4_ext_get_actual_len(ext_cur); 1384748de673SAkira Fujita 1385748de673SAkira Fujita /* 1386748de673SAkira Fujita * Extend the length of contiguous block (seq_blocks) 1387748de673SAkira Fujita * if extents are contiguous. 
1388748de673SAkira Fujita */ 1389748de673SAkira Fujita if (ext4_can_extents_be_merged(orig_inode, 1390748de673SAkira Fujita ext_prev, ext_cur) && 1391748de673SAkira Fujita block_end >= le32_to_cpu(ext_cur->ee_block) && 1392748de673SAkira Fujita !last_extent) 1393748de673SAkira Fujita continue; 1394748de673SAkira Fujita 1395748de673SAkira Fujita /* Is original extent is uninitialized */ 1396748de673SAkira Fujita uninit = ext4_ext_is_uninitialized(ext_prev); 1397748de673SAkira Fujita 1398748de673SAkira Fujita data_offset_in_page = seq_start % blocks_per_page; 1399748de673SAkira Fujita 1400748de673SAkira Fujita /* 1401748de673SAkira Fujita * Calculate data blocks count that should be swapped 1402748de673SAkira Fujita * at the first page. 1403748de673SAkira Fujita */ 1404748de673SAkira Fujita if (data_offset_in_page + seq_blocks > blocks_per_page) { 1405748de673SAkira Fujita /* Swapped blocks are across pages */ 1406748de673SAkira Fujita block_len_in_page = 1407748de673SAkira Fujita blocks_per_page - data_offset_in_page; 1408748de673SAkira Fujita } else { 1409748de673SAkira Fujita /* Swapped blocks are in a page */ 1410748de673SAkira Fujita block_len_in_page = seq_blocks; 1411748de673SAkira Fujita } 1412748de673SAkira Fujita 1413748de673SAkira Fujita orig_page_offset = seq_start >> 1414748de673SAkira Fujita (PAGE_CACHE_SHIFT - orig_inode->i_blkbits); 1415748de673SAkira Fujita seq_end_page = (seq_start + seq_blocks - 1) >> 1416748de673SAkira Fujita (PAGE_CACHE_SHIFT - orig_inode->i_blkbits); 1417748de673SAkira Fujita seq_start = le32_to_cpu(ext_cur->ee_block); 1418748de673SAkira Fujita rest_blocks = seq_blocks; 1419748de673SAkira Fujita 1420fc04cb49SAkira Fujita /* 1421fc04cb49SAkira Fujita * Up semaphore to avoid following problems: 1422fc04cb49SAkira Fujita * a. transaction deadlock among ext4_journal_start, 1423fc04cb49SAkira Fujita * ->write_begin via pagefault, and jbd2_journal_commit 1424fc04cb49SAkira Fujita * b. 
racing with ->readpage, ->write_begin, and ext4_get_block 1425fc04cb49SAkira Fujita * in move_extent_per_page 1426fc04cb49SAkira Fujita */ 1427393d1d1dSDr. Tilmann Bubeck ext4_double_up_write_data_sem(orig_inode, donor_inode); 1428748de673SAkira Fujita 1429748de673SAkira Fujita while (orig_page_offset <= seq_end_page) { 1430748de673SAkira Fujita 1431748de673SAkira Fujita /* Swap original branches with new branches */ 1432f868a48dSAkira Fujita block_len_in_page = move_extent_per_page( 1433f868a48dSAkira Fujita o_filp, donor_inode, 1434748de673SAkira Fujita orig_page_offset, 1435748de673SAkira Fujita data_offset_in_page, 1436f868a48dSAkira Fujita block_len_in_page, uninit, 143703bd8b9bSDmitry Monakhov &ret); 1438f868a48dSAkira Fujita 1439748de673SAkira Fujita /* Count how many blocks we have exchanged */ 1440748de673SAkira Fujita *moved_len += block_len_in_page; 144103bd8b9bSDmitry Monakhov if (ret < 0) 1442fc04cb49SAkira Fujita break; 14432147b1a6SAkira Fujita if (*moved_len > len) { 144424676da4STheodore Ts'o EXT4_ERROR_INODE(orig_inode, 14452147b1a6SAkira Fujita "We replaced blocks too much! " 14462147b1a6SAkira Fujita "sum of replaced: %llu requested: %llu", 14472147b1a6SAkira Fujita *moved_len, len); 144803bd8b9bSDmitry Monakhov ret = -EIO; 1449fc04cb49SAkira Fujita break; 14502147b1a6SAkira Fujita } 1451748de673SAkira Fujita 1452f868a48dSAkira Fujita orig_page_offset++; 1453748de673SAkira Fujita data_offset_in_page = 0; 1454748de673SAkira Fujita rest_blocks -= block_len_in_page; 1455748de673SAkira Fujita if (rest_blocks > blocks_per_page) 1456748de673SAkira Fujita block_len_in_page = blocks_per_page; 1457748de673SAkira Fujita else 1458748de673SAkira Fujita block_len_in_page = rest_blocks; 1459748de673SAkira Fujita } 1460748de673SAkira Fujita 1461393d1d1dSDr. 
Tilmann Bubeck ext4_double_down_write_data_sem(orig_inode, donor_inode); 146203bd8b9bSDmitry Monakhov if (ret < 0) 1463fc04cb49SAkira Fujita break; 1464fc04cb49SAkira Fujita 1465748de673SAkira Fujita /* Decrease buffer counter */ 1466748de673SAkira Fujita if (holecheck_path) 1467748de673SAkira Fujita ext4_ext_drop_refs(holecheck_path); 146803bd8b9bSDmitry Monakhov ret = get_ext_path(orig_inode, seq_start, &holecheck_path); 146903bd8b9bSDmitry Monakhov if (ret) 1470748de673SAkira Fujita break; 1471748de673SAkira Fujita depth = holecheck_path->p_depth; 1472748de673SAkira Fujita 1473748de673SAkira Fujita /* Decrease buffer counter */ 1474748de673SAkira Fujita if (orig_path) 1475748de673SAkira Fujita ext4_ext_drop_refs(orig_path); 147603bd8b9bSDmitry Monakhov ret = get_ext_path(orig_inode, seq_start, &orig_path); 147703bd8b9bSDmitry Monakhov if (ret) 1478748de673SAkira Fujita break; 1479748de673SAkira Fujita 1480748de673SAkira Fujita ext_cur = holecheck_path[depth].p_ext; 1481748de673SAkira Fujita add_blocks = ext4_ext_get_actual_len(ext_cur); 1482748de673SAkira Fujita seq_blocks = 0; 1483748de673SAkira Fujita 1484748de673SAkira Fujita } 1485748de673SAkira Fujita out: 148694d7c16cSAkira Fujita if (*moved_len) { 148794d7c16cSAkira Fujita ext4_discard_preallocations(orig_inode); 148894d7c16cSAkira Fujita ext4_discard_preallocations(donor_inode); 148994d7c16cSAkira Fujita } 149094d7c16cSAkira Fujita 1491748de673SAkira Fujita if (orig_path) { 1492748de673SAkira Fujita ext4_ext_drop_refs(orig_path); 1493748de673SAkira Fujita kfree(orig_path); 1494748de673SAkira Fujita } 1495748de673SAkira Fujita if (holecheck_path) { 1496748de673SAkira Fujita ext4_ext_drop_refs(holecheck_path); 1497748de673SAkira Fujita kfree(holecheck_path); 1498748de673SAkira Fujita } 1499393d1d1dSDr. 
Tilmann Bubeck ext4_double_up_write_data_sem(orig_inode, donor_inode); 150017335dccSDmitry Monakhov ext4_inode_resume_unlocked_dio(orig_inode); 150117335dccSDmitry Monakhov ext4_inode_resume_unlocked_dio(donor_inode); 1502375e289eSJ. Bruce Fields unlock_two_nondirectories(orig_inode, donor_inode); 1503748de673SAkira Fujita 150403bd8b9bSDmitry Monakhov return ret; 1505748de673SAkira Fujita } 1506