11802d0beSThomas Gleixner // SPDX-License-Identifier: GPL-2.0-only
2fa60ce2cSMasahiro Yamada /*
3028ba5dfSTristan Ye * move_extents.c
4028ba5dfSTristan Ye *
5028ba5dfSTristan Ye * Copyright (C) 2011 Oracle. All rights reserved.
6028ba5dfSTristan Ye */
7028ba5dfSTristan Ye #include <linux/fs.h>
8028ba5dfSTristan Ye #include <linux/types.h>
9028ba5dfSTristan Ye #include <linux/mount.h>
10028ba5dfSTristan Ye #include <linux/swap.h>
11028ba5dfSTristan Ye
12028ba5dfSTristan Ye #include <cluster/masklog.h>
13028ba5dfSTristan Ye
14028ba5dfSTristan Ye #include "ocfs2.h"
15028ba5dfSTristan Ye #include "ocfs2_ioctl.h"
16028ba5dfSTristan Ye
17028ba5dfSTristan Ye #include "alloc.h"
186194ae42SLarry Chen #include "localalloc.h"
19028ba5dfSTristan Ye #include "aops.h"
20028ba5dfSTristan Ye #include "dlmglue.h"
21028ba5dfSTristan Ye #include "extent_map.h"
22028ba5dfSTristan Ye #include "inode.h"
23028ba5dfSTristan Ye #include "journal.h"
24028ba5dfSTristan Ye #include "suballoc.h"
25028ba5dfSTristan Ye #include "uptodate.h"
26028ba5dfSTristan Ye #include "super.h"
27028ba5dfSTristan Ye #include "dir.h"
28028ba5dfSTristan Ye #include "buffer_head_io.h"
29028ba5dfSTristan Ye #include "sysfile.h"
30028ba5dfSTristan Ye #include "refcounttree.h"
31028ba5dfSTristan Ye #include "move_extents.h"
32028ba5dfSTristan Ye
33028ba5dfSTristan Ye struct ocfs2_move_extents_context {
34028ba5dfSTristan Ye struct inode *inode;
35028ba5dfSTristan Ye struct file *file;
36028ba5dfSTristan Ye int auto_defrag;
374dfa66bdSTristan Ye int partial;
38028ba5dfSTristan Ye int credits;
39028ba5dfSTristan Ye u32 new_phys_cpos;
40028ba5dfSTristan Ye u32 clusters_moved;
41028ba5dfSTristan Ye u64 refcount_loc;
42028ba5dfSTristan Ye struct ocfs2_move_extents *range;
43028ba5dfSTristan Ye struct ocfs2_extent_tree et;
44028ba5dfSTristan Ye struct ocfs2_alloc_context *meta_ac;
45028ba5dfSTristan Ye struct ocfs2_alloc_context *data_ac;
46028ba5dfSTristan Ye struct ocfs2_cached_dealloc_ctxt dealloc;
47028ba5dfSTristan Ye };
48de474ee8STristan Ye
/*
 * Move @len clusters of @context->inode, logically at @cpos, from physical
 * cluster @p_cpos to @new_p_cpos inside the running transaction @handle.
 *
 * The data is duplicated page by page first, then the extent record that
 * covers @cpos is split so that it points at the new location, and finally
 * the old clusters are given back (refcount decrease for refcounted
 * extents, truncate log otherwise).
 *
 * @ext_flags must match the flags of the on-disk record covering @cpos;
 * a mismatch is reported as filesystem corruption through ocfs2_error()
 * rather than crashing the machine.
 *
 * Returns 0 on success or a negative error code.  On success
 * @context->new_phys_cpos is updated to @new_p_cpos.
 */
static int __ocfs2_move_extent(handle_t *handle,
			       struct ocfs2_move_extents_context *context,
			       u32 cpos, u32 len, u32 p_cpos, u32 new_p_cpos,
			       int ext_flags)
{
	int ret = 0, index;
	struct inode *inode = context->inode;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	struct ocfs2_extent_rec *rec, replace_rec;
	struct ocfs2_path *path = NULL;
	struct ocfs2_extent_list *el;
	u64 ino = ocfs2_metadata_cache_owner(context->et.et_ci);
	u64 old_blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cpos);

	ret = ocfs2_duplicate_clusters_by_page(handle, inode, cpos,
					       p_cpos, new_p_cpos, len);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	/* Build the record that will replace the moved range. */
	memset(&replace_rec, 0, sizeof(replace_rec));
	replace_rec.e_cpos = cpu_to_le32(cpos);
	replace_rec.e_leaf_clusters = cpu_to_le16(len);
	replace_rec.e_blkno = cpu_to_le64(ocfs2_clusters_to_blocks(inode->i_sb,
								   new_p_cpos));

	path = ocfs2_new_path_from_et(&context->et);
	if (!path) {
		ret = -ENOMEM;
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_find_path(INODE_CACHE(inode), path, cpos);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	el = path_leaf_el(path);

	index = ocfs2_search_extent_list(el, cpos);
	if (index == -1) {
		ret = ocfs2_error(inode->i_sb,
				  "Inode %llu has an extent at cpos %u which can no longer be found\n",
				  (unsigned long long)ino, cpos);
		goto out;
	}

	rec = &el->l_recs[index];

	/*
	 * The record comes from disk, so a flag mismatch means corrupted
	 * metadata: fail the operation instead of BUG()ing.
	 */
	if (ext_flags != rec->e_flags) {
		ret = ocfs2_error(inode->i_sb,
				  "Inode %llu has corrupted extent %d with flags 0x%x at cpos %u\n",
				  (unsigned long long)ino, index, rec->e_flags,
				  cpos);
		goto out;
	}

	/*
	 * after moving/defraging to new location, the extent is not going
	 * to be refcounted anymore.
	 */
	replace_rec.e_flags = ext_flags & ~OCFS2_EXT_REFCOUNTED;

	ret = ocfs2_split_extent(handle, &context->et, path, index,
				 &replace_rec, context->meta_ac,
				 &context->dealloc);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	context->new_phys_cpos = new_p_cpos;

	/*
	 * Release the old clusters: drop one reference for a refcounted
	 * extent, otherwise queue them in the truncate log.
	 */
	if (old_blkno) {
		if (ext_flags & OCFS2_EXT_REFCOUNTED)
			ret = ocfs2_decrease_refcount(inode, handle,
					ocfs2_blocks_to_clusters(osb->sb,
								 old_blkno),
					len, context->meta_ac,
					&context->dealloc, 1);
		else
			ret = ocfs2_truncate_log_append(osb, handle,
							old_blkno, len);
	}

	ocfs2_update_inode_fsync_trans(handle, inode, 0);
out:
	ocfs2_free_path(path);
	return ret;
}
1388f603e56STristan Ye
139de474ee8STristan Ye /*
140e21e5744SLarry Chen * lock allocator, and reserve appropriate number of bits for
141e21e5744SLarry Chen * meta blocks.
142de474ee8STristan Ye */
ocfs2_lock_meta_allocator_move_extents(struct inode * inode,struct ocfs2_extent_tree * et,u32 clusters_to_move,u32 extents_to_split,struct ocfs2_alloc_context ** meta_ac,int extra_blocks,int * credits)143e21e5744SLarry Chen static int ocfs2_lock_meta_allocator_move_extents(struct inode *inode,
144de474ee8STristan Ye struct ocfs2_extent_tree *et,
145de474ee8STristan Ye u32 clusters_to_move,
146de474ee8STristan Ye u32 extents_to_split,
147de474ee8STristan Ye struct ocfs2_alloc_context **meta_ac,
148de474ee8STristan Ye int extra_blocks,
149de474ee8STristan Ye int *credits)
150de474ee8STristan Ye {
151de474ee8STristan Ye int ret, num_free_extents;
152de474ee8STristan Ye unsigned int max_recs_needed = 2 * extents_to_split + clusters_to_move;
153de474ee8STristan Ye struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
154de474ee8STristan Ye
155964f14a0SJun Piao num_free_extents = ocfs2_num_free_extents(et);
156de474ee8STristan Ye if (num_free_extents < 0) {
157de474ee8STristan Ye ret = num_free_extents;
158de474ee8STristan Ye mlog_errno(ret);
159de474ee8STristan Ye goto out;
160de474ee8STristan Ye }
161de474ee8STristan Ye
162de474ee8STristan Ye if (!num_free_extents ||
163de474ee8STristan Ye (ocfs2_sparse_alloc(osb) && num_free_extents < max_recs_needed))
164de474ee8STristan Ye extra_blocks += ocfs2_extend_meta_needed(et->et_root_el);
165de474ee8STristan Ye
166de474ee8STristan Ye ret = ocfs2_reserve_new_metadata_blocks(osb, extra_blocks, meta_ac);
167de474ee8STristan Ye if (ret) {
168de474ee8STristan Ye mlog_errno(ret);
169de474ee8STristan Ye goto out;
170de474ee8STristan Ye }
171de474ee8STristan Ye
172de474ee8STristan Ye
17306f9da6eSGoldwyn Rodrigues *credits += ocfs2_calc_extend_credits(osb->sb, et->et_root_el);
174de474ee8STristan Ye
175de474ee8STristan Ye mlog(0, "reserve metadata_blocks: %d, data_clusters: %u, credits: %d\n",
176de474ee8STristan Ye extra_blocks, clusters_to_move, *credits);
177de474ee8STristan Ye out:
178de474ee8STristan Ye if (ret) {
179de474ee8STristan Ye if (*meta_ac) {
180de474ee8STristan Ye ocfs2_free_alloc_context(*meta_ac);
181de474ee8STristan Ye *meta_ac = NULL;
182de474ee8STristan Ye }
183de474ee8STristan Ye }
184de474ee8STristan Ye
185de474ee8STristan Ye return ret;
186de474ee8STristan Ye }
187202ee5faSTristan Ye
188202ee5faSTristan Ye /*
189202ee5faSTristan Ye * Using one journal handle to guarantee the data consistency in case
190202ee5faSTristan Ye * crash happens anywhere.
191dda54e76STristan Ye *
192dda54e76STristan Ye * XXX: defrag can end up with finishing partial extent as requested,
193dda54e76STristan Ye * due to not enough contiguous clusters can be found in allocator.
194202ee5faSTristan Ye */
ocfs2_defrag_extent(struct ocfs2_move_extents_context * context,u32 cpos,u32 phys_cpos,u32 * len,int ext_flags)195202ee5faSTristan Ye static int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context,
1964dfa66bdSTristan Ye u32 cpos, u32 phys_cpos, u32 *len, int ext_flags)
197202ee5faSTristan Ye {
1984dfa66bdSTristan Ye int ret, credits = 0, extra_blocks = 0, partial = context->partial;
199202ee5faSTristan Ye handle_t *handle;
200202ee5faSTristan Ye struct inode *inode = context->inode;
201202ee5faSTristan Ye struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
202202ee5faSTristan Ye struct inode *tl_inode = osb->osb_tl_inode;
203202ee5faSTristan Ye struct ocfs2_refcount_tree *ref_tree = NULL;
204202ee5faSTristan Ye u32 new_phys_cpos, new_len;
205202ee5faSTristan Ye u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
2066194ae42SLarry Chen int need_free = 0;
207202ee5faSTristan Ye
2084dfa66bdSTristan Ye if ((ext_flags & OCFS2_EXT_REFCOUNTED) && *len) {
20984e40080SDarrick J. Wong BUG_ON(!ocfs2_is_refcount_inode(inode));
210202ee5faSTristan Ye BUG_ON(!context->refcount_loc);
211202ee5faSTristan Ye
212202ee5faSTristan Ye ret = ocfs2_lock_refcount_tree(osb, context->refcount_loc, 1,
213202ee5faSTristan Ye &ref_tree, NULL);
214202ee5faSTristan Ye if (ret) {
215202ee5faSTristan Ye mlog_errno(ret);
216202ee5faSTristan Ye return ret;
217202ee5faSTristan Ye }
218202ee5faSTristan Ye
219202ee5faSTristan Ye ret = ocfs2_prepare_refcount_change_for_del(inode,
220202ee5faSTristan Ye context->refcount_loc,
221202ee5faSTristan Ye phys_blkno,
2224dfa66bdSTristan Ye *len,
223202ee5faSTristan Ye &credits,
224202ee5faSTristan Ye &extra_blocks);
225202ee5faSTristan Ye if (ret) {
226202ee5faSTristan Ye mlog_errno(ret);
227202ee5faSTristan Ye goto out;
228202ee5faSTristan Ye }
229202ee5faSTristan Ye }
230202ee5faSTristan Ye
231e21e5744SLarry Chen ret = ocfs2_lock_meta_allocator_move_extents(inode, &context->et,
232e21e5744SLarry Chen *len, 1,
233202ee5faSTristan Ye &context->meta_ac,
234202ee5faSTristan Ye extra_blocks, &credits);
235202ee5faSTristan Ye if (ret) {
236202ee5faSTristan Ye mlog_errno(ret);
237202ee5faSTristan Ye goto out;
238202ee5faSTristan Ye }
239202ee5faSTristan Ye
240202ee5faSTristan Ye /*
241202ee5faSTristan Ye * should be using allocation reservation strategy there?
242202ee5faSTristan Ye *
243202ee5faSTristan Ye * if (context->data_ac)
244202ee5faSTristan Ye * context->data_ac->ac_resv = &OCFS2_I(inode)->ip_la_data_resv;
245202ee5faSTristan Ye */
246202ee5faSTristan Ye
2475955102cSAl Viro inode_lock(tl_inode);
248202ee5faSTristan Ye
249202ee5faSTristan Ye if (ocfs2_truncate_log_needs_flush(osb)) {
250202ee5faSTristan Ye ret = __ocfs2_flush_truncate_log(osb);
251202ee5faSTristan Ye if (ret < 0) {
252202ee5faSTristan Ye mlog_errno(ret);
253202ee5faSTristan Ye goto out_unlock_mutex;
254202ee5faSTristan Ye }
255202ee5faSTristan Ye }
256202ee5faSTristan Ye
257e21e5744SLarry Chen /*
258e21e5744SLarry Chen * Make sure ocfs2_reserve_cluster is called after
259e21e5744SLarry Chen * __ocfs2_flush_truncate_log, otherwise, dead lock may happen.
260e21e5744SLarry Chen *
261e21e5744SLarry Chen * If ocfs2_reserve_cluster is called
262e21e5744SLarry Chen * before __ocfs2_flush_truncate_log, dead lock on global bitmap
263e21e5744SLarry Chen * may happen.
264e21e5744SLarry Chen *
265e21e5744SLarry Chen */
266e21e5744SLarry Chen ret = ocfs2_reserve_clusters(osb, *len, &context->data_ac);
267e21e5744SLarry Chen if (ret) {
268e21e5744SLarry Chen mlog_errno(ret);
269e21e5744SLarry Chen goto out_unlock_mutex;
270e21e5744SLarry Chen }
271e21e5744SLarry Chen
272202ee5faSTristan Ye handle = ocfs2_start_trans(osb, credits);
273202ee5faSTristan Ye if (IS_ERR(handle)) {
274202ee5faSTristan Ye ret = PTR_ERR(handle);
275202ee5faSTristan Ye mlog_errno(ret);
276202ee5faSTristan Ye goto out_unlock_mutex;
277202ee5faSTristan Ye }
278202ee5faSTristan Ye
2794dfa66bdSTristan Ye ret = __ocfs2_claim_clusters(handle, context->data_ac, 1, *len,
280202ee5faSTristan Ye &new_phys_cpos, &new_len);
281202ee5faSTristan Ye if (ret) {
282202ee5faSTristan Ye mlog_errno(ret);
283202ee5faSTristan Ye goto out_commit;
284202ee5faSTristan Ye }
285202ee5faSTristan Ye
286202ee5faSTristan Ye /*
2874dfa66bdSTristan Ye * allowing partial extent moving is kind of 'pros and cons', it makes
2884dfa66bdSTristan Ye * whole defragmentation less likely to fail, on the contrary, the bad
2894dfa66bdSTristan Ye * thing is it may make the fs even more fragmented after moving, let
2904dfa66bdSTristan Ye * userspace make a good decision here.
291202ee5faSTristan Ye */
2924dfa66bdSTristan Ye if (new_len != *len) {
2934dfa66bdSTristan Ye mlog(0, "len_claimed: %u, len: %u\n", new_len, *len);
2944dfa66bdSTristan Ye if (!partial) {
295202ee5faSTristan Ye context->range->me_flags &= ~OCFS2_MOVE_EXT_FL_COMPLETE;
296202ee5faSTristan Ye ret = -ENOSPC;
2976194ae42SLarry Chen need_free = 1;
298202ee5faSTristan Ye goto out_commit;
299202ee5faSTristan Ye }
3004dfa66bdSTristan Ye }
301202ee5faSTristan Ye
302202ee5faSTristan Ye mlog(0, "cpos: %u, phys_cpos: %u, new_phys_cpos: %u\n", cpos,
303202ee5faSTristan Ye phys_cpos, new_phys_cpos);
304202ee5faSTristan Ye
3054dfa66bdSTristan Ye ret = __ocfs2_move_extent(handle, context, cpos, new_len, phys_cpos,
306202ee5faSTristan Ye new_phys_cpos, ext_flags);
307202ee5faSTristan Ye if (ret)
308202ee5faSTristan Ye mlog_errno(ret);
309202ee5faSTristan Ye
3104dfa66bdSTristan Ye if (partial && (new_len != *len))
3114dfa66bdSTristan Ye *len = new_len;
3124dfa66bdSTristan Ye
313202ee5faSTristan Ye /*
314202ee5faSTristan Ye * Here we should write the new page out first if we are
315202ee5faSTristan Ye * in write-back mode.
316202ee5faSTristan Ye */
3174dfa66bdSTristan Ye ret = ocfs2_cow_sync_writeback(inode->i_sb, context->inode, cpos, *len);
318202ee5faSTristan Ye if (ret)
319202ee5faSTristan Ye mlog_errno(ret);
320202ee5faSTristan Ye
321202ee5faSTristan Ye out_commit:
3226194ae42SLarry Chen if (need_free && context->data_ac) {
3236194ae42SLarry Chen struct ocfs2_alloc_context *data_ac = context->data_ac;
3246194ae42SLarry Chen
3256194ae42SLarry Chen if (context->data_ac->ac_which == OCFS2_AC_USE_LOCAL)
3266194ae42SLarry Chen ocfs2_free_local_alloc_bits(osb, handle, data_ac,
3276194ae42SLarry Chen new_phys_cpos, new_len);
3286194ae42SLarry Chen else
3296194ae42SLarry Chen ocfs2_free_clusters(handle,
3306194ae42SLarry Chen data_ac->ac_inode,
3316194ae42SLarry Chen data_ac->ac_bh,
3326194ae42SLarry Chen ocfs2_clusters_to_blocks(osb->sb, new_phys_cpos),
3336194ae42SLarry Chen new_len);
3346194ae42SLarry Chen }
3356194ae42SLarry Chen
336202ee5faSTristan Ye ocfs2_commit_trans(osb, handle);
337202ee5faSTristan Ye
338202ee5faSTristan Ye out_unlock_mutex:
3395955102cSAl Viro inode_unlock(tl_inode);
340202ee5faSTristan Ye
341202ee5faSTristan Ye if (context->data_ac) {
342202ee5faSTristan Ye ocfs2_free_alloc_context(context->data_ac);
343202ee5faSTristan Ye context->data_ac = NULL;
344202ee5faSTristan Ye }
345202ee5faSTristan Ye
346202ee5faSTristan Ye if (context->meta_ac) {
347202ee5faSTristan Ye ocfs2_free_alloc_context(context->meta_ac);
348202ee5faSTristan Ye context->meta_ac = NULL;
349202ee5faSTristan Ye }
350202ee5faSTristan Ye
351202ee5faSTristan Ye out:
352202ee5faSTristan Ye if (ref_tree)
353202ee5faSTristan Ye ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
354202ee5faSTristan Ye
355202ee5faSTristan Ye return ret;
356202ee5faSTristan Ye }
3571c06b912STristan Ye
3581c06b912STristan Ye /*
3591c06b912STristan Ye * find the victim alloc group, where #blkno fits.
3601c06b912STristan Ye */
ocfs2_find_victim_alloc_group(struct inode * inode,u64 vict_blkno,int type,int slot,int * vict_bit,struct buffer_head ** ret_bh)3611c06b912STristan Ye static int ocfs2_find_victim_alloc_group(struct inode *inode,
3621c06b912STristan Ye u64 vict_blkno,
3631c06b912STristan Ye int type, int slot,
3641c06b912STristan Ye int *vict_bit,
3651c06b912STristan Ye struct buffer_head **ret_bh)
3661c06b912STristan Ye {
3676aea6f50STristan Ye int ret, i, bits_per_unit = 0;
3681c06b912STristan Ye u64 blkno;
3691c06b912STristan Ye char namebuf[40];
3701c06b912STristan Ye
3711c06b912STristan Ye struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
3721c06b912STristan Ye struct buffer_head *ac_bh = NULL, *gd_bh = NULL;
3731c06b912STristan Ye struct ocfs2_chain_list *cl;
3741c06b912STristan Ye struct ocfs2_chain_rec *rec;
3751c06b912STristan Ye struct ocfs2_dinode *ac_dinode;
3761c06b912STristan Ye struct ocfs2_group_desc *bg;
3771c06b912STristan Ye
3781c06b912STristan Ye ocfs2_sprintf_system_inode_name(namebuf, sizeof(namebuf), type, slot);
3791c06b912STristan Ye ret = ocfs2_lookup_ino_from_name(osb->sys_root_inode, namebuf,
3801c06b912STristan Ye strlen(namebuf), &blkno);
3811c06b912STristan Ye if (ret) {
3821c06b912STristan Ye ret = -ENOENT;
3831c06b912STristan Ye goto out;
3841c06b912STristan Ye }
3851c06b912STristan Ye
3861c06b912STristan Ye ret = ocfs2_read_blocks_sync(osb, blkno, 1, &ac_bh);
3871c06b912STristan Ye if (ret) {
3881c06b912STristan Ye mlog_errno(ret);
3891c06b912STristan Ye goto out;
3901c06b912STristan Ye }
3911c06b912STristan Ye
3921c06b912STristan Ye ac_dinode = (struct ocfs2_dinode *)ac_bh->b_data;
3931c06b912STristan Ye cl = &(ac_dinode->id2.i_chain);
3941c06b912STristan Ye rec = &(cl->cl_recs[0]);
3951c06b912STristan Ye
3961c06b912STristan Ye if (type == GLOBAL_BITMAP_SYSTEM_INODE)
3976aea6f50STristan Ye bits_per_unit = osb->s_clustersize_bits -
3986aea6f50STristan Ye inode->i_sb->s_blocksize_bits;
3991c06b912STristan Ye /*
4001c06b912STristan Ye * 'vict_blkno' was out of the valid range.
4011c06b912STristan Ye */
4021c06b912STristan Ye if ((vict_blkno < le64_to_cpu(rec->c_blkno)) ||
4037fa05c6eSJoseph Qi (vict_blkno >= ((u64)le32_to_cpu(ac_dinode->id1.bitmap1.i_total) <<
4046aea6f50STristan Ye bits_per_unit))) {
4051c06b912STristan Ye ret = -EINVAL;
4061c06b912STristan Ye goto out;
4071c06b912STristan Ye }
4081c06b912STristan Ye
4091c06b912STristan Ye for (i = 0; i < le16_to_cpu(cl->cl_next_free_rec); i++) {
4101c06b912STristan Ye
4111c06b912STristan Ye rec = &(cl->cl_recs[i]);
4121c06b912STristan Ye if (!rec)
4131c06b912STristan Ye continue;
4141c06b912STristan Ye
4151c06b912STristan Ye bg = NULL;
4161c06b912STristan Ye
4171c06b912STristan Ye do {
4181c06b912STristan Ye if (!bg)
4191c06b912STristan Ye blkno = le64_to_cpu(rec->c_blkno);
4201c06b912STristan Ye else
4211c06b912STristan Ye blkno = le64_to_cpu(bg->bg_next_group);
4221c06b912STristan Ye
4231c06b912STristan Ye if (gd_bh) {
4241c06b912STristan Ye brelse(gd_bh);
4251c06b912STristan Ye gd_bh = NULL;
4261c06b912STristan Ye }
4271c06b912STristan Ye
4281c06b912STristan Ye ret = ocfs2_read_blocks_sync(osb, blkno, 1, &gd_bh);
4291c06b912STristan Ye if (ret) {
4301c06b912STristan Ye mlog_errno(ret);
4311c06b912STristan Ye goto out;
4321c06b912STristan Ye }
4331c06b912STristan Ye
4341c06b912STristan Ye bg = (struct ocfs2_group_desc *)gd_bh->b_data;
4351c06b912STristan Ye
4361c06b912STristan Ye if (vict_blkno < (le64_to_cpu(bg->bg_blkno) +
437236b9254SHeming Zhao via Ocfs2-devel (le16_to_cpu(bg->bg_bits) << bits_per_unit))) {
4381c06b912STristan Ye
4391c06b912STristan Ye *ret_bh = gd_bh;
4406aea6f50STristan Ye *vict_bit = (vict_blkno - blkno) >>
4416aea6f50STristan Ye bits_per_unit;
4421c06b912STristan Ye mlog(0, "find the victim group: #%llu, "
4431c06b912STristan Ye "total_bits: %u, vict_bit: %u\n",
4441c06b912STristan Ye blkno, le16_to_cpu(bg->bg_bits),
4451c06b912STristan Ye *vict_bit);
4461c06b912STristan Ye goto out;
4471c06b912STristan Ye }
4481c06b912STristan Ye
4491c06b912STristan Ye } while (le64_to_cpu(bg->bg_next_group));
4501c06b912STristan Ye }
4511c06b912STristan Ye
4521c06b912STristan Ye ret = -EINVAL;
4531c06b912STristan Ye out:
4541c06b912STristan Ye brelse(ac_bh);
4551c06b912STristan Ye
4561c06b912STristan Ye /*
4571c06b912STristan Ye * caller has to release the gd_bh properly.
4581c06b912STristan Ye */
4591c06b912STristan Ye return ret;
4601c06b912STristan Ye }
46199e4c750STristan Ye
46299e4c750STristan Ye /*
46399e4c750STristan Ye * XXX: helper to validate and adjust moving goal.
46499e4c750STristan Ye */
ocfs2_validate_and_adjust_move_goal(struct inode * inode,struct ocfs2_move_extents * range)46599e4c750STristan Ye static int ocfs2_validate_and_adjust_move_goal(struct inode *inode,
46699e4c750STristan Ye struct ocfs2_move_extents *range)
46799e4c750STristan Ye {
46899e4c750STristan Ye int ret, goal_bit = 0;
46999e4c750STristan Ye
47099e4c750STristan Ye struct buffer_head *gd_bh = NULL;
4717f4804d4SDan Carpenter struct ocfs2_group_desc *bg;
47299e4c750STristan Ye struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
47399e4c750STristan Ye int c_to_b = 1 << (osb->s_clustersize_bits -
47499e4c750STristan Ye inode->i_sb->s_blocksize_bits);
47599e4c750STristan Ye
47699e4c750STristan Ye /*
477ea5e1675STristan Ye * make goal become cluster aligned.
478ea5e1675STristan Ye */
479ea5e1675STristan Ye range->me_goal = ocfs2_block_to_cluster_start(inode->i_sb,
480ea5e1675STristan Ye range->me_goal);
481ea5e1675STristan Ye /*
48299e4c750STristan Ye * validate goal sits within global_bitmap, and return the victim
48399e4c750STristan Ye * group desc
48499e4c750STristan Ye */
48599e4c750STristan Ye ret = ocfs2_find_victim_alloc_group(inode, range->me_goal,
48699e4c750STristan Ye GLOBAL_BITMAP_SYSTEM_INODE,
48799e4c750STristan Ye OCFS2_INVALID_SLOT,
48899e4c750STristan Ye &goal_bit, &gd_bh);
48999e4c750STristan Ye if (ret)
49099e4c750STristan Ye goto out;
49199e4c750STristan Ye
49299e4c750STristan Ye bg = (struct ocfs2_group_desc *)gd_bh->b_data;
49399e4c750STristan Ye
49499e4c750STristan Ye /*
4957f4804d4SDan Carpenter * moving goal is not allowd to start with a group desc blok(#0 blk)
4967f4804d4SDan Carpenter * let's compromise to the latter cluster.
4977f4804d4SDan Carpenter */
4987f4804d4SDan Carpenter if (range->me_goal == le64_to_cpu(bg->bg_blkno))
4997f4804d4SDan Carpenter range->me_goal += c_to_b;
5007f4804d4SDan Carpenter
5017f4804d4SDan Carpenter /*
50299e4c750STristan Ye * movement is not gonna cross two groups.
50399e4c750STristan Ye */
50499e4c750STristan Ye if ((le16_to_cpu(bg->bg_bits) - goal_bit) * osb->s_clustersize <
50599e4c750STristan Ye range->me_len) {
50699e4c750STristan Ye ret = -EINVAL;
50799e4c750STristan Ye goto out;
50899e4c750STristan Ye }
50999e4c750STristan Ye /*
51099e4c750STristan Ye * more exact validations/adjustments will be performed later during
51199e4c750STristan Ye * moving operation for each extent range.
51299e4c750STristan Ye */
51399e4c750STristan Ye mlog(0, "extents get ready to be moved to #%llu block\n",
51499e4c750STristan Ye range->me_goal);
51599e4c750STristan Ye
51699e4c750STristan Ye out:
51799e4c750STristan Ye brelse(gd_bh);
51899e4c750STristan Ye
51999e4c750STristan Ye return ret;
52099e4c750STristan Ye }
521e6b5859cSTristan Ye
/*
 * Search the group descriptor in @bh, starting at bit *@goal_bit, for
 * @move_len consecutive clear bits.
 *
 * On a hit, *@goal_bit is set to the first bit of the free run and
 * *@phys_cpos to the matching cluster (the group's base cluster plus that
 * bit).  *@phys_cpos is set to 0 when no fit is found, i.e. when a used
 * bit is met more than @max_hop bits away from the starting bit or the
 * end of the group is reached first; *@goal_bit is left alone then.
 */
static void ocfs2_probe_alloc_group(struct inode *inode, struct buffer_head *bh,
				    int *goal_bit, u32 move_len, u32 max_hop,
				    u32 *phys_cpos)
{
	int i, used, last_free_bits = 0, base_bit = *goal_bit;
	int found = 0;
	struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *)bh->b_data;
	u32 base_cpos = ocfs2_blocks_to_clusters(inode->i_sb,
						 le64_to_cpu(gd->bg_blkno));

	for (i = base_bit; i < le16_to_cpu(gd->bg_bits); i++) {

		used = ocfs2_test_bit(i, (unsigned long *)gd->bg_bitmap);
		if (used) {
			/*
			 * we even tried searching the free chunk by jumping
			 * a 'max_hop' distance, but still failed.
			 */
			if ((i - base_bit) > max_hop)
				break;

			/* A used bit ends the current free run. */
			last_free_bits = 0;
			continue;
		}

		last_free_bits++;

		if (last_free_bits == move_len) {
			/*
			 * Bit i is the last bit of the run, so the run
			 * starts move_len - 1 bits earlier.
			 */
			int start = i - (int)move_len + 1;

			*goal_bit = start;
			*phys_cpos = base_cpos + start;
			found = 1;
			break;
		}
	}

	/*
	 * Running off the end of the group is a failure too; do not let
	 * the caller mistake its own goal for a verified free range.
	 */
	if (!found)
		*phys_cpos = 0;

	mlog(0, "found phys_cpos: %u to fit the wanted moving.\n", *phys_cpos);
}
5618473aa8aSTristan Ye
/*
 * Move @len clusters at logical @cpos / physical @phys_cpos to a free
 * range near the cluster passed in through *@new_phys_cpos.
 *
 * The group of the global bitmap that contains the goal is probed for
 * @len free bits, allowing a deviation derived from
 * context->range->me_threshold.  On success the data is moved, the
 * global bitmap inode counters and the group bitmap are updated, and
 * *@new_phys_cpos holds the cluster actually used.
 *
 * Returns 0 on success or a negative error code; -ENOSPC when no
 * suitable free range is found.
 */
static int ocfs2_move_extent(struct ocfs2_move_extents_context *context,
			     u32 cpos, u32 phys_cpos, u32 *new_phys_cpos,
			     u32 len, int ext_flags)
{
	int ret, credits = 0, extra_blocks = 0, goal_bit = 0;
	handle_t *handle;
	struct inode *inode = context->inode;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	struct inode *tl_inode = osb->osb_tl_inode;
	struct inode *gb_inode = NULL;
	struct buffer_head *gb_bh = NULL;
	struct buffer_head *gd_bh = NULL;
	struct ocfs2_group_desc *gd;
	struct ocfs2_refcount_tree *ref_tree = NULL;
	u32 move_max_hop = ocfs2_blocks_to_clusters(inode->i_sb,
						    context->range->me_threshold);
	u64 phys_blkno, new_phys_blkno;

	phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);

	if ((ext_flags & OCFS2_EXT_REFCOUNTED) && len) {
		BUG_ON(!ocfs2_is_refcount_inode(inode));
		BUG_ON(!context->refcount_loc);

		ret = ocfs2_lock_refcount_tree(osb, context->refcount_loc, 1,
					       &ref_tree, NULL);
		if (ret) {
			mlog_errno(ret);
			return ret;
		}

		ret = ocfs2_prepare_refcount_change_for_del(inode,
							context->refcount_loc,
							phys_blkno,
							len,
							&credits,
							&extra_blocks);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}
	}

	ret = ocfs2_lock_meta_allocator_move_extents(inode, &context->et,
						len, 1,
						&context->meta_ac,
						extra_blocks, &credits);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	/*
	 * need to count 2 extra credits for global_bitmap inode and
	 * group descriptor.
	 */
	credits += OCFS2_INODE_UPDATE_CREDITS + 1;

	/*
	 * ocfs2_move_extent() didn't reserve any clusters in lock_allocators()
	 * logic, while we still need to lock the global_bitmap.
	 */
	gb_inode = ocfs2_get_system_file_inode(osb, GLOBAL_BITMAP_SYSTEM_INODE,
					       OCFS2_INVALID_SLOT);
	if (!gb_inode) {
		mlog(ML_ERROR, "unable to get global_bitmap inode\n");
		ret = -EIO;
		goto out;
	}

	inode_lock(gb_inode);

	ret = ocfs2_inode_lock(gb_inode, &gb_bh, 1);
	if (ret) {
		mlog_errno(ret);
		goto out_unlock_gb_mutex;
	}

	inode_lock(tl_inode);

	handle = ocfs2_start_trans(osb, credits);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		mlog_errno(ret);
		goto out_unlock_tl_inode;
	}

	new_phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, *new_phys_cpos);
	ret = ocfs2_find_victim_alloc_group(inode, new_phys_blkno,
					    GLOBAL_BITMAP_SYSTEM_INODE,
					    OCFS2_INVALID_SLOT,
					    &goal_bit, &gd_bh);
	if (ret) {
		mlog_errno(ret);
		goto out_commit;
	}

	/*
	 * probe the victim cluster group to find a proper
	 * region to fit wanted movement, it even will perform
	 * a best-effort attempt by compromising to a threshold
	 * around the goal.
	 */
	ocfs2_probe_alloc_group(inode, gd_bh, &goal_bit, len, move_max_hop,
				new_phys_cpos);
	if (!*new_phys_cpos) {
		ret = -ENOSPC;
		goto out_commit;
	}

	ret = __ocfs2_move_extent(handle, context, cpos, len, phys_cpos,
				  *new_phys_cpos, ext_flags);
	if (ret) {
		mlog_errno(ret);
		goto out_commit;
	}

	gd = (struct ocfs2_group_desc *)gd_bh->b_data;
	ret = ocfs2_alloc_dinode_update_counts(gb_inode, handle, gb_bh, len,
					       le16_to_cpu(gd->bg_chain));
	if (ret) {
		mlog_errno(ret);
		goto out_commit;
	}

	ret = ocfs2_block_group_set_bits(handle, gb_inode, gd, gd_bh,
					 goal_bit, len);
	if (ret) {
		ocfs2_rollback_alloc_dinode_counts(gb_inode, gb_bh, len,
					       le16_to_cpu(gd->bg_chain));
		mlog_errno(ret);
		/*
		 * Bail out like every other failure above, so the error is
		 * not overwritten by the writeback result below.
		 */
		goto out_commit;
	}

	/*
	 * Here we should write the new page out first if we are
	 * in write-back mode.
	 */
	ret = ocfs2_cow_sync_writeback(inode->i_sb, context->inode, cpos, len);
	if (ret)
		mlog_errno(ret);

out_commit:
	ocfs2_commit_trans(osb, handle);
	brelse(gd_bh);

out_unlock_tl_inode:
	inode_unlock(tl_inode);

	ocfs2_inode_unlock(gb_inode, 1);
out_unlock_gb_mutex:
	inode_unlock(gb_inode);
	brelse(gb_bh);
	iput(gb_inode);

out:
	if (context->meta_ac) {
		ocfs2_free_alloc_context(context->meta_ac);
		context->meta_ac = NULL;
	}

	if (ref_tree)
		ocfs2_unlock_refcount_tree(osb, ref_tree, 1);

	return ret;
}
727ee16cc03STristan Ye
728ee16cc03STristan Ye /*
729ee16cc03STristan Ye * Helper to calculate the defraging length in one run according to threshold.
730ee16cc03STristan Ye */
ocfs2_calc_extent_defrag_len(u32 * alloc_size,u32 * len_defraged,u32 threshold,int * skip)731ee16cc03STristan Ye static void ocfs2_calc_extent_defrag_len(u32 *alloc_size, u32 *len_defraged,
732ee16cc03STristan Ye u32 threshold, int *skip)
733ee16cc03STristan Ye {
734ee16cc03STristan Ye if ((*alloc_size + *len_defraged) < threshold) {
735ee16cc03STristan Ye /*
736ee16cc03STristan Ye * proceed defragmentation until we meet the thresh
737ee16cc03STristan Ye */
738ee16cc03STristan Ye *len_defraged += *alloc_size;
739ee16cc03STristan Ye } else if (*len_defraged == 0) {
740ee16cc03STristan Ye /*
741ee16cc03STristan Ye * XXX: skip a large extent.
742ee16cc03STristan Ye */
743ee16cc03STristan Ye *skip = 1;
744ee16cc03STristan Ye } else {
745ee16cc03STristan Ye /*
746ee16cc03STristan Ye * split this extent to coalesce with former pieces as
747ee16cc03STristan Ye * to reach the threshold.
748ee16cc03STristan Ye *
749ee16cc03STristan Ye * we're done here with one cycle of defragmentation
750ee16cc03STristan Ye * in a size of 'thresh', resetting 'len_defraged'
751ee16cc03STristan Ye * forces a new defragmentation.
752ee16cc03STristan Ye */
753ee16cc03STristan Ye *alloc_size = threshold - *len_defraged;
754ee16cc03STristan Ye *len_defraged = 0;
755ee16cc03STristan Ye }
756ee16cc03STristan Ye }
75753069d4eSTristan Ye
__ocfs2_move_extents_range(struct buffer_head * di_bh,struct ocfs2_move_extents_context * context)75853069d4eSTristan Ye static int __ocfs2_move_extents_range(struct buffer_head *di_bh,
75953069d4eSTristan Ye struct ocfs2_move_extents_context *context)
76053069d4eSTristan Ye {
76153069d4eSTristan Ye int ret = 0, flags, do_defrag, skip = 0;
76253069d4eSTristan Ye u32 cpos, phys_cpos, move_start, len_to_move, alloc_size;
76353069d4eSTristan Ye u32 len_defraged = 0, defrag_thresh = 0, new_phys_cpos = 0;
76453069d4eSTristan Ye
76553069d4eSTristan Ye struct inode *inode = context->inode;
76653069d4eSTristan Ye struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
76753069d4eSTristan Ye struct ocfs2_move_extents *range = context->range;
76853069d4eSTristan Ye struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
76953069d4eSTristan Ye
770f17c20ddSJunxiao Bi if ((i_size_read(inode) == 0) || (range->me_len == 0))
77153069d4eSTristan Ye return 0;
77253069d4eSTristan Ye
77353069d4eSTristan Ye if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
77453069d4eSTristan Ye return 0;
77553069d4eSTristan Ye
77653069d4eSTristan Ye context->refcount_loc = le64_to_cpu(di->i_refcount_loc);
77753069d4eSTristan Ye
77853069d4eSTristan Ye ocfs2_init_dinode_extent_tree(&context->et, INODE_CACHE(inode), di_bh);
77953069d4eSTristan Ye ocfs2_init_dealloc_ctxt(&context->dealloc);
78053069d4eSTristan Ye
78153069d4eSTristan Ye /*
78253069d4eSTristan Ye * TO-DO XXX:
78353069d4eSTristan Ye *
78453069d4eSTristan Ye * - xattr extents.
78553069d4eSTristan Ye */
78653069d4eSTristan Ye
78753069d4eSTristan Ye do_defrag = context->auto_defrag;
78853069d4eSTristan Ye
78953069d4eSTristan Ye /*
79053069d4eSTristan Ye * extents moving happens in unit of clusters, for the sake
79153069d4eSTristan Ye * of simplicity, we may ignore two clusters where 'byte_start'
79253069d4eSTristan Ye * and 'byte_start + len' were within.
79353069d4eSTristan Ye */
79453069d4eSTristan Ye move_start = ocfs2_clusters_for_bytes(osb->sb, range->me_start);
79553069d4eSTristan Ye len_to_move = (range->me_start + range->me_len) >>
79653069d4eSTristan Ye osb->s_clustersize_bits;
79753069d4eSTristan Ye if (len_to_move >= move_start)
79853069d4eSTristan Ye len_to_move -= move_start;
79953069d4eSTristan Ye else
80053069d4eSTristan Ye len_to_move = 0;
80153069d4eSTristan Ye
802dda54e76STristan Ye if (do_defrag) {
80353069d4eSTristan Ye defrag_thresh = range->me_threshold >> osb->s_clustersize_bits;
804dda54e76STristan Ye if (defrag_thresh <= 1)
805dda54e76STristan Ye goto done;
806dda54e76STristan Ye } else
80753069d4eSTristan Ye new_phys_cpos = ocfs2_blocks_to_clusters(inode->i_sb,
80853069d4eSTristan Ye range->me_goal);
80953069d4eSTristan Ye
81053069d4eSTristan Ye mlog(0, "Inode: %llu, start: %llu, len: %llu, cstart: %u, clen: %u, "
81153069d4eSTristan Ye "thresh: %u\n",
81253069d4eSTristan Ye (unsigned long long)OCFS2_I(inode)->ip_blkno,
81353069d4eSTristan Ye (unsigned long long)range->me_start,
81453069d4eSTristan Ye (unsigned long long)range->me_len,
81553069d4eSTristan Ye move_start, len_to_move, defrag_thresh);
81653069d4eSTristan Ye
81753069d4eSTristan Ye cpos = move_start;
81853069d4eSTristan Ye while (len_to_move) {
81953069d4eSTristan Ye ret = ocfs2_get_clusters(inode, cpos, &phys_cpos, &alloc_size,
82053069d4eSTristan Ye &flags);
82153069d4eSTristan Ye if (ret) {
82253069d4eSTristan Ye mlog_errno(ret);
82353069d4eSTristan Ye goto out;
82453069d4eSTristan Ye }
82553069d4eSTristan Ye
82653069d4eSTristan Ye if (alloc_size > len_to_move)
82753069d4eSTristan Ye alloc_size = len_to_move;
82853069d4eSTristan Ye
82953069d4eSTristan Ye /*
83053069d4eSTristan Ye * XXX: how to deal with a hole:
83153069d4eSTristan Ye *
83253069d4eSTristan Ye * - skip the hole of course
83353069d4eSTristan Ye * - force a new defragmentation
83453069d4eSTristan Ye */
83553069d4eSTristan Ye if (!phys_cpos) {
83653069d4eSTristan Ye if (do_defrag)
83753069d4eSTristan Ye len_defraged = 0;
83853069d4eSTristan Ye
83953069d4eSTristan Ye goto next;
84053069d4eSTristan Ye }
84153069d4eSTristan Ye
84253069d4eSTristan Ye if (do_defrag) {
84353069d4eSTristan Ye ocfs2_calc_extent_defrag_len(&alloc_size, &len_defraged,
84453069d4eSTristan Ye defrag_thresh, &skip);
84553069d4eSTristan Ye /*
84653069d4eSTristan Ye * skip large extents
84753069d4eSTristan Ye */
84853069d4eSTristan Ye if (skip) {
84953069d4eSTristan Ye skip = 0;
85053069d4eSTristan Ye goto next;
85153069d4eSTristan Ye }
85253069d4eSTristan Ye
85353069d4eSTristan Ye mlog(0, "#Defrag: cpos: %u, phys_cpos: %u, "
85453069d4eSTristan Ye "alloc_size: %u, len_defraged: %u\n",
85553069d4eSTristan Ye cpos, phys_cpos, alloc_size, len_defraged);
85653069d4eSTristan Ye
85753069d4eSTristan Ye ret = ocfs2_defrag_extent(context, cpos, phys_cpos,
8584dfa66bdSTristan Ye &alloc_size, flags);
85953069d4eSTristan Ye } else {
86053069d4eSTristan Ye ret = ocfs2_move_extent(context, cpos, phys_cpos,
86153069d4eSTristan Ye &new_phys_cpos, alloc_size,
86253069d4eSTristan Ye flags);
86353069d4eSTristan Ye
86453069d4eSTristan Ye new_phys_cpos += alloc_size;
86553069d4eSTristan Ye }
86653069d4eSTristan Ye
86753069d4eSTristan Ye if (ret < 0) {
86853069d4eSTristan Ye mlog_errno(ret);
86953069d4eSTristan Ye goto out;
87053069d4eSTristan Ye }
87153069d4eSTristan Ye
87253069d4eSTristan Ye context->clusters_moved += alloc_size;
87353069d4eSTristan Ye next:
87453069d4eSTristan Ye cpos += alloc_size;
87553069d4eSTristan Ye len_to_move -= alloc_size;
87653069d4eSTristan Ye }
87753069d4eSTristan Ye
878dda54e76STristan Ye done:
87953069d4eSTristan Ye range->me_flags |= OCFS2_MOVE_EXT_FL_COMPLETE;
88053069d4eSTristan Ye
88153069d4eSTristan Ye out:
88253069d4eSTristan Ye range->me_moved_len = ocfs2_clusters_to_bytes(osb->sb,
88353069d4eSTristan Ye context->clusters_moved);
88453069d4eSTristan Ye range->me_new_offset = ocfs2_clusters_to_bytes(osb->sb,
88553069d4eSTristan Ye context->new_phys_cpos);
88653069d4eSTristan Ye
88753069d4eSTristan Ye ocfs2_schedule_truncate_log_flush(osb, 1);
88853069d4eSTristan Ye ocfs2_run_deallocs(osb, &context->dealloc);
88953069d4eSTristan Ye
89053069d4eSTristan Ye return ret;
89153069d4eSTristan Ye }
89253069d4eSTristan Ye
ocfs2_move_extents(struct ocfs2_move_extents_context * context)89353069d4eSTristan Ye static int ocfs2_move_extents(struct ocfs2_move_extents_context *context)
89453069d4eSTristan Ye {
89553069d4eSTristan Ye int status;
89653069d4eSTristan Ye handle_t *handle;
89753069d4eSTristan Ye struct inode *inode = context->inode;
89853069d4eSTristan Ye struct ocfs2_dinode *di;
89953069d4eSTristan Ye struct buffer_head *di_bh = NULL;
90053069d4eSTristan Ye struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
90153069d4eSTristan Ye
90253069d4eSTristan Ye if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb))
90353069d4eSTristan Ye return -EROFS;
90453069d4eSTristan Ye
9055955102cSAl Viro inode_lock(inode);
90653069d4eSTristan Ye
90753069d4eSTristan Ye /*
90853069d4eSTristan Ye * This prevents concurrent writes from other nodes
90953069d4eSTristan Ye */
91053069d4eSTristan Ye status = ocfs2_rw_lock(inode, 1);
91153069d4eSTristan Ye if (status) {
91253069d4eSTristan Ye mlog_errno(status);
91353069d4eSTristan Ye goto out;
91453069d4eSTristan Ye }
91553069d4eSTristan Ye
91653069d4eSTristan Ye status = ocfs2_inode_lock(inode, &di_bh, 1);
91753069d4eSTristan Ye if (status) {
91853069d4eSTristan Ye mlog_errno(status);
91953069d4eSTristan Ye goto out_rw_unlock;
92053069d4eSTristan Ye }
92153069d4eSTristan Ye
92253069d4eSTristan Ye /*
92353069d4eSTristan Ye * rememer ip_xattr_sem also needs to be held if necessary
92453069d4eSTristan Ye */
92553069d4eSTristan Ye down_write(&OCFS2_I(inode)->ip_alloc_sem);
92653069d4eSTristan Ye
92753069d4eSTristan Ye status = __ocfs2_move_extents_range(di_bh, context);
92853069d4eSTristan Ye
92953069d4eSTristan Ye up_write(&OCFS2_I(inode)->ip_alloc_sem);
93053069d4eSTristan Ye if (status) {
93153069d4eSTristan Ye mlog_errno(status);
93253069d4eSTristan Ye goto out_inode_unlock;
93353069d4eSTristan Ye }
93453069d4eSTristan Ye
93553069d4eSTristan Ye /*
93653069d4eSTristan Ye * We update ctime for these changes
93753069d4eSTristan Ye */
93853069d4eSTristan Ye handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
93953069d4eSTristan Ye if (IS_ERR(handle)) {
94053069d4eSTristan Ye status = PTR_ERR(handle);
94153069d4eSTristan Ye mlog_errno(status);
94253069d4eSTristan Ye goto out_inode_unlock;
94353069d4eSTristan Ye }
94453069d4eSTristan Ye
94553069d4eSTristan Ye status = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
94653069d4eSTristan Ye OCFS2_JOURNAL_ACCESS_WRITE);
94753069d4eSTristan Ye if (status) {
94853069d4eSTristan Ye mlog_errno(status);
94953069d4eSTristan Ye goto out_commit;
95053069d4eSTristan Ye }
95153069d4eSTristan Ye
95253069d4eSTristan Ye di = (struct ocfs2_dinode *)di_bh->b_data;
9536861de97SJeff Layton inode_set_ctime_current(inode);
954*10fc3a18SJeff Layton di->i_ctime = cpu_to_le64(inode_get_ctime_sec(inode));
955*10fc3a18SJeff Layton di->i_ctime_nsec = cpu_to_le32(inode_get_ctime_nsec(inode));
9566fdb702dSDarrick J. Wong ocfs2_update_inode_fsync_trans(handle, inode, 0);
95753069d4eSTristan Ye
95853069d4eSTristan Ye ocfs2_journal_dirty(handle, di_bh);
95953069d4eSTristan Ye
96053069d4eSTristan Ye out_commit:
96153069d4eSTristan Ye ocfs2_commit_trans(osb, handle);
96253069d4eSTristan Ye
96353069d4eSTristan Ye out_inode_unlock:
96453069d4eSTristan Ye brelse(di_bh);
96553069d4eSTristan Ye ocfs2_inode_unlock(inode, 1);
96653069d4eSTristan Ye out_rw_unlock:
96753069d4eSTristan Ye ocfs2_rw_unlock(inode, 1);
96853069d4eSTristan Ye out:
9695955102cSAl Viro inode_unlock(inode);
97053069d4eSTristan Ye
97153069d4eSTristan Ye return status;
97253069d4eSTristan Ye }
97353069d4eSTristan Ye
/*
 * ioctl entry point for moving/defragmenting extents of a regular file.
 *
 * filp: file the ioctl was issued on; must be a regular file opened for
 *       writing whose inode is neither immutable nor append-only.
 * argp: user pointer to a struct ocfs2_move_extents.  It is read as the
 *       request and, once the request passed validation, written back with
 *       the result fields even when the operation itself failed.
 *
 * Returns 0 on success, -EINVAL for a NULL argp or a start offset beyond
 * i_size, -EPERM for an unsuitable file, -ENOMEM, -EFAULT when the user
 * buffer cannot be copied, or an error from the helpers called below.
 */
int ocfs2_ioctl_move_extents(struct file *filp, void __user *argp)
{
	int status;
	u64 isize;

	struct inode *inode = file_inode(filp);
	struct ocfs2_move_extents range;
	struct ocfs2_move_extents_context *context;

	if (!argp)
		return -EINVAL;

	status = mnt_want_write_file(filp);
	if (status)
		return status;

	if ((!S_ISREG(inode->i_mode)) || !(filp->f_mode & FMODE_WRITE)) {
		status = -EPERM;
		goto out_drop;
	}

	if (inode->i_flags & (S_IMMUTABLE|S_APPEND)) {
		status = -EPERM;
		goto out_drop;
	}

	context = kzalloc(sizeof(*context), GFP_NOFS);
	if (!context) {
		status = -ENOMEM;
		mlog_errno(status);
		goto out_drop;
	}

	context->inode = inode;
	context->file = filp;

	if (copy_from_user(&range, argp, sizeof(range))) {
		status = -EFAULT;
		goto out_free;
	}

	/*
	 * Sample i_size once so that the validation and the clamping below
	 * all work on the same value.
	 */
	isize = i_size_read(inode);

	if (range.me_start > isize) {
		status = -EINVAL;
		goto out_free;
	}

	/*
	 * Clamp the length to the end of the file.  me_start <= isize was
	 * checked above, so the subtraction cannot underflow, whereas
	 * 'me_start + me_len' could wrap around for a huge user supplied
	 * me_len and slip past the check.
	 */
	if (range.me_len > isize - range.me_start)
		range.me_len = isize - range.me_start;

	context->range = &range;

	/*
	 * ok, the default threshold for the defragmentation
	 * is 1M, since our maximum clustersize was 1M also.
	 * any thought?
	 */
	if (!range.me_threshold)
		range.me_threshold = 1024 * 1024;

	if (range.me_threshold > isize)
		range.me_threshold = isize;

	if (range.me_flags & OCFS2_MOVE_EXT_FL_AUTO_DEFRAG) {
		context->auto_defrag = 1;

		if (range.me_flags & OCFS2_MOVE_EXT_FL_PART_DEFRAG)
			context->partial = 1;
	} else {
		/*
		 * first best-effort attempt to validate and adjust the goal
		 * (physical address in block), while it can't guarantee later
		 * operation can succeed all the time since global_bitmap may
		 * change a bit over time.
		 */

		status = ocfs2_validate_and_adjust_move_goal(inode, &range);
		if (status)
			goto out_copy;
	}

	status = ocfs2_move_extents(context);
	if (status)
		mlog_errno(status);
out_copy:
	/*
	 * movement/defragmentation may end up being partially completed,
	 * that's the reason why we need to return userspace the finished
	 * length and new_offset even if failure happens somewhere.
	 */
	if (copy_to_user(argp, &range, sizeof(range)))
		status = -EFAULT;

out_free:
	kfree(context);
out_drop:
	mnt_drop_write_file(filp);

	return status;
}
1072