1328970deSThomas Gleixner // SPDX-License-Identifier: GPL-2.0-or-later
2fa60ce2cSMasahiro Yamada /*
3ccd979bdSMark Fasheh * localalloc.c
4ccd979bdSMark Fasheh *
5ccd979bdSMark Fasheh * Node local data allocation
6ccd979bdSMark Fasheh *
7ccd979bdSMark Fasheh * Copyright (C) 2002, 2004 Oracle. All rights reserved.
8ccd979bdSMark Fasheh */
9ccd979bdSMark Fasheh
10ccd979bdSMark Fasheh #include <linux/fs.h>
11ccd979bdSMark Fasheh #include <linux/types.h>
12ccd979bdSMark Fasheh #include <linux/slab.h>
13ccd979bdSMark Fasheh #include <linux/highmem.h>
14ccd979bdSMark Fasheh #include <linux/bitops.h>
15ccd979bdSMark Fasheh
16ccd979bdSMark Fasheh #include <cluster/masklog.h>
17ccd979bdSMark Fasheh
18ccd979bdSMark Fasheh #include "ocfs2.h"
19ccd979bdSMark Fasheh
20ccd979bdSMark Fasheh #include "alloc.h"
2113723d00SJoel Becker #include "blockcheck.h"
22ccd979bdSMark Fasheh #include "dlmglue.h"
23ccd979bdSMark Fasheh #include "inode.h"
24ccd979bdSMark Fasheh #include "journal.h"
25ccd979bdSMark Fasheh #include "localalloc.h"
26ccd979bdSMark Fasheh #include "suballoc.h"
27ccd979bdSMark Fasheh #include "super.h"
28ccd979bdSMark Fasheh #include "sysfile.h"
29a04733d8STao Ma #include "ocfs2_trace.h"
30ccd979bdSMark Fasheh
31ccd979bdSMark Fasheh #include "buffer_head_io.h"
32ccd979bdSMark Fasheh
/* Address of the local alloc area (id2.i_lab) embedded in a dinode. */
#define OCFS2_LOCAL_ALLOC(dinode)	(&((dinode)->id2.i_lab))

/*
 * Prototypes for file-local (static) helpers that are referenced before
 * their definitions.
 */

static u32 ocfs2_local_alloc_count_bits(struct ocfs2_dinode *alloc);

static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb,
					     struct ocfs2_dinode *alloc,
					     u32 *numbits,
					     struct ocfs2_alloc_reservation *resv);

static void ocfs2_clear_local_alloc(struct ocfs2_dinode *alloc);

static int ocfs2_sync_local_to_main(struct ocfs2_super *osb,
				    handle_t *handle,
				    struct ocfs2_dinode *alloc,
				    struct inode *main_bm_inode,
				    struct buffer_head *main_bm_bh);

static int ocfs2_local_alloc_reserve_for_window(struct ocfs2_super *osb,
						struct ocfs2_alloc_context **ac,
						struct inode **bitmap_inode,
						struct buffer_head **bitmap_bh);

static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb,
					handle_t *handle,
					struct ocfs2_alloc_context *ac);

static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb,
					  struct inode *local_alloc_inode);
61ccd979bdSMark Fasheh
626b82021bSMark Fasheh /*
636b82021bSMark Fasheh * ocfs2_la_default_mb() - determine a default size, in megabytes of
646b82021bSMark Fasheh * the local alloc.
656b82021bSMark Fasheh *
666b82021bSMark Fasheh * Generally, we'd like to pick as large a local alloc as
676b82021bSMark Fasheh * possible. Performance on large workloads tends to scale
686b82021bSMark Fasheh * proportionally to la size. In addition to that, the reservations
696b82021bSMark Fasheh * code functions more efficiently as it can reserve more windows for
706b82021bSMark Fasheh * write.
716b82021bSMark Fasheh *
726b82021bSMark Fasheh * Some things work against us when trying to choose a large local alloc:
736b82021bSMark Fasheh *
746b82021bSMark Fasheh * - We need to ensure our sizing is picked to leave enough space in
756b82021bSMark Fasheh * group descriptors for other allocations (such as block groups,
766b82021bSMark Fasheh * etc). Picking default sizes which are a multiple of 4 could help
776b82021bSMark Fasheh * - block groups are allocated in 2mb and 4mb chunks.
786b82021bSMark Fasheh *
796b82021bSMark Fasheh * - Likewise, we don't want to starve other nodes of bits on small
806b82021bSMark Fasheh * file systems. This can easily be taken care of by limiting our
816b82021bSMark Fasheh * default to a reasonable size (256M) on larger cluster sizes.
826b82021bSMark Fasheh *
836b82021bSMark Fasheh * - Some file systems can't support very large sizes - 4k and 8k in
846b82021bSMark Fasheh * particular are limited to less than 128 and 256 megabytes respectively.
856b82021bSMark Fasheh *
866b82021bSMark Fasheh * The following reference table shows group descriptor and local
876b82021bSMark Fasheh * alloc maximums at various cluster sizes (4k blocksize)
886b82021bSMark Fasheh *
896b82021bSMark Fasheh * csize: 4K group: 126M la: 121M
906b82021bSMark Fasheh * csize: 8K group: 252M la: 243M
916b82021bSMark Fasheh * csize: 16K group: 504M la: 486M
926b82021bSMark Fasheh * csize: 32K group: 1008M la: 972M
936b82021bSMark Fasheh * csize: 64K group: 2016M la: 1944M
946b82021bSMark Fasheh * csize: 128K group: 4032M la: 3888M
956b82021bSMark Fasheh * csize: 256K group: 8064M la: 7776M
966b82021bSMark Fasheh * csize: 512K group: 16128M la: 15552M
976b82021bSMark Fasheh * csize: 1024K group: 32256M la: 31104M
986b82021bSMark Fasheh */
996b82021bSMark Fasheh #define OCFS2_LA_MAX_DEFAULT_MB 256
1006b82021bSMark Fasheh #define OCFS2_LA_OLD_DEFAULT 8
ocfs2_la_default_mb(struct ocfs2_super * osb)1016b82021bSMark Fasheh unsigned int ocfs2_la_default_mb(struct ocfs2_super *osb)
1026b82021bSMark Fasheh {
1036b82021bSMark Fasheh unsigned int la_mb;
1046b82021bSMark Fasheh unsigned int gd_mb;
1051739da40STao Ma unsigned int la_max_mb;
1066b82021bSMark Fasheh unsigned int megs_per_slot;
1076b82021bSMark Fasheh struct super_block *sb = osb->sb;
1086b82021bSMark Fasheh
1096b82021bSMark Fasheh gd_mb = ocfs2_clusters_to_megabytes(osb->sb,
1108571882cSTao Ma 8 * ocfs2_group_bitmap_size(sb, 0, osb->s_feature_incompat));
1116b82021bSMark Fasheh
1126b82021bSMark Fasheh /*
1136b82021bSMark Fasheh * This takes care of files systems with very small group
1146b82021bSMark Fasheh * descriptors - 512 byte blocksize at cluster sizes lower
1156b82021bSMark Fasheh * than 16K and also 1k blocksize with 4k cluster size.
1166b82021bSMark Fasheh */
1176b82021bSMark Fasheh if ((sb->s_blocksize == 512 && osb->s_clustersize <= 8192)
1186b82021bSMark Fasheh || (sb->s_blocksize == 1024 && osb->s_clustersize == 4096))
1196b82021bSMark Fasheh return OCFS2_LA_OLD_DEFAULT;
1206b82021bSMark Fasheh
1216b82021bSMark Fasheh /*
1226b82021bSMark Fasheh * Leave enough room for some block groups and make the final
1236b82021bSMark Fasheh * value we work from a multiple of 4.
1246b82021bSMark Fasheh */
1256b82021bSMark Fasheh gd_mb -= 16;
1266b82021bSMark Fasheh gd_mb &= 0xFFFFFFFB;
1276b82021bSMark Fasheh
1286b82021bSMark Fasheh la_mb = gd_mb;
1296b82021bSMark Fasheh
1306b82021bSMark Fasheh /*
1316b82021bSMark Fasheh * Keep window sizes down to a reasonable default
1326b82021bSMark Fasheh */
1336b82021bSMark Fasheh if (la_mb > OCFS2_LA_MAX_DEFAULT_MB) {
1346b82021bSMark Fasheh /*
1356b82021bSMark Fasheh * Some clustersize / blocksize combinations will have
1366b82021bSMark Fasheh * given us a larger than OCFS2_LA_MAX_DEFAULT_MB
1376b82021bSMark Fasheh * default size, but get poor distribution when
1386b82021bSMark Fasheh * limited to exactly 256 megabytes.
1396b82021bSMark Fasheh *
1406b82021bSMark Fasheh * As an example, 16K clustersize at 4K blocksize
1416b82021bSMark Fasheh * gives us a cluster group size of 504M. Paring the
1426b82021bSMark Fasheh * local alloc size down to 256 however, would give us
1436b82021bSMark Fasheh * only one window and around 200MB left in the
1446b82021bSMark Fasheh * cluster group. Instead, find the first size below
1456b82021bSMark Fasheh * 256 which would give us an even distribution.
1466b82021bSMark Fasheh *
1476b82021bSMark Fasheh * Larger cluster group sizes actually work out pretty
1486b82021bSMark Fasheh * well when pared to 256, so we don't have to do this
1496b82021bSMark Fasheh * for any group that fits more than two
1506b82021bSMark Fasheh * OCFS2_LA_MAX_DEFAULT_MB windows.
1516b82021bSMark Fasheh */
1526b82021bSMark Fasheh if (gd_mb > (2 * OCFS2_LA_MAX_DEFAULT_MB))
1536b82021bSMark Fasheh la_mb = 256;
1546b82021bSMark Fasheh else {
1556b82021bSMark Fasheh unsigned int gd_mult = gd_mb;
1566b82021bSMark Fasheh
1576b82021bSMark Fasheh while (gd_mult > 256)
1586b82021bSMark Fasheh gd_mult = gd_mult >> 1;
1596b82021bSMark Fasheh
1606b82021bSMark Fasheh la_mb = gd_mult;
1616b82021bSMark Fasheh }
1626b82021bSMark Fasheh }
1636b82021bSMark Fasheh
1646b82021bSMark Fasheh megs_per_slot = osb->osb_clusters_at_boot / osb->max_slots;
1656b82021bSMark Fasheh megs_per_slot = ocfs2_clusters_to_megabytes(osb->sb, megs_per_slot);
1666b82021bSMark Fasheh /* Too many nodes, too few disk clusters. */
1676b82021bSMark Fasheh if (megs_per_slot < la_mb)
1686b82021bSMark Fasheh la_mb = megs_per_slot;
1696b82021bSMark Fasheh
1701739da40STao Ma /* We can't store more bits than we can in a block. */
1711739da40STao Ma la_max_mb = ocfs2_clusters_to_megabytes(osb->sb,
1721739da40STao Ma ocfs2_local_alloc_size(sb) * 8);
1731739da40STao Ma if (la_mb > la_max_mb)
1741739da40STao Ma la_mb = la_max_mb;
1751739da40STao Ma
1766b82021bSMark Fasheh return la_mb;
1776b82021bSMark Fasheh }
1786b82021bSMark Fasheh
ocfs2_la_set_sizes(struct ocfs2_super * osb,int requested_mb)17973c8a800SMark Fasheh void ocfs2_la_set_sizes(struct ocfs2_super *osb, int requested_mb)
18073c8a800SMark Fasheh {
18173c8a800SMark Fasheh struct super_block *sb = osb->sb;
1826b82021bSMark Fasheh unsigned int la_default_mb = ocfs2_la_default_mb(osb);
18373c8a800SMark Fasheh unsigned int la_max_mb;
18473c8a800SMark Fasheh
18573c8a800SMark Fasheh la_max_mb = ocfs2_clusters_to_megabytes(sb,
18673c8a800SMark Fasheh ocfs2_local_alloc_size(sb) * 8);
18773c8a800SMark Fasheh
188a04733d8STao Ma trace_ocfs2_la_set_sizes(requested_mb, la_max_mb, la_default_mb);
18973c8a800SMark Fasheh
19073c8a800SMark Fasheh if (requested_mb == -1) {
19173c8a800SMark Fasheh /* No user request - use defaults */
19273c8a800SMark Fasheh osb->local_alloc_default_bits =
19373c8a800SMark Fasheh ocfs2_megabytes_to_clusters(sb, la_default_mb);
19473c8a800SMark Fasheh } else if (requested_mb > la_max_mb) {
19573c8a800SMark Fasheh /* Request is too big, we give the maximum available */
19673c8a800SMark Fasheh osb->local_alloc_default_bits =
19773c8a800SMark Fasheh ocfs2_megabytes_to_clusters(sb, la_max_mb);
19873c8a800SMark Fasheh } else {
19973c8a800SMark Fasheh osb->local_alloc_default_bits =
20073c8a800SMark Fasheh ocfs2_megabytes_to_clusters(sb, requested_mb);
20173c8a800SMark Fasheh }
20273c8a800SMark Fasheh
20373c8a800SMark Fasheh osb->local_alloc_bits = osb->local_alloc_default_bits;
20473c8a800SMark Fasheh }
20573c8a800SMark Fasheh
ocfs2_la_state_enabled(struct ocfs2_super * osb)2069c7af40bSMark Fasheh static inline int ocfs2_la_state_enabled(struct ocfs2_super *osb)
2079c7af40bSMark Fasheh {
2089c7af40bSMark Fasheh return (osb->local_alloc_state == OCFS2_LA_THROTTLED ||
2099c7af40bSMark Fasheh osb->local_alloc_state == OCFS2_LA_ENABLED);
2109c7af40bSMark Fasheh }
2119c7af40bSMark Fasheh
ocfs2_local_alloc_seen_free_bits(struct ocfs2_super * osb,unsigned int num_clusters)2129c7af40bSMark Fasheh void ocfs2_local_alloc_seen_free_bits(struct ocfs2_super *osb,
2139c7af40bSMark Fasheh unsigned int num_clusters)
2149c7af40bSMark Fasheh {
2159c7af40bSMark Fasheh spin_lock(&osb->osb_lock);
2169c7af40bSMark Fasheh if (osb->local_alloc_state == OCFS2_LA_DISABLED ||
2179c7af40bSMark Fasheh osb->local_alloc_state == OCFS2_LA_THROTTLED)
2189c7af40bSMark Fasheh if (num_clusters >= osb->local_alloc_default_bits) {
2199c7af40bSMark Fasheh cancel_delayed_work(&osb->la_enable_wq);
2209c7af40bSMark Fasheh osb->local_alloc_state = OCFS2_LA_ENABLED;
2219c7af40bSMark Fasheh }
2229c7af40bSMark Fasheh spin_unlock(&osb->osb_lock);
2239c7af40bSMark Fasheh }
2249c7af40bSMark Fasheh
ocfs2_la_enable_worker(struct work_struct * work)2259c7af40bSMark Fasheh void ocfs2_la_enable_worker(struct work_struct *work)
2269c7af40bSMark Fasheh {
2279c7af40bSMark Fasheh struct ocfs2_super *osb =
2289c7af40bSMark Fasheh container_of(work, struct ocfs2_super,
2299c7af40bSMark Fasheh la_enable_wq.work);
2309c7af40bSMark Fasheh spin_lock(&osb->osb_lock);
2319c7af40bSMark Fasheh osb->local_alloc_state = OCFS2_LA_ENABLED;
2329c7af40bSMark Fasheh spin_unlock(&osb->osb_lock);
2339c7af40bSMark Fasheh }
2349c7af40bSMark Fasheh
235ccd979bdSMark Fasheh /*
236ccd979bdSMark Fasheh * Tell us whether a given allocation should use the local alloc
237ccd979bdSMark Fasheh * file. Otherwise, it has to go to the main bitmap.
2389c7af40bSMark Fasheh *
2399c7af40bSMark Fasheh * This function does semi-dirty reads of local alloc size and state!
2409c7af40bSMark Fasheh * This is ok however, as the values are re-checked once under mutex.
241ccd979bdSMark Fasheh */
ocfs2_alloc_should_use_local(struct ocfs2_super * osb,u64 bits)242ccd979bdSMark Fasheh int ocfs2_alloc_should_use_local(struct ocfs2_super *osb, u64 bits)
243ccd979bdSMark Fasheh {
2442fbe8d1eSSunil Mushran int ret = 0;
2459c7af40bSMark Fasheh int la_bits;
246ccd979bdSMark Fasheh
2479c7af40bSMark Fasheh spin_lock(&osb->osb_lock);
2489c7af40bSMark Fasheh la_bits = osb->local_alloc_bits;
2499c7af40bSMark Fasheh
2509c7af40bSMark Fasheh if (!ocfs2_la_state_enabled(osb))
2512fbe8d1eSSunil Mushran goto bail;
252ccd979bdSMark Fasheh
253ccd979bdSMark Fasheh /* la_bits should be at least twice the size (in clusters) of
254ccd979bdSMark Fasheh * a new block group. We want to be sure block group
255ccd979bdSMark Fasheh * allocations go through the local alloc, so allow an
256ccd979bdSMark Fasheh * allocation to take up to half the bitmap. */
257ccd979bdSMark Fasheh if (bits > (la_bits / 2))
2582fbe8d1eSSunil Mushran goto bail;
259ccd979bdSMark Fasheh
2602fbe8d1eSSunil Mushran ret = 1;
2612fbe8d1eSSunil Mushran bail:
262a04733d8STao Ma trace_ocfs2_alloc_should_use_local(
263a04733d8STao Ma (unsigned long long)bits, osb->local_alloc_state, la_bits, ret);
2649c7af40bSMark Fasheh spin_unlock(&osb->osb_lock);
2652fbe8d1eSSunil Mushran return ret;
266ccd979bdSMark Fasheh }
267ccd979bdSMark Fasheh
ocfs2_load_local_alloc(struct ocfs2_super * osb)268ccd979bdSMark Fasheh int ocfs2_load_local_alloc(struct ocfs2_super *osb)
269ccd979bdSMark Fasheh {
270ccd979bdSMark Fasheh int status = 0;
271ccd979bdSMark Fasheh struct ocfs2_dinode *alloc = NULL;
272ccd979bdSMark Fasheh struct buffer_head *alloc_bh = NULL;
273ccd979bdSMark Fasheh u32 num_used;
274ccd979bdSMark Fasheh struct inode *inode = NULL;
275ccd979bdSMark Fasheh struct ocfs2_local_alloc *la;
276ccd979bdSMark Fasheh
277ebcee4b5SMark Fasheh if (osb->local_alloc_bits == 0)
2782fbe8d1eSSunil Mushran goto bail;
2792fbe8d1eSSunil Mushran
280ebcee4b5SMark Fasheh if (osb->local_alloc_bits >= osb->bitmap_cpg) {
2812fbe8d1eSSunil Mushran mlog(ML_NOTICE, "Requested local alloc window %d is larger "
2822fbe8d1eSSunil Mushran "than max possible %u. Using defaults.\n",
283ebcee4b5SMark Fasheh osb->local_alloc_bits, (osb->bitmap_cpg - 1));
284ebcee4b5SMark Fasheh osb->local_alloc_bits =
285ebcee4b5SMark Fasheh ocfs2_megabytes_to_clusters(osb->sb,
2866b82021bSMark Fasheh ocfs2_la_default_mb(osb));
2872fbe8d1eSSunil Mushran }
2882fbe8d1eSSunil Mushran
289ccd979bdSMark Fasheh /* read the alloc off disk */
290ccd979bdSMark Fasheh inode = ocfs2_get_system_file_inode(osb, LOCAL_ALLOC_SYSTEM_INODE,
291ccd979bdSMark Fasheh osb->slot_num);
292ccd979bdSMark Fasheh if (!inode) {
293ccd979bdSMark Fasheh status = -EINVAL;
294ccd979bdSMark Fasheh mlog_errno(status);
295ccd979bdSMark Fasheh goto bail;
296ccd979bdSMark Fasheh }
297ccd979bdSMark Fasheh
298b657c95cSJoel Becker status = ocfs2_read_inode_block_full(inode, &alloc_bh,
299b657c95cSJoel Becker OCFS2_BH_IGNORE_CACHE);
300ccd979bdSMark Fasheh if (status < 0) {
301ccd979bdSMark Fasheh mlog_errno(status);
302ccd979bdSMark Fasheh goto bail;
303ccd979bdSMark Fasheh }
304ccd979bdSMark Fasheh
305ccd979bdSMark Fasheh alloc = (struct ocfs2_dinode *) alloc_bh->b_data;
306ccd979bdSMark Fasheh la = OCFS2_LOCAL_ALLOC(alloc);
307ccd979bdSMark Fasheh
308ccd979bdSMark Fasheh if (!(le32_to_cpu(alloc->i_flags) &
309ccd979bdSMark Fasheh (OCFS2_LOCAL_ALLOC_FL|OCFS2_BITMAP_FL))) {
310b0697053SMark Fasheh mlog(ML_ERROR, "Invalid local alloc inode, %llu\n",
311b0697053SMark Fasheh (unsigned long long)OCFS2_I(inode)->ip_blkno);
312ccd979bdSMark Fasheh status = -EINVAL;
313ccd979bdSMark Fasheh goto bail;
314ccd979bdSMark Fasheh }
315ccd979bdSMark Fasheh
316ccd979bdSMark Fasheh if ((la->la_size == 0) ||
317ccd979bdSMark Fasheh (le16_to_cpu(la->la_size) > ocfs2_local_alloc_size(inode->i_sb))) {
318ccd979bdSMark Fasheh mlog(ML_ERROR, "Local alloc size is invalid (la_size = %u)\n",
319ccd979bdSMark Fasheh le16_to_cpu(la->la_size));
320ccd979bdSMark Fasheh status = -EINVAL;
321ccd979bdSMark Fasheh goto bail;
322ccd979bdSMark Fasheh }
323ccd979bdSMark Fasheh
324ccd979bdSMark Fasheh /* do a little verification. */
325ccd979bdSMark Fasheh num_used = ocfs2_local_alloc_count_bits(alloc);
326ccd979bdSMark Fasheh
327ccd979bdSMark Fasheh /* hopefully the local alloc has always been recovered before
328ccd979bdSMark Fasheh * we load it. */
329ccd979bdSMark Fasheh if (num_used
330ccd979bdSMark Fasheh || alloc->id1.bitmap1.i_used
331ccd979bdSMark Fasheh || alloc->id1.bitmap1.i_total
332532e1e54SJunxiao Bi || la->la_bm_off) {
333532e1e54SJunxiao Bi mlog(ML_ERROR, "inconsistent detected, clean journal with"
334532e1e54SJunxiao Bi " unrecovered local alloc, please run fsck.ocfs2!\n"
335ccd979bdSMark Fasheh "found = %u, set = %u, taken = %u, off = %u\n",
336ccd979bdSMark Fasheh num_used, le32_to_cpu(alloc->id1.bitmap1.i_used),
337ccd979bdSMark Fasheh le32_to_cpu(alloc->id1.bitmap1.i_total),
338ccd979bdSMark Fasheh OCFS2_LOCAL_ALLOC(alloc)->la_bm_off);
339ccd979bdSMark Fasheh
340532e1e54SJunxiao Bi status = -EINVAL;
341532e1e54SJunxiao Bi goto bail;
342532e1e54SJunxiao Bi }
343532e1e54SJunxiao Bi
344ccd979bdSMark Fasheh osb->local_alloc_bh = alloc_bh;
345ccd979bdSMark Fasheh osb->local_alloc_state = OCFS2_LA_ENABLED;
346ccd979bdSMark Fasheh
347ccd979bdSMark Fasheh bail:
348ccd979bdSMark Fasheh if (status < 0)
349ccd979bdSMark Fasheh brelse(alloc_bh);
350ccd979bdSMark Fasheh iput(inode);
351ccd979bdSMark Fasheh
352a04733d8STao Ma trace_ocfs2_load_local_alloc(osb->local_alloc_bits);
3532fbe8d1eSSunil Mushran
354c1e8d35eSTao Ma if (status)
355c1e8d35eSTao Ma mlog_errno(status);
356ccd979bdSMark Fasheh return status;
357ccd979bdSMark Fasheh }
358ccd979bdSMark Fasheh
359ccd979bdSMark Fasheh /*
360ccd979bdSMark Fasheh * return any unused bits to the bitmap and write out a clean
361ccd979bdSMark Fasheh * local_alloc.
362ccd979bdSMark Fasheh *
363ccd979bdSMark Fasheh * local_alloc_bh is optional. If not passed, we will simply use the
364ccd979bdSMark Fasheh * one off osb. If you do pass it however, be warned that it *will* be
365ccd979bdSMark Fasheh * returned brelse'd and NULL'd out.*/
ocfs2_shutdown_local_alloc(struct ocfs2_super * osb)366ccd979bdSMark Fasheh void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb)
367ccd979bdSMark Fasheh {
368ccd979bdSMark Fasheh int status;
3691fabe148SMark Fasheh handle_t *handle;
370ccd979bdSMark Fasheh struct inode *local_alloc_inode = NULL;
371ccd979bdSMark Fasheh struct buffer_head *bh = NULL;
372ccd979bdSMark Fasheh struct buffer_head *main_bm_bh = NULL;
373ccd979bdSMark Fasheh struct inode *main_bm_inode = NULL;
374ccd979bdSMark Fasheh struct ocfs2_dinode *alloc_copy = NULL;
375ccd979bdSMark Fasheh struct ocfs2_dinode *alloc = NULL;
376ccd979bdSMark Fasheh
3779c7af40bSMark Fasheh cancel_delayed_work(&osb->la_enable_wq);
378b918c430SYi Li if (osb->ocfs2_wq)
37935ddf78eSjiangyiwen flush_workqueue(osb->ocfs2_wq);
3809c7af40bSMark Fasheh
381ccd979bdSMark Fasheh if (osb->local_alloc_state == OCFS2_LA_UNUSED)
3828898a5a5SMark Fasheh goto out;
383ccd979bdSMark Fasheh
384ccd979bdSMark Fasheh local_alloc_inode =
385ccd979bdSMark Fasheh ocfs2_get_system_file_inode(osb,
386ccd979bdSMark Fasheh LOCAL_ALLOC_SYSTEM_INODE,
387ccd979bdSMark Fasheh osb->slot_num);
388ccd979bdSMark Fasheh if (!local_alloc_inode) {
389ccd979bdSMark Fasheh status = -ENOENT;
390ccd979bdSMark Fasheh mlog_errno(status);
3918898a5a5SMark Fasheh goto out;
392ccd979bdSMark Fasheh }
393ccd979bdSMark Fasheh
394ccd979bdSMark Fasheh osb->local_alloc_state = OCFS2_LA_DISABLED;
395ccd979bdSMark Fasheh
396d02f00ccSMark Fasheh ocfs2_resmap_uninit(&osb->osb_la_resmap);
397d02f00ccSMark Fasheh
398ccd979bdSMark Fasheh main_bm_inode = ocfs2_get_system_file_inode(osb,
399ccd979bdSMark Fasheh GLOBAL_BITMAP_SYSTEM_INODE,
400ccd979bdSMark Fasheh OCFS2_INVALID_SLOT);
401ccd979bdSMark Fasheh if (!main_bm_inode) {
402ccd979bdSMark Fasheh status = -EINVAL;
403ccd979bdSMark Fasheh mlog_errno(status);
4048898a5a5SMark Fasheh goto out;
405ccd979bdSMark Fasheh }
406ccd979bdSMark Fasheh
4075955102cSAl Viro inode_lock(main_bm_inode);
4088898a5a5SMark Fasheh
409e63aecb6SMark Fasheh status = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 1);
410ccd979bdSMark Fasheh if (status < 0) {
411ccd979bdSMark Fasheh mlog_errno(status);
4128898a5a5SMark Fasheh goto out_mutex;
413ccd979bdSMark Fasheh }
414ccd979bdSMark Fasheh
415ccd979bdSMark Fasheh /* WINDOW_MOVE_CREDITS is a bit heavy... */
41665eff9ccSMark Fasheh handle = ocfs2_start_trans(osb, OCFS2_WINDOW_MOVE_CREDITS);
417ccd979bdSMark Fasheh if (IS_ERR(handle)) {
418ccd979bdSMark Fasheh mlog_errno(PTR_ERR(handle));
419ccd979bdSMark Fasheh handle = NULL;
4208898a5a5SMark Fasheh goto out_unlock;
421ccd979bdSMark Fasheh }
422ccd979bdSMark Fasheh
423ccd979bdSMark Fasheh bh = osb->local_alloc_bh;
424ccd979bdSMark Fasheh alloc = (struct ocfs2_dinode *) bh->b_data;
425ccd979bdSMark Fasheh
426d8b2fa65SFuqian Huang alloc_copy = kmemdup(alloc, bh->b_size, GFP_NOFS);
427ccd979bdSMark Fasheh if (!alloc_copy) {
428ccd979bdSMark Fasheh status = -ENOMEM;
4298898a5a5SMark Fasheh goto out_commit;
430ccd979bdSMark Fasheh }
431ccd979bdSMark Fasheh
4320cf2f763SJoel Becker status = ocfs2_journal_access_di(handle, INODE_CACHE(local_alloc_inode),
4330cf2f763SJoel Becker bh, OCFS2_JOURNAL_ACCESS_WRITE);
434ccd979bdSMark Fasheh if (status < 0) {
435ccd979bdSMark Fasheh mlog_errno(status);
4368898a5a5SMark Fasheh goto out_commit;
437ccd979bdSMark Fasheh }
438ccd979bdSMark Fasheh
439ccd979bdSMark Fasheh ocfs2_clear_local_alloc(alloc);
440ec20cec7SJoel Becker ocfs2_journal_dirty(handle, bh);
441ccd979bdSMark Fasheh
442ccd979bdSMark Fasheh brelse(bh);
443ccd979bdSMark Fasheh osb->local_alloc_bh = NULL;
444ccd979bdSMark Fasheh osb->local_alloc_state = OCFS2_LA_UNUSED;
445ccd979bdSMark Fasheh
446ccd979bdSMark Fasheh status = ocfs2_sync_local_to_main(osb, handle, alloc_copy,
447ccd979bdSMark Fasheh main_bm_inode, main_bm_bh);
448ccd979bdSMark Fasheh if (status < 0)
449ccd979bdSMark Fasheh mlog_errno(status);
450ccd979bdSMark Fasheh
4518898a5a5SMark Fasheh out_commit:
45202dc1af4SMark Fasheh ocfs2_commit_trans(osb, handle);
453ccd979bdSMark Fasheh
4548898a5a5SMark Fasheh out_unlock:
455ccd979bdSMark Fasheh brelse(main_bm_bh);
456ccd979bdSMark Fasheh
457e63aecb6SMark Fasheh ocfs2_inode_unlock(main_bm_inode, 1);
4588898a5a5SMark Fasheh
4598898a5a5SMark Fasheh out_mutex:
4605955102cSAl Viro inode_unlock(main_bm_inode);
461ccd979bdSMark Fasheh iput(main_bm_inode);
462ccd979bdSMark Fasheh
4638898a5a5SMark Fasheh out:
464ccd979bdSMark Fasheh iput(local_alloc_inode);
465ccd979bdSMark Fasheh
466ccd979bdSMark Fasheh kfree(alloc_copy);
467ccd979bdSMark Fasheh }
468ccd979bdSMark Fasheh
469ccd979bdSMark Fasheh /*
470ccd979bdSMark Fasheh * We want to free the bitmap bits outside of any recovery context as
471ccd979bdSMark Fasheh * we'll need a cluster lock to do so, but we must clear the local
472ccd979bdSMark Fasheh * alloc before giving up the recovered nodes journal. To solve this,
473ccd979bdSMark Fasheh * we kmalloc a copy of the local alloc before it's change for the
474ccd979bdSMark Fasheh * caller to process with ocfs2_complete_local_alloc_recovery
475ccd979bdSMark Fasheh */
ocfs2_begin_local_alloc_recovery(struct ocfs2_super * osb,int slot_num,struct ocfs2_dinode ** alloc_copy)476ccd979bdSMark Fasheh int ocfs2_begin_local_alloc_recovery(struct ocfs2_super *osb,
477ccd979bdSMark Fasheh int slot_num,
478ccd979bdSMark Fasheh struct ocfs2_dinode **alloc_copy)
479ccd979bdSMark Fasheh {
480ccd979bdSMark Fasheh int status = 0;
481ccd979bdSMark Fasheh struct buffer_head *alloc_bh = NULL;
482ccd979bdSMark Fasheh struct inode *inode = NULL;
483ccd979bdSMark Fasheh struct ocfs2_dinode *alloc;
484ccd979bdSMark Fasheh
485a04733d8STao Ma trace_ocfs2_begin_local_alloc_recovery(slot_num);
486ccd979bdSMark Fasheh
487ccd979bdSMark Fasheh *alloc_copy = NULL;
488ccd979bdSMark Fasheh
489ccd979bdSMark Fasheh inode = ocfs2_get_system_file_inode(osb,
490ccd979bdSMark Fasheh LOCAL_ALLOC_SYSTEM_INODE,
491ccd979bdSMark Fasheh slot_num);
492ccd979bdSMark Fasheh if (!inode) {
493ccd979bdSMark Fasheh status = -EINVAL;
494ccd979bdSMark Fasheh mlog_errno(status);
495ccd979bdSMark Fasheh goto bail;
496ccd979bdSMark Fasheh }
497ccd979bdSMark Fasheh
4985955102cSAl Viro inode_lock(inode);
499ccd979bdSMark Fasheh
500b657c95cSJoel Becker status = ocfs2_read_inode_block_full(inode, &alloc_bh,
501b657c95cSJoel Becker OCFS2_BH_IGNORE_CACHE);
502ccd979bdSMark Fasheh if (status < 0) {
503ccd979bdSMark Fasheh mlog_errno(status);
504ccd979bdSMark Fasheh goto bail;
505ccd979bdSMark Fasheh }
506ccd979bdSMark Fasheh
507ccd979bdSMark Fasheh *alloc_copy = kmalloc(alloc_bh->b_size, GFP_KERNEL);
508ccd979bdSMark Fasheh if (!(*alloc_copy)) {
509ccd979bdSMark Fasheh status = -ENOMEM;
510ccd979bdSMark Fasheh goto bail;
511ccd979bdSMark Fasheh }
512ccd979bdSMark Fasheh memcpy((*alloc_copy), alloc_bh->b_data, alloc_bh->b_size);
513ccd979bdSMark Fasheh
514ccd979bdSMark Fasheh alloc = (struct ocfs2_dinode *) alloc_bh->b_data;
515ccd979bdSMark Fasheh ocfs2_clear_local_alloc(alloc);
516ccd979bdSMark Fasheh
51713723d00SJoel Becker ocfs2_compute_meta_ecc(osb->sb, alloc_bh->b_data, &alloc->i_check);
5188cb471e8SJoel Becker status = ocfs2_write_block(osb, alloc_bh, INODE_CACHE(inode));
519ccd979bdSMark Fasheh if (status < 0)
520ccd979bdSMark Fasheh mlog_errno(status);
521ccd979bdSMark Fasheh
522ccd979bdSMark Fasheh bail:
523d787ab09STim Gardner if (status < 0) {
524ccd979bdSMark Fasheh kfree(*alloc_copy);
525ccd979bdSMark Fasheh *alloc_copy = NULL;
526ccd979bdSMark Fasheh }
527ccd979bdSMark Fasheh
528ccd979bdSMark Fasheh brelse(alloc_bh);
529ccd979bdSMark Fasheh
530ccd979bdSMark Fasheh if (inode) {
5315955102cSAl Viro inode_unlock(inode);
532ccd979bdSMark Fasheh iput(inode);
533ccd979bdSMark Fasheh }
534ccd979bdSMark Fasheh
535c1e8d35eSTao Ma if (status)
536c1e8d35eSTao Ma mlog_errno(status);
537ccd979bdSMark Fasheh return status;
538ccd979bdSMark Fasheh }
539ccd979bdSMark Fasheh
540ccd979bdSMark Fasheh /*
541ccd979bdSMark Fasheh * Step 2: By now, we've completed the journal recovery, we've stamped
542ccd979bdSMark Fasheh * a clean local alloc on disk and dropped the node out of the
543ccd979bdSMark Fasheh * recovery map. Dlm locks will no longer stall, so lets clear out the
544ccd979bdSMark Fasheh * main bitmap.
545ccd979bdSMark Fasheh */
ocfs2_complete_local_alloc_recovery(struct ocfs2_super * osb,struct ocfs2_dinode * alloc)546ccd979bdSMark Fasheh int ocfs2_complete_local_alloc_recovery(struct ocfs2_super *osb,
547ccd979bdSMark Fasheh struct ocfs2_dinode *alloc)
548ccd979bdSMark Fasheh {
549ccd979bdSMark Fasheh int status;
5501fabe148SMark Fasheh handle_t *handle;
551ccd979bdSMark Fasheh struct buffer_head *main_bm_bh = NULL;
5528898a5a5SMark Fasheh struct inode *main_bm_inode;
553ccd979bdSMark Fasheh
554ccd979bdSMark Fasheh main_bm_inode = ocfs2_get_system_file_inode(osb,
555ccd979bdSMark Fasheh GLOBAL_BITMAP_SYSTEM_INODE,
556ccd979bdSMark Fasheh OCFS2_INVALID_SLOT);
557ccd979bdSMark Fasheh if (!main_bm_inode) {
558ccd979bdSMark Fasheh status = -EINVAL;
559ccd979bdSMark Fasheh mlog_errno(status);
5608898a5a5SMark Fasheh goto out;
561ccd979bdSMark Fasheh }
562ccd979bdSMark Fasheh
5635955102cSAl Viro inode_lock(main_bm_inode);
5648898a5a5SMark Fasheh
565e63aecb6SMark Fasheh status = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 1);
566ccd979bdSMark Fasheh if (status < 0) {
567ccd979bdSMark Fasheh mlog_errno(status);
5688898a5a5SMark Fasheh goto out_mutex;
569ccd979bdSMark Fasheh }
570ccd979bdSMark Fasheh
57165eff9ccSMark Fasheh handle = ocfs2_start_trans(osb, OCFS2_WINDOW_MOVE_CREDITS);
572ccd979bdSMark Fasheh if (IS_ERR(handle)) {
573ccd979bdSMark Fasheh status = PTR_ERR(handle);
574ccd979bdSMark Fasheh handle = NULL;
575ccd979bdSMark Fasheh mlog_errno(status);
5768898a5a5SMark Fasheh goto out_unlock;
577ccd979bdSMark Fasheh }
578ccd979bdSMark Fasheh
579ccd979bdSMark Fasheh /* we want the bitmap change to be recorded on disk asap */
5801fabe148SMark Fasheh handle->h_sync = 1;
581ccd979bdSMark Fasheh
582ccd979bdSMark Fasheh status = ocfs2_sync_local_to_main(osb, handle, alloc,
583ccd979bdSMark Fasheh main_bm_inode, main_bm_bh);
584ccd979bdSMark Fasheh if (status < 0)
585ccd979bdSMark Fasheh mlog_errno(status);
586ccd979bdSMark Fasheh
58702dc1af4SMark Fasheh ocfs2_commit_trans(osb, handle);
588ccd979bdSMark Fasheh
5898898a5a5SMark Fasheh out_unlock:
590e63aecb6SMark Fasheh ocfs2_inode_unlock(main_bm_inode, 1);
5918898a5a5SMark Fasheh
5928898a5a5SMark Fasheh out_mutex:
5935955102cSAl Viro inode_unlock(main_bm_inode);
5948898a5a5SMark Fasheh
595ccd979bdSMark Fasheh brelse(main_bm_bh);
596ccd979bdSMark Fasheh
597ccd979bdSMark Fasheh iput(main_bm_inode);
598ccd979bdSMark Fasheh
5998898a5a5SMark Fasheh out:
6004d0ddb2cSTao Ma if (!status)
601b89c5428STiger Yang ocfs2_init_steal_slots(osb);
602c1e8d35eSTao Ma if (status)
603c1e8d35eSTao Ma mlog_errno(status);
604ccd979bdSMark Fasheh return status;
605ccd979bdSMark Fasheh }
606ccd979bdSMark Fasheh
607ccd979bdSMark Fasheh /*
6089c7af40bSMark Fasheh * make sure we've got at least bits_wanted contiguous bits in the
609137cebf9Shongnanli * local alloc. You lose them when you drop i_rwsem.
610ccd979bdSMark Fasheh *
611ccd979bdSMark Fasheh * We will add ourselves to the transaction passed in, but may start
612ccd979bdSMark Fasheh * our own in order to shift windows.
613ccd979bdSMark Fasheh */
int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb,
				   u32 bits_wanted,
				   struct ocfs2_alloc_context *ac)
{
	struct inode *local_alloc_inode;
	struct ocfs2_dinode *alloc;
	unsigned int free_bits;
	int la_usable;
	int status;

	BUG_ON(!ac);

	local_alloc_inode = ocfs2_get_system_file_inode(osb,
						LOCAL_ALLOC_SYSTEM_INODE,
						osb->slot_num);
	if (!local_alloc_inode) {
		status = -ENOENT;
		mlog_errno(status);
		goto bail;
	}

	/*
	 * On success the inode is handed to the caller still locked and
	 * referenced (via ac->ac_inode); only the failure path below
	 * unlocks and drops it.
	 */
	inode_lock(local_alloc_inode);

	/*
	 * Sample the allocator state and window size under osb_lock:
	 * another process may have changed them before we got the inode
	 * lock.
	 */
	spin_lock(&osb->osb_lock);
	la_usable = ocfs2_la_state_enabled(osb) &&
		    (bits_wanted <= osb->local_alloc_bits);
	spin_unlock(&osb->osb_lock);
	if (!la_usable) {
		status = -ENOSPC;
		goto bail;
	}

	alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;

#ifdef CONFIG_OCFS2_DEBUG_FS
	/* Cross-check the recorded used count against the bitmap itself. */
	if (le32_to_cpu(alloc->id1.bitmap1.i_used) !=
	    ocfs2_local_alloc_count_bits(alloc)) {
		status = ocfs2_error(osb->sb, "local alloc inode %llu says it has %u used bits, but a count shows %u\n",
				(unsigned long long)le64_to_cpu(alloc->i_blkno),
				le32_to_cpu(alloc->id1.bitmap1.i_used),
				ocfs2_local_alloc_count_bits(alloc));
		goto bail;
	}
#endif

	free_bits = le32_to_cpu(alloc->id1.bitmap1.i_total) -
		    le32_to_cpu(alloc->id1.bitmap1.i_used);
	if (bits_wanted > free_bits) {
		/* Not enough room left in this window: move it. */
		status = ocfs2_local_alloc_slide_window(osb,
							local_alloc_inode);
		if (status < 0) {
			if (status != -ENOSPC)
				mlog_errno(status);
			goto bail;
		}

		/*
		 * The slide may have shrunk the window or disabled the
		 * local alloc altogether, so look again before promising
		 * anything to the caller.
		 */
		status = -ENOSPC;
		if (!ocfs2_la_state_enabled(osb))
			goto bail;

		free_bits = le32_to_cpu(alloc->id1.bitmap1.i_total) -
			    le32_to_cpu(alloc->id1.bitmap1.i_used);
		if (bits_wanted > free_bits)
			goto bail;
	}

	/* We should never use localalloc from another slot */
	ac->ac_alloc_slot = osb->slot_num;
	ac->ac_which = OCFS2_AC_USE_LOCAL;
	ac->ac_inode = local_alloc_inode;
	get_bh(osb->local_alloc_bh);
	ac->ac_bh = osb->local_alloc_bh;
	status = 0;

bail:
	if (status < 0 && local_alloc_inode) {
		inode_unlock(local_alloc_inode);
		iput(local_alloc_inode);
	}

	trace_ocfs2_reserve_local_alloc_bits(
		(unsigned long long)ac->ac_max_block,
		bits_wanted, osb->slot_num, status);

	if (status)
		mlog_errno(status);
	return status;
}
712ccd979bdSMark Fasheh
/*
 * Claim up to @bits_wanted bits from the local alloc window.
 *
 * @ac must have been set up for local allocation (ac_which ==
 * OCFS2_AC_USE_LOCAL).  On success *@bit_off receives the claimed start
 * (window base la_bm_off plus the offset found inside the window) and
 * *@num_bits the number of bits actually claimed, which the search may
 * have lowered below @bits_wanted.
 *
 * Returns 0 on success, -ENOSPC if no suitable run was found, or the
 * error from ocfs2_journal_access_di().
 */
int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb,
				 handle_t *handle,
				 struct ocfs2_alloc_context *ac,
				 u32 bits_wanted,
				 u32 *bit_off,
				 u32 *num_bits)
{
	struct ocfs2_local_alloc *la;
	struct ocfs2_dinode *alloc;
	struct inode *la_inode;
	void *bitmap;
	int status, start;
	u32 remaining;

	BUG_ON(ac->ac_which != OCFS2_AC_USE_LOCAL);

	la_inode = ac->ac_inode;
	alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
	la = OCFS2_LOCAL_ALLOC(alloc);

	/* May reduce bits_wanted to what could actually be found. */
	start = ocfs2_local_alloc_find_clear_bits(osb, alloc, &bits_wanted,
						  ac->ac_resv);
	if (start == -1) {
		/* TODO: Shouldn't we just BUG here? */
		status = -ENOSPC;
		mlog_errno(status);
		goto bail;
	}

	bitmap = la->la_bitmap;
	*num_bits = bits_wanted;
	*bit_off = le32_to_cpu(la->la_bm_off) + start;

	status = ocfs2_journal_access_di(handle,
					 INODE_CACHE(la_inode),
					 osb->local_alloc_bh,
					 OCFS2_JOURNAL_ACCESS_WRITE);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}

	ocfs2_resmap_claimed_bits(&osb->osb_la_resmap, ac->ac_resv, start,
				  bits_wanted);

	/* Mark the claimed run as used in the window bitmap. */
	for (remaining = bits_wanted; remaining; remaining--)
		ocfs2_set_bit(start++, bitmap);

	le32_add_cpu(&alloc->id1.bitmap1.i_used, *num_bits);
	ocfs2_journal_dirty(handle, osb->local_alloc_bh);

bail:
	if (status)
		mlog_errno(status);
	return status;
}
768ccd979bdSMark Fasheh
/*
 * Give @num_bits bits starting at @bit_off back to the local alloc
 * window.
 *
 * @bit_off is expressed in the same terms as the value returned by
 * ocfs2_claim_local_alloc_bits(); the window base (la_bm_off) is
 * subtracted to get the position inside the window bitmap.  The bits
 * are cleared and i_used is lowered by @num_bits under journal
 * protection.
 *
 * Returns 0 on success or the error from ocfs2_journal_access_di().
 */
int ocfs2_free_local_alloc_bits(struct ocfs2_super *osb,
				handle_t *handle,
				struct ocfs2_alloc_context *ac,
				u32 bit_off,
				u32 num_bits)
{
	struct ocfs2_local_alloc *la;
	struct ocfs2_dinode *alloc;
	struct inode *la_inode;
	void *bitmap;
	int status, start;
	u32 remaining;

	BUG_ON(ac->ac_which != OCFS2_AC_USE_LOCAL);

	la_inode = ac->ac_inode;
	alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
	la = OCFS2_LOCAL_ALLOC(alloc);

	start = bit_off - le32_to_cpu(la->la_bm_off);
	bitmap = la->la_bitmap;

	status = ocfs2_journal_access_di(handle,
					 INODE_CACHE(la_inode),
					 osb->local_alloc_bh,
					 OCFS2_JOURNAL_ACCESS_WRITE);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}

	for (remaining = num_bits; remaining; remaining--)
		ocfs2_clear_bit(start++, bitmap);

	/* Unsigned negation: adds the two's complement, i.e. subtracts. */
	le32_add_cpu(&alloc->id1.bitmap1.i_used, -num_bits);
	ocfs2_journal_dirty(handle, osb->local_alloc_bh);

bail:
	return status;
}
810fb951eb5SZongxun Wang
ocfs2_local_alloc_count_bits(struct ocfs2_dinode * alloc)811ccd979bdSMark Fasheh static u32 ocfs2_local_alloc_count_bits(struct ocfs2_dinode *alloc)
812ccd979bdSMark Fasheh {
813a75613ecSAkinobu Mita u32 count;
814ccd979bdSMark Fasheh struct ocfs2_local_alloc *la = OCFS2_LOCAL_ALLOC(alloc);
815ccd979bdSMark Fasheh
816a75613ecSAkinobu Mita count = memweight(la->la_bitmap, le16_to_cpu(la->la_size));
817ccd979bdSMark Fasheh
818a04733d8STao Ma trace_ocfs2_local_alloc_count_bits(count);
819ccd979bdSMark Fasheh return count;
820ccd979bdSMark Fasheh }
821ccd979bdSMark Fasheh
/*
 * Find a run of clear bits in the local alloc window.
 *
 * @numbits is in/out: on entry the number of bits wanted; when the
 * reservation code satisfies the request with fewer bits it is lowered
 * to the number found.  If @resv is NULL a temporary reservation is
 * used for the lookup and discarded before returning.
 *
 * Returns the offset of the run within the window bitmap, or -1 if the
 * window is empty or no run of *@numbits clear bits exists.
 */
static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb,
				     struct ocfs2_dinode *alloc,
				     u32 *numbits,
				     struct ocfs2_alloc_reservation *resv)
{
	struct ocfs2_reservation_map *resmap = &osb->osb_la_resmap;
	struct ocfs2_alloc_reservation tmp_resv;
	int using_tmp_resv = 0;
	int numfound = 0, bitoff, left, startoff;
	void *bitmap = NULL;

	/* No window at all. */
	if (!alloc->id1.bitmap1.i_total) {
		bitoff = -1;
		goto bail;
	}

	if (!resv) {
		ocfs2_resv_init_once(&tmp_resv);
		ocfs2_resv_set_type(&tmp_resv, OCFS2_RESV_FLAG_TMP);
		resv = &tmp_resv;
		using_tmp_resv = 1;
	}

	numfound = *numbits;
	if (ocfs2_resmap_resv_bits(resmap, resv, &bitoff, &numfound) == 0) {
		/* The reservation may have come back smaller than asked. */
		if (numfound < *numbits)
			*numbits = numfound;
		goto bail;
	}

	/*
	 * Code error. While reservations are enabled, local
	 * allocation should _always_ go through them.
	 */
	BUG_ON(osb->osb_resv_level != 0);

	/*
	 * Reservations are disabled: fall back to a linear scan for
	 * *numbits contiguous zero bits.
	 */
	bitmap = OCFS2_LOCAL_ALLOC(alloc)->la_bitmap;
	left = le32_to_cpu(alloc->id1.bitmap1.i_total);
	numfound = 0;
	startoff = 0;

	for (;;) {
		bitoff = ocfs2_find_next_zero_bit(bitmap, left, startoff);
		if (bitoff == -1 || bitoff == left)
			break;

		/*
		 * A zero exactly where we resumed extends the current
		 * run; a zero further on (after some ones) starts a new
		 * run of length one.
		 */
		if (bitoff == startoff)
			numfound++;
		else
			numfound = 1;
		startoff = bitoff + 1;

		/* we got everything we needed */
		if (numfound == *numbits)
			break;
	}

	trace_ocfs2_local_alloc_find_clear_bits_search_bitmap(bitoff, numfound);

	/* startoff is one past the run's last bit; back up to its start. */
	if (numfound == *numbits)
		bitoff = startoff - numfound;
	else
		bitoff = -1;

bail:
	if (using_tmp_resv)
		ocfs2_resv_discard(resmap, resv);

	trace_ocfs2_local_alloc_find_clear_bits(*numbits,
		le32_to_cpu(alloc->id1.bitmap1.i_total),
		bitoff, numfound);

	return bitoff;
}
909ccd979bdSMark Fasheh
ocfs2_clear_local_alloc(struct ocfs2_dinode * alloc)910ccd979bdSMark Fasheh static void ocfs2_clear_local_alloc(struct ocfs2_dinode *alloc)
911ccd979bdSMark Fasheh {
912ccd979bdSMark Fasheh struct ocfs2_local_alloc *la = OCFS2_LOCAL_ALLOC(alloc);
913ccd979bdSMark Fasheh int i;
914ccd979bdSMark Fasheh
915ccd979bdSMark Fasheh alloc->id1.bitmap1.i_total = 0;
916ccd979bdSMark Fasheh alloc->id1.bitmap1.i_used = 0;
917ccd979bdSMark Fasheh la->la_bm_off = 0;
918ccd979bdSMark Fasheh for(i = 0; i < le16_to_cpu(la->la_size); i++)
919ccd979bdSMark Fasheh la->la_bitmap[i] = 0;
920ccd979bdSMark Fasheh }
921ccd979bdSMark Fasheh
#if 0
/*
 * Debugging aid for window shifts (compiled out): BUG() if any of the
 * @count bits starting at @start is set in @bitmap.  Bits are checked
 * from the highest index down.
 */
static void ocfs2_verify_zero_bits(unsigned long *bitmap,
				   unsigned int start,
				   unsigned int count)
{
	unsigned int idx;

	for (idx = count; idx > 0; idx--) {
		unsigned int bit = start + (idx - 1);

		if (!ocfs2_test_bit(bit, bitmap))
			continue;

		printk("ocfs2_verify_zero_bits: start = %u, count = "
		       "%u\n", start, count);
		printk("ocfs2_verify_zero_bits: bit %u is set!",
		       bit);
		BUG();
	}
}
#endif
940ccd979bdSMark Fasheh
941ccd979bdSMark Fasheh /*
942ccd979bdSMark Fasheh * sync the local alloc to main bitmap.
943ccd979bdSMark Fasheh *
944ccd979bdSMark Fasheh * assumes you've already locked the main bitmap -- the bitmap inode
945ccd979bdSMark Fasheh * passed is used for caching.
946ccd979bdSMark Fasheh */
ocfs2_sync_local_to_main(struct ocfs2_super * osb,handle_t * handle,struct ocfs2_dinode * alloc,struct inode * main_bm_inode,struct buffer_head * main_bm_bh)947ccd979bdSMark Fasheh static int ocfs2_sync_local_to_main(struct ocfs2_super *osb,
9481fabe148SMark Fasheh handle_t *handle,
949ccd979bdSMark Fasheh struct ocfs2_dinode *alloc,
950ccd979bdSMark Fasheh struct inode *main_bm_inode,
951ccd979bdSMark Fasheh struct buffer_head *main_bm_bh)
952ccd979bdSMark Fasheh {
953ccd979bdSMark Fasheh int status = 0;
954ccd979bdSMark Fasheh int bit_off, left, count, start;
955ccd979bdSMark Fasheh u64 la_start_blk;
956ccd979bdSMark Fasheh u64 blkno;
957ccd979bdSMark Fasheh void *bitmap;
958ccd979bdSMark Fasheh struct ocfs2_local_alloc *la = OCFS2_LOCAL_ALLOC(alloc);
959ccd979bdSMark Fasheh
960a04733d8STao Ma trace_ocfs2_sync_local_to_main(
961ccd979bdSMark Fasheh le32_to_cpu(alloc->id1.bitmap1.i_total),
962ccd979bdSMark Fasheh le32_to_cpu(alloc->id1.bitmap1.i_used));
963ccd979bdSMark Fasheh
964ccd979bdSMark Fasheh if (!alloc->id1.bitmap1.i_total) {
965ccd979bdSMark Fasheh goto bail;
966ccd979bdSMark Fasheh }
967ccd979bdSMark Fasheh
968ccd979bdSMark Fasheh if (le32_to_cpu(alloc->id1.bitmap1.i_used) ==
969ccd979bdSMark Fasheh le32_to_cpu(alloc->id1.bitmap1.i_total)) {
970ccd979bdSMark Fasheh goto bail;
971ccd979bdSMark Fasheh }
972ccd979bdSMark Fasheh
973ccd979bdSMark Fasheh la_start_blk = ocfs2_clusters_to_blocks(osb->sb,
974ccd979bdSMark Fasheh le32_to_cpu(la->la_bm_off));
975ccd979bdSMark Fasheh bitmap = la->la_bitmap;
976*7982f975SColin Ian King start = count = 0;
977ccd979bdSMark Fasheh left = le32_to_cpu(alloc->id1.bitmap1.i_total);
978ccd979bdSMark Fasheh
979ccd979bdSMark Fasheh while ((bit_off = ocfs2_find_next_zero_bit(bitmap, left, start))
980ccd979bdSMark Fasheh != -1) {
981ccd979bdSMark Fasheh if ((bit_off < left) && (bit_off == start)) {
982ccd979bdSMark Fasheh count++;
983ccd979bdSMark Fasheh start++;
984ccd979bdSMark Fasheh continue;
985ccd979bdSMark Fasheh }
986ccd979bdSMark Fasheh if (count) {
987ccd979bdSMark Fasheh blkno = la_start_blk +
988ccd979bdSMark Fasheh ocfs2_clusters_to_blocks(osb->sb,
989ccd979bdSMark Fasheh start - count);
990ccd979bdSMark Fasheh
991a04733d8STao Ma trace_ocfs2_sync_local_to_main_free(
992b0697053SMark Fasheh count, start - count,
993b0697053SMark Fasheh (unsigned long long)la_start_blk,
994b0697053SMark Fasheh (unsigned long long)blkno);
995ccd979bdSMark Fasheh
996b4414eeaSMark Fasheh status = ocfs2_release_clusters(handle,
997b4414eeaSMark Fasheh main_bm_inode,
998b4414eeaSMark Fasheh main_bm_bh, blkno,
999b4414eeaSMark Fasheh count);
1000ccd979bdSMark Fasheh if (status < 0) {
1001ccd979bdSMark Fasheh mlog_errno(status);
1002ccd979bdSMark Fasheh goto bail;
1003ccd979bdSMark Fasheh }
1004ccd979bdSMark Fasheh }
1005ccd979bdSMark Fasheh if (bit_off >= left)
1006ccd979bdSMark Fasheh break;
1007ccd979bdSMark Fasheh count = 1;
1008ccd979bdSMark Fasheh start = bit_off + 1;
1009ccd979bdSMark Fasheh }
1010ccd979bdSMark Fasheh
1011ccd979bdSMark Fasheh bail:
1012c1e8d35eSTao Ma if (status)
1013c1e8d35eSTao Ma mlog_errno(status);
1014ccd979bdSMark Fasheh return status;
1015ccd979bdSMark Fasheh }
1016ccd979bdSMark Fasheh
/* Events fed to ocfs2_recalc_la_window() to size the next window. */
enum ocfs2_la_event {
	/* Normal window slide. */
	OCFS2_LA_EVENT_SLIDE,

	/*
	 * The global bitmap has enough bits theoretically free, but a
	 * contiguous allocation could not be found.
	 */
	OCFS2_LA_EVENT_FRAGMENTED,

	/*
	 * Global bitmap doesn't have enough bits free to satisfy our
	 * request.
	 */
	OCFS2_LA_EVENT_ENOSPC,
};

/* Delay, in jiffies, used when queueing osb->la_enable_wq. */
#define OCFS2_LA_ENABLE_INTERVAL (30 * HZ)
10299c7af40bSMark Fasheh /*
10309c7af40bSMark Fasheh * Given an event, calculate the size of our next local alloc window.
10319c7af40bSMark Fasheh *
1032137cebf9Shongnanli * This should always be called under i_rwsem of the local alloc inode
10339c7af40bSMark Fasheh * so that local alloc disabling doesn't race with processes trying to
10349c7af40bSMark Fasheh * use the allocator.
10359c7af40bSMark Fasheh *
10369c7af40bSMark Fasheh * Returns the state which the local alloc was left in. This value can
10379c7af40bSMark Fasheh * be ignored by some paths.
10389c7af40bSMark Fasheh */
ocfs2_recalc_la_window(struct ocfs2_super * osb,enum ocfs2_la_event event)10399c7af40bSMark Fasheh static int ocfs2_recalc_la_window(struct ocfs2_super *osb,
10409c7af40bSMark Fasheh enum ocfs2_la_event event)
10419c7af40bSMark Fasheh {
10429c7af40bSMark Fasheh unsigned int bits;
10439c7af40bSMark Fasheh int state;
10449c7af40bSMark Fasheh
10459c7af40bSMark Fasheh spin_lock(&osb->osb_lock);
10469c7af40bSMark Fasheh if (osb->local_alloc_state == OCFS2_LA_DISABLED) {
10479c7af40bSMark Fasheh WARN_ON_ONCE(osb->local_alloc_state == OCFS2_LA_DISABLED);
10489c7af40bSMark Fasheh goto out_unlock;
10499c7af40bSMark Fasheh }
10509c7af40bSMark Fasheh
10519c7af40bSMark Fasheh /*
10529c7af40bSMark Fasheh * ENOSPC and fragmentation are treated similarly for now.
10539c7af40bSMark Fasheh */
10549c7af40bSMark Fasheh if (event == OCFS2_LA_EVENT_ENOSPC ||
10559c7af40bSMark Fasheh event == OCFS2_LA_EVENT_FRAGMENTED) {
10569c7af40bSMark Fasheh /*
10579c7af40bSMark Fasheh * We ran out of contiguous space in the primary
10589c7af40bSMark Fasheh * bitmap. Drastically reduce the number of bits used
10599c7af40bSMark Fasheh * by local alloc until we have to disable it.
10609c7af40bSMark Fasheh */
10619c7af40bSMark Fasheh bits = osb->local_alloc_bits >> 1;
10629c7af40bSMark Fasheh if (bits > ocfs2_megabytes_to_clusters(osb->sb, 1)) {
10639c7af40bSMark Fasheh /*
10649c7af40bSMark Fasheh * By setting state to THROTTLED, we'll keep
10659c7af40bSMark Fasheh * the number of local alloc bits used down
10669c7af40bSMark Fasheh * until an event occurs which would give us
10679c7af40bSMark Fasheh * reason to assume the bitmap situation might
10689c7af40bSMark Fasheh * have changed.
10699c7af40bSMark Fasheh */
10709c7af40bSMark Fasheh osb->local_alloc_state = OCFS2_LA_THROTTLED;
10719c7af40bSMark Fasheh osb->local_alloc_bits = bits;
10729c7af40bSMark Fasheh } else {
10739c7af40bSMark Fasheh osb->local_alloc_state = OCFS2_LA_DISABLED;
10749c7af40bSMark Fasheh }
107535ddf78eSjiangyiwen queue_delayed_work(osb->ocfs2_wq, &osb->la_enable_wq,
10769c7af40bSMark Fasheh OCFS2_LA_ENABLE_INTERVAL);
10779c7af40bSMark Fasheh goto out_unlock;
10789c7af40bSMark Fasheh }
10799c7af40bSMark Fasheh
10809c7af40bSMark Fasheh /*
10819c7af40bSMark Fasheh * Don't increase the size of the local alloc window until we
10829c7af40bSMark Fasheh * know we might be able to fulfill the request. Otherwise, we
10839c7af40bSMark Fasheh * risk bouncing around the global bitmap during periods of
10849c7af40bSMark Fasheh * low space.
10859c7af40bSMark Fasheh */
10869c7af40bSMark Fasheh if (osb->local_alloc_state != OCFS2_LA_THROTTLED)
10879c7af40bSMark Fasheh osb->local_alloc_bits = osb->local_alloc_default_bits;
10889c7af40bSMark Fasheh
10899c7af40bSMark Fasheh out_unlock:
10909c7af40bSMark Fasheh state = osb->local_alloc_state;
10919c7af40bSMark Fasheh spin_unlock(&osb->osb_lock);
10929c7af40bSMark Fasheh
10939c7af40bSMark Fasheh return state;
10949c7af40bSMark Fasheh }
10959c7af40bSMark Fasheh
ocfs2_local_alloc_reserve_for_window(struct ocfs2_super * osb,struct ocfs2_alloc_context ** ac,struct inode ** bitmap_inode,struct buffer_head ** bitmap_bh)1096ccd979bdSMark Fasheh static int ocfs2_local_alloc_reserve_for_window(struct ocfs2_super *osb,
1097ccd979bdSMark Fasheh struct ocfs2_alloc_context **ac,
1098ccd979bdSMark Fasheh struct inode **bitmap_inode,
1099ccd979bdSMark Fasheh struct buffer_head **bitmap_bh)
1100ccd979bdSMark Fasheh {
1101ccd979bdSMark Fasheh int status;
1102ccd979bdSMark Fasheh
1103cd861280SRobert P. J. Day *ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL);
1104ccd979bdSMark Fasheh if (!(*ac)) {
1105ccd979bdSMark Fasheh status = -ENOMEM;
1106ccd979bdSMark Fasheh mlog_errno(status);
1107ccd979bdSMark Fasheh goto bail;
1108ccd979bdSMark Fasheh }
1109ccd979bdSMark Fasheh
11109c7af40bSMark Fasheh retry_enospc:
11117e9b7937SYounger Liu (*ac)->ac_bits_wanted = osb->local_alloc_bits;
1112ccd979bdSMark Fasheh status = ocfs2_reserve_cluster_bitmap_bits(osb, *ac);
11139c7af40bSMark Fasheh if (status == -ENOSPC) {
11149c7af40bSMark Fasheh if (ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_ENOSPC) ==
11159c7af40bSMark Fasheh OCFS2_LA_DISABLED)
11169c7af40bSMark Fasheh goto bail;
11179c7af40bSMark Fasheh
11189c7af40bSMark Fasheh ocfs2_free_ac_resource(*ac);
11199c7af40bSMark Fasheh memset(*ac, 0, sizeof(struct ocfs2_alloc_context));
11209c7af40bSMark Fasheh goto retry_enospc;
11219c7af40bSMark Fasheh }
1122ccd979bdSMark Fasheh if (status < 0) {
1123ccd979bdSMark Fasheh mlog_errno(status);
1124ccd979bdSMark Fasheh goto bail;
1125ccd979bdSMark Fasheh }
1126ccd979bdSMark Fasheh
1127ccd979bdSMark Fasheh *bitmap_inode = (*ac)->ac_inode;
1128ccd979bdSMark Fasheh igrab(*bitmap_inode);
1129ccd979bdSMark Fasheh *bitmap_bh = (*ac)->ac_bh;
1130ccd979bdSMark Fasheh get_bh(*bitmap_bh);
1131ccd979bdSMark Fasheh status = 0;
1132ccd979bdSMark Fasheh bail:
1133ccd979bdSMark Fasheh if ((status < 0) && *ac) {
1134ccd979bdSMark Fasheh ocfs2_free_alloc_context(*ac);
1135ccd979bdSMark Fasheh *ac = NULL;
1136ccd979bdSMark Fasheh }
1137ccd979bdSMark Fasheh
1138c1e8d35eSTao Ma if (status)
1139c1e8d35eSTao Ma mlog_errno(status);
1140ccd979bdSMark Fasheh return status;
1141ccd979bdSMark Fasheh }
1142ccd979bdSMark Fasheh
1143ccd979bdSMark Fasheh /*
1144ccd979bdSMark Fasheh * pass it the bitmap lock in lock_bh if you have it.
1145ccd979bdSMark Fasheh */
ocfs2_local_alloc_new_window(struct ocfs2_super * osb,handle_t * handle,struct ocfs2_alloc_context * ac)1146ccd979bdSMark Fasheh static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb,
11471fabe148SMark Fasheh handle_t *handle,
1148ccd979bdSMark Fasheh struct ocfs2_alloc_context *ac)
1149ccd979bdSMark Fasheh {
1150ccd979bdSMark Fasheh int status = 0;
1151ccd979bdSMark Fasheh u32 cluster_off, cluster_count;
1152ccd979bdSMark Fasheh struct ocfs2_dinode *alloc = NULL;
1153ccd979bdSMark Fasheh struct ocfs2_local_alloc *la;
1154ccd979bdSMark Fasheh
1155ccd979bdSMark Fasheh alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
1156ccd979bdSMark Fasheh la = OCFS2_LOCAL_ALLOC(alloc);
1157ccd979bdSMark Fasheh
1158a04733d8STao Ma trace_ocfs2_local_alloc_new_window(
1159a04733d8STao Ma le32_to_cpu(alloc->id1.bitmap1.i_total),
1160ebcee4b5SMark Fasheh osb->local_alloc_bits);
1161883d4caeSMark Fasheh
1162883d4caeSMark Fasheh /* Instruct the allocation code to try the most recently used
1163883d4caeSMark Fasheh * cluster group. We'll re-record the group used this pass
1164883d4caeSMark Fasheh * below. */
1165883d4caeSMark Fasheh ac->ac_last_group = osb->la_last_gd;
1166883d4caeSMark Fasheh
1167ccd979bdSMark Fasheh /* we used the generic suballoc reserve function, but we set
1168ccd979bdSMark Fasheh * everything up nicely, so there's no reason why we can't use
1169ccd979bdSMark Fasheh * the more specific cluster api to claim bits. */
11701ed9b777SJoel Becker status = ocfs2_claim_clusters(handle, ac, osb->local_alloc_bits,
1171ccd979bdSMark Fasheh &cluster_off, &cluster_count);
11729c7af40bSMark Fasheh if (status == -ENOSPC) {
11739c7af40bSMark Fasheh retry_enospc:
11749c7af40bSMark Fasheh /*
11759c7af40bSMark Fasheh * Note: We could also try syncing the journal here to
11769c7af40bSMark Fasheh * allow use of any free bits which the current
11779c7af40bSMark Fasheh * transaction can't give us access to. --Mark
11789c7af40bSMark Fasheh */
11799c7af40bSMark Fasheh if (ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_FRAGMENTED) ==
11809c7af40bSMark Fasheh OCFS2_LA_DISABLED)
11819c7af40bSMark Fasheh goto bail;
11829c7af40bSMark Fasheh
11837e9b7937SYounger Liu ac->ac_bits_wanted = osb->local_alloc_bits;
11841ed9b777SJoel Becker status = ocfs2_claim_clusters(handle, ac,
11859c7af40bSMark Fasheh osb->local_alloc_bits,
11869c7af40bSMark Fasheh &cluster_off,
11879c7af40bSMark Fasheh &cluster_count);
11889c7af40bSMark Fasheh if (status == -ENOSPC)
11899c7af40bSMark Fasheh goto retry_enospc;
11909c7af40bSMark Fasheh /*
11919c7af40bSMark Fasheh * We only shrunk the *minimum* number of in our
11929c7af40bSMark Fasheh * request - it's entirely possible that the allocator
11939c7af40bSMark Fasheh * might give us more than we asked for.
11949c7af40bSMark Fasheh */
11959c7af40bSMark Fasheh if (status == 0) {
11969c7af40bSMark Fasheh spin_lock(&osb->osb_lock);
11979c7af40bSMark Fasheh osb->local_alloc_bits = cluster_count;
11989c7af40bSMark Fasheh spin_unlock(&osb->osb_lock);
11999c7af40bSMark Fasheh }
12009c7af40bSMark Fasheh }
1201ccd979bdSMark Fasheh if (status < 0) {
1202ccd979bdSMark Fasheh if (status != -ENOSPC)
1203ccd979bdSMark Fasheh mlog_errno(status);
1204ccd979bdSMark Fasheh goto bail;
1205ccd979bdSMark Fasheh }
1206ccd979bdSMark Fasheh
1207883d4caeSMark Fasheh osb->la_last_gd = ac->ac_last_group;
1208883d4caeSMark Fasheh
1209ccd979bdSMark Fasheh la->la_bm_off = cpu_to_le32(cluster_off);
1210ccd979bdSMark Fasheh alloc->id1.bitmap1.i_total = cpu_to_le32(cluster_count);
1211ccd979bdSMark Fasheh /* just in case... In the future when we find space ourselves,
1212ccd979bdSMark Fasheh * we don't have to get all contiguous -- but we'll have to
1213ccd979bdSMark Fasheh * set all previously used bits in bitmap and update
1214ccd979bdSMark Fasheh * la_bits_set before setting the bits in the main bitmap. */
1215ccd979bdSMark Fasheh alloc->id1.bitmap1.i_used = 0;
1216ccd979bdSMark Fasheh memset(OCFS2_LOCAL_ALLOC(alloc)->la_bitmap, 0,
1217ccd979bdSMark Fasheh le16_to_cpu(la->la_size));
1218ccd979bdSMark Fasheh
1219d02f00ccSMark Fasheh ocfs2_resmap_restart(&osb->osb_la_resmap, cluster_count,
1220d02f00ccSMark Fasheh OCFS2_LOCAL_ALLOC(alloc)->la_bitmap);
1221d02f00ccSMark Fasheh
1222a04733d8STao Ma trace_ocfs2_local_alloc_new_window_result(
1223a04733d8STao Ma OCFS2_LOCAL_ALLOC(alloc)->la_bm_off,
1224a04733d8STao Ma le32_to_cpu(alloc->id1.bitmap1.i_total));
1225ccd979bdSMark Fasheh
1226ccd979bdSMark Fasheh bail:
1227c1e8d35eSTao Ma if (status)
1228c1e8d35eSTao Ma mlog_errno(status);
1229ccd979bdSMark Fasheh return status;
1230ccd979bdSMark Fasheh }
1231ccd979bdSMark Fasheh
1232ccd979bdSMark Fasheh /* Note that we do *NOT* lock the local alloc inode here as
1233ccd979bdSMark Fasheh * it's been locked already for us. */
ocfs2_local_alloc_slide_window(struct ocfs2_super * osb,struct inode * local_alloc_inode)1234ccd979bdSMark Fasheh static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb,
1235ccd979bdSMark Fasheh struct inode *local_alloc_inode)
1236ccd979bdSMark Fasheh {
1237ccd979bdSMark Fasheh int status = 0;
1238ccd979bdSMark Fasheh struct buffer_head *main_bm_bh = NULL;
1239ccd979bdSMark Fasheh struct inode *main_bm_inode = NULL;
12401fabe148SMark Fasheh handle_t *handle = NULL;
1241ccd979bdSMark Fasheh struct ocfs2_dinode *alloc;
1242ccd979bdSMark Fasheh struct ocfs2_dinode *alloc_copy = NULL;
1243ccd979bdSMark Fasheh struct ocfs2_alloc_context *ac = NULL;
1244ccd979bdSMark Fasheh
12459c7af40bSMark Fasheh ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_SLIDE);
12469c7af40bSMark Fasheh
1247ccd979bdSMark Fasheh /* This will lock the main bitmap for us. */
1248ccd979bdSMark Fasheh status = ocfs2_local_alloc_reserve_for_window(osb,
1249ccd979bdSMark Fasheh &ac,
1250ccd979bdSMark Fasheh &main_bm_inode,
1251ccd979bdSMark Fasheh &main_bm_bh);
1252ccd979bdSMark Fasheh if (status < 0) {
1253ccd979bdSMark Fasheh if (status != -ENOSPC)
1254ccd979bdSMark Fasheh mlog_errno(status);
1255ccd979bdSMark Fasheh goto bail;
1256ccd979bdSMark Fasheh }
1257ccd979bdSMark Fasheh
125865eff9ccSMark Fasheh handle = ocfs2_start_trans(osb, OCFS2_WINDOW_MOVE_CREDITS);
1259ccd979bdSMark Fasheh if (IS_ERR(handle)) {
1260ccd979bdSMark Fasheh status = PTR_ERR(handle);
1261ccd979bdSMark Fasheh handle = NULL;
1262ccd979bdSMark Fasheh mlog_errno(status);
1263ccd979bdSMark Fasheh goto bail;
1264ccd979bdSMark Fasheh }
1265ccd979bdSMark Fasheh
1266ccd979bdSMark Fasheh alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
1267ccd979bdSMark Fasheh
1268ccd979bdSMark Fasheh /* We want to clear the local alloc before doing anything
1269ccd979bdSMark Fasheh * else, so that if we error later during this operation,
1270ccd979bdSMark Fasheh * local alloc shutdown won't try to double free main bitmap
1271ccd979bdSMark Fasheh * bits. Make a copy so the sync function knows which bits to
1272ccd979bdSMark Fasheh * free. */
1273d8b2fa65SFuqian Huang alloc_copy = kmemdup(alloc, osb->local_alloc_bh->b_size, GFP_NOFS);
1274ccd979bdSMark Fasheh if (!alloc_copy) {
1275ccd979bdSMark Fasheh status = -ENOMEM;
1276ccd979bdSMark Fasheh mlog_errno(status);
1277ccd979bdSMark Fasheh goto bail;
1278ccd979bdSMark Fasheh }
1279ccd979bdSMark Fasheh
12800cf2f763SJoel Becker status = ocfs2_journal_access_di(handle,
12810cf2f763SJoel Becker INODE_CACHE(local_alloc_inode),
1282ccd979bdSMark Fasheh osb->local_alloc_bh,
1283ccd979bdSMark Fasheh OCFS2_JOURNAL_ACCESS_WRITE);
1284ccd979bdSMark Fasheh if (status < 0) {
1285ccd979bdSMark Fasheh mlog_errno(status);
1286ccd979bdSMark Fasheh goto bail;
1287ccd979bdSMark Fasheh }
1288ccd979bdSMark Fasheh
1289ccd979bdSMark Fasheh ocfs2_clear_local_alloc(alloc);
1290ec20cec7SJoel Becker ocfs2_journal_dirty(handle, osb->local_alloc_bh);
1291ccd979bdSMark Fasheh
1292ccd979bdSMark Fasheh status = ocfs2_sync_local_to_main(osb, handle, alloc_copy,
1293ccd979bdSMark Fasheh main_bm_inode, main_bm_bh);
1294ccd979bdSMark Fasheh if (status < 0) {
1295ccd979bdSMark Fasheh mlog_errno(status);
1296ccd979bdSMark Fasheh goto bail;
1297ccd979bdSMark Fasheh }
1298ccd979bdSMark Fasheh
1299ccd979bdSMark Fasheh status = ocfs2_local_alloc_new_window(osb, handle, ac);
1300ccd979bdSMark Fasheh if (status < 0) {
1301ccd979bdSMark Fasheh if (status != -ENOSPC)
1302ccd979bdSMark Fasheh mlog_errno(status);
1303ccd979bdSMark Fasheh goto bail;
1304ccd979bdSMark Fasheh }
1305ccd979bdSMark Fasheh
1306ccd979bdSMark Fasheh atomic_inc(&osb->alloc_stats.moves);
1307ccd979bdSMark Fasheh
1308ccd979bdSMark Fasheh bail:
1309ccd979bdSMark Fasheh if (handle)
131002dc1af4SMark Fasheh ocfs2_commit_trans(osb, handle);
1311ccd979bdSMark Fasheh
1312ccd979bdSMark Fasheh brelse(main_bm_bh);
1313ccd979bdSMark Fasheh
1314ccd979bdSMark Fasheh iput(main_bm_inode);
1315ccd979bdSMark Fasheh kfree(alloc_copy);
1316ccd979bdSMark Fasheh
1317ccd979bdSMark Fasheh if (ac)
1318ccd979bdSMark Fasheh ocfs2_free_alloc_context(ac);
1319ccd979bdSMark Fasheh
1320c1e8d35eSTao Ma if (status)
1321c1e8d35eSTao Ma mlog_errno(status);
1322ccd979bdSMark Fasheh return status;
1323ccd979bdSMark Fasheh }
1324ccd979bdSMark Fasheh
1325