mballoc.c (863e99a8c1ea2b0391491904297f57a0f6a1fdd6) mballoc.c (28623c2f5b0dca3c3ea34fd6108940661352e276)
1/*
2 * Copyright (c) 2003-2006, Cluster File Systems, Inc, info@clusterfs.com
3 * Written by Alex Tomas <alex@clusterfs.com>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
8 *

--- 10 unchanged lines hidden (view full) ---

19
20/*
21 * mballoc.c contains the multiblocks allocation routines
22 */
23
24#include "ext4_jbd2.h"
25#include "mballoc.h"
26#include <linux/debugfs.h>
1/*
2 * Copyright (c) 2003-2006, Cluster File Systems, Inc, info@clusterfs.com
3 * Written by Alex Tomas <alex@clusterfs.com>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
8 *

--- 10 unchanged lines hidden (view full) ---

19
20/*
21 * mballoc.c contains the multiblocks allocation routines
22 */
23
24#include "ext4_jbd2.h"
25#include "mballoc.h"
26#include <linux/debugfs.h>
27#include <linux/log2.h>
27#include <linux/slab.h>
28#include <trace/events/ext4.h>
29
30/*
31 * MUSTDO:
32 * - test ext4_ext_search_left() and ext4_ext_search_right()
33 * - search for metadata in few groups
34 *

--- 1298 unchanged lines hidden (view full) ---

1333 mb_clear_bit(block, buddy2);
1334 buddy = buddy2;
1335 } while (1);
1336 }
1337 mb_set_largest_free_order(sb, e4b->bd_info);
1338 mb_check_buddy(e4b);
1339}
1340
28#include <linux/slab.h>
29#include <trace/events/ext4.h>
30
31/*
32 * MUSTDO:
33 * - test ext4_ext_search_left() and ext4_ext_search_right()
34 * - search for metadata in few groups
35 *

--- 1298 unchanged lines hidden (view full) ---

1334 mb_clear_bit(block, buddy2);
1335 buddy = buddy2;
1336 } while (1);
1337 }
1338 mb_set_largest_free_order(sb, e4b->bd_info);
1339 mb_check_buddy(e4b);
1340}
1341
1341static int mb_find_extent(struct ext4_buddy *e4b, int order, int block,
1342static int mb_find_extent(struct ext4_buddy *e4b, int block,
1342 int needed, struct ext4_free_extent *ex)
1343{
1344 int next = block;
1343 int needed, struct ext4_free_extent *ex)
1344{
1345 int next = block;
1345 int max;
1346 int max, order;
1346 void *buddy;
1347
1348 assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
1349 BUG_ON(ex == NULL);
1350
1347 void *buddy;
1348
1349 assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
1350 BUG_ON(ex == NULL);
1351
1351 buddy = mb_find_buddy(e4b, order, &max);
1352 buddy = mb_find_buddy(e4b, 0, &max);
1352 BUG_ON(buddy == NULL);
1353 BUG_ON(block >= max);
1354 if (mb_test_bit(block, buddy)) {
1355 ex->fe_len = 0;
1356 ex->fe_start = 0;
1357 ex->fe_group = 0;
1358 return 0;
1359 }
1360
1353 BUG_ON(buddy == NULL);
1354 BUG_ON(block >= max);
1355 if (mb_test_bit(block, buddy)) {
1356 ex->fe_len = 0;
1357 ex->fe_start = 0;
1358 ex->fe_group = 0;
1359 return 0;
1360 }
1361
1361 /* FIXME drop order completely? */
1362 if (likely(order == 0)) {
1363 /* find actual order */
1364 order = mb_find_order_for_block(e4b, block);
1365 block = block >> order;
1366 }
1362 /* find actual order */
1363 order = mb_find_order_for_block(e4b, block);
1364 block = block >> order;
1367
1368 ex->fe_len = 1 << order;
1369 ex->fe_start = block << order;
1370 ex->fe_group = e4b->bd_group;
1371
1372 /* calc difference from given start */
1373 next = next - ex->fe_start;
1374 ex->fe_len -= next;

--- 169 unchanged lines hidden (view full) ---

1544 if (bex->fe_len < gex->fe_len)
1545 return;
1546
1547 if ((finish_group || ac->ac_found > sbi->s_mb_min_to_scan)
1548 && bex->fe_group == e4b->bd_group) {
1549 /* recheck chunk's availability - we don't know
1550 * when it was found (within this lock-unlock
1551 * period or not) */
1365
1366 ex->fe_len = 1 << order;
1367 ex->fe_start = block << order;
1368 ex->fe_group = e4b->bd_group;
1369
1370 /* calc difference from given start */
1371 next = next - ex->fe_start;
1372 ex->fe_len -= next;

--- 169 unchanged lines hidden (view full) ---

1542 if (bex->fe_len < gex->fe_len)
1543 return;
1544
1545 if ((finish_group || ac->ac_found > sbi->s_mb_min_to_scan)
1546 && bex->fe_group == e4b->bd_group) {
1547 /* recheck chunk's availability - we don't know
1548 * when it was found (within this lock-unlock
1549 * period or not) */
1552 max = mb_find_extent(e4b, 0, bex->fe_start, gex->fe_len, &ex);
1550 max = mb_find_extent(e4b, bex->fe_start, gex->fe_len, &ex);
1553 if (max >= gex->fe_len) {
1554 ext4_mb_use_best_found(ac, e4b);
1555 return;
1556 }
1557 }
1558}
1559
1560/*

--- 75 unchanged lines hidden (view full) ---

1636 int err;
1637
1638 BUG_ON(ex.fe_len <= 0);
1639 err = ext4_mb_load_buddy(ac->ac_sb, group, e4b);
1640 if (err)
1641 return err;
1642
1643 ext4_lock_group(ac->ac_sb, group);
1551 if (max >= gex->fe_len) {
1552 ext4_mb_use_best_found(ac, e4b);
1553 return;
1554 }
1555 }
1556}
1557
1558/*

--- 75 unchanged lines hidden (view full) ---

1634 int err;
1635
1636 BUG_ON(ex.fe_len <= 0);
1637 err = ext4_mb_load_buddy(ac->ac_sb, group, e4b);
1638 if (err)
1639 return err;
1640
1641 ext4_lock_group(ac->ac_sb, group);
1644 max = mb_find_extent(e4b, 0, ex.fe_start, ex.fe_len, &ex);
1642 max = mb_find_extent(e4b, ex.fe_start, ex.fe_len, &ex);
1645
1646 if (max > 0) {
1647 ac->ac_b_ex = ex;
1648 ext4_mb_use_best_found(ac, e4b);
1649 }
1650
1651 ext4_unlock_group(ac->ac_sb, group);
1652 ext4_mb_unload_buddy(e4b);

--- 14 unchanged lines hidden (view full) ---

1667 if (!(ac->ac_flags & EXT4_MB_HINT_TRY_GOAL))
1668 return 0;
1669
1670 err = ext4_mb_load_buddy(ac->ac_sb, group, e4b);
1671 if (err)
1672 return err;
1673
1674 ext4_lock_group(ac->ac_sb, group);
1643
1644 if (max > 0) {
1645 ac->ac_b_ex = ex;
1646 ext4_mb_use_best_found(ac, e4b);
1647 }
1648
1649 ext4_unlock_group(ac->ac_sb, group);
1650 ext4_mb_unload_buddy(e4b);

--- 14 unchanged lines hidden (view full) ---

1665 if (!(ac->ac_flags & EXT4_MB_HINT_TRY_GOAL))
1666 return 0;
1667
1668 err = ext4_mb_load_buddy(ac->ac_sb, group, e4b);
1669 if (err)
1670 return err;
1671
1672 ext4_lock_group(ac->ac_sb, group);
1675 max = mb_find_extent(e4b, 0, ac->ac_g_ex.fe_start,
1673 max = mb_find_extent(e4b, ac->ac_g_ex.fe_start,
1676 ac->ac_g_ex.fe_len, &ex);
1677
1678 if (max >= ac->ac_g_ex.fe_len && ac->ac_g_ex.fe_len == sbi->s_stripe) {
1679 ext4_fsblk_t start;
1680
1681 start = ext4_group_first_block_no(ac->ac_sb, e4b->bd_group) +
1682 ex.fe_start;
1683 /* use do_div to get remainder (would be 64-bit modulo) */

--- 99 unchanged lines hidden (view full) ---

1783 */
1784 ext4_grp_locked_error(sb, e4b->bd_group, 0, 0,
1785 "%d free clusters as per "
1786 "group info. But bitmap says 0",
1787 free);
1788 break;
1789 }
1790
1674 ac->ac_g_ex.fe_len, &ex);
1675
1676 if (max >= ac->ac_g_ex.fe_len && ac->ac_g_ex.fe_len == sbi->s_stripe) {
1677 ext4_fsblk_t start;
1678
1679 start = ext4_group_first_block_no(ac->ac_sb, e4b->bd_group) +
1680 ex.fe_start;
1681 /* use do_div to get remainder (would be 64-bit modulo) */

--- 99 unchanged lines hidden (view full) ---

1781 */
1782 ext4_grp_locked_error(sb, e4b->bd_group, 0, 0,
1783 "%d free clusters as per "
1784 "group info. But bitmap says 0",
1785 free);
1786 break;
1787 }
1788
1791 mb_find_extent(e4b, 0, i, ac->ac_g_ex.fe_len, &ex);
1789 mb_find_extent(e4b, i, ac->ac_g_ex.fe_len, &ex);
1792 BUG_ON(ex.fe_len <= 0);
1793 if (free < ex.fe_len) {
1794 ext4_grp_locked_error(sb, e4b->bd_group, 0, 0,
1795 "%d free clusters as per "
1796 "group info. But got %d blocks",
1797 free, ex.fe_len);
1798 /*
1799 * The number of free blocks differs. This mostly

--- 35 unchanged lines hidden (view full) ---

1835 first_group_block = ext4_group_first_block_no(sb, e4b->bd_group);
1836
1837 a = first_group_block + sbi->s_stripe - 1;
1838 do_div(a, sbi->s_stripe);
1839 i = (a * sbi->s_stripe) - first_group_block;
1840
1841 while (i < EXT4_CLUSTERS_PER_GROUP(sb)) {
1842 if (!mb_test_bit(i, bitmap)) {
1790 BUG_ON(ex.fe_len <= 0);
1791 if (free < ex.fe_len) {
1792 ext4_grp_locked_error(sb, e4b->bd_group, 0, 0,
1793 "%d free clusters as per "
1794 "group info. But got %d blocks",
1795 free, ex.fe_len);
1796 /*
1797 * The number of free blocks differs. This mostly

--- 35 unchanged lines hidden (view full) ---

1833 first_group_block = ext4_group_first_block_no(sb, e4b->bd_group);
1834
1835 a = first_group_block + sbi->s_stripe - 1;
1836 do_div(a, sbi->s_stripe);
1837 i = (a * sbi->s_stripe) - first_group_block;
1838
1839 while (i < EXT4_CLUSTERS_PER_GROUP(sb)) {
1840 if (!mb_test_bit(i, bitmap)) {
1843 max = mb_find_extent(e4b, 0, i, sbi->s_stripe, &ex);
1841 max = mb_find_extent(e4b, i, sbi->s_stripe, &ex);
1844 if (max >= sbi->s_stripe) {
1845 ac->ac_found++;
1846 ac->ac_b_ex = ex;
1847 ext4_mb_use_best_found(ac, e4b);
1848 break;
1849 }
1850 }
1851 i += sbi->s_stripe;

--- 5 unchanged lines hidden (view full) ---

1857 ext4_group_t group, int cr)
1858{
1859 unsigned free, fragments;
1860 int flex_size = ext4_flex_bg_size(EXT4_SB(ac->ac_sb));
1861 struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group);
1862
1863 BUG_ON(cr < 0 || cr >= 4);
1864
1842 if (max >= sbi->s_stripe) {
1843 ac->ac_found++;
1844 ac->ac_b_ex = ex;
1845 ext4_mb_use_best_found(ac, e4b);
1846 break;
1847 }
1848 }
1849 i += sbi->s_stripe;

--- 5 unchanged lines hidden (view full) ---

1855 ext4_group_t group, int cr)
1856{
1857 unsigned free, fragments;
1858 int flex_size = ext4_flex_bg_size(EXT4_SB(ac->ac_sb));
1859 struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group);
1860
1861 BUG_ON(cr < 0 || cr >= 4);
1862
1863 free = grp->bb_free;
1864 if (free == 0)
1865 return 0;
1866 if (cr <= 2 && free < ac->ac_g_ex.fe_len)
1867 return 0;
1868
1865 /* We only do this if the grp has never been initialized */
1866 if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
1867 int ret = ext4_mb_init_group(ac->ac_sb, group);
1868 if (ret)
1869 return 0;
1870 }
1871
1869 /* We only do this if the grp has never been initialized */
1870 if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
1871 int ret = ext4_mb_init_group(ac->ac_sb, group);
1872 if (ret)
1873 return 0;
1874 }
1875
1872 free = grp->bb_free;
1873 fragments = grp->bb_fragments;
1876 fragments = grp->bb_fragments;
1874 if (free == 0)
1875 return 0;
1876 if (fragments == 0)
1877 return 0;
1878
1879 switch (cr) {
1880 case 0:
1881 BUG_ON(ac->ac_2order == 0);
1882
1883 if (grp->bb_largest_free_order < ac->ac_2order)

--- 274 unchanged lines hidden (view full) ---

2158{
2159 int cache_index = blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE;
2160 struct kmem_cache *cachep = ext4_groupinfo_caches[cache_index];
2161
2162 BUG_ON(!cachep);
2163 return cachep;
2164}
2165
1877 if (fragments == 0)
1878 return 0;
1879
1880 switch (cr) {
1881 case 0:
1882 BUG_ON(ac->ac_2order == 0);
1883
1884 if (grp->bb_largest_free_order < ac->ac_2order)

--- 274 unchanged lines hidden (view full) ---

2159{
2160 int cache_index = blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE;
2161 struct kmem_cache *cachep = ext4_groupinfo_caches[cache_index];
2162
2163 BUG_ON(!cachep);
2164 return cachep;
2165}
2166
2167/*
2168 * Allocate the top-level s_group_info array for the specified number
2169 * of groups
 *
 * Grows sbi->s_group_info so that it has room for @ngroups groups. If the
 * current array (sbi->s_group_info_size entries) is already large enough,
 * nothing is done. Otherwise a larger array is allocated, the existing
 * entries are copied into it, the old array is freed, and both
 * sbi->s_group_info and sbi->s_group_info_size are updated.
 *
 * @sb:      superblock whose ext4_sb_info holds the array
 * @ngroups: number of block groups the array must be able to describe
 *
 * Returns 0 on success (including the "already big enough" case) and
 * -ENOMEM if the new array cannot be allocated; in the failure case the
 * existing array is left untouched.
 *
 * NOTE(review): no locking is visible in this function around the
 * replacement of sbi->s_group_info -- confirm that callers serialize
 * against concurrent readers of the array.
2170 */
2171int ext4_mb_alloc_groupinfo(struct super_block *sb, ext4_group_t ngroups)
2172{
2173 struct ext4_sb_info *sbi = EXT4_SB(sb);
2174 unsigned size; /* first a count of entries, later a size in bytes */
2175 struct ext4_group_info ***new_groupinfo;
2176
2177 size = (ngroups + EXT4_DESC_PER_BLOCK(sb) - 1) >> /* entries needed, rounded up; */
2178 EXT4_DESC_PER_BLOCK_BITS(sb); /* presumably one entry per EXT4_DESC_PER_BLOCK groups -- TODO confirm */
2179 if (size <= sbi->s_group_info_size) /* current array already has enough entries */
2180 return 0;
2181
2182 size = roundup_pow_of_two(sizeof(*sbi->s_group_info) * size); /* from here on size is in bytes */
2183 new_groupinfo = ext4_kvzalloc(size, GFP_KERNEL); /* presumably zero-filled, going by the helper's name -- verify */
2184 if (!new_groupinfo) {
2185 ext4_msg(sb, KERN_ERR, "can't allocate buddy meta group");
2186 return -ENOMEM;
2187 }
2188 if (sbi->s_group_info) { /* carry over the entries of the old array before freeing it */
2189 memcpy(new_groupinfo, sbi->s_group_info,
2190 sbi->s_group_info_size * sizeof(*sbi->s_group_info));
2191 ext4_kvfree(sbi->s_group_info);
2192 }
2193 sbi->s_group_info = new_groupinfo;
2194 sbi->s_group_info_size = size / sizeof(*sbi->s_group_info); /* back to an entry count, including the round-up slack */
2195 ext4_debug("allocated s_groupinfo array for %d meta_bg's\n",
2196 sbi->s_group_info_size);
2197 return 0;
2198}
2199
2166/* Create and initialize ext4_group_info data for the given group. */
2167int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
2168 struct ext4_group_desc *desc)
2169{
2170 int i;
2171 int metalen = 0;
2172 struct ext4_sb_info *sbi = EXT4_SB(sb);
2173 struct ext4_group_info **meta_group_info;

--- 73 unchanged lines hidden (view full) ---

2247 return -ENOMEM;
2248} /* ext4_mb_add_groupinfo */
2249
2250static int ext4_mb_init_backend(struct super_block *sb)
2251{
2252 ext4_group_t ngroups = ext4_get_groups_count(sb);
2253 ext4_group_t i;
2254 struct ext4_sb_info *sbi = EXT4_SB(sb);
2200/* Create and initialize ext4_group_info data for the given group. */
2201int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
2202 struct ext4_group_desc *desc)
2203{
2204 int i;
2205 int metalen = 0;
2206 struct ext4_sb_info *sbi = EXT4_SB(sb);
2207 struct ext4_group_info **meta_group_info;

--- 73 unchanged lines hidden (view full) ---

2281 return -ENOMEM;
2282} /* ext4_mb_add_groupinfo */
2283
2284static int ext4_mb_init_backend(struct super_block *sb)
2285{
2286 ext4_group_t ngroups = ext4_get_groups_count(sb);
2287 ext4_group_t i;
2288 struct ext4_sb_info *sbi = EXT4_SB(sb);
2255 struct ext4_super_block *es = sbi->s_es;
2256 int num_meta_group_infos;
2257 int num_meta_group_infos_max;
2258 int array_size;
2289 int err;
2259 struct ext4_group_desc *desc;
2260 struct kmem_cache *cachep;
2261
2290 struct ext4_group_desc *desc;
2291 struct kmem_cache *cachep;
2292
2262 /* This is the number of blocks used by GDT */
2263 num_meta_group_infos = (ngroups + EXT4_DESC_PER_BLOCK(sb) -
2264 1) >> EXT4_DESC_PER_BLOCK_BITS(sb);
2293 err = ext4_mb_alloc_groupinfo(sb, ngroups);
2294 if (err)
2295 return err;
2265
2296
2266 /*
2267 * This is the total number of blocks used by GDT including
2268 * the number of reserved blocks for GDT.
2269 * The s_group_info array is allocated with this value
2270 * to allow a clean online resize without a complex
2271 * manipulation of pointer.
2272 * The drawback is the unused memory when no resize
2273 * occurs but it's very low in terms of pages
2274 * (see comments below)
2275 * Need to handle this properly when META_BG resizing is allowed
2276 */
2277 num_meta_group_infos_max = num_meta_group_infos +
2278 le16_to_cpu(es->s_reserved_gdt_blocks);
2279
2280 /*
2281 * array_size is the size of s_group_info array. We round it
2282 * to the next power of two because this approximation is done
2283 * internally by kmalloc so we can have some more memory
2284 * for free here (e.g. may be used for META_BG resize).
2285 */
2286 array_size = 1;
2287 while (array_size < sizeof(*sbi->s_group_info) *
2288 num_meta_group_infos_max)
2289 array_size = array_size << 1;
2290 /* An 8TB filesystem with 64-bit pointers requires a 4096 byte
2291 * kmalloc. A 128kb malloc should suffice for a 256TB filesystem.
2292 * So a two level scheme suffices for now. */
2293 sbi->s_group_info = ext4_kvzalloc(array_size, GFP_KERNEL);
2294 if (sbi->s_group_info == NULL) {
2295 ext4_msg(sb, KERN_ERR, "can't allocate buddy meta group");
2296 return -ENOMEM;
2297 }
2298 sbi->s_buddy_cache = new_inode(sb);
2299 if (sbi->s_buddy_cache == NULL) {
2300 ext4_msg(sb, KERN_ERR, "can't get new inode");
2301 goto err_freesgi;
2302 }
2303 /* To avoid potentially colliding with a valid on-disk inode number,
2304 * use EXT4_BAD_INO for the buddy cache inode number. This inode is
2305 * not in the inode hash, so it should never be found by iget(), but

--- 11 unchanged lines hidden (view full) ---

2317 }
2318
2319 return 0;
2320
2321err_freebuddy:
2322 cachep = get_groupinfo_cache(sb->s_blocksize_bits);
2323 while (i-- > 0)
2324 kmem_cache_free(cachep, ext4_get_group_info(sb, i));
2297 sbi->s_buddy_cache = new_inode(sb);
2298 if (sbi->s_buddy_cache == NULL) {
2299 ext4_msg(sb, KERN_ERR, "can't get new inode");
2300 goto err_freesgi;
2301 }
2302 /* To avoid potentially colliding with a valid on-disk inode number,
2303 * use EXT4_BAD_INO for the buddy cache inode number. This inode is
2304 * not in the inode hash, so it should never be found by iget(), but

--- 11 unchanged lines hidden (view full) ---

2316 }
2317
2318 return 0;
2319
2320err_freebuddy:
2321 cachep = get_groupinfo_cache(sb->s_blocksize_bits);
2322 while (i-- > 0)
2323 kmem_cache_free(cachep, ext4_get_group_info(sb, i));
2325 i = num_meta_group_infos;
2324 i = sbi->s_group_info_size;
2326 while (i-- > 0)
2327 kfree(sbi->s_group_info[i]);
2328 iput(sbi->s_buddy_cache);
2329err_freesgi:
2330 ext4_kvfree(sbi->s_group_info);
2331 return -ENOMEM;
2332}
2333

--- 2720 unchanged lines hidden ---
2325 while (i-- > 0)
2326 kfree(sbi->s_group_info[i]);
2327 iput(sbi->s_buddy_cache);
2328err_freesgi:
2329 ext4_kvfree(sbi->s_group_info);
2330 return -ENOMEM;
2331}
2332

--- 2720 unchanged lines hidden ---