xref: /openbmc/linux/fs/ext2/ialloc.c (revision c900529f3d9161bfde5cca0754f83b4d3c3e0220)
1b2441318SGreg Kroah-Hartman // SPDX-License-Identifier: GPL-2.0
21da177e4SLinus Torvalds /*
31da177e4SLinus Torvalds  *  linux/fs/ext2/ialloc.c
41da177e4SLinus Torvalds  *
51da177e4SLinus Torvalds  * Copyright (C) 1992, 1993, 1994, 1995
61da177e4SLinus Torvalds  * Remy Card (card@masi.ibp.fr)
71da177e4SLinus Torvalds  * Laboratoire MASI - Institut Blaise Pascal
81da177e4SLinus Torvalds  * Universite Pierre et Marie Curie (Paris VI)
91da177e4SLinus Torvalds  *
101da177e4SLinus Torvalds  *  BSD ufs-inspired inode and directory allocation by
111da177e4SLinus Torvalds  *  Stephen Tweedie (sct@dcs.ed.ac.uk), 1993
121da177e4SLinus Torvalds  *  Big-endian to little-endian byte-swapping/bitmaps by
131da177e4SLinus Torvalds  *        David S. Miller (davem@caip.rutgers.edu), 1995
141da177e4SLinus Torvalds  */
151da177e4SLinus Torvalds 
161da177e4SLinus Torvalds #include <linux/quotaops.h>
171da177e4SLinus Torvalds #include <linux/sched.h>
181da177e4SLinus Torvalds #include <linux/backing-dev.h>
191da177e4SLinus Torvalds #include <linux/buffer_head.h>
201da177e4SLinus Torvalds #include <linux/random.h>
211da177e4SLinus Torvalds #include "ext2.h"
221da177e4SLinus Torvalds #include "xattr.h"
231da177e4SLinus Torvalds #include "acl.h"
241da177e4SLinus Torvalds 
251da177e4SLinus Torvalds /*
261da177e4SLinus Torvalds  * ialloc.c contains the inodes allocation and deallocation routines
271da177e4SLinus Torvalds  */
281da177e4SLinus Torvalds 
291da177e4SLinus Torvalds /*
301da177e4SLinus Torvalds  * The free inodes are managed by bitmaps.  A file system contains several
311da177e4SLinus Torvalds  * blocks groups.  Each group contains 1 bitmap block for blocks, 1 bitmap
321da177e4SLinus Torvalds  * block for inodes, N blocks for the inode table and data blocks.
331da177e4SLinus Torvalds  *
341da177e4SLinus Torvalds  * The file system contains group descriptors which are located after the
351da177e4SLinus Torvalds  * super block.  Each descriptor contains the number of the bitmap block and
361da177e4SLinus Torvalds  * the free blocks count in the block.
371da177e4SLinus Torvalds  */
381da177e4SLinus Torvalds 
391da177e4SLinus Torvalds 
401da177e4SLinus Torvalds /*
411da177e4SLinus Torvalds  * Read the inode allocation bitmap for a given block_group, reading
421da177e4SLinus Torvalds  * into the specified slot in the superblock's bitmap cache.
431da177e4SLinus Torvalds  *
441da177e4SLinus Torvalds  * Return buffer_head of bitmap on success or NULL.
451da177e4SLinus Torvalds  */
461da177e4SLinus Torvalds static struct buffer_head *
read_inode_bitmap(struct super_block * sb,unsigned long block_group)471da177e4SLinus Torvalds read_inode_bitmap(struct super_block * sb, unsigned long block_group)
481da177e4SLinus Torvalds {
491da177e4SLinus Torvalds 	struct ext2_group_desc *desc;
501da177e4SLinus Torvalds 	struct buffer_head *bh = NULL;
511da177e4SLinus Torvalds 
521da177e4SLinus Torvalds 	desc = ext2_get_group_desc(sb, block_group, NULL);
531da177e4SLinus Torvalds 	if (!desc)
541da177e4SLinus Torvalds 		goto error_out;
551da177e4SLinus Torvalds 
561da177e4SLinus Torvalds 	bh = sb_bread(sb, le32_to_cpu(desc->bg_inode_bitmap));
571da177e4SLinus Torvalds 	if (!bh)
581da177e4SLinus Torvalds 		ext2_error(sb, "read_inode_bitmap",
591da177e4SLinus Torvalds 			    "Cannot read inode bitmap - "
601da177e4SLinus Torvalds 			    "block_group = %lu, inode_bitmap = %u",
611da177e4SLinus Torvalds 			    block_group, le32_to_cpu(desc->bg_inode_bitmap));
621da177e4SLinus Torvalds error_out:
631da177e4SLinus Torvalds 	return bh;
641da177e4SLinus Torvalds }
651da177e4SLinus Torvalds 
ext2_release_inode(struct super_block * sb,int group,int dir)661da177e4SLinus Torvalds static void ext2_release_inode(struct super_block *sb, int group, int dir)
671da177e4SLinus Torvalds {
681da177e4SLinus Torvalds 	struct ext2_group_desc * desc;
691da177e4SLinus Torvalds 	struct buffer_head *bh;
701da177e4SLinus Torvalds 
711da177e4SLinus Torvalds 	desc = ext2_get_group_desc(sb, group, &bh);
721da177e4SLinus Torvalds 	if (!desc) {
731da177e4SLinus Torvalds 		ext2_error(sb, "ext2_release_inode",
741da177e4SLinus Torvalds 			"can't get descriptor for group %d", group);
751da177e4SLinus Torvalds 		return;
761da177e4SLinus Torvalds 	}
771da177e4SLinus Torvalds 
781da177e4SLinus Torvalds 	spin_lock(sb_bgl_lock(EXT2_SB(sb), group));
79fba4d399SMarcin Slusarz 	le16_add_cpu(&desc->bg_free_inodes_count, 1);
801da177e4SLinus Torvalds 	if (dir)
81fba4d399SMarcin Slusarz 		le16_add_cpu(&desc->bg_used_dirs_count, -1);
821da177e4SLinus Torvalds 	spin_unlock(sb_bgl_lock(EXT2_SB(sb), group));
83bc2fbaa4SMikulas Patocka 	percpu_counter_inc(&EXT2_SB(sb)->s_freeinodes_counter);
841da177e4SLinus Torvalds 	if (dir)
851da177e4SLinus Torvalds 		percpu_counter_dec(&EXT2_SB(sb)->s_dirs_counter);
861da177e4SLinus Torvalds 	mark_buffer_dirty(bh);
871da177e4SLinus Torvalds }
881da177e4SLinus Torvalds 
891da177e4SLinus Torvalds /*
901da177e4SLinus Torvalds  * NOTE! When we get the inode, we're the only people
911da177e4SLinus Torvalds  * that have access to it, and as such there are no
921da177e4SLinus Torvalds  * race conditions we have to worry about. The inode
931da177e4SLinus Torvalds  * is not on the hash-lists, and it cannot be reached
941da177e4SLinus Torvalds  * through the filesystem because the directory entry
951da177e4SLinus Torvalds  * has been deleted earlier.
961da177e4SLinus Torvalds  *
971da177e4SLinus Torvalds  * HOWEVER: we must make sure that we get no aliases,
981da177e4SLinus Torvalds  * which means that we have to call "clear_inode()"
991da177e4SLinus Torvalds  * _before_ we mark the inode not in use in the inode
1001da177e4SLinus Torvalds  * bitmaps. Otherwise a newly created file might use
1011da177e4SLinus Torvalds  * the same inode number (not actually the same pointer
1021da177e4SLinus Torvalds  * though), and then we'd have two inodes sharing the
1031da177e4SLinus Torvalds  * same inode number and space on the harddisk.
1041da177e4SLinus Torvalds  */
ext2_free_inode(struct inode * inode)1051da177e4SLinus Torvalds void ext2_free_inode (struct inode * inode)
1061da177e4SLinus Torvalds {
1071da177e4SLinus Torvalds 	struct super_block * sb = inode->i_sb;
1081da177e4SLinus Torvalds 	int is_directory;
1091da177e4SLinus Torvalds 	unsigned long ino;
110524e4a1dSFrancis Moreau 	struct buffer_head *bitmap_bh;
1111da177e4SLinus Torvalds 	unsigned long block_group;
1121da177e4SLinus Torvalds 	unsigned long bit;
1131da177e4SLinus Torvalds 	struct ext2_super_block * es;
1141da177e4SLinus Torvalds 
1151da177e4SLinus Torvalds 	ino = inode->i_ino;
1161da177e4SLinus Torvalds 	ext2_debug ("freeing inode %lu\n", ino);
1171da177e4SLinus Torvalds 
1181da177e4SLinus Torvalds 	/*
1191da177e4SLinus Torvalds 	 * Note: we must free any quota before locking the superblock,
1201da177e4SLinus Torvalds 	 * as writing the quota to disk may need the lock as well.
1211da177e4SLinus Torvalds 	 */
1221da177e4SLinus Torvalds 	/* Quota is already initialized in iput() */
12363936ddaSChristoph Hellwig 	dquot_free_inode(inode);
1249f754758SChristoph Hellwig 	dquot_drop(inode);
1251da177e4SLinus Torvalds 
1261da177e4SLinus Torvalds 	es = EXT2_SB(sb)->s_es;
1271da177e4SLinus Torvalds 	is_directory = S_ISDIR(inode->i_mode);
1281da177e4SLinus Torvalds 
1291da177e4SLinus Torvalds 	if (ino < EXT2_FIRST_INO(sb) ||
1301da177e4SLinus Torvalds 	    ino > le32_to_cpu(es->s_inodes_count)) {
1311da177e4SLinus Torvalds 		ext2_error (sb, "ext2_free_inode",
1321da177e4SLinus Torvalds 			    "reserved or nonexistent inode %lu", ino);
133524e4a1dSFrancis Moreau 		return;
1341da177e4SLinus Torvalds 	}
1351da177e4SLinus Torvalds 	block_group = (ino - 1) / EXT2_INODES_PER_GROUP(sb);
1361da177e4SLinus Torvalds 	bit = (ino - 1) % EXT2_INODES_PER_GROUP(sb);
1371da177e4SLinus Torvalds 	bitmap_bh = read_inode_bitmap(sb, block_group);
1381da177e4SLinus Torvalds 	if (!bitmap_bh)
139524e4a1dSFrancis Moreau 		return;
1401da177e4SLinus Torvalds 
1411da177e4SLinus Torvalds 	/* Ok, now we can actually update the inode bitmaps.. */
1421da177e4SLinus Torvalds 	if (!ext2_clear_bit_atomic(sb_bgl_lock(EXT2_SB(sb), block_group),
1431da177e4SLinus Torvalds 				bit, (void *) bitmap_bh->b_data))
1441da177e4SLinus Torvalds 		ext2_error (sb, "ext2_free_inode",
1451da177e4SLinus Torvalds 			      "bit already cleared for inode %lu", ino);
1461da177e4SLinus Torvalds 	else
1471da177e4SLinus Torvalds 		ext2_release_inode(sb, block_group, is_directory);
1481da177e4SLinus Torvalds 	mark_buffer_dirty(bitmap_bh);
1491751e8a6SLinus Torvalds 	if (sb->s_flags & SB_SYNCHRONOUS)
1501da177e4SLinus Torvalds 		sync_dirty_buffer(bitmap_bh);
151524e4a1dSFrancis Moreau 
1521da177e4SLinus Torvalds 	brelse(bitmap_bh);
1531da177e4SLinus Torvalds }
1541da177e4SLinus Torvalds 
1551da177e4SLinus Torvalds /*
1561da177e4SLinus Torvalds  * We perform asynchronous prereading of the new inode's inode block when
1571da177e4SLinus Torvalds  * we create the inode, in the expectation that the inode will be written
1581da177e4SLinus Torvalds  * back soon.  There are two reasons:
1591da177e4SLinus Torvalds  *
1601da177e4SLinus Torvalds  * - When creating a large number of files, the async prereads will be
1611da177e4SLinus Torvalds  *   nicely merged into large reads
1621da177e4SLinus Torvalds  * - When writing out a large number of inodes, we don't need to keep on
1631da177e4SLinus Torvalds  *   stalling the writes while we read the inode block.
1641da177e4SLinus Torvalds  *
1651da177e4SLinus Torvalds  * FIXME: ext2_get_group_desc() needs to be simplified.
1661da177e4SLinus Torvalds  */
ext2_preread_inode(struct inode * inode)1671da177e4SLinus Torvalds static void ext2_preread_inode(struct inode *inode)
1681da177e4SLinus Torvalds {
1691da177e4SLinus Torvalds 	unsigned long block_group;
1701da177e4SLinus Torvalds 	unsigned long offset;
1711da177e4SLinus Torvalds 	unsigned long block;
1721da177e4SLinus Torvalds 	struct ext2_group_desc * gdp;
1731da177e4SLinus Torvalds 
1741da177e4SLinus Torvalds 	block_group = (inode->i_ino - 1) / EXT2_INODES_PER_GROUP(inode->i_sb);
175ef2fb679SEric Sandeen 	gdp = ext2_get_group_desc(inode->i_sb, block_group, NULL);
1761da177e4SLinus Torvalds 	if (gdp == NULL)
1771da177e4SLinus Torvalds 		return;
1781da177e4SLinus Torvalds 
1791da177e4SLinus Torvalds 	/*
1801da177e4SLinus Torvalds 	 * Figure out the offset within the block group inode table
1811da177e4SLinus Torvalds 	 */
1821da177e4SLinus Torvalds 	offset = ((inode->i_ino - 1) % EXT2_INODES_PER_GROUP(inode->i_sb)) *
1831da177e4SLinus Torvalds 				EXT2_INODE_SIZE(inode->i_sb);
1841da177e4SLinus Torvalds 	block = le32_to_cpu(gdp->bg_inode_table) +
1851da177e4SLinus Torvalds 				(offset >> EXT2_BLOCK_SIZE_BITS(inode->i_sb));
1861da177e4SLinus Torvalds 	sb_breadahead(inode->i_sb, block);
1871da177e4SLinus Torvalds }
1881da177e4SLinus Torvalds 
1891da177e4SLinus Torvalds /*
1901da177e4SLinus Torvalds  * There are two policies for allocating an inode.  If the new inode is
1911da177e4SLinus Torvalds  * a directory, then a forward search is made for a block group with both
1921da177e4SLinus Torvalds  * free space and a low directory-to-inode ratio; if that fails, then of
1931da177e4SLinus Torvalds  * the groups with above-average free space, that group with the fewest
1941da177e4SLinus Torvalds  * directories already is chosen.
1951da177e4SLinus Torvalds  *
1961da177e4SLinus Torvalds  * For other inodes, search forward from the parent directory\'s block
1971da177e4SLinus Torvalds  * group to find a free inode.
1981da177e4SLinus Torvalds  */
find_group_dir(struct super_block * sb,struct inode * parent)1991da177e4SLinus Torvalds static int find_group_dir(struct super_block *sb, struct inode *parent)
2001da177e4SLinus Torvalds {
2011da177e4SLinus Torvalds 	int ngroups = EXT2_SB(sb)->s_groups_count;
2021da177e4SLinus Torvalds 	int avefreei = ext2_count_free_inodes(sb) / ngroups;
2031da177e4SLinus Torvalds 	struct ext2_group_desc *desc, *best_desc = NULL;
2041da177e4SLinus Torvalds 	int group, best_group = -1;
2051da177e4SLinus Torvalds 
2061da177e4SLinus Torvalds 	for (group = 0; group < ngroups; group++) {
207ef2fb679SEric Sandeen 		desc = ext2_get_group_desc (sb, group, NULL);
2081da177e4SLinus Torvalds 		if (!desc || !desc->bg_free_inodes_count)
2091da177e4SLinus Torvalds 			continue;
2101da177e4SLinus Torvalds 		if (le16_to_cpu(desc->bg_free_inodes_count) < avefreei)
2111da177e4SLinus Torvalds 			continue;
2121da177e4SLinus Torvalds 		if (!best_desc ||
2131da177e4SLinus Torvalds 		    (le16_to_cpu(desc->bg_free_blocks_count) >
2141da177e4SLinus Torvalds 		     le16_to_cpu(best_desc->bg_free_blocks_count))) {
2151da177e4SLinus Torvalds 			best_group = group;
2161da177e4SLinus Torvalds 			best_desc = desc;
2171da177e4SLinus Torvalds 		}
2181da177e4SLinus Torvalds 	}
2191da177e4SLinus Torvalds 
2201da177e4SLinus Torvalds 	return best_group;
2211da177e4SLinus Torvalds }
2221da177e4SLinus Torvalds 
2231da177e4SLinus Torvalds /*
2241da177e4SLinus Torvalds  * Orlov's allocator for directories.
2251da177e4SLinus Torvalds  *
2261da177e4SLinus Torvalds  * We always try to spread first-level directories.
2271da177e4SLinus Torvalds  *
2281da177e4SLinus Torvalds  * If there are blockgroups with both free inodes and free blocks counts
2291da177e4SLinus Torvalds  * not worse than average we return one with smallest directory count.
2301da177e4SLinus Torvalds  * Otherwise we simply return a random group.
2311da177e4SLinus Torvalds  *
2321da177e4SLinus Torvalds  * For the rest rules look so:
2331da177e4SLinus Torvalds  *
2341da177e4SLinus Torvalds  * It's OK to put directory into a group unless
2351da177e4SLinus Torvalds  * it has too many directories already (max_dirs) or
2361da177e4SLinus Torvalds  * it has too few free inodes left (min_inodes) or
2371da177e4SLinus Torvalds  * it has too few free blocks left (min_blocks) or
2381da177e4SLinus Torvalds  * it's already running too large debt (max_debt).
2391cc8dcf5SBenoit Boissinot  * Parent's group is preferred, if it doesn't satisfy these
2401da177e4SLinus Torvalds  * conditions we search cyclically through the rest. If none
2411da177e4SLinus Torvalds  * of the groups look good we just look for a group with more
2421da177e4SLinus Torvalds  * free inodes than average (starting at parent's group).
2431da177e4SLinus Torvalds  *
2441da177e4SLinus Torvalds  * Debt is incremented each time we allocate a directory and decremented
2451da177e4SLinus Torvalds  * when we allocate an inode, within 0--255.
2461da177e4SLinus Torvalds  */
2471da177e4SLinus Torvalds 
2481da177e4SLinus Torvalds #define INODE_COST 64
2491da177e4SLinus Torvalds #define BLOCK_COST 256
2501da177e4SLinus Torvalds 
find_group_orlov(struct super_block * sb,struct inode * parent)2511da177e4SLinus Torvalds static int find_group_orlov(struct super_block *sb, struct inode *parent)
2521da177e4SLinus Torvalds {
2531da177e4SLinus Torvalds 	int parent_group = EXT2_I(parent)->i_block_group;
2541da177e4SLinus Torvalds 	struct ext2_sb_info *sbi = EXT2_SB(sb);
2551da177e4SLinus Torvalds 	struct ext2_super_block *es = sbi->s_es;
2561da177e4SLinus Torvalds 	int ngroups = sbi->s_groups_count;
2571da177e4SLinus Torvalds 	int inodes_per_group = EXT2_INODES_PER_GROUP(sb);
2581da177e4SLinus Torvalds 	int freei;
2591da177e4SLinus Torvalds 	int avefreei;
2601da177e4SLinus Torvalds 	int free_blocks;
2611da177e4SLinus Torvalds 	int avefreeb;
2621da177e4SLinus Torvalds 	int blocks_per_dir;
2631da177e4SLinus Torvalds 	int ndirs;
2641da177e4SLinus Torvalds 	int max_debt, max_dirs, min_blocks, min_inodes;
2651da177e4SLinus Torvalds 	int group = -1, i;
2661da177e4SLinus Torvalds 	struct ext2_group_desc *desc;
2671da177e4SLinus Torvalds 
2681da177e4SLinus Torvalds 	freei = percpu_counter_read_positive(&sbi->s_freeinodes_counter);
2691da177e4SLinus Torvalds 	avefreei = freei / ngroups;
2701da177e4SLinus Torvalds 	free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter);
2711da177e4SLinus Torvalds 	avefreeb = free_blocks / ngroups;
2721da177e4SLinus Torvalds 	ndirs = percpu_counter_read_positive(&sbi->s_dirs_counter);
2731da177e4SLinus Torvalds 
2742b0143b5SDavid Howells 	if ((parent == d_inode(sb->s_root)) ||
2751da177e4SLinus Torvalds 	    (EXT2_I(parent)->i_flags & EXT2_TOPDIR_FL)) {
2761da177e4SLinus Torvalds 		int best_ndir = inodes_per_group;
2771da177e4SLinus Torvalds 		int best_group = -1;
2781da177e4SLinus Torvalds 
2798032bf12SJason A. Donenfeld 		parent_group = get_random_u32_below(ngroups);
2801da177e4SLinus Torvalds 		for (i = 0; i < ngroups; i++) {
2811da177e4SLinus Torvalds 			group = (parent_group + i) % ngroups;
282ef2fb679SEric Sandeen 			desc = ext2_get_group_desc (sb, group, NULL);
2831da177e4SLinus Torvalds 			if (!desc || !desc->bg_free_inodes_count)
2841da177e4SLinus Torvalds 				continue;
2851da177e4SLinus Torvalds 			if (le16_to_cpu(desc->bg_used_dirs_count) >= best_ndir)
2861da177e4SLinus Torvalds 				continue;
2871da177e4SLinus Torvalds 			if (le16_to_cpu(desc->bg_free_inodes_count) < avefreei)
2881da177e4SLinus Torvalds 				continue;
2891da177e4SLinus Torvalds 			if (le16_to_cpu(desc->bg_free_blocks_count) < avefreeb)
2901da177e4SLinus Torvalds 				continue;
2911da177e4SLinus Torvalds 			best_group = group;
2921da177e4SLinus Torvalds 			best_ndir = le16_to_cpu(desc->bg_used_dirs_count);
2931da177e4SLinus Torvalds 		}
2941da177e4SLinus Torvalds 		if (best_group >= 0) {
2951da177e4SLinus Torvalds 			group = best_group;
2961da177e4SLinus Torvalds 			goto found;
2971da177e4SLinus Torvalds 		}
2981da177e4SLinus Torvalds 		goto fallback;
2991da177e4SLinus Torvalds 	}
3001da177e4SLinus Torvalds 
3011da177e4SLinus Torvalds 	if (ndirs == 0)
3021da177e4SLinus Torvalds 		ndirs = 1;	/* percpu_counters are approximate... */
3031da177e4SLinus Torvalds 
3041da177e4SLinus Torvalds 	blocks_per_dir = (le32_to_cpu(es->s_blocks_count)-free_blocks) / ndirs;
3051da177e4SLinus Torvalds 
3061da177e4SLinus Torvalds 	max_dirs = ndirs / ngroups + inodes_per_group / 16;
3071da177e4SLinus Torvalds 	min_inodes = avefreei - inodes_per_group / 4;
3081da177e4SLinus Torvalds 	min_blocks = avefreeb - EXT2_BLOCKS_PER_GROUP(sb) / 4;
3091da177e4SLinus Torvalds 
3101da177e4SLinus Torvalds 	max_debt = EXT2_BLOCKS_PER_GROUP(sb) / max(blocks_per_dir, BLOCK_COST);
3111da177e4SLinus Torvalds 	if (max_debt * INODE_COST > inodes_per_group)
3121da177e4SLinus Torvalds 		max_debt = inodes_per_group / INODE_COST;
3131da177e4SLinus Torvalds 	if (max_debt > 255)
3141da177e4SLinus Torvalds 		max_debt = 255;
3151da177e4SLinus Torvalds 	if (max_debt == 0)
3161da177e4SLinus Torvalds 		max_debt = 1;
3171da177e4SLinus Torvalds 
3181da177e4SLinus Torvalds 	for (i = 0; i < ngroups; i++) {
3191da177e4SLinus Torvalds 		group = (parent_group + i) % ngroups;
320ef2fb679SEric Sandeen 		desc = ext2_get_group_desc (sb, group, NULL);
3211da177e4SLinus Torvalds 		if (!desc || !desc->bg_free_inodes_count)
3221da177e4SLinus Torvalds 			continue;
3231da177e4SLinus Torvalds 		if (sbi->s_debts[group] >= max_debt)
3241da177e4SLinus Torvalds 			continue;
3251da177e4SLinus Torvalds 		if (le16_to_cpu(desc->bg_used_dirs_count) >= max_dirs)
3261da177e4SLinus Torvalds 			continue;
3271da177e4SLinus Torvalds 		if (le16_to_cpu(desc->bg_free_inodes_count) < min_inodes)
3281da177e4SLinus Torvalds 			continue;
3291da177e4SLinus Torvalds 		if (le16_to_cpu(desc->bg_free_blocks_count) < min_blocks)
3301da177e4SLinus Torvalds 			continue;
3311da177e4SLinus Torvalds 		goto found;
3321da177e4SLinus Torvalds 	}
3331da177e4SLinus Torvalds 
3341da177e4SLinus Torvalds fallback:
3351da177e4SLinus Torvalds 	for (i = 0; i < ngroups; i++) {
3361da177e4SLinus Torvalds 		group = (parent_group + i) % ngroups;
337ef2fb679SEric Sandeen 		desc = ext2_get_group_desc (sb, group, NULL);
3381da177e4SLinus Torvalds 		if (!desc || !desc->bg_free_inodes_count)
3391da177e4SLinus Torvalds 			continue;
3401da177e4SLinus Torvalds 		if (le16_to_cpu(desc->bg_free_inodes_count) >= avefreei)
3411da177e4SLinus Torvalds 			goto found;
3421da177e4SLinus Torvalds 	}
3431da177e4SLinus Torvalds 
3441da177e4SLinus Torvalds 	if (avefreei) {
3451da177e4SLinus Torvalds 		/*
3461da177e4SLinus Torvalds 		 * The free-inodes counter is approximate, and for really small
3471da177e4SLinus Torvalds 		 * filesystems the above test can fail to find any blockgroups
3481da177e4SLinus Torvalds 		 */
3491da177e4SLinus Torvalds 		avefreei = 0;
3501da177e4SLinus Torvalds 		goto fallback;
3511da177e4SLinus Torvalds 	}
3521da177e4SLinus Torvalds 
3531da177e4SLinus Torvalds 	return -1;
3541da177e4SLinus Torvalds 
3551da177e4SLinus Torvalds found:
3561da177e4SLinus Torvalds 	return group;
3571da177e4SLinus Torvalds }
3581da177e4SLinus Torvalds 
find_group_other(struct super_block * sb,struct inode * parent)3591da177e4SLinus Torvalds static int find_group_other(struct super_block *sb, struct inode *parent)
3601da177e4SLinus Torvalds {
3611da177e4SLinus Torvalds 	int parent_group = EXT2_I(parent)->i_block_group;
3621da177e4SLinus Torvalds 	int ngroups = EXT2_SB(sb)->s_groups_count;
3631da177e4SLinus Torvalds 	struct ext2_group_desc *desc;
3641da177e4SLinus Torvalds 	int group, i;
3651da177e4SLinus Torvalds 
3661da177e4SLinus Torvalds 	/*
3671da177e4SLinus Torvalds 	 * Try to place the inode in its parent directory
3681da177e4SLinus Torvalds 	 */
3691da177e4SLinus Torvalds 	group = parent_group;
370ef2fb679SEric Sandeen 	desc = ext2_get_group_desc (sb, group, NULL);
3711da177e4SLinus Torvalds 	if (desc && le16_to_cpu(desc->bg_free_inodes_count) &&
3721da177e4SLinus Torvalds 			le16_to_cpu(desc->bg_free_blocks_count))
3731da177e4SLinus Torvalds 		goto found;
3741da177e4SLinus Torvalds 
3751da177e4SLinus Torvalds 	/*
3761da177e4SLinus Torvalds 	 * We're going to place this inode in a different blockgroup from its
3771da177e4SLinus Torvalds 	 * parent.  We want to cause files in a common directory to all land in
3781da177e4SLinus Torvalds 	 * the same blockgroup.  But we want files which are in a different
3791da177e4SLinus Torvalds 	 * directory which shares a blockgroup with our parent to land in a
3801da177e4SLinus Torvalds 	 * different blockgroup.
3811da177e4SLinus Torvalds 	 *
3821da177e4SLinus Torvalds 	 * So add our directory's i_ino into the starting point for the hash.
3831da177e4SLinus Torvalds 	 */
3841da177e4SLinus Torvalds 	group = (group + parent->i_ino) % ngroups;
3851da177e4SLinus Torvalds 
3861da177e4SLinus Torvalds 	/*
3871da177e4SLinus Torvalds 	 * Use a quadratic hash to find a group with a free inode and some
3881da177e4SLinus Torvalds 	 * free blocks.
3891da177e4SLinus Torvalds 	 */
3901da177e4SLinus Torvalds 	for (i = 1; i < ngroups; i <<= 1) {
3911da177e4SLinus Torvalds 		group += i;
3921da177e4SLinus Torvalds 		if (group >= ngroups)
3931da177e4SLinus Torvalds 			group -= ngroups;
394ef2fb679SEric Sandeen 		desc = ext2_get_group_desc (sb, group, NULL);
3951da177e4SLinus Torvalds 		if (desc && le16_to_cpu(desc->bg_free_inodes_count) &&
3961da177e4SLinus Torvalds 				le16_to_cpu(desc->bg_free_blocks_count))
3971da177e4SLinus Torvalds 			goto found;
3981da177e4SLinus Torvalds 	}
3991da177e4SLinus Torvalds 
4001da177e4SLinus Torvalds 	/*
4011da177e4SLinus Torvalds 	 * That failed: try linear search for a free inode, even if that group
4021da177e4SLinus Torvalds 	 * has no free blocks.
4031da177e4SLinus Torvalds 	 */
4041da177e4SLinus Torvalds 	group = parent_group;
4051da177e4SLinus Torvalds 	for (i = 0; i < ngroups; i++) {
4061da177e4SLinus Torvalds 		if (++group >= ngroups)
4071da177e4SLinus Torvalds 			group = 0;
408ef2fb679SEric Sandeen 		desc = ext2_get_group_desc (sb, group, NULL);
4091da177e4SLinus Torvalds 		if (desc && le16_to_cpu(desc->bg_free_inodes_count))
4101da177e4SLinus Torvalds 			goto found;
4111da177e4SLinus Torvalds 	}
4121da177e4SLinus Torvalds 
4131da177e4SLinus Torvalds 	return -1;
4141da177e4SLinus Torvalds 
4151da177e4SLinus Torvalds found:
4161da177e4SLinus Torvalds 	return group;
4171da177e4SLinus Torvalds }
4181da177e4SLinus Torvalds 
ext2_new_inode(struct inode * dir,umode_t mode,const struct qstr * qstr)4193ea40bc9SAl Viro struct inode *ext2_new_inode(struct inode *dir, umode_t mode,
4202a7dba39SEric Paris 			     const struct qstr *qstr)
4211da177e4SLinus Torvalds {
4221da177e4SLinus Torvalds 	struct super_block *sb;
4231da177e4SLinus Torvalds 	struct buffer_head *bitmap_bh = NULL;
4241da177e4SLinus Torvalds 	struct buffer_head *bh2;
4251da177e4SLinus Torvalds 	int group, i;
4261da177e4SLinus Torvalds 	ino_t ino = 0;
4271da177e4SLinus Torvalds 	struct inode * inode;
4281da177e4SLinus Torvalds 	struct ext2_group_desc *gdp;
4291da177e4SLinus Torvalds 	struct ext2_super_block *es;
4301da177e4SLinus Torvalds 	struct ext2_inode_info *ei;
4311da177e4SLinus Torvalds 	struct ext2_sb_info *sbi;
4321da177e4SLinus Torvalds 	int err;
4331da177e4SLinus Torvalds 
4341da177e4SLinus Torvalds 	sb = dir->i_sb;
4351da177e4SLinus Torvalds 	inode = new_inode(sb);
4361da177e4SLinus Torvalds 	if (!inode)
4371da177e4SLinus Torvalds 		return ERR_PTR(-ENOMEM);
4381da177e4SLinus Torvalds 
4391da177e4SLinus Torvalds 	ei = EXT2_I(inode);
4401da177e4SLinus Torvalds 	sbi = EXT2_SB(sb);
4411da177e4SLinus Torvalds 	es = sbi->s_es;
4421da177e4SLinus Torvalds 	if (S_ISDIR(mode)) {
4431da177e4SLinus Torvalds 		if (test_opt(sb, OLDALLOC))
4441da177e4SLinus Torvalds 			group = find_group_dir(sb, dir);
4451da177e4SLinus Torvalds 		else
4461da177e4SLinus Torvalds 			group = find_group_orlov(sb, dir);
4471da177e4SLinus Torvalds 	} else
4481da177e4SLinus Torvalds 		group = find_group_other(sb, dir);
4491da177e4SLinus Torvalds 
4501da177e4SLinus Torvalds 	if (group == -1) {
4511da177e4SLinus Torvalds 		err = -ENOSPC;
4521da177e4SLinus Torvalds 		goto fail;
4531da177e4SLinus Torvalds 	}
4541da177e4SLinus Torvalds 
4551da177e4SLinus Torvalds 	for (i = 0; i < sbi->s_groups_count; i++) {
4561da177e4SLinus Torvalds 		gdp = ext2_get_group_desc(sb, group, &bh2);
457f7a1c358SJan Kara 		if (!gdp) {
458f7a1c358SJan Kara 			if (++group == sbi->s_groups_count)
459f7a1c358SJan Kara 				group = 0;
460f7a1c358SJan Kara 			continue;
461f7a1c358SJan Kara 		}
4621da177e4SLinus Torvalds 		brelse(bitmap_bh);
4631da177e4SLinus Torvalds 		bitmap_bh = read_inode_bitmap(sb, group);
4641da177e4SLinus Torvalds 		if (!bitmap_bh) {
4651da177e4SLinus Torvalds 			err = -EIO;
4661da177e4SLinus Torvalds 			goto fail;
4671da177e4SLinus Torvalds 		}
4681da177e4SLinus Torvalds 		ino = 0;
4691da177e4SLinus Torvalds 
4701da177e4SLinus Torvalds repeat_in_this_group:
4711da177e4SLinus Torvalds 		ino = ext2_find_next_zero_bit((unsigned long *)bitmap_bh->b_data,
4721da177e4SLinus Torvalds 					      EXT2_INODES_PER_GROUP(sb), ino);
4731da177e4SLinus Torvalds 		if (ino >= EXT2_INODES_PER_GROUP(sb)) {
4741da177e4SLinus Torvalds 			/*
4751da177e4SLinus Torvalds 			 * Rare race: find_group_xx() decided that there were
4761da177e4SLinus Torvalds 			 * free inodes in this group, but by the time we tried
4771da177e4SLinus Torvalds 			 * to allocate one, they're all gone.  This can also
4781da177e4SLinus Torvalds 			 * occur because the counters which find_group_orlov()
4791da177e4SLinus Torvalds 			 * uses are approximate.  So just go and search the
4801da177e4SLinus Torvalds 			 * next block group.
4811da177e4SLinus Torvalds 			 */
4821da177e4SLinus Torvalds 			if (++group == sbi->s_groups_count)
4831da177e4SLinus Torvalds 				group = 0;
4841da177e4SLinus Torvalds 			continue;
4851da177e4SLinus Torvalds 		}
4861da177e4SLinus Torvalds 		if (ext2_set_bit_atomic(sb_bgl_lock(sbi, group),
4871da177e4SLinus Torvalds 						ino, bitmap_bh->b_data)) {
4881da177e4SLinus Torvalds 			/* we lost this inode */
4891da177e4SLinus Torvalds 			if (++ino >= EXT2_INODES_PER_GROUP(sb)) {
4901da177e4SLinus Torvalds 				/* this group is exhausted, try next group */
4911da177e4SLinus Torvalds 				if (++group == sbi->s_groups_count)
4921da177e4SLinus Torvalds 					group = 0;
4931da177e4SLinus Torvalds 				continue;
4941da177e4SLinus Torvalds 			}
4951da177e4SLinus Torvalds 			/* try to find free inode in the same group */
4961da177e4SLinus Torvalds 			goto repeat_in_this_group;
4971da177e4SLinus Torvalds 		}
4981da177e4SLinus Torvalds 		goto got;
4991da177e4SLinus Torvalds 	}
5001da177e4SLinus Torvalds 
5011da177e4SLinus Torvalds 	/*
5021da177e4SLinus Torvalds 	 * Scanned all blockgroups.
5031da177e4SLinus Torvalds 	 */
504dc1f7380SChengguang Xu 	brelse(bitmap_bh);
5051da177e4SLinus Torvalds 	err = -ENOSPC;
5061da177e4SLinus Torvalds 	goto fail;
5071da177e4SLinus Torvalds got:
5081da177e4SLinus Torvalds 	mark_buffer_dirty(bitmap_bh);
5091751e8a6SLinus Torvalds 	if (sb->s_flags & SB_SYNCHRONOUS)
5101da177e4SLinus Torvalds 		sync_dirty_buffer(bitmap_bh);
5111da177e4SLinus Torvalds 	brelse(bitmap_bh);
5121da177e4SLinus Torvalds 
5131da177e4SLinus Torvalds 	ino += group * EXT2_INODES_PER_GROUP(sb) + 1;
5141da177e4SLinus Torvalds 	if (ino < EXT2_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) {
5151da177e4SLinus Torvalds 		ext2_error (sb, "ext2_new_inode",
5161da177e4SLinus Torvalds 			    "reserved inode or inode > inodes count - "
5171da177e4SLinus Torvalds 			    "block_group = %d,inode=%lu", group,
5181da177e4SLinus Torvalds 			    (unsigned long) ino);
5191da177e4SLinus Torvalds 		err = -EIO;
5201da177e4SLinus Torvalds 		goto fail;
5211da177e4SLinus Torvalds 	}
5221da177e4SLinus Torvalds 
523bc2fbaa4SMikulas Patocka 	percpu_counter_dec(&sbi->s_freeinodes_counter);
5241da177e4SLinus Torvalds 	if (S_ISDIR(mode))
5251da177e4SLinus Torvalds 		percpu_counter_inc(&sbi->s_dirs_counter);
5261da177e4SLinus Torvalds 
5271da177e4SLinus Torvalds 	spin_lock(sb_bgl_lock(sbi, group));
528fba4d399SMarcin Slusarz 	le16_add_cpu(&gdp->bg_free_inodes_count, -1);
5291da177e4SLinus Torvalds 	if (S_ISDIR(mode)) {
5301da177e4SLinus Torvalds 		if (sbi->s_debts[group] < 255)
5311da177e4SLinus Torvalds 			sbi->s_debts[group]++;
532fba4d399SMarcin Slusarz 		le16_add_cpu(&gdp->bg_used_dirs_count, 1);
5331da177e4SLinus Torvalds 	} else {
5341da177e4SLinus Torvalds 		if (sbi->s_debts[group])
5351da177e4SLinus Torvalds 			sbi->s_debts[group]--;
5361da177e4SLinus Torvalds 	}
5371da177e4SLinus Torvalds 	spin_unlock(sb_bgl_lock(sbi, group));
5381da177e4SLinus Torvalds 
5391da177e4SLinus Torvalds 	mark_buffer_dirty(bh2);
540ffba102dSDmitry Monakhov 	if (test_opt(sb, GRPID)) {
5411da177e4SLinus Torvalds 		inode->i_mode = mode;
542ffba102dSDmitry Monakhov 		inode->i_uid = current_fsuid();
543ffba102dSDmitry Monakhov 		inode->i_gid = dir->i_gid;
544ffba102dSDmitry Monakhov 	} else
545f2d40141SChristian Brauner 		inode_init_owner(&nop_mnt_idmap, inode, dir, mode);
5461da177e4SLinus Torvalds 
5471da177e4SLinus Torvalds 	inode->i_ino = ino;
5481da177e4SLinus Torvalds 	inode->i_blocks = 0;
549*fc4eed64SJeff Layton 	inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode);
5501da177e4SLinus Torvalds 	memset(ei->i_data, 0, sizeof(ei->i_data));
551ef8b6461SDuane Griffin 	ei->i_flags =
552ef8b6461SDuane Griffin 		ext2_mask_flags(mode, EXT2_I(dir)->i_flags & EXT2_FL_INHERITED);
5531da177e4SLinus Torvalds 	ei->i_faddr = 0;
5541da177e4SLinus Torvalds 	ei->i_frag_no = 0;
5551da177e4SLinus Torvalds 	ei->i_frag_size = 0;
5561da177e4SLinus Torvalds 	ei->i_file_acl = 0;
5571da177e4SLinus Torvalds 	ei->i_dir_acl = 0;
5581da177e4SLinus Torvalds 	ei->i_dtime = 0;
559a686cd89SMartin J. Bligh 	ei->i_block_alloc_info = NULL;
5601da177e4SLinus Torvalds 	ei->i_block_group = group;
5611da177e4SLinus Torvalds 	ei->i_dir_start_lookup = 0;
5621da177e4SLinus Torvalds 	ei->i_state = EXT2_STATE_NEW;
5631da177e4SLinus Torvalds 	ext2_set_inode_flags(inode);
5641da177e4SLinus Torvalds 	spin_lock(&sbi->s_next_gen_lock);
5651da177e4SLinus Torvalds 	inode->i_generation = sbi->s_next_generation++;
5661da177e4SLinus Torvalds 	spin_unlock(&sbi->s_next_gen_lock);
56741080b5aSAl Viro 	if (insert_inode_locked(inode) < 0) {
568ef6919c2SJan Kara 		ext2_error(sb, "ext2_new_inode",
569ef6919c2SJan Kara 			   "inode number already in use - inode=%lu",
570ef6919c2SJan Kara 			   (unsigned long) ino);
571ef6919c2SJan Kara 		err = -EIO;
572ef6919c2SJan Kara 		goto fail;
57341080b5aSAl Viro 	}
5741da177e4SLinus Torvalds 
575c2edb305SJan Kara 	err = dquot_initialize(inode);
576c2edb305SJan Kara 	if (err)
577c2edb305SJan Kara 		goto fail_drop;
578c2edb305SJan Kara 
57963936ddaSChristoph Hellwig 	err = dquot_alloc_inode(inode);
58063936ddaSChristoph Hellwig 	if (err)
5819ed6c2fbSChris Sykes 		goto fail_drop;
5829ed6c2fbSChris Sykes 
5831da177e4SLinus Torvalds 	err = ext2_init_acl(inode, dir);
5849ed6c2fbSChris Sykes 	if (err)
5859ed6c2fbSChris Sykes 		goto fail_free_drop;
5869ed6c2fbSChris Sykes 
5872a7dba39SEric Paris 	err = ext2_init_security(inode, dir, qstr);
5889ed6c2fbSChris Sykes 	if (err)
5899ed6c2fbSChris Sykes 		goto fail_free_drop;
5909ed6c2fbSChris Sykes 
5911da177e4SLinus Torvalds 	mark_inode_dirty(inode);
5921da177e4SLinus Torvalds 	ext2_debug("allocating inode %lu\n", inode->i_ino);
5931da177e4SLinus Torvalds 	ext2_preread_inode(inode);
5941da177e4SLinus Torvalds 	return inode;
5951da177e4SLinus Torvalds 
5969ed6c2fbSChris Sykes fail_free_drop:
59763936ddaSChristoph Hellwig 	dquot_free_inode(inode);
5989ed6c2fbSChris Sykes 
5999ed6c2fbSChris Sykes fail_drop:
6009f754758SChristoph Hellwig 	dquot_drop(inode);
6011da177e4SLinus Torvalds 	inode->i_flags |= S_NOQUOTA;
6026d6b77f1SMiklos Szeredi 	clear_nlink(inode);
6032e5afe54SAl Viro 	discard_new_inode(inode);
6041da177e4SLinus Torvalds 	return ERR_PTR(err);
6051da177e4SLinus Torvalds 
6061da177e4SLinus Torvalds fail:
6071da177e4SLinus Torvalds 	make_bad_inode(inode);
6081da177e4SLinus Torvalds 	iput(inode);
6091da177e4SLinus Torvalds 	return ERR_PTR(err);
6101da177e4SLinus Torvalds }
6111da177e4SLinus Torvalds 
ext2_count_free_inodes(struct super_block * sb)6121da177e4SLinus Torvalds unsigned long ext2_count_free_inodes (struct super_block * sb)
6131da177e4SLinus Torvalds {
6141da177e4SLinus Torvalds 	struct ext2_group_desc *desc;
6151da177e4SLinus Torvalds 	unsigned long desc_count = 0;
6161da177e4SLinus Torvalds 	int i;
6171da177e4SLinus Torvalds 
6181da177e4SLinus Torvalds #ifdef EXT2FS_DEBUG
6191da177e4SLinus Torvalds 	struct ext2_super_block *es;
6201da177e4SLinus Torvalds 	unsigned long bitmap_count = 0;
6211da177e4SLinus Torvalds 	struct buffer_head *bitmap_bh = NULL;
6221da177e4SLinus Torvalds 
6231da177e4SLinus Torvalds 	es = EXT2_SB(sb)->s_es;
6241da177e4SLinus Torvalds 	for (i = 0; i < EXT2_SB(sb)->s_groups_count; i++) {
6251da177e4SLinus Torvalds 		unsigned x;
6261da177e4SLinus Torvalds 
6271da177e4SLinus Torvalds 		desc = ext2_get_group_desc (sb, i, NULL);
6281da177e4SLinus Torvalds 		if (!desc)
6291da177e4SLinus Torvalds 			continue;
6301da177e4SLinus Torvalds 		desc_count += le16_to_cpu(desc->bg_free_inodes_count);
6311da177e4SLinus Torvalds 		brelse(bitmap_bh);
6321da177e4SLinus Torvalds 		bitmap_bh = read_inode_bitmap(sb, i);
6331da177e4SLinus Torvalds 		if (!bitmap_bh)
6341da177e4SLinus Torvalds 			continue;
6351da177e4SLinus Torvalds 
6361da177e4SLinus Torvalds 		x = ext2_count_free(bitmap_bh, EXT2_INODES_PER_GROUP(sb) / 8);
6371da177e4SLinus Torvalds 		printk("group %d: stored = %d, counted = %u\n",
6381da177e4SLinus Torvalds 			i, le16_to_cpu(desc->bg_free_inodes_count), x);
6391da177e4SLinus Torvalds 		bitmap_count += x;
6401da177e4SLinus Torvalds 	}
6411da177e4SLinus Torvalds 	brelse(bitmap_bh);
6421da177e4SLinus Torvalds 	printk("ext2_count_free_inodes: stored = %lu, computed = %lu, %lu\n",
643ecd0afa3SAkinobu Mita 		(unsigned long)
6441da177e4SLinus Torvalds 		percpu_counter_read(&EXT2_SB(sb)->s_freeinodes_counter),
6451da177e4SLinus Torvalds 		desc_count, bitmap_count);
6461da177e4SLinus Torvalds 	return desc_count;
6471da177e4SLinus Torvalds #else
6481da177e4SLinus Torvalds 	for (i = 0; i < EXT2_SB(sb)->s_groups_count; i++) {
6491da177e4SLinus Torvalds 		desc = ext2_get_group_desc (sb, i, NULL);
6501da177e4SLinus Torvalds 		if (!desc)
6511da177e4SLinus Torvalds 			continue;
6521da177e4SLinus Torvalds 		desc_count += le16_to_cpu(desc->bg_free_inodes_count);
6531da177e4SLinus Torvalds 	}
6541da177e4SLinus Torvalds 	return desc_count;
6551da177e4SLinus Torvalds #endif
6561da177e4SLinus Torvalds }
6571da177e4SLinus Torvalds 
6581da177e4SLinus Torvalds /* Called at mount-time, super-block is locked */
ext2_count_dirs(struct super_block * sb)6591da177e4SLinus Torvalds unsigned long ext2_count_dirs (struct super_block * sb)
6601da177e4SLinus Torvalds {
6611da177e4SLinus Torvalds 	unsigned long count = 0;
6621da177e4SLinus Torvalds 	int i;
6631da177e4SLinus Torvalds 
6641da177e4SLinus Torvalds 	for (i = 0; i < EXT2_SB(sb)->s_groups_count; i++) {
6651da177e4SLinus Torvalds 		struct ext2_group_desc *gdp = ext2_get_group_desc (sb, i, NULL);
6661da177e4SLinus Torvalds 		if (!gdp)
6671da177e4SLinus Torvalds 			continue;
6681da177e4SLinus Torvalds 		count += le16_to_cpu(gdp->bg_used_dirs_count);
6691da177e4SLinus Torvalds 	}
6701da177e4SLinus Torvalds 	return count;
6711da177e4SLinus Torvalds }
6721da177e4SLinus Torvalds 
673