diff options
Diffstat (limited to 'fs/ext4/ialloc.c')
-rw-r--r-- | fs/ext4/ialloc.c | 215 |
1 files changed, 159 insertions, 56 deletions
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index ae3eb57dccdd..617f5a2d800a 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -410,6 +410,43 @@ out: return 0; } +struct orlov_stats { + __u32 free_inodes; + __u32 free_blocks; + __u32 used_dirs; +}; + +/* + * Helper function for Orlov's allocator; returns critical information + * for a particular block group or flex_bg. If flex_size is 1, then g + * is a block group number; otherwise it is flex_bg number. + */ +void get_orlov_stats(struct super_block *sb, ext4_group_t g, + int flex_size, struct orlov_stats *stats) +{ + struct ext4_group_desc *desc; + ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count; + int i; + + stats->free_inodes = 0; + stats->free_blocks = 0; + stats->used_dirs = 0; + + g *= flex_size; + + for (i = 0; i < flex_size; i++) { + if (g >= ngroups) + break; + desc = ext4_get_group_desc(sb, g++, NULL); + if (!desc) + continue; + + stats->free_inodes += ext4_free_inodes_count(sb, desc); + stats->free_blocks += ext4_free_blks_count(sb, desc); + stats->used_dirs += ext4_used_dirs_count(sb, desc); + } +} + /* * Orlov's allocator for directories. * @@ -425,35 +462,34 @@ out: * it has too many directories already (max_dirs) or * it has too few free inodes left (min_inodes) or * it has too few free blocks left (min_blocks) or - * it's already running too large debt (max_debt). * Parent's group is preferred, if it doesn't satisfy these * conditions we search cyclically through the rest. If none * of the groups look good we just look for a group with more * free inodes than average (starting at parent's group). - * - * Debt is incremented each time we allocate a directory and decremented - * when we allocate an inode, within 0--255. */ -#define INODE_COST 64 -#define BLOCK_COST 256 - static int find_group_orlov(struct super_block *sb, struct inode *parent, - ext4_group_t *group) + ext4_group_t *group, int mode) { ext4_group_t parent_group = EXT4_I(parent)->i_block_group; struct ext4_sb_info *sbi = EXT4_SB(sb); - struct ext4_super_block *es = sbi->s_es; ext4_group_t ngroups = sbi->s_groups_count; int inodes_per_group = EXT4_INODES_PER_GROUP(sb); unsigned int freei, avefreei; ext4_fsblk_t freeb, avefreeb; - ext4_fsblk_t blocks_per_dir; unsigned int ndirs; - int max_debt, max_dirs, min_inodes; + int max_dirs, min_inodes; ext4_grpblk_t min_blocks; - ext4_group_t i; + ext4_group_t i, grp, g; struct ext4_group_desc *desc; + struct orlov_stats stats; + int flex_size = ext4_flex_bg_size(sbi); + + if (flex_size > 1) { + ngroups = (ngroups + flex_size - 1) >> + sbi->s_log_groups_per_flex; + parent_group >>= sbi->s_log_groups_per_flex; + } freei = percpu_counter_read_positive(&sbi->s_freeinodes_counter); avefreei = freei / ngroups; @@ -462,71 +498,97 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent, do_div(avefreeb, ngroups); ndirs = percpu_counter_read_positive(&sbi->s_dirs_counter); - if ((parent == sb->s_root->d_inode) || - (EXT4_I(parent)->i_flags & EXT4_TOPDIR_FL)) { + if (S_ISDIR(mode) && + ((parent == sb->s_root->d_inode) || + (EXT4_I(parent)->i_flags & EXT4_TOPDIR_FL))) { int best_ndir = inodes_per_group; - ext4_group_t grp; int ret = -1; get_random_bytes(&grp, sizeof(grp)); parent_group = (unsigned)grp % ngroups; for (i = 0; i < ngroups; i++) { - grp = (parent_group + i) % ngroups; - desc = ext4_get_group_desc(sb, grp, NULL); - if (!desc || !ext4_free_inodes_count(sb, desc)) + g = (parent_group + i) % ngroups; + get_orlov_stats(sb, g, flex_size, &stats); + if (!stats.free_inodes) continue; - if (ext4_used_dirs_count(sb, desc) >= best_ndir) + if (stats.used_dirs >= best_ndir) continue; - if (ext4_free_inodes_count(sb, desc) < avefreei) + if (stats.free_inodes < avefreei) continue; - if (ext4_free_blks_count(sb, desc) < avefreeb) + if (stats.free_blocks < avefreeb) continue; - *group = grp; + grp = g; ret = 0; - best_ndir = ext4_used_dirs_count(sb, desc); + best_ndir = stats.used_dirs; + } + if (ret) + goto fallback; + found_flex_bg: + if (flex_size == 1) { + *group = grp; + return 0; + } + + /* + * We pack inodes at the beginning of the flexgroup's + * inode tables. Block allocation decisions will do + * something similar, although regular files will + * start at 2nd block group of the flexgroup. See + * ext4_ext_find_goal() and ext4_find_near(). + */ + grp *= flex_size; + for (i = 0; i < flex_size; i++) { + if (grp+i >= sbi->s_groups_count) + break; + desc = ext4_get_group_desc(sb, grp+i, NULL); + if (desc && ext4_free_inodes_count(sb, desc)) { + *group = grp+i; + return 0; + } } - if (ret == 0) - return ret; goto fallback; } - blocks_per_dir = ext4_blocks_count(es) - freeb; - do_div(blocks_per_dir, ndirs); - max_dirs = ndirs / ngroups + inodes_per_group / 16; - min_inodes = avefreei - inodes_per_group / 4; - min_blocks = avefreeb - EXT4_BLOCKS_PER_GROUP(sb) / 4; - - max_debt = EXT4_BLOCKS_PER_GROUP(sb); - max_debt /= max_t(int, blocks_per_dir, BLOCK_COST); - if (max_debt * INODE_COST > inodes_per_group) - max_debt = inodes_per_group / INODE_COST; - if (max_debt > 255) - max_debt = 255; - if (max_debt == 0) - max_debt = 1; + min_inodes = avefreei - inodes_per_group*flex_size / 4; + if (min_inodes < 1) + min_inodes = 1; + min_blocks = avefreeb - EXT4_BLOCKS_PER_GROUP(sb)*flex_size / 4; + + /* + * Start looking in the flex group where we last allocated an + * inode for this parent directory + */ + if (EXT4_I(parent)->i_last_alloc_group != ~0) { + parent_group = EXT4_I(parent)->i_last_alloc_group; + if (flex_size > 1) + parent_group >>= sbi->s_log_groups_per_flex; + } for (i = 0; i < ngroups; i++) { - *group = (parent_group + i) % ngroups; - desc = ext4_get_group_desc(sb, *group, NULL); - if (!desc || !ext4_free_inodes_count(sb, desc)) - continue; - if (ext4_used_dirs_count(sb, desc) >= max_dirs) + grp = (parent_group + i) % ngroups; + get_orlov_stats(sb, grp, flex_size, &stats); + if (stats.used_dirs >= max_dirs) continue; - if (ext4_free_inodes_count(sb, desc) < min_inodes) + if (stats.free_inodes < min_inodes) continue; - if (ext4_free_blks_count(sb, desc) < min_blocks) + if (stats.free_blocks < min_blocks) continue; - return 0; + goto found_flex_bg; } fallback: + ngroups = sbi->s_groups_count; + avefreei = freei / ngroups; + parent_group = EXT4_I(parent)->i_block_group; for (i = 0; i < ngroups; i++) { - *group = (parent_group + i) % ngroups; - desc = ext4_get_group_desc(sb, *group, NULL); + grp = (parent_group + i) % ngroups; + desc = ext4_get_group_desc(sb, grp, NULL); if (desc && ext4_free_inodes_count(sb, desc) && - ext4_free_inodes_count(sb, desc) >= avefreei) + ext4_free_inodes_count(sb, desc) >= avefreei) { + *group = grp; return 0; + } } if (avefreei) { @@ -542,12 +604,51 @@ fallback: } static int find_group_other(struct super_block *sb, struct inode *parent, - ext4_group_t *group) + ext4_group_t *group, int mode) { ext4_group_t parent_group = EXT4_I(parent)->i_block_group; ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count; struct ext4_group_desc *desc; - ext4_group_t i; + ext4_group_t i, last; + int flex_size = ext4_flex_bg_size(EXT4_SB(sb)); + + /* + * Try to place the inode is the same flex group as its + * parent. If we can't find space, use the Orlov algorithm to + * find another flex group, and store that information in the + * parent directory's inode information so that use that flex + * group for future allocations. + */ + if (flex_size > 1) { + int retry = 0; + + try_again: + parent_group &= ~(flex_size-1); + last = parent_group + flex_size; + if (last > ngroups) + last = ngroups; + for (i = parent_group; i < last; i++) { + desc = ext4_get_group_desc(sb, i, NULL); + if (desc && ext4_free_inodes_count(sb, desc)) { + *group = i; + return 0; + } + } + if (!retry && EXT4_I(parent)->i_last_alloc_group != ~0) { + retry = 1; + parent_group = EXT4_I(parent)->i_last_alloc_group; + goto try_again; + } + /* + * If this didn't work, use the Orlov search algorithm + * to find a new flex group; we pass in the mode to + * avoid the topdir algorithms. + */ + *group = parent_group + flex_size; + if (*group > ngroups) + *group = 0; + return find_group_orlov(sb, parent, group, mode); + } /* * Try to place the inode in its parent directory @@ -716,10 +817,10 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode) sbi = EXT4_SB(sb); es = sbi->s_es; - if (sbi->s_log_groups_per_flex) { + if (sbi->s_log_groups_per_flex && test_opt(sb, OLDALLOC)) { ret2 = find_group_flex(sb, dir, &group); if (ret2 == -1) { - ret2 = find_group_other(sb, dir, &group); + ret2 = find_group_other(sb, dir, &group, mode); if (ret2 == 0 && once) once = 0; printk(KERN_NOTICE "ext4: find_group_flex " @@ -733,11 +834,12 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode) if (test_opt(sb, OLDALLOC)) ret2 = find_group_dir(sb, dir, &group); else - ret2 = find_group_orlov(sb, dir, &group); + ret2 = find_group_orlov(sb, dir, &group, mode); } else - ret2 = find_group_other(sb, dir, &group); + ret2 = find_group_other(sb, dir, &group, mode); got_group: + EXT4_I(dir)->i_last_alloc_group = group; err = -ENOSPC; if (ret2 == -1) goto out; @@ -894,6 +996,7 @@ got: ei->i_file_acl = 0; ei->i_dtime = 0; ei->i_block_group = group; + ei->i_last_alloc_group = ~0; ext4_set_inode_flags(inode); if (IS_DIRSYNC(inode)) |