diff options
Diffstat (limited to 'fs/ext4')
-rw-r--r-- | fs/ext4/balloc.c | 8 | ||||
-rw-r--r-- | fs/ext4/ialloc.c | 58 | ||||
-rw-r--r-- | fs/ext4/inode.c | 149 | ||||
-rw-r--r-- | fs/ext4/mballoc.c | 164 | ||||
-rw-r--r-- | fs/ext4/migrate.c | 123 | ||||
-rw-r--r-- | fs/ext4/namei.c | 30 | ||||
-rw-r--r-- | fs/ext4/resize.c | 7 | ||||
-rw-r--r-- | fs/ext4/super.c | 69 |
8 files changed, 349 insertions, 259 deletions
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index ac75ea953d83..0737e05ba3dd 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -1700,7 +1700,7 @@ retry_alloc: /* * Now search the rest of the groups. We assume that - * i and gdp correctly point to the last group visited. + * group_no and gdp correctly point to the last group visited. */ for (bgi = 0; bgi < ngroups; bgi++) { group_no++; @@ -2011,11 +2011,7 @@ static unsigned long ext4_bg_num_gdb_meta(struct super_block *sb, static unsigned long ext4_bg_num_gdb_nometa(struct super_block *sb, ext4_group_t group) { - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER) && - !ext4_group_sparse(group)) - return 0; - return EXT4_SB(sb)->s_gdb_count; + return ext4_bg_has_super(sb, group) ? EXT4_SB(sb)->s_gdb_count : 0; } /** diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 575b5215c808..da18a74b966a 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -782,14 +782,15 @@ struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino) unsigned long max_ino = le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count); ext4_group_t block_group; int bit; - struct buffer_head *bitmap_bh = NULL; + struct buffer_head *bitmap_bh; struct inode *inode = NULL; + long err = -EIO; /* Error cases - e2fsck has already cleaned up for us */ if (ino > max_ino) { ext4_warning(sb, __FUNCTION__, "bad orphan ino %lu! e2fsck was run?", ino); - goto out; + goto error; } block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb); @@ -798,38 +799,49 @@ struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino) if (!bitmap_bh) { ext4_warning(sb, __FUNCTION__, "inode bitmap error for orphan %lu", ino); - goto out; + goto error; } /* Having the inode bit set should be a 100% indicator that this * is a valid orphan (no e2fsck run on fs). Orphans also include * inodes that were being truncated, so we can't check i_nlink==0. */ - if (!ext4_test_bit(bit, bitmap_bh->b_data) || - !(inode = iget(sb, ino)) || is_bad_inode(inode) || - NEXT_ORPHAN(inode) > max_ino) { - ext4_warning(sb, __FUNCTION__, - "bad orphan inode %lu! e2fsck was run?", ino); - printk(KERN_NOTICE "ext4_test_bit(bit=%d, block=%llu) = %d\n", - bit, (unsigned long long)bitmap_bh->b_blocknr, - ext4_test_bit(bit, bitmap_bh->b_data)); - printk(KERN_NOTICE "inode=%p\n", inode); - if (inode) { - printk(KERN_NOTICE "is_bad_inode(inode)=%d\n", - is_bad_inode(inode)); - printk(KERN_NOTICE "NEXT_ORPHAN(inode)=%u\n", - NEXT_ORPHAN(inode)); - printk(KERN_NOTICE "max_ino=%lu\n", max_ino); - } + if (!ext4_test_bit(bit, bitmap_bh->b_data)) + goto bad_orphan; + + inode = ext4_iget(sb, ino); + if (IS_ERR(inode)) + goto iget_failed; + + if (NEXT_ORPHAN(inode) > max_ino) + goto bad_orphan; + brelse(bitmap_bh); + return inode; + +iget_failed: + err = PTR_ERR(inode); + inode = NULL; +bad_orphan: + ext4_warning(sb, __FUNCTION__, + "bad orphan inode %lu! e2fsck was run?", ino); + printk(KERN_NOTICE "ext4_test_bit(bit=%d, block=%llu) = %d\n", + bit, (unsigned long long)bitmap_bh->b_blocknr, + ext4_test_bit(bit, bitmap_bh->b_data)); + printk(KERN_NOTICE "inode=%p\n", inode); + if (inode) { + printk(KERN_NOTICE "is_bad_inode(inode)=%d\n", + is_bad_inode(inode)); + printk(KERN_NOTICE "NEXT_ORPHAN(inode)=%u\n", + NEXT_ORPHAN(inode)); + printk(KERN_NOTICE "max_ino=%lu\n", max_ino); /* Avoid freeing blocks if we got a bad deleted inode */ - if (inode && inode->i_nlink == 0) + if (inode->i_nlink == 0) inode->i_blocks = 0; iput(inode); - inode = NULL; } -out: brelse(bitmap_bh); - return inode; +error: + return ERR_PTR(err); } unsigned long ext4_count_free_inodes (struct super_block * sb) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 05c4145dd27d..7dd9b50d5ebc 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -429,16 +429,13 @@ static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind) * ext4_find_goal - find a prefered place for allocation. * @inode: owner * @block: block we want - * @chain: chain of indirect blocks * @partial: pointer to the last triple within a chain - * @goal: place to store the result. * * Normally this function find the prefered place for block allocation, - * stores it in *@goal and returns zero. + * returns it. */ - static ext4_fsblk_t ext4_find_goal(struct inode *inode, ext4_lblk_t block, - Indirect chain[4], Indirect *partial) + Indirect *partial) { struct ext4_block_alloc_info *block_i; @@ -839,7 +836,7 @@ int ext4_get_blocks_handle(handle_t *handle, struct inode *inode, if (S_ISREG(inode->i_mode) && (!ei->i_block_alloc_info)) ext4_init_block_alloc_info(inode); - goal = ext4_find_goal(inode, iblock, chain, partial); + goal = ext4_find_goal(inode, iblock, partial); /* the number of blocks need to allocate for [d,t]indirect blocks */ indirect_blks = (chain + depth) - partial - 1; @@ -895,7 +892,16 @@ out: return err; } -#define DIO_CREDITS (EXT4_RESERVE_TRANS_BLOCKS + 32) +/* Maximum number of blocks we map for direct IO at once. */ +#define DIO_MAX_BLOCKS 4096 +/* + * Number of credits we need for writing DIO_MAX_BLOCKS: + * We need sb + group descriptor + bitmap + inode -> 4 + * For B blocks with A block pointers per block we need: + * 1 (triple ind.) + (B/A/A + 2) (doubly ind.) + (B/A + 2) (indirect). + * If we plug in 4096 for B and 256 for A (for 1KB block size), we get 25. + */ +#define DIO_CREDITS 25 int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block, unsigned long max_blocks, struct buffer_head *bh, @@ -942,49 +948,31 @@ static int ext4_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) { handle_t *handle = ext4_journal_current_handle(); - int ret = 0; + int ret = 0, started = 0; unsigned max_blocks = bh_result->b_size >> inode->i_blkbits; - if (!create) - goto get_block; /* A read */ - - if (max_blocks == 1) - goto get_block; /* A single block get */ - - if (handle->h_transaction->t_state == T_LOCKED) { - /* - * Huge direct-io writes can hold off commits for long - * periods of time. Let this commit run. - */ - ext4_journal_stop(handle); - handle = ext4_journal_start(inode, DIO_CREDITS); - if (IS_ERR(handle)) + if (create && !handle) { + /* Direct IO write... */ + if (max_blocks > DIO_MAX_BLOCKS) + max_blocks = DIO_MAX_BLOCKS; + handle = ext4_journal_start(inode, DIO_CREDITS + + 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb)); + if (IS_ERR(handle)) { ret = PTR_ERR(handle); - goto get_block; - } - - if (handle->h_buffer_credits <= EXT4_RESERVE_TRANS_BLOCKS) { - /* - * Getting low on buffer credits... - */ - ret = ext4_journal_extend(handle, DIO_CREDITS); - if (ret > 0) { - /* - * Couldn't extend the transaction. Start a new one. - */ - ret = ext4_journal_restart(handle, DIO_CREDITS); + goto out; } + started = 1; } -get_block: - if (ret == 0) { - ret = ext4_get_blocks_wrap(handle, inode, iblock, + ret = ext4_get_blocks_wrap(handle, inode, iblock, max_blocks, bh_result, create, 0); - if (ret > 0) { - bh_result->b_size = (ret << inode->i_blkbits); - ret = 0; - } + if (ret > 0) { + bh_result->b_size = (ret << inode->i_blkbits); + ret = 0; } + if (started) + ext4_journal_stop(handle); +out: return ret; } @@ -1674,7 +1662,8 @@ static int ext4_releasepage(struct page *page, gfp_t wait) * if the machine crashes during the write. * * If the O_DIRECT write is intantiating holes inside i_size and the machine - * crashes then stale disk data _may_ be exposed inside the file. + * crashes then stale disk data _may_ be exposed inside the file. But current + * VFS code falls back into buffered path in that case so we are safe. */ static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t offset, @@ -1683,7 +1672,7 @@ static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb, struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; struct ext4_inode_info *ei = EXT4_I(inode); - handle_t *handle = NULL; + handle_t *handle; ssize_t ret; int orphan = 0; size_t count = iov_length(iov, nr_segs); @@ -1691,17 +1680,21 @@ static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb, if (rw == WRITE) { loff_t final_size = offset + count; - handle = ext4_journal_start(inode, DIO_CREDITS); - if (IS_ERR(handle)) { - ret = PTR_ERR(handle); - goto out; - } if (final_size > inode->i_size) { + /* Credits for sb + inode write */ + handle = ext4_journal_start(inode, 2); + if (IS_ERR(handle)) { + ret = PTR_ERR(handle); + goto out; + } ret = ext4_orphan_add(handle, inode); - if (ret) - goto out_stop; + if (ret) { + ext4_journal_stop(handle); + goto out; + } orphan = 1; ei->i_disksize = inode->i_size; + ext4_journal_stop(handle); } } @@ -1709,18 +1702,21 @@ static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb, offset, nr_segs, ext4_get_block, NULL); - /* - * Reacquire the handle: ext4_get_block() can restart the transaction - */ - handle = ext4_journal_current_handle(); - -out_stop: - if (handle) { + if (orphan) { int err; - if (orphan && inode->i_nlink) + /* Credits for sb + inode write */ + handle = ext4_journal_start(inode, 2); + if (IS_ERR(handle)) { + /* This is really bad luck. We've written the data + * but cannot extend i_size. Bail out and pretend + * the write failed... */ + ret = PTR_ERR(handle); + goto out; + } + if (inode->i_nlink) ext4_orphan_del(handle, inode); - if (orphan && ret > 0) { + if (ret > 0) { loff_t end = offset + ret; if (end > inode->i_size) { ei->i_disksize = end; @@ -2683,21 +2679,31 @@ static blkcnt_t ext4_inode_blocks(struct ext4_inode *raw_inode, } } -void ext4_read_inode(struct inode * inode) +struct inode *ext4_iget(struct super_block *sb, unsigned long ino) { struct ext4_iloc iloc; struct ext4_inode *raw_inode; - struct ext4_inode_info *ei = EXT4_I(inode); + struct ext4_inode_info *ei; struct buffer_head *bh; + struct inode *inode; + long ret; int block; + inode = iget_locked(sb, ino); + if (!inode) + return ERR_PTR(-ENOMEM); + if (!(inode->i_state & I_NEW)) + return inode; + + ei = EXT4_I(inode); #ifdef CONFIG_EXT4DEV_FS_POSIX_ACL ei->i_acl = EXT4_ACL_NOT_CACHED; ei->i_default_acl = EXT4_ACL_NOT_CACHED; #endif ei->i_block_alloc_info = NULL; - if (__ext4_get_inode_loc(inode, &iloc, 0)) + ret = __ext4_get_inode_loc(inode, &iloc, 0); + if (ret < 0) goto bad_inode; bh = iloc.bh; raw_inode = ext4_raw_inode(&iloc); @@ -2723,6 +2729,7 @@ void ext4_read_inode(struct inode * inode) !(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ORPHAN_FS)) { /* this inode is deleted */ brelse (bh); + ret = -ESTALE; goto bad_inode; } /* The only unlinked inodes we let through here have @@ -2750,17 +2757,12 @@ void ext4_read_inode(struct inode * inode) ei->i_data[block] = raw_inode->i_block[block]; INIT_LIST_HEAD(&ei->i_orphan); - if (inode->i_ino >= EXT4_FIRST_INO(inode->i_sb) + 1 && - EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) { - /* - * When mke2fs creates big inodes it does not zero out - * the unused bytes above EXT4_GOOD_OLD_INODE_SIZE, - * so ignore those first few inodes. - */ + if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) { ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize); if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize > EXT4_INODE_SIZE(inode->i_sb)) { brelse (bh); + ret = -EIO; goto bad_inode; } if (ei->i_extra_isize == 0) { @@ -2814,11 +2816,12 @@ void ext4_read_inode(struct inode * inode) } brelse (iloc.bh); ext4_set_inode_flags(inode); - return; + unlock_new_inode(inode); + return inode; bad_inode: - make_bad_inode(inode); - return; + iget_failed(inode); + return ERR_PTR(ret); } static int ext4_inode_blocks_set(handle_t *handle, diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 76e5fedc0a0b..dd0fcfcb35ce 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -420,6 +420,7 @@ #define MB_DEFAULT_GROUP_PREALLOC 512 static struct kmem_cache *ext4_pspace_cachep; +static struct kmem_cache *ext4_ac_cachep; #ifdef EXT4_BB_MAX_BLOCKS #undef EXT4_BB_MAX_BLOCKS @@ -680,7 +681,6 @@ static void *mb_find_buddy(struct ext4_buddy *e4b, int order, int *max) { char *bb; - /* FIXME!! is this needed */ BUG_ON(EXT4_MB_BITMAP(e4b) == EXT4_MB_BUDDY(e4b)); BUG_ON(max == NULL); @@ -964,7 +964,7 @@ static void ext4_mb_generate_buddy(struct super_block *sb, grp->bb_fragments = fragments; if (free != grp->bb_free) { - printk(KERN_DEBUG + ext4_error(sb, __FUNCTION__, "EXT4-fs: group %lu: %u blocks in bitmap, %u in gd\n", group, free, grp->bb_free); grp->bb_free = free; @@ -1821,13 +1821,24 @@ static void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac, i = ext4_find_next_zero_bit(bitmap, EXT4_BLOCKS_PER_GROUP(sb), i); if (i >= EXT4_BLOCKS_PER_GROUP(sb)) { - BUG_ON(free != 0); + /* + * IF we corrupt the bitmap we won't find any + * free blocks even though group info says we + * we have free blocks + */ + ext4_error(sb, __FUNCTION__, "%d free blocks as per " + "group info. But bitmap says 0\n", + free); break; } mb_find_extent(e4b, 0, i, ac->ac_g_ex.fe_len, &ex); BUG_ON(ex.fe_len <= 0); - BUG_ON(free < ex.fe_len); + if (free < ex.fe_len) { + ext4_error(sb, __FUNCTION__, "%d free blocks as per " + "group info. But got %d blocks\n", + free, ex.fe_len); + } ext4_mb_measure_extent(ac, &ex, e4b); @@ -2959,12 +2970,19 @@ int __init init_ext4_mballoc(void) if (ext4_pspace_cachep == NULL) return -ENOMEM; + ext4_ac_cachep = + kmem_cache_create("ext4_alloc_context", + sizeof(struct ext4_allocation_context), + 0, SLAB_RECLAIM_ACCOUNT, NULL); + if (ext4_ac_cachep == NULL) { + kmem_cache_destroy(ext4_pspace_cachep); + return -ENOMEM; + } #ifdef CONFIG_PROC_FS proc_root_ext4 = proc_mkdir(EXT4_ROOT, proc_root_fs); if (proc_root_ext4 == NULL) printk(KERN_ERR "EXT4-fs: Unable to create %s\n", EXT4_ROOT); #endif - return 0; } @@ -2972,6 +2990,7 @@ void exit_ext4_mballoc(void) { /* XXX: synchronize_rcu(); */ kmem_cache_destroy(ext4_pspace_cachep); + kmem_cache_destroy(ext4_ac_cachep); #ifdef CONFIG_PROC_FS remove_proc_entry(EXT4_ROOT, proc_root_fs); #endif @@ -3069,7 +3088,7 @@ static int ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, out_err: sb->s_dirt = 1; - put_bh(bitmap_bh); + brelse(bitmap_bh); return err; } @@ -3354,13 +3373,10 @@ static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac, ac->ac_pa = pa; /* we don't correct pa_pstart or pa_plen here to avoid - * possible race when tte group is being loaded concurrently + * possible race when the group is being loaded concurrently * instead we correct pa later, after blocks are marked - * in on-disk bitmap -- see ext4_mb_release_context() */ - /* - * FIXME!! but the other CPUs can look at this particular - * pa and think that it have enought free blocks if we - * don't update pa_free here right ? + * in on-disk bitmap -- see ext4_mb_release_context() + * Other CPUs are prevented from allocating from this pa by lg_mutex */ mb_debug("use %u/%u from group pa %p\n", pa->pa_lstart-len, len, pa); } @@ -3699,7 +3715,7 @@ static int ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, struct ext4_prealloc_space *pa) { - struct ext4_allocation_context ac; + struct ext4_allocation_context *ac; struct super_block *sb = e4b->bd_sb; struct ext4_sb_info *sbi = EXT4_SB(sb); unsigned long end; @@ -3715,9 +3731,13 @@ static int ext4_mb_release_inode_pa(struct ext4_buddy *e4b, BUG_ON(group != e4b->bd_group && pa->pa_len != 0); end = bit + pa->pa_len; - ac.ac_sb = sb; - ac.ac_inode = pa->pa_inode; - ac.ac_op = EXT4_MB_HISTORY_DISCARD; + ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); + + if (ac) { + ac->ac_sb = sb; + ac->ac_inode = pa->pa_inode; + ac->ac_op = EXT4_MB_HISTORY_DISCARD; + } while (bit < end) { bit = ext4_find_next_zero_bit(bitmap_bh->b_data, end, bit); @@ -3733,24 +3753,28 @@ static int ext4_mb_release_inode_pa(struct ext4_buddy *e4b, (unsigned) group); free += next - bit; - ac.ac_b_ex.fe_group = group; - ac.ac_b_ex.fe_start = bit; - ac.ac_b_ex.fe_len = next - bit; - ac.ac_b_ex.fe_logical = 0; - ext4_mb_store_history(&ac); + if (ac) { + ac->ac_b_ex.fe_group = group; + ac->ac_b_ex.fe_start = bit; + ac->ac_b_ex.fe_len = next - bit; + ac->ac_b_ex.fe_logical = 0; + ext4_mb_store_history(ac); + } mb_free_blocks(pa->pa_inode, e4b, bit, next - bit); bit = next + 1; } if (free != pa->pa_free) { - printk(KERN_ERR "pa %p: logic %lu, phys. %lu, len %lu\n", + printk(KERN_CRIT "pa %p: logic %lu, phys. %lu, len %lu\n", pa, (unsigned long) pa->pa_lstart, (unsigned long) pa->pa_pstart, (unsigned long) pa->pa_len); - printk(KERN_ERR "free %u, pa_free %u\n", free, pa->pa_free); + ext4_error(sb, __FUNCTION__, "free %u, pa_free %u\n", + free, pa->pa_free); } - BUG_ON(free != pa->pa_free); atomic_add(free, &sbi->s_mb_discarded); + if (ac) + kmem_cache_free(ext4_ac_cachep, ac); return err; } @@ -3758,12 +3782,15 @@ static int ext4_mb_release_inode_pa(struct ext4_buddy *e4b, static int ext4_mb_release_group_pa(struct ext4_buddy *e4b, struct ext4_prealloc_space *pa) { - struct ext4_allocation_context ac; + struct ext4_allocation_context *ac; struct super_block *sb = e4b->bd_sb; ext4_group_t group; ext4_grpblk_t bit; - ac.ac_op = EXT4_MB_HISTORY_DISCARD; + ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); + + if (ac) + ac->ac_op = EXT4_MB_HISTORY_DISCARD; BUG_ON(pa->pa_deleted == 0); ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit); @@ -3771,13 +3798,16 @@ static int ext4_mb_release_group_pa(struct ext4_buddy *e4b, mb_free_blocks(pa->pa_inode, e4b, bit, pa->pa_len); atomic_add(pa->pa_len, &EXT4_SB(sb)->s_mb_discarded); - ac.ac_sb = sb; - ac.ac_inode = NULL; - ac.ac_b_ex.fe_group = group; - ac.ac_b_ex.fe_start = bit; - ac.ac_b_ex.fe_len = pa->pa_len; - ac.ac_b_ex.fe_logical = 0; - ext4_mb_store_history(&ac); + if (ac) { + ac->ac_sb = sb; + ac->ac_inode = NULL; + ac->ac_b_ex.fe_group = group; + ac->ac_b_ex.fe_start = bit; + ac->ac_b_ex.fe_len = pa->pa_len; + ac->ac_b_ex.fe_logical = 0; + ext4_mb_store_history(ac); + kmem_cache_free(ext4_ac_cachep, ac); + } return 0; } @@ -4231,7 +4261,7 @@ static int ext4_mb_discard_preallocations(struct super_block *sb, int needed) ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, struct ext4_allocation_request *ar, int *errp) { - struct ext4_allocation_context ac; + struct ext4_allocation_context *ac = NULL; struct ext4_sb_info *sbi; struct super_block *sb; ext4_fsblk_t block = 0; @@ -4257,53 +4287,60 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, } inquota = ar->len; + ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); + if (!ac) { + *errp = -ENOMEM; + return 0; + } + ext4_mb_poll_new_transaction(sb, handle); - *errp = ext4_mb_initialize_context(&ac, ar); + *errp = ext4_mb_initialize_context(ac, ar); if (*errp) { ar->len = 0; goto out; } - ac.ac_op = EXT4_MB_HISTORY_PREALLOC; - if (!ext4_mb_use_preallocated(&ac)) { + ac->ac_op = EXT4_MB_HISTORY_PREALLOC; + if (!ext4_mb_use_preallocated(ac)) { - ac.ac_op = EXT4_MB_HISTORY_ALLOC; - ext4_mb_normalize_request(&ac, ar); + ac->ac_op = EXT4_MB_HISTORY_ALLOC; + ext4_mb_normalize_request(ac, ar); repeat: /* allocate space in core */ - ext4_mb_regular_allocator(&ac); + ext4_mb_regular_allocator(ac); /* as we've just preallocated more space than * user requested orinally, we store allocated * space in a special descriptor */ - if (ac.ac_status == AC_STATUS_FOUND && - ac.ac_o_ex.fe_len < ac.ac_b_ex.fe_len) - ext4_mb_new_preallocation(&ac); + if (ac->ac_status == AC_STATUS_FOUND && + ac->ac_o_ex.fe_len < ac->ac_b_ex.fe_len) + ext4_mb_new_preallocation(ac); } - if (likely(ac.ac_status == AC_STATUS_FOUND)) { - ext4_mb_mark_diskspace_used(&ac, handle); + if (likely(ac->ac_status == AC_STATUS_FOUND)) { + ext4_mb_mark_diskspace_used(ac, handle); *errp = 0; - block = ext4_grp_offs_to_block(sb, &ac.ac_b_ex); - ar->len = ac.ac_b_ex.fe_len; + block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex); + ar->len = ac->ac_b_ex.fe_len; } else { - freed = ext4_mb_discard_preallocations(sb, ac.ac_o_ex.fe_len); + freed = ext4_mb_discard_preallocations(sb, ac->ac_o_ex.fe_len); if (freed) goto repeat; *errp = -ENOSPC; - ac.ac_b_ex.fe_len = 0; + ac->ac_b_ex.fe_len = 0; ar->len = 0; - ext4_mb_show_ac(&ac); + ext4_mb_show_ac(ac); } - ext4_mb_release_context(&ac); + ext4_mb_release_context(ac); out: if (ar->len < inquota) DQUOT_FREE_BLOCK(ar->inode, inquota - ar->len); + kmem_cache_free(ext4_ac_cachep, ac); return block; } static void ext4_mb_poll_new_transaction(struct super_block *sb, @@ -4405,9 +4442,9 @@ void ext4_mb_free_blocks(handle_t *handle, struct inode *inode, unsigned long block, unsigned long count, int metadata, unsigned long *freed) { - struct buffer_head *bitmap_bh = 0; + struct buffer_head *bitmap_bh = NULL; struct super_block *sb = inode->i_sb; - struct ext4_allocation_context ac; + struct ext4_allocation_context *ac = NULL; struct ext4_group_desc *gdp; struct ext4_super_block *es; unsigned long overflow; @@ -4436,9 +4473,12 @@ void ext4_mb_free_blocks(handle_t *handle, struct inode *inode, ext4_debug("freeing block %lu\n", block); - ac.ac_op = EXT4_MB_HISTORY_FREE; - ac.ac_inode = inode; - ac.ac_sb = sb; + ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); + if (ac) { + ac->ac_op = EXT4_MB_HISTORY_FREE; + ac->ac_inode = inode; + ac->ac_sb = sb; + } do_more: overflow = 0; @@ -4504,10 +4544,12 @@ do_more: BUFFER_TRACE(bitmap_bh, "dirtied bitmap block"); err = ext4_journal_dirty_metadata(handle, bitmap_bh); - ac.ac_b_ex.fe_group = block_group; - ac.ac_b_ex.fe_start = bit; - ac.ac_b_ex.fe_len = count; - ext4_mb_store_history(&ac); + if (ac) { + ac->ac_b_ex.fe_group = block_group; + ac->ac_b_ex.fe_start = bit; + ac->ac_b_ex.fe_len = count; + ext4_mb_store_history(ac); + } if (metadata) { /* blocks being freed are metadata. these blocks shouldn't @@ -4548,5 +4590,7 @@ do_more: error_return: brelse(bitmap_bh); ext4_std_error(sb, err); + if (ac) + kmem_cache_free(ext4_ac_cachep, ac); return; } diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index 3ebc2332f52e..8c6c685b9d22 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c @@ -61,10 +61,9 @@ static int finish_range(handle_t *handle, struct inode *inode, retval = ext4_journal_restart(handle, needed); if (retval) goto err_out; - } - if (needed) { + } else if (needed) { retval = ext4_journal_extend(handle, needed); - if (retval != 0) { + if (retval) { /* * IF not able to extend the journal restart the journal */ @@ -220,6 +219,26 @@ static int update_tind_extent_range(handle_t *handle, struct inode *inode, } +static int extend_credit_for_blkdel(handle_t *handle, struct inode *inode) +{ + int retval = 0, needed; + + if (handle->h_buffer_credits > EXT4_RESERVE_TRANS_BLOCKS) + return 0; + /* + * We are freeing a blocks. During this we touch + * superblock, group descriptor and block bitmap. + * So allocate a credit of 3. We may update + * quota (user and group). + */ + needed = 3 + 2*EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb); + + if (ext4_journal_extend(handle, needed) != 0) + retval = ext4_journal_restart(handle, needed); + + return retval; +} + static int free_dind_blocks(handle_t *handle, struct inode *inode, __le32 i_data) { @@ -234,11 +253,14 @@ static int free_dind_blocks(handle_t *handle, tmp_idata = (__le32 *)bh->b_data; for (i = 0; i < max_entries; i++) { - if (tmp_idata[i]) + if (tmp_idata[i]) { + extend_credit_for_blkdel(handle, inode); ext4_free_blocks(handle, inode, le32_to_cpu(tmp_idata[i]), 1, 1); + } } put_bh(bh); + extend_credit_for_blkdel(handle, inode); ext4_free_blocks(handle, inode, le32_to_cpu(i_data), 1, 1); return 0; } @@ -267,29 +289,32 @@ static int free_tind_blocks(handle_t *handle, } } put_bh(bh); + extend_credit_for_blkdel(handle, inode); ext4_free_blocks(handle, inode, le32_to_cpu(i_data), 1, 1); return 0; } -static int free_ind_block(handle_t *handle, struct inode *inode) +static int free_ind_block(handle_t *handle, struct inode *inode, __le32 *i_data) { int retval; - struct ext4_inode_info *ei = EXT4_I(inode); - if (ei->i_data[EXT4_IND_BLOCK]) + /* ei->i_data[EXT4_IND_BLOCK] */ + if (i_data[0]) { + extend_credit_for_blkdel(handle, inode); ext4_free_blocks(handle, inode, - le32_to_cpu(ei->i_data[EXT4_IND_BLOCK]), 1, 1); + le32_to_cpu(i_data[0]), 1, 1); + } - if (ei->i_data[EXT4_DIND_BLOCK]) { - retval = free_dind_blocks(handle, inode, - ei->i_data[EXT4_DIND_BLOCK]); + /* ei->i_data[EXT4_DIND_BLOCK] */ + if (i_data[1]) { + retval = free_dind_blocks(handle, inode, i_data[1]); if (retval) return retval; } - if (ei->i_data[EXT4_TIND_BLOCK]) { - retval = free_tind_blocks(handle, inode, - ei->i_data[EXT4_TIND_BLOCK]); + /* ei->i_data[EXT4_TIND_BLOCK] */ + if (i_data[2]) { + retval = free_tind_blocks(handle, inode, i_data[2]); if (retval) return retval; } @@ -297,15 +322,13 @@ static int free_ind_block(handle_t *handle, struct inode *inode) } static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode, - struct inode *tmp_inode, int retval) + struct inode *tmp_inode) { + int retval; + __le32 i_data[3]; struct ext4_inode_info *ei = EXT4_I(inode); struct ext4_inode_info *tmp_ei = EXT4_I(tmp_inode); - retval = free_ind_block(handle, inode); - if (retval) - goto err_out; - /* * One credit accounted for writing the * i_data field of the original inode @@ -317,6 +340,11 @@ static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode, goto err_out; } + i_data[0] = ei->i_data[EXT4_IND_BLOCK]; + i_data[1] = ei->i_data[EXT4_DIND_BLOCK]; + i_data[2] = ei->i_data[EXT4_TIND_BLOCK]; + + down_write(&EXT4_I(inode)->i_data_sem); /* * We have the extent map build with the tmp inode. * Now copy the i_data across @@ -336,8 +364,15 @@ static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode, spin_lock(&inode->i_lock); inode->i_blocks += tmp_inode->i_blocks; spin_unlock(&inode->i_lock); + up_write(&EXT4_I(inode)->i_data_sem); + /* + * We mark the inode dirty after, because we decrement the + * i_blocks when freeing the indirect meta-data blocks + */ + retval = free_ind_block(handle, inode, i_data); ext4_mark_inode_dirty(handle, inode); + err_out: return retval; } @@ -365,6 +400,7 @@ static int free_ext_idx(handle_t *handle, struct inode *inode, } } put_bh(bh); + extend_credit_for_blkdel(handle, inode); ext4_free_blocks(handle, inode, block, 1, 1); return retval; } @@ -414,7 +450,12 @@ int ext4_ext_migrate(struct inode *inode, struct file *filp, if ((EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)) return -EINVAL; - down_write(&EXT4_I(inode)->i_data_sem); + if (S_ISLNK(inode->i_mode) && inode->i_blocks == 0) + /* + * don't migrate fast symlink + */ + return retval; + handle = ext4_journal_start(inode, EXT4_DATA_TRANS_BLOCKS(inode->i_sb) + EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 + @@ -448,13 +489,6 @@ int ext4_ext_migrate(struct inode *inode, struct file *filp, ext4_orphan_add(handle, tmp_inode); ext4_journal_stop(handle); - ei = EXT4_I(inode); - i_data = ei->i_data; - memset(&lb, 0, sizeof(lb)); - - /* 32 bit block address 4 bytes */ - max_entries = inode->i_sb->s_blocksize >> 2; - /* * start with one credit accounted for * superblock modification. @@ -463,7 +497,20 @@ int ext4_ext_migrate(struct inode *inode, struct file *filp, * trascation that created the inode. Later as and * when we add extents we extent the journal */ + /* + * inode_mutex prevent write and truncate on the file. Read still goes + * through. We take i_data_sem in ext4_ext_swap_inode_data before we + * switch the inode format to prevent read. + */ + mutex_lock(&(inode->i_mutex)); handle = ext4_journal_start(inode, 1); + + ei = EXT4_I(inode); + i_data = ei->i_data; + memset(&lb, 0, sizeof(lb)); + + /* 32 bit block address 4 bytes */ + max_entries = inode->i_sb->s_blocksize >> 2; for (i = 0; i < EXT4_NDIR_BLOCKS; i++, blk_count++) { if (i_data[i]) { retval = update_extent_range(handle, tmp_inode, @@ -501,19 +548,6 @@ int ext4_ext_migrate(struct inode *inode, struct file *filp, */ retval = finish_range(handle, tmp_inode, &lb); err_out: - /* - * We are either freeing extent information or indirect - * blocks. During this we touch superblock, group descriptor - * and block bitmap. Later we mark the tmp_inode dirty - * via ext4_ext_tree_init. So allocate a credit of 4 - * We may update quota (user and group). - * - * FIXME!! we may be touching bitmaps in different block groups. - */ - if (ext4_journal_extend(handle, - 4 + 2*EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb)) != 0) - ext4_journal_restart(handle, - 4 + 2*EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb)); if (retval) /* * Failure case delete the extent information with the @@ -522,7 +556,11 @@ err_out: free_ext_block(handle, tmp_inode); else retval = ext4_ext_swap_inode_data(handle, inode, - tmp_inode, retval); + tmp_inode); + + /* We mark the tmp_inode dirty via ext4_ext_tree_init. */ + if (ext4_journal_extend(handle, 1) != 0) + ext4_journal_restart(handle, 1); /* * Mark the tmp_inode as of size zero @@ -550,8 +588,7 @@ err_out: tmp_inode->i_nlink = 0; ext4_journal_stop(handle); - - up_write(&EXT4_I(inode)->i_data_sem); + mutex_unlock(&(inode->i_mutex)); if (tmp_inode) iput(tmp_inode); diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 67b6d8a1ceff..a9347fb43bcc 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1039,17 +1039,11 @@ static struct dentry *ext4_lookup(struct inode * dir, struct dentry *dentry, str if (!ext4_valid_inum(dir->i_sb, ino)) { ext4_error(dir->i_sb, "ext4_lookup", "bad inode number: %lu", ino); - inode = NULL; - } else - inode = iget(dir->i_sb, ino); - - if (!inode) - return ERR_PTR(-EACCES); - - if (is_bad_inode(inode)) { - iput(inode); - return ERR_PTR(-ENOENT); + return ERR_PTR(-EIO); } + inode = ext4_iget(dir->i_sb, ino); + if (IS_ERR(inode)) + return ERR_CAST(inode); } return d_splice_alias(inode, dentry); } @@ -1078,18 +1072,13 @@ struct dentry *ext4_get_parent(struct dentry *child) if (!ext4_valid_inum(child->d_inode->i_sb, ino)) { ext4_error(child->d_inode->i_sb, "ext4_get_parent", "bad inode number: %lu", ino); - inode = NULL; - } else - inode = iget(child->d_inode->i_sb, ino); - - if (!inode) - return ERR_PTR(-EACCES); - - if (is_bad_inode(inode)) { - iput(inode); - return ERR_PTR(-ENOENT); + return ERR_PTR(-EIO); } + inode = ext4_iget(child->d_inode->i_sb, ino); + if (IS_ERR(inode)) + return ERR_CAST(inode); + parent = d_alloc_anon(inode); if (!parent) { iput(inode); @@ -2234,6 +2223,7 @@ retry: inode->i_op = &ext4_fast_symlink_inode_operations; memcpy((char*)&EXT4_I(inode)->i_data,symname,l); inode->i_size = l-1; + EXT4_I(inode)->i_flags &= ~EXT4_EXTENTS_FL; } EXT4_I(inode)->i_disksize = inode->i_size; err = ext4_add_nondir(handle, dentry, inode); diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 4fbba60816f4..9477a2bd6ff2 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -779,12 +779,11 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) "No reserved GDT blocks, can't resize"); return -EPERM; } - inode = iget(sb, EXT4_RESIZE_INO); - if (!inode || is_bad_inode(inode)) { + inode = ext4_iget(sb, EXT4_RESIZE_INO); + if (IS_ERR(inode)) { ext4_warning(sb, __FUNCTION__, "Error opening resize inode"); - iput(inode); - return -ENOENT; + return PTR_ERR(inode); } } diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 055a0cd0168e..13383ba18f1d 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -777,11 +777,10 @@ static struct inode *ext4_nfs_get_inode(struct super_block *sb, * Currently we don't know the generation for parent directory, so * a generation of 0 means "accept any" */ - inode = iget(sb, ino); - if (inode == NULL) - return ERR_PTR(-ENOMEM); - if (is_bad_inode(inode) || - (generation && inode->i_generation != generation)) { + inode = ext4_iget(sb, ino); + if (IS_ERR(inode)) + return ERR_CAST(inode); + if (generation && inode->i_generation != generation) { iput(inode); return ERR_PTR(-ESTALE); } @@ -850,7 +849,6 @@ static struct quotactl_ops ext4_qctl_operations = { static const struct super_operations ext4_sops = { .alloc_inode = ext4_alloc_inode, .destroy_inode = ext4_destroy_inode, - .read_inode = ext4_read_inode, .write_inode = ext4_write_inode, .dirty_inode = ext4_dirty_inode, .delete_inode = ext4_delete_inode, @@ -1458,7 +1456,7 @@ int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 block_group, } /* Called at mount-time, super-block is locked */ -static int ext4_check_descriptors (struct super_block * sb) +static int ext4_check_descriptors(struct super_block *sb) { struct ext4_sb_info *sbi = EXT4_SB(sb); ext4_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block); @@ -1466,8 +1464,6 @@ static int ext4_check_descriptors (struct super_block * sb) ext4_fsblk_t block_bitmap; ext4_fsblk_t inode_bitmap; ext4_fsblk_t inode_table; - struct ext4_group_desc * gdp = NULL; - int desc_block = 0; int flexbg_flag = 0; ext4_group_t i; @@ -1476,17 +1472,15 @@ static int ext4_check_descriptors (struct super_block * sb) ext4_debug ("Checking group descriptors"); - for (i = 0; i < sbi->s_groups_count; i++) - { + for (i = 0; i < sbi->s_groups_count; i++) { + struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL); + if (i == sbi->s_groups_count - 1 || flexbg_flag) last_block = ext4_blocks_count(sbi->s_es) - 1; else last_block = first_block + (EXT4_BLOCKS_PER_GROUP(sb) - 1); - if ((i % EXT4_DESC_PER_BLOCK(sb)) == 0) - gdp = (struct ext4_group_desc *) - sbi->s_group_desc[desc_block++]->b_data; block_bitmap = ext4_block_bitmap(sb, gdp); if (block_bitmap < first_block || block_bitmap > last_block) { @@ -1524,8 +1518,6 @@ static int ext4_check_descriptors (struct super_block * sb) } if (!flexbg_flag) first_block += EXT4_BLOCKS_PER_GROUP(sb); - gdp = (struct ext4_group_desc *) - ((__u8 *)gdp + EXT4_DESC_SIZE(sb)); } ext4_free_blocks_count_set(sbi->s_es, ext4_count_free_blocks(sb)); @@ -1811,6 +1803,7 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent) unsigned long journal_devnum = 0; unsigned long def_mount_opts; struct inode *root; + int ret = -EINVAL; int blocksize; int db_count; int i; @@ -1926,6 +1919,17 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent) printk(KERN_WARNING "EXT4-fs warning: feature flags set on rev 0 fs, " "running e2fsck is recommended\n"); + + /* + * Since ext4 is still considered development code, we require + * that the TEST_FILESYS flag in s->flags be set. + */ + if (!(le32_to_cpu(es->s_flags) & EXT2_FLAGS_TEST_FILESYS)) { + printk(KERN_WARNING "EXT4-fs: %s: not marked " + "OK to use with test code.\n", sb->s_id); + goto failed_mount; + } + /* * Check feature flags regardless of the revision level, since we * previously didn't change the revision level when setting the flags, @@ -2243,19 +2247,24 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent) * so we can safely mount the rest of the filesystem now. */ - root = iget(sb, EXT4_ROOT_INO); - sb->s_root = d_alloc_root(root); - if (!sb->s_root) { + root = ext4_iget(sb, EXT4_ROOT_INO); + if (IS_ERR(root)) { printk(KERN_ERR "EXT4-fs: get root inode failed\n"); - iput(root); + ret = PTR_ERR(root); goto failed_mount4; } if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) { - dput(sb->s_root); - sb->s_root = NULL; + iput(root); printk(KERN_ERR "EXT4-fs: corrupt root inode, run e2fsck\n"); goto failed_mount4; } + sb->s_root = d_alloc_root(root); + if (!sb->s_root) { + printk(KERN_ERR "EXT4-fs: get root dentry failed\n"); + iput(root); + ret = -ENOMEM; + goto failed_mount4; + } ext4_setup_super (sb, es, sb->s_flags & MS_RDONLY); @@ -2336,7 +2345,7 @@ out_fail: sb->s_fs_info = NULL; kfree(sbi); lock_kernel(); - return -EINVAL; + return ret; } /* @@ -2372,8 +2381,8 @@ static journal_t *ext4_get_journal(struct super_block *sb, * things happen if we iget() an unused inode, as the subsequent * iput() will try to delete it. */ - journal_inode = iget(sb, journal_inum); - if (!journal_inode) { + journal_inode = ext4_iget(sb, journal_inum); + if (IS_ERR(journal_inode)) { printk(KERN_ERR "EXT4-fs: no journal found.\n"); return NULL; } @@ -2386,7 +2395,7 @@ static journal_t *ext4_get_journal(struct super_block *sb, jbd_debug(2, "Journal inode found at %p: %Ld bytes\n", journal_inode, journal_inode->i_size); - if (is_bad_inode(journal_inode) || !S_ISREG(journal_inode->i_mode)) { + if (!S_ISREG(journal_inode->i_mode)) { printk(KERN_ERR "EXT4-fs: invalid journal inode.\n"); iput(journal_inode); return NULL; @@ -3149,16 +3158,16 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id, if (err) return err; /* Quotafile not on the same filesystem? */ - if (nd.mnt->mnt_sb != sb) { - path_release(&nd); + if (nd.path.mnt->mnt_sb != sb) { + path_put(&nd.path); return -EXDEV; } /* Quotafile not of fs root? */ - if (nd.dentry->d_parent->d_inode != sb->s_root->d_inode) + if (nd.path.dentry->d_parent->d_inode != sb->s_root->d_inode) printk(KERN_WARNING "EXT4-fs: Quota file not on filesystem root. " "Journalled quota will not work.\n"); - path_release(&nd); + path_put(&nd.path); return vfs_quota_on(sb, type, format_id, path); } |