diff options
author | Paul Moore <pmoore@redhat.com> | 2014-01-28 23:44:16 +0400 |
---|---|---|
committer | Paul Moore <pmoore@redhat.com> | 2014-02-05 19:39:48 +0400 |
commit | 825e587af2e90e9b953849f3347a01d8f383d577 (patch) | |
tree | e48942a05882da47544e179c6a0c920e00137a6a /fs/ext4 | |
parent | 8ed814602876bec9bad2649ca17f34b499357a1c (diff) | |
parent | d8ec26d7f8287f5788a494f56e8814210f0e64be (diff) | |
download | linux-825e587af2e90e9b953849f3347a01d8f383d577.tar.xz |
Merge tag 'v3.13' into stable-3.14
Linux 3.13
Conflicts:
security/selinux/hooks.c
Trivial merge issue in selinux_inet_conn_request() likely due to me
including patches that I sent to the stable folks in my next tree
resulting in the patch hitting twice (I think). Thankfully it was an
easy fix this time, but regardless, lesson learned, I will not do that
again.
Diffstat (limited to 'fs/ext4')
-rw-r--r-- | fs/ext4/balloc.c | 13 | ||||
-rw-r--r-- | fs/ext4/ext4.h | 29 | ||||
-rw-r--r-- | fs/ext4/ext4_jbd2.c | 9 | ||||
-rw-r--r-- | fs/ext4/extents.c | 80 | ||||
-rw-r--r-- | fs/ext4/ialloc.c | 2 | ||||
-rw-r--r-- | fs/ext4/inline.c | 3 | ||||
-rw-r--r-- | fs/ext4/inode.c | 66 | ||||
-rw-r--r-- | fs/ext4/ioctl.c | 4 | ||||
-rw-r--r-- | fs/ext4/mballoc.c | 21 | ||||
-rw-r--r-- | fs/ext4/mmp.c | 2 | ||||
-rw-r--r-- | fs/ext4/move_extent.c | 40 | ||||
-rw-r--r-- | fs/ext4/page-io.c | 5 | ||||
-rw-r--r-- | fs/ext4/super.c | 180 | ||||
-rw-r--r-- | fs/ext4/xattr.c | 1 |
14 files changed, 247 insertions, 208 deletions
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index dc5d572ebd6a..6ea7b1436bbc 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -640,6 +640,7 @@ ext4_fsblk_t ext4_count_free_clusters(struct super_block *sb) struct ext4_group_desc *gdp; ext4_group_t i; ext4_group_t ngroups = ext4_get_groups_count(sb); + struct ext4_group_info *grp; #ifdef EXT4FS_DEBUG struct ext4_super_block *es; ext4_fsblk_t bitmap_count; @@ -655,7 +656,11 @@ ext4_fsblk_t ext4_count_free_clusters(struct super_block *sb) gdp = ext4_get_group_desc(sb, i, NULL); if (!gdp) continue; - desc_count += ext4_free_group_clusters(sb, gdp); + grp = NULL; + if (EXT4_SB(sb)->s_group_info) + grp = ext4_get_group_info(sb, i); + if (!grp || !EXT4_MB_GRP_BBITMAP_CORRUPT(grp)) + desc_count += ext4_free_group_clusters(sb, gdp); brelse(bitmap_bh); bitmap_bh = ext4_read_block_bitmap(sb, i); if (bitmap_bh == NULL) @@ -679,7 +684,11 @@ ext4_fsblk_t ext4_count_free_clusters(struct super_block *sb) gdp = ext4_get_group_desc(sb, i, NULL); if (!gdp) continue; - desc_count += ext4_free_group_clusters(sb, gdp); + grp = NULL; + if (EXT4_SB(sb)->s_group_info) + grp = ext4_get_group_info(sb, i); + if (!grp || !EXT4_MB_GRP_BBITMAP_CORRUPT(grp)) + desc_count += ext4_free_group_clusters(sb, gdp); } return desc_count; diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index af815ea9d7cc..ece55565b9cd 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -29,6 +29,7 @@ #include <linux/wait.h> #include <linux/blockgroup_lock.h> #include <linux/percpu_counter.h> +#include <linux/ratelimit.h> #include <crypto/hash.h> #ifdef __KERNEL__ #include <linux/compat.h> @@ -267,6 +268,16 @@ struct ext4_io_submit { /* Translate # of blks to # of clusters */ #define EXT4_NUM_B2C(sbi, blks) (((blks) + (sbi)->s_cluster_ratio - 1) >> \ (sbi)->s_cluster_bits) +/* Mask out the low bits to get the starting block of the cluster */ +#define EXT4_PBLK_CMASK(s, pblk) ((pblk) & \ + ~((ext4_fsblk_t) (s)->s_cluster_ratio - 1)) +#define EXT4_LBLK_CMASK(s, lblk) ((lblk) & \ + ~((ext4_lblk_t) (s)->s_cluster_ratio - 1)) +/* Get the cluster offset */ +#define EXT4_PBLK_COFF(s, pblk) ((pblk) & \ + ((ext4_fsblk_t) (s)->s_cluster_ratio - 1)) +#define EXT4_LBLK_COFF(s, lblk) ((lblk) & \ + ((ext4_lblk_t) (s)->s_cluster_ratio - 1)) /* * Structure of a blocks group descriptor @@ -1314,6 +1325,11 @@ struct ext4_sb_info { unsigned long s_es_last_sorted; struct percpu_counter s_extent_cache_cnt; spinlock_t s_es_lru_lock ____cacheline_aligned_in_smp; + + /* Ratelimit ext4 messages. */ + struct ratelimit_state s_err_ratelimit_state; + struct ratelimit_state s_warning_ratelimit_state; + struct ratelimit_state s_msg_ratelimit_state; }; static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb) @@ -1396,7 +1412,18 @@ static inline void ext4_clear_inode_##name(struct inode *inode, int bit) \ clear_bit(bit + (offset), &EXT4_I(inode)->i_##field); \ } +/* Add these declarations here only so that these functions can be + * found by name. Otherwise, they are very hard to locate. */ +static inline int ext4_test_inode_flag(struct inode *inode, int bit); +static inline void ext4_set_inode_flag(struct inode *inode, int bit); +static inline void ext4_clear_inode_flag(struct inode *inode, int bit); EXT4_INODE_BIT_FNS(flag, flags, 0) + +/* Add these declarations here only so that these functions can be + * found by name. Otherwise, they are very hard to locate. */ +static inline int ext4_test_inode_state(struct inode *inode, int bit); +static inline void ext4_set_inode_state(struct inode *inode, int bit); +static inline void ext4_clear_inode_state(struct inode *inode, int bit); #if (BITS_PER_LONG < 64) EXT4_INODE_BIT_FNS(state, state_flags, 0) @@ -2734,8 +2761,6 @@ extern void ext4_double_down_write_data_sem(struct inode *first, struct inode *second); extern void ext4_double_up_write_data_sem(struct inode *orig_inode, struct inode *donor_inode); -void ext4_inode_double_lock(struct inode *inode1, struct inode *inode2); -void ext4_inode_double_unlock(struct inode *inode1, struct inode *inode2); extern int ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 start_orig, __u64 start_donor, __u64 len, __u64 *moved_len); diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c index 17ac112ab101..3fe29de832c8 100644 --- a/fs/ext4/ext4_jbd2.c +++ b/fs/ext4/ext4_jbd2.c @@ -259,6 +259,15 @@ int __ext4_handle_dirty_metadata(const char *where, unsigned int line, if (WARN_ON_ONCE(err)) { ext4_journal_abort_handle(where, line, __func__, bh, handle, err); + ext4_error_inode(inode, where, line, + bh->b_blocknr, + "journal_dirty_metadata failed: " + "handle type %u started at line %u, " + "credits %u/%u, errcode %d", + handle->h_type, + handle->h_line_no, + handle->h_requested_credits, + handle->h_buffer_credits, err); } } else { if (inode) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 54d52afcdb19..3384dc4bed40 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -360,8 +360,10 @@ static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext) { ext4_fsblk_t block = ext4_ext_pblock(ext); int len = ext4_ext_get_actual_len(ext); + ext4_lblk_t lblock = le32_to_cpu(ext->ee_block); + ext4_lblk_t last = lblock + len - 1; - if (len == 0) + if (lblock > last) return 0; return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, len); } @@ -387,11 +389,26 @@ static int ext4_valid_extent_entries(struct inode *inode, if (depth == 0) { /* leaf entries */ struct ext4_extent *ext = EXT_FIRST_EXTENT(eh); + struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es; + ext4_fsblk_t pblock = 0; + ext4_lblk_t lblock = 0; + ext4_lblk_t prev = 0; + int len = 0; while (entries) { if (!ext4_valid_extent(inode, ext)) return 0; + + /* Check for overlapping extents */ + lblock = le32_to_cpu(ext->ee_block); + len = ext4_ext_get_actual_len(ext); + if ((lblock <= prev) && prev) { + pblock = ext4_ext_pblock(ext); + es->s_last_error_block = cpu_to_le64(pblock); + return 0; + } ext++; entries--; + prev = lblock + len - 1; } } else { struct ext4_extent_idx *ext_idx = EXT_FIRST_INDEX(eh); @@ -1666,7 +1683,7 @@ int ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1, struct ext4_extent *ex2) { - unsigned short ext1_ee_len, ext2_ee_len, max_len; + unsigned short ext1_ee_len, ext2_ee_len; /* * Make sure that both extents are initialized. We don't merge @@ -1677,11 +1694,6 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1, if (ext4_ext_is_uninitialized(ex1) || ext4_ext_is_uninitialized(ex2)) return 0; - if (ext4_ext_is_uninitialized(ex1)) - max_len = EXT_UNINIT_MAX_LEN; - else - max_len = EXT_INIT_MAX_LEN; - ext1_ee_len = ext4_ext_get_actual_len(ex1); ext2_ee_len = ext4_ext_get_actual_len(ex2); @@ -1694,7 +1706,7 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1, * as an RO_COMPAT feature, refuse to merge to extents if * this can result in the top bit of ee_len being set. */ - if (ext1_ee_len + ext2_ee_len > max_len) + if (ext1_ee_len + ext2_ee_len > EXT_INIT_MAX_LEN) return 0; #ifdef AGGRESSIVE_TEST if (ext1_ee_len >= 4) @@ -1720,7 +1732,6 @@ static int ext4_ext_try_to_merge_right(struct inode *inode, struct ext4_extent_header *eh; unsigned int depth, len; int merge_done = 0; - int uninitialized = 0; depth = ext_depth(inode); BUG_ON(path[depth].p_hdr == NULL); @@ -1730,12 +1741,8 @@ static int ext4_ext_try_to_merge_right(struct inode *inode, if (!ext4_can_extents_be_merged(inode, ex, ex + 1)) break; /* merge with next extent! */ - if (ext4_ext_is_uninitialized(ex)) - uninitialized = 1; ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex) + ext4_ext_get_actual_len(ex + 1)); - if (uninitialized) - ext4_ext_mark_uninitialized(ex); if (ex + 1 < EXT_LAST_EXTENT(eh)) { len = (EXT_LAST_EXTENT(eh) - ex - 1) @@ -1844,8 +1851,7 @@ static unsigned int ext4_ext_check_overlap(struct ext4_sb_info *sbi, depth = ext_depth(inode); if (!path[depth].p_ext) goto out; - b2 = le32_to_cpu(path[depth].p_ext->ee_block); - b2 &= ~(sbi->s_cluster_ratio - 1); + b2 = EXT4_LBLK_CMASK(sbi, le32_to_cpu(path[depth].p_ext->ee_block)); /* * get the next allocated block if the extent in the path @@ -1855,7 +1861,7 @@ static unsigned int ext4_ext_check_overlap(struct ext4_sb_info *sbi, b2 = ext4_ext_next_allocated_block(path); if (b2 == EXT_MAX_BLOCKS) goto out; - b2 &= ~(sbi->s_cluster_ratio - 1); + b2 = EXT4_LBLK_CMASK(sbi, b2); } /* check for wrap through zero on extent logical start block*/ @@ -1890,7 +1896,6 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, struct ext4_ext_path *npath = NULL; int depth, len, err; ext4_lblk_t next; - unsigned uninitialized = 0; int mb_flags = 0; if (unlikely(ext4_ext_get_actual_len(newext) == 0)) { @@ -1942,18 +1947,8 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, if (err) return err; - /* - * ext4_can_extents_be_merged should have checked - * that either both extents are uninitialized, or - * both aren't. Thus we need to check only one of - * them here. - */ - if (ext4_ext_is_uninitialized(ex)) - uninitialized = 1; ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex) + ext4_ext_get_actual_len(newext)); - if (uninitialized) - ext4_ext_mark_uninitialized(ex); eh = path[depth].p_hdr; nearex = ex; goto merge; @@ -1976,20 +1971,10 @@ prepend: if (err) return err; - /* - * ext4_can_extents_be_merged should have checked - * that either both extents are uninitialized, or - * both aren't. Thus we need to check only one of - * them here. - */ - if (ext4_ext_is_uninitialized(ex)) - uninitialized = 1; ex->ee_block = newext->ee_block; ext4_ext_store_pblock(ex, ext4_ext_pblock(newext)); ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex) + ext4_ext_get_actual_len(newext)); - if (uninitialized) - ext4_ext_mark_uninitialized(ex); eh = path[depth].p_hdr; nearex = ex; goto merge; @@ -2535,7 +2520,7 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode, * extent, we have to mark the cluster as used (store negative * cluster number in partial_cluster). */ - unaligned = pblk & (sbi->s_cluster_ratio - 1); + unaligned = EXT4_PBLK_COFF(sbi, pblk); if (unaligned && (ee_len == num) && (*partial_cluster != -((long long)EXT4_B2C(sbi, pblk)))) *partial_cluster = EXT4_B2C(sbi, pblk); @@ -2629,7 +2614,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, * accidentally freeing it later on */ pblk = ext4_ext_pblock(ex); - if (pblk & (sbi->s_cluster_ratio - 1)) + if (EXT4_PBLK_COFF(sbi, pblk)) *partial_cluster = -((long long)EXT4_B2C(sbi, pblk)); ex--; @@ -3784,7 +3769,7 @@ int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk) { struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); ext4_lblk_t lblk_start, lblk_end; - lblk_start = lblk & (~(sbi->s_cluster_ratio - 1)); + lblk_start = EXT4_LBLK_CMASK(sbi, lblk); lblk_end = lblk_start + sbi->s_cluster_ratio - 1; return ext4_find_delalloc_range(inode, lblk_start, lblk_end); @@ -3843,9 +3828,9 @@ get_reserved_cluster_alloc(struct inode *inode, ext4_lblk_t lblk_start, trace_ext4_get_reserved_cluster_alloc(inode, lblk_start, num_blks); /* Check towards left side */ - c_offset = lblk_start & (sbi->s_cluster_ratio - 1); + c_offset = EXT4_LBLK_COFF(sbi, lblk_start); if (c_offset) { - lblk_from = lblk_start & (~(sbi->s_cluster_ratio - 1)); + lblk_from = EXT4_LBLK_CMASK(sbi, lblk_start); lblk_to = lblk_from + c_offset - 1; if (ext4_find_delalloc_range(inode, lblk_from, lblk_to)) @@ -3853,7 +3838,7 @@ get_reserved_cluster_alloc(struct inode *inode, ext4_lblk_t lblk_start, } /* Now check towards right. */ - c_offset = (lblk_start + num_blks) & (sbi->s_cluster_ratio - 1); + c_offset = EXT4_LBLK_COFF(sbi, lblk_start + num_blks); if (allocated_clusters && c_offset) { lblk_from = lblk_start + num_blks; lblk_to = lblk_from + (sbi->s_cluster_ratio - c_offset) - 1; @@ -4061,7 +4046,7 @@ static int get_implied_cluster_alloc(struct super_block *sb, struct ext4_ext_path *path) { struct ext4_sb_info *sbi = EXT4_SB(sb); - ext4_lblk_t c_offset = map->m_lblk & (sbi->s_cluster_ratio-1); + ext4_lblk_t c_offset = EXT4_LBLK_COFF(sbi, map->m_lblk); ext4_lblk_t ex_cluster_start, ex_cluster_end; ext4_lblk_t rr_cluster_start; ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block); @@ -4079,8 +4064,7 @@ static int get_implied_cluster_alloc(struct super_block *sb, (rr_cluster_start == ex_cluster_start)) { if (rr_cluster_start == ex_cluster_end) ee_start += ee_len - 1; - map->m_pblk = (ee_start & ~(sbi->s_cluster_ratio - 1)) + - c_offset; + map->m_pblk = EXT4_PBLK_CMASK(sbi, ee_start) + c_offset; map->m_len = min(map->m_len, (unsigned) sbi->s_cluster_ratio - c_offset); /* @@ -4234,7 +4218,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, */ map->m_flags &= ~EXT4_MAP_FROM_CLUSTER; newex.ee_block = cpu_to_le32(map->m_lblk); - cluster_offset = map->m_lblk & (sbi->s_cluster_ratio-1); + cluster_offset = EXT4_LBLK_COFF(sbi, map->m_lblk); /* * If we are doing bigalloc, check to see if the extent returned @@ -4302,7 +4286,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, * needed so that future calls to get_implied_cluster_alloc() * work correctly. */ - offset = map->m_lblk & (sbi->s_cluster_ratio - 1); + offset = EXT4_LBLK_COFF(sbi, map->m_lblk); ar.len = EXT4_NUM_B2C(sbi, offset+allocated); ar.goal -= offset; ar.logical -= offset; diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 137193ff389b..0ee59a6644e2 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -432,7 +432,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent, ext4fs_dirhash(qstr->name, qstr->len, &hinfo); grp = hinfo.hash; } else - get_random_bytes(&grp, sizeof(grp)); + grp = prandom_u32(); parent_group = (unsigned)grp % ngroups; for (i = 0; i < ngroups; i++) { g = (parent_group + i) % ngroups; diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index d9ecbf1113a7..bae987549dc3 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -994,11 +994,9 @@ static int ext4_add_dirent_to_inline(handle_t *handle, struct inode *dir = dentry->d_parent->d_inode; const char *name = dentry->d_name.name; int namelen = dentry->d_name.len; - unsigned short reclen; int err; struct ext4_dir_entry_2 *de; - reclen = EXT4_DIR_REC_LEN(namelen); err = ext4_find_dest_de(dir, inode, iloc->bh, inline_start, inline_size, name, namelen, &de); @@ -1442,6 +1440,7 @@ int ext4_read_inline_dir(struct file *file, if (ret < 0) goto out; + ret = 0; sb = inode->i_sb; parent_ino = le32_to_cpu(((struct ext4_dir_entry_2 *)dir_buf)->inode); offset = ctx->pos; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index e274e9c1171f..61d49ff22c81 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1206,7 +1206,6 @@ static int ext4_journalled_write_end(struct file *file, */ static int ext4_da_reserve_metadata(struct inode *inode, ext4_lblk_t lblock) { - int retries = 0; struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); struct ext4_inode_info *ei = EXT4_I(inode); unsigned int md_needed; @@ -1218,7 +1217,6 @@ static int ext4_da_reserve_metadata(struct inode *inode, ext4_lblk_t lblock) * in order to allocate nrblocks * worse case is one extent per block */ -repeat: spin_lock(&ei->i_block_reservation_lock); /* * ext4_calc_metadata_amount() has side effects, which we have @@ -1238,10 +1236,6 @@ repeat: ei->i_da_metadata_calc_len = save_len; ei->i_da_metadata_calc_last_lblock = save_last_lblock; spin_unlock(&ei->i_block_reservation_lock); - if (ext4_should_retry_alloc(inode->i_sb, &retries)) { - cond_resched(); - goto repeat; - } return -ENOSPC; } ei->i_reserved_meta_blocks += md_needed; @@ -1255,7 +1249,6 @@ repeat: */ static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock) { - int retries = 0; struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); struct ext4_inode_info *ei = EXT4_I(inode); unsigned int md_needed; @@ -1277,7 +1270,6 @@ static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock) * in order to allocate nrblocks * worse case is one extent per block */ -repeat: spin_lock(&ei->i_block_reservation_lock); /* * ext4_calc_metadata_amount() has side effects, which we have @@ -1297,10 +1289,6 @@ repeat: ei->i_da_metadata_calc_len = save_len; ei->i_da_metadata_calc_last_lblock = save_last_lblock; spin_unlock(&ei->i_block_reservation_lock); - if (ext4_should_retry_alloc(inode->i_sb, &retries)) { - cond_resched(); - goto repeat; - } dquot_release_reservation_block(inode, EXT4_C2B(sbi, 1)); return -ENOSPC; } @@ -2178,6 +2166,9 @@ static int mpage_map_one_extent(handle_t *handle, struct mpage_da_data *mpd) * * @handle - handle for journal operations * @mpd - extent to map + * @give_up_on_write - we set this to true iff there is a fatal error and there + * is no hope of writing the data. The caller should discard + * dirty pages to avoid infinite loops. * * The function maps extent starting at mpd->lblk of length mpd->len. If it is * delayed, blocks are allocated, if it is unwritten, we may need to convert @@ -2295,6 +2286,7 @@ static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd) struct address_space *mapping = mpd->inode->i_mapping; struct pagevec pvec; unsigned int nr_pages; + long left = mpd->wbc->nr_to_write; pgoff_t index = mpd->first_page; pgoff_t end = mpd->last_page; int tag; @@ -2330,6 +2322,17 @@ static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd) if (page->index > end) goto out; + /* + * Accumulated enough dirty pages? This doesn't apply + * to WB_SYNC_ALL mode. For integrity sync we have to + * keep going because someone may be concurrently + * dirtying pages, and we might have synced a lot of + * newly appeared dirty pages, but have not synced all + * of the old dirty pages. + */ + if (mpd->wbc->sync_mode == WB_SYNC_NONE && left <= 0) + goto out; + /* If we can't merge this page, we are done. */ if (mpd->map.m_len > 0 && mpd->next_page != page->index) goto out; @@ -2364,19 +2367,7 @@ static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd) if (err <= 0) goto out; err = 0; - - /* - * Accumulated enough dirty pages? This doesn't apply - * to WB_SYNC_ALL mode. For integrity sync we have to - * keep going because someone may be concurrently - * dirtying pages, and we might have synced a lot of - * newly appeared dirty pages, but have not synced all - * of the old dirty pages. - */ - if (mpd->wbc->sync_mode == WB_SYNC_NONE && - mpd->next_page - mpd->first_page >= - mpd->wbc->nr_to_write) - goto out; + left--; } pagevec_release(&pvec); cond_resched(); @@ -2420,16 +2411,15 @@ static int ext4_writepages(struct address_space *mapping, * because that could violate lock ordering on umount */ if (!mapping->nrpages || !mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) - return 0; + goto out_writepages; if (ext4_should_journal_data(inode)) { struct blk_plug plug; - int ret; blk_start_plug(&plug); ret = write_cache_pages(mapping, wbc, __writepage, mapping); blk_finish_plug(&plug); - return ret; + goto out_writepages; } /* @@ -2442,8 +2432,10 @@ static int ext4_writepages(struct address_space *mapping, * *never* be called, so if that ever happens, we would want * the stack trace. */ - if (unlikely(sbi->s_mount_flags & EXT4_MF_FS_ABORTED)) - return -EROFS; + if (unlikely(sbi->s_mount_flags & EXT4_MF_FS_ABORTED)) { + ret = -EROFS; + goto out_writepages; + } if (ext4_should_dioread_nolock(inode)) { /* @@ -4690,6 +4682,15 @@ int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, generic_fillattr(inode, stat); /* + * If there is inline data in the inode, the inode will normally not + * have data blocks allocated (it may have an external xattr block). + * Report at least one sector for such files, so tools like tar, rsync, + * others doen't incorrectly think the file is completely sparse. + */ + if (unlikely(ext4_has_inline_data(inode))) + stat->blocks += (stat->size + 511) >> 9; + + /* * We can't update i_blocks if the block allocation is delayed * otherwise in the case of system crash before the real block * allocation is done, we will have i_blocks inconsistent with @@ -4700,9 +4701,8 @@ int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, * blocks for this file. */ delalloc_blocks = EXT4_C2B(EXT4_SB(inode->i_sb), - EXT4_I(inode)->i_reserved_data_blocks); - - stat->blocks += delalloc_blocks << (inode->i_sb->s_blocksize_bits-9); + EXT4_I(inode)->i_reserved_data_blocks); + stat->blocks += delalloc_blocks << (inode->i_sb->s_blocksize_bits - 9); return 0; } diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index a569d335f804..60589b60e9b0 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -130,7 +130,7 @@ static long swap_inode_boot_loader(struct super_block *sb, /* Protect orig inodes against a truncate and make sure, * that only 1 swap_inode_boot_loader is running. */ - ext4_inode_double_lock(inode, inode_bl); + lock_two_nondirectories(inode, inode_bl); truncate_inode_pages(&inode->i_data, 0); truncate_inode_pages(&inode_bl->i_data, 0); @@ -205,7 +205,7 @@ static long swap_inode_boot_loader(struct super_block *sb, ext4_inode_resume_unlocked_dio(inode); ext4_inode_resume_unlocked_dio(inode_bl); - ext4_inode_double_unlock(inode, inode_bl); + unlock_two_nondirectories(inode, inode_bl); iput(inode_bl); diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index a41e3ba8cfaa..04a5c7504be9 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -3442,6 +3442,9 @@ static void ext4_mb_pa_callback(struct rcu_head *head) { struct ext4_prealloc_space *pa; pa = container_of(head, struct ext4_prealloc_space, u.pa_rcu); + + BUG_ON(atomic_read(&pa->pa_count)); + BUG_ON(pa->pa_deleted == 0); kmem_cache_free(ext4_pspace_cachep, pa); } @@ -3455,11 +3458,13 @@ static void ext4_mb_put_pa(struct ext4_allocation_context *ac, ext4_group_t grp; ext4_fsblk_t grp_blk; - if (!atomic_dec_and_test(&pa->pa_count) || pa->pa_free != 0) - return; - /* in this short window concurrent discard can set pa_deleted */ spin_lock(&pa->pa_lock); + if (!atomic_dec_and_test(&pa->pa_count) || pa->pa_free != 0) { + spin_unlock(&pa->pa_lock); + return; + } + if (pa->pa_deleted == 1) { spin_unlock(&pa->pa_lock); return; @@ -4121,7 +4126,7 @@ ext4_mb_initialize_context(struct ext4_allocation_context *ac, ext4_get_group_no_and_offset(sb, goal, &group, &block); /* set up allocation goals */ - ac->ac_b_ex.fe_logical = ar->logical & ~(sbi->s_cluster_ratio - 1); + ac->ac_b_ex.fe_logical = EXT4_LBLK_CMASK(sbi, ar->logical); ac->ac_status = AC_STATUS_CONTINUE; ac->ac_sb = sb; ac->ac_inode = ar->inode; @@ -4663,7 +4668,7 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, * blocks at the beginning or the end unless we are explicitly * requested to avoid doing so. */ - overflow = block & (sbi->s_cluster_ratio - 1); + overflow = EXT4_PBLK_COFF(sbi, block); if (overflow) { if (flags & EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER) { overflow = sbi->s_cluster_ratio - overflow; @@ -4677,7 +4682,7 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, count += overflow; } } - overflow = count & (sbi->s_cluster_ratio - 1); + overflow = EXT4_LBLK_COFF(sbi, count); if (overflow) { if (flags & EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER) { if (count > overflow) @@ -4794,8 +4799,8 @@ do_more: " group:%d block:%d count:%lu failed" " with %d", block_group, bit, count, err); - } - + } else + EXT4_MB_GRP_CLEAR_TRIMMED(e4b.bd_info); ext4_lock_group(sb, block_group); mb_clear_bits(bitmap_bh->b_data, bit, count_clusters); diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c index 214461e42a05..04434ad3e8e0 100644 --- a/fs/ext4/mmp.c +++ b/fs/ext4/mmp.c @@ -259,7 +259,7 @@ static unsigned int mmp_new_seq(void) u32 new_seq; do { - get_random_bytes(&new_seq, sizeof(u32)); + new_seq = prandom_u32(); } while (new_seq > EXT4_MMP_SEQ_MAX); return new_seq; diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index 7fa4d855dbd5..773b503bd18c 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c @@ -1203,42 +1203,6 @@ mext_check_arguments(struct inode *orig_inode, } /** - * ext4_inode_double_lock - Lock i_mutex on both @inode1 and @inode2 - * - * @inode1: the inode structure - * @inode2: the inode structure - * - * Lock two inodes' i_mutex - */ -void -ext4_inode_double_lock(struct inode *inode1, struct inode *inode2) -{ - BUG_ON(inode1 == inode2); - if (inode1 < inode2) { - mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT); - mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD); - } else { - mutex_lock_nested(&inode2->i_mutex, I_MUTEX_PARENT); - mutex_lock_nested(&inode1->i_mutex, I_MUTEX_CHILD); - } -} - -/** - * ext4_inode_double_unlock - Release i_mutex on both @inode1 and @inode2 - * - * @inode1: the inode that is released first - * @inode2: the inode that is released second - * - */ - -void -ext4_inode_double_unlock(struct inode *inode1, struct inode *inode2) -{ - mutex_unlock(&inode1->i_mutex); - mutex_unlock(&inode2->i_mutex); -} - -/** * ext4_move_extents - Exchange the specified range of a file * * @o_filp: file structure of the original file @@ -1327,7 +1291,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, return -EINVAL; } /* Protect orig and donor inodes against a truncate */ - ext4_inode_double_lock(orig_inode, donor_inode); + lock_two_nondirectories(orig_inode, donor_inode); /* Wait for all existing dio workers */ ext4_inode_block_unlocked_dio(orig_inode); @@ -1535,7 +1499,7 @@ out: ext4_double_up_write_data_sem(orig_inode, donor_inode); ext4_inode_resume_unlocked_dio(orig_inode); ext4_inode_resume_unlocked_dio(donor_inode); - ext4_inode_double_unlock(orig_inode, donor_inode); + unlock_two_nondirectories(orig_inode, donor_inode); return ret; } diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index d7d0c7b46ed4..d488f80ee32d 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -197,14 +197,15 @@ static void dump_completed_IO(struct inode *inode, struct list_head *head) static void ext4_add_complete_io(ext4_io_end_t *io_end) { struct ext4_inode_info *ei = EXT4_I(io_end->inode); + struct ext4_sb_info *sbi = EXT4_SB(io_end->inode->i_sb); struct workqueue_struct *wq; unsigned long flags; /* Only reserved conversions from writeback should enter here */ WARN_ON(!(io_end->flag & EXT4_IO_END_UNWRITTEN)); - WARN_ON(!io_end->handle); + WARN_ON(!io_end->handle && sbi->s_journal); spin_lock_irqsave(&ei->i_completed_io_lock, flags); - wq = EXT4_SB(io_end->inode->i_sb)->rsv_conversion_wq; + wq = sbi->rsv_conversion_wq; if (list_empty(&ei->i_rsv_conversion_list)) queue_work(wq, &ei->i_rsv_conversion_work); list_add_tail(&io_end->list, &ei->i_rsv_conversion_list); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 2c2e6cbc6bed..1f7784de05b6 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -411,20 +411,26 @@ static void ext4_handle_error(struct super_block *sb) sb->s_id); } +#define ext4_error_ratelimit(sb) \ + ___ratelimit(&(EXT4_SB(sb)->s_err_ratelimit_state), \ + "EXT4-fs error") + void __ext4_error(struct super_block *sb, const char *function, unsigned int line, const char *fmt, ...) { struct va_format vaf; va_list args; - va_start(args, fmt); - vaf.fmt = fmt; - vaf.va = &args; - printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: comm %s: %pV\n", - sb->s_id, function, line, current->comm, &vaf); - va_end(args); + if (ext4_error_ratelimit(sb)) { + va_start(args, fmt); + vaf.fmt = fmt; + vaf.va = &args; + printk(KERN_CRIT + "EXT4-fs error (device %s): %s:%d: comm %s: %pV\n", + sb->s_id, function, line, current->comm, &vaf); + va_end(args); + } save_error_info(sb, function, line); - ext4_handle_error(sb); } @@ -438,22 +444,23 @@ void __ext4_error_inode(struct inode *inode, const char *function, es->s_last_error_ino = cpu_to_le32(inode->i_ino); es->s_last_error_block = cpu_to_le64(block); + if (ext4_error_ratelimit(inode->i_sb)) { + va_start(args, fmt); + vaf.fmt = fmt; + vaf.va = &args; + if (block) + printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: " + "inode #%lu: block %llu: comm %s: %pV\n", + inode->i_sb->s_id, function, line, inode->i_ino, + block, current->comm, &vaf); + else + printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: " + "inode #%lu: comm %s: %pV\n", + inode->i_sb->s_id, function, line, inode->i_ino, + current->comm, &vaf); + va_end(args); + } save_error_info(inode->i_sb, function, line); - va_start(args, fmt); - vaf.fmt = fmt; - vaf.va = &args; - if (block) - printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: " - "inode #%lu: block %llu: comm %s: %pV\n", - inode->i_sb->s_id, function, line, inode->i_ino, - block, current->comm, &vaf); - else - printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: " - "inode #%lu: comm %s: %pV\n", - inode->i_sb->s_id, function, line, inode->i_ino, - current->comm, &vaf); - va_end(args); - ext4_handle_error(inode->i_sb); } @@ -469,27 +476,28 @@ void __ext4_error_file(struct file *file, const char *function, es = EXT4_SB(inode->i_sb)->s_es; es->s_last_error_ino = cpu_to_le32(inode->i_ino); + if (ext4_error_ratelimit(inode->i_sb)) { + path = d_path(&(file->f_path), pathname, sizeof(pathname)); + if (IS_ERR(path)) + path = "(unknown)"; + va_start(args, fmt); + vaf.fmt = fmt; + vaf.va = &args; + if (block) + printk(KERN_CRIT + "EXT4-fs error (device %s): %s:%d: inode #%lu: " + "block %llu: comm %s: path %s: %pV\n", + inode->i_sb->s_id, function, line, inode->i_ino, + block, current->comm, path, &vaf); + else + printk(KERN_CRIT + "EXT4-fs error (device %s): %s:%d: inode #%lu: " + "comm %s: path %s: %pV\n", + inode->i_sb->s_id, function, line, inode->i_ino, + current->comm, path, &vaf); + va_end(args); + } save_error_info(inode->i_sb, function, line); - path = d_path(&(file->f_path), pathname, sizeof(pathname)); - if (IS_ERR(path)) - path = "(unknown)"; - va_start(args, fmt); - vaf.fmt = fmt; - vaf.va = &args; - if (block) - printk(KERN_CRIT - "EXT4-fs error (device %s): %s:%d: inode #%lu: " - "block %llu: comm %s: path %s: %pV\n", - inode->i_sb->s_id, function, line, inode->i_ino, - block, current->comm, path, &vaf); - else - printk(KERN_CRIT - "EXT4-fs error (device %s): %s:%d: inode #%lu: " - "comm %s: path %s: %pV\n", - inode->i_sb->s_id, function, line, inode->i_ino, - current->comm, path, &vaf); - va_end(args); - ext4_handle_error(inode->i_sb); } @@ -543,11 +551,13 @@ void __ext4_std_error(struct super_block *sb, const char *function, (sb->s_flags & MS_RDONLY)) return; - errstr = ext4_decode_error(sb, errno, nbuf); - printk(KERN_CRIT "EXT4-fs error (device %s) in %s:%d: %s\n", - sb->s_id, function, line, errstr); - save_error_info(sb, function, line); + if (ext4_error_ratelimit(sb)) { + errstr = ext4_decode_error(sb, errno, nbuf); + printk(KERN_CRIT "EXT4-fs error (device %s) in %s:%d: %s\n", + sb->s_id, function, line, errstr); + } + save_error_info(sb, function, line); ext4_handle_error(sb); } @@ -597,6 +607,9 @@ void __ext4_msg(struct super_block *sb, struct va_format vaf; va_list args; + if (!___ratelimit(&(EXT4_SB(sb)->s_msg_ratelimit_state), "EXT4-fs")) + return; + va_start(args, fmt); vaf.fmt = fmt; vaf.va = &args; @@ -610,6 +623,10 @@ void __ext4_warning(struct super_block *sb, const char *function, struct va_format vaf; va_list args; + if (!___ratelimit(&(EXT4_SB(sb)->s_warning_ratelimit_state), + "EXT4-fs warning")) + return; + va_start(args, fmt); vaf.fmt = fmt; vaf.va = &args; @@ -633,18 +650,20 @@ __acquires(bitlock) es->s_last_error_block = cpu_to_le64(block); __save_error_info(sb, function, line); - va_start(args, fmt); - - vaf.fmt = fmt; - vaf.va = &args; - printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: group %u, ", - sb->s_id, function, line, grp); - if (ino) - printk(KERN_CONT "inode %lu: ", ino); - if (block) - printk(KERN_CONT "block %llu:", (unsigned long long) block); - printk(KERN_CONT "%pV\n", &vaf); - va_end(args); + if (ext4_error_ratelimit(sb)) { + va_start(args, fmt); + vaf.fmt = fmt; + vaf.va = &args; + printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: group %u, ", + sb->s_id, function, line, grp); + if (ino) + printk(KERN_CONT "inode %lu: ", ino); + if (block) + printk(KERN_CONT "block %llu:", + (unsigned long long) block); + printk(KERN_CONT "%pV\n", &vaf); + va_end(args); + } if (test_opt(sb, ERRORS_CONT)) { ext4_commit_super(sb, 0); @@ -773,7 +792,7 @@ static void ext4_put_super(struct super_block *sb) } ext4_es_unregister_shrinker(sbi); - del_timer(&sbi->s_err_report); + del_timer_sync(&sbi->s_err_report); ext4_release_system_zone(sb); ext4_mb_release(sb); ext4_ext_release(sb); @@ -2606,6 +2625,12 @@ EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc); EXT4_DEPRECATED_ATTR(max_writeback_mb_bump, 128); EXT4_RW_ATTR_SBI_UI(extent_max_zeroout_kb, s_extent_max_zeroout_kb); EXT4_ATTR(trigger_fs_error, 0200, NULL, trigger_test_error); +EXT4_RW_ATTR_SBI_UI(err_ratelimit_interval_ms, s_err_ratelimit_state.interval); +EXT4_RW_ATTR_SBI_UI(err_ratelimit_burst, s_err_ratelimit_state.burst); +EXT4_RW_ATTR_SBI_UI(warning_ratelimit_interval_ms, s_warning_ratelimit_state.interval); +EXT4_RW_ATTR_SBI_UI(warning_ratelimit_burst, s_warning_ratelimit_state.burst); +EXT4_RW_ATTR_SBI_UI(msg_ratelimit_interval_ms, s_msg_ratelimit_state.interval); +EXT4_RW_ATTR_SBI_UI(msg_ratelimit_burst, s_msg_ratelimit_state.burst); static struct attribute *ext4_attrs[] = { ATTR_LIST(delayed_allocation_blocks), @@ -2623,6 +2648,12 @@ static struct attribute *ext4_attrs[] = { ATTR_LIST(max_writeback_mb_bump), ATTR_LIST(extent_max_zeroout_kb), ATTR_LIST(trigger_fs_error), + ATTR_LIST(err_ratelimit_interval_ms), + ATTR_LIST(err_ratelimit_burst), + ATTR_LIST(warning_ratelimit_interval_ms), + ATTR_LIST(warning_ratelimit_burst), + ATTR_LIST(msg_ratelimit_interval_ms), + ATTR_LIST(msg_ratelimit_burst), NULL, }; @@ -3037,7 +3068,6 @@ static struct ext4_li_request *ext4_li_request_new(struct super_block *sb, { struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_li_request *elr; - unsigned long rnd; elr = kzalloc(sizeof(*elr), GFP_KERNEL); if (!elr) @@ -3052,10 +3082,8 @@ static struct ext4_li_request *ext4_li_request_new(struct super_block *sb, * spread the inode table initialization requests * better. */ - get_random_bytes(&rnd, sizeof(rnd)); - elr->lr_next_sched = jiffies + (unsigned long)rnd % - (EXT4_DEF_LI_MAX_START_DELAY * HZ); - + elr->lr_next_sched = jiffies + (prandom_u32() % + (EXT4_DEF_LI_MAX_START_DELAY * HZ)); return elr; } @@ -3288,11 +3316,19 @@ int ext4_calculate_overhead(struct super_block *sb) } -static ext4_fsblk_t ext4_calculate_resv_clusters(struct ext4_sb_info *sbi) +static ext4_fsblk_t ext4_calculate_resv_clusters(struct super_block *sb) { ext4_fsblk_t resv_clusters; /* + * There's no need to reserve anything when we aren't using extents. + * The space estimates are exact, there are no unwritten extents, + * hole punching doesn't need new metadata... This is needed especially + * to keep ext2/3 backward compatibility. + */ + if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) + return 0; + /* * By default we reserve 2% or 4096 clusters, whichever is smaller. * This should cover the situations where we can not afford to run * out of space like for example punch hole, or converting @@ -3300,7 +3336,8 @@ static ext4_fsblk_t ext4_calculate_resv_clusters(struct ext4_sb_info *sbi) * allocation would require 1, or 2 blocks, higher numbers are * very rare. */ - resv_clusters = ext4_blocks_count(sbi->s_es) >> sbi->s_cluster_bits; + resv_clusters = ext4_blocks_count(EXT4_SB(sb)->s_es) >> + EXT4_SB(sb)->s_cluster_bits; do_div(resv_clusters, 50); resv_clusters = min_t(ext4_fsblk_t, resv_clusters, 4096); @@ -4043,10 +4080,10 @@ no_journal: "available"); } - err = ext4_reserve_clusters(sbi, ext4_calculate_resv_clusters(sbi)); + err = ext4_reserve_clusters(sbi, ext4_calculate_resv_clusters(sb)); if (err) { ext4_msg(sb, KERN_ERR, "failed to reserve %llu clusters for " - "reserved pool", ext4_calculate_resv_clusters(sbi)); + "reserved pool", ext4_calculate_resv_clusters(sb)); goto failed_mount4a; } @@ -4118,6 +4155,11 @@ no_journal: if (es->s_error_count) mod_timer(&sbi->s_err_report, jiffies + 300*HZ); /* 5 minutes */ + /* Enable message ratelimiting. Default is 10 messages per 5 secs. */ + ratelimit_state_init(&sbi->s_err_ratelimit_state, 5 * HZ, 10); + ratelimit_state_init(&sbi->s_warning_ratelimit_state, 5 * HZ, 10); + ratelimit_state_init(&sbi->s_msg_ratelimit_state, 5 * HZ, 10); + kfree(orig_data); return 0; @@ -4151,7 +4193,7 @@ failed_mount_wq: } failed_mount3: ext4_es_unregister_shrinker(sbi); - del_timer(&sbi->s_err_report); + del_timer_sync(&sbi->s_err_report); if (sbi->s_flex_groups) ext4_kvfree(sbi->s_flex_groups); percpu_counter_destroy(&sbi->s_freeclusters_counter); diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 03e9bebba198..1423c4816a47 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -1352,6 +1352,7 @@ retry: new_extra_isize = s_min_extra_isize; kfree(is); is = NULL; kfree(bs); bs = NULL; + brelse(bh); goto retry; } error = -1; |