diff options
Diffstat (limited to 'fs')
58 files changed, 635 insertions, 360 deletions
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c index 69ec23daa25e..a1e6860b6f46 100644 --- a/fs/binfmt_flat.c +++ b/fs/binfmt_flat.c @@ -574,7 +574,7 @@ static int load_flat_file(struct linux_binprm *bprm, MAX_SHARED_LIBS * sizeof(unsigned long), FLAT_DATA_ALIGN); - pr_debug("Allocated data+bss+stack (%ld bytes): %lx\n", + pr_debug("Allocated data+bss+stack (%u bytes): %lx\n", data_len + bss_len + stack_len, datapos); fpos = ntohl(hdr->data_start); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 375f8c728d91..e3b0b4196d3d 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -4825,10 +4825,6 @@ skip_async: else flush = BTRFS_RESERVE_NO_FLUSH; spin_lock(&space_info->lock); - if (can_overcommit(fs_info, space_info, orig, flush, false)) { - spin_unlock(&space_info->lock); - break; - } if (list_empty(&space_info->tickets) && list_empty(&space_info->priority_tickets)) { spin_unlock(&space_info->lock); @@ -7589,6 +7585,10 @@ search: u64 offset; int cached; + /* If the block group is read-only, we can skip it entirely. */ + if (unlikely(block_group->ro)) + continue; + btrfs_grab_block_group(block_group, delalloc); search_start = block_group->key.objectid; @@ -7624,8 +7624,6 @@ have_block_group: if (unlikely(block_group->cached == BTRFS_CACHE_ERROR)) goto loop; - if (unlikely(block_group->ro)) - goto loop; /* * Ok we want to try and use the cluster allocator, so @@ -7839,6 +7837,7 @@ loop: failed_alloc = false; BUG_ON(index != get_block_group_index(block_group)); btrfs_release_block_group(block_group, delalloc); + cond_resched(); } up_read(&space_info->groups_sem); diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index f20ef211a73d..3a11ae63676e 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -2153,8 +2153,7 @@ process_leaf: u32 this_len = sizeof(*di) + name_len + data_len; char *name; - ret = verify_dir_item(fs_info, path->nodes[0], - path->slots[0], di); + ret = verify_dir_item(fs_info, path->nodes[0], i, di); if (ret) { ret = -EIO; goto out; diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 5eb7217738ed..e8b9a269fdde 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -2702,7 +2702,7 @@ int btrfs_grow_device(struct btrfs_trans_handle *trans, mutex_lock(&fs_info->chunk_mutex); old_total = btrfs_super_total_bytes(super_copy); - diff = new_size - device->total_bytes; + diff = round_down(new_size - device->total_bytes, fs_info->sectorsize); if (new_size <= device->total_bytes || device->is_tgtdev_for_dev_replace) { @@ -4406,7 +4406,7 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size) u64 diff; new_size = round_down(new_size, fs_info->sectorsize); - diff = old_size - new_size; + diff = round_down(old_size - new_size, fs_info->sectorsize); if (device->is_tgtdev_for_dev_replace) return -EINVAL; diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index e071d23f6148..ef7240ace576 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -271,6 +271,11 @@ out: if (ret < 0) err = ret; dput(last); + /* last_name no longer match cache index */ + if (fi->readdir_cache_idx >= 0) { + fi->readdir_cache_idx = -1; + fi->dir_release_count = 0; + } } return err; } diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c index 79dafa71effd..51f0aea70cb4 100644 --- a/fs/ext2/acl.c +++ b/fs/ext2/acl.c @@ -175,11 +175,8 @@ ext2_get_acl(struct inode *inode, int type) return acl; } -/* - * inode->i_mutex: down - */ -int -ext2_set_acl(struct inode *inode, struct posix_acl *acl, int type) +static int +__ext2_set_acl(struct inode *inode, struct posix_acl *acl, int type) { int name_index; void *value = NULL; @@ -189,13 +186,6 @@ ext2_set_acl(struct inode *inode, struct posix_acl *acl, int type) switch(type) { case ACL_TYPE_ACCESS: name_index = EXT2_XATTR_INDEX_POSIX_ACL_ACCESS; - if (acl) { - error = posix_acl_update_mode(inode, &inode->i_mode, &acl); - if (error) - return error; - inode->i_ctime = current_time(inode); - mark_inode_dirty(inode); - } break; case ACL_TYPE_DEFAULT: @@ -222,6 +212,31 @@ ext2_set_acl(struct inode *inode, struct posix_acl *acl, int type) } /* + * inode->i_mutex: down + */ +int +ext2_set_acl(struct inode *inode, struct posix_acl *acl, int type) +{ + int error; + int update_mode = 0; + umode_t mode = inode->i_mode; + + if (type == ACL_TYPE_ACCESS && acl) { + error = posix_acl_update_mode(inode, &mode, &acl); + if (error) + return error; + update_mode = 1; + } + error = __ext2_set_acl(inode, acl, type); + if (!error && update_mode) { + inode->i_mode = mode; + inode->i_ctime = current_time(inode); + mark_inode_dirty(inode); + } + return error; +} + +/* * Initialize the ACLs of a new inode. Called from ext2_new_inode. * * dir->i_mutex: down @@ -238,12 +253,12 @@ ext2_init_acl(struct inode *inode, struct inode *dir) return error; if (default_acl) { - error = ext2_set_acl(inode, default_acl, ACL_TYPE_DEFAULT); + error = __ext2_set_acl(inode, default_acl, ACL_TYPE_DEFAULT); posix_acl_release(default_acl); } if (acl) { if (!error) - error = ext2_set_acl(inode, acl, ACL_TYPE_ACCESS); + error = __ext2_set_acl(inode, acl, ACL_TYPE_ACCESS); posix_acl_release(acl); } return error; diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c index 09441ae07a5b..46ff2229ff5e 100644 --- a/fs/ext4/acl.c +++ b/fs/ext4/acl.c @@ -193,13 +193,6 @@ __ext4_set_acl(handle_t *handle, struct inode *inode, int type, switch (type) { case ACL_TYPE_ACCESS: name_index = EXT4_XATTR_INDEX_POSIX_ACL_ACCESS; - if (acl) { - error = posix_acl_update_mode(inode, &inode->i_mode, &acl); - if (error) - return error; - inode->i_ctime = current_time(inode); - ext4_mark_inode_dirty(handle, inode); - } break; case ACL_TYPE_DEFAULT: @@ -221,8 +214,9 @@ __ext4_set_acl(handle_t *handle, struct inode *inode, int type, value, size, xattr_flags); kfree(value); - if (!error) + if (!error) { set_cached_acl(inode, type, acl); + } return error; } @@ -233,6 +227,8 @@ ext4_set_acl(struct inode *inode, struct posix_acl *acl, int type) handle_t *handle; int error, credits, retries = 0; size_t acl_size = acl ? ext4_acl_size(acl->a_count) : 0; + umode_t mode = inode->i_mode; + int update_mode = 0; error = dquot_initialize(inode); if (error) @@ -247,7 +243,20 @@ retry: if (IS_ERR(handle)) return PTR_ERR(handle); + if ((type == ACL_TYPE_ACCESS) && acl) { + error = posix_acl_update_mode(inode, &mode, &acl); + if (error) + goto out_stop; + update_mode = 1; + } + error = __ext4_set_acl(handle, inode, type, acl, 0 /* xattr_flags */); + if (!error && update_mode) { + inode->i_mode = mode; + inode->i_ctime = current_time(inode); + ext4_mark_inode_dirty(handle, inode); + } +out_stop: ext4_journal_stop(handle); if (error == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) goto retry; diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 9ebde0cd632e..a2bb7d2870e4 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -961,7 +961,7 @@ struct ext4_inode_info { /* * i_block_group is the number of the block group which contains * this file's inode. Constant across the lifetime of the inode, - * it is ued for making block allocation decisions - we try to + * it is used for making block allocation decisions - we try to * place a file's data blocks near its inode block, and new inodes * near to their parent directory's inode. */ @@ -1049,10 +1049,8 @@ struct ext4_inode_info { ext4_group_t i_last_alloc_group; /* allocation reservation info for delalloc */ - /* In case of bigalloc, these refer to clusters rather than blocks */ + /* In case of bigalloc, this refer to clusters rather than blocks */ unsigned int i_reserved_data_blocks; - unsigned int i_reserved_meta_blocks; - unsigned int i_allocated_meta_blocks; ext4_lblk_t i_da_metadata_calc_last_lblock; int i_da_metadata_calc_len; @@ -2022,7 +2020,8 @@ static inline __le16 ext4_rec_len_to_disk(unsigned len, unsigned blocksize) #define is_dx(dir) (ext4_has_feature_dir_index((dir)->i_sb) && \ ext4_test_inode_flag((dir), EXT4_INODE_INDEX)) -#define EXT4_DIR_LINK_MAX(dir) (!is_dx(dir) && (dir)->i_nlink >= EXT4_LINK_MAX) +#define EXT4_DIR_LINK_MAX(dir) unlikely((dir)->i_nlink >= EXT4_LINK_MAX && \ + !(ext4_has_feature_dir_nlink((dir)->i_sb) && is_dx(dir))) #define EXT4_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2 || (dir)->i_nlink == 1) /* Legal values for the dx_root hash_version field: */ @@ -2462,6 +2461,8 @@ extern void ext4_process_freed_data(struct super_block *sb, tid_t commit_tid); int ext4_inode_is_fast_symlink(struct inode *inode); struct buffer_head *ext4_getblk(handle_t *, struct inode *, ext4_lblk_t, int); struct buffer_head *ext4_bread(handle_t *, struct inode *, ext4_lblk_t, int); +int ext4_bread_batch(struct inode *inode, ext4_lblk_t block, int bh_count, + bool wait, struct buffer_head **bhs); int ext4_get_block_unwritten(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create); int ext4_get_block(struct inode *inode, sector_t iblock, @@ -3074,7 +3075,7 @@ extern int ext4_handle_dirty_dirent_node(handle_t *handle, struct inode *inode, struct buffer_head *bh); #define S_SHIFT 12 -static const unsigned char ext4_type_by_mode[S_IFMT >> S_SHIFT] = { +static const unsigned char ext4_type_by_mode[(S_IFMT >> S_SHIFT) + 1] = { [S_IFREG >> S_SHIFT] = EXT4_FT_REG_FILE, [S_IFDIR >> S_SHIFT] = EXT4_FT_DIR, [S_IFCHR >> S_SHIFT] = EXT4_FT_CHRDEV, diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h index dabad1bc8617..48143e32411c 100644 --- a/fs/ext4/ext4_jbd2.h +++ b/fs/ext4/ext4_jbd2.h @@ -227,6 +227,9 @@ int ext4_reserve_inode_write(handle_t *handle, struct inode *inode, int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode); +int ext4_expand_extra_isize(struct inode *inode, + unsigned int new_extra_isize, + struct ext4_iloc *iloc); /* * Wrapper functions with which ext4 calls into JBD. */ diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index e0a8425ff74d..97f0fd06728d 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -4652,7 +4652,7 @@ retry: static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset, ext4_lblk_t len, loff_t new_size, - int flags, int mode) + int flags) { struct inode *inode = file_inode(file); handle_t *handle; @@ -4815,7 +4815,7 @@ static long ext4_zero_range(struct file *file, loff_t offset, round_down(offset, 1 << blkbits) >> blkbits, (round_up((offset + len), 1 << blkbits) - round_down(offset, 1 << blkbits)) >> blkbits, - new_size, flags, mode); + new_size, flags); if (ret) goto out_dio; @@ -4841,7 +4841,7 @@ static long ext4_zero_range(struct file *file, loff_t offset, inode->i_mtime = inode->i_ctime = current_time(inode); ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size, - flags, mode); + flags); up_write(&EXT4_I(inode)->i_mmap_sem); if (ret) goto out_dio; @@ -4976,8 +4976,7 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) ext4_inode_block_unlocked_dio(inode); inode_dio_wait(inode); - ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size, - flags, mode); + ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size, flags); ext4_inode_resume_unlocked_dio(inode); if (ret) goto out; @@ -5837,7 +5836,7 @@ ext4_swap_extents(handle_t *handle, struct inode *inode1, if (e1_blk > lblk1) next1 = e1_blk; if (e2_blk > lblk2) - next2 = e1_blk; + next2 = e2_blk; /* Do we have something to swap */ if (next1 == EXT_MAX_BLOCKS || next2 == EXT_MAX_BLOCKS) goto finish; diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 58294c9a7e1d..0d7cf0cc9b87 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -537,6 +537,8 @@ static int ext4_find_unwritten_pgoff(struct inode *inode, lastoff = page_offset(page); bh = head = page_buffers(page); do { + if (lastoff + bh->b_size <= startoff) + goto next; if (buffer_uptodate(bh) || buffer_unwritten(bh)) { if (whence == SEEK_DATA) @@ -551,6 +553,7 @@ static int ext4_find_unwritten_pgoff(struct inode *inode, unlock_page(page); goto out; } +next: lastoff += bh->b_size; bh = bh->b_this_page; } while (bh != head); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 3c600f02673f..c774bdc22759 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -892,7 +892,7 @@ static int ext4_dio_get_block_unwritten_async(struct inode *inode, /* * Get block function for non-AIO DIO writes when we create unwritten extent if * blocks are not allocated yet. The extent will be converted to written - * after IO is complete from ext4_ext_direct_IO() function. + * after IO is complete by ext4_direct_IO_write(). */ static int ext4_dio_get_block_unwritten_sync(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) @@ -907,7 +907,7 @@ static int ext4_dio_get_block_unwritten_sync(struct inode *inode, /* * Mark inode as having pending DIO writes to unwritten extents. - * ext4_ext_direct_IO() checks this flag and converts extents to + * ext4_direct_IO_write() checks this flag and converts extents to * written. */ if (!ret && buffer_unwritten(bh_result)) @@ -1015,6 +1015,50 @@ struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode, return ERR_PTR(-EIO); } +/* Read a contiguous batch of blocks. */ +int ext4_bread_batch(struct inode *inode, ext4_lblk_t block, int bh_count, + bool wait, struct buffer_head **bhs) +{ + int i, err; + + for (i = 0; i < bh_count; i++) { + bhs[i] = ext4_getblk(NULL, inode, block + i, 0 /* map_flags */); + if (IS_ERR(bhs[i])) { + err = PTR_ERR(bhs[i]); + bh_count = i; + goto out_brelse; + } + } + + for (i = 0; i < bh_count; i++) + /* Note that NULL bhs[i] is valid because of holes. */ + if (bhs[i] && !buffer_uptodate(bhs[i])) + ll_rw_block(REQ_OP_READ, REQ_META | REQ_PRIO, 1, + &bhs[i]); + + if (!wait) + return 0; + + for (i = 0; i < bh_count; i++) + if (bhs[i]) + wait_on_buffer(bhs[i]); + + for (i = 0; i < bh_count; i++) { + if (bhs[i] && !buffer_uptodate(bhs[i])) { + err = -EIO; + goto out_brelse; + } + } + return 0; + +out_brelse: + for (i = 0; i < bh_count; i++) { + brelse(bhs[i]); + bhs[i] = NULL; + } + return err; +} + int ext4_walk_page_buffers(handle_t *handle, struct buffer_head *head, unsigned from, @@ -5658,22 +5702,16 @@ ext4_reserve_inode_write(handle_t *handle, struct inode *inode, return err; } -/* - * Expand an inode by new_extra_isize bytes. - * Returns 0 on success or negative error number on failure. - */ -static int ext4_expand_extra_isize(struct inode *inode, - unsigned int new_extra_isize, - struct ext4_iloc iloc, - handle_t *handle) +static int __ext4_expand_extra_isize(struct inode *inode, + unsigned int new_extra_isize, + struct ext4_iloc *iloc, + handle_t *handle, int *no_expand) { struct ext4_inode *raw_inode; struct ext4_xattr_ibody_header *header; + int error; - if (EXT4_I(inode)->i_extra_isize >= new_extra_isize) - return 0; - - raw_inode = ext4_raw_inode(&iloc); + raw_inode = ext4_raw_inode(iloc); header = IHDR(inode, raw_inode); @@ -5688,8 +5726,98 @@ static int ext4_expand_extra_isize(struct inode *inode, } /* try to expand with EAs present */ - return ext4_expand_extra_isize_ea(inode, new_extra_isize, - raw_inode, handle); + error = ext4_expand_extra_isize_ea(inode, new_extra_isize, + raw_inode, handle); + if (error) { + /* + * Inode size expansion failed; don't try again + */ + *no_expand = 1; + } + + return error; +} + +/* + * Expand an inode by new_extra_isize bytes. + * Returns 0 on success or negative error number on failure. + */ +static int ext4_try_to_expand_extra_isize(struct inode *inode, + unsigned int new_extra_isize, + struct ext4_iloc iloc, + handle_t *handle) +{ + int no_expand; + int error; + + if (ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND)) + return -EOVERFLOW; + + /* + * In nojournal mode, we can immediately attempt to expand + * the inode. When journaled, we first need to obtain extra + * buffer credits since we may write into the EA block + * with this same handle. If journal_extend fails, then it will + * only result in a minor loss of functionality for that inode. + * If this is felt to be critical, then e2fsck should be run to + * force a large enough s_min_extra_isize. + */ + if (ext4_handle_valid(handle) && + jbd2_journal_extend(handle, + EXT4_DATA_TRANS_BLOCKS(inode->i_sb)) != 0) + return -ENOSPC; + + if (ext4_write_trylock_xattr(inode, &no_expand) == 0) + return -EBUSY; + + error = __ext4_expand_extra_isize(inode, new_extra_isize, &iloc, + handle, &no_expand); + ext4_write_unlock_xattr(inode, &no_expand); + + return error; +} + +int ext4_expand_extra_isize(struct inode *inode, + unsigned int new_extra_isize, + struct ext4_iloc *iloc) +{ + handle_t *handle; + int no_expand; + int error, rc; + + if (ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND)) { + brelse(iloc->bh); + return -EOVERFLOW; + } + + handle = ext4_journal_start(inode, EXT4_HT_INODE, + EXT4_DATA_TRANS_BLOCKS(inode->i_sb)); + if (IS_ERR(handle)) { + error = PTR_ERR(handle); + brelse(iloc->bh); + return error; + } + + ext4_write_lock_xattr(inode, &no_expand); + + BUFFER_TRACE(iloc.bh, "get_write_access"); + error = ext4_journal_get_write_access(handle, iloc->bh); + if (error) { + brelse(iloc->bh); + goto out_stop; + } + + error = __ext4_expand_extra_isize(inode, new_extra_isize, iloc, + handle, &no_expand); + + rc = ext4_mark_iloc_dirty(handle, inode, iloc); + if (!error) + error = rc; + + ext4_write_unlock_xattr(inode, &no_expand); +out_stop: + ext4_journal_stop(handle); + return error; } /* @@ -5709,44 +5837,18 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode) { struct ext4_iloc iloc; struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); - static unsigned int mnt_count; - int err, ret; + int err; might_sleep(); trace_ext4_mark_inode_dirty(inode, _RET_IP_); err = ext4_reserve_inode_write(handle, inode, &iloc); if (err) return err; - if (EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize && - !ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND)) { - /* - * In nojournal mode, we can immediately attempt to expand - * the inode. When journaled, we first need to obtain extra - * buffer credits since we may write into the EA block - * with this same handle. If journal_extend fails, then it will - * only result in a minor loss of functionality for that inode. - * If this is felt to be critical, then e2fsck should be run to - * force a large enough s_min_extra_isize. - */ - if (!ext4_handle_valid(handle) || - jbd2_journal_extend(handle, - EXT4_DATA_TRANS_BLOCKS(inode->i_sb)) == 0) { - ret = ext4_expand_extra_isize(inode, - sbi->s_want_extra_isize, - iloc, handle); - if (ret) { - if (mnt_count != - le16_to_cpu(sbi->s_es->s_mnt_count)) { - ext4_warning(inode->i_sb, - "Unable to expand inode %lu. Delete" - " some EAs or run e2fsck.", - inode->i_ino); - mnt_count = - le16_to_cpu(sbi->s_es->s_mnt_count); - } - } - } - } + + if (EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize) + ext4_try_to_expand_extra_isize(inode, sbi->s_want_extra_isize, + iloc, handle); + return ext4_mark_iloc_dirty(handle, inode, &iloc); } diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 42b3a73143cf..afb66d4ab5cf 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -64,18 +64,16 @@ static void swap_inode_data(struct inode *inode1, struct inode *inode2) ei1 = EXT4_I(inode1); ei2 = EXT4_I(inode2); - memswap(&inode1->i_flags, &inode2->i_flags, sizeof(inode1->i_flags)); - memswap(&inode1->i_version, &inode2->i_version, - sizeof(inode1->i_version)); - memswap(&inode1->i_blocks, &inode2->i_blocks, - sizeof(inode1->i_blocks)); - memswap(&inode1->i_bytes, &inode2->i_bytes, sizeof(inode1->i_bytes)); - memswap(&inode1->i_atime, &inode2->i_atime, sizeof(inode1->i_atime)); - memswap(&inode1->i_mtime, &inode2->i_mtime, sizeof(inode1->i_mtime)); + swap(inode1->i_flags, inode2->i_flags); + swap(inode1->i_version, inode2->i_version); + swap(inode1->i_blocks, inode2->i_blocks); + swap(inode1->i_bytes, inode2->i_bytes); + swap(inode1->i_atime, inode2->i_atime); + swap(inode1->i_mtime, inode2->i_mtime); memswap(ei1->i_data, ei2->i_data, sizeof(ei1->i_data)); - memswap(&ei1->i_flags, &ei2->i_flags, sizeof(ei1->i_flags)); - memswap(&ei1->i_disksize, &ei2->i_disksize, sizeof(ei1->i_disksize)); + swap(ei1->i_flags, ei2->i_flags); + swap(ei1->i_disksize, ei2->i_disksize); ext4_es_remove_extent(inode1, 0, EXT_MAX_BLOCKS); ext4_es_remove_extent(inode2, 0, EXT_MAX_BLOCKS); @@ -351,11 +349,14 @@ static int ext4_ioctl_setproject(struct file *filp, __u32 projid) raw_inode = ext4_raw_inode(&iloc); if (!EXT4_FITS_IN_INODE(raw_inode, ei, i_projid)) { - err = -EOVERFLOW; + err = ext4_expand_extra_isize(inode, + EXT4_SB(sb)->s_want_extra_isize, + &iloc); + if (err) + goto out_unlock; + } else { brelse(iloc.bh); - goto out_unlock; } - brelse(iloc.bh); dquot_initialize(inode); diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 581e357e8406..5a1052627a81 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2295,9 +2295,12 @@ static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v) int err, buddy_loaded = 0; struct ext4_buddy e4b; struct ext4_group_info *grinfo; + unsigned char blocksize_bits = min_t(unsigned char, + sb->s_blocksize_bits, + EXT4_MAX_BLOCK_LOG_SIZE); struct sg { struct ext4_group_info info; - ext4_grpblk_t counters[EXT4_MAX_BLOCK_LOG_SIZE + 2]; + ext4_grpblk_t counters[blocksize_bits + 2]; } sg; group--; @@ -2306,8 +2309,6 @@ static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v) " 2^0 2^1 2^2 2^3 2^4 2^5 2^6 " " 2^7 2^8 2^9 2^10 2^11 2^12 2^13 ]\n"); - i = (sb->s_blocksize_bits + 2) * sizeof(sg.info.bb_counters[0]) + - sizeof(struct ext4_group_info); grinfo = ext4_get_group_info(sb, group); /* Load the group info in memory only if not already loaded. */ if (unlikely(EXT4_MB_GRP_NEED_INIT(grinfo))) { @@ -2319,7 +2320,7 @@ static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v) buddy_loaded = 1; } - memcpy(&sg, ext4_get_group_info(sb, group), i); + memcpy(&sg, ext4_get_group_info(sb, group), sizeof(sg)); if (buddy_loaded) ext4_mb_unload_buddy(&e4b); @@ -2327,7 +2328,7 @@ static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v) seq_printf(seq, "#%-5u: %-5u %-5u %-5u [", group, sg.info.bb_free, sg.info.bb_fragments, sg.info.bb_first_free); for (i = 0; i <= 13; i++) - seq_printf(seq, " %-5u", i <= sb->s_blocksize_bits + 1 ? + seq_printf(seq, " %-5u", i <= blocksize_bits + 1 ? sg.info.bb_counters[i] : 0); seq_printf(seq, " ]\n"); @@ -2892,8 +2893,10 @@ void ext4_process_freed_data(struct super_block *sb, tid_t commit_tid) break; } - if (discard_bio) + if (discard_bio) { submit_bio_wait(discard_bio); + bio_put(discard_bio); + } } list_for_each_entry_safe(entry, tmp, &freed_data_list, efd_list) diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 13f0cadb1238..c1cf020d1889 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1342,13 +1342,12 @@ static struct buffer_head * ext4_find_entry (struct inode *dir, struct super_block *sb; struct buffer_head *bh_use[NAMEI_RA_SIZE]; struct buffer_head *bh, *ret = NULL; - ext4_lblk_t start, block, b; + ext4_lblk_t start, block; const u8 *name = d_name->name; - int ra_max = 0; /* Number of bh's in the readahead + size_t ra_max = 0; /* Number of bh's in the readahead buffer, bh_use[] */ - int ra_ptr = 0; /* Current index into readahead + size_t ra_ptr = 0; /* Current index into readahead buffer */ - int num = 0; ext4_lblk_t nblocks; int i, namelen, retval; struct ext4_filename fname; @@ -1411,31 +1410,17 @@ restart: if (ra_ptr >= ra_max) { /* Refill the readahead buffer */ ra_ptr = 0; - b = block; - for (ra_max = 0; ra_max < NAMEI_RA_SIZE; ra_max++) { - /* - * Terminate if we reach the end of the - * directory and must wrap, or if our - * search has finished at this block. - */ - if (b >= nblocks || (num && block == start)) { - bh_use[ra_max] = NULL; - break; - } - num++; - bh = ext4_getblk(NULL, dir, b++, 0); - if (IS_ERR(bh)) { - if (ra_max == 0) { - ret = bh; - goto cleanup_and_exit; - } - break; - } - bh_use[ra_max] = bh; - if (bh) - ll_rw_block(REQ_OP_READ, - REQ_META | REQ_PRIO, - 1, &bh); + if (block < start) + ra_max = start - block; + else + ra_max = nblocks - block; + ra_max = min(ra_max, ARRAY_SIZE(bh_use)); + retval = ext4_bread_batch(dir, block, ra_max, + false /* wait */, bh_use); + if (retval) { + ret = ERR_PTR(retval); + ra_max = 0; + goto cleanup_and_exit; } } if ((bh = bh_use[ra_ptr++]) == NULL) @@ -2395,19 +2380,22 @@ out: } /* - * DIR_NLINK feature is set if 1) nlinks > EXT4_LINK_MAX or 2) nlinks == 2, - * since this indicates that nlinks count was previously 1. + * Set directory link count to 1 if nlinks > EXT4_LINK_MAX, or if nlinks == 2 + * since this indicates that nlinks count was previously 1 to avoid overflowing + * the 16-bit i_links_count field on disk. Directories with i_nlink == 1 mean + * that subdirectory link counts are not being maintained accurately. + * + * The caller has already checked for i_nlink overflow in case the DIR_LINK + * feature is not enabled and returned -EMLINK. The is_dx() check is a proxy + * for checking S_ISDIR(inode) (since the INODE_INDEX feature will not be set + * on regular files) and to avoid creating huge/slow non-HTREE directories. */ static void ext4_inc_count(handle_t *handle, struct inode *inode) { inc_nlink(inode); - if (is_dx(inode) && inode->i_nlink > 1) { - /* limit is 16-bit i_links_count */ - if (inode->i_nlink >= EXT4_LINK_MAX || inode->i_nlink == 2) { - set_nlink(inode, 1); - ext4_set_feature_dir_nlink(inode->i_sb); - } - } + if (is_dx(inode) && + (inode->i_nlink > EXT4_LINK_MAX || inode->i_nlink == 2)) + set_nlink(inode, 1); } /* diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index c3ed9021b781..035cd3f4785e 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -1927,7 +1927,8 @@ retry: n_desc_blocks = o_desc_blocks + le16_to_cpu(es->s_reserved_gdt_blocks); n_group = n_desc_blocks * EXT4_DESC_PER_BLOCK(sb); - n_blocks_count = n_group * EXT4_BLOCKS_PER_GROUP(sb); + n_blocks_count = (ext4_fsblk_t)n_group * + EXT4_BLOCKS_PER_GROUP(sb); n_group--; /* set to last group number */ } diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 0886fe82e9c4..d61a70e2193a 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -978,8 +978,6 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) ei->i_es_shk_nr = 0; ei->i_es_shrink_lblk = 0; ei->i_reserved_data_blocks = 0; - ei->i_reserved_meta_blocks = 0; - ei->i_allocated_meta_blocks = 0; ei->i_da_metadata_calc_len = 0; ei->i_da_metadata_calc_last_lblock = 0; spin_lock_init(&(ei->i_block_reservation_lock)); diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index cff4f41ced61..82a5af9f6668 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -317,28 +317,41 @@ static void ext4_xattr_inode_set_hash(struct inode *ea_inode, u32 hash) */ static int ext4_xattr_inode_read(struct inode *ea_inode, void *buf, size_t size) { - unsigned long block = 0; - struct buffer_head *bh; - int blocksize = ea_inode->i_sb->s_blocksize; - size_t csize, copied = 0; - void *copy_pos = buf; - - while (copied < size) { - csize = (size - copied) > blocksize ? blocksize : size - copied; - bh = ext4_bread(NULL, ea_inode, block, 0); - if (IS_ERR(bh)) - return PTR_ERR(bh); - if (!bh) - return -EFSCORRUPTED; + int blocksize = 1 << ea_inode->i_blkbits; + int bh_count = (size + blocksize - 1) >> ea_inode->i_blkbits; + int tail_size = (size % blocksize) ?: blocksize; + struct buffer_head *bhs_inline[8]; + struct buffer_head **bhs = bhs_inline; + int i, ret; + + if (bh_count > ARRAY_SIZE(bhs_inline)) { + bhs = kmalloc_array(bh_count, sizeof(*bhs), GFP_NOFS); + if (!bhs) + return -ENOMEM; + } - memcpy(copy_pos, bh->b_data, csize); - brelse(bh); + ret = ext4_bread_batch(ea_inode, 0 /* block */, bh_count, + true /* wait */, bhs); + if (ret) + goto free_bhs; - copy_pos += csize; - block += 1; - copied += csize; + for (i = 0; i < bh_count; i++) { + /* There shouldn't be any holes in ea_inode. */ + if (!bhs[i]) { + ret = -EFSCORRUPTED; + goto put_bhs; + } + memcpy((char *)buf + blocksize * i, bhs[i]->b_data, + i < bh_count - 1 ? blocksize : tail_size); } - return 0; + ret = 0; +put_bhs: + for (i = 0; i < bh_count; i++) + brelse(bhs[i]); +free_bhs: + if (bhs != bhs_inline) + kfree(bhs); + return ret; } static int ext4_xattr_inode_iget(struct inode *parent, unsigned long ea_ino, @@ -451,6 +464,7 @@ ext4_xattr_inode_get(struct inode *inode, struct ext4_xattr_entry *entry, } /* Do not add ea_inode to the cache. */ ea_inode_cache = NULL; + err = 0; } else if (err) goto out; @@ -1815,9 +1829,6 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, ea_bdebug(bs->bh, "modifying in-place"); error = ext4_xattr_set_entry(i, s, handle, inode, true /* is_block */); - if (!error) - ext4_xattr_block_cache_insert(ea_block_cache, - bs->bh); ext4_xattr_block_csum_set(inode, bs->bh); unlock_buffer(bs->bh); if (error == -EFSCORRUPTED) @@ -1973,6 +1984,7 @@ inserted: } else if (bs->bh && s->base == bs->bh->b_data) { /* We were modifying this block in-place. */ ea_bdebug(bs->bh, "keeping this block"); + ext4_xattr_block_cache_insert(ea_block_cache, bs->bh); new_bh = bs->bh; get_bh(new_bh); } else { @@ -2625,23 +2637,21 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, struct ext4_inode *raw_inode, handle_t *handle) { struct ext4_xattr_ibody_header *header; - struct buffer_head *bh = NULL; + struct buffer_head *bh; + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); + static unsigned int mnt_count; size_t min_offs; size_t ifree, bfree; int total_ino; void *base, *end; int error = 0, tried_min_extra_isize = 0; - int s_min_extra_isize = le16_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_min_extra_isize); + int s_min_extra_isize = le16_to_cpu(sbi->s_es->s_min_extra_isize); int isize_diff; /* How much do we need to grow i_extra_isize */ - int no_expand; - - if (ext4_write_trylock_xattr(inode, &no_expand) == 0) - return 0; retry: isize_diff = new_extra_isize - EXT4_I(inode)->i_extra_isize; if (EXT4_I(inode)->i_extra_isize >= new_extra_isize) - goto out; + return 0; header = IHDR(inode, raw_inode); @@ -2676,6 +2686,7 @@ retry: EXT4_ERROR_INODE(inode, "bad block %llu", EXT4_I(inode)->i_file_acl); error = -EFSCORRUPTED; + brelse(bh); goto cleanup; } base = BHDR(bh); @@ -2683,11 +2694,11 @@ retry: min_offs = end - base; bfree = ext4_xattr_free_space(BFIRST(bh), &min_offs, base, NULL); + brelse(bh); if (bfree + ifree < isize_diff) { if (!tried_min_extra_isize && s_min_extra_isize) { tried_min_extra_isize++; new_extra_isize = s_min_extra_isize; - brelse(bh); goto retry; } error = -ENOSPC; @@ -2705,7 +2716,6 @@ retry: s_min_extra_isize) { tried_min_extra_isize++; new_extra_isize = s_min_extra_isize; - brelse(bh); goto retry; } goto cleanup; @@ -2717,18 +2727,13 @@ shift: EXT4_GOOD_OLD_INODE_SIZE + new_extra_isize, (void *)header, total_ino); EXT4_I(inode)->i_extra_isize = new_extra_isize; - brelse(bh); -out: - ext4_write_unlock_xattr(inode, &no_expand); - return 0; cleanup: - brelse(bh); - /* - * Inode size expansion failed; don't try again - */ - no_expand = 1; - ext4_write_unlock_xattr(inode, &no_expand); + if (error && (mnt_count != le16_to_cpu(sbi->s_es->s_mnt_count))) { + ext4_warning(inode->i_sb, "Unable to expand inode %lu. Delete some EAs or run e2fsck.", + inode->i_ino); + mnt_count = le16_to_cpu(sbi->s_es->s_mnt_count); + } return error; } diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c index a140c5e3dc54..b4b8438c42ef 100644 --- a/fs/f2fs/acl.c +++ b/fs/f2fs/acl.c @@ -211,7 +211,7 @@ static int __f2fs_set_acl(struct inode *inode, int type, switch (type) { case ACL_TYPE_ACCESS: name_index = F2FS_XATTR_INDEX_POSIX_ACL_ACCESS; - if (acl) { + if (acl && !ipage) { error = posix_acl_update_mode(inode, &inode->i_mode, &acl); if (error) return error; diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 56bbf592e487..5b876f6d3f6b 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -879,6 +879,7 @@ int sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type) struct inode *inode; struct f2fs_inode_info *fi; bool is_dir = (type == DIR_INODE); + unsigned long ino = 0; trace_f2fs_sync_dirty_inodes_enter(sbi->sb, is_dir, get_pages(sbi, is_dir ? @@ -901,8 +902,17 @@ retry: inode = igrab(&fi->vfs_inode); spin_unlock(&sbi->inode_lock[type]); if (inode) { + unsigned long cur_ino = inode->i_ino; + filemap_fdatawrite(inode->i_mapping); iput(inode); + /* We need to give cpu to another writers. */ + if (ino == cur_ino) { + congestion_wait(BLK_RW_ASYNC, HZ/50); + cond_resched(); + } else { + ino = cur_ino; + } } else { /* * We should submit bio, since it exists several diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index a0e6d2c65a9e..2706130c261b 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1538,7 +1538,6 @@ static int f2fs_ioc_setflags(struct file *filp, unsigned long arg) /* Is it quota file? Do not allow user to mess with it */ if (IS_NOQUOTA(inode)) { - inode_unlock(inode); ret = -EPERM; goto unlock_out; } @@ -1549,9 +1548,8 @@ static int f2fs_ioc_setflags(struct file *filp, unsigned long arg) if ((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) { if (!capable(CAP_LINUX_IMMUTABLE)) { - inode_unlock(inode); ret = -EPERM; - goto out; + goto unlock_out; } } @@ -1564,7 +1562,6 @@ static int f2fs_ioc_setflags(struct file *filp, unsigned long arg) f2fs_mark_inode_dirty_sync(inode, false); unlock_out: inode_unlock(inode); -out: mnt_drop_write_file(filp); return ret; } diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 9adc202fcd6f..71191d89917d 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -11,6 +11,7 @@ */ #include <linux/proc_fs.h> #include <linux/f2fs_fs.h> +#include <linux/seq_file.h> #include "f2fs.h" #include "segment.h" diff --git a/fs/hfsplus/posix_acl.c b/fs/hfsplus/posix_acl.c index 9b92058a1240..6bb5d7c42888 100644 --- a/fs/hfsplus/posix_acl.c +++ b/fs/hfsplus/posix_acl.c @@ -51,8 +51,8 @@ struct posix_acl *hfsplus_get_posix_acl(struct inode *inode, int type) return acl; } -int hfsplus_set_posix_acl(struct inode *inode, struct posix_acl *acl, - int type) +static int __hfsplus_set_posix_acl(struct inode *inode, struct posix_acl *acl, + int type) { int err; char *xattr_name; @@ -64,12 +64,6 @@ int hfsplus_set_posix_acl(struct inode *inode, struct posix_acl *acl, switch (type) { case ACL_TYPE_ACCESS: xattr_name = XATTR_NAME_POSIX_ACL_ACCESS; - if (acl) { - err = posix_acl_update_mode(inode, &inode->i_mode, &acl); - if (err) - return err; - } - err = 0; break; case ACL_TYPE_DEFAULT: @@ -105,6 +99,18 @@ end_set_acl: return err; } +int hfsplus_set_posix_acl(struct inode *inode, struct posix_acl *acl, int type) +{ + int err; + + if (type == ACL_TYPE_ACCESS && acl) { + err = posix_acl_update_mode(inode, &inode->i_mode, &acl); + if (err) + return err; + } + return __hfsplus_set_posix_acl(inode, acl, type); +} + int hfsplus_init_posix_acl(struct inode *inode, struct inode *dir) { int err = 0; @@ -122,15 +128,15 @@ int hfsplus_init_posix_acl(struct inode *inode, struct inode *dir) return err; if (default_acl) { - err = hfsplus_set_posix_acl(inode, default_acl, - ACL_TYPE_DEFAULT); + err = __hfsplus_set_posix_acl(inode, default_acl, + ACL_TYPE_DEFAULT); posix_acl_release(default_acl); } if (acl) { if (!err) - err = hfsplus_set_posix_acl(inode, acl, - ACL_TYPE_ACCESS); + err = __hfsplus_set_posix_acl(inode, acl, + ACL_TYPE_ACCESS); posix_acl_release(acl); } return err; diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index 8cf898a59730..217a5e7815da 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -410,7 +410,11 @@ static int parse_options(char *options, struct iso9660_options *popt) if (match_int(&args[0], &option)) return 0; n = option; - if (n > 99) + /* + * Track numbers are supposed to be in range 1-99, the + * mount option starts indexing at 0. + */ + if (n >= 99) return 0; popt->session = n + 1; break; @@ -543,7 +547,7 @@ static unsigned int isofs_get_last_session(struct super_block *sb, s32 session) vol_desc_start=0; ms_info.addr_format=CDROM_LBA; - if(session >= 0 && session <= 99) { + if (session > 0) { struct cdrom_tocentry Te; Te.cdte_track=session; Te.cdte_format=CDROM_LBA; diff --git a/fs/jfs/acl.c b/fs/jfs/acl.c index 7bc186f4ed4d..2e71b6e7e646 100644 --- a/fs/jfs/acl.c +++ b/fs/jfs/acl.c @@ -77,13 +77,6 @@ static int __jfs_set_acl(tid_t tid, struct inode *inode, int type, switch (type) { case ACL_TYPE_ACCESS: ea_name = XATTR_NAME_POSIX_ACL_ACCESS; - if (acl) { - rc = posix_acl_update_mode(inode, &inode->i_mode, &acl); - if (rc) - return rc; - inode->i_ctime = current_time(inode); - mark_inode_dirty(inode); - } break; case ACL_TYPE_DEFAULT: ea_name = XATTR_NAME_POSIX_ACL_DEFAULT; @@ -115,12 +108,27 @@ int jfs_set_acl(struct inode *inode, struct posix_acl *acl, int type) { int rc; tid_t tid; + int update_mode = 0; + umode_t mode = inode->i_mode; tid = txBegin(inode->i_sb, 0); mutex_lock(&JFS_IP(inode)->commit_mutex); + if (type == ACL_TYPE_ACCESS && acl) { + rc = posix_acl_update_mode(inode, &mode, &acl); + if (rc) + goto end_tx; + update_mode = 1; + } rc = __jfs_set_acl(tid, inode, type, acl); - if (!rc) + if (!rc) { + if (update_mode) { + inode->i_mode = mode; + inode->i_ctime = current_time(inode); + mark_inode_dirty(inode); + } rc = txCommit(tid, 1, &inode, 0); + } +end_tx: txEnd(tid); mutex_unlock(&JFS_IP(inode)->commit_mutex); return rc; diff --git a/fs/jfs/resize.c b/fs/jfs/resize.c index bd9b641ada2c..7ddcb445a3d9 100644 --- a/fs/jfs/resize.c +++ b/fs/jfs/resize.c @@ -98,7 +98,7 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize) goto out; } - VolumeSize = sb->s_bdev->bd_inode->i_size >> sb->s_blocksize_bits; + VolumeSize = i_size_read(sb->s_bdev->bd_inode) >> sb->s_blocksize_bits; if (VolumeSize) { if (newLVSize > VolumeSize) { @@ -211,7 +211,7 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize) txQuiesce(sb); /* Reset size of direct inode */ - sbi->direct_inode->i_size = sb->s_bdev->bd_inode->i_size; + sbi->direct_inode->i_size = i_size_read(sb->s_bdev->bd_inode); if (sbi->mntflag & JFS_INLINELOG) { /* diff --git a/fs/jfs/super.c b/fs/jfs/super.c index e8aad7d87b8c..78b41e1d5c67 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -313,7 +313,7 @@ static int parse_options(char *options, struct super_block *sb, s64 *newLVSize, } case Opt_resize_nosize: { - *newLVSize = sb->s_bdev->bd_inode->i_size >> + *newLVSize = i_size_read(sb->s_bdev->bd_inode) >> sb->s_blocksize_bits; if (*newLVSize == 0) pr_err("JFS: Cannot determine volume size\n"); @@ -579,7 +579,7 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent) goto out_unload; } inode->i_ino = 0; - inode->i_size = sb->s_bdev->bd_inode->i_size; + inode->i_size = i_size_read(sb->s_bdev->bd_inode); inode->i_mapping->a_ops = &jfs_metapage_aops; hlist_add_fake(&inode->i_hash); mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS); diff --git a/fs/mount.h b/fs/mount.h index de45d9e76748..6790767d1883 100644 --- a/fs/mount.h +++ b/fs/mount.h @@ -16,7 +16,7 @@ struct mnt_namespace { u64 event; unsigned int mounts; /* # of mounts in the namespace */ unsigned int pending_mounts; -}; +} __randomize_layout; struct mnt_pcp { int mnt_count; @@ -69,7 +69,7 @@ struct mount { struct hlist_head mnt_pins; struct fs_pin mnt_umount; struct dentry *mnt_ex_mountpoint; -}; +} __randomize_layout; #define MNT_NS_INTERNAL ERR_PTR(-EINVAL) /* distinct from any mnt_namespace */ diff --git a/fs/namei.c b/fs/namei.c index 88fd38d1e3e7..ddb6a7c2b3d4 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -524,7 +524,7 @@ struct nameidata { struct inode *link_inode; unsigned root_seq; int dfd; -}; +} __randomize_layout; static void set_nameidata(struct nameidata *p, int dfd, struct filename *name) { diff --git a/fs/nfs/client.c b/fs/nfs/client.c index ee5ddbd36088..efebe6cf4378 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -820,6 +820,7 @@ void nfs_server_copy_userdata(struct nfs_server *target, struct nfs_server *sour target->caps = source->caps; target->options = source->options; target->auth_info = source->auth_info; + target->port = source->port; } EXPORT_SYMBOL_GPL(nfs_server_copy_userdata); diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 5ac484fe0dee..3522b1249019 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -2372,16 +2372,40 @@ void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set) } EXPORT_SYMBOL_GPL(nfs_access_add_cache); +#define NFS_MAY_READ (NFS4_ACCESS_READ) +#define NFS_MAY_WRITE (NFS4_ACCESS_MODIFY | \ + NFS4_ACCESS_EXTEND | \ + NFS4_ACCESS_DELETE) +#define NFS_FILE_MAY_WRITE (NFS4_ACCESS_MODIFY | \ + NFS4_ACCESS_EXTEND) +#define NFS_DIR_MAY_WRITE NFS_MAY_WRITE +#define NFS_MAY_LOOKUP (NFS4_ACCESS_LOOKUP) +#define NFS_MAY_EXECUTE (NFS4_ACCESS_EXECUTE) +static int +nfs_access_calc_mask(u32 access_result, umode_t umode) +{ + int mask = 0; + + if (access_result & NFS_MAY_READ) + mask |= MAY_READ; + if (S_ISDIR(umode)) { + if ((access_result & NFS_DIR_MAY_WRITE) == NFS_DIR_MAY_WRITE) + mask |= MAY_WRITE; + if ((access_result & NFS_MAY_LOOKUP) == NFS_MAY_LOOKUP) + mask |= MAY_EXEC; + } else if (S_ISREG(umode)) { + if ((access_result & NFS_FILE_MAY_WRITE) == NFS_FILE_MAY_WRITE) + mask |= MAY_WRITE; + if ((access_result & NFS_MAY_EXECUTE) == NFS_MAY_EXECUTE) + mask |= MAY_EXEC; + } else if (access_result & NFS_MAY_WRITE) + mask |= MAY_WRITE; + return mask; +} + void nfs_access_set_mask(struct nfs_access_entry *entry, u32 access_result) { - entry->mask = 0; - if (access_result & NFS4_ACCESS_READ) - entry->mask |= MAY_READ; - if (access_result & - (NFS4_ACCESS_MODIFY | NFS4_ACCESS_EXTEND | NFS4_ACCESS_DELETE)) - entry->mask |= MAY_WRITE; - if (access_result & (NFS4_ACCESS_LOOKUP|NFS4_ACCESS_EXECUTE)) - entry->mask |= MAY_EXEC; + entry->mask = access_result; } EXPORT_SYMBOL_GPL(nfs_access_set_mask); @@ -2389,6 +2413,7 @@ static int nfs_do_access(struct inode *inode, struct rpc_cred *cred, int mask) { struct nfs_access_entry cache; bool may_block = (mask & MAY_NOT_BLOCK) == 0; + int cache_mask; int status; trace_nfs_access_enter(inode); @@ -2404,7 +2429,8 @@ static int nfs_do_access(struct inode *inode, struct rpc_cred *cred, int mask) goto out; /* Be clever: ask server to check for all possible rights */ - cache.mask = MAY_EXEC | MAY_WRITE | MAY_READ; + cache.mask = NFS_MAY_LOOKUP | NFS_MAY_EXECUTE + | NFS_MAY_WRITE | NFS_MAY_READ; cache.cred = cred; cache.jiffies = jiffies; status = NFS_PROTO(inode)->access(inode, &cache); @@ -2418,7 +2444,8 @@ static int nfs_do_access(struct inode *inode, struct rpc_cred *cred, int mask) } nfs_access_add_cache(inode, &cache); out_cached: - if ((mask & ~cache.mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) != 0) + cache_mask = nfs_access_calc_mask(cache.mask, inode->i_mode); + if ((mask & ~cache_mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) != 0) status = -EACCES; out: trace_nfs_access_exit(inode, status); diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 5713eb32a45e..af330c31f627 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -617,6 +617,8 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from) if (result) goto out; } + if (iocb->ki_pos > i_size_read(inode)) + nfs_revalidate_mapping(inode, file->f_mapping); nfs_start_io_write(inode); result = generic_write_checks(iocb, from); @@ -750,7 +752,7 @@ do_setlk(struct file *filp, int cmd, struct file_lock *fl, int is_local) */ nfs_sync_mapping(filp->f_mapping); if (!NFS_PROTO(inode)->have_delegation(inode, FMODE_READ)) - nfs_zap_mapping(inode, filp->f_mapping); + nfs_zap_caches(inode); out: return status; } diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c index 080fc6b278bd..44c638b7876c 100644 --- a/fs/nfs/filelayout/filelayout.c +++ b/fs/nfs/filelayout/filelayout.c @@ -542,6 +542,10 @@ filelayout_check_deviceid(struct pnfs_layout_hdr *lo, struct nfs4_file_layout_dsaddr *dsaddr; int status = -EINVAL; + /* Is the deviceid already set? If so, we're good. */ + if (fl->dsaddr != NULL) + return 0; + /* find and reference the deviceid */ d = nfs4_find_get_deviceid(NFS_SERVER(lo->plh_inode), &fl->deviceid, lo->plh_lc_cred, gfp_flags); @@ -553,8 +557,6 @@ filelayout_check_deviceid(struct pnfs_layout_hdr *lo, if (filelayout_test_devid_unavailable(&dsaddr->id_node)) goto out_put; - fl->dsaddr = dsaddr; - if (fl->first_stripe_index >= dsaddr->stripe_count) { dprintk("%s Bad first_stripe_index %u\n", __func__, fl->first_stripe_index); @@ -570,6 +572,13 @@ filelayout_check_deviceid(struct pnfs_layout_hdr *lo, goto out_put; } status = 0; + + /* + * Atomic compare and xchange to ensure we don't scribble + * over a non-NULL pointer. + */ + if (cmpxchg(&fl->dsaddr, NULL, dsaddr) != NULL) + goto out_put; out: return status; out_put: diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 1f2ac3dd0fe5..b0fa83a60754 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -1842,6 +1842,10 @@ static int ff_layout_initiate_commit(struct nfs_commit_data *data, int how) int vers, ret; struct nfs_fh *fh; + if (!lseg || !(pnfs_is_valid_lseg(lseg) || + test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags))) + goto out_err; + idx = calc_ds_index_from_commit(lseg, data->ds_commit_index); ds = nfs4_ff_layout_prepare_ds(lseg, idx, true); if (!ds) diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c index 3efe946672be..60bad882c123 100644 --- a/fs/nfs/mount_clnt.c +++ b/fs/nfs/mount_clnt.c @@ -512,7 +512,7 @@ static const struct rpc_version mnt_version1 = { .counts = mnt_counts, }; -static unsigned int mnt3_counts[ARRAY_SIZE(mnt_procedures)]; +static unsigned int mnt3_counts[ARRAY_SIZE(mnt3_procedures)]; static const struct rpc_version mnt_version3 = { .number = 3, .nrprocs = ARRAY_SIZE(mnt3_procedures), diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index df4a7d3ab915..d1e87ec0df84 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -220,15 +220,8 @@ static int nfs3_proc_access(struct inode *inode, struct nfs_access_entry *entry) status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); nfs_refresh_inode(inode, res.fattr); - if (status == 0) { - entry->mask = 0; - if (res.access & NFS3_ACCESS_READ) - entry->mask |= MAY_READ; - if (res.access & (NFS3_ACCESS_MODIFY | NFS3_ACCESS_EXTEND | NFS3_ACCESS_DELETE)) - entry->mask |= MAY_WRITE; - if (res.access & (NFS3_ACCESS_LOOKUP|NFS3_ACCESS_EXECUTE)) - entry->mask |= MAY_EXEC; - } + if (status == 0) + nfs_access_set_mask(entry, res.access); nfs_free_fattr(res.fattr); out: dprintk("NFS reply access: %d\n", status); diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 50566acb5469..e9bea90dc017 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -660,9 +660,6 @@ int nfs4_detect_session_trunking(struct nfs_client *clp, if (!nfs4_check_server_scope(clp->cl_serverscope, res->server_scope)) goto out_err; - /* Session trunking passed, add the xprt */ - rpc_clnt_xprt_switch_add_xprt(clp->cl_rpcclient, xprt); - pr_info("NFS: %s: Session trunking succeeded for %s\n", clp->cl_hostname, xprt->address_strings[RPC_DISPLAY_ADDR]); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index a0b4e1091340..ffd2e712595d 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2236,7 +2236,7 @@ static int nfs4_opendata_access(struct rpc_cred *cred, int openflags) { struct nfs_access_entry cache; - u32 mask; + u32 mask, flags; /* access call failed or for some reason the server doesn't * support any access modes -- defer access call until later */ @@ -2250,16 +2250,20 @@ static int nfs4_opendata_access(struct rpc_cred *cred, */ if (openflags & __FMODE_EXEC) { /* ONLY check for exec rights */ - mask = MAY_EXEC; + if (S_ISDIR(state->inode->i_mode)) + mask = NFS4_ACCESS_LOOKUP; + else + mask = NFS4_ACCESS_EXECUTE; } else if ((fmode & FMODE_READ) && !opendata->file_created) - mask = MAY_READ; + mask = NFS4_ACCESS_READ; cache.cred = cred; cache.jiffies = jiffies; nfs_access_set_mask(&cache, opendata->o_res.access_result); nfs_access_add_cache(state->inode, &cache); - if ((mask & ~cache.mask & (MAY_READ | MAY_EXEC)) == 0) + flags = NFS4_ACCESS_READ | NFS4_ACCESS_EXECUTE | NFS4_ACCESS_LOOKUP; + if ((mask & ~cache.mask & flags) == 0) return 0; return -EACCES; @@ -6492,7 +6496,7 @@ nfs4_retry_setlk(struct nfs4_state *state, int cmd, struct file_lock *request) set_current_state(TASK_INTERRUPTIBLE); spin_unlock_irqrestore(&q->lock, flags); - freezable_schedule_timeout_interruptible(NFS4_LOCK_MAXTIMEOUT); + freezable_schedule_timeout(NFS4_LOCK_MAXTIMEOUT); } finish_wait(q, &wait); @@ -7457,7 +7461,7 @@ static void nfs4_exchange_id_done(struct rpc_task *task, void *data) cdata->res.server_scope = NULL; } /* Save the EXCHANGE_ID verifier session trunk tests */ - memcpy(clp->cl_confirm.data, cdata->args.verifier->data, + memcpy(clp->cl_confirm.data, cdata->args.verifier.data, sizeof(clp->cl_confirm.data)); } out: @@ -7470,10 +7474,6 @@ static void nfs4_exchange_id_release(void *data) struct nfs41_exchange_id_data *cdata = (struct nfs41_exchange_id_data *)data; - if (cdata->xprt) { - xprt_put(cdata->xprt); - rpc_clnt_xprt_switch_put(cdata->args.client->cl_rpcclient); - } nfs_put_client(cdata->args.client); kfree(cdata->res.impl_id); kfree(cdata->res.server_scope); @@ -7494,7 +7494,6 @@ static const struct rpc_call_ops nfs4_exchange_id_call_ops = { static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred, u32 sp4_how, struct rpc_xprt *xprt) { - nfs4_verifier verifier; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_EXCHANGE_ID], .rpc_cred = cred, @@ -7503,7 +7502,7 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred, .rpc_client = clp->cl_rpcclient, .callback_ops = &nfs4_exchange_id_call_ops, .rpc_message = &msg, - .flags = RPC_TASK_ASYNC | RPC_TASK_TIMEOUT, + .flags = RPC_TASK_TIMEOUT, }; struct nfs41_exchange_id_data *calldata; struct rpc_task *task; @@ -7518,8 +7517,7 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred, return -ENOMEM; } - if (!xprt) - nfs4_init_boot_verifier(clp, &verifier); + nfs4_init_boot_verifier(clp, &calldata->args.verifier); status = nfs4_init_uniform_client_string(clp); if (status) @@ -7558,11 +7556,9 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred, if (xprt) { calldata->xprt = xprt; task_setup_data.rpc_xprt = xprt; - task_setup_data.flags = - RPC_TASK_SOFT|RPC_TASK_SOFTCONN|RPC_TASK_ASYNC; - calldata->args.verifier = &clp->cl_confirm; - } else { - calldata->args.verifier = &verifier; + task_setup_data.flags |= RPC_TASK_SOFTCONN; + memcpy(calldata->args.verifier.data, clp->cl_confirm.data, + sizeof(calldata->args.verifier.data)); } calldata->args.client = clp; #ifdef CONFIG_NFS_V4_1_MIGRATION @@ -7581,12 +7577,7 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred, if (IS_ERR(task)) return PTR_ERR(task); - if (!xprt) { - status = rpc_wait_for_completion_task(task); - if (!status) - status = calldata->rpc_status; - } else /* session trunking test */ - status = calldata->rpc_status; + status = calldata->rpc_status; rpc_put_task(task); out: diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index fa3eb361d4f8..37c8af003275 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -1785,7 +1785,7 @@ static void encode_exchange_id(struct xdr_stream *xdr, int len = 0; encode_op_hdr(xdr, OP_EXCHANGE_ID, decode_exchange_id_maxsz, hdr); - encode_nfs4_verifier(xdr, args->verifier); + encode_nfs4_verifier(xdr, &args->verifier); encode_string(xdr, strlen(args->client->cl_owner_id), args->client->cl_owner_id); diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index d40755a0984b..25f28fa64c57 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -159,13 +159,18 @@ void pnfs_generic_recover_commit_reqs(struct list_head *dst, { struct pnfs_commit_bucket *b; struct pnfs_layout_segment *freeme; + int nwritten; int i; lockdep_assert_held(&cinfo->inode->i_lock); restart: for (i = 0, b = cinfo->ds->buckets; i < cinfo->ds->nbuckets; i++, b++) { - if (pnfs_generic_transfer_commit_list(&b->written, dst, - cinfo, 0)) { + nwritten = pnfs_generic_transfer_commit_list(&b->written, + dst, cinfo, 0); + if (!nwritten) + continue; + cinfo->ds->nwritten -= nwritten; + if (list_empty(&b->written)) { freeme = b->wlseg; b->wlseg = NULL; spin_unlock(&cinfo->inode->i_lock); @@ -174,7 +179,6 @@ restart: goto restart; } } - cinfo->ds->nwritten = 0; } EXPORT_SYMBOL_GPL(pnfs_generic_recover_commit_reqs); @@ -183,6 +187,7 @@ static void pnfs_generic_retry_commit(struct nfs_commit_info *cinfo, int idx) struct pnfs_ds_commit_info *fl_cinfo = cinfo->ds; struct pnfs_commit_bucket *bucket; struct pnfs_layout_segment *freeme; + struct list_head *pos; LIST_HEAD(pages); int i; @@ -193,6 +198,8 @@ static void pnfs_generic_retry_commit(struct nfs_commit_info *cinfo, int idx) continue; freeme = bucket->clseg; bucket->clseg = NULL; + list_for_each(pos, &bucket->committing) + cinfo->ds->ncommitting--; list_splice_init(&bucket->committing, &pages); spin_unlock(&cinfo->inode->i_lock); nfs_retry_commit(&pages, freeme, cinfo, i); @@ -217,13 +224,6 @@ pnfs_generic_alloc_ds_commits(struct nfs_commit_info *cinfo, for (i = 0; i < fl_cinfo->nbuckets; i++, bucket++) { if (list_empty(&bucket->committing)) continue; - /* - * If the layout segment is invalid, then let - * pnfs_generic_retry_commit() clean up the bucket. - */ - if (bucket->clseg && !pnfs_is_valid_lseg(bucket->clseg) && - !test_bit(NFS_LSEG_LAYOUTRETURN, &bucket->clseg->pls_flags)) - break; data = nfs_commitdata_alloc(false); if (!data) break; @@ -243,9 +243,12 @@ void pnfs_fetch_commit_bucket_list(struct list_head *pages, struct nfs_commit_info *cinfo) { struct pnfs_commit_bucket *bucket; + struct list_head *pos; bucket = &cinfo->ds->buckets[data->ds_commit_index]; spin_lock(&cinfo->inode->i_lock); + list_for_each(pos, &bucket->committing) + cinfo->ds->ncommitting--; list_splice_init(&bucket->committing, pages); data->lseg = bucket->clseg; bucket->clseg = NULL; @@ -330,7 +333,6 @@ pnfs_generic_commit_pagelist(struct inode *inode, struct list_head *mds_pages, } } out: - cinfo->ds->ncommitting = 0; return PNFS_ATTEMPTED; } EXPORT_SYMBOL_GPL(pnfs_generic_commit_pagelist); diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index b45083c0f9ae..49b0a9e7ff18 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -720,8 +720,8 @@ static const struct rpc_version nfs_cb_version4 = { .counts = nfs4_cb_counts, }; -static const struct rpc_version *nfs_cb_version[] = { - &nfs_cb_version4, +static const struct rpc_version *nfs_cb_version[2] = { + [1] = &nfs_cb_version4, }; static const struct rpc_program cb_program; @@ -795,7 +795,7 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c .saddress = (struct sockaddr *) &conn->cb_saddr, .timeout = &timeparms, .program = &cb_program, - .version = 0, + .version = 1, .flags = (RPC_CLNT_CREATE_NOPING | RPC_CLNT_CREATE_QUIET), }; struct rpc_clnt *client; diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c index dc22ba8c710f..e50a387959bf 100644 --- a/fs/ocfs2/acl.c +++ b/fs/ocfs2/acl.c @@ -240,18 +240,6 @@ int ocfs2_set_acl(handle_t *handle, switch (type) { case ACL_TYPE_ACCESS: name_index = OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS; - if (acl) { - umode_t mode; - - ret = posix_acl_update_mode(inode, &mode, &acl); - if (ret) - return ret; - - ret = ocfs2_acl_set_mode(inode, di_bh, - handle, mode); - if (ret) - return ret; - } break; case ACL_TYPE_DEFAULT: name_index = OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT; @@ -289,7 +277,19 @@ int ocfs2_iop_set_acl(struct inode *inode, struct posix_acl *acl, int type) had_lock = ocfs2_inode_lock_tracker(inode, &bh, 1, &oh); if (had_lock < 0) return had_lock; + if (type == ACL_TYPE_ACCESS && acl) { + umode_t mode; + + status = posix_acl_update_mode(inode, &mode, &acl); + if (status) + goto unlock; + + status = ocfs2_acl_set_mode(inode, bh, NULL, mode); + if (status) + goto unlock; + } status = ocfs2_set_acl(NULL, inode, bh, type, acl, NULL, NULL); +unlock: ocfs2_inode_unlock_tracker(inode, 1, &oh, had_lock); brelse(bh); return status; diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index 641d9ee97f91..48b70e6490f3 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -481,17 +481,30 @@ out_cleanup: } static int ovl_create_or_link(struct dentry *dentry, struct inode *inode, - struct cattr *attr, struct dentry *hardlink) + struct cattr *attr, struct dentry *hardlink, + bool origin) { int err; const struct cred *old_cred; struct cred *override_cred; + struct dentry *parent = dentry->d_parent; - err = ovl_copy_up(dentry->d_parent); + err = ovl_copy_up(parent); if (err) return err; old_cred = ovl_override_creds(dentry->d_sb); + + /* + * When linking a file with copy up origin into a new parent, mark the + * new parent dir "impure". + */ + if (origin) { + err = ovl_set_impure(parent, ovl_dentry_upper(parent)); + if (err) + goto out_revert_creds; + } + err = -ENOMEM; override_cred = prepare_creds(); if (override_cred) { @@ -550,7 +563,7 @@ static int ovl_create_object(struct dentry *dentry, int mode, dev_t rdev, inode_init_owner(inode, dentry->d_parent->d_inode, mode); attr.mode = inode->i_mode; - err = ovl_create_or_link(dentry, inode, &attr, NULL); + err = ovl_create_or_link(dentry, inode, &attr, NULL, false); if (err) iput(inode); @@ -609,7 +622,8 @@ static int ovl_link(struct dentry *old, struct inode *newdir, inode = d_inode(old); ihold(inode); - err = ovl_create_or_link(new, inode, NULL, ovl_dentry_upper(old)); + err = ovl_create_or_link(new, inode, NULL, ovl_dentry_upper(old), + ovl_type_origin(old)); if (err) iput(inode); diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index 69f4fc26ee39..5bc71642b226 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -202,37 +202,38 @@ bool ovl_is_private_xattr(const char *name) sizeof(OVL_XATTR_PREFIX) - 1) == 0; } -int ovl_xattr_set(struct dentry *dentry, const char *name, const void *value, - size_t size, int flags) +int ovl_xattr_set(struct dentry *dentry, struct inode *inode, const char *name, + const void *value, size_t size, int flags) { int err; - struct path realpath; - enum ovl_path_type type = ovl_path_real(dentry, &realpath); + struct dentry *upperdentry = ovl_i_dentry_upper(inode); + struct dentry *realdentry = upperdentry ?: ovl_dentry_lower(dentry); const struct cred *old_cred; err = ovl_want_write(dentry); if (err) goto out; - if (!value && !OVL_TYPE_UPPER(type)) { - err = vfs_getxattr(realpath.dentry, name, NULL, 0); + if (!value && !upperdentry) { + err = vfs_getxattr(realdentry, name, NULL, 0); if (err < 0) goto out_drop_write; } - err = ovl_copy_up(dentry); - if (err) - goto out_drop_write; + if (!upperdentry) { + err = ovl_copy_up(dentry); + if (err) + goto out_drop_write; - if (!OVL_TYPE_UPPER(type)) - ovl_path_upper(dentry, &realpath); + realdentry = ovl_dentry_upper(dentry); + } old_cred = ovl_override_creds(dentry->d_sb); if (value) - err = vfs_setxattr(realpath.dentry, name, value, size, flags); + err = vfs_setxattr(realdentry, name, value, size, flags); else { WARN_ON(flags != XATTR_REPLACE); - err = vfs_removexattr(realpath.dentry, name); + err = vfs_removexattr(realdentry, name); } revert_creds(old_cred); @@ -242,12 +243,13 @@ out: return err; } -int ovl_xattr_get(struct dentry *dentry, const char *name, +int ovl_xattr_get(struct dentry *dentry, struct inode *inode, const char *name, void *value, size_t size) { - struct dentry *realdentry = ovl_dentry_real(dentry); ssize_t res; const struct cred *old_cred; + struct dentry *realdentry = + ovl_i_dentry_upper(inode) ?: ovl_dentry_lower(dentry); old_cred = ovl_override_creds(dentry->d_sb); res = vfs_getxattr(realdentry, name, value, size); diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c index 9bc0e580a5b3..8aef2b304b2d 100644 --- a/fs/overlayfs/namei.c +++ b/fs/overlayfs/namei.c @@ -397,8 +397,19 @@ int ovl_verify_index(struct dentry *index, struct path *lowerstack, if (!d_inode(index)) return 0; - err = -EISDIR; - if (d_is_dir(index)) + /* + * Directory index entries are going to be used for looking up + * redirected upper dirs by lower dir fh when decoding an overlay + * file handle of a merge dir. Whiteout index entries are going to be + * used as an indication that an exported overlay file handle should + * be treated as stale (i.e. after unlink of the overlay inode). + * We don't know the verification rules for directory and whiteout + * index entries, because they have not been implemented yet, so return + * EROFS if those entries are found to avoid corrupting an index that + * was created by a newer kernel. + */ + err = -EROFS; + if (d_is_dir(index) || ovl_is_whiteout(index)) goto fail; err = -EINVAL; @@ -436,8 +447,8 @@ out: return err; fail: - pr_warn_ratelimited("overlayfs: failed to verify index (%pd2, err=%i)\n", - index, err); + pr_warn_ratelimited("overlayfs: failed to verify index (%pd2, ftype=%x, err=%i)\n", + index, d_inode(index)->i_mode & S_IFMT, err); goto out; } @@ -502,6 +513,7 @@ static struct dentry *ovl_lookup_index(struct dentry *dentry, goto out; } + inode = d_inode(index); if (d_is_negative(index)) { if (upper && d_inode(origin)->i_nlink > 1) { pr_warn_ratelimited("overlayfs: hard link with origin but no index (ino=%lu).\n", @@ -511,11 +523,22 @@ static struct dentry *ovl_lookup_index(struct dentry *dentry, dput(index); index = NULL; - } else if (upper && d_inode(index) != d_inode(upper)) { - inode = d_inode(index); - pr_warn_ratelimited("overlayfs: wrong index found (index ino: %lu, upper ino: %lu).\n", - d_inode(index)->i_ino, - d_inode(upper)->i_ino); + } else if (upper && d_inode(upper) != inode) { + pr_warn_ratelimited("overlayfs: wrong index found (index=%pd2, ino=%lu, upper ino=%lu).\n", + index, inode->i_ino, d_inode(upper)->i_ino); + goto fail; + } else if (ovl_dentry_weird(index) || ovl_is_whiteout(index) || + ((inode->i_mode ^ d_inode(origin)->i_mode) & S_IFMT)) { + /* + * Index should always be of the same file type as origin + * except for the case of a whiteout index. A whiteout + * index should only exist if all lower aliases have been + * unlinked, which means that finding a lower origin on lookup + * whose index is a whiteout should be treated as an error. + */ + pr_warn_ratelimited("overlayfs: bad index found (index=%pd2, ftype=%x, origin ftype=%x).\n", + index, d_inode(index)->i_mode & S_IFMT, + d_inode(origin)->i_mode & S_IFMT); goto fail; } diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index 60d26605e039..e927a62c97ae 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -47,7 +47,8 @@ enum ovl_flag { /* Is the real inode encoded in fid an upper inode? */ #define OVL_FH_FLAG_PATH_UPPER (1 << 2) -#define OVL_FH_FLAG_ALL (OVL_FH_FLAG_BIG_ENDIAN | OVL_FH_FLAG_ANY_ENDIAN) +#define OVL_FH_FLAG_ALL (OVL_FH_FLAG_BIG_ENDIAN | OVL_FH_FLAG_ANY_ENDIAN | \ + OVL_FH_FLAG_PATH_UPPER) #if defined(__LITTLE_ENDIAN) #define OVL_FH_FLAG_CPU_ENDIAN 0 @@ -199,6 +200,7 @@ enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path); struct dentry *ovl_dentry_upper(struct dentry *dentry); struct dentry *ovl_dentry_lower(struct dentry *dentry); struct dentry *ovl_dentry_real(struct dentry *dentry); +struct dentry *ovl_i_dentry_upper(struct inode *inode); struct inode *ovl_inode_upper(struct inode *inode); struct inode *ovl_inode_lower(struct inode *inode); struct inode *ovl_inode_real(struct inode *inode); @@ -270,9 +272,9 @@ int ovl_setattr(struct dentry *dentry, struct iattr *attr); int ovl_getattr(const struct path *path, struct kstat *stat, u32 request_mask, unsigned int flags); int ovl_permission(struct inode *inode, int mask); -int ovl_xattr_set(struct dentry *dentry, const char *name, const void *value, - size_t size, int flags); -int ovl_xattr_get(struct dentry *dentry, const char *name, +int ovl_xattr_set(struct dentry *dentry, struct inode *inode, const char *name, + const void *value, size_t size, int flags); +int ovl_xattr_get(struct dentry *dentry, struct inode *inode, const char *name, void *value, size_t size); ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size); struct posix_acl *ovl_get_acl(struct inode *inode, int type); diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c index 0298463cf9c3..3d424a51cabb 100644 --- a/fs/overlayfs/readdir.c +++ b/fs/overlayfs/readdir.c @@ -703,7 +703,10 @@ int ovl_indexdir_cleanup(struct dentry *dentry, struct vfsmount *mnt, err = PTR_ERR(index); break; } - if (ovl_verify_index(index, lowerstack, numlower)) { + err = ovl_verify_index(index, lowerstack, numlower); + if (err) { + if (err == -EROFS) + break; err = ovl_cleanup(dir, index); if (err) break; diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 44dc2d6ffe0f..d86e89f97201 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -692,7 +692,7 @@ ovl_posix_acl_xattr_get(const struct xattr_handler *handler, struct dentry *dentry, struct inode *inode, const char *name, void *buffer, size_t size) { - return ovl_xattr_get(dentry, handler->name, buffer, size); + return ovl_xattr_get(dentry, inode, handler->name, buffer, size); } static int __maybe_unused @@ -742,7 +742,7 @@ ovl_posix_acl_xattr_set(const struct xattr_handler *handler, return err; } - err = ovl_xattr_set(dentry, handler->name, value, size, flags); + err = ovl_xattr_set(dentry, inode, handler->name, value, size, flags); if (!err) ovl_copyattr(ovl_inode_real(inode), inode); @@ -772,7 +772,7 @@ static int ovl_other_xattr_get(const struct xattr_handler *handler, struct dentry *dentry, struct inode *inode, const char *name, void *buffer, size_t size) { - return ovl_xattr_get(dentry, name, buffer, size); + return ovl_xattr_get(dentry, inode, name, buffer, size); } static int ovl_other_xattr_set(const struct xattr_handler *handler, @@ -780,7 +780,7 @@ static int ovl_other_xattr_set(const struct xattr_handler *handler, const char *name, const void *value, size_t size, int flags) { - return ovl_xattr_set(dentry, name, value, size, flags); + return ovl_xattr_set(dentry, inode, name, value, size, flags); } static const struct xattr_handler __maybe_unused @@ -1058,10 +1058,6 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) ufs->indexdir = ovl_workdir_create(sb, ufs, workpath.dentry, OVL_INDEXDIR_NAME, true); - err = PTR_ERR(ufs->indexdir); - if (IS_ERR(ufs->indexdir)) - goto out_put_lower_mnt; - if (ufs->indexdir) { /* Verify upper root is index dir origin */ err = ovl_verify_origin(ufs->indexdir, ufs->upper_mnt, @@ -1090,6 +1086,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) else sb->s_d_op = &ovl_dentry_operations; + err = -ENOMEM; ufs->creator_cred = cred = prepare_creds(); if (!cred) goto out_put_indexdir; diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c index c492ba75c659..f46ad75dc96a 100644 --- a/fs/overlayfs/util.c +++ b/fs/overlayfs/util.c @@ -157,9 +157,14 @@ struct dentry *ovl_dentry_real(struct dentry *dentry) return ovl_dentry_upper(dentry) ?: ovl_dentry_lower(dentry); } +struct dentry *ovl_i_dentry_upper(struct inode *inode) +{ + return ovl_upperdentry_dereference(OVL_I(inode)); +} + struct inode *ovl_inode_upper(struct inode *inode) { - struct dentry *upperdentry = ovl_upperdentry_dereference(OVL_I(inode)); + struct dentry *upperdentry = ovl_i_dentry_upper(inode); return upperdentry ? d_inode(upperdentry) : NULL; } diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 18694598bebf..aa2b89071630 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -51,7 +51,7 @@ struct proc_dir_entry { spinlock_t pde_unload_lock; /* proc_fops checks and pde_users bumps */ u8 namelen; char name[]; -}; +} __randomize_layout; union proc_op { int (*proc_get_link)(struct dentry *, struct path *); @@ -70,7 +70,7 @@ struct proc_inode { struct hlist_node sysctl_inodes; const struct proc_ns_operations *ns_ops; struct inode vfs_inode; -}; +} __randomize_layout; /* * General functions @@ -279,7 +279,7 @@ struct proc_maps_private { #ifdef CONFIG_NUMA struct mempolicy *task_mempolicy; #endif -}; +} __randomize_layout; struct mm_struct *proc_mem_open(struct inode *inode, unsigned int mode); diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c index 3d2256a425ee..54415f0e3d18 100644 --- a/fs/reiserfs/xattr_acl.c +++ b/fs/reiserfs/xattr_acl.c @@ -23,7 +23,8 @@ reiserfs_set_acl(struct inode *inode, struct posix_acl *acl, int type) struct reiserfs_transaction_handle th; size_t jcreate_blocks; int size = acl ? posix_acl_xattr_size(acl->a_count) : 0; - + int update_mode = 0; + umode_t mode = inode->i_mode; /* * Pessimism: We can't assume that anything from the xattr root up @@ -37,7 +38,16 @@ reiserfs_set_acl(struct inode *inode, struct posix_acl *acl, int type) error = journal_begin(&th, inode->i_sb, jcreate_blocks); reiserfs_write_unlock(inode->i_sb); if (error == 0) { + if (type == ACL_TYPE_ACCESS && acl) { + error = posix_acl_update_mode(inode, &mode, &acl); + if (error) + goto unlock; + update_mode = 1; + } error = __reiserfs_set_acl(&th, inode, type, acl); + if (!error && update_mode) + inode->i_mode = mode; +unlock: reiserfs_write_lock(inode->i_sb); error2 = journal_end(&th); reiserfs_write_unlock(inode->i_sb); @@ -241,11 +251,6 @@ __reiserfs_set_acl(struct reiserfs_transaction_handle *th, struct inode *inode, switch (type) { case ACL_TYPE_ACCESS: name = XATTR_NAME_POSIX_ACL_ACCESS; - if (acl) { - error = posix_acl_update_mode(inode, &inode->i_mode, &acl); - if (error) - return error; - } break; case ACL_TYPE_DEFAULT: name = XATTR_NAME_POSIX_ACL_DEFAULT; diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index cadcd12a3d35..06ea26b8c996 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -854,6 +854,9 @@ wakeup: __wake_up_locked_key(&ctx->fault_wqh, TASK_NORMAL, &range); spin_unlock(&ctx->fault_pending_wqh.lock); + /* Flush pending events that may still wait on event_wqh */ + wake_up_all(&ctx->event_wqh); + wake_up_poll(&ctx->fd_wqh, POLLHUP); userfaultfd_ctx_put(ctx); return 0; @@ -1643,6 +1646,8 @@ static int userfaultfd_zeropage(struct userfaultfd_ctx *ctx, ret = mfill_zeropage(ctx->mm, uffdio_zeropage.range.start, uffdio_zeropage.range.len); mmput(ctx->mm); + } else { + return -ENOSPC; } if (unlikely(put_user(ret, &user_uffdio_zeropage->zeropage))) return -EFAULT; diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 0a9880777c9c..c09c16b1ad3b 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -5435,6 +5435,7 @@ __xfs_bunmapi( xfs_fsblock_t sum; xfs_filblks_t len = *rlen; /* length to unmap in file */ xfs_fileoff_t max_len; + xfs_agnumber_t prev_agno = NULLAGNUMBER, agno; trace_xfs_bunmap(ip, bno, len, flags, _RET_IP_); @@ -5534,6 +5535,17 @@ __xfs_bunmapi( */ del = got; wasdel = isnullstartblock(del.br_startblock); + + /* + * Make sure we don't touch multiple AGF headers out of order + * in a single transaction, as that could cause AB-BA deadlocks. + */ + if (!wasdel) { + agno = XFS_FSB_TO_AGNO(mp, del.br_startblock); + if (prev_agno != NULLAGNUMBER && prev_agno > agno) + break; + prev_agno = agno; + } if (got.br_startoff < start) { del.br_startoff = start; del.br_blockcount -= start - got.br_startoff; @@ -6499,6 +6511,15 @@ xfs_bmap_finish_one( xfs_fsblock_t firstfsb; int error = 0; + /* + * firstfsb is tied to the transaction lifetime and is used to + * ensure correct AG locking order and schedule work item + * continuations. XFS_BUI_MAX_FAST_EXTENTS (== 1) restricts us + * to only making one bmap call per transaction, so it should + * be safe to have it as a local variable here. + */ + firstfsb = NULLFSBLOCK; + trace_xfs_bmap_deferred(tp->t_mountp, XFS_FSB_TO_AGNO(tp->t_mountp, startblock), type, XFS_FSB_TO_AGBNO(tp->t_mountp, startblock), diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c index 4da85fff69ad..e0bcc4a59efd 100644 --- a/fs/xfs/libxfs/xfs_btree.c +++ b/fs/xfs/libxfs/xfs_btree.c @@ -728,7 +728,8 @@ xfs_btree_firstrec( * Get the block pointer for this level. */ block = xfs_btree_get_block(cur, level, &bp); - xfs_btree_check_block(cur, block, level, bp); + if (xfs_btree_check_block(cur, block, level, bp)) + return 0; /* * It's empty, there is no such record. */ @@ -757,7 +758,8 @@ xfs_btree_lastrec( * Get the block pointer for this level. */ block = xfs_btree_get_block(cur, level, &bp); - xfs_btree_check_block(cur, block, level, bp); + if (xfs_btree_check_block(cur, block, level, bp)) + return 0; /* * It's empty, there is no such record. */ diff --git a/fs/xfs/libxfs/xfs_dir2_data.c b/fs/xfs/libxfs/xfs_dir2_data.c index d478065b9544..8727a43115ef 100644 --- a/fs/xfs/libxfs/xfs_dir2_data.c +++ b/fs/xfs/libxfs/xfs_dir2_data.c @@ -136,6 +136,8 @@ __xfs_dir3_data_check( */ if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) { XFS_WANT_CORRUPTED_RETURN(mp, lastfree == 0); + XFS_WANT_CORRUPTED_RETURN(mp, endp >= + p + be16_to_cpu(dup->length)); XFS_WANT_CORRUPTED_RETURN(mp, be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)) == (char *)dup - (char *)hdr); @@ -164,6 +166,8 @@ __xfs_dir3_data_check( XFS_WANT_CORRUPTED_RETURN(mp, dep->namelen != 0); XFS_WANT_CORRUPTED_RETURN(mp, !xfs_dir_ino_validate(mp, be64_to_cpu(dep->inumber))); + XFS_WANT_CORRUPTED_RETURN(mp, endp >= + p + ops->data_entsize(dep->namelen)); XFS_WANT_CORRUPTED_RETURN(mp, be16_to_cpu(*ops->data_entry_tag_p(dep)) == (char *)dep - (char *)hdr); diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c index 900ea231f9a3..45b1c3b4e047 100644 --- a/fs/xfs/libxfs/xfs_refcount.c +++ b/fs/xfs/libxfs/xfs_refcount.c @@ -1638,6 +1638,10 @@ xfs_refcount_recover_cow_leftovers( error = xfs_alloc_read_agf(mp, tp, agno, 0, &agbp); if (error) goto out_trans; + if (!agbp) { + error = -ENOMEM; + goto out_trans; + } cur = xfs_refcountbt_init_cursor(mp, tp, agbp, agno, NULL); /* Find all the leftover CoW staging extents. */ diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index 6ce948c436d5..15751dc2a27d 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -111,6 +111,9 @@ restart: skipped = 0; break; } + /* we're done if id overflows back to zero */ + if (!next_index) + break; } if (skipped) { diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index ab2270a87196..f45fbf0db9bb 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -170,6 +170,8 @@ xfs_reflink_find_shared( error = xfs_alloc_read_agf(mp, tp, agno, 0, &agbp); if (error) return error; + if (!agbp) + return -ENOMEM; cur = xfs_refcountbt_init_cursor(mp, tp, agbp, agno, NULL); @@ -329,7 +331,7 @@ xfs_reflink_convert_cow_extent( xfs_filblks_t count_fsb, struct xfs_defer_ops *dfops) { - xfs_fsblock_t first_block; + xfs_fsblock_t first_block = NULLFSBLOCK; int nimaps = 1; if (imap->br_state == XFS_EXT_NORM) |