diff options
Diffstat (limited to 'fs')
46 files changed, 647 insertions, 321 deletions
diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c index ff911e779651..be1e34adc3c6 100644 --- a/fs/9p/vfs_dir.c +++ b/fs/9p/vfs_dir.c @@ -52,10 +52,9 @@ */ struct p9_rdir { - struct mutex mutex; int head; int tail; - uint8_t *buf; + uint8_t buf[]; }; /** @@ -93,33 +92,12 @@ static void p9stat_init(struct p9_wstat *stbuf) * */ -static int v9fs_alloc_rdir_buf(struct file *filp, int buflen) +static struct p9_rdir *v9fs_alloc_rdir_buf(struct file *filp, int buflen) { - struct p9_rdir *rdir; - struct p9_fid *fid; - int err = 0; - - fid = filp->private_data; - if (!fid->rdir) { - rdir = kmalloc(sizeof(struct p9_rdir) + buflen, GFP_KERNEL); - - if (rdir == NULL) { - err = -ENOMEM; - goto exit; - } - spin_lock(&filp->f_dentry->d_lock); - if (!fid->rdir) { - rdir->buf = (uint8_t *)rdir + sizeof(struct p9_rdir); - mutex_init(&rdir->mutex); - rdir->head = rdir->tail = 0; - fid->rdir = (void *) rdir; - rdir = NULL; - } - spin_unlock(&filp->f_dentry->d_lock); - kfree(rdir); - } -exit: - return err; + struct p9_fid *fid = filp->private_data; + if (!fid->rdir) + fid->rdir = kzalloc(sizeof(struct p9_rdir) + buflen, GFP_KERNEL); + return fid->rdir; } /** @@ -145,20 +123,16 @@ static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir) buflen = fid->clnt->msize - P9_IOHDRSZ; - err = v9fs_alloc_rdir_buf(filp, buflen); - if (err) - goto exit; - rdir = (struct p9_rdir *) fid->rdir; + rdir = v9fs_alloc_rdir_buf(filp, buflen); + if (!rdir) + return -ENOMEM; - err = mutex_lock_interruptible(&rdir->mutex); - if (err) - return err; - while (err == 0) { + while (1) { if (rdir->tail == rdir->head) { err = v9fs_file_readn(filp, rdir->buf, NULL, buflen, filp->f_pos); if (err <= 0) - goto unlock_and_exit; + return err; rdir->head = 0; rdir->tail = err; @@ -169,9 +143,8 @@ static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir) rdir->tail - rdir->head, &st); if (err) { p9_debug(P9_DEBUG_VFS, "returned %d\n", err); - err = -EIO; p9stat_free(&st); - goto unlock_and_exit; + return -EIO; } reclen = st.size+2; @@ -180,19 +153,13 @@ static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir) p9stat_free(&st); - if (over) { - err = 0; - goto unlock_and_exit; - } + if (over) + return 0; + rdir->head += reclen; filp->f_pos += reclen; } } - -unlock_and_exit: - mutex_unlock(&rdir->mutex); -exit: - return err; } /** @@ -218,21 +185,16 @@ static int v9fs_dir_readdir_dotl(struct file *filp, void *dirent, buflen = fid->clnt->msize - P9_READDIRHDRSZ; - err = v9fs_alloc_rdir_buf(filp, buflen); - if (err) - goto exit; - rdir = (struct p9_rdir *) fid->rdir; + rdir = v9fs_alloc_rdir_buf(filp, buflen); + if (!rdir) + return -ENOMEM; - err = mutex_lock_interruptible(&rdir->mutex); - if (err) - return err; - - while (err == 0) { + while (1) { if (rdir->tail == rdir->head) { err = p9_client_readdir(fid, rdir->buf, buflen, filp->f_pos); if (err <= 0) - goto unlock_and_exit; + return err; rdir->head = 0; rdir->tail = err; @@ -245,8 +207,7 @@ static int v9fs_dir_readdir_dotl(struct file *filp, void *dirent, &curdirent); if (err < 0) { p9_debug(P9_DEBUG_VFS, "returned %d\n", err); - err = -EIO; - goto unlock_and_exit; + return -EIO; } /* d_off in dirent structure tracks the offset into @@ -261,20 +222,13 @@ static int v9fs_dir_readdir_dotl(struct file *filp, void *dirent, curdirent.d_type); oldoffset = curdirent.d_off; - if (over) { - err = 0; - goto unlock_and_exit; - } + if (over) + return 0; filp->f_pos = curdirent.d_off; rdir->head += err; } } - -unlock_and_exit: - mutex_unlock(&rdir->mutex); -exit: - return err; } diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index c2483e97beee..7a3399767570 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -80,10 +80,6 @@ int v9fs_file_open(struct inode *inode, struct file *file) p9_client_clunk(fid); return err; } - if (file->f_flags & O_TRUNC) { - i_size_write(inode, 0); - inode->i_blocks = 0; - } if ((file->f_flags & O_APPEND) && (!v9fs_proto_dotu(v9ses) && !v9fs_proto_dotl(v9ses))) generic_file_llseek(file, 0, SEEK_END); diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 890bed538f9b..57d017ac68e4 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -192,9 +192,6 @@ int v9fs_uflags2omode(int uflags, int extended) break; } - if (uflags & O_TRUNC) - ret |= P9_OTRUNC; - if (extended) { if (uflags & O_EXCL) ret |= P9_OEXCL; diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c index 40895546e103..8d24ad66dfb8 100644 --- a/fs/9p/vfs_inode_dotl.c +++ b/fs/9p/vfs_inode_dotl.c @@ -186,7 +186,6 @@ static int v9fs_mapped_dotl_flags(int flags) { O_CREAT, P9_DOTL_CREATE }, { O_EXCL, P9_DOTL_EXCL }, { O_NOCTTY, P9_DOTL_NOCTTY }, - { O_TRUNC, P9_DOTL_TRUNC }, { O_APPEND, P9_DOTL_APPEND }, { O_NONBLOCK, P9_DOTL_NONBLOCK }, { O_DSYNC, P9_DOTL_DSYNC }, @@ -268,8 +267,14 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry, } /* Only creates */ - if (!(flags & O_CREAT) || dentry->d_inode) - return finish_no_open(file, res); + if (!(flags & O_CREAT)) + return finish_no_open(file, res); + else if (dentry->d_inode) { + if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL)) + return -EEXIST; + else + return finish_no_open(file, res); + } v9ses = v9fs_inode2v9ses(dir); diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 0c42cdbabecf..49d0b43458b7 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -33,6 +33,7 @@ #include <linux/elf.h> #include <linux/utsname.h> #include <linux/coredump.h> +#include <linux/sched.h> #include <asm/uaccess.h> #include <asm/param.h> #include <asm/page.h> @@ -1320,8 +1321,11 @@ static void fill_prstatus(struct elf_prstatus *prstatus, cputime_to_timeval(cputime.utime, &prstatus->pr_utime); cputime_to_timeval(cputime.stime, &prstatus->pr_stime); } else { - cputime_to_timeval(p->utime, &prstatus->pr_utime); - cputime_to_timeval(p->stime, &prstatus->pr_stime); + cputime_t utime, stime; + + task_cputime(p, &utime, &stime); + cputime_to_timeval(utime, &prstatus->pr_utime); + cputime_to_timeval(stime, &prstatus->pr_stime); } cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime); cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime); diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index dc84732e554f..cb240dd3b402 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -1375,8 +1375,11 @@ static void fill_prstatus(struct elf_prstatus *prstatus, cputime_to_timeval(cputime.utime, &prstatus->pr_utime); cputime_to_timeval(cputime.stime, &prstatus->pr_stime); } else { - cputime_to_timeval(p->utime, &prstatus->pr_utime); - cputime_to_timeval(p->stime, &prstatus->pr_stime); + cputime_t utime, stime; + + task_cputime(p, &utime, &stime); + cputime_to_timeval(utime, &prstatus->pr_utime); + cputime_to_timeval(stime, &prstatus->pr_stime); } cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime); cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 521e9d4424f6..5a3327b8f90d 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3997,7 +3997,7 @@ again: * We make the other tasks wait for the flush only when we can flush * all things. */ - if (ret && flush == BTRFS_RESERVE_FLUSH_ALL) { + if (ret && flush != BTRFS_RESERVE_NO_FLUSH) { flushing = true; space_info->flush = 1; } @@ -4534,7 +4534,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) unsigned nr_extents = 0; int extra_reserve = 0; enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_ALL; - int ret; + int ret = 0; bool delalloc_lock = true; /* If we are a free space inode we need to not flush since we will be in @@ -4579,20 +4579,18 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) csum_bytes = BTRFS_I(inode)->csum_bytes; spin_unlock(&BTRFS_I(inode)->lock); - if (root->fs_info->quota_enabled) { + if (root->fs_info->quota_enabled) ret = btrfs_qgroup_reserve(root, num_bytes + nr_extents * root->leafsize); - if (ret) { - spin_lock(&BTRFS_I(inode)->lock); - calc_csum_metadata_size(inode, num_bytes, 0); - spin_unlock(&BTRFS_I(inode)->lock); - if (delalloc_lock) - mutex_unlock(&BTRFS_I(inode)->delalloc_mutex); - return ret; - } - } - ret = reserve_metadata_bytes(root, block_rsv, to_reserve, flush); + /* + * ret != 0 here means the qgroup reservation failed, we go straight to + * the shared error handling then. + */ + if (ret == 0) + ret = reserve_metadata_bytes(root, block_rsv, + to_reserve, flush); + if (ret) { u64 to_free = 0; unsigned dropped; @@ -5560,7 +5558,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, int empty_cluster = 2 * 1024 * 1024; struct btrfs_space_info *space_info; int loop = 0; - int index = 0; + int index = __get_raid_index(data); int alloc_type = (data & BTRFS_BLOCK_GROUP_DATA) ? RESERVE_ALLOC_NO_ACCOUNT : RESERVE_ALLOC; bool found_uncached_bg = false; @@ -6788,11 +6786,13 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans, &wc->flags[level]); if (ret < 0) { btrfs_tree_unlock_rw(eb, path->locks[level]); + path->locks[level] = 0; return ret; } BUG_ON(wc->refs[level] == 0); if (wc->refs[level] == 1) { btrfs_tree_unlock_rw(eb, path->locks[level]); + path->locks[level] = 0; return 1; } } diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index f169d6b11d7f..fdb7a8db3b57 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -171,6 +171,10 @@ static int mergable_maps(struct extent_map *prev, struct extent_map *next) if (test_bit(EXTENT_FLAG_COMPRESSED, &prev->flags)) return 0; + if (test_bit(EXTENT_FLAG_LOGGING, &prev->flags) || + test_bit(EXTENT_FLAG_LOGGING, &next->flags)) + return 0; + if (extent_map_end(prev) == next->start && prev->flags == next->flags && prev->bdev == next->bdev && @@ -255,7 +259,8 @@ int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len, if (!em) goto out; - list_move(&em->list, &tree->modified_extents); + if (!test_bit(EXTENT_FLAG_LOGGING, &em->flags)) + list_move(&em->list, &tree->modified_extents); em->generation = gen; clear_bit(EXTENT_FLAG_PINNED, &em->flags); em->mod_start = em->start; @@ -280,6 +285,13 @@ out: } +void clear_em_logging(struct extent_map_tree *tree, struct extent_map *em) +{ + clear_bit(EXTENT_FLAG_LOGGING, &em->flags); + if (em->in_tree) + try_merge_map(tree, em); +} + /** * add_extent_mapping - add new extent map to the extent tree * @tree: tree to insert new map in diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index 922943ce29e8..c6598c89cff8 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h @@ -69,6 +69,7 @@ void free_extent_map(struct extent_map *em); int __init extent_map_init(void); void extent_map_exit(void); int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len, u64 gen); +void clear_em_logging(struct extent_map_tree *tree, struct extent_map *em); struct extent_map *search_extent_mapping(struct extent_map_tree *tree, u64 start, u64 len); #endif diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index bd38cef42358..94aa53b38721 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -460,8 +460,8 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode, if (!contig) offset = page_offset(bvec->bv_page) + bvec->bv_offset; - if (!contig && (offset >= ordered->file_offset + ordered->len || - offset < ordered->file_offset)) { + if (offset >= ordered->file_offset + ordered->len || + offset < ordered->file_offset) { unsigned long bytes_left; sums->len = this_sum_bytes; this_sum_bytes = 0; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 77061bf43edb..aeb84469d2c4 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -293,15 +293,24 @@ static int __btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info, struct btrfs_key key; struct btrfs_ioctl_defrag_range_args range; int num_defrag; + int index; + int ret; /* get the inode */ key.objectid = defrag->root; btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); key.offset = (u64)-1; + + index = srcu_read_lock(&fs_info->subvol_srcu); + inode_root = btrfs_read_fs_root_no_name(fs_info, &key); if (IS_ERR(inode_root)) { - kmem_cache_free(btrfs_inode_defrag_cachep, defrag); - return PTR_ERR(inode_root); + ret = PTR_ERR(inode_root); + goto cleanup; + } + if (btrfs_root_refs(&inode_root->root_item) == 0) { + ret = -ENOENT; + goto cleanup; } key.objectid = defrag->ino; @@ -309,9 +318,10 @@ static int __btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info, key.offset = 0; inode = btrfs_iget(fs_info->sb, &key, inode_root, NULL); if (IS_ERR(inode)) { - kmem_cache_free(btrfs_inode_defrag_cachep, defrag); - return PTR_ERR(inode); + ret = PTR_ERR(inode); + goto cleanup; } + srcu_read_unlock(&fs_info->subvol_srcu, index); /* do a chunk of defrag */ clear_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags); @@ -346,6 +356,10 @@ static int __btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info, iput(inode); return 0; +cleanup: + srcu_read_unlock(&fs_info->subvol_srcu, index); + kmem_cache_free(btrfs_inode_defrag_cachep, defrag); + return ret; } /* @@ -1594,9 +1608,10 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, if (err < 0 && num_written > 0) num_written = err; } -out: + if (sync) atomic_dec(&BTRFS_I(inode)->sync_writers); +out: sb_end_write(inode->i_sb); current->backing_dev_info = NULL; return num_written ? num_written : err; @@ -2241,6 +2256,7 @@ static int find_desired_extent(struct inode *inode, loff_t *offset, int whence) if (lockend <= lockstart) lockend = lockstart + root->sectorsize; + lockend--; len = lockend - lockstart + 1; len = max_t(u64, len, root->sectorsize); @@ -2307,9 +2323,12 @@ static int find_desired_extent(struct inode *inode, loff_t *offset, int whence) } } - *offset = start; - free_extent_map(em); - break; + if (!test_bit(EXTENT_FLAG_PREALLOC, + &em->flags)) { + *offset = start; + free_extent_map(em); + break; + } } } diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 59ea2e4349c9..0be7a8742a43 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -1862,11 +1862,13 @@ int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group, { struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; struct btrfs_free_space *info; - int ret = 0; + int ret; + bool re_search = false; spin_lock(&ctl->tree_lock); again: + ret = 0; if (!bytes) goto out_lock; @@ -1879,17 +1881,17 @@ again: info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 1, 0); if (!info) { - /* the tree logging code might be calling us before we - * have fully loaded the free space rbtree for this - * block group. So it is possible the entry won't - * be in the rbtree yet at all. The caching code - * will make sure not to put it in the rbtree if - * the logging code has pinned it. + /* + * If we found a partial bit of our free space in a + * bitmap but then couldn't find the other part this may + * be a problem, so WARN about it. */ + WARN_ON(re_search); goto out_lock; } } + re_search = false; if (!info->bitmap) { unlink_free_space(ctl, info); if (offset == info->offset) { @@ -1935,8 +1937,10 @@ again: } ret = remove_from_bitmap(ctl, info, &offset, &bytes); - if (ret == -EAGAIN) + if (ret == -EAGAIN) { + re_search = true; goto again; + } BUG_ON(ret); /* logic error */ out_lock: spin_unlock(&ctl->tree_lock); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 16d9e8e191e6..cc93b23ca352 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -88,7 +88,7 @@ static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = { [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK, }; -static int btrfs_setsize(struct inode *inode, loff_t newsize); +static int btrfs_setsize(struct inode *inode, struct iattr *attr); static int btrfs_truncate(struct inode *inode); static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent); static noinline int cow_file_range(struct inode *inode, @@ -2478,6 +2478,18 @@ int btrfs_orphan_cleanup(struct btrfs_root *root) continue; } nr_truncate++; + + /* 1 for the orphan item deletion. */ + trans = btrfs_start_transaction(root, 1); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); + goto out; + } + ret = btrfs_orphan_add(trans, inode); + btrfs_end_transaction(trans, root); + if (ret) + goto out; + ret = btrfs_truncate(inode); } else { nr_unlink++; @@ -3665,6 +3677,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size) block_end - cur_offset, 0); if (IS_ERR(em)) { err = PTR_ERR(em); + em = NULL; break; } last_byte = min(extent_map_end(em), block_end); @@ -3748,16 +3761,27 @@ next: return err; } -static int btrfs_setsize(struct inode *inode, loff_t newsize) +static int btrfs_setsize(struct inode *inode, struct iattr *attr) { struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_trans_handle *trans; loff_t oldsize = i_size_read(inode); + loff_t newsize = attr->ia_size; + int mask = attr->ia_valid; int ret; if (newsize == oldsize) return 0; + /* + * The regular truncate() case without ATTR_CTIME and ATTR_MTIME is a + * special case where we need to update the times despite not having + * these flags set. For all other operations the VFS set these flags + * explicitly if it wants a timestamp update. + */ + if (newsize != oldsize && (!(mask & (ATTR_CTIME | ATTR_MTIME)))) + inode->i_ctime = inode->i_mtime = current_fs_time(inode->i_sb); + if (newsize > oldsize) { truncate_pagecache(inode, oldsize, newsize); ret = btrfs_cont_expand(inode, oldsize, newsize); @@ -3783,9 +3807,34 @@ static int btrfs_setsize(struct inode *inode, loff_t newsize) set_bit(BTRFS_INODE_ORDERED_DATA_CLOSE, &BTRFS_I(inode)->runtime_flags); + /* + * 1 for the orphan item we're going to add + * 1 for the orphan item deletion. + */ + trans = btrfs_start_transaction(root, 2); + if (IS_ERR(trans)) + return PTR_ERR(trans); + + /* + * We need to do this in case we fail at _any_ point during the + * actual truncate. Once we do the truncate_setsize we could + * invalidate pages which forces any outstanding ordered io to + * be instantly completed which will give us extents that need + * to be truncated. If we fail to get an orphan inode down we + * could have left over extents that were never meant to live, + * so we need to garuntee from this point on that everything + * will be consistent. + */ + ret = btrfs_orphan_add(trans, inode); + btrfs_end_transaction(trans, root); + if (ret) + return ret; + /* we don't support swapfiles, so vmtruncate shouldn't fail */ truncate_setsize(inode, newsize); ret = btrfs_truncate(inode); + if (ret && inode->i_nlink) + btrfs_orphan_del(NULL, inode); } return ret; @@ -3805,7 +3854,7 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) return err; if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) { - err = btrfs_setsize(inode, attr->ia_size); + err = btrfs_setsize(inode, attr); if (err) return err; } @@ -5572,10 +5621,13 @@ struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *pag return em; if (em) { /* - * if our em maps to a hole, there might - * actually be delalloc bytes behind it + * if our em maps to + * - a hole or + * - a pre-alloc extent, + * there might actually be delalloc bytes behind it. */ - if (em->block_start != EXTENT_MAP_HOLE) + if (em->block_start != EXTENT_MAP_HOLE && + !test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) return em; else hole_em = em; @@ -5657,6 +5709,8 @@ struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *pag */ em->block_start = hole_em->block_start; em->block_len = hole_len; + if (test_bit(EXTENT_FLAG_PREALLOC, &hole_em->flags)) + set_bit(EXTENT_FLAG_PREALLOC, &em->flags); } else { em->start = range_start; em->len = found; @@ -6915,11 +6969,9 @@ static int btrfs_truncate(struct inode *inode) /* * 1 for the truncate slack space - * 1 for the orphan item we're going to add - * 1 for the orphan item deletion * 1 for updating the inode. */ - trans = btrfs_start_transaction(root, 4); + trans = btrfs_start_transaction(root, 2); if (IS_ERR(trans)) { err = PTR_ERR(trans); goto out; @@ -6930,12 +6982,6 @@ static int btrfs_truncate(struct inode *inode) min_size); BUG_ON(ret); - ret = btrfs_orphan_add(trans, inode); - if (ret) { - btrfs_end_transaction(trans, root); - goto out; - } - /* * setattr is responsible for setting the ordered_data_close flag, * but that is only tested during the last file release. That @@ -7004,12 +7050,6 @@ static int btrfs_truncate(struct inode *inode) ret = btrfs_orphan_del(trans, inode); if (ret) err = ret; - } else if (ret && inode->i_nlink > 0) { - /* - * Failed to do the truncate, remove us from the in memory - * orphan list. - */ - ret = btrfs_orphan_del(NULL, inode); } if (trans) { @@ -7531,41 +7571,61 @@ void btrfs_wait_and_free_delalloc_work(struct btrfs_delalloc_work *work) */ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput) { - struct list_head *head = &root->fs_info->delalloc_inodes; struct btrfs_inode *binode; struct inode *inode; struct btrfs_delalloc_work *work, *next; struct list_head works; + struct list_head splice; int ret = 0; if (root->fs_info->sb->s_flags & MS_RDONLY) return -EROFS; INIT_LIST_HEAD(&works); - + INIT_LIST_HEAD(&splice); +again: spin_lock(&root->fs_info->delalloc_lock); - while (!list_empty(head)) { - binode = list_entry(head->next, struct btrfs_inode, + list_splice_init(&root->fs_info->delalloc_inodes, &splice); + while (!list_empty(&splice)) { + binode = list_entry(splice.next, struct btrfs_inode, delalloc_inodes); + + list_del_init(&binode->delalloc_inodes); + inode = igrab(&binode->vfs_inode); if (!inode) - list_del_init(&binode->delalloc_inodes); + continue; + + list_add_tail(&binode->delalloc_inodes, + &root->fs_info->delalloc_inodes); spin_unlock(&root->fs_info->delalloc_lock); - if (inode) { - work = btrfs_alloc_delalloc_work(inode, 0, delay_iput); - if (!work) { - ret = -ENOMEM; - goto out; - } - list_add_tail(&work->list, &works); - btrfs_queue_worker(&root->fs_info->flush_workers, - &work->work); + + work = btrfs_alloc_delalloc_work(inode, 0, delay_iput); + if (unlikely(!work)) { + ret = -ENOMEM; + goto out; } + list_add_tail(&work->list, &works); + btrfs_queue_worker(&root->fs_info->flush_workers, + &work->work); + cond_resched(); spin_lock(&root->fs_info->delalloc_lock); } spin_unlock(&root->fs_info->delalloc_lock); + list_for_each_entry_safe(work, next, &works, list) { + list_del_init(&work->list); + btrfs_wait_and_free_delalloc_work(work); + } + + spin_lock(&root->fs_info->delalloc_lock); + if (!list_empty(&root->fs_info->delalloc_inodes)) { + spin_unlock(&root->fs_info->delalloc_lock); + goto again; + } + spin_unlock(&root->fs_info->delalloc_lock); + /* the filemap_flush will queue IO into the worker threads, but * we have to make sure the IO is actually started and that * ordered extents get created before we return @@ -7578,11 +7638,18 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput) atomic_read(&root->fs_info->async_delalloc_pages) == 0)); } atomic_dec(&root->fs_info->async_submit_draining); + return 0; out: list_for_each_entry_safe(work, next, &works, list) { list_del_init(&work->list); btrfs_wait_and_free_delalloc_work(work); } + + if (!list_empty_careful(&splice)) { + spin_lock(&root->fs_info->delalloc_lock); + list_splice_tail(&splice, &root->fs_info->delalloc_inodes); + spin_unlock(&root->fs_info->delalloc_lock); + } return ret; } diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 4b4516770f05..338f2597bf7f 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -515,7 +515,6 @@ static noinline int create_subvol(struct btrfs_root *root, BUG_ON(ret); - d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry)); fail: if (async_transid) { *async_transid = trans->transid; @@ -525,6 +524,10 @@ fail: } if (err && !ret) ret = err; + + if (!ret) + d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry)); + return ret; } @@ -1339,7 +1342,8 @@ static noinline int btrfs_ioctl_resize(struct file *file, if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running, 1)) { pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n"); - return -EINPROGRESS; + mnt_drop_write_file(file); + return -EINVAL; } mutex_lock(&root->fs_info->volume_mutex); @@ -1362,6 +1366,7 @@ static noinline int btrfs_ioctl_resize(struct file *file, printk(KERN_INFO "btrfs: resizing devid %llu\n", (unsigned long long)devid); } + device = btrfs_find_device(root->fs_info, devid, NULL, NULL); if (!device) { printk(KERN_INFO "btrfs: resizer unable to find device %llu\n", @@ -1369,9 +1374,10 @@ static noinline int btrfs_ioctl_resize(struct file *file, ret = -EINVAL; goto out_free; } - if (device->fs_devices && device->fs_devices->seeding) { + + if (!device->writeable) { printk(KERN_INFO "btrfs: resizer unable to apply on " - "seeding device %llu\n", + "readonly device %llu\n", (unsigned long long)devid); ret = -EINVAL; goto out_free; @@ -1443,8 +1449,8 @@ out_free: kfree(vol_args); out: mutex_unlock(&root->fs_info->volume_mutex); - mnt_drop_write_file(file); atomic_set(&root->fs_info->mutually_exclusive_operation_running, 0); + mnt_drop_write_file(file); return ret; } @@ -2095,13 +2101,13 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file, err = inode_permission(inode, MAY_WRITE | MAY_EXEC); if (err) goto out_dput; - - /* check if subvolume may be deleted by a non-root user */ - err = btrfs_may_delete(dir, dentry, 1); - if (err) - goto out_dput; } + /* check if subvolume may be deleted by a user */ + err = btrfs_may_delete(dir, dentry, 1); + if (err) + goto out_dput; + if (btrfs_ino(inode) != BTRFS_FIRST_FREE_OBJECTID) { err = -EINVAL; goto out_dput; @@ -2183,19 +2189,20 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp) struct btrfs_ioctl_defrag_range_args *range; int ret; - if (btrfs_root_readonly(root)) - return -EROFS; + ret = mnt_want_write_file(file); + if (ret) + return ret; if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running, 1)) { pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n"); - return -EINPROGRESS; + mnt_drop_write_file(file); + return -EINVAL; } - ret = mnt_want_write_file(file); - if (ret) { - atomic_set(&root->fs_info->mutually_exclusive_operation_running, - 0); - return ret; + + if (btrfs_root_readonly(root)) { + ret = -EROFS; + goto out; } switch (inode->i_mode & S_IFMT) { @@ -2247,8 +2254,8 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp) ret = -EINVAL; } out: - mnt_drop_write_file(file); atomic_set(&root->fs_info->mutually_exclusive_operation_running, 0); + mnt_drop_write_file(file); return ret; } @@ -2263,7 +2270,7 @@ static long btrfs_ioctl_add_dev(struct btrfs_root *root, void __user *arg) if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running, 1)) { pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n"); - return -EINPROGRESS; + return -EINVAL; } mutex_lock(&root->fs_info->volume_mutex); @@ -2300,7 +2307,7 @@ static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg) 1)) { pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n"); mnt_drop_write_file(file); - return -EINPROGRESS; + return -EINVAL; } mutex_lock(&root->fs_info->volume_mutex); @@ -2316,8 +2323,8 @@ static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg) kfree(vol_args); out: mutex_unlock(&root->fs_info->volume_mutex); - mnt_drop_write_file(file); atomic_set(&root->fs_info->mutually_exclusive_operation_running, 0); + mnt_drop_write_file(file); return ret; } @@ -3437,8 +3444,8 @@ static long btrfs_ioctl_balance(struct file *file, void __user *arg) struct btrfs_fs_info *fs_info = root->fs_info; struct btrfs_ioctl_balance_args *bargs; struct btrfs_balance_control *bctl; + bool need_unlock; /* for mut. excl. ops lock */ int ret; - int need_to_clear_lock = 0; if (!capable(CAP_SYS_ADMIN)) return -EPERM; @@ -3447,14 +3454,61 @@ static long btrfs_ioctl_balance(struct file *file, void __user *arg) if (ret) return ret; - mutex_lock(&fs_info->volume_mutex); +again: + if (!atomic_xchg(&fs_info->mutually_exclusive_operation_running, 1)) { + mutex_lock(&fs_info->volume_mutex); + mutex_lock(&fs_info->balance_mutex); + need_unlock = true; + goto locked; + } + + /* + * mut. excl. ops lock is locked. Three possibilites: + * (1) some other op is running + * (2) balance is running + * (3) balance is paused -- special case (think resume) + */ mutex_lock(&fs_info->balance_mutex); + if (fs_info->balance_ctl) { + /* this is either (2) or (3) */ + if (!atomic_read(&fs_info->balance_running)) { + mutex_unlock(&fs_info->balance_mutex); + if (!mutex_trylock(&fs_info->volume_mutex)) + goto again; + mutex_lock(&fs_info->balance_mutex); + + if (fs_info->balance_ctl && + !atomic_read(&fs_info->balance_running)) { + /* this is (3) */ + need_unlock = false; + goto locked; + } + + mutex_unlock(&fs_info->balance_mutex); + mutex_unlock(&fs_info->volume_mutex); + goto again; + } else { + /* this is (2) */ + mutex_unlock(&fs_info->balance_mutex); + ret = -EINPROGRESS; + goto out; + } + } else { + /* this is (1) */ + mutex_unlock(&fs_info->balance_mutex); + pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n"); + ret = -EINVAL; + goto out; + } + +locked: + BUG_ON(!atomic_read(&fs_info->mutually_exclusive_operation_running)); if (arg) { bargs = memdup_user(arg, sizeof(*bargs)); if (IS_ERR(bargs)) { ret = PTR_ERR(bargs); - goto out; + goto out_unlock; } if (bargs->flags & BTRFS_BALANCE_RESUME) { @@ -3474,13 +3528,10 @@ static long btrfs_ioctl_balance(struct file *file, void __user *arg) bargs = NULL; } - if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running, - 1)) { - pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n"); + if (fs_info->balance_ctl) { ret = -EINPROGRESS; goto out_bargs; } - need_to_clear_lock = 1; bctl = kzalloc(sizeof(*bctl), GFP_NOFS); if (!bctl) { @@ -3501,11 +3552,17 @@ static long btrfs_ioctl_balance(struct file *file, void __user *arg) } do_balance: - ret = btrfs_balance(bctl, bargs); /* - * bctl is freed in __cancel_balance or in free_fs_info if - * restriper was paused all the way until unmount + * Ownership of bctl and mutually_exclusive_operation_running + * goes to to btrfs_balance. bctl is freed in __cancel_balance, + * or, if restriper was paused all the way until unmount, in + * free_fs_info. mutually_exclusive_operation_running is + * cleared in __cancel_balance. */ + need_unlock = false; + + ret = btrfs_balance(bctl, bargs); + if (arg) { if (copy_to_user(arg, bargs, sizeof(*bargs))) ret = -EFAULT; @@ -3513,12 +3570,12 @@ do_balance: out_bargs: kfree(bargs); -out: - if (need_to_clear_lock) - atomic_set(&root->fs_info->mutually_exclusive_operation_running, - 0); +out_unlock: mutex_unlock(&fs_info->balance_mutex); mutex_unlock(&fs_info->volume_mutex); + if (need_unlock) + atomic_set(&fs_info->mutually_exclusive_operation_running, 0); +out: mnt_drop_write_file(file); return ret; } @@ -3698,6 +3755,11 @@ static long btrfs_ioctl_qgroup_create(struct file *file, void __user *arg) goto drop_write; } + if (!sa->qgroupid) { + ret = -EINVAL; + goto out; + } + trans = btrfs_join_transaction(root); if (IS_ERR(trans)) { ret = PTR_ERR(trans); diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index f10731297040..e5ed56729607 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -836,9 +836,16 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset, * if the disk i_size is already at the inode->i_size, or * this ordered extent is inside the disk i_size, we're done */ - if (disk_i_size == i_size || offset <= disk_i_size) { + if (disk_i_size == i_size) + goto out; + + /* + * We still need to update disk_i_size if outstanding_isize is greater + * than disk_i_size. + */ + if (offset <= disk_i_size && + (!ordered || ordered->outstanding_isize <= disk_i_size)) goto out; - } /* * walk backward from this ordered extent to disk_i_size. @@ -870,7 +877,7 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset, break; if (test->file_offset >= i_size) break; - if (test->file_offset >= disk_i_size) { + if (entry_end(test) > disk_i_size) { /* * we don't update disk_i_size now, so record this * undealt i_size. Or we will not know the real diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index fe9d02c45f8e..a5c856234323 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -379,6 +379,13 @@ next1: ret = add_relation_rb(fs_info, found_key.objectid, found_key.offset); + if (ret == -ENOENT) { + printk(KERN_WARNING + "btrfs: orphan qgroup relation 0x%llx->0x%llx\n", + (unsigned long long)found_key.objectid, + (unsigned long long)found_key.offset); + ret = 0; /* ignore the error */ + } if (ret) goto out; next2: @@ -956,17 +963,28 @@ int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, u64 qgroupid) { struct btrfs_root *quota_root; + struct btrfs_qgroup *qgroup; int ret = 0; quota_root = fs_info->quota_root; if (!quota_root) return -EINVAL; + /* check if there are no relations to this qgroup */ + spin_lock(&fs_info->qgroup_lock); + qgroup = find_qgroup_rb(fs_info, qgroupid); + if (qgroup) { + if (!list_empty(&qgroup->groups) || !list_empty(&qgroup->members)) { + spin_unlock(&fs_info->qgroup_lock); + return -EBUSY; + } + } + spin_unlock(&fs_info->qgroup_lock); + ret = del_qgroup_item(trans, quota_root, qgroupid); spin_lock(&fs_info->qgroup_lock); del_qgroup_rb(quota_root->fs_info, qgroupid); - spin_unlock(&fs_info->qgroup_lock); return ret; diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index bdbb94f245c9..67783e03d121 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -580,20 +580,29 @@ static int scrub_fixup_readpage(u64 inum, u64 offset, u64 root, void *fixup_ctx) int corrected = 0; struct btrfs_key key; struct inode *inode = NULL; + struct btrfs_fs_info *fs_info; u64 end = offset + PAGE_SIZE - 1; struct btrfs_root *local_root; + int srcu_index; key.objectid = root; key.type = BTRFS_ROOT_ITEM_KEY; key.offset = (u64)-1; - local_root = btrfs_read_fs_root_no_name(fixup->root->fs_info, &key); - if (IS_ERR(local_root)) + + fs_info = fixup->root->fs_info; + srcu_index = srcu_read_lock(&fs_info->subvol_srcu); + + local_root = btrfs_read_fs_root_no_name(fs_info, &key); + if (IS_ERR(local_root)) { + srcu_read_unlock(&fs_info->subvol_srcu, srcu_index); return PTR_ERR(local_root); + } key.type = BTRFS_INODE_ITEM_KEY; key.objectid = inum; key.offset = 0; - inode = btrfs_iget(fixup->root->fs_info->sb, &key, local_root, NULL); + inode = btrfs_iget(fs_info->sb, &key, local_root, NULL); + srcu_read_unlock(&fs_info->subvol_srcu, srcu_index); if (IS_ERR(inode)) return PTR_ERR(inode); @@ -606,7 +615,6 @@ static int scrub_fixup_readpage(u64 inum, u64 offset, u64 root, void *fixup_ctx) } if (PageUptodate(page)) { - struct btrfs_fs_info *fs_info; if (PageDirty(page)) { /* * we need to write the data to the defect sector. the @@ -3180,18 +3188,25 @@ static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root, void *ctx) u64 physical_for_dev_replace; u64 len; struct btrfs_fs_info *fs_info = nocow_ctx->sctx->dev_root->fs_info; + int srcu_index; key.objectid = root; key.type = BTRFS_ROOT_ITEM_KEY; key.offset = (u64)-1; + + srcu_index = srcu_read_lock(&fs_info->subvol_srcu); + local_root = btrfs_read_fs_root_no_name(fs_info, &key); - if (IS_ERR(local_root)) + if (IS_ERR(local_root)) { + srcu_read_unlock(&fs_info->subvol_srcu, srcu_index); return PTR_ERR(local_root); + } key.type = BTRFS_INODE_ITEM_KEY; key.objectid = inum; key.offset = 0; inode = btrfs_iget(fs_info->sb, &key, local_root, NULL); + srcu_read_unlock(&fs_info->subvol_srcu, srcu_index); if (IS_ERR(inode)) return PTR_ERR(inode); diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 54454542ad40..321b7fb4e441 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -1814,8 +1814,10 @@ static int name_cache_insert(struct send_ctx *sctx, (unsigned long)nce->ino); if (!nce_head) { nce_head = kmalloc(sizeof(*nce_head), GFP_NOFS); - if (!nce_head) + if (!nce_head) { + kfree(nce); return -ENOMEM; + } INIT_LIST_HEAD(nce_head); ret = radix_tree_insert(&sctx->name_cache, nce->ino, nce_head); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 99545df1b86c..d8982e9601d3 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -267,7 +267,7 @@ void __btrfs_abort_transaction(struct btrfs_trans_handle *trans, function, line, errstr); return; } - trans->transaction->aborted = errno; + ACCESS_ONCE(trans->transaction->aborted) = errno; __btrfs_std_error(root->fs_info, function, line, errno, NULL); } /* diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 87fac9a21ea5..fc03aa60b684 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -333,12 +333,14 @@ start_transaction(struct btrfs_root *root, u64 num_items, int type, &root->fs_info->trans_block_rsv, num_bytes, flush); if (ret) - return ERR_PTR(ret); + goto reserve_fail; } again: h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS); - if (!h) - return ERR_PTR(-ENOMEM); + if (!h) { + ret = -ENOMEM; + goto alloc_fail; + } /* * If we are JOIN_NOLOCK we're already committing a transaction and @@ -365,11 +367,7 @@ again: if (ret < 0) { /* We must get the transaction if we are JOIN_NOLOCK. */ BUG_ON(type == TRANS_JOIN_NOLOCK); - - if (type < TRANS_JOIN_NOLOCK) - sb_end_intwrite(root->fs_info->sb); - kmem_cache_free(btrfs_trans_handle_cachep, h); - return ERR_PTR(ret); + goto join_fail; } cur_trans = root->fs_info->running_transaction; @@ -410,6 +408,19 @@ got_it: if (!current->journal_info && type != TRANS_USERSPACE) current->journal_info = h; return h; + +join_fail: + if (type < TRANS_JOIN_NOLOCK) + sb_end_intwrite(root->fs_info->sb); + kmem_cache_free(btrfs_trans_handle_cachep, h); +alloc_fail: + if (num_bytes) + btrfs_block_rsv_release(root, &root->fs_info->trans_block_rsv, + num_bytes); +reserve_fail: + if (qgroup_reserved) + btrfs_qgroup_free(root, qgroup_reserved); + return ERR_PTR(ret); } struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, @@ -1468,7 +1479,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, goto cleanup_transaction; } - if (cur_trans->aborted) { + /* Stop the commit early if ->aborted is set */ + if (unlikely(ACCESS_ONCE(cur_trans->aborted))) { ret = cur_trans->aborted; goto cleanup_transaction; } @@ -1574,6 +1586,11 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, wait_event(cur_trans->writer_wait, atomic_read(&cur_trans->num_writers) == 1); + /* ->aborted might be set after the previous check, so check it */ + if (unlikely(ACCESS_ONCE(cur_trans->aborted))) { + ret = cur_trans->aborted; + goto cleanup_transaction; + } /* * the reloc mutex makes sure that we stop * the balancing code from coming in and moving @@ -1657,6 +1674,17 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, goto cleanup_transaction; } + /* + * The tasks which save the space cache and inode cache may also + * update ->aborted, check it. + */ + if (unlikely(ACCESS_ONCE(cur_trans->aborted))) { + ret = cur_trans->aborted; + mutex_unlock(&root->fs_info->tree_log_mutex); + mutex_unlock(&root->fs_info->reloc_mutex); + goto cleanup_transaction; + } + btrfs_prepare_extent_commit(trans, root); cur_trans = root->fs_info->running_transaction; diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 83186c7e45d4..9027bb1e7466 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -3357,6 +3357,11 @@ static int log_one_extent(struct btrfs_trans_handle *trans, if (skip_csum) return 0; + if (em->compress_type) { + csum_offset = 0; + csum_len = block_len; + } + /* block start is already adjusted for the file extent offset. */ ret = btrfs_lookup_csums_range(log->fs_info->csum_root, em->block_start + csum_offset, @@ -3410,13 +3415,13 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans, em = list_entry(extents.next, struct extent_map, list); list_del_init(&em->list); - clear_bit(EXTENT_FLAG_LOGGING, &em->flags); /* * If we had an error we just need to delete everybody from our * private list. */ if (ret) { + clear_em_logging(tree, em); free_extent_map(em); continue; } @@ -3424,8 +3429,9 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans, write_unlock(&tree->lock); ret = log_one_extent(trans, inode, root, em, path); - free_extent_map(em); write_lock(&tree->lock); + clear_em_logging(tree, em); + free_extent_map(em); } WARN_ON(!list_empty(&extents)); write_unlock(&tree->lock); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 5cce6aa74012..5cbb7f4b1672 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1431,7 +1431,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) } } else { ret = btrfs_get_bdev_and_sb(device_path, - FMODE_READ | FMODE_EXCL, + FMODE_WRITE | FMODE_EXCL, root->fs_info->bdev_holder, 0, &bdev, &bh); if (ret) @@ -1556,7 +1556,8 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) ret = 0; /* Notify udev that device has changed */ - btrfs_kobject_uevent(bdev, KOBJ_CHANGE); + if (bdev) + btrfs_kobject_uevent(bdev, KOBJ_CHANGE); error_brelse: brelse(bh); @@ -2614,7 +2615,14 @@ static int chunk_usage_filter(struct btrfs_fs_info *fs_info, u64 chunk_offset, cache = btrfs_lookup_block_group(fs_info, chunk_offset); chunk_used = btrfs_block_group_used(&cache->item); - user_thresh = div_factor_fine(cache->key.offset, bargs->usage); + if (bargs->usage == 0) + user_thresh = 0; + else if (bargs->usage > 100) + user_thresh = cache->key.offset; + else + user_thresh = div_factor_fine(cache->key.offset, + bargs->usage); + if (chunk_used < user_thresh) ret = 0; @@ -2959,6 +2967,8 @@ static void __cancel_balance(struct btrfs_fs_info *fs_info) unset_balance_control(fs_info); ret = del_balance_item(fs_info->tree_root); BUG_ON(ret); + + atomic_set(&fs_info->mutually_exclusive_operation_running, 0); } void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int lock, @@ -3138,8 +3148,10 @@ int btrfs_balance(struct btrfs_balance_control *bctl, out: if (bctl->flags & BTRFS_BALANCE_RESUME) __cancel_balance(fs_info); - else + else { kfree(bctl); + atomic_set(&fs_info->mutually_exclusive_operation_running, 0); + } return ret; } @@ -3156,7 +3168,6 @@ static int balance_kthread(void *data) ret = btrfs_balance(fs_info->balance_ctl, NULL); } - atomic_set(&fs_info->mutually_exclusive_operation_running, 0); mutex_unlock(&fs_info->balance_mutex); mutex_unlock(&fs_info->volume_mutex); @@ -3179,7 +3190,6 @@ int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info) return 0; } - WARN_ON(atomic_xchg(&fs_info->mutually_exclusive_operation_running, 1)); tsk = kthread_run(balance_kthread, fs_info, "btrfs-balance"); if (IS_ERR(tsk)) return PTR_ERR(tsk); @@ -3233,6 +3243,8 @@ int btrfs_recover_balance(struct btrfs_fs_info *fs_info) btrfs_balance_sys(leaf, item, &disk_bargs); btrfs_disk_balance_args_to_cpu(&bctl->sys, &disk_bargs); + WARN_ON(atomic_xchg(&fs_info->mutually_exclusive_operation_running, 1)); + mutex_lock(&fs_info->volume_mutex); mutex_lock(&fs_info->balance_mutex); @@ -3496,7 +3508,7 @@ struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = { { 1, 1, 2, 2, 2, 2 /* raid1 */ }, { 1, 2, 1, 1, 1, 2 /* dup */ }, { 1, 1, 0, 2, 1, 1 /* raid0 */ }, - { 1, 1, 0, 1, 1, 1 /* single */ }, + { 1, 1, 1, 1, 1, 1 /* single */ }, }; static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c index ce5cbd717bfc..210fce2df308 100644 --- a/fs/cifs/cifs_dfs_ref.c +++ b/fs/cifs/cifs_dfs_ref.c @@ -226,6 +226,8 @@ compose_mount_options_out: compose_mount_options_err: kfree(mountdata); mountdata = ERR_PTR(rc); + kfree(*devname); + *devname = NULL; goto compose_mount_options_out; } diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 17c3643e5950..12b3da39733b 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -1917,7 +1917,7 @@ srcip_matches(struct sockaddr *srcaddr, struct sockaddr *rhs) } case AF_INET6: { struct sockaddr_in6 *saddr6 = (struct sockaddr_in6 *)srcaddr; - struct sockaddr_in6 *vaddr6 = (struct sockaddr_in6 *)&rhs; + struct sockaddr_in6 *vaddr6 = (struct sockaddr_in6 *)rhs; return ipv6_addr_equal(&saddr6->sin6_addr, &vaddr6->sin6_addr); } default: diff --git a/fs/dlm/user.c b/fs/dlm/user.c index 7ff49852b0cb..911649a47dd5 100644 --- a/fs/dlm/user.c +++ b/fs/dlm/user.c @@ -503,11 +503,11 @@ static ssize_t device_write(struct file *file, const char __user *buf, #endif return -EINVAL; -#ifdef CONFIG_COMPAT - if (count > sizeof(struct dlm_write_request32) + DLM_RESNAME_MAXLEN) -#else + /* + * can't compare against COMPAT/dlm_write_request32 because + * we don't yet know if is64bit is zero + */ if (count > sizeof(struct dlm_write_request) + DLM_RESNAME_MAXLEN) -#endif return -EINVAL; kbuf = kzalloc(count + 1, GFP_NOFS); diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c index b906ed17a839..9802de0f85e6 100644 --- a/fs/gfs2/lock_dlm.c +++ b/fs/gfs2/lock_dlm.c @@ -281,6 +281,7 @@ static void gdlm_put_lock(struct gfs2_glock *gl) { struct gfs2_sbd *sdp = gl->gl_sbd; struct lm_lockstruct *ls = &sdp->sd_lockstruct; + int lvb_needs_unlock = 0; int error; if (gl->gl_lksb.sb_lkid == 0) { @@ -294,8 +295,12 @@ static void gdlm_put_lock(struct gfs2_glock *gl) gfs2_update_request_times(gl); /* don't want to skip dlm_unlock writing the lvb when lock is ex */ + + if (gl->gl_lksb.sb_lvbptr && (gl->gl_state == LM_ST_EXCLUSIVE)) + lvb_needs_unlock = 1; + if (test_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags) && - gl->gl_lksb.sb_lvbptr && (gl->gl_state != LM_ST_EXCLUSIVE)) { + !lvb_needs_unlock) { gfs2_glock_free(gl); return; } diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index dd057bc6b65b..fc8dc20fdeb9 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -177,11 +177,31 @@ out_nofree: return mnt; } +static int +nfs_namespace_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) +{ + if (NFS_FH(dentry->d_inode)->size != 0) + return nfs_getattr(mnt, dentry, stat); + generic_fillattr(dentry->d_inode, stat); + return 0; +} + +static int +nfs_namespace_setattr(struct dentry *dentry, struct iattr *attr) +{ + if (NFS_FH(dentry->d_inode)->size != 0) + return nfs_setattr(dentry, attr); + return -EACCES; +} + const struct inode_operations nfs_mountpoint_inode_operations = { .getattr = nfs_getattr, + .setattr = nfs_setattr, }; const struct inode_operations nfs_referral_inode_operations = { + .getattr = nfs_namespace_getattr, + .setattr = nfs_namespace_setattr, }; static void nfs_expire_automounts(struct work_struct *work) diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index acc347268124..2e9779b58b7a 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -236,11 +236,10 @@ struct nfs_client *nfs4_init_client(struct nfs_client *clp, error = nfs4_discover_server_trunking(clp, &old); if (error < 0) goto error; + nfs_put_client(clp); if (clp != old) { clp->cl_preserve_clid = true; - nfs_put_client(clp); clp = old; - atomic_inc(&clp->cl_count); } return clp; @@ -306,7 +305,7 @@ int nfs40_walk_client_list(struct nfs_client *new, .clientid = new->cl_clientid, .confirm = new->cl_confirm, }; - int status; + int status = -NFS4ERR_STALE_CLIENTID; spin_lock(&nn->nfs_client_lock); list_for_each_entry_safe(pos, n, &nn->nfs_client_list, cl_share_link) { @@ -332,40 +331,33 @@ int nfs40_walk_client_list(struct nfs_client *new, if (prev) nfs_put_client(prev); + prev = pos; status = nfs4_proc_setclientid_confirm(pos, &clid, cred); - if (status == 0) { + switch (status) { + case -NFS4ERR_STALE_CLIENTID: + break; + case 0: nfs4_swap_callback_idents(pos, new); - nfs_put_client(pos); + prev = NULL; *result = pos; dprintk("NFS: <-- %s using nfs_client = %p ({%d})\n", __func__, pos, atomic_read(&pos->cl_count)); - return 0; - } - if (status != -NFS4ERR_STALE_CLIENTID) { - nfs_put_client(pos); - dprintk("NFS: <-- %s status = %d, no result\n", - __func__, status); - return status; + default: + goto out; } spin_lock(&nn->nfs_client_lock); - prev = pos; } + spin_unlock(&nn->nfs_client_lock); - /* - * No matching nfs_client found. This should be impossible, - * because the new nfs_client has already been added to - * nfs_client_list by nfs_get_client(). - * - * Don't BUG(), since the caller is holding a mutex. - */ + /* No match found. The server lost our clientid */ +out: if (prev) nfs_put_client(prev); - spin_unlock(&nn->nfs_client_lock); - pr_err("NFS: %s Error: no matching nfs_client found\n", __func__); - return -NFS4ERR_STALE_CLIENTID; + dprintk("NFS: <-- %s status = %d\n", __func__, status); + return status; } #ifdef CONFIG_NFS_V4_1 @@ -432,7 +424,7 @@ int nfs41_walk_client_list(struct nfs_client *new, { struct nfs_net *nn = net_generic(new->cl_net, nfs_net_id); struct nfs_client *pos, *n, *prev = NULL; - int error; + int status = -NFS4ERR_STALE_CLIENTID; spin_lock(&nn->nfs_client_lock); list_for_each_entry_safe(pos, n, &nn->nfs_client_list, cl_share_link) { @@ -448,14 +440,17 @@ int nfs41_walk_client_list(struct nfs_client *new, nfs_put_client(prev); prev = pos; - error = nfs_wait_client_init_complete(pos); - if (error < 0) { + nfs4_schedule_lease_recovery(pos); + status = nfs_wait_client_init_complete(pos); + if (status < 0) { nfs_put_client(pos); spin_lock(&nn->nfs_client_lock); continue; } - + status = pos->cl_cons_state; spin_lock(&nn->nfs_client_lock); + if (status < 0) + continue; } if (pos->rpc_ops != new->rpc_ops) @@ -473,6 +468,7 @@ int nfs41_walk_client_list(struct nfs_client *new, if (!nfs4_match_serverowners(pos, new)) continue; + atomic_inc(&pos->cl_count); spin_unlock(&nn->nfs_client_lock); dprintk("NFS: <-- %s using nfs_client = %p ({%d})\n", __func__, pos, atomic_read(&pos->cl_count)); @@ -481,16 +477,10 @@ int nfs41_walk_client_list(struct nfs_client *new, return 0; } - /* - * No matching nfs_client found. This should be impossible, - * because the new nfs_client has already been added to - * nfs_client_list by nfs_get_client(). - * - * Don't BUG(), since the caller is holding a mutex. - */ + /* No matching nfs_client found. */ spin_unlock(&nn->nfs_client_lock); - pr_err("NFS: %s Error: no matching nfs_client found\n", __func__); - return -NFS4ERR_STALE_CLIENTID; + dprintk("NFS: <-- %s status = %d\n", __func__, status); + return status; } #endif /* CONFIG_NFS_V4_1 */ diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 9448c579d41a..e61f68d5ef21 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -136,16 +136,11 @@ int nfs40_discover_server_trunking(struct nfs_client *clp, clp->cl_confirm = clid.confirm; status = nfs40_walk_client_list(clp, result, cred); - switch (status) { - case -NFS4ERR_STALE_CLIENTID: - set_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state); - case 0: + if (status == 0) { /* Sustain the lease, even if it's empty. If the clientid4 * goes stale it's of no use for trunking discovery. */ nfs4_schedule_state_renewal(*result); - break; } - out: return status; } @@ -1863,6 +1858,7 @@ again: case -ETIMEDOUT: case -EAGAIN: ssleep(1); + case -NFS4ERR_STALE_CLIENTID: dprintk("NFS: %s after status %d, retrying\n", __func__, status); goto again; @@ -2022,8 +2018,18 @@ static int nfs4_reset_session(struct nfs_client *clp) nfs4_begin_drain_session(clp); cred = nfs4_get_exchange_id_cred(clp); status = nfs4_proc_destroy_session(clp->cl_session, cred); - if (status && status != -NFS4ERR_BADSESSION && - status != -NFS4ERR_DEADSESSION) { + switch (status) { + case 0: + case -NFS4ERR_BADSESSION: + case -NFS4ERR_DEADSESSION: + break; + case -NFS4ERR_BACK_CHAN_BUSY: + case -NFS4ERR_DELAY: + set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state); + status = 0; + ssleep(1); + goto out; + default: status = nfs4_recovery_handle_error(clp, status); goto out; } diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 2e7e8c878e5d..b056b1628722 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -2589,27 +2589,23 @@ nfs_xdev_mount(struct file_system_type *fs_type, int flags, struct nfs_server *server; struct dentry *mntroot = ERR_PTR(-ENOMEM); struct nfs_subversion *nfs_mod = NFS_SB(data->sb)->nfs_client->cl_nfs_mod; - int error; - dprintk("--> nfs_xdev_mount_common()\n"); + dprintk("--> nfs_xdev_mount()\n"); mount_info.mntfh = mount_info.cloned->fh; /* create a new volume representation */ server = nfs_mod->rpc_ops->clone_server(NFS_SB(data->sb), data->fh, data->fattr, data->authflavor); - if (IS_ERR(server)) { - error = PTR_ERR(server); - goto out_err; - } - mntroot = nfs_fs_mount_common(server, flags, dev_name, &mount_info, nfs_mod); - dprintk("<-- nfs_xdev_mount_common() = 0\n"); -out: - return mntroot; + if (IS_ERR(server)) + mntroot = ERR_CAST(server); + else + mntroot = nfs_fs_mount_common(server, flags, + dev_name, &mount_info, nfs_mod); -out_err: - dprintk("<-- nfs_xdev_mount_common() = %d [error]\n", error); - goto out; + dprintk("<-- nfs_xdev_mount() = %ld\n", + IS_ERR(mntroot) ? PTR_ERR(mntroot) : 0L); + return mntroot; } #if IS_ENABLED(CONFIG_NFS_V4) diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c index fdb180769485..f3859354e41a 100644 --- a/fs/nilfs2/ioctl.c +++ b/fs/nilfs2/ioctl.c @@ -664,8 +664,11 @@ static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp, if (ret < 0) printk(KERN_ERR "NILFS: GC failed during preparation: " "cannot read source blocks: err=%d\n", ret); - else + else { + if (nilfs_sb_need_update(nilfs)) + set_nilfs_discontinued(nilfs); ret = nilfs_clean_segments(inode->i_sb, argv, kbufs); + } nilfs_remove_all_gcinodes(nilfs); clear_nilfs_gc_running(nilfs); diff --git a/fs/proc/array.c b/fs/proc/array.c index 6a91e6ffbcbd..f7ed9ee46eb9 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -449,7 +449,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, do { min_flt += t->min_flt; maj_flt += t->maj_flt; - gtime += t->gtime; + gtime += task_gtime(t); t = next_thread(t); } while (t != task); @@ -472,7 +472,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, min_flt = task->min_flt; maj_flt = task->maj_flt; task_cputime_adjusted(task, &utime, &stime); - gtime = task->gtime; + gtime = task_gtime(task); } /* scale priority and nice values from timeslices to -20..20 */ diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c index fe72cd073dea..3131a03d7d37 100644 --- a/fs/proc/proc_net.c +++ b/fs/proc/proc_net.c @@ -177,20 +177,6 @@ const struct file_operations proc_net_operations = { .readdir = proc_tgid_net_readdir, }; - -struct proc_dir_entry *proc_net_fops_create(struct net *net, - const char *name, umode_t mode, const struct file_operations *fops) -{ - return proc_create(name, mode, net->proc_net, fops); -} -EXPORT_SYMBOL_GPL(proc_net_fops_create); - -void proc_net_remove(struct net *net, const char *name) -{ - remove_proc_entry(name, net->proc_net); -} -EXPORT_SYMBOL_GPL(proc_net_remove); - static __net_init int proc_net_ns_init(struct net *net) { struct proc_dir_entry *netd, *net_statd; diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c index 7003e5266f25..288f068740f6 100644 --- a/fs/pstore/ram.c +++ b/fs/pstore/ram.c @@ -167,12 +167,16 @@ static ssize_t ramoops_pstore_read(u64 *id, enum pstore_type_id *type, static size_t ramoops_write_kmsg_hdr(struct persistent_ram_zone *prz) { char *hdr; - struct timeval timestamp; + struct timespec timestamp; size_t len; - do_gettimeofday(×tamp); + /* Report zeroed timestamp if called before timekeeping has resumed. */ + if (__getnstimeofday(×tamp)) { + timestamp.tv_sec = 0; + timestamp.tv_nsec = 0; + } hdr = kasprintf(GFP_ATOMIC, RAMOOPS_KERNMSG_HDR "%lu.%lu\n", - (long)timestamp.tv_sec, (long)timestamp.tv_usec); + (long)timestamp.tv_sec, (long)(timestamp.tv_nsec / 1000)); WARN_ON_ONCE(!hdr); len = hdr ? strlen(hdr) : 0; persistent_ram_write(prz, hdr, len); diff --git a/fs/select.c b/fs/select.c index 2ef72d965036..8c1c96c27062 100644 --- a/fs/select.c +++ b/fs/select.c @@ -26,6 +26,7 @@ #include <linux/fs.h> #include <linux/rcupdate.h> #include <linux/hrtimer.h> +#include <linux/sched/rt.h> #include <asm/uaccess.h> diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c index 2df555c66d57..aec3d5c98c94 100644 --- a/fs/sysfs/group.c +++ b/fs/sysfs/group.c @@ -205,6 +205,48 @@ void sysfs_unmerge_group(struct kobject *kobj, } EXPORT_SYMBOL_GPL(sysfs_unmerge_group); +/** + * sysfs_add_link_to_group - add a symlink to an attribute group. + * @kobj: The kobject containing the group. + * @group_name: The name of the group. + * @target: The target kobject of the symlink to create. + * @link_name: The name of the symlink to create. + */ +int sysfs_add_link_to_group(struct kobject *kobj, const char *group_name, + struct kobject *target, const char *link_name) +{ + struct sysfs_dirent *dir_sd; + int error = 0; + + dir_sd = sysfs_get_dirent(kobj->sd, NULL, group_name); + if (!dir_sd) + return -ENOENT; + + error = sysfs_create_link_sd(dir_sd, target, link_name); + sysfs_put(dir_sd); + + return error; +} +EXPORT_SYMBOL_GPL(sysfs_add_link_to_group); + +/** + * sysfs_remove_link_from_group - remove a symlink from an attribute group. + * @kobj: The kobject containing the group. + * @group_name: The name of the group. + * @link_name: The name of the symlink to remove. + */ +void sysfs_remove_link_from_group(struct kobject *kobj, const char *group_name, + const char *link_name) +{ + struct sysfs_dirent *dir_sd; + + dir_sd = sysfs_get_dirent(kobj->sd, NULL, group_name); + if (dir_sd) { + sysfs_hash_and_remove(dir_sd, NULL, link_name); + sysfs_put(dir_sd); + } +} +EXPORT_SYMBOL_GPL(sysfs_remove_link_from_group); EXPORT_SYMBOL_GPL(sysfs_create_group); EXPORT_SYMBOL_GPL(sysfs_update_group); diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c index 3c9eb5624f5e..8c940df97a52 100644 --- a/fs/sysfs/symlink.c +++ b/fs/sysfs/symlink.c @@ -21,26 +21,17 @@ #include "sysfs.h" -static int sysfs_do_create_link(struct kobject *kobj, struct kobject *target, - const char *name, int warn) +static int sysfs_do_create_link_sd(struct sysfs_dirent *parent_sd, + struct kobject *target, + const char *name, int warn) { - struct sysfs_dirent *parent_sd = NULL; struct sysfs_dirent *target_sd = NULL; struct sysfs_dirent *sd = NULL; struct sysfs_addrm_cxt acxt; enum kobj_ns_type ns_type; int error; - BUG_ON(!name); - - if (!kobj) - parent_sd = &sysfs_root; - else - parent_sd = kobj->sd; - - error = -EFAULT; - if (!parent_sd) - goto out_put; + BUG_ON(!name || !parent_sd); /* target->sd can go away beneath us but is protected with * sysfs_assoc_lock. Fetch target_sd from it. @@ -96,6 +87,34 @@ static int sysfs_do_create_link(struct kobject *kobj, struct kobject *target, } /** + * sysfs_create_link_sd - create symlink to a given object. + * @sd: directory we're creating the link in. + * @target: object we're pointing to. + * @name: name of the symlink. + */ +int sysfs_create_link_sd(struct sysfs_dirent *sd, struct kobject *target, + const char *name) +{ + return sysfs_do_create_link_sd(sd, target, name, 1); +} + +static int sysfs_do_create_link(struct kobject *kobj, struct kobject *target, + const char *name, int warn) +{ + struct sysfs_dirent *parent_sd = NULL; + + if (!kobj) + parent_sd = &sysfs_root; + else + parent_sd = kobj->sd; + + if (!parent_sd) + return -EFAULT; + + return sysfs_do_create_link_sd(parent_sd, target, name, warn); +} + +/** * sysfs_create_link - create symlink between two objects. * @kobj: object whose directory we're creating the link in. * @target: object we're pointing to. diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h index d73c0932bbd6..d1e4043eb0c3 100644 --- a/fs/sysfs/sysfs.h +++ b/fs/sysfs/sysfs.h @@ -240,3 +240,5 @@ void unmap_bin_file(struct sysfs_dirent *attr_sd); * symlink.c */ extern const struct inode_operations sysfs_symlink_inode_operations; +int sysfs_create_link_sd(struct sysfs_dirent *sd, struct kobject *target, + const char *name); diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 4111a40ebe1a..5f707e537171 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -86,11 +86,11 @@ xfs_destroy_ioend( } if (ioend->io_iocb) { + inode_dio_done(ioend->io_inode); if (ioend->io_isasync) { aio_complete(ioend->io_iocb, ioend->io_error ? ioend->io_error : ioend->io_result, 0); } - inode_dio_done(ioend->io_inode); } mempool_free(ioend, xfs_ioend_pool); diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index 0e92d12765d2..cdb2d3348583 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c @@ -4680,9 +4680,6 @@ __xfs_bmapi_allocate( return error; } - if (bma->flags & XFS_BMAPI_STACK_SWITCH) - bma->stack_switch = 1; - error = xfs_bmap_alloc(bma); if (error) return error; @@ -4956,6 +4953,9 @@ xfs_bmapi_write( bma.flist = flist; bma.firstblock = firstblock; + if (flags & XFS_BMAPI_STACK_SWITCH) + bma.stack_switch = 1; + while (bno < end && n < *nmap) { inhole = eof || bma.got.br_startoff > bno; wasdelay = !inhole && isnullstartblock(bma.got.br_startblock); diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 56d1614760cf..fbbb9eb92e32 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -487,6 +487,7 @@ _xfs_buf_find( struct rb_node *parent; xfs_buf_t *bp; xfs_daddr_t blkno = map[0].bm_bn; + xfs_daddr_t eofs; int numblks = 0; int i; @@ -498,6 +499,23 @@ _xfs_buf_find( ASSERT(!(numbytes < (1 << btp->bt_sshift))); ASSERT(!(BBTOB(blkno) & (xfs_off_t)btp->bt_smask)); + /* + * Corrupted block numbers can get through to here, unfortunately, so we + * have to check that the buffer falls within the filesystem bounds. + */ + eofs = XFS_FSB_TO_BB(btp->bt_mount, btp->bt_mount->m_sb.sb_dblocks); + if (blkno >= eofs) { + /* + * XXX (dgc): we should really be returning EFSCORRUPTED here, + * but none of the higher level infrastructure supports + * returning a specific error on buffer lookup failures. + */ + xfs_alert(btp->bt_mount, + "%s: Block out of range: block 0x%llx, EOFS 0x%llx ", + __func__, blkno, eofs); + return NULL; + } + /* get tree root */ pag = xfs_perag_get(btp->bt_mount, xfs_daddr_to_agno(btp->bt_mount, blkno)); @@ -1487,6 +1505,8 @@ restart: while (!list_empty(&btp->bt_lru)) { bp = list_first_entry(&btp->bt_lru, struct xfs_buf, b_lru); if (atomic_read(&bp->b_hold) > 1) { + trace_xfs_buf_wait_buftarg(bp, _RET_IP_); + list_move_tail(&bp->b_lru, &btp->bt_lru); spin_unlock(&btp->bt_lru_lock); delay(100); goto restart; diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 77b09750e92c..3f9949fee391 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -652,7 +652,10 @@ xfs_buf_item_unlock( /* * If the buf item isn't tracking any data, free it, otherwise drop the - * reference we hold to it. + * reference we hold to it. If we are aborting the transaction, this may + * be the only reference to the buf item, so we free it anyway + * regardless of whether it is dirty or not. A dirty abort implies a + * shutdown, anyway. */ clean = 1; for (i = 0; i < bip->bli_format_count; i++) { @@ -664,7 +667,12 @@ xfs_buf_item_unlock( } if (clean) xfs_buf_item_relse(bp); - else + else if (aborted) { + if (atomic_dec_and_test(&bip->bli_refcount)) { + ASSERT(XFS_FORCED_SHUTDOWN(lip->li_mountp)); + xfs_buf_item_relse(bp); + } + } else atomic_dec(&bip->bli_refcount); if (!hold) diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c index d0e9c74d3d96..a8bd26b82ecb 100644 --- a/fs/xfs/xfs_dfrag.c +++ b/fs/xfs/xfs_dfrag.c @@ -246,10 +246,10 @@ xfs_swap_extents( goto out_unlock; } - error = -filemap_write_and_wait(VFS_I(ip)->i_mapping); + error = -filemap_write_and_wait(VFS_I(tip)->i_mapping); if (error) goto out_unlock; - truncate_pagecache_range(VFS_I(ip), 0, -1); + truncate_pagecache_range(VFS_I(tip), 0, -1); /* Verify O_DIRECT for ftmp */ if (VN_CACHED(VFS_I(tip)) != 0) { diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index add06b4e9a63..364818eef40e 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -351,6 +351,15 @@ xfs_iomap_prealloc_size( } if (shift) alloc_blocks >>= shift; + + /* + * If we are still trying to allocate more space than is + * available, squash the prealloc hard. This can happen if we + * have a large file on a small filesystem and the above + * lowspace thresholds are smaller than MAXEXTLEN. + */ + while (alloc_blocks >= freesp) + alloc_blocks >>= 4; } if (alloc_blocks < mp->m_writeio_blocks) diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index da508463ff10..7d6df7c00c36 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -658,7 +658,7 @@ xfs_sb_quiet_read_verify( return; } /* quietly fail */ - xfs_buf_ioerror(bp, EFSCORRUPTED); + xfs_buf_ioerror(bp, EWRONGFS); } static void diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 2e137d4a85ae..16a812977eab 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -341,6 +341,7 @@ DEFINE_BUF_EVENT(xfs_buf_item_relse); DEFINE_BUF_EVENT(xfs_buf_item_iodone); DEFINE_BUF_EVENT(xfs_buf_item_iodone_async); DEFINE_BUF_EVENT(xfs_buf_error_relse); +DEFINE_BUF_EVENT(xfs_buf_wait_buftarg); DEFINE_BUF_EVENT(xfs_trans_read_buf_io); DEFINE_BUF_EVENT(xfs_trans_read_buf_shut); |