diff options
Diffstat (limited to 'fs')
53 files changed, 509 insertions, 269 deletions
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index 862fbc206755..564a7de17d99 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -378,7 +378,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root, ret = btrfs_kobj_add_device(tgt_device->fs_devices, tgt_device); if (ret) - btrfs_error(root->fs_info, ret, "kobj add dev failed"); + btrfs_err(root->fs_info, "kobj add dev failed %d\n", ret); printk_in_rcu(KERN_INFO "BTRFS: dev_replace from %s (devid %llu) to %s started\n", diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index a9aadb2ad525..f556c3732c2c 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2842,6 +2842,7 @@ int open_ctree(struct super_block *sb, !extent_buffer_uptodate(chunk_root->node)) { printk(KERN_ERR "BTRFS: failed to read chunk root on %s\n", sb->s_id); + chunk_root->node = NULL; goto fail_tree_roots; } btrfs_set_root_node(&chunk_root->root_item, chunk_root->node); @@ -2879,7 +2880,7 @@ retry_root_backup: !extent_buffer_uptodate(tree_root->node)) { printk(KERN_WARNING "BTRFS: failed to read tree root on %s\n", sb->s_id); - + tree_root->node = NULL; goto recovery_tree_root; } diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 1c2bd1723e40..07204bf601ed 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2296,9 +2296,22 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans, static inline struct btrfs_delayed_ref_node * select_delayed_ref(struct btrfs_delayed_ref_head *head) { + struct btrfs_delayed_ref_node *ref; + if (list_empty(&head->ref_list)) return NULL; + /* + * Select a delayed ref of type BTRFS_ADD_DELAYED_REF first. + * This is to prevent a ref count from going down to zero, which deletes + * the extent item from the extent tree, when there still are references + * to add, which would fail because they would not find the extent item. + */ + list_for_each_entry(ref, &head->ref_list, list) { + if (ref->action == BTRFS_ADD_DELAYED_REF) + return ref; + } + return list_entry(head->ref_list.next, struct btrfs_delayed_ref_node, list); } @@ -4214,6 +4227,24 @@ out: space_info->chunk_alloc = 0; spin_unlock(&space_info->lock); mutex_unlock(&fs_info->chunk_mutex); + /* + * When we allocate a new chunk we reserve space in the chunk block + * reserve to make sure we can COW nodes/leafs in the chunk tree or + * add new nodes/leafs to it if we end up needing to do it when + * inserting the chunk item and updating device items as part of the + * second phase of chunk allocation, performed by + * btrfs_finish_chunk_alloc(). So make sure we don't accumulate a + * large number of new block groups to create in our transaction + * handle's new_bgs list to avoid exhausting the chunk block reserve + * in extreme cases - like having a single transaction create many new + * block groups when starting to write out the free space caches of all + * the block groups that were made dirty during the lifetime of the + * transaction. + */ + if (trans->chunk_bytes_reserved >= (2 * 1024 * 1024ull)) { + btrfs_create_pending_block_groups(trans, trans->root); + btrfs_trans_release_chunk_metadata(trans); + } return ret; } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index b33c0cf02668..e33dff356460 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4209,7 +4209,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, u64 extent_num_bytes = 0; u64 extent_offset = 0; u64 item_end = 0; - u64 last_size = (u64)-1; + u64 last_size = new_size; u32 found_type = (u8)-1; int found_extent; int del_item; @@ -4493,8 +4493,7 @@ out: btrfs_abort_transaction(trans, root, ret); } error: - if (last_size != (u64)-1 && - root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) + if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) btrfs_ordered_update_i_size(inode, last_size, NULL); btrfs_free_path(path); diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 5d91776e12a2..0770c91586ca 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3090,7 +3090,7 @@ out_unlock: static long btrfs_ioctl_file_extent_same(struct file *file, struct btrfs_ioctl_same_args __user *argp) { - struct btrfs_ioctl_same_args *same; + struct btrfs_ioctl_same_args *same = NULL; struct btrfs_ioctl_same_extent_info *info; struct inode *src = file_inode(file); u64 off; @@ -3120,6 +3120,7 @@ static long btrfs_ioctl_file_extent_same(struct file *file, if (IS_ERR(same)) { ret = PTR_ERR(same); + same = NULL; goto out; } @@ -3190,6 +3191,7 @@ static long btrfs_ioctl_file_extent_same(struct file *file, out: mnt_drop_write_file(file); + kfree(same); return ret; } @@ -3586,6 +3588,20 @@ process_slot: u64 trim = 0; u64 aligned_end = 0; + /* + * Don't copy an inline extent into an offset + * greater than zero. Having an inline extent + * at such an offset results in chaos as btrfs + * isn't prepared for such cases. Just skip + * this case for the same reasons as commented + * at btrfs_ioctl_clone(). + */ + if (last_dest_end > 0) { + ret = -EOPNOTSUPP; + btrfs_end_transaction(trans, root); + goto out; + } + if (off > key.offset) { skip = off - key.offset; new_key.offset += skip; diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index e9ace099162c..8a8202956576 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -1651,6 +1651,11 @@ static int qgroup_update_counters(struct btrfs_fs_info *fs_info, /* Exclusive -> exclusive, nothing changed */ } } + + /* For exclusive extent, free its reserved bytes too */ + if (nr_old_roots == 0 && nr_new_roots == 1 && + cur_new_count == nr_new_roots) + qg->reserved -= num_bytes; if (dirty) qgroup_dirty(fs_info, qg); } diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index c0f18e7266b6..f5021fcb154e 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -761,7 +761,7 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, if (!list_empty(&trans->ordered)) { spin_lock(&info->trans_lock); - list_splice(&trans->ordered, &cur_trans->pending_ordered); + list_splice_init(&trans->ordered, &cur_trans->pending_ordered); spin_unlock(&info->trans_lock); } @@ -1866,7 +1866,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, } spin_lock(&root->fs_info->trans_lock); - list_splice(&trans->ordered, &cur_trans->pending_ordered); + list_splice_init(&trans->ordered, &cur_trans->pending_ordered); if (cur_trans->state >= TRANS_STATE_COMMIT_START) { spin_unlock(&root->fs_info->trans_lock); atomic_inc(&cur_trans->use_count); @@ -2152,7 +2152,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, kmem_cache_free(btrfs_trans_handle_cachep, trans); - if (current != root->fs_info->transaction_kthread) + if (current != root->fs_info->transaction_kthread && + current != root->fs_info->cleaner_kthread) btrfs_run_delayed_iputs(root); return ret; diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index dc10c9dd36c1..ddd5e9471290 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -1506,7 +1506,6 @@ static int __mark_caps_flushing(struct inode *inode, swap(cf, ci->i_prealloc_cap_flush); cf->caps = flushing; - cf->kick = false; spin_lock(&mdsc->cap_dirty_lock); list_del_init(&ci->i_dirty_item); @@ -2123,8 +2122,7 @@ static void kick_flushing_capsnaps(struct ceph_mds_client *mdsc, static int __kick_flushing_caps(struct ceph_mds_client *mdsc, struct ceph_mds_session *session, - struct ceph_inode_info *ci, - bool kick_all) + struct ceph_inode_info *ci) { struct inode *inode = &ci->vfs_inode; struct ceph_cap *cap; @@ -2150,9 +2148,7 @@ static int __kick_flushing_caps(struct ceph_mds_client *mdsc, for (n = rb_first(&ci->i_cap_flush_tree); n; n = rb_next(n)) { cf = rb_entry(n, struct ceph_cap_flush, i_node); - if (cf->tid < first_tid) - continue; - if (kick_all || cf->kick) + if (cf->tid >= first_tid) break; } if (!n) { @@ -2161,7 +2157,6 @@ static int __kick_flushing_caps(struct ceph_mds_client *mdsc, } cf = rb_entry(n, struct ceph_cap_flush, i_node); - cf->kick = false; first_tid = cf->tid + 1; @@ -2181,8 +2176,6 @@ void ceph_early_kick_flushing_caps(struct ceph_mds_client *mdsc, { struct ceph_inode_info *ci; struct ceph_cap *cap; - struct ceph_cap_flush *cf; - struct rb_node *n; dout("early_kick_flushing_caps mds%d\n", session->s_mds); list_for_each_entry(ci, &session->s_cap_flushing, i_flushing_item) { @@ -2205,16 +2198,11 @@ void ceph_early_kick_flushing_caps(struct ceph_mds_client *mdsc, if ((cap->issued & ci->i_flushing_caps) != ci->i_flushing_caps) { spin_unlock(&ci->i_ceph_lock); - if (!__kick_flushing_caps(mdsc, session, ci, true)) + if (!__kick_flushing_caps(mdsc, session, ci)) continue; spin_lock(&ci->i_ceph_lock); } - for (n = rb_first(&ci->i_cap_flush_tree); n; n = rb_next(n)) { - cf = rb_entry(n, struct ceph_cap_flush, i_node); - cf->kick = true; - } - spin_unlock(&ci->i_ceph_lock); } } @@ -2228,7 +2216,7 @@ void ceph_kick_flushing_caps(struct ceph_mds_client *mdsc, dout("kick_flushing_caps mds%d\n", session->s_mds); list_for_each_entry(ci, &session->s_cap_flushing, i_flushing_item) { - int delayed = __kick_flushing_caps(mdsc, session, ci, false); + int delayed = __kick_flushing_caps(mdsc, session, ci); if (delayed) { spin_lock(&ci->i_ceph_lock); __cap_delay_requeue(mdsc, ci); @@ -2261,7 +2249,7 @@ static void kick_flushing_inode_caps(struct ceph_mds_client *mdsc, spin_unlock(&ci->i_ceph_lock); - delayed = __kick_flushing_caps(mdsc, session, ci, true); + delayed = __kick_flushing_caps(mdsc, session, ci); if (delayed) { spin_lock(&ci->i_ceph_lock); __cap_delay_requeue(mdsc, ci); diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c index 4347039ecc18..6706bde9ad1b 100644 --- a/fs/ceph/locks.c +++ b/fs/ceph/locks.c @@ -287,7 +287,7 @@ int ceph_encode_locks_to_buffer(struct inode *inode, return 0; spin_lock(&ctx->flc_lock); - list_for_each_entry(lock, &ctx->flc_flock, fl_list) { + list_for_each_entry(lock, &ctx->flc_posix, fl_list) { ++seen_fcntl; if (seen_fcntl > num_fcntl_locks) { err = -ENOSPC; diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 860cc016e70d..2f2460d23a06 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -189,7 +189,6 @@ static inline void ceph_put_cap_snap(struct ceph_cap_snap *capsnap) struct ceph_cap_flush { u64 tid; int caps; - bool kick; struct rb_node g_node; // global union { struct rb_node i_node; // inode diff --git a/fs/configfs/item.c b/fs/configfs/item.c index 4d6a30e76168..b863a09cd2f1 100644 --- a/fs/configfs/item.c +++ b/fs/configfs/item.c @@ -115,7 +115,7 @@ void config_item_init_type_name(struct config_item *item, const char *name, struct config_item_type *type) { - config_item_set_name(item, name); + config_item_set_name(item, "%s", name); item->ci_type = type; config_item_init(item); } @@ -124,7 +124,7 @@ EXPORT_SYMBOL(config_item_init_type_name); void config_group_init_type_name(struct config_group *group, const char *name, struct config_item_type *type) { - config_item_set_name(&group->cg_item, name); + config_item_set_name(&group->cg_item, "%s", name); group->cg_item.ci_type = type; config_group_init(group); } @@ -319,6 +319,12 @@ static int dax_insert_mapping(struct inode *inode, struct buffer_head *bh, * @vma: The virtual memory area where the fault occurred * @vmf: The description of the fault * @get_block: The filesystem method used to translate file offsets to blocks + * @complete_unwritten: The filesystem method used to convert unwritten blocks + * to written so the data written to them is exposed. This is required for + * required by write faults for filesystems that will return unwritten + * extent mappings from @get_block, but it is optional for reads as + * dax_insert_mapping() will always zero unwritten blocks. If the fs does + * not support unwritten extents, the it should pass NULL. * * When a page fault occurs, filesystems may call this helper in their * fault handler for DAX files. __dax_fault() assumes the caller has done all @@ -437,8 +443,12 @@ int __dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf, * as for normal BH based IO completions. */ error = dax_insert_mapping(inode, &bh, vma, vmf); - if (buffer_unwritten(&bh)) - complete_unwritten(&bh, !error); + if (buffer_unwritten(&bh)) { + if (complete_unwritten) + complete_unwritten(&bh, !error); + else + WARN_ON_ONCE(!(vmf->flags & FAULT_FLAG_WRITE)); + } out: if (error == -ENOMEM) diff --git a/fs/dcache.c b/fs/dcache.c index 5c8ea15e73a5..9b5fe503f6cb 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -3442,22 +3442,15 @@ void __init vfs_caches_init_early(void) inode_init_early(); } -void __init vfs_caches_init(unsigned long mempages) +void __init vfs_caches_init(void) { - unsigned long reserve; - - /* Base hash sizes on available memory, with a reserve equal to - 150% of current kernel size */ - - reserve = min((mempages - nr_free_pages()) * 3/2, mempages - 1); - mempages -= reserve; - names_cachep = kmem_cache_create("names_cache", PATH_MAX, 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); dcache_init(); inode_init(); - files_init(mempages); + files_init(); + files_maxfiles_init(); mnt_init(); bdev_cache_init(); chrdev_init(); diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 9bedfa8dd3a5..f71e19a9dd3c 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -2072,8 +2072,6 @@ static int f2fs_set_data_page_dirty(struct page *page) return 1; } - mark_inode_dirty(inode); - if (!PageDirty(page)) { __set_page_dirty_nobuffers(page); update_dirty_page(inode, page); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index ada2a3dd701a..b0f38c3b37f4 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1331,12 +1331,13 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp) if (ret) return ret; - if (f2fs_is_atomic_file(inode)) + if (f2fs_is_atomic_file(inode)) { + clear_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE); commit_inmem_pages(inode, false); + } ret = f2fs_sync_file(filp, 0, LONG_MAX, 0); mnt_drop_write_file(filp); - clear_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE); return ret; } @@ -1387,8 +1388,8 @@ static int f2fs_ioc_abort_volatile_write(struct file *filp) f2fs_balance_fs(F2FS_I_SB(inode)); if (f2fs_is_atomic_file(inode)) { - commit_inmem_pages(inode, false); clear_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE); + commit_inmem_pages(inode, false); } if (f2fs_is_volatile_file(inode)) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index e1e73617d13b..22fb5ef37966 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -556,27 +556,39 @@ static void move_encrypted_block(struct inode *inode, block_t bidx) if (!fio.encrypted_page) goto put_out; - f2fs_submit_page_bio(&fio); + err = f2fs_submit_page_bio(&fio); + if (err) + goto put_page_out; + + /* write page */ + lock_page(fio.encrypted_page); + + if (unlikely(!PageUptodate(fio.encrypted_page))) + goto put_page_out; + if (unlikely(fio.encrypted_page->mapping != META_MAPPING(fio.sbi))) + goto put_page_out; + + set_page_dirty(fio.encrypted_page); + f2fs_wait_on_page_writeback(fio.encrypted_page, META); + if (clear_page_dirty_for_io(fio.encrypted_page)) + dec_page_count(fio.sbi, F2FS_DIRTY_META); + + set_page_writeback(fio.encrypted_page); /* allocate block address */ f2fs_wait_on_page_writeback(dn.node_page, NODE); - allocate_data_block(fio.sbi, NULL, fio.blk_addr, &fio.blk_addr, &sum, CURSEG_COLD_DATA); - dn.data_blkaddr = fio.blk_addr; - - /* write page */ - lock_page(fio.encrypted_page); - set_page_writeback(fio.encrypted_page); fio.rw = WRITE_SYNC; f2fs_submit_page_mbio(&fio); + dn.data_blkaddr = fio.blk_addr; set_data_blkaddr(&dn); f2fs_update_extent_cache(&dn); set_inode_flag(F2FS_I(inode), FI_APPEND_WRITE); if (page->index == 0) set_inode_flag(F2FS_I(inode), FI_FIRST_BLOCK_WRITTEN); - +put_page_out: f2fs_put_page(fio.encrypted_page, 1); put_out: f2fs_put_dnode(&dn); @@ -605,8 +617,8 @@ static void move_data_page(struct inode *inode, block_t bidx, int gc_type) .page = page, .encrypted_page = NULL, }; + set_page_dirty(page); f2fs_wait_on_page_writeback(page, DATA); - if (clear_page_dirty_for_io(page)) inode_dec_dirty_pages(inode); set_cold_data(page); diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 38e75fb1e488..a13ffcc32992 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -141,6 +141,8 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page) kunmap_atomic(dst_addr); SetPageUptodate(page); no_update: + set_page_dirty(page); + /* clear dirty state */ dirty = clear_page_dirty_for_io(page); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 1eb343768781..61b97f9cb9f6 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -257,6 +257,7 @@ void commit_inmem_pages(struct inode *inode, bool abort) if (!abort) { lock_page(cur->page); if (cur->page->mapping == inode->i_mapping) { + set_page_dirty(cur->page); f2fs_wait_on_page_writeback(cur->page, DATA); if (clear_page_dirty_for_io(cur->page)) inode_dec_dirty_pages(inode); diff --git a/fs/file_table.c b/fs/file_table.c index 7f9d407c7595..ad17e05ebf95 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -25,6 +25,7 @@ #include <linux/hardirq.h> #include <linux/task_work.h> #include <linux/ima.h> +#include <linux/swap.h> #include <linux/atomic.h> @@ -308,19 +309,24 @@ void put_filp(struct file *file) } } -void __init files_init(unsigned long mempages) +void __init files_init(void) { - unsigned long n; - filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL); + percpu_counter_init(&nr_files, 0, GFP_KERNEL); +} - /* - * One file with associated inode and dcache is very roughly 1K. - * Per default don't use more than 10% of our memory for files. - */ +/* + * One file with associated inode and dcache is very roughly 1K. Per default + * do not use more than 10% of our memory for files. + */ +void __init files_maxfiles_init(void) +{ + unsigned long n; + unsigned long memreserve = (totalram_pages - nr_free_pages()) * 3/2; + + memreserve = min(memreserve, totalram_pages - 1); + n = ((totalram_pages - memreserve) * (PAGE_SIZE / 1024)) / 10; - n = (mempages * (PAGE_SIZE / 1024)) / 10; files_stat.max_files = max_t(unsigned long, n, NR_FILE); - percpu_counter_init(&nr_files, 0, GFP_KERNEL); } diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index f0520bcf2094..518c6294bf6c 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -702,6 +702,7 @@ void wbc_account_io(struct writeback_control *wbc, struct page *page, else wbc->wb_tcand_bytes -= min(bytes, wbc->wb_tcand_bytes); } +EXPORT_SYMBOL_GPL(wbc_account_io); /** * inode_congested - test whether an inode is congested diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 80cc1b35d460..ebb5e37455a0 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -2246,7 +2246,15 @@ static long fuse_dev_ioctl(struct file *file, unsigned int cmd, err = -EINVAL; if (old) { - struct fuse_dev *fud = fuse_get_dev(old); + struct fuse_dev *fud = NULL; + + /* + * Check against file->f_op because CUSE + * uses the same ioctl handler. + */ + if (old->f_op == file->f_op && + old->f_cred->user_ns == file->f_cred->user_ns) + fud = fuse_get_dev(old); if (fud) { mutex_lock(&fuse_mutex); diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 0cf74df68617..973c24ce59ad 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -1010,6 +1010,8 @@ struct file *hugetlb_file_setup(const char *name, size_t size, inode = hugetlbfs_get_inode(sb, NULL, S_IFREG | S_IRWXUGO, 0); if (!inode) goto out_dentry; + if (creat_flags == HUGETLB_SHMFS_INODE) + inode->i_flags |= S_PRIVATE; file = ERR_PTR(-ENOMEM); if (hugetlb_reserve_pages(inode, 0, diff --git a/fs/jfs/file.c b/fs/jfs/file.c index e98d39d75cf4..b9dc23cd04f2 100644 --- a/fs/jfs/file.c +++ b/fs/jfs/file.c @@ -76,7 +76,7 @@ static int jfs_open(struct inode *inode, struct file *file) if (ji->active_ag == -1) { struct jfs_sb_info *jfs_sb = JFS_SBI(inode->i_sb); ji->active_ag = BLKTOAG(addressPXD(&ji->ixpxd), jfs_sb); - atomic_inc( &jfs_sb->bmap->db_active[ji->active_ag]); + atomic_inc(&jfs_sb->bmap->db_active[ji->active_ag]); } spin_unlock_irq(&ji->ag_lock); } diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c index 6f1cb2b5ee28..41aa3ca6a6a4 100644 --- a/fs/jfs/inode.c +++ b/fs/jfs/inode.c @@ -134,11 +134,11 @@ int jfs_write_inode(struct inode *inode, struct writeback_control *wbc) * It has been committed since the last change, but was still * on the dirty inode list. */ - if (!test_cflag(COMMIT_Dirty, inode)) { + if (!test_cflag(COMMIT_Dirty, inode)) { /* Make sure committed changes hit the disk */ jfs_flush_journal(JFS_SBI(inode->i_sb)->log, wait); return 0; - } + } if (jfs_commit_inode(inode, wait)) { jfs_err("jfs_write_inode: jfs_commit_inode failed!"); diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index e33be921aa41..a5ac97b9a933 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c @@ -1160,7 +1160,7 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry, rc = dtModify(tid, new_dir, &new_dname, &ino, old_ip->i_ino, JFS_RENAME); if (rc) - goto out4; + goto out_tx; drop_nlink(new_ip); if (S_ISDIR(new_ip->i_mode)) { drop_nlink(new_ip); @@ -1185,7 +1185,7 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry, if ((new_size = commitZeroLink(tid, new_ip)) < 0) { txAbort(tid, 1); /* Marks FS Dirty */ rc = new_size; - goto out4; + goto out_tx; } tblk = tid_to_tblock(tid); tblk->xflag |= COMMIT_DELETE; @@ -1203,7 +1203,7 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry, if (rc) { jfs_err("jfs_rename didn't expect dtSearch to fail " "w/rc = %d", rc); - goto out4; + goto out_tx; } ino = old_ip->i_ino; @@ -1211,7 +1211,7 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry, if (rc) { if (rc == -EIO) jfs_err("jfs_rename: dtInsert returned -EIO"); - goto out4; + goto out_tx; } if (S_ISDIR(old_ip->i_mode)) inc_nlink(new_dir); @@ -1226,7 +1226,7 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry, jfs_err("jfs_rename did not expect dtDelete to return rc = %d", rc); txAbort(tid, 1); /* Marks Filesystem dirty */ - goto out4; + goto out_tx; } if (S_ISDIR(old_ip->i_mode)) { drop_nlink(old_dir); @@ -1285,7 +1285,7 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry, rc = txCommit(tid, ipcount, iplist, commit_flag); - out4: + out_tx: txEnd(tid); if (new_ip) mutex_unlock(&JFS_IP(new_ip)->commit_mutex); @@ -1308,13 +1308,6 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry, } if (new_ip && (new_ip->i_nlink == 0)) set_cflag(COMMIT_Nolink, new_ip); - out3: - free_UCSname(&new_dname); - out2: - free_UCSname(&old_dname); - out1: - if (new_ip && !S_ISDIR(new_ip->i_mode)) - IWRITE_UNLOCK(new_ip); /* * Truncating the directory index table is not guaranteed. It * may need to be done iteratively @@ -1325,7 +1318,13 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry, clear_cflag(COMMIT_Stale, old_dir); } - + if (new_ip && !S_ISDIR(new_ip->i_mode)) + IWRITE_UNLOCK(new_ip); + out3: + free_UCSname(&new_dname); + out2: + free_UCSname(&old_dname); + out1: jfs_info("jfs_rename: returning %d", rc); return rc; } diff --git a/fs/locks.c b/fs/locks.c index 653faabb07f4..d3d558ba4da7 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -862,12 +862,11 @@ static int posix_locks_deadlock(struct file_lock *caller_fl, * whether or not a lock was successfully freed by testing the return * value for -ENOENT. */ -static int flock_lock_file(struct file *filp, struct file_lock *request) +static int flock_lock_inode(struct inode *inode, struct file_lock *request) { struct file_lock *new_fl = NULL; struct file_lock *fl; struct file_lock_context *ctx; - struct inode *inode = file_inode(filp); int error = 0; bool found = false; LIST_HEAD(dispose); @@ -890,7 +889,7 @@ static int flock_lock_file(struct file *filp, struct file_lock *request) goto find_conflict; list_for_each_entry(fl, &ctx->flc_flock, fl_list) { - if (filp != fl->fl_file) + if (request->fl_file != fl->fl_file) continue; if (request->fl_type == fl->fl_type) goto out; @@ -1164,20 +1163,19 @@ int posix_lock_file(struct file *filp, struct file_lock *fl, EXPORT_SYMBOL(posix_lock_file); /** - * posix_lock_file_wait - Apply a POSIX-style lock to a file - * @filp: The file to apply the lock to + * posix_lock_inode_wait - Apply a POSIX-style lock to a file + * @inode: inode of file to which lock request should be applied * @fl: The lock to be applied * - * Add a POSIX style lock to a file. - * We merge adjacent & overlapping locks whenever possible. - * POSIX locks are sorted by owner task, then by starting address + * Variant of posix_lock_file_wait that does not take a filp, and so can be + * used after the filp has already been torn down. */ -int posix_lock_file_wait(struct file *filp, struct file_lock *fl) +int posix_lock_inode_wait(struct inode *inode, struct file_lock *fl) { int error; might_sleep (); for (;;) { - error = posix_lock_file(filp, fl, NULL); + error = __posix_lock_file(inode, fl, NULL); if (error != FILE_LOCK_DEFERRED) break; error = wait_event_interruptible(fl->fl_wait, !fl->fl_next); @@ -1189,7 +1187,7 @@ int posix_lock_file_wait(struct file *filp, struct file_lock *fl) } return error; } -EXPORT_SYMBOL(posix_lock_file_wait); +EXPORT_SYMBOL(posix_lock_inode_wait); /** * locks_mandatory_locked - Check for an active lock @@ -1851,18 +1849,18 @@ int fcntl_setlease(unsigned int fd, struct file *filp, long arg) } /** - * flock_lock_file_wait - Apply a FLOCK-style lock to a file - * @filp: The file to apply the lock to + * flock_lock_inode_wait - Apply a FLOCK-style lock to a file + * @inode: inode of the file to apply to * @fl: The lock to be applied * - * Add a FLOCK style lock to a file. + * Apply a FLOCK style lock request to an inode. */ -int flock_lock_file_wait(struct file *filp, struct file_lock *fl) +int flock_lock_inode_wait(struct inode *inode, struct file_lock *fl) { int error; might_sleep(); for (;;) { - error = flock_lock_file(filp, fl); + error = flock_lock_inode(inode, fl); if (error != FILE_LOCK_DEFERRED) break; error = wait_event_interruptible(fl->fl_wait, !fl->fl_next); @@ -1874,8 +1872,7 @@ int flock_lock_file_wait(struct file *filp, struct file_lock *fl) } return error; } - -EXPORT_SYMBOL(flock_lock_file_wait); +EXPORT_SYMBOL(flock_lock_inode_wait); /** * sys_flock: - flock() system call. @@ -2401,7 +2398,8 @@ locks_remove_flock(struct file *filp) .fl_type = F_UNLCK, .fl_end = OFFSET_MAX, }; - struct file_lock_context *flctx = file_inode(filp)->i_flctx; + struct inode *inode = file_inode(filp); + struct file_lock_context *flctx = inode->i_flctx; if (list_empty(&flctx->flc_flock)) return; @@ -2409,7 +2407,7 @@ locks_remove_flock(struct file *filp) if (filp->f_op->flock) filp->f_op->flock(filp, F_SETLKW, &fl); else - flock_lock_file(filp, &fl); + flock_lock_inode(inode, &fl); if (fl.fl_ops && fl.fl_ops->fl_release_private) fl.fl_ops->fl_release_private(&fl); diff --git a/fs/namei.c b/fs/namei.c index ae4e4c18b2ac..1c2105ed20c5 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -879,7 +879,7 @@ static inline int may_follow_link(struct nameidata *nd) return 0; /* Allowed if parent directory not sticky and world-writable. */ - parent = nd->path.dentry->d_inode; + parent = nd->inode; if ((parent->i_mode & (S_ISVTX|S_IWOTH)) != (S_ISVTX|S_IWOTH)) return 0; @@ -1954,8 +1954,13 @@ OK: continue; } } - if (unlikely(!d_can_lookup(nd->path.dentry))) + if (unlikely(!d_can_lookup(nd->path.dentry))) { + if (nd->flags & LOOKUP_RCU) { + if (unlazy_walk(nd, NULL, 0)) + return -ECHILD; + } return -ENOTDIR; + } } } diff --git a/fs/namespace.c b/fs/namespace.c index c7cb8a526c05..2b8aa15fd6df 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1361,6 +1361,36 @@ enum umount_tree_flags { UMOUNT_PROPAGATE = 2, UMOUNT_CONNECTED = 4, }; + +static bool disconnect_mount(struct mount *mnt, enum umount_tree_flags how) +{ + /* Leaving mounts connected is only valid for lazy umounts */ + if (how & UMOUNT_SYNC) + return true; + + /* A mount without a parent has nothing to be connected to */ + if (!mnt_has_parent(mnt)) + return true; + + /* Because the reference counting rules change when mounts are + * unmounted and connected, umounted mounts may not be + * connected to mounted mounts. + */ + if (!(mnt->mnt_parent->mnt.mnt_flags & MNT_UMOUNT)) + return true; + + /* Has it been requested that the mount remain connected? */ + if (how & UMOUNT_CONNECTED) + return false; + + /* Is the mount locked such that it needs to remain connected? */ + if (IS_MNT_LOCKED(mnt)) + return false; + + /* By default disconnect the mount */ + return true; +} + /* * mount_lock must be held * namespace_sem must be held for write @@ -1398,10 +1428,7 @@ static void umount_tree(struct mount *mnt, enum umount_tree_flags how) if (how & UMOUNT_SYNC) p->mnt.mnt_flags |= MNT_SYNC_UMOUNT; - disconnect = !(((how & UMOUNT_CONNECTED) && - mnt_has_parent(p) && - (p->mnt_parent->mnt.mnt_flags & MNT_UMOUNT)) || - IS_MNT_LOCKED_AND_LAZY(p)); + disconnect = disconnect_mount(p, how); pin_insert_group(&p->mnt_umount, &p->mnt_parent->mnt, disconnect ? &unmounted : NULL); @@ -1538,11 +1565,8 @@ void __detach_mounts(struct dentry *dentry) while (!hlist_empty(&mp->m_list)) { mnt = hlist_entry(mp->m_list.first, struct mount, mnt_mp_list); if (mnt->mnt.mnt_flags & MNT_UMOUNT) { - struct mount *p, *tmp; - list_for_each_entry_safe(p, tmp, &mnt->mnt_mounts, mnt_child) { - hlist_add_head(&p->mnt_umount.s_list, &unmounted); - umount_mnt(p); - } + hlist_add_head(&mnt->mnt_umount.s_list, &unmounted); + umount_mnt(mnt); } else umount_tree(mnt, UMOUNT_CONNECTED); } diff --git a/fs/nfs/client.c b/fs/nfs/client.c index ecebb406cc1a..4a90c9bb3135 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -775,7 +775,7 @@ static int nfs_init_server(struct nfs_server *server, server->options = data->options; server->caps |= NFS_CAP_HARDLINKS|NFS_CAP_SYMLINKS|NFS_CAP_FILEID| NFS_CAP_MODE|NFS_CAP_NLINK|NFS_CAP_OWNER|NFS_CAP_OWNER_GROUP| - NFS_CAP_ATIME|NFS_CAP_CTIME|NFS_CAP_MTIME|NFS_CAP_CHANGE_ATTR; + NFS_CAP_ATIME|NFS_CAP_CTIME|NFS_CAP_MTIME; if (data->rsize) server->rsize = nfs_block_size(data->rsize, NULL); diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index c12951b9551e..b3289d701eea 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -1852,7 +1852,7 @@ ff_layout_mirror_prepare_stats(struct nfs42_layoutstat_args *args, struct nfs42_layoutstat_devinfo *devinfo; int i; - for (i = 0; i <= FF_LAYOUT_MIRROR_COUNT(pls); i++) { + for (i = 0; i < FF_LAYOUT_MIRROR_COUNT(pls); i++) { if (*dev_count >= dev_limit) break; mirror = FF_LAYOUT_COMP(pls, i); diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index b77b328a06d7..0adc7d245b3d 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -442,8 +442,9 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, st nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR); if (fattr->valid & NFS_ATTR_FATTR_CHANGE) inode->i_version = fattr->change_attr; - else if (nfs_server_capable(inode, NFS_CAP_CHANGE_ATTR)) - nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR); + else + nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR + | NFS_INO_REVAL_PAGECACHE); if (fattr->valid & NFS_ATTR_FATTR_SIZE) inode->i_size = nfs_size_to_loff_t(fattr->size); else @@ -1244,9 +1245,11 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat if (fattr->valid & NFS_ATTR_FATTR_SIZE) { cur_size = i_size_read(inode); new_isize = nfs_size_to_loff_t(fattr->size); - if (cur_size != new_isize && nfsi->nrequests == 0) + if (cur_size != new_isize) invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; } + if (nfsi->nrequests != 0) + invalid &= ~NFS_INO_REVAL_PAGECACHE; /* Have any file permissions changed? */ if ((fattr->valid & NFS_ATTR_FATTR_MODE) && (inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO)) @@ -1684,13 +1687,12 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) invalid |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_DATA | NFS_INO_INVALID_ACCESS - | NFS_INO_INVALID_ACL - | NFS_INO_REVAL_PAGECACHE; + | NFS_INO_INVALID_ACL; if (S_ISDIR(inode->i_mode)) nfs_force_lookup_revalidate(inode); inode->i_version = fattr->change_attr; } - } else if (server->caps & NFS_CAP_CHANGE_ATTR) + } else nfsi->cache_validity |= save_cache_validity; if (fattr->valid & NFS_ATTR_FATTR_MTIME) { @@ -1717,7 +1719,6 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) if ((nfsi->nrequests == 0) || new_isize > cur_isize) { i_size_write(inode, new_isize); invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; - invalid &= ~NFS_INO_REVAL_PAGECACHE; } dprintk("NFS: isize change on server for file %s/%ld " "(%Ld to %Ld)\n", diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 7e3c4604bea8..9b372b845f6a 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -296,6 +296,22 @@ extern struct rpc_procinfo nfs4_procedures[]; #ifdef CONFIG_NFS_V4_SECURITY_LABEL extern struct nfs4_label *nfs4_label_alloc(struct nfs_server *server, gfp_t flags); +static inline struct nfs4_label * +nfs4_label_copy(struct nfs4_label *dst, struct nfs4_label *src) +{ + if (!dst || !src) + return NULL; + + if (src->len > NFS4_MAXLABELLEN) + return NULL; + + dst->lfs = src->lfs; + dst->pi = src->pi; + dst->len = src->len; + memcpy(dst->label, src->label, src->len); + + return dst; +} static inline void nfs4_label_free(struct nfs4_label *label) { if (label) { @@ -316,6 +332,11 @@ static inline void nfs4_label_free(void *label) {} static inline void nfs_zap_label_cache_locked(struct nfs_inode *nfsi) { } +static inline struct nfs4_label * +nfs4_label_copy(struct nfs4_label *dst, struct nfs4_label *src) +{ + return NULL; +} #endif /* CONFIG_NFS_V4_SECURITY_LABEL */ /* proc.c */ diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index f486b80f927a..d731bbf974aa 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -135,7 +135,7 @@ int nfs42_proc_deallocate(struct file *filep, loff_t offset, loff_t len) return err; } -loff_t nfs42_proc_llseek(struct file *filep, loff_t offset, int whence) +static loff_t _nfs42_proc_llseek(struct file *filep, loff_t offset, int whence) { struct inode *inode = file_inode(filep); struct nfs42_seek_args args = { @@ -171,6 +171,23 @@ loff_t nfs42_proc_llseek(struct file *filep, loff_t offset, int whence) return vfs_setpos(filep, res.sr_offset, inode->i_sb->s_maxbytes); } +loff_t nfs42_proc_llseek(struct file *filep, loff_t offset, int whence) +{ + struct nfs_server *server = NFS_SERVER(file_inode(filep)); + struct nfs4_exception exception = { }; + int err; + + do { + err = _nfs42_proc_llseek(filep, offset, whence); + if (err == -ENOTSUPP) + return -EOPNOTSUPP; + err = nfs4_handle_exception(server, err, &exception); + } while (exception.retry); + + return err; +} + + static void nfs42_layoutstat_prepare(struct rpc_task *task, void *calldata) { diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 6f228b5af819..3acb1eb72930 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -467,7 +467,10 @@ static void do_renew_lease(struct nfs_client *clp, unsigned long timestamp) static void renew_lease(const struct nfs_server *server, unsigned long timestamp) { - do_renew_lease(server->nfs_client, timestamp); + struct nfs_client *clp = server->nfs_client; + + if (!nfs4_has_session(clp)) + do_renew_lease(clp, timestamp); } struct nfs4_call_sync_data { @@ -616,8 +619,7 @@ int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res) clp = session->clp; do_renew_lease(clp, res->sr_timestamp); /* Check sequence flags */ - if (res->sr_status_flags != 0) - nfs4_schedule_lease_recovery(clp); + nfs41_handle_sequence_flag_errors(clp, res->sr_status_flags); nfs41_update_target_slotid(slot->table, slot, res); break; case 1: @@ -910,6 +912,7 @@ struct nfs4_opendata { struct nfs_open_confirmres c_res; struct nfs4_string owner_name; struct nfs4_string group_name; + struct nfs4_label *a_label; struct nfs_fattr f_attr; struct nfs4_label *f_label; struct dentry *dir; @@ -1013,6 +1016,10 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, if (IS_ERR(p->f_label)) goto err_free_p; + p->a_label = nfs4_label_alloc(server, gfp_mask); + if (IS_ERR(p->a_label)) + goto err_free_f; + alloc_seqid = server->nfs_client->cl_mvops->alloc_seqid; p->o_arg.seqid = alloc_seqid(&sp->so_seqid, gfp_mask); if (IS_ERR(p->o_arg.seqid)) @@ -1041,7 +1048,7 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, p->o_arg.server = server; p->o_arg.bitmask = nfs4_bitmask(server, label); p->o_arg.open_bitmap = &nfs4_fattr_bitmap[0]; - p->o_arg.label = label; + p->o_arg.label = nfs4_label_copy(p->a_label, label); p->o_arg.claim = nfs4_map_atomic_open_claim(server, claim); switch (p->o_arg.claim) { case NFS4_OPEN_CLAIM_NULL: @@ -1074,6 +1081,8 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, return p; err_free_label: + nfs4_label_free(p->a_label); +err_free_f: nfs4_label_free(p->f_label); err_free_p: kfree(p); @@ -1093,6 +1102,7 @@ static void nfs4_opendata_free(struct kref *kref) nfs4_put_open_state(p->state); nfs4_put_state_owner(p->owner); + nfs4_label_free(p->a_label); nfs4_label_free(p->f_label); dput(p->dir); @@ -1198,12 +1208,15 @@ static bool nfs_need_update_open_stateid(struct nfs4_state *state, static void nfs_resync_open_stateid_locked(struct nfs4_state *state) { + if (!(state->n_wronly || state->n_rdonly || state->n_rdwr)) + return; if (state->n_wronly) set_bit(NFS_O_WRONLY_STATE, &state->flags); if (state->n_rdonly) set_bit(NFS_O_RDONLY_STATE, &state->flags); if (state->n_rdwr) set_bit(NFS_O_RDWR_STATE, &state->flags); + set_bit(NFS_OPEN_STATE, &state->flags); } static void nfs_clear_open_stateid_locked(struct nfs4_state *state, @@ -5439,15 +5452,15 @@ static int nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock * return err; } -static int do_vfs_lock(struct file *file, struct file_lock *fl) +static int do_vfs_lock(struct inode *inode, struct file_lock *fl) { int res = 0; switch (fl->fl_flags & (FL_POSIX|FL_FLOCK)) { case FL_POSIX: - res = posix_lock_file_wait(file, fl); + res = posix_lock_inode_wait(inode, fl); break; case FL_FLOCK: - res = flock_lock_file_wait(file, fl); + res = flock_lock_inode_wait(inode, fl); break; default: BUG(); @@ -5484,7 +5497,6 @@ static struct nfs4_unlockdata *nfs4_alloc_unlockdata(struct file_lock *fl, atomic_inc(&lsp->ls_count); /* Ensure we don't close file until we're done freeing locks! */ p->ctx = get_nfs_open_context(ctx); - get_file(fl->fl_file); memcpy(&p->fl, fl, sizeof(p->fl)); p->server = NFS_SERVER(inode); return p; @@ -5496,7 +5508,6 @@ static void nfs4_locku_release_calldata(void *data) nfs_free_seqid(calldata->arg.seqid); nfs4_put_lock_state(calldata->lsp); put_nfs_open_context(calldata->ctx); - fput(calldata->fl.fl_file); kfree(calldata); } @@ -5509,7 +5520,7 @@ static void nfs4_locku_done(struct rpc_task *task, void *data) switch (task->tk_status) { case 0: renew_lease(calldata->server, calldata->timestamp); - do_vfs_lock(calldata->fl.fl_file, &calldata->fl); + do_vfs_lock(calldata->lsp->ls_state->inode, &calldata->fl); if (nfs4_update_lock_stateid(calldata->lsp, &calldata->res.stateid)) break; @@ -5617,7 +5628,7 @@ static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock * mutex_lock(&sp->so_delegreturn_mutex); /* Exclude nfs4_reclaim_open_stateid() - note nesting! */ down_read(&nfsi->rwsem); - if (do_vfs_lock(request->fl_file, request) == -ENOENT) { + if (do_vfs_lock(inode, request) == -ENOENT) { up_read(&nfsi->rwsem); mutex_unlock(&sp->so_delegreturn_mutex); goto out; @@ -5758,7 +5769,7 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata) data->timestamp); if (data->arg.new_lock) { data->fl.fl_flags &= ~(FL_SLEEP | FL_ACCESS); - if (do_vfs_lock(data->fl.fl_file, &data->fl) < 0) { + if (do_vfs_lock(lsp->ls_state->inode, &data->fl) < 0) { rpc_restart_call_prepare(task); break; } @@ -6000,7 +6011,7 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock if (status != 0) goto out; request->fl_flags |= FL_ACCESS; - status = do_vfs_lock(request->fl_file, request); + status = do_vfs_lock(state->inode, request); if (status < 0) goto out; down_read(&nfsi->rwsem); @@ -6008,7 +6019,7 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock /* Yes: cache locks! */ /* ...but avoid races with delegation recall... */ request->fl_flags = fl_flags & ~FL_SLEEP; - status = do_vfs_lock(request->fl_file, request); + status = do_vfs_lock(state->inode, request); up_read(&nfsi->rwsem); goto out; } @@ -7573,13 +7584,8 @@ static int nfs4_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred) goto out; } ret = rpc_wait_for_completion_task(task); - if (!ret) { - struct nfs4_sequence_res *res = task->tk_msg.rpc_resp; - - if (task->tk_status == 0) - nfs41_handle_sequence_flag_errors(clp, res->sr_status_flags); + if (!ret) ret = task->tk_status; - } rpc_put_task(task); out: dprintk("<-- %s status=%d\n", __func__, ret); @@ -7967,16 +7973,17 @@ static void nfs4_layoutreturn_release(void *calldata) { struct nfs4_layoutreturn *lrp = calldata; struct pnfs_layout_hdr *lo = lrp->args.layout; + LIST_HEAD(freeme); dprintk("--> %s\n", __func__); spin_lock(&lo->plh_inode->i_lock); if (lrp->res.lrs_present) pnfs_set_layout_stateid(lo, &lrp->res.stateid, true); + pnfs_mark_matching_lsegs_invalid(lo, &freeme, &lrp->args.range); pnfs_clear_layoutreturn_waitbit(lo); - clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE, &lo->plh_flags); - rpc_wake_up(&NFS_SERVER(lo->plh_inode)->roc_rpcwaitq); lo->plh_block_lgets--; spin_unlock(&lo->plh_inode->i_lock); + pnfs_free_lseg_list(&freeme); pnfs_put_layout_hdr(lrp->args.layout); nfs_iput_and_deactive(lrp->inode); kfree(calldata); @@ -8590,7 +8597,6 @@ static const struct nfs4_minor_version_ops nfs_v4_0_minor_ops = { .minor_version = 0, .init_caps = NFS_CAP_READDIRPLUS | NFS_CAP_ATOMIC_OPEN - | NFS_CAP_CHANGE_ATTR | NFS_CAP_POSIX_LOCK, .init_client = nfs40_init_client, .shutdown_client = nfs40_shutdown_client, @@ -8616,7 +8622,6 @@ static const struct nfs4_minor_version_ops nfs_v4_1_minor_ops = { .minor_version = 1, .init_caps = NFS_CAP_READDIRPLUS | NFS_CAP_ATOMIC_OPEN - | NFS_CAP_CHANGE_ATTR | NFS_CAP_POSIX_LOCK | NFS_CAP_STATEID_NFSV41 | NFS_CAP_ATOMIC_OPEN_V1, @@ -8639,7 +8644,6 @@ static const struct nfs4_minor_version_ops nfs_v4_2_minor_ops = { .minor_version = 2, .init_caps = NFS_CAP_READDIRPLUS | NFS_CAP_ATOMIC_OPEN - | NFS_CAP_CHANGE_ATTR | NFS_CAP_POSIX_LOCK | NFS_CAP_STATEID_NFSV41 | NFS_CAP_ATOMIC_OPEN_V1 diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 605840dc89cf..f2e2ad894461 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -2191,25 +2191,35 @@ static void nfs41_handle_server_reboot(struct nfs_client *clp) } } -static void nfs41_handle_state_revoked(struct nfs_client *clp) +static void nfs41_handle_all_state_revoked(struct nfs_client *clp) { nfs4_reset_all_state(clp); dprintk("%s: state revoked on server %s\n", __func__, clp->cl_hostname); } +static void nfs41_handle_some_state_revoked(struct nfs_client *clp) +{ + nfs4_state_mark_reclaim_helper(clp, nfs4_state_mark_reclaim_nograce); + nfs4_schedule_state_manager(clp); + + dprintk("%s: state revoked on server %s\n", __func__, clp->cl_hostname); +} + static void nfs41_handle_recallable_state_revoked(struct nfs_client *clp) { - /* This will need to handle layouts too */ - nfs_expire_all_delegations(clp); + /* FIXME: For now, we destroy all layouts. */ + pnfs_destroy_all_layouts(clp); + /* FIXME: For now, we test all delegations+open state+locks. */ + nfs41_handle_some_state_revoked(clp); dprintk("%s: Recallable state revoked on server %s!\n", __func__, clp->cl_hostname); } static void nfs41_handle_backchannel_fault(struct nfs_client *clp) { - nfs_expire_all_delegations(clp); - if (test_and_set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state) == 0) - nfs4_schedule_state_manager(clp); + set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state); + nfs4_schedule_state_manager(clp); + dprintk("%s: server %s declared a backchannel fault\n", __func__, clp->cl_hostname); } @@ -2231,10 +2241,11 @@ void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags) if (flags & SEQ4_STATUS_RESTART_RECLAIM_NEEDED) nfs41_handle_server_reboot(clp); - if (flags & (SEQ4_STATUS_EXPIRED_ALL_STATE_REVOKED | - SEQ4_STATUS_EXPIRED_SOME_STATE_REVOKED | + if (flags & (SEQ4_STATUS_EXPIRED_ALL_STATE_REVOKED)) + nfs41_handle_all_state_revoked(clp); + if (flags & (SEQ4_STATUS_EXPIRED_SOME_STATE_REVOKED | SEQ4_STATUS_ADMIN_STATE_REVOKED)) - nfs41_handle_state_revoked(clp); + nfs41_handle_some_state_revoked(clp); if (flags & SEQ4_STATUS_LEASE_MOVED) nfs4_schedule_lease_moved_recovery(clp); if (flags & SEQ4_STATUS_RECALLABLE_STATE_REVOKED) diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 1da68d3b1eda..4984bbe55ff1 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -1100,8 +1100,6 @@ static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc) mirror->pg_base = 0; mirror->pg_recoalesce = 0; - desc->pg_moreio = 0; - while (!list_empty(&head)) { struct nfs_page *req; @@ -1109,8 +1107,11 @@ static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc) nfs_list_remove_request(req); if (__nfs_pageio_add_request(desc, req)) continue; - if (desc->pg_error < 0) + if (desc->pg_error < 0) { + list_splice_tail(&head, &mirror->pg_list); + mirror->pg_recoalesce = 1; return 0; + } break; } } while (mirror->pg_recoalesce); diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 0ba9a02c9566..70bf706b1090 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -352,7 +352,7 @@ pnfs_layout_need_return(struct pnfs_layout_hdr *lo, { struct pnfs_layout_segment *s; - if (!test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags)) + if (!test_and_clear_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags)) return false; list_for_each_entry(s, &lo->plh_segs, pls_list) @@ -362,6 +362,18 @@ pnfs_layout_need_return(struct pnfs_layout_hdr *lo, return true; } +static bool +pnfs_prepare_layoutreturn(struct pnfs_layout_hdr *lo) +{ + if (test_and_set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) + return false; + lo->plh_return_iomode = 0; + lo->plh_block_lgets++; + pnfs_get_layout_hdr(lo); + clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE, &lo->plh_flags); + return true; +} + static void pnfs_layoutreturn_before_put_lseg(struct pnfs_layout_segment *lseg, struct pnfs_layout_hdr *lo, struct inode *inode) { @@ -372,17 +384,16 @@ static void pnfs_layoutreturn_before_put_lseg(struct pnfs_layout_segment *lseg, if (pnfs_layout_need_return(lo, lseg)) { nfs4_stateid stateid; enum pnfs_iomode iomode; + bool send; stateid = lo->plh_stateid; iomode = lo->plh_return_iomode; - /* decreased in pnfs_send_layoutreturn() */ - lo->plh_block_lgets++; - lo->plh_return_iomode = 0; + send = pnfs_prepare_layoutreturn(lo); spin_unlock(&inode->i_lock); - pnfs_get_layout_hdr(lo); - - /* Send an async layoutreturn so we dont deadlock */ - pnfs_send_layoutreturn(lo, stateid, iomode, false); + if (send) { + /* Send an async layoutreturn so we dont deadlock */ + pnfs_send_layoutreturn(lo, stateid, iomode, false); + } } else spin_unlock(&inode->i_lock); } @@ -411,6 +422,10 @@ pnfs_put_lseg(struct pnfs_layout_segment *lseg) pnfs_layoutreturn_before_put_lseg(lseg, lo, inode); if (atomic_dec_and_lock(&lseg->pls_refcount, &inode->i_lock)) { + if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags)) { + spin_unlock(&inode->i_lock); + return; + } pnfs_get_layout_hdr(lo); pnfs_layout_remove_lseg(lo, lseg); spin_unlock(&inode->i_lock); @@ -451,6 +466,8 @@ pnfs_put_lseg_locked(struct pnfs_layout_segment *lseg) test_bit(NFS_LSEG_VALID, &lseg->pls_flags)); if (atomic_dec_and_test(&lseg->pls_refcount)) { struct pnfs_layout_hdr *lo = lseg->pls_layout; + if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags)) + return; pnfs_get_layout_hdr(lo); pnfs_layout_remove_lseg(lo, lseg); pnfs_free_lseg_async(lseg); @@ -924,6 +941,7 @@ void pnfs_clear_layoutreturn_waitbit(struct pnfs_layout_hdr *lo) clear_bit_unlock(NFS_LAYOUT_RETURN, &lo->plh_flags); smp_mb__after_atomic(); wake_up_bit(&lo->plh_flags, NFS_LAYOUT_RETURN); + rpc_wake_up(&NFS_SERVER(lo->plh_inode)->roc_rpcwaitq); } static int @@ -978,6 +996,7 @@ _pnfs_return_layout(struct inode *ino) LIST_HEAD(tmp_list); nfs4_stateid stateid; int status = 0, empty; + bool send; dprintk("NFS: %s for inode %lu\n", __func__, ino->i_ino); @@ -1007,17 +1026,18 @@ _pnfs_return_layout(struct inode *ino) /* Don't send a LAYOUTRETURN if list was initially empty */ if (empty) { spin_unlock(&ino->i_lock); - pnfs_put_layout_hdr(lo); dprintk("NFS: %s no layout segments to return\n", __func__); - goto out; + goto out_put_layout_hdr; } set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags); - lo->plh_block_lgets++; + send = pnfs_prepare_layoutreturn(lo); spin_unlock(&ino->i_lock); pnfs_free_lseg_list(&tmp_list); - - status = pnfs_send_layoutreturn(lo, stateid, IOMODE_ANY, true); + if (send) + status = pnfs_send_layoutreturn(lo, stateid, IOMODE_ANY, true); +out_put_layout_hdr: + pnfs_put_layout_hdr(lo); out: dprintk("<-- %s status: %d\n", __func__, status); return status; @@ -1097,13 +1117,9 @@ bool pnfs_roc(struct inode *ino) out_noroc: if (lo) { stateid = lo->plh_stateid; - layoutreturn = - test_and_clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE, - &lo->plh_flags); - if (layoutreturn) { - lo->plh_block_lgets++; - pnfs_get_layout_hdr(lo); - } + if (test_and_clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE, + &lo->plh_flags)) + layoutreturn = pnfs_prepare_layoutreturn(lo); } spin_unlock(&ino->i_lock); if (layoutreturn) { @@ -1146,15 +1162,18 @@ bool pnfs_roc_drain(struct inode *ino, u32 *barrier, struct rpc_task *task) struct pnfs_layout_segment *lseg; nfs4_stateid stateid; u32 current_seqid; - bool found = false, layoutreturn = false; + bool layoutreturn = false; spin_lock(&ino->i_lock); - list_for_each_entry(lseg, &nfsi->layout->plh_segs, pls_list) - if (test_bit(NFS_LSEG_ROC, &lseg->pls_flags)) { - rpc_sleep_on(&NFS_SERVER(ino)->roc_rpcwaitq, task, NULL); - found = true; - goto out; - } + list_for_each_entry(lseg, &nfsi->layout->plh_segs, pls_list) { + if (!test_bit(NFS_LSEG_ROC, &lseg->pls_flags)) + continue; + if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags)) + continue; + rpc_sleep_on(&NFS_SERVER(ino)->roc_rpcwaitq, task, NULL); + spin_unlock(&ino->i_lock); + return true; + } lo = nfsi->layout; current_seqid = be32_to_cpu(lo->plh_stateid.seqid); @@ -1162,23 +1181,19 @@ bool pnfs_roc_drain(struct inode *ino, u32 *barrier, struct rpc_task *task) * a barrier, we choose the worst-case barrier. */ *barrier = current_seqid + atomic_read(&lo->plh_outstanding); -out: - if (!found) { - stateid = lo->plh_stateid; - layoutreturn = - test_and_clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE, - &lo->plh_flags); - if (layoutreturn) { - lo->plh_block_lgets++; - pnfs_get_layout_hdr(lo); - } - } + stateid = lo->plh_stateid; + if (test_and_clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE, + &lo->plh_flags)) + layoutreturn = pnfs_prepare_layoutreturn(lo); + if (test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) + rpc_sleep_on(&NFS_SERVER(ino)->roc_rpcwaitq, task, NULL); + spin_unlock(&ino->i_lock); if (layoutreturn) { - rpc_sleep_on(&NFS_SERVER(ino)->roc_rpcwaitq, task, NULL); pnfs_send_layoutreturn(lo, stateid, IOMODE_ANY, false); + return true; } - return found; + return false; } /* @@ -1695,7 +1710,6 @@ void pnfs_error_mark_layout_for_return(struct inode *inode, spin_lock(&inode->i_lock); /* set failure bit so that pnfs path will be retried later */ pnfs_layout_set_fail_bit(lo, iomode); - set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags); if (lo->plh_return_iomode == 0) lo->plh_return_iomode = range.iomode; else if (lo->plh_return_iomode != range.iomode) @@ -2207,13 +2221,12 @@ pnfs_layoutcommit_inode(struct inode *inode, bool sync) if (ld->prepare_layoutcommit) { status = ld->prepare_layoutcommit(&data->args); if (status) { + put_rpccred(data->cred); spin_lock(&inode->i_lock); set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags); if (end_pos > nfsi->layout->plh_lwb) nfsi->layout->plh_lwb = end_pos; - spin_unlock(&inode->i_lock); - put_rpccred(data->cred); - goto clear_layoutcommitting; + goto out_unlock; } } diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 65869ca9c851..75a35a1afa79 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1379,24 +1379,27 @@ static void nfs_writeback_check_extend(struct nfs_pgio_header *hdr, { struct nfs_pgio_args *argp = &hdr->args; struct nfs_pgio_res *resp = &hdr->res; + u64 size = argp->offset + resp->count; if (!(fattr->valid & NFS_ATTR_FATTR_SIZE)) + fattr->size = size; + if (nfs_size_to_loff_t(fattr->size) < i_size_read(hdr->inode)) { + fattr->valid &= ~NFS_ATTR_FATTR_SIZE; return; - if (argp->offset + resp->count != fattr->size) - return; - if (nfs_size_to_loff_t(fattr->size) < i_size_read(hdr->inode)) + } + if (size != fattr->size) return; /* Set attribute barrier */ nfs_fattr_set_barrier(fattr); + /* ...and update size */ + fattr->valid |= NFS_ATTR_FATTR_SIZE; } void nfs_writeback_update_inode(struct nfs_pgio_header *hdr) { - struct nfs_fattr *fattr = hdr->res.fattr; + struct nfs_fattr *fattr = &hdr->fattr; struct inode *inode = hdr->inode; - if (fattr == NULL) - return; spin_lock(&inode->i_lock); nfs_writeback_check_extend(hdr, fattr); nfs_post_op_update_inode_force_wcc_locked(inode, fattr); diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c index 6904213a4363..ebf90e487c75 100644 --- a/fs/nfsd/nfs4layouts.c +++ b/fs/nfsd/nfs4layouts.c @@ -212,6 +212,7 @@ nfsd4_alloc_layout_stateid(struct nfsd4_compound_state *cstate, BUG_ON(!ls->ls_file); if (nfsd4_layout_setlease(ls)) { + fput(ls->ls_file); put_nfs4_file(fp); kmem_cache_free(nfs4_layout_stateid_cache, ls); return NULL; diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 61dfb33f0559..95202719a1fd 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -4396,9 +4396,9 @@ laundromat_main(struct work_struct *laundry) queue_delayed_work(laundry_wq, &nn->laundromat_work, t*HZ); } -static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_ol_stateid *stp) +static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_stid *stp) { - if (!fh_match(&fhp->fh_handle, &stp->st_stid.sc_file->fi_fhandle)) + if (!fh_match(&fhp->fh_handle, &stp->sc_file->fi_fhandle)) return nfserr_bad_stateid; return nfs_ok; } @@ -4601,9 +4601,6 @@ nfs4_check_olstateid(struct svc_fh *fhp, struct nfs4_ol_stateid *ols, int flags) { __be32 status; - status = nfs4_check_fh(fhp, ols); - if (status) - return status; status = nfsd4_check_openowner_confirmed(ols); if (status) return status; @@ -4690,6 +4687,9 @@ nfs4_preprocess_stateid_op(struct svc_rqst *rqstp, status = nfserr_bad_stateid; break; } + if (status) + goto out; + status = nfs4_check_fh(fhp, s); done: if (!status && filpp) @@ -4798,7 +4798,7 @@ static __be32 nfs4_seqid_op_checks(struct nfsd4_compound_state *cstate, stateid_ status = check_stateid_generation(stateid, &stp->st_stid.sc_stateid, nfsd4_has_session(cstate)); if (status) return status; - return nfs4_check_fh(current_fh, stp); + return nfs4_check_fh(current_fh, &stp->st_stid); } /* diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 54633858733a..75e0563c09d1 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -2143,6 +2143,7 @@ nfsd4_encode_aclname(struct xdr_stream *xdr, struct svc_rqst *rqstp, #define WORD0_ABSENT_FS_ATTRS (FATTR4_WORD0_FS_LOCATIONS | FATTR4_WORD0_FSID | \ FATTR4_WORD0_RDATTR_ERROR) #define WORD1_ABSENT_FS_ATTRS FATTR4_WORD1_MOUNTED_ON_FILEID +#define WORD2_ABSENT_FS_ATTRS 0 #ifdef CONFIG_NFSD_V4_SECURITY_LABEL static inline __be32 @@ -2171,7 +2172,7 @@ nfsd4_encode_security_label(struct xdr_stream *xdr, struct svc_rqst *rqstp, { return 0; } #endif -static __be32 fattr_handle_absent_fs(u32 *bmval0, u32 *bmval1, u32 *rdattr_err) +static __be32 fattr_handle_absent_fs(u32 *bmval0, u32 *bmval1, u32 *bmval2, u32 *rdattr_err) { /* As per referral draft: */ if (*bmval0 & ~WORD0_ABSENT_FS_ATTRS || @@ -2184,6 +2185,7 @@ static __be32 fattr_handle_absent_fs(u32 *bmval0, u32 *bmval1, u32 *rdattr_err) } *bmval0 &= WORD0_ABSENT_FS_ATTRS; *bmval1 &= WORD1_ABSENT_FS_ATTRS; + *bmval2 &= WORD2_ABSENT_FS_ATTRS; return 0; } @@ -2246,8 +2248,7 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, BUG_ON(bmval2 & ~nfsd_suppattrs2(minorversion)); if (exp->ex_fslocs.migrated) { - BUG_ON(bmval[2]); - status = fattr_handle_absent_fs(&bmval0, &bmval1, &rdattr_err); + status = fattr_handle_absent_fs(&bmval0, &bmval1, &bmval2, &rdattr_err); if (status) goto out; } @@ -2286,8 +2287,8 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, } #ifdef CONFIG_NFSD_V4_SECURITY_LABEL - if ((bmval[2] & FATTR4_WORD2_SECURITY_LABEL) || - bmval[0] & FATTR4_WORD0_SUPPORTED_ATTRS) { + if ((bmval2 & FATTR4_WORD2_SECURITY_LABEL) || + bmval0 & FATTR4_WORD0_SUPPORTED_ATTRS) { err = security_inode_getsecctx(d_inode(dentry), &context, &contextlen); contextsupport = (err == 0); diff --git a/fs/notify/mark.c b/fs/notify/mark.c index 92e48c70f0f0..39ddcaf0918f 100644 --- a/fs/notify/mark.c +++ b/fs/notify/mark.c @@ -412,16 +412,36 @@ void fsnotify_clear_marks_by_group_flags(struct fsnotify_group *group, unsigned int flags) { struct fsnotify_mark *lmark, *mark; + LIST_HEAD(to_free); + /* + * We have to be really careful here. Anytime we drop mark_mutex, e.g. + * fsnotify_clear_marks_by_inode() can come and free marks. Even in our + * to_free list so we have to use mark_mutex even when accessing that + * list. And freeing mark requires us to drop mark_mutex. So we can + * reliably free only the first mark in the list. That's why we first + * move marks to free to to_free list in one go and then free marks in + * to_free list one by one. + */ mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING); list_for_each_entry_safe(mark, lmark, &group->marks_list, g_list) { - if (mark->flags & flags) { - fsnotify_get_mark(mark); - fsnotify_destroy_mark_locked(mark, group); - fsnotify_put_mark(mark); - } + if (mark->flags & flags) + list_move(&mark->g_list, &to_free); } mutex_unlock(&group->mark_mutex); + + while (1) { + mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING); + if (list_empty(&to_free)) { + mutex_unlock(&group->mark_mutex); + break; + } + mark = list_first_entry(&to_free, struct fsnotify_mark, g_list); + fsnotify_get_mark(mark); + fsnotify_destroy_mark_locked(mark, group); + mutex_unlock(&group->mark_mutex); + fsnotify_put_mark(mark); + } } /* diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 1a35c6139656..0f5fd9db8194 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -685,7 +685,7 @@ static int ocfs2_direct_IO_zero_extend(struct ocfs2_super *osb, if (p_cpos && !(ext_flags & OCFS2_EXT_UNWRITTEN)) { u64 s = i_size_read(inode); - sector_t sector = (p_cpos << (osb->s_clustersize_bits - 9)) + + sector_t sector = ((u64)p_cpos << (osb->s_clustersize_bits - 9)) + (do_div(s, osb->s_clustersize) >> 9); ret = blkdev_issue_zeroout(osb->sb->s_bdev, sector, @@ -910,7 +910,7 @@ static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb, BUG_ON(!p_cpos || (ext_flags & OCFS2_EXT_UNWRITTEN)); ret = blkdev_issue_zeroout(osb->sb->s_bdev, - p_cpos << (osb->s_clustersize_bits - 9), + (u64)p_cpos << (osb->s_clustersize_bits - 9), zero_len_head >> 9, GFP_NOFS, false); if (ret < 0) mlog_errno(ret); diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 8b23aa2f52dd..23157e40dd74 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -4025,9 +4025,13 @@ static void ocfs2_downconvert_thread_do_work(struct ocfs2_super *osb) osb->dc_work_sequence = osb->dc_wake_sequence; processed = osb->blocked_lock_count; - while (processed) { - BUG_ON(list_empty(&osb->blocked_lock_list)); - + /* + * blocked lock processing in this loop might call iput which can + * remove items off osb->blocked_lock_list. Downconvert up to + * 'processed' number of locks, but stop short if we had some + * removed in ocfs2_mark_lockres_freeing when downconverting. + */ + while (processed && !list_empty(&osb->blocked_lock_list)) { lockres = list_entry(osb->blocked_lock_list.next, struct ocfs2_lock_res, l_blocked_list); list_del_init(&lockres->l_blocked_list); diff --git a/fs/pnode.h b/fs/pnode.h index 7114ce6e6b9e..0fcdbe7ca648 100644 --- a/fs/pnode.h +++ b/fs/pnode.h @@ -20,8 +20,6 @@ #define SET_MNT_MARK(m) ((m)->mnt.mnt_flags |= MNT_MARKED) #define CLEAR_MNT_MARK(m) ((m)->mnt.mnt_flags &= ~MNT_MARKED) #define IS_MNT_LOCKED(m) ((m)->mnt.mnt_flags & MNT_LOCKED) -#define IS_MNT_LOCKED_AND_LAZY(m) \ - (((m)->mnt.mnt_flags & (MNT_LOCKED|MNT_SYNC_UMOUNT)) == MNT_LOCKED) #define CL_EXPIRE 0x01 #define CL_SLAVE 0x02 diff --git a/fs/proc/Kconfig b/fs/proc/Kconfig index d751fcb637bb..1ade1206bb89 100644 --- a/fs/proc/Kconfig +++ b/fs/proc/Kconfig @@ -75,3 +75,9 @@ config PROC_PAGE_MONITOR config PROC_CHILDREN bool "Include /proc/<pid>/task/<tid>/children file" default n + help + Provides a fast way to retrieve first level children pids of a task. See + <file:Documentation/filesystems/proc.txt> for more information. + + Say Y if you are running any user-space software which takes benefit from + this interface. For example, rkt is such a piece of software. diff --git a/fs/proc/base.c b/fs/proc/base.c index 87782e874b6a..aa50d1ac28fc 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -243,6 +243,11 @@ static ssize_t proc_pid_cmdline_read(struct file *file, char __user *buf, len1 = arg_end - arg_start; len2 = env_end - env_start; + /* Empty ARGV. */ + if (len1 == 0) { + rv = 0; + goto out_free_page; + } /* * Inherently racy -- command line shares address space * with code and data. diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index 91a4e6426321..92e6726f6e37 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c @@ -92,7 +92,7 @@ static size_t get_kcore_size(int *nphdr, size_t *elf_buflen) roundup(sizeof(CORE_STR), 4)) + roundup(sizeof(struct elf_prstatus), 4) + roundup(sizeof(struct elf_prpsinfo), 4) + - roundup(sizeof(struct task_struct), 4); + roundup(arch_task_struct_size, 4); *elf_buflen = PAGE_ALIGN(*elf_buflen); return size + *elf_buflen; } @@ -415,7 +415,7 @@ static void elf_kcore_store_hdr(char *bufp, int nphdr, int dataoff) /* set up the task structure */ notes[2].name = CORE_STR; notes[2].type = NT_TASKSTRUCT; - notes[2].datasz = sizeof(struct task_struct); + notes[2].datasz = arch_task_struct_size; notes[2].data = current; nhdr->p_filesz += notesize(¬es[2]); diff --git a/fs/signalfd.c b/fs/signalfd.c index 7e412ad74836..270221fcef42 100644 --- a/fs/signalfd.c +++ b/fs/signalfd.c @@ -121,8 +121,9 @@ static int signalfd_copyinfo(struct signalfd_siginfo __user *uinfo, * Other callers might not initialize the si_lsb field, * so check explicitly for the right codes here. */ - if (kinfo->si_code == BUS_MCEERR_AR || - kinfo->si_code == BUS_MCEERR_AO) + if (kinfo->si_signo == SIGBUS && + (kinfo->si_code == BUS_MCEERR_AR || + kinfo->si_code == BUS_MCEERR_AO)) err |= __put_user((short) kinfo->si_addr_lsb, &uinfo->ssi_addr_lsb); #endif diff --git a/fs/udf/inode.c b/fs/udf/inode.c index 6afac3d561ac..8d0b3ade0ff0 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -1652,17 +1652,9 @@ static int udf_update_inode(struct inode *inode, int do_sync) iinfo->i_ext.i_data, inode->i_sb->s_blocksize - sizeof(struct unallocSpaceEntry)); use->descTag.tagIdent = cpu_to_le16(TAG_IDENT_USE); - use->descTag.tagLocation = - cpu_to_le32(iinfo->i_location.logicalBlockNum); - crclen = sizeof(struct unallocSpaceEntry) + - iinfo->i_lenAlloc - sizeof(struct tag); - use->descTag.descCRCLength = cpu_to_le16(crclen); - use->descTag.descCRC = cpu_to_le16(crc_itu_t(0, (char *)use + - sizeof(struct tag), - crclen)); - use->descTag.tagChecksum = udf_tag_checksum(&use->descTag); + crclen = sizeof(struct unallocSpaceEntry); - goto out; + goto finish; } if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_UID_FORGET)) @@ -1782,6 +1774,8 @@ static int udf_update_inode(struct inode *inode, int do_sync) efe->descTag.tagIdent = cpu_to_le16(TAG_IDENT_EFE); crclen = sizeof(struct extendedFileEntry); } + +finish: if (iinfo->i_strat4096) { fe->icbTag.strategyType = cpu_to_le16(4096); fe->icbTag.strategyParameter = cpu_to_le16(1); @@ -1791,7 +1785,9 @@ static int udf_update_inode(struct inode *inode, int do_sync) fe->icbTag.numEntries = cpu_to_le16(1); } - if (S_ISDIR(inode->i_mode)) + if (iinfo->i_use) + fe->icbTag.fileType = ICBTAG_FILE_TYPE_USE; + else if (S_ISDIR(inode->i_mode)) fe->icbTag.fileType = ICBTAG_FILE_TYPE_DIRECTORY; else if (S_ISREG(inode->i_mode)) fe->icbTag.fileType = ICBTAG_FILE_TYPE_REGULAR; @@ -1828,7 +1824,6 @@ static int udf_update_inode(struct inode *inode, int do_sync) crclen)); fe->descTag.tagChecksum = udf_tag_checksum(&fe->descTag); -out: set_buffer_uptodate(bh); unlock_buffer(bh); diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c index 20de88d1bf86..dd714037c322 100644 --- a/fs/xfs/libxfs/xfs_attr_remote.c +++ b/fs/xfs/libxfs/xfs_attr_remote.c @@ -159,11 +159,10 @@ xfs_attr3_rmt_write_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; - struct xfs_buf_log_item *bip = bp->b_fspriv; + int blksize = mp->m_attr_geo->blksize; char *ptr; int len; xfs_daddr_t bno; - int blksize = mp->m_attr_geo->blksize; /* no verification of non-crc buffers */ if (!xfs_sb_version_hascrc(&mp->m_sb)) @@ -175,16 +174,22 @@ xfs_attr3_rmt_write_verify( ASSERT(len >= blksize); while (len > 0) { + struct xfs_attr3_rmt_hdr *rmt = (struct xfs_attr3_rmt_hdr *)ptr; + if (!xfs_attr3_rmt_verify(mp, ptr, blksize, bno)) { xfs_buf_ioerror(bp, -EFSCORRUPTED); xfs_verifier_error(bp); return; } - if (bip) { - struct xfs_attr3_rmt_hdr *rmt; - rmt = (struct xfs_attr3_rmt_hdr *)ptr; - rmt->rm_lsn = cpu_to_be64(bip->bli_item.li_lsn); + /* + * Ensure we aren't writing bogus LSNs to disk. See + * xfs_attr3_rmt_hdr_set() for the explanation. + */ + if (rmt->rm_lsn != cpu_to_be64(NULLCOMMITLSN)) { + xfs_buf_ioerror(bp, -EFSCORRUPTED); + xfs_verifier_error(bp); + return; } xfs_update_cksum(ptr, blksize, XFS_ATTR3_RMT_CRC_OFF); @@ -221,6 +226,18 @@ xfs_attr3_rmt_hdr_set( rmt->rm_owner = cpu_to_be64(ino); rmt->rm_blkno = cpu_to_be64(bno); + /* + * Remote attribute blocks are written synchronously, so we don't + * have an LSN that we can stamp in them that makes any sense to log + * recovery. To ensure that log recovery handles overwrites of these + * blocks sanely (i.e. once they've been freed and reallocated as some + * other type of metadata) we need to ensure that the LSN has a value + * that tells log recovery to ignore the LSN and overwrite the buffer + * with whatever is in it's log. To do this, we use the magic + * NULLCOMMITLSN to indicate that the LSN is invalid. + */ + rmt->rm_lsn = cpu_to_be64(NULLCOMMITLSN); + return sizeof(struct xfs_attr3_rmt_hdr); } @@ -434,14 +451,21 @@ xfs_attr_rmtval_set( /* * Allocate a single extent, up to the size of the value. + * + * Note that we have to consider this a data allocation as we + * write the remote attribute without logging the contents. + * Hence we must ensure that we aren't using blocks that are on + * the busy list so that we don't overwrite blocks which have + * recently been freed but their transactions are not yet + * committed to disk. If we overwrite the contents of a busy + * extent and then crash then the block may not contain the + * correct metadata after log recovery occurs. */ xfs_bmap_init(args->flist, args->firstblock); nmap = 1; error = xfs_bmapi_write(args->trans, dp, (xfs_fileoff_t)lblkno, - blkcnt, - XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA, - args->firstblock, args->total, &map, &nmap, - args->flist); + blkcnt, XFS_BMAPI_ATTRFORK, args->firstblock, + args->total, &map, &nmap, args->flist); if (!error) { error = xfs_bmap_finish(&args->trans, args->flist, &committed); diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index f0e8249722d4..db4acc1c3e73 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -1514,18 +1514,27 @@ xfs_filemap_fault( struct vm_area_struct *vma, struct vm_fault *vmf) { - struct xfs_inode *ip = XFS_I(file_inode(vma->vm_file)); + struct inode *inode = file_inode(vma->vm_file); int ret; - trace_xfs_filemap_fault(ip); + trace_xfs_filemap_fault(XFS_I(inode)); /* DAX can shortcut the normal fault path on write faults! */ - if ((vmf->flags & FAULT_FLAG_WRITE) && IS_DAX(VFS_I(ip))) + if ((vmf->flags & FAULT_FLAG_WRITE) && IS_DAX(inode)) return xfs_filemap_page_mkwrite(vma, vmf); - xfs_ilock(ip, XFS_MMAPLOCK_SHARED); - ret = filemap_fault(vma, vmf); - xfs_iunlock(ip, XFS_MMAPLOCK_SHARED); + xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED); + if (IS_DAX(inode)) { + /* + * we do not want to trigger unwritten extent conversion on read + * faults - that is unnecessary overhead and would also require + * changes to xfs_get_blocks_direct() to map unwritten extent + * ioend for conversion on read-only mappings. + */ + ret = __dax_fault(vma, vmf, xfs_get_blocks_direct, NULL); + } else + ret = filemap_fault(vma, vmf); + xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED); return ret; } diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 01dd228ca05e..480ebba8464f 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -1886,9 +1886,14 @@ xlog_recover_get_buf_lsn( uuid = &((struct xfs_dir3_blk_hdr *)blk)->uuid; break; case XFS_ATTR3_RMT_MAGIC: - lsn = be64_to_cpu(((struct xfs_attr3_rmt_hdr *)blk)->rm_lsn); - uuid = &((struct xfs_attr3_rmt_hdr *)blk)->rm_uuid; - break; + /* + * Remote attr blocks are written synchronously, rather than + * being logged. That means they do not contain a valid LSN + * (i.e. transactionally ordered) in them, and hence any time we + * see a buffer to replay over the top of a remote attribute + * block we should simply do so. + */ + goto recover_immediately; case XFS_SB_MAGIC: lsn = be64_to_cpu(((struct xfs_dsb *)blk)->sb_lsn); uuid = &((struct xfs_dsb *)blk)->sb_uuid; |