From 694e096fd7c2a7d40760fc9c9dfc826bdd495ea4 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Wed, 13 Nov 2013 12:29:08 -0500 Subject: NFS: Enabling v4.2 should not recompile nfsd and lockd When CONFIG_NFS_V4_2 is toggled nfsd and lockd will be recompiled, instead of only the nfs client. This patch moves a small amount of code into the client directory to avoid unnecessary recompiles. Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/blocklayout/blocklayout.h | 1 + fs/nfs/dns_resolve.c | 2 ++ fs/nfs/internal.h | 15 +++++++++++++++ fs/nfs/nfs4_fs.h | 8 ++++++++ 4 files changed, 26 insertions(+) (limited to 'fs') diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h index 8485978993e8..9838fb020473 100644 --- a/fs/nfs/blocklayout/blocklayout.h +++ b/fs/nfs/blocklayout/blocklayout.h @@ -36,6 +36,7 @@ #include #include +#include "../nfs4_fs.h" #include "../pnfs.h" #include "../netns.h" diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c index fc0f95ec7358..d25f10fb4926 100644 --- a/fs/nfs/dns_resolve.c +++ b/fs/nfs/dns_resolve.c @@ -46,7 +46,9 @@ ssize_t nfs_dns_resolve_name(struct net *net, char *name, size_t namelen, #include #include #include +#include +#include "nfs4_fs.h" #include "dns_resolve.h" #include "cache_lib.h" #include "netns.h" diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index bca6a3e3c49c..8b5cc04a8611 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -269,6 +269,21 @@ extern const u32 nfs41_maxgetdevinfo_overhead; extern struct rpc_procinfo nfs4_procedures[]; #endif +#ifdef CONFIG_NFS_V4_SECURITY_LABEL +extern struct nfs4_label *nfs4_label_alloc(struct nfs_server *server, gfp_t flags); +static inline void nfs4_label_free(struct nfs4_label *label) +{ + if (label) { + kfree(label->label); + kfree(label); + } + return; +} +#else +static inline struct nfs4_label *nfs4_label_alloc(struct nfs_server *server, gfp_t flags) { return NULL; } +static inline void nfs4_label_free(void *label) {} +#endif /* CONFIG_NFS_V4_SECURITY_LABEL */ + /* proc.c */ void nfs_close_context(struct nfs_open_context *ctx, int is_sync); extern struct nfs_client *nfs_init_client(struct nfs_client *clp, diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 3ce79b04522e..5609edc742a0 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -9,6 +9,14 @@ #ifndef __LINUX_FS_NFS_NFS4_FS_H #define __LINUX_FS_NFS_NFS4_FS_H +#if defined(CONFIG_NFS_V4_2) +#define NFS4_MAX_MINOR_VERSION 2 +#elif defined(CONFIG_NFS_V4_1) +#define NFS4_MAX_MINOR_VERSION 1 +#else +#define NFS4_MAX_MINOR_VERSION 0 +#endif + #if IS_ENABLED(CONFIG_NFS_V4) #define NFS4_MAX_LOOP_ON_RECOVER (10) -- cgit v1.2.3 From 829e57d7c590832caad04355b044667902194f6a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 18 Nov 2013 14:33:50 -0500 Subject: NFS: Fix a warning in nfs_setsecurity MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix the following warning: linux-nfs/fs/nfs/inode.c:315:1: warning: ‘inline’ is not at beginning of declaration [-Wold-style-declaration] Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 18ab2da4eeb6..00ad1c2b217d 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -312,7 +312,7 @@ struct nfs4_label *nfs4_label_alloc(struct nfs_server *server, gfp_t flags) } EXPORT_SYMBOL_GPL(nfs4_label_alloc); #else -void inline nfs_setsecurity(struct inode *inode, struct nfs_fattr *fattr, +void nfs_setsecurity(struct inode *inode, struct nfs_fattr *fattr, struct nfs4_label *label) { } -- cgit v1.2.3 From 4a82fd7c4e78a1b7a224f9ae8bb7e1fd95f670e0 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Fri, 15 Nov 2013 16:36:16 -0500 Subject: NFSv4 wait on recovery for async session errors When the state manager is processing the NFS4CLNT_DELEGRETURN flag, session draining is off, but DELEGRETURN can still get a session error. The async handler calls nfs4_schedule_session_recovery returns -EAGAIN, and the DELEGRETURN done then restarts the RPC task in the prepare state. With the state manager still processing the NFS4CLNT_DELEGRETURN flag with session draining off, these DELEGRETURNs will cycle with errors filling up the session slots. This prevents OPEN reclaims (from nfs_delegation_claim_opens) required by the NFS4CLNT_DELEGRETURN state manager processing from completing, hanging the state manager in the __rpc_wait_for_completion_task in nfs4_run_open_task as seen in this kernel thread dump: kernel: 4.12.32.53-ma D 0000000000000000 0 3393 2 0x00000000 kernel: ffff88013995fb60 0000000000000046 ffff880138cc5400 ffff88013a9df140 kernel: ffff8800000265c0 ffffffff8116eef0 ffff88013fc10080 0000000300000001 kernel: ffff88013a4ad058 ffff88013995ffd8 000000000000fbc8 ffff88013a4ad058 kernel: Call Trace: kernel: [] ? cache_alloc_refill+0x1c0/0x240 kernel: [] ? rpc_wait_bit_killable+0x0/0xa0 [sunrpc] kernel: [] rpc_wait_bit_killable+0x42/0xa0 [sunrpc] kernel: [] __wait_on_bit+0x5f/0x90 kernel: [] ? rpc_wait_bit_killable+0x0/0xa0 [sunrpc] kernel: [] out_of_line_wait_on_bit+0x78/0x90 kernel: [] ? wake_bit_function+0x0/0x50 kernel: [] __rpc_wait_for_completion_task+0x2d/0x30 [sunrpc] kernel: [] nfs4_run_open_task+0x11c/0x160 [nfs] kernel: [] nfs4_open_recover_helper+0x87/0x120 [nfs] kernel: [] nfs4_open_recover+0xc6/0x150 [nfs] kernel: [] ? nfs4_open_recoverdata_alloc+0x2f/0x60 [nfs] kernel: [] nfs4_open_delegation_recall+0x6a/0xa0 [nfs] kernel: [] nfs_end_delegation_return+0x120/0x2e0 [nfs] kernel: [] ? queue_work+0x1f/0x30 kernel: [] nfs_client_return_marked_delegations+0xd7/0x110 [nfs] kernel: [] nfs4_run_state_manager+0x548/0x620 [nfs] kernel: [] ? nfs4_run_state_manager+0x0/0x620 [nfs] kernel: [] kthread+0x96/0xa0 kernel: [] child_rip+0xa/0x20 kernel: [] ? kthread+0x0/0xa0 kernel: [] ? child_rip+0x0/0x20 The state manager can not therefore process the DELEGRETURN session errors. Change the async handler to wait for recovery on session errors. Signed-off-by: Andy Adamson Cc: stable@vger.kernel.org Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 5ab33c0792df..1f4edfbb4a70 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4803,7 +4803,7 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, dprintk("%s ERROR %d, Reset session\n", __func__, task->tk_status); nfs4_schedule_session_recovery(clp->cl_session, task->tk_status); - goto restart_call; + goto wait_on_recovery; #endif /* CONFIG_NFS_V4_1 */ case -NFS4ERR_DELAY: nfs_inc_server_stats(server, NFSIOS_DELAY); -- cgit v1.2.3 From c97cf606e43b85a6cf158b810375dd77312024db Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 19 Nov 2013 16:34:14 -0500 Subject: NFSv4: Update list of irrecoverable errors on DELEGRETURN If the DELEGRETURN errors out with something like NFS4ERR_BAD_STATEID then there is no recovery possible. Just quit without returning an error. Also, note that the client must not assume that the NFSv4 lease has been renewed when it sees an error on DELEGRETURN. Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org --- fs/nfs/nfs4proc.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 1f4edfbb4a70..ca36d0d50b7d 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4988,11 +4988,17 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata) trace_nfs4_delegreturn_exit(&data->args, &data->res, task->tk_status); switch (task->tk_status) { - case -NFS4ERR_STALE_STATEID: - case -NFS4ERR_EXPIRED: case 0: renew_lease(data->res.server, data->timestamp); break; + case -NFS4ERR_ADMIN_REVOKED: + case -NFS4ERR_DELEG_REVOKED: + case -NFS4ERR_BAD_STATEID: + case -NFS4ERR_OLD_STATEID: + case -NFS4ERR_STALE_STATEID: + case -NFS4ERR_EXPIRED: + task->tk_status = 0; + break; default: if (nfs4_async_handle_error(task, data->res.server, NULL) == -EAGAIN) { -- cgit v1.2.3 From 69794ad70cd3b600319f175fc0cbe7abd510791f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 20 Nov 2013 12:57:19 -0500 Subject: NFSv4: close needs to handle NFS4ERR_ADMIN_REVOKED Also ensure that we zero out the stateid mode when exiting Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index ca36d0d50b7d..f01e2aa53210 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2518,9 +2518,8 @@ static void nfs4_close_done(struct rpc_task *task, void *data) calldata->roc_barrier); nfs_set_open_stateid(state, &calldata->res.stateid, 0); renew_lease(server, calldata->timestamp); - nfs4_close_clear_stateid_flags(state, - calldata->arg.fmode); break; + case -NFS4ERR_ADMIN_REVOKED: case -NFS4ERR_STALE_STATEID: case -NFS4ERR_OLD_STATEID: case -NFS4ERR_BAD_STATEID: @@ -2528,9 +2527,13 @@ static void nfs4_close_done(struct rpc_task *task, void *data) if (calldata->arg.fmode == 0) break; default: - if (nfs4_async_handle_error(task, server, state) == -EAGAIN) + if (nfs4_async_handle_error(task, server, state) == -EAGAIN) { rpc_restart_call_prepare(task); + goto out_release; + } } + nfs4_close_clear_stateid_flags(state, calldata->arg.fmode); +out_release: nfs_release_seqid(calldata->arg.seqid); nfs_refresh_inode(calldata->inode, calldata->res.fattr); dprintk("%s: done, ret = %d!\n", __func__, task->tk_status); -- cgit v1.2.3 From 6d565409503f4e1f74ac30de14e8c91a2b826cd8 Mon Sep 17 00:00:00 2001 From: Phillip Lougher Date: Sun, 24 Nov 2013 00:40:49 +0000 Subject: Squashfs: fix failure to unlock pages on decompress error Direct decompression into the page cache. If we fall back to using an intermediate buffer (because we cannot grab all the page cache pages) and we get a decompress fail, we forgot to release the pages. Reported-by: Roman Peniaev Signed-off-by: Phillip Lougher --- fs/squashfs/file_direct.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/squashfs/file_direct.c b/fs/squashfs/file_direct.c index 2943b2bfae48..62a0de6632e1 100644 --- a/fs/squashfs/file_direct.c +++ b/fs/squashfs/file_direct.c @@ -84,6 +84,9 @@ int squashfs_readpage_block(struct page *target_page, u64 block, int bsize) */ res = squashfs_read_cache(target_page, block, bsize, pages, page); + if (res < 0) + goto mark_errored; + goto out; } @@ -119,7 +122,7 @@ mark_errored: * dealt with by the caller */ for (i = 0; i < pages; i++) { - if (page[i] == target_page) + if (page[i] == NULL || page[i] == target_page) continue; flush_dcache_page(page[i]); SetPageError(page[i]); -- cgit v1.2.3 From c170bbb45febc03ac4d34ba2b8bb55e06104b7e7 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 24 Nov 2013 16:32:22 -0700 Subject: block: submit_bio_wait() conversions It was being open coded in a few places. Signed-off-by: Kent Overstreet Cc: Jens Axboe Cc: Joern Engel Cc: Prasad Joshi Cc: Neil Brown Cc: Chris Mason Acked-by: NeilBrown Signed-off-by: Jens Axboe --- block/blk-flush.c | 19 +------------------ drivers/md/md.c | 12 +----------- fs/btrfs/check-integrity.c | 32 +++++++++++++------------------- fs/btrfs/check-integrity.h | 2 ++ fs/btrfs/extent_io.c | 12 +----------- fs/btrfs/scrub.c | 33 ++++----------------------------- fs/hfsplus/wrapper.c | 17 +---------------- fs/logfs/dev_bdev.c | 13 +------------ 8 files changed, 24 insertions(+), 116 deletions(-) (limited to 'fs') diff --git a/block/blk-flush.c b/block/blk-flush.c index 331e627301ea..fb6f3c0ffa49 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -502,15 +502,6 @@ void blk_abort_flushes(struct request_queue *q) } } -static void bio_end_flush(struct bio *bio, int err) -{ - if (err) - clear_bit(BIO_UPTODATE, &bio->bi_flags); - if (bio->bi_private) - complete(bio->bi_private); - bio_put(bio); -} - /** * blkdev_issue_flush - queue a flush * @bdev: blockdev to issue flush for @@ -526,7 +517,6 @@ static void bio_end_flush(struct bio *bio, int err) int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask, sector_t *error_sector) { - DECLARE_COMPLETION_ONSTACK(wait); struct request_queue *q; struct bio *bio; int ret = 0; @@ -548,13 +538,9 @@ int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask, return -ENXIO; bio = bio_alloc(gfp_mask, 0); - bio->bi_end_io = bio_end_flush; bio->bi_bdev = bdev; - bio->bi_private = &wait; - bio_get(bio); - submit_bio(WRITE_FLUSH, bio); - wait_for_completion_io(&wait); + ret = submit_bio_wait(WRITE_FLUSH, bio); /* * The driver must store the error location in ->bi_sector, if @@ -564,9 +550,6 @@ int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask, if (error_sector) *error_sector = bio->bi_sector; - if (!bio_flagged(bio, BIO_UPTODATE)) - ret = -EIO; - bio_put(bio); return ret; } diff --git a/drivers/md/md.c b/drivers/md/md.c index b6b7a2866c9e..8700de376876 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -776,16 +776,10 @@ void md_super_wait(struct mddev *mddev) finish_wait(&mddev->sb_wait, &wq); } -static void bi_complete(struct bio *bio, int error) -{ - complete((struct completion*)bio->bi_private); -} - int sync_page_io(struct md_rdev *rdev, sector_t sector, int size, struct page *page, int rw, bool metadata_op) { struct bio *bio = bio_alloc_mddev(GFP_NOIO, 1, rdev->mddev); - struct completion event; int ret; rw |= REQ_SYNC; @@ -801,11 +795,7 @@ int sync_page_io(struct md_rdev *rdev, sector_t sector, int size, else bio->bi_sector = sector + rdev->data_offset; bio_add_page(bio, page, size, 0); - init_completion(&event); - bio->bi_private = &event; - bio->bi_end_io = bi_complete; - submit_bio(rw, bio); - wait_for_completion(&event); + submit_bio_wait(rw, bio); ret = test_bit(BIO_UPTODATE, &bio->bi_flags); bio_put(bio); diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c index e0aab4456974..f85b1c409003 100644 --- a/fs/btrfs/check-integrity.c +++ b/fs/btrfs/check-integrity.c @@ -323,7 +323,6 @@ static void btrfsic_release_block_ctx(struct btrfsic_block_data_ctx *block_ctx); static int btrfsic_read_block(struct btrfsic_state *state, struct btrfsic_block_data_ctx *block_ctx); static void btrfsic_dump_database(struct btrfsic_state *state); -static void btrfsic_complete_bio_end_io(struct bio *bio, int err); static int btrfsic_test_for_metadata(struct btrfsic_state *state, char **datav, unsigned int num_pages); static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state, @@ -1677,7 +1676,6 @@ static int btrfsic_read_block(struct btrfsic_state *state, for (i = 0; i < num_pages;) { struct bio *bio; unsigned int j; - DECLARE_COMPLETION_ONSTACK(complete); bio = btrfs_io_bio_alloc(GFP_NOFS, num_pages - i); if (!bio) { @@ -1688,8 +1686,6 @@ static int btrfsic_read_block(struct btrfsic_state *state, } bio->bi_bdev = block_ctx->dev->bdev; bio->bi_sector = dev_bytenr >> 9; - bio->bi_end_io = btrfsic_complete_bio_end_io; - bio->bi_private = &complete; for (j = i; j < num_pages; j++) { ret = bio_add_page(bio, block_ctx->pagev[j], @@ -1702,12 +1698,7 @@ static int btrfsic_read_block(struct btrfsic_state *state, "btrfsic: error, failed to add a single page!\n"); return -1; } - submit_bio(READ, bio); - - /* this will also unplug the queue */ - wait_for_completion(&complete); - - if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) { + if (submit_bio_wait(READ, bio)) { printk(KERN_INFO "btrfsic: read error at logical %llu dev %s!\n", block_ctx->start, block_ctx->dev->name); @@ -1730,11 +1721,6 @@ static int btrfsic_read_block(struct btrfsic_state *state, return block_ctx->len; } -static void btrfsic_complete_bio_end_io(struct bio *bio, int err) -{ - complete((struct completion *)bio->bi_private); -} - static void btrfsic_dump_database(struct btrfsic_state *state) { struct list_head *elem_all; @@ -2998,14 +2984,12 @@ int btrfsic_submit_bh(int rw, struct buffer_head *bh) return submit_bh(rw, bh); } -void btrfsic_submit_bio(int rw, struct bio *bio) +static void __btrfsic_submit_bio(int rw, struct bio *bio) { struct btrfsic_dev_state *dev_state; - if (!btrfsic_is_initialized) { - submit_bio(rw, bio); + if (!btrfsic_is_initialized) return; - } mutex_lock(&btrfsic_mutex); /* since btrfsic_submit_bio() is also called before @@ -3097,10 +3081,20 @@ void btrfsic_submit_bio(int rw, struct bio *bio) } leave: mutex_unlock(&btrfsic_mutex); +} +void btrfsic_submit_bio(int rw, struct bio *bio) +{ + __btrfsic_submit_bio(rw, bio); submit_bio(rw, bio); } +int btrfsic_submit_bio_wait(int rw, struct bio *bio) +{ + __btrfsic_submit_bio(rw, bio); + return submit_bio_wait(rw, bio); +} + int btrfsic_mount(struct btrfs_root *root, struct btrfs_fs_devices *fs_devices, int including_extent_data, u32 print_mask) diff --git a/fs/btrfs/check-integrity.h b/fs/btrfs/check-integrity.h index 8b59175cc502..13b8566c97ab 100644 --- a/fs/btrfs/check-integrity.h +++ b/fs/btrfs/check-integrity.h @@ -22,9 +22,11 @@ #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY int btrfsic_submit_bh(int rw, struct buffer_head *bh); void btrfsic_submit_bio(int rw, struct bio *bio); +int btrfsic_submit_bio_wait(int rw, struct bio *bio); #else #define btrfsic_submit_bh submit_bh #define btrfsic_submit_bio submit_bio +#define btrfsic_submit_bio_wait submit_bio_wait #endif int btrfsic_mount(struct btrfs_root *root, diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 856bc2b2192c..014beaa9458c 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -1952,11 +1952,6 @@ static int free_io_failure(struct inode *inode, struct io_failure_record *rec, return err; } -static void repair_io_failure_callback(struct bio *bio, int err) -{ - complete(bio->bi_private); -} - /* * this bypasses the standard btrfs submit functions deliberately, as * the standard behavior is to write all copies in a raid setup. here we only @@ -1973,7 +1968,6 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 start, { struct bio *bio; struct btrfs_device *dev; - DECLARE_COMPLETION_ONSTACK(compl); u64 map_length = 0; u64 sector; struct btrfs_bio *bbio = NULL; @@ -1989,8 +1983,6 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 start, bio = btrfs_io_bio_alloc(GFP_NOFS, 1); if (!bio) return -EIO; - bio->bi_private = &compl; - bio->bi_end_io = repair_io_failure_callback; bio->bi_size = 0; map_length = length; @@ -2011,10 +2003,8 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 start, } bio->bi_bdev = dev->bdev; bio_add_page(bio, page, length, start - page_offset(page)); - btrfsic_submit_bio(WRITE_SYNC, bio); - wait_for_completion(&compl); - if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) { + if (btrfsic_submit_bio_wait(WRITE_SYNC, bio)) { /* try to remap that extent elsewhere? */ bio_put(bio); btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS); diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 2544805544f0..3214ebe593bd 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -208,7 +208,6 @@ static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info, int is_metadata, int have_csum, const u8 *csum, u64 generation, u16 csum_size); -static void scrub_complete_bio_end_io(struct bio *bio, int err); static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad, struct scrub_block *sblock_good, int force_write); @@ -1292,7 +1291,6 @@ static void scrub_recheck_block(struct btrfs_fs_info *fs_info, for (page_num = 0; page_num < sblock->page_count; page_num++) { struct bio *bio; struct scrub_page *page = sblock->pagev[page_num]; - DECLARE_COMPLETION_ONSTACK(complete); if (page->dev->bdev == NULL) { page->io_error = 1; @@ -1309,18 +1307,11 @@ static void scrub_recheck_block(struct btrfs_fs_info *fs_info, } bio->bi_bdev = page->dev->bdev; bio->bi_sector = page->physical >> 9; - bio->bi_end_io = scrub_complete_bio_end_io; - bio->bi_private = &complete; bio_add_page(bio, page->page, PAGE_SIZE, 0); - btrfsic_submit_bio(READ, bio); - - /* this will also unplug the queue */ - wait_for_completion(&complete); - - page->io_error = !test_bit(BIO_UPTODATE, &bio->bi_flags); - if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) + if (btrfsic_submit_bio_wait(READ, bio)) sblock->no_io_error_seen = 0; + bio_put(bio); } @@ -1389,11 +1380,6 @@ static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info, sblock->checksum_error = 1; } -static void scrub_complete_bio_end_io(struct bio *bio, int err) -{ - complete((struct completion *)bio->bi_private); -} - static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad, struct scrub_block *sblock_good, int force_write) @@ -1428,7 +1414,6 @@ static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad, sblock_bad->checksum_error || page_bad->io_error) { struct bio *bio; int ret; - DECLARE_COMPLETION_ONSTACK(complete); if (!page_bad->dev->bdev) { printk_ratelimited(KERN_WARNING @@ -1441,19 +1426,14 @@ static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad, return -EIO; bio->bi_bdev = page_bad->dev->bdev; bio->bi_sector = page_bad->physical >> 9; - bio->bi_end_io = scrub_complete_bio_end_io; - bio->bi_private = &complete; ret = bio_add_page(bio, page_good->page, PAGE_SIZE, 0); if (PAGE_SIZE != ret) { bio_put(bio); return -EIO; } - btrfsic_submit_bio(WRITE, bio); - /* this will also unplug the queue */ - wait_for_completion(&complete); - if (!bio_flagged(bio, BIO_UPTODATE)) { + if (btrfsic_submit_bio_wait(WRITE, bio)) { btrfs_dev_stat_inc_and_print(page_bad->dev, BTRFS_DEV_STAT_WRITE_ERRS); btrfs_dev_replace_stats_inc( @@ -3373,7 +3353,6 @@ static int write_page_nocow(struct scrub_ctx *sctx, struct bio *bio; struct btrfs_device *dev; int ret; - DECLARE_COMPLETION_ONSTACK(compl); dev = sctx->wr_ctx.tgtdev; if (!dev) @@ -3390,8 +3369,6 @@ static int write_page_nocow(struct scrub_ctx *sctx, spin_unlock(&sctx->stat_lock); return -ENOMEM; } - bio->bi_private = &compl; - bio->bi_end_io = scrub_complete_bio_end_io; bio->bi_size = 0; bio->bi_sector = physical_for_dev_replace >> 9; bio->bi_bdev = dev->bdev; @@ -3402,10 +3379,8 @@ leave_with_eio: btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS); return -EIO; } - btrfsic_submit_bio(WRITE_SYNC, bio); - wait_for_completion(&compl); - if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) + if (btrfsic_submit_bio_wait(WRITE_SYNC, bio)) goto leave_with_eio; bio_put(bio); diff --git a/fs/hfsplus/wrapper.c b/fs/hfsplus/wrapper.c index b51a6079108d..e9a97a0d4314 100644 --- a/fs/hfsplus/wrapper.c +++ b/fs/hfsplus/wrapper.c @@ -24,13 +24,6 @@ struct hfsplus_wd { u16 embed_count; }; -static void hfsplus_end_io_sync(struct bio *bio, int err) -{ - if (err) - clear_bit(BIO_UPTODATE, &bio->bi_flags); - complete(bio->bi_private); -} - /* * hfsplus_submit_bio - Perfrom block I/O * @sb: super block of volume for I/O @@ -53,7 +46,6 @@ static void hfsplus_end_io_sync(struct bio *bio, int err) int hfsplus_submit_bio(struct super_block *sb, sector_t sector, void *buf, void **data, int rw) { - DECLARE_COMPLETION_ONSTACK(wait); struct bio *bio; int ret = 0; u64 io_size; @@ -73,8 +65,6 @@ int hfsplus_submit_bio(struct super_block *sb, sector_t sector, bio = bio_alloc(GFP_NOIO, 1); bio->bi_sector = sector; bio->bi_bdev = sb->s_bdev; - bio->bi_end_io = hfsplus_end_io_sync; - bio->bi_private = &wait; if (!(rw & WRITE) && data) *data = (u8 *)buf + offset; @@ -93,12 +83,7 @@ int hfsplus_submit_bio(struct super_block *sb, sector_t sector, buf = (u8 *)buf + len; } - submit_bio(rw, bio); - wait_for_completion(&wait); - - if (!bio_flagged(bio, BIO_UPTODATE)) - ret = -EIO; - + ret = submit_bio_wait(rw, bio); out: bio_put(bio); return ret < 0 ? ret : 0; diff --git a/fs/logfs/dev_bdev.c b/fs/logfs/dev_bdev.c index 550475ca6a0e..0f95f0d0b313 100644 --- a/fs/logfs/dev_bdev.c +++ b/fs/logfs/dev_bdev.c @@ -14,16 +14,10 @@ #define PAGE_OFS(ofs) ((ofs) & (PAGE_SIZE-1)) -static void request_complete(struct bio *bio, int err) -{ - complete((struct completion *)bio->bi_private); -} - static int sync_request(struct page *page, struct block_device *bdev, int rw) { struct bio bio; struct bio_vec bio_vec; - struct completion complete; bio_init(&bio); bio.bi_max_vecs = 1; @@ -35,13 +29,8 @@ static int sync_request(struct page *page, struct block_device *bdev, int rw) bio.bi_size = PAGE_SIZE; bio.bi_bdev = bdev; bio.bi_sector = page->index * (PAGE_SIZE >> 9); - init_completion(&complete); - bio.bi_private = &complete; - bio.bi_end_io = request_complete; - submit_bio(rw, &bio); - wait_for_completion(&complete); - return test_bit(BIO_UPTODATE, &bio.bi_flags) ? 0 : -EIO; + return submit_bio_wait(rw, &bio); } static int bdev_readpage(void *_sb, struct page *page) -- cgit v1.2.3 From b0d8d2292160bb63de1972361ebed100c64b5b37 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 2 Dec 2013 09:44:51 -0800 Subject: vfs: fix subtle use-after-free of pipe_inode_info The pipe code was trying (and failing) to be very careful about freeing the pipe info only after the last access, with a pattern like: spin_lock(&inode->i_lock); if (!--pipe->files) { inode->i_pipe = NULL; kill = 1; } spin_unlock(&inode->i_lock); __pipe_unlock(pipe); if (kill) free_pipe_info(pipe); where the final freeing is done last. HOWEVER. The above is actually broken, because while the freeing is done at the end, if we have two racing processes releasing the pipe inode info, the one that *doesn't* free it will decrement the ->files count, and unlock the inode i_lock, but then still use the "pipe_inode_info" afterwards when it does the "__pipe_unlock(pipe)". This is *very* hard to trigger in practice, since the race window is very small, and adding debug options seems to just hide it by slowing things down. Simon originally reported this way back in July as an Oops in kmem_cache_allocate due to a single bit corruption (due to the final "spin_unlock(pipe->mutex.wait_lock)" incrementing a field in a different allocation that had re-used the free'd pipe-info), it's taken this long to figure out. Since the 'pipe->files' accesses aren't even protected by the pipe lock (we very much use the inode lock for that), the simple solution is to just drop the pipe lock early. And since there were two users of this pattern, create a helper function for it. Introduced commit ba5bb147330a ("pipe: take allocation and freeing of pipe_inode_info out of ->i_mutex"). Reported-by: Simon Kirby Reported-by: Ian Applegate Acked-by: Al Viro Cc: stable@kernel.org # v3.10+ Signed-off-by: Linus Torvalds --- fs/pipe.c | 39 +++++++++++++++++++-------------------- 1 file changed, 19 insertions(+), 20 deletions(-) (limited to 'fs') diff --git a/fs/pipe.c b/fs/pipe.c index d2c45e14e6d8..0e0752ef2715 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -726,11 +726,25 @@ pipe_poll(struct file *filp, poll_table *wait) return mask; } +static void put_pipe_info(struct inode *inode, struct pipe_inode_info *pipe) +{ + int kill = 0; + + spin_lock(&inode->i_lock); + if (!--pipe->files) { + inode->i_pipe = NULL; + kill = 1; + } + spin_unlock(&inode->i_lock); + + if (kill) + free_pipe_info(pipe); +} + static int pipe_release(struct inode *inode, struct file *file) { - struct pipe_inode_info *pipe = inode->i_pipe; - int kill = 0; + struct pipe_inode_info *pipe = file->private_data; __pipe_lock(pipe); if (file->f_mode & FMODE_READ) @@ -743,17 +757,9 @@ pipe_release(struct inode *inode, struct file *file) kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); } - spin_lock(&inode->i_lock); - if (!--pipe->files) { - inode->i_pipe = NULL; - kill = 1; - } - spin_unlock(&inode->i_lock); __pipe_unlock(pipe); - if (kill) - free_pipe_info(pipe); - + put_pipe_info(inode, pipe); return 0; } @@ -1014,7 +1020,6 @@ static int fifo_open(struct inode *inode, struct file *filp) { struct pipe_inode_info *pipe; bool is_pipe = inode->i_sb->s_magic == PIPEFS_MAGIC; - int kill = 0; int ret; filp->f_version = 0; @@ -1130,15 +1135,9 @@ err_wr: goto err; err: - spin_lock(&inode->i_lock); - if (!--pipe->files) { - inode->i_pipe = NULL; - kill = 1; - } - spin_unlock(&inode->i_lock); __pipe_unlock(pipe); - if (kill) - free_pipe_info(pipe); + + put_pipe_info(inode, pipe); return ret; } -- cgit v1.2.3 From 95f19f658ce177387345b51bd48fed7b9cd7d4e8 Mon Sep 17 00:00:00 2001 From: Amit Pundir Date: Fri, 15 Nov 2013 12:56:31 +0530 Subject: epoll: drop EPOLLWAKEUP if PM_SLEEP is disabled Drop EPOLLWAKEUP from epoll events mask if CONFIG_PM_SLEEP is disabled. Signed-off-by: Amit Pundir Cc: John Stultz Cc: Alexander Viro Signed-off-by: Rafael J. Wysocki --- fs/eventpoll.c | 3 +-- include/uapi/linux/eventpoll.h | 13 ++++++++++++- 2 files changed, 13 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 79b65c3b9e87..8b5e2584c840 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1852,8 +1852,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd, goto error_tgt_fput; /* Check if EPOLLWAKEUP is allowed */ - if ((epds.events & EPOLLWAKEUP) && !capable(CAP_BLOCK_SUSPEND)) - epds.events &= ~EPOLLWAKEUP; + ep_take_care_of_epollwakeup(&epds); /* * We have to check that the file structure underneath the file descriptor diff --git a/include/uapi/linux/eventpoll.h b/include/uapi/linux/eventpoll.h index 2c267bcbb85c..bc81fb2e1f0e 100644 --- a/include/uapi/linux/eventpoll.h +++ b/include/uapi/linux/eventpoll.h @@ -61,5 +61,16 @@ struct epoll_event { __u64 data; } EPOLL_PACKED; - +#ifdef CONFIG_PM_SLEEP +static inline void ep_take_care_of_epollwakeup(struct epoll_event *epev) +{ + if ((epev->events & EPOLLWAKEUP) && !capable(CAP_BLOCK_SUSPEND)) + epev->events &= ~EPOLLWAKEUP; +} +#else +static inline void ep_take_care_of_epollwakeup(struct epoll_event *epev) +{ + epev->events &= ~EPOLLWAKEUP; +} +#endif #endif /* _UAPI_LINUX_EVENTPOLL_H */ -- cgit v1.2.3 From f22e5edd2244609aed3906207a62223e7707a34d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 4 Dec 2013 12:09:45 -0500 Subject: NFSv4.1: Prevent a 3-way deadlock between layoutreturn, open and state recovery Andy Adamson reports: The state manager is recovering expired state and recovery OPENs are being processed. If kswapd is pruning inodes at the same time, a deadlock can occur when kswapd calls evict_inode on an NFSv4.1 inode with a layout, and the resultant layoutreturn gets an error that the state mangager is to handle, causing the layoutreturn to wait on the (NFS client) cl_rpcwaitq. At the same time an open is waiting for the inode deletion to complete in __wait_on_freeing_inode. If the open is either the open called by the state manager, or an open from the same open owner that is holding the NFSv4 sequence id which causes the OPEN from the state manager to wait for the sequence id on the Seqid_waitqueue, then the state is deadlocked with kswapd. The fix is simply to have layoutreturn ignore all errors except NFS4ERR_DELAY. We already know that layouts are dropped on all server reboots, and that it has to be coded to deal with the "forgetful client model" that doesn't send layoutreturns. Reported-by: Andy Adamson Link: http://lkml.kernel.org/r/1385402270-14284-1-git-send-email-andros@netapp.com Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index f01e2aa53210..e040359983ce 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -7599,7 +7599,14 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata) return; server = NFS_SERVER(lrp->args.inode); - if (nfs4_async_handle_error(task, server, NULL) == -EAGAIN) { + switch (task->tk_status) { + default: + task->tk_status = 0; + case 0: + break; + case -NFS4ERR_DELAY: + if (nfs4_async_handle_error(task, server, NULL) != -EAGAIN) + break; rpc_restart_call_prepare(task); return; } -- cgit v1.2.3 From 3873d064b8538686bbbd4b858dc8a07db1f7f43a Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Mon, 2 Dec 2013 19:59:31 +0100 Subject: nfs: fix do_div() warning by instead using sector_div() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When compiling a 32bit kernel with CONFIG_LBDAF=n the compiler complains like shown below. Fix this warning by instead using sector_div() which is provided by the kernel.h header file. fs/nfs/blocklayout/extents.c: In function ‘normalize’: include/asm-generic/div64.h:43:28: warning: comparison of distinct pointer types lacks a cast [enabled by default] fs/nfs/blocklayout/extents.c:47:13: note: in expansion of macro ‘do_div’ nfs/blocklayout/extents.c:47:2: warning: right shift count >= width of type [enabled by default] fs/nfs/blocklayout/extents.c:47:2: warning: passing argument 1 of ‘__div64_32’ from incompatible pointer type [enabled by default] include/asm-generic/div64.h:35:17: note: expected ‘uint64_t *’ but argument is of type ‘sector_t *’ extern uint32_t __div64_32(uint64_t *dividend, uint32_t divisor); Signed-off-by: Helge Deller Signed-off-by: Trond Myklebust --- fs/nfs/blocklayout/extents.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/blocklayout/extents.c b/fs/nfs/blocklayout/extents.c index 9c3e117c3ed1..4d0161442565 100644 --- a/fs/nfs/blocklayout/extents.c +++ b/fs/nfs/blocklayout/extents.c @@ -44,7 +44,7 @@ static inline sector_t normalize(sector_t s, int base) { sector_t tmp = s; /* Since do_div modifies its argument */ - return s - do_div(tmp, base); + return s - sector_div(tmp, base); } static inline sector_t normalize_up(sector_t s, int base) -- cgit v1.2.3 From d1b9432712a25eeb06114fb4b587133525a47de5 Mon Sep 17 00:00:00 2001 From: Gu Zheng Date: Wed, 4 Dec 2013 18:19:06 +0800 Subject: aio: clean up aio ring in the fail path Clean up the aio ring file in the fail path of aio_setup_ring and ioctx_alloc. And maybe it can fix the GPF issue reported by Dave Jones: https://lkml.org/lkml/2013/11/25/898 Signed-off-by: Gu Zheng Signed-off-by: Benjamin LaHaise --- fs/aio.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/aio.c b/fs/aio.c index ad460d78d6c5..a2f92aa23ee3 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -316,8 +316,10 @@ static int aio_setup_ring(struct kioctx *ctx) if (nr_pages > AIO_RING_PAGES) { ctx->ring_pages = kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL); - if (!ctx->ring_pages) + if (!ctx->ring_pages) { + put_aio_ring_file(ctx); return -ENOMEM; + } } ctx->mmap_size = nr_pages * PAGE_SIZE; @@ -594,7 +596,7 @@ static struct kioctx *ioctx_alloc(unsigned nr_events) aio_nr + nr_events < aio_nr) { spin_unlock(&aio_nr_lock); err = -EAGAIN; - goto err; + goto err_ctx; } aio_nr += ctx->max_reqs; spin_unlock(&aio_nr_lock); @@ -611,6 +613,8 @@ static struct kioctx *ioctx_alloc(unsigned nr_events) err_cleanup: aio_nr_sub(ctx->max_reqs); +err_ctx: + aio_free_ring(ctx); err: free_percpu(ctx->cpu); free_percpu(ctx->reqs.pcpu_count); -- cgit v1.2.3 From a8b14744429f90763f258ad0f69601cdcad610aa Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 4 Dec 2013 09:20:40 -0500 Subject: sysfs: give different locking key to regular and bin files 027a485d12e0 ("sysfs: use a separate locking class for open files depending on mmap") assigned different lockdep key to sysfs_open_file->mutex depending on whether the file implements mmap or not in an attempt to avoid spurious lockdep warning caused by merging of regular and bin file paths. While this restored some of the original behavior of using different locks (at least lockdep is concerned) for the different clases of files. The restoration wasn't full because now the lockdep key assignment depends on whether the file has mmap or not instead of whether it's a regular file or not. This means that bin files which don't implement mmap will get assigned the same lockdep class as regular files. This is problematic because file_operations for bin files still implements the mmap file operation and checking whether the sysfs file actually implements mmap happens in the file operation after grabbing @sysfs_open_file->mutex. We still end up adding locking dependency from mmap locking to sysfs_open_file->mutex to the regular file mutex which triggers spurious circular locking warning. Fix it by restoring the original behavior fully by differentiating lockdep key by whether the file is regular or bin, instead of the existence of mmap. Signed-off-by: Tejun Heo Reported-by: Dave Jones Link: http://lkml.kernel.org/g/20131203184324.GA11320@redhat.com Signed-off-by: Greg Kroah-Hartman --- fs/sysfs/file.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index b94f93685093..35e7d08fe629 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c @@ -609,7 +609,7 @@ static int sysfs_open_file(struct inode *inode, struct file *file) struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; struct kobject *kobj = attr_sd->s_parent->s_dir.kobj; struct sysfs_open_file *of; - bool has_read, has_write, has_mmap; + bool has_read, has_write; int error = -EACCES; /* need attr_sd for attr and ops, its parent for kobj */ @@ -621,7 +621,6 @@ static int sysfs_open_file(struct inode *inode, struct file *file) has_read = battr->read || battr->mmap; has_write = battr->write || battr->mmap; - has_mmap = battr->mmap; } else { const struct sysfs_ops *ops = sysfs_file_ops(attr_sd); @@ -633,7 +632,6 @@ static int sysfs_open_file(struct inode *inode, struct file *file) has_read = ops->show; has_write = ops->store; - has_mmap = false; } /* check perms and supported operations */ @@ -661,9 +659,9 @@ static int sysfs_open_file(struct inode *inode, struct file *file) * open file has a separate mutex, it's okay as long as those don't * happen on the same file. At this point, we can't easily give * each file a separate locking class. Let's differentiate on - * whether the file has mmap or not for now. + * whether the file is bin or not for now. */ - if (has_mmap) + if (sysfs_is_bin(attr_sd)) mutex_init(&of->mutex); else mutex_init(&of->mutex); -- cgit v1.2.3