Diffstat (limited to 'fs')
-rw-r--r-- | fs/fuse/cuse.c                            |  4
-rw-r--r-- | fs/fuse/file.c                            | 56
-rw-r--r-- | fs/fuse/fuse_i.h                          |  9
-rw-r--r-- | fs/nfs/blocklayout/blocklayout.c          | 13
-rw-r--r-- | fs/nfs/callback.h                         |  3
-rw-r--r-- | fs/nfs/callback_proc.c                    | 69
-rw-r--r-- | fs/nfs/callback_xdr.c                     | 12
-rw-r--r-- | fs/nfs/file.c                             | 12
-rw-r--r-- | fs/nfs/flexfilelayout/flexfilelayoutdev.c |  2
-rw-r--r-- | fs/nfs/inode.c                            |  2
-rw-r--r-- | fs/nfs/internal.h                         |  6
-rw-r--r-- | fs/nfs/nfs4file.c                         | 33
-rw-r--r-- | fs/nfs/nfs4proc.c                         | 63
-rw-r--r-- | fs/nfs/nfs4session.c                      | 54
-rw-r--r-- | fs/nfs/nfs4session.h                      |  8
-rw-r--r-- | fs/nfs/pnfs_nfs.c                         | 16
-rw-r--r-- | fs/overlayfs/copy_up.c                    | 35
-rw-r--r-- | fs/overlayfs/dir.c                        | 61
-rw-r--r-- | fs/overlayfs/overlayfs.h                  |  1
-rw-r--r-- | fs/overlayfs/readdir.c                    | 53
-rw-r--r-- | fs/overlayfs/super.c                      | 18
21 files changed, 369 insertions, 161 deletions
diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c
index 8e3ee1936c7e..c5b6b7165489 100644
--- a/fs/fuse/cuse.c
+++ b/fs/fuse/cuse.c
@@ -90,7 +90,7 @@ static struct list_head *cuse_conntbl_head(dev_t devt)
 
 static ssize_t cuse_read_iter(struct kiocb *kiocb, struct iov_iter *to)
 {
-	struct fuse_io_priv io = { .async = 0, .file = kiocb->ki_filp };
+	struct fuse_io_priv io = FUSE_IO_PRIV_SYNC(kiocb->ki_filp);
 	loff_t pos = 0;
 
 	return fuse_direct_io(&io, to, &pos, FUSE_DIO_CUSE);
@@ -98,7 +98,7 @@ static ssize_t cuse_read_iter(struct kiocb *kiocb, struct iov_iter *to)
 
 static ssize_t cuse_write_iter(struct kiocb *kiocb, struct iov_iter *from)
 {
-	struct fuse_io_priv io = { .async = 0, .file = kiocb->ki_filp };
+	struct fuse_io_priv io = FUSE_IO_PRIV_SYNC(kiocb->ki_filp);
 	loff_t pos = 0;
 	/*
 	 * No locking or generic_write_checks(), the server is
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index b03d253ece15..9dde38f12c07 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -528,6 +528,11 @@ static void fuse_release_user_pages(struct fuse_req *req, int write)
 	}
 }
 
+static void fuse_io_release(struct kref *kref)
+{
+	kfree(container_of(kref, struct fuse_io_priv, refcnt));
+}
+
 static ssize_t fuse_get_res_by_io(struct fuse_io_priv *io)
 {
 	if (io->err)
@@ -585,8 +590,9 @@ static void fuse_aio_complete(struct fuse_io_priv *io, int err, ssize_t pos)
 		}
 
 		io->iocb->ki_complete(io->iocb, res, 0);
-		kfree(io);
 	}
+
+	kref_put(&io->refcnt, fuse_io_release);
 }
 
 static void fuse_aio_complete_req(struct fuse_conn *fc, struct fuse_req *req)
@@ -613,6 +619,7 @@ static size_t fuse_async_req_send(struct fuse_conn *fc, struct fuse_req *req,
 		size_t num_bytes, struct fuse_io_priv *io)
 {
 	spin_lock(&io->lock);
+	kref_get(&io->refcnt);
 	io->size += num_bytes;
 	io->reqs++;
 	spin_unlock(&io->lock);
@@ -691,7 +698,7 @@ static void fuse_short_read(struct fuse_req *req, struct inode *inode,
 
 static int fuse_do_readpage(struct file *file, struct page *page)
 {
-	struct fuse_io_priv io = { .async = 0, .file = file };
+	struct fuse_io_priv io = FUSE_IO_PRIV_SYNC(file);
 	struct inode *inode = page->mapping->host;
 	struct fuse_conn *fc = get_fuse_conn(inode);
 	struct fuse_req *req;
@@ -984,7 +991,7 @@ static size_t fuse_send_write_pages(struct fuse_req *req, struct file *file,
 	size_t res;
 	unsigned offset;
 	unsigned i;
-	struct fuse_io_priv io = { .async = 0, .file = file };
+	struct fuse_io_priv io = FUSE_IO_PRIV_SYNC(file);
 
 	for (i = 0; i < req->num_pages; i++)
 		fuse_wait_on_page_writeback(inode, req->pages[i]->index);
@@ -1240,6 +1247,7 @@ static int fuse_get_user_pages(struct fuse_req *req, struct iov_iter *ii,
 			       size_t *nbytesp, int write)
 {
 	size_t nbytes = 0;  /* # bytes already packed in req */
+	ssize_t ret = 0;
 
 	/* Special case for kernel I/O: can copy directly into the buffer */
 	if (ii->type & ITER_KVEC) {
@@ -1259,13 +1267,12 @@ static int fuse_get_user_pages(struct fuse_req *req, struct iov_iter *ii,
 	while (nbytes < *nbytesp && req->num_pages < req->max_pages) {
 		unsigned npages;
 		size_t start;
-		ssize_t ret = iov_iter_get_pages(ii,
-					&req->pages[req->num_pages],
+		ret = iov_iter_get_pages(ii, &req->pages[req->num_pages],
 					*nbytesp - nbytes,
 					req->max_pages - req->num_pages,
 					&start);
 		if (ret < 0)
-			return ret;
+			break;
 
 		iov_iter_advance(ii, ret);
 		nbytes += ret;
@@ -1288,7 +1295,7 @@ static int fuse_get_user_pages(struct fuse_req *req, struct iov_iter *ii,
 
 	*nbytesp = nbytes;
 
-	return 0;
+	return ret;
 }
 
 static inline int fuse_iter_npages(const struct iov_iter *ii_p)
@@ -1312,6 +1319,7 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
 	pgoff_t idx_to = (pos + count - 1) >> PAGE_CACHE_SHIFT;
 	ssize_t res = 0;
 	struct fuse_req *req;
+	int err = 0;
 
 	if (io->async)
 		req = fuse_get_req_for_background(fc, fuse_iter_npages(iter));
@@ -1332,11 +1340,9 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
 		size_t nres;
 		fl_owner_t owner = current->files;
 		size_t nbytes = min(count, nmax);
-		int err = fuse_get_user_pages(req, iter, &nbytes, write);
-		if (err) {
-			res = err;
+		err = fuse_get_user_pages(req, iter, &nbytes, write);
+		if (err && !nbytes)
 			break;
-		}
 
 		if (write)
 			nres = fuse_send_write(req, io, pos, nbytes, owner);
@@ -1346,11 +1352,11 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
 		if (!io->async)
 			fuse_release_user_pages(req, !write);
 		if (req->out.h.error) {
-			if (!res)
-				res = req->out.h.error;
+			err = req->out.h.error;
 			break;
 		} else if (nres > nbytes) {
-			res = -EIO;
+			res = 0;
+			err = -EIO;
 			break;
 		}
 		count -= nres;
@@ -1374,7 +1380,7 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
 	if (res > 0)
 		*ppos = pos;
 
-	return res;
+	return res > 0 ? res : err;
 }
 EXPORT_SYMBOL_GPL(fuse_direct_io);
 
@@ -1398,7 +1404,7 @@ static ssize_t __fuse_direct_read(struct fuse_io_priv *io,
 
 static ssize_t fuse_direct_read_iter(struct kiocb *iocb, struct iov_iter *to)
 {
-	struct fuse_io_priv io = { .async = 0, .file = iocb->ki_filp };
+	struct fuse_io_priv io = FUSE_IO_PRIV_SYNC(iocb->ki_filp);
 
 	return __fuse_direct_read(&io, to, &iocb->ki_pos);
 }
@@ -1406,7 +1412,7 @@ static ssize_t fuse_direct_write_iter(struct kiocb *iocb, struct iov_iter *from)
 {
 	struct file *file = iocb->ki_filp;
 	struct inode *inode = file_inode(file);
-	struct fuse_io_priv io = { .async = 0, .file = file };
+	struct fuse_io_priv io = FUSE_IO_PRIV_SYNC(file);
 	ssize_t res;
 
 	if (is_bad_inode(inode))
@@ -2843,6 +2849,7 @@ fuse_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset)
 	loff_t i_size;
 	size_t count = iov_iter_count(iter);
 	struct fuse_io_priv *io;
+	bool is_sync = is_sync_kiocb(iocb);
 
 	pos = offset;
 	inode = file->f_mapping->host;
@@ -2863,6 +2870,7 @@ fuse_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset)
 	if (!io)
 		return -ENOMEM;
 	spin_lock_init(&io->lock);
+	kref_init(&io->refcnt);
 	io->reqs = 1;
 	io->bytes = -1;
 	io->size = 0;
@@ -2882,12 +2890,18 @@ fuse_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset)
 	 * to wait on real async I/O requests, so we must submit this request
 	 * synchronously.
 	 */
-	if (!is_sync_kiocb(iocb) && (offset + count > i_size) &&
+	if (!is_sync && (offset + count > i_size) &&
 	    iov_iter_rw(iter) == WRITE)
 		io->async = false;
 
-	if (io->async && is_sync_kiocb(iocb))
+	if (io->async && is_sync) {
+		/*
+		 * Additional reference to keep io around after
+		 * calling fuse_aio_complete()
+		 */
+		kref_get(&io->refcnt);
 		io->done = &wait;
+	}
 
 	if (iov_iter_rw(iter) == WRITE) {
 		ret = fuse_direct_io(io, iter, &pos, FUSE_DIO_WRITE);
@@ -2900,14 +2914,14 @@ fuse_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset)
 		fuse_aio_complete(io, ret < 0 ? ret : 0, -1);
 
 		/* we have a non-extending, async request, so return */
-		if (!is_sync_kiocb(iocb))
+		if (!is_sync)
 			return -EIOCBQUEUED;
 
 		wait_for_completion(&wait);
 		ret = fuse_get_res_by_io(io);
 	}
 
-	kfree(io);
+	kref_put(&io->refcnt, fuse_io_release);
 
 	if (iov_iter_rw(iter) == WRITE) {
 		if (ret > 0)
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index ce394b5fe6b4..eddbe02c4028 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -22,6 +22,7 @@
 #include <linux/rbtree.h>
 #include <linux/poll.h>
 #include <linux/workqueue.h>
+#include <linux/kref.h>
 
 /** Max number of pages that can be used in a single read request */
 #define FUSE_MAX_PAGES_PER_REQ 32
@@ -243,6 +244,7 @@ struct fuse_args {
 
 /** The request IO state (for asynchronous processing) */
 struct fuse_io_priv {
+	struct kref refcnt;
 	int async;
 	spinlock_t lock;
 	unsigned reqs;
@@ -256,6 +258,13 @@ struct fuse_io_priv {
 	struct completion *done;
 };
 
+#define FUSE_IO_PRIV_SYNC(f) \
+{					\
+	.refcnt = { ATOMIC_INIT(1) },	\
+	.async = 0,			\
+	.file = f,			\
+}
+
 /**
  * Request flags
  *
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index ddd0138f410c..8bc870e4c467 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -743,7 +743,7 @@ bl_set_layoutdriver(struct nfs_server *server, const struct nfs_fh *fh)
 
 static bool
 is_aligned_req(struct nfs_pageio_descriptor *pgio,
-		struct nfs_page *req, unsigned int alignment)
+		struct nfs_page *req, unsigned int alignment, bool is_write)
 {
 	/*
 	 * Always accept buffered writes, higher layers take care of the
@@ -758,7 +758,8 @@ is_aligned_req(struct nfs_pageio_descriptor *pgio,
 	if (IS_ALIGNED(req->wb_bytes, alignment))
 		return true;
 
-	if (req_offset(req) + req->wb_bytes == i_size_read(pgio->pg_inode)) {
+	if (is_write &&
+	    (req_offset(req) + req->wb_bytes == i_size_read(pgio->pg_inode))) {
 		/*
 		 * If the write goes up to the inode size, just write
 		 * the full page.  Data past the inode size is
@@ -775,7 +776,7 @@ is_aligned_req(struct nfs_pageio_descriptor *pgio,
 static void
 bl_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
 {
-	if (!is_aligned_req(pgio, req, SECTOR_SIZE)) {
+	if (!is_aligned_req(pgio, req, SECTOR_SIZE, false)) {
 		nfs_pageio_reset_read_mds(pgio);
 		return;
 	}
@@ -791,7 +792,7 @@ static size_t
 bl_pg_test_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
 		struct nfs_page *req)
 {
-	if (!is_aligned_req(pgio, req, SECTOR_SIZE))
+	if (!is_aligned_req(pgio, req, SECTOR_SIZE, false))
 		return 0;
 	return pnfs_generic_pg_test(pgio, prev, req);
 }
@@ -824,7 +825,7 @@ bl_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
 {
 	u64 wb_size;
 
-	if (!is_aligned_req(pgio, req, PAGE_SIZE)) {
+	if (!is_aligned_req(pgio, req, PAGE_SIZE, true)) {
 		nfs_pageio_reset_write_mds(pgio);
 		return;
 	}
@@ -846,7 +847,7 @@ static size_t
 bl_pg_test_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
 		 struct nfs_page *req)
 {
-	if (!is_aligned_req(pgio, req, PAGE_SIZE))
+	if (!is_aligned_req(pgio, req, PAGE_SIZE, true))
 		return 0;
 	return pnfs_generic_pg_test(pgio, prev, req);
 }
diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h
index ff8195bd75ea..5fe1cecbf9f0 100644
--- a/fs/nfs/callback.h
+++ b/fs/nfs/callback.h
@@ -37,10 +37,11 @@ enum nfs4_callback_opnum {
 	OP_CB_ILLEGAL = 10044,
 };
 
+struct nfs4_slot;
 struct cb_process_state {
 	__be32			drc_status;
 	struct nfs_client	*clp;
-	u32			slotid;
+	struct nfs4_slot	*slot;
 	u32			minorversion;
 	struct net		*net;
 };
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index f0939d097406..618ced381a14 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -354,47 +354,38 @@ out:
  * a single outstanding callback request at a time.
  */
 static __be32
-validate_seqid(struct nfs4_slot_table *tbl, struct cb_sequenceargs * args)
+validate_seqid(const struct nfs4_slot_table *tbl, const struct nfs4_slot *slot,
+		const struct cb_sequenceargs * args)
 {
-	struct nfs4_slot *slot;
-
-	dprintk("%s enter. slotid %u seqid %u\n",
-		__func__, args->csa_slotid, args->csa_sequenceid);
+	dprintk("%s enter. slotid %u seqid %u, slot table seqid: %u\n",
+		__func__, args->csa_slotid, args->csa_sequenceid, slot->seq_nr);
 
-	if (args->csa_slotid >= NFS41_BC_MAX_CALLBACKS)
+	if (args->csa_slotid > tbl->server_highest_slotid)
 		return htonl(NFS4ERR_BADSLOT);
 
-	slot = tbl->slots + args->csa_slotid;
-	dprintk("%s slot table seqid: %u\n", __func__, slot->seq_nr);
-
-	/* Normal */
-	if (likely(args->csa_sequenceid == slot->seq_nr + 1))
-		goto out_ok;
-
 	/* Replay */
 	if (args->csa_sequenceid == slot->seq_nr) {
 		dprintk("%s seqid %u is a replay\n",
			__func__, args->csa_sequenceid);
+		if (nfs4_test_locked_slot(tbl, slot->slot_nr))
+			return htonl(NFS4ERR_DELAY);
 		/* Signal process_op to set this error on next op */
 		if (args->csa_cachethis == 0)
 			return htonl(NFS4ERR_RETRY_UNCACHED_REP);
 
-		/* The ca_maxresponsesize_cached is 0 with no DRC */
-		else if (args->csa_cachethis == 1)
-			return htonl(NFS4ERR_REP_TOO_BIG_TO_CACHE);
+		/* Liar! We never allowed you to set csa_cachethis != 0 */
+		return htonl(NFS4ERR_SEQ_FALSE_RETRY);
 	}
 
 	/* Wraparound */
-	if (args->csa_sequenceid == 1 && (slot->seq_nr + 1) == 0) {
-		slot->seq_nr = 1;
-		goto out_ok;
-	}
+	if (unlikely(slot->seq_nr == 0xFFFFFFFFU)) {
+		if (args->csa_sequenceid == 1)
+			return htonl(NFS4_OK);
+	} else if (likely(args->csa_sequenceid == slot->seq_nr + 1))
+		return htonl(NFS4_OK);
 
 	/* Misordered request */
 	return htonl(NFS4ERR_SEQ_MISORDERED);
-out_ok:
-	tbl->highest_used_slotid = args->csa_slotid;
-	return htonl(NFS4_OK);
 }
 
 /*
@@ -473,6 +464,12 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args,
 	tbl = &clp->cl_session->bc_slot_table;
 	slot = tbl->slots + args->csa_slotid;
 
+	/* Set up res before grabbing the spinlock */
+	memcpy(&res->csr_sessionid, &args->csa_sessionid,
+	       sizeof(res->csr_sessionid));
+	res->csr_sequenceid = args->csa_sequenceid;
+	res->csr_slotid = args->csa_slotid;
+
 	spin_lock(&tbl->slot_tbl_lock);
 	/* state manager is resetting the session */
 	if (test_bit(NFS4_SLOT_TBL_DRAINING, &tbl->slot_tbl_state)) {
@@ -485,18 +482,26 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args,
 		goto out_unlock;
 	}
 
-	memcpy(&res->csr_sessionid, &args->csa_sessionid,
-	       sizeof(res->csr_sessionid));
-	res->csr_sequenceid = args->csa_sequenceid;
-	res->csr_slotid = args->csa_slotid;
-	res->csr_highestslotid = NFS41_BC_MAX_CALLBACKS - 1;
-	res->csr_target_highestslotid = NFS41_BC_MAX_CALLBACKS - 1;
+	status = htonl(NFS4ERR_BADSLOT);
+	slot = nfs4_lookup_slot(tbl, args->csa_slotid);
+	if (IS_ERR(slot))
+		goto out_unlock;
+
+	res->csr_highestslotid = tbl->server_highest_slotid;
+	res->csr_target_highestslotid = tbl->target_highest_slotid;
 
-	status = validate_seqid(tbl, args);
+	status = validate_seqid(tbl, slot, args);
 	if (status)
 		goto out_unlock;
 
+	if (!nfs4_try_to_lock_slot(tbl, slot)) {
+		status = htonl(NFS4ERR_DELAY);
+		goto out_unlock;
+	}
+	cps->slot = slot;
-	cps->slotid = args->csa_slotid;
+
+	/* The ca_maxresponsesize_cached is 0 with no DRC */
+	if (args->csa_cachethis != 0)
+		return htonl(NFS4ERR_REP_TOO_BIG_TO_CACHE);
 
 	/*
 	 * Check for pending referring calls.  If a match is found, a
@@ -513,7 +518,7 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args,
 	 * If CB_SEQUENCE returns an error, then the state of the slot
 	 * (sequence ID, cached reply) MUST NOT change.
 	 */
-	slot->seq_nr++;
+	slot->seq_nr = args->csa_sequenceid;
 out_unlock:
 	spin_unlock(&tbl->slot_tbl_lock);
 
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index 646cdac73488..976c90608e56 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -752,7 +752,8 @@ preprocess_nfs41_op(int nop, unsigned int op_nr, struct callback_op **op)
 	return htonl(NFS_OK);
 }
 
-static void nfs4_callback_free_slot(struct nfs4_session *session)
+static void nfs4_callback_free_slot(struct nfs4_session *session,
+		struct nfs4_slot *slot)
 {
 	struct nfs4_slot_table *tbl = &session->bc_slot_table;
 
@@ -761,15 +762,17 @@ static void nfs4_callback_free_slot(struct nfs4_session *session)
 	 * Let the state manager know callback processing done.
 	 * A single slot, so highest used slotid is either 0 or -1
 	 */
-	tbl->highest_used_slotid = NFS4_NO_SLOT;
+	nfs4_free_slot(tbl, slot);
 	nfs4_slot_tbl_drain_complete(tbl);
 	spin_unlock(&tbl->slot_tbl_lock);
 }
 
 static void nfs4_cb_free_slot(struct cb_process_state *cps)
 {
-	if (cps->slotid != NFS4_NO_SLOT)
-		nfs4_callback_free_slot(cps->clp->cl_session);
+	if (cps->slot) {
+		nfs4_callback_free_slot(cps->clp->cl_session, cps->slot);
+		cps->slot = NULL;
+	}
 }
 
 #else /* CONFIG_NFS_V4_1 */
@@ -893,7 +896,6 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r
 	struct cb_process_state cps = {
 		.drc_status = 0,
 		.clp = NULL,
-		.slotid = NFS4_NO_SLOT,
 		.net = SVC_NET(rqstp),
 	};
 	unsigned int nops = 0;
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 748bb813b8ec..89bf093d342a 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -233,7 +233,7 @@ EXPORT_SYMBOL_GPL(nfs_file_mmap);
  * nfs_file_write() that a write error occurred, and hence cause it to
  * fall back to doing a synchronous write.
  */
-int
+static int
 nfs_file_fsync_commit(struct file *file, loff_t start, loff_t end, int datasync)
 {
 	struct nfs_open_context *ctx = nfs_file_open_context(file);
@@ -263,9 +263,8 @@ nfs_file_fsync_commit(struct file *file, loff_t start, loff_t end, int datasync)
 out:
 	return ret;
 }
-EXPORT_SYMBOL_GPL(nfs_file_fsync_commit);
 
-static int
+int
 nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
 {
 	int ret;
@@ -273,13 +272,15 @@ nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
 
 	trace_nfs_fsync_enter(inode);
 
-	nfs_inode_dio_wait(inode);
+	inode_dio_wait(inode);
 	do {
 		ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
 		if (ret != 0)
 			break;
 		inode_lock(inode);
 		ret = nfs_file_fsync_commit(file, start, end, datasync);
+		if (!ret)
+			ret = pnfs_sync_inode(inode, !!datasync);
 		inode_unlock(inode);
 		/*
 		 * If nfs_file_fsync_commit detected a server reboot, then
@@ -293,6 +294,7 @@ nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
 	trace_nfs_fsync_exit(inode, ret);
 	return ret;
 }
+EXPORT_SYMBOL_GPL(nfs_file_fsync);
 
 /*
  * Decide whether a read/modify/write cycle may be more efficient
@@ -368,7 +370,7 @@ start:
 	/*
 	 * Wait for O_DIRECT to complete
 	 */
-	nfs_inode_dio_wait(mapping->host);
+	inode_dio_wait(mapping->host);
 
 	page = grab_cache_page_write_begin(mapping, index, flags);
 	if (!page)
diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
index eb370460ce20..add0e5a70bd6 100644
--- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c
+++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
@@ -418,6 +418,8 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx,
 			pnfs_error_mark_layout_for_return(ino, lseg);
 		} else
 			pnfs_error_mark_layout_for_return(ino, lseg);
+		ds = NULL;
+		goto out;
 	}
 out_update_creds:
 	if (ff_layout_update_mirror_cred(mirror, ds))
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 86faecf8f328..33d18c411905 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -141,7 +141,7 @@ void nfs_evict_inode(struct inode *inode)
 
 int nfs_sync_inode(struct inode *inode)
 {
-	nfs_inode_dio_wait(inode);
+	inode_dio_wait(inode);
 	return nfs_wb_all(inode);
 }
 EXPORT_SYMBOL_GPL(nfs_sync_inode);
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 9a547aa3ec8e..565f8135ae1f 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -358,7 +358,7 @@ int nfs_mknod(struct inode *, struct dentry *, umode_t, dev_t);
 int nfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *);
 
 /* file.c */
-int nfs_file_fsync_commit(struct file *, loff_t, loff_t, int);
+int nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync);
 loff_t nfs_file_llseek(struct file *, loff_t, int);
 ssize_t nfs_file_read(struct kiocb *, struct iov_iter *);
 ssize_t nfs_file_splice_read(struct file *, loff_t *, struct pipe_inode_info *,
@@ -515,10 +515,6 @@ extern int nfs_sillyrename(struct inode *dir, struct dentry *dentry);
 /* direct.c */
 void nfs_init_cinfo_from_dreq(struct nfs_commit_info *cinfo,
 			      struct nfs_direct_req *dreq);
-static inline void nfs_inode_dio_wait(struct inode *inode)
-{
-	inode_dio_wait(inode);
-}
 extern ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq);
 
 /* nfs4proc.c */
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index 57ca1c8039c1..22c35abbee9d 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -128,37 +128,6 @@ nfs4_file_flush(struct file *file, fl_owner_t id)
 	return vfs_fsync(file, 0);
 }
 
-static int
-nfs4_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
-{
-	int ret;
-	struct inode *inode = file_inode(file);
-
-	trace_nfs_fsync_enter(inode);
-
-	nfs_inode_dio_wait(inode);
-	do {
-		ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
-		if (ret != 0)
-			break;
-		inode_lock(inode);
-		ret = nfs_file_fsync_commit(file, start, end, datasync);
-		if (!ret)
-			ret = pnfs_sync_inode(inode, !!datasync);
-		inode_unlock(inode);
-		/*
-		 * If nfs_file_fsync_commit detected a server reboot, then
-		 * resend all dirty pages that might have been covered by
-		 * the NFS_CONTEXT_RESEND_WRITES flag
-		 */
-		start = 0;
-		end = LLONG_MAX;
-	} while (ret == -EAGAIN);
-
-	trace_nfs_fsync_exit(inode, ret);
-	return ret;
-}
-
 #ifdef CONFIG_NFS_V4_2
 static loff_t nfs4_file_llseek(struct file *filep, loff_t offset, int whence)
 {
@@ -266,7 +235,7 @@ const struct file_operations nfs4_file_operations = {
 	.open		= nfs4_file_open,
 	.flush		= nfs4_file_flush,
 	.release	= nfs_file_release,
-	.fsync		= nfs4_file_fsync,
+	.fsync		= nfs_file_fsync,
 	.lock		= nfs_lock,
 	.flock		= nfs_flock,
 	.splice_read	= nfs_file_splice_read,
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 400a70b3be7b..327b8c34d360 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -6783,13 +6783,26 @@ nfs41_same_server_scope(struct nfs41_server_scope *a,
 	return false;
 }
 
+static void
+nfs4_bind_one_conn_to_session_done(struct rpc_task *task, void *calldata)
+{
+}
+
+static const struct rpc_call_ops nfs4_bind_one_conn_to_session_ops = {
+	.rpc_call_done = &nfs4_bind_one_conn_to_session_done,
+};
+
 /*
- * nfs4_proc_bind_conn_to_session()
+ * nfs4_proc_bind_one_conn_to_session()
  *
  * The 4.1 client currently uses the same TCP connection for the
  * fore and backchannel.
 */
-int nfs4_proc_bind_conn_to_session(struct nfs_client *clp, struct rpc_cred *cred)
+static
+int nfs4_proc_bind_one_conn_to_session(struct rpc_clnt *clnt,
+		struct rpc_xprt *xprt,
+		struct nfs_client *clp,
+		struct rpc_cred *cred)
 {
 	int status;
 	struct nfs41_bind_conn_to_session_args args = {
@@ -6804,6 +6817,14 @@ int nfs4_proc_bind_conn_to_session(struct nfs_client *clp, struct rpc_cred *cred
 		.rpc_resp = &res,
 		.rpc_cred = cred,
 	};
+	struct rpc_task_setup task_setup_data = {
+		.rpc_client = clnt,
+		.rpc_xprt = xprt,
+		.callback_ops = &nfs4_bind_one_conn_to_session_ops,
+		.rpc_message = &msg,
+		.flags = RPC_TASK_TIMEOUT,
+	};
+	struct rpc_task *task;
 
 	dprintk("--> %s\n", __func__);
 
@@ -6811,7 +6832,16 @@ int nfs4_proc_bind_conn_to_session(struct nfs_client *clp, struct rpc_cred *cred
 	if (!(clp->cl_session->flags & SESSION4_BACK_CHAN))
 		args.dir = NFS4_CDFC4_FORE;
 
-	status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
+	/* Do not set the backchannel flag unless this is clnt->cl_xprt */
+	if (xprt != rcu_access_pointer(clnt->cl_xprt))
+		args.dir = NFS4_CDFC4_FORE;
+
+	task = rpc_run_task(&task_setup_data);
+	if (!IS_ERR(task)) {
+		status = task->tk_status;
+		rpc_put_task(task);
+	} else
+		status = PTR_ERR(task);
 	trace_nfs4_bind_conn_to_session(clp, status);
 	if (status == 0) {
 		if (memcmp(res.sessionid.data,
@@ -6838,6 +6868,31 @@ out:
 	return status;
 }
 
+struct rpc_bind_conn_calldata {
+	struct nfs_client *clp;
+	struct rpc_cred *cred;
+};
+
+static int
+nfs4_proc_bind_conn_to_session_callback(struct rpc_clnt *clnt,
+		struct rpc_xprt *xprt,
+		void *calldata)
+{
+	struct rpc_bind_conn_calldata *p = calldata;
+
+	return nfs4_proc_bind_one_conn_to_session(clnt, xprt, p->clp, p->cred);
+}
+
+int nfs4_proc_bind_conn_to_session(struct nfs_client *clp, struct rpc_cred *cred)
+{
+	struct rpc_bind_conn_calldata data = {
+		.clp = clp,
+		.cred = cred,
+	};
+	return rpc_clnt_iterate_for_each_xprt(clp->cl_rpcclient,
+			nfs4_proc_bind_conn_to_session_callback, &data);
+}
+
 /*
  * Minimum set of SP4_MACH_CRED operations from RFC 5661 in the enforce map
  * and operations we'd like to see to enable certain features in the allow map
@@ -7320,7 +7375,7 @@ static void nfs4_init_channel_attrs(struct nfs41_create_session_args *args)
 	args->bc_attrs.max_resp_sz = PAGE_SIZE;
 	args->bc_attrs.max_resp_sz_cached = 0;
 	args->bc_attrs.max_ops = NFS4_MAX_BACK_CHANNEL_OPS;
-	args->bc_attrs.max_reqs = 1;
+	args->bc_attrs.max_reqs = NFS41_BC_MAX_CALLBACKS;
 
 	dprintk("%s: Back Channel : max_rqst_sz=%u max_resp_sz=%u "
 		"max_resp_sz_cached=%u max_ops=%u max_reqs=%u\n",
diff --git a/fs/nfs/nfs4session.c b/fs/nfs/nfs4session.c
index e23366effcfb..332d06e64fa9 100644
--- a/fs/nfs/nfs4session.c
+++ b/fs/nfs/nfs4session.c
@@ -135,6 +135,43 @@ static struct nfs4_slot *nfs4_find_or_create_slot(struct nfs4_slot_table *tbl,
 	return ERR_PTR(-ENOMEM);
 }
 
+static void nfs4_lock_slot(struct nfs4_slot_table *tbl,
+		struct nfs4_slot *slot)
+{
+	u32 slotid = slot->slot_nr;
+
+	__set_bit(slotid, tbl->used_slots);
+	if (slotid > tbl->highest_used_slotid ||
+	    tbl->highest_used_slotid == NFS4_NO_SLOT)
+		tbl->highest_used_slotid = slotid;
+	slot->generation = tbl->generation;
+}
+
+/*
+ * nfs4_try_to_lock_slot - Given a slot try to allocate it
+ *
+ * Note: must be called with the slot_tbl_lock held.
+ */
+bool nfs4_try_to_lock_slot(struct nfs4_slot_table *tbl, struct nfs4_slot *slot)
+{
+	if (nfs4_test_locked_slot(tbl, slot->slot_nr))
+		return false;
+	nfs4_lock_slot(tbl, slot);
+	return true;
+}
+
+/*
+ * nfs4_lookup_slot - Find a slot but don't allocate it
+ *
+ * Note: must be called with the slot_tbl_lock held.
+ */
+struct nfs4_slot *nfs4_lookup_slot(struct nfs4_slot_table *tbl, u32 slotid)
+{
+	if (slotid <= tbl->max_slotid)
+		return nfs4_find_or_create_slot(tbl, slotid, 1, GFP_NOWAIT);
+	return ERR_PTR(-E2BIG);
+}
+
 /*
  * nfs4_alloc_slot - efficiently look for a free slot
  *
@@ -153,18 +190,11 @@ struct nfs4_slot *nfs4_alloc_slot(struct nfs4_slot_table *tbl)
 		__func__, tbl->used_slots[0], tbl->highest_used_slotid,
 		tbl->max_slotid + 1);
 	slotid = find_first_zero_bit(tbl->used_slots, tbl->max_slotid + 1);
-	if (slotid > tbl->max_slotid)
-		goto out;
-	ret = nfs4_find_or_create_slot(tbl, slotid, 1, GFP_NOWAIT);
-	if (IS_ERR(ret))
-		goto out;
-	__set_bit(slotid, tbl->used_slots);
-	if (slotid > tbl->highest_used_slotid ||
-			tbl->highest_used_slotid == NFS4_NO_SLOT)
-		tbl->highest_used_slotid = slotid;
-	ret->generation = tbl->generation;
-
-out:
+	if (slotid <= tbl->max_slotid) {
+		ret = nfs4_find_or_create_slot(tbl, slotid, 1, GFP_NOWAIT);
+		if (!IS_ERR(ret))
+			nfs4_lock_slot(tbl, ret);
+	}
 	dprintk("<-- %s used_slots=%04lx highest_used=%u slotid=%u\n",
 		__func__, tbl->used_slots[0], tbl->highest_used_slotid,
 		!IS_ERR(ret) ? ret->slot_nr : NFS4_NO_SLOT);
diff --git a/fs/nfs/nfs4session.h b/fs/nfs/nfs4session.h
index e3ea2c5324d6..5b51298d1d03 100644
--- a/fs/nfs/nfs4session.h
+++ b/fs/nfs/nfs4session.h
@@ -77,6 +77,8 @@ extern int nfs4_setup_slot_table(struct nfs4_slot_table *tbl,
 		unsigned int max_reqs, const char *queue);
 extern void nfs4_shutdown_slot_table(struct nfs4_slot_table *tbl);
 extern struct nfs4_slot *nfs4_alloc_slot(struct nfs4_slot_table *tbl);
+extern struct nfs4_slot *nfs4_lookup_slot(struct nfs4_slot_table *tbl, u32 slotid);
+extern bool nfs4_try_to_lock_slot(struct nfs4_slot_table *tbl, struct nfs4_slot *slot);
 extern void nfs4_free_slot(struct nfs4_slot_table *tbl, struct nfs4_slot *slot);
 extern void nfs4_slot_tbl_drain_complete(struct nfs4_slot_table *tbl);
 bool nfs41_wake_and_assign_slot(struct nfs4_slot_table *tbl,
@@ -88,6 +90,12 @@ static inline bool nfs4_slot_tbl_draining(struct nfs4_slot_table *tbl)
 	return !!test_bit(NFS4_SLOT_TBL_DRAINING, &tbl->slot_tbl_state);
 }
 
+static inline bool nfs4_test_locked_slot(const struct nfs4_slot_table *tbl,
+		u32 slotid)
+{
+	return !!test_bit(slotid, tbl->used_slots);
+}
+
 #if defined(CONFIG_NFS_V4_1)
 extern void nfs41_set_target_slotid(struct nfs4_slot_table *tbl,
 		u32 target_highest_slotid);
diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c
index 81ac6480f9e7..4aaed890048f 100644
--- a/fs/nfs/pnfs_nfs.c
+++ b/fs/nfs/pnfs_nfs.c
@@ -606,12 +606,22 @@ static int _nfs4_pnfs_v3_ds_connect(struct nfs_server *mds_srv,
 		dprintk("%s: DS %s: trying address %s\n",
 			__func__, ds->ds_remotestr, da->da_remotestr);
 
-		clp = get_v3_ds_connect(mds_srv->nfs_client,
+		if (!IS_ERR(clp)) {
+			struct xprt_create xprt_args = {
+				.ident = XPRT_TRANSPORT_TCP,
+				.net = clp->cl_net,
+				.dstaddr = (struct sockaddr *)&da->da_addr,
+				.addrlen = da->da_addrlen,
+				.servername = clp->cl_hostname,
+			};
+			/* Add this address as an alias */
+			rpc_clnt_add_xprt(clp->cl_rpcclient, &xprt_args,
+					rpc_clnt_test_and_add_xprt, NULL);
+		} else
+			clp = get_v3_ds_connect(mds_srv->nfs_client,
 					(struct sockaddr *)&da->da_addr,
 					da->da_addrlen, IPPROTO_TCP,
 					timeo, retrans, au_flavor);
-		if (!IS_ERR(clp))
-			break;
 	}
 
 	if (IS_ERR(clp)) {
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index d894e7cd9a86..cc514da6f3e7 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -7,6 +7,7 @@
  * the Free Software Foundation.
 */
 
+#include <linux/module.h>
 #include <linux/fs.h>
 #include <linux/slab.h>
 #include <linux/file.h>
@@ -16,10 +17,41 @@
 #include <linux/uaccess.h>
 #include <linux/sched.h>
 #include <linux/namei.h>
+#include <linux/fdtable.h>
+#include <linux/ratelimit.h>
 #include "overlayfs.h"
 
 #define OVL_COPY_UP_CHUNK_SIZE (1 << 20)
 
+static bool __read_mostly ovl_check_copy_up;
+module_param_named(check_copy_up, ovl_check_copy_up, bool,
+		   S_IWUSR | S_IRUGO);
+MODULE_PARM_DESC(ovl_check_copy_up,
+		 "Warn on copy-up when causing process also has a R/O fd open");
+
+static int ovl_check_fd(const void *data, struct file *f, unsigned int fd)
+{
+	const struct dentry *dentry = data;
+
+	if (f->f_inode == d_inode(dentry))
+		pr_warn_ratelimited("overlayfs: Warning: Copying up %pD, but open R/O on fd %u which will cease to be coherent [pid=%d %s]\n",
+				    f, fd, current->pid, current->comm);
+	return 0;
+}
+
+/*
+ * Check the fds open by this process and warn if something like the following
+ * scenario is about to occur:
+ *
+ *	fd1 = open("foo", O_RDONLY);
+ *	fd2 = open("foo", O_RDWR);
+ */
+static void ovl_do_check_copy_up(struct dentry *dentry)
+{
+	if (ovl_check_copy_up)
+		iterate_fd(current->files, 0, ovl_check_fd, dentry);
+}
+
 int ovl_copy_xattr(struct dentry *old, struct dentry *new)
 {
 	ssize_t list_size, size, value_size = 0;
@@ -235,6 +267,7 @@ static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir,
 
 	if (S_ISREG(stat->mode)) {
 		struct path upperpath;
+
 		ovl_path_upper(dentry, &upperpath);
 		BUG_ON(upperpath.dentry != NULL);
 		upperpath.dentry = newdentry;
@@ -309,6 +342,8 @@ int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
 	if (WARN_ON(!workdir))
 		return -EROFS;
 
+	ovl_do_check_copy_up(lowerpath->dentry);
+
 	ovl_path_upper(parent, &parentpath);
 	upperdir = parentpath.dentry;
 
diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index 52f6de5d40a9..b3fc0a35bf62 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -596,21 +596,25 @@ static int ovl_remove_upper(struct dentry *dentry, bool is_dir)
 {
 	struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
 	struct inode *dir = upperdir->d_inode;
-	struct dentry *upper = ovl_dentry_upper(dentry);
+	struct dentry *upper;
 	int err;
 
 	inode_lock_nested(dir, I_MUTEX_PARENT);
+	upper = lookup_one_len(dentry->d_name.name, upperdir,
+			       dentry->d_name.len);
+	err = PTR_ERR(upper);
+	if (IS_ERR(upper))
+		goto out_unlock;
+
 	err = -ESTALE;
-	if (upper->d_parent == upperdir) {
-		/* Don't let d_delete() think it can reset d_inode */
-		dget(upper);
+	if (upper == ovl_dentry_upper(dentry)) {
 		if (is_dir)
 			err = vfs_rmdir(dir, upper);
 		else
 			err = vfs_unlink(dir, upper, NULL);
-		dput(upper);
 		ovl_dentry_version_inc(dentry->d_parent);
 	}
+	dput(upper);
 
 	/*
 	 * Keeping this dentry hashed would mean having to release
@@ -620,6 +624,7 @@ static int ovl_remove_upper(struct dentry *dentry, bool is_dir)
 	 */
 	if (!err)
 		d_drop(dentry);
+out_unlock:
 	inode_unlock(dir);
 
 	return err;
@@ -714,7 +719,6 @@ static int ovl_rename2(struct inode *olddir, struct dentry *old,
 	struct dentry *trap;
 	bool old_opaque;
 	bool new_opaque;
-	bool new_create = false;
 	bool cleanup_whiteout = false;
 	bool overwrite = !(flags & RENAME_EXCHANGE);
 	bool is_dir = d_is_dir(old);
@@ -840,29 +844,38 @@ static int ovl_rename2(struct inode *olddir, struct dentry *old,
 
 	trap = lock_rename(new_upperdir, old_upperdir);
-	olddentry = ovl_dentry_upper(old);
-	newdentry = ovl_dentry_upper(new);
-	if (newdentry) {
+
+	olddentry = lookup_one_len(old->d_name.name, old_upperdir,
+				   old->d_name.len);
+	err = PTR_ERR(olddentry);
+	if (IS_ERR(olddentry))
+		goto out_unlock;
+
+	err = -ESTALE;
+	if (olddentry != ovl_dentry_upper(old))
+		goto out_dput_old;
+
+	newdentry = lookup_one_len(new->d_name.name, new_upperdir,
+				   new->d_name.len);
+	err = PTR_ERR(newdentry);
+	if (IS_ERR(newdentry))
+		goto out_dput_old;
+
+	err = -ESTALE;
+	if (ovl_dentry_upper(new)) {
 		if (opaquedir) {
-			newdentry = opaquedir;
-			opaquedir = NULL;
+			if (newdentry != opaquedir)
+				goto out_dput;
 		} else {
-			dget(newdentry);
+			if (newdentry != ovl_dentry_upper(new))
+				goto out_dput;
 		}
 	} else {
-		new_create = true;
-		newdentry = lookup_one_len(new->d_name.name, new_upperdir,
-					   new->d_name.len);
-		err = PTR_ERR(newdentry);
-		if (IS_ERR(newdentry))
-			goto out_unlock;
+		if (!d_is_negative(newdentry) &&
+		    (!new_opaque || !ovl_is_whiteout(newdentry)))
+			goto out_dput;
 	}
 
-	err = -ESTALE;
-	if (olddentry->d_parent != old_upperdir)
-		goto out_dput;
-	if (newdentry->d_parent != new_upperdir)
-		goto out_dput;
 	if (olddentry == trap)
 		goto out_dput;
 	if (newdentry == trap)
@@ -925,6 +938,8 @@ static int ovl_rename2(struct inode *olddir, struct dentry *old,
 
 out_dput:
 	dput(newdentry);
+out_dput_old:
+	dput(olddentry);
 out_unlock:
 	unlock_rename(new_upperdir, old_upperdir);
 out_revert_creds:
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index 99b4168c36ff..6a7090f4a441 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -166,6 +166,7 @@ extern const struct file_operations ovl_dir_operations;
 int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list);
 void ovl_cleanup_whiteouts(struct dentry *upper, struct list_head *list);
 void ovl_cache_free(struct list_head *list);
+int ovl_check_d_type_supported(struct path *realpath);
 
 /* inode.c */
 int ovl_setattr(struct dentry *dentry, struct iattr *attr);
diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
index fdaf28f75e12..6ec1e43a9a54 100644
--- a/fs/overlayfs/readdir.c
+++ b/fs/overlayfs/readdir.c
@@ -36,13 +36,14 @@ struct ovl_dir_cache {
 
 struct ovl_readdir_data {
 	struct dir_context ctx;
-	bool is_merge;
+	bool is_lowest;
 	struct rb_root root;
 	struct list_head *list;
 	struct list_head middle;
 	struct ovl_cache_entry *first_maybe_whiteout;
 	int count;
 	int err;
+	bool d_type_supported;
 };
 
 struct ovl_dir_file {
@@ -139,9 +140,9 @@ static int ovl_cache_entry_add_rb(struct ovl_readdir_data *rdd,
 	return 0;
 }
 
-static int ovl_fill_lower(struct ovl_readdir_data *rdd,
-			  const char *name, int namelen,
-			  loff_t offset, u64 ino, unsigned int d_type)
+static int ovl_fill_lowest(struct ovl_readdir_data *rdd,
+			   const char *name, int namelen,
+			   loff_t offset, u64 ino, unsigned int d_type)
 {
 	struct ovl_cache_entry *p;
 
@@ -193,10 +194,10 @@ static int ovl_fill_merge(struct dir_context *ctx, const char *name,
 		container_of(ctx, struct ovl_readdir_data, ctx);
 
 	rdd->count++;
-	if (!rdd->is_merge)
+	if (!rdd->is_lowest)
 		return ovl_cache_entry_add_rb(rdd, name, namelen, ino, d_type);
 	else
-		return ovl_fill_lower(rdd, name, namelen, offset, ino, d_type);
+		return ovl_fill_lowest(rdd, name, namelen, offset, ino, d_type);
 }
 
 static int ovl_check_whiteouts(struct dentry *dir, struct ovl_readdir_data *rdd)
@@ -289,7 +290,7 @@ static int ovl_dir_read_merged(struct dentry *dentry, struct list_head *list)
 		.ctx.actor = ovl_fill_merge,
 		.list = list,
 		.root = RB_ROOT,
-		.is_merge = false,
+		.is_lowest = false,
 	};
 	int idx, next;
 
@@ -306,7 +307,7 @@ static int ovl_dir_read_merged(struct dentry *dentry, struct list_head *list)
 			 * allows offsets to be reasonably constant
 			 */
 			list_add(&rdd.middle, rdd.list);
-			rdd.is_merge = true;
+			rdd.is_lowest = true;
 			err = ovl_dir_read(&realpath, &rdd);
 			list_del(&rdd.middle);
 		}
@@ -577,3 +578,39 @@ void ovl_cleanup_whiteouts(struct dentry *upper, struct list_head *list)
 	}
 	inode_unlock(upper->d_inode);
 }
+
+static int ovl_check_d_type(struct dir_context *ctx, const char *name,
+			  int namelen, loff_t offset, u64 ino,
+			  unsigned int d_type)
+{
+	struct ovl_readdir_data *rdd =
+		container_of(ctx, struct ovl_readdir_data, ctx);
+
+	/* Even if d_type is not supported, DT_DIR is returned for . and .. */
+	if (!strncmp(name, ".", namelen) || !strncmp(name, "..", namelen))
+		return 0;
+
+	if (d_type != DT_UNKNOWN)
+		rdd->d_type_supported = true;
+
+	return 0;
+}
+
+/*
+ * Returns 1 if d_type is supported, 0 not supported/unknown. Negative values
+ * if error is encountered.
+ */
+int ovl_check_d_type_supported(struct path *realpath)
+{
+	int err;
+	struct ovl_readdir_data rdd = {
+		.ctx.actor = ovl_check_d_type,
+		.d_type_supported = false,
+	};
+
+	err = ovl_dir_read(realpath, &rdd);
+	if (err)
+		return err;
+
+	return rdd.d_type_supported;
+}
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index 619ad4b016d2..ef64984c9bbc 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -936,7 +936,8 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
 
 	err = -EINVAL;
 	if (!ufs->config.lowerdir) {
-		pr_err("overlayfs: missing 'lowerdir'\n");
+		if (!silent)
+			pr_err("overlayfs: missing 'lowerdir'\n");
 		goto out_free_config;
 	}
 
@@ -1028,6 +1029,21 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
 			sb->s_flags |= MS_RDONLY;
 			ufs->workdir = NULL;
 		}
+
+		/*
+		 * Upper should support d_type, else whiteouts are visible.
+		 * Given workdir and upper are on same fs, we can do
+		 * iterate_dir() on workdir.
+		 */
+		err = ovl_check_d_type_supported(&workpath);
+		if (err < 0)
+			goto out_put_workdir;
+
+		if (!err) {
+			pr_err("overlayfs: upper fs needs to support d_type.\n");
+			err = -EINVAL;
+			goto out_put_workdir;
+		}
 	}
 
 	err = -ENOMEM;