From 8b41e6715ed555e2d8e8dac52ec1f05a9f04dcb4 Mon Sep 17 00:00:00 2001 From: Maxim Patlasov Date: Thu, 21 Mar 2013 18:02:04 +0400 Subject: fuse: make request allocations for background processing explicit There are two types of processing requests in FUSE: synchronous (via fuse_request_send()) and asynchronous (via adding to fc->bg_queue). Fortunately, the type of processing is always known in advance, at the time of request allocation. This preparatory patch utilizes this fact making fuse_get_req() aware about the type. Next patches will use it. Signed-off-by: Maxim Patlasov Signed-off-by: Miklos Szeredi --- fs/fuse/cuse.c | 2 +- fs/fuse/dev.c | 20 ++++++++++++++++++-- fs/fuse/file.c | 16 ++++++++++++++-- fs/fuse/fuse_i.h | 2 ++ fs/fuse/inode.c | 2 ++ 5 files changed, 37 insertions(+), 5 deletions(-) diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c index 6f96a8def147..b7c7f3060635 100644 --- a/fs/fuse/cuse.c +++ b/fs/fuse/cuse.c @@ -422,7 +422,7 @@ static int cuse_send_init(struct cuse_conn *cc) BUILD_BUG_ON(CUSE_INIT_INFO_MAX > PAGE_SIZE); - req = fuse_get_req(fc, 1); + req = fuse_get_req_for_background(fc, 1); if (IS_ERR(req)) { rc = PTR_ERR(req); goto err; diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 11dfa0c3fb46..3b8301f5c0e1 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -130,7 +130,8 @@ static void fuse_req_init_context(struct fuse_req *req) req->in.h.pid = current->pid; } -struct fuse_req *fuse_get_req(struct fuse_conn *fc, unsigned npages) +static struct fuse_req *__fuse_get_req(struct fuse_conn *fc, unsigned npages, + bool for_background) { struct fuse_req *req; sigset_t oldset; @@ -156,14 +157,27 @@ struct fuse_req *fuse_get_req(struct fuse_conn *fc, unsigned npages) fuse_req_init_context(req); req->waiting = 1; + req->background = for_background; return req; out: atomic_dec(&fc->num_waiting); return ERR_PTR(err); } + +struct fuse_req *fuse_get_req(struct fuse_conn *fc, unsigned npages) +{ + return __fuse_get_req(fc, npages, false); +} EXPORT_SYMBOL_GPL(fuse_get_req); +struct fuse_req *fuse_get_req_for_background(struct fuse_conn *fc, + unsigned npages) +{ + return __fuse_get_req(fc, npages, true); +} +EXPORT_SYMBOL_GPL(fuse_get_req_for_background); + /* * Return request in fuse_file->reserved_req. However that may * currently be in use. If that is the case, wait for it to become @@ -232,6 +246,7 @@ struct fuse_req *fuse_get_req_nofail_nopages(struct fuse_conn *fc, fuse_req_init_context(req); req->waiting = 1; + req->background = 0; return req; } @@ -442,6 +457,7 @@ __acquires(fc->lock) static void __fuse_request_send(struct fuse_conn *fc, struct fuse_req *req) { + BUG_ON(req->background); spin_lock(&fc->lock); if (!fc->connected) req->out.h.error = -ENOTCONN; @@ -469,7 +485,7 @@ EXPORT_SYMBOL_GPL(fuse_request_send); static void fuse_request_send_nowait_locked(struct fuse_conn *fc, struct fuse_req *req) { - req->background = 1; + BUG_ON(!req->background); fc->num_background++; if (fc->num_background == fc->max_background) fc->blocked = 1; diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 34b80ba95bad..4884790b9639 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -126,11 +126,13 @@ static void fuse_file_put(struct fuse_file *ff, bool sync) struct fuse_req *req = ff->reserved_req; if (sync) { + req->background = 0; fuse_request_send(ff->fc, req); path_put(&req->misc.release.path); fuse_put_request(ff->fc, req); } else { req->end = fuse_release_end; + req->background = 1; fuse_request_send_background(ff->fc, req); } kfree(ff); @@ -282,6 +284,7 @@ void fuse_sync_release(struct fuse_file *ff, int flags) WARN_ON(atomic_read(&ff->count) > 1); fuse_prepare_release(ff, flags, FUSE_RELEASE); ff->reserved_req->force = 1; + ff->reserved_req->background = 0; fuse_request_send(ff->fc, ff->reserved_req); fuse_put_request(ff->fc, ff->reserved_req); kfree(ff); @@ -661,7 +664,12 @@ static int fuse_readpages_fill(void *_data, struct page *page) int nr_alloc = min_t(unsigned, data->nr_pages, FUSE_MAX_PAGES_PER_REQ); fuse_send_readpages(req, data->file); - data->req = req = fuse_get_req(fc, nr_alloc); + if (fc->async_read) + req = fuse_get_req_for_background(fc, nr_alloc); + else + req = fuse_get_req(fc, nr_alloc); + + data->req = req; if (IS_ERR(req)) { unlock_page(page); return PTR_ERR(req); @@ -696,7 +704,10 @@ static int fuse_readpages(struct file *file, struct address_space *mapping, data.file = file; data.inode = inode; - data.req = fuse_get_req(fc, nr_alloc); + if (fc->async_read) + data.req = fuse_get_req_for_background(fc, nr_alloc); + else + data.req = fuse_get_req(fc, nr_alloc); data.nr_pages = nr_pages; err = PTR_ERR(data.req); if (IS_ERR(data.req)) @@ -1375,6 +1386,7 @@ static int fuse_writepage_locked(struct page *page) if (!req) goto err; + req->background = 1; /* writeback always goes to bg_queue */ tmp_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); if (!tmp_page) goto err_free; diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 6aeba864f070..47c94d28ff88 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -708,6 +708,8 @@ void fuse_request_free(struct fuse_req *req); * caller should specify # elements in req->pages[] explicitly */ struct fuse_req *fuse_get_req(struct fuse_conn *fc, unsigned npages); +struct fuse_req *fuse_get_req_for_background(struct fuse_conn *fc, + unsigned npages); /** * Get a request, may fail with -ENOMEM, diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 137185c3884f..e26607df93df 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -346,6 +346,7 @@ static void fuse_send_destroy(struct fuse_conn *fc) fc->destroy_req = NULL; req->in.h.opcode = FUSE_DESTROY; req->force = 1; + req->background = 0; fuse_request_send(fc, req); fuse_put_request(fc, req); } @@ -1043,6 +1044,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) init_req = fuse_request_alloc(0); if (!init_req) goto err_put_root; + init_req->background = 1; if (is_bdev) { fc->destroy_req = fuse_request_alloc(0); -- cgit v1.2.3 From 796523fb24028639c007f71e02ca21730f7c0af6 Mon Sep 17 00:00:00 2001 From: Maxim Patlasov Date: Thu, 21 Mar 2013 18:02:15 +0400 Subject: fuse: add flag fc->initialized Existing flag fc->blocked is used to suspend request allocation both in case of many background request submitted and period of time before init_reply arrives from userspace. Next patch will skip blocking allocations of synchronous request (disregarding fc->blocked). This is mostly OK, but we still need to suspend allocations if init_reply is not arrived yet. The patch introduces flag fc->initialized which will serve this purpose. Signed-off-by: Maxim Patlasov Signed-off-by: Miklos Szeredi --- fs/fuse/cuse.c | 1 + fs/fuse/dev.c | 1 + fs/fuse/fuse_i.h | 4 ++++ fs/fuse/inode.c | 3 +++ 4 files changed, 9 insertions(+) diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c index b7c7f3060635..f563e78852b7 100644 --- a/fs/fuse/cuse.c +++ b/fs/fuse/cuse.c @@ -505,6 +505,7 @@ static int cuse_channel_open(struct inode *inode, struct file *file) cc->fc.connected = 1; cc->fc.blocked = 0; + cc->fc.initialized = 1; rc = cuse_send_init(cc); if (rc) { fuse_conn_put(&cc->fc); diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 3b8301f5c0e1..39c2fe3ba93d 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -2087,6 +2087,7 @@ void fuse_abort_conn(struct fuse_conn *fc) if (fc->connected) { fc->connected = 0; fc->blocked = 0; + fc->initialized = 1; end_io_requests(fc); end_queued_requests(fc); end_polls(fc); diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 47c94d28ff88..6bf30f2af901 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -417,6 +417,10 @@ struct fuse_conn { /** Batching of FORGET requests (positive indicates FORGET batch) */ int forget_batch; + /** Flag indicating that INIT reply has been received. Allocating + * any fuse request will be suspended until the flag is set */ + int initialized; + /** Flag indicating if connection is blocked. This will be the case before the INIT reply is received, and if there are too many outstading backgrounds requests */ diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index e26607df93df..4958f8099f16 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -363,6 +363,7 @@ void fuse_conn_kill(struct fuse_conn *fc) spin_lock(&fc->lock); fc->connected = 0; fc->blocked = 0; + fc->initialized = 1; spin_unlock(&fc->lock); /* Flush all readers on this fs */ kill_fasync(&fc->fasync, SIGIO, POLL_IN); @@ -583,6 +584,7 @@ void fuse_conn_init(struct fuse_conn *fc) fc->polled_files = RB_ROOT; fc->reqctr = 0; fc->blocked = 1; + fc->initialized = 0; fc->attr_version = 1; get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key)); } @@ -882,6 +884,7 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req) fc->conn_init = 1; } fc->blocked = 0; + fc->initialized = 1; wake_up_all(&fc->blocked_waitq); } -- cgit v1.2.3 From 0aada88476a33690c9569b094191ce92a38e6541 Mon Sep 17 00:00:00 2001 From: Maxim Patlasov Date: Thu, 21 Mar 2013 18:02:28 +0400 Subject: fuse: skip blocking on allocations of synchronous requests A task may have at most one synchronous request allocated. So these requests need not be otherwise limited. The patch re-works fuse_get_req() to follow this idea. Signed-off-by: Maxim Patlasov Signed-off-by: Miklos Szeredi --- fs/fuse/cuse.c | 1 - fs/fuse/dev.c | 30 ++++++++++++++++++++---------- fs/fuse/inode.c | 3 +-- 3 files changed, 21 insertions(+), 13 deletions(-) diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c index f563e78852b7..c59c097eb2e9 100644 --- a/fs/fuse/cuse.c +++ b/fs/fuse/cuse.c @@ -504,7 +504,6 @@ static int cuse_channel_open(struct inode *inode, struct file *file) cc->fc.release = cuse_fc_release; cc->fc.connected = 1; - cc->fc.blocked = 0; cc->fc.initialized = 1; rc = cuse_send_init(cc); if (rc) { diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 39c2fe3ba93d..d692b85115bd 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -130,21 +130,30 @@ static void fuse_req_init_context(struct fuse_req *req) req->in.h.pid = current->pid; } +static bool fuse_block_alloc(struct fuse_conn *fc, bool for_background) +{ + return !fc->initialized || (for_background && fc->blocked); +} + static struct fuse_req *__fuse_get_req(struct fuse_conn *fc, unsigned npages, bool for_background) { struct fuse_req *req; - sigset_t oldset; - int intr; int err; - atomic_inc(&fc->num_waiting); - block_sigs(&oldset); - intr = wait_event_interruptible(fc->blocked_waitq, !fc->blocked); - restore_sigs(&oldset); - err = -EINTR; - if (intr) - goto out; + + if (fuse_block_alloc(fc, for_background)) { + sigset_t oldset; + int intr; + + block_sigs(&oldset); + intr = wait_event_interruptible(fc->blocked_waitq, + !fuse_block_alloc(fc, for_background)); + restore_sigs(&oldset); + err = -EINTR; + if (intr) + goto out; + } err = -ENOTCONN; if (!fc->connected) @@ -239,7 +248,7 @@ struct fuse_req *fuse_get_req_nofail_nopages(struct fuse_conn *fc, struct fuse_req *req; atomic_inc(&fc->num_waiting); - wait_event(fc->blocked_waitq, !fc->blocked); + wait_event(fc->blocked_waitq, fc->initialized); req = fuse_request_alloc(0); if (!req) req = get_reserved_req(fc, file); @@ -2106,6 +2115,7 @@ int fuse_dev_release(struct inode *inode, struct file *file) spin_lock(&fc->lock); fc->connected = 0; fc->blocked = 0; + fc->initialized = 1; end_queued_requests(fc); end_polls(fc); wake_up_all(&fc->blocked_waitq); diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 4958f8099f16..17ec1f70524d 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -583,7 +583,7 @@ void fuse_conn_init(struct fuse_conn *fc) fc->khctr = 0; fc->polled_files = RB_ROOT; fc->reqctr = 0; - fc->blocked = 1; + fc->blocked = 0; fc->initialized = 0; fc->attr_version = 1; get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key)); @@ -883,7 +883,6 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req) fc->max_write = max_t(unsigned, 4096, fc->max_write); fc->conn_init = 1; } - fc->blocked = 0; fc->initialized = 1; wake_up_all(&fc->blocked_waitq); } -- cgit v1.2.3 From 722d2bea8c601d0744e4a37170533fdf6214a678 Mon Sep 17 00:00:00 2001 From: Maxim Patlasov Date: Thu, 21 Mar 2013 18:02:36 +0400 Subject: fuse: implement exclusive wakeup for blocked_waitq The patch solves thundering herd problem. So far as previous patches ensured that only allocations for background may block, it's safe to wake up one waiter. Whoever it is, it will wake up another one in request_end() afterwards. Signed-off-by: Maxim Patlasov Signed-off-by: Miklos Szeredi --- fs/fuse/dev.c | 29 ++++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index d692b85115bd..367310588962 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -147,7 +147,7 @@ static struct fuse_req *__fuse_get_req(struct fuse_conn *fc, unsigned npages, int intr; block_sigs(&oldset); - intr = wait_event_interruptible(fc->blocked_waitq, + intr = wait_event_interruptible_exclusive(fc->blocked_waitq, !fuse_block_alloc(fc, for_background)); restore_sigs(&oldset); err = -EINTR; @@ -161,8 +161,11 @@ static struct fuse_req *__fuse_get_req(struct fuse_conn *fc, unsigned npages, req = fuse_request_alloc(npages); err = -ENOMEM; - if (!req) + if (!req) { + if (for_background) + wake_up(&fc->blocked_waitq); goto out; + } fuse_req_init_context(req); req->waiting = 1; @@ -262,6 +265,17 @@ struct fuse_req *fuse_get_req_nofail_nopages(struct fuse_conn *fc, void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req) { if (atomic_dec_and_test(&req->count)) { + if (unlikely(req->background)) { + /* + * We get here in the unlikely case that a background + * request was allocated but not sent + */ + spin_lock(&fc->lock); + if (!fc->blocked) + wake_up(&fc->blocked_waitq); + spin_unlock(&fc->lock); + } + if (req->waiting) atomic_dec(&fc->num_waiting); @@ -359,10 +373,15 @@ __releases(fc->lock) list_del(&req->intr_entry); req->state = FUSE_REQ_FINISHED; if (req->background) { - if (fc->num_background == fc->max_background) { + req->background = 0; + + if (fc->num_background == fc->max_background) fc->blocked = 0; - wake_up_all(&fc->blocked_waitq); - } + + /* Wake up next waiter, if any */ + if (!fc->blocked) + wake_up(&fc->blocked_waitq); + if (fc->num_background == fc->congestion_threshold && fc->connected && fc->bdi_initialized) { clear_bdi_congested(&fc->bdi, BLK_RW_SYNC); -- cgit v1.2.3 From 3c18ef8117f0515b1e455713dfc2e18b06db9bb5 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 17 Apr 2013 21:50:58 +0200 Subject: fuse: optimize wake_up Normally blocked_waitq will be inactive, so optimize this case. Signed-off-by: Miklos Szeredi --- fs/fuse/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 367310588962..be5c7e13320c 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -379,7 +379,7 @@ __releases(fc->lock) fc->blocked = 0; /* Wake up next waiter, if any */ - if (!fc->blocked) + if (!fc->blocked && waitqueue_active(&fc->blocked_waitq)) wake_up(&fc->blocked_waitq); if (fc->num_background == fc->congestion_threshold && -- cgit v1.2.3 From 187c5c36330bc8d15674d9e6d2a2412de6b1034d Mon Sep 17 00:00:00 2001 From: Maxim Patlasov Date: Fri, 14 Dec 2012 19:20:25 +0400 Subject: fuse: move fuse_release_user_pages() up fuse_release_user_pages() will be indirectly used by fuse_send_read/write in future patches. Signed-off-by: Maxim Patlasov Signed-off-by: Miklos Szeredi --- fs/fuse/file.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 4884790b9639..1ee2fc92bc3e 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -494,6 +494,18 @@ void fuse_read_fill(struct fuse_req *req, struct file *file, loff_t pos, req->out.args[0].size = count; } +static void fuse_release_user_pages(struct fuse_req *req, int write) +{ + unsigned i; + + for (i = 0; i < req->num_pages; i++) { + struct page *page = req->pages[i]; + if (write) + set_page_dirty_lock(page); + put_page(page); + } +} + static size_t fuse_send_read(struct fuse_req *req, struct file *file, loff_t pos, size_t count, fl_owner_t owner) { @@ -1046,18 +1058,6 @@ out: return written ? written : err; } -static void fuse_release_user_pages(struct fuse_req *req, int write) -{ - unsigned i; - - for (i = 0; i < req->num_pages; i++) { - struct page *page = req->pages[i]; - if (write) - set_page_dirty_lock(page); - put_page(page); - } -} - static inline void fuse_page_descs_length_init(struct fuse_req *req, unsigned index, unsigned nr_pages) { -- cgit v1.2.3 From 01e9d11a3e79035ca5cd89b035435acd4ba61ee1 Mon Sep 17 00:00:00 2001 From: Maxim Patlasov Date: Fri, 14 Dec 2012 19:20:41 +0400 Subject: fuse: add support of async IO The patch implements a framework to process an IO request asynchronously. The idea is to associate several fuse requests with a single kiocb by means of fuse_io_priv structure. The structure plays the same role for FUSE as 'struct dio' for direct-io.c. The framework is supposed to be used like this: - someone (who wants to process an IO asynchronously) allocates fuse_io_priv and initializes it setting 'async' field to non-zero value. - as soon as fuse request is filled, it can be submitted (in non-blocking way) by fuse_async_req_send() - when all submitted requests are ACKed by userspace, io->reqs drops to zero triggering aio_complete() In case of IO initiated by libaio, aio_complete() will finish processing the same way as in case of dio_complete() calling aio_complete(). But the framework may be also used for internal FUSE use when initial IO request was synchronous (from user perspective), but it's beneficial to process it asynchronously. Then the caller should wait on kiocb explicitly and aio_complete() will wake the caller up. Signed-off-by: Maxim Patlasov Signed-off-by: Miklos Szeredi --- fs/fuse/file.c | 92 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ fs/fuse/fuse_i.h | 17 +++++++++++ 2 files changed, 109 insertions(+) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 1ee2fc92bc3e..4002889fbcc1 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -506,6 +506,98 @@ static void fuse_release_user_pages(struct fuse_req *req, int write) } } +/** + * In case of short read, the caller sets 'pos' to the position of + * actual end of fuse request in IO request. Otherwise, if bytes_requested + * == bytes_transferred or rw == WRITE, the caller sets 'pos' to -1. + * + * An example: + * User requested DIO read of 64K. It was splitted into two 32K fuse requests, + * both submitted asynchronously. The first of them was ACKed by userspace as + * fully completed (req->out.args[0].size == 32K) resulting in pos == -1. The + * second request was ACKed as short, e.g. only 1K was read, resulting in + * pos == 33K. + * + * Thus, when all fuse requests are completed, the minimal non-negative 'pos' + * will be equal to the length of the longest contiguous fragment of + * transferred data starting from the beginning of IO request. + */ +static void fuse_aio_complete(struct fuse_io_priv *io, int err, ssize_t pos) +{ + int left; + + spin_lock(&io->lock); + if (err) + io->err = io->err ? : err; + else if (pos >= 0 && (io->bytes < 0 || pos < io->bytes)) + io->bytes = pos; + + left = --io->reqs; + spin_unlock(&io->lock); + + if (!left) { + long res; + + if (io->err) + res = io->err; + else if (io->bytes >= 0 && io->write) + res = -EIO; + else { + res = io->bytes < 0 ? io->size : io->bytes; + + if (!is_sync_kiocb(io->iocb)) { + struct path *path = &io->iocb->ki_filp->f_path; + struct inode *inode = path->dentry->d_inode; + struct fuse_conn *fc = get_fuse_conn(inode); + struct fuse_inode *fi = get_fuse_inode(inode); + + spin_lock(&fc->lock); + fi->attr_version = ++fc->attr_version; + spin_unlock(&fc->lock); + } + } + + aio_complete(io->iocb, res, 0); + kfree(io); + } +} + +static void fuse_aio_complete_req(struct fuse_conn *fc, struct fuse_req *req) +{ + struct fuse_io_priv *io = req->io; + ssize_t pos = -1; + + fuse_release_user_pages(req, !io->write); + + if (io->write) { + if (req->misc.write.in.size != req->misc.write.out.size) + pos = req->misc.write.in.offset - io->offset + + req->misc.write.out.size; + } else { + if (req->misc.read.in.size != req->out.args[0].size) + pos = req->misc.read.in.offset - io->offset + + req->out.args[0].size; + } + + fuse_aio_complete(io, req->out.h.error, pos); +} + +static size_t fuse_async_req_send(struct fuse_conn *fc, struct fuse_req *req, + size_t num_bytes, struct fuse_io_priv *io) +{ + spin_lock(&io->lock); + io->size += num_bytes; + io->reqs++; + spin_unlock(&io->lock); + + req->io = io; + req->end = fuse_aio_complete_req; + + fuse_request_send_background(fc, req); + + return num_bytes; +} + static size_t fuse_send_read(struct fuse_req *req, struct file *file, loff_t pos, size_t count, fl_owner_t owner) { diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 6bf30f2af901..aea072413c47 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -228,6 +228,20 @@ enum fuse_req_state { FUSE_REQ_FINISHED }; +/** The request IO state (for asynchronous processing) */ +struct fuse_io_priv { + int async; + spinlock_t lock; + unsigned reqs; + ssize_t bytes; + size_t size; + __u64 offset; + bool write; + int err; + struct kiocb *iocb; + struct file *file; +}; + /** * A request to the client */ @@ -332,6 +346,9 @@ struct fuse_req { /** Inode used in the request or NULL */ struct inode *inode; + /** AIO control block */ + struct fuse_io_priv *io; + /** Link on fi->writepages */ struct list_head writepages_entry; -- cgit v1.2.3 From 36cf66ed9f871fc0d0911921fba5873df3ddb2dc Mon Sep 17 00:00:00 2001 From: Maxim Patlasov Date: Fri, 14 Dec 2012 19:20:51 +0400 Subject: fuse: make fuse_direct_io() aware about AIO The patch implements passing "struct fuse_io_priv *io" down the stack up to fuse_send_read/write where it is used to submit request asynchronously. io->async==0 designates synchronous processing. Non-trivial part of the patch is changes in fuse_direct_io(): resources like fuse requests and user pages cannot be released immediately in async case. Signed-off-by: Maxim Patlasov Signed-off-by: Miklos Szeredi --- fs/fuse/cuse.c | 6 ++++-- fs/fuse/dev.c | 2 +- fs/fuse/file.c | 64 +++++++++++++++++++++++++++++++++++++++++--------------- fs/fuse/fuse_i.h | 7 ++++++- 4 files changed, 58 insertions(+), 21 deletions(-) diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c index c59c097eb2e9..b3aaf7b3578b 100644 --- a/fs/fuse/cuse.c +++ b/fs/fuse/cuse.c @@ -92,8 +92,9 @@ static ssize_t cuse_read(struct file *file, char __user *buf, size_t count, { loff_t pos = 0; struct iovec iov = { .iov_base = buf, .iov_len = count }; + struct fuse_io_priv io = { .async = 0, .file = file }; - return fuse_direct_io(file, &iov, 1, count, &pos, 0); + return fuse_direct_io(&io, &iov, 1, count, &pos, 0); } static ssize_t cuse_write(struct file *file, const char __user *buf, @@ -101,12 +102,13 @@ static ssize_t cuse_write(struct file *file, const char __user *buf, { loff_t pos = 0; struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = count }; + struct fuse_io_priv io = { .async = 0, .file = file }; /* * No locking or generic_write_checks(), the server is * responsible for locking and sanity checks. */ - return fuse_direct_io(file, &iov, 1, count, &pos, 1); + return fuse_direct_io(&io, &iov, 1, count, &pos, 1); } static int cuse_open(struct inode *inode, struct file *file) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index be5c7e13320c..07bdb14ae7a3 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -111,7 +111,7 @@ static void restore_sigs(sigset_t *oldset) sigprocmask(SIG_SETMASK, oldset, NULL); } -static void __fuse_get_request(struct fuse_req *req) +void __fuse_get_request(struct fuse_req *req) { atomic_inc(&req->count); } diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 4002889fbcc1..e207dcdf32c0 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -593,14 +593,16 @@ static size_t fuse_async_req_send(struct fuse_conn *fc, struct fuse_req *req, req->io = io; req->end = fuse_aio_complete_req; + __fuse_get_request(req); fuse_request_send_background(fc, req); return num_bytes; } -static size_t fuse_send_read(struct fuse_req *req, struct file *file, +static size_t fuse_send_read(struct fuse_req *req, struct fuse_io_priv *io, loff_t pos, size_t count, fl_owner_t owner) { + struct file *file = io->file; struct fuse_file *ff = file->private_data; struct fuse_conn *fc = ff->fc; @@ -611,6 +613,10 @@ static size_t fuse_send_read(struct fuse_req *req, struct file *file, inarg->read_flags |= FUSE_READ_LOCKOWNER; inarg->lock_owner = fuse_lock_owner_id(fc, owner); } + + if (io->async) + return fuse_async_req_send(fc, req, count, io); + fuse_request_send(fc, req); return req->out.args[0].size; } @@ -631,6 +637,7 @@ static void fuse_read_update_size(struct inode *inode, loff_t size, static int fuse_readpage(struct file *file, struct page *page) { + struct fuse_io_priv io = { .async = 0, .file = file }; struct inode *inode = page->mapping->host; struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_req *req; @@ -663,7 +670,7 @@ static int fuse_readpage(struct file *file, struct page *page) req->num_pages = 1; req->pages[0] = page; req->page_descs[0].length = count; - num_read = fuse_send_read(req, file, pos, count, NULL); + num_read = fuse_send_read(req, &io, pos, count, NULL); err = req->out.h.error; fuse_put_request(fc, req); @@ -873,9 +880,10 @@ static void fuse_write_fill(struct fuse_req *req, struct fuse_file *ff, req->out.args[0].value = outarg; } -static size_t fuse_send_write(struct fuse_req *req, struct file *file, +static size_t fuse_send_write(struct fuse_req *req, struct fuse_io_priv *io, loff_t pos, size_t count, fl_owner_t owner) { + struct file *file = io->file; struct fuse_file *ff = file->private_data; struct fuse_conn *fc = ff->fc; struct fuse_write_in *inarg = &req->misc.write.in; @@ -886,6 +894,10 @@ static size_t fuse_send_write(struct fuse_req *req, struct file *file, inarg->write_flags |= FUSE_WRITE_LOCKOWNER; inarg->lock_owner = fuse_lock_owner_id(fc, owner); } + + if (io->async) + return fuse_async_req_send(fc, req, count, io); + fuse_request_send(fc, req); return req->misc.write.out.size; } @@ -909,11 +921,12 @@ static size_t fuse_send_write_pages(struct fuse_req *req, struct file *file, size_t res; unsigned offset; unsigned i; + struct fuse_io_priv io = { .async = 0, .file = file }; for (i = 0; i < req->num_pages; i++) fuse_wait_on_page_writeback(inode, req->pages[i]->index); - res = fuse_send_write(req, file, pos, count, NULL); + res = fuse_send_write(req, &io, pos, count, NULL); offset = req->page_descs[0].offset; count = res; @@ -1251,10 +1264,11 @@ static inline int fuse_iter_npages(const struct iov_iter *ii_p) return min(npages, FUSE_MAX_PAGES_PER_REQ); } -ssize_t fuse_direct_io(struct file *file, const struct iovec *iov, +ssize_t fuse_direct_io(struct fuse_io_priv *io, const struct iovec *iov, unsigned long nr_segs, size_t count, loff_t *ppos, int write) { + struct file *file = io->file; struct fuse_file *ff = file->private_data; struct fuse_conn *fc = ff->fc; size_t nmax = write ? fc->max_write : fc->max_read; @@ -1280,11 +1294,12 @@ ssize_t fuse_direct_io(struct file *file, const struct iovec *iov, } if (write) - nres = fuse_send_write(req, file, pos, nbytes, owner); + nres = fuse_send_write(req, io, pos, nbytes, owner); else - nres = fuse_send_read(req, file, pos, nbytes, owner); + nres = fuse_send_read(req, io, pos, nbytes, owner); - fuse_release_user_pages(req, !write); + if (!io->async) + fuse_release_user_pages(req, !write); if (req->out.h.error) { if (!res) res = req->out.h.error; @@ -1314,16 +1329,18 @@ ssize_t fuse_direct_io(struct file *file, const struct iovec *iov, } EXPORT_SYMBOL_GPL(fuse_direct_io); -static ssize_t __fuse_direct_read(struct file *file, const struct iovec *iov, +static ssize_t __fuse_direct_read(struct fuse_io_priv *io, + const struct iovec *iov, unsigned long nr_segs, loff_t *ppos) { ssize_t res; + struct file *file = io->file; struct inode *inode = file_inode(file); if (is_bad_inode(inode)) return -EIO; - res = fuse_direct_io(file, iov, nr_segs, iov_length(iov, nr_segs), + res = fuse_direct_io(io, iov, nr_segs, iov_length(iov, nr_segs), ppos, 0); fuse_invalidate_attr(inode); @@ -1334,21 +1351,24 @@ static ssize_t __fuse_direct_read(struct file *file, const struct iovec *iov, static ssize_t fuse_direct_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { + struct fuse_io_priv io = { .async = 0, .file = file }; struct iovec iov = { .iov_base = buf, .iov_len = count }; - return __fuse_direct_read(file, &iov, 1, ppos); + return __fuse_direct_read(&io, &iov, 1, ppos); } -static ssize_t __fuse_direct_write(struct file *file, const struct iovec *iov, +static ssize_t __fuse_direct_write(struct fuse_io_priv *io, + const struct iovec *iov, unsigned long nr_segs, loff_t *ppos) { + struct file *file = io->file; struct inode *inode = file_inode(file); size_t count = iov_length(iov, nr_segs); ssize_t res; res = generic_write_checks(file, ppos, &count, 0); if (!res) { - res = fuse_direct_io(file, iov, nr_segs, count, ppos, 1); - if (res > 0) + res = fuse_direct_io(io, iov, nr_segs, count, ppos, 1); + if (!io->async && res > 0) fuse_write_update_size(inode, *ppos); } @@ -1363,13 +1383,14 @@ static ssize_t fuse_direct_write(struct file *file, const char __user *buf, struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = count }; struct inode *inode = file_inode(file); ssize_t res; + struct fuse_io_priv io = { .async = 0, .file = file }; if (is_bad_inode(inode)) return -EIO; /* Don't allow parallel writes to the same file */ mutex_lock(&inode->i_mutex); - res = __fuse_direct_write(file, &iov, 1, ppos); + res = __fuse_direct_write(&io, &iov, 1, ppos); mutex_unlock(&inode->i_mutex); return res; @@ -2339,14 +2360,23 @@ fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, ssize_t ret = 0; struct file *file = NULL; loff_t pos = 0; + struct fuse_io_priv *io; file = iocb->ki_filp; pos = offset; + io = kzalloc(sizeof(struct fuse_io_priv), GFP_KERNEL); + if (!io) + return -ENOMEM; + + io->file = file; + if (rw == WRITE) - ret = __fuse_direct_write(file, iov, nr_segs, &pos); + ret = __fuse_direct_write(io, iov, nr_segs, &pos); else - ret = __fuse_direct_read(file, iov, nr_segs, &pos); + ret = __fuse_direct_read(io, iov, nr_segs, &pos); + + kfree(io); return ret; } diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index aea072413c47..337169a406c9 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -732,6 +732,11 @@ struct fuse_req *fuse_get_req(struct fuse_conn *fc, unsigned npages); struct fuse_req *fuse_get_req_for_background(struct fuse_conn *fc, unsigned npages); +/* + * Increment reference count on request + */ +void __fuse_get_request(struct fuse_req *req); + /** * Get a request, may fail with -ENOMEM, * useful for callers who doesn't use req->pages[] @@ -846,7 +851,7 @@ int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid, int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file, bool isdir); -ssize_t fuse_direct_io(struct file *file, const struct iovec *iov, +ssize_t fuse_direct_io(struct fuse_io_priv *io, const struct iovec *iov, unsigned long nr_segs, size_t count, loff_t *ppos, int write); long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg, -- cgit v1.2.3 From bcba24ccdc82f7415154cf87226c2577cea13a5c Mon Sep 17 00:00:00 2001 From: Maxim Patlasov Date: Fri, 14 Dec 2012 19:21:08 +0400 Subject: fuse: enable asynchronous processing direct IO In case of synchronous DIO request (i.e. read(2) or write(2) for a file opened with O_DIRECT), the patch submits fuse requests asynchronously, but waits for their completions before return from fuse_direct_IO(). In case of asynchronous DIO request (i.e. libaio io_submit() or a file opened with O_DIRECT), the patch submits fuse requests asynchronously and return -EIOCBQUEUED immediately. The only special case is async DIO extending file. Here the patch falls back to old behaviour because we can't return -EIOCBQUEUED and update i_size later, without i_mutex hold. And we have no method to wait on real async I/O requests. The patch also clean __fuse_direct_write() up: it's better to update i_size in its callers. Thanks Brian for suggestion. Signed-off-by: Maxim Patlasov Signed-off-by: Miklos Szeredi --- fs/fuse/file.c | 51 ++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 44 insertions(+), 7 deletions(-) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index e207dcdf32c0..ba1d50369c24 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1366,11 +1366,8 @@ static ssize_t __fuse_direct_write(struct fuse_io_priv *io, ssize_t res; res = generic_write_checks(file, ppos, &count, 0); - if (!res) { + if (!res) res = fuse_direct_io(io, iov, nr_segs, count, ppos, 1); - if (!io->async && res > 0) - fuse_write_update_size(inode, *ppos); - } fuse_invalidate_attr(inode); @@ -1391,6 +1388,8 @@ static ssize_t fuse_direct_write(struct file *file, const char __user *buf, /* Don't allow parallel writes to the same file */ mutex_lock(&inode->i_mutex); res = __fuse_direct_write(&io, &iov, 1, ppos); + if (res > 0) + fuse_write_update_size(inode, *ppos); mutex_unlock(&inode->i_mutex); return res; @@ -2360,23 +2359,61 @@ fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, ssize_t ret = 0; struct file *file = NULL; loff_t pos = 0; + struct inode *inode; + loff_t i_size; + size_t count = iov_length(iov, nr_segs); struct fuse_io_priv *io; file = iocb->ki_filp; pos = offset; + inode = file->f_mapping->host; + i_size = i_size_read(inode); - io = kzalloc(sizeof(struct fuse_io_priv), GFP_KERNEL); + io = kmalloc(sizeof(struct fuse_io_priv), GFP_KERNEL); if (!io) return -ENOMEM; - + spin_lock_init(&io->lock); + io->reqs = 1; + io->bytes = -1; + io->size = 0; + io->offset = offset; + io->write = (rw == WRITE); + io->err = 0; io->file = file; + /* + * By default, we want to optimize all I/Os with async request + * submission to the client filesystem. + */ + io->async = 1; + io->iocb = iocb; + + /* + * We cannot asynchronously extend the size of a file. We have no method + * to wait on real async I/O requests, so we must submit this request + * synchronously. + */ + if (!is_sync_kiocb(iocb) && (offset + count > i_size) && rw == WRITE) + io->async = false; if (rw == WRITE) ret = __fuse_direct_write(io, iov, nr_segs, &pos); else ret = __fuse_direct_read(io, iov, nr_segs, &pos); - kfree(io); + if (io->async) { + fuse_aio_complete(io, ret < 0 ? ret : 0, -1); + + /* we have a non-extending, async request, so return */ + if (ret > 0 && !is_sync_kiocb(iocb)) + return -EIOCBQUEUED; + + ret = wait_on_sync_kiocb(iocb); + } else { + kfree(io); + } + + if (rw == WRITE && ret > 0) + fuse_write_update_size(inode, pos); return ret; } -- cgit v1.2.3 From 439ee5f0c5080d4fd15fda0c5bbee1fb3a57894e Mon Sep 17 00:00:00 2001 From: Maxim Patlasov Date: Fri, 14 Dec 2012 19:21:26 +0400 Subject: fuse: optimize short direct reads If user requested direct read beyond EOF, we can skip sending fuse requests for positions beyond EOF because userspace would ACK them with zero bytes read anyway. We can trust to i_size in fuse_direct_IO for such cases because it's called from fuse_file_aio_read() and the latter updates fuse attributes including i_size. Signed-off-by: Maxim Patlasov Signed-off-by: Miklos Szeredi --- fs/fuse/file.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index ba1d50369c24..8f39f7b8cef2 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1331,7 +1331,8 @@ EXPORT_SYMBOL_GPL(fuse_direct_io); static ssize_t __fuse_direct_read(struct fuse_io_priv *io, const struct iovec *iov, - unsigned long nr_segs, loff_t *ppos) + unsigned long nr_segs, loff_t *ppos, + size_t count) { ssize_t res; struct file *file = io->file; @@ -1340,8 +1341,7 @@ static ssize_t __fuse_direct_read(struct fuse_io_priv *io, if (is_bad_inode(inode)) return -EIO; - res = fuse_direct_io(io, iov, nr_segs, iov_length(iov, nr_segs), - ppos, 0); + res = fuse_direct_io(io, iov, nr_segs, count, ppos, 0); fuse_invalidate_attr(inode); @@ -1353,7 +1353,7 @@ static ssize_t fuse_direct_read(struct file *file, char __user *buf, { struct fuse_io_priv io = { .async = 0, .file = file }; struct iovec iov = { .iov_base = buf, .iov_len = count }; - return __fuse_direct_read(&io, &iov, 1, ppos); + return __fuse_direct_read(&io, &iov, 1, ppos, count); } static ssize_t __fuse_direct_write(struct fuse_io_priv *io, @@ -2369,6 +2369,13 @@ fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, inode = file->f_mapping->host; i_size = i_size_read(inode); + /* optimization for short read */ + if (rw != WRITE && offset + count > i_size) { + if (offset >= i_size) + return 0; + count = i_size - offset; + } + io = kmalloc(sizeof(struct fuse_io_priv), GFP_KERNEL); if (!io) return -ENOMEM; @@ -2392,13 +2399,13 @@ fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, * to wait on real async I/O requests, so we must submit this request * synchronously. */ - if (!is_sync_kiocb(iocb) && (offset + count > i_size) && rw == WRITE) + if (!is_sync_kiocb(iocb) && (offset + count > i_size)) io->async = false; if (rw == WRITE) ret = __fuse_direct_write(io, iov, nr_segs, &pos); else - ret = __fuse_direct_read(io, iov, nr_segs, &pos); + ret = __fuse_direct_read(io, iov, nr_segs, &pos, count); if (io->async) { fuse_aio_complete(io, ret < 0 ? ret : 0, -1); -- cgit v1.2.3 From efb9fa9e911b23c7ea5330215bda778a7c69dba8 Mon Sep 17 00:00:00 2001 From: Maxim Patlasov Date: Tue, 18 Dec 2012 14:05:08 +0400 Subject: fuse: truncate file if async dio failed The patch improves error handling in fuse_direct_IO(): if we successfully submitted several fuse requests on behalf of synchronous direct write extending file and some of them failed, let's try to do our best to clean-up. Changed in v2: reuse fuse_do_setattr(). Thanks to Brian for suggestion. Signed-off-by: Maxim Patlasov Signed-off-by: Miklos Szeredi --- fs/fuse/dir.c | 17 +++++++++-------- fs/fuse/file.c | 22 ++++++++++++++++++++-- fs/fuse/fuse_i.h | 3 +++ 3 files changed, 32 insertions(+), 10 deletions(-) diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index ff15522481d4..254df56b847b 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -1562,10 +1562,9 @@ void fuse_release_nowrite(struct inode *inode) * vmtruncate() doesn't allow for this case, so do the rlimit checking * and the actual truncation by hand. */ -static int fuse_do_setattr(struct dentry *entry, struct iattr *attr, - struct file *file) +int fuse_do_setattr(struct inode *inode, struct iattr *attr, + struct file *file) { - struct inode *inode = entry->d_inode; struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_req *req; struct fuse_setattr_in inarg; @@ -1574,9 +1573,6 @@ static int fuse_do_setattr(struct dentry *entry, struct iattr *attr, loff_t oldsize; int err; - if (!fuse_allow_current_process(fc)) - return -EACCES; - if (!(fc->flags & FUSE_DEFAULT_PERMISSIONS)) attr->ia_valid |= ATTR_FORCE; @@ -1671,10 +1667,15 @@ error: static int fuse_setattr(struct dentry *entry, struct iattr *attr) { + struct inode *inode = entry->d_inode; + + if (!fuse_allow_current_process(get_fuse_conn(inode))) + return -EACCES; + if (attr->ia_valid & ATTR_FILE) - return fuse_do_setattr(entry, attr, attr->ia_file); + return fuse_do_setattr(inode, attr, attr->ia_file); else - return fuse_do_setattr(entry, attr, NULL); + return fuse_do_setattr(inode, attr, NULL); } static int fuse_getattr(struct vfsmount *mnt, struct dentry *entry, diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 8f39f7b8cef2..1f8e3d60dc07 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -2352,6 +2352,20 @@ int fuse_notify_poll_wakeup(struct fuse_conn *fc, return 0; } +static void fuse_do_truncate(struct file *file) +{ + struct inode *inode = file->f_mapping->host; + struct iattr attr; + + attr.ia_valid = ATTR_SIZE; + attr.ia_size = i_size_read(inode); + + attr.ia_file = file; + attr.ia_valid |= ATTR_FILE; + + fuse_do_setattr(inode, &attr, file); +} + static ssize_t fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t offset, unsigned long nr_segs) @@ -2419,8 +2433,12 @@ fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, kfree(io); } - if (rw == WRITE && ret > 0) - fuse_write_update_size(inode, pos); + if (rw == WRITE) { + if (ret > 0) + fuse_write_update_size(inode, pos); + else if (ret < 0 && offset + count > i_size) + fuse_do_truncate(file); + } return ret; } diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 337169a406c9..53b830e3b38f 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -863,4 +863,7 @@ int fuse_dev_release(struct inode *inode, struct file *file); void fuse_write_update_size(struct inode *inode, loff_t pos); +int fuse_do_setattr(struct inode *inode, struct iattr *attr, + struct file *file); + #endif /* _FS_FUSE_I_H */ -- cgit v1.2.3 From 60b9df7a54804a965850db00beec4d3a2c002536 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 1 May 2013 14:37:21 +0200 Subject: fuse: add flag to turn on async direct IO Without async DIO write requests to a single file were always serialized. With async DIO that's no longer the case. So don't turn on async DIO by default for fear of breaking backward compatibility. Signed-off-by: Miklos Szeredi --- fs/fuse/file.c | 8 ++++---- fs/fuse/fuse_i.h | 3 +++ fs/fuse/inode.c | 4 +++- include/uapi/linux/fuse.h | 7 ++++++- 4 files changed, 16 insertions(+), 6 deletions(-) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 1f8e3d60dc07..6ab7ca43f9e0 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -2371,14 +2371,14 @@ fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t offset, unsigned long nr_segs) { ssize_t ret = 0; - struct file *file = NULL; + struct file *file = iocb->ki_filp; + struct fuse_file *ff = file->private_data; loff_t pos = 0; struct inode *inode; loff_t i_size; size_t count = iov_length(iov, nr_segs); struct fuse_io_priv *io; - file = iocb->ki_filp; pos = offset; inode = file->f_mapping->host; i_size = i_size_read(inode); @@ -2403,9 +2403,9 @@ fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, io->file = file; /* * By default, we want to optimize all I/Os with async request - * submission to the client filesystem. + * submission to the client filesystem if supported. */ - io->async = 1; + io->async = ff->fc->async_dio; io->iocb = iocb; /* diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 53b830e3b38f..fde7249a3a96 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -541,6 +541,9 @@ struct fuse_conn { /** Does the filesystem want adaptive readdirplus? */ unsigned readdirplus_auto:1; + /** Does the filesystem support asynchronous direct-IO submission? */ + unsigned async_dio:1; + /** The number of requests waiting for completion */ atomic_t num_waiting; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 17ec1f70524d..6201f81e4d3a 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -871,6 +871,8 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req) fc->do_readdirplus = 1; if (arg->flags & FUSE_READDIRPLUS_AUTO) fc->readdirplus_auto = 1; + if (arg->flags & FUSE_ASYNC_DIO) + fc->async_dio = 1; } else { ra_pages = fc->max_read / PAGE_CACHE_SIZE; fc->no_lock = 1; @@ -898,7 +900,7 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req) FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK | FUSE_SPLICE_WRITE | FUSE_SPLICE_MOVE | FUSE_SPLICE_READ | FUSE_FLOCK_LOCKS | FUSE_IOCTL_DIR | FUSE_AUTO_INVAL_DATA | - FUSE_DO_READDIRPLUS | FUSE_READDIRPLUS_AUTO; + FUSE_DO_READDIRPLUS | FUSE_READDIRPLUS_AUTO | FUSE_ASYNC_DIO; req->in.h.opcode = FUSE_INIT; req->in.numargs = 1; req->in.args[0].size = sizeof(*arg); diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index 706d035fa748..60bb2f9f7b74 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -90,6 +90,9 @@ * 7.21 * - add FUSE_READDIRPLUS * - send the requested events in POLL request + * + * 7.22 + * - add FUSE_ASYNC_DIO */ #ifndef _LINUX_FUSE_H @@ -125,7 +128,7 @@ #define FUSE_KERNEL_VERSION 7 /** Minor version number of this interface */ -#define FUSE_KERNEL_MINOR_VERSION 21 +#define FUSE_KERNEL_MINOR_VERSION 22 /** The node ID of the root inode */ #define FUSE_ROOT_ID 1 @@ -215,6 +218,7 @@ struct fuse_file_lock { * FUSE_AUTO_INVAL_DATA: automatically invalidate cached pages * FUSE_DO_READDIRPLUS: do READDIRPLUS (READDIR+LOOKUP in one) * FUSE_READDIRPLUS_AUTO: adaptive readdirplus + * FUSE_ASYNC_DIO: asynchronous direct I/O submission */ #define FUSE_ASYNC_READ (1 << 0) #define FUSE_POSIX_LOCKS (1 << 1) @@ -231,6 +235,7 @@ struct fuse_file_lock { #define FUSE_AUTO_INVAL_DATA (1 << 12) #define FUSE_DO_READDIRPLUS (1 << 13) #define FUSE_READDIRPLUS_AUTO (1 << 14) +#define FUSE_ASYNC_DIO (1 << 15) /** * CUSE INIT request/reply flags -- cgit v1.2.3