summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@g5.osdl.org>2006-03-25 20:18:27 +0300
committerLinus Torvalds <torvalds@g5.osdl.org>2006-03-25 20:18:27 +0300
commit53846a21c1766326bb14ce8ab6e997a0c120675d (patch)
tree37b04485e29844b4e734479181276a2f4d2447e4 /fs
parent2e9abdd9bad485970b37cd53a82f92702054984c (diff)
parent1ebbe2b20091d306453a5cf480a87e6cd28ae76f (diff)
downloadlinux-53846a21c1766326bb14ce8ab6e997a0c120675d.tar.xz
Merge git://git.linux-nfs.org/pub/linux/nfs-2.6
* git://git.linux-nfs.org/pub/linux/nfs-2.6: (103 commits) SUNRPC,RPCSEC_GSS: spkm3--fix config dependencies SUNRPC,RPCSEC_GSS: spkm3: import contexts using NID_cast5_cbc LOCKD: Make nlmsvc_traverse_shares return void LOCKD: nlmsvc_traverse_blocks return is unused SUNRPC,RPCSEC_GSS: fix krb5 sequence numbers. NFSv4: Dont list system.nfs4_acl for filesystems that don't support it. SUNRPC,RPCSEC_GSS: remove unnecessary kmalloc of a checksum SUNRPC: Ensure rpc_call_async() always calls tk_ops->rpc_release() SUNRPC: Fix memory barriers for req->rq_received NFS: Fix a race in nfs_sync_inode() NFS: Clean up nfs_flush_list() NFS: Fix a race with PG_private and nfs_release_page() NFSv4: Ensure the callback daemon flushes signals SUNRPC: Fix a 'Busy inodes' error in rpc_pipefs NFS, NLM: Allow blocking locks to respect signals NFS: Make nfs_fhget() return appropriate error values NFSv4: Fix an oops in nfs4_fill_super lockd: blocks should hold a reference to the nlm_file NFSv4: SETCLIENTID_CONFIRM should handle NFS4ERR_DELAY/NFS4ERR_RESOURCE NFSv4: Send the delegation stateid for SETATTR calls ...
Diffstat (limited to 'fs')
-rw-r--r--fs/Kconfig1
-rw-r--r--fs/lockd/clntlock.c112
-rw-r--r--fs/lockd/clntproc.c317
-rw-r--r--fs/lockd/host.c12
-rw-r--r--fs/lockd/mon.c11
-rw-r--r--fs/lockd/svc4proc.c157
-rw-r--r--fs/lockd/svclock.c349
-rw-r--r--fs/lockd/svcproc.c151
-rw-r--r--fs/lockd/svcshare.c4
-rw-r--r--fs/lockd/svcsubs.c7
-rw-r--r--fs/lockd/xdr.c17
-rw-r--r--fs/lockd/xdr4.c21
-rw-r--r--fs/locks.c106
-rw-r--r--fs/namespace.c38
-rw-r--r--fs/nfs/callback.c20
-rw-r--r--fs/nfs/callback_xdr.c28
-rw-r--r--fs/nfs/delegation.c19
-rw-r--r--fs/nfs/delegation.h1
-rw-r--r--fs/nfs/dir.c114
-rw-r--r--fs/nfs/direct.c949
-rw-r--r--fs/nfs/file.c49
-rw-r--r--fs/nfs/idmap.c47
-rw-r--r--fs/nfs/inode.c229
-rw-r--r--fs/nfs/iostat.h164
-rw-r--r--fs/nfs/mount_clnt.c17
-rw-r--r--fs/nfs/nfs2xdr.c4
-rw-r--r--fs/nfs/nfs3acl.c16
-rw-r--r--fs/nfs/nfs3proc.c246
-rw-r--r--fs/nfs/nfs3xdr.c6
-rw-r--r--fs/nfs/nfs4proc.c180
-rw-r--r--fs/nfs/nfs4state.c1
-rw-r--r--fs/nfs/nfs4xdr.c2
-rw-r--r--fs/nfs/pagelist.c16
-rw-r--r--fs/nfs/proc.c156
-rw-r--r--fs/nfs/read.c102
-rw-r--r--fs/nfs/unlink.c3
-rw-r--r--fs/nfs/write.c288
-rw-r--r--fs/nfsd/nfs4callback.c2
-rw-r--r--fs/nfsd/nfs4state.c13
-rw-r--r--fs/proc/base.c39
40 files changed, 2342 insertions, 1672 deletions
diff --git a/fs/Kconfig b/fs/Kconfig
index c8d0a209120c..e207be68d4ca 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -1555,6 +1555,7 @@ config RPCSEC_GSS_SPKM3
select CRYPTO
select CRYPTO_MD5
select CRYPTO_DES
+ select CRYPTO_CAST5
help
Provides for secure RPC calls by means of a gss-api
mechanism based on the SPKM3 public-key mechanism.
diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c
index da6354baa0b8..bce744468708 100644
--- a/fs/lockd/clntlock.c
+++ b/fs/lockd/clntlock.c
@@ -44,32 +44,25 @@ static LIST_HEAD(nlm_blocked);
/*
* Queue up a lock for blocking so that the GRANTED request can see it
*/
-int nlmclnt_prepare_block(struct nlm_rqst *req, struct nlm_host *host, struct file_lock *fl)
+struct nlm_wait *nlmclnt_prepare_block(struct nlm_host *host, struct file_lock *fl)
{
struct nlm_wait *block;
- BUG_ON(req->a_block != NULL);
block = kmalloc(sizeof(*block), GFP_KERNEL);
- if (block == NULL)
- return -ENOMEM;
- block->b_host = host;
- block->b_lock = fl;
- init_waitqueue_head(&block->b_wait);
- block->b_status = NLM_LCK_BLOCKED;
-
- list_add(&block->b_list, &nlm_blocked);
- req->a_block = block;
-
- return 0;
+ if (block != NULL) {
+ block->b_host = host;
+ block->b_lock = fl;
+ init_waitqueue_head(&block->b_wait);
+ block->b_status = NLM_LCK_BLOCKED;
+ list_add(&block->b_list, &nlm_blocked);
+ }
+ return block;
}
-void nlmclnt_finish_block(struct nlm_rqst *req)
+void nlmclnt_finish_block(struct nlm_wait *block)
{
- struct nlm_wait *block = req->a_block;
-
if (block == NULL)
return;
- req->a_block = NULL;
list_del(&block->b_list);
kfree(block);
}
@@ -77,15 +70,14 @@ void nlmclnt_finish_block(struct nlm_rqst *req)
/*
* Block on a lock
*/
-long nlmclnt_block(struct nlm_rqst *req, long timeout)
+int nlmclnt_block(struct nlm_wait *block, struct nlm_rqst *req, long timeout)
{
- struct nlm_wait *block = req->a_block;
long ret;
/* A borken server might ask us to block even if we didn't
* request it. Just say no!
*/
- if (!req->a_args.block)
+ if (block == NULL)
return -EAGAIN;
/* Go to sleep waiting for GRANT callback. Some servers seem
@@ -99,13 +91,10 @@ long nlmclnt_block(struct nlm_rqst *req, long timeout)
ret = wait_event_interruptible_timeout(block->b_wait,
block->b_status != NLM_LCK_BLOCKED,
timeout);
-
- if (block->b_status != NLM_LCK_BLOCKED) {
- req->a_res.status = block->b_status;
- block->b_status = NLM_LCK_BLOCKED;
- }
-
- return ret;
+ if (ret < 0)
+ return -ERESTARTSYS;
+ req->a_res.status = block->b_status;
+ return 0;
}
/*
@@ -125,7 +114,15 @@ u32 nlmclnt_grant(const struct sockaddr_in *addr, const struct nlm_lock *lock)
list_for_each_entry(block, &nlm_blocked, b_list) {
struct file_lock *fl_blocked = block->b_lock;
- if (!nlm_compare_locks(fl_blocked, fl))
+ if (fl_blocked->fl_start != fl->fl_start)
+ continue;
+ if (fl_blocked->fl_end != fl->fl_end)
+ continue;
+ /*
+ * Careful! The NLM server will return the 32-bit "pid" that
+ * we put on the wire: in this case the lockowner "pid".
+ */
+ if (fl_blocked->fl_u.nfs_fl.owner->pid != lock->svid)
continue;
if (!nlm_cmp_addr(&block->b_host->h_addr, addr))
continue;
@@ -147,34 +144,6 @@ u32 nlmclnt_grant(const struct sockaddr_in *addr, const struct nlm_lock *lock)
*/
/*
- * Mark the locks for reclaiming.
- * FIXME: In 2.5 we don't want to iterate through any global file_lock_list.
- * Maintain NLM lock reclaiming lists in the nlm_host instead.
- */
-static
-void nlmclnt_mark_reclaim(struct nlm_host *host)
-{
- struct file_lock *fl;
- struct inode *inode;
- struct list_head *tmp;
-
- list_for_each(tmp, &file_lock_list) {
- fl = list_entry(tmp, struct file_lock, fl_link);
-
- inode = fl->fl_file->f_dentry->d_inode;
- if (inode->i_sb->s_magic != NFS_SUPER_MAGIC)
- continue;
- if (fl->fl_u.nfs_fl.owner == NULL)
- continue;
- if (fl->fl_u.nfs_fl.owner->host != host)
- continue;
- if (!(fl->fl_u.nfs_fl.flags & NFS_LCK_GRANTED))
- continue;
- fl->fl_u.nfs_fl.flags |= NFS_LCK_RECLAIM;
- }
-}
-
-/*
* Someone has sent us an SM_NOTIFY. Ensure we bind to the new port number,
* that we mark locks for reclaiming, and that we bump the pseudo NSM state.
*/
@@ -186,7 +155,12 @@ void nlmclnt_prepare_reclaim(struct nlm_host *host, u32 newstate)
host->h_state++;
host->h_nextrebind = 0;
nlm_rebind_host(host);
- nlmclnt_mark_reclaim(host);
+
+ /*
+ * Mark the locks for reclaiming.
+ */
+ list_splice_init(&host->h_granted, &host->h_reclaim);
+
dprintk("NLM: reclaiming locks for host %s", host->h_name);
}
@@ -215,9 +189,7 @@ reclaimer(void *ptr)
{
struct nlm_host *host = (struct nlm_host *) ptr;
struct nlm_wait *block;
- struct list_head *tmp;
- struct file_lock *fl;
- struct inode *inode;
+ struct file_lock *fl, *next;
daemonize("%s-reclaim", host->h_name);
allow_signal(SIGKILL);
@@ -229,23 +201,13 @@ reclaimer(void *ptr)
/* First, reclaim all locks that have been marked. */
restart:
- list_for_each(tmp, &file_lock_list) {
- fl = list_entry(tmp, struct file_lock, fl_link);
+ list_for_each_entry_safe(fl, next, &host->h_reclaim, fl_u.nfs_fl.list) {
+ list_del_init(&fl->fl_u.nfs_fl.list);
- inode = fl->fl_file->f_dentry->d_inode;
- if (inode->i_sb->s_magic != NFS_SUPER_MAGIC)
- continue;
- if (fl->fl_u.nfs_fl.owner == NULL)
- continue;
- if (fl->fl_u.nfs_fl.owner->host != host)
- continue;
- if (!(fl->fl_u.nfs_fl.flags & NFS_LCK_RECLAIM))
- continue;
-
- fl->fl_u.nfs_fl.flags &= ~NFS_LCK_RECLAIM;
- nlmclnt_reclaim(host, fl);
if (signalled())
- break;
+ continue;
+ if (nlmclnt_reclaim(host, fl) == 0)
+ list_add_tail(&fl->fl_u.nfs_fl.list, &host->h_granted);
goto restart;
}
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index 970b6a6aa337..f96e38155b5c 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -132,59 +132,18 @@ static void nlmclnt_setlockargs(struct nlm_rqst *req, struct file_lock *fl)
memcpy(&lock->fh, NFS_FH(fl->fl_file->f_dentry->d_inode), sizeof(struct nfs_fh));
lock->caller = system_utsname.nodename;
lock->oh.data = req->a_owner;
- lock->oh.len = sprintf(req->a_owner, "%d@%s",
- current->pid, system_utsname.nodename);
- locks_copy_lock(&lock->fl, fl);
+ lock->oh.len = snprintf(req->a_owner, sizeof(req->a_owner), "%u@%s",
+ (unsigned int)fl->fl_u.nfs_fl.owner->pid,
+ system_utsname.nodename);
+ lock->svid = fl->fl_u.nfs_fl.owner->pid;
+ lock->fl.fl_start = fl->fl_start;
+ lock->fl.fl_end = fl->fl_end;
+ lock->fl.fl_type = fl->fl_type;
}
static void nlmclnt_release_lockargs(struct nlm_rqst *req)
{
- struct file_lock *fl = &req->a_args.lock.fl;
-
- if (fl->fl_ops && fl->fl_ops->fl_release_private)
- fl->fl_ops->fl_release_private(fl);
-}
-
-/*
- * Initialize arguments for GRANTED call. The nlm_rqst structure
- * has been cleared already.
- */
-int
-nlmclnt_setgrantargs(struct nlm_rqst *call, struct nlm_lock *lock)
-{
- locks_copy_lock(&call->a_args.lock.fl, &lock->fl);
- memcpy(&call->a_args.lock.fh, &lock->fh, sizeof(call->a_args.lock.fh));
- call->a_args.lock.caller = system_utsname.nodename;
- call->a_args.lock.oh.len = lock->oh.len;
-
- /* set default data area */
- call->a_args.lock.oh.data = call->a_owner;
-
- if (lock->oh.len > NLMCLNT_OHSIZE) {
- void *data = kmalloc(lock->oh.len, GFP_KERNEL);
- if (!data) {
- nlmclnt_freegrantargs(call);
- return 0;
- }
- call->a_args.lock.oh.data = (u8 *) data;
- }
-
- memcpy(call->a_args.lock.oh.data, lock->oh.data, lock->oh.len);
- return 1;
-}
-
-void
-nlmclnt_freegrantargs(struct nlm_rqst *call)
-{
- struct file_lock *fl = &call->a_args.lock.fl;
- /*
- * Check whether we allocated memory for the owner.
- */
- if (call->a_args.lock.oh.data != (u8 *) call->a_owner) {
- kfree(call->a_args.lock.oh.data);
- }
- if (fl->fl_ops && fl->fl_ops->fl_release_private)
- fl->fl_ops->fl_release_private(fl);
+ BUG_ON(req->a_args.lock.fl.fl_ops != NULL);
}
/*
@@ -193,9 +152,8 @@ nlmclnt_freegrantargs(struct nlm_rqst *call)
int
nlmclnt_proc(struct inode *inode, int cmd, struct file_lock *fl)
{
- struct nfs_server *nfssrv = NFS_SERVER(inode);
struct nlm_host *host;
- struct nlm_rqst reqst, *call = &reqst;
+ struct nlm_rqst *call;
sigset_t oldset;
unsigned long flags;
int status, proto, vers;
@@ -209,23 +167,17 @@ nlmclnt_proc(struct inode *inode, int cmd, struct file_lock *fl)
/* Retrieve transport protocol from NFS client */
proto = NFS_CLIENT(inode)->cl_xprt->prot;
- if (!(host = nlmclnt_lookup_host(NFS_ADDR(inode), proto, vers)))
+ host = nlmclnt_lookup_host(NFS_ADDR(inode), proto, vers);
+ if (host == NULL)
return -ENOLCK;
- /* Create RPC client handle if not there, and copy soft
- * and intr flags from NFS client. */
- if (host->h_rpcclnt == NULL) {
- struct rpc_clnt *clnt;
+ call = nlm_alloc_call(host);
+ if (call == NULL)
+ return -ENOMEM;
- /* Bind an rpc client to this host handle (does not
- * perform a portmapper lookup) */
- if (!(clnt = nlm_bind_host(host))) {
- status = -ENOLCK;
- goto done;
- }
- clnt->cl_softrtry = nfssrv->client->cl_softrtry;
- clnt->cl_intr = nfssrv->client->cl_intr;
- }
+ nlmclnt_locks_init_private(fl, host);
+ /* Set up the argument struct */
+ nlmclnt_setlockargs(call, fl);
/* Keep the old signal mask */
spin_lock_irqsave(&current->sighand->siglock, flags);
@@ -238,26 +190,10 @@ nlmclnt_proc(struct inode *inode, int cmd, struct file_lock *fl)
&& (current->flags & PF_EXITING)) {
sigfillset(&current->blocked); /* Mask all signals */
recalc_sigpending();
- spin_unlock_irqrestore(&current->sighand->siglock, flags);
- call = nlmclnt_alloc_call();
- if (!call) {
- status = -ENOMEM;
- goto out_restore;
- }
call->a_flags = RPC_TASK_ASYNC;
- } else {
- spin_unlock_irqrestore(&current->sighand->siglock, flags);
- memset(call, 0, sizeof(*call));
- locks_init_lock(&call->a_args.lock.fl);
- locks_init_lock(&call->a_res.lock.fl);
}
- call->a_host = host;
-
- nlmclnt_locks_init_private(fl, host);
-
- /* Set up the argument struct */
- nlmclnt_setlockargs(call, fl);
+ spin_unlock_irqrestore(&current->sighand->siglock, flags);
if (IS_SETLK(cmd) || IS_SETLKW(cmd)) {
if (fl->fl_type != F_UNLCK) {
@@ -270,41 +206,58 @@ nlmclnt_proc(struct inode *inode, int cmd, struct file_lock *fl)
else
status = -EINVAL;
- out_restore:
+ fl->fl_ops->fl_release_private(fl);
+ fl->fl_ops = NULL;
+
spin_lock_irqsave(&current->sighand->siglock, flags);
current->blocked = oldset;
recalc_sigpending();
spin_unlock_irqrestore(&current->sighand->siglock, flags);
-done:
dprintk("lockd: clnt proc returns %d\n", status);
- nlm_release_host(host);
return status;
}
EXPORT_SYMBOL(nlmclnt_proc);
/*
* Allocate an NLM RPC call struct
+ *
+ * Note: the caller must hold a reference to host. In case of failure,
+ * this reference will be released.
*/
-struct nlm_rqst *
-nlmclnt_alloc_call(void)
+struct nlm_rqst *nlm_alloc_call(struct nlm_host *host)
{
struct nlm_rqst *call;
- while (!signalled()) {
- call = (struct nlm_rqst *) kmalloc(sizeof(struct nlm_rqst), GFP_KERNEL);
- if (call) {
- memset(call, 0, sizeof(*call));
+ for(;;) {
+ call = kzalloc(sizeof(*call), GFP_KERNEL);
+ if (call != NULL) {
locks_init_lock(&call->a_args.lock.fl);
locks_init_lock(&call->a_res.lock.fl);
+ call->a_host = host;
return call;
}
- printk("nlmclnt_alloc_call: failed, waiting for memory\n");
+ if (signalled())
+ break;
+ printk("nlm_alloc_call: failed, waiting for memory\n");
schedule_timeout_interruptible(5*HZ);
}
+ nlm_release_host(host);
return NULL;
}
+void nlm_release_call(struct nlm_rqst *call)
+{
+ nlm_release_host(call->a_host);
+ nlmclnt_release_lockargs(call);
+ kfree(call);
+}
+
+static void nlmclnt_rpc_release(void *data)
+{
+ return nlm_release_call(data);
+}
+
static int nlm_wait_on_grace(wait_queue_head_t *queue)
{
DEFINE_WAIT(wait);
@@ -401,57 +354,45 @@ in_grace_period:
/*
* Generic NLM call, async version.
*/
-int nlmsvc_async_call(struct nlm_rqst *req, u32 proc, const struct rpc_call_ops *tk_ops)
+static int __nlm_async_call(struct nlm_rqst *req, u32 proc, struct rpc_message *msg, const struct rpc_call_ops *tk_ops)
{
struct nlm_host *host = req->a_host;
struct rpc_clnt *clnt;
- struct rpc_message msg = {
- .rpc_argp = &req->a_args,
- .rpc_resp = &req->a_res,
- };
- int status;
+ int status = -ENOLCK;
dprintk("lockd: call procedure %d on %s (async)\n",
(int)proc, host->h_name);
/* If we have no RPC client yet, create one. */
- if ((clnt = nlm_bind_host(host)) == NULL)
- return -ENOLCK;
- msg.rpc_proc = &clnt->cl_procinfo[proc];
+ clnt = nlm_bind_host(host);
+ if (clnt == NULL)
+ goto out_err;
+ msg->rpc_proc = &clnt->cl_procinfo[proc];
/* bootstrap and kick off the async RPC call */
- status = rpc_call_async(clnt, &msg, RPC_TASK_ASYNC, tk_ops, req);
-
+ status = rpc_call_async(clnt, msg, RPC_TASK_ASYNC, tk_ops, req);
+ if (status == 0)
+ return 0;
+out_err:
+ nlm_release_call(req);
return status;
}
-static int nlmclnt_async_call(struct nlm_rqst *req, u32 proc, const struct rpc_call_ops *tk_ops)
+int nlm_async_call(struct nlm_rqst *req, u32 proc, const struct rpc_call_ops *tk_ops)
{
- struct nlm_host *host = req->a_host;
- struct rpc_clnt *clnt;
- struct nlm_args *argp = &req->a_args;
- struct nlm_res *resp = &req->a_res;
struct rpc_message msg = {
- .rpc_argp = argp,
- .rpc_resp = resp,
+ .rpc_argp = &req->a_args,
+ .rpc_resp = &req->a_res,
};
- int status;
-
- dprintk("lockd: call procedure %d on %s (async)\n",
- (int)proc, host->h_name);
-
- /* If we have no RPC client yet, create one. */
- if ((clnt = nlm_bind_host(host)) == NULL)
- return -ENOLCK;
- msg.rpc_proc = &clnt->cl_procinfo[proc];
+ return __nlm_async_call(req, proc, &msg, tk_ops);
+}
- /* Increment host refcount */
- nlm_get_host(host);
- /* bootstrap and kick off the async RPC call */
- status = rpc_call_async(clnt, &msg, RPC_TASK_ASYNC, tk_ops, req);
- if (status < 0)
- nlm_release_host(host);
- return status;
+int nlm_async_reply(struct nlm_rqst *req, u32 proc, const struct rpc_call_ops *tk_ops)
+{
+ struct rpc_message msg = {
+ .rpc_argp = &req->a_res,
+ };
+ return __nlm_async_call(req, proc, &msg, tk_ops);
}
/*
@@ -463,36 +404,41 @@ nlmclnt_test(struct nlm_rqst *req, struct file_lock *fl)
int status;
status = nlmclnt_call(req, NLMPROC_TEST);
- nlmclnt_release_lockargs(req);
if (status < 0)
- return status;
+ goto out;
- status = req->a_res.status;
- if (status == NLM_LCK_GRANTED) {
- fl->fl_type = F_UNLCK;
- } if (status == NLM_LCK_DENIED) {
- /*
- * Report the conflicting lock back to the application.
- */
- locks_copy_lock(fl, &req->a_res.lock.fl);
- fl->fl_pid = 0;
- } else {
- return nlm_stat_to_errno(req->a_res.status);
+ switch (req->a_res.status) {
+ case NLM_LCK_GRANTED:
+ fl->fl_type = F_UNLCK;
+ break;
+ case NLM_LCK_DENIED:
+ /*
+ * Report the conflicting lock back to the application.
+ */
+ fl->fl_start = req->a_res.lock.fl.fl_start;
+ fl->fl_end = req->a_res.lock.fl.fl_start;
+ fl->fl_type = req->a_res.lock.fl.fl_type;
+ fl->fl_pid = 0;
+ break;
+ default:
+ status = nlm_stat_to_errno(req->a_res.status);
}
-
- return 0;
+out:
+ nlm_release_call(req);
+ return status;
}
static void nlmclnt_locks_copy_lock(struct file_lock *new, struct file_lock *fl)
{
- memcpy(&new->fl_u.nfs_fl, &fl->fl_u.nfs_fl, sizeof(new->fl_u.nfs_fl));
- nlm_get_lockowner(new->fl_u.nfs_fl.owner);
+ new->fl_u.nfs_fl.state = fl->fl_u.nfs_fl.state;
+ new->fl_u.nfs_fl.owner = nlm_get_lockowner(fl->fl_u.nfs_fl.owner);
+ list_add_tail(&new->fl_u.nfs_fl.list, &fl->fl_u.nfs_fl.owner->host->h_granted);
}
static void nlmclnt_locks_release_private(struct file_lock *fl)
{
+ list_del(&fl->fl_u.nfs_fl.list);
nlm_put_lockowner(fl->fl_u.nfs_fl.owner);
- fl->fl_ops = NULL;
}
static struct file_lock_operations nlmclnt_lock_ops = {
@@ -504,8 +450,8 @@ static void nlmclnt_locks_init_private(struct file_lock *fl, struct nlm_host *ho
{
BUG_ON(fl->fl_ops != NULL);
fl->fl_u.nfs_fl.state = 0;
- fl->fl_u.nfs_fl.flags = 0;
fl->fl_u.nfs_fl.owner = nlm_find_lockowner(host, fl->fl_owner);
+ INIT_LIST_HEAD(&fl->fl_u.nfs_fl.list);
fl->fl_ops = &nlmclnt_lock_ops;
}
@@ -552,57 +498,52 @@ nlmclnt_lock(struct nlm_rqst *req, struct file_lock *fl)
{
struct nlm_host *host = req->a_host;
struct nlm_res *resp = &req->a_res;
- long timeout;
- int status;
+ struct nlm_wait *block = NULL;
+ int status = -ENOLCK;
if (!host->h_monitored && nsm_monitor(host) < 0) {
printk(KERN_NOTICE "lockd: failed to monitor %s\n",
host->h_name);
- status = -ENOLCK;
goto out;
}
- if (req->a_args.block) {
- status = nlmclnt_prepare_block(req, host, fl);
- if (status < 0)
- goto out;
- }
+ block = nlmclnt_prepare_block(host, fl);
for(;;) {
status = nlmclnt_call(req, NLMPROC_LOCK);
if (status < 0)
goto out_unblock;
- if (resp->status != NLM_LCK_BLOCKED)
+ if (!req->a_args.block)
break;
- /* Wait on an NLM blocking lock */
- timeout = nlmclnt_block(req, NLMCLNT_POLL_TIMEOUT);
/* Did a reclaimer thread notify us of a server reboot? */
if (resp->status == NLM_LCK_DENIED_GRACE_PERIOD)
continue;
if (resp->status != NLM_LCK_BLOCKED)
break;
- if (timeout >= 0)
- continue;
- /* We were interrupted. Send a CANCEL request to the server
+ /* Wait on an NLM blocking lock */
+ status = nlmclnt_block(block, req, NLMCLNT_POLL_TIMEOUT);
+ /* if we were interrupted. Send a CANCEL request to the server
* and exit
*/
- status = (int)timeout;
- goto out_unblock;
+ if (status < 0)
+ goto out_unblock;
+ if (resp->status != NLM_LCK_BLOCKED)
+ break;
}
if (resp->status == NLM_LCK_GRANTED) {
fl->fl_u.nfs_fl.state = host->h_state;
- fl->fl_u.nfs_fl.flags |= NFS_LCK_GRANTED;
fl->fl_flags |= FL_SLEEP;
+ /* Ensure the resulting lock will get added to granted list */
do_vfs_lock(fl);
}
status = nlm_stat_to_errno(resp->status);
out_unblock:
- nlmclnt_finish_block(req);
+ nlmclnt_finish_block(block);
/* Cancel the blocked request if it is still pending */
if (resp->status == NLM_LCK_BLOCKED)
nlmclnt_cancel(host, req->a_args.block, fl);
out:
- nlmclnt_release_lockargs(req);
+ nlm_release_call(req);
return status;
}
@@ -658,10 +599,6 @@ nlmclnt_unlock(struct nlm_rqst *req, struct file_lock *fl)
struct nlm_res *resp = &req->a_res;
int status;
- /* Clean the GRANTED flag now so the lock doesn't get
- * reclaimed while we're stuck in the unlock call. */
- fl->fl_u.nfs_fl.flags &= ~NFS_LCK_GRANTED;
-
/*
* Note: the server is supposed to either grant us the unlock
* request, or to deny it with NLM_LCK_DENIED_GRACE_PERIOD. In either
@@ -669,32 +606,24 @@ nlmclnt_unlock(struct nlm_rqst *req, struct file_lock *fl)
*/
do_vfs_lock(fl);
- if (req->a_flags & RPC_TASK_ASYNC) {
- status = nlmclnt_async_call(req, NLMPROC_UNLOCK,
- &nlmclnt_unlock_ops);
- /* Hrmf... Do the unlock early since locks_remove_posix()
- * really expects us to free the lock synchronously */
- if (status < 0) {
- nlmclnt_release_lockargs(req);
- kfree(req);
- }
- return status;
- }
+ if (req->a_flags & RPC_TASK_ASYNC)
+ return nlm_async_call(req, NLMPROC_UNLOCK, &nlmclnt_unlock_ops);
status = nlmclnt_call(req, NLMPROC_UNLOCK);
- nlmclnt_release_lockargs(req);
if (status < 0)
- return status;
+ goto out;
+ status = 0;
if (resp->status == NLM_LCK_GRANTED)
- return 0;
+ goto out;
if (resp->status != NLM_LCK_DENIED_NOLOCKS)
printk("lockd: unexpected unlock status: %d\n", resp->status);
-
/* What to do now? I'm out of my depth... */
-
- return -ENOLCK;
+ status = -ENOLCK;
+out:
+ nlm_release_call(req);
+ return status;
}
static void nlmclnt_unlock_callback(struct rpc_task *task, void *data)
@@ -716,9 +645,6 @@ static void nlmclnt_unlock_callback(struct rpc_task *task, void *data)
if (status != NLM_LCK_GRANTED)
printk(KERN_WARNING "lockd: unexpected unlock status: %d\n", status);
die:
- nlm_release_host(req->a_host);
- nlmclnt_release_lockargs(req);
- kfree(req);
return;
retry_rebind:
nlm_rebind_host(req->a_host);
@@ -728,6 +654,7 @@ die:
static const struct rpc_call_ops nlmclnt_unlock_ops = {
.rpc_call_done = nlmclnt_unlock_callback,
+ .rpc_release = nlmclnt_rpc_release,
};
/*
@@ -749,20 +676,15 @@ static int nlmclnt_cancel(struct nlm_host *host, int block, struct file_lock *fl
recalc_sigpending();
spin_unlock_irqrestore(&current->sighand->siglock, flags);
- req = nlmclnt_alloc_call();
+ req = nlm_alloc_call(nlm_get_host(host));
if (!req)
return -ENOMEM;
- req->a_host = host;
req->a_flags = RPC_TASK_ASYNC;
nlmclnt_setlockargs(req, fl);
req->a_args.block = block;
- status = nlmclnt_async_call(req, NLMPROC_CANCEL, &nlmclnt_cancel_ops);
- if (status < 0) {
- nlmclnt_release_lockargs(req);
- kfree(req);
- }
+ status = nlm_async_call(req, NLMPROC_CANCEL, &nlmclnt_cancel_ops);
spin_lock_irqsave(&current->sighand->siglock, flags);
current->blocked = oldset;
@@ -791,6 +713,7 @@ static void nlmclnt_cancel_callback(struct rpc_task *task, void *data)
switch (req->a_res.status) {
case NLM_LCK_GRANTED:
case NLM_LCK_DENIED_GRACE_PERIOD:
+ case NLM_LCK_DENIED:
/* Everything's good */
break;
case NLM_LCK_DENIED_NOLOCKS:
@@ -802,9 +725,6 @@ static void nlmclnt_cancel_callback(struct rpc_task *task, void *data)
}
die:
- nlm_release_host(req->a_host);
- nlmclnt_release_lockargs(req);
- kfree(req);
return;
retry_cancel:
@@ -818,6 +738,7 @@ retry_cancel:
static const struct rpc_call_ops nlmclnt_cancel_ops = {
.rpc_call_done = nlmclnt_cancel_callback,
+ .rpc_release = nlmclnt_rpc_release,
};
/*
diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index 82f7a0b1d8ae..112ebf8b8dfe 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -123,6 +123,8 @@ nlm_lookup_host(int server, struct sockaddr_in *sin,
nlm_hosts[hash] = host;
INIT_LIST_HEAD(&host->h_lockowners);
spin_lock_init(&host->h_lock);
+ INIT_LIST_HEAD(&host->h_granted);
+ INIT_LIST_HEAD(&host->h_reclaim);
if (++nrhosts > NLM_HOST_MAX)
next_gc = 0;
@@ -191,11 +193,12 @@ nlm_bind_host(struct nlm_host *host)
xprt->resvport = 1; /* NLM requires a reserved port */
/* Existing NLM servers accept AUTH_UNIX only */
- clnt = rpc_create_client(xprt, host->h_name, &nlm_program,
+ clnt = rpc_new_client(xprt, host->h_name, &nlm_program,
host->h_version, RPC_AUTH_UNIX);
if (IS_ERR(clnt))
goto forgetit;
clnt->cl_autobind = 1; /* turn on pmap queries */
+ clnt->cl_softrtry = 1; /* All queries are soft */
host->h_rpcclnt = clnt;
}
@@ -242,8 +245,12 @@ void nlm_release_host(struct nlm_host *host)
{
if (host != NULL) {
dprintk("lockd: release host %s\n", host->h_name);
- atomic_dec(&host->h_count);
BUG_ON(atomic_read(&host->h_count) < 0);
+ if (atomic_dec_and_test(&host->h_count)) {
+ BUG_ON(!list_empty(&host->h_lockowners));
+ BUG_ON(!list_empty(&host->h_granted));
+ BUG_ON(!list_empty(&host->h_reclaim));
+ }
}
}
@@ -331,7 +338,6 @@ nlm_gc_hosts(void)
rpc_destroy_client(host->h_rpcclnt);
}
}
- BUG_ON(!list_empty(&host->h_lockowners));
kfree(host);
nrhosts--;
}
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index a89cb8aa2c88..3fc683f46b3e 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -35,6 +35,10 @@ nsm_mon_unmon(struct nlm_host *host, u32 proc, struct nsm_res *res)
struct rpc_clnt *clnt;
int status;
struct nsm_args args;
+ struct rpc_message msg = {
+ .rpc_argp = &args,
+ .rpc_resp = res,
+ };
clnt = nsm_create();
if (IS_ERR(clnt)) {
@@ -49,7 +53,8 @@ nsm_mon_unmon(struct nlm_host *host, u32 proc, struct nsm_res *res)
args.proc = NLMPROC_NSM_NOTIFY;
memset(res, 0, sizeof(*res));
- status = rpc_call(clnt, proc, &args, res, 0);
+ msg.rpc_proc = &clnt->cl_procinfo[proc];
+ status = rpc_call_sync(clnt, &msg, 0);
if (status < 0)
printk(KERN_DEBUG "nsm_mon_unmon: rpc failed, status=%d\n",
status);
@@ -214,12 +219,16 @@ static struct rpc_procinfo nsm_procedures[] = {
.p_encode = (kxdrproc_t) xdr_encode_mon,
.p_decode = (kxdrproc_t) xdr_decode_stat_res,
.p_bufsiz = MAX(SM_mon_sz, SM_monres_sz) << 2,
+ .p_statidx = SM_MON,
+ .p_name = "MONITOR",
},
[SM_UNMON] = {
.p_proc = SM_UNMON,
.p_encode = (kxdrproc_t) xdr_encode_unmon,
.p_decode = (kxdrproc_t) xdr_decode_stat,
.p_bufsiz = MAX(SM_mon_id_sz, SM_unmonres_sz) << 2,
+ .p_statidx = SM_UNMON,
+ .p_name = "UNMONITOR",
},
};
diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index b10f913aa06a..a2dd9ccb9b32 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -21,10 +21,6 @@
#define NLMDBG_FACILITY NLMDBG_CLIENT
-static u32 nlm4svc_callback(struct svc_rqst *, u32, struct nlm_res *);
-
-static const struct rpc_call_ops nlm4svc_callback_ops;
-
/*
* Obtain client and file from arguments
*/
@@ -234,83 +230,89 @@ nlm4svc_proc_granted(struct svc_rqst *rqstp, struct nlm_args *argp,
}
/*
+ * This is the generic lockd callback for async RPC calls
+ */
+static void nlm4svc_callback_exit(struct rpc_task *task, void *data)
+{
+ dprintk("lockd: %4d callback returned %d\n", task->tk_pid,
+ -task->tk_status);
+}
+
+static void nlm4svc_callback_release(void *data)
+{
+ nlm_release_call(data);
+}
+
+static const struct rpc_call_ops nlm4svc_callback_ops = {
+ .rpc_call_done = nlm4svc_callback_exit,
+ .rpc_release = nlm4svc_callback_release,
+};
+
+/*
* `Async' versions of the above service routines. They aren't really,
* because we send the callback before the reply proper. I hope this
* doesn't break any clients.
*/
-static int
-nlm4svc_proc_test_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
- void *resp)
+static int nlm4svc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_args *argp,
+ int (*func)(struct svc_rqst *, struct nlm_args *, struct nlm_res *))
{
- struct nlm_res res;
- u32 stat;
+ struct nlm_host *host;
+ struct nlm_rqst *call;
+ int stat;
- dprintk("lockd: TEST_MSG called\n");
- memset(&res, 0, sizeof(res));
+ host = nlmsvc_lookup_host(rqstp);
+ if (host == NULL)
+ return rpc_system_err;
+
+ call = nlm_alloc_call(host);
+ if (call == NULL)
+ return rpc_system_err;
- if ((stat = nlm4svc_proc_test(rqstp, argp, &res)) == 0)
- stat = nlm4svc_callback(rqstp, NLMPROC_TEST_RES, &res);
- return stat;
+ stat = func(rqstp, argp, &call->a_res);
+ if (stat != 0) {
+ nlm_release_call(call);
+ return stat;
+ }
+
+ call->a_flags = RPC_TASK_ASYNC;
+ if (nlm_async_reply(call, proc, &nlm4svc_callback_ops) < 0)
+ return rpc_system_err;
+ return rpc_success;
}
-static int
-nlm4svc_proc_lock_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
+static int nlm4svc_proc_test_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
void *resp)
{
- struct nlm_res res;
- u32 stat;
+ dprintk("lockd: TEST_MSG called\n");
+ return nlm4svc_callback(rqstp, NLMPROC_TEST_RES, argp, nlm4svc_proc_test);
+}
+static int nlm4svc_proc_lock_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
+ void *resp)
+{
dprintk("lockd: LOCK_MSG called\n");
- memset(&res, 0, sizeof(res));
-
- if ((stat = nlm4svc_proc_lock(rqstp, argp, &res)) == 0)
- stat = nlm4svc_callback(rqstp, NLMPROC_LOCK_RES, &res);
- return stat;
+ return nlm4svc_callback(rqstp, NLMPROC_LOCK_RES, argp, nlm4svc_proc_lock);
}
-static int
-nlm4svc_proc_cancel_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
+static int nlm4svc_proc_cancel_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
void *resp)
{
- struct nlm_res res;
- u32 stat;
-
dprintk("lockd: CANCEL_MSG called\n");
- memset(&res, 0, sizeof(res));
-
- if ((stat = nlm4svc_proc_cancel(rqstp, argp, &res)) == 0)
- stat = nlm4svc_callback(rqstp, NLMPROC_CANCEL_RES, &res);
- return stat;
+ return nlm4svc_callback(rqstp, NLMPROC_CANCEL_RES, argp, nlm4svc_proc_cancel);
}
-static int
-nlm4svc_proc_unlock_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
+static int nlm4svc_proc_unlock_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
void *resp)
{
- struct nlm_res res;
- u32 stat;
-
dprintk("lockd: UNLOCK_MSG called\n");
- memset(&res, 0, sizeof(res));
-
- if ((stat = nlm4svc_proc_unlock(rqstp, argp, &res)) == 0)
- stat = nlm4svc_callback(rqstp, NLMPROC_UNLOCK_RES, &res);
- return stat;
+ return nlm4svc_callback(rqstp, NLMPROC_UNLOCK_RES, argp, nlm4svc_proc_unlock);
}
-static int
-nlm4svc_proc_granted_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
+static int nlm4svc_proc_granted_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
void *resp)
{
- struct nlm_res res;
- u32 stat;
-
dprintk("lockd: GRANTED_MSG called\n");
- memset(&res, 0, sizeof(res));
-
- if ((stat = nlm4svc_proc_granted(rqstp, argp, &res)) == 0)
- stat = nlm4svc_callback(rqstp, NLMPROC_GRANTED_RES, &res);
- return stat;
+ return nlm4svc_callback(rqstp, NLMPROC_GRANTED_RES, argp, nlm4svc_proc_granted);
}
/*
@@ -472,55 +474,6 @@ nlm4svc_proc_granted_res(struct svc_rqst *rqstp, struct nlm_res *argp,
/*
- * This is the generic lockd callback for async RPC calls
- */
-static u32
-nlm4svc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_res *resp)
-{
- struct nlm_host *host;
- struct nlm_rqst *call;
-
- if (!(call = nlmclnt_alloc_call()))
- return rpc_system_err;
-
- host = nlmclnt_lookup_host(&rqstp->rq_addr,
- rqstp->rq_prot, rqstp->rq_vers);
- if (!host) {
- kfree(call);
- return rpc_system_err;
- }
-
- call->a_flags = RPC_TASK_ASYNC;
- call->a_host = host;
- memcpy(&call->a_args, resp, sizeof(*resp));
-
- if (nlmsvc_async_call(call, proc, &nlm4svc_callback_ops) < 0)
- goto error;
-
- return rpc_success;
- error:
- kfree(call);
- nlm_release_host(host);
- return rpc_system_err;
-}
-
-static void nlm4svc_callback_exit(struct rpc_task *task, void *data)
-{
- struct nlm_rqst *call = data;
-
- if (task->tk_status < 0) {
- dprintk("lockd: %4d callback failed (errno = %d)\n",
- task->tk_pid, -task->tk_status);
- }
- nlm_release_host(call->a_host);
- kfree(call);
-}
-
-static const struct rpc_call_ops nlm4svc_callback_ops = {
- .rpc_call_done = nlm4svc_callback_exit,
-};
-
-/*
* NLM Server procedures.
*/
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index 9cfced65d4a2..d2b66bad7d50 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -39,9 +39,12 @@
#define nlm_deadlock nlm_lck_denied
#endif
+static void nlmsvc_release_block(struct nlm_block *block);
static void nlmsvc_insert_block(struct nlm_block *block, unsigned long);
static int nlmsvc_remove_block(struct nlm_block *block);
+static int nlmsvc_setgrantargs(struct nlm_rqst *call, struct nlm_lock *lock);
+static void nlmsvc_freegrantargs(struct nlm_rqst *call);
static const struct rpc_call_ops nlmsvc_grant_ops;
/*
@@ -58,6 +61,7 @@ nlmsvc_insert_block(struct nlm_block *block, unsigned long when)
struct nlm_block **bp, *b;
dprintk("lockd: nlmsvc_insert_block(%p, %ld)\n", block, when);
+ kref_get(&block->b_count);
if (block->b_queued)
nlmsvc_remove_block(block);
bp = &nlm_blocked;
@@ -90,6 +94,7 @@ nlmsvc_remove_block(struct nlm_block *block)
if (b == block) {
*bp = block->b_next;
block->b_queued = 0;
+ nlmsvc_release_block(block);
return 1;
}
}
@@ -98,11 +103,10 @@ nlmsvc_remove_block(struct nlm_block *block)
}
/*
- * Find a block for a given lock and optionally remove it from
- * the list.
+ * Find a block for a given lock
*/
static struct nlm_block *
-nlmsvc_lookup_block(struct nlm_file *file, struct nlm_lock *lock, int remove)
+nlmsvc_lookup_block(struct nlm_file *file, struct nlm_lock *lock)
{
struct nlm_block **head, *block;
struct file_lock *fl;
@@ -112,17 +116,14 @@ nlmsvc_lookup_block(struct nlm_file *file, struct nlm_lock *lock, int remove)
(long long)lock->fl.fl_start,
(long long)lock->fl.fl_end, lock->fl.fl_type);
for (head = &nlm_blocked; (block = *head) != 0; head = &block->b_next) {
- fl = &block->b_call.a_args.lock.fl;
+ fl = &block->b_call->a_args.lock.fl;
dprintk("lockd: check f=%p pd=%d %Ld-%Ld ty=%d cookie=%s\n",
block->b_file, fl->fl_pid,
(long long)fl->fl_start,
(long long)fl->fl_end, fl->fl_type,
- nlmdbg_cookie2a(&block->b_call.a_args.cookie));
+ nlmdbg_cookie2a(&block->b_call->a_args.cookie));
if (block->b_file == file && nlm_compare_locks(fl, &lock->fl)) {
- if (remove) {
- *head = block->b_next;
- block->b_queued = 0;
- }
+ kref_get(&block->b_count);
return block;
}
}
@@ -150,11 +151,13 @@ nlmsvc_find_block(struct nlm_cookie *cookie, struct sockaddr_in *sin)
for (block = nlm_blocked; block; block = block->b_next) {
dprintk("cookie: head of blocked queue %p, block %p\n",
nlm_blocked, block);
- if (nlm_cookie_match(&block->b_call.a_args.cookie,cookie)
+ if (nlm_cookie_match(&block->b_call->a_args.cookie,cookie)
&& nlm_cmp_addr(sin, &block->b_host->h_addr))
break;
}
+ if (block != NULL)
+ kref_get(&block->b_count);
return block;
}
@@ -174,27 +177,30 @@ nlmsvc_create_block(struct svc_rqst *rqstp, struct nlm_file *file,
{
struct nlm_block *block;
struct nlm_host *host;
- struct nlm_rqst *call;
+ struct nlm_rqst *call = NULL;
/* Create host handle for callback */
- host = nlmclnt_lookup_host(&rqstp->rq_addr,
- rqstp->rq_prot, rqstp->rq_vers);
+ host = nlmsvc_lookup_host(rqstp);
if (host == NULL)
return NULL;
+ call = nlm_alloc_call(host);
+ if (call == NULL)
+ return NULL;
+
/* Allocate memory for block, and initialize arguments */
- if (!(block = (struct nlm_block *) kmalloc(sizeof(*block), GFP_KERNEL)))
+ block = kzalloc(sizeof(*block), GFP_KERNEL);
+ if (block == NULL)
goto failed;
- memset(block, 0, sizeof(*block));
- locks_init_lock(&block->b_call.a_args.lock.fl);
- locks_init_lock(&block->b_call.a_res.lock.fl);
+ kref_init(&block->b_count);
- if (!nlmclnt_setgrantargs(&block->b_call, lock))
+ if (!nlmsvc_setgrantargs(call, lock))
goto failed_free;
/* Set notifier function for VFS, and init args */
- block->b_call.a_args.lock.fl.fl_lmops = &nlmsvc_lock_operations;
- block->b_call.a_args.cookie = *cookie; /* see above */
+ call->a_args.lock.fl.fl_flags |= FL_SLEEP;
+ call->a_args.lock.fl.fl_lmops = &nlmsvc_lock_operations;
+ call->a_args.cookie = *cookie; /* see above */
dprintk("lockd: created block %p...\n", block);
@@ -202,22 +208,23 @@ nlmsvc_create_block(struct svc_rqst *rqstp, struct nlm_file *file,
block->b_daemon = rqstp->rq_server;
block->b_host = host;
block->b_file = file;
+ file->f_count++;
/* Add to file's list of blocks */
block->b_fnext = file->f_blocks;
file->f_blocks = block;
/* Set up RPC arguments for callback */
- call = &block->b_call;
- call->a_host = host;
+ block->b_call = call;
call->a_flags = RPC_TASK_ASYNC;
+ call->a_block = block;
return block;
failed_free:
kfree(block);
failed:
- nlm_release_host(host);
+ nlm_release_call(call);
return NULL;
}
@@ -227,29 +234,26 @@ failed:
* It is the caller's responsibility to check whether the file
* can be closed hereafter.
*/
-static int
-nlmsvc_delete_block(struct nlm_block *block, int unlock)
+static int nlmsvc_unlink_block(struct nlm_block *block)
{
- struct file_lock *fl = &block->b_call.a_args.lock.fl;
- struct nlm_file *file = block->b_file;
- struct nlm_block **bp;
- int status = 0;
-
- dprintk("lockd: deleting block %p...\n", block);
+ int status;
+ dprintk("lockd: unlinking block %p...\n", block);
/* Remove block from list */
+ status = posix_unblock_lock(block->b_file->f_file, &block->b_call->a_args.lock.fl);
nlmsvc_remove_block(block);
- if (unlock)
- status = posix_unblock_lock(file->f_file, fl);
+ return status;
+}
- /* If the block is in the middle of a GRANT callback,
- * don't kill it yet. */
- if (block->b_incall) {
- nlmsvc_insert_block(block, NLM_NEVER);
- block->b_done = 1;
- return status;
- }
+static void nlmsvc_free_block(struct kref *kref)
+{
+ struct nlm_block *block = container_of(kref, struct nlm_block, b_count);
+ struct nlm_file *file = block->b_file;
+ struct nlm_block **bp;
+ dprintk("lockd: freeing block %p...\n", block);
+
+ down(&file->f_sema);
/* Remove block from file's list of blocks */
for (bp = &file->f_blocks; *bp; bp = &(*bp)->b_fnext) {
if (*bp == block) {
@@ -257,36 +261,93 @@ nlmsvc_delete_block(struct nlm_block *block, int unlock)
break;
}
}
+ up(&file->f_sema);
- if (block->b_host)
- nlm_release_host(block->b_host);
- nlmclnt_freegrantargs(&block->b_call);
+ nlmsvc_freegrantargs(block->b_call);
+ nlm_release_call(block->b_call);
+ nlm_release_file(block->b_file);
kfree(block);
- return status;
+}
+
+static void nlmsvc_release_block(struct nlm_block *block)
+{
+ if (block != NULL)
+ kref_put(&block->b_count, nlmsvc_free_block);
+}
+
+static void nlmsvc_act_mark(struct nlm_host *host, struct nlm_file *file)
+{
+ struct nlm_block *block;
+
+ down(&file->f_sema);
+ for (block = file->f_blocks; block != NULL; block = block->b_fnext)
+ block->b_host->h_inuse = 1;
+ up(&file->f_sema);
+}
+
+static void nlmsvc_act_unlock(struct nlm_host *host, struct nlm_file *file)
+{
+ struct nlm_block *block;
+
+restart:
+ down(&file->f_sema);
+ for (block = file->f_blocks; block != NULL; block = block->b_fnext) {
+ if (host != NULL && host != block->b_host)
+ continue;
+ if (!block->b_queued)
+ continue;
+ kref_get(&block->b_count);
+ up(&file->f_sema);
+ nlmsvc_unlink_block(block);
+ nlmsvc_release_block(block);
+ goto restart;
+ }
+ up(&file->f_sema);
}
/*
* Loop over all blocks and perform the action specified.
* (NLM_ACT_CHECK handled by nlmsvc_inspect_file).
*/
-int
+void
nlmsvc_traverse_blocks(struct nlm_host *host, struct nlm_file *file, int action)
{
- struct nlm_block *block, *next;
- /* XXX: Will everything get cleaned up if we don't unlock here? */
+ if (action == NLM_ACT_MARK)
+ nlmsvc_act_mark(host, file);
+ else
+ nlmsvc_act_unlock(host, file);
+}
- down(&file->f_sema);
- for (block = file->f_blocks; block; block = next) {
- next = block->b_fnext;
- if (action == NLM_ACT_MARK)
- block->b_host->h_inuse = 1;
- else if (action == NLM_ACT_UNLOCK) {
- if (host == NULL || host == block->b_host)
- nlmsvc_delete_block(block, 1);
- }
+/*
+ * Initialize arguments for GRANTED call. The nlm_rqst structure
+ * has been cleared already.
+ */
+static int nlmsvc_setgrantargs(struct nlm_rqst *call, struct nlm_lock *lock)
+{
+ locks_copy_lock(&call->a_args.lock.fl, &lock->fl);
+ memcpy(&call->a_args.lock.fh, &lock->fh, sizeof(call->a_args.lock.fh));
+ call->a_args.lock.caller = system_utsname.nodename;
+ call->a_args.lock.oh.len = lock->oh.len;
+
+ /* set default data area */
+ call->a_args.lock.oh.data = call->a_owner;
+ call->a_args.lock.svid = lock->fl.fl_pid;
+
+ if (lock->oh.len > NLMCLNT_OHSIZE) {
+ void *data = kmalloc(lock->oh.len, GFP_KERNEL);
+ if (!data)
+ return 0;
+ call->a_args.lock.oh.data = (u8 *) data;
}
- up(&file->f_sema);
- return 0;
+
+ memcpy(call->a_args.lock.oh.data, lock->oh.data, lock->oh.len);
+ return 1;
+}
+
+static void nlmsvc_freegrantargs(struct nlm_rqst *call)
+{
+ if (call->a_args.lock.oh.data != call->a_owner)
+ kfree(call->a_args.lock.oh.data);
}
/*
@@ -297,9 +358,9 @@ u32
nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
struct nlm_lock *lock, int wait, struct nlm_cookie *cookie)
{
- struct file_lock *conflock;
- struct nlm_block *block;
+ struct nlm_block *block, *newblock = NULL;
int error;
+ u32 ret;
dprintk("lockd: nlmsvc_lock(%s/%ld, ty=%d, pi=%d, %Ld-%Ld, bl=%d)\n",
file->f_file->f_dentry->d_inode->i_sb->s_id,
@@ -310,69 +371,65 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
wait);
- /* Get existing block (in case client is busy-waiting) */
- block = nlmsvc_lookup_block(file, lock, 0);
-
- lock->fl.fl_flags |= FL_LOCKD;
-
+ lock->fl.fl_flags &= ~FL_SLEEP;
again:
/* Lock file against concurrent access */
down(&file->f_sema);
+ /* Get existing block (in case client is busy-waiting) */
+ block = nlmsvc_lookup_block(file, lock);
+ if (block == NULL) {
+ if (newblock != NULL)
+ lock = &newblock->b_call->a_args.lock;
+ } else
+ lock = &block->b_call->a_args.lock;
- if (!(conflock = posix_test_lock(file->f_file, &lock->fl))) {
- error = posix_lock_file(file->f_file, &lock->fl);
+ error = posix_lock_file(file->f_file, &lock->fl);
+ lock->fl.fl_flags &= ~FL_SLEEP;
- if (block)
- nlmsvc_delete_block(block, 0);
- up(&file->f_sema);
+ dprintk("lockd: posix_lock_file returned %d\n", error);
- dprintk("lockd: posix_lock_file returned %d\n", -error);
- switch(-error) {
+ switch(error) {
case 0:
- return nlm_granted;
- case EDEADLK:
- return nlm_deadlock;
- case EAGAIN:
- return nlm_lck_denied;
+ ret = nlm_granted;
+ goto out;
+ case -EAGAIN:
+ break;
+ case -EDEADLK:
+ ret = nlm_deadlock;
+ goto out;
default: /* includes ENOLCK */
- return nlm_lck_denied_nolocks;
- }
+ ret = nlm_lck_denied_nolocks;
+ goto out;
}
- if (!wait) {
- up(&file->f_sema);
- return nlm_lck_denied;
- }
+ ret = nlm_lck_denied;
+ if (!wait)
+ goto out;
- if (posix_locks_deadlock(&lock->fl, conflock)) {
- up(&file->f_sema);
- return nlm_deadlock;
- }
+ ret = nlm_lck_blocked;
+ if (block != NULL)
+ goto out;
/* If we don't have a block, create and initialize it. Then
* retry because we may have slept in kmalloc. */
/* We have to release f_sema as nlmsvc_create_block may try to
* to claim it while doing host garbage collection */
- if (block == NULL) {
+ if (newblock == NULL) {
up(&file->f_sema);
dprintk("lockd: blocking on this lock (allocating).\n");
- if (!(block = nlmsvc_create_block(rqstp, file, lock, cookie)))
+ if (!(newblock = nlmsvc_create_block(rqstp, file, lock, cookie)))
return nlm_lck_denied_nolocks;
goto again;
}
/* Append to list of blocked */
- nlmsvc_insert_block(block, NLM_NEVER);
-
- if (list_empty(&block->b_call.a_args.lock.fl.fl_block)) {
- /* Now add block to block list of the conflicting lock
- if we haven't done so. */
- dprintk("lockd: blocking on this lock.\n");
- posix_block_lock(conflock, &block->b_call.a_args.lock.fl);
- }
-
+ nlmsvc_insert_block(newblock, NLM_NEVER);
+out:
up(&file->f_sema);
- return nlm_lck_blocked;
+ nlmsvc_release_block(newblock);
+ nlmsvc_release_block(block);
+ dprintk("lockd: nlmsvc_lock returned %u\n", ret);
+ return ret;
}
/*
@@ -382,8 +439,6 @@ u32
nlmsvc_testlock(struct nlm_file *file, struct nlm_lock *lock,
struct nlm_lock *conflock)
{
- struct file_lock *fl;
-
dprintk("lockd: nlmsvc_testlock(%s/%ld, ty=%d, %Ld-%Ld)\n",
file->f_file->f_dentry->d_inode->i_sb->s_id,
file->f_file->f_dentry->d_inode->i_ino,
@@ -391,13 +446,14 @@ nlmsvc_testlock(struct nlm_file *file, struct nlm_lock *lock,
(long long)lock->fl.fl_start,
(long long)lock->fl.fl_end);
- if ((fl = posix_test_lock(file->f_file, &lock->fl)) != NULL) {
+ if (posix_test_lock(file->f_file, &lock->fl, &conflock->fl)) {
dprintk("lockd: conflicting lock(ty=%d, %Ld-%Ld)\n",
- fl->fl_type, (long long)fl->fl_start,
- (long long)fl->fl_end);
+ conflock->fl.fl_type,
+ (long long)conflock->fl.fl_start,
+ (long long)conflock->fl.fl_end);
conflock->caller = "somehost"; /* FIXME */
conflock->oh.len = 0; /* don't return OH info */
- conflock->fl = *fl;
+ conflock->svid = conflock->fl.fl_pid;
return nlm_lck_denied;
}
@@ -453,9 +509,12 @@ nlmsvc_cancel_blocked(struct nlm_file *file, struct nlm_lock *lock)
(long long)lock->fl.fl_end);
down(&file->f_sema);
- if ((block = nlmsvc_lookup_block(file, lock, 1)) != NULL)
- status = nlmsvc_delete_block(block, 1);
+ block = nlmsvc_lookup_block(file, lock);
up(&file->f_sema);
+ if (block != NULL) {
+ status = nlmsvc_unlink_block(block);
+ nlmsvc_release_block(block);
+ }
return status ? nlm_lck_denied : nlm_granted;
}
@@ -473,7 +532,7 @@ nlmsvc_notify_blocked(struct file_lock *fl)
dprintk("lockd: VFS unblock notification for block %p\n", fl);
for (bp = &nlm_blocked; (block = *bp) != 0; bp = &block->b_next) {
- if (nlm_compare_locks(&block->b_call.a_args.lock.fl, fl)) {
+ if (nlm_compare_locks(&block->b_call->a_args.lock.fl, fl)) {
nlmsvc_insert_block(block, 0);
svc_wake_up(block->b_daemon);
return;
@@ -508,17 +567,13 @@ static void
nlmsvc_grant_blocked(struct nlm_block *block)
{
struct nlm_file *file = block->b_file;
- struct nlm_lock *lock = &block->b_call.a_args.lock;
- struct file_lock *conflock;
+ struct nlm_lock *lock = &block->b_call->a_args.lock;
int error;
dprintk("lockd: grant blocked lock %p\n", block);
- /* First thing is lock the file */
- down(&file->f_sema);
-
/* Unlink block request from list */
- nlmsvc_remove_block(block);
+ nlmsvc_unlink_block(block);
/* If b_granted is true this means we've been here before.
* Just retry the grant callback, possibly refreshing the RPC
@@ -529,24 +584,21 @@ nlmsvc_grant_blocked(struct nlm_block *block)
}
/* Try the lock operation again */
- if ((conflock = posix_test_lock(file->f_file, &lock->fl)) != NULL) {
- /* Bummer, we blocked again */
+ lock->fl.fl_flags |= FL_SLEEP;
+ error = posix_lock_file(file->f_file, &lock->fl);
+ lock->fl.fl_flags &= ~FL_SLEEP;
+
+ switch (error) {
+ case 0:
+ break;
+ case -EAGAIN:
dprintk("lockd: lock still blocked\n");
nlmsvc_insert_block(block, NLM_NEVER);
- posix_block_lock(conflock, &lock->fl);
- up(&file->f_sema);
return;
- }
-
- /* Alright, no conflicting lock. Now lock it for real. If the
- * following yields an error, this is most probably due to low
- * memory. Retry the lock in a few seconds.
- */
- if ((error = posix_lock_file(file->f_file, &lock->fl)) < 0) {
+ default:
printk(KERN_WARNING "lockd: unexpected error %d in %s!\n",
-error, __FUNCTION__);
nlmsvc_insert_block(block, 10 * HZ);
- up(&file->f_sema);
return;
}
@@ -554,17 +606,15 @@ callback:
/* Lock was granted by VFS. */
dprintk("lockd: GRANTing blocked lock.\n");
block->b_granted = 1;
- block->b_incall = 1;
/* Schedule next grant callback in 30 seconds */
nlmsvc_insert_block(block, 30 * HZ);
/* Call the client */
- nlm_get_host(block->b_call.a_host);
- if (nlmsvc_async_call(&block->b_call, NLMPROC_GRANTED_MSG,
+ kref_get(&block->b_count);
+ if (nlm_async_call(block->b_call, NLMPROC_GRANTED_MSG,
&nlmsvc_grant_ops) < 0)
- nlm_release_host(block->b_call.a_host);
- up(&file->f_sema);
+ nlmsvc_release_block(block);
}
/*
@@ -578,20 +628,10 @@ callback:
static void nlmsvc_grant_callback(struct rpc_task *task, void *data)
{
struct nlm_rqst *call = data;
- struct nlm_block *block;
+ struct nlm_block *block = call->a_block;
unsigned long timeout;
- struct sockaddr_in *peer_addr = RPC_PEERADDR(task->tk_client);
dprintk("lockd: GRANT_MSG RPC callback\n");
- dprintk("callback: looking for cookie %s, host (%u.%u.%u.%u)\n",
- nlmdbg_cookie2a(&call->a_args.cookie),
- NIPQUAD(peer_addr->sin_addr.s_addr));
- if (!(block = nlmsvc_find_block(&call->a_args.cookie, peer_addr))) {
- dprintk("lockd: no block for cookie %s, host (%u.%u.%u.%u)\n",
- nlmdbg_cookie2a(&call->a_args.cookie),
- NIPQUAD(peer_addr->sin_addr.s_addr));
- return;
- }
/* Technically, we should down the file semaphore here. Since we
* move the block towards the head of the queue only, no harm
@@ -608,13 +648,18 @@ static void nlmsvc_grant_callback(struct rpc_task *task, void *data)
}
nlmsvc_insert_block(block, timeout);
svc_wake_up(block->b_daemon);
- block->b_incall = 0;
+}
- nlm_release_host(call->a_host);
+void nlmsvc_grant_release(void *data)
+{
+ struct nlm_rqst *call = data;
+
+ nlmsvc_release_block(call->a_block);
}
static const struct rpc_call_ops nlmsvc_grant_ops = {
.rpc_call_done = nlmsvc_grant_callback,
+ .rpc_release = nlmsvc_grant_release,
};
/*
@@ -634,25 +679,17 @@ nlmsvc_grant_reply(struct svc_rqst *rqstp, struct nlm_cookie *cookie, u32 status
return;
file = block->b_file;
- file->f_count++;
- down(&file->f_sema);
- block = nlmsvc_find_block(cookie, &rqstp->rq_addr);
if (block) {
if (status == NLM_LCK_DENIED_GRACE_PERIOD) {
/* Try again in a couple of seconds */
nlmsvc_insert_block(block, 10 * HZ);
- up(&file->f_sema);
} else {
/* Lock is now held by client, or has been rejected.
* In both cases, the block should be removed. */
- up(&file->f_sema);
- if (status == NLM_LCK_GRANTED)
- nlmsvc_delete_block(block, 0);
- else
- nlmsvc_delete_block(block, 1);
+ nlmsvc_unlink_block(block);
}
}
- nlm_release_file(file);
+ nlmsvc_release_block(block);
}
/*
@@ -675,10 +712,12 @@ nlmsvc_retry_blocked(void)
break;
dprintk("nlmsvc_retry_blocked(%p, when=%ld, done=%d)\n",
block, block->b_when, block->b_done);
+ kref_get(&block->b_count);
if (block->b_done)
- nlmsvc_delete_block(block, 0);
+ nlmsvc_unlink_block(block);
else
nlmsvc_grant_blocked(block);
+ nlmsvc_release_block(block);
}
if ((block = nlm_blocked) && block->b_when != NLM_NEVER)
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index 35681d9cf1fc..d210cf304e92 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -22,10 +22,6 @@
#define NLMDBG_FACILITY NLMDBG_CLIENT
-static u32 nlmsvc_callback(struct svc_rqst *, u32, struct nlm_res *);
-
-static const struct rpc_call_ops nlmsvc_callback_ops;
-
#ifdef CONFIG_LOCKD_V4
static u32
cast_to_nlm(u32 status, u32 vers)
@@ -262,83 +258,91 @@ nlmsvc_proc_granted(struct svc_rqst *rqstp, struct nlm_args *argp,
}
/*
+ * This is the generic lockd callback for async RPC calls
+ */
+static void nlmsvc_callback_exit(struct rpc_task *task, void *data)
+{
+ dprintk("lockd: %4d callback returned %d\n", task->tk_pid,
+ -task->tk_status);
+}
+
+static void nlmsvc_callback_release(void *data)
+{
+ nlm_release_call(data);
+}
+
+static const struct rpc_call_ops nlmsvc_callback_ops = {
+ .rpc_call_done = nlmsvc_callback_exit,
+ .rpc_release = nlmsvc_callback_release,
+};
+
+/*
* `Async' versions of the above service routines. They aren't really,
* because we send the callback before the reply proper. I hope this
* doesn't break any clients.
*/
-static int
-nlmsvc_proc_test_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
- void *resp)
+static int nlmsvc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_args *argp,
+ int (*func)(struct svc_rqst *, struct nlm_args *, struct nlm_res *))
{
- struct nlm_res res;
- u32 stat;
+ struct nlm_host *host;
+ struct nlm_rqst *call;
+ int stat;
- dprintk("lockd: TEST_MSG called\n");
- memset(&res, 0, sizeof(res));
+ host = nlmsvc_lookup_host(rqstp);
+ if (host == NULL)
+ return rpc_system_err;
- if ((stat = nlmsvc_proc_test(rqstp, argp, &res)) == 0)
- stat = nlmsvc_callback(rqstp, NLMPROC_TEST_RES, &res);
- return stat;
+ call = nlm_alloc_call(host);
+ if (call == NULL)
+ return rpc_system_err;
+
+ stat = func(rqstp, argp, &call->a_res);
+ if (stat != 0) {
+ nlm_release_call(call);
+ return stat;
+ }
+
+ call->a_flags = RPC_TASK_ASYNC;
+ if (nlm_async_reply(call, proc, &nlmsvc_callback_ops) < 0)
+ return rpc_system_err;
+ return rpc_success;
}
-static int
-nlmsvc_proc_lock_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
+static int nlmsvc_proc_test_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
void *resp)
{
- struct nlm_res res;
- u32 stat;
+ dprintk("lockd: TEST_MSG called\n");
+ return nlmsvc_callback(rqstp, NLMPROC_TEST_RES, argp, nlmsvc_proc_test);
+}
+static int nlmsvc_proc_lock_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
+ void *resp)
+{
dprintk("lockd: LOCK_MSG called\n");
- memset(&res, 0, sizeof(res));
-
- if ((stat = nlmsvc_proc_lock(rqstp, argp, &res)) == 0)
- stat = nlmsvc_callback(rqstp, NLMPROC_LOCK_RES, &res);
- return stat;
+ return nlmsvc_callback(rqstp, NLMPROC_LOCK_RES, argp, nlmsvc_proc_lock);
}
-static int
-nlmsvc_proc_cancel_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
+static int nlmsvc_proc_cancel_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
void *resp)
{
- struct nlm_res res;
- u32 stat;
-
dprintk("lockd: CANCEL_MSG called\n");
- memset(&res, 0, sizeof(res));
-
- if ((stat = nlmsvc_proc_cancel(rqstp, argp, &res)) == 0)
- stat = nlmsvc_callback(rqstp, NLMPROC_CANCEL_RES, &res);
- return stat;
+ return nlmsvc_callback(rqstp, NLMPROC_CANCEL_RES, argp, nlmsvc_proc_cancel);
}
static int
nlmsvc_proc_unlock_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
void *resp)
{
- struct nlm_res res;
- u32 stat;
-
dprintk("lockd: UNLOCK_MSG called\n");
- memset(&res, 0, sizeof(res));
-
- if ((stat = nlmsvc_proc_unlock(rqstp, argp, &res)) == 0)
- stat = nlmsvc_callback(rqstp, NLMPROC_UNLOCK_RES, &res);
- return stat;
+ return nlmsvc_callback(rqstp, NLMPROC_UNLOCK_RES, argp, nlmsvc_proc_unlock);
}
static int
nlmsvc_proc_granted_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
void *resp)
{
- struct nlm_res res;
- u32 stat;
-
dprintk("lockd: GRANTED_MSG called\n");
- memset(&res, 0, sizeof(res));
-
- if ((stat = nlmsvc_proc_granted(rqstp, argp, &res)) == 0)
- stat = nlmsvc_callback(rqstp, NLMPROC_GRANTED_RES, &res);
- return stat;
+ return nlmsvc_callback(rqstp, NLMPROC_GRANTED_RES, argp, nlmsvc_proc_granted);
}
/*
@@ -497,55 +501,6 @@ nlmsvc_proc_granted_res(struct svc_rqst *rqstp, struct nlm_res *argp,
}
/*
- * This is the generic lockd callback for async RPC calls
- */
-static u32
-nlmsvc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_res *resp)
-{
- struct nlm_host *host;
- struct nlm_rqst *call;
-
- if (!(call = nlmclnt_alloc_call()))
- return rpc_system_err;
-
- host = nlmclnt_lookup_host(&rqstp->rq_addr,
- rqstp->rq_prot, rqstp->rq_vers);
- if (!host) {
- kfree(call);
- return rpc_system_err;
- }
-
- call->a_flags = RPC_TASK_ASYNC;
- call->a_host = host;
- memcpy(&call->a_args, resp, sizeof(*resp));
-
- if (nlmsvc_async_call(call, proc, &nlmsvc_callback_ops) < 0)
- goto error;
-
- return rpc_success;
- error:
- nlm_release_host(host);
- kfree(call);
- return rpc_system_err;
-}
-
-static void nlmsvc_callback_exit(struct rpc_task *task, void *data)
-{
- struct nlm_rqst *call = data;
-
- if (task->tk_status < 0) {
- dprintk("lockd: %4d callback failed (errno = %d)\n",
- task->tk_pid, -task->tk_status);
- }
- nlm_release_host(call->a_host);
- kfree(call);
-}
-
-static const struct rpc_call_ops nlmsvc_callback_ops = {
- .rpc_call_done = nlmsvc_callback_exit,
-};
-
-/*
* NLM Server procedures.
*/
diff --git a/fs/lockd/svcshare.c b/fs/lockd/svcshare.c
index 4943fb7836ce..27288c83da96 100644
--- a/fs/lockd/svcshare.c
+++ b/fs/lockd/svcshare.c
@@ -88,7 +88,7 @@ nlmsvc_unshare_file(struct nlm_host *host, struct nlm_file *file,
* Traverse all shares for a given file (and host).
* NLM_ACT_CHECK is handled by nlmsvc_inspect_file.
*/
-int
+void
nlmsvc_traverse_shares(struct nlm_host *host, struct nlm_file *file, int action)
{
struct nlm_share *share, **shpp;
@@ -106,6 +106,4 @@ nlmsvc_traverse_shares(struct nlm_host *host, struct nlm_file *file, int action)
}
shpp = &share->s_next;
}
-
- return 0;
}
diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c
index 62f4a385177f..c7a6e3ae44d6 100644
--- a/fs/lockd/svcsubs.c
+++ b/fs/lockd/svcsubs.c
@@ -182,7 +182,7 @@ nlm_traverse_locks(struct nlm_host *host, struct nlm_file *file, int action)
again:
file->f_locks = 0;
for (fl = inode->i_flock; fl; fl = fl->fl_next) {
- if (!(fl->fl_flags & FL_LOCKD))
+ if (fl->fl_lmops != &nlmsvc_lock_operations)
continue;
/* update current lock count */
@@ -224,9 +224,8 @@ nlm_inspect_file(struct nlm_host *host, struct nlm_file *file, int action)
if (file->f_count || file->f_blocks || file->f_shares)
return 1;
} else {
- if (nlmsvc_traverse_blocks(host, file, action)
- || nlmsvc_traverse_shares(host, file, action))
- return 1;
+ nlmsvc_traverse_blocks(host, file, action);
+ nlmsvc_traverse_shares(host, file, action);
}
return nlm_traverse_locks(host, file, action);
}
diff --git a/fs/lockd/xdr.c b/fs/lockd/xdr.c
index 1d700a4dd0b5..f22a3764461a 100644
--- a/fs/lockd/xdr.c
+++ b/fs/lockd/xdr.c
@@ -131,10 +131,11 @@ nlm_decode_lock(u32 *p, struct nlm_lock *lock)
|| !(p = nlm_decode_fh(p, &lock->fh))
|| !(p = nlm_decode_oh(p, &lock->oh)))
return NULL;
+ lock->svid = ntohl(*p++);
locks_init_lock(fl);
fl->fl_owner = current->files;
- fl->fl_pid = ntohl(*p++);
+ fl->fl_pid = (pid_t)lock->svid;
fl->fl_flags = FL_POSIX;
fl->fl_type = F_RDLCK; /* as good as anything else */
start = ntohl(*p++);
@@ -174,7 +175,7 @@ nlm_encode_lock(u32 *p, struct nlm_lock *lock)
else
len = loff_t_to_s32(fl->fl_end - fl->fl_start + 1);
- *p++ = htonl(fl->fl_pid);
+ *p++ = htonl(lock->svid);
*p++ = htonl(start);
*p++ = htonl(len);
@@ -197,7 +198,7 @@ nlm_encode_testres(u32 *p, struct nlm_res *resp)
struct file_lock *fl = &resp->lock.fl;
*p++ = (fl->fl_type == F_RDLCK)? xdr_zero : xdr_one;
- *p++ = htonl(fl->fl_pid);
+ *p++ = htonl(resp->lock.svid);
/* Encode owner handle. */
if (!(p = xdr_encode_netobj(p, &resp->lock.oh)))
@@ -298,7 +299,8 @@ nlmsvc_decode_shareargs(struct svc_rqst *rqstp, u32 *p, nlm_args *argp)
memset(lock, 0, sizeof(*lock));
locks_init_lock(&lock->fl);
- lock->fl.fl_pid = ~(u32) 0;
+ lock->svid = ~(u32) 0;
+ lock->fl.fl_pid = (pid_t)lock->svid;
if (!(p = nlm_decode_cookie(p, &argp->cookie))
|| !(p = xdr_decode_string_inplace(p, &lock->caller,
@@ -415,7 +417,8 @@ nlmclt_decode_testres(struct rpc_rqst *req, u32 *p, struct nlm_res *resp)
memset(&resp->lock, 0, sizeof(resp->lock));
locks_init_lock(fl);
excl = ntohl(*p++);
- fl->fl_pid = ntohl(*p++);
+ resp->lock.svid = ntohl(*p++);
+ fl->fl_pid = (pid_t)resp->lock.svid;
if (!(p = nlm_decode_oh(p, &resp->lock.oh)))
return -EIO;
@@ -543,7 +546,9 @@ nlmclt_decode_res(struct rpc_rqst *req, u32 *p, struct nlm_res *resp)
.p_proc = NLMPROC_##proc, \
.p_encode = (kxdrproc_t) nlmclt_encode_##argtype, \
.p_decode = (kxdrproc_t) nlmclt_decode_##restype, \
- .p_bufsiz = MAX(NLM_##argtype##_sz, NLM_##restype##_sz) << 2 \
+ .p_bufsiz = MAX(NLM_##argtype##_sz, NLM_##restype##_sz) << 2, \
+ .p_statidx = NLMPROC_##proc, \
+ .p_name = #proc, \
}
static struct rpc_procinfo nlm_procedures[] = {
diff --git a/fs/lockd/xdr4.c b/fs/lockd/xdr4.c
index fdcf105a5303..36eb175ec335 100644
--- a/fs/lockd/xdr4.c
+++ b/fs/lockd/xdr4.c
@@ -130,10 +130,11 @@ nlm4_decode_lock(u32 *p, struct nlm_lock *lock)
|| !(p = nlm4_decode_fh(p, &lock->fh))
|| !(p = nlm4_decode_oh(p, &lock->oh)))
return NULL;
+ lock->svid = ntohl(*p++);
locks_init_lock(fl);
fl->fl_owner = current->files;
- fl->fl_pid = ntohl(*p++);
+ fl->fl_pid = (pid_t)lock->svid;
fl->fl_flags = FL_POSIX;
fl->fl_type = F_RDLCK; /* as good as anything else */
p = xdr_decode_hyper(p, &start);
@@ -167,7 +168,7 @@ nlm4_encode_lock(u32 *p, struct nlm_lock *lock)
|| (fl->fl_end > NLM4_OFFSET_MAX && fl->fl_end != OFFSET_MAX))
return NULL;
- *p++ = htonl(fl->fl_pid);
+ *p++ = htonl(lock->svid);
start = loff_t_to_s64(fl->fl_start);
if (fl->fl_end == OFFSET_MAX)
@@ -198,7 +199,7 @@ nlm4_encode_testres(u32 *p, struct nlm_res *resp)
struct file_lock *fl = &resp->lock.fl;
*p++ = (fl->fl_type == F_RDLCK)? xdr_zero : xdr_one;
- *p++ = htonl(fl->fl_pid);
+ *p++ = htonl(resp->lock.svid);
/* Encode owner handle. */
if (!(p = xdr_encode_netobj(p, &resp->lock.oh)))
@@ -212,8 +213,8 @@ nlm4_encode_testres(u32 *p, struct nlm_res *resp)
p = xdr_encode_hyper(p, start);
p = xdr_encode_hyper(p, len);
- dprintk("xdr: encode_testres (status %d pid %d type %d start %Ld end %Ld)\n",
- resp->status, fl->fl_pid, fl->fl_type,
+ dprintk("xdr: encode_testres (status %u pid %d type %d start %Ld end %Ld)\n",
+ resp->status, (int)resp->lock.svid, fl->fl_type,
(long long)fl->fl_start, (long long)fl->fl_end);
}
@@ -303,7 +304,8 @@ nlm4svc_decode_shareargs(struct svc_rqst *rqstp, u32 *p, nlm_args *argp)
memset(lock, 0, sizeof(*lock));
locks_init_lock(&lock->fl);
- lock->fl.fl_pid = ~(u32) 0;
+ lock->svid = ~(u32) 0;
+ lock->fl.fl_pid = (pid_t)lock->svid;
if (!(p = nlm4_decode_cookie(p, &argp->cookie))
|| !(p = xdr_decode_string_inplace(p, &lock->caller,
@@ -420,7 +422,8 @@ nlm4clt_decode_testres(struct rpc_rqst *req, u32 *p, struct nlm_res *resp)
memset(&resp->lock, 0, sizeof(resp->lock));
locks_init_lock(fl);
excl = ntohl(*p++);
- fl->fl_pid = ntohl(*p++);
+ resp->lock.svid = ntohl(*p++);
+ fl->fl_pid = (pid_t)resp->lock.svid;
if (!(p = nlm4_decode_oh(p, &resp->lock.oh)))
return -EIO;
@@ -548,7 +551,9 @@ nlm4clt_decode_res(struct rpc_rqst *req, u32 *p, struct nlm_res *resp)
.p_proc = NLMPROC_##proc, \
.p_encode = (kxdrproc_t) nlm4clt_encode_##argtype, \
.p_decode = (kxdrproc_t) nlm4clt_decode_##restype, \
- .p_bufsiz = MAX(NLM4_##argtype##_sz, NLM4_##restype##_sz) << 2 \
+ .p_bufsiz = MAX(NLM4_##argtype##_sz, NLM4_##restype##_sz) << 2, \
+ .p_statidx = NLMPROC_##proc, \
+ .p_name = #proc, \
}
static struct rpc_procinfo nlm4_procedures[] = {
diff --git a/fs/locks.c b/fs/locks.c
index 909eab8fb1d0..56f996e98bbc 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -139,10 +139,7 @@ int lease_break_time = 45;
#define for_each_lock(inode, lockp) \
for (lockp = &inode->i_flock; *lockp != NULL; lockp = &(*lockp)->fl_next)
-LIST_HEAD(file_lock_list);
-
-EXPORT_SYMBOL(file_lock_list);
-
+static LIST_HEAD(file_lock_list);
static LIST_HEAD(blocked_list);
static kmem_cache_t *filelock_cache;
@@ -153,6 +150,21 @@ static struct file_lock *locks_alloc_lock(void)
return kmem_cache_alloc(filelock_cache, SLAB_KERNEL);
}
+static void locks_release_private(struct file_lock *fl)
+{
+ if (fl->fl_ops) {
+ if (fl->fl_ops->fl_release_private)
+ fl->fl_ops->fl_release_private(fl);
+ fl->fl_ops = NULL;
+ }
+ if (fl->fl_lmops) {
+ if (fl->fl_lmops->fl_release_private)
+ fl->fl_lmops->fl_release_private(fl);
+ fl->fl_lmops = NULL;
+ }
+
+}
+
/* Free a lock which is not in use. */
static void locks_free_lock(struct file_lock *fl)
{
@@ -169,18 +181,7 @@ static void locks_free_lock(struct file_lock *fl)
if (!list_empty(&fl->fl_link))
panic("Attempting to free lock on active lock list");
- if (fl->fl_ops) {
- if (fl->fl_ops->fl_release_private)
- fl->fl_ops->fl_release_private(fl);
- fl->fl_ops = NULL;
- }
-
- if (fl->fl_lmops) {
- if (fl->fl_lmops->fl_release_private)
- fl->fl_lmops->fl_release_private(fl);
- fl->fl_lmops = NULL;
- }
-
+ locks_release_private(fl);
kmem_cache_free(filelock_cache, fl);
}
@@ -218,24 +219,46 @@ static void init_once(void *foo, kmem_cache_t *cache, unsigned long flags)
locks_init_lock(lock);
}
+static void locks_copy_private(struct file_lock *new, struct file_lock *fl)
+{
+ if (fl->fl_ops) {
+ if (fl->fl_ops->fl_copy_lock)
+ fl->fl_ops->fl_copy_lock(new, fl);
+ new->fl_ops = fl->fl_ops;
+ }
+ if (fl->fl_lmops) {
+ if (fl->fl_lmops->fl_copy_lock)
+ fl->fl_lmops->fl_copy_lock(new, fl);
+ new->fl_lmops = fl->fl_lmops;
+ }
+}
+
/*
* Initialize a new lock from an existing file_lock structure.
*/
-void locks_copy_lock(struct file_lock *new, struct file_lock *fl)
+static void __locks_copy_lock(struct file_lock *new, const struct file_lock *fl)
{
new->fl_owner = fl->fl_owner;
new->fl_pid = fl->fl_pid;
- new->fl_file = fl->fl_file;
+ new->fl_file = NULL;
new->fl_flags = fl->fl_flags;
new->fl_type = fl->fl_type;
new->fl_start = fl->fl_start;
new->fl_end = fl->fl_end;
+ new->fl_ops = NULL;
+ new->fl_lmops = NULL;
+}
+
+void locks_copy_lock(struct file_lock *new, struct file_lock *fl)
+{
+ locks_release_private(new);
+
+ __locks_copy_lock(new, fl);
+ new->fl_file = fl->fl_file;
new->fl_ops = fl->fl_ops;
new->fl_lmops = fl->fl_lmops;
- if (fl->fl_ops && fl->fl_ops->fl_copy_lock)
- fl->fl_ops->fl_copy_lock(new, fl);
- if (fl->fl_lmops && fl->fl_lmops->fl_copy_lock)
- fl->fl_lmops->fl_copy_lock(new, fl);
+
+ locks_copy_private(new, fl);
}
EXPORT_SYMBOL(locks_copy_lock);
@@ -654,8 +677,9 @@ static int locks_block_on_timeout(struct file_lock *blocker, struct file_lock *w
return result;
}
-struct file_lock *
-posix_test_lock(struct file *filp, struct file_lock *fl)
+int
+posix_test_lock(struct file *filp, struct file_lock *fl,
+ struct file_lock *conflock)
{
struct file_lock *cfl;
@@ -666,9 +690,13 @@ posix_test_lock(struct file *filp, struct file_lock *fl)
if (posix_locks_conflict(cfl, fl))
break;
}
+ if (cfl) {
+ __locks_copy_lock(conflock, cfl);
+ unlock_kernel();
+ return 1;
+ }
unlock_kernel();
-
- return (cfl);
+ return 0;
}
EXPORT_SYMBOL(posix_test_lock);
@@ -904,7 +932,8 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request)
fl->fl_start = request->fl_start;
fl->fl_end = request->fl_end;
fl->fl_type = request->fl_type;
- fl->fl_u = request->fl_u;
+ locks_release_private(fl);
+ locks_copy_private(fl, request);
request = fl;
added = 1;
}
@@ -1544,7 +1573,7 @@ asmlinkage long sys_flock(unsigned int fd, unsigned int cmd)
*/
int fcntl_getlk(struct file *filp, struct flock __user *l)
{
- struct file_lock *fl, file_lock;
+ struct file_lock *fl, cfl, file_lock;
struct flock flock;
int error;
@@ -1568,7 +1597,7 @@ int fcntl_getlk(struct file *filp, struct flock __user *l)
else
fl = (file_lock.fl_type == F_UNLCK ? NULL : &file_lock);
} else {
- fl = posix_test_lock(filp, &file_lock);
+ fl = (posix_test_lock(filp, &file_lock, &cfl) ? &cfl : NULL);
}
flock.l_type = F_UNLCK;
@@ -1698,7 +1727,7 @@ out:
*/
int fcntl_getlk64(struct file *filp, struct flock64 __user *l)
{
- struct file_lock *fl, file_lock;
+ struct file_lock *fl, cfl, file_lock;
struct flock64 flock;
int error;
@@ -1722,7 +1751,7 @@ int fcntl_getlk64(struct file *filp, struct flock64 __user *l)
else
fl = (file_lock.fl_type == F_UNLCK ? NULL : &file_lock);
} else {
- fl = posix_test_lock(filp, &file_lock);
+ fl = (posix_test_lock(filp, &file_lock, &cfl) ? &cfl : NULL);
}
flock.l_type = F_UNLCK;
@@ -1936,21 +1965,6 @@ void locks_remove_flock(struct file *filp)
}
/**
- * posix_block_lock - blocks waiting for a file lock
- * @blocker: the lock which is blocking
- * @waiter: the lock which conflicts and has to wait
- *
- * lockd needs to block waiting for locks.
- */
-void
-posix_block_lock(struct file_lock *blocker, struct file_lock *waiter)
-{
- locks_insert_block(blocker, waiter);
-}
-
-EXPORT_SYMBOL(posix_block_lock);
-
-/**
* posix_unblock_lock - stop waiting for a file lock
* @filp: how the file was opened
* @waiter: the lock which was waiting
diff --git a/fs/namespace.c b/fs/namespace.c
index 39c81a8d6316..71e75bcf4d28 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -399,6 +399,44 @@ struct seq_operations mounts_op = {
.show = show_vfsmnt
};
+static int show_vfsstat(struct seq_file *m, void *v)
+{
+ struct vfsmount *mnt = v;
+ int err = 0;
+
+ /* device */
+ if (mnt->mnt_devname) {
+ seq_puts(m, "device ");
+ mangle(m, mnt->mnt_devname);
+ } else
+ seq_puts(m, "no device");
+
+ /* mount point */
+ seq_puts(m, " mounted on ");
+ seq_path(m, mnt, mnt->mnt_root, " \t\n\\");
+ seq_putc(m, ' ');
+
+ /* file system type */
+ seq_puts(m, "with fstype ");
+ mangle(m, mnt->mnt_sb->s_type->name);
+
+ /* optional statistics */
+ if (mnt->mnt_sb->s_op->show_stats) {
+ seq_putc(m, ' ');
+ err = mnt->mnt_sb->s_op->show_stats(m, mnt);
+ }
+
+ seq_putc(m, '\n');
+ return err;
+}
+
+struct seq_operations mountstats_op = {
+ .start = m_start,
+ .next = m_next,
+ .stop = m_stop,
+ .show = show_vfsstat,
+};
+
/**
* may_umount_tree - check if a mount tree is busy
* @mnt: root of mount tree
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index fcd97406a778..99d2cfbce863 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -55,7 +55,12 @@ static void nfs_callback_svc(struct svc_rqst *rqstp)
complete(&nfs_callback_info.started);
- while (nfs_callback_info.users != 0 || !signalled()) {
+ for(;;) {
+ if (signalled()) {
+ if (nfs_callback_info.users == 0)
+ break;
+ flush_signals(current);
+ }
/*
* Listen for a request on the socket
*/
@@ -73,6 +78,7 @@ static void nfs_callback_svc(struct svc_rqst *rqstp)
svc_process(serv, rqstp);
}
+ svc_exit_thread(rqstp);
nfs_callback_info.pid = 0;
complete(&nfs_callback_info.stopped);
unlock_kernel();
@@ -134,11 +140,13 @@ int nfs_callback_down(void)
lock_kernel();
down(&nfs_callback_sema);
- if (--nfs_callback_info.users || nfs_callback_info.pid == 0)
- goto out;
- kill_proc(nfs_callback_info.pid, SIGKILL, 1);
- wait_for_completion(&nfs_callback_info.stopped);
-out:
+ nfs_callback_info.users--;
+ do {
+ if (nfs_callback_info.users != 0 || nfs_callback_info.pid == 0)
+ break;
+ if (kill_proc(nfs_callback_info.pid, SIGKILL, 1) < 0)
+ break;
+ } while (wait_for_completion_timeout(&nfs_callback_info.stopped, 5*HZ) == 0);
up(&nfs_callback_sema);
unlock_kernel();
return ret;
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index 7c33b9a81a94..05c38cf40b69 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -330,7 +330,7 @@ static unsigned encode_op_hdr(struct xdr_stream *xdr, uint32_t op, uint32_t res)
static unsigned encode_getattr_res(struct svc_rqst *rqstp, struct xdr_stream *xdr, const struct cb_getattrres *res)
{
- uint32_t *savep;
+ uint32_t *savep = NULL;
unsigned status = res->status;
if (unlikely(status != 0))
@@ -358,23 +358,26 @@ static unsigned process_op(struct svc_rqst *rqstp,
struct xdr_stream *xdr_in, void *argp,
struct xdr_stream *xdr_out, void *resp)
{
- struct callback_op *op;
- unsigned int op_nr;
+ struct callback_op *op = &callback_ops[0];
+ unsigned int op_nr = OP_CB_ILLEGAL;
unsigned int status = 0;
long maxlen;
unsigned res;
dprintk("%s: start\n", __FUNCTION__);
status = decode_op_hdr(xdr_in, &op_nr);
- if (unlikely(status != 0)) {
- op_nr = OP_CB_ILLEGAL;
- op = &callback_ops[0];
- } else if (unlikely(op_nr != OP_CB_GETATTR && op_nr != OP_CB_RECALL)) {
- op_nr = OP_CB_ILLEGAL;
- op = &callback_ops[0];
- status = htonl(NFS4ERR_OP_ILLEGAL);
- } else
- op = &callback_ops[op_nr];
+ if (likely(status == 0)) {
+ switch (op_nr) {
+ case OP_CB_GETATTR:
+ case OP_CB_RECALL:
+ op = &callback_ops[op_nr];
+ break;
+ default:
+ op_nr = OP_CB_ILLEGAL;
+ op = &callback_ops[0];
+ status = htonl(NFS4ERR_OP_ILLEGAL);
+ }
+ }
maxlen = xdr_out->end - xdr_out->p;
if (maxlen > 0 && maxlen < PAGE_SIZE) {
@@ -416,6 +419,7 @@ static int nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *resp
decode_compound_hdr_arg(&xdr_in, &hdr_arg);
hdr_res.taglen = hdr_arg.taglen;
hdr_res.tag = hdr_arg.tag;
+ hdr_res.nops = NULL;
encode_compound_hdr_res(&xdr_out, &hdr_res);
for (;;) {
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index c6f07c1c71e6..d3be923d4e43 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -421,3 +421,22 @@ void nfs_delegation_reap_unclaimed(struct nfs4_client *clp)
nfs_free_delegation(delegation);
}
}
+
+int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode)
+{
+ struct nfs4_client *clp = NFS_SERVER(inode)->nfs4_state;
+ struct nfs_inode *nfsi = NFS_I(inode);
+ struct nfs_delegation *delegation;
+ int res = 0;
+
+ if (nfsi->delegation_state == 0)
+ return 0;
+ spin_lock(&clp->cl_lock);
+ delegation = nfsi->delegation;
+ if (delegation != NULL) {
+ memcpy(dst->data, delegation->stateid.data, sizeof(dst->data));
+ res = 1;
+ }
+ spin_unlock(&clp->cl_lock);
+ return res;
+}
diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h
index 7a0b2bfce771..3858694652fa 100644
--- a/fs/nfs/delegation.h
+++ b/fs/nfs/delegation.h
@@ -41,6 +41,7 @@ void nfs_delegation_reap_unclaimed(struct nfs4_client *clp);
int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid);
int nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state);
int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl);
+int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode);
static inline int nfs_have_delegation(struct inode *inode, int flags)
{
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index a1554bead692..06c48b385c94 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -34,6 +34,7 @@
#include "nfs4_fs.h"
#include "delegation.h"
+#include "iostat.h"
#define NFS_PARANOIA 1
/* #define NFS_DEBUG_VERBOSE 1 */
@@ -129,6 +130,9 @@ nfs_opendir(struct inode *inode, struct file *filp)
{
int res = 0;
+ dfprintk(VFS, "NFS: opendir(%s/%ld)\n",
+ inode->i_sb->s_id, inode->i_ino);
+
lock_kernel();
/* Call generic open code in order to cache credentials */
if (!res)
@@ -172,7 +176,9 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page)
unsigned long timestamp;
int error;
- dfprintk(VFS, "NFS: nfs_readdir_filler() reading cookie %Lu into page %lu.\n", (long long)desc->entry->cookie, page->index);
+ dfprintk(DIRCACHE, "NFS: %s: reading cookie %Lu into page %lu\n",
+ __FUNCTION__, (long long)desc->entry->cookie,
+ page->index);
again:
timestamp = jiffies;
@@ -244,7 +250,8 @@ int find_dirent(nfs_readdir_descriptor_t *desc)
status;
while((status = dir_decode(desc)) == 0) {
- dfprintk(VFS, "NFS: found cookie %Lu\n", (unsigned long long)entry->cookie);
+ dfprintk(DIRCACHE, "NFS: %s: examining cookie %Lu\n",
+ __FUNCTION__, (unsigned long long)entry->cookie);
if (entry->prev_cookie == *desc->dir_cookie)
break;
if (loop_count++ > 200) {
@@ -252,7 +259,6 @@ int find_dirent(nfs_readdir_descriptor_t *desc)
schedule();
}
}
- dfprintk(VFS, "NFS: find_dirent() returns %d\n", status);
return status;
}
@@ -276,7 +282,8 @@ int find_dirent_index(nfs_readdir_descriptor_t *desc)
if (status)
break;
- dfprintk(VFS, "NFS: found cookie %Lu at index %Ld\n", (unsigned long long)entry->cookie, desc->current_index);
+ dfprintk(DIRCACHE, "NFS: found cookie %Lu at index %Ld\n",
+ (unsigned long long)entry->cookie, desc->current_index);
if (desc->file->f_pos == desc->current_index) {
*desc->dir_cookie = entry->cookie;
@@ -288,7 +295,6 @@ int find_dirent_index(nfs_readdir_descriptor_t *desc)
schedule();
}
}
- dfprintk(VFS, "NFS: find_dirent_index() returns %d\n", status);
return status;
}
@@ -303,7 +309,9 @@ int find_dirent_page(nfs_readdir_descriptor_t *desc)
struct page *page;
int status;
- dfprintk(VFS, "NFS: find_dirent_page() searching directory page %ld\n", desc->page_index);
+ dfprintk(DIRCACHE, "NFS: %s: searching page %ld for target %Lu\n",
+ __FUNCTION__, desc->page_index,
+ (long long) *desc->dir_cookie);
page = read_cache_page(inode->i_mapping, desc->page_index,
(filler_t *)nfs_readdir_filler, desc);
@@ -324,7 +332,7 @@ int find_dirent_page(nfs_readdir_descriptor_t *desc)
if (status < 0)
dir_page_release(desc);
out:
- dfprintk(VFS, "NFS: find_dirent_page() returns %d\n", status);
+ dfprintk(DIRCACHE, "NFS: %s: returns %d\n", __FUNCTION__, status);
return status;
read_error:
page_cache_release(page);
@@ -346,13 +354,15 @@ int readdir_search_pagecache(nfs_readdir_descriptor_t *desc)
/* Always search-by-index from the beginning of the cache */
if (*desc->dir_cookie == 0) {
- dfprintk(VFS, "NFS: readdir_search_pagecache() searching for offset %Ld\n", (long long)desc->file->f_pos);
+ dfprintk(DIRCACHE, "NFS: readdir_search_pagecache() searching for offset %Ld\n",
+ (long long)desc->file->f_pos);
desc->page_index = 0;
desc->entry->cookie = desc->entry->prev_cookie = 0;
desc->entry->eof = 0;
desc->current_index = 0;
} else
- dfprintk(VFS, "NFS: readdir_search_pagecache() searching for cookie %Lu\n", (unsigned long long)*desc->dir_cookie);
+ dfprintk(DIRCACHE, "NFS: readdir_search_pagecache() searching for cookie %Lu\n",
+ (unsigned long long)*desc->dir_cookie);
for (;;) {
res = find_dirent_page(desc);
@@ -365,7 +375,8 @@ int readdir_search_pagecache(nfs_readdir_descriptor_t *desc)
schedule();
}
}
- dfprintk(VFS, "NFS: readdir_search_pagecache() returned %d\n", res);
+
+ dfprintk(DIRCACHE, "NFS: %s: returns %d\n", __FUNCTION__, res);
return res;
}
@@ -390,7 +401,8 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent,
int loop_count = 0,
res;
- dfprintk(VFS, "NFS: nfs_do_filldir() filling starting @ cookie %Lu\n", (long long)entry->cookie);
+ dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling starting @ cookie %Lu\n",
+ (unsigned long long)entry->cookie);
for(;;) {
unsigned d_type = DT_UNKNOWN;
@@ -427,7 +439,8 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent,
dir_page_release(desc);
if (dentry != NULL)
dput(dentry);
- dfprintk(VFS, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n", (unsigned long long)*desc->dir_cookie, res);
+ dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n",
+ (unsigned long long)*desc->dir_cookie, res);
return res;
}
@@ -453,7 +466,8 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent,
struct page *page = NULL;
int status;
- dfprintk(VFS, "NFS: uncached_readdir() searching for cookie %Lu\n", (unsigned long long)*desc->dir_cookie);
+ dfprintk(DIRCACHE, "NFS: uncached_readdir() searching for cookie %Lu\n",
+ (unsigned long long)*desc->dir_cookie);
page = alloc_page(GFP_HIGHUSER);
if (!page) {
@@ -485,7 +499,8 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent,
desc->entry->cookie = desc->entry->prev_cookie = 0;
desc->entry->eof = 0;
out:
- dfprintk(VFS, "NFS: uncached_readdir() returns %d\n", status);
+ dfprintk(DIRCACHE, "NFS: %s: returns %d\n",
+ __FUNCTION__, status);
return status;
out_release:
dir_page_release(desc);
@@ -507,6 +522,11 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
struct nfs_fattr fattr;
long res;
+ dfprintk(VFS, "NFS: readdir(%s/%s) starting at cookie %Lu\n",
+ dentry->d_parent->d_name.name, dentry->d_name.name,
+ (long long)filp->f_pos);
+ nfs_inc_stats(inode, NFSIOS_VFSGETDENTS);
+
lock_kernel();
res = nfs_revalidate_inode(NFS_SERVER(inode), inode);
@@ -566,9 +586,12 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
}
}
unlock_kernel();
- if (res < 0)
- return res;
- return 0;
+ if (res > 0)
+ res = 0;
+ dfprintk(VFS, "NFS: readdir(%s/%s) returns %ld\n",
+ dentry->d_parent->d_name.name, dentry->d_name.name,
+ res);
+ return res;
}
loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int origin)
@@ -599,6 +622,10 @@ out:
*/
int nfs_fsync_dir(struct file *filp, struct dentry *dentry, int datasync)
{
+ dfprintk(VFS, "NFS: fsync_dir(%s/%s) datasync %d\n",
+ dentry->d_parent->d_name.name, dentry->d_name.name,
+ datasync);
+
return 0;
}
@@ -713,6 +740,7 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd)
parent = dget_parent(dentry);
lock_kernel();
dir = parent->d_inode;
+ nfs_inc_stats(dir, NFSIOS_DENTRYREVALIDATE);
inode = dentry->d_inode;
if (!inode) {
@@ -722,8 +750,9 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd)
}
if (is_bad_inode(inode)) {
- dfprintk(VFS, "nfs_lookup_validate: %s/%s has dud inode\n",
- dentry->d_parent->d_name.name, dentry->d_name.name);
+ dfprintk(LOOKUPCACHE, "%s: %s/%s has dud inode\n",
+ __FUNCTION__, dentry->d_parent->d_name.name,
+ dentry->d_name.name);
goto out_bad;
}
@@ -755,6 +784,9 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd)
out_valid:
unlock_kernel();
dput(parent);
+ dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) is valid\n",
+ __FUNCTION__, dentry->d_parent->d_name.name,
+ dentry->d_name.name);
return 1;
out_zap_parent:
nfs_zap_caches(dir);
@@ -771,6 +803,9 @@ out_zap_parent:
d_drop(dentry);
unlock_kernel();
dput(parent);
+ dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) is invalid\n",
+ __FUNCTION__, dentry->d_parent->d_name.name,
+ dentry->d_name.name);
return 0;
}
@@ -844,6 +879,7 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
dfprintk(VFS, "NFS: lookup(%s/%s)\n",
dentry->d_parent->d_name.name, dentry->d_name.name);
+ nfs_inc_stats(dir, NFSIOS_VFSLOOKUP);
res = ERR_PTR(-ENAMETOOLONG);
if (dentry->d_name.len > NFS_SERVER(dir)->namelen)
@@ -865,9 +901,9 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
res = ERR_PTR(error);
goto out_unlock;
}
- res = ERR_PTR(-EACCES);
inode = nfs_fhget(dentry->d_sb, &fhandle, &fattr);
- if (!inode)
+ res = (struct dentry *)inode;
+ if (IS_ERR(res))
goto out_unlock;
no_entry:
res = d_add_unique(dentry, inode);
@@ -912,6 +948,9 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry
struct dentry *res = NULL;
int error;
+ dfprintk(VFS, "NFS: atomic_lookup(%s/%ld), %s\n",
+ dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
+
/* Check that we are indeed trying to open this file */
if (!is_atomic_open(dir, nd))
goto no_open;
@@ -1057,7 +1096,7 @@ static struct dentry *nfs_readdir_lookup(nfs_readdir_descriptor_t *desc)
return NULL;
dentry->d_op = NFS_PROTO(dir)->dentry_ops;
inode = nfs_fhget(dentry->d_sb, entry->fh, entry->fattr);
- if (!inode) {
+ if (IS_ERR(inode)) {
dput(dentry);
return NULL;
}
@@ -1095,9 +1134,9 @@ int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle,
if (error < 0)
goto out_err;
}
- error = -ENOMEM;
inode = nfs_fhget(dentry->d_sb, fhandle, fattr);
- if (inode == NULL)
+ error = PTR_ERR(inode);
+ if (IS_ERR(inode))
goto out_err;
d_instantiate(dentry, inode);
return 0;
@@ -1119,8 +1158,8 @@ static int nfs_create(struct inode *dir, struct dentry *dentry, int mode,
int error;
int open_flags = 0;
- dfprintk(VFS, "NFS: create(%s/%ld, %s\n", dir->i_sb->s_id,
- dir->i_ino, dentry->d_name.name);
+ dfprintk(VFS, "NFS: create(%s/%ld), %s\n",
+ dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
attr.ia_mode = mode;
attr.ia_valid = ATTR_MODE;
@@ -1153,8 +1192,8 @@ nfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev)
struct iattr attr;
int status;
- dfprintk(VFS, "NFS: mknod(%s/%ld, %s\n", dir->i_sb->s_id,
- dir->i_ino, dentry->d_name.name);
+ dfprintk(VFS, "NFS: mknod(%s/%ld), %s\n",
+ dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
if (!new_valid_dev(rdev))
return -EINVAL;
@@ -1186,8 +1225,8 @@ static int nfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
struct iattr attr;
int error;
- dfprintk(VFS, "NFS: mkdir(%s/%ld, %s\n", dir->i_sb->s_id,
- dir->i_ino, dentry->d_name.name);
+ dfprintk(VFS, "NFS: mkdir(%s/%ld), %s\n",
+ dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
attr.ia_valid = ATTR_MODE;
attr.ia_mode = mode | S_IFDIR;
@@ -1212,8 +1251,8 @@ static int nfs_rmdir(struct inode *dir, struct dentry *dentry)
{
int error;
- dfprintk(VFS, "NFS: rmdir(%s/%ld, %s\n", dir->i_sb->s_id,
- dir->i_ino, dentry->d_name.name);
+ dfprintk(VFS, "NFS: rmdir(%s/%ld), %s\n",
+ dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
lock_kernel();
nfs_begin_data_update(dir);
@@ -1241,6 +1280,7 @@ static int nfs_sillyrename(struct inode *dir, struct dentry *dentry)
dfprintk(VFS, "NFS: silly-rename(%s/%s, ct=%d)\n",
dentry->d_parent->d_name.name, dentry->d_name.name,
atomic_read(&dentry->d_count));
+ nfs_inc_stats(dir, NFSIOS_SILLYRENAME);
#ifdef NFS_PARANOIA
if (!dentry->d_inode)
@@ -1268,8 +1308,8 @@ dentry->d_parent->d_name.name, dentry->d_name.name);
sillycounter++;
sprintf(suffix, "%*.*x", countersize, countersize, sillycounter);
- dfprintk(VFS, "trying to rename %s to %s\n",
- dentry->d_name.name, silly);
+ dfprintk(VFS, "NFS: trying to rename %s to %s\n",
+ dentry->d_name.name, silly);
sdentry = lookup_one_len(silly, dentry->d_parent, slen);
/*
@@ -1640,6 +1680,8 @@ int nfs_permission(struct inode *inode, int mask, struct nameidata *nd)
struct rpc_cred *cred;
int res = 0;
+ nfs_inc_stats(inode, NFSIOS_VFSACCESS);
+
if (mask == 0)
goto out;
/* Is this sys_access() ? */
@@ -1679,13 +1721,15 @@ force_lookup:
res = PTR_ERR(cred);
unlock_kernel();
out:
+ dfprintk(VFS, "NFS: permission(%s/%ld), mask=0x%x, res=%d\n",
+ inode->i_sb->s_id, inode->i_ino, mask, res);
return res;
out_notsup:
res = nfs_revalidate_inode(NFS_SERVER(inode), inode);
if (res == 0)
res = generic_permission(inode, mask, NULL);
unlock_kernel();
- return res;
+ goto out;
}
/*
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 4ae2f3b33fef..0f583cb16ddb 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -7,11 +7,11 @@
*
* There are important applications whose performance or correctness
* depends on uncached access to file data. Database clusters
- * (multiple copies of the same instance running on separate hosts)
+ * (multiple copies of the same instance running on separate hosts)
* implement their own cache coherency protocol that subsumes file
- * system cache protocols. Applications that process datasets
- * considerably larger than the client's memory do not always benefit
- * from a local cache. A streaming video server, for instance, has no
+ * system cache protocols. Applications that process datasets
+ * considerably larger than the client's memory do not always benefit
+ * from a local cache. A streaming video server, for instance, has no
* need to cache the contents of a file.
*
* When an application requests uncached I/O, all read and write requests
@@ -34,6 +34,7 @@
* 08 Jun 2003 Port to 2.5 APIs --cel
* 31 Mar 2004 Handle direct I/O without VFS support --cel
* 15 Sep 2004 Parallel async reads --cel
+ * 04 May 2005 support O_DIRECT with aio --cel
*
*/
@@ -54,10 +55,10 @@
#include <asm/uaccess.h>
#include <asm/atomic.h>
+#include "iostat.h"
+
#define NFSDBG_FACILITY NFSDBG_VFS
-#define MAX_DIRECTIO_SIZE (4096UL << PAGE_SHIFT)
-static void nfs_free_user_pages(struct page **pages, int npages, int do_dirty);
static kmem_cache_t *nfs_direct_cachep;
/*
@@ -65,38 +66,78 @@ static kmem_cache_t *nfs_direct_cachep;
*/
struct nfs_direct_req {
struct kref kref; /* release manager */
- struct list_head list; /* nfs_read_data structs */
- wait_queue_head_t wait; /* wait for i/o completion */
+
+ /* I/O parameters */
+ struct list_head list, /* nfs_read/write_data structs */
+ rewrite_list; /* saved nfs_write_data structs */
+ struct nfs_open_context *ctx; /* file open context info */
+ struct kiocb * iocb; /* controlling i/o request */
+ struct inode * inode; /* target file of i/o */
+ unsigned long user_addr; /* location of user's buffer */
+ size_t user_count; /* total bytes to move */
+ loff_t pos; /* starting offset in file */
struct page ** pages; /* pages in our buffer */
unsigned int npages; /* count of pages */
- atomic_t complete, /* i/os we're waiting for */
- count, /* bytes actually processed */
+
+ /* completion state */
+ spinlock_t lock; /* protect completion state */
+ int outstanding; /* i/os we're waiting for */
+ ssize_t count, /* bytes actually processed */
error; /* any reported error */
+ struct completion completion; /* wait for i/o completion */
+
+ /* commit state */
+ struct nfs_write_data * commit_data; /* special write_data for commits */
+ int flags;
+#define NFS_ODIRECT_DO_COMMIT (1) /* an unstable reply was received */
+#define NFS_ODIRECT_RESCHED_WRITES (2) /* write verification failed */
+ struct nfs_writeverf verf; /* unstable write verifier */
};
+static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, int sync);
+static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode);
/**
- * nfs_get_user_pages - find and set up pages underlying user's buffer
- * rw: direction (read or write)
- * user_addr: starting address of this segment of user's buffer
- * count: size of this segment
- * @pages: returned array of page struct pointers underlying user's buffer
+ * nfs_direct_IO - NFS address space operation for direct I/O
+ * @rw: direction (read or write)
+ * @iocb: target I/O control block
+ * @iov: array of vectors that define I/O buffer
+ * @pos: offset in file to begin the operation
+ * @nr_segs: size of iovec array
+ *
+ * The presence of this routine in the address space ops vector means
+ * the NFS client supports direct I/O. However, we shunt off direct
+ * read and write requests before the VFS gets them, so this method
+ * should never be called.
*/
-static inline int
-nfs_get_user_pages(int rw, unsigned long user_addr, size_t size,
- struct page ***pages)
+ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t pos, unsigned long nr_segs)
+{
+ struct dentry *dentry = iocb->ki_filp->f_dentry;
+
+ dprintk("NFS: nfs_direct_IO (%s) off/no(%Ld/%lu) EINVAL\n",
+ dentry->d_name.name, (long long) pos, nr_segs);
+
+ return -EINVAL;
+}
+
+static void nfs_free_user_pages(struct page **pages, int npages, int do_dirty)
+{
+ int i;
+ for (i = 0; i < npages; i++) {
+ struct page *page = pages[i];
+ if (do_dirty && !PageCompound(page))
+ set_page_dirty_lock(page);
+ page_cache_release(page);
+ }
+ kfree(pages);
+}
+
+static inline int nfs_get_user_pages(int rw, unsigned long user_addr, size_t size, struct page ***pages)
{
int result = -ENOMEM;
unsigned long page_count;
size_t array_size;
- /* set an arbitrary limit to prevent type overflow */
- /* XXX: this can probably be as large as INT_MAX */
- if (size > MAX_DIRECTIO_SIZE) {
- *pages = NULL;
- return -EFBIG;
- }
-
page_count = (user_addr + size + PAGE_SIZE - 1) >> PAGE_SHIFT;
page_count -= user_addr >> PAGE_SHIFT;
@@ -108,75 +149,117 @@ nfs_get_user_pages(int rw, unsigned long user_addr, size_t size,
page_count, (rw == READ), 0,
*pages, NULL);
up_read(&current->mm->mmap_sem);
- /*
- * If we got fewer pages than expected from get_user_pages(),
- * the user buffer runs off the end of a mapping; return EFAULT.
- */
- if (result >= 0 && result < page_count) {
- nfs_free_user_pages(*pages, result, 0);
+ if (result != page_count) {
+ /*
+ * If we got fewer pages than expected from
+ * get_user_pages(), the user buffer runs off the
+ * end of a mapping; return EFAULT.
+ */
+ if (result >= 0) {
+ nfs_free_user_pages(*pages, result, 0);
+ result = -EFAULT;
+ } else
+ kfree(*pages);
*pages = NULL;
- result = -EFAULT;
}
}
return result;
}
-/**
- * nfs_free_user_pages - tear down page struct array
- * @pages: array of page struct pointers underlying target buffer
- * @npages: number of pages in the array
- * @do_dirty: dirty the pages as we release them
- */
-static void
-nfs_free_user_pages(struct page **pages, int npages, int do_dirty)
+static inline struct nfs_direct_req *nfs_direct_req_alloc(void)
{
- int i;
- for (i = 0; i < npages; i++) {
- struct page *page = pages[i];
- if (do_dirty && !PageCompound(page))
- set_page_dirty_lock(page);
- page_cache_release(page);
- }
- kfree(pages);
+ struct nfs_direct_req *dreq;
+
+ dreq = kmem_cache_alloc(nfs_direct_cachep, SLAB_KERNEL);
+ if (!dreq)
+ return NULL;
+
+ kref_init(&dreq->kref);
+ init_completion(&dreq->completion);
+ INIT_LIST_HEAD(&dreq->list);
+ INIT_LIST_HEAD(&dreq->rewrite_list);
+ dreq->iocb = NULL;
+ dreq->ctx = NULL;
+ spin_lock_init(&dreq->lock);
+ dreq->outstanding = 0;
+ dreq->count = 0;
+ dreq->error = 0;
+ dreq->flags = 0;
+
+ return dreq;
}
-/**
- * nfs_direct_req_release - release nfs_direct_req structure for direct read
- * @kref: kref object embedded in an nfs_direct_req structure
- *
- */
static void nfs_direct_req_release(struct kref *kref)
{
struct nfs_direct_req *dreq = container_of(kref, struct nfs_direct_req, kref);
+
+ if (dreq->ctx != NULL)
+ put_nfs_open_context(dreq->ctx);
kmem_cache_free(nfs_direct_cachep, dreq);
}
-/**
- * nfs_direct_read_alloc - allocate nfs_read_data structures for direct read
- * @count: count of bytes for the read request
- * @rsize: local rsize setting
+/*
+ * Collects and returns the final error value/byte-count.
+ */
+static ssize_t nfs_direct_wait(struct nfs_direct_req *dreq)
+{
+ ssize_t result = -EIOCBQUEUED;
+
+ /* Async requests don't wait here */
+ if (dreq->iocb)
+ goto out;
+
+ result = wait_for_completion_interruptible(&dreq->completion);
+
+ if (!result)
+ result = dreq->error;
+ if (!result)
+ result = dreq->count;
+
+out:
+ kref_put(&dreq->kref, nfs_direct_req_release);
+ return (ssize_t) result;
+}
+
+/*
+ * We must hold a reference to all the pages in this direct read request
+ * until the RPCs complete. This could be long *after* we are woken up in
+ * nfs_direct_wait (for instance, if someone hits ^C on a slow server).
*
+ * In addition, synchronous I/O uses a stack-allocated iocb. Thus we
+ * can't trust the iocb is still valid here if this is a synchronous
+ * request. If the waiter is woken prematurely, the iocb is long gone.
+ */
+static void nfs_direct_complete(struct nfs_direct_req *dreq)
+{
+ nfs_free_user_pages(dreq->pages, dreq->npages, 1);
+
+ if (dreq->iocb) {
+ long res = (long) dreq->error;
+ if (!res)
+ res = (long) dreq->count;
+ aio_complete(dreq->iocb, res, 0);
+ }
+ complete_all(&dreq->completion);
+
+ kref_put(&dreq->kref, nfs_direct_req_release);
+}
+
+/*
* Note we also set the number of requests we have in the dreq when we are
* done. This prevents races with I/O completion so we will always wait
* until all requests have been dispatched and completed.
*/
-static struct nfs_direct_req *nfs_direct_read_alloc(size_t nbytes, unsigned int rsize)
+static struct nfs_direct_req *nfs_direct_read_alloc(size_t nbytes, size_t rsize)
{
struct list_head *list;
struct nfs_direct_req *dreq;
- unsigned int reads = 0;
unsigned int rpages = (rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
- dreq = kmem_cache_alloc(nfs_direct_cachep, SLAB_KERNEL);
+ dreq = nfs_direct_req_alloc();
if (!dreq)
return NULL;
- kref_init(&dreq->kref);
- init_waitqueue_head(&dreq->wait);
- INIT_LIST_HEAD(&dreq->list);
- atomic_set(&dreq->count, 0);
- atomic_set(&dreq->error, 0);
-
list = &dreq->list;
for(;;) {
struct nfs_read_data *data = nfs_readdata_alloc(rpages);
@@ -196,72 +279,70 @@ static struct nfs_direct_req *nfs_direct_read_alloc(size_t nbytes, unsigned int
list_add(&data->pages, list);
data->req = (struct nfs_page *) dreq;
- reads++;
+ dreq->outstanding++;
if (nbytes <= rsize)
break;
nbytes -= rsize;
}
kref_get(&dreq->kref);
- atomic_set(&dreq->complete, reads);
return dreq;
}
-/**
- * nfs_direct_read_result - handle a read reply for a direct read request
- * @data: address of NFS READ operation control block
- * @status: status of this NFS READ operation
- *
- * We must hold a reference to all the pages in this direct read request
- * until the RPCs complete. This could be long *after* we are woken up in
- * nfs_direct_read_wait (for instance, if someone hits ^C on a slow server).
- */
-static void nfs_direct_read_result(struct nfs_read_data *data, int status)
+static void nfs_direct_read_result(struct rpc_task *task, void *calldata)
{
+ struct nfs_read_data *data = calldata;
struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req;
- if (likely(status >= 0))
- atomic_add(data->res.count, &dreq->count);
+ if (nfs_readpage_result(task, data) != 0)
+ return;
+
+ spin_lock(&dreq->lock);
+
+ if (likely(task->tk_status >= 0))
+ dreq->count += data->res.count;
else
- atomic_set(&dreq->error, status);
+ dreq->error = task->tk_status;
- if (unlikely(atomic_dec_and_test(&dreq->complete))) {
- nfs_free_user_pages(dreq->pages, dreq->npages, 1);
- wake_up(&dreq->wait);
- kref_put(&dreq->kref, nfs_direct_req_release);
+ if (--dreq->outstanding) {
+ spin_unlock(&dreq->lock);
+ return;
}
+
+ spin_unlock(&dreq->lock);
+ nfs_direct_complete(dreq);
}
-/**
- * nfs_direct_read_schedule - dispatch NFS READ operations for a direct read
- * @dreq: address of nfs_direct_req struct for this request
- * @inode: target inode
- * @ctx: target file open context
- * @user_addr: starting address of this segment of user's buffer
- * @count: size of this segment
- * @file_offset: offset in file to begin the operation
- *
+static const struct rpc_call_ops nfs_read_direct_ops = {
+ .rpc_call_done = nfs_direct_read_result,
+ .rpc_release = nfs_readdata_release,
+};
+
+/*
* For each nfs_read_data struct that was allocated on the list, dispatch
* an NFS READ operation
*/
-static void nfs_direct_read_schedule(struct nfs_direct_req *dreq,
- struct inode *inode, struct nfs_open_context *ctx,
- unsigned long user_addr, size_t count, loff_t file_offset)
+static void nfs_direct_read_schedule(struct nfs_direct_req *dreq)
{
+ struct nfs_open_context *ctx = dreq->ctx;
+ struct inode *inode = ctx->dentry->d_inode;
struct list_head *list = &dreq->list;
struct page **pages = dreq->pages;
+ size_t count = dreq->user_count;
+ loff_t pos = dreq->pos;
+ size_t rsize = NFS_SERVER(inode)->rsize;
unsigned int curpage, pgbase;
- unsigned int rsize = NFS_SERVER(inode)->rsize;
curpage = 0;
- pgbase = user_addr & ~PAGE_MASK;
+ pgbase = dreq->user_addr & ~PAGE_MASK;
do {
struct nfs_read_data *data;
- unsigned int bytes;
+ size_t bytes;
bytes = rsize;
if (count < rsize)
bytes = count;
+ BUG_ON(list_empty(list));
data = list_entry(list->next, struct nfs_read_data, pages);
list_del_init(&data->pages);
@@ -269,7 +350,7 @@ static void nfs_direct_read_schedule(struct nfs_direct_req *dreq,
data->cred = ctx->cred;
data->args.fh = NFS_FH(inode);
data->args.context = ctx;
- data->args.offset = file_offset;
+ data->args.offset = pos;
data->args.pgbase = pgbase;
data->args.pages = &pages[curpage];
data->args.count = bytes;
@@ -277,77 +358,38 @@ static void nfs_direct_read_schedule(struct nfs_direct_req *dreq,
data->res.eof = 0;
data->res.count = bytes;
+ rpc_init_task(&data->task, NFS_CLIENT(inode), RPC_TASK_ASYNC,
+ &nfs_read_direct_ops, data);
NFS_PROTO(inode)->read_setup(data);
data->task.tk_cookie = (unsigned long) inode;
- data->complete = nfs_direct_read_result;
lock_kernel();
rpc_execute(&data->task);
unlock_kernel();
- dfprintk(VFS, "NFS: %4d initiated direct read call (req %s/%Ld, %u bytes @ offset %Lu)\n",
+ dfprintk(VFS, "NFS: %5u initiated direct read call (req %s/%Ld, %zu bytes @ offset %Lu)\n",
data->task.tk_pid,
inode->i_sb->s_id,
(long long)NFS_FILEID(inode),
bytes,
(unsigned long long)data->args.offset);
- file_offset += bytes;
+ pos += bytes;
pgbase += bytes;
curpage += pgbase >> PAGE_SHIFT;
pgbase &= ~PAGE_MASK;
count -= bytes;
} while (count != 0);
+ BUG_ON(!list_empty(list));
}
-/**
- * nfs_direct_read_wait - wait for I/O completion for direct reads
- * @dreq: request on which we are to wait
- * @intr: whether or not this wait can be interrupted
- *
- * Collects and returns the final error value/byte-count.
- */
-static ssize_t nfs_direct_read_wait(struct nfs_direct_req *dreq, int intr)
-{
- int result = 0;
-
- if (intr) {
- result = wait_event_interruptible(dreq->wait,
- (atomic_read(&dreq->complete) == 0));
- } else {
- wait_event(dreq->wait, (atomic_read(&dreq->complete) == 0));
- }
-
- if (!result)
- result = atomic_read(&dreq->error);
- if (!result)
- result = atomic_read(&dreq->count);
-
- kref_put(&dreq->kref, nfs_direct_req_release);
- return (ssize_t) result;
-}
-
-/**
- * nfs_direct_read_seg - Read in one iov segment. Generate separate
- * read RPCs for each "rsize" bytes.
- * @inode: target inode
- * @ctx: target file open context
- * @user_addr: starting address of this segment of user's buffer
- * @count: size of this segment
- * @file_offset: offset in file to begin the operation
- * @pages: array of addresses of page structs defining user's buffer
- * @nr_pages: number of pages in the array
- *
- */
-static ssize_t nfs_direct_read_seg(struct inode *inode,
- struct nfs_open_context *ctx, unsigned long user_addr,
- size_t count, loff_t file_offset, struct page **pages,
- unsigned int nr_pages)
+static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size_t count, loff_t pos, struct page **pages, unsigned int nr_pages)
{
ssize_t result;
sigset_t oldset;
+ struct inode *inode = iocb->ki_filp->f_mapping->host;
struct rpc_clnt *clnt = NFS_CLIENT(inode);
struct nfs_direct_req *dreq;
@@ -355,284 +397,350 @@ static ssize_t nfs_direct_read_seg(struct inode *inode,
if (!dreq)
return -ENOMEM;
+ dreq->user_addr = user_addr;
+ dreq->user_count = count;
+ dreq->pos = pos;
dreq->pages = pages;
dreq->npages = nr_pages;
+ dreq->inode = inode;
+ dreq->ctx = get_nfs_open_context((struct nfs_open_context *)iocb->ki_filp->private_data);
+ if (!is_sync_kiocb(iocb))
+ dreq->iocb = iocb;
+ nfs_add_stats(inode, NFSIOS_DIRECTREADBYTES, count);
rpc_clnt_sigmask(clnt, &oldset);
- nfs_direct_read_schedule(dreq, inode, ctx, user_addr, count,
- file_offset);
- result = nfs_direct_read_wait(dreq, clnt->cl_intr);
+ nfs_direct_read_schedule(dreq);
+ result = nfs_direct_wait(dreq);
rpc_clnt_sigunmask(clnt, &oldset);
return result;
}
-/**
- * nfs_direct_read - For each iov segment, map the user's buffer
- * then generate read RPCs.
- * @inode: target inode
- * @ctx: target file open context
- * @iov: array of vectors that define I/O buffer
- * file_offset: offset in file to begin the operation
- * nr_segs: size of iovec array
- *
- * We've already pushed out any non-direct writes so that this read
- * will see them when we read from the server.
- */
-static ssize_t
-nfs_direct_read(struct inode *inode, struct nfs_open_context *ctx,
- const struct iovec *iov, loff_t file_offset,
- unsigned long nr_segs)
+static void nfs_direct_free_writedata(struct nfs_direct_req *dreq)
{
- ssize_t tot_bytes = 0;
- unsigned long seg = 0;
-
- while ((seg < nr_segs) && (tot_bytes >= 0)) {
- ssize_t result;
- int page_count;
- struct page **pages;
- const struct iovec *vec = &iov[seg++];
- unsigned long user_addr = (unsigned long) vec->iov_base;
- size_t size = vec->iov_len;
-
- page_count = nfs_get_user_pages(READ, user_addr, size, &pages);
- if (page_count < 0) {
- nfs_free_user_pages(pages, 0, 0);
- if (tot_bytes > 0)
- break;
- return page_count;
- }
+ list_splice_init(&dreq->rewrite_list, &dreq->list);
+ while (!list_empty(&dreq->list)) {
+ struct nfs_write_data *data = list_entry(dreq->list.next, struct nfs_write_data, pages);
+ list_del(&data->pages);
+ nfs_writedata_release(data);
+ }
+}
- result = nfs_direct_read_seg(inode, ctx, user_addr, size,
- file_offset, pages, page_count);
+#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
+static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
+{
+ struct list_head *pos;
- if (result <= 0) {
- if (tot_bytes > 0)
- break;
- return result;
- }
- tot_bytes += result;
- file_offset += result;
- if (result < size)
- break;
+ list_splice_init(&dreq->rewrite_list, &dreq->list);
+ list_for_each(pos, &dreq->list)
+ dreq->outstanding++;
+ dreq->count = 0;
+
+ nfs_direct_write_schedule(dreq, FLUSH_STABLE);
+}
+
+static void nfs_direct_commit_result(struct rpc_task *task, void *calldata)
+{
+ struct nfs_write_data *data = calldata;
+ struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req;
+
+ /* Call the NFS version-specific code */
+ if (NFS_PROTO(data->inode)->commit_done(task, data) != 0)
+ return;
+ if (unlikely(task->tk_status < 0)) {
+ dreq->error = task->tk_status;
+ dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
+ }
+ if (memcmp(&dreq->verf, &data->verf, sizeof(data->verf))) {
+ dprintk("NFS: %5u commit verify failed\n", task->tk_pid);
+ dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
}
- return tot_bytes;
+ dprintk("NFS: %5u commit returned %d\n", task->tk_pid, task->tk_status);
+ nfs_direct_write_complete(dreq, data->inode);
}
-/**
- * nfs_direct_write_seg - Write out one iov segment. Generate separate
- * write RPCs for each "wsize" bytes, then commit.
- * @inode: target inode
- * @ctx: target file open context
- * user_addr: starting address of this segment of user's buffer
- * count: size of this segment
- * file_offset: offset in file to begin the operation
- * @pages: array of addresses of page structs defining user's buffer
- * nr_pages: size of pages array
- */
-static ssize_t nfs_direct_write_seg(struct inode *inode,
- struct nfs_open_context *ctx, unsigned long user_addr,
- size_t count, loff_t file_offset, struct page **pages,
- int nr_pages)
+static const struct rpc_call_ops nfs_commit_direct_ops = {
+ .rpc_call_done = nfs_direct_commit_result,
+ .rpc_release = nfs_commit_release,
+};
+
+static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq)
{
- const unsigned int wsize = NFS_SERVER(inode)->wsize;
- size_t request;
- int curpage, need_commit;
- ssize_t result, tot_bytes;
- struct nfs_writeverf first_verf;
- struct nfs_write_data *wdata;
-
- wdata = nfs_writedata_alloc(NFS_SERVER(inode)->wpages);
- if (!wdata)
- return -ENOMEM;
+ struct nfs_write_data *data = dreq->commit_data;
+ struct rpc_task *task = &data->task;
- wdata->inode = inode;
- wdata->cred = ctx->cred;
- wdata->args.fh = NFS_FH(inode);
- wdata->args.context = ctx;
- wdata->args.stable = NFS_UNSTABLE;
- if (IS_SYNC(inode) || NFS_PROTO(inode)->version == 2 || count <= wsize)
- wdata->args.stable = NFS_FILE_SYNC;
- wdata->res.fattr = &wdata->fattr;
- wdata->res.verf = &wdata->verf;
+ data->inode = dreq->inode;
+ data->cred = dreq->ctx->cred;
- nfs_begin_data_update(inode);
-retry:
- need_commit = 0;
- tot_bytes = 0;
- curpage = 0;
- request = count;
- wdata->args.pgbase = user_addr & ~PAGE_MASK;
- wdata->args.offset = file_offset;
- do {
- wdata->args.count = request;
- if (wdata->args.count > wsize)
- wdata->args.count = wsize;
- wdata->args.pages = &pages[curpage];
+ data->args.fh = NFS_FH(data->inode);
+ data->args.offset = dreq->pos;
+ data->args.count = dreq->user_count;
+ data->res.count = 0;
+ data->res.fattr = &data->fattr;
+ data->res.verf = &data->verf;
- dprintk("NFS: direct write: c=%u o=%Ld ua=%lu, pb=%u, cp=%u\n",
- wdata->args.count, (long long) wdata->args.offset,
- user_addr + tot_bytes, wdata->args.pgbase, curpage);
+ rpc_init_task(&data->task, NFS_CLIENT(dreq->inode), RPC_TASK_ASYNC,
+ &nfs_commit_direct_ops, data);
+ NFS_PROTO(data->inode)->commit_setup(data, 0);
- lock_kernel();
- result = NFS_PROTO(inode)->write(wdata);
- unlock_kernel();
+ data->task.tk_priority = RPC_PRIORITY_NORMAL;
+ data->task.tk_cookie = (unsigned long)data->inode;
+ /* Note: task.tk_ops->rpc_release will free dreq->commit_data */
+ dreq->commit_data = NULL;
- if (result <= 0) {
- if (tot_bytes > 0)
- break;
- goto out;
- }
+ dprintk("NFS: %5u initiated commit call\n", task->tk_pid);
- if (tot_bytes == 0)
- memcpy(&first_verf.verifier, &wdata->verf.verifier,
- sizeof(first_verf.verifier));
- if (wdata->verf.committed != NFS_FILE_SYNC) {
- need_commit = 1;
- if (memcmp(&first_verf.verifier, &wdata->verf.verifier,
- sizeof(first_verf.verifier)))
- goto sync_retry;
- }
+ lock_kernel();
+ rpc_execute(&data->task);
+ unlock_kernel();
+}
- tot_bytes += result;
+static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode)
+{
+ int flags = dreq->flags;
- /* in case of a short write: stop now, let the app recover */
- if (result < wdata->args.count)
+ dreq->flags = 0;
+ switch (flags) {
+ case NFS_ODIRECT_DO_COMMIT:
+ nfs_direct_commit_schedule(dreq);
break;
+ case NFS_ODIRECT_RESCHED_WRITES:
+ nfs_direct_write_reschedule(dreq);
+ break;
+ default:
+ nfs_end_data_update(inode);
+ if (dreq->commit_data != NULL)
+ nfs_commit_free(dreq->commit_data);
+ nfs_direct_free_writedata(dreq);
+ nfs_direct_complete(dreq);
+ }
+}
- wdata->args.offset += result;
- wdata->args.pgbase += result;
- curpage += wdata->args.pgbase >> PAGE_SHIFT;
- wdata->args.pgbase &= ~PAGE_MASK;
- request -= result;
- } while (request != 0);
+static void nfs_alloc_commit_data(struct nfs_direct_req *dreq)
+{
+ dreq->commit_data = nfs_commit_alloc(0);
+ if (dreq->commit_data != NULL)
+ dreq->commit_data->req = (struct nfs_page *) dreq;
+}
+#else
+static inline void nfs_alloc_commit_data(struct nfs_direct_req *dreq)
+{
+ dreq->commit_data = NULL;
+}
- /*
- * Commit data written so far, even in the event of an error
- */
- if (need_commit) {
- wdata->args.count = tot_bytes;
- wdata->args.offset = file_offset;
+static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode)
+{
+ nfs_end_data_update(inode);
+ nfs_direct_free_writedata(dreq);
+ nfs_direct_complete(dreq);
+}
+#endif
- lock_kernel();
- result = NFS_PROTO(inode)->commit(wdata);
- unlock_kernel();
+static struct nfs_direct_req *nfs_direct_write_alloc(size_t nbytes, size_t wsize)
+{
+ struct list_head *list;
+ struct nfs_direct_req *dreq;
+ unsigned int wpages = (wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+
+ dreq = nfs_direct_req_alloc();
+ if (!dreq)
+ return NULL;
+
+ list = &dreq->list;
+ for(;;) {
+ struct nfs_write_data *data = nfs_writedata_alloc(wpages);
- if (result < 0 || memcmp(&first_verf.verifier,
- &wdata->verf.verifier,
- sizeof(first_verf.verifier)) != 0)
- goto sync_retry;
+ if (unlikely(!data)) {
+ while (!list_empty(list)) {
+ data = list_entry(list->next,
+ struct nfs_write_data, pages);
+ list_del(&data->pages);
+ nfs_writedata_free(data);
+ }
+ kref_put(&dreq->kref, nfs_direct_req_release);
+ return NULL;
+ }
+
+ INIT_LIST_HEAD(&data->pages);
+ list_add(&data->pages, list);
+
+ data->req = (struct nfs_page *) dreq;
+ dreq->outstanding++;
+ if (nbytes <= wsize)
+ break;
+ nbytes -= wsize;
}
- result = tot_bytes;
-out:
- nfs_end_data_update(inode);
- nfs_writedata_free(wdata);
- return result;
+ nfs_alloc_commit_data(dreq);
-sync_retry:
- wdata->args.stable = NFS_FILE_SYNC;
- goto retry;
+ kref_get(&dreq->kref);
+ return dreq;
}
-/**
- * nfs_direct_write - For each iov segment, map the user's buffer
- * then generate write and commit RPCs.
- * @inode: target inode
- * @ctx: target file open context
- * @iov: array of vectors that define I/O buffer
- * file_offset: offset in file to begin the operation
- * nr_segs: size of iovec array
- *
- * Upon return, generic_file_direct_IO invalidates any cached pages
- * that non-direct readers might access, so they will pick up these
- * writes immediately.
- */
-static ssize_t nfs_direct_write(struct inode *inode,
- struct nfs_open_context *ctx, const struct iovec *iov,
- loff_t file_offset, unsigned long nr_segs)
+static void nfs_direct_write_result(struct rpc_task *task, void *calldata)
{
- ssize_t tot_bytes = 0;
- unsigned long seg = 0;
-
- while ((seg < nr_segs) && (tot_bytes >= 0)) {
- ssize_t result;
- int page_count;
- struct page **pages;
- const struct iovec *vec = &iov[seg++];
- unsigned long user_addr = (unsigned long) vec->iov_base;
- size_t size = vec->iov_len;
-
- page_count = nfs_get_user_pages(WRITE, user_addr, size, &pages);
- if (page_count < 0) {
- nfs_free_user_pages(pages, 0, 0);
- if (tot_bytes > 0)
- break;
- return page_count;
- }
+ struct nfs_write_data *data = calldata;
+ struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req;
+ int status = task->tk_status;
+
+ if (nfs_writeback_done(task, data) != 0)
+ return;
+
+ spin_lock(&dreq->lock);
- result = nfs_direct_write_seg(inode, ctx, user_addr, size,
- file_offset, pages, page_count);
- nfs_free_user_pages(pages, page_count, 0);
+ if (likely(status >= 0))
+ dreq->count += data->res.count;
+ else
+ dreq->error = task->tk_status;
- if (result <= 0) {
- if (tot_bytes > 0)
+ if (data->res.verf->committed != NFS_FILE_SYNC) {
+ switch (dreq->flags) {
+ case 0:
+ memcpy(&dreq->verf, &data->verf, sizeof(dreq->verf));
+ dreq->flags = NFS_ODIRECT_DO_COMMIT;
break;
- return result;
+ case NFS_ODIRECT_DO_COMMIT:
+ if (memcmp(&dreq->verf, &data->verf, sizeof(dreq->verf))) {
+ dprintk("NFS: %5u write verify failed\n", task->tk_pid);
+ dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
+ }
}
- tot_bytes += result;
- file_offset += result;
- if (result < size)
- break;
}
- return tot_bytes;
+ /* In case we have to resend */
+ data->args.stable = NFS_FILE_SYNC;
+
+ spin_unlock(&dreq->lock);
}
-/**
- * nfs_direct_IO - NFS address space operation for direct I/O
- * rw: direction (read or write)
- * @iocb: target I/O control block
- * @iov: array of vectors that define I/O buffer
- * file_offset: offset in file to begin the operation
- * nr_segs: size of iovec array
- *
+/*
+ * NB: Return the value of the first error return code. Subsequent
+ * errors after the first one are ignored.
*/
-ssize_t
-nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
- loff_t file_offset, unsigned long nr_segs)
+static void nfs_direct_write_release(void *calldata)
{
- ssize_t result = -EINVAL;
- struct file *file = iocb->ki_filp;
- struct nfs_open_context *ctx;
- struct dentry *dentry = file->f_dentry;
- struct inode *inode = dentry->d_inode;
+ struct nfs_write_data *data = calldata;
+ struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req;
- /*
- * No support for async yet
- */
- if (!is_sync_kiocb(iocb))
- return result;
-
- ctx = (struct nfs_open_context *)file->private_data;
- switch (rw) {
- case READ:
- dprintk("NFS: direct_IO(read) (%s) off/no(%Lu/%lu)\n",
- dentry->d_name.name, file_offset, nr_segs);
-
- result = nfs_direct_read(inode, ctx, iov,
- file_offset, nr_segs);
- break;
- case WRITE:
- dprintk("NFS: direct_IO(write) (%s) off/no(%Lu/%lu)\n",
- dentry->d_name.name, file_offset, nr_segs);
-
- result = nfs_direct_write(inode, ctx, iov,
- file_offset, nr_segs);
- break;
- default:
- break;
+ spin_lock(&dreq->lock);
+ if (--dreq->outstanding) {
+ spin_unlock(&dreq->lock);
+ return;
}
+ spin_unlock(&dreq->lock);
+
+ nfs_direct_write_complete(dreq, data->inode);
+}
+
+static const struct rpc_call_ops nfs_write_direct_ops = {
+ .rpc_call_done = nfs_direct_write_result,
+ .rpc_release = nfs_direct_write_release,
+};
+
+/*
+ * For each nfs_write_data struct that was allocated on the list, dispatch
+ * an NFS WRITE operation
+ */
+static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, int sync)
+{
+ struct nfs_open_context *ctx = dreq->ctx;
+ struct inode *inode = ctx->dentry->d_inode;
+ struct list_head *list = &dreq->list;
+ struct page **pages = dreq->pages;
+ size_t count = dreq->user_count;
+ loff_t pos = dreq->pos;
+ size_t wsize = NFS_SERVER(inode)->wsize;
+ unsigned int curpage, pgbase;
+
+ curpage = 0;
+ pgbase = dreq->user_addr & ~PAGE_MASK;
+ do {
+ struct nfs_write_data *data;
+ size_t bytes;
+
+ bytes = wsize;
+ if (count < wsize)
+ bytes = count;
+
+ BUG_ON(list_empty(list));
+ data = list_entry(list->next, struct nfs_write_data, pages);
+ list_move_tail(&data->pages, &dreq->rewrite_list);
+
+ data->inode = inode;
+ data->cred = ctx->cred;
+ data->args.fh = NFS_FH(inode);
+ data->args.context = ctx;
+ data->args.offset = pos;
+ data->args.pgbase = pgbase;
+ data->args.pages = &pages[curpage];
+ data->args.count = bytes;
+ data->res.fattr = &data->fattr;
+ data->res.count = bytes;
+ data->res.verf = &data->verf;
+
+ rpc_init_task(&data->task, NFS_CLIENT(inode), RPC_TASK_ASYNC,
+ &nfs_write_direct_ops, data);
+ NFS_PROTO(inode)->write_setup(data, sync);
+
+ data->task.tk_priority = RPC_PRIORITY_NORMAL;
+ data->task.tk_cookie = (unsigned long) inode;
+
+ lock_kernel();
+ rpc_execute(&data->task);
+ unlock_kernel();
+
+ dfprintk(VFS, "NFS: %5u initiated direct write call (req %s/%Ld, %zu bytes @ offset %Lu)\n",
+ data->task.tk_pid,
+ inode->i_sb->s_id,
+ (long long)NFS_FILEID(inode),
+ bytes,
+ (unsigned long long)data->args.offset);
+
+ pos += bytes;
+ pgbase += bytes;
+ curpage += pgbase >> PAGE_SHIFT;
+ pgbase &= ~PAGE_MASK;
+
+ count -= bytes;
+ } while (count != 0);
+ BUG_ON(!list_empty(list));
+}
+
+static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, size_t count, loff_t pos, struct page **pages, int nr_pages)
+{
+ ssize_t result;
+ sigset_t oldset;
+ struct inode *inode = iocb->ki_filp->f_mapping->host;
+ struct rpc_clnt *clnt = NFS_CLIENT(inode);
+ struct nfs_direct_req *dreq;
+ size_t wsize = NFS_SERVER(inode)->wsize;
+ int sync = 0;
+
+ dreq = nfs_direct_write_alloc(count, wsize);
+ if (!dreq)
+ return -ENOMEM;
+ if (dreq->commit_data == NULL || count < wsize)
+ sync = FLUSH_STABLE;
+
+ dreq->user_addr = user_addr;
+ dreq->user_count = count;
+ dreq->pos = pos;
+ dreq->pages = pages;
+ dreq->npages = nr_pages;
+ dreq->inode = inode;
+ dreq->ctx = get_nfs_open_context((struct nfs_open_context *)iocb->ki_filp->private_data);
+ if (!is_sync_kiocb(iocb))
+ dreq->iocb = iocb;
+
+ nfs_add_stats(inode, NFSIOS_DIRECTWRITTENBYTES, count);
+
+ nfs_begin_data_update(inode);
+
+ rpc_clnt_sigmask(clnt, &oldset);
+ nfs_direct_write_schedule(dreq, sync);
+ result = nfs_direct_wait(dreq);
+ rpc_clnt_sigunmask(clnt, &oldset);
+
return result;
}
@@ -640,49 +748,40 @@ nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
* nfs_file_direct_read - file direct read operation for NFS files
* @iocb: target I/O control block
* @buf: user's buffer into which to read data
- * count: number of bytes to read
- * pos: byte offset in file where reading starts
+ * @count: number of bytes to read
+ * @pos: byte offset in file where reading starts
*
* We use this function for direct reads instead of calling
* generic_file_aio_read() in order to avoid gfar's check to see if
* the request starts before the end of the file. For that check
* to work, we must generate a GETATTR before each direct read, and
* even then there is a window between the GETATTR and the subsequent
- * READ where the file size could change. So our preference is simply
+ * READ where the file size could change. Our preference is simply
* to do all reads the application wants, and the server will take
* care of managing the end of file boundary.
- *
+ *
* This function also eliminates unnecessarily updating the file's
* atime locally, as the NFS server sets the file's atime, and this
* client must read the updated atime from the server back into its
* cache.
*/
-ssize_t
-nfs_file_direct_read(struct kiocb *iocb, char __user *buf, size_t count, loff_t pos)
+ssize_t nfs_file_direct_read(struct kiocb *iocb, char __user *buf, size_t count, loff_t pos)
{
ssize_t retval = -EINVAL;
- loff_t *ppos = &iocb->ki_pos;
+ int page_count;
+ struct page **pages;
struct file *file = iocb->ki_filp;
- struct nfs_open_context *ctx =
- (struct nfs_open_context *) file->private_data;
struct address_space *mapping = file->f_mapping;
- struct inode *inode = mapping->host;
- struct iovec iov = {
- .iov_base = buf,
- .iov_len = count,
- };
dprintk("nfs: direct read(%s/%s, %lu@%Ld)\n",
file->f_dentry->d_parent->d_name.name,
file->f_dentry->d_name.name,
(unsigned long) count, (long long) pos);
- if (!is_sync_kiocb(iocb))
- goto out;
if (count < 0)
goto out;
retval = -EFAULT;
- if (!access_ok(VERIFY_WRITE, iov.iov_base, iov.iov_len))
+ if (!access_ok(VERIFY_WRITE, buf, count))
goto out;
retval = 0;
if (!count)
@@ -692,9 +791,16 @@ nfs_file_direct_read(struct kiocb *iocb, char __user *buf, size_t count, loff_t
if (retval)
goto out;
- retval = nfs_direct_read(inode, ctx, &iov, pos, 1);
+ retval = nfs_get_user_pages(READ, (unsigned long) buf,
+ count, &pages);
+ if (retval < 0)
+ goto out;
+ page_count = retval;
+
+ retval = nfs_direct_read(iocb, (unsigned long) buf, count, pos,
+ pages, page_count);
if (retval > 0)
- *ppos = pos + retval;
+ iocb->ki_pos = pos + retval;
out:
return retval;
@@ -704,8 +810,8 @@ out:
* nfs_file_direct_write - file direct write operation for NFS files
* @iocb: target I/O control block
* @buf: user's buffer from which to write data
- * count: number of bytes to write
- * pos: byte offset in file where writing starts
+ * @count: number of bytes to write
+ * @pos: byte offset in file where writing starts
*
* We use this function for direct writes instead of calling
* generic_file_aio_write() in order to avoid taking the inode
@@ -725,28 +831,19 @@ out:
* Note that O_APPEND is not supported for NFS direct writes, as there
* is no atomic O_APPEND write facility in the NFS protocol.
*/
-ssize_t
-nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t pos)
+ssize_t nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t pos)
{
ssize_t retval;
+ int page_count;
+ struct page **pages;
struct file *file = iocb->ki_filp;
- struct nfs_open_context *ctx =
- (struct nfs_open_context *) file->private_data;
struct address_space *mapping = file->f_mapping;
- struct inode *inode = mapping->host;
- struct iovec iov = {
- .iov_base = (char __user *)buf,
- };
dfprintk(VFS, "nfs: direct write(%s/%s, %lu@%Ld)\n",
file->f_dentry->d_parent->d_name.name,
file->f_dentry->d_name.name,
(unsigned long) count, (long long) pos);
- retval = -EINVAL;
- if (!is_sync_kiocb(iocb))
- goto out;
-
retval = generic_write_checks(file, &pos, &count, 0);
if (retval)
goto out;
@@ -757,19 +854,35 @@ nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t count,
retval = 0;
if (!count)
goto out;
- iov.iov_len = count,
retval = -EFAULT;
- if (!access_ok(VERIFY_READ, iov.iov_base, iov.iov_len))
+ if (!access_ok(VERIFY_READ, buf, count))
goto out;
retval = nfs_sync_mapping(mapping);
if (retval)
goto out;
- retval = nfs_direct_write(inode, ctx, &iov, pos, 1);
+ retval = nfs_get_user_pages(WRITE, (unsigned long) buf,
+ count, &pages);
+ if (retval < 0)
+ goto out;
+ page_count = retval;
+
+ retval = nfs_direct_write(iocb, (unsigned long) buf, count,
+ pos, pages, page_count);
+
+ /*
+ * XXX: nfs_end_data_update() already ensures this file's
+ * cached data is subsequently invalidated. Do we really
+ * need to call invalidate_inode_pages2() again here?
+ *
+ * For aio writes, this invalidation will almost certainly
+ * occur before the writes complete. Kind of racey.
+ */
if (mapping->nrpages)
invalidate_inode_pages2(mapping);
+
if (retval > 0)
iocb->ki_pos = pos + retval;
@@ -777,6 +890,10 @@ out:
return retval;
}
+/**
+ * nfs_init_directcache - create a slab cache for nfs_direct_req structures
+ *
+ */
int nfs_init_directcache(void)
{
nfs_direct_cachep = kmem_cache_create("nfs_direct_cache",
@@ -790,6 +907,10 @@ int nfs_init_directcache(void)
return 0;
}
+/**
+ * nfs_init_directcache - destroy the slab cache for nfs_direct_req structures
+ *
+ */
void nfs_destroy_directcache(void)
{
if (kmem_cache_destroy(nfs_direct_cachep))
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 7a79fbe9f539..5263b2864a44 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -32,6 +32,7 @@
#include <asm/system.h>
#include "delegation.h"
+#include "iostat.h"
#define NFSDBG_FACILITY NFSDBG_FILE
@@ -102,18 +103,15 @@ static int nfs_check_flags(int flags)
static int
nfs_file_open(struct inode *inode, struct file *filp)
{
- struct nfs_server *server = NFS_SERVER(inode);
- int (*open)(struct inode *, struct file *);
int res;
res = nfs_check_flags(filp->f_flags);
if (res)
return res;
+ nfs_inc_stats(inode, NFSIOS_VFSOPEN);
lock_kernel();
- /* Do NFSv4 open() call */
- if ((open = server->rpc_ops->file_open) != NULL)
- res = open(inode, filp);
+ res = NFS_SERVER(inode)->rpc_ops->file_open(inode, filp);
unlock_kernel();
return res;
}
@@ -124,6 +122,7 @@ nfs_file_release(struct inode *inode, struct file *filp)
/* Ensure that dirty pages are flushed out with the right creds */
if (filp->f_mode & FMODE_WRITE)
filemap_fdatawrite(filp->f_mapping);
+ nfs_inc_stats(inode, NFSIOS_VFSRELEASE);
return NFS_PROTO(inode)->file_release(inode, filp);
}
@@ -199,6 +198,7 @@ nfs_file_flush(struct file *file)
if ((file->f_mode & FMODE_WRITE) == 0)
return 0;
+ nfs_inc_stats(inode, NFSIOS_VFSFLUSH);
lock_kernel();
/* Ensure that data+attribute caches are up to date after close() */
status = nfs_wb_all(inode);
@@ -229,6 +229,7 @@ nfs_file_read(struct kiocb *iocb, char __user * buf, size_t count, loff_t pos)
(unsigned long) count, (unsigned long) pos);
result = nfs_revalidate_file(inode, iocb->ki_filp);
+ nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, count);
if (!result)
result = generic_file_aio_read(iocb, buf, count, pos);
return result;
@@ -282,6 +283,7 @@ nfs_fsync(struct file *file, struct dentry *dentry, int datasync)
dfprintk(VFS, "nfs: fsync(%s/%ld)\n", inode->i_sb->s_id, inode->i_ino);
+ nfs_inc_stats(inode, NFSIOS_VFSFSYNC);
lock_kernel();
status = nfs_wb_all(inode);
if (!status) {
@@ -316,6 +318,17 @@ static int nfs_commit_write(struct file *file, struct page *page, unsigned offse
return status;
}
+static int nfs_invalidate_page(struct page *page, unsigned long offset)
+{
+ /* FIXME: we really should cancel any unstarted writes on this page */
+ return 1;
+}
+
+static int nfs_release_page(struct page *page, gfp_t gfp)
+{
+ return !nfs_wb_page(page->mapping->host, page);
+}
+
struct address_space_operations nfs_file_aops = {
.readpage = nfs_readpage,
.readpages = nfs_readpages,
@@ -324,6 +337,8 @@ struct address_space_operations nfs_file_aops = {
.writepages = nfs_writepages,
.prepare_write = nfs_prepare_write,
.commit_write = nfs_commit_write,
+ .invalidatepage = nfs_invalidate_page,
+ .releasepage = nfs_release_page,
#ifdef CONFIG_NFS_DIRECTIO
.direct_IO = nfs_direct_IO,
#endif
@@ -365,6 +380,7 @@ nfs_file_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t
if (!count)
goto out;
+ nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, count);
result = generic_file_aio_write(iocb, buf, count, pos);
out:
return result;
@@ -376,15 +392,17 @@ out_swapfile:
static int do_getlk(struct file *filp, int cmd, struct file_lock *fl)
{
- struct file_lock *cfl;
+ struct file_lock cfl;
struct inode *inode = filp->f_mapping->host;
int status = 0;
lock_kernel();
/* Try local locking first */
- cfl = posix_test_lock(filp, fl);
- if (cfl != NULL) {
- locks_copy_lock(fl, cfl);
+ if (posix_test_lock(filp, fl, &cfl)) {
+ fl->fl_start = cfl.fl_start;
+ fl->fl_end = cfl.fl_end;
+ fl->fl_type = cfl.fl_type;
+ fl->fl_pid = cfl.fl_pid;
goto out;
}
@@ -425,10 +443,8 @@ static int do_vfs_lock(struct file *file, struct file_lock *fl)
static int do_unlk(struct file *filp, int cmd, struct file_lock *fl)
{
struct inode *inode = filp->f_mapping->host;
- sigset_t oldset;
int status;
- rpc_clnt_sigmask(NFS_CLIENT(inode), &oldset);
/*
* Flush all pending writes before doing anything
* with locks..
@@ -446,17 +462,14 @@ static int do_unlk(struct file *filp, int cmd, struct file_lock *fl)
else
status = do_vfs_lock(filp, fl);
unlock_kernel();
- rpc_clnt_sigunmask(NFS_CLIENT(inode), &oldset);
return status;
}
static int do_setlk(struct file *filp, int cmd, struct file_lock *fl)
{
struct inode *inode = filp->f_mapping->host;
- sigset_t oldset;
int status;
- rpc_clnt_sigmask(NFS_CLIENT(inode), &oldset);
/*
* Flush all pending writes before doing anything
* with locks..
@@ -489,7 +502,6 @@ static int do_setlk(struct file *filp, int cmd, struct file_lock *fl)
nfs_sync_mapping(filp->f_mapping);
nfs_zap_caches(inode);
out:
- rpc_clnt_sigunmask(NFS_CLIENT(inode), &oldset);
return status;
}
@@ -504,9 +516,7 @@ static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl)
inode->i_sb->s_id, inode->i_ino,
fl->fl_type, fl->fl_flags,
(long long)fl->fl_start, (long long)fl->fl_end);
-
- if (!inode)
- return -EINVAL;
+ nfs_inc_stats(inode, NFSIOS_VFSLOCK);
/* No mandatory locks over NFS */
if ((inode->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID &&
@@ -531,9 +541,6 @@ static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl)
inode->i_sb->s_id, inode->i_ino,
fl->fl_type, fl->fl_flags);
- if (!inode)
- return -EINVAL;
-
/*
* No BSD flocks over NFS allowed.
* Note: we could try to fake a POSIX lock request here by
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index 821edd30333b..3fab5b0cfc5a 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -35,6 +35,7 @@
*/
#include <linux/module.h>
+#include <linux/mutex.h>
#include <linux/init.h>
#include <linux/types.h>
#include <linux/slab.h>
@@ -74,8 +75,8 @@ struct idmap {
struct dentry *idmap_dentry;
wait_queue_head_t idmap_wq;
struct idmap_msg idmap_im;
- struct semaphore idmap_lock; /* Serializes upcalls */
- struct semaphore idmap_im_lock; /* Protects the hashtable */
+ struct mutex idmap_lock; /* Serializes upcalls */
+ struct mutex idmap_im_lock; /* Protects the hashtable */
struct idmap_hashtable idmap_user_hash;
struct idmap_hashtable idmap_group_hash;
};
@@ -101,11 +102,9 @@ nfs_idmap_new(struct nfs4_client *clp)
if (clp->cl_idmap != NULL)
return;
- if ((idmap = kmalloc(sizeof(*idmap), GFP_KERNEL)) == NULL)
+ if ((idmap = kzalloc(sizeof(*idmap), GFP_KERNEL)) == NULL)
return;
- memset(idmap, 0, sizeof(*idmap));
-
snprintf(idmap->idmap_path, sizeof(idmap->idmap_path),
"%s/idmap", clp->cl_rpcclient->cl_pathname);
@@ -116,8 +115,8 @@ nfs_idmap_new(struct nfs4_client *clp)
return;
}
- init_MUTEX(&idmap->idmap_lock);
- init_MUTEX(&idmap->idmap_im_lock);
+ mutex_init(&idmap->idmap_lock);
+ mutex_init(&idmap->idmap_im_lock);
init_waitqueue_head(&idmap->idmap_wq);
idmap->idmap_user_hash.h_type = IDMAP_TYPE_USER;
idmap->idmap_group_hash.h_type = IDMAP_TYPE_GROUP;
@@ -132,6 +131,8 @@ nfs_idmap_delete(struct nfs4_client *clp)
if (!idmap)
return;
+ dput(idmap->idmap_dentry);
+ idmap->idmap_dentry = NULL;
rpc_unlink(idmap->idmap_path);
clp->cl_idmap = NULL;
kfree(idmap);
@@ -232,8 +233,8 @@ nfs_idmap_id(struct idmap *idmap, struct idmap_hashtable *h,
if (namelen >= IDMAP_NAMESZ)
return -EINVAL;
- down(&idmap->idmap_lock);
- down(&idmap->idmap_im_lock);
+ mutex_lock(&idmap->idmap_lock);
+ mutex_lock(&idmap->idmap_im_lock);
he = idmap_lookup_name(h, name, namelen);
if (he != NULL) {
@@ -259,11 +260,11 @@ nfs_idmap_id(struct idmap *idmap, struct idmap_hashtable *h,
}
set_current_state(TASK_UNINTERRUPTIBLE);
- up(&idmap->idmap_im_lock);
+ mutex_unlock(&idmap->idmap_im_lock);
schedule();
current->state = TASK_RUNNING;
remove_wait_queue(&idmap->idmap_wq, &wq);
- down(&idmap->idmap_im_lock);
+ mutex_lock(&idmap->idmap_im_lock);
if (im->im_status & IDMAP_STATUS_SUCCESS) {
*id = im->im_id;
@@ -272,8 +273,8 @@ nfs_idmap_id(struct idmap *idmap, struct idmap_hashtable *h,
out:
memset(im, 0, sizeof(*im));
- up(&idmap->idmap_im_lock);
- up(&idmap->idmap_lock);
+ mutex_unlock(&idmap->idmap_im_lock);
+ mutex_unlock(&idmap->idmap_lock);
return (ret);
}
@@ -293,8 +294,8 @@ nfs_idmap_name(struct idmap *idmap, struct idmap_hashtable *h,
im = &idmap->idmap_im;
- down(&idmap->idmap_lock);
- down(&idmap->idmap_im_lock);
+ mutex_lock(&idmap->idmap_lock);
+ mutex_lock(&idmap->idmap_im_lock);
he = idmap_lookup_id(h, id);
if (he != 0) {
@@ -320,11 +321,11 @@ nfs_idmap_name(struct idmap *idmap, struct idmap_hashtable *h,
}
set_current_state(TASK_UNINTERRUPTIBLE);
- up(&idmap->idmap_im_lock);
+ mutex_unlock(&idmap->idmap_im_lock);
schedule();
current->state = TASK_RUNNING;
remove_wait_queue(&idmap->idmap_wq, &wq);
- down(&idmap->idmap_im_lock);
+ mutex_lock(&idmap->idmap_im_lock);
if (im->im_status & IDMAP_STATUS_SUCCESS) {
if ((len = strnlen(im->im_name, IDMAP_NAMESZ)) == 0)
@@ -335,8 +336,8 @@ nfs_idmap_name(struct idmap *idmap, struct idmap_hashtable *h,
out:
memset(im, 0, sizeof(*im));
- up(&idmap->idmap_im_lock);
- up(&idmap->idmap_lock);
+ mutex_unlock(&idmap->idmap_im_lock);
+ mutex_unlock(&idmap->idmap_lock);
return ret;
}
@@ -380,7 +381,7 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
if (copy_from_user(&im_in, src, mlen) != 0)
return (-EFAULT);
- down(&idmap->idmap_im_lock);
+ mutex_lock(&idmap->idmap_im_lock);
ret = mlen;
im->im_status = im_in.im_status;
@@ -440,7 +441,7 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
idmap_update_entry(he, im_in.im_name, namelen_in, im_in.im_id);
ret = mlen;
out:
- up(&idmap->idmap_im_lock);
+ mutex_unlock(&idmap->idmap_im_lock);
return ret;
}
@@ -452,10 +453,10 @@ idmap_pipe_destroy_msg(struct rpc_pipe_msg *msg)
if (msg->errno >= 0)
return;
- down(&idmap->idmap_im_lock);
+ mutex_lock(&idmap->idmap_im_lock);
im->im_status = IDMAP_STATUS_LOOKUPFAIL;
wake_up(&idmap->idmap_wq);
- up(&idmap->idmap_im_lock);
+ mutex_unlock(&idmap->idmap_im_lock);
}
/*
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 3413996f9a86..2f7656b911b6 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -26,6 +26,7 @@
#include <linux/unistd.h>
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/stats.h>
+#include <linux/sunrpc/metrics.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_mount.h>
#include <linux/nfs4_mount.h>
@@ -42,6 +43,7 @@
#include "nfs4_fs.h"
#include "callback.h"
#include "delegation.h"
+#include "iostat.h"
#define NFSDBG_FACILITY NFSDBG_VFS
#define NFS_PARANOIA 1
@@ -65,6 +67,7 @@ static void nfs_clear_inode(struct inode *);
static void nfs_umount_begin(struct super_block *);
static int nfs_statfs(struct super_block *, struct kstatfs *);
static int nfs_show_options(struct seq_file *, struct vfsmount *);
+static int nfs_show_stats(struct seq_file *, struct vfsmount *);
static void nfs_zap_acl_cache(struct inode *);
static struct rpc_program nfs_program;
@@ -78,6 +81,7 @@ static struct super_operations nfs_sops = {
.clear_inode = nfs_clear_inode,
.umount_begin = nfs_umount_begin,
.show_options = nfs_show_options,
+ .show_stats = nfs_show_stats,
};
/*
@@ -133,7 +137,7 @@ nfs_fattr_to_ino_t(struct nfs_fattr *fattr)
static int
nfs_write_inode(struct inode *inode, int sync)
{
- int flags = sync ? FLUSH_WAIT : 0;
+ int flags = sync ? FLUSH_SYNC : 0;
int ret;
ret = nfs_commit_inode(inode, flags);
@@ -237,7 +241,6 @@ static struct inode *
nfs_get_root(struct super_block *sb, struct nfs_fh *rootfh, struct nfs_fsinfo *fsinfo)
{
struct nfs_server *server = NFS_SB(sb);
- struct inode *rooti;
int error;
error = server->rpc_ops->getroot(server, rootfh, fsinfo);
@@ -246,10 +249,7 @@ nfs_get_root(struct super_block *sb, struct nfs_fh *rootfh, struct nfs_fsinfo *f
return ERR_PTR(error);
}
- rooti = nfs_fhget(sb, rootfh, fsinfo->fattr);
- if (!rooti)
- return ERR_PTR(-ENOMEM);
- return rooti;
+ return nfs_fhget(sb, rootfh, fsinfo->fattr);
}
/*
@@ -277,6 +277,10 @@ nfs_sb_init(struct super_block *sb, rpc_authflavor_t authflavor)
sb->s_magic = NFS_SUPER_MAGIC;
+ server->io_stats = nfs_alloc_iostats();
+ if (server->io_stats == NULL)
+ return -ENOMEM;
+
root_inode = nfs_get_root(sb, &server->fh, &fsinfo);
/* Did getting the root inode fail? */
if (IS_ERR(root_inode)) {
@@ -290,6 +294,9 @@ nfs_sb_init(struct super_block *sb, rpc_authflavor_t authflavor)
}
sb->s_root->d_op = server->rpc_ops->dentry_ops;
+ /* mount time stamp, in seconds */
+ server->mount_time = jiffies;
+
/* Get some general file system info */
if (server->namelen == 0 &&
server->rpc_ops->pathconf(server, &server->fh, &pathinfo) >= 0)
@@ -396,6 +403,9 @@ nfs_create_client(struct nfs_server *server, const struct nfs_mount_data *data)
nfs_init_timeout_values(&timeparms, proto, data->timeo, data->retrans);
+ server->retrans_timeo = timeparms.to_initval;
+ server->retrans_count = timeparms.to_retries;
+
/* create transport and client */
xprt = xprt_create_proto(proto, &server->addr, &timeparms);
if (IS_ERR(xprt)) {
@@ -579,7 +589,7 @@ nfs_statfs(struct super_block *sb, struct kstatfs *buf)
}
-static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt)
+static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, int showdefaults)
{
static struct proc_nfs_info {
int flag;
@@ -588,28 +598,26 @@ static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt)
} nfs_info[] = {
{ NFS_MOUNT_SOFT, ",soft", ",hard" },
{ NFS_MOUNT_INTR, ",intr", "" },
- { NFS_MOUNT_POSIX, ",posix", "" },
{ NFS_MOUNT_NOCTO, ",nocto", "" },
{ NFS_MOUNT_NOAC, ",noac", "" },
- { NFS_MOUNT_NONLM, ",nolock", ",lock" },
+ { NFS_MOUNT_NONLM, ",nolock", "" },
{ NFS_MOUNT_NOACL, ",noacl", "" },
{ 0, NULL, NULL }
};
struct proc_nfs_info *nfs_infop;
- struct nfs_server *nfss = NFS_SB(mnt->mnt_sb);
char buf[12];
char *proto;
- seq_printf(m, ",v%d", nfss->rpc_ops->version);
+ seq_printf(m, ",vers=%d", nfss->rpc_ops->version);
seq_printf(m, ",rsize=%d", nfss->rsize);
seq_printf(m, ",wsize=%d", nfss->wsize);
- if (nfss->acregmin != 3*HZ)
+ if (nfss->acregmin != 3*HZ || showdefaults)
seq_printf(m, ",acregmin=%d", nfss->acregmin/HZ);
- if (nfss->acregmax != 60*HZ)
+ if (nfss->acregmax != 60*HZ || showdefaults)
seq_printf(m, ",acregmax=%d", nfss->acregmax/HZ);
- if (nfss->acdirmin != 30*HZ)
+ if (nfss->acdirmin != 30*HZ || showdefaults)
seq_printf(m, ",acdirmin=%d", nfss->acdirmin/HZ);
- if (nfss->acdirmax != 60*HZ)
+ if (nfss->acdirmax != 60*HZ || showdefaults)
seq_printf(m, ",acdirmax=%d", nfss->acdirmax/HZ);
for (nfs_infop = nfs_info; nfs_infop->flag; nfs_infop++) {
if (nfss->flags & nfs_infop->flag)
@@ -629,8 +637,96 @@ static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt)
proto = buf;
}
seq_printf(m, ",proto=%s", proto);
+ seq_printf(m, ",timeo=%lu", 10U * nfss->retrans_timeo / HZ);
+ seq_printf(m, ",retrans=%u", nfss->retrans_count);
+}
+
+static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt)
+{
+ struct nfs_server *nfss = NFS_SB(mnt->mnt_sb);
+
+ nfs_show_mount_options(m, nfss, 0);
+
seq_puts(m, ",addr=");
seq_escape(m, nfss->hostname, " \t\n\\");
+
+ return 0;
+}
+
+static int nfs_show_stats(struct seq_file *m, struct vfsmount *mnt)
+{
+ int i, cpu;
+ struct nfs_server *nfss = NFS_SB(mnt->mnt_sb);
+ struct rpc_auth *auth = nfss->client->cl_auth;
+ struct nfs_iostats totals = { };
+
+ seq_printf(m, "statvers=%s", NFS_IOSTAT_VERS);
+
+ /*
+ * Display all mount option settings
+ */
+ seq_printf(m, "\n\topts:\t");
+ seq_puts(m, mnt->mnt_sb->s_flags & MS_RDONLY ? "ro" : "rw");
+ seq_puts(m, mnt->mnt_sb->s_flags & MS_SYNCHRONOUS ? ",sync" : "");
+ seq_puts(m, mnt->mnt_sb->s_flags & MS_NOATIME ? ",noatime" : "");
+ seq_puts(m, mnt->mnt_sb->s_flags & MS_NODIRATIME ? ",nodiratime" : "");
+ nfs_show_mount_options(m, nfss, 1);
+
+ seq_printf(m, "\n\tage:\t%lu", (jiffies - nfss->mount_time) / HZ);
+
+ seq_printf(m, "\n\tcaps:\t");
+ seq_printf(m, "caps=0x%x", nfss->caps);
+ seq_printf(m, ",wtmult=%d", nfss->wtmult);
+ seq_printf(m, ",dtsize=%d", nfss->dtsize);
+ seq_printf(m, ",bsize=%d", nfss->bsize);
+ seq_printf(m, ",namelen=%d", nfss->namelen);
+
+#ifdef CONFIG_NFS_V4
+ if (nfss->rpc_ops->version == 4) {
+ seq_printf(m, "\n\tnfsv4:\t");
+ seq_printf(m, "bm0=0x%x", nfss->attr_bitmask[0]);
+ seq_printf(m, ",bm1=0x%x", nfss->attr_bitmask[1]);
+ seq_printf(m, ",acl=0x%x", nfss->acl_bitmask);
+ }
+#endif
+
+ /*
+ * Display security flavor in effect for this mount
+ */
+ seq_printf(m, "\n\tsec:\tflavor=%d", auth->au_ops->au_flavor);
+ if (auth->au_flavor)
+ seq_printf(m, ",pseudoflavor=%d", auth->au_flavor);
+
+ /*
+ * Display superblock I/O counters
+ */
+ for (cpu = 0; cpu < NR_CPUS; cpu++) {
+ struct nfs_iostats *stats;
+
+ if (!cpu_possible(cpu))
+ continue;
+
+ preempt_disable();
+ stats = per_cpu_ptr(nfss->io_stats, cpu);
+
+ for (i = 0; i < __NFSIOS_COUNTSMAX; i++)
+ totals.events[i] += stats->events[i];
+ for (i = 0; i < __NFSIOS_BYTESMAX; i++)
+ totals.bytes[i] += stats->bytes[i];
+
+ preempt_enable();
+ }
+
+ seq_printf(m, "\n\tevents:\t");
+ for (i = 0; i < __NFSIOS_COUNTSMAX; i++)
+ seq_printf(m, "%lu ", totals.events[i]);
+ seq_printf(m, "\n\tbytes:\t");
+ for (i = 0; i < __NFSIOS_BYTESMAX; i++)
+ seq_printf(m, "%Lu ", totals.bytes[i]);
+ seq_printf(m, "\n");
+
+ rpc_print_iostats(m, nfss->client);
+
return 0;
}
@@ -660,6 +756,8 @@ static void nfs_zap_caches_locked(struct inode *inode)
struct nfs_inode *nfsi = NFS_I(inode);
int mode = inode->i_mode;
+ nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE);
+
NFS_ATTRTIMEO(inode) = NFS_MINATTRTIMEO(inode);
NFS_ATTRTIMEO_UPDATE(inode) = jiffies;
@@ -751,7 +849,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
.fh = fh,
.fattr = fattr
};
- struct inode *inode = NULL;
+ struct inode *inode = ERR_PTR(-ENOENT);
unsigned long hash;
if ((fattr->valid & NFS_ATTR_FATTR) == 0)
@@ -764,8 +862,11 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
hash = nfs_fattr_to_ino_t(fattr);
- if (!(inode = iget5_locked(sb, hash, nfs_find_actor, nfs_init_locked, &desc)))
+ inode = iget5_locked(sb, hash, nfs_find_actor, nfs_init_locked, &desc);
+ if (inode == NULL) {
+ inode = ERR_PTR(-ENOMEM);
goto out_no_inode;
+ }
if (inode->i_state & I_NEW) {
struct nfs_inode *nfsi = NFS_I(inode);
@@ -834,7 +935,7 @@ out:
return inode;
out_no_inode:
- printk("nfs_fhget: iget failed\n");
+ dprintk("nfs_fhget: iget failed with error %ld\n", PTR_ERR(inode));
goto out;
}
@@ -847,6 +948,8 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr)
struct nfs_fattr fattr;
int error;
+ nfs_inc_stats(inode, NFSIOS_VFSSETATTR);
+
if (attr->ia_valid & ATTR_SIZE) {
if (!S_ISREG(inode->i_mode) || attr->ia_size == i_size_read(inode))
attr->ia_valid &= ~ATTR_SIZE;
@@ -859,11 +962,9 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr)
lock_kernel();
nfs_begin_data_update(inode);
- /* Write all dirty data if we're changing file permissions or size */
- if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_SIZE)) != 0) {
- filemap_write_and_wait(inode->i_mapping);
- nfs_wb_all(inode);
- }
+ /* Write all dirty data */
+ filemap_write_and_wait(inode->i_mapping);
+ nfs_wb_all(inode);
/*
* Return any delegations if we're going to change ACLs
*/
@@ -902,6 +1003,7 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr)
spin_unlock(&inode->i_lock);
}
if ((attr->ia_valid & ATTR_SIZE) != 0) {
+ nfs_inc_stats(inode, NFSIOS_SETATTRTRUNC);
inode->i_size = attr->ia_size;
vmtruncate(inode, attr->ia_size);
}
@@ -949,7 +1051,7 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
int err;
/* Flush out writes to the server in order to update c/mtime */
- nfs_sync_inode(inode, 0, 0, FLUSH_WAIT|FLUSH_NOCOMMIT);
+ nfs_sync_inode_wait(inode, 0, 0, FLUSH_NOCOMMIT);
/*
* We may force a getattr if the user cares about atime.
@@ -973,7 +1075,7 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
return err;
}
-struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, struct rpc_cred *cred)
+static struct nfs_open_context *alloc_nfs_open_context(struct vfsmount *mnt, struct dentry *dentry, struct rpc_cred *cred)
{
struct nfs_open_context *ctx;
@@ -981,6 +1083,7 @@ struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, struct rp
if (ctx != NULL) {
atomic_set(&ctx->count, 1);
ctx->dentry = dget(dentry);
+ ctx->vfsmnt = mntget(mnt);
ctx->cred = get_rpccred(cred);
ctx->state = NULL;
ctx->lockowner = current->files;
@@ -1011,6 +1114,7 @@ void put_nfs_open_context(struct nfs_open_context *ctx)
if (ctx->cred != NULL)
put_rpccred(ctx->cred);
dput(ctx->dentry);
+ mntput(ctx->vfsmnt);
kfree(ctx);
}
}
@@ -1019,7 +1123,7 @@ void put_nfs_open_context(struct nfs_open_context *ctx)
* Ensure that mmap has a recent RPC credential for use when writing out
* shared pages
*/
-void nfs_file_set_open_context(struct file *filp, struct nfs_open_context *ctx)
+static void nfs_file_set_open_context(struct file *filp, struct nfs_open_context *ctx)
{
struct inode *inode = filp->f_dentry->d_inode;
struct nfs_inode *nfsi = NFS_I(inode);
@@ -1051,7 +1155,7 @@ struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_c
return ctx;
}
-void nfs_file_clear_open_context(struct file *filp)
+static void nfs_file_clear_open_context(struct file *filp)
{
struct inode *inode = filp->f_dentry->d_inode;
struct nfs_open_context *ctx = (struct nfs_open_context *)filp->private_data;
@@ -1076,7 +1180,7 @@ int nfs_open(struct inode *inode, struct file *filp)
cred = rpcauth_lookupcred(NFS_CLIENT(inode)->cl_auth, 0);
if (IS_ERR(cred))
return PTR_ERR(cred);
- ctx = alloc_nfs_open_context(filp->f_dentry, cred);
+ ctx = alloc_nfs_open_context(filp->f_vfsmnt, filp->f_dentry, cred);
put_rpccred(cred);
if (ctx == NULL)
return -ENOMEM;
@@ -1185,6 +1289,7 @@ int nfs_attribute_timeout(struct inode *inode)
*/
int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
{
+ nfs_inc_stats(inode, NFSIOS_INODEREVALIDATE);
if (!(NFS_I(inode)->cache_validity & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA))
&& !nfs_attribute_timeout(inode))
return NFS_STALE(inode) ? -ESTALE : 0;
@@ -1201,6 +1306,7 @@ void nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping)
struct nfs_inode *nfsi = NFS_I(inode);
if (nfsi->cache_validity & NFS_INO_INVALID_DATA) {
+ nfs_inc_stats(inode, NFSIOS_DATAINVALIDATE);
if (S_ISREG(inode->i_mode))
nfs_sync_mapping(mapping);
invalidate_inode_pages2(mapping);
@@ -1299,39 +1405,37 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
if ((fattr->valid & NFS_ATTR_FATTR) == 0)
return 0;
+ /* Has the inode gone and changed behind our back? */
+ if (nfsi->fileid != fattr->fileid
+ || (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) {
+ return -EIO;
+ }
+
/* Are we in the process of updating data on the server? */
data_unstable = nfs_caches_unstable(inode);
/* Do atomic weak cache consistency updates */
nfs_wcc_update_inode(inode, fattr);
- if ((fattr->valid & NFS_ATTR_FATTR_V4) != 0 &&
- nfsi->change_attr != fattr->change_attr) {
+ if ((fattr->valid & NFS_ATTR_FATTR_V4) != 0) {
+ if (nfsi->change_attr == fattr->change_attr)
+ goto out;
nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
if (!data_unstable)
nfsi->cache_validity |= NFS_INO_REVAL_PAGECACHE;
}
- /* Has the inode gone and changed behind our back? */
- if (nfsi->fileid != fattr->fileid
- || (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) {
- return -EIO;
- }
-
- cur_size = i_size_read(inode);
- new_isize = nfs_size_to_loff_t(fattr->size);
-
/* Verify a few of the more important attributes */
if (!timespec_equal(&inode->i_mtime, &fattr->mtime)) {
nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
if (!data_unstable)
nfsi->cache_validity |= NFS_INO_REVAL_PAGECACHE;
}
- if (cur_size != new_isize) {
- nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
- if (nfsi->npages == 0)
- nfsi->cache_validity |= NFS_INO_REVAL_PAGECACHE;
- }
+
+ cur_size = i_size_read(inode);
+ new_isize = nfs_size_to_loff_t(fattr->size);
+ if (cur_size != new_isize && nfsi->npages == 0)
+ nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE;
/* Have any file permissions changed? */
if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO)
@@ -1343,6 +1447,7 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
if (inode->i_nlink != fattr->nlink)
nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
+out:
if (!timespec_equal(&inode->i_atime, &fattr->atime))
nfsi->cache_validity |= NFS_INO_INVALID_ATIME;
@@ -1481,15 +1586,6 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
nfsi->cache_change_attribute = jiffies;
}
- if ((fattr->valid & NFS_ATTR_FATTR_V4)
- && nfsi->change_attr != fattr->change_attr) {
- dprintk("NFS: change_attr change on server for file %s/%ld\n",
- inode->i_sb->s_id, inode->i_ino);
- nfsi->change_attr = fattr->change_attr;
- invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
- nfsi->cache_change_attribute = jiffies;
- }
-
/* If ctime has changed we should definitely clear access+acl caches */
if (!timespec_equal(&inode->i_ctime, &fattr->ctime)) {
invalid |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
@@ -1519,8 +1615,20 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
inode->i_blksize = fattr->du.nfs2.blocksize;
}
+ if ((fattr->valid & NFS_ATTR_FATTR_V4)) {
+ if (nfsi->change_attr != fattr->change_attr) {
+ dprintk("NFS: change_attr change on server for file %s/%ld\n",
+ inode->i_sb->s_id, inode->i_ino);
+ nfsi->change_attr = fattr->change_attr;
+ invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
+ nfsi->cache_change_attribute = jiffies;
+ } else
+ invalid &= ~(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA);
+ }
+
/* Update attrtimeo value if we're out of the unstable period */
if (invalid & NFS_INO_INVALID_ATTR) {
+ nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE);
nfsi->attrtimeo = NFS_MINATTRTIMEO(inode);
nfsi->attrtimeo_timestamp = jiffies;
} else if (time_after(jiffies, nfsi->attrtimeo_timestamp+nfsi->attrtimeo)) {
@@ -1637,10 +1745,9 @@ static struct super_block *nfs_get_sb(struct file_system_type *fs_type,
#endif /* CONFIG_NFS_V3 */
s = ERR_PTR(-ENOMEM);
- server = kmalloc(sizeof(struct nfs_server), GFP_KERNEL);
+ server = kzalloc(sizeof(struct nfs_server), GFP_KERNEL);
if (!server)
goto out_err;
- memset(server, 0, sizeof(struct nfs_server));
/* Zero out the NFS state stuff */
init_nfsv4_state(server);
server->client = server->client_sys = server->client_acl = ERR_PTR(-EINVAL);
@@ -1712,6 +1819,7 @@ static void nfs_kill_super(struct super_block *s)
rpciod_down(); /* release rpciod */
+ nfs_free_iostats(server->io_stats);
kfree(server->hostname);
kfree(server);
}
@@ -1738,6 +1846,7 @@ static struct super_operations nfs4_sops = {
.clear_inode = nfs4_clear_inode,
.umount_begin = nfs_umount_begin,
.show_options = nfs_show_options,
+ .show_stats = nfs_show_stats,
};
/*
@@ -1800,6 +1909,9 @@ static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data,
nfs_init_timeout_values(&timeparms, data->proto, data->timeo, data->retrans);
+ server->retrans_timeo = timeparms.to_initval;
+ server->retrans_count = timeparms.to_retries;
+
clp = nfs4_get_client(&server->addr.sin_addr);
if (!clp) {
dprintk("%s: failed to create NFS4 client.\n", __FUNCTION__);
@@ -1941,10 +2053,9 @@ static struct super_block *nfs4_get_sb(struct file_system_type *fs_type,
return ERR_PTR(-EINVAL);
}
- server = kmalloc(sizeof(struct nfs_server), GFP_KERNEL);
+ server = kzalloc(sizeof(struct nfs_server), GFP_KERNEL);
if (!server)
return ERR_PTR(-ENOMEM);
- memset(server, 0, sizeof(struct nfs_server));
/* Zero out the NFS state stuff */
init_nfsv4_state(server);
server->client = server->client_sys = server->client_acl = ERR_PTR(-EINVAL);
@@ -2024,10 +2135,12 @@ static void nfs4_kill_super(struct super_block *sb)
if (server->client != NULL && !IS_ERR(server->client))
rpc_shutdown_client(server->client);
- rpciod_down(); /* release rpciod */
destroy_nfsv4_state(server);
+ rpciod_down();
+
+ nfs_free_iostats(server->io_stats);
kfree(server->hostname);
kfree(server);
}
diff --git a/fs/nfs/iostat.h b/fs/nfs/iostat.h
new file mode 100644
index 000000000000..6350ecbde589
--- /dev/null
+++ b/fs/nfs/iostat.h
@@ -0,0 +1,164 @@
+/*
+ * linux/fs/nfs/iostat.h
+ *
+ * Declarations for NFS client per-mount statistics
+ *
+ * Copyright (C) 2005, 2006 Chuck Lever <cel@netapp.com>
+ *
+ * NFS client per-mount statistics provide information about the health of
+ * the NFS client and the health of each NFS mount point. Generally these
+ * are not for detailed problem diagnosis, but simply to indicate that there
+ * is a problem.
+ *
+ * These counters are not meant to be human-readable, but are meant to be
+ * integrated into system monitoring tools such as "sar" and "iostat". As
+ * such, the counters are sampled by the tools over time, and are never
+ * zeroed after a file system is mounted. Moving averages can be computed
+ * by the tools by taking the difference between two instantaneous samples
+ * and dividing that by the time between the samples.
+ */
+
+#ifndef _NFS_IOSTAT
+#define _NFS_IOSTAT
+
+#define NFS_IOSTAT_VERS "1.0"
+
+/*
+ * NFS byte counters
+ *
+ * 1. SERVER - the number of payload bytes read from or written to the
+ * server by the NFS client via an NFS READ or WRITE request.
+ *
+ * 2. NORMAL - the number of bytes read or written by applications via
+ * the read(2) and write(2) system call interfaces.
+ *
+ * 3. DIRECT - the number of bytes read or written from files opened
+ * with the O_DIRECT flag.
+ *
+ * These counters give a view of the data throughput into and out of the NFS
+ * client. Comparing the number of bytes requested by an application with the
+ * number of bytes the client requests from the server can provide an
+ * indication of client efficiency (per-op, cache hits, etc).
+ *
+ * These counters can also help characterize which access methods are in
+ * use. DIRECT by itself shows whether there is any O_DIRECT traffic.
+ * NORMAL + DIRECT shows how much data is going through the system call
+ * interface. A large amount of SERVER traffic without much NORMAL or
+ * DIRECT traffic shows that applications are using mapped files.
+ *
+ * NFS page counters
+ *
+ * These count the number of pages read or written via nfs_readpage(),
+ * nfs_readpages(), or their write equivalents.
+ */
+enum nfs_stat_bytecounters {
+ NFSIOS_NORMALREADBYTES = 0,
+ NFSIOS_NORMALWRITTENBYTES,
+ NFSIOS_DIRECTREADBYTES,
+ NFSIOS_DIRECTWRITTENBYTES,
+ NFSIOS_SERVERREADBYTES,
+ NFSIOS_SERVERWRITTENBYTES,
+ NFSIOS_READPAGES,
+ NFSIOS_WRITEPAGES,
+ __NFSIOS_BYTESMAX,
+};
+
+/*
+ * NFS event counters
+ *
+ * These counters provide a low-overhead way of monitoring client activity
+ * without enabling NFS trace debugging. The counters show the rate at
+ * which VFS requests are made, and how often the client invalidates its
+ * data and attribute caches. This allows system administrators to monitor
+ * such things as how close-to-open is working, and answer questions such
+ * as "why are there so many GETATTR requests on the wire?"
+ *
+ * They also count anamolous events such as short reads and writes, silly
+ * renames due to close-after-delete, and operations that change the size
+ * of a file (such operations can often be the source of data corruption
+ * if applications aren't using file locking properly).
+ */
+enum nfs_stat_eventcounters {
+ NFSIOS_INODEREVALIDATE = 0,
+ NFSIOS_DENTRYREVALIDATE,
+ NFSIOS_DATAINVALIDATE,
+ NFSIOS_ATTRINVALIDATE,
+ NFSIOS_VFSOPEN,
+ NFSIOS_VFSLOOKUP,
+ NFSIOS_VFSACCESS,
+ NFSIOS_VFSUPDATEPAGE,
+ NFSIOS_VFSREADPAGE,
+ NFSIOS_VFSREADPAGES,
+ NFSIOS_VFSWRITEPAGE,
+ NFSIOS_VFSWRITEPAGES,
+ NFSIOS_VFSGETDENTS,
+ NFSIOS_VFSSETATTR,
+ NFSIOS_VFSFLUSH,
+ NFSIOS_VFSFSYNC,
+ NFSIOS_VFSLOCK,
+ NFSIOS_VFSRELEASE,
+ NFSIOS_CONGESTIONWAIT,
+ NFSIOS_SETATTRTRUNC,
+ NFSIOS_EXTENDWRITE,
+ NFSIOS_SILLYRENAME,
+ NFSIOS_SHORTREAD,
+ NFSIOS_SHORTWRITE,
+ NFSIOS_DELAY,
+ __NFSIOS_COUNTSMAX,
+};
+
+#ifdef __KERNEL__
+
+#include <linux/percpu.h>
+#include <linux/cache.h>
+
+struct nfs_iostats {
+ unsigned long long bytes[__NFSIOS_BYTESMAX];
+ unsigned long events[__NFSIOS_COUNTSMAX];
+} ____cacheline_aligned;
+
+static inline void nfs_inc_server_stats(struct nfs_server *server, enum nfs_stat_eventcounters stat)
+{
+ struct nfs_iostats *iostats;
+ int cpu;
+
+ cpu = get_cpu();
+ iostats = per_cpu_ptr(server->io_stats, cpu);
+ iostats->events[stat] ++;
+ put_cpu_no_resched();
+}
+
+static inline void nfs_inc_stats(struct inode *inode, enum nfs_stat_eventcounters stat)
+{
+ nfs_inc_server_stats(NFS_SERVER(inode), stat);
+}
+
+static inline void nfs_add_server_stats(struct nfs_server *server, enum nfs_stat_bytecounters stat, unsigned long addend)
+{
+ struct nfs_iostats *iostats;
+ int cpu;
+
+ cpu = get_cpu();
+ iostats = per_cpu_ptr(server->io_stats, cpu);
+ iostats->bytes[stat] += addend;
+ put_cpu_no_resched();
+}
+
+static inline void nfs_add_stats(struct inode *inode, enum nfs_stat_bytecounters stat, unsigned long addend)
+{
+ nfs_add_server_stats(NFS_SERVER(inode), stat, addend);
+}
+
+static inline struct nfs_iostats *nfs_alloc_iostats(void)
+{
+ return alloc_percpu(struct nfs_iostats);
+}
+
+static inline void nfs_free_iostats(struct nfs_iostats *stats)
+{
+ if (stats != NULL)
+ free_percpu(stats);
+}
+
+#endif
+#endif
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
index 0b9a78353d6e..445abb4d4214 100644
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -49,9 +49,12 @@ nfsroot_mount(struct sockaddr_in *addr, char *path, struct nfs_fh *fh,
struct mnt_fhstatus result = {
.fh = fh
};
+ struct rpc_message msg = {
+ .rpc_argp = path,
+ .rpc_resp = &result,
+ };
char hostname[32];
int status;
- int call;
dprintk("NFS: nfs_mount(%08x:%s)\n",
(unsigned)ntohl(addr->sin_addr.s_addr), path);
@@ -61,8 +64,12 @@ nfsroot_mount(struct sockaddr_in *addr, char *path, struct nfs_fh *fh,
if (IS_ERR(mnt_clnt))
return PTR_ERR(mnt_clnt);
- call = (version == NFS_MNT3_VERSION) ? MOUNTPROC3_MNT : MNTPROC_MNT;
- status = rpc_call(mnt_clnt, call, path, &result, 0);
+ if (version == NFS_MNT3_VERSION)
+ msg.rpc_proc = &mnt_clnt->cl_procinfo[MOUNTPROC3_MNT];
+ else
+ msg.rpc_proc = &mnt_clnt->cl_procinfo[MNTPROC_MNT];
+
+ status = rpc_call_sync(mnt_clnt, &msg, 0);
return status < 0? status : (result.status? -EACCES : 0);
}
@@ -137,6 +144,8 @@ static struct rpc_procinfo mnt_procedures[] = {
.p_encode = (kxdrproc_t) xdr_encode_dirpath,
.p_decode = (kxdrproc_t) xdr_decode_fhstatus,
.p_bufsiz = MNT_dirpath_sz << 2,
+ .p_statidx = MNTPROC_MNT,
+ .p_name = "MOUNT",
},
};
@@ -146,6 +155,8 @@ static struct rpc_procinfo mnt3_procedures[] = {
.p_encode = (kxdrproc_t) xdr_encode_dirpath,
.p_decode = (kxdrproc_t) xdr_decode_fhstatus3,
.p_bufsiz = MNT_dirpath_sz << 2,
+ .p_statidx = MOUNTPROC3_MNT,
+ .p_name = "MOUNT",
},
};
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index 6548a65de944..f0015fa876e1 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -682,7 +682,9 @@ nfs_stat_to_errno(int stat)
.p_encode = (kxdrproc_t) nfs_xdr_##argtype, \
.p_decode = (kxdrproc_t) nfs_xdr_##restype, \
.p_bufsiz = MAX(NFS_##argtype##_sz,NFS_##restype##_sz) << 2, \
- .p_timer = timer \
+ .p_timer = timer, \
+ .p_statidx = NFSPROC_##proc, \
+ .p_name = #proc, \
}
struct rpc_procinfo nfs_procedures[] = {
PROC(GETATTR, fhandle, attrstat, 1),
diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c
index 6a5bbc0ae941..33287879bd23 100644
--- a/fs/nfs/nfs3acl.c
+++ b/fs/nfs/nfs3acl.c
@@ -190,6 +190,10 @@ struct posix_acl *nfs3_proc_getacl(struct inode *inode, int type)
struct nfs3_getaclres res = {
.fattr = &fattr,
};
+ struct rpc_message msg = {
+ .rpc_argp = &args,
+ .rpc_resp = &res,
+ };
struct posix_acl *acl;
int status, count;
@@ -218,8 +222,8 @@ struct posix_acl *nfs3_proc_getacl(struct inode *inode, int type)
return NULL;
dprintk("NFS call getacl\n");
- status = rpc_call(server->client_acl, ACLPROC3_GETACL,
- &args, &res, 0);
+ msg.rpc_proc = &server->client_acl->cl_procinfo[ACLPROC3_GETACL];
+ status = rpc_call_sync(server->client_acl, &msg, 0);
dprintk("NFS reply getacl: %d\n", status);
/* pages may have been allocated at the xdr layer. */
@@ -286,6 +290,10 @@ static int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl,
.acl_access = acl,
.pages = pages,
};
+ struct rpc_message msg = {
+ .rpc_argp = &args,
+ .rpc_resp = &fattr,
+ };
int status, count;
status = -EOPNOTSUPP;
@@ -306,8 +314,8 @@ static int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl,
dprintk("NFS call setacl\n");
nfs_begin_data_update(inode);
- status = rpc_call(server->client_acl, ACLPROC3_SETACL,
- &args, &fattr, 0);
+ msg.rpc_proc = &server->client_acl->cl_procinfo[ACLPROC3_SETACL];
+ status = rpc_call_sync(server->client_acl, &msg, 0);
spin_lock(&inode->i_lock);
NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ACCESS;
spin_unlock(&inode->i_lock);
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index ed67567f0556..cf186f0d2b3b 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -19,6 +19,8 @@
#include <linux/smp_lock.h>
#include <linux/nfs_mount.h>
+#include "iostat.h"
+
#define NFSDBG_FACILITY NFSDBG_PROC
extern struct rpc_procinfo nfs3_procedures[];
@@ -41,27 +43,14 @@ nfs3_rpc_wrapper(struct rpc_clnt *clnt, struct rpc_message *msg, int flags)
return res;
}
-static inline int
-nfs3_rpc_call_wrapper(struct rpc_clnt *clnt, u32 proc, void *argp, void *resp, int flags)
-{
- struct rpc_message msg = {
- .rpc_proc = &clnt->cl_procinfo[proc],
- .rpc_argp = argp,
- .rpc_resp = resp,
- };
- return nfs3_rpc_wrapper(clnt, &msg, flags);
-}
-
-#define rpc_call(clnt, proc, argp, resp, flags) \
- nfs3_rpc_call_wrapper(clnt, proc, argp, resp, flags)
-#define rpc_call_sync(clnt, msg, flags) \
- nfs3_rpc_wrapper(clnt, msg, flags)
+#define rpc_call_sync(clnt, msg, flags) nfs3_rpc_wrapper(clnt, msg, flags)
static int
-nfs3_async_handle_jukebox(struct rpc_task *task)
+nfs3_async_handle_jukebox(struct rpc_task *task, struct inode *inode)
{
if (task->tk_status != -EJUKEBOX)
return 0;
+ nfs_inc_stats(inode, NFSIOS_DELAY);
task->tk_status = 0;
rpc_restart_call(task);
rpc_delay(task, NFS_JUKEBOX_RETRY_TIME);
@@ -72,14 +61,21 @@ static int
do_proc_get_root(struct rpc_clnt *client, struct nfs_fh *fhandle,
struct nfs_fsinfo *info)
{
+ struct rpc_message msg = {
+ .rpc_proc = &nfs3_procedures[NFS3PROC_FSINFO],
+ .rpc_argp = fhandle,
+ .rpc_resp = info,
+ };
int status;
dprintk("%s: call fsinfo\n", __FUNCTION__);
nfs_fattr_init(info->fattr);
- status = rpc_call(client, NFS3PROC_FSINFO, fhandle, info, 0);
+ status = rpc_call_sync(client, &msg, 0);
dprintk("%s: reply fsinfo: %d\n", __FUNCTION__, status);
if (!(info->fattr->valid & NFS_ATTR_FATTR)) {
- status = rpc_call(client, NFS3PROC_GETATTR, fhandle, info->fattr, 0);
+ msg.rpc_proc = &nfs3_procedures[NFS3PROC_GETATTR];
+ msg.rpc_resp = info->fattr;
+ status = rpc_call_sync(client, &msg, 0);
dprintk("%s: reply getattr: %d\n", __FUNCTION__, status);
}
return status;
@@ -107,12 +103,16 @@ static int
nfs3_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
struct nfs_fattr *fattr)
{
+ struct rpc_message msg = {
+ .rpc_proc = &nfs3_procedures[NFS3PROC_GETATTR],
+ .rpc_argp = fhandle,
+ .rpc_resp = fattr,
+ };
int status;
dprintk("NFS call getattr\n");
nfs_fattr_init(fattr);
- status = rpc_call(server->client, NFS3PROC_GETATTR,
- fhandle, fattr, 0);
+ status = rpc_call_sync(server->client, &msg, 0);
dprintk("NFS reply getattr: %d\n", status);
return status;
}
@@ -126,11 +126,16 @@ nfs3_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
.fh = NFS_FH(inode),
.sattr = sattr,
};
+ struct rpc_message msg = {
+ .rpc_proc = &nfs3_procedures[NFS3PROC_SETATTR],
+ .rpc_argp = &arg,
+ .rpc_resp = fattr,
+ };
int status;
dprintk("NFS call setattr\n");
nfs_fattr_init(fattr);
- status = rpc_call(NFS_CLIENT(inode), NFS3PROC_SETATTR, &arg, fattr, 0);
+ status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
if (status == 0)
nfs_setattr_update_inode(inode, sattr);
dprintk("NFS reply setattr: %d\n", status);
@@ -152,15 +157,23 @@ nfs3_proc_lookup(struct inode *dir, struct qstr *name,
.fh = fhandle,
.fattr = fattr
};
+ struct rpc_message msg = {
+ .rpc_proc = &nfs3_procedures[NFS3PROC_LOOKUP],
+ .rpc_argp = &arg,
+ .rpc_resp = &res,
+ };
int status;
dprintk("NFS call lookup %s\n", name->name);
nfs_fattr_init(&dir_attr);
nfs_fattr_init(fattr);
- status = rpc_call(NFS_CLIENT(dir), NFS3PROC_LOOKUP, &arg, &res, 0);
- if (status >= 0 && !(fattr->valid & NFS_ATTR_FATTR))
- status = rpc_call(NFS_CLIENT(dir), NFS3PROC_GETATTR,
- fhandle, fattr, 0);
+ status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+ if (status >= 0 && !(fattr->valid & NFS_ATTR_FATTR)) {
+ msg.rpc_proc = &nfs3_procedures[NFS3PROC_GETATTR];
+ msg.rpc_argp = fhandle;
+ msg.rpc_resp = fattr;
+ status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+ }
dprintk("NFS reply lookup: %d\n", status);
if (status >= 0)
status = nfs_refresh_inode(dir, &dir_attr);
@@ -180,7 +193,7 @@ static int nfs3_proc_access(struct inode *inode, struct nfs_access_entry *entry)
.rpc_proc = &nfs3_procedures[NFS3PROC_ACCESS],
.rpc_argp = &arg,
.rpc_resp = &res,
- .rpc_cred = entry->cred
+ .rpc_cred = entry->cred,
};
int mode = entry->mask;
int status;
@@ -226,12 +239,16 @@ static int nfs3_proc_readlink(struct inode *inode, struct page *page,
.pglen = pglen,
.pages = &page
};
+ struct rpc_message msg = {
+ .rpc_proc = &nfs3_procedures[NFS3PROC_READLINK],
+ .rpc_argp = &args,
+ .rpc_resp = &fattr,
+ };
int status;
dprintk("NFS call readlink\n");
nfs_fattr_init(&fattr);
- status = rpc_call(NFS_CLIENT(inode), NFS3PROC_READLINK,
- &args, &fattr, 0);
+ status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
nfs_refresh_inode(inode, &fattr);
dprintk("NFS reply readlink: %d\n", status);
return status;
@@ -327,6 +344,11 @@ nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
.fh = &fhandle,
.fattr = &fattr
};
+ struct rpc_message msg = {
+ .rpc_proc = &nfs3_procedures[NFS3PROC_CREATE],
+ .rpc_argp = &arg,
+ .rpc_resp = &res,
+ };
mode_t mode = sattr->ia_mode;
int status;
@@ -343,8 +365,8 @@ nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
again:
nfs_fattr_init(&dir_attr);
nfs_fattr_init(&fattr);
- status = rpc_call(NFS_CLIENT(dir), NFS3PROC_CREATE, &arg, &res, 0);
- nfs_post_op_update_inode(dir, &dir_attr);
+ status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+ nfs_refresh_inode(dir, &dir_attr);
/* If the server doesn't support the exclusive creation semantics,
* try again with simple 'guarded' mode. */
@@ -447,7 +469,7 @@ nfs3_proc_unlink_done(struct dentry *dir, struct rpc_task *task)
struct rpc_message *msg = &task->tk_msg;
struct nfs_fattr *dir_attr;
- if (nfs3_async_handle_jukebox(task))
+ if (nfs3_async_handle_jukebox(task, dir->d_inode))
return 1;
if (msg->rpc_argp) {
dir_attr = (struct nfs_fattr*)msg->rpc_resp;
@@ -474,12 +496,17 @@ nfs3_proc_rename(struct inode *old_dir, struct qstr *old_name,
.fromattr = &old_dir_attr,
.toattr = &new_dir_attr
};
+ struct rpc_message msg = {
+ .rpc_proc = &nfs3_procedures[NFS3PROC_RENAME],
+ .rpc_argp = &arg,
+ .rpc_resp = &res,
+ };
int status;
dprintk("NFS call rename %s -> %s\n", old_name->name, new_name->name);
nfs_fattr_init(&old_dir_attr);
nfs_fattr_init(&new_dir_attr);
- status = rpc_call(NFS_CLIENT(old_dir), NFS3PROC_RENAME, &arg, &res, 0);
+ status = rpc_call_sync(NFS_CLIENT(old_dir), &msg, 0);
nfs_post_op_update_inode(old_dir, &old_dir_attr);
nfs_post_op_update_inode(new_dir, &new_dir_attr);
dprintk("NFS reply rename: %d\n", status);
@@ -500,12 +527,17 @@ nfs3_proc_link(struct inode *inode, struct inode *dir, struct qstr *name)
.dir_attr = &dir_attr,
.fattr = &fattr
};
+ struct rpc_message msg = {
+ .rpc_proc = &nfs3_procedures[NFS3PROC_LINK],
+ .rpc_argp = &arg,
+ .rpc_resp = &res,
+ };
int status;
dprintk("NFS call link %s\n", name->name);
nfs_fattr_init(&dir_attr);
nfs_fattr_init(&fattr);
- status = rpc_call(NFS_CLIENT(inode), NFS3PROC_LINK, &arg, &res, 0);
+ status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
nfs_post_op_update_inode(dir, &dir_attr);
nfs_post_op_update_inode(inode, &fattr);
dprintk("NFS reply link: %d\n", status);
@@ -531,6 +563,11 @@ nfs3_proc_symlink(struct inode *dir, struct qstr *name, struct qstr *path,
.fh = fhandle,
.fattr = fattr
};
+ struct rpc_message msg = {
+ .rpc_proc = &nfs3_procedures[NFS3PROC_SYMLINK],
+ .rpc_argp = &arg,
+ .rpc_resp = &res,
+ };
int status;
if (path->len > NFS3_MAXPATHLEN)
@@ -538,7 +575,7 @@ nfs3_proc_symlink(struct inode *dir, struct qstr *name, struct qstr *path,
dprintk("NFS call symlink %s -> %s\n", name->name, path->name);
nfs_fattr_init(&dir_attr);
nfs_fattr_init(fattr);
- status = rpc_call(NFS_CLIENT(dir), NFS3PROC_SYMLINK, &arg, &res, 0);
+ status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
nfs_post_op_update_inode(dir, &dir_attr);
dprintk("NFS reply symlink: %d\n", status);
return status;
@@ -560,6 +597,11 @@ nfs3_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr)
.fh = &fhandle,
.fattr = &fattr
};
+ struct rpc_message msg = {
+ .rpc_proc = &nfs3_procedures[NFS3PROC_MKDIR],
+ .rpc_argp = &arg,
+ .rpc_resp = &res,
+ };
int mode = sattr->ia_mode;
int status;
@@ -569,7 +611,7 @@ nfs3_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr)
nfs_fattr_init(&dir_attr);
nfs_fattr_init(&fattr);
- status = rpc_call(NFS_CLIENT(dir), NFS3PROC_MKDIR, &arg, &res, 0);
+ status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
nfs_post_op_update_inode(dir, &dir_attr);
if (status != 0)
goto out;
@@ -591,11 +633,16 @@ nfs3_proc_rmdir(struct inode *dir, struct qstr *name)
.name = name->name,
.len = name->len
};
+ struct rpc_message msg = {
+ .rpc_proc = &nfs3_procedures[NFS3PROC_RMDIR],
+ .rpc_argp = &arg,
+ .rpc_resp = &dir_attr,
+ };
int status;
dprintk("NFS call rmdir %s\n", name->name);
nfs_fattr_init(&dir_attr);
- status = rpc_call(NFS_CLIENT(dir), NFS3PROC_RMDIR, &arg, &dir_attr, 0);
+ status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
nfs_post_op_update_inode(dir, &dir_attr);
dprintk("NFS reply rmdir: %d\n", status);
return status;
@@ -672,6 +719,11 @@ nfs3_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
.fh = &fh,
.fattr = &fattr
};
+ struct rpc_message msg = {
+ .rpc_proc = &nfs3_procedures[NFS3PROC_MKNOD],
+ .rpc_argp = &arg,
+ .rpc_resp = &res,
+ };
mode_t mode = sattr->ia_mode;
int status;
@@ -690,7 +742,7 @@ nfs3_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
nfs_fattr_init(&dir_attr);
nfs_fattr_init(&fattr);
- status = rpc_call(NFS_CLIENT(dir), NFS3PROC_MKNOD, &arg, &res, 0);
+ status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
nfs_post_op_update_inode(dir, &dir_attr);
if (status != 0)
goto out;
@@ -707,11 +759,16 @@ static int
nfs3_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle,
struct nfs_fsstat *stat)
{
+ struct rpc_message msg = {
+ .rpc_proc = &nfs3_procedures[NFS3PROC_FSSTAT],
+ .rpc_argp = fhandle,
+ .rpc_resp = stat,
+ };
int status;
dprintk("NFS call fsstat\n");
nfs_fattr_init(stat->fattr);
- status = rpc_call(server->client, NFS3PROC_FSSTAT, fhandle, stat, 0);
+ status = rpc_call_sync(server->client, &msg, 0);
dprintk("NFS reply statfs: %d\n", status);
return status;
}
@@ -720,11 +777,16 @@ static int
nfs3_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle,
struct nfs_fsinfo *info)
{
+ struct rpc_message msg = {
+ .rpc_proc = &nfs3_procedures[NFS3PROC_FSINFO],
+ .rpc_argp = fhandle,
+ .rpc_resp = info,
+ };
int status;
dprintk("NFS call fsinfo\n");
nfs_fattr_init(info->fattr);
- status = rpc_call(server->client_sys, NFS3PROC_FSINFO, fhandle, info, 0);
+ status = rpc_call_sync(server->client_sys, &msg, 0);
dprintk("NFS reply fsinfo: %d\n", status);
return status;
}
@@ -733,40 +795,34 @@ static int
nfs3_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
struct nfs_pathconf *info)
{
+ struct rpc_message msg = {
+ .rpc_proc = &nfs3_procedures[NFS3PROC_PATHCONF],
+ .rpc_argp = fhandle,
+ .rpc_resp = info,
+ };
int status;
dprintk("NFS call pathconf\n");
nfs_fattr_init(info->fattr);
- status = rpc_call(server->client, NFS3PROC_PATHCONF, fhandle, info, 0);
+ status = rpc_call_sync(server->client, &msg, 0);
dprintk("NFS reply pathconf: %d\n", status);
return status;
}
extern u32 *nfs3_decode_dirent(u32 *, struct nfs_entry *, int);
-static void nfs3_read_done(struct rpc_task *task, void *calldata)
+static int nfs3_read_done(struct rpc_task *task, struct nfs_read_data *data)
{
- struct nfs_read_data *data = calldata;
-
- if (nfs3_async_handle_jukebox(task))
- return;
+ if (nfs3_async_handle_jukebox(task, data->inode))
+ return -EAGAIN;
/* Call back common NFS readpage processing */
if (task->tk_status >= 0)
nfs_refresh_inode(data->inode, &data->fattr);
- nfs_readpage_result(task, calldata);
+ return 0;
}
-static const struct rpc_call_ops nfs3_read_ops = {
- .rpc_call_done = nfs3_read_done,
- .rpc_release = nfs_readdata_release,
-};
-
-static void
-nfs3_proc_read_setup(struct nfs_read_data *data)
+static void nfs3_proc_read_setup(struct nfs_read_data *data)
{
- struct rpc_task *task = &data->task;
- struct inode *inode = data->inode;
- int flags;
struct rpc_message msg = {
.rpc_proc = &nfs3_procedures[NFS3PROC_READ],
.rpc_argp = &data->args,
@@ -774,37 +830,20 @@ nfs3_proc_read_setup(struct nfs_read_data *data)
.rpc_cred = data->cred,
};
- /* N.B. Do we need to test? Never called for swapfile inode */
- flags = RPC_TASK_ASYNC | (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0);
-
- /* Finalize the task. */
- rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs3_read_ops, data);
- rpc_call_setup(task, &msg, 0);
+ rpc_call_setup(&data->task, &msg, 0);
}
-static void nfs3_write_done(struct rpc_task *task, void *calldata)
+static int nfs3_write_done(struct rpc_task *task, struct nfs_write_data *data)
{
- struct nfs_write_data *data = calldata;
-
- if (nfs3_async_handle_jukebox(task))
- return;
+ if (nfs3_async_handle_jukebox(task, data->inode))
+ return -EAGAIN;
if (task->tk_status >= 0)
nfs_post_op_update_inode(data->inode, data->res.fattr);
- nfs_writeback_done(task, calldata);
+ return 0;
}
-static const struct rpc_call_ops nfs3_write_ops = {
- .rpc_call_done = nfs3_write_done,
- .rpc_release = nfs_writedata_release,
-};
-
-static void
-nfs3_proc_write_setup(struct nfs_write_data *data, int how)
+static void nfs3_proc_write_setup(struct nfs_write_data *data, int how)
{
- struct rpc_task *task = &data->task;
- struct inode *inode = data->inode;
- int stable;
- int flags;
struct rpc_message msg = {
.rpc_proc = &nfs3_procedures[NFS3PROC_WRITE],
.rpc_argp = &data->args,
@@ -812,45 +851,28 @@ nfs3_proc_write_setup(struct nfs_write_data *data, int how)
.rpc_cred = data->cred,
};
+ data->args.stable = NFS_UNSTABLE;
if (how & FLUSH_STABLE) {
- if (!NFS_I(inode)->ncommit)
- stable = NFS_FILE_SYNC;
- else
- stable = NFS_DATA_SYNC;
- } else
- stable = NFS_UNSTABLE;
- data->args.stable = stable;
-
- /* Set the initial flags for the task. */
- flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
+ data->args.stable = NFS_FILE_SYNC;
+ if (NFS_I(data->inode)->ncommit)
+ data->args.stable = NFS_DATA_SYNC;
+ }
/* Finalize the task. */
- rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs3_write_ops, data);
- rpc_call_setup(task, &msg, 0);
+ rpc_call_setup(&data->task, &msg, 0);
}
-static void nfs3_commit_done(struct rpc_task *task, void *calldata)
+static int nfs3_commit_done(struct rpc_task *task, struct nfs_write_data *data)
{
- struct nfs_write_data *data = calldata;
-
- if (nfs3_async_handle_jukebox(task))
- return;
+ if (nfs3_async_handle_jukebox(task, data->inode))
+ return -EAGAIN;
if (task->tk_status >= 0)
nfs_post_op_update_inode(data->inode, data->res.fattr);
- nfs_commit_done(task, calldata);
+ return 0;
}
-static const struct rpc_call_ops nfs3_commit_ops = {
- .rpc_call_done = nfs3_commit_done,
- .rpc_release = nfs_commit_release,
-};
-
-static void
-nfs3_proc_commit_setup(struct nfs_write_data *data, int how)
+static void nfs3_proc_commit_setup(struct nfs_write_data *data, int how)
{
- struct rpc_task *task = &data->task;
- struct inode *inode = data->inode;
- int flags;
struct rpc_message msg = {
.rpc_proc = &nfs3_procedures[NFS3PROC_COMMIT],
.rpc_argp = &data->args,
@@ -858,12 +880,7 @@ nfs3_proc_commit_setup(struct nfs_write_data *data, int how)
.rpc_cred = data->cred,
};
- /* Set the initial flags for the task. */
- flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
-
- /* Finalize the task. */
- rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs3_commit_ops, data);
- rpc_call_setup(task, &msg, 0);
+ rpc_call_setup(&data->task, &msg, 0);
}
static int
@@ -902,8 +919,11 @@ struct nfs_rpc_ops nfs_v3_clientops = {
.pathconf = nfs3_proc_pathconf,
.decode_dirent = nfs3_decode_dirent,
.read_setup = nfs3_proc_read_setup,
+ .read_done = nfs3_read_done,
.write_setup = nfs3_proc_write_setup,
+ .write_done = nfs3_write_done,
.commit_setup = nfs3_proc_commit_setup,
+ .commit_done = nfs3_commit_done,
.file_open = nfs_open,
.file_release = nfs_release,
.lock = nfs3_proc_lock,
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index 5224a191efb6..ec233619687e 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -1109,7 +1109,9 @@ nfs3_xdr_setaclres(struct rpc_rqst *req, u32 *p, struct nfs_fattr *fattr)
.p_encode = (kxdrproc_t) nfs3_xdr_##argtype, \
.p_decode = (kxdrproc_t) nfs3_xdr_##restype, \
.p_bufsiz = MAX(NFS3_##argtype##_sz,NFS3_##restype##_sz) << 2, \
- .p_timer = timer \
+ .p_timer = timer, \
+ .p_statidx = NFS3PROC_##proc, \
+ .p_name = #proc, \
}
struct rpc_procinfo nfs3_procedures[] = {
@@ -1150,6 +1152,7 @@ static struct rpc_procinfo nfs3_acl_procedures[] = {
.p_decode = (kxdrproc_t) nfs3_xdr_getaclres,
.p_bufsiz = MAX(ACL3_getaclargs_sz, ACL3_getaclres_sz) << 2,
.p_timer = 1,
+ .p_name = "GETACL",
},
[ACLPROC3_SETACL] = {
.p_proc = ACLPROC3_SETACL,
@@ -1157,6 +1160,7 @@ static struct rpc_procinfo nfs3_acl_procedures[] = {
.p_decode = (kxdrproc_t) nfs3_xdr_setaclres,
.p_bufsiz = MAX(ACL3_setaclargs_sz, ACL3_setaclres_sz) << 2,
.p_timer = 0,
+ .p_name = "SETACL",
},
};
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index f8c0066e02e1..47ece1dd3c67 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -51,6 +51,7 @@
#include "nfs4_fs.h"
#include "delegation.h"
+#include "iostat.h"
#define NFSDBG_FACILITY NFSDBG_PROC
@@ -335,7 +336,7 @@ static struct nfs4_state *nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data
if (!(data->f_attr.valid & NFS_ATTR_FATTR))
goto out;
inode = nfs_fhget(data->dir->d_sb, &data->o_res.fh, &data->f_attr);
- if (inode == NULL)
+ if (IS_ERR(inode))
goto out;
state = nfs4_get_open_state(inode, data->owner);
if (state == NULL)
@@ -604,11 +605,14 @@ static int _nfs4_proc_open_confirm(struct nfs4_opendata *data)
int status;
atomic_inc(&data->count);
+ /*
+ * If rpc_run_task() ends up calling ->rpc_release(), we
+ * want to ensure that it takes the 'error' code path.
+ */
+ data->rpc_status = -ENOMEM;
task = rpc_run_task(server->client, RPC_TASK_ASYNC, &nfs4_open_confirm_ops, data);
- if (IS_ERR(task)) {
- nfs4_opendata_free(data);
+ if (IS_ERR(task))
return PTR_ERR(task);
- }
status = nfs4_wait_for_completion_rpc_task(task);
if (status != 0) {
data->cancelled = 1;
@@ -707,11 +711,14 @@ static int _nfs4_proc_open(struct nfs4_opendata *data)
int status;
atomic_inc(&data->count);
+ /*
+ * If rpc_run_task() ends up calling ->rpc_release(), we
+ * want to ensure that it takes the 'error' code path.
+ */
+ data->rpc_status = -ENOMEM;
task = rpc_run_task(server->client, RPC_TASK_ASYNC, &nfs4_open_ops, data);
- if (IS_ERR(task)) {
- nfs4_opendata_free(data);
+ if (IS_ERR(task))
return PTR_ERR(task);
- }
status = nfs4_wait_for_completion_rpc_task(task);
if (status != 0) {
data->cancelled = 1;
@@ -908,7 +915,7 @@ out_put_state_owner:
static struct nfs4_state *nfs4_open_delegated(struct inode *inode, int flags, struct rpc_cred *cred)
{
struct nfs4_exception exception = { };
- struct nfs4_state *res;
+ struct nfs4_state *res = ERR_PTR(-EIO);
int err;
do {
@@ -1017,12 +1024,12 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir, struct dentry *dentry,
return res;
}
-static int _nfs4_do_setattr(struct nfs_server *server, struct nfs_fattr *fattr,
- struct nfs_fh *fhandle, struct iattr *sattr,
- struct nfs4_state *state)
+static int _nfs4_do_setattr(struct inode *inode, struct nfs_fattr *fattr,
+ struct iattr *sattr, struct nfs4_state *state)
{
+ struct nfs_server *server = NFS_SERVER(inode);
struct nfs_setattrargs arg = {
- .fh = fhandle,
+ .fh = NFS_FH(inode),
.iap = sattr,
.server = server,
.bitmask = server->attr_bitmask,
@@ -1041,7 +1048,9 @@ static int _nfs4_do_setattr(struct nfs_server *server, struct nfs_fattr *fattr,
nfs_fattr_init(fattr);
- if (state != NULL) {
+ if (nfs4_copy_delegation_stateid(&arg.stateid, inode)) {
+ /* Use that stateid */
+ } else if (state != NULL) {
msg.rpc_cred = state->owner->so_cred;
nfs4_copy_stateid(&arg.stateid, state, current->files);
} else
@@ -1053,16 +1062,15 @@ static int _nfs4_do_setattr(struct nfs_server *server, struct nfs_fattr *fattr,
return status;
}
-static int nfs4_do_setattr(struct nfs_server *server, struct nfs_fattr *fattr,
- struct nfs_fh *fhandle, struct iattr *sattr,
- struct nfs4_state *state)
+static int nfs4_do_setattr(struct inode *inode, struct nfs_fattr *fattr,
+ struct iattr *sattr, struct nfs4_state *state)
{
+ struct nfs_server *server = NFS_SERVER(inode);
struct nfs4_exception exception = { };
int err;
do {
err = nfs4_handle_exception(server,
- _nfs4_do_setattr(server, fattr, fhandle, sattr,
- state),
+ _nfs4_do_setattr(inode, fattr, sattr, state),
&exception);
} while (exception.retry);
return err;
@@ -1503,8 +1511,7 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
if (ctx != NULL)
state = ctx->state;
- status = nfs4_do_setattr(NFS_SERVER(inode), fattr,
- NFS_FH(inode), sattr, state);
+ status = nfs4_do_setattr(inode, fattr, sattr, state);
if (status == 0)
nfs_setattr_update_inode(inode, sattr);
if (ctx != NULL)
@@ -1823,8 +1830,7 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
d_instantiate(dentry, igrab(state->inode));
if (flags & O_EXCL) {
struct nfs_fattr fattr;
- status = nfs4_do_setattr(NFS_SERVER(dir), &fattr,
- NFS_FH(state->inode), sattr, state);
+ status = nfs4_do_setattr(state->inode, &fattr, sattr, state);
if (status == 0)
nfs_setattr_update_inode(state->inode, sattr);
}
@@ -2344,75 +2350,50 @@ static int nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
return err;
}
-static void nfs4_read_done(struct rpc_task *task, void *calldata)
+static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data)
{
- struct nfs_read_data *data = calldata;
- struct inode *inode = data->inode;
+ struct nfs_server *server = NFS_SERVER(data->inode);
- if (nfs4_async_handle_error(task, NFS_SERVER(inode)) == -EAGAIN) {
+ if (nfs4_async_handle_error(task, server) == -EAGAIN) {
rpc_restart_call(task);
- return;
+ return -EAGAIN;
}
if (task->tk_status > 0)
- renew_lease(NFS_SERVER(inode), data->timestamp);
- /* Call back common NFS readpage processing */
- nfs_readpage_result(task, calldata);
+ renew_lease(server, data->timestamp);
+ return 0;
}
-static const struct rpc_call_ops nfs4_read_ops = {
- .rpc_call_done = nfs4_read_done,
- .rpc_release = nfs_readdata_release,
-};
-
-static void
-nfs4_proc_read_setup(struct nfs_read_data *data)
+static void nfs4_proc_read_setup(struct nfs_read_data *data)
{
- struct rpc_task *task = &data->task;
struct rpc_message msg = {
.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ],
.rpc_argp = &data->args,
.rpc_resp = &data->res,
.rpc_cred = data->cred,
};
- struct inode *inode = data->inode;
- int flags;
data->timestamp = jiffies;
- /* N.B. Do we need to test? Never called for swapfile inode */
- flags = RPC_TASK_ASYNC | (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0);
-
- /* Finalize the task. */
- rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs4_read_ops, data);
- rpc_call_setup(task, &msg, 0);
+ rpc_call_setup(&data->task, &msg, 0);
}
-static void nfs4_write_done(struct rpc_task *task, void *calldata)
+static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data)
{
- struct nfs_write_data *data = calldata;
struct inode *inode = data->inode;
if (nfs4_async_handle_error(task, NFS_SERVER(inode)) == -EAGAIN) {
rpc_restart_call(task);
- return;
+ return -EAGAIN;
}
if (task->tk_status >= 0) {
renew_lease(NFS_SERVER(inode), data->timestamp);
nfs_post_op_update_inode(inode, data->res.fattr);
}
- /* Call back common NFS writeback processing */
- nfs_writeback_done(task, calldata);
+ return 0;
}
-static const struct rpc_call_ops nfs4_write_ops = {
- .rpc_call_done = nfs4_write_done,
- .rpc_release = nfs_writedata_release,
-};
-
-static void
-nfs4_proc_write_setup(struct nfs_write_data *data, int how)
+static void nfs4_proc_write_setup(struct nfs_write_data *data, int how)
{
- struct rpc_task *task = &data->task;
struct rpc_message msg = {
.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_WRITE],
.rpc_argp = &data->args,
@@ -2422,7 +2403,6 @@ nfs4_proc_write_setup(struct nfs_write_data *data, int how)
struct inode *inode = data->inode;
struct nfs_server *server = NFS_SERVER(inode);
int stable;
- int flags;
if (how & FLUSH_STABLE) {
if (!NFS_I(inode)->ncommit)
@@ -2437,57 +2417,37 @@ nfs4_proc_write_setup(struct nfs_write_data *data, int how)
data->timestamp = jiffies;
- /* Set the initial flags for the task. */
- flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
-
/* Finalize the task. */
- rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs4_write_ops, data);
- rpc_call_setup(task, &msg, 0);
+ rpc_call_setup(&data->task, &msg, 0);
}
-static void nfs4_commit_done(struct rpc_task *task, void *calldata)
+static int nfs4_commit_done(struct rpc_task *task, struct nfs_write_data *data)
{
- struct nfs_write_data *data = calldata;
struct inode *inode = data->inode;
if (nfs4_async_handle_error(task, NFS_SERVER(inode)) == -EAGAIN) {
rpc_restart_call(task);
- return;
+ return -EAGAIN;
}
if (task->tk_status >= 0)
nfs_post_op_update_inode(inode, data->res.fattr);
- /* Call back common NFS writeback processing */
- nfs_commit_done(task, calldata);
+ return 0;
}
-static const struct rpc_call_ops nfs4_commit_ops = {
- .rpc_call_done = nfs4_commit_done,
- .rpc_release = nfs_commit_release,
-};
-
-static void
-nfs4_proc_commit_setup(struct nfs_write_data *data, int how)
+static void nfs4_proc_commit_setup(struct nfs_write_data *data, int how)
{
- struct rpc_task *task = &data->task;
struct rpc_message msg = {
.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT],
.rpc_argp = &data->args,
.rpc_resp = &data->res,
.rpc_cred = data->cred,
};
- struct inode *inode = data->inode;
- struct nfs_server *server = NFS_SERVER(inode);
- int flags;
+ struct nfs_server *server = NFS_SERVER(data->inode);
data->args.bitmask = server->attr_bitmask;
data->res.server = server;
- /* Set the initial flags for the task. */
- flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
-
- /* Finalize the task. */
- rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs4_commit_ops, data);
- rpc_call_setup(task, &msg, 0);
+ rpc_call_setup(&data->task, &msg, 0);
}
/*
@@ -2755,8 +2715,10 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server)
rpc_wake_up_task(task);
task->tk_status = 0;
return -EAGAIN;
- case -NFS4ERR_GRACE:
case -NFS4ERR_DELAY:
+ nfs_inc_server_stats((struct nfs_server *) server,
+ NFSIOS_DELAY);
+ case -NFS4ERR_GRACE:
rpc_delay(task, NFS4_POLL_RETRY_MAX);
task->tk_status = 0;
return -EAGAIN;
@@ -2893,8 +2855,7 @@ int nfs4_proc_setclientid(struct nfs4_client *clp, u32 program, unsigned short p
return status;
}
-int
-nfs4_proc_setclientid_confirm(struct nfs4_client *clp, struct rpc_cred *cred)
+static int _nfs4_proc_setclientid_confirm(struct nfs4_client *clp, struct rpc_cred *cred)
{
struct nfs_fsinfo fsinfo;
struct rpc_message msg = {
@@ -2918,6 +2879,24 @@ nfs4_proc_setclientid_confirm(struct nfs4_client *clp, struct rpc_cred *cred)
return status;
}
+int nfs4_proc_setclientid_confirm(struct nfs4_client *clp, struct rpc_cred *cred)
+{
+ long timeout;
+ int err;
+ do {
+ err = _nfs4_proc_setclientid_confirm(clp, cred);
+ switch (err) {
+ case 0:
+ return err;
+ case -NFS4ERR_RESOURCE:
+ /* The IBM lawyers misread another document! */
+ case -NFS4ERR_DELAY:
+ err = nfs4_delay(clp->cl_rpcclient, &timeout);
+ }
+ } while (err == 0);
+ return err;
+}
+
struct nfs4_delegreturndata {
struct nfs4_delegreturnargs args;
struct nfs4_delegreturnres res;
@@ -2958,7 +2937,7 @@ static void nfs4_delegreturn_release(void *calldata)
kfree(calldata);
}
-const static struct rpc_call_ops nfs4_delegreturn_ops = {
+static const struct rpc_call_ops nfs4_delegreturn_ops = {
.rpc_call_prepare = nfs4_delegreturn_prepare,
.rpc_call_done = nfs4_delegreturn_done,
.rpc_release = nfs4_delegreturn_release,
@@ -2986,10 +2965,8 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co
data->rpc_status = 0;
task = rpc_run_task(NFS_CLIENT(inode), RPC_TASK_ASYNC, &nfs4_delegreturn_ops, data);
- if (IS_ERR(task)) {
- nfs4_delegreturn_release(data);
+ if (IS_ERR(task))
return PTR_ERR(task);
- }
status = nfs4_wait_for_completion_rpc_task(task);
if (status == 0) {
status = data->rpc_status;
@@ -3209,7 +3186,6 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl,
struct nfs_seqid *seqid)
{
struct nfs4_unlockdata *data;
- struct rpc_task *task;
data = nfs4_alloc_unlockdata(fl, ctx, lsp, seqid);
if (data == NULL) {
@@ -3219,10 +3195,7 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl,
/* Unlock _before_ we do the RPC call */
do_vfs_lock(fl->fl_file, fl);
- task = rpc_run_task(NFS_CLIENT(lsp->ls_state->inode), RPC_TASK_ASYNC, &nfs4_locku_ops, data);
- if (IS_ERR(task))
- nfs4_locku_release_calldata(data);
- return task;
+ return rpc_run_task(NFS_CLIENT(lsp->ls_state->inode), RPC_TASK_ASYNC, &nfs4_locku_ops, data);
}
static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *request)
@@ -3403,10 +3376,8 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f
data->arg.reclaim = 1;
task = rpc_run_task(NFS_CLIENT(state->inode), RPC_TASK_ASYNC,
&nfs4_lock_ops, data);
- if (IS_ERR(task)) {
- nfs4_lock_release(data);
+ if (IS_ERR(task))
return PTR_ERR(task);
- }
ret = nfs4_wait_for_completion_rpc_task(task);
if (ret == 0) {
ret = data->rpc_status;
@@ -3588,6 +3559,8 @@ ssize_t nfs4_listxattr(struct dentry *dentry, char *buf, size_t buflen)
{
size_t len = strlen(XATTR_NAME_NFSV4_ACL) + 1;
+ if (!nfs4_server_supports_acls(NFS_SERVER(dentry->d_inode)))
+ return 0;
if (buf && buflen < len)
return -ERANGE;
if (buf)
@@ -3644,8 +3617,11 @@ struct nfs_rpc_ops nfs_v4_clientops = {
.pathconf = nfs4_proc_pathconf,
.decode_dirent = nfs4_decode_dirent,
.read_setup = nfs4_proc_read_setup,
+ .read_done = nfs4_read_done,
.write_setup = nfs4_proc_write_setup,
+ .write_done = nfs4_write_done,
.commit_setup = nfs4_proc_commit_setup,
+ .commit_done = nfs4_commit_done,
.file_open = nfs_open,
.file_release = nfs_release,
.lock = nfs4_proc_lock,
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index afad0255e7db..96e5b82c153b 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -977,6 +977,7 @@ out:
out_error:
printk(KERN_WARNING "Error: state recovery failed on NFSv4 server %u.%u.%u.%u with error %d\n",
NIPQUAD(clp->cl_addr.s_addr), -status);
+ set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
goto out;
}
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 0a1bd36a4837..7c5d70efe720 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -4344,6 +4344,8 @@ nfs_stat_to_errno(int stat)
.p_encode = (kxdrproc_t) nfs4_xdr_##argtype, \
.p_decode = (kxdrproc_t) nfs4_xdr_##restype, \
.p_bufsiz = MAX(NFS4_##argtype##_sz,NFS4_##restype##_sz) << 2, \
+ .p_statidx = NFSPROC4_CLNT_##proc, \
+ .p_name = #proc, \
}
struct rpc_procinfo nfs4_procedures[] = {
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index d53857b148e2..106aca388ebc 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -85,6 +85,9 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode,
atomic_set(&req->wb_complete, 0);
req->wb_index = page->index;
page_cache_get(page);
+ BUG_ON(PagePrivate(page));
+ BUG_ON(!PageLocked(page));
+ BUG_ON(page->mapping->host != inode);
req->wb_offset = offset;
req->wb_pgbase = offset;
req->wb_bytes = count;
@@ -132,9 +135,11 @@ void nfs_clear_page_writeback(struct nfs_page *req)
{
struct nfs_inode *nfsi = NFS_I(req->wb_context->dentry->d_inode);
- spin_lock(&nfsi->req_lock);
- radix_tree_tag_clear(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_WRITEBACK);
- spin_unlock(&nfsi->req_lock);
+ if (req->wb_page != NULL) {
+ spin_lock(&nfsi->req_lock);
+ radix_tree_tag_clear(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_WRITEBACK);
+ spin_unlock(&nfsi->req_lock);
+ }
nfs_unlock_request(req);
}
@@ -147,8 +152,9 @@ void nfs_clear_page_writeback(struct nfs_page *req)
*/
void nfs_clear_request(struct nfs_page *req)
{
- if (req->wb_page) {
- page_cache_release(req->wb_page);
+ struct page *page = req->wb_page;
+ if (page != NULL) {
+ page_cache_release(page);
req->wb_page = NULL;
}
}
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index f5150d71c03d..9dd85cac2df0 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -58,16 +58,23 @@ nfs_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
{
struct nfs_fattr *fattr = info->fattr;
struct nfs2_fsstat fsinfo;
+ struct rpc_message msg = {
+ .rpc_proc = &nfs_procedures[NFSPROC_GETATTR],
+ .rpc_argp = fhandle,
+ .rpc_resp = fattr,
+ };
int status;
dprintk("%s: call getattr\n", __FUNCTION__);
nfs_fattr_init(fattr);
- status = rpc_call(server->client_sys, NFSPROC_GETATTR, fhandle, fattr, 0);
+ status = rpc_call_sync(server->client_sys, &msg, 0);
dprintk("%s: reply getattr: %d\n", __FUNCTION__, status);
if (status)
return status;
dprintk("%s: call statfs\n", __FUNCTION__);
- status = rpc_call(server->client_sys, NFSPROC_STATFS, fhandle, &fsinfo, 0);
+ msg.rpc_proc = &nfs_procedures[NFSPROC_STATFS];
+ msg.rpc_resp = &fsinfo;
+ status = rpc_call_sync(server->client_sys, &msg, 0);
dprintk("%s: reply statfs: %d\n", __FUNCTION__, status);
if (status)
return status;
@@ -90,12 +97,16 @@ static int
nfs_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
struct nfs_fattr *fattr)
{
+ struct rpc_message msg = {
+ .rpc_proc = &nfs_procedures[NFSPROC_GETATTR],
+ .rpc_argp = fhandle,
+ .rpc_resp = fattr,
+ };
int status;
dprintk("NFS call getattr\n");
nfs_fattr_init(fattr);
- status = rpc_call(server->client, NFSPROC_GETATTR,
- fhandle, fattr, 0);
+ status = rpc_call_sync(server->client, &msg, 0);
dprintk("NFS reply getattr: %d\n", status);
return status;
}
@@ -109,6 +120,11 @@ nfs_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
.fh = NFS_FH(inode),
.sattr = sattr
};
+ struct rpc_message msg = {
+ .rpc_proc = &nfs_procedures[NFSPROC_SETATTR],
+ .rpc_argp = &arg,
+ .rpc_resp = fattr,
+ };
int status;
/* Mask out the non-modebit related stuff from attr->ia_mode */
@@ -116,7 +132,7 @@ nfs_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
dprintk("NFS call setattr\n");
nfs_fattr_init(fattr);
- status = rpc_call(NFS_CLIENT(inode), NFSPROC_SETATTR, &arg, fattr, 0);
+ status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
if (status == 0)
nfs_setattr_update_inode(inode, sattr);
dprintk("NFS reply setattr: %d\n", status);
@@ -136,11 +152,16 @@ nfs_proc_lookup(struct inode *dir, struct qstr *name,
.fh = fhandle,
.fattr = fattr
};
+ struct rpc_message msg = {
+ .rpc_proc = &nfs_procedures[NFSPROC_LOOKUP],
+ .rpc_argp = &arg,
+ .rpc_resp = &res,
+ };
int status;
dprintk("NFS call lookup %s\n", name->name);
nfs_fattr_init(fattr);
- status = rpc_call(NFS_CLIENT(dir), NFSPROC_LOOKUP, &arg, &res, 0);
+ status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
dprintk("NFS reply lookup: %d\n", status);
return status;
}
@@ -154,10 +175,14 @@ static int nfs_proc_readlink(struct inode *inode, struct page *page,
.pglen = pglen,
.pages = &page
};
+ struct rpc_message msg = {
+ .rpc_proc = &nfs_procedures[NFSPROC_READLINK],
+ .rpc_argp = &args,
+ };
int status;
dprintk("NFS call readlink\n");
- status = rpc_call(NFS_CLIENT(inode), NFSPROC_READLINK, &args, NULL, 0);
+ status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
dprintk("NFS reply readlink: %d\n", status);
return status;
}
@@ -233,11 +258,16 @@ nfs_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
.fh = &fhandle,
.fattr = &fattr
};
+ struct rpc_message msg = {
+ .rpc_proc = &nfs_procedures[NFSPROC_CREATE],
+ .rpc_argp = &arg,
+ .rpc_resp = &res,
+ };
int status;
nfs_fattr_init(&fattr);
dprintk("NFS call create %s\n", dentry->d_name.name);
- status = rpc_call(NFS_CLIENT(dir), NFSPROC_CREATE, &arg, &res, 0);
+ status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
if (status == 0)
status = nfs_instantiate(dentry, &fhandle, &fattr);
dprintk("NFS reply create: %d\n", status);
@@ -263,6 +293,11 @@ nfs_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
.fh = &fhandle,
.fattr = &fattr
};
+ struct rpc_message msg = {
+ .rpc_proc = &nfs_procedures[NFSPROC_CREATE],
+ .rpc_argp = &arg,
+ .rpc_resp = &res,
+ };
int status, mode;
dprintk("NFS call mknod %s\n", dentry->d_name.name);
@@ -277,13 +312,13 @@ nfs_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
}
nfs_fattr_init(&fattr);
- status = rpc_call(NFS_CLIENT(dir), NFSPROC_CREATE, &arg, &res, 0);
+ status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
nfs_mark_for_revalidate(dir);
if (status == -EINVAL && S_ISFIFO(mode)) {
sattr->ia_mode = mode;
nfs_fattr_init(&fattr);
- status = rpc_call(NFS_CLIENT(dir), NFSPROC_CREATE, &arg, &res, 0);
+ status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
}
if (status == 0)
status = nfs_instantiate(dentry, &fhandle, &fattr);
@@ -302,8 +337,6 @@ nfs_proc_remove(struct inode *dir, struct qstr *name)
struct rpc_message msg = {
.rpc_proc = &nfs_procedures[NFSPROC_REMOVE],
.rpc_argp = &arg,
- .rpc_resp = NULL,
- .rpc_cred = NULL
};
int status;
@@ -355,10 +388,14 @@ nfs_proc_rename(struct inode *old_dir, struct qstr *old_name,
.toname = new_name->name,
.tolen = new_name->len
};
+ struct rpc_message msg = {
+ .rpc_proc = &nfs_procedures[NFSPROC_RENAME],
+ .rpc_argp = &arg,
+ };
int status;
dprintk("NFS call rename %s -> %s\n", old_name->name, new_name->name);
- status = rpc_call(NFS_CLIENT(old_dir), NFSPROC_RENAME, &arg, NULL, 0);
+ status = rpc_call_sync(NFS_CLIENT(old_dir), &msg, 0);
nfs_mark_for_revalidate(old_dir);
nfs_mark_for_revalidate(new_dir);
dprintk("NFS reply rename: %d\n", status);
@@ -374,10 +411,14 @@ nfs_proc_link(struct inode *inode, struct inode *dir, struct qstr *name)
.toname = name->name,
.tolen = name->len
};
+ struct rpc_message msg = {
+ .rpc_proc = &nfs_procedures[NFSPROC_LINK],
+ .rpc_argp = &arg,
+ };
int status;
dprintk("NFS call link %s\n", name->name);
- status = rpc_call(NFS_CLIENT(inode), NFSPROC_LINK, &arg, NULL, 0);
+ status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
nfs_mark_for_revalidate(inode);
nfs_mark_for_revalidate(dir);
dprintk("NFS reply link: %d\n", status);
@@ -397,6 +438,10 @@ nfs_proc_symlink(struct inode *dir, struct qstr *name, struct qstr *path,
.tolen = path->len,
.sattr = sattr
};
+ struct rpc_message msg = {
+ .rpc_proc = &nfs_procedures[NFSPROC_SYMLINK],
+ .rpc_argp = &arg,
+ };
int status;
if (path->len > NFS2_MAXPATHLEN)
@@ -404,7 +449,7 @@ nfs_proc_symlink(struct inode *dir, struct qstr *name, struct qstr *path,
dprintk("NFS call symlink %s -> %s\n", name->name, path->name);
nfs_fattr_init(fattr);
fhandle->size = 0;
- status = rpc_call(NFS_CLIENT(dir), NFSPROC_SYMLINK, &arg, NULL, 0);
+ status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
nfs_mark_for_revalidate(dir);
dprintk("NFS reply symlink: %d\n", status);
return status;
@@ -425,11 +470,16 @@ nfs_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr)
.fh = &fhandle,
.fattr = &fattr
};
+ struct rpc_message msg = {
+ .rpc_proc = &nfs_procedures[NFSPROC_MKDIR],
+ .rpc_argp = &arg,
+ .rpc_resp = &res,
+ };
int status;
dprintk("NFS call mkdir %s\n", dentry->d_name.name);
nfs_fattr_init(&fattr);
- status = rpc_call(NFS_CLIENT(dir), NFSPROC_MKDIR, &arg, &res, 0);
+ status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
nfs_mark_for_revalidate(dir);
if (status == 0)
status = nfs_instantiate(dentry, &fhandle, &fattr);
@@ -445,10 +495,14 @@ nfs_proc_rmdir(struct inode *dir, struct qstr *name)
.name = name->name,
.len = name->len
};
+ struct rpc_message msg = {
+ .rpc_proc = &nfs_procedures[NFSPROC_RMDIR],
+ .rpc_argp = &arg,
+ };
int status;
dprintk("NFS call rmdir %s\n", name->name);
- status = rpc_call(NFS_CLIENT(dir), NFSPROC_RMDIR, &arg, NULL, 0);
+ status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
nfs_mark_for_revalidate(dir);
dprintk("NFS reply rmdir: %d\n", status);
return status;
@@ -470,13 +524,12 @@ nfs_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
.fh = NFS_FH(dir),
.cookie = cookie,
.count = count,
- .pages = &page
+ .pages = &page,
};
struct rpc_message msg = {
.rpc_proc = &nfs_procedures[NFSPROC_READDIR],
.rpc_argp = &arg,
- .rpc_resp = NULL,
- .rpc_cred = cred
+ .rpc_cred = cred,
};
int status;
@@ -495,11 +548,16 @@ nfs_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle,
struct nfs_fsstat *stat)
{
struct nfs2_fsstat fsinfo;
+ struct rpc_message msg = {
+ .rpc_proc = &nfs_procedures[NFSPROC_STATFS],
+ .rpc_argp = fhandle,
+ .rpc_resp = &fsinfo,
+ };
int status;
dprintk("NFS call statfs\n");
nfs_fattr_init(stat->fattr);
- status = rpc_call(server->client, NFSPROC_STATFS, fhandle, &fsinfo, 0);
+ status = rpc_call_sync(server->client, &msg, 0);
dprintk("NFS reply statfs: %d\n", status);
if (status)
goto out;
@@ -518,11 +576,16 @@ nfs_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle,
struct nfs_fsinfo *info)
{
struct nfs2_fsstat fsinfo;
+ struct rpc_message msg = {
+ .rpc_proc = &nfs_procedures[NFSPROC_STATFS],
+ .rpc_argp = fhandle,
+ .rpc_resp = &fsinfo,
+ };
int status;
dprintk("NFS call fsinfo\n");
nfs_fattr_init(info->fattr);
- status = rpc_call(server->client, NFSPROC_STATFS, fhandle, &fsinfo, 0);
+ status = rpc_call_sync(server->client, &msg, 0);
dprintk("NFS reply fsinfo: %d\n", status);
if (status)
goto out;
@@ -550,10 +613,8 @@ nfs_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
extern u32 * nfs_decode_dirent(u32 *, struct nfs_entry *, int);
-static void nfs_read_done(struct rpc_task *task, void *calldata)
+static int nfs_read_done(struct rpc_task *task, struct nfs_read_data *data)
{
- struct nfs_read_data *data = calldata;
-
if (task->tk_status >= 0) {
nfs_refresh_inode(data->inode, data->res.fattr);
/* Emulate the eof flag, which isn't normally needed in NFSv2
@@ -562,20 +623,11 @@ static void nfs_read_done(struct rpc_task *task, void *calldata)
if (data->args.offset + data->args.count >= data->res.fattr->size)
data->res.eof = 1;
}
- nfs_readpage_result(task, calldata);
+ return 0;
}
-static const struct rpc_call_ops nfs_read_ops = {
- .rpc_call_done = nfs_read_done,
- .rpc_release = nfs_readdata_release,
-};
-
-static void
-nfs_proc_read_setup(struct nfs_read_data *data)
+static void nfs_proc_read_setup(struct nfs_read_data *data)
{
- struct rpc_task *task = &data->task;
- struct inode *inode = data->inode;
- int flags;
struct rpc_message msg = {
.rpc_proc = &nfs_procedures[NFSPROC_READ],
.rpc_argp = &data->args,
@@ -583,34 +635,18 @@ nfs_proc_read_setup(struct nfs_read_data *data)
.rpc_cred = data->cred,
};
- /* N.B. Do we need to test? Never called for swapfile inode */
- flags = RPC_TASK_ASYNC | (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0);
-
- /* Finalize the task. */
- rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs_read_ops, data);
- rpc_call_setup(task, &msg, 0);
+ rpc_call_setup(&data->task, &msg, 0);
}
-static void nfs_write_done(struct rpc_task *task, void *calldata)
+static int nfs_write_done(struct rpc_task *task, struct nfs_write_data *data)
{
- struct nfs_write_data *data = calldata;
-
if (task->tk_status >= 0)
nfs_post_op_update_inode(data->inode, data->res.fattr);
- nfs_writeback_done(task, calldata);
+ return 0;
}
-static const struct rpc_call_ops nfs_write_ops = {
- .rpc_call_done = nfs_write_done,
- .rpc_release = nfs_writedata_release,
-};
-
-static void
-nfs_proc_write_setup(struct nfs_write_data *data, int how)
+static void nfs_proc_write_setup(struct nfs_write_data *data, int how)
{
- struct rpc_task *task = &data->task;
- struct inode *inode = data->inode;
- int flags;
struct rpc_message msg = {
.rpc_proc = &nfs_procedures[NFSPROC_WRITE],
.rpc_argp = &data->args,
@@ -621,12 +657,8 @@ nfs_proc_write_setup(struct nfs_write_data *data, int how)
/* Note: NFSv2 ignores @stable and always uses NFS_FILE_SYNC */
data->args.stable = NFS_FILE_SYNC;
- /* Set the initial flags for the task. */
- flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
-
/* Finalize the task. */
- rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs_write_ops, data);
- rpc_call_setup(task, &msg, 0);
+ rpc_call_setup(&data->task, &msg, 0);
}
static void
@@ -672,7 +704,9 @@ struct nfs_rpc_ops nfs_v2_clientops = {
.pathconf = nfs_proc_pathconf,
.decode_dirent = nfs_decode_dirent,
.read_setup = nfs_proc_read_setup,
+ .read_done = nfs_read_done,
.write_setup = nfs_proc_write_setup,
+ .write_done = nfs_write_done,
.commit_setup = nfs_proc_commit_setup,
.file_open = nfs_open,
.file_release = nfs_release,
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 05eb43fadf8e..3961524fd4ab 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -31,17 +31,49 @@
#include <asm/system.h>
+#include "iostat.h"
+
#define NFSDBG_FACILITY NFSDBG_PAGECACHE
static int nfs_pagein_one(struct list_head *, struct inode *);
-static void nfs_readpage_result_partial(struct nfs_read_data *, int);
-static void nfs_readpage_result_full(struct nfs_read_data *, int);
+static const struct rpc_call_ops nfs_read_partial_ops;
+static const struct rpc_call_ops nfs_read_full_ops;
static kmem_cache_t *nfs_rdata_cachep;
-mempool_t *nfs_rdata_mempool;
+static mempool_t *nfs_rdata_mempool;
#define MIN_POOL_READ (32)
+struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount)
+{
+ struct nfs_read_data *p = mempool_alloc(nfs_rdata_mempool, SLAB_NOFS);
+
+ if (p) {
+ memset(p, 0, sizeof(*p));
+ INIT_LIST_HEAD(&p->pages);
+ if (pagecount < NFS_PAGEVEC_SIZE)
+ p->pagevec = &p->page_array[0];
+ else {
+ size_t size = ++pagecount * sizeof(struct page *);
+ p->pagevec = kmalloc(size, GFP_NOFS);
+ if (p->pagevec) {
+ memset(p->pagevec, 0, size);
+ } else {
+ mempool_free(p, nfs_rdata_mempool);
+ p = NULL;
+ }
+ }
+ }
+ return p;
+}
+
+void nfs_readdata_free(struct nfs_read_data *p)
+{
+ if (p && (p->pagevec != &p->page_array[0]))
+ kfree(p->pagevec);
+ mempool_free(p, nfs_rdata_mempool);
+}
+
void nfs_readdata_release(void *data)
{
nfs_readdata_free(data);
@@ -133,6 +165,8 @@ static int nfs_readpage_sync(struct nfs_open_context *ctx, struct inode *inode,
}
count -= result;
rdata->args.pgbase += result;
+ nfs_add_stats(inode, NFSIOS_SERVERREADBYTES, result);
+
/* Note: result == 0 should only happen if we're caching
* a write that extends the file and punches a hole.
*/
@@ -196,9 +230,11 @@ static void nfs_readpage_release(struct nfs_page *req)
* Set up the NFS read request struct
*/
static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
+ const struct rpc_call_ops *call_ops,
unsigned int count, unsigned int offset)
{
struct inode *inode;
+ int flags;
data->req = req;
data->inode = inode = req->wb_context->dentry->d_inode;
@@ -216,6 +252,9 @@ static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
data->res.eof = 0;
nfs_fattr_init(&data->fattr);
+ /* Set up the initial task struct. */
+ flags = RPC_TASK_ASYNC | (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0);
+ rpc_init_task(&data->task, NFS_CLIENT(inode), flags, call_ops, data);
NFS_PROTO(inode)->read_setup(data);
data->task.tk_cookie = (unsigned long)inode;
@@ -303,14 +342,15 @@ static int nfs_pagein_multi(struct list_head *head, struct inode *inode)
list_del_init(&data->pages);
data->pagevec[0] = page;
- data->complete = nfs_readpage_result_partial;
if (nbytes > rsize) {
- nfs_read_rpcsetup(req, data, rsize, offset);
+ nfs_read_rpcsetup(req, data, &nfs_read_partial_ops,
+ rsize, offset);
offset += rsize;
nbytes -= rsize;
} else {
- nfs_read_rpcsetup(req, data, nbytes, offset);
+ nfs_read_rpcsetup(req, data, &nfs_read_partial_ops,
+ nbytes, offset);
nbytes = 0;
}
nfs_execute_read(data);
@@ -356,8 +396,7 @@ static int nfs_pagein_one(struct list_head *head, struct inode *inode)
}
req = nfs_list_entry(data->pages.next);
- data->complete = nfs_readpage_result_full;
- nfs_read_rpcsetup(req, data, count, 0);
+ nfs_read_rpcsetup(req, data, &nfs_read_full_ops, count, 0);
nfs_execute_read(data);
return 0;
@@ -391,12 +430,15 @@ nfs_pagein_list(struct list_head *head, int rpages)
/*
* Handle a read reply that fills part of a page.
*/
-static void nfs_readpage_result_partial(struct nfs_read_data *data, int status)
+static void nfs_readpage_result_partial(struct rpc_task *task, void *calldata)
{
+ struct nfs_read_data *data = calldata;
struct nfs_page *req = data->req;
struct page *page = req->wb_page;
- if (status >= 0) {
+ if (nfs_readpage_result(task, data) != 0)
+ return;
+ if (task->tk_status >= 0) {
unsigned int request = data->args.count;
unsigned int result = data->res.count;
@@ -415,20 +457,28 @@ static void nfs_readpage_result_partial(struct nfs_read_data *data, int status)
}
}
+static const struct rpc_call_ops nfs_read_partial_ops = {
+ .rpc_call_done = nfs_readpage_result_partial,
+ .rpc_release = nfs_readdata_release,
+};
+
/*
* This is the callback from RPC telling us whether a reply was
* received or some error occurred (timeout or socket shutdown).
*/
-static void nfs_readpage_result_full(struct nfs_read_data *data, int status)
+static void nfs_readpage_result_full(struct rpc_task *task, void *calldata)
{
+ struct nfs_read_data *data = calldata;
unsigned int count = data->res.count;
+ if (nfs_readpage_result(task, data) != 0)
+ return;
while (!list_empty(&data->pages)) {
struct nfs_page *req = nfs_list_entry(data->pages.next);
struct page *page = req->wb_page;
nfs_list_remove_request(req);
- if (status >= 0) {
+ if (task->tk_status >= 0) {
if (count < PAGE_CACHE_SIZE) {
if (count < req->wb_bytes)
memclear_highpage_flush(page,
@@ -444,22 +494,33 @@ static void nfs_readpage_result_full(struct nfs_read_data *data, int status)
}
}
+static const struct rpc_call_ops nfs_read_full_ops = {
+ .rpc_call_done = nfs_readpage_result_full,
+ .rpc_release = nfs_readdata_release,
+};
+
/*
* This is the callback from RPC telling us whether a reply was
* received or some error occurred (timeout or socket shutdown).
*/
-void nfs_readpage_result(struct rpc_task *task, void *calldata)
+int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data)
{
- struct nfs_read_data *data = calldata;
struct nfs_readargs *argp = &data->args;
struct nfs_readres *resp = &data->res;
- int status = task->tk_status;
+ int status;
dprintk("NFS: %4d nfs_readpage_result, (status %d)\n",
- task->tk_pid, status);
+ task->tk_pid, task->tk_status);
+
+ status = NFS_PROTO(data->inode)->read_done(task, data);
+ if (status != 0)
+ return status;
+
+ nfs_add_stats(data->inode, NFSIOS_SERVERREADBYTES, resp->count);
/* Is this a short read? */
if (task->tk_status >= 0 && resp->count < argp->count && !resp->eof) {
+ nfs_inc_stats(data->inode, NFSIOS_SHORTREAD);
/* Has the server at least made some progress? */
if (resp->count != 0) {
/* Yes, so retry the read at the end of the data */
@@ -467,14 +528,14 @@ void nfs_readpage_result(struct rpc_task *task, void *calldata)
argp->pgbase += resp->count;
argp->count -= resp->count;
rpc_restart_call(task);
- return;
+ return -EAGAIN;
}
task->tk_status = -EIO;
}
spin_lock(&data->inode->i_lock);
NFS_I(data->inode)->cache_validity |= NFS_INO_INVALID_ATIME;
spin_unlock(&data->inode->i_lock);
- data->complete(data, status);
+ return 0;
}
/*
@@ -491,6 +552,9 @@ int nfs_readpage(struct file *file, struct page *page)
dprintk("NFS: nfs_readpage (%p %ld@%lu)\n",
page, PAGE_CACHE_SIZE, page->index);
+ nfs_inc_stats(inode, NFSIOS_VFSREADPAGE);
+ nfs_add_stats(inode, NFSIOS_READPAGES, 1);
+
/*
* Try to flush any pending writes to the file..
*
@@ -570,6 +634,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
inode->i_sb->s_id,
(long long)NFS_FILEID(inode),
nr_pages);
+ nfs_inc_stats(inode, NFSIOS_VFSREADPAGES);
if (filp == NULL) {
desc.ctx = nfs_find_open_context(inode, NULL, FMODE_READ);
@@ -582,6 +647,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
if (!list_empty(&head)) {
int err = nfs_pagein_list(&head, server->rpages);
if (!ret)
+ nfs_add_stats(inode, NFSIOS_READPAGES, err);
ret = err;
}
put_nfs_open_context(desc.ctx);
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index a65c7b53d558..0e28189c2151 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -163,10 +163,9 @@ nfs_async_unlink(struct dentry *dentry)
struct rpc_clnt *clnt = NFS_CLIENT(dir->d_inode);
int status = -ENOMEM;
- data = kmalloc(sizeof(*data), GFP_KERNEL);
+ data = kzalloc(sizeof(*data), GFP_KERNEL);
if (!data)
goto out;
- memset(data, 0, sizeof(*data));
data->cred = rpcauth_lookupcred(clnt->cl_auth, 0);
if (IS_ERR(data->cred)) {
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 9449b6835509..3f5225404c97 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -63,6 +63,7 @@
#include <linux/smp_lock.h>
#include "delegation.h"
+#include "iostat.h"
#define NFSDBG_FACILITY NFSDBG_PAGECACHE
@@ -76,20 +77,21 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context*,
struct inode *,
struct page *,
unsigned int, unsigned int);
-static void nfs_writeback_done_partial(struct nfs_write_data *, int);
-static void nfs_writeback_done_full(struct nfs_write_data *, int);
static int nfs_wait_on_write_congestion(struct address_space *, int);
static int nfs_wait_on_requests(struct inode *, unsigned long, unsigned int);
static int nfs_flush_inode(struct inode *inode, unsigned long idx_start,
unsigned int npages, int how);
+static const struct rpc_call_ops nfs_write_partial_ops;
+static const struct rpc_call_ops nfs_write_full_ops;
+static const struct rpc_call_ops nfs_commit_ops;
static kmem_cache_t *nfs_wdata_cachep;
-mempool_t *nfs_wdata_mempool;
+static mempool_t *nfs_wdata_mempool;
static mempool_t *nfs_commit_mempool;
static DECLARE_WAIT_QUEUE_HEAD(nfs_write_congestion);
-static inline struct nfs_write_data *nfs_commit_alloc(unsigned int pagecount)
+struct nfs_write_data *nfs_commit_alloc(unsigned int pagecount)
{
struct nfs_write_data *p = mempool_alloc(nfs_commit_mempool, SLAB_NOFS);
@@ -100,11 +102,39 @@ static inline struct nfs_write_data *nfs_commit_alloc(unsigned int pagecount)
p->pagevec = &p->page_array[0];
else {
size_t size = ++pagecount * sizeof(struct page *);
+ p->pagevec = kzalloc(size, GFP_NOFS);
+ if (!p->pagevec) {
+ mempool_free(p, nfs_commit_mempool);
+ p = NULL;
+ }
+ }
+ }
+ return p;
+}
+
+void nfs_commit_free(struct nfs_write_data *p)
+{
+ if (p && (p->pagevec != &p->page_array[0]))
+ kfree(p->pagevec);
+ mempool_free(p, nfs_commit_mempool);
+}
+
+struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount)
+{
+ struct nfs_write_data *p = mempool_alloc(nfs_wdata_mempool, SLAB_NOFS);
+
+ if (p) {
+ memset(p, 0, sizeof(*p));
+ INIT_LIST_HEAD(&p->pages);
+ if (pagecount < NFS_PAGEVEC_SIZE)
+ p->pagevec = &p->page_array[0];
+ else {
+ size_t size = ++pagecount * sizeof(struct page *);
p->pagevec = kmalloc(size, GFP_NOFS);
if (p->pagevec) {
memset(p->pagevec, 0, size);
} else {
- mempool_free(p, nfs_commit_mempool);
+ mempool_free(p, nfs_wdata_mempool);
p = NULL;
}
}
@@ -112,11 +142,11 @@ static inline struct nfs_write_data *nfs_commit_alloc(unsigned int pagecount)
return p;
}
-static inline void nfs_commit_free(struct nfs_write_data *p)
+void nfs_writedata_free(struct nfs_write_data *p)
{
if (p && (p->pagevec != &p->page_array[0]))
kfree(p->pagevec);
- mempool_free(p, nfs_commit_mempool);
+ mempool_free(p, nfs_wdata_mempool);
}
void nfs_writedata_release(void *wdata)
@@ -136,6 +166,7 @@ static void nfs_grow_file(struct page *page, unsigned int offset, unsigned int c
end = ((loff_t)page->index << PAGE_CACHE_SHIFT) + ((loff_t)offset+count);
if (i_size >= end)
return;
+ nfs_inc_stats(inode, NFSIOS_EXTENDWRITE);
i_size_write(inode, end);
}
@@ -225,6 +256,7 @@ static int nfs_writepage_sync(struct nfs_open_context *ctx, struct inode *inode,
wdata->args.pgbase += result;
written += result;
count -= result;
+ nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, result);
} while (count);
/* Update file length */
nfs_grow_file(page, offset, written);
@@ -281,6 +313,9 @@ int nfs_writepage(struct page *page, struct writeback_control *wbc)
int priority = wb_priority(wbc);
int err;
+ nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE);
+ nfs_add_stats(inode, NFSIOS_WRITEPAGES, 1);
+
/*
* Note: We need to ensure that we have a reference to the inode
* if we are to do asynchronous writes. If not, waiting
@@ -345,6 +380,8 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
struct inode *inode = mapping->host;
int err;
+ nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES);
+
err = generic_writepages(mapping, wbc);
if (err)
return err;
@@ -356,6 +393,7 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
err = nfs_flush_inode(inode, 0, 0, wb_priority(wbc));
if (err < 0)
goto out;
+ nfs_add_stats(inode, NFSIOS_WRITEPAGES, err);
wbc->nr_to_write -= err;
if (!wbc->nonblocking && wbc->sync_mode == WB_SYNC_ALL) {
err = nfs_wait_on_requests(inode, 0, 0);
@@ -391,6 +429,7 @@ static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
if (nfs_have_delegation(inode, FMODE_WRITE))
nfsi->change_attr++;
}
+ SetPagePrivate(req->wb_page);
nfsi->npages++;
atomic_inc(&req->wb_count);
return 0;
@@ -407,6 +446,7 @@ static void nfs_inode_remove_request(struct nfs_page *req)
BUG_ON (!NFS_WBACK_BUSY(req));
spin_lock(&nfsi->req_lock);
+ ClearPagePrivate(req->wb_page);
radix_tree_delete(&nfsi->nfs_page_tree, req->wb_index);
nfsi->npages--;
if (!nfsi->npages) {
@@ -499,8 +539,7 @@ nfs_mark_request_commit(struct nfs_page *req)
*
* Interruptible by signals only if mounted with intr flag.
*/
-static int
-nfs_wait_on_requests(struct inode *inode, unsigned long idx_start, unsigned int npages)
+static int nfs_wait_on_requests_locked(struct inode *inode, unsigned long idx_start, unsigned int npages)
{
struct nfs_inode *nfsi = NFS_I(inode);
struct nfs_page *req;
@@ -513,7 +552,6 @@ nfs_wait_on_requests(struct inode *inode, unsigned long idx_start, unsigned int
else
idx_end = idx_start + npages - 1;
- spin_lock(&nfsi->req_lock);
next = idx_start;
while (radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree, (void **)&req, next, 1, NFS_PAGE_TAG_WRITEBACK)) {
if (req->wb_index > idx_end)
@@ -526,15 +564,25 @@ nfs_wait_on_requests(struct inode *inode, unsigned long idx_start, unsigned int
spin_unlock(&nfsi->req_lock);
error = nfs_wait_on_request(req);
nfs_release_request(req);
+ spin_lock(&nfsi->req_lock);
if (error < 0)
return error;
- spin_lock(&nfsi->req_lock);
res++;
}
- spin_unlock(&nfsi->req_lock);
return res;
}
+static int nfs_wait_on_requests(struct inode *inode, unsigned long idx_start, unsigned int npages)
+{
+ struct nfs_inode *nfsi = NFS_I(inode);
+ int ret;
+
+ spin_lock(&nfsi->req_lock);
+ ret = nfs_wait_on_requests_locked(inode, idx_start, npages);
+ spin_unlock(&nfsi->req_lock);
+ return ret;
+}
+
/*
* nfs_scan_dirty - Scan an inode for dirty requests
* @inode: NFS inode to scan
@@ -586,6 +634,11 @@ nfs_scan_commit(struct inode *inode, struct list_head *dst, unsigned long idx_st
}
return res;
}
+#else
+static inline int nfs_scan_commit(struct inode *inode, struct list_head *dst, unsigned long idx_start, unsigned int npages)
+{
+ return 0;
+}
#endif
static int nfs_wait_on_write_congestion(struct address_space *mapping, int intr)
@@ -598,6 +651,9 @@ static int nfs_wait_on_write_congestion(struct address_space *mapping, int intr)
if (!bdi_write_congested(bdi))
return 0;
+
+ nfs_inc_stats(mapping->host, NFSIOS_CONGESTIONWAIT);
+
if (intr) {
struct rpc_clnt *clnt = NFS_CLIENT(mapping->host);
sigset_t oldset;
@@ -653,8 +709,11 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx,
spin_unlock(&nfsi->req_lock);
error = nfs_wait_on_request(req);
nfs_release_request(req);
- if (error < 0)
+ if (error < 0) {
+ if (new)
+ nfs_release_request(new);
return ERR_PTR(error);
+ }
continue;
}
spin_unlock(&nfsi->req_lock);
@@ -748,6 +807,8 @@ int nfs_updatepage(struct file *file, struct page *page,
struct nfs_page *req;
int status = 0;
+ nfs_inc_stats(inode, NFSIOS_VFSUPDATEPAGE);
+
dprintk("NFS: nfs_updatepage(%s/%s %d@%Ld)\n",
file->f_dentry->d_parent->d_name.name,
file->f_dentry->d_name.name, count,
@@ -857,10 +918,12 @@ static inline int flush_task_priority(int how)
*/
static void nfs_write_rpcsetup(struct nfs_page *req,
struct nfs_write_data *data,
+ const struct rpc_call_ops *call_ops,
unsigned int count, unsigned int offset,
int how)
{
struct inode *inode;
+ int flags;
/* Set up the RPC argument and reply structs
* NB: take care not to mess about with data->commit et al. */
@@ -881,6 +944,9 @@ static void nfs_write_rpcsetup(struct nfs_page *req,
data->res.verf = &data->verf;
nfs_fattr_init(&data->fattr);
+ /* Set up the initial task struct. */
+ flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
+ rpc_init_task(&data->task, NFS_CLIENT(inode), flags, call_ops, data);
NFS_PROTO(inode)->write_setup(data, how);
data->task.tk_priority = flush_task_priority(how);
@@ -910,7 +976,7 @@ static void nfs_execute_write(struct nfs_write_data *data)
* Generate multiple small requests to write out a single
* contiguous dirty area on one page.
*/
-static int nfs_flush_multi(struct list_head *head, struct inode *inode, int how)
+static int nfs_flush_multi(struct inode *inode, struct list_head *head, int how)
{
struct nfs_page *req = nfs_list_entry(head->next);
struct page *page = req->wb_page;
@@ -944,14 +1010,15 @@ static int nfs_flush_multi(struct list_head *head, struct inode *inode, int how)
list_del_init(&data->pages);
data->pagevec[0] = page;
- data->complete = nfs_writeback_done_partial;
if (nbytes > wsize) {
- nfs_write_rpcsetup(req, data, wsize, offset, how);
+ nfs_write_rpcsetup(req, data, &nfs_write_partial_ops,
+ wsize, offset, how);
offset += wsize;
nbytes -= wsize;
} else {
- nfs_write_rpcsetup(req, data, nbytes, offset, how);
+ nfs_write_rpcsetup(req, data, &nfs_write_partial_ops,
+ nbytes, offset, how);
nbytes = 0;
}
nfs_execute_write(data);
@@ -978,16 +1045,13 @@ out_bad:
* This is the case if nfs_updatepage detects a conflicting request
* that has been written but not committed.
*/
-static int nfs_flush_one(struct list_head *head, struct inode *inode, int how)
+static int nfs_flush_one(struct inode *inode, struct list_head *head, int how)
{
struct nfs_page *req;
struct page **pages;
struct nfs_write_data *data;
unsigned int count;
- if (NFS_SERVER(inode)->wsize < PAGE_CACHE_SIZE)
- return nfs_flush_multi(head, inode, how);
-
data = nfs_writedata_alloc(NFS_SERVER(inode)->wpages);
if (!data)
goto out_bad;
@@ -1005,9 +1069,8 @@ static int nfs_flush_one(struct list_head *head, struct inode *inode, int how)
}
req = nfs_list_entry(data->pages.next);
- data->complete = nfs_writeback_done_full;
/* Set up the argument struct */
- nfs_write_rpcsetup(req, data, count, 0, how);
+ nfs_write_rpcsetup(req, data, &nfs_write_full_ops, count, 0, how);
nfs_execute_write(data);
return 0;
@@ -1021,24 +1084,32 @@ static int nfs_flush_one(struct list_head *head, struct inode *inode, int how)
return -ENOMEM;
}
-static int
-nfs_flush_list(struct list_head *head, int wpages, int how)
+static int nfs_flush_list(struct inode *inode, struct list_head *head, int npages, int how)
{
LIST_HEAD(one_request);
- struct nfs_page *req;
- int error = 0;
- unsigned int pages = 0;
+ int (*flush_one)(struct inode *, struct list_head *, int);
+ struct nfs_page *req;
+ int wpages = NFS_SERVER(inode)->wpages;
+ int wsize = NFS_SERVER(inode)->wsize;
+ int error;
- while (!list_empty(head)) {
- pages += nfs_coalesce_requests(head, &one_request, wpages);
+ flush_one = nfs_flush_one;
+ if (wsize < PAGE_CACHE_SIZE)
+ flush_one = nfs_flush_multi;
+ /* For single writes, FLUSH_STABLE is more efficient */
+ if (npages <= wpages && npages == NFS_I(inode)->npages
+ && nfs_list_entry(head->next)->wb_bytes <= wsize)
+ how |= FLUSH_STABLE;
+
+ do {
+ nfs_coalesce_requests(head, &one_request, wpages);
req = nfs_list_entry(one_request.next);
- error = nfs_flush_one(&one_request, req->wb_context->dentry->d_inode, how);
+ error = flush_one(inode, &one_request, how);
if (error < 0)
- break;
- }
- if (error >= 0)
- return pages;
-
+ goto out_err;
+ } while (!list_empty(head));
+ return 0;
+out_err:
while (!list_empty(head)) {
req = nfs_list_entry(head->next);
nfs_list_remove_request(req);
@@ -1051,8 +1122,9 @@ nfs_flush_list(struct list_head *head, int wpages, int how)
/*
* Handle a write reply that flushed part of a page.
*/
-static void nfs_writeback_done_partial(struct nfs_write_data *data, int status)
+static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata)
{
+ struct nfs_write_data *data = calldata;
struct nfs_page *req = data->req;
struct page *page = req->wb_page;
@@ -1062,11 +1134,14 @@ static void nfs_writeback_done_partial(struct nfs_write_data *data, int status)
req->wb_bytes,
(long long)req_offset(req));
- if (status < 0) {
+ if (nfs_writeback_done(task, data) != 0)
+ return;
+
+ if (task->tk_status < 0) {
ClearPageUptodate(page);
SetPageError(page);
- req->wb_context->error = status;
- dprintk(", error = %d\n", status);
+ req->wb_context->error = task->tk_status;
+ dprintk(", error = %d\n", task->tk_status);
} else {
#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
if (data->verf.committed < NFS_FILE_SYNC) {
@@ -1087,6 +1162,11 @@ static void nfs_writeback_done_partial(struct nfs_write_data *data, int status)
nfs_writepage_release(req);
}
+static const struct rpc_call_ops nfs_write_partial_ops = {
+ .rpc_call_done = nfs_writeback_done_partial,
+ .rpc_release = nfs_writedata_release,
+};
+
/*
* Handle a write reply that flushes a whole page.
*
@@ -1094,11 +1174,15 @@ static void nfs_writeback_done_partial(struct nfs_write_data *data, int status)
* writebacks since the page->count is kept > 1 for as long
* as the page has a write request pending.
*/
-static void nfs_writeback_done_full(struct nfs_write_data *data, int status)
+static void nfs_writeback_done_full(struct rpc_task *task, void *calldata)
{
+ struct nfs_write_data *data = calldata;
struct nfs_page *req;
struct page *page;
+ if (nfs_writeback_done(task, data) != 0)
+ return;
+
/* Update attributes as result of writeback. */
while (!list_empty(&data->pages)) {
req = nfs_list_entry(data->pages.next);
@@ -1111,13 +1195,13 @@ static void nfs_writeback_done_full(struct nfs_write_data *data, int status)
req->wb_bytes,
(long long)req_offset(req));
- if (status < 0) {
+ if (task->tk_status < 0) {
ClearPageUptodate(page);
SetPageError(page);
- req->wb_context->error = status;
+ req->wb_context->error = task->tk_status;
end_page_writeback(page);
nfs_inode_remove_request(req);
- dprintk(", error = %d\n", status);
+ dprintk(", error = %d\n", task->tk_status);
goto next;
}
end_page_writeback(page);
@@ -1139,18 +1223,30 @@ static void nfs_writeback_done_full(struct nfs_write_data *data, int status)
}
}
+static const struct rpc_call_ops nfs_write_full_ops = {
+ .rpc_call_done = nfs_writeback_done_full,
+ .rpc_release = nfs_writedata_release,
+};
+
+
/*
* This function is called when the WRITE call is complete.
*/
-void nfs_writeback_done(struct rpc_task *task, void *calldata)
+int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
{
- struct nfs_write_data *data = calldata;
struct nfs_writeargs *argp = &data->args;
struct nfs_writeres *resp = &data->res;
+ int status;
dprintk("NFS: %4d nfs_writeback_done (status %d)\n",
task->tk_pid, task->tk_status);
+ /* Call the NFS version-specific code */
+ status = NFS_PROTO(data->inode)->write_done(task, data);
+ if (status != 0)
+ return status;
+ nfs_add_stats(data->inode, NFSIOS_SERVERWRITTENBYTES, resp->count);
+
#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
if (resp->verf->committed < argp->stable && task->tk_status >= 0) {
/* We tried a write call, but the server did not
@@ -1176,6 +1272,8 @@ void nfs_writeback_done(struct rpc_task *task, void *calldata)
if (task->tk_status >= 0 && resp->count < argp->count) {
static unsigned long complain;
+ nfs_inc_stats(data->inode, NFSIOS_SHORTWRITE);
+
/* Has the server at least made some progress? */
if (resp->count != 0) {
/* Was this an NFSv2 write or an NFSv3 stable write? */
@@ -1191,7 +1289,7 @@ void nfs_writeback_done(struct rpc_task *task, void *calldata)
argp->stable = NFS_FILE_SYNC;
}
rpc_restart_call(task);
- return;
+ return -EAGAIN;
}
if (time_before(complain, jiffies)) {
printk(KERN_WARNING
@@ -1202,11 +1300,7 @@ void nfs_writeback_done(struct rpc_task *task, void *calldata)
/* Can't do anything about it except throw an error. */
task->tk_status = -EIO;
}
-
- /*
- * Process the nfs_page list
- */
- data->complete(data, task->tk_status);
+ return 0;
}
@@ -1220,10 +1314,12 @@ void nfs_commit_release(void *wdata)
* Set up the argument/result storage required for the RPC call.
*/
static void nfs_commit_rpcsetup(struct list_head *head,
- struct nfs_write_data *data, int how)
+ struct nfs_write_data *data,
+ int how)
{
struct nfs_page *first;
struct inode *inode;
+ int flags;
/* Set up the RPC argument and reply structs
* NB: take care not to mess about with data->commit et al. */
@@ -1243,7 +1339,10 @@ static void nfs_commit_rpcsetup(struct list_head *head,
data->res.fattr = &data->fattr;
data->res.verf = &data->verf;
nfs_fattr_init(&data->fattr);
-
+
+ /* Set up the initial task struct. */
+ flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
+ rpc_init_task(&data->task, NFS_CLIENT(inode), flags, &nfs_commit_ops, data);
NFS_PROTO(inode)->commit_setup(data, how);
data->task.tk_priority = flush_task_priority(how);
@@ -1284,7 +1383,7 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how)
/*
* COMMIT call returned
*/
-void nfs_commit_done(struct rpc_task *task, void *calldata)
+static void nfs_commit_done(struct rpc_task *task, void *calldata)
{
struct nfs_write_data *data = calldata;
struct nfs_page *req;
@@ -1293,6 +1392,10 @@ void nfs_commit_done(struct rpc_task *task, void *calldata)
dprintk("NFS: %4d nfs_commit_done (status %d)\n",
task->tk_pid, task->tk_status);
+ /* Call the NFS version-specific code */
+ if (NFS_PROTO(data->inode)->commit_done(task, data) != 0)
+ return;
+
while (!list_empty(&data->pages)) {
req = nfs_list_entry(data->pages.next);
nfs_list_remove_request(req);
@@ -1326,6 +1429,16 @@ void nfs_commit_done(struct rpc_task *task, void *calldata)
}
sub_page_state(nr_unstable,res);
}
+
+static const struct rpc_call_ops nfs_commit_ops = {
+ .rpc_call_done = nfs_commit_done,
+ .rpc_release = nfs_commit_release,
+};
+#else
+static inline int nfs_commit_list(struct inode *inode, struct list_head *head, int how)
+{
+ return 0;
+}
#endif
static int nfs_flush_inode(struct inode *inode, unsigned long idx_start,
@@ -1333,24 +1446,16 @@ static int nfs_flush_inode(struct inode *inode, unsigned long idx_start,
{
struct nfs_inode *nfsi = NFS_I(inode);
LIST_HEAD(head);
- int res,
- error = 0;
+ int res;
spin_lock(&nfsi->req_lock);
res = nfs_scan_dirty(inode, &head, idx_start, npages);
spin_unlock(&nfsi->req_lock);
if (res) {
- struct nfs_server *server = NFS_SERVER(inode);
-
- /* For single writes, FLUSH_STABLE is more efficient */
- if (res == nfsi->npages && nfsi->npages <= server->wpages) {
- if (res > 1 || nfs_list_entry(head.next)->wb_bytes <= server->wsize)
- how |= FLUSH_STABLE;
- }
- error = nfs_flush_list(&head, server->wpages, how);
+ int error = nfs_flush_list(inode, &head, res, how);
+ if (error < 0)
+ return error;
}
- if (error < 0)
- return error;
return res;
}
@@ -1359,14 +1464,13 @@ int nfs_commit_inode(struct inode *inode, int how)
{
struct nfs_inode *nfsi = NFS_I(inode);
LIST_HEAD(head);
- int res,
- error = 0;
+ int res;
spin_lock(&nfsi->req_lock);
res = nfs_scan_commit(inode, &head, 0, 0);
spin_unlock(&nfsi->req_lock);
if (res) {
- error = nfs_commit_list(inode, &head, how);
+ int error = nfs_commit_list(inode, &head, how);
if (error < 0)
return error;
}
@@ -1374,28 +1478,38 @@ int nfs_commit_inode(struct inode *inode, int how)
}
#endif
-int nfs_sync_inode(struct inode *inode, unsigned long idx_start,
- unsigned int npages, int how)
+int nfs_sync_inode_wait(struct inode *inode, unsigned long idx_start,
+ unsigned int npages, int how)
{
+ struct nfs_inode *nfsi = NFS_I(inode);
+ LIST_HEAD(head);
int nocommit = how & FLUSH_NOCOMMIT;
- int wait = how & FLUSH_WAIT;
- int error;
-
- how &= ~(FLUSH_WAIT|FLUSH_NOCOMMIT);
+ int pages, ret;
+ how &= ~FLUSH_NOCOMMIT;
+ spin_lock(&nfsi->req_lock);
do {
- if (wait) {
- error = nfs_wait_on_requests(inode, idx_start, npages);
- if (error != 0)
- continue;
- }
- error = nfs_flush_inode(inode, idx_start, npages, how);
- if (error != 0)
+ ret = nfs_wait_on_requests_locked(inode, idx_start, npages);
+ if (ret != 0)
continue;
- if (!nocommit)
- error = nfs_commit_inode(inode, how);
- } while (error > 0);
- return error;
+ pages = nfs_scan_dirty(inode, &head, idx_start, npages);
+ if (pages != 0) {
+ spin_unlock(&nfsi->req_lock);
+ ret = nfs_flush_list(inode, &head, pages, how);
+ spin_lock(&nfsi->req_lock);
+ continue;
+ }
+ if (nocommit)
+ break;
+ pages = nfs_scan_commit(inode, &head, 0, 0);
+ if (pages == 0)
+ break;
+ spin_unlock(&nfsi->req_lock);
+ ret = nfs_commit_list(inode, &head, how);
+ spin_lock(&nfsi->req_lock);
+ } while (ret >= 0);
+ spin_unlock(&nfsi->req_lock);
+ return ret;
}
int nfs_init_writepagecache(void)
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 8d3d23c8a4d2..c872bd07fc10 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -326,6 +326,8 @@ out:
.p_encode = (kxdrproc_t) nfs4_xdr_##argtype, \
.p_decode = (kxdrproc_t) nfs4_xdr_##restype, \
.p_bufsiz = MAX(NFS4_##argtype##_sz,NFS4_##restype##_sz) << 2, \
+ .p_statidx = NFSPROC4_CB_##call, \
+ .p_name = #proc, \
}
static struct rpc_procinfo nfs4_cb_procedures[] = {
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 1143cfb64549..f6ab762bea99 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -2639,7 +2639,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock
struct nfs4_stateid *lock_stp;
struct file *filp;
struct file_lock file_lock;
- struct file_lock *conflock;
+ struct file_lock conflock;
int status = 0;
unsigned int strhashval;
@@ -2775,11 +2775,11 @@ conflicting_lock:
/* XXX There is a race here. Future patch needed to provide
* an atomic posix_lock_and_test_file
*/
- if (!(conflock = posix_test_lock(filp, &file_lock))) {
+ if (!posix_test_lock(filp, &file_lock, &conflock)) {
status = nfserr_serverfault;
goto out;
}
- nfs4_set_lock_denied(conflock, &lock->lk_denied);
+ nfs4_set_lock_denied(&conflock, &lock->lk_denied);
out:
if (status && lock->lk_is_new && lock_sop)
release_stateowner(lock_sop);
@@ -2800,7 +2800,7 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock
struct inode *inode;
struct file file;
struct file_lock file_lock;
- struct file_lock *conflicting_lock;
+ struct file_lock conflock;
int status;
if (nfs4_in_grace())
@@ -2864,10 +2864,9 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock
file.f_dentry = current_fh->fh_dentry;
status = nfs_ok;
- conflicting_lock = posix_test_lock(&file, &file_lock);
- if (conflicting_lock) {
+ if (posix_test_lock(&file, &file_lock, &conflock)) {
status = nfserr_denied;
- nfs4_set_lock_denied(conflicting_lock, &lockt->lt_denied);
+ nfs4_set_lock_denied(&conflock, &lockt->lt_denied);
}
out:
nfs4_unlock_state();
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 20feb7568deb..8f1f49ceebec 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -104,6 +104,7 @@ enum pid_directory_inos {
PROC_TGID_MAPS,
PROC_TGID_NUMA_MAPS,
PROC_TGID_MOUNTS,
+ PROC_TGID_MOUNTSTATS,
PROC_TGID_WCHAN,
#ifdef CONFIG_MMU
PROC_TGID_SMAPS,
@@ -144,6 +145,7 @@ enum pid_directory_inos {
PROC_TID_MAPS,
PROC_TID_NUMA_MAPS,
PROC_TID_MOUNTS,
+ PROC_TID_MOUNTSTATS,
PROC_TID_WCHAN,
#ifdef CONFIG_MMU
PROC_TID_SMAPS,
@@ -201,6 +203,7 @@ static struct pid_entry tgid_base_stuff[] = {
E(PROC_TGID_ROOT, "root", S_IFLNK|S_IRWXUGO),
E(PROC_TGID_EXE, "exe", S_IFLNK|S_IRWXUGO),
E(PROC_TGID_MOUNTS, "mounts", S_IFREG|S_IRUGO),
+ E(PROC_TGID_MOUNTSTATS, "mountstats", S_IFREG|S_IRUSR),
#ifdef CONFIG_MMU
E(PROC_TGID_SMAPS, "smaps", S_IFREG|S_IRUGO),
#endif
@@ -732,6 +735,38 @@ static struct file_operations proc_mounts_operations = {
.poll = mounts_poll,
};
+extern struct seq_operations mountstats_op;
+static int mountstats_open(struct inode *inode, struct file *file)
+{
+ struct task_struct *task = proc_task(inode);
+ int ret = seq_open(file, &mountstats_op);
+
+ if (!ret) {
+ struct seq_file *m = file->private_data;
+ struct namespace *namespace;
+ task_lock(task);
+ namespace = task->namespace;
+ if (namespace)
+ get_namespace(namespace);
+ task_unlock(task);
+
+ if (namespace)
+ m->private = namespace;
+ else {
+ seq_release(inode, file);
+ ret = -EINVAL;
+ }
+ }
+ return ret;
+}
+
+static struct file_operations proc_mountstats_operations = {
+ .open = mountstats_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = mounts_release,
+};
+
#define PROC_BLOCK_SIZE (3*1024) /* 4K page size but our output routines use some slack for overruns */
static ssize_t proc_info_read(struct file * file, char __user * buf,
@@ -1730,6 +1765,10 @@ static struct dentry *proc_pident_lookup(struct inode *dir,
inode->i_fop = &proc_smaps_operations;
break;
#endif
+ case PROC_TID_MOUNTSTATS:
+ case PROC_TGID_MOUNTSTATS:
+ inode->i_fop = &proc_mountstats_operations;
+ break;
#ifdef CONFIG_SECURITY
case PROC_TID_ATTR:
inode->i_nlink = 2;