diff options
author | Ingo Molnar <mingo@elte.hu> | 2010-05-03 11:17:01 +0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2010-05-03 11:17:01 +0400 |
commit | 53ba4f2fa73225113a488584df0d85d3cba52943 (patch) | |
tree | d85b984d9818abc3ccc0237eb53b710d9e96c39e /fs/nfs | |
parent | bd6d29c25bb1a24a4c160ec5de43e0004e01f72b (diff) | |
parent | 66f41d4c5c8a5deed66fdcc84509376c9a0bf9d8 (diff) | |
download | linux-53ba4f2fa73225113a488584df0d85d3cba52943.tar.xz |
Merge commit 'v2.6.34-rc6' into core/locking
Diffstat (limited to 'fs/nfs')
-rw-r--r-- | fs/nfs/cache_lib.c | 1 | ||||
-rw-r--r-- | fs/nfs/callback.c | 2 | ||||
-rw-r--r-- | fs/nfs/callback.h | 8 | ||||
-rw-r--r-- | fs/nfs/callback_proc.c | 166 | ||||
-rw-r--r-- | fs/nfs/callback_xdr.c | 107 | ||||
-rw-r--r-- | fs/nfs/client.c | 54 | ||||
-rw-r--r-- | fs/nfs/delegation.c | 1 | ||||
-rw-r--r-- | fs/nfs/delegation.h | 6 | ||||
-rw-r--r-- | fs/nfs/dir.c | 10 | ||||
-rw-r--r-- | fs/nfs/direct.c | 1 | ||||
-rw-r--r-- | fs/nfs/dns_resolve.c | 19 | ||||
-rw-r--r-- | fs/nfs/file.c | 35 | ||||
-rw-r--r-- | fs/nfs/fscache.c | 1 | ||||
-rw-r--r-- | fs/nfs/inode.c | 113 | ||||
-rw-r--r-- | fs/nfs/internal.h | 2 | ||||
-rw-r--r-- | fs/nfs/iostat.h | 4 | ||||
-rw-r--r-- | fs/nfs/namespace.c | 1 | ||||
-rw-r--r-- | fs/nfs/nfs2xdr.c | 1 | ||||
-rw-r--r-- | fs/nfs/nfs3acl.c | 1 | ||||
-rw-r--r-- | fs/nfs/nfs3proc.c | 10 | ||||
-rw-r--r-- | fs/nfs/nfs3xdr.c | 1 | ||||
-rw-r--r-- | fs/nfs/nfs4_fs.h | 2 | ||||
-rw-r--r-- | fs/nfs/nfs4namespace.c | 1 | ||||
-rw-r--r-- | fs/nfs/nfs4proc.c | 125 | ||||
-rw-r--r-- | fs/nfs/nfs4renewd.c | 24 | ||||
-rw-r--r-- | fs/nfs/nfs4state.c | 118 | ||||
-rw-r--r-- | fs/nfs/nfs4xdr.c | 13 | ||||
-rw-r--r-- | fs/nfs/pagelist.c | 23 | ||||
-rw-r--r-- | fs/nfs/proc.c | 42 | ||||
-rw-r--r-- | fs/nfs/super.c | 29 | ||||
-rw-r--r-- | fs/nfs/symlink.c | 3 | ||||
-rw-r--r-- | fs/nfs/write.c | 312 |
32 files changed, 743 insertions, 493 deletions
diff --git a/fs/nfs/cache_lib.c b/fs/nfs/cache_lib.c index b4ffd0146ea6..84690319e625 100644 --- a/fs/nfs/cache_lib.c +++ b/fs/nfs/cache_lib.c @@ -10,6 +10,7 @@ #include <linux/moduleparam.h> #include <linux/mount.h> #include <linux/namei.h> +#include <linux/slab.h> #include <linux/sunrpc/cache.h> #include <linux/sunrpc/rpc_pipe_fs.h> diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 73ab220354df..36dfdae95123 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -118,7 +118,6 @@ nfs4_callback_up(struct svc_serv *serv) dprintk("NFS: Callback listener port = %u (af %u)\n", nfs_callback_tcpport, PF_INET); -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) ret = svc_create_xprt(serv, "tcp", PF_INET6, nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS); if (ret > 0) { @@ -129,7 +128,6 @@ nfs4_callback_up(struct svc_serv *serv) ret = 0; else goto out_err; -#endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */ return svc_prepare_thread(serv, &serv->sv_pools[0]); diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h index d4036be0b589..85a7cfd1b8dd 100644 --- a/fs/nfs/callback.h +++ b/fs/nfs/callback.h @@ -119,6 +119,14 @@ struct cb_recallanyargs { }; extern unsigned nfs4_callback_recallany(struct cb_recallanyargs *args, void *dummy); + +struct cb_recallslotargs { + struct sockaddr *crsa_addr; + uint32_t crsa_target_max_slots; +}; +extern unsigned nfs4_callback_recallslot(struct cb_recallslotargs *args, + void *dummy); + #endif /* CONFIG_NFS_V4_1 */ extern __be32 nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *res); diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index defa9b4c470e..a08770a7e857 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -7,6 +7,7 @@ */ #include <linux/nfs4.h> #include <linux/nfs_fs.h> +#include <linux/slab.h> #include "nfs4_fs.h" #include "callback.h" #include "delegation.h" @@ -143,44 +144,49 @@ int nfs41_validate_delegation_stateid(struct nfs_delegation *delegation, const n * Return success if the sequenceID is one more than what we last saw on * this slot, accounting for wraparound. Increments the slot's sequence. * - * We don't yet implement a duplicate request cache, so at this time - * we will log replays, and process them as if we had not seen them before, - * but we don't bump the sequence in the slot. Not too worried about it, + * We don't yet implement a duplicate request cache, instead we set the + * back channel ca_maxresponsesize_cached to zero. This is OK for now * since we only currently implement idempotent callbacks anyway. * * We have a single slot backchannel at this time, so we don't bother * checking the used_slots bit array on the table. The lower layer guarantees * a single outstanding callback request at a time. */ -static int -validate_seqid(struct nfs4_slot_table *tbl, u32 slotid, u32 seqid) +static __be32 +validate_seqid(struct nfs4_slot_table *tbl, struct cb_sequenceargs * args) { struct nfs4_slot *slot; dprintk("%s enter. slotid %d seqid %d\n", - __func__, slotid, seqid); + __func__, args->csa_slotid, args->csa_sequenceid); - if (slotid > NFS41_BC_MAX_CALLBACKS) + if (args->csa_slotid > NFS41_BC_MAX_CALLBACKS) return htonl(NFS4ERR_BADSLOT); - slot = tbl->slots + slotid; + slot = tbl->slots + args->csa_slotid; dprintk("%s slot table seqid: %d\n", __func__, slot->seq_nr); /* Normal */ - if (likely(seqid == slot->seq_nr + 1)) { + if (likely(args->csa_sequenceid == slot->seq_nr + 1)) { slot->seq_nr++; return htonl(NFS4_OK); } /* Replay */ - if (seqid == slot->seq_nr) { - dprintk("%s seqid %d is a replay - no DRC available\n", - __func__, seqid); - return htonl(NFS4_OK); + if (args->csa_sequenceid == slot->seq_nr) { + dprintk("%s seqid %d is a replay\n", + __func__, args->csa_sequenceid); + /* Signal process_op to set this error on next op */ + if (args->csa_cachethis == 0) + return htonl(NFS4ERR_RETRY_UNCACHED_REP); + + /* The ca_maxresponsesize_cached is 0 with no DRC */ + else if (args->csa_cachethis == 1) + return htonl(NFS4ERR_REP_TOO_BIG_TO_CACHE); } /* Wraparound */ - if (seqid == 1 && (slot->seq_nr + 1) == 0) { + if (args->csa_sequenceid == 1 && (slot->seq_nr + 1) == 0) { slot->seq_nr = 1; return htonl(NFS4_OK); } @@ -225,27 +231,87 @@ validate_seqid(struct nfs4_slot_table *tbl, u32 slotid, u32 seqid) return NULL; } -/* FIXME: referring calls should be processed */ -unsigned nfs4_callback_sequence(struct cb_sequenceargs *args, +/* + * For each referring call triple, check the session's slot table for + * a match. If the slot is in use and the sequence numbers match, the + * client is still waiting for a response to the original request. + */ +static bool referring_call_exists(struct nfs_client *clp, + uint32_t nrclists, + struct referring_call_list *rclists) +{ + bool status = 0; + int i, j; + struct nfs4_session *session; + struct nfs4_slot_table *tbl; + struct referring_call_list *rclist; + struct referring_call *ref; + + /* + * XXX When client trunking is implemented, this becomes + * a session lookup from within the loop + */ + session = clp->cl_session; + tbl = &session->fc_slot_table; + + for (i = 0; i < nrclists; i++) { + rclist = &rclists[i]; + if (memcmp(session->sess_id.data, + rclist->rcl_sessionid.data, + NFS4_MAX_SESSIONID_LEN) != 0) + continue; + + for (j = 0; j < rclist->rcl_nrefcalls; j++) { + ref = &rclist->rcl_refcalls[j]; + + dprintk("%s: sessionid %x:%x:%x:%x sequenceid %u " + "slotid %u\n", __func__, + ((u32 *)&rclist->rcl_sessionid.data)[0], + ((u32 *)&rclist->rcl_sessionid.data)[1], + ((u32 *)&rclist->rcl_sessionid.data)[2], + ((u32 *)&rclist->rcl_sessionid.data)[3], + ref->rc_sequenceid, ref->rc_slotid); + + spin_lock(&tbl->slot_tbl_lock); + status = (test_bit(ref->rc_slotid, tbl->used_slots) && + tbl->slots[ref->rc_slotid].seq_nr == + ref->rc_sequenceid); + spin_unlock(&tbl->slot_tbl_lock); + if (status) + goto out; + } + } + +out: + return status; +} + +__be32 nfs4_callback_sequence(struct cb_sequenceargs *args, struct cb_sequenceres *res) { struct nfs_client *clp; - int i, status; - - for (i = 0; i < args->csa_nrclists; i++) - kfree(args->csa_rclists[i].rcl_refcalls); - kfree(args->csa_rclists); + int i; + __be32 status; status = htonl(NFS4ERR_BADSESSION); clp = find_client_with_session(args->csa_addr, 4, &args->csa_sessionid); if (clp == NULL) goto out; - status = validate_seqid(&clp->cl_session->bc_slot_table, - args->csa_slotid, args->csa_sequenceid); + status = validate_seqid(&clp->cl_session->bc_slot_table, args); if (status) goto out_putclient; + /* + * Check for pending referring calls. If a match is found, a + * related callback was received before the response to the original + * call. + */ + if (referring_call_exists(clp, args->csa_nrclists, args->csa_rclists)) { + status = htonl(NFS4ERR_DELAY); + goto out_putclient; + } + memcpy(&res->csr_sessionid, &args->csa_sessionid, sizeof(res->csr_sessionid)); res->csr_sequenceid = args->csa_sequenceid; @@ -256,15 +322,23 @@ unsigned nfs4_callback_sequence(struct cb_sequenceargs *args, out_putclient: nfs_put_client(clp); out: - dprintk("%s: exit with status = %d\n", __func__, ntohl(status)); - res->csr_status = status; - return res->csr_status; + for (i = 0; i < args->csa_nrclists; i++) + kfree(args->csa_rclists[i].rcl_refcalls); + kfree(args->csa_rclists); + + if (status == htonl(NFS4ERR_RETRY_UNCACHED_REP)) + res->csr_status = 0; + else + res->csr_status = status; + dprintk("%s: exit with status = %d res->csr_status %d\n", __func__, + ntohl(status), ntohl(res->csr_status)); + return status; } -unsigned nfs4_callback_recallany(struct cb_recallanyargs *args, void *dummy) +__be32 nfs4_callback_recallany(struct cb_recallanyargs *args, void *dummy) { struct nfs_client *clp; - int status; + __be32 status; fmode_t flags = 0; status = htonl(NFS4ERR_OP_NOT_IN_SESSION); @@ -289,4 +363,40 @@ out: dprintk("%s: exit with status = %d\n", __func__, ntohl(status)); return status; } + +/* Reduce the fore channel's max_slots to the target value */ +__be32 nfs4_callback_recallslot(struct cb_recallslotargs *args, void *dummy) +{ + struct nfs_client *clp; + struct nfs4_slot_table *fc_tbl; + __be32 status; + + status = htonl(NFS4ERR_OP_NOT_IN_SESSION); + clp = nfs_find_client(args->crsa_addr, 4); + if (clp == NULL) + goto out; + + dprintk("NFS: CB_RECALL_SLOT request from %s target max slots %d\n", + rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR), + args->crsa_target_max_slots); + + fc_tbl = &clp->cl_session->fc_slot_table; + + status = htonl(NFS4ERR_BAD_HIGH_SLOT); + if (args->crsa_target_max_slots > fc_tbl->max_slots || + args->crsa_target_max_slots < 1) + goto out_putclient; + + status = htonl(NFS4_OK); + if (args->crsa_target_max_slots == fc_tbl->max_slots) + goto out_putclient; + + fc_tbl->target_max_slots = args->crsa_target_max_slots; + nfs41_handle_recall_slot(clp); +out_putclient: + nfs_put_client(clp); /* balance nfs_find_client */ +out: + dprintk("%s: exit with status = %d\n", __func__, ntohl(status)); + return status; +} #endif /* CONFIG_NFS_V4_1 */ diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index 8e1a2511c8be..05af212f0edf 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -9,6 +9,7 @@ #include <linux/sunrpc/svc.h> #include <linux/nfs4.h> #include <linux/nfs_fs.h> +#include <linux/slab.h> #include "nfs4_fs.h" #include "callback.h" @@ -24,10 +25,14 @@ #define CB_OP_SEQUENCE_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ + \ 4 + 1 + 3) #define CB_OP_RECALLANY_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ) +#define CB_OP_RECALLSLOT_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ) #endif /* CONFIG_NFS_V4_1 */ #define NFSDBG_FACILITY NFSDBG_CALLBACK +/* Internal error code */ +#define NFS4ERR_RESOURCE_HDR 11050 + typedef __be32 (*callback_process_op_t)(void *, void *); typedef __be32 (*callback_decode_arg_t)(struct svc_rqst *, struct xdr_stream *, void *); typedef __be32 (*callback_encode_res_t)(struct svc_rqst *, struct xdr_stream *, void *); @@ -173,7 +178,7 @@ static __be32 decode_op_hdr(struct xdr_stream *xdr, unsigned int *op) __be32 *p; p = read_buf(xdr, 4); if (unlikely(p == NULL)) - return htonl(NFS4ERR_RESOURCE); + return htonl(NFS4ERR_RESOURCE_HDR); *op = ntohl(*p); return 0; } @@ -215,10 +220,10 @@ out: #if defined(CONFIG_NFS_V4_1) -static unsigned decode_sessionid(struct xdr_stream *xdr, +static __be32 decode_sessionid(struct xdr_stream *xdr, struct nfs4_sessionid *sid) { - uint32_t *p; + __be32 *p; int len = NFS4_MAX_SESSIONID_LEN; p = read_buf(xdr, len); @@ -229,12 +234,12 @@ static unsigned decode_sessionid(struct xdr_stream *xdr, return 0; } -static unsigned decode_rc_list(struct xdr_stream *xdr, +static __be32 decode_rc_list(struct xdr_stream *xdr, struct referring_call_list *rc_list) { - uint32_t *p; + __be32 *p; int i; - unsigned status; + __be32 status; status = decode_sessionid(xdr, &rc_list->rcl_sessionid); if (status) @@ -267,13 +272,13 @@ out: return status; } -static unsigned decode_cb_sequence_args(struct svc_rqst *rqstp, +static __be32 decode_cb_sequence_args(struct svc_rqst *rqstp, struct xdr_stream *xdr, struct cb_sequenceargs *args) { - uint32_t *p; + __be32 *p; int i; - unsigned status; + __be32 status; status = decode_sessionid(xdr, &args->csa_sessionid); if (status) @@ -327,11 +332,11 @@ out_free: goto out; } -static unsigned decode_recallany_args(struct svc_rqst *rqstp, +static __be32 decode_recallany_args(struct svc_rqst *rqstp, struct xdr_stream *xdr, struct cb_recallanyargs *args) { - uint32_t *p; + __be32 *p; args->craa_addr = svc_addr(rqstp); p = read_buf(xdr, 4); @@ -346,6 +351,20 @@ static unsigned decode_recallany_args(struct svc_rqst *rqstp, return 0; } +static __be32 decode_recallslot_args(struct svc_rqst *rqstp, + struct xdr_stream *xdr, + struct cb_recallslotargs *args) +{ + __be32 *p; + + args->crsa_addr = svc_addr(rqstp); + p = read_buf(xdr, 4); + if (unlikely(p == NULL)) + return htonl(NFS4ERR_BADXDR); + args->crsa_target_max_slots = ntohl(*p++); + return 0; +} + #endif /* CONFIG_NFS_V4_1 */ static __be32 encode_string(struct xdr_stream *xdr, unsigned int len, const char *str) @@ -465,7 +484,7 @@ static __be32 encode_op_hdr(struct xdr_stream *xdr, uint32_t op, __be32 res) p = xdr_reserve_space(xdr, 8); if (unlikely(p == NULL)) - return htonl(NFS4ERR_RESOURCE); + return htonl(NFS4ERR_RESOURCE_HDR); *p++ = htonl(op); *p = res; return 0; @@ -499,10 +518,10 @@ out: #if defined(CONFIG_NFS_V4_1) -static unsigned encode_sessionid(struct xdr_stream *xdr, +static __be32 encode_sessionid(struct xdr_stream *xdr, const struct nfs4_sessionid *sid) { - uint32_t *p; + __be32 *p; int len = NFS4_MAX_SESSIONID_LEN; p = xdr_reserve_space(xdr, len); @@ -513,11 +532,11 @@ static unsigned encode_sessionid(struct xdr_stream *xdr, return 0; } -static unsigned encode_cb_sequence_res(struct svc_rqst *rqstp, +static __be32 encode_cb_sequence_res(struct svc_rqst *rqstp, struct xdr_stream *xdr, const struct cb_sequenceres *res) { - uint32_t *p; + __be32 *p; unsigned status = res->csr_status; if (unlikely(status != 0)) @@ -554,6 +573,7 @@ preprocess_nfs41_op(int nop, unsigned int op_nr, struct callback_op **op) case OP_CB_RECALL: case OP_CB_SEQUENCE: case OP_CB_RECALL_ANY: + case OP_CB_RECALL_SLOT: *op = &callback_ops[op_nr]; break; @@ -562,7 +582,6 @@ preprocess_nfs41_op(int nop, unsigned int op_nr, struct callback_op **op) case OP_CB_NOTIFY: case OP_CB_PUSH_DELEG: case OP_CB_RECALLABLE_OBJ_AVAIL: - case OP_CB_RECALL_SLOT: case OP_CB_WANTS_CANCELLED: case OP_CB_NOTIFY_LOCK: return htonl(NFS4ERR_NOTSUPP); @@ -602,20 +621,18 @@ preprocess_nfs4_op(unsigned int op_nr, struct callback_op **op) static __be32 process_op(uint32_t minorversion, int nop, struct svc_rqst *rqstp, struct xdr_stream *xdr_in, void *argp, - struct xdr_stream *xdr_out, void *resp) + struct xdr_stream *xdr_out, void *resp, int* drc_status) { struct callback_op *op = &callback_ops[0]; - unsigned int op_nr = OP_CB_ILLEGAL; + unsigned int op_nr; __be32 status; long maxlen; __be32 res; dprintk("%s: start\n", __func__); status = decode_op_hdr(xdr_in, &op_nr); - if (unlikely(status)) { - status = htonl(NFS4ERR_OP_ILLEGAL); - goto out; - } + if (unlikely(status)) + return status; dprintk("%s: minorversion=%d nop=%d op_nr=%u\n", __func__, minorversion, nop, op_nr); @@ -624,19 +641,32 @@ static __be32 process_op(uint32_t minorversion, int nop, preprocess_nfs4_op(op_nr, &op); if (status == htonl(NFS4ERR_OP_ILLEGAL)) op_nr = OP_CB_ILLEGAL; -out: + if (status) + goto encode_hdr; + + if (*drc_status) { + status = *drc_status; + goto encode_hdr; + } + maxlen = xdr_out->end - xdr_out->p; if (maxlen > 0 && maxlen < PAGE_SIZE) { - if (likely(status == 0 && op->decode_args != NULL)) - status = op->decode_args(rqstp, xdr_in, argp); - if (likely(status == 0 && op->process_op != NULL)) + status = op->decode_args(rqstp, xdr_in, argp); + if (likely(status == 0)) status = op->process_op(argp, resp); } else status = htonl(NFS4ERR_RESOURCE); + /* Only set by OP_CB_SEQUENCE processing */ + if (status == htonl(NFS4ERR_RETRY_UNCACHED_REP)) { + *drc_status = status; + status = 0; + } + +encode_hdr: res = encode_op_hdr(xdr_out, op_nr, status); - if (status == 0) - status = res; + if (unlikely(res)) + return res; if (op->encode_res != NULL && status == 0) status = op->encode_res(rqstp, xdr_out, resp); dprintk("%s: done, status = %d\n", __func__, ntohl(status)); @@ -652,7 +682,7 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r struct cb_compound_hdr_res hdr_res = { NULL }; struct xdr_stream xdr_in, xdr_out; __be32 *p; - __be32 status; + __be32 status, drc_status = 0; unsigned int nops = 0; dprintk("%s: start\n", __func__); @@ -672,11 +702,18 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r return rpc_system_err; while (status == 0 && nops != hdr_arg.nops) { - status = process_op(hdr_arg.minorversion, nops, - rqstp, &xdr_in, argp, &xdr_out, resp); + status = process_op(hdr_arg.minorversion, nops, rqstp, + &xdr_in, argp, &xdr_out, resp, &drc_status); nops++; } + /* Buffer overflow in decode_ops_hdr or encode_ops_hdr. Return + * resource error in cb_compound status without returning op */ + if (unlikely(status == htonl(NFS4ERR_RESOURCE_HDR))) { + status = htonl(NFS4ERR_RESOURCE); + nops--; + } + *hdr_res.status = status; *hdr_res.nops = htonl(nops); dprintk("%s: done, status = %u\n", __func__, ntohl(status)); @@ -713,6 +750,11 @@ static struct callback_op callback_ops[] = { .decode_args = (callback_decode_arg_t)decode_recallany_args, .res_maxsize = CB_OP_RECALLANY_RES_MAXSZ, }, + [OP_CB_RECALL_SLOT] = { + .process_op = (callback_process_op_t)nfs4_callback_recallslot, + .decode_args = (callback_decode_arg_t)decode_recallslot_args, + .res_maxsize = CB_OP_RECALLSLOT_RES_MAXSZ, + }, #endif /* CONFIG_NFS_V4_1 */ }; @@ -741,6 +783,7 @@ struct svc_version nfs4_callback_version1 = { .vs_proc = nfs4_callback_procedures1, .vs_xdrsize = NFS4_CALLBACK_XDRSIZE, .vs_dispatch = NULL, + .vs_hidden = 1, }; struct svc_version nfs4_callback_version4 = { diff --git a/fs/nfs/client.c b/fs/nfs/client.c index ee77713ce68b..acc9c4943b84 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -35,6 +35,7 @@ #include <linux/vfs.h> #include <linux/inet.h> #include <linux/in6.h> +#include <linux/slab.h> #include <net/ipv6.h> #include <linux/nfs_xdr.h> #include <linux/sunrpc/bc_xprt.h> @@ -164,30 +165,7 @@ error_0: return ERR_PTR(err); } -static void nfs4_shutdown_client(struct nfs_client *clp) -{ -#ifdef CONFIG_NFS_V4 - if (__test_and_clear_bit(NFS_CS_RENEWD, &clp->cl_res_state)) - nfs4_kill_renewd(clp); - BUG_ON(!RB_EMPTY_ROOT(&clp->cl_state_owners)); - if (__test_and_clear_bit(NFS_CS_IDMAP, &clp->cl_res_state)) - nfs_idmap_delete(clp); - - rpc_destroy_wait_queue(&clp->cl_rpcwaitq); -#endif -} - -/* - * Destroy the NFS4 callback service - */ -static void nfs4_destroy_callback(struct nfs_client *clp) -{ #ifdef CONFIG_NFS_V4 - if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state)) - nfs_callback_down(clp->cl_minorversion); -#endif /* CONFIG_NFS_V4 */ -} - /* * Clears/puts all minor version specific parts from an nfs_client struct * reverting it to minorversion 0. @@ -202,9 +180,33 @@ static void nfs4_clear_client_minor_version(struct nfs_client *clp) clp->cl_call_sync = _nfs4_call_sync; #endif /* CONFIG_NFS_V4_1 */ +} + +/* + * Destroy the NFS4 callback service + */ +static void nfs4_destroy_callback(struct nfs_client *clp) +{ + if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state)) + nfs_callback_down(clp->cl_minorversion); +} +static void nfs4_shutdown_client(struct nfs_client *clp) +{ + if (__test_and_clear_bit(NFS_CS_RENEWD, &clp->cl_res_state)) + nfs4_kill_renewd(clp); + nfs4_clear_client_minor_version(clp); nfs4_destroy_callback(clp); + if (__test_and_clear_bit(NFS_CS_IDMAP, &clp->cl_res_state)) + nfs_idmap_delete(clp); + + rpc_destroy_wait_queue(&clp->cl_rpcwaitq); +} +#else +static void nfs4_shutdown_client(struct nfs_client *clp) +{ } +#endif /* CONFIG_NFS_V4 */ /* * Destroy a shared client record @@ -213,7 +215,6 @@ static void nfs_free_client(struct nfs_client *clp) { dprintk("--> nfs_free_client(%u)\n", clp->rpc_ops->version); - nfs4_clear_client_minor_version(clp); nfs4_shutdown_client(clp); nfs_fscache_release_client_cookie(clp); @@ -965,6 +966,8 @@ out_error: static void nfs_server_copy_userdata(struct nfs_server *target, struct nfs_server *source) { target->flags = source->flags; + target->rsize = source->rsize; + target->wsize = source->wsize; target->acregmin = source->acregmin; target->acregmax = source->acregmax; target->acdirmin = source->acdirmin; @@ -1293,7 +1296,8 @@ static int nfs4_init_server(struct nfs_server *server, /* Initialise the client representation from the mount data */ server->flags = data->flags; - server->caps |= NFS_CAP_ATOMIC_OPEN|NFS_CAP_CHANGE_ATTR; + server->caps |= NFS_CAP_ATOMIC_OPEN|NFS_CAP_CHANGE_ATTR| + NFS_CAP_POSIX_LOCK; server->options = data->options; /* Get a client record */ diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 2563bebc4c67..15671245c6ee 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -10,6 +10,7 @@ #include <linux/kthread.h> #include <linux/module.h> #include <linux/sched.h> +#include <linux/slab.h> #include <linux/smp_lock.h> #include <linux/spinlock.h> diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h index 944b627ec6e1..69e7b8140122 100644 --- a/fs/nfs/delegation.h +++ b/fs/nfs/delegation.h @@ -71,4 +71,10 @@ static inline int nfs_inode_return_delegation(struct inode *inode) } #endif +static inline int nfs_have_delegated_attributes(struct inode *inode) +{ + return nfs_have_delegation(inode, FMODE_READ) && + !(NFS_I(inode)->cache_validity & NFS_INO_REVAL_FORCED); +} + #endif diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 3c7f03b669fb..a7bb5c694aa3 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -560,7 +560,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) desc->entry = &my_entry; nfs_block_sillyrename(dentry); - res = nfs_revalidate_mapping_nolock(inode, filp->f_mapping); + res = nfs_revalidate_mapping(inode, filp->f_mapping); if (res < 0) goto out; @@ -837,6 +837,8 @@ out_zap_parent: /* If we have submounts, don't unhash ! */ if (have_submounts(dentry)) goto out_valid; + if (dentry->d_flags & DCACHE_DISCONNECTED) + goto out_valid; shrink_dcache_parent(dentry); } d_drop(dentry); @@ -1025,12 +1027,12 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry res = NULL; goto out; /* This turned out not to be a regular file */ + case -EISDIR: case -ENOTDIR: goto no_open; case -ELOOP: if (!(nd->intent.open.flags & O_NOFOLLOW)) goto no_open; - /* case -EISDIR: */ /* case -EINVAL: */ default: goto out; @@ -1050,7 +1052,7 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd) struct inode *dir; int openflags, ret = 0; - if (!is_atomic_open(nd)) + if (!is_atomic_open(nd) || d_mountpoint(dentry)) goto no_open; parent = dget_parent(dentry); dir = parent->d_inode; @@ -1789,7 +1791,7 @@ static int nfs_access_get_cached(struct inode *inode, struct rpc_cred *cred, str cache = nfs_access_search_rbtree(inode, cred); if (cache == NULL) goto out; - if (!nfs_have_delegation(inode, FMODE_READ) && + if (!nfs_have_delegated_attributes(inode) && !time_in_range_open(jiffies, cache->jiffies, cache->jiffies + nfsi->attrtimeo)) goto out_stale; res->jiffies = cache->jiffies; diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 0d289823e856..ad4cd31d6050 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -44,6 +44,7 @@ #include <linux/file.h> #include <linux/pagemap.h> #include <linux/kref.h> +#include <linux/slab.h> #include <linux/nfs_fs.h> #include <linux/nfs_page.h> diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c index 95e1ca765d47..76fd235d0024 100644 --- a/fs/nfs/dns_resolve.c +++ b/fs/nfs/dns_resolve.c @@ -9,6 +9,7 @@ #include <linux/hash.h> #include <linux/string.h> #include <linux/kmod.h> +#include <linux/slab.h> #include <linux/module.h> #include <linux/socket.h> #include <linux/seq_file.h> @@ -36,6 +37,19 @@ struct nfs_dns_ent { }; +static void nfs_dns_ent_update(struct cache_head *cnew, + struct cache_head *ckey) +{ + struct nfs_dns_ent *new; + struct nfs_dns_ent *key; + + new = container_of(cnew, struct nfs_dns_ent, h); + key = container_of(ckey, struct nfs_dns_ent, h); + + memcpy(&new->addr, &key->addr, key->addrlen); + new->addrlen = key->addrlen; +} + static void nfs_dns_ent_init(struct cache_head *cnew, struct cache_head *ckey) { @@ -49,8 +63,7 @@ static void nfs_dns_ent_init(struct cache_head *cnew, new->hostname = kstrndup(key->hostname, key->namelen, GFP_KERNEL); if (new->hostname) { new->namelen = key->namelen; - memcpy(&new->addr, &key->addr, key->addrlen); - new->addrlen = key->addrlen; + nfs_dns_ent_update(cnew, ckey); } else { new->namelen = 0; new->addrlen = 0; @@ -234,7 +247,7 @@ static struct cache_detail nfs_dns_resolve = { .cache_show = nfs_dns_show, .match = nfs_dns_match, .init = nfs_dns_ent_init, - .update = nfs_dns_ent_init, + .update = nfs_dns_ent_update, .alloc = nfs_dns_ent_alloc, }; diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 63f2071d6445..8d965bddb87e 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -24,9 +24,9 @@ #include <linux/nfs_fs.h> #include <linux/nfs_mount.h> #include <linux/mm.h> -#include <linux/slab.h> #include <linux/pagemap.h> #include <linux/aio.h> +#include <linux/gfp.h> #include <asm/uaccess.h> #include <asm/system.h> @@ -123,11 +123,11 @@ nfs_file_open(struct inode *inode, struct file *filp) filp->f_path.dentry->d_parent->d_name.name, filp->f_path.dentry->d_name.name); + nfs_inc_stats(inode, NFSIOS_VFSOPEN); res = nfs_check_flags(filp->f_flags); if (res) return res; - nfs_inc_stats(inode, NFSIOS_VFSOPEN); res = nfs_open(inode, filp); return res; } @@ -237,9 +237,9 @@ nfs_file_flush(struct file *file, fl_owner_t id) dentry->d_parent->d_name.name, dentry->d_name.name); + nfs_inc_stats(inode, NFSIOS_VFSFLUSH); if ((file->f_mode & FMODE_WRITE) == 0) return 0; - nfs_inc_stats(inode, NFSIOS_VFSFLUSH); /* Flush writes to the server and return any errors */ return nfs_do_fsync(ctx, inode); @@ -262,9 +262,11 @@ nfs_file_read(struct kiocb *iocb, const struct iovec *iov, (unsigned long) count, (unsigned long) pos); result = nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping); - nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, count); - if (!result) + if (!result) { result = generic_file_aio_read(iocb, iov, nr_segs, pos); + if (result > 0) + nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, result); + } return result; } @@ -282,8 +284,11 @@ nfs_file_splice_read(struct file *filp, loff_t *ppos, (unsigned long) count, (unsigned long long) *ppos); res = nfs_revalidate_mapping(inode, filp->f_mapping); - if (!res) + if (!res) { res = generic_file_splice_read(filp, ppos, pipe, count, flags); + if (res > 0) + nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, res); + } return res; } @@ -486,7 +491,8 @@ static int nfs_release_page(struct page *page, gfp_t gfp) { dfprintk(PAGECACHE, "NFS: release_page(%p)\n", page); - if (gfp & __GFP_WAIT) + /* Only do I/O if gfp is a superset of GFP_KERNEL */ + if ((gfp & GFP_KERNEL) == GFP_KERNEL) nfs_wb_page(page->mapping->host, page); /* If PagePrivate() is set, then the page is not freeable */ if (PagePrivate(page)) @@ -596,6 +602,7 @@ static ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov, { struct dentry * dentry = iocb->ki_filp->f_path.dentry; struct inode * inode = dentry->d_inode; + unsigned long written = 0; ssize_t result; size_t count = iov_length(iov, nr_segs); @@ -622,14 +629,18 @@ static ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov, if (!count) goto out; - nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, count); result = generic_file_aio_write(iocb, iov, nr_segs, pos); + if (result > 0) + written = result; + /* Return error values for O_DSYNC and IS_SYNC() */ if (result >= 0 && nfs_need_sync_write(iocb->ki_filp, inode)) { int err = nfs_do_fsync(nfs_file_open_context(iocb->ki_filp), inode); if (err < 0) result = err; } + if (result > 0) + nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, written); out: return result; @@ -644,6 +655,7 @@ static ssize_t nfs_file_splice_write(struct pipe_inode_info *pipe, { struct dentry *dentry = filp->f_path.dentry; struct inode *inode = dentry->d_inode; + unsigned long written = 0; ssize_t ret; dprintk("NFS splice_write(%s/%s, %lu@%llu)\n", @@ -654,14 +666,17 @@ static ssize_t nfs_file_splice_write(struct pipe_inode_info *pipe, * The combination of splice and an O_APPEND destination is disallowed. */ - nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, count); - ret = generic_file_splice_write(pipe, filp, ppos, count, flags); + if (ret > 0) + written = ret; + if (ret >= 0 && nfs_need_sync_write(filp, inode)) { int err = nfs_do_fsync(nfs_file_open_context(filp), inode); if (err < 0) ret = err; } + if (ret > 0) + nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, written); return ret; } diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c index 237874f1af23..a6b16ed93229 100644 --- a/fs/nfs/fscache.c +++ b/fs/nfs/fscache.c @@ -17,6 +17,7 @@ #include <linux/nfs_fs_sb.h> #include <linux/in6.h> #include <linux/seq_file.h> +#include <linux/slab.h> #include "internal.h" #include "iostat.h" diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index f141bde7756a..50a56edca0b5 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -36,6 +36,7 @@ #include <linux/vfs.h> #include <linux/inet.h> #include <linux/nfs_xdr.h> +#include <linux/slab.h> #include <asm/system.h> #include <asm/uaccess.h> @@ -97,22 +98,6 @@ u64 nfs_compat_user_ino64(u64 fileid) return ino; } -int nfs_write_inode(struct inode *inode, int sync) -{ - int ret; - - if (sync) { - ret = filemap_fdatawait(inode->i_mapping); - if (ret == 0) - ret = nfs_commit_inode(inode, FLUSH_SYNC); - } else - ret = nfs_commit_inode(inode, 0); - if (ret >= 0) - return 0; - __mark_inode_dirty(inode, I_DIRTY_DATASYNC); - return ret; -} - void nfs_clear_inode(struct inode *inode) { /* @@ -130,16 +115,12 @@ void nfs_clear_inode(struct inode *inode) */ int nfs_sync_mapping(struct address_space *mapping) { - int ret; + int ret = 0; - if (mapping->nrpages == 0) - return 0; - unmap_mapping_range(mapping, 0, 0, 0); - ret = filemap_write_and_wait(mapping); - if (ret != 0) - goto out; - ret = nfs_wb_all(mapping->host); -out: + if (mapping->nrpages != 0) { + unmap_mapping_range(mapping, 0, 0, 0); + ret = nfs_wb_all(mapping->host); + } return ret; } @@ -511,17 +492,11 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) int need_atime = NFS_I(inode)->cache_validity & NFS_INO_INVALID_ATIME; int err; - /* - * Flush out writes to the server in order to update c/mtime. - * - * Hold the i_mutex to suspend application writes temporarily; - * this prevents long-running writing applications from blocking - * nfs_wb_nocommit. - */ + /* Flush out writes to the server in order to update c/mtime. */ if (S_ISREG(inode->i_mode)) { - mutex_lock(&inode->i_mutex); - nfs_wb_nocommit(inode); - mutex_unlock(&inode->i_mutex); + err = filemap_write_and_wait(inode->i_mapping); + if (err) + goto out; } /* @@ -545,6 +520,7 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) generic_fillattr(inode, stat); stat->ino = nfs_compat_user_ino64(NFS_FILEID(inode)); } +out: return err; } @@ -574,14 +550,14 @@ void nfs_close_context(struct nfs_open_context *ctx, int is_sync) nfs_revalidate_inode(server, inode); } -static struct nfs_open_context *alloc_nfs_open_context(struct vfsmount *mnt, struct dentry *dentry, struct rpc_cred *cred) +static struct nfs_open_context *alloc_nfs_open_context(struct path *path, struct rpc_cred *cred) { struct nfs_open_context *ctx; ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); if (ctx != NULL) { - ctx->path.dentry = dget(dentry); - ctx->path.mnt = mntget(mnt); + ctx->path = *path; + path_get(&ctx->path); ctx->cred = get_rpccred(cred); ctx->state = NULL; ctx->lockowner = current->files; @@ -620,11 +596,6 @@ void put_nfs_open_context(struct nfs_open_context *ctx) __put_nfs_open_context(ctx, 0); } -static void put_nfs_open_context_sync(struct nfs_open_context *ctx) -{ - __put_nfs_open_context(ctx, 1); -} - /* * Ensure that mmap has a recent RPC credential for use when writing out * shared pages @@ -652,10 +623,10 @@ struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_c list_for_each_entry(pos, &nfsi->open_files, list) { if (cred != NULL && pos->cred != cred) continue; - if ((pos->mode & mode) == mode) { - ctx = get_nfs_open_context(pos); - break; - } + if ((pos->mode & (FMODE_READ|FMODE_WRITE)) != mode) + continue; + ctx = get_nfs_open_context(pos); + break; } spin_unlock(&inode->i_lock); return ctx; @@ -671,7 +642,7 @@ static void nfs_file_clear_open_context(struct file *filp) spin_lock(&inode->i_lock); list_move_tail(&ctx->list, &NFS_I(inode)->open_files); spin_unlock(&inode->i_lock); - put_nfs_open_context_sync(ctx); + __put_nfs_open_context(ctx, filp->f_flags & O_DIRECT ? 0 : 1); } } @@ -686,7 +657,7 @@ int nfs_open(struct inode *inode, struct file *filp) cred = rpc_lookup_cred(); if (IS_ERR(cred)) return PTR_ERR(cred); - ctx = alloc_nfs_open_context(filp->f_path.mnt, filp->f_path.dentry, cred); + ctx = alloc_nfs_open_context(&filp->f_path, cred); put_rpccred(cred); if (ctx == NULL) return -ENOMEM; @@ -759,7 +730,7 @@ int nfs_attribute_timeout(struct inode *inode) { struct nfs_inode *nfsi = NFS_I(inode); - if (nfs_have_delegation(inode, FMODE_READ)) + if (nfs_have_delegated_attributes(inode)) return 0; return !time_in_range_open(jiffies, nfsi->read_cache_jiffies, nfsi->read_cache_jiffies + nfsi->attrtimeo); } @@ -779,7 +750,7 @@ int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) return __nfs_revalidate_inode(server, inode); } -static int nfs_invalidate_mapping_nolock(struct inode *inode, struct address_space *mapping) +static int nfs_invalidate_mapping(struct inode *inode, struct address_space *mapping) { struct nfs_inode *nfsi = NFS_I(inode); @@ -800,49 +771,10 @@ static int nfs_invalidate_mapping_nolock(struct inode *inode, struct address_spa return 0; } -static int nfs_invalidate_mapping(struct inode *inode, struct address_space *mapping) -{ - int ret = 0; - - mutex_lock(&inode->i_mutex); - if (NFS_I(inode)->cache_validity & NFS_INO_INVALID_DATA) { - ret = nfs_sync_mapping(mapping); - if (ret == 0) - ret = nfs_invalidate_mapping_nolock(inode, mapping); - } - mutex_unlock(&inode->i_mutex); - return ret; -} - -/** - * nfs_revalidate_mapping_nolock - Revalidate the pagecache - * @inode - pointer to host inode - * @mapping - pointer to mapping - */ -int nfs_revalidate_mapping_nolock(struct inode *inode, struct address_space *mapping) -{ - struct nfs_inode *nfsi = NFS_I(inode); - int ret = 0; - - if ((nfsi->cache_validity & NFS_INO_REVAL_PAGECACHE) - || nfs_attribute_timeout(inode) || NFS_STALE(inode)) { - ret = __nfs_revalidate_inode(NFS_SERVER(inode), inode); - if (ret < 0) - goto out; - } - if (nfsi->cache_validity & NFS_INO_INVALID_DATA) - ret = nfs_invalidate_mapping_nolock(inode, mapping); -out: - return ret; -} - /** * nfs_revalidate_mapping - Revalidate the pagecache * @inode - pointer to host inode * @mapping - pointer to mapping - * - * This version of the function will take the inode->i_mutex and attempt to - * flush out all dirty data if it needs to invalidate the page cache. */ int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping) { @@ -1420,6 +1352,7 @@ static void init_once(void *foo) INIT_LIST_HEAD(&nfsi->access_cache_inode_lru); INIT_RADIX_TREE(&nfsi->nfs_page_tree, GFP_ATOMIC); nfsi->npages = 0; + nfsi->ncommit = 0; atomic_set(&nfsi->silly_count, 1); INIT_HLIST_HEAD(&nfsi->silly_list); init_waitqueue_head(&nfsi->waitqueue); diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 29e464d23b32..11f82f03c5de 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -211,7 +211,7 @@ extern int nfs_access_cache_shrinker(int nr_to_scan, gfp_t gfp_mask); extern struct workqueue_struct *nfsiod_workqueue; extern struct inode *nfs_alloc_inode(struct super_block *sb); extern void nfs_destroy_inode(struct inode *); -extern int nfs_write_inode(struct inode *,int); +extern int nfs_write_inode(struct inode *, struct writeback_control *); extern void nfs_clear_inode(struct inode *); #ifdef CONFIG_NFS_V4 extern void nfs4_clear_inode(struct inode *); diff --git a/fs/nfs/iostat.h b/fs/nfs/iostat.h index 46d779abafd3..1d8d5c813b01 100644 --- a/fs/nfs/iostat.h +++ b/fs/nfs/iostat.h @@ -57,12 +57,12 @@ static inline void nfs_add_fscache_stats(struct inode *inode, } #endif -static inline struct nfs_iostats *nfs_alloc_iostats(void) +static inline struct nfs_iostats __percpu *nfs_alloc_iostats(void) { return alloc_percpu(struct nfs_iostats); } -static inline void nfs_free_iostats(struct nfs_iostats *stats) +static inline void nfs_free_iostats(struct nfs_iostats __percpu *stats) { if (stats != NULL) free_percpu(stats); diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index 40c766782891..7888cf36022d 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -8,6 +8,7 @@ */ #include <linux/dcache.h> +#include <linux/gfp.h> #include <linux/mount.h> #include <linux/namei.h> #include <linux/nfs_fs.h> diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c index 7bc2da8efd4a..81cf14257916 100644 --- a/fs/nfs/nfs2xdr.c +++ b/fs/nfs/nfs2xdr.c @@ -12,7 +12,6 @@ #include <linux/param.h> #include <linux/time.h> #include <linux/mm.h> -#include <linux/slab.h> #include <linux/errno.h> #include <linux/string.h> #include <linux/in.h> diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c index bac60515a4b3..d150ae0c5ecd 100644 --- a/fs/nfs/nfs3acl.c +++ b/fs/nfs/nfs3acl.c @@ -1,4 +1,5 @@ #include <linux/fs.h> +#include <linux/gfp.h> #include <linux/nfs.h> #include <linux/nfs3.h> #include <linux/nfs_fs.h> diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 3f8881d1a050..e701002694e5 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -10,6 +10,7 @@ #include <linux/errno.h> #include <linux/string.h> #include <linux/sunrpc/clnt.h> +#include <linux/slab.h> #include <linux/nfs.h> #include <linux/nfs3.h> #include <linux/nfs_fs.h> @@ -22,14 +23,14 @@ #define NFSDBG_FACILITY NFSDBG_PROC -/* A wrapper to handle the EJUKEBOX error message */ +/* A wrapper to handle the EJUKEBOX and EKEYEXPIRED error messages */ static int nfs3_rpc_wrapper(struct rpc_clnt *clnt, struct rpc_message *msg, int flags) { int res; do { res = rpc_call_sync(clnt, msg, flags); - if (res != -EJUKEBOX) + if (res != -EJUKEBOX && res != -EKEYEXPIRED) break; schedule_timeout_killable(NFS_JUKEBOX_RETRY_TIME); res = -ERESTARTSYS; @@ -42,9 +43,10 @@ nfs3_rpc_wrapper(struct rpc_clnt *clnt, struct rpc_message *msg, int flags) static int nfs3_async_handle_jukebox(struct rpc_task *task, struct inode *inode) { - if (task->tk_status != -EJUKEBOX) + if (task->tk_status != -EJUKEBOX && task->tk_status != -EKEYEXPIRED) return 0; - nfs_inc_stats(inode, NFSIOS_DELAY); + if (task->tk_status == -EJUKEBOX) + nfs_inc_stats(inode, NFSIOS_DELAY); task->tk_status = 0; rpc_restart_call(task); rpc_delay(task, NFS_JUKEBOX_RETRY_TIME); diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index 5fe5492fbd29..56a86f6ac8b5 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -9,7 +9,6 @@ #include <linux/param.h> #include <linux/time.h> #include <linux/mm.h> -#include <linux/slab.h> #include <linux/errno.h> #include <linux/string.h> #include <linux/in.h> diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 0c6fda33d66e..a187200a7aac 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -46,6 +46,7 @@ enum nfs4_client_state { NFS4CLNT_DELEGRETURN, NFS4CLNT_SESSION_RESET, NFS4CLNT_SESSION_DRAINING, + NFS4CLNT_RECALL_SLOT, }; /* @@ -280,6 +281,7 @@ extern void nfs4_schedule_state_manager(struct nfs_client *); extern int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *state); extern int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *state); extern void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags); +extern void nfs41_handle_recall_slot(struct nfs_client *clp); extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp); extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl); extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t); diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c index fa3408f20112..f071d12c613b 100644 --- a/fs/nfs/nfs4namespace.c +++ b/fs/nfs/nfs4namespace.c @@ -11,6 +11,7 @@ #include <linux/mount.h> #include <linux/namei.h> #include <linux/nfs_fs.h> +#include <linux/slab.h> #include <linux/string.h> #include <linux/sunrpc/clnt.h> #include <linux/vfs.h> diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 375f0fae2c6a..071fcedd517c 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -39,6 +39,7 @@ #include <linux/delay.h> #include <linux/errno.h> #include <linux/string.h> +#include <linux/slab.h> #include <linux/sunrpc/clnt.h> #include <linux/nfs.h> #include <linux/nfs4.h> @@ -281,6 +282,7 @@ static int nfs4_handle_exception(const struct nfs_server *server, int errorcode, } case -NFS4ERR_GRACE: case -NFS4ERR_DELAY: + case -EKEYEXPIRED: ret = nfs4_delay(server->client, &exception->timeout); if (ret != 0) break; @@ -418,7 +420,8 @@ static void nfs41_sequence_done(struct nfs_client *clp, clp->cl_last_renewal = timestamp; spin_unlock(&clp->cl_lock); /* Check sequence flags */ - nfs41_handle_sequence_flag_errors(clp, res->sr_status_flags); + if (atomic_read(&clp->cl_count) > 1) + nfs41_handle_sequence_flag_errors(clp, res->sr_status_flags); } out: /* The session may be reset by one of the error handlers. */ @@ -724,8 +727,8 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct path *path, p->o_arg.seqid = nfs_alloc_seqid(&sp->so_seqid); if (p->o_arg.seqid == NULL) goto err_free; - p->path.mnt = mntget(path->mnt); - p->path.dentry = dget(path->dentry); + path_get(path); + p->path = *path; p->dir = parent; p->owner = sp; atomic_inc(&sp->so_count); @@ -1163,7 +1166,7 @@ static int nfs4_do_open_reclaim(struct nfs_open_context *ctx, struct nfs4_state int err; do { err = _nfs4_do_open_reclaim(ctx, state); - if (err != -NFS4ERR_DELAY) + if (err != -NFS4ERR_DELAY && err != -EKEYEXPIRED) break; nfs4_handle_exception(server, err, &exception); } while (exception.retry); @@ -1520,6 +1523,8 @@ static int _nfs4_proc_open(struct nfs4_opendata *data) nfs_post_op_update_inode(dir, o_res->dir_attr); } else nfs_refresh_inode(dir, o_res->dir_attr); + if ((o_res->rflags & NFS4_OPEN_RESULT_LOCKTYPE_POSIX) == 0) + server->caps &= ~NFS_CAP_POSIX_LOCK; if(o_res->rflags & NFS4_OPEN_RESULT_CONFIRM) { status = _nfs4_proc_open_confirm(data); if (status != 0) @@ -1582,6 +1587,7 @@ static int nfs4_do_open_expired(struct nfs_open_context *ctx, struct nfs4_state goto out; case -NFS4ERR_GRACE: case -NFS4ERR_DELAY: + case -EKEYEXPIRED: nfs4_handle_exception(server, err, &exception); err = 0; } @@ -1660,7 +1666,7 @@ static int _nfs4_do_open(struct inode *dir, struct path *path, fmode_t fmode, in status = PTR_ERR(state); if (IS_ERR(state)) goto err_opendata_put; - if ((opendata->o_res.rflags & NFS4_OPEN_RESULT_LOCKTYPE_POSIX) != 0) + if (server->caps & NFS_CAP_POSIX_LOCK) set_bit(NFS_STATE_POSIX_LOCKS, &state->flags); nfs4_opendata_put(opendata); nfs4_put_state_owner(sp); @@ -1944,8 +1950,8 @@ int nfs4_do_close(struct path *path, struct nfs4_state *state, int wait) calldata->res.seqid = calldata->arg.seqid; calldata->res.server = server; calldata->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE; - calldata->path.mnt = mntget(path->mnt); - calldata->path.dentry = dget(path->dentry); + path_get(path); + calldata->path = *path; msg.rpc_argp = &calldata->arg, msg.rpc_resp = &calldata->res, @@ -2064,8 +2070,7 @@ nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags, st case -EDQUOT: case -ENOSPC: case -EROFS: - lookup_instantiate_filp(nd, (struct dentry *)state, NULL); - return 1; + return PTR_ERR(state); default: goto out_drop; } @@ -3145,10 +3150,19 @@ static void nfs4_proc_commit_setup(struct nfs_write_data *data, struct rpc_messa * nfs4_proc_async_renew(): This is not one of the nfs_rpc_ops; it is a special * standalone procedure for queueing an asynchronous RENEW. */ +static void nfs4_renew_release(void *data) +{ + struct nfs_client *clp = data; + + if (atomic_read(&clp->cl_count) > 1) + nfs4_schedule_state_renewal(clp); + nfs_put_client(clp); +} + static void nfs4_renew_done(struct rpc_task *task, void *data) { - struct nfs_client *clp = (struct nfs_client *)task->tk_msg.rpc_argp; - unsigned long timestamp = (unsigned long)data; + struct nfs_client *clp = data; + unsigned long timestamp = task->tk_start; if (task->tk_status < 0) { /* Unless we're shutting down, schedule state recovery! */ @@ -3164,6 +3178,7 @@ static void nfs4_renew_done(struct rpc_task *task, void *data) static const struct rpc_call_ops nfs4_renew_ops = { .rpc_call_done = nfs4_renew_done, + .rpc_release = nfs4_renew_release, }; int nfs4_proc_async_renew(struct nfs_client *clp, struct rpc_cred *cred) @@ -3174,8 +3189,10 @@ int nfs4_proc_async_renew(struct nfs_client *clp, struct rpc_cred *cred) .rpc_cred = cred, }; + if (!atomic_inc_not_zero(&clp->cl_count)) + return -EIO; return rpc_call_async(clp->cl_rpcclient, &msg, RPC_TASK_SOFT, - &nfs4_renew_ops, (void *)jiffies); + &nfs4_renew_ops, clp); } int nfs4_proc_renew(struct nfs_client *clp, struct rpc_cred *cred) @@ -3452,6 +3469,7 @@ _nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, if (server) nfs_inc_server_stats(server, NFSIOS_DELAY); case -NFS4ERR_GRACE: + case -EKEYEXPIRED: rpc_delay(task, NFS4_POLL_RETRY_MAX); task->tk_status = 0; return -EAGAIN; @@ -3564,6 +3582,7 @@ int nfs4_proc_setclientid_confirm(struct nfs_client *clp, struct rpc_cred *cred) case -NFS4ERR_RESOURCE: /* The IBM lawyers misread another document! */ case -NFS4ERR_DELAY: + case -EKEYEXPIRED: err = nfs4_delay(clp->cl_rpcclient, &timeout); } } while (err == 0); @@ -4179,7 +4198,7 @@ static int nfs4_lock_reclaim(struct nfs4_state *state, struct file_lock *request if (test_bit(NFS_DELEGATED_STATE, &state->flags) != 0) return 0; err = _nfs4_do_setlk(state, F_SETLK, request, NFS_LOCK_RECLAIM); - if (err != -NFS4ERR_DELAY) + if (err != -NFS4ERR_DELAY && err != -EKEYEXPIRED) break; nfs4_handle_exception(server, err, &exception); } while (exception.retry); @@ -4204,6 +4223,7 @@ static int nfs4_lock_expired(struct nfs4_state *state, struct file_lock *request goto out; case -NFS4ERR_GRACE: case -NFS4ERR_DELAY: + case -EKEYEXPIRED: nfs4_handle_exception(server, err, &exception); err = 0; } @@ -4355,6 +4375,7 @@ int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl) err = 0; goto out; case -NFS4ERR_DELAY: + case -EKEYEXPIRED: break; } err = nfs4_handle_exception(server, err, &exception); @@ -4500,7 +4521,7 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred) status = rpc_call_sync(clp->cl_rpcclient, &msg, 0); - if (status != NFS4ERR_CLID_INUSE) + if (status != -NFS4ERR_CLID_INUSE) break; if (signalled()) @@ -4554,6 +4575,7 @@ static void nfs4_get_lease_time_done(struct rpc_task *task, void *calldata) switch (task->tk_status) { case -NFS4ERR_DELAY: case -NFS4ERR_GRACE: + case -EKEYEXPIRED: dprintk("%s Retry: tk_status %d\n", __func__, task->tk_status); rpc_delay(task, NFS4_POLL_RETRY_MIN); task->tk_status = 0; @@ -4611,26 +4633,32 @@ int nfs4_proc_get_lease_time(struct nfs_client *clp, struct nfs_fsinfo *fsinfo) /* * Reset a slot table */ -static int nfs4_reset_slot_table(struct nfs4_slot_table *tbl, int max_slots, - int old_max_slots, int ivalue) +static int nfs4_reset_slot_table(struct nfs4_slot_table *tbl, u32 max_reqs, + int ivalue) { + struct nfs4_slot *new = NULL; int i; int ret = 0; - dprintk("--> %s: max_reqs=%u, tbl %p\n", __func__, max_slots, tbl); + dprintk("--> %s: max_reqs=%u, tbl->max_slots %d\n", __func__, + max_reqs, tbl->max_slots); - /* - * Until we have dynamic slot table adjustment, insist - * upon the same slot table size - */ - if (max_slots != old_max_slots) { - dprintk("%s reset slot table does't match old\n", - __func__); - ret = -EINVAL; /*XXX NFS4ERR_REQ_TOO_BIG ? */ - goto out; + /* Does the newly negotiated max_reqs match the existing slot table? */ + if (max_reqs != tbl->max_slots) { + ret = -ENOMEM; + new = kmalloc(max_reqs * sizeof(struct nfs4_slot), + GFP_KERNEL); + if (!new) + goto out; + ret = 0; + kfree(tbl->slots); } spin_lock(&tbl->slot_tbl_lock); - for (i = 0; i < max_slots; ++i) + if (new) { + tbl->slots = new; + tbl->max_slots = max_reqs; + } + for (i = 0; i < tbl->max_slots; ++i) tbl->slots[i].seq_nr = ivalue; spin_unlock(&tbl->slot_tbl_lock); dprintk("%s: tbl=%p slots=%p max_slots=%d\n", __func__, @@ -4648,16 +4676,12 @@ static int nfs4_reset_slot_tables(struct nfs4_session *session) int status; status = nfs4_reset_slot_table(&session->fc_slot_table, - session->fc_attrs.max_reqs, - session->fc_slot_table.max_slots, - 1); + session->fc_attrs.max_reqs, 1); if (status) return status; status = nfs4_reset_slot_table(&session->bc_slot_table, - session->bc_attrs.max_reqs, - session->bc_slot_table.max_slots, - 0); + session->bc_attrs.max_reqs, 0); return status; } @@ -4798,16 +4822,14 @@ static void nfs4_init_channel_attrs(struct nfs41_create_session_args *args) args->fc_attrs.headerpadsz = 0; args->fc_attrs.max_rqst_sz = mxrqst_sz; args->fc_attrs.max_resp_sz = mxresp_sz; - args->fc_attrs.max_resp_sz_cached = mxresp_sz; args->fc_attrs.max_ops = NFS4_MAX_OPS; args->fc_attrs.max_reqs = session->clp->cl_rpcclient->cl_xprt->max_reqs; dprintk("%s: Fore Channel : max_rqst_sz=%u max_resp_sz=%u " - "max_resp_sz_cached=%u max_ops=%u max_reqs=%u\n", + "max_ops=%u max_reqs=%u\n", __func__, args->fc_attrs.max_rqst_sz, args->fc_attrs.max_resp_sz, - args->fc_attrs.max_resp_sz_cached, args->fc_attrs.max_ops, - args->fc_attrs.max_reqs); + args->fc_attrs.max_ops, args->fc_attrs.max_reqs); /* Back channel attributes */ args->bc_attrs.headerpadsz = 0; @@ -5016,7 +5038,16 @@ static int nfs4_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred) &res, args.sa_cache_this, 1); } -void nfs41_sequence_call_done(struct rpc_task *task, void *data) +static void nfs41_sequence_release(void *data) +{ + struct nfs_client *clp = (struct nfs_client *)data; + + if (atomic_read(&clp->cl_count) > 1) + nfs4_schedule_state_renewal(clp); + nfs_put_client(clp); +} + +static void nfs41_sequence_call_done(struct rpc_task *task, void *data) { struct nfs_client *clp = (struct nfs_client *)data; @@ -5024,6 +5055,8 @@ void nfs41_sequence_call_done(struct rpc_task *task, void *data) if (task->tk_status < 0) { dprintk("%s ERROR %d\n", __func__, task->tk_status); + if (atomic_read(&clp->cl_count) == 1) + goto out; if (_nfs4_async_handle_error(task, NULL, clp, NULL) == -EAGAIN) { @@ -5032,7 +5065,7 @@ void nfs41_sequence_call_done(struct rpc_task *task, void *data) } } dprintk("%s rpc_cred %p\n", __func__, task->tk_msg.rpc_cred); - +out: kfree(task->tk_msg.rpc_argp); kfree(task->tk_msg.rpc_resp); @@ -5057,6 +5090,7 @@ static void nfs41_sequence_prepare(struct rpc_task *task, void *data) static const struct rpc_call_ops nfs41_sequence_ops = { .rpc_call_done = nfs41_sequence_call_done, .rpc_call_prepare = nfs41_sequence_prepare, + .rpc_release = nfs41_sequence_release, }; static int nfs41_proc_async_sequence(struct nfs_client *clp, @@ -5069,12 +5103,14 @@ static int nfs41_proc_async_sequence(struct nfs_client *clp, .rpc_cred = cred, }; + if (!atomic_inc_not_zero(&clp->cl_count)) + return -EIO; args = kzalloc(sizeof(*args), GFP_KERNEL); - if (!args) - return -ENOMEM; res = kzalloc(sizeof(*res), GFP_KERNEL); - if (!res) { + if (!args || !res) { kfree(args); + kfree(res); + nfs_put_client(clp); return -ENOMEM; } res->sr_slotid = NFS4_MAX_SLOT_TABLE; @@ -5182,9 +5218,12 @@ static int nfs41_proc_reclaim_complete(struct nfs_client *clp) msg.rpc_resp = &calldata->res; task_setup_data.callback_data = calldata; task = rpc_run_task(&task_setup_data); - if (IS_ERR(task)) + if (IS_ERR(task)) { status = PTR_ERR(task); + goto out; + } rpc_put_task(task); + return 0; out: dprintk("<-- %s status=%d\n", __func__, status); return status; diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c index 0156c01c212c..d87f10327b72 100644 --- a/fs/nfs/nfs4renewd.c +++ b/fs/nfs/nfs4renewd.c @@ -36,11 +36,6 @@ * as an rpc_task, not a real kernel thread, so it always runs in rpciod's * context. There is one renewd per nfs_server. * - * TODO: If the send queue gets backlogged (e.g., if the server goes down), - * we will keep filling the queue with periodic RENEW requests. We need a - * mechanism for ensuring that if renewd successfully sends off a request, - * then it only wakes up when the request is finished. Maybe use the - * child task framework of the RPC layer? */ #include <linux/mm.h> @@ -63,7 +58,7 @@ nfs4_renew_state(struct work_struct *work) struct nfs_client *clp = container_of(work, struct nfs_client, cl_renewd.work); struct rpc_cred *cred; - long lease, timeout; + long lease; unsigned long last, now; ops = nfs4_state_renewal_ops[clp->cl_minorversion]; @@ -75,7 +70,6 @@ nfs4_renew_state(struct work_struct *work) lease = clp->cl_lease_time; last = clp->cl_last_renewal; now = jiffies; - timeout = (2 * lease) / 3 + (long)last - (long)now; /* Are we close to a lease timeout? */ if (time_after(now, last + lease/3)) { cred = ops->get_state_renewal_cred_locked(clp); @@ -90,19 +84,15 @@ nfs4_renew_state(struct work_struct *work) /* Queue an asynchronous RENEW. */ ops->sched_state_renewal(clp, cred); put_rpccred(cred); + goto out_exp; } - timeout = (2 * lease) / 3; - spin_lock(&clp->cl_lock); - } else + } else { dprintk("%s: failed to call renewd. Reason: lease not expired \n", __func__); - if (timeout < 5 * HZ) /* safeguard */ - timeout = 5 * HZ; - dprintk("%s: requeueing work. Lease period = %ld\n", - __func__, (timeout + HZ - 1) / HZ); - cancel_delayed_work(&clp->cl_renewd); - schedule_delayed_work(&clp->cl_renewd, timeout); - spin_unlock(&clp->cl_lock); + spin_unlock(&clp->cl_lock); + } + nfs4_schedule_state_renewal(clp); +out_exp: nfs_expire_unreferenced_delegations(clp); out: dprintk("%s: done\n", __func__); diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index c1e2733f4fa4..6c5ed51f105e 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1249,26 +1249,65 @@ static int nfs4_reclaim_lease(struct nfs_client *clp) } #ifdef CONFIG_NFS_V4_1 +void nfs41_handle_recall_slot(struct nfs_client *clp) +{ + set_bit(NFS4CLNT_RECALL_SLOT, &clp->cl_state); + nfs4_schedule_state_recovery(clp); +} + +static void nfs4_reset_all_state(struct nfs_client *clp) +{ + if (test_and_set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0) { + clp->cl_boot_time = CURRENT_TIME; + nfs4_state_start_reclaim_nograce(clp); + nfs4_schedule_state_recovery(clp); + } +} + +static void nfs41_handle_server_reboot(struct nfs_client *clp) +{ + if (test_and_set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0) { + nfs4_state_start_reclaim_reboot(clp); + nfs4_schedule_state_recovery(clp); + } +} + +static void nfs41_handle_state_revoked(struct nfs_client *clp) +{ + /* Temporary */ + nfs4_reset_all_state(clp); +} + +static void nfs41_handle_recallable_state_revoked(struct nfs_client *clp) +{ + /* This will need to handle layouts too */ + nfs_expire_all_delegations(clp); +} + +static void nfs41_handle_cb_path_down(struct nfs_client *clp) +{ + nfs_expire_all_delegations(clp); + if (test_and_set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state) == 0) + nfs4_schedule_state_recovery(clp); +} + void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags) { if (!flags) return; - else if (flags & SEQ4_STATUS_RESTART_RECLAIM_NEEDED) { - set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); - nfs4_state_start_reclaim_reboot(clp); - nfs4_schedule_state_recovery(clp); - } else if (flags & (SEQ4_STATUS_EXPIRED_ALL_STATE_REVOKED | + else if (flags & SEQ4_STATUS_RESTART_RECLAIM_NEEDED) + nfs41_handle_server_reboot(clp); + else if (flags & (SEQ4_STATUS_EXPIRED_ALL_STATE_REVOKED | SEQ4_STATUS_EXPIRED_SOME_STATE_REVOKED | SEQ4_STATUS_ADMIN_STATE_REVOKED | - SEQ4_STATUS_RECALLABLE_STATE_REVOKED | - SEQ4_STATUS_LEASE_MOVED)) { - set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); - nfs4_state_start_reclaim_nograce(clp); - nfs4_schedule_state_recovery(clp); - } else if (flags & (SEQ4_STATUS_CB_PATH_DOWN | + SEQ4_STATUS_LEASE_MOVED)) + nfs41_handle_state_revoked(clp); + else if (flags & SEQ4_STATUS_RECALLABLE_STATE_REVOKED) + nfs41_handle_recallable_state_revoked(clp); + else if (flags & (SEQ4_STATUS_CB_PATH_DOWN | SEQ4_STATUS_BACKCHANNEL_FAULT | SEQ4_STATUS_CB_PATH_DOWN_SESSION)) - nfs_expire_all_delegations(clp); + nfs41_handle_cb_path_down(clp); } static int nfs4_reset_session(struct nfs_client *clp) @@ -1285,23 +1324,52 @@ static int nfs4_reset_session(struct nfs_client *clp) memset(clp->cl_session->sess_id.data, 0, NFS4_MAX_SESSIONID_LEN); status = nfs4_proc_create_session(clp); - if (status) + if (status) { status = nfs4_recovery_handle_error(clp, status); + goto out; + } + /* create_session negotiated new slot table */ + clear_bit(NFS4CLNT_RECALL_SLOT, &clp->cl_state); -out: - /* - * Let the state manager reestablish state - */ - if (!test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) && - status == 0) + /* Let the state manager reestablish state */ + if (!test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) nfs41_setup_state_renewal(clp); - +out: return status; } +static int nfs4_recall_slot(struct nfs_client *clp) +{ + struct nfs4_slot_table *fc_tbl = &clp->cl_session->fc_slot_table; + struct nfs4_channel_attrs *fc_attrs = &clp->cl_session->fc_attrs; + struct nfs4_slot *new, *old; + int i; + + nfs4_begin_drain_session(clp); + new = kmalloc(fc_tbl->target_max_slots * sizeof(struct nfs4_slot), + GFP_KERNEL); + if (!new) + return -ENOMEM; + + spin_lock(&fc_tbl->slot_tbl_lock); + for (i = 0; i < fc_tbl->target_max_slots; i++) + new[i].seq_nr = fc_tbl->slots[i].seq_nr; + old = fc_tbl->slots; + fc_tbl->slots = new; + fc_tbl->max_slots = fc_tbl->target_max_slots; + fc_tbl->target_max_slots = 0; + fc_attrs->max_reqs = fc_tbl->max_slots; + spin_unlock(&fc_tbl->slot_tbl_lock); + + kfree(old); + nfs4_end_drain_session(clp); + return 0; +} + #else /* CONFIG_NFS_V4_1 */ static int nfs4_reset_session(struct nfs_client *clp) { return 0; } static int nfs4_end_drain_session(struct nfs_client *clp) { return 0; } +static int nfs4_recall_slot(struct nfs_client *clp) { return 0; } #endif /* CONFIG_NFS_V4_1 */ /* Set NFS4CLNT_LEASE_EXPIRED for all v4.0 errors and for recoverable errors @@ -1314,6 +1382,7 @@ static void nfs4_set_lease_expired(struct nfs_client *clp, int status) case -NFS4ERR_DELAY: case -NFS4ERR_CLID_INUSE: case -EAGAIN: + case -EKEYEXPIRED: break; case -NFS4ERR_NOT_SAME: /* FixMe: implement recovery @@ -1397,6 +1466,15 @@ static void nfs4_state_manager(struct nfs_client *clp) nfs_client_return_marked_delegations(clp); continue; } + /* Recall session slots */ + if (test_and_clear_bit(NFS4CLNT_RECALL_SLOT, &clp->cl_state) + && nfs4_has_session(clp)) { + status = nfs4_recall_slot(clp); + if (status < 0) + goto out_error; + continue; + } + nfs4_clear_state_manager_bit(clp); /* Did we race with an attempt to give us more work? */ diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 5cd5184b56db..38f3b582e7c2 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -38,7 +38,6 @@ #include <linux/param.h> #include <linux/time.h> #include <linux/mm.h> -#include <linux/slab.h> #include <linux/errno.h> #include <linux/string.h> #include <linux/in.h> @@ -1578,6 +1577,14 @@ static void encode_create_session(struct xdr_stream *xdr, char machine_name[NFS4_MAX_MACHINE_NAME_LEN]; uint32_t len; struct nfs_client *clp = args->client; + u32 max_resp_sz_cached; + + /* + * Assumes OPEN is the biggest non-idempotent compound. + * 2 is the verifier. + */ + max_resp_sz_cached = (NFS4_dec_open_sz + RPC_REPHDRSIZE + + RPC_MAX_AUTH_SIZE + 2) * XDR_UNIT; len = scnprintf(machine_name, sizeof(machine_name), "%s", clp->cl_ipaddr); @@ -1592,7 +1599,7 @@ static void encode_create_session(struct xdr_stream *xdr, *p++ = cpu_to_be32(args->fc_attrs.headerpadsz); /* header padding size */ *p++ = cpu_to_be32(args->fc_attrs.max_rqst_sz); /* max req size */ *p++ = cpu_to_be32(args->fc_attrs.max_resp_sz); /* max resp size */ - *p++ = cpu_to_be32(args->fc_attrs.max_resp_sz_cached); /* Max resp sz cached */ + *p++ = cpu_to_be32(max_resp_sz_cached); /* Max resp sz cached */ *p++ = cpu_to_be32(args->fc_attrs.max_ops); /* max operations */ *p++ = cpu_to_be32(args->fc_attrs.max_reqs); /* max requests */ *p++ = cpu_to_be32(0); /* rdmachannel_attrs */ @@ -5544,6 +5551,8 @@ static int nfs4_xdr_dec_delegreturn(struct rpc_rqst *rqstp, __be32 *p, struct nf if (status != 0) goto out; status = decode_delegreturn(&xdr); + if (status != 0) + goto out; decode_getfattr(&xdr, res->fattr, res->server, !RPC_IS_ASYNC(rqstp->rq_task)); out: diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index a12c45b65dd4..29d9d36cd5f4 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -112,12 +112,10 @@ void nfs_unlock_request(struct nfs_page *req) */ int nfs_set_page_tag_locked(struct nfs_page *req) { - struct nfs_inode *nfsi = NFS_I(req->wb_context->path.dentry->d_inode); - if (!nfs_lock_request_dontget(req)) return 0; if (req->wb_page != NULL) - radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED); + radix_tree_tag_set(&NFS_I(req->wb_context->path.dentry->d_inode)->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED); return 1; } @@ -126,10 +124,10 @@ int nfs_set_page_tag_locked(struct nfs_page *req) */ void nfs_clear_page_tag_locked(struct nfs_page *req) { - struct inode *inode = req->wb_context->path.dentry->d_inode; - struct nfs_inode *nfsi = NFS_I(inode); - if (req->wb_page != NULL) { + struct inode *inode = req->wb_context->path.dentry->d_inode; + struct nfs_inode *nfsi = NFS_I(inode); + spin_lock(&inode->i_lock); radix_tree_tag_clear(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED); nfs_unlock_request(req); @@ -142,16 +140,22 @@ void nfs_clear_page_tag_locked(struct nfs_page *req) * nfs_clear_request - Free up all resources allocated to the request * @req: * - * Release page resources associated with a write request after it - * has completed. + * Release page and open context resources associated with a read/write + * request after it has completed. */ void nfs_clear_request(struct nfs_page *req) { struct page *page = req->wb_page; + struct nfs_open_context *ctx = req->wb_context; + if (page != NULL) { page_cache_release(page); req->wb_page = NULL; } + if (ctx != NULL) { + put_nfs_open_context(ctx); + req->wb_context = NULL; + } } @@ -165,9 +169,8 @@ static void nfs_free_request(struct kref *kref) { struct nfs_page *req = container_of(kref, struct nfs_page, wb_kref); - /* Release struct file or cached credential */ + /* Release struct file and open context */ nfs_clear_request(req); - put_nfs_open_context(req->wb_context); nfs_page_free(req); } diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index ef583854d8d0..0288be80444f 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -29,7 +29,6 @@ #include <linux/types.h> #include <linux/param.h> -#include <linux/slab.h> #include <linux/time.h> #include <linux/mm.h> #include <linux/errno.h> @@ -47,6 +46,39 @@ #define NFSDBG_FACILITY NFSDBG_PROC /* + * wrapper to handle the -EKEYEXPIRED error message. This should generally + * only happen if using krb5 auth and a user's TGT expires. NFSv2 doesn't + * support the NFSERR_JUKEBOX error code, but we handle this situation in the + * same way that we handle that error with NFSv3. + */ +static int +nfs_rpc_wrapper(struct rpc_clnt *clnt, struct rpc_message *msg, int flags) +{ + int res; + do { + res = rpc_call_sync(clnt, msg, flags); + if (res != -EKEYEXPIRED) + break; + schedule_timeout_killable(NFS_JUKEBOX_RETRY_TIME); + res = -ERESTARTSYS; + } while (!fatal_signal_pending(current)); + return res; +} + +#define rpc_call_sync(clnt, msg, flags) nfs_rpc_wrapper(clnt, msg, flags) + +static int +nfs_async_handle_expired_key(struct rpc_task *task) +{ + if (task->tk_status != -EKEYEXPIRED) + return 0; + task->tk_status = 0; + rpc_restart_call(task); + rpc_delay(task, NFS_JUKEBOX_RETRY_TIME); + return 1; +} + +/* * Bare-bones access to getattr: this is for nfs_read_super. */ static int @@ -307,6 +339,8 @@ nfs_proc_unlink_setup(struct rpc_message *msg, struct inode *dir) static int nfs_proc_unlink_done(struct rpc_task *task, struct inode *dir) { + if (nfs_async_handle_expired_key(task)) + return 0; nfs_mark_for_revalidate(dir); return 1; } @@ -560,6 +594,9 @@ nfs_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, static int nfs_read_done(struct rpc_task *task, struct nfs_read_data *data) { + if (nfs_async_handle_expired_key(task)) + return -EAGAIN; + nfs_invalidate_atime(data->inode); if (task->tk_status >= 0) { nfs_refresh_inode(data->inode, data->res.fattr); @@ -579,6 +616,9 @@ static void nfs_proc_read_setup(struct nfs_read_data *data, struct rpc_message * static int nfs_write_done(struct rpc_task *task, struct nfs_write_data *data) { + if (nfs_async_handle_expired_key(task)) + return -EAGAIN; + if (task->tk_status >= 0) nfs_post_op_update_inode_force_wcc(data->inode, data->res.fattr); return 0; diff --git a/fs/nfs/super.c b/fs/nfs/super.c index f1afee4eea77..b4148fc00f9f 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -48,6 +48,7 @@ #include <linux/vfs.h> #include <linux/inet.h> #include <linux/in6.h> +#include <linux/slab.h> #include <net/ipv6.h> #include <linux/netdevice.h> #include <linux/nfs_xdr.h> @@ -2186,6 +2187,7 @@ static int nfs_get_sb(struct file_system_type *fs_type, if (data->version == 4) { error = nfs4_try_mount(flags, dev_name, data, mnt); kfree(data->client_address); + kfree(data->nfs_server.export_path); goto out; } #endif /* CONFIG_NFS_V4 */ @@ -2214,7 +2216,7 @@ static int nfs_get_sb(struct file_system_type *fs_type, } else { error = nfs_bdi_register(server); if (error) - goto error_splat_super; + goto error_splat_bdi; } if (!s->s_root) { @@ -2256,6 +2258,9 @@ out_err_nosb: error_splat_root: dput(mntroot); error_splat_super: + if (server && !s->s_root) + bdi_unregister(&server->backing_dev_info); +error_splat_bdi: deactivate_locked_super(s); goto out; } @@ -2326,7 +2331,7 @@ static int nfs_xdev_get_sb(struct file_system_type *fs_type, int flags, } else { error = nfs_bdi_register(server); if (error) - goto error_splat_super; + goto error_splat_bdi; } if (!s->s_root) { @@ -2363,6 +2368,9 @@ out_err_noserver: return error; error_splat_super: + if (server && !s->s_root) + bdi_unregister(&server->backing_dev_info); +error_splat_bdi: deactivate_locked_super(s); dprintk("<-- nfs_xdev_get_sb() = %d [splat]\n", error); return error; @@ -2578,7 +2586,7 @@ static int nfs4_remote_get_sb(struct file_system_type *fs_type, } else { error = nfs_bdi_register(server); if (error) - goto error_splat_super; + goto error_splat_bdi; } if (!s->s_root) { @@ -2616,6 +2624,9 @@ out_free: error_splat_root: dput(mntroot); error_splat_super: + if (server && !s->s_root) + bdi_unregister(&server->backing_dev_info); +error_splat_bdi: deactivate_locked_super(s); goto out; } @@ -2647,7 +2658,7 @@ static void nfs_fix_devname(const struct path *path, struct vfsmount *mnt) devname = nfs_path(path->mnt->mnt_devname, path->mnt->mnt_root, path->dentry, page, PAGE_SIZE); - if (devname == NULL) + if (IS_ERR(devname)) goto out_freepage; tmp = kstrdup(devname, GFP_KERNEL); if (tmp == NULL) @@ -2811,7 +2822,7 @@ static int nfs4_xdev_get_sb(struct file_system_type *fs_type, int flags, } else { error = nfs_bdi_register(server); if (error) - goto error_splat_super; + goto error_splat_bdi; } if (!s->s_root) { @@ -2847,6 +2858,9 @@ out_err_noserver: return error; error_splat_super: + if (server && !s->s_root) + bdi_unregister(&server->backing_dev_info); +error_splat_bdi: deactivate_locked_super(s); dprintk("<-- nfs4_xdev_get_sb() = %d [splat]\n", error); return error; @@ -2893,7 +2907,7 @@ static int nfs4_remote_referral_get_sb(struct file_system_type *fs_type, } else { error = nfs_bdi_register(server); if (error) - goto error_splat_super; + goto error_splat_bdi; } if (!s->s_root) { @@ -2929,6 +2943,9 @@ out_err_noserver: return error; error_splat_super: + if (server && !s->s_root) + bdi_unregister(&server->backing_dev_info); +error_splat_bdi: deactivate_locked_super(s); dprintk("<-- nfs4_referral_get_sb() = %d [splat]\n", error); return error; diff --git a/fs/nfs/symlink.c b/fs/nfs/symlink.c index 412738dbfbc7..05c9e02f4153 100644 --- a/fs/nfs/symlink.c +++ b/fs/nfs/symlink.c @@ -19,7 +19,6 @@ #include <linux/pagemap.h> #include <linux/stat.h> #include <linux/mm.h> -#include <linux/slab.h> #include <linux/string.h> #include <linux/namei.h> @@ -50,7 +49,7 @@ static void *nfs_follow_link(struct dentry *dentry, struct nameidata *nd) struct page *page; void *err; - err = ERR_PTR(nfs_revalidate_mapping_nolock(inode, inode->i_mapping)); + err = ERR_PTR(nfs_revalidate_mapping(inode, inode->i_mapping)); if (err) goto read_failed; page = read_cache_page(&inode->i_data, 0, diff --git a/fs/nfs/write.c b/fs/nfs/write.c index d63d964a0392..3aea3ca98ab7 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -201,6 +201,7 @@ static int nfs_set_page_writeback(struct page *page) struct inode *inode = page->mapping->host; struct nfs_server *nfss = NFS_SERVER(inode); + page_cache_get(page); if (atomic_long_inc_return(&nfss->writeback) > NFS_CONGESTION_ON_THRESH) { set_bdi_congested(&nfss->backing_dev_info, @@ -216,6 +217,7 @@ static void nfs_end_page_writeback(struct page *page) struct nfs_server *nfss = NFS_SERVER(inode); end_page_writeback(page); + page_cache_release(page); if (atomic_long_dec_return(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH) clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC); } @@ -421,6 +423,7 @@ static void nfs_mark_request_dirty(struct nfs_page *req) { __set_page_dirty_nobuffers(req->wb_page); + __mark_inode_dirty(req->wb_page->mapping->host, I_DIRTY_DATASYNC); } #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) @@ -438,6 +441,7 @@ nfs_mark_request_commit(struct nfs_page *req) radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_COMMIT); + nfsi->ncommit++; spin_unlock(&inode->i_lock); inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); inc_bdi_stat(req->wb_page->mapping->backing_dev_info, BDI_RECLAIMABLE); @@ -501,57 +505,6 @@ int nfs_reschedule_unstable_write(struct nfs_page *req) } #endif -/* - * Wait for a request to complete. - * - * Interruptible by fatal signals only. - */ -static int nfs_wait_on_requests_locked(struct inode *inode, pgoff_t idx_start, unsigned int npages) -{ - struct nfs_inode *nfsi = NFS_I(inode); - struct nfs_page *req; - pgoff_t idx_end, next; - unsigned int res = 0; - int error; - - if (npages == 0) - idx_end = ~0; - else - idx_end = idx_start + npages - 1; - - next = idx_start; - while (radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree, (void **)&req, next, 1, NFS_PAGE_TAG_LOCKED)) { - if (req->wb_index > idx_end) - break; - - next = req->wb_index + 1; - BUG_ON(!NFS_WBACK_BUSY(req)); - - kref_get(&req->wb_kref); - spin_unlock(&inode->i_lock); - error = nfs_wait_on_request(req); - nfs_release_request(req); - spin_lock(&inode->i_lock); - if (error < 0) - return error; - res++; - } - return res; -} - -static void nfs_cancel_commit_list(struct list_head *head) -{ - struct nfs_page *req; - - while(!list_empty(head)) { - req = nfs_list_entry(head->next); - nfs_list_remove_request(req); - nfs_clear_request_commit(req); - nfs_inode_remove_request(req); - nfs_unlock_request(req); - } -} - #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) static int nfs_need_commit(struct nfs_inode *nfsi) @@ -573,11 +526,17 @@ static int nfs_scan_commit(struct inode *inode, struct list_head *dst, pgoff_t idx_start, unsigned int npages) { struct nfs_inode *nfsi = NFS_I(inode); + int ret; if (!nfs_need_commit(nfsi)) return 0; - return nfs_scan_list(nfsi, dst, idx_start, npages, NFS_PAGE_TAG_COMMIT); + ret = nfs_scan_list(nfsi, dst, idx_start, npages, NFS_PAGE_TAG_COMMIT); + if (ret > 0) + nfsi->ncommit -= ret; + if (nfs_need_commit(NFS_I(inode))) + __mark_inode_dirty(inode, I_DIRTY_DATASYNC); + return ret; } #else static inline int nfs_need_commit(struct nfs_inode *nfsi) @@ -642,9 +601,10 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode, spin_lock(&inode->i_lock); } - if (nfs_clear_request_commit(req)) - radix_tree_tag_clear(&NFS_I(inode)->nfs_page_tree, - req->wb_index, NFS_PAGE_TAG_COMMIT); + if (nfs_clear_request_commit(req) && + radix_tree_tag_clear(&NFS_I(inode)->nfs_page_tree, + req->wb_index, NFS_PAGE_TAG_COMMIT) != NULL) + NFS_I(inode)->ncommit--; /* Okay, the request matches. Update the region */ if (offset < req->wb_offset) { @@ -703,9 +663,11 @@ static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page, req = nfs_setup_write_request(ctx, page, offset, count); if (IS_ERR(req)) return PTR_ERR(req); + nfs_mark_request_dirty(req); /* Update file length */ nfs_grow_file(page, offset, count); nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes); + nfs_mark_request_dirty(req); nfs_clear_page_tag_locked(req); return 0; } @@ -782,8 +744,6 @@ int nfs_updatepage(struct file *file, struct page *page, status = nfs_writepage_setup(ctx, page, offset, count); if (status < 0) nfs_set_pageerror(page); - else - __set_page_dirty_nobuffers(page); dprintk("NFS: nfs_updatepage returns %d (isize %lld)\n", status, (long long)i_size_read(inode)); @@ -792,13 +752,12 @@ int nfs_updatepage(struct file *file, struct page *page, static void nfs_writepage_release(struct nfs_page *req) { + struct page *page = req->wb_page; - if (PageError(req->wb_page) || !nfs_reschedule_unstable_write(req)) { - nfs_end_page_writeback(req->wb_page); + if (PageError(req->wb_page) || !nfs_reschedule_unstable_write(req)) nfs_inode_remove_request(req); - } else - nfs_end_page_writeback(req->wb_page); nfs_clear_page_tag_locked(req); + nfs_end_page_writeback(page); } static int flush_task_priority(int how) @@ -822,7 +781,6 @@ static int nfs_write_rpcsetup(struct nfs_page *req, int how) { struct inode *inode = req->wb_context->path.dentry->d_inode; - int flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC; int priority = flush_task_priority(how); struct rpc_task *task; struct rpc_message msg = { @@ -837,9 +795,10 @@ static int nfs_write_rpcsetup(struct nfs_page *req, .callback_ops = call_ops, .callback_data = data, .workqueue = nfsiod_workqueue, - .flags = flags, + .flags = RPC_TASK_ASYNC, .priority = priority, }; + int ret = 0; /* Set up the RPC argument and reply structs * NB: take care not to mess about with data->commit et al. */ @@ -878,10 +837,18 @@ static int nfs_write_rpcsetup(struct nfs_page *req, (unsigned long long)data->args.offset); task = rpc_run_task(&task_setup_data); - if (IS_ERR(task)) - return PTR_ERR(task); + if (IS_ERR(task)) { + ret = PTR_ERR(task); + goto out; + } + if (how & FLUSH_SYNC) { + ret = rpc_wait_for_completion_task(task); + if (ret == 0) + ret = task->tk_status; + } rpc_put_task(task); - return 0; +out: + return ret; } /* If a nfs_flush_* function fails, it should remove reqs from @head and @@ -890,9 +857,11 @@ static int nfs_write_rpcsetup(struct nfs_page *req, */ static void nfs_redirty_request(struct nfs_page *req) { + struct page *page = req->wb_page; + nfs_mark_request_dirty(req); - nfs_end_page_writeback(req->wb_page); nfs_clear_page_tag_locked(req); + nfs_end_page_writeback(page); } /* @@ -1127,16 +1096,15 @@ static void nfs_writeback_release_full(void *calldata) if (nfs_write_need_commit(data)) { memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf)); nfs_mark_request_commit(req); - nfs_end_page_writeback(page); dprintk(" marked for commit\n"); goto next; } dprintk(" OK\n"); remove_request: - nfs_end_page_writeback(page); nfs_inode_remove_request(req); next: nfs_clear_page_tag_locked(req); + nfs_end_page_writeback(page); } nfs_writedata_release(calldata); } @@ -1233,6 +1201,25 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) +static int nfs_commit_set_lock(struct nfs_inode *nfsi, int may_wait) +{ + if (!test_and_set_bit(NFS_INO_COMMIT, &nfsi->flags)) + return 1; + if (may_wait && !out_of_line_wait_on_bit_lock(&nfsi->flags, + NFS_INO_COMMIT, nfs_wait_bit_killable, + TASK_KILLABLE)) + return 1; + return 0; +} + +static void nfs_commit_clear_lock(struct nfs_inode *nfsi) +{ + clear_bit(NFS_INO_COMMIT, &nfsi->flags); + smp_mb__after_clear_bit(); + wake_up_bit(&nfsi->flags, NFS_INO_COMMIT); +} + + static void nfs_commitdata_release(void *data) { struct nfs_write_data *wdata = data; @@ -1250,7 +1237,6 @@ static int nfs_commit_rpcsetup(struct list_head *head, { struct nfs_page *first = nfs_list_entry(head->next); struct inode *inode = first->wb_context->path.dentry->d_inode; - int flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC; int priority = flush_task_priority(how); struct rpc_task *task; struct rpc_message msg = { @@ -1265,7 +1251,7 @@ static int nfs_commit_rpcsetup(struct list_head *head, .callback_ops = &nfs_commit_ops, .callback_data = data, .workqueue = nfsiod_workqueue, - .flags = flags, + .flags = RPC_TASK_ASYNC, .priority = priority, }; @@ -1325,6 +1311,7 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how) BDI_RECLAIMABLE); nfs_clear_page_tag_locked(req); } + nfs_commit_clear_lock(NFS_I(inode)); return -ENOMEM; } @@ -1380,6 +1367,7 @@ static void nfs_commit_release(void *calldata) next: nfs_clear_page_tag_locked(req); } + nfs_commit_clear_lock(NFS_I(data->inode)); nfs_commitdata_release(calldata); } @@ -1391,11 +1379,14 @@ static const struct rpc_call_ops nfs_commit_ops = { .rpc_release = nfs_commit_release, }; -int nfs_commit_inode(struct inode *inode, int how) +static int nfs_commit_inode(struct inode *inode, int how) { LIST_HEAD(head); - int res; + int may_wait = how & FLUSH_SYNC; + int res = 0; + if (!nfs_commit_set_lock(NFS_I(inode), may_wait)) + goto out; spin_lock(&inode->i_lock); res = nfs_scan_commit(inode, &head, 0, 0); spin_unlock(&inode->i_lock); @@ -1403,95 +1394,60 @@ int nfs_commit_inode(struct inode *inode, int how) int error = nfs_commit_list(inode, &head, how); if (error < 0) return error; - } + if (may_wait) + wait_on_bit(&NFS_I(inode)->flags, NFS_INO_COMMIT, + nfs_wait_bit_killable, + TASK_KILLABLE); + } else + nfs_commit_clear_lock(NFS_I(inode)); +out: return res; } -#else -static inline int nfs_commit_list(struct inode *inode, struct list_head *head, int how) -{ - return 0; -} -#endif -long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_control *wbc, int how) +static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_control *wbc) { - struct inode *inode = mapping->host; - pgoff_t idx_start, idx_end; - unsigned int npages = 0; - LIST_HEAD(head); - int nocommit = how & FLUSH_NOCOMMIT; - long pages, ret; - - /* FIXME */ - if (wbc->range_cyclic) - idx_start = 0; - else { - idx_start = wbc->range_start >> PAGE_CACHE_SHIFT; - idx_end = wbc->range_end >> PAGE_CACHE_SHIFT; - if (idx_end > idx_start) { - pgoff_t l_npages = 1 + idx_end - idx_start; - npages = l_npages; - if (sizeof(npages) != sizeof(l_npages) && - (pgoff_t)npages != l_npages) - npages = 0; + struct nfs_inode *nfsi = NFS_I(inode); + int flags = FLUSH_SYNC; + int ret = 0; + + /* Don't commit yet if this is a non-blocking flush and there are + * lots of outstanding writes for this mapping. + */ + if (wbc->sync_mode == WB_SYNC_NONE && + nfsi->ncommit <= (nfsi->npages >> 1)) + goto out_mark_dirty; + + if (wbc->nonblocking || wbc->for_background) + flags = 0; + ret = nfs_commit_inode(inode, flags); + if (ret >= 0) { + if (wbc->sync_mode == WB_SYNC_NONE) { + if (ret < wbc->nr_to_write) + wbc->nr_to_write -= ret; + else + wbc->nr_to_write = 0; } + return 0; } - how &= ~FLUSH_NOCOMMIT; - spin_lock(&inode->i_lock); - do { - ret = nfs_wait_on_requests_locked(inode, idx_start, npages); - if (ret != 0) - continue; - if (nocommit) - break; - pages = nfs_scan_commit(inode, &head, idx_start, npages); - if (pages == 0) - break; - if (how & FLUSH_INVALIDATE) { - spin_unlock(&inode->i_lock); - nfs_cancel_commit_list(&head); - ret = pages; - spin_lock(&inode->i_lock); - continue; - } - pages += nfs_scan_commit(inode, &head, 0, 0); - spin_unlock(&inode->i_lock); - ret = nfs_commit_list(inode, &head, how); - spin_lock(&inode->i_lock); - - } while (ret >= 0); - spin_unlock(&inode->i_lock); +out_mark_dirty: + __mark_inode_dirty(inode, I_DIRTY_DATASYNC); return ret; } - -static int __nfs_write_mapping(struct address_space *mapping, struct writeback_control *wbc, int how) +#else +static int nfs_commit_inode(struct inode *inode, int how) { - int ret; - - ret = nfs_writepages(mapping, wbc); - if (ret < 0) - goto out; - ret = nfs_sync_mapping_wait(mapping, wbc, how); - if (ret < 0) - goto out; return 0; -out: - __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); - return ret; } -/* Two pass sync: first using WB_SYNC_NONE, then WB_SYNC_ALL */ -static int nfs_write_mapping(struct address_space *mapping, int how) +static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_control *wbc) { - struct writeback_control wbc = { - .bdi = mapping->backing_dev_info, - .sync_mode = WB_SYNC_ALL, - .nr_to_write = LONG_MAX, - .range_start = 0, - .range_end = LLONG_MAX, - }; + return 0; +} +#endif - return __nfs_write_mapping(mapping, &wbc, how); +int nfs_write_inode(struct inode *inode, struct writeback_control *wbc) +{ + return nfs_commit_unstable_pages(inode, wbc); } /* @@ -1499,37 +1455,27 @@ static int nfs_write_mapping(struct address_space *mapping, int how) */ int nfs_wb_all(struct inode *inode) { - return nfs_write_mapping(inode->i_mapping, 0); -} + struct writeback_control wbc = { + .sync_mode = WB_SYNC_ALL, + .nr_to_write = LONG_MAX, + .range_start = 0, + .range_end = LLONG_MAX, + }; -int nfs_wb_nocommit(struct inode *inode) -{ - return nfs_write_mapping(inode->i_mapping, FLUSH_NOCOMMIT); + return sync_inode(inode, &wbc); } int nfs_wb_page_cancel(struct inode *inode, struct page *page) { struct nfs_page *req; - loff_t range_start = page_offset(page); - loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1); - struct writeback_control wbc = { - .bdi = page->mapping->backing_dev_info, - .sync_mode = WB_SYNC_ALL, - .nr_to_write = LONG_MAX, - .range_start = range_start, - .range_end = range_end, - }; int ret = 0; BUG_ON(!PageLocked(page)); for (;;) { + wait_on_page_writeback(page); req = nfs_page_find_request(page); if (req == NULL) - goto out; - if (test_bit(PG_CLEAN, &req->wb_flags)) { - nfs_release_request(req); break; - } if (nfs_lock_request_dontget(req)) { nfs_inode_remove_request(req); /* @@ -1543,54 +1489,42 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page) ret = nfs_wait_on_request(req); nfs_release_request(req); if (ret < 0) - goto out; + break; } - if (!PagePrivate(page)) - return 0; - ret = nfs_sync_mapping_wait(page->mapping, &wbc, FLUSH_INVALIDATE); -out: return ret; } -static int nfs_wb_page_priority(struct inode *inode, struct page *page, - int how) +/* + * Write back all requests on one page - we do this before reading it. + */ +int nfs_wb_page(struct inode *inode, struct page *page) { loff_t range_start = page_offset(page); loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1); struct writeback_control wbc = { - .bdi = page->mapping->backing_dev_info, .sync_mode = WB_SYNC_ALL, - .nr_to_write = LONG_MAX, + .nr_to_write = 0, .range_start = range_start, .range_end = range_end, }; int ret; - do { + while(PagePrivate(page)) { + wait_on_page_writeback(page); if (clear_page_dirty_for_io(page)) { ret = nfs_writepage_locked(page, &wbc); if (ret < 0) goto out_error; - } else if (!PagePrivate(page)) - break; - ret = nfs_sync_mapping_wait(page->mapping, &wbc, how); + } + ret = sync_inode(inode, &wbc); if (ret < 0) goto out_error; - } while (PagePrivate(page)); + } return 0; out_error: - __mark_inode_dirty(inode, I_DIRTY_PAGES); return ret; } -/* - * Write back all requests on one page - we do this before reading it. - */ -int nfs_wb_page(struct inode *inode, struct page* page) -{ - return nfs_wb_page_priority(inode, page, FLUSH_STABLE); -} - #ifdef CONFIG_MIGRATION int nfs_migrate_page(struct address_space *mapping, struct page *newpage, struct page *page) |