diff options
Diffstat (limited to 'fs/nfsd/nfs4state.c')
-rw-r--r-- | fs/nfsd/nfs4state.c | 797 |
1 files changed, 541 insertions, 256 deletions
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 551d2958ec29..d5694987f86f 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -149,14 +149,14 @@ void nfsd4_destroy_laundry_wq(void) static bool is_session_dead(struct nfsd4_session *ses) { - return ses->se_flags & NFS4_SESSION_DEAD; + return ses->se_dead; } static __be32 mark_session_dead_locked(struct nfsd4_session *ses, int ref_held_by_me) { if (atomic_read(&ses->se_ref) > ref_held_by_me) return nfserr_jukebox; - ses->se_flags |= NFS4_SESSION_DEAD; + ses->se_dead = true; return nfs_ok; } @@ -572,13 +572,6 @@ opaque_hashval(const void *ptr, int nbytes) return x; } -static void nfsd4_free_file_rcu(struct rcu_head *rcu) -{ - struct nfs4_file *fp = container_of(rcu, struct nfs4_file, fi_rcu); - - kmem_cache_free(file_slab, fp); -} - void put_nfs4_file(struct nfs4_file *fi) { @@ -586,7 +579,7 @@ put_nfs4_file(struct nfs4_file *fi) nfsd4_file_hash_remove(fi); WARN_ON_ONCE(!list_empty(&fi->fi_clnt_odstate)); WARN_ON_ONCE(!list_empty(&fi->fi_delegations)); - call_rcu(&fi->fi_rcu, nfsd4_free_file_rcu); + kfree_rcu(fi, fi_rcu); } } @@ -953,15 +946,6 @@ struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *sla spin_lock_init(&stid->sc_lock); INIT_LIST_HEAD(&stid->sc_cp_list); - /* - * It shouldn't be a problem to reuse an opaque stateid value. - * I don't think it is for 4.1. But with 4.0 I worry that, for - * example, a stray write retransmission could be accepted by - * the server when it should have been rejected. Therefore, - * adopt a trick from the sctp code to attempt to maximize the - * amount of time until an id is reused, by ensuring they always - * "increase" (mod INT_MAX): - */ return stid; out_free: kmem_cache_free(slab, stid); @@ -1057,6 +1041,12 @@ static struct nfs4_ol_stateid * nfs4_alloc_open_stateid(struct nfs4_client *clp) return openlockstateid(stid); } +/* + * As the sc_free callback of deleg, this may be called by nfs4_put_stid + * in nfsd_break_one_deleg. + * Considering nfsd_break_one_deleg is called with the flc->flc_lock held, + * this function mustn't ever sleep. + */ static void nfs4_free_deleg(struct nfs4_stid *stid) { struct nfs4_delegation *dp = delegstateid(stid); @@ -1184,7 +1174,6 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_file *fp, nfsd4_init_cb(&dp->dl_cb_fattr.ncf_getattr, dp->dl_stid.sc_client, &nfsd4_cb_getattr_ops, NFSPROC4_CLNT_CB_GETATTR); dp->dl_cb_fattr.ncf_file_modified = false; - dp->dl_cb_fattr.ncf_cb_bmap[0] = FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE; get_nfs4_file(fp); dp->dl_stid.sc_file = fp; return dp; @@ -1386,7 +1375,8 @@ static void revoke_delegation(struct nfs4_delegation *dp) struct nfs4_client *clp = dp->dl_stid.sc_client; WARN_ON(!list_empty(&dp->dl_recall_lru)); - WARN_ON_ONCE(!(dp->dl_stid.sc_status & + WARN_ON_ONCE(dp->dl_stid.sc_client->cl_minorversion > 0 && + !(dp->dl_stid.sc_status & (SC_STATUS_REVOKED | SC_STATUS_ADMIN_REVOKED))); trace_nfsd_stid_revoke(&dp->dl_stid); @@ -1660,6 +1650,14 @@ static void release_open_stateid(struct nfs4_ol_stateid *stp) free_ol_stateid_reaplist(&reaplist); } +static bool nfs4_openowner_unhashed(struct nfs4_openowner *oo) +{ + lockdep_assert_held(&oo->oo_owner.so_client->cl_lock); + + return list_empty(&oo->oo_owner.so_strhash) && + list_empty(&oo->oo_perclient); +} + static void unhash_openowner_locked(struct nfs4_openowner *oo) { struct nfs4_client *clp = oo->oo_owner.so_client; @@ -1909,113 +1907,145 @@ gen_sessionid(struct nfsd4_session *ses) */ #define NFSD_MIN_HDR_SEQ_SZ (24 + 12 + 44) +static struct shrinker *nfsd_slot_shrinker; +static DEFINE_SPINLOCK(nfsd_session_list_lock); +static LIST_HEAD(nfsd_session_list); +/* The sum of "target_slots-1" on every session. The shrinker can push this + * down, though it can take a little while for the memory to actually + * be freed. The "-1" is because we can never free slot 0 while the + * session is active. + */ +static atomic_t nfsd_total_target_slots = ATOMIC_INIT(0); + static void -free_session_slots(struct nfsd4_session *ses) +free_session_slots(struct nfsd4_session *ses, int from) { int i; - for (i = 0; i < ses->se_fchannel.maxreqs; i++) { - free_svc_cred(&ses->se_slots[i]->sl_cred); - kfree(ses->se_slots[i]); + if (from >= ses->se_fchannel.maxreqs) + return; + + for (i = from; i < ses->se_fchannel.maxreqs; i++) { + struct nfsd4_slot *slot = xa_load(&ses->se_slots, i); + + /* + * Save the seqid in case we reactivate this slot. + * This will never require a memory allocation so GFP + * flag is irrelevant + */ + xa_store(&ses->se_slots, i, xa_mk_value(slot->sl_seqid), 0); + free_svc_cred(&slot->sl_cred); + kfree(slot); + } + ses->se_fchannel.maxreqs = from; + if (ses->se_target_maxslots > from) { + int new_target = from ?: 1; + atomic_sub(ses->se_target_maxslots - new_target, &nfsd_total_target_slots); + ses->se_target_maxslots = new_target; } } -/* - * We don't actually need to cache the rpc and session headers, so we - * can allocate a little less for each slot: +/** + * reduce_session_slots - reduce the target max-slots of a session if possible + * @ses: The session to affect + * @dec: how much to decrease the target by + * + * This interface can be used by a shrinker to reduce the target max-slots + * for a session so that some slots can eventually be freed. + * It uses spin_trylock() as it may be called in a context where another + * spinlock is held that has a dependency on client_lock. As shrinkers are + * best-effort, skiping a session is client_lock is already held has no + * great coast + * + * Return value: + * The number of slots that the target was reduced by. */ -static inline u32 slot_bytes(struct nfsd4_channel_attrs *ca) +static int +reduce_session_slots(struct nfsd4_session *ses, int dec) { - u32 size; + struct nfsd_net *nn = net_generic(ses->se_client->net, + nfsd_net_id); + int ret = 0; - if (ca->maxresp_cached < NFSD_MIN_HDR_SEQ_SZ) - size = 0; - else - size = ca->maxresp_cached - NFSD_MIN_HDR_SEQ_SZ; - return size + sizeof(struct nfsd4_slot); + if (ses->se_target_maxslots <= 1) + return ret; + if (!spin_trylock(&nn->client_lock)) + return ret; + ret = min(dec, ses->se_target_maxslots-1); + ses->se_target_maxslots -= ret; + atomic_sub(ret, &nfsd_total_target_slots); + ses->se_slot_gen += 1; + if (ses->se_slot_gen == 0) { + int i; + ses->se_slot_gen = 1; + for (i = 0; i < ses->se_fchannel.maxreqs; i++) { + struct nfsd4_slot *slot = xa_load(&ses->se_slots, i); + slot->sl_generation = 0; + } + } + spin_unlock(&nn->client_lock); + return ret; } -/* - * XXX: If we run out of reserved DRC memory we could (up to a point) - * re-negotiate active sessions and reduce their slot usage to make - * room for new connections. For now we just fail the create session. - */ -static u32 nfsd4_get_drc_mem(struct nfsd4_channel_attrs *ca, struct nfsd_net *nn) +static struct nfsd4_slot *nfsd4_alloc_slot(struct nfsd4_channel_attrs *fattrs, + int index, gfp_t gfp) { - u32 slotsize = slot_bytes(ca); - u32 num = ca->maxreqs; - unsigned long avail, total_avail; - unsigned int scale_factor; + struct nfsd4_slot *slot; + size_t size; - spin_lock(&nfsd_drc_lock); - if (nfsd_drc_max_mem > nfsd_drc_mem_used) - total_avail = nfsd_drc_max_mem - nfsd_drc_mem_used; - else - /* We have handed out more space than we chose in - * set_max_drc() to allow. That isn't really a - * problem as long as that doesn't make us think we - * have lots more due to integer overflow. - */ - total_avail = 0; - avail = min((unsigned long)NFSD_MAX_MEM_PER_SESSION, total_avail); /* - * Never use more than a fraction of the remaining memory, - * unless it's the only way to give this client a slot. - * The chosen fraction is either 1/8 or 1/number of threads, - * whichever is smaller. This ensures there are adequate - * slots to support multiple clients per thread. - * Give the client one slot even if that would require - * over-allocation--it is better than failure. + * The RPC and NFS session headers are never saved in + * the slot reply cache buffer. */ - scale_factor = max_t(unsigned int, 8, nn->nfsd_serv->sv_nrthreads); - - avail = clamp_t(unsigned long, avail, slotsize, - total_avail/scale_factor); - num = min_t(int, num, avail / slotsize); - num = max_t(int, num, 1); - nfsd_drc_mem_used += num * slotsize; - spin_unlock(&nfsd_drc_lock); - - return num; -} - -static void nfsd4_put_drc_mem(struct nfsd4_channel_attrs *ca) -{ - int slotsize = slot_bytes(ca); + size = fattrs->maxresp_cached < NFSD_MIN_HDR_SEQ_SZ ? + 0 : fattrs->maxresp_cached - NFSD_MIN_HDR_SEQ_SZ; - spin_lock(&nfsd_drc_lock); - nfsd_drc_mem_used -= slotsize * ca->maxreqs; - spin_unlock(&nfsd_drc_lock); + slot = kzalloc(struct_size(slot, sl_data, size), gfp); + if (!slot) + return NULL; + slot->sl_index = index; + return slot; } static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fattrs, struct nfsd4_channel_attrs *battrs) { int numslots = fattrs->maxreqs; - int slotsize = slot_bytes(fattrs); struct nfsd4_session *new; + struct nfsd4_slot *slot; int i; - BUILD_BUG_ON(struct_size(new, se_slots, NFSD_MAX_SLOTS_PER_SESSION) - > PAGE_SIZE); - - new = kzalloc(struct_size(new, se_slots, numslots), GFP_KERNEL); + new = kzalloc(sizeof(*new), GFP_KERNEL); if (!new) return NULL; - /* allocate each struct nfsd4_slot and data cache in one piece */ - for (i = 0; i < numslots; i++) { - new->se_slots[i] = kzalloc(slotsize, GFP_KERNEL); - if (!new->se_slots[i]) - goto out_free; - } + xa_init(&new->se_slots); - memcpy(&new->se_fchannel, fattrs, sizeof(struct nfsd4_channel_attrs)); - memcpy(&new->se_bchannel, battrs, sizeof(struct nfsd4_channel_attrs)); + slot = nfsd4_alloc_slot(fattrs, 0, GFP_KERNEL); + if (!slot || xa_is_err(xa_store(&new->se_slots, 0, slot, GFP_KERNEL))) + goto out_free; + for (i = 1; i < numslots; i++) { + const gfp_t gfp = GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN; + slot = nfsd4_alloc_slot(fattrs, i, gfp); + if (!slot) + break; + if (xa_is_err(xa_store(&new->se_slots, i, slot, gfp))) { + kfree(slot); + break; + } + } + fattrs->maxreqs = i; + memcpy(&new->se_fchannel, fattrs, sizeof(struct nfsd4_channel_attrs)); + new->se_target_maxslots = i; + atomic_add(i - 1, &nfsd_total_target_slots); + new->se_cb_slot_avail = ~0U; + new->se_cb_highest_slot = min(battrs->maxreqs - 1, + NFSD_BC_SLOT_TABLE_SIZE - 1); + spin_lock_init(&new->se_lock); return new; out_free: - while (i--) - kfree(new->se_slots[i]); + kfree(slot); + xa_destroy(&new->se_slots); kfree(new); return NULL; } @@ -2121,17 +2151,47 @@ static void nfsd4_del_conns(struct nfsd4_session *s) static void __free_session(struct nfsd4_session *ses) { - free_session_slots(ses); + free_session_slots(ses, 0); + xa_destroy(&ses->se_slots); kfree(ses); } static void free_session(struct nfsd4_session *ses) { nfsd4_del_conns(ses); - nfsd4_put_drc_mem(&ses->se_fchannel); __free_session(ses); } +static unsigned long +nfsd_slot_count(struct shrinker *s, struct shrink_control *sc) +{ + unsigned long cnt = atomic_read(&nfsd_total_target_slots); + + return cnt ? cnt : SHRINK_EMPTY; +} + +static unsigned long +nfsd_slot_scan(struct shrinker *s, struct shrink_control *sc) +{ + struct nfsd4_session *ses; + unsigned long scanned = 0; + unsigned long freed = 0; + + spin_lock(&nfsd_session_list_lock); + list_for_each_entry(ses, &nfsd_session_list, se_all_sessions) { + freed += reduce_session_slots(ses, 1); + scanned += 1; + if (scanned >= sc->nr_to_scan) { + /* Move starting point for next scan */ + list_move(&nfsd_session_list, &ses->se_all_sessions); + break; + } + } + spin_unlock(&nfsd_session_list_lock); + sc->nr_scanned = scanned; + return freed; +} + static void init_session(struct svc_rqst *rqstp, struct nfsd4_session *new, struct nfs4_client *clp, struct nfsd4_create_session *cses) { int idx; @@ -2142,17 +2202,24 @@ static void init_session(struct svc_rqst *rqstp, struct nfsd4_session *new, stru INIT_LIST_HEAD(&new->se_conns); - new->se_cb_seq_nr = 1; - new->se_flags = cses->flags; + atomic_set(&new->se_ref, 0); + new->se_dead = false; new->se_cb_prog = cses->callback_prog; new->se_cb_sec = cses->cb_sec; - atomic_set(&new->se_ref, 0); + + for (idx = 0; idx < NFSD_BC_SLOT_TABLE_SIZE; ++idx) + new->se_cb_seq_nr[idx] = 1; + idx = hash_sessionid(&new->se_sessionid); list_add(&new->se_hash, &nn->sessionid_hashtbl[idx]); spin_lock(&clp->cl_lock); list_add(&new->se_perclnt, &clp->cl_sessions); spin_unlock(&clp->cl_lock); + spin_lock(&nfsd_session_list_lock); + list_add_tail(&new->se_all_sessions, &nfsd_session_list); + spin_unlock(&nfsd_session_list_lock); + { struct sockaddr *sa = svc_addr(rqstp); /* @@ -2222,6 +2289,9 @@ unhash_session(struct nfsd4_session *ses) spin_lock(&ses->se_client->cl_lock); list_del(&ses->se_perclnt); spin_unlock(&ses->se_client->cl_lock); + spin_lock(&nfsd_session_list_lock); + list_del(&ses->se_all_sessions); + spin_unlock(&nfsd_session_list_lock); } /* SETCLIENTID and SETCLIENTID_CONFIRM Helper functions */ @@ -2239,21 +2309,16 @@ STALE_CLIENTID(clientid_t *clid, struct nfsd_net *nn) return 1; } -/* - * XXX Should we use a slab cache ? - * This type of memory management is somewhat inefficient, but we use it - * anyway since SETCLIENTID is not a common operation. - */ static struct nfs4_client *alloc_client(struct xdr_netobj name, struct nfsd_net *nn) { struct nfs4_client *clp; int i; - if (atomic_read(&nn->nfs4_client_count) >= nn->nfs4_max_clients) { + if (atomic_read(&nn->nfs4_client_count) >= nn->nfs4_max_clients && + atomic_read(&nn->nfsd_courtesy_clients) > 0) mod_delayed_work(laundry_wq, &nn->laundromat_work, 0); - return NULL; - } + clp = kmem_cache_zalloc(client_slab, GFP_KERNEL); if (clp == NULL) return NULL; @@ -2362,8 +2427,12 @@ unhash_client_locked(struct nfs4_client *clp) } list_del_init(&clp->cl_lru); spin_lock(&clp->cl_lock); - list_for_each_entry(ses, &clp->cl_sessions, se_perclnt) + spin_lock(&nfsd_session_list_lock); + list_for_each_entry(ses, &clp->cl_sessions, se_perclnt) { list_del_init(&ses->se_hash); + list_del_init(&ses->se_all_sessions); + } + spin_unlock(&nfsd_session_list_lock); spin_unlock(&clp->cl_lock); } @@ -2685,6 +2754,7 @@ static const char *cb_state2str(int state) static int client_info_show(struct seq_file *m, void *v) { struct inode *inode = file_inode(m->file); + struct nfsd4_session *ses; struct nfs4_client *clp; u64 clid; @@ -2721,6 +2791,16 @@ static int client_info_show(struct seq_file *m, void *v) seq_printf(m, "callback address: \"%pISpc\"\n", &clp->cl_cb_conn.cb_addr); seq_printf(m, "admin-revoked states: %d\n", atomic_read(&clp->cl_admin_revoked)); + spin_lock(&clp->cl_lock); + seq_printf(m, "session slots:"); + list_for_each_entry(ses, &clp->cl_sessions, se_perclnt) + seq_printf(m, " %u", ses->se_fchannel.maxreqs); + seq_printf(m, "\nsession target slots:"); + list_for_each_entry(ses, &clp->cl_sessions, se_perclnt) + seq_printf(m, " %u", ses->se_target_maxslots); + spin_unlock(&clp->cl_lock); + seq_puts(m, "\n"); + drop_client(clp); return 0; @@ -2873,6 +2953,21 @@ static int nfs4_show_lock(struct seq_file *s, struct nfs4_stid *st) return 0; } +static char *nfs4_show_deleg_type(u32 dl_type) +{ + switch (dl_type) { + case OPEN_DELEGATE_READ: + return "r"; + case OPEN_DELEGATE_WRITE: + return "w"; + case OPEN_DELEGATE_READ_ATTRS_DELEG: + return "ra"; + case OPEN_DELEGATE_WRITE_ATTRS_DELEG: + return "wa"; + } + return "?"; +} + static int nfs4_show_deleg(struct seq_file *s, struct nfs4_stid *st) { struct nfs4_delegation *ds; @@ -2886,8 +2981,7 @@ static int nfs4_show_deleg(struct seq_file *s, struct nfs4_stid *st) nfs4_show_stateid(s, &st->sc_stateid); seq_puts(s, ": { type: deleg, "); - seq_printf(s, "access: %s", - ds->dl_type == NFS4_OPEN_DELEGATE_READ ? "r" : "w"); + seq_printf(s, "access: %s", nfs4_show_deleg_type(ds->dl_type)); /* XXX: lease time, whether it's being recalled. */ @@ -3076,7 +3170,6 @@ nfsd4_cb_recall_any_release(struct nfsd4_callback *cb) { struct nfs4_client *clp = cb->cb_clp; - clear_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags); drop_client(clp); } @@ -3107,7 +3200,6 @@ nfsd4_cb_getattr_release(struct nfsd4_callback *cb) struct nfs4_delegation *dp = container_of(ncf, struct nfs4_delegation, dl_cb_fattr); - clear_and_wake_up_bit(CB_GETATTR_BUSY, &ncf->ncf_cb_flags); nfs4_put_stid(&dp->dl_stid); } @@ -3128,11 +3220,15 @@ static void nfs4_cb_getattr(struct nfs4_cb_fattr *ncf) struct nfs4_delegation *dp = container_of(ncf, struct nfs4_delegation, dl_cb_fattr); - if (test_and_set_bit(CB_GETATTR_BUSY, &ncf->ncf_cb_flags)) + if (test_and_set_bit(NFSD4_CALLBACK_RUNNING, &ncf->ncf_getattr.cb_flags)) return; + /* set to proper status when nfsd4_cb_getattr_done runs */ ncf->ncf_cb_status = NFS4ERR_IO; + /* ensure that wake_bit is done when RUNNING is cleared */ + set_bit(NFSD4_CALLBACK_WAKE, &ncf->ncf_getattr.cb_flags); + refcount_inc(&dp->dl_stid.sc_count); nfsd4_run_cb(&ncf->ncf_getattr); } @@ -3160,7 +3256,6 @@ static struct nfs4_client *create_client(struct xdr_netobj name, kref_init(&clp->cl_nfsdfs.cl_ref); nfsd4_init_cb(&clp->cl_cb_null, clp, NULL, NFSPROC4_CLNT_CB_NULL); clp->cl_time = ktime_get_boottime_seconds(); - clear_bit(0, &clp->cl_cb_slot_busy); copy_verf(clp, verf); memcpy(&clp->cl_addr, sa, sizeof(struct sockaddr_storage)); clp->cl_cb_session = NULL; @@ -3487,7 +3582,7 @@ static bool client_has_state(struct nfs4_client *clp) #endif || !list_empty(&clp->cl_delegations) || !list_empty(&clp->cl_sessions) - || !list_empty(&clp->async_copies); + || nfsd4_has_active_async_copies(clp); } static __be32 copy_impl_id(struct nfs4_client *clp, @@ -3525,6 +3620,12 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, __func__, rqstp, exid, exid->clname.len, exid->clname.data, addr_str, exid->flags, exid->spa_how); + exid->server_impl_name = kasprintf(GFP_KERNEL, "%s %s %s %s", + utsname()->sysname, utsname()->release, + utsname()->version, utsname()->machine); + if (!exid->server_impl_name) + return nfserr_jukebox; + if (exid->flags & ~EXCHGID4_FLAG_MASK_A) return nfserr_inval; @@ -3662,6 +3763,23 @@ out_copy: exid->seqid = conf->cl_cs_slot.sl_seqid + 1; nfsd4_set_ex_flags(conf, exid); + exid->nii_domain.len = sizeof("kernel.org") - 1; + exid->nii_domain.data = "kernel.org"; + + /* + * Note that RFC 8881 places no length limit on + * nii_name, but this implementation permits no + * more than NFS4_OPAQUE_LIMIT bytes. + */ + exid->nii_name.len = strlen(exid->server_impl_name); + if (exid->nii_name.len > NFS4_OPAQUE_LIMIT) + exid->nii_name.len = NFS4_OPAQUE_LIMIT; + exid->nii_name.data = exid->server_impl_name; + + /* just send zeros - the date is in nii_name */ + exid->nii_time.tv_sec = 0; + exid->nii_time.tv_nsec = 0; + dprintk("nfsd4_exchange_id seqid %d flags %x\n", conf->cl_cs_slot.sl_seqid, conf->cl_exchange_flags); status = nfs_ok; @@ -3678,10 +3796,18 @@ out_nolock: return status; } -static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, bool slot_inuse) +void +nfsd4_exchange_id_release(union nfsd4_op_u *u) +{ + struct nfsd4_exchange_id *exid = &u->exchange_id; + + kfree(exid->server_impl_name); +} + +static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, u8 flags) { /* The slot is in use, and no response has been sent. */ - if (slot_inuse) { + if (flags & NFSD4_SLOT_INUSE) { if (seqid == slot_seqid) return nfserr_jukebox; else @@ -3690,6 +3816,8 @@ static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, bool slot_inuse) /* Note unsigned 32-bit arithmetic handles wraparound: */ if (likely(seqid == slot_seqid + 1)) return nfs_ok; + if ((flags & NFSD4_SLOT_REUSED) && seqid == 1) + return nfs_ok; if (seqid == slot_seqid) return nfserr_replay_cache; return nfserr_seq_misordered; @@ -3748,17 +3876,6 @@ static __be32 check_forechannel_attrs(struct nfsd4_channel_attrs *ca, struct nfs ca->maxresp_cached = min_t(u32, ca->maxresp_cached, NFSD_SLOT_CACHE_SIZE + NFSD_MIN_HDR_SEQ_SZ); ca->maxreqs = min_t(u32, ca->maxreqs, NFSD_MAX_SLOTS_PER_SESSION); - /* - * Note decreasing slot size below client's request may make it - * difficult for client to function correctly, whereas - * decreasing the number of slots will (just?) affect - * performance. When short on memory we therefore prefer to - * decrease number of slots instead of their size. Clients that - * request larger slots than they need will get poor results: - * Note that we always allow at least one slot, because our - * accounting is soft and provides no guarantees either way. - */ - ca->maxreqs = nfsd4_get_drc_mem(ca, nn); return nfs_ok; } @@ -3836,11 +3953,11 @@ nfsd4_create_session(struct svc_rqst *rqstp, return status; status = check_backchannel_attrs(&cr_ses->back_channel); if (status) - goto out_release_drc_mem; + goto out_err; status = nfserr_jukebox; new = alloc_session(&cr_ses->fore_channel, &cr_ses->back_channel); if (!new) - goto out_release_drc_mem; + goto out_err; conn = alloc_conn_from_crses(rqstp, cr_ses); if (!conn) goto out_free_session; @@ -3911,6 +4028,8 @@ nfsd4_create_session(struct svc_rqst *rqstp, cr_ses->flags &= ~SESSION4_PERSIST; /* Upshifting from TCP to RDMA is not supported */ cr_ses->flags &= ~SESSION4_RDMA; + /* Report the correct number of backchannel slots */ + cr_ses->back_channel.maxreqs = new->se_cb_highest_slot + 1; init_session(rqstp, new, conf, cr_ses); nfsd4_get_session_locked(new); @@ -3931,7 +4050,6 @@ nfsd4_create_session(struct svc_rqst *rqstp, return status; out_expired_error: - old = NULL; /* * Revert the slot seq_nr change so the server will process * the client's resend instead of returning a cached response. @@ -3946,12 +4064,9 @@ out_cache_error: out_free_conn: spin_unlock(&nn->client_lock); free_conn(conn); - if (old) - expire_client(old); out_free_session: __free_session(new); -out_release_drc_mem: - nfsd4_put_drc_mem(&cr_ses->fore_channel); +out_err: return status; } @@ -4249,17 +4364,11 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (seq->slotid >= session->se_fchannel.maxreqs) goto out_put_session; - slot = session->se_slots[seq->slotid]; + slot = xa_load(&session->se_slots, seq->slotid); dprintk("%s: slotid %d\n", __func__, seq->slotid); - /* We do not negotiate the number of slots yet, so set the - * maxslots to the session maxreqs which is used to encode - * sr_highest_slotid and the sr_target_slot id to maxslots */ - seq->maxslots = session->se_fchannel.maxreqs; - trace_nfsd_slot_seqid_sequence(clp, seq, slot); - status = check_slot_seqid(seq->seqid, slot->sl_seqid, - slot->sl_flags & NFSD4_SLOT_INUSE); + status = check_slot_seqid(seq->seqid, slot->sl_seqid, slot->sl_flags); if (status == nfserr_replay_cache) { status = nfserr_seq_misordered; if (!(slot->sl_flags & NFSD4_SLOT_INITIALIZED)) @@ -4284,6 +4393,12 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (status) goto out_put_session; + if (session->se_target_maxslots < session->se_fchannel.maxreqs && + slot->sl_generation == session->se_slot_gen && + seq->maxslots <= session->se_target_maxslots) + /* Client acknowledged our reduce maxreqs */ + free_session_slots(session, session->se_target_maxslots); + buflen = (seq->cachethis) ? session->se_fchannel.maxresp_cached : session->se_fchannel.maxresp_sz; @@ -4291,12 +4406,14 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, nfserr_rep_too_big; if (xdr_restrict_buflen(xdr, buflen - rqstp->rq_auth_slack)) goto out_put_session; - svc_reserve(rqstp, buflen); + svc_reserve_auth(rqstp, buflen); status = nfs_ok; - /* Success! bump slot seqid */ + /* Success! accept new slot seqid */ slot->sl_seqid = seq->seqid; + slot->sl_flags &= ~NFSD4_SLOT_REUSED; slot->sl_flags |= NFSD4_SLOT_INUSE; + slot->sl_generation = session->se_slot_gen; if (seq->cachethis) slot->sl_flags |= NFSD4_SLOT_CACHETHIS; else @@ -4306,7 +4423,51 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, cstate->session = session; cstate->clp = clp; + /* + * If the client ever uses the highest available slot, + * gently try to allocate another 20%. This allows + * fairly quick growth without grossly over-shooting what + * the client might use. + */ + if (seq->slotid == session->se_fchannel.maxreqs - 1 && + session->se_target_maxslots >= session->se_fchannel.maxreqs && + session->se_fchannel.maxreqs < NFSD_MAX_SLOTS_PER_SESSION) { + int s = session->se_fchannel.maxreqs; + int cnt = DIV_ROUND_UP(s, 5); + void *prev_slot; + + do { + /* + * GFP_NOWAIT both allows allocation under a + * spinlock, and only succeeds if there is + * plenty of memory. + */ + slot = nfsd4_alloc_slot(&session->se_fchannel, s, + GFP_NOWAIT); + prev_slot = xa_load(&session->se_slots, s); + if (xa_is_value(prev_slot) && slot) { + slot->sl_seqid = xa_to_value(prev_slot); + slot->sl_flags |= NFSD4_SLOT_REUSED; + } + if (slot && + !xa_is_err(xa_store(&session->se_slots, s, slot, + GFP_NOWAIT))) { + s += 1; + session->se_fchannel.maxreqs = s; + atomic_add(s - session->se_target_maxslots, + &nfsd_total_target_slots); + session->se_target_maxslots = s; + } else { + kfree(slot); + slot = NULL; + } + } while (slot && --cnt > 0); + } + out: + seq->maxslots = max(session->se_target_maxslots, seq->maxslots); + seq->target_maxslots = session->se_target_maxslots; + switch (clp->cl_cb_state) { case NFSD4_CB_DOWN: seq->status_flags = SEQ4_STATUS_CB_PATH_DOWN; @@ -4658,8 +4819,8 @@ out: static unsigned long nfsd4_state_shrinker_count(struct shrinker *shrink, struct shrink_control *sc) { - int count; struct nfsd_net *nn = shrink->private_data; + long count; count = atomic_read(&nn->nfsd_courtesy_clients); if (!count) @@ -4710,7 +4871,7 @@ static void init_nfs4_replay(struct nfs4_replay *rp) rp->rp_status = nfserr_serverfault; rp->rp_buflen = 0; rp->rp_buf = rp->rp_ibuf; - atomic_set(&rp->rp_locked, RP_UNLOCKED); + rp->rp_locked = RP_UNLOCKED; } static int nfsd4_cstate_assign_replay(struct nfsd4_compound_state *cstate, @@ -4718,9 +4879,9 @@ static int nfsd4_cstate_assign_replay(struct nfsd4_compound_state *cstate, { if (!nfsd4_has_session(cstate)) { wait_var_event(&so->so_replay.rp_locked, - atomic_cmpxchg(&so->so_replay.rp_locked, - RP_UNLOCKED, RP_LOCKED) != RP_LOCKED); - if (atomic_read(&so->so_replay.rp_locked) == RP_UNHASHED) + cmpxchg(&so->so_replay.rp_locked, + RP_UNLOCKED, RP_LOCKED) != RP_LOCKED); + if (so->so_replay.rp_locked == RP_UNHASHED) return -EAGAIN; cstate->replay_owner = nfs4_get_stateowner(so); } @@ -4733,9 +4894,7 @@ void nfsd4_cstate_clear_replay(struct nfsd4_compound_state *cstate) if (so != NULL) { cstate->replay_owner = NULL; - atomic_set(&so->so_replay.rp_locked, RP_UNLOCKED); - smp_mb__after_atomic(); - wake_up_var(&so->so_replay.rp_locked); + store_release_wake_up(&so->so_replay.rp_locked, RP_UNLOCKED); nfs4_put_stateowner(so); } } @@ -4975,6 +5134,12 @@ retry: spin_lock(&oo->oo_owner.so_client->cl_lock); spin_lock(&fp->fi_lock); + if (nfs4_openowner_unhashed(oo)) { + mutex_unlock(&stp->st_mutex); + stp = NULL; + goto out_unlock; + } + retstp = nfsd4_find_existing_open(fp, open); if (retstp) goto out_unlock; @@ -5034,9 +5199,7 @@ move_to_close_lru(struct nfs4_ol_stateid *s, struct net *net) * Some threads with a reference might be waiting for rp_locked, * so tell them to stop waiting. */ - atomic_set(&oo->oo_owner.so_replay.rp_locked, RP_UNHASHED); - smp_mb__after_atomic(); - wake_up_var(&oo->oo_owner.so_replay.rp_locked); + store_release_wake_up(&oo->oo_owner.so_replay.rp_locked, RP_UNHASHED); wait_event(close_wq, refcount_read(&s->st_stid.sc_count) == 2); release_all_access(s); @@ -5255,6 +5418,11 @@ static const struct nfsd4_callback_ops nfsd4_cb_recall_ops = { static void nfsd_break_one_deleg(struct nfs4_delegation *dp) { + bool queued; + + if (test_and_set_bit(NFSD4_CALLBACK_RUNNING, &dp->dl_recall.cb_flags)) + return; + /* * We're assuming the state code never drops its reference * without first removing the lease. Since we're in this lease @@ -5263,7 +5431,10 @@ static void nfsd_break_one_deleg(struct nfs4_delegation *dp) * we know it's safe to take a reference. */ refcount_inc(&dp->dl_stid.sc_count); - WARN_ON_ONCE(!nfsd4_run_cb(&dp->dl_recall)); + queued = nfsd4_run_cb(&dp->dl_recall); + WARN_ON_ONCE(!queued); + if (!queued) + refcount_dec(&dp->dl_stid.sc_count); } /* Called from break_lease() with flc_lock held. */ @@ -5437,7 +5608,7 @@ retry: static inline __be32 nfs4_check_delegmode(struct nfs4_delegation *dp, int flags) { - if ((flags & WR_STATE) && (dp->dl_type == NFS4_OPEN_DELEGATE_READ)) + if (!(flags & RD_STATE) && deleg_is_read(dp->dl_type)) return nfserr_openmode; else return nfs_ok; @@ -5669,8 +5840,7 @@ static bool nfsd4_cb_channel_good(struct nfs4_client *clp) return clp->cl_minorversion && clp->cl_cb_state == NFSD4_CB_UNKNOWN; } -static struct file_lease *nfs4_alloc_init_lease(struct nfs4_delegation *dp, - int flag) +static struct file_lease *nfs4_alloc_init_lease(struct nfs4_delegation *dp) { struct file_lease *fl; @@ -5679,7 +5849,7 @@ static struct file_lease *nfs4_alloc_init_lease(struct nfs4_delegation *dp, return NULL; fl->fl_lmops = &nfsd_lease_mng_ops; fl->c.flc_flags = FL_DELEG; - fl->c.flc_type = flag == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK; + fl->c.flc_type = deleg_is_read(dp->dl_type) ? F_RDLCK : F_WRLCK; fl->c.flc_owner = (fl_owner_t)dp; fl->c.flc_pid = current->tgid; fl->c.flc_file = dp->dl_stid.sc_file->fi_deleg_file->nf_file; @@ -5790,17 +5960,30 @@ nfsd4_verify_setuid_write(struct nfsd4_open *open, struct nfsd_file *nf) return 0; } +#ifdef CONFIG_NFSD_V4_DELEG_TIMESTAMPS +static bool nfsd4_want_deleg_timestamps(const struct nfsd4_open *open) +{ + return open->op_deleg_want & OPEN4_SHARE_ACCESS_WANT_DELEG_TIMESTAMPS; +} +#else /* CONFIG_NFSD_V4_DELEG_TIMESTAMPS */ +static bool nfsd4_want_deleg_timestamps(const struct nfsd4_open *open) +{ + return false; +} +#endif /* CONFIG NFSD_V4_DELEG_TIMESTAMPS */ + static struct nfs4_delegation * nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp, struct svc_fh *parent) { - int status = 0; + bool deleg_ts = nfsd4_want_deleg_timestamps(open); struct nfs4_client *clp = stp->st_stid.sc_client; struct nfs4_file *fp = stp->st_stid.sc_file; struct nfs4_clnt_odstate *odstate = stp->st_clnt_odstate; struct nfs4_delegation *dp; struct nfsd_file *nf = NULL; struct file_lease *fl; + int status = 0; u32 dl_type; /* @@ -5825,7 +6008,7 @@ nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp, */ if ((open->op_share_access & NFS4_SHARE_ACCESS_BOTH) == NFS4_SHARE_ACCESS_BOTH) { nf = find_rw_file(fp); - dl_type = NFS4_OPEN_DELEGATE_WRITE; + dl_type = deleg_ts ? OPEN_DELEGATE_WRITE_ATTRS_DELEG : OPEN_DELEGATE_WRITE; } /* @@ -5834,12 +6017,21 @@ nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp, */ if (!nf && (open->op_share_access & NFS4_SHARE_ACCESS_READ)) { nf = find_readable_file(fp); - dl_type = NFS4_OPEN_DELEGATE_READ; + dl_type = deleg_ts ? OPEN_DELEGATE_READ_ATTRS_DELEG : OPEN_DELEGATE_READ; } if (!nf) return ERR_PTR(-EAGAIN); + /* + * File delegations and associated locks cannot be recovered if the + * export is from an NFS proxy server. + */ + if (exportfs_cannot_lock(nf->nf_file->f_path.mnt->mnt_sb->s_export_op)) { + nfsd_file_put(nf); + return ERR_PTR(-EOPNOTSUPP); + } + spin_lock(&state_lock); spin_lock(&fp->fi_lock); if (nfs4_delegation_exists(clp, fp)) @@ -5866,7 +6058,7 @@ nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp, if (!dp) goto out_delegees; - fl = nfs4_alloc_init_lease(dp, dl_type); + fl = nfs4_alloc_init_lease(dp); if (!fl) goto out_clnt_odstate; @@ -5923,20 +6115,20 @@ out_delegees: static void nfsd4_open_deleg_none_ext(struct nfsd4_open *open, int status) { - open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE_EXT; + open->op_delegate_type = OPEN_DELEGATE_NONE_EXT; if (status == -EAGAIN) open->op_why_no_deleg = WND4_CONTENTION; else { open->op_why_no_deleg = WND4_RESOURCE; switch (open->op_deleg_want) { - case NFS4_SHARE_WANT_READ_DELEG: - case NFS4_SHARE_WANT_WRITE_DELEG: - case NFS4_SHARE_WANT_ANY_DELEG: + case OPEN4_SHARE_ACCESS_WANT_READ_DELEG: + case OPEN4_SHARE_ACCESS_WANT_WRITE_DELEG: + case OPEN4_SHARE_ACCESS_WANT_ANY_DELEG: break; - case NFS4_SHARE_WANT_CANCEL: + case OPEN4_SHARE_ACCESS_WANT_CANCEL: open->op_why_no_deleg = WND4_CANCELLED; break; - case NFS4_SHARE_WANT_NO_DELEG: + case OPEN4_SHARE_ACCESS_WANT_NO_DELEG: WARN_ON_ONCE(1); } } @@ -5957,7 +6149,7 @@ nfs4_delegation_stat(struct nfs4_delegation *dp, struct svc_fh *currentfh, path.dentry = file_dentry(nf->nf_file); rc = vfs_getattr(&path, stat, - (STATX_SIZE | STATX_CTIME | STATX_CHANGE_COOKIE), + (STATX_MODE | STATX_SIZE | STATX_CTIME | STATX_CHANGE_COOKIE), AT_STATX_SYNC_AS_STAT); nfsd_file_put(nf); @@ -5992,13 +6184,14 @@ static void nfs4_open_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp, struct svc_fh *currentfh) { - struct nfs4_delegation *dp; struct nfs4_openowner *oo = openowner(stp->st_stateowner); + bool deleg_ts = nfsd4_want_deleg_timestamps(open); struct nfs4_client *clp = stp->st_stid.sc_client; struct svc_fh *parent = NULL; - int cb_up; - int status = 0; + struct nfs4_delegation *dp; struct kstat stat; + int status = 0; + int cb_up; cb_up = nfsd4_cb_channel_good(oo->oo_owner.so_client); open->op_recall = false; @@ -6039,21 +6232,22 @@ nfs4_open_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp, destroy_delegation(dp); goto out_no_deleg; } - open->op_delegate_type = NFS4_OPEN_DELEGATE_WRITE; + open->op_delegate_type = deleg_ts ? OPEN_DELEGATE_WRITE_ATTRS_DELEG : + OPEN_DELEGATE_WRITE; dp->dl_cb_fattr.ncf_cur_fsize = stat.size; - dp->dl_cb_fattr.ncf_initial_cinfo = - nfsd4_change_attribute(&stat, d_inode(currentfh->fh_dentry)); + dp->dl_cb_fattr.ncf_initial_cinfo = nfsd4_change_attribute(&stat); trace_nfsd_deleg_write(&dp->dl_stid.sc_stateid); } else { - open->op_delegate_type = NFS4_OPEN_DELEGATE_READ; + open->op_delegate_type = deleg_ts ? OPEN_DELEGATE_READ_ATTRS_DELEG : + OPEN_DELEGATE_READ; trace_nfsd_deleg_read(&dp->dl_stid.sc_stateid); } nfs4_put_stid(&dp->dl_stid); return; out_no_deleg: - open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE; + open->op_delegate_type = OPEN_DELEGATE_NONE; if (open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS && - open->op_delegate_type != NFS4_OPEN_DELEGATE_NONE) { + open->op_delegate_type != OPEN_DELEGATE_NONE) { dprintk("NFSD: WARNING: refusing delegation reclaim\n"); open->op_recall = true; } @@ -6067,21 +6261,32 @@ out_no_deleg: static void nfsd4_deleg_xgrade_none_ext(struct nfsd4_open *open, struct nfs4_delegation *dp) { - if (open->op_deleg_want == NFS4_SHARE_WANT_READ_DELEG && - dp->dl_type == NFS4_OPEN_DELEGATE_WRITE) { - open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE_EXT; - open->op_why_no_deleg = WND4_NOT_SUPP_DOWNGRADE; - } else if (open->op_deleg_want == NFS4_SHARE_WANT_WRITE_DELEG && - dp->dl_type == NFS4_OPEN_DELEGATE_WRITE) { - open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE_EXT; - open->op_why_no_deleg = WND4_NOT_SUPP_UPGRADE; + if (deleg_is_write(dp->dl_type)) { + if (open->op_deleg_want & OPEN4_SHARE_ACCESS_WANT_READ_DELEG) { + open->op_delegate_type = OPEN_DELEGATE_NONE_EXT; + open->op_why_no_deleg = WND4_NOT_SUPP_DOWNGRADE; + } else if (open->op_deleg_want & OPEN4_SHARE_ACCESS_WANT_WRITE_DELEG) { + open->op_delegate_type = OPEN_DELEGATE_NONE_EXT; + open->op_why_no_deleg = WND4_NOT_SUPP_UPGRADE; + } } /* Otherwise the client must be confused wanting a delegation * it already has, therefore we don't return - * NFS4_OPEN_DELEGATE_NONE_EXT and reason. + * OPEN_DELEGATE_NONE_EXT and reason. */ } +/* Are we returning only a delegation stateid? */ +static bool open_xor_delegation(struct nfsd4_open *open) +{ + if (!(open->op_deleg_want & OPEN4_SHARE_ACCESS_WANT_OPEN_XOR_DELEGATION)) + return false; + /* Did we actually get a delegation? */ + if (!deleg_is_read(open->op_delegate_type) && !deleg_is_write(open->op_delegate_type)) + return false; + return true; +} + /** * nfsd4_process_open2 - finish open processing * @rqstp: the RPC transaction being executed @@ -6127,6 +6332,11 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf if (!stp) { stp = init_open_stateid(fp, open); + if (!stp) { + status = nfserr_jukebox; + goto out; + } + if (!open->op_stp) new_stp = true; } @@ -6162,8 +6372,8 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf mutex_unlock(&stp->st_mutex); if (nfsd4_has_session(&resp->cstate)) { - if (open->op_deleg_want & NFS4_SHARE_WANT_NO_DELEG) { - open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE_EXT; + if (open->op_deleg_want & OPEN4_SHARE_ACCESS_WANT_NO_DELEG) { + open->op_delegate_type = OPEN_DELEGATE_NONE_EXT; open->op_why_no_deleg = WND4_NOT_WANTED; goto nodeleg; } @@ -6174,12 +6384,23 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf * OPEN succeeds even if we fail. */ nfs4_open_delegation(open, stp, &resp->cstate.current_fh); + + /* + * If there is an existing open stateid, it must be updated and + * returned. Only respect WANT_OPEN_XOR_DELEGATION when a new + * open stateid would have to be created. + */ + if (new_stp && open_xor_delegation(open)) { + memcpy(&open->op_stateid, &zero_stateid, sizeof(open->op_stateid)); + open->op_rflags |= OPEN4_RESULT_NO_OPEN_STATEID; + release_open_stateid(stp); + } nodeleg: status = nfs_ok; trace_nfsd_open(&stp->st_stid.sc_stateid); out: /* 4.1 client trying to upgrade/downgrade delegation? */ - if (open->op_delegate_type == NFS4_OPEN_DELEGATE_NONE && dp && + if (open->op_delegate_type == OPEN_DELEGATE_NONE && dp && open->op_deleg_want) nfsd4_deleg_xgrade_none_ext(open, dp); @@ -6190,7 +6411,7 @@ out: /* * To finish the open response, we just need to set the rflags. */ - open->op_rflags = NFS4_OPEN_RESULT_LOCKTYPE_POSIX; + open->op_rflags |= NFS4_OPEN_RESULT_LOCKTYPE_POSIX; if (nfsd4_has_session(&resp->cstate)) open->op_rflags |= NFS4_OPEN_RESULT_MAY_NOTIFY_LOCK; else if (!(open->op_openowner->oo_flags & NFS4_OO_CONFIRMED)) @@ -6562,6 +6783,7 @@ nfs4_laundromat(struct nfsd_net *nn) _free_cpntf_state_locked(nn, cps); } spin_unlock(&nn->s2s_cp_lock); + nfsd4_async_copy_reaper(nn); nfs4_get_client_reaplist(nn, &reaplist, <); nfs4_process_client_reaplist(&reaplist); @@ -6666,38 +6888,34 @@ deleg_reaper(struct nfsd_net *nn) { struct list_head *pos, *next; struct nfs4_client *clp; - LIST_HEAD(cblist); spin_lock(&nn->client_lock); list_for_each_safe(pos, next, &nn->client_lru) { clp = list_entry(pos, struct nfs4_client, cl_lru); - if (clp->cl_state != NFSD4_ACTIVE || - list_empty(&clp->cl_delegations) || - atomic_read(&clp->cl_delegs_in_recall) || - test_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags) || - (ktime_get_boottime_seconds() - - clp->cl_ra_time < 5)) { + + if (clp->cl_state != NFSD4_ACTIVE) + continue; + if (list_empty(&clp->cl_delegations)) + continue; + if (atomic_read(&clp->cl_delegs_in_recall)) + continue; + if (test_and_set_bit(NFSD4_CALLBACK_RUNNING, &clp->cl_ra->ra_cb.cb_flags)) + continue; + if (ktime_get_boottime_seconds() - clp->cl_ra_time < 5) + continue; + if (clp->cl_cb_state != NFSD4_CB_UP) continue; - } - list_add(&clp->cl_ra_cblist, &cblist); /* release in nfsd4_cb_recall_any_release */ kref_get(&clp->cl_nfsdfs.cl_ref); - set_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags); clp->cl_ra_time = ktime_get_boottime_seconds(); - } - spin_unlock(&nn->client_lock); - - while (!list_empty(&cblist)) { - clp = list_first_entry(&cblist, struct nfs4_client, - cl_ra_cblist); - list_del_init(&clp->cl_ra_cblist); clp->cl_ra->ra_keep = 0; clp->cl_ra->ra_bmval[0] = BIT(RCA4_TYPE_MASK_RDATA_DLG) | BIT(RCA4_TYPE_MASK_WDATA_DLG); trace_nfsd_cb_recall_any(clp->cl_ra); nfsd4_run_cb(&clp->cl_ra->ra_cb); } + spin_unlock(&nn->client_lock); } static void @@ -6862,7 +7080,7 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate, */ statusmask |= SC_STATUS_REVOKED; - statusmask |= SC_STATUS_ADMIN_REVOKED; + statusmask |= SC_STATUS_ADMIN_REVOKED | SC_STATUS_FREEABLE; if (ZERO_STATEID(stateid) || ONE_STATEID(stateid) || CLOSE_STATEID(stateid)) @@ -7517,9 +7735,7 @@ nfsd4_delegreturn(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if ((status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0))) return status; - status = nfsd4_lookup_stateid(cstate, stateid, SC_TYPE_DELEG, - SC_STATUS_REVOKED | SC_STATUS_FREEABLE, - &s, nn); + status = nfsd4_lookup_stateid(cstate, stateid, SC_TYPE_DELEG, SC_STATUS_REVOKED, &s, nn); if (status) goto out; dp = delegstateid(s); @@ -7627,7 +7843,7 @@ nfsd4_lm_notify(struct file_lock *fl) if (queue) { trace_nfsd_cb_notify_lock(lo, nbl); - nfsd4_run_cb(&nbl->nbl_cb); + nfsd4_try_run_cb(&nbl->nbl_cb); } } @@ -7926,7 +8142,6 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_blocked_lock *nbl = NULL; struct file_lock *file_lock = NULL; struct file_lock *conflock = NULL; - struct super_block *sb; __be32 status = 0; int lkflg; int err; @@ -7943,12 +8158,13 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (check_lock_length(lock->lk_offset, lock->lk_length)) return nfserr_inval; - if ((status = fh_verify(rqstp, &cstate->current_fh, - S_IFREG, NFSD_MAY_LOCK))) { - dprintk("NFSD: nfsd4_lock: permission denied!\n"); + status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0); + if (status != nfs_ok) return status; + if (exportfs_cannot_lock(cstate->current_fh.fh_dentry->d_sb->s_export_op)) { + status = nfserr_notsupp; + goto out; } - sb = cstate->current_fh.fh_dentry->d_sb; if (lock->lk_is_new) { if (nfsd4_has_session(cstate)) @@ -8001,9 +8217,6 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, fp = lock_stp->st_stid.sc_file; switch (lock->lk_type) { case NFS4_READW_LT: - if (nfsd4_has_session(cstate) || - exportfs_lock_op_is_async(sb->s_export_op)) - flags |= FL_SLEEP; fallthrough; case NFS4_READ_LT: spin_lock(&fp->fi_lock); @@ -8014,9 +8227,6 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, type = F_RDLCK; break; case NFS4_WRITEW_LT: - if (nfsd4_has_session(cstate) || - exportfs_lock_op_is_async(sb->s_export_op)) - flags |= FL_SLEEP; fallthrough; case NFS4_WRITE_LT: spin_lock(&fp->fi_lock); @@ -8036,15 +8246,10 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, goto out; } - /* - * Most filesystems with their own ->lock operations will block - * the nfsd thread waiting to acquire the lock. That leads to - * deadlocks (we don't want every nfsd thread tied up waiting - * for file locks), so don't attempt blocking lock notifications - * on those filesystems: - */ - if (!exportfs_lock_op_is_async(sb->s_export_op)) - flags &= ~FL_SLEEP; + if (lock->lk_type & (NFS4_READW_LT | NFS4_WRITEW_LT) && + nfsd4_has_session(cstate) && + locks_can_async_lock(nf->nf_file->f_op)) + flags |= FL_SLEEP; nbl = find_or_allocate_block(lock_sop, &fp->fi_fhandle, nn); if (!nbl) { @@ -8299,6 +8504,11 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, status = nfserr_lock_range; goto put_stateid; } + if (exportfs_cannot_lock(nf->nf_file->f_path.mnt->mnt_sb->s_export_op)) { + status = nfserr_notsupp; + goto put_file; + } + file_lock = locks_alloc_lock(); if (!file_lock) { dprintk("NFSD: %s: unable to allocate lock!\n", __func__); @@ -8694,7 +8904,6 @@ skip_grace: } /* initialization to perform when the nfsd service is started: */ - int nfs4_state_start(void) { @@ -8704,6 +8913,15 @@ nfs4_state_start(void) if (ret) return ret; + nfsd_slot_shrinker = shrinker_alloc(0, "nfsd-DRC-slot"); + if (!nfsd_slot_shrinker) { + rhltable_destroy(&nfs4_file_rhltable); + return -ENOMEM; + } + nfsd_slot_shrinker->count_objects = nfsd_slot_count; + nfsd_slot_shrinker->scan_objects = nfsd_slot_scan; + shrinker_register(nfsd_slot_shrinker); + set_max_delegations(); return 0; } @@ -8745,6 +8963,7 @@ void nfs4_state_shutdown(void) { rhltable_destroy(&nfs4_file_rhltable); + shrinker_free(nfsd_slot_shrinker); } static void @@ -8862,11 +9081,82 @@ nfsd4_get_writestateid(struct nfsd4_compound_state *cstate, } /** + * set_cb_time - vet and set the timespec for a cb_getattr update + * @cb: timestamp from the CB_GETATTR response + * @orig: original timestamp in the inode + * @now: current time + * + * Given a timestamp in a CB_GETATTR response, check it against the + * current timestamp in the inode and the current time. Returns true + * if the inode's timestamp needs to be updated, and false otherwise. + * @cb may also be changed if the timestamp needs to be clamped. + */ +static bool set_cb_time(struct timespec64 *cb, const struct timespec64 *orig, + const struct timespec64 *now) +{ + + /* + * "When the time presented is before the original time, then the + * update is ignored." Also no need to update if there is no change. + */ + if (timespec64_compare(cb, orig) <= 0) + return false; + + /* + * "When the time presented is in the future, the server can either + * clamp the new time to the current time, or it may + * return NFS4ERR_DELAY to the client, allowing it to retry." + */ + if (timespec64_compare(cb, now) > 0) { + /* clamp it */ + *cb = *now; + } + + return true; +} + +static int cb_getattr_update_times(struct dentry *dentry, struct nfs4_delegation *dp) +{ + struct inode *inode = d_inode(dentry); + struct timespec64 now = current_time(inode); + struct nfs4_cb_fattr *ncf = &dp->dl_cb_fattr; + struct iattr attrs = { }; + int ret; + + if (deleg_attrs_deleg(dp->dl_type)) { + struct timespec64 atime = inode_get_atime(inode); + struct timespec64 mtime = inode_get_mtime(inode); + + attrs.ia_atime = ncf->ncf_cb_atime; + attrs.ia_mtime = ncf->ncf_cb_mtime; + + if (set_cb_time(&attrs.ia_atime, &atime, &now)) + attrs.ia_valid |= ATTR_ATIME | ATTR_ATIME_SET; + + if (set_cb_time(&attrs.ia_mtime, &mtime, &now)) { + attrs.ia_valid |= ATTR_CTIME | ATTR_MTIME | ATTR_MTIME_SET; + attrs.ia_ctime = attrs.ia_mtime; + } + } else { + attrs.ia_valid |= ATTR_MTIME | ATTR_CTIME; + attrs.ia_mtime = attrs.ia_ctime = now; + } + + if (!attrs.ia_valid) + return 0; + + attrs.ia_valid |= ATTR_DELEG; + inode_lock(inode); + ret = notify_change(&nop_mnt_idmap, dentry, &attrs, NULL); + inode_unlock(inode); + return ret; +} + +/** * nfsd4_deleg_getattr_conflict - Recall if GETATTR causes conflict * @rqstp: RPC transaction context * @dentry: dentry of inode to be checked for a conflict - * @modified: return true if file was modified - * @size: new size of file if modified is true + * @pdp: returned WRITE delegation, if one was found * * This function is called when there is a conflict between a write * delegation and a change/size GETATTR from another client. The server @@ -8876,25 +9166,24 @@ nfsd4_get_writestateid(struct nfsd4_compound_state *cstate, * 18.7.4. * * Returns 0 if there is no conflict; otherwise an nfs_stat - * code is returned. + * code is returned. If @pdp is set to a non-NULL value, then the + * caller must put the reference. */ __be32 nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, struct dentry *dentry, - bool *modified, u64 *size) + struct nfs4_delegation **pdp) { __be32 status; struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); struct file_lock_context *ctx; struct nfs4_delegation *dp = NULL; struct file_lease *fl; - struct iattr attrs; struct nfs4_cb_fattr *ncf; struct inode *inode = d_inode(dentry); - *modified = false; ctx = locks_inode_context(inode); if (!ctx) - return 0; + return nfs_ok; #define NON_NFSD_LEASE ((void *)1) @@ -8929,8 +9218,8 @@ nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, struct dentry *dentry, nfs4_cb_getattr(&dp->dl_cb_fattr); spin_unlock(&ctx->flc_lock); - wait_on_bit_timeout(&ncf->ncf_cb_flags, CB_GETATTR_BUSY, - TASK_INTERRUPTIBLE, NFSD_CB_GETATTR_TIMEOUT); + wait_on_bit_timeout(&ncf->ncf_getattr.cb_flags, NFSD4_CALLBACK_RUNNING, + TASK_UNINTERRUPTIBLE, NFSD_CB_GETATTR_TIMEOUT); if (ncf->ncf_cb_status) { /* Recall delegation only if client didn't respond */ status = nfserrno(nfsd_open_break_lease(inode, NFSD_MAY_READ)); @@ -8950,20 +9239,16 @@ nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, struct dentry *dentry, * not update the file's metadata with the client's * modified size */ - attrs.ia_mtime = attrs.ia_ctime = current_time(inode); - attrs.ia_valid = ATTR_MTIME | ATTR_CTIME | ATTR_DELEG; - inode_lock(inode); - err = notify_change(&nop_mnt_idmap, dentry, &attrs, NULL); - inode_unlock(inode); + err = cb_getattr_update_times(dentry, dp); if (err) { status = nfserrno(err); goto out_status; } ncf->ncf_cur_fsize = ncf->ncf_cb_fsize; - *size = ncf->ncf_cur_fsize; - *modified = true; + *pdp = dp; + return nfs_ok; } - status = 0; + status = nfs_ok; out_status: nfs4_put_stid(&dp->dl_stid); return status; |