diff options
Diffstat (limited to 'fs/nfs/pnfs.c')
-rw-r--r-- | fs/nfs/pnfs.c | 417 |
1 files changed, 244 insertions, 173 deletions
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 2e00feacd4be..fe624c91bd00 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -35,6 +35,7 @@ #include "iostat.h" #define NFSDBG_FACILITY NFSDBG_PNFS +#define PNFS_LAYOUTGET_RETRY_TIMEOUT (120*HZ) /* Locking: * @@ -190,7 +191,7 @@ EXPORT_SYMBOL_GPL(pnfs_unregister_layoutdriver); /* Need to hold i_lock if caller does not already hold reference */ void -get_layout_hdr(struct pnfs_layout_hdr *lo) +pnfs_get_layout_hdr(struct pnfs_layout_hdr *lo) { atomic_inc(&lo->plh_refcount); } @@ -199,43 +200,107 @@ static struct pnfs_layout_hdr * pnfs_alloc_layout_hdr(struct inode *ino, gfp_t gfp_flags) { struct pnfs_layoutdriver_type *ld = NFS_SERVER(ino)->pnfs_curr_ld; - return ld->alloc_layout_hdr ? ld->alloc_layout_hdr(ino, gfp_flags) : - kzalloc(sizeof(struct pnfs_layout_hdr), gfp_flags); + return ld->alloc_layout_hdr(ino, gfp_flags); } static void pnfs_free_layout_hdr(struct pnfs_layout_hdr *lo) { - struct pnfs_layoutdriver_type *ld = NFS_SERVER(lo->plh_inode)->pnfs_curr_ld; + struct nfs_server *server = NFS_SERVER(lo->plh_inode); + struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld; + + if (!list_empty(&lo->plh_layouts)) { + struct nfs_client *clp = server->nfs_client; + + spin_lock(&clp->cl_lock); + list_del_init(&lo->plh_layouts); + spin_unlock(&clp->cl_lock); + } put_rpccred(lo->plh_lc_cred); - return ld->alloc_layout_hdr ? ld->free_layout_hdr(lo) : kfree(lo); + return ld->free_layout_hdr(lo); } static void -destroy_layout_hdr(struct pnfs_layout_hdr *lo) +pnfs_detach_layout_hdr(struct pnfs_layout_hdr *lo) { + struct nfs_inode *nfsi = NFS_I(lo->plh_inode); dprintk("%s: freeing layout cache %p\n", __func__, lo); - BUG_ON(!list_empty(&lo->plh_layouts)); - NFS_I(lo->plh_inode)->layout = NULL; - pnfs_free_layout_hdr(lo); + nfsi->layout = NULL; + /* Reset MDS Threshold I/O counters */ + nfsi->write_io = 0; + nfsi->read_io = 0; +} + +void +pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo) +{ + struct inode *inode = lo->plh_inode; + + if (atomic_dec_and_lock(&lo->plh_refcount, &inode->i_lock)) { + pnfs_detach_layout_hdr(lo); + spin_unlock(&inode->i_lock); + pnfs_free_layout_hdr(lo); + } +} + +static int +pnfs_iomode_to_fail_bit(u32 iomode) +{ + return iomode == IOMODE_RW ? + NFS_LAYOUT_RW_FAILED : NFS_LAYOUT_RO_FAILED; } static void -put_layout_hdr_locked(struct pnfs_layout_hdr *lo) +pnfs_layout_set_fail_bit(struct pnfs_layout_hdr *lo, int fail_bit) { - if (atomic_dec_and_test(&lo->plh_refcount)) - destroy_layout_hdr(lo); + lo->plh_retry_timestamp = jiffies; + if (test_and_set_bit(fail_bit, &lo->plh_flags)) + atomic_inc(&lo->plh_refcount); } -void -put_layout_hdr(struct pnfs_layout_hdr *lo) +static void +pnfs_layout_clear_fail_bit(struct pnfs_layout_hdr *lo, int fail_bit) +{ + if (test_and_clear_bit(fail_bit, &lo->plh_flags)) + atomic_dec(&lo->plh_refcount); +} + +static void +pnfs_layout_io_set_failed(struct pnfs_layout_hdr *lo, u32 iomode) { struct inode *inode = lo->plh_inode; + struct pnfs_layout_range range = { + .iomode = iomode, + .offset = 0, + .length = NFS4_MAX_UINT64, + }; + LIST_HEAD(head); - if (atomic_dec_and_lock(&lo->plh_refcount, &inode->i_lock)) { - destroy_layout_hdr(lo); - spin_unlock(&inode->i_lock); + spin_lock(&inode->i_lock); + pnfs_layout_set_fail_bit(lo, pnfs_iomode_to_fail_bit(iomode)); + pnfs_mark_matching_lsegs_invalid(lo, &head, &range); + spin_unlock(&inode->i_lock); + pnfs_free_lseg_list(&head); + dprintk("%s Setting layout IOMODE_%s fail bit\n", __func__, + iomode == IOMODE_RW ? "RW" : "READ"); +} + +static bool +pnfs_layout_io_test_failed(struct pnfs_layout_hdr *lo, u32 iomode) +{ + unsigned long start, end; + int fail_bit = pnfs_iomode_to_fail_bit(iomode); + + if (test_bit(fail_bit, &lo->plh_flags) == 0) + return false; + end = jiffies; + start = end - PNFS_LAYOUTGET_RETRY_TIMEOUT; + if (!time_in_range(lo->plh_retry_timestamp, start, end)) { + /* It is time to retry the failed layoutgets */ + pnfs_layout_clear_fail_bit(lo, fail_bit); + return false; } + return true; } static void @@ -249,33 +314,32 @@ init_lseg(struct pnfs_layout_hdr *lo, struct pnfs_layout_segment *lseg) lseg->pls_layout = lo; } -static void free_lseg(struct pnfs_layout_segment *lseg) +static void pnfs_free_lseg(struct pnfs_layout_segment *lseg) { struct inode *ino = lseg->pls_layout->plh_inode; NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg); - /* Matched by get_layout_hdr in pnfs_insert_layout */ - put_layout_hdr(NFS_I(ino)->layout); } static void -put_lseg_common(struct pnfs_layout_segment *lseg) +pnfs_layout_remove_lseg(struct pnfs_layout_hdr *lo, + struct pnfs_layout_segment *lseg) { - struct inode *inode = lseg->pls_layout->plh_inode; + struct inode *inode = lo->plh_inode; WARN_ON(test_bit(NFS_LSEG_VALID, &lseg->pls_flags)); list_del_init(&lseg->pls_list); - if (list_empty(&lseg->pls_layout->plh_segs)) { - set_bit(NFS_LAYOUT_DESTROYED, &lseg->pls_layout->plh_flags); - /* Matched by initial refcount set in alloc_init_layout_hdr */ - put_layout_hdr_locked(lseg->pls_layout); - } + /* Matched by pnfs_get_layout_hdr in pnfs_layout_insert_lseg */ + atomic_dec(&lo->plh_refcount); + if (list_empty(&lo->plh_segs)) + clear_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags); rpc_wake_up(&NFS_SERVER(inode)->roc_rpcwaitq); } void -put_lseg(struct pnfs_layout_segment *lseg) +pnfs_put_lseg(struct pnfs_layout_segment *lseg) { + struct pnfs_layout_hdr *lo; struct inode *inode; if (!lseg) @@ -284,17 +348,17 @@ put_lseg(struct pnfs_layout_segment *lseg) dprintk("%s: lseg %p ref %d valid %d\n", __func__, lseg, atomic_read(&lseg->pls_refcount), test_bit(NFS_LSEG_VALID, &lseg->pls_flags)); - inode = lseg->pls_layout->plh_inode; + lo = lseg->pls_layout; + inode = lo->plh_inode; if (atomic_dec_and_lock(&lseg->pls_refcount, &inode->i_lock)) { - LIST_HEAD(free_me); - - put_lseg_common(lseg); - list_add(&lseg->pls_list, &free_me); + pnfs_get_layout_hdr(lo); + pnfs_layout_remove_lseg(lo, lseg); spin_unlock(&inode->i_lock); - pnfs_free_lseg_list(&free_me); + pnfs_free_lseg(lseg); + pnfs_put_layout_hdr(lo); } } -EXPORT_SYMBOL_GPL(put_lseg); +EXPORT_SYMBOL_GPL(pnfs_put_lseg); static inline u64 end_offset(u64 start, u64 len) @@ -378,7 +442,7 @@ static int mark_lseg_invalid(struct pnfs_layout_segment *lseg, dprintk("%s: lseg %p ref %d\n", __func__, lseg, atomic_read(&lseg->pls_refcount)); if (atomic_dec_and_test(&lseg->pls_refcount)) { - put_lseg_common(lseg); + pnfs_layout_remove_lseg(lseg->pls_layout, lseg); list_add(&lseg->pls_list, tmp_list); rv = 1; } @@ -390,7 +454,7 @@ static int mark_lseg_invalid(struct pnfs_layout_segment *lseg, * after call. */ int -mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, +pnfs_mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, struct list_head *tmp_list, struct pnfs_layout_range *recall_range) { @@ -399,14 +463,8 @@ mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, dprintk("%s:Begin lo %p\n", __func__, lo); - if (list_empty(&lo->plh_segs)) { - /* Reset MDS Threshold I/O counters */ - NFS_I(lo->plh_inode)->write_io = 0; - NFS_I(lo->plh_inode)->read_io = 0; - if (!test_and_set_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags)) - put_layout_hdr_locked(lo); + if (list_empty(&lo->plh_segs)) return 0; - } list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list) if (!recall_range || should_free_lseg(&lseg->pls_range, recall_range)) { @@ -426,25 +484,13 @@ void pnfs_free_lseg_list(struct list_head *free_me) { struct pnfs_layout_segment *lseg, *tmp; - struct pnfs_layout_hdr *lo; if (list_empty(free_me)) return; - lo = list_first_entry(free_me, struct pnfs_layout_segment, - pls_list)->pls_layout; - - if (test_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags)) { - struct nfs_client *clp; - - clp = NFS_SERVER(lo->plh_inode)->nfs_client; - spin_lock(&clp->cl_lock); - list_del_init(&lo->plh_layouts); - spin_unlock(&clp->cl_lock); - } list_for_each_entry_safe(lseg, tmp, free_me, pls_list) { list_del(&lseg->pls_list); - free_lseg(lseg); + pnfs_free_lseg(lseg); } } @@ -458,10 +504,15 @@ pnfs_destroy_layout(struct nfs_inode *nfsi) lo = nfsi->layout; if (lo) { lo->plh_block_lgets++; /* permanently block new LAYOUTGETs */ - mark_matching_lsegs_invalid(lo, &tmp_list, NULL); - } - spin_unlock(&nfsi->vfs_inode.i_lock); - pnfs_free_lseg_list(&tmp_list); + pnfs_mark_matching_lsegs_invalid(lo, &tmp_list, NULL); + pnfs_get_layout_hdr(lo); + pnfs_layout_clear_fail_bit(lo, NFS_LAYOUT_RO_FAILED); + pnfs_layout_clear_fail_bit(lo, NFS_LAYOUT_RW_FAILED); + spin_unlock(&nfsi->vfs_inode.i_lock); + pnfs_free_lseg_list(&tmp_list); + pnfs_put_layout_hdr(lo); + } else + spin_unlock(&nfsi->vfs_inode.i_lock); } EXPORT_SYMBOL_GPL(pnfs_destroy_layout); @@ -498,46 +549,54 @@ pnfs_destroy_all_layouts(struct nfs_client *clp) } } +/* + * Compare 2 layout stateid sequence ids, to see which is newer, + * taking into account wraparound issues. + */ +static bool pnfs_seqid_is_newer(u32 s1, u32 s2) +{ + return (s32)s1 - (s32)s2 > 0; +} + /* update lo->plh_stateid with new if is more recent */ void pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new, bool update_barrier) { - u32 oldseq, newseq; + u32 oldseq, newseq, new_barrier; + int empty = list_empty(&lo->plh_segs); oldseq = be32_to_cpu(lo->plh_stateid.seqid); newseq = be32_to_cpu(new->seqid); - if ((int)(newseq - oldseq) > 0) { + if (empty || pnfs_seqid_is_newer(newseq, oldseq)) { nfs4_stateid_copy(&lo->plh_stateid, new); if (update_barrier) { - u32 new_barrier = be32_to_cpu(new->seqid); - - if ((int)(new_barrier - lo->plh_barrier)) - lo->plh_barrier = new_barrier; + new_barrier = be32_to_cpu(new->seqid); } else { /* Because of wraparound, we want to keep the barrier - * "close" to the current seqids. It needs to be - * within 2**31 to count as "behind", so if it - * gets too near that limit, give us a litle leeway - * and bring it to within 2**30. - * NOTE - and yes, this is all unsigned arithmetic. + * "close" to the current seqids. */ - if (unlikely((newseq - lo->plh_barrier) > (3 << 29))) - lo->plh_barrier = newseq - (1 << 30); + new_barrier = newseq - atomic_read(&lo->plh_outstanding); } + if (empty || pnfs_seqid_is_newer(new_barrier, lo->plh_barrier)) + lo->plh_barrier = new_barrier; } } +static bool +pnfs_layout_stateid_blocked(const struct pnfs_layout_hdr *lo, + const nfs4_stateid *stateid) +{ + u32 seqid = be32_to_cpu(stateid->seqid); + + return !pnfs_seqid_is_newer(seqid, lo->plh_barrier); +} + /* lget is set to 1 if called from inside send_layoutget call chain */ static bool -pnfs_layoutgets_blocked(struct pnfs_layout_hdr *lo, nfs4_stateid *stateid, - int lget) +pnfs_layoutgets_blocked(const struct pnfs_layout_hdr *lo, int lget) { - if ((stateid) && - (int)(lo->plh_barrier - be32_to_cpu(stateid->seqid)) >= 0) - return true; return lo->plh_block_lgets || - test_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags) || test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) || (list_empty(&lo->plh_segs) && (atomic_read(&lo->plh_outstanding) > lget)); @@ -551,7 +610,7 @@ pnfs_choose_layoutget_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo, dprintk("--> %s\n", __func__); spin_lock(&lo->plh_inode->i_lock); - if (pnfs_layoutgets_blocked(lo, NULL, 1)) { + if (pnfs_layoutgets_blocked(lo, 1)) { status = -EAGAIN; } else if (list_empty(&lo->plh_segs)) { int seq; @@ -582,7 +641,7 @@ send_layoutget(struct pnfs_layout_hdr *lo, struct inode *ino = lo->plh_inode; struct nfs_server *server = NFS_SERVER(ino); struct nfs4_layoutget *lgp; - struct pnfs_layout_segment *lseg = NULL; + struct pnfs_layout_segment *lseg; dprintk("--> %s\n", __func__); @@ -599,16 +658,22 @@ send_layoutget(struct pnfs_layout_hdr *lo, lgp->args.type = server->pnfs_curr_ld->id; lgp->args.inode = ino; lgp->args.ctx = get_nfs_open_context(ctx); - lgp->lsegpp = &lseg; lgp->gfp_flags = gfp_flags; /* Synchronously retrieve layout information from server and * store in lseg. */ - nfs4_proc_layoutget(lgp, gfp_flags); - if (!lseg) { - /* remember that LAYOUTGET failed and suspend trying */ - set_bit(lo_fail_bit(range->iomode), &lo->plh_flags); + lseg = nfs4_proc_layoutget(lgp, gfp_flags); + if (IS_ERR(lseg)) { + switch (PTR_ERR(lseg)) { + case -ENOMEM: + case -ERESTARTSYS: + break; + default: + /* remember that LAYOUTGET failed and suspend trying */ + pnfs_layout_io_set_failed(lo, range->iomode); + } + return NULL; } return lseg; @@ -636,25 +701,24 @@ _pnfs_return_layout(struct inode *ino) spin_lock(&ino->i_lock); lo = nfsi->layout; - if (!lo || pnfs_test_layout_returned(lo)) { + if (!lo) { spin_unlock(&ino->i_lock); dprintk("NFS: %s no layout to return\n", __func__); goto out; } stateid = nfsi->layout->plh_stateid; /* Reference matched in nfs4_layoutreturn_release */ - get_layout_hdr(lo); + pnfs_get_layout_hdr(lo); empty = list_empty(&lo->plh_segs); - mark_matching_lsegs_invalid(lo, &tmp_list, NULL); + pnfs_mark_matching_lsegs_invalid(lo, &tmp_list, NULL); /* Don't send a LAYOUTRETURN if list was initially empty */ if (empty) { spin_unlock(&ino->i_lock); - put_layout_hdr(lo); + pnfs_put_layout_hdr(lo); dprintk("NFS: %s no layout segments to return\n", __func__); goto out; } lo->plh_block_lgets++; - pnfs_mark_layout_returned(lo); spin_unlock(&ino->i_lock); pnfs_free_lseg_list(&tmp_list); @@ -663,10 +727,10 @@ _pnfs_return_layout(struct inode *ino) lrp = kzalloc(sizeof(*lrp), GFP_KERNEL); if (unlikely(lrp == NULL)) { status = -ENOMEM; - set_bit(NFS_LAYOUT_RW_FAILED, &lo->plh_flags); - set_bit(NFS_LAYOUT_RO_FAILED, &lo->plh_flags); - pnfs_clear_layout_returned(lo); - put_layout_hdr(lo); + spin_lock(&ino->i_lock); + lo->plh_block_lgets--; + spin_unlock(&ino->i_lock); + pnfs_put_layout_hdr(lo); goto out; } @@ -703,7 +767,7 @@ bool pnfs_roc(struct inode *ino) if (!found) goto out_nolayout; lo->plh_block_lgets++; - get_layout_hdr(lo); /* matched in pnfs_roc_release */ + pnfs_get_layout_hdr(lo); /* matched in pnfs_roc_release */ spin_unlock(&ino->i_lock); pnfs_free_lseg_list(&tmp_list); return true; @@ -720,8 +784,12 @@ void pnfs_roc_release(struct inode *ino) spin_lock(&ino->i_lock); lo = NFS_I(ino)->layout; lo->plh_block_lgets--; - put_layout_hdr_locked(lo); - spin_unlock(&ino->i_lock); + if (atomic_dec_and_test(&lo->plh_refcount)) { + pnfs_detach_layout_hdr(lo); + spin_unlock(&ino->i_lock); + pnfs_free_layout_hdr(lo); + } else + spin_unlock(&ino->i_lock); } void pnfs_roc_set_barrier(struct inode *ino, u32 barrier) @@ -730,32 +798,34 @@ void pnfs_roc_set_barrier(struct inode *ino, u32 barrier) spin_lock(&ino->i_lock); lo = NFS_I(ino)->layout; - if ((int)(barrier - lo->plh_barrier) > 0) + if (pnfs_seqid_is_newer(barrier, lo->plh_barrier)) lo->plh_barrier = barrier; spin_unlock(&ino->i_lock); } -bool pnfs_roc_drain(struct inode *ino, u32 *barrier) +bool pnfs_roc_drain(struct inode *ino, u32 *barrier, struct rpc_task *task) { struct nfs_inode *nfsi = NFS_I(ino); + struct pnfs_layout_hdr *lo; struct pnfs_layout_segment *lseg; + u32 current_seqid; bool found = false; spin_lock(&ino->i_lock); list_for_each_entry(lseg, &nfsi->layout->plh_segs, pls_list) if (test_bit(NFS_LSEG_ROC, &lseg->pls_flags)) { + rpc_sleep_on(&NFS_SERVER(ino)->roc_rpcwaitq, task, NULL); found = true; - break; + goto out; } - if (!found) { - struct pnfs_layout_hdr *lo = nfsi->layout; - u32 current_seqid = be32_to_cpu(lo->plh_stateid.seqid); + lo = nfsi->layout; + current_seqid = be32_to_cpu(lo->plh_stateid.seqid); - /* Since close does not return a layout stateid for use as - * a barrier, we choose the worst-case barrier. - */ - *barrier = current_seqid + atomic_read(&lo->plh_outstanding); - } + /* Since close does not return a layout stateid for use as + * a barrier, we choose the worst-case barrier. + */ + *barrier = current_seqid + atomic_read(&lo->plh_outstanding); +out: spin_unlock(&ino->i_lock); return found; } @@ -786,14 +856,13 @@ cmp_layout(struct pnfs_layout_range *l1, } static void -pnfs_insert_layout(struct pnfs_layout_hdr *lo, +pnfs_layout_insert_lseg(struct pnfs_layout_hdr *lo, struct pnfs_layout_segment *lseg) { struct pnfs_layout_segment *lp; dprintk("%s:Begin\n", __func__); - assert_spin_locked(&lo->plh_inode->i_lock); list_for_each_entry(lp, &lo->plh_segs, pls_list) { if (cmp_layout(&lseg->pls_range, &lp->pls_range) > 0) continue; @@ -813,7 +882,7 @@ pnfs_insert_layout(struct pnfs_layout_hdr *lo, __func__, lseg, lseg->pls_range.iomode, lseg->pls_range.offset, lseg->pls_range.length); out: - get_layout_hdr(lo); + pnfs_get_layout_hdr(lo); dprintk("%s:Return\n", __func__); } @@ -847,21 +916,19 @@ pnfs_find_alloc_layout(struct inode *ino, dprintk("%s Begin ino=%p layout=%p\n", __func__, ino, nfsi->layout); - assert_spin_locked(&ino->i_lock); - if (nfsi->layout) { - if (test_bit(NFS_LAYOUT_DESTROYED, &nfsi->layout->plh_flags)) - return NULL; - else - return nfsi->layout; - } + if (nfsi->layout != NULL) + goto out_existing; spin_unlock(&ino->i_lock); new = alloc_init_layout_hdr(ino, ctx, gfp_flags); spin_lock(&ino->i_lock); - if (likely(nfsi->layout == NULL)) /* Won the race? */ + if (likely(nfsi->layout == NULL)) { /* Won the race? */ nfsi->layout = new; - else - pnfs_free_layout_hdr(new); + return new; + } + pnfs_free_layout_hdr(new); +out_existing: + pnfs_get_layout_hdr(nfsi->layout); return nfsi->layout; } @@ -904,11 +971,10 @@ pnfs_find_lseg(struct pnfs_layout_hdr *lo, dprintk("%s:Begin\n", __func__); - assert_spin_locked(&lo->plh_inode->i_lock); list_for_each_entry(lseg, &lo->plh_segs, pls_list) { if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags) && is_matching_lseg(&lseg->pls_range, range)) { - ret = get_lseg(lseg); + ret = pnfs_get_lseg(lseg); break; } if (lseg->pls_range.offset > range->offset) @@ -1013,7 +1079,6 @@ pnfs_update_layout(struct inode *ino, .length = count, }; unsigned pg_offset; - struct nfs_inode *nfsi = NFS_I(ino); struct nfs_server *server = NFS_SERVER(ino); struct nfs_client *clp = server->nfs_client; struct pnfs_layout_hdr *lo; @@ -1021,16 +1086,16 @@ pnfs_update_layout(struct inode *ino, bool first = false; if (!pnfs_enabled_sb(NFS_SERVER(ino))) - return NULL; + goto out; if (pnfs_within_mdsthreshold(ctx, ino, iomode)) - return NULL; + goto out; spin_lock(&ino->i_lock); lo = pnfs_find_alloc_layout(ino, ctx, gfp_flags); if (lo == NULL) { - dprintk("%s ERROR: can't get pnfs_layout_hdr\n", __func__); - goto out_unlock; + spin_unlock(&ino->i_lock); + goto out; } /* Do we even need to bother with this? */ @@ -1040,7 +1105,7 @@ pnfs_update_layout(struct inode *ino, } /* if LAYOUTGET already failed once we don't try again */ - if (test_bit(lo_fail_bit(iomode), &nfsi->layout->plh_flags)) + if (pnfs_layout_io_test_failed(lo, iomode)) goto out_unlock; /* Check to see if the layout for the given range already exists */ @@ -1048,17 +1113,13 @@ pnfs_update_layout(struct inode *ino, if (lseg) goto out_unlock; - if (pnfs_layoutgets_blocked(lo, NULL, 0)) + if (pnfs_layoutgets_blocked(lo, 0)) goto out_unlock; atomic_inc(&lo->plh_outstanding); - get_layout_hdr(lo); if (list_empty(&lo->plh_segs)) first = true; - /* Enable LAYOUTRETURNs */ - pnfs_clear_layout_returned(lo); - spin_unlock(&ino->i_lock); if (first) { /* The lo must be on the clp list if there is any @@ -1079,24 +1140,26 @@ pnfs_update_layout(struct inode *ino, arg.length = PAGE_CACHE_ALIGN(arg.length); lseg = send_layoutget(lo, ctx, &arg, gfp_flags); - if (!lseg && first) { - spin_lock(&clp->cl_lock); - list_del_init(&lo->plh_layouts); - spin_unlock(&clp->cl_lock); - } atomic_dec(&lo->plh_outstanding); - put_layout_hdr(lo); +out_put_layout_hdr: + pnfs_put_layout_hdr(lo); out: - dprintk("%s end, state 0x%lx lseg %p\n", __func__, - nfsi->layout ? nfsi->layout->plh_flags : -1, lseg); + dprintk("%s: inode %s/%llu pNFS layout segment %s for " + "(%s, offset: %llu, length: %llu)\n", + __func__, ino->i_sb->s_id, + (unsigned long long)NFS_FILEID(ino), + lseg == NULL ? "not found" : "found", + iomode==IOMODE_RW ? "read/write" : "read-only", + (unsigned long long)pos, + (unsigned long long)count); return lseg; out_unlock: spin_unlock(&ino->i_lock); - goto out; + goto out_put_layout_hdr; } EXPORT_SYMBOL_GPL(pnfs_update_layout); -int +struct pnfs_layout_segment * pnfs_layout_process(struct nfs4_layoutget *lgp) { struct pnfs_layout_hdr *lo = NFS_I(lgp->args.inode)->layout; @@ -1123,25 +1186,29 @@ pnfs_layout_process(struct nfs4_layoutget *lgp) goto out_forget_reply; } - if (pnfs_layoutgets_blocked(lo, &res->stateid, 1)) { + if (pnfs_layoutgets_blocked(lo, 1) || + pnfs_layout_stateid_blocked(lo, &res->stateid)) { dprintk("%s forget reply due to state\n", __func__); goto out_forget_reply; } + + /* Done processing layoutget. Set the layout stateid */ + pnfs_set_layout_stateid(lo, &res->stateid, false); + init_lseg(lo, lseg); lseg->pls_range = res->range; - *lgp->lsegpp = get_lseg(lseg); - pnfs_insert_layout(lo, lseg); + pnfs_get_lseg(lseg); + pnfs_layout_insert_lseg(lo, lseg); if (res->return_on_close) { set_bit(NFS_LSEG_ROC, &lseg->pls_flags); set_bit(NFS_LAYOUT_ROC, &lo->plh_flags); } - /* Done processing layoutget. Set the layout stateid */ - pnfs_set_layout_stateid(lo, &res->stateid, false); spin_unlock(&ino->i_lock); + return lseg; out: - return status; + return ERR_PTR(status); out_forget_reply: spin_unlock(&ino->i_lock); @@ -1153,16 +1220,24 @@ out_forget_reply: void pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) { + u64 rd_size = req->wb_bytes; + BUG_ON(pgio->pg_lseg != NULL); if (req->wb_offset != req->wb_pgbase) { nfs_pageio_reset_read_mds(pgio); return; } + + if (pgio->pg_dreq == NULL) + rd_size = i_size_read(pgio->pg_inode) - req_offset(req); + else + rd_size = nfs_dreq_bytes_left(pgio->pg_dreq); + pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, req->wb_context, req_offset(req), - req->wb_bytes, + rd_size, IOMODE_READ, GFP_KERNEL); /* If no lseg, fall back to read through mds */ @@ -1173,7 +1248,8 @@ pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *r EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_read); void -pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) +pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, + struct nfs_page *req, u64 wb_size) { BUG_ON(pgio->pg_lseg != NULL); @@ -1181,10 +1257,11 @@ pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page * nfs_pageio_reset_write_mds(pgio); return; } + pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, req->wb_context, req_offset(req), - req->wb_bytes, + wb_size, IOMODE_RW, GFP_NOFS); /* If no lseg, fall back to write through mds */ @@ -1362,12 +1439,12 @@ pnfs_do_multiple_writes(struct nfs_pageio_descriptor *desc, struct list_head *he if (trypnfs == PNFS_NOT_ATTEMPTED) pnfs_write_through_mds(desc, data); } - put_lseg(lseg); + pnfs_put_lseg(lseg); } static void pnfs_writehdr_free(struct nfs_pgio_header *hdr) { - put_lseg(hdr->lseg); + pnfs_put_lseg(hdr->lseg); nfs_writehdr_free(hdr); } EXPORT_SYMBOL_GPL(pnfs_writehdr_free); @@ -1382,17 +1459,17 @@ pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) whdr = nfs_writehdr_alloc(); if (!whdr) { desc->pg_completion_ops->error_cleanup(&desc->pg_list); - put_lseg(desc->pg_lseg); + pnfs_put_lseg(desc->pg_lseg); desc->pg_lseg = NULL; return -ENOMEM; } hdr = &whdr->header; nfs_pgheader_init(desc, hdr, pnfs_writehdr_free); - hdr->lseg = get_lseg(desc->pg_lseg); + hdr->lseg = pnfs_get_lseg(desc->pg_lseg); atomic_inc(&hdr->refcnt); ret = nfs_generic_flush(desc, hdr); if (ret != 0) { - put_lseg(desc->pg_lseg); + pnfs_put_lseg(desc->pg_lseg); desc->pg_lseg = NULL; } else pnfs_do_multiple_writes(desc, &hdr->rpc_list, desc->pg_ioflags); @@ -1517,12 +1594,12 @@ pnfs_do_multiple_reads(struct nfs_pageio_descriptor *desc, struct list_head *hea if (trypnfs == PNFS_NOT_ATTEMPTED) pnfs_read_through_mds(desc, data); } - put_lseg(lseg); + pnfs_put_lseg(lseg); } static void pnfs_readhdr_free(struct nfs_pgio_header *hdr) { - put_lseg(hdr->lseg); + pnfs_put_lseg(hdr->lseg); nfs_readhdr_free(hdr); } EXPORT_SYMBOL_GPL(pnfs_readhdr_free); @@ -1538,17 +1615,17 @@ pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) if (!rhdr) { desc->pg_completion_ops->error_cleanup(&desc->pg_list); ret = -ENOMEM; - put_lseg(desc->pg_lseg); + pnfs_put_lseg(desc->pg_lseg); desc->pg_lseg = NULL; return ret; } hdr = &rhdr->header; nfs_pgheader_init(desc, hdr, pnfs_readhdr_free); - hdr->lseg = get_lseg(desc->pg_lseg); + hdr->lseg = pnfs_get_lseg(desc->pg_lseg); atomic_inc(&hdr->refcnt); ret = nfs_generic_pagein(desc, hdr); if (ret != 0) { - put_lseg(desc->pg_lseg); + pnfs_put_lseg(desc->pg_lseg); desc->pg_lseg = NULL; } else pnfs_do_multiple_reads(desc, &hdr->rpc_list); @@ -1574,13 +1651,7 @@ static void pnfs_list_write_lseg(struct inode *inode, struct list_head *listp) void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg) { - if (lseg->pls_range.iomode == IOMODE_RW) { - dprintk("%s Setting layout IOMODE_RW fail bit\n", __func__); - set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags); - } else { - dprintk("%s Setting layout IOMODE_READ fail bit\n", __func__); - set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags); - } + pnfs_layout_io_set_failed(lseg->pls_layout, lseg->pls_range.iomode); } EXPORT_SYMBOL_GPL(pnfs_set_lo_fail); @@ -1601,7 +1672,7 @@ pnfs_set_layoutcommit(struct nfs_write_data *wdata) } if (!test_and_set_bit(NFS_LSEG_LAYOUTCOMMIT, &hdr->lseg->pls_flags)) { /* references matched in nfs4_layoutcommit_release */ - get_lseg(hdr->lseg); + pnfs_get_lseg(hdr->lseg); } if (end_pos > nfsi->layout->plh_lwb) nfsi->layout->plh_lwb = end_pos; |