summaryrefslogtreecommitdiff
path: root/fs/nfs/flexfilelayout/flexfilelayout.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/nfs/flexfilelayout/flexfilelayout.c')
-rw-r--r--fs/nfs/flexfilelayout/flexfilelayout.c245
1 files changed, 174 insertions, 71 deletions
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index 3e724cb7ef01..b685e763ef11 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -11,6 +11,7 @@
#include <linux/nfs_mount.h>
#include <linux/nfs_page.h>
#include <linux/module.h>
+#include <linux/file.h>
#include <linux/sched/mm.h>
#include <linux/sunrpc/metrics.h>
@@ -162,6 +163,21 @@ decode_name(struct xdr_stream *xdr, u32 *id)
return 0;
}
+static struct nfsd_file *
+ff_local_open_fh(struct nfs_client *clp, const struct cred *cred,
+ struct nfs_fh *fh, fmode_t mode)
+{
+ if (mode & FMODE_WRITE) {
+ /*
+ * Always request read and write access since this corresponds
+ * to a rw layout.
+ */
+ mode |= FMODE_READ;
+ }
+
+ return nfs_local_open_fh(clp, cred, fh, mode);
+}
+
static bool ff_mirror_match_fh(const struct nfs4_ff_layout_mirror *m1,
const struct nfs4_ff_layout_mirror *m2)
{
@@ -237,7 +253,7 @@ static struct nfs4_ff_layout_mirror *ff_layout_alloc_mirror(gfp_t gfp_flags)
static void ff_layout_free_mirror(struct nfs4_ff_layout_mirror *mirror)
{
- const struct cred *cred;
+ const struct cred *cred;
ff_layout_remove_mirror(mirror);
kfree(mirror->fh_versions);
@@ -745,14 +761,14 @@ ff_layout_choose_ds_for_read(struct pnfs_layout_segment *lseg,
{
struct nfs4_ff_layout_segment *fls = FF_LAYOUT_LSEG(lseg);
struct nfs4_ff_layout_mirror *mirror;
- struct nfs4_pnfs_ds *ds;
+ struct nfs4_pnfs_ds *ds = ERR_PTR(-EAGAIN);
u32 idx;
/* mirrors are initially sorted by efficiency */
for (idx = start_idx; idx < fls->mirror_array_cnt; idx++) {
mirror = FF_LAYOUT_COMP(lseg, idx);
ds = nfs4_ff_layout_prepare_ds(lseg, mirror, false);
- if (!ds)
+ if (IS_ERR(ds))
continue;
if (check_device &&
@@ -760,10 +776,10 @@ ff_layout_choose_ds_for_read(struct pnfs_layout_segment *lseg,
continue;
*best_idx = idx;
- return ds;
+ break;
}
- return NULL;
+ return ds;
}
static struct nfs4_pnfs_ds *
@@ -823,14 +839,6 @@ ff_layout_pg_get_read(struct nfs_pageio_descriptor *pgio,
}
static void
-ff_layout_pg_check_layout(struct nfs_pageio_descriptor *pgio,
- struct nfs_page *req)
-{
- pnfs_generic_pg_check_layout(pgio);
- pnfs_generic_pg_check_range(pgio, req);
-}
-
-static void
ff_layout_pg_init_read(struct nfs_pageio_descriptor *pgio,
struct nfs_page *req)
{
@@ -839,8 +847,11 @@ ff_layout_pg_init_read(struct nfs_pageio_descriptor *pgio,
struct nfs4_pnfs_ds *ds;
u32 ds_idx;
+ if (NFS_SERVER(pgio->pg_inode)->flags &
+ (NFS_MOUNT_SOFT|NFS_MOUNT_SOFTERR))
+ pgio->pg_maxretrans = io_maxretrans;
retry:
- ff_layout_pg_check_layout(pgio, req);
+ pnfs_generic_pg_check_layout(pgio, req);
/* Use full layout for now */
if (!pgio->pg_lseg) {
ff_layout_pg_get_read(pgio, req, false);
@@ -852,6 +863,8 @@ retry:
if (!pgio->pg_lseg)
goto out_nolseg;
}
+ /* Reset wb_nio, since getting layout segment was successful */
+ req->wb_nio = 0;
ds = ff_layout_get_ds_for_read(pgio, &ds_idx);
if (!ds) {
@@ -868,14 +881,24 @@ retry:
pgm->pg_bsize = mirror->mirror_ds->ds_versions[0].rsize;
pgio->pg_mirror_idx = ds_idx;
-
- if (NFS_SERVER(pgio->pg_inode)->flags &
- (NFS_MOUNT_SOFT|NFS_MOUNT_SOFTERR))
- pgio->pg_maxretrans = io_maxretrans;
return;
out_nolseg:
- if (pgio->pg_error < 0)
- return;
+ if (pgio->pg_error < 0) {
+ if (pgio->pg_error != -EAGAIN)
+ return;
+ /* Retry getting layout segment if lower layer returned -EAGAIN */
+ if (pgio->pg_maxretrans && req->wb_nio++ > pgio->pg_maxretrans) {
+ if (NFS_SERVER(pgio->pg_inode)->flags & NFS_MOUNT_SOFTERR)
+ pgio->pg_error = -ETIMEDOUT;
+ else
+ pgio->pg_error = -EIO;
+ return;
+ }
+ pgio->pg_error = 0;
+ /* Sleep for 1 second before retrying */
+ ssleep(1);
+ goto retry;
+ }
out_mds:
trace_pnfs_mds_fallback_pg_init_read(pgio->pg_inode,
0, NFS4_MAX_UINT64, IOMODE_READ,
@@ -895,7 +918,7 @@ ff_layout_pg_init_write(struct nfs_pageio_descriptor *pgio,
u32 i;
retry:
- ff_layout_pg_check_layout(pgio, req);
+ pnfs_generic_pg_check_layout(pgio, req);
if (!pgio->pg_lseg) {
pgio->pg_lseg =
pnfs_update_layout(pgio->pg_inode, nfs_req_openctx(req),
@@ -918,7 +941,7 @@ retry:
for (i = 0; i < pgio->pg_mirror_count; i++) {
mirror = FF_LAYOUT_COMP(pgio->pg_lseg, i);
ds = nfs4_ff_layout_prepare_ds(pgio->pg_lseg, mirror, true);
- if (!ds) {
+ if (IS_ERR(ds)) {
if (!ff_layout_no_fallback_to_mds(pgio->pg_lseg))
goto out_mds;
pnfs_generic_pg_cleanup(pgio);
@@ -1081,6 +1104,7 @@ static void ff_layout_reset_read(struct nfs_pgio_header *hdr)
}
static int ff_layout_async_handle_error_v4(struct rpc_task *task,
+ u32 op_status,
struct nfs4_state *state,
struct nfs_client *clp,
struct pnfs_layout_segment *lseg,
@@ -1091,32 +1115,42 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task,
struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx);
struct nfs4_slot_table *tbl = &clp->cl_session->fc_slot_table;
- switch (task->tk_status) {
- case -NFS4ERR_BADSESSION:
- case -NFS4ERR_BADSLOT:
- case -NFS4ERR_BAD_HIGH_SLOT:
- case -NFS4ERR_DEADSESSION:
- case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
- case -NFS4ERR_SEQ_FALSE_RETRY:
- case -NFS4ERR_SEQ_MISORDERED:
+ switch (op_status) {
+ case NFS4_OK:
+ case NFS4ERR_NXIO:
+ break;
+ case NFSERR_PERM:
+ if (!task->tk_xprt)
+ break;
+ xprt_force_disconnect(task->tk_xprt);
+ goto out_retry;
+ case NFS4ERR_BADSESSION:
+ case NFS4ERR_BADSLOT:
+ case NFS4ERR_BAD_HIGH_SLOT:
+ case NFS4ERR_DEADSESSION:
+ case NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
+ case NFS4ERR_SEQ_FALSE_RETRY:
+ case NFS4ERR_SEQ_MISORDERED:
dprintk("%s ERROR %d, Reset session. Exchangeid "
"flags 0x%x\n", __func__, task->tk_status,
clp->cl_exchange_flags);
nfs4_schedule_session_recovery(clp->cl_session, task->tk_status);
- break;
- case -NFS4ERR_DELAY:
- case -NFS4ERR_GRACE:
+ goto out_retry;
+ case NFS4ERR_DELAY:
+ nfs_inc_stats(lseg->pls_layout->plh_inode, NFSIOS_DELAY);
+ fallthrough;
+ case NFS4ERR_GRACE:
rpc_delay(task, FF_LAYOUT_POLL_RETRY_MAX);
- break;
- case -NFS4ERR_RETRY_UNCACHED_REP:
- break;
+ goto out_retry;
+ case NFS4ERR_RETRY_UNCACHED_REP:
+ goto out_retry;
/* Invalidate Layout errors */
- case -NFS4ERR_PNFS_NO_LAYOUT:
- case -ESTALE: /* mapped NFS4ERR_STALE */
- case -EBADHANDLE: /* mapped NFS4ERR_BADHANDLE */
- case -EISDIR: /* mapped NFS4ERR_ISDIR */
- case -NFS4ERR_FHEXPIRED:
- case -NFS4ERR_WRONG_TYPE:
+ case NFS4ERR_PNFS_NO_LAYOUT:
+ case NFS4ERR_STALE:
+ case NFS4ERR_BADHANDLE:
+ case NFS4ERR_ISDIR:
+ case NFS4ERR_FHEXPIRED:
+ case NFS4ERR_WRONG_TYPE:
dprintk("%s Invalid layout error %d\n", __func__,
task->tk_status);
/*
@@ -1129,6 +1163,11 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task,
pnfs_destroy_layout(NFS_I(inode));
rpc_wake_up(&tbl->slot_tbl_waitq);
goto reset;
+ default:
+ break;
+ }
+
+ switch (task->tk_status) {
/* RPC connection errors */
case -ECONNREFUSED:
case -EHOSTDOWN:
@@ -1144,26 +1183,56 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task,
nfs4_delete_deviceid(devid->ld, devid->nfs_client,
&devid->deviceid);
rpc_wake_up(&tbl->slot_tbl_waitq);
- fallthrough;
+ break;
default:
- if (ff_layout_avoid_mds_available_ds(lseg))
- return -NFS4ERR_RESET_TO_PNFS;
-reset:
- dprintk("%s Retry through MDS. Error %d\n", __func__,
- task->tk_status);
- return -NFS4ERR_RESET_TO_MDS;
+ break;
}
+
+ if (ff_layout_avoid_mds_available_ds(lseg))
+ return -NFS4ERR_RESET_TO_PNFS;
+reset:
+ dprintk("%s Retry through MDS. Error %d\n", __func__,
+ task->tk_status);
+ return -NFS4ERR_RESET_TO_MDS;
+
+out_retry:
task->tk_status = 0;
return -EAGAIN;
}
/* Retry all errors through either pNFS or MDS except for -EJUKEBOX */
static int ff_layout_async_handle_error_v3(struct rpc_task *task,
+ u32 op_status,
+ struct nfs_client *clp,
struct pnfs_layout_segment *lseg,
u32 idx)
{
struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx);
+ switch (op_status) {
+ case NFS_OK:
+ case NFSERR_NXIO:
+ break;
+ case NFSERR_PERM:
+ if (!task->tk_xprt)
+ break;
+ xprt_force_disconnect(task->tk_xprt);
+ goto out_retry;
+ case NFSERR_ACCES:
+ case NFSERR_BADHANDLE:
+ case NFSERR_FBIG:
+ case NFSERR_IO:
+ case NFSERR_NOSPC:
+ case NFSERR_ROFS:
+ case NFSERR_STALE:
+ goto out_reset_to_pnfs;
+ case NFSERR_JUKEBOX:
+ nfs_inc_stats(lseg->pls_layout->plh_inode, NFSIOS_DELAY);
+ goto out_retry;
+ default:
+ break;
+ }
+
switch (task->tk_status) {
/* File access problems. Don't mark the device as unavailable */
case -EACCES:
@@ -1182,6 +1251,7 @@ static int ff_layout_async_handle_error_v3(struct rpc_task *task,
nfs4_delete_deviceid(devid->ld, devid->nfs_client,
&devid->deviceid);
}
+out_reset_to_pnfs:
/* FIXME: Need to prevent infinite looping here. */
return -NFS4ERR_RESET_TO_PNFS;
out_retry:
@@ -1192,6 +1262,7 @@ out_retry:
}
static int ff_layout_async_handle_error(struct rpc_task *task,
+ u32 op_status,
struct nfs4_state *state,
struct nfs_client *clp,
struct pnfs_layout_segment *lseg,
@@ -1210,10 +1281,11 @@ static int ff_layout_async_handle_error(struct rpc_task *task,
switch (vers) {
case 3:
- return ff_layout_async_handle_error_v3(task, lseg, idx);
- case 4:
- return ff_layout_async_handle_error_v4(task, state, clp,
+ return ff_layout_async_handle_error_v3(task, op_status, clp,
lseg, idx);
+ case 4:
+ return ff_layout_async_handle_error_v4(task, op_status, state,
+ clp, lseg, idx);
default:
/* should never happen */
WARN_ON_ONCE(1);
@@ -1240,6 +1312,7 @@ static void ff_layout_io_track_ds_error(struct pnfs_layout_segment *lseg,
case -ECONNRESET:
case -EHOSTDOWN:
case -EHOSTUNREACH:
+ case -ENETDOWN:
case -ENETUNREACH:
case -EADDRINUSE:
case -ENOBUFS:
@@ -1265,6 +1338,7 @@ static void ff_layout_io_track_ds_error(struct pnfs_layout_segment *lseg,
switch (status) {
case NFS4ERR_DELAY:
case NFS4ERR_GRACE:
+ case NFS4ERR_PERM:
break;
case NFS4ERR_NXIO:
ff_layout_mark_ds_unreachable(lseg, idx);
@@ -1297,7 +1371,8 @@ static int ff_layout_read_done_cb(struct rpc_task *task,
trace_ff_layout_read_error(hdr);
}
- err = ff_layout_async_handle_error(task, hdr->args.context->state,
+ err = ff_layout_async_handle_error(task, hdr->res.op_status,
+ hdr->args.context->state,
hdr->ds_clp, hdr->lseg,
hdr->pgio_mirror_idx);
@@ -1467,7 +1542,8 @@ static int ff_layout_write_done_cb(struct rpc_task *task,
trace_ff_layout_write_error(hdr);
}
- err = ff_layout_async_handle_error(task, hdr->args.context->state,
+ err = ff_layout_async_handle_error(task, hdr->res.op_status,
+ hdr->args.context->state,
hdr->ds_clp, hdr->lseg,
hdr->pgio_mirror_idx);
@@ -1513,8 +1589,9 @@ static int ff_layout_commit_done_cb(struct rpc_task *task,
trace_ff_layout_commit_error(data);
}
- err = ff_layout_async_handle_error(task, NULL, data->ds_clp,
- data->lseg, data->ds_commit_index);
+ err = ff_layout_async_handle_error(task, data->res.op_status,
+ NULL, data->ds_clp, data->lseg,
+ data->ds_commit_index);
trace_nfs4_pnfs_commit_ds(data, err);
switch (err) {
@@ -1764,12 +1841,14 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr)
struct pnfs_layout_segment *lseg = hdr->lseg;
struct nfs4_pnfs_ds *ds;
struct rpc_clnt *ds_clnt;
+ struct nfsd_file *localio;
struct nfs4_ff_layout_mirror *mirror;
const struct cred *ds_cred;
loff_t offset = hdr->args.offset;
u32 idx = hdr->pgio_mirror_idx;
int vers;
struct nfs_fh *fh;
+ bool ds_fatal_error = false;
dprintk("--> %s ino %lu pgbase %u req %zu@%llu\n",
__func__, hdr->inode->i_ino,
@@ -1777,8 +1856,10 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr)
mirror = FF_LAYOUT_COMP(lseg, idx);
ds = nfs4_ff_layout_prepare_ds(lseg, mirror, false);
- if (!ds)
+ if (IS_ERR(ds)) {
+ ds_fatal_error = nfs_error_is_fatal(PTR_ERR(ds));
goto out_failed;
+ }
ds_clnt = nfs4_ff_find_or_create_ds_client(mirror, ds->ds_clp,
hdr->inode);
@@ -1810,16 +1891,23 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr)
hdr->args.offset = offset;
hdr->mds_offset = offset;
+ /* Start IO accounting for local read */
+ localio = ff_local_open_fh(ds->ds_clp, ds_cred, fh, FMODE_READ);
+ if (localio) {
+ hdr->task.tk_start = ktime_get();
+ ff_layout_read_record_layoutstats_start(&hdr->task, hdr);
+ }
+
/* Perform an asynchronous read to ds */
nfs_initiate_pgio(ds_clnt, hdr, ds_cred, ds->ds_clp->rpc_ops,
vers == 3 ? &ff_layout_read_call_ops_v3 :
&ff_layout_read_call_ops_v4,
- 0, RPC_TASK_SOFTCONN);
+ 0, RPC_TASK_SOFTCONN, localio);
put_cred(ds_cred);
return PNFS_ATTEMPTED;
out_failed:
- if (ff_layout_avoid_mds_available_ds(lseg))
+ if (ff_layout_avoid_mds_available_ds(lseg) && !ds_fatal_error)
return PNFS_TRY_AGAIN;
trace_pnfs_mds_fallback_read_pagelist(hdr->inode,
hdr->args.offset, hdr->args.count,
@@ -1834,17 +1922,21 @@ ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int sync)
struct pnfs_layout_segment *lseg = hdr->lseg;
struct nfs4_pnfs_ds *ds;
struct rpc_clnt *ds_clnt;
+ struct nfsd_file *localio;
struct nfs4_ff_layout_mirror *mirror;
const struct cred *ds_cred;
loff_t offset = hdr->args.offset;
int vers;
struct nfs_fh *fh;
u32 idx = hdr->pgio_mirror_idx;
+ bool ds_fatal_error = false;
mirror = FF_LAYOUT_COMP(lseg, idx);
ds = nfs4_ff_layout_prepare_ds(lseg, mirror, true);
- if (!ds)
+ if (IS_ERR(ds)) {
+ ds_fatal_error = nfs_error_is_fatal(PTR_ERR(ds));
goto out_failed;
+ }
ds_clnt = nfs4_ff_find_or_create_ds_client(mirror, ds->ds_clp,
hdr->inode);
@@ -1878,16 +1970,24 @@ ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int sync)
*/
hdr->args.offset = offset;
+ /* Start IO accounting for local write */
+ localio = ff_local_open_fh(ds->ds_clp, ds_cred, fh,
+ FMODE_READ|FMODE_WRITE);
+ if (localio) {
+ hdr->task.tk_start = ktime_get();
+ ff_layout_write_record_layoutstats_start(&hdr->task, hdr);
+ }
+
/* Perform an asynchronous write */
nfs_initiate_pgio(ds_clnt, hdr, ds_cred, ds->ds_clp->rpc_ops,
vers == 3 ? &ff_layout_write_call_ops_v3 :
&ff_layout_write_call_ops_v4,
- sync, RPC_TASK_SOFTCONN);
+ sync, RPC_TASK_SOFTCONN, localio);
put_cred(ds_cred);
return PNFS_ATTEMPTED;
out_failed:
- if (ff_layout_avoid_mds_available_ds(lseg))
+ if (ff_layout_avoid_mds_available_ds(lseg) && !ds_fatal_error)
return PNFS_TRY_AGAIN;
trace_pnfs_mds_fallback_write_pagelist(hdr->inode,
hdr->args.offset, hdr->args.count,
@@ -1916,6 +2016,7 @@ static int ff_layout_initiate_commit(struct nfs_commit_data *data, int how)
struct pnfs_layout_segment *lseg = data->lseg;
struct nfs4_pnfs_ds *ds;
struct rpc_clnt *ds_clnt;
+ struct nfsd_file *localio;
struct nfs4_ff_layout_mirror *mirror;
const struct cred *ds_cred;
u32 idx;
@@ -1929,7 +2030,7 @@ static int ff_layout_initiate_commit(struct nfs_commit_data *data, int how)
idx = calc_ds_index_from_commit(lseg, data->ds_commit_index);
mirror = FF_LAYOUT_COMP(lseg, idx);
ds = nfs4_ff_layout_prepare_ds(lseg, mirror, true);
- if (!ds)
+ if (IS_ERR(ds))
goto out_err;
ds_clnt = nfs4_ff_find_or_create_ds_client(mirror, ds->ds_clp,
@@ -1954,10 +2055,18 @@ static int ff_layout_initiate_commit(struct nfs_commit_data *data, int how)
if (fh)
data->args.fh = fh;
+ /* Start IO accounting for local commit */
+ localio = ff_local_open_fh(ds->ds_clp, ds_cred, fh,
+ FMODE_READ|FMODE_WRITE);
+ if (localio) {
+ data->task.tk_start = ktime_get();
+ ff_layout_commit_record_layoutstats_start(&data->task, data);
+ }
+
ret = nfs_initiate_commit(ds_clnt, data, ds->ds_clp->rpc_ops,
vers == 3 ? &ff_layout_commit_call_ops_v3 :
&ff_layout_commit_call_ops_v4,
- how, RPC_TASK_SOFTCONN);
+ how, RPC_TASK_SOFTCONN, localio);
put_cred(ds_cred);
return ret;
out_err:
@@ -2095,12 +2204,6 @@ static int ff_layout_encode_ioerr(struct xdr_stream *xdr,
}
static void
-encode_opaque_fixed(struct xdr_stream *xdr, const void *buf, size_t len)
-{
- WARN_ON_ONCE(xdr_stream_encode_opaque_fixed(xdr, buf, len) < 0);
-}
-
-static void
ff_layout_encode_ff_iostat_head(struct xdr_stream *xdr,
const nfs4_stateid *stateid,
const struct nfs42_layoutstat_devinfo *devinfo)
@@ -2556,7 +2659,7 @@ ff_layout_set_layoutdriver(struct nfs_server *server,
const struct nfs_fh *dummy)
{
#if IS_ENABLED(CONFIG_NFS_V4_2)
- server->caps |= NFS_CAP_LAYOUTSTATS;
+ server->caps |= NFS_CAP_LAYOUTSTATS | NFS_CAP_REBOOT_LAYOUTRETURN;
#endif
return 0;
}