summaryrefslogtreecommitdiff
path: root/net/ceph
diff options
context:
space:
mode:
Diffstat (limited to 'net/ceph')
-rw-r--r--net/ceph/ceph_common.c15
-rw-r--r--net/ceph/messenger.c6
-rw-r--r--net/ceph/osd_client.c36
-rw-r--r--net/ceph/osdmap.c4
4 files changed, 57 insertions, 4 deletions
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index 464e88599b9d..108533859a53 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -230,6 +230,7 @@ enum {
Opt_osdkeepalivetimeout,
Opt_mount_timeout,
Opt_osd_idle_ttl,
+ Opt_osd_request_timeout,
Opt_last_int,
/* int args above */
Opt_fsid,
@@ -256,6 +257,7 @@ static match_table_t opt_tokens = {
{Opt_osdkeepalivetimeout, "osdkeepalive=%d"},
{Opt_mount_timeout, "mount_timeout=%d"},
{Opt_osd_idle_ttl, "osd_idle_ttl=%d"},
+ {Opt_osd_request_timeout, "osd_request_timeout=%d"},
/* int args above */
{Opt_fsid, "fsid=%s"},
{Opt_name, "name=%s"},
@@ -361,6 +363,7 @@ ceph_parse_options(char *options, const char *dev_name,
opt->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT;
opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT;
opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT;
+ opt->osd_request_timeout = CEPH_OSD_REQUEST_TIMEOUT_DEFAULT;
/* get mon ip(s) */
/* ip1[:port1][,ip2[:port2]...] */
@@ -473,6 +476,15 @@ ceph_parse_options(char *options, const char *dev_name,
}
opt->mount_timeout = msecs_to_jiffies(intval * 1000);
break;
+ case Opt_osd_request_timeout:
+ /* 0 is "wait forever" (i.e. infinite timeout) */
+ if (intval < 0 || intval > INT_MAX / 1000) {
+ pr_err("osd_request_timeout out of range\n");
+ err = -EINVAL;
+ goto out;
+ }
+ opt->osd_request_timeout = msecs_to_jiffies(intval * 1000);
+ break;
case Opt_share:
opt->flags &= ~CEPH_OPT_NOSHARE;
@@ -557,6 +569,9 @@ int ceph_print_client_options(struct seq_file *m, struct ceph_client *client)
if (opt->osd_keepalive_timeout != CEPH_OSD_KEEPALIVE_DEFAULT)
seq_printf(m, "osdkeepalivetimeout=%d,",
jiffies_to_msecs(opt->osd_keepalive_timeout) / 1000);
+ if (opt->osd_request_timeout != CEPH_OSD_REQUEST_TIMEOUT_DEFAULT)
+ seq_printf(m, "osd_request_timeout=%d,",
+ jiffies_to_msecs(opt->osd_request_timeout) / 1000);
/* drop redundant comma */
if (m->count != pos)
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 38dcf1eb427d..f76bb3332613 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -7,6 +7,7 @@
#include <linux/kthread.h>
#include <linux/net.h>
#include <linux/nsproxy.h>
+#include <linux/sched/mm.h>
#include <linux/slab.h>
#include <linux/socket.h>
#include <linux/string.h>
@@ -469,11 +470,16 @@ static int ceph_tcp_connect(struct ceph_connection *con)
{
struct sockaddr_storage *paddr = &con->peer_addr.in_addr;
struct socket *sock;
+ unsigned int noio_flag;
int ret;
BUG_ON(con->sock);
+
+ /* sock_create_kern() allocates with GFP_KERNEL */
+ noio_flag = memalloc_noio_save();
ret = sock_create_kern(read_pnet(&con->msgr->net), paddr->ss_family,
SOCK_STREAM, IPPROTO_TCP, &sock);
+ memalloc_noio_restore(noio_flag);
if (ret)
return ret;
sock->sk->sk_allocation = GFP_NOFS;
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index b65bbf9f45eb..e15ea9e4c495 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -1709,6 +1709,8 @@ static void account_request(struct ceph_osd_request *req)
req->r_flags |= CEPH_OSD_FLAG_ONDISK;
atomic_inc(&req->r_osdc->num_requests);
+
+ req->r_start_stamp = jiffies;
}
static void submit_request(struct ceph_osd_request *req, bool wrlocked)
@@ -1789,6 +1791,14 @@ static void cancel_request(struct ceph_osd_request *req)
ceph_osdc_put_request(req);
}
+static void abort_request(struct ceph_osd_request *req, int err)
+{
+ dout("%s req %p tid %llu err %d\n", __func__, req, req->r_tid, err);
+
+ cancel_map_check(req);
+ complete_request(req, err);
+}
+
static void check_pool_dne(struct ceph_osd_request *req)
{
struct ceph_osd_client *osdc = req->r_osdc;
@@ -2487,6 +2497,7 @@ static void handle_timeout(struct work_struct *work)
container_of(work, struct ceph_osd_client, timeout_work.work);
struct ceph_options *opts = osdc->client->options;
unsigned long cutoff = jiffies - opts->osd_keepalive_timeout;
+ unsigned long expiry_cutoff = jiffies - opts->osd_request_timeout;
LIST_HEAD(slow_osds);
struct rb_node *n, *p;
@@ -2502,15 +2513,23 @@ static void handle_timeout(struct work_struct *work)
struct ceph_osd *osd = rb_entry(n, struct ceph_osd, o_node);
bool found = false;
- for (p = rb_first(&osd->o_requests); p; p = rb_next(p)) {
+ for (p = rb_first(&osd->o_requests); p; ) {
struct ceph_osd_request *req =
rb_entry(p, struct ceph_osd_request, r_node);
+ p = rb_next(p); /* abort_request() */
+
if (time_before(req->r_stamp, cutoff)) {
dout(" req %p tid %llu on osd%d is laggy\n",
req, req->r_tid, osd->o_osd);
found = true;
}
+ if (opts->osd_request_timeout &&
+ time_before(req->r_start_stamp, expiry_cutoff)) {
+ pr_err_ratelimited("tid %llu on osd%d timeout\n",
+ req->r_tid, osd->o_osd);
+ abort_request(req, -ETIMEDOUT);
+ }
}
for (p = rb_first(&osd->o_linger_requests); p; p = rb_next(p)) {
struct ceph_osd_linger_request *lreq =
@@ -2530,6 +2549,21 @@ static void handle_timeout(struct work_struct *work)
list_move_tail(&osd->o_keepalive_item, &slow_osds);
}
+ if (opts->osd_request_timeout) {
+ for (p = rb_first(&osdc->homeless_osd.o_requests); p; ) {
+ struct ceph_osd_request *req =
+ rb_entry(p, struct ceph_osd_request, r_node);
+
+ p = rb_next(p); /* abort_request() */
+
+ if (time_before(req->r_start_stamp, expiry_cutoff)) {
+ pr_err_ratelimited("tid %llu on osd%d timeout\n",
+ req->r_tid, osdc->homeless_osd.o_osd);
+ abort_request(req, -ETIMEDOUT);
+ }
+ }
+ }
+
if (atomic_read(&osdc->num_homeless) || !list_empty(&slow_osds))
maybe_request_map(osdc);
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index 6824c0ec8373..ffe9e904d4d1 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -390,9 +390,8 @@ static struct crush_map *crush_decode(void *pbyval, void *end)
dout("crush decode tunable chooseleaf_stable = %d\n",
c->chooseleaf_stable);
- crush_finalize(c);
-
done:
+ crush_finalize(c);
dout("crush_decode success\n");
return c;
@@ -1380,7 +1379,6 @@ static int decode_new_up_state_weight(void **p, void *end,
if ((map->osd_state[osd] & CEPH_OSD_EXISTS) &&
(xorstate & CEPH_OSD_EXISTS)) {
pr_info("osd%d does not exist\n", osd);
- map->osd_weight[osd] = CEPH_OSD_IN;
ret = set_primary_affinity(map, osd,
CEPH_OSD_DEFAULT_PRIMARY_AFFINITY);
if (ret)