summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- drivers/staging/lustre/include/uapi/linux/lnet/lnet-dlc.h 3
-rw-r--r-- drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h 17
-rw-r--r-- drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c 25
-rw-r--r-- drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_modparams.c 9
4 files changed, 42 insertions, 12 deletions
diff --git a/drivers/staging/lustre/include/uapi/linux/lnet/lnet-dlc.h b/drivers/staging/lustre/include/uapi/linux/lnet/lnet-dlc.h
index e45d828bfd1b..c1619f411d81 100644
--- a/drivers/staging/lustre/include/uapi/linux/lnet/lnet-dlc.h
+++ b/drivers/staging/lustre/include/uapi/linux/lnet/lnet-dlc.h
@@ -53,7 +53,8 @@ struct lnet_ioctl_config_o2iblnd_tunables {
__u32 lnd_fmr_pool_size;
__u32 lnd_fmr_flush_trigger;
__u32 lnd_fmr_cache;
- __u32 pad;
+ __u16 lnd_conns_per_peer;
+ __u16 pad;
};
struct lnet_ioctl_config_lnd_tunables {
diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h
index ca6e09de0872..bb663d6d7b48 100644
--- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h
+++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h
@@ -568,6 +568,8 @@ struct kib_peer {
lnet_nid_t ibp_nid; /* who's on the other end(s) */
struct lnet_ni *ibp_ni; /* LNet interface */
struct list_head ibp_conns; /* all active connections */
+ struct kib_conn *ibp_next_conn; /* next connection to send on for
+ * round robin */
struct list_head ibp_tx_queue; /* msgs waiting for a conn */
__u64 ibp_incarnation; /* incarnation of peer */
/* when (in jiffies) I was last alive */
@@ -581,7 +583,7 @@ struct kib_peer {
/* current active connection attempts */
unsigned short ibp_connecting;
/* reconnect this peer later */
- unsigned short ibp_reconnecting:1;
+ unsigned char ibp_reconnecting;
/* counter of how many times we triggered a conn race */
unsigned char ibp_races;
/* # consecutive reconnection attempts to this peer */
@@ -744,10 +746,19 @@ kiblnd_peer_active(struct kib_peer *peer)
static inline struct kib_conn *
kiblnd_get_conn_locked(struct kib_peer *peer)
{
+ struct list_head *next;
+
LASSERT(!list_empty(&peer->ibp_conns));
- /* just return the first connection */
- return list_entry(peer->ibp_conns.next, struct kib_conn, ibc_list);
+ /* Advance to next connection, be sure to skip the head node */
+ if (!peer->ibp_next_conn ||
+ peer->ibp_next_conn->ibc_list.next == &peer->ibp_conns)
+ next = peer->ibp_conns.next;
+ else
+ next = peer->ibp_next_conn->ibc_list.next;
+ peer->ibp_next_conn = list_entry(next, struct kib_conn, ibc_list);
+
+ return peer->ibp_next_conn;
}
static inline int
diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c
index b13996555a02..32fa8cafe9ea 100644
--- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c
+++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c
@@ -1246,7 +1246,6 @@ kiblnd_connect_peer(struct kib_peer *peer)
LASSERT(net);
LASSERT(peer->ibp_connecting > 0);
- LASSERT(!peer->ibp_reconnecting);
cmid = kiblnd_rdma_create_id(kiblnd_cm_callback, peer, RDMA_PS_TCP,
IB_QPT_RC);
@@ -1323,7 +1322,7 @@ kiblnd_reconnect_peer(struct kib_peer *peer)
LASSERT(!peer->ibp_accepting && !peer->ibp_connecting &&
list_empty(&peer->ibp_conns));
- peer->ibp_reconnecting = 0;
+ peer->ibp_reconnecting--;
if (!kiblnd_peer_active(peer)) {
list_splice_init(&peer->ibp_tx_queue, &txs);
@@ -1356,6 +1355,8 @@ kiblnd_launch_tx(struct lnet_ni *ni, struct kib_tx *tx, lnet_nid_t nid)
rwlock_t *g_lock = &kiblnd_data.kib_global_lock;
unsigned long flags;
int rc;
+ int i;
+ struct lnet_ioctl_config_o2iblnd_tunables *tunables;
/*
* If I get here, I've committed to send, so I complete the tx with
@@ -1452,7 +1453,8 @@ kiblnd_launch_tx(struct lnet_ni *ni, struct kib_tx *tx, lnet_nid_t nid)
/* Brand new peer */
LASSERT(!peer->ibp_connecting);
- peer->ibp_connecting = 1;
+ tunables = &peer->ibp_ni->ni_lnd_tunables->lt_tun_u.lt_o2ib;
+ peer->ibp_connecting = tunables->lnd_conns_per_peer;
/* always called with a ref on ni, which prevents ni being shutdown */
LASSERT(!((struct kib_net *)ni->ni_data)->ibn_shutdown);
@@ -1465,7 +1467,8 @@ kiblnd_launch_tx(struct lnet_ni *ni, struct kib_tx *tx, lnet_nid_t nid)
write_unlock_irqrestore(g_lock, flags);
- kiblnd_connect_peer(peer);
+ for (i = 0; i < tunables->lnd_conns_per_peer; i++)
+ kiblnd_connect_peer(peer);
kiblnd_peer_decref(peer);
}
@@ -1914,6 +1917,9 @@ kiblnd_close_conn_locked(struct kib_conn *conn, int error)
}
dev = ((struct kib_net *)peer->ibp_ni->ni_data)->ibn_dev;
+ if (peer->ibp_next_conn == conn)
+ /* clear next_conn so it won't be used */
+ peer->ibp_next_conn = NULL;
list_del(&conn->ibc_list);
/* connd (see below) takes over ibc_list's ref */
@@ -2183,7 +2189,11 @@ kiblnd_connreq_done(struct kib_conn *conn, int status)
kiblnd_conn_addref(conn);
write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
- /* Schedule blocked txs */
+ /* Schedule blocked txs
+ * Note: if we are running with conns_per_peer > 1, these blocked
+ * txs will all get scheduled to the first connection which gets
+ * scheduled. We won't be using round robin on this first batch.
+ */
spin_lock(&conn->ibc_lock);
list_for_each_entry_safe(tx, tmp, &txs, tx_list) {
list_del(&tx->tx_list);
@@ -2552,7 +2562,6 @@ kiblnd_check_reconnect(struct kib_conn *conn, int version,
LASSERT(conn->ibc_state == IBLND_CONN_ACTIVE_CONNECT);
LASSERT(peer->ibp_connecting > 0); /* 'conn' at least */
- LASSERT(!peer->ibp_reconnecting);
if (cp) {
msg_size = cp->ibcp_max_msg_size;
@@ -2570,7 +2579,7 @@ kiblnd_check_reconnect(struct kib_conn *conn, int version,
*/
reconnect = (!list_empty(&peer->ibp_tx_queue) ||
peer->ibp_version != version) &&
- peer->ibp_connecting == 1 &&
+ peer->ibp_connecting &&
!peer->ibp_accepting;
if (!reconnect) {
reason = "no need";
@@ -2631,7 +2640,7 @@ kiblnd_check_reconnect(struct kib_conn *conn, int version,
}
conn->ibc_reconnect = 1;
- peer->ibp_reconnecting = 1;
+ peer->ibp_reconnecting++;
peer->ibp_version = version;
if (incarnation)
peer->ibp_incarnation = incarnation;
diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_modparams.c b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_modparams.c
index b9235400bf1d..39d07926d603 100644
--- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_modparams.c
+++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_modparams.c
@@ -57,6 +57,10 @@ static int nscheds;
module_param(nscheds, int, 0444);
MODULE_PARM_DESC(nscheds, "number of threads in each scheduler pool");
+static unsigned int conns_per_peer = 1;
+module_param(conns_per_peer, uint, 0444);
+MODULE_PARM_DESC(conns_per_peer, "number of connections per peer");
+
/* NB: this value is shared by all CPTs, it can grow at runtime */
static int ntx = 512;
module_param(ntx, int, 0444);
@@ -271,6 +275,10 @@ int kiblnd_tunables_setup(struct lnet_ni *ni)
tunables->lnd_fmr_flush_trigger = fmr_flush_trigger;
if (!tunables->lnd_fmr_cache)
tunables->lnd_fmr_cache = fmr_cache;
+ if (!tunables->lnd_conns_per_peer) {
+ tunables->lnd_conns_per_peer = (conns_per_peer) ?
+ conns_per_peer : 1;
+ }
return 0;
}
@@ -284,4 +292,5 @@ void kiblnd_tunables_init(void)
default_tunables.lnd_fmr_pool_size = fmr_pool_size;
default_tunables.lnd_fmr_flush_trigger = fmr_flush_trigger;
default_tunables.lnd_fmr_cache = fmr_cache;
+ default_tunables.lnd_conns_per_peer = conns_per_peer;
}