diff options
4 files changed, 42 insertions, 12 deletions
diff --git a/drivers/staging/lustre/include/uapi/linux/lnet/lnet-dlc.h b/drivers/staging/lustre/include/uapi/linux/lnet/lnet-dlc.h index e45d828bfd1b..c1619f411d81 100644 --- a/drivers/staging/lustre/include/uapi/linux/lnet/lnet-dlc.h +++ b/drivers/staging/lustre/include/uapi/linux/lnet/lnet-dlc.h @@ -53,7 +53,8 @@ struct lnet_ioctl_config_o2iblnd_tunables { __u32 lnd_fmr_pool_size; __u32 lnd_fmr_flush_trigger; __u32 lnd_fmr_cache; - __u32 pad; + __u16 lnd_conns_per_peer; + __u16 pad; }; struct lnet_ioctl_config_lnd_tunables { diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h index ca6e09de0872..bb663d6d7b48 100644 --- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h +++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h @@ -568,6 +568,8 @@ struct kib_peer { lnet_nid_t ibp_nid; /* who's on the other end(s) */ struct lnet_ni *ibp_ni; /* LNet interface */ struct list_head ibp_conns; /* all active connections */ + struct kib_conn *ibp_next_conn; /* next connection to send on for + * round robin */ struct list_head ibp_tx_queue; /* msgs waiting for a conn */ __u64 ibp_incarnation; /* incarnation of peer */ /* when (in jiffies) I was last alive */ @@ -581,7 +583,7 @@ struct kib_peer { /* current active connection attempts */ unsigned short ibp_connecting; /* reconnect this peer later */ - unsigned short ibp_reconnecting:1; + unsigned char ibp_reconnecting; /* counter of how many times we triggered a conn race */ unsigned char ibp_races; /* # consecutive reconnection attempts to this peer */ @@ -744,10 +746,19 @@ kiblnd_peer_active(struct kib_peer *peer) static inline struct kib_conn * kiblnd_get_conn_locked(struct kib_peer *peer) { + struct list_head *next; + LASSERT(!list_empty(&peer->ibp_conns)); - /* just return the first connection */ - return list_entry(peer->ibp_conns.next, struct kib_conn, ibc_list); + /* Advance to next connection, be sure to skip the head node */ + if (!peer->ibp_next_conn || + peer->ibp_next_conn->ibc_list.next == &peer->ibp_conns) + next = peer->ibp_conns.next; + else + next = peer->ibp_next_conn->ibc_list.next; + peer->ibp_next_conn = list_entry(next, struct kib_conn, ibc_list); + + return peer->ibp_next_conn; } static inline int diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c index b13996555a02..32fa8cafe9ea 100644 --- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c +++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c @@ -1246,7 +1246,6 @@ kiblnd_connect_peer(struct kib_peer *peer) LASSERT(net); LASSERT(peer->ibp_connecting > 0); - LASSERT(!peer->ibp_reconnecting); cmid = kiblnd_rdma_create_id(kiblnd_cm_callback, peer, RDMA_PS_TCP, IB_QPT_RC); @@ -1323,7 +1322,7 @@ kiblnd_reconnect_peer(struct kib_peer *peer) LASSERT(!peer->ibp_accepting && !peer->ibp_connecting && list_empty(&peer->ibp_conns)); - peer->ibp_reconnecting = 0; + peer->ibp_reconnecting--; if (!kiblnd_peer_active(peer)) { list_splice_init(&peer->ibp_tx_queue, &txs); @@ -1356,6 +1355,8 @@ kiblnd_launch_tx(struct lnet_ni *ni, struct kib_tx *tx, lnet_nid_t nid) rwlock_t *g_lock = &kiblnd_data.kib_global_lock; unsigned long flags; int rc; + int i; + struct lnet_ioctl_config_o2iblnd_tunables *tunables; /* * If I get here, I've committed to send, so I complete the tx with @@ -1452,7 +1453,8 @@ kiblnd_launch_tx(struct lnet_ni *ni, struct kib_tx *tx, lnet_nid_t nid) /* Brand new peer */ LASSERT(!peer->ibp_connecting); - peer->ibp_connecting = 1; + tunables = &peer->ibp_ni->ni_lnd_tunables->lt_tun_u.lt_o2ib; + peer->ibp_connecting = tunables->lnd_conns_per_peer; /* always called with a ref on ni, which prevents ni being shutdown */ LASSERT(!((struct kib_net *)ni->ni_data)->ibn_shutdown); @@ -1465,7 +1467,8 @@ kiblnd_launch_tx(struct lnet_ni *ni, struct kib_tx *tx, lnet_nid_t nid) write_unlock_irqrestore(g_lock, flags); - kiblnd_connect_peer(peer); + for (i = 0; i < tunables->lnd_conns_per_peer; i++) + kiblnd_connect_peer(peer); kiblnd_peer_decref(peer); } @@ -1914,6 +1917,9 @@ kiblnd_close_conn_locked(struct kib_conn *conn, int error) } dev = ((struct kib_net *)peer->ibp_ni->ni_data)->ibn_dev; + if (peer->ibp_next_conn == conn) + /* clear next_conn so it won't be used */ + peer->ibp_next_conn = NULL; list_del(&conn->ibc_list); /* connd (see below) takes over ibc_list's ref */ @@ -2183,7 +2189,11 @@ kiblnd_connreq_done(struct kib_conn *conn, int status) kiblnd_conn_addref(conn); write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); - /* Schedule blocked txs */ + /* Schedule blocked txs + * Note: if we are running with conns_per_peer > 1, these blocked + * txs will all get scheduled to the first connection which gets + * scheduled. We won't be using round robin on this first batch. + */ spin_lock(&conn->ibc_lock); list_for_each_entry_safe(tx, tmp, &txs, tx_list) { list_del(&tx->tx_list); @@ -2552,7 +2562,6 @@ kiblnd_check_reconnect(struct kib_conn *conn, int version, LASSERT(conn->ibc_state == IBLND_CONN_ACTIVE_CONNECT); LASSERT(peer->ibp_connecting > 0); /* 'conn' at least */ - LASSERT(!peer->ibp_reconnecting); if (cp) { msg_size = cp->ibcp_max_msg_size; @@ -2570,7 +2579,7 @@ kiblnd_check_reconnect(struct kib_conn *conn, int version, */ reconnect = (!list_empty(&peer->ibp_tx_queue) || peer->ibp_version != version) && - peer->ibp_connecting == 1 && + peer->ibp_connecting && !peer->ibp_accepting; if (!reconnect) { reason = "no need"; @@ -2631,7 +2640,7 @@ kiblnd_check_reconnect(struct kib_conn *conn, int version, } conn->ibc_reconnect = 1; - peer->ibp_reconnecting = 1; + peer->ibp_reconnecting++; peer->ibp_version = version; if (incarnation) peer->ibp_incarnation = incarnation; diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_modparams.c b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_modparams.c index b9235400bf1d..39d07926d603 100644 --- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_modparams.c +++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_modparams.c @@ -57,6 +57,10 @@ static int nscheds; module_param(nscheds, int, 0444); MODULE_PARM_DESC(nscheds, "number of threads in each scheduler pool"); +static unsigned int conns_per_peer = 1; +module_param(conns_per_peer, uint, 0444); +MODULE_PARM_DESC(conns_per_peer, "number of connections per peer"); + /* NB: this value is shared by all CPTs, it can grow at runtime */ static int ntx = 512; module_param(ntx, int, 0444); @@ -271,6 +275,10 @@ int kiblnd_tunables_setup(struct lnet_ni *ni) tunables->lnd_fmr_flush_trigger = fmr_flush_trigger; if (!tunables->lnd_fmr_cache) tunables->lnd_fmr_cache = fmr_cache; + if (!tunables->lnd_conns_per_peer) { + tunables->lnd_conns_per_peer = (conns_per_peer) ? + conns_per_peer : 1; + } return 0; } @@ -284,4 +292,5 @@ void kiblnd_tunables_init(void) default_tunables.lnd_fmr_pool_size = fmr_pool_size; default_tunables.lnd_fmr_flush_trigger = fmr_flush_trigger; default_tunables.lnd_fmr_cache = fmr_cache; + default_tunables.lnd_conns_per_peer = conns_per_peer; } |
