diff options
Diffstat (limited to 'net/rds/connection.c')
-rw-r--r-- | net/rds/connection.c | 132 |
1 files changed, 81 insertions, 51 deletions
diff --git a/net/rds/connection.c b/net/rds/connection.c index cfb05953b0e5..3176ead0ab4d 100644 --- a/net/rds/connection.c +++ b/net/rds/connection.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 Oracle. All rights reserved. + * Copyright (c) 2006, 2017 Oracle and/or its affiliates. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -34,7 +34,8 @@ #include <linux/list.h> #include <linux/slab.h> #include <linux/export.h> -#include <net/inet_hashtables.h> +#include <net/ipv6.h> +#include <net/inet6_hashtables.h> #include "rds.h" #include "loop.h" @@ -49,18 +50,21 @@ static unsigned long rds_conn_count; static struct hlist_head rds_conn_hash[RDS_CONNECTION_HASH_ENTRIES]; static struct kmem_cache *rds_conn_slab; -static struct hlist_head *rds_conn_bucket(__be32 laddr, __be32 faddr) +static struct hlist_head *rds_conn_bucket(const struct in6_addr *laddr, + const struct in6_addr *faddr) { + static u32 rds6_hash_secret __read_mostly; static u32 rds_hash_secret __read_mostly; - unsigned long hash; + u32 lhash, fhash, hash; net_get_random_once(&rds_hash_secret, sizeof(rds_hash_secret)); + net_get_random_once(&rds6_hash_secret, sizeof(rds6_hash_secret)); + + lhash = (__force u32)laddr->s6_addr32[3]; + fhash = __ipv6_addr_jhash(faddr, rds6_hash_secret); + hash = __inet6_ehashfn(lhash, 0, fhash, 0, rds_hash_secret); - /* Pass NULL, don't need struct net for hash */ - hash = __inet_ehashfn(be32_to_cpu(laddr), 0, - be32_to_cpu(faddr), 0, - rds_hash_secret); return &rds_conn_hash[hash & RDS_CONNECTION_HASH_MASK]; } @@ -72,20 +76,25 @@ static struct hlist_head *rds_conn_bucket(__be32 laddr, __be32 faddr) /* rcu read lock must be held or the connection spinlock */ static struct rds_connection *rds_conn_lookup(struct net *net, struct hlist_head *head, - __be32 laddr, __be32 faddr, - struct rds_transport *trans) + const struct in6_addr *laddr, + const struct in6_addr *faddr, + struct rds_transport *trans, + int dev_if) { struct rds_connection *conn, *ret = NULL; hlist_for_each_entry_rcu(conn, head, c_hash_node) { - if (conn->c_faddr == faddr && conn->c_laddr == laddr && - conn->c_trans == trans && net == rds_conn_net(conn)) { + if (ipv6_addr_equal(&conn->c_faddr, faddr) && + ipv6_addr_equal(&conn->c_laddr, laddr) && + conn->c_trans == trans && + net == rds_conn_net(conn) && + conn->c_dev_if == dev_if) { ret = conn; break; } } - rdsdebug("returning conn %p for %pI4 -> %pI4\n", ret, - &laddr, &faddr); + rdsdebug("returning conn %p for %pI6c -> %pI6c\n", ret, + laddr, faddr); return ret; } @@ -99,8 +108,8 @@ static void rds_conn_path_reset(struct rds_conn_path *cp) { struct rds_connection *conn = cp->cp_conn; - rdsdebug("connection %pI4 to %pI4 reset\n", - &conn->c_laddr, &conn->c_faddr); + rdsdebug("connection %pI6c to %pI6c reset\n", + &conn->c_laddr, &conn->c_faddr); rds_stats_inc(s_conn_reset); rds_send_path_reset(cp); @@ -142,9 +151,12 @@ static void __rds_conn_path_init(struct rds_connection *conn, * are torn down as the module is removed, if ever. */ static struct rds_connection *__rds_conn_create(struct net *net, - __be32 laddr, __be32 faddr, - struct rds_transport *trans, gfp_t gfp, - int is_outgoing) + const struct in6_addr *laddr, + const struct in6_addr *faddr, + struct rds_transport *trans, + gfp_t gfp, + int is_outgoing, + int dev_if) { struct rds_connection *conn, *parent = NULL; struct hlist_head *head = rds_conn_bucket(laddr, faddr); @@ -154,9 +166,12 @@ static struct rds_connection *__rds_conn_create(struct net *net, int npaths = (trans->t_mp_capable ? RDS_MPATH_WORKERS : 1); rcu_read_lock(); - conn = rds_conn_lookup(net, head, laddr, faddr, trans); - if (conn && conn->c_loopback && conn->c_trans != &rds_loop_transport && - laddr == faddr && !is_outgoing) { + conn = rds_conn_lookup(net, head, laddr, faddr, trans, dev_if); + if (conn && + conn->c_loopback && + conn->c_trans != &rds_loop_transport && + ipv6_addr_equal(laddr, faddr) && + !is_outgoing) { /* This is a looped back IB connection, and we're * called by the code handling the incoming connect. * We need a second connection object into which we @@ -181,8 +196,10 @@ static struct rds_connection *__rds_conn_create(struct net *net, } INIT_HLIST_NODE(&conn->c_hash_node); - conn->c_laddr = laddr; - conn->c_faddr = faddr; + conn->c_laddr = *laddr; + conn->c_isv6 = !ipv6_addr_v4mapped(laddr); + conn->c_faddr = *faddr; + conn->c_dev_if = dev_if; rds_conn_net_set(conn, net); @@ -199,7 +216,7 @@ static struct rds_connection *__rds_conn_create(struct net *net, * can bind to the destination address then we'd rather the messages * flow through loopback rather than either transport. */ - loop_trans = rds_trans_get_preferred(net, faddr); + loop_trans = rds_trans_get_preferred(net, faddr, conn->c_dev_if); if (loop_trans) { rds_trans_put(loop_trans); conn->c_loopback = 1; @@ -233,10 +250,10 @@ static struct rds_connection *__rds_conn_create(struct net *net, goto out; } - rdsdebug("allocated conn %p for %pI4 -> %pI4 over %s %s\n", - conn, &laddr, &faddr, - strnlen(trans->t_name, sizeof(trans->t_name)) ? trans->t_name : - "[unknown]", is_outgoing ? "(outgoing)" : ""); + rdsdebug("allocated conn %p for %pI6c -> %pI6c over %s %s\n", + conn, laddr, faddr, + strnlen(trans->t_name, sizeof(trans->t_name)) ? + trans->t_name : "[unknown]", is_outgoing ? "(outgoing)" : ""); /* * Since we ran without holding the conn lock, someone could @@ -262,7 +279,8 @@ static struct rds_connection *__rds_conn_create(struct net *net, /* Creating normal conn */ struct rds_connection *found; - found = rds_conn_lookup(net, head, laddr, faddr, trans); + found = rds_conn_lookup(net, head, laddr, faddr, trans, + dev_if); if (found) { struct rds_conn_path *cp; int i; @@ -295,18 +313,22 @@ out: } struct rds_connection *rds_conn_create(struct net *net, - __be32 laddr, __be32 faddr, - struct rds_transport *trans, gfp_t gfp) + const struct in6_addr *laddr, + const struct in6_addr *faddr, + struct rds_transport *trans, gfp_t gfp, + int dev_if) { - return __rds_conn_create(net, laddr, faddr, trans, gfp, 0); + return __rds_conn_create(net, laddr, faddr, trans, gfp, 0, dev_if); } EXPORT_SYMBOL_GPL(rds_conn_create); struct rds_connection *rds_conn_create_outgoing(struct net *net, - __be32 laddr, __be32 faddr, - struct rds_transport *trans, gfp_t gfp) + const struct in6_addr *laddr, + const struct in6_addr *faddr, + struct rds_transport *trans, + gfp_t gfp, int dev_if) { - return __rds_conn_create(net, laddr, faddr, trans, gfp, 1); + return __rds_conn_create(net, laddr, faddr, trans, gfp, 1, dev_if); } EXPORT_SYMBOL_GPL(rds_conn_create_outgoing); @@ -502,12 +524,17 @@ static void rds_conn_message_info(struct socket *sock, unsigned int len, /* XXX too lazy to maintain counts.. */ list_for_each_entry(rm, list, m_conn_item) { + __be32 laddr; + __be32 faddr; + total++; + laddr = conn->c_laddr.s6_addr32[3]; + faddr = conn->c_faddr.s6_addr32[3]; if (total <= len) rds_inc_info_copy(&rm->m_inc, iter, - conn->c_laddr, - conn->c_faddr, + laddr, + faddr, 0); } @@ -584,7 +611,6 @@ static void rds_walk_conn_path_info(struct socket *sock, unsigned int len, struct hlist_head *head; struct rds_connection *conn; size_t i; - int j; rcu_read_lock(); @@ -595,17 +621,20 @@ static void rds_walk_conn_path_info(struct socket *sock, unsigned int len, i++, head++) { hlist_for_each_entry_rcu(conn, head, c_hash_node) { struct rds_conn_path *cp; - int npaths; - npaths = (conn->c_trans->t_mp_capable ? - RDS_MPATH_WORKERS : 1); - for (j = 0; j < npaths; j++) { - cp = &conn->c_path[j]; + /* XXX We only copy the information from the first + * path for now. The problem is that if there are + * more than one underlying paths, we cannot report + * information of all of them using the existing + * API. For example, there is only one next_tx_seq, + * which path's next_tx_seq should we report? It is + * a bug in the design of MPRDS. + */ + cp = conn->c_path; - /* XXX no cp_lock usage.. */ - if (!visitor(cp, buffer)) - continue; - } + /* XXX no cp_lock usage.. */ + if (!visitor(cp, buffer)) + continue; /* We copy as much as we can fit in the buffer, * but we count all items so that the caller @@ -624,12 +653,13 @@ static void rds_walk_conn_path_info(struct socket *sock, unsigned int len, static int rds_conn_info_visitor(struct rds_conn_path *cp, void *buffer) { struct rds_info_connection *cinfo = buffer; + struct rds_connection *conn = cp->cp_conn; cinfo->next_tx_seq = cp->cp_next_tx_seq; cinfo->next_rx_seq = cp->cp_next_rx_seq; - cinfo->laddr = cp->cp_conn->c_laddr; - cinfo->faddr = cp->cp_conn->c_faddr; - strncpy(cinfo->transport, cp->cp_conn->c_trans->t_name, + cinfo->laddr = conn->c_laddr.s6_addr32[3]; + cinfo->faddr = conn->c_faddr.s6_addr32[3]; + strncpy(cinfo->transport, conn->c_trans->t_name, sizeof(cinfo->transport)); cinfo->flags = 0; |