diff options
author | Ka-Cheong Poon <ka-cheong.poon@oracle.com> | 2018-07-24 06:51:22 +0300 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2018-07-24 07:17:44 +0300 |
commit | 1e2b44e78eead7bcadfbf96f70d95773191541c9 (patch) | |
tree | e7944339dd957ae23cfd690cb0ad6962d98c053c /net/rds/af_rds.c | |
parent | eee2fa6ab3225192d6d894c54a6fb02ac9efdff6 (diff) | |
download | linux-1e2b44e78eead7bcadfbf96f70d95773191541c9.tar.xz |
rds: Enable RDS IPv6 support
This patch enables RDS to use IPv6 addresses. For RDS/TCP, the
listener is now an IPv6 endpoint which accepts both IPv4 and IPv6
connection requests. RDS/RDMA/IB uses a private data (struct
rds_ib_connect_private) exchange between endpoints at RDS connection
establishment time to support RDMA. This private data exchange uses a
32-bit integer to represent an IP address. This needs to be changed in
order to support IPv6. A new private data struct
rds6_ib_connect_private is introduced to handle this. To ensure
backward compatibility, an IPv6 capable RDS stack uses another RDMA
listener port (RDS_CM_PORT) to accept IPv6 connections, and it
continues to use the original RDS_PORT for IPv4 RDS connections. When
it needs to communicate with an IPv6 peer, it uses the RDS_CM_PORT to
send the connection setup request.
v5: Fixed syntax problem (David Miller).
v4: Changed port history comments in rds.h (Sowmini Varadhan).
v3: Added support to set up IPv4 connection using mapped address
(David Miller).
Added support to set up connections between link-local and
non-link-local addresses.
Various review comments from Santosh Shilimkar and Sowmini Varadhan.
v2: Fixed bound and peer address scope mismatch issue.
Added back rds_connect() IPv6 changes.
Signed-off-by: Ka-Cheong Poon <ka-cheong.poon@oracle.com>
Acked-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/rds/af_rds.c')
-rw-r--r-- | net/rds/af_rds.c | 91 |
1 files changed, 77 insertions, 14 deletions
diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c index fc1a5c63b783..fc5c48b248fe 100644 --- a/net/rds/af_rds.c +++ b/net/rds/af_rds.c @@ -142,15 +142,32 @@ static int rds_getname(struct socket *sock, struct sockaddr *uaddr, uaddr_len = sizeof(*sin6); } } else { - /* If socket is not yet bound, set the return address family - * to be AF_UNSPEC (value 0) and the address size to be that - * of an IPv4 address. + /* If socket is not yet bound and the socket is connected, + * set the return address family to be the same as the + * connected address, but with 0 address value. If it is not + * connected, set the family to be AF_UNSPEC (value 0) and + * the address size to be that of an IPv4 address. */ if (ipv6_addr_any(&rs->rs_bound_addr)) { - sin = (struct sockaddr_in *)uaddr; - memset(sin, 0, sizeof(*sin)); - sin->sin_family = AF_UNSPEC; - return sizeof(*sin); + if (ipv6_addr_any(&rs->rs_conn_addr)) { + sin = (struct sockaddr_in *)uaddr; + memset(sin, 0, sizeof(*sin)); + sin->sin_family = AF_UNSPEC; + return sizeof(*sin); + } + + if (ipv6_addr_type(&rs->rs_conn_addr) & + IPV6_ADDR_MAPPED) { + sin = (struct sockaddr_in *)uaddr; + memset(sin, 0, sizeof(*sin)); + sin->sin_family = AF_INET; + return sizeof(*sin); + } + + sin6 = (struct sockaddr_in6 *)uaddr; + memset(sin6, 0, sizeof(*sin6)); + sin6->sin6_family = AF_INET6; + return sizeof(*sin6); } if (ipv6_addr_v4mapped(&rs->rs_bound_addr)) { sin = (struct sockaddr_in *)uaddr; @@ -484,16 +501,18 @@ static int rds_connect(struct socket *sock, struct sockaddr *uaddr, { struct sock *sk = sock->sk; struct sockaddr_in *sin; + struct sockaddr_in6 *sin6; struct rds_sock *rs = rds_sk_to_rs(sk); + int addr_type; int ret = 0; lock_sock(sk); - switch (addr_len) { - case sizeof(struct sockaddr_in): + switch (uaddr->sa_family) { + case AF_INET: sin = (struct sockaddr_in *)uaddr; - if (sin->sin_family != AF_INET) { - ret = -EAFNOSUPPORT; + if (addr_len < sizeof(struct sockaddr_in)) { + ret = -EINVAL; break; } if (sin->sin_addr.s_addr 
== htonl(INADDR_ANY)) { @@ -509,12 +528,56 @@ static int rds_connect(struct socket *sock, struct sockaddr *uaddr, rs->rs_conn_port = sin->sin_port; break; - case sizeof(struct sockaddr_in6): - ret = -EPROTONOSUPPORT; + case AF_INET6: + sin6 = (struct sockaddr_in6 *)uaddr; + if (addr_len < sizeof(struct sockaddr_in6)) { + ret = -EINVAL; + break; + } + addr_type = ipv6_addr_type(&sin6->sin6_addr); + if (!(addr_type & IPV6_ADDR_UNICAST)) { + __be32 addr4; + + if (!(addr_type & IPV6_ADDR_MAPPED)) { + ret = -EPROTOTYPE; + break; + } + + /* It is a mapped address. Need to do some sanity + * checks. + */ + addr4 = sin6->sin6_addr.s6_addr32[3]; + if (addr4 == htonl(INADDR_ANY) || + addr4 == htonl(INADDR_BROADCAST) || + IN_MULTICAST(ntohl(addr4))) { + ret = -EPROTOTYPE; + break; + } + } + + if (addr_type & IPV6_ADDR_LINKLOCAL) { + /* If socket is arleady bound to a link local address, + * the peer address must be on the same link. + */ + if (sin6->sin6_scope_id == 0 || + (!ipv6_addr_any(&rs->rs_bound_addr) && + rs->rs_bound_scope_id && + sin6->sin6_scope_id != rs->rs_bound_scope_id)) { + ret = -EINVAL; + break; + } + /* Remember the connected address scope ID. It will + * be checked against the binding local address when + * the socket is bound. + */ + rs->rs_bound_scope_id = sin6->sin6_scope_id; + } + rs->rs_conn_addr = sin6->sin6_addr; + rs->rs_conn_port = sin6->sin6_port; break; default: - ret = -EINVAL; + ret = -EAFNOSUPPORT; break; } |