diff options
| author | David S. Miller <davem@davemloft.net> | 2018-07-24 07:17:44 +0300 |
|---|---|---|
| committer | David S. Miller <davem@davemloft.net> | 2018-07-24 07:17:44 +0300 |
| commit | 176bd861ff5affe4a54bcd00266279542142170c (patch) | |
| tree | 398bab1a171b7da3be14fe9f7f1d5936032b7f74 /net/rds/send.c | |
| parent | a6c90dd321bfeb5e48fc2eb6623b7b976106f6d7 (diff) | |
| parent | b7ff8b1036f0b0df1390ba6b5e9bc7ec458e857a (diff) | |
| download | linux-176bd861ff5affe4a54bcd00266279542142170c.tar.xz | |
Merge branch 'rds-ipv6'
Ka-Cheong Poon says:
====================
rds: IPv6 support
This patch set adds IPv6 support to the kernel RDS and related
modules. Existing RDS apps using IPv4 address continue to run without
any problem. New RDS apps which want to use IPv6 address can do so by
passing the address in struct sockaddr_in6 to bind(), connect() or
sendmsg(). And those apps also need to use the new IPv6 equivalents
of some of the existing socket options as the existing options use a
32 bit integer to store IP address.
All RDS code now use struct in6_addr to store IP address. IPv4
address is stored as an IPv4 mapped address.
Header file changes
There are many data structures (RDS socket options) used by RDS apps
which use a 32 bit integer to store IP address. To support IPv6,
struct in6_addr needs to be used. To ensure backward compatibility, a
new data structure is introduced for each of those data structures
which use a 32 bit integer to represent an IP address. And new socket
options are introduced to use those new structures. This means that
existing apps should work without a problem with the new RDS module.
For apps which want to use IPv6, those new data structures and socket
options can be used. IPv4 mapped address is used to represent IPv4
address in the new data structures.
Internally, all RDS data structures which contain an IP address are
changed to use struct in6_addr to store the address. IPv4 address is
stored as an IPv4 mapped address. All the functions which take an IP
address as argument are also changed to use struct in6_addr.
RDS/RDMA/IB uses a private data (struct rds_ib_connect_private)
exchange between endpoints at RDS connection establishment time to
support RDMA. This private data exchange uses a 32 bit integer to
represent an IP address. This needs to be changed in order to support
IPv6. A new private data struct rds6_ib_connect_private is introduced
to handle this. To ensure backward compatibility, an IPv6 capable RDS
stack uses another RDMA listener port (RDS_CM_PORT) to accept IPv6
connection. And it continues to use the original RDS_PORT for IPv4 RDS
connections. When it needs to communicate with an IPv6 peer, it uses
the RDS_TCP_PORT to send the connection set up request.
RDS/TCP changes
TCP related code is changed to support IPv6. Note that only an IPv6
TCP listener on port RDS_TCP_PORT is created as it can accept both
IPv4 and IPv6 connection requests.
IB/RDMA changes
The initial private data exchange between IB endpoints using RDMA is
changed to support IPv6 address instead, if the peer address is IPv6.
To ensure backward compatibility, annother RDMA listener port
(RDS_CM_PORT) is used to accept IPv6 connection. An IPv6 capable RDS
module continues to use the original RDS_PORT for IPv4 RDS
connections. When it needs to communicate with an IPv6 peer, it uses
the RDS_CM_PORT to send the connection set up request.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/rds/send.c')
| -rw-r--r-- | net/rds/send.c | 114 |
1 files changed, 99 insertions, 15 deletions
diff --git a/net/rds/send.c b/net/rds/send.c index 94c7f74909be..9604e1faa564 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -709,7 +709,7 @@ void rds_send_drop_acked(struct rds_connection *conn, u64 ack, } EXPORT_SYMBOL_GPL(rds_send_drop_acked); -void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest) +void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in6 *dest) { struct rds_message *rm, *tmp; struct rds_connection *conn; @@ -721,8 +721,9 @@ void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest) spin_lock_irqsave(&rs->rs_lock, flags); list_for_each_entry_safe(rm, tmp, &rs->rs_send_queue, m_sock_item) { - if (dest && (dest->sin_addr.s_addr != rm->m_daddr || - dest->sin_port != rm->m_inc.i_hdr.h_dport)) + if (dest && + (!ipv6_addr_equal(&dest->sin6_addr, &rm->m_daddr) || + dest->sin6_port != rm->m_inc.i_hdr.h_dport)) continue; list_move(&rm->m_sock_item, &list); @@ -1059,8 +1060,8 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len) { struct sock *sk = sock->sk; struct rds_sock *rs = rds_sk_to_rs(sk); + DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name); DECLARE_SOCKADDR(struct sockaddr_in *, usin, msg->msg_name); - __be32 daddr; __be16 dport; struct rds_message *rm = NULL; struct rds_connection *conn; @@ -1069,10 +1070,13 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len) int nonblock = msg->msg_flags & MSG_DONTWAIT; long timeo = sock_sndtimeo(sk, nonblock); struct rds_conn_path *cpath; + struct in6_addr daddr; + __u32 scope_id = 0; size_t total_payload_len = payload_len, rdma_payload_len = 0; bool zcopy = ((msg->msg_flags & MSG_ZEROCOPY) && sock_flag(rds_rs_to_sk(rs), SOCK_ZEROCOPY)); int num_sgs = ceil(payload_len, PAGE_SIZE); + int namelen; /* Mirror Linux UDP mirror of BSD error message compatibility */ /* XXX: Perhaps MSG_MORE someday */ @@ -1081,27 +1085,106 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len) goto out; } - if (msg->msg_namelen) { - /* XXX fail non-unicast destination IPs? */ - if (msg->msg_namelen < sizeof(*usin) || usin->sin_family != AF_INET) { + namelen = msg->msg_namelen; + if (namelen != 0) { + if (namelen < sizeof(*usin)) { + ret = -EINVAL; + goto out; + } + switch (usin->sin_family) { + case AF_INET: + if (usin->sin_addr.s_addr == htonl(INADDR_ANY) || + usin->sin_addr.s_addr == htonl(INADDR_BROADCAST) || + IN_MULTICAST(ntohl(usin->sin_addr.s_addr))) { + ret = -EINVAL; + goto out; + } + ipv6_addr_set_v4mapped(usin->sin_addr.s_addr, &daddr); + dport = usin->sin_port; + break; + + case AF_INET6: { + int addr_type; + + if (namelen < sizeof(*sin6)) { + ret = -EINVAL; + goto out; + } + addr_type = ipv6_addr_type(&sin6->sin6_addr); + if (!(addr_type & IPV6_ADDR_UNICAST)) { + __be32 addr4; + + if (!(addr_type & IPV6_ADDR_MAPPED)) { + ret = -EINVAL; + goto out; + } + + /* It is a mapped address. Need to do some + * sanity checks. + */ + addr4 = sin6->sin6_addr.s6_addr32[3]; + if (addr4 == htonl(INADDR_ANY) || + addr4 == htonl(INADDR_BROADCAST) || + IN_MULTICAST(ntohl(addr4))) { + return -EINVAL; + goto out; + } + } + if (addr_type & IPV6_ADDR_LINKLOCAL) { + if (sin6->sin6_scope_id == 0) { + ret = -EINVAL; + goto out; + } + scope_id = sin6->sin6_scope_id; + } + + daddr = sin6->sin6_addr; + dport = sin6->sin6_port; + break; + } + + default: ret = -EINVAL; goto out; } - daddr = usin->sin_addr.s_addr; - dport = usin->sin_port; } else { /* We only care about consistency with ->connect() */ lock_sock(sk); daddr = rs->rs_conn_addr; dport = rs->rs_conn_port; + scope_id = rs->rs_bound_scope_id; release_sock(sk); } lock_sock(sk); - if (daddr == 0 || rs->rs_bound_addr == 0) { + if (ipv6_addr_any(&rs->rs_bound_addr) || ipv6_addr_any(&daddr)) { release_sock(sk); - ret = -ENOTCONN; /* XXX not a great errno */ + ret = -ENOTCONN; goto out; + } else if (namelen != 0) { + /* Cannot send to an IPv4 address using an IPv6 source + * address and cannot send to an IPv6 address using an + * IPv4 source address. + */ + if (ipv6_addr_v4mapped(&daddr) ^ + ipv6_addr_v4mapped(&rs->rs_bound_addr)) { + release_sock(sk); + ret = -EOPNOTSUPP; + goto out; + } + /* If the socket is already bound to a link local address, + * it can only send to peers on the same link. But allow + * communicating beween link local and non-link local address. + */ + if (scope_id != rs->rs_bound_scope_id) { + if (!scope_id) { + scope_id = rs->rs_bound_scope_id; + } else if (rs->rs_bound_scope_id) { + release_sock(sk); + ret = -EINVAL; + goto out; + } + } } release_sock(sk); @@ -1155,13 +1238,14 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len) /* rds_conn_create has a spinlock that runs with IRQ off. * Caching the conn in the socket helps a lot. */ - if (rs->rs_conn && rs->rs_conn->c_faddr == daddr) + if (rs->rs_conn && ipv6_addr_equal(&rs->rs_conn->c_faddr, &daddr)) conn = rs->rs_conn; else { conn = rds_conn_create_outgoing(sock_net(sock->sk), - rs->rs_bound_addr, daddr, - rs->rs_transport, - sock->sk->sk_allocation); + &rs->rs_bound_addr, &daddr, + rs->rs_transport, + sock->sk->sk_allocation, + scope_id); if (IS_ERR(conn)) { ret = PTR_ERR(conn); goto out; |
