diff options
author | David S. Miller <davem@davemloft.net> | 2021-11-20 17:11:00 +0300 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2021-11-20 17:11:00 +0300 |
commit | 89f971182417cb27abd82cfc48a7f36b99352ddc (patch) | |
tree | b2ba5218fe17bae705e846e1af473c82fd69e915 | |
parent | 979594c5ff7b82e4787c8491680a2658bd88b780 (diff) | |
parent | 5fb62e9cd3adadd95303447ee8e3f62ee98b0e73 (diff) | |
download | linux-89f971182417cb27abd82cfc48a7f36b99352ddc.tar.xz |
Merge branch 'mptcp-more-socket-options'
Mat Martineau says:
====================
mptcp: More socket option support
These patches add MPTCP socket support for a few additional socket
options: IP_TOS, IP_FREEBIND, IP_TRANSPARENT, IPV6_FREEBIND, and
IPV6_TRANSPARENT.
Patch 1 exposes __ip_sock_set_tos() for use in patch 2.
Patch 2 adds IP_TOS support.
Patches 3 and 4 add the freebind and transparent support, with a
selftest for the latter.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | include/net/ip.h | 1 | ||||
-rw-r--r-- | net/ipv4/ip_sockglue.c | 2 | ||||
-rw-r--r-- | net/mptcp/sockopt.c | 106 | ||||
-rw-r--r-- | net/mptcp/subflow.c | 3 | ||||
-rw-r--r-- | tools/testing/selftests/net/mptcp/config | 8 | ||||
-rw-r--r-- | tools/testing/selftests/net/mptcp/mptcp_connect.c | 51 | ||||
-rwxr-xr-x | tools/testing/selftests/net/mptcp/mptcp_connect.sh | 80 |
7 files changed, 245 insertions, 6 deletions
diff --git a/include/net/ip.h b/include/net/ip.h index 7d1088888c10..81e23a102a0d 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -783,5 +783,6 @@ int ip_sock_set_mtu_discover(struct sock *sk, int val); void ip_sock_set_pktinfo(struct sock *sk); void ip_sock_set_recverr(struct sock *sk); void ip_sock_set_tos(struct sock *sk, int val); +void __ip_sock_set_tos(struct sock *sk, int val); #endif /* _IP_H */ diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 38d29b175ca6..445a9ecaefa1 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -576,7 +576,7 @@ out: return err; } -static void __ip_sock_set_tos(struct sock *sk, int val) +void __ip_sock_set_tos(struct sock *sk, int val) { if (sk->sk_type == SOCK_STREAM) { val &= ~INET_ECN_MASK; diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c index 0f1e661c2032..fb43e145cb57 100644 --- a/net/mptcp/sockopt.c +++ b/net/mptcp/sockopt.c @@ -390,6 +390,8 @@ static int mptcp_setsockopt_v6(struct mptcp_sock *msk, int optname, switch (optname) { case IPV6_V6ONLY: + case IPV6_TRANSPARENT: + case IPV6_FREEBIND: lock_sock(sk); ssock = __mptcp_nmpc_socket(msk); if (!ssock) { @@ -398,8 +400,24 @@ static int mptcp_setsockopt_v6(struct mptcp_sock *msk, int optname, } ret = tcp_setsockopt(ssock->sk, SOL_IPV6, optname, optval, optlen); - if (ret == 0) + if (ret != 0) { + release_sock(sk); + return ret; + } + + sockopt_seq_inc(msk); + + switch (optname) { + case IPV6_V6ONLY: sk->sk_ipv6only = ssock->sk->sk_ipv6only; + break; + case IPV6_TRANSPARENT: + inet_sk(sk)->transparent = inet_sk(ssock->sk)->transparent; + break; + case IPV6_FREEBIND: + inet_sk(sk)->freebind = inet_sk(ssock->sk)->freebind; + break; + } release_sock(sk); break; @@ -598,6 +616,85 @@ static int mptcp_setsockopt_sol_tcp_congestion(struct mptcp_sock *msk, sockptr_t return ret; } +static int mptcp_setsockopt_sol_ip_set_transparent(struct mptcp_sock *msk, int optname, + sockptr_t optval, unsigned int optlen) +{ + struct sock *sk = (struct sock *)msk; + struct inet_sock *issk; + struct socket *ssock; + int err; + + err = ip_setsockopt(sk, SOL_IP, optname, optval, optlen); + if (err != 0) + return err; + + lock_sock(sk); + + ssock = __mptcp_nmpc_socket(msk); + if (!ssock) { + release_sock(sk); + return -EINVAL; + } + + issk = inet_sk(ssock->sk); + + switch (optname) { + case IP_FREEBIND: + issk->freebind = inet_sk(sk)->freebind; + break; + case IP_TRANSPARENT: + issk->transparent = inet_sk(sk)->transparent; + break; + default: + release_sock(sk); + WARN_ON_ONCE(1); + return -EOPNOTSUPP; + } + + sockopt_seq_inc(msk); + release_sock(sk); + return 0; +} + +static int mptcp_setsockopt_v4_set_tos(struct mptcp_sock *msk, int optname, + sockptr_t optval, unsigned int optlen) +{ + struct mptcp_subflow_context *subflow; + struct sock *sk = (struct sock *)msk; + int err, val; + + err = ip_setsockopt(sk, SOL_IP, optname, optval, optlen); + + if (err != 0) + return err; + + lock_sock(sk); + sockopt_seq_inc(msk); + val = inet_sk(sk)->tos; + mptcp_for_each_subflow(msk, subflow) { + struct sock *ssk = mptcp_subflow_tcp_sock(subflow); + + __ip_sock_set_tos(ssk, val); + } + release_sock(sk); + + return err; +} + +static int mptcp_setsockopt_v4(struct mptcp_sock *msk, int optname, + sockptr_t optval, unsigned int optlen) +{ + switch (optname) { + case IP_FREEBIND: + case IP_TRANSPARENT: + return mptcp_setsockopt_sol_ip_set_transparent(msk, optname, optval, optlen); + case IP_TOS: + return mptcp_setsockopt_v4_set_tos(msk, optname, optval, optlen); + } + + return -EOPNOTSUPP; +} + static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname, sockptr_t optval, unsigned int optlen) { @@ -637,6 +734,9 @@ int mptcp_setsockopt(struct sock *sk, int level, int optname, if (ssk) return tcp_setsockopt(ssk, level, optname, optval, optlen); + if (level == SOL_IP) + return mptcp_setsockopt_v4(msk, optname, optval, optlen); + if (level == SOL_IPV6) return mptcp_setsockopt_v6(msk, optname, optval, optlen); @@ -1003,6 +1103,7 @@ static void sync_socket_options(struct mptcp_sock *msk, struct sock *ssk) ssk->sk_priority = sk->sk_priority; ssk->sk_bound_dev_if = sk->sk_bound_dev_if; ssk->sk_incoming_cpu = sk->sk_incoming_cpu; + __ip_sock_set_tos(ssk, inet_sk(sk)->tos); if (sk->sk_userlocks & tx_rx_locks) { ssk->sk_userlocks |= sk->sk_userlocks & tx_rx_locks; @@ -1028,6 +1129,9 @@ static void sync_socket_options(struct mptcp_sock *msk, struct sock *ssk) if (inet_csk(sk)->icsk_ca_ops != inet_csk(ssk)->icsk_ca_ops) tcp_set_congestion_control(ssk, msk->ca_name, false, true); + + inet_sk(ssk)->transparent = inet_sk(sk)->transparent; + inet_sk(ssk)->freebind = inet_sk(sk)->freebind; } static void __mptcp_sockopt_sync(struct mptcp_sock *msk, struct sock *ssk) diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 49787a1d7b34..b8dd3441f7d0 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -1425,6 +1425,8 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc, if (addr.ss_family == AF_INET6) addrlen = sizeof(struct sockaddr_in6); #endif + mptcp_sockopt_sync(msk, ssk); + ssk->sk_bound_dev_if = ifindex; err = kernel_bind(sf, (struct sockaddr *)&addr, addrlen); if (err) @@ -1441,7 +1443,6 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc, mptcp_info2sockaddr(remote, &addr, ssk->sk_family); mptcp_add_pending_subflow(msk, subflow); - mptcp_sockopt_sync(msk, ssk); err = kernel_connect(sf, (struct sockaddr *)&addr, addrlen, O_NONBLOCK); if (err && err != -EINPROGRESS) goto failed_unlink; diff --git a/tools/testing/selftests/net/mptcp/config b/tools/testing/selftests/net/mptcp/config index 0faaccd21447..419e71560fd1 100644 --- a/tools/testing/selftests/net/mptcp/config +++ b/tools/testing/selftests/net/mptcp/config @@ -13,5 +13,9 @@ CONFIG_NFT_COUNTER=m CONFIG_NFT_COMPAT=m CONFIG_NETFILTER_XTABLES=m CONFIG_NETFILTER_XT_MATCH_BPF=m -CONFIG_NF_TABLES_IPV4=y -CONFIG_NF_TABLES_IPV6=y +CONFIG_NF_TABLES_INET=y +CONFIG_NFT_TPROXY=m +CONFIG_NFT_SOCKET=m +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IPV6_MULTIPLE_TABLES=y diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c index 95e81d557b08..ada9b80774d4 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.c +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c @@ -75,7 +75,12 @@ struct cfg_cmsg_types { unsigned int timestampns:1; }; +struct cfg_sockopt_types { + unsigned int transparent:1; +}; + static struct cfg_cmsg_types cfg_cmsg_types; +static struct cfg_sockopt_types cfg_sockopt_types; static void die_usage(void) { @@ -93,6 +98,7 @@ static void die_usage(void) fprintf(stderr, "\t-u -- check mptcp ulp\n"); fprintf(stderr, "\t-w num -- wait num sec before closing the socket\n"); fprintf(stderr, "\t-c cmsg -- test cmsg type <cmsg>\n"); + fprintf(stderr, "\t-o option -- test sockopt <option>\n"); fprintf(stderr, "\t-P [saveWithPeek|saveAfterPeek] -- save data with/after MSG_PEEK form tcp socket\n"); exit(1); @@ -185,6 +191,22 @@ static void set_mark(int fd, uint32_t mark) } } +static void set_transparent(int fd, int pf) +{ + int one = 1; + + switch (pf) { + case AF_INET: + if (-1 == setsockopt(fd, SOL_IP, IP_TRANSPARENT, &one, sizeof(one))) + perror("IP_TRANSPARENT"); + break; + case AF_INET6: + if (-1 == setsockopt(fd, IPPROTO_IPV6, IPV6_TRANSPARENT, &one, sizeof(one))) + perror("IPV6_TRANSPARENT"); + break; + } +} + static int sock_listen_mptcp(const char * const listenaddr, const char * const port) { @@ -212,6 +234,9 @@ static int sock_listen_mptcp(const char * const listenaddr, sizeof(one))) perror("setsockopt"); + if (cfg_sockopt_types.transparent) + set_transparent(sock, pf); + if (bind(sock, a->ai_addr, a->ai_addrlen) == 0) break; /* success */ @@ -944,6 +969,27 @@ static void parse_cmsg_types(const char *type) exit(1); } +static void parse_setsock_options(const char *name) +{ + char *next = strchr(name, ','); + unsigned int len = 0; + + if (next) { + parse_setsock_options(next + 1); + len = next - name; + } else { + len = strlen(name); + } + + if (strncmp(name, "TRANSPARENT", len) == 0) { + cfg_sockopt_types.transparent = 1; + return; + } + + fprintf(stderr, "Unrecognized setsockopt option %s\n", name); + exit(1); +} + int main_loop(void) { int fd; @@ -1047,7 +1093,7 @@ static void parse_opts(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "6jr:lp:s:hut:T:m:S:R:w:M:P:c:")) != -1) { + while ((c = getopt(argc, argv, "6jr:lp:s:hut:T:m:S:R:w:M:P:c:o:")) != -1) { switch (c) { case 'j': cfg_join = true; @@ -1108,6 +1154,9 @@ static void parse_opts(int argc, char **argv) case 'c': parse_cmsg_types(optarg); break; + case 'o': + parse_setsock_options(optarg); + break; } } diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh index 559173a8e387..a4226b608c68 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -671,6 +671,82 @@ run_tests() run_tests_lo $1 $2 $3 0 } +run_test_transparent() +{ + local connect_addr="$1" + local msg="$2" + + local connector_ns="$ns1" + local listener_ns="$ns2" + local lret=0 + local r6flag="" + + # skip if we don't want v6 + if ! $ipv6 && is_v6 "${connect_addr}"; then + return 0 + fi + +ip netns exec "$listener_ns" nft -f /dev/stdin <<"EOF" +flush ruleset +table inet mangle { + chain divert { + type filter hook prerouting priority -150; + + meta l4proto tcp socket transparent 1 meta mark set 1 accept + tcp dport 20000 tproxy to :20000 meta mark set 1 accept + } +} +EOF + if [ $? -ne 0 ]; then + echo "SKIP: $msg, could not load nft ruleset" + return + fi + + local local_addr + if is_v6 "${connect_addr}"; then + local_addr="::" + r6flag="-6" + else + local_addr="0.0.0.0" + fi + + ip -net "$listener_ns" $r6flag rule add fwmark 1 lookup 100 + if [ $? -ne 0 ]; then + ip netns exec "$listener_ns" nft flush ruleset + echo "SKIP: $msg, ip $r6flag rule failed" + return + fi + + ip -net "$listener_ns" route add local $local_addr/0 dev lo table 100 + if [ $? -ne 0 ]; then + ip netns exec "$listener_ns" nft flush ruleset + ip -net "$listener_ns" $r6flag rule del fwmark 1 lookup 100 + echo "SKIP: $msg, ip route add local $local_addr failed" + return + fi + + echo "INFO: test $msg" + + TEST_COUNT=10000 + local extra_args="-o TRANSPARENT" + do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP \ + ${connect_addr} ${local_addr} "${extra_args}" + lret=$? + + ip netns exec "$listener_ns" nft flush ruleset + ip -net "$listener_ns" $r6flag rule del fwmark 1 lookup 100 + ip -net "$listener_ns" route del local $local_addr/0 dev lo table 100 + + if [ $lret -ne 0 ]; then + echo "FAIL: $msg, mptcp connection error" 1>&2 + ret=$lret + return 1 + fi + + echo "PASS: $msg" + return 0 +} + run_tests_peekmode() { local peekmode="$1" @@ -794,5 +870,9 @@ run_tests_peekmode "saveWithPeek" run_tests_peekmode "saveAfterPeek" stop_if_error "Tests with peek mode have failed" +# connect to ns4 ip address, ns2 should intercept/proxy +run_test_transparent 10.0.3.1 "tproxy ipv4" +run_test_transparent dead:beef:3::1 "tproxy ipv6" + display_time exit $ret |