From 7210e4e38f945dfa173c4a4e59ad827c9ecad541 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 13 Oct 2014 19:50:22 +0200 Subject: netfilter: nf_tables: restrict nat/masq expressions to nat chain type This adds the missing validation code to avoid the use of nat/masq from non-nat chains. The validation assumes two possible configuration scenarios: 1) Use of nat from base chain that is not of nat type. Reject this configuration from the nft_*_init() path of the expression. 2) Use of nat from non-base chain. In this case, we have to wait until the non-base chain is referenced by at least one base chain via jump/goto. This is resolved from the nft_*_validate() path which is called from nf_tables_check_loops(). The user gets an -EOPNOTSUPP in both cases. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 3 +++ include/net/netfilter/nft_masq.h | 3 +++ 2 files changed, 6 insertions(+) (limited to 'include/net') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 3d7292392fac..845c596bf594 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -530,6 +530,9 @@ enum nft_chain_type { NFT_CHAIN_T_MAX }; +int nft_chain_validate_dependency(const struct nft_chain *chain, + enum nft_chain_type type); + struct nft_stats { u64 bytes; u64 pkts; diff --git a/include/net/netfilter/nft_masq.h b/include/net/netfilter/nft_masq.h index c72729f954f4..e2a518b60e19 100644 --- a/include/net/netfilter/nft_masq.h +++ b/include/net/netfilter/nft_masq.h @@ -13,4 +13,7 @@ int nft_masq_init(const struct nft_ctx *ctx, int nft_masq_dump(struct sk_buff *skb, const struct nft_expr *expr); +int nft_masq_validate(const struct nft_ctx *ctx, const struct nft_expr *expr, + const struct nft_data **data); + #endif /* _NFT_MASQ_H_ */ -- cgit v1.2.3 From 5188cd44c55db3e92cd9e77a40b5baa7ed4340f7 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Thu, 30 Oct 2014 18:27:17 +0000 Subject: drivers/net, ipv6: 
Select IPv6 fragment idents for virtio UFO packets UFO is now disabled on all drivers that work with virtio net headers, but userland may try to send UFO/IPv6 packets anyway. Instead of sending with ID=0, we should select identifiers on their behalf (as we used to). Signed-off-by: Ben Hutchings Fixes: 916e4cf46d02 ("ipv6: reuse ip6_frag_id from ip6_ufo_append_data") Signed-off-by: David S. Miller --- drivers/net/macvtap.c | 3 +++ drivers/net/tun.c | 6 +++++- include/net/ipv6.h | 2 ++ net/ipv6/output_core.c | 34 ++++++++++++++++++++++++++++++++++ 4 files changed, 44 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index 2aeaa61ece09..6f226de655a4 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -16,6 +16,7 @@ #include #include +#include #include #include #include @@ -572,6 +573,8 @@ static int macvtap_skb_from_vnet_hdr(struct sk_buff *skb, pr_warn_once("macvtap: %s: using disabled UFO feature; please fix this program\n", current->comm); gso_type = SKB_GSO_UDP; + if (skb->protocol == htons(ETH_P_IPV6)) + ipv6_proxy_select_ident(skb); break; default: return -EINVAL; diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 280d3d2a9792..7302398f0b1f 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -65,6 +65,7 @@ #include #include #include +#include #include #include #include @@ -1139,6 +1140,8 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, break; } + skb_reset_network_header(skb); + if (gso.gso_type != VIRTIO_NET_HDR_GSO_NONE) { pr_debug("GSO!\n"); switch (gso.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { @@ -1159,6 +1162,8 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, current->comm); } skb_shinfo(skb)->gso_type = SKB_GSO_UDP; + if (skb->protocol == htons(ETH_P_IPV6)) + ipv6_proxy_select_ident(skb); break; } default: @@ -1189,7 +1194,6 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, 
skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG; } - skb_reset_network_header(skb); skb_probe_transport_header(skb, 0); rxhash = skb_get_hash(skb); diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 97f472012438..4292929392b0 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -671,6 +671,8 @@ static inline int ipv6_addr_diff(const struct in6_addr *a1, const struct in6_add return __ipv6_addr_diff(a1, a2, sizeof(struct in6_addr)); } +void ipv6_proxy_select_ident(struct sk_buff *skb); + int ip6_dst_hoplimit(struct dst_entry *dst); static inline int ip6_sk_dst_hoplimit(struct ipv6_pinfo *np, struct flowi6 *fl6, diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c index fc24c390af05..97f41a3e68d9 100644 --- a/net/ipv6/output_core.c +++ b/net/ipv6/output_core.c @@ -3,11 +3,45 @@ * not configured or static. These functions are needed by GSO/GRO implementation. */ #include +#include #include #include #include #include +/* This function exists only for tap drivers that must support broken + * clients requesting UFO without specifying an IPv6 fragment ID. + * + * This is similar to ipv6_select_ident() but we use an independent hash + * seed to limit information leakage. + * + * The network header must be set before calling this. 
+ */ +void ipv6_proxy_select_ident(struct sk_buff *skb) +{ + static u32 ip6_proxy_idents_hashrnd __read_mostly; + struct in6_addr buf[2]; + struct in6_addr *addrs; + u32 hash, id; + + addrs = skb_header_pointer(skb, + skb_network_offset(skb) + + offsetof(struct ipv6hdr, saddr), + sizeof(buf), buf); + if (!addrs) + return; + + net_get_random_once(&ip6_proxy_idents_hashrnd, + sizeof(ip6_proxy_idents_hashrnd)); + + hash = __ipv6_addr_jhash(&addrs[1], ip6_proxy_idents_hashrnd); + hash = __ipv6_addr_jhash(&addrs[0], hash); + + id = ip_idents_reserve(hash, 1); + skb_shinfo(skb)->ip6_frag_id = htonl(id); +} +EXPORT_SYMBOL_GPL(ipv6_proxy_select_ident); + int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr) { u16 offset = sizeof(struct ipv6hdr); -- cgit v1.2.3 From 052b9498eea532deb5de75277a53f6e0623215dc Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sat, 25 Oct 2014 18:24:57 +0200 Subject: netfilter: nf_reject_ipv4: split nf_send_reset() in smaller functions That can be reused by the reject bridge expression to build the reject packet. The new functions are: * nf_reject_ip_tcphdr_get(): to sanitize and to obtain the TCP header. * nf_reject_iphdr_put(): to build the IPv4 header. * nf_reject_ip_tcphdr_put(): to build the TCP header. 
Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/ipv4/nf_reject.h | 10 ++++ net/ipv4/netfilter/nf_reject_ipv4.c | 88 ++++++++++++++++++++++++---------- 2 files changed, 72 insertions(+), 26 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/ipv4/nf_reject.h b/include/net/netfilter/ipv4/nf_reject.h index e8427193c777..03e928a55229 100644 --- a/include/net/netfilter/ipv4/nf_reject.h +++ b/include/net/netfilter/ipv4/nf_reject.h @@ -1,6 +1,8 @@ #ifndef _IPV4_NF_REJECT_H #define _IPV4_NF_REJECT_H +#include +#include #include static inline void nf_send_unreach(struct sk_buff *skb_in, int code) @@ -10,4 +12,12 @@ static inline void nf_send_unreach(struct sk_buff *skb_in, int code) void nf_send_reset(struct sk_buff *oldskb, int hook); +const struct tcphdr *nf_reject_ip_tcphdr_get(struct sk_buff *oldskb, + struct tcphdr *_oth, int hook); +struct iphdr *nf_reject_iphdr_put(struct sk_buff *nskb, + const struct sk_buff *oldskb, + __be16 protocol, int ttl); +void nf_reject_ip_tcphdr_put(struct sk_buff *nskb, const struct sk_buff *oldskb, + const struct tcphdr *oth); + #endif /* _IPV4_NF_REJECT_H */ diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c index 92b303dbd5fc..1baaa83dfe5c 100644 --- a/net/ipv4/netfilter/nf_reject_ipv4.c +++ b/net/ipv4/netfilter/nf_reject_ipv4.c @@ -12,43 +12,39 @@ #include #include #include +#include -/* Send RST reply */ -void nf_send_reset(struct sk_buff *oldskb, int hook) +const struct tcphdr *nf_reject_ip_tcphdr_get(struct sk_buff *oldskb, + struct tcphdr *_oth, int hook) { - struct sk_buff *nskb; - const struct iphdr *oiph; - struct iphdr *niph; const struct tcphdr *oth; - struct tcphdr _otcph, *tcph; /* IP header checks: fragment. */ if (ip_hdr(oldskb)->frag_off & htons(IP_OFFSET)) - return; + return NULL; oth = skb_header_pointer(oldskb, ip_hdrlen(oldskb), - sizeof(_otcph), &_otcph); + sizeof(struct tcphdr), _oth); if (oth == NULL) - return; + return NULL; /* No RST for RST. 
*/ if (oth->rst) - return; - - if (skb_rtable(oldskb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) - return; + return NULL; /* Check checksum */ if (nf_ip_checksum(oldskb, hook, ip_hdrlen(oldskb), IPPROTO_TCP)) - return; - oiph = ip_hdr(oldskb); + return NULL; - nskb = alloc_skb(sizeof(struct iphdr) + sizeof(struct tcphdr) + - LL_MAX_HEADER, GFP_ATOMIC); - if (!nskb) - return; + return oth; +} +EXPORT_SYMBOL_GPL(nf_reject_ip_tcphdr_get); - skb_reserve(nskb, LL_MAX_HEADER); +struct iphdr *nf_reject_iphdr_put(struct sk_buff *nskb, + const struct sk_buff *oldskb, + __be16 protocol, int ttl) +{ + struct iphdr *niph, *oiph = ip_hdr(oldskb); skb_reset_network_header(nskb); niph = (struct iphdr *)skb_put(nskb, sizeof(struct iphdr)); @@ -57,10 +53,23 @@ void nf_send_reset(struct sk_buff *oldskb, int hook) niph->tos = 0; niph->id = 0; niph->frag_off = htons(IP_DF); - niph->protocol = IPPROTO_TCP; + niph->protocol = protocol; niph->check = 0; niph->saddr = oiph->daddr; niph->daddr = oiph->saddr; + niph->ttl = ttl; + + nskb->protocol = htons(ETH_P_IP); + + return niph; +} +EXPORT_SYMBOL_GPL(nf_reject_iphdr_put); + +void nf_reject_ip_tcphdr_put(struct sk_buff *nskb, const struct sk_buff *oldskb, + const struct tcphdr *oth) +{ + struct iphdr *niph = ip_hdr(nskb); + struct tcphdr *tcph; skb_reset_transport_header(nskb); tcph = (struct tcphdr *)skb_put(nskb, sizeof(struct tcphdr)); @@ -69,9 +78,9 @@ void nf_send_reset(struct sk_buff *oldskb, int hook) tcph->dest = oth->source; tcph->doff = sizeof(struct tcphdr) / 4; - if (oth->ack) + if (oth->ack) { tcph->seq = oth->ack_seq; - else { + } else { tcph->ack_seq = htonl(ntohl(oth->seq) + oth->syn + oth->fin + oldskb->len - ip_hdrlen(oldskb) - (oth->doff << 2)); @@ -84,16 +93,43 @@ void nf_send_reset(struct sk_buff *oldskb, int hook) nskb->ip_summed = CHECKSUM_PARTIAL; nskb->csum_start = (unsigned char *)tcph - nskb->head; nskb->csum_offset = offsetof(struct tcphdr, check); +} +EXPORT_SYMBOL_GPL(nf_reject_ip_tcphdr_put); + +/* Send 
RST reply */ +void nf_send_reset(struct sk_buff *oldskb, int hook) +{ + struct sk_buff *nskb; + const struct iphdr *oiph; + struct iphdr *niph; + const struct tcphdr *oth; + struct tcphdr _oth; + + oth = nf_reject_ip_tcphdr_get(oldskb, &_oth, hook); + if (!oth) + return; + + if (skb_rtable(oldskb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) + return; + + oiph = ip_hdr(oldskb); + + nskb = alloc_skb(sizeof(struct iphdr) + sizeof(struct tcphdr) + + LL_MAX_HEADER, GFP_ATOMIC); + if (!nskb) + return; /* ip_route_me_harder expects skb->dst to be set */ skb_dst_set_noref(nskb, skb_dst(oldskb)); - nskb->protocol = htons(ETH_P_IP); + skb_reserve(nskb, LL_MAX_HEADER); + niph = nf_reject_iphdr_put(nskb, oldskb, IPPROTO_TCP, + ip4_dst_hoplimit(skb_dst(nskb))); + nf_reject_ip_tcphdr_put(nskb, oldskb, oth); + if (ip_route_me_harder(nskb, RTN_UNSPEC)) goto free_nskb; - niph->ttl = ip4_dst_hoplimit(skb_dst(nskb)); - /* "Never happens" */ if (nskb->len > dst_mtu(skb_dst(nskb))) goto free_nskb; -- cgit v1.2.3 From 8bfcdf6671b1c8006c52c3eaf9fd1b5dfcf41c3d Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sun, 26 Oct 2014 12:35:54 +0100 Subject: netfilter: nf_reject_ipv6: split nf_send_reset6() in smaller functions That can be reused by the reject bridge expression to build the reject packet. The new functions are: * nf_reject_ip6_tcphdr_get(): to sanitize and to obtain the TCP header. * nf_reject_ip6hdr_put(): to build the IPv6 header. * nf_reject_ip6_tcphdr_put(): to build the TCP header. 
Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/ipv6/nf_reject.h | 10 ++ net/ipv6/netfilter/nf_reject_ipv6.c | 175 ++++++++++++++++++++------------- 2 files changed, 119 insertions(+), 66 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/ipv6/nf_reject.h b/include/net/netfilter/ipv6/nf_reject.h index 48e18810a9be..23216d48abf9 100644 --- a/include/net/netfilter/ipv6/nf_reject.h +++ b/include/net/netfilter/ipv6/nf_reject.h @@ -15,4 +15,14 @@ nf_send_unreach6(struct net *net, struct sk_buff *skb_in, unsigned char code, void nf_send_reset6(struct net *net, struct sk_buff *oldskb, int hook); +const struct tcphdr *nf_reject_ip6_tcphdr_get(struct sk_buff *oldskb, + struct tcphdr *otcph, + unsigned int *otcplen, int hook); +struct ipv6hdr *nf_reject_ip6hdr_put(struct sk_buff *nskb, + const struct sk_buff *oldskb, + __be16 protocol, int hoplimit); +void nf_reject_ip6_tcphdr_put(struct sk_buff *nskb, + const struct sk_buff *oldskb, + const struct tcphdr *oth, unsigned int otcplen); + #endif /* _IPV6_NF_REJECT_H */ diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c index 20d9defc6c59..015eb8a80766 100644 --- a/net/ipv6/netfilter/nf_reject_ipv6.c +++ b/net/ipv6/netfilter/nf_reject_ipv6.c @@ -12,116 +12,102 @@ #include #include #include +#include -void nf_send_reset6(struct net *net, struct sk_buff *oldskb, int hook) +const struct tcphdr *nf_reject_ip6_tcphdr_get(struct sk_buff *oldskb, + struct tcphdr *otcph, + unsigned int *otcplen, int hook) { - struct sk_buff *nskb; - struct tcphdr otcph, *tcph; - unsigned int otcplen, hh_len; - int tcphoff, needs_ack; const struct ipv6hdr *oip6h = ipv6_hdr(oldskb); - struct ipv6hdr *ip6h; -#define DEFAULT_TOS_VALUE 0x0U - const __u8 tclass = DEFAULT_TOS_VALUE; - struct dst_entry *dst = NULL; u8 proto; __be16 frag_off; - struct flowi6 fl6; - - if ((!(ipv6_addr_type(&oip6h->saddr) & IPV6_ADDR_UNICAST)) || - (!(ipv6_addr_type(&oip6h->daddr) & IPV6_ADDR_UNICAST))) { - 
pr_debug("addr is not unicast.\n"); - return; - } + int tcphoff; proto = oip6h->nexthdr; - tcphoff = ipv6_skip_exthdr(oldskb, ((u8*)(oip6h+1) - oldskb->data), &proto, &frag_off); + tcphoff = ipv6_skip_exthdr(oldskb, ((u8*)(oip6h+1) - oldskb->data), + &proto, &frag_off); if ((tcphoff < 0) || (tcphoff > oldskb->len)) { pr_debug("Cannot get TCP header.\n"); - return; + return NULL; } - otcplen = oldskb->len - tcphoff; + *otcplen = oldskb->len - tcphoff; /* IP header checks: fragment, too short. */ - if (proto != IPPROTO_TCP || otcplen < sizeof(struct tcphdr)) { - pr_debug("proto(%d) != IPPROTO_TCP, " - "or too short. otcplen = %d\n", - proto, otcplen); - return; + if (proto != IPPROTO_TCP || *otcplen < sizeof(struct tcphdr)) { + pr_debug("proto(%d) != IPPROTO_TCP or too short (len = %d)\n", + proto, *otcplen); + return NULL; } - if (skb_copy_bits(oldskb, tcphoff, &otcph, sizeof(struct tcphdr))) - BUG(); + otcph = skb_header_pointer(oldskb, tcphoff, sizeof(struct tcphdr), + otcph); + if (otcph == NULL) + return NULL; /* No RST for RST. */ - if (otcph.rst) { + if (otcph->rst) { pr_debug("RST is set\n"); - return; + return NULL; } /* Check checksum. 
*/ if (nf_ip6_checksum(oldskb, hook, tcphoff, IPPROTO_TCP)) { pr_debug("TCP checksum is invalid\n"); - return; - } - - memset(&fl6, 0, sizeof(fl6)); - fl6.flowi6_proto = IPPROTO_TCP; - fl6.saddr = oip6h->daddr; - fl6.daddr = oip6h->saddr; - fl6.fl6_sport = otcph.dest; - fl6.fl6_dport = otcph.source; - security_skb_classify_flow(oldskb, flowi6_to_flowi(&fl6)); - dst = ip6_route_output(net, NULL, &fl6); - if (dst == NULL || dst->error) { - dst_release(dst); - return; - } - dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0); - if (IS_ERR(dst)) - return; - - hh_len = (dst->dev->hard_header_len + 15)&~15; - nskb = alloc_skb(hh_len + 15 + dst->header_len + sizeof(struct ipv6hdr) - + sizeof(struct tcphdr) + dst->trailer_len, - GFP_ATOMIC); - - if (!nskb) { - net_dbg_ratelimited("cannot alloc skb\n"); - dst_release(dst); - return; + return NULL; } - skb_dst_set(nskb, dst); + return otcph; +} +EXPORT_SYMBOL_GPL(nf_reject_ip6_tcphdr_get); - skb_reserve(nskb, hh_len + dst->header_len); +struct ipv6hdr *nf_reject_ip6hdr_put(struct sk_buff *nskb, + const struct sk_buff *oldskb, + __be16 protocol, int hoplimit) +{ + struct ipv6hdr *ip6h; + const struct ipv6hdr *oip6h = ipv6_hdr(oldskb); +#define DEFAULT_TOS_VALUE 0x0U + const __u8 tclass = DEFAULT_TOS_VALUE; skb_put(nskb, sizeof(struct ipv6hdr)); skb_reset_network_header(nskb); ip6h = ipv6_hdr(nskb); ip6_flow_hdr(ip6h, tclass, 0); - ip6h->hop_limit = ip6_dst_hoplimit(dst); - ip6h->nexthdr = IPPROTO_TCP; + ip6h->hop_limit = hoplimit; + ip6h->nexthdr = protocol; ip6h->saddr = oip6h->daddr; ip6h->daddr = oip6h->saddr; + nskb->protocol = htons(ETH_P_IPV6); + + return ip6h; +} +EXPORT_SYMBOL_GPL(nf_reject_ip6hdr_put); + +void nf_reject_ip6_tcphdr_put(struct sk_buff *nskb, + const struct sk_buff *oldskb, + const struct tcphdr *oth, unsigned int otcplen) +{ + struct tcphdr *tcph; + int needs_ack; + skb_reset_transport_header(nskb); tcph = (struct tcphdr *)skb_put(nskb, sizeof(struct tcphdr)); /* Truncate to length (no data) */ 
tcph->doff = sizeof(struct tcphdr)/4; - tcph->source = otcph.dest; - tcph->dest = otcph.source; + tcph->source = oth->dest; + tcph->dest = oth->source; - if (otcph.ack) { + if (oth->ack) { needs_ack = 0; - tcph->seq = otcph.ack_seq; + tcph->seq = oth->ack_seq; tcph->ack_seq = 0; } else { needs_ack = 1; - tcph->ack_seq = htonl(ntohl(otcph.seq) + otcph.syn + otcph.fin - + otcplen - (otcph.doff<<2)); + tcph->ack_seq = htonl(ntohl(oth->seq) + oth->syn + oth->fin + + otcplen - (oth->doff<<2)); tcph->seq = 0; } @@ -139,6 +125,63 @@ void nf_send_reset6(struct net *net, struct sk_buff *oldskb, int hook) sizeof(struct tcphdr), IPPROTO_TCP, csum_partial(tcph, sizeof(struct tcphdr), 0)); +} +EXPORT_SYMBOL_GPL(nf_reject_ip6_tcphdr_put); + +void nf_send_reset6(struct net *net, struct sk_buff *oldskb, int hook) +{ + struct sk_buff *nskb; + struct tcphdr _otcph; + const struct tcphdr *otcph; + unsigned int otcplen, hh_len; + const struct ipv6hdr *oip6h = ipv6_hdr(oldskb); + struct ipv6hdr *ip6h; + struct dst_entry *dst = NULL; + struct flowi6 fl6; + + if ((!(ipv6_addr_type(&oip6h->saddr) & IPV6_ADDR_UNICAST)) || + (!(ipv6_addr_type(&oip6h->daddr) & IPV6_ADDR_UNICAST))) { + pr_debug("addr is not unicast.\n"); + return; + } + + otcph = nf_reject_ip6_tcphdr_get(oldskb, &_otcph, &otcplen, hook); + if (!otcph) + return; + + memset(&fl6, 0, sizeof(fl6)); + fl6.flowi6_proto = IPPROTO_TCP; + fl6.saddr = oip6h->daddr; + fl6.daddr = oip6h->saddr; + fl6.fl6_sport = otcph->dest; + fl6.fl6_dport = otcph->source; + security_skb_classify_flow(oldskb, flowi6_to_flowi(&fl6)); + dst = ip6_route_output(net, NULL, &fl6); + if (dst == NULL || dst->error) { + dst_release(dst); + return; + } + dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0); + if (IS_ERR(dst)) + return; + + hh_len = (dst->dev->hard_header_len + 15)&~15; + nskb = alloc_skb(hh_len + 15 + dst->header_len + sizeof(struct ipv6hdr) + + sizeof(struct tcphdr) + dst->trailer_len, + GFP_ATOMIC); + + if (!nskb) { + 
net_dbg_ratelimited("cannot alloc skb\n"); + dst_release(dst); + return; + } + + skb_dst_set(nskb, dst); + + skb_reserve(nskb, hh_len + dst->header_len); + ip6h = nf_reject_ip6hdr_put(nskb, oldskb, IPPROTO_TCP, + ip6_dst_hoplimit(dst)); + nf_reject_ip6_tcphdr_put(nskb, oldskb, otcph, otcplen); nf_ct_attach(nskb, oldskb); -- cgit v1.2.3 From 5816c3dafb6c63fd5c7b9f3f707c8565811d9916 Mon Sep 17 00:00:00 2001 From: Ryo Munakata Date: Wed, 5 Nov 2014 23:45:58 +0900 Subject: net/9p: remove a comment about pref member which doesn't exist Signed-off-by: Ryo Munakata Signed-off-by: David S. Miller --- include/net/9p/transport.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/net') diff --git a/include/net/9p/transport.h b/include/net/9p/transport.h index d9fa68f26c41..2a25dec30211 100644 --- a/include/net/9p/transport.h +++ b/include/net/9p/transport.h @@ -34,7 +34,6 @@ * @list: used to maintain a list of currently available transports * @name: the human-readable name of the transport * @maxsize: transport provided maximum packet size - * @pref: Preferences of this transport * @def: set if this transport should be considered the default * @create: member function to create a new connection on this transport * @close: member function to discard a connection on this transport -- cgit v1.2.3 From cfdf1e1ba5bf55e095cf4bcaa9585c4759f239e8 Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Mon, 10 Nov 2014 11:45:13 -0800 Subject: udptunnel: Add SKB_GSO_UDP_TUNNEL during gro_complete. When doing GRO processing for UDP tunnels, we never add SKB_GSO_UDP_TUNNEL to gso_type - only the type of the inner protocol is added (such as SKB_GSO_TCPV4). The result is that if the packet is later resegmented we will do GSO but not treat it as a tunnel. This results in UDP fragmentation of the outer header instead of (i.e.) TCP segmentation of the inner header as was originally on the wire. Signed-off-by: Jesse Gross Signed-off-by: David S. 
Miller --- drivers/net/vxlan.c | 2 ++ include/net/udp_tunnel.h | 9 +++++++++ net/ipv4/fou.c | 2 ++ 3 files changed, 13 insertions(+) (limited to 'include/net') diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index ca309820d39e..cfb892b265e8 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -621,6 +621,8 @@ static int vxlan_gro_complete(struct sk_buff *skb, int nhoff) int vxlan_len = sizeof(struct vxlanhdr) + sizeof(struct ethhdr); int err = -ENOSYS; + udp_tunnel_gro_complete(skb, nhoff); + eh = (struct ethhdr *)(skb->data + nhoff + sizeof(struct vxlanhdr)); type = eh->h_proto; diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h index a47790bcaa38..2a50a70ef587 100644 --- a/include/net/udp_tunnel.h +++ b/include/net/udp_tunnel.h @@ -100,6 +100,15 @@ static inline struct sk_buff *udp_tunnel_handle_offloads(struct sk_buff *skb, return iptunnel_handle_offloads(skb, udp_csum, type); } +static inline void udp_tunnel_gro_complete(struct sk_buff *skb, int nhoff) +{ + struct udphdr *uh; + + uh = (struct udphdr *)(skb->data + nhoff - sizeof(struct udphdr)); + skb_shinfo(skb)->gso_type |= uh->check ? + SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL; +} + static inline void udp_tunnel_encap_enable(struct socket *sock) { #if IS_ENABLED(CONFIG_IPV6) diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index 32e78924e246..606c520ffd5a 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -133,6 +133,8 @@ static int fou_gro_complete(struct sk_buff *skb, int nhoff) int err = -ENOSYS; const struct net_offload **offloads; + udp_tunnel_gro_complete(skb, nhoff); + rcu_read_lock(); offloads = NAPI_GRO_CB(skb)->is_ipv6 ? 
inet6_offloads : inet_offloads; ops = rcu_dereference(offloads[proto]); -- cgit v1.2.3 From b326dd37b94e29bf6a15940f4fa66aa21a678ab1 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 10 Nov 2014 21:14:12 +0100 Subject: netfilter: nf_tables: restore synchronous object release from commit/abort The existing xtables matches and targets, when used from nft_compat, may sleep from the destroy path, i.e. when removing rules. Since the objects are released via call_rcu from softirq context, this results in lockdep splats and possible lockups that may be hard to reproduce. Patrick also indicated that delayed object release via call_rcu can cause us problems in the ordering of event notifications when anonymous sets are in place. So, this patch restores the synchronous object release from the commit and abort paths. This includes a call to synchronize_rcu() to make sure that no packets are walking on the objects that are going to be released. This is slower though, but it's simple and it resolves the aforementioned problems. This is a partial revert of c7c32e7 ("netfilter: nf_tables: defer all object release via rcu") that was introduced in 3.16 to speed up interaction with userspace. 
Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 2 -- net/netfilter/nf_tables_api.c | 24 ++++++++---------------- 2 files changed, 8 insertions(+), 18 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 845c596bf594..3ae969e3acf0 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -396,14 +396,12 @@ struct nft_rule { /** * struct nft_trans - nf_tables object update in transaction * - * @rcu_head: rcu head to defer release of transaction data * @list: used internally * @msg_type: message type * @ctx: transaction context * @data: internal information related to the transaction */ struct nft_trans { - struct rcu_head rcu_head; struct list_head list; int msg_type; struct nft_ctx ctx; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 11ab4b078f3b..66e8425dbfe7 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3484,13 +3484,8 @@ static void nft_chain_commit_update(struct nft_trans *trans) } } -/* Schedule objects for release via rcu to make sure no packets are accesing - * removed rules. 
- */ -static void nf_tables_commit_release_rcu(struct rcu_head *rt) +static void nf_tables_commit_release(struct nft_trans *trans) { - struct nft_trans *trans = container_of(rt, struct nft_trans, rcu_head); - switch (trans->msg_type) { case NFT_MSG_DELTABLE: nf_tables_table_destroy(&trans->ctx); @@ -3612,10 +3607,11 @@ static int nf_tables_commit(struct sk_buff *skb) } } + synchronize_rcu(); + list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) { list_del(&trans->list); - trans->ctx.nla = NULL; - call_rcu(&trans->rcu_head, nf_tables_commit_release_rcu); + nf_tables_commit_release(trans); } nf_tables_gen_notify(net, skb, NFT_MSG_NEWGEN); @@ -3623,13 +3619,8 @@ static int nf_tables_commit(struct sk_buff *skb) return 0; } -/* Schedule objects for release via rcu to make sure no packets are accesing - * aborted rules. - */ -static void nf_tables_abort_release_rcu(struct rcu_head *rt) +static void nf_tables_abort_release(struct nft_trans *trans) { - struct nft_trans *trans = container_of(rt, struct nft_trans, rcu_head); - switch (trans->msg_type) { case NFT_MSG_NEWTABLE: nf_tables_table_destroy(&trans->ctx); @@ -3725,11 +3716,12 @@ static int nf_tables_abort(struct sk_buff *skb) } } + synchronize_rcu(); + list_for_each_entry_safe_reverse(trans, next, &net->nft.commit_list, list) { list_del(&trans->list); - trans->ctx.nla = NULL; - call_rcu(&trans->rcu_head, nf_tables_abort_release_rcu); + nf_tables_abort_release(trans); } return 0; -- cgit v1.2.3 From 23e62de33d179e229e4c1dfd93f90a3c7355c519 Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Thu, 13 Nov 2014 16:38:12 -0800 Subject: net: Add vxlan_gso_check() helper Most NICs that report NETIF_F_GSO_UDP_TUNNEL support VXLAN, and not other UDP-based encapsulation protocols where the format and size of the header differs. This patch implements a generic ndo_gso_check() for VXLAN which will only advertise GSO support when the skb looks like it contains VXLAN (or no UDP tunnelling at all). 
Implementation shamelessly stolen from Tom Herbert: http://thread.gmane.org/gmane.linux.network/332428/focus=333111 Signed-off-by: Joe Stringer Signed-off-by: David S. Miller --- drivers/net/vxlan.c | 13 +++++++++++++ include/net/vxlan.h | 2 ++ 2 files changed, 15 insertions(+) (limited to 'include/net') diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index fa9dc45b75a6..6b658638b456 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -1571,6 +1571,19 @@ static bool route_shortcircuit(struct net_device *dev, struct sk_buff *skb) return false; } +bool vxlan_gso_check(struct sk_buff *skb) +{ + if ((skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL) && + (skb->inner_protocol_type != ENCAP_TYPE_ETHER || + skb->inner_protocol != htons(ETH_P_TEB) || + (skb_inner_mac_header(skb) - skb_transport_header(skb) != + sizeof(struct udphdr) + sizeof(struct vxlanhdr)))) + return false; + + return true; +} +EXPORT_SYMBOL_GPL(vxlan_gso_check); + #if IS_ENABLED(CONFIG_IPV6) static int vxlan6_xmit_skb(struct vxlan_sock *vs, struct dst_entry *dst, struct sk_buff *skb, diff --git a/include/net/vxlan.h b/include/net/vxlan.h index d5f59f3fc35d..afadf8e53f20 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -45,6 +45,8 @@ int vxlan_xmit_skb(struct vxlan_sock *vs, __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df, __be16 src_port, __be16 dst_port, __be32 vni, bool xnet); +bool vxlan_gso_check(struct sk_buff *skb); + /* IP header + UDP + VXLAN + Ethernet header */ #define VXLAN_HEADROOM (20 + 8 + 8 + 14) /* IPv6 header + UDP + VXLAN + Ethernet header */ -- cgit v1.2.3 From 11bf7828a59880427403e13dcff8228d67e9e0f7 Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Mon, 17 Nov 2014 16:24:54 -0800 Subject: vxlan: Inline vxlan_gso_check(). Suggested-by: Or Gerlitz Signed-off-by: Joe Stringer Signed-off-by: David S. 
Miller --- drivers/net/vxlan.c | 19 ------------------- include/net/vxlan.h | 18 +++++++++++++++++- 2 files changed, 17 insertions(+), 20 deletions(-) (limited to 'include/net') diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 6b658638b456..e1e335c339e3 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -67,12 +67,6 @@ #define VXLAN_FLAGS 0x08000000 /* struct vxlanhdr.vx_flags required value. */ -/* VXLAN protocol header */ -struct vxlanhdr { - __be32 vx_flags; - __be32 vx_vni; -}; - /* UDP port for VXLAN traffic. * The IANA assigned port is 4789, but the Linux default is 8472 * for compatibility with early adopters. @@ -1571,19 +1565,6 @@ static bool route_shortcircuit(struct net_device *dev, struct sk_buff *skb) return false; } -bool vxlan_gso_check(struct sk_buff *skb) -{ - if ((skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL) && - (skb->inner_protocol_type != ENCAP_TYPE_ETHER || - skb->inner_protocol != htons(ETH_P_TEB) || - (skb_inner_mac_header(skb) - skb_transport_header(skb) != - sizeof(struct udphdr) + sizeof(struct vxlanhdr)))) - return false; - - return true; -} -EXPORT_SYMBOL_GPL(vxlan_gso_check); - #if IS_ENABLED(CONFIG_IPV6) static int vxlan6_xmit_skb(struct vxlan_sock *vs, struct dst_entry *dst, struct sk_buff *skb, diff --git a/include/net/vxlan.h b/include/net/vxlan.h index afadf8e53f20..57cccd0052e5 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -8,6 +8,12 @@ #define VNI_HASH_BITS 10 #define VNI_HASH_SIZE (1<<VNI_HASH_BITS) +/* VXLAN protocol header */ +struct vxlanhdr { + __be32 vx_flags; + __be32 vx_vni; +}; + struct vxlan_sock; typedef void (vxlan_rcv_t)(struct vxlan_sock *vh, struct sk_buff *skb, __be32 key); @@ -45,7 +51,17 @@ int vxlan_xmit_skb(struct vxlan_sock *vs, __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df, __be16 src_port, __be16 dst_port, __be32 vni, bool xnet); -bool vxlan_gso_check(struct sk_buff *skb); +static inline bool vxlan_gso_check(struct sk_buff *skb) +{ + if ((skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL) && + (skb->inner_protocol_type != ENCAP_TYPE_ETHER || + skb->inner_protocol != htons(ETH_P_TEB) || + (skb_inner_mac_header(skb) - skb_transport_header(skb) != + sizeof(struct udphdr) + sizeof(struct vxlanhdr)))) + return false; + + return true; +} /* IP header + UDP + VXLAN + Ethernet header */ #define VXLAN_HEADROOM (20 + 8 + 8 + 14) -- cgit v1.2.3