summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2014-10-04 03:53:36 +0400
committerDavid S. Miller <davem@davemloft.net>2014-10-04 03:53:36 +0400
commit6106253e69413785b29b9bfb493e2544c70461e7 (patch)
tree3433fc7ea36411d4511c776bb4bcafe2b1161e7f
parent01291202ed4ad548f9a7147d20425cb1d24f49a7 (diff)
parentbc1fc390e1728672b5b343b85185fcc1fe41043b (diff)
downloadlinux-6106253e69413785b29b9bfb493e2544c70461e7.tar.xz
Merge branch 'gudp'
Tom Herbert says: ==================== net: Generic UDP Encapsulation Generic UDP Encapsulation (GUE) is UDP encapsulation protocol which encapsulates packets of various IP protocols. The GUE protocol is described in http://tools.ietf.org/html/draft-herbert-gue-01. The receive path of GUE is implemented in the FOU over UDP module (FOU). This includes a UDP encap receive function for GUE as well as GUE specific GRO functions. Management and configuration of GUE ports shares most of the same code with FOU. For the transmit path, the previous FOU support for IPIP, sit, and GRE was simply extended for GUE (when GUE is enabled insert the GUE header on transmit in addition to UDP header inserted for FOU). Semantically GUE is the same as FOU in that the encapsulation (UDP and GUE headers) that are inserted on transmission and removed on reception so that IP packet is processed with the inner header. This patch set includes: - Some fixes to FOU, removal of IPv4,v6 specific GRO functions - Support to configure a GUE receive port - Implementation of GUE receive path (normal and GRO) - Additions to ip_tunnel netlink to configure GUE - GUE header inserion in ip_tunnel transmit path v2: - Include net/gue.h in patch set Testing: I ran performance numbers using netperf TCP_RR with 200 streams, comparing encapsulation without GUE, encapsulation with GUE, and encapsulation with FOU. GRE TCP_STREAM IPv4, FOU, UDP checksum enabled 14.04% TX CPU utilization 13.17% RX CPU utilization 9211 Mbps IPv4, GUE, UDP checksum enabled 14.99% TX CPU utilization 13.79% RX CPU utilization 9185 Mbps IPv4, FOU, UDP checksum disabled 13.14% TX CPU utilization 23.18% RX CPU utilization 9277 Mbps IPv4, GUE, UDP checksum disabled 13.66% TX CPU utilization 23.57% RX CPU utilization 9184 Mbps TCP_RR IPv4, FOU, UDP checksum enabled 94.2% CPU utilization 155/249/460 90/95/99% latencies 1.17018e+06 tps IPv4, GUE, UDP checksum enabled 93.9% CPU utilization 158/253/472 90/95/99% latencies 1.15045e+06 tps IPIP TCP_STREAM FOU, UDP checksum enabled 15.28% TX CPU utilization 13.92% RX CPU utilization 9342 Mbps GUE, UDP checksum enabled 13.99% TX CPU utilization 13.34% RX CPU utilization 9210 Mbps FOU, UDP checksum disabled 15.08% TX CPU utilization 24.64% RX CPU utilization 9226 Mbps GUE, UDP checksum disabled 15.90% TX CPU utilization 24.77% RX CPU utilization 9197 Mbps TCP_RR FOU, UDP checksum enabled 94.23% CPU utilization 149/237/429 90/95/99% latencies 1.19553e+06 tps GUE, UDP checksum enabled 93.75% CPU utilization 152/243/442 90/95/99% latencies 1.17027e+06 tps SIT TCP_STREAM FOU, UDP checksum enabled 14.47% TX CPU utilization 14.58% RX CPU utilization 9106 Mbps GUE, UDP checksum enabled 15.09% TX CPU utilization 14.84% RX CPU utilization 9080 Mbps FOU, UDP checksum disabled 15.70% TX CPU utilization 27.93% RX CPU utilization 9097 Mbps GUE, UDP checksum disabled 15.04% TX CPU utilization 27.54% RX CPU utilization 9073 Mbps TCP_RR FOU, UDP checksum enabled 96.9% CPU utilization 170/281/581 90/95/99% latencies 1.03372e+06 tps GUE, UDP checksum enabled 97.16% CPU utilization 172/286/576 90/95/99% latencies 1.00469e+06 tps ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--include/linux/netdevice.h3
-rw-r--r--include/net/gue.h23
-rw-r--r--include/uapi/linux/fou.h7
-rw-r--r--include/uapi/linux/if_tunnel.h1
-rw-r--r--net/ipv4/fou.c224
-rw-r--r--net/ipv4/ip_tunnel.c15
-rw-r--r--net/ipv4/udp_offload.c1
-rw-r--r--net/ipv6/udp_offload.c1
8 files changed, 235 insertions, 40 deletions
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 910fb17ad148..22d54b9b700d 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1886,6 +1886,9 @@ struct napi_gro_cb {
/* Number of checksums via CHECKSUM_UNNECESSARY */
u8 csum_cnt:3;
+ /* Used in foo-over-udp, set in udp[46]_gro_receive */
+ u8 is_ipv6:1;
+
/* used to support CHECKSUM_COMPLETE for tunneling protocols */
__wsum csum;
diff --git a/include/net/gue.h b/include/net/gue.h
new file mode 100644
index 000000000000..b6c332788084
--- /dev/null
+++ b/include/net/gue.h
@@ -0,0 +1,23 @@
+#ifndef __NET_GUE_H
+#define __NET_GUE_H
+
+struct guehdr {
+ union {
+ struct {
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+ __u8 hlen:4,
+ version:4;
+#elif defined (__BIG_ENDIAN_BITFIELD)
+ __u8 version:4,
+ hlen:4;
+#else
+#error "Please fix <asm/byteorder.h>"
+#endif
+ __u8 next_hdr;
+ __u16 flags;
+ };
+ __u32 word;
+ };
+};
+
+#endif
diff --git a/include/uapi/linux/fou.h b/include/uapi/linux/fou.h
index e03376de453d..8df06894da23 100644
--- a/include/uapi/linux/fou.h
+++ b/include/uapi/linux/fou.h
@@ -13,6 +13,7 @@ enum {
FOU_ATTR_PORT, /* u16 */
FOU_ATTR_AF, /* u8 */
FOU_ATTR_IPPROTO, /* u8 */
+ FOU_ATTR_TYPE, /* u8 */
__FOU_ATTR_MAX,
};
@@ -27,6 +28,12 @@ enum {
__FOU_CMD_MAX,
};
+enum {
+ FOU_ENCAP_UNSPEC,
+ FOU_ENCAP_DIRECT,
+ FOU_ENCAP_GUE,
+};
+
#define FOU_CMD_MAX (__FOU_CMD_MAX - 1)
#endif /* _UAPI_LINUX_FOU_H */
diff --git a/include/uapi/linux/if_tunnel.h b/include/uapi/linux/if_tunnel.h
index 7c832afdfa94..280d9e092283 100644
--- a/include/uapi/linux/if_tunnel.h
+++ b/include/uapi/linux/if_tunnel.h
@@ -64,6 +64,7 @@ enum {
enum tunnel_encap_types {
TUNNEL_ENCAP_NONE,
TUNNEL_ENCAP_FOU,
+ TUNNEL_ENCAP_GUE,
};
#define TUNNEL_ENCAP_FLAG_CSUM (1<<0)
diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c
index dced89fbe480..efa70ad44906 100644
--- a/net/ipv4/fou.c
+++ b/net/ipv4/fou.c
@@ -7,6 +7,7 @@
#include <linux/types.h>
#include <linux/kernel.h>
#include <net/genetlink.h>
+#include <net/gue.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <net/udp.h>
@@ -27,6 +28,7 @@ struct fou {
};
struct fou_cfg {
+ u16 type;
u8 protocol;
struct udp_port_cfg udp_config;
};
@@ -64,15 +66,51 @@ static int fou_udp_recv(struct sock *sk, struct sk_buff *skb)
sizeof(struct udphdr));
}
+static int gue_udp_recv(struct sock *sk, struct sk_buff *skb)
+{
+ struct fou *fou = fou_from_sock(sk);
+ size_t len;
+ struct guehdr *guehdr;
+ struct udphdr *uh;
+
+ if (!fou)
+ return 1;
+
+ len = sizeof(struct udphdr) + sizeof(struct guehdr);
+ if (!pskb_may_pull(skb, len))
+ goto drop;
+
+ uh = udp_hdr(skb);
+ guehdr = (struct guehdr *)&uh[1];
+
+ len += guehdr->hlen << 2;
+ if (!pskb_may_pull(skb, len))
+ goto drop;
+
+ if (guehdr->version != 0)
+ goto drop;
+
+ if (guehdr->flags) {
+ /* No support yet */
+ goto drop;
+ }
+
+ return fou_udp_encap_recv_deliver(skb, guehdr->next_hdr, len);
+drop:
+ kfree_skb(skb);
+ return 0;
+}
+
static struct sk_buff **fou_gro_receive(struct sk_buff **head,
- struct sk_buff *skb,
- const struct net_offload **offloads)
+ struct sk_buff *skb)
{
const struct net_offload *ops;
struct sk_buff **pp = NULL;
u8 proto = NAPI_GRO_CB(skb)->proto;
+ const struct net_offload **offloads;
rcu_read_lock();
+ offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads;
ops = rcu_dereference(offloads[proto]);
if (!ops || !ops->callbacks.gro_receive)
goto out_unlock;
@@ -85,14 +123,15 @@ out_unlock:
return pp;
}
-static int fou_gro_complete(struct sk_buff *skb, int nhoff,
- const struct net_offload **offloads)
+static int fou_gro_complete(struct sk_buff *skb, int nhoff)
{
const struct net_offload *ops;
u8 proto = NAPI_GRO_CB(skb)->proto;
int err = -ENOSYS;
+ const struct net_offload **offloads;
rcu_read_lock();
+ offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads;
ops = rcu_dereference(offloads[proto]);
if (WARN_ON(!ops || !ops->callbacks.gro_complete))
goto out_unlock;
@@ -105,26 +144,110 @@ out_unlock:
return err;
}
-static struct sk_buff **fou4_gro_receive(struct sk_buff **head,
- struct sk_buff *skb)
+static struct sk_buff **gue_gro_receive(struct sk_buff **head,
+ struct sk_buff *skb)
{
- return fou_gro_receive(head, skb, inet_offloads);
-}
+ const struct net_offload **offloads;
+ const struct net_offload *ops;
+ struct sk_buff **pp = NULL;
+ struct sk_buff *p;
+ u8 proto;
+ struct guehdr *guehdr;
+ unsigned int hlen, guehlen;
+ unsigned int off;
+ int flush = 1;
+
+ off = skb_gro_offset(skb);
+ hlen = off + sizeof(*guehdr);
+ guehdr = skb_gro_header_fast(skb, off);
+ if (skb_gro_header_hard(skb, hlen)) {
+ guehdr = skb_gro_header_slow(skb, hlen, off);
+ if (unlikely(!guehdr))
+ goto out;
+ }
-static int fou4_gro_complete(struct sk_buff *skb, int nhoff)
-{
- return fou_gro_complete(skb, nhoff, inet_offloads);
-}
+ proto = guehdr->next_hdr;
-static struct sk_buff **fou6_gro_receive(struct sk_buff **head,
- struct sk_buff *skb)
-{
- return fou_gro_receive(head, skb, inet6_offloads);
+ rcu_read_lock();
+ offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads;
+ ops = rcu_dereference(offloads[proto]);
+ if (WARN_ON(!ops || !ops->callbacks.gro_receive))
+ goto out_unlock;
+
+ guehlen = sizeof(*guehdr) + (guehdr->hlen << 2);
+
+ hlen = off + guehlen;
+ if (skb_gro_header_hard(skb, hlen)) {
+ guehdr = skb_gro_header_slow(skb, hlen, off);
+ if (unlikely(!guehdr))
+ goto out_unlock;
+ }
+
+ flush = 0;
+
+ for (p = *head; p; p = p->next) {
+ const struct guehdr *guehdr2;
+
+ if (!NAPI_GRO_CB(p)->same_flow)
+ continue;
+
+ guehdr2 = (struct guehdr *)(p->data + off);
+
+ /* Compare base GUE header to be equal (covers
+ * hlen, version, next_hdr, and flags.
+ */
+ if (guehdr->word != guehdr2->word) {
+ NAPI_GRO_CB(p)->same_flow = 0;
+ continue;
+ }
+
+ /* Compare optional fields are the same. */
+ if (guehdr->hlen && memcmp(&guehdr[1], &guehdr2[1],
+ guehdr->hlen << 2)) {
+ NAPI_GRO_CB(p)->same_flow = 0;
+ continue;
+ }
+ }
+
+ skb_gro_pull(skb, guehlen);
+
+ /* Adjusted NAPI_GRO_CB(skb)->csum after skb_gro_pull()*/
+ skb_gro_postpull_rcsum(skb, guehdr, guehlen);
+
+ pp = ops->callbacks.gro_receive(head, skb);
+
+out_unlock:
+ rcu_read_unlock();
+out:
+ NAPI_GRO_CB(skb)->flush |= flush;
+
+ return pp;
}
-static int fou6_gro_complete(struct sk_buff *skb, int nhoff)
+static int gue_gro_complete(struct sk_buff *skb, int nhoff)
{
- return fou_gro_complete(skb, nhoff, inet6_offloads);
+ const struct net_offload **offloads;
+ struct guehdr *guehdr = (struct guehdr *)(skb->data + nhoff);
+ const struct net_offload *ops;
+ unsigned int guehlen;
+ u8 proto;
+ int err = -ENOENT;
+
+ proto = guehdr->next_hdr;
+
+ guehlen = sizeof(*guehdr) + (guehdr->hlen << 2);
+
+ rcu_read_lock();
+ offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads;
+ ops = rcu_dereference(offloads[proto]);
+ if (WARN_ON(!ops || !ops->callbacks.gro_complete))
+ goto out_unlock;
+
+ err = ops->callbacks.gro_complete(skb, nhoff + guehlen);
+
+out_unlock:
+ rcu_read_unlock();
+ return err;
}
static int fou_add_to_port_list(struct fou *fou)
@@ -162,6 +285,28 @@ static void fou_release(struct fou *fou)
kfree(fou);
}
+static int fou_encap_init(struct sock *sk, struct fou *fou, struct fou_cfg *cfg)
+{
+ udp_sk(sk)->encap_rcv = fou_udp_recv;
+ fou->protocol = cfg->protocol;
+ fou->udp_offloads.callbacks.gro_receive = fou_gro_receive;
+ fou->udp_offloads.callbacks.gro_complete = fou_gro_complete;
+ fou->udp_offloads.port = cfg->udp_config.local_udp_port;
+ fou->udp_offloads.ipproto = cfg->protocol;
+
+ return 0;
+}
+
+static int gue_encap_init(struct sock *sk, struct fou *fou, struct fou_cfg *cfg)
+{
+ udp_sk(sk)->encap_rcv = gue_udp_recv;
+ fou->udp_offloads.callbacks.gro_receive = gue_gro_receive;
+ fou->udp_offloads.callbacks.gro_complete = gue_gro_complete;
+ fou->udp_offloads.port = cfg->udp_config.local_udp_port;
+
+ return 0;
+}
+
static int fou_create(struct net *net, struct fou_cfg *cfg,
struct socket **sockp)
{
@@ -184,10 +329,24 @@ static int fou_create(struct net *net, struct fou_cfg *cfg,
sk = sock->sk;
- /* Mark socket as an encapsulation socket. See net/ipv4/udp.c */
- fou->protocol = cfg->protocol;
- fou->port = cfg->udp_config.local_udp_port;
- udp_sk(sk)->encap_rcv = fou_udp_recv;
+ fou->port = cfg->udp_config.local_udp_port;
+
+ /* Initial for fou type */
+ switch (cfg->type) {
+ case FOU_ENCAP_DIRECT:
+ err = fou_encap_init(sk, fou, cfg);
+ if (err)
+ goto error;
+ break;
+ case FOU_ENCAP_GUE:
+ err = gue_encap_init(sk, fou, cfg);
+ if (err)
+ goto error;
+ break;
+ default:
+ err = -EINVAL;
+ goto error;
+ }
udp_sk(sk)->encap_type = 1;
udp_encap_enable();
@@ -199,23 +358,6 @@ static int fou_create(struct net *net, struct fou_cfg *cfg,
sk->sk_allocation = GFP_ATOMIC;
- switch (cfg->udp_config.family) {
- case AF_INET:
- fou->udp_offloads.callbacks.gro_receive = fou4_gro_receive;
- fou->udp_offloads.callbacks.gro_complete = fou4_gro_complete;
- break;
- case AF_INET6:
- fou->udp_offloads.callbacks.gro_receive = fou6_gro_receive;
- fou->udp_offloads.callbacks.gro_complete = fou6_gro_complete;
- break;
- default:
- err = -EPFNOSUPPORT;
- goto error;
- }
-
- fou->udp_offloads.port = cfg->udp_config.local_udp_port;
- fou->udp_offloads.ipproto = cfg->protocol;
-
if (cfg->udp_config.family == AF_INET) {
err = udp_add_offload(&fou->udp_offloads);
if (err)
@@ -272,6 +414,7 @@ static struct nla_policy fou_nl_policy[FOU_ATTR_MAX + 1] = {
[FOU_ATTR_PORT] = { .type = NLA_U16, },
[FOU_ATTR_AF] = { .type = NLA_U8, },
[FOU_ATTR_IPPROTO] = { .type = NLA_U8, },
+ [FOU_ATTR_TYPE] = { .type = NLA_U8, },
};
static int parse_nl_config(struct genl_info *info,
@@ -299,6 +442,9 @@ static int parse_nl_config(struct genl_info *info,
if (info->attrs[FOU_ATTR_IPPROTO])
cfg->protocol = nla_get_u8(info->attrs[FOU_ATTR_IPPROTO]);
+ if (info->attrs[FOU_ATTR_TYPE])
+ cfg->type = nla_get_u8(info->attrs[FOU_ATTR_TYPE]);
+
return 0;
}
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index 2272de90c2d4..0bb8e141eacc 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -56,6 +56,7 @@
#include <net/netns/generic.h>
#include <net/rtnetlink.h>
#include <net/udp.h>
+#include <net/gue.h>
#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6.h>
@@ -495,6 +496,8 @@ static int ip_encap_hlen(struct ip_tunnel_encap *e)
return 0;
case TUNNEL_ENCAP_FOU:
return sizeof(struct udphdr);
+ case TUNNEL_ENCAP_GUE:
+ return sizeof(struct udphdr) + sizeof(struct guehdr);
default:
return -EINVAL;
}
@@ -546,6 +549,15 @@ static int fou_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
skb_reset_transport_header(skb);
uh = udp_hdr(skb);
+ if (e->type == TUNNEL_ENCAP_GUE) {
+ struct guehdr *guehdr = (struct guehdr *)&uh[1];
+
+ guehdr->version = 0;
+ guehdr->hlen = 0;
+ guehdr->flags = 0;
+ guehdr->next_hdr = *protocol;
+ }
+
uh->dest = e->dport;
uh->source = sport;
uh->len = htons(skb->len);
@@ -565,6 +577,7 @@ int ip_tunnel_encap(struct sk_buff *skb, struct ip_tunnel *t,
case TUNNEL_ENCAP_NONE:
return 0;
case TUNNEL_ENCAP_FOU:
+ case TUNNEL_ENCAP_GUE:
return fou_build_header(skb, &t->encap, t->encap_hlen,
protocol, fl4);
default:
@@ -759,7 +772,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
df |= (inner_iph->frag_off&htons(IP_DF));
max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
- + rt->dst.header_len;
+ + rt->dst.header_len + ip_encap_hlen(&tunnel->encap);
if (max_headroom > dev->needed_headroom)
dev->needed_headroom = max_headroom;
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 8c35f2c939ee..507310ef4b56 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -334,6 +334,7 @@ static struct sk_buff **udp4_gro_receive(struct sk_buff **head,
skb_gro_checksum_try_convert(skb, IPPROTO_UDP, uh->check,
inet_gro_compute_pseudo);
skip:
+ NAPI_GRO_CB(skb)->is_ipv6 = 0;
return udp_gro_receive(head, skb, uh);
flush:
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index 8f96988c1db2..6b8f543f6ac6 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -140,6 +140,7 @@ static struct sk_buff **udp6_gro_receive(struct sk_buff **head,
ip6_gro_compute_pseudo);
skip:
+ NAPI_GRO_CB(skb)->is_ipv6 = 1;
return udp_gro_receive(head, skb, uh);
flush: