summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2015-08-11 00:03:55 +0300
committerDavid S. Miller <davem@davemloft.net>2015-08-11 00:03:55 +0300
commitc73a91b8f944992ec6675fcd34247b9cdf8482ea (patch)
tree2b687217f51c006eea4bc1fa10891f70677ab3c0
parentfb811395cd5a71b9e94a068f524a6f4a21b67bdb (diff)
parent9f57c67c379d88a10e8ad676426fee5ae7341b14 (diff)
downloadlinux-c73a91b8f944992ec6675fcd34247b9cdf8482ea.tar.xz
Merge branch 'ovs-gre'
Pravin B Shelar says: ==================== GRE: Use flow based tunneling for OVS GRE vport. The following patches make use of the new GRE tunnel metadata collection feature. This allows us to directly use the netdev based GRE tunnel implementation. While doing so I have removed the GRE demux API, which was targeted at OVS. Most of the GRE protocol code is now consolidated in the ip_gre module. v5-v4: Fixed Kconfig dependency for vport-gre module. v3-v4: Added interface to ip-gre device to enable metadata collection. While doing this I split the second patch into two patches. v2-v3: Add API to create GRE flow based device. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--include/net/gre.h92
-rw-r--r--include/net/ip_tunnels.h7
-rw-r--r--include/uapi/linux/if_tunnel.h1
-rw-r--r--net/ipv4/gre_demux.c235
-rw-r--r--net/ipv4/ip_gre.c446
-rw-r--r--net/ipv4/ip_tunnel.c37
-rw-r--r--net/ipv4/ipip.c2
-rw-r--r--net/ipv6/sit.c2
-rw-r--r--net/openvswitch/Kconfig2
-rw-r--r--net/openvswitch/vport-gre.c237
-rw-r--r--net/openvswitch/vport-netdev.c21
-rw-r--r--net/openvswitch/vport-netdev.h2
-rw-r--r--net/openvswitch/vport-vxlan.c17
13 files changed, 503 insertions, 598 deletions
diff --git a/include/net/gre.h b/include/net/gre.h
index b53182018743..97eafdc47eea 100644
--- a/include/net/gre.h
+++ b/include/net/gre.h
@@ -4,6 +4,12 @@
#include <linux/skbuff.h>
#include <net/ip_tunnels.h>
+struct gre_base_hdr {
+ __be16 flags;
+ __be16 protocol;
+};
+#define GRE_HEADER_SECTION 4
+
#define GREPROTO_CISCO 0
#define GREPROTO_PPTP 1
#define GREPROTO_MAX 2
@@ -14,91 +20,9 @@ struct gre_protocol {
void (*err_handler)(struct sk_buff *skb, u32 info);
};
-struct gre_base_hdr {
- __be16 flags;
- __be16 protocol;
-};
-#define GRE_HEADER_SECTION 4
-
int gre_add_protocol(const struct gre_protocol *proto, u8 version);
int gre_del_protocol(const struct gre_protocol *proto, u8 version);
-struct gre_cisco_protocol {
- int (*handler)(struct sk_buff *skb, const struct tnl_ptk_info *tpi);
- int (*err_handler)(struct sk_buff *skb, u32 info,
- const struct tnl_ptk_info *tpi);
- u8 priority;
-};
-
-int gre_cisco_register(struct gre_cisco_protocol *proto);
-int gre_cisco_unregister(struct gre_cisco_protocol *proto);
-
-void gre_build_header(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
- int hdr_len);
-
-static inline struct sk_buff *gre_handle_offloads(struct sk_buff *skb,
- bool csum)
-{
- return iptunnel_handle_offloads(skb, csum,
- csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE);
-}
-
-
-static inline int ip_gre_calc_hlen(__be16 o_flags)
-{
- int addend = 4;
-
- if (o_flags&TUNNEL_CSUM)
- addend += 4;
- if (o_flags&TUNNEL_KEY)
- addend += 4;
- if (o_flags&TUNNEL_SEQ)
- addend += 4;
- return addend;
-}
-
-static inline __be16 gre_flags_to_tnl_flags(__be16 flags)
-{
- __be16 tflags = 0;
-
- if (flags & GRE_CSUM)
- tflags |= TUNNEL_CSUM;
- if (flags & GRE_ROUTING)
- tflags |= TUNNEL_ROUTING;
- if (flags & GRE_KEY)
- tflags |= TUNNEL_KEY;
- if (flags & GRE_SEQ)
- tflags |= TUNNEL_SEQ;
- if (flags & GRE_STRICT)
- tflags |= TUNNEL_STRICT;
- if (flags & GRE_REC)
- tflags |= TUNNEL_REC;
- if (flags & GRE_VERSION)
- tflags |= TUNNEL_VERSION;
-
- return tflags;
-}
-
-static inline __be16 tnl_flags_to_gre_flags(__be16 tflags)
-{
- __be16 flags = 0;
-
- if (tflags & TUNNEL_CSUM)
- flags |= GRE_CSUM;
- if (tflags & TUNNEL_ROUTING)
- flags |= GRE_ROUTING;
- if (tflags & TUNNEL_KEY)
- flags |= GRE_KEY;
- if (tflags & TUNNEL_SEQ)
- flags |= GRE_SEQ;
- if (tflags & TUNNEL_STRICT)
- flags |= GRE_STRICT;
- if (tflags & TUNNEL_REC)
- flags |= GRE_REC;
- if (tflags & TUNNEL_VERSION)
- flags |= GRE_VERSION;
-
- return flags;
-}
-
+struct net_device *gretap_fb_dev_create(struct net *net, const char *name,
+ u8 name_assign_type);
#endif
diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index 47984415f5d1..984dbfa15e13 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -82,6 +82,8 @@ struct ip_tunnel_dst {
__be32 saddr;
};
+struct metadata_dst;
+
struct ip_tunnel {
struct ip_tunnel __rcu *next;
struct hlist_node hash_node;
@@ -115,6 +117,7 @@ struct ip_tunnel {
unsigned int prl_count; /* # of entries in PRL */
int ip_tnl_net_id;
struct gro_cells gro_cells;
+ bool collect_md;
};
#define TUNNEL_CSUM __cpu_to_be16(0x01)
@@ -149,6 +152,7 @@ struct tnl_ptk_info {
struct ip_tunnel_net {
struct net_device *fb_tunnel_dev;
struct hlist_head tunnels[IP_TNL_HASH_SIZE];
+ struct ip_tunnel __rcu *collect_md_tun;
};
struct ip_tunnel_encap_ops {
@@ -235,7 +239,8 @@ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
__be32 key);
int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
- const struct tnl_ptk_info *tpi, bool log_ecn_error);
+ const struct tnl_ptk_info *tpi, struct metadata_dst *tun_dst,
+ bool log_ecn_error);
int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
struct ip_tunnel_parm *p);
int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
diff --git a/include/uapi/linux/if_tunnel.h b/include/uapi/linux/if_tunnel.h
index bd3cc11a431f..af4de90ba27d 100644
--- a/include/uapi/linux/if_tunnel.h
+++ b/include/uapi/linux/if_tunnel.h
@@ -112,6 +112,7 @@ enum {
IFLA_GRE_ENCAP_FLAGS,
IFLA_GRE_ENCAP_SPORT,
IFLA_GRE_ENCAP_DPORT,
+ IFLA_GRE_COLLECT_METADATA,
__IFLA_GRE_MAX,
};
diff --git a/net/ipv4/gre_demux.c b/net/ipv4/gre_demux.c
index 4a7b5b2a1ce3..d9c552a721fc 100644
--- a/net/ipv4/gre_demux.c
+++ b/net/ipv4/gre_demux.c
@@ -31,7 +31,6 @@
#include <net/xfrm.h>
static const struct gre_protocol __rcu *gre_proto[GREPROTO_MAX] __read_mostly;
-static struct gre_cisco_protocol __rcu *gre_cisco_proto_list[GRE_IP_PROTO_MAX];
int gre_add_protocol(const struct gre_protocol *proto, u8 version)
{
@@ -61,197 +60,6 @@ int gre_del_protocol(const struct gre_protocol *proto, u8 version)
}
EXPORT_SYMBOL_GPL(gre_del_protocol);
-void gre_build_header(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
- int hdr_len)
-{
- struct gre_base_hdr *greh;
-
- skb_push(skb, hdr_len);
-
- skb_reset_transport_header(skb);
- greh = (struct gre_base_hdr *)skb->data;
- greh->flags = tnl_flags_to_gre_flags(tpi->flags);
- greh->protocol = tpi->proto;
-
- if (tpi->flags&(TUNNEL_KEY|TUNNEL_CSUM|TUNNEL_SEQ)) {
- __be32 *ptr = (__be32 *)(((u8 *)greh) + hdr_len - 4);
-
- if (tpi->flags&TUNNEL_SEQ) {
- *ptr = tpi->seq;
- ptr--;
- }
- if (tpi->flags&TUNNEL_KEY) {
- *ptr = tpi->key;
- ptr--;
- }
- if (tpi->flags&TUNNEL_CSUM &&
- !(skb_shinfo(skb)->gso_type &
- (SKB_GSO_GRE|SKB_GSO_GRE_CSUM))) {
- *ptr = 0;
- *(__sum16 *)ptr = csum_fold(skb_checksum(skb, 0,
- skb->len, 0));
- }
- }
-}
-EXPORT_SYMBOL_GPL(gre_build_header);
-
-static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
- bool *csum_err)
-{
- const struct gre_base_hdr *greh;
- __be32 *options;
- int hdr_len;
-
- if (unlikely(!pskb_may_pull(skb, sizeof(struct gre_base_hdr))))
- return -EINVAL;
-
- greh = (struct gre_base_hdr *)skb_transport_header(skb);
- if (unlikely(greh->flags & (GRE_VERSION | GRE_ROUTING)))
- return -EINVAL;
-
- tpi->flags = gre_flags_to_tnl_flags(greh->flags);
- hdr_len = ip_gre_calc_hlen(tpi->flags);
-
- if (!pskb_may_pull(skb, hdr_len))
- return -EINVAL;
-
- greh = (struct gre_base_hdr *)skb_transport_header(skb);
- tpi->proto = greh->protocol;
-
- options = (__be32 *)(greh + 1);
- if (greh->flags & GRE_CSUM) {
- if (skb_checksum_simple_validate(skb)) {
- *csum_err = true;
- return -EINVAL;
- }
-
- skb_checksum_try_convert(skb, IPPROTO_GRE, 0,
- null_compute_pseudo);
-
- options++;
- }
-
- if (greh->flags & GRE_KEY) {
- tpi->key = *options;
- options++;
- } else
- tpi->key = 0;
-
- if (unlikely(greh->flags & GRE_SEQ)) {
- tpi->seq = *options;
- options++;
- } else
- tpi->seq = 0;
-
- /* WCCP version 1 and 2 protocol decoding.
- * - Change protocol to IP
- * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
- */
- if (greh->flags == 0 && tpi->proto == htons(ETH_P_WCCP)) {
- tpi->proto = htons(ETH_P_IP);
- if ((*(u8 *)options & 0xF0) != 0x40) {
- hdr_len += 4;
- if (!pskb_may_pull(skb, hdr_len))
- return -EINVAL;
- }
- }
-
- return iptunnel_pull_header(skb, hdr_len, tpi->proto);
-}
-
-static int gre_cisco_rcv(struct sk_buff *skb)
-{
- struct tnl_ptk_info tpi;
- int i;
- bool csum_err = false;
-
-#ifdef CONFIG_NET_IPGRE_BROADCAST
- if (ipv4_is_multicast(ip_hdr(skb)->daddr)) {
- /* Looped back packet, drop it! */
- if (rt_is_output_route(skb_rtable(skb)))
- goto drop;
- }
-#endif
-
- if (parse_gre_header(skb, &tpi, &csum_err) < 0)
- goto drop;
-
- rcu_read_lock();
- for (i = 0; i < GRE_IP_PROTO_MAX; i++) {
- struct gre_cisco_protocol *proto;
- int ret;
-
- proto = rcu_dereference(gre_cisco_proto_list[i]);
- if (!proto)
- continue;
- ret = proto->handler(skb, &tpi);
- if (ret == PACKET_RCVD) {
- rcu_read_unlock();
- return 0;
- }
- }
- rcu_read_unlock();
-
- icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
-drop:
- kfree_skb(skb);
- return 0;
-}
-
-static void gre_cisco_err(struct sk_buff *skb, u32 info)
-{
- /* All the routers (except for Linux) return only
- * 8 bytes of packet payload. It means, that precise relaying of
- * ICMP in the real Internet is absolutely infeasible.
- *
- * Moreover, Cisco "wise men" put GRE key to the third word
- * in GRE header. It makes impossible maintaining even soft
- * state for keyed
- * GRE tunnels with enabled checksum. Tell them "thank you".
- *
- * Well, I wonder, rfc1812 was written by Cisco employee,
- * what the hell these idiots break standards established
- * by themselves???
- */
-
- const int type = icmp_hdr(skb)->type;
- const int code = icmp_hdr(skb)->code;
- struct tnl_ptk_info tpi;
- bool csum_err = false;
- int i;
-
- if (parse_gre_header(skb, &tpi, &csum_err)) {
- if (!csum_err) /* ignore csum errors. */
- return;
- }
-
- if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
- ipv4_update_pmtu(skb, dev_net(skb->dev), info,
- skb->dev->ifindex, 0, IPPROTO_GRE, 0);
- return;
- }
- if (type == ICMP_REDIRECT) {
- ipv4_redirect(skb, dev_net(skb->dev), skb->dev->ifindex, 0,
- IPPROTO_GRE, 0);
- return;
- }
-
- rcu_read_lock();
- for (i = 0; i < GRE_IP_PROTO_MAX; i++) {
- struct gre_cisco_protocol *proto;
-
- proto = rcu_dereference(gre_cisco_proto_list[i]);
- if (!proto)
- continue;
-
- if (proto->err_handler(skb, info, &tpi) == PACKET_RCVD)
- goto out;
-
- }
-out:
- rcu_read_unlock();
-}
-
static int gre_rcv(struct sk_buff *skb)
{
const struct gre_protocol *proto;
@@ -302,60 +110,19 @@ static const struct net_protocol net_gre_protocol = {
.netns_ok = 1,
};
-static const struct gre_protocol ipgre_protocol = {
- .handler = gre_cisco_rcv,
- .err_handler = gre_cisco_err,
-};
-
-int gre_cisco_register(struct gre_cisco_protocol *newp)
-{
- struct gre_cisco_protocol **proto = (struct gre_cisco_protocol **)
- &gre_cisco_proto_list[newp->priority];
-
- return (cmpxchg(proto, NULL, newp) == NULL) ? 0 : -EBUSY;
-}
-EXPORT_SYMBOL_GPL(gre_cisco_register);
-
-int gre_cisco_unregister(struct gre_cisco_protocol *del_proto)
-{
- struct gre_cisco_protocol **proto = (struct gre_cisco_protocol **)
- &gre_cisco_proto_list[del_proto->priority];
- int ret;
-
- ret = (cmpxchg(proto, del_proto, NULL) == del_proto) ? 0 : -EINVAL;
-
- if (ret)
- return ret;
-
- synchronize_net();
- return 0;
-}
-EXPORT_SYMBOL_GPL(gre_cisco_unregister);
-
static int __init gre_init(void)
{
pr_info("GRE over IPv4 demultiplexor driver\n");
if (inet_add_protocol(&net_gre_protocol, IPPROTO_GRE) < 0) {
pr_err("can't add protocol\n");
- goto err;
- }
-
- if (gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO) < 0) {
- pr_info("%s: can't add ipgre handler\n", __func__);
- goto err_gre;
+ return -EAGAIN;
}
-
return 0;
-err_gre:
- inet_del_protocol(&net_gre_protocol, IPPROTO_GRE);
-err:
- return -EAGAIN;
}
static void __exit gre_exit(void)
{
- gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
inet_del_protocol(&net_gre_protocol, IPPROTO_GRE);
}
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 5fd706473c73..fb44d693796e 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -25,6 +25,7 @@
#include <linux/udp.h>
#include <linux/if_arp.h>
#include <linux/mroute.h>
+#include <linux/if_vlan.h>
#include <linux/init.h>
#include <linux/in6.h>
#include <linux/inetdevice.h>
@@ -47,6 +48,7 @@
#include <net/netns/generic.h>
#include <net/rtnetlink.h>
#include <net/gre.h>
+#include <net/dst_metadata.h>
#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6.h>
@@ -121,8 +123,127 @@ static int ipgre_tunnel_init(struct net_device *dev);
static int ipgre_net_id __read_mostly;
static int gre_tap_net_id __read_mostly;
-static int ipgre_err(struct sk_buff *skb, u32 info,
- const struct tnl_ptk_info *tpi)
+static int ip_gre_calc_hlen(__be16 o_flags)
+{
+ int addend = 4;
+
+ if (o_flags & TUNNEL_CSUM)
+ addend += 4;
+ if (o_flags & TUNNEL_KEY)
+ addend += 4;
+ if (o_flags & TUNNEL_SEQ)
+ addend += 4;
+ return addend;
+}
+
+static __be16 gre_flags_to_tnl_flags(__be16 flags)
+{
+ __be16 tflags = 0;
+
+ if (flags & GRE_CSUM)
+ tflags |= TUNNEL_CSUM;
+ if (flags & GRE_ROUTING)
+ tflags |= TUNNEL_ROUTING;
+ if (flags & GRE_KEY)
+ tflags |= TUNNEL_KEY;
+ if (flags & GRE_SEQ)
+ tflags |= TUNNEL_SEQ;
+ if (flags & GRE_STRICT)
+ tflags |= TUNNEL_STRICT;
+ if (flags & GRE_REC)
+ tflags |= TUNNEL_REC;
+ if (flags & GRE_VERSION)
+ tflags |= TUNNEL_VERSION;
+
+ return tflags;
+}
+
+static __be16 tnl_flags_to_gre_flags(__be16 tflags)
+{
+ __be16 flags = 0;
+
+ if (tflags & TUNNEL_CSUM)
+ flags |= GRE_CSUM;
+ if (tflags & TUNNEL_ROUTING)
+ flags |= GRE_ROUTING;
+ if (tflags & TUNNEL_KEY)
+ flags |= GRE_KEY;
+ if (tflags & TUNNEL_SEQ)
+ flags |= GRE_SEQ;
+ if (tflags & TUNNEL_STRICT)
+ flags |= GRE_STRICT;
+ if (tflags & TUNNEL_REC)
+ flags |= GRE_REC;
+ if (tflags & TUNNEL_VERSION)
+ flags |= GRE_VERSION;
+
+ return flags;
+}
+
+static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
+ bool *csum_err)
+{
+ const struct gre_base_hdr *greh;
+ __be32 *options;
+ int hdr_len;
+
+ if (unlikely(!pskb_may_pull(skb, sizeof(struct gre_base_hdr))))
+ return -EINVAL;
+
+ greh = (struct gre_base_hdr *)skb_transport_header(skb);
+ if (unlikely(greh->flags & (GRE_VERSION | GRE_ROUTING)))
+ return -EINVAL;
+
+ tpi->flags = gre_flags_to_tnl_flags(greh->flags);
+ hdr_len = ip_gre_calc_hlen(tpi->flags);
+
+ if (!pskb_may_pull(skb, hdr_len))
+ return -EINVAL;
+
+ greh = (struct gre_base_hdr *)skb_transport_header(skb);
+ tpi->proto = greh->protocol;
+
+ options = (__be32 *)(greh + 1);
+ if (greh->flags & GRE_CSUM) {
+ if (skb_checksum_simple_validate(skb)) {
+ *csum_err = true;
+ return -EINVAL;
+ }
+
+ skb_checksum_try_convert(skb, IPPROTO_GRE, 0,
+ null_compute_pseudo);
+ options++;
+ }
+
+ if (greh->flags & GRE_KEY) {
+ tpi->key = *options;
+ options++;
+ } else {
+ tpi->key = 0;
+ }
+ if (unlikely(greh->flags & GRE_SEQ)) {
+ tpi->seq = *options;
+ options++;
+ } else {
+ tpi->seq = 0;
+ }
+ /* WCCP version 1 and 2 protocol decoding.
+ * - Change protocol to IP
+ * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
+ */
+ if (greh->flags == 0 && tpi->proto == htons(ETH_P_WCCP)) {
+ tpi->proto = htons(ETH_P_IP);
+ if ((*(u8 *)options & 0xF0) != 0x40) {
+ hdr_len += 4;
+ if (!pskb_may_pull(skb, hdr_len))
+ return -EINVAL;
+ }
+ }
+ return iptunnel_pull_header(skb, hdr_len, tpi->proto);
+}
+
+static void ipgre_err(struct sk_buff *skb, u32 info,
+ const struct tnl_ptk_info *tpi)
{
/* All the routers (except for Linux) return only
@@ -148,14 +269,14 @@ static int ipgre_err(struct sk_buff *skb, u32 info,
switch (type) {
default:
case ICMP_PARAMETERPROB:
- return PACKET_RCVD;
+ return;
case ICMP_DEST_UNREACH:
switch (code) {
case ICMP_SR_FAILED:
case ICMP_PORT_UNREACH:
/* Impossible event. */
- return PACKET_RCVD;
+ return;
default:
/* All others are translated to HOST_UNREACH.
rfc2003 contains "deep thoughts" about NET_UNREACH,
@@ -164,9 +285,10 @@ static int ipgre_err(struct sk_buff *skb, u32 info,
break;
}
break;
+
case ICMP_TIME_EXCEEDED:
if (code != ICMP_EXC_TTL)
- return PACKET_RCVD;
+ return;
break;
case ICMP_REDIRECT:
@@ -183,26 +305,85 @@ static int ipgre_err(struct sk_buff *skb, u32 info,
iph->daddr, iph->saddr, tpi->key);
if (!t)
- return PACKET_REJECT;
+ return;
if (t->parms.iph.daddr == 0 ||
ipv4_is_multicast(t->parms.iph.daddr))
- return PACKET_RCVD;
+ return;
if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
- return PACKET_RCVD;
+ return;
if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
t->err_count++;
else
t->err_count = 1;
t->err_time = jiffies;
- return PACKET_RCVD;
+}
+
+static void gre_err(struct sk_buff *skb, u32 info)
+{
+ /* All the routers (except for Linux) return only
+ * 8 bytes of packet payload. It means, that precise relaying of
+ * ICMP in the real Internet is absolutely infeasible.
+ *
+ * Moreover, Cisco "wise men" put GRE key to the third word
+ * in GRE header. It makes impossible maintaining even soft
+ * state for keyed
+ * GRE tunnels with enabled checksum. Tell them "thank you".
+ *
+ * Well, I wonder, rfc1812 was written by Cisco employee,
+ * what the hell these idiots break standards established
+ * by themselves???
+ */
+
+ const int type = icmp_hdr(skb)->type;
+ const int code = icmp_hdr(skb)->code;
+ struct tnl_ptk_info tpi;
+ bool csum_err = false;
+
+ if (parse_gre_header(skb, &tpi, &csum_err)) {
+ if (!csum_err) /* ignore csum errors. */
+ return;
+ }
+
+ if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
+ ipv4_update_pmtu(skb, dev_net(skb->dev), info,
+ skb->dev->ifindex, 0, IPPROTO_GRE, 0);
+ return;
+ }
+ if (type == ICMP_REDIRECT) {
+ ipv4_redirect(skb, dev_net(skb->dev), skb->dev->ifindex, 0,
+ IPPROTO_GRE, 0);
+ return;
+ }
+
+ ipgre_err(skb, info, &tpi);
+}
+
+static __be64 key_to_tunnel_id(__be32 key)
+{
+#ifdef __BIG_ENDIAN
+ return (__force __be64)((__force u32)key);
+#else
+ return (__force __be64)((__force u64)key << 32);
+#endif
+}
+
+/* Returns the least-significant 32 bits of a __be64. */
+static __be32 tunnel_id_to_key(__be64 x)
+{
+#ifdef __BIG_ENDIAN
+ return (__force __be32)x;
+#else
+ return (__force __be32)((__force u64)x >> 32);
+#endif
}
static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi)
{
struct net *net = dev_net(skb->dev);
+ struct metadata_dst *tun_dst = NULL;
struct ip_tunnel_net *itn;
const struct iphdr *iph;
struct ip_tunnel *tunnel;
@@ -218,40 +399,194 @@ static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi)
if (tunnel) {
skb_pop_mac_header(skb);
- ip_tunnel_rcv(tunnel, skb, tpi, log_ecn_error);
+ if (tunnel->collect_md) {
+ struct ip_tunnel_info *info;
+
+ tun_dst = metadata_dst_alloc(0, GFP_ATOMIC);
+ if (!tun_dst)
+ return PACKET_REJECT;
+
+ info = &tun_dst->u.tun_info;
+ info->key.ipv4_src = iph->saddr;
+ info->key.ipv4_dst = iph->daddr;
+ info->key.ipv4_tos = iph->tos;
+ info->key.ipv4_ttl = iph->ttl;
+
+ info->mode = IP_TUNNEL_INFO_RX;
+ info->key.tun_flags = tpi->flags &
+ (TUNNEL_CSUM | TUNNEL_KEY);
+ info->key.tun_id = key_to_tunnel_id(tpi->key);
+
+ info->key.tp_src = 0;
+ info->key.tp_dst = 0;
+ }
+
+ ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
return PACKET_RCVD;
}
return PACKET_REJECT;
}
+static int gre_rcv(struct sk_buff *skb)
+{
+ struct tnl_ptk_info tpi;
+ bool csum_err = false;
+
+#ifdef CONFIG_NET_IPGRE_BROADCAST
+ if (ipv4_is_multicast(ip_hdr(skb)->daddr)) {
+ /* Looped back packet, drop it! */
+ if (rt_is_output_route(skb_rtable(skb)))
+ goto drop;
+ }
+#endif
+
+ if (parse_gre_header(skb, &tpi, &csum_err) < 0)
+ goto drop;
+
+ if (ipgre_rcv(skb, &tpi) == PACKET_RCVD)
+ return 0;
+
+ icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
+drop:
+ kfree_skb(skb);
+ return 0;
+}
+
+static void build_header(struct sk_buff *skb, int hdr_len, __be16 flags,
+ __be16 proto, __be32 key, __be32 seq)
+{
+ struct gre_base_hdr *greh;
+
+ skb_push(skb, hdr_len);
+
+ skb_reset_transport_header(skb);
+ greh = (struct gre_base_hdr *)skb->data;
+ greh->flags = tnl_flags_to_gre_flags(flags);
+ greh->protocol = proto;
+
+ if (flags & (TUNNEL_KEY | TUNNEL_CSUM | TUNNEL_SEQ)) {
+ __be32 *ptr = (__be32 *)(((u8 *)greh) + hdr_len - 4);
+
+ if (flags & TUNNEL_SEQ) {
+ *ptr = seq;
+ ptr--;
+ }
+ if (flags & TUNNEL_KEY) {
+ *ptr = key;
+ ptr--;
+ }
+ if (flags & TUNNEL_CSUM &&
+ !(skb_shinfo(skb)->gso_type &
+ (SKB_GSO_GRE | SKB_GSO_GRE_CSUM))) {
+ *ptr = 0;
+ *(__sum16 *)ptr = csum_fold(skb_checksum(skb, 0,
+ skb->len, 0));
+ }
+ }
+}
+
static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
const struct iphdr *tnl_params,
__be16 proto)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
- struct tnl_ptk_info tpi;
- tpi.flags = tunnel->parms.o_flags;
- tpi.proto = proto;
- tpi.key = tunnel->parms.o_key;
if (tunnel->parms.o_flags & TUNNEL_SEQ)
tunnel->o_seqno++;
- tpi.seq = htonl(tunnel->o_seqno);
/* Push GRE header. */
- gre_build_header(skb, &tpi, tunnel->tun_hlen);
-
- skb_set_inner_protocol(skb, tpi.proto);
+ build_header(skb, tunnel->tun_hlen, tunnel->parms.o_flags,
+ proto, tunnel->parms.o_key, htonl(tunnel->o_seqno));
+ skb_set_inner_protocol(skb, proto);
ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol);
}
+static struct sk_buff *gre_handle_offloads(struct sk_buff *skb,
+ bool csum)
+{
+ return iptunnel_handle_offloads(skb, csum,
+ csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE);
+}
+
+static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+ struct ip_tunnel_info *tun_info;
+ struct net *net = dev_net(dev);
+ const struct ip_tunnel_key *key;
+ struct flowi4 fl;
+ struct rtable *rt;
+ int min_headroom;
+ int tunnel_hlen;
+ __be16 df, flags;
+ int err;
+
+ tun_info = skb_tunnel_info(skb, AF_INET);
+ if (unlikely(!tun_info || tun_info->mode != IP_TUNNEL_INFO_TX))
+ goto err_free_skb;
+
+ key = &tun_info->key;
+ memset(&fl, 0, sizeof(fl));
+ fl.daddr = key->ipv4_dst;
+ fl.saddr = key->ipv4_src;
+ fl.flowi4_tos = RT_TOS(key->ipv4_tos);
+ fl.flowi4_mark = skb->mark;
+ fl.flowi4_proto = IPPROTO_GRE;
+
+ rt = ip_route_output_key(net, &fl);
+ if (IS_ERR(rt))
+ goto err_free_skb;
+
+ tunnel_hlen = ip_gre_calc_hlen(key->tun_flags);
+
+ min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len
+ + tunnel_hlen + sizeof(struct iphdr);
+ if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) {
+ int head_delta = SKB_DATA_ALIGN(min_headroom -
+ skb_headroom(skb) +
+ 16);
+ err = pskb_expand_head(skb, max_t(int, head_delta, 0),
+ 0, GFP_ATOMIC);
+ if (unlikely(err))
+ goto err_free_rt;
+ }
+
+ /* Push Tunnel header. */
+ skb = gre_handle_offloads(skb, !!(tun_info->key.tun_flags & TUNNEL_CSUM));
+ if (IS_ERR(skb)) {
+ skb = NULL;
+ goto err_free_rt;
+ }
+
+ flags = tun_info->key.tun_flags & (TUNNEL_CSUM | TUNNEL_KEY);
+ build_header(skb, tunnel_hlen, flags, htons(ETH_P_TEB),
+ tunnel_id_to_key(tun_info->key.tun_id), 0);
+
+ df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;
+ err = iptunnel_xmit(skb->sk, rt, skb, fl.saddr,
+ key->ipv4_dst, IPPROTO_GRE,
+ key->ipv4_tos, key->ipv4_ttl, df, false);
+ iptunnel_xmit_stats(err, &dev->stats, dev->tstats);
+ return;
+
+err_free_rt:
+ ip_rt_put(rt);
+err_free_skb:
+ kfree_skb(skb);
+ dev->stats.tx_dropped++;
+}
+
static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
struct net_device *dev)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
const struct iphdr *tnl_params;
+ if (tunnel->collect_md) {
+ gre_fb_xmit(skb, dev);
+ return NETDEV_TX_OK;
+ }
+
if (dev->header_ops) {
/* Need space for new headers */
if (skb_cow_head(skb, dev->needed_headroom -
@@ -277,7 +612,6 @@ static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
goto out;
__gre_xmit(skb, dev, tnl_params, skb->protocol);
-
return NETDEV_TX_OK;
free_skb:
@@ -292,6 +626,11 @@ static netdev_tx_t gre_tap_xmit(struct sk_buff *skb,
{
struct ip_tunnel *tunnel = netdev_priv(dev);
+ if (tunnel->collect_md) {
+ gre_fb_xmit(skb, dev);
+ return NETDEV_TX_OK;
+ }
+
skb = gre_handle_offloads(skb, !!(tunnel->parms.o_flags&TUNNEL_CSUM));
if (IS_ERR(skb))
goto out;
@@ -300,7 +639,6 @@ static netdev_tx_t gre_tap_xmit(struct sk_buff *skb,
goto free_skb;
__gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_TEB));
-
return NETDEV_TX_OK;
free_skb:
@@ -530,10 +868,9 @@ static int ipgre_tunnel_init(struct net_device *dev)
return ip_tunnel_init(dev);
}
-static struct gre_cisco_protocol ipgre_protocol = {
- .handler = ipgre_rcv,
- .err_handler = ipgre_err,
- .priority = 0,
+static const struct gre_protocol ipgre_protocol = {
+ .handler = gre_rcv,
+ .err_handler = gre_err,
};
static int __net_init ipgre_init_net(struct net *net)
@@ -596,8 +933,10 @@ out:
return ipgre_tunnel_validate(tb, data);
}
-static void ipgre_netlink_parms(struct nlattr *data[], struct nlattr *tb[],
- struct ip_tunnel_parm *parms)
+static void ipgre_netlink_parms(struct net_device *dev,
+ struct nlattr *data[],
+ struct nlattr *tb[],
+ struct ip_tunnel_parm *parms)
{
memset(parms, 0, sizeof(*parms));
@@ -635,6 +974,12 @@ static void ipgre_netlink_parms(struct nlattr *data[], struct nlattr *tb[],
if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC]))
parms->iph.frag_off = htons(IP_DF);
+
+ if (data[IFLA_GRE_COLLECT_METADATA]) {
+ struct ip_tunnel *t = netdev_priv(dev);
+
+ t->collect_md = true;
+ }
}
/* This function returns true when ENCAP attributes are present in the nl msg */
@@ -712,7 +1057,7 @@ static int ipgre_newlink(struct net *src_net, struct net_device *dev,
return err;
}
- ipgre_netlink_parms(data, tb, &p);
+ ipgre_netlink_parms(dev, data, tb, &p);
return ip_tunnel_newlink(dev, tb, &p);
}
@@ -730,7 +1075,7 @@ static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
return err;
}
- ipgre_netlink_parms(data, tb, &p);
+ ipgre_netlink_parms(dev, data, tb, &p);
return ip_tunnel_changelink(dev, tb, &p);
}
@@ -765,6 +1110,8 @@ static size_t ipgre_get_size(const struct net_device *dev)
nla_total_size(2) +
/* IFLA_GRE_ENCAP_DPORT */
nla_total_size(2) +
+ /* IFLA_GRE_COLLECT_METADATA */
+ nla_total_size(0) +
0;
}
@@ -796,6 +1143,11 @@ static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
t->encap.flags))
goto nla_put_failure;
+ if (t->collect_md) {
+ if (nla_put_flag(skb, IFLA_GRE_COLLECT_METADATA))
+ goto nla_put_failure;
+ }
+
return 0;
nla_put_failure:
@@ -817,6 +1169,7 @@ static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
[IFLA_GRE_ENCAP_FLAGS] = { .type = NLA_U16 },
[IFLA_GRE_ENCAP_SPORT] = { .type = NLA_U16 },
[IFLA_GRE_ENCAP_DPORT] = { .type = NLA_U16 },
+ [IFLA_GRE_COLLECT_METADATA] = { .type = NLA_FLAG },
};
static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
@@ -849,9 +1202,38 @@ static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
.get_link_net = ip_tunnel_get_link_net,
};
+struct net_device *gretap_fb_dev_create(struct net *net, const char *name,
+ u8 name_assign_type)
+{
+ struct nlattr *tb[IFLA_MAX + 1];
+ struct net_device *dev;
+ struct ip_tunnel *t;
+ int err;
+
+ memset(&tb, 0, sizeof(tb));
+
+ dev = rtnl_create_link(net, name, name_assign_type,
+ &ipgre_tap_ops, tb);
+ if (IS_ERR(dev))
+ return dev;
+
+ /* Configure flow based GRE device. */
+ t = netdev_priv(dev);
+ t->collect_md = true;
+
+ err = ipgre_newlink(net, dev, tb, NULL);
+ if (err < 0)
+ goto out;
+ return dev;
+out:
+ free_netdev(dev);
+ return ERR_PTR(err);
+}
+EXPORT_SYMBOL_GPL(gretap_fb_dev_create);
+
static int __net_init ipgre_tap_init_net(struct net *net)
{
- return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, NULL);
+ return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, "gretap0");
}
static void __net_exit ipgre_tap_exit_net(struct net *net)
@@ -881,7 +1263,7 @@ static int __init ipgre_init(void)
if (err < 0)
goto pnet_tap_faied;
- err = gre_cisco_register(&ipgre_protocol);
+ err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO);
if (err < 0) {
pr_info("%s: can't add protocol\n", __func__);
goto add_proto_failed;
@@ -900,7 +1282,7 @@ static int __init ipgre_init(void)
tap_ops_failed:
rtnl_link_unregister(&ipgre_link_ops);
rtnl_link_failed:
- gre_cisco_unregister(&ipgre_protocol);
+ gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
add_proto_failed:
unregister_pernet_device(&ipgre_tap_net_ops);
pnet_tap_faied:
@@ -912,7 +1294,7 @@ static void __exit ipgre_fini(void)
{
rtnl_link_unregister(&ipgre_tap_ops);
rtnl_link_unregister(&ipgre_link_ops);
- gre_cisco_unregister(&ipgre_protocol);
+ gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
unregister_pernet_device(&ipgre_tap_net_ops);
unregister_pernet_device(&ipgre_net_ops);
}
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index 626d9e56a6bd..cbb51f3fac06 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -230,10 +230,13 @@ skip_key_lookup:
if (cand)
return cand;
+ t = rcu_dereference(itn->collect_md_tun);
+ if (t)
+ return t;
+
if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP)
return netdev_priv(itn->fb_tunnel_dev);
-
return NULL;
}
EXPORT_SYMBOL_GPL(ip_tunnel_lookup);
@@ -261,11 +264,15 @@ static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
{
struct hlist_head *head = ip_bucket(itn, &t->parms);
+ if (t->collect_md)
+ rcu_assign_pointer(itn->collect_md_tun, t);
hlist_add_head_rcu(&t->hash_node, head);
}
-static void ip_tunnel_del(struct ip_tunnel *t)
+static void ip_tunnel_del(struct ip_tunnel_net *itn, struct ip_tunnel *t)
{
+ if (t->collect_md)
+ rcu_assign_pointer(itn->collect_md_tun, NULL);
hlist_del_init_rcu(&t->hash_node);
}
@@ -419,7 +426,8 @@ static struct ip_tunnel *ip_tunnel_create(struct net *net,
}
int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
- const struct tnl_ptk_info *tpi, bool log_ecn_error)
+ const struct tnl_ptk_info *tpi, struct metadata_dst *tun_dst,
+ bool log_ecn_error)
{
struct pcpu_sw_netstats *tstats;
const struct iphdr *iph = ip_hdr(skb);
@@ -478,6 +486,9 @@ int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
skb->dev = tunnel->dev;
}
+ if (tun_dst)
+ skb_dst_set(skb, (struct dst_entry *)tun_dst);
+
gro_cells_receive(&tunnel->gro_cells, skb);
return 0;
@@ -806,7 +817,7 @@ static void ip_tunnel_update(struct ip_tunnel_net *itn,
struct ip_tunnel_parm *p,
bool set_mtu)
{
- ip_tunnel_del(t);
+ ip_tunnel_del(itn, t);
t->parms.iph.saddr = p->iph.saddr;
t->parms.iph.daddr = p->iph.daddr;
t->parms.i_key = p->i_key;
@@ -967,7 +978,7 @@ void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);
if (itn->fb_tunnel_dev != dev) {
- ip_tunnel_del(netdev_priv(dev));
+ ip_tunnel_del(itn, netdev_priv(dev));
unregister_netdevice_queue(dev, head);
}
}
@@ -1072,8 +1083,13 @@ int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
nt = netdev_priv(dev);
itn = net_generic(net, nt->ip_tnl_net_id);
- if (ip_tunnel_find(itn, p, dev->type))
- return -EEXIST;
+ if (nt->collect_md) {
+ if (rtnl_dereference(itn->collect_md_tun))
+ return -EEXIST;
+ } else {
+ if (ip_tunnel_find(itn, p, dev->type))
+ return -EEXIST;
+ }
nt->net = net;
nt->parms = *p;
@@ -1089,7 +1105,6 @@ int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
dev->mtu = mtu;
ip_tunnel_add(itn, nt);
-
out:
return err;
}
@@ -1163,6 +1178,10 @@ int ip_tunnel_init(struct net_device *dev)
iph->version = 4;
iph->ihl = 5;
+ if (tunnel->collect_md) {
+ dev->features |= NETIF_F_NETNS_LOCAL;
+ netif_keep_dst(dev);
+ }
return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_init);
@@ -1176,7 +1195,7 @@ void ip_tunnel_uninit(struct net_device *dev)
itn = net_generic(net, tunnel->ip_tnl_net_id);
/* fb_tunnel_dev will be unregistered in net-exit call. */
if (itn->fb_tunnel_dev != dev)
- ip_tunnel_del(netdev_priv(dev));
+ ip_tunnel_del(itn, netdev_priv(dev));
ip_tunnel_dst_reset_all(tunnel);
}
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 254238daf58b..f34c31defafe 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -198,7 +198,7 @@ static int ipip_rcv(struct sk_buff *skb)
goto drop;
if (iptunnel_pull_header(skb, 0, tpi.proto))
goto drop;
- return ip_tunnel_rcv(tunnel, skb, &tpi, log_ecn_error);
+ return ip_tunnel_rcv(tunnel, skb, &tpi, NULL, log_ecn_error);
}
return -1;
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index ac35a28599be..94428fd85b2f 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -742,7 +742,7 @@ static int ipip_rcv(struct sk_buff *skb)
goto drop;
if (iptunnel_pull_header(skb, 0, tpi.proto))
goto drop;
- return ip_tunnel_rcv(tunnel, skb, &tpi, log_ecn_error);
+ return ip_tunnel_rcv(tunnel, skb, &tpi, NULL, log_ecn_error);
}
return 1;
diff --git a/net/openvswitch/Kconfig b/net/openvswitch/Kconfig
index 15840401a2ce..422dc0567de9 100644
--- a/net/openvswitch/Kconfig
+++ b/net/openvswitch/Kconfig
@@ -34,7 +34,7 @@ config OPENVSWITCH
config OPENVSWITCH_GRE
tristate "Open vSwitch GRE tunneling support"
depends on OPENVSWITCH
- depends on NET_IPGRE_DEMUX
+ depends on NET_IPGRE
default OPENVSWITCH
---help---
If you say Y here, then the Open vSwitch will be able to create GRE
diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c
index b87656c66aaf..871801d2ac23 100644
--- a/net/openvswitch/vport-gre.c
+++ b/net/openvswitch/vport-gre.c
@@ -45,235 +45,43 @@
#include "datapath.h"
#include "vport.h"
+#include "vport-netdev.h"
static struct vport_ops ovs_gre_vport_ops;
-/* Returns the least-significant 32 bits of a __be64. */
-static __be32 be64_get_low32(__be64 x)
+static struct vport *gre_tnl_create(const struct vport_parms *parms)
{
-#ifdef __BIG_ENDIAN
- return (__force __be32)x;
-#else
- return (__force __be32)((__force u64)x >> 32);
-#endif
-}
-
-static __be16 filter_tnl_flags(__be16 flags)
-{
- return flags & (TUNNEL_CSUM | TUNNEL_KEY);
-}
-
-static struct sk_buff *__build_header(struct sk_buff *skb,
- int tunnel_hlen)
-{
- struct tnl_ptk_info tpi;
- const struct ip_tunnel_key *tun_key;
-
- tun_key = &OVS_CB(skb)->egress_tun_info->key;
-
- skb = gre_handle_offloads(skb, !!(tun_key->tun_flags & TUNNEL_CSUM));
- if (IS_ERR(skb))
- return skb;
-
- tpi.flags = filter_tnl_flags(tun_key->tun_flags);
- tpi.proto = htons(ETH_P_TEB);
- tpi.key = be64_get_low32(tun_key->tun_id);
- tpi.seq = 0;
- gre_build_header(skb, &tpi, tunnel_hlen);
-
- return skb;
-}
-
-static __be64 key_to_tunnel_id(__be32 key, __be32 seq)
-{
-#ifdef __BIG_ENDIAN
- return (__force __be64)((__force u64)seq << 32 | (__force u32)key);
-#else
- return (__force __be64)((__force u64)key << 32 | (__force u32)seq);
-#endif
-}
-
-/* Called with rcu_read_lock and BH disabled. */
-static int gre_rcv(struct sk_buff *skb,
- const struct tnl_ptk_info *tpi)
-{
- struct ip_tunnel_info tun_info;
- struct ovs_net *ovs_net;
- struct vport *vport;
- __be64 key;
-
- ovs_net = net_generic(dev_net(skb->dev), ovs_net_id);
- vport = rcu_dereference(ovs_net->vport_net.gre_vport);
- if (unlikely(!vport))
- return PACKET_REJECT;
-
- key = key_to_tunnel_id(tpi->key, tpi->seq);
- ip_tunnel_info_init(&tun_info, ip_hdr(skb), 0, 0, key,
- filter_tnl_flags(tpi->flags), NULL, 0);
-
- ovs_vport_receive(vport, skb, &tun_info);
- return PACKET_RCVD;
-}
-
-/* Called with rcu_read_lock and BH disabled. */
-static int gre_err(struct sk_buff *skb, u32 info,
- const struct tnl_ptk_info *tpi)
-{
- struct ovs_net *ovs_net;
+ struct net *net = ovs_dp_get_net(parms->dp);
+ struct net_device *dev;
struct vport *vport;
- ovs_net = net_generic(dev_net(skb->dev), ovs_net_id);
- vport = rcu_dereference(ovs_net->vport_net.gre_vport);
-
- if (unlikely(!vport))
- return PACKET_REJECT;
- else
- return PACKET_RCVD;
-}
-
-static int gre_tnl_send(struct vport *vport, struct sk_buff *skb)
-{
- struct net *net = ovs_dp_get_net(vport->dp);
- const struct ip_tunnel_key *tun_key;
- struct flowi4 fl;
- struct rtable *rt;
- int min_headroom;
- int tunnel_hlen;
- __be16 df;
- int err;
-
- if (unlikely(!OVS_CB(skb)->egress_tun_info)) {
- err = -EINVAL;
- goto err_free_skb;
- }
-
- tun_key = &OVS_CB(skb)->egress_tun_info->key;
- rt = ovs_tunnel_route_lookup(net, tun_key, skb->mark, &fl, IPPROTO_GRE);
- if (IS_ERR(rt)) {
- err = PTR_ERR(rt);
- goto err_free_skb;
- }
-
- tunnel_hlen = ip_gre_calc_hlen(tun_key->tun_flags);
-
- min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len
- + tunnel_hlen + sizeof(struct iphdr)
- + (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0);
- if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) {
- int head_delta = SKB_DATA_ALIGN(min_headroom -
- skb_headroom(skb) +
- 16);
- err = pskb_expand_head(skb, max_t(int, head_delta, 0),
- 0, GFP_ATOMIC);
- if (unlikely(err))
- goto err_free_rt;
- }
-
- skb = vlan_hwaccel_push_inside(skb);
- if (unlikely(!skb)) {
- err = -ENOMEM;
- goto err_free_rt;
- }
-
- /* Push Tunnel header. */
- skb = __build_header(skb, tunnel_hlen);
- if (IS_ERR(skb)) {
- err = PTR_ERR(skb);
- skb = NULL;
- goto err_free_rt;
+ vport = ovs_vport_alloc(0, &ovs_gre_vport_ops, parms);
+ if (IS_ERR(vport))
+ return vport;
+
+ rtnl_lock();
+ dev = gretap_fb_dev_create(net, parms->name, NET_NAME_USER);
+ if (IS_ERR(dev)) {
+ rtnl_unlock();
+ ovs_vport_free(vport);
+ return ERR_CAST(dev);
}
- df = tun_key->tun_flags & TUNNEL_DONT_FRAGMENT ?
- htons(IP_DF) : 0;
-
- skb->ignore_df = 1;
-
- return iptunnel_xmit(skb->sk, rt, skb, fl.saddr,
- tun_key->ipv4_dst, IPPROTO_GRE,
- tun_key->ipv4_tos, tun_key->ipv4_ttl, df, false);
-err_free_rt:
- ip_rt_put(rt);
-err_free_skb:
- kfree_skb(skb);
- return err;
-}
-
-static struct gre_cisco_protocol gre_protocol = {
- .handler = gre_rcv,
- .err_handler = gre_err,
- .priority = 1,
-};
-
-static int gre_ports;
-static int gre_init(void)
-{
- int err;
-
- gre_ports++;
- if (gre_ports > 1)
- return 0;
-
- err = gre_cisco_register(&gre_protocol);
- if (err)
- pr_warn("cannot register gre protocol handler\n");
-
- return err;
-}
-
-static void gre_exit(void)
-{
- gre_ports--;
- if (gre_ports > 0)
- return;
-
- gre_cisco_unregister(&gre_protocol);
-}
+ dev_change_flags(dev, dev->flags | IFF_UP);
+ rtnl_unlock();
-static const char *gre_get_name(const struct vport *vport)
-{
- return vport_priv(vport);
+ return vport;
}
static struct vport *gre_create(const struct vport_parms *parms)
{
- struct net *net = ovs_dp_get_net(parms->dp);
- struct ovs_net *ovs_net;
struct vport *vport;
- int err;
-
- err = gre_init();
- if (err)
- return ERR_PTR(err);
-
- ovs_net = net_generic(net, ovs_net_id);
- if (ovsl_dereference(ovs_net->vport_net.gre_vport)) {
- vport = ERR_PTR(-EEXIST);
- goto error;
- }
- vport = ovs_vport_alloc(IFNAMSIZ, &ovs_gre_vport_ops, parms);
+ vport = gre_tnl_create(parms);
if (IS_ERR(vport))
- goto error;
-
- strncpy(vport_priv(vport), parms->name, IFNAMSIZ);
- rcu_assign_pointer(ovs_net->vport_net.gre_vport, vport);
- return vport;
-
-error:
- gre_exit();
- return vport;
-}
-
-static void gre_tnl_destroy(struct vport *vport)
-{
- struct net *net = ovs_dp_get_net(vport->dp);
- struct ovs_net *ovs_net;
-
- ovs_net = net_generic(net, ovs_net_id);
+ return vport;
- RCU_INIT_POINTER(ovs_net->vport_net.gre_vport, NULL);
- ovs_vport_deferred_free(vport);
- gre_exit();
+ return ovs_netdev_link(vport, parms->name);
}
static int gre_get_egress_tun_info(struct vport *vport, struct sk_buff *skb,
@@ -288,10 +96,9 @@ static int gre_get_egress_tun_info(struct vport *vport, struct sk_buff *skb,
static struct vport_ops ovs_gre_vport_ops = {
.type = OVS_VPORT_TYPE_GRE,
.create = gre_create,
- .destroy = gre_tnl_destroy,
- .get_name = gre_get_name,
- .send = gre_tnl_send,
+ .send = ovs_netdev_send,
.get_egress_tun_info = gre_get_egress_tun_info,
+ .destroy = ovs_netdev_tunnel_destroy,
.owner = THIS_MODULE,
};
diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c
index cddb7069b11b..4b70aaa4a746 100644
--- a/net/openvswitch/vport-netdev.c
+++ b/net/openvswitch/vport-netdev.c
@@ -147,7 +147,7 @@ static struct vport *netdev_create(const struct vport_parms *parms)
return ovs_netdev_link(vport, parms->name);
}
-void ovs_vport_free_rcu(struct rcu_head *rcu)
+static void vport_netdev_free(struct rcu_head *rcu)
{
struct vport *vport = container_of(rcu, struct vport, rcu);
@@ -155,7 +155,6 @@ void ovs_vport_free_rcu(struct rcu_head *rcu)
dev_put(vport->dev);
ovs_vport_free(vport);
}
-EXPORT_SYMBOL_GPL(ovs_vport_free_rcu);
void ovs_netdev_detach_dev(struct vport *vport)
{
@@ -175,9 +174,25 @@ static void netdev_destroy(struct vport *vport)
ovs_netdev_detach_dev(vport);
rtnl_unlock();
- call_rcu(&vport->rcu, ovs_vport_free_rcu);
+ call_rcu(&vport->rcu, vport_netdev_free);
}
+void ovs_netdev_tunnel_destroy(struct vport *vport)
+{
+ rtnl_lock();
+ if (vport->dev->priv_flags & IFF_OVS_DATAPATH)
+ ovs_netdev_detach_dev(vport);
+
+ /* Early release so we can unregister the device */
+ dev_put(vport->dev);
+ rtnl_delete_link(vport->dev);
+ vport->dev = NULL;
+ rtnl_unlock();
+
+ call_rcu(&vport->rcu, vport_netdev_free);
+}
+EXPORT_SYMBOL_GPL(ovs_netdev_tunnel_destroy);
+
static unsigned int packet_length(const struct sk_buff *skb)
{
unsigned int length = skb->len - ETH_HLEN;
diff --git a/net/openvswitch/vport-netdev.h b/net/openvswitch/vport-netdev.h
index 804412697a90..497cc81f1aca 100644
--- a/net/openvswitch/vport-netdev.h
+++ b/net/openvswitch/vport-netdev.h
@@ -29,9 +29,9 @@ struct vport *ovs_netdev_get_vport(struct net_device *dev);
struct vport *ovs_netdev_link(struct vport *vport, const char *name);
int ovs_netdev_send(struct vport *vport, struct sk_buff *skb);
void ovs_netdev_detach_dev(struct vport *);
-void ovs_vport_free_rcu(struct rcu_head *);
int __init ovs_netdev_init(void);
void ovs_netdev_exit(void);
+void ovs_netdev_tunnel_destroy(struct vport *vport);
#endif /* vport_netdev.h */
diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c
index c6e937e36f8b..1e8b00a23a23 100644
--- a/net/openvswitch/vport-vxlan.c
+++ b/net/openvswitch/vport-vxlan.c
@@ -146,21 +146,6 @@ static struct vport *vxlan_create(const struct vport_parms *parms)
return ovs_netdev_link(vport, parms->name);
}
-static void vxlan_destroy(struct vport *vport)
-{
- rtnl_lock();
- if (vport->dev->priv_flags & IFF_OVS_DATAPATH)
- ovs_netdev_detach_dev(vport);
-
- /* Early release so we can unregister the device */
- dev_put(vport->dev);
- rtnl_delete_link(vport->dev);
- vport->dev = NULL;
- rtnl_unlock();
-
- call_rcu(&vport->rcu, ovs_vport_free_rcu);
-}
-
static int vxlan_get_egress_tun_info(struct vport *vport, struct sk_buff *skb,
struct ip_tunnel_info *egress_tun_info)
{
@@ -183,7 +168,7 @@ static int vxlan_get_egress_tun_info(struct vport *vport, struct sk_buff *skb,
static struct vport_ops ovs_vxlan_netdev_vport_ops = {
.type = OVS_VPORT_TYPE_VXLAN,
.create = vxlan_create,
- .destroy = vxlan_destroy,
+ .destroy = ovs_netdev_tunnel_destroy,
.get_options = vxlan_get_options,
.send = ovs_netdev_send,
.get_egress_tun_info = vxlan_get_egress_tun_info,