summaryrefslogtreecommitdiff
path: root/net/openvswitch
diff options
context:
space:
mode:
Diffstat (limited to 'net/openvswitch')
-rw-r--r--net/openvswitch/actions.c377
-rw-r--r--net/openvswitch/datapath.c282
-rw-r--r--net/openvswitch/flow.c8
-rw-r--r--net/openvswitch/flow.h42
-rw-r--r--net/openvswitch/flow_netlink.c553
-rw-r--r--net/openvswitch/flow_netlink.h13
-rw-r--r--net/openvswitch/flow_table.c228
-rw-r--r--net/openvswitch/flow_table.h8
-rw-r--r--net/openvswitch/vport-geneve.c32
-rw-r--r--net/openvswitch/vport-gre.c14
-rw-r--r--net/openvswitch/vport-vxlan.c110
-rw-r--r--net/openvswitch/vport-vxlan.h11
-rw-r--r--net/openvswitch/vport.c16
-rw-r--r--net/openvswitch/vport.h20
14 files changed, 1239 insertions, 475 deletions
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index 770064c83711..b491c1c296fe 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -185,10 +185,15 @@ static int pop_mpls(struct sk_buff *skb, struct sw_flow_key *key,
return 0;
}
-static int set_mpls(struct sk_buff *skb, struct sw_flow_key *key,
- const __be32 *mpls_lse)
+/* 'KEY' must not have any bits set outside of the 'MASK' */
+#define MASKED(OLD, KEY, MASK) ((KEY) | ((OLD) & ~(MASK)))
+#define SET_MASKED(OLD, KEY, MASK) ((OLD) = MASKED(OLD, KEY, MASK))
+
+static int set_mpls(struct sk_buff *skb, struct sw_flow_key *flow_key,
+ const __be32 *mpls_lse, const __be32 *mask)
{
__be32 *stack;
+ __be32 lse;
int err;
err = skb_ensure_writable(skb, skb->mac_len + MPLS_HLEN);
@@ -196,14 +201,16 @@ static int set_mpls(struct sk_buff *skb, struct sw_flow_key *key,
return err;
stack = (__be32 *)skb_mpls_header(skb);
+ lse = MASKED(*stack, *mpls_lse, *mask);
if (skb->ip_summed == CHECKSUM_COMPLETE) {
- __be32 diff[] = { ~(*stack), *mpls_lse };
+ __be32 diff[] = { ~(*stack), lse };
+
skb->csum = ~csum_partial((char *)diff, sizeof(diff),
~skb->csum);
}
- *stack = *mpls_lse;
- key->mpls.top_lse = *mpls_lse;
+ *stack = lse;
+ flow_key->mpls.top_lse = lse;
return 0;
}
@@ -212,7 +219,7 @@ static int pop_vlan(struct sk_buff *skb, struct sw_flow_key *key)
int err;
err = skb_vlan_pop(skb);
- if (vlan_tx_tag_present(skb))
+ if (skb_vlan_tag_present(skb))
invalidate_flow_key(key);
else
key->eth.tci = 0;
@@ -222,7 +229,7 @@ static int pop_vlan(struct sk_buff *skb, struct sw_flow_key *key)
static int push_vlan(struct sk_buff *skb, struct sw_flow_key *key,
const struct ovs_action_push_vlan *vlan)
{
- if (vlan_tx_tag_present(skb))
+ if (skb_vlan_tag_present(skb))
invalidate_flow_key(key);
else
key->eth.tci = vlan->vlan_tci;
@@ -230,23 +237,39 @@ static int push_vlan(struct sk_buff *skb, struct sw_flow_key *key,
ntohs(vlan->vlan_tci) & ~VLAN_TAG_PRESENT);
}
-static int set_eth_addr(struct sk_buff *skb, struct sw_flow_key *key,
- const struct ovs_key_ethernet *eth_key)
+/* 'src' is already properly masked. */
+static void ether_addr_copy_masked(u8 *dst_, const u8 *src_, const u8 *mask_)
+{
+ u16 *dst = (u16 *)dst_;
+ const u16 *src = (const u16 *)src_;
+ const u16 *mask = (const u16 *)mask_;
+
+ SET_MASKED(dst[0], src[0], mask[0]);
+ SET_MASKED(dst[1], src[1], mask[1]);
+ SET_MASKED(dst[2], src[2], mask[2]);
+}
+
+static int set_eth_addr(struct sk_buff *skb, struct sw_flow_key *flow_key,
+ const struct ovs_key_ethernet *key,
+ const struct ovs_key_ethernet *mask)
{
int err;
+
err = skb_ensure_writable(skb, ETH_HLEN);
if (unlikely(err))
return err;
skb_postpull_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2);
- ether_addr_copy(eth_hdr(skb)->h_source, eth_key->eth_src);
- ether_addr_copy(eth_hdr(skb)->h_dest, eth_key->eth_dst);
+ ether_addr_copy_masked(eth_hdr(skb)->h_source, key->eth_src,
+ mask->eth_src);
+ ether_addr_copy_masked(eth_hdr(skb)->h_dest, key->eth_dst,
+ mask->eth_dst);
ovs_skb_postpush_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2);
- ether_addr_copy(key->eth.src, eth_key->eth_src);
- ether_addr_copy(key->eth.dst, eth_key->eth_dst);
+ ether_addr_copy(flow_key->eth.src, eth_hdr(skb)->h_source);
+ ether_addr_copy(flow_key->eth.dst, eth_hdr(skb)->h_dest);
return 0;
}
@@ -304,6 +327,15 @@ static void update_ipv6_checksum(struct sk_buff *skb, u8 l4_proto,
}
}
+static void mask_ipv6_addr(const __be32 old[4], const __be32 addr[4],
+ const __be32 mask[4], __be32 masked[4])
+{
+ masked[0] = MASKED(old[0], addr[0], mask[0]);
+ masked[1] = MASKED(old[1], addr[1], mask[1]);
+ masked[2] = MASKED(old[2], addr[2], mask[2]);
+ masked[3] = MASKED(old[3], addr[3], mask[3]);
+}
+
static void set_ipv6_addr(struct sk_buff *skb, u8 l4_proto,
__be32 addr[4], const __be32 new_addr[4],
bool recalculate_csum)
@@ -315,29 +347,29 @@ static void set_ipv6_addr(struct sk_buff *skb, u8 l4_proto,
memcpy(addr, new_addr, sizeof(__be32[4]));
}
-static void set_ipv6_tc(struct ipv6hdr *nh, u8 tc)
+static void set_ipv6_fl(struct ipv6hdr *nh, u32 fl, u32 mask)
{
- nh->priority = tc >> 4;
- nh->flow_lbl[0] = (nh->flow_lbl[0] & 0x0F) | ((tc & 0x0F) << 4);
+ /* Bits 21-24 are always unmasked, so this retains their values. */
+ SET_MASKED(nh->flow_lbl[0], (u8)(fl >> 16), (u8)(mask >> 16));
+ SET_MASKED(nh->flow_lbl[1], (u8)(fl >> 8), (u8)(mask >> 8));
+ SET_MASKED(nh->flow_lbl[2], (u8)fl, (u8)mask);
}
-static void set_ipv6_fl(struct ipv6hdr *nh, u32 fl)
+static void set_ip_ttl(struct sk_buff *skb, struct iphdr *nh, u8 new_ttl,
+ u8 mask)
{
- nh->flow_lbl[0] = (nh->flow_lbl[0] & 0xF0) | (fl & 0x000F0000) >> 16;
- nh->flow_lbl[1] = (fl & 0x0000FF00) >> 8;
- nh->flow_lbl[2] = fl & 0x000000FF;
-}
+ new_ttl = MASKED(nh->ttl, new_ttl, mask);
-static void set_ip_ttl(struct sk_buff *skb, struct iphdr *nh, u8 new_ttl)
-{
csum_replace2(&nh->check, htons(nh->ttl << 8), htons(new_ttl << 8));
nh->ttl = new_ttl;
}
-static int set_ipv4(struct sk_buff *skb, struct sw_flow_key *key,
- const struct ovs_key_ipv4 *ipv4_key)
+static int set_ipv4(struct sk_buff *skb, struct sw_flow_key *flow_key,
+ const struct ovs_key_ipv4 *key,
+ const struct ovs_key_ipv4 *mask)
{
struct iphdr *nh;
+ __be32 new_addr;
int err;
err = skb_ensure_writable(skb, skb_network_offset(skb) +
@@ -347,36 +379,49 @@ static int set_ipv4(struct sk_buff *skb, struct sw_flow_key *key,
nh = ip_hdr(skb);
- if (ipv4_key->ipv4_src != nh->saddr) {
- set_ip_addr(skb, nh, &nh->saddr, ipv4_key->ipv4_src);
- key->ipv4.addr.src = ipv4_key->ipv4_src;
- }
+ /* Setting an IP addresses is typically only a side effect of
+ * matching on them in the current userspace implementation, so it
+ * makes sense to check if the value actually changed.
+ */
+ if (mask->ipv4_src) {
+ new_addr = MASKED(nh->saddr, key->ipv4_src, mask->ipv4_src);
- if (ipv4_key->ipv4_dst != nh->daddr) {
- set_ip_addr(skb, nh, &nh->daddr, ipv4_key->ipv4_dst);
- key->ipv4.addr.dst = ipv4_key->ipv4_dst;
+ if (unlikely(new_addr != nh->saddr)) {
+ set_ip_addr(skb, nh, &nh->saddr, new_addr);
+ flow_key->ipv4.addr.src = new_addr;
+ }
}
+ if (mask->ipv4_dst) {
+ new_addr = MASKED(nh->daddr, key->ipv4_dst, mask->ipv4_dst);
- if (ipv4_key->ipv4_tos != nh->tos) {
- ipv4_change_dsfield(nh, 0, ipv4_key->ipv4_tos);
- key->ip.tos = nh->tos;
+ if (unlikely(new_addr != nh->daddr)) {
+ set_ip_addr(skb, nh, &nh->daddr, new_addr);
+ flow_key->ipv4.addr.dst = new_addr;
+ }
}
-
- if (ipv4_key->ipv4_ttl != nh->ttl) {
- set_ip_ttl(skb, nh, ipv4_key->ipv4_ttl);
- key->ip.ttl = ipv4_key->ipv4_ttl;
+ if (mask->ipv4_tos) {
+ ipv4_change_dsfield(nh, ~mask->ipv4_tos, key->ipv4_tos);
+ flow_key->ip.tos = nh->tos;
+ }
+ if (mask->ipv4_ttl) {
+ set_ip_ttl(skb, nh, key->ipv4_ttl, mask->ipv4_ttl);
+ flow_key->ip.ttl = nh->ttl;
}
return 0;
}
-static int set_ipv6(struct sk_buff *skb, struct sw_flow_key *key,
- const struct ovs_key_ipv6 *ipv6_key)
+static bool is_ipv6_mask_nonzero(const __be32 addr[4])
+{
+ return !!(addr[0] | addr[1] | addr[2] | addr[3]);
+}
+
+static int set_ipv6(struct sk_buff *skb, struct sw_flow_key *flow_key,
+ const struct ovs_key_ipv6 *key,
+ const struct ovs_key_ipv6 *mask)
{
struct ipv6hdr *nh;
int err;
- __be32 *saddr;
- __be32 *daddr;
err = skb_ensure_writable(skb, skb_network_offset(skb) +
sizeof(struct ipv6hdr));
@@ -384,71 +429,77 @@ static int set_ipv6(struct sk_buff *skb, struct sw_flow_key *key,
return err;
nh = ipv6_hdr(skb);
- saddr = (__be32 *)&nh->saddr;
- daddr = (__be32 *)&nh->daddr;
-
- if (memcmp(ipv6_key->ipv6_src, saddr, sizeof(ipv6_key->ipv6_src))) {
- set_ipv6_addr(skb, ipv6_key->ipv6_proto, saddr,
- ipv6_key->ipv6_src, true);
- memcpy(&key->ipv6.addr.src, ipv6_key->ipv6_src,
- sizeof(ipv6_key->ipv6_src));
- }
- if (memcmp(ipv6_key->ipv6_dst, daddr, sizeof(ipv6_key->ipv6_dst))) {
+ /* Setting an IP addresses is typically only a side effect of
+ * matching on them in the current userspace implementation, so it
+ * makes sense to check if the value actually changed.
+ */
+ if (is_ipv6_mask_nonzero(mask->ipv6_src)) {
+ __be32 *saddr = (__be32 *)&nh->saddr;
+ __be32 masked[4];
+
+ mask_ipv6_addr(saddr, key->ipv6_src, mask->ipv6_src, masked);
+
+ if (unlikely(memcmp(saddr, masked, sizeof(masked)))) {
+ set_ipv6_addr(skb, key->ipv6_proto, saddr, masked,
+ true);
+ memcpy(&flow_key->ipv6.addr.src, masked,
+ sizeof(flow_key->ipv6.addr.src));
+ }
+ }
+ if (is_ipv6_mask_nonzero(mask->ipv6_dst)) {
unsigned int offset = 0;
int flags = IP6_FH_F_SKIP_RH;
bool recalc_csum = true;
-
- if (ipv6_ext_hdr(nh->nexthdr))
- recalc_csum = ipv6_find_hdr(skb, &offset,
- NEXTHDR_ROUTING, NULL,
- &flags) != NEXTHDR_ROUTING;
-
- set_ipv6_addr(skb, ipv6_key->ipv6_proto, daddr,
- ipv6_key->ipv6_dst, recalc_csum);
- memcpy(&key->ipv6.addr.dst, ipv6_key->ipv6_dst,
- sizeof(ipv6_key->ipv6_dst));
+ __be32 *daddr = (__be32 *)&nh->daddr;
+ __be32 masked[4];
+
+ mask_ipv6_addr(daddr, key->ipv6_dst, mask->ipv6_dst, masked);
+
+ if (unlikely(memcmp(daddr, masked, sizeof(masked)))) {
+ if (ipv6_ext_hdr(nh->nexthdr))
+ recalc_csum = (ipv6_find_hdr(skb, &offset,
+ NEXTHDR_ROUTING,
+ NULL, &flags)
+ != NEXTHDR_ROUTING);
+
+ set_ipv6_addr(skb, key->ipv6_proto, daddr, masked,
+ recalc_csum);
+ memcpy(&flow_key->ipv6.addr.dst, masked,
+ sizeof(flow_key->ipv6.addr.dst));
+ }
+ }
+ if (mask->ipv6_tclass) {
+ ipv6_change_dsfield(nh, ~mask->ipv6_tclass, key->ipv6_tclass);
+ flow_key->ip.tos = ipv6_get_dsfield(nh);
+ }
+ if (mask->ipv6_label) {
+ set_ipv6_fl(nh, ntohl(key->ipv6_label),
+ ntohl(mask->ipv6_label));
+ flow_key->ipv6.label =
+ *(__be32 *)nh & htonl(IPV6_FLOWINFO_FLOWLABEL);
+ }
+ if (mask->ipv6_hlimit) {
+ SET_MASKED(nh->hop_limit, key->ipv6_hlimit, mask->ipv6_hlimit);
+ flow_key->ip.ttl = nh->hop_limit;
}
-
- set_ipv6_tc(nh, ipv6_key->ipv6_tclass);
- key->ip.tos = ipv6_get_dsfield(nh);
-
- set_ipv6_fl(nh, ntohl(ipv6_key->ipv6_label));
- key->ipv6.label = *(__be32 *)nh & htonl(IPV6_FLOWINFO_FLOWLABEL);
-
- nh->hop_limit = ipv6_key->ipv6_hlimit;
- key->ip.ttl = ipv6_key->ipv6_hlimit;
return 0;
}
/* Must follow skb_ensure_writable() since that can move the skb data. */
static void set_tp_port(struct sk_buff *skb, __be16 *port,
- __be16 new_port, __sum16 *check)
+ __be16 new_port, __sum16 *check)
{
inet_proto_csum_replace2(check, skb, *port, new_port, 0);
*port = new_port;
- skb_clear_hash(skb);
-}
-
-static void set_udp_port(struct sk_buff *skb, __be16 *port, __be16 new_port)
-{
- struct udphdr *uh = udp_hdr(skb);
-
- if (uh->check && skb->ip_summed != CHECKSUM_PARTIAL) {
- set_tp_port(skb, port, new_port, &uh->check);
-
- if (!uh->check)
- uh->check = CSUM_MANGLED_0;
- } else {
- *port = new_port;
- skb_clear_hash(skb);
- }
}
-static int set_udp(struct sk_buff *skb, struct sw_flow_key *key,
- const struct ovs_key_udp *udp_port_key)
+static int set_udp(struct sk_buff *skb, struct sw_flow_key *flow_key,
+ const struct ovs_key_udp *key,
+ const struct ovs_key_udp *mask)
{
struct udphdr *uh;
+ __be16 src, dst;
int err;
err = skb_ensure_writable(skb, skb_transport_offset(skb) +
@@ -457,23 +508,40 @@ static int set_udp(struct sk_buff *skb, struct sw_flow_key *key,
return err;
uh = udp_hdr(skb);
- if (udp_port_key->udp_src != uh->source) {
- set_udp_port(skb, &uh->source, udp_port_key->udp_src);
- key->tp.src = udp_port_key->udp_src;
- }
+ /* Either of the masks is non-zero, so do not bother checking them. */
+ src = MASKED(uh->source, key->udp_src, mask->udp_src);
+ dst = MASKED(uh->dest, key->udp_dst, mask->udp_dst);
- if (udp_port_key->udp_dst != uh->dest) {
- set_udp_port(skb, &uh->dest, udp_port_key->udp_dst);
- key->tp.dst = udp_port_key->udp_dst;
+ if (uh->check && skb->ip_summed != CHECKSUM_PARTIAL) {
+ if (likely(src != uh->source)) {
+ set_tp_port(skb, &uh->source, src, &uh->check);
+ flow_key->tp.src = src;
+ }
+ if (likely(dst != uh->dest)) {
+ set_tp_port(skb, &uh->dest, dst, &uh->check);
+ flow_key->tp.dst = dst;
+ }
+
+ if (unlikely(!uh->check))
+ uh->check = CSUM_MANGLED_0;
+ } else {
+ uh->source = src;
+ uh->dest = dst;
+ flow_key->tp.src = src;
+ flow_key->tp.dst = dst;
}
+ skb_clear_hash(skb);
+
return 0;
}
-static int set_tcp(struct sk_buff *skb, struct sw_flow_key *key,
- const struct ovs_key_tcp *tcp_port_key)
+static int set_tcp(struct sk_buff *skb, struct sw_flow_key *flow_key,
+ const struct ovs_key_tcp *key,
+ const struct ovs_key_tcp *mask)
{
struct tcphdr *th;
+ __be16 src, dst;
int err;
err = skb_ensure_writable(skb, skb_transport_offset(skb) +
@@ -482,50 +550,49 @@ static int set_tcp(struct sk_buff *skb, struct sw_flow_key *key,
return err;
th = tcp_hdr(skb);
- if (tcp_port_key->tcp_src != th->source) {
- set_tp_port(skb, &th->source, tcp_port_key->tcp_src, &th->check);
- key->tp.src = tcp_port_key->tcp_src;
+ src = MASKED(th->source, key->tcp_src, mask->tcp_src);
+ if (likely(src != th->source)) {
+ set_tp_port(skb, &th->source, src, &th->check);
+ flow_key->tp.src = src;
}
-
- if (tcp_port_key->tcp_dst != th->dest) {
- set_tp_port(skb, &th->dest, tcp_port_key->tcp_dst, &th->check);
- key->tp.dst = tcp_port_key->tcp_dst;
+ dst = MASKED(th->dest, key->tcp_dst, mask->tcp_dst);
+ if (likely(dst != th->dest)) {
+ set_tp_port(skb, &th->dest, dst, &th->check);
+ flow_key->tp.dst = dst;
}
+ skb_clear_hash(skb);
return 0;
}
-static int set_sctp(struct sk_buff *skb, struct sw_flow_key *key,
- const struct ovs_key_sctp *sctp_port_key)
+static int set_sctp(struct sk_buff *skb, struct sw_flow_key *flow_key,
+ const struct ovs_key_sctp *key,
+ const struct ovs_key_sctp *mask)
{
+ unsigned int sctphoff = skb_transport_offset(skb);
struct sctphdr *sh;
+ __le32 old_correct_csum, new_csum, old_csum;
int err;
- unsigned int sctphoff = skb_transport_offset(skb);
err = skb_ensure_writable(skb, sctphoff + sizeof(struct sctphdr));
if (unlikely(err))
return err;
sh = sctp_hdr(skb);
- if (sctp_port_key->sctp_src != sh->source ||
- sctp_port_key->sctp_dst != sh->dest) {
- __le32 old_correct_csum, new_csum, old_csum;
+ old_csum = sh->checksum;
+ old_correct_csum = sctp_compute_cksum(skb, sctphoff);
- old_csum = sh->checksum;
- old_correct_csum = sctp_compute_cksum(skb, sctphoff);
+ sh->source = MASKED(sh->source, key->sctp_src, mask->sctp_src);
+ sh->dest = MASKED(sh->dest, key->sctp_dst, mask->sctp_dst);
- sh->source = sctp_port_key->sctp_src;
- sh->dest = sctp_port_key->sctp_dst;
+ new_csum = sctp_compute_cksum(skb, sctphoff);
- new_csum = sctp_compute_cksum(skb, sctphoff);
+ /* Carry any checksum errors through. */
+ sh->checksum = old_csum ^ old_correct_csum ^ new_csum;
- /* Carry any checksum errors through. */
- sh->checksum = old_csum ^ old_correct_csum ^ new_csum;
-
- skb_clear_hash(skb);
- key->tp.src = sctp_port_key->sctp_src;
- key->tp.dst = sctp_port_key->sctp_dst;
- }
+ skb_clear_hash(skb);
+ flow_key->tp.src = sh->source;
+ flow_key->tp.dst = sh->dest;
return 0;
}
@@ -653,52 +720,77 @@ static void execute_hash(struct sk_buff *skb, struct sw_flow_key *key,
key->ovs_flow_hash = hash;
}
-static int execute_set_action(struct sk_buff *skb, struct sw_flow_key *key,
- const struct nlattr *nested_attr)
+static int execute_set_action(struct sk_buff *skb,
+ struct sw_flow_key *flow_key,
+ const struct nlattr *a)
+{
+ /* Only tunnel set execution is supported without a mask. */
+ if (nla_type(a) == OVS_KEY_ATTR_TUNNEL_INFO) {
+ OVS_CB(skb)->egress_tun_info = nla_data(a);
+ return 0;
+ }
+
+ return -EINVAL;
+}
+
+/* Mask is at the midpoint of the data. */
+#define get_mask(a, type) ((const type)nla_data(a) + 1)
+
+static int execute_masked_set_action(struct sk_buff *skb,
+ struct sw_flow_key *flow_key,
+ const struct nlattr *a)
{
int err = 0;
- switch (nla_type(nested_attr)) {
+ switch (nla_type(a)) {
case OVS_KEY_ATTR_PRIORITY:
- skb->priority = nla_get_u32(nested_attr);
- key->phy.priority = skb->priority;
+ SET_MASKED(skb->priority, nla_get_u32(a), *get_mask(a, u32 *));
+ flow_key->phy.priority = skb->priority;
break;
case OVS_KEY_ATTR_SKB_MARK:
- skb->mark = nla_get_u32(nested_attr);
- key->phy.skb_mark = skb->mark;
+ SET_MASKED(skb->mark, nla_get_u32(a), *get_mask(a, u32 *));
+ flow_key->phy.skb_mark = skb->mark;
break;
case OVS_KEY_ATTR_TUNNEL_INFO:
- OVS_CB(skb)->egress_tun_info = nla_data(nested_attr);
+ /* Masked data not supported for tunnel. */
+ err = -EINVAL;
break;
case OVS_KEY_ATTR_ETHERNET:
- err = set_eth_addr(skb, key, nla_data(nested_attr));
+ err = set_eth_addr(skb, flow_key, nla_data(a),
+ get_mask(a, struct ovs_key_ethernet *));
break;
case OVS_KEY_ATTR_IPV4:
- err = set_ipv4(skb, key, nla_data(nested_attr));
+ err = set_ipv4(skb, flow_key, nla_data(a),
+ get_mask(a, struct ovs_key_ipv4 *));
break;
case OVS_KEY_ATTR_IPV6:
- err = set_ipv6(skb, key, nla_data(nested_attr));
+ err = set_ipv6(skb, flow_key, nla_data(a),
+ get_mask(a, struct ovs_key_ipv6 *));
break;
case OVS_KEY_ATTR_TCP:
- err = set_tcp(skb, key, nla_data(nested_attr));
+ err = set_tcp(skb, flow_key, nla_data(a),
+ get_mask(a, struct ovs_key_tcp *));
break;
case OVS_KEY_ATTR_UDP:
- err = set_udp(skb, key, nla_data(nested_attr));
+ err = set_udp(skb, flow_key, nla_data(a),
+ get_mask(a, struct ovs_key_udp *));
break;
case OVS_KEY_ATTR_SCTP:
- err = set_sctp(skb, key, nla_data(nested_attr));
+ err = set_sctp(skb, flow_key, nla_data(a),
+ get_mask(a, struct ovs_key_sctp *));
break;
case OVS_KEY_ATTR_MPLS:
- err = set_mpls(skb, key, nla_data(nested_attr));
+ err = set_mpls(skb, flow_key, nla_data(a), get_mask(a,
+ __be32 *));
break;
}
@@ -818,6 +910,11 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
err = execute_set_action(skb, key, nla_data(a));
break;
+ case OVS_ACTION_ATTR_SET_MASKED:
+ case OVS_ACTION_ATTR_SET_TO_MASKED:
+ err = execute_masked_set_action(skb, key, nla_data(a));
+ break;
+
case OVS_ACTION_ATTR_SAMPLE:
err = sample(dp, skb, key, a);
break;
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index b07349e82d78..5bae7243c577 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -65,6 +65,8 @@ static struct genl_family dp_packet_genl_family;
static struct genl_family dp_flow_genl_family;
static struct genl_family dp_datapath_genl_family;
+static const struct nla_policy flow_policy[];
+
static const struct genl_multicast_group ovs_dp_flow_multicast_group = {
.name = OVS_FLOW_MCGROUP,
};
@@ -419,7 +421,7 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
if (!dp_ifindex)
return -ENODEV;
- if (vlan_tx_tag_present(skb)) {
+ if (skb_vlan_tag_present(skb)) {
nskb = skb_clone(skb, GFP_ATOMIC);
if (!nskb)
return -ENOMEM;
@@ -461,10 +463,8 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
0, upcall_info->cmd);
upcall->dp_ifindex = dp_ifindex;
- nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_KEY);
- err = ovs_nla_put_flow(key, key, user_skb);
+ err = ovs_nla_put_key(key, key, OVS_PACKET_ATTR_KEY, false, user_skb);
BUG_ON(err);
- nla_nest_end(user_skb, nla);
if (upcall_info->userdata)
__nla_put(user_skb, OVS_PACKET_ATTR_USERDATA,
@@ -664,46 +664,48 @@ static void get_dp_stats(const struct datapath *dp, struct ovs_dp_stats *stats,
}
}
-static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts)
+static bool should_fill_key(const struct sw_flow_id *sfid, uint32_t ufid_flags)
{
- return NLMSG_ALIGN(sizeof(struct ovs_header))
- + nla_total_size(ovs_key_attr_size()) /* OVS_FLOW_ATTR_KEY */
- + nla_total_size(ovs_key_attr_size()) /* OVS_FLOW_ATTR_MASK */
- + nla_total_size(sizeof(struct ovs_flow_stats)) /* OVS_FLOW_ATTR_STATS */
- + nla_total_size(1) /* OVS_FLOW_ATTR_TCP_FLAGS */
- + nla_total_size(8) /* OVS_FLOW_ATTR_USED */
- + nla_total_size(acts->actions_len); /* OVS_FLOW_ATTR_ACTIONS */
+ return ovs_identifier_is_ufid(sfid) &&
+ !(ufid_flags & OVS_UFID_F_OMIT_KEY);
}
-/* Called with ovs_mutex or RCU read lock. */
-static int ovs_flow_cmd_fill_match(const struct sw_flow *flow,
- struct sk_buff *skb)
+static bool should_fill_mask(uint32_t ufid_flags)
{
- struct nlattr *nla;
- int err;
+ return !(ufid_flags & OVS_UFID_F_OMIT_MASK);
+}
- /* Fill flow key. */
- nla = nla_nest_start(skb, OVS_FLOW_ATTR_KEY);
- if (!nla)
- return -EMSGSIZE;
+static bool should_fill_actions(uint32_t ufid_flags)
+{
+ return !(ufid_flags & OVS_UFID_F_OMIT_ACTIONS);
+}
- err = ovs_nla_put_flow(&flow->unmasked_key, &flow->unmasked_key, skb);
- if (err)
- return err;
+static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts,
+ const struct sw_flow_id *sfid,
+ uint32_t ufid_flags)
+{
+ size_t len = NLMSG_ALIGN(sizeof(struct ovs_header));
- nla_nest_end(skb, nla);
+ /* OVS_FLOW_ATTR_UFID */
+ if (sfid && ovs_identifier_is_ufid(sfid))
+ len += nla_total_size(sfid->ufid_len);
- /* Fill flow mask. */
- nla = nla_nest_start(skb, OVS_FLOW_ATTR_MASK);
- if (!nla)
- return -EMSGSIZE;
+ /* OVS_FLOW_ATTR_KEY */
+ if (!sfid || should_fill_key(sfid, ufid_flags))
+ len += nla_total_size(ovs_key_attr_size());
- err = ovs_nla_put_flow(&flow->key, &flow->mask->key, skb);
- if (err)
- return err;
+ /* OVS_FLOW_ATTR_MASK */
+ if (should_fill_mask(ufid_flags))
+ len += nla_total_size(ovs_key_attr_size());
- nla_nest_end(skb, nla);
- return 0;
+ /* OVS_FLOW_ATTR_ACTIONS */
+ if (should_fill_actions(ufid_flags))
+ len += nla_total_size(acts->actions_len);
+
+ return len
+ + nla_total_size(sizeof(struct ovs_flow_stats)) /* OVS_FLOW_ATTR_STATS */
+ + nla_total_size(1) /* OVS_FLOW_ATTR_TCP_FLAGS */
+ + nla_total_size(8); /* OVS_FLOW_ATTR_USED */
}
/* Called with ovs_mutex or RCU read lock. */
@@ -774,7 +776,7 @@ static int ovs_flow_cmd_fill_actions(const struct sw_flow *flow,
/* Called with ovs_mutex or RCU read lock. */
static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex,
struct sk_buff *skb, u32 portid,
- u32 seq, u32 flags, u8 cmd)
+ u32 seq, u32 flags, u8 cmd, u32 ufid_flags)
{
const int skb_orig_len = skb->len;
struct ovs_header *ovs_header;
@@ -787,19 +789,34 @@ static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex,
ovs_header->dp_ifindex = dp_ifindex;
- err = ovs_flow_cmd_fill_match(flow, skb);
+ err = ovs_nla_put_identifier(flow, skb);
if (err)
goto error;
+ if (should_fill_key(&flow->id, ufid_flags)) {
+ err = ovs_nla_put_masked_key(flow, skb);
+ if (err)
+ goto error;
+ }
+
+ if (should_fill_mask(ufid_flags)) {
+ err = ovs_nla_put_mask(flow, skb);
+ if (err)
+ goto error;
+ }
+
err = ovs_flow_cmd_fill_stats(flow, skb);
if (err)
goto error;
- err = ovs_flow_cmd_fill_actions(flow, skb, skb_orig_len);
- if (err)
- goto error;
+ if (should_fill_actions(ufid_flags)) {
+ err = ovs_flow_cmd_fill_actions(flow, skb, skb_orig_len);
+ if (err)
+ goto error;
+ }
- return genlmsg_end(skb, ovs_header);
+ genlmsg_end(skb, ovs_header);
+ return 0;
error:
genlmsg_cancel(skb, ovs_header);
@@ -808,15 +825,19 @@ error:
/* May not be called with RCU read lock. */
static struct sk_buff *ovs_flow_cmd_alloc_info(const struct sw_flow_actions *acts,
+ const struct sw_flow_id *sfid,
struct genl_info *info,
- bool always)
+ bool always,
+ uint32_t ufid_flags)
{
struct sk_buff *skb;
+ size_t len;
if (!always && !ovs_must_notify(&dp_flow_genl_family, info, 0))
return NULL;
- skb = genlmsg_new_unicast(ovs_flow_cmd_msg_size(acts), info, GFP_KERNEL);
+ len = ovs_flow_cmd_msg_size(acts, sfid, ufid_flags);
+ skb = genlmsg_new_unicast(len, info, GFP_KERNEL);
if (!skb)
return ERR_PTR(-ENOMEM);
@@ -827,19 +848,19 @@ static struct sk_buff *ovs_flow_cmd_alloc_info(const struct sw_flow_actions *act
static struct sk_buff *ovs_flow_cmd_build_info(const struct sw_flow *flow,
int dp_ifindex,
struct genl_info *info, u8 cmd,
- bool always)
+ bool always, u32 ufid_flags)
{
struct sk_buff *skb;
int retval;
- skb = ovs_flow_cmd_alloc_info(ovsl_dereference(flow->sf_acts), info,
- always);
+ skb = ovs_flow_cmd_alloc_info(ovsl_dereference(flow->sf_acts),
+ &flow->id, info, always, ufid_flags);
if (IS_ERR_OR_NULL(skb))
return skb;
retval = ovs_flow_cmd_fill_info(flow, dp_ifindex, skb,
info->snd_portid, info->snd_seq, 0,
- cmd);
+ cmd, ufid_flags);
BUG_ON(retval < 0);
return skb;
}
@@ -848,12 +869,14 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
{
struct nlattr **a = info->attrs;
struct ovs_header *ovs_header = info->userhdr;
- struct sw_flow *flow, *new_flow;
+ struct sw_flow *flow = NULL, *new_flow;
struct sw_flow_mask mask;
struct sk_buff *reply;
struct datapath *dp;
+ struct sw_flow_key key;
struct sw_flow_actions *acts;
struct sw_flow_match match;
+ u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
int error;
bool log = !a[OVS_FLOW_ATTR_PROBE];
@@ -878,13 +901,19 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
}
/* Extract key. */
- ovs_match_init(&match, &new_flow->unmasked_key, &mask);
+ ovs_match_init(&match, &key, &mask);
error = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY],
a[OVS_FLOW_ATTR_MASK], log);
if (error)
goto err_kfree_flow;
- ovs_flow_mask_key(&new_flow->key, &new_flow->unmasked_key, &mask);
+ ovs_flow_mask_key(&new_flow->key, &key, &mask);
+
+ /* Extract flow identifier. */
+ error = ovs_nla_get_identifier(&new_flow->id, a[OVS_FLOW_ATTR_UFID],
+ &key, log);
+ if (error)
+ goto err_kfree_flow;
/* Validate actions. */
error = ovs_nla_copy_actions(a[OVS_FLOW_ATTR_ACTIONS], &new_flow->key,
@@ -894,7 +923,8 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
goto err_kfree_flow;
}
- reply = ovs_flow_cmd_alloc_info(acts, info, false);
+ reply = ovs_flow_cmd_alloc_info(acts, &new_flow->id, info, false,
+ ufid_flags);
if (IS_ERR(reply)) {
error = PTR_ERR(reply);
goto err_kfree_acts;
@@ -906,8 +936,12 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
error = -ENODEV;
goto err_unlock_ovs;
}
+
/* Check if this is a duplicate flow */
- flow = ovs_flow_tbl_lookup(&dp->table, &new_flow->unmasked_key);
+ if (ovs_identifier_is_ufid(&new_flow->id))
+ flow = ovs_flow_tbl_lookup_ufid(&dp->table, &new_flow->id);
+ if (!flow)
+ flow = ovs_flow_tbl_lookup(&dp->table, &key);
if (likely(!flow)) {
rcu_assign_pointer(new_flow->sf_acts, acts);
@@ -923,7 +957,8 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
ovs_header->dp_ifindex,
reply, info->snd_portid,
info->snd_seq, 0,
- OVS_FLOW_CMD_NEW);
+ OVS_FLOW_CMD_NEW,
+ ufid_flags);
BUG_ON(error < 0);
}
ovs_unlock();
@@ -941,10 +976,15 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
error = -EEXIST;
goto err_unlock_ovs;
}
- /* The unmasked key has to be the same for flow updates. */
- if (unlikely(!ovs_flow_cmp_unmasked_key(flow, &match))) {
- /* Look for any overlapping flow. */
- flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
+ /* The flow identifier has to be the same for flow updates.
+ * Look for any overlapping flow.
+ */
+ if (unlikely(!ovs_flow_cmp(flow, &match))) {
+ if (ovs_identifier_is_key(&flow->id))
+ flow = ovs_flow_tbl_lookup_exact(&dp->table,
+ &match);
+ else /* UFID matches but key is different */
+ flow = NULL;
if (!flow) {
error = -ENOENT;
goto err_unlock_ovs;
@@ -959,7 +999,8 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
ovs_header->dp_ifindex,
reply, info->snd_portid,
info->snd_seq, 0,
- OVS_FLOW_CMD_NEW);
+ OVS_FLOW_CMD_NEW,
+ ufid_flags);
BUG_ON(error < 0);
}
ovs_unlock();
@@ -1015,8 +1056,11 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
struct datapath *dp;
struct sw_flow_actions *old_acts = NULL, *acts = NULL;
struct sw_flow_match match;
+ struct sw_flow_id sfid;
+ u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
int error;
bool log = !a[OVS_FLOW_ATTR_PROBE];
+ bool ufid_present;
/* Extract key. */
error = -EINVAL;
@@ -1025,6 +1069,7 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
goto error;
}
+ ufid_present = ovs_nla_get_ufid(&sfid, a[OVS_FLOW_ATTR_UFID], log);
ovs_match_init(&match, &key, &mask);
error = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY],
a[OVS_FLOW_ATTR_MASK], log);
@@ -1041,7 +1086,8 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
}
/* Can allocate before locking if have acts. */
- reply = ovs_flow_cmd_alloc_info(acts, info, false);
+ reply = ovs_flow_cmd_alloc_info(acts, &sfid, info, false,
+ ufid_flags);
if (IS_ERR(reply)) {
error = PTR_ERR(reply);
goto err_kfree_acts;
@@ -1055,7 +1101,10 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
goto err_unlock_ovs;
}
/* Check that the flow exists. */
- flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
+ if (ufid_present)
+ flow = ovs_flow_tbl_lookup_ufid(&dp->table, &sfid);
+ else
+ flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
if (unlikely(!flow)) {
error = -ENOENT;
goto err_unlock_ovs;
@@ -1071,13 +1120,16 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
ovs_header->dp_ifindex,
reply, info->snd_portid,
info->snd_seq, 0,
- OVS_FLOW_CMD_NEW);
+ OVS_FLOW_CMD_NEW,
+ ufid_flags);
BUG_ON(error < 0);
}
} else {
/* Could not alloc without acts before locking. */
reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex,
- info, OVS_FLOW_CMD_NEW, false);
+ info, OVS_FLOW_CMD_NEW, false,
+ ufid_flags);
+
if (unlikely(IS_ERR(reply))) {
error = PTR_ERR(reply);
goto err_unlock_ovs;
@@ -1114,17 +1166,22 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
struct sw_flow *flow;
struct datapath *dp;
struct sw_flow_match match;
- int err;
+ struct sw_flow_id ufid;
+ u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
+ int err = 0;
bool log = !a[OVS_FLOW_ATTR_PROBE];
+ bool ufid_present;
- if (!a[OVS_FLOW_ATTR_KEY]) {
+ ufid_present = ovs_nla_get_ufid(&ufid, a[OVS_FLOW_ATTR_UFID], log);
+ if (a[OVS_FLOW_ATTR_KEY]) {
+ ovs_match_init(&match, &key, NULL);
+ err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL,
+ log);
+ } else if (!ufid_present) {
OVS_NLERR(log,
"Flow get message rejected, Key attribute missing.");
- return -EINVAL;
+ err = -EINVAL;
}
-
- ovs_match_init(&match, &key, NULL);
- err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL, log);
if (err)
return err;
@@ -1135,14 +1192,17 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
goto unlock;
}
- flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
+ if (ufid_present)
+ flow = ovs_flow_tbl_lookup_ufid(&dp->table, &ufid);
+ else
+ flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
if (!flow) {
err = -ENOENT;
goto unlock;
}
reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex, info,
- OVS_FLOW_CMD_NEW, true);
+ OVS_FLOW_CMD_NEW, true, ufid_flags);
if (IS_ERR(reply)) {
err = PTR_ERR(reply);
goto unlock;
@@ -1161,13 +1221,17 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
struct ovs_header *ovs_header = info->userhdr;
struct sw_flow_key key;
struct sk_buff *reply;
- struct sw_flow *flow;
+ struct sw_flow *flow = NULL;
struct datapath *dp;
struct sw_flow_match match;
+ struct sw_flow_id ufid;
+ u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
int err;
bool log = !a[OVS_FLOW_ATTR_PROBE];
+ bool ufid_present;
- if (likely(a[OVS_FLOW_ATTR_KEY])) {
+ ufid_present = ovs_nla_get_ufid(&ufid, a[OVS_FLOW_ATTR_UFID], log);
+ if (a[OVS_FLOW_ATTR_KEY]) {
ovs_match_init(&match, &key, NULL);
err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL,
log);
@@ -1182,12 +1246,15 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
goto unlock;
}
- if (unlikely(!a[OVS_FLOW_ATTR_KEY])) {
+ if (unlikely(!a[OVS_FLOW_ATTR_KEY] && !ufid_present)) {
err = ovs_flow_tbl_flush(&dp->table);
goto unlock;
}
- flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
+ if (ufid_present)
+ flow = ovs_flow_tbl_lookup_ufid(&dp->table, &ufid);
+ else
+ flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
if (unlikely(!flow)) {
err = -ENOENT;
goto unlock;
@@ -1197,14 +1264,15 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
ovs_unlock();
reply = ovs_flow_cmd_alloc_info((const struct sw_flow_actions __force *) flow->sf_acts,
- info, false);
+ &flow->id, info, false, ufid_flags);
if (likely(reply)) {
if (likely(!IS_ERR(reply))) {
rcu_read_lock(); /*To keep RCU checker happy. */
err = ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex,
reply, info->snd_portid,
info->snd_seq, 0,
- OVS_FLOW_CMD_DEL);
+ OVS_FLOW_CMD_DEL,
+ ufid_flags);
rcu_read_unlock();
BUG_ON(err < 0);
@@ -1223,9 +1291,18 @@ unlock:
static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
+ struct nlattr *a[__OVS_FLOW_ATTR_MAX];
struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
struct table_instance *ti;
struct datapath *dp;
+ u32 ufid_flags;
+ int err;
+
+ err = genlmsg_parse(cb->nlh, &dp_flow_genl_family, a,
+ OVS_FLOW_ATTR_MAX, flow_policy);
+ if (err)
+ return err;
+ ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
rcu_read_lock();
dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex);
@@ -1248,7 +1325,7 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
if (ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex, skb,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, NLM_F_MULTI,
- OVS_FLOW_CMD_NEW) < 0)
+ OVS_FLOW_CMD_NEW, ufid_flags) < 0)
break;
cb->args[0] = bucket;
@@ -1264,6 +1341,8 @@ static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = {
[OVS_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED },
[OVS_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG },
[OVS_FLOW_ATTR_PROBE] = { .type = NLA_FLAG },
+ [OVS_FLOW_ATTR_UFID] = { .type = NLA_UNSPEC, .len = 1 },
+ [OVS_FLOW_ATTR_UFID_FLAGS] = { .type = NLA_U32 },
};
static const struct genl_ops dp_flow_genl_ops[] = {
@@ -1349,7 +1428,8 @@ static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,
if (nla_put_u32(skb, OVS_DP_ATTR_USER_FEATURES, dp->user_features))
goto nla_put_failure;
- return genlmsg_end(skb, ovs_header);
+ genlmsg_end(skb, ovs_header);
+ return 0;
nla_put_failure:
genlmsg_cancel(skb, ovs_header);
@@ -1723,7 +1803,8 @@ static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
if (err == -EMSGSIZE)
goto error;
- return genlmsg_end(skb, ovs_header);
+ genlmsg_end(skb, ovs_header);
+ return 0;
nla_put_failure:
err = -EMSGSIZE;
@@ -2113,14 +2194,55 @@ static int __net_init ovs_init_net(struct net *net)
return 0;
}
-static void __net_exit ovs_exit_net(struct net *net)
+static void __net_exit list_vports_from_net(struct net *net, struct net *dnet,
+ struct list_head *head)
{
- struct datapath *dp, *dp_next;
struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
+ struct datapath *dp;
+
+ list_for_each_entry(dp, &ovs_net->dps, list_node) {
+ int i;
+
+ for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
+ struct vport *vport;
+
+ hlist_for_each_entry(vport, &dp->ports[i], dp_hash_node) {
+ struct netdev_vport *netdev_vport;
+
+ if (vport->ops->type != OVS_VPORT_TYPE_INTERNAL)
+ continue;
+
+ netdev_vport = netdev_vport_priv(vport);
+ if (dev_net(netdev_vport->dev) == dnet)
+ list_add(&vport->detach_list, head);
+ }
+ }
+ }
+}
+
+static void __net_exit ovs_exit_net(struct net *dnet)
+{
+ struct datapath *dp, *dp_next;
+ struct ovs_net *ovs_net = net_generic(dnet, ovs_net_id);
+ struct vport *vport, *vport_next;
+ struct net *net;
+ LIST_HEAD(head);
ovs_lock();
list_for_each_entry_safe(dp, dp_next, &ovs_net->dps, list_node)
__dp_destroy(dp);
+
+ rtnl_lock();
+ for_each_net(net)
+ list_vports_from_net(net, dnet, &head);
+ rtnl_unlock();
+
+ /* Detach all vports from given namespace. */
+ list_for_each_entry_safe(vport, vport_next, &head, detach_list) {
+ list_del(&vport->detach_list);
+ ovs_dp_detach_port(vport);
+ }
+
ovs_unlock();
cancel_work_sync(&ovs_net->dp_notify_work);
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index da2fae0873a5..50ec42f170a0 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -70,7 +70,7 @@ void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags,
{
struct flow_stats *stats;
int node = numa_node_id();
- int len = skb->len + (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0);
+ int len = skb->len + (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0);
stats = rcu_dereference(flow->stats[node]);
@@ -472,7 +472,7 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
*/
key->eth.tci = 0;
- if (vlan_tx_tag_present(skb))
+ if (skb_vlan_tag_present(skb))
key->eth.tci = htons(skb->vlan_tci);
else if (eth->h_proto == htons(ETH_P_8021Q))
if (unlikely(parse_vlan(skb, key)))
@@ -691,7 +691,7 @@ int ovs_flow_key_extract(const struct ovs_tunnel_info *tun_info,
BUILD_BUG_ON((1 << (sizeof(tun_info->options_len) *
8)) - 1
> sizeof(key->tun_opts));
- memcpy(GENEVE_OPTS(key, tun_info->options_len),
+ memcpy(TUN_METADATA_OPTS(key, tun_info->options_len),
tun_info->options, tun_info->options_len);
key->tun_opts_len = tun_info->options_len;
} else {
@@ -717,6 +717,8 @@ int ovs_flow_key_extract_userspace(const struct nlattr *attr,
{
int err;
+ memset(key, 0, OVS_SW_FLOW_KEY_METADATA_SIZE);
+
/* Extract metadata from netlink attributes. */
err = ovs_nla_get_flow_metadata(attr, key, log);
if (err)
diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h
index a8b30f334388..a076e445ccc2 100644
--- a/net/openvswitch/flow.h
+++ b/net/openvswitch/flow.h
@@ -53,7 +53,7 @@ struct ovs_key_ipv4_tunnel {
struct ovs_tunnel_info {
struct ovs_key_ipv4_tunnel tunnel;
- const struct geneve_opt *options;
+ const void *options;
u8 options_len;
};
@@ -61,10 +61,10 @@ struct ovs_tunnel_info {
* maximum size. This allows us to get the benefits of variable length
* matching for small options.
*/
-#define GENEVE_OPTS(flow_key, opt_len) \
- ((struct geneve_opt *)((flow_key)->tun_opts + \
- FIELD_SIZEOF(struct sw_flow_key, tun_opts) - \
- opt_len))
+#define TUN_METADATA_OFFSET(opt_len) \
+ (FIELD_SIZEOF(struct sw_flow_key, tun_opts) - opt_len)
+#define TUN_METADATA_OPTS(flow_key, opt_len) \
+ ((void *)((flow_key)->tun_opts + TUN_METADATA_OFFSET(opt_len)))
static inline void __ovs_flow_tun_info_init(struct ovs_tunnel_info *tun_info,
__be32 saddr, __be32 daddr,
@@ -73,7 +73,7 @@ static inline void __ovs_flow_tun_info_init(struct ovs_tunnel_info *tun_info,
__be16 tp_dst,
__be64 tun_id,
__be16 tun_flags,
- const struct geneve_opt *opts,
+ const void *opts,
u8 opts_len)
{
tun_info->tunnel.tun_id = tun_id;
@@ -105,7 +105,7 @@ static inline void ovs_flow_tun_info_init(struct ovs_tunnel_info *tun_info,
__be16 tp_dst,
__be64 tun_id,
__be16 tun_flags,
- const struct geneve_opt *opts,
+ const void *opts,
u8 opts_len)
{
__ovs_flow_tun_info_init(tun_info, iph->saddr, iph->daddr,
@@ -197,6 +197,16 @@ struct sw_flow_match {
struct sw_flow_mask *mask;
};
+#define MAX_UFID_LENGTH 16 /* 128 bits */
+
+struct sw_flow_id {
+ u32 ufid_len;
+ union {
+ u32 ufid[MAX_UFID_LENGTH / 4];
+ struct sw_flow_key *unmasked_key;
+ };
+};
+
struct sw_flow_actions {
struct rcu_head rcu;
u32 actions_len;
@@ -213,13 +223,15 @@ struct flow_stats {
struct sw_flow {
struct rcu_head rcu;
- struct hlist_node hash_node[2];
- u32 hash;
+ struct {
+ struct hlist_node node[2];
+ u32 hash;
+ } flow_table, ufid_table;
int stats_last_writer; /* NUMA-node id of the last writer on
* 'stats[0]'.
*/
struct sw_flow_key key;
- struct sw_flow_key unmasked_key;
+ struct sw_flow_id id;
struct sw_flow_mask *mask;
struct sw_flow_actions __rcu *sf_acts;
struct flow_stats __rcu *stats[]; /* One for each NUMA node. First one
@@ -243,6 +255,16 @@ struct arp_eth_header {
unsigned char ar_tip[4]; /* target IP address */
} __packed;
+static inline bool ovs_identifier_is_ufid(const struct sw_flow_id *sfid)
+{
+ return sfid->ufid_len;
+}
+
+static inline bool ovs_identifier_is_key(const struct sw_flow_id *sfid)
+{
+ return !ovs_identifier_is_ufid(sfid);
+}
+
void ovs_flow_stats_update(struct sw_flow *, __be16 tcp_flags,
const struct sk_buff *);
void ovs_flow_stats_get(const struct sw_flow *, struct ovs_flow_stats *,
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index d1eecf707613..22b18c145c92 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -49,6 +49,14 @@
#include <net/mpls.h>
#include "flow_netlink.h"
+#include "vport-vxlan.h"
+
+struct ovs_len_tbl {
+ int len;
+ const struct ovs_len_tbl *next;
+};
+
+#define OVS_ATTR_NESTED -1
static void update_range(struct sw_flow_match *match,
size_t offset, size_t size, bool is_mask)
@@ -261,6 +269,9 @@ size_t ovs_tun_key_attr_size(void)
+ nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_CSUM */
+ nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_OAM */
+ nla_total_size(256) /* OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS */
+ /* OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS is mutually exclusive with
+ * OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS and covered by it.
+ */
+ nla_total_size(2) /* OVS_TUNNEL_KEY_ATTR_TP_SRC */
+ nla_total_size(2); /* OVS_TUNNEL_KEY_ATTR_TP_DST */
}
@@ -289,29 +300,45 @@ size_t ovs_key_attr_size(void)
+ nla_total_size(28); /* OVS_KEY_ATTR_ND */
}
+static const struct ovs_len_tbl ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] = {
+ [OVS_TUNNEL_KEY_ATTR_ID] = { .len = sizeof(u64) },
+ [OVS_TUNNEL_KEY_ATTR_IPV4_SRC] = { .len = sizeof(u32) },
+ [OVS_TUNNEL_KEY_ATTR_IPV4_DST] = { .len = sizeof(u32) },
+ [OVS_TUNNEL_KEY_ATTR_TOS] = { .len = 1 },
+ [OVS_TUNNEL_KEY_ATTR_TTL] = { .len = 1 },
+ [OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = { .len = 0 },
+ [OVS_TUNNEL_KEY_ATTR_CSUM] = { .len = 0 },
+ [OVS_TUNNEL_KEY_ATTR_TP_SRC] = { .len = sizeof(u16) },
+ [OVS_TUNNEL_KEY_ATTR_TP_DST] = { .len = sizeof(u16) },
+ [OVS_TUNNEL_KEY_ATTR_OAM] = { .len = 0 },
+ [OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS] = { .len = OVS_ATTR_NESTED },
+ [OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS] = { .len = OVS_ATTR_NESTED },
+};
+
/* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute. */
-static const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
- [OVS_KEY_ATTR_ENCAP] = -1,
- [OVS_KEY_ATTR_PRIORITY] = sizeof(u32),
- [OVS_KEY_ATTR_IN_PORT] = sizeof(u32),
- [OVS_KEY_ATTR_SKB_MARK] = sizeof(u32),
- [OVS_KEY_ATTR_ETHERNET] = sizeof(struct ovs_key_ethernet),
- [OVS_KEY_ATTR_VLAN] = sizeof(__be16),
- [OVS_KEY_ATTR_ETHERTYPE] = sizeof(__be16),
- [OVS_KEY_ATTR_IPV4] = sizeof(struct ovs_key_ipv4),
- [OVS_KEY_ATTR_IPV6] = sizeof(struct ovs_key_ipv6),
- [OVS_KEY_ATTR_TCP] = sizeof(struct ovs_key_tcp),
- [OVS_KEY_ATTR_TCP_FLAGS] = sizeof(__be16),
- [OVS_KEY_ATTR_UDP] = sizeof(struct ovs_key_udp),
- [OVS_KEY_ATTR_SCTP] = sizeof(struct ovs_key_sctp),
- [OVS_KEY_ATTR_ICMP] = sizeof(struct ovs_key_icmp),
- [OVS_KEY_ATTR_ICMPV6] = sizeof(struct ovs_key_icmpv6),
- [OVS_KEY_ATTR_ARP] = sizeof(struct ovs_key_arp),
- [OVS_KEY_ATTR_ND] = sizeof(struct ovs_key_nd),
- [OVS_KEY_ATTR_RECIRC_ID] = sizeof(u32),
- [OVS_KEY_ATTR_DP_HASH] = sizeof(u32),
- [OVS_KEY_ATTR_TUNNEL] = -1,
- [OVS_KEY_ATTR_MPLS] = sizeof(struct ovs_key_mpls),
+static const struct ovs_len_tbl ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
+ [OVS_KEY_ATTR_ENCAP] = { .len = OVS_ATTR_NESTED },
+ [OVS_KEY_ATTR_PRIORITY] = { .len = sizeof(u32) },
+ [OVS_KEY_ATTR_IN_PORT] = { .len = sizeof(u32) },
+ [OVS_KEY_ATTR_SKB_MARK] = { .len = sizeof(u32) },
+ [OVS_KEY_ATTR_ETHERNET] = { .len = sizeof(struct ovs_key_ethernet) },
+ [OVS_KEY_ATTR_VLAN] = { .len = sizeof(__be16) },
+ [OVS_KEY_ATTR_ETHERTYPE] = { .len = sizeof(__be16) },
+ [OVS_KEY_ATTR_IPV4] = { .len = sizeof(struct ovs_key_ipv4) },
+ [OVS_KEY_ATTR_IPV6] = { .len = sizeof(struct ovs_key_ipv6) },
+ [OVS_KEY_ATTR_TCP] = { .len = sizeof(struct ovs_key_tcp) },
+ [OVS_KEY_ATTR_TCP_FLAGS] = { .len = sizeof(__be16) },
+ [OVS_KEY_ATTR_UDP] = { .len = sizeof(struct ovs_key_udp) },
+ [OVS_KEY_ATTR_SCTP] = { .len = sizeof(struct ovs_key_sctp) },
+ [OVS_KEY_ATTR_ICMP] = { .len = sizeof(struct ovs_key_icmp) },
+ [OVS_KEY_ATTR_ICMPV6] = { .len = sizeof(struct ovs_key_icmpv6) },
+ [OVS_KEY_ATTR_ARP] = { .len = sizeof(struct ovs_key_arp) },
+ [OVS_KEY_ATTR_ND] = { .len = sizeof(struct ovs_key_nd) },
+ [OVS_KEY_ATTR_RECIRC_ID] = { .len = sizeof(u32) },
+ [OVS_KEY_ATTR_DP_HASH] = { .len = sizeof(u32) },
+ [OVS_KEY_ATTR_TUNNEL] = { .len = OVS_ATTR_NESTED,
+ .next = ovs_tunnel_key_lens, },
+ [OVS_KEY_ATTR_MPLS] = { .len = sizeof(struct ovs_key_mpls) },
};
static bool is_all_zero(const u8 *fp, size_t size)
@@ -352,8 +379,8 @@ static int __parse_flow_nlattrs(const struct nlattr *attr,
return -EINVAL;
}
- expected_len = ovs_key_lens[type];
- if (nla_len(nla) != expected_len && expected_len != -1) {
+ expected_len = ovs_key_lens[type].len;
+ if (nla_len(nla) != expected_len && expected_len != OVS_ATTR_NESTED) {
OVS_NLERR(log, "Key %d has unexpected len %d expected %d",
type, nla_len(nla), expected_len);
return -EINVAL;
@@ -432,13 +459,47 @@ static int genev_tun_opt_from_nlattr(const struct nlattr *a,
SW_FLOW_KEY_PUT(match, tun_opts_len, 0xff, true);
}
- opt_key_offset = (unsigned long)GENEVE_OPTS((struct sw_flow_key *)0,
- nla_len(a));
+ opt_key_offset = TUN_METADATA_OFFSET(nla_len(a));
SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset, nla_data(a),
nla_len(a), is_mask);
return 0;
}
+static const struct nla_policy vxlan_opt_policy[OVS_VXLAN_EXT_MAX + 1] = {
+ [OVS_VXLAN_EXT_GBP] = { .type = NLA_U32 },
+};
+
+static int vxlan_tun_opt_from_nlattr(const struct nlattr *a,
+ struct sw_flow_match *match, bool is_mask,
+ bool log)
+{
+ struct nlattr *tb[OVS_VXLAN_EXT_MAX+1];
+ unsigned long opt_key_offset;
+ struct ovs_vxlan_opts opts;
+ int err;
+
+ BUILD_BUG_ON(sizeof(opts) > sizeof(match->key->tun_opts));
+
+ err = nla_parse_nested(tb, OVS_VXLAN_EXT_MAX, a, vxlan_opt_policy);
+ if (err < 0)
+ return err;
+
+ memset(&opts, 0, sizeof(opts));
+
+ if (tb[OVS_VXLAN_EXT_GBP])
+ opts.gbp = nla_get_u32(tb[OVS_VXLAN_EXT_GBP]);
+
+ if (!is_mask)
+ SW_FLOW_KEY_PUT(match, tun_opts_len, sizeof(opts), false);
+ else
+ SW_FLOW_KEY_PUT(match, tun_opts_len, 0xff, true);
+
+ opt_key_offset = TUN_METADATA_OFFSET(sizeof(opts));
+ SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset, &opts, sizeof(opts),
+ is_mask);
+ return 0;
+}
+
static int ipv4_tun_from_nlattr(const struct nlattr *attr,
struct sw_flow_match *match, bool is_mask,
bool log)
@@ -447,35 +508,22 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr,
int rem;
bool ttl = false;
__be16 tun_flags = 0;
+ int opts_type = 0;
nla_for_each_nested(a, attr, rem) {
int type = nla_type(a);
int err;
- static const u32 ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] = {
- [OVS_TUNNEL_KEY_ATTR_ID] = sizeof(u64),
- [OVS_TUNNEL_KEY_ATTR_IPV4_SRC] = sizeof(u32),
- [OVS_TUNNEL_KEY_ATTR_IPV4_DST] = sizeof(u32),
- [OVS_TUNNEL_KEY_ATTR_TOS] = 1,
- [OVS_TUNNEL_KEY_ATTR_TTL] = 1,
- [OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = 0,
- [OVS_TUNNEL_KEY_ATTR_CSUM] = 0,
- [OVS_TUNNEL_KEY_ATTR_TP_SRC] = sizeof(u16),
- [OVS_TUNNEL_KEY_ATTR_TP_DST] = sizeof(u16),
- [OVS_TUNNEL_KEY_ATTR_OAM] = 0,
- [OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS] = -1,
- };
-
if (type > OVS_TUNNEL_KEY_ATTR_MAX) {
OVS_NLERR(log, "Tunnel attr %d out of range max %d",
type, OVS_TUNNEL_KEY_ATTR_MAX);
return -EINVAL;
}
- if (ovs_tunnel_key_lens[type] != nla_len(a) &&
- ovs_tunnel_key_lens[type] != -1) {
+ if (ovs_tunnel_key_lens[type].len != nla_len(a) &&
+ ovs_tunnel_key_lens[type].len != OVS_ATTR_NESTED) {
OVS_NLERR(log, "Tunnel attr %d has unexpected len %d expected %d",
- type, nla_len(a), ovs_tunnel_key_lens[type]);
+ type, nla_len(a), ovs_tunnel_key_lens[type].len);
return -EINVAL;
}
@@ -520,11 +568,30 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr,
tun_flags |= TUNNEL_OAM;
break;
case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS:
+ if (opts_type) {
+ OVS_NLERR(log, "Multiple metadata blocks provided");
+ return -EINVAL;
+ }
+
err = genev_tun_opt_from_nlattr(a, match, is_mask, log);
if (err)
return err;
- tun_flags |= TUNNEL_OPTIONS_PRESENT;
+ tun_flags |= TUNNEL_GENEVE_OPT;
+ opts_type = type;
+ break;
+ case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS:
+ if (opts_type) {
+ OVS_NLERR(log, "Multiple metadata blocks provided");
+ return -EINVAL;
+ }
+
+ err = vxlan_tun_opt_from_nlattr(a, match, is_mask, log);
+ if (err)
+ return err;
+
+ tun_flags |= TUNNEL_VXLAN_OPT;
+ opts_type = type;
break;
default:
OVS_NLERR(log, "Unknown IPv4 tunnel attribute %d",
@@ -553,13 +620,29 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr,
}
}
+ return opts_type;
+}
+
+static int vxlan_opt_to_nlattr(struct sk_buff *skb,
+ const void *tun_opts, int swkey_tun_opts_len)
+{
+ const struct ovs_vxlan_opts *opts = tun_opts;
+ struct nlattr *nla;
+
+ nla = nla_nest_start(skb, OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS);
+ if (!nla)
+ return -EMSGSIZE;
+
+ if (nla_put_u32(skb, OVS_VXLAN_EXT_GBP, opts->gbp) < 0)
+ return -EMSGSIZE;
+
+ nla_nest_end(skb, nla);
return 0;
}
static int __ipv4_tun_to_nlattr(struct sk_buff *skb,
const struct ovs_key_ipv4_tunnel *output,
- const struct geneve_opt *tun_opts,
- int swkey_tun_opts_len)
+ const void *tun_opts, int swkey_tun_opts_len)
{
if (output->tun_flags & TUNNEL_KEY &&
nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, output->tun_id))
@@ -590,18 +673,22 @@ static int __ipv4_tun_to_nlattr(struct sk_buff *skb,
if ((output->tun_flags & TUNNEL_OAM) &&
nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_OAM))
return -EMSGSIZE;
- if (tun_opts &&
- nla_put(skb, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS,
- swkey_tun_opts_len, tun_opts))
- return -EMSGSIZE;
+ if (tun_opts) {
+ if (output->tun_flags & TUNNEL_GENEVE_OPT &&
+ nla_put(skb, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS,
+ swkey_tun_opts_len, tun_opts))
+ return -EMSGSIZE;
+ else if (output->tun_flags & TUNNEL_VXLAN_OPT &&
+ vxlan_opt_to_nlattr(skb, tun_opts, swkey_tun_opts_len))
+ return -EMSGSIZE;
+ }
return 0;
}
static int ipv4_tun_to_nlattr(struct sk_buff *skb,
const struct ovs_key_ipv4_tunnel *output,
- const struct geneve_opt *tun_opts,
- int swkey_tun_opts_len)
+ const void *tun_opts, int swkey_tun_opts_len)
{
struct nlattr *nla;
int err;
@@ -675,7 +762,7 @@ static int metadata_from_nlattrs(struct sw_flow_match *match, u64 *attrs,
}
if (*attrs & (1 << OVS_KEY_ATTR_TUNNEL)) {
if (ipv4_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], match,
- is_mask, log))
+ is_mask, log) < 0)
return -EINVAL;
*attrs &= ~(1 << OVS_KEY_ATTR_TUNNEL);
}
@@ -915,18 +1002,16 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs,
return 0;
}
-static void nlattr_set(struct nlattr *attr, u8 val, bool is_attr_mask_key)
+static void nlattr_set(struct nlattr *attr, u8 val,
+ const struct ovs_len_tbl *tbl)
{
struct nlattr *nla;
int rem;
/* The nlattr stream should already have been validated */
nla_for_each_nested(nla, attr, rem) {
- /* We assume that ovs_key_lens[type] == -1 means that type is a
- * nested attribute
- */
- if (is_attr_mask_key && ovs_key_lens[nla_type(nla)] == -1)
- nlattr_set(nla, val, false);
+ if (tbl && tbl[nla_type(nla)].len == OVS_ATTR_NESTED)
+ nlattr_set(nla, val, tbl[nla_type(nla)].next);
else
memset(nla_data(nla), val, nla_len(nla));
}
@@ -934,7 +1019,7 @@ static void nlattr_set(struct nlattr *attr, u8 val, bool is_attr_mask_key)
static void mask_set_nlattr(struct nlattr *attr, u8 val)
{
- nlattr_set(attr, val, true);
+ nlattr_set(attr, val, ovs_key_lens);
}
/**
@@ -1095,6 +1180,59 @@ free_newmask:
return err;
}
+static size_t get_ufid_len(const struct nlattr *attr, bool log)
+{
+ size_t len;
+
+ if (!attr)
+ return 0;
+
+ len = nla_len(attr);
+ if (len < 1 || len > MAX_UFID_LENGTH) {
+ OVS_NLERR(log, "ufid size %u bytes exceeds the range (1, %d)",
+ nla_len(attr), MAX_UFID_LENGTH);
+ return 0;
+ }
+
+ return len;
+}
+
+/* Initializes 'flow->ufid', returning true if 'attr' contains a valid UFID,
+ * or false otherwise.
+ */
+bool ovs_nla_get_ufid(struct sw_flow_id *sfid, const struct nlattr *attr,
+ bool log)
+{
+ sfid->ufid_len = get_ufid_len(attr, log);
+ if (sfid->ufid_len)
+ memcpy(sfid->ufid, nla_data(attr), sfid->ufid_len);
+
+ return sfid->ufid_len;
+}
+
+int ovs_nla_get_identifier(struct sw_flow_id *sfid, const struct nlattr *ufid,
+ const struct sw_flow_key *key, bool log)
+{
+ struct sw_flow_key *new_key;
+
+ if (ovs_nla_get_ufid(sfid, ufid, log))
+ return 0;
+
+ /* If UFID was not provided, use unmasked key. */
+ new_key = kmalloc(sizeof(*new_key), GFP_KERNEL);
+ if (!new_key)
+ return -ENOMEM;
+ memcpy(new_key, key, sizeof(*key));
+ sfid->unmasked_key = new_key;
+
+ return 0;
+}
+
+u32 ovs_nla_get_ufid_flags(const struct nlattr *attr)
+{
+ return attr ? nla_get_u32(attr) : 0;
+}
+
/**
* ovs_nla_get_flow_metadata - parses Netlink attributes into a flow key.
* @key: Receives extracted in_port, priority, tun_key and skb_mark.
@@ -1131,12 +1269,12 @@ int ovs_nla_get_flow_metadata(const struct nlattr *attr,
return metadata_from_nlattrs(&match, &attrs, a, false, log);
}
-int ovs_nla_put_flow(const struct sw_flow_key *swkey,
- const struct sw_flow_key *output, struct sk_buff *skb)
+static int __ovs_nla_put_key(const struct sw_flow_key *swkey,
+ const struct sw_flow_key *output, bool is_mask,
+ struct sk_buff *skb)
{
struct ovs_key_ethernet *eth_key;
struct nlattr *nla, *encap;
- bool is_mask = (swkey != output);
if (nla_put_u32(skb, OVS_KEY_ATTR_RECIRC_ID, output->recirc_id))
goto nla_put_failure;
@@ -1148,10 +1286,10 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey,
goto nla_put_failure;
if ((swkey->tun_key.ipv4_dst || is_mask)) {
- const struct geneve_opt *opts = NULL;
+ const void *opts = NULL;
if (output->tun_key.tun_flags & TUNNEL_OPTIONS_PRESENT)
- opts = GENEVE_OPTS(output, swkey->tun_opts_len);
+ opts = TUN_METADATA_OPTS(output, swkey->tun_opts_len);
if (ipv4_tun_to_nlattr(skb, &output->tun_key, opts,
swkey->tun_opts_len))
@@ -1346,6 +1484,49 @@ nla_put_failure:
return -EMSGSIZE;
}
+int ovs_nla_put_key(const struct sw_flow_key *swkey,
+ const struct sw_flow_key *output, int attr, bool is_mask,
+ struct sk_buff *skb)
+{
+ int err;
+ struct nlattr *nla;
+
+ nla = nla_nest_start(skb, attr);
+ if (!nla)
+ return -EMSGSIZE;
+ err = __ovs_nla_put_key(swkey, output, is_mask, skb);
+ if (err)
+ return err;
+ nla_nest_end(skb, nla);
+
+ return 0;
+}
+
+/* Called with ovs_mutex or RCU read lock. */
+int ovs_nla_put_identifier(const struct sw_flow *flow, struct sk_buff *skb)
+{
+ if (ovs_identifier_is_ufid(&flow->id))
+ return nla_put(skb, OVS_FLOW_ATTR_UFID, flow->id.ufid_len,
+ flow->id.ufid);
+
+ return ovs_nla_put_key(flow->id.unmasked_key, flow->id.unmasked_key,
+ OVS_FLOW_ATTR_KEY, false, skb);
+}
+
+/* Called with ovs_mutex or RCU read lock. */
+int ovs_nla_put_masked_key(const struct sw_flow *flow, struct sk_buff *skb)
+{
+ return ovs_nla_put_key(&flow->key, &flow->key,
+ OVS_FLOW_ATTR_KEY, false, skb);
+}
+
+/* Called with ovs_mutex or RCU read lock. */
+int ovs_nla_put_mask(const struct sw_flow *flow, struct sk_buff *skb)
+{
+ return ovs_nla_put_key(&flow->key, &flow->mask->key,
+ OVS_FLOW_ATTR_MASK, true, skb);
+}
+
#define MAX_ACTIONS_BUFSIZE (32 * 1024)
static struct sw_flow_actions *nla_alloc_flow_actions(int size, bool log)
@@ -1514,16 +1695,6 @@ static int validate_and_copy_sample(const struct nlattr *attr,
return 0;
}
-static int validate_tp_port(const struct sw_flow_key *flow_key,
- __be16 eth_type)
-{
- if ((eth_type == htons(ETH_P_IP) || eth_type == htons(ETH_P_IPV6)) &&
- (flow_key->tp.src || flow_key->tp.dst))
- return 0;
-
- return -EINVAL;
-}
-
void ovs_match_init(struct sw_flow_match *match,
struct sw_flow_key *key,
struct sw_flow_mask *mask)
@@ -1540,6 +1711,34 @@ void ovs_match_init(struct sw_flow_match *match,
}
}
+static int validate_geneve_opts(struct sw_flow_key *key)
+{
+ struct geneve_opt *option;
+ int opts_len = key->tun_opts_len;
+ bool crit_opt = false;
+
+ option = (struct geneve_opt *)TUN_METADATA_OPTS(key, key->tun_opts_len);
+ while (opts_len > 0) {
+ int len;
+
+ if (opts_len < sizeof(*option))
+ return -EINVAL;
+
+ len = sizeof(*option) + option->length * 4;
+ if (len > opts_len)
+ return -EINVAL;
+
+ crit_opt |= !!(option->type & GENEVE_CRIT_OPT_TYPE);
+
+ option = (struct geneve_opt *)((u8 *)option + len);
+ opts_len -= len;
+ };
+
+ key->tun_key.tun_flags |= crit_opt ? TUNNEL_CRIT_OPT : 0;
+
+ return 0;
+}
+
static int validate_and_copy_set_tun(const struct nlattr *attr,
struct sw_flow_actions **sfa, bool log)
{
@@ -1547,36 +1746,23 @@ static int validate_and_copy_set_tun(const struct nlattr *attr,
struct sw_flow_key key;
struct ovs_tunnel_info *tun_info;
struct nlattr *a;
- int err, start;
+ int err = 0, start, opts_type;
ovs_match_init(&match, &key, NULL);
- err = ipv4_tun_from_nlattr(nla_data(attr), &match, false, log);
- if (err)
- return err;
+ opts_type = ipv4_tun_from_nlattr(nla_data(attr), &match, false, log);
+ if (opts_type < 0)
+ return opts_type;
if (key.tun_opts_len) {
- struct geneve_opt *option = GENEVE_OPTS(&key,
- key.tun_opts_len);
- int opts_len = key.tun_opts_len;
- bool crit_opt = false;
-
- while (opts_len > 0) {
- int len;
-
- if (opts_len < sizeof(*option))
- return -EINVAL;
-
- len = sizeof(*option) + option->length * 4;
- if (len > opts_len)
- return -EINVAL;
-
- crit_opt |= !!(option->type & GENEVE_CRIT_OPT_TYPE);
-
- option = (struct geneve_opt *)((u8 *)option + len);
- opts_len -= len;
- };
-
- key.tun_key.tun_flags |= crit_opt ? TUNNEL_CRIT_OPT : 0;
+ switch (opts_type) {
+ case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS:
+ err = validate_geneve_opts(&key);
+ if (err < 0)
+ return err;
+ break;
+ case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS:
+ break;
+ }
};
start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SET, log);
@@ -1597,9 +1783,9 @@ static int validate_and_copy_set_tun(const struct nlattr *attr,
* everything else will go away after flow setup. We can append
* it to tun_info and then point there.
*/
- memcpy((tun_info + 1), GENEVE_OPTS(&key, key.tun_opts_len),
- key.tun_opts_len);
- tun_info->options = (struct geneve_opt *)(tun_info + 1);
+ memcpy((tun_info + 1),
+ TUN_METADATA_OPTS(&key, key.tun_opts_len), key.tun_opts_len);
+ tun_info->options = (tun_info + 1);
} else {
tun_info->options = NULL;
}
@@ -1609,21 +1795,43 @@ static int validate_and_copy_set_tun(const struct nlattr *attr,
return err;
}
+/* Return false if there are any non-masked bits set.
+ * Mask follows data immediately, before any netlink padding.
+ */
+static bool validate_masked(u8 *data, int len)
+{
+ u8 *mask = data + len;
+
+ while (len--)
+ if (*data++ & ~*mask++)
+ return false;
+
+ return true;
+}
+
static int validate_set(const struct nlattr *a,
const struct sw_flow_key *flow_key,
struct sw_flow_actions **sfa,
- bool *set_tun, __be16 eth_type, bool log)
+ bool *skip_copy, __be16 eth_type, bool masked, bool log)
{
const struct nlattr *ovs_key = nla_data(a);
int key_type = nla_type(ovs_key);
+ size_t key_len;
/* There can be only one key in a action */
if (nla_total_size(nla_len(ovs_key)) != nla_len(a))
return -EINVAL;
+ key_len = nla_len(ovs_key);
+ if (masked)
+ key_len /= 2;
+
if (key_type > OVS_KEY_ATTR_MAX ||
- (ovs_key_lens[key_type] != nla_len(ovs_key) &&
- ovs_key_lens[key_type] != -1))
+ (ovs_key_lens[key_type].len != key_len &&
+ ovs_key_lens[key_type].len != OVS_ATTR_NESTED))
+ return -EINVAL;
+
+ if (masked && !validate_masked(nla_data(ovs_key), key_len))
return -EINVAL;
switch (key_type) {
@@ -1640,7 +1848,10 @@ static int validate_set(const struct nlattr *a,
if (eth_p_mpls(eth_type))
return -EINVAL;
- *set_tun = true;
+ if (masked)
+ return -EINVAL; /* Masked tunnel set not supported. */
+
+ *skip_copy = true;
err = validate_and_copy_set_tun(a, sfa, log);
if (err)
return err;
@@ -1650,48 +1861,66 @@ static int validate_set(const struct nlattr *a,
if (eth_type != htons(ETH_P_IP))
return -EINVAL;
- if (!flow_key->ip.proto)
- return -EINVAL;
-
ipv4_key = nla_data(ovs_key);
- if (ipv4_key->ipv4_proto != flow_key->ip.proto)
- return -EINVAL;
- if (ipv4_key->ipv4_frag != flow_key->ip.frag)
- return -EINVAL;
+ if (masked) {
+ const struct ovs_key_ipv4 *mask = ipv4_key + 1;
+ /* Non-writeable fields. */
+ if (mask->ipv4_proto || mask->ipv4_frag)
+ return -EINVAL;
+ } else {
+ if (ipv4_key->ipv4_proto != flow_key->ip.proto)
+ return -EINVAL;
+
+ if (ipv4_key->ipv4_frag != flow_key->ip.frag)
+ return -EINVAL;
+ }
break;
case OVS_KEY_ATTR_IPV6:
if (eth_type != htons(ETH_P_IPV6))
return -EINVAL;
- if (!flow_key->ip.proto)
- return -EINVAL;
-
ipv6_key = nla_data(ovs_key);
- if (ipv6_key->ipv6_proto != flow_key->ip.proto)
- return -EINVAL;
- if (ipv6_key->ipv6_frag != flow_key->ip.frag)
- return -EINVAL;
+ if (masked) {
+ const struct ovs_key_ipv6 *mask = ipv6_key + 1;
+
+ /* Non-writeable fields. */
+ if (mask->ipv6_proto || mask->ipv6_frag)
+ return -EINVAL;
+
+ /* Invalid bits in the flow label mask? */
+ if (ntohl(mask->ipv6_label) & 0xFFF00000)
+ return -EINVAL;
+ } else {
+ if (ipv6_key->ipv6_proto != flow_key->ip.proto)
+ return -EINVAL;
+ if (ipv6_key->ipv6_frag != flow_key->ip.frag)
+ return -EINVAL;
+ }
if (ntohl(ipv6_key->ipv6_label) & 0xFFF00000)
return -EINVAL;
break;
case OVS_KEY_ATTR_TCP:
- if (flow_key->ip.proto != IPPROTO_TCP)
+ if ((eth_type != htons(ETH_P_IP) &&
+ eth_type != htons(ETH_P_IPV6)) ||
+ flow_key->ip.proto != IPPROTO_TCP)
return -EINVAL;
- return validate_tp_port(flow_key, eth_type);
+ break;
case OVS_KEY_ATTR_UDP:
- if (flow_key->ip.proto != IPPROTO_UDP)
+ if ((eth_type != htons(ETH_P_IP) &&
+ eth_type != htons(ETH_P_IPV6)) ||
+ flow_key->ip.proto != IPPROTO_UDP)
return -EINVAL;
- return validate_tp_port(flow_key, eth_type);
+ break;
case OVS_KEY_ATTR_MPLS:
if (!eth_p_mpls(eth_type))
@@ -1699,15 +1928,45 @@ static int validate_set(const struct nlattr *a,
break;
case OVS_KEY_ATTR_SCTP:
- if (flow_key->ip.proto != IPPROTO_SCTP)
+ if ((eth_type != htons(ETH_P_IP) &&
+ eth_type != htons(ETH_P_IPV6)) ||
+ flow_key->ip.proto != IPPROTO_SCTP)
return -EINVAL;
- return validate_tp_port(flow_key, eth_type);
+ break;
default:
return -EINVAL;
}
+ /* Convert non-masked non-tunnel set actions to masked set actions. */
+ if (!masked && key_type != OVS_KEY_ATTR_TUNNEL) {
+ int start, len = key_len * 2;
+ struct nlattr *at;
+
+ *skip_copy = true;
+
+ start = add_nested_action_start(sfa,
+ OVS_ACTION_ATTR_SET_TO_MASKED,
+ log);
+ if (start < 0)
+ return start;
+
+ at = __add_action(sfa, key_type, NULL, len, log);
+ if (IS_ERR(at))
+ return PTR_ERR(at);
+
+ memcpy(nla_data(at), nla_data(ovs_key), key_len); /* Key. */
+ memset(nla_data(at) + key_len, 0xff, key_len); /* Mask. */
+ /* Clear non-writeable bits from otherwise writeable fields. */
+ if (key_type == OVS_KEY_ATTR_IPV6) {
+ struct ovs_key_ipv6 *mask = nla_data(at) + key_len;
+
+ mask->ipv6_label &= htonl(0x000FFFFF);
+ }
+ add_nested_action_end(*sfa, start);
+ }
+
return 0;
}
@@ -1769,6 +2028,7 @@ static int __ovs_nla_copy_actions(const struct nlattr *attr,
[OVS_ACTION_ATTR_PUSH_VLAN] = sizeof(struct ovs_action_push_vlan),
[OVS_ACTION_ATTR_POP_VLAN] = 0,
[OVS_ACTION_ATTR_SET] = (u32)-1,
+ [OVS_ACTION_ATTR_SET_MASKED] = (u32)-1,
[OVS_ACTION_ATTR_SAMPLE] = (u32)-1,
[OVS_ACTION_ATTR_HASH] = sizeof(struct ovs_action_hash)
};
@@ -1864,7 +2124,14 @@ static int __ovs_nla_copy_actions(const struct nlattr *attr,
case OVS_ACTION_ATTR_SET:
err = validate_set(a, key, sfa,
- &skip_copy, eth_type, log);
+ &skip_copy, eth_type, false, log);
+ if (err)
+ return err;
+ break;
+
+ case OVS_ACTION_ATTR_SET_MASKED:
+ err = validate_set(a, key, sfa,
+ &skip_copy, eth_type, true, log);
if (err)
return err;
break;
@@ -1894,6 +2161,7 @@ static int __ovs_nla_copy_actions(const struct nlattr *attr,
return 0;
}
+/* 'key' must be the masked key. */
int ovs_nla_copy_actions(const struct nlattr *attr,
const struct sw_flow_key *key,
struct sw_flow_actions **sfa, bool log)
@@ -1981,6 +2249,27 @@ static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb)
return 0;
}
+static int masked_set_action_to_set_action_attr(const struct nlattr *a,
+ struct sk_buff *skb)
+{
+ const struct nlattr *ovs_key = nla_data(a);
+ struct nlattr *nla;
+ size_t key_len = nla_len(ovs_key) / 2;
+
+ /* Revert the conversion we did from a non-masked set action to
+ * masked set action.
+ */
+ nla = nla_nest_start(skb, OVS_ACTION_ATTR_SET);
+ if (!nla)
+ return -EMSGSIZE;
+
+ if (nla_put(skb, nla_type(ovs_key), key_len, nla_data(ovs_key)))
+ return -EMSGSIZE;
+
+ nla_nest_end(skb, nla);
+ return 0;
+}
+
int ovs_nla_put_actions(const struct nlattr *attr, int len, struct sk_buff *skb)
{
const struct nlattr *a;
@@ -1996,6 +2285,12 @@ int ovs_nla_put_actions(const struct nlattr *attr, int len, struct sk_buff *skb)
return err;
break;
+ case OVS_ACTION_ATTR_SET_TO_MASKED:
+ err = masked_set_action_to_set_action_attr(a, skb);
+ if (err)
+ return err;
+ break;
+
case OVS_ACTION_ATTR_SAMPLE:
err = sample_action_to_attr(a, skb);
if (err)
diff --git a/net/openvswitch/flow_netlink.h b/net/openvswitch/flow_netlink.h
index 577f12be3459..5c3d75bff310 100644
--- a/net/openvswitch/flow_netlink.h
+++ b/net/openvswitch/flow_netlink.h
@@ -43,16 +43,25 @@ size_t ovs_key_attr_size(void);
void ovs_match_init(struct sw_flow_match *match,
struct sw_flow_key *key, struct sw_flow_mask *mask);
-int ovs_nla_put_flow(const struct sw_flow_key *,
- const struct sw_flow_key *, struct sk_buff *);
+int ovs_nla_put_key(const struct sw_flow_key *, const struct sw_flow_key *,
+ int attr, bool is_mask, struct sk_buff *);
int ovs_nla_get_flow_metadata(const struct nlattr *, struct sw_flow_key *,
bool log);
+int ovs_nla_put_identifier(const struct sw_flow *flow, struct sk_buff *skb);
+int ovs_nla_put_masked_key(const struct sw_flow *flow, struct sk_buff *skb);
+int ovs_nla_put_mask(const struct sw_flow *flow, struct sk_buff *skb);
+
int ovs_nla_get_match(struct sw_flow_match *, const struct nlattr *key,
const struct nlattr *mask, bool log);
int ovs_nla_put_egress_tunnel_key(struct sk_buff *,
const struct ovs_tunnel_info *);
+bool ovs_nla_get_ufid(struct sw_flow_id *, const struct nlattr *, bool log);
+int ovs_nla_get_identifier(struct sw_flow_id *sfid, const struct nlattr *ufid,
+ const struct sw_flow_key *key, bool log);
+u32 ovs_nla_get_ufid_flags(const struct nlattr *attr);
+
int ovs_nla_copy_actions(const struct nlattr *attr,
const struct sw_flow_key *key,
struct sw_flow_actions **sfa, bool log);
diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c
index 5899bf161c61..4613df8c8290 100644
--- a/net/openvswitch/flow_table.c
+++ b/net/openvswitch/flow_table.c
@@ -85,6 +85,8 @@ struct sw_flow *ovs_flow_alloc(void)
flow->sf_acts = NULL;
flow->mask = NULL;
+ flow->id.unmasked_key = NULL;
+ flow->id.ufid_len = 0;
flow->stats_last_writer = NUMA_NO_NODE;
/* Initialize the default stat node. */
@@ -139,6 +141,8 @@ static void flow_free(struct sw_flow *flow)
{
int node;
+ if (ovs_identifier_is_key(&flow->id))
+ kfree(flow->id.unmasked_key);
kfree((struct sw_flow_actions __force *)flow->sf_acts);
for_each_node(node)
if (flow->stats[node])
@@ -200,18 +204,28 @@ static struct table_instance *table_instance_alloc(int new_size)
int ovs_flow_tbl_init(struct flow_table *table)
{
- struct table_instance *ti;
+ struct table_instance *ti, *ufid_ti;
ti = table_instance_alloc(TBL_MIN_BUCKETS);
if (!ti)
return -ENOMEM;
+ ufid_ti = table_instance_alloc(TBL_MIN_BUCKETS);
+ if (!ufid_ti)
+ goto free_ti;
+
rcu_assign_pointer(table->ti, ti);
+ rcu_assign_pointer(table->ufid_ti, ufid_ti);
INIT_LIST_HEAD(&table->mask_list);
table->last_rehash = jiffies;
table->count = 0;
+ table->ufid_count = 0;
return 0;
+
+free_ti:
+ __table_instance_destroy(ti);
+ return -ENOMEM;
}
static void flow_tbl_destroy_rcu_cb(struct rcu_head *rcu)
@@ -221,13 +235,16 @@ static void flow_tbl_destroy_rcu_cb(struct rcu_head *rcu)
__table_instance_destroy(ti);
}
-static void table_instance_destroy(struct table_instance *ti, bool deferred)
+static void table_instance_destroy(struct table_instance *ti,
+ struct table_instance *ufid_ti,
+ bool deferred)
{
int i;
if (!ti)
return;
+ BUG_ON(!ufid_ti);
if (ti->keep_flows)
goto skip_flows;
@@ -236,18 +253,24 @@ static void table_instance_destroy(struct table_instance *ti, bool deferred)
struct hlist_head *head = flex_array_get(ti->buckets, i);
struct hlist_node *n;
int ver = ti->node_ver;
+ int ufid_ver = ufid_ti->node_ver;
- hlist_for_each_entry_safe(flow, n, head, hash_node[ver]) {
- hlist_del_rcu(&flow->hash_node[ver]);
+ hlist_for_each_entry_safe(flow, n, head, flow_table.node[ver]) {
+ hlist_del_rcu(&flow->flow_table.node[ver]);
+ if (ovs_identifier_is_ufid(&flow->id))
+ hlist_del_rcu(&flow->ufid_table.node[ufid_ver]);
ovs_flow_free(flow, deferred);
}
}
skip_flows:
- if (deferred)
+ if (deferred) {
call_rcu(&ti->rcu, flow_tbl_destroy_rcu_cb);
- else
+ call_rcu(&ufid_ti->rcu, flow_tbl_destroy_rcu_cb);
+ } else {
__table_instance_destroy(ti);
+ __table_instance_destroy(ufid_ti);
+ }
}
/* No need for locking this function is called from RCU callback or
@@ -256,8 +279,9 @@ skip_flows:
void ovs_flow_tbl_destroy(struct flow_table *table)
{
struct table_instance *ti = rcu_dereference_raw(table->ti);
+ struct table_instance *ufid_ti = rcu_dereference_raw(table->ufid_ti);
- table_instance_destroy(ti, false);
+ table_instance_destroy(ti, ufid_ti, false);
}
struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *ti,
@@ -272,7 +296,7 @@ struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *ti,
while (*bucket < ti->n_buckets) {
i = 0;
head = flex_array_get(ti->buckets, *bucket);
- hlist_for_each_entry_rcu(flow, head, hash_node[ver]) {
+ hlist_for_each_entry_rcu(flow, head, flow_table.node[ver]) {
if (i < *last) {
i++;
continue;
@@ -294,16 +318,26 @@ static struct hlist_head *find_bucket(struct table_instance *ti, u32 hash)
(hash & (ti->n_buckets - 1)));
}
-static void table_instance_insert(struct table_instance *ti, struct sw_flow *flow)
+static void table_instance_insert(struct table_instance *ti,
+ struct sw_flow *flow)
+{
+ struct hlist_head *head;
+
+ head = find_bucket(ti, flow->flow_table.hash);
+ hlist_add_head_rcu(&flow->flow_table.node[ti->node_ver], head);
+}
+
+static void ufid_table_instance_insert(struct table_instance *ti,
+ struct sw_flow *flow)
{
struct hlist_head *head;
- head = find_bucket(ti, flow->hash);
- hlist_add_head_rcu(&flow->hash_node[ti->node_ver], head);
+ head = find_bucket(ti, flow->ufid_table.hash);
+ hlist_add_head_rcu(&flow->ufid_table.node[ti->node_ver], head);
}
static void flow_table_copy_flows(struct table_instance *old,
- struct table_instance *new)
+ struct table_instance *new, bool ufid)
{
int old_ver;
int i;
@@ -318,15 +352,21 @@ static void flow_table_copy_flows(struct table_instance *old,
head = flex_array_get(old->buckets, i);
- hlist_for_each_entry(flow, head, hash_node[old_ver])
- table_instance_insert(new, flow);
+ if (ufid)
+ hlist_for_each_entry(flow, head,
+ ufid_table.node[old_ver])
+ ufid_table_instance_insert(new, flow);
+ else
+ hlist_for_each_entry(flow, head,
+ flow_table.node[old_ver])
+ table_instance_insert(new, flow);
}
old->keep_flows = true;
}
static struct table_instance *table_instance_rehash(struct table_instance *ti,
- int n_buckets)
+ int n_buckets, bool ufid)
{
struct table_instance *new_ti;
@@ -334,32 +374,45 @@ static struct table_instance *table_instance_rehash(struct table_instance *ti,
if (!new_ti)
return NULL;
- flow_table_copy_flows(ti, new_ti);
+ flow_table_copy_flows(ti, new_ti, ufid);
return new_ti;
}
int ovs_flow_tbl_flush(struct flow_table *flow_table)
{
- struct table_instance *old_ti;
- struct table_instance *new_ti;
+ struct table_instance *old_ti, *new_ti;
+ struct table_instance *old_ufid_ti, *new_ufid_ti;
- old_ti = ovsl_dereference(flow_table->ti);
new_ti = table_instance_alloc(TBL_MIN_BUCKETS);
if (!new_ti)
return -ENOMEM;
+ new_ufid_ti = table_instance_alloc(TBL_MIN_BUCKETS);
+ if (!new_ufid_ti)
+ goto err_free_ti;
+
+ old_ti = ovsl_dereference(flow_table->ti);
+ old_ufid_ti = ovsl_dereference(flow_table->ufid_ti);
rcu_assign_pointer(flow_table->ti, new_ti);
+ rcu_assign_pointer(flow_table->ufid_ti, new_ufid_ti);
flow_table->last_rehash = jiffies;
flow_table->count = 0;
+ flow_table->ufid_count = 0;
- table_instance_destroy(old_ti, true);
+ table_instance_destroy(old_ti, old_ufid_ti, true);
return 0;
+
+err_free_ti:
+ __table_instance_destroy(new_ti);
+ return -ENOMEM;
}
-static u32 flow_hash(const struct sw_flow_key *key, int key_start,
- int key_end)
+static u32 flow_hash(const struct sw_flow_key *key,
+ const struct sw_flow_key_range *range)
{
+ int key_start = range->start;
+ int key_end = range->end;
const u32 *hash_key = (const u32 *)((const u8 *)key + key_start);
int hash_u32s = (key_end - key_start) >> 2;
@@ -395,19 +448,20 @@ static bool cmp_key(const struct sw_flow_key *key1,
static bool flow_cmp_masked_key(const struct sw_flow *flow,
const struct sw_flow_key *key,
- int key_start, int key_end)
+ const struct sw_flow_key_range *range)
{
- return cmp_key(&flow->key, key, key_start, key_end);
+ return cmp_key(&flow->key, key, range->start, range->end);
}
-bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow,
- const struct sw_flow_match *match)
+static bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow,
+ const struct sw_flow_match *match)
{
struct sw_flow_key *key = match->key;
int key_start = flow_key_start(key);
int key_end = match->range.end;
- return cmp_key(&flow->unmasked_key, key, key_start, key_end);
+ BUG_ON(ovs_identifier_is_ufid(&flow->id));
+ return cmp_key(flow->id.unmasked_key, key, key_start, key_end);
}
static struct sw_flow *masked_flow_lookup(struct table_instance *ti,
@@ -416,18 +470,15 @@ static struct sw_flow *masked_flow_lookup(struct table_instance *ti,
{
struct sw_flow *flow;
struct hlist_head *head;
- int key_start = mask->range.start;
- int key_end = mask->range.end;
u32 hash;
struct sw_flow_key masked_key;
ovs_flow_mask_key(&masked_key, unmasked, mask);
- hash = flow_hash(&masked_key, key_start, key_end);
+ hash = flow_hash(&masked_key, &mask->range);
head = find_bucket(ti, hash);
- hlist_for_each_entry_rcu(flow, head, hash_node[ti->node_ver]) {
- if (flow->mask == mask && flow->hash == hash &&
- flow_cmp_masked_key(flow, &masked_key,
- key_start, key_end))
+ hlist_for_each_entry_rcu(flow, head, flow_table.node[ti->node_ver]) {
+ if (flow->mask == mask && flow->flow_table.hash == hash &&
+ flow_cmp_masked_key(flow, &masked_key, &mask->range))
return flow;
}
return NULL;
@@ -469,7 +520,48 @@ struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl,
/* Always called under ovs-mutex. */
list_for_each_entry(mask, &tbl->mask_list, list) {
flow = masked_flow_lookup(ti, match->key, mask);
- if (flow && ovs_flow_cmp_unmasked_key(flow, match)) /* Found */
+ if (flow && ovs_identifier_is_key(&flow->id) &&
+ ovs_flow_cmp_unmasked_key(flow, match))
+ return flow;
+ }
+ return NULL;
+}
+
+static u32 ufid_hash(const struct sw_flow_id *sfid)
+{
+ return jhash(sfid->ufid, sfid->ufid_len, 0);
+}
+
+static bool ovs_flow_cmp_ufid(const struct sw_flow *flow,
+ const struct sw_flow_id *sfid)
+{
+ if (flow->id.ufid_len != sfid->ufid_len)
+ return false;
+
+ return !memcmp(flow->id.ufid, sfid->ufid, sfid->ufid_len);
+}
+
+bool ovs_flow_cmp(const struct sw_flow *flow, const struct sw_flow_match *match)
+{
+ if (ovs_identifier_is_ufid(&flow->id))
+ return flow_cmp_masked_key(flow, match->key, &match->range);
+
+ return ovs_flow_cmp_unmasked_key(flow, match);
+}
+
+struct sw_flow *ovs_flow_tbl_lookup_ufid(struct flow_table *tbl,
+ const struct sw_flow_id *ufid)
+{
+ struct table_instance *ti = rcu_dereference_ovsl(tbl->ufid_ti);
+ struct sw_flow *flow;
+ struct hlist_head *head;
+ u32 hash;
+
+ hash = ufid_hash(ufid);
+ head = find_bucket(ti, hash);
+ hlist_for_each_entry_rcu(flow, head, ufid_table.node[ti->node_ver]) {
+ if (flow->ufid_table.hash == hash &&
+ ovs_flow_cmp_ufid(flow, ufid))
return flow;
}
return NULL;
@@ -486,9 +578,10 @@ int ovs_flow_tbl_num_masks(const struct flow_table *table)
return num;
}
-static struct table_instance *table_instance_expand(struct table_instance *ti)
+static struct table_instance *table_instance_expand(struct table_instance *ti,
+ bool ufid)
{
- return table_instance_rehash(ti, ti->n_buckets * 2);
+ return table_instance_rehash(ti, ti->n_buckets * 2, ufid);
}
/* Remove 'mask' from the mask list, if it is not needed any more. */
@@ -513,10 +606,15 @@ static void flow_mask_remove(struct flow_table *tbl, struct sw_flow_mask *mask)
void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow)
{
struct table_instance *ti = ovsl_dereference(table->ti);
+ struct table_instance *ufid_ti = ovsl_dereference(table->ufid_ti);
BUG_ON(table->count == 0);
- hlist_del_rcu(&flow->hash_node[ti->node_ver]);
+ hlist_del_rcu(&flow->flow_table.node[ti->node_ver]);
table->count--;
+ if (ovs_identifier_is_ufid(&flow->id)) {
+ hlist_del_rcu(&flow->ufid_table.node[ufid_ti->node_ver]);
+ table->ufid_count--;
+ }
/* RCU delete the mask. 'flow->mask' is not NULLed, as it should be
* accessible as long as the RCU read lock is held.
@@ -585,34 +683,64 @@ static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow,
}
/* Must be called with OVS mutex held. */
-int ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow,
- const struct sw_flow_mask *mask)
+static void flow_key_insert(struct flow_table *table, struct sw_flow *flow)
{
struct table_instance *new_ti = NULL;
struct table_instance *ti;
- int err;
-
- err = flow_mask_insert(table, flow, mask);
- if (err)
- return err;
- flow->hash = flow_hash(&flow->key, flow->mask->range.start,
- flow->mask->range.end);
+ flow->flow_table.hash = flow_hash(&flow->key, &flow->mask->range);
ti = ovsl_dereference(table->ti);
table_instance_insert(ti, flow);
table->count++;
/* Expand table, if necessary, to make room. */
if (table->count > ti->n_buckets)
- new_ti = table_instance_expand(ti);
+ new_ti = table_instance_expand(ti, false);
else if (time_after(jiffies, table->last_rehash + REHASH_INTERVAL))
- new_ti = table_instance_rehash(ti, ti->n_buckets);
+ new_ti = table_instance_rehash(ti, ti->n_buckets, false);
if (new_ti) {
rcu_assign_pointer(table->ti, new_ti);
- table_instance_destroy(ti, true);
+ call_rcu(&ti->rcu, flow_tbl_destroy_rcu_cb);
table->last_rehash = jiffies;
}
+}
+
+/* Must be called with OVS mutex held. */
+static void flow_ufid_insert(struct flow_table *table, struct sw_flow *flow)
+{
+ struct table_instance *ti;
+
+ flow->ufid_table.hash = ufid_hash(&flow->id);
+ ti = ovsl_dereference(table->ufid_ti);
+ ufid_table_instance_insert(ti, flow);
+ table->ufid_count++;
+
+ /* Expand table, if necessary, to make room. */
+ if (table->ufid_count > ti->n_buckets) {
+ struct table_instance *new_ti;
+
+ new_ti = table_instance_expand(ti, true);
+ if (new_ti) {
+ rcu_assign_pointer(table->ufid_ti, new_ti);
+ call_rcu(&ti->rcu, flow_tbl_destroy_rcu_cb);
+ }
+ }
+}
+
+/* Must be called with OVS mutex held. */
+int ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow,
+ const struct sw_flow_mask *mask)
+{
+ int err;
+
+ err = flow_mask_insert(table, flow, mask);
+ if (err)
+ return err;
+ flow_key_insert(table, flow);
+ if (ovs_identifier_is_ufid(&flow->id))
+ flow_ufid_insert(table, flow);
+
return 0;
}
diff --git a/net/openvswitch/flow_table.h b/net/openvswitch/flow_table.h
index 309fa6415689..616eda10d955 100644
--- a/net/openvswitch/flow_table.h
+++ b/net/openvswitch/flow_table.h
@@ -47,9 +47,11 @@ struct table_instance {
struct flow_table {
struct table_instance __rcu *ti;
+ struct table_instance __rcu *ufid_ti;
struct list_head mask_list;
unsigned long last_rehash;
unsigned int count;
+ unsigned int ufid_count;
};
extern struct kmem_cache *flow_stats_cache;
@@ -78,8 +80,10 @@ struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *,
const struct sw_flow_key *);
struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl,
const struct sw_flow_match *match);
-bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow,
- const struct sw_flow_match *match);
+struct sw_flow *ovs_flow_tbl_lookup_ufid(struct flow_table *,
+ const struct sw_flow_id *);
+
+bool ovs_flow_cmp(const struct sw_flow *, const struct sw_flow_match *);
void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src,
const struct sw_flow_mask *mask);
diff --git a/net/openvswitch/vport-geneve.c b/net/openvswitch/vport-geneve.c
index 484864dd0e68..bf02fd5808c9 100644
--- a/net/openvswitch/vport-geneve.c
+++ b/net/openvswitch/vport-geneve.c
@@ -9,8 +9,6 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-#include <linux/version.h>
-
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/net.h>
@@ -90,7 +88,7 @@ static void geneve_rcv(struct geneve_sock *gs, struct sk_buff *skb)
opts_len = geneveh->opt_len * 4;
- flags = TUNNEL_KEY | TUNNEL_OPTIONS_PRESENT |
+ flags = TUNNEL_KEY | TUNNEL_GENEVE_OPT |
(udp_hdr(skb)->check != 0 ? TUNNEL_CSUM : 0) |
(geneveh->oam ? TUNNEL_OAM : 0) |
(geneveh->critical ? TUNNEL_CRIT_OPT : 0);
@@ -172,7 +170,7 @@ error:
static int geneve_tnl_send(struct vport *vport, struct sk_buff *skb)
{
- struct ovs_key_ipv4_tunnel *tun_key;
+ const struct ovs_key_ipv4_tunnel *tun_key;
struct ovs_tunnel_info *tun_info;
struct net *net = ovs_dp_get_net(vport->dp);
struct geneve_port *geneve_port = geneve_vport(vport);
@@ -180,7 +178,7 @@ static int geneve_tnl_send(struct vport *vport, struct sk_buff *skb)
__be16 sport;
struct rtable *rt;
struct flowi4 fl;
- u8 vni[3];
+ u8 vni[3], opts_len, *opts;
__be16 df;
int err;
@@ -191,16 +189,7 @@ static int geneve_tnl_send(struct vport *vport, struct sk_buff *skb)
}
tun_key = &tun_info->tunnel;
-
- /* Route lookup */
- memset(&fl, 0, sizeof(fl));
- fl.daddr = tun_key->ipv4_dst;
- fl.saddr = tun_key->ipv4_src;
- fl.flowi4_tos = RT_TOS(tun_key->ipv4_tos);
- fl.flowi4_mark = skb->mark;
- fl.flowi4_proto = IPPROTO_UDP;
-
- rt = ip_route_output_key(net, &fl);
+ rt = ovs_tunnel_route_lookup(net, tun_key, skb->mark, &fl, IPPROTO_UDP);
if (IS_ERR(rt)) {
err = PTR_ERR(rt);
goto error;
@@ -211,12 +200,19 @@ static int geneve_tnl_send(struct vport *vport, struct sk_buff *skb)
tunnel_id_to_vni(tun_key->tun_id, vni);
skb->ignore_df = 1;
+ if (tun_key->tun_flags & TUNNEL_GENEVE_OPT) {
+ opts = (u8 *)tun_info->options;
+ opts_len = tun_info->options_len;
+ } else {
+ opts = NULL;
+ opts_len = 0;
+ }
+
err = geneve_xmit_skb(geneve_port->gs, rt, skb, fl.saddr,
tun_key->ipv4_dst, tun_key->ipv4_tos,
tun_key->ipv4_ttl, df, sport, dport,
- tun_key->tun_flags, vni,
- tun_info->options_len, (u8 *)tun_info->options,
- false);
+ tun_key->tun_flags, vni, opts_len, opts,
+ !!(tun_key->tun_flags & TUNNEL_CSUM), false);
if (err < 0)
ip_rt_put(rt);
return err;
diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c
index d4168c442db5..f17ac9642f4e 100644
--- a/net/openvswitch/vport-gre.c
+++ b/net/openvswitch/vport-gre.c
@@ -134,7 +134,7 @@ static int gre_err(struct sk_buff *skb, u32 info,
static int gre_tnl_send(struct vport *vport, struct sk_buff *skb)
{
struct net *net = ovs_dp_get_net(vport->dp);
- struct ovs_key_ipv4_tunnel *tun_key;
+ const struct ovs_key_ipv4_tunnel *tun_key;
struct flowi4 fl;
struct rtable *rt;
int min_headroom;
@@ -148,15 +148,7 @@ static int gre_tnl_send(struct vport *vport, struct sk_buff *skb)
}
tun_key = &OVS_CB(skb)->egress_tun_info->tunnel;
- /* Route lookup */
- memset(&fl, 0, sizeof(fl));
- fl.daddr = tun_key->ipv4_dst;
- fl.saddr = tun_key->ipv4_src;
- fl.flowi4_tos = RT_TOS(tun_key->ipv4_tos);
- fl.flowi4_mark = skb->mark;
- fl.flowi4_proto = IPPROTO_GRE;
-
- rt = ip_route_output_key(net, &fl);
+ rt = ovs_tunnel_route_lookup(net, tun_key, skb->mark, &fl, IPPROTO_GRE);
if (IS_ERR(rt)) {
err = PTR_ERR(rt);
goto err_free_skb;
@@ -166,7 +158,7 @@ static int gre_tnl_send(struct vport *vport, struct sk_buff *skb)
min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len
+ tunnel_hlen + sizeof(struct iphdr)
- + (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0);
+ + (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0);
if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) {
int head_delta = SKB_DATA_ALIGN(min_headroom -
skb_headroom(skb) +
diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c
index d7c46b301024..3277a7520e31 100644
--- a/net/openvswitch/vport-vxlan.c
+++ b/net/openvswitch/vport-vxlan.c
@@ -40,6 +40,7 @@
#include "datapath.h"
#include "vport.h"
+#include "vport-vxlan.h"
/**
* struct vxlan_port - Keeps track of open UDP ports
@@ -49,6 +50,7 @@
struct vxlan_port {
struct vxlan_sock *vs;
char name[IFNAMSIZ];
+ u32 exts; /* VXLAN_F_* in <net/vxlan.h> */
};
static struct vport_ops ovs_vxlan_vport_ops;
@@ -59,19 +61,30 @@ static inline struct vxlan_port *vxlan_vport(const struct vport *vport)
}
/* Called with rcu_read_lock and BH disabled. */
-static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, __be32 vx_vni)
+static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb,
+ struct vxlan_metadata *md)
{
struct ovs_tunnel_info tun_info;
+ struct vxlan_port *vxlan_port;
struct vport *vport = vs->data;
struct iphdr *iph;
+ struct ovs_vxlan_opts opts = {
+ .gbp = md->gbp,
+ };
__be64 key;
+ __be16 flags;
+
+ flags = TUNNEL_KEY | (udp_hdr(skb)->check != 0 ? TUNNEL_CSUM : 0);
+ vxlan_port = vxlan_vport(vport);
+ if (vxlan_port->exts & VXLAN_F_GBP && md->gbp)
+ flags |= TUNNEL_VXLAN_OPT;
/* Save outer tunnel values */
iph = ip_hdr(skb);
- key = cpu_to_be64(ntohl(vx_vni) >> 8);
+ key = cpu_to_be64(ntohl(md->vni) >> 8);
ovs_flow_tun_info_init(&tun_info, iph,
udp_hdr(skb)->source, udp_hdr(skb)->dest,
- key, TUNNEL_KEY, NULL, 0);
+ key, flags, &opts, sizeof(opts));
ovs_vport_receive(vport, skb, &tun_info);
}
@@ -83,6 +96,21 @@ static int vxlan_get_options(const struct vport *vport, struct sk_buff *skb)
if (nla_put_u16(skb, OVS_TUNNEL_ATTR_DST_PORT, ntohs(dst_port)))
return -EMSGSIZE;
+
+ if (vxlan_port->exts) {
+ struct nlattr *exts;
+
+ exts = nla_nest_start(skb, OVS_TUNNEL_ATTR_EXTENSION);
+ if (!exts)
+ return -EMSGSIZE;
+
+ if (vxlan_port->exts & VXLAN_F_GBP &&
+ nla_put_flag(skb, OVS_VXLAN_EXT_GBP))
+ return -EMSGSIZE;
+
+ nla_nest_end(skb, exts);
+ }
+
return 0;
}
@@ -95,6 +123,31 @@ static void vxlan_tnl_destroy(struct vport *vport)
ovs_vport_deferred_free(vport);
}
+static const struct nla_policy exts_policy[OVS_VXLAN_EXT_MAX+1] = {
+ [OVS_VXLAN_EXT_GBP] = { .type = NLA_FLAG, },
+};
+
+static int vxlan_configure_exts(struct vport *vport, struct nlattr *attr)
+{
+ struct nlattr *exts[OVS_VXLAN_EXT_MAX+1];
+ struct vxlan_port *vxlan_port;
+ int err;
+
+ if (nla_len(attr) < sizeof(struct nlattr))
+ return -EINVAL;
+
+ err = nla_parse_nested(exts, OVS_VXLAN_EXT_MAX, attr, exts_policy);
+ if (err < 0)
+ return err;
+
+ vxlan_port = vxlan_vport(vport);
+
+ if (exts[OVS_VXLAN_EXT_GBP])
+ vxlan_port->exts |= VXLAN_F_GBP;
+
+ return 0;
+}
+
static struct vport *vxlan_tnl_create(const struct vport_parms *parms)
{
struct net *net = ovs_dp_get_net(parms->dp);
@@ -127,7 +180,17 @@ static struct vport *vxlan_tnl_create(const struct vport_parms *parms)
vxlan_port = vxlan_vport(vport);
strncpy(vxlan_port->name, parms->name, IFNAMSIZ);
- vs = vxlan_sock_add(net, htons(dst_port), vxlan_rcv, vport, true, 0);
+ a = nla_find_nested(options, OVS_TUNNEL_ATTR_EXTENSION);
+ if (a) {
+ err = vxlan_configure_exts(vport, a);
+ if (err) {
+ ovs_vport_free(vport);
+ goto error;
+ }
+ }
+
+ vs = vxlan_sock_add(net, htons(dst_port), vxlan_rcv, vport, true,
+ vxlan_port->exts);
if (IS_ERR(vs)) {
ovs_vport_free(vport);
return (void *)vs;
@@ -140,17 +203,34 @@ error:
return ERR_PTR(err);
}
+static int vxlan_ext_gbp(struct sk_buff *skb)
+{
+ const struct ovs_tunnel_info *tun_info;
+ const struct ovs_vxlan_opts *opts;
+
+ tun_info = OVS_CB(skb)->egress_tun_info;
+ opts = tun_info->options;
+
+ if (tun_info->tunnel.tun_flags & TUNNEL_VXLAN_OPT &&
+ tun_info->options_len >= sizeof(*opts))
+ return opts->gbp;
+ else
+ return 0;
+}
+
static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb)
{
struct net *net = ovs_dp_get_net(vport->dp);
struct vxlan_port *vxlan_port = vxlan_vport(vport);
__be16 dst_port = inet_sk(vxlan_port->vs->sock->sk)->inet_sport;
- struct ovs_key_ipv4_tunnel *tun_key;
+ const struct ovs_key_ipv4_tunnel *tun_key;
+ struct vxlan_metadata md = {0};
struct rtable *rt;
struct flowi4 fl;
__be16 src_port;
__be16 df;
int err;
+ u32 vxflags;
if (unlikely(!OVS_CB(skb)->egress_tun_info)) {
err = -EINVAL;
@@ -158,15 +238,7 @@ static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb)
}
tun_key = &OVS_CB(skb)->egress_tun_info->tunnel;
- /* Route lookup */
- memset(&fl, 0, sizeof(fl));
- fl.daddr = tun_key->ipv4_dst;
- fl.saddr = tun_key->ipv4_src;
- fl.flowi4_tos = RT_TOS(tun_key->ipv4_tos);
- fl.flowi4_mark = skb->mark;
- fl.flowi4_proto = IPPROTO_UDP;
-
- rt = ip_route_output_key(net, &fl);
+ rt = ovs_tunnel_route_lookup(net, tun_key, skb->mark, &fl, IPPROTO_UDP);
if (IS_ERR(rt)) {
err = PTR_ERR(rt);
goto error;
@@ -178,13 +250,15 @@ static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb)
skb->ignore_df = 1;
src_port = udp_flow_src_port(net, skb, 0, 0, true);
+ md.vni = htonl(be64_to_cpu(tun_key->tun_id) << 8);
+ md.gbp = vxlan_ext_gbp(skb);
+ vxflags = vxlan_port->exts |
+ (tun_key->tun_flags & TUNNEL_CSUM ? VXLAN_F_UDP_CSUM : 0);
- err = vxlan_xmit_skb(vxlan_port->vs, rt, skb,
- fl.saddr, tun_key->ipv4_dst,
+ err = vxlan_xmit_skb(rt, skb, fl.saddr, tun_key->ipv4_dst,
tun_key->ipv4_tos, tun_key->ipv4_ttl, df,
src_port, dst_port,
- htonl(be64_to_cpu(tun_key->tun_id) << 8),
- false);
+ &md, false, vxflags);
if (err < 0)
ip_rt_put(rt);
return err;
diff --git a/net/openvswitch/vport-vxlan.h b/net/openvswitch/vport-vxlan.h
new file mode 100644
index 000000000000..4b08233e73d5
--- /dev/null
+++ b/net/openvswitch/vport-vxlan.h
@@ -0,0 +1,11 @@
+#ifndef VPORT_VXLAN_H
+#define VPORT_VXLAN_H 1
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+
+struct ovs_vxlan_opts {
+ __u32 gbp;
+};
+
+#endif
diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
index 2034c6d9cb5a..067a3fff1d2c 100644
--- a/net/openvswitch/vport.c
+++ b/net/openvswitch/vport.c
@@ -274,10 +274,8 @@ void ovs_vport_del(struct vport *vport)
ASSERT_OVSL();
hlist_del_rcu(&vport->hash_node);
-
- vport->ops->destroy(vport);
-
module_put(vport->ops->owner);
+ vport->ops->destroy(vport);
}
/**
@@ -480,7 +478,8 @@ void ovs_vport_receive(struct vport *vport, struct sk_buff *skb,
stats = this_cpu_ptr(vport->percpu_stats);
u64_stats_update_begin(&stats->syncp);
stats->rx_packets++;
- stats->rx_bytes += skb->len + (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0);
+ stats->rx_bytes += skb->len +
+ (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0);
u64_stats_update_end(&stats->syncp);
OVS_CB(skb)->input_vport = vport;
@@ -594,14 +593,7 @@ int ovs_tunnel_get_egress_info(struct ovs_tunnel_info *egress_tun_info,
* The process may need to be changed if the corresponding process
* in vports ops changed.
*/
- memset(&fl, 0, sizeof(fl));
- fl.daddr = tun_key->ipv4_dst;
- fl.saddr = tun_key->ipv4_src;
- fl.flowi4_tos = RT_TOS(tun_key->ipv4_tos);
- fl.flowi4_mark = skb_mark;
- fl.flowi4_proto = ipproto;
-
- rt = ip_route_output_key(net, &fl);
+ rt = ovs_tunnel_route_lookup(net, tun_key, skb_mark, &fl, ipproto);
if (IS_ERR(rt))
return PTR_ERR(rt);
diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h
index 99c8e71d9e6c..bc85331a6c60 100644
--- a/net/openvswitch/vport.h
+++ b/net/openvswitch/vport.h
@@ -103,6 +103,7 @@ struct vport_portids {
* @ops: Class structure.
* @percpu_stats: Points to per-CPU statistics used and maintained by vport
* @err_stats: Points to error statistics used and maintained by vport
+ * @detach_list: list used for detaching vport in net-exit call.
*/
struct vport {
struct rcu_head rcu;
@@ -117,6 +118,7 @@ struct vport {
struct pcpu_sw_netstats __percpu *percpu_stats;
struct vport_err_stats err_stats;
+ struct list_head detach_list;
};
/**
@@ -236,4 +238,22 @@ static inline void ovs_skb_postpush_rcsum(struct sk_buff *skb,
int ovs_vport_ops_register(struct vport_ops *ops);
void ovs_vport_ops_unregister(struct vport_ops *ops);
+static inline struct rtable *ovs_tunnel_route_lookup(struct net *net,
+ const struct ovs_key_ipv4_tunnel *key,
+ u32 mark,
+ struct flowi4 *fl,
+ u8 protocol)
+{
+ struct rtable *rt;
+
+ memset(fl, 0, sizeof(*fl));
+ fl->daddr = key->ipv4_dst;
+ fl->saddr = key->ipv4_src;
+ fl->flowi4_tos = RT_TOS(key->ipv4_tos);
+ fl->flowi4_mark = mark;
+ fl->flowi4_proto = protocol;
+
+ rt = ip_route_output_key(net, fl);
+ return rt;
+}
#endif /* vport.h */